{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 61215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 2.3445475101470947, "learning_rate": 0.0005999999996049288, "loss": 10.4731, "step": 1 }, { "epoch": 0.0, "grad_norm": 2.277515411376953, "learning_rate": 0.0005999999984197153, "loss": 9.9074, "step": 2 }, { "epoch": 0.0, "grad_norm": 2.00109601020813, "learning_rate": 0.0005999999964443598, "loss": 9.3624, "step": 3 }, { "epoch": 0.0, "grad_norm": 1.896841287612915, "learning_rate": 0.0005999999936788619, "loss": 8.9861, "step": 4 }, { "epoch": 0.0, "grad_norm": 1.8488935232162476, "learning_rate": 0.0005999999901232217, "loss": 8.6725, "step": 5 }, { "epoch": 0.0, "grad_norm": 1.8929357528686523, "learning_rate": 0.0005999999857774393, "loss": 8.2981, "step": 6 }, { "epoch": 0.0, "grad_norm": 1.8532694578170776, "learning_rate": 0.0005999999806415148, "loss": 8.0649, "step": 7 }, { "epoch": 0.0, "grad_norm": 1.7290151119232178, "learning_rate": 0.0005999999747154479, "loss": 7.7909, "step": 8 }, { "epoch": 0.0, "grad_norm": 1.5064324140548706, "learning_rate": 0.0005999999679992389, "loss": 7.5804, "step": 9 }, { "epoch": 0.0, "grad_norm": 1.3336801528930664, "learning_rate": 0.0005999999604928875, "loss": 7.4451, "step": 10 }, { "epoch": 0.0, "grad_norm": 1.1641117334365845, "learning_rate": 0.0005999999521963943, "loss": 7.2529, "step": 11 }, { "epoch": 0.0, "grad_norm": 1.0922843217849731, "learning_rate": 0.0005999999431097587, "loss": 6.9598, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.9296258091926575, "learning_rate": 0.0005999999332329811, "loss": 6.7569, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.8542349934577942, "learning_rate": 0.0005999999225660615, "loss": 6.7468, "step": 14 }, { "epoch": 0.0, "grad_norm": 0.8458271622657776, "learning_rate": 0.0005999999111089997, "loss": 6.6628, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.6604408621788025, "learning_rate": 0.0005999998988617959, "loss": 6.7246, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.6733983755111694, "learning_rate": 0.00059999988582445, "loss": 6.5789, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.5909510850906372, "learning_rate": 0.0005999998719969623, "loss": 6.5809, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.7529872059822083, "learning_rate": 0.0005999998573793326, "loss": 6.4471, "step": 19 }, { "epoch": 0.0, "grad_norm": 0.5217795968055725, "learning_rate": 0.000599999841971561, "loss": 6.5395, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.6023449897766113, "learning_rate": 0.0005999998257736476, "loss": 6.5889, "step": 21 }, { "epoch": 0.0, "grad_norm": 0.570929229259491, "learning_rate": 0.0005999998087855924, "loss": 6.3548, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.62796950340271, "learning_rate": 0.0005999997910073954, "loss": 6.3005, "step": 23 }, { "epoch": 0.0, "grad_norm": 0.5817890167236328, "learning_rate": 0.0005999997724390567, "loss": 6.4104, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.5530886650085449, "learning_rate": 0.0005999997530805763, "loss": 6.349, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.643932580947876, "learning_rate": 0.0005999997329319543, "loss": 6.3305, "step": 26 }, { "epoch": 0.0, "grad_norm": 0.5512265563011169, "learning_rate": 0.0005999997119931908, "loss": 6.3231, "step": 27 }, { "epoch": 0.0, "grad_norm": 0.6873916387557983, "learning_rate": 0.0005999996902642858, "loss": 6.1347, "step": 28 }, { "epoch": 0.0, "grad_norm": 0.7107514142990112, "learning_rate": 0.0005999996677452392, "loss": 6.3309, "step": 29 }, { "epoch": 0.0, "grad_norm": 0.6492154002189636, "learning_rate": 0.0005999996444360514, "loss": 6.1696, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.5806317925453186, "learning_rate": 0.0005999996203367222, "loss": 6.1405, "step": 31 }, { "epoch": 0.0, "grad_norm": 0.6508877873420715, "learning_rate": 0.0005999995954472518, "loss": 6.1023, "step": 32 }, { "epoch": 0.0, "grad_norm": 0.7064071893692017, "learning_rate": 0.00059999956976764, "loss": 6.0525, "step": 33 }, { "epoch": 0.0, "grad_norm": 0.6533799767494202, "learning_rate": 0.0005999995432978872, "loss": 6.2293, "step": 34 }, { "epoch": 0.0, "grad_norm": 0.5126307606697083, "learning_rate": 0.0005999995160379933, "loss": 6.0087, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.6564686298370361, "learning_rate": 0.0005999994879879586, "loss": 6.1167, "step": 36 }, { "epoch": 0.0, "grad_norm": 0.6836735606193542, "learning_rate": 0.0005999994591477828, "loss": 5.8947, "step": 37 }, { "epoch": 0.0, "grad_norm": 0.6649318337440491, "learning_rate": 0.0005999994295174661, "loss": 6.0403, "step": 38 }, { "epoch": 0.0, "grad_norm": 0.6308470368385315, "learning_rate": 0.0005999993990970087, "loss": 6.0636, "step": 39 }, { "epoch": 0.0, "grad_norm": 0.5832207202911377, "learning_rate": 0.0005999993678864107, "loss": 6.0658, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.5933493971824646, "learning_rate": 0.000599999335885672, "loss": 6.07, "step": 41 }, { "epoch": 0.0, "grad_norm": 0.6838207244873047, "learning_rate": 0.0005999993030947929, "loss": 5.8559, "step": 42 }, { "epoch": 0.0, "grad_norm": 0.6562783718109131, "learning_rate": 0.0005999992695137733, "loss": 5.7866, "step": 43 }, { "epoch": 0.0, "grad_norm": 0.6013045310974121, "learning_rate": 0.0005999992351426134, "loss": 5.631, "step": 44 }, { "epoch": 0.0, "grad_norm": 0.579399824142456, "learning_rate": 0.0005999991999813132, "loss": 6.0032, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.6205735206604004, "learning_rate": 0.0005999991640298728, "loss": 5.4523, "step": 46 }, { "epoch": 0.0, "grad_norm": 0.5519394874572754, "learning_rate": 0.0005999991272882925, "loss": 5.6271, "step": 47 }, { "epoch": 0.0, "grad_norm": 0.5361047983169556, "learning_rate": 0.0005999990897565721, "loss": 6.0044, "step": 48 }, { "epoch": 0.0, "grad_norm": 0.6790111064910889, "learning_rate": 0.0005999990514347119, "loss": 5.683, "step": 49 }, { "epoch": 0.0, "grad_norm": 0.6192212700843811, "learning_rate": 0.0005999990123227119, "loss": 5.8079, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.6024751663208008, "learning_rate": 0.0005999989724205721, "loss": 5.7658, "step": 51 }, { "epoch": 0.0, "grad_norm": 0.7180526852607727, "learning_rate": 0.000599998931728293, "loss": 5.7054, "step": 52 }, { "epoch": 0.0, "grad_norm": 0.7811439037322998, "learning_rate": 0.0005999988902458743, "loss": 5.7338, "step": 53 }, { "epoch": 0.0, "grad_norm": 0.678833544254303, "learning_rate": 0.0005999988479733163, "loss": 5.7092, "step": 54 }, { "epoch": 0.0, "grad_norm": 0.633277952671051, "learning_rate": 0.0005999988049106191, "loss": 5.6259, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.4287935495376587, "learning_rate": 0.0005999987610577827, "loss": 5.6371, "step": 56 }, { "epoch": 0.0, "grad_norm": 0.7538682818412781, "learning_rate": 0.0005999987164148074, "loss": 5.7429, "step": 57 }, { "epoch": 0.0, "grad_norm": 0.5953795909881592, "learning_rate": 0.0005999986709816932, "loss": 5.7226, "step": 58 }, { "epoch": 0.0, "grad_norm": 0.7172217965126038, "learning_rate": 0.0005999986247584401, "loss": 5.5155, "step": 59 }, { "epoch": 0.0, "grad_norm": 0.7222607135772705, "learning_rate": 0.0005999985777450485, "loss": 5.4241, "step": 60 }, { "epoch": 0.0, "grad_norm": 0.7364643216133118, "learning_rate": 0.0005999985299415183, "loss": 5.5181, "step": 61 }, { "epoch": 0.0, "grad_norm": 0.7325772047042847, "learning_rate": 0.0005999984813478497, "loss": 5.6243, "step": 62 }, { "epoch": 0.0, "grad_norm": 0.6013771891593933, "learning_rate": 0.000599998431964043, "loss": 5.5167, "step": 63 }, { "epoch": 0.0, "grad_norm": 0.5635715126991272, "learning_rate": 0.000599998381790098, "loss": 5.7081, "step": 64 }, { "epoch": 0.0, "grad_norm": 0.6563093662261963, "learning_rate": 0.000599998330826015, "loss": 5.4648, "step": 65 }, { "epoch": 0.0, "grad_norm": 0.5509313941001892, "learning_rate": 0.0005999982790717942, "loss": 5.4311, "step": 66 }, { "epoch": 0.0, "grad_norm": 0.7320454716682434, "learning_rate": 0.0005999982265274357, "loss": 5.5097, "step": 67 }, { "epoch": 0.0, "grad_norm": 0.6863231062889099, "learning_rate": 0.0005999981731929395, "loss": 5.5005, "step": 68 }, { "epoch": 0.0, "grad_norm": 0.543449342250824, "learning_rate": 0.0005999981190683059, "loss": 5.5048, "step": 69 }, { "epoch": 0.0, "grad_norm": 0.5858511328697205, "learning_rate": 0.000599998064153535, "loss": 5.5506, "step": 70 }, { "epoch": 0.0, "grad_norm": 0.7430124878883362, "learning_rate": 0.0005999980084486269, "loss": 5.2905, "step": 71 }, { "epoch": 0.0, "grad_norm": 0.5860936045646667, "learning_rate": 0.0005999979519535819, "loss": 5.5213, "step": 72 }, { "epoch": 0.0, "grad_norm": 0.5965762138366699, "learning_rate": 0.0005999978946683999, "loss": 5.1871, "step": 73 }, { "epoch": 0.0, "grad_norm": 0.6979895234107971, "learning_rate": 0.0005999978365930813, "loss": 5.3346, "step": 74 }, { "epoch": 0.0, "grad_norm": 0.7160101532936096, "learning_rate": 0.000599997777727626, "loss": 5.1066, "step": 75 }, { "epoch": 0.0, "grad_norm": 0.5626764297485352, "learning_rate": 0.0005999977180720344, "loss": 5.459, "step": 76 }, { "epoch": 0.0, "grad_norm": 0.6080682277679443, "learning_rate": 0.0005999976576263064, "loss": 5.6085, "step": 77 }, { "epoch": 0.0, "grad_norm": 0.6767348647117615, "learning_rate": 0.0005999975963904425, "loss": 5.4253, "step": 78 }, { "epoch": 0.0, "grad_norm": 0.5835095047950745, "learning_rate": 0.0005999975343644425, "loss": 5.3682, "step": 79 }, { "epoch": 0.0, "grad_norm": 0.5829843282699585, "learning_rate": 0.0005999974715483068, "loss": 5.2841, "step": 80 }, { "epoch": 0.0, "grad_norm": 0.6556689739227295, "learning_rate": 0.0005999974079420355, "loss": 5.3828, "step": 81 }, { "epoch": 0.0, "grad_norm": 0.5656118392944336, "learning_rate": 0.0005999973435456287, "loss": 5.4779, "step": 82 }, { "epoch": 0.0, "grad_norm": 0.553550660610199, "learning_rate": 0.0005999972783590868, "loss": 5.4759, "step": 83 }, { "epoch": 0.0, "grad_norm": 0.5812600255012512, "learning_rate": 0.0005999972123824096, "loss": 5.3517, "step": 84 }, { "epoch": 0.0, "grad_norm": 0.6190153956413269, "learning_rate": 0.0005999971456155976, "loss": 5.4019, "step": 85 }, { "epoch": 0.0, "grad_norm": 0.5636712312698364, "learning_rate": 0.0005999970780586508, "loss": 5.2974, "step": 86 }, { "epoch": 0.0, "grad_norm": 0.546588659286499, "learning_rate": 0.0005999970097115694, "loss": 5.4874, "step": 87 }, { "epoch": 0.0, "grad_norm": 0.6141658425331116, "learning_rate": 0.0005999969405743537, "loss": 5.431, "step": 88 }, { "epoch": 0.0, "grad_norm": 0.5903357267379761, "learning_rate": 0.0005999968706470037, "loss": 5.4673, "step": 89 }, { "epoch": 0.0, "grad_norm": 0.5161350965499878, "learning_rate": 0.0005999967999295199, "loss": 5.2426, "step": 90 }, { "epoch": 0.0, "grad_norm": 0.6005253791809082, "learning_rate": 0.000599996728421902, "loss": 5.4007, "step": 91 }, { "epoch": 0.0, "grad_norm": 0.5921294093132019, "learning_rate": 0.0005999966561241506, "loss": 5.3612, "step": 92 }, { "epoch": 0.0, "grad_norm": 0.5736501812934875, "learning_rate": 0.0005999965830362657, "loss": 5.327, "step": 93 }, { "epoch": 0.0, "grad_norm": 0.5713847279548645, "learning_rate": 0.0005999965091582475, "loss": 5.2216, "step": 94 }, { "epoch": 0.0, "grad_norm": 0.583519697189331, "learning_rate": 0.0005999964344900962, "loss": 5.101, "step": 95 }, { "epoch": 0.0, "grad_norm": 0.6061059236526489, "learning_rate": 0.0005999963590318122, "loss": 5.3313, "step": 96 }, { "epoch": 0.0, "grad_norm": 0.605904221534729, "learning_rate": 0.0005999962827833954, "loss": 5.3037, "step": 97 }, { "epoch": 0.0, "grad_norm": 0.5677119493484497, "learning_rate": 0.0005999962057448462, "loss": 5.2484, "step": 98 }, { "epoch": 0.0, "grad_norm": 0.6556852459907532, "learning_rate": 0.0005999961279161646, "loss": 5.3307, "step": 99 }, { "epoch": 0.0, "grad_norm": 0.6364889144897461, "learning_rate": 0.000599996049297351, "loss": 5.3118, "step": 100 }, { "epoch": 0.0, "grad_norm": 0.6421851515769958, "learning_rate": 0.0005999959698884056, "loss": 5.3212, "step": 101 }, { "epoch": 0.0, "grad_norm": 0.5902762413024902, "learning_rate": 0.0005999958896893285, "loss": 5.2373, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.5422645211219788, "learning_rate": 0.00059999580870012, "loss": 5.3965, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.6656525731086731, "learning_rate": 0.0005999957269207802, "loss": 5.3206, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.6076167821884155, "learning_rate": 0.0005999956443513094, "loss": 5.2297, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.5958263278007507, "learning_rate": 0.0005999955609917078, "loss": 5.2308, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.6043804287910461, "learning_rate": 0.0005999954768419757, "loss": 5.3056, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.5793024301528931, "learning_rate": 0.0005999953919021132, "loss": 5.1175, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.6292415857315063, "learning_rate": 0.0005999953061721206, "loss": 5.2404, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.539836585521698, "learning_rate": 0.0005999952196519981, "loss": 5.3884, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.5717549324035645, "learning_rate": 0.0005999951323417458, "loss": 5.172, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.5996467471122742, "learning_rate": 0.0005999950442413642, "loss": 5.1867, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.6268661022186279, "learning_rate": 0.0005999949553508534, "loss": 5.2663, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.6602498888969421, "learning_rate": 0.0005999948656702136, "loss": 5.0668, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.5790102481842041, "learning_rate": 0.0005999947751994451, "loss": 5.1133, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.6533108949661255, "learning_rate": 0.000599994683938548, "loss": 5.1798, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.6445215344429016, "learning_rate": 0.0005999945918875227, "loss": 5.1873, "step": 117 }, { "epoch": 0.01, "grad_norm": 0.5656430125236511, "learning_rate": 0.0005999944990463693, "loss": 5.0912, "step": 118 }, { "epoch": 0.01, "grad_norm": 0.5833689570426941, "learning_rate": 0.0005999944054150882, "loss": 5.1466, "step": 119 }, { "epoch": 0.01, "grad_norm": 0.6031045317649841, "learning_rate": 0.0005999943109936796, "loss": 5.1367, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.6581068634986877, "learning_rate": 0.0005999942157821436, "loss": 5.1474, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.5878651738166809, "learning_rate": 0.0005999941197804806, "loss": 5.116, "step": 122 }, { "epoch": 0.01, "grad_norm": 0.6598708629608154, "learning_rate": 0.0005999940229886909, "loss": 5.2586, "step": 123 }, { "epoch": 0.01, "grad_norm": 0.6158697605133057, "learning_rate": 0.0005999939254067746, "loss": 5.2573, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.624995231628418, "learning_rate": 0.0005999938270347321, "loss": 5.2498, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.5899373292922974, "learning_rate": 0.0005999937278725636, "loss": 5.2714, "step": 126 }, { "epoch": 0.01, "grad_norm": 0.5740645527839661, "learning_rate": 0.0005999936279202693, "loss": 5.1358, "step": 127 }, { "epoch": 0.01, "grad_norm": 0.609225332736969, "learning_rate": 0.0005999935271778495, "loss": 5.2404, "step": 128 }, { "epoch": 0.01, "grad_norm": 0.6659890413284302, "learning_rate": 0.0005999934256453046, "loss": 4.9877, "step": 129 }, { "epoch": 0.01, "grad_norm": 0.5626158714294434, "learning_rate": 0.0005999933233226347, "loss": 5.2386, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.5483737587928772, "learning_rate": 0.0005999932202098401, "loss": 5.2031, "step": 131 }, { "epoch": 0.01, "grad_norm": 0.6274495124816895, "learning_rate": 0.0005999931163069211, "loss": 5.1119, "step": 132 }, { "epoch": 0.01, "grad_norm": 0.6077089309692383, "learning_rate": 0.000599993011613878, "loss": 4.9585, "step": 133 }, { "epoch": 0.01, "grad_norm": 0.5846816301345825, "learning_rate": 0.000599992906130711, "loss": 5.2099, "step": 134 }, { "epoch": 0.01, "grad_norm": 0.6238222718238831, "learning_rate": 0.0005999927998574204, "loss": 5.0893, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.720881462097168, "learning_rate": 0.0005999926927940066, "loss": 5.0845, "step": 136 }, { "epoch": 0.01, "grad_norm": 0.4975273013114929, "learning_rate": 0.0005999925849404696, "loss": 5.0894, "step": 137 }, { "epoch": 0.01, "grad_norm": 0.6107990741729736, "learning_rate": 0.00059999247629681, "loss": 5.0879, "step": 138 }, { "epoch": 0.01, "grad_norm": 0.6482533812522888, "learning_rate": 0.000599992366863028, "loss": 5.1011, "step": 139 }, { "epoch": 0.01, "grad_norm": 0.5770416855812073, "learning_rate": 0.0005999922566391237, "loss": 4.9711, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.5987694263458252, "learning_rate": 0.0005999921456250976, "loss": 5.2741, "step": 141 }, { "epoch": 0.01, "grad_norm": 0.5902721285820007, "learning_rate": 0.00059999203382095, "loss": 4.9697, "step": 142 }, { "epoch": 0.01, "grad_norm": 0.5815165042877197, "learning_rate": 0.000599991921226681, "loss": 5.0085, "step": 143 }, { "epoch": 0.01, "grad_norm": 0.6174842119216919, "learning_rate": 0.000599991807842291, "loss": 5.1251, "step": 144 }, { "epoch": 0.01, "grad_norm": 0.5789567232131958, "learning_rate": 0.0005999916936677804, "loss": 5.0212, "step": 145 }, { "epoch": 0.01, "grad_norm": 0.5186668634414673, "learning_rate": 0.0005999915787031493, "loss": 5.0934, "step": 146 }, { "epoch": 0.01, "grad_norm": 0.5524886250495911, "learning_rate": 0.0005999914629483983, "loss": 5.0444, "step": 147 }, { "epoch": 0.01, "grad_norm": 0.6883447766304016, "learning_rate": 0.0005999913464035273, "loss": 5.0623, "step": 148 }, { "epoch": 0.01, "grad_norm": 0.5598011016845703, "learning_rate": 0.000599991229068537, "loss": 5.1861, "step": 149 }, { "epoch": 0.01, "grad_norm": 0.6614800691604614, "learning_rate": 0.0005999911109434274, "loss": 4.9011, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.5990419983863831, "learning_rate": 0.0005999909920281991, "loss": 4.9618, "step": 151 }, { "epoch": 0.01, "grad_norm": 0.5454047322273254, "learning_rate": 0.0005999908723228522, "loss": 5.1169, "step": 152 }, { "epoch": 0.01, "grad_norm": 0.6134856939315796, "learning_rate": 0.000599990751827387, "loss": 5.0239, "step": 153 }, { "epoch": 0.01, "grad_norm": 0.6012156009674072, "learning_rate": 0.000599990630541804, "loss": 5.24, "step": 154 }, { "epoch": 0.01, "grad_norm": 0.5873432755470276, "learning_rate": 0.0005999905084661033, "loss": 5.1557, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.6696448922157288, "learning_rate": 0.0005999903856002855, "loss": 5.0485, "step": 156 }, { "epoch": 0.01, "grad_norm": 0.5300459265708923, "learning_rate": 0.0005999902619443507, "loss": 5.0126, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.5879744291305542, "learning_rate": 0.0005999901374982992, "loss": 4.8251, "step": 158 }, { "epoch": 0.01, "grad_norm": 0.6002203822135925, "learning_rate": 0.0005999900122621314, "loss": 4.8551, "step": 159 }, { "epoch": 0.01, "grad_norm": 0.584845244884491, "learning_rate": 0.0005999898862358478, "loss": 5.0479, "step": 160 }, { "epoch": 0.01, "grad_norm": 0.7027909755706787, "learning_rate": 0.0005999897594194485, "loss": 5.1492, "step": 161 }, { "epoch": 0.01, "grad_norm": 0.6529715061187744, "learning_rate": 0.0005999896318129339, "loss": 5.0112, "step": 162 }, { "epoch": 0.01, "grad_norm": 0.7082669734954834, "learning_rate": 0.0005999895034163043, "loss": 4.8184, "step": 163 }, { "epoch": 0.01, "grad_norm": 0.6252927184104919, "learning_rate": 0.0005999893742295602, "loss": 4.8673, "step": 164 }, { "epoch": 0.01, "grad_norm": 0.6685312390327454, "learning_rate": 0.0005999892442527017, "loss": 4.9796, "step": 165 }, { "epoch": 0.01, "grad_norm": 0.6606252193450928, "learning_rate": 0.0005999891134857293, "loss": 5.0516, "step": 166 }, { "epoch": 0.01, "grad_norm": 0.6550689339637756, "learning_rate": 0.0005999889819286433, "loss": 4.9988, "step": 167 }, { "epoch": 0.01, "grad_norm": 0.6406249403953552, "learning_rate": 0.000599988849581444, "loss": 5.2205, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.5724087953567505, "learning_rate": 0.0005999887164441319, "loss": 4.8884, "step": 169 }, { "epoch": 0.01, "grad_norm": 0.6238783597946167, "learning_rate": 0.0005999885825167073, "loss": 5.0775, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.6989482641220093, "learning_rate": 0.0005999884477991703, "loss": 4.9027, "step": 171 }, { "epoch": 0.01, "grad_norm": 0.5641810894012451, "learning_rate": 0.0005999883122915216, "loss": 5.0401, "step": 172 }, { "epoch": 0.01, "grad_norm": 0.5841800570487976, "learning_rate": 0.0005999881759937614, "loss": 4.9143, "step": 173 }, { "epoch": 0.01, "grad_norm": 0.6667760610580444, "learning_rate": 0.0005999880389058902, "loss": 4.7318, "step": 174 }, { "epoch": 0.01, "grad_norm": 0.5549502372741699, "learning_rate": 0.0005999879010279082, "loss": 4.9401, "step": 175 }, { "epoch": 0.01, "grad_norm": 0.562578022480011, "learning_rate": 0.0005999877623598156, "loss": 4.8758, "step": 176 }, { "epoch": 0.01, "grad_norm": 0.6487370729446411, "learning_rate": 0.0005999876229016132, "loss": 4.8428, "step": 177 }, { "epoch": 0.01, "grad_norm": 0.5677328705787659, "learning_rate": 0.000599987482653301, "loss": 4.94, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.5713781714439392, "learning_rate": 0.0005999873416148795, "loss": 4.9779, "step": 179 }, { "epoch": 0.01, "grad_norm": 0.6296773552894592, "learning_rate": 0.0005999871997863491, "loss": 5.0549, "step": 180 }, { "epoch": 0.01, "grad_norm": 0.609196126461029, "learning_rate": 0.0005999870571677102, "loss": 4.8211, "step": 181 }, { "epoch": 0.01, "grad_norm": 0.5543152689933777, "learning_rate": 0.0005999869137589631, "loss": 4.9272, "step": 182 }, { "epoch": 0.01, "grad_norm": 0.6066011190414429, "learning_rate": 0.0005999867695601082, "loss": 4.9566, "step": 183 }, { "epoch": 0.01, "grad_norm": 0.5656384229660034, "learning_rate": 0.0005999866245711458, "loss": 4.9814, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.6074258685112, "learning_rate": 0.0005999864787920765, "loss": 4.723, "step": 185 }, { "epoch": 0.01, "grad_norm": 0.5915899872779846, "learning_rate": 0.0005999863322229005, "loss": 4.6722, "step": 186 }, { "epoch": 0.01, "grad_norm": 0.5853182673454285, "learning_rate": 0.0005999861848636183, "loss": 4.8963, "step": 187 }, { "epoch": 0.01, "grad_norm": 0.5973837375640869, "learning_rate": 0.00059998603671423, "loss": 4.9082, "step": 188 }, { "epoch": 0.01, "grad_norm": 0.5674445033073425, "learning_rate": 0.0005999858877747365, "loss": 4.8848, "step": 189 }, { "epoch": 0.01, "grad_norm": 0.6249287128448486, "learning_rate": 0.0005999857380451376, "loss": 4.7804, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.5839101076126099, "learning_rate": 0.0005999855875254343, "loss": 4.9382, "step": 191 }, { "epoch": 0.01, "grad_norm": 0.6714131236076355, "learning_rate": 0.0005999854362156265, "loss": 4.9891, "step": 192 }, { "epoch": 0.01, "grad_norm": 0.6222569942474365, "learning_rate": 0.0005999852841157148, "loss": 4.8221, "step": 193 }, { "epoch": 0.01, "grad_norm": 0.671981692314148, "learning_rate": 0.0005999851312256997, "loss": 4.7026, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.5763186812400818, "learning_rate": 0.0005999849775455815, "loss": 4.816, "step": 195 }, { "epoch": 0.01, "grad_norm": 0.5959866642951965, "learning_rate": 0.0005999848230753605, "loss": 4.9962, "step": 196 }, { "epoch": 0.01, "grad_norm": 0.6573556661605835, "learning_rate": 0.0005999846678150372, "loss": 4.8953, "step": 197 }, { "epoch": 0.01, "grad_norm": 0.6091251373291016, "learning_rate": 0.0005999845117646122, "loss": 4.9322, "step": 198 }, { "epoch": 0.01, "grad_norm": 0.6092210412025452, "learning_rate": 0.0005999843549240856, "loss": 5.0708, "step": 199 }, { "epoch": 0.01, "grad_norm": 0.6042549014091492, "learning_rate": 0.000599984197293458, "loss": 4.8596, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.6179243326187134, "learning_rate": 0.0005999840388727296, "loss": 4.9343, "step": 201 }, { "epoch": 0.01, "grad_norm": 0.558975875377655, "learning_rate": 0.0005999838796619012, "loss": 4.736, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.6428976058959961, "learning_rate": 0.0005999837196609727, "loss": 4.7473, "step": 203 }, { "epoch": 0.01, "grad_norm": 0.6300376057624817, "learning_rate": 0.0005999835588699451, "loss": 4.5575, "step": 204 }, { "epoch": 0.01, "grad_norm": 0.6382180452346802, "learning_rate": 0.0005999833972888184, "loss": 4.649, "step": 205 }, { "epoch": 0.01, "grad_norm": 0.6212637424468994, "learning_rate": 0.0005999832349175932, "loss": 4.9914, "step": 206 }, { "epoch": 0.01, "grad_norm": 0.5995153188705444, "learning_rate": 0.0005999830717562699, "loss": 4.7988, "step": 207 }, { "epoch": 0.01, "grad_norm": 0.6228866577148438, "learning_rate": 0.0005999829078048489, "loss": 4.7471, "step": 208 }, { "epoch": 0.01, "grad_norm": 0.5948235988616943, "learning_rate": 0.0005999827430633307, "loss": 4.7403, "step": 209 }, { "epoch": 0.01, "grad_norm": 0.5553930401802063, "learning_rate": 0.0005999825775317156, "loss": 4.7738, "step": 210 }, { "epoch": 0.01, "grad_norm": 0.58103346824646, "learning_rate": 0.0005999824112100042, "loss": 4.8885, "step": 211 }, { "epoch": 0.01, "grad_norm": 0.6065656542778015, "learning_rate": 0.0005999822440981968, "loss": 4.7514, "step": 212 }, { "epoch": 0.01, "grad_norm": 0.6187020540237427, "learning_rate": 0.000599982076196294, "loss": 4.7373, "step": 213 }, { "epoch": 0.01, "grad_norm": 0.6189316511154175, "learning_rate": 0.0005999819075042961, "loss": 4.7754, "step": 214 }, { "epoch": 0.01, "grad_norm": 0.6775714159011841, "learning_rate": 0.0005999817380222035, "loss": 4.7958, "step": 215 }, { "epoch": 0.01, "grad_norm": 0.6709480285644531, "learning_rate": 0.0005999815677500168, "loss": 4.6884, "step": 216 }, { "epoch": 0.01, "grad_norm": 0.5949779152870178, "learning_rate": 0.0005999813966877363, "loss": 4.7673, "step": 217 }, { "epoch": 0.01, "grad_norm": 0.6341105699539185, "learning_rate": 0.0005999812248353626, "loss": 4.9962, "step": 218 }, { "epoch": 0.01, "grad_norm": 0.7519387006759644, "learning_rate": 0.0005999810521928962, "loss": 4.685, "step": 219 }, { "epoch": 0.01, "grad_norm": 0.6779834628105164, "learning_rate": 0.0005999808787603373, "loss": 4.8129, "step": 220 }, { "epoch": 0.01, "grad_norm": 0.6366468667984009, "learning_rate": 0.0005999807045376866, "loss": 4.7325, "step": 221 }, { "epoch": 0.01, "grad_norm": 0.6042128205299377, "learning_rate": 0.0005999805295249444, "loss": 4.6506, "step": 222 }, { "epoch": 0.01, "grad_norm": 0.7438672780990601, "learning_rate": 0.0005999803537221112, "loss": 4.741, "step": 223 }, { "epoch": 0.01, "grad_norm": 0.6630218029022217, "learning_rate": 0.0005999801771291876, "loss": 4.7997, "step": 224 }, { "epoch": 0.01, "grad_norm": 0.5924032330513, "learning_rate": 0.0005999799997461738, "loss": 4.6237, "step": 225 }, { "epoch": 0.01, "grad_norm": 0.6375287175178528, "learning_rate": 0.0005999798215730705, "loss": 4.6495, "step": 226 }, { "epoch": 0.01, "grad_norm": 0.6222159266471863, "learning_rate": 0.000599979642609878, "loss": 4.7176, "step": 227 }, { "epoch": 0.01, "grad_norm": 0.6462298631668091, "learning_rate": 0.0005999794628565969, "loss": 4.79, "step": 228 }, { "epoch": 0.01, "grad_norm": 0.5806784629821777, "learning_rate": 0.0005999792823132276, "loss": 4.7, "step": 229 }, { "epoch": 0.01, "grad_norm": 0.6010195016860962, "learning_rate": 0.0005999791009797706, "loss": 4.7524, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.594200849533081, "learning_rate": 0.0005999789188562264, "loss": 4.8351, "step": 231 }, { "epoch": 0.01, "grad_norm": 0.647185206413269, "learning_rate": 0.0005999787359425954, "loss": 4.7011, "step": 232 }, { "epoch": 0.01, "grad_norm": 0.6233699321746826, "learning_rate": 0.0005999785522388783, "loss": 4.8662, "step": 233 }, { "epoch": 0.01, "grad_norm": 0.6353042721748352, "learning_rate": 0.0005999783677450753, "loss": 4.5278, "step": 234 }, { "epoch": 0.01, "grad_norm": 0.6044039130210876, "learning_rate": 0.000599978182461187, "loss": 4.7688, "step": 235 }, { "epoch": 0.01, "grad_norm": 0.5654269456863403, "learning_rate": 0.000599977996387214, "loss": 4.7677, "step": 236 }, { "epoch": 0.01, "grad_norm": 0.6043969988822937, "learning_rate": 0.0005999778095231566, "loss": 4.6497, "step": 237 }, { "epoch": 0.01, "grad_norm": 0.6131507754325867, "learning_rate": 0.0005999776218690154, "loss": 4.4584, "step": 238 }, { "epoch": 0.01, "grad_norm": 0.6189717650413513, "learning_rate": 0.0005999774334247909, "loss": 4.7738, "step": 239 }, { "epoch": 0.01, "grad_norm": 0.6453130841255188, "learning_rate": 0.0005999772441904836, "loss": 4.7744, "step": 240 }, { "epoch": 0.01, "grad_norm": 0.5762782096862793, "learning_rate": 0.0005999770541660939, "loss": 4.6736, "step": 241 }, { "epoch": 0.01, "grad_norm": 0.6359663009643555, "learning_rate": 0.0005999768633516224, "loss": 4.6294, "step": 242 }, { "epoch": 0.01, "grad_norm": 0.5880213379859924, "learning_rate": 0.0005999766717470696, "loss": 4.7633, "step": 243 }, { "epoch": 0.01, "grad_norm": 0.5674964189529419, "learning_rate": 0.0005999764793524359, "loss": 4.9202, "step": 244 }, { "epoch": 0.01, "grad_norm": 0.6202216744422913, "learning_rate": 0.000599976286167722, "loss": 4.8709, "step": 245 }, { "epoch": 0.01, "grad_norm": 0.6854368448257446, "learning_rate": 0.0005999760921929283, "loss": 4.5826, "step": 246 }, { "epoch": 0.01, "grad_norm": 0.6487476825714111, "learning_rate": 0.0005999758974280551, "loss": 4.6156, "step": 247 }, { "epoch": 0.01, "grad_norm": 0.650581955909729, "learning_rate": 0.0005999757018731034, "loss": 4.6199, "step": 248 }, { "epoch": 0.01, "grad_norm": 0.6073902249336243, "learning_rate": 0.0005999755055280732, "loss": 4.6756, "step": 249 }, { "epoch": 0.01, "grad_norm": 0.8026038408279419, "learning_rate": 0.0005999753083929653, "loss": 4.6487, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.5802706480026245, "learning_rate": 0.0005999751104677803, "loss": 4.8061, "step": 251 }, { "epoch": 0.01, "grad_norm": 0.6185906529426575, "learning_rate": 0.0005999749117525185, "loss": 4.5397, "step": 252 }, { "epoch": 0.01, "grad_norm": 0.6637834310531616, "learning_rate": 0.0005999747122471805, "loss": 4.7092, "step": 253 }, { "epoch": 0.01, "grad_norm": 0.6466782093048096, "learning_rate": 0.0005999745119517669, "loss": 4.8699, "step": 254 }, { "epoch": 0.01, "grad_norm": 0.7292912602424622, "learning_rate": 0.0005999743108662781, "loss": 4.6157, "step": 255 }, { "epoch": 0.01, "grad_norm": 0.6685052514076233, "learning_rate": 0.0005999741089907146, "loss": 4.4988, "step": 256 }, { "epoch": 0.01, "grad_norm": 0.6441407799720764, "learning_rate": 0.0005999739063250772, "loss": 4.7335, "step": 257 }, { "epoch": 0.01, "grad_norm": 0.7168517708778381, "learning_rate": 0.0005999737028693662, "loss": 4.7464, "step": 258 }, { "epoch": 0.01, "grad_norm": 0.5829117298126221, "learning_rate": 0.0005999734986235822, "loss": 4.6709, "step": 259 }, { "epoch": 0.01, "grad_norm": 0.6130709052085876, "learning_rate": 0.0005999732935877258, "loss": 4.942, "step": 260 }, { "epoch": 0.01, "grad_norm": 0.5860216617584229, "learning_rate": 0.0005999730877617974, "loss": 4.8475, "step": 261 }, { "epoch": 0.01, "grad_norm": 0.6693971157073975, "learning_rate": 0.0005999728811457975, "loss": 4.6322, "step": 262 }, { "epoch": 0.01, "grad_norm": 0.6591095924377441, "learning_rate": 0.0005999726737397269, "loss": 4.5757, "step": 263 }, { "epoch": 0.01, "grad_norm": 0.6148874759674072, "learning_rate": 0.0005999724655435862, "loss": 4.6693, "step": 264 }, { "epoch": 0.01, "grad_norm": 0.6252702474594116, "learning_rate": 0.0005999722565573754, "loss": 4.8816, "step": 265 }, { "epoch": 0.01, "grad_norm": 0.6648889780044556, "learning_rate": 0.0005999720467810956, "loss": 4.8358, "step": 266 }, { "epoch": 0.01, "grad_norm": 0.6808665990829468, "learning_rate": 0.0005999718362147471, "loss": 4.7751, "step": 267 }, { "epoch": 0.01, "grad_norm": 0.6691820621490479, "learning_rate": 0.0005999716248583306, "loss": 4.7164, "step": 268 }, { "epoch": 0.01, "grad_norm": 0.7447707653045654, "learning_rate": 0.0005999714127118465, "loss": 4.466, "step": 269 }, { "epoch": 0.01, "grad_norm": 0.6744222640991211, "learning_rate": 0.0005999711997752954, "loss": 4.3235, "step": 270 }, { "epoch": 0.01, "grad_norm": 0.7479273676872253, "learning_rate": 0.000599970986048678, "loss": 4.63, "step": 271 }, { "epoch": 0.01, "grad_norm": 0.7194980382919312, "learning_rate": 0.0005999707715319947, "loss": 4.4956, "step": 272 }, { "epoch": 0.01, "grad_norm": 0.6701153516769409, "learning_rate": 0.000599970556225246, "loss": 4.5292, "step": 273 }, { "epoch": 0.01, "grad_norm": 0.653418779373169, "learning_rate": 0.0005999703401284328, "loss": 4.734, "step": 274 }, { "epoch": 0.01, "grad_norm": 0.6312591433525085, "learning_rate": 0.0005999701232415552, "loss": 4.6344, "step": 275 }, { "epoch": 0.01, "grad_norm": 0.6036155223846436, "learning_rate": 0.0005999699055646143, "loss": 4.5935, "step": 276 }, { "epoch": 0.01, "grad_norm": 0.6546796560287476, "learning_rate": 0.0005999696870976103, "loss": 4.4745, "step": 277 }, { "epoch": 0.01, "grad_norm": 0.6474147439002991, "learning_rate": 0.0005999694678405438, "loss": 4.3296, "step": 278 }, { "epoch": 0.01, "grad_norm": 0.6968685984611511, "learning_rate": 0.0005999692477934153, "loss": 4.6786, "step": 279 }, { "epoch": 0.01, "grad_norm": 0.6009911894798279, "learning_rate": 0.0005999690269562259, "loss": 4.6661, "step": 280 }, { "epoch": 0.01, "grad_norm": 0.6383833289146423, "learning_rate": 0.0005999688053289757, "loss": 4.7877, "step": 281 }, { "epoch": 0.01, "grad_norm": 0.6396824717521667, "learning_rate": 0.0005999685829116652, "loss": 4.6966, "step": 282 }, { "epoch": 0.01, "grad_norm": 0.6602899432182312, "learning_rate": 0.0005999683597042953, "loss": 4.4193, "step": 283 }, { "epoch": 0.01, "grad_norm": 0.6245065927505493, "learning_rate": 0.0005999681357068665, "loss": 4.4753, "step": 284 }, { "epoch": 0.01, "grad_norm": 0.6217121481895447, "learning_rate": 0.0005999679109193794, "loss": 4.3305, "step": 285 }, { "epoch": 0.01, "grad_norm": 0.5806468725204468, "learning_rate": 0.0005999676853418345, "loss": 4.57, "step": 286 }, { "epoch": 0.01, "grad_norm": 0.7057567238807678, "learning_rate": 0.0005999674589742325, "loss": 4.7142, "step": 287 }, { "epoch": 0.01, "grad_norm": 0.6089333891868591, "learning_rate": 0.0005999672318165738, "loss": 4.5676, "step": 288 }, { "epoch": 0.01, "grad_norm": 0.6079134941101074, "learning_rate": 0.0005999670038688592, "loss": 4.7218, "step": 289 }, { "epoch": 0.01, "grad_norm": 0.611993670463562, "learning_rate": 0.0005999667751310893, "loss": 4.5366, "step": 290 }, { "epoch": 0.01, "grad_norm": 0.5751462578773499, "learning_rate": 0.0005999665456032647, "loss": 4.5681, "step": 291 }, { "epoch": 0.01, "grad_norm": 0.6958464980125427, "learning_rate": 0.0005999663152853858, "loss": 4.714, "step": 292 }, { "epoch": 0.01, "grad_norm": 0.7553857564926147, "learning_rate": 0.0005999660841774534, "loss": 4.6257, "step": 293 }, { "epoch": 0.01, "grad_norm": 0.6197561621665955, "learning_rate": 0.0005999658522794681, "loss": 4.6357, "step": 294 }, { "epoch": 0.01, "grad_norm": 0.5696096420288086, "learning_rate": 0.0005999656195914305, "loss": 4.7116, "step": 295 }, { "epoch": 0.01, "grad_norm": 0.6267894506454468, "learning_rate": 0.0005999653861133411, "loss": 4.6124, "step": 296 }, { "epoch": 0.01, "grad_norm": 0.6374806761741638, "learning_rate": 0.0005999651518452006, "loss": 4.4663, "step": 297 }, { "epoch": 0.01, "grad_norm": 0.619802713394165, "learning_rate": 0.0005999649167870097, "loss": 4.6616, "step": 298 }, { "epoch": 0.01, "grad_norm": 0.5879377722740173, "learning_rate": 0.0005999646809387689, "loss": 4.5237, "step": 299 }, { "epoch": 0.01, "grad_norm": 0.6228669881820679, "learning_rate": 0.0005999644443004789, "loss": 4.7327, "step": 300 }, { "epoch": 0.01, "grad_norm": 1.0745501518249512, "learning_rate": 0.0005999642068721403, "loss": 4.742, "step": 301 }, { "epoch": 0.01, "grad_norm": 0.6069528460502625, "learning_rate": 0.0005999639686537537, "loss": 4.5265, "step": 302 }, { "epoch": 0.01, "grad_norm": 0.6730210185050964, "learning_rate": 0.0005999637296453195, "loss": 4.7896, "step": 303 }, { "epoch": 0.01, "grad_norm": 0.665211021900177, "learning_rate": 0.0005999634898468389, "loss": 4.4585, "step": 304 }, { "epoch": 0.01, "grad_norm": 0.6540265679359436, "learning_rate": 0.0005999632492583121, "loss": 4.2711, "step": 305 }, { "epoch": 0.01, "grad_norm": 0.6752997040748596, "learning_rate": 0.0005999630078797397, "loss": 4.6462, "step": 306 }, { "epoch": 0.02, "grad_norm": 0.6478506326675415, "learning_rate": 0.0005999627657111227, "loss": 4.4006, "step": 307 }, { "epoch": 0.02, "grad_norm": 0.6323385238647461, "learning_rate": 0.0005999625227524613, "loss": 4.4891, "step": 308 }, { "epoch": 0.02, "grad_norm": 0.6497457027435303, "learning_rate": 0.0005999622790037563, "loss": 4.3089, "step": 309 }, { "epoch": 0.02, "grad_norm": 0.6490249037742615, "learning_rate": 0.0005999620344650085, "loss": 4.6512, "step": 310 }, { "epoch": 0.02, "grad_norm": 0.6243768930435181, "learning_rate": 0.0005999617891362185, "loss": 4.8247, "step": 311 }, { "epoch": 0.02, "grad_norm": 0.6056337356567383, "learning_rate": 0.0005999615430173868, "loss": 4.6136, "step": 312 }, { "epoch": 0.02, "grad_norm": 0.59369295835495, "learning_rate": 0.0005999612961085141, "loss": 4.5969, "step": 313 }, { "epoch": 0.02, "grad_norm": 0.627199113368988, "learning_rate": 0.0005999610484096011, "loss": 4.4371, "step": 314 }, { "epoch": 0.02, "grad_norm": 0.6465263962745667, "learning_rate": 0.0005999607999206484, "loss": 4.492, "step": 315 }, { "epoch": 0.02, "grad_norm": 0.6473185420036316, "learning_rate": 0.0005999605506416567, "loss": 4.4302, "step": 316 }, { "epoch": 0.02, "grad_norm": 0.6702262759208679, "learning_rate": 0.0005999603005726267, "loss": 4.6095, "step": 317 }, { "epoch": 0.02, "grad_norm": 0.6607844829559326, "learning_rate": 0.0005999600497135589, "loss": 4.6373, "step": 318 }, { "epoch": 0.02, "grad_norm": 0.6345941424369812, "learning_rate": 0.0005999597980644541, "loss": 4.602, "step": 319 }, { "epoch": 0.02, "grad_norm": 0.8155503869056702, "learning_rate": 0.0005999595456253129, "loss": 4.7077, "step": 320 }, { "epoch": 0.02, "grad_norm": 0.5928239822387695, "learning_rate": 0.000599959292396136, "loss": 4.5808, "step": 321 }, { "epoch": 0.02, "grad_norm": 0.6200684309005737, "learning_rate": 0.000599959038376924, "loss": 4.4643, "step": 322 }, { "epoch": 0.02, "grad_norm": 0.6029160618782043, "learning_rate": 0.0005999587835676778, "loss": 4.6415, "step": 323 }, { "epoch": 0.02, "grad_norm": 0.6697453260421753, "learning_rate": 0.0005999585279683977, "loss": 4.706, "step": 324 }, { "epoch": 0.02, "grad_norm": 0.6388944983482361, "learning_rate": 0.0005999582715790847, "loss": 4.4938, "step": 325 }, { "epoch": 0.02, "grad_norm": 0.6525525450706482, "learning_rate": 0.0005999580143997393, "loss": 4.4293, "step": 326 }, { "epoch": 0.02, "grad_norm": 0.6061077117919922, "learning_rate": 0.0005999577564303621, "loss": 4.4593, "step": 327 }, { "epoch": 0.02, "grad_norm": 0.6646733283996582, "learning_rate": 0.000599957497670954, "loss": 4.5408, "step": 328 }, { "epoch": 0.02, "grad_norm": 0.6451191306114197, "learning_rate": 0.0005999572381215155, "loss": 4.6175, "step": 329 }, { "epoch": 0.02, "grad_norm": 0.6172031164169312, "learning_rate": 0.0005999569777820475, "loss": 4.3329, "step": 330 }, { "epoch": 0.02, "grad_norm": 0.7403270602226257, "learning_rate": 0.0005999567166525505, "loss": 4.4766, "step": 331 }, { "epoch": 0.02, "grad_norm": 0.7167713642120361, "learning_rate": 0.0005999564547330251, "loss": 4.6084, "step": 332 }, { "epoch": 0.02, "grad_norm": 0.605120062828064, "learning_rate": 0.0005999561920234722, "loss": 4.6095, "step": 333 }, { "epoch": 0.02, "grad_norm": 0.6738837957382202, "learning_rate": 0.0005999559285238925, "loss": 4.443, "step": 334 }, { "epoch": 0.02, "grad_norm": 0.6576931476593018, "learning_rate": 0.0005999556642342866, "loss": 4.4609, "step": 335 }, { "epoch": 0.02, "grad_norm": 0.6313718557357788, "learning_rate": 0.0005999553991546552, "loss": 4.4452, "step": 336 }, { "epoch": 0.02, "grad_norm": 0.5991811156272888, "learning_rate": 0.0005999551332849989, "loss": 4.5898, "step": 337 }, { "epoch": 0.02, "grad_norm": 0.6302996277809143, "learning_rate": 0.0005999548666253186, "loss": 4.3871, "step": 338 }, { "epoch": 0.02, "grad_norm": 0.637596607208252, "learning_rate": 0.0005999545991756149, "loss": 4.563, "step": 339 }, { "epoch": 0.02, "grad_norm": 0.6226162910461426, "learning_rate": 0.0005999543309358886, "loss": 4.5605, "step": 340 }, { "epoch": 0.02, "grad_norm": 0.6292939186096191, "learning_rate": 0.0005999540619061402, "loss": 4.3628, "step": 341 }, { "epoch": 0.02, "grad_norm": 0.6156478524208069, "learning_rate": 0.0005999537920863705, "loss": 4.3611, "step": 342 }, { "epoch": 0.02, "grad_norm": 0.6901237368583679, "learning_rate": 0.0005999535214765803, "loss": 4.4794, "step": 343 }, { "epoch": 0.02, "grad_norm": 0.5876708626747131, "learning_rate": 0.0005999532500767703, "loss": 4.5018, "step": 344 }, { "epoch": 0.02, "grad_norm": 0.6149948239326477, "learning_rate": 0.0005999529778869411, "loss": 4.423, "step": 345 }, { "epoch": 0.02, "grad_norm": 0.6238973140716553, "learning_rate": 0.0005999527049070935, "loss": 4.4507, "step": 346 }, { "epoch": 0.02, "grad_norm": 0.6290735602378845, "learning_rate": 0.0005999524311372282, "loss": 4.6397, "step": 347 }, { "epoch": 0.02, "grad_norm": 0.5849318504333496, "learning_rate": 0.0005999521565773459, "loss": 4.4881, "step": 348 }, { "epoch": 0.02, "grad_norm": 0.6476309299468994, "learning_rate": 0.0005999518812274474, "loss": 4.4975, "step": 349 }, { "epoch": 0.02, "grad_norm": 0.6004956364631653, "learning_rate": 0.0005999516050875334, "loss": 4.6363, "step": 350 }, { "epoch": 0.02, "grad_norm": 0.640617311000824, "learning_rate": 0.0005999513281576045, "loss": 4.5486, "step": 351 }, { "epoch": 0.02, "grad_norm": 0.6288664937019348, "learning_rate": 0.0005999510504376616, "loss": 4.523, "step": 352 }, { "epoch": 0.02, "grad_norm": 0.6244643330574036, "learning_rate": 0.0005999507719277054, "loss": 4.4213, "step": 353 }, { "epoch": 0.02, "grad_norm": 0.6235512495040894, "learning_rate": 0.0005999504926277365, "loss": 4.4436, "step": 354 }, { "epoch": 0.02, "grad_norm": 0.6489076614379883, "learning_rate": 0.0005999502125377557, "loss": 4.3302, "step": 355 }, { "epoch": 0.02, "grad_norm": 0.7294739484786987, "learning_rate": 0.0005999499316577639, "loss": 4.3603, "step": 356 }, { "epoch": 0.02, "grad_norm": 0.6268585920333862, "learning_rate": 0.0005999496499877615, "loss": 4.5018, "step": 357 }, { "epoch": 0.02, "grad_norm": 0.6179900765419006, "learning_rate": 0.0005999493675277497, "loss": 4.4187, "step": 358 }, { "epoch": 0.02, "grad_norm": 0.6882840991020203, "learning_rate": 0.0005999490842777288, "loss": 4.3276, "step": 359 }, { "epoch": 0.02, "grad_norm": 0.6455632448196411, "learning_rate": 0.0005999488002376999, "loss": 4.6731, "step": 360 }, { "epoch": 0.02, "grad_norm": 0.6357992887496948, "learning_rate": 0.0005999485154076634, "loss": 4.3314, "step": 361 }, { "epoch": 0.02, "grad_norm": 0.641894519329071, "learning_rate": 0.0005999482297876204, "loss": 4.5446, "step": 362 }, { "epoch": 0.02, "grad_norm": 0.6699628829956055, "learning_rate": 0.0005999479433775714, "loss": 4.411, "step": 363 }, { "epoch": 0.02, "grad_norm": 0.6083547472953796, "learning_rate": 0.0005999476561775173, "loss": 4.4048, "step": 364 }, { "epoch": 0.02, "grad_norm": 0.6556709408760071, "learning_rate": 0.0005999473681874587, "loss": 4.6152, "step": 365 }, { "epoch": 0.02, "grad_norm": 0.6575928330421448, "learning_rate": 0.0005999470794073965, "loss": 4.407, "step": 366 }, { "epoch": 0.02, "grad_norm": 0.6290593147277832, "learning_rate": 0.0005999467898373314, "loss": 4.4726, "step": 367 }, { "epoch": 0.02, "grad_norm": 0.6279957890510559, "learning_rate": 0.0005999464994772643, "loss": 4.5635, "step": 368 }, { "epoch": 0.02, "grad_norm": 0.6792528033256531, "learning_rate": 0.0005999462083271957, "loss": 4.564, "step": 369 }, { "epoch": 0.02, "grad_norm": 0.6177847981452942, "learning_rate": 0.0005999459163871266, "loss": 4.3569, "step": 370 }, { "epoch": 0.02, "grad_norm": 0.6043108701705933, "learning_rate": 0.0005999456236570577, "loss": 4.5328, "step": 371 }, { "epoch": 0.02, "grad_norm": 0.7578732371330261, "learning_rate": 0.0005999453301369897, "loss": 4.3748, "step": 372 }, { "epoch": 0.02, "grad_norm": 0.6486274600028992, "learning_rate": 0.0005999450358269234, "loss": 4.4287, "step": 373 }, { "epoch": 0.02, "grad_norm": 0.6370539665222168, "learning_rate": 0.0005999447407268597, "loss": 4.2936, "step": 374 }, { "epoch": 0.02, "grad_norm": 0.5845693945884705, "learning_rate": 0.0005999444448367993, "loss": 4.2919, "step": 375 }, { "epoch": 0.02, "grad_norm": 0.6829562783241272, "learning_rate": 0.0005999441481567428, "loss": 4.6738, "step": 376 }, { "epoch": 0.02, "grad_norm": 0.6204245090484619, "learning_rate": 0.0005999438506866911, "loss": 4.5717, "step": 377 }, { "epoch": 0.02, "grad_norm": 0.6141250133514404, "learning_rate": 0.0005999435524266453, "loss": 4.5029, "step": 378 }, { "epoch": 0.02, "grad_norm": 0.6364489197731018, "learning_rate": 0.0005999432533766056, "loss": 4.2914, "step": 379 }, { "epoch": 0.02, "grad_norm": 0.5723608732223511, "learning_rate": 0.0005999429535365734, "loss": 4.4488, "step": 380 }, { "epoch": 0.02, "grad_norm": 0.6832333207130432, "learning_rate": 0.0005999426529065489, "loss": 4.3861, "step": 381 }, { "epoch": 0.02, "grad_norm": 0.6163421869277954, "learning_rate": 0.0005999423514865334, "loss": 4.8537, "step": 382 }, { "epoch": 0.02, "grad_norm": 0.6012544631958008, "learning_rate": 0.0005999420492765273, "loss": 4.525, "step": 383 }, { "epoch": 0.02, "grad_norm": 0.6326162815093994, "learning_rate": 0.0005999417462765318, "loss": 4.4894, "step": 384 }, { "epoch": 0.02, "grad_norm": 0.7766426801681519, "learning_rate": 0.0005999414424865472, "loss": 4.5683, "step": 385 }, { "epoch": 0.02, "grad_norm": 0.5986698269844055, "learning_rate": 0.0005999411379065746, "loss": 4.5938, "step": 386 }, { "epoch": 0.02, "grad_norm": 0.6302661895751953, "learning_rate": 0.0005999408325366149, "loss": 4.243, "step": 387 }, { "epoch": 0.02, "grad_norm": 0.6291135549545288, "learning_rate": 0.0005999405263766688, "loss": 4.6281, "step": 388 }, { "epoch": 0.02, "grad_norm": 0.6244451403617859, "learning_rate": 0.000599940219426737, "loss": 4.3295, "step": 389 }, { "epoch": 0.02, "grad_norm": 0.6079283952713013, "learning_rate": 0.0005999399116868203, "loss": 4.3864, "step": 390 }, { "epoch": 0.02, "grad_norm": 0.6124751567840576, "learning_rate": 0.0005999396031569197, "loss": 4.6859, "step": 391 }, { "epoch": 0.02, "grad_norm": 0.6195314526557922, "learning_rate": 0.0005999392938370358, "loss": 4.6973, "step": 392 }, { "epoch": 0.02, "grad_norm": 0.7498607039451599, "learning_rate": 0.0005999389837271697, "loss": 4.4272, "step": 393 }, { "epoch": 0.02, "grad_norm": 0.7137424349784851, "learning_rate": 0.0005999386728273219, "loss": 4.5234, "step": 394 }, { "epoch": 0.02, "grad_norm": 0.5930689573287964, "learning_rate": 0.0005999383611374934, "loss": 4.5479, "step": 395 }, { "epoch": 0.02, "grad_norm": 0.6761220097541809, "learning_rate": 0.000599938048657685, "loss": 4.4618, "step": 396 }, { "epoch": 0.02, "grad_norm": 0.6029164791107178, "learning_rate": 0.0005999377353878975, "loss": 4.362, "step": 397 }, { "epoch": 0.02, "grad_norm": 0.6475263237953186, "learning_rate": 0.0005999374213281318, "loss": 4.5006, "step": 398 }, { "epoch": 0.02, "grad_norm": 0.647782564163208, "learning_rate": 0.0005999371064783885, "loss": 4.2719, "step": 399 }, { "epoch": 0.02, "grad_norm": 0.5890044569969177, "learning_rate": 0.0005999367908386688, "loss": 4.5949, "step": 400 }, { "epoch": 0.02, "grad_norm": 0.627471923828125, "learning_rate": 0.0005999364744089731, "loss": 4.4189, "step": 401 }, { "epoch": 0.02, "grad_norm": 0.5897189378738403, "learning_rate": 0.0005999361571893026, "loss": 4.4445, "step": 402 }, { "epoch": 0.02, "grad_norm": 0.5901340246200562, "learning_rate": 0.0005999358391796578, "loss": 4.3554, "step": 403 }, { "epoch": 0.02, "grad_norm": 0.6106919646263123, "learning_rate": 0.00059993552038004, "loss": 4.3254, "step": 404 }, { "epoch": 0.02, "grad_norm": 0.7342495918273926, "learning_rate": 0.0005999352007904495, "loss": 4.3228, "step": 405 }, { "epoch": 0.02, "grad_norm": 0.5682928562164307, "learning_rate": 0.0005999348804108875, "loss": 4.4674, "step": 406 }, { "epoch": 0.02, "grad_norm": 0.6223228573799133, "learning_rate": 0.0005999345592413548, "loss": 4.4122, "step": 407 }, { "epoch": 0.02, "grad_norm": 0.632500946521759, "learning_rate": 0.0005999342372818523, "loss": 4.5875, "step": 408 }, { "epoch": 0.02, "grad_norm": 0.629377543926239, "learning_rate": 0.0005999339145323805, "loss": 4.271, "step": 409 }, { "epoch": 0.02, "grad_norm": 0.6136090755462646, "learning_rate": 0.0005999335909929405, "loss": 4.4486, "step": 410 }, { "epoch": 0.02, "grad_norm": 0.6139892339706421, "learning_rate": 0.0005999332666635332, "loss": 4.51, "step": 411 }, { "epoch": 0.02, "grad_norm": 0.8123599290847778, "learning_rate": 0.0005999329415441595, "loss": 4.2626, "step": 412 }, { "epoch": 0.02, "grad_norm": 0.6359814405441284, "learning_rate": 0.0005999326156348201, "loss": 4.5825, "step": 413 }, { "epoch": 0.02, "grad_norm": 0.702951967716217, "learning_rate": 0.000599932288935516, "loss": 4.5257, "step": 414 }, { "epoch": 0.02, "grad_norm": 0.6103016138076782, "learning_rate": 0.0005999319614462479, "loss": 4.4541, "step": 415 }, { "epoch": 0.02, "grad_norm": 0.6839025616645813, "learning_rate": 0.0005999316331670166, "loss": 4.4537, "step": 416 }, { "epoch": 0.02, "grad_norm": 0.6513589024543762, "learning_rate": 0.0005999313040978233, "loss": 4.2952, "step": 417 }, { "epoch": 0.02, "grad_norm": 0.6502857804298401, "learning_rate": 0.0005999309742386685, "loss": 4.3789, "step": 418 }, { "epoch": 0.02, "grad_norm": 0.6159502267837524, "learning_rate": 0.0005999306435895533, "loss": 4.484, "step": 419 }, { "epoch": 0.02, "grad_norm": 0.5981889963150024, "learning_rate": 0.0005999303121504786, "loss": 4.3018, "step": 420 }, { "epoch": 0.02, "grad_norm": 0.6399751901626587, "learning_rate": 0.0005999299799214451, "loss": 4.3074, "step": 421 }, { "epoch": 0.02, "grad_norm": 0.6635521054267883, "learning_rate": 0.0005999296469024537, "loss": 4.399, "step": 422 }, { "epoch": 0.02, "grad_norm": 0.5930505990982056, "learning_rate": 0.0005999293130935054, "loss": 4.4843, "step": 423 }, { "epoch": 0.02, "grad_norm": 0.6015510559082031, "learning_rate": 0.0005999289784946011, "loss": 4.4935, "step": 424 }, { "epoch": 0.02, "grad_norm": 0.690433919429779, "learning_rate": 0.0005999286431057415, "loss": 4.4129, "step": 425 }, { "epoch": 0.02, "grad_norm": 0.7033190727233887, "learning_rate": 0.0005999283069269276, "loss": 4.4088, "step": 426 }, { "epoch": 0.02, "grad_norm": 0.5974972248077393, "learning_rate": 0.0005999279699581602, "loss": 4.2984, "step": 427 }, { "epoch": 0.02, "grad_norm": 0.6253489255905151, "learning_rate": 0.0005999276321994402, "loss": 4.2365, "step": 428 }, { "epoch": 0.02, "grad_norm": 0.6363905668258667, "learning_rate": 0.0005999272936507687, "loss": 4.4612, "step": 429 }, { "epoch": 0.02, "grad_norm": 0.6299953460693359, "learning_rate": 0.0005999269543121463, "loss": 4.2972, "step": 430 }, { "epoch": 0.02, "grad_norm": 0.7025599479675293, "learning_rate": 0.0005999266141835741, "loss": 4.168, "step": 431 }, { "epoch": 0.02, "grad_norm": 0.7058206796646118, "learning_rate": 0.000599926273265053, "loss": 4.2049, "step": 432 }, { "epoch": 0.02, "grad_norm": 0.639482319355011, "learning_rate": 0.0005999259315565837, "loss": 4.2935, "step": 433 }, { "epoch": 0.02, "grad_norm": 0.5956512093544006, "learning_rate": 0.0005999255890581672, "loss": 4.2922, "step": 434 }, { "epoch": 0.02, "grad_norm": 0.6834009289741516, "learning_rate": 0.0005999252457698045, "loss": 4.1426, "step": 435 }, { "epoch": 0.02, "grad_norm": 0.6566224694252014, "learning_rate": 0.0005999249016914964, "loss": 4.4575, "step": 436 }, { "epoch": 0.02, "grad_norm": 0.6373875737190247, "learning_rate": 0.0005999245568232438, "loss": 4.634, "step": 437 }, { "epoch": 0.02, "grad_norm": 0.637948751449585, "learning_rate": 0.0005999242111650476, "loss": 4.4227, "step": 438 }, { "epoch": 0.02, "grad_norm": 0.713834285736084, "learning_rate": 0.0005999238647169089, "loss": 4.2999, "step": 439 }, { "epoch": 0.02, "grad_norm": 0.606334924697876, "learning_rate": 0.0005999235174788283, "loss": 4.3889, "step": 440 }, { "epoch": 0.02, "grad_norm": 0.6799372434616089, "learning_rate": 0.0005999231694508069, "loss": 4.3517, "step": 441 }, { "epoch": 0.02, "grad_norm": 0.6416102051734924, "learning_rate": 0.0005999228206328457, "loss": 4.5535, "step": 442 }, { "epoch": 0.02, "grad_norm": 0.6423821449279785, "learning_rate": 0.0005999224710249454, "loss": 4.0969, "step": 443 }, { "epoch": 0.02, "grad_norm": 0.6177202463150024, "learning_rate": 0.0005999221206271071, "loss": 4.3001, "step": 444 }, { "epoch": 0.02, "grad_norm": 0.5873739719390869, "learning_rate": 0.0005999217694393317, "loss": 4.3533, "step": 445 }, { "epoch": 0.02, "grad_norm": 0.6107127666473389, "learning_rate": 0.00059992141746162, "loss": 4.2398, "step": 446 }, { "epoch": 0.02, "grad_norm": 0.6472650170326233, "learning_rate": 0.0005999210646939731, "loss": 4.3991, "step": 447 }, { "epoch": 0.02, "grad_norm": 0.611621081829071, "learning_rate": 0.0005999207111363916, "loss": 4.4359, "step": 448 }, { "epoch": 0.02, "grad_norm": 0.6676717400550842, "learning_rate": 0.000599920356788877, "loss": 4.3993, "step": 449 }, { "epoch": 0.02, "grad_norm": 0.6014063954353333, "learning_rate": 0.0005999200016514296, "loss": 4.5267, "step": 450 }, { "epoch": 0.02, "grad_norm": 0.6274306774139404, "learning_rate": 0.0005999196457240508, "loss": 4.2282, "step": 451 }, { "epoch": 0.02, "grad_norm": 0.6827574372291565, "learning_rate": 0.0005999192890067413, "loss": 4.3729, "step": 452 }, { "epoch": 0.02, "grad_norm": 0.7055410742759705, "learning_rate": 0.0005999189314995022, "loss": 4.2916, "step": 453 }, { "epoch": 0.02, "grad_norm": 0.654010534286499, "learning_rate": 0.0005999185732023343, "loss": 4.2391, "step": 454 }, { "epoch": 0.02, "grad_norm": 0.5943931937217712, "learning_rate": 0.0005999182141152385, "loss": 4.2879, "step": 455 }, { "epoch": 0.02, "grad_norm": 0.6318141222000122, "learning_rate": 0.000599917854238216, "loss": 4.2995, "step": 456 }, { "epoch": 0.02, "grad_norm": 0.6332406401634216, "learning_rate": 0.0005999174935712676, "loss": 4.2975, "step": 457 }, { "epoch": 0.02, "grad_norm": 0.654248058795929, "learning_rate": 0.0005999171321143941, "loss": 4.3066, "step": 458 }, { "epoch": 0.02, "grad_norm": 0.6300703287124634, "learning_rate": 0.0005999167698675967, "loss": 4.4163, "step": 459 }, { "epoch": 0.02, "grad_norm": 0.6364467740058899, "learning_rate": 0.0005999164068308762, "loss": 4.4665, "step": 460 }, { "epoch": 0.02, "grad_norm": 0.650804340839386, "learning_rate": 0.0005999160430042337, "loss": 4.2126, "step": 461 }, { "epoch": 0.02, "grad_norm": 0.6396269202232361, "learning_rate": 0.00059991567838767, "loss": 4.4289, "step": 462 }, { "epoch": 0.02, "grad_norm": 0.7045847177505493, "learning_rate": 0.0005999153129811861, "loss": 4.1509, "step": 463 }, { "epoch": 0.02, "grad_norm": 0.895271360874176, "learning_rate": 0.0005999149467847831, "loss": 4.3426, "step": 464 }, { "epoch": 0.02, "grad_norm": 0.6522945165634155, "learning_rate": 0.0005999145797984617, "loss": 4.4832, "step": 465 }, { "epoch": 0.02, "grad_norm": 0.6630837321281433, "learning_rate": 0.000599914212022223, "loss": 4.4587, "step": 466 }, { "epoch": 0.02, "grad_norm": 0.6792482733726501, "learning_rate": 0.0005999138434560681, "loss": 4.2637, "step": 467 }, { "epoch": 0.02, "grad_norm": 0.7289097309112549, "learning_rate": 0.0005999134740999979, "loss": 4.4558, "step": 468 }, { "epoch": 0.02, "grad_norm": 0.6192370653152466, "learning_rate": 0.0005999131039540131, "loss": 4.3225, "step": 469 }, { "epoch": 0.02, "grad_norm": 0.6023765206336975, "learning_rate": 0.0005999127330181151, "loss": 4.4778, "step": 470 }, { "epoch": 0.02, "grad_norm": 0.6397039294242859, "learning_rate": 0.0005999123612923046, "loss": 4.4022, "step": 471 }, { "epoch": 0.02, "grad_norm": 0.5905510187149048, "learning_rate": 0.0005999119887765827, "loss": 4.468, "step": 472 }, { "epoch": 0.02, "grad_norm": 0.7561242580413818, "learning_rate": 0.0005999116154709504, "loss": 4.2759, "step": 473 }, { "epoch": 0.02, "grad_norm": 0.6308673024177551, "learning_rate": 0.0005999112413754084, "loss": 4.2214, "step": 474 }, { "epoch": 0.02, "grad_norm": 0.6405158638954163, "learning_rate": 0.0005999108664899582, "loss": 4.3654, "step": 475 }, { "epoch": 0.02, "grad_norm": 0.6524988412857056, "learning_rate": 0.0005999104908146003, "loss": 4.5213, "step": 476 }, { "epoch": 0.02, "grad_norm": 0.6339336037635803, "learning_rate": 0.0005999101143493359, "loss": 4.5757, "step": 477 }, { "epoch": 0.02, "grad_norm": 0.6563646793365479, "learning_rate": 0.000599909737094166, "loss": 4.4423, "step": 478 }, { "epoch": 0.02, "grad_norm": 0.6299603581428528, "learning_rate": 0.0005999093590490917, "loss": 4.1991, "step": 479 }, { "epoch": 0.02, "grad_norm": 0.8304985761642456, "learning_rate": 0.0005999089802141137, "loss": 4.5737, "step": 480 }, { "epoch": 0.02, "grad_norm": 0.6237910389900208, "learning_rate": 0.0005999086005892332, "loss": 4.298, "step": 481 }, { "epoch": 0.02, "grad_norm": 0.6513141989707947, "learning_rate": 0.0005999082201744512, "loss": 3.9653, "step": 482 }, { "epoch": 0.02, "grad_norm": 0.6443023085594177, "learning_rate": 0.0005999078389697687, "loss": 4.3509, "step": 483 }, { "epoch": 0.02, "grad_norm": 0.6092896461486816, "learning_rate": 0.0005999074569751865, "loss": 4.4119, "step": 484 }, { "epoch": 0.02, "grad_norm": 0.6280472874641418, "learning_rate": 0.000599907074190706, "loss": 4.1763, "step": 485 }, { "epoch": 0.02, "grad_norm": 0.6605474352836609, "learning_rate": 0.0005999066906163279, "loss": 4.1999, "step": 486 }, { "epoch": 0.02, "grad_norm": 0.633695125579834, "learning_rate": 0.0005999063062520532, "loss": 4.5811, "step": 487 }, { "epoch": 0.02, "grad_norm": 0.6050964593887329, "learning_rate": 0.0005999059210978832, "loss": 4.2844, "step": 488 }, { "epoch": 0.02, "grad_norm": 0.6477183699607849, "learning_rate": 0.0005999055351538186, "loss": 4.315, "step": 489 }, { "epoch": 0.02, "grad_norm": 0.6902852654457092, "learning_rate": 0.0005999051484198606, "loss": 4.4602, "step": 490 }, { "epoch": 0.02, "grad_norm": 0.9078108668327332, "learning_rate": 0.00059990476089601, "loss": 4.6133, "step": 491 }, { "epoch": 0.02, "grad_norm": 0.6101062893867493, "learning_rate": 0.000599904372582268, "loss": 4.5548, "step": 492 }, { "epoch": 0.02, "grad_norm": 0.6342147588729858, "learning_rate": 0.0005999039834786357, "loss": 4.232, "step": 493 }, { "epoch": 0.02, "grad_norm": 0.6037482619285583, "learning_rate": 0.0005999035935851142, "loss": 4.4302, "step": 494 }, { "epoch": 0.02, "grad_norm": 0.6121346950531006, "learning_rate": 0.0005999032029017041, "loss": 4.2391, "step": 495 }, { "epoch": 0.02, "grad_norm": 0.6090756058692932, "learning_rate": 0.0005999028114284067, "loss": 4.4575, "step": 496 }, { "epoch": 0.02, "grad_norm": 0.569670557975769, "learning_rate": 0.0005999024191652231, "loss": 4.2884, "step": 497 }, { "epoch": 0.02, "grad_norm": 0.5985832810401917, "learning_rate": 0.0005999020261121541, "loss": 4.3026, "step": 498 }, { "epoch": 0.02, "grad_norm": 0.6532287001609802, "learning_rate": 0.000599901632269201, "loss": 4.3152, "step": 499 }, { "epoch": 0.02, "grad_norm": 0.5899796485900879, "learning_rate": 0.0005999012376363647, "loss": 4.1943, "step": 500 }, { "epoch": 0.02, "grad_norm": 0.6288335919380188, "learning_rate": 0.0005999008422136463, "loss": 4.4126, "step": 501 }, { "epoch": 0.02, "grad_norm": 0.6783986687660217, "learning_rate": 0.0005999004460010467, "loss": 4.1275, "step": 502 }, { "epoch": 0.02, "grad_norm": 0.612891435623169, "learning_rate": 0.0005999000489985671, "loss": 4.3033, "step": 503 }, { "epoch": 0.02, "grad_norm": 0.6028085947036743, "learning_rate": 0.0005998996512062085, "loss": 4.2218, "step": 504 }, { "epoch": 0.02, "grad_norm": 0.5980657935142517, "learning_rate": 0.0005998992526239719, "loss": 4.2868, "step": 505 }, { "epoch": 0.02, "grad_norm": 0.6469926238059998, "learning_rate": 0.0005998988532518584, "loss": 4.4216, "step": 506 }, { "epoch": 0.02, "grad_norm": 0.6261191368103027, "learning_rate": 0.0005998984530898691, "loss": 4.1823, "step": 507 }, { "epoch": 0.02, "grad_norm": 0.5999051928520203, "learning_rate": 0.0005998980521380048, "loss": 4.0664, "step": 508 }, { "epoch": 0.02, "grad_norm": 0.6515395641326904, "learning_rate": 0.000599897650396267, "loss": 4.5476, "step": 509 }, { "epoch": 0.02, "grad_norm": 0.5981988906860352, "learning_rate": 0.0005998972478646564, "loss": 4.2978, "step": 510 }, { "epoch": 0.03, "grad_norm": 0.6036791205406189, "learning_rate": 0.0005998968445431742, "loss": 4.3236, "step": 511 }, { "epoch": 0.03, "grad_norm": 0.6372940540313721, "learning_rate": 0.0005998964404318213, "loss": 4.2261, "step": 512 }, { "epoch": 0.03, "grad_norm": 0.612691342830658, "learning_rate": 0.0005998960355305989, "loss": 4.2686, "step": 513 }, { "epoch": 0.03, "grad_norm": 0.6639612913131714, "learning_rate": 0.0005998956298395082, "loss": 4.4662, "step": 514 }, { "epoch": 0.03, "grad_norm": 0.6196693181991577, "learning_rate": 0.00059989522335855, "loss": 4.244, "step": 515 }, { "epoch": 0.03, "grad_norm": 0.6042441725730896, "learning_rate": 0.0005998948160877256, "loss": 4.4187, "step": 516 }, { "epoch": 0.03, "grad_norm": 0.584118127822876, "learning_rate": 0.0005998944080270359, "loss": 4.2727, "step": 517 }, { "epoch": 0.03, "grad_norm": 0.6688718795776367, "learning_rate": 0.0005998939991764821, "loss": 4.4981, "step": 518 }, { "epoch": 0.03, "grad_norm": 0.6350945234298706, "learning_rate": 0.0005998935895360651, "loss": 4.0629, "step": 519 }, { "epoch": 0.03, "grad_norm": 0.6718504428863525, "learning_rate": 0.0005998931791057863, "loss": 4.3385, "step": 520 }, { "epoch": 0.03, "grad_norm": 0.666595995426178, "learning_rate": 0.0005998927678856464, "loss": 4.1755, "step": 521 }, { "epoch": 0.03, "grad_norm": 0.6215299963951111, "learning_rate": 0.0005998923558756467, "loss": 4.4248, "step": 522 }, { "epoch": 0.03, "grad_norm": 0.6000812649726868, "learning_rate": 0.0005998919430757883, "loss": 4.3693, "step": 523 }, { "epoch": 0.03, "grad_norm": 0.643562912940979, "learning_rate": 0.0005998915294860722, "loss": 3.9877, "step": 524 }, { "epoch": 0.03, "grad_norm": 0.6312112808227539, "learning_rate": 0.0005998911151064996, "loss": 4.3255, "step": 525 }, { "epoch": 0.03, "grad_norm": 0.6362340450286865, "learning_rate": 0.0005998906999370716, "loss": 4.2147, "step": 526 }, { "epoch": 0.03, "grad_norm": 0.6441465616226196, "learning_rate": 0.0005998902839777889, "loss": 4.2851, "step": 527 }, { "epoch": 0.03, "grad_norm": 0.6550605893135071, "learning_rate": 0.0005998898672286532, "loss": 4.167, "step": 528 }, { "epoch": 0.03, "grad_norm": 0.6101124286651611, "learning_rate": 0.0005998894496896651, "loss": 4.5298, "step": 529 }, { "epoch": 0.03, "grad_norm": 0.6387701630592346, "learning_rate": 0.0005998890313608261, "loss": 4.1748, "step": 530 }, { "epoch": 0.03, "grad_norm": 0.6303190588951111, "learning_rate": 0.0005998886122421369, "loss": 4.2512, "step": 531 }, { "epoch": 0.03, "grad_norm": 0.7182632088661194, "learning_rate": 0.0005998881923335989, "loss": 4.3773, "step": 532 }, { "epoch": 0.03, "grad_norm": 0.5540059208869934, "learning_rate": 0.0005998877716352132, "loss": 4.3919, "step": 533 }, { "epoch": 0.03, "grad_norm": 0.6034284234046936, "learning_rate": 0.0005998873501469808, "loss": 4.1638, "step": 534 }, { "epoch": 0.03, "grad_norm": 0.6099483966827393, "learning_rate": 0.0005998869278689028, "loss": 4.3032, "step": 535 }, { "epoch": 0.03, "grad_norm": 0.6097630262374878, "learning_rate": 0.0005998865048009803, "loss": 4.1468, "step": 536 }, { "epoch": 0.03, "grad_norm": 0.5770888328552246, "learning_rate": 0.0005998860809432145, "loss": 4.2715, "step": 537 }, { "epoch": 0.03, "grad_norm": 0.6351972818374634, "learning_rate": 0.0005998856562956064, "loss": 4.2979, "step": 538 }, { "epoch": 0.03, "grad_norm": 0.6256553530693054, "learning_rate": 0.0005998852308581573, "loss": 4.1972, "step": 539 }, { "epoch": 0.03, "grad_norm": 0.625809907913208, "learning_rate": 0.0005998848046308682, "loss": 3.9932, "step": 540 }, { "epoch": 0.03, "grad_norm": 0.6050224900245667, "learning_rate": 0.0005998843776137402, "loss": 4.2899, "step": 541 }, { "epoch": 0.03, "grad_norm": 0.617949366569519, "learning_rate": 0.0005998839498067745, "loss": 4.069, "step": 542 }, { "epoch": 0.03, "grad_norm": 0.7680637836456299, "learning_rate": 0.0005998835212099722, "loss": 4.264, "step": 543 }, { "epoch": 0.03, "grad_norm": 0.6679553389549255, "learning_rate": 0.0005998830918233344, "loss": 4.2505, "step": 544 }, { "epoch": 0.03, "grad_norm": 1.0453165769577026, "learning_rate": 0.0005998826616468622, "loss": 4.356, "step": 545 }, { "epoch": 0.03, "grad_norm": 0.6193425059318542, "learning_rate": 0.0005998822306805568, "loss": 4.4011, "step": 546 }, { "epoch": 0.03, "grad_norm": 0.6391511559486389, "learning_rate": 0.0005998817989244194, "loss": 4.08, "step": 547 }, { "epoch": 0.03, "grad_norm": 0.6139493584632874, "learning_rate": 0.000599881366378451, "loss": 4.206, "step": 548 }, { "epoch": 0.03, "grad_norm": 0.6752720475196838, "learning_rate": 0.0005998809330426528, "loss": 4.1488, "step": 549 }, { "epoch": 0.03, "grad_norm": 0.6094870567321777, "learning_rate": 0.0005998804989170259, "loss": 4.1779, "step": 550 }, { "epoch": 0.03, "grad_norm": 0.5940812230110168, "learning_rate": 0.0005998800640015715, "loss": 4.3568, "step": 551 }, { "epoch": 0.03, "grad_norm": 0.6211913228034973, "learning_rate": 0.0005998796282962907, "loss": 4.4739, "step": 552 }, { "epoch": 0.03, "grad_norm": 0.5913729071617126, "learning_rate": 0.0005998791918011847, "loss": 4.2086, "step": 553 }, { "epoch": 0.03, "grad_norm": 0.6634578704833984, "learning_rate": 0.0005998787545162547, "loss": 4.2911, "step": 554 }, { "epoch": 0.03, "grad_norm": 0.636097252368927, "learning_rate": 0.0005998783164415017, "loss": 4.1991, "step": 555 }, { "epoch": 0.03, "grad_norm": 0.6963991522789001, "learning_rate": 0.0005998778775769269, "loss": 4.0183, "step": 556 }, { "epoch": 0.03, "grad_norm": 0.6141919493675232, "learning_rate": 0.0005998774379225315, "loss": 4.5183, "step": 557 }, { "epoch": 0.03, "grad_norm": 0.5955811142921448, "learning_rate": 0.0005998769974783167, "loss": 4.3453, "step": 558 }, { "epoch": 0.03, "grad_norm": 0.6223891377449036, "learning_rate": 0.0005998765562442835, "loss": 4.3894, "step": 559 }, { "epoch": 0.03, "grad_norm": 0.6813943386077881, "learning_rate": 0.0005998761142204333, "loss": 4.2083, "step": 560 }, { "epoch": 0.03, "grad_norm": 0.6669309735298157, "learning_rate": 0.000599875671406767, "loss": 4.566, "step": 561 }, { "epoch": 0.03, "grad_norm": 0.6055184602737427, "learning_rate": 0.0005998752278032859, "loss": 4.0433, "step": 562 }, { "epoch": 0.03, "grad_norm": 0.6754607558250427, "learning_rate": 0.0005998747834099912, "loss": 4.0311, "step": 563 }, { "epoch": 0.03, "grad_norm": 0.6373229622840881, "learning_rate": 0.000599874338226884, "loss": 4.33, "step": 564 }, { "epoch": 0.03, "grad_norm": 0.5642352104187012, "learning_rate": 0.0005998738922539656, "loss": 4.2819, "step": 565 }, { "epoch": 0.03, "grad_norm": 0.5712973475456238, "learning_rate": 0.000599873445491237, "loss": 4.3986, "step": 566 }, { "epoch": 0.03, "grad_norm": 0.6137987375259399, "learning_rate": 0.0005998729979386994, "loss": 4.4378, "step": 567 }, { "epoch": 0.03, "grad_norm": 0.6173729300498962, "learning_rate": 0.0005998725495963542, "loss": 4.3934, "step": 568 }, { "epoch": 0.03, "grad_norm": 0.6119434833526611, "learning_rate": 0.0005998721004642024, "loss": 4.3429, "step": 569 }, { "epoch": 0.03, "grad_norm": 0.6111935377120972, "learning_rate": 0.000599871650542245, "loss": 4.2851, "step": 570 }, { "epoch": 0.03, "grad_norm": 0.7171507477760315, "learning_rate": 0.0005998711998304835, "loss": 4.4319, "step": 571 }, { "epoch": 0.03, "grad_norm": 0.5924344062805176, "learning_rate": 0.000599870748328919, "loss": 4.4248, "step": 572 }, { "epoch": 0.03, "grad_norm": 0.6047422289848328, "learning_rate": 0.0005998702960375526, "loss": 4.3181, "step": 573 }, { "epoch": 0.03, "grad_norm": 0.61612468957901, "learning_rate": 0.0005998698429563856, "loss": 4.2176, "step": 574 }, { "epoch": 0.03, "grad_norm": 0.6250529289245605, "learning_rate": 0.0005998693890854192, "loss": 4.3786, "step": 575 }, { "epoch": 0.03, "grad_norm": 0.6662085652351379, "learning_rate": 0.0005998689344246544, "loss": 4.4409, "step": 576 }, { "epoch": 0.03, "grad_norm": 0.6052320599555969, "learning_rate": 0.0005998684789740926, "loss": 4.1686, "step": 577 }, { "epoch": 0.03, "grad_norm": 0.6721617579460144, "learning_rate": 0.0005998680227337351, "loss": 4.1626, "step": 578 }, { "epoch": 0.03, "grad_norm": 0.6326162815093994, "learning_rate": 0.0005998675657035827, "loss": 4.4329, "step": 579 }, { "epoch": 0.03, "grad_norm": 0.6244572401046753, "learning_rate": 0.0005998671078836369, "loss": 4.3406, "step": 580 }, { "epoch": 0.03, "grad_norm": 0.6222200989723206, "learning_rate": 0.0005998666492738989, "loss": 4.3156, "step": 581 }, { "epoch": 0.03, "grad_norm": 0.6773638725280762, "learning_rate": 0.0005998661898743698, "loss": 4.2542, "step": 582 }, { "epoch": 0.03, "grad_norm": 0.7391511797904968, "learning_rate": 0.0005998657296850509, "loss": 4.293, "step": 583 }, { "epoch": 0.03, "grad_norm": 0.6230476498603821, "learning_rate": 0.0005998652687059434, "loss": 4.1702, "step": 584 }, { "epoch": 0.03, "grad_norm": 0.6014927625656128, "learning_rate": 0.0005998648069370485, "loss": 4.0259, "step": 585 }, { "epoch": 0.03, "grad_norm": 0.6436325311660767, "learning_rate": 0.0005998643443783674, "loss": 4.1263, "step": 586 }, { "epoch": 0.03, "grad_norm": 0.6496565937995911, "learning_rate": 0.0005998638810299013, "loss": 4.0934, "step": 587 }, { "epoch": 0.03, "grad_norm": 0.5946733951568604, "learning_rate": 0.0005998634168916515, "loss": 4.3685, "step": 588 }, { "epoch": 0.03, "grad_norm": 0.5709477066993713, "learning_rate": 0.0005998629519636191, "loss": 4.0456, "step": 589 }, { "epoch": 0.03, "grad_norm": 0.6664632558822632, "learning_rate": 0.0005998624862458054, "loss": 4.2432, "step": 590 }, { "epoch": 0.03, "grad_norm": 0.6627117395401001, "learning_rate": 0.0005998620197382117, "loss": 4.492, "step": 591 }, { "epoch": 0.03, "grad_norm": 0.6205899715423584, "learning_rate": 0.0005998615524408391, "loss": 4.3008, "step": 592 }, { "epoch": 0.03, "grad_norm": 0.6240425109863281, "learning_rate": 0.0005998610843536888, "loss": 4.4099, "step": 593 }, { "epoch": 0.03, "grad_norm": 0.6774874329566956, "learning_rate": 0.0005998606154767621, "loss": 4.2297, "step": 594 }, { "epoch": 0.03, "grad_norm": 0.6011747121810913, "learning_rate": 0.0005998601458100603, "loss": 4.2218, "step": 595 }, { "epoch": 0.03, "grad_norm": 0.6347803473472595, "learning_rate": 0.0005998596753535847, "loss": 4.2884, "step": 596 }, { "epoch": 0.03, "grad_norm": 0.6915126442909241, "learning_rate": 0.0005998592041073364, "loss": 4.3879, "step": 597 }, { "epoch": 0.03, "grad_norm": 0.6727957129478455, "learning_rate": 0.0005998587320713165, "loss": 4.0906, "step": 598 }, { "epoch": 0.03, "grad_norm": 0.6125076413154602, "learning_rate": 0.0005998582592455266, "loss": 4.2448, "step": 599 }, { "epoch": 0.03, "grad_norm": 0.6534925699234009, "learning_rate": 0.0005998577856299677, "loss": 4.1813, "step": 600 }, { "epoch": 0.03, "grad_norm": 0.6901077628135681, "learning_rate": 0.0005998573112246411, "loss": 4.0296, "step": 601 }, { "epoch": 0.03, "grad_norm": 0.6255195140838623, "learning_rate": 0.000599856836029548, "loss": 4.22, "step": 602 }, { "epoch": 0.03, "grad_norm": 0.6073751449584961, "learning_rate": 0.0005998563600446898, "loss": 4.3775, "step": 603 }, { "epoch": 0.03, "grad_norm": 0.6304476857185364, "learning_rate": 0.0005998558832700675, "loss": 4.5508, "step": 604 }, { "epoch": 0.03, "grad_norm": 0.6237664222717285, "learning_rate": 0.0005998554057056825, "loss": 4.3309, "step": 605 }, { "epoch": 0.03, "grad_norm": 0.6360412836074829, "learning_rate": 0.0005998549273515362, "loss": 4.2754, "step": 606 }, { "epoch": 0.03, "grad_norm": 0.6329114437103271, "learning_rate": 0.0005998544482076297, "loss": 4.4678, "step": 607 }, { "epoch": 0.03, "grad_norm": 0.6190249919891357, "learning_rate": 0.0005998539682739643, "loss": 4.0623, "step": 608 }, { "epoch": 0.03, "grad_norm": 0.7699590921401978, "learning_rate": 0.0005998534875505413, "loss": 4.368, "step": 609 }, { "epoch": 0.03, "grad_norm": 0.6379511952400208, "learning_rate": 0.0005998530060373618, "loss": 4.0845, "step": 610 }, { "epoch": 0.03, "grad_norm": 0.6318610310554504, "learning_rate": 0.0005998525237344272, "loss": 4.1786, "step": 611 }, { "epoch": 0.03, "grad_norm": 0.654819905757904, "learning_rate": 0.0005998520406417388, "loss": 4.219, "step": 612 }, { "epoch": 0.03, "grad_norm": 0.6683518886566162, "learning_rate": 0.0005998515567592979, "loss": 4.333, "step": 613 }, { "epoch": 0.03, "grad_norm": 0.6329825520515442, "learning_rate": 0.0005998510720871057, "loss": 4.2552, "step": 614 }, { "epoch": 0.03, "grad_norm": 0.6308939456939697, "learning_rate": 0.0005998505866251635, "loss": 4.142, "step": 615 }, { "epoch": 0.03, "grad_norm": 0.557669460773468, "learning_rate": 0.0005998501003734724, "loss": 4.2415, "step": 616 }, { "epoch": 0.03, "grad_norm": 0.6747656464576721, "learning_rate": 0.0005998496133320339, "loss": 4.3223, "step": 617 }, { "epoch": 0.03, "grad_norm": 0.6284106373786926, "learning_rate": 0.0005998491255008494, "loss": 4.3184, "step": 618 }, { "epoch": 0.03, "grad_norm": 0.6386296153068542, "learning_rate": 0.0005998486368799197, "loss": 4.0359, "step": 619 }, { "epoch": 0.03, "grad_norm": 0.6140933632850647, "learning_rate": 0.0005998481474692466, "loss": 4.0157, "step": 620 }, { "epoch": 0.03, "grad_norm": 0.6379926204681396, "learning_rate": 0.0005998476572688312, "loss": 4.1319, "step": 621 }, { "epoch": 0.03, "grad_norm": 0.5984822511672974, "learning_rate": 0.0005998471662786747, "loss": 4.157, "step": 622 }, { "epoch": 0.03, "grad_norm": 0.5686042904853821, "learning_rate": 0.0005998466744987786, "loss": 4.2332, "step": 623 }, { "epoch": 0.03, "grad_norm": 0.6098901629447937, "learning_rate": 0.0005998461819291439, "loss": 4.0068, "step": 624 }, { "epoch": 0.03, "grad_norm": 0.7193558216094971, "learning_rate": 0.0005998456885697722, "loss": 4.1709, "step": 625 }, { "epoch": 0.03, "grad_norm": 0.6442568898200989, "learning_rate": 0.0005998451944206646, "loss": 4.2862, "step": 626 }, { "epoch": 0.03, "grad_norm": 0.610493004322052, "learning_rate": 0.0005998446994818225, "loss": 4.1749, "step": 627 }, { "epoch": 0.03, "grad_norm": 0.6829988360404968, "learning_rate": 0.0005998442037532471, "loss": 4.2972, "step": 628 }, { "epoch": 0.03, "grad_norm": 0.6680061221122742, "learning_rate": 0.0005998437072349397, "loss": 4.1303, "step": 629 }, { "epoch": 0.03, "grad_norm": 0.6559966206550598, "learning_rate": 0.0005998432099269019, "loss": 4.1799, "step": 630 }, { "epoch": 0.03, "grad_norm": 0.610496461391449, "learning_rate": 0.0005998427118291347, "loss": 4.2051, "step": 631 }, { "epoch": 0.03, "grad_norm": 0.5574377775192261, "learning_rate": 0.0005998422129416393, "loss": 4.2476, "step": 632 }, { "epoch": 0.03, "grad_norm": 0.6652995347976685, "learning_rate": 0.0005998417132644175, "loss": 4.1745, "step": 633 }, { "epoch": 0.03, "grad_norm": 0.609061598777771, "learning_rate": 0.0005998412127974702, "loss": 4.3128, "step": 634 }, { "epoch": 0.03, "grad_norm": 0.6088485717773438, "learning_rate": 0.0005998407115407989, "loss": 4.3749, "step": 635 }, { "epoch": 0.03, "grad_norm": 0.6192953586578369, "learning_rate": 0.0005998402094944048, "loss": 3.9438, "step": 636 }, { "epoch": 0.03, "grad_norm": 0.6353934407234192, "learning_rate": 0.0005998397066582894, "loss": 4.1745, "step": 637 }, { "epoch": 0.03, "grad_norm": 1.0005871057510376, "learning_rate": 0.0005998392030324539, "loss": 4.5231, "step": 638 }, { "epoch": 0.03, "grad_norm": 0.5929376482963562, "learning_rate": 0.0005998386986168996, "loss": 4.0178, "step": 639 }, { "epoch": 0.03, "grad_norm": 0.6462288498878479, "learning_rate": 0.0005998381934116279, "loss": 4.1872, "step": 640 }, { "epoch": 0.03, "grad_norm": 0.6652570366859436, "learning_rate": 0.0005998376874166401, "loss": 4.4516, "step": 641 }, { "epoch": 0.03, "grad_norm": 0.6204379200935364, "learning_rate": 0.0005998371806319375, "loss": 4.2446, "step": 642 }, { "epoch": 0.03, "grad_norm": 0.6597891449928284, "learning_rate": 0.0005998366730575216, "loss": 4.1447, "step": 643 }, { "epoch": 0.03, "grad_norm": 0.708060622215271, "learning_rate": 0.0005998361646933934, "loss": 4.3571, "step": 644 }, { "epoch": 0.03, "grad_norm": 0.5907878279685974, "learning_rate": 0.0005998356555395546, "loss": 4.0995, "step": 645 }, { "epoch": 0.03, "grad_norm": 0.5822071433067322, "learning_rate": 0.0005998351455960062, "loss": 4.3305, "step": 646 }, { "epoch": 0.03, "grad_norm": 0.6510571837425232, "learning_rate": 0.0005998346348627499, "loss": 4.2952, "step": 647 }, { "epoch": 0.03, "grad_norm": 0.624159574508667, "learning_rate": 0.0005998341233397869, "loss": 4.1151, "step": 648 }, { "epoch": 0.03, "grad_norm": 0.5877444744110107, "learning_rate": 0.0005998336110271185, "loss": 4.1762, "step": 649 }, { "epoch": 0.03, "grad_norm": 0.664666473865509, "learning_rate": 0.0005998330979247459, "loss": 4.0597, "step": 650 }, { "epoch": 0.03, "grad_norm": 0.5993242263793945, "learning_rate": 0.0005998325840326708, "loss": 4.3319, "step": 651 }, { "epoch": 0.03, "grad_norm": 0.603900671005249, "learning_rate": 0.0005998320693508944, "loss": 4.1354, "step": 652 }, { "epoch": 0.03, "grad_norm": 0.5658103227615356, "learning_rate": 0.000599831553879418, "loss": 4.3008, "step": 653 }, { "epoch": 0.03, "grad_norm": 0.588505208492279, "learning_rate": 0.0005998310376182429, "loss": 3.9547, "step": 654 }, { "epoch": 0.03, "grad_norm": 0.6646924018859863, "learning_rate": 0.0005998305205673707, "loss": 4.3278, "step": 655 }, { "epoch": 0.03, "grad_norm": 0.5992798805236816, "learning_rate": 0.0005998300027268024, "loss": 4.0916, "step": 656 }, { "epoch": 0.03, "grad_norm": 0.6579504013061523, "learning_rate": 0.0005998294840965397, "loss": 4.2061, "step": 657 }, { "epoch": 0.03, "grad_norm": 0.6638225317001343, "learning_rate": 0.0005998289646765839, "loss": 4.3004, "step": 658 }, { "epoch": 0.03, "grad_norm": 0.6212108135223389, "learning_rate": 0.0005998284444669363, "loss": 4.25, "step": 659 }, { "epoch": 0.03, "grad_norm": 0.7648120522499084, "learning_rate": 0.0005998279234675982, "loss": 3.9266, "step": 660 }, { "epoch": 0.03, "grad_norm": 0.6046731472015381, "learning_rate": 0.0005998274016785711, "loss": 4.0652, "step": 661 }, { "epoch": 0.03, "grad_norm": 0.6333780884742737, "learning_rate": 0.0005998268790998563, "loss": 4.2736, "step": 662 }, { "epoch": 0.03, "grad_norm": 0.6316869854927063, "learning_rate": 0.0005998263557314553, "loss": 4.2631, "step": 663 }, { "epoch": 0.03, "grad_norm": 0.593485951423645, "learning_rate": 0.0005998258315733692, "loss": 4.3236, "step": 664 }, { "epoch": 0.03, "grad_norm": 0.633284330368042, "learning_rate": 0.0005998253066255997, "loss": 4.0417, "step": 665 }, { "epoch": 0.03, "grad_norm": 0.5952419638633728, "learning_rate": 0.000599824780888148, "loss": 4.3424, "step": 666 }, { "epoch": 0.03, "grad_norm": 0.6344154477119446, "learning_rate": 0.0005998242543610155, "loss": 4.1042, "step": 667 }, { "epoch": 0.03, "grad_norm": 0.6327022910118103, "learning_rate": 0.0005998237270442037, "loss": 4.174, "step": 668 }, { "epoch": 0.03, "grad_norm": 0.7157790660858154, "learning_rate": 0.0005998231989377139, "loss": 4.0206, "step": 669 }, { "epoch": 0.03, "grad_norm": 0.6288419365882874, "learning_rate": 0.0005998226700415474, "loss": 4.3456, "step": 670 }, { "epoch": 0.03, "grad_norm": 0.6706027984619141, "learning_rate": 0.0005998221403557058, "loss": 3.9778, "step": 671 }, { "epoch": 0.03, "grad_norm": 0.6601995825767517, "learning_rate": 0.0005998216098801904, "loss": 4.2968, "step": 672 }, { "epoch": 0.03, "grad_norm": 0.6394633650779724, "learning_rate": 0.0005998210786150024, "loss": 4.3944, "step": 673 }, { "epoch": 0.03, "grad_norm": 0.7500145435333252, "learning_rate": 0.0005998205465601435, "loss": 4.0035, "step": 674 }, { "epoch": 0.03, "grad_norm": 0.6336989998817444, "learning_rate": 0.000599820013715615, "loss": 4.241, "step": 675 }, { "epoch": 0.03, "grad_norm": 0.6461735367774963, "learning_rate": 0.0005998194800814182, "loss": 4.223, "step": 676 }, { "epoch": 0.03, "grad_norm": 0.6548101902008057, "learning_rate": 0.0005998189456575547, "loss": 4.1025, "step": 677 }, { "epoch": 0.03, "grad_norm": 0.6322773694992065, "learning_rate": 0.0005998184104440257, "loss": 4.2219, "step": 678 }, { "epoch": 0.03, "grad_norm": 0.6030046939849854, "learning_rate": 0.0005998178744408328, "loss": 4.1642, "step": 679 }, { "epoch": 0.03, "grad_norm": 0.6334202289581299, "learning_rate": 0.0005998173376479773, "loss": 4.2389, "step": 680 }, { "epoch": 0.03, "grad_norm": 0.6108695864677429, "learning_rate": 0.0005998168000654606, "loss": 4.0175, "step": 681 }, { "epoch": 0.03, "grad_norm": 0.6236847639083862, "learning_rate": 0.0005998162616932841, "loss": 4.3389, "step": 682 }, { "epoch": 0.03, "grad_norm": 0.6061368584632874, "learning_rate": 0.0005998157225314493, "loss": 4.2202, "step": 683 }, { "epoch": 0.03, "grad_norm": 0.6532244682312012, "learning_rate": 0.0005998151825799576, "loss": 4.0348, "step": 684 }, { "epoch": 0.03, "grad_norm": 0.5872693061828613, "learning_rate": 0.0005998146418388105, "loss": 4.2631, "step": 685 }, { "epoch": 0.03, "grad_norm": 0.6573641300201416, "learning_rate": 0.0005998141003080092, "loss": 4.1371, "step": 686 }, { "epoch": 0.03, "grad_norm": 0.6546458601951599, "learning_rate": 0.0005998135579875554, "loss": 4.1695, "step": 687 }, { "epoch": 0.03, "grad_norm": 0.6271075010299683, "learning_rate": 0.0005998130148774502, "loss": 4.2471, "step": 688 }, { "epoch": 0.03, "grad_norm": 0.5936674475669861, "learning_rate": 0.0005998124709776953, "loss": 4.1321, "step": 689 }, { "epoch": 0.03, "grad_norm": 0.5933693051338196, "learning_rate": 0.0005998119262882921, "loss": 4.1484, "step": 690 }, { "epoch": 0.03, "grad_norm": 0.6198687553405762, "learning_rate": 0.000599811380809242, "loss": 4.4228, "step": 691 }, { "epoch": 0.03, "grad_norm": 0.6083481907844543, "learning_rate": 0.0005998108345405465, "loss": 4.266, "step": 692 }, { "epoch": 0.03, "grad_norm": 0.6238859295845032, "learning_rate": 0.0005998102874822068, "loss": 4.3885, "step": 693 }, { "epoch": 0.03, "grad_norm": 0.6317378878593445, "learning_rate": 0.0005998097396342245, "loss": 4.2074, "step": 694 }, { "epoch": 0.03, "grad_norm": 0.6172046661376953, "learning_rate": 0.0005998091909966011, "loss": 4.0928, "step": 695 }, { "epoch": 0.03, "grad_norm": 0.6623038649559021, "learning_rate": 0.000599808641569338, "loss": 4.0324, "step": 696 }, { "epoch": 0.03, "grad_norm": 0.6745285987854004, "learning_rate": 0.0005998080913524365, "loss": 4.3255, "step": 697 }, { "epoch": 0.03, "grad_norm": 0.5575293302536011, "learning_rate": 0.0005998075403458984, "loss": 4.2219, "step": 698 }, { "epoch": 0.03, "grad_norm": 0.6386272311210632, "learning_rate": 0.0005998069885497249, "loss": 4.236, "step": 699 }, { "epoch": 0.03, "grad_norm": 0.7148878574371338, "learning_rate": 0.0005998064359639173, "loss": 4.089, "step": 700 }, { "epoch": 0.03, "grad_norm": 0.6956779360771179, "learning_rate": 0.0005998058825884775, "loss": 4.1711, "step": 701 }, { "epoch": 0.03, "grad_norm": 0.610063910484314, "learning_rate": 0.0005998053284234067, "loss": 4.3074, "step": 702 }, { "epoch": 0.03, "grad_norm": 0.7103130221366882, "learning_rate": 0.0005998047734687062, "loss": 4.1535, "step": 703 }, { "epoch": 0.03, "grad_norm": 0.6960937976837158, "learning_rate": 0.0005998042177243776, "loss": 3.9969, "step": 704 }, { "epoch": 0.03, "grad_norm": 0.7523322105407715, "learning_rate": 0.0005998036611904225, "loss": 3.8138, "step": 705 }, { "epoch": 0.03, "grad_norm": 0.6804961562156677, "learning_rate": 0.0005998031038668422, "loss": 4.1109, "step": 706 }, { "epoch": 0.03, "grad_norm": 0.6763580441474915, "learning_rate": 0.0005998025457536382, "loss": 3.9702, "step": 707 }, { "epoch": 0.03, "grad_norm": 0.8153755068778992, "learning_rate": 0.0005998019868508121, "loss": 4.0995, "step": 708 }, { "epoch": 0.03, "grad_norm": 0.6804799437522888, "learning_rate": 0.0005998014271583652, "loss": 4.2456, "step": 709 }, { "epoch": 0.03, "grad_norm": 0.6983351707458496, "learning_rate": 0.0005998008666762989, "loss": 3.9431, "step": 710 }, { "epoch": 0.03, "grad_norm": 0.6080770492553711, "learning_rate": 0.0005998003054046151, "loss": 4.277, "step": 711 }, { "epoch": 0.03, "grad_norm": 0.7080684900283813, "learning_rate": 0.0005997997433433148, "loss": 4.0764, "step": 712 }, { "epoch": 0.03, "grad_norm": 0.6443161964416504, "learning_rate": 0.0005997991804923997, "loss": 4.0913, "step": 713 }, { "epoch": 0.03, "grad_norm": 0.6476138234138489, "learning_rate": 0.0005997986168518713, "loss": 4.2866, "step": 714 }, { "epoch": 0.04, "grad_norm": 0.603281557559967, "learning_rate": 0.000599798052421731, "loss": 4.1755, "step": 715 }, { "epoch": 0.04, "grad_norm": 0.6660357713699341, "learning_rate": 0.0005997974872019804, "loss": 4.0975, "step": 716 }, { "epoch": 0.04, "grad_norm": 0.6493331789970398, "learning_rate": 0.0005997969211926208, "loss": 4.2041, "step": 717 }, { "epoch": 0.04, "grad_norm": 0.6349976062774658, "learning_rate": 0.000599796354393654, "loss": 4.0962, "step": 718 }, { "epoch": 0.04, "grad_norm": 0.6662265062332153, "learning_rate": 0.000599795786805081, "loss": 4.0407, "step": 719 }, { "epoch": 0.04, "grad_norm": 0.5812216997146606, "learning_rate": 0.0005997952184269038, "loss": 4.2963, "step": 720 }, { "epoch": 0.04, "grad_norm": 0.5997583866119385, "learning_rate": 0.0005997946492591237, "loss": 4.0069, "step": 721 }, { "epoch": 0.04, "grad_norm": 0.6585355997085571, "learning_rate": 0.0005997940793017422, "loss": 4.0195, "step": 722 }, { "epoch": 0.04, "grad_norm": 0.6478659510612488, "learning_rate": 0.0005997935085547606, "loss": 4.1715, "step": 723 }, { "epoch": 0.04, "grad_norm": 0.6393293142318726, "learning_rate": 0.0005997929370181809, "loss": 4.086, "step": 724 }, { "epoch": 0.04, "grad_norm": 0.6129328608512878, "learning_rate": 0.0005997923646920041, "loss": 4.2473, "step": 725 }, { "epoch": 0.04, "grad_norm": 0.6000876426696777, "learning_rate": 0.0005997917915762319, "loss": 3.8759, "step": 726 }, { "epoch": 0.04, "grad_norm": 0.6004976034164429, "learning_rate": 0.0005997912176708657, "loss": 4.2669, "step": 727 }, { "epoch": 0.04, "grad_norm": 0.698418140411377, "learning_rate": 0.0005997906429759074, "loss": 4.2933, "step": 728 }, { "epoch": 0.04, "grad_norm": 0.6390034556388855, "learning_rate": 0.0005997900674913581, "loss": 4.2472, "step": 729 }, { "epoch": 0.04, "grad_norm": 0.6226664781570435, "learning_rate": 0.0005997894912172196, "loss": 4.0674, "step": 730 }, { "epoch": 0.04, "grad_norm": 0.6232365965843201, "learning_rate": 0.0005997889141534931, "loss": 4.3033, "step": 731 }, { "epoch": 0.04, "grad_norm": 0.6623662114143372, "learning_rate": 0.0005997883363001803, "loss": 4.1758, "step": 732 }, { "epoch": 0.04, "grad_norm": 0.9481949806213379, "learning_rate": 0.0005997877576572828, "loss": 4.5139, "step": 733 }, { "epoch": 0.04, "grad_norm": 0.6264766454696655, "learning_rate": 0.000599787178224802, "loss": 4.109, "step": 734 }, { "epoch": 0.04, "grad_norm": 0.6743102073669434, "learning_rate": 0.0005997865980027395, "loss": 4.422, "step": 735 }, { "epoch": 0.04, "grad_norm": 0.688208281993866, "learning_rate": 0.0005997860169910968, "loss": 4.1469, "step": 736 }, { "epoch": 0.04, "grad_norm": 0.5970245003700256, "learning_rate": 0.0005997854351898754, "loss": 4.2083, "step": 737 }, { "epoch": 0.04, "grad_norm": 0.6264445781707764, "learning_rate": 0.0005997848525990769, "loss": 4.251, "step": 738 }, { "epoch": 0.04, "grad_norm": 0.6513589024543762, "learning_rate": 0.0005997842692187027, "loss": 3.8674, "step": 739 }, { "epoch": 0.04, "grad_norm": 0.6829968690872192, "learning_rate": 0.0005997836850487545, "loss": 3.9729, "step": 740 }, { "epoch": 0.04, "grad_norm": 0.6139025688171387, "learning_rate": 0.0005997831000892338, "loss": 4.1896, "step": 741 }, { "epoch": 0.04, "grad_norm": 0.6279214024543762, "learning_rate": 0.0005997825143401421, "loss": 4.1086, "step": 742 }, { "epoch": 0.04, "grad_norm": 0.6188284158706665, "learning_rate": 0.0005997819278014808, "loss": 4.0587, "step": 743 }, { "epoch": 0.04, "grad_norm": 0.6335148811340332, "learning_rate": 0.0005997813404732517, "loss": 3.9916, "step": 744 }, { "epoch": 0.04, "grad_norm": 0.643291711807251, "learning_rate": 0.0005997807523554563, "loss": 4.2194, "step": 745 }, { "epoch": 0.04, "grad_norm": 0.6920807957649231, "learning_rate": 0.000599780163448096, "loss": 3.9606, "step": 746 }, { "epoch": 0.04, "grad_norm": 0.6325848698616028, "learning_rate": 0.0005997795737511725, "loss": 4.0133, "step": 747 }, { "epoch": 0.04, "grad_norm": 0.6075696349143982, "learning_rate": 0.0005997789832646874, "loss": 4.1146, "step": 748 }, { "epoch": 0.04, "grad_norm": 0.658141016960144, "learning_rate": 0.000599778391988642, "loss": 4.1188, "step": 749 }, { "epoch": 0.04, "grad_norm": 0.5975100994110107, "learning_rate": 0.000599777799923038, "loss": 4.1244, "step": 750 }, { "epoch": 0.04, "grad_norm": 0.6382097601890564, "learning_rate": 0.0005997772070678771, "loss": 4.0732, "step": 751 }, { "epoch": 0.04, "grad_norm": 0.623789370059967, "learning_rate": 0.0005997766134231606, "loss": 4.1322, "step": 752 }, { "epoch": 0.04, "grad_norm": 0.6385670900344849, "learning_rate": 0.0005997760189888902, "loss": 4.1762, "step": 753 }, { "epoch": 0.04, "grad_norm": 0.640335738658905, "learning_rate": 0.0005997754237650675, "loss": 4.0456, "step": 754 }, { "epoch": 0.04, "grad_norm": 0.6698756814002991, "learning_rate": 0.0005997748277516941, "loss": 4.3383, "step": 755 }, { "epoch": 0.04, "grad_norm": 0.6713323593139648, "learning_rate": 0.0005997742309487714, "loss": 4.0747, "step": 756 }, { "epoch": 0.04, "grad_norm": 0.61420738697052, "learning_rate": 0.0005997736333563011, "loss": 4.1113, "step": 757 }, { "epoch": 0.04, "grad_norm": 0.6245611310005188, "learning_rate": 0.0005997730349742847, "loss": 4.3387, "step": 758 }, { "epoch": 0.04, "grad_norm": 0.5801929235458374, "learning_rate": 0.0005997724358027238, "loss": 4.0914, "step": 759 }, { "epoch": 0.04, "grad_norm": 0.6528918743133545, "learning_rate": 0.0005997718358416201, "loss": 4.148, "step": 760 }, { "epoch": 0.04, "grad_norm": 0.5901684761047363, "learning_rate": 0.000599771235090975, "loss": 4.0853, "step": 761 }, { "epoch": 0.04, "grad_norm": 0.6030728220939636, "learning_rate": 0.0005997706335507902, "loss": 4.071, "step": 762 }, { "epoch": 0.04, "grad_norm": 0.624138355255127, "learning_rate": 0.0005997700312210672, "loss": 4.134, "step": 763 }, { "epoch": 0.04, "grad_norm": 0.647909939289093, "learning_rate": 0.0005997694281018077, "loss": 4.2178, "step": 764 }, { "epoch": 0.04, "grad_norm": 0.6185994148254395, "learning_rate": 0.0005997688241930131, "loss": 4.2237, "step": 765 }, { "epoch": 0.04, "grad_norm": 0.6649905443191528, "learning_rate": 0.0005997682194946852, "loss": 4.4995, "step": 766 }, { "epoch": 0.04, "grad_norm": 0.6107615828514099, "learning_rate": 0.0005997676140068255, "loss": 4.1487, "step": 767 }, { "epoch": 0.04, "grad_norm": 0.6697407364845276, "learning_rate": 0.0005997670077294355, "loss": 4.0481, "step": 768 }, { "epoch": 0.04, "grad_norm": 0.6354249715805054, "learning_rate": 0.000599766400662517, "loss": 4.0766, "step": 769 }, { "epoch": 0.04, "grad_norm": 0.63181072473526, "learning_rate": 0.0005997657928060715, "loss": 3.8411, "step": 770 }, { "epoch": 0.04, "grad_norm": 0.6216081976890564, "learning_rate": 0.0005997651841601006, "loss": 4.1454, "step": 771 }, { "epoch": 0.04, "grad_norm": 0.6391425728797913, "learning_rate": 0.0005997645747246058, "loss": 4.0997, "step": 772 }, { "epoch": 0.04, "grad_norm": 0.6952412724494934, "learning_rate": 0.0005997639644995888, "loss": 4.2179, "step": 773 }, { "epoch": 0.04, "grad_norm": 0.6759364008903503, "learning_rate": 0.0005997633534850514, "loss": 4.1654, "step": 774 }, { "epoch": 0.04, "grad_norm": 0.6045799851417542, "learning_rate": 0.0005997627416809948, "loss": 4.2034, "step": 775 }, { "epoch": 0.04, "grad_norm": 0.6865895390510559, "learning_rate": 0.0005997621290874209, "loss": 4.0764, "step": 776 }, { "epoch": 0.04, "grad_norm": 0.6113295555114746, "learning_rate": 0.0005997615157043312, "loss": 4.2651, "step": 777 }, { "epoch": 0.04, "grad_norm": 0.687916100025177, "learning_rate": 0.0005997609015317275, "loss": 4.2646, "step": 778 }, { "epoch": 0.04, "grad_norm": 0.6994914412498474, "learning_rate": 0.0005997602865696111, "loss": 3.9585, "step": 779 }, { "epoch": 0.04, "grad_norm": 0.6061134338378906, "learning_rate": 0.0005997596708179839, "loss": 4.0875, "step": 780 }, { "epoch": 0.04, "grad_norm": 0.606025218963623, "learning_rate": 0.0005997590542768475, "loss": 4.321, "step": 781 }, { "epoch": 0.04, "grad_norm": 0.6358824968338013, "learning_rate": 0.0005997584369462033, "loss": 4.1621, "step": 782 }, { "epoch": 0.04, "grad_norm": 0.6684304475784302, "learning_rate": 0.0005997578188260531, "loss": 4.2266, "step": 783 }, { "epoch": 0.04, "grad_norm": 0.5966183543205261, "learning_rate": 0.0005997571999163985, "loss": 4.0322, "step": 784 }, { "epoch": 0.04, "grad_norm": 0.5628737211227417, "learning_rate": 0.0005997565802172411, "loss": 3.8574, "step": 785 }, { "epoch": 0.04, "grad_norm": 0.7218655347824097, "learning_rate": 0.0005997559597285825, "loss": 3.857, "step": 786 }, { "epoch": 0.04, "grad_norm": 0.6253189444541931, "learning_rate": 0.0005997553384504246, "loss": 4.1211, "step": 787 }, { "epoch": 0.04, "grad_norm": 0.7218075394630432, "learning_rate": 0.0005997547163827686, "loss": 3.6799, "step": 788 }, { "epoch": 0.04, "grad_norm": 0.5922057628631592, "learning_rate": 0.0005997540935256165, "loss": 4.3404, "step": 789 }, { "epoch": 0.04, "grad_norm": 0.6513999700546265, "learning_rate": 0.0005997534698789698, "loss": 4.0325, "step": 790 }, { "epoch": 0.04, "grad_norm": 0.6125958561897278, "learning_rate": 0.0005997528454428301, "loss": 3.9919, "step": 791 }, { "epoch": 0.04, "grad_norm": 0.6877921223640442, "learning_rate": 0.0005997522202171991, "loss": 4.1427, "step": 792 }, { "epoch": 0.04, "grad_norm": 0.6257963180541992, "learning_rate": 0.0005997515942020785, "loss": 3.9095, "step": 793 }, { "epoch": 0.04, "grad_norm": 0.6871378421783447, "learning_rate": 0.0005997509673974699, "loss": 4.2673, "step": 794 }, { "epoch": 0.04, "grad_norm": 0.7322933077812195, "learning_rate": 0.0005997503398033748, "loss": 4.0279, "step": 795 }, { "epoch": 0.04, "grad_norm": 0.6184875965118408, "learning_rate": 0.0005997497114197952, "loss": 4.0317, "step": 796 }, { "epoch": 0.04, "grad_norm": 0.620513916015625, "learning_rate": 0.0005997490822467323, "loss": 4.1609, "step": 797 }, { "epoch": 0.04, "grad_norm": 0.6280710101127625, "learning_rate": 0.0005997484522841882, "loss": 3.9997, "step": 798 }, { "epoch": 0.04, "grad_norm": 0.6425851583480835, "learning_rate": 0.0005997478215321642, "loss": 4.3098, "step": 799 }, { "epoch": 0.04, "grad_norm": 0.6624174118041992, "learning_rate": 0.0005997471899906623, "loss": 3.9097, "step": 800 }, { "epoch": 0.04, "grad_norm": 0.6578688621520996, "learning_rate": 0.0005997465576596839, "loss": 4.2133, "step": 801 }, { "epoch": 0.04, "grad_norm": 0.6733409762382507, "learning_rate": 0.0005997459245392308, "loss": 4.0564, "step": 802 }, { "epoch": 0.04, "grad_norm": 0.6345073580741882, "learning_rate": 0.0005997452906293045, "loss": 4.1252, "step": 803 }, { "epoch": 0.04, "grad_norm": 0.6336762309074402, "learning_rate": 0.0005997446559299069, "loss": 4.1081, "step": 804 }, { "epoch": 0.04, "grad_norm": 0.6436794996261597, "learning_rate": 0.0005997440204410395, "loss": 4.0327, "step": 805 }, { "epoch": 0.04, "grad_norm": 1.0349910259246826, "learning_rate": 0.0005997433841627042, "loss": 4.0259, "step": 806 }, { "epoch": 0.04, "grad_norm": 0.6200885772705078, "learning_rate": 0.0005997427470949023, "loss": 4.1537, "step": 807 }, { "epoch": 0.04, "grad_norm": 0.6146049499511719, "learning_rate": 0.0005997421092376358, "loss": 4.2592, "step": 808 }, { "epoch": 0.04, "grad_norm": 0.6383852958679199, "learning_rate": 0.0005997414705909062, "loss": 3.9864, "step": 809 }, { "epoch": 0.04, "grad_norm": 0.6226063966751099, "learning_rate": 0.0005997408311547153, "loss": 4.0929, "step": 810 }, { "epoch": 0.04, "grad_norm": 0.6670148968696594, "learning_rate": 0.0005997401909290648, "loss": 4.1746, "step": 811 }, { "epoch": 0.04, "grad_norm": 0.6614567637443542, "learning_rate": 0.0005997395499139562, "loss": 3.9935, "step": 812 }, { "epoch": 0.04, "grad_norm": 0.5947785973548889, "learning_rate": 0.0005997389081093914, "loss": 4.0051, "step": 813 }, { "epoch": 0.04, "grad_norm": 0.6563360691070557, "learning_rate": 0.000599738265515372, "loss": 4.1408, "step": 814 }, { "epoch": 0.04, "grad_norm": 0.626630961894989, "learning_rate": 0.0005997376221318996, "loss": 4.2392, "step": 815 }, { "epoch": 0.04, "grad_norm": 0.5788909792900085, "learning_rate": 0.0005997369779589762, "loss": 4.0375, "step": 816 }, { "epoch": 0.04, "grad_norm": 0.5952694416046143, "learning_rate": 0.0005997363329966031, "loss": 4.0138, "step": 817 }, { "epoch": 0.04, "grad_norm": 0.6254683136940002, "learning_rate": 0.0005997356872447822, "loss": 4.0292, "step": 818 }, { "epoch": 0.04, "grad_norm": 0.5747913718223572, "learning_rate": 0.0005997350407035153, "loss": 4.1415, "step": 819 }, { "epoch": 0.04, "grad_norm": 0.638823926448822, "learning_rate": 0.0005997343933728038, "loss": 4.1323, "step": 820 }, { "epoch": 0.04, "grad_norm": 0.6529001593589783, "learning_rate": 0.0005997337452526498, "loss": 4.0443, "step": 821 }, { "epoch": 0.04, "grad_norm": 0.62080317735672, "learning_rate": 0.0005997330963430547, "loss": 4.1244, "step": 822 }, { "epoch": 0.04, "grad_norm": 0.6070016622543335, "learning_rate": 0.0005997324466440202, "loss": 4.1765, "step": 823 }, { "epoch": 0.04, "grad_norm": 0.7610416412353516, "learning_rate": 0.0005997317961555483, "loss": 4.0887, "step": 824 }, { "epoch": 0.04, "grad_norm": 0.6447186470031738, "learning_rate": 0.0005997311448776404, "loss": 4.2312, "step": 825 }, { "epoch": 0.04, "grad_norm": 0.6255171298980713, "learning_rate": 0.0005997304928102985, "loss": 3.8122, "step": 826 }, { "epoch": 0.04, "grad_norm": 0.6879158616065979, "learning_rate": 0.0005997298399535241, "loss": 4.0435, "step": 827 }, { "epoch": 0.04, "grad_norm": 0.6047009229660034, "learning_rate": 0.000599729186307319, "loss": 4.0208, "step": 828 }, { "epoch": 0.04, "grad_norm": 0.5998377203941345, "learning_rate": 0.0005997285318716847, "loss": 4.055, "step": 829 }, { "epoch": 0.04, "grad_norm": 0.5977486371994019, "learning_rate": 0.0005997278766466233, "loss": 4.238, "step": 830 }, { "epoch": 0.04, "grad_norm": 0.5870335698127747, "learning_rate": 0.0005997272206321365, "loss": 3.7925, "step": 831 }, { "epoch": 0.04, "grad_norm": 0.6094736456871033, "learning_rate": 0.0005997265638282256, "loss": 4.2518, "step": 832 }, { "epoch": 0.04, "grad_norm": 0.6728547215461731, "learning_rate": 0.0005997259062348929, "loss": 4.1802, "step": 833 }, { "epoch": 0.04, "grad_norm": 0.5765520334243774, "learning_rate": 0.0005997252478521396, "loss": 3.8716, "step": 834 }, { "epoch": 0.04, "grad_norm": 0.604479193687439, "learning_rate": 0.0005997245886799679, "loss": 4.1638, "step": 835 }, { "epoch": 0.04, "grad_norm": 0.7261841297149658, "learning_rate": 0.0005997239287183791, "loss": 4.0187, "step": 836 }, { "epoch": 0.04, "grad_norm": 0.6174228191375732, "learning_rate": 0.0005997232679673752, "loss": 4.0628, "step": 837 }, { "epoch": 0.04, "grad_norm": 0.5952420234680176, "learning_rate": 0.0005997226064269579, "loss": 4.1506, "step": 838 }, { "epoch": 0.04, "grad_norm": 0.5986089110374451, "learning_rate": 0.0005997219440971291, "loss": 4.0894, "step": 839 }, { "epoch": 0.04, "grad_norm": 0.5981305241584778, "learning_rate": 0.0005997212809778903, "loss": 3.893, "step": 840 }, { "epoch": 0.04, "grad_norm": 0.747908353805542, "learning_rate": 0.0005997206170692432, "loss": 4.2173, "step": 841 }, { "epoch": 0.04, "grad_norm": 0.6060889959335327, "learning_rate": 0.0005997199523711899, "loss": 4.1177, "step": 842 }, { "epoch": 0.04, "grad_norm": 0.6315199732780457, "learning_rate": 0.0005997192868837317, "loss": 4.1163, "step": 843 }, { "epoch": 0.04, "grad_norm": 0.6924545764923096, "learning_rate": 0.0005997186206068708, "loss": 3.9744, "step": 844 }, { "epoch": 0.04, "grad_norm": 0.6298468112945557, "learning_rate": 0.0005997179535406086, "loss": 4.0924, "step": 845 }, { "epoch": 0.04, "grad_norm": 0.5951611399650574, "learning_rate": 0.000599717285684947, "loss": 3.8841, "step": 846 }, { "epoch": 0.04, "grad_norm": 0.6348850727081299, "learning_rate": 0.0005997166170398876, "loss": 4.0741, "step": 847 }, { "epoch": 0.04, "grad_norm": 0.6831207275390625, "learning_rate": 0.0005997159476054326, "loss": 3.9413, "step": 848 }, { "epoch": 0.04, "grad_norm": 0.6047587394714355, "learning_rate": 0.0005997152773815834, "loss": 4.0532, "step": 849 }, { "epoch": 0.04, "grad_norm": 0.6082019805908203, "learning_rate": 0.0005997146063683417, "loss": 4.0897, "step": 850 }, { "epoch": 0.04, "grad_norm": 0.6189398169517517, "learning_rate": 0.0005997139345657095, "loss": 4.1492, "step": 851 }, { "epoch": 0.04, "grad_norm": 0.624375581741333, "learning_rate": 0.0005997132619736885, "loss": 4.3931, "step": 852 }, { "epoch": 0.04, "grad_norm": 0.6344230771064758, "learning_rate": 0.0005997125885922805, "loss": 3.903, "step": 853 }, { "epoch": 0.04, "grad_norm": 0.6171305179595947, "learning_rate": 0.0005997119144214872, "loss": 4.1388, "step": 854 }, { "epoch": 0.04, "grad_norm": 0.6373316645622253, "learning_rate": 0.0005997112394613102, "loss": 4.3171, "step": 855 }, { "epoch": 0.04, "grad_norm": 0.5979803204536438, "learning_rate": 0.0005997105637117516, "loss": 4.0232, "step": 856 }, { "epoch": 0.04, "grad_norm": 0.6416492462158203, "learning_rate": 0.0005997098871728131, "loss": 4.4098, "step": 857 }, { "epoch": 0.04, "grad_norm": 0.6283490061759949, "learning_rate": 0.0005997092098444964, "loss": 3.9947, "step": 858 }, { "epoch": 0.04, "grad_norm": 0.5568273663520813, "learning_rate": 0.0005997085317268033, "loss": 4.0547, "step": 859 }, { "epoch": 0.04, "grad_norm": 0.6507592797279358, "learning_rate": 0.0005997078528197356, "loss": 3.9341, "step": 860 }, { "epoch": 0.04, "grad_norm": 0.635384202003479, "learning_rate": 0.0005997071731232952, "loss": 4.0064, "step": 861 }, { "epoch": 0.04, "grad_norm": 0.5979812145233154, "learning_rate": 0.0005997064926374837, "loss": 3.8342, "step": 862 }, { "epoch": 0.04, "grad_norm": 0.6876376867294312, "learning_rate": 0.000599705811362303, "loss": 4.1938, "step": 863 }, { "epoch": 0.04, "grad_norm": 0.6582483649253845, "learning_rate": 0.0005997051292977549, "loss": 4.0655, "step": 864 }, { "epoch": 0.04, "grad_norm": 0.6212353706359863, "learning_rate": 0.0005997044464438413, "loss": 4.1384, "step": 865 }, { "epoch": 0.04, "grad_norm": 0.5523200631141663, "learning_rate": 0.0005997037628005636, "loss": 4.1858, "step": 866 }, { "epoch": 0.04, "grad_norm": 0.6378701329231262, "learning_rate": 0.0005997030783679241, "loss": 4.112, "step": 867 }, { "epoch": 0.04, "grad_norm": 0.5988345742225647, "learning_rate": 0.0005997023931459242, "loss": 4.095, "step": 868 }, { "epoch": 0.04, "grad_norm": 0.6165454983711243, "learning_rate": 0.0005997017071345659, "loss": 3.8559, "step": 869 }, { "epoch": 0.04, "grad_norm": 0.6014971733093262, "learning_rate": 0.000599701020333851, "loss": 3.8783, "step": 870 }, { "epoch": 0.04, "grad_norm": 0.6640937924385071, "learning_rate": 0.0005997003327437812, "loss": 4.1482, "step": 871 }, { "epoch": 0.04, "grad_norm": 0.6299766302108765, "learning_rate": 0.0005996996443643585, "loss": 4.0896, "step": 872 }, { "epoch": 0.04, "grad_norm": 0.6197918057441711, "learning_rate": 0.0005996989551955847, "loss": 3.9987, "step": 873 }, { "epoch": 0.04, "grad_norm": 0.6115261316299438, "learning_rate": 0.0005996982652374614, "loss": 4.0155, "step": 874 }, { "epoch": 0.04, "grad_norm": 0.6456928253173828, "learning_rate": 0.0005996975744899906, "loss": 4.2857, "step": 875 }, { "epoch": 0.04, "grad_norm": 0.6480864882469177, "learning_rate": 0.000599696882953174, "loss": 3.983, "step": 876 }, { "epoch": 0.04, "grad_norm": 0.6337156295776367, "learning_rate": 0.0005996961906270136, "loss": 4.0381, "step": 877 }, { "epoch": 0.04, "grad_norm": 0.610868513584137, "learning_rate": 0.0005996954975115111, "loss": 4.123, "step": 878 }, { "epoch": 0.04, "grad_norm": 0.6607982516288757, "learning_rate": 0.0005996948036066683, "loss": 4.2524, "step": 879 }, { "epoch": 0.04, "grad_norm": 0.6953883171081543, "learning_rate": 0.000599694108912487, "loss": 3.9404, "step": 880 }, { "epoch": 0.04, "grad_norm": 0.6293426156044006, "learning_rate": 0.0005996934134289692, "loss": 3.9467, "step": 881 }, { "epoch": 0.04, "grad_norm": 0.6299402117729187, "learning_rate": 0.0005996927171561166, "loss": 4.2004, "step": 882 }, { "epoch": 0.04, "grad_norm": 0.7695729732513428, "learning_rate": 0.0005996920200939309, "loss": 3.9446, "step": 883 }, { "epoch": 0.04, "grad_norm": 0.662330687046051, "learning_rate": 0.0005996913222424144, "loss": 4.1542, "step": 884 }, { "epoch": 0.04, "grad_norm": 0.6112087965011597, "learning_rate": 0.0005996906236015684, "loss": 4.1722, "step": 885 }, { "epoch": 0.04, "grad_norm": 0.6271827220916748, "learning_rate": 0.000599689924171395, "loss": 3.9089, "step": 886 }, { "epoch": 0.04, "grad_norm": 0.6005352735519409, "learning_rate": 0.0005996892239518961, "loss": 3.9845, "step": 887 }, { "epoch": 0.04, "grad_norm": 0.6041455864906311, "learning_rate": 0.0005996885229430734, "loss": 4.1323, "step": 888 }, { "epoch": 0.04, "grad_norm": 0.5910174250602722, "learning_rate": 0.0005996878211449289, "loss": 4.2648, "step": 889 }, { "epoch": 0.04, "grad_norm": 0.6130709648132324, "learning_rate": 0.0005996871185574643, "loss": 4.0298, "step": 890 }, { "epoch": 0.04, "grad_norm": 0.6232227087020874, "learning_rate": 0.0005996864151806815, "loss": 4.2897, "step": 891 }, { "epoch": 0.04, "grad_norm": 0.6949803233146667, "learning_rate": 0.0005996857110145823, "loss": 4.0198, "step": 892 }, { "epoch": 0.04, "grad_norm": 0.5966178774833679, "learning_rate": 0.0005996850060591687, "loss": 4.1471, "step": 893 }, { "epoch": 0.04, "grad_norm": 0.6501827836036682, "learning_rate": 0.0005996843003144424, "loss": 4.0682, "step": 894 }, { "epoch": 0.04, "grad_norm": 0.6076254844665527, "learning_rate": 0.0005996835937804054, "loss": 4.1748, "step": 895 }, { "epoch": 0.04, "grad_norm": 0.5926339030265808, "learning_rate": 0.0005996828864570594, "loss": 4.019, "step": 896 }, { "epoch": 0.04, "grad_norm": 0.6619225144386292, "learning_rate": 0.0005996821783444064, "loss": 3.9084, "step": 897 }, { "epoch": 0.04, "grad_norm": 0.6074878573417664, "learning_rate": 0.0005996814694424483, "loss": 4.0513, "step": 898 }, { "epoch": 0.04, "grad_norm": 0.7126258015632629, "learning_rate": 0.0005996807597511868, "loss": 4.0084, "step": 899 }, { "epoch": 0.04, "grad_norm": 0.6678495407104492, "learning_rate": 0.0005996800492706237, "loss": 3.9053, "step": 900 }, { "epoch": 0.04, "grad_norm": 0.6096392869949341, "learning_rate": 0.0005996793380007613, "loss": 3.9624, "step": 901 }, { "epoch": 0.04, "grad_norm": 0.6041682362556458, "learning_rate": 0.000599678625941601, "loss": 4.0106, "step": 902 }, { "epoch": 0.04, "grad_norm": 0.6434935331344604, "learning_rate": 0.000599677913093145, "loss": 4.0789, "step": 903 }, { "epoch": 0.04, "grad_norm": 0.7096984386444092, "learning_rate": 0.000599677199455395, "loss": 4.1337, "step": 904 }, { "epoch": 0.04, "grad_norm": 0.5979908108711243, "learning_rate": 0.0005996764850283529, "loss": 4.1843, "step": 905 }, { "epoch": 0.04, "grad_norm": 0.6335223913192749, "learning_rate": 0.0005996757698120206, "loss": 4.2159, "step": 906 }, { "epoch": 0.04, "grad_norm": 0.685057520866394, "learning_rate": 0.0005996750538064001, "loss": 3.8512, "step": 907 }, { "epoch": 0.04, "grad_norm": 0.6344968676567078, "learning_rate": 0.0005996743370114932, "loss": 4.1279, "step": 908 }, { "epoch": 0.04, "grad_norm": 0.6115414500236511, "learning_rate": 0.0005996736194273017, "loss": 4.1364, "step": 909 }, { "epoch": 0.04, "grad_norm": 0.6755195260047913, "learning_rate": 0.0005996729010538275, "loss": 4.0321, "step": 910 }, { "epoch": 0.04, "grad_norm": 0.5959694981575012, "learning_rate": 0.0005996721818910725, "loss": 4.2033, "step": 911 }, { "epoch": 0.04, "grad_norm": 0.6436964869499207, "learning_rate": 0.0005996714619390389, "loss": 4.0886, "step": 912 }, { "epoch": 0.04, "grad_norm": 0.6065239310264587, "learning_rate": 0.0005996707411977281, "loss": 4.1504, "step": 913 }, { "epoch": 0.04, "grad_norm": 0.5962178707122803, "learning_rate": 0.0005996700196671423, "loss": 3.8577, "step": 914 }, { "epoch": 0.04, "grad_norm": 0.6235368847846985, "learning_rate": 0.0005996692973472835, "loss": 4.1173, "step": 915 }, { "epoch": 0.04, "grad_norm": 0.6299294233322144, "learning_rate": 0.0005996685742381532, "loss": 3.9826, "step": 916 }, { "epoch": 0.04, "grad_norm": 0.6154347062110901, "learning_rate": 0.0005996678503397537, "loss": 4.0741, "step": 917 }, { "epoch": 0.04, "grad_norm": 0.6413784623146057, "learning_rate": 0.0005996671256520867, "loss": 3.9614, "step": 918 }, { "epoch": 0.05, "grad_norm": 0.631839394569397, "learning_rate": 0.0005996664001751542, "loss": 4.0662, "step": 919 }, { "epoch": 0.05, "grad_norm": 0.6405590772628784, "learning_rate": 0.0005996656739089581, "loss": 4.0365, "step": 920 }, { "epoch": 0.05, "grad_norm": 0.6293720006942749, "learning_rate": 0.0005996649468535002, "loss": 4.2496, "step": 921 }, { "epoch": 0.05, "grad_norm": 0.6363210082054138, "learning_rate": 0.0005996642190087825, "loss": 3.9949, "step": 922 }, { "epoch": 0.05, "grad_norm": 0.6418921947479248, "learning_rate": 0.000599663490374807, "loss": 4.0203, "step": 923 }, { "epoch": 0.05, "grad_norm": 0.6134077906608582, "learning_rate": 0.0005996627609515755, "loss": 3.9338, "step": 924 }, { "epoch": 0.05, "grad_norm": 0.6744232177734375, "learning_rate": 0.00059966203073909, "loss": 3.9921, "step": 925 }, { "epoch": 0.05, "grad_norm": 0.6086677312850952, "learning_rate": 0.0005996612997373524, "loss": 3.9985, "step": 926 }, { "epoch": 0.05, "grad_norm": 0.6225132942199707, "learning_rate": 0.0005996605679463644, "loss": 4.061, "step": 927 }, { "epoch": 0.05, "grad_norm": 0.6579686999320984, "learning_rate": 0.0005996598353661284, "loss": 4.1547, "step": 928 }, { "epoch": 0.05, "grad_norm": 0.6591225862503052, "learning_rate": 0.0005996591019966459, "loss": 4.1103, "step": 929 }, { "epoch": 0.05, "grad_norm": 0.601435661315918, "learning_rate": 0.0005996583678379191, "loss": 3.9549, "step": 930 }, { "epoch": 0.05, "grad_norm": 0.6121599674224854, "learning_rate": 0.0005996576328899497, "loss": 4.0187, "step": 931 }, { "epoch": 0.05, "grad_norm": 0.62762850522995, "learning_rate": 0.00059965689715274, "loss": 4.0643, "step": 932 }, { "epoch": 0.05, "grad_norm": 0.638881266117096, "learning_rate": 0.0005996561606262914, "loss": 4.0128, "step": 933 }, { "epoch": 0.05, "grad_norm": 0.5881949067115784, "learning_rate": 0.0005996554233106063, "loss": 3.9358, "step": 934 }, { "epoch": 0.05, "grad_norm": 0.6519078612327576, "learning_rate": 0.0005996546852056865, "loss": 3.9074, "step": 935 }, { "epoch": 0.05, "grad_norm": 0.6900700926780701, "learning_rate": 0.0005996539463115339, "loss": 4.0253, "step": 936 }, { "epoch": 0.05, "grad_norm": 0.6109591126441956, "learning_rate": 0.0005996532066281505, "loss": 4.1243, "step": 937 }, { "epoch": 0.05, "grad_norm": 0.6539261341094971, "learning_rate": 0.0005996524661555382, "loss": 4.1093, "step": 938 }, { "epoch": 0.05, "grad_norm": 0.6261901259422302, "learning_rate": 0.000599651724893699, "loss": 4.0016, "step": 939 }, { "epoch": 0.05, "grad_norm": 0.6098498702049255, "learning_rate": 0.0005996509828426347, "loss": 4.0939, "step": 940 }, { "epoch": 0.05, "grad_norm": 0.6256742477416992, "learning_rate": 0.0005996502400023475, "loss": 4.1468, "step": 941 }, { "epoch": 0.05, "grad_norm": 0.6049453616142273, "learning_rate": 0.0005996494963728392, "loss": 4.1078, "step": 942 }, { "epoch": 0.05, "grad_norm": 0.593472957611084, "learning_rate": 0.0005996487519541119, "loss": 4.2345, "step": 943 }, { "epoch": 0.05, "grad_norm": 0.6599992513656616, "learning_rate": 0.0005996480067461673, "loss": 4.0287, "step": 944 }, { "epoch": 0.05, "grad_norm": 0.6049973964691162, "learning_rate": 0.0005996472607490076, "loss": 3.9218, "step": 945 }, { "epoch": 0.05, "grad_norm": 0.6687982678413391, "learning_rate": 0.0005996465139626346, "loss": 4.0952, "step": 946 }, { "epoch": 0.05, "grad_norm": 0.6398851275444031, "learning_rate": 0.0005996457663870505, "loss": 3.7504, "step": 947 }, { "epoch": 0.05, "grad_norm": 0.6496148109436035, "learning_rate": 0.0005996450180222571, "loss": 3.9623, "step": 948 }, { "epoch": 0.05, "grad_norm": 0.6317514777183533, "learning_rate": 0.0005996442688682562, "loss": 4.1198, "step": 949 }, { "epoch": 0.05, "grad_norm": 0.6482225656509399, "learning_rate": 0.0005996435189250501, "loss": 3.883, "step": 950 }, { "epoch": 0.05, "grad_norm": 0.607512891292572, "learning_rate": 0.0005996427681926406, "loss": 4.0248, "step": 951 }, { "epoch": 0.05, "grad_norm": 0.6879410147666931, "learning_rate": 0.0005996420166710297, "loss": 4.0689, "step": 952 }, { "epoch": 0.05, "grad_norm": 0.6295230984687805, "learning_rate": 0.0005996412643602196, "loss": 4.2013, "step": 953 }, { "epoch": 0.05, "grad_norm": 0.6885456442832947, "learning_rate": 0.0005996405112602119, "loss": 4.1025, "step": 954 }, { "epoch": 0.05, "grad_norm": 0.5858647227287292, "learning_rate": 0.0005996397573710087, "loss": 4.0533, "step": 955 }, { "epoch": 0.05, "grad_norm": 0.6196497082710266, "learning_rate": 0.0005996390026926121, "loss": 4.094, "step": 956 }, { "epoch": 0.05, "grad_norm": 0.6471155285835266, "learning_rate": 0.0005996382472250241, "loss": 3.8297, "step": 957 }, { "epoch": 0.05, "grad_norm": 0.6373564004898071, "learning_rate": 0.0005996374909682465, "loss": 4.142, "step": 958 }, { "epoch": 0.05, "grad_norm": 0.6640136241912842, "learning_rate": 0.0005996367339222815, "loss": 4.2055, "step": 959 }, { "epoch": 0.05, "grad_norm": 0.6296126842498779, "learning_rate": 0.0005996359760871311, "loss": 3.9527, "step": 960 }, { "epoch": 0.05, "grad_norm": 0.6657034754753113, "learning_rate": 0.0005996352174627971, "loss": 4.1435, "step": 961 }, { "epoch": 0.05, "grad_norm": 0.632871687412262, "learning_rate": 0.0005996344580492817, "loss": 3.8951, "step": 962 }, { "epoch": 0.05, "grad_norm": 0.6474413275718689, "learning_rate": 0.0005996336978465866, "loss": 4.1062, "step": 963 }, { "epoch": 0.05, "grad_norm": 0.5804777145385742, "learning_rate": 0.0005996329368547141, "loss": 3.9748, "step": 964 }, { "epoch": 0.05, "grad_norm": 0.6162649989128113, "learning_rate": 0.0005996321750736662, "loss": 3.8642, "step": 965 }, { "epoch": 0.05, "grad_norm": 0.5436090230941772, "learning_rate": 0.0005996314125034447, "loss": 4.0974, "step": 966 }, { "epoch": 0.05, "grad_norm": 0.648061990737915, "learning_rate": 0.0005996306491440517, "loss": 3.9705, "step": 967 }, { "epoch": 0.05, "grad_norm": 0.6477342844009399, "learning_rate": 0.0005996298849954894, "loss": 4.1531, "step": 968 }, { "epoch": 0.05, "grad_norm": 0.6069393157958984, "learning_rate": 0.0005996291200577596, "loss": 4.0453, "step": 969 }, { "epoch": 0.05, "grad_norm": 0.6199972629547119, "learning_rate": 0.0005996283543308643, "loss": 4.2185, "step": 970 }, { "epoch": 0.05, "grad_norm": 0.6699131727218628, "learning_rate": 0.0005996275878148055, "loss": 4.0178, "step": 971 }, { "epoch": 0.05, "grad_norm": 0.7701675295829773, "learning_rate": 0.0005996268205095855, "loss": 3.9939, "step": 972 }, { "epoch": 0.05, "grad_norm": 0.5983409285545349, "learning_rate": 0.000599626052415206, "loss": 3.9987, "step": 973 }, { "epoch": 0.05, "grad_norm": 0.6925124526023865, "learning_rate": 0.0005996252835316692, "loss": 3.917, "step": 974 }, { "epoch": 0.05, "grad_norm": 0.6102199554443359, "learning_rate": 0.000599624513858977, "loss": 3.9685, "step": 975 }, { "epoch": 0.05, "grad_norm": 0.6447969079017639, "learning_rate": 0.0005996237433971315, "loss": 4.3062, "step": 976 }, { "epoch": 0.05, "grad_norm": 0.606885552406311, "learning_rate": 0.0005996229721461347, "loss": 4.2991, "step": 977 }, { "epoch": 0.05, "grad_norm": 0.6305561065673828, "learning_rate": 0.0005996222001059887, "loss": 4.1132, "step": 978 }, { "epoch": 0.05, "grad_norm": 0.6146825551986694, "learning_rate": 0.0005996214272766955, "loss": 3.8875, "step": 979 }, { "epoch": 0.05, "grad_norm": 0.6634001731872559, "learning_rate": 0.0005996206536582571, "loss": 3.9914, "step": 980 }, { "epoch": 0.05, "grad_norm": 0.623626708984375, "learning_rate": 0.0005996198792506755, "loss": 4.1724, "step": 981 }, { "epoch": 0.05, "grad_norm": 0.6683825850486755, "learning_rate": 0.000599619104053953, "loss": 4.0585, "step": 982 }, { "epoch": 0.05, "grad_norm": 0.5538197159767151, "learning_rate": 0.0005996183280680914, "loss": 3.9352, "step": 983 }, { "epoch": 0.05, "grad_norm": 0.6251077055931091, "learning_rate": 0.0005996175512930927, "loss": 4.3014, "step": 984 }, { "epoch": 0.05, "grad_norm": 0.593470573425293, "learning_rate": 0.0005996167737289589, "loss": 4.0789, "step": 985 }, { "epoch": 0.05, "grad_norm": 0.6242483258247375, "learning_rate": 0.0005996159953756923, "loss": 4.0246, "step": 986 }, { "epoch": 0.05, "grad_norm": 0.6253484487533569, "learning_rate": 0.0005996152162332949, "loss": 3.9758, "step": 987 }, { "epoch": 0.05, "grad_norm": 0.6085364818572998, "learning_rate": 0.0005996144363017686, "loss": 3.8767, "step": 988 }, { "epoch": 0.05, "grad_norm": 0.6171862483024597, "learning_rate": 0.0005996136555811156, "loss": 4.01, "step": 989 }, { "epoch": 0.05, "grad_norm": 0.5950732231140137, "learning_rate": 0.0005996128740713379, "loss": 3.7366, "step": 990 }, { "epoch": 0.05, "grad_norm": 0.6203235983848572, "learning_rate": 0.0005996120917724374, "loss": 4.0769, "step": 991 }, { "epoch": 0.05, "grad_norm": 0.608241856098175, "learning_rate": 0.0005996113086844165, "loss": 3.8565, "step": 992 }, { "epoch": 0.05, "grad_norm": 0.612503707408905, "learning_rate": 0.0005996105248072769, "loss": 4.1548, "step": 993 }, { "epoch": 0.05, "grad_norm": 0.5867249369621277, "learning_rate": 0.000599609740141021, "loss": 3.8754, "step": 994 }, { "epoch": 0.05, "grad_norm": 0.6069759726524353, "learning_rate": 0.0005996089546856507, "loss": 4.0749, "step": 995 }, { "epoch": 0.05, "grad_norm": 0.5676235556602478, "learning_rate": 0.000599608168441168, "loss": 3.8812, "step": 996 }, { "epoch": 0.05, "grad_norm": 0.6222755908966064, "learning_rate": 0.0005996073814075751, "loss": 3.8984, "step": 997 }, { "epoch": 0.05, "grad_norm": 0.5938562750816345, "learning_rate": 0.0005996065935848739, "loss": 3.961, "step": 998 }, { "epoch": 0.05, "grad_norm": 0.5748042464256287, "learning_rate": 0.0005996058049730666, "loss": 3.823, "step": 999 }, { "epoch": 0.05, "grad_norm": 0.6213597059249878, "learning_rate": 0.0005996050155721553, "loss": 3.9996, "step": 1000 }, { "epoch": 0.05, "grad_norm": 0.622829258441925, "learning_rate": 0.0005996042253821422, "loss": 3.9153, "step": 1001 }, { "epoch": 0.05, "grad_norm": 0.5811119675636292, "learning_rate": 0.000599603434403029, "loss": 4.0641, "step": 1002 }, { "epoch": 0.05, "grad_norm": 0.6401235461235046, "learning_rate": 0.0005996026426348183, "loss": 4.0072, "step": 1003 }, { "epoch": 0.05, "grad_norm": 0.6247691512107849, "learning_rate": 0.0005996018500775117, "loss": 4.0948, "step": 1004 }, { "epoch": 0.05, "grad_norm": 0.8084251284599304, "learning_rate": 0.0005996010567311115, "loss": 4.232, "step": 1005 }, { "epoch": 0.05, "grad_norm": 0.611668586730957, "learning_rate": 0.0005996002625956198, "loss": 3.9149, "step": 1006 }, { "epoch": 0.05, "grad_norm": 0.6284043788909912, "learning_rate": 0.0005995994676710386, "loss": 4.1114, "step": 1007 }, { "epoch": 0.05, "grad_norm": 0.6413992047309875, "learning_rate": 0.0005995986719573701, "loss": 4.0627, "step": 1008 }, { "epoch": 0.05, "grad_norm": 0.769960880279541, "learning_rate": 0.0005995978754546165, "loss": 4.0868, "step": 1009 }, { "epoch": 0.05, "grad_norm": 0.6613034605979919, "learning_rate": 0.0005995970781627795, "loss": 3.9804, "step": 1010 }, { "epoch": 0.05, "grad_norm": 0.6815891265869141, "learning_rate": 0.0005995962800818617, "loss": 3.9572, "step": 1011 }, { "epoch": 0.05, "grad_norm": 0.6836370825767517, "learning_rate": 0.0005995954812118648, "loss": 3.9664, "step": 1012 }, { "epoch": 0.05, "grad_norm": 0.6569324731826782, "learning_rate": 0.000599594681552791, "loss": 4.1149, "step": 1013 }, { "epoch": 0.05, "grad_norm": 0.6019830703735352, "learning_rate": 0.0005995938811046426, "loss": 3.9853, "step": 1014 }, { "epoch": 0.05, "grad_norm": 0.612848699092865, "learning_rate": 0.0005995930798674216, "loss": 4.155, "step": 1015 }, { "epoch": 0.05, "grad_norm": 0.5986717343330383, "learning_rate": 0.00059959227784113, "loss": 4.0356, "step": 1016 }, { "epoch": 0.05, "grad_norm": 0.6132051348686218, "learning_rate": 0.0005995914750257701, "loss": 3.7446, "step": 1017 }, { "epoch": 0.05, "grad_norm": 0.6531481742858887, "learning_rate": 0.0005995906714213439, "loss": 3.7868, "step": 1018 }, { "epoch": 0.05, "grad_norm": 0.6747217178344727, "learning_rate": 0.0005995898670278535, "loss": 4.0415, "step": 1019 }, { "epoch": 0.05, "grad_norm": 0.6562297344207764, "learning_rate": 0.0005995890618453009, "loss": 3.7626, "step": 1020 }, { "epoch": 0.05, "grad_norm": 0.8349602222442627, "learning_rate": 0.0005995882558736885, "loss": 3.96, "step": 1021 }, { "epoch": 0.05, "grad_norm": 0.7195594906806946, "learning_rate": 0.0005995874491130183, "loss": 3.9682, "step": 1022 }, { "epoch": 0.05, "grad_norm": 0.5842819809913635, "learning_rate": 0.0005995866415632924, "loss": 4.1458, "step": 1023 }, { "epoch": 0.05, "grad_norm": 0.6021990776062012, "learning_rate": 0.0005995858332245129, "loss": 3.7984, "step": 1024 }, { "epoch": 0.05, "grad_norm": 0.6178818941116333, "learning_rate": 0.000599585024096682, "loss": 3.95, "step": 1025 }, { "epoch": 0.05, "grad_norm": 0.7198466062545776, "learning_rate": 0.0005995842141798018, "loss": 3.782, "step": 1026 }, { "epoch": 0.05, "grad_norm": 0.6762264370918274, "learning_rate": 0.0005995834034738744, "loss": 4.1151, "step": 1027 }, { "epoch": 0.05, "grad_norm": 0.6355637311935425, "learning_rate": 0.0005995825919789021, "loss": 4.1855, "step": 1028 }, { "epoch": 0.05, "grad_norm": 0.6325914859771729, "learning_rate": 0.0005995817796948869, "loss": 3.8144, "step": 1029 }, { "epoch": 0.05, "grad_norm": 0.6539778709411621, "learning_rate": 0.0005995809666218308, "loss": 4.0136, "step": 1030 }, { "epoch": 0.05, "grad_norm": 0.701380729675293, "learning_rate": 0.0005995801527597362, "loss": 4.0071, "step": 1031 }, { "epoch": 0.05, "grad_norm": 0.617536723613739, "learning_rate": 0.0005995793381086051, "loss": 3.9345, "step": 1032 }, { "epoch": 0.05, "grad_norm": 0.5975783467292786, "learning_rate": 0.0005995785226684397, "loss": 3.9294, "step": 1033 }, { "epoch": 0.05, "grad_norm": 0.5805318355560303, "learning_rate": 0.0005995777064392421, "loss": 4.0045, "step": 1034 }, { "epoch": 0.05, "grad_norm": 0.6723288893699646, "learning_rate": 0.0005995768894210144, "loss": 3.9988, "step": 1035 }, { "epoch": 0.05, "grad_norm": 0.7065977454185486, "learning_rate": 0.000599576071613759, "loss": 3.8351, "step": 1036 }, { "epoch": 0.05, "grad_norm": 0.7041149139404297, "learning_rate": 0.0005995752530174777, "loss": 3.907, "step": 1037 }, { "epoch": 0.05, "grad_norm": 0.6054835319519043, "learning_rate": 0.0005995744336321729, "loss": 3.8096, "step": 1038 }, { "epoch": 0.05, "grad_norm": 0.6375568509101868, "learning_rate": 0.0005995736134578468, "loss": 3.9835, "step": 1039 }, { "epoch": 0.05, "grad_norm": 0.6124348640441895, "learning_rate": 0.0005995727924945014, "loss": 3.9451, "step": 1040 }, { "epoch": 0.05, "grad_norm": 0.787834644317627, "learning_rate": 0.0005995719707421388, "loss": 3.9742, "step": 1041 }, { "epoch": 0.05, "grad_norm": 0.6277994513511658, "learning_rate": 0.0005995711482007615, "loss": 3.9475, "step": 1042 }, { "epoch": 0.05, "grad_norm": 0.6354180574417114, "learning_rate": 0.0005995703248703713, "loss": 4.2671, "step": 1043 }, { "epoch": 0.05, "grad_norm": 0.5767564177513123, "learning_rate": 0.0005995695007509705, "loss": 4.0588, "step": 1044 }, { "epoch": 0.05, "grad_norm": 0.6037499904632568, "learning_rate": 0.0005995686758425614, "loss": 3.8606, "step": 1045 }, { "epoch": 0.05, "grad_norm": 0.7180562019348145, "learning_rate": 0.000599567850145146, "loss": 3.6251, "step": 1046 }, { "epoch": 0.05, "grad_norm": 0.6226548552513123, "learning_rate": 0.0005995670236587265, "loss": 3.8062, "step": 1047 }, { "epoch": 0.05, "grad_norm": 0.6276943683624268, "learning_rate": 0.0005995661963833051, "loss": 4.17, "step": 1048 }, { "epoch": 0.05, "grad_norm": 0.6406780481338501, "learning_rate": 0.0005995653683188841, "loss": 4.0168, "step": 1049 }, { "epoch": 0.05, "grad_norm": 0.660855770111084, "learning_rate": 0.0005995645394654655, "loss": 3.7325, "step": 1050 }, { "epoch": 0.05, "grad_norm": 0.7150101661682129, "learning_rate": 0.0005995637098230516, "loss": 4.2903, "step": 1051 }, { "epoch": 0.05, "grad_norm": 0.6027021408081055, "learning_rate": 0.0005995628793916445, "loss": 3.9363, "step": 1052 }, { "epoch": 0.05, "grad_norm": 0.6147125959396362, "learning_rate": 0.0005995620481712464, "loss": 4.002, "step": 1053 }, { "epoch": 0.05, "grad_norm": 0.6540363430976868, "learning_rate": 0.0005995612161618595, "loss": 4.1231, "step": 1054 }, { "epoch": 0.05, "grad_norm": 0.7382659316062927, "learning_rate": 0.0005995603833634862, "loss": 3.7431, "step": 1055 }, { "epoch": 0.05, "grad_norm": 0.6010420918464661, "learning_rate": 0.0005995595497761283, "loss": 4.0042, "step": 1056 }, { "epoch": 0.05, "grad_norm": 0.6575014591217041, "learning_rate": 0.0005995587153997882, "loss": 3.9075, "step": 1057 }, { "epoch": 0.05, "grad_norm": 0.6506360769271851, "learning_rate": 0.0005995578802344682, "loss": 4.0998, "step": 1058 }, { "epoch": 0.05, "grad_norm": 0.670628547668457, "learning_rate": 0.0005995570442801703, "loss": 3.9776, "step": 1059 }, { "epoch": 0.05, "grad_norm": 0.6330320239067078, "learning_rate": 0.0005995562075368969, "loss": 3.9504, "step": 1060 }, { "epoch": 0.05, "grad_norm": 0.604716956615448, "learning_rate": 0.0005995553700046501, "loss": 3.8521, "step": 1061 }, { "epoch": 0.05, "grad_norm": 0.6562084555625916, "learning_rate": 0.000599554531683432, "loss": 4.1304, "step": 1062 }, { "epoch": 0.05, "grad_norm": 0.6587114334106445, "learning_rate": 0.000599553692573245, "loss": 3.8732, "step": 1063 }, { "epoch": 0.05, "grad_norm": 0.6262410283088684, "learning_rate": 0.0005995528526740911, "loss": 3.9372, "step": 1064 }, { "epoch": 0.05, "grad_norm": 0.6062853336334229, "learning_rate": 0.0005995520119859727, "loss": 4.017, "step": 1065 }, { "epoch": 0.05, "grad_norm": 0.6965979337692261, "learning_rate": 0.000599551170508892, "loss": 3.9419, "step": 1066 }, { "epoch": 0.05, "grad_norm": 0.6158636212348938, "learning_rate": 0.0005995503282428512, "loss": 3.903, "step": 1067 }, { "epoch": 0.05, "grad_norm": 0.6285214424133301, "learning_rate": 0.0005995494851878524, "loss": 4.057, "step": 1068 }, { "epoch": 0.05, "grad_norm": 0.7115497589111328, "learning_rate": 0.0005995486413438979, "loss": 3.8139, "step": 1069 }, { "epoch": 0.05, "grad_norm": 0.5884736776351929, "learning_rate": 0.00059954779671099, "loss": 3.9513, "step": 1070 }, { "epoch": 0.05, "grad_norm": 0.6723683476448059, "learning_rate": 0.0005995469512891308, "loss": 3.9108, "step": 1071 }, { "epoch": 0.05, "grad_norm": 0.605546236038208, "learning_rate": 0.0005995461050783226, "loss": 4.0745, "step": 1072 }, { "epoch": 0.05, "grad_norm": 0.5845876336097717, "learning_rate": 0.0005995452580785676, "loss": 4.0279, "step": 1073 }, { "epoch": 0.05, "grad_norm": 0.6146058440208435, "learning_rate": 0.000599544410289868, "loss": 4.0084, "step": 1074 }, { "epoch": 0.05, "grad_norm": 0.6121573448181152, "learning_rate": 0.0005995435617122261, "loss": 3.9437, "step": 1075 }, { "epoch": 0.05, "grad_norm": 0.6386142373085022, "learning_rate": 0.0005995427123456441, "loss": 4.1899, "step": 1076 }, { "epoch": 0.05, "grad_norm": 0.607537567615509, "learning_rate": 0.0005995418621901243, "loss": 3.7694, "step": 1077 }, { "epoch": 0.05, "grad_norm": 0.8437908291816711, "learning_rate": 0.0005995410112456688, "loss": 3.7674, "step": 1078 }, { "epoch": 0.05, "grad_norm": 0.6184353232383728, "learning_rate": 0.00059954015951228, "loss": 3.8997, "step": 1079 }, { "epoch": 0.05, "grad_norm": 0.6318781971931458, "learning_rate": 0.0005995393069899601, "loss": 3.9576, "step": 1080 }, { "epoch": 0.05, "grad_norm": 0.6466130614280701, "learning_rate": 0.0005995384536787112, "loss": 4.0041, "step": 1081 }, { "epoch": 0.05, "grad_norm": 0.668156087398529, "learning_rate": 0.0005995375995785357, "loss": 4.0425, "step": 1082 }, { "epoch": 0.05, "grad_norm": 0.5986151695251465, "learning_rate": 0.0005995367446894358, "loss": 3.7717, "step": 1083 }, { "epoch": 0.05, "grad_norm": 0.6351150870323181, "learning_rate": 0.0005995358890114137, "loss": 4.0434, "step": 1084 }, { "epoch": 0.05, "grad_norm": 0.6070703864097595, "learning_rate": 0.0005995350325444719, "loss": 4.0091, "step": 1085 }, { "epoch": 0.05, "grad_norm": 0.7131597399711609, "learning_rate": 0.0005995341752886124, "loss": 3.9062, "step": 1086 }, { "epoch": 0.05, "grad_norm": 0.6343297958374023, "learning_rate": 0.0005995333172438374, "loss": 4.2335, "step": 1087 }, { "epoch": 0.05, "grad_norm": 0.6075549721717834, "learning_rate": 0.0005995324584101494, "loss": 3.9601, "step": 1088 }, { "epoch": 0.05, "grad_norm": 0.5800171494483948, "learning_rate": 0.0005995315987875505, "loss": 4.0497, "step": 1089 }, { "epoch": 0.05, "grad_norm": 0.5841231346130371, "learning_rate": 0.0005995307383760431, "loss": 3.9826, "step": 1090 }, { "epoch": 0.05, "grad_norm": 0.6168744564056396, "learning_rate": 0.0005995298771756293, "loss": 4.2034, "step": 1091 }, { "epoch": 0.05, "grad_norm": 0.660862922668457, "learning_rate": 0.0005995290151863114, "loss": 4.1355, "step": 1092 }, { "epoch": 0.05, "grad_norm": 0.7201418280601501, "learning_rate": 0.0005995281524080919, "loss": 3.7802, "step": 1093 }, { "epoch": 0.05, "grad_norm": 0.6478432416915894, "learning_rate": 0.0005995272888409728, "loss": 3.8778, "step": 1094 }, { "epoch": 0.05, "grad_norm": 0.6431102156639099, "learning_rate": 0.0005995264244849564, "loss": 3.8396, "step": 1095 }, { "epoch": 0.05, "grad_norm": 0.6155719757080078, "learning_rate": 0.0005995255593400452, "loss": 3.9207, "step": 1096 }, { "epoch": 0.05, "grad_norm": 0.6385898590087891, "learning_rate": 0.0005995246934062412, "loss": 3.9631, "step": 1097 }, { "epoch": 0.05, "grad_norm": 0.6196228265762329, "learning_rate": 0.0005995238266835468, "loss": 3.9081, "step": 1098 }, { "epoch": 0.05, "grad_norm": 0.6495975255966187, "learning_rate": 0.0005995229591719643, "loss": 4.0299, "step": 1099 }, { "epoch": 0.05, "grad_norm": 0.6047267913818359, "learning_rate": 0.000599522090871496, "loss": 4.1038, "step": 1100 }, { "epoch": 0.05, "grad_norm": 0.5929258465766907, "learning_rate": 0.0005995212217821441, "loss": 4.0514, "step": 1101 }, { "epoch": 0.05, "grad_norm": 0.6673069000244141, "learning_rate": 0.000599520351903911, "loss": 3.8946, "step": 1102 }, { "epoch": 0.05, "grad_norm": 0.597668468952179, "learning_rate": 0.000599519481236799, "loss": 3.8061, "step": 1103 }, { "epoch": 0.05, "grad_norm": 0.6228914260864258, "learning_rate": 0.0005995186097808102, "loss": 4.0257, "step": 1104 }, { "epoch": 0.05, "grad_norm": 0.6423768997192383, "learning_rate": 0.0005995177375359471, "loss": 4.0893, "step": 1105 }, { "epoch": 0.05, "grad_norm": 0.6223159432411194, "learning_rate": 0.0005995168645022118, "loss": 4.0435, "step": 1106 }, { "epoch": 0.05, "grad_norm": 0.6253622174263, "learning_rate": 0.0005995159906796068, "loss": 3.755, "step": 1107 }, { "epoch": 0.05, "grad_norm": 0.6040705442428589, "learning_rate": 0.0005995151160681344, "loss": 4.1872, "step": 1108 }, { "epoch": 0.05, "grad_norm": 0.5690031051635742, "learning_rate": 0.0005995142406677967, "loss": 3.9943, "step": 1109 }, { "epoch": 0.05, "grad_norm": 0.6597207188606262, "learning_rate": 0.0005995133644785963, "loss": 3.9716, "step": 1110 }, { "epoch": 0.05, "grad_norm": 0.6696934103965759, "learning_rate": 0.0005995124875005352, "loss": 3.759, "step": 1111 }, { "epoch": 0.05, "grad_norm": 0.6375954747200012, "learning_rate": 0.000599511609733616, "loss": 3.8252, "step": 1112 }, { "epoch": 0.05, "grad_norm": 0.636753499507904, "learning_rate": 0.0005995107311778406, "loss": 3.9468, "step": 1113 }, { "epoch": 0.05, "grad_norm": 0.5825153589248657, "learning_rate": 0.0005995098518332117, "loss": 3.8568, "step": 1114 }, { "epoch": 0.05, "grad_norm": 0.6210393309593201, "learning_rate": 0.0005995089716997316, "loss": 4.1555, "step": 1115 }, { "epoch": 0.05, "grad_norm": 0.6214787364006042, "learning_rate": 0.0005995080907774024, "loss": 3.9536, "step": 1116 }, { "epoch": 0.05, "grad_norm": 0.678767740726471, "learning_rate": 0.0005995072090662267, "loss": 3.835, "step": 1117 }, { "epoch": 0.05, "grad_norm": 0.6073644161224365, "learning_rate": 0.0005995063265662063, "loss": 4.2358, "step": 1118 }, { "epoch": 0.05, "grad_norm": 0.6090550422668457, "learning_rate": 0.0005995054432773442, "loss": 3.9443, "step": 1119 }, { "epoch": 0.05, "grad_norm": 0.715369462966919, "learning_rate": 0.0005995045591996423, "loss": 4.0607, "step": 1120 }, { "epoch": 0.05, "grad_norm": 0.5823421478271484, "learning_rate": 0.0005995036743331031, "loss": 4.0344, "step": 1121 }, { "epoch": 0.05, "grad_norm": 0.6796244382858276, "learning_rate": 0.0005995027886777287, "loss": 3.9048, "step": 1122 }, { "epoch": 0.06, "grad_norm": 0.5604933500289917, "learning_rate": 0.0005995019022335217, "loss": 4.0935, "step": 1123 }, { "epoch": 0.06, "grad_norm": 0.6292743682861328, "learning_rate": 0.0005995010150004844, "loss": 3.9694, "step": 1124 }, { "epoch": 0.06, "grad_norm": 0.6105656623840332, "learning_rate": 0.0005995001269786189, "loss": 3.8372, "step": 1125 }, { "epoch": 0.06, "grad_norm": 0.6465719938278198, "learning_rate": 0.0005994992381679277, "loss": 3.7912, "step": 1126 }, { "epoch": 0.06, "grad_norm": 0.704645574092865, "learning_rate": 0.0005994983485684133, "loss": 3.8925, "step": 1127 }, { "epoch": 0.06, "grad_norm": 0.6321631073951721, "learning_rate": 0.0005994974581800779, "loss": 4.1433, "step": 1128 }, { "epoch": 0.06, "grad_norm": 0.6182036399841309, "learning_rate": 0.0005994965670029237, "loss": 3.9114, "step": 1129 }, { "epoch": 0.06, "grad_norm": 0.6689995527267456, "learning_rate": 0.0005994956750369532, "loss": 4.0865, "step": 1130 }, { "epoch": 0.06, "grad_norm": 0.6217949986457825, "learning_rate": 0.0005994947822821688, "loss": 4.1664, "step": 1131 }, { "epoch": 0.06, "grad_norm": 0.608690083026886, "learning_rate": 0.0005994938887385727, "loss": 4.0256, "step": 1132 }, { "epoch": 0.06, "grad_norm": 0.6573523283004761, "learning_rate": 0.0005994929944061675, "loss": 3.9891, "step": 1133 }, { "epoch": 0.06, "grad_norm": 0.6570764780044556, "learning_rate": 0.0005994920992849552, "loss": 3.9587, "step": 1134 }, { "epoch": 0.06, "grad_norm": 0.5835046172142029, "learning_rate": 0.0005994912033749385, "loss": 4.1042, "step": 1135 }, { "epoch": 0.06, "grad_norm": 0.6346054077148438, "learning_rate": 0.0005994903066761196, "loss": 4.0866, "step": 1136 }, { "epoch": 0.06, "grad_norm": 0.6046596169471741, "learning_rate": 0.0005994894091885007, "loss": 3.9774, "step": 1137 }, { "epoch": 0.06, "grad_norm": 0.6285713911056519, "learning_rate": 0.0005994885109120845, "loss": 3.9324, "step": 1138 }, { "epoch": 0.06, "grad_norm": 0.5951323509216309, "learning_rate": 0.0005994876118468731, "loss": 3.8939, "step": 1139 }, { "epoch": 0.06, "grad_norm": 0.6478222012519836, "learning_rate": 0.0005994867119928689, "loss": 3.9917, "step": 1140 }, { "epoch": 0.06, "grad_norm": 0.6328380703926086, "learning_rate": 0.0005994858113500746, "loss": 3.8191, "step": 1141 }, { "epoch": 0.06, "grad_norm": 0.6400364637374878, "learning_rate": 0.0005994849099184921, "loss": 4.0963, "step": 1142 }, { "epoch": 0.06, "grad_norm": 0.5783833861351013, "learning_rate": 0.000599484007698124, "loss": 3.9709, "step": 1143 }, { "epoch": 0.06, "grad_norm": 0.6741927862167358, "learning_rate": 0.0005994831046889727, "loss": 3.8906, "step": 1144 }, { "epoch": 0.06, "grad_norm": 0.6302924156188965, "learning_rate": 0.0005994822008910406, "loss": 3.7855, "step": 1145 }, { "epoch": 0.06, "grad_norm": 0.5713663101196289, "learning_rate": 0.0005994812963043299, "loss": 3.963, "step": 1146 }, { "epoch": 0.06, "grad_norm": 0.5913187265396118, "learning_rate": 0.0005994803909288431, "loss": 3.9689, "step": 1147 }, { "epoch": 0.06, "grad_norm": 0.6074280738830566, "learning_rate": 0.0005994794847645827, "loss": 3.9716, "step": 1148 }, { "epoch": 0.06, "grad_norm": 0.626340925693512, "learning_rate": 0.000599478577811551, "loss": 4.1273, "step": 1149 }, { "epoch": 0.06, "grad_norm": 0.5834325551986694, "learning_rate": 0.0005994776700697503, "loss": 3.8578, "step": 1150 }, { "epoch": 0.06, "grad_norm": 0.621330738067627, "learning_rate": 0.000599476761539183, "loss": 3.8196, "step": 1151 }, { "epoch": 0.06, "grad_norm": 0.6001361608505249, "learning_rate": 0.0005994758522198516, "loss": 3.9403, "step": 1152 }, { "epoch": 0.06, "grad_norm": 0.6255820393562317, "learning_rate": 0.0005994749421117584, "loss": 4.1259, "step": 1153 }, { "epoch": 0.06, "grad_norm": 0.631493330001831, "learning_rate": 0.0005994740312149058, "loss": 4.104, "step": 1154 }, { "epoch": 0.06, "grad_norm": 0.6368297934532166, "learning_rate": 0.0005994731195292964, "loss": 4.0128, "step": 1155 }, { "epoch": 0.06, "grad_norm": 0.6331319808959961, "learning_rate": 0.0005994722070549323, "loss": 3.8825, "step": 1156 }, { "epoch": 0.06, "grad_norm": 0.5888481736183167, "learning_rate": 0.000599471293791816, "loss": 4.1243, "step": 1157 }, { "epoch": 0.06, "grad_norm": 0.5803425312042236, "learning_rate": 0.00059947037973995, "loss": 3.9828, "step": 1158 }, { "epoch": 0.06, "grad_norm": 0.5989425182342529, "learning_rate": 0.0005994694648993368, "loss": 3.7897, "step": 1159 }, { "epoch": 0.06, "grad_norm": 0.6215832233428955, "learning_rate": 0.0005994685492699785, "loss": 3.6121, "step": 1160 }, { "epoch": 0.06, "grad_norm": 0.6019163131713867, "learning_rate": 0.0005994676328518778, "loss": 4.244, "step": 1161 }, { "epoch": 0.06, "grad_norm": 0.5976054668426514, "learning_rate": 0.0005994667156450368, "loss": 4.1499, "step": 1162 }, { "epoch": 0.06, "grad_norm": 0.6580859422683716, "learning_rate": 0.0005994657976494583, "loss": 3.814, "step": 1163 }, { "epoch": 0.06, "grad_norm": 0.6277784705162048, "learning_rate": 0.0005994648788651445, "loss": 4.0219, "step": 1164 }, { "epoch": 0.06, "grad_norm": 0.6280736327171326, "learning_rate": 0.0005994639592920976, "loss": 4.0019, "step": 1165 }, { "epoch": 0.06, "grad_norm": 0.7152631878852844, "learning_rate": 0.0005994630389303205, "loss": 3.9115, "step": 1166 }, { "epoch": 0.06, "grad_norm": 0.6251851916313171, "learning_rate": 0.0005994621177798153, "loss": 4.104, "step": 1167 }, { "epoch": 0.06, "grad_norm": 0.6360085606575012, "learning_rate": 0.0005994611958405846, "loss": 3.9413, "step": 1168 }, { "epoch": 0.06, "grad_norm": 0.5834187865257263, "learning_rate": 0.0005994602731126306, "loss": 3.8453, "step": 1169 }, { "epoch": 0.06, "grad_norm": 0.6263565421104431, "learning_rate": 0.000599459349595956, "loss": 3.9509, "step": 1170 }, { "epoch": 0.06, "grad_norm": 0.6582068204879761, "learning_rate": 0.000599458425290563, "loss": 3.7823, "step": 1171 }, { "epoch": 0.06, "grad_norm": 0.6431542038917542, "learning_rate": 0.0005994575001964542, "loss": 3.9593, "step": 1172 }, { "epoch": 0.06, "grad_norm": 0.6388793587684631, "learning_rate": 0.0005994565743136321, "loss": 3.8094, "step": 1173 }, { "epoch": 0.06, "grad_norm": 0.6203798055648804, "learning_rate": 0.0005994556476420988, "loss": 3.8218, "step": 1174 }, { "epoch": 0.06, "grad_norm": 0.6218488812446594, "learning_rate": 0.0005994547201818571, "loss": 3.8657, "step": 1175 }, { "epoch": 0.06, "grad_norm": 0.6742687225341797, "learning_rate": 0.0005994537919329092, "loss": 3.8143, "step": 1176 }, { "epoch": 0.06, "grad_norm": 0.6174846887588501, "learning_rate": 0.0005994528628952577, "loss": 3.8962, "step": 1177 }, { "epoch": 0.06, "grad_norm": 0.7459059357643127, "learning_rate": 0.000599451933068905, "loss": 4.0231, "step": 1178 }, { "epoch": 0.06, "grad_norm": 0.602178692817688, "learning_rate": 0.0005994510024538535, "loss": 4.0269, "step": 1179 }, { "epoch": 0.06, "grad_norm": 0.6028525233268738, "learning_rate": 0.0005994500710501058, "loss": 4.0828, "step": 1180 }, { "epoch": 0.06, "grad_norm": 0.6567595601081848, "learning_rate": 0.0005994491388576641, "loss": 4.0124, "step": 1181 }, { "epoch": 0.06, "grad_norm": 0.6130238771438599, "learning_rate": 0.000599448205876531, "loss": 4.0882, "step": 1182 }, { "epoch": 0.06, "grad_norm": 0.5951813459396362, "learning_rate": 0.000599447272106709, "loss": 3.8659, "step": 1183 }, { "epoch": 0.06, "grad_norm": 0.6704297065734863, "learning_rate": 0.0005994463375482006, "loss": 4.0565, "step": 1184 }, { "epoch": 0.06, "grad_norm": 0.6474865674972534, "learning_rate": 0.0005994454022010081, "loss": 4.048, "step": 1185 }, { "epoch": 0.06, "grad_norm": 0.5812511444091797, "learning_rate": 0.0005994444660651339, "loss": 3.8526, "step": 1186 }, { "epoch": 0.06, "grad_norm": 0.5992949604988098, "learning_rate": 0.0005994435291405808, "loss": 3.9835, "step": 1187 }, { "epoch": 0.06, "grad_norm": 0.6317020654678345, "learning_rate": 0.0005994425914273509, "loss": 3.8099, "step": 1188 }, { "epoch": 0.06, "grad_norm": 0.6197634339332581, "learning_rate": 0.000599441652925447, "loss": 3.8242, "step": 1189 }, { "epoch": 0.06, "grad_norm": 0.6547470092773438, "learning_rate": 0.0005994407136348715, "loss": 3.9396, "step": 1190 }, { "epoch": 0.06, "grad_norm": 0.594266951084137, "learning_rate": 0.0005994397735556267, "loss": 3.9765, "step": 1191 }, { "epoch": 0.06, "grad_norm": 0.647244930267334, "learning_rate": 0.000599438832687715, "loss": 3.8405, "step": 1192 }, { "epoch": 0.06, "grad_norm": 0.6598258018493652, "learning_rate": 0.0005994378910311393, "loss": 3.7971, "step": 1193 }, { "epoch": 0.06, "grad_norm": 0.6706022620201111, "learning_rate": 0.0005994369485859016, "loss": 3.9114, "step": 1194 }, { "epoch": 0.06, "grad_norm": 0.5980822443962097, "learning_rate": 0.0005994360053520047, "loss": 3.8791, "step": 1195 }, { "epoch": 0.06, "grad_norm": 0.6569104194641113, "learning_rate": 0.000599435061329451, "loss": 3.9084, "step": 1196 }, { "epoch": 0.06, "grad_norm": 0.6087574362754822, "learning_rate": 0.000599434116518243, "loss": 3.7479, "step": 1197 }, { "epoch": 0.06, "grad_norm": 0.6148558259010315, "learning_rate": 0.0005994331709183831, "loss": 3.9402, "step": 1198 }, { "epoch": 0.06, "grad_norm": 0.6539002656936646, "learning_rate": 0.0005994322245298739, "loss": 3.9145, "step": 1199 }, { "epoch": 0.06, "grad_norm": 0.6310211420059204, "learning_rate": 0.0005994312773527178, "loss": 3.8744, "step": 1200 }, { "epoch": 0.06, "grad_norm": 0.630675196647644, "learning_rate": 0.0005994303293869175, "loss": 3.8331, "step": 1201 }, { "epoch": 0.06, "grad_norm": 0.5651322603225708, "learning_rate": 0.000599429380632475, "loss": 4.0296, "step": 1202 }, { "epoch": 0.06, "grad_norm": 0.5851694941520691, "learning_rate": 0.0005994284310893934, "loss": 3.6882, "step": 1203 }, { "epoch": 0.06, "grad_norm": 0.6342992186546326, "learning_rate": 0.0005994274807576749, "loss": 3.8372, "step": 1204 }, { "epoch": 0.06, "grad_norm": 0.5836645364761353, "learning_rate": 0.0005994265296373221, "loss": 4.0055, "step": 1205 }, { "epoch": 0.06, "grad_norm": 0.628150999546051, "learning_rate": 0.0005994255777283374, "loss": 3.7181, "step": 1206 }, { "epoch": 0.06, "grad_norm": 0.6232638955116272, "learning_rate": 0.0005994246250307232, "loss": 3.8581, "step": 1207 }, { "epoch": 0.06, "grad_norm": 0.6497608423233032, "learning_rate": 0.0005994236715444823, "loss": 3.6943, "step": 1208 }, { "epoch": 0.06, "grad_norm": 0.6445043087005615, "learning_rate": 0.0005994227172696171, "loss": 3.918, "step": 1209 }, { "epoch": 0.06, "grad_norm": 0.5761111378669739, "learning_rate": 0.0005994217622061301, "loss": 3.9378, "step": 1210 }, { "epoch": 0.06, "grad_norm": 0.5777541995048523, "learning_rate": 0.0005994208063540237, "loss": 3.8161, "step": 1211 }, { "epoch": 0.06, "grad_norm": 0.6353874206542969, "learning_rate": 0.0005994198497133006, "loss": 3.8907, "step": 1212 }, { "epoch": 0.06, "grad_norm": 0.6581094264984131, "learning_rate": 0.0005994188922839633, "loss": 3.8506, "step": 1213 }, { "epoch": 0.06, "grad_norm": 0.69733065366745, "learning_rate": 0.0005994179340660141, "loss": 4.0021, "step": 1214 }, { "epoch": 0.06, "grad_norm": 0.6014714241027832, "learning_rate": 0.0005994169750594558, "loss": 3.829, "step": 1215 }, { "epoch": 0.06, "grad_norm": 0.6537860035896301, "learning_rate": 0.0005994160152642908, "loss": 3.9849, "step": 1216 }, { "epoch": 0.06, "grad_norm": 0.7314767837524414, "learning_rate": 0.0005994150546805216, "loss": 3.9777, "step": 1217 }, { "epoch": 0.06, "grad_norm": 0.6319352984428406, "learning_rate": 0.0005994140933081507, "loss": 3.7624, "step": 1218 }, { "epoch": 0.06, "grad_norm": 0.5953729152679443, "learning_rate": 0.0005994131311471808, "loss": 3.8365, "step": 1219 }, { "epoch": 0.06, "grad_norm": 0.609235405921936, "learning_rate": 0.0005994121681976144, "loss": 4.1573, "step": 1220 }, { "epoch": 0.06, "grad_norm": 0.6277957558631897, "learning_rate": 0.0005994112044594538, "loss": 4.0577, "step": 1221 }, { "epoch": 0.06, "grad_norm": 0.5987586379051208, "learning_rate": 0.0005994102399327019, "loss": 4.0041, "step": 1222 }, { "epoch": 0.06, "grad_norm": 0.6414321660995483, "learning_rate": 0.0005994092746173609, "loss": 3.9939, "step": 1223 }, { "epoch": 0.06, "grad_norm": 0.6084062457084656, "learning_rate": 0.0005994083085134336, "loss": 4.004, "step": 1224 }, { "epoch": 0.06, "grad_norm": 0.6363257169723511, "learning_rate": 0.0005994073416209225, "loss": 3.7369, "step": 1225 }, { "epoch": 0.06, "grad_norm": 0.6323297023773193, "learning_rate": 0.0005994063739398299, "loss": 3.8556, "step": 1226 }, { "epoch": 0.06, "grad_norm": 0.7020496726036072, "learning_rate": 0.0005994054054701585, "loss": 3.9934, "step": 1227 }, { "epoch": 0.06, "grad_norm": 0.6019614338874817, "learning_rate": 0.000599404436211911, "loss": 3.985, "step": 1228 }, { "epoch": 0.06, "grad_norm": 0.6175892353057861, "learning_rate": 0.0005994034661650899, "loss": 3.9954, "step": 1229 }, { "epoch": 0.06, "grad_norm": 0.611221969127655, "learning_rate": 0.0005994024953296976, "loss": 3.852, "step": 1230 }, { "epoch": 0.06, "grad_norm": 0.5849113464355469, "learning_rate": 0.0005994015237057368, "loss": 3.7631, "step": 1231 }, { "epoch": 0.06, "grad_norm": 0.6553965210914612, "learning_rate": 0.0005994005512932099, "loss": 4.1467, "step": 1232 }, { "epoch": 0.06, "grad_norm": 0.6709908843040466, "learning_rate": 0.0005993995780921197, "loss": 4.0218, "step": 1233 }, { "epoch": 0.06, "grad_norm": 0.6183760762214661, "learning_rate": 0.0005993986041024686, "loss": 3.8164, "step": 1234 }, { "epoch": 0.06, "grad_norm": 0.5950647592544556, "learning_rate": 0.0005993976293242591, "loss": 4.1307, "step": 1235 }, { "epoch": 0.06, "grad_norm": 0.6528424620628357, "learning_rate": 0.000599396653757494, "loss": 3.8645, "step": 1236 }, { "epoch": 0.06, "grad_norm": 0.6041496396064758, "learning_rate": 0.0005993956774021757, "loss": 4.0503, "step": 1237 }, { "epoch": 0.06, "grad_norm": 0.6346700191497803, "learning_rate": 0.0005993947002583067, "loss": 3.978, "step": 1238 }, { "epoch": 0.06, "grad_norm": 0.6335063576698303, "learning_rate": 0.0005993937223258898, "loss": 3.8234, "step": 1239 }, { "epoch": 0.06, "grad_norm": 0.6392975449562073, "learning_rate": 0.0005993927436049273, "loss": 3.9517, "step": 1240 }, { "epoch": 0.06, "grad_norm": 0.5925444960594177, "learning_rate": 0.0005993917640954221, "loss": 3.798, "step": 1241 }, { "epoch": 0.06, "grad_norm": 0.6346644163131714, "learning_rate": 0.0005993907837973766, "loss": 3.9064, "step": 1242 }, { "epoch": 0.06, "grad_norm": 0.5913667678833008, "learning_rate": 0.0005993898027107933, "loss": 3.9169, "step": 1243 }, { "epoch": 0.06, "grad_norm": 0.7143539190292358, "learning_rate": 0.000599388820835675, "loss": 3.7224, "step": 1244 }, { "epoch": 0.06, "grad_norm": 0.6183956265449524, "learning_rate": 0.000599387838172024, "loss": 3.7269, "step": 1245 }, { "epoch": 0.06, "grad_norm": 0.614376962184906, "learning_rate": 0.0005993868547198432, "loss": 3.9984, "step": 1246 }, { "epoch": 0.06, "grad_norm": 0.5987918376922607, "learning_rate": 0.000599385870479135, "loss": 4.0514, "step": 1247 }, { "epoch": 0.06, "grad_norm": 0.5989696383476257, "learning_rate": 0.000599384885449902, "loss": 3.9283, "step": 1248 }, { "epoch": 0.06, "grad_norm": 0.6367683410644531, "learning_rate": 0.0005993838996321468, "loss": 4.1072, "step": 1249 }, { "epoch": 0.06, "grad_norm": 0.6802504658699036, "learning_rate": 0.0005993829130258721, "loss": 3.7503, "step": 1250 }, { "epoch": 0.06, "grad_norm": 0.6775780320167542, "learning_rate": 0.0005993819256310804, "loss": 4.0616, "step": 1251 }, { "epoch": 0.06, "grad_norm": 0.5724057555198669, "learning_rate": 0.0005993809374477744, "loss": 3.735, "step": 1252 }, { "epoch": 0.06, "grad_norm": 0.6689765453338623, "learning_rate": 0.0005993799484759566, "loss": 3.8647, "step": 1253 }, { "epoch": 0.06, "grad_norm": 0.6421110033988953, "learning_rate": 0.0005993789587156295, "loss": 3.9385, "step": 1254 }, { "epoch": 0.06, "grad_norm": 0.696961522102356, "learning_rate": 0.000599377968166796, "loss": 3.847, "step": 1255 }, { "epoch": 0.06, "grad_norm": 0.6425986886024475, "learning_rate": 0.0005993769768294583, "loss": 3.857, "step": 1256 }, { "epoch": 0.06, "grad_norm": 0.6519110798835754, "learning_rate": 0.0005993759847036195, "loss": 3.846, "step": 1257 }, { "epoch": 0.06, "grad_norm": 0.5740974545478821, "learning_rate": 0.000599374991789282, "loss": 3.8013, "step": 1258 }, { "epoch": 0.06, "grad_norm": 0.6745760440826416, "learning_rate": 0.0005993739980864483, "loss": 3.9241, "step": 1259 }, { "epoch": 0.06, "grad_norm": 0.6441271305084229, "learning_rate": 0.000599373003595121, "loss": 3.7695, "step": 1260 }, { "epoch": 0.06, "grad_norm": 0.6193081140518188, "learning_rate": 0.0005993720083153029, "loss": 3.985, "step": 1261 }, { "epoch": 0.06, "grad_norm": 0.6134259700775146, "learning_rate": 0.0005993710122469966, "loss": 3.7961, "step": 1262 }, { "epoch": 0.06, "grad_norm": 0.6477553248405457, "learning_rate": 0.0005993700153902046, "loss": 3.9387, "step": 1263 }, { "epoch": 0.06, "grad_norm": 0.6375543475151062, "learning_rate": 0.0005993690177449298, "loss": 4.016, "step": 1264 }, { "epoch": 0.06, "grad_norm": 0.615598738193512, "learning_rate": 0.0005993680193111744, "loss": 3.823, "step": 1265 }, { "epoch": 0.06, "grad_norm": 0.6295831203460693, "learning_rate": 0.0005993670200889412, "loss": 3.7599, "step": 1266 }, { "epoch": 0.06, "grad_norm": 0.6454052329063416, "learning_rate": 0.0005993660200782331, "loss": 3.9033, "step": 1267 }, { "epoch": 0.06, "grad_norm": 0.6237940192222595, "learning_rate": 0.0005993650192790525, "loss": 3.7442, "step": 1268 }, { "epoch": 0.06, "grad_norm": 0.6554262042045593, "learning_rate": 0.0005993640176914019, "loss": 3.8714, "step": 1269 }, { "epoch": 0.06, "grad_norm": 0.591227114200592, "learning_rate": 0.0005993630153152841, "loss": 3.6987, "step": 1270 }, { "epoch": 0.06, "grad_norm": 0.5855788588523865, "learning_rate": 0.0005993620121507018, "loss": 3.872, "step": 1271 }, { "epoch": 0.06, "grad_norm": 0.6025470495223999, "learning_rate": 0.0005993610081976575, "loss": 3.8734, "step": 1272 }, { "epoch": 0.06, "grad_norm": 0.6828283071517944, "learning_rate": 0.0005993600034561539, "loss": 3.8746, "step": 1273 }, { "epoch": 0.06, "grad_norm": 0.6492578983306885, "learning_rate": 0.0005993589979261939, "loss": 3.7428, "step": 1274 }, { "epoch": 0.06, "grad_norm": 0.6107031106948853, "learning_rate": 0.0005993579916077796, "loss": 3.7983, "step": 1275 }, { "epoch": 0.06, "grad_norm": 0.6975229382514954, "learning_rate": 0.0005993569845009142, "loss": 3.6364, "step": 1276 }, { "epoch": 0.06, "grad_norm": 0.6226152777671814, "learning_rate": 0.0005993559766056, "loss": 3.9707, "step": 1277 }, { "epoch": 0.06, "grad_norm": 0.6262789368629456, "learning_rate": 0.0005993549679218398, "loss": 3.9105, "step": 1278 }, { "epoch": 0.06, "grad_norm": 0.5812487006187439, "learning_rate": 0.0005993539584496362, "loss": 3.5877, "step": 1279 }, { "epoch": 0.06, "grad_norm": 0.5997166037559509, "learning_rate": 0.000599352948188992, "loss": 3.8289, "step": 1280 }, { "epoch": 0.06, "grad_norm": 0.5774847269058228, "learning_rate": 0.0005993519371399096, "loss": 4.0577, "step": 1281 }, { "epoch": 0.06, "grad_norm": 0.6224757432937622, "learning_rate": 0.0005993509253023919, "loss": 3.7593, "step": 1282 }, { "epoch": 0.06, "grad_norm": 0.5884726047515869, "learning_rate": 0.0005993499126764415, "loss": 4.0062, "step": 1283 }, { "epoch": 0.06, "grad_norm": 0.6184850931167603, "learning_rate": 0.0005993488992620608, "loss": 3.7726, "step": 1284 }, { "epoch": 0.06, "grad_norm": 0.6030580997467041, "learning_rate": 0.000599347885059253, "loss": 4.0764, "step": 1285 }, { "epoch": 0.06, "grad_norm": 0.6678992509841919, "learning_rate": 0.0005993468700680204, "loss": 4.0444, "step": 1286 }, { "epoch": 0.06, "grad_norm": 0.6198200583457947, "learning_rate": 0.0005993458542883657, "loss": 3.8496, "step": 1287 }, { "epoch": 0.06, "grad_norm": 0.5855886936187744, "learning_rate": 0.0005993448377202916, "loss": 3.8943, "step": 1288 }, { "epoch": 0.06, "grad_norm": 0.684818685054779, "learning_rate": 0.0005993438203638009, "loss": 3.8199, "step": 1289 }, { "epoch": 0.06, "grad_norm": 0.5692946910858154, "learning_rate": 0.0005993428022188961, "loss": 3.9652, "step": 1290 }, { "epoch": 0.06, "grad_norm": 0.6946492791175842, "learning_rate": 0.00059934178328558, "loss": 4.1611, "step": 1291 }, { "epoch": 0.06, "grad_norm": 0.5940146446228027, "learning_rate": 0.0005993407635638553, "loss": 3.9545, "step": 1292 }, { "epoch": 0.06, "grad_norm": 0.5907729864120483, "learning_rate": 0.0005993397430537246, "loss": 3.9311, "step": 1293 }, { "epoch": 0.06, "grad_norm": 0.6589469909667969, "learning_rate": 0.0005993387217551907, "loss": 3.795, "step": 1294 }, { "epoch": 0.06, "grad_norm": 0.5783873200416565, "learning_rate": 0.0005993376996682561, "loss": 3.9994, "step": 1295 }, { "epoch": 0.06, "grad_norm": 0.6160930395126343, "learning_rate": 0.0005993366767929237, "loss": 3.9999, "step": 1296 }, { "epoch": 0.06, "grad_norm": 0.5932180881500244, "learning_rate": 0.000599335653129196, "loss": 4.0102, "step": 1297 }, { "epoch": 0.06, "grad_norm": 0.635309100151062, "learning_rate": 0.0005993346286770759, "loss": 3.7957, "step": 1298 }, { "epoch": 0.06, "grad_norm": 0.6134257912635803, "learning_rate": 0.000599333603436566, "loss": 3.8315, "step": 1299 }, { "epoch": 0.06, "grad_norm": 0.5721672773361206, "learning_rate": 0.0005993325774076688, "loss": 4.0402, "step": 1300 }, { "epoch": 0.06, "grad_norm": 0.5775968432426453, "learning_rate": 0.0005993315505903874, "loss": 3.8583, "step": 1301 }, { "epoch": 0.06, "grad_norm": 0.618675172328949, "learning_rate": 0.0005993305229847242, "loss": 3.8447, "step": 1302 }, { "epoch": 0.06, "grad_norm": 0.6464390754699707, "learning_rate": 0.0005993294945906821, "loss": 4.1014, "step": 1303 }, { "epoch": 0.06, "grad_norm": 0.660879373550415, "learning_rate": 0.0005993284654082638, "loss": 3.9139, "step": 1304 }, { "epoch": 0.06, "grad_norm": 0.5997679233551025, "learning_rate": 0.0005993274354374718, "loss": 3.9929, "step": 1305 }, { "epoch": 0.06, "grad_norm": 0.6345100998878479, "learning_rate": 0.0005993264046783089, "loss": 4.0084, "step": 1306 }, { "epoch": 0.06, "grad_norm": 0.5924948453903198, "learning_rate": 0.000599325373130778, "loss": 3.8919, "step": 1307 }, { "epoch": 0.06, "grad_norm": 0.6048806309700012, "learning_rate": 0.0005993243407948815, "loss": 3.8444, "step": 1308 }, { "epoch": 0.06, "grad_norm": 0.5796457529067993, "learning_rate": 0.0005993233076706224, "loss": 3.8349, "step": 1309 }, { "epoch": 0.06, "grad_norm": 0.6050766706466675, "learning_rate": 0.0005993222737580033, "loss": 3.9042, "step": 1310 }, { "epoch": 0.06, "grad_norm": 0.6207561492919922, "learning_rate": 0.0005993212390570269, "loss": 4.2432, "step": 1311 }, { "epoch": 0.06, "grad_norm": 0.722430408000946, "learning_rate": 0.0005993202035676959, "loss": 3.7475, "step": 1312 }, { "epoch": 0.06, "grad_norm": 0.6052895784378052, "learning_rate": 0.0005993191672900132, "loss": 3.9763, "step": 1313 }, { "epoch": 0.06, "grad_norm": 0.6674761772155762, "learning_rate": 0.0005993181302239815, "loss": 4.0753, "step": 1314 }, { "epoch": 0.06, "grad_norm": 0.6250360012054443, "learning_rate": 0.0005993170923696033, "loss": 3.8684, "step": 1315 }, { "epoch": 0.06, "grad_norm": 0.6339815855026245, "learning_rate": 0.0005993160537268815, "loss": 3.8393, "step": 1316 }, { "epoch": 0.06, "grad_norm": 0.6206067204475403, "learning_rate": 0.0005993150142958189, "loss": 3.7968, "step": 1317 }, { "epoch": 0.06, "grad_norm": 0.5794596672058105, "learning_rate": 0.000599313974076418, "loss": 3.9016, "step": 1318 }, { "epoch": 0.06, "grad_norm": 0.706591784954071, "learning_rate": 0.0005993129330686818, "loss": 3.796, "step": 1319 }, { "epoch": 0.06, "grad_norm": 0.6444628834724426, "learning_rate": 0.000599311891272613, "loss": 3.8181, "step": 1320 }, { "epoch": 0.06, "grad_norm": 0.5789697766304016, "learning_rate": 0.0005993108486882142, "loss": 3.9068, "step": 1321 }, { "epoch": 0.06, "grad_norm": 0.6501109004020691, "learning_rate": 0.0005993098053154882, "loss": 3.836, "step": 1322 }, { "epoch": 0.06, "grad_norm": 0.6823614239692688, "learning_rate": 0.0005993087611544379, "loss": 3.8886, "step": 1323 }, { "epoch": 0.06, "grad_norm": 0.5772250890731812, "learning_rate": 0.0005993077162050658, "loss": 3.7892, "step": 1324 }, { "epoch": 0.06, "grad_norm": 0.674119770526886, "learning_rate": 0.0005993066704673748, "loss": 4.0057, "step": 1325 }, { "epoch": 0.06, "grad_norm": 0.541368305683136, "learning_rate": 0.0005993056239413677, "loss": 3.9643, "step": 1326 }, { "epoch": 0.07, "grad_norm": 0.6814593076705933, "learning_rate": 0.0005993045766270472, "loss": 3.8573, "step": 1327 }, { "epoch": 0.07, "grad_norm": 0.607441782951355, "learning_rate": 0.000599303528524416, "loss": 3.8976, "step": 1328 }, { "epoch": 0.07, "grad_norm": 0.5948410630226135, "learning_rate": 0.0005993024796334768, "loss": 3.7664, "step": 1329 }, { "epoch": 0.07, "grad_norm": 0.6131972074508667, "learning_rate": 0.0005993014299542327, "loss": 4.0518, "step": 1330 }, { "epoch": 0.07, "grad_norm": 0.6085745096206665, "learning_rate": 0.0005993003794866861, "loss": 3.8089, "step": 1331 }, { "epoch": 0.07, "grad_norm": 0.6138327717781067, "learning_rate": 0.0005992993282308398, "loss": 3.8027, "step": 1332 }, { "epoch": 0.07, "grad_norm": 0.6015024781227112, "learning_rate": 0.0005992982761866968, "loss": 3.7218, "step": 1333 }, { "epoch": 0.07, "grad_norm": 0.6075776815414429, "learning_rate": 0.0005992972233542597, "loss": 3.8605, "step": 1334 }, { "epoch": 0.07, "grad_norm": 0.603941798210144, "learning_rate": 0.0005992961697335314, "loss": 3.692, "step": 1335 }, { "epoch": 0.07, "grad_norm": 0.6697930097579956, "learning_rate": 0.0005992951153245146, "loss": 3.7488, "step": 1336 }, { "epoch": 0.07, "grad_norm": 0.6258661150932312, "learning_rate": 0.000599294060127212, "loss": 4.0558, "step": 1337 }, { "epoch": 0.07, "grad_norm": 0.5765604972839355, "learning_rate": 0.0005992930041416264, "loss": 4.1127, "step": 1338 }, { "epoch": 0.07, "grad_norm": 0.5883685946464539, "learning_rate": 0.0005992919473677607, "loss": 4.1005, "step": 1339 }, { "epoch": 0.07, "grad_norm": 0.6068031191825867, "learning_rate": 0.0005992908898056176, "loss": 4.0732, "step": 1340 }, { "epoch": 0.07, "grad_norm": 0.6263118386268616, "learning_rate": 0.0005992898314551999, "loss": 4.0635, "step": 1341 }, { "epoch": 0.07, "grad_norm": 0.630369246006012, "learning_rate": 0.0005992887723165102, "loss": 3.9827, "step": 1342 }, { "epoch": 0.07, "grad_norm": 0.5971052646636963, "learning_rate": 0.0005992877123895516, "loss": 3.8703, "step": 1343 }, { "epoch": 0.07, "grad_norm": 0.6135636568069458, "learning_rate": 0.0005992866516743269, "loss": 3.8379, "step": 1344 }, { "epoch": 0.07, "grad_norm": 0.63181471824646, "learning_rate": 0.0005992855901708387, "loss": 3.7176, "step": 1345 }, { "epoch": 0.07, "grad_norm": 0.6445341110229492, "learning_rate": 0.0005992845278790898, "loss": 3.7877, "step": 1346 }, { "epoch": 0.07, "grad_norm": 0.6358599662780762, "learning_rate": 0.000599283464799083, "loss": 3.6308, "step": 1347 }, { "epoch": 0.07, "grad_norm": 0.6615734100341797, "learning_rate": 0.0005992824009308212, "loss": 3.8782, "step": 1348 }, { "epoch": 0.07, "grad_norm": 0.6317094564437866, "learning_rate": 0.0005992813362743072, "loss": 3.935, "step": 1349 }, { "epoch": 0.07, "grad_norm": 0.6740353107452393, "learning_rate": 0.0005992802708295438, "loss": 3.6289, "step": 1350 }, { "epoch": 0.07, "grad_norm": 0.604249894618988, "learning_rate": 0.0005992792045965336, "loss": 3.8375, "step": 1351 }, { "epoch": 0.07, "grad_norm": 0.6756212115287781, "learning_rate": 0.0005992781375752797, "loss": 3.8007, "step": 1352 }, { "epoch": 0.07, "grad_norm": 0.5996575355529785, "learning_rate": 0.0005992770697657848, "loss": 3.8507, "step": 1353 }, { "epoch": 0.07, "grad_norm": 0.6114326119422913, "learning_rate": 0.0005992760011680516, "loss": 4.0967, "step": 1354 }, { "epoch": 0.07, "grad_norm": 0.6802350878715515, "learning_rate": 0.000599274931782083, "loss": 3.5996, "step": 1355 }, { "epoch": 0.07, "grad_norm": 0.6232761740684509, "learning_rate": 0.0005992738616078819, "loss": 3.8647, "step": 1356 }, { "epoch": 0.07, "grad_norm": 0.5817407369613647, "learning_rate": 0.0005992727906454511, "loss": 4.0256, "step": 1357 }, { "epoch": 0.07, "grad_norm": 0.6215899586677551, "learning_rate": 0.0005992717188947933, "loss": 4.0016, "step": 1358 }, { "epoch": 0.07, "grad_norm": 0.6303964257240295, "learning_rate": 0.0005992706463559113, "loss": 3.675, "step": 1359 }, { "epoch": 0.07, "grad_norm": 0.6754851937294006, "learning_rate": 0.0005992695730288081, "loss": 3.977, "step": 1360 }, { "epoch": 0.07, "grad_norm": 0.5957140922546387, "learning_rate": 0.0005992684989134864, "loss": 3.8991, "step": 1361 }, { "epoch": 0.07, "grad_norm": 0.6352207660675049, "learning_rate": 0.0005992674240099491, "loss": 4.0731, "step": 1362 }, { "epoch": 0.07, "grad_norm": 0.634380042552948, "learning_rate": 0.000599266348318199, "loss": 3.8853, "step": 1363 }, { "epoch": 0.07, "grad_norm": 0.6464090347290039, "learning_rate": 0.000599265271838239, "loss": 3.8244, "step": 1364 }, { "epoch": 0.07, "grad_norm": 0.6003564596176147, "learning_rate": 0.0005992641945700717, "loss": 3.7622, "step": 1365 }, { "epoch": 0.07, "grad_norm": 0.674795389175415, "learning_rate": 0.0005992631165137002, "loss": 4.0823, "step": 1366 }, { "epoch": 0.07, "grad_norm": 0.6135823726654053, "learning_rate": 0.0005992620376691273, "loss": 3.7498, "step": 1367 }, { "epoch": 0.07, "grad_norm": 0.6516311764717102, "learning_rate": 0.0005992609580363558, "loss": 3.8798, "step": 1368 }, { "epoch": 0.07, "grad_norm": 0.6563093066215515, "learning_rate": 0.0005992598776153883, "loss": 3.9035, "step": 1369 }, { "epoch": 0.07, "grad_norm": 0.5973907709121704, "learning_rate": 0.0005992587964062282, "loss": 3.9296, "step": 1370 }, { "epoch": 0.07, "grad_norm": 0.6241722702980042, "learning_rate": 0.0005992577144088778, "loss": 3.9025, "step": 1371 }, { "epoch": 0.07, "grad_norm": 0.5976197123527527, "learning_rate": 0.0005992566316233403, "loss": 3.9788, "step": 1372 }, { "epoch": 0.07, "grad_norm": 0.5596520304679871, "learning_rate": 0.0005992555480496183, "loss": 4.1813, "step": 1373 }, { "epoch": 0.07, "grad_norm": 0.7279554009437561, "learning_rate": 0.0005992544636877149, "loss": 3.7869, "step": 1374 }, { "epoch": 0.07, "grad_norm": 0.6230193376541138, "learning_rate": 0.0005992533785376328, "loss": 3.7344, "step": 1375 }, { "epoch": 0.07, "grad_norm": 0.6615080833435059, "learning_rate": 0.0005992522925993748, "loss": 3.793, "step": 1376 }, { "epoch": 0.07, "grad_norm": 0.6379623413085938, "learning_rate": 0.000599251205872944, "loss": 4.0211, "step": 1377 }, { "epoch": 0.07, "grad_norm": 0.6381189823150635, "learning_rate": 0.000599250118358343, "loss": 3.9455, "step": 1378 }, { "epoch": 0.07, "grad_norm": 0.6189027428627014, "learning_rate": 0.0005992490300555748, "loss": 3.8361, "step": 1379 }, { "epoch": 0.07, "grad_norm": 0.5973135232925415, "learning_rate": 0.0005992479409646424, "loss": 4.1191, "step": 1380 }, { "epoch": 0.07, "grad_norm": 0.6134343147277832, "learning_rate": 0.0005992468510855483, "loss": 3.5492, "step": 1381 }, { "epoch": 0.07, "grad_norm": 0.5866838693618774, "learning_rate": 0.0005992457604182957, "loss": 3.7574, "step": 1382 }, { "epoch": 0.07, "grad_norm": 0.6180101633071899, "learning_rate": 0.0005992446689628873, "loss": 3.9139, "step": 1383 }, { "epoch": 0.07, "grad_norm": 0.6157495379447937, "learning_rate": 0.0005992435767193261, "loss": 3.9326, "step": 1384 }, { "epoch": 0.07, "grad_norm": 0.6040421724319458, "learning_rate": 0.0005992424836876149, "loss": 3.9882, "step": 1385 }, { "epoch": 0.07, "grad_norm": 0.577000081539154, "learning_rate": 0.0005992413898677565, "loss": 4.0152, "step": 1386 }, { "epoch": 0.07, "grad_norm": 0.6416754722595215, "learning_rate": 0.0005992402952597539, "loss": 3.7716, "step": 1387 }, { "epoch": 0.07, "grad_norm": 0.6492377519607544, "learning_rate": 0.00059923919986361, "loss": 3.7164, "step": 1388 }, { "epoch": 0.07, "grad_norm": 0.6138007640838623, "learning_rate": 0.0005992381036793277, "loss": 3.7265, "step": 1389 }, { "epoch": 0.07, "grad_norm": 0.6215953230857849, "learning_rate": 0.0005992370067069097, "loss": 3.9165, "step": 1390 }, { "epoch": 0.07, "grad_norm": 0.5956389307975769, "learning_rate": 0.0005992359089463591, "loss": 3.9124, "step": 1391 }, { "epoch": 0.07, "grad_norm": 0.5724150538444519, "learning_rate": 0.0005992348103976786, "loss": 3.8611, "step": 1392 }, { "epoch": 0.07, "grad_norm": 0.6579985022544861, "learning_rate": 0.0005992337110608713, "loss": 3.4344, "step": 1393 }, { "epoch": 0.07, "grad_norm": 0.5854646563529968, "learning_rate": 0.00059923261093594, "loss": 3.828, "step": 1394 }, { "epoch": 0.07, "grad_norm": 0.6103180646896362, "learning_rate": 0.0005992315100228875, "loss": 3.8028, "step": 1395 }, { "epoch": 0.07, "grad_norm": 0.5735791325569153, "learning_rate": 0.0005992304083217168, "loss": 3.9679, "step": 1396 }, { "epoch": 0.07, "grad_norm": 0.5912772417068481, "learning_rate": 0.0005992293058324308, "loss": 3.7386, "step": 1397 }, { "epoch": 0.07, "grad_norm": 0.6024033427238464, "learning_rate": 0.0005992282025550325, "loss": 3.8819, "step": 1398 }, { "epoch": 0.07, "grad_norm": 0.5784563422203064, "learning_rate": 0.0005992270984895246, "loss": 3.7262, "step": 1399 }, { "epoch": 0.07, "grad_norm": 0.6742603182792664, "learning_rate": 0.0005992259936359102, "loss": 3.7894, "step": 1400 }, { "epoch": 0.07, "grad_norm": 0.5925095677375793, "learning_rate": 0.0005992248879941919, "loss": 3.9177, "step": 1401 }, { "epoch": 0.07, "grad_norm": 0.5972510576248169, "learning_rate": 0.0005992237815643732, "loss": 3.8498, "step": 1402 }, { "epoch": 0.07, "grad_norm": 0.6998504400253296, "learning_rate": 0.0005992226743464563, "loss": 3.9739, "step": 1403 }, { "epoch": 0.07, "grad_norm": 0.6386610269546509, "learning_rate": 0.0005992215663404446, "loss": 3.9983, "step": 1404 }, { "epoch": 0.07, "grad_norm": 0.6281149983406067, "learning_rate": 0.0005992204575463409, "loss": 3.82, "step": 1405 }, { "epoch": 0.07, "grad_norm": 0.6512899994850159, "learning_rate": 0.0005992193479641481, "loss": 3.5674, "step": 1406 }, { "epoch": 0.07, "grad_norm": 0.613243043422699, "learning_rate": 0.0005992182375938692, "loss": 3.8449, "step": 1407 }, { "epoch": 0.07, "grad_norm": 0.6193988919258118, "learning_rate": 0.0005992171264355069, "loss": 3.8452, "step": 1408 }, { "epoch": 0.07, "grad_norm": 0.6317744851112366, "learning_rate": 0.0005992160144890643, "loss": 3.7912, "step": 1409 }, { "epoch": 0.07, "grad_norm": 0.604690432548523, "learning_rate": 0.0005992149017545443, "loss": 3.8883, "step": 1410 }, { "epoch": 0.07, "grad_norm": 0.5913760662078857, "learning_rate": 0.0005992137882319498, "loss": 3.9683, "step": 1411 }, { "epoch": 0.07, "grad_norm": 0.6362264156341553, "learning_rate": 0.0005992126739212839, "loss": 3.9218, "step": 1412 }, { "epoch": 0.07, "grad_norm": 0.5891861319541931, "learning_rate": 0.0005992115588225493, "loss": 3.8159, "step": 1413 }, { "epoch": 0.07, "grad_norm": 0.6369192004203796, "learning_rate": 0.0005992104429357491, "loss": 3.9504, "step": 1414 }, { "epoch": 0.07, "grad_norm": 0.6242348551750183, "learning_rate": 0.0005992093262608861, "loss": 3.7089, "step": 1415 }, { "epoch": 0.07, "grad_norm": 0.5739800333976746, "learning_rate": 0.0005992082087979635, "loss": 3.9268, "step": 1416 }, { "epoch": 0.07, "grad_norm": 0.7472577095031738, "learning_rate": 0.0005992070905469839, "loss": 3.9839, "step": 1417 }, { "epoch": 0.07, "grad_norm": 0.5870568156242371, "learning_rate": 0.0005992059715079504, "loss": 3.7954, "step": 1418 }, { "epoch": 0.07, "grad_norm": 0.6301756501197815, "learning_rate": 0.000599204851680866, "loss": 3.9817, "step": 1419 }, { "epoch": 0.07, "grad_norm": 0.6080438494682312, "learning_rate": 0.0005992037310657336, "loss": 3.7202, "step": 1420 }, { "epoch": 0.07, "grad_norm": 0.6272107362747192, "learning_rate": 0.0005992026096625561, "loss": 3.6994, "step": 1421 }, { "epoch": 0.07, "grad_norm": 0.6598101258277893, "learning_rate": 0.0005992014874713366, "loss": 3.9831, "step": 1422 }, { "epoch": 0.07, "grad_norm": 0.6518803834915161, "learning_rate": 0.0005992003644920779, "loss": 3.6686, "step": 1423 }, { "epoch": 0.07, "grad_norm": 0.5766271948814392, "learning_rate": 0.0005991992407247831, "loss": 3.6382, "step": 1424 }, { "epoch": 0.07, "grad_norm": 0.6695455312728882, "learning_rate": 0.0005991981161694549, "loss": 3.7335, "step": 1425 }, { "epoch": 0.07, "grad_norm": 0.5740464329719543, "learning_rate": 0.0005991969908260966, "loss": 3.9502, "step": 1426 }, { "epoch": 0.07, "grad_norm": 0.6571002006530762, "learning_rate": 0.0005991958646947111, "loss": 3.7527, "step": 1427 }, { "epoch": 0.07, "grad_norm": 0.6101444959640503, "learning_rate": 0.0005991947377753011, "loss": 3.9642, "step": 1428 }, { "epoch": 0.07, "grad_norm": 0.6039525270462036, "learning_rate": 0.0005991936100678697, "loss": 3.6638, "step": 1429 }, { "epoch": 0.07, "grad_norm": 0.7623158097267151, "learning_rate": 0.0005991924815724201, "loss": 3.9647, "step": 1430 }, { "epoch": 0.07, "grad_norm": 0.5585444569587708, "learning_rate": 0.000599191352288955, "loss": 3.7619, "step": 1431 }, { "epoch": 0.07, "grad_norm": 0.6341544985771179, "learning_rate": 0.0005991902222174774, "loss": 3.9703, "step": 1432 }, { "epoch": 0.07, "grad_norm": 0.6054025888442993, "learning_rate": 0.0005991890913579904, "loss": 3.9349, "step": 1433 }, { "epoch": 0.07, "grad_norm": 0.6357590556144714, "learning_rate": 0.0005991879597104969, "loss": 3.6478, "step": 1434 }, { "epoch": 0.07, "grad_norm": 0.6057144403457642, "learning_rate": 0.0005991868272749998, "loss": 3.7992, "step": 1435 }, { "epoch": 0.07, "grad_norm": 0.6412066221237183, "learning_rate": 0.0005991856940515024, "loss": 3.8659, "step": 1436 }, { "epoch": 0.07, "grad_norm": 0.6738225817680359, "learning_rate": 0.0005991845600400072, "loss": 4.0249, "step": 1437 }, { "epoch": 0.07, "grad_norm": 0.6093243360519409, "learning_rate": 0.0005991834252405177, "loss": 3.6715, "step": 1438 }, { "epoch": 0.07, "grad_norm": 0.6469516158103943, "learning_rate": 0.0005991822896530364, "loss": 4.0306, "step": 1439 }, { "epoch": 0.07, "grad_norm": 0.6203641891479492, "learning_rate": 0.0005991811532775667, "loss": 3.9434, "step": 1440 }, { "epoch": 0.07, "grad_norm": 0.6038147807121277, "learning_rate": 0.0005991800161141114, "loss": 3.8991, "step": 1441 }, { "epoch": 0.07, "grad_norm": 0.6124120950698853, "learning_rate": 0.0005991788781626735, "loss": 3.8865, "step": 1442 }, { "epoch": 0.07, "grad_norm": 0.6234179735183716, "learning_rate": 0.0005991777394232559, "loss": 4.0951, "step": 1443 }, { "epoch": 0.07, "grad_norm": 0.622256875038147, "learning_rate": 0.0005991765998958618, "loss": 3.8243, "step": 1444 }, { "epoch": 0.07, "grad_norm": 0.5846306085586548, "learning_rate": 0.0005991754595804942, "loss": 3.9512, "step": 1445 }, { "epoch": 0.07, "grad_norm": 0.6416991949081421, "learning_rate": 0.0005991743184771558, "loss": 3.8896, "step": 1446 }, { "epoch": 0.07, "grad_norm": 0.6234089732170105, "learning_rate": 0.00059917317658585, "loss": 3.8233, "step": 1447 }, { "epoch": 0.07, "grad_norm": 0.6189374327659607, "learning_rate": 0.0005991720339065796, "loss": 3.6576, "step": 1448 }, { "epoch": 0.07, "grad_norm": 0.5880756378173828, "learning_rate": 0.0005991708904393476, "loss": 3.8074, "step": 1449 }, { "epoch": 0.07, "grad_norm": 0.6507730484008789, "learning_rate": 0.0005991697461841571, "loss": 4.0946, "step": 1450 }, { "epoch": 0.07, "grad_norm": 0.5725138783454895, "learning_rate": 0.0005991686011410111, "loss": 3.7917, "step": 1451 }, { "epoch": 0.07, "grad_norm": 0.6646773219108582, "learning_rate": 0.0005991674553099126, "loss": 3.728, "step": 1452 }, { "epoch": 0.07, "grad_norm": 0.5751292109489441, "learning_rate": 0.0005991663086908644, "loss": 3.7794, "step": 1453 }, { "epoch": 0.07, "grad_norm": 0.6166512966156006, "learning_rate": 0.0005991651612838698, "loss": 3.9293, "step": 1454 }, { "epoch": 0.07, "grad_norm": 0.6255661845207214, "learning_rate": 0.0005991640130889319, "loss": 3.8986, "step": 1455 }, { "epoch": 0.07, "grad_norm": 0.5917665958404541, "learning_rate": 0.0005991628641060534, "loss": 3.8005, "step": 1456 }, { "epoch": 0.07, "grad_norm": 0.5992826223373413, "learning_rate": 0.0005991617143352375, "loss": 3.8569, "step": 1457 }, { "epoch": 0.07, "grad_norm": 0.6259722113609314, "learning_rate": 0.0005991605637764872, "loss": 3.9048, "step": 1458 }, { "epoch": 0.07, "grad_norm": 0.6164818406105042, "learning_rate": 0.0005991594124298056, "loss": 3.9696, "step": 1459 }, { "epoch": 0.07, "grad_norm": 0.5641024708747864, "learning_rate": 0.0005991582602951958, "loss": 3.7273, "step": 1460 }, { "epoch": 0.07, "grad_norm": 0.6404457688331604, "learning_rate": 0.0005991571073726605, "loss": 3.7909, "step": 1461 }, { "epoch": 0.07, "grad_norm": 0.6667171120643616, "learning_rate": 0.0005991559536622031, "loss": 3.7106, "step": 1462 }, { "epoch": 0.07, "grad_norm": 0.5996845364570618, "learning_rate": 0.0005991547991638264, "loss": 3.8542, "step": 1463 }, { "epoch": 0.07, "grad_norm": 0.6826181411743164, "learning_rate": 0.0005991536438775337, "loss": 4.0089, "step": 1464 }, { "epoch": 0.07, "grad_norm": 0.6228737235069275, "learning_rate": 0.0005991524878033277, "loss": 3.7689, "step": 1465 }, { "epoch": 0.07, "grad_norm": 0.6522650718688965, "learning_rate": 0.0005991513309412116, "loss": 3.9532, "step": 1466 }, { "epoch": 0.07, "grad_norm": 0.6846997141838074, "learning_rate": 0.0005991501732911886, "loss": 3.8377, "step": 1467 }, { "epoch": 0.07, "grad_norm": 0.6278219223022461, "learning_rate": 0.0005991490148532616, "loss": 3.7568, "step": 1468 }, { "epoch": 0.07, "grad_norm": 0.6812443733215332, "learning_rate": 0.0005991478556274336, "loss": 3.8747, "step": 1469 }, { "epoch": 0.07, "grad_norm": 0.7166856527328491, "learning_rate": 0.0005991466956137077, "loss": 3.9553, "step": 1470 }, { "epoch": 0.07, "grad_norm": 0.5980615615844727, "learning_rate": 0.0005991455348120871, "loss": 4.0037, "step": 1471 }, { "epoch": 0.07, "grad_norm": 0.6599940061569214, "learning_rate": 0.0005991443732225746, "loss": 3.7815, "step": 1472 }, { "epoch": 0.07, "grad_norm": 0.6811275482177734, "learning_rate": 0.0005991432108451735, "loss": 3.9142, "step": 1473 }, { "epoch": 0.07, "grad_norm": 0.59503173828125, "learning_rate": 0.0005991420476798867, "loss": 3.6994, "step": 1474 }, { "epoch": 0.07, "grad_norm": 0.597121000289917, "learning_rate": 0.0005991408837267173, "loss": 3.911, "step": 1475 }, { "epoch": 0.07, "grad_norm": 0.5894047021865845, "learning_rate": 0.0005991397189856684, "loss": 3.7512, "step": 1476 }, { "epoch": 0.07, "grad_norm": 0.6419175267219543, "learning_rate": 0.000599138553456743, "loss": 3.892, "step": 1477 }, { "epoch": 0.07, "grad_norm": 0.6146329641342163, "learning_rate": 0.0005991373871399444, "loss": 3.7919, "step": 1478 }, { "epoch": 0.07, "grad_norm": 0.6020705103874207, "learning_rate": 0.0005991362200352754, "loss": 3.749, "step": 1479 }, { "epoch": 0.07, "grad_norm": 0.6040729284286499, "learning_rate": 0.0005991350521427391, "loss": 3.8971, "step": 1480 }, { "epoch": 0.07, "grad_norm": 0.6802822947502136, "learning_rate": 0.0005991338834623386, "loss": 3.9495, "step": 1481 }, { "epoch": 0.07, "grad_norm": 0.7739499807357788, "learning_rate": 0.0005991327139940771, "loss": 4.0075, "step": 1482 }, { "epoch": 0.07, "grad_norm": 0.7212753295898438, "learning_rate": 0.0005991315437379576, "loss": 3.7131, "step": 1483 }, { "epoch": 0.07, "grad_norm": 0.5949774980545044, "learning_rate": 0.0005991303726939832, "loss": 3.9272, "step": 1484 }, { "epoch": 0.07, "grad_norm": 0.6401901841163635, "learning_rate": 0.0005991292008621569, "loss": 3.9621, "step": 1485 }, { "epoch": 0.07, "grad_norm": 0.6232290863990784, "learning_rate": 0.0005991280282424819, "loss": 3.8078, "step": 1486 }, { "epoch": 0.07, "grad_norm": 0.644420862197876, "learning_rate": 0.0005991268548349613, "loss": 3.8307, "step": 1487 }, { "epoch": 0.07, "grad_norm": 0.6507530808448792, "learning_rate": 0.000599125680639598, "loss": 3.85, "step": 1488 }, { "epoch": 0.07, "grad_norm": 0.6406475305557251, "learning_rate": 0.0005991245056563954, "loss": 3.74, "step": 1489 }, { "epoch": 0.07, "grad_norm": 0.6073602437973022, "learning_rate": 0.0005991233298853562, "loss": 3.7604, "step": 1490 }, { "epoch": 0.07, "grad_norm": 0.6374882459640503, "learning_rate": 0.0005991221533264839, "loss": 3.842, "step": 1491 }, { "epoch": 0.07, "grad_norm": 0.6282268762588501, "learning_rate": 0.0005991209759797813, "loss": 3.845, "step": 1492 }, { "epoch": 0.07, "grad_norm": 0.6874997615814209, "learning_rate": 0.0005991197978452517, "loss": 4.1104, "step": 1493 }, { "epoch": 0.07, "grad_norm": 0.663975715637207, "learning_rate": 0.0005991186189228981, "loss": 3.9287, "step": 1494 }, { "epoch": 0.07, "grad_norm": 0.6389418840408325, "learning_rate": 0.0005991174392127236, "loss": 3.6839, "step": 1495 }, { "epoch": 0.07, "grad_norm": 0.5825007557868958, "learning_rate": 0.0005991162587147314, "loss": 4.0187, "step": 1496 }, { "epoch": 0.07, "grad_norm": 0.5879276394844055, "learning_rate": 0.0005991150774289245, "loss": 3.8135, "step": 1497 }, { "epoch": 0.07, "grad_norm": 0.6391376852989197, "learning_rate": 0.0005991138953553059, "loss": 4.0896, "step": 1498 }, { "epoch": 0.07, "grad_norm": 0.5957635045051575, "learning_rate": 0.0005991127124938791, "loss": 3.896, "step": 1499 }, { "epoch": 0.07, "grad_norm": 0.6745945811271667, "learning_rate": 0.0005991115288446468, "loss": 3.4926, "step": 1500 }, { "epoch": 0.07, "grad_norm": 0.6167849898338318, "learning_rate": 0.0005991103444076124, "loss": 3.8955, "step": 1501 }, { "epoch": 0.07, "grad_norm": 0.6245574355125427, "learning_rate": 0.000599109159182779, "loss": 3.7455, "step": 1502 }, { "epoch": 0.07, "grad_norm": 0.5934446454048157, "learning_rate": 0.0005991079731701495, "loss": 3.659, "step": 1503 }, { "epoch": 0.07, "grad_norm": 0.5914957523345947, "learning_rate": 0.0005991067863697271, "loss": 3.7625, "step": 1504 }, { "epoch": 0.07, "grad_norm": 0.6563999652862549, "learning_rate": 0.0005991055987815151, "loss": 4.0097, "step": 1505 }, { "epoch": 0.07, "grad_norm": 0.6522777676582336, "learning_rate": 0.0005991044104055166, "loss": 3.9068, "step": 1506 }, { "epoch": 0.07, "grad_norm": 0.6069871187210083, "learning_rate": 0.0005991032212417345, "loss": 3.9719, "step": 1507 }, { "epoch": 0.07, "grad_norm": 0.630017876625061, "learning_rate": 0.0005991020312901721, "loss": 3.7736, "step": 1508 }, { "epoch": 0.07, "grad_norm": 0.631232738494873, "learning_rate": 0.0005991008405508326, "loss": 3.6984, "step": 1509 }, { "epoch": 0.07, "grad_norm": 0.6594641208648682, "learning_rate": 0.000599099649023719, "loss": 3.8675, "step": 1510 }, { "epoch": 0.07, "grad_norm": 0.6070505976676941, "learning_rate": 0.0005990984567088344, "loss": 3.959, "step": 1511 }, { "epoch": 0.07, "grad_norm": 0.6136875748634338, "learning_rate": 0.0005990972636061821, "loss": 3.8672, "step": 1512 }, { "epoch": 0.07, "grad_norm": 0.7074840664863586, "learning_rate": 0.0005990960697157652, "loss": 3.8252, "step": 1513 }, { "epoch": 0.07, "grad_norm": 0.6983718276023865, "learning_rate": 0.0005990948750375868, "loss": 3.9131, "step": 1514 }, { "epoch": 0.07, "grad_norm": 0.6260075569152832, "learning_rate": 0.0005990936795716501, "loss": 3.6873, "step": 1515 }, { "epoch": 0.07, "grad_norm": 0.6146523356437683, "learning_rate": 0.0005990924833179582, "loss": 3.8511, "step": 1516 }, { "epoch": 0.07, "grad_norm": 0.586942732334137, "learning_rate": 0.0005990912862765143, "loss": 3.8194, "step": 1517 }, { "epoch": 0.07, "grad_norm": 0.6059939861297607, "learning_rate": 0.0005990900884473213, "loss": 3.8406, "step": 1518 }, { "epoch": 0.07, "grad_norm": 0.609937310218811, "learning_rate": 0.0005990888898303827, "loss": 3.4061, "step": 1519 }, { "epoch": 0.07, "grad_norm": 0.59391850233078, "learning_rate": 0.0005990876904257015, "loss": 3.8884, "step": 1520 }, { "epoch": 0.07, "grad_norm": 0.6493670344352722, "learning_rate": 0.000599086490233281, "loss": 3.9793, "step": 1521 }, { "epoch": 0.07, "grad_norm": 0.6219658851623535, "learning_rate": 0.000599085289253124, "loss": 3.8025, "step": 1522 }, { "epoch": 0.07, "grad_norm": 0.6138971447944641, "learning_rate": 0.0005990840874852342, "loss": 3.8796, "step": 1523 }, { "epoch": 0.07, "grad_norm": 0.6053948998451233, "learning_rate": 0.0005990828849296143, "loss": 4.0149, "step": 1524 }, { "epoch": 0.07, "grad_norm": 0.651043713092804, "learning_rate": 0.0005990816815862678, "loss": 3.9163, "step": 1525 }, { "epoch": 0.07, "grad_norm": 0.6234416365623474, "learning_rate": 0.0005990804774551975, "loss": 3.7939, "step": 1526 }, { "epoch": 0.07, "grad_norm": 0.5939651727676392, "learning_rate": 0.0005990792725364068, "loss": 3.8339, "step": 1527 }, { "epoch": 0.07, "grad_norm": 0.6041796207427979, "learning_rate": 0.000599078066829899, "loss": 3.7584, "step": 1528 }, { "epoch": 0.07, "grad_norm": 0.636833906173706, "learning_rate": 0.0005990768603356771, "loss": 3.8717, "step": 1529 }, { "epoch": 0.07, "grad_norm": 0.6864349246025085, "learning_rate": 0.0005990756530537442, "loss": 3.786, "step": 1530 }, { "epoch": 0.08, "grad_norm": 0.5589094161987305, "learning_rate": 0.0005990744449841037, "loss": 3.7716, "step": 1531 }, { "epoch": 0.08, "grad_norm": 0.6430098414421082, "learning_rate": 0.0005990732361267585, "loss": 3.7062, "step": 1532 }, { "epoch": 0.08, "grad_norm": 0.6288703083992004, "learning_rate": 0.0005990720264817122, "loss": 3.8552, "step": 1533 }, { "epoch": 0.08, "grad_norm": 0.5796893835067749, "learning_rate": 0.0005990708160489676, "loss": 3.8646, "step": 1534 }, { "epoch": 0.08, "grad_norm": 0.5982096791267395, "learning_rate": 0.000599069604828528, "loss": 3.9246, "step": 1535 }, { "epoch": 0.08, "grad_norm": 0.5953184366226196, "learning_rate": 0.0005990683928203967, "loss": 3.7108, "step": 1536 }, { "epoch": 0.08, "grad_norm": 0.621066153049469, "learning_rate": 0.0005990671800245768, "loss": 3.9115, "step": 1537 }, { "epoch": 0.08, "grad_norm": 0.6250355839729309, "learning_rate": 0.0005990659664410715, "loss": 3.7263, "step": 1538 }, { "epoch": 0.08, "grad_norm": 0.6018726229667664, "learning_rate": 0.0005990647520698839, "loss": 4.0379, "step": 1539 }, { "epoch": 0.08, "grad_norm": 0.5801904201507568, "learning_rate": 0.0005990635369110174, "loss": 3.8927, "step": 1540 }, { "epoch": 0.08, "grad_norm": 0.61872398853302, "learning_rate": 0.0005990623209644751, "loss": 3.806, "step": 1541 }, { "epoch": 0.08, "grad_norm": 0.5771560072898865, "learning_rate": 0.0005990611042302602, "loss": 3.8253, "step": 1542 }, { "epoch": 0.08, "grad_norm": 0.5889159440994263, "learning_rate": 0.0005990598867083759, "loss": 3.827, "step": 1543 }, { "epoch": 0.08, "grad_norm": 0.5858890414237976, "learning_rate": 0.0005990586683988254, "loss": 3.9579, "step": 1544 }, { "epoch": 0.08, "grad_norm": 0.6480501890182495, "learning_rate": 0.0005990574493016119, "loss": 3.844, "step": 1545 }, { "epoch": 0.08, "grad_norm": 0.6195836067199707, "learning_rate": 0.0005990562294167387, "loss": 3.8677, "step": 1546 }, { "epoch": 0.08, "grad_norm": 0.6144270300865173, "learning_rate": 0.0005990550087442088, "loss": 3.7589, "step": 1547 }, { "epoch": 0.08, "grad_norm": 0.6432852745056152, "learning_rate": 0.0005990537872840257, "loss": 3.8853, "step": 1548 }, { "epoch": 0.08, "grad_norm": 0.6419529914855957, "learning_rate": 0.0005990525650361925, "loss": 3.9961, "step": 1549 }, { "epoch": 0.08, "grad_norm": 0.6389473080635071, "learning_rate": 0.0005990513420007122, "loss": 3.7316, "step": 1550 }, { "epoch": 0.08, "grad_norm": 0.6231613159179688, "learning_rate": 0.0005990501181775885, "loss": 3.8515, "step": 1551 }, { "epoch": 0.08, "grad_norm": 0.5940372347831726, "learning_rate": 0.0005990488935668242, "loss": 3.9313, "step": 1552 }, { "epoch": 0.08, "grad_norm": 0.6202555298805237, "learning_rate": 0.0005990476681684226, "loss": 3.9028, "step": 1553 }, { "epoch": 0.08, "grad_norm": 0.5787298083305359, "learning_rate": 0.0005990464419823871, "loss": 3.8886, "step": 1554 }, { "epoch": 0.08, "grad_norm": 0.639350950717926, "learning_rate": 0.0005990452150087208, "loss": 3.8559, "step": 1555 }, { "epoch": 0.08, "grad_norm": 0.5683527588844299, "learning_rate": 0.000599043987247427, "loss": 3.9704, "step": 1556 }, { "epoch": 0.08, "grad_norm": 0.6336866617202759, "learning_rate": 0.0005990427586985088, "loss": 3.8659, "step": 1557 }, { "epoch": 0.08, "grad_norm": 0.6109759211540222, "learning_rate": 0.0005990415293619695, "loss": 4.0727, "step": 1558 }, { "epoch": 0.08, "grad_norm": 0.6308028101921082, "learning_rate": 0.0005990402992378125, "loss": 3.7325, "step": 1559 }, { "epoch": 0.08, "grad_norm": 0.6601607799530029, "learning_rate": 0.0005990390683260408, "loss": 3.8482, "step": 1560 }, { "epoch": 0.08, "grad_norm": 0.6257616877555847, "learning_rate": 0.0005990378366266577, "loss": 3.9858, "step": 1561 }, { "epoch": 0.08, "grad_norm": 0.6197351813316345, "learning_rate": 0.0005990366041396666, "loss": 3.7637, "step": 1562 }, { "epoch": 0.08, "grad_norm": 0.5823824405670166, "learning_rate": 0.0005990353708650706, "loss": 3.8847, "step": 1563 }, { "epoch": 0.08, "grad_norm": 0.6095857620239258, "learning_rate": 0.000599034136802873, "loss": 3.9395, "step": 1564 }, { "epoch": 0.08, "grad_norm": 0.6098863482475281, "learning_rate": 0.000599032901953077, "loss": 3.7885, "step": 1565 }, { "epoch": 0.08, "grad_norm": 0.5995204448699951, "learning_rate": 0.0005990316663156859, "loss": 3.8914, "step": 1566 }, { "epoch": 0.08, "grad_norm": 0.5945135354995728, "learning_rate": 0.0005990304298907029, "loss": 3.9679, "step": 1567 }, { "epoch": 0.08, "grad_norm": 0.5825366973876953, "learning_rate": 0.0005990291926781313, "loss": 3.8133, "step": 1568 }, { "epoch": 0.08, "grad_norm": 0.6503740549087524, "learning_rate": 0.0005990279546779745, "loss": 3.8859, "step": 1569 }, { "epoch": 0.08, "grad_norm": 0.6176950335502625, "learning_rate": 0.0005990267158902354, "loss": 3.8342, "step": 1570 }, { "epoch": 0.08, "grad_norm": 0.6380782127380371, "learning_rate": 0.0005990254763149176, "loss": 3.5816, "step": 1571 }, { "epoch": 0.08, "grad_norm": 0.6276407837867737, "learning_rate": 0.0005990242359520243, "loss": 3.9316, "step": 1572 }, { "epoch": 0.08, "grad_norm": 0.5958492159843445, "learning_rate": 0.0005990229948015587, "loss": 3.6879, "step": 1573 }, { "epoch": 0.08, "grad_norm": 0.5630125403404236, "learning_rate": 0.000599021752863524, "loss": 3.7691, "step": 1574 }, { "epoch": 0.08, "grad_norm": 0.6117847561836243, "learning_rate": 0.0005990205101379236, "loss": 3.73, "step": 1575 }, { "epoch": 0.08, "grad_norm": 0.6020464301109314, "learning_rate": 0.0005990192666247607, "loss": 3.8803, "step": 1576 }, { "epoch": 0.08, "grad_norm": 0.5830046534538269, "learning_rate": 0.0005990180223240385, "loss": 3.6786, "step": 1577 }, { "epoch": 0.08, "grad_norm": 0.6638304591178894, "learning_rate": 0.0005990167772357605, "loss": 4.1394, "step": 1578 }, { "epoch": 0.08, "grad_norm": 0.5913933515548706, "learning_rate": 0.0005990155313599298, "loss": 3.9041, "step": 1579 }, { "epoch": 0.08, "grad_norm": 0.5918366312980652, "learning_rate": 0.0005990142846965498, "loss": 3.675, "step": 1580 }, { "epoch": 0.08, "grad_norm": 0.6066710948944092, "learning_rate": 0.0005990130372456237, "loss": 3.7399, "step": 1581 }, { "epoch": 0.08, "grad_norm": 0.5532450675964355, "learning_rate": 0.0005990117890071548, "loss": 3.6508, "step": 1582 }, { "epoch": 0.08, "grad_norm": 0.5913922786712646, "learning_rate": 0.0005990105399811464, "loss": 3.7544, "step": 1583 }, { "epoch": 0.08, "grad_norm": 0.5768578052520752, "learning_rate": 0.0005990092901676018, "loss": 3.748, "step": 1584 }, { "epoch": 0.08, "grad_norm": 0.6337912678718567, "learning_rate": 0.0005990080395665242, "loss": 3.8081, "step": 1585 }, { "epoch": 0.08, "grad_norm": 0.5935748815536499, "learning_rate": 0.000599006788177917, "loss": 3.8911, "step": 1586 }, { "epoch": 0.08, "grad_norm": 0.6132523417472839, "learning_rate": 0.0005990055360017835, "loss": 3.9166, "step": 1587 }, { "epoch": 0.08, "grad_norm": 0.6079237461090088, "learning_rate": 0.000599004283038127, "loss": 3.7937, "step": 1588 }, { "epoch": 0.08, "grad_norm": 0.6035066843032837, "learning_rate": 0.0005990030292869507, "loss": 3.8474, "step": 1589 }, { "epoch": 0.08, "grad_norm": 0.620436429977417, "learning_rate": 0.0005990017747482579, "loss": 3.6852, "step": 1590 }, { "epoch": 0.08, "grad_norm": 0.6006262302398682, "learning_rate": 0.0005990005194220521, "loss": 3.7319, "step": 1591 }, { "epoch": 0.08, "grad_norm": 0.6008548140525818, "learning_rate": 0.0005989992633083364, "loss": 4.134, "step": 1592 }, { "epoch": 0.08, "grad_norm": 0.5870209336280823, "learning_rate": 0.0005989980064071142, "loss": 4.0057, "step": 1593 }, { "epoch": 0.08, "grad_norm": 0.595600962638855, "learning_rate": 0.0005989967487183888, "loss": 3.8465, "step": 1594 }, { "epoch": 0.08, "grad_norm": 0.5959302186965942, "learning_rate": 0.0005989954902421634, "loss": 3.701, "step": 1595 }, { "epoch": 0.08, "grad_norm": 0.5945109724998474, "learning_rate": 0.0005989942309784416, "loss": 3.9228, "step": 1596 }, { "epoch": 0.08, "grad_norm": 0.614081084728241, "learning_rate": 0.0005989929709272264, "loss": 3.7123, "step": 1597 }, { "epoch": 0.08, "grad_norm": 0.59315425157547, "learning_rate": 0.0005989917100885214, "loss": 3.7997, "step": 1598 }, { "epoch": 0.08, "grad_norm": 0.5761898756027222, "learning_rate": 0.0005989904484623296, "loss": 3.6235, "step": 1599 }, { "epoch": 0.08, "grad_norm": 0.6658716201782227, "learning_rate": 0.0005989891860486546, "loss": 3.8065, "step": 1600 }, { "epoch": 0.08, "grad_norm": 0.5968477129936218, "learning_rate": 0.0005989879228474997, "loss": 3.6979, "step": 1601 }, { "epoch": 0.08, "grad_norm": 0.630476713180542, "learning_rate": 0.0005989866588588681, "loss": 3.6387, "step": 1602 }, { "epoch": 0.08, "grad_norm": 0.6172245144844055, "learning_rate": 0.0005989853940827631, "loss": 3.687, "step": 1603 }, { "epoch": 0.08, "grad_norm": 0.6125102043151855, "learning_rate": 0.0005989841285191881, "loss": 3.7127, "step": 1604 }, { "epoch": 0.08, "grad_norm": 0.6071942448616028, "learning_rate": 0.0005989828621681466, "loss": 3.7638, "step": 1605 }, { "epoch": 0.08, "grad_norm": 0.6062251329421997, "learning_rate": 0.0005989815950296417, "loss": 3.706, "step": 1606 }, { "epoch": 0.08, "grad_norm": 0.5893798470497131, "learning_rate": 0.0005989803271036769, "loss": 3.8942, "step": 1607 }, { "epoch": 0.08, "grad_norm": 0.5914480090141296, "learning_rate": 0.0005989790583902553, "loss": 3.7829, "step": 1608 }, { "epoch": 0.08, "grad_norm": 0.6149440407752991, "learning_rate": 0.0005989777888893805, "loss": 3.6808, "step": 1609 }, { "epoch": 0.08, "grad_norm": 0.8986303210258484, "learning_rate": 0.0005989765186010557, "loss": 3.543, "step": 1610 }, { "epoch": 0.08, "grad_norm": 0.5829508900642395, "learning_rate": 0.0005989752475252843, "loss": 3.7894, "step": 1611 }, { "epoch": 0.08, "grad_norm": 0.6794427633285522, "learning_rate": 0.0005989739756620697, "loss": 3.7064, "step": 1612 }, { "epoch": 0.08, "grad_norm": 0.7055894732475281, "learning_rate": 0.0005989727030114151, "loss": 3.8842, "step": 1613 }, { "epoch": 0.08, "grad_norm": 0.5715697407722473, "learning_rate": 0.000598971429573324, "loss": 3.9117, "step": 1614 }, { "epoch": 0.08, "grad_norm": 0.6290931701660156, "learning_rate": 0.0005989701553477996, "loss": 3.5982, "step": 1615 }, { "epoch": 0.08, "grad_norm": 0.5757427215576172, "learning_rate": 0.0005989688803348455, "loss": 3.6704, "step": 1616 }, { "epoch": 0.08, "grad_norm": 0.6020493507385254, "learning_rate": 0.0005989676045344648, "loss": 3.5587, "step": 1617 }, { "epoch": 0.08, "grad_norm": 0.5768998265266418, "learning_rate": 0.000598966327946661, "loss": 3.5811, "step": 1618 }, { "epoch": 0.08, "grad_norm": 0.6348850131034851, "learning_rate": 0.0005989650505714374, "loss": 3.9682, "step": 1619 }, { "epoch": 0.08, "grad_norm": 0.6411147713661194, "learning_rate": 0.0005989637724087973, "loss": 3.7195, "step": 1620 }, { "epoch": 0.08, "grad_norm": 0.6555300354957581, "learning_rate": 0.0005989624934587443, "loss": 3.647, "step": 1621 }, { "epoch": 0.08, "grad_norm": 0.6385052800178528, "learning_rate": 0.0005989612137212816, "loss": 3.8637, "step": 1622 }, { "epoch": 0.08, "grad_norm": 0.6256349086761475, "learning_rate": 0.0005989599331964127, "loss": 3.765, "step": 1623 }, { "epoch": 0.08, "grad_norm": 0.6104692220687866, "learning_rate": 0.0005989586518841408, "loss": 3.721, "step": 1624 }, { "epoch": 0.08, "grad_norm": 0.5835363864898682, "learning_rate": 0.0005989573697844692, "loss": 3.9361, "step": 1625 }, { "epoch": 0.08, "grad_norm": 0.6491838693618774, "learning_rate": 0.0005989560868974015, "loss": 3.8728, "step": 1626 }, { "epoch": 0.08, "grad_norm": 0.602606475353241, "learning_rate": 0.0005989548032229411, "loss": 3.7802, "step": 1627 }, { "epoch": 0.08, "grad_norm": 0.5809963941574097, "learning_rate": 0.0005989535187610912, "loss": 3.5699, "step": 1628 }, { "epoch": 0.08, "grad_norm": 0.5906050801277161, "learning_rate": 0.0005989522335118553, "loss": 3.7924, "step": 1629 }, { "epoch": 0.08, "grad_norm": 0.6175136566162109, "learning_rate": 0.0005989509474752367, "loss": 3.8836, "step": 1630 }, { "epoch": 0.08, "grad_norm": 0.5688444972038269, "learning_rate": 0.0005989496606512389, "loss": 3.9392, "step": 1631 }, { "epoch": 0.08, "grad_norm": 0.6032138466835022, "learning_rate": 0.0005989483730398652, "loss": 4.0405, "step": 1632 }, { "epoch": 0.08, "grad_norm": 0.6493321657180786, "learning_rate": 0.000598947084641119, "loss": 3.824, "step": 1633 }, { "epoch": 0.08, "grad_norm": 0.5763911008834839, "learning_rate": 0.0005989457954550038, "loss": 3.7681, "step": 1634 }, { "epoch": 0.08, "grad_norm": 0.5691786408424377, "learning_rate": 0.0005989445054815229, "loss": 3.9562, "step": 1635 }, { "epoch": 0.08, "grad_norm": 0.6753783226013184, "learning_rate": 0.0005989432147206796, "loss": 3.7078, "step": 1636 }, { "epoch": 0.08, "grad_norm": 0.6098892092704773, "learning_rate": 0.0005989419231724775, "loss": 3.478, "step": 1637 }, { "epoch": 0.08, "grad_norm": 0.6037442684173584, "learning_rate": 0.0005989406308369199, "loss": 3.73, "step": 1638 }, { "epoch": 0.08, "grad_norm": 0.6473791003227234, "learning_rate": 0.0005989393377140101, "loss": 3.7052, "step": 1639 }, { "epoch": 0.08, "grad_norm": 0.6919148564338684, "learning_rate": 0.0005989380438037517, "loss": 3.7036, "step": 1640 }, { "epoch": 0.08, "grad_norm": 0.6580774188041687, "learning_rate": 0.000598936749106148, "loss": 3.8456, "step": 1641 }, { "epoch": 0.08, "grad_norm": 0.5730836987495422, "learning_rate": 0.0005989354536212024, "loss": 3.5748, "step": 1642 }, { "epoch": 0.08, "grad_norm": 0.5901459455490112, "learning_rate": 0.0005989341573489185, "loss": 3.7192, "step": 1643 }, { "epoch": 0.08, "grad_norm": 0.6304981708526611, "learning_rate": 0.0005989328602892994, "loss": 3.9909, "step": 1644 }, { "epoch": 0.08, "grad_norm": 0.6005657911300659, "learning_rate": 0.0005989315624423487, "loss": 3.6228, "step": 1645 }, { "epoch": 0.08, "grad_norm": 0.61674565076828, "learning_rate": 0.0005989302638080698, "loss": 3.7769, "step": 1646 }, { "epoch": 0.08, "grad_norm": 0.598010778427124, "learning_rate": 0.0005989289643864662, "loss": 3.7453, "step": 1647 }, { "epoch": 0.08, "grad_norm": 0.6126073598861694, "learning_rate": 0.0005989276641775412, "loss": 3.6865, "step": 1648 }, { "epoch": 0.08, "grad_norm": 0.6504325270652771, "learning_rate": 0.0005989263631812982, "loss": 3.6676, "step": 1649 }, { "epoch": 0.08, "grad_norm": 0.6758439540863037, "learning_rate": 0.0005989250613977408, "loss": 3.8634, "step": 1650 }, { "epoch": 0.08, "grad_norm": 0.5557368993759155, "learning_rate": 0.0005989237588268722, "loss": 3.7872, "step": 1651 }, { "epoch": 0.08, "grad_norm": 0.6107842922210693, "learning_rate": 0.0005989224554686961, "loss": 3.6353, "step": 1652 }, { "epoch": 0.08, "grad_norm": 0.5767660140991211, "learning_rate": 0.0005989211513232157, "loss": 3.9362, "step": 1653 }, { "epoch": 0.08, "grad_norm": 0.6302205324172974, "learning_rate": 0.0005989198463904344, "loss": 3.8184, "step": 1654 }, { "epoch": 0.08, "grad_norm": 0.6364771723747253, "learning_rate": 0.0005989185406703559, "loss": 3.93, "step": 1655 }, { "epoch": 0.08, "grad_norm": 0.6427229046821594, "learning_rate": 0.0005989172341629834, "loss": 3.7459, "step": 1656 }, { "epoch": 0.08, "grad_norm": 0.604668378829956, "learning_rate": 0.0005989159268683205, "loss": 3.7998, "step": 1657 }, { "epoch": 0.08, "grad_norm": 0.6303444504737854, "learning_rate": 0.0005989146187863705, "loss": 3.8282, "step": 1658 }, { "epoch": 0.08, "grad_norm": 0.5857745409011841, "learning_rate": 0.0005989133099171371, "loss": 3.8037, "step": 1659 }, { "epoch": 0.08, "grad_norm": 0.5540629625320435, "learning_rate": 0.0005989120002606233, "loss": 3.9089, "step": 1660 }, { "epoch": 0.08, "grad_norm": 0.732204794883728, "learning_rate": 0.000598910689816833, "loss": 3.7783, "step": 1661 }, { "epoch": 0.08, "grad_norm": 0.6092149615287781, "learning_rate": 0.0005989093785857694, "loss": 3.9062, "step": 1662 }, { "epoch": 0.08, "grad_norm": 0.5495967864990234, "learning_rate": 0.0005989080665674361, "loss": 3.8753, "step": 1663 }, { "epoch": 0.08, "grad_norm": 0.6210874319076538, "learning_rate": 0.0005989067537618364, "loss": 3.8944, "step": 1664 }, { "epoch": 0.08, "grad_norm": 0.6496333479881287, "learning_rate": 0.0005989054401689739, "loss": 3.9576, "step": 1665 }, { "epoch": 0.08, "grad_norm": 0.6235400438308716, "learning_rate": 0.0005989041257888519, "loss": 3.8495, "step": 1666 }, { "epoch": 0.08, "grad_norm": 0.5927395224571228, "learning_rate": 0.000598902810621474, "loss": 3.6989, "step": 1667 }, { "epoch": 0.08, "grad_norm": 0.5796555280685425, "learning_rate": 0.0005989014946668437, "loss": 3.6428, "step": 1668 }, { "epoch": 0.08, "grad_norm": 0.6373754739761353, "learning_rate": 0.0005989001779249643, "loss": 3.6226, "step": 1669 }, { "epoch": 0.08, "grad_norm": 0.5925307273864746, "learning_rate": 0.0005988988603958395, "loss": 3.735, "step": 1670 }, { "epoch": 0.08, "grad_norm": 0.6357890963554382, "learning_rate": 0.0005988975420794724, "loss": 3.7731, "step": 1671 }, { "epoch": 0.08, "grad_norm": 0.6149181127548218, "learning_rate": 0.0005988962229758668, "loss": 3.844, "step": 1672 }, { "epoch": 0.08, "grad_norm": 0.6464425921440125, "learning_rate": 0.0005988949030850261, "loss": 3.7181, "step": 1673 }, { "epoch": 0.08, "grad_norm": 0.6317658424377441, "learning_rate": 0.0005988935824069538, "loss": 3.5902, "step": 1674 }, { "epoch": 0.08, "grad_norm": 0.5768055319786072, "learning_rate": 0.0005988922609416532, "loss": 3.7552, "step": 1675 }, { "epoch": 0.08, "grad_norm": 0.6145294904708862, "learning_rate": 0.000598890938689128, "loss": 4.0868, "step": 1676 }, { "epoch": 0.08, "grad_norm": 0.6101502180099487, "learning_rate": 0.0005988896156493815, "loss": 3.8068, "step": 1677 }, { "epoch": 0.08, "grad_norm": 0.6006686091423035, "learning_rate": 0.0005988882918224175, "loss": 3.8113, "step": 1678 }, { "epoch": 0.08, "grad_norm": 0.6369596719741821, "learning_rate": 0.0005988869672082389, "loss": 3.5701, "step": 1679 }, { "epoch": 0.08, "grad_norm": 0.6296815276145935, "learning_rate": 0.0005988856418068498, "loss": 3.7768, "step": 1680 }, { "epoch": 0.08, "grad_norm": 0.6762238144874573, "learning_rate": 0.0005988843156182534, "loss": 3.77, "step": 1681 }, { "epoch": 0.08, "grad_norm": 0.5993692278862, "learning_rate": 0.0005988829886424532, "loss": 3.8124, "step": 1682 }, { "epoch": 0.08, "grad_norm": 0.6622273921966553, "learning_rate": 0.0005988816608794527, "loss": 3.7658, "step": 1683 }, { "epoch": 0.08, "grad_norm": 0.5899795293807983, "learning_rate": 0.0005988803323292556, "loss": 3.7341, "step": 1684 }, { "epoch": 0.08, "grad_norm": 0.6564462780952454, "learning_rate": 0.0005988790029918651, "loss": 3.6619, "step": 1685 }, { "epoch": 0.08, "grad_norm": 0.6012856364250183, "learning_rate": 0.0005988776728672847, "loss": 3.7838, "step": 1686 }, { "epoch": 0.08, "grad_norm": 0.5971396565437317, "learning_rate": 0.0005988763419555182, "loss": 3.7886, "step": 1687 }, { "epoch": 0.08, "grad_norm": 0.5735385417938232, "learning_rate": 0.0005988750102565689, "loss": 3.8873, "step": 1688 }, { "epoch": 0.08, "grad_norm": 0.5730529427528381, "learning_rate": 0.0005988736777704403, "loss": 3.7951, "step": 1689 }, { "epoch": 0.08, "grad_norm": 0.5634326338768005, "learning_rate": 0.0005988723444971361, "loss": 3.9817, "step": 1690 }, { "epoch": 0.08, "grad_norm": 0.5796641707420349, "learning_rate": 0.0005988710104366595, "loss": 3.6427, "step": 1691 }, { "epoch": 0.08, "grad_norm": 0.5546978116035461, "learning_rate": 0.0005988696755890142, "loss": 3.839, "step": 1692 }, { "epoch": 0.08, "grad_norm": 0.6504260897636414, "learning_rate": 0.0005988683399542037, "loss": 3.6866, "step": 1693 }, { "epoch": 0.08, "grad_norm": 0.6330404281616211, "learning_rate": 0.0005988670035322316, "loss": 3.6307, "step": 1694 }, { "epoch": 0.08, "grad_norm": 0.686377227306366, "learning_rate": 0.0005988656663231012, "loss": 3.5898, "step": 1695 }, { "epoch": 0.08, "grad_norm": 0.6027424335479736, "learning_rate": 0.0005988643283268162, "loss": 3.6807, "step": 1696 }, { "epoch": 0.08, "grad_norm": 0.6752616763114929, "learning_rate": 0.0005988629895433801, "loss": 3.5909, "step": 1697 }, { "epoch": 0.08, "grad_norm": 0.5653634071350098, "learning_rate": 0.0005988616499727965, "loss": 3.9474, "step": 1698 }, { "epoch": 0.08, "grad_norm": 0.6292624473571777, "learning_rate": 0.0005988603096150686, "loss": 3.7446, "step": 1699 }, { "epoch": 0.08, "grad_norm": 0.6524072289466858, "learning_rate": 0.0005988589684702004, "loss": 3.8772, "step": 1700 }, { "epoch": 0.08, "grad_norm": 0.6119723320007324, "learning_rate": 0.0005988576265381951, "loss": 3.5762, "step": 1701 }, { "epoch": 0.08, "grad_norm": 0.6504294872283936, "learning_rate": 0.0005988562838190562, "loss": 3.6652, "step": 1702 }, { "epoch": 0.08, "grad_norm": 0.6257925033569336, "learning_rate": 0.0005988549403127875, "loss": 3.6179, "step": 1703 }, { "epoch": 0.08, "grad_norm": 0.6477129459381104, "learning_rate": 0.0005988535960193924, "loss": 3.7043, "step": 1704 }, { "epoch": 0.08, "grad_norm": 0.6320105791091919, "learning_rate": 0.0005988522509388744, "loss": 3.7492, "step": 1705 }, { "epoch": 0.08, "grad_norm": 0.5984609127044678, "learning_rate": 0.0005988509050712371, "loss": 3.8237, "step": 1706 }, { "epoch": 0.08, "grad_norm": 0.5916309952735901, "learning_rate": 0.0005988495584164841, "loss": 3.5611, "step": 1707 }, { "epoch": 0.08, "grad_norm": 0.5889941453933716, "learning_rate": 0.0005988482109746188, "loss": 3.6234, "step": 1708 }, { "epoch": 0.08, "grad_norm": 0.5852148532867432, "learning_rate": 0.0005988468627456448, "loss": 3.5888, "step": 1709 }, { "epoch": 0.08, "grad_norm": 0.6306596398353577, "learning_rate": 0.0005988455137295657, "loss": 3.9278, "step": 1710 }, { "epoch": 0.08, "grad_norm": 0.63728266954422, "learning_rate": 0.0005988441639263851, "loss": 3.6708, "step": 1711 }, { "epoch": 0.08, "grad_norm": 0.5717198848724365, "learning_rate": 0.0005988428133361064, "loss": 3.8049, "step": 1712 }, { "epoch": 0.08, "grad_norm": 0.5758329033851624, "learning_rate": 0.0005988414619587333, "loss": 3.7912, "step": 1713 }, { "epoch": 0.08, "grad_norm": 0.6172877550125122, "learning_rate": 0.0005988401097942693, "loss": 3.6147, "step": 1714 }, { "epoch": 0.08, "grad_norm": 0.6377825140953064, "learning_rate": 0.0005988387568427179, "loss": 3.7007, "step": 1715 }, { "epoch": 0.08, "grad_norm": 0.5383907556533813, "learning_rate": 0.0005988374031040828, "loss": 3.6447, "step": 1716 }, { "epoch": 0.08, "grad_norm": 0.6975224018096924, "learning_rate": 0.0005988360485783674, "loss": 3.6552, "step": 1717 }, { "epoch": 0.08, "grad_norm": 0.5704695582389832, "learning_rate": 0.0005988346932655755, "loss": 3.9312, "step": 1718 }, { "epoch": 0.08, "grad_norm": 0.583870530128479, "learning_rate": 0.0005988333371657104, "loss": 3.8014, "step": 1719 }, { "epoch": 0.08, "grad_norm": 0.5745240449905396, "learning_rate": 0.0005988319802787759, "loss": 3.9566, "step": 1720 }, { "epoch": 0.08, "grad_norm": 0.5921694040298462, "learning_rate": 0.0005988306226047754, "loss": 3.7942, "step": 1721 }, { "epoch": 0.08, "grad_norm": 0.586078405380249, "learning_rate": 0.0005988292641437126, "loss": 3.8475, "step": 1722 }, { "epoch": 0.08, "grad_norm": 0.6041373610496521, "learning_rate": 0.000598827904895591, "loss": 3.7767, "step": 1723 }, { "epoch": 0.08, "grad_norm": 0.6381456255912781, "learning_rate": 0.0005988265448604143, "loss": 3.7119, "step": 1724 }, { "epoch": 0.08, "grad_norm": 0.6016688942909241, "learning_rate": 0.0005988251840381858, "loss": 3.9402, "step": 1725 }, { "epoch": 0.08, "grad_norm": 0.6326636672019958, "learning_rate": 0.0005988238224289094, "loss": 3.89, "step": 1726 }, { "epoch": 0.08, "grad_norm": 0.5598689317703247, "learning_rate": 0.0005988224600325886, "loss": 3.7806, "step": 1727 }, { "epoch": 0.08, "grad_norm": 0.6054954528808594, "learning_rate": 0.0005988210968492268, "loss": 3.8775, "step": 1728 }, { "epoch": 0.08, "grad_norm": 0.6688248515129089, "learning_rate": 0.0005988197328788279, "loss": 3.8513, "step": 1729 }, { "epoch": 0.08, "grad_norm": 0.6302428841590881, "learning_rate": 0.0005988183681213953, "loss": 3.7177, "step": 1730 }, { "epoch": 0.08, "grad_norm": 0.6412402391433716, "learning_rate": 0.0005988170025769326, "loss": 3.6202, "step": 1731 }, { "epoch": 0.08, "grad_norm": 0.6302105784416199, "learning_rate": 0.0005988156362454434, "loss": 3.7965, "step": 1732 }, { "epoch": 0.08, "grad_norm": 0.5867551565170288, "learning_rate": 0.0005988142691269314, "loss": 3.5873, "step": 1733 }, { "epoch": 0.08, "grad_norm": 0.670638382434845, "learning_rate": 0.0005988129012214, "loss": 3.755, "step": 1734 }, { "epoch": 0.09, "grad_norm": 0.6355201601982117, "learning_rate": 0.000598811532528853, "loss": 3.9034, "step": 1735 }, { "epoch": 0.09, "grad_norm": 0.6599445939064026, "learning_rate": 0.000598810163049294, "loss": 4.0809, "step": 1736 }, { "epoch": 0.09, "grad_norm": 0.5947389602661133, "learning_rate": 0.0005988087927827264, "loss": 3.8277, "step": 1737 }, { "epoch": 0.09, "grad_norm": 0.6837261319160461, "learning_rate": 0.000598807421729154, "loss": 3.8107, "step": 1738 }, { "epoch": 0.09, "grad_norm": 0.6405160427093506, "learning_rate": 0.0005988060498885804, "loss": 3.678, "step": 1739 }, { "epoch": 0.09, "grad_norm": 0.6684130430221558, "learning_rate": 0.0005988046772610091, "loss": 3.8897, "step": 1740 }, { "epoch": 0.09, "grad_norm": 0.6249805688858032, "learning_rate": 0.0005988033038464438, "loss": 3.9512, "step": 1741 }, { "epoch": 0.09, "grad_norm": 0.591820478439331, "learning_rate": 0.0005988019296448882, "loss": 3.6929, "step": 1742 }, { "epoch": 0.09, "grad_norm": 0.6458953022956848, "learning_rate": 0.0005988005546563457, "loss": 3.7441, "step": 1743 }, { "epoch": 0.09, "grad_norm": 0.6687983870506287, "learning_rate": 0.0005987991788808201, "loss": 3.7556, "step": 1744 }, { "epoch": 0.09, "grad_norm": 0.6560901999473572, "learning_rate": 0.000598797802318315, "loss": 3.7612, "step": 1745 }, { "epoch": 0.09, "grad_norm": 0.6090633273124695, "learning_rate": 0.000598796424968834, "loss": 3.8793, "step": 1746 }, { "epoch": 0.09, "grad_norm": 0.647350549697876, "learning_rate": 0.0005987950468323806, "loss": 3.8172, "step": 1747 }, { "epoch": 0.09, "grad_norm": 0.6341491937637329, "learning_rate": 0.0005987936679089586, "loss": 3.9446, "step": 1748 }, { "epoch": 0.09, "grad_norm": 0.5984036326408386, "learning_rate": 0.0005987922881985717, "loss": 3.6023, "step": 1749 }, { "epoch": 0.09, "grad_norm": 0.6060883402824402, "learning_rate": 0.0005987909077012233, "loss": 3.8801, "step": 1750 }, { "epoch": 0.09, "grad_norm": 0.6400834918022156, "learning_rate": 0.0005987895264169172, "loss": 3.8829, "step": 1751 }, { "epoch": 0.09, "grad_norm": 0.6266987919807434, "learning_rate": 0.0005987881443456569, "loss": 3.6213, "step": 1752 }, { "epoch": 0.09, "grad_norm": 0.6001605987548828, "learning_rate": 0.0005987867614874463, "loss": 3.7277, "step": 1753 }, { "epoch": 0.09, "grad_norm": 0.6193922758102417, "learning_rate": 0.0005987853778422887, "loss": 3.8871, "step": 1754 }, { "epoch": 0.09, "grad_norm": 0.6383598446846008, "learning_rate": 0.000598783993410188, "loss": 4.0488, "step": 1755 }, { "epoch": 0.09, "grad_norm": 0.5988031029701233, "learning_rate": 0.0005987826081911478, "loss": 3.8313, "step": 1756 }, { "epoch": 0.09, "grad_norm": 0.6383107304573059, "learning_rate": 0.0005987812221851716, "loss": 3.7355, "step": 1757 }, { "epoch": 0.09, "grad_norm": 0.5527706146240234, "learning_rate": 0.0005987798353922632, "loss": 3.9147, "step": 1758 }, { "epoch": 0.09, "grad_norm": 0.5697839856147766, "learning_rate": 0.0005987784478124263, "loss": 3.7764, "step": 1759 }, { "epoch": 0.09, "grad_norm": 0.6100751757621765, "learning_rate": 0.0005987770594456644, "loss": 3.7297, "step": 1760 }, { "epoch": 0.09, "grad_norm": 0.5448328256607056, "learning_rate": 0.0005987756702919813, "loss": 3.7812, "step": 1761 }, { "epoch": 0.09, "grad_norm": 0.6359660625457764, "learning_rate": 0.0005987742803513805, "loss": 3.583, "step": 1762 }, { "epoch": 0.09, "grad_norm": 0.5905768275260925, "learning_rate": 0.0005987728896238657, "loss": 3.7894, "step": 1763 }, { "epoch": 0.09, "grad_norm": 0.6355024576187134, "learning_rate": 0.0005987714981094407, "loss": 3.7716, "step": 1764 }, { "epoch": 0.09, "grad_norm": 0.5862598419189453, "learning_rate": 0.0005987701058081091, "loss": 3.7433, "step": 1765 }, { "epoch": 0.09, "grad_norm": 0.6261117458343506, "learning_rate": 0.0005987687127198745, "loss": 3.8265, "step": 1766 }, { "epoch": 0.09, "grad_norm": 0.5825438499450684, "learning_rate": 0.0005987673188447406, "loss": 3.598, "step": 1767 }, { "epoch": 0.09, "grad_norm": 0.6495029926300049, "learning_rate": 0.0005987659241827111, "loss": 3.4557, "step": 1768 }, { "epoch": 0.09, "grad_norm": 0.5948395729064941, "learning_rate": 0.0005987645287337896, "loss": 3.6985, "step": 1769 }, { "epoch": 0.09, "grad_norm": 0.6291310787200928, "learning_rate": 0.0005987631324979799, "loss": 3.7719, "step": 1770 }, { "epoch": 0.09, "grad_norm": 0.6354256868362427, "learning_rate": 0.0005987617354752856, "loss": 3.7825, "step": 1771 }, { "epoch": 0.09, "grad_norm": 0.5604045391082764, "learning_rate": 0.0005987603376657103, "loss": 3.9863, "step": 1772 }, { "epoch": 0.09, "grad_norm": 0.6706216931343079, "learning_rate": 0.0005987589390692578, "loss": 3.7109, "step": 1773 }, { "epoch": 0.09, "grad_norm": 0.6647695302963257, "learning_rate": 0.0005987575396859318, "loss": 3.6884, "step": 1774 }, { "epoch": 0.09, "grad_norm": 0.6067073941230774, "learning_rate": 0.000598756139515736, "loss": 3.7603, "step": 1775 }, { "epoch": 0.09, "grad_norm": 0.6076330542564392, "learning_rate": 0.0005987547385586739, "loss": 3.887, "step": 1776 }, { "epoch": 0.09, "grad_norm": 0.6393039226531982, "learning_rate": 0.0005987533368147493, "loss": 3.9307, "step": 1777 }, { "epoch": 0.09, "grad_norm": 0.5665969848632812, "learning_rate": 0.0005987519342839661, "loss": 3.8139, "step": 1778 }, { "epoch": 0.09, "grad_norm": 0.5718681812286377, "learning_rate": 0.0005987505309663275, "loss": 3.8209, "step": 1779 }, { "epoch": 0.09, "grad_norm": 0.6644183993339539, "learning_rate": 0.0005987491268618377, "loss": 3.6834, "step": 1780 }, { "epoch": 0.09, "grad_norm": 0.6793142557144165, "learning_rate": 0.0005987477219705002, "loss": 3.6879, "step": 1781 }, { "epoch": 0.09, "grad_norm": 0.8465895652770996, "learning_rate": 0.0005987463162923187, "loss": 3.7265, "step": 1782 }, { "epoch": 0.09, "grad_norm": 0.6196627020835876, "learning_rate": 0.0005987449098272968, "loss": 4.0914, "step": 1783 }, { "epoch": 0.09, "grad_norm": 0.5777091383934021, "learning_rate": 0.0005987435025754384, "loss": 3.7122, "step": 1784 }, { "epoch": 0.09, "grad_norm": 0.5573776960372925, "learning_rate": 0.000598742094536747, "loss": 4.0411, "step": 1785 }, { "epoch": 0.09, "grad_norm": 0.6396626830101013, "learning_rate": 0.0005987406857112266, "loss": 3.6454, "step": 1786 }, { "epoch": 0.09, "grad_norm": 0.5943922996520996, "learning_rate": 0.0005987392760988805, "loss": 3.7657, "step": 1787 }, { "epoch": 0.09, "grad_norm": 0.6125734448432922, "learning_rate": 0.0005987378656997128, "loss": 3.7891, "step": 1788 }, { "epoch": 0.09, "grad_norm": 0.6283426880836487, "learning_rate": 0.000598736454513727, "loss": 3.7106, "step": 1789 }, { "epoch": 0.09, "grad_norm": 0.6084054708480835, "learning_rate": 0.0005987350425409268, "loss": 3.9321, "step": 1790 }, { "epoch": 0.09, "grad_norm": 0.5988473296165466, "learning_rate": 0.0005987336297813161, "loss": 4.0916, "step": 1791 }, { "epoch": 0.09, "grad_norm": 0.650684118270874, "learning_rate": 0.0005987322162348985, "loss": 4.0522, "step": 1792 }, { "epoch": 0.09, "grad_norm": 0.5723590850830078, "learning_rate": 0.0005987308019016778, "loss": 3.8183, "step": 1793 }, { "epoch": 0.09, "grad_norm": 0.5800668001174927, "learning_rate": 0.0005987293867816575, "loss": 3.7359, "step": 1794 }, { "epoch": 0.09, "grad_norm": 0.5976575016975403, "learning_rate": 0.0005987279708748416, "loss": 3.9004, "step": 1795 }, { "epoch": 0.09, "grad_norm": 0.6423118114471436, "learning_rate": 0.0005987265541812336, "loss": 3.5266, "step": 1796 }, { "epoch": 0.09, "grad_norm": 0.5797125697135925, "learning_rate": 0.0005987251367008374, "loss": 3.9024, "step": 1797 }, { "epoch": 0.09, "grad_norm": 0.5562116503715515, "learning_rate": 0.0005987237184336568, "loss": 3.6905, "step": 1798 }, { "epoch": 0.09, "grad_norm": 0.6045980453491211, "learning_rate": 0.0005987222993796953, "loss": 3.6083, "step": 1799 }, { "epoch": 0.09, "grad_norm": 0.696354866027832, "learning_rate": 0.0005987208795389567, "loss": 3.8496, "step": 1800 }, { "epoch": 0.09, "grad_norm": 0.6290586590766907, "learning_rate": 0.0005987194589114449, "loss": 3.5814, "step": 1801 }, { "epoch": 0.09, "grad_norm": 0.5684269070625305, "learning_rate": 0.0005987180374971635, "loss": 3.8016, "step": 1802 }, { "epoch": 0.09, "grad_norm": 0.5839782953262329, "learning_rate": 0.0005987166152961163, "loss": 3.5221, "step": 1803 }, { "epoch": 0.09, "grad_norm": 0.5923112630844116, "learning_rate": 0.000598715192308307, "loss": 3.5328, "step": 1804 }, { "epoch": 0.09, "grad_norm": 0.5886315703392029, "learning_rate": 0.0005987137685337393, "loss": 4.0307, "step": 1805 }, { "epoch": 0.09, "grad_norm": 0.6204468607902527, "learning_rate": 0.0005987123439724172, "loss": 3.8404, "step": 1806 }, { "epoch": 0.09, "grad_norm": 0.5841030478477478, "learning_rate": 0.0005987109186243441, "loss": 3.3384, "step": 1807 }, { "epoch": 0.09, "grad_norm": 0.5855621099472046, "learning_rate": 0.0005987094924895241, "loss": 3.7011, "step": 1808 }, { "epoch": 0.09, "grad_norm": 0.6314960718154907, "learning_rate": 0.0005987080655679607, "loss": 3.7732, "step": 1809 }, { "epoch": 0.09, "grad_norm": 0.5664706826210022, "learning_rate": 0.0005987066378596577, "loss": 3.8319, "step": 1810 }, { "epoch": 0.09, "grad_norm": 0.6282777786254883, "learning_rate": 0.0005987052093646189, "loss": 3.6498, "step": 1811 }, { "epoch": 0.09, "grad_norm": 0.5864231586456299, "learning_rate": 0.0005987037800828482, "loss": 3.8712, "step": 1812 }, { "epoch": 0.09, "grad_norm": 0.6137101054191589, "learning_rate": 0.0005987023500143491, "loss": 3.6108, "step": 1813 }, { "epoch": 0.09, "grad_norm": 0.6736273169517517, "learning_rate": 0.0005987009191591254, "loss": 3.6828, "step": 1814 }, { "epoch": 0.09, "grad_norm": 0.6778322458267212, "learning_rate": 0.0005986994875171811, "loss": 3.6604, "step": 1815 }, { "epoch": 0.09, "grad_norm": 0.6052943468093872, "learning_rate": 0.0005986980550885198, "loss": 3.9966, "step": 1816 }, { "epoch": 0.09, "grad_norm": 0.6130955815315247, "learning_rate": 0.0005986966218731454, "loss": 3.6581, "step": 1817 }, { "epoch": 0.09, "grad_norm": 0.61712247133255, "learning_rate": 0.0005986951878710616, "loss": 3.704, "step": 1818 }, { "epoch": 0.09, "grad_norm": 0.6492055654525757, "learning_rate": 0.0005986937530822721, "loss": 3.7532, "step": 1819 }, { "epoch": 0.09, "grad_norm": 0.6408119797706604, "learning_rate": 0.0005986923175067806, "loss": 3.8313, "step": 1820 }, { "epoch": 0.09, "grad_norm": 0.577404797077179, "learning_rate": 0.0005986908811445911, "loss": 4.156, "step": 1821 }, { "epoch": 0.09, "grad_norm": 0.6154677271842957, "learning_rate": 0.0005986894439957074, "loss": 3.8513, "step": 1822 }, { "epoch": 0.09, "grad_norm": 0.5824177265167236, "learning_rate": 0.0005986880060601332, "loss": 3.7039, "step": 1823 }, { "epoch": 0.09, "grad_norm": 0.6346336603164673, "learning_rate": 0.0005986865673378722, "loss": 3.801, "step": 1824 }, { "epoch": 0.09, "grad_norm": 0.6315222978591919, "learning_rate": 0.0005986851278289283, "loss": 3.5617, "step": 1825 }, { "epoch": 0.09, "grad_norm": 0.628288209438324, "learning_rate": 0.0005986836875333052, "loss": 3.6983, "step": 1826 }, { "epoch": 0.09, "grad_norm": 0.6149936318397522, "learning_rate": 0.0005986822464510067, "loss": 3.8962, "step": 1827 }, { "epoch": 0.09, "grad_norm": 0.5607312917709351, "learning_rate": 0.0005986808045820367, "loss": 3.8976, "step": 1828 }, { "epoch": 0.09, "grad_norm": 0.6367286443710327, "learning_rate": 0.000598679361926399, "loss": 3.7893, "step": 1829 }, { "epoch": 0.09, "grad_norm": 0.6114875078201294, "learning_rate": 0.0005986779184840973, "loss": 3.5419, "step": 1830 }, { "epoch": 0.09, "grad_norm": 0.6330634355545044, "learning_rate": 0.0005986764742551355, "loss": 3.9017, "step": 1831 }, { "epoch": 0.09, "grad_norm": 0.5696288347244263, "learning_rate": 0.0005986750292395173, "loss": 3.6879, "step": 1832 }, { "epoch": 0.09, "grad_norm": 0.6309328079223633, "learning_rate": 0.0005986735834372465, "loss": 3.8622, "step": 1833 }, { "epoch": 0.09, "grad_norm": 0.5509409308433533, "learning_rate": 0.000598672136848327, "loss": 3.8122, "step": 1834 }, { "epoch": 0.09, "grad_norm": 0.6046048402786255, "learning_rate": 0.0005986706894727627, "loss": 3.7887, "step": 1835 }, { "epoch": 0.09, "grad_norm": 0.6264524459838867, "learning_rate": 0.0005986692413105571, "loss": 3.9682, "step": 1836 }, { "epoch": 0.09, "grad_norm": 0.6554989218711853, "learning_rate": 0.0005986677923617142, "loss": 4.0354, "step": 1837 }, { "epoch": 0.09, "grad_norm": 0.5879736542701721, "learning_rate": 0.0005986663426262379, "loss": 3.8353, "step": 1838 }, { "epoch": 0.09, "grad_norm": 0.6094560623168945, "learning_rate": 0.0005986648921041319, "loss": 3.7207, "step": 1839 }, { "epoch": 0.09, "grad_norm": 0.5921733975410461, "learning_rate": 0.0005986634407954001, "loss": 3.7837, "step": 1840 }, { "epoch": 0.09, "grad_norm": 0.5805111527442932, "learning_rate": 0.0005986619887000463, "loss": 3.7929, "step": 1841 }, { "epoch": 0.09, "grad_norm": 0.6623033285140991, "learning_rate": 0.0005986605358180743, "loss": 3.669, "step": 1842 }, { "epoch": 0.09, "grad_norm": 0.5905426144599915, "learning_rate": 0.0005986590821494878, "loss": 3.8428, "step": 1843 }, { "epoch": 0.09, "grad_norm": 0.6057484149932861, "learning_rate": 0.0005986576276942908, "loss": 3.5711, "step": 1844 }, { "epoch": 0.09, "grad_norm": 0.5728998184204102, "learning_rate": 0.0005986561724524871, "loss": 3.8205, "step": 1845 }, { "epoch": 0.09, "grad_norm": 0.6299675703048706, "learning_rate": 0.0005986547164240805, "loss": 3.7557, "step": 1846 }, { "epoch": 0.09, "grad_norm": 0.6935710906982422, "learning_rate": 0.000598653259609075, "loss": 3.6427, "step": 1847 }, { "epoch": 0.09, "grad_norm": 0.6175175309181213, "learning_rate": 0.0005986518020074742, "loss": 3.7941, "step": 1848 }, { "epoch": 0.09, "grad_norm": 0.5965023636817932, "learning_rate": 0.000598650343619282, "loss": 3.6033, "step": 1849 }, { "epoch": 0.09, "grad_norm": 0.6782153844833374, "learning_rate": 0.0005986488844445022, "loss": 3.6862, "step": 1850 }, { "epoch": 0.09, "grad_norm": 0.6123353838920593, "learning_rate": 0.0005986474244831389, "loss": 3.8051, "step": 1851 }, { "epoch": 0.09, "grad_norm": 0.6483194828033447, "learning_rate": 0.0005986459637351957, "loss": 3.9932, "step": 1852 }, { "epoch": 0.09, "grad_norm": 0.5647997856140137, "learning_rate": 0.0005986445022006764, "loss": 3.6815, "step": 1853 }, { "epoch": 0.09, "grad_norm": 0.6573106646537781, "learning_rate": 0.0005986430398795849, "loss": 3.9537, "step": 1854 }, { "epoch": 0.09, "grad_norm": 0.5951365828514099, "learning_rate": 0.0005986415767719254, "loss": 3.7697, "step": 1855 }, { "epoch": 0.09, "grad_norm": 0.6256296038627625, "learning_rate": 0.0005986401128777012, "loss": 3.7458, "step": 1856 }, { "epoch": 0.09, "grad_norm": 0.7118420600891113, "learning_rate": 0.0005986386481969165, "loss": 3.4327, "step": 1857 }, { "epoch": 0.09, "grad_norm": 0.5771075487136841, "learning_rate": 0.0005986371827295751, "loss": 3.6589, "step": 1858 }, { "epoch": 0.09, "grad_norm": 0.6408962607383728, "learning_rate": 0.0005986357164756809, "loss": 3.9137, "step": 1859 }, { "epoch": 0.09, "grad_norm": 0.6218885183334351, "learning_rate": 0.0005986342494352376, "loss": 3.7506, "step": 1860 }, { "epoch": 0.09, "grad_norm": 0.6180911064147949, "learning_rate": 0.0005986327816082491, "loss": 3.6067, "step": 1861 }, { "epoch": 0.09, "grad_norm": 0.6280100345611572, "learning_rate": 0.0005986313129947194, "loss": 3.9251, "step": 1862 }, { "epoch": 0.09, "grad_norm": 0.5840473175048828, "learning_rate": 0.0005986298435946523, "loss": 3.654, "step": 1863 }, { "epoch": 0.09, "grad_norm": 0.5487592220306396, "learning_rate": 0.0005986283734080517, "loss": 3.7433, "step": 1864 }, { "epoch": 0.09, "grad_norm": 0.6708935499191284, "learning_rate": 0.0005986269024349213, "loss": 3.6582, "step": 1865 }, { "epoch": 0.09, "grad_norm": 0.5990443825721741, "learning_rate": 0.0005986254306752653, "loss": 3.7431, "step": 1866 }, { "epoch": 0.09, "grad_norm": 0.6027263402938843, "learning_rate": 0.0005986239581290873, "loss": 3.6634, "step": 1867 }, { "epoch": 0.09, "grad_norm": 0.6863844394683838, "learning_rate": 0.0005986224847963912, "loss": 3.727, "step": 1868 }, { "epoch": 0.09, "grad_norm": 0.6209395527839661, "learning_rate": 0.0005986210106771811, "loss": 3.853, "step": 1869 }, { "epoch": 0.09, "grad_norm": 0.6714705228805542, "learning_rate": 0.0005986195357714605, "loss": 3.7967, "step": 1870 }, { "epoch": 0.09, "grad_norm": 0.5347830057144165, "learning_rate": 0.0005986180600792337, "loss": 3.8287, "step": 1871 }, { "epoch": 0.09, "grad_norm": 0.6196743249893188, "learning_rate": 0.0005986165836005044, "loss": 3.8296, "step": 1872 }, { "epoch": 0.09, "grad_norm": 0.5981053113937378, "learning_rate": 0.0005986151063352763, "loss": 3.7604, "step": 1873 }, { "epoch": 0.09, "grad_norm": 0.5930126905441284, "learning_rate": 0.0005986136282835537, "loss": 3.7541, "step": 1874 }, { "epoch": 0.09, "grad_norm": 0.604667067527771, "learning_rate": 0.0005986121494453401, "loss": 3.658, "step": 1875 }, { "epoch": 0.09, "grad_norm": 0.6272248029708862, "learning_rate": 0.0005986106698206398, "loss": 3.6897, "step": 1876 }, { "epoch": 0.09, "grad_norm": 0.572104811668396, "learning_rate": 0.0005986091894094563, "loss": 3.752, "step": 1877 }, { "epoch": 0.09, "grad_norm": 0.6113089323043823, "learning_rate": 0.0005986077082117936, "loss": 3.6702, "step": 1878 }, { "epoch": 0.09, "grad_norm": 0.6149545311927795, "learning_rate": 0.0005986062262276559, "loss": 3.8337, "step": 1879 }, { "epoch": 0.09, "grad_norm": 0.5942882299423218, "learning_rate": 0.0005986047434570466, "loss": 3.546, "step": 1880 }, { "epoch": 0.09, "grad_norm": 0.6164019703865051, "learning_rate": 0.00059860325989997, "loss": 3.713, "step": 1881 }, { "epoch": 0.09, "grad_norm": 0.6244223713874817, "learning_rate": 0.0005986017755564299, "loss": 3.5671, "step": 1882 }, { "epoch": 0.09, "grad_norm": 0.6063665747642517, "learning_rate": 0.0005986002904264301, "loss": 3.8345, "step": 1883 }, { "epoch": 0.09, "grad_norm": 0.5902014374732971, "learning_rate": 0.0005985988045099747, "loss": 3.7012, "step": 1884 }, { "epoch": 0.09, "grad_norm": 0.5616203546524048, "learning_rate": 0.0005985973178070675, "loss": 3.5898, "step": 1885 }, { "epoch": 0.09, "grad_norm": 0.610362708568573, "learning_rate": 0.0005985958303177124, "loss": 3.7553, "step": 1886 }, { "epoch": 0.09, "grad_norm": 0.5817036032676697, "learning_rate": 0.0005985943420419134, "loss": 3.6755, "step": 1887 }, { "epoch": 0.09, "grad_norm": 0.6104779839515686, "learning_rate": 0.0005985928529796743, "loss": 3.6263, "step": 1888 }, { "epoch": 0.09, "grad_norm": 0.5836775898933411, "learning_rate": 0.000598591363130999, "loss": 3.797, "step": 1889 }, { "epoch": 0.09, "grad_norm": 0.60204017162323, "learning_rate": 0.0005985898724958917, "loss": 3.6424, "step": 1890 }, { "epoch": 0.09, "grad_norm": 0.5760531425476074, "learning_rate": 0.000598588381074356, "loss": 3.6849, "step": 1891 }, { "epoch": 0.09, "grad_norm": 0.6347927451133728, "learning_rate": 0.0005985868888663961, "loss": 3.6574, "step": 1892 }, { "epoch": 0.09, "grad_norm": 0.6068776845932007, "learning_rate": 0.0005985853958720157, "loss": 4.0513, "step": 1893 }, { "epoch": 0.09, "grad_norm": 0.6126516461372375, "learning_rate": 0.0005985839020912189, "loss": 3.9252, "step": 1894 }, { "epoch": 0.09, "grad_norm": 0.5863696932792664, "learning_rate": 0.0005985824075240096, "loss": 3.7798, "step": 1895 }, { "epoch": 0.09, "grad_norm": 0.6498505473136902, "learning_rate": 0.0005985809121703916, "loss": 3.8625, "step": 1896 }, { "epoch": 0.09, "grad_norm": 0.5926463007926941, "learning_rate": 0.000598579416030369, "loss": 3.5835, "step": 1897 }, { "epoch": 0.09, "grad_norm": 0.6369209885597229, "learning_rate": 0.0005985779191039457, "loss": 3.7343, "step": 1898 }, { "epoch": 0.09, "grad_norm": 0.5705779194831848, "learning_rate": 0.0005985764213911255, "loss": 3.7468, "step": 1899 }, { "epoch": 0.09, "grad_norm": 0.5890957713127136, "learning_rate": 0.0005985749228919126, "loss": 3.7234, "step": 1900 }, { "epoch": 0.09, "grad_norm": 0.6325515508651733, "learning_rate": 0.0005985734236063108, "loss": 3.9433, "step": 1901 }, { "epoch": 0.09, "grad_norm": 0.6154495477676392, "learning_rate": 0.000598571923534324, "loss": 3.8898, "step": 1902 }, { "epoch": 0.09, "grad_norm": 0.5961557626724243, "learning_rate": 0.0005985704226759563, "loss": 3.7667, "step": 1903 }, { "epoch": 0.09, "grad_norm": 0.5520904064178467, "learning_rate": 0.0005985689210312114, "loss": 3.959, "step": 1904 }, { "epoch": 0.09, "grad_norm": 0.5621737241744995, "learning_rate": 0.0005985674186000935, "loss": 3.7459, "step": 1905 }, { "epoch": 0.09, "grad_norm": 0.6092246174812317, "learning_rate": 0.0005985659153826065, "loss": 3.7063, "step": 1906 }, { "epoch": 0.09, "grad_norm": 0.6204909086227417, "learning_rate": 0.0005985644113787544, "loss": 3.74, "step": 1907 }, { "epoch": 0.09, "grad_norm": 0.618402361869812, "learning_rate": 0.0005985629065885412, "loss": 3.6501, "step": 1908 }, { "epoch": 0.09, "grad_norm": 0.5384846329689026, "learning_rate": 0.0005985614010119705, "loss": 3.7754, "step": 1909 }, { "epoch": 0.09, "grad_norm": 0.5484468340873718, "learning_rate": 0.0005985598946490467, "loss": 3.6136, "step": 1910 }, { "epoch": 0.09, "grad_norm": 0.5607725977897644, "learning_rate": 0.0005985583874997736, "loss": 3.765, "step": 1911 }, { "epoch": 0.09, "grad_norm": 0.5928760170936584, "learning_rate": 0.0005985568795641551, "loss": 3.6978, "step": 1912 }, { "epoch": 0.09, "grad_norm": 0.6223922967910767, "learning_rate": 0.0005985553708421951, "loss": 3.8356, "step": 1913 }, { "epoch": 0.09, "grad_norm": 0.6009296178817749, "learning_rate": 0.0005985538613338979, "loss": 3.8473, "step": 1914 }, { "epoch": 0.09, "grad_norm": 0.5862912535667419, "learning_rate": 0.0005985523510392673, "loss": 3.8365, "step": 1915 }, { "epoch": 0.09, "grad_norm": 0.5991998910903931, "learning_rate": 0.0005985508399583072, "loss": 3.6978, "step": 1916 }, { "epoch": 0.09, "grad_norm": 0.6147690415382385, "learning_rate": 0.0005985493280910217, "loss": 3.631, "step": 1917 }, { "epoch": 0.09, "grad_norm": 0.6772075295448303, "learning_rate": 0.0005985478154374147, "loss": 3.7764, "step": 1918 }, { "epoch": 0.09, "grad_norm": 0.6254436373710632, "learning_rate": 0.0005985463019974901, "loss": 3.6895, "step": 1919 }, { "epoch": 0.09, "grad_norm": 0.5893893837928772, "learning_rate": 0.0005985447877712521, "loss": 3.9327, "step": 1920 }, { "epoch": 0.09, "grad_norm": 0.6510739922523499, "learning_rate": 0.0005985432727587045, "loss": 3.7668, "step": 1921 }, { "epoch": 0.09, "grad_norm": 0.6356686949729919, "learning_rate": 0.0005985417569598515, "loss": 3.6375, "step": 1922 }, { "epoch": 0.09, "grad_norm": 0.7121893763542175, "learning_rate": 0.0005985402403746969, "loss": 3.8785, "step": 1923 }, { "epoch": 0.09, "grad_norm": 0.6303704977035522, "learning_rate": 0.0005985387230032449, "loss": 3.7847, "step": 1924 }, { "epoch": 0.09, "grad_norm": 0.6012903451919556, "learning_rate": 0.0005985372048454992, "loss": 3.6605, "step": 1925 }, { "epoch": 0.09, "grad_norm": 0.631798267364502, "learning_rate": 0.000598535685901464, "loss": 3.4997, "step": 1926 }, { "epoch": 0.09, "grad_norm": 0.6432697176933289, "learning_rate": 0.0005985341661711432, "loss": 3.6908, "step": 1927 }, { "epoch": 0.09, "grad_norm": 0.6546198129653931, "learning_rate": 0.0005985326456545409, "loss": 3.5806, "step": 1928 }, { "epoch": 0.09, "grad_norm": 0.6076374053955078, "learning_rate": 0.0005985311243516611, "loss": 3.6955, "step": 1929 }, { "epoch": 0.09, "grad_norm": 0.673281192779541, "learning_rate": 0.0005985296022625078, "loss": 3.8208, "step": 1930 }, { "epoch": 0.09, "grad_norm": 0.614531397819519, "learning_rate": 0.000598528079387085, "loss": 3.7834, "step": 1931 }, { "epoch": 0.09, "grad_norm": 0.6124190092086792, "learning_rate": 0.0005985265557253965, "loss": 4.1048, "step": 1932 }, { "epoch": 0.09, "grad_norm": 0.5843042135238647, "learning_rate": 0.0005985250312774466, "loss": 3.6868, "step": 1933 }, { "epoch": 0.09, "grad_norm": 0.5952600836753845, "learning_rate": 0.0005985235060432393, "loss": 3.7835, "step": 1934 }, { "epoch": 0.09, "grad_norm": 0.5896327495574951, "learning_rate": 0.0005985219800227785, "loss": 3.6072, "step": 1935 }, { "epoch": 0.09, "grad_norm": 0.5987282395362854, "learning_rate": 0.0005985204532160683, "loss": 3.8684, "step": 1936 }, { "epoch": 0.09, "grad_norm": 0.661604642868042, "learning_rate": 0.0005985189256231125, "loss": 3.6414, "step": 1937 }, { "epoch": 0.09, "grad_norm": 0.6821339726448059, "learning_rate": 0.0005985173972439154, "loss": 3.9158, "step": 1938 }, { "epoch": 0.1, "grad_norm": 0.5843617916107178, "learning_rate": 0.000598515868078481, "loss": 3.8082, "step": 1939 }, { "epoch": 0.1, "grad_norm": 0.5790466070175171, "learning_rate": 0.0005985143381268132, "loss": 3.8143, "step": 1940 }, { "epoch": 0.1, "grad_norm": 0.639920175075531, "learning_rate": 0.0005985128073889161, "loss": 3.7693, "step": 1941 }, { "epoch": 0.1, "grad_norm": 0.615778386592865, "learning_rate": 0.0005985112758647937, "loss": 3.6194, "step": 1942 }, { "epoch": 0.1, "grad_norm": 0.6140244007110596, "learning_rate": 0.00059850974355445, "loss": 3.5417, "step": 1943 }, { "epoch": 0.1, "grad_norm": 0.6662708520889282, "learning_rate": 0.0005985082104578892, "loss": 3.5064, "step": 1944 }, { "epoch": 0.1, "grad_norm": 0.6774072051048279, "learning_rate": 0.0005985066765751151, "loss": 3.8021, "step": 1945 }, { "epoch": 0.1, "grad_norm": 0.6490411758422852, "learning_rate": 0.000598505141906132, "loss": 3.5669, "step": 1946 }, { "epoch": 0.1, "grad_norm": 0.6608024835586548, "learning_rate": 0.0005985036064509437, "loss": 3.6388, "step": 1947 }, { "epoch": 0.1, "grad_norm": 0.5928901433944702, "learning_rate": 0.0005985020702095543, "loss": 3.6946, "step": 1948 }, { "epoch": 0.1, "grad_norm": 0.6092929840087891, "learning_rate": 0.000598500533181968, "loss": 3.5945, "step": 1949 }, { "epoch": 0.1, "grad_norm": 0.6284611821174622, "learning_rate": 0.0005984989953681887, "loss": 3.932, "step": 1950 }, { "epoch": 0.1, "grad_norm": 0.5998930335044861, "learning_rate": 0.0005984974567682205, "loss": 3.8786, "step": 1951 }, { "epoch": 0.1, "grad_norm": 0.6137663125991821, "learning_rate": 0.0005984959173820674, "loss": 3.6663, "step": 1952 }, { "epoch": 0.1, "grad_norm": 0.6030510663986206, "learning_rate": 0.0005984943772097337, "loss": 3.7237, "step": 1953 }, { "epoch": 0.1, "grad_norm": 0.5598939061164856, "learning_rate": 0.0005984928362512231, "loss": 3.652, "step": 1954 }, { "epoch": 0.1, "grad_norm": 0.6012648940086365, "learning_rate": 0.0005984912945065397, "loss": 3.7399, "step": 1955 }, { "epoch": 0.1, "grad_norm": 0.5890420079231262, "learning_rate": 0.0005984897519756879, "loss": 3.4773, "step": 1956 }, { "epoch": 0.1, "grad_norm": 0.6079639792442322, "learning_rate": 0.0005984882086586714, "loss": 3.8788, "step": 1957 }, { "epoch": 0.1, "grad_norm": 0.6101313233375549, "learning_rate": 0.0005984866645554945, "loss": 3.924, "step": 1958 }, { "epoch": 0.1, "grad_norm": 0.5866898894309998, "learning_rate": 0.000598485119666161, "loss": 3.6906, "step": 1959 }, { "epoch": 0.1, "grad_norm": 0.7030449509620667, "learning_rate": 0.0005984835739906753, "loss": 3.7014, "step": 1960 }, { "epoch": 0.1, "grad_norm": 0.6568990349769592, "learning_rate": 0.0005984820275290413, "loss": 3.7585, "step": 1961 }, { "epoch": 0.1, "grad_norm": 0.5689438581466675, "learning_rate": 0.000598480480281263, "loss": 3.6166, "step": 1962 }, { "epoch": 0.1, "grad_norm": 0.6452824473381042, "learning_rate": 0.0005984789322473446, "loss": 3.7331, "step": 1963 }, { "epoch": 0.1, "grad_norm": 0.5881951451301575, "learning_rate": 0.0005984773834272902, "loss": 3.7973, "step": 1964 }, { "epoch": 0.1, "grad_norm": 0.5840572714805603, "learning_rate": 0.0005984758338211037, "loss": 3.763, "step": 1965 }, { "epoch": 0.1, "grad_norm": 0.5961487889289856, "learning_rate": 0.0005984742834287894, "loss": 3.6024, "step": 1966 }, { "epoch": 0.1, "grad_norm": 0.5628345608711243, "learning_rate": 0.0005984727322503512, "loss": 3.8128, "step": 1967 }, { "epoch": 0.1, "grad_norm": 0.5680345296859741, "learning_rate": 0.0005984711802857933, "loss": 3.8158, "step": 1968 }, { "epoch": 0.1, "grad_norm": 0.5904361605644226, "learning_rate": 0.0005984696275351198, "loss": 3.6442, "step": 1969 }, { "epoch": 0.1, "grad_norm": 0.6462759971618652, "learning_rate": 0.0005984680739983346, "loss": 3.826, "step": 1970 }, { "epoch": 0.1, "grad_norm": 0.5882953405380249, "learning_rate": 0.0005984665196754421, "loss": 3.7568, "step": 1971 }, { "epoch": 0.1, "grad_norm": 0.5906187891960144, "learning_rate": 0.0005984649645664461, "loss": 3.7037, "step": 1972 }, { "epoch": 0.1, "grad_norm": 0.5672358870506287, "learning_rate": 0.0005984634086713509, "loss": 3.8733, "step": 1973 }, { "epoch": 0.1, "grad_norm": 0.6111389994621277, "learning_rate": 0.0005984618519901605, "loss": 3.617, "step": 1974 }, { "epoch": 0.1, "grad_norm": 0.6110372543334961, "learning_rate": 0.000598460294522879, "loss": 3.7275, "step": 1975 }, { "epoch": 0.1, "grad_norm": 0.5851089954376221, "learning_rate": 0.0005984587362695105, "loss": 3.6567, "step": 1976 }, { "epoch": 0.1, "grad_norm": 0.6242319345474243, "learning_rate": 0.0005984571772300592, "loss": 3.698, "step": 1977 }, { "epoch": 0.1, "grad_norm": 0.5997954607009888, "learning_rate": 0.0005984556174045292, "loss": 3.5633, "step": 1978 }, { "epoch": 0.1, "grad_norm": 0.6077648401260376, "learning_rate": 0.0005984540567929244, "loss": 3.4955, "step": 1979 }, { "epoch": 0.1, "grad_norm": 0.6089391112327576, "learning_rate": 0.0005984524953952492, "loss": 3.8301, "step": 1980 }, { "epoch": 0.1, "grad_norm": 0.6587269902229309, "learning_rate": 0.0005984509332115074, "loss": 3.6826, "step": 1981 }, { "epoch": 0.1, "grad_norm": 0.6356779336929321, "learning_rate": 0.0005984493702417033, "loss": 3.7191, "step": 1982 }, { "epoch": 0.1, "grad_norm": 0.6114301085472107, "learning_rate": 0.0005984478064858411, "loss": 3.8376, "step": 1983 }, { "epoch": 0.1, "grad_norm": 0.6225729584693909, "learning_rate": 0.0005984462419439248, "loss": 3.5604, "step": 1984 }, { "epoch": 0.1, "grad_norm": 0.5687344074249268, "learning_rate": 0.0005984446766159585, "loss": 3.8095, "step": 1985 }, { "epoch": 0.1, "grad_norm": 0.6089506149291992, "learning_rate": 0.0005984431105019463, "loss": 3.5823, "step": 1986 }, { "epoch": 0.1, "grad_norm": 0.6365668177604675, "learning_rate": 0.0005984415436018925, "loss": 3.8444, "step": 1987 }, { "epoch": 0.1, "grad_norm": 0.6325536370277405, "learning_rate": 0.000598439975915801, "loss": 3.8402, "step": 1988 }, { "epoch": 0.1, "grad_norm": 0.6012512445449829, "learning_rate": 0.0005984384074436761, "loss": 3.7274, "step": 1989 }, { "epoch": 0.1, "grad_norm": 0.5962508916854858, "learning_rate": 0.0005984368381855219, "loss": 3.6733, "step": 1990 }, { "epoch": 0.1, "grad_norm": 0.618022084236145, "learning_rate": 0.0005984352681413424, "loss": 3.7132, "step": 1991 }, { "epoch": 0.1, "grad_norm": 0.5967841148376465, "learning_rate": 0.0005984336973111419, "loss": 3.9452, "step": 1992 }, { "epoch": 0.1, "grad_norm": 0.590883195400238, "learning_rate": 0.0005984321256949245, "loss": 3.5217, "step": 1993 }, { "epoch": 0.1, "grad_norm": 0.5896095633506775, "learning_rate": 0.0005984305532926943, "loss": 3.7342, "step": 1994 }, { "epoch": 0.1, "grad_norm": 0.5817958116531372, "learning_rate": 0.0005984289801044554, "loss": 3.6573, "step": 1995 }, { "epoch": 0.1, "grad_norm": 0.586776077747345, "learning_rate": 0.000598427406130212, "loss": 3.7158, "step": 1996 }, { "epoch": 0.1, "grad_norm": 0.61875981092453, "learning_rate": 0.0005984258313699684, "loss": 3.688, "step": 1997 }, { "epoch": 0.1, "grad_norm": 0.5944945216178894, "learning_rate": 0.0005984242558237285, "loss": 3.8244, "step": 1998 }, { "epoch": 0.1, "grad_norm": 0.62650465965271, "learning_rate": 0.0005984226794914965, "loss": 3.7835, "step": 1999 }, { "epoch": 0.1, "grad_norm": 0.6252263188362122, "learning_rate": 0.0005984211023732767, "loss": 3.6683, "step": 2000 }, { "epoch": 0.1, "grad_norm": 0.6127182841300964, "learning_rate": 0.000598419524469073, "loss": 3.7427, "step": 2001 }, { "epoch": 0.1, "grad_norm": 0.5782539248466492, "learning_rate": 0.0005984179457788897, "loss": 3.529, "step": 2002 }, { "epoch": 0.1, "grad_norm": 0.6279388666152954, "learning_rate": 0.0005984163663027311, "loss": 3.4904, "step": 2003 }, { "epoch": 0.1, "grad_norm": 0.6365188360214233, "learning_rate": 0.0005984147860406012, "loss": 3.75, "step": 2004 }, { "epoch": 0.1, "grad_norm": 0.6217809319496155, "learning_rate": 0.0005984132049925041, "loss": 3.6439, "step": 2005 }, { "epoch": 0.1, "grad_norm": 0.6590561270713806, "learning_rate": 0.0005984116231584441, "loss": 3.7347, "step": 2006 }, { "epoch": 0.1, "grad_norm": 0.5828232169151306, "learning_rate": 0.0005984100405384253, "loss": 3.7325, "step": 2007 }, { "epoch": 0.1, "grad_norm": 0.5944784879684448, "learning_rate": 0.0005984084571324519, "loss": 3.7656, "step": 2008 }, { "epoch": 0.1, "grad_norm": 0.6109013557434082, "learning_rate": 0.0005984068729405281, "loss": 4.0064, "step": 2009 }, { "epoch": 0.1, "grad_norm": 0.6408974528312683, "learning_rate": 0.000598405287962658, "loss": 3.7111, "step": 2010 }, { "epoch": 0.1, "grad_norm": 0.6209164261817932, "learning_rate": 0.0005984037021988458, "loss": 3.8332, "step": 2011 }, { "epoch": 0.1, "grad_norm": 0.6082508563995361, "learning_rate": 0.0005984021156490956, "loss": 3.6786, "step": 2012 }, { "epoch": 0.1, "grad_norm": 0.6087662577629089, "learning_rate": 0.0005984005283134117, "loss": 3.8417, "step": 2013 }, { "epoch": 0.1, "grad_norm": 0.6204620599746704, "learning_rate": 0.0005983989401917982, "loss": 3.5133, "step": 2014 }, { "epoch": 0.1, "grad_norm": 0.5919209122657776, "learning_rate": 0.0005983973512842595, "loss": 3.7543, "step": 2015 }, { "epoch": 0.1, "grad_norm": 0.6268076300621033, "learning_rate": 0.0005983957615907995, "loss": 3.7173, "step": 2016 }, { "epoch": 0.1, "grad_norm": 0.6411733031272888, "learning_rate": 0.0005983941711114224, "loss": 3.8086, "step": 2017 }, { "epoch": 0.1, "grad_norm": 0.5614747405052185, "learning_rate": 0.0005983925798461325, "loss": 3.7973, "step": 2018 }, { "epoch": 0.1, "grad_norm": 0.6606065630912781, "learning_rate": 0.000598390987794934, "loss": 3.6554, "step": 2019 }, { "epoch": 0.1, "grad_norm": 0.5416567325592041, "learning_rate": 0.0005983893949578311, "loss": 3.9936, "step": 2020 }, { "epoch": 0.1, "grad_norm": 0.7004711627960205, "learning_rate": 0.000598387801334828, "loss": 3.8626, "step": 2021 }, { "epoch": 0.1, "grad_norm": 0.572654664516449, "learning_rate": 0.0005983862069259288, "loss": 3.517, "step": 2022 }, { "epoch": 0.1, "grad_norm": 0.6083114147186279, "learning_rate": 0.0005983846117311377, "loss": 3.6627, "step": 2023 }, { "epoch": 0.1, "grad_norm": 0.639316976070404, "learning_rate": 0.0005983830157504591, "loss": 3.6865, "step": 2024 }, { "epoch": 0.1, "grad_norm": 0.715901792049408, "learning_rate": 0.0005983814189838969, "loss": 4.052, "step": 2025 }, { "epoch": 0.1, "grad_norm": 0.581177830696106, "learning_rate": 0.0005983798214314555, "loss": 3.903, "step": 2026 }, { "epoch": 0.1, "grad_norm": 0.5649012327194214, "learning_rate": 0.0005983782230931391, "loss": 3.772, "step": 2027 }, { "epoch": 0.1, "grad_norm": 0.6346585154533386, "learning_rate": 0.000598376623968952, "loss": 3.5737, "step": 2028 }, { "epoch": 0.1, "grad_norm": 0.70558762550354, "learning_rate": 0.0005983750240588982, "loss": 3.7745, "step": 2029 }, { "epoch": 0.1, "grad_norm": 0.6112974286079407, "learning_rate": 0.000598373423362982, "loss": 3.7495, "step": 2030 }, { "epoch": 0.1, "grad_norm": 0.5762995481491089, "learning_rate": 0.0005983718218812075, "loss": 3.8097, "step": 2031 }, { "epoch": 0.1, "grad_norm": 0.5994111895561218, "learning_rate": 0.0005983702196135793, "loss": 3.5908, "step": 2032 }, { "epoch": 0.1, "grad_norm": 0.6250690221786499, "learning_rate": 0.0005983686165601012, "loss": 3.8148, "step": 2033 }, { "epoch": 0.1, "grad_norm": 0.5601446628570557, "learning_rate": 0.0005983670127207777, "loss": 3.8132, "step": 2034 }, { "epoch": 0.1, "grad_norm": 0.619858980178833, "learning_rate": 0.0005983654080956128, "loss": 3.698, "step": 2035 }, { "epoch": 0.1, "grad_norm": 0.6175944805145264, "learning_rate": 0.0005983638026846108, "loss": 3.7716, "step": 2036 }, { "epoch": 0.1, "grad_norm": 0.6550002098083496, "learning_rate": 0.0005983621964877761, "loss": 3.9279, "step": 2037 }, { "epoch": 0.1, "grad_norm": 0.5929962396621704, "learning_rate": 0.0005983605895051127, "loss": 3.7389, "step": 2038 }, { "epoch": 0.1, "grad_norm": 0.6178693771362305, "learning_rate": 0.0005983589817366249, "loss": 3.6407, "step": 2039 }, { "epoch": 0.1, "grad_norm": 0.6257408857345581, "learning_rate": 0.0005983573731823172, "loss": 3.8194, "step": 2040 }, { "epoch": 0.1, "grad_norm": 0.6745170950889587, "learning_rate": 0.0005983557638421933, "loss": 3.7114, "step": 2041 }, { "epoch": 0.1, "grad_norm": 0.6017802357673645, "learning_rate": 0.000598354153716258, "loss": 3.6747, "step": 2042 }, { "epoch": 0.1, "grad_norm": 0.5711172223091125, "learning_rate": 0.000598352542804515, "loss": 3.7881, "step": 2043 }, { "epoch": 0.1, "grad_norm": 0.6025456786155701, "learning_rate": 0.000598350931106969, "loss": 3.7451, "step": 2044 }, { "epoch": 0.1, "grad_norm": 0.6247069239616394, "learning_rate": 0.0005983493186236241, "loss": 3.5347, "step": 2045 }, { "epoch": 0.1, "grad_norm": 0.6139021515846252, "learning_rate": 0.0005983477053544843, "loss": 3.7963, "step": 2046 }, { "epoch": 0.1, "grad_norm": 0.5898500084877014, "learning_rate": 0.0005983460912995542, "loss": 3.6405, "step": 2047 }, { "epoch": 0.1, "grad_norm": 0.5590971112251282, "learning_rate": 0.000598344476458838, "loss": 3.8969, "step": 2048 }, { "epoch": 0.1, "grad_norm": 0.613969624042511, "learning_rate": 0.0005983428608323397, "loss": 3.7783, "step": 2049 }, { "epoch": 0.1, "grad_norm": 0.6687971353530884, "learning_rate": 0.0005983412444200639, "loss": 3.541, "step": 2050 }, { "epoch": 0.1, "grad_norm": 0.5752463340759277, "learning_rate": 0.0005983396272220146, "loss": 3.7437, "step": 2051 }, { "epoch": 0.1, "grad_norm": 0.6484838724136353, "learning_rate": 0.000598338009238196, "loss": 3.5128, "step": 2052 }, { "epoch": 0.1, "grad_norm": 0.6085900664329529, "learning_rate": 0.0005983363904686127, "loss": 3.8193, "step": 2053 }, { "epoch": 0.1, "grad_norm": 0.6057000756263733, "learning_rate": 0.0005983347709132686, "loss": 3.567, "step": 2054 }, { "epoch": 0.1, "grad_norm": 0.580085039138794, "learning_rate": 0.0005983331505721683, "loss": 3.9294, "step": 2055 }, { "epoch": 0.1, "grad_norm": 0.5920203924179077, "learning_rate": 0.0005983315294453158, "loss": 3.8553, "step": 2056 }, { "epoch": 0.1, "grad_norm": 0.5826650857925415, "learning_rate": 0.0005983299075327155, "loss": 3.6751, "step": 2057 }, { "epoch": 0.1, "grad_norm": 0.6206904053688049, "learning_rate": 0.0005983282848343717, "loss": 3.8432, "step": 2058 }, { "epoch": 0.1, "grad_norm": 0.5721469521522522, "learning_rate": 0.0005983266613502885, "loss": 3.9363, "step": 2059 }, { "epoch": 0.1, "grad_norm": 0.545362114906311, "learning_rate": 0.0005983250370804702, "loss": 3.6203, "step": 2060 }, { "epoch": 0.1, "grad_norm": 0.5554171800613403, "learning_rate": 0.0005983234120249213, "loss": 3.9277, "step": 2061 }, { "epoch": 0.1, "grad_norm": 0.606959879398346, "learning_rate": 0.0005983217861836459, "loss": 3.5465, "step": 2062 }, { "epoch": 0.1, "grad_norm": 0.5941494703292847, "learning_rate": 0.0005983201595566484, "loss": 3.9184, "step": 2063 }, { "epoch": 0.1, "grad_norm": 0.6147699952125549, "learning_rate": 0.000598318532143933, "loss": 3.7999, "step": 2064 }, { "epoch": 0.1, "grad_norm": 0.570624589920044, "learning_rate": 0.000598316903945504, "loss": 3.6566, "step": 2065 }, { "epoch": 0.1, "grad_norm": 0.5827869772911072, "learning_rate": 0.0005983152749613656, "loss": 3.7424, "step": 2066 }, { "epoch": 0.1, "grad_norm": 0.6157439351081848, "learning_rate": 0.0005983136451915222, "loss": 3.569, "step": 2067 }, { "epoch": 0.1, "grad_norm": 0.6273912787437439, "learning_rate": 0.0005983120146359781, "loss": 3.8541, "step": 2068 }, { "epoch": 0.1, "grad_norm": 0.589424192905426, "learning_rate": 0.0005983103832947376, "loss": 3.7674, "step": 2069 }, { "epoch": 0.1, "grad_norm": 0.5925363898277283, "learning_rate": 0.0005983087511678049, "loss": 3.7414, "step": 2070 }, { "epoch": 0.1, "grad_norm": 0.6019349098205566, "learning_rate": 0.0005983071182551845, "loss": 3.8578, "step": 2071 }, { "epoch": 0.1, "grad_norm": 0.5667245984077454, "learning_rate": 0.0005983054845568803, "loss": 3.7414, "step": 2072 }, { "epoch": 0.1, "grad_norm": 0.5880571007728577, "learning_rate": 0.0005983038500728971, "loss": 3.8056, "step": 2073 }, { "epoch": 0.1, "grad_norm": 0.6357612013816833, "learning_rate": 0.0005983022148032389, "loss": 3.624, "step": 2074 }, { "epoch": 0.1, "grad_norm": 0.6150004267692566, "learning_rate": 0.00059830057874791, "loss": 3.6649, "step": 2075 }, { "epoch": 0.1, "grad_norm": 0.5906575918197632, "learning_rate": 0.0005982989419069148, "loss": 3.6441, "step": 2076 }, { "epoch": 0.1, "grad_norm": 0.6579732298851013, "learning_rate": 0.0005982973042802578, "loss": 3.7872, "step": 2077 }, { "epoch": 0.1, "grad_norm": 0.5626339912414551, "learning_rate": 0.0005982956658679429, "loss": 3.6208, "step": 2078 }, { "epoch": 0.1, "grad_norm": 0.6239688396453857, "learning_rate": 0.0005982940266699747, "loss": 3.5179, "step": 2079 }, { "epoch": 0.1, "grad_norm": 0.5556694269180298, "learning_rate": 0.0005982923866863574, "loss": 3.732, "step": 2080 }, { "epoch": 0.1, "grad_norm": 0.6616612672805786, "learning_rate": 0.0005982907459170954, "loss": 3.6723, "step": 2081 }, { "epoch": 0.1, "grad_norm": 0.592753529548645, "learning_rate": 0.0005982891043621929, "loss": 3.8336, "step": 2082 }, { "epoch": 0.1, "grad_norm": 0.6064931750297546, "learning_rate": 0.0005982874620216543, "loss": 3.6037, "step": 2083 }, { "epoch": 0.1, "grad_norm": 0.6325314044952393, "learning_rate": 0.000598285818895484, "loss": 3.5522, "step": 2084 }, { "epoch": 0.1, "grad_norm": 0.6174896955490112, "learning_rate": 0.0005982841749836863, "loss": 3.8025, "step": 2085 }, { "epoch": 0.1, "grad_norm": 0.5929061770439148, "learning_rate": 0.0005982825302862654, "loss": 3.7384, "step": 2086 }, { "epoch": 0.1, "grad_norm": 0.597075879573822, "learning_rate": 0.0005982808848032258, "loss": 3.8178, "step": 2087 }, { "epoch": 0.1, "grad_norm": 0.5691017508506775, "learning_rate": 0.0005982792385345717, "loss": 3.6708, "step": 2088 }, { "epoch": 0.1, "grad_norm": 0.6304391026496887, "learning_rate": 0.0005982775914803075, "loss": 3.8078, "step": 2089 }, { "epoch": 0.1, "grad_norm": 0.6006346344947815, "learning_rate": 0.0005982759436404376, "loss": 3.7204, "step": 2090 }, { "epoch": 0.1, "grad_norm": 0.6283980011940002, "learning_rate": 0.0005982742950149661, "loss": 3.4926, "step": 2091 }, { "epoch": 0.1, "grad_norm": 0.6308605074882507, "learning_rate": 0.0005982726456038977, "loss": 3.7156, "step": 2092 }, { "epoch": 0.1, "grad_norm": 0.6353732943534851, "learning_rate": 0.0005982709954072365, "loss": 3.6275, "step": 2093 }, { "epoch": 0.1, "grad_norm": 0.7076538801193237, "learning_rate": 0.0005982693444249868, "loss": 3.7353, "step": 2094 }, { "epoch": 0.1, "grad_norm": 0.5855659246444702, "learning_rate": 0.0005982676926571532, "loss": 3.555, "step": 2095 }, { "epoch": 0.1, "grad_norm": 0.6150306463241577, "learning_rate": 0.0005982660401037398, "loss": 3.6312, "step": 2096 }, { "epoch": 0.1, "grad_norm": 0.6198219656944275, "learning_rate": 0.000598264386764751, "loss": 3.7313, "step": 2097 }, { "epoch": 0.1, "grad_norm": 0.6115104556083679, "learning_rate": 0.0005982627326401914, "loss": 3.8263, "step": 2098 }, { "epoch": 0.1, "grad_norm": 0.6037588119506836, "learning_rate": 0.000598261077730065, "loss": 3.6085, "step": 2099 }, { "epoch": 0.1, "grad_norm": 0.6247969269752502, "learning_rate": 0.0005982594220343764, "loss": 3.7709, "step": 2100 }, { "epoch": 0.1, "grad_norm": 0.6451380848884583, "learning_rate": 0.0005982577655531298, "loss": 3.8026, "step": 2101 }, { "epoch": 0.1, "grad_norm": 0.6023620963096619, "learning_rate": 0.0005982561082863298, "loss": 3.5155, "step": 2102 }, { "epoch": 0.1, "grad_norm": 0.6898672580718994, "learning_rate": 0.0005982544502339805, "loss": 3.7734, "step": 2103 }, { "epoch": 0.1, "grad_norm": 0.620043933391571, "learning_rate": 0.0005982527913960863, "loss": 3.7091, "step": 2104 }, { "epoch": 0.1, "grad_norm": 0.6267081499099731, "learning_rate": 0.0005982511317726518, "loss": 3.7821, "step": 2105 }, { "epoch": 0.1, "grad_norm": 0.5801389813423157, "learning_rate": 0.0005982494713636812, "loss": 3.5639, "step": 2106 }, { "epoch": 0.1, "grad_norm": 0.5749569535255432, "learning_rate": 0.0005982478101691788, "loss": 3.7611, "step": 2107 }, { "epoch": 0.1, "grad_norm": 0.6347259283065796, "learning_rate": 0.0005982461481891491, "loss": 3.6399, "step": 2108 }, { "epoch": 0.1, "grad_norm": 0.6066511869430542, "learning_rate": 0.0005982444854235964, "loss": 3.6464, "step": 2109 }, { "epoch": 0.1, "grad_norm": 0.6020014882087708, "learning_rate": 0.0005982428218725252, "loss": 3.5554, "step": 2110 }, { "epoch": 0.1, "grad_norm": 0.6045134663581848, "learning_rate": 0.0005982411575359398, "loss": 3.835, "step": 2111 }, { "epoch": 0.1, "grad_norm": 0.6153691411018372, "learning_rate": 0.0005982394924138446, "loss": 3.2436, "step": 2112 }, { "epoch": 0.1, "grad_norm": 0.608252227306366, "learning_rate": 0.0005982378265062439, "loss": 3.7666, "step": 2113 }, { "epoch": 0.1, "grad_norm": 0.5522031784057617, "learning_rate": 0.0005982361598131422, "loss": 3.5962, "step": 2114 }, { "epoch": 0.1, "grad_norm": 0.6099326014518738, "learning_rate": 0.0005982344923345439, "loss": 3.5498, "step": 2115 }, { "epoch": 0.1, "grad_norm": 0.6369093060493469, "learning_rate": 0.0005982328240704533, "loss": 3.6785, "step": 2116 }, { "epoch": 0.1, "grad_norm": 0.5921229720115662, "learning_rate": 0.0005982311550208749, "loss": 3.7165, "step": 2117 }, { "epoch": 0.1, "grad_norm": 0.6032543778419495, "learning_rate": 0.000598229485185813, "loss": 3.6678, "step": 2118 }, { "epoch": 0.1, "grad_norm": 0.616646409034729, "learning_rate": 0.000598227814565272, "loss": 3.6858, "step": 2119 }, { "epoch": 0.1, "grad_norm": 0.610694944858551, "learning_rate": 0.0005982261431592564, "loss": 3.5347, "step": 2120 }, { "epoch": 0.1, "grad_norm": 0.5749186873435974, "learning_rate": 0.0005982244709677704, "loss": 3.8255, "step": 2121 }, { "epoch": 0.1, "grad_norm": 0.5923564434051514, "learning_rate": 0.0005982227979908186, "loss": 3.4269, "step": 2122 }, { "epoch": 0.1, "grad_norm": 0.6011732816696167, "learning_rate": 0.0005982211242284054, "loss": 3.4419, "step": 2123 }, { "epoch": 0.1, "grad_norm": 0.5927369594573975, "learning_rate": 0.0005982194496805351, "loss": 3.6693, "step": 2124 }, { "epoch": 0.1, "grad_norm": 0.575406551361084, "learning_rate": 0.0005982177743472122, "loss": 3.688, "step": 2125 }, { "epoch": 0.1, "grad_norm": 0.6836567521095276, "learning_rate": 0.000598216098228441, "loss": 3.7712, "step": 2126 }, { "epoch": 0.1, "grad_norm": 0.5466444492340088, "learning_rate": 0.0005982144213242261, "loss": 3.7642, "step": 2127 }, { "epoch": 0.1, "grad_norm": 0.6006033420562744, "learning_rate": 0.0005982127436345718, "loss": 3.6812, "step": 2128 }, { "epoch": 0.1, "grad_norm": 0.6072657704353333, "learning_rate": 0.0005982110651594824, "loss": 3.7972, "step": 2129 }, { "epoch": 0.1, "grad_norm": 0.6421571373939514, "learning_rate": 0.0005982093858989625, "loss": 3.7497, "step": 2130 }, { "epoch": 0.1, "grad_norm": 0.5948945879936218, "learning_rate": 0.0005982077058530165, "loss": 3.8954, "step": 2131 }, { "epoch": 0.1, "grad_norm": 0.6586947441101074, "learning_rate": 0.0005982060250216488, "loss": 3.7188, "step": 2132 }, { "epoch": 0.1, "grad_norm": 0.6014887094497681, "learning_rate": 0.0005982043434048638, "loss": 3.7739, "step": 2133 }, { "epoch": 0.1, "grad_norm": 0.6018311977386475, "learning_rate": 0.000598202661002666, "loss": 3.6456, "step": 2134 }, { "epoch": 0.1, "grad_norm": 0.6336399912834167, "learning_rate": 0.0005982009778150596, "loss": 3.6948, "step": 2135 }, { "epoch": 0.1, "grad_norm": 0.586958646774292, "learning_rate": 0.0005981992938420493, "loss": 3.7109, "step": 2136 }, { "epoch": 0.1, "grad_norm": 0.641817033290863, "learning_rate": 0.0005981976090836396, "loss": 3.9746, "step": 2137 }, { "epoch": 0.1, "grad_norm": 0.5889933705329895, "learning_rate": 0.0005981959235398347, "loss": 3.5553, "step": 2138 }, { "epoch": 0.1, "grad_norm": 0.5783056616783142, "learning_rate": 0.0005981942372106391, "loss": 3.7265, "step": 2139 }, { "epoch": 0.1, "grad_norm": 0.6405118107795715, "learning_rate": 0.0005981925500960574, "loss": 3.8398, "step": 2140 }, { "epoch": 0.1, "grad_norm": 0.585486114025116, "learning_rate": 0.0005981908621960937, "loss": 3.7428, "step": 2141 }, { "epoch": 0.1, "grad_norm": 0.6322093605995178, "learning_rate": 0.0005981891735107528, "loss": 3.7375, "step": 2142 }, { "epoch": 0.11, "grad_norm": 0.6491268873214722, "learning_rate": 0.0005981874840400389, "loss": 3.7564, "step": 2143 }, { "epoch": 0.11, "grad_norm": 0.5832539796829224, "learning_rate": 0.0005981857937839566, "loss": 3.7131, "step": 2144 }, { "epoch": 0.11, "grad_norm": 0.5929149389266968, "learning_rate": 0.0005981841027425102, "loss": 3.6175, "step": 2145 }, { "epoch": 0.11, "grad_norm": 0.5840476155281067, "learning_rate": 0.0005981824109157044, "loss": 3.5056, "step": 2146 }, { "epoch": 0.11, "grad_norm": 0.5910953879356384, "learning_rate": 0.0005981807183035436, "loss": 3.7863, "step": 2147 }, { "epoch": 0.11, "grad_norm": 0.6023166179656982, "learning_rate": 0.000598179024906032, "loss": 3.4796, "step": 2148 }, { "epoch": 0.11, "grad_norm": 0.6427032351493835, "learning_rate": 0.0005981773307231743, "loss": 3.5623, "step": 2149 }, { "epoch": 0.11, "grad_norm": 0.5569535493850708, "learning_rate": 0.0005981756357549749, "loss": 3.7547, "step": 2150 }, { "epoch": 0.11, "grad_norm": 0.6145084500312805, "learning_rate": 0.0005981739400014383, "loss": 3.8746, "step": 2151 }, { "epoch": 0.11, "grad_norm": 0.6303276419639587, "learning_rate": 0.0005981722434625689, "loss": 3.8413, "step": 2152 }, { "epoch": 0.11, "grad_norm": 0.6206766963005066, "learning_rate": 0.0005981705461383712, "loss": 3.7907, "step": 2153 }, { "epoch": 0.11, "grad_norm": 0.6251280903816223, "learning_rate": 0.0005981688480288496, "loss": 3.8965, "step": 2154 }, { "epoch": 0.11, "grad_norm": 0.6077283620834351, "learning_rate": 0.0005981671491340087, "loss": 3.6284, "step": 2155 }, { "epoch": 0.11, "grad_norm": 0.6264916658401489, "learning_rate": 0.0005981654494538528, "loss": 3.7238, "step": 2156 }, { "epoch": 0.11, "grad_norm": 0.5991299152374268, "learning_rate": 0.0005981637489883866, "loss": 3.7317, "step": 2157 }, { "epoch": 0.11, "grad_norm": 0.6194969415664673, "learning_rate": 0.0005981620477376144, "loss": 3.7232, "step": 2158 }, { "epoch": 0.11, "grad_norm": 0.5819993019104004, "learning_rate": 0.0005981603457015409, "loss": 3.7031, "step": 2159 }, { "epoch": 0.11, "grad_norm": 0.6135178804397583, "learning_rate": 0.0005981586428801703, "loss": 3.7341, "step": 2160 }, { "epoch": 0.11, "grad_norm": 0.5985665917396545, "learning_rate": 0.0005981569392735072, "loss": 3.774, "step": 2161 }, { "epoch": 0.11, "grad_norm": 0.5931420922279358, "learning_rate": 0.0005981552348815562, "loss": 3.6447, "step": 2162 }, { "epoch": 0.11, "grad_norm": 0.7057386636734009, "learning_rate": 0.0005981535297043216, "loss": 3.6028, "step": 2163 }, { "epoch": 0.11, "grad_norm": 0.5932669043540955, "learning_rate": 0.0005981518237418081, "loss": 3.8519, "step": 2164 }, { "epoch": 0.11, "grad_norm": 0.6052893996238708, "learning_rate": 0.0005981501169940199, "loss": 3.8123, "step": 2165 }, { "epoch": 0.11, "grad_norm": 1.3449329137802124, "learning_rate": 0.0005981484094609618, "loss": 3.8314, "step": 2166 }, { "epoch": 0.11, "grad_norm": 0.5868152976036072, "learning_rate": 0.0005981467011426381, "loss": 3.7069, "step": 2167 }, { "epoch": 0.11, "grad_norm": 0.6063858866691589, "learning_rate": 0.0005981449920390534, "loss": 3.7754, "step": 2168 }, { "epoch": 0.11, "grad_norm": 0.6096360087394714, "learning_rate": 0.0005981432821502122, "loss": 3.6768, "step": 2169 }, { "epoch": 0.11, "grad_norm": 0.6433746814727783, "learning_rate": 0.000598141571476119, "loss": 3.781, "step": 2170 }, { "epoch": 0.11, "grad_norm": 0.5999099016189575, "learning_rate": 0.0005981398600167782, "loss": 3.9962, "step": 2171 }, { "epoch": 0.11, "grad_norm": 0.6435213685035706, "learning_rate": 0.0005981381477721944, "loss": 3.5761, "step": 2172 }, { "epoch": 0.11, "grad_norm": 0.5516665577888489, "learning_rate": 0.0005981364347423722, "loss": 3.6282, "step": 2173 }, { "epoch": 0.11, "grad_norm": 0.5798248052597046, "learning_rate": 0.000598134720927316, "loss": 3.54, "step": 2174 }, { "epoch": 0.11, "grad_norm": 0.6035856604576111, "learning_rate": 0.0005981330063270302, "loss": 3.4801, "step": 2175 }, { "epoch": 0.11, "grad_norm": 0.6492979526519775, "learning_rate": 0.0005981312909415195, "loss": 3.7736, "step": 2176 }, { "epoch": 0.11, "grad_norm": 0.6277186870574951, "learning_rate": 0.0005981295747707882, "loss": 3.9117, "step": 2177 }, { "epoch": 0.11, "grad_norm": 0.5910218954086304, "learning_rate": 0.0005981278578148412, "loss": 3.8235, "step": 2178 }, { "epoch": 0.11, "grad_norm": 0.7807388305664062, "learning_rate": 0.0005981261400736827, "loss": 3.6833, "step": 2179 }, { "epoch": 0.11, "grad_norm": 0.5943345427513123, "learning_rate": 0.0005981244215473174, "loss": 3.7929, "step": 2180 }, { "epoch": 0.11, "grad_norm": 0.578851580619812, "learning_rate": 0.0005981227022357497, "loss": 3.7879, "step": 2181 }, { "epoch": 0.11, "grad_norm": 0.6276848316192627, "learning_rate": 0.0005981209821389841, "loss": 3.4234, "step": 2182 }, { "epoch": 0.11, "grad_norm": 0.6022424697875977, "learning_rate": 0.0005981192612570253, "loss": 3.7131, "step": 2183 }, { "epoch": 0.11, "grad_norm": 0.669609546661377, "learning_rate": 0.0005981175395898777, "loss": 3.4337, "step": 2184 }, { "epoch": 0.11, "grad_norm": 0.6255220174789429, "learning_rate": 0.0005981158171375459, "loss": 3.7007, "step": 2185 }, { "epoch": 0.11, "grad_norm": 0.5757836699485779, "learning_rate": 0.0005981140939000344, "loss": 3.6742, "step": 2186 }, { "epoch": 0.11, "grad_norm": 0.552218496799469, "learning_rate": 0.0005981123698773478, "loss": 3.7315, "step": 2187 }, { "epoch": 0.11, "grad_norm": 0.5861135125160217, "learning_rate": 0.0005981106450694904, "loss": 3.842, "step": 2188 }, { "epoch": 0.11, "grad_norm": 0.6071463823318481, "learning_rate": 0.0005981089194764672, "loss": 3.7262, "step": 2189 }, { "epoch": 0.11, "grad_norm": 0.6482838988304138, "learning_rate": 0.0005981071930982823, "loss": 3.4363, "step": 2190 }, { "epoch": 0.11, "grad_norm": 0.5629037618637085, "learning_rate": 0.0005981054659349405, "loss": 3.7168, "step": 2191 }, { "epoch": 0.11, "grad_norm": 0.6590587496757507, "learning_rate": 0.0005981037379864463, "loss": 3.7393, "step": 2192 }, { "epoch": 0.11, "grad_norm": 0.6444178819656372, "learning_rate": 0.0005981020092528041, "loss": 3.6663, "step": 2193 }, { "epoch": 0.11, "grad_norm": 0.5685077905654907, "learning_rate": 0.0005981002797340187, "loss": 3.8345, "step": 2194 }, { "epoch": 0.11, "grad_norm": 0.5644817352294922, "learning_rate": 0.0005980985494300946, "loss": 3.632, "step": 2195 }, { "epoch": 0.11, "grad_norm": 0.6295031309127808, "learning_rate": 0.000598096818341036, "loss": 3.5742, "step": 2196 }, { "epoch": 0.11, "grad_norm": 0.5638957023620605, "learning_rate": 0.000598095086466848, "loss": 3.7152, "step": 2197 }, { "epoch": 0.11, "grad_norm": 0.5969396829605103, "learning_rate": 0.0005980933538075349, "loss": 3.5886, "step": 2198 }, { "epoch": 0.11, "grad_norm": 0.5802797675132751, "learning_rate": 0.0005980916203631011, "loss": 3.6731, "step": 2199 }, { "epoch": 0.11, "grad_norm": 0.5977463722229004, "learning_rate": 0.0005980898861335515, "loss": 3.6158, "step": 2200 }, { "epoch": 0.11, "grad_norm": 0.5731961727142334, "learning_rate": 0.0005980881511188904, "loss": 3.8086, "step": 2201 }, { "epoch": 0.11, "grad_norm": 0.5767385363578796, "learning_rate": 0.0005980864153191226, "loss": 3.5526, "step": 2202 }, { "epoch": 0.11, "grad_norm": 0.6023370623588562, "learning_rate": 0.0005980846787342524, "loss": 3.4739, "step": 2203 }, { "epoch": 0.11, "grad_norm": 0.5843697786331177, "learning_rate": 0.0005980829413642847, "loss": 3.6017, "step": 2204 }, { "epoch": 0.11, "grad_norm": 0.5621604323387146, "learning_rate": 0.0005980812032092238, "loss": 3.8069, "step": 2205 }, { "epoch": 0.11, "grad_norm": 0.6262988448143005, "learning_rate": 0.0005980794642690744, "loss": 3.8661, "step": 2206 }, { "epoch": 0.11, "grad_norm": 0.594237744808197, "learning_rate": 0.0005980777245438411, "loss": 3.6751, "step": 2207 }, { "epoch": 0.11, "grad_norm": 0.5916548371315002, "learning_rate": 0.0005980759840335284, "loss": 3.7278, "step": 2208 }, { "epoch": 0.11, "grad_norm": 0.5857176184654236, "learning_rate": 0.0005980742427381409, "loss": 3.646, "step": 2209 }, { "epoch": 0.11, "grad_norm": 0.6896742582321167, "learning_rate": 0.0005980725006576832, "loss": 3.5892, "step": 2210 }, { "epoch": 0.11, "grad_norm": 0.678264856338501, "learning_rate": 0.0005980707577921599, "loss": 3.8561, "step": 2211 }, { "epoch": 0.11, "grad_norm": 0.5566852688789368, "learning_rate": 0.0005980690141415756, "loss": 3.6284, "step": 2212 }, { "epoch": 0.11, "grad_norm": 0.5891258716583252, "learning_rate": 0.000598067269705935, "loss": 3.4689, "step": 2213 }, { "epoch": 0.11, "grad_norm": 0.6085823178291321, "learning_rate": 0.0005980655244852424, "loss": 3.8165, "step": 2214 }, { "epoch": 0.11, "grad_norm": 0.6259714365005493, "learning_rate": 0.0005980637784795027, "loss": 3.454, "step": 2215 }, { "epoch": 0.11, "grad_norm": 0.6584763526916504, "learning_rate": 0.0005980620316887203, "loss": 3.6297, "step": 2216 }, { "epoch": 0.11, "grad_norm": 0.5733041167259216, "learning_rate": 0.0005980602841128998, "loss": 3.8796, "step": 2217 }, { "epoch": 0.11, "grad_norm": 0.6462345123291016, "learning_rate": 0.000598058535752046, "loss": 3.6018, "step": 2218 }, { "epoch": 0.11, "grad_norm": 0.6512056589126587, "learning_rate": 0.0005980567866061634, "loss": 3.7185, "step": 2219 }, { "epoch": 0.11, "grad_norm": 0.6104543209075928, "learning_rate": 0.0005980550366752565, "loss": 3.313, "step": 2220 }, { "epoch": 0.11, "grad_norm": 0.5999908447265625, "learning_rate": 0.00059805328595933, "loss": 3.7027, "step": 2221 }, { "epoch": 0.11, "grad_norm": 0.5871838331222534, "learning_rate": 0.0005980515344583886, "loss": 3.8634, "step": 2222 }, { "epoch": 0.11, "grad_norm": 0.5965851545333862, "learning_rate": 0.0005980497821724366, "loss": 3.6761, "step": 2223 }, { "epoch": 0.11, "grad_norm": 0.6039659380912781, "learning_rate": 0.000598048029101479, "loss": 3.769, "step": 2224 }, { "epoch": 0.11, "grad_norm": 0.6120340824127197, "learning_rate": 0.0005980462752455204, "loss": 3.5583, "step": 2225 }, { "epoch": 0.11, "grad_norm": 0.605404257774353, "learning_rate": 0.0005980445206045649, "loss": 3.8279, "step": 2226 }, { "epoch": 0.11, "grad_norm": 0.5787206888198853, "learning_rate": 0.0005980427651786179, "loss": 3.8282, "step": 2227 }, { "epoch": 0.11, "grad_norm": 0.5788346529006958, "learning_rate": 0.0005980410089676833, "loss": 3.9292, "step": 2228 }, { "epoch": 0.11, "grad_norm": 0.6174167394638062, "learning_rate": 0.0005980392519717661, "loss": 3.3578, "step": 2229 }, { "epoch": 0.11, "grad_norm": 0.5495995283126831, "learning_rate": 0.000598037494190871, "loss": 3.7939, "step": 2230 }, { "epoch": 0.11, "grad_norm": 0.6039677262306213, "learning_rate": 0.0005980357356250023, "loss": 3.7216, "step": 2231 }, { "epoch": 0.11, "grad_norm": 0.598181962966919, "learning_rate": 0.0005980339762741651, "loss": 3.7851, "step": 2232 }, { "epoch": 0.11, "grad_norm": 0.5918916463851929, "learning_rate": 0.0005980322161383636, "loss": 3.6826, "step": 2233 }, { "epoch": 0.11, "grad_norm": 0.5864900350570679, "learning_rate": 0.0005980304552176026, "loss": 3.8629, "step": 2234 }, { "epoch": 0.11, "grad_norm": 0.5640453696250916, "learning_rate": 0.0005980286935118868, "loss": 3.4874, "step": 2235 }, { "epoch": 0.11, "grad_norm": 0.6437229514122009, "learning_rate": 0.0005980269310212207, "loss": 3.7286, "step": 2236 }, { "epoch": 0.11, "grad_norm": 0.5866195559501648, "learning_rate": 0.000598025167745609, "loss": 3.6045, "step": 2237 }, { "epoch": 0.11, "grad_norm": 0.6194109320640564, "learning_rate": 0.0005980234036850565, "loss": 3.7216, "step": 2238 }, { "epoch": 0.11, "grad_norm": 0.6149755120277405, "learning_rate": 0.0005980216388395676, "loss": 3.7214, "step": 2239 }, { "epoch": 0.11, "grad_norm": 0.5646103024482727, "learning_rate": 0.0005980198732091471, "loss": 3.6628, "step": 2240 }, { "epoch": 0.11, "grad_norm": 0.6370061635971069, "learning_rate": 0.0005980181067937996, "loss": 3.7355, "step": 2241 }, { "epoch": 0.11, "grad_norm": 0.6375163793563843, "learning_rate": 0.0005980163395935297, "loss": 3.6579, "step": 2242 }, { "epoch": 0.11, "grad_norm": 0.600969135761261, "learning_rate": 0.0005980145716083423, "loss": 3.8107, "step": 2243 }, { "epoch": 0.11, "grad_norm": 0.5838377475738525, "learning_rate": 0.0005980128028382416, "loss": 3.8588, "step": 2244 }, { "epoch": 0.11, "grad_norm": 0.5946401357650757, "learning_rate": 0.0005980110332832328, "loss": 3.7246, "step": 2245 }, { "epoch": 0.11, "grad_norm": 0.6003056764602661, "learning_rate": 0.0005980092629433202, "loss": 3.4673, "step": 2246 }, { "epoch": 0.11, "grad_norm": 0.6012457609176636, "learning_rate": 0.0005980074918185083, "loss": 3.7586, "step": 2247 }, { "epoch": 0.11, "grad_norm": 0.5687946677207947, "learning_rate": 0.0005980057199088024, "loss": 3.4091, "step": 2248 }, { "epoch": 0.11, "grad_norm": 0.6248691082000732, "learning_rate": 0.0005980039472142065, "loss": 3.7712, "step": 2249 }, { "epoch": 0.11, "grad_norm": 0.5778698325157166, "learning_rate": 0.0005980021737347258, "loss": 3.629, "step": 2250 }, { "epoch": 0.11, "grad_norm": 1.24909245967865, "learning_rate": 0.0005980003994703647, "loss": 4.0102, "step": 2251 }, { "epoch": 0.11, "grad_norm": 0.6096078157424927, "learning_rate": 0.0005979986244211277, "loss": 3.7681, "step": 2252 }, { "epoch": 0.11, "grad_norm": 0.6022841334342957, "learning_rate": 0.0005979968485870198, "loss": 3.6394, "step": 2253 }, { "epoch": 0.11, "grad_norm": 0.6294955015182495, "learning_rate": 0.0005979950719680455, "loss": 3.8566, "step": 2254 }, { "epoch": 0.11, "grad_norm": 0.641569972038269, "learning_rate": 0.0005979932945642096, "loss": 3.5181, "step": 2255 }, { "epoch": 0.11, "grad_norm": 0.5697000622749329, "learning_rate": 0.0005979915163755168, "loss": 3.6019, "step": 2256 }, { "epoch": 0.11, "grad_norm": 0.6431049704551697, "learning_rate": 0.0005979897374019715, "loss": 3.6606, "step": 2257 }, { "epoch": 0.11, "grad_norm": 0.6189910769462585, "learning_rate": 0.0005979879576435786, "loss": 3.6255, "step": 2258 }, { "epoch": 0.11, "grad_norm": 0.6022100448608398, "learning_rate": 0.0005979861771003429, "loss": 3.7233, "step": 2259 }, { "epoch": 0.11, "grad_norm": 0.6038528084754944, "learning_rate": 0.0005979843957722688, "loss": 3.8155, "step": 2260 }, { "epoch": 0.11, "grad_norm": 0.5769616961479187, "learning_rate": 0.0005979826136593612, "loss": 3.4622, "step": 2261 }, { "epoch": 0.11, "grad_norm": 0.5804604291915894, "learning_rate": 0.0005979808307616248, "loss": 3.7446, "step": 2262 }, { "epoch": 0.11, "grad_norm": 0.5664138793945312, "learning_rate": 0.0005979790470790642, "loss": 3.9118, "step": 2263 }, { "epoch": 0.11, "grad_norm": 0.6211099028587341, "learning_rate": 0.0005979772626116841, "loss": 3.7406, "step": 2264 }, { "epoch": 0.11, "grad_norm": 0.624521791934967, "learning_rate": 0.0005979754773594894, "loss": 3.7997, "step": 2265 }, { "epoch": 0.11, "grad_norm": 0.5502910614013672, "learning_rate": 0.0005979736913224844, "loss": 3.7596, "step": 2266 }, { "epoch": 0.11, "grad_norm": 0.5801634788513184, "learning_rate": 0.0005979719045006743, "loss": 3.5966, "step": 2267 }, { "epoch": 0.11, "grad_norm": 0.6068748831748962, "learning_rate": 0.0005979701168940633, "loss": 3.5563, "step": 2268 }, { "epoch": 0.11, "grad_norm": 0.6216042041778564, "learning_rate": 0.0005979683285026565, "loss": 3.4848, "step": 2269 }, { "epoch": 0.11, "grad_norm": 0.5503404140472412, "learning_rate": 0.0005979665393264585, "loss": 3.7462, "step": 2270 }, { "epoch": 0.11, "grad_norm": 0.6132727265357971, "learning_rate": 0.000597964749365474, "loss": 3.8308, "step": 2271 }, { "epoch": 0.11, "grad_norm": 0.6045131087303162, "learning_rate": 0.0005979629586197075, "loss": 3.877, "step": 2272 }, { "epoch": 0.11, "grad_norm": 0.6383094191551208, "learning_rate": 0.0005979611670891641, "loss": 3.8716, "step": 2273 }, { "epoch": 0.11, "grad_norm": 0.6388508677482605, "learning_rate": 0.0005979593747738483, "loss": 3.7067, "step": 2274 }, { "epoch": 0.11, "grad_norm": 0.630363941192627, "learning_rate": 0.0005979575816737648, "loss": 3.6567, "step": 2275 }, { "epoch": 0.11, "grad_norm": 0.6004341244697571, "learning_rate": 0.0005979557877889184, "loss": 3.6867, "step": 2276 }, { "epoch": 0.11, "grad_norm": 0.5606918931007385, "learning_rate": 0.0005979539931193137, "loss": 3.6417, "step": 2277 }, { "epoch": 0.11, "grad_norm": 0.5711063146591187, "learning_rate": 0.0005979521976649556, "loss": 3.607, "step": 2278 }, { "epoch": 0.11, "grad_norm": 0.6434407830238342, "learning_rate": 0.0005979504014258488, "loss": 3.7508, "step": 2279 }, { "epoch": 0.11, "grad_norm": 0.6066713929176331, "learning_rate": 0.0005979486044019979, "loss": 3.6702, "step": 2280 }, { "epoch": 0.11, "grad_norm": 0.566117525100708, "learning_rate": 0.0005979468065934078, "loss": 3.7053, "step": 2281 }, { "epoch": 0.11, "grad_norm": 0.6023035645484924, "learning_rate": 0.000597945008000083, "loss": 3.7924, "step": 2282 }, { "epoch": 0.11, "grad_norm": 0.649734377861023, "learning_rate": 0.0005979432086220287, "loss": 3.6891, "step": 2283 }, { "epoch": 0.11, "grad_norm": 0.6085857152938843, "learning_rate": 0.000597941408459249, "loss": 3.6721, "step": 2284 }, { "epoch": 0.11, "grad_norm": 0.594468355178833, "learning_rate": 0.0005979396075117492, "loss": 3.7176, "step": 2285 }, { "epoch": 0.11, "grad_norm": 0.5784687995910645, "learning_rate": 0.0005979378057795337, "loss": 3.6966, "step": 2286 }, { "epoch": 0.11, "grad_norm": 0.586226224899292, "learning_rate": 0.0005979360032626073, "loss": 3.8213, "step": 2287 }, { "epoch": 0.11, "grad_norm": 0.5555378198623657, "learning_rate": 0.0005979341999609749, "loss": 3.7659, "step": 2288 }, { "epoch": 0.11, "grad_norm": 0.6186184287071228, "learning_rate": 0.0005979323958746411, "loss": 3.7084, "step": 2289 }, { "epoch": 0.11, "grad_norm": 0.5890335440635681, "learning_rate": 0.0005979305910036108, "loss": 3.6129, "step": 2290 }, { "epoch": 0.11, "grad_norm": 0.586846113204956, "learning_rate": 0.0005979287853478886, "loss": 3.7499, "step": 2291 }, { "epoch": 0.11, "grad_norm": 0.6445232033729553, "learning_rate": 0.0005979269789074793, "loss": 3.5936, "step": 2292 }, { "epoch": 0.11, "grad_norm": 0.5615684390068054, "learning_rate": 0.0005979251716823877, "loss": 3.8463, "step": 2293 }, { "epoch": 0.11, "grad_norm": 0.6355283856391907, "learning_rate": 0.0005979233636726186, "loss": 3.5838, "step": 2294 }, { "epoch": 0.11, "grad_norm": 0.5905806422233582, "learning_rate": 0.0005979215548781766, "loss": 3.5728, "step": 2295 }, { "epoch": 0.11, "grad_norm": 0.6034395694732666, "learning_rate": 0.0005979197452990665, "loss": 3.3597, "step": 2296 }, { "epoch": 0.11, "grad_norm": 0.5971759557723999, "learning_rate": 0.0005979179349352932, "loss": 3.7747, "step": 2297 }, { "epoch": 0.11, "grad_norm": 0.5553016662597656, "learning_rate": 0.0005979161237868615, "loss": 3.7177, "step": 2298 }, { "epoch": 0.11, "grad_norm": 0.6574896574020386, "learning_rate": 0.000597914311853776, "loss": 3.8484, "step": 2299 }, { "epoch": 0.11, "grad_norm": 0.5912606120109558, "learning_rate": 0.0005979124991360414, "loss": 3.713, "step": 2300 }, { "epoch": 0.11, "grad_norm": 0.5491510629653931, "learning_rate": 0.0005979106856336628, "loss": 3.7381, "step": 2301 }, { "epoch": 0.11, "grad_norm": 0.5802510976791382, "learning_rate": 0.0005979088713466447, "loss": 3.7492, "step": 2302 }, { "epoch": 0.11, "grad_norm": 0.5774723291397095, "learning_rate": 0.000597907056274992, "loss": 3.7597, "step": 2303 }, { "epoch": 0.11, "grad_norm": 0.6084253191947937, "learning_rate": 0.0005979052404187094, "loss": 3.7054, "step": 2304 }, { "epoch": 0.11, "grad_norm": 0.5598316788673401, "learning_rate": 0.0005979034237778018, "loss": 3.7911, "step": 2305 }, { "epoch": 0.11, "grad_norm": 0.6073153614997864, "learning_rate": 0.0005979016063522738, "loss": 3.6144, "step": 2306 }, { "epoch": 0.11, "grad_norm": 0.7126467227935791, "learning_rate": 0.0005978997881421304, "loss": 3.7347, "step": 2307 }, { "epoch": 0.11, "grad_norm": 0.6775062084197998, "learning_rate": 0.0005978979691473763, "loss": 3.632, "step": 2308 }, { "epoch": 0.11, "grad_norm": 0.6118178367614746, "learning_rate": 0.0005978961493680162, "loss": 3.5525, "step": 2309 }, { "epoch": 0.11, "grad_norm": 0.6124061346054077, "learning_rate": 0.000597894328804055, "loss": 3.6992, "step": 2310 }, { "epoch": 0.11, "grad_norm": 0.6024648547172546, "learning_rate": 0.0005978925074554975, "loss": 3.8113, "step": 2311 }, { "epoch": 0.11, "grad_norm": 0.6617346405982971, "learning_rate": 0.0005978906853223485, "loss": 3.7497, "step": 2312 }, { "epoch": 0.11, "grad_norm": 0.5842245221138, "learning_rate": 0.0005978888624046127, "loss": 3.5113, "step": 2313 }, { "epoch": 0.11, "grad_norm": 0.6647869348526001, "learning_rate": 0.0005978870387022949, "loss": 3.7863, "step": 2314 }, { "epoch": 0.11, "grad_norm": 0.6231033205986023, "learning_rate": 0.0005978852142154001, "loss": 3.8737, "step": 2315 }, { "epoch": 0.11, "grad_norm": 0.5999862551689148, "learning_rate": 0.000597883388943933, "loss": 3.774, "step": 2316 }, { "epoch": 0.11, "grad_norm": 0.6005386114120483, "learning_rate": 0.0005978815628878982, "loss": 3.832, "step": 2317 }, { "epoch": 0.11, "grad_norm": 0.6149207353591919, "learning_rate": 0.0005978797360473009, "loss": 3.7553, "step": 2318 }, { "epoch": 0.11, "grad_norm": 0.5701582431793213, "learning_rate": 0.0005978779084221456, "loss": 3.6775, "step": 2319 }, { "epoch": 0.11, "grad_norm": 0.5574246644973755, "learning_rate": 0.0005978760800124372, "loss": 3.6211, "step": 2320 }, { "epoch": 0.11, "grad_norm": 0.6085734963417053, "learning_rate": 0.0005978742508181805, "loss": 3.6918, "step": 2321 }, { "epoch": 0.11, "grad_norm": 0.6652593612670898, "learning_rate": 0.0005978724208393804, "loss": 3.8293, "step": 2322 }, { "epoch": 0.11, "grad_norm": 0.5725169777870178, "learning_rate": 0.0005978705900760418, "loss": 3.7319, "step": 2323 }, { "epoch": 0.11, "grad_norm": 0.6140974164009094, "learning_rate": 0.0005978687585281692, "loss": 3.7711, "step": 2324 }, { "epoch": 0.11, "grad_norm": 0.5903205871582031, "learning_rate": 0.0005978669261957676, "loss": 3.7894, "step": 2325 }, { "epoch": 0.11, "grad_norm": 0.553845226764679, "learning_rate": 0.000597865093078842, "loss": 3.6955, "step": 2326 }, { "epoch": 0.11, "grad_norm": 0.5786354541778564, "learning_rate": 0.0005978632591773969, "loss": 3.8862, "step": 2327 }, { "epoch": 0.11, "grad_norm": 0.5850010514259338, "learning_rate": 0.0005978614244914375, "loss": 3.5009, "step": 2328 }, { "epoch": 0.11, "grad_norm": 0.5797577500343323, "learning_rate": 0.0005978595890209683, "loss": 3.7438, "step": 2329 }, { "epoch": 0.11, "grad_norm": 0.5699589848518372, "learning_rate": 0.0005978577527659943, "loss": 3.7208, "step": 2330 }, { "epoch": 0.11, "grad_norm": 0.6231520771980286, "learning_rate": 0.0005978559157265203, "loss": 3.7847, "step": 2331 }, { "epoch": 0.11, "grad_norm": 0.5472413897514343, "learning_rate": 0.0005978540779025511, "loss": 3.8139, "step": 2332 }, { "epoch": 0.11, "grad_norm": 0.6323192119598389, "learning_rate": 0.0005978522392940917, "loss": 3.6501, "step": 2333 }, { "epoch": 0.11, "grad_norm": 0.8912398219108582, "learning_rate": 0.0005978503999011467, "loss": 3.9693, "step": 2334 }, { "epoch": 0.11, "grad_norm": 0.5911180377006531, "learning_rate": 0.000597848559723721, "loss": 3.5229, "step": 2335 }, { "epoch": 0.11, "grad_norm": 0.5412778258323669, "learning_rate": 0.0005978467187618198, "loss": 3.7783, "step": 2336 }, { "epoch": 0.11, "grad_norm": 0.5602182745933533, "learning_rate": 0.0005978448770154474, "loss": 3.6555, "step": 2337 }, { "epoch": 0.11, "grad_norm": 0.6185539364814758, "learning_rate": 0.000597843034484609, "loss": 3.5993, "step": 2338 }, { "epoch": 0.11, "grad_norm": 0.6103714108467102, "learning_rate": 0.0005978411911693094, "loss": 3.7237, "step": 2339 }, { "epoch": 0.11, "grad_norm": 0.6211607456207275, "learning_rate": 0.0005978393470695534, "loss": 3.5363, "step": 2340 }, { "epoch": 0.11, "grad_norm": 0.5573241710662842, "learning_rate": 0.0005978375021853459, "loss": 3.9038, "step": 2341 }, { "epoch": 0.11, "grad_norm": 0.6002277731895447, "learning_rate": 0.0005978356565166917, "loss": 3.7787, "step": 2342 }, { "epoch": 0.11, "grad_norm": 0.6228034496307373, "learning_rate": 0.0005978338100635958, "loss": 3.5716, "step": 2343 }, { "epoch": 0.11, "grad_norm": 0.5964246988296509, "learning_rate": 0.0005978319628260629, "loss": 3.5264, "step": 2344 }, { "epoch": 0.11, "grad_norm": 0.6132970452308655, "learning_rate": 0.0005978301148040978, "loss": 3.6864, "step": 2345 }, { "epoch": 0.11, "grad_norm": 0.6156790852546692, "learning_rate": 0.0005978282659977058, "loss": 3.5236, "step": 2346 }, { "epoch": 0.12, "grad_norm": 0.5750837326049805, "learning_rate": 0.0005978264164068912, "loss": 3.7859, "step": 2347 }, { "epoch": 0.12, "grad_norm": 0.6026135087013245, "learning_rate": 0.0005978245660316592, "loss": 3.6397, "step": 2348 }, { "epoch": 0.12, "grad_norm": 0.5814940929412842, "learning_rate": 0.0005978227148720146, "loss": 3.8765, "step": 2349 }, { "epoch": 0.12, "grad_norm": 0.6358333826065063, "learning_rate": 0.0005978208629279623, "loss": 3.6133, "step": 2350 }, { "epoch": 0.12, "grad_norm": 0.6395803689956665, "learning_rate": 0.0005978190101995071, "loss": 3.7048, "step": 2351 }, { "epoch": 0.12, "grad_norm": 0.5972474813461304, "learning_rate": 0.000597817156686654, "loss": 3.7461, "step": 2352 }, { "epoch": 0.12, "grad_norm": 0.5645350217819214, "learning_rate": 0.0005978153023894079, "loss": 3.6762, "step": 2353 }, { "epoch": 0.12, "grad_norm": 0.6386092305183411, "learning_rate": 0.0005978134473077736, "loss": 3.5883, "step": 2354 }, { "epoch": 0.12, "grad_norm": 0.578127384185791, "learning_rate": 0.0005978115914417559, "loss": 3.7026, "step": 2355 }, { "epoch": 0.12, "grad_norm": 0.5714936256408691, "learning_rate": 0.0005978097347913598, "loss": 3.6753, "step": 2356 }, { "epoch": 0.12, "grad_norm": 0.6167704463005066, "learning_rate": 0.0005978078773565903, "loss": 3.7017, "step": 2357 }, { "epoch": 0.12, "grad_norm": 0.6012189984321594, "learning_rate": 0.000597806019137452, "loss": 3.6736, "step": 2358 }, { "epoch": 0.12, "grad_norm": 0.6270298361778259, "learning_rate": 0.00059780416013395, "loss": 3.5279, "step": 2359 }, { "epoch": 0.12, "grad_norm": 0.6216274499893188, "learning_rate": 0.0005978023003460893, "loss": 3.4673, "step": 2360 }, { "epoch": 0.12, "grad_norm": 0.6030069589614868, "learning_rate": 0.0005978004397738744, "loss": 3.6831, "step": 2361 }, { "epoch": 0.12, "grad_norm": 0.5585080981254578, "learning_rate": 0.0005977985784173107, "loss": 3.7218, "step": 2362 }, { "epoch": 0.12, "grad_norm": 0.6055890321731567, "learning_rate": 0.0005977967162764027, "loss": 3.8694, "step": 2363 }, { "epoch": 0.12, "grad_norm": 0.58843994140625, "learning_rate": 0.0005977948533511555, "loss": 3.5457, "step": 2364 }, { "epoch": 0.12, "grad_norm": 0.6120928525924683, "learning_rate": 0.0005977929896415741, "loss": 3.7752, "step": 2365 }, { "epoch": 0.12, "grad_norm": 0.6189221739768982, "learning_rate": 0.000597791125147663, "loss": 3.6428, "step": 2366 }, { "epoch": 0.12, "grad_norm": 0.6048324108123779, "learning_rate": 0.0005977892598694276, "loss": 3.5921, "step": 2367 }, { "epoch": 0.12, "grad_norm": 0.5761814117431641, "learning_rate": 0.0005977873938068725, "loss": 3.5946, "step": 2368 }, { "epoch": 0.12, "grad_norm": 0.6603354811668396, "learning_rate": 0.0005977855269600027, "loss": 3.6483, "step": 2369 }, { "epoch": 0.12, "grad_norm": 0.5804461240768433, "learning_rate": 0.0005977836593288233, "loss": 3.5695, "step": 2370 }, { "epoch": 0.12, "grad_norm": 0.5955277681350708, "learning_rate": 0.0005977817909133389, "loss": 3.6131, "step": 2371 }, { "epoch": 0.12, "grad_norm": 0.6651073098182678, "learning_rate": 0.0005977799217135547, "loss": 3.7153, "step": 2372 }, { "epoch": 0.12, "grad_norm": 0.5861976146697998, "learning_rate": 0.0005977780517294754, "loss": 3.7365, "step": 2373 }, { "epoch": 0.12, "grad_norm": 0.5709624290466309, "learning_rate": 0.000597776180961106, "loss": 3.6133, "step": 2374 }, { "epoch": 0.12, "grad_norm": 0.6309444904327393, "learning_rate": 0.0005977743094084514, "loss": 3.681, "step": 2375 }, { "epoch": 0.12, "grad_norm": 0.5742753148078918, "learning_rate": 0.0005977724370715167, "loss": 3.7932, "step": 2376 }, { "epoch": 0.12, "grad_norm": 0.5696350336074829, "learning_rate": 0.0005977705639503067, "loss": 3.4466, "step": 2377 }, { "epoch": 0.12, "grad_norm": 0.6605788469314575, "learning_rate": 0.0005977686900448262, "loss": 3.7482, "step": 2378 }, { "epoch": 0.12, "grad_norm": 0.5416749715805054, "learning_rate": 0.0005977668153550804, "loss": 3.7162, "step": 2379 }, { "epoch": 0.12, "grad_norm": 0.6018760204315186, "learning_rate": 0.0005977649398810741, "loss": 3.5618, "step": 2380 }, { "epoch": 0.12, "grad_norm": 0.6329287886619568, "learning_rate": 0.0005977630636228123, "loss": 3.4429, "step": 2381 }, { "epoch": 0.12, "grad_norm": 0.6521981358528137, "learning_rate": 0.0005977611865802999, "loss": 3.7848, "step": 2382 }, { "epoch": 0.12, "grad_norm": 0.585591733455658, "learning_rate": 0.0005977593087535417, "loss": 3.6851, "step": 2383 }, { "epoch": 0.12, "grad_norm": 0.5864081382751465, "learning_rate": 0.0005977574301425429, "loss": 3.6888, "step": 2384 }, { "epoch": 0.12, "grad_norm": 0.6254026889801025, "learning_rate": 0.0005977555507473083, "loss": 3.7411, "step": 2385 }, { "epoch": 0.12, "grad_norm": 0.6233348250389099, "learning_rate": 0.0005977536705678429, "loss": 3.9431, "step": 2386 }, { "epoch": 0.12, "grad_norm": 0.583896279335022, "learning_rate": 0.0005977517896041516, "loss": 3.7437, "step": 2387 }, { "epoch": 0.12, "grad_norm": 0.5994085073471069, "learning_rate": 0.0005977499078562394, "loss": 3.7577, "step": 2388 }, { "epoch": 0.12, "grad_norm": 0.6116136908531189, "learning_rate": 0.0005977480253241112, "loss": 3.527, "step": 2389 }, { "epoch": 0.12, "grad_norm": 0.6351898908615112, "learning_rate": 0.0005977461420077721, "loss": 3.627, "step": 2390 }, { "epoch": 0.12, "grad_norm": 0.6024461984634399, "learning_rate": 0.0005977442579072269, "loss": 3.7272, "step": 2391 }, { "epoch": 0.12, "grad_norm": 0.608726978302002, "learning_rate": 0.0005977423730224807, "loss": 3.7063, "step": 2392 }, { "epoch": 0.12, "grad_norm": 0.7524157166481018, "learning_rate": 0.0005977404873535383, "loss": 3.5555, "step": 2393 }, { "epoch": 0.12, "grad_norm": 0.6578160524368286, "learning_rate": 0.0005977386009004048, "loss": 3.7474, "step": 2394 }, { "epoch": 0.12, "grad_norm": 0.6224360466003418, "learning_rate": 0.0005977367136630852, "loss": 3.6999, "step": 2395 }, { "epoch": 0.12, "grad_norm": 0.6000403165817261, "learning_rate": 0.0005977348256415843, "loss": 3.6645, "step": 2396 }, { "epoch": 0.12, "grad_norm": 0.5816790461540222, "learning_rate": 0.0005977329368359072, "loss": 3.4327, "step": 2397 }, { "epoch": 0.12, "grad_norm": 0.6410402059555054, "learning_rate": 0.0005977310472460588, "loss": 3.4193, "step": 2398 }, { "epoch": 0.12, "grad_norm": 0.607941210269928, "learning_rate": 0.0005977291568720442, "loss": 3.5642, "step": 2399 }, { "epoch": 0.12, "grad_norm": 0.6291766166687012, "learning_rate": 0.0005977272657138683, "loss": 3.5734, "step": 2400 }, { "epoch": 0.12, "grad_norm": 0.6615599989891052, "learning_rate": 0.000597725373771536, "loss": 3.7853, "step": 2401 }, { "epoch": 0.12, "grad_norm": 0.5869166254997253, "learning_rate": 0.0005977234810450524, "loss": 3.6767, "step": 2402 }, { "epoch": 0.12, "grad_norm": 0.647530198097229, "learning_rate": 0.0005977215875344224, "loss": 3.3796, "step": 2403 }, { "epoch": 0.12, "grad_norm": 0.5894125699996948, "learning_rate": 0.000597719693239651, "loss": 3.3358, "step": 2404 }, { "epoch": 0.12, "grad_norm": 0.5925880074501038, "learning_rate": 0.0005977177981607434, "loss": 3.5175, "step": 2405 }, { "epoch": 0.12, "grad_norm": 0.6181460022926331, "learning_rate": 0.0005977159022977043, "loss": 3.7263, "step": 2406 }, { "epoch": 0.12, "grad_norm": 0.5811930298805237, "learning_rate": 0.0005977140056505389, "loss": 3.8623, "step": 2407 }, { "epoch": 0.12, "grad_norm": 0.6250795125961304, "learning_rate": 0.000597712108219252, "loss": 3.7118, "step": 2408 }, { "epoch": 0.12, "grad_norm": 0.5673991441726685, "learning_rate": 0.0005977102100038488, "loss": 3.7807, "step": 2409 }, { "epoch": 0.12, "grad_norm": 0.60816890001297, "learning_rate": 0.0005977083110043341, "loss": 3.5926, "step": 2410 }, { "epoch": 0.12, "grad_norm": 0.5878978371620178, "learning_rate": 0.0005977064112207131, "loss": 3.7433, "step": 2411 }, { "epoch": 0.12, "grad_norm": 0.5771352648735046, "learning_rate": 0.0005977045106529906, "loss": 3.7076, "step": 2412 }, { "epoch": 0.12, "grad_norm": 0.5752608776092529, "learning_rate": 0.0005977026093011719, "loss": 3.6284, "step": 2413 }, { "epoch": 0.12, "grad_norm": 0.5990986227989197, "learning_rate": 0.0005977007071652616, "loss": 3.7138, "step": 2414 }, { "epoch": 0.12, "grad_norm": 0.5749576091766357, "learning_rate": 0.0005976988042452651, "loss": 3.5255, "step": 2415 }, { "epoch": 0.12, "grad_norm": 0.5812985897064209, "learning_rate": 0.0005976969005411871, "loss": 3.8895, "step": 2416 }, { "epoch": 0.12, "grad_norm": 0.5838669538497925, "learning_rate": 0.0005976949960530329, "loss": 3.604, "step": 2417 }, { "epoch": 0.12, "grad_norm": 0.5880911946296692, "learning_rate": 0.0005976930907808073, "loss": 3.5379, "step": 2418 }, { "epoch": 0.12, "grad_norm": 0.6260443925857544, "learning_rate": 0.0005976911847245153, "loss": 3.5423, "step": 2419 }, { "epoch": 0.12, "grad_norm": 0.6164513826370239, "learning_rate": 0.0005976892778841622, "loss": 3.9167, "step": 2420 }, { "epoch": 0.12, "grad_norm": 0.6131654977798462, "learning_rate": 0.0005976873702597527, "loss": 3.7101, "step": 2421 }, { "epoch": 0.12, "grad_norm": 0.6063238978385925, "learning_rate": 0.000597685461851292, "loss": 3.6194, "step": 2422 }, { "epoch": 0.12, "grad_norm": 0.5958883762359619, "learning_rate": 0.0005976835526587851, "loss": 3.8709, "step": 2423 }, { "epoch": 0.12, "grad_norm": 0.623001217842102, "learning_rate": 0.0005976816426822369, "loss": 3.7071, "step": 2424 }, { "epoch": 0.12, "grad_norm": 0.5648288726806641, "learning_rate": 0.0005976797319216527, "loss": 3.8756, "step": 2425 }, { "epoch": 0.12, "grad_norm": 0.5784841179847717, "learning_rate": 0.0005976778203770373, "loss": 3.9232, "step": 2426 }, { "epoch": 0.12, "grad_norm": 0.6105582118034363, "learning_rate": 0.0005976759080483958, "loss": 3.7534, "step": 2427 }, { "epoch": 0.12, "grad_norm": 0.5793781280517578, "learning_rate": 0.0005976739949357333, "loss": 3.7578, "step": 2428 }, { "epoch": 0.12, "grad_norm": 0.5981903672218323, "learning_rate": 0.0005976720810390547, "loss": 3.6772, "step": 2429 }, { "epoch": 0.12, "grad_norm": 0.585236132144928, "learning_rate": 0.0005976701663583652, "loss": 3.5096, "step": 2430 }, { "epoch": 0.12, "grad_norm": 0.6657644510269165, "learning_rate": 0.0005976682508936699, "loss": 3.6456, "step": 2431 }, { "epoch": 0.12, "grad_norm": 0.5640720129013062, "learning_rate": 0.0005976663346449735, "loss": 3.8045, "step": 2432 }, { "epoch": 0.12, "grad_norm": 0.6080378293991089, "learning_rate": 0.0005976644176122813, "loss": 3.732, "step": 2433 }, { "epoch": 0.12, "grad_norm": 0.5977432727813721, "learning_rate": 0.0005976624997955984, "loss": 3.7909, "step": 2434 }, { "epoch": 0.12, "grad_norm": 0.6109653115272522, "learning_rate": 0.0005976605811949296, "loss": 3.6774, "step": 2435 }, { "epoch": 0.12, "grad_norm": 0.6155614256858826, "learning_rate": 0.0005976586618102802, "loss": 3.7383, "step": 2436 }, { "epoch": 0.12, "grad_norm": 0.5923848748207092, "learning_rate": 0.0005976567416416552, "loss": 3.74, "step": 2437 }, { "epoch": 0.12, "grad_norm": 0.6466518640518188, "learning_rate": 0.0005976548206890597, "loss": 3.8091, "step": 2438 }, { "epoch": 0.12, "grad_norm": 0.6154281497001648, "learning_rate": 0.0005976528989524985, "loss": 3.5068, "step": 2439 }, { "epoch": 0.12, "grad_norm": 0.5379983186721802, "learning_rate": 0.000597650976431977, "loss": 3.8493, "step": 2440 }, { "epoch": 0.12, "grad_norm": 0.5665551424026489, "learning_rate": 0.0005976490531275, "loss": 3.6155, "step": 2441 }, { "epoch": 0.12, "grad_norm": 0.5697283744812012, "learning_rate": 0.0005976471290390727, "loss": 3.8031, "step": 2442 }, { "epoch": 0.12, "grad_norm": 0.6457461714744568, "learning_rate": 0.0005976452041667002, "loss": 3.6258, "step": 2443 }, { "epoch": 0.12, "grad_norm": 0.5855603218078613, "learning_rate": 0.0005976432785103875, "loss": 3.6725, "step": 2444 }, { "epoch": 0.12, "grad_norm": 0.642481803894043, "learning_rate": 0.0005976413520701397, "loss": 3.5866, "step": 2445 }, { "epoch": 0.12, "grad_norm": 0.5984083414077759, "learning_rate": 0.0005976394248459619, "loss": 3.6047, "step": 2446 }, { "epoch": 0.12, "grad_norm": 0.6179513931274414, "learning_rate": 0.0005976374968378591, "loss": 3.7578, "step": 2447 }, { "epoch": 0.12, "grad_norm": 0.5873163938522339, "learning_rate": 0.0005976355680458364, "loss": 3.4911, "step": 2448 }, { "epoch": 0.12, "grad_norm": 0.6037702560424805, "learning_rate": 0.0005976336384698989, "loss": 3.556, "step": 2449 }, { "epoch": 0.12, "grad_norm": 0.6157534718513489, "learning_rate": 0.0005976317081100517, "loss": 3.5765, "step": 2450 }, { "epoch": 0.12, "grad_norm": 0.5667474865913391, "learning_rate": 0.0005976297769662997, "loss": 3.6285, "step": 2451 }, { "epoch": 0.12, "grad_norm": 0.5748934149742126, "learning_rate": 0.0005976278450386483, "loss": 3.6365, "step": 2452 }, { "epoch": 0.12, "grad_norm": 0.5666347146034241, "learning_rate": 0.0005976259123271025, "loss": 3.5536, "step": 2453 }, { "epoch": 0.12, "grad_norm": 0.6123989224433899, "learning_rate": 0.0005976239788316671, "loss": 3.6866, "step": 2454 }, { "epoch": 0.12, "grad_norm": 0.5808203220367432, "learning_rate": 0.0005976220445523476, "loss": 3.6159, "step": 2455 }, { "epoch": 0.12, "grad_norm": 0.6113956570625305, "learning_rate": 0.0005976201094891489, "loss": 3.6135, "step": 2456 }, { "epoch": 0.12, "grad_norm": 0.5960624814033508, "learning_rate": 0.0005976181736420762, "loss": 3.7485, "step": 2457 }, { "epoch": 0.12, "grad_norm": 0.6104657649993896, "learning_rate": 0.0005976162370111343, "loss": 3.8493, "step": 2458 }, { "epoch": 0.12, "grad_norm": 0.5828893780708313, "learning_rate": 0.0005976142995963285, "loss": 3.6571, "step": 2459 }, { "epoch": 0.12, "grad_norm": 0.6023302674293518, "learning_rate": 0.000597612361397664, "loss": 3.8401, "step": 2460 }, { "epoch": 0.12, "grad_norm": 0.5271471738815308, "learning_rate": 0.000597610422415146, "loss": 3.8007, "step": 2461 }, { "epoch": 0.12, "grad_norm": 0.6546827554702759, "learning_rate": 0.0005976084826487792, "loss": 3.4448, "step": 2462 }, { "epoch": 0.12, "grad_norm": 0.610604465007782, "learning_rate": 0.0005976065420985689, "loss": 3.6248, "step": 2463 }, { "epoch": 0.12, "grad_norm": 0.5978257060050964, "learning_rate": 0.0005976046007645203, "loss": 3.578, "step": 2464 }, { "epoch": 0.12, "grad_norm": 0.599612832069397, "learning_rate": 0.0005976026586466386, "loss": 3.7923, "step": 2465 }, { "epoch": 0.12, "grad_norm": 0.6239912509918213, "learning_rate": 0.0005976007157449286, "loss": 3.4729, "step": 2466 }, { "epoch": 0.12, "grad_norm": 0.6227158904075623, "learning_rate": 0.0005975987720593957, "loss": 3.6351, "step": 2467 }, { "epoch": 0.12, "grad_norm": 0.5865234732627869, "learning_rate": 0.0005975968275900448, "loss": 3.6103, "step": 2468 }, { "epoch": 0.12, "grad_norm": 0.5831554532051086, "learning_rate": 0.0005975948823368812, "loss": 3.758, "step": 2469 }, { "epoch": 0.12, "grad_norm": 0.6110011339187622, "learning_rate": 0.0005975929362999099, "loss": 3.6188, "step": 2470 }, { "epoch": 0.12, "grad_norm": 0.6303837895393372, "learning_rate": 0.000597590989479136, "loss": 3.7157, "step": 2471 }, { "epoch": 0.12, "grad_norm": 0.602101743221283, "learning_rate": 0.000597589041874565, "loss": 3.6808, "step": 2472 }, { "epoch": 0.12, "grad_norm": 0.6086721420288086, "learning_rate": 0.0005975870934862015, "loss": 3.7579, "step": 2473 }, { "epoch": 0.12, "grad_norm": 0.5447176694869995, "learning_rate": 0.0005975851443140509, "loss": 3.8872, "step": 2474 }, { "epoch": 0.12, "grad_norm": 0.6065565347671509, "learning_rate": 0.0005975831943581184, "loss": 3.7509, "step": 2475 }, { "epoch": 0.12, "grad_norm": 0.582368791103363, "learning_rate": 0.0005975812436184089, "loss": 3.6838, "step": 2476 }, { "epoch": 0.12, "grad_norm": 0.5834957957267761, "learning_rate": 0.0005975792920949278, "loss": 3.7677, "step": 2477 }, { "epoch": 0.12, "grad_norm": 0.5708243250846863, "learning_rate": 0.00059757733978768, "loss": 3.6435, "step": 2478 }, { "epoch": 0.12, "grad_norm": 0.5927959084510803, "learning_rate": 0.0005975753866966708, "loss": 3.8947, "step": 2479 }, { "epoch": 0.12, "grad_norm": 0.5893408060073853, "learning_rate": 0.0005975734328219054, "loss": 3.4956, "step": 2480 }, { "epoch": 0.12, "grad_norm": 0.5960896611213684, "learning_rate": 0.0005975714781633887, "loss": 3.7718, "step": 2481 }, { "epoch": 0.12, "grad_norm": 0.6134823560714722, "learning_rate": 0.0005975695227211261, "loss": 3.9268, "step": 2482 }, { "epoch": 0.12, "grad_norm": 0.5897719264030457, "learning_rate": 0.0005975675664951225, "loss": 3.7412, "step": 2483 }, { "epoch": 0.12, "grad_norm": 0.5636825561523438, "learning_rate": 0.0005975656094853834, "loss": 3.6555, "step": 2484 }, { "epoch": 0.12, "grad_norm": 0.6075690984725952, "learning_rate": 0.0005975636516919136, "loss": 3.4279, "step": 2485 }, { "epoch": 0.12, "grad_norm": 0.6178411245346069, "learning_rate": 0.0005975616931147185, "loss": 3.6293, "step": 2486 }, { "epoch": 0.12, "grad_norm": 0.6706565022468567, "learning_rate": 0.0005975597337538032, "loss": 3.2851, "step": 2487 }, { "epoch": 0.12, "grad_norm": 0.5771182775497437, "learning_rate": 0.0005975577736091727, "loss": 3.4126, "step": 2488 }, { "epoch": 0.12, "grad_norm": 0.5679176449775696, "learning_rate": 0.0005975558126808323, "loss": 3.7656, "step": 2489 }, { "epoch": 0.12, "grad_norm": 0.5970303416252136, "learning_rate": 0.0005975538509687872, "loss": 3.5579, "step": 2490 }, { "epoch": 0.12, "grad_norm": 0.557059645652771, "learning_rate": 0.0005975518884730425, "loss": 3.693, "step": 2491 }, { "epoch": 0.12, "grad_norm": 0.713829755783081, "learning_rate": 0.0005975499251936034, "loss": 3.7477, "step": 2492 }, { "epoch": 0.12, "grad_norm": 0.5846167206764221, "learning_rate": 0.000597547961130475, "loss": 3.8181, "step": 2493 }, { "epoch": 0.12, "grad_norm": 0.5585032105445862, "learning_rate": 0.0005975459962836627, "loss": 3.7385, "step": 2494 }, { "epoch": 0.12, "grad_norm": 0.6127833724021912, "learning_rate": 0.0005975440306531715, "loss": 3.6375, "step": 2495 }, { "epoch": 0.12, "grad_norm": 0.6165521144866943, "learning_rate": 0.0005975420642390064, "loss": 3.6615, "step": 2496 }, { "epoch": 0.12, "grad_norm": 0.5974895358085632, "learning_rate": 0.0005975400970411729, "loss": 3.6218, "step": 2497 }, { "epoch": 0.12, "grad_norm": 0.6264663338661194, "learning_rate": 0.0005975381290596759, "loss": 3.5796, "step": 2498 }, { "epoch": 0.12, "grad_norm": 0.5844413638114929, "learning_rate": 0.0005975361602945209, "loss": 3.5147, "step": 2499 }, { "epoch": 0.12, "grad_norm": 0.6054651141166687, "learning_rate": 0.0005975341907457129, "loss": 3.6625, "step": 2500 }, { "epoch": 0.12, "grad_norm": 0.6386021971702576, "learning_rate": 0.0005975322204132571, "loss": 3.6716, "step": 2501 }, { "epoch": 0.12, "grad_norm": 0.6154173612594604, "learning_rate": 0.0005975302492971586, "loss": 3.6322, "step": 2502 }, { "epoch": 0.12, "grad_norm": 0.5977914929389954, "learning_rate": 0.0005975282773974228, "loss": 3.5976, "step": 2503 }, { "epoch": 0.12, "grad_norm": 0.6002846360206604, "learning_rate": 0.0005975263047140547, "loss": 3.5668, "step": 2504 }, { "epoch": 0.12, "grad_norm": 0.5692142844200134, "learning_rate": 0.0005975243312470596, "loss": 3.6982, "step": 2505 }, { "epoch": 0.12, "grad_norm": 0.6204922795295715, "learning_rate": 0.0005975223569964427, "loss": 3.4332, "step": 2506 }, { "epoch": 0.12, "grad_norm": 0.6070844531059265, "learning_rate": 0.0005975203819622091, "loss": 3.5652, "step": 2507 }, { "epoch": 0.12, "grad_norm": 0.6402035355567932, "learning_rate": 0.0005975184061443641, "loss": 3.9418, "step": 2508 }, { "epoch": 0.12, "grad_norm": 0.5677720308303833, "learning_rate": 0.0005975164295429129, "loss": 3.5175, "step": 2509 }, { "epoch": 0.12, "grad_norm": 0.6889002323150635, "learning_rate": 0.0005975144521578607, "loss": 3.6094, "step": 2510 }, { "epoch": 0.12, "grad_norm": 0.6181936264038086, "learning_rate": 0.0005975124739892126, "loss": 3.6502, "step": 2511 }, { "epoch": 0.12, "grad_norm": 0.6118708848953247, "learning_rate": 0.000597510495036974, "loss": 3.7122, "step": 2512 }, { "epoch": 0.12, "grad_norm": 0.5670859217643738, "learning_rate": 0.0005975085153011499, "loss": 3.677, "step": 2513 }, { "epoch": 0.12, "grad_norm": 0.5545445680618286, "learning_rate": 0.0005975065347817457, "loss": 3.8839, "step": 2514 }, { "epoch": 0.12, "grad_norm": 0.6169227361679077, "learning_rate": 0.0005975045534787666, "loss": 3.7485, "step": 2515 }, { "epoch": 0.12, "grad_norm": 0.6185595393180847, "learning_rate": 0.0005975025713922177, "loss": 3.763, "step": 2516 }, { "epoch": 0.12, "grad_norm": 0.5830788612365723, "learning_rate": 0.0005975005885221044, "loss": 3.66, "step": 2517 }, { "epoch": 0.12, "grad_norm": 0.5783771276473999, "learning_rate": 0.0005974986048684315, "loss": 3.6327, "step": 2518 }, { "epoch": 0.12, "grad_norm": 0.6497078537940979, "learning_rate": 0.0005974966204312048, "loss": 3.4842, "step": 2519 }, { "epoch": 0.12, "grad_norm": 0.5857980251312256, "learning_rate": 0.0005974946352104292, "loss": 3.6608, "step": 2520 }, { "epoch": 0.12, "grad_norm": 0.5564964413642883, "learning_rate": 0.00059749264920611, "loss": 3.7486, "step": 2521 }, { "epoch": 0.12, "grad_norm": 0.6064032912254333, "learning_rate": 0.0005974906624182523, "loss": 3.7437, "step": 2522 }, { "epoch": 0.12, "grad_norm": 0.5508198142051697, "learning_rate": 0.0005974886748468616, "loss": 3.4906, "step": 2523 }, { "epoch": 0.12, "grad_norm": 0.588590145111084, "learning_rate": 0.0005974866864919429, "loss": 3.5258, "step": 2524 }, { "epoch": 0.12, "grad_norm": 0.6488778591156006, "learning_rate": 0.0005974846973535015, "loss": 3.5921, "step": 2525 }, { "epoch": 0.12, "grad_norm": 0.634125292301178, "learning_rate": 0.0005974827074315427, "loss": 3.5026, "step": 2526 }, { "epoch": 0.12, "grad_norm": 0.6007183790206909, "learning_rate": 0.0005974807167260717, "loss": 3.5792, "step": 2527 }, { "epoch": 0.12, "grad_norm": 0.5504261255264282, "learning_rate": 0.0005974787252370937, "loss": 3.6759, "step": 2528 }, { "epoch": 0.12, "grad_norm": 0.6158027052879333, "learning_rate": 0.000597476732964614, "loss": 3.564, "step": 2529 }, { "epoch": 0.12, "grad_norm": 0.6696975827217102, "learning_rate": 0.0005974747399086379, "loss": 3.4829, "step": 2530 }, { "epoch": 0.12, "grad_norm": 0.6032002568244934, "learning_rate": 0.0005974727460691706, "loss": 3.7942, "step": 2531 }, { "epoch": 0.12, "grad_norm": 0.5709253549575806, "learning_rate": 0.0005974707514462173, "loss": 3.8751, "step": 2532 }, { "epoch": 0.12, "grad_norm": 0.6123468279838562, "learning_rate": 0.0005974687560397832, "loss": 3.5406, "step": 2533 }, { "epoch": 0.12, "grad_norm": 0.7552351355552673, "learning_rate": 0.0005974667598498737, "loss": 3.6732, "step": 2534 }, { "epoch": 0.12, "grad_norm": 0.6883453130722046, "learning_rate": 0.0005974647628764941, "loss": 3.5411, "step": 2535 }, { "epoch": 0.12, "grad_norm": 0.614341676235199, "learning_rate": 0.0005974627651196495, "loss": 3.5317, "step": 2536 }, { "epoch": 0.12, "grad_norm": 0.5992317795753479, "learning_rate": 0.0005974607665793452, "loss": 3.694, "step": 2537 }, { "epoch": 0.12, "grad_norm": 0.5962361693382263, "learning_rate": 0.0005974587672555865, "loss": 3.5415, "step": 2538 }, { "epoch": 0.12, "grad_norm": 0.6169871687889099, "learning_rate": 0.0005974567671483786, "loss": 3.5371, "step": 2539 }, { "epoch": 0.12, "grad_norm": 0.575014591217041, "learning_rate": 0.0005974547662577268, "loss": 3.692, "step": 2540 }, { "epoch": 0.12, "grad_norm": 0.66556316614151, "learning_rate": 0.0005974527645836365, "loss": 3.4761, "step": 2541 }, { "epoch": 0.12, "grad_norm": 0.5992640852928162, "learning_rate": 0.0005974507621261129, "loss": 3.7176, "step": 2542 }, { "epoch": 0.12, "grad_norm": 0.6112333536148071, "learning_rate": 0.0005974487588851611, "loss": 3.4594, "step": 2543 }, { "epoch": 0.12, "grad_norm": 0.6187512874603271, "learning_rate": 0.0005974467548607866, "loss": 3.6286, "step": 2544 }, { "epoch": 0.12, "grad_norm": 0.5970472693443298, "learning_rate": 0.0005974447500529946, "loss": 3.5783, "step": 2545 }, { "epoch": 0.12, "grad_norm": 0.622491180896759, "learning_rate": 0.0005974427444617903, "loss": 3.935, "step": 2546 }, { "epoch": 0.12, "grad_norm": 0.5881618857383728, "learning_rate": 0.000597440738087179, "loss": 3.7599, "step": 2547 }, { "epoch": 0.12, "grad_norm": 0.5989762544631958, "learning_rate": 0.0005974387309291662, "loss": 3.5662, "step": 2548 }, { "epoch": 0.12, "grad_norm": 0.5770282745361328, "learning_rate": 0.000597436722987757, "loss": 3.5526, "step": 2549 }, { "epoch": 0.12, "grad_norm": 0.5764257907867432, "learning_rate": 0.0005974347142629567, "loss": 3.6311, "step": 2550 }, { "epoch": 0.13, "grad_norm": 0.5779116749763489, "learning_rate": 0.0005974327047547706, "loss": 3.5682, "step": 2551 }, { "epoch": 0.13, "grad_norm": 0.5993980169296265, "learning_rate": 0.000597430694463204, "loss": 3.9816, "step": 2552 }, { "epoch": 0.13, "grad_norm": 0.6707353591918945, "learning_rate": 0.0005974286833882621, "loss": 3.7115, "step": 2553 }, { "epoch": 0.13, "grad_norm": 0.5597850680351257, "learning_rate": 0.0005974266715299504, "loss": 3.5479, "step": 2554 }, { "epoch": 0.13, "grad_norm": 0.5796374678611755, "learning_rate": 0.0005974246588882741, "loss": 3.6176, "step": 2555 }, { "epoch": 0.13, "grad_norm": 0.6326395869255066, "learning_rate": 0.0005974226454632384, "loss": 3.728, "step": 2556 }, { "epoch": 0.13, "grad_norm": 0.5709221363067627, "learning_rate": 0.0005974206312548487, "loss": 3.7504, "step": 2557 }, { "epoch": 0.13, "grad_norm": 0.6459200978279114, "learning_rate": 0.0005974186162631104, "loss": 3.8128, "step": 2558 }, { "epoch": 0.13, "grad_norm": 0.6132066249847412, "learning_rate": 0.0005974166004880286, "loss": 3.6669, "step": 2559 }, { "epoch": 0.13, "grad_norm": 0.593331515789032, "learning_rate": 0.0005974145839296088, "loss": 3.7232, "step": 2560 }, { "epoch": 0.13, "grad_norm": 0.5764754414558411, "learning_rate": 0.0005974125665878561, "loss": 3.4763, "step": 2561 }, { "epoch": 0.13, "grad_norm": 0.5679061412811279, "learning_rate": 0.000597410548462776, "loss": 3.3199, "step": 2562 }, { "epoch": 0.13, "grad_norm": 0.6383253931999207, "learning_rate": 0.0005974085295543739, "loss": 3.6122, "step": 2563 }, { "epoch": 0.13, "grad_norm": 0.6798160672187805, "learning_rate": 0.0005974065098626548, "loss": 3.8301, "step": 2564 }, { "epoch": 0.13, "grad_norm": 0.5811006426811218, "learning_rate": 0.0005974044893876241, "loss": 3.7606, "step": 2565 }, { "epoch": 0.13, "grad_norm": 0.6266860961914062, "learning_rate": 0.0005974024681292874, "loss": 3.6762, "step": 2566 }, { "epoch": 0.13, "grad_norm": 0.5711492896080017, "learning_rate": 0.0005974004460876498, "loss": 3.6717, "step": 2567 }, { "epoch": 0.13, "grad_norm": 0.5752375721931458, "learning_rate": 0.0005973984232627166, "loss": 3.5786, "step": 2568 }, { "epoch": 0.13, "grad_norm": 0.6051990985870361, "learning_rate": 0.0005973963996544932, "loss": 3.7287, "step": 2569 }, { "epoch": 0.13, "grad_norm": 0.6400178074836731, "learning_rate": 0.000597394375262985, "loss": 3.4631, "step": 2570 }, { "epoch": 0.13, "grad_norm": 0.6259292364120483, "learning_rate": 0.0005973923500881971, "loss": 3.5266, "step": 2571 }, { "epoch": 0.13, "grad_norm": 0.5960250496864319, "learning_rate": 0.0005973903241301352, "loss": 3.5943, "step": 2572 }, { "epoch": 0.13, "grad_norm": 0.6018271446228027, "learning_rate": 0.0005973882973888042, "loss": 3.6866, "step": 2573 }, { "epoch": 0.13, "grad_norm": 0.5785349011421204, "learning_rate": 0.0005973862698642097, "loss": 3.7706, "step": 2574 }, { "epoch": 0.13, "grad_norm": 0.7196159362792969, "learning_rate": 0.0005973842415563572, "loss": 3.6392, "step": 2575 }, { "epoch": 0.13, "grad_norm": 0.6078731417655945, "learning_rate": 0.0005973822124652516, "loss": 3.8852, "step": 2576 }, { "epoch": 0.13, "grad_norm": 0.6295037865638733, "learning_rate": 0.0005973801825908987, "loss": 3.5333, "step": 2577 }, { "epoch": 0.13, "grad_norm": 0.5880255103111267, "learning_rate": 0.0005973781519333034, "loss": 3.6906, "step": 2578 }, { "epoch": 0.13, "grad_norm": 0.5796219110488892, "learning_rate": 0.0005973761204924714, "loss": 3.6351, "step": 2579 }, { "epoch": 0.13, "grad_norm": 0.5962153077125549, "learning_rate": 0.0005973740882684079, "loss": 3.7197, "step": 2580 }, { "epoch": 0.13, "grad_norm": 0.5779426097869873, "learning_rate": 0.0005973720552611183, "loss": 3.7243, "step": 2581 }, { "epoch": 0.13, "grad_norm": 0.6200467348098755, "learning_rate": 0.000597370021470608, "loss": 3.7223, "step": 2582 }, { "epoch": 0.13, "grad_norm": 0.5809329152107239, "learning_rate": 0.0005973679868968822, "loss": 3.4354, "step": 2583 }, { "epoch": 0.13, "grad_norm": 0.617591142654419, "learning_rate": 0.0005973659515399463, "loss": 3.667, "step": 2584 }, { "epoch": 0.13, "grad_norm": 0.6338459253311157, "learning_rate": 0.0005973639153998058, "loss": 3.6246, "step": 2585 }, { "epoch": 0.13, "grad_norm": 0.5794420838356018, "learning_rate": 0.000597361878476466, "loss": 3.6043, "step": 2586 }, { "epoch": 0.13, "grad_norm": 0.6019072532653809, "learning_rate": 0.0005973598407699322, "loss": 3.6836, "step": 2587 }, { "epoch": 0.13, "grad_norm": 0.5721585154533386, "learning_rate": 0.0005973578022802098, "loss": 3.7401, "step": 2588 }, { "epoch": 0.13, "grad_norm": 0.6371548771858215, "learning_rate": 0.0005973557630073042, "loss": 3.5781, "step": 2589 }, { "epoch": 0.13, "grad_norm": 0.5836586356163025, "learning_rate": 0.0005973537229512208, "loss": 3.7384, "step": 2590 }, { "epoch": 0.13, "grad_norm": 0.5855005383491516, "learning_rate": 0.0005973516821119649, "loss": 3.6999, "step": 2591 }, { "epoch": 0.13, "grad_norm": 0.5802505016326904, "learning_rate": 0.0005973496404895419, "loss": 3.7894, "step": 2592 }, { "epoch": 0.13, "grad_norm": 0.6217300295829773, "learning_rate": 0.0005973475980839571, "loss": 3.7012, "step": 2593 }, { "epoch": 0.13, "grad_norm": 0.657211422920227, "learning_rate": 0.0005973455548952159, "loss": 3.6203, "step": 2594 }, { "epoch": 0.13, "grad_norm": 0.5941809415817261, "learning_rate": 0.0005973435109233238, "loss": 3.7269, "step": 2595 }, { "epoch": 0.13, "grad_norm": 0.5752348303794861, "learning_rate": 0.0005973414661682862, "loss": 3.7513, "step": 2596 }, { "epoch": 0.13, "grad_norm": 0.59751957654953, "learning_rate": 0.0005973394206301083, "loss": 3.5195, "step": 2597 }, { "epoch": 0.13, "grad_norm": 0.5890262722969055, "learning_rate": 0.0005973373743087956, "loss": 3.4186, "step": 2598 }, { "epoch": 0.13, "grad_norm": 0.5875232815742493, "learning_rate": 0.0005973353272043535, "loss": 3.5819, "step": 2599 }, { "epoch": 0.13, "grad_norm": 0.5776680111885071, "learning_rate": 0.0005973332793167874, "loss": 3.6035, "step": 2600 }, { "epoch": 0.13, "grad_norm": 0.652600884437561, "learning_rate": 0.0005973312306461027, "loss": 3.5718, "step": 2601 }, { "epoch": 0.13, "grad_norm": 0.6072568893432617, "learning_rate": 0.0005973291811923046, "loss": 3.6524, "step": 2602 }, { "epoch": 0.13, "grad_norm": 0.5894418954849243, "learning_rate": 0.0005973271309553988, "loss": 3.7459, "step": 2603 }, { "epoch": 0.13, "grad_norm": 0.6451753377914429, "learning_rate": 0.0005973250799353906, "loss": 3.6723, "step": 2604 }, { "epoch": 0.13, "grad_norm": 0.5642520785331726, "learning_rate": 0.0005973230281322852, "loss": 3.6376, "step": 2605 }, { "epoch": 0.13, "grad_norm": 0.6151884198188782, "learning_rate": 0.0005973209755460883, "loss": 3.6376, "step": 2606 }, { "epoch": 0.13, "grad_norm": 0.5698564052581787, "learning_rate": 0.0005973189221768051, "loss": 3.5028, "step": 2607 }, { "epoch": 0.13, "grad_norm": 0.5662623047828674, "learning_rate": 0.000597316868024441, "loss": 3.7416, "step": 2608 }, { "epoch": 0.13, "grad_norm": 0.6105310916900635, "learning_rate": 0.0005973148130890017, "loss": 3.506, "step": 2609 }, { "epoch": 0.13, "grad_norm": 0.5945753455162048, "learning_rate": 0.0005973127573704923, "loss": 3.7686, "step": 2610 }, { "epoch": 0.13, "grad_norm": 0.5580887794494629, "learning_rate": 0.0005973107008689182, "loss": 3.4169, "step": 2611 }, { "epoch": 0.13, "grad_norm": 0.5982559323310852, "learning_rate": 0.0005973086435842851, "loss": 3.8084, "step": 2612 }, { "epoch": 0.13, "grad_norm": 0.545141875743866, "learning_rate": 0.0005973065855165981, "loss": 3.7542, "step": 2613 }, { "epoch": 0.13, "grad_norm": 0.5577916502952576, "learning_rate": 0.0005973045266658629, "loss": 3.7296, "step": 2614 }, { "epoch": 0.13, "grad_norm": 0.6226997971534729, "learning_rate": 0.0005973024670320848, "loss": 3.61, "step": 2615 }, { "epoch": 0.13, "grad_norm": 0.5903077721595764, "learning_rate": 0.0005973004066152692, "loss": 3.5793, "step": 2616 }, { "epoch": 0.13, "grad_norm": 0.6483430862426758, "learning_rate": 0.0005972983454154215, "loss": 3.7537, "step": 2617 }, { "epoch": 0.13, "grad_norm": 0.5781821608543396, "learning_rate": 0.0005972962834325472, "loss": 3.7462, "step": 2618 }, { "epoch": 0.13, "grad_norm": 0.6237555742263794, "learning_rate": 0.0005972942206666517, "loss": 3.709, "step": 2619 }, { "epoch": 0.13, "grad_norm": 0.538802444934845, "learning_rate": 0.0005972921571177405, "loss": 3.8094, "step": 2620 }, { "epoch": 0.13, "grad_norm": 0.6104321479797363, "learning_rate": 0.0005972900927858188, "loss": 3.5672, "step": 2621 }, { "epoch": 0.13, "grad_norm": 0.5900411605834961, "learning_rate": 0.0005972880276708925, "loss": 3.6144, "step": 2622 }, { "epoch": 0.13, "grad_norm": 0.6186203360557556, "learning_rate": 0.0005972859617729665, "loss": 3.6618, "step": 2623 }, { "epoch": 0.13, "grad_norm": 0.5977591872215271, "learning_rate": 0.0005972838950920465, "loss": 3.5381, "step": 2624 }, { "epoch": 0.13, "grad_norm": 0.5627738237380981, "learning_rate": 0.000597281827628138, "loss": 3.5466, "step": 2625 }, { "epoch": 0.13, "grad_norm": 0.5746119022369385, "learning_rate": 0.0005972797593812464, "loss": 3.4968, "step": 2626 }, { "epoch": 0.13, "grad_norm": 0.6021314859390259, "learning_rate": 0.0005972776903513771, "loss": 3.6362, "step": 2627 }, { "epoch": 0.13, "grad_norm": 0.5764266848564148, "learning_rate": 0.0005972756205385356, "loss": 3.5602, "step": 2628 }, { "epoch": 0.13, "grad_norm": 0.560896098613739, "learning_rate": 0.0005972735499427273, "loss": 3.9325, "step": 2629 }, { "epoch": 0.13, "grad_norm": 0.5999066829681396, "learning_rate": 0.0005972714785639576, "loss": 3.5947, "step": 2630 }, { "epoch": 0.13, "grad_norm": 0.6318387389183044, "learning_rate": 0.0005972694064022322, "loss": 3.4485, "step": 2631 }, { "epoch": 0.13, "grad_norm": 0.6000919938087463, "learning_rate": 0.0005972673334575562, "loss": 3.6263, "step": 2632 }, { "epoch": 0.13, "grad_norm": 0.6186161041259766, "learning_rate": 0.0005972652597299354, "loss": 3.4619, "step": 2633 }, { "epoch": 0.13, "grad_norm": 0.6438592076301575, "learning_rate": 0.0005972631852193751, "loss": 3.8048, "step": 2634 }, { "epoch": 0.13, "grad_norm": 0.6455599069595337, "learning_rate": 0.0005972611099258808, "loss": 3.8894, "step": 2635 }, { "epoch": 0.13, "grad_norm": 0.6064257025718689, "learning_rate": 0.0005972590338494579, "loss": 3.6067, "step": 2636 }, { "epoch": 0.13, "grad_norm": 0.5977962613105774, "learning_rate": 0.0005972569569901119, "loss": 3.8438, "step": 2637 }, { "epoch": 0.13, "grad_norm": 0.5873711109161377, "learning_rate": 0.0005972548793478483, "loss": 3.4951, "step": 2638 }, { "epoch": 0.13, "grad_norm": 0.5406238436698914, "learning_rate": 0.0005972528009226725, "loss": 3.5039, "step": 2639 }, { "epoch": 0.13, "grad_norm": 0.6180803179740906, "learning_rate": 0.0005972507217145901, "loss": 3.816, "step": 2640 }, { "epoch": 0.13, "grad_norm": 0.5730354189872742, "learning_rate": 0.0005972486417236065, "loss": 3.5936, "step": 2641 }, { "epoch": 0.13, "grad_norm": 0.5667976140975952, "learning_rate": 0.0005972465609497273, "loss": 3.7131, "step": 2642 }, { "epoch": 0.13, "grad_norm": 0.5629318356513977, "learning_rate": 0.0005972444793929577, "loss": 3.7713, "step": 2643 }, { "epoch": 0.13, "grad_norm": 0.6278749704360962, "learning_rate": 0.0005972423970533033, "loss": 3.6363, "step": 2644 }, { "epoch": 0.13, "grad_norm": 0.637045681476593, "learning_rate": 0.0005972403139307698, "loss": 3.6883, "step": 2645 }, { "epoch": 0.13, "grad_norm": 0.5682446956634521, "learning_rate": 0.0005972382300253625, "loss": 3.7383, "step": 2646 }, { "epoch": 0.13, "grad_norm": 0.5715309977531433, "learning_rate": 0.0005972361453370868, "loss": 3.4358, "step": 2647 }, { "epoch": 0.13, "grad_norm": 0.5956022143363953, "learning_rate": 0.0005972340598659483, "loss": 3.5463, "step": 2648 }, { "epoch": 0.13, "grad_norm": 0.5942005515098572, "learning_rate": 0.0005972319736119526, "loss": 3.667, "step": 2649 }, { "epoch": 0.13, "grad_norm": 0.6336799263954163, "learning_rate": 0.000597229886575105, "loss": 3.5427, "step": 2650 }, { "epoch": 0.13, "grad_norm": 0.5908926129341125, "learning_rate": 0.0005972277987554111, "loss": 3.6074, "step": 2651 }, { "epoch": 0.13, "grad_norm": 0.6374375224113464, "learning_rate": 0.0005972257101528763, "loss": 3.4208, "step": 2652 }, { "epoch": 0.13, "grad_norm": 0.5974894762039185, "learning_rate": 0.0005972236207675063, "loss": 3.6029, "step": 2653 }, { "epoch": 0.13, "grad_norm": 0.5751729011535645, "learning_rate": 0.0005972215305993065, "loss": 3.6742, "step": 2654 }, { "epoch": 0.13, "grad_norm": 0.5705915689468384, "learning_rate": 0.0005972194396482823, "loss": 3.8229, "step": 2655 }, { "epoch": 0.13, "grad_norm": 0.6375955939292908, "learning_rate": 0.0005972173479144394, "loss": 3.5791, "step": 2656 }, { "epoch": 0.13, "grad_norm": 0.6054398417472839, "learning_rate": 0.0005972152553977831, "loss": 3.4664, "step": 2657 }, { "epoch": 0.13, "grad_norm": 0.5961582064628601, "learning_rate": 0.0005972131620983191, "loss": 3.7149, "step": 2658 }, { "epoch": 0.13, "grad_norm": 0.5579370856285095, "learning_rate": 0.0005972110680160528, "loss": 3.7323, "step": 2659 }, { "epoch": 0.13, "grad_norm": 0.6265712380409241, "learning_rate": 0.0005972089731509897, "loss": 3.494, "step": 2660 }, { "epoch": 0.13, "grad_norm": 0.5627157688140869, "learning_rate": 0.0005972068775031353, "loss": 3.8952, "step": 2661 }, { "epoch": 0.13, "grad_norm": 0.6081935167312622, "learning_rate": 0.0005972047810724954, "loss": 3.6359, "step": 2662 }, { "epoch": 0.13, "grad_norm": 0.6013399958610535, "learning_rate": 0.0005972026838590753, "loss": 3.7558, "step": 2663 }, { "epoch": 0.13, "grad_norm": 0.7497528791427612, "learning_rate": 0.0005972005858628803, "loss": 3.5532, "step": 2664 }, { "epoch": 0.13, "grad_norm": 0.5855658650398254, "learning_rate": 0.0005971984870839162, "loss": 3.8398, "step": 2665 }, { "epoch": 0.13, "grad_norm": 0.5891215205192566, "learning_rate": 0.0005971963875221886, "loss": 3.7379, "step": 2666 }, { "epoch": 0.13, "grad_norm": 0.5891420841217041, "learning_rate": 0.000597194287177703, "loss": 3.7002, "step": 2667 }, { "epoch": 0.13, "grad_norm": 0.5906747579574585, "learning_rate": 0.0005971921860504646, "loss": 3.6601, "step": 2668 }, { "epoch": 0.13, "grad_norm": 0.5525054931640625, "learning_rate": 0.0005971900841404794, "loss": 4.0302, "step": 2669 }, { "epoch": 0.13, "grad_norm": 0.6058871746063232, "learning_rate": 0.0005971879814477525, "loss": 3.795, "step": 2670 }, { "epoch": 0.13, "grad_norm": 0.5821624398231506, "learning_rate": 0.0005971858779722898, "loss": 3.6976, "step": 2671 }, { "epoch": 0.13, "grad_norm": 0.5979516506195068, "learning_rate": 0.0005971837737140966, "loss": 3.5227, "step": 2672 }, { "epoch": 0.13, "grad_norm": 0.558814287185669, "learning_rate": 0.0005971816686731786, "loss": 3.5519, "step": 2673 }, { "epoch": 0.13, "grad_norm": 0.5667821168899536, "learning_rate": 0.0005971795628495413, "loss": 3.7112, "step": 2674 }, { "epoch": 0.13, "grad_norm": 0.5700370669364929, "learning_rate": 0.0005971774562431901, "loss": 3.528, "step": 2675 }, { "epoch": 0.13, "grad_norm": 0.5918428301811218, "learning_rate": 0.0005971753488541307, "loss": 3.6761, "step": 2676 }, { "epoch": 0.13, "grad_norm": 0.6164225935935974, "learning_rate": 0.0005971732406823688, "loss": 3.6645, "step": 2677 }, { "epoch": 0.13, "grad_norm": 0.5859614610671997, "learning_rate": 0.0005971711317279096, "loss": 3.698, "step": 2678 }, { "epoch": 0.13, "grad_norm": 0.5908759236335754, "learning_rate": 0.0005971690219907589, "loss": 3.7241, "step": 2679 }, { "epoch": 0.13, "grad_norm": 0.6182529330253601, "learning_rate": 0.0005971669114709222, "loss": 3.5972, "step": 2680 }, { "epoch": 0.13, "grad_norm": 0.576896071434021, "learning_rate": 0.000597164800168405, "loss": 3.679, "step": 2681 }, { "epoch": 0.13, "grad_norm": 0.5724910497665405, "learning_rate": 0.000597162688083213, "loss": 3.7296, "step": 2682 }, { "epoch": 0.13, "grad_norm": 0.6576551198959351, "learning_rate": 0.0005971605752153516, "loss": 3.6643, "step": 2683 }, { "epoch": 0.13, "grad_norm": 0.6664922833442688, "learning_rate": 0.0005971584615648264, "loss": 3.5636, "step": 2684 }, { "epoch": 0.13, "grad_norm": 0.639898955821991, "learning_rate": 0.000597156347131643, "loss": 3.6449, "step": 2685 }, { "epoch": 0.13, "grad_norm": 0.680196225643158, "learning_rate": 0.000597154231915807, "loss": 3.4298, "step": 2686 }, { "epoch": 0.13, "grad_norm": 0.6258304119110107, "learning_rate": 0.0005971521159173239, "loss": 3.8802, "step": 2687 }, { "epoch": 0.13, "grad_norm": 0.5518138408660889, "learning_rate": 0.0005971499991361994, "loss": 3.5749, "step": 2688 }, { "epoch": 0.13, "grad_norm": 0.5616171956062317, "learning_rate": 0.000597147881572439, "loss": 3.7806, "step": 2689 }, { "epoch": 0.13, "grad_norm": 0.5623195171356201, "learning_rate": 0.0005971457632260482, "loss": 3.4841, "step": 2690 }, { "epoch": 0.13, "grad_norm": 0.6076975464820862, "learning_rate": 0.0005971436440970326, "loss": 3.7146, "step": 2691 }, { "epoch": 0.13, "grad_norm": 0.5725387334823608, "learning_rate": 0.0005971415241853979, "loss": 3.622, "step": 2692 }, { "epoch": 0.13, "grad_norm": 0.5814027786254883, "learning_rate": 0.0005971394034911497, "loss": 3.5529, "step": 2693 }, { "epoch": 0.13, "grad_norm": 0.5468875169754028, "learning_rate": 0.0005971372820142934, "loss": 3.4851, "step": 2694 }, { "epoch": 0.13, "grad_norm": 0.6255273222923279, "learning_rate": 0.0005971351597548346, "loss": 3.641, "step": 2695 }, { "epoch": 0.13, "grad_norm": 0.6269606351852417, "learning_rate": 0.000597133036712779, "loss": 3.7258, "step": 2696 }, { "epoch": 0.13, "grad_norm": 0.6093981266021729, "learning_rate": 0.0005971309128881324, "loss": 3.3623, "step": 2697 }, { "epoch": 0.13, "grad_norm": 0.5752262473106384, "learning_rate": 0.0005971287882808999, "loss": 3.6917, "step": 2698 }, { "epoch": 0.13, "grad_norm": 0.6046422123908997, "learning_rate": 0.0005971266628910874, "loss": 3.7567, "step": 2699 }, { "epoch": 0.13, "grad_norm": 0.6624324917793274, "learning_rate": 0.0005971245367187005, "loss": 3.6769, "step": 2700 }, { "epoch": 0.13, "grad_norm": 0.5971564054489136, "learning_rate": 0.0005971224097637447, "loss": 3.7973, "step": 2701 }, { "epoch": 0.13, "grad_norm": 0.5755377411842346, "learning_rate": 0.0005971202820262258, "loss": 3.5409, "step": 2702 }, { "epoch": 0.13, "grad_norm": 0.6195607781410217, "learning_rate": 0.0005971181535061491, "loss": 3.6003, "step": 2703 }, { "epoch": 0.13, "grad_norm": 0.6000617742538452, "learning_rate": 0.0005971160242035204, "loss": 3.6304, "step": 2704 }, { "epoch": 0.13, "grad_norm": 0.5995765328407288, "learning_rate": 0.0005971138941183454, "loss": 3.4578, "step": 2705 }, { "epoch": 0.13, "grad_norm": 0.5783835053443909, "learning_rate": 0.0005971117632506295, "loss": 3.609, "step": 2706 }, { "epoch": 0.13, "grad_norm": 0.5949788093566895, "learning_rate": 0.0005971096316003783, "loss": 3.7868, "step": 2707 }, { "epoch": 0.13, "grad_norm": 0.5998488068580627, "learning_rate": 0.0005971074991675976, "loss": 3.599, "step": 2708 }, { "epoch": 0.13, "grad_norm": 0.5909786820411682, "learning_rate": 0.0005971053659522929, "loss": 3.5773, "step": 2709 }, { "epoch": 0.13, "grad_norm": 0.5821101665496826, "learning_rate": 0.0005971032319544698, "loss": 3.5233, "step": 2710 }, { "epoch": 0.13, "grad_norm": 0.587090253829956, "learning_rate": 0.000597101097174134, "loss": 3.7505, "step": 2711 }, { "epoch": 0.13, "grad_norm": 0.622183620929718, "learning_rate": 0.0005970989616112913, "loss": 3.3969, "step": 2712 }, { "epoch": 0.13, "grad_norm": 0.6129473447799683, "learning_rate": 0.0005970968252659468, "loss": 3.6862, "step": 2713 }, { "epoch": 0.13, "grad_norm": 0.659932017326355, "learning_rate": 0.0005970946881381066, "loss": 3.5053, "step": 2714 }, { "epoch": 0.13, "grad_norm": 0.6454612016677856, "learning_rate": 0.0005970925502277761, "loss": 3.4388, "step": 2715 }, { "epoch": 0.13, "grad_norm": 0.7548837065696716, "learning_rate": 0.0005970904115349612, "loss": 3.51, "step": 2716 }, { "epoch": 0.13, "grad_norm": 0.6142184138298035, "learning_rate": 0.000597088272059667, "loss": 3.6681, "step": 2717 }, { "epoch": 0.13, "grad_norm": 0.7135729193687439, "learning_rate": 0.0005970861318018997, "loss": 3.6722, "step": 2718 }, { "epoch": 0.13, "grad_norm": 0.6415162682533264, "learning_rate": 0.0005970839907616646, "loss": 3.5688, "step": 2719 }, { "epoch": 0.13, "grad_norm": 0.5897626280784607, "learning_rate": 0.0005970818489389675, "loss": 3.8595, "step": 2720 }, { "epoch": 0.13, "grad_norm": 0.6751065850257874, "learning_rate": 0.000597079706333814, "loss": 3.435, "step": 2721 }, { "epoch": 0.13, "grad_norm": 0.6195039749145508, "learning_rate": 0.0005970775629462096, "loss": 3.6634, "step": 2722 }, { "epoch": 0.13, "grad_norm": 0.6431128978729248, "learning_rate": 0.0005970754187761602, "loss": 3.6171, "step": 2723 }, { "epoch": 0.13, "grad_norm": 0.5993673205375671, "learning_rate": 0.0005970732738236713, "loss": 3.5535, "step": 2724 }, { "epoch": 0.13, "grad_norm": 0.6270466446876526, "learning_rate": 0.0005970711280887485, "loss": 3.5313, "step": 2725 }, { "epoch": 0.13, "grad_norm": 0.5823235511779785, "learning_rate": 0.0005970689815713976, "loss": 3.6458, "step": 2726 }, { "epoch": 0.13, "grad_norm": 0.5822427272796631, "learning_rate": 0.0005970668342716242, "loss": 3.8305, "step": 2727 }, { "epoch": 0.13, "grad_norm": 0.5477180480957031, "learning_rate": 0.0005970646861894338, "loss": 3.6481, "step": 2728 }, { "epoch": 0.13, "grad_norm": 0.578444242477417, "learning_rate": 0.0005970625373248323, "loss": 3.5803, "step": 2729 }, { "epoch": 0.13, "grad_norm": 0.6037389636039734, "learning_rate": 0.0005970603876778252, "loss": 3.5236, "step": 2730 }, { "epoch": 0.13, "grad_norm": 0.6162851452827454, "learning_rate": 0.0005970582372484182, "loss": 3.6997, "step": 2731 }, { "epoch": 0.13, "grad_norm": 0.6191104650497437, "learning_rate": 0.0005970560860366171, "loss": 3.6575, "step": 2732 }, { "epoch": 0.13, "grad_norm": 0.6048828959465027, "learning_rate": 0.0005970539340424273, "loss": 3.3794, "step": 2733 }, { "epoch": 0.13, "grad_norm": 0.6663912534713745, "learning_rate": 0.0005970517812658547, "loss": 3.3146, "step": 2734 }, { "epoch": 0.13, "grad_norm": 0.5750559568405151, "learning_rate": 0.0005970496277069048, "loss": 3.5452, "step": 2735 }, { "epoch": 0.13, "grad_norm": 0.5543981790542603, "learning_rate": 0.0005970474733655833, "loss": 3.8258, "step": 2736 }, { "epoch": 0.13, "grad_norm": 0.5796617865562439, "learning_rate": 0.0005970453182418961, "loss": 3.7764, "step": 2737 }, { "epoch": 0.13, "grad_norm": 0.5608425140380859, "learning_rate": 0.0005970431623358485, "loss": 3.4656, "step": 2738 }, { "epoch": 0.13, "grad_norm": 0.6683704853057861, "learning_rate": 0.0005970410056474466, "loss": 3.7797, "step": 2739 }, { "epoch": 0.13, "grad_norm": 0.6158978343009949, "learning_rate": 0.0005970388481766957, "loss": 3.7061, "step": 2740 }, { "epoch": 0.13, "grad_norm": 0.5848861932754517, "learning_rate": 0.0005970366899236015, "loss": 3.5359, "step": 2741 }, { "epoch": 0.13, "grad_norm": 0.5714642405509949, "learning_rate": 0.0005970345308881701, "loss": 3.4968, "step": 2742 }, { "epoch": 0.13, "grad_norm": 0.5503971576690674, "learning_rate": 0.0005970323710704068, "loss": 3.5294, "step": 2743 }, { "epoch": 0.13, "grad_norm": 0.6631685495376587, "learning_rate": 0.0005970302104703174, "loss": 3.4137, "step": 2744 }, { "epoch": 0.13, "grad_norm": 0.5520222187042236, "learning_rate": 0.0005970280490879076, "loss": 3.5508, "step": 2745 }, { "epoch": 0.13, "grad_norm": 0.5935842990875244, "learning_rate": 0.0005970258869231831, "loss": 3.6917, "step": 2746 }, { "epoch": 0.13, "grad_norm": 0.6259281635284424, "learning_rate": 0.0005970237239761496, "loss": 3.5549, "step": 2747 }, { "epoch": 0.13, "grad_norm": 0.6221252679824829, "learning_rate": 0.0005970215602468127, "loss": 3.5163, "step": 2748 }, { "epoch": 0.13, "grad_norm": 0.572911262512207, "learning_rate": 0.0005970193957351782, "loss": 3.6548, "step": 2749 }, { "epoch": 0.13, "grad_norm": 0.5745850801467896, "learning_rate": 0.0005970172304412517, "loss": 3.9044, "step": 2750 }, { "epoch": 0.13, "grad_norm": 0.5390734672546387, "learning_rate": 0.0005970150643650391, "loss": 3.635, "step": 2751 }, { "epoch": 0.13, "grad_norm": 0.6086849570274353, "learning_rate": 0.0005970128975065459, "loss": 3.527, "step": 2752 }, { "epoch": 0.13, "grad_norm": 0.5953769683837891, "learning_rate": 0.000597010729865778, "loss": 3.5134, "step": 2753 }, { "epoch": 0.13, "grad_norm": 0.5471598505973816, "learning_rate": 0.0005970085614427409, "loss": 3.4587, "step": 2754 }, { "epoch": 0.14, "grad_norm": 0.5561854243278503, "learning_rate": 0.0005970063922374404, "loss": 3.6646, "step": 2755 }, { "epoch": 0.14, "grad_norm": 0.5902842283248901, "learning_rate": 0.0005970042222498823, "loss": 3.4375, "step": 2756 }, { "epoch": 0.14, "grad_norm": 0.6033221483230591, "learning_rate": 0.0005970020514800722, "loss": 3.4188, "step": 2757 }, { "epoch": 0.14, "grad_norm": 0.5598551034927368, "learning_rate": 0.0005969998799280158, "loss": 3.5559, "step": 2758 }, { "epoch": 0.14, "grad_norm": 0.5618605017662048, "learning_rate": 0.0005969977075937189, "loss": 3.778, "step": 2759 }, { "epoch": 0.14, "grad_norm": 0.6076387763023376, "learning_rate": 0.0005969955344771872, "loss": 3.5922, "step": 2760 }, { "epoch": 0.14, "grad_norm": 0.5863784551620483, "learning_rate": 0.0005969933605784265, "loss": 3.5362, "step": 2761 }, { "epoch": 0.14, "grad_norm": 0.5936189293861389, "learning_rate": 0.0005969911858974424, "loss": 3.6277, "step": 2762 }, { "epoch": 0.14, "grad_norm": 0.5283812880516052, "learning_rate": 0.0005969890104342406, "loss": 3.4249, "step": 2763 }, { "epoch": 0.14, "grad_norm": 0.6723504066467285, "learning_rate": 0.000596986834188827, "loss": 3.583, "step": 2764 }, { "epoch": 0.14, "grad_norm": 0.5366487503051758, "learning_rate": 0.0005969846571612072, "loss": 3.6369, "step": 2765 }, { "epoch": 0.14, "grad_norm": 0.6037803292274475, "learning_rate": 0.0005969824793513871, "loss": 3.4389, "step": 2766 }, { "epoch": 0.14, "grad_norm": 0.6620571613311768, "learning_rate": 0.000596980300759372, "loss": 3.6024, "step": 2767 }, { "epoch": 0.14, "grad_norm": 0.5733856558799744, "learning_rate": 0.0005969781213851682, "loss": 3.4694, "step": 2768 }, { "epoch": 0.14, "grad_norm": 0.5712093710899353, "learning_rate": 0.000596975941228781, "loss": 3.638, "step": 2769 }, { "epoch": 0.14, "grad_norm": 0.6224941611289978, "learning_rate": 0.0005969737602902165, "loss": 3.4212, "step": 2770 }, { "epoch": 0.14, "grad_norm": 0.6067854166030884, "learning_rate": 0.0005969715785694802, "loss": 3.6626, "step": 2771 }, { "epoch": 0.14, "grad_norm": 0.5889960527420044, "learning_rate": 0.0005969693960665779, "loss": 3.641, "step": 2772 }, { "epoch": 0.14, "grad_norm": 0.6073998212814331, "learning_rate": 0.0005969672127815153, "loss": 3.3504, "step": 2773 }, { "epoch": 0.14, "grad_norm": 0.5916741490364075, "learning_rate": 0.0005969650287142983, "loss": 3.6483, "step": 2774 }, { "epoch": 0.14, "grad_norm": 0.5662715435028076, "learning_rate": 0.0005969628438649326, "loss": 3.3069, "step": 2775 }, { "epoch": 0.14, "grad_norm": 0.5828217267990112, "learning_rate": 0.0005969606582334238, "loss": 3.6519, "step": 2776 }, { "epoch": 0.14, "grad_norm": 0.5853772759437561, "learning_rate": 0.0005969584718197779, "loss": 3.5239, "step": 2777 }, { "epoch": 0.14, "grad_norm": 0.5956276655197144, "learning_rate": 0.0005969562846240005, "loss": 3.6071, "step": 2778 }, { "epoch": 0.14, "grad_norm": 0.5762497782707214, "learning_rate": 0.0005969540966460974, "loss": 3.738, "step": 2779 }, { "epoch": 0.14, "grad_norm": 0.5765044093132019, "learning_rate": 0.0005969519078860742, "loss": 3.575, "step": 2780 }, { "epoch": 0.14, "grad_norm": 0.7366907596588135, "learning_rate": 0.0005969497183439371, "loss": 3.4483, "step": 2781 }, { "epoch": 0.14, "grad_norm": 0.5674998164176941, "learning_rate": 0.0005969475280196915, "loss": 3.6753, "step": 2782 }, { "epoch": 0.14, "grad_norm": 0.5822417140007019, "learning_rate": 0.0005969453369133431, "loss": 3.5733, "step": 2783 }, { "epoch": 0.14, "grad_norm": 0.5587081909179688, "learning_rate": 0.000596943145024898, "loss": 3.5532, "step": 2784 }, { "epoch": 0.14, "grad_norm": 0.5745068192481995, "learning_rate": 0.0005969409523543618, "loss": 3.5765, "step": 2785 }, { "epoch": 0.14, "grad_norm": 0.5741488933563232, "learning_rate": 0.0005969387589017402, "loss": 3.6993, "step": 2786 }, { "epoch": 0.14, "grad_norm": 0.5863240957260132, "learning_rate": 0.0005969365646670391, "loss": 3.6545, "step": 2787 }, { "epoch": 0.14, "grad_norm": 0.6137883067131042, "learning_rate": 0.0005969343696502642, "loss": 3.6242, "step": 2788 }, { "epoch": 0.14, "grad_norm": 0.593112587928772, "learning_rate": 0.0005969321738514213, "loss": 3.5533, "step": 2789 }, { "epoch": 0.14, "grad_norm": 0.5809875130653381, "learning_rate": 0.0005969299772705163, "loss": 3.8685, "step": 2790 }, { "epoch": 0.14, "grad_norm": 0.5593841671943665, "learning_rate": 0.0005969277799075548, "loss": 3.6496, "step": 2791 }, { "epoch": 0.14, "grad_norm": 0.5753694176673889, "learning_rate": 0.0005969255817625428, "loss": 3.4793, "step": 2792 }, { "epoch": 0.14, "grad_norm": 0.5818933248519897, "learning_rate": 0.0005969233828354857, "loss": 3.7245, "step": 2793 }, { "epoch": 0.14, "grad_norm": 0.5473413467407227, "learning_rate": 0.0005969211831263899, "loss": 3.3412, "step": 2794 }, { "epoch": 0.14, "grad_norm": 0.5579399466514587, "learning_rate": 0.0005969189826352605, "loss": 3.6375, "step": 2795 }, { "epoch": 0.14, "grad_norm": 0.6152604222297668, "learning_rate": 0.0005969167813621038, "loss": 3.5726, "step": 2796 }, { "epoch": 0.14, "grad_norm": 0.6010841727256775, "learning_rate": 0.0005969145793069255, "loss": 3.6222, "step": 2797 }, { "epoch": 0.14, "grad_norm": 0.5537547469139099, "learning_rate": 0.0005969123764697313, "loss": 3.7334, "step": 2798 }, { "epoch": 0.14, "grad_norm": 0.5809540748596191, "learning_rate": 0.0005969101728505269, "loss": 3.7945, "step": 2799 }, { "epoch": 0.14, "grad_norm": 0.5480958223342896, "learning_rate": 0.0005969079684493184, "loss": 3.6835, "step": 2800 }, { "epoch": 0.14, "grad_norm": 0.5858062505722046, "learning_rate": 0.0005969057632661115, "loss": 3.5088, "step": 2801 }, { "epoch": 0.14, "grad_norm": 0.6234527230262756, "learning_rate": 0.0005969035573009119, "loss": 3.5747, "step": 2802 }, { "epoch": 0.14, "grad_norm": 0.5944379568099976, "learning_rate": 0.0005969013505537254, "loss": 3.6146, "step": 2803 }, { "epoch": 0.14, "grad_norm": 0.544714093208313, "learning_rate": 0.0005968991430245579, "loss": 3.5981, "step": 2804 }, { "epoch": 0.14, "grad_norm": 0.5830975770950317, "learning_rate": 0.0005968969347134154, "loss": 3.5539, "step": 2805 }, { "epoch": 0.14, "grad_norm": 0.5846774578094482, "learning_rate": 0.0005968947256203033, "loss": 3.4609, "step": 2806 }, { "epoch": 0.14, "grad_norm": 0.5505405068397522, "learning_rate": 0.0005968925157452276, "loss": 3.5572, "step": 2807 }, { "epoch": 0.14, "grad_norm": 0.584732174873352, "learning_rate": 0.0005968903050881943, "loss": 3.5472, "step": 2808 }, { "epoch": 0.14, "grad_norm": 0.6293777227401733, "learning_rate": 0.0005968880936492089, "loss": 3.5822, "step": 2809 }, { "epoch": 0.14, "grad_norm": 0.6841658353805542, "learning_rate": 0.0005968858814282776, "loss": 3.4138, "step": 2810 }, { "epoch": 0.14, "grad_norm": 0.6176837086677551, "learning_rate": 0.0005968836684254058, "loss": 3.4622, "step": 2811 }, { "epoch": 0.14, "grad_norm": 0.5423951148986816, "learning_rate": 0.0005968814546405998, "loss": 3.7367, "step": 2812 }, { "epoch": 0.14, "grad_norm": 0.5896667242050171, "learning_rate": 0.000596879240073865, "loss": 3.8234, "step": 2813 }, { "epoch": 0.14, "grad_norm": 0.6115859150886536, "learning_rate": 0.0005968770247252074, "loss": 3.5844, "step": 2814 }, { "epoch": 0.14, "grad_norm": 0.5899752378463745, "learning_rate": 0.000596874808594633, "loss": 3.6039, "step": 2815 }, { "epoch": 0.14, "grad_norm": 0.5383777022361755, "learning_rate": 0.0005968725916821474, "loss": 3.5118, "step": 2816 }, { "epoch": 0.14, "grad_norm": 0.5764883756637573, "learning_rate": 0.0005968703739877566, "loss": 3.7468, "step": 2817 }, { "epoch": 0.14, "grad_norm": 0.927655816078186, "learning_rate": 0.0005968681555114663, "loss": 3.5418, "step": 2818 }, { "epoch": 0.14, "grad_norm": 0.5765186548233032, "learning_rate": 0.0005968659362532824, "loss": 3.6529, "step": 2819 }, { "epoch": 0.14, "grad_norm": 0.5878188610076904, "learning_rate": 0.0005968637162132108, "loss": 3.6059, "step": 2820 }, { "epoch": 0.14, "grad_norm": 0.637100875377655, "learning_rate": 0.0005968614953912574, "loss": 3.6037, "step": 2821 }, { "epoch": 0.14, "grad_norm": 0.6326918601989746, "learning_rate": 0.0005968592737874278, "loss": 3.695, "step": 2822 }, { "epoch": 0.14, "grad_norm": 0.5842844843864441, "learning_rate": 0.000596857051401728, "loss": 3.7953, "step": 2823 }, { "epoch": 0.14, "grad_norm": 0.6204371452331543, "learning_rate": 0.0005968548282341639, "loss": 3.5894, "step": 2824 }, { "epoch": 0.14, "grad_norm": 0.556685209274292, "learning_rate": 0.0005968526042847413, "loss": 3.606, "step": 2825 }, { "epoch": 0.14, "grad_norm": 0.5710826516151428, "learning_rate": 0.0005968503795534661, "loss": 3.5026, "step": 2826 }, { "epoch": 0.14, "grad_norm": 0.569648802280426, "learning_rate": 0.0005968481540403441, "loss": 3.6895, "step": 2827 }, { "epoch": 0.14, "grad_norm": 0.585290253162384, "learning_rate": 0.0005968459277453813, "loss": 3.5407, "step": 2828 }, { "epoch": 0.14, "grad_norm": 0.5872741937637329, "learning_rate": 0.0005968437006685834, "loss": 3.7963, "step": 2829 }, { "epoch": 0.14, "grad_norm": 0.5374106168746948, "learning_rate": 0.0005968414728099563, "loss": 3.6502, "step": 2830 }, { "epoch": 0.14, "grad_norm": 0.5915255546569824, "learning_rate": 0.0005968392441695058, "loss": 3.4514, "step": 2831 }, { "epoch": 0.14, "grad_norm": 0.6637536287307739, "learning_rate": 0.000596837014747238, "loss": 3.5065, "step": 2832 }, { "epoch": 0.14, "grad_norm": 0.5768295526504517, "learning_rate": 0.0005968347845431586, "loss": 3.5267, "step": 2833 }, { "epoch": 0.14, "grad_norm": 0.595833957195282, "learning_rate": 0.0005968325535572734, "loss": 3.5768, "step": 2834 }, { "epoch": 0.14, "grad_norm": 0.5429056286811829, "learning_rate": 0.0005968303217895885, "loss": 3.569, "step": 2835 }, { "epoch": 0.14, "grad_norm": 0.6460859775543213, "learning_rate": 0.0005968280892401095, "loss": 3.503, "step": 2836 }, { "epoch": 0.14, "grad_norm": 0.6241177916526794, "learning_rate": 0.0005968258559088427, "loss": 3.6161, "step": 2837 }, { "epoch": 0.14, "grad_norm": 0.5874194502830505, "learning_rate": 0.0005968236217957935, "loss": 3.6232, "step": 2838 }, { "epoch": 0.14, "grad_norm": 0.5746603608131409, "learning_rate": 0.0005968213869009681, "loss": 3.5134, "step": 2839 }, { "epoch": 0.14, "grad_norm": 0.5978888273239136, "learning_rate": 0.0005968191512243723, "loss": 3.8172, "step": 2840 }, { "epoch": 0.14, "grad_norm": 0.6809991002082825, "learning_rate": 0.0005968169147660119, "loss": 3.5159, "step": 2841 }, { "epoch": 0.14, "grad_norm": 0.6131600141525269, "learning_rate": 0.0005968146775258929, "loss": 3.5549, "step": 2842 }, { "epoch": 0.14, "grad_norm": 0.5964472889900208, "learning_rate": 0.0005968124395040212, "loss": 3.4967, "step": 2843 }, { "epoch": 0.14, "grad_norm": 0.599494457244873, "learning_rate": 0.0005968102007004026, "loss": 3.5785, "step": 2844 }, { "epoch": 0.14, "grad_norm": 0.5741751790046692, "learning_rate": 0.0005968079611150431, "loss": 3.7153, "step": 2845 }, { "epoch": 0.14, "grad_norm": 0.5869643688201904, "learning_rate": 0.0005968057207479486, "loss": 3.5068, "step": 2846 }, { "epoch": 0.14, "grad_norm": 0.5856844186782837, "learning_rate": 0.0005968034795991249, "loss": 3.8081, "step": 2847 }, { "epoch": 0.14, "grad_norm": 0.6089456677436829, "learning_rate": 0.000596801237668578, "loss": 3.6488, "step": 2848 }, { "epoch": 0.14, "grad_norm": 0.5712987780570984, "learning_rate": 0.0005967989949563136, "loss": 3.4939, "step": 2849 }, { "epoch": 0.14, "grad_norm": 0.560558021068573, "learning_rate": 0.000596796751462338, "loss": 3.4375, "step": 2850 }, { "epoch": 0.14, "grad_norm": 0.6255397200584412, "learning_rate": 0.0005967945071866567, "loss": 3.4126, "step": 2851 }, { "epoch": 0.14, "grad_norm": 0.5810412764549255, "learning_rate": 0.0005967922621292758, "loss": 3.5257, "step": 2852 }, { "epoch": 0.14, "grad_norm": 0.6136355400085449, "learning_rate": 0.0005967900162902013, "loss": 3.5239, "step": 2853 }, { "epoch": 0.14, "grad_norm": 0.6601942777633667, "learning_rate": 0.000596787769669439, "loss": 3.6628, "step": 2854 }, { "epoch": 0.14, "grad_norm": 0.5993295311927795, "learning_rate": 0.0005967855222669947, "loss": 3.4613, "step": 2855 }, { "epoch": 0.14, "grad_norm": 0.6075938940048218, "learning_rate": 0.0005967832740828746, "loss": 3.4571, "step": 2856 }, { "epoch": 0.14, "grad_norm": 0.7354562878608704, "learning_rate": 0.0005967810251170844, "loss": 3.6329, "step": 2857 }, { "epoch": 0.14, "grad_norm": 0.5470739603042603, "learning_rate": 0.0005967787753696301, "loss": 3.309, "step": 2858 }, { "epoch": 0.14, "grad_norm": 0.7858545780181885, "learning_rate": 0.0005967765248405177, "loss": 3.6904, "step": 2859 }, { "epoch": 0.14, "grad_norm": 0.6135296821594238, "learning_rate": 0.0005967742735297529, "loss": 3.6427, "step": 2860 }, { "epoch": 0.14, "grad_norm": 0.5984460711479187, "learning_rate": 0.0005967720214373419, "loss": 3.8465, "step": 2861 }, { "epoch": 0.14, "grad_norm": 0.5901587009429932, "learning_rate": 0.0005967697685632904, "loss": 3.6866, "step": 2862 }, { "epoch": 0.14, "grad_norm": 0.5759127140045166, "learning_rate": 0.0005967675149076046, "loss": 3.6211, "step": 2863 }, { "epoch": 0.14, "grad_norm": 0.6124855279922485, "learning_rate": 0.0005967652604702902, "loss": 3.5057, "step": 2864 }, { "epoch": 0.14, "grad_norm": 0.5732986927032471, "learning_rate": 0.0005967630052513532, "loss": 3.6978, "step": 2865 }, { "epoch": 0.14, "grad_norm": 0.5618267059326172, "learning_rate": 0.0005967607492507995, "loss": 3.7548, "step": 2866 }, { "epoch": 0.14, "grad_norm": 0.6002089381217957, "learning_rate": 0.0005967584924686353, "loss": 3.7316, "step": 2867 }, { "epoch": 0.14, "grad_norm": 0.5847598314285278, "learning_rate": 0.0005967562349048662, "loss": 3.5734, "step": 2868 }, { "epoch": 0.14, "grad_norm": 0.5832062363624573, "learning_rate": 0.0005967539765594983, "loss": 3.7337, "step": 2869 }, { "epoch": 0.14, "grad_norm": 0.5734034776687622, "learning_rate": 0.0005967517174325375, "loss": 3.6052, "step": 2870 }, { "epoch": 0.14, "grad_norm": 0.5718584656715393, "learning_rate": 0.0005967494575239899, "loss": 3.6527, "step": 2871 }, { "epoch": 0.14, "grad_norm": 0.6056542992591858, "learning_rate": 0.0005967471968338612, "loss": 3.7136, "step": 2872 }, { "epoch": 0.14, "grad_norm": 0.6068187952041626, "learning_rate": 0.0005967449353621575, "loss": 3.7391, "step": 2873 }, { "epoch": 0.14, "grad_norm": 0.5216867327690125, "learning_rate": 0.0005967426731088849, "loss": 3.6024, "step": 2874 }, { "epoch": 0.14, "grad_norm": 0.6825017929077148, "learning_rate": 0.000596740410074049, "loss": 3.6602, "step": 2875 }, { "epoch": 0.14, "grad_norm": 0.5960718393325806, "learning_rate": 0.0005967381462576561, "loss": 3.6202, "step": 2876 }, { "epoch": 0.14, "grad_norm": 0.594954252243042, "learning_rate": 0.0005967358816597121, "loss": 3.5797, "step": 2877 }, { "epoch": 0.14, "grad_norm": 0.5977593660354614, "learning_rate": 0.0005967336162802228, "loss": 3.6304, "step": 2878 }, { "epoch": 0.14, "grad_norm": 0.5887445211410522, "learning_rate": 0.0005967313501191942, "loss": 3.5324, "step": 2879 }, { "epoch": 0.14, "grad_norm": 0.5791778564453125, "learning_rate": 0.0005967290831766325, "loss": 3.7551, "step": 2880 }, { "epoch": 0.14, "grad_norm": 0.6645099520683289, "learning_rate": 0.0005967268154525434, "loss": 3.468, "step": 2881 }, { "epoch": 0.14, "grad_norm": 0.5834534168243408, "learning_rate": 0.000596724546946933, "loss": 3.7626, "step": 2882 }, { "epoch": 0.14, "grad_norm": 0.600719153881073, "learning_rate": 0.0005967222776598072, "loss": 3.6733, "step": 2883 }, { "epoch": 0.14, "grad_norm": 0.6395867466926575, "learning_rate": 0.0005967200075911722, "loss": 3.5922, "step": 2884 }, { "epoch": 0.14, "grad_norm": 0.5920771360397339, "learning_rate": 0.0005967177367410335, "loss": 3.7339, "step": 2885 }, { "epoch": 0.14, "grad_norm": 0.5998722910881042, "learning_rate": 0.0005967154651093977, "loss": 3.6804, "step": 2886 }, { "epoch": 0.14, "grad_norm": 0.6235655546188354, "learning_rate": 0.0005967131926962703, "loss": 3.6184, "step": 2887 }, { "epoch": 0.14, "grad_norm": 0.5598371624946594, "learning_rate": 0.0005967109195016575, "loss": 3.4734, "step": 2888 }, { "epoch": 0.14, "grad_norm": 0.5574737787246704, "learning_rate": 0.0005967086455255651, "loss": 3.7455, "step": 2889 }, { "epoch": 0.14, "grad_norm": 0.60200434923172, "learning_rate": 0.0005967063707679994, "loss": 3.5133, "step": 2890 }, { "epoch": 0.14, "grad_norm": 0.5801019668579102, "learning_rate": 0.0005967040952289661, "loss": 3.6737, "step": 2891 }, { "epoch": 0.14, "grad_norm": 0.567669153213501, "learning_rate": 0.0005967018189084715, "loss": 3.7216, "step": 2892 }, { "epoch": 0.14, "grad_norm": 0.7055478096008301, "learning_rate": 0.0005966995418065212, "loss": 3.4534, "step": 2893 }, { "epoch": 0.14, "grad_norm": 0.562245786190033, "learning_rate": 0.0005966972639231215, "loss": 3.7154, "step": 2894 }, { "epoch": 0.14, "grad_norm": 0.5916703343391418, "learning_rate": 0.0005966949852582783, "loss": 3.5089, "step": 2895 }, { "epoch": 0.14, "grad_norm": 0.5672451853752136, "learning_rate": 0.0005966927058119976, "loss": 3.7155, "step": 2896 }, { "epoch": 0.14, "grad_norm": 0.5992611646652222, "learning_rate": 0.0005966904255842856, "loss": 3.7891, "step": 2897 }, { "epoch": 0.14, "grad_norm": 0.5881920456886292, "learning_rate": 0.0005966881445751478, "loss": 3.6009, "step": 2898 }, { "epoch": 0.14, "grad_norm": 0.5571060180664062, "learning_rate": 0.0005966858627845907, "loss": 3.7047, "step": 2899 }, { "epoch": 0.14, "grad_norm": 0.6121320128440857, "learning_rate": 0.0005966835802126201, "loss": 3.518, "step": 2900 }, { "epoch": 0.14, "grad_norm": 0.6062599420547485, "learning_rate": 0.000596681296859242, "loss": 3.3848, "step": 2901 }, { "epoch": 0.14, "grad_norm": 0.6015748381614685, "learning_rate": 0.0005966790127244626, "loss": 3.5153, "step": 2902 }, { "epoch": 0.14, "grad_norm": 0.5726820230484009, "learning_rate": 0.0005966767278082877, "loss": 3.7427, "step": 2903 }, { "epoch": 0.14, "grad_norm": 0.5963112115859985, "learning_rate": 0.0005966744421107234, "loss": 3.701, "step": 2904 }, { "epoch": 0.14, "grad_norm": 0.5752584934234619, "learning_rate": 0.0005966721556317757, "loss": 3.7769, "step": 2905 }, { "epoch": 0.14, "grad_norm": 0.56533282995224, "learning_rate": 0.0005966698683714506, "loss": 3.4874, "step": 2906 }, { "epoch": 0.14, "grad_norm": 0.5514774918556213, "learning_rate": 0.0005966675803297542, "loss": 3.4779, "step": 2907 }, { "epoch": 0.14, "grad_norm": 0.6327130794525146, "learning_rate": 0.0005966652915066925, "loss": 3.6413, "step": 2908 }, { "epoch": 0.14, "grad_norm": 0.562054455280304, "learning_rate": 0.0005966630019022715, "loss": 3.7253, "step": 2909 }, { "epoch": 0.14, "grad_norm": 0.5362357497215271, "learning_rate": 0.0005966607115164974, "loss": 3.7303, "step": 2910 }, { "epoch": 0.14, "grad_norm": 0.5918092131614685, "learning_rate": 0.0005966584203493759, "loss": 3.5367, "step": 2911 }, { "epoch": 0.14, "grad_norm": 0.6234297752380371, "learning_rate": 0.0005966561284009132, "loss": 3.6713, "step": 2912 }, { "epoch": 0.14, "grad_norm": 0.6105513572692871, "learning_rate": 0.0005966538356711154, "loss": 3.6219, "step": 2913 }, { "epoch": 0.14, "grad_norm": 0.527632474899292, "learning_rate": 0.0005966515421599885, "loss": 3.4177, "step": 2914 }, { "epoch": 0.14, "grad_norm": 0.6240413188934326, "learning_rate": 0.0005966492478675384, "loss": 3.6584, "step": 2915 }, { "epoch": 0.14, "grad_norm": 0.584805965423584, "learning_rate": 0.0005966469527937716, "loss": 3.5188, "step": 2916 }, { "epoch": 0.14, "grad_norm": 0.581307053565979, "learning_rate": 0.0005966446569386936, "loss": 3.5399, "step": 2917 }, { "epoch": 0.14, "grad_norm": 0.6673707962036133, "learning_rate": 0.0005966423603023105, "loss": 3.5465, "step": 2918 }, { "epoch": 0.14, "grad_norm": 0.5942588448524475, "learning_rate": 0.0005966400628846288, "loss": 3.4065, "step": 2919 }, { "epoch": 0.14, "grad_norm": 0.5521858930587769, "learning_rate": 0.0005966377646856541, "loss": 3.5278, "step": 2920 }, { "epoch": 0.14, "grad_norm": 0.5489894151687622, "learning_rate": 0.0005966354657053926, "loss": 3.805, "step": 2921 }, { "epoch": 0.14, "grad_norm": 1.0046864748001099, "learning_rate": 0.0005966331659438505, "loss": 3.5583, "step": 2922 }, { "epoch": 0.14, "grad_norm": 0.5688363909721375, "learning_rate": 0.0005966308654010337, "loss": 3.5806, "step": 2923 }, { "epoch": 0.14, "grad_norm": 0.5838121771812439, "learning_rate": 0.0005966285640769482, "loss": 3.404, "step": 2924 }, { "epoch": 0.14, "grad_norm": 0.6157076954841614, "learning_rate": 0.0005966262619716003, "loss": 3.6069, "step": 2925 }, { "epoch": 0.14, "grad_norm": 0.5905383229255676, "learning_rate": 0.0005966239590849958, "loss": 3.6116, "step": 2926 }, { "epoch": 0.14, "grad_norm": 0.5673978328704834, "learning_rate": 0.0005966216554171409, "loss": 3.3259, "step": 2927 }, { "epoch": 0.14, "grad_norm": 0.6059072613716125, "learning_rate": 0.0005966193509680416, "loss": 3.8699, "step": 2928 }, { "epoch": 0.14, "grad_norm": 0.5982714295387268, "learning_rate": 0.000596617045737704, "loss": 3.6271, "step": 2929 }, { "epoch": 0.14, "grad_norm": 0.5978359580039978, "learning_rate": 0.0005966147397261344, "loss": 3.55, "step": 2930 }, { "epoch": 0.14, "grad_norm": 0.5846501588821411, "learning_rate": 0.0005966124329333384, "loss": 3.72, "step": 2931 }, { "epoch": 0.14, "grad_norm": 0.5485678911209106, "learning_rate": 0.0005966101253593226, "loss": 3.6435, "step": 2932 }, { "epoch": 0.14, "grad_norm": 0.6040960550308228, "learning_rate": 0.0005966078170040925, "loss": 3.4047, "step": 2933 }, { "epoch": 0.14, "grad_norm": 0.629386842250824, "learning_rate": 0.0005966055078676547, "loss": 3.6221, "step": 2934 }, { "epoch": 0.14, "grad_norm": 0.5997804999351501, "learning_rate": 0.000596603197950015, "loss": 4.0703, "step": 2935 }, { "epoch": 0.14, "grad_norm": 0.6014472842216492, "learning_rate": 0.0005966008872511796, "loss": 3.6938, "step": 2936 }, { "epoch": 0.14, "grad_norm": 0.5625265836715698, "learning_rate": 0.0005965985757711545, "loss": 3.6621, "step": 2937 }, { "epoch": 0.14, "grad_norm": 0.5449039936065674, "learning_rate": 0.0005965962635099459, "loss": 3.648, "step": 2938 }, { "epoch": 0.14, "grad_norm": 0.5726627707481384, "learning_rate": 0.0005965939504675598, "loss": 3.6625, "step": 2939 }, { "epoch": 0.14, "grad_norm": 0.5476054549217224, "learning_rate": 0.0005965916366440023, "loss": 3.7134, "step": 2940 }, { "epoch": 0.14, "grad_norm": 0.6098428964614868, "learning_rate": 0.0005965893220392795, "loss": 3.9104, "step": 2941 }, { "epoch": 0.14, "grad_norm": 0.6405155062675476, "learning_rate": 0.0005965870066533975, "loss": 3.6453, "step": 2942 }, { "epoch": 0.14, "grad_norm": 0.6072257161140442, "learning_rate": 0.0005965846904863625, "loss": 3.657, "step": 2943 }, { "epoch": 0.14, "grad_norm": 0.5281662940979004, "learning_rate": 0.0005965823735381803, "loss": 3.8624, "step": 2944 }, { "epoch": 0.14, "grad_norm": 0.5664449334144592, "learning_rate": 0.0005965800558088573, "loss": 3.7622, "step": 2945 }, { "epoch": 0.14, "grad_norm": 0.5815826654434204, "learning_rate": 0.0005965777372983996, "loss": 3.7028, "step": 2946 }, { "epoch": 0.14, "grad_norm": 0.5717793107032776, "learning_rate": 0.0005965754180068132, "loss": 3.5994, "step": 2947 }, { "epoch": 0.14, "grad_norm": 0.56984543800354, "learning_rate": 0.0005965730979341042, "loss": 3.4422, "step": 2948 }, { "epoch": 0.14, "grad_norm": 0.5990952849388123, "learning_rate": 0.0005965707770802786, "loss": 3.5145, "step": 2949 }, { "epoch": 0.14, "grad_norm": 0.5824059247970581, "learning_rate": 0.0005965684554453428, "loss": 3.5848, "step": 2950 }, { "epoch": 0.14, "grad_norm": 0.5779900550842285, "learning_rate": 0.0005965661330293027, "loss": 3.6382, "step": 2951 }, { "epoch": 0.14, "grad_norm": 0.5480761528015137, "learning_rate": 0.0005965638098321646, "loss": 3.6473, "step": 2952 }, { "epoch": 0.14, "grad_norm": 0.5885190367698669, "learning_rate": 0.0005965614858539344, "loss": 3.5578, "step": 2953 }, { "epoch": 0.14, "grad_norm": 0.5397934913635254, "learning_rate": 0.0005965591610946184, "loss": 3.547, "step": 2954 }, { "epoch": 0.14, "grad_norm": 0.5664386749267578, "learning_rate": 0.0005965568355542225, "loss": 3.7429, "step": 2955 }, { "epoch": 0.14, "grad_norm": 0.5953521728515625, "learning_rate": 0.0005965545092327531, "loss": 3.6301, "step": 2956 }, { "epoch": 0.14, "grad_norm": 0.5471416711807251, "learning_rate": 0.0005965521821302162, "loss": 3.6399, "step": 2957 }, { "epoch": 0.14, "grad_norm": 0.5689468383789062, "learning_rate": 0.0005965498542466178, "loss": 3.6759, "step": 2958 }, { "epoch": 0.15, "grad_norm": 0.5762372612953186, "learning_rate": 0.0005965475255819642, "loss": 3.6877, "step": 2959 }, { "epoch": 0.15, "grad_norm": 0.5529139637947083, "learning_rate": 0.0005965451961362615, "loss": 3.4141, "step": 2960 }, { "epoch": 0.15, "grad_norm": 0.5945937633514404, "learning_rate": 0.0005965428659095158, "loss": 3.5489, "step": 2961 }, { "epoch": 0.15, "grad_norm": 0.548331081867218, "learning_rate": 0.0005965405349017334, "loss": 3.6366, "step": 2962 }, { "epoch": 0.15, "grad_norm": 0.6198789477348328, "learning_rate": 0.0005965382031129201, "loss": 3.5774, "step": 2963 }, { "epoch": 0.15, "grad_norm": 0.6308538913726807, "learning_rate": 0.0005965358705430824, "loss": 3.5777, "step": 2964 }, { "epoch": 0.15, "grad_norm": 0.5835420489311218, "learning_rate": 0.0005965335371922261, "loss": 3.4532, "step": 2965 }, { "epoch": 0.15, "grad_norm": 0.56312495470047, "learning_rate": 0.0005965312030603578, "loss": 3.6189, "step": 2966 }, { "epoch": 0.15, "grad_norm": 0.6242998838424683, "learning_rate": 0.0005965288681474832, "loss": 3.4261, "step": 2967 }, { "epoch": 0.15, "grad_norm": 0.5804765224456787, "learning_rate": 0.0005965265324536087, "loss": 3.6305, "step": 2968 }, { "epoch": 0.15, "grad_norm": 0.5844853520393372, "learning_rate": 0.0005965241959787404, "loss": 3.4267, "step": 2969 }, { "epoch": 0.15, "grad_norm": 0.5954545140266418, "learning_rate": 0.0005965218587228844, "loss": 3.615, "step": 2970 }, { "epoch": 0.15, "grad_norm": 0.5783625245094299, "learning_rate": 0.0005965195206860468, "loss": 3.7189, "step": 2971 }, { "epoch": 0.15, "grad_norm": 0.5781705975532532, "learning_rate": 0.0005965171818682339, "loss": 3.6033, "step": 2972 }, { "epoch": 0.15, "grad_norm": 0.5989189147949219, "learning_rate": 0.0005965148422694518, "loss": 3.5753, "step": 2973 }, { "epoch": 0.15, "grad_norm": 0.6061220169067383, "learning_rate": 0.0005965125018897068, "loss": 3.6133, "step": 2974 }, { "epoch": 0.15, "grad_norm": 0.6106086373329163, "learning_rate": 0.0005965101607290047, "loss": 3.5082, "step": 2975 }, { "epoch": 0.15, "grad_norm": 0.540738046169281, "learning_rate": 0.0005965078187873522, "loss": 3.3238, "step": 2976 }, { "epoch": 0.15, "grad_norm": 0.5824552774429321, "learning_rate": 0.0005965054760647549, "loss": 3.611, "step": 2977 }, { "epoch": 0.15, "grad_norm": 0.5599578619003296, "learning_rate": 0.0005965031325612195, "loss": 3.5603, "step": 2978 }, { "epoch": 0.15, "grad_norm": 0.5765003561973572, "learning_rate": 0.0005965007882767516, "loss": 3.4808, "step": 2979 }, { "epoch": 0.15, "grad_norm": 0.5761189460754395, "learning_rate": 0.0005964984432113579, "loss": 3.5618, "step": 2980 }, { "epoch": 0.15, "grad_norm": 0.5370120406150818, "learning_rate": 0.0005964960973650444, "loss": 3.479, "step": 2981 }, { "epoch": 0.15, "grad_norm": 0.5539388060569763, "learning_rate": 0.0005964937507378171, "loss": 3.5543, "step": 2982 }, { "epoch": 0.15, "grad_norm": 0.5843010544776917, "learning_rate": 0.0005964914033296824, "loss": 3.4738, "step": 2983 }, { "epoch": 0.15, "grad_norm": 0.5479416251182556, "learning_rate": 0.0005964890551406463, "loss": 3.6649, "step": 2984 }, { "epoch": 0.15, "grad_norm": 0.590008020401001, "learning_rate": 0.0005964867061707152, "loss": 3.6951, "step": 2985 }, { "epoch": 0.15, "grad_norm": 0.6158657073974609, "learning_rate": 0.0005964843564198951, "loss": 3.5394, "step": 2986 }, { "epoch": 0.15, "grad_norm": 0.5672131180763245, "learning_rate": 0.0005964820058881925, "loss": 3.5563, "step": 2987 }, { "epoch": 0.15, "grad_norm": 0.5937507748603821, "learning_rate": 0.000596479654575613, "loss": 3.7613, "step": 2988 }, { "epoch": 0.15, "grad_norm": 0.6074742078781128, "learning_rate": 0.0005964773024821633, "loss": 3.6806, "step": 2989 }, { "epoch": 0.15, "grad_norm": 0.5991113185882568, "learning_rate": 0.0005964749496078495, "loss": 3.528, "step": 2990 }, { "epoch": 0.15, "grad_norm": 0.5741263031959534, "learning_rate": 0.0005964725959526777, "loss": 3.4517, "step": 2991 }, { "epoch": 0.15, "grad_norm": 0.6311984658241272, "learning_rate": 0.0005964702415166541, "loss": 3.5116, "step": 2992 }, { "epoch": 0.15, "grad_norm": 0.5775881409645081, "learning_rate": 0.000596467886299785, "loss": 3.4346, "step": 2993 }, { "epoch": 0.15, "grad_norm": 0.5953471660614014, "learning_rate": 0.0005964655303020766, "loss": 3.566, "step": 2994 }, { "epoch": 0.15, "grad_norm": 0.6620184779167175, "learning_rate": 0.0005964631735235349, "loss": 3.7845, "step": 2995 }, { "epoch": 0.15, "grad_norm": 0.5985296964645386, "learning_rate": 0.0005964608159641664, "loss": 3.6863, "step": 2996 }, { "epoch": 0.15, "grad_norm": 0.6449403166770935, "learning_rate": 0.000596458457623977, "loss": 3.5711, "step": 2997 }, { "epoch": 0.15, "grad_norm": 0.583063006401062, "learning_rate": 0.0005964560985029732, "loss": 3.3602, "step": 2998 }, { "epoch": 0.15, "grad_norm": 0.5750587582588196, "learning_rate": 0.0005964537386011611, "loss": 3.6342, "step": 2999 }, { "epoch": 0.15, "grad_norm": 0.5808610320091248, "learning_rate": 0.0005964513779185468, "loss": 3.5019, "step": 3000 }, { "epoch": 0.15, "grad_norm": 0.6237676739692688, "learning_rate": 0.0005964490164551367, "loss": 3.5213, "step": 3001 }, { "epoch": 0.15, "grad_norm": 0.5874052047729492, "learning_rate": 0.000596446654210937, "loss": 3.3836, "step": 3002 }, { "epoch": 0.15, "grad_norm": 0.5484369993209839, "learning_rate": 0.0005964442911859538, "loss": 3.2887, "step": 3003 }, { "epoch": 0.15, "grad_norm": 0.5622039437294006, "learning_rate": 0.0005964419273801934, "loss": 3.5959, "step": 3004 }, { "epoch": 0.15, "grad_norm": 0.601517915725708, "learning_rate": 0.000596439562793662, "loss": 3.588, "step": 3005 }, { "epoch": 0.15, "grad_norm": 0.6053403615951538, "learning_rate": 0.0005964371974263658, "loss": 3.6096, "step": 3006 }, { "epoch": 0.15, "grad_norm": 0.5661147832870483, "learning_rate": 0.0005964348312783111, "loss": 3.52, "step": 3007 }, { "epoch": 0.15, "grad_norm": 0.6172541975975037, "learning_rate": 0.0005964324643495042, "loss": 3.6069, "step": 3008 }, { "epoch": 0.15, "grad_norm": 0.5625926852226257, "learning_rate": 0.0005964300966399511, "loss": 3.6741, "step": 3009 }, { "epoch": 0.15, "grad_norm": 0.5759730339050293, "learning_rate": 0.0005964277281496583, "loss": 3.5645, "step": 3010 }, { "epoch": 0.15, "grad_norm": 0.5957322120666504, "learning_rate": 0.0005964253588786318, "loss": 3.5519, "step": 3011 }, { "epoch": 0.15, "grad_norm": 0.5785747170448303, "learning_rate": 0.000596422988826878, "loss": 3.4793, "step": 3012 }, { "epoch": 0.15, "grad_norm": 0.5878031849861145, "learning_rate": 0.0005964206179944031, "loss": 3.5527, "step": 3013 }, { "epoch": 0.15, "grad_norm": 0.5843365788459778, "learning_rate": 0.0005964182463812133, "loss": 3.5102, "step": 3014 }, { "epoch": 0.15, "grad_norm": 0.5953040719032288, "learning_rate": 0.000596415873987315, "loss": 3.6765, "step": 3015 }, { "epoch": 0.15, "grad_norm": 0.5682270526885986, "learning_rate": 0.0005964135008127143, "loss": 3.6247, "step": 3016 }, { "epoch": 0.15, "grad_norm": 0.5600559115409851, "learning_rate": 0.0005964111268574174, "loss": 3.5984, "step": 3017 }, { "epoch": 0.15, "grad_norm": 0.6236439943313599, "learning_rate": 0.0005964087521214307, "loss": 3.5485, "step": 3018 }, { "epoch": 0.15, "grad_norm": 0.5868257284164429, "learning_rate": 0.0005964063766047602, "loss": 3.6191, "step": 3019 }, { "epoch": 0.15, "grad_norm": 0.5846828818321228, "learning_rate": 0.0005964040003074125, "loss": 3.3374, "step": 3020 }, { "epoch": 0.15, "grad_norm": 0.5583348274230957, "learning_rate": 0.0005964016232293939, "loss": 3.6922, "step": 3021 }, { "epoch": 0.15, "grad_norm": 0.6195686459541321, "learning_rate": 0.0005963992453707102, "loss": 3.708, "step": 3022 }, { "epoch": 0.15, "grad_norm": 0.6074361801147461, "learning_rate": 0.0005963968667313681, "loss": 3.6087, "step": 3023 }, { "epoch": 0.15, "grad_norm": 0.5756213665008545, "learning_rate": 0.0005963944873113736, "loss": 3.5665, "step": 3024 }, { "epoch": 0.15, "grad_norm": 0.6131848096847534, "learning_rate": 0.0005963921071107331, "loss": 3.1928, "step": 3025 }, { "epoch": 0.15, "grad_norm": 0.550272524356842, "learning_rate": 0.0005963897261294528, "loss": 3.6607, "step": 3026 }, { "epoch": 0.15, "grad_norm": 0.6190971732139587, "learning_rate": 0.000596387344367539, "loss": 3.619, "step": 3027 }, { "epoch": 0.15, "grad_norm": 0.6038632392883301, "learning_rate": 0.000596384961824998, "loss": 3.775, "step": 3028 }, { "epoch": 0.15, "grad_norm": 0.5631963014602661, "learning_rate": 0.0005963825785018361, "loss": 3.5747, "step": 3029 }, { "epoch": 0.15, "grad_norm": 0.5642056465148926, "learning_rate": 0.0005963801943980595, "loss": 3.542, "step": 3030 }, { "epoch": 0.15, "grad_norm": 0.5921563506126404, "learning_rate": 0.0005963778095136744, "loss": 3.5717, "step": 3031 }, { "epoch": 0.15, "grad_norm": 0.5655155777931213, "learning_rate": 0.0005963754238486873, "loss": 3.5262, "step": 3032 }, { "epoch": 0.15, "grad_norm": 0.5852659940719604, "learning_rate": 0.0005963730374031044, "loss": 3.6345, "step": 3033 }, { "epoch": 0.15, "grad_norm": 0.5611869096755981, "learning_rate": 0.0005963706501769319, "loss": 3.6346, "step": 3034 }, { "epoch": 0.15, "grad_norm": 0.5403077602386475, "learning_rate": 0.0005963682621701761, "loss": 3.7496, "step": 3035 }, { "epoch": 0.15, "grad_norm": 0.5627409219741821, "learning_rate": 0.0005963658733828433, "loss": 3.6472, "step": 3036 }, { "epoch": 0.15, "grad_norm": 0.5713598132133484, "learning_rate": 0.00059636348381494, "loss": 3.3384, "step": 3037 }, { "epoch": 0.15, "grad_norm": 0.6267187595367432, "learning_rate": 0.0005963610934664722, "loss": 3.5897, "step": 3038 }, { "epoch": 0.15, "grad_norm": 0.6044197678565979, "learning_rate": 0.0005963587023374463, "loss": 3.7926, "step": 3039 }, { "epoch": 0.15, "grad_norm": 0.6235830783843994, "learning_rate": 0.0005963563104278687, "loss": 3.7628, "step": 3040 }, { "epoch": 0.15, "grad_norm": 0.6233013868331909, "learning_rate": 0.0005963539177377456, "loss": 3.4503, "step": 3041 }, { "epoch": 0.15, "grad_norm": 0.5676363110542297, "learning_rate": 0.0005963515242670833, "loss": 3.5615, "step": 3042 }, { "epoch": 0.15, "grad_norm": 0.6279856562614441, "learning_rate": 0.0005963491300158881, "loss": 3.6456, "step": 3043 }, { "epoch": 0.15, "grad_norm": 0.5585012435913086, "learning_rate": 0.0005963467349841663, "loss": 3.5764, "step": 3044 }, { "epoch": 0.15, "grad_norm": 0.5672016143798828, "learning_rate": 0.0005963443391719242, "loss": 3.3576, "step": 3045 }, { "epoch": 0.15, "grad_norm": 0.6148715019226074, "learning_rate": 0.0005963419425791683, "loss": 3.7448, "step": 3046 }, { "epoch": 0.15, "grad_norm": 0.5559766888618469, "learning_rate": 0.0005963395452059046, "loss": 3.7044, "step": 3047 }, { "epoch": 0.15, "grad_norm": 0.5408651232719421, "learning_rate": 0.0005963371470521396, "loss": 3.5665, "step": 3048 }, { "epoch": 0.15, "grad_norm": 0.5515286922454834, "learning_rate": 0.0005963347481178796, "loss": 3.5655, "step": 3049 }, { "epoch": 0.15, "grad_norm": 0.5659925937652588, "learning_rate": 0.0005963323484031309, "loss": 3.7384, "step": 3050 }, { "epoch": 0.15, "grad_norm": 0.5940973162651062, "learning_rate": 0.0005963299479078999, "loss": 3.6098, "step": 3051 }, { "epoch": 0.15, "grad_norm": 0.5960044264793396, "learning_rate": 0.0005963275466321928, "loss": 3.5784, "step": 3052 }, { "epoch": 0.15, "grad_norm": 0.5913186073303223, "learning_rate": 0.000596325144576016, "loss": 3.3696, "step": 3053 }, { "epoch": 0.15, "grad_norm": 0.5472952723503113, "learning_rate": 0.0005963227417393758, "loss": 3.3607, "step": 3054 }, { "epoch": 0.15, "grad_norm": 0.5894946455955505, "learning_rate": 0.0005963203381222784, "loss": 3.5893, "step": 3055 }, { "epoch": 0.15, "grad_norm": 0.586334228515625, "learning_rate": 0.0005963179337247303, "loss": 3.6586, "step": 3056 }, { "epoch": 0.15, "grad_norm": 0.5724272131919861, "learning_rate": 0.0005963155285467379, "loss": 3.7376, "step": 3057 }, { "epoch": 0.15, "grad_norm": 0.5854930281639099, "learning_rate": 0.0005963131225883074, "loss": 3.6825, "step": 3058 }, { "epoch": 0.15, "grad_norm": 0.5693349242210388, "learning_rate": 0.0005963107158494451, "loss": 3.5495, "step": 3059 }, { "epoch": 0.15, "grad_norm": 0.5654905438423157, "learning_rate": 0.0005963083083301575, "loss": 3.6469, "step": 3060 }, { "epoch": 0.15, "grad_norm": 0.5747386813163757, "learning_rate": 0.0005963059000304507, "loss": 3.6497, "step": 3061 }, { "epoch": 0.15, "grad_norm": 0.6350069046020508, "learning_rate": 0.0005963034909503311, "loss": 3.6941, "step": 3062 }, { "epoch": 0.15, "grad_norm": 0.6059046387672424, "learning_rate": 0.0005963010810898054, "loss": 3.5363, "step": 3063 }, { "epoch": 0.15, "grad_norm": 0.5842269062995911, "learning_rate": 0.0005962986704488796, "loss": 3.7521, "step": 3064 }, { "epoch": 0.15, "grad_norm": 0.5611490607261658, "learning_rate": 0.00059629625902756, "loss": 3.6577, "step": 3065 }, { "epoch": 0.15, "grad_norm": 0.5932784676551819, "learning_rate": 0.0005962938468258532, "loss": 3.5643, "step": 3066 }, { "epoch": 0.15, "grad_norm": 0.596534252166748, "learning_rate": 0.0005962914338437654, "loss": 3.7409, "step": 3067 }, { "epoch": 0.15, "grad_norm": 0.5720816254615784, "learning_rate": 0.0005962890200813029, "loss": 3.7528, "step": 3068 }, { "epoch": 0.15, "grad_norm": 0.5783271193504333, "learning_rate": 0.0005962866055384722, "loss": 3.8315, "step": 3069 }, { "epoch": 0.15, "grad_norm": 0.5567073225975037, "learning_rate": 0.0005962841902152796, "loss": 3.5997, "step": 3070 }, { "epoch": 0.15, "grad_norm": 0.5587060451507568, "learning_rate": 0.0005962817741117316, "loss": 3.7797, "step": 3071 }, { "epoch": 0.15, "grad_norm": 0.5583739876747131, "learning_rate": 0.0005962793572278342, "loss": 3.768, "step": 3072 }, { "epoch": 0.15, "grad_norm": 0.6153991222381592, "learning_rate": 0.0005962769395635942, "loss": 3.6803, "step": 3073 }, { "epoch": 0.15, "grad_norm": 0.6166850328445435, "learning_rate": 0.0005962745211190176, "loss": 3.489, "step": 3074 }, { "epoch": 0.15, "grad_norm": 0.6104187369346619, "learning_rate": 0.000596272101894111, "loss": 3.4945, "step": 3075 }, { "epoch": 0.15, "grad_norm": 0.6122152805328369, "learning_rate": 0.0005962696818888807, "loss": 3.7251, "step": 3076 }, { "epoch": 0.15, "grad_norm": 0.5618709921836853, "learning_rate": 0.0005962672611033331, "loss": 3.6053, "step": 3077 }, { "epoch": 0.15, "grad_norm": 0.5944236516952515, "learning_rate": 0.0005962648395374745, "loss": 3.4591, "step": 3078 }, { "epoch": 0.15, "grad_norm": 0.6525372266769409, "learning_rate": 0.0005962624171913113, "loss": 3.4751, "step": 3079 }, { "epoch": 0.15, "grad_norm": 0.64317387342453, "learning_rate": 0.00059625999406485, "loss": 3.6205, "step": 3080 }, { "epoch": 0.15, "grad_norm": 0.6131284236907959, "learning_rate": 0.0005962575701580968, "loss": 3.6735, "step": 3081 }, { "epoch": 0.15, "grad_norm": 0.5894636511802673, "learning_rate": 0.0005962551454710583, "loss": 3.8265, "step": 3082 }, { "epoch": 0.15, "grad_norm": 0.6023474335670471, "learning_rate": 0.0005962527200037407, "loss": 3.573, "step": 3083 }, { "epoch": 0.15, "grad_norm": 0.5374550223350525, "learning_rate": 0.0005962502937561505, "loss": 3.467, "step": 3084 }, { "epoch": 0.15, "grad_norm": 0.5970792174339294, "learning_rate": 0.0005962478667282939, "loss": 3.6305, "step": 3085 }, { "epoch": 0.15, "grad_norm": 0.6187454462051392, "learning_rate": 0.0005962454389201776, "loss": 3.5442, "step": 3086 }, { "epoch": 0.15, "grad_norm": 0.5832275152206421, "learning_rate": 0.0005962430103318077, "loss": 3.5218, "step": 3087 }, { "epoch": 0.15, "grad_norm": 0.6036730408668518, "learning_rate": 0.0005962405809631908, "loss": 3.3699, "step": 3088 }, { "epoch": 0.15, "grad_norm": 0.569485068321228, "learning_rate": 0.0005962381508143332, "loss": 3.6637, "step": 3089 }, { "epoch": 0.15, "grad_norm": 0.587161660194397, "learning_rate": 0.0005962357198852412, "loss": 3.8073, "step": 3090 }, { "epoch": 0.15, "grad_norm": 0.5674685835838318, "learning_rate": 0.0005962332881759215, "loss": 3.5457, "step": 3091 }, { "epoch": 0.15, "grad_norm": 0.5693333745002747, "learning_rate": 0.0005962308556863803, "loss": 3.6018, "step": 3092 }, { "epoch": 0.15, "grad_norm": 0.6043726801872253, "learning_rate": 0.000596228422416624, "loss": 3.5762, "step": 3093 }, { "epoch": 0.15, "grad_norm": 0.5639926791191101, "learning_rate": 0.0005962259883666591, "loss": 3.8223, "step": 3094 }, { "epoch": 0.15, "grad_norm": 0.6272081136703491, "learning_rate": 0.0005962235535364918, "loss": 3.6311, "step": 3095 }, { "epoch": 0.15, "grad_norm": 0.5490891933441162, "learning_rate": 0.0005962211179261288, "loss": 3.455, "step": 3096 }, { "epoch": 0.15, "grad_norm": 0.6012369990348816, "learning_rate": 0.0005962186815355765, "loss": 3.4856, "step": 3097 }, { "epoch": 0.15, "grad_norm": 0.5988452434539795, "learning_rate": 0.000596216244364841, "loss": 3.5496, "step": 3098 }, { "epoch": 0.15, "grad_norm": 0.6350785493850708, "learning_rate": 0.000596213806413929, "loss": 3.5515, "step": 3099 }, { "epoch": 0.15, "grad_norm": 0.6114810705184937, "learning_rate": 0.0005962113676828468, "loss": 3.4899, "step": 3100 }, { "epoch": 0.15, "grad_norm": 0.5787695646286011, "learning_rate": 0.000596208928171601, "loss": 3.5458, "step": 3101 }, { "epoch": 0.15, "grad_norm": 0.5776200890541077, "learning_rate": 0.0005962064878801977, "loss": 3.4334, "step": 3102 }, { "epoch": 0.15, "grad_norm": 0.5516855716705322, "learning_rate": 0.0005962040468086436, "loss": 3.4205, "step": 3103 }, { "epoch": 0.15, "grad_norm": 0.5812780857086182, "learning_rate": 0.0005962016049569451, "loss": 3.7321, "step": 3104 }, { "epoch": 0.15, "grad_norm": 0.5914744138717651, "learning_rate": 0.0005961991623251085, "loss": 3.6045, "step": 3105 }, { "epoch": 0.15, "grad_norm": 0.5617961287498474, "learning_rate": 0.0005961967189131403, "loss": 3.6301, "step": 3106 }, { "epoch": 0.15, "grad_norm": 0.6185020208358765, "learning_rate": 0.0005961942747210469, "loss": 3.5499, "step": 3107 }, { "epoch": 0.15, "grad_norm": 0.609798014163971, "learning_rate": 0.0005961918297488349, "loss": 3.5542, "step": 3108 }, { "epoch": 0.15, "grad_norm": 0.5684676766395569, "learning_rate": 0.0005961893839965106, "loss": 3.5955, "step": 3109 }, { "epoch": 0.15, "grad_norm": 0.584693193435669, "learning_rate": 0.0005961869374640804, "loss": 3.3336, "step": 3110 }, { "epoch": 0.15, "grad_norm": 0.6073812246322632, "learning_rate": 0.0005961844901515509, "loss": 3.669, "step": 3111 }, { "epoch": 0.15, "grad_norm": 0.5767044425010681, "learning_rate": 0.0005961820420589285, "loss": 3.63, "step": 3112 }, { "epoch": 0.15, "grad_norm": 0.5799092054367065, "learning_rate": 0.0005961795931862194, "loss": 3.4574, "step": 3113 }, { "epoch": 0.15, "grad_norm": 0.545676052570343, "learning_rate": 0.0005961771435334304, "loss": 3.5919, "step": 3114 }, { "epoch": 0.15, "grad_norm": 0.582565188407898, "learning_rate": 0.0005961746931005678, "loss": 3.664, "step": 3115 }, { "epoch": 0.15, "grad_norm": 0.5571921467781067, "learning_rate": 0.000596172241887638, "loss": 3.5959, "step": 3116 }, { "epoch": 0.15, "grad_norm": 0.5776621699333191, "learning_rate": 0.0005961697898946475, "loss": 3.5268, "step": 3117 }, { "epoch": 0.15, "grad_norm": 0.6262937188148499, "learning_rate": 0.0005961673371216029, "loss": 3.4795, "step": 3118 }, { "epoch": 0.15, "grad_norm": 0.6134164929389954, "learning_rate": 0.0005961648835685105, "loss": 3.782, "step": 3119 }, { "epoch": 0.15, "grad_norm": 0.5699538588523865, "learning_rate": 0.0005961624292353766, "loss": 3.8283, "step": 3120 }, { "epoch": 0.15, "grad_norm": 0.5842909216880798, "learning_rate": 0.0005961599741222081, "loss": 3.4827, "step": 3121 }, { "epoch": 0.15, "grad_norm": 0.5857353806495667, "learning_rate": 0.0005961575182290111, "loss": 3.5638, "step": 3122 }, { "epoch": 0.15, "grad_norm": 0.5917581915855408, "learning_rate": 0.0005961550615557923, "loss": 3.5723, "step": 3123 }, { "epoch": 0.15, "grad_norm": 0.546079158782959, "learning_rate": 0.000596152604102558, "loss": 3.6738, "step": 3124 }, { "epoch": 0.15, "grad_norm": 0.5460621118545532, "learning_rate": 0.0005961501458693147, "loss": 3.6153, "step": 3125 }, { "epoch": 0.15, "grad_norm": 0.5750552415847778, "learning_rate": 0.000596147686856069, "loss": 3.5495, "step": 3126 }, { "epoch": 0.15, "grad_norm": 0.55301833152771, "learning_rate": 0.0005961452270628272, "loss": 3.5873, "step": 3127 }, { "epoch": 0.15, "grad_norm": 0.5648224949836731, "learning_rate": 0.0005961427664895959, "loss": 3.6474, "step": 3128 }, { "epoch": 0.15, "grad_norm": 0.581761360168457, "learning_rate": 0.0005961403051363814, "loss": 3.477, "step": 3129 }, { "epoch": 0.15, "grad_norm": 0.5733395218849182, "learning_rate": 0.0005961378430031905, "loss": 3.7321, "step": 3130 }, { "epoch": 0.15, "grad_norm": 0.5752854347229004, "learning_rate": 0.0005961353800900296, "loss": 3.7605, "step": 3131 }, { "epoch": 0.15, "grad_norm": 0.5976043343544006, "learning_rate": 0.000596132916396905, "loss": 3.7176, "step": 3132 }, { "epoch": 0.15, "grad_norm": 0.6109990477561951, "learning_rate": 0.0005961304519238232, "loss": 3.9394, "step": 3133 }, { "epoch": 0.15, "grad_norm": 0.6013369560241699, "learning_rate": 0.0005961279866707909, "loss": 3.4758, "step": 3134 }, { "epoch": 0.15, "grad_norm": 0.5654555559158325, "learning_rate": 0.0005961255206378144, "loss": 3.6548, "step": 3135 }, { "epoch": 0.15, "grad_norm": 0.5910729765892029, "learning_rate": 0.0005961230538249003, "loss": 3.5525, "step": 3136 }, { "epoch": 0.15, "grad_norm": 0.6182341575622559, "learning_rate": 0.0005961205862320551, "loss": 3.5527, "step": 3137 }, { "epoch": 0.15, "grad_norm": 0.5825470089912415, "learning_rate": 0.0005961181178592852, "loss": 3.6325, "step": 3138 }, { "epoch": 0.15, "grad_norm": 0.6601877808570862, "learning_rate": 0.0005961156487065972, "loss": 3.3948, "step": 3139 }, { "epoch": 0.15, "grad_norm": 0.5642703771591187, "learning_rate": 0.0005961131787739976, "loss": 3.5232, "step": 3140 }, { "epoch": 0.15, "grad_norm": 0.5936177372932434, "learning_rate": 0.0005961107080614929, "loss": 3.6665, "step": 3141 }, { "epoch": 0.15, "grad_norm": 0.5690498948097229, "learning_rate": 0.0005961082365690894, "loss": 3.4099, "step": 3142 }, { "epoch": 0.15, "grad_norm": 0.6080211400985718, "learning_rate": 0.0005961057642967939, "loss": 3.402, "step": 3143 }, { "epoch": 0.15, "grad_norm": 0.5955443382263184, "learning_rate": 0.0005961032912446128, "loss": 3.4212, "step": 3144 }, { "epoch": 0.15, "grad_norm": 0.5814092755317688, "learning_rate": 0.0005961008174125527, "loss": 3.3803, "step": 3145 }, { "epoch": 0.15, "grad_norm": 0.5517794489860535, "learning_rate": 0.00059609834280062, "loss": 3.5878, "step": 3146 }, { "epoch": 0.15, "grad_norm": 0.5653300881385803, "learning_rate": 0.0005960958674088211, "loss": 3.535, "step": 3147 }, { "epoch": 0.15, "grad_norm": 0.5799789428710938, "learning_rate": 0.0005960933912371629, "loss": 3.6731, "step": 3148 }, { "epoch": 0.15, "grad_norm": 0.5752755403518677, "learning_rate": 0.0005960909142856516, "loss": 3.6781, "step": 3149 }, { "epoch": 0.15, "grad_norm": 0.6208489537239075, "learning_rate": 0.0005960884365542937, "loss": 3.3908, "step": 3150 }, { "epoch": 0.15, "grad_norm": 0.6199827790260315, "learning_rate": 0.000596085958043096, "loss": 3.7275, "step": 3151 }, { "epoch": 0.15, "grad_norm": 0.6208821535110474, "learning_rate": 0.0005960834787520648, "loss": 3.4982, "step": 3152 }, { "epoch": 0.15, "grad_norm": 0.5410616993904114, "learning_rate": 0.0005960809986812067, "loss": 3.5705, "step": 3153 }, { "epoch": 0.15, "grad_norm": 0.5999816060066223, "learning_rate": 0.0005960785178305283, "loss": 3.4113, "step": 3154 }, { "epoch": 0.15, "grad_norm": 0.6006971001625061, "learning_rate": 0.000596076036200036, "loss": 3.7676, "step": 3155 }, { "epoch": 0.15, "grad_norm": 0.5484111309051514, "learning_rate": 0.0005960735537897364, "loss": 3.5441, "step": 3156 }, { "epoch": 0.15, "grad_norm": 0.581990659236908, "learning_rate": 0.000596071070599636, "loss": 3.3492, "step": 3157 }, { "epoch": 0.15, "grad_norm": 0.6446429491043091, "learning_rate": 0.0005960685866297415, "loss": 3.6986, "step": 3158 }, { "epoch": 0.15, "grad_norm": 0.564018726348877, "learning_rate": 0.0005960661018800592, "loss": 3.4903, "step": 3159 }, { "epoch": 0.15, "grad_norm": 0.5862210392951965, "learning_rate": 0.0005960636163505958, "loss": 3.5107, "step": 3160 }, { "epoch": 0.15, "grad_norm": 0.5642343163490295, "learning_rate": 0.0005960611300413578, "loss": 3.6318, "step": 3161 }, { "epoch": 0.15, "grad_norm": 0.5777455568313599, "learning_rate": 0.0005960586429523518, "loss": 3.417, "step": 3162 }, { "epoch": 0.16, "grad_norm": 0.586602509021759, "learning_rate": 0.0005960561550835843, "loss": 3.5432, "step": 3163 }, { "epoch": 0.16, "grad_norm": 0.5912312269210815, "learning_rate": 0.0005960536664350619, "loss": 3.6441, "step": 3164 }, { "epoch": 0.16, "grad_norm": 0.6185685396194458, "learning_rate": 0.0005960511770067911, "loss": 3.5537, "step": 3165 }, { "epoch": 0.16, "grad_norm": 0.6227981448173523, "learning_rate": 0.0005960486867987784, "loss": 3.4673, "step": 3166 }, { "epoch": 0.16, "grad_norm": 0.6355583071708679, "learning_rate": 0.0005960461958110305, "loss": 3.5603, "step": 3167 }, { "epoch": 0.16, "grad_norm": 0.6054497957229614, "learning_rate": 0.0005960437040435539, "loss": 3.5745, "step": 3168 }, { "epoch": 0.16, "grad_norm": 0.61248379945755, "learning_rate": 0.0005960412114963552, "loss": 3.5392, "step": 3169 }, { "epoch": 0.16, "grad_norm": 0.600622832775116, "learning_rate": 0.0005960387181694408, "loss": 3.629, "step": 3170 }, { "epoch": 0.16, "grad_norm": 0.6262516379356384, "learning_rate": 0.0005960362240628175, "loss": 3.3767, "step": 3171 }, { "epoch": 0.16, "grad_norm": 0.5428594350814819, "learning_rate": 0.0005960337291764918, "loss": 3.6812, "step": 3172 }, { "epoch": 0.16, "grad_norm": 0.5798599123954773, "learning_rate": 0.0005960312335104701, "loss": 3.6121, "step": 3173 }, { "epoch": 0.16, "grad_norm": 0.600669801235199, "learning_rate": 0.0005960287370647591, "loss": 3.5828, "step": 3174 }, { "epoch": 0.16, "grad_norm": 0.5855419635772705, "learning_rate": 0.0005960262398393654, "loss": 3.7861, "step": 3175 }, { "epoch": 0.16, "grad_norm": 0.5565866827964783, "learning_rate": 0.0005960237418342957, "loss": 3.7263, "step": 3176 }, { "epoch": 0.16, "grad_norm": 0.5831587910652161, "learning_rate": 0.0005960212430495564, "loss": 3.7619, "step": 3177 }, { "epoch": 0.16, "grad_norm": 0.6254531741142273, "learning_rate": 0.000596018743485154, "loss": 3.5224, "step": 3178 }, { "epoch": 0.16, "grad_norm": 0.5760573744773865, "learning_rate": 0.0005960162431410953, "loss": 3.4772, "step": 3179 }, { "epoch": 0.16, "grad_norm": 0.5323291420936584, "learning_rate": 0.0005960137420173866, "loss": 3.4991, "step": 3180 }, { "epoch": 0.16, "grad_norm": 0.5438007116317749, "learning_rate": 0.0005960112401140348, "loss": 3.6223, "step": 3181 }, { "epoch": 0.16, "grad_norm": 0.5264496207237244, "learning_rate": 0.0005960087374310464, "loss": 3.7063, "step": 3182 }, { "epoch": 0.16, "grad_norm": 0.5956478118896484, "learning_rate": 0.0005960062339684279, "loss": 3.564, "step": 3183 }, { "epoch": 0.16, "grad_norm": 0.6297171711921692, "learning_rate": 0.000596003729726186, "loss": 3.5353, "step": 3184 }, { "epoch": 0.16, "grad_norm": 0.5785334706306458, "learning_rate": 0.0005960012247043272, "loss": 3.7652, "step": 3185 }, { "epoch": 0.16, "grad_norm": 0.6340588331222534, "learning_rate": 0.0005959987189028582, "loss": 3.6217, "step": 3186 }, { "epoch": 0.16, "grad_norm": 0.5726503133773804, "learning_rate": 0.0005959962123217855, "loss": 3.585, "step": 3187 }, { "epoch": 0.16, "grad_norm": 0.5697149038314819, "learning_rate": 0.0005959937049611156, "loss": 3.4602, "step": 3188 }, { "epoch": 0.16, "grad_norm": 0.5911883115768433, "learning_rate": 0.0005959911968208554, "loss": 3.4454, "step": 3189 }, { "epoch": 0.16, "grad_norm": 0.579783022403717, "learning_rate": 0.0005959886879010113, "loss": 3.6873, "step": 3190 }, { "epoch": 0.16, "grad_norm": 0.5348889827728271, "learning_rate": 0.00059598617820159, "loss": 3.8555, "step": 3191 }, { "epoch": 0.16, "grad_norm": 0.5788812637329102, "learning_rate": 0.000595983667722598, "loss": 3.634, "step": 3192 }, { "epoch": 0.16, "grad_norm": 0.5206723809242249, "learning_rate": 0.0005959811564640421, "loss": 3.3991, "step": 3193 }, { "epoch": 0.16, "grad_norm": 0.5956487059593201, "learning_rate": 0.0005959786444259287, "loss": 3.4554, "step": 3194 }, { "epoch": 0.16, "grad_norm": 0.5612884759902954, "learning_rate": 0.0005959761316082646, "loss": 3.6328, "step": 3195 }, { "epoch": 0.16, "grad_norm": 0.5556085705757141, "learning_rate": 0.0005959736180110563, "loss": 3.6107, "step": 3196 }, { "epoch": 0.16, "grad_norm": 0.5669443607330322, "learning_rate": 0.0005959711036343104, "loss": 3.5104, "step": 3197 }, { "epoch": 0.16, "grad_norm": 0.5675809383392334, "learning_rate": 0.0005959685884780337, "loss": 3.5678, "step": 3198 }, { "epoch": 0.16, "grad_norm": 0.605646014213562, "learning_rate": 0.0005959660725422325, "loss": 3.7298, "step": 3199 }, { "epoch": 0.16, "grad_norm": 0.5824044942855835, "learning_rate": 0.0005959635558269137, "loss": 3.416, "step": 3200 }, { "epoch": 0.16, "grad_norm": 0.5763425230979919, "learning_rate": 0.0005959610383320839, "loss": 3.6143, "step": 3201 }, { "epoch": 0.16, "grad_norm": 0.5799658894538879, "learning_rate": 0.0005959585200577497, "loss": 3.5673, "step": 3202 }, { "epoch": 0.16, "grad_norm": 0.5830967426300049, "learning_rate": 0.0005959560010039177, "loss": 3.5561, "step": 3203 }, { "epoch": 0.16, "grad_norm": 0.5466955304145813, "learning_rate": 0.0005959534811705946, "loss": 3.6316, "step": 3204 }, { "epoch": 0.16, "grad_norm": 0.6080002784729004, "learning_rate": 0.0005959509605577869, "loss": 3.6567, "step": 3205 }, { "epoch": 0.16, "grad_norm": 0.5686078071594238, "learning_rate": 0.0005959484391655013, "loss": 3.5206, "step": 3206 }, { "epoch": 0.16, "grad_norm": 0.6283111572265625, "learning_rate": 0.0005959459169937445, "loss": 3.669, "step": 3207 }, { "epoch": 0.16, "grad_norm": 0.551060140132904, "learning_rate": 0.0005959433940425231, "loss": 3.5432, "step": 3208 }, { "epoch": 0.16, "grad_norm": 0.5849263668060303, "learning_rate": 0.0005959408703118437, "loss": 3.463, "step": 3209 }, { "epoch": 0.16, "grad_norm": 0.5780369639396667, "learning_rate": 0.0005959383458017133, "loss": 3.4947, "step": 3210 }, { "epoch": 0.16, "grad_norm": 0.6128699779510498, "learning_rate": 0.000595935820512138, "loss": 3.3344, "step": 3211 }, { "epoch": 0.16, "grad_norm": 0.5325568318367004, "learning_rate": 0.0005959332944431246, "loss": 3.5697, "step": 3212 }, { "epoch": 0.16, "grad_norm": 0.6106698513031006, "learning_rate": 0.00059593076759468, "loss": 3.4766, "step": 3213 }, { "epoch": 0.16, "grad_norm": 0.6008756160736084, "learning_rate": 0.0005959282399668108, "loss": 3.5924, "step": 3214 }, { "epoch": 0.16, "grad_norm": 0.5620996952056885, "learning_rate": 0.0005959257115595235, "loss": 3.548, "step": 3215 }, { "epoch": 0.16, "grad_norm": 0.6242149472236633, "learning_rate": 0.0005959231823728247, "loss": 3.6272, "step": 3216 }, { "epoch": 0.16, "grad_norm": 0.6121542453765869, "learning_rate": 0.0005959206524067213, "loss": 3.6008, "step": 3217 }, { "epoch": 0.16, "grad_norm": 0.6612833142280579, "learning_rate": 0.0005959181216612199, "loss": 3.3679, "step": 3218 }, { "epoch": 0.16, "grad_norm": 0.6306580305099487, "learning_rate": 0.000595915590136327, "loss": 3.6513, "step": 3219 }, { "epoch": 0.16, "grad_norm": 0.615294337272644, "learning_rate": 0.0005959130578320495, "loss": 3.4633, "step": 3220 }, { "epoch": 0.16, "grad_norm": 0.5863372683525085, "learning_rate": 0.0005959105247483939, "loss": 3.6524, "step": 3221 }, { "epoch": 0.16, "grad_norm": 0.702987551689148, "learning_rate": 0.0005959079908853669, "loss": 3.7698, "step": 3222 }, { "epoch": 0.16, "grad_norm": 0.6178707480430603, "learning_rate": 0.0005959054562429753, "loss": 3.5101, "step": 3223 }, { "epoch": 0.16, "grad_norm": 0.5774521231651306, "learning_rate": 0.0005959029208212255, "loss": 3.5955, "step": 3224 }, { "epoch": 0.16, "grad_norm": 0.587511420249939, "learning_rate": 0.0005959003846201245, "loss": 3.5713, "step": 3225 }, { "epoch": 0.16, "grad_norm": 0.5820519924163818, "learning_rate": 0.0005958978476396788, "loss": 3.1941, "step": 3226 }, { "epoch": 0.16, "grad_norm": 0.5678663849830627, "learning_rate": 0.0005958953098798952, "loss": 3.6527, "step": 3227 }, { "epoch": 0.16, "grad_norm": 0.547321617603302, "learning_rate": 0.0005958927713407801, "loss": 3.5103, "step": 3228 }, { "epoch": 0.16, "grad_norm": 0.5541772246360779, "learning_rate": 0.0005958902320223405, "loss": 3.4871, "step": 3229 }, { "epoch": 0.16, "grad_norm": 0.5748250484466553, "learning_rate": 0.000595887691924583, "loss": 3.4653, "step": 3230 }, { "epoch": 0.16, "grad_norm": 0.6465830206871033, "learning_rate": 0.0005958851510475142, "loss": 3.5919, "step": 3231 }, { "epoch": 0.16, "grad_norm": 0.5622817277908325, "learning_rate": 0.000595882609391141, "loss": 3.6361, "step": 3232 }, { "epoch": 0.16, "grad_norm": 0.5414732098579407, "learning_rate": 0.0005958800669554698, "loss": 3.797, "step": 3233 }, { "epoch": 0.16, "grad_norm": 0.5994971990585327, "learning_rate": 0.0005958775237405074, "loss": 3.5292, "step": 3234 }, { "epoch": 0.16, "grad_norm": 0.6444474458694458, "learning_rate": 0.0005958749797462607, "loss": 3.5301, "step": 3235 }, { "epoch": 0.16, "grad_norm": 0.5784667730331421, "learning_rate": 0.0005958724349727362, "loss": 3.4053, "step": 3236 }, { "epoch": 0.16, "grad_norm": 0.5597053170204163, "learning_rate": 0.0005958698894199406, "loss": 3.5452, "step": 3237 }, { "epoch": 0.16, "grad_norm": 0.5689312815666199, "learning_rate": 0.0005958673430878807, "loss": 3.6081, "step": 3238 }, { "epoch": 0.16, "grad_norm": 0.5763968825340271, "learning_rate": 0.0005958647959765632, "loss": 3.3315, "step": 3239 }, { "epoch": 0.16, "grad_norm": 0.5735750794410706, "learning_rate": 0.0005958622480859947, "loss": 3.5241, "step": 3240 }, { "epoch": 0.16, "grad_norm": 0.5359008312225342, "learning_rate": 0.0005958596994161819, "loss": 3.7146, "step": 3241 }, { "epoch": 0.16, "grad_norm": 0.6043967008590698, "learning_rate": 0.0005958571499671318, "loss": 3.6643, "step": 3242 }, { "epoch": 0.16, "grad_norm": 0.588692843914032, "learning_rate": 0.0005958545997388506, "loss": 3.6953, "step": 3243 }, { "epoch": 0.16, "grad_norm": 0.5518408417701721, "learning_rate": 0.0005958520487313456, "loss": 3.49, "step": 3244 }, { "epoch": 0.16, "grad_norm": 0.6246479153633118, "learning_rate": 0.0005958494969446231, "loss": 3.5562, "step": 3245 }, { "epoch": 0.16, "grad_norm": 0.6223381757736206, "learning_rate": 0.0005958469443786901, "loss": 3.5703, "step": 3246 }, { "epoch": 0.16, "grad_norm": 0.5942324995994568, "learning_rate": 0.0005958443910335531, "loss": 3.4527, "step": 3247 }, { "epoch": 0.16, "grad_norm": 0.5596261620521545, "learning_rate": 0.0005958418369092189, "loss": 3.63, "step": 3248 }, { "epoch": 0.16, "grad_norm": 0.5686338543891907, "learning_rate": 0.0005958392820056942, "loss": 3.5783, "step": 3249 }, { "epoch": 0.16, "grad_norm": 0.5892527103424072, "learning_rate": 0.0005958367263229859, "loss": 3.5206, "step": 3250 }, { "epoch": 0.16, "grad_norm": 0.5530284643173218, "learning_rate": 0.0005958341698611004, "loss": 3.7264, "step": 3251 }, { "epoch": 0.16, "grad_norm": 0.5221811532974243, "learning_rate": 0.0005958316126200448, "loss": 3.5932, "step": 3252 }, { "epoch": 0.16, "grad_norm": 0.5697766542434692, "learning_rate": 0.0005958290545998255, "loss": 3.6294, "step": 3253 }, { "epoch": 0.16, "grad_norm": 0.5952369570732117, "learning_rate": 0.0005958264958004496, "loss": 3.5551, "step": 3254 }, { "epoch": 0.16, "grad_norm": 0.5782142281532288, "learning_rate": 0.0005958239362219235, "loss": 3.4631, "step": 3255 }, { "epoch": 0.16, "grad_norm": 0.5828744173049927, "learning_rate": 0.0005958213758642542, "loss": 3.5134, "step": 3256 }, { "epoch": 0.16, "grad_norm": 0.5539057850837708, "learning_rate": 0.0005958188147274483, "loss": 3.3305, "step": 3257 }, { "epoch": 0.16, "grad_norm": 0.6173965930938721, "learning_rate": 0.0005958162528115125, "loss": 3.7218, "step": 3258 }, { "epoch": 0.16, "grad_norm": 0.5583602786064148, "learning_rate": 0.0005958136901164537, "loss": 3.6743, "step": 3259 }, { "epoch": 0.16, "grad_norm": 0.5693215131759644, "learning_rate": 0.0005958111266422785, "loss": 3.5812, "step": 3260 }, { "epoch": 0.16, "grad_norm": 0.5246009826660156, "learning_rate": 0.0005958085623889937, "loss": 3.6991, "step": 3261 }, { "epoch": 0.16, "grad_norm": 0.5803536772727966, "learning_rate": 0.0005958059973566062, "loss": 3.7107, "step": 3262 }, { "epoch": 0.16, "grad_norm": 0.5437319874763489, "learning_rate": 0.0005958034315451224, "loss": 3.7399, "step": 3263 }, { "epoch": 0.16, "grad_norm": 0.5739423036575317, "learning_rate": 0.0005958008649545496, "loss": 3.522, "step": 3264 }, { "epoch": 0.16, "grad_norm": 0.6183177828788757, "learning_rate": 0.000595798297584894, "loss": 3.4463, "step": 3265 }, { "epoch": 0.16, "grad_norm": 0.521632194519043, "learning_rate": 0.0005957957294361628, "loss": 3.57, "step": 3266 }, { "epoch": 0.16, "grad_norm": 0.5398718118667603, "learning_rate": 0.0005957931605083624, "loss": 3.5119, "step": 3267 }, { "epoch": 0.16, "grad_norm": 0.556581437587738, "learning_rate": 0.0005957905908014999, "loss": 3.6029, "step": 3268 }, { "epoch": 0.16, "grad_norm": 0.5454385876655579, "learning_rate": 0.0005957880203155818, "loss": 3.4989, "step": 3269 }, { "epoch": 0.16, "grad_norm": 0.5625463128089905, "learning_rate": 0.000595785449050615, "loss": 3.6914, "step": 3270 }, { "epoch": 0.16, "grad_norm": 0.538007915019989, "learning_rate": 0.0005957828770066063, "loss": 3.5866, "step": 3271 }, { "epoch": 0.16, "grad_norm": 0.5634467601776123, "learning_rate": 0.0005957803041835623, "loss": 3.8559, "step": 3272 }, { "epoch": 0.16, "grad_norm": 0.5901991128921509, "learning_rate": 0.0005957777305814901, "loss": 3.4086, "step": 3273 }, { "epoch": 0.16, "grad_norm": 0.582669734954834, "learning_rate": 0.0005957751562003961, "loss": 3.506, "step": 3274 }, { "epoch": 0.16, "grad_norm": 0.5981149673461914, "learning_rate": 0.0005957725810402874, "loss": 3.717, "step": 3275 }, { "epoch": 0.16, "grad_norm": 0.6032350063323975, "learning_rate": 0.0005957700051011706, "loss": 3.6699, "step": 3276 }, { "epoch": 0.16, "grad_norm": 0.5754993557929993, "learning_rate": 0.0005957674283830525, "loss": 3.5732, "step": 3277 }, { "epoch": 0.16, "grad_norm": 0.6205776929855347, "learning_rate": 0.00059576485088594, "loss": 3.6692, "step": 3278 }, { "epoch": 0.16, "grad_norm": 0.5795102119445801, "learning_rate": 0.0005957622726098397, "loss": 3.574, "step": 3279 }, { "epoch": 0.16, "grad_norm": 0.547251284122467, "learning_rate": 0.0005957596935547586, "loss": 3.7679, "step": 3280 }, { "epoch": 0.16, "grad_norm": 0.5527950525283813, "learning_rate": 0.0005957571137207032, "loss": 3.4746, "step": 3281 }, { "epoch": 0.16, "grad_norm": 0.5651281476020813, "learning_rate": 0.0005957545331076806, "loss": 3.5383, "step": 3282 }, { "epoch": 0.16, "grad_norm": 0.599833071231842, "learning_rate": 0.0005957519517156975, "loss": 3.5415, "step": 3283 }, { "epoch": 0.16, "grad_norm": 0.6276541352272034, "learning_rate": 0.0005957493695447607, "loss": 3.5067, "step": 3284 }, { "epoch": 0.16, "grad_norm": 0.5757126212120056, "learning_rate": 0.0005957467865948769, "loss": 3.6089, "step": 3285 }, { "epoch": 0.16, "grad_norm": 0.5513081550598145, "learning_rate": 0.000595744202866053, "loss": 3.6373, "step": 3286 }, { "epoch": 0.16, "grad_norm": 0.5885028839111328, "learning_rate": 0.0005957416183582958, "loss": 3.7294, "step": 3287 }, { "epoch": 0.16, "grad_norm": 0.5601800084114075, "learning_rate": 0.0005957390330716121, "loss": 3.5964, "step": 3288 }, { "epoch": 0.16, "grad_norm": 0.6039920449256897, "learning_rate": 0.0005957364470060087, "loss": 3.3979, "step": 3289 }, { "epoch": 0.16, "grad_norm": 0.5872266888618469, "learning_rate": 0.0005957338601614924, "loss": 3.6418, "step": 3290 }, { "epoch": 0.16, "grad_norm": 0.5281251072883606, "learning_rate": 0.0005957312725380701, "loss": 3.5281, "step": 3291 }, { "epoch": 0.16, "grad_norm": 0.5793739557266235, "learning_rate": 0.0005957286841357485, "loss": 3.6252, "step": 3292 }, { "epoch": 0.16, "grad_norm": 0.6035564541816711, "learning_rate": 0.0005957260949545345, "loss": 3.4221, "step": 3293 }, { "epoch": 0.16, "grad_norm": 0.5742641687393188, "learning_rate": 0.0005957235049944347, "loss": 3.7449, "step": 3294 }, { "epoch": 0.16, "grad_norm": 0.5799552202224731, "learning_rate": 0.0005957209142554563, "loss": 3.7006, "step": 3295 }, { "epoch": 0.16, "grad_norm": 0.5929350256919861, "learning_rate": 0.0005957183227376058, "loss": 3.5188, "step": 3296 }, { "epoch": 0.16, "grad_norm": 0.5783431529998779, "learning_rate": 0.0005957157304408902, "loss": 3.4988, "step": 3297 }, { "epoch": 0.16, "grad_norm": 0.6081163883209229, "learning_rate": 0.0005957131373653163, "loss": 3.7293, "step": 3298 }, { "epoch": 0.16, "grad_norm": 0.5771412253379822, "learning_rate": 0.0005957105435108909, "loss": 3.5091, "step": 3299 }, { "epoch": 0.16, "grad_norm": 0.5864212512969971, "learning_rate": 0.0005957079488776208, "loss": 3.6186, "step": 3300 }, { "epoch": 0.16, "grad_norm": 0.5488468408584595, "learning_rate": 0.0005957053534655129, "loss": 3.617, "step": 3301 }, { "epoch": 0.16, "grad_norm": 0.5773611068725586, "learning_rate": 0.000595702757274574, "loss": 3.6083, "step": 3302 }, { "epoch": 0.16, "grad_norm": 0.5988139510154724, "learning_rate": 0.0005957001603048109, "loss": 3.4651, "step": 3303 }, { "epoch": 0.16, "grad_norm": 0.5818193554878235, "learning_rate": 0.0005956975625562305, "loss": 3.3751, "step": 3304 }, { "epoch": 0.16, "grad_norm": 0.5249636173248291, "learning_rate": 0.0005956949640288397, "loss": 3.6246, "step": 3305 }, { "epoch": 0.16, "grad_norm": 0.5715710520744324, "learning_rate": 0.0005956923647226453, "loss": 3.5151, "step": 3306 }, { "epoch": 0.16, "grad_norm": 0.5765474438667297, "learning_rate": 0.000595689764637654, "loss": 3.4035, "step": 3307 }, { "epoch": 0.16, "grad_norm": 0.6147971153259277, "learning_rate": 0.0005956871637738728, "loss": 3.6643, "step": 3308 }, { "epoch": 0.16, "grad_norm": 0.6140459775924683, "learning_rate": 0.0005956845621313086, "loss": 3.3888, "step": 3309 }, { "epoch": 0.16, "grad_norm": 0.6852372884750366, "learning_rate": 0.0005956819597099681, "loss": 3.9077, "step": 3310 }, { "epoch": 0.16, "grad_norm": 0.5585691928863525, "learning_rate": 0.0005956793565098582, "loss": 3.4769, "step": 3311 }, { "epoch": 0.16, "grad_norm": 0.5834951996803284, "learning_rate": 0.0005956767525309858, "loss": 3.588, "step": 3312 }, { "epoch": 0.16, "grad_norm": 0.6259799599647522, "learning_rate": 0.0005956741477733578, "loss": 3.5964, "step": 3313 }, { "epoch": 0.16, "grad_norm": 0.5601211190223694, "learning_rate": 0.0005956715422369809, "loss": 3.5845, "step": 3314 }, { "epoch": 0.16, "grad_norm": 0.5854201316833496, "learning_rate": 0.0005956689359218621, "loss": 3.4226, "step": 3315 }, { "epoch": 0.16, "grad_norm": 0.5881748199462891, "learning_rate": 0.0005956663288280082, "loss": 3.4351, "step": 3316 }, { "epoch": 0.16, "grad_norm": 0.6546245813369751, "learning_rate": 0.0005956637209554262, "loss": 3.44, "step": 3317 }, { "epoch": 0.16, "grad_norm": 0.5833501815795898, "learning_rate": 0.0005956611123041229, "loss": 3.6924, "step": 3318 }, { "epoch": 0.16, "grad_norm": 0.5682247877120972, "learning_rate": 0.0005956585028741049, "loss": 3.7348, "step": 3319 }, { "epoch": 0.16, "grad_norm": 0.6428276896476746, "learning_rate": 0.0005956558926653794, "loss": 3.3296, "step": 3320 }, { "epoch": 0.16, "grad_norm": 0.5545470714569092, "learning_rate": 0.0005956532816779532, "loss": 3.6842, "step": 3321 }, { "epoch": 0.16, "grad_norm": 0.5805429220199585, "learning_rate": 0.0005956506699118331, "loss": 3.3973, "step": 3322 }, { "epoch": 0.16, "grad_norm": 0.5832202434539795, "learning_rate": 0.0005956480573670261, "loss": 3.699, "step": 3323 }, { "epoch": 0.16, "grad_norm": 0.5636213421821594, "learning_rate": 0.000595645444043539, "loss": 3.647, "step": 3324 }, { "epoch": 0.16, "grad_norm": 0.599785327911377, "learning_rate": 0.0005956428299413788, "loss": 3.5346, "step": 3325 }, { "epoch": 0.16, "grad_norm": 0.5496143698692322, "learning_rate": 0.0005956402150605522, "loss": 3.6491, "step": 3326 }, { "epoch": 0.16, "grad_norm": 0.5501871109008789, "learning_rate": 0.0005956375994010661, "loss": 3.5646, "step": 3327 }, { "epoch": 0.16, "grad_norm": 0.6170458793640137, "learning_rate": 0.0005956349829629276, "loss": 3.5171, "step": 3328 }, { "epoch": 0.16, "grad_norm": 0.554207980632782, "learning_rate": 0.0005956323657461434, "loss": 3.4183, "step": 3329 }, { "epoch": 0.16, "grad_norm": 0.6159272193908691, "learning_rate": 0.0005956297477507204, "loss": 3.5173, "step": 3330 }, { "epoch": 0.16, "grad_norm": 0.5841159224510193, "learning_rate": 0.0005956271289766654, "loss": 3.5016, "step": 3331 }, { "epoch": 0.16, "grad_norm": 0.5938801169395447, "learning_rate": 0.0005956245094239857, "loss": 3.557, "step": 3332 }, { "epoch": 0.16, "grad_norm": 0.5655121207237244, "learning_rate": 0.0005956218890926878, "loss": 3.5488, "step": 3333 }, { "epoch": 0.16, "grad_norm": 0.581397294998169, "learning_rate": 0.0005956192679827788, "loss": 3.6558, "step": 3334 }, { "epoch": 0.16, "grad_norm": 0.5534145832061768, "learning_rate": 0.0005956166460942654, "loss": 3.6431, "step": 3335 }, { "epoch": 0.16, "grad_norm": 0.6180419325828552, "learning_rate": 0.0005956140234271549, "loss": 3.5917, "step": 3336 }, { "epoch": 0.16, "grad_norm": 0.5950748324394226, "learning_rate": 0.0005956113999814537, "loss": 3.5197, "step": 3337 }, { "epoch": 0.16, "grad_norm": 0.5673226118087769, "learning_rate": 0.0005956087757571691, "loss": 3.465, "step": 3338 }, { "epoch": 0.16, "grad_norm": 0.5869221091270447, "learning_rate": 0.0005956061507543078, "loss": 3.5244, "step": 3339 }, { "epoch": 0.16, "grad_norm": 0.6073868870735168, "learning_rate": 0.0005956035249728769, "loss": 3.4927, "step": 3340 }, { "epoch": 0.16, "grad_norm": 0.547737181186676, "learning_rate": 0.0005956008984128831, "loss": 3.4597, "step": 3341 }, { "epoch": 0.16, "grad_norm": 0.5933374166488647, "learning_rate": 0.0005955982710743336, "loss": 3.5967, "step": 3342 }, { "epoch": 0.16, "grad_norm": 0.564926266670227, "learning_rate": 0.000595595642957235, "loss": 3.5223, "step": 3343 }, { "epoch": 0.16, "grad_norm": 0.625313937664032, "learning_rate": 0.0005955930140615943, "loss": 3.4885, "step": 3344 }, { "epoch": 0.16, "grad_norm": 0.6144617199897766, "learning_rate": 0.0005955903843874185, "loss": 3.3671, "step": 3345 }, { "epoch": 0.16, "grad_norm": 0.6368928551673889, "learning_rate": 0.0005955877539347146, "loss": 3.5404, "step": 3346 }, { "epoch": 0.16, "grad_norm": 0.5616483688354492, "learning_rate": 0.0005955851227034894, "loss": 3.5257, "step": 3347 }, { "epoch": 0.16, "grad_norm": 0.5560274720191956, "learning_rate": 0.00059558249069375, "loss": 3.5856, "step": 3348 }, { "epoch": 0.16, "grad_norm": 0.6093305945396423, "learning_rate": 0.0005955798579055031, "loss": 3.6487, "step": 3349 }, { "epoch": 0.16, "grad_norm": 0.5649324655532837, "learning_rate": 0.0005955772243387556, "loss": 3.5062, "step": 3350 }, { "epoch": 0.16, "grad_norm": 0.5727810859680176, "learning_rate": 0.0005955745899935147, "loss": 3.2842, "step": 3351 }, { "epoch": 0.16, "grad_norm": 0.6266840100288391, "learning_rate": 0.0005955719548697873, "loss": 3.7879, "step": 3352 }, { "epoch": 0.16, "grad_norm": 0.6102510690689087, "learning_rate": 0.0005955693189675802, "loss": 3.71, "step": 3353 }, { "epoch": 0.16, "grad_norm": 0.5587088465690613, "learning_rate": 0.0005955666822869004, "loss": 3.4238, "step": 3354 }, { "epoch": 0.16, "grad_norm": 0.617514431476593, "learning_rate": 0.0005955640448277548, "loss": 3.6391, "step": 3355 }, { "epoch": 0.16, "grad_norm": 0.5572774410247803, "learning_rate": 0.0005955614065901504, "loss": 3.6646, "step": 3356 }, { "epoch": 0.16, "grad_norm": 0.598943829536438, "learning_rate": 0.0005955587675740942, "loss": 3.5632, "step": 3357 }, { "epoch": 0.16, "grad_norm": 0.6389573812484741, "learning_rate": 0.000595556127779593, "loss": 3.4648, "step": 3358 }, { "epoch": 0.16, "grad_norm": 0.5725786685943604, "learning_rate": 0.0005955534872066538, "loss": 3.387, "step": 3359 }, { "epoch": 0.16, "grad_norm": 0.5625653862953186, "learning_rate": 0.0005955508458552837, "loss": 3.6279, "step": 3360 }, { "epoch": 0.16, "grad_norm": 0.5734360814094543, "learning_rate": 0.0005955482037254895, "loss": 3.5787, "step": 3361 }, { "epoch": 0.16, "grad_norm": 0.5375578999519348, "learning_rate": 0.0005955455608172783, "loss": 3.7722, "step": 3362 }, { "epoch": 0.16, "grad_norm": 0.6041198968887329, "learning_rate": 0.0005955429171306569, "loss": 3.5613, "step": 3363 }, { "epoch": 0.16, "grad_norm": 0.5527943968772888, "learning_rate": 0.0005955402726656323, "loss": 3.4843, "step": 3364 }, { "epoch": 0.16, "grad_norm": 0.528593897819519, "learning_rate": 0.0005955376274222116, "loss": 3.5478, "step": 3365 }, { "epoch": 0.16, "grad_norm": 0.5430445671081543, "learning_rate": 0.0005955349814004016, "loss": 3.5002, "step": 3366 }, { "epoch": 0.17, "grad_norm": 0.5947743654251099, "learning_rate": 0.0005955323346002094, "loss": 3.454, "step": 3367 }, { "epoch": 0.17, "grad_norm": 0.5306910872459412, "learning_rate": 0.0005955296870216419, "loss": 3.5488, "step": 3368 }, { "epoch": 0.17, "grad_norm": 0.5531356930732727, "learning_rate": 0.000595527038664706, "loss": 3.7194, "step": 3369 }, { "epoch": 0.17, "grad_norm": 0.601188600063324, "learning_rate": 0.0005955243895294087, "loss": 3.5034, "step": 3370 }, { "epoch": 0.17, "grad_norm": 0.5490877628326416, "learning_rate": 0.0005955217396157571, "loss": 3.8795, "step": 3371 }, { "epoch": 0.17, "grad_norm": 0.5771283507347107, "learning_rate": 0.0005955190889237581, "loss": 3.66, "step": 3372 }, { "epoch": 0.17, "grad_norm": 0.5651893019676208, "learning_rate": 0.0005955164374534187, "loss": 3.3696, "step": 3373 }, { "epoch": 0.17, "grad_norm": 0.5814728140830994, "learning_rate": 0.0005955137852047459, "loss": 3.6069, "step": 3374 }, { "epoch": 0.17, "grad_norm": 0.5790256261825562, "learning_rate": 0.0005955111321777467, "loss": 3.4807, "step": 3375 }, { "epoch": 0.17, "grad_norm": 0.5738055109977722, "learning_rate": 0.0005955084783724281, "loss": 3.4382, "step": 3376 }, { "epoch": 0.17, "grad_norm": 0.5678678154945374, "learning_rate": 0.0005955058237887968, "loss": 3.3348, "step": 3377 }, { "epoch": 0.17, "grad_norm": 0.5809141397476196, "learning_rate": 0.0005955031684268603, "loss": 3.5302, "step": 3378 }, { "epoch": 0.17, "grad_norm": 0.5668545961380005, "learning_rate": 0.0005955005122866251, "loss": 3.437, "step": 3379 }, { "epoch": 0.17, "grad_norm": 0.5487068891525269, "learning_rate": 0.0005954978553680987, "loss": 3.482, "step": 3380 }, { "epoch": 0.17, "grad_norm": 0.5972830653190613, "learning_rate": 0.0005954951976712876, "loss": 3.5068, "step": 3381 }, { "epoch": 0.17, "grad_norm": 0.5785419940948486, "learning_rate": 0.0005954925391961991, "loss": 3.4964, "step": 3382 }, { "epoch": 0.17, "grad_norm": 0.5430408120155334, "learning_rate": 0.00059548987994284, "loss": 3.535, "step": 3383 }, { "epoch": 0.17, "grad_norm": 0.5556647181510925, "learning_rate": 0.0005954872199112175, "loss": 3.589, "step": 3384 }, { "epoch": 0.17, "grad_norm": 0.6139194369316101, "learning_rate": 0.0005954845591013385, "loss": 3.4409, "step": 3385 }, { "epoch": 0.17, "grad_norm": 0.5702038407325745, "learning_rate": 0.0005954818975132102, "loss": 3.5252, "step": 3386 }, { "epoch": 0.17, "grad_norm": 0.5292709469795227, "learning_rate": 0.0005954792351468393, "loss": 3.7446, "step": 3387 }, { "epoch": 0.17, "grad_norm": 0.5479516983032227, "learning_rate": 0.000595476572002233, "loss": 3.789, "step": 3388 }, { "epoch": 0.17, "grad_norm": 0.5670396089553833, "learning_rate": 0.0005954739080793983, "loss": 3.5434, "step": 3389 }, { "epoch": 0.17, "grad_norm": 0.5650784969329834, "learning_rate": 0.0005954712433783421, "loss": 3.4521, "step": 3390 }, { "epoch": 0.17, "grad_norm": 0.555449366569519, "learning_rate": 0.0005954685778990715, "loss": 3.6411, "step": 3391 }, { "epoch": 0.17, "grad_norm": 0.5617675185203552, "learning_rate": 0.0005954659116415936, "loss": 3.5113, "step": 3392 }, { "epoch": 0.17, "grad_norm": 0.545922040939331, "learning_rate": 0.0005954632446059153, "loss": 3.6585, "step": 3393 }, { "epoch": 0.17, "grad_norm": 0.562699019908905, "learning_rate": 0.0005954605767920437, "loss": 3.5584, "step": 3394 }, { "epoch": 0.17, "grad_norm": 0.5851036310195923, "learning_rate": 0.0005954579081999859, "loss": 3.3386, "step": 3395 }, { "epoch": 0.17, "grad_norm": 0.6002635359764099, "learning_rate": 0.0005954552388297488, "loss": 3.6444, "step": 3396 }, { "epoch": 0.17, "grad_norm": 0.5689345002174377, "learning_rate": 0.0005954525686813394, "loss": 3.5515, "step": 3397 }, { "epoch": 0.17, "grad_norm": 0.5846789479255676, "learning_rate": 0.0005954498977547648, "loss": 3.5281, "step": 3398 }, { "epoch": 0.17, "grad_norm": 0.5559812188148499, "learning_rate": 0.0005954472260500321, "loss": 3.4974, "step": 3399 }, { "epoch": 0.17, "grad_norm": 0.6159769296646118, "learning_rate": 0.0005954445535671482, "loss": 3.4743, "step": 3400 }, { "epoch": 0.17, "grad_norm": 0.6169503927230835, "learning_rate": 0.0005954418803061201, "loss": 3.3966, "step": 3401 }, { "epoch": 0.17, "grad_norm": 0.5606275796890259, "learning_rate": 0.0005954392062669551, "loss": 3.6091, "step": 3402 }, { "epoch": 0.17, "grad_norm": 0.5616119503974915, "learning_rate": 0.0005954365314496602, "loss": 3.3921, "step": 3403 }, { "epoch": 0.17, "grad_norm": 0.5786251425743103, "learning_rate": 0.0005954338558542421, "loss": 3.5837, "step": 3404 }, { "epoch": 0.17, "grad_norm": 0.645964503288269, "learning_rate": 0.0005954311794807082, "loss": 3.5519, "step": 3405 }, { "epoch": 0.17, "grad_norm": 0.6374315619468689, "learning_rate": 0.0005954285023290653, "loss": 3.6006, "step": 3406 }, { "epoch": 0.17, "grad_norm": 0.6593900322914124, "learning_rate": 0.0005954258243993207, "loss": 3.7227, "step": 3407 }, { "epoch": 0.17, "grad_norm": 0.5792526602745056, "learning_rate": 0.0005954231456914813, "loss": 3.665, "step": 3408 }, { "epoch": 0.17, "grad_norm": 0.5797976851463318, "learning_rate": 0.0005954204662055542, "loss": 3.3409, "step": 3409 }, { "epoch": 0.17, "grad_norm": 0.5604798197746277, "learning_rate": 0.0005954177859415465, "loss": 3.416, "step": 3410 }, { "epoch": 0.17, "grad_norm": 0.5253710150718689, "learning_rate": 0.0005954151048994652, "loss": 3.5158, "step": 3411 }, { "epoch": 0.17, "grad_norm": 0.5831363797187805, "learning_rate": 0.0005954124230793173, "loss": 3.4324, "step": 3412 }, { "epoch": 0.17, "grad_norm": 0.592633843421936, "learning_rate": 0.00059540974048111, "loss": 3.6425, "step": 3413 }, { "epoch": 0.17, "grad_norm": 0.5312915444374084, "learning_rate": 0.0005954070571048503, "loss": 3.5972, "step": 3414 }, { "epoch": 0.17, "grad_norm": 0.5762085914611816, "learning_rate": 0.0005954043729505452, "loss": 3.6361, "step": 3415 }, { "epoch": 0.17, "grad_norm": 0.5579561591148376, "learning_rate": 0.0005954016880182018, "loss": 3.5233, "step": 3416 }, { "epoch": 0.17, "grad_norm": 0.5626066327095032, "learning_rate": 0.0005953990023078273, "loss": 3.7441, "step": 3417 }, { "epoch": 0.17, "grad_norm": 0.5891571044921875, "learning_rate": 0.0005953963158194285, "loss": 3.7897, "step": 3418 }, { "epoch": 0.17, "grad_norm": 0.5446343421936035, "learning_rate": 0.0005953936285530129, "loss": 3.5544, "step": 3419 }, { "epoch": 0.17, "grad_norm": 0.6051816940307617, "learning_rate": 0.0005953909405085872, "loss": 3.5061, "step": 3420 }, { "epoch": 0.17, "grad_norm": 0.5892947316169739, "learning_rate": 0.0005953882516861586, "loss": 3.5149, "step": 3421 }, { "epoch": 0.17, "grad_norm": 0.6093165874481201, "learning_rate": 0.0005953855620857342, "loss": 3.8573, "step": 3422 }, { "epoch": 0.17, "grad_norm": 0.5854846239089966, "learning_rate": 0.0005953828717073212, "loss": 3.3395, "step": 3423 }, { "epoch": 0.17, "grad_norm": 0.5802544951438904, "learning_rate": 0.0005953801805509264, "loss": 3.4692, "step": 3424 }, { "epoch": 0.17, "grad_norm": 0.5809839367866516, "learning_rate": 0.0005953774886165572, "loss": 3.5708, "step": 3425 }, { "epoch": 0.17, "grad_norm": 0.602876603603363, "learning_rate": 0.0005953747959042204, "loss": 3.4044, "step": 3426 }, { "epoch": 0.17, "grad_norm": 0.6141166090965271, "learning_rate": 0.0005953721024139233, "loss": 3.3439, "step": 3427 }, { "epoch": 0.17, "grad_norm": 0.5813528895378113, "learning_rate": 0.000595369408145673, "loss": 3.4252, "step": 3428 }, { "epoch": 0.17, "grad_norm": 0.5566039085388184, "learning_rate": 0.0005953667130994763, "loss": 3.4517, "step": 3429 }, { "epoch": 0.17, "grad_norm": 0.5614877939224243, "learning_rate": 0.0005953640172753407, "loss": 3.4067, "step": 3430 }, { "epoch": 0.17, "grad_norm": 0.5762594938278198, "learning_rate": 0.0005953613206732731, "loss": 3.4889, "step": 3431 }, { "epoch": 0.17, "grad_norm": 0.5990262031555176, "learning_rate": 0.0005953586232932806, "loss": 3.4092, "step": 3432 }, { "epoch": 0.17, "grad_norm": 0.5427011251449585, "learning_rate": 0.0005953559251353703, "loss": 3.6497, "step": 3433 }, { "epoch": 0.17, "grad_norm": 0.5713863968849182, "learning_rate": 0.0005953532261995494, "loss": 3.4972, "step": 3434 }, { "epoch": 0.17, "grad_norm": 0.6282929182052612, "learning_rate": 0.0005953505264858249, "loss": 3.4623, "step": 3435 }, { "epoch": 0.17, "grad_norm": 0.5442312359809875, "learning_rate": 0.0005953478259942041, "loss": 3.4691, "step": 3436 }, { "epoch": 0.17, "grad_norm": 0.5707518458366394, "learning_rate": 0.0005953451247246937, "loss": 3.7231, "step": 3437 }, { "epoch": 0.17, "grad_norm": 0.5848592519760132, "learning_rate": 0.0005953424226773013, "loss": 3.6997, "step": 3438 }, { "epoch": 0.17, "grad_norm": 0.5771193504333496, "learning_rate": 0.0005953397198520337, "loss": 3.7628, "step": 3439 }, { "epoch": 0.17, "grad_norm": 0.6253427863121033, "learning_rate": 0.0005953370162488981, "loss": 3.3619, "step": 3440 }, { "epoch": 0.17, "grad_norm": 0.6061219573020935, "learning_rate": 0.0005953343118679018, "loss": 3.6352, "step": 3441 }, { "epoch": 0.17, "grad_norm": 0.5454443693161011, "learning_rate": 0.0005953316067090516, "loss": 3.4859, "step": 3442 }, { "epoch": 0.17, "grad_norm": 0.617382287979126, "learning_rate": 0.0005953289007723548, "loss": 3.5843, "step": 3443 }, { "epoch": 0.17, "grad_norm": 0.5367785096168518, "learning_rate": 0.0005953261940578186, "loss": 3.4825, "step": 3444 }, { "epoch": 0.17, "grad_norm": 0.5802174210548401, "learning_rate": 0.00059532348656545, "loss": 3.4618, "step": 3445 }, { "epoch": 0.17, "grad_norm": 0.5566971302032471, "learning_rate": 0.000595320778295256, "loss": 3.5407, "step": 3446 }, { "epoch": 0.17, "grad_norm": 0.5934160351753235, "learning_rate": 0.0005953180692472441, "loss": 3.5885, "step": 3447 }, { "epoch": 0.17, "grad_norm": 0.5748066902160645, "learning_rate": 0.0005953153594214212, "loss": 3.8451, "step": 3448 }, { "epoch": 0.17, "grad_norm": 0.6022830009460449, "learning_rate": 0.0005953126488177946, "loss": 3.5088, "step": 3449 }, { "epoch": 0.17, "grad_norm": 0.6487731337547302, "learning_rate": 0.0005953099374363711, "loss": 3.4576, "step": 3450 }, { "epoch": 0.17, "grad_norm": 0.6260390877723694, "learning_rate": 0.0005953072252771581, "loss": 3.4172, "step": 3451 }, { "epoch": 0.17, "grad_norm": 0.5463585257530212, "learning_rate": 0.0005953045123401628, "loss": 3.6589, "step": 3452 }, { "epoch": 0.17, "grad_norm": 0.6311070322990417, "learning_rate": 0.0005953017986253922, "loss": 3.6357, "step": 3453 }, { "epoch": 0.17, "grad_norm": 0.5336967706680298, "learning_rate": 0.0005952990841328536, "loss": 3.4526, "step": 3454 }, { "epoch": 0.17, "grad_norm": 0.5945968627929688, "learning_rate": 0.0005952963688625538, "loss": 3.5846, "step": 3455 }, { "epoch": 0.17, "grad_norm": 0.5942106246948242, "learning_rate": 0.0005952936528145004, "loss": 3.5166, "step": 3456 }, { "epoch": 0.17, "grad_norm": 0.5890370607376099, "learning_rate": 0.0005952909359887002, "loss": 3.7746, "step": 3457 }, { "epoch": 0.17, "grad_norm": 0.5749627351760864, "learning_rate": 0.0005952882183851606, "loss": 3.4682, "step": 3458 }, { "epoch": 0.17, "grad_norm": 0.59616619348526, "learning_rate": 0.0005952855000038886, "loss": 3.7142, "step": 3459 }, { "epoch": 0.17, "grad_norm": 0.5930215716362, "learning_rate": 0.0005952827808448916, "loss": 3.7, "step": 3460 }, { "epoch": 0.17, "grad_norm": 0.564595103263855, "learning_rate": 0.0005952800609081764, "loss": 3.5487, "step": 3461 }, { "epoch": 0.17, "grad_norm": 0.5829174518585205, "learning_rate": 0.0005952773401937504, "loss": 3.6912, "step": 3462 }, { "epoch": 0.17, "grad_norm": 0.5580412149429321, "learning_rate": 0.0005952746187016208, "loss": 3.5539, "step": 3463 }, { "epoch": 0.17, "grad_norm": 0.5945863127708435, "learning_rate": 0.0005952718964317945, "loss": 3.567, "step": 3464 }, { "epoch": 0.17, "grad_norm": 0.5504742860794067, "learning_rate": 0.000595269173384279, "loss": 3.6912, "step": 3465 }, { "epoch": 0.17, "grad_norm": 0.6526525616645813, "learning_rate": 0.0005952664495590813, "loss": 3.4437, "step": 3466 }, { "epoch": 0.17, "grad_norm": 0.5954388976097107, "learning_rate": 0.0005952637249562085, "loss": 3.6307, "step": 3467 }, { "epoch": 0.17, "grad_norm": 0.712817907333374, "learning_rate": 0.000595260999575668, "loss": 3.6924, "step": 3468 }, { "epoch": 0.17, "grad_norm": 0.6235787868499756, "learning_rate": 0.0005952582734174667, "loss": 3.5941, "step": 3469 }, { "epoch": 0.17, "grad_norm": 0.5776662826538086, "learning_rate": 0.0005952555464816122, "loss": 3.8929, "step": 3470 }, { "epoch": 0.17, "grad_norm": 0.6015415787696838, "learning_rate": 0.0005952528187681111, "loss": 3.6784, "step": 3471 }, { "epoch": 0.17, "grad_norm": 0.5785424113273621, "learning_rate": 0.0005952500902769711, "loss": 3.7124, "step": 3472 }, { "epoch": 0.17, "grad_norm": 0.6219342947006226, "learning_rate": 0.0005952473610081991, "loss": 3.6605, "step": 3473 }, { "epoch": 0.17, "grad_norm": 0.5495757460594177, "learning_rate": 0.0005952446309618023, "loss": 3.5599, "step": 3474 }, { "epoch": 0.17, "grad_norm": 0.5983782410621643, "learning_rate": 0.0005952419001377881, "loss": 3.5188, "step": 3475 }, { "epoch": 0.17, "grad_norm": 0.6638477444648743, "learning_rate": 0.0005952391685361634, "loss": 3.6374, "step": 3476 }, { "epoch": 0.17, "grad_norm": 0.5729101896286011, "learning_rate": 0.0005952364361569358, "loss": 3.4728, "step": 3477 }, { "epoch": 0.17, "grad_norm": 0.5611720681190491, "learning_rate": 0.000595233703000112, "loss": 3.4283, "step": 3478 }, { "epoch": 0.17, "grad_norm": 0.615308403968811, "learning_rate": 0.0005952309690656997, "loss": 3.6614, "step": 3479 }, { "epoch": 0.17, "grad_norm": 0.6057369709014893, "learning_rate": 0.0005952282343537057, "loss": 3.4623, "step": 3480 }, { "epoch": 0.17, "grad_norm": 0.6732720136642456, "learning_rate": 0.0005952254988641373, "loss": 3.7109, "step": 3481 }, { "epoch": 0.17, "grad_norm": 0.5600842833518982, "learning_rate": 0.0005952227625970018, "loss": 3.6057, "step": 3482 }, { "epoch": 0.17, "grad_norm": 0.5803719162940979, "learning_rate": 0.0005952200255523063, "loss": 3.6392, "step": 3483 }, { "epoch": 0.17, "grad_norm": 0.6266722679138184, "learning_rate": 0.0005952172877300582, "loss": 3.5691, "step": 3484 }, { "epoch": 0.17, "grad_norm": 0.5460732579231262, "learning_rate": 0.0005952145491302645, "loss": 3.3658, "step": 3485 }, { "epoch": 0.17, "grad_norm": 0.5742864012718201, "learning_rate": 0.0005952118097529325, "loss": 3.7658, "step": 3486 }, { "epoch": 0.17, "grad_norm": 0.5541781187057495, "learning_rate": 0.0005952090695980695, "loss": 3.4799, "step": 3487 }, { "epoch": 0.17, "grad_norm": 0.6336858868598938, "learning_rate": 0.0005952063286656824, "loss": 3.2493, "step": 3488 }, { "epoch": 0.17, "grad_norm": 0.5907337069511414, "learning_rate": 0.0005952035869557788, "loss": 3.4372, "step": 3489 }, { "epoch": 0.17, "grad_norm": 0.5762500166893005, "learning_rate": 0.0005952008444683657, "loss": 3.8303, "step": 3490 }, { "epoch": 0.17, "grad_norm": 0.580866813659668, "learning_rate": 0.0005951981012034505, "loss": 3.409, "step": 3491 }, { "epoch": 0.17, "grad_norm": 0.5785665512084961, "learning_rate": 0.0005951953571610403, "loss": 3.4152, "step": 3492 }, { "epoch": 0.17, "grad_norm": 0.5975906848907471, "learning_rate": 0.0005951926123411422, "loss": 3.554, "step": 3493 }, { "epoch": 0.17, "grad_norm": 0.5543960332870483, "learning_rate": 0.0005951898667437637, "loss": 3.4609, "step": 3494 }, { "epoch": 0.17, "grad_norm": 0.58534836769104, "learning_rate": 0.0005951871203689118, "loss": 3.5745, "step": 3495 }, { "epoch": 0.17, "grad_norm": 0.5618305802345276, "learning_rate": 0.0005951843732165938, "loss": 3.6903, "step": 3496 }, { "epoch": 0.17, "grad_norm": 0.6008855700492859, "learning_rate": 0.0005951816252868172, "loss": 3.5196, "step": 3497 }, { "epoch": 0.17, "grad_norm": 0.6193830370903015, "learning_rate": 0.0005951788765795889, "loss": 3.4215, "step": 3498 }, { "epoch": 0.17, "grad_norm": 0.5549221038818359, "learning_rate": 0.0005951761270949161, "loss": 3.4351, "step": 3499 }, { "epoch": 0.17, "grad_norm": 0.5623039603233337, "learning_rate": 0.0005951733768328063, "loss": 3.6452, "step": 3500 }, { "epoch": 0.17, "grad_norm": 0.5464016795158386, "learning_rate": 0.0005951706257932667, "loss": 3.518, "step": 3501 }, { "epoch": 0.17, "grad_norm": 0.5889121294021606, "learning_rate": 0.0005951678739763044, "loss": 3.6592, "step": 3502 }, { "epoch": 0.17, "grad_norm": 0.6298267245292664, "learning_rate": 0.0005951651213819268, "loss": 3.5137, "step": 3503 }, { "epoch": 0.17, "grad_norm": 0.5713311433792114, "learning_rate": 0.000595162368010141, "loss": 3.5688, "step": 3504 }, { "epoch": 0.17, "grad_norm": 0.610822856426239, "learning_rate": 0.0005951596138609543, "loss": 3.6497, "step": 3505 }, { "epoch": 0.17, "grad_norm": 0.5696738362312317, "learning_rate": 0.000595156858934374, "loss": 3.3546, "step": 3506 }, { "epoch": 0.17, "grad_norm": 0.5880289673805237, "learning_rate": 0.0005951541032304075, "loss": 3.7143, "step": 3507 }, { "epoch": 0.17, "grad_norm": 0.6033392548561096, "learning_rate": 0.0005951513467490617, "loss": 3.5457, "step": 3508 }, { "epoch": 0.17, "grad_norm": 0.5870084166526794, "learning_rate": 0.0005951485894903441, "loss": 3.4465, "step": 3509 }, { "epoch": 0.17, "grad_norm": 0.5881246328353882, "learning_rate": 0.0005951458314542619, "loss": 3.6633, "step": 3510 }, { "epoch": 0.17, "grad_norm": 0.5996487140655518, "learning_rate": 0.0005951430726408224, "loss": 3.5734, "step": 3511 }, { "epoch": 0.17, "grad_norm": 0.5851162075996399, "learning_rate": 0.0005951403130500329, "loss": 3.5603, "step": 3512 }, { "epoch": 0.17, "grad_norm": 0.5705832242965698, "learning_rate": 0.0005951375526819006, "loss": 3.5581, "step": 3513 }, { "epoch": 0.17, "grad_norm": 0.5960780382156372, "learning_rate": 0.0005951347915364327, "loss": 3.4452, "step": 3514 }, { "epoch": 0.17, "grad_norm": 0.5697128176689148, "learning_rate": 0.0005951320296136367, "loss": 3.6832, "step": 3515 }, { "epoch": 0.17, "grad_norm": 0.5737714171409607, "learning_rate": 0.0005951292669135197, "loss": 3.6346, "step": 3516 }, { "epoch": 0.17, "grad_norm": 0.6310943365097046, "learning_rate": 0.0005951265034360889, "loss": 3.3565, "step": 3517 }, { "epoch": 0.17, "grad_norm": 0.5577863454818726, "learning_rate": 0.0005951237391813518, "loss": 3.4109, "step": 3518 }, { "epoch": 0.17, "grad_norm": 0.5705747008323669, "learning_rate": 0.0005951209741493154, "loss": 3.4384, "step": 3519 }, { "epoch": 0.17, "grad_norm": 0.5870479345321655, "learning_rate": 0.0005951182083399874, "loss": 3.5561, "step": 3520 }, { "epoch": 0.17, "grad_norm": 0.5648481845855713, "learning_rate": 0.0005951154417533748, "loss": 3.3586, "step": 3521 }, { "epoch": 0.17, "grad_norm": 0.5373155474662781, "learning_rate": 0.0005951126743894846, "loss": 3.7483, "step": 3522 }, { "epoch": 0.17, "grad_norm": 0.5593510270118713, "learning_rate": 0.0005951099062483248, "loss": 3.4208, "step": 3523 }, { "epoch": 0.17, "grad_norm": 0.5978959202766418, "learning_rate": 0.0005951071373299021, "loss": 3.5202, "step": 3524 }, { "epoch": 0.17, "grad_norm": 0.5732042789459229, "learning_rate": 0.0005951043676342241, "loss": 3.5725, "step": 3525 }, { "epoch": 0.17, "grad_norm": 0.5535354018211365, "learning_rate": 0.000595101597161298, "loss": 3.741, "step": 3526 }, { "epoch": 0.17, "grad_norm": 0.5729936957359314, "learning_rate": 0.0005950988259111309, "loss": 3.7625, "step": 3527 }, { "epoch": 0.17, "grad_norm": 0.5622599124908447, "learning_rate": 0.0005950960538837305, "loss": 3.3271, "step": 3528 }, { "epoch": 0.17, "grad_norm": 0.5429548025131226, "learning_rate": 0.0005950932810791038, "loss": 3.5124, "step": 3529 }, { "epoch": 0.17, "grad_norm": 0.5643887519836426, "learning_rate": 0.0005950905074972581, "loss": 3.4261, "step": 3530 }, { "epoch": 0.17, "grad_norm": 0.548894464969635, "learning_rate": 0.0005950877331382009, "loss": 3.6266, "step": 3531 }, { "epoch": 0.17, "grad_norm": 0.5614020228385925, "learning_rate": 0.0005950849580019393, "loss": 3.4156, "step": 3532 }, { "epoch": 0.17, "grad_norm": 0.6196742653846741, "learning_rate": 0.0005950821820884808, "loss": 3.5224, "step": 3533 }, { "epoch": 0.17, "grad_norm": 0.5436410903930664, "learning_rate": 0.0005950794053978327, "loss": 3.4044, "step": 3534 }, { "epoch": 0.17, "grad_norm": 0.5510107278823853, "learning_rate": 0.0005950766279300021, "loss": 3.438, "step": 3535 }, { "epoch": 0.17, "grad_norm": 0.6485077738761902, "learning_rate": 0.0005950738496849965, "loss": 3.4866, "step": 3536 }, { "epoch": 0.17, "grad_norm": 0.5946866869926453, "learning_rate": 0.0005950710706628232, "loss": 3.5581, "step": 3537 }, { "epoch": 0.17, "grad_norm": 0.6310849189758301, "learning_rate": 0.0005950682908634895, "loss": 3.4498, "step": 3538 }, { "epoch": 0.17, "grad_norm": 0.5689077973365784, "learning_rate": 0.0005950655102870026, "loss": 3.4855, "step": 3539 }, { "epoch": 0.17, "grad_norm": 0.5511724352836609, "learning_rate": 0.00059506272893337, "loss": 3.6545, "step": 3540 }, { "epoch": 0.17, "grad_norm": 0.5716314911842346, "learning_rate": 0.000595059946802599, "loss": 3.5961, "step": 3541 }, { "epoch": 0.17, "grad_norm": 0.5928378105163574, "learning_rate": 0.0005950571638946968, "loss": 3.3571, "step": 3542 }, { "epoch": 0.17, "grad_norm": 0.5479989051818848, "learning_rate": 0.0005950543802096708, "loss": 3.328, "step": 3543 }, { "epoch": 0.17, "grad_norm": 0.5652512311935425, "learning_rate": 0.0005950515957475284, "loss": 3.6244, "step": 3544 }, { "epoch": 0.17, "grad_norm": 0.6006174683570862, "learning_rate": 0.000595048810508277, "loss": 3.5703, "step": 3545 }, { "epoch": 0.17, "grad_norm": 0.5618359446525574, "learning_rate": 0.0005950460244919236, "loss": 3.5093, "step": 3546 }, { "epoch": 0.17, "grad_norm": 0.6154499053955078, "learning_rate": 0.0005950432376984758, "loss": 3.6431, "step": 3547 }, { "epoch": 0.17, "grad_norm": 0.5481139421463013, "learning_rate": 0.000595040450127941, "loss": 3.5728, "step": 3548 }, { "epoch": 0.17, "grad_norm": 0.5588555932044983, "learning_rate": 0.0005950376617803264, "loss": 3.5644, "step": 3549 }, { "epoch": 0.17, "grad_norm": 0.5340237021446228, "learning_rate": 0.0005950348726556393, "loss": 3.6245, "step": 3550 }, { "epoch": 0.17, "grad_norm": 0.5997275710105896, "learning_rate": 0.0005950320827538873, "loss": 3.4301, "step": 3551 }, { "epoch": 0.17, "grad_norm": 0.6511825919151306, "learning_rate": 0.0005950292920750773, "loss": 3.5778, "step": 3552 }, { "epoch": 0.17, "grad_norm": 0.6428468823432922, "learning_rate": 0.0005950265006192171, "loss": 3.6624, "step": 3553 }, { "epoch": 0.17, "grad_norm": 0.576602041721344, "learning_rate": 0.0005950237083863139, "loss": 3.6925, "step": 3554 }, { "epoch": 0.17, "grad_norm": 0.5820669531822205, "learning_rate": 0.0005950209153763751, "loss": 3.4164, "step": 3555 }, { "epoch": 0.17, "grad_norm": 0.5869800448417664, "learning_rate": 0.0005950181215894078, "loss": 3.6757, "step": 3556 }, { "epoch": 0.17, "grad_norm": 0.5729996562004089, "learning_rate": 0.0005950153270254196, "loss": 3.4834, "step": 3557 }, { "epoch": 0.17, "grad_norm": 0.5958981513977051, "learning_rate": 0.0005950125316844179, "loss": 3.5674, "step": 3558 }, { "epoch": 0.17, "grad_norm": 0.6202664375305176, "learning_rate": 0.0005950097355664098, "loss": 3.4014, "step": 3559 }, { "epoch": 0.17, "grad_norm": 0.5707253813743591, "learning_rate": 0.000595006938671403, "loss": 3.5684, "step": 3560 }, { "epoch": 0.17, "grad_norm": 0.6256164908409119, "learning_rate": 0.0005950041409994046, "loss": 3.6933, "step": 3561 }, { "epoch": 0.17, "grad_norm": 0.5959612131118774, "learning_rate": 0.0005950013425504221, "loss": 3.553, "step": 3562 }, { "epoch": 0.17, "grad_norm": 0.8165073394775391, "learning_rate": 0.0005949985433244628, "loss": 3.6789, "step": 3563 }, { "epoch": 0.17, "grad_norm": 0.5742766261100769, "learning_rate": 0.0005949957433215342, "loss": 3.2581, "step": 3564 }, { "epoch": 0.17, "grad_norm": 0.5817901492118835, "learning_rate": 0.0005949929425416435, "loss": 3.4921, "step": 3565 }, { "epoch": 0.17, "grad_norm": 0.561545729637146, "learning_rate": 0.0005949901409847982, "loss": 3.4838, "step": 3566 }, { "epoch": 0.17, "grad_norm": 0.5388302803039551, "learning_rate": 0.0005949873386510056, "loss": 3.5186, "step": 3567 }, { "epoch": 0.17, "grad_norm": 0.5865461826324463, "learning_rate": 0.0005949845355402732, "loss": 3.4544, "step": 3568 }, { "epoch": 0.17, "grad_norm": 0.5656617879867554, "learning_rate": 0.0005949817316526083, "loss": 3.6103, "step": 3569 }, { "epoch": 0.17, "grad_norm": 0.5684191584587097, "learning_rate": 0.0005949789269880182, "loss": 3.4794, "step": 3570 }, { "epoch": 0.18, "grad_norm": 0.5902529358863831, "learning_rate": 0.0005949761215465104, "loss": 3.5839, "step": 3571 }, { "epoch": 0.18, "grad_norm": 0.6156822443008423, "learning_rate": 0.0005949733153280922, "loss": 3.6531, "step": 3572 }, { "epoch": 0.18, "grad_norm": 0.5745266079902649, "learning_rate": 0.0005949705083327713, "loss": 3.7956, "step": 3573 }, { "epoch": 0.18, "grad_norm": 0.5861325860023499, "learning_rate": 0.0005949677005605546, "loss": 3.6641, "step": 3574 }, { "epoch": 0.18, "grad_norm": 0.5582994818687439, "learning_rate": 0.0005949648920114499, "loss": 3.4384, "step": 3575 }, { "epoch": 0.18, "grad_norm": 0.5631774663925171, "learning_rate": 0.0005949620826854644, "loss": 3.3741, "step": 3576 }, { "epoch": 0.18, "grad_norm": 0.5708392858505249, "learning_rate": 0.0005949592725826055, "loss": 3.4894, "step": 3577 }, { "epoch": 0.18, "grad_norm": 0.6094598770141602, "learning_rate": 0.0005949564617028807, "loss": 3.3773, "step": 3578 }, { "epoch": 0.18, "grad_norm": 0.6077072620391846, "learning_rate": 0.0005949536500462972, "loss": 3.4769, "step": 3579 }, { "epoch": 0.18, "grad_norm": 0.5706542134284973, "learning_rate": 0.0005949508376128627, "loss": 3.5485, "step": 3580 }, { "epoch": 0.18, "grad_norm": 0.54489666223526, "learning_rate": 0.0005949480244025844, "loss": 3.4537, "step": 3581 }, { "epoch": 0.18, "grad_norm": 0.560539960861206, "learning_rate": 0.0005949452104154698, "loss": 3.6606, "step": 3582 }, { "epoch": 0.18, "grad_norm": 0.6367251873016357, "learning_rate": 0.0005949423956515263, "loss": 3.4471, "step": 3583 }, { "epoch": 0.18, "grad_norm": 0.5641475915908813, "learning_rate": 0.0005949395801107612, "loss": 3.6803, "step": 3584 }, { "epoch": 0.18, "grad_norm": 0.5714133977890015, "learning_rate": 0.0005949367637931821, "loss": 3.6771, "step": 3585 }, { "epoch": 0.18, "grad_norm": 0.539216935634613, "learning_rate": 0.0005949339466987963, "loss": 3.5072, "step": 3586 }, { "epoch": 0.18, "grad_norm": 0.5528108477592468, "learning_rate": 0.0005949311288276113, "loss": 3.6318, "step": 3587 }, { "epoch": 0.18, "grad_norm": 0.5910629630088806, "learning_rate": 0.0005949283101796342, "loss": 3.3507, "step": 3588 }, { "epoch": 0.18, "grad_norm": 0.640604555606842, "learning_rate": 0.000594925490754873, "loss": 3.4874, "step": 3589 }, { "epoch": 0.18, "grad_norm": 0.6236400008201599, "learning_rate": 0.0005949226705533348, "loss": 3.4188, "step": 3590 }, { "epoch": 0.18, "grad_norm": 0.5964713096618652, "learning_rate": 0.0005949198495750268, "loss": 3.4705, "step": 3591 }, { "epoch": 0.18, "grad_norm": 0.5554364323616028, "learning_rate": 0.0005949170278199569, "loss": 3.4395, "step": 3592 }, { "epoch": 0.18, "grad_norm": 0.5770800709724426, "learning_rate": 0.0005949142052881322, "loss": 3.5081, "step": 3593 }, { "epoch": 0.18, "grad_norm": 0.6268064975738525, "learning_rate": 0.0005949113819795603, "loss": 3.5902, "step": 3594 }, { "epoch": 0.18, "grad_norm": 0.5979000329971313, "learning_rate": 0.0005949085578942486, "loss": 3.3657, "step": 3595 }, { "epoch": 0.18, "grad_norm": 0.5864534974098206, "learning_rate": 0.0005949057330322044, "loss": 3.6838, "step": 3596 }, { "epoch": 0.18, "grad_norm": 0.5664005279541016, "learning_rate": 0.0005949029073934354, "loss": 3.4289, "step": 3597 }, { "epoch": 0.18, "grad_norm": 0.5910850167274475, "learning_rate": 0.0005949000809779488, "loss": 3.5872, "step": 3598 }, { "epoch": 0.18, "grad_norm": 0.590392529964447, "learning_rate": 0.0005948972537857521, "loss": 3.4473, "step": 3599 }, { "epoch": 0.18, "grad_norm": 0.5588194727897644, "learning_rate": 0.0005948944258168529, "loss": 3.4485, "step": 3600 }, { "epoch": 0.18, "grad_norm": 0.5453645586967468, "learning_rate": 0.0005948915970712585, "loss": 3.6381, "step": 3601 }, { "epoch": 0.18, "grad_norm": 0.5611460208892822, "learning_rate": 0.0005948887675489763, "loss": 3.7556, "step": 3602 }, { "epoch": 0.18, "grad_norm": 0.5924997329711914, "learning_rate": 0.000594885937250014, "loss": 3.5867, "step": 3603 }, { "epoch": 0.18, "grad_norm": 0.5849661231040955, "learning_rate": 0.0005948831061743788, "loss": 3.5556, "step": 3604 }, { "epoch": 0.18, "grad_norm": 0.5972135066986084, "learning_rate": 0.0005948802743220782, "loss": 3.4393, "step": 3605 }, { "epoch": 0.18, "grad_norm": 0.5754001140594482, "learning_rate": 0.0005948774416931197, "loss": 3.5528, "step": 3606 }, { "epoch": 0.18, "grad_norm": 0.592771053314209, "learning_rate": 0.0005948746082875109, "loss": 3.4963, "step": 3607 }, { "epoch": 0.18, "grad_norm": 0.6338792443275452, "learning_rate": 0.0005948717741052591, "loss": 3.2424, "step": 3608 }, { "epoch": 0.18, "grad_norm": 0.5741003751754761, "learning_rate": 0.0005948689391463718, "loss": 3.6585, "step": 3609 }, { "epoch": 0.18, "grad_norm": 0.5677915811538696, "learning_rate": 0.0005948661034108564, "loss": 3.5649, "step": 3610 }, { "epoch": 0.18, "grad_norm": 0.59786057472229, "learning_rate": 0.0005948632668987204, "loss": 3.6715, "step": 3611 }, { "epoch": 0.18, "grad_norm": 0.5489624738693237, "learning_rate": 0.0005948604296099714, "loss": 3.3292, "step": 3612 }, { "epoch": 0.18, "grad_norm": 0.5429912209510803, "learning_rate": 0.0005948575915446167, "loss": 3.6417, "step": 3613 }, { "epoch": 0.18, "grad_norm": 0.637424647808075, "learning_rate": 0.0005948547527026639, "loss": 3.4017, "step": 3614 }, { "epoch": 0.18, "grad_norm": 0.5805572867393494, "learning_rate": 0.0005948519130841203, "loss": 3.4764, "step": 3615 }, { "epoch": 0.18, "grad_norm": 0.6204468011856079, "learning_rate": 0.0005948490726889937, "loss": 3.5658, "step": 3616 }, { "epoch": 0.18, "grad_norm": 0.5520532131195068, "learning_rate": 0.0005948462315172913, "loss": 3.7156, "step": 3617 }, { "epoch": 0.18, "grad_norm": 0.5868279933929443, "learning_rate": 0.0005948433895690207, "loss": 3.5782, "step": 3618 }, { "epoch": 0.18, "grad_norm": 0.5735620856285095, "learning_rate": 0.0005948405468441893, "loss": 3.5567, "step": 3619 }, { "epoch": 0.18, "grad_norm": 0.5258257389068604, "learning_rate": 0.0005948377033428046, "loss": 3.4518, "step": 3620 }, { "epoch": 0.18, "grad_norm": 0.5907759070396423, "learning_rate": 0.0005948348590648743, "loss": 3.4862, "step": 3621 }, { "epoch": 0.18, "grad_norm": 0.5513227581977844, "learning_rate": 0.0005948320140104056, "loss": 3.3782, "step": 3622 }, { "epoch": 0.18, "grad_norm": 0.620806097984314, "learning_rate": 0.0005948291681794062, "loss": 3.4239, "step": 3623 }, { "epoch": 0.18, "grad_norm": 0.6315174698829651, "learning_rate": 0.0005948263215718834, "loss": 3.5383, "step": 3624 }, { "epoch": 0.18, "grad_norm": 0.6031731963157654, "learning_rate": 0.0005948234741878449, "loss": 3.4314, "step": 3625 }, { "epoch": 0.18, "grad_norm": 0.6577680110931396, "learning_rate": 0.0005948206260272981, "loss": 3.4231, "step": 3626 }, { "epoch": 0.18, "grad_norm": 0.5863077640533447, "learning_rate": 0.0005948177770902504, "loss": 3.5375, "step": 3627 }, { "epoch": 0.18, "grad_norm": 0.5604285597801208, "learning_rate": 0.0005948149273767095, "loss": 3.5875, "step": 3628 }, { "epoch": 0.18, "grad_norm": 0.6838080286979675, "learning_rate": 0.0005948120768866828, "loss": 3.4889, "step": 3629 }, { "epoch": 0.18, "grad_norm": 0.5543857216835022, "learning_rate": 0.0005948092256201779, "loss": 3.6624, "step": 3630 }, { "epoch": 0.18, "grad_norm": 0.5770386457443237, "learning_rate": 0.0005948063735772022, "loss": 3.6973, "step": 3631 }, { "epoch": 0.18, "grad_norm": 0.6096564531326294, "learning_rate": 0.0005948035207577632, "loss": 3.6492, "step": 3632 }, { "epoch": 0.18, "grad_norm": 0.6562588810920715, "learning_rate": 0.0005948006671618685, "loss": 3.6059, "step": 3633 }, { "epoch": 0.18, "grad_norm": 0.576315701007843, "learning_rate": 0.0005947978127895256, "loss": 3.7477, "step": 3634 }, { "epoch": 0.18, "grad_norm": 0.5762778520584106, "learning_rate": 0.000594794957640742, "loss": 3.5309, "step": 3635 }, { "epoch": 0.18, "grad_norm": 0.596248984336853, "learning_rate": 0.0005947921017155251, "loss": 3.5666, "step": 3636 }, { "epoch": 0.18, "grad_norm": 0.5929814577102661, "learning_rate": 0.0005947892450138827, "loss": 3.4502, "step": 3637 }, { "epoch": 0.18, "grad_norm": 0.581706702709198, "learning_rate": 0.000594786387535822, "loss": 3.4126, "step": 3638 }, { "epoch": 0.18, "grad_norm": 0.609158992767334, "learning_rate": 0.0005947835292813507, "loss": 3.6607, "step": 3639 }, { "epoch": 0.18, "grad_norm": 0.56010901927948, "learning_rate": 0.0005947806702504765, "loss": 3.5313, "step": 3640 }, { "epoch": 0.18, "grad_norm": 0.5166653990745544, "learning_rate": 0.0005947778104432067, "loss": 3.6868, "step": 3641 }, { "epoch": 0.18, "grad_norm": 0.6067258715629578, "learning_rate": 0.0005947749498595487, "loss": 3.4264, "step": 3642 }, { "epoch": 0.18, "grad_norm": 0.5833512544631958, "learning_rate": 0.0005947720884995104, "loss": 3.5246, "step": 3643 }, { "epoch": 0.18, "grad_norm": 0.5999512076377869, "learning_rate": 0.000594769226363099, "loss": 3.3084, "step": 3644 }, { "epoch": 0.18, "grad_norm": 0.5532363057136536, "learning_rate": 0.0005947663634503223, "loss": 3.6718, "step": 3645 }, { "epoch": 0.18, "grad_norm": 0.5565996170043945, "learning_rate": 0.0005947634997611876, "loss": 3.4042, "step": 3646 }, { "epoch": 0.18, "grad_norm": 0.6335086822509766, "learning_rate": 0.0005947606352957027, "loss": 3.5925, "step": 3647 }, { "epoch": 0.18, "grad_norm": 0.5904930830001831, "learning_rate": 0.0005947577700538751, "loss": 3.4858, "step": 3648 }, { "epoch": 0.18, "grad_norm": 0.5819961428642273, "learning_rate": 0.000594754904035712, "loss": 3.4756, "step": 3649 }, { "epoch": 0.18, "grad_norm": 0.5676370859146118, "learning_rate": 0.0005947520372412214, "loss": 3.3994, "step": 3650 }, { "epoch": 0.18, "grad_norm": 0.528371274471283, "learning_rate": 0.0005947491696704105, "loss": 3.5782, "step": 3651 }, { "epoch": 0.18, "grad_norm": 0.5858977437019348, "learning_rate": 0.0005947463013232872, "loss": 3.5947, "step": 3652 }, { "epoch": 0.18, "grad_norm": 0.6060065627098083, "learning_rate": 0.0005947434321998588, "loss": 3.3542, "step": 3653 }, { "epoch": 0.18, "grad_norm": 0.6193976402282715, "learning_rate": 0.000594740562300133, "loss": 3.4906, "step": 3654 }, { "epoch": 0.18, "grad_norm": 0.6060580015182495, "learning_rate": 0.000594737691624117, "loss": 3.4806, "step": 3655 }, { "epoch": 0.18, "grad_norm": 0.6116194725036621, "learning_rate": 0.000594734820171819, "loss": 3.5376, "step": 3656 }, { "epoch": 0.18, "grad_norm": 0.5761888027191162, "learning_rate": 0.000594731947943246, "loss": 3.5894, "step": 3657 }, { "epoch": 0.18, "grad_norm": 0.5662036538124084, "learning_rate": 0.0005947290749384059, "loss": 3.5399, "step": 3658 }, { "epoch": 0.18, "grad_norm": 0.6075831055641174, "learning_rate": 0.0005947262011573059, "loss": 3.7756, "step": 3659 }, { "epoch": 0.18, "grad_norm": 0.5735023617744446, "learning_rate": 0.000594723326599954, "loss": 3.48, "step": 3660 }, { "epoch": 0.18, "grad_norm": 0.5627445578575134, "learning_rate": 0.0005947204512663575, "loss": 3.6544, "step": 3661 }, { "epoch": 0.18, "grad_norm": 0.5845668911933899, "learning_rate": 0.0005947175751565241, "loss": 3.4011, "step": 3662 }, { "epoch": 0.18, "grad_norm": 0.5823236107826233, "learning_rate": 0.0005947146982704613, "loss": 3.2938, "step": 3663 }, { "epoch": 0.18, "grad_norm": 0.5699519515037537, "learning_rate": 0.0005947118206081767, "loss": 3.7, "step": 3664 }, { "epoch": 0.18, "grad_norm": 0.6400408744812012, "learning_rate": 0.0005947089421696779, "loss": 3.572, "step": 3665 }, { "epoch": 0.18, "grad_norm": 0.5679846405982971, "learning_rate": 0.0005947060629549725, "loss": 3.7148, "step": 3666 }, { "epoch": 0.18, "grad_norm": 0.5408234000205994, "learning_rate": 0.0005947031829640681, "loss": 3.35, "step": 3667 }, { "epoch": 0.18, "grad_norm": 0.5761770606040955, "learning_rate": 0.0005947003021969721, "loss": 3.5302, "step": 3668 }, { "epoch": 0.18, "grad_norm": 0.5421696305274963, "learning_rate": 0.0005946974206536923, "loss": 3.7173, "step": 3669 }, { "epoch": 0.18, "grad_norm": 0.5892155170440674, "learning_rate": 0.0005946945383342361, "loss": 3.5065, "step": 3670 }, { "epoch": 0.18, "grad_norm": 0.6017743349075317, "learning_rate": 0.0005946916552386113, "loss": 3.4401, "step": 3671 }, { "epoch": 0.18, "grad_norm": 0.5975637435913086, "learning_rate": 0.0005946887713668253, "loss": 3.6127, "step": 3672 }, { "epoch": 0.18, "grad_norm": 0.5787088871002197, "learning_rate": 0.000594685886718886, "loss": 3.635, "step": 3673 }, { "epoch": 0.18, "grad_norm": 0.5916794538497925, "learning_rate": 0.0005946830012948006, "loss": 3.4734, "step": 3674 }, { "epoch": 0.18, "grad_norm": 0.5836448669433594, "learning_rate": 0.000594680115094577, "loss": 3.4839, "step": 3675 }, { "epoch": 0.18, "grad_norm": 0.5569721460342407, "learning_rate": 0.0005946772281182226, "loss": 3.3795, "step": 3676 }, { "epoch": 0.18, "grad_norm": 0.6041418313980103, "learning_rate": 0.000594674340365745, "loss": 3.8186, "step": 3677 }, { "epoch": 0.18, "grad_norm": 0.5707287788391113, "learning_rate": 0.0005946714518371521, "loss": 3.5052, "step": 3678 }, { "epoch": 0.18, "grad_norm": 0.5691279768943787, "learning_rate": 0.0005946685625324512, "loss": 3.693, "step": 3679 }, { "epoch": 0.18, "grad_norm": 0.545986533164978, "learning_rate": 0.00059466567245165, "loss": 3.4695, "step": 3680 }, { "epoch": 0.18, "grad_norm": 0.5766247510910034, "learning_rate": 0.0005946627815947562, "loss": 3.6347, "step": 3681 }, { "epoch": 0.18, "grad_norm": 0.624789834022522, "learning_rate": 0.0005946598899617773, "loss": 3.4847, "step": 3682 }, { "epoch": 0.18, "grad_norm": 0.5886110067367554, "learning_rate": 0.0005946569975527209, "loss": 3.5243, "step": 3683 }, { "epoch": 0.18, "grad_norm": 0.6018827557563782, "learning_rate": 0.0005946541043675948, "loss": 3.4334, "step": 3684 }, { "epoch": 0.18, "grad_norm": 0.595812201499939, "learning_rate": 0.0005946512104064065, "loss": 3.551, "step": 3685 }, { "epoch": 0.18, "grad_norm": 0.6566250920295715, "learning_rate": 0.0005946483156691636, "loss": 3.6164, "step": 3686 }, { "epoch": 0.18, "grad_norm": 0.5355682373046875, "learning_rate": 0.0005946454201558736, "loss": 3.5166, "step": 3687 }, { "epoch": 0.18, "grad_norm": 0.5876386761665344, "learning_rate": 0.0005946425238665445, "loss": 3.4562, "step": 3688 }, { "epoch": 0.18, "grad_norm": 0.5719892978668213, "learning_rate": 0.0005946396268011834, "loss": 3.3293, "step": 3689 }, { "epoch": 0.18, "grad_norm": 0.5640550255775452, "learning_rate": 0.0005946367289597985, "loss": 3.4951, "step": 3690 }, { "epoch": 0.18, "grad_norm": 0.580362856388092, "learning_rate": 0.0005946338303423971, "loss": 3.4677, "step": 3691 }, { "epoch": 0.18, "grad_norm": 0.604098379611969, "learning_rate": 0.0005946309309489868, "loss": 3.556, "step": 3692 }, { "epoch": 0.18, "grad_norm": 0.6758443117141724, "learning_rate": 0.0005946280307795753, "loss": 3.2999, "step": 3693 }, { "epoch": 0.18, "grad_norm": 0.6679175496101379, "learning_rate": 0.0005946251298341704, "loss": 3.4755, "step": 3694 }, { "epoch": 0.18, "grad_norm": 0.5557239651679993, "learning_rate": 0.0005946222281127796, "loss": 3.712, "step": 3695 }, { "epoch": 0.18, "grad_norm": 0.5595870018005371, "learning_rate": 0.0005946193256154105, "loss": 3.5436, "step": 3696 }, { "epoch": 0.18, "grad_norm": 0.5639623403549194, "learning_rate": 0.0005946164223420708, "loss": 3.2984, "step": 3697 }, { "epoch": 0.18, "grad_norm": 0.6050103902816772, "learning_rate": 0.0005946135182927681, "loss": 3.4194, "step": 3698 }, { "epoch": 0.18, "grad_norm": 0.5879496335983276, "learning_rate": 0.0005946106134675102, "loss": 3.2472, "step": 3699 }, { "epoch": 0.18, "grad_norm": 0.5664472579956055, "learning_rate": 0.0005946077078663046, "loss": 3.4548, "step": 3700 }, { "epoch": 0.18, "grad_norm": 0.574516236782074, "learning_rate": 0.000594604801489159, "loss": 3.3961, "step": 3701 }, { "epoch": 0.18, "grad_norm": 0.583021879196167, "learning_rate": 0.000594601894336081, "loss": 3.441, "step": 3702 }, { "epoch": 0.18, "grad_norm": 0.5696473121643066, "learning_rate": 0.0005945989864070783, "loss": 3.5425, "step": 3703 }, { "epoch": 0.18, "grad_norm": 0.5899128913879395, "learning_rate": 0.0005945960777021587, "loss": 3.449, "step": 3704 }, { "epoch": 0.18, "grad_norm": 0.5821749567985535, "learning_rate": 0.0005945931682213296, "loss": 3.6383, "step": 3705 }, { "epoch": 0.18, "grad_norm": 0.5889866352081299, "learning_rate": 0.0005945902579645988, "loss": 3.5592, "step": 3706 }, { "epoch": 0.18, "grad_norm": 0.6704166531562805, "learning_rate": 0.000594587346931974, "loss": 3.2622, "step": 3707 }, { "epoch": 0.18, "grad_norm": 0.5655115842819214, "learning_rate": 0.0005945844351234628, "loss": 3.5981, "step": 3708 }, { "epoch": 0.18, "grad_norm": 0.5271401405334473, "learning_rate": 0.0005945815225390729, "loss": 3.6283, "step": 3709 }, { "epoch": 0.18, "grad_norm": 0.5654386281967163, "learning_rate": 0.0005945786091788119, "loss": 3.3157, "step": 3710 }, { "epoch": 0.18, "grad_norm": 0.5483503341674805, "learning_rate": 0.0005945756950426876, "loss": 3.4272, "step": 3711 }, { "epoch": 0.18, "grad_norm": 0.554814875125885, "learning_rate": 0.0005945727801307077, "loss": 3.5075, "step": 3712 }, { "epoch": 0.18, "grad_norm": 0.6500690579414368, "learning_rate": 0.0005945698644428797, "loss": 3.6308, "step": 3713 }, { "epoch": 0.18, "grad_norm": 0.5715118050575256, "learning_rate": 0.0005945669479792113, "loss": 3.6008, "step": 3714 }, { "epoch": 0.18, "grad_norm": 0.5506441593170166, "learning_rate": 0.0005945640307397104, "loss": 3.5656, "step": 3715 }, { "epoch": 0.18, "grad_norm": 0.5786412358283997, "learning_rate": 0.0005945611127243844, "loss": 3.575, "step": 3716 }, { "epoch": 0.18, "grad_norm": 0.6186723709106445, "learning_rate": 0.0005945581939332412, "loss": 3.6169, "step": 3717 }, { "epoch": 0.18, "grad_norm": 0.5763697624206543, "learning_rate": 0.0005945552743662883, "loss": 3.7148, "step": 3718 }, { "epoch": 0.18, "grad_norm": 0.659016489982605, "learning_rate": 0.0005945523540235336, "loss": 3.5355, "step": 3719 }, { "epoch": 0.18, "grad_norm": 0.5776824951171875, "learning_rate": 0.0005945494329049846, "loss": 3.6071, "step": 3720 }, { "epoch": 0.18, "grad_norm": 0.5591484904289246, "learning_rate": 0.0005945465110106492, "loss": 3.389, "step": 3721 }, { "epoch": 0.18, "grad_norm": 0.5636910200119019, "learning_rate": 0.0005945435883405348, "loss": 3.594, "step": 3722 }, { "epoch": 0.18, "grad_norm": 0.6116440296173096, "learning_rate": 0.0005945406648946495, "loss": 3.4926, "step": 3723 }, { "epoch": 0.18, "grad_norm": 0.558946430683136, "learning_rate": 0.0005945377406730007, "loss": 3.6447, "step": 3724 }, { "epoch": 0.18, "grad_norm": 0.5685746669769287, "learning_rate": 0.0005945348156755962, "loss": 3.4761, "step": 3725 }, { "epoch": 0.18, "grad_norm": 0.9950370192527771, "learning_rate": 0.0005945318899024435, "loss": 3.7469, "step": 3726 }, { "epoch": 0.18, "grad_norm": 0.7801612615585327, "learning_rate": 0.0005945289633535506, "loss": 3.6114, "step": 3727 }, { "epoch": 0.18, "grad_norm": 0.6060027480125427, "learning_rate": 0.0005945260360289251, "loss": 3.4032, "step": 3728 }, { "epoch": 0.18, "grad_norm": 0.580491840839386, "learning_rate": 0.0005945231079285747, "loss": 3.3195, "step": 3729 }, { "epoch": 0.18, "grad_norm": 0.7394986748695374, "learning_rate": 0.000594520179052507, "loss": 3.6785, "step": 3730 }, { "epoch": 0.18, "grad_norm": 0.5658180713653564, "learning_rate": 0.00059451724940073, "loss": 3.4843, "step": 3731 }, { "epoch": 0.18, "grad_norm": 0.5555374622344971, "learning_rate": 0.0005945143189732512, "loss": 3.4209, "step": 3732 }, { "epoch": 0.18, "grad_norm": 0.6545796990394592, "learning_rate": 0.0005945113877700785, "loss": 3.52, "step": 3733 }, { "epoch": 0.18, "grad_norm": 0.5792942047119141, "learning_rate": 0.0005945084557912192, "loss": 3.5676, "step": 3734 }, { "epoch": 0.18, "grad_norm": 0.587958574295044, "learning_rate": 0.0005945055230366815, "loss": 3.5171, "step": 3735 }, { "epoch": 0.18, "grad_norm": 0.570886492729187, "learning_rate": 0.0005945025895064728, "loss": 3.5358, "step": 3736 }, { "epoch": 0.18, "grad_norm": 0.5610484480857849, "learning_rate": 0.0005944996552006011, "loss": 3.6122, "step": 3737 }, { "epoch": 0.18, "grad_norm": 0.6265158653259277, "learning_rate": 0.000594496720119074, "loss": 3.3024, "step": 3738 }, { "epoch": 0.18, "grad_norm": 0.5752745866775513, "learning_rate": 0.0005944937842618991, "loss": 3.6183, "step": 3739 }, { "epoch": 0.18, "grad_norm": 0.6028185486793518, "learning_rate": 0.0005944908476290843, "loss": 3.4493, "step": 3740 }, { "epoch": 0.18, "grad_norm": 0.5684720873832703, "learning_rate": 0.0005944879102206372, "loss": 3.5517, "step": 3741 }, { "epoch": 0.18, "grad_norm": 0.5804165005683899, "learning_rate": 0.0005944849720365657, "loss": 3.3608, "step": 3742 }, { "epoch": 0.18, "grad_norm": 0.5693696737289429, "learning_rate": 0.0005944820330768775, "loss": 3.5387, "step": 3743 }, { "epoch": 0.18, "grad_norm": 0.6432933807373047, "learning_rate": 0.0005944790933415802, "loss": 3.5484, "step": 3744 }, { "epoch": 0.18, "grad_norm": 0.6080823540687561, "learning_rate": 0.0005944761528306817, "loss": 3.5617, "step": 3745 }, { "epoch": 0.18, "grad_norm": 0.5980885028839111, "learning_rate": 0.0005944732115441897, "loss": 3.4468, "step": 3746 }, { "epoch": 0.18, "grad_norm": 0.6021443605422974, "learning_rate": 0.0005944702694821119, "loss": 3.4973, "step": 3747 }, { "epoch": 0.18, "grad_norm": 0.6065918803215027, "learning_rate": 0.0005944673266444561, "loss": 3.3864, "step": 3748 }, { "epoch": 0.18, "grad_norm": 0.579862654209137, "learning_rate": 0.00059446438303123, "loss": 3.7255, "step": 3749 }, { "epoch": 0.18, "grad_norm": 0.5811945199966431, "learning_rate": 0.0005944614386424414, "loss": 3.5034, "step": 3750 }, { "epoch": 0.18, "grad_norm": 0.5766950249671936, "learning_rate": 0.0005944584934780981, "loss": 3.7607, "step": 3751 }, { "epoch": 0.18, "grad_norm": 0.6201891899108887, "learning_rate": 0.0005944555475382077, "loss": 3.342, "step": 3752 }, { "epoch": 0.18, "grad_norm": 0.5696931481361389, "learning_rate": 0.0005944526008227781, "loss": 3.419, "step": 3753 }, { "epoch": 0.18, "grad_norm": 0.6073271036148071, "learning_rate": 0.000594449653331817, "loss": 3.3548, "step": 3754 }, { "epoch": 0.18, "grad_norm": 0.5775006413459778, "learning_rate": 0.0005944467050653323, "loss": 3.3832, "step": 3755 }, { "epoch": 0.18, "grad_norm": 0.6126733422279358, "learning_rate": 0.0005944437560233314, "loss": 3.3462, "step": 3756 }, { "epoch": 0.18, "grad_norm": 0.7034265995025635, "learning_rate": 0.0005944408062058225, "loss": 3.4659, "step": 3757 }, { "epoch": 0.18, "grad_norm": 0.60416179895401, "learning_rate": 0.0005944378556128132, "loss": 3.2714, "step": 3758 }, { "epoch": 0.18, "grad_norm": 0.640256404876709, "learning_rate": 0.0005944349042443112, "loss": 3.5203, "step": 3759 }, { "epoch": 0.18, "grad_norm": 0.590840220451355, "learning_rate": 0.0005944319521003243, "loss": 3.5765, "step": 3760 }, { "epoch": 0.18, "grad_norm": 0.561656653881073, "learning_rate": 0.0005944289991808604, "loss": 3.509, "step": 3761 }, { "epoch": 0.18, "grad_norm": 0.5961998105049133, "learning_rate": 0.0005944260454859271, "loss": 3.3526, "step": 3762 }, { "epoch": 0.18, "grad_norm": 0.5496677756309509, "learning_rate": 0.0005944230910155322, "loss": 3.7351, "step": 3763 }, { "epoch": 0.18, "grad_norm": 0.5810037851333618, "learning_rate": 0.0005944201357696837, "loss": 3.4154, "step": 3764 }, { "epoch": 0.18, "grad_norm": 0.6055195927619934, "learning_rate": 0.0005944171797483891, "loss": 3.3845, "step": 3765 }, { "epoch": 0.18, "grad_norm": 0.5705102682113647, "learning_rate": 0.0005944142229516563, "loss": 3.4328, "step": 3766 }, { "epoch": 0.18, "grad_norm": 0.5758143067359924, "learning_rate": 0.0005944112653794931, "loss": 3.31, "step": 3767 }, { "epoch": 0.18, "grad_norm": 0.5275263786315918, "learning_rate": 0.0005944083070319074, "loss": 3.6008, "step": 3768 }, { "epoch": 0.18, "grad_norm": 0.5653240084648132, "learning_rate": 0.0005944053479089069, "loss": 3.4932, "step": 3769 }, { "epoch": 0.18, "grad_norm": 0.5635776519775391, "learning_rate": 0.0005944023880104992, "loss": 3.5663, "step": 3770 }, { "epoch": 0.18, "grad_norm": 0.6032636165618896, "learning_rate": 0.0005943994273366923, "loss": 3.2103, "step": 3771 }, { "epoch": 0.18, "grad_norm": 0.6400607228279114, "learning_rate": 0.000594396465887494, "loss": 3.3427, "step": 3772 }, { "epoch": 0.18, "grad_norm": 0.5439187288284302, "learning_rate": 0.0005943935036629122, "loss": 3.5142, "step": 3773 }, { "epoch": 0.18, "grad_norm": 0.7817490100860596, "learning_rate": 0.0005943905406629545, "loss": 3.3834, "step": 3774 }, { "epoch": 0.19, "grad_norm": 0.5938287973403931, "learning_rate": 0.0005943875768876286, "loss": 3.4816, "step": 3775 }, { "epoch": 0.19, "grad_norm": 0.5887666940689087, "learning_rate": 0.0005943846123369426, "loss": 3.7435, "step": 3776 }, { "epoch": 0.19, "grad_norm": 0.5787019729614258, "learning_rate": 0.0005943816470109042, "loss": 3.5568, "step": 3777 }, { "epoch": 0.19, "grad_norm": 0.5176889896392822, "learning_rate": 0.0005943786809095213, "loss": 3.514, "step": 3778 }, { "epoch": 0.19, "grad_norm": 0.5439308285713196, "learning_rate": 0.0005943757140328014, "loss": 3.416, "step": 3779 }, { "epoch": 0.19, "grad_norm": 0.5765101909637451, "learning_rate": 0.0005943727463807527, "loss": 3.6599, "step": 3780 }, { "epoch": 0.19, "grad_norm": 0.5823919177055359, "learning_rate": 0.0005943697779533828, "loss": 3.4311, "step": 3781 }, { "epoch": 0.19, "grad_norm": 0.5826864242553711, "learning_rate": 0.0005943668087506994, "loss": 3.5703, "step": 3782 }, { "epoch": 0.19, "grad_norm": 0.5674658417701721, "learning_rate": 0.0005943638387727107, "loss": 3.3271, "step": 3783 }, { "epoch": 0.19, "grad_norm": 0.564477801322937, "learning_rate": 0.0005943608680194241, "loss": 3.3989, "step": 3784 }, { "epoch": 0.19, "grad_norm": 0.5395248532295227, "learning_rate": 0.0005943578964908478, "loss": 3.5984, "step": 3785 }, { "epoch": 0.19, "grad_norm": 0.5394876599311829, "learning_rate": 0.0005943549241869894, "loss": 3.4793, "step": 3786 }, { "epoch": 0.19, "grad_norm": 0.5413987636566162, "learning_rate": 0.0005943519511078568, "loss": 3.4323, "step": 3787 }, { "epoch": 0.19, "grad_norm": 0.57403963804245, "learning_rate": 0.0005943489772534576, "loss": 3.6065, "step": 3788 }, { "epoch": 0.19, "grad_norm": 0.5907705426216125, "learning_rate": 0.0005943460026238001, "loss": 3.5587, "step": 3789 }, { "epoch": 0.19, "grad_norm": 0.5364418625831604, "learning_rate": 0.0005943430272188917, "loss": 3.5492, "step": 3790 }, { "epoch": 0.19, "grad_norm": 0.5795174241065979, "learning_rate": 0.0005943400510387405, "loss": 3.6131, "step": 3791 }, { "epoch": 0.19, "grad_norm": 0.5407680869102478, "learning_rate": 0.0005943370740833543, "loss": 3.5612, "step": 3792 }, { "epoch": 0.19, "grad_norm": 0.6448293924331665, "learning_rate": 0.0005943340963527409, "loss": 3.3383, "step": 3793 }, { "epoch": 0.19, "grad_norm": 0.562232255935669, "learning_rate": 0.0005943311178469079, "loss": 3.4904, "step": 3794 }, { "epoch": 0.19, "grad_norm": 0.5268046259880066, "learning_rate": 0.0005943281385658636, "loss": 3.7969, "step": 3795 }, { "epoch": 0.19, "grad_norm": 0.5817152261734009, "learning_rate": 0.0005943251585096156, "loss": 3.5373, "step": 3796 }, { "epoch": 0.19, "grad_norm": 0.523223876953125, "learning_rate": 0.0005943221776781719, "loss": 3.4779, "step": 3797 }, { "epoch": 0.19, "grad_norm": 0.6563833355903625, "learning_rate": 0.0005943191960715399, "loss": 3.5764, "step": 3798 }, { "epoch": 0.19, "grad_norm": 0.6011426448822021, "learning_rate": 0.000594316213689728, "loss": 3.5565, "step": 3799 }, { "epoch": 0.19, "grad_norm": 0.5760040283203125, "learning_rate": 0.0005943132305327438, "loss": 3.6364, "step": 3800 }, { "epoch": 0.19, "grad_norm": 0.551845371723175, "learning_rate": 0.0005943102466005952, "loss": 3.481, "step": 3801 }, { "epoch": 0.19, "grad_norm": 0.5699048638343811, "learning_rate": 0.0005943072618932901, "loss": 3.3893, "step": 3802 }, { "epoch": 0.19, "grad_norm": 0.5873308777809143, "learning_rate": 0.0005943042764108363, "loss": 3.6338, "step": 3803 }, { "epoch": 0.19, "grad_norm": 0.5894830226898193, "learning_rate": 0.0005943012901532416, "loss": 3.4884, "step": 3804 }, { "epoch": 0.19, "grad_norm": 0.5429089069366455, "learning_rate": 0.0005942983031205139, "loss": 3.4782, "step": 3805 }, { "epoch": 0.19, "grad_norm": 0.5496631264686584, "learning_rate": 0.0005942953153126613, "loss": 3.7168, "step": 3806 }, { "epoch": 0.19, "grad_norm": 0.5998590588569641, "learning_rate": 0.0005942923267296913, "loss": 3.7289, "step": 3807 }, { "epoch": 0.19, "grad_norm": 0.6006838083267212, "learning_rate": 0.000594289337371612, "loss": 3.4137, "step": 3808 }, { "epoch": 0.19, "grad_norm": 0.5551097393035889, "learning_rate": 0.0005942863472384313, "loss": 3.7467, "step": 3809 }, { "epoch": 0.19, "grad_norm": 0.5894075036048889, "learning_rate": 0.0005942833563301569, "loss": 3.3947, "step": 3810 }, { "epoch": 0.19, "grad_norm": 0.5639740228652954, "learning_rate": 0.0005942803646467968, "loss": 3.6335, "step": 3811 }, { "epoch": 0.19, "grad_norm": 0.5757173299789429, "learning_rate": 0.0005942773721883588, "loss": 3.6161, "step": 3812 }, { "epoch": 0.19, "grad_norm": 0.547919750213623, "learning_rate": 0.000594274378954851, "loss": 3.5195, "step": 3813 }, { "epoch": 0.19, "grad_norm": 0.543421745300293, "learning_rate": 0.000594271384946281, "loss": 3.2388, "step": 3814 }, { "epoch": 0.19, "grad_norm": 0.5859833359718323, "learning_rate": 0.0005942683901626569, "loss": 3.5582, "step": 3815 }, { "epoch": 0.19, "grad_norm": 0.5319817066192627, "learning_rate": 0.0005942653946039864, "loss": 3.574, "step": 3816 }, { "epoch": 0.19, "grad_norm": 0.5493303537368774, "learning_rate": 0.0005942623982702775, "loss": 3.4146, "step": 3817 }, { "epoch": 0.19, "grad_norm": 0.5733358860015869, "learning_rate": 0.000594259401161538, "loss": 3.543, "step": 3818 }, { "epoch": 0.19, "grad_norm": 0.5682793855667114, "learning_rate": 0.000594256403277776, "loss": 3.6779, "step": 3819 }, { "epoch": 0.19, "grad_norm": 0.5126398801803589, "learning_rate": 0.0005942534046189992, "loss": 3.5189, "step": 3820 }, { "epoch": 0.19, "grad_norm": 0.5806546211242676, "learning_rate": 0.0005942504051852155, "loss": 3.4849, "step": 3821 }, { "epoch": 0.19, "grad_norm": 0.5452494025230408, "learning_rate": 0.000594247404976433, "loss": 3.5726, "step": 3822 }, { "epoch": 0.19, "grad_norm": 0.5558573007583618, "learning_rate": 0.0005942444039926594, "loss": 3.5881, "step": 3823 }, { "epoch": 0.19, "grad_norm": 0.5447998642921448, "learning_rate": 0.0005942414022339027, "loss": 3.4945, "step": 3824 }, { "epoch": 0.19, "grad_norm": 0.6169920563697815, "learning_rate": 0.0005942383997001707, "loss": 3.3484, "step": 3825 }, { "epoch": 0.19, "grad_norm": 0.5817279815673828, "learning_rate": 0.0005942353963914714, "loss": 3.4676, "step": 3826 }, { "epoch": 0.19, "grad_norm": 0.5804048776626587, "learning_rate": 0.0005942323923078127, "loss": 3.2754, "step": 3827 }, { "epoch": 0.19, "grad_norm": 0.5827670097351074, "learning_rate": 0.0005942293874492025, "loss": 3.4499, "step": 3828 }, { "epoch": 0.19, "grad_norm": 0.5710329413414001, "learning_rate": 0.0005942263818156488, "loss": 3.5819, "step": 3829 }, { "epoch": 0.19, "grad_norm": 0.6096348762512207, "learning_rate": 0.0005942233754071594, "loss": 3.3934, "step": 3830 }, { "epoch": 0.19, "grad_norm": 0.5973950028419495, "learning_rate": 0.0005942203682237422, "loss": 3.4731, "step": 3831 }, { "epoch": 0.19, "grad_norm": 0.5598435401916504, "learning_rate": 0.0005942173602654053, "loss": 3.5913, "step": 3832 }, { "epoch": 0.19, "grad_norm": 0.6751794219017029, "learning_rate": 0.0005942143515321563, "loss": 3.576, "step": 3833 }, { "epoch": 0.19, "grad_norm": 0.5890194773674011, "learning_rate": 0.0005942113420240034, "loss": 3.6024, "step": 3834 }, { "epoch": 0.19, "grad_norm": 0.6194825768470764, "learning_rate": 0.0005942083317409545, "loss": 3.3712, "step": 3835 }, { "epoch": 0.19, "grad_norm": 0.5517776608467102, "learning_rate": 0.0005942053206830175, "loss": 3.4849, "step": 3836 }, { "epoch": 0.19, "grad_norm": 0.5492923855781555, "learning_rate": 0.0005942023088502001, "loss": 3.3793, "step": 3837 }, { "epoch": 0.19, "grad_norm": 0.5847030282020569, "learning_rate": 0.0005941992962425107, "loss": 3.6312, "step": 3838 }, { "epoch": 0.19, "grad_norm": 0.5900754928588867, "learning_rate": 0.0005941962828599568, "loss": 3.5081, "step": 3839 }, { "epoch": 0.19, "grad_norm": 0.5455581545829773, "learning_rate": 0.0005941932687025467, "loss": 3.4151, "step": 3840 }, { "epoch": 0.19, "grad_norm": 0.5705889463424683, "learning_rate": 0.000594190253770288, "loss": 3.6245, "step": 3841 }, { "epoch": 0.19, "grad_norm": 0.5608065724372864, "learning_rate": 0.0005941872380631888, "loss": 3.368, "step": 3842 }, { "epoch": 0.19, "grad_norm": 0.579862117767334, "learning_rate": 0.000594184221581257, "loss": 3.5874, "step": 3843 }, { "epoch": 0.19, "grad_norm": 0.570897102355957, "learning_rate": 0.0005941812043245006, "loss": 3.5095, "step": 3844 }, { "epoch": 0.19, "grad_norm": 0.5384157299995422, "learning_rate": 0.0005941781862929275, "loss": 3.6649, "step": 3845 }, { "epoch": 0.19, "grad_norm": 0.5707191824913025, "learning_rate": 0.0005941751674865457, "loss": 3.7034, "step": 3846 }, { "epoch": 0.19, "grad_norm": 0.5771855711936951, "learning_rate": 0.0005941721479053632, "loss": 3.5403, "step": 3847 }, { "epoch": 0.19, "grad_norm": 0.5326586961746216, "learning_rate": 0.0005941691275493878, "loss": 3.6345, "step": 3848 }, { "epoch": 0.19, "grad_norm": 0.6068933010101318, "learning_rate": 0.0005941661064186276, "loss": 3.5202, "step": 3849 }, { "epoch": 0.19, "grad_norm": 0.5824964642524719, "learning_rate": 0.0005941630845130905, "loss": 3.7016, "step": 3850 }, { "epoch": 0.19, "grad_norm": 0.5302342176437378, "learning_rate": 0.0005941600618327843, "loss": 3.5209, "step": 3851 }, { "epoch": 0.19, "grad_norm": 0.5523245334625244, "learning_rate": 0.0005941570383777171, "loss": 3.6873, "step": 3852 }, { "epoch": 0.19, "grad_norm": 0.5614722967147827, "learning_rate": 0.0005941540141478971, "loss": 3.5292, "step": 3853 }, { "epoch": 0.19, "grad_norm": 0.5354779958724976, "learning_rate": 0.0005941509891433319, "loss": 3.3006, "step": 3854 }, { "epoch": 0.19, "grad_norm": 0.6500388383865356, "learning_rate": 0.0005941479633640296, "loss": 3.5103, "step": 3855 }, { "epoch": 0.19, "grad_norm": 0.5664093494415283, "learning_rate": 0.0005941449368099982, "loss": 3.4756, "step": 3856 }, { "epoch": 0.19, "grad_norm": 0.5725468397140503, "learning_rate": 0.0005941419094812456, "loss": 3.4823, "step": 3857 }, { "epoch": 0.19, "grad_norm": 0.5943357348442078, "learning_rate": 0.0005941388813777799, "loss": 3.3334, "step": 3858 }, { "epoch": 0.19, "grad_norm": 0.6101706027984619, "learning_rate": 0.000594135852499609, "loss": 3.1905, "step": 3859 }, { "epoch": 0.19, "grad_norm": 0.5723959803581238, "learning_rate": 0.0005941328228467408, "loss": 3.4729, "step": 3860 }, { "epoch": 0.19, "grad_norm": 0.5894114971160889, "learning_rate": 0.0005941297924191833, "loss": 3.3462, "step": 3861 }, { "epoch": 0.19, "grad_norm": 0.5975579023361206, "learning_rate": 0.0005941267612169447, "loss": 3.4768, "step": 3862 }, { "epoch": 0.19, "grad_norm": 0.5968523621559143, "learning_rate": 0.0005941237292400326, "loss": 3.5171, "step": 3863 }, { "epoch": 0.19, "grad_norm": 0.5831944942474365, "learning_rate": 0.0005941206964884554, "loss": 3.2998, "step": 3864 }, { "epoch": 0.19, "grad_norm": 0.581929624080658, "learning_rate": 0.0005941176629622207, "loss": 3.3597, "step": 3865 }, { "epoch": 0.19, "grad_norm": 0.565826952457428, "learning_rate": 0.0005941146286613368, "loss": 3.4829, "step": 3866 }, { "epoch": 0.19, "grad_norm": 0.5366962552070618, "learning_rate": 0.0005941115935858116, "loss": 3.8651, "step": 3867 }, { "epoch": 0.19, "grad_norm": 0.5875154733657837, "learning_rate": 0.000594108557735653, "loss": 3.4664, "step": 3868 }, { "epoch": 0.19, "grad_norm": 0.5598399639129639, "learning_rate": 0.0005941055211108691, "loss": 3.6399, "step": 3869 }, { "epoch": 0.19, "grad_norm": 0.5694958567619324, "learning_rate": 0.0005941024837114679, "loss": 3.4511, "step": 3870 }, { "epoch": 0.19, "grad_norm": 0.5622062683105469, "learning_rate": 0.0005940994455374574, "loss": 3.4977, "step": 3871 }, { "epoch": 0.19, "grad_norm": 0.5713880062103271, "learning_rate": 0.0005940964065888454, "loss": 3.4234, "step": 3872 }, { "epoch": 0.19, "grad_norm": 0.5845184326171875, "learning_rate": 0.0005940933668656402, "loss": 3.4577, "step": 3873 }, { "epoch": 0.19, "grad_norm": 0.5796321034431458, "learning_rate": 0.0005940903263678495, "loss": 3.5307, "step": 3874 }, { "epoch": 0.19, "grad_norm": 0.5476338267326355, "learning_rate": 0.0005940872850954816, "loss": 3.4395, "step": 3875 }, { "epoch": 0.19, "grad_norm": 0.579772412776947, "learning_rate": 0.0005940842430485443, "loss": 3.3787, "step": 3876 }, { "epoch": 0.19, "grad_norm": 0.5444009304046631, "learning_rate": 0.0005940812002270458, "loss": 3.6889, "step": 3877 }, { "epoch": 0.19, "grad_norm": 0.5588319301605225, "learning_rate": 0.0005940781566309941, "loss": 3.6946, "step": 3878 }, { "epoch": 0.19, "grad_norm": 0.5704074501991272, "learning_rate": 0.0005940751122603971, "loss": 3.5999, "step": 3879 }, { "epoch": 0.19, "grad_norm": 0.7025076746940613, "learning_rate": 0.0005940720671152627, "loss": 3.2713, "step": 3880 }, { "epoch": 0.19, "grad_norm": 0.8407301306724548, "learning_rate": 0.0005940690211955991, "loss": 3.4089, "step": 3881 }, { "epoch": 0.19, "grad_norm": 0.5442054867744446, "learning_rate": 0.0005940659745014143, "loss": 3.5208, "step": 3882 }, { "epoch": 0.19, "grad_norm": 0.5735301375389099, "learning_rate": 0.0005940629270327165, "loss": 3.7339, "step": 3883 }, { "epoch": 0.19, "grad_norm": 0.5662720203399658, "learning_rate": 0.0005940598787895132, "loss": 3.5411, "step": 3884 }, { "epoch": 0.19, "grad_norm": 0.635032057762146, "learning_rate": 0.000594056829771813, "loss": 3.6586, "step": 3885 }, { "epoch": 0.19, "grad_norm": 0.5698032379150391, "learning_rate": 0.0005940537799796237, "loss": 3.5767, "step": 3886 }, { "epoch": 0.19, "grad_norm": 0.5576255321502686, "learning_rate": 0.0005940507294129534, "loss": 3.6144, "step": 3887 }, { "epoch": 0.19, "grad_norm": 0.5743609070777893, "learning_rate": 0.0005940476780718099, "loss": 3.4329, "step": 3888 }, { "epoch": 0.19, "grad_norm": 0.5626585483551025, "learning_rate": 0.0005940446259562015, "loss": 3.62, "step": 3889 }, { "epoch": 0.19, "grad_norm": 0.5813789963722229, "learning_rate": 0.0005940415730661361, "loss": 3.4282, "step": 3890 }, { "epoch": 0.19, "grad_norm": 0.6178430318832397, "learning_rate": 0.0005940385194016219, "loss": 3.612, "step": 3891 }, { "epoch": 0.19, "grad_norm": 0.569376528263092, "learning_rate": 0.0005940354649626667, "loss": 3.4726, "step": 3892 }, { "epoch": 0.19, "grad_norm": 0.5537204742431641, "learning_rate": 0.0005940324097492788, "loss": 3.5413, "step": 3893 }, { "epoch": 0.19, "grad_norm": 0.5710381269454956, "learning_rate": 0.000594029353761466, "loss": 3.4135, "step": 3894 }, { "epoch": 0.19, "grad_norm": 0.6075487732887268, "learning_rate": 0.0005940262969992365, "loss": 3.3629, "step": 3895 }, { "epoch": 0.19, "grad_norm": 0.5630260705947876, "learning_rate": 0.0005940232394625984, "loss": 3.4259, "step": 3896 }, { "epoch": 0.19, "grad_norm": 0.5889552235603333, "learning_rate": 0.0005940201811515595, "loss": 3.4283, "step": 3897 }, { "epoch": 0.19, "grad_norm": 0.593014657497406, "learning_rate": 0.0005940171220661281, "loss": 3.4631, "step": 3898 }, { "epoch": 0.19, "grad_norm": 0.5618011355400085, "learning_rate": 0.0005940140622063123, "loss": 3.4224, "step": 3899 }, { "epoch": 0.19, "grad_norm": 0.5981526970863342, "learning_rate": 0.0005940110015721199, "loss": 3.2224, "step": 3900 }, { "epoch": 0.19, "grad_norm": 0.5683342814445496, "learning_rate": 0.0005940079401635592, "loss": 3.5997, "step": 3901 }, { "epoch": 0.19, "grad_norm": 0.5758498311042786, "learning_rate": 0.0005940048779806381, "loss": 3.498, "step": 3902 }, { "epoch": 0.19, "grad_norm": 0.553376317024231, "learning_rate": 0.0005940018150233647, "loss": 3.613, "step": 3903 }, { "epoch": 0.19, "grad_norm": 0.5808395147323608, "learning_rate": 0.0005939987512917472, "loss": 3.5252, "step": 3904 }, { "epoch": 0.19, "grad_norm": 0.5820940732955933, "learning_rate": 0.0005939956867857936, "loss": 3.7314, "step": 3905 }, { "epoch": 0.19, "grad_norm": 0.5412302017211914, "learning_rate": 0.0005939926215055119, "loss": 3.5086, "step": 3906 }, { "epoch": 0.19, "grad_norm": 0.5737446546554565, "learning_rate": 0.0005939895554509102, "loss": 3.3745, "step": 3907 }, { "epoch": 0.19, "grad_norm": 0.5778170228004456, "learning_rate": 0.0005939864886219966, "loss": 3.6322, "step": 3908 }, { "epoch": 0.19, "grad_norm": 0.5643134713172913, "learning_rate": 0.0005939834210187792, "loss": 3.6112, "step": 3909 }, { "epoch": 0.19, "grad_norm": 0.5836457014083862, "learning_rate": 0.0005939803526412658, "loss": 3.6238, "step": 3910 }, { "epoch": 0.19, "grad_norm": 0.5769031047821045, "learning_rate": 0.0005939772834894651, "loss": 3.4665, "step": 3911 }, { "epoch": 0.19, "grad_norm": 0.5801408290863037, "learning_rate": 0.0005939742135633846, "loss": 3.1557, "step": 3912 }, { "epoch": 0.19, "grad_norm": 0.5858706831932068, "learning_rate": 0.0005939711428630326, "loss": 3.5962, "step": 3913 }, { "epoch": 0.19, "grad_norm": 0.6179161071777344, "learning_rate": 0.0005939680713884173, "loss": 3.5575, "step": 3914 }, { "epoch": 0.19, "grad_norm": 0.5316437482833862, "learning_rate": 0.0005939649991395466, "loss": 3.2445, "step": 3915 }, { "epoch": 0.19, "grad_norm": 0.5476315021514893, "learning_rate": 0.0005939619261164286, "loss": 3.5819, "step": 3916 }, { "epoch": 0.19, "grad_norm": 0.5380899906158447, "learning_rate": 0.0005939588523190716, "loss": 3.647, "step": 3917 }, { "epoch": 0.19, "grad_norm": 0.6630106568336487, "learning_rate": 0.0005939557777474833, "loss": 3.3215, "step": 3918 }, { "epoch": 0.19, "grad_norm": 0.5557861924171448, "learning_rate": 0.0005939527024016723, "loss": 3.5431, "step": 3919 }, { "epoch": 0.19, "grad_norm": 0.5520122647285461, "learning_rate": 0.0005939496262816464, "loss": 3.5015, "step": 3920 }, { "epoch": 0.19, "grad_norm": 0.593203604221344, "learning_rate": 0.0005939465493874139, "loss": 3.6008, "step": 3921 }, { "epoch": 0.19, "grad_norm": 0.5582143068313599, "learning_rate": 0.0005939434717189825, "loss": 3.3972, "step": 3922 }, { "epoch": 0.19, "grad_norm": 0.55299311876297, "learning_rate": 0.0005939403932763607, "loss": 3.5225, "step": 3923 }, { "epoch": 0.19, "grad_norm": 0.5243273377418518, "learning_rate": 0.0005939373140595565, "loss": 3.5162, "step": 3924 }, { "epoch": 0.19, "grad_norm": 0.5494229793548584, "learning_rate": 0.0005939342340685778, "loss": 3.4238, "step": 3925 }, { "epoch": 0.19, "grad_norm": 0.56685471534729, "learning_rate": 0.0005939311533034331, "loss": 3.3776, "step": 3926 }, { "epoch": 0.19, "grad_norm": 0.5525105595588684, "learning_rate": 0.0005939280717641302, "loss": 3.5084, "step": 3927 }, { "epoch": 0.19, "grad_norm": 0.6117692589759827, "learning_rate": 0.0005939249894506773, "loss": 3.4447, "step": 3928 }, { "epoch": 0.19, "grad_norm": 0.6117774844169617, "learning_rate": 0.0005939219063630826, "loss": 3.5056, "step": 3929 }, { "epoch": 0.19, "grad_norm": 0.5624076724052429, "learning_rate": 0.0005939188225013543, "loss": 3.4238, "step": 3930 }, { "epoch": 0.19, "grad_norm": 0.591431736946106, "learning_rate": 0.0005939157378655002, "loss": 3.4953, "step": 3931 }, { "epoch": 0.19, "grad_norm": 0.5721966624259949, "learning_rate": 0.0005939126524555287, "loss": 3.4561, "step": 3932 }, { "epoch": 0.19, "grad_norm": 0.5344234108924866, "learning_rate": 0.0005939095662714479, "loss": 3.4767, "step": 3933 }, { "epoch": 0.19, "grad_norm": 0.6061483025550842, "learning_rate": 0.0005939064793132658, "loss": 3.6087, "step": 3934 }, { "epoch": 0.19, "grad_norm": 0.5591382384300232, "learning_rate": 0.0005939033915809905, "loss": 3.5391, "step": 3935 }, { "epoch": 0.19, "grad_norm": 0.5948025584220886, "learning_rate": 0.0005939003030746302, "loss": 3.5729, "step": 3936 }, { "epoch": 0.19, "grad_norm": 0.6076129674911499, "learning_rate": 0.0005938972137941933, "loss": 3.4074, "step": 3937 }, { "epoch": 0.19, "grad_norm": 0.5574817061424255, "learning_rate": 0.0005938941237396876, "loss": 3.371, "step": 3938 }, { "epoch": 0.19, "grad_norm": 0.5657892823219299, "learning_rate": 0.0005938910329111213, "loss": 3.2757, "step": 3939 }, { "epoch": 0.19, "grad_norm": 0.5320510864257812, "learning_rate": 0.0005938879413085026, "loss": 3.5539, "step": 3940 }, { "epoch": 0.19, "grad_norm": 0.594287097454071, "learning_rate": 0.0005938848489318396, "loss": 3.3186, "step": 3941 }, { "epoch": 0.19, "grad_norm": 0.5167534351348877, "learning_rate": 0.0005938817557811405, "loss": 3.4876, "step": 3942 }, { "epoch": 0.19, "grad_norm": 0.5355756282806396, "learning_rate": 0.0005938786618564134, "loss": 3.6319, "step": 3943 }, { "epoch": 0.19, "grad_norm": 0.5719373226165771, "learning_rate": 0.0005938755671576664, "loss": 3.6176, "step": 3944 }, { "epoch": 0.19, "grad_norm": 0.5378432869911194, "learning_rate": 0.0005938724716849078, "loss": 3.2131, "step": 3945 }, { "epoch": 0.19, "grad_norm": 0.5518651008605957, "learning_rate": 0.0005938693754381456, "loss": 3.3925, "step": 3946 }, { "epoch": 0.19, "grad_norm": 0.5597622990608215, "learning_rate": 0.0005938662784173881, "loss": 3.436, "step": 3947 }, { "epoch": 0.19, "grad_norm": 0.5614737868309021, "learning_rate": 0.0005938631806226434, "loss": 3.696, "step": 3948 }, { "epoch": 0.19, "grad_norm": 0.5528543591499329, "learning_rate": 0.0005938600820539195, "loss": 3.4416, "step": 3949 }, { "epoch": 0.19, "grad_norm": 0.588862955570221, "learning_rate": 0.0005938569827112247, "loss": 3.5648, "step": 3950 }, { "epoch": 0.19, "grad_norm": 0.590054452419281, "learning_rate": 0.0005938538825945673, "loss": 3.3912, "step": 3951 }, { "epoch": 0.19, "grad_norm": 0.5708290934562683, "learning_rate": 0.0005938507817039553, "loss": 3.5998, "step": 3952 }, { "epoch": 0.19, "grad_norm": 0.5861159563064575, "learning_rate": 0.0005938476800393968, "loss": 3.4476, "step": 3953 }, { "epoch": 0.19, "grad_norm": 0.5528053641319275, "learning_rate": 0.0005938445776009002, "loss": 3.4957, "step": 3954 }, { "epoch": 0.19, "grad_norm": 0.5492965579032898, "learning_rate": 0.0005938414743884734, "loss": 3.3417, "step": 3955 }, { "epoch": 0.19, "grad_norm": 0.575512707233429, "learning_rate": 0.0005938383704021247, "loss": 3.398, "step": 3956 }, { "epoch": 0.19, "grad_norm": 0.5666360855102539, "learning_rate": 0.0005938352656418624, "loss": 3.5014, "step": 3957 }, { "epoch": 0.19, "grad_norm": 0.6097762584686279, "learning_rate": 0.0005938321601076944, "loss": 3.5334, "step": 3958 }, { "epoch": 0.19, "grad_norm": 0.5508065819740295, "learning_rate": 0.0005938290537996292, "loss": 3.4517, "step": 3959 }, { "epoch": 0.19, "grad_norm": 0.5451350808143616, "learning_rate": 0.0005938259467176748, "loss": 3.3272, "step": 3960 }, { "epoch": 0.19, "grad_norm": 0.5097694993019104, "learning_rate": 0.0005938228388618393, "loss": 3.3928, "step": 3961 }, { "epoch": 0.19, "grad_norm": 0.5869589447975159, "learning_rate": 0.0005938197302321312, "loss": 3.5485, "step": 3962 }, { "epoch": 0.19, "grad_norm": 0.5630464553833008, "learning_rate": 0.0005938166208285582, "loss": 3.41, "step": 3963 }, { "epoch": 0.19, "grad_norm": 0.6036396622657776, "learning_rate": 0.0005938135106511289, "loss": 3.6272, "step": 3964 }, { "epoch": 0.19, "grad_norm": 0.6252908706665039, "learning_rate": 0.0005938103996998514, "loss": 3.3919, "step": 3965 }, { "epoch": 0.19, "grad_norm": 0.5575089454650879, "learning_rate": 0.0005938072879747338, "loss": 3.47, "step": 3966 }, { "epoch": 0.19, "grad_norm": 0.5552845597267151, "learning_rate": 0.0005938041754757844, "loss": 3.384, "step": 3967 }, { "epoch": 0.19, "grad_norm": 0.5382720828056335, "learning_rate": 0.0005938010622030112, "loss": 3.4518, "step": 3968 }, { "epoch": 0.19, "grad_norm": 0.5792542695999146, "learning_rate": 0.0005937979481564227, "loss": 3.3024, "step": 3969 }, { "epoch": 0.19, "grad_norm": 0.574998676776886, "learning_rate": 0.000593794833336027, "loss": 3.4806, "step": 3970 }, { "epoch": 0.19, "grad_norm": 0.6154451966285706, "learning_rate": 0.000593791717741832, "loss": 3.3694, "step": 3971 }, { "epoch": 0.19, "grad_norm": 0.5885059237480164, "learning_rate": 0.0005937886013738464, "loss": 3.3851, "step": 3972 }, { "epoch": 0.19, "grad_norm": 0.5663660168647766, "learning_rate": 0.000593785484232078, "loss": 3.5687, "step": 3973 }, { "epoch": 0.19, "grad_norm": 0.5735329389572144, "learning_rate": 0.0005937823663165352, "loss": 3.3934, "step": 3974 }, { "epoch": 0.19, "grad_norm": 0.6525985598564148, "learning_rate": 0.0005937792476272262, "loss": 3.328, "step": 3975 }, { "epoch": 0.19, "grad_norm": 0.586041271686554, "learning_rate": 0.0005937761281641594, "loss": 3.4332, "step": 3976 }, { "epoch": 0.19, "grad_norm": 0.563795804977417, "learning_rate": 0.0005937730079273426, "loss": 3.6729, "step": 3977 }, { "epoch": 0.19, "grad_norm": 0.5492075681686401, "learning_rate": 0.0005937698869167842, "loss": 3.4315, "step": 3978 }, { "epoch": 0.2, "grad_norm": 0.6151273846626282, "learning_rate": 0.0005937667651324925, "loss": 3.7117, "step": 3979 }, { "epoch": 0.2, "grad_norm": 0.5914956331253052, "learning_rate": 0.0005937636425744757, "loss": 3.4685, "step": 3980 }, { "epoch": 0.2, "grad_norm": 0.5682784914970398, "learning_rate": 0.0005937605192427422, "loss": 3.2269, "step": 3981 }, { "epoch": 0.2, "grad_norm": 0.5962632298469543, "learning_rate": 0.0005937573951372997, "loss": 3.4843, "step": 3982 }, { "epoch": 0.2, "grad_norm": 0.6012601852416992, "learning_rate": 0.0005937542702581569, "loss": 3.4149, "step": 3983 }, { "epoch": 0.2, "grad_norm": 0.556812047958374, "learning_rate": 0.0005937511446053219, "loss": 3.5567, "step": 3984 }, { "epoch": 0.2, "grad_norm": 0.6080185174942017, "learning_rate": 0.0005937480181788029, "loss": 3.5601, "step": 3985 }, { "epoch": 0.2, "grad_norm": 0.6003567576408386, "learning_rate": 0.0005937448909786083, "loss": 3.5517, "step": 3986 }, { "epoch": 0.2, "grad_norm": 0.575144350528717, "learning_rate": 0.0005937417630047459, "loss": 3.415, "step": 3987 }, { "epoch": 0.2, "grad_norm": 0.5906019806861877, "learning_rate": 0.0005937386342572244, "loss": 3.6005, "step": 3988 }, { "epoch": 0.2, "grad_norm": 0.564968466758728, "learning_rate": 0.0005937355047360519, "loss": 3.4319, "step": 3989 }, { "epoch": 0.2, "grad_norm": 0.5609432458877563, "learning_rate": 0.0005937323744412365, "loss": 3.3004, "step": 3990 }, { "epoch": 0.2, "grad_norm": 0.5350099205970764, "learning_rate": 0.0005937292433727866, "loss": 3.5976, "step": 3991 }, { "epoch": 0.2, "grad_norm": 0.5905284881591797, "learning_rate": 0.0005937261115307104, "loss": 3.5769, "step": 3992 }, { "epoch": 0.2, "grad_norm": 0.5806431770324707, "learning_rate": 0.0005937229789150162, "loss": 3.6226, "step": 3993 }, { "epoch": 0.2, "grad_norm": 0.5419069528579712, "learning_rate": 0.0005937198455257122, "loss": 3.8644, "step": 3994 }, { "epoch": 0.2, "grad_norm": 0.5670180916786194, "learning_rate": 0.0005937167113628067, "loss": 3.4316, "step": 3995 }, { "epoch": 0.2, "grad_norm": 0.5351924896240234, "learning_rate": 0.0005937135764263077, "loss": 3.5708, "step": 3996 }, { "epoch": 0.2, "grad_norm": 0.5488182306289673, "learning_rate": 0.0005937104407162238, "loss": 3.5572, "step": 3997 }, { "epoch": 0.2, "grad_norm": 0.5685787200927734, "learning_rate": 0.0005937073042325631, "loss": 3.4464, "step": 3998 }, { "epoch": 0.2, "grad_norm": 0.5353884696960449, "learning_rate": 0.000593704166975334, "loss": 3.4783, "step": 3999 }, { "epoch": 0.2, "grad_norm": 0.6061869859695435, "learning_rate": 0.0005937010289445444, "loss": 3.2644, "step": 4000 }, { "epoch": 0.2, "grad_norm": 0.571092426776886, "learning_rate": 0.000593697890140203, "loss": 3.5456, "step": 4001 }, { "epoch": 0.2, "grad_norm": 0.6306237578392029, "learning_rate": 0.0005936947505623179, "loss": 3.3862, "step": 4002 }, { "epoch": 0.2, "grad_norm": 0.5207166075706482, "learning_rate": 0.0005936916102108974, "loss": 3.3985, "step": 4003 }, { "epoch": 0.2, "grad_norm": 0.6106595396995544, "learning_rate": 0.0005936884690859495, "loss": 3.6107, "step": 4004 }, { "epoch": 0.2, "grad_norm": 0.6063200235366821, "learning_rate": 0.0005936853271874829, "loss": 3.5122, "step": 4005 }, { "epoch": 0.2, "grad_norm": 0.5411254167556763, "learning_rate": 0.0005936821845155055, "loss": 3.4418, "step": 4006 }, { "epoch": 0.2, "grad_norm": 0.5724605321884155, "learning_rate": 0.0005936790410700259, "loss": 3.455, "step": 4007 }, { "epoch": 0.2, "grad_norm": 0.5903576016426086, "learning_rate": 0.0005936758968510521, "loss": 3.4542, "step": 4008 }, { "epoch": 0.2, "grad_norm": 0.5617760419845581, "learning_rate": 0.0005936727518585925, "loss": 3.5287, "step": 4009 }, { "epoch": 0.2, "grad_norm": 0.57075035572052, "learning_rate": 0.0005936696060926555, "loss": 3.54, "step": 4010 }, { "epoch": 0.2, "grad_norm": 0.5305947065353394, "learning_rate": 0.0005936664595532493, "loss": 3.7036, "step": 4011 }, { "epoch": 0.2, "grad_norm": 0.5543831586837769, "learning_rate": 0.000593663312240382, "loss": 3.4049, "step": 4012 }, { "epoch": 0.2, "grad_norm": 0.5600700974464417, "learning_rate": 0.0005936601641540622, "loss": 3.8008, "step": 4013 }, { "epoch": 0.2, "grad_norm": 0.5779480934143066, "learning_rate": 0.0005936570152942979, "loss": 3.7728, "step": 4014 }, { "epoch": 0.2, "grad_norm": 0.5971601009368896, "learning_rate": 0.0005936538656610977, "loss": 3.4096, "step": 4015 }, { "epoch": 0.2, "grad_norm": 0.5580905675888062, "learning_rate": 0.0005936507152544695, "loss": 3.6679, "step": 4016 }, { "epoch": 0.2, "grad_norm": 0.6468621492385864, "learning_rate": 0.0005936475640744221, "loss": 3.6336, "step": 4017 }, { "epoch": 0.2, "grad_norm": 0.6058899164199829, "learning_rate": 0.0005936444121209634, "loss": 3.4921, "step": 4018 }, { "epoch": 0.2, "grad_norm": 0.5818850994110107, "learning_rate": 0.0005936412593941019, "loss": 3.4677, "step": 4019 }, { "epoch": 0.2, "grad_norm": 0.5825283527374268, "learning_rate": 0.0005936381058938457, "loss": 3.4282, "step": 4020 }, { "epoch": 0.2, "grad_norm": 0.5536349415779114, "learning_rate": 0.0005936349516202034, "loss": 3.3787, "step": 4021 }, { "epoch": 0.2, "grad_norm": 0.664323091506958, "learning_rate": 0.000593631796573183, "loss": 3.4861, "step": 4022 }, { "epoch": 0.2, "grad_norm": 0.5751873254776001, "learning_rate": 0.0005936286407527931, "loss": 3.5405, "step": 4023 }, { "epoch": 0.2, "grad_norm": 0.5620585083961487, "learning_rate": 0.0005936254841590417, "loss": 3.2407, "step": 4024 }, { "epoch": 0.2, "grad_norm": 0.5601255893707275, "learning_rate": 0.0005936223267919374, "loss": 3.3897, "step": 4025 }, { "epoch": 0.2, "grad_norm": 0.6205532550811768, "learning_rate": 0.0005936191686514884, "loss": 3.5952, "step": 4026 }, { "epoch": 0.2, "grad_norm": 0.5451340675354004, "learning_rate": 0.000593616009737703, "loss": 3.3103, "step": 4027 }, { "epoch": 0.2, "grad_norm": 0.5882269144058228, "learning_rate": 0.0005936128500505896, "loss": 3.274, "step": 4028 }, { "epoch": 0.2, "grad_norm": 0.5673192739486694, "learning_rate": 0.0005936096895901564, "loss": 3.5805, "step": 4029 }, { "epoch": 0.2, "grad_norm": 0.5817726850509644, "learning_rate": 0.0005936065283564117, "loss": 3.4446, "step": 4030 }, { "epoch": 0.2, "grad_norm": 0.6373004913330078, "learning_rate": 0.0005936033663493641, "loss": 3.662, "step": 4031 }, { "epoch": 0.2, "grad_norm": 0.5789350271224976, "learning_rate": 0.0005936002035690216, "loss": 3.4954, "step": 4032 }, { "epoch": 0.2, "grad_norm": 0.5490628480911255, "learning_rate": 0.0005935970400153927, "loss": 3.4452, "step": 4033 }, { "epoch": 0.2, "grad_norm": 0.611883282661438, "learning_rate": 0.0005935938756884857, "loss": 3.5924, "step": 4034 }, { "epoch": 0.2, "grad_norm": 0.5716962814331055, "learning_rate": 0.000593590710588309, "loss": 3.4642, "step": 4035 }, { "epoch": 0.2, "grad_norm": 0.5935686230659485, "learning_rate": 0.0005935875447148707, "loss": 3.3395, "step": 4036 }, { "epoch": 0.2, "grad_norm": 0.5877057909965515, "learning_rate": 0.0005935843780681795, "loss": 3.3124, "step": 4037 }, { "epoch": 0.2, "grad_norm": 0.6173941493034363, "learning_rate": 0.0005935812106482433, "loss": 3.606, "step": 4038 }, { "epoch": 0.2, "grad_norm": 0.5606823563575745, "learning_rate": 0.0005935780424550709, "loss": 3.6931, "step": 4039 }, { "epoch": 0.2, "grad_norm": 0.6052460074424744, "learning_rate": 0.0005935748734886704, "loss": 3.4013, "step": 4040 }, { "epoch": 0.2, "grad_norm": 0.586379885673523, "learning_rate": 0.0005935717037490501, "loss": 3.4105, "step": 4041 }, { "epoch": 0.2, "grad_norm": 0.5679966807365417, "learning_rate": 0.0005935685332362186, "loss": 3.5497, "step": 4042 }, { "epoch": 0.2, "grad_norm": 0.5713095664978027, "learning_rate": 0.000593565361950184, "loss": 3.4705, "step": 4043 }, { "epoch": 0.2, "grad_norm": 0.5471493005752563, "learning_rate": 0.0005935621898909547, "loss": 3.4439, "step": 4044 }, { "epoch": 0.2, "grad_norm": 0.5692678093910217, "learning_rate": 0.0005935590170585391, "loss": 3.4251, "step": 4045 }, { "epoch": 0.2, "grad_norm": 0.5359514951705933, "learning_rate": 0.0005935558434529456, "loss": 3.705, "step": 4046 }, { "epoch": 0.2, "grad_norm": 0.6026199460029602, "learning_rate": 0.0005935526690741824, "loss": 3.5045, "step": 4047 }, { "epoch": 0.2, "grad_norm": 0.5974529385566711, "learning_rate": 0.0005935494939222581, "loss": 3.3607, "step": 4048 }, { "epoch": 0.2, "grad_norm": 0.583665132522583, "learning_rate": 0.0005935463179971808, "loss": 3.7261, "step": 4049 }, { "epoch": 0.2, "grad_norm": 0.5661903619766235, "learning_rate": 0.0005935431412989591, "loss": 3.591, "step": 4050 }, { "epoch": 0.2, "grad_norm": 0.5681086778640747, "learning_rate": 0.0005935399638276012, "loss": 3.5113, "step": 4051 }, { "epoch": 0.2, "grad_norm": 0.5938650965690613, "learning_rate": 0.0005935367855831155, "loss": 3.4974, "step": 4052 }, { "epoch": 0.2, "grad_norm": 0.5689106583595276, "learning_rate": 0.0005935336065655104, "loss": 3.4858, "step": 4053 }, { "epoch": 0.2, "grad_norm": 0.552765965461731, "learning_rate": 0.0005935304267747943, "loss": 3.6311, "step": 4054 }, { "epoch": 0.2, "grad_norm": 0.5879870057106018, "learning_rate": 0.0005935272462109756, "loss": 3.4236, "step": 4055 }, { "epoch": 0.2, "grad_norm": 0.5558753609657288, "learning_rate": 0.0005935240648740625, "loss": 3.447, "step": 4056 }, { "epoch": 0.2, "grad_norm": 0.5875343084335327, "learning_rate": 0.0005935208827640636, "loss": 3.5598, "step": 4057 }, { "epoch": 0.2, "grad_norm": 0.580474853515625, "learning_rate": 0.0005935176998809873, "loss": 3.5608, "step": 4058 }, { "epoch": 0.2, "grad_norm": 0.5907713770866394, "learning_rate": 0.0005935145162248416, "loss": 3.2178, "step": 4059 }, { "epoch": 0.2, "grad_norm": 0.5516918301582336, "learning_rate": 0.0005935113317956353, "loss": 3.4183, "step": 4060 }, { "epoch": 0.2, "grad_norm": 0.5646939277648926, "learning_rate": 0.0005935081465933767, "loss": 3.4653, "step": 4061 }, { "epoch": 0.2, "grad_norm": 0.5440240502357483, "learning_rate": 0.000593504960618074, "loss": 3.7186, "step": 4062 }, { "epoch": 0.2, "grad_norm": 0.5458465218544006, "learning_rate": 0.0005935017738697357, "loss": 3.4928, "step": 4063 }, { "epoch": 0.2, "grad_norm": 0.6689546704292297, "learning_rate": 0.0005934985863483704, "loss": 3.3547, "step": 4064 }, { "epoch": 0.2, "grad_norm": 0.6123103499412537, "learning_rate": 0.0005934953980539861, "loss": 3.375, "step": 4065 }, { "epoch": 0.2, "grad_norm": 0.5664320588111877, "learning_rate": 0.0005934922089865915, "loss": 3.449, "step": 4066 }, { "epoch": 0.2, "grad_norm": 0.6381657719612122, "learning_rate": 0.0005934890191461949, "loss": 3.4441, "step": 4067 }, { "epoch": 0.2, "grad_norm": 0.5956173539161682, "learning_rate": 0.0005934858285328048, "loss": 3.6867, "step": 4068 }, { "epoch": 0.2, "grad_norm": 0.6595749855041504, "learning_rate": 0.0005934826371464294, "loss": 3.3421, "step": 4069 }, { "epoch": 0.2, "grad_norm": 0.5597966313362122, "learning_rate": 0.0005934794449870772, "loss": 3.5673, "step": 4070 }, { "epoch": 0.2, "grad_norm": 0.5948126912117004, "learning_rate": 0.0005934762520547566, "loss": 3.7263, "step": 4071 }, { "epoch": 0.2, "grad_norm": 0.5999248027801514, "learning_rate": 0.0005934730583494761, "loss": 3.4787, "step": 4072 }, { "epoch": 0.2, "grad_norm": 0.6411767601966858, "learning_rate": 0.0005934698638712441, "loss": 3.4928, "step": 4073 }, { "epoch": 0.2, "grad_norm": 0.5557070374488831, "learning_rate": 0.0005934666686200689, "loss": 3.5091, "step": 4074 }, { "epoch": 0.2, "grad_norm": 0.5832461714744568, "learning_rate": 0.0005934634725959589, "loss": 3.5301, "step": 4075 }, { "epoch": 0.2, "grad_norm": 0.5976592302322388, "learning_rate": 0.0005934602757989227, "loss": 3.4888, "step": 4076 }, { "epoch": 0.2, "grad_norm": 0.5408334732055664, "learning_rate": 0.0005934570782289686, "loss": 3.6005, "step": 4077 }, { "epoch": 0.2, "grad_norm": 0.5419313311576843, "learning_rate": 0.000593453879886105, "loss": 3.5205, "step": 4078 }, { "epoch": 0.2, "grad_norm": 0.6286296844482422, "learning_rate": 0.0005934506807703403, "loss": 3.4126, "step": 4079 }, { "epoch": 0.2, "grad_norm": 0.5880137085914612, "learning_rate": 0.0005934474808816831, "loss": 3.5638, "step": 4080 }, { "epoch": 0.2, "grad_norm": 0.5868574976921082, "learning_rate": 0.0005934442802201416, "loss": 3.4796, "step": 4081 }, { "epoch": 0.2, "grad_norm": 0.5653226375579834, "learning_rate": 0.0005934410787857245, "loss": 3.0373, "step": 4082 }, { "epoch": 0.2, "grad_norm": 0.5920324921607971, "learning_rate": 0.0005934378765784399, "loss": 3.5852, "step": 4083 }, { "epoch": 0.2, "grad_norm": 0.5790802836418152, "learning_rate": 0.0005934346735982964, "loss": 3.6043, "step": 4084 }, { "epoch": 0.2, "grad_norm": 0.5777429938316345, "learning_rate": 0.0005934314698453026, "loss": 3.3492, "step": 4085 }, { "epoch": 0.2, "grad_norm": 0.569974422454834, "learning_rate": 0.0005934282653194667, "loss": 3.4926, "step": 4086 }, { "epoch": 0.2, "grad_norm": 0.6177379488945007, "learning_rate": 0.0005934250600207972, "loss": 3.6035, "step": 4087 }, { "epoch": 0.2, "grad_norm": 0.5291566848754883, "learning_rate": 0.0005934218539493026, "loss": 3.5606, "step": 4088 }, { "epoch": 0.2, "grad_norm": 0.5401890873908997, "learning_rate": 0.0005934186471049913, "loss": 3.4682, "step": 4089 }, { "epoch": 0.2, "grad_norm": 0.5783869028091431, "learning_rate": 0.0005934154394878717, "loss": 3.4999, "step": 4090 }, { "epoch": 0.2, "grad_norm": 0.5341452360153198, "learning_rate": 0.0005934122310979524, "loss": 3.4623, "step": 4091 }, { "epoch": 0.2, "grad_norm": 0.5385696887969971, "learning_rate": 0.0005934090219352416, "loss": 3.6001, "step": 4092 }, { "epoch": 0.2, "grad_norm": 0.6240125298500061, "learning_rate": 0.000593405811999748, "loss": 3.3475, "step": 4093 }, { "epoch": 0.2, "grad_norm": 0.5452415347099304, "learning_rate": 0.00059340260129148, "loss": 3.4982, "step": 4094 }, { "epoch": 0.2, "grad_norm": 0.5494929552078247, "learning_rate": 0.000593399389810446, "loss": 3.4822, "step": 4095 }, { "epoch": 0.2, "grad_norm": 0.55720055103302, "learning_rate": 0.0005933961775566543, "loss": 3.4194, "step": 4096 }, { "epoch": 0.2, "grad_norm": 0.5275405049324036, "learning_rate": 0.0005933929645301138, "loss": 3.4621, "step": 4097 }, { "epoch": 0.2, "grad_norm": 0.559727668762207, "learning_rate": 0.0005933897507308325, "loss": 3.5763, "step": 4098 }, { "epoch": 0.2, "grad_norm": 0.5319221019744873, "learning_rate": 0.0005933865361588192, "loss": 3.3231, "step": 4099 }, { "epoch": 0.2, "grad_norm": 0.5701916217803955, "learning_rate": 0.0005933833208140822, "loss": 3.5887, "step": 4100 }, { "epoch": 0.2, "grad_norm": 0.6000038981437683, "learning_rate": 0.0005933801046966299, "loss": 3.5659, "step": 4101 }, { "epoch": 0.2, "grad_norm": 0.5542453527450562, "learning_rate": 0.0005933768878064709, "loss": 3.6189, "step": 4102 }, { "epoch": 0.2, "grad_norm": 0.5546266436576843, "learning_rate": 0.0005933736701436137, "loss": 3.4361, "step": 4103 }, { "epoch": 0.2, "grad_norm": 0.5334687232971191, "learning_rate": 0.0005933704517080666, "loss": 3.5126, "step": 4104 }, { "epoch": 0.2, "grad_norm": 0.5554494857788086, "learning_rate": 0.0005933672324998382, "loss": 3.291, "step": 4105 }, { "epoch": 0.2, "grad_norm": 0.5818085670471191, "learning_rate": 0.0005933640125189371, "loss": 3.2228, "step": 4106 }, { "epoch": 0.2, "grad_norm": 0.5691803097724915, "learning_rate": 0.0005933607917653715, "loss": 3.4671, "step": 4107 }, { "epoch": 0.2, "grad_norm": 0.5687941908836365, "learning_rate": 0.0005933575702391501, "loss": 3.3455, "step": 4108 }, { "epoch": 0.2, "grad_norm": 0.5801530480384827, "learning_rate": 0.0005933543479402814, "loss": 3.5252, "step": 4109 }, { "epoch": 0.2, "grad_norm": 0.5499988198280334, "learning_rate": 0.0005933511248687738, "loss": 3.4909, "step": 4110 }, { "epoch": 0.2, "grad_norm": 0.5364803671836853, "learning_rate": 0.0005933479010246357, "loss": 3.6644, "step": 4111 }, { "epoch": 0.2, "grad_norm": 0.5526125431060791, "learning_rate": 0.0005933446764078758, "loss": 3.3869, "step": 4112 }, { "epoch": 0.2, "grad_norm": 0.5705166459083557, "learning_rate": 0.0005933414510185023, "loss": 3.4363, "step": 4113 }, { "epoch": 0.2, "grad_norm": 0.6104483008384705, "learning_rate": 0.0005933382248565239, "loss": 3.4521, "step": 4114 }, { "epoch": 0.2, "grad_norm": 0.6703182458877563, "learning_rate": 0.0005933349979219492, "loss": 3.538, "step": 4115 }, { "epoch": 0.2, "grad_norm": 0.5792509913444519, "learning_rate": 0.0005933317702147865, "loss": 3.4318, "step": 4116 }, { "epoch": 0.2, "grad_norm": 0.5312807559967041, "learning_rate": 0.0005933285417350444, "loss": 3.477, "step": 4117 }, { "epoch": 0.2, "grad_norm": 0.5642654895782471, "learning_rate": 0.0005933253124827313, "loss": 3.5236, "step": 4118 }, { "epoch": 0.2, "grad_norm": 0.5508056879043579, "learning_rate": 0.0005933220824578559, "loss": 3.6048, "step": 4119 }, { "epoch": 0.2, "grad_norm": 0.5829192399978638, "learning_rate": 0.0005933188516604264, "loss": 3.4721, "step": 4120 }, { "epoch": 0.2, "grad_norm": 0.5596129298210144, "learning_rate": 0.0005933156200904516, "loss": 3.2922, "step": 4121 }, { "epoch": 0.2, "grad_norm": 0.5868514776229858, "learning_rate": 0.00059331238774794, "loss": 3.4381, "step": 4122 }, { "epoch": 0.2, "grad_norm": 0.5288069248199463, "learning_rate": 0.0005933091546328999, "loss": 3.3479, "step": 4123 }, { "epoch": 0.2, "grad_norm": 0.5082468390464783, "learning_rate": 0.00059330592074534, "loss": 3.5534, "step": 4124 }, { "epoch": 0.2, "grad_norm": 0.5732336640357971, "learning_rate": 0.0005933026860852688, "loss": 3.3907, "step": 4125 }, { "epoch": 0.2, "grad_norm": 0.5370706915855408, "learning_rate": 0.0005932994506526947, "loss": 3.6477, "step": 4126 }, { "epoch": 0.2, "grad_norm": 0.5239009857177734, "learning_rate": 0.0005932962144476262, "loss": 3.6473, "step": 4127 }, { "epoch": 0.2, "grad_norm": 0.566159725189209, "learning_rate": 0.000593292977470072, "loss": 3.687, "step": 4128 }, { "epoch": 0.2, "grad_norm": 0.5577471256256104, "learning_rate": 0.0005932897397200405, "loss": 3.5333, "step": 4129 }, { "epoch": 0.2, "grad_norm": 0.6108332276344299, "learning_rate": 0.0005932865011975403, "loss": 3.4125, "step": 4130 }, { "epoch": 0.2, "grad_norm": 0.5488579869270325, "learning_rate": 0.00059328326190258, "loss": 3.4491, "step": 4131 }, { "epoch": 0.2, "grad_norm": 0.522066056728363, "learning_rate": 0.000593280021835168, "loss": 3.4321, "step": 4132 }, { "epoch": 0.2, "grad_norm": 0.5913230776786804, "learning_rate": 0.0005932767809953128, "loss": 3.6254, "step": 4133 }, { "epoch": 0.2, "grad_norm": 0.5286851525306702, "learning_rate": 0.0005932735393830229, "loss": 3.493, "step": 4134 }, { "epoch": 0.2, "grad_norm": 0.5211114883422852, "learning_rate": 0.0005932702969983071, "loss": 3.7179, "step": 4135 }, { "epoch": 0.2, "grad_norm": 0.5448617339134216, "learning_rate": 0.0005932670538411737, "loss": 3.5312, "step": 4136 }, { "epoch": 0.2, "grad_norm": 0.5416504740715027, "learning_rate": 0.0005932638099116314, "loss": 3.523, "step": 4137 }, { "epoch": 0.2, "grad_norm": 0.5332238078117371, "learning_rate": 0.0005932605652096887, "loss": 3.5286, "step": 4138 }, { "epoch": 0.2, "grad_norm": 0.5574766993522644, "learning_rate": 0.0005932573197353539, "loss": 3.2139, "step": 4139 }, { "epoch": 0.2, "grad_norm": 0.5327970385551453, "learning_rate": 0.000593254073488636, "loss": 3.4693, "step": 4140 }, { "epoch": 0.2, "grad_norm": 0.5722629427909851, "learning_rate": 0.0005932508264695431, "loss": 3.429, "step": 4141 }, { "epoch": 0.2, "grad_norm": 0.5846360921859741, "learning_rate": 0.0005932475786780841, "loss": 3.4747, "step": 4142 }, { "epoch": 0.2, "grad_norm": 0.5634985566139221, "learning_rate": 0.0005932443301142673, "loss": 3.4314, "step": 4143 }, { "epoch": 0.2, "grad_norm": 0.5535609722137451, "learning_rate": 0.0005932410807781015, "loss": 3.4342, "step": 4144 }, { "epoch": 0.2, "grad_norm": 0.5832380652427673, "learning_rate": 0.0005932378306695951, "loss": 3.2958, "step": 4145 }, { "epoch": 0.2, "grad_norm": 0.5615767240524292, "learning_rate": 0.0005932345797887566, "loss": 3.4979, "step": 4146 }, { "epoch": 0.2, "grad_norm": 0.5781761407852173, "learning_rate": 0.0005932313281355947, "loss": 3.5425, "step": 4147 }, { "epoch": 0.2, "grad_norm": 0.5604632496833801, "learning_rate": 0.000593228075710118, "loss": 3.5805, "step": 4148 }, { "epoch": 0.2, "grad_norm": 0.6127885580062866, "learning_rate": 0.0005932248225123349, "loss": 3.4723, "step": 4149 }, { "epoch": 0.2, "grad_norm": 0.5799558162689209, "learning_rate": 0.000593221568542254, "loss": 3.4249, "step": 4150 }, { "epoch": 0.2, "grad_norm": 0.5683193802833557, "learning_rate": 0.0005932183137998839, "loss": 3.5735, "step": 4151 }, { "epoch": 0.2, "grad_norm": 0.5393085479736328, "learning_rate": 0.0005932150582852333, "loss": 3.3668, "step": 4152 }, { "epoch": 0.2, "grad_norm": 0.6012470126152039, "learning_rate": 0.0005932118019983107, "loss": 3.3904, "step": 4153 }, { "epoch": 0.2, "grad_norm": 0.5704240798950195, "learning_rate": 0.0005932085449391245, "loss": 3.4991, "step": 4154 }, { "epoch": 0.2, "grad_norm": 0.6046105623245239, "learning_rate": 0.0005932052871076835, "loss": 3.531, "step": 4155 }, { "epoch": 0.2, "grad_norm": 0.5393555760383606, "learning_rate": 0.0005932020285039963, "loss": 3.6832, "step": 4156 }, { "epoch": 0.2, "grad_norm": 0.5603122115135193, "learning_rate": 0.0005931987691280712, "loss": 3.5048, "step": 4157 }, { "epoch": 0.2, "grad_norm": 0.5598936676979065, "learning_rate": 0.0005931955089799172, "loss": 3.4841, "step": 4158 }, { "epoch": 0.2, "grad_norm": 0.6121480464935303, "learning_rate": 0.0005931922480595424, "loss": 3.4819, "step": 4159 }, { "epoch": 0.2, "grad_norm": 0.5565993189811707, "learning_rate": 0.0005931889863669558, "loss": 3.5906, "step": 4160 }, { "epoch": 0.2, "grad_norm": 0.5580215454101562, "learning_rate": 0.0005931857239021657, "loss": 3.4656, "step": 4161 }, { "epoch": 0.2, "grad_norm": 0.604782223701477, "learning_rate": 0.000593182460665181, "loss": 3.3578, "step": 4162 }, { "epoch": 0.2, "grad_norm": 0.5579813122749329, "learning_rate": 0.00059317919665601, "loss": 3.5514, "step": 4163 }, { "epoch": 0.2, "grad_norm": 0.5879456400871277, "learning_rate": 0.0005931759318746615, "loss": 3.5933, "step": 4164 }, { "epoch": 0.2, "grad_norm": 0.5648056268692017, "learning_rate": 0.000593172666321144, "loss": 3.4556, "step": 4165 }, { "epoch": 0.2, "grad_norm": 0.6041473150253296, "learning_rate": 0.0005931693999954661, "loss": 3.6027, "step": 4166 }, { "epoch": 0.2, "grad_norm": 0.5803984999656677, "learning_rate": 0.0005931661328976365, "loss": 3.3593, "step": 4167 }, { "epoch": 0.2, "grad_norm": 0.5941168665885925, "learning_rate": 0.0005931628650276636, "loss": 3.4784, "step": 4168 }, { "epoch": 0.2, "grad_norm": 0.5952200293540955, "learning_rate": 0.0005931595963855562, "loss": 3.4204, "step": 4169 }, { "epoch": 0.2, "grad_norm": 0.5529075860977173, "learning_rate": 0.0005931563269713228, "loss": 3.5687, "step": 4170 }, { "epoch": 0.2, "grad_norm": 0.5690220594406128, "learning_rate": 0.000593153056784972, "loss": 3.5338, "step": 4171 }, { "epoch": 0.2, "grad_norm": 0.6204742789268494, "learning_rate": 0.0005931497858265126, "loss": 3.4925, "step": 4172 }, { "epoch": 0.2, "grad_norm": 0.6174020171165466, "learning_rate": 0.000593146514095953, "loss": 3.4493, "step": 4173 }, { "epoch": 0.2, "grad_norm": 0.5436307191848755, "learning_rate": 0.000593143241593302, "loss": 3.5383, "step": 4174 }, { "epoch": 0.2, "grad_norm": 0.5812988877296448, "learning_rate": 0.0005931399683185679, "loss": 3.265, "step": 4175 }, { "epoch": 0.2, "grad_norm": 0.6355542540550232, "learning_rate": 0.0005931366942717597, "loss": 3.4879, "step": 4176 }, { "epoch": 0.2, "grad_norm": 0.5769966840744019, "learning_rate": 0.0005931334194528859, "loss": 3.4083, "step": 4177 }, { "epoch": 0.2, "grad_norm": 0.5602624416351318, "learning_rate": 0.000593130143861955, "loss": 3.4455, "step": 4178 }, { "epoch": 0.2, "grad_norm": 0.5522497892379761, "learning_rate": 0.0005931268674989758, "loss": 3.589, "step": 4179 }, { "epoch": 0.2, "grad_norm": 0.6143437623977661, "learning_rate": 0.0005931235903639568, "loss": 3.39, "step": 4180 }, { "epoch": 0.2, "grad_norm": 0.5582066774368286, "learning_rate": 0.0005931203124569066, "loss": 3.2465, "step": 4181 }, { "epoch": 0.2, "grad_norm": 0.5512022972106934, "learning_rate": 0.000593117033777834, "loss": 3.3506, "step": 4182 }, { "epoch": 0.2, "grad_norm": 0.5688873529434204, "learning_rate": 0.0005931137543267476, "loss": 3.6862, "step": 4183 }, { "epoch": 0.21, "grad_norm": 0.5612702369689941, "learning_rate": 0.0005931104741036558, "loss": 3.4739, "step": 4184 }, { "epoch": 0.21, "grad_norm": 0.5855376720428467, "learning_rate": 0.0005931071931085676, "loss": 3.4008, "step": 4185 }, { "epoch": 0.21, "grad_norm": 0.5829062461853027, "learning_rate": 0.0005931039113414914, "loss": 3.3817, "step": 4186 }, { "epoch": 0.21, "grad_norm": 0.5704607963562012, "learning_rate": 0.0005931006288024358, "loss": 3.5417, "step": 4187 }, { "epoch": 0.21, "grad_norm": 0.5722986459732056, "learning_rate": 0.0005930973454914097, "loss": 3.6125, "step": 4188 }, { "epoch": 0.21, "grad_norm": 0.5302977561950684, "learning_rate": 0.0005930940614084216, "loss": 3.2082, "step": 4189 }, { "epoch": 0.21, "grad_norm": 0.5311842560768127, "learning_rate": 0.00059309077655348, "loss": 3.4686, "step": 4190 }, { "epoch": 0.21, "grad_norm": 0.5523838996887207, "learning_rate": 0.0005930874909265938, "loss": 3.525, "step": 4191 }, { "epoch": 0.21, "grad_norm": 0.6022278070449829, "learning_rate": 0.0005930842045277716, "loss": 3.3398, "step": 4192 }, { "epoch": 0.21, "grad_norm": 0.588112473487854, "learning_rate": 0.000593080917357022, "loss": 3.4084, "step": 4193 }, { "epoch": 0.21, "grad_norm": 0.5542445182800293, "learning_rate": 0.0005930776294143536, "loss": 3.2733, "step": 4194 }, { "epoch": 0.21, "grad_norm": 0.6074942350387573, "learning_rate": 0.0005930743406997752, "loss": 3.5434, "step": 4195 }, { "epoch": 0.21, "grad_norm": 0.5761960744857788, "learning_rate": 0.0005930710512132954, "loss": 3.6245, "step": 4196 }, { "epoch": 0.21, "grad_norm": 0.5809390544891357, "learning_rate": 0.0005930677609549227, "loss": 3.5375, "step": 4197 }, { "epoch": 0.21, "grad_norm": 0.572761595249176, "learning_rate": 0.000593064469924666, "loss": 3.5079, "step": 4198 }, { "epoch": 0.21, "grad_norm": 0.5556721687316895, "learning_rate": 0.000593061178122534, "loss": 3.4619, "step": 4199 }, { "epoch": 0.21, "grad_norm": 0.5726044178009033, "learning_rate": 0.0005930578855485351, "loss": 3.3863, "step": 4200 }, { "epoch": 0.21, "grad_norm": 0.5410187840461731, "learning_rate": 0.0005930545922026783, "loss": 3.3838, "step": 4201 }, { "epoch": 0.21, "grad_norm": 0.5719176530838013, "learning_rate": 0.0005930512980849719, "loss": 3.6133, "step": 4202 }, { "epoch": 0.21, "grad_norm": 0.5681977272033691, "learning_rate": 0.0005930480031954249, "loss": 3.5757, "step": 4203 }, { "epoch": 0.21, "grad_norm": 0.5701371431350708, "learning_rate": 0.0005930447075340458, "loss": 3.2612, "step": 4204 }, { "epoch": 0.21, "grad_norm": 0.8378312587738037, "learning_rate": 0.0005930414111008435, "loss": 3.5506, "step": 4205 }, { "epoch": 0.21, "grad_norm": 0.6150882840156555, "learning_rate": 0.0005930381138958263, "loss": 3.4457, "step": 4206 }, { "epoch": 0.21, "grad_norm": 0.5677423477172852, "learning_rate": 0.0005930348159190031, "loss": 3.3982, "step": 4207 }, { "epoch": 0.21, "grad_norm": 0.6335375308990479, "learning_rate": 0.0005930315171703827, "loss": 3.3809, "step": 4208 }, { "epoch": 0.21, "grad_norm": 0.5192340612411499, "learning_rate": 0.0005930282176499738, "loss": 3.4675, "step": 4209 }, { "epoch": 0.21, "grad_norm": 0.5766927599906921, "learning_rate": 0.0005930249173577848, "loss": 3.5231, "step": 4210 }, { "epoch": 0.21, "grad_norm": 0.5748723149299622, "learning_rate": 0.0005930216162938246, "loss": 3.4761, "step": 4211 }, { "epoch": 0.21, "grad_norm": 0.549691379070282, "learning_rate": 0.0005930183144581019, "loss": 3.5352, "step": 4212 }, { "epoch": 0.21, "grad_norm": 0.5617498755455017, "learning_rate": 0.0005930150118506253, "loss": 3.5251, "step": 4213 }, { "epoch": 0.21, "grad_norm": 0.5673946738243103, "learning_rate": 0.0005930117084714036, "loss": 3.4009, "step": 4214 }, { "epoch": 0.21, "grad_norm": 0.5976900458335876, "learning_rate": 0.0005930084043204454, "loss": 3.5733, "step": 4215 }, { "epoch": 0.21, "grad_norm": 0.5517022013664246, "learning_rate": 0.0005930050993977594, "loss": 3.4349, "step": 4216 }, { "epoch": 0.21, "grad_norm": 0.6215957999229431, "learning_rate": 0.0005930017937033545, "loss": 3.7329, "step": 4217 }, { "epoch": 0.21, "grad_norm": 0.5812935829162598, "learning_rate": 0.0005929984872372391, "loss": 3.5745, "step": 4218 }, { "epoch": 0.21, "grad_norm": 0.5533018112182617, "learning_rate": 0.0005929951799994222, "loss": 3.5439, "step": 4219 }, { "epoch": 0.21, "grad_norm": 0.556189775466919, "learning_rate": 0.0005929918719899123, "loss": 3.6134, "step": 4220 }, { "epoch": 0.21, "grad_norm": 0.5521323084831238, "learning_rate": 0.0005929885632087183, "loss": 3.3643, "step": 4221 }, { "epoch": 0.21, "grad_norm": 0.6353476643562317, "learning_rate": 0.0005929852536558487, "loss": 3.2529, "step": 4222 }, { "epoch": 0.21, "grad_norm": 0.6318731904029846, "learning_rate": 0.0005929819433313124, "loss": 3.5494, "step": 4223 }, { "epoch": 0.21, "grad_norm": 0.5776653289794922, "learning_rate": 0.0005929786322351181, "loss": 3.4537, "step": 4224 }, { "epoch": 0.21, "grad_norm": 0.5921760201454163, "learning_rate": 0.0005929753203672743, "loss": 3.5448, "step": 4225 }, { "epoch": 0.21, "grad_norm": 0.5609504580497742, "learning_rate": 0.00059297200772779, "loss": 3.4304, "step": 4226 }, { "epoch": 0.21, "grad_norm": 0.5797039866447449, "learning_rate": 0.0005929686943166738, "loss": 3.4262, "step": 4227 }, { "epoch": 0.21, "grad_norm": 0.578444242477417, "learning_rate": 0.0005929653801339344, "loss": 3.2239, "step": 4228 }, { "epoch": 0.21, "grad_norm": 0.5497398376464844, "learning_rate": 0.0005929620651795806, "loss": 3.4644, "step": 4229 }, { "epoch": 0.21, "grad_norm": 0.5600916743278503, "learning_rate": 0.0005929587494536212, "loss": 3.4196, "step": 4230 }, { "epoch": 0.21, "grad_norm": 0.5911729335784912, "learning_rate": 0.0005929554329560647, "loss": 3.2735, "step": 4231 }, { "epoch": 0.21, "grad_norm": 0.5636935234069824, "learning_rate": 0.00059295211568692, "loss": 3.4634, "step": 4232 }, { "epoch": 0.21, "grad_norm": 0.5678104758262634, "learning_rate": 0.0005929487976461959, "loss": 3.5156, "step": 4233 }, { "epoch": 0.21, "grad_norm": 0.5603824853897095, "learning_rate": 0.0005929454788339009, "loss": 3.3868, "step": 4234 }, { "epoch": 0.21, "grad_norm": 0.5836139917373657, "learning_rate": 0.000592942159250044, "loss": 3.5746, "step": 4235 }, { "epoch": 0.21, "grad_norm": 0.7062385678291321, "learning_rate": 0.0005929388388946338, "loss": 3.3934, "step": 4236 }, { "epoch": 0.21, "grad_norm": 0.5844118595123291, "learning_rate": 0.000592935517767679, "loss": 3.494, "step": 4237 }, { "epoch": 0.21, "grad_norm": 0.5271828174591064, "learning_rate": 0.0005929321958691885, "loss": 3.4183, "step": 4238 }, { "epoch": 0.21, "grad_norm": 0.528325617313385, "learning_rate": 0.000592928873199171, "loss": 3.3322, "step": 4239 }, { "epoch": 0.21, "grad_norm": 0.5398619174957275, "learning_rate": 0.0005929255497576353, "loss": 3.4529, "step": 4240 }, { "epoch": 0.21, "grad_norm": 0.5449317693710327, "learning_rate": 0.0005929222255445899, "loss": 3.6722, "step": 4241 }, { "epoch": 0.21, "grad_norm": 0.5887119770050049, "learning_rate": 0.0005929189005600438, "loss": 3.5835, "step": 4242 }, { "epoch": 0.21, "grad_norm": 0.5551977753639221, "learning_rate": 0.0005929155748040057, "loss": 3.3044, "step": 4243 }, { "epoch": 0.21, "grad_norm": 0.5693026781082153, "learning_rate": 0.0005929122482764844, "loss": 3.3308, "step": 4244 }, { "epoch": 0.21, "grad_norm": 0.6203973293304443, "learning_rate": 0.0005929089209774883, "loss": 3.5217, "step": 4245 }, { "epoch": 0.21, "grad_norm": 0.6212824583053589, "learning_rate": 0.0005929055929070269, "loss": 3.4606, "step": 4246 }, { "epoch": 0.21, "grad_norm": 0.5740115642547607, "learning_rate": 0.0005929022640651083, "loss": 3.4078, "step": 4247 }, { "epoch": 0.21, "grad_norm": 0.5441243052482605, "learning_rate": 0.0005928989344517415, "loss": 3.4294, "step": 4248 }, { "epoch": 0.21, "grad_norm": 0.6401486396789551, "learning_rate": 0.0005928956040669353, "loss": 3.4507, "step": 4249 }, { "epoch": 0.21, "grad_norm": 0.5190750360488892, "learning_rate": 0.0005928922729106986, "loss": 3.4062, "step": 4250 }, { "epoch": 0.21, "grad_norm": 0.5887166857719421, "learning_rate": 0.0005928889409830398, "loss": 3.5247, "step": 4251 }, { "epoch": 0.21, "grad_norm": 0.5587601065635681, "learning_rate": 0.0005928856082839681, "loss": 3.462, "step": 4252 }, { "epoch": 0.21, "grad_norm": 0.5775784254074097, "learning_rate": 0.0005928822748134919, "loss": 3.5834, "step": 4253 }, { "epoch": 0.21, "grad_norm": 0.5799462795257568, "learning_rate": 0.0005928789405716203, "loss": 3.524, "step": 4254 }, { "epoch": 0.21, "grad_norm": 0.6155045628547668, "learning_rate": 0.0005928756055583619, "loss": 3.4856, "step": 4255 }, { "epoch": 0.21, "grad_norm": 0.5401473641395569, "learning_rate": 0.0005928722697737254, "loss": 3.4083, "step": 4256 }, { "epoch": 0.21, "grad_norm": 0.5645717978477478, "learning_rate": 0.0005928689332177199, "loss": 3.6458, "step": 4257 }, { "epoch": 0.21, "grad_norm": 0.5968051552772522, "learning_rate": 0.0005928655958903538, "loss": 3.6914, "step": 4258 }, { "epoch": 0.21, "grad_norm": 0.5574849843978882, "learning_rate": 0.0005928622577916362, "loss": 3.4648, "step": 4259 }, { "epoch": 0.21, "grad_norm": 0.5590183138847351, "learning_rate": 0.0005928589189215757, "loss": 3.5068, "step": 4260 }, { "epoch": 0.21, "grad_norm": 0.5854844450950623, "learning_rate": 0.0005928555792801812, "loss": 3.4404, "step": 4261 }, { "epoch": 0.21, "grad_norm": 0.5542050004005432, "learning_rate": 0.0005928522388674616, "loss": 3.7172, "step": 4262 }, { "epoch": 0.21, "grad_norm": 0.5726750493049622, "learning_rate": 0.0005928488976834254, "loss": 3.5171, "step": 4263 }, { "epoch": 0.21, "grad_norm": 0.5503663420677185, "learning_rate": 0.0005928455557280815, "loss": 3.5306, "step": 4264 }, { "epoch": 0.21, "grad_norm": 0.5870798826217651, "learning_rate": 0.000592842213001439, "loss": 3.416, "step": 4265 }, { "epoch": 0.21, "grad_norm": 0.5509695410728455, "learning_rate": 0.0005928388695035064, "loss": 3.6081, "step": 4266 }, { "epoch": 0.21, "grad_norm": 0.578829824924469, "learning_rate": 0.0005928355252342925, "loss": 3.4515, "step": 4267 }, { "epoch": 0.21, "grad_norm": 0.6161765456199646, "learning_rate": 0.0005928321801938061, "loss": 3.5032, "step": 4268 }, { "epoch": 0.21, "grad_norm": 0.5503228306770325, "learning_rate": 0.0005928288343820563, "loss": 3.3577, "step": 4269 }, { "epoch": 0.21, "grad_norm": 0.550617516040802, "learning_rate": 0.0005928254877990515, "loss": 3.2324, "step": 4270 }, { "epoch": 0.21, "grad_norm": 0.5668909549713135, "learning_rate": 0.0005928221404448008, "loss": 3.431, "step": 4271 }, { "epoch": 0.21, "grad_norm": 0.5765318274497986, "learning_rate": 0.000592818792319313, "loss": 3.5756, "step": 4272 }, { "epoch": 0.21, "grad_norm": 0.6077347993850708, "learning_rate": 0.0005928154434225968, "loss": 3.5527, "step": 4273 }, { "epoch": 0.21, "grad_norm": 0.5627986788749695, "learning_rate": 0.0005928120937546611, "loss": 3.8691, "step": 4274 }, { "epoch": 0.21, "grad_norm": 0.5574711561203003, "learning_rate": 0.0005928087433155147, "loss": 3.5428, "step": 4275 }, { "epoch": 0.21, "grad_norm": 0.5420995950698853, "learning_rate": 0.0005928053921051664, "loss": 3.4421, "step": 4276 }, { "epoch": 0.21, "grad_norm": 0.6338256001472473, "learning_rate": 0.000592802040123625, "loss": 3.3063, "step": 4277 }, { "epoch": 0.21, "grad_norm": 0.572799026966095, "learning_rate": 0.0005927986873708995, "loss": 3.4216, "step": 4278 }, { "epoch": 0.21, "grad_norm": 0.5351685285568237, "learning_rate": 0.0005927953338469986, "loss": 3.391, "step": 4279 }, { "epoch": 0.21, "grad_norm": 0.5662745237350464, "learning_rate": 0.0005927919795519311, "loss": 3.5054, "step": 4280 }, { "epoch": 0.21, "grad_norm": 0.5973455905914307, "learning_rate": 0.0005927886244857058, "loss": 3.5656, "step": 4281 }, { "epoch": 0.21, "grad_norm": 0.5676031112670898, "learning_rate": 0.0005927852686483317, "loss": 3.2299, "step": 4282 }, { "epoch": 0.21, "grad_norm": 0.5569466352462769, "learning_rate": 0.0005927819120398175, "loss": 3.4992, "step": 4283 }, { "epoch": 0.21, "grad_norm": 0.5708233714103699, "learning_rate": 0.0005927785546601721, "loss": 3.5776, "step": 4284 }, { "epoch": 0.21, "grad_norm": 0.5976619124412537, "learning_rate": 0.0005927751965094044, "loss": 3.3507, "step": 4285 }, { "epoch": 0.21, "grad_norm": 0.5370445847511292, "learning_rate": 0.0005927718375875231, "loss": 3.3449, "step": 4286 }, { "epoch": 0.21, "grad_norm": 0.5878073573112488, "learning_rate": 0.0005927684778945371, "loss": 3.4856, "step": 4287 }, { "epoch": 0.21, "grad_norm": 0.5714808702468872, "learning_rate": 0.0005927651174304553, "loss": 3.6395, "step": 4288 }, { "epoch": 0.21, "grad_norm": 0.5233124494552612, "learning_rate": 0.0005927617561952866, "loss": 3.4073, "step": 4289 }, { "epoch": 0.21, "grad_norm": 0.5612361431121826, "learning_rate": 0.0005927583941890398, "loss": 3.3293, "step": 4290 }, { "epoch": 0.21, "grad_norm": 0.5414638519287109, "learning_rate": 0.0005927550314117235, "loss": 3.4124, "step": 4291 }, { "epoch": 0.21, "grad_norm": 0.586334764957428, "learning_rate": 0.0005927516678633471, "loss": 3.387, "step": 4292 }, { "epoch": 0.21, "grad_norm": 0.5697146654129028, "learning_rate": 0.000592748303543919, "loss": 3.6256, "step": 4293 }, { "epoch": 0.21, "grad_norm": 0.5857806205749512, "learning_rate": 0.0005927449384534482, "loss": 3.5125, "step": 4294 }, { "epoch": 0.21, "grad_norm": 0.5624878406524658, "learning_rate": 0.0005927415725919435, "loss": 3.5866, "step": 4295 }, { "epoch": 0.21, "grad_norm": 0.5444654226303101, "learning_rate": 0.0005927382059594139, "loss": 3.6301, "step": 4296 }, { "epoch": 0.21, "grad_norm": 0.5403390526771545, "learning_rate": 0.0005927348385558682, "loss": 3.4554, "step": 4297 }, { "epoch": 0.21, "grad_norm": 0.5376538634300232, "learning_rate": 0.0005927314703813154, "loss": 3.464, "step": 4298 }, { "epoch": 0.21, "grad_norm": 0.5038751363754272, "learning_rate": 0.000592728101435764, "loss": 3.5199, "step": 4299 }, { "epoch": 0.21, "grad_norm": 0.5603412389755249, "learning_rate": 0.0005927247317192233, "loss": 3.2781, "step": 4300 }, { "epoch": 0.21, "grad_norm": 0.5391646027565002, "learning_rate": 0.0005927213612317019, "loss": 3.533, "step": 4301 }, { "epoch": 0.21, "grad_norm": 0.6210039258003235, "learning_rate": 0.0005927179899732088, "loss": 3.4641, "step": 4302 }, { "epoch": 0.21, "grad_norm": 0.5959204435348511, "learning_rate": 0.0005927146179437529, "loss": 3.5338, "step": 4303 }, { "epoch": 0.21, "grad_norm": 0.53571617603302, "learning_rate": 0.000592711245143343, "loss": 3.5324, "step": 4304 }, { "epoch": 0.21, "grad_norm": 0.5784791707992554, "learning_rate": 0.000592707871571988, "loss": 3.6262, "step": 4305 }, { "epoch": 0.21, "grad_norm": 0.5615180134773254, "learning_rate": 0.0005927044972296969, "loss": 3.4118, "step": 4306 }, { "epoch": 0.21, "grad_norm": 0.5299209952354431, "learning_rate": 0.0005927011221164783, "loss": 3.7879, "step": 4307 }, { "epoch": 0.21, "grad_norm": 0.5538467168807983, "learning_rate": 0.0005926977462323414, "loss": 3.3704, "step": 4308 }, { "epoch": 0.21, "grad_norm": 0.6038463711738586, "learning_rate": 0.0005926943695772949, "loss": 3.517, "step": 4309 }, { "epoch": 0.21, "grad_norm": 0.5968214869499207, "learning_rate": 0.0005926909921513477, "loss": 3.4321, "step": 4310 }, { "epoch": 0.21, "grad_norm": 0.5371940732002258, "learning_rate": 0.0005926876139545089, "loss": 3.4314, "step": 4311 }, { "epoch": 0.21, "grad_norm": 0.6479960680007935, "learning_rate": 0.0005926842349867873, "loss": 3.4106, "step": 4312 }, { "epoch": 0.21, "grad_norm": 0.5392265319824219, "learning_rate": 0.0005926808552481917, "loss": 3.6161, "step": 4313 }, { "epoch": 0.21, "grad_norm": 0.572577714920044, "learning_rate": 0.000592677474738731, "loss": 3.5508, "step": 4314 }, { "epoch": 0.21, "grad_norm": 0.5596196055412292, "learning_rate": 0.0005926740934584141, "loss": 3.5483, "step": 4315 }, { "epoch": 0.21, "grad_norm": 0.5759965777397156, "learning_rate": 0.0005926707114072501, "loss": 3.411, "step": 4316 }, { "epoch": 0.21, "grad_norm": 0.53867506980896, "learning_rate": 0.0005926673285852477, "loss": 3.5226, "step": 4317 }, { "epoch": 0.21, "grad_norm": 0.5533181428909302, "learning_rate": 0.0005926639449924158, "loss": 3.5157, "step": 4318 }, { "epoch": 0.21, "grad_norm": 0.5320466756820679, "learning_rate": 0.0005926605606287635, "loss": 3.6545, "step": 4319 }, { "epoch": 0.21, "grad_norm": 0.5725945234298706, "learning_rate": 0.0005926571754942996, "loss": 3.4898, "step": 4320 }, { "epoch": 0.21, "grad_norm": 0.633961021900177, "learning_rate": 0.000592653789589033, "loss": 3.4104, "step": 4321 }, { "epoch": 0.21, "grad_norm": 0.5438200831413269, "learning_rate": 0.0005926504029129726, "loss": 3.4986, "step": 4322 }, { "epoch": 0.21, "grad_norm": 0.5423779487609863, "learning_rate": 0.0005926470154661275, "loss": 3.2405, "step": 4323 }, { "epoch": 0.21, "grad_norm": 0.5243359804153442, "learning_rate": 0.0005926436272485064, "loss": 3.6668, "step": 4324 }, { "epoch": 0.21, "grad_norm": 0.5646772384643555, "learning_rate": 0.0005926402382601183, "loss": 3.2187, "step": 4325 }, { "epoch": 0.21, "grad_norm": 0.5363373756408691, "learning_rate": 0.0005926368485009721, "loss": 3.6333, "step": 4326 }, { "epoch": 0.21, "grad_norm": 0.5637429356575012, "learning_rate": 0.0005926334579710768, "loss": 3.7082, "step": 4327 }, { "epoch": 0.21, "grad_norm": 0.5417162775993347, "learning_rate": 0.0005926300666704413, "loss": 3.6215, "step": 4328 }, { "epoch": 0.21, "grad_norm": 0.5589267015457153, "learning_rate": 0.0005926266745990745, "loss": 3.4844, "step": 4329 }, { "epoch": 0.21, "grad_norm": 0.5703160762786865, "learning_rate": 0.0005926232817569853, "loss": 3.4402, "step": 4330 }, { "epoch": 0.21, "grad_norm": 0.5476822257041931, "learning_rate": 0.0005926198881441828, "loss": 3.4692, "step": 4331 }, { "epoch": 0.21, "grad_norm": 0.5585446357727051, "learning_rate": 0.0005926164937606758, "loss": 3.5845, "step": 4332 }, { "epoch": 0.21, "grad_norm": 0.540649950504303, "learning_rate": 0.0005926130986064733, "loss": 3.4894, "step": 4333 }, { "epoch": 0.21, "grad_norm": 0.5446892976760864, "learning_rate": 0.0005926097026815842, "loss": 3.4582, "step": 4334 }, { "epoch": 0.21, "grad_norm": 0.5253145694732666, "learning_rate": 0.0005926063059860173, "loss": 3.4895, "step": 4335 }, { "epoch": 0.21, "grad_norm": 0.5359606742858887, "learning_rate": 0.0005926029085197819, "loss": 3.4371, "step": 4336 }, { "epoch": 0.21, "grad_norm": 0.5448620915412903, "learning_rate": 0.0005925995102828867, "loss": 3.4806, "step": 4337 }, { "epoch": 0.21, "grad_norm": 0.5616055130958557, "learning_rate": 0.0005925961112753406, "loss": 3.3458, "step": 4338 }, { "epoch": 0.21, "grad_norm": 0.584311842918396, "learning_rate": 0.0005925927114971527, "loss": 3.4402, "step": 4339 }, { "epoch": 0.21, "grad_norm": 0.5829342603683472, "learning_rate": 0.000592589310948332, "loss": 3.4649, "step": 4340 }, { "epoch": 0.21, "grad_norm": 0.5744354128837585, "learning_rate": 0.0005925859096288874, "loss": 3.5193, "step": 4341 }, { "epoch": 0.21, "grad_norm": 0.5684018731117249, "learning_rate": 0.0005925825075388277, "loss": 3.4638, "step": 4342 }, { "epoch": 0.21, "grad_norm": 0.5490890741348267, "learning_rate": 0.000592579104678162, "loss": 3.593, "step": 4343 }, { "epoch": 0.21, "grad_norm": 0.6014746427536011, "learning_rate": 0.0005925757010468993, "loss": 3.2675, "step": 4344 }, { "epoch": 0.21, "grad_norm": 0.6437888145446777, "learning_rate": 0.0005925722966450485, "loss": 3.4341, "step": 4345 }, { "epoch": 0.21, "grad_norm": 0.5803835391998291, "learning_rate": 0.0005925688914726185, "loss": 3.368, "step": 4346 }, { "epoch": 0.21, "grad_norm": 0.5558992028236389, "learning_rate": 0.0005925654855296183, "loss": 3.5642, "step": 4347 }, { "epoch": 0.21, "grad_norm": 0.6441301703453064, "learning_rate": 0.0005925620788160571, "loss": 3.2692, "step": 4348 }, { "epoch": 0.21, "grad_norm": 0.5592367649078369, "learning_rate": 0.0005925586713319436, "loss": 3.5717, "step": 4349 }, { "epoch": 0.21, "grad_norm": 0.5774462223052979, "learning_rate": 0.0005925552630772869, "loss": 3.5024, "step": 4350 }, { "epoch": 0.21, "grad_norm": 0.6486428380012512, "learning_rate": 0.0005925518540520958, "loss": 3.4881, "step": 4351 }, { "epoch": 0.21, "grad_norm": 0.5728597044944763, "learning_rate": 0.0005925484442563795, "loss": 3.3829, "step": 4352 }, { "epoch": 0.21, "grad_norm": 0.5495771765708923, "learning_rate": 0.000592545033690147, "loss": 3.5991, "step": 4353 }, { "epoch": 0.21, "grad_norm": 0.5710741281509399, "learning_rate": 0.0005925416223534071, "loss": 3.3564, "step": 4354 }, { "epoch": 0.21, "grad_norm": 0.6084657311439514, "learning_rate": 0.0005925382102461689, "loss": 3.3813, "step": 4355 }, { "epoch": 0.21, "grad_norm": 0.5266708135604858, "learning_rate": 0.0005925347973684414, "loss": 3.3215, "step": 4356 }, { "epoch": 0.21, "grad_norm": 0.6160021424293518, "learning_rate": 0.0005925313837202334, "loss": 3.5516, "step": 4357 }, { "epoch": 0.21, "grad_norm": 0.5864583253860474, "learning_rate": 0.0005925279693015541, "loss": 3.5683, "step": 4358 }, { "epoch": 0.21, "grad_norm": 0.5891723036766052, "learning_rate": 0.0005925245541124124, "loss": 3.3166, "step": 4359 }, { "epoch": 0.21, "grad_norm": 0.5659295916557312, "learning_rate": 0.0005925211381528175, "loss": 3.4871, "step": 4360 }, { "epoch": 0.21, "grad_norm": 0.609416127204895, "learning_rate": 0.000592517721422778, "loss": 3.4384, "step": 4361 }, { "epoch": 0.21, "grad_norm": 0.518798828125, "learning_rate": 0.0005925143039223032, "loss": 3.3999, "step": 4362 }, { "epoch": 0.21, "grad_norm": 0.6183592081069946, "learning_rate": 0.000592510885651402, "loss": 3.5708, "step": 4363 }, { "epoch": 0.21, "grad_norm": 0.522781491279602, "learning_rate": 0.0005925074666100834, "loss": 3.5036, "step": 4364 }, { "epoch": 0.21, "grad_norm": 0.600847601890564, "learning_rate": 0.0005925040467983564, "loss": 3.4813, "step": 4365 }, { "epoch": 0.21, "grad_norm": 0.556460440158844, "learning_rate": 0.0005925006262162302, "loss": 3.5809, "step": 4366 }, { "epoch": 0.21, "grad_norm": 0.5647059082984924, "learning_rate": 0.0005924972048637135, "loss": 3.5571, "step": 4367 }, { "epoch": 0.21, "grad_norm": 0.5725759267807007, "learning_rate": 0.0005924937827408154, "loss": 3.4548, "step": 4368 }, { "epoch": 0.21, "grad_norm": 0.5902033448219299, "learning_rate": 0.0005924903598475451, "loss": 3.4404, "step": 4369 }, { "epoch": 0.21, "grad_norm": 0.723042905330658, "learning_rate": 0.0005924869361839115, "loss": 3.5888, "step": 4370 }, { "epoch": 0.21, "grad_norm": 0.7233627438545227, "learning_rate": 0.0005924835117499235, "loss": 3.3951, "step": 4371 }, { "epoch": 0.21, "grad_norm": 0.5561408996582031, "learning_rate": 0.0005924800865455903, "loss": 3.6309, "step": 4372 }, { "epoch": 0.21, "grad_norm": 0.5627115964889526, "learning_rate": 0.0005924766605709209, "loss": 3.5393, "step": 4373 }, { "epoch": 0.21, "grad_norm": 0.5121928453445435, "learning_rate": 0.000592473233825924, "loss": 3.5696, "step": 4374 }, { "epoch": 0.21, "grad_norm": 0.6076629757881165, "learning_rate": 0.0005924698063106091, "loss": 3.4886, "step": 4375 }, { "epoch": 0.21, "grad_norm": 0.5451004505157471, "learning_rate": 0.0005924663780249851, "loss": 3.3686, "step": 4376 }, { "epoch": 0.21, "grad_norm": 0.582217276096344, "learning_rate": 0.0005924629489690608, "loss": 3.5121, "step": 4377 }, { "epoch": 0.21, "grad_norm": 0.5545997023582458, "learning_rate": 0.0005924595191428454, "loss": 3.5433, "step": 4378 }, { "epoch": 0.21, "grad_norm": 0.5341416001319885, "learning_rate": 0.0005924560885463479, "loss": 3.5336, "step": 4379 }, { "epoch": 0.21, "grad_norm": 0.6326627731323242, "learning_rate": 0.0005924526571795774, "loss": 3.4165, "step": 4380 }, { "epoch": 0.21, "grad_norm": 0.5663824677467346, "learning_rate": 0.0005924492250425428, "loss": 3.5157, "step": 4381 }, { "epoch": 0.21, "grad_norm": 0.5439797043800354, "learning_rate": 0.0005924457921352533, "loss": 3.4163, "step": 4382 }, { "epoch": 0.21, "grad_norm": 0.5896198749542236, "learning_rate": 0.0005924423584577178, "loss": 3.5275, "step": 4383 }, { "epoch": 0.21, "grad_norm": 0.5586664080619812, "learning_rate": 0.0005924389240099454, "loss": 3.576, "step": 4384 }, { "epoch": 0.21, "grad_norm": 0.5929638743400574, "learning_rate": 0.0005924354887919452, "loss": 3.5569, "step": 4385 }, { "epoch": 0.21, "grad_norm": 0.6475085020065308, "learning_rate": 0.0005924320528037263, "loss": 3.602, "step": 4386 }, { "epoch": 0.21, "grad_norm": 0.5904268622398376, "learning_rate": 0.0005924286160452974, "loss": 3.4897, "step": 4387 }, { "epoch": 0.22, "grad_norm": 0.5506060719490051, "learning_rate": 0.000592425178516668, "loss": 3.5867, "step": 4388 }, { "epoch": 0.22, "grad_norm": 0.6361829042434692, "learning_rate": 0.000592421740217847, "loss": 3.6221, "step": 4389 }, { "epoch": 0.22, "grad_norm": 0.5692065954208374, "learning_rate": 0.0005924183011488433, "loss": 3.6743, "step": 4390 }, { "epoch": 0.22, "grad_norm": 0.5525909066200256, "learning_rate": 0.000592414861309666, "loss": 3.4595, "step": 4391 }, { "epoch": 0.22, "grad_norm": 0.5622319579124451, "learning_rate": 0.0005924114207003243, "loss": 3.5213, "step": 4392 }, { "epoch": 0.22, "grad_norm": 0.5368682146072388, "learning_rate": 0.0005924079793208273, "loss": 3.5174, "step": 4393 }, { "epoch": 0.22, "grad_norm": 0.5590381622314453, "learning_rate": 0.000592404537171184, "loss": 3.4644, "step": 4394 }, { "epoch": 0.22, "grad_norm": 0.5523700714111328, "learning_rate": 0.0005924010942514032, "loss": 3.3879, "step": 4395 }, { "epoch": 0.22, "grad_norm": 0.5766987204551697, "learning_rate": 0.0005923976505614944, "loss": 3.7217, "step": 4396 }, { "epoch": 0.22, "grad_norm": 0.5824809670448303, "learning_rate": 0.0005923942061014663, "loss": 3.459, "step": 4397 }, { "epoch": 0.22, "grad_norm": 0.5317344665527344, "learning_rate": 0.0005923907608713282, "loss": 3.3727, "step": 4398 }, { "epoch": 0.22, "grad_norm": 0.5514039993286133, "learning_rate": 0.0005923873148710892, "loss": 3.3122, "step": 4399 }, { "epoch": 0.22, "grad_norm": 0.5581009387969971, "learning_rate": 0.0005923838681007581, "loss": 3.6301, "step": 4400 }, { "epoch": 0.22, "grad_norm": 0.5631245374679565, "learning_rate": 0.0005923804205603442, "loss": 3.1971, "step": 4401 }, { "epoch": 0.22, "grad_norm": 0.5507313013076782, "learning_rate": 0.0005923769722498566, "loss": 3.4534, "step": 4402 }, { "epoch": 0.22, "grad_norm": 0.5625492334365845, "learning_rate": 0.0005923735231693043, "loss": 3.5629, "step": 4403 }, { "epoch": 0.22, "grad_norm": 0.599851131439209, "learning_rate": 0.0005923700733186965, "loss": 3.6198, "step": 4404 }, { "epoch": 0.22, "grad_norm": 0.6086528897285461, "learning_rate": 0.0005923666226980421, "loss": 3.5182, "step": 4405 }, { "epoch": 0.22, "grad_norm": 0.5781559944152832, "learning_rate": 0.0005923631713073503, "loss": 3.3736, "step": 4406 }, { "epoch": 0.22, "grad_norm": 0.5884630680084229, "learning_rate": 0.0005923597191466301, "loss": 3.4333, "step": 4407 }, { "epoch": 0.22, "grad_norm": 0.5860154628753662, "learning_rate": 0.0005923562662158907, "loss": 3.5609, "step": 4408 }, { "epoch": 0.22, "grad_norm": 0.5450155138969421, "learning_rate": 0.0005923528125151412, "loss": 3.3551, "step": 4409 }, { "epoch": 0.22, "grad_norm": 0.6374961733818054, "learning_rate": 0.0005923493580443907, "loss": 3.4167, "step": 4410 }, { "epoch": 0.22, "grad_norm": 0.562510073184967, "learning_rate": 0.0005923459028036482, "loss": 3.3272, "step": 4411 }, { "epoch": 0.22, "grad_norm": 0.5314709544181824, "learning_rate": 0.0005923424467929229, "loss": 3.4734, "step": 4412 }, { "epoch": 0.22, "grad_norm": 0.5496847629547119, "learning_rate": 0.0005923389900122239, "loss": 3.0824, "step": 4413 }, { "epoch": 0.22, "grad_norm": 0.5986039638519287, "learning_rate": 0.0005923355324615601, "loss": 3.5452, "step": 4414 }, { "epoch": 0.22, "grad_norm": 0.5644450187683105, "learning_rate": 0.0005923320741409409, "loss": 3.3707, "step": 4415 }, { "epoch": 0.22, "grad_norm": 0.5469602346420288, "learning_rate": 0.0005923286150503753, "loss": 3.5989, "step": 4416 }, { "epoch": 0.22, "grad_norm": 0.5269728899002075, "learning_rate": 0.0005923251551898722, "loss": 3.6738, "step": 4417 }, { "epoch": 0.22, "grad_norm": 0.5269190073013306, "learning_rate": 0.000592321694559441, "loss": 3.6033, "step": 4418 }, { "epoch": 0.22, "grad_norm": 0.5562129020690918, "learning_rate": 0.0005923182331590908, "loss": 3.4572, "step": 4419 }, { "epoch": 0.22, "grad_norm": 0.5670672655105591, "learning_rate": 0.0005923147709888305, "loss": 3.4981, "step": 4420 }, { "epoch": 0.22, "grad_norm": 0.5788316130638123, "learning_rate": 0.0005923113080486695, "loss": 3.3878, "step": 4421 }, { "epoch": 0.22, "grad_norm": 0.585292637348175, "learning_rate": 0.0005923078443386166, "loss": 3.3861, "step": 4422 }, { "epoch": 0.22, "grad_norm": 0.5587526559829712, "learning_rate": 0.0005923043798586812, "loss": 3.3619, "step": 4423 }, { "epoch": 0.22, "grad_norm": 0.5967184901237488, "learning_rate": 0.0005923009146088724, "loss": 3.3487, "step": 4424 }, { "epoch": 0.22, "grad_norm": 0.5900785326957703, "learning_rate": 0.0005922974485891991, "loss": 3.4949, "step": 4425 }, { "epoch": 0.22, "grad_norm": 0.5535380840301514, "learning_rate": 0.0005922939817996706, "loss": 3.5115, "step": 4426 }, { "epoch": 0.22, "grad_norm": 0.5610527992248535, "learning_rate": 0.000592290514240296, "loss": 3.4378, "step": 4427 }, { "epoch": 0.22, "grad_norm": 0.5495424270629883, "learning_rate": 0.0005922870459110844, "loss": 3.2887, "step": 4428 }, { "epoch": 0.22, "grad_norm": 0.5786449313163757, "learning_rate": 0.000592283576812045, "loss": 3.4501, "step": 4429 }, { "epoch": 0.22, "grad_norm": 0.5528695583343506, "learning_rate": 0.000592280106943187, "loss": 3.58, "step": 4430 }, { "epoch": 0.22, "grad_norm": 0.6839733719825745, "learning_rate": 0.0005922766363045195, "loss": 3.3403, "step": 4431 }, { "epoch": 0.22, "grad_norm": 0.568471372127533, "learning_rate": 0.0005922731648960514, "loss": 3.4483, "step": 4432 }, { "epoch": 0.22, "grad_norm": 0.5260776877403259, "learning_rate": 0.0005922696927177921, "loss": 3.7186, "step": 4433 }, { "epoch": 0.22, "grad_norm": 0.5697051882743835, "learning_rate": 0.0005922662197697507, "loss": 3.3252, "step": 4434 }, { "epoch": 0.22, "grad_norm": 0.5838139057159424, "learning_rate": 0.0005922627460519363, "loss": 3.5432, "step": 4435 }, { "epoch": 0.22, "grad_norm": 0.5762350559234619, "learning_rate": 0.0005922592715643582, "loss": 3.4488, "step": 4436 }, { "epoch": 0.22, "grad_norm": 0.5904020071029663, "learning_rate": 0.0005922557963070252, "loss": 3.5981, "step": 4437 }, { "epoch": 0.22, "grad_norm": 0.5588435530662537, "learning_rate": 0.0005922523202799468, "loss": 3.6139, "step": 4438 }, { "epoch": 0.22, "grad_norm": 0.5399730205535889, "learning_rate": 0.000592248843483132, "loss": 3.3933, "step": 4439 }, { "epoch": 0.22, "grad_norm": 0.57188481092453, "learning_rate": 0.0005922453659165901, "loss": 3.5267, "step": 4440 }, { "epoch": 0.22, "grad_norm": 0.5735987424850464, "learning_rate": 0.0005922418875803301, "loss": 3.4599, "step": 4441 }, { "epoch": 0.22, "grad_norm": 0.5924454927444458, "learning_rate": 0.0005922384084743611, "loss": 3.754, "step": 4442 }, { "epoch": 0.22, "grad_norm": 0.5900004506111145, "learning_rate": 0.0005922349285986925, "loss": 3.3268, "step": 4443 }, { "epoch": 0.22, "grad_norm": 0.5811010003089905, "learning_rate": 0.0005922314479533333, "loss": 3.3628, "step": 4444 }, { "epoch": 0.22, "grad_norm": 0.5504472255706787, "learning_rate": 0.0005922279665382927, "loss": 3.5839, "step": 4445 }, { "epoch": 0.22, "grad_norm": 0.5668983459472656, "learning_rate": 0.0005922244843535798, "loss": 3.6179, "step": 4446 }, { "epoch": 0.22, "grad_norm": 0.5283928513526917, "learning_rate": 0.000592221001399204, "loss": 3.3998, "step": 4447 }, { "epoch": 0.22, "grad_norm": 0.5632615685462952, "learning_rate": 0.0005922175176751742, "loss": 3.3972, "step": 4448 }, { "epoch": 0.22, "grad_norm": 0.5324033498764038, "learning_rate": 0.0005922140331814997, "loss": 3.4857, "step": 4449 }, { "epoch": 0.22, "grad_norm": 0.5987648963928223, "learning_rate": 0.0005922105479181898, "loss": 3.5155, "step": 4450 }, { "epoch": 0.22, "grad_norm": 0.5266212821006775, "learning_rate": 0.0005922070618852535, "loss": 3.3693, "step": 4451 }, { "epoch": 0.22, "grad_norm": 0.5755586624145508, "learning_rate": 0.0005922035750827, "loss": 3.5271, "step": 4452 }, { "epoch": 0.22, "grad_norm": 0.6109917163848877, "learning_rate": 0.0005922000875105385, "loss": 3.3581, "step": 4453 }, { "epoch": 0.22, "grad_norm": 0.5387493968009949, "learning_rate": 0.0005921965991687783, "loss": 3.539, "step": 4454 }, { "epoch": 0.22, "grad_norm": 0.5238696336746216, "learning_rate": 0.0005921931100574284, "loss": 3.6753, "step": 4455 }, { "epoch": 0.22, "grad_norm": 0.583580493927002, "learning_rate": 0.0005921896201764981, "loss": 3.4583, "step": 4456 }, { "epoch": 0.22, "grad_norm": 0.5637732148170471, "learning_rate": 0.0005921861295259966, "loss": 3.689, "step": 4457 }, { "epoch": 0.22, "grad_norm": 0.5128093957901001, "learning_rate": 0.000592182638105933, "loss": 3.5987, "step": 4458 }, { "epoch": 0.22, "grad_norm": 0.6136223673820496, "learning_rate": 0.0005921791459163167, "loss": 3.4421, "step": 4459 }, { "epoch": 0.22, "grad_norm": 0.573216438293457, "learning_rate": 0.0005921756529571566, "loss": 3.3476, "step": 4460 }, { "epoch": 0.22, "grad_norm": 0.5862829685211182, "learning_rate": 0.0005921721592284621, "loss": 3.5071, "step": 4461 }, { "epoch": 0.22, "grad_norm": 0.5221441984176636, "learning_rate": 0.0005921686647302424, "loss": 3.4102, "step": 4462 }, { "epoch": 0.22, "grad_norm": 0.6147505640983582, "learning_rate": 0.0005921651694625067, "loss": 3.5414, "step": 4463 }, { "epoch": 0.22, "grad_norm": 0.5590131282806396, "learning_rate": 0.0005921616734252641, "loss": 3.4173, "step": 4464 }, { "epoch": 0.22, "grad_norm": 0.5548772811889648, "learning_rate": 0.0005921581766185239, "loss": 3.3825, "step": 4465 }, { "epoch": 0.22, "grad_norm": 0.6083531975746155, "learning_rate": 0.0005921546790422953, "loss": 3.3853, "step": 4466 }, { "epoch": 0.22, "grad_norm": 0.5427078604698181, "learning_rate": 0.0005921511806965875, "loss": 3.4407, "step": 4467 }, { "epoch": 0.22, "grad_norm": 0.5490184426307678, "learning_rate": 0.0005921476815814098, "loss": 3.521, "step": 4468 }, { "epoch": 0.22, "grad_norm": 0.5580472350120544, "learning_rate": 0.0005921441816967711, "loss": 3.4545, "step": 4469 }, { "epoch": 0.22, "grad_norm": 0.5435226559638977, "learning_rate": 0.000592140681042681, "loss": 3.5815, "step": 4470 }, { "epoch": 0.22, "grad_norm": 0.5188127160072327, "learning_rate": 0.0005921371796191486, "loss": 3.5472, "step": 4471 }, { "epoch": 0.22, "grad_norm": 0.5608434081077576, "learning_rate": 0.0005921336774261831, "loss": 3.374, "step": 4472 }, { "epoch": 0.22, "grad_norm": 0.5581082105636597, "learning_rate": 0.0005921301744637937, "loss": 3.5685, "step": 4473 }, { "epoch": 0.22, "grad_norm": 0.5264663696289062, "learning_rate": 0.0005921266707319896, "loss": 3.5169, "step": 4474 }, { "epoch": 0.22, "grad_norm": 0.5787158608436584, "learning_rate": 0.0005921231662307801, "loss": 3.4266, "step": 4475 }, { "epoch": 0.22, "grad_norm": 0.5512024164199829, "learning_rate": 0.0005921196609601744, "loss": 3.6085, "step": 4476 }, { "epoch": 0.22, "grad_norm": 0.559208333492279, "learning_rate": 0.0005921161549201818, "loss": 3.3556, "step": 4477 }, { "epoch": 0.22, "grad_norm": 0.5857759118080139, "learning_rate": 0.0005921126481108112, "loss": 3.5417, "step": 4478 }, { "epoch": 0.22, "grad_norm": 0.5337649583816528, "learning_rate": 0.0005921091405320724, "loss": 3.4392, "step": 4479 }, { "epoch": 0.22, "grad_norm": 0.5375748872756958, "learning_rate": 0.0005921056321839742, "loss": 3.4743, "step": 4480 }, { "epoch": 0.22, "grad_norm": 0.5559386610984802, "learning_rate": 0.0005921021230665261, "loss": 3.4687, "step": 4481 }, { "epoch": 0.22, "grad_norm": 0.5421441793441772, "learning_rate": 0.000592098613179737, "loss": 3.3652, "step": 4482 }, { "epoch": 0.22, "grad_norm": 0.5195907354354858, "learning_rate": 0.0005920951025236166, "loss": 3.6266, "step": 4483 }, { "epoch": 0.22, "grad_norm": 0.600489616394043, "learning_rate": 0.0005920915910981739, "loss": 3.3397, "step": 4484 }, { "epoch": 0.22, "grad_norm": 0.5409472584724426, "learning_rate": 0.000592088078903418, "loss": 3.2999, "step": 4485 }, { "epoch": 0.22, "grad_norm": 0.522708535194397, "learning_rate": 0.0005920845659393584, "loss": 3.3678, "step": 4486 }, { "epoch": 0.22, "grad_norm": 0.53269362449646, "learning_rate": 0.0005920810522060042, "loss": 3.5437, "step": 4487 }, { "epoch": 0.22, "grad_norm": 0.5417482256889343, "learning_rate": 0.0005920775377033648, "loss": 3.5003, "step": 4488 }, { "epoch": 0.22, "grad_norm": 0.5512046813964844, "learning_rate": 0.0005920740224314494, "loss": 3.3525, "step": 4489 }, { "epoch": 0.22, "grad_norm": 0.5806989073753357, "learning_rate": 0.0005920705063902672, "loss": 3.4915, "step": 4490 }, { "epoch": 0.22, "grad_norm": 0.5602362155914307, "learning_rate": 0.0005920669895798275, "loss": 3.5072, "step": 4491 }, { "epoch": 0.22, "grad_norm": 0.5372531414031982, "learning_rate": 0.0005920634720001396, "loss": 3.3132, "step": 4492 }, { "epoch": 0.22, "grad_norm": 0.6997660994529724, "learning_rate": 0.0005920599536512126, "loss": 3.2909, "step": 4493 }, { "epoch": 0.22, "grad_norm": 0.5167723894119263, "learning_rate": 0.000592056434533056, "loss": 3.498, "step": 4494 }, { "epoch": 0.22, "grad_norm": 0.5544456243515015, "learning_rate": 0.0005920529146456789, "loss": 3.393, "step": 4495 }, { "epoch": 0.22, "grad_norm": 0.5869852304458618, "learning_rate": 0.0005920493939890907, "loss": 3.5451, "step": 4496 }, { "epoch": 0.22, "grad_norm": 0.5493007898330688, "learning_rate": 0.0005920458725633005, "loss": 3.4858, "step": 4497 }, { "epoch": 0.22, "grad_norm": 0.5228421092033386, "learning_rate": 0.0005920423503683178, "loss": 3.5755, "step": 4498 }, { "epoch": 0.22, "grad_norm": 0.5734871029853821, "learning_rate": 0.0005920388274041516, "loss": 3.4937, "step": 4499 }, { "epoch": 0.22, "grad_norm": 0.6023586392402649, "learning_rate": 0.0005920353036708115, "loss": 3.3827, "step": 4500 }, { "epoch": 0.22, "grad_norm": 0.5095198750495911, "learning_rate": 0.0005920317791683065, "loss": 3.4317, "step": 4501 }, { "epoch": 0.22, "grad_norm": 0.5262815356254578, "learning_rate": 0.0005920282538966461, "loss": 3.2033, "step": 4502 }, { "epoch": 0.22, "grad_norm": 0.5463591814041138, "learning_rate": 0.0005920247278558394, "loss": 3.4123, "step": 4503 }, { "epoch": 0.22, "grad_norm": 0.5262253880500793, "learning_rate": 0.0005920212010458957, "loss": 3.5664, "step": 4504 }, { "epoch": 0.22, "grad_norm": 0.5638479590415955, "learning_rate": 0.0005920176734668244, "loss": 3.3824, "step": 4505 }, { "epoch": 0.22, "grad_norm": 0.5075425505638123, "learning_rate": 0.0005920141451186349, "loss": 3.5525, "step": 4506 }, { "epoch": 0.22, "grad_norm": 0.5334774255752563, "learning_rate": 0.0005920106160013362, "loss": 3.4679, "step": 4507 }, { "epoch": 0.22, "grad_norm": 0.5502597689628601, "learning_rate": 0.0005920070861149377, "loss": 3.5242, "step": 4508 }, { "epoch": 0.22, "grad_norm": 0.5628333687782288, "learning_rate": 0.0005920035554594489, "loss": 3.4435, "step": 4509 }, { "epoch": 0.22, "grad_norm": 0.5412539839744568, "learning_rate": 0.0005920000240348788, "loss": 3.5723, "step": 4510 }, { "epoch": 0.22, "grad_norm": 0.5367388725280762, "learning_rate": 0.0005919964918412368, "loss": 3.3911, "step": 4511 }, { "epoch": 0.22, "grad_norm": 0.5794838070869446, "learning_rate": 0.0005919929588785323, "loss": 3.4213, "step": 4512 }, { "epoch": 0.22, "grad_norm": 0.5445544123649597, "learning_rate": 0.0005919894251467746, "loss": 3.4142, "step": 4513 }, { "epoch": 0.22, "grad_norm": 0.5294917225837708, "learning_rate": 0.0005919858906459728, "loss": 3.5728, "step": 4514 }, { "epoch": 0.22, "grad_norm": 0.5747779607772827, "learning_rate": 0.0005919823553761365, "loss": 3.7209, "step": 4515 }, { "epoch": 0.22, "grad_norm": 0.5731784105300903, "learning_rate": 0.0005919788193372748, "loss": 3.3981, "step": 4516 }, { "epoch": 0.22, "grad_norm": 0.5659194588661194, "learning_rate": 0.000591975282529397, "loss": 3.4661, "step": 4517 }, { "epoch": 0.22, "grad_norm": 0.6080614924430847, "learning_rate": 0.0005919717449525127, "loss": 3.448, "step": 4518 }, { "epoch": 0.22, "grad_norm": 0.5205955505371094, "learning_rate": 0.0005919682066066309, "loss": 3.7119, "step": 4519 }, { "epoch": 0.22, "grad_norm": 0.6132055521011353, "learning_rate": 0.000591964667491761, "loss": 3.5377, "step": 4520 }, { "epoch": 0.22, "grad_norm": 0.5384352207183838, "learning_rate": 0.0005919611276079124, "loss": 3.5972, "step": 4521 }, { "epoch": 0.22, "grad_norm": 0.5405988693237305, "learning_rate": 0.0005919575869550944, "loss": 3.4752, "step": 4522 }, { "epoch": 0.22, "grad_norm": 0.5446063280105591, "learning_rate": 0.0005919540455333163, "loss": 3.5515, "step": 4523 }, { "epoch": 0.22, "grad_norm": 0.6086515188217163, "learning_rate": 0.0005919505033425875, "loss": 3.5652, "step": 4524 }, { "epoch": 0.22, "grad_norm": 0.5298771262168884, "learning_rate": 0.0005919469603829171, "loss": 3.3091, "step": 4525 }, { "epoch": 0.22, "grad_norm": 0.5765856504440308, "learning_rate": 0.0005919434166543146, "loss": 3.3966, "step": 4526 }, { "epoch": 0.22, "grad_norm": 0.569276750087738, "learning_rate": 0.0005919398721567895, "loss": 3.454, "step": 4527 }, { "epoch": 0.22, "grad_norm": 0.568664014339447, "learning_rate": 0.0005919363268903508, "loss": 3.6629, "step": 4528 }, { "epoch": 0.22, "grad_norm": 0.569466233253479, "learning_rate": 0.000591932780855008, "loss": 3.4888, "step": 4529 }, { "epoch": 0.22, "grad_norm": 0.5888025760650635, "learning_rate": 0.0005919292340507706, "loss": 3.3833, "step": 4530 }, { "epoch": 0.22, "grad_norm": 0.6375117897987366, "learning_rate": 0.0005919256864776476, "loss": 3.7058, "step": 4531 }, { "epoch": 0.22, "grad_norm": 0.5959801077842712, "learning_rate": 0.0005919221381356486, "loss": 3.3804, "step": 4532 }, { "epoch": 0.22, "grad_norm": 0.5718854069709778, "learning_rate": 0.0005919185890247828, "loss": 3.48, "step": 4533 }, { "epoch": 0.22, "grad_norm": 0.5459699630737305, "learning_rate": 0.0005919150391450597, "loss": 3.4685, "step": 4534 }, { "epoch": 0.22, "grad_norm": 0.5691114068031311, "learning_rate": 0.0005919114884964886, "loss": 3.4821, "step": 4535 }, { "epoch": 0.22, "grad_norm": 0.5469587445259094, "learning_rate": 0.0005919079370790789, "loss": 3.4524, "step": 4536 }, { "epoch": 0.22, "grad_norm": 0.5794917941093445, "learning_rate": 0.0005919043848928397, "loss": 3.5051, "step": 4537 }, { "epoch": 0.22, "grad_norm": 0.5750076770782471, "learning_rate": 0.0005919008319377805, "loss": 3.3637, "step": 4538 }, { "epoch": 0.22, "grad_norm": 0.5511584281921387, "learning_rate": 0.0005918972782139108, "loss": 3.3772, "step": 4539 }, { "epoch": 0.22, "grad_norm": 0.5298534631729126, "learning_rate": 0.0005918937237212397, "loss": 3.6845, "step": 4540 }, { "epoch": 0.22, "grad_norm": 0.5647959113121033, "learning_rate": 0.0005918901684597769, "loss": 3.2087, "step": 4541 }, { "epoch": 0.22, "grad_norm": 0.5687061548233032, "learning_rate": 0.0005918866124295315, "loss": 3.4084, "step": 4542 }, { "epoch": 0.22, "grad_norm": 0.5853356122970581, "learning_rate": 0.000591883055630513, "loss": 3.4805, "step": 4543 }, { "epoch": 0.22, "grad_norm": 0.5594838261604309, "learning_rate": 0.0005918794980627305, "loss": 3.728, "step": 4544 }, { "epoch": 0.22, "grad_norm": 0.5661375522613525, "learning_rate": 0.0005918759397261936, "loss": 3.4592, "step": 4545 }, { "epoch": 0.22, "grad_norm": 0.5649911165237427, "learning_rate": 0.0005918723806209119, "loss": 3.4811, "step": 4546 }, { "epoch": 0.22, "grad_norm": 0.568805456161499, "learning_rate": 0.0005918688207468942, "loss": 3.3881, "step": 4547 }, { "epoch": 0.22, "grad_norm": 0.5687074661254883, "learning_rate": 0.0005918652601041505, "loss": 3.4324, "step": 4548 }, { "epoch": 0.22, "grad_norm": 0.5697916150093079, "learning_rate": 0.0005918616986926898, "loss": 3.5119, "step": 4549 }, { "epoch": 0.22, "grad_norm": 0.5741244554519653, "learning_rate": 0.0005918581365125215, "loss": 3.5548, "step": 4550 }, { "epoch": 0.22, "grad_norm": 0.5744023323059082, "learning_rate": 0.000591854573563655, "loss": 3.5327, "step": 4551 }, { "epoch": 0.22, "grad_norm": 0.5800146460533142, "learning_rate": 0.0005918510098460997, "loss": 3.4031, "step": 4552 }, { "epoch": 0.22, "grad_norm": 0.5622119307518005, "learning_rate": 0.000591847445359865, "loss": 3.4033, "step": 4553 }, { "epoch": 0.22, "grad_norm": 0.5262593626976013, "learning_rate": 0.0005918438801049605, "loss": 3.2841, "step": 4554 }, { "epoch": 0.22, "grad_norm": 0.5564959645271301, "learning_rate": 0.0005918403140813952, "loss": 3.6266, "step": 4555 }, { "epoch": 0.22, "grad_norm": 0.574227511882782, "learning_rate": 0.0005918367472891787, "loss": 3.3961, "step": 4556 }, { "epoch": 0.22, "grad_norm": 0.563045084476471, "learning_rate": 0.0005918331797283204, "loss": 3.6054, "step": 4557 }, { "epoch": 0.22, "grad_norm": 0.5304876565933228, "learning_rate": 0.0005918296113988297, "loss": 3.3966, "step": 4558 }, { "epoch": 0.22, "grad_norm": 0.5531924962997437, "learning_rate": 0.0005918260423007159, "loss": 3.2937, "step": 4559 }, { "epoch": 0.22, "grad_norm": 0.5169435739517212, "learning_rate": 0.0005918224724339885, "loss": 3.5655, "step": 4560 }, { "epoch": 0.22, "grad_norm": 0.6689804196357727, "learning_rate": 0.0005918189017986569, "loss": 3.6632, "step": 4561 }, { "epoch": 0.22, "grad_norm": 0.5568976998329163, "learning_rate": 0.0005918153303947304, "loss": 3.6612, "step": 4562 }, { "epoch": 0.22, "grad_norm": 0.5638584494590759, "learning_rate": 0.0005918117582222185, "loss": 3.4756, "step": 4563 }, { "epoch": 0.22, "grad_norm": 0.5415082573890686, "learning_rate": 0.0005918081852811307, "loss": 3.5922, "step": 4564 }, { "epoch": 0.22, "grad_norm": 0.5457674264907837, "learning_rate": 0.0005918046115714762, "loss": 3.6223, "step": 4565 }, { "epoch": 0.22, "grad_norm": 0.5880576968193054, "learning_rate": 0.0005918010370932645, "loss": 3.3749, "step": 4566 }, { "epoch": 0.22, "grad_norm": 0.572864830493927, "learning_rate": 0.0005917974618465051, "loss": 3.428, "step": 4567 }, { "epoch": 0.22, "grad_norm": 0.5527423024177551, "learning_rate": 0.0005917938858312073, "loss": 3.3815, "step": 4568 }, { "epoch": 0.22, "grad_norm": 0.6350066661834717, "learning_rate": 0.0005917903090473806, "loss": 3.1924, "step": 4569 }, { "epoch": 0.22, "grad_norm": 0.5983766913414001, "learning_rate": 0.0005917867314950343, "loss": 3.4771, "step": 4570 }, { "epoch": 0.22, "grad_norm": 0.5613915324211121, "learning_rate": 0.0005917831531741779, "loss": 3.4837, "step": 4571 }, { "epoch": 0.22, "grad_norm": 0.5356848835945129, "learning_rate": 0.000591779574084821, "loss": 3.5368, "step": 4572 }, { "epoch": 0.22, "grad_norm": 0.5657932162284851, "learning_rate": 0.0005917759942269727, "loss": 3.3458, "step": 4573 }, { "epoch": 0.22, "grad_norm": 0.5904025435447693, "learning_rate": 0.0005917724136006425, "loss": 3.3554, "step": 4574 }, { "epoch": 0.22, "grad_norm": 0.5392588376998901, "learning_rate": 0.00059176883220584, "loss": 3.4159, "step": 4575 }, { "epoch": 0.22, "grad_norm": 0.5528022646903992, "learning_rate": 0.0005917652500425747, "loss": 3.5421, "step": 4576 }, { "epoch": 0.22, "grad_norm": 0.5817191004753113, "learning_rate": 0.0005917616671108557, "loss": 3.4776, "step": 4577 }, { "epoch": 0.22, "grad_norm": 0.5895118713378906, "learning_rate": 0.0005917580834106927, "loss": 3.4579, "step": 4578 }, { "epoch": 0.22, "grad_norm": 0.5503705143928528, "learning_rate": 0.0005917544989420951, "loss": 3.4782, "step": 4579 }, { "epoch": 0.22, "grad_norm": 0.5728640556335449, "learning_rate": 0.0005917509137050723, "loss": 3.2202, "step": 4580 }, { "epoch": 0.22, "grad_norm": 0.5362029671669006, "learning_rate": 0.0005917473276996336, "loss": 3.4599, "step": 4581 }, { "epoch": 0.22, "grad_norm": 0.5472753643989563, "learning_rate": 0.0005917437409257886, "loss": 3.5085, "step": 4582 }, { "epoch": 0.22, "grad_norm": 0.575137197971344, "learning_rate": 0.0005917401533835469, "loss": 3.4713, "step": 4583 }, { "epoch": 0.22, "grad_norm": 0.5501198768615723, "learning_rate": 0.0005917365650729176, "loss": 3.3051, "step": 4584 }, { "epoch": 0.22, "grad_norm": 0.5528760552406311, "learning_rate": 0.0005917329759939105, "loss": 3.4805, "step": 4585 }, { "epoch": 0.22, "grad_norm": 0.5414803624153137, "learning_rate": 0.0005917293861465347, "loss": 3.577, "step": 4586 }, { "epoch": 0.22, "grad_norm": 0.5812093615531921, "learning_rate": 0.0005917257955308, "loss": 3.5206, "step": 4587 }, { "epoch": 0.22, "grad_norm": 0.6059795022010803, "learning_rate": 0.0005917222041467156, "loss": 3.4236, "step": 4588 }, { "epoch": 0.22, "grad_norm": 0.5378461480140686, "learning_rate": 0.0005917186119942912, "loss": 3.4233, "step": 4589 }, { "epoch": 0.22, "grad_norm": 0.5715885758399963, "learning_rate": 0.000591715019073536, "loss": 3.3245, "step": 4590 }, { "epoch": 0.22, "grad_norm": 0.5676418542861938, "learning_rate": 0.0005917114253844596, "loss": 3.1704, "step": 4591 }, { "epoch": 0.23, "grad_norm": 0.5657680034637451, "learning_rate": 0.0005917078309270714, "loss": 3.3586, "step": 4592 }, { "epoch": 0.23, "grad_norm": 0.5191043019294739, "learning_rate": 0.000591704235701381, "loss": 3.3325, "step": 4593 }, { "epoch": 0.23, "grad_norm": 0.567353367805481, "learning_rate": 0.0005917006397073977, "loss": 3.6905, "step": 4594 }, { "epoch": 0.23, "grad_norm": 0.5378613471984863, "learning_rate": 0.0005916970429451311, "loss": 3.7788, "step": 4595 }, { "epoch": 0.23, "grad_norm": 0.548173725605011, "learning_rate": 0.0005916934454145906, "loss": 3.3993, "step": 4596 }, { "epoch": 0.23, "grad_norm": 0.5689377188682556, "learning_rate": 0.0005916898471157858, "loss": 3.5089, "step": 4597 }, { "epoch": 0.23, "grad_norm": 0.5914126634597778, "learning_rate": 0.000591686248048726, "loss": 3.4533, "step": 4598 }, { "epoch": 0.23, "grad_norm": 0.533582866191864, "learning_rate": 0.0005916826482134207, "loss": 3.5641, "step": 4599 }, { "epoch": 0.23, "grad_norm": 0.5263054370880127, "learning_rate": 0.0005916790476098796, "loss": 3.397, "step": 4600 }, { "epoch": 0.23, "grad_norm": 0.5878801941871643, "learning_rate": 0.000591675446238112, "loss": 3.4843, "step": 4601 }, { "epoch": 0.23, "grad_norm": 0.7907947301864624, "learning_rate": 0.0005916718440981272, "loss": 3.5027, "step": 4602 }, { "epoch": 0.23, "grad_norm": 0.5525874495506287, "learning_rate": 0.000591668241189935, "loss": 3.3515, "step": 4603 }, { "epoch": 0.23, "grad_norm": 0.5373077988624573, "learning_rate": 0.0005916646375135449, "loss": 3.2774, "step": 4604 }, { "epoch": 0.23, "grad_norm": 0.5783985257148743, "learning_rate": 0.0005916610330689661, "loss": 3.3954, "step": 4605 }, { "epoch": 0.23, "grad_norm": 0.5470697283744812, "learning_rate": 0.0005916574278562085, "loss": 3.3739, "step": 4606 }, { "epoch": 0.23, "grad_norm": 0.5767296552658081, "learning_rate": 0.0005916538218752812, "loss": 3.3787, "step": 4607 }, { "epoch": 0.23, "grad_norm": 0.5586828589439392, "learning_rate": 0.000591650215126194, "loss": 3.4687, "step": 4608 }, { "epoch": 0.23, "grad_norm": 0.5796303153038025, "learning_rate": 0.000591646607608956, "loss": 3.7055, "step": 4609 }, { "epoch": 0.23, "grad_norm": 0.5355789065361023, "learning_rate": 0.0005916429993235772, "loss": 3.5942, "step": 4610 }, { "epoch": 0.23, "grad_norm": 0.5896856188774109, "learning_rate": 0.0005916393902700667, "loss": 3.4948, "step": 4611 }, { "epoch": 0.23, "grad_norm": 0.5608619451522827, "learning_rate": 0.0005916357804484344, "loss": 3.5695, "step": 4612 }, { "epoch": 0.23, "grad_norm": 0.5785589218139648, "learning_rate": 0.0005916321698586894, "loss": 3.5872, "step": 4613 }, { "epoch": 0.23, "grad_norm": 0.5732633471488953, "learning_rate": 0.0005916285585008415, "loss": 3.3187, "step": 4614 }, { "epoch": 0.23, "grad_norm": 0.5910802483558655, "learning_rate": 0.0005916249463748999, "loss": 3.4223, "step": 4615 }, { "epoch": 0.23, "grad_norm": 0.5657915472984314, "learning_rate": 0.0005916213334808745, "loss": 3.5133, "step": 4616 }, { "epoch": 0.23, "grad_norm": 0.522301435470581, "learning_rate": 0.0005916177198187746, "loss": 3.4913, "step": 4617 }, { "epoch": 0.23, "grad_norm": 0.5693953037261963, "learning_rate": 0.0005916141053886097, "loss": 3.3059, "step": 4618 }, { "epoch": 0.23, "grad_norm": 0.5592297911643982, "learning_rate": 0.0005916104901903894, "loss": 3.3731, "step": 4619 }, { "epoch": 0.23, "grad_norm": 0.5223372578620911, "learning_rate": 0.0005916068742241232, "loss": 3.1434, "step": 4620 }, { "epoch": 0.23, "grad_norm": 0.5213208794593811, "learning_rate": 0.0005916032574898206, "loss": 3.3789, "step": 4621 }, { "epoch": 0.23, "grad_norm": 0.5590026378631592, "learning_rate": 0.0005915996399874911, "loss": 3.5919, "step": 4622 }, { "epoch": 0.23, "grad_norm": 0.5489098429679871, "learning_rate": 0.0005915960217171444, "loss": 3.3545, "step": 4623 }, { "epoch": 0.23, "grad_norm": 0.5755786299705505, "learning_rate": 0.0005915924026787898, "loss": 3.5858, "step": 4624 }, { "epoch": 0.23, "grad_norm": 0.5431007742881775, "learning_rate": 0.0005915887828724369, "loss": 3.5246, "step": 4625 }, { "epoch": 0.23, "grad_norm": 0.5259215235710144, "learning_rate": 0.0005915851622980954, "loss": 3.5859, "step": 4626 }, { "epoch": 0.23, "grad_norm": 0.5726738572120667, "learning_rate": 0.0005915815409557745, "loss": 3.7115, "step": 4627 }, { "epoch": 0.23, "grad_norm": 0.5531672239303589, "learning_rate": 0.000591577918845484, "loss": 3.4266, "step": 4628 }, { "epoch": 0.23, "grad_norm": 0.5267097353935242, "learning_rate": 0.0005915742959672335, "loss": 3.5795, "step": 4629 }, { "epoch": 0.23, "grad_norm": 0.5696015357971191, "learning_rate": 0.0005915706723210323, "loss": 3.2015, "step": 4630 }, { "epoch": 0.23, "grad_norm": 0.5455690622329712, "learning_rate": 0.00059156704790689, "loss": 3.44, "step": 4631 }, { "epoch": 0.23, "grad_norm": 0.6064535975456238, "learning_rate": 0.0005915634227248163, "loss": 3.2651, "step": 4632 }, { "epoch": 0.23, "grad_norm": 0.5588998794555664, "learning_rate": 0.0005915597967748207, "loss": 3.383, "step": 4633 }, { "epoch": 0.23, "grad_norm": 0.5298521518707275, "learning_rate": 0.0005915561700569127, "loss": 3.5665, "step": 4634 }, { "epoch": 0.23, "grad_norm": 0.5695735812187195, "learning_rate": 0.0005915525425711018, "loss": 3.4372, "step": 4635 }, { "epoch": 0.23, "grad_norm": 0.5828069448471069, "learning_rate": 0.0005915489143173976, "loss": 3.4493, "step": 4636 }, { "epoch": 0.23, "grad_norm": 0.5570802092552185, "learning_rate": 0.0005915452852958098, "loss": 3.6557, "step": 4637 }, { "epoch": 0.23, "grad_norm": 0.5548609495162964, "learning_rate": 0.0005915416555063478, "loss": 3.5455, "step": 4638 }, { "epoch": 0.23, "grad_norm": 0.6037477850914001, "learning_rate": 0.000591538024949021, "loss": 3.5569, "step": 4639 }, { "epoch": 0.23, "grad_norm": 0.6033467054367065, "learning_rate": 0.0005915343936238394, "loss": 3.5211, "step": 4640 }, { "epoch": 0.23, "grad_norm": 0.6203020811080933, "learning_rate": 0.0005915307615308122, "loss": 3.6181, "step": 4641 }, { "epoch": 0.23, "grad_norm": 0.5475682616233826, "learning_rate": 0.000591527128669949, "loss": 3.5266, "step": 4642 }, { "epoch": 0.23, "grad_norm": 0.5472739934921265, "learning_rate": 0.0005915234950412596, "loss": 3.6185, "step": 4643 }, { "epoch": 0.23, "grad_norm": 0.5932005047798157, "learning_rate": 0.0005915198606447533, "loss": 3.3851, "step": 4644 }, { "epoch": 0.23, "grad_norm": 0.5578650832176208, "learning_rate": 0.0005915162254804398, "loss": 3.4982, "step": 4645 }, { "epoch": 0.23, "grad_norm": 0.5666883587837219, "learning_rate": 0.0005915125895483288, "loss": 3.3541, "step": 4646 }, { "epoch": 0.23, "grad_norm": 0.6000686883926392, "learning_rate": 0.0005915089528484295, "loss": 3.376, "step": 4647 }, { "epoch": 0.23, "grad_norm": 0.593292772769928, "learning_rate": 0.0005915053153807519, "loss": 3.5401, "step": 4648 }, { "epoch": 0.23, "grad_norm": 0.5368260145187378, "learning_rate": 0.0005915016771453054, "loss": 3.6079, "step": 4649 }, { "epoch": 0.23, "grad_norm": 0.6727637648582458, "learning_rate": 0.0005914980381420996, "loss": 3.4033, "step": 4650 }, { "epoch": 0.23, "grad_norm": 0.541647732257843, "learning_rate": 0.0005914943983711439, "loss": 3.3189, "step": 4651 }, { "epoch": 0.23, "grad_norm": 0.5538272857666016, "learning_rate": 0.0005914907578324481, "loss": 3.4644, "step": 4652 }, { "epoch": 0.23, "grad_norm": 0.5518693923950195, "learning_rate": 0.0005914871165260219, "loss": 3.6281, "step": 4653 }, { "epoch": 0.23, "grad_norm": 0.5680675506591797, "learning_rate": 0.0005914834744518747, "loss": 3.4995, "step": 4654 }, { "epoch": 0.23, "grad_norm": 0.5549336671829224, "learning_rate": 0.000591479831610016, "loss": 3.264, "step": 4655 }, { "epoch": 0.23, "grad_norm": 0.5913617014884949, "learning_rate": 0.0005914761880004555, "loss": 3.2961, "step": 4656 }, { "epoch": 0.23, "grad_norm": 0.5764253735542297, "learning_rate": 0.000591472543623203, "loss": 3.5085, "step": 4657 }, { "epoch": 0.23, "grad_norm": 0.5552957057952881, "learning_rate": 0.0005914688984782677, "loss": 3.3739, "step": 4658 }, { "epoch": 0.23, "grad_norm": 0.5654916763305664, "learning_rate": 0.0005914652525656596, "loss": 3.3516, "step": 4659 }, { "epoch": 0.23, "grad_norm": 0.5623388886451721, "learning_rate": 0.0005914616058853881, "loss": 3.5072, "step": 4660 }, { "epoch": 0.23, "grad_norm": 0.5291627645492554, "learning_rate": 0.0005914579584374627, "loss": 3.4728, "step": 4661 }, { "epoch": 0.23, "grad_norm": 0.5866690874099731, "learning_rate": 0.0005914543102218933, "loss": 3.3841, "step": 4662 }, { "epoch": 0.23, "grad_norm": 0.5244299173355103, "learning_rate": 0.0005914506612386891, "loss": 3.4587, "step": 4663 }, { "epoch": 0.23, "grad_norm": 0.5474516153335571, "learning_rate": 0.0005914470114878602, "loss": 3.5114, "step": 4664 }, { "epoch": 0.23, "grad_norm": 0.56264728307724, "learning_rate": 0.0005914433609694158, "loss": 3.5291, "step": 4665 }, { "epoch": 0.23, "grad_norm": 0.5429409146308899, "learning_rate": 0.0005914397096833658, "loss": 3.7083, "step": 4666 }, { "epoch": 0.23, "grad_norm": 0.5317074656486511, "learning_rate": 0.0005914360576297196, "loss": 3.4968, "step": 4667 }, { "epoch": 0.23, "grad_norm": 0.5686256885528564, "learning_rate": 0.0005914324048084869, "loss": 3.4443, "step": 4668 }, { "epoch": 0.23, "grad_norm": 0.5332667827606201, "learning_rate": 0.0005914287512196775, "loss": 3.525, "step": 4669 }, { "epoch": 0.23, "grad_norm": 0.5805628299713135, "learning_rate": 0.0005914250968633006, "loss": 3.5423, "step": 4670 }, { "epoch": 0.23, "grad_norm": 0.5518739819526672, "learning_rate": 0.0005914214417393663, "loss": 3.5086, "step": 4671 }, { "epoch": 0.23, "grad_norm": 0.6050487160682678, "learning_rate": 0.0005914177858478837, "loss": 3.4918, "step": 4672 }, { "epoch": 0.23, "grad_norm": 0.5575416088104248, "learning_rate": 0.000591414129188863, "loss": 3.4099, "step": 4673 }, { "epoch": 0.23, "grad_norm": 0.5643271803855896, "learning_rate": 0.0005914104717623136, "loss": 3.2613, "step": 4674 }, { "epoch": 0.23, "grad_norm": 0.5856224298477173, "learning_rate": 0.000591406813568245, "loss": 3.3773, "step": 4675 }, { "epoch": 0.23, "grad_norm": 0.5721701383590698, "learning_rate": 0.0005914031546066669, "loss": 3.3944, "step": 4676 }, { "epoch": 0.23, "grad_norm": 0.5789268016815186, "learning_rate": 0.000591399494877589, "loss": 3.5411, "step": 4677 }, { "epoch": 0.23, "grad_norm": 0.5379511713981628, "learning_rate": 0.000591395834381021, "loss": 3.5688, "step": 4678 }, { "epoch": 0.23, "grad_norm": 0.5450015068054199, "learning_rate": 0.0005913921731169724, "loss": 3.5541, "step": 4679 }, { "epoch": 0.23, "grad_norm": 0.5848316550254822, "learning_rate": 0.0005913885110854529, "loss": 3.628, "step": 4680 }, { "epoch": 0.23, "grad_norm": 0.5671524405479431, "learning_rate": 0.0005913848482864721, "loss": 3.629, "step": 4681 }, { "epoch": 0.23, "grad_norm": 0.5497124791145325, "learning_rate": 0.0005913811847200397, "loss": 3.6733, "step": 4682 }, { "epoch": 0.23, "grad_norm": 0.5373255610466003, "learning_rate": 0.0005913775203861654, "loss": 3.5201, "step": 4683 }, { "epoch": 0.23, "grad_norm": 0.5729827880859375, "learning_rate": 0.0005913738552848587, "loss": 3.3868, "step": 4684 }, { "epoch": 0.23, "grad_norm": 0.5580928921699524, "learning_rate": 0.0005913701894161294, "loss": 3.4114, "step": 4685 }, { "epoch": 0.23, "grad_norm": 0.5716227889060974, "learning_rate": 0.000591366522779987, "loss": 3.5178, "step": 4686 }, { "epoch": 0.23, "grad_norm": 0.539944052696228, "learning_rate": 0.0005913628553764413, "loss": 3.4705, "step": 4687 }, { "epoch": 0.23, "grad_norm": 0.5653459429740906, "learning_rate": 0.000591359187205502, "loss": 3.3099, "step": 4688 }, { "epoch": 0.23, "grad_norm": 0.5607462525367737, "learning_rate": 0.0005913555182671786, "loss": 3.6437, "step": 4689 }, { "epoch": 0.23, "grad_norm": 0.5527281165122986, "learning_rate": 0.0005913518485614809, "loss": 3.4185, "step": 4690 }, { "epoch": 0.23, "grad_norm": 0.5707682967185974, "learning_rate": 0.0005913481780884185, "loss": 3.4184, "step": 4691 }, { "epoch": 0.23, "grad_norm": 0.5300978422164917, "learning_rate": 0.000591344506848001, "loss": 3.2963, "step": 4692 }, { "epoch": 0.23, "grad_norm": 0.553466260433197, "learning_rate": 0.0005913408348402382, "loss": 3.3923, "step": 4693 }, { "epoch": 0.23, "grad_norm": 0.5744982361793518, "learning_rate": 0.0005913371620651396, "loss": 3.3072, "step": 4694 }, { "epoch": 0.23, "grad_norm": 0.5825265645980835, "learning_rate": 0.0005913334885227151, "loss": 3.3266, "step": 4695 }, { "epoch": 0.23, "grad_norm": 0.5940053462982178, "learning_rate": 0.0005913298142129741, "loss": 3.2624, "step": 4696 }, { "epoch": 0.23, "grad_norm": 0.5737009644508362, "learning_rate": 0.0005913261391359266, "loss": 3.2402, "step": 4697 }, { "epoch": 0.23, "grad_norm": 0.5772563219070435, "learning_rate": 0.0005913224632915821, "loss": 3.541, "step": 4698 }, { "epoch": 0.23, "grad_norm": 0.5606215000152588, "learning_rate": 0.0005913187866799503, "loss": 3.2515, "step": 4699 }, { "epoch": 0.23, "grad_norm": 0.6204817295074463, "learning_rate": 0.0005913151093010408, "loss": 3.6065, "step": 4700 }, { "epoch": 0.23, "grad_norm": 0.5818078517913818, "learning_rate": 0.0005913114311548635, "loss": 3.582, "step": 4701 }, { "epoch": 0.23, "grad_norm": 0.556207001209259, "learning_rate": 0.0005913077522414278, "loss": 3.4011, "step": 4702 }, { "epoch": 0.23, "grad_norm": 0.5517778992652893, "learning_rate": 0.0005913040725607437, "loss": 3.362, "step": 4703 }, { "epoch": 0.23, "grad_norm": 0.5277376174926758, "learning_rate": 0.0005913003921128206, "loss": 3.5635, "step": 4704 }, { "epoch": 0.23, "grad_norm": 0.5681208968162537, "learning_rate": 0.0005912967108976684, "loss": 3.4729, "step": 4705 }, { "epoch": 0.23, "grad_norm": 0.554221510887146, "learning_rate": 0.0005912930289152967, "loss": 3.5178, "step": 4706 }, { "epoch": 0.23, "grad_norm": 0.5068381428718567, "learning_rate": 0.0005912893461657152, "loss": 3.5696, "step": 4707 }, { "epoch": 0.23, "grad_norm": 0.5582959055900574, "learning_rate": 0.0005912856626489337, "loss": 3.3457, "step": 4708 }, { "epoch": 0.23, "grad_norm": 0.5499424338340759, "learning_rate": 0.0005912819783649617, "loss": 3.5449, "step": 4709 }, { "epoch": 0.23, "grad_norm": 0.5410943627357483, "learning_rate": 0.0005912782933138091, "loss": 3.5805, "step": 4710 }, { "epoch": 0.23, "grad_norm": 0.5599467754364014, "learning_rate": 0.0005912746074954856, "loss": 3.4815, "step": 4711 }, { "epoch": 0.23, "grad_norm": 0.6126967072486877, "learning_rate": 0.0005912709209100007, "loss": 3.2459, "step": 4712 }, { "epoch": 0.23, "grad_norm": 0.5697960257530212, "learning_rate": 0.0005912672335573643, "loss": 3.4214, "step": 4713 }, { "epoch": 0.23, "grad_norm": 0.5213540196418762, "learning_rate": 0.0005912635454375861, "loss": 3.6434, "step": 4714 }, { "epoch": 0.23, "grad_norm": 0.667456328868866, "learning_rate": 0.0005912598565506758, "loss": 3.4364, "step": 4715 }, { "epoch": 0.23, "grad_norm": 0.5707606077194214, "learning_rate": 0.0005912561668966431, "loss": 3.4236, "step": 4716 }, { "epoch": 0.23, "grad_norm": 0.551876962184906, "learning_rate": 0.0005912524764754976, "loss": 3.4234, "step": 4717 }, { "epoch": 0.23, "grad_norm": 0.5813961029052734, "learning_rate": 0.0005912487852872492, "loss": 3.4069, "step": 4718 }, { "epoch": 0.23, "grad_norm": 0.5820927023887634, "learning_rate": 0.0005912450933319075, "loss": 3.429, "step": 4719 }, { "epoch": 0.23, "grad_norm": 0.6033832430839539, "learning_rate": 0.0005912414006094824, "loss": 3.5748, "step": 4720 }, { "epoch": 0.23, "grad_norm": 0.5736304521560669, "learning_rate": 0.0005912377071199834, "loss": 3.2263, "step": 4721 }, { "epoch": 0.23, "grad_norm": 0.555508553981781, "learning_rate": 0.0005912340128634205, "loss": 3.3534, "step": 4722 }, { "epoch": 0.23, "grad_norm": 0.5986608266830444, "learning_rate": 0.000591230317839803, "loss": 3.4823, "step": 4723 }, { "epoch": 0.23, "grad_norm": 0.6043596267700195, "learning_rate": 0.000591226622049141, "loss": 3.4663, "step": 4724 }, { "epoch": 0.23, "grad_norm": 0.540687620639801, "learning_rate": 0.0005912229254914442, "loss": 3.3754, "step": 4725 }, { "epoch": 0.23, "grad_norm": 0.6195804476737976, "learning_rate": 0.0005912192281667221, "loss": 3.39, "step": 4726 }, { "epoch": 0.23, "grad_norm": 0.5378120541572571, "learning_rate": 0.0005912155300749846, "loss": 3.5522, "step": 4727 }, { "epoch": 0.23, "grad_norm": 0.5554460287094116, "learning_rate": 0.0005912118312162416, "loss": 3.3615, "step": 4728 }, { "epoch": 0.23, "grad_norm": 0.5363547205924988, "learning_rate": 0.0005912081315905026, "loss": 3.3284, "step": 4729 }, { "epoch": 0.23, "grad_norm": 0.6088721752166748, "learning_rate": 0.0005912044311977775, "loss": 3.4141, "step": 4730 }, { "epoch": 0.23, "grad_norm": 0.5486693382263184, "learning_rate": 0.0005912007300380758, "loss": 3.3685, "step": 4731 }, { "epoch": 0.23, "grad_norm": 0.558586597442627, "learning_rate": 0.0005911970281114075, "loss": 3.4063, "step": 4732 }, { "epoch": 0.23, "grad_norm": 0.5668739676475525, "learning_rate": 0.0005911933254177824, "loss": 3.3792, "step": 4733 }, { "epoch": 0.23, "grad_norm": 0.5825808644294739, "learning_rate": 0.00059118962195721, "loss": 3.6577, "step": 4734 }, { "epoch": 0.23, "grad_norm": 0.5412962436676025, "learning_rate": 0.0005911859177297002, "loss": 3.2624, "step": 4735 }, { "epoch": 0.23, "grad_norm": 0.5681858658790588, "learning_rate": 0.0005911822127352628, "loss": 3.3996, "step": 4736 }, { "epoch": 0.23, "grad_norm": 0.6266666650772095, "learning_rate": 0.0005911785069739073, "loss": 3.5886, "step": 4737 }, { "epoch": 0.23, "grad_norm": 0.5447356700897217, "learning_rate": 0.0005911748004456438, "loss": 3.3494, "step": 4738 }, { "epoch": 0.23, "grad_norm": 0.5406894683837891, "learning_rate": 0.0005911710931504818, "loss": 3.4121, "step": 4739 }, { "epoch": 0.23, "grad_norm": 0.5392957329750061, "learning_rate": 0.0005911673850884313, "loss": 3.3898, "step": 4740 }, { "epoch": 0.23, "grad_norm": 0.5871279239654541, "learning_rate": 0.000591163676259502, "loss": 3.081, "step": 4741 }, { "epoch": 0.23, "grad_norm": 0.6096645593643188, "learning_rate": 0.0005911599666637035, "loss": 3.508, "step": 4742 }, { "epoch": 0.23, "grad_norm": 0.5398980379104614, "learning_rate": 0.0005911562563010457, "loss": 3.5063, "step": 4743 }, { "epoch": 0.23, "grad_norm": 0.5835353136062622, "learning_rate": 0.0005911525451715383, "loss": 3.477, "step": 4744 }, { "epoch": 0.23, "grad_norm": 0.5687283277511597, "learning_rate": 0.0005911488332751911, "loss": 3.4038, "step": 4745 }, { "epoch": 0.23, "grad_norm": 0.5824450254440308, "learning_rate": 0.000591145120612014, "loss": 3.5479, "step": 4746 }, { "epoch": 0.23, "grad_norm": 0.5424936413764954, "learning_rate": 0.0005911414071820167, "loss": 3.4889, "step": 4747 }, { "epoch": 0.23, "grad_norm": 0.5513694882392883, "learning_rate": 0.000591137692985209, "loss": 3.4297, "step": 4748 }, { "epoch": 0.23, "grad_norm": 0.5256475210189819, "learning_rate": 0.0005911339780216005, "loss": 3.6925, "step": 4749 }, { "epoch": 0.23, "grad_norm": 0.5420495867729187, "learning_rate": 0.0005911302622912013, "loss": 3.6353, "step": 4750 }, { "epoch": 0.23, "grad_norm": 0.5440043807029724, "learning_rate": 0.000591126545794021, "loss": 3.3512, "step": 4751 }, { "epoch": 0.23, "grad_norm": 0.5715298652648926, "learning_rate": 0.0005911228285300692, "loss": 3.3806, "step": 4752 }, { "epoch": 0.23, "grad_norm": 0.5849560499191284, "learning_rate": 0.0005911191104993561, "loss": 3.4816, "step": 4753 }, { "epoch": 0.23, "grad_norm": 0.5608013272285461, "learning_rate": 0.0005911153917018912, "loss": 3.527, "step": 4754 }, { "epoch": 0.23, "grad_norm": 0.584721028804779, "learning_rate": 0.0005911116721376844, "loss": 3.284, "step": 4755 }, { "epoch": 0.23, "grad_norm": 0.5911849737167358, "learning_rate": 0.0005911079518067455, "loss": 3.4843, "step": 4756 }, { "epoch": 0.23, "grad_norm": 0.569872260093689, "learning_rate": 0.0005911042307090843, "loss": 3.5176, "step": 4757 }, { "epoch": 0.23, "grad_norm": 0.6122729778289795, "learning_rate": 0.0005911005088447105, "loss": 3.6845, "step": 4758 }, { "epoch": 0.23, "grad_norm": 0.552301287651062, "learning_rate": 0.0005910967862136341, "loss": 3.5472, "step": 4759 }, { "epoch": 0.23, "grad_norm": 0.5530393719673157, "learning_rate": 0.0005910930628158647, "loss": 3.6068, "step": 4760 }, { "epoch": 0.23, "grad_norm": 0.5497402548789978, "learning_rate": 0.0005910893386514122, "loss": 3.3727, "step": 4761 }, { "epoch": 0.23, "grad_norm": 0.6003914475440979, "learning_rate": 0.0005910856137202865, "loss": 3.5103, "step": 4762 }, { "epoch": 0.23, "grad_norm": 0.5288448333740234, "learning_rate": 0.0005910818880224973, "loss": 3.4836, "step": 4763 }, { "epoch": 0.23, "grad_norm": 0.5383844375610352, "learning_rate": 0.0005910781615580542, "loss": 3.3261, "step": 4764 }, { "epoch": 0.23, "grad_norm": 0.5371490120887756, "learning_rate": 0.0005910744343269674, "loss": 3.5052, "step": 4765 }, { "epoch": 0.23, "grad_norm": 0.5520905256271362, "learning_rate": 0.0005910707063292466, "loss": 3.5325, "step": 4766 }, { "epoch": 0.23, "grad_norm": 0.5796260237693787, "learning_rate": 0.0005910669775649016, "loss": 3.1702, "step": 4767 }, { "epoch": 0.23, "grad_norm": 0.6260196566581726, "learning_rate": 0.0005910632480339421, "loss": 3.4242, "step": 4768 }, { "epoch": 0.23, "grad_norm": 0.5613893270492554, "learning_rate": 0.0005910595177363781, "loss": 3.6865, "step": 4769 }, { "epoch": 0.23, "grad_norm": 0.5682705044746399, "learning_rate": 0.0005910557866722193, "loss": 3.1459, "step": 4770 }, { "epoch": 0.23, "grad_norm": 0.6285190582275391, "learning_rate": 0.0005910520548414756, "loss": 3.3947, "step": 4771 }, { "epoch": 0.23, "grad_norm": 0.5714618563652039, "learning_rate": 0.0005910483222441568, "loss": 3.5817, "step": 4772 }, { "epoch": 0.23, "grad_norm": 0.5577132105827332, "learning_rate": 0.0005910445888802727, "loss": 3.4281, "step": 4773 }, { "epoch": 0.23, "grad_norm": 0.6071662306785583, "learning_rate": 0.0005910408547498332, "loss": 3.3626, "step": 4774 }, { "epoch": 0.23, "grad_norm": 0.5966734886169434, "learning_rate": 0.000591037119852848, "loss": 3.4659, "step": 4775 }, { "epoch": 0.23, "grad_norm": 0.5586175322532654, "learning_rate": 0.0005910333841893271, "loss": 3.3256, "step": 4776 }, { "epoch": 0.23, "grad_norm": 0.5855554938316345, "learning_rate": 0.0005910296477592803, "loss": 3.7103, "step": 4777 }, { "epoch": 0.23, "grad_norm": 0.52793288230896, "learning_rate": 0.0005910259105627174, "loss": 3.6772, "step": 4778 }, { "epoch": 0.23, "grad_norm": 0.5505519509315491, "learning_rate": 0.0005910221725996483, "loss": 3.278, "step": 4779 }, { "epoch": 0.23, "grad_norm": 0.5763952136039734, "learning_rate": 0.0005910184338700828, "loss": 3.4688, "step": 4780 }, { "epoch": 0.23, "grad_norm": 0.6565443873405457, "learning_rate": 0.0005910146943740308, "loss": 3.5173, "step": 4781 }, { "epoch": 0.23, "grad_norm": 0.582163393497467, "learning_rate": 0.0005910109541115019, "loss": 3.1918, "step": 4782 }, { "epoch": 0.23, "grad_norm": 0.5900031328201294, "learning_rate": 0.0005910072130825063, "loss": 3.4327, "step": 4783 }, { "epoch": 0.23, "grad_norm": 0.6079528331756592, "learning_rate": 0.0005910034712870538, "loss": 3.4477, "step": 4784 }, { "epoch": 0.23, "grad_norm": 0.5783188939094543, "learning_rate": 0.0005909997287251539, "loss": 3.5502, "step": 4785 }, { "epoch": 0.23, "grad_norm": 0.5941360592842102, "learning_rate": 0.000590995985396817, "loss": 3.351, "step": 4786 }, { "epoch": 0.23, "grad_norm": 0.6161750555038452, "learning_rate": 0.0005909922413020525, "loss": 3.5587, "step": 4787 }, { "epoch": 0.23, "grad_norm": 0.5669690370559692, "learning_rate": 0.0005909884964408705, "loss": 3.5122, "step": 4788 }, { "epoch": 0.23, "grad_norm": 0.5724545121192932, "learning_rate": 0.0005909847508132808, "loss": 3.3534, "step": 4789 }, { "epoch": 0.23, "grad_norm": 0.5895712375640869, "learning_rate": 0.0005909810044192932, "loss": 3.4805, "step": 4790 }, { "epoch": 0.23, "grad_norm": 0.542855441570282, "learning_rate": 0.0005909772572589177, "loss": 3.5939, "step": 4791 }, { "epoch": 0.23, "grad_norm": 0.5481388568878174, "learning_rate": 0.0005909735093321642, "loss": 3.4121, "step": 4792 }, { "epoch": 0.23, "grad_norm": 0.5825479626655579, "learning_rate": 0.0005909697606390423, "loss": 3.5954, "step": 4793 }, { "epoch": 0.23, "grad_norm": 0.5931483507156372, "learning_rate": 0.0005909660111795621, "loss": 3.2799, "step": 4794 }, { "epoch": 0.23, "grad_norm": 0.530947208404541, "learning_rate": 0.0005909622609537334, "loss": 3.5938, "step": 4795 }, { "epoch": 0.24, "grad_norm": 0.5414092540740967, "learning_rate": 0.0005909585099615662, "loss": 3.6877, "step": 4796 }, { "epoch": 0.24, "grad_norm": 0.5826252698898315, "learning_rate": 0.0005909547582030702, "loss": 3.4782, "step": 4797 }, { "epoch": 0.24, "grad_norm": 0.5572245717048645, "learning_rate": 0.0005909510056782554, "loss": 3.3905, "step": 4798 }, { "epoch": 0.24, "grad_norm": 0.564272403717041, "learning_rate": 0.0005909472523871317, "loss": 3.5094, "step": 4799 }, { "epoch": 0.24, "grad_norm": 0.5508630871772766, "learning_rate": 0.0005909434983297089, "loss": 3.501, "step": 4800 }, { "epoch": 0.24, "grad_norm": 0.5623564720153809, "learning_rate": 0.0005909397435059967, "loss": 3.3671, "step": 4801 }, { "epoch": 0.24, "grad_norm": 0.5778913497924805, "learning_rate": 0.0005909359879160055, "loss": 3.5242, "step": 4802 }, { "epoch": 0.24, "grad_norm": 0.5555720329284668, "learning_rate": 0.0005909322315597448, "loss": 3.4082, "step": 4803 }, { "epoch": 0.24, "grad_norm": 0.5474628806114197, "learning_rate": 0.0005909284744372245, "loss": 3.3297, "step": 4804 }, { "epoch": 0.24, "grad_norm": 0.534807026386261, "learning_rate": 0.0005909247165484547, "loss": 3.3332, "step": 4805 }, { "epoch": 0.24, "grad_norm": 0.524055540561676, "learning_rate": 0.0005909209578934452, "loss": 3.583, "step": 4806 }, { "epoch": 0.24, "grad_norm": 0.5375897288322449, "learning_rate": 0.0005909171984722058, "loss": 3.4536, "step": 4807 }, { "epoch": 0.24, "grad_norm": 0.5646499395370483, "learning_rate": 0.0005909134382847466, "loss": 3.3704, "step": 4808 }, { "epoch": 0.24, "grad_norm": 0.5710200071334839, "learning_rate": 0.0005909096773310773, "loss": 3.5032, "step": 4809 }, { "epoch": 0.24, "grad_norm": 0.5503785610198975, "learning_rate": 0.000590905915611208, "loss": 3.4326, "step": 4810 }, { "epoch": 0.24, "grad_norm": 0.5581929087638855, "learning_rate": 0.0005909021531251483, "loss": 3.5753, "step": 4811 }, { "epoch": 0.24, "grad_norm": 0.6241434216499329, "learning_rate": 0.0005908983898729084, "loss": 3.4697, "step": 4812 }, { "epoch": 0.24, "grad_norm": 0.6147471070289612, "learning_rate": 0.0005908946258544982, "loss": 3.2685, "step": 4813 }, { "epoch": 0.24, "grad_norm": 0.6412365436553955, "learning_rate": 0.0005908908610699275, "loss": 3.4211, "step": 4814 }, { "epoch": 0.24, "grad_norm": 0.5611572861671448, "learning_rate": 0.0005908870955192064, "loss": 3.4937, "step": 4815 }, { "epoch": 0.24, "grad_norm": 0.5360367298126221, "learning_rate": 0.0005908833292023445, "loss": 3.6016, "step": 4816 }, { "epoch": 0.24, "grad_norm": 0.546721339225769, "learning_rate": 0.0005908795621193519, "loss": 3.2601, "step": 4817 }, { "epoch": 0.24, "grad_norm": 0.5728215575218201, "learning_rate": 0.0005908757942702385, "loss": 3.4103, "step": 4818 }, { "epoch": 0.24, "grad_norm": 0.556920051574707, "learning_rate": 0.0005908720256550143, "loss": 3.3727, "step": 4819 }, { "epoch": 0.24, "grad_norm": 0.5450972318649292, "learning_rate": 0.0005908682562736892, "loss": 3.4042, "step": 4820 }, { "epoch": 0.24, "grad_norm": 0.5655497908592224, "learning_rate": 0.000590864486126273, "loss": 3.3996, "step": 4821 }, { "epoch": 0.24, "grad_norm": 0.5585508942604065, "learning_rate": 0.0005908607152127757, "loss": 3.4254, "step": 4822 }, { "epoch": 0.24, "grad_norm": 0.542323648929596, "learning_rate": 0.0005908569435332072, "loss": 3.457, "step": 4823 }, { "epoch": 0.24, "grad_norm": 0.5362521409988403, "learning_rate": 0.0005908531710875777, "loss": 3.5526, "step": 4824 }, { "epoch": 0.24, "grad_norm": 0.5256375670433044, "learning_rate": 0.0005908493978758969, "loss": 3.4572, "step": 4825 }, { "epoch": 0.24, "grad_norm": 0.5696166157722473, "learning_rate": 0.0005908456238981747, "loss": 3.4514, "step": 4826 }, { "epoch": 0.24, "grad_norm": 0.5578446984291077, "learning_rate": 0.000590841849154421, "loss": 3.5851, "step": 4827 }, { "epoch": 0.24, "grad_norm": 0.5300297737121582, "learning_rate": 0.0005908380736446459, "loss": 3.5034, "step": 4828 }, { "epoch": 0.24, "grad_norm": 0.5648180246353149, "learning_rate": 0.0005908342973688594, "loss": 3.599, "step": 4829 }, { "epoch": 0.24, "grad_norm": 0.6014158129692078, "learning_rate": 0.0005908305203270712, "loss": 3.4826, "step": 4830 }, { "epoch": 0.24, "grad_norm": 0.5411103367805481, "learning_rate": 0.0005908267425192914, "loss": 3.4111, "step": 4831 }, { "epoch": 0.24, "grad_norm": 0.5625185370445251, "learning_rate": 0.00059082296394553, "loss": 3.403, "step": 4832 }, { "epoch": 0.24, "grad_norm": 0.6025658845901489, "learning_rate": 0.0005908191846057968, "loss": 3.329, "step": 4833 }, { "epoch": 0.24, "grad_norm": 0.5506299734115601, "learning_rate": 0.0005908154045001019, "loss": 3.6375, "step": 4834 }, { "epoch": 0.24, "grad_norm": 0.5933164358139038, "learning_rate": 0.0005908116236284551, "loss": 3.2049, "step": 4835 }, { "epoch": 0.24, "grad_norm": 0.5729756951332092, "learning_rate": 0.0005908078419908666, "loss": 3.3538, "step": 4836 }, { "epoch": 0.24, "grad_norm": 0.5527672171592712, "learning_rate": 0.000590804059587346, "loss": 3.3438, "step": 4837 }, { "epoch": 0.24, "grad_norm": 0.5585240721702576, "learning_rate": 0.0005908002764179037, "loss": 3.5065, "step": 4838 }, { "epoch": 0.24, "grad_norm": 0.5482339262962341, "learning_rate": 0.0005907964924825493, "loss": 3.425, "step": 4839 }, { "epoch": 0.24, "grad_norm": 0.5115832686424255, "learning_rate": 0.000590792707781293, "loss": 3.4516, "step": 4840 }, { "epoch": 0.24, "grad_norm": 0.5796265006065369, "learning_rate": 0.0005907889223141446, "loss": 3.4701, "step": 4841 }, { "epoch": 0.24, "grad_norm": 0.5543243288993835, "learning_rate": 0.0005907851360811142, "loss": 3.2507, "step": 4842 }, { "epoch": 0.24, "grad_norm": 0.5635089874267578, "learning_rate": 0.0005907813490822116, "loss": 3.3394, "step": 4843 }, { "epoch": 0.24, "grad_norm": 0.5561545491218567, "learning_rate": 0.000590777561317447, "loss": 3.3604, "step": 4844 }, { "epoch": 0.24, "grad_norm": 0.5824812650680542, "learning_rate": 0.0005907737727868302, "loss": 3.5275, "step": 4845 }, { "epoch": 0.24, "grad_norm": 0.5519053339958191, "learning_rate": 0.0005907699834903713, "loss": 3.5574, "step": 4846 }, { "epoch": 0.24, "grad_norm": 0.5963732004165649, "learning_rate": 0.0005907661934280801, "loss": 3.4281, "step": 4847 }, { "epoch": 0.24, "grad_norm": 0.5388736724853516, "learning_rate": 0.0005907624025999669, "loss": 3.3095, "step": 4848 }, { "epoch": 0.24, "grad_norm": 0.5201981067657471, "learning_rate": 0.0005907586110060414, "loss": 3.2326, "step": 4849 }, { "epoch": 0.24, "grad_norm": 0.5812944769859314, "learning_rate": 0.0005907548186463137, "loss": 3.4784, "step": 4850 }, { "epoch": 0.24, "grad_norm": 0.5888471603393555, "learning_rate": 0.0005907510255207936, "loss": 3.5884, "step": 4851 }, { "epoch": 0.24, "grad_norm": 0.5519949793815613, "learning_rate": 0.0005907472316294914, "loss": 3.3247, "step": 4852 }, { "epoch": 0.24, "grad_norm": 0.5584381818771362, "learning_rate": 0.0005907434369724169, "loss": 3.4411, "step": 4853 }, { "epoch": 0.24, "grad_norm": 0.5249735713005066, "learning_rate": 0.0005907396415495802, "loss": 3.4842, "step": 4854 }, { "epoch": 0.24, "grad_norm": 0.5697634816169739, "learning_rate": 0.0005907358453609912, "loss": 3.5065, "step": 4855 }, { "epoch": 0.24, "grad_norm": 0.5187658071517944, "learning_rate": 0.00059073204840666, "loss": 3.3634, "step": 4856 }, { "epoch": 0.24, "grad_norm": 0.5130595564842224, "learning_rate": 0.0005907282506865965, "loss": 3.2991, "step": 4857 }, { "epoch": 0.24, "grad_norm": 0.5928294658660889, "learning_rate": 0.0005907244522008107, "loss": 3.4294, "step": 4858 }, { "epoch": 0.24, "grad_norm": 0.5565031170845032, "learning_rate": 0.0005907206529493127, "loss": 3.7071, "step": 4859 }, { "epoch": 0.24, "grad_norm": 0.5738771557807922, "learning_rate": 0.0005907168529321124, "loss": 3.3815, "step": 4860 }, { "epoch": 0.24, "grad_norm": 0.5372669696807861, "learning_rate": 0.0005907130521492198, "loss": 3.4133, "step": 4861 }, { "epoch": 0.24, "grad_norm": 0.7971889972686768, "learning_rate": 0.0005907092506006452, "loss": 3.5911, "step": 4862 }, { "epoch": 0.24, "grad_norm": 0.6166131496429443, "learning_rate": 0.0005907054482863981, "loss": 3.4924, "step": 4863 }, { "epoch": 0.24, "grad_norm": 0.6117991209030151, "learning_rate": 0.0005907016452064889, "loss": 3.4626, "step": 4864 }, { "epoch": 0.24, "grad_norm": 0.5576360821723938, "learning_rate": 0.0005906978413609276, "loss": 3.4898, "step": 4865 }, { "epoch": 0.24, "grad_norm": 0.5783229470252991, "learning_rate": 0.000590694036749724, "loss": 3.3548, "step": 4866 }, { "epoch": 0.24, "grad_norm": 0.5609897375106812, "learning_rate": 0.0005906902313728885, "loss": 3.3822, "step": 4867 }, { "epoch": 0.24, "grad_norm": 0.5815325379371643, "learning_rate": 0.0005906864252304305, "loss": 3.5328, "step": 4868 }, { "epoch": 0.24, "grad_norm": 0.5757078528404236, "learning_rate": 0.0005906826183223607, "loss": 3.4499, "step": 4869 }, { "epoch": 0.24, "grad_norm": 0.5703591108322144, "learning_rate": 0.0005906788106486888, "loss": 3.4756, "step": 4870 }, { "epoch": 0.24, "grad_norm": 0.551869809627533, "learning_rate": 0.0005906750022094246, "loss": 3.6005, "step": 4871 }, { "epoch": 0.24, "grad_norm": 0.643150269985199, "learning_rate": 0.0005906711930045787, "loss": 3.4636, "step": 4872 }, { "epoch": 0.24, "grad_norm": 0.5789183378219604, "learning_rate": 0.0005906673830341606, "loss": 3.5697, "step": 4873 }, { "epoch": 0.24, "grad_norm": 0.5472760796546936, "learning_rate": 0.0005906635722981807, "loss": 3.5206, "step": 4874 }, { "epoch": 0.24, "grad_norm": 0.6037294268608093, "learning_rate": 0.0005906597607966488, "loss": 3.4146, "step": 4875 }, { "epoch": 0.24, "grad_norm": 0.5475078225135803, "learning_rate": 0.000590655948529575, "loss": 3.5947, "step": 4876 }, { "epoch": 0.24, "grad_norm": 0.5711190700531006, "learning_rate": 0.0005906521354969695, "loss": 3.2974, "step": 4877 }, { "epoch": 0.24, "grad_norm": 0.5613605380058289, "learning_rate": 0.0005906483216988421, "loss": 3.2993, "step": 4878 }, { "epoch": 0.24, "grad_norm": 0.5774216055870056, "learning_rate": 0.000590644507135203, "loss": 3.3731, "step": 4879 }, { "epoch": 0.24, "grad_norm": 0.5997605323791504, "learning_rate": 0.0005906406918060622, "loss": 3.5623, "step": 4880 }, { "epoch": 0.24, "grad_norm": 0.5883737206459045, "learning_rate": 0.0005906368757114298, "loss": 3.6096, "step": 4881 }, { "epoch": 0.24, "grad_norm": 0.5710089802742004, "learning_rate": 0.0005906330588513157, "loss": 3.4704, "step": 4882 }, { "epoch": 0.24, "grad_norm": 0.516525149345398, "learning_rate": 0.0005906292412257301, "loss": 3.4233, "step": 4883 }, { "epoch": 0.24, "grad_norm": 0.585911750793457, "learning_rate": 0.000590625422834683, "loss": 3.4795, "step": 4884 }, { "epoch": 0.24, "grad_norm": 0.5662615299224854, "learning_rate": 0.0005906216036781845, "loss": 3.5267, "step": 4885 }, { "epoch": 0.24, "grad_norm": 0.5360583066940308, "learning_rate": 0.0005906177837562446, "loss": 3.7512, "step": 4886 }, { "epoch": 0.24, "grad_norm": 0.5965997576713562, "learning_rate": 0.0005906139630688735, "loss": 3.4446, "step": 4887 }, { "epoch": 0.24, "grad_norm": 0.5111775994300842, "learning_rate": 0.0005906101416160809, "loss": 3.4171, "step": 4888 }, { "epoch": 0.24, "grad_norm": 0.5461533069610596, "learning_rate": 0.0005906063193978773, "loss": 3.5264, "step": 4889 }, { "epoch": 0.24, "grad_norm": 0.543048620223999, "learning_rate": 0.0005906024964142726, "loss": 3.2279, "step": 4890 }, { "epoch": 0.24, "grad_norm": 0.5315186977386475, "learning_rate": 0.0005905986726652769, "loss": 3.5112, "step": 4891 }, { "epoch": 0.24, "grad_norm": 0.5514733791351318, "learning_rate": 0.0005905948481509, "loss": 3.1409, "step": 4892 }, { "epoch": 0.24, "grad_norm": 0.5441703200340271, "learning_rate": 0.0005905910228711524, "loss": 3.3357, "step": 4893 }, { "epoch": 0.24, "grad_norm": 0.5863307118415833, "learning_rate": 0.0005905871968260438, "loss": 3.5144, "step": 4894 }, { "epoch": 0.24, "grad_norm": 0.5528578162193298, "learning_rate": 0.0005905833700155847, "loss": 3.3631, "step": 4895 }, { "epoch": 0.24, "grad_norm": 0.565290093421936, "learning_rate": 0.0005905795424397848, "loss": 3.3386, "step": 4896 }, { "epoch": 0.24, "grad_norm": 0.5290336012840271, "learning_rate": 0.0005905757140986542, "loss": 3.6237, "step": 4897 }, { "epoch": 0.24, "grad_norm": 0.5270799398422241, "learning_rate": 0.0005905718849922031, "loss": 3.4527, "step": 4898 }, { "epoch": 0.24, "grad_norm": 0.5542150735855103, "learning_rate": 0.0005905680551204416, "loss": 3.3004, "step": 4899 }, { "epoch": 0.24, "grad_norm": 0.5722207427024841, "learning_rate": 0.0005905642244833799, "loss": 3.4647, "step": 4900 }, { "epoch": 0.24, "grad_norm": 0.5691162943840027, "learning_rate": 0.000590560393081028, "loss": 3.4091, "step": 4901 }, { "epoch": 0.24, "grad_norm": 0.5475846529006958, "learning_rate": 0.0005905565609133957, "loss": 3.6877, "step": 4902 }, { "epoch": 0.24, "grad_norm": 0.5672742128372192, "learning_rate": 0.0005905527279804934, "loss": 3.4401, "step": 4903 }, { "epoch": 0.24, "grad_norm": 0.49786025285720825, "learning_rate": 0.0005905488942823312, "loss": 3.4634, "step": 4904 }, { "epoch": 0.24, "grad_norm": 0.5762016773223877, "learning_rate": 0.0005905450598189191, "loss": 3.3126, "step": 4905 }, { "epoch": 0.24, "grad_norm": 0.5646076798439026, "learning_rate": 0.0005905412245902673, "loss": 3.4117, "step": 4906 }, { "epoch": 0.24, "grad_norm": 0.6327397227287292, "learning_rate": 0.0005905373885963857, "loss": 3.3765, "step": 4907 }, { "epoch": 0.24, "grad_norm": 0.5711735486984253, "learning_rate": 0.0005905335518372846, "loss": 3.3918, "step": 4908 }, { "epoch": 0.24, "grad_norm": 0.5844196677207947, "learning_rate": 0.0005905297143129741, "loss": 3.3744, "step": 4909 }, { "epoch": 0.24, "grad_norm": 0.603782057762146, "learning_rate": 0.0005905258760234641, "loss": 3.3061, "step": 4910 }, { "epoch": 0.24, "grad_norm": 0.5405983328819275, "learning_rate": 0.000590522036968765, "loss": 3.6347, "step": 4911 }, { "epoch": 0.24, "grad_norm": 0.5765902400016785, "learning_rate": 0.0005905181971488867, "loss": 3.4907, "step": 4912 }, { "epoch": 0.24, "grad_norm": 0.6272974014282227, "learning_rate": 0.0005905143565638394, "loss": 3.2816, "step": 4913 }, { "epoch": 0.24, "grad_norm": 0.57854825258255, "learning_rate": 0.0005905105152136332, "loss": 3.4262, "step": 4914 }, { "epoch": 0.24, "grad_norm": 0.578022301197052, "learning_rate": 0.0005905066730982782, "loss": 3.321, "step": 4915 }, { "epoch": 0.24, "grad_norm": 0.5480340123176575, "learning_rate": 0.0005905028302177845, "loss": 3.5696, "step": 4916 }, { "epoch": 0.24, "grad_norm": 0.529426634311676, "learning_rate": 0.0005904989865721622, "loss": 3.4581, "step": 4917 }, { "epoch": 0.24, "grad_norm": 0.523251473903656, "learning_rate": 0.0005904951421614216, "loss": 3.5019, "step": 4918 }, { "epoch": 0.24, "grad_norm": 0.5666603446006775, "learning_rate": 0.0005904912969855727, "loss": 3.5147, "step": 4919 }, { "epoch": 0.24, "grad_norm": 0.6100955605506897, "learning_rate": 0.0005904874510446256, "loss": 3.4469, "step": 4920 }, { "epoch": 0.24, "grad_norm": 0.5637255907058716, "learning_rate": 0.0005904836043385905, "loss": 3.3005, "step": 4921 }, { "epoch": 0.24, "grad_norm": 0.5765202641487122, "learning_rate": 0.0005904797568674774, "loss": 3.4327, "step": 4922 }, { "epoch": 0.24, "grad_norm": 0.5553877353668213, "learning_rate": 0.0005904759086312965, "loss": 3.3015, "step": 4923 }, { "epoch": 0.24, "grad_norm": 0.5453650951385498, "learning_rate": 0.0005904720596300581, "loss": 3.4105, "step": 4924 }, { "epoch": 0.24, "grad_norm": 0.5456913113594055, "learning_rate": 0.0005904682098637721, "loss": 3.4166, "step": 4925 }, { "epoch": 0.24, "grad_norm": 0.5444839596748352, "learning_rate": 0.0005904643593324487, "loss": 3.5552, "step": 4926 }, { "epoch": 0.24, "grad_norm": 0.5601552724838257, "learning_rate": 0.0005904605080360982, "loss": 3.5365, "step": 4927 }, { "epoch": 0.24, "grad_norm": 0.5500659346580505, "learning_rate": 0.0005904566559747305, "loss": 3.4039, "step": 4928 }, { "epoch": 0.24, "grad_norm": 0.5786320567131042, "learning_rate": 0.000590452803148356, "loss": 3.4035, "step": 4929 }, { "epoch": 0.24, "grad_norm": 0.5525652766227722, "learning_rate": 0.0005904489495569846, "loss": 3.2293, "step": 4930 }, { "epoch": 0.24, "grad_norm": 0.5330731272697449, "learning_rate": 0.0005904450952006267, "loss": 3.5334, "step": 4931 }, { "epoch": 0.24, "grad_norm": 0.551021158695221, "learning_rate": 0.0005904412400792921, "loss": 3.4606, "step": 4932 }, { "epoch": 0.24, "grad_norm": 0.5765814781188965, "learning_rate": 0.0005904373841929913, "loss": 3.5593, "step": 4933 }, { "epoch": 0.24, "grad_norm": 0.5668351054191589, "learning_rate": 0.0005904335275417344, "loss": 3.3573, "step": 4934 }, { "epoch": 0.24, "grad_norm": 0.5682421326637268, "learning_rate": 0.0005904296701255314, "loss": 3.3952, "step": 4935 }, { "epoch": 0.24, "grad_norm": 0.5964768528938293, "learning_rate": 0.0005904258119443925, "loss": 3.5121, "step": 4936 }, { "epoch": 0.24, "grad_norm": 0.5435137748718262, "learning_rate": 0.000590421952998328, "loss": 3.4751, "step": 4937 }, { "epoch": 0.24, "grad_norm": 0.5703478455543518, "learning_rate": 0.0005904180932873479, "loss": 3.3983, "step": 4938 }, { "epoch": 0.24, "grad_norm": 0.5706227421760559, "learning_rate": 0.0005904142328114624, "loss": 3.2301, "step": 4939 }, { "epoch": 0.24, "grad_norm": 0.5686094164848328, "learning_rate": 0.0005904103715706819, "loss": 3.5421, "step": 4940 }, { "epoch": 0.24, "grad_norm": 0.6083675026893616, "learning_rate": 0.0005904065095650162, "loss": 3.4906, "step": 4941 }, { "epoch": 0.24, "grad_norm": 0.6154701113700867, "learning_rate": 0.0005904026467944757, "loss": 3.319, "step": 4942 }, { "epoch": 0.24, "grad_norm": 0.589542031288147, "learning_rate": 0.0005903987832590703, "loss": 3.4153, "step": 4943 }, { "epoch": 0.24, "grad_norm": 0.5331887602806091, "learning_rate": 0.0005903949189588107, "loss": 3.37, "step": 4944 }, { "epoch": 0.24, "grad_norm": 0.5606480240821838, "learning_rate": 0.0005903910538937067, "loss": 3.4934, "step": 4945 }, { "epoch": 0.24, "grad_norm": 0.6116060614585876, "learning_rate": 0.0005903871880637684, "loss": 3.5101, "step": 4946 }, { "epoch": 0.24, "grad_norm": 0.5732858180999756, "learning_rate": 0.0005903833214690064, "loss": 3.4792, "step": 4947 }, { "epoch": 0.24, "grad_norm": 0.5375874638557434, "learning_rate": 0.0005903794541094304, "loss": 3.619, "step": 4948 }, { "epoch": 0.24, "grad_norm": 0.552217423915863, "learning_rate": 0.0005903755859850508, "loss": 3.3901, "step": 4949 }, { "epoch": 0.24, "grad_norm": 0.544503927230835, "learning_rate": 0.0005903717170958779, "loss": 3.3962, "step": 4950 }, { "epoch": 0.24, "grad_norm": 0.5237309336662292, "learning_rate": 0.0005903678474419217, "loss": 3.4463, "step": 4951 }, { "epoch": 0.24, "grad_norm": 0.5415375828742981, "learning_rate": 0.0005903639770231925, "loss": 3.2885, "step": 4952 }, { "epoch": 0.24, "grad_norm": 0.5329828858375549, "learning_rate": 0.0005903601058397005, "loss": 3.2622, "step": 4953 }, { "epoch": 0.24, "grad_norm": 0.5593845248222351, "learning_rate": 0.0005903562338914558, "loss": 3.6063, "step": 4954 }, { "epoch": 0.24, "grad_norm": 0.689546525478363, "learning_rate": 0.0005903523611784687, "loss": 3.3436, "step": 4955 }, { "epoch": 0.24, "grad_norm": 0.5579712986946106, "learning_rate": 0.0005903484877007493, "loss": 3.392, "step": 4956 }, { "epoch": 0.24, "grad_norm": 0.5876078009605408, "learning_rate": 0.0005903446134583079, "loss": 3.5828, "step": 4957 }, { "epoch": 0.24, "grad_norm": 0.6026819348335266, "learning_rate": 0.0005903407384511547, "loss": 3.5553, "step": 4958 }, { "epoch": 0.24, "grad_norm": 0.5198916792869568, "learning_rate": 0.0005903368626793, "loss": 3.4045, "step": 4959 }, { "epoch": 0.24, "grad_norm": 0.5764784216880798, "learning_rate": 0.0005903329861427537, "loss": 3.3433, "step": 4960 }, { "epoch": 0.24, "grad_norm": 0.5851700305938721, "learning_rate": 0.0005903291088415261, "loss": 3.554, "step": 4961 }, { "epoch": 0.24, "grad_norm": 0.5475395321846008, "learning_rate": 0.0005903252307756276, "loss": 3.3761, "step": 4962 }, { "epoch": 0.24, "grad_norm": 0.6040002107620239, "learning_rate": 0.0005903213519450683, "loss": 3.7124, "step": 4963 }, { "epoch": 0.24, "grad_norm": 0.5730305910110474, "learning_rate": 0.0005903174723498586, "loss": 3.4037, "step": 4964 }, { "epoch": 0.24, "grad_norm": 0.5729190111160278, "learning_rate": 0.0005903135919900084, "loss": 3.1942, "step": 4965 }, { "epoch": 0.24, "grad_norm": 0.6518261432647705, "learning_rate": 0.000590309710865528, "loss": 3.5579, "step": 4966 }, { "epoch": 0.24, "grad_norm": 0.5518261790275574, "learning_rate": 0.0005903058289764279, "loss": 3.6733, "step": 4967 }, { "epoch": 0.24, "grad_norm": 0.5408662557601929, "learning_rate": 0.000590301946322718, "loss": 3.2613, "step": 4968 }, { "epoch": 0.24, "grad_norm": 0.5802552700042725, "learning_rate": 0.0005902980629044085, "loss": 3.5049, "step": 4969 }, { "epoch": 0.24, "grad_norm": 0.5875983238220215, "learning_rate": 0.00059029417872151, "loss": 3.3783, "step": 4970 }, { "epoch": 0.24, "grad_norm": 0.5996633768081665, "learning_rate": 0.0005902902937740325, "loss": 3.516, "step": 4971 }, { "epoch": 0.24, "grad_norm": 0.5604908466339111, "learning_rate": 0.0005902864080619861, "loss": 3.4725, "step": 4972 }, { "epoch": 0.24, "grad_norm": 0.5704894065856934, "learning_rate": 0.0005902825215853812, "loss": 3.3333, "step": 4973 }, { "epoch": 0.24, "grad_norm": 0.5649724006652832, "learning_rate": 0.000590278634344228, "loss": 3.3508, "step": 4974 }, { "epoch": 0.24, "grad_norm": 0.5423358678817749, "learning_rate": 0.0005902747463385368, "loss": 3.4517, "step": 4975 }, { "epoch": 0.24, "grad_norm": 0.5924620032310486, "learning_rate": 0.0005902708575683177, "loss": 3.4768, "step": 4976 }, { "epoch": 0.24, "grad_norm": 0.5734094381332397, "learning_rate": 0.0005902669680335811, "loss": 3.3845, "step": 4977 }, { "epoch": 0.24, "grad_norm": 0.5569986701011658, "learning_rate": 0.0005902630777343372, "loss": 3.5556, "step": 4978 }, { "epoch": 0.24, "grad_norm": 0.555212676525116, "learning_rate": 0.0005902591866705962, "loss": 3.3604, "step": 4979 }, { "epoch": 0.24, "grad_norm": 0.5357699990272522, "learning_rate": 0.0005902552948423682, "loss": 3.4253, "step": 4980 }, { "epoch": 0.24, "grad_norm": 0.539989173412323, "learning_rate": 0.0005902514022496638, "loss": 3.2653, "step": 4981 }, { "epoch": 0.24, "grad_norm": 0.539340615272522, "learning_rate": 0.0005902475088924929, "loss": 3.6315, "step": 4982 }, { "epoch": 0.24, "grad_norm": 0.5658690929412842, "learning_rate": 0.0005902436147708661, "loss": 3.3263, "step": 4983 }, { "epoch": 0.24, "grad_norm": 0.5437087416648865, "learning_rate": 0.0005902397198847934, "loss": 3.3987, "step": 4984 }, { "epoch": 0.24, "grad_norm": 0.5789981484413147, "learning_rate": 0.0005902358242342851, "loss": 3.5408, "step": 4985 }, { "epoch": 0.24, "grad_norm": 0.5366504192352295, "learning_rate": 0.0005902319278193516, "loss": 3.4576, "step": 4986 }, { "epoch": 0.24, "grad_norm": 0.5638992190361023, "learning_rate": 0.000590228030640003, "loss": 3.4145, "step": 4987 }, { "epoch": 0.24, "grad_norm": 0.5469755530357361, "learning_rate": 0.0005902241326962496, "loss": 3.3786, "step": 4988 }, { "epoch": 0.24, "grad_norm": 0.5611241459846497, "learning_rate": 0.0005902202339881017, "loss": 3.4466, "step": 4989 }, { "epoch": 0.24, "grad_norm": 0.5863252282142639, "learning_rate": 0.0005902163345155696, "loss": 3.3491, "step": 4990 }, { "epoch": 0.24, "grad_norm": 0.577661395072937, "learning_rate": 0.0005902124342786635, "loss": 3.1473, "step": 4991 }, { "epoch": 0.24, "grad_norm": 0.5453828573226929, "learning_rate": 0.0005902085332773935, "loss": 3.3708, "step": 4992 }, { "epoch": 0.24, "grad_norm": 0.5631303787231445, "learning_rate": 0.0005902046315117704, "loss": 3.4, "step": 4993 }, { "epoch": 0.24, "grad_norm": 0.5475294589996338, "learning_rate": 0.000590200728981804, "loss": 3.3217, "step": 4994 }, { "epoch": 0.24, "grad_norm": 0.5487865805625916, "learning_rate": 0.0005901968256875047, "loss": 3.3504, "step": 4995 }, { "epoch": 0.24, "grad_norm": 0.5545656085014343, "learning_rate": 0.0005901929216288828, "loss": 3.4953, "step": 4996 }, { "epoch": 0.24, "grad_norm": 0.5044790506362915, "learning_rate": 0.0005901890168059487, "loss": 3.142, "step": 4997 }, { "epoch": 0.24, "grad_norm": 0.5951768755912781, "learning_rate": 0.0005901851112187125, "loss": 3.4991, "step": 4998 }, { "epoch": 0.24, "grad_norm": 0.540361225605011, "learning_rate": 0.0005901812048671847, "loss": 3.4398, "step": 4999 }, { "epoch": 0.25, "grad_norm": 0.5717666745185852, "learning_rate": 0.0005901772977513754, "loss": 3.3718, "step": 5000 }, { "epoch": 0.25, "grad_norm": 0.5859256386756897, "learning_rate": 0.0005901733898712948, "loss": 3.2956, "step": 5001 }, { "epoch": 0.25, "grad_norm": 0.5552880764007568, "learning_rate": 0.0005901694812269533, "loss": 3.5566, "step": 5002 }, { "epoch": 0.25, "grad_norm": 0.5178077220916748, "learning_rate": 0.0005901655718183615, "loss": 3.503, "step": 5003 }, { "epoch": 0.25, "grad_norm": 0.5720773339271545, "learning_rate": 0.0005901616616455292, "loss": 3.2363, "step": 5004 }, { "epoch": 0.25, "grad_norm": 0.5559942126274109, "learning_rate": 0.0005901577507084671, "loss": 3.4998, "step": 5005 }, { "epoch": 0.25, "grad_norm": 0.5338559746742249, "learning_rate": 0.0005901538390071853, "loss": 3.6233, "step": 5006 }, { "epoch": 0.25, "grad_norm": 0.5544248223304749, "learning_rate": 0.0005901499265416939, "loss": 3.4523, "step": 5007 }, { "epoch": 0.25, "grad_norm": 0.5183117985725403, "learning_rate": 0.0005901460133120037, "loss": 3.4746, "step": 5008 }, { "epoch": 0.25, "grad_norm": 0.5503129959106445, "learning_rate": 0.0005901420993181247, "loss": 3.5572, "step": 5009 }, { "epoch": 0.25, "grad_norm": 0.6068310141563416, "learning_rate": 0.000590138184560067, "loss": 3.5988, "step": 5010 }, { "epoch": 0.25, "grad_norm": 0.5316990613937378, "learning_rate": 0.0005901342690378413, "loss": 3.6803, "step": 5011 }, { "epoch": 0.25, "grad_norm": 0.516459047794342, "learning_rate": 0.0005901303527514579, "loss": 3.3902, "step": 5012 }, { "epoch": 0.25, "grad_norm": 0.5768346786499023, "learning_rate": 0.0005901264357009269, "loss": 3.572, "step": 5013 }, { "epoch": 0.25, "grad_norm": 0.5403907299041748, "learning_rate": 0.0005901225178862588, "loss": 3.5612, "step": 5014 }, { "epoch": 0.25, "grad_norm": 0.5107371807098389, "learning_rate": 0.0005901185993074637, "loss": 3.4745, "step": 5015 }, { "epoch": 0.25, "grad_norm": 0.527996301651001, "learning_rate": 0.0005901146799645521, "loss": 3.5359, "step": 5016 }, { "epoch": 0.25, "grad_norm": 0.5773131847381592, "learning_rate": 0.0005901107598575342, "loss": 3.4109, "step": 5017 }, { "epoch": 0.25, "grad_norm": 0.5386675596237183, "learning_rate": 0.0005901068389864204, "loss": 3.4633, "step": 5018 }, { "epoch": 0.25, "grad_norm": 0.5442395806312561, "learning_rate": 0.0005901029173512211, "loss": 3.5547, "step": 5019 }, { "epoch": 0.25, "grad_norm": 0.5196928977966309, "learning_rate": 0.0005900989949519465, "loss": 3.5074, "step": 5020 }, { "epoch": 0.25, "grad_norm": 0.5516992211341858, "learning_rate": 0.000590095071788607, "loss": 3.3833, "step": 5021 }, { "epoch": 0.25, "grad_norm": 0.5801973342895508, "learning_rate": 0.0005900911478612129, "loss": 3.2449, "step": 5022 }, { "epoch": 0.25, "grad_norm": 0.5835938453674316, "learning_rate": 0.0005900872231697745, "loss": 3.4805, "step": 5023 }, { "epoch": 0.25, "grad_norm": 0.5422643423080444, "learning_rate": 0.0005900832977143022, "loss": 3.347, "step": 5024 }, { "epoch": 0.25, "grad_norm": 0.5615981221199036, "learning_rate": 0.0005900793714948065, "loss": 3.234, "step": 5025 }, { "epoch": 0.25, "grad_norm": 0.616595447063446, "learning_rate": 0.0005900754445112974, "loss": 3.3415, "step": 5026 }, { "epoch": 0.25, "grad_norm": 0.5145601034164429, "learning_rate": 0.0005900715167637854, "loss": 3.222, "step": 5027 }, { "epoch": 0.25, "grad_norm": 0.5435516834259033, "learning_rate": 0.0005900675882522808, "loss": 3.5811, "step": 5028 }, { "epoch": 0.25, "grad_norm": 0.5470258593559265, "learning_rate": 0.0005900636589767941, "loss": 3.4718, "step": 5029 }, { "epoch": 0.25, "grad_norm": 0.5767030119895935, "learning_rate": 0.0005900597289373355, "loss": 3.4194, "step": 5030 }, { "epoch": 0.25, "grad_norm": 0.5893784761428833, "learning_rate": 0.0005900557981339153, "loss": 3.4108, "step": 5031 }, { "epoch": 0.25, "grad_norm": 0.5466517806053162, "learning_rate": 0.0005900518665665441, "loss": 3.439, "step": 5032 }, { "epoch": 0.25, "grad_norm": 0.544185996055603, "learning_rate": 0.000590047934235232, "loss": 3.5536, "step": 5033 }, { "epoch": 0.25, "grad_norm": 0.5449085235595703, "learning_rate": 0.0005900440011399895, "loss": 3.527, "step": 5034 }, { "epoch": 0.25, "grad_norm": 0.5282718539237976, "learning_rate": 0.000590040067280827, "loss": 3.6722, "step": 5035 }, { "epoch": 0.25, "grad_norm": 0.5545849204063416, "learning_rate": 0.0005900361326577547, "loss": 3.5084, "step": 5036 }, { "epoch": 0.25, "grad_norm": 0.5555055141448975, "learning_rate": 0.0005900321972707831, "loss": 3.2376, "step": 5037 }, { "epoch": 0.25, "grad_norm": 0.5830196142196655, "learning_rate": 0.0005900282611199225, "loss": 3.3006, "step": 5038 }, { "epoch": 0.25, "grad_norm": 0.5802530646324158, "learning_rate": 0.0005900243242051833, "loss": 3.2766, "step": 5039 }, { "epoch": 0.25, "grad_norm": 0.5483977198600769, "learning_rate": 0.0005900203865265757, "loss": 3.5217, "step": 5040 }, { "epoch": 0.25, "grad_norm": 0.5744450092315674, "learning_rate": 0.0005900164480841104, "loss": 3.4969, "step": 5041 }, { "epoch": 0.25, "grad_norm": 0.5572537779808044, "learning_rate": 0.0005900125088777974, "loss": 3.4738, "step": 5042 }, { "epoch": 0.25, "grad_norm": 0.5599087476730347, "learning_rate": 0.0005900085689076474, "loss": 3.4334, "step": 5043 }, { "epoch": 0.25, "grad_norm": 0.5435726046562195, "learning_rate": 0.0005900046281736707, "loss": 3.5784, "step": 5044 }, { "epoch": 0.25, "grad_norm": 0.5599098205566406, "learning_rate": 0.0005900006866758774, "loss": 3.4954, "step": 5045 }, { "epoch": 0.25, "grad_norm": 0.5361737608909607, "learning_rate": 0.0005899967444142783, "loss": 3.5627, "step": 5046 }, { "epoch": 0.25, "grad_norm": 0.5649425387382507, "learning_rate": 0.0005899928013888835, "loss": 3.447, "step": 5047 }, { "epoch": 0.25, "grad_norm": 0.568342924118042, "learning_rate": 0.0005899888575997035, "loss": 3.4439, "step": 5048 }, { "epoch": 0.25, "grad_norm": 0.5820963382720947, "learning_rate": 0.0005899849130467488, "loss": 3.4019, "step": 5049 }, { "epoch": 0.25, "grad_norm": 0.5288267135620117, "learning_rate": 0.0005899809677300294, "loss": 3.6183, "step": 5050 }, { "epoch": 0.25, "grad_norm": 0.5664812326431274, "learning_rate": 0.0005899770216495561, "loss": 3.4155, "step": 5051 }, { "epoch": 0.25, "grad_norm": 0.5544307827949524, "learning_rate": 0.000589973074805339, "loss": 3.1679, "step": 5052 }, { "epoch": 0.25, "grad_norm": 0.5682546496391296, "learning_rate": 0.0005899691271973888, "loss": 3.3364, "step": 5053 }, { "epoch": 0.25, "grad_norm": 0.5925281643867493, "learning_rate": 0.0005899651788257155, "loss": 3.2977, "step": 5054 }, { "epoch": 0.25, "grad_norm": 0.5429676175117493, "learning_rate": 0.00058996122969033, "loss": 3.4784, "step": 5055 }, { "epoch": 0.25, "grad_norm": 0.5814798474311829, "learning_rate": 0.0005899572797912422, "loss": 3.2982, "step": 5056 }, { "epoch": 0.25, "grad_norm": 0.5307891964912415, "learning_rate": 0.0005899533291284628, "loss": 3.6792, "step": 5057 }, { "epoch": 0.25, "grad_norm": 0.570961594581604, "learning_rate": 0.0005899493777020021, "loss": 3.4474, "step": 5058 }, { "epoch": 0.25, "grad_norm": 0.5177574157714844, "learning_rate": 0.0005899454255118707, "loss": 3.3287, "step": 5059 }, { "epoch": 0.25, "grad_norm": 0.5786210894584656, "learning_rate": 0.0005899414725580787, "loss": 3.4678, "step": 5060 }, { "epoch": 0.25, "grad_norm": 0.5470947623252869, "learning_rate": 0.0005899375188406366, "loss": 3.5344, "step": 5061 }, { "epoch": 0.25, "grad_norm": 0.5230193138122559, "learning_rate": 0.000589933564359555, "loss": 3.5553, "step": 5062 }, { "epoch": 0.25, "grad_norm": 0.5864706635475159, "learning_rate": 0.0005899296091148442, "loss": 3.3547, "step": 5063 }, { "epoch": 0.25, "grad_norm": 0.5561924576759338, "learning_rate": 0.0005899256531065145, "loss": 3.6271, "step": 5064 }, { "epoch": 0.25, "grad_norm": 0.5769221186637878, "learning_rate": 0.0005899216963345766, "loss": 3.4765, "step": 5065 }, { "epoch": 0.25, "grad_norm": 0.5741177201271057, "learning_rate": 0.0005899177387990406, "loss": 3.5332, "step": 5066 }, { "epoch": 0.25, "grad_norm": 0.550760805606842, "learning_rate": 0.000589913780499917, "loss": 3.583, "step": 5067 }, { "epoch": 0.25, "grad_norm": 0.6087910532951355, "learning_rate": 0.0005899098214372164, "loss": 3.2774, "step": 5068 }, { "epoch": 0.25, "grad_norm": 0.6020672917366028, "learning_rate": 0.0005899058616109492, "loss": 3.5222, "step": 5069 }, { "epoch": 0.25, "grad_norm": 0.5288599133491516, "learning_rate": 0.0005899019010211258, "loss": 3.5523, "step": 5070 }, { "epoch": 0.25, "grad_norm": 0.6012958288192749, "learning_rate": 0.0005898979396677564, "loss": 3.5154, "step": 5071 }, { "epoch": 0.25, "grad_norm": 0.5777899622917175, "learning_rate": 0.0005898939775508517, "loss": 3.2862, "step": 5072 }, { "epoch": 0.25, "grad_norm": 0.5450665950775146, "learning_rate": 0.000589890014670422, "loss": 3.2721, "step": 5073 }, { "epoch": 0.25, "grad_norm": 0.5397341847419739, "learning_rate": 0.0005898860510264778, "loss": 3.3992, "step": 5074 }, { "epoch": 0.25, "grad_norm": 0.5461152195930481, "learning_rate": 0.0005898820866190297, "loss": 3.4282, "step": 5075 }, { "epoch": 0.25, "grad_norm": 0.5408706665039062, "learning_rate": 0.0005898781214480878, "loss": 3.5096, "step": 5076 }, { "epoch": 0.25, "grad_norm": 0.5572208166122437, "learning_rate": 0.0005898741555136627, "loss": 3.4326, "step": 5077 }, { "epoch": 0.25, "grad_norm": 0.5309155583381653, "learning_rate": 0.0005898701888157649, "loss": 3.3239, "step": 5078 }, { "epoch": 0.25, "grad_norm": 0.5635367035865784, "learning_rate": 0.0005898662213544048, "loss": 3.2986, "step": 5079 }, { "epoch": 0.25, "grad_norm": 0.5413909554481506, "learning_rate": 0.0005898622531295929, "loss": 3.5096, "step": 5080 }, { "epoch": 0.25, "grad_norm": 0.5198115110397339, "learning_rate": 0.0005898582841413397, "loss": 3.3075, "step": 5081 }, { "epoch": 0.25, "grad_norm": 0.5086342692375183, "learning_rate": 0.0005898543143896555, "loss": 3.3621, "step": 5082 }, { "epoch": 0.25, "grad_norm": 0.6958548426628113, "learning_rate": 0.0005898503438745507, "loss": 3.461, "step": 5083 }, { "epoch": 0.25, "grad_norm": 0.5100436210632324, "learning_rate": 0.0005898463725960359, "loss": 3.3348, "step": 5084 }, { "epoch": 0.25, "grad_norm": 0.6006938815116882, "learning_rate": 0.0005898424005541216, "loss": 3.4243, "step": 5085 }, { "epoch": 0.25, "grad_norm": 0.5753837823867798, "learning_rate": 0.0005898384277488182, "loss": 3.5554, "step": 5086 }, { "epoch": 0.25, "grad_norm": 0.5907991528511047, "learning_rate": 0.0005898344541801362, "loss": 3.4331, "step": 5087 }, { "epoch": 0.25, "grad_norm": 0.5146076679229736, "learning_rate": 0.000589830479848086, "loss": 3.3868, "step": 5088 }, { "epoch": 0.25, "grad_norm": 0.5464553833007812, "learning_rate": 0.0005898265047526781, "loss": 3.468, "step": 5089 }, { "epoch": 0.25, "grad_norm": 0.5454960465431213, "learning_rate": 0.000589822528893923, "loss": 3.4598, "step": 5090 }, { "epoch": 0.25, "grad_norm": 0.5484764575958252, "learning_rate": 0.000589818552271831, "loss": 3.3738, "step": 5091 }, { "epoch": 0.25, "grad_norm": 0.554803729057312, "learning_rate": 0.0005898145748864128, "loss": 3.1957, "step": 5092 }, { "epoch": 0.25, "grad_norm": 0.5293271541595459, "learning_rate": 0.0005898105967376788, "loss": 3.5345, "step": 5093 }, { "epoch": 0.25, "grad_norm": 0.56730717420578, "learning_rate": 0.0005898066178256394, "loss": 3.4173, "step": 5094 }, { "epoch": 0.25, "grad_norm": 0.5431801080703735, "learning_rate": 0.0005898026381503051, "loss": 3.3932, "step": 5095 }, { "epoch": 0.25, "grad_norm": 0.6341773867607117, "learning_rate": 0.0005897986577116865, "loss": 3.318, "step": 5096 }, { "epoch": 0.25, "grad_norm": 0.5350258946418762, "learning_rate": 0.0005897946765097941, "loss": 3.4459, "step": 5097 }, { "epoch": 0.25, "grad_norm": 0.5501015186309814, "learning_rate": 0.0005897906945446382, "loss": 3.2029, "step": 5098 }, { "epoch": 0.25, "grad_norm": 0.5346304178237915, "learning_rate": 0.0005897867118162294, "loss": 3.4928, "step": 5099 }, { "epoch": 0.25, "grad_norm": 0.6124736070632935, "learning_rate": 0.0005897827283245781, "loss": 3.65, "step": 5100 }, { "epoch": 0.25, "grad_norm": 0.5016577839851379, "learning_rate": 0.000589778744069695, "loss": 3.3579, "step": 5101 }, { "epoch": 0.25, "grad_norm": 0.541294276714325, "learning_rate": 0.0005897747590515904, "loss": 3.1981, "step": 5102 }, { "epoch": 0.25, "grad_norm": 0.5059472322463989, "learning_rate": 0.0005897707732702748, "loss": 3.3077, "step": 5103 }, { "epoch": 0.25, "grad_norm": 0.5518838763237, "learning_rate": 0.0005897667867257588, "loss": 3.3557, "step": 5104 }, { "epoch": 0.25, "grad_norm": 0.5457679033279419, "learning_rate": 0.0005897627994180529, "loss": 3.3617, "step": 5105 }, { "epoch": 0.25, "grad_norm": 0.6038949489593506, "learning_rate": 0.0005897588113471674, "loss": 3.2915, "step": 5106 }, { "epoch": 0.25, "grad_norm": 0.590950071811676, "learning_rate": 0.000589754822513113, "loss": 3.1439, "step": 5107 }, { "epoch": 0.25, "grad_norm": 0.5621350407600403, "learning_rate": 0.0005897508329159003, "loss": 3.2661, "step": 5108 }, { "epoch": 0.25, "grad_norm": 0.5230415463447571, "learning_rate": 0.0005897468425555395, "loss": 3.5213, "step": 5109 }, { "epoch": 0.25, "grad_norm": 0.5285176038742065, "learning_rate": 0.0005897428514320414, "loss": 3.3294, "step": 5110 }, { "epoch": 0.25, "grad_norm": 0.5499192476272583, "learning_rate": 0.0005897388595454164, "loss": 3.4348, "step": 5111 }, { "epoch": 0.25, "grad_norm": 0.5603438019752502, "learning_rate": 0.000589734866895675, "loss": 3.4721, "step": 5112 }, { "epoch": 0.25, "grad_norm": 0.5261187553405762, "learning_rate": 0.0005897308734828276, "loss": 3.4034, "step": 5113 }, { "epoch": 0.25, "grad_norm": 0.5853512287139893, "learning_rate": 0.0005897268793068848, "loss": 3.3507, "step": 5114 }, { "epoch": 0.25, "grad_norm": 0.586706817150116, "learning_rate": 0.0005897228843678574, "loss": 3.2244, "step": 5115 }, { "epoch": 0.25, "grad_norm": 0.5517246127128601, "learning_rate": 0.0005897188886657555, "loss": 3.5185, "step": 5116 }, { "epoch": 0.25, "grad_norm": 0.5283602476119995, "learning_rate": 0.00058971489220059, "loss": 3.2524, "step": 5117 }, { "epoch": 0.25, "grad_norm": 0.5656755566596985, "learning_rate": 0.000589710894972371, "loss": 3.5088, "step": 5118 }, { "epoch": 0.25, "grad_norm": 0.5920532941818237, "learning_rate": 0.0005897068969811094, "loss": 3.5174, "step": 5119 }, { "epoch": 0.25, "grad_norm": 0.5770808458328247, "learning_rate": 0.0005897028982268155, "loss": 3.304, "step": 5120 }, { "epoch": 0.25, "grad_norm": 0.542290210723877, "learning_rate": 0.0005896988987094999, "loss": 3.5272, "step": 5121 }, { "epoch": 0.25, "grad_norm": 0.5746126770973206, "learning_rate": 0.0005896948984291733, "loss": 3.6472, "step": 5122 }, { "epoch": 0.25, "grad_norm": 0.5588904023170471, "learning_rate": 0.0005896908973858459, "loss": 3.5337, "step": 5123 }, { "epoch": 0.25, "grad_norm": 0.5217200517654419, "learning_rate": 0.0005896868955795286, "loss": 3.3863, "step": 5124 }, { "epoch": 0.25, "grad_norm": 0.5870938897132874, "learning_rate": 0.0005896828930102316, "loss": 3.3279, "step": 5125 }, { "epoch": 0.25, "grad_norm": 0.6563788652420044, "learning_rate": 0.0005896788896779658, "loss": 3.2821, "step": 5126 }, { "epoch": 0.25, "grad_norm": 0.5480024218559265, "learning_rate": 0.0005896748855827414, "loss": 3.3585, "step": 5127 }, { "epoch": 0.25, "grad_norm": 0.5863211154937744, "learning_rate": 0.0005896708807245693, "loss": 3.4657, "step": 5128 }, { "epoch": 0.25, "grad_norm": 0.6124269366264343, "learning_rate": 0.0005896668751034596, "loss": 3.5115, "step": 5129 }, { "epoch": 0.25, "grad_norm": 0.6593347191810608, "learning_rate": 0.0005896628687194232, "loss": 3.3722, "step": 5130 }, { "epoch": 0.25, "grad_norm": 0.5311346054077148, "learning_rate": 0.0005896588615724706, "loss": 3.2849, "step": 5131 }, { "epoch": 0.25, "grad_norm": 0.5981443524360657, "learning_rate": 0.0005896548536626123, "loss": 3.2468, "step": 5132 }, { "epoch": 0.25, "grad_norm": 0.5324131846427917, "learning_rate": 0.0005896508449898587, "loss": 3.4398, "step": 5133 }, { "epoch": 0.25, "grad_norm": 0.5485259294509888, "learning_rate": 0.0005896468355542206, "loss": 3.3877, "step": 5134 }, { "epoch": 0.25, "grad_norm": 0.6059028506278992, "learning_rate": 0.0005896428253557086, "loss": 3.2956, "step": 5135 }, { "epoch": 0.25, "grad_norm": 0.5261827111244202, "learning_rate": 0.0005896388143943331, "loss": 3.5208, "step": 5136 }, { "epoch": 0.25, "grad_norm": 0.596929669380188, "learning_rate": 0.0005896348026701047, "loss": 3.3708, "step": 5137 }, { "epoch": 0.25, "grad_norm": 0.5352926254272461, "learning_rate": 0.0005896307901830339, "loss": 3.3242, "step": 5138 }, { "epoch": 0.25, "grad_norm": 0.5876359939575195, "learning_rate": 0.0005896267769331313, "loss": 3.6109, "step": 5139 }, { "epoch": 0.25, "grad_norm": 0.5615381598472595, "learning_rate": 0.0005896227629204077, "loss": 3.2824, "step": 5140 }, { "epoch": 0.25, "grad_norm": 0.5441862940788269, "learning_rate": 0.0005896187481448734, "loss": 3.316, "step": 5141 }, { "epoch": 0.25, "grad_norm": 0.5573780536651611, "learning_rate": 0.000589614732606539, "loss": 3.3629, "step": 5142 }, { "epoch": 0.25, "grad_norm": 0.5792946815490723, "learning_rate": 0.0005896107163054151, "loss": 3.4812, "step": 5143 }, { "epoch": 0.25, "grad_norm": 0.5667078495025635, "learning_rate": 0.0005896066992415125, "loss": 3.4833, "step": 5144 }, { "epoch": 0.25, "grad_norm": 0.5536587834358215, "learning_rate": 0.0005896026814148414, "loss": 3.3131, "step": 5145 }, { "epoch": 0.25, "grad_norm": 0.5453284978866577, "learning_rate": 0.0005895986628254126, "loss": 3.518, "step": 5146 }, { "epoch": 0.25, "grad_norm": 0.6266303062438965, "learning_rate": 0.0005895946434732366, "loss": 3.3667, "step": 5147 }, { "epoch": 0.25, "grad_norm": 0.5294222831726074, "learning_rate": 0.0005895906233583242, "loss": 3.3286, "step": 5148 }, { "epoch": 0.25, "grad_norm": 0.6126430034637451, "learning_rate": 0.0005895866024806858, "loss": 3.3122, "step": 5149 }, { "epoch": 0.25, "grad_norm": 0.5357118248939514, "learning_rate": 0.0005895825808403321, "loss": 3.3943, "step": 5150 }, { "epoch": 0.25, "grad_norm": 0.5523558855056763, "learning_rate": 0.0005895785584372734, "loss": 3.5076, "step": 5151 }, { "epoch": 0.25, "grad_norm": 0.7012884020805359, "learning_rate": 0.0005895745352715205, "loss": 3.0987, "step": 5152 }, { "epoch": 0.25, "grad_norm": 0.5519027709960938, "learning_rate": 0.0005895705113430842, "loss": 3.6015, "step": 5153 }, { "epoch": 0.25, "grad_norm": 0.6840552091598511, "learning_rate": 0.0005895664866519748, "loss": 3.2324, "step": 5154 }, { "epoch": 0.25, "grad_norm": 0.5728353261947632, "learning_rate": 0.000589562461198203, "loss": 3.5145, "step": 5155 }, { "epoch": 0.25, "grad_norm": 0.5669745802879333, "learning_rate": 0.0005895584349817794, "loss": 3.2349, "step": 5156 }, { "epoch": 0.25, "grad_norm": 0.551141083240509, "learning_rate": 0.0005895544080027147, "loss": 3.4344, "step": 5157 }, { "epoch": 0.25, "grad_norm": 0.5899537205696106, "learning_rate": 0.0005895503802610193, "loss": 3.5101, "step": 5158 }, { "epoch": 0.25, "grad_norm": 0.541935384273529, "learning_rate": 0.000589546351756704, "loss": 3.6462, "step": 5159 }, { "epoch": 0.25, "grad_norm": 0.5857089757919312, "learning_rate": 0.0005895423224897792, "loss": 3.5987, "step": 5160 }, { "epoch": 0.25, "grad_norm": 0.5397253632545471, "learning_rate": 0.0005895382924602558, "loss": 3.4983, "step": 5161 }, { "epoch": 0.25, "grad_norm": 0.5339411497116089, "learning_rate": 0.0005895342616681442, "loss": 3.3218, "step": 5162 }, { "epoch": 0.25, "grad_norm": 0.5548738241195679, "learning_rate": 0.0005895302301134551, "loss": 3.5583, "step": 5163 }, { "epoch": 0.25, "grad_norm": 0.5891041159629822, "learning_rate": 0.0005895261977961991, "loss": 3.4377, "step": 5164 }, { "epoch": 0.25, "grad_norm": 0.569484531879425, "learning_rate": 0.0005895221647163867, "loss": 3.3795, "step": 5165 }, { "epoch": 0.25, "grad_norm": 0.5073286890983582, "learning_rate": 0.0005895181308740288, "loss": 3.6983, "step": 5166 }, { "epoch": 0.25, "grad_norm": 0.550396203994751, "learning_rate": 0.0005895140962691358, "loss": 3.3491, "step": 5167 }, { "epoch": 0.25, "grad_norm": 0.502250611782074, "learning_rate": 0.0005895100609017184, "loss": 3.3437, "step": 5168 }, { "epoch": 0.25, "grad_norm": 0.5587743520736694, "learning_rate": 0.0005895060247717871, "loss": 3.6706, "step": 5169 }, { "epoch": 0.25, "grad_norm": 0.5262305736541748, "learning_rate": 0.0005895019878793527, "loss": 3.4108, "step": 5170 }, { "epoch": 0.25, "grad_norm": 0.5481409430503845, "learning_rate": 0.0005894979502244259, "loss": 3.4605, "step": 5171 }, { "epoch": 0.25, "grad_norm": 0.5923982858657837, "learning_rate": 0.0005894939118070171, "loss": 3.4484, "step": 5172 }, { "epoch": 0.25, "grad_norm": 0.5860485434532166, "learning_rate": 0.0005894898726271371, "loss": 3.0184, "step": 5173 }, { "epoch": 0.25, "grad_norm": 0.5705693364143372, "learning_rate": 0.0005894858326847965, "loss": 3.439, "step": 5174 }, { "epoch": 0.25, "grad_norm": 0.5961979031562805, "learning_rate": 0.0005894817919800059, "loss": 3.5159, "step": 5175 }, { "epoch": 0.25, "grad_norm": 0.5484743714332581, "learning_rate": 0.000589477750512776, "loss": 3.2929, "step": 5176 }, { "epoch": 0.25, "grad_norm": 0.628601610660553, "learning_rate": 0.0005894737082831173, "loss": 3.3603, "step": 5177 }, { "epoch": 0.25, "grad_norm": 0.5901480317115784, "learning_rate": 0.0005894696652910408, "loss": 3.3222, "step": 5178 }, { "epoch": 0.25, "grad_norm": 0.555828869342804, "learning_rate": 0.0005894656215365567, "loss": 3.4859, "step": 5179 }, { "epoch": 0.25, "grad_norm": 0.5342446565628052, "learning_rate": 0.000589461577019676, "loss": 3.7213, "step": 5180 }, { "epoch": 0.25, "grad_norm": 0.5517928004264832, "learning_rate": 0.000589457531740409, "loss": 3.5133, "step": 5181 }, { "epoch": 0.25, "grad_norm": 0.5539923906326294, "learning_rate": 0.0005894534856987668, "loss": 3.4874, "step": 5182 }, { "epoch": 0.25, "grad_norm": 0.5563471913337708, "learning_rate": 0.0005894494388947598, "loss": 3.4176, "step": 5183 }, { "epoch": 0.25, "grad_norm": 0.5729748606681824, "learning_rate": 0.0005894453913283986, "loss": 3.4297, "step": 5184 }, { "epoch": 0.25, "grad_norm": 0.577461302280426, "learning_rate": 0.0005894413429996939, "loss": 3.0731, "step": 5185 }, { "epoch": 0.25, "grad_norm": 0.5624281764030457, "learning_rate": 0.0005894372939086565, "loss": 3.6104, "step": 5186 }, { "epoch": 0.25, "grad_norm": 0.593137800693512, "learning_rate": 0.0005894332440552969, "loss": 3.4637, "step": 5187 }, { "epoch": 0.25, "grad_norm": 0.5267358422279358, "learning_rate": 0.0005894291934396259, "loss": 3.2419, "step": 5188 }, { "epoch": 0.25, "grad_norm": 0.6192349195480347, "learning_rate": 0.0005894251420616541, "loss": 3.5089, "step": 5189 }, { "epoch": 0.25, "grad_norm": 0.5877721905708313, "learning_rate": 0.0005894210899213921, "loss": 3.4892, "step": 5190 }, { "epoch": 0.25, "grad_norm": 0.7060837149620056, "learning_rate": 0.0005894170370188508, "loss": 3.3611, "step": 5191 }, { "epoch": 0.25, "grad_norm": 0.5571918487548828, "learning_rate": 0.0005894129833540405, "loss": 3.5587, "step": 5192 }, { "epoch": 0.25, "grad_norm": 0.5204548239707947, "learning_rate": 0.0005894089289269724, "loss": 3.4699, "step": 5193 }, { "epoch": 0.25, "grad_norm": 0.5898545384407043, "learning_rate": 0.0005894048737376567, "loss": 3.499, "step": 5194 }, { "epoch": 0.25, "grad_norm": 0.5386577248573303, "learning_rate": 0.0005894008177861043, "loss": 3.3448, "step": 5195 }, { "epoch": 0.25, "grad_norm": 0.5543986558914185, "learning_rate": 0.0005893967610723257, "loss": 3.4441, "step": 5196 }, { "epoch": 0.25, "grad_norm": 0.6119479537010193, "learning_rate": 0.0005893927035963319, "loss": 3.3068, "step": 5197 }, { "epoch": 0.25, "grad_norm": 0.5174164772033691, "learning_rate": 0.0005893886453581334, "loss": 3.2137, "step": 5198 }, { "epoch": 0.25, "grad_norm": 0.5318208932876587, "learning_rate": 0.0005893845863577409, "loss": 3.4068, "step": 5199 }, { "epoch": 0.25, "grad_norm": 0.558030366897583, "learning_rate": 0.000589380526595165, "loss": 3.1989, "step": 5200 }, { "epoch": 0.25, "grad_norm": 0.5202679634094238, "learning_rate": 0.0005893764660704166, "loss": 3.3643, "step": 5201 }, { "epoch": 0.25, "grad_norm": 0.5347263813018799, "learning_rate": 0.0005893724047835062, "loss": 3.4693, "step": 5202 }, { "epoch": 0.25, "grad_norm": 0.5433123111724854, "learning_rate": 0.0005893683427344445, "loss": 3.2793, "step": 5203 }, { "epoch": 0.26, "grad_norm": 0.5789240598678589, "learning_rate": 0.0005893642799232425, "loss": 3.1991, "step": 5204 }, { "epoch": 0.26, "grad_norm": 0.5516834259033203, "learning_rate": 0.0005893602163499105, "loss": 3.4335, "step": 5205 }, { "epoch": 0.26, "grad_norm": 0.5537156462669373, "learning_rate": 0.0005893561520144594, "loss": 3.3853, "step": 5206 }, { "epoch": 0.26, "grad_norm": 0.5437060594558716, "learning_rate": 0.0005893520869168999, "loss": 3.301, "step": 5207 }, { "epoch": 0.26, "grad_norm": 0.6070511341094971, "learning_rate": 0.0005893480210572427, "loss": 3.4726, "step": 5208 }, { "epoch": 0.26, "grad_norm": 0.5556560754776001, "learning_rate": 0.0005893439544354984, "loss": 3.2371, "step": 5209 }, { "epoch": 0.26, "grad_norm": 0.5555329322814941, "learning_rate": 0.000589339887051678, "loss": 3.3675, "step": 5210 }, { "epoch": 0.26, "grad_norm": 0.551421582698822, "learning_rate": 0.0005893358189057919, "loss": 3.3801, "step": 5211 }, { "epoch": 0.26, "grad_norm": 0.5763034224510193, "learning_rate": 0.0005893317499978511, "loss": 3.5617, "step": 5212 }, { "epoch": 0.26, "grad_norm": 0.5456756353378296, "learning_rate": 0.0005893276803278659, "loss": 3.5274, "step": 5213 }, { "epoch": 0.26, "grad_norm": 0.5538662672042847, "learning_rate": 0.0005893236098958474, "loss": 3.3474, "step": 5214 }, { "epoch": 0.26, "grad_norm": 0.5430862307548523, "learning_rate": 0.0005893195387018062, "loss": 3.3443, "step": 5215 }, { "epoch": 0.26, "grad_norm": 0.5130735039710999, "learning_rate": 0.000589315466745753, "loss": 3.637, "step": 5216 }, { "epoch": 0.26, "grad_norm": 0.5832960605621338, "learning_rate": 0.0005893113940276985, "loss": 3.319, "step": 5217 }, { "epoch": 0.26, "grad_norm": 0.5417028665542603, "learning_rate": 0.0005893073205476535, "loss": 3.3201, "step": 5218 }, { "epoch": 0.26, "grad_norm": 0.5512108206748962, "learning_rate": 0.0005893032463056288, "loss": 3.2845, "step": 5219 }, { "epoch": 0.26, "grad_norm": 0.5375921726226807, "learning_rate": 0.0005892991713016348, "loss": 3.642, "step": 5220 }, { "epoch": 0.26, "grad_norm": 0.5193976759910583, "learning_rate": 0.0005892950955356827, "loss": 3.5162, "step": 5221 }, { "epoch": 0.26, "grad_norm": 0.5780069231987, "learning_rate": 0.0005892910190077828, "loss": 3.5228, "step": 5222 }, { "epoch": 0.26, "grad_norm": 0.5876449346542358, "learning_rate": 0.0005892869417179461, "loss": 3.2904, "step": 5223 }, { "epoch": 0.26, "grad_norm": 0.5421982407569885, "learning_rate": 0.0005892828636661833, "loss": 3.5897, "step": 5224 }, { "epoch": 0.26, "grad_norm": 0.6510359048843384, "learning_rate": 0.0005892787848525052, "loss": 3.2834, "step": 5225 }, { "epoch": 0.26, "grad_norm": 0.5625903010368347, "learning_rate": 0.0005892747052769222, "loss": 3.2265, "step": 5226 }, { "epoch": 0.26, "grad_norm": 0.5502780079841614, "learning_rate": 0.0005892706249394455, "loss": 3.3097, "step": 5227 }, { "epoch": 0.26, "grad_norm": 0.5297096967697144, "learning_rate": 0.0005892665438400856, "loss": 3.6185, "step": 5228 }, { "epoch": 0.26, "grad_norm": 0.5778887271881104, "learning_rate": 0.0005892624619788533, "loss": 3.4486, "step": 5229 }, { "epoch": 0.26, "grad_norm": 0.5754767656326294, "learning_rate": 0.0005892583793557594, "loss": 3.3823, "step": 5230 }, { "epoch": 0.26, "grad_norm": 0.5356746315956116, "learning_rate": 0.0005892542959708145, "loss": 3.686, "step": 5231 }, { "epoch": 0.26, "grad_norm": 0.5547672510147095, "learning_rate": 0.0005892502118240295, "loss": 3.4749, "step": 5232 }, { "epoch": 0.26, "grad_norm": 0.5451996326446533, "learning_rate": 0.0005892461269154151, "loss": 3.2667, "step": 5233 }, { "epoch": 0.26, "grad_norm": 0.5528019070625305, "learning_rate": 0.0005892420412449821, "loss": 3.3068, "step": 5234 }, { "epoch": 0.26, "grad_norm": 0.5734788179397583, "learning_rate": 0.0005892379548127411, "loss": 3.4308, "step": 5235 }, { "epoch": 0.26, "grad_norm": 0.6340581774711609, "learning_rate": 0.000589233867618703, "loss": 3.4311, "step": 5236 }, { "epoch": 0.26, "grad_norm": 0.6577187180519104, "learning_rate": 0.0005892297796628787, "loss": 3.4902, "step": 5237 }, { "epoch": 0.26, "grad_norm": 0.5538560152053833, "learning_rate": 0.0005892256909452788, "loss": 3.5402, "step": 5238 }, { "epoch": 0.26, "grad_norm": 0.5807637572288513, "learning_rate": 0.0005892216014659141, "loss": 3.4806, "step": 5239 }, { "epoch": 0.26, "grad_norm": 0.6450725793838501, "learning_rate": 0.0005892175112247952, "loss": 3.3013, "step": 5240 }, { "epoch": 0.26, "grad_norm": 0.5808170437812805, "learning_rate": 0.0005892134202219332, "loss": 3.4958, "step": 5241 }, { "epoch": 0.26, "grad_norm": 0.5749102234840393, "learning_rate": 0.0005892093284573386, "loss": 3.4446, "step": 5242 }, { "epoch": 0.26, "grad_norm": 0.5990297794342041, "learning_rate": 0.0005892052359310224, "loss": 3.2321, "step": 5243 }, { "epoch": 0.26, "grad_norm": 0.5670678019523621, "learning_rate": 0.0005892011426429952, "loss": 3.5992, "step": 5244 }, { "epoch": 0.26, "grad_norm": 0.5572386384010315, "learning_rate": 0.0005891970485932679, "loss": 3.3791, "step": 5245 }, { "epoch": 0.26, "grad_norm": 0.5236799120903015, "learning_rate": 0.0005891929537818512, "loss": 3.5792, "step": 5246 }, { "epoch": 0.26, "grad_norm": 0.595807671546936, "learning_rate": 0.0005891888582087559, "loss": 3.1541, "step": 5247 }, { "epoch": 0.26, "grad_norm": 0.5384485125541687, "learning_rate": 0.0005891847618739929, "loss": 3.0939, "step": 5248 }, { "epoch": 0.26, "grad_norm": 0.5316618084907532, "learning_rate": 0.0005891806647775727, "loss": 3.6524, "step": 5249 }, { "epoch": 0.26, "grad_norm": 0.527287483215332, "learning_rate": 0.0005891765669195064, "loss": 3.4803, "step": 5250 }, { "epoch": 0.26, "grad_norm": 0.5352149605751038, "learning_rate": 0.0005891724682998046, "loss": 3.4944, "step": 5251 }, { "epoch": 0.26, "grad_norm": 0.5439637303352356, "learning_rate": 0.0005891683689184784, "loss": 3.3703, "step": 5252 }, { "epoch": 0.26, "grad_norm": 0.5407761335372925, "learning_rate": 0.0005891642687755382, "loss": 3.635, "step": 5253 }, { "epoch": 0.26, "grad_norm": 0.5867874622344971, "learning_rate": 0.0005891601678709949, "loss": 3.3837, "step": 5254 }, { "epoch": 0.26, "grad_norm": 0.5279407501220703, "learning_rate": 0.0005891560662048595, "loss": 3.3589, "step": 5255 }, { "epoch": 0.26, "grad_norm": 0.549512505531311, "learning_rate": 0.0005891519637771426, "loss": 3.3413, "step": 5256 }, { "epoch": 0.26, "grad_norm": 0.566560685634613, "learning_rate": 0.0005891478605878551, "loss": 3.4066, "step": 5257 }, { "epoch": 0.26, "grad_norm": 0.5679735541343689, "learning_rate": 0.0005891437566370078, "loss": 3.3625, "step": 5258 }, { "epoch": 0.26, "grad_norm": 0.5230506062507629, "learning_rate": 0.0005891396519246114, "loss": 3.3698, "step": 5259 }, { "epoch": 0.26, "grad_norm": 0.5605278611183167, "learning_rate": 0.0005891355464506769, "loss": 3.3979, "step": 5260 }, { "epoch": 0.26, "grad_norm": 0.5670188069343567, "learning_rate": 0.000589131440215215, "loss": 3.5514, "step": 5261 }, { "epoch": 0.26, "grad_norm": 0.5283556580543518, "learning_rate": 0.0005891273332182365, "loss": 3.416, "step": 5262 }, { "epoch": 0.26, "grad_norm": 0.5201714038848877, "learning_rate": 0.0005891232254597523, "loss": 3.3487, "step": 5263 }, { "epoch": 0.26, "grad_norm": 0.6400845646858215, "learning_rate": 0.0005891191169397731, "loss": 3.2951, "step": 5264 }, { "epoch": 0.26, "grad_norm": 0.5510321259498596, "learning_rate": 0.0005891150076583098, "loss": 3.4685, "step": 5265 }, { "epoch": 0.26, "grad_norm": 0.5668927431106567, "learning_rate": 0.0005891108976153732, "loss": 3.4804, "step": 5266 }, { "epoch": 0.26, "grad_norm": 0.5134303569793701, "learning_rate": 0.0005891067868109743, "loss": 3.3349, "step": 5267 }, { "epoch": 0.26, "grad_norm": 0.6687813997268677, "learning_rate": 0.0005891026752451235, "loss": 3.4913, "step": 5268 }, { "epoch": 0.26, "grad_norm": 0.5363355278968811, "learning_rate": 0.0005890985629178321, "loss": 3.3884, "step": 5269 }, { "epoch": 0.26, "grad_norm": 0.5483888387680054, "learning_rate": 0.0005890944498291106, "loss": 3.2948, "step": 5270 }, { "epoch": 0.26, "grad_norm": 0.5730322599411011, "learning_rate": 0.00058909033597897, "loss": 3.4693, "step": 5271 }, { "epoch": 0.26, "grad_norm": 0.6208261847496033, "learning_rate": 0.0005890862213674211, "loss": 3.403, "step": 5272 }, { "epoch": 0.26, "grad_norm": 0.5502722263336182, "learning_rate": 0.0005890821059944748, "loss": 3.442, "step": 5273 }, { "epoch": 0.26, "grad_norm": 0.5494133234024048, "learning_rate": 0.0005890779898601417, "loss": 3.4079, "step": 5274 }, { "epoch": 0.26, "grad_norm": 0.5719727873802185, "learning_rate": 0.0005890738729644329, "loss": 3.4966, "step": 5275 }, { "epoch": 0.26, "grad_norm": 0.5802255868911743, "learning_rate": 0.0005890697553073591, "loss": 3.4647, "step": 5276 }, { "epoch": 0.26, "grad_norm": 0.5484011769294739, "learning_rate": 0.0005890656368889313, "loss": 3.4309, "step": 5277 }, { "epoch": 0.26, "grad_norm": 0.5723143219947815, "learning_rate": 0.0005890615177091601, "loss": 3.6246, "step": 5278 }, { "epoch": 0.26, "grad_norm": 0.5343371033668518, "learning_rate": 0.0005890573977680567, "loss": 3.6963, "step": 5279 }, { "epoch": 0.26, "grad_norm": 0.5718325972557068, "learning_rate": 0.0005890532770656316, "loss": 3.487, "step": 5280 }, { "epoch": 0.26, "grad_norm": 0.5279609560966492, "learning_rate": 0.0005890491556018958, "loss": 3.4661, "step": 5281 }, { "epoch": 0.26, "grad_norm": 0.5968024134635925, "learning_rate": 0.0005890450333768601, "loss": 3.533, "step": 5282 }, { "epoch": 0.26, "grad_norm": 0.5400145053863525, "learning_rate": 0.0005890409103905356, "loss": 3.2575, "step": 5283 }, { "epoch": 0.26, "grad_norm": 0.5489025712013245, "learning_rate": 0.0005890367866429329, "loss": 3.36, "step": 5284 }, { "epoch": 0.26, "grad_norm": 0.600946843624115, "learning_rate": 0.0005890326621340628, "loss": 3.5745, "step": 5285 }, { "epoch": 0.26, "grad_norm": 0.5801662802696228, "learning_rate": 0.0005890285368639363, "loss": 3.4846, "step": 5286 }, { "epoch": 0.26, "grad_norm": 0.5930183529853821, "learning_rate": 0.0005890244108325645, "loss": 3.3913, "step": 5287 }, { "epoch": 0.26, "grad_norm": 0.5204430818557739, "learning_rate": 0.0005890202840399579, "loss": 3.3981, "step": 5288 }, { "epoch": 0.26, "grad_norm": 0.5407831072807312, "learning_rate": 0.0005890161564861274, "loss": 3.6175, "step": 5289 }, { "epoch": 0.26, "grad_norm": 0.5350310206413269, "learning_rate": 0.000589012028171084, "loss": 3.6356, "step": 5290 }, { "epoch": 0.26, "grad_norm": 0.601344645023346, "learning_rate": 0.0005890078990948386, "loss": 3.2246, "step": 5291 }, { "epoch": 0.26, "grad_norm": 0.5307313799858093, "learning_rate": 0.000589003769257402, "loss": 3.6117, "step": 5292 }, { "epoch": 0.26, "grad_norm": 0.5290243029594421, "learning_rate": 0.000588999638658785, "loss": 3.4505, "step": 5293 }, { "epoch": 0.26, "grad_norm": 0.5569483637809753, "learning_rate": 0.0005889955072989986, "loss": 3.3312, "step": 5294 }, { "epoch": 0.26, "grad_norm": 0.5312089920043945, "learning_rate": 0.0005889913751780537, "loss": 3.5256, "step": 5295 }, { "epoch": 0.26, "grad_norm": 0.6026173830032349, "learning_rate": 0.0005889872422959612, "loss": 3.4126, "step": 5296 }, { "epoch": 0.26, "grad_norm": 0.6224252581596375, "learning_rate": 0.0005889831086527318, "loss": 3.483, "step": 5297 }, { "epoch": 0.26, "grad_norm": 0.5267722606658936, "learning_rate": 0.0005889789742483766, "loss": 3.5036, "step": 5298 }, { "epoch": 0.26, "grad_norm": 0.5711367130279541, "learning_rate": 0.0005889748390829064, "loss": 3.3469, "step": 5299 }, { "epoch": 0.26, "grad_norm": 0.5197421312332153, "learning_rate": 0.000588970703156332, "loss": 3.3095, "step": 5300 }, { "epoch": 0.26, "grad_norm": 0.552913248538971, "learning_rate": 0.0005889665664686644, "loss": 3.483, "step": 5301 }, { "epoch": 0.26, "grad_norm": 0.5599504113197327, "learning_rate": 0.0005889624290199145, "loss": 3.2881, "step": 5302 }, { "epoch": 0.26, "grad_norm": 0.5224385261535645, "learning_rate": 0.0005889582908100932, "loss": 3.6117, "step": 5303 }, { "epoch": 0.26, "grad_norm": 0.5339375734329224, "learning_rate": 0.0005889541518392114, "loss": 3.4773, "step": 5304 }, { "epoch": 0.26, "grad_norm": 0.5676855444908142, "learning_rate": 0.0005889500121072799, "loss": 3.365, "step": 5305 }, { "epoch": 0.26, "grad_norm": 0.6814284920692444, "learning_rate": 0.0005889458716143097, "loss": 3.5605, "step": 5306 }, { "epoch": 0.26, "grad_norm": 0.52662593126297, "learning_rate": 0.0005889417303603117, "loss": 3.5284, "step": 5307 }, { "epoch": 0.26, "grad_norm": 0.5496254563331604, "learning_rate": 0.0005889375883452967, "loss": 3.4624, "step": 5308 }, { "epoch": 0.26, "grad_norm": 0.5711499452590942, "learning_rate": 0.0005889334455692758, "loss": 3.4269, "step": 5309 }, { "epoch": 0.26, "grad_norm": 0.5623414516448975, "learning_rate": 0.0005889293020322597, "loss": 3.4268, "step": 5310 }, { "epoch": 0.26, "grad_norm": 0.5185620188713074, "learning_rate": 0.0005889251577342597, "loss": 3.3941, "step": 5311 }, { "epoch": 0.26, "grad_norm": 0.5413085222244263, "learning_rate": 0.0005889210126752861, "loss": 3.5135, "step": 5312 }, { "epoch": 0.26, "grad_norm": 0.5592106580734253, "learning_rate": 0.0005889168668553504, "loss": 3.3922, "step": 5313 }, { "epoch": 0.26, "grad_norm": 0.5406152606010437, "learning_rate": 0.0005889127202744631, "loss": 3.4864, "step": 5314 }, { "epoch": 0.26, "grad_norm": 0.5414677262306213, "learning_rate": 0.0005889085729326354, "loss": 3.3056, "step": 5315 }, { "epoch": 0.26, "grad_norm": 0.5268755555152893, "learning_rate": 0.0005889044248298781, "loss": 3.2041, "step": 5316 }, { "epoch": 0.26, "grad_norm": 0.5885822772979736, "learning_rate": 0.0005889002759662021, "loss": 3.4368, "step": 5317 }, { "epoch": 0.26, "grad_norm": 0.5720695853233337, "learning_rate": 0.0005888961263416185, "loss": 3.4827, "step": 5318 }, { "epoch": 0.26, "grad_norm": 0.5145799517631531, "learning_rate": 0.000588891975956138, "loss": 3.4205, "step": 5319 }, { "epoch": 0.26, "grad_norm": 0.5636258721351624, "learning_rate": 0.0005888878248097717, "loss": 3.4612, "step": 5320 }, { "epoch": 0.26, "grad_norm": 0.5309170484542847, "learning_rate": 0.0005888836729025304, "loss": 3.3744, "step": 5321 }, { "epoch": 0.26, "grad_norm": 0.5823953747749329, "learning_rate": 0.0005888795202344251, "loss": 3.1887, "step": 5322 }, { "epoch": 0.26, "grad_norm": 0.5687710046768188, "learning_rate": 0.0005888753668054667, "loss": 3.4677, "step": 5323 }, { "epoch": 0.26, "grad_norm": 0.5707094669342041, "learning_rate": 0.0005888712126156663, "loss": 3.3657, "step": 5324 }, { "epoch": 0.26, "grad_norm": 0.5369641184806824, "learning_rate": 0.0005888670576650346, "loss": 3.3792, "step": 5325 }, { "epoch": 0.26, "grad_norm": 0.550995409488678, "learning_rate": 0.0005888629019535828, "loss": 3.3031, "step": 5326 }, { "epoch": 0.26, "grad_norm": 0.5561549067497253, "learning_rate": 0.0005888587454813216, "loss": 3.3467, "step": 5327 }, { "epoch": 0.26, "grad_norm": 0.564005434513092, "learning_rate": 0.0005888545882482622, "loss": 3.262, "step": 5328 }, { "epoch": 0.26, "grad_norm": 0.5233489871025085, "learning_rate": 0.0005888504302544152, "loss": 3.1761, "step": 5329 }, { "epoch": 0.26, "grad_norm": 0.586366593837738, "learning_rate": 0.0005888462714997919, "loss": 3.2845, "step": 5330 }, { "epoch": 0.26, "grad_norm": 0.6343865990638733, "learning_rate": 0.000588842111984403, "loss": 3.5545, "step": 5331 }, { "epoch": 0.26, "grad_norm": 0.66603022813797, "learning_rate": 0.0005888379517082597, "loss": 3.4753, "step": 5332 }, { "epoch": 0.26, "grad_norm": 0.5965749621391296, "learning_rate": 0.0005888337906713728, "loss": 3.3929, "step": 5333 }, { "epoch": 0.26, "grad_norm": 0.5687329173088074, "learning_rate": 0.0005888296288737531, "loss": 3.4986, "step": 5334 }, { "epoch": 0.26, "grad_norm": 0.5994446277618408, "learning_rate": 0.0005888254663154119, "loss": 3.5974, "step": 5335 }, { "epoch": 0.26, "grad_norm": 0.5664075613021851, "learning_rate": 0.00058882130299636, "loss": 3.451, "step": 5336 }, { "epoch": 0.26, "grad_norm": 0.5647047758102417, "learning_rate": 0.0005888171389166083, "loss": 3.4053, "step": 5337 }, { "epoch": 0.26, "grad_norm": 0.5460997819900513, "learning_rate": 0.0005888129740761679, "loss": 3.2895, "step": 5338 }, { "epoch": 0.26, "grad_norm": 0.5719659328460693, "learning_rate": 0.0005888088084750497, "loss": 3.2872, "step": 5339 }, { "epoch": 0.26, "grad_norm": 0.5610400438308716, "learning_rate": 0.0005888046421132647, "loss": 3.3737, "step": 5340 }, { "epoch": 0.26, "grad_norm": 0.5502822995185852, "learning_rate": 0.0005888004749908239, "loss": 3.3927, "step": 5341 }, { "epoch": 0.26, "grad_norm": 0.6235296726226807, "learning_rate": 0.0005887963071077382, "loss": 3.4243, "step": 5342 }, { "epoch": 0.26, "grad_norm": 0.5775569081306458, "learning_rate": 0.0005887921384640186, "loss": 3.4377, "step": 5343 }, { "epoch": 0.26, "grad_norm": 0.5198445320129395, "learning_rate": 0.000588787969059676, "loss": 3.308, "step": 5344 }, { "epoch": 0.26, "grad_norm": 0.5459919571876526, "learning_rate": 0.0005887837988947216, "loss": 3.5653, "step": 5345 }, { "epoch": 0.26, "grad_norm": 0.5754595994949341, "learning_rate": 0.0005887796279691661, "loss": 3.226, "step": 5346 }, { "epoch": 0.26, "grad_norm": 0.5614638328552246, "learning_rate": 0.0005887754562830207, "loss": 3.4762, "step": 5347 }, { "epoch": 0.26, "grad_norm": 0.5696617960929871, "learning_rate": 0.0005887712838362963, "loss": 3.2895, "step": 5348 }, { "epoch": 0.26, "grad_norm": 0.529895544052124, "learning_rate": 0.000588767110629004, "loss": 3.3031, "step": 5349 }, { "epoch": 0.26, "grad_norm": 0.5466099977493286, "learning_rate": 0.0005887629366611547, "loss": 3.6197, "step": 5350 }, { "epoch": 0.26, "grad_norm": 0.6054186820983887, "learning_rate": 0.0005887587619327594, "loss": 3.3346, "step": 5351 }, { "epoch": 0.26, "grad_norm": 0.551434338092804, "learning_rate": 0.0005887545864438291, "loss": 3.6942, "step": 5352 }, { "epoch": 0.26, "grad_norm": 0.5595240592956543, "learning_rate": 0.0005887504101943746, "loss": 3.2481, "step": 5353 }, { "epoch": 0.26, "grad_norm": 0.5537325739860535, "learning_rate": 0.0005887462331844072, "loss": 3.3643, "step": 5354 }, { "epoch": 0.26, "grad_norm": 0.5237861275672913, "learning_rate": 0.0005887420554139379, "loss": 3.0849, "step": 5355 }, { "epoch": 0.26, "grad_norm": 0.5428269505500793, "learning_rate": 0.0005887378768829776, "loss": 3.6186, "step": 5356 }, { "epoch": 0.26, "grad_norm": 0.5731378793716431, "learning_rate": 0.0005887336975915372, "loss": 3.4173, "step": 5357 }, { "epoch": 0.26, "grad_norm": 0.5460600852966309, "learning_rate": 0.0005887295175396279, "loss": 3.294, "step": 5358 }, { "epoch": 0.26, "grad_norm": 0.5621720552444458, "learning_rate": 0.0005887253367272605, "loss": 3.19, "step": 5359 }, { "epoch": 0.26, "grad_norm": 0.5691437125205994, "learning_rate": 0.0005887211551544462, "loss": 3.346, "step": 5360 }, { "epoch": 0.26, "grad_norm": 0.6207178235054016, "learning_rate": 0.0005887169728211959, "loss": 3.4977, "step": 5361 }, { "epoch": 0.26, "grad_norm": 0.5653191804885864, "learning_rate": 0.0005887127897275206, "loss": 3.2567, "step": 5362 }, { "epoch": 0.26, "grad_norm": 0.6655686497688293, "learning_rate": 0.0005887086058734316, "loss": 3.4619, "step": 5363 }, { "epoch": 0.26, "grad_norm": 0.6186230182647705, "learning_rate": 0.0005887044212589395, "loss": 3.4998, "step": 5364 }, { "epoch": 0.26, "grad_norm": 0.5586534142494202, "learning_rate": 0.0005887002358840556, "loss": 3.4456, "step": 5365 }, { "epoch": 0.26, "grad_norm": 0.561849057674408, "learning_rate": 0.0005886960497487908, "loss": 3.335, "step": 5366 }, { "epoch": 0.26, "grad_norm": 0.5541581511497498, "learning_rate": 0.0005886918628531562, "loss": 3.2474, "step": 5367 }, { "epoch": 0.26, "grad_norm": 0.5545457005500793, "learning_rate": 0.0005886876751971628, "loss": 3.3815, "step": 5368 }, { "epoch": 0.26, "grad_norm": 0.5654429197311401, "learning_rate": 0.0005886834867808215, "loss": 3.2011, "step": 5369 }, { "epoch": 0.26, "grad_norm": 0.635900616645813, "learning_rate": 0.0005886792976041436, "loss": 3.4373, "step": 5370 }, { "epoch": 0.26, "grad_norm": 0.5833329558372498, "learning_rate": 0.0005886751076671399, "loss": 3.2561, "step": 5371 }, { "epoch": 0.26, "grad_norm": 0.565444827079773, "learning_rate": 0.0005886709169698216, "loss": 3.3935, "step": 5372 }, { "epoch": 0.26, "grad_norm": 0.5286258459091187, "learning_rate": 0.0005886667255121995, "loss": 3.5707, "step": 5373 }, { "epoch": 0.26, "grad_norm": 0.5440570712089539, "learning_rate": 0.000588662533294285, "loss": 3.311, "step": 5374 }, { "epoch": 0.26, "grad_norm": 0.6369450092315674, "learning_rate": 0.0005886583403160889, "loss": 3.6501, "step": 5375 }, { "epoch": 0.26, "grad_norm": 0.5417674779891968, "learning_rate": 0.0005886541465776223, "loss": 3.288, "step": 5376 }, { "epoch": 0.26, "grad_norm": 0.6063926815986633, "learning_rate": 0.0005886499520788961, "loss": 3.2348, "step": 5377 }, { "epoch": 0.26, "grad_norm": 0.5412531495094299, "learning_rate": 0.0005886457568199216, "loss": 3.6838, "step": 5378 }, { "epoch": 0.26, "grad_norm": 0.5758473873138428, "learning_rate": 0.0005886415608007096, "loss": 3.5398, "step": 5379 }, { "epoch": 0.26, "grad_norm": 0.577717661857605, "learning_rate": 0.0005886373640212714, "loss": 3.4554, "step": 5380 }, { "epoch": 0.26, "grad_norm": 0.5586448907852173, "learning_rate": 0.000588633166481618, "loss": 3.5677, "step": 5381 }, { "epoch": 0.26, "grad_norm": 0.574151873588562, "learning_rate": 0.0005886289681817601, "loss": 3.3092, "step": 5382 }, { "epoch": 0.26, "grad_norm": 0.5311241149902344, "learning_rate": 0.0005886247691217093, "loss": 3.5122, "step": 5383 }, { "epoch": 0.26, "grad_norm": 0.5521003007888794, "learning_rate": 0.0005886205693014764, "loss": 3.4751, "step": 5384 }, { "epoch": 0.26, "grad_norm": 0.5857923030853271, "learning_rate": 0.0005886163687210724, "loss": 3.531, "step": 5385 }, { "epoch": 0.26, "grad_norm": 0.5671178102493286, "learning_rate": 0.0005886121673805083, "loss": 3.3799, "step": 5386 }, { "epoch": 0.26, "grad_norm": 0.523662805557251, "learning_rate": 0.0005886079652797954, "loss": 3.6136, "step": 5387 }, { "epoch": 0.26, "grad_norm": 0.5966594219207764, "learning_rate": 0.0005886037624189448, "loss": 3.1987, "step": 5388 }, { "epoch": 0.26, "grad_norm": 0.5404354929924011, "learning_rate": 0.0005885995587979672, "loss": 3.469, "step": 5389 }, { "epoch": 0.26, "grad_norm": 0.565900981426239, "learning_rate": 0.0005885953544168741, "loss": 3.4291, "step": 5390 }, { "epoch": 0.26, "grad_norm": 0.5991320610046387, "learning_rate": 0.0005885911492756763, "loss": 3.4353, "step": 5391 }, { "epoch": 0.26, "grad_norm": 0.5885935425758362, "learning_rate": 0.0005885869433743849, "loss": 3.3665, "step": 5392 }, { "epoch": 0.26, "grad_norm": 0.5500056743621826, "learning_rate": 0.0005885827367130112, "loss": 3.3849, "step": 5393 }, { "epoch": 0.26, "grad_norm": 0.6144444942474365, "learning_rate": 0.000588578529291566, "loss": 3.4082, "step": 5394 }, { "epoch": 0.26, "grad_norm": 0.5883260369300842, "learning_rate": 0.0005885743211100603, "loss": 3.5509, "step": 5395 }, { "epoch": 0.26, "grad_norm": 0.5883315205574036, "learning_rate": 0.0005885701121685057, "loss": 3.4782, "step": 5396 }, { "epoch": 0.26, "grad_norm": 0.5557137131690979, "learning_rate": 0.0005885659024669128, "loss": 3.3478, "step": 5397 }, { "epoch": 0.26, "grad_norm": 0.5621869564056396, "learning_rate": 0.0005885616920052928, "loss": 3.2406, "step": 5398 }, { "epoch": 0.26, "grad_norm": 0.6264198422431946, "learning_rate": 0.000588557480783657, "loss": 3.2804, "step": 5399 }, { "epoch": 0.26, "grad_norm": 0.543046236038208, "learning_rate": 0.0005885532688020163, "loss": 3.3372, "step": 5400 }, { "epoch": 0.26, "grad_norm": 0.5603328943252563, "learning_rate": 0.0005885490560603818, "loss": 3.2055, "step": 5401 }, { "epoch": 0.26, "grad_norm": 0.5257130861282349, "learning_rate": 0.0005885448425587645, "loss": 3.3474, "step": 5402 }, { "epoch": 0.26, "grad_norm": 0.5107335448265076, "learning_rate": 0.0005885406282971756, "loss": 3.5721, "step": 5403 }, { "epoch": 0.26, "grad_norm": 0.5753149390220642, "learning_rate": 0.0005885364132756264, "loss": 3.4103, "step": 5404 }, { "epoch": 0.26, "grad_norm": 0.5694027543067932, "learning_rate": 0.0005885321974941277, "loss": 3.4066, "step": 5405 }, { "epoch": 0.26, "grad_norm": 0.5609138607978821, "learning_rate": 0.0005885279809526908, "loss": 3.3846, "step": 5406 }, { "epoch": 0.26, "grad_norm": 0.5206153988838196, "learning_rate": 0.0005885237636513267, "loss": 3.5455, "step": 5407 }, { "epoch": 0.27, "grad_norm": 0.6045171022415161, "learning_rate": 0.0005885195455900465, "loss": 3.2741, "step": 5408 }, { "epoch": 0.27, "grad_norm": 0.565991997718811, "learning_rate": 0.0005885153267688613, "loss": 3.5252, "step": 5409 }, { "epoch": 0.27, "grad_norm": 0.5464850664138794, "learning_rate": 0.0005885111071877823, "loss": 3.2544, "step": 5410 }, { "epoch": 0.27, "grad_norm": 0.567910373210907, "learning_rate": 0.0005885068868468206, "loss": 3.5726, "step": 5411 }, { "epoch": 0.27, "grad_norm": 0.5260792374610901, "learning_rate": 0.0005885026657459873, "loss": 3.3725, "step": 5412 }, { "epoch": 0.27, "grad_norm": 0.5899006128311157, "learning_rate": 0.0005884984438852934, "loss": 3.2411, "step": 5413 }, { "epoch": 0.27, "grad_norm": 0.6080503463745117, "learning_rate": 0.0005884942212647502, "loss": 3.32, "step": 5414 }, { "epoch": 0.27, "grad_norm": 0.5826466083526611, "learning_rate": 0.0005884899978843688, "loss": 3.2586, "step": 5415 }, { "epoch": 0.27, "grad_norm": 0.5410050749778748, "learning_rate": 0.0005884857737441601, "loss": 3.3174, "step": 5416 }, { "epoch": 0.27, "grad_norm": 0.5297449827194214, "learning_rate": 0.0005884815488441356, "loss": 3.3025, "step": 5417 }, { "epoch": 0.27, "grad_norm": 0.5570391416549683, "learning_rate": 0.0005884773231843062, "loss": 3.5041, "step": 5418 }, { "epoch": 0.27, "grad_norm": 0.5632867217063904, "learning_rate": 0.0005884730967646828, "loss": 3.4786, "step": 5419 }, { "epoch": 0.27, "grad_norm": 0.5535479784011841, "learning_rate": 0.0005884688695852769, "loss": 3.2442, "step": 5420 }, { "epoch": 0.27, "grad_norm": 0.5231944918632507, "learning_rate": 0.0005884646416460997, "loss": 3.5603, "step": 5421 }, { "epoch": 0.27, "grad_norm": 0.5422855019569397, "learning_rate": 0.000588460412947162, "loss": 3.2527, "step": 5422 }, { "epoch": 0.27, "grad_norm": 0.5713430643081665, "learning_rate": 0.000588456183488475, "loss": 3.5105, "step": 5423 }, { "epoch": 0.27, "grad_norm": 0.568607747554779, "learning_rate": 0.0005884519532700501, "loss": 3.363, "step": 5424 }, { "epoch": 0.27, "grad_norm": 0.5918583273887634, "learning_rate": 0.0005884477222918981, "loss": 3.3211, "step": 5425 }, { "epoch": 0.27, "grad_norm": 0.6431357264518738, "learning_rate": 0.0005884434905540305, "loss": 3.2004, "step": 5426 }, { "epoch": 0.27, "grad_norm": 0.5770518183708191, "learning_rate": 0.0005884392580564581, "loss": 3.4413, "step": 5427 }, { "epoch": 0.27, "grad_norm": 0.6105692982673645, "learning_rate": 0.0005884350247991923, "loss": 3.4229, "step": 5428 }, { "epoch": 0.27, "grad_norm": 0.5172119736671448, "learning_rate": 0.0005884307907822441, "loss": 3.3484, "step": 5429 }, { "epoch": 0.27, "grad_norm": 0.6026398539543152, "learning_rate": 0.0005884265560056246, "loss": 3.2793, "step": 5430 }, { "epoch": 0.27, "grad_norm": 0.5257851481437683, "learning_rate": 0.0005884223204693453, "loss": 3.324, "step": 5431 }, { "epoch": 0.27, "grad_norm": 0.5769349932670593, "learning_rate": 0.000588418084173417, "loss": 3.3327, "step": 5432 }, { "epoch": 0.27, "grad_norm": 0.5765005350112915, "learning_rate": 0.000588413847117851, "loss": 3.557, "step": 5433 }, { "epoch": 0.27, "grad_norm": 0.5662025213241577, "learning_rate": 0.0005884096093026584, "loss": 3.4275, "step": 5434 }, { "epoch": 0.27, "grad_norm": 0.5836684107780457, "learning_rate": 0.0005884053707278504, "loss": 3.4793, "step": 5435 }, { "epoch": 0.27, "grad_norm": 0.5512568354606628, "learning_rate": 0.0005884011313934381, "loss": 3.2952, "step": 5436 }, { "epoch": 0.27, "grad_norm": 0.5661933422088623, "learning_rate": 0.0005883968912994328, "loss": 3.4282, "step": 5437 }, { "epoch": 0.27, "grad_norm": 0.5790491700172424, "learning_rate": 0.0005883926504458456, "loss": 3.3611, "step": 5438 }, { "epoch": 0.27, "grad_norm": 0.5423797369003296, "learning_rate": 0.0005883884088326877, "loss": 3.3749, "step": 5439 }, { "epoch": 0.27, "grad_norm": 0.5996915698051453, "learning_rate": 0.0005883841664599701, "loss": 3.1857, "step": 5440 }, { "epoch": 0.27, "grad_norm": 0.5513800382614136, "learning_rate": 0.0005883799233277042, "loss": 3.2941, "step": 5441 }, { "epoch": 0.27, "grad_norm": 0.5614347457885742, "learning_rate": 0.000588375679435901, "loss": 3.4229, "step": 5442 }, { "epoch": 0.27, "grad_norm": 0.5885158777236938, "learning_rate": 0.0005883714347845718, "loss": 3.4624, "step": 5443 }, { "epoch": 0.27, "grad_norm": 0.5393982529640198, "learning_rate": 0.0005883671893737279, "loss": 3.2643, "step": 5444 }, { "epoch": 0.27, "grad_norm": 0.5851886868476868, "learning_rate": 0.0005883629432033801, "loss": 3.4327, "step": 5445 }, { "epoch": 0.27, "grad_norm": 0.5609333515167236, "learning_rate": 0.0005883586962735399, "loss": 3.2944, "step": 5446 }, { "epoch": 0.27, "grad_norm": 0.5521209239959717, "learning_rate": 0.0005883544485842183, "loss": 3.3134, "step": 5447 }, { "epoch": 0.27, "grad_norm": 0.5714461803436279, "learning_rate": 0.0005883502001354267, "loss": 3.5679, "step": 5448 }, { "epoch": 0.27, "grad_norm": 0.5831002593040466, "learning_rate": 0.0005883459509271762, "loss": 3.2632, "step": 5449 }, { "epoch": 0.27, "grad_norm": 0.5336947441101074, "learning_rate": 0.0005883417009594778, "loss": 3.3042, "step": 5450 }, { "epoch": 0.27, "grad_norm": 0.575208306312561, "learning_rate": 0.0005883374502323429, "loss": 3.3451, "step": 5451 }, { "epoch": 0.27, "grad_norm": 0.5348706841468811, "learning_rate": 0.0005883331987457827, "loss": 3.4311, "step": 5452 }, { "epoch": 0.27, "grad_norm": 0.5652807950973511, "learning_rate": 0.0005883289464998083, "loss": 3.4136, "step": 5453 }, { "epoch": 0.27, "grad_norm": 0.6097990870475769, "learning_rate": 0.000588324693494431, "loss": 3.3811, "step": 5454 }, { "epoch": 0.27, "grad_norm": 0.5866896510124207, "learning_rate": 0.0005883204397296619, "loss": 3.2683, "step": 5455 }, { "epoch": 0.27, "grad_norm": 0.5691618323326111, "learning_rate": 0.0005883161852055122, "loss": 3.3863, "step": 5456 }, { "epoch": 0.27, "grad_norm": 0.6082558035850525, "learning_rate": 0.0005883119299219932, "loss": 3.3178, "step": 5457 }, { "epoch": 0.27, "grad_norm": 0.5235224962234497, "learning_rate": 0.0005883076738791161, "loss": 3.5059, "step": 5458 }, { "epoch": 0.27, "grad_norm": 0.537198007106781, "learning_rate": 0.0005883034170768921, "loss": 3.3683, "step": 5459 }, { "epoch": 0.27, "grad_norm": 0.5196539163589478, "learning_rate": 0.0005882991595153323, "loss": 3.3288, "step": 5460 }, { "epoch": 0.27, "grad_norm": 0.5324885249137878, "learning_rate": 0.0005882949011944481, "loss": 3.3257, "step": 5461 }, { "epoch": 0.27, "grad_norm": 0.5442826747894287, "learning_rate": 0.0005882906421142505, "loss": 3.3675, "step": 5462 }, { "epoch": 0.27, "grad_norm": 0.5731666684150696, "learning_rate": 0.0005882863822747508, "loss": 3.4277, "step": 5463 }, { "epoch": 0.27, "grad_norm": 0.598007321357727, "learning_rate": 0.0005882821216759604, "loss": 3.4906, "step": 5464 }, { "epoch": 0.27, "grad_norm": 0.5194315910339355, "learning_rate": 0.0005882778603178903, "loss": 3.2031, "step": 5465 }, { "epoch": 0.27, "grad_norm": 0.5207369327545166, "learning_rate": 0.0005882735982005519, "loss": 3.2482, "step": 5466 }, { "epoch": 0.27, "grad_norm": 0.5437124371528625, "learning_rate": 0.0005882693353239562, "loss": 3.5863, "step": 5467 }, { "epoch": 0.27, "grad_norm": 0.5464715957641602, "learning_rate": 0.0005882650716881146, "loss": 3.4643, "step": 5468 }, { "epoch": 0.27, "grad_norm": 0.5588339567184448, "learning_rate": 0.0005882608072930381, "loss": 3.3967, "step": 5469 }, { "epoch": 0.27, "grad_norm": 0.5152556896209717, "learning_rate": 0.0005882565421387383, "loss": 3.324, "step": 5470 }, { "epoch": 0.27, "grad_norm": 0.5201581120491028, "learning_rate": 0.0005882522762252262, "loss": 3.1271, "step": 5471 }, { "epoch": 0.27, "grad_norm": 0.5199649930000305, "learning_rate": 0.0005882480095525132, "loss": 3.491, "step": 5472 }, { "epoch": 0.27, "grad_norm": 0.5226982235908508, "learning_rate": 0.0005882437421206102, "loss": 3.4153, "step": 5473 }, { "epoch": 0.27, "grad_norm": 0.5411034226417542, "learning_rate": 0.0005882394739295287, "loss": 3.4042, "step": 5474 }, { "epoch": 0.27, "grad_norm": 0.554895281791687, "learning_rate": 0.0005882352049792801, "loss": 3.384, "step": 5475 }, { "epoch": 0.27, "grad_norm": 0.538006603717804, "learning_rate": 0.0005882309352698752, "loss": 3.4293, "step": 5476 }, { "epoch": 0.27, "grad_norm": 0.5578272342681885, "learning_rate": 0.0005882266648013256, "loss": 3.4783, "step": 5477 }, { "epoch": 0.27, "grad_norm": 0.5473056435585022, "learning_rate": 0.0005882223935736424, "loss": 3.5646, "step": 5478 }, { "epoch": 0.27, "grad_norm": 0.5610681176185608, "learning_rate": 0.0005882181215868369, "loss": 3.5254, "step": 5479 }, { "epoch": 0.27, "grad_norm": 0.5292629599571228, "learning_rate": 0.0005882138488409204, "loss": 3.376, "step": 5480 }, { "epoch": 0.27, "grad_norm": 0.5061684846878052, "learning_rate": 0.000588209575335904, "loss": 3.5303, "step": 5481 }, { "epoch": 0.27, "grad_norm": 0.5533640384674072, "learning_rate": 0.0005882053010717991, "loss": 3.4842, "step": 5482 }, { "epoch": 0.27, "grad_norm": 0.5831032991409302, "learning_rate": 0.0005882010260486169, "loss": 3.5035, "step": 5483 }, { "epoch": 0.27, "grad_norm": 0.5656222701072693, "learning_rate": 0.0005881967502663687, "loss": 3.5032, "step": 5484 }, { "epoch": 0.27, "grad_norm": 0.6056988835334778, "learning_rate": 0.0005881924737250655, "loss": 3.5061, "step": 5485 }, { "epoch": 0.27, "grad_norm": 0.5542979836463928, "learning_rate": 0.0005881881964247191, "loss": 3.4491, "step": 5486 }, { "epoch": 0.27, "grad_norm": 0.5534904599189758, "learning_rate": 0.0005881839183653402, "loss": 3.4249, "step": 5487 }, { "epoch": 0.27, "grad_norm": 0.5802499651908875, "learning_rate": 0.0005881796395469406, "loss": 3.242, "step": 5488 }, { "epoch": 0.27, "grad_norm": 0.6037202477455139, "learning_rate": 0.000588175359969531, "loss": 3.4127, "step": 5489 }, { "epoch": 0.27, "grad_norm": 0.5480342507362366, "learning_rate": 0.0005881710796331231, "loss": 3.2675, "step": 5490 }, { "epoch": 0.27, "grad_norm": 0.5399625301361084, "learning_rate": 0.000588166798537728, "loss": 3.4004, "step": 5491 }, { "epoch": 0.27, "grad_norm": 0.5208690166473389, "learning_rate": 0.000588162516683357, "loss": 3.2553, "step": 5492 }, { "epoch": 0.27, "grad_norm": 0.5354910492897034, "learning_rate": 0.0005881582340700215, "loss": 3.2685, "step": 5493 }, { "epoch": 0.27, "grad_norm": 0.6398562788963318, "learning_rate": 0.0005881539506977326, "loss": 3.7163, "step": 5494 }, { "epoch": 0.27, "grad_norm": 0.6458652019500732, "learning_rate": 0.0005881496665665016, "loss": 3.4954, "step": 5495 }, { "epoch": 0.27, "grad_norm": 0.5615726113319397, "learning_rate": 0.0005881453816763398, "loss": 3.3978, "step": 5496 }, { "epoch": 0.27, "grad_norm": 0.5364488959312439, "learning_rate": 0.0005881410960272587, "loss": 3.3949, "step": 5497 }, { "epoch": 0.27, "grad_norm": 0.5817398428916931, "learning_rate": 0.0005881368096192693, "loss": 3.3621, "step": 5498 }, { "epoch": 0.27, "grad_norm": 0.8163599371910095, "learning_rate": 0.000588132522452383, "loss": 3.6545, "step": 5499 }, { "epoch": 0.27, "grad_norm": 0.5170142650604248, "learning_rate": 0.0005881282345266111, "loss": 3.4511, "step": 5500 }, { "epoch": 0.27, "grad_norm": 0.5624502897262573, "learning_rate": 0.0005881239458419648, "loss": 3.3277, "step": 5501 }, { "epoch": 0.27, "grad_norm": 0.5259974002838135, "learning_rate": 0.0005881196563984555, "loss": 3.3884, "step": 5502 }, { "epoch": 0.27, "grad_norm": 0.5127625465393066, "learning_rate": 0.0005881153661960946, "loss": 3.4118, "step": 5503 }, { "epoch": 0.27, "grad_norm": 0.5912742018699646, "learning_rate": 0.0005881110752348931, "loss": 3.5946, "step": 5504 }, { "epoch": 0.27, "grad_norm": 0.5364376902580261, "learning_rate": 0.0005881067835148626, "loss": 3.4842, "step": 5505 }, { "epoch": 0.27, "grad_norm": 0.5497884154319763, "learning_rate": 0.0005881024910360143, "loss": 3.3788, "step": 5506 }, { "epoch": 0.27, "grad_norm": 0.5624418258666992, "learning_rate": 0.0005880981977983594, "loss": 3.1812, "step": 5507 }, { "epoch": 0.27, "grad_norm": 0.548305332660675, "learning_rate": 0.0005880939038019093, "loss": 3.2861, "step": 5508 }, { "epoch": 0.27, "grad_norm": 0.6290483474731445, "learning_rate": 0.0005880896090466754, "loss": 3.2389, "step": 5509 }, { "epoch": 0.27, "grad_norm": 0.6057081818580627, "learning_rate": 0.0005880853135326688, "loss": 3.102, "step": 5510 }, { "epoch": 0.27, "grad_norm": 0.567807674407959, "learning_rate": 0.0005880810172599009, "loss": 3.3086, "step": 5511 }, { "epoch": 0.27, "grad_norm": 0.5722109079360962, "learning_rate": 0.0005880767202283832, "loss": 3.2491, "step": 5512 }, { "epoch": 0.27, "grad_norm": 0.5263040661811829, "learning_rate": 0.0005880724224381267, "loss": 3.4022, "step": 5513 }, { "epoch": 0.27, "grad_norm": 0.5402182340621948, "learning_rate": 0.0005880681238891429, "loss": 3.6702, "step": 5514 }, { "epoch": 0.27, "grad_norm": 0.5795789361000061, "learning_rate": 0.0005880638245814433, "loss": 3.3956, "step": 5515 }, { "epoch": 0.27, "grad_norm": 0.6076276302337646, "learning_rate": 0.0005880595245150388, "loss": 3.3292, "step": 5516 }, { "epoch": 0.27, "grad_norm": 0.5449411869049072, "learning_rate": 0.0005880552236899411, "loss": 3.4199, "step": 5517 }, { "epoch": 0.27, "grad_norm": 0.5408446192741394, "learning_rate": 0.0005880509221061613, "loss": 3.5047, "step": 5518 }, { "epoch": 0.27, "grad_norm": 0.548406183719635, "learning_rate": 0.0005880466197637108, "loss": 3.639, "step": 5519 }, { "epoch": 0.27, "grad_norm": 0.6209466457366943, "learning_rate": 0.000588042316662601, "loss": 3.4597, "step": 5520 }, { "epoch": 0.27, "grad_norm": 0.5157639980316162, "learning_rate": 0.0005880380128028431, "loss": 3.3713, "step": 5521 }, { "epoch": 0.27, "grad_norm": 0.5012747645378113, "learning_rate": 0.0005880337081844485, "loss": 3.3632, "step": 5522 }, { "epoch": 0.27, "grad_norm": 0.5477715134620667, "learning_rate": 0.0005880294028074286, "loss": 3.4232, "step": 5523 }, { "epoch": 0.27, "grad_norm": 0.6210871338844299, "learning_rate": 0.0005880250966717946, "loss": 3.2222, "step": 5524 }, { "epoch": 0.27, "grad_norm": 0.577610433101654, "learning_rate": 0.0005880207897775581, "loss": 3.5465, "step": 5525 }, { "epoch": 0.27, "grad_norm": 0.5438306331634521, "learning_rate": 0.0005880164821247301, "loss": 3.2486, "step": 5526 }, { "epoch": 0.27, "grad_norm": 0.5276567935943604, "learning_rate": 0.0005880121737133221, "loss": 3.3924, "step": 5527 }, { "epoch": 0.27, "grad_norm": 0.5347236394882202, "learning_rate": 0.0005880078645433456, "loss": 3.1702, "step": 5528 }, { "epoch": 0.27, "grad_norm": 0.532122015953064, "learning_rate": 0.0005880035546148118, "loss": 3.6436, "step": 5529 }, { "epoch": 0.27, "grad_norm": 0.6177424788475037, "learning_rate": 0.000587999243927732, "loss": 3.4205, "step": 5530 }, { "epoch": 0.27, "grad_norm": 0.5172913670539856, "learning_rate": 0.0005879949324821177, "loss": 3.4277, "step": 5531 }, { "epoch": 0.27, "grad_norm": 0.5155248641967773, "learning_rate": 0.0005879906202779801, "loss": 3.3915, "step": 5532 }, { "epoch": 0.27, "grad_norm": 0.5313438177108765, "learning_rate": 0.0005879863073153306, "loss": 3.1238, "step": 5533 }, { "epoch": 0.27, "grad_norm": 0.5252282619476318, "learning_rate": 0.0005879819935941806, "loss": 3.5629, "step": 5534 }, { "epoch": 0.27, "grad_norm": 0.5261170268058777, "learning_rate": 0.0005879776791145416, "loss": 3.6691, "step": 5535 }, { "epoch": 0.27, "grad_norm": 0.5184322595596313, "learning_rate": 0.0005879733638764246, "loss": 3.5832, "step": 5536 }, { "epoch": 0.27, "grad_norm": 0.5469010472297668, "learning_rate": 0.0005879690478798413, "loss": 3.4247, "step": 5537 }, { "epoch": 0.27, "grad_norm": 0.5656020045280457, "learning_rate": 0.0005879647311248029, "loss": 3.3261, "step": 5538 }, { "epoch": 0.27, "grad_norm": 0.526353657245636, "learning_rate": 0.0005879604136113209, "loss": 3.4339, "step": 5539 }, { "epoch": 0.27, "grad_norm": 0.5460639595985413, "learning_rate": 0.0005879560953394066, "loss": 3.2131, "step": 5540 }, { "epoch": 0.27, "grad_norm": 0.5069341659545898, "learning_rate": 0.0005879517763090712, "loss": 3.4322, "step": 5541 }, { "epoch": 0.27, "grad_norm": 0.5094472169876099, "learning_rate": 0.0005879474565203263, "loss": 3.36, "step": 5542 }, { "epoch": 0.27, "grad_norm": 0.520026445388794, "learning_rate": 0.0005879431359731833, "loss": 3.4691, "step": 5543 }, { "epoch": 0.27, "grad_norm": 0.5083997249603271, "learning_rate": 0.0005879388146676535, "loss": 3.3143, "step": 5544 }, { "epoch": 0.27, "grad_norm": 0.5479220747947693, "learning_rate": 0.0005879344926037482, "loss": 3.5481, "step": 5545 }, { "epoch": 0.27, "grad_norm": 0.561329185962677, "learning_rate": 0.0005879301697814789, "loss": 3.4544, "step": 5546 }, { "epoch": 0.27, "grad_norm": 0.5535502433776855, "learning_rate": 0.000587925846200857, "loss": 3.5186, "step": 5547 }, { "epoch": 0.27, "grad_norm": 0.5314511656761169, "learning_rate": 0.0005879215218618937, "loss": 3.5539, "step": 5548 }, { "epoch": 0.27, "grad_norm": 0.4937261641025543, "learning_rate": 0.0005879171967646006, "loss": 3.5703, "step": 5549 }, { "epoch": 0.27, "grad_norm": 0.5664466619491577, "learning_rate": 0.0005879128709089889, "loss": 3.454, "step": 5550 }, { "epoch": 0.27, "grad_norm": 0.513667106628418, "learning_rate": 0.0005879085442950703, "loss": 3.2555, "step": 5551 }, { "epoch": 0.27, "grad_norm": 0.5058261156082153, "learning_rate": 0.000587904216922856, "loss": 3.5081, "step": 5552 }, { "epoch": 0.27, "grad_norm": 0.5401785373687744, "learning_rate": 0.0005878998887923572, "loss": 3.4128, "step": 5553 }, { "epoch": 0.27, "grad_norm": 0.5664740800857544, "learning_rate": 0.0005878955599035857, "loss": 3.5442, "step": 5554 }, { "epoch": 0.27, "grad_norm": 0.5653204321861267, "learning_rate": 0.0005878912302565526, "loss": 3.4864, "step": 5555 }, { "epoch": 0.27, "grad_norm": 0.5319250822067261, "learning_rate": 0.0005878868998512694, "loss": 3.5001, "step": 5556 }, { "epoch": 0.27, "grad_norm": 0.5475034713745117, "learning_rate": 0.0005878825686877476, "loss": 3.4573, "step": 5557 }, { "epoch": 0.27, "grad_norm": 0.5714825391769409, "learning_rate": 0.0005878782367659984, "loss": 3.3223, "step": 5558 }, { "epoch": 0.27, "grad_norm": 0.5734561681747437, "learning_rate": 0.0005878739040860334, "loss": 3.3125, "step": 5559 }, { "epoch": 0.27, "grad_norm": 0.5484195947647095, "learning_rate": 0.0005878695706478639, "loss": 3.4178, "step": 5560 }, { "epoch": 0.27, "grad_norm": 0.5906480550765991, "learning_rate": 0.0005878652364515014, "loss": 3.4166, "step": 5561 }, { "epoch": 0.27, "grad_norm": 0.5374751091003418, "learning_rate": 0.0005878609014969572, "loss": 3.4265, "step": 5562 }, { "epoch": 0.27, "grad_norm": 0.5320459008216858, "learning_rate": 0.0005878565657842428, "loss": 3.3379, "step": 5563 }, { "epoch": 0.27, "grad_norm": 0.5613856911659241, "learning_rate": 0.0005878522293133697, "loss": 3.3711, "step": 5564 }, { "epoch": 0.27, "grad_norm": 0.5332615971565247, "learning_rate": 0.0005878478920843492, "loss": 3.4358, "step": 5565 }, { "epoch": 0.27, "grad_norm": 0.5565606951713562, "learning_rate": 0.0005878435540971926, "loss": 3.3137, "step": 5566 }, { "epoch": 0.27, "grad_norm": 0.5280033946037292, "learning_rate": 0.0005878392153519117, "loss": 3.4746, "step": 5567 }, { "epoch": 0.27, "grad_norm": 0.5665175914764404, "learning_rate": 0.0005878348758485176, "loss": 3.4319, "step": 5568 }, { "epoch": 0.27, "grad_norm": 0.5359581708908081, "learning_rate": 0.0005878305355870218, "loss": 3.4903, "step": 5569 }, { "epoch": 0.27, "grad_norm": 0.5408956408500671, "learning_rate": 0.0005878261945674358, "loss": 3.4426, "step": 5570 }, { "epoch": 0.27, "grad_norm": 0.5556782484054565, "learning_rate": 0.0005878218527897709, "loss": 3.4188, "step": 5571 }, { "epoch": 0.27, "grad_norm": 0.549187183380127, "learning_rate": 0.0005878175102540387, "loss": 3.4785, "step": 5572 }, { "epoch": 0.27, "grad_norm": 0.5007501244544983, "learning_rate": 0.0005878131669602506, "loss": 3.254, "step": 5573 }, { "epoch": 0.27, "grad_norm": 0.5330648422241211, "learning_rate": 0.000587808822908418, "loss": 3.5062, "step": 5574 }, { "epoch": 0.27, "grad_norm": 0.5661302208900452, "learning_rate": 0.0005878044780985523, "loss": 3.555, "step": 5575 }, { "epoch": 0.27, "grad_norm": 0.5426991581916809, "learning_rate": 0.000587800132530665, "loss": 3.7042, "step": 5576 }, { "epoch": 0.27, "grad_norm": 0.5796633958816528, "learning_rate": 0.0005877957862047676, "loss": 3.3673, "step": 5577 }, { "epoch": 0.27, "grad_norm": 0.5920323133468628, "learning_rate": 0.0005877914391208716, "loss": 3.3901, "step": 5578 }, { "epoch": 0.27, "grad_norm": 0.5278183817863464, "learning_rate": 0.0005877870912789882, "loss": 3.4738, "step": 5579 }, { "epoch": 0.27, "grad_norm": 0.506721556186676, "learning_rate": 0.0005877827426791289, "loss": 3.2758, "step": 5580 }, { "epoch": 0.27, "grad_norm": 0.5367504954338074, "learning_rate": 0.0005877783933213054, "loss": 3.3765, "step": 5581 }, { "epoch": 0.27, "grad_norm": 0.5439772009849548, "learning_rate": 0.0005877740432055288, "loss": 3.6383, "step": 5582 }, { "epoch": 0.27, "grad_norm": 0.5529850721359253, "learning_rate": 0.0005877696923318109, "loss": 3.3865, "step": 5583 }, { "epoch": 0.27, "grad_norm": 0.5937544703483582, "learning_rate": 0.000587765340700163, "loss": 3.6044, "step": 5584 }, { "epoch": 0.27, "grad_norm": 0.5193443894386292, "learning_rate": 0.0005877609883105966, "loss": 3.2867, "step": 5585 }, { "epoch": 0.27, "grad_norm": 0.5450002551078796, "learning_rate": 0.0005877566351631231, "loss": 3.4058, "step": 5586 }, { "epoch": 0.27, "grad_norm": 0.5292603373527527, "learning_rate": 0.0005877522812577539, "loss": 3.5574, "step": 5587 }, { "epoch": 0.27, "grad_norm": 0.5539287328720093, "learning_rate": 0.0005877479265945008, "loss": 3.3719, "step": 5588 }, { "epoch": 0.27, "grad_norm": 0.5461103320121765, "learning_rate": 0.0005877435711733748, "loss": 3.3806, "step": 5589 }, { "epoch": 0.27, "grad_norm": 0.5659968256950378, "learning_rate": 0.0005877392149943877, "loss": 3.3854, "step": 5590 }, { "epoch": 0.27, "grad_norm": 0.6119036674499512, "learning_rate": 0.0005877348580575509, "loss": 3.2403, "step": 5591 }, { "epoch": 0.27, "grad_norm": 0.5469770431518555, "learning_rate": 0.000587730500362876, "loss": 3.428, "step": 5592 }, { "epoch": 0.27, "grad_norm": 0.5068824887275696, "learning_rate": 0.0005877261419103741, "loss": 3.5666, "step": 5593 }, { "epoch": 0.27, "grad_norm": 0.5368444919586182, "learning_rate": 0.000587721782700057, "loss": 3.2824, "step": 5594 }, { "epoch": 0.27, "grad_norm": 0.5606796145439148, "learning_rate": 0.0005877174227319362, "loss": 3.2626, "step": 5595 }, { "epoch": 0.27, "grad_norm": 0.5552815794944763, "learning_rate": 0.0005877130620060229, "loss": 3.2393, "step": 5596 }, { "epoch": 0.27, "grad_norm": 0.591176450252533, "learning_rate": 0.000587708700522329, "loss": 3.339, "step": 5597 }, { "epoch": 0.27, "grad_norm": 0.5659617781639099, "learning_rate": 0.0005877043382808655, "loss": 3.582, "step": 5598 }, { "epoch": 0.27, "grad_norm": 0.5646779537200928, "learning_rate": 0.0005876999752816443, "loss": 3.5119, "step": 5599 }, { "epoch": 0.27, "grad_norm": 0.5375205278396606, "learning_rate": 0.0005876956115246767, "loss": 3.39, "step": 5600 }, { "epoch": 0.27, "grad_norm": 0.5745865106582642, "learning_rate": 0.0005876912470099742, "loss": 3.515, "step": 5601 }, { "epoch": 0.27, "grad_norm": 0.6667315363883972, "learning_rate": 0.0005876868817375483, "loss": 3.2686, "step": 5602 }, { "epoch": 0.27, "grad_norm": 0.5387046933174133, "learning_rate": 0.0005876825157074107, "loss": 3.3706, "step": 5603 }, { "epoch": 0.27, "grad_norm": 0.5582010746002197, "learning_rate": 0.0005876781489195725, "loss": 3.5323, "step": 5604 }, { "epoch": 0.27, "grad_norm": 0.5929326415061951, "learning_rate": 0.0005876737813740456, "loss": 3.3948, "step": 5605 }, { "epoch": 0.27, "grad_norm": 0.5750752091407776, "learning_rate": 0.0005876694130708412, "loss": 3.2972, "step": 5606 }, { "epoch": 0.27, "grad_norm": 0.5479739904403687, "learning_rate": 0.0005876650440099709, "loss": 3.5172, "step": 5607 }, { "epoch": 0.27, "grad_norm": 0.5554889440536499, "learning_rate": 0.0005876606741914462, "loss": 3.399, "step": 5608 }, { "epoch": 0.27, "grad_norm": 0.5355261564254761, "learning_rate": 0.0005876563036152788, "loss": 3.5163, "step": 5609 }, { "epoch": 0.27, "grad_norm": 0.5420413613319397, "learning_rate": 0.00058765193228148, "loss": 3.3165, "step": 5610 }, { "epoch": 0.27, "grad_norm": 0.5698093771934509, "learning_rate": 0.0005876475601900614, "loss": 3.0865, "step": 5611 }, { "epoch": 0.28, "grad_norm": 0.5109629034996033, "learning_rate": 0.0005876431873410344, "loss": 3.4757, "step": 5612 }, { "epoch": 0.28, "grad_norm": 0.5421175360679626, "learning_rate": 0.0005876388137344107, "loss": 3.422, "step": 5613 }, { "epoch": 0.28, "grad_norm": 0.5593693256378174, "learning_rate": 0.0005876344393702016, "loss": 3.1981, "step": 5614 }, { "epoch": 0.28, "grad_norm": 0.5609410405158997, "learning_rate": 0.0005876300642484189, "loss": 3.4627, "step": 5615 }, { "epoch": 0.28, "grad_norm": 0.5721429586410522, "learning_rate": 0.0005876256883690739, "loss": 3.5989, "step": 5616 }, { "epoch": 0.28, "grad_norm": 0.531762957572937, "learning_rate": 0.0005876213117321781, "loss": 3.5232, "step": 5617 }, { "epoch": 0.28, "grad_norm": 0.6050277352333069, "learning_rate": 0.0005876169343377432, "loss": 3.2072, "step": 5618 }, { "epoch": 0.28, "grad_norm": 0.536950945854187, "learning_rate": 0.0005876125561857806, "loss": 3.4877, "step": 5619 }, { "epoch": 0.28, "grad_norm": 0.5331342816352844, "learning_rate": 0.0005876081772763019, "loss": 3.3034, "step": 5620 }, { "epoch": 0.28, "grad_norm": 0.5625576972961426, "learning_rate": 0.0005876037976093188, "loss": 3.2909, "step": 5621 }, { "epoch": 0.28, "grad_norm": 0.5438462495803833, "learning_rate": 0.0005875994171848424, "loss": 3.5022, "step": 5622 }, { "epoch": 0.28, "grad_norm": 0.5512886643409729, "learning_rate": 0.0005875950360028846, "loss": 3.4472, "step": 5623 }, { "epoch": 0.28, "grad_norm": 0.5511341094970703, "learning_rate": 0.0005875906540634567, "loss": 3.1134, "step": 5624 }, { "epoch": 0.28, "grad_norm": 0.5950229167938232, "learning_rate": 0.0005875862713665705, "loss": 3.5233, "step": 5625 }, { "epoch": 0.28, "grad_norm": 0.5315006375312805, "learning_rate": 0.0005875818879122373, "loss": 3.6534, "step": 5626 }, { "epoch": 0.28, "grad_norm": 0.5488346815109253, "learning_rate": 0.0005875775037004689, "loss": 3.3631, "step": 5627 }, { "epoch": 0.28, "grad_norm": 0.5246545672416687, "learning_rate": 0.0005875731187312766, "loss": 3.5114, "step": 5628 }, { "epoch": 0.28, "grad_norm": 0.5800779461860657, "learning_rate": 0.000587568733004672, "loss": 3.4159, "step": 5629 }, { "epoch": 0.28, "grad_norm": 0.5169866681098938, "learning_rate": 0.0005875643465206668, "loss": 3.2865, "step": 5630 }, { "epoch": 0.28, "grad_norm": 0.5645882487297058, "learning_rate": 0.0005875599592792724, "loss": 3.4028, "step": 5631 }, { "epoch": 0.28, "grad_norm": 0.5463460087776184, "learning_rate": 0.0005875555712805005, "loss": 3.4289, "step": 5632 }, { "epoch": 0.28, "grad_norm": 0.5568528771400452, "learning_rate": 0.0005875511825243624, "loss": 3.5229, "step": 5633 }, { "epoch": 0.28, "grad_norm": 0.529748260974884, "learning_rate": 0.00058754679301087, "loss": 3.4635, "step": 5634 }, { "epoch": 0.28, "grad_norm": 0.5450758337974548, "learning_rate": 0.0005875424027400346, "loss": 3.1628, "step": 5635 }, { "epoch": 0.28, "grad_norm": 0.536340594291687, "learning_rate": 0.0005875380117118679, "loss": 3.4757, "step": 5636 }, { "epoch": 0.28, "grad_norm": 0.5831730961799622, "learning_rate": 0.0005875336199263814, "loss": 3.4245, "step": 5637 }, { "epoch": 0.28, "grad_norm": 0.532107949256897, "learning_rate": 0.0005875292273835865, "loss": 3.304, "step": 5638 }, { "epoch": 0.28, "grad_norm": 0.5280243754386902, "learning_rate": 0.0005875248340834953, "loss": 3.2755, "step": 5639 }, { "epoch": 0.28, "grad_norm": 0.5418915748596191, "learning_rate": 0.0005875204400261187, "loss": 3.337, "step": 5640 }, { "epoch": 0.28, "grad_norm": 0.5245290398597717, "learning_rate": 0.0005875160452114689, "loss": 3.528, "step": 5641 }, { "epoch": 0.28, "grad_norm": 0.5836740732192993, "learning_rate": 0.0005875116496395569, "loss": 3.3991, "step": 5642 }, { "epoch": 0.28, "grad_norm": 0.5641128420829773, "learning_rate": 0.0005875072533103946, "loss": 3.2812, "step": 5643 }, { "epoch": 0.28, "grad_norm": 0.5404115915298462, "learning_rate": 0.0005875028562239936, "loss": 3.3253, "step": 5644 }, { "epoch": 0.28, "grad_norm": 0.5600898861885071, "learning_rate": 0.0005874984583803653, "loss": 3.2992, "step": 5645 }, { "epoch": 0.28, "grad_norm": 0.5351735949516296, "learning_rate": 0.0005874940597795215, "loss": 3.5004, "step": 5646 }, { "epoch": 0.28, "grad_norm": 0.561495840549469, "learning_rate": 0.0005874896604214737, "loss": 3.392, "step": 5647 }, { "epoch": 0.28, "grad_norm": 0.5557546615600586, "learning_rate": 0.0005874852603062334, "loss": 3.2979, "step": 5648 }, { "epoch": 0.28, "grad_norm": 0.5082600116729736, "learning_rate": 0.0005874808594338122, "loss": 3.5587, "step": 5649 }, { "epoch": 0.28, "grad_norm": 0.5817442536354065, "learning_rate": 0.0005874764578042218, "loss": 3.571, "step": 5650 }, { "epoch": 0.28, "grad_norm": 0.5154877305030823, "learning_rate": 0.0005874720554174738, "loss": 3.4611, "step": 5651 }, { "epoch": 0.28, "grad_norm": 0.5388178825378418, "learning_rate": 0.0005874676522735796, "loss": 3.2679, "step": 5652 }, { "epoch": 0.28, "grad_norm": 0.568677544593811, "learning_rate": 0.000587463248372551, "loss": 3.3036, "step": 5653 }, { "epoch": 0.28, "grad_norm": 0.520805835723877, "learning_rate": 0.0005874588437143996, "loss": 3.561, "step": 5654 }, { "epoch": 0.28, "grad_norm": 0.5892708897590637, "learning_rate": 0.0005874544382991368, "loss": 3.3356, "step": 5655 }, { "epoch": 0.28, "grad_norm": 0.5433066487312317, "learning_rate": 0.0005874500321267743, "loss": 3.4601, "step": 5656 }, { "epoch": 0.28, "grad_norm": 0.6382086873054504, "learning_rate": 0.000587445625197324, "loss": 3.5814, "step": 5657 }, { "epoch": 0.28, "grad_norm": 0.5490455031394958, "learning_rate": 0.000587441217510797, "loss": 3.4123, "step": 5658 }, { "epoch": 0.28, "grad_norm": 0.5612518191337585, "learning_rate": 0.0005874368090672053, "loss": 3.3378, "step": 5659 }, { "epoch": 0.28, "grad_norm": 0.578199565410614, "learning_rate": 0.0005874323998665603, "loss": 3.3815, "step": 5660 }, { "epoch": 0.28, "grad_norm": 0.5689168572425842, "learning_rate": 0.0005874279899088735, "loss": 3.3816, "step": 5661 }, { "epoch": 0.28, "grad_norm": 0.5449416637420654, "learning_rate": 0.0005874235791941569, "loss": 3.4455, "step": 5662 }, { "epoch": 0.28, "grad_norm": 0.5448278784751892, "learning_rate": 0.0005874191677224218, "loss": 3.6087, "step": 5663 }, { "epoch": 0.28, "grad_norm": 0.521791934967041, "learning_rate": 0.00058741475549368, "loss": 3.3265, "step": 5664 }, { "epoch": 0.28, "grad_norm": 0.5685543417930603, "learning_rate": 0.000587410342507943, "loss": 3.4758, "step": 5665 }, { "epoch": 0.28, "grad_norm": 0.5723705291748047, "learning_rate": 0.0005874059287652225, "loss": 3.3845, "step": 5666 }, { "epoch": 0.28, "grad_norm": 0.5475627779960632, "learning_rate": 0.0005874015142655302, "loss": 3.4733, "step": 5667 }, { "epoch": 0.28, "grad_norm": 0.565759539604187, "learning_rate": 0.0005873970990088775, "loss": 3.0086, "step": 5668 }, { "epoch": 0.28, "grad_norm": 0.5776841640472412, "learning_rate": 0.0005873926829952762, "loss": 3.4155, "step": 5669 }, { "epoch": 0.28, "grad_norm": 0.5339114665985107, "learning_rate": 0.0005873882662247378, "loss": 3.4948, "step": 5670 }, { "epoch": 0.28, "grad_norm": 0.5639076828956604, "learning_rate": 0.000587383848697274, "loss": 3.1924, "step": 5671 }, { "epoch": 0.28, "grad_norm": 0.5452633500099182, "learning_rate": 0.0005873794304128966, "loss": 3.151, "step": 5672 }, { "epoch": 0.28, "grad_norm": 0.5319305658340454, "learning_rate": 0.000587375011371617, "loss": 3.4559, "step": 5673 }, { "epoch": 0.28, "grad_norm": 0.551712691783905, "learning_rate": 0.000587370591573447, "loss": 3.5398, "step": 5674 }, { "epoch": 0.28, "grad_norm": 0.5740674138069153, "learning_rate": 0.000587366171018398, "loss": 3.415, "step": 5675 }, { "epoch": 0.28, "grad_norm": 0.5360549688339233, "learning_rate": 0.000587361749706482, "loss": 3.3225, "step": 5676 }, { "epoch": 0.28, "grad_norm": 0.5476113557815552, "learning_rate": 0.0005873573276377103, "loss": 3.6012, "step": 5677 }, { "epoch": 0.28, "grad_norm": 0.5887551307678223, "learning_rate": 0.0005873529048120948, "loss": 3.3345, "step": 5678 }, { "epoch": 0.28, "grad_norm": 0.5409665107727051, "learning_rate": 0.000587348481229647, "loss": 3.3906, "step": 5679 }, { "epoch": 0.28, "grad_norm": 0.5526324510574341, "learning_rate": 0.0005873440568903785, "loss": 3.3438, "step": 5680 }, { "epoch": 0.28, "grad_norm": 0.5718168616294861, "learning_rate": 0.0005873396317943013, "loss": 3.4642, "step": 5681 }, { "epoch": 0.28, "grad_norm": 0.5631795525550842, "learning_rate": 0.0005873352059414267, "loss": 3.2674, "step": 5682 }, { "epoch": 0.28, "grad_norm": 0.6147184371948242, "learning_rate": 0.0005873307793317664, "loss": 3.2464, "step": 5683 }, { "epoch": 0.28, "grad_norm": 0.6067205667495728, "learning_rate": 0.0005873263519653323, "loss": 3.4046, "step": 5684 }, { "epoch": 0.28, "grad_norm": 0.542724072933197, "learning_rate": 0.0005873219238421356, "loss": 3.5222, "step": 5685 }, { "epoch": 0.28, "grad_norm": 0.5695931315422058, "learning_rate": 0.0005873174949621885, "loss": 3.4056, "step": 5686 }, { "epoch": 0.28, "grad_norm": 0.5388402938842773, "learning_rate": 0.0005873130653255023, "loss": 3.4411, "step": 5687 }, { "epoch": 0.28, "grad_norm": 0.6163471341133118, "learning_rate": 0.0005873086349320888, "loss": 3.3085, "step": 5688 }, { "epoch": 0.28, "grad_norm": 0.5419004559516907, "learning_rate": 0.0005873042037819597, "loss": 3.5004, "step": 5689 }, { "epoch": 0.28, "grad_norm": 0.5461073517799377, "learning_rate": 0.0005872997718751265, "loss": 3.3721, "step": 5690 }, { "epoch": 0.28, "grad_norm": 0.5264971852302551, "learning_rate": 0.0005872953392116011, "loss": 3.3873, "step": 5691 }, { "epoch": 0.28, "grad_norm": 0.5653229355812073, "learning_rate": 0.0005872909057913951, "loss": 3.3069, "step": 5692 }, { "epoch": 0.28, "grad_norm": 0.5882543325424194, "learning_rate": 0.00058728647161452, "loss": 3.1904, "step": 5693 }, { "epoch": 0.28, "grad_norm": 0.5486871600151062, "learning_rate": 0.0005872820366809877, "loss": 3.2986, "step": 5694 }, { "epoch": 0.28, "grad_norm": 0.53853440284729, "learning_rate": 0.0005872776009908099, "loss": 3.4683, "step": 5695 }, { "epoch": 0.28, "grad_norm": 0.5672714710235596, "learning_rate": 0.000587273164543998, "loss": 3.4573, "step": 5696 }, { "epoch": 0.28, "grad_norm": 0.5596035122871399, "learning_rate": 0.000587268727340564, "loss": 3.556, "step": 5697 }, { "epoch": 0.28, "grad_norm": 0.5245596766471863, "learning_rate": 0.0005872642893805194, "loss": 3.3127, "step": 5698 }, { "epoch": 0.28, "grad_norm": 0.5364984273910522, "learning_rate": 0.0005872598506638761, "loss": 3.3348, "step": 5699 }, { "epoch": 0.28, "grad_norm": 0.5501704812049866, "learning_rate": 0.0005872554111906454, "loss": 3.352, "step": 5700 }, { "epoch": 0.28, "grad_norm": 0.5427786707878113, "learning_rate": 0.0005872509709608394, "loss": 3.4774, "step": 5701 }, { "epoch": 0.28, "grad_norm": 0.5833746194839478, "learning_rate": 0.0005872465299744696, "loss": 3.2135, "step": 5702 }, { "epoch": 0.28, "grad_norm": 0.5424846410751343, "learning_rate": 0.0005872420882315476, "loss": 3.4114, "step": 5703 }, { "epoch": 0.28, "grad_norm": 0.5223133563995361, "learning_rate": 0.0005872376457320853, "loss": 3.2703, "step": 5704 }, { "epoch": 0.28, "grad_norm": 0.5719478726387024, "learning_rate": 0.0005872332024760944, "loss": 3.5279, "step": 5705 }, { "epoch": 0.28, "grad_norm": 0.5417484641075134, "learning_rate": 0.0005872287584635864, "loss": 3.5221, "step": 5706 }, { "epoch": 0.28, "grad_norm": 0.5238466262817383, "learning_rate": 0.0005872243136945732, "loss": 3.3905, "step": 5707 }, { "epoch": 0.28, "grad_norm": 0.5247589349746704, "learning_rate": 0.0005872198681690664, "loss": 3.3364, "step": 5708 }, { "epoch": 0.28, "grad_norm": 0.5623005628585815, "learning_rate": 0.0005872154218870778, "loss": 3.3918, "step": 5709 }, { "epoch": 0.28, "grad_norm": 0.6546081900596619, "learning_rate": 0.0005872109748486189, "loss": 3.6369, "step": 5710 }, { "epoch": 0.28, "grad_norm": 0.5542550683021545, "learning_rate": 0.0005872065270537017, "loss": 3.4674, "step": 5711 }, { "epoch": 0.28, "grad_norm": 0.5579176545143127, "learning_rate": 0.0005872020785023379, "loss": 3.3265, "step": 5712 }, { "epoch": 0.28, "grad_norm": 0.6107970476150513, "learning_rate": 0.0005871976291945388, "loss": 3.5986, "step": 5713 }, { "epoch": 0.28, "grad_norm": 0.534400463104248, "learning_rate": 0.0005871931791303167, "loss": 3.5339, "step": 5714 }, { "epoch": 0.28, "grad_norm": 0.5474011898040771, "learning_rate": 0.000587188728309683, "loss": 3.2851, "step": 5715 }, { "epoch": 0.28, "grad_norm": 0.5572850108146667, "learning_rate": 0.0005871842767326492, "loss": 3.3349, "step": 5716 }, { "epoch": 0.28, "grad_norm": 0.6158696413040161, "learning_rate": 0.0005871798243992276, "loss": 3.3417, "step": 5717 }, { "epoch": 0.28, "grad_norm": 0.5303359627723694, "learning_rate": 0.0005871753713094294, "loss": 3.5762, "step": 5718 }, { "epoch": 0.28, "grad_norm": 0.5638172030448914, "learning_rate": 0.0005871709174632666, "loss": 3.3489, "step": 5719 }, { "epoch": 0.28, "grad_norm": 0.5369972586631775, "learning_rate": 0.0005871664628607509, "loss": 3.2923, "step": 5720 }, { "epoch": 0.28, "grad_norm": 0.6055926084518433, "learning_rate": 0.000587162007501894, "loss": 3.145, "step": 5721 }, { "epoch": 0.28, "grad_norm": 0.5599731802940369, "learning_rate": 0.0005871575513867076, "loss": 3.5095, "step": 5722 }, { "epoch": 0.28, "grad_norm": 0.5579766035079956, "learning_rate": 0.0005871530945152035, "loss": 3.6883, "step": 5723 }, { "epoch": 0.28, "grad_norm": 0.5651584267616272, "learning_rate": 0.0005871486368873934, "loss": 3.4495, "step": 5724 }, { "epoch": 0.28, "grad_norm": 0.6099211573600769, "learning_rate": 0.000587144178503289, "loss": 3.2469, "step": 5725 }, { "epoch": 0.28, "grad_norm": 0.556201159954071, "learning_rate": 0.0005871397193629022, "loss": 3.5754, "step": 5726 }, { "epoch": 0.28, "grad_norm": 0.5447879433631897, "learning_rate": 0.0005871352594662446, "loss": 3.4798, "step": 5727 }, { "epoch": 0.28, "grad_norm": 0.5088903307914734, "learning_rate": 0.000587130798813328, "loss": 3.4272, "step": 5728 }, { "epoch": 0.28, "grad_norm": 0.5600134134292603, "learning_rate": 0.0005871263374041642, "loss": 3.3428, "step": 5729 }, { "epoch": 0.28, "grad_norm": 0.5825614333152771, "learning_rate": 0.0005871218752387647, "loss": 3.3385, "step": 5730 }, { "epoch": 0.28, "grad_norm": 0.5817743539810181, "learning_rate": 0.0005871174123171415, "loss": 3.3358, "step": 5731 }, { "epoch": 0.28, "grad_norm": 0.5553821325302124, "learning_rate": 0.0005871129486393064, "loss": 3.3266, "step": 5732 }, { "epoch": 0.28, "grad_norm": 0.583930253982544, "learning_rate": 0.0005871084842052711, "loss": 3.3519, "step": 5733 }, { "epoch": 0.28, "grad_norm": 0.5544360280036926, "learning_rate": 0.0005871040190150471, "loss": 3.2843, "step": 5734 }, { "epoch": 0.28, "grad_norm": 0.5816003084182739, "learning_rate": 0.0005870995530686465, "loss": 3.5052, "step": 5735 }, { "epoch": 0.28, "grad_norm": 0.5861048698425293, "learning_rate": 0.0005870950863660808, "loss": 3.4918, "step": 5736 }, { "epoch": 0.28, "grad_norm": 0.5465466380119324, "learning_rate": 0.000587090618907362, "loss": 3.5772, "step": 5737 }, { "epoch": 0.28, "grad_norm": 0.5588349103927612, "learning_rate": 0.0005870861506925018, "loss": 3.5592, "step": 5738 }, { "epoch": 0.28, "grad_norm": 0.558573842048645, "learning_rate": 0.0005870816817215119, "loss": 3.4306, "step": 5739 }, { "epoch": 0.28, "grad_norm": 0.5814093351364136, "learning_rate": 0.0005870772119944041, "loss": 3.3825, "step": 5740 }, { "epoch": 0.28, "grad_norm": 0.5688466429710388, "learning_rate": 0.0005870727415111901, "loss": 3.4397, "step": 5741 }, { "epoch": 0.28, "grad_norm": 0.5587722063064575, "learning_rate": 0.0005870682702718817, "loss": 3.4223, "step": 5742 }, { "epoch": 0.28, "grad_norm": 0.578432559967041, "learning_rate": 0.000587063798276491, "loss": 3.6142, "step": 5743 }, { "epoch": 0.28, "grad_norm": 0.5772233605384827, "learning_rate": 0.0005870593255250293, "loss": 3.4018, "step": 5744 }, { "epoch": 0.28, "grad_norm": 0.5719894170761108, "learning_rate": 0.0005870548520175086, "loss": 3.3818, "step": 5745 }, { "epoch": 0.28, "grad_norm": 0.5581337213516235, "learning_rate": 0.0005870503777539406, "loss": 3.6375, "step": 5746 }, { "epoch": 0.28, "grad_norm": 0.5520333051681519, "learning_rate": 0.0005870459027343373, "loss": 3.5316, "step": 5747 }, { "epoch": 0.28, "grad_norm": 0.524156928062439, "learning_rate": 0.0005870414269587102, "loss": 3.2556, "step": 5748 }, { "epoch": 0.28, "grad_norm": 0.5664880871772766, "learning_rate": 0.0005870369504270713, "loss": 3.3596, "step": 5749 }, { "epoch": 0.28, "grad_norm": 0.5417056679725647, "learning_rate": 0.0005870324731394323, "loss": 3.4458, "step": 5750 }, { "epoch": 0.28, "grad_norm": 0.5310121774673462, "learning_rate": 0.0005870279950958051, "loss": 3.3986, "step": 5751 }, { "epoch": 0.28, "grad_norm": 0.5482550263404846, "learning_rate": 0.0005870235162962012, "loss": 3.2324, "step": 5752 }, { "epoch": 0.28, "grad_norm": 0.6072642207145691, "learning_rate": 0.0005870190367406327, "loss": 3.5553, "step": 5753 }, { "epoch": 0.28, "grad_norm": 0.5456782579421997, "learning_rate": 0.0005870145564291113, "loss": 3.18, "step": 5754 }, { "epoch": 0.28, "grad_norm": 0.5461390018463135, "learning_rate": 0.0005870100753616488, "loss": 3.4445, "step": 5755 }, { "epoch": 0.28, "grad_norm": 0.5472182035446167, "learning_rate": 0.000587005593538257, "loss": 3.4471, "step": 5756 }, { "epoch": 0.28, "grad_norm": 0.5583928823471069, "learning_rate": 0.0005870011109589477, "loss": 3.3076, "step": 5757 }, { "epoch": 0.28, "grad_norm": 0.5505167841911316, "learning_rate": 0.0005869966276237327, "loss": 3.3274, "step": 5758 }, { "epoch": 0.28, "grad_norm": 0.5618990659713745, "learning_rate": 0.0005869921435326238, "loss": 3.4508, "step": 5759 }, { "epoch": 0.28, "grad_norm": 0.5286376476287842, "learning_rate": 0.0005869876586856328, "loss": 3.207, "step": 5760 }, { "epoch": 0.28, "grad_norm": 0.5474581718444824, "learning_rate": 0.0005869831730827715, "loss": 3.6744, "step": 5761 }, { "epoch": 0.28, "grad_norm": 0.5827766060829163, "learning_rate": 0.0005869786867240519, "loss": 3.3691, "step": 5762 }, { "epoch": 0.28, "grad_norm": 0.5204379558563232, "learning_rate": 0.0005869741996094856, "loss": 3.7098, "step": 5763 }, { "epoch": 0.28, "grad_norm": 0.5318394899368286, "learning_rate": 0.0005869697117390846, "loss": 3.5282, "step": 5764 }, { "epoch": 0.28, "grad_norm": 0.520143985748291, "learning_rate": 0.0005869652231128604, "loss": 3.621, "step": 5765 }, { "epoch": 0.28, "grad_norm": 0.5361225008964539, "learning_rate": 0.0005869607337308251, "loss": 3.4061, "step": 5766 }, { "epoch": 0.28, "grad_norm": 0.6079183220863342, "learning_rate": 0.0005869562435929905, "loss": 3.387, "step": 5767 }, { "epoch": 0.28, "grad_norm": 0.5326195359230042, "learning_rate": 0.0005869517526993684, "loss": 3.4427, "step": 5768 }, { "epoch": 0.28, "grad_norm": 0.541530191898346, "learning_rate": 0.0005869472610499706, "loss": 3.4417, "step": 5769 }, { "epoch": 0.28, "grad_norm": 0.5230389833450317, "learning_rate": 0.0005869427686448088, "loss": 3.2022, "step": 5770 }, { "epoch": 0.28, "grad_norm": 0.566440761089325, "learning_rate": 0.0005869382754838951, "loss": 3.3521, "step": 5771 }, { "epoch": 0.28, "grad_norm": 0.5397255420684814, "learning_rate": 0.0005869337815672413, "loss": 3.5468, "step": 5772 }, { "epoch": 0.28, "grad_norm": 0.5574547648429871, "learning_rate": 0.0005869292868948589, "loss": 3.13, "step": 5773 }, { "epoch": 0.28, "grad_norm": 0.5912591218948364, "learning_rate": 0.0005869247914667601, "loss": 3.2856, "step": 5774 }, { "epoch": 0.28, "grad_norm": 0.5619760751724243, "learning_rate": 0.0005869202952829567, "loss": 3.3057, "step": 5775 }, { "epoch": 0.28, "grad_norm": 0.5392544865608215, "learning_rate": 0.0005869157983434604, "loss": 3.4446, "step": 5776 }, { "epoch": 0.28, "grad_norm": 0.5367756485939026, "learning_rate": 0.000586911300648283, "loss": 3.4562, "step": 5777 }, { "epoch": 0.28, "grad_norm": 0.5628572702407837, "learning_rate": 0.0005869068021974366, "loss": 3.5371, "step": 5778 }, { "epoch": 0.28, "grad_norm": 0.5055538415908813, "learning_rate": 0.0005869023029909328, "loss": 3.5217, "step": 5779 }, { "epoch": 0.28, "grad_norm": 0.546389102935791, "learning_rate": 0.0005868978030287836, "loss": 3.4341, "step": 5780 }, { "epoch": 0.28, "grad_norm": 0.5764334201812744, "learning_rate": 0.0005868933023110008, "loss": 3.0859, "step": 5781 }, { "epoch": 0.28, "grad_norm": 0.6767985820770264, "learning_rate": 0.0005868888008375963, "loss": 3.332, "step": 5782 }, { "epoch": 0.28, "grad_norm": 0.5384335517883301, "learning_rate": 0.0005868842986085818, "loss": 3.4159, "step": 5783 }, { "epoch": 0.28, "grad_norm": 0.5437288284301758, "learning_rate": 0.0005868797956239693, "loss": 3.2161, "step": 5784 }, { "epoch": 0.28, "grad_norm": 0.5375307202339172, "learning_rate": 0.0005868752918837707, "loss": 3.3611, "step": 5785 }, { "epoch": 0.28, "grad_norm": 0.5247658491134644, "learning_rate": 0.0005868707873879978, "loss": 3.2946, "step": 5786 }, { "epoch": 0.28, "grad_norm": 0.5466168522834778, "learning_rate": 0.0005868662821366624, "loss": 3.4666, "step": 5787 }, { "epoch": 0.28, "grad_norm": 0.5654569864273071, "learning_rate": 0.0005868617761297764, "loss": 3.4604, "step": 5788 }, { "epoch": 0.28, "grad_norm": 0.5402109026908875, "learning_rate": 0.0005868572693673516, "loss": 3.359, "step": 5789 }, { "epoch": 0.28, "grad_norm": 0.517817497253418, "learning_rate": 0.0005868527618494001, "loss": 3.3345, "step": 5790 }, { "epoch": 0.28, "grad_norm": 0.49960243701934814, "learning_rate": 0.0005868482535759337, "loss": 3.4715, "step": 5791 }, { "epoch": 0.28, "grad_norm": 0.540615439414978, "learning_rate": 0.000586843744546964, "loss": 3.4621, "step": 5792 }, { "epoch": 0.28, "grad_norm": 0.5866577625274658, "learning_rate": 0.0005868392347625032, "loss": 3.2817, "step": 5793 }, { "epoch": 0.28, "grad_norm": 0.5393642783164978, "learning_rate": 0.0005868347242225632, "loss": 3.2075, "step": 5794 }, { "epoch": 0.28, "grad_norm": 0.5420916080474854, "learning_rate": 0.0005868302129271555, "loss": 3.4428, "step": 5795 }, { "epoch": 0.28, "grad_norm": 0.5432751774787903, "learning_rate": 0.0005868257008762924, "loss": 3.4497, "step": 5796 }, { "epoch": 0.28, "grad_norm": 0.5415337085723877, "learning_rate": 0.0005868211880699854, "loss": 3.3068, "step": 5797 }, { "epoch": 0.28, "grad_norm": 0.5475549697875977, "learning_rate": 0.0005868166745082468, "loss": 3.4784, "step": 5798 }, { "epoch": 0.28, "grad_norm": 0.5423837900161743, "learning_rate": 0.0005868121601910882, "loss": 3.2155, "step": 5799 }, { "epoch": 0.28, "grad_norm": 0.52655428647995, "learning_rate": 0.0005868076451185215, "loss": 3.4475, "step": 5800 }, { "epoch": 0.28, "grad_norm": 0.5762920379638672, "learning_rate": 0.0005868031292905586, "loss": 3.3285, "step": 5801 }, { "epoch": 0.28, "grad_norm": 0.5149837732315063, "learning_rate": 0.0005867986127072116, "loss": 3.5152, "step": 5802 }, { "epoch": 0.28, "grad_norm": 0.5225587487220764, "learning_rate": 0.0005867940953684921, "loss": 3.276, "step": 5803 }, { "epoch": 0.28, "grad_norm": 0.6301308870315552, "learning_rate": 0.0005867895772744124, "loss": 3.3187, "step": 5804 }, { "epoch": 0.28, "grad_norm": 0.5626131892204285, "learning_rate": 0.0005867850584249841, "loss": 3.4969, "step": 5805 }, { "epoch": 0.28, "grad_norm": 0.5513469576835632, "learning_rate": 0.0005867805388202189, "loss": 3.2877, "step": 5806 }, { "epoch": 0.28, "grad_norm": 0.5425329208374023, "learning_rate": 0.0005867760184601292, "loss": 3.5523, "step": 5807 }, { "epoch": 0.28, "grad_norm": 0.5266916155815125, "learning_rate": 0.0005867714973447265, "loss": 3.0832, "step": 5808 }, { "epoch": 0.28, "grad_norm": 0.5299080014228821, "learning_rate": 0.000586766975474023, "loss": 3.4578, "step": 5809 }, { "epoch": 0.28, "grad_norm": 0.510150671005249, "learning_rate": 0.0005867624528480303, "loss": 3.3181, "step": 5810 }, { "epoch": 0.28, "grad_norm": 0.5414705872535706, "learning_rate": 0.0005867579294667606, "loss": 3.4233, "step": 5811 }, { "epoch": 0.28, "grad_norm": 0.5347448587417603, "learning_rate": 0.0005867534053302258, "loss": 3.6438, "step": 5812 }, { "epoch": 0.28, "grad_norm": 0.5466963648796082, "learning_rate": 0.0005867488804384377, "loss": 3.5877, "step": 5813 }, { "epoch": 0.28, "grad_norm": 0.5780187249183655, "learning_rate": 0.0005867443547914081, "loss": 3.4495, "step": 5814 }, { "epoch": 0.28, "grad_norm": 0.5186501145362854, "learning_rate": 0.0005867398283891491, "loss": 3.4562, "step": 5815 }, { "epoch": 0.29, "grad_norm": 0.5260766744613647, "learning_rate": 0.0005867353012316725, "loss": 3.2264, "step": 5816 }, { "epoch": 0.29, "grad_norm": 0.5569272041320801, "learning_rate": 0.0005867307733189905, "loss": 3.3634, "step": 5817 }, { "epoch": 0.29, "grad_norm": 0.5397284030914307, "learning_rate": 0.0005867262446511147, "loss": 3.2977, "step": 5818 }, { "epoch": 0.29, "grad_norm": 0.5917802453041077, "learning_rate": 0.0005867217152280571, "loss": 3.5556, "step": 5819 }, { "epoch": 0.29, "grad_norm": 0.5505132079124451, "learning_rate": 0.0005867171850498298, "loss": 3.4379, "step": 5820 }, { "epoch": 0.29, "grad_norm": 0.5808700919151306, "learning_rate": 0.0005867126541164445, "loss": 3.3594, "step": 5821 }, { "epoch": 0.29, "grad_norm": 0.5809784531593323, "learning_rate": 0.0005867081224279133, "loss": 3.4292, "step": 5822 }, { "epoch": 0.29, "grad_norm": 0.5430020093917847, "learning_rate": 0.0005867035899842481, "loss": 3.3711, "step": 5823 }, { "epoch": 0.29, "grad_norm": 0.6083835959434509, "learning_rate": 0.0005866990567854608, "loss": 3.4043, "step": 5824 }, { "epoch": 0.29, "grad_norm": 0.5165572166442871, "learning_rate": 0.0005866945228315634, "loss": 3.6597, "step": 5825 }, { "epoch": 0.29, "grad_norm": 0.5558134913444519, "learning_rate": 0.0005866899881225678, "loss": 3.2103, "step": 5826 }, { "epoch": 0.29, "grad_norm": 0.5334741473197937, "learning_rate": 0.0005866854526584859, "loss": 3.087, "step": 5827 }, { "epoch": 0.29, "grad_norm": 0.5371214747428894, "learning_rate": 0.0005866809164393297, "loss": 3.4371, "step": 5828 }, { "epoch": 0.29, "grad_norm": 0.5531008839607239, "learning_rate": 0.0005866763794651111, "loss": 3.2865, "step": 5829 }, { "epoch": 0.29, "grad_norm": 0.5216488242149353, "learning_rate": 0.0005866718417358421, "loss": 3.4875, "step": 5830 }, { "epoch": 0.29, "grad_norm": 0.542599618434906, "learning_rate": 0.0005866673032515347, "loss": 3.3621, "step": 5831 }, { "epoch": 0.29, "grad_norm": 0.5730884075164795, "learning_rate": 0.0005866627640122008, "loss": 3.2273, "step": 5832 }, { "epoch": 0.29, "grad_norm": 0.5188013911247253, "learning_rate": 0.0005866582240178523, "loss": 3.2409, "step": 5833 }, { "epoch": 0.29, "grad_norm": 0.5457878708839417, "learning_rate": 0.0005866536832685013, "loss": 3.2904, "step": 5834 }, { "epoch": 0.29, "grad_norm": 0.569889485836029, "learning_rate": 0.0005866491417641595, "loss": 3.4469, "step": 5835 }, { "epoch": 0.29, "grad_norm": 0.5618494153022766, "learning_rate": 0.0005866445995048392, "loss": 3.3618, "step": 5836 }, { "epoch": 0.29, "grad_norm": 0.5545337796211243, "learning_rate": 0.0005866400564905521, "loss": 3.3963, "step": 5837 }, { "epoch": 0.29, "grad_norm": 0.5397124290466309, "learning_rate": 0.0005866355127213102, "loss": 3.4804, "step": 5838 }, { "epoch": 0.29, "grad_norm": 0.5252190828323364, "learning_rate": 0.0005866309681971256, "loss": 3.5564, "step": 5839 }, { "epoch": 0.29, "grad_norm": 0.5801165699958801, "learning_rate": 0.0005866264229180102, "loss": 3.5835, "step": 5840 }, { "epoch": 0.29, "grad_norm": 0.536383330821991, "learning_rate": 0.000586621876883976, "loss": 3.4126, "step": 5841 }, { "epoch": 0.29, "grad_norm": 0.5214142203330994, "learning_rate": 0.0005866173300950349, "loss": 3.2283, "step": 5842 }, { "epoch": 0.29, "grad_norm": 0.5730195641517639, "learning_rate": 0.0005866127825511989, "loss": 3.5788, "step": 5843 }, { "epoch": 0.29, "grad_norm": 0.527881383895874, "learning_rate": 0.00058660823425248, "loss": 3.5447, "step": 5844 }, { "epoch": 0.29, "grad_norm": 0.5443788766860962, "learning_rate": 0.0005866036851988901, "loss": 3.2235, "step": 5845 }, { "epoch": 0.29, "grad_norm": 0.6077579855918884, "learning_rate": 0.0005865991353904413, "loss": 3.3991, "step": 5846 }, { "epoch": 0.29, "grad_norm": 0.578214704990387, "learning_rate": 0.0005865945848271455, "loss": 3.2017, "step": 5847 }, { "epoch": 0.29, "grad_norm": 0.6207023859024048, "learning_rate": 0.0005865900335090149, "loss": 3.4464, "step": 5848 }, { "epoch": 0.29, "grad_norm": 0.5436177253723145, "learning_rate": 0.0005865854814360612, "loss": 3.3529, "step": 5849 }, { "epoch": 0.29, "grad_norm": 0.6536390781402588, "learning_rate": 0.0005865809286082963, "loss": 3.4443, "step": 5850 }, { "epoch": 0.29, "grad_norm": 0.5636897087097168, "learning_rate": 0.0005865763750257327, "loss": 3.4031, "step": 5851 }, { "epoch": 0.29, "grad_norm": 0.5775346159934998, "learning_rate": 0.0005865718206883819, "loss": 3.2281, "step": 5852 }, { "epoch": 0.29, "grad_norm": 0.5930302143096924, "learning_rate": 0.0005865672655962561, "loss": 3.2219, "step": 5853 }, { "epoch": 0.29, "grad_norm": 0.5139269828796387, "learning_rate": 0.0005865627097493672, "loss": 3.5864, "step": 5854 }, { "epoch": 0.29, "grad_norm": 0.5036550760269165, "learning_rate": 0.0005865581531477274, "loss": 3.5422, "step": 5855 }, { "epoch": 0.29, "grad_norm": 0.6104763746261597, "learning_rate": 0.0005865535957913486, "loss": 3.4522, "step": 5856 }, { "epoch": 0.29, "grad_norm": 0.5676093697547913, "learning_rate": 0.0005865490376802427, "loss": 3.4295, "step": 5857 }, { "epoch": 0.29, "grad_norm": 0.538794994354248, "learning_rate": 0.0005865444788144217, "loss": 3.2915, "step": 5858 }, { "epoch": 0.29, "grad_norm": 0.5480157732963562, "learning_rate": 0.0005865399191938979, "loss": 3.5967, "step": 5859 }, { "epoch": 0.29, "grad_norm": 0.5287188291549683, "learning_rate": 0.000586535358818683, "loss": 3.4753, "step": 5860 }, { "epoch": 0.29, "grad_norm": 0.5639832615852356, "learning_rate": 0.0005865307976887891, "loss": 3.1815, "step": 5861 }, { "epoch": 0.29, "grad_norm": 0.5048469305038452, "learning_rate": 0.0005865262358042281, "loss": 3.4191, "step": 5862 }, { "epoch": 0.29, "grad_norm": 0.5484185814857483, "learning_rate": 0.0005865216731650123, "loss": 3.4258, "step": 5863 }, { "epoch": 0.29, "grad_norm": 0.5514761209487915, "learning_rate": 0.0005865171097711535, "loss": 3.2908, "step": 5864 }, { "epoch": 0.29, "grad_norm": 0.6602954864501953, "learning_rate": 0.0005865125456226638, "loss": 3.5131, "step": 5865 }, { "epoch": 0.29, "grad_norm": 0.6218395829200745, "learning_rate": 0.0005865079807195552, "loss": 3.3947, "step": 5866 }, { "epoch": 0.29, "grad_norm": 0.5158183574676514, "learning_rate": 0.0005865034150618397, "loss": 3.2724, "step": 5867 }, { "epoch": 0.29, "grad_norm": 0.5524356961250305, "learning_rate": 0.0005864988486495293, "loss": 3.1983, "step": 5868 }, { "epoch": 0.29, "grad_norm": 0.5388345718383789, "learning_rate": 0.000586494281482636, "loss": 3.4331, "step": 5869 }, { "epoch": 0.29, "grad_norm": 0.532199501991272, "learning_rate": 0.0005864897135611721, "loss": 3.3253, "step": 5870 }, { "epoch": 0.29, "grad_norm": 0.6542239189147949, "learning_rate": 0.0005864851448851493, "loss": 3.3342, "step": 5871 }, { "epoch": 0.29, "grad_norm": 0.5302449464797974, "learning_rate": 0.0005864805754545798, "loss": 3.2958, "step": 5872 }, { "epoch": 0.29, "grad_norm": 0.5232753753662109, "learning_rate": 0.0005864760052694756, "loss": 3.6324, "step": 5873 }, { "epoch": 0.29, "grad_norm": 0.6836511492729187, "learning_rate": 0.0005864714343298488, "loss": 3.4663, "step": 5874 }, { "epoch": 0.29, "grad_norm": 0.5641322135925293, "learning_rate": 0.0005864668626357112, "loss": 3.3574, "step": 5875 }, { "epoch": 0.29, "grad_norm": 0.5231789350509644, "learning_rate": 0.0005864622901870753, "loss": 3.4077, "step": 5876 }, { "epoch": 0.29, "grad_norm": 0.5678579807281494, "learning_rate": 0.0005864577169839525, "loss": 3.2796, "step": 5877 }, { "epoch": 0.29, "grad_norm": 0.5374036431312561, "learning_rate": 0.0005864531430263555, "loss": 3.342, "step": 5878 }, { "epoch": 0.29, "grad_norm": 0.5334300994873047, "learning_rate": 0.000586448568314296, "loss": 3.6034, "step": 5879 }, { "epoch": 0.29, "grad_norm": 0.5521325469017029, "learning_rate": 0.0005864439928477859, "loss": 3.1953, "step": 5880 }, { "epoch": 0.29, "grad_norm": 0.563149094581604, "learning_rate": 0.0005864394166268376, "loss": 3.4593, "step": 5881 }, { "epoch": 0.29, "grad_norm": 0.5999089479446411, "learning_rate": 0.000586434839651463, "loss": 3.5447, "step": 5882 }, { "epoch": 0.29, "grad_norm": 0.5314249992370605, "learning_rate": 0.0005864302619216742, "loss": 3.0678, "step": 5883 }, { "epoch": 0.29, "grad_norm": 0.5752025842666626, "learning_rate": 0.0005864256834374832, "loss": 3.3008, "step": 5884 }, { "epoch": 0.29, "grad_norm": 0.6359145045280457, "learning_rate": 0.0005864211041989021, "loss": 3.1086, "step": 5885 }, { "epoch": 0.29, "grad_norm": 0.5294399857521057, "learning_rate": 0.0005864165242059427, "loss": 3.4525, "step": 5886 }, { "epoch": 0.29, "grad_norm": 0.5078052878379822, "learning_rate": 0.0005864119434586176, "loss": 3.6026, "step": 5887 }, { "epoch": 0.29, "grad_norm": 0.5631575584411621, "learning_rate": 0.0005864073619569383, "loss": 3.3792, "step": 5888 }, { "epoch": 0.29, "grad_norm": 0.5679768323898315, "learning_rate": 0.0005864027797009173, "loss": 3.169, "step": 5889 }, { "epoch": 0.29, "grad_norm": 0.5411725044250488, "learning_rate": 0.0005863981966905665, "loss": 3.448, "step": 5890 }, { "epoch": 0.29, "grad_norm": 0.5465644598007202, "learning_rate": 0.0005863936129258979, "loss": 3.3566, "step": 5891 }, { "epoch": 0.29, "grad_norm": 0.5652481317520142, "learning_rate": 0.0005863890284069236, "loss": 3.1992, "step": 5892 }, { "epoch": 0.29, "grad_norm": 0.5486741065979004, "learning_rate": 0.0005863844431336559, "loss": 3.1683, "step": 5893 }, { "epoch": 0.29, "grad_norm": 0.5372270941734314, "learning_rate": 0.0005863798571061065, "loss": 3.3014, "step": 5894 }, { "epoch": 0.29, "grad_norm": 0.609737753868103, "learning_rate": 0.0005863752703242878, "loss": 3.0947, "step": 5895 }, { "epoch": 0.29, "grad_norm": 0.5803235769271851, "learning_rate": 0.0005863706827882116, "loss": 3.4442, "step": 5896 }, { "epoch": 0.29, "grad_norm": 0.560324490070343, "learning_rate": 0.0005863660944978904, "loss": 3.2564, "step": 5897 }, { "epoch": 0.29, "grad_norm": 0.532012403011322, "learning_rate": 0.0005863615054533357, "loss": 3.5374, "step": 5898 }, { "epoch": 0.29, "grad_norm": 0.6232673525810242, "learning_rate": 0.0005863569156545601, "loss": 3.4147, "step": 5899 }, { "epoch": 0.29, "grad_norm": 0.5468286871910095, "learning_rate": 0.0005863523251015755, "loss": 3.4548, "step": 5900 }, { "epoch": 0.29, "grad_norm": 0.5706323981285095, "learning_rate": 0.0005863477337943939, "loss": 3.3646, "step": 5901 }, { "epoch": 0.29, "grad_norm": 0.5574482083320618, "learning_rate": 0.0005863431417330275, "loss": 3.5875, "step": 5902 }, { "epoch": 0.29, "grad_norm": 0.5878828763961792, "learning_rate": 0.0005863385489174883, "loss": 3.4706, "step": 5903 }, { "epoch": 0.29, "grad_norm": 0.5340185761451721, "learning_rate": 0.0005863339553477887, "loss": 3.5311, "step": 5904 }, { "epoch": 0.29, "grad_norm": 0.5711298584938049, "learning_rate": 0.0005863293610239404, "loss": 3.2338, "step": 5905 }, { "epoch": 0.29, "grad_norm": 0.5839642286300659, "learning_rate": 0.0005863247659459557, "loss": 3.3848, "step": 5906 }, { "epoch": 0.29, "grad_norm": 0.5418100953102112, "learning_rate": 0.0005863201701138466, "loss": 3.4485, "step": 5907 }, { "epoch": 0.29, "grad_norm": 0.5416857600212097, "learning_rate": 0.0005863155735276254, "loss": 3.3346, "step": 5908 }, { "epoch": 0.29, "grad_norm": 0.5420189499855042, "learning_rate": 0.000586310976187304, "loss": 3.391, "step": 5909 }, { "epoch": 0.29, "grad_norm": 0.5418663024902344, "learning_rate": 0.0005863063780928946, "loss": 3.5192, "step": 5910 }, { "epoch": 0.29, "grad_norm": 0.5965156555175781, "learning_rate": 0.0005863017792444092, "loss": 3.3288, "step": 5911 }, { "epoch": 0.29, "grad_norm": 0.5291689038276672, "learning_rate": 0.0005862971796418603, "loss": 3.4977, "step": 5912 }, { "epoch": 0.29, "grad_norm": 0.5859507322311401, "learning_rate": 0.0005862925792852595, "loss": 3.3662, "step": 5913 }, { "epoch": 0.29, "grad_norm": 0.6109771728515625, "learning_rate": 0.0005862879781746192, "loss": 3.2146, "step": 5914 }, { "epoch": 0.29, "grad_norm": 0.5541035532951355, "learning_rate": 0.0005862833763099515, "loss": 3.3712, "step": 5915 }, { "epoch": 0.29, "grad_norm": 0.5736545324325562, "learning_rate": 0.0005862787736912684, "loss": 3.3146, "step": 5916 }, { "epoch": 0.29, "grad_norm": 0.6064590215682983, "learning_rate": 0.0005862741703185822, "loss": 3.2912, "step": 5917 }, { "epoch": 0.29, "grad_norm": 0.5344297885894775, "learning_rate": 0.0005862695661919048, "loss": 3.3818, "step": 5918 }, { "epoch": 0.29, "grad_norm": 0.5522978901863098, "learning_rate": 0.0005862649613112486, "loss": 3.325, "step": 5919 }, { "epoch": 0.29, "grad_norm": 0.5202211141586304, "learning_rate": 0.0005862603556766254, "loss": 3.3577, "step": 5920 }, { "epoch": 0.29, "grad_norm": 0.5692675113677979, "learning_rate": 0.0005862557492880477, "loss": 3.6632, "step": 5921 }, { "epoch": 0.29, "grad_norm": 0.5373372435569763, "learning_rate": 0.0005862511421455274, "loss": 3.3315, "step": 5922 }, { "epoch": 0.29, "grad_norm": 0.5706237554550171, "learning_rate": 0.0005862465342490766, "loss": 3.3529, "step": 5923 }, { "epoch": 0.29, "grad_norm": 0.5713226199150085, "learning_rate": 0.0005862419255987076, "loss": 3.1983, "step": 5924 }, { "epoch": 0.29, "grad_norm": 0.567156195640564, "learning_rate": 0.0005862373161944325, "loss": 3.279, "step": 5925 }, { "epoch": 0.29, "grad_norm": 0.5587449669837952, "learning_rate": 0.0005862327060362633, "loss": 3.3058, "step": 5926 }, { "epoch": 0.29, "grad_norm": 0.5511670112609863, "learning_rate": 0.0005862280951242123, "loss": 3.5264, "step": 5927 }, { "epoch": 0.29, "grad_norm": 0.5433431267738342, "learning_rate": 0.0005862234834582916, "loss": 3.3096, "step": 5928 }, { "epoch": 0.29, "grad_norm": 0.5212329030036926, "learning_rate": 0.0005862188710385131, "loss": 3.2682, "step": 5929 }, { "epoch": 0.29, "grad_norm": 0.5597579479217529, "learning_rate": 0.0005862142578648895, "loss": 3.3191, "step": 5930 }, { "epoch": 0.29, "grad_norm": 0.5288726687431335, "learning_rate": 0.0005862096439374325, "loss": 3.4722, "step": 5931 }, { "epoch": 0.29, "grad_norm": 0.5642876029014587, "learning_rate": 0.0005862050292561544, "loss": 3.4802, "step": 5932 }, { "epoch": 0.29, "grad_norm": 0.5844772458076477, "learning_rate": 0.0005862004138210673, "loss": 3.3419, "step": 5933 }, { "epoch": 0.29, "grad_norm": 0.52292799949646, "learning_rate": 0.0005861957976321834, "loss": 3.49, "step": 5934 }, { "epoch": 0.29, "grad_norm": 0.5201006531715393, "learning_rate": 0.0005861911806895148, "loss": 3.429, "step": 5935 }, { "epoch": 0.29, "grad_norm": 0.5457620024681091, "learning_rate": 0.0005861865629930738, "loss": 3.5125, "step": 5936 }, { "epoch": 0.29, "grad_norm": 0.5805454254150391, "learning_rate": 0.0005861819445428724, "loss": 3.5591, "step": 5937 }, { "epoch": 0.29, "grad_norm": 0.5340372323989868, "learning_rate": 0.0005861773253389228, "loss": 3.2514, "step": 5938 }, { "epoch": 0.29, "grad_norm": 0.5509132146835327, "learning_rate": 0.0005861727053812373, "loss": 3.3563, "step": 5939 }, { "epoch": 0.29, "grad_norm": 0.5151408314704895, "learning_rate": 0.0005861680846698279, "loss": 3.291, "step": 5940 }, { "epoch": 0.29, "grad_norm": 0.517645537853241, "learning_rate": 0.0005861634632047069, "loss": 3.3734, "step": 5941 }, { "epoch": 0.29, "grad_norm": 0.5592371821403503, "learning_rate": 0.0005861588409858864, "loss": 3.3977, "step": 5942 }, { "epoch": 0.29, "grad_norm": 0.5972470641136169, "learning_rate": 0.0005861542180133785, "loss": 3.274, "step": 5943 }, { "epoch": 0.29, "grad_norm": 0.5444090366363525, "learning_rate": 0.0005861495942871955, "loss": 3.2464, "step": 5944 }, { "epoch": 0.29, "grad_norm": 0.5292772054672241, "learning_rate": 0.0005861449698073497, "loss": 3.4715, "step": 5945 }, { "epoch": 0.29, "grad_norm": 0.5135490298271179, "learning_rate": 0.0005861403445738529, "loss": 3.339, "step": 5946 }, { "epoch": 0.29, "grad_norm": 0.5963358283042908, "learning_rate": 0.0005861357185867176, "loss": 3.3574, "step": 5947 }, { "epoch": 0.29, "grad_norm": 0.5650749802589417, "learning_rate": 0.0005861310918459559, "loss": 3.2734, "step": 5948 }, { "epoch": 0.29, "grad_norm": 0.5786062479019165, "learning_rate": 0.0005861264643515799, "loss": 3.3048, "step": 5949 }, { "epoch": 0.29, "grad_norm": 0.578070878982544, "learning_rate": 0.0005861218361036019, "loss": 3.2631, "step": 5950 }, { "epoch": 0.29, "grad_norm": 0.5257975459098816, "learning_rate": 0.000586117207102034, "loss": 3.6505, "step": 5951 }, { "epoch": 0.29, "grad_norm": 0.5603009462356567, "learning_rate": 0.0005861125773468886, "loss": 3.297, "step": 5952 }, { "epoch": 0.29, "grad_norm": 0.6115564703941345, "learning_rate": 0.0005861079468381776, "loss": 3.466, "step": 5953 }, { "epoch": 0.29, "grad_norm": 0.5789349675178528, "learning_rate": 0.0005861033155759133, "loss": 3.4272, "step": 5954 }, { "epoch": 0.29, "grad_norm": 0.565288245677948, "learning_rate": 0.000586098683560108, "loss": 3.5096, "step": 5955 }, { "epoch": 0.29, "grad_norm": 0.563439667224884, "learning_rate": 0.0005860940507907738, "loss": 3.2744, "step": 5956 }, { "epoch": 0.29, "grad_norm": 0.5860108137130737, "learning_rate": 0.0005860894172679229, "loss": 3.4212, "step": 5957 }, { "epoch": 0.29, "grad_norm": 0.5446553826332092, "learning_rate": 0.0005860847829915676, "loss": 3.363, "step": 5958 }, { "epoch": 0.29, "grad_norm": 0.5710722208023071, "learning_rate": 0.0005860801479617201, "loss": 3.2973, "step": 5959 }, { "epoch": 0.29, "grad_norm": 0.5353807806968689, "learning_rate": 0.0005860755121783924, "loss": 3.5851, "step": 5960 }, { "epoch": 0.29, "grad_norm": 0.5594916939735413, "learning_rate": 0.0005860708756415969, "loss": 3.5233, "step": 5961 }, { "epoch": 0.29, "grad_norm": 0.5313096642494202, "learning_rate": 0.0005860662383513458, "loss": 3.3611, "step": 5962 }, { "epoch": 0.29, "grad_norm": 0.5173554420471191, "learning_rate": 0.0005860616003076514, "loss": 3.4742, "step": 5963 }, { "epoch": 0.29, "grad_norm": 0.5095874071121216, "learning_rate": 0.0005860569615105256, "loss": 3.3907, "step": 5964 }, { "epoch": 0.29, "grad_norm": 0.5513678193092346, "learning_rate": 0.0005860523219599809, "loss": 3.3048, "step": 5965 }, { "epoch": 0.29, "grad_norm": 0.5937461853027344, "learning_rate": 0.0005860476816560294, "loss": 3.3119, "step": 5966 }, { "epoch": 0.29, "grad_norm": 0.5583381056785583, "learning_rate": 0.0005860430405986833, "loss": 3.3584, "step": 5967 }, { "epoch": 0.29, "grad_norm": 0.5448343753814697, "learning_rate": 0.000586038398787955, "loss": 3.5657, "step": 5968 }, { "epoch": 0.29, "grad_norm": 0.5197991132736206, "learning_rate": 0.0005860337562238566, "loss": 3.3398, "step": 5969 }, { "epoch": 0.29, "grad_norm": 0.5560016632080078, "learning_rate": 0.0005860291129064003, "loss": 3.2854, "step": 5970 }, { "epoch": 0.29, "grad_norm": 0.5649117231369019, "learning_rate": 0.0005860244688355984, "loss": 3.3855, "step": 5971 }, { "epoch": 0.29, "grad_norm": 0.56504225730896, "learning_rate": 0.000586019824011463, "loss": 3.3499, "step": 5972 }, { "epoch": 0.29, "grad_norm": 0.5295236706733704, "learning_rate": 0.0005860151784340065, "loss": 3.2858, "step": 5973 }, { "epoch": 0.29, "grad_norm": 0.5536357164382935, "learning_rate": 0.0005860105321032411, "loss": 3.3838, "step": 5974 }, { "epoch": 0.29, "grad_norm": 0.5174973011016846, "learning_rate": 0.0005860058850191789, "loss": 3.3564, "step": 5975 }, { "epoch": 0.29, "grad_norm": 0.5602399110794067, "learning_rate": 0.0005860012371818324, "loss": 3.3537, "step": 5976 }, { "epoch": 0.29, "grad_norm": 0.5399395823478699, "learning_rate": 0.0005859965885912135, "loss": 3.35, "step": 5977 }, { "epoch": 0.29, "grad_norm": 0.5447744131088257, "learning_rate": 0.0005859919392473348, "loss": 3.2962, "step": 5978 }, { "epoch": 0.29, "grad_norm": 0.5593130588531494, "learning_rate": 0.0005859872891502083, "loss": 3.3472, "step": 5979 }, { "epoch": 0.29, "grad_norm": 0.5140863656997681, "learning_rate": 0.0005859826382998462, "loss": 3.303, "step": 5980 }, { "epoch": 0.29, "grad_norm": 0.5375498533248901, "learning_rate": 0.0005859779866962609, "loss": 3.3374, "step": 5981 }, { "epoch": 0.29, "grad_norm": 0.5679370164871216, "learning_rate": 0.0005859733343394648, "loss": 3.4518, "step": 5982 }, { "epoch": 0.29, "grad_norm": 0.5210931301116943, "learning_rate": 0.0005859686812294698, "loss": 3.4724, "step": 5983 }, { "epoch": 0.29, "grad_norm": 0.5706584453582764, "learning_rate": 0.0005859640273662885, "loss": 3.5289, "step": 5984 }, { "epoch": 0.29, "grad_norm": 0.5336747169494629, "learning_rate": 0.0005859593727499329, "loss": 3.4766, "step": 5985 }, { "epoch": 0.29, "grad_norm": 0.5442331433296204, "learning_rate": 0.0005859547173804152, "loss": 3.2875, "step": 5986 }, { "epoch": 0.29, "grad_norm": 0.5738904476165771, "learning_rate": 0.000585950061257748, "loss": 3.6571, "step": 5987 }, { "epoch": 0.29, "grad_norm": 0.7155367136001587, "learning_rate": 0.0005859454043819433, "loss": 3.3561, "step": 5988 }, { "epoch": 0.29, "grad_norm": 0.526063084602356, "learning_rate": 0.0005859407467530134, "loss": 3.3691, "step": 5989 }, { "epoch": 0.29, "grad_norm": 0.5534820556640625, "learning_rate": 0.0005859360883709707, "loss": 3.3648, "step": 5990 }, { "epoch": 0.29, "grad_norm": 0.5640885829925537, "learning_rate": 0.0005859314292358274, "loss": 3.6089, "step": 5991 }, { "epoch": 0.29, "grad_norm": 0.5343618392944336, "learning_rate": 0.0005859267693475956, "loss": 3.5103, "step": 5992 }, { "epoch": 0.29, "grad_norm": 0.5363438725471497, "learning_rate": 0.0005859221087062878, "loss": 3.4727, "step": 5993 }, { "epoch": 0.29, "grad_norm": 0.5537747144699097, "learning_rate": 0.0005859174473119162, "loss": 3.1938, "step": 5994 }, { "epoch": 0.29, "grad_norm": 0.5269249081611633, "learning_rate": 0.0005859127851644931, "loss": 3.1289, "step": 5995 }, { "epoch": 0.29, "grad_norm": 0.5200663208961487, "learning_rate": 0.0005859081222640306, "loss": 3.3508, "step": 5996 }, { "epoch": 0.29, "grad_norm": 0.5903448462486267, "learning_rate": 0.0005859034586105412, "loss": 3.3433, "step": 5997 }, { "epoch": 0.29, "grad_norm": 0.5499140024185181, "learning_rate": 0.0005858987942040371, "loss": 3.4114, "step": 5998 }, { "epoch": 0.29, "grad_norm": 0.5757197737693787, "learning_rate": 0.0005858941290445307, "loss": 3.3508, "step": 5999 }, { "epoch": 0.29, "grad_norm": 0.5636634826660156, "learning_rate": 0.000585889463132034, "loss": 3.495, "step": 6000 }, { "epoch": 0.29, "grad_norm": 0.5337130427360535, "learning_rate": 0.0005858847964665598, "loss": 3.4583, "step": 6001 }, { "epoch": 0.29, "grad_norm": 0.5628725290298462, "learning_rate": 0.0005858801290481197, "loss": 3.6494, "step": 6002 }, { "epoch": 0.29, "grad_norm": 0.5964218974113464, "learning_rate": 0.0005858754608767266, "loss": 3.3848, "step": 6003 }, { "epoch": 0.29, "grad_norm": 0.5543503165245056, "learning_rate": 0.0005858707919523924, "loss": 3.3203, "step": 6004 }, { "epoch": 0.29, "grad_norm": 0.5465249419212341, "learning_rate": 0.0005858661222751297, "loss": 3.3155, "step": 6005 }, { "epoch": 0.29, "grad_norm": 0.5593113303184509, "learning_rate": 0.0005858614518449506, "loss": 3.268, "step": 6006 }, { "epoch": 0.29, "grad_norm": 0.5494705438613892, "learning_rate": 0.0005858567806618673, "loss": 3.4332, "step": 6007 }, { "epoch": 0.29, "grad_norm": 0.5505009293556213, "learning_rate": 0.0005858521087258924, "loss": 3.4089, "step": 6008 }, { "epoch": 0.29, "grad_norm": 0.5421280264854431, "learning_rate": 0.000585847436037038, "loss": 3.431, "step": 6009 }, { "epoch": 0.29, "grad_norm": 0.5362679362297058, "learning_rate": 0.0005858427625953166, "loss": 3.4296, "step": 6010 }, { "epoch": 0.29, "grad_norm": 0.5678845047950745, "learning_rate": 0.0005858380884007403, "loss": 3.2674, "step": 6011 }, { "epoch": 0.29, "grad_norm": 0.5496119856834412, "learning_rate": 0.0005858334134533214, "loss": 3.5641, "step": 6012 }, { "epoch": 0.29, "grad_norm": 0.5399487018585205, "learning_rate": 0.0005858287377530723, "loss": 3.2081, "step": 6013 }, { "epoch": 0.29, "grad_norm": 0.5608866214752197, "learning_rate": 0.0005858240613000054, "loss": 3.3, "step": 6014 }, { "epoch": 0.29, "grad_norm": 0.5753394365310669, "learning_rate": 0.0005858193840941329, "loss": 3.3895, "step": 6015 }, { "epoch": 0.29, "grad_norm": 0.545498788356781, "learning_rate": 0.0005858147061354672, "loss": 3.5889, "step": 6016 }, { "epoch": 0.29, "grad_norm": 0.5470389723777771, "learning_rate": 0.0005858100274240205, "loss": 3.4151, "step": 6017 }, { "epoch": 0.29, "grad_norm": 0.5118675827980042, "learning_rate": 0.0005858053479598053, "loss": 3.2757, "step": 6018 }, { "epoch": 0.29, "grad_norm": 0.520729660987854, "learning_rate": 0.0005858006677428337, "loss": 3.4047, "step": 6019 }, { "epoch": 0.3, "grad_norm": 0.5678765773773193, "learning_rate": 0.0005857959867731181, "loss": 3.2805, "step": 6020 }, { "epoch": 0.3, "grad_norm": 0.5853541493415833, "learning_rate": 0.000585791305050671, "loss": 3.3365, "step": 6021 }, { "epoch": 0.3, "grad_norm": 0.5466600656509399, "learning_rate": 0.0005857866225755045, "loss": 3.2846, "step": 6022 }, { "epoch": 0.3, "grad_norm": 0.5336818099021912, "learning_rate": 0.0005857819393476312, "loss": 3.4511, "step": 6023 }, { "epoch": 0.3, "grad_norm": 0.5479405522346497, "learning_rate": 0.0005857772553670631, "loss": 3.2949, "step": 6024 }, { "epoch": 0.3, "grad_norm": 0.5725945830345154, "learning_rate": 0.0005857725706338129, "loss": 3.2428, "step": 6025 }, { "epoch": 0.3, "grad_norm": 0.6460245847702026, "learning_rate": 0.0005857678851478925, "loss": 3.3048, "step": 6026 }, { "epoch": 0.3, "grad_norm": 0.5461933016777039, "learning_rate": 0.0005857631989093147, "loss": 3.1917, "step": 6027 }, { "epoch": 0.3, "grad_norm": 0.615522563457489, "learning_rate": 0.0005857585119180915, "loss": 3.2753, "step": 6028 }, { "epoch": 0.3, "grad_norm": 0.5617178678512573, "learning_rate": 0.0005857538241742354, "loss": 3.2788, "step": 6029 }, { "epoch": 0.3, "grad_norm": 0.5808992981910706, "learning_rate": 0.0005857491356777587, "loss": 3.4891, "step": 6030 }, { "epoch": 0.3, "grad_norm": 0.49388301372528076, "learning_rate": 0.0005857444464286739, "loss": 3.6251, "step": 6031 }, { "epoch": 0.3, "grad_norm": 0.5432161688804626, "learning_rate": 0.0005857397564269931, "loss": 3.35, "step": 6032 }, { "epoch": 0.3, "grad_norm": 0.62075275182724, "learning_rate": 0.0005857350656727289, "loss": 3.3793, "step": 6033 }, { "epoch": 0.3, "grad_norm": 0.5460484027862549, "learning_rate": 0.0005857303741658933, "loss": 3.3715, "step": 6034 }, { "epoch": 0.3, "grad_norm": 0.5340959429740906, "learning_rate": 0.0005857256819064991, "loss": 3.2426, "step": 6035 }, { "epoch": 0.3, "grad_norm": 0.547926127910614, "learning_rate": 0.0005857209888945583, "loss": 3.6102, "step": 6036 }, { "epoch": 0.3, "grad_norm": 0.540779709815979, "learning_rate": 0.0005857162951300835, "loss": 3.4833, "step": 6037 }, { "epoch": 0.3, "grad_norm": 0.5335472822189331, "learning_rate": 0.0005857116006130869, "loss": 3.438, "step": 6038 }, { "epoch": 0.3, "grad_norm": 0.5480554699897766, "learning_rate": 0.0005857069053435809, "loss": 3.4828, "step": 6039 }, { "epoch": 0.3, "grad_norm": 0.5643362998962402, "learning_rate": 0.000585702209321578, "loss": 3.3695, "step": 6040 }, { "epoch": 0.3, "grad_norm": 0.5678135752677917, "learning_rate": 0.0005856975125470904, "loss": 3.3233, "step": 6041 }, { "epoch": 0.3, "grad_norm": 0.5982794165611267, "learning_rate": 0.0005856928150201306, "loss": 3.4825, "step": 6042 }, { "epoch": 0.3, "grad_norm": 0.5603371858596802, "learning_rate": 0.0005856881167407109, "loss": 3.4292, "step": 6043 }, { "epoch": 0.3, "grad_norm": 0.5519452691078186, "learning_rate": 0.0005856834177088436, "loss": 3.6237, "step": 6044 }, { "epoch": 0.3, "grad_norm": 0.5269837379455566, "learning_rate": 0.0005856787179245412, "loss": 3.5085, "step": 6045 }, { "epoch": 0.3, "grad_norm": 0.5146785378456116, "learning_rate": 0.000585674017387816, "loss": 3.3698, "step": 6046 }, { "epoch": 0.3, "grad_norm": 0.5637503862380981, "learning_rate": 0.0005856693160986805, "loss": 3.2, "step": 6047 }, { "epoch": 0.3, "grad_norm": 0.5364857316017151, "learning_rate": 0.000585664614057147, "loss": 3.4152, "step": 6048 }, { "epoch": 0.3, "grad_norm": 0.562177300453186, "learning_rate": 0.0005856599112632277, "loss": 3.415, "step": 6049 }, { "epoch": 0.3, "grad_norm": 0.548433780670166, "learning_rate": 0.0005856552077169354, "loss": 3.2522, "step": 6050 }, { "epoch": 0.3, "grad_norm": 0.5104949474334717, "learning_rate": 0.0005856505034182822, "loss": 3.5135, "step": 6051 }, { "epoch": 0.3, "grad_norm": 0.5480595231056213, "learning_rate": 0.0005856457983672805, "loss": 3.6652, "step": 6052 }, { "epoch": 0.3, "grad_norm": 0.5037899017333984, "learning_rate": 0.0005856410925639428, "loss": 3.4469, "step": 6053 }, { "epoch": 0.3, "grad_norm": 0.5276090502738953, "learning_rate": 0.0005856363860082813, "loss": 3.3164, "step": 6054 }, { "epoch": 0.3, "grad_norm": 0.5726231336593628, "learning_rate": 0.0005856316787003086, "loss": 3.4393, "step": 6055 }, { "epoch": 0.3, "grad_norm": 0.530457079410553, "learning_rate": 0.0005856269706400371, "loss": 3.3204, "step": 6056 }, { "epoch": 0.3, "grad_norm": 0.5776810050010681, "learning_rate": 0.000585622261827479, "loss": 3.0907, "step": 6057 }, { "epoch": 0.3, "grad_norm": 0.5454444289207458, "learning_rate": 0.000585617552262647, "loss": 3.1768, "step": 6058 }, { "epoch": 0.3, "grad_norm": 0.5331177115440369, "learning_rate": 0.0005856128419455532, "loss": 3.5736, "step": 6059 }, { "epoch": 0.3, "grad_norm": 0.5558076500892639, "learning_rate": 0.0005856081308762102, "loss": 3.5581, "step": 6060 }, { "epoch": 0.3, "grad_norm": 0.6561071872711182, "learning_rate": 0.0005856034190546304, "loss": 3.4186, "step": 6061 }, { "epoch": 0.3, "grad_norm": 0.5517882704734802, "learning_rate": 0.000585598706480826, "loss": 3.2384, "step": 6062 }, { "epoch": 0.3, "grad_norm": 0.5598825812339783, "learning_rate": 0.0005855939931548097, "loss": 3.2194, "step": 6063 }, { "epoch": 0.3, "grad_norm": 0.508906900882721, "learning_rate": 0.0005855892790765937, "loss": 3.4494, "step": 6064 }, { "epoch": 0.3, "grad_norm": 0.5944477319717407, "learning_rate": 0.0005855845642461907, "loss": 3.4631, "step": 6065 }, { "epoch": 0.3, "grad_norm": 0.5435997247695923, "learning_rate": 0.0005855798486636127, "loss": 3.2613, "step": 6066 }, { "epoch": 0.3, "grad_norm": 0.5678590536117554, "learning_rate": 0.0005855751323288724, "loss": 3.4153, "step": 6067 }, { "epoch": 0.3, "grad_norm": 0.5375375151634216, "learning_rate": 0.0005855704152419822, "loss": 3.378, "step": 6068 }, { "epoch": 0.3, "grad_norm": 0.5496307611465454, "learning_rate": 0.0005855656974029544, "loss": 3.4264, "step": 6069 }, { "epoch": 0.3, "grad_norm": 0.6323420405387878, "learning_rate": 0.0005855609788118017, "loss": 3.4229, "step": 6070 }, { "epoch": 0.3, "grad_norm": 0.5859168171882629, "learning_rate": 0.0005855562594685362, "loss": 3.4765, "step": 6071 }, { "epoch": 0.3, "grad_norm": 0.5254231095314026, "learning_rate": 0.0005855515393731704, "loss": 3.326, "step": 6072 }, { "epoch": 0.3, "grad_norm": 0.49722519516944885, "learning_rate": 0.0005855468185257169, "loss": 3.4279, "step": 6073 }, { "epoch": 0.3, "grad_norm": 0.5665181279182434, "learning_rate": 0.0005855420969261881, "loss": 3.4913, "step": 6074 }, { "epoch": 0.3, "grad_norm": 0.5734388828277588, "learning_rate": 0.0005855373745745962, "loss": 3.1717, "step": 6075 }, { "epoch": 0.3, "grad_norm": 0.5337647795677185, "learning_rate": 0.0005855326514709539, "loss": 3.2627, "step": 6076 }, { "epoch": 0.3, "grad_norm": 0.532321572303772, "learning_rate": 0.0005855279276152736, "loss": 3.3021, "step": 6077 }, { "epoch": 0.3, "grad_norm": 0.5565383434295654, "learning_rate": 0.0005855232030075677, "loss": 3.3376, "step": 6078 }, { "epoch": 0.3, "grad_norm": 0.5236389636993408, "learning_rate": 0.0005855184776478485, "loss": 3.5663, "step": 6079 }, { "epoch": 0.3, "grad_norm": 0.5841774940490723, "learning_rate": 0.0005855137515361286, "loss": 3.2004, "step": 6080 }, { "epoch": 0.3, "grad_norm": 0.5455382466316223, "learning_rate": 0.0005855090246724205, "loss": 3.3466, "step": 6081 }, { "epoch": 0.3, "grad_norm": 0.6889823079109192, "learning_rate": 0.0005855042970567366, "loss": 3.4259, "step": 6082 }, { "epoch": 0.3, "grad_norm": 0.5753301978111267, "learning_rate": 0.0005854995686890893, "loss": 3.2617, "step": 6083 }, { "epoch": 0.3, "grad_norm": 0.5561864972114563, "learning_rate": 0.0005854948395694911, "loss": 3.2474, "step": 6084 }, { "epoch": 0.3, "grad_norm": 0.5513240694999695, "learning_rate": 0.0005854901096979543, "loss": 3.2588, "step": 6085 }, { "epoch": 0.3, "grad_norm": 0.5519996285438538, "learning_rate": 0.0005854853790744917, "loss": 3.4927, "step": 6086 }, { "epoch": 0.3, "grad_norm": 0.5103303790092468, "learning_rate": 0.0005854806476991154, "loss": 3.6893, "step": 6087 }, { "epoch": 0.3, "grad_norm": 0.5609255433082581, "learning_rate": 0.000585475915571838, "loss": 3.2162, "step": 6088 }, { "epoch": 0.3, "grad_norm": 0.5276150703430176, "learning_rate": 0.000585471182692672, "loss": 3.3367, "step": 6089 }, { "epoch": 0.3, "grad_norm": 0.5595441460609436, "learning_rate": 0.00058546644906163, "loss": 3.4075, "step": 6090 }, { "epoch": 0.3, "grad_norm": 0.5784932971000671, "learning_rate": 0.0005854617146787242, "loss": 3.2866, "step": 6091 }, { "epoch": 0.3, "grad_norm": 0.5267335176467896, "learning_rate": 0.000585456979543967, "loss": 3.5419, "step": 6092 }, { "epoch": 0.3, "grad_norm": 0.5266568064689636, "learning_rate": 0.0005854522436573714, "loss": 3.2447, "step": 6093 }, { "epoch": 0.3, "grad_norm": 0.5461187958717346, "learning_rate": 0.0005854475070189493, "loss": 3.4585, "step": 6094 }, { "epoch": 0.3, "grad_norm": 0.594099760055542, "learning_rate": 0.0005854427696287134, "loss": 3.42, "step": 6095 }, { "epoch": 0.3, "grad_norm": 0.5209415555000305, "learning_rate": 0.0005854380314866763, "loss": 3.6037, "step": 6096 }, { "epoch": 0.3, "grad_norm": 0.5774303078651428, "learning_rate": 0.0005854332925928503, "loss": 3.5351, "step": 6097 }, { "epoch": 0.3, "grad_norm": 0.5695366263389587, "learning_rate": 0.000585428552947248, "loss": 3.3669, "step": 6098 }, { "epoch": 0.3, "grad_norm": 0.5497203469276428, "learning_rate": 0.0005854238125498817, "loss": 3.3797, "step": 6099 }, { "epoch": 0.3, "grad_norm": 0.5589160323143005, "learning_rate": 0.0005854190714007641, "loss": 3.3957, "step": 6100 }, { "epoch": 0.3, "grad_norm": 0.8883815407752991, "learning_rate": 0.0005854143294999075, "loss": 3.3728, "step": 6101 }, { "epoch": 0.3, "grad_norm": 0.5451207756996155, "learning_rate": 0.0005854095868473246, "loss": 3.6537, "step": 6102 }, { "epoch": 0.3, "grad_norm": 0.5537768006324768, "learning_rate": 0.0005854048434430277, "loss": 3.3309, "step": 6103 }, { "epoch": 0.3, "grad_norm": 0.5225555896759033, "learning_rate": 0.0005854000992870295, "loss": 3.4239, "step": 6104 }, { "epoch": 0.3, "grad_norm": 0.5561574101448059, "learning_rate": 0.0005853953543793422, "loss": 3.3518, "step": 6105 }, { "epoch": 0.3, "grad_norm": 0.5772457122802734, "learning_rate": 0.0005853906087199785, "loss": 3.5618, "step": 6106 }, { "epoch": 0.3, "grad_norm": 0.5690733194351196, "learning_rate": 0.0005853858623089509, "loss": 3.2823, "step": 6107 }, { "epoch": 0.3, "grad_norm": 0.5398492813110352, "learning_rate": 0.0005853811151462719, "loss": 3.4709, "step": 6108 }, { "epoch": 0.3, "grad_norm": 0.5477753281593323, "learning_rate": 0.0005853763672319538, "loss": 3.4316, "step": 6109 }, { "epoch": 0.3, "grad_norm": 0.5604354739189148, "learning_rate": 0.0005853716185660095, "loss": 3.4123, "step": 6110 }, { "epoch": 0.3, "grad_norm": 0.5720632076263428, "learning_rate": 0.0005853668691484512, "loss": 3.3442, "step": 6111 }, { "epoch": 0.3, "grad_norm": 0.5532767176628113, "learning_rate": 0.0005853621189792913, "loss": 3.2747, "step": 6112 }, { "epoch": 0.3, "grad_norm": 0.5390136241912842, "learning_rate": 0.0005853573680585427, "loss": 3.2519, "step": 6113 }, { "epoch": 0.3, "grad_norm": 0.5394711494445801, "learning_rate": 0.0005853526163862177, "loss": 3.4465, "step": 6114 }, { "epoch": 0.3, "grad_norm": 0.5646769404411316, "learning_rate": 0.0005853478639623287, "loss": 3.2085, "step": 6115 }, { "epoch": 0.3, "grad_norm": 0.5330178737640381, "learning_rate": 0.0005853431107868886, "loss": 3.5491, "step": 6116 }, { "epoch": 0.3, "grad_norm": 0.554105281829834, "learning_rate": 0.0005853383568599094, "loss": 3.7143, "step": 6117 }, { "epoch": 0.3, "grad_norm": 0.5287169218063354, "learning_rate": 0.000585333602181404, "loss": 3.3217, "step": 6118 }, { "epoch": 0.3, "grad_norm": 0.5341808199882507, "learning_rate": 0.0005853288467513848, "loss": 3.2725, "step": 6119 }, { "epoch": 0.3, "grad_norm": 0.5601862072944641, "learning_rate": 0.0005853240905698642, "loss": 3.3502, "step": 6120 }, { "epoch": 0.3, "grad_norm": 0.58427894115448, "learning_rate": 0.0005853193336368551, "loss": 3.4909, "step": 6121 }, { "epoch": 0.3, "grad_norm": 0.523365318775177, "learning_rate": 0.0005853145759523695, "loss": 3.47, "step": 6122 }, { "epoch": 0.3, "grad_norm": 0.5647141337394714, "learning_rate": 0.0005853098175164204, "loss": 3.2372, "step": 6123 }, { "epoch": 0.3, "grad_norm": 0.5395342111587524, "learning_rate": 0.0005853050583290202, "loss": 3.4103, "step": 6124 }, { "epoch": 0.3, "grad_norm": 0.5265369415283203, "learning_rate": 0.0005853002983901812, "loss": 3.3473, "step": 6125 }, { "epoch": 0.3, "grad_norm": 0.5595860481262207, "learning_rate": 0.0005852955376999163, "loss": 3.4987, "step": 6126 }, { "epoch": 0.3, "grad_norm": 0.6001688241958618, "learning_rate": 0.0005852907762582377, "loss": 3.3633, "step": 6127 }, { "epoch": 0.3, "grad_norm": 0.517382800579071, "learning_rate": 0.0005852860140651583, "loss": 3.3587, "step": 6128 }, { "epoch": 0.3, "grad_norm": 0.5585851669311523, "learning_rate": 0.0005852812511206902, "loss": 3.4912, "step": 6129 }, { "epoch": 0.3, "grad_norm": 0.5613790154457092, "learning_rate": 0.0005852764874248464, "loss": 3.2364, "step": 6130 }, { "epoch": 0.3, "grad_norm": 0.5714898109436035, "learning_rate": 0.0005852717229776392, "loss": 3.3002, "step": 6131 }, { "epoch": 0.3, "grad_norm": 0.59192955493927, "learning_rate": 0.000585266957779081, "loss": 3.3714, "step": 6132 }, { "epoch": 0.3, "grad_norm": 0.5501763820648193, "learning_rate": 0.0005852621918291846, "loss": 3.2259, "step": 6133 }, { "epoch": 0.3, "grad_norm": 0.5687995553016663, "learning_rate": 0.0005852574251279626, "loss": 3.3774, "step": 6134 }, { "epoch": 0.3, "grad_norm": 0.5306016802787781, "learning_rate": 0.0005852526576754274, "loss": 3.319, "step": 6135 }, { "epoch": 0.3, "grad_norm": 0.5609244704246521, "learning_rate": 0.0005852478894715917, "loss": 3.363, "step": 6136 }, { "epoch": 0.3, "grad_norm": 0.5303353667259216, "learning_rate": 0.0005852431205164678, "loss": 3.3597, "step": 6137 }, { "epoch": 0.3, "grad_norm": 0.5196649432182312, "learning_rate": 0.0005852383508100685, "loss": 3.3383, "step": 6138 }, { "epoch": 0.3, "grad_norm": 0.5467498302459717, "learning_rate": 0.0005852335803524062, "loss": 3.4312, "step": 6139 }, { "epoch": 0.3, "grad_norm": 0.5493993163108826, "learning_rate": 0.0005852288091434936, "loss": 3.209, "step": 6140 }, { "epoch": 0.3, "grad_norm": 0.534140944480896, "learning_rate": 0.0005852240371833432, "loss": 3.3898, "step": 6141 }, { "epoch": 0.3, "grad_norm": 0.5165014863014221, "learning_rate": 0.0005852192644719675, "loss": 3.1214, "step": 6142 }, { "epoch": 0.3, "grad_norm": 0.6956821084022522, "learning_rate": 0.0005852144910093792, "loss": 3.2089, "step": 6143 }, { "epoch": 0.3, "grad_norm": 0.544562816619873, "learning_rate": 0.0005852097167955909, "loss": 3.3128, "step": 6144 }, { "epoch": 0.3, "grad_norm": 0.6112951040267944, "learning_rate": 0.0005852049418306151, "loss": 3.3378, "step": 6145 }, { "epoch": 0.3, "grad_norm": 0.545120358467102, "learning_rate": 0.0005852001661144643, "loss": 3.4138, "step": 6146 }, { "epoch": 0.3, "grad_norm": 0.5408572554588318, "learning_rate": 0.0005851953896471512, "loss": 3.3295, "step": 6147 }, { "epoch": 0.3, "grad_norm": 0.5871040225028992, "learning_rate": 0.0005851906124286882, "loss": 3.3294, "step": 6148 }, { "epoch": 0.3, "grad_norm": 0.5361684560775757, "learning_rate": 0.0005851858344590881, "loss": 3.4245, "step": 6149 }, { "epoch": 0.3, "grad_norm": 0.541067361831665, "learning_rate": 0.0005851810557383634, "loss": 3.3993, "step": 6150 }, { "epoch": 0.3, "grad_norm": 0.5347387194633484, "learning_rate": 0.0005851762762665267, "loss": 3.1216, "step": 6151 }, { "epoch": 0.3, "grad_norm": 0.5603194236755371, "learning_rate": 0.0005851714960435906, "loss": 3.416, "step": 6152 }, { "epoch": 0.3, "grad_norm": 0.7698899507522583, "learning_rate": 0.0005851667150695676, "loss": 3.2229, "step": 6153 }, { "epoch": 0.3, "grad_norm": 0.5307655930519104, "learning_rate": 0.0005851619333444703, "loss": 3.2918, "step": 6154 }, { "epoch": 0.3, "grad_norm": 0.5312530994415283, "learning_rate": 0.0005851571508683115, "loss": 3.5303, "step": 6155 }, { "epoch": 0.3, "grad_norm": 0.5486868619918823, "learning_rate": 0.0005851523676411036, "loss": 3.3959, "step": 6156 }, { "epoch": 0.3, "grad_norm": 0.5212375521659851, "learning_rate": 0.0005851475836628591, "loss": 3.2698, "step": 6157 }, { "epoch": 0.3, "grad_norm": 0.5626296997070312, "learning_rate": 0.0005851427989335909, "loss": 3.4387, "step": 6158 }, { "epoch": 0.3, "grad_norm": 0.5545399785041809, "learning_rate": 0.0005851380134533114, "loss": 3.3047, "step": 6159 }, { "epoch": 0.3, "grad_norm": 0.5794121026992798, "learning_rate": 0.0005851332272220332, "loss": 3.3047, "step": 6160 }, { "epoch": 0.3, "grad_norm": 0.6324692964553833, "learning_rate": 0.0005851284402397691, "loss": 3.3857, "step": 6161 }, { "epoch": 0.3, "grad_norm": 0.530517041683197, "learning_rate": 0.0005851236525065314, "loss": 3.4357, "step": 6162 }, { "epoch": 0.3, "grad_norm": 0.5403311252593994, "learning_rate": 0.000585118864022333, "loss": 3.3202, "step": 6163 }, { "epoch": 0.3, "grad_norm": 0.7685026526451111, "learning_rate": 0.0005851140747871863, "loss": 3.2808, "step": 6164 }, { "epoch": 0.3, "grad_norm": 0.6147613525390625, "learning_rate": 0.0005851092848011039, "loss": 3.5743, "step": 6165 }, { "epoch": 0.3, "grad_norm": 0.5476180911064148, "learning_rate": 0.0005851044940640987, "loss": 3.6459, "step": 6166 }, { "epoch": 0.3, "grad_norm": 0.5747238397598267, "learning_rate": 0.000585099702576183, "loss": 3.4143, "step": 6167 }, { "epoch": 0.3, "grad_norm": 0.565980076789856, "learning_rate": 0.0005850949103373697, "loss": 3.3223, "step": 6168 }, { "epoch": 0.3, "grad_norm": 0.5255545973777771, "learning_rate": 0.0005850901173476712, "loss": 3.2993, "step": 6169 }, { "epoch": 0.3, "grad_norm": 0.5456163883209229, "learning_rate": 0.0005850853236071003, "loss": 3.4025, "step": 6170 }, { "epoch": 0.3, "grad_norm": 0.5844244956970215, "learning_rate": 0.0005850805291156692, "loss": 3.6342, "step": 6171 }, { "epoch": 0.3, "grad_norm": 0.5095750689506531, "learning_rate": 0.0005850757338733911, "loss": 3.3355, "step": 6172 }, { "epoch": 0.3, "grad_norm": 0.6050037741661072, "learning_rate": 0.0005850709378802785, "loss": 3.4111, "step": 6173 }, { "epoch": 0.3, "grad_norm": 0.5732804536819458, "learning_rate": 0.0005850661411363437, "loss": 3.5586, "step": 6174 }, { "epoch": 0.3, "grad_norm": 0.5283846855163574, "learning_rate": 0.0005850613436415996, "loss": 3.237, "step": 6175 }, { "epoch": 0.3, "grad_norm": 0.5460174083709717, "learning_rate": 0.0005850565453960586, "loss": 3.171, "step": 6176 }, { "epoch": 0.3, "grad_norm": 0.5901962518692017, "learning_rate": 0.0005850517463997339, "loss": 3.3324, "step": 6177 }, { "epoch": 0.3, "grad_norm": 0.5234120488166809, "learning_rate": 0.0005850469466526376, "loss": 3.4535, "step": 6178 }, { "epoch": 0.3, "grad_norm": 0.5157477855682373, "learning_rate": 0.0005850421461547823, "loss": 3.4048, "step": 6179 }, { "epoch": 0.3, "grad_norm": 0.518635094165802, "learning_rate": 0.000585037344906181, "loss": 3.3394, "step": 6180 }, { "epoch": 0.3, "grad_norm": 0.5400442481040955, "learning_rate": 0.0005850325429068462, "loss": 3.3305, "step": 6181 }, { "epoch": 0.3, "grad_norm": 0.5420893430709839, "learning_rate": 0.0005850277401567906, "loss": 3.4494, "step": 6182 }, { "epoch": 0.3, "grad_norm": 0.5632343292236328, "learning_rate": 0.0005850229366560268, "loss": 3.411, "step": 6183 }, { "epoch": 0.3, "grad_norm": 0.5508242249488831, "learning_rate": 0.0005850181324045673, "loss": 3.1892, "step": 6184 }, { "epoch": 0.3, "grad_norm": 0.5260775089263916, "learning_rate": 0.0005850133274024249, "loss": 3.0536, "step": 6185 }, { "epoch": 0.3, "grad_norm": 0.5268356800079346, "learning_rate": 0.0005850085216496123, "loss": 3.4394, "step": 6186 }, { "epoch": 0.3, "grad_norm": 0.4986163079738617, "learning_rate": 0.0005850037151461421, "loss": 3.4986, "step": 6187 }, { "epoch": 0.3, "grad_norm": 0.5085906386375427, "learning_rate": 0.0005849989078920271, "loss": 3.6332, "step": 6188 }, { "epoch": 0.3, "grad_norm": 0.5513754487037659, "learning_rate": 0.0005849940998872796, "loss": 3.1801, "step": 6189 }, { "epoch": 0.3, "grad_norm": 0.5337885022163391, "learning_rate": 0.0005849892911319126, "loss": 3.3828, "step": 6190 }, { "epoch": 0.3, "grad_norm": 0.5386898517608643, "learning_rate": 0.0005849844816259387, "loss": 3.4224, "step": 6191 }, { "epoch": 0.3, "grad_norm": 0.5503227710723877, "learning_rate": 0.0005849796713693704, "loss": 3.2951, "step": 6192 }, { "epoch": 0.3, "grad_norm": 0.5732369422912598, "learning_rate": 0.0005849748603622205, "loss": 3.4267, "step": 6193 }, { "epoch": 0.3, "grad_norm": 0.5297451615333557, "learning_rate": 0.0005849700486045018, "loss": 3.3835, "step": 6194 }, { "epoch": 0.3, "grad_norm": 0.5413727760314941, "learning_rate": 0.0005849652360962268, "loss": 3.3712, "step": 6195 }, { "epoch": 0.3, "grad_norm": 0.5539155006408691, "learning_rate": 0.000584960422837408, "loss": 3.3727, "step": 6196 }, { "epoch": 0.3, "grad_norm": 0.5665326118469238, "learning_rate": 0.0005849556088280585, "loss": 3.3948, "step": 6197 }, { "epoch": 0.3, "grad_norm": 0.5332841873168945, "learning_rate": 0.0005849507940681907, "loss": 3.5232, "step": 6198 }, { "epoch": 0.3, "grad_norm": 0.5573357939720154, "learning_rate": 0.0005849459785578174, "loss": 3.3837, "step": 6199 }, { "epoch": 0.3, "grad_norm": 0.5696620345115662, "learning_rate": 0.0005849411622969511, "loss": 3.4472, "step": 6200 }, { "epoch": 0.3, "grad_norm": 0.5535147190093994, "learning_rate": 0.0005849363452856048, "loss": 3.5722, "step": 6201 }, { "epoch": 0.3, "grad_norm": 0.5474180579185486, "learning_rate": 0.0005849315275237908, "loss": 3.2806, "step": 6202 }, { "epoch": 0.3, "grad_norm": 0.519481360912323, "learning_rate": 0.0005849267090115222, "loss": 3.3679, "step": 6203 }, { "epoch": 0.3, "grad_norm": 0.5893501043319702, "learning_rate": 0.0005849218897488115, "loss": 3.3842, "step": 6204 }, { "epoch": 0.3, "grad_norm": 0.5267314910888672, "learning_rate": 0.0005849170697356711, "loss": 3.5745, "step": 6205 }, { "epoch": 0.3, "grad_norm": 0.5750004649162292, "learning_rate": 0.0005849122489721142, "loss": 3.2855, "step": 6206 }, { "epoch": 0.3, "grad_norm": 0.5196614265441895, "learning_rate": 0.0005849074274581531, "loss": 3.4182, "step": 6207 }, { "epoch": 0.3, "grad_norm": 0.5150504112243652, "learning_rate": 0.0005849026051938009, "loss": 3.5894, "step": 6208 }, { "epoch": 0.3, "grad_norm": 0.532528817653656, "learning_rate": 0.0005848977821790699, "loss": 3.3033, "step": 6209 }, { "epoch": 0.3, "grad_norm": 0.63649982213974, "learning_rate": 0.0005848929584139731, "loss": 3.3009, "step": 6210 }, { "epoch": 0.3, "grad_norm": 0.5199731588363647, "learning_rate": 0.0005848881338985229, "loss": 3.6216, "step": 6211 }, { "epoch": 0.3, "grad_norm": 0.5677109956741333, "learning_rate": 0.0005848833086327323, "loss": 3.5486, "step": 6212 }, { "epoch": 0.3, "grad_norm": 0.5644501447677612, "learning_rate": 0.0005848784826166139, "loss": 3.2385, "step": 6213 }, { "epoch": 0.3, "grad_norm": 0.6257779002189636, "learning_rate": 0.0005848736558501804, "loss": 3.3152, "step": 6214 }, { "epoch": 0.3, "grad_norm": 0.6935915350914001, "learning_rate": 0.0005848688283334445, "loss": 3.4666, "step": 6215 }, { "epoch": 0.3, "grad_norm": 0.5401797890663147, "learning_rate": 0.0005848640000664188, "loss": 3.4822, "step": 6216 }, { "epoch": 0.3, "grad_norm": 0.5422528982162476, "learning_rate": 0.0005848591710491164, "loss": 3.32, "step": 6217 }, { "epoch": 0.3, "grad_norm": 0.5878642201423645, "learning_rate": 0.0005848543412815496, "loss": 3.1503, "step": 6218 }, { "epoch": 0.3, "grad_norm": 0.5432202219963074, "learning_rate": 0.0005848495107637312, "loss": 3.3538, "step": 6219 }, { "epoch": 0.3, "grad_norm": 0.6053672432899475, "learning_rate": 0.0005848446794956742, "loss": 3.1726, "step": 6220 }, { "epoch": 0.3, "grad_norm": 0.5622955560684204, "learning_rate": 0.0005848398474773911, "loss": 3.2979, "step": 6221 }, { "epoch": 0.3, "grad_norm": 0.5476831197738647, "learning_rate": 0.0005848350147088946, "loss": 3.5093, "step": 6222 }, { "epoch": 0.3, "grad_norm": 0.5248444676399231, "learning_rate": 0.0005848301811901974, "loss": 3.3824, "step": 6223 }, { "epoch": 0.31, "grad_norm": 0.5185007452964783, "learning_rate": 0.0005848253469213125, "loss": 3.4452, "step": 6224 }, { "epoch": 0.31, "grad_norm": 0.618705153465271, "learning_rate": 0.0005848205119022524, "loss": 3.6843, "step": 6225 }, { "epoch": 0.31, "grad_norm": 0.5357537865638733, "learning_rate": 0.0005848156761330298, "loss": 3.4044, "step": 6226 }, { "epoch": 0.31, "grad_norm": 0.5574560761451721, "learning_rate": 0.0005848108396136576, "loss": 3.2504, "step": 6227 }, { "epoch": 0.31, "grad_norm": 0.5414639115333557, "learning_rate": 0.0005848060023441484, "loss": 3.439, "step": 6228 }, { "epoch": 0.31, "grad_norm": 0.5341112017631531, "learning_rate": 0.0005848011643245151, "loss": 3.283, "step": 6229 }, { "epoch": 0.31, "grad_norm": 0.5189714431762695, "learning_rate": 0.0005847963255547704, "loss": 3.3486, "step": 6230 }, { "epoch": 0.31, "grad_norm": 0.6320374608039856, "learning_rate": 0.0005847914860349268, "loss": 3.5425, "step": 6231 }, { "epoch": 0.31, "grad_norm": 0.5230095386505127, "learning_rate": 0.0005847866457649973, "loss": 3.3306, "step": 6232 }, { "epoch": 0.31, "grad_norm": 0.5646961331367493, "learning_rate": 0.0005847818047449946, "loss": 3.4545, "step": 6233 }, { "epoch": 0.31, "grad_norm": 0.5346679091453552, "learning_rate": 0.0005847769629749314, "loss": 3.4056, "step": 6234 }, { "epoch": 0.31, "grad_norm": 0.5118999481201172, "learning_rate": 0.0005847721204548206, "loss": 3.3682, "step": 6235 }, { "epoch": 0.31, "grad_norm": 0.5368824005126953, "learning_rate": 0.0005847672771846748, "loss": 3.37, "step": 6236 }, { "epoch": 0.31, "grad_norm": 0.563027024269104, "learning_rate": 0.0005847624331645067, "loss": 3.4851, "step": 6237 }, { "epoch": 0.31, "grad_norm": 0.5190998911857605, "learning_rate": 0.0005847575883943292, "loss": 3.4153, "step": 6238 }, { "epoch": 0.31, "grad_norm": 0.5824965834617615, "learning_rate": 0.000584752742874155, "loss": 3.5383, "step": 6239 }, { "epoch": 0.31, "grad_norm": 0.5584920048713684, "learning_rate": 0.000584747896603997, "loss": 3.2722, "step": 6240 }, { "epoch": 0.31, "grad_norm": 0.5643733739852905, "learning_rate": 0.0005847430495838677, "loss": 3.4414, "step": 6241 }, { "epoch": 0.31, "grad_norm": 0.5031020045280457, "learning_rate": 0.00058473820181378, "loss": 3.3009, "step": 6242 }, { "epoch": 0.31, "grad_norm": 0.5333477854728699, "learning_rate": 0.0005847333532937467, "loss": 3.3855, "step": 6243 }, { "epoch": 0.31, "grad_norm": 0.5437228679656982, "learning_rate": 0.0005847285040237807, "loss": 3.2759, "step": 6244 }, { "epoch": 0.31, "grad_norm": 0.5025436878204346, "learning_rate": 0.0005847236540038944, "loss": 3.3027, "step": 6245 }, { "epoch": 0.31, "grad_norm": 0.5325117111206055, "learning_rate": 0.0005847188032341009, "loss": 3.5029, "step": 6246 }, { "epoch": 0.31, "grad_norm": 0.4821946918964386, "learning_rate": 0.0005847139517144128, "loss": 3.5097, "step": 6247 }, { "epoch": 0.31, "grad_norm": 0.5105711221694946, "learning_rate": 0.0005847090994448431, "loss": 3.3654, "step": 6248 }, { "epoch": 0.31, "grad_norm": 0.5041024088859558, "learning_rate": 0.0005847042464254043, "loss": 3.4881, "step": 6249 }, { "epoch": 0.31, "grad_norm": 0.5586902499198914, "learning_rate": 0.0005846993926561093, "loss": 3.2771, "step": 6250 }, { "epoch": 0.31, "grad_norm": 0.5156290531158447, "learning_rate": 0.0005846945381369709, "loss": 3.3685, "step": 6251 }, { "epoch": 0.31, "grad_norm": 0.5516939759254456, "learning_rate": 0.0005846896828680019, "loss": 3.4328, "step": 6252 }, { "epoch": 0.31, "grad_norm": 0.628324031829834, "learning_rate": 0.0005846848268492151, "loss": 3.164, "step": 6253 }, { "epoch": 0.31, "grad_norm": 0.5220317840576172, "learning_rate": 0.0005846799700806232, "loss": 3.5043, "step": 6254 }, { "epoch": 0.31, "grad_norm": 0.523783802986145, "learning_rate": 0.000584675112562239, "loss": 3.5469, "step": 6255 }, { "epoch": 0.31, "grad_norm": 0.5619279146194458, "learning_rate": 0.0005846702542940755, "loss": 3.3155, "step": 6256 }, { "epoch": 0.31, "grad_norm": 0.6332241296768188, "learning_rate": 0.0005846653952761452, "loss": 3.1784, "step": 6257 }, { "epoch": 0.31, "grad_norm": 0.5562300682067871, "learning_rate": 0.0005846605355084611, "loss": 3.2124, "step": 6258 }, { "epoch": 0.31, "grad_norm": 0.5105780363082886, "learning_rate": 0.0005846556749910358, "loss": 3.3471, "step": 6259 }, { "epoch": 0.31, "grad_norm": 0.5315980911254883, "learning_rate": 0.0005846508137238825, "loss": 3.2881, "step": 6260 }, { "epoch": 0.31, "grad_norm": 0.5600804686546326, "learning_rate": 0.0005846459517070135, "loss": 3.4305, "step": 6261 }, { "epoch": 0.31, "grad_norm": 0.5662427544593811, "learning_rate": 0.000584641088940442, "loss": 3.3034, "step": 6262 }, { "epoch": 0.31, "grad_norm": 0.5419402122497559, "learning_rate": 0.0005846362254241805, "loss": 3.4267, "step": 6263 }, { "epoch": 0.31, "grad_norm": 0.5330461263656616, "learning_rate": 0.000584631361158242, "loss": 3.4127, "step": 6264 }, { "epoch": 0.31, "grad_norm": 0.5632162690162659, "learning_rate": 0.0005846264961426394, "loss": 3.1738, "step": 6265 }, { "epoch": 0.31, "grad_norm": 0.5450085997581482, "learning_rate": 0.0005846216303773853, "loss": 3.4474, "step": 6266 }, { "epoch": 0.31, "grad_norm": 0.5258246660232544, "learning_rate": 0.0005846167638624926, "loss": 3.3843, "step": 6267 }, { "epoch": 0.31, "grad_norm": 0.5241590738296509, "learning_rate": 0.0005846118965979741, "loss": 3.4877, "step": 6268 }, { "epoch": 0.31, "grad_norm": 0.5271158218383789, "learning_rate": 0.0005846070285838427, "loss": 3.3302, "step": 6269 }, { "epoch": 0.31, "grad_norm": 0.5716249942779541, "learning_rate": 0.0005846021598201111, "loss": 3.6957, "step": 6270 }, { "epoch": 0.31, "grad_norm": 0.546226441860199, "learning_rate": 0.0005845972903067922, "loss": 3.4328, "step": 6271 }, { "epoch": 0.31, "grad_norm": 0.532817006111145, "learning_rate": 0.0005845924200438987, "loss": 3.4793, "step": 6272 }, { "epoch": 0.31, "grad_norm": 0.5522794723510742, "learning_rate": 0.0005845875490314437, "loss": 3.2225, "step": 6273 }, { "epoch": 0.31, "grad_norm": 0.5274179577827454, "learning_rate": 0.0005845826772694399, "loss": 2.9809, "step": 6274 }, { "epoch": 0.31, "grad_norm": 0.5768852829933167, "learning_rate": 0.0005845778047578999, "loss": 3.161, "step": 6275 }, { "epoch": 0.31, "grad_norm": 0.5117722749710083, "learning_rate": 0.0005845729314968369, "loss": 3.3208, "step": 6276 }, { "epoch": 0.31, "grad_norm": 0.6170226335525513, "learning_rate": 0.0005845680574862635, "loss": 3.3823, "step": 6277 }, { "epoch": 0.31, "grad_norm": 0.567700207233429, "learning_rate": 0.0005845631827261927, "loss": 3.5089, "step": 6278 }, { "epoch": 0.31, "grad_norm": 0.5353153347969055, "learning_rate": 0.0005845583072166371, "loss": 3.3691, "step": 6279 }, { "epoch": 0.31, "grad_norm": 0.5365519523620605, "learning_rate": 0.0005845534309576097, "loss": 3.432, "step": 6280 }, { "epoch": 0.31, "grad_norm": 0.5466122627258301, "learning_rate": 0.0005845485539491234, "loss": 3.3228, "step": 6281 }, { "epoch": 0.31, "grad_norm": 0.6011998653411865, "learning_rate": 0.0005845436761911909, "loss": 3.0104, "step": 6282 }, { "epoch": 0.31, "grad_norm": 0.5234431624412537, "learning_rate": 0.0005845387976838251, "loss": 3.3694, "step": 6283 }, { "epoch": 0.31, "grad_norm": 0.540686309337616, "learning_rate": 0.000584533918427039, "loss": 3.6565, "step": 6284 }, { "epoch": 0.31, "grad_norm": 0.5174767971038818, "learning_rate": 0.0005845290384208453, "loss": 3.5169, "step": 6285 }, { "epoch": 0.31, "grad_norm": 0.5468271970748901, "learning_rate": 0.0005845241576652567, "loss": 3.3747, "step": 6286 }, { "epoch": 0.31, "grad_norm": 0.5398919582366943, "learning_rate": 0.0005845192761602864, "loss": 3.6541, "step": 6287 }, { "epoch": 0.31, "grad_norm": 0.5341367125511169, "learning_rate": 0.000584514393905947, "loss": 3.3902, "step": 6288 }, { "epoch": 0.31, "grad_norm": 0.5506887435913086, "learning_rate": 0.0005845095109022514, "loss": 3.4969, "step": 6289 }, { "epoch": 0.31, "grad_norm": 0.5707558393478394, "learning_rate": 0.0005845046271492127, "loss": 3.2885, "step": 6290 }, { "epoch": 0.31, "grad_norm": 0.49530622363090515, "learning_rate": 0.0005844997426468434, "loss": 3.297, "step": 6291 }, { "epoch": 0.31, "grad_norm": 0.5162578821182251, "learning_rate": 0.0005844948573951565, "loss": 3.3581, "step": 6292 }, { "epoch": 0.31, "grad_norm": 0.5304474234580994, "learning_rate": 0.000584489971394165, "loss": 3.4776, "step": 6293 }, { "epoch": 0.31, "grad_norm": 0.5445380806922913, "learning_rate": 0.0005844850846438816, "loss": 3.4076, "step": 6294 }, { "epoch": 0.31, "grad_norm": 0.5633144974708557, "learning_rate": 0.0005844801971443193, "loss": 3.4512, "step": 6295 }, { "epoch": 0.31, "grad_norm": 0.5193982124328613, "learning_rate": 0.0005844753088954908, "loss": 3.404, "step": 6296 }, { "epoch": 0.31, "grad_norm": 0.5558420419692993, "learning_rate": 0.0005844704198974093, "loss": 3.5152, "step": 6297 }, { "epoch": 0.31, "grad_norm": 0.535561203956604, "learning_rate": 0.0005844655301500873, "loss": 3.3756, "step": 6298 }, { "epoch": 0.31, "grad_norm": 0.5079712271690369, "learning_rate": 0.0005844606396535378, "loss": 3.5013, "step": 6299 }, { "epoch": 0.31, "grad_norm": 0.5386590361595154, "learning_rate": 0.0005844557484077738, "loss": 3.1547, "step": 6300 }, { "epoch": 0.31, "grad_norm": 0.5513303875923157, "learning_rate": 0.000584450856412808, "loss": 3.5032, "step": 6301 }, { "epoch": 0.31, "grad_norm": 0.5191980600357056, "learning_rate": 0.0005844459636686535, "loss": 3.388, "step": 6302 }, { "epoch": 0.31, "grad_norm": 0.5659469366073608, "learning_rate": 0.0005844410701753231, "loss": 3.3341, "step": 6303 }, { "epoch": 0.31, "grad_norm": 0.5289974808692932, "learning_rate": 0.0005844361759328295, "loss": 3.4693, "step": 6304 }, { "epoch": 0.31, "grad_norm": 0.5283775925636292, "learning_rate": 0.000584431280941186, "loss": 3.4818, "step": 6305 }, { "epoch": 0.31, "grad_norm": 0.5795498490333557, "learning_rate": 0.000584426385200405, "loss": 3.4025, "step": 6306 }, { "epoch": 0.31, "grad_norm": 0.5070279240608215, "learning_rate": 0.0005844214887104998, "loss": 3.2794, "step": 6307 }, { "epoch": 0.31, "grad_norm": 0.5226815342903137, "learning_rate": 0.000584416591471483, "loss": 3.4417, "step": 6308 }, { "epoch": 0.31, "grad_norm": 0.5250857472419739, "learning_rate": 0.0005844116934833678, "loss": 3.5051, "step": 6309 }, { "epoch": 0.31, "grad_norm": 0.5460673570632935, "learning_rate": 0.0005844067947461669, "loss": 3.3849, "step": 6310 }, { "epoch": 0.31, "grad_norm": 0.5464668869972229, "learning_rate": 0.0005844018952598931, "loss": 3.2028, "step": 6311 }, { "epoch": 0.31, "grad_norm": 0.5466782450675964, "learning_rate": 0.0005843969950245595, "loss": 3.1737, "step": 6312 }, { "epoch": 0.31, "grad_norm": 0.5372828841209412, "learning_rate": 0.0005843920940401792, "loss": 3.5099, "step": 6313 }, { "epoch": 0.31, "grad_norm": 0.5676745176315308, "learning_rate": 0.0005843871923067645, "loss": 3.353, "step": 6314 }, { "epoch": 0.31, "grad_norm": 0.5780575275421143, "learning_rate": 0.0005843822898243289, "loss": 3.2481, "step": 6315 }, { "epoch": 0.31, "grad_norm": 0.7814470529556274, "learning_rate": 0.000584377386592885, "loss": 3.3843, "step": 6316 }, { "epoch": 0.31, "grad_norm": 0.5636649131774902, "learning_rate": 0.0005843724826124457, "loss": 3.5316, "step": 6317 }, { "epoch": 0.31, "grad_norm": 0.548147976398468, "learning_rate": 0.0005843675778830241, "loss": 3.1795, "step": 6318 }, { "epoch": 0.31, "grad_norm": 0.5491986870765686, "learning_rate": 0.000584362672404633, "loss": 3.2468, "step": 6319 }, { "epoch": 0.31, "grad_norm": 0.5343765020370483, "learning_rate": 0.0005843577661772854, "loss": 3.3482, "step": 6320 }, { "epoch": 0.31, "grad_norm": 0.5225366950035095, "learning_rate": 0.0005843528592009941, "loss": 3.6224, "step": 6321 }, { "epoch": 0.31, "grad_norm": 0.5188947319984436, "learning_rate": 0.0005843479514757721, "loss": 3.2754, "step": 6322 }, { "epoch": 0.31, "grad_norm": 0.5342457294464111, "learning_rate": 0.0005843430430016324, "loss": 3.4175, "step": 6323 }, { "epoch": 0.31, "grad_norm": 0.5652335286140442, "learning_rate": 0.0005843381337785877, "loss": 3.2715, "step": 6324 }, { "epoch": 0.31, "grad_norm": 0.49999839067459106, "learning_rate": 0.0005843332238066512, "loss": 3.3006, "step": 6325 }, { "epoch": 0.31, "grad_norm": 0.5379924178123474, "learning_rate": 0.0005843283130858357, "loss": 3.2138, "step": 6326 }, { "epoch": 0.31, "grad_norm": 0.5372043251991272, "learning_rate": 0.0005843234016161542, "loss": 3.3537, "step": 6327 }, { "epoch": 0.31, "grad_norm": 0.5262093544006348, "learning_rate": 0.0005843184893976194, "loss": 3.3396, "step": 6328 }, { "epoch": 0.31, "grad_norm": 0.5059529542922974, "learning_rate": 0.0005843135764302446, "loss": 3.4341, "step": 6329 }, { "epoch": 0.31, "grad_norm": 0.5077370405197144, "learning_rate": 0.0005843086627140425, "loss": 3.1674, "step": 6330 }, { "epoch": 0.31, "grad_norm": 0.5159595012664795, "learning_rate": 0.000584303748249026, "loss": 3.429, "step": 6331 }, { "epoch": 0.31, "grad_norm": 0.5877441763877869, "learning_rate": 0.0005842988330352082, "loss": 3.3886, "step": 6332 }, { "epoch": 0.31, "grad_norm": 0.5474517941474915, "learning_rate": 0.000584293917072602, "loss": 3.3393, "step": 6333 }, { "epoch": 0.31, "grad_norm": 0.5149897336959839, "learning_rate": 0.0005842890003612204, "loss": 3.4596, "step": 6334 }, { "epoch": 0.31, "grad_norm": 0.5382705330848694, "learning_rate": 0.0005842840829010762, "loss": 3.428, "step": 6335 }, { "epoch": 0.31, "grad_norm": 0.5204023718833923, "learning_rate": 0.0005842791646921825, "loss": 3.5418, "step": 6336 }, { "epoch": 0.31, "grad_norm": 0.5262681245803833, "learning_rate": 0.0005842742457345523, "loss": 3.3259, "step": 6337 }, { "epoch": 0.31, "grad_norm": 0.5385437607765198, "learning_rate": 0.0005842693260281981, "loss": 3.2901, "step": 6338 }, { "epoch": 0.31, "grad_norm": 0.5480816960334778, "learning_rate": 0.0005842644055731335, "loss": 3.2381, "step": 6339 }, { "epoch": 0.31, "grad_norm": 0.5376442670822144, "learning_rate": 0.0005842594843693711, "loss": 3.0217, "step": 6340 }, { "epoch": 0.31, "grad_norm": 0.5409801602363586, "learning_rate": 0.0005842545624169239, "loss": 3.2004, "step": 6341 }, { "epoch": 0.31, "grad_norm": 0.5036441087722778, "learning_rate": 0.0005842496397158049, "loss": 3.4932, "step": 6342 }, { "epoch": 0.31, "grad_norm": 0.6348100304603577, "learning_rate": 0.000584244716266027, "loss": 3.4698, "step": 6343 }, { "epoch": 0.31, "grad_norm": 0.5811209082603455, "learning_rate": 0.0005842397920676032, "loss": 3.4319, "step": 6344 }, { "epoch": 0.31, "grad_norm": 0.574324369430542, "learning_rate": 0.0005842348671205466, "loss": 3.3559, "step": 6345 }, { "epoch": 0.31, "grad_norm": 0.5267350077629089, "learning_rate": 0.00058422994142487, "loss": 3.3692, "step": 6346 }, { "epoch": 0.31, "grad_norm": 0.5143805742263794, "learning_rate": 0.0005842250149805865, "loss": 3.4085, "step": 6347 }, { "epoch": 0.31, "grad_norm": 0.5390629172325134, "learning_rate": 0.000584220087787709, "loss": 3.2466, "step": 6348 }, { "epoch": 0.31, "grad_norm": 0.5482839941978455, "learning_rate": 0.0005842151598462504, "loss": 3.4562, "step": 6349 }, { "epoch": 0.31, "grad_norm": 0.5902968049049377, "learning_rate": 0.0005842102311562238, "loss": 3.4197, "step": 6350 }, { "epoch": 0.31, "grad_norm": 0.561595618724823, "learning_rate": 0.0005842053017176422, "loss": 3.4425, "step": 6351 }, { "epoch": 0.31, "grad_norm": 0.5435536503791809, "learning_rate": 0.0005842003715305185, "loss": 3.595, "step": 6352 }, { "epoch": 0.31, "grad_norm": 0.5846390724182129, "learning_rate": 0.0005841954405948656, "loss": 3.5844, "step": 6353 }, { "epoch": 0.31, "grad_norm": 0.5653446912765503, "learning_rate": 0.0005841905089106968, "loss": 3.3855, "step": 6354 }, { "epoch": 0.31, "grad_norm": 0.5833756327629089, "learning_rate": 0.0005841855764780248, "loss": 3.1619, "step": 6355 }, { "epoch": 0.31, "grad_norm": 0.565251350402832, "learning_rate": 0.0005841806432968626, "loss": 3.2028, "step": 6356 }, { "epoch": 0.31, "grad_norm": 0.5358942747116089, "learning_rate": 0.0005841757093672234, "loss": 3.2987, "step": 6357 }, { "epoch": 0.31, "grad_norm": 0.5292371511459351, "learning_rate": 0.0005841707746891201, "loss": 3.6083, "step": 6358 }, { "epoch": 0.31, "grad_norm": 0.606774628162384, "learning_rate": 0.0005841658392625656, "loss": 3.3125, "step": 6359 }, { "epoch": 0.31, "grad_norm": 0.5326677560806274, "learning_rate": 0.000584160903087573, "loss": 3.3493, "step": 6360 }, { "epoch": 0.31, "grad_norm": 0.5448054671287537, "learning_rate": 0.0005841559661641552, "loss": 3.3649, "step": 6361 }, { "epoch": 0.31, "grad_norm": 0.5255556106567383, "learning_rate": 0.0005841510284923253, "loss": 3.3162, "step": 6362 }, { "epoch": 0.31, "grad_norm": 0.5699189305305481, "learning_rate": 0.0005841460900720963, "loss": 3.2386, "step": 6363 }, { "epoch": 0.31, "grad_norm": 0.5408067107200623, "learning_rate": 0.0005841411509034812, "loss": 3.5189, "step": 6364 }, { "epoch": 0.31, "grad_norm": 0.561336100101471, "learning_rate": 0.000584136210986493, "loss": 3.4752, "step": 6365 }, { "epoch": 0.31, "grad_norm": 0.5111557245254517, "learning_rate": 0.0005841312703211447, "loss": 3.3528, "step": 6366 }, { "epoch": 0.31, "grad_norm": 0.532857358455658, "learning_rate": 0.0005841263289074493, "loss": 3.4392, "step": 6367 }, { "epoch": 0.31, "grad_norm": 0.5691311359405518, "learning_rate": 0.0005841213867454198, "loss": 3.1776, "step": 6368 }, { "epoch": 0.31, "grad_norm": 0.5483730435371399, "learning_rate": 0.0005841164438350693, "loss": 3.3162, "step": 6369 }, { "epoch": 0.31, "grad_norm": 0.5207633376121521, "learning_rate": 0.0005841115001764107, "loss": 3.3971, "step": 6370 }, { "epoch": 0.31, "grad_norm": 0.5637426972389221, "learning_rate": 0.0005841065557694572, "loss": 3.3626, "step": 6371 }, { "epoch": 0.31, "grad_norm": 0.5457177758216858, "learning_rate": 0.0005841016106142216, "loss": 3.4505, "step": 6372 }, { "epoch": 0.31, "grad_norm": 0.5361736416816711, "learning_rate": 0.0005840966647107171, "loss": 3.5193, "step": 6373 }, { "epoch": 0.31, "grad_norm": 0.5250360369682312, "learning_rate": 0.0005840917180589566, "loss": 3.3164, "step": 6374 }, { "epoch": 0.31, "grad_norm": 0.5771521925926208, "learning_rate": 0.0005840867706589531, "loss": 3.1594, "step": 6375 }, { "epoch": 0.31, "grad_norm": 0.5609606504440308, "learning_rate": 0.0005840818225107199, "loss": 3.1954, "step": 6376 }, { "epoch": 0.31, "grad_norm": 0.5278244018554688, "learning_rate": 0.0005840768736142698, "loss": 3.6396, "step": 6377 }, { "epoch": 0.31, "grad_norm": 0.5072911381721497, "learning_rate": 0.0005840719239696159, "loss": 3.4169, "step": 6378 }, { "epoch": 0.31, "grad_norm": 0.5723280906677246, "learning_rate": 0.0005840669735767712, "loss": 3.2226, "step": 6379 }, { "epoch": 0.31, "grad_norm": 0.5588771104812622, "learning_rate": 0.0005840620224357487, "loss": 3.4331, "step": 6380 }, { "epoch": 0.31, "grad_norm": 0.5622850656509399, "learning_rate": 0.0005840570705465616, "loss": 3.5246, "step": 6381 }, { "epoch": 0.31, "grad_norm": 0.5287413597106934, "learning_rate": 0.0005840521179092228, "loss": 3.5396, "step": 6382 }, { "epoch": 0.31, "grad_norm": 0.6131024360656738, "learning_rate": 0.0005840471645237454, "loss": 3.4931, "step": 6383 }, { "epoch": 0.31, "grad_norm": 0.49073895812034607, "learning_rate": 0.0005840422103901425, "loss": 3.3222, "step": 6384 }, { "epoch": 0.31, "grad_norm": 0.5380456447601318, "learning_rate": 0.000584037255508427, "loss": 3.4926, "step": 6385 }, { "epoch": 0.31, "grad_norm": 0.500093400478363, "learning_rate": 0.000584032299878612, "loss": 3.5383, "step": 6386 }, { "epoch": 0.31, "grad_norm": 0.5496594905853271, "learning_rate": 0.0005840273435007106, "loss": 3.4764, "step": 6387 }, { "epoch": 0.31, "grad_norm": 0.5659000277519226, "learning_rate": 0.0005840223863747359, "loss": 3.4011, "step": 6388 }, { "epoch": 0.31, "grad_norm": 0.5337103605270386, "learning_rate": 0.0005840174285007009, "loss": 3.3357, "step": 6389 }, { "epoch": 0.31, "grad_norm": 0.5471076965332031, "learning_rate": 0.0005840124698786186, "loss": 3.3584, "step": 6390 }, { "epoch": 0.31, "grad_norm": 0.5406061410903931, "learning_rate": 0.0005840075105085021, "loss": 3.5359, "step": 6391 }, { "epoch": 0.31, "grad_norm": 0.5463137626647949, "learning_rate": 0.0005840025503903645, "loss": 3.29, "step": 6392 }, { "epoch": 0.31, "grad_norm": 0.5283336043357849, "learning_rate": 0.0005839975895242189, "loss": 3.6057, "step": 6393 }, { "epoch": 0.31, "grad_norm": 0.5675161480903625, "learning_rate": 0.0005839926279100783, "loss": 3.2287, "step": 6394 }, { "epoch": 0.31, "grad_norm": 0.5471466183662415, "learning_rate": 0.0005839876655479557, "loss": 3.4739, "step": 6395 }, { "epoch": 0.31, "grad_norm": 0.5402804017066956, "learning_rate": 0.0005839827024378643, "loss": 3.3865, "step": 6396 }, { "epoch": 0.31, "grad_norm": 0.5770726799964905, "learning_rate": 0.000583977738579817, "loss": 3.3723, "step": 6397 }, { "epoch": 0.31, "grad_norm": 0.5275018811225891, "learning_rate": 0.0005839727739738271, "loss": 3.3587, "step": 6398 }, { "epoch": 0.31, "grad_norm": 0.5402635931968689, "learning_rate": 0.0005839678086199076, "loss": 3.6001, "step": 6399 }, { "epoch": 0.31, "grad_norm": 0.6085510849952698, "learning_rate": 0.0005839628425180714, "loss": 3.303, "step": 6400 }, { "epoch": 0.31, "grad_norm": 0.626964271068573, "learning_rate": 0.0005839578756683318, "loss": 3.3263, "step": 6401 }, { "epoch": 0.31, "grad_norm": 0.5337768197059631, "learning_rate": 0.0005839529080707019, "loss": 3.4304, "step": 6402 }, { "epoch": 0.31, "grad_norm": 0.5228325128555298, "learning_rate": 0.0005839479397251946, "loss": 3.2449, "step": 6403 }, { "epoch": 0.31, "grad_norm": 0.565669596195221, "learning_rate": 0.000583942970631823, "loss": 3.3662, "step": 6404 }, { "epoch": 0.31, "grad_norm": 0.5665708780288696, "learning_rate": 0.0005839380007906003, "loss": 3.339, "step": 6405 }, { "epoch": 0.31, "grad_norm": 0.530850350856781, "learning_rate": 0.0005839330302015396, "loss": 3.4299, "step": 6406 }, { "epoch": 0.31, "grad_norm": 0.5689116716384888, "learning_rate": 0.000583928058864654, "loss": 3.4183, "step": 6407 }, { "epoch": 0.31, "grad_norm": 0.5822932720184326, "learning_rate": 0.0005839230867799565, "loss": 3.3617, "step": 6408 }, { "epoch": 0.31, "grad_norm": 0.5526020526885986, "learning_rate": 0.0005839181139474601, "loss": 3.3813, "step": 6409 }, { "epoch": 0.31, "grad_norm": 0.5911718606948853, "learning_rate": 0.0005839131403671782, "loss": 3.3916, "step": 6410 }, { "epoch": 0.31, "grad_norm": 0.501130223274231, "learning_rate": 0.0005839081660391236, "loss": 3.2104, "step": 6411 }, { "epoch": 0.31, "grad_norm": 0.5649345517158508, "learning_rate": 0.0005839031909633096, "loss": 3.3394, "step": 6412 }, { "epoch": 0.31, "grad_norm": 0.5334262847900391, "learning_rate": 0.0005838982151397492, "loss": 3.2688, "step": 6413 }, { "epoch": 0.31, "grad_norm": 0.5680837035179138, "learning_rate": 0.0005838932385684556, "loss": 3.5027, "step": 6414 }, { "epoch": 0.31, "grad_norm": 0.5568577647209167, "learning_rate": 0.0005838882612494417, "loss": 3.3118, "step": 6415 }, { "epoch": 0.31, "grad_norm": 0.5233654379844666, "learning_rate": 0.0005838832831827209, "loss": 3.3249, "step": 6416 }, { "epoch": 0.31, "grad_norm": 0.5565677881240845, "learning_rate": 0.0005838783043683062, "loss": 3.257, "step": 6417 }, { "epoch": 0.31, "grad_norm": 0.588458776473999, "learning_rate": 0.0005838733248062105, "loss": 3.3794, "step": 6418 }, { "epoch": 0.31, "grad_norm": 0.543366551399231, "learning_rate": 0.0005838683444964473, "loss": 3.4565, "step": 6419 }, { "epoch": 0.31, "grad_norm": 0.508150041103363, "learning_rate": 0.0005838633634390293, "loss": 3.2742, "step": 6420 }, { "epoch": 0.31, "grad_norm": 0.5089104175567627, "learning_rate": 0.00058385838163397, "loss": 3.1958, "step": 6421 }, { "epoch": 0.31, "grad_norm": 0.5183136463165283, "learning_rate": 0.0005838533990812822, "loss": 3.4799, "step": 6422 }, { "epoch": 0.31, "grad_norm": 0.5491008758544922, "learning_rate": 0.0005838484157809794, "loss": 3.281, "step": 6423 }, { "epoch": 0.31, "grad_norm": 0.5273602604866028, "learning_rate": 0.0005838434317330743, "loss": 3.5572, "step": 6424 }, { "epoch": 0.31, "grad_norm": 0.5344679951667786, "learning_rate": 0.0005838384469375803, "loss": 3.3987, "step": 6425 }, { "epoch": 0.31, "grad_norm": 0.5330580472946167, "learning_rate": 0.0005838334613945105, "loss": 3.1804, "step": 6426 }, { "epoch": 0.31, "grad_norm": 0.5547220706939697, "learning_rate": 0.0005838284751038779, "loss": 3.6428, "step": 6427 }, { "epoch": 0.32, "grad_norm": 0.5338019728660583, "learning_rate": 0.0005838234880656957, "loss": 3.3878, "step": 6428 }, { "epoch": 0.32, "grad_norm": 0.54743891954422, "learning_rate": 0.0005838185002799771, "loss": 3.4011, "step": 6429 }, { "epoch": 0.32, "grad_norm": 0.5494392514228821, "learning_rate": 0.0005838135117467352, "loss": 3.3812, "step": 6430 }, { "epoch": 0.32, "grad_norm": 0.6107029318809509, "learning_rate": 0.0005838085224659832, "loss": 3.3056, "step": 6431 }, { "epoch": 0.32, "grad_norm": 0.5496015548706055, "learning_rate": 0.0005838035324377341, "loss": 3.3015, "step": 6432 }, { "epoch": 0.32, "grad_norm": 0.5744660496711731, "learning_rate": 0.0005837985416620011, "loss": 3.6286, "step": 6433 }, { "epoch": 0.32, "grad_norm": 0.5377488732337952, "learning_rate": 0.0005837935501387975, "loss": 3.4111, "step": 6434 }, { "epoch": 0.32, "grad_norm": 0.5246378779411316, "learning_rate": 0.0005837885578681361, "loss": 3.4557, "step": 6435 }, { "epoch": 0.32, "grad_norm": 0.5327603220939636, "learning_rate": 0.0005837835648500304, "loss": 3.3453, "step": 6436 }, { "epoch": 0.32, "grad_norm": 0.5735540986061096, "learning_rate": 0.0005837785710844934, "loss": 3.4068, "step": 6437 }, { "epoch": 0.32, "grad_norm": 0.5326542854309082, "learning_rate": 0.0005837735765715381, "loss": 3.5506, "step": 6438 }, { "epoch": 0.32, "grad_norm": 0.6021717190742493, "learning_rate": 0.000583768581311178, "loss": 3.4768, "step": 6439 }, { "epoch": 0.32, "grad_norm": 0.5350591540336609, "learning_rate": 0.0005837635853034259, "loss": 3.3959, "step": 6440 }, { "epoch": 0.32, "grad_norm": 0.5394638180732727, "learning_rate": 0.0005837585885482953, "loss": 3.4337, "step": 6441 }, { "epoch": 0.32, "grad_norm": 0.5362926125526428, "learning_rate": 0.0005837535910457991, "loss": 3.3943, "step": 6442 }, { "epoch": 0.32, "grad_norm": 0.5286582112312317, "learning_rate": 0.0005837485927959505, "loss": 3.4177, "step": 6443 }, { "epoch": 0.32, "grad_norm": 0.5231122970581055, "learning_rate": 0.0005837435937987628, "loss": 3.5713, "step": 6444 }, { "epoch": 0.32, "grad_norm": 0.541363537311554, "learning_rate": 0.0005837385940542491, "loss": 3.2653, "step": 6445 }, { "epoch": 0.32, "grad_norm": 0.5405844449996948, "learning_rate": 0.0005837335935624225, "loss": 3.4653, "step": 6446 }, { "epoch": 0.32, "grad_norm": 0.5295756459236145, "learning_rate": 0.0005837285923232963, "loss": 3.3157, "step": 6447 }, { "epoch": 0.32, "grad_norm": 0.5377822518348694, "learning_rate": 0.0005837235903368834, "loss": 3.3217, "step": 6448 }, { "epoch": 0.32, "grad_norm": 0.5518754720687866, "learning_rate": 0.0005837185876031973, "loss": 3.2826, "step": 6449 }, { "epoch": 0.32, "grad_norm": 0.5562870502471924, "learning_rate": 0.000583713584122251, "loss": 3.1871, "step": 6450 }, { "epoch": 0.32, "grad_norm": 0.5364195704460144, "learning_rate": 0.0005837085798940577, "loss": 3.4522, "step": 6451 }, { "epoch": 0.32, "grad_norm": 0.5666096210479736, "learning_rate": 0.0005837035749186307, "loss": 3.5498, "step": 6452 }, { "epoch": 0.32, "grad_norm": 0.5489985942840576, "learning_rate": 0.000583698569195983, "loss": 3.2855, "step": 6453 }, { "epoch": 0.32, "grad_norm": 0.5230780839920044, "learning_rate": 0.0005836935627261279, "loss": 3.4296, "step": 6454 }, { "epoch": 0.32, "grad_norm": 0.5031773447990417, "learning_rate": 0.0005836885555090786, "loss": 3.455, "step": 6455 }, { "epoch": 0.32, "grad_norm": 0.5535476207733154, "learning_rate": 0.0005836835475448482, "loss": 3.2767, "step": 6456 }, { "epoch": 0.32, "grad_norm": 0.5760499835014343, "learning_rate": 0.0005836785388334499, "loss": 3.5469, "step": 6457 }, { "epoch": 0.32, "grad_norm": 0.5332911610603333, "learning_rate": 0.0005836735293748969, "loss": 3.4841, "step": 6458 }, { "epoch": 0.32, "grad_norm": 0.5543957352638245, "learning_rate": 0.0005836685191692026, "loss": 3.3279, "step": 6459 }, { "epoch": 0.32, "grad_norm": 0.576471209526062, "learning_rate": 0.0005836635082163798, "loss": 3.4943, "step": 6460 }, { "epoch": 0.32, "grad_norm": 0.5583447813987732, "learning_rate": 0.000583658496516442, "loss": 3.3526, "step": 6461 }, { "epoch": 0.32, "grad_norm": 0.5250188112258911, "learning_rate": 0.0005836534840694024, "loss": 3.4248, "step": 6462 }, { "epoch": 0.32, "grad_norm": 0.5952808856964111, "learning_rate": 0.0005836484708752739, "loss": 3.368, "step": 6463 }, { "epoch": 0.32, "grad_norm": 0.5756708979606628, "learning_rate": 0.0005836434569340701, "loss": 3.3606, "step": 6464 }, { "epoch": 0.32, "grad_norm": 0.5491698384284973, "learning_rate": 0.000583638442245804, "loss": 3.4689, "step": 6465 }, { "epoch": 0.32, "grad_norm": 0.5349763631820679, "learning_rate": 0.0005836334268104887, "loss": 3.4542, "step": 6466 }, { "epoch": 0.32, "grad_norm": 0.5523869395256042, "learning_rate": 0.0005836284106281377, "loss": 3.3126, "step": 6467 }, { "epoch": 0.32, "grad_norm": 0.5572420358657837, "learning_rate": 0.0005836233936987639, "loss": 3.4631, "step": 6468 }, { "epoch": 0.32, "grad_norm": 0.5321294665336609, "learning_rate": 0.0005836183760223808, "loss": 3.3318, "step": 6469 }, { "epoch": 0.32, "grad_norm": 0.5739319324493408, "learning_rate": 0.0005836133575990015, "loss": 3.5455, "step": 6470 }, { "epoch": 0.32, "grad_norm": 0.5573357939720154, "learning_rate": 0.0005836083384286391, "loss": 3.3142, "step": 6471 }, { "epoch": 0.32, "grad_norm": 0.6728717684745789, "learning_rate": 0.0005836033185113069, "loss": 3.4165, "step": 6472 }, { "epoch": 0.32, "grad_norm": 0.5487765073776245, "learning_rate": 0.0005835982978470182, "loss": 3.3554, "step": 6473 }, { "epoch": 0.32, "grad_norm": 0.5465099215507507, "learning_rate": 0.0005835932764357861, "loss": 3.2763, "step": 6474 }, { "epoch": 0.32, "grad_norm": 0.5527744293212891, "learning_rate": 0.0005835882542776239, "loss": 3.2369, "step": 6475 }, { "epoch": 0.32, "grad_norm": 0.6429268717765808, "learning_rate": 0.0005835832313725449, "loss": 3.3702, "step": 6476 }, { "epoch": 0.32, "grad_norm": 0.5672469735145569, "learning_rate": 0.0005835782077205623, "loss": 3.4388, "step": 6477 }, { "epoch": 0.32, "grad_norm": 0.5245232582092285, "learning_rate": 0.0005835731833216891, "loss": 3.5464, "step": 6478 }, { "epoch": 0.32, "grad_norm": 0.5858415961265564, "learning_rate": 0.0005835681581759387, "loss": 3.3392, "step": 6479 }, { "epoch": 0.32, "grad_norm": 0.5938719511032104, "learning_rate": 0.0005835631322833244, "loss": 3.3036, "step": 6480 }, { "epoch": 0.32, "grad_norm": 0.5960376262664795, "learning_rate": 0.0005835581056438595, "loss": 3.3089, "step": 6481 }, { "epoch": 0.32, "grad_norm": 0.5122845768928528, "learning_rate": 0.000583553078257557, "loss": 3.4641, "step": 6482 }, { "epoch": 0.32, "grad_norm": 0.5184789299964905, "learning_rate": 0.0005835480501244302, "loss": 3.2639, "step": 6483 }, { "epoch": 0.32, "grad_norm": 0.5567031502723694, "learning_rate": 0.0005835430212444927, "loss": 3.6092, "step": 6484 }, { "epoch": 0.32, "grad_norm": 0.6219637393951416, "learning_rate": 0.0005835379916177572, "loss": 3.4196, "step": 6485 }, { "epoch": 0.32, "grad_norm": 0.5328686833381653, "learning_rate": 0.0005835329612442372, "loss": 3.523, "step": 6486 }, { "epoch": 0.32, "grad_norm": 0.5400885939598083, "learning_rate": 0.000583527930123946, "loss": 3.0661, "step": 6487 }, { "epoch": 0.32, "grad_norm": 0.5412009954452515, "learning_rate": 0.0005835228982568968, "loss": 3.2229, "step": 6488 }, { "epoch": 0.32, "grad_norm": 0.5066078901290894, "learning_rate": 0.0005835178656431029, "loss": 3.2135, "step": 6489 }, { "epoch": 0.32, "grad_norm": 0.6025939583778381, "learning_rate": 0.0005835128322825774, "loss": 3.5742, "step": 6490 }, { "epoch": 0.32, "grad_norm": 0.5483773350715637, "learning_rate": 0.0005835077981753337, "loss": 3.4047, "step": 6491 }, { "epoch": 0.32, "grad_norm": 0.5667142868041992, "learning_rate": 0.0005835027633213851, "loss": 3.3673, "step": 6492 }, { "epoch": 0.32, "grad_norm": 0.5416460633277893, "learning_rate": 0.0005834977277207447, "loss": 3.5253, "step": 6493 }, { "epoch": 0.32, "grad_norm": 0.5411655902862549, "learning_rate": 0.0005834926913734259, "loss": 3.2967, "step": 6494 }, { "epoch": 0.32, "grad_norm": 0.5534458756446838, "learning_rate": 0.0005834876542794418, "loss": 3.2466, "step": 6495 }, { "epoch": 0.32, "grad_norm": 0.5487003922462463, "learning_rate": 0.0005834826164388059, "loss": 3.4169, "step": 6496 }, { "epoch": 0.32, "grad_norm": 0.5103386044502258, "learning_rate": 0.0005834775778515313, "loss": 3.4592, "step": 6497 }, { "epoch": 0.32, "grad_norm": 0.5426819324493408, "learning_rate": 0.0005834725385176312, "loss": 3.4416, "step": 6498 }, { "epoch": 0.32, "grad_norm": 0.5770543217658997, "learning_rate": 0.0005834674984371191, "loss": 3.3999, "step": 6499 }, { "epoch": 0.32, "grad_norm": 0.5331725478172302, "learning_rate": 0.0005834624576100082, "loss": 3.4512, "step": 6500 }, { "epoch": 0.32, "grad_norm": 0.5609892010688782, "learning_rate": 0.0005834574160363117, "loss": 3.461, "step": 6501 }, { "epoch": 0.32, "grad_norm": 0.5909136533737183, "learning_rate": 0.000583452373716043, "loss": 3.3132, "step": 6502 }, { "epoch": 0.32, "grad_norm": 0.55914705991745, "learning_rate": 0.0005834473306492152, "loss": 3.5681, "step": 6503 }, { "epoch": 0.32, "grad_norm": 0.5517709255218506, "learning_rate": 0.0005834422868358416, "loss": 3.3071, "step": 6504 }, { "epoch": 0.32, "grad_norm": 0.5129667520523071, "learning_rate": 0.0005834372422759358, "loss": 3.2362, "step": 6505 }, { "epoch": 0.32, "grad_norm": 0.5354728102684021, "learning_rate": 0.0005834321969695107, "loss": 3.3641, "step": 6506 }, { "epoch": 0.32, "grad_norm": 0.5102858543395996, "learning_rate": 0.0005834271509165798, "loss": 3.1551, "step": 6507 }, { "epoch": 0.32, "grad_norm": 0.5487684011459351, "learning_rate": 0.0005834221041171563, "loss": 3.5077, "step": 6508 }, { "epoch": 0.32, "grad_norm": 0.5984534025192261, "learning_rate": 0.0005834170565712535, "loss": 3.3783, "step": 6509 }, { "epoch": 0.32, "grad_norm": 0.5358244180679321, "learning_rate": 0.0005834120082788847, "loss": 3.3893, "step": 6510 }, { "epoch": 0.32, "grad_norm": 0.5399655103683472, "learning_rate": 0.0005834069592400632, "loss": 3.3156, "step": 6511 }, { "epoch": 0.32, "grad_norm": 0.5290285348892212, "learning_rate": 0.0005834019094548025, "loss": 3.1687, "step": 6512 }, { "epoch": 0.32, "grad_norm": 0.5532265901565552, "learning_rate": 0.0005833968589231155, "loss": 3.4857, "step": 6513 }, { "epoch": 0.32, "grad_norm": 0.5471756458282471, "learning_rate": 0.0005833918076450158, "loss": 3.2192, "step": 6514 }, { "epoch": 0.32, "grad_norm": 0.5379656553268433, "learning_rate": 0.0005833867556205165, "loss": 3.4739, "step": 6515 }, { "epoch": 0.32, "grad_norm": 0.5359077453613281, "learning_rate": 0.0005833817028496313, "loss": 3.3655, "step": 6516 }, { "epoch": 0.32, "grad_norm": 0.5260913968086243, "learning_rate": 0.000583376649332373, "loss": 3.315, "step": 6517 }, { "epoch": 0.32, "grad_norm": 0.5894081592559814, "learning_rate": 0.0005833715950687552, "loss": 3.1694, "step": 6518 }, { "epoch": 0.32, "grad_norm": 0.5539742708206177, "learning_rate": 0.0005833665400587911, "loss": 3.3007, "step": 6519 }, { "epoch": 0.32, "grad_norm": 0.506983757019043, "learning_rate": 0.0005833614843024942, "loss": 3.3166, "step": 6520 }, { "epoch": 0.32, "grad_norm": 0.5148934721946716, "learning_rate": 0.0005833564277998776, "loss": 3.3476, "step": 6521 }, { "epoch": 0.32, "grad_norm": 0.5183430910110474, "learning_rate": 0.0005833513705509547, "loss": 3.5182, "step": 6522 }, { "epoch": 0.32, "grad_norm": 0.5564296841621399, "learning_rate": 0.0005833463125557389, "loss": 3.2431, "step": 6523 }, { "epoch": 0.32, "grad_norm": 0.5783804655075073, "learning_rate": 0.0005833412538142433, "loss": 3.3268, "step": 6524 }, { "epoch": 0.32, "grad_norm": 0.5203677415847778, "learning_rate": 0.0005833361943264815, "loss": 3.3579, "step": 6525 }, { "epoch": 0.32, "grad_norm": 0.5871049165725708, "learning_rate": 0.0005833311340924666, "loss": 3.3097, "step": 6526 }, { "epoch": 0.32, "grad_norm": 0.556999146938324, "learning_rate": 0.0005833260731122121, "loss": 3.254, "step": 6527 }, { "epoch": 0.32, "grad_norm": 0.5999124646186829, "learning_rate": 0.0005833210113857313, "loss": 3.439, "step": 6528 }, { "epoch": 0.32, "grad_norm": 0.5835153460502625, "learning_rate": 0.0005833159489130373, "loss": 3.4526, "step": 6529 }, { "epoch": 0.32, "grad_norm": 0.521670401096344, "learning_rate": 0.0005833108856941438, "loss": 3.4603, "step": 6530 }, { "epoch": 0.32, "grad_norm": 0.5895057916641235, "learning_rate": 0.0005833058217290638, "loss": 3.4598, "step": 6531 }, { "epoch": 0.32, "grad_norm": 0.5201472043991089, "learning_rate": 0.0005833007570178109, "loss": 3.5078, "step": 6532 }, { "epoch": 0.32, "grad_norm": 0.5210258364677429, "learning_rate": 0.0005832956915603982, "loss": 3.3483, "step": 6533 }, { "epoch": 0.32, "grad_norm": 0.5231567621231079, "learning_rate": 0.0005832906253568392, "loss": 3.4721, "step": 6534 }, { "epoch": 0.32, "grad_norm": 0.5170723795890808, "learning_rate": 0.0005832855584071474, "loss": 3.3227, "step": 6535 }, { "epoch": 0.32, "grad_norm": 0.5135728716850281, "learning_rate": 0.0005832804907113358, "loss": 3.1659, "step": 6536 }, { "epoch": 0.32, "grad_norm": 0.5827286243438721, "learning_rate": 0.0005832754222694179, "loss": 3.3854, "step": 6537 }, { "epoch": 0.32, "grad_norm": 0.5649005174636841, "learning_rate": 0.000583270353081407, "loss": 3.2509, "step": 6538 }, { "epoch": 0.32, "grad_norm": 0.585883378982544, "learning_rate": 0.0005832652831473166, "loss": 3.3499, "step": 6539 }, { "epoch": 0.32, "grad_norm": 0.5253201723098755, "learning_rate": 0.00058326021246716, "loss": 3.4087, "step": 6540 }, { "epoch": 0.32, "grad_norm": 0.4951026141643524, "learning_rate": 0.0005832551410409505, "loss": 3.4144, "step": 6541 }, { "epoch": 0.32, "grad_norm": 0.5574667453765869, "learning_rate": 0.0005832500688687014, "loss": 3.2957, "step": 6542 }, { "epoch": 0.32, "grad_norm": 0.5781322121620178, "learning_rate": 0.0005832449959504262, "loss": 3.3506, "step": 6543 }, { "epoch": 0.32, "grad_norm": 0.6145309209823608, "learning_rate": 0.0005832399222861381, "loss": 3.4637, "step": 6544 }, { "epoch": 0.32, "grad_norm": 0.5183820724487305, "learning_rate": 0.0005832348478758507, "loss": 3.5608, "step": 6545 }, { "epoch": 0.32, "grad_norm": 0.5703016519546509, "learning_rate": 0.0005832297727195771, "loss": 3.3926, "step": 6546 }, { "epoch": 0.32, "grad_norm": 0.5798136591911316, "learning_rate": 0.0005832246968173309, "loss": 3.3352, "step": 6547 }, { "epoch": 0.32, "grad_norm": 0.5665884017944336, "learning_rate": 0.0005832196201691252, "loss": 3.2569, "step": 6548 }, { "epoch": 0.32, "grad_norm": 0.5170712471008301, "learning_rate": 0.0005832145427749737, "loss": 3.4392, "step": 6549 }, { "epoch": 0.32, "grad_norm": 0.5397904515266418, "learning_rate": 0.0005832094646348894, "loss": 3.5144, "step": 6550 }, { "epoch": 0.32, "grad_norm": 0.6113349795341492, "learning_rate": 0.000583204385748886, "loss": 3.3113, "step": 6551 }, { "epoch": 0.32, "grad_norm": 0.5789533853530884, "learning_rate": 0.0005831993061169768, "loss": 3.2817, "step": 6552 }, { "epoch": 0.32, "grad_norm": 0.5456516742706299, "learning_rate": 0.000583194225739175, "loss": 3.327, "step": 6553 }, { "epoch": 0.32, "grad_norm": 0.5421919822692871, "learning_rate": 0.0005831891446154942, "loss": 3.2764, "step": 6554 }, { "epoch": 0.32, "grad_norm": 0.5785422325134277, "learning_rate": 0.0005831840627459476, "loss": 3.0956, "step": 6555 }, { "epoch": 0.32, "grad_norm": 0.5162643194198608, "learning_rate": 0.0005831789801305488, "loss": 3.4063, "step": 6556 }, { "epoch": 0.32, "grad_norm": 0.6994883418083191, "learning_rate": 0.0005831738967693109, "loss": 3.3117, "step": 6557 }, { "epoch": 0.32, "grad_norm": 0.5696769952774048, "learning_rate": 0.0005831688126622477, "loss": 3.2208, "step": 6558 }, { "epoch": 0.32, "grad_norm": 0.5177335739135742, "learning_rate": 0.0005831637278093722, "loss": 3.2212, "step": 6559 }, { "epoch": 0.32, "grad_norm": 0.5111486315727234, "learning_rate": 0.000583158642210698, "loss": 3.4638, "step": 6560 }, { "epoch": 0.32, "grad_norm": 0.5310366153717041, "learning_rate": 0.0005831535558662383, "loss": 3.2656, "step": 6561 }, { "epoch": 0.32, "grad_norm": 0.5311713218688965, "learning_rate": 0.0005831484687760067, "loss": 3.6579, "step": 6562 }, { "epoch": 0.32, "grad_norm": 0.6105450987815857, "learning_rate": 0.0005831433809400166, "loss": 3.3503, "step": 6563 }, { "epoch": 0.32, "grad_norm": 0.5762481689453125, "learning_rate": 0.0005831382923582812, "loss": 3.4268, "step": 6564 }, { "epoch": 0.32, "grad_norm": 0.5664306879043579, "learning_rate": 0.0005831332030308142, "loss": 3.2995, "step": 6565 }, { "epoch": 0.32, "grad_norm": 0.5523332953453064, "learning_rate": 0.0005831281129576286, "loss": 3.0233, "step": 6566 }, { "epoch": 0.32, "grad_norm": 0.5818634033203125, "learning_rate": 0.0005831230221387382, "loss": 3.4713, "step": 6567 }, { "epoch": 0.32, "grad_norm": 0.5426694750785828, "learning_rate": 0.0005831179305741562, "loss": 3.3265, "step": 6568 }, { "epoch": 0.32, "grad_norm": 0.5816428661346436, "learning_rate": 0.000583112838263896, "loss": 3.1057, "step": 6569 }, { "epoch": 0.32, "grad_norm": 0.5406796932220459, "learning_rate": 0.0005831077452079712, "loss": 2.973, "step": 6570 }, { "epoch": 0.32, "grad_norm": 0.5280852317810059, "learning_rate": 0.000583102651406395, "loss": 3.3631, "step": 6571 }, { "epoch": 0.32, "grad_norm": 0.5578991174697876, "learning_rate": 0.0005830975568591809, "loss": 3.3991, "step": 6572 }, { "epoch": 0.32, "grad_norm": 0.585803210735321, "learning_rate": 0.0005830924615663423, "loss": 3.4668, "step": 6573 }, { "epoch": 0.32, "grad_norm": 0.5431290864944458, "learning_rate": 0.0005830873655278927, "loss": 3.1047, "step": 6574 }, { "epoch": 0.32, "grad_norm": 0.5530149340629578, "learning_rate": 0.0005830822687438455, "loss": 3.4276, "step": 6575 }, { "epoch": 0.32, "grad_norm": 0.5447601675987244, "learning_rate": 0.000583077171214214, "loss": 3.2974, "step": 6576 }, { "epoch": 0.32, "grad_norm": 0.5636044144630432, "learning_rate": 0.0005830720729390118, "loss": 3.1732, "step": 6577 }, { "epoch": 0.32, "grad_norm": 0.5744757056236267, "learning_rate": 0.0005830669739182522, "loss": 3.3084, "step": 6578 }, { "epoch": 0.32, "grad_norm": 0.5662171840667725, "learning_rate": 0.0005830618741519486, "loss": 3.5397, "step": 6579 }, { "epoch": 0.32, "grad_norm": 0.5077491998672485, "learning_rate": 0.0005830567736401145, "loss": 2.9837, "step": 6580 }, { "epoch": 0.32, "grad_norm": 0.5053831338882446, "learning_rate": 0.0005830516723827634, "loss": 3.2128, "step": 6581 }, { "epoch": 0.32, "grad_norm": 0.5454685688018799, "learning_rate": 0.0005830465703799086, "loss": 3.5079, "step": 6582 }, { "epoch": 0.32, "grad_norm": 0.5580922961235046, "learning_rate": 0.0005830414676315636, "loss": 3.4775, "step": 6583 }, { "epoch": 0.32, "grad_norm": 0.5592604279518127, "learning_rate": 0.000583036364137742, "loss": 3.2971, "step": 6584 }, { "epoch": 0.32, "grad_norm": 0.5692855715751648, "learning_rate": 0.0005830312598984569, "loss": 3.1209, "step": 6585 }, { "epoch": 0.32, "grad_norm": 0.5731725692749023, "learning_rate": 0.0005830261549137221, "loss": 3.2845, "step": 6586 }, { "epoch": 0.32, "grad_norm": 0.5284132957458496, "learning_rate": 0.0005830210491835508, "loss": 3.438, "step": 6587 }, { "epoch": 0.32, "grad_norm": 0.5840345621109009, "learning_rate": 0.0005830159427079565, "loss": 3.4824, "step": 6588 }, { "epoch": 0.32, "grad_norm": 0.5384155511856079, "learning_rate": 0.0005830108354869528, "loss": 3.466, "step": 6589 }, { "epoch": 0.32, "grad_norm": 0.5243905782699585, "learning_rate": 0.0005830057275205529, "loss": 3.3626, "step": 6590 }, { "epoch": 0.32, "grad_norm": 0.4902263581752777, "learning_rate": 0.0005830006188087704, "loss": 3.22, "step": 6591 }, { "epoch": 0.32, "grad_norm": 0.5410398840904236, "learning_rate": 0.0005829955093516187, "loss": 3.317, "step": 6592 }, { "epoch": 0.32, "grad_norm": 0.5666494965553284, "learning_rate": 0.0005829903991491113, "loss": 3.5118, "step": 6593 }, { "epoch": 0.32, "grad_norm": 0.5500527024269104, "learning_rate": 0.0005829852882012616, "loss": 3.1609, "step": 6594 }, { "epoch": 0.32, "grad_norm": 0.5293498039245605, "learning_rate": 0.0005829801765080833, "loss": 3.5884, "step": 6595 }, { "epoch": 0.32, "grad_norm": 0.5184808969497681, "learning_rate": 0.0005829750640695896, "loss": 3.4994, "step": 6596 }, { "epoch": 0.32, "grad_norm": 0.5679014921188354, "learning_rate": 0.000582969950885794, "loss": 3.182, "step": 6597 }, { "epoch": 0.32, "grad_norm": 0.5611880421638489, "learning_rate": 0.0005829648369567099, "loss": 3.3356, "step": 6598 }, { "epoch": 0.32, "grad_norm": 0.5921120643615723, "learning_rate": 0.0005829597222823511, "loss": 3.2854, "step": 6599 }, { "epoch": 0.32, "grad_norm": 0.62581467628479, "learning_rate": 0.0005829546068627306, "loss": 3.3484, "step": 6600 }, { "epoch": 0.32, "grad_norm": 0.554572343826294, "learning_rate": 0.0005829494906978624, "loss": 3.4792, "step": 6601 }, { "epoch": 0.32, "grad_norm": 0.535103976726532, "learning_rate": 0.0005829443737877595, "loss": 3.5559, "step": 6602 }, { "epoch": 0.32, "grad_norm": 0.5401231050491333, "learning_rate": 0.0005829392561324357, "loss": 3.3311, "step": 6603 }, { "epoch": 0.32, "grad_norm": 0.5672370791435242, "learning_rate": 0.0005829341377319042, "loss": 3.452, "step": 6604 }, { "epoch": 0.32, "grad_norm": 0.5229573845863342, "learning_rate": 0.0005829290185861788, "loss": 3.1934, "step": 6605 }, { "epoch": 0.32, "grad_norm": 0.5550198554992676, "learning_rate": 0.0005829238986952728, "loss": 3.2653, "step": 6606 }, { "epoch": 0.32, "grad_norm": 0.4899026155471802, "learning_rate": 0.0005829187780591995, "loss": 3.5498, "step": 6607 }, { "epoch": 0.32, "grad_norm": 0.5529863238334656, "learning_rate": 0.0005829136566779727, "loss": 3.4041, "step": 6608 }, { "epoch": 0.32, "grad_norm": 0.5543391704559326, "learning_rate": 0.0005829085345516058, "loss": 3.5464, "step": 6609 }, { "epoch": 0.32, "grad_norm": 0.5163125991821289, "learning_rate": 0.0005829034116801123, "loss": 3.556, "step": 6610 }, { "epoch": 0.32, "grad_norm": 0.5830318331718445, "learning_rate": 0.0005828982880635057, "loss": 3.4059, "step": 6611 }, { "epoch": 0.32, "grad_norm": 0.5730868577957153, "learning_rate": 0.0005828931637017992, "loss": 3.2501, "step": 6612 }, { "epoch": 0.32, "grad_norm": 0.5114001631736755, "learning_rate": 0.0005828880385950068, "loss": 3.3564, "step": 6613 }, { "epoch": 0.32, "grad_norm": 0.5583293437957764, "learning_rate": 0.0005828829127431418, "loss": 3.1739, "step": 6614 }, { "epoch": 0.32, "grad_norm": 0.5196751952171326, "learning_rate": 0.0005828777861462174, "loss": 3.4315, "step": 6615 }, { "epoch": 0.32, "grad_norm": 0.5976796746253967, "learning_rate": 0.0005828726588042475, "loss": 3.3738, "step": 6616 }, { "epoch": 0.32, "grad_norm": 0.5232165455818176, "learning_rate": 0.0005828675307172455, "loss": 3.3511, "step": 6617 }, { "epoch": 0.32, "grad_norm": 0.5699440240859985, "learning_rate": 0.0005828624018852247, "loss": 3.4635, "step": 6618 }, { "epoch": 0.32, "grad_norm": 0.5138837099075317, "learning_rate": 0.0005828572723081989, "loss": 3.33, "step": 6619 }, { "epoch": 0.32, "grad_norm": 0.5367459654808044, "learning_rate": 0.0005828521419861814, "loss": 3.3618, "step": 6620 }, { "epoch": 0.32, "grad_norm": 0.5628336668014526, "learning_rate": 0.0005828470109191859, "loss": 3.4253, "step": 6621 }, { "epoch": 0.32, "grad_norm": 0.5682950615882874, "learning_rate": 0.0005828418791072258, "loss": 3.3819, "step": 6622 }, { "epoch": 0.32, "grad_norm": 0.5145696401596069, "learning_rate": 0.0005828367465503145, "loss": 3.409, "step": 6623 }, { "epoch": 0.32, "grad_norm": 0.5609208345413208, "learning_rate": 0.0005828316132484657, "loss": 3.471, "step": 6624 }, { "epoch": 0.32, "grad_norm": 0.514314591884613, "learning_rate": 0.0005828264792016929, "loss": 3.4783, "step": 6625 }, { "epoch": 0.32, "grad_norm": 0.5764790773391724, "learning_rate": 0.0005828213444100096, "loss": 3.5846, "step": 6626 }, { "epoch": 0.32, "grad_norm": 0.5133751630783081, "learning_rate": 0.0005828162088734293, "loss": 3.4859, "step": 6627 }, { "epoch": 0.32, "grad_norm": 0.5235024094581604, "learning_rate": 0.0005828110725919655, "loss": 3.3398, "step": 6628 }, { "epoch": 0.32, "grad_norm": 0.5416542291641235, "learning_rate": 0.0005828059355656317, "loss": 3.279, "step": 6629 }, { "epoch": 0.32, "grad_norm": 0.5646631121635437, "learning_rate": 0.0005828007977944416, "loss": 3.3315, "step": 6630 }, { "epoch": 0.32, "grad_norm": 0.5523773431777954, "learning_rate": 0.0005827956592784087, "loss": 3.3439, "step": 6631 }, { "epoch": 0.33, "grad_norm": 0.5373170375823975, "learning_rate": 0.0005827905200175463, "loss": 3.3712, "step": 6632 }, { "epoch": 0.33, "grad_norm": 0.5419214367866516, "learning_rate": 0.0005827853800118682, "loss": 3.3848, "step": 6633 }, { "epoch": 0.33, "grad_norm": 0.5145456194877625, "learning_rate": 0.0005827802392613877, "loss": 3.4817, "step": 6634 }, { "epoch": 0.33, "grad_norm": 0.5141541957855225, "learning_rate": 0.0005827750977661184, "loss": 3.3013, "step": 6635 }, { "epoch": 0.33, "grad_norm": 0.519413948059082, "learning_rate": 0.0005827699555260742, "loss": 3.2296, "step": 6636 }, { "epoch": 0.33, "grad_norm": 0.5240925550460815, "learning_rate": 0.0005827648125412683, "loss": 3.3045, "step": 6637 }, { "epoch": 0.33, "grad_norm": 0.5714395046234131, "learning_rate": 0.0005827596688117141, "loss": 3.1928, "step": 6638 }, { "epoch": 0.33, "grad_norm": 0.5431802868843079, "learning_rate": 0.0005827545243374256, "loss": 3.2881, "step": 6639 }, { "epoch": 0.33, "grad_norm": 0.542210578918457, "learning_rate": 0.0005827493791184159, "loss": 3.4252, "step": 6640 }, { "epoch": 0.33, "grad_norm": 0.5310395956039429, "learning_rate": 0.0005827442331546987, "loss": 3.4693, "step": 6641 }, { "epoch": 0.33, "grad_norm": 0.5362105369567871, "learning_rate": 0.0005827390864462878, "loss": 3.5127, "step": 6642 }, { "epoch": 0.33, "grad_norm": 0.5233688354492188, "learning_rate": 0.0005827339389931966, "loss": 3.5151, "step": 6643 }, { "epoch": 0.33, "grad_norm": 0.5598340630531311, "learning_rate": 0.0005827287907954385, "loss": 3.3003, "step": 6644 }, { "epoch": 0.33, "grad_norm": 0.5226010680198669, "learning_rate": 0.0005827236418530272, "loss": 3.3553, "step": 6645 }, { "epoch": 0.33, "grad_norm": 0.6050033569335938, "learning_rate": 0.0005827184921659761, "loss": 3.1462, "step": 6646 }, { "epoch": 0.33, "grad_norm": 0.5034282207489014, "learning_rate": 0.0005827133417342991, "loss": 3.5462, "step": 6647 }, { "epoch": 0.33, "grad_norm": 0.5270561575889587, "learning_rate": 0.0005827081905580095, "loss": 3.5129, "step": 6648 }, { "epoch": 0.33, "grad_norm": 0.5257344245910645, "learning_rate": 0.0005827030386371209, "loss": 3.3749, "step": 6649 }, { "epoch": 0.33, "grad_norm": 0.6640146374702454, "learning_rate": 0.0005826978859716469, "loss": 3.0764, "step": 6650 }, { "epoch": 0.33, "grad_norm": 0.5421690940856934, "learning_rate": 0.0005826927325616012, "loss": 3.2901, "step": 6651 }, { "epoch": 0.33, "grad_norm": 0.5615790486335754, "learning_rate": 0.0005826875784069971, "loss": 3.3495, "step": 6652 }, { "epoch": 0.33, "grad_norm": 0.5812866687774658, "learning_rate": 0.0005826824235078484, "loss": 3.6013, "step": 6653 }, { "epoch": 0.33, "grad_norm": 0.5673434138298035, "learning_rate": 0.0005826772678641685, "loss": 3.313, "step": 6654 }, { "epoch": 0.33, "grad_norm": 0.5085522532463074, "learning_rate": 0.0005826721114759711, "loss": 3.3503, "step": 6655 }, { "epoch": 0.33, "grad_norm": 0.5811455249786377, "learning_rate": 0.0005826669543432699, "loss": 3.5832, "step": 6656 }, { "epoch": 0.33, "grad_norm": 0.584208607673645, "learning_rate": 0.0005826617964660783, "loss": 3.2827, "step": 6657 }, { "epoch": 0.33, "grad_norm": 0.547170877456665, "learning_rate": 0.0005826566378444099, "loss": 3.2459, "step": 6658 }, { "epoch": 0.33, "grad_norm": 0.5642180442810059, "learning_rate": 0.0005826514784782783, "loss": 3.3707, "step": 6659 }, { "epoch": 0.33, "grad_norm": 0.5796110033988953, "learning_rate": 0.000582646318367697, "loss": 3.4914, "step": 6660 }, { "epoch": 0.33, "grad_norm": 0.5225422978401184, "learning_rate": 0.0005826411575126798, "loss": 3.3454, "step": 6661 }, { "epoch": 0.33, "grad_norm": 0.5265496969223022, "learning_rate": 0.0005826359959132402, "loss": 3.2391, "step": 6662 }, { "epoch": 0.33, "grad_norm": 0.48403891921043396, "learning_rate": 0.0005826308335693919, "loss": 3.2319, "step": 6663 }, { "epoch": 0.33, "grad_norm": 0.5459921360015869, "learning_rate": 0.0005826256704811481, "loss": 3.488, "step": 6664 }, { "epoch": 0.33, "grad_norm": 0.524363100528717, "learning_rate": 0.0005826205066485229, "loss": 3.5475, "step": 6665 }, { "epoch": 0.33, "grad_norm": 0.5568280816078186, "learning_rate": 0.0005826153420715295, "loss": 3.3387, "step": 6666 }, { "epoch": 0.33, "grad_norm": 0.5115675330162048, "learning_rate": 0.0005826101767501818, "loss": 3.5857, "step": 6667 }, { "epoch": 0.33, "grad_norm": 0.5281095504760742, "learning_rate": 0.0005826050106844934, "loss": 3.3025, "step": 6668 }, { "epoch": 0.33, "grad_norm": 0.5385881066322327, "learning_rate": 0.0005825998438744775, "loss": 3.244, "step": 6669 }, { "epoch": 0.33, "grad_norm": 0.5694405436515808, "learning_rate": 0.0005825946763201482, "loss": 3.2063, "step": 6670 }, { "epoch": 0.33, "grad_norm": 0.5485799908638, "learning_rate": 0.0005825895080215188, "loss": 3.3736, "step": 6671 }, { "epoch": 0.33, "grad_norm": 0.5174115300178528, "learning_rate": 0.000582584338978603, "loss": 3.3348, "step": 6672 }, { "epoch": 0.33, "grad_norm": 0.5306274890899658, "learning_rate": 0.0005825791691914146, "loss": 3.4455, "step": 6673 }, { "epoch": 0.33, "grad_norm": 0.5434350371360779, "learning_rate": 0.000582573998659967, "loss": 3.4376, "step": 6674 }, { "epoch": 0.33, "grad_norm": 0.570429801940918, "learning_rate": 0.0005825688273842738, "loss": 3.3823, "step": 6675 }, { "epoch": 0.33, "grad_norm": 0.5130692720413208, "learning_rate": 0.0005825636553643488, "loss": 3.2729, "step": 6676 }, { "epoch": 0.33, "grad_norm": 0.5442194938659668, "learning_rate": 0.0005825584826002054, "loss": 3.547, "step": 6677 }, { "epoch": 0.33, "grad_norm": 0.6229523420333862, "learning_rate": 0.0005825533090918574, "loss": 3.3318, "step": 6678 }, { "epoch": 0.33, "grad_norm": 0.5505388975143433, "learning_rate": 0.0005825481348393183, "loss": 3.4383, "step": 6679 }, { "epoch": 0.33, "grad_norm": 0.5161044597625732, "learning_rate": 0.0005825429598426018, "loss": 3.3366, "step": 6680 }, { "epoch": 0.33, "grad_norm": 0.5270329117774963, "learning_rate": 0.0005825377841017215, "loss": 3.296, "step": 6681 }, { "epoch": 0.33, "grad_norm": 0.5343664884567261, "learning_rate": 0.0005825326076166912, "loss": 3.3817, "step": 6682 }, { "epoch": 0.33, "grad_norm": 0.5493436455726624, "learning_rate": 0.0005825274303875242, "loss": 3.3233, "step": 6683 }, { "epoch": 0.33, "grad_norm": 0.5526965260505676, "learning_rate": 0.0005825222524142345, "loss": 3.1901, "step": 6684 }, { "epoch": 0.33, "grad_norm": 0.5074105858802795, "learning_rate": 0.0005825170736968354, "loss": 3.3946, "step": 6685 }, { "epoch": 0.33, "grad_norm": 0.5705122947692871, "learning_rate": 0.0005825118942353408, "loss": 3.2233, "step": 6686 }, { "epoch": 0.33, "grad_norm": 0.5070473551750183, "learning_rate": 0.0005825067140297641, "loss": 3.2129, "step": 6687 }, { "epoch": 0.33, "grad_norm": 0.5425854325294495, "learning_rate": 0.0005825015330801192, "loss": 3.2749, "step": 6688 }, { "epoch": 0.33, "grad_norm": 0.5736327767372131, "learning_rate": 0.0005824963513864197, "loss": 3.2822, "step": 6689 }, { "epoch": 0.33, "grad_norm": 0.5530392527580261, "learning_rate": 0.0005824911689486791, "loss": 3.2252, "step": 6690 }, { "epoch": 0.33, "grad_norm": 0.5579785704612732, "learning_rate": 0.0005824859857669111, "loss": 3.6146, "step": 6691 }, { "epoch": 0.33, "grad_norm": 0.671022891998291, "learning_rate": 0.0005824808018411294, "loss": 3.1428, "step": 6692 }, { "epoch": 0.33, "grad_norm": 0.5243225693702698, "learning_rate": 0.0005824756171713477, "loss": 3.3311, "step": 6693 }, { "epoch": 0.33, "grad_norm": 0.6820229291915894, "learning_rate": 0.0005824704317575795, "loss": 3.2211, "step": 6694 }, { "epoch": 0.33, "grad_norm": 0.5242649912834167, "learning_rate": 0.0005824652455998385, "loss": 3.3226, "step": 6695 }, { "epoch": 0.33, "grad_norm": 0.5282347202301025, "learning_rate": 0.0005824600586981386, "loss": 3.4453, "step": 6696 }, { "epoch": 0.33, "grad_norm": 0.5148776173591614, "learning_rate": 0.0005824548710524931, "loss": 3.3774, "step": 6697 }, { "epoch": 0.33, "grad_norm": 0.5305414795875549, "learning_rate": 0.0005824496826629159, "loss": 3.282, "step": 6698 }, { "epoch": 0.33, "grad_norm": 1.0000241994857788, "learning_rate": 0.0005824444935294206, "loss": 3.4245, "step": 6699 }, { "epoch": 0.33, "grad_norm": 0.5258133411407471, "learning_rate": 0.0005824393036520208, "loss": 3.4044, "step": 6700 }, { "epoch": 0.33, "grad_norm": 0.6157311201095581, "learning_rate": 0.0005824341130307302, "loss": 3.3356, "step": 6701 }, { "epoch": 0.33, "grad_norm": 0.5688565969467163, "learning_rate": 0.0005824289216655626, "loss": 3.3015, "step": 6702 }, { "epoch": 0.33, "grad_norm": 0.5846591591835022, "learning_rate": 0.0005824237295565315, "loss": 3.5027, "step": 6703 }, { "epoch": 0.33, "grad_norm": 0.5892431139945984, "learning_rate": 0.0005824185367036507, "loss": 3.3725, "step": 6704 }, { "epoch": 0.33, "grad_norm": 0.5611383318901062, "learning_rate": 0.0005824133431069338, "loss": 3.0607, "step": 6705 }, { "epoch": 0.33, "grad_norm": 0.5581179857254028, "learning_rate": 0.0005824081487663945, "loss": 3.1806, "step": 6706 }, { "epoch": 0.33, "grad_norm": 0.5461733341217041, "learning_rate": 0.0005824029536820466, "loss": 3.1481, "step": 6707 }, { "epoch": 0.33, "grad_norm": 0.5615188479423523, "learning_rate": 0.0005823977578539035, "loss": 3.3157, "step": 6708 }, { "epoch": 0.33, "grad_norm": 0.5543760061264038, "learning_rate": 0.0005823925612819792, "loss": 3.1724, "step": 6709 }, { "epoch": 0.33, "grad_norm": 0.512069046497345, "learning_rate": 0.0005823873639662871, "loss": 3.476, "step": 6710 }, { "epoch": 0.33, "grad_norm": 0.7344093918800354, "learning_rate": 0.0005823821659068411, "loss": 3.3028, "step": 6711 }, { "epoch": 0.33, "grad_norm": 0.5303009152412415, "learning_rate": 0.0005823769671036549, "loss": 3.3448, "step": 6712 }, { "epoch": 0.33, "grad_norm": 0.5423128008842468, "learning_rate": 0.0005823717675567419, "loss": 3.2607, "step": 6713 }, { "epoch": 0.33, "grad_norm": 0.5232293009757996, "learning_rate": 0.0005823665672661161, "loss": 3.2688, "step": 6714 }, { "epoch": 0.33, "grad_norm": 0.5623641610145569, "learning_rate": 0.0005823613662317912, "loss": 3.2962, "step": 6715 }, { "epoch": 0.33, "grad_norm": 0.6031582355499268, "learning_rate": 0.0005823561644537807, "loss": 3.3641, "step": 6716 }, { "epoch": 0.33, "grad_norm": 0.5149546265602112, "learning_rate": 0.0005823509619320986, "loss": 3.4208, "step": 6717 }, { "epoch": 0.33, "grad_norm": 0.5367235541343689, "learning_rate": 0.0005823457586667582, "loss": 3.2141, "step": 6718 }, { "epoch": 0.33, "grad_norm": 0.5604205131530762, "learning_rate": 0.0005823405546577735, "loss": 3.5344, "step": 6719 }, { "epoch": 0.33, "grad_norm": 0.5116593241691589, "learning_rate": 0.000582335349905158, "loss": 3.5164, "step": 6720 }, { "epoch": 0.33, "grad_norm": 0.5447813272476196, "learning_rate": 0.0005823301444089256, "loss": 3.4071, "step": 6721 }, { "epoch": 0.33, "grad_norm": 0.5833255052566528, "learning_rate": 0.0005823249381690899, "loss": 3.3173, "step": 6722 }, { "epoch": 0.33, "grad_norm": 0.5849522352218628, "learning_rate": 0.0005823197311856647, "loss": 3.1083, "step": 6723 }, { "epoch": 0.33, "grad_norm": 0.5260932445526123, "learning_rate": 0.0005823145234586638, "loss": 3.3465, "step": 6724 }, { "epoch": 0.33, "grad_norm": 0.5599755644798279, "learning_rate": 0.0005823093149881005, "loss": 3.3558, "step": 6725 }, { "epoch": 0.33, "grad_norm": 0.5124487280845642, "learning_rate": 0.0005823041057739889, "loss": 3.589, "step": 6726 }, { "epoch": 0.33, "grad_norm": 0.5424213409423828, "learning_rate": 0.0005822988958163427, "loss": 3.3946, "step": 6727 }, { "epoch": 0.33, "grad_norm": 0.5511559844017029, "learning_rate": 0.0005822936851151755, "loss": 3.3763, "step": 6728 }, { "epoch": 0.33, "grad_norm": 0.5458391904830933, "learning_rate": 0.0005822884736705011, "loss": 3.2826, "step": 6729 }, { "epoch": 0.33, "grad_norm": 0.5722623467445374, "learning_rate": 0.000582283261482333, "loss": 3.2023, "step": 6730 }, { "epoch": 0.33, "grad_norm": 0.5854646563529968, "learning_rate": 0.0005822780485506854, "loss": 3.2832, "step": 6731 }, { "epoch": 0.33, "grad_norm": 0.5965773463249207, "learning_rate": 0.0005822728348755716, "loss": 3.1899, "step": 6732 }, { "epoch": 0.33, "grad_norm": 0.5229557156562805, "learning_rate": 0.0005822676204570054, "loss": 3.3824, "step": 6733 }, { "epoch": 0.33, "grad_norm": 0.527096688747406, "learning_rate": 0.0005822624052950006, "loss": 3.4582, "step": 6734 }, { "epoch": 0.33, "grad_norm": 0.5202319025993347, "learning_rate": 0.0005822571893895711, "loss": 3.4314, "step": 6735 }, { "epoch": 0.33, "grad_norm": 0.5747625827789307, "learning_rate": 0.0005822519727407304, "loss": 3.345, "step": 6736 }, { "epoch": 0.33, "grad_norm": 0.498573362827301, "learning_rate": 0.0005822467553484923, "loss": 3.4014, "step": 6737 }, { "epoch": 0.33, "grad_norm": 0.5384262800216675, "learning_rate": 0.0005822415372128706, "loss": 3.3868, "step": 6738 }, { "epoch": 0.33, "grad_norm": 0.5200839042663574, "learning_rate": 0.000582236318333879, "loss": 3.3441, "step": 6739 }, { "epoch": 0.33, "grad_norm": 0.5176569819450378, "learning_rate": 0.0005822310987115314, "loss": 3.2837, "step": 6740 }, { "epoch": 0.33, "grad_norm": 0.6018693447113037, "learning_rate": 0.0005822258783458412, "loss": 3.2213, "step": 6741 }, { "epoch": 0.33, "grad_norm": 0.5896228551864624, "learning_rate": 0.0005822206572368224, "loss": 3.339, "step": 6742 }, { "epoch": 0.33, "grad_norm": 0.5164976119995117, "learning_rate": 0.0005822154353844887, "loss": 3.4402, "step": 6743 }, { "epoch": 0.33, "grad_norm": 0.5089805722236633, "learning_rate": 0.0005822102127888539, "loss": 3.1868, "step": 6744 }, { "epoch": 0.33, "grad_norm": 0.5495955348014832, "learning_rate": 0.0005822049894499316, "loss": 3.5313, "step": 6745 }, { "epoch": 0.33, "grad_norm": 0.5287194848060608, "learning_rate": 0.0005821997653677359, "loss": 3.1508, "step": 6746 }, { "epoch": 0.33, "grad_norm": 0.6048151254653931, "learning_rate": 0.0005821945405422802, "loss": 3.1749, "step": 6747 }, { "epoch": 0.33, "grad_norm": 0.5822990536689758, "learning_rate": 0.0005821893149735784, "loss": 3.3297, "step": 6748 }, { "epoch": 0.33, "grad_norm": 0.518795907497406, "learning_rate": 0.0005821840886616441, "loss": 3.3508, "step": 6749 }, { "epoch": 0.33, "grad_norm": 0.608176052570343, "learning_rate": 0.0005821788616064914, "loss": 3.2698, "step": 6750 }, { "epoch": 0.33, "grad_norm": 0.5752763748168945, "learning_rate": 0.0005821736338081339, "loss": 3.3454, "step": 6751 }, { "epoch": 0.33, "grad_norm": 0.5169047117233276, "learning_rate": 0.0005821684052665851, "loss": 3.609, "step": 6752 }, { "epoch": 0.33, "grad_norm": 0.529773473739624, "learning_rate": 0.0005821631759818593, "loss": 3.369, "step": 6753 }, { "epoch": 0.33, "grad_norm": 0.543318510055542, "learning_rate": 0.00058215794595397, "loss": 3.4376, "step": 6754 }, { "epoch": 0.33, "grad_norm": 0.6167493462562561, "learning_rate": 0.0005821527151829308, "loss": 3.2169, "step": 6755 }, { "epoch": 0.33, "grad_norm": 0.5135380625724792, "learning_rate": 0.0005821474836687559, "loss": 3.2988, "step": 6756 }, { "epoch": 0.33, "grad_norm": 0.597224235534668, "learning_rate": 0.0005821422514114585, "loss": 3.3903, "step": 6757 }, { "epoch": 0.33, "grad_norm": 0.5847658514976501, "learning_rate": 0.0005821370184110529, "loss": 3.6174, "step": 6758 }, { "epoch": 0.33, "grad_norm": 0.5715068578720093, "learning_rate": 0.0005821317846675527, "loss": 3.3343, "step": 6759 }, { "epoch": 0.33, "grad_norm": 0.5231801271438599, "learning_rate": 0.0005821265501809716, "loss": 3.3706, "step": 6760 }, { "epoch": 0.33, "grad_norm": 0.5947988033294678, "learning_rate": 0.0005821213149513235, "loss": 3.2535, "step": 6761 }, { "epoch": 0.33, "grad_norm": 0.5078067779541016, "learning_rate": 0.0005821160789786222, "loss": 3.412, "step": 6762 }, { "epoch": 0.33, "grad_norm": 0.5282332897186279, "learning_rate": 0.0005821108422628813, "loss": 3.2963, "step": 6763 }, { "epoch": 0.33, "grad_norm": 0.5280579924583435, "learning_rate": 0.0005821056048041149, "loss": 3.4438, "step": 6764 }, { "epoch": 0.33, "grad_norm": 0.5364978909492493, "learning_rate": 0.0005821003666023366, "loss": 3.3856, "step": 6765 }, { "epoch": 0.33, "grad_norm": 0.5377125144004822, "learning_rate": 0.0005820951276575601, "loss": 3.4609, "step": 6766 }, { "epoch": 0.33, "grad_norm": 0.5311643481254578, "learning_rate": 0.0005820898879697994, "loss": 3.3563, "step": 6767 }, { "epoch": 0.33, "grad_norm": 0.5513588190078735, "learning_rate": 0.0005820846475390683, "loss": 3.4952, "step": 6768 }, { "epoch": 0.33, "grad_norm": 0.5474352240562439, "learning_rate": 0.0005820794063653805, "loss": 3.6479, "step": 6769 }, { "epoch": 0.33, "grad_norm": 0.5823585987091064, "learning_rate": 0.0005820741644487497, "loss": 3.1314, "step": 6770 }, { "epoch": 0.33, "grad_norm": 0.5332926511764526, "learning_rate": 0.0005820689217891899, "loss": 3.4825, "step": 6771 }, { "epoch": 0.33, "grad_norm": 0.5201532244682312, "learning_rate": 0.0005820636783867149, "loss": 3.4215, "step": 6772 }, { "epoch": 0.33, "grad_norm": 0.573883056640625, "learning_rate": 0.0005820584342413385, "loss": 3.3778, "step": 6773 }, { "epoch": 0.33, "grad_norm": 0.6504254937171936, "learning_rate": 0.0005820531893530744, "loss": 3.3315, "step": 6774 }, { "epoch": 0.33, "grad_norm": 0.5481976270675659, "learning_rate": 0.0005820479437219365, "loss": 3.4299, "step": 6775 }, { "epoch": 0.33, "grad_norm": 0.5290274024009705, "learning_rate": 0.0005820426973479386, "loss": 3.4469, "step": 6776 }, { "epoch": 0.33, "grad_norm": 0.5465989112854004, "learning_rate": 0.0005820374502310945, "loss": 3.4553, "step": 6777 }, { "epoch": 0.33, "grad_norm": 0.5408307313919067, "learning_rate": 0.0005820322023714179, "loss": 3.2675, "step": 6778 }, { "epoch": 0.33, "grad_norm": 0.5560708045959473, "learning_rate": 0.0005820269537689229, "loss": 3.235, "step": 6779 }, { "epoch": 0.33, "grad_norm": 0.5538557171821594, "learning_rate": 0.0005820217044236232, "loss": 3.3199, "step": 6780 }, { "epoch": 0.33, "grad_norm": 0.5382668972015381, "learning_rate": 0.0005820164543355326, "loss": 3.5514, "step": 6781 }, { "epoch": 0.33, "grad_norm": 0.5423834323883057, "learning_rate": 0.0005820112035046648, "loss": 3.2956, "step": 6782 }, { "epoch": 0.33, "grad_norm": 0.5152990221977234, "learning_rate": 0.0005820059519310339, "loss": 3.2701, "step": 6783 }, { "epoch": 0.33, "grad_norm": 0.5263355374336243, "learning_rate": 0.0005820006996146536, "loss": 3.4816, "step": 6784 }, { "epoch": 0.33, "grad_norm": 0.5338313579559326, "learning_rate": 0.0005819954465555377, "loss": 3.2517, "step": 6785 }, { "epoch": 0.33, "grad_norm": 0.5005643367767334, "learning_rate": 0.0005819901927537, "loss": 3.1299, "step": 6786 }, { "epoch": 0.33, "grad_norm": 0.5044447183609009, "learning_rate": 0.0005819849382091545, "loss": 3.501, "step": 6787 }, { "epoch": 0.33, "grad_norm": 0.5595723390579224, "learning_rate": 0.0005819796829219149, "loss": 3.4216, "step": 6788 }, { "epoch": 0.33, "grad_norm": 0.5340045690536499, "learning_rate": 0.0005819744268919951, "loss": 3.4126, "step": 6789 }, { "epoch": 0.33, "grad_norm": 0.5521731972694397, "learning_rate": 0.000581969170119409, "loss": 3.5361, "step": 6790 }, { "epoch": 0.33, "grad_norm": 0.546519935131073, "learning_rate": 0.0005819639126041702, "loss": 3.5096, "step": 6791 }, { "epoch": 0.33, "grad_norm": 0.5405811071395874, "learning_rate": 0.0005819586543462928, "loss": 3.2427, "step": 6792 }, { "epoch": 0.33, "grad_norm": 0.5262058973312378, "learning_rate": 0.0005819533953457906, "loss": 3.4118, "step": 6793 }, { "epoch": 0.33, "grad_norm": 0.5601271986961365, "learning_rate": 0.0005819481356026774, "loss": 3.2993, "step": 6794 }, { "epoch": 0.33, "grad_norm": 0.5209169387817383, "learning_rate": 0.0005819428751169671, "loss": 3.2435, "step": 6795 }, { "epoch": 0.33, "grad_norm": 0.5251837968826294, "learning_rate": 0.0005819376138886734, "loss": 3.2903, "step": 6796 }, { "epoch": 0.33, "grad_norm": 0.5272718667984009, "learning_rate": 0.0005819323519178105, "loss": 3.3486, "step": 6797 }, { "epoch": 0.33, "grad_norm": 0.6075021624565125, "learning_rate": 0.0005819270892043919, "loss": 3.413, "step": 6798 }, { "epoch": 0.33, "grad_norm": 0.5117523670196533, "learning_rate": 0.0005819218257484315, "loss": 3.4657, "step": 6799 }, { "epoch": 0.33, "grad_norm": 0.4947341978549957, "learning_rate": 0.0005819165615499435, "loss": 3.5324, "step": 6800 }, { "epoch": 0.33, "grad_norm": 0.5065931081771851, "learning_rate": 0.0005819112966089415, "loss": 3.4816, "step": 6801 }, { "epoch": 0.33, "grad_norm": 0.5459311604499817, "learning_rate": 0.0005819060309254393, "loss": 3.2451, "step": 6802 }, { "epoch": 0.33, "grad_norm": 0.5343260169029236, "learning_rate": 0.0005819007644994509, "loss": 3.4584, "step": 6803 }, { "epoch": 0.33, "grad_norm": 0.514397382736206, "learning_rate": 0.0005818954973309901, "loss": 3.4568, "step": 6804 }, { "epoch": 0.33, "grad_norm": 0.5538575053215027, "learning_rate": 0.0005818902294200708, "loss": 3.208, "step": 6805 }, { "epoch": 0.33, "grad_norm": 0.5236798524856567, "learning_rate": 0.0005818849607667069, "loss": 3.3301, "step": 6806 }, { "epoch": 0.33, "grad_norm": 0.5536655187606812, "learning_rate": 0.0005818796913709123, "loss": 3.4512, "step": 6807 }, { "epoch": 0.33, "grad_norm": 0.5622990727424622, "learning_rate": 0.0005818744212327009, "loss": 3.5709, "step": 6808 }, { "epoch": 0.33, "grad_norm": 0.5513145327568054, "learning_rate": 0.0005818691503520865, "loss": 3.4054, "step": 6809 }, { "epoch": 0.33, "grad_norm": 0.5191030502319336, "learning_rate": 0.0005818638787290829, "loss": 3.3703, "step": 6810 }, { "epoch": 0.33, "grad_norm": 0.5005136132240295, "learning_rate": 0.0005818586063637041, "loss": 3.3643, "step": 6811 }, { "epoch": 0.33, "grad_norm": 0.5312431454658508, "learning_rate": 0.0005818533332559642, "loss": 3.4871, "step": 6812 }, { "epoch": 0.33, "grad_norm": 0.5487942099571228, "learning_rate": 0.0005818480594058766, "loss": 3.3361, "step": 6813 }, { "epoch": 0.33, "grad_norm": 0.5031387209892273, "learning_rate": 0.0005818427848134556, "loss": 3.5931, "step": 6814 }, { "epoch": 0.33, "grad_norm": 0.5074050426483154, "learning_rate": 0.0005818375094787148, "loss": 3.3569, "step": 6815 }, { "epoch": 0.33, "grad_norm": 0.5995979905128479, "learning_rate": 0.0005818322334016684, "loss": 3.412, "step": 6816 }, { "epoch": 0.33, "grad_norm": 0.5569592118263245, "learning_rate": 0.0005818269565823301, "loss": 3.4492, "step": 6817 }, { "epoch": 0.33, "grad_norm": 0.5373505353927612, "learning_rate": 0.0005818216790207137, "loss": 3.3863, "step": 6818 }, { "epoch": 0.33, "grad_norm": 0.513282835483551, "learning_rate": 0.0005818164007168335, "loss": 3.1659, "step": 6819 }, { "epoch": 0.33, "grad_norm": 0.5331324338912964, "learning_rate": 0.0005818111216707029, "loss": 3.1737, "step": 6820 }, { "epoch": 0.33, "grad_norm": 0.5432617664337158, "learning_rate": 0.0005818058418823361, "loss": 3.5055, "step": 6821 }, { "epoch": 0.33, "grad_norm": 0.5340288877487183, "learning_rate": 0.000581800561351747, "loss": 3.3134, "step": 6822 }, { "epoch": 0.33, "grad_norm": 0.523755669593811, "learning_rate": 0.0005817952800789494, "loss": 3.41, "step": 6823 }, { "epoch": 0.33, "grad_norm": 0.5807611346244812, "learning_rate": 0.0005817899980639573, "loss": 3.3649, "step": 6824 }, { "epoch": 0.33, "grad_norm": 0.5344754457473755, "learning_rate": 0.0005817847153067846, "loss": 3.3996, "step": 6825 }, { "epoch": 0.33, "grad_norm": 0.5475096106529236, "learning_rate": 0.0005817794318074452, "loss": 3.2395, "step": 6826 }, { "epoch": 0.33, "grad_norm": 0.5475925803184509, "learning_rate": 0.0005817741475659529, "loss": 3.0788, "step": 6827 }, { "epoch": 0.33, "grad_norm": 0.5353468060493469, "learning_rate": 0.0005817688625823218, "loss": 3.3761, "step": 6828 }, { "epoch": 0.33, "grad_norm": 0.5556272864341736, "learning_rate": 0.0005817635768565657, "loss": 3.4141, "step": 6829 }, { "epoch": 0.33, "grad_norm": 0.5247024297714233, "learning_rate": 0.0005817582903886986, "loss": 3.2435, "step": 6830 }, { "epoch": 0.33, "grad_norm": 0.5360187292098999, "learning_rate": 0.0005817530031787344, "loss": 3.3121, "step": 6831 }, { "epoch": 0.33, "grad_norm": 0.5367879867553711, "learning_rate": 0.000581747715226687, "loss": 3.3657, "step": 6832 }, { "epoch": 0.33, "grad_norm": 0.5176432728767395, "learning_rate": 0.0005817424265325703, "loss": 3.3106, "step": 6833 }, { "epoch": 0.33, "grad_norm": 0.5033512115478516, "learning_rate": 0.0005817371370963984, "loss": 3.3319, "step": 6834 }, { "epoch": 0.33, "grad_norm": 0.5678747892379761, "learning_rate": 0.0005817318469181849, "loss": 3.3628, "step": 6835 }, { "epoch": 0.34, "grad_norm": 0.5114911794662476, "learning_rate": 0.000581726555997944, "loss": 3.1957, "step": 6836 }, { "epoch": 0.34, "grad_norm": 0.5610429644584656, "learning_rate": 0.0005817212643356897, "loss": 3.1318, "step": 6837 }, { "epoch": 0.34, "grad_norm": 0.5097834467887878, "learning_rate": 0.0005817159719314358, "loss": 3.2942, "step": 6838 }, { "epoch": 0.34, "grad_norm": 0.5260331630706787, "learning_rate": 0.0005817106787851962, "loss": 3.3047, "step": 6839 }, { "epoch": 0.34, "grad_norm": 0.5727056860923767, "learning_rate": 0.0005817053848969848, "loss": 3.4154, "step": 6840 }, { "epoch": 0.34, "grad_norm": 0.5135591626167297, "learning_rate": 0.0005817000902668157, "loss": 3.5154, "step": 6841 }, { "epoch": 0.34, "grad_norm": 0.5463142395019531, "learning_rate": 0.0005816947948947028, "loss": 3.2277, "step": 6842 }, { "epoch": 0.34, "grad_norm": 0.5426071882247925, "learning_rate": 0.00058168949878066, "loss": 3.4398, "step": 6843 }, { "epoch": 0.34, "grad_norm": 0.5238614082336426, "learning_rate": 0.0005816842019247012, "loss": 3.3057, "step": 6844 }, { "epoch": 0.34, "grad_norm": 0.4938346743583679, "learning_rate": 0.0005816789043268406, "loss": 3.3241, "step": 6845 }, { "epoch": 0.34, "grad_norm": 0.5384917259216309, "learning_rate": 0.0005816736059870918, "loss": 3.2526, "step": 6846 }, { "epoch": 0.34, "grad_norm": 0.5270363092422485, "learning_rate": 0.000581668306905469, "loss": 3.3041, "step": 6847 }, { "epoch": 0.34, "grad_norm": 0.508563220500946, "learning_rate": 0.0005816630070819861, "loss": 3.3541, "step": 6848 }, { "epoch": 0.34, "grad_norm": 0.5612684488296509, "learning_rate": 0.000581657706516657, "loss": 3.463, "step": 6849 }, { "epoch": 0.34, "grad_norm": 0.5991032123565674, "learning_rate": 0.0005816524052094957, "loss": 3.1475, "step": 6850 }, { "epoch": 0.34, "grad_norm": 0.5217996835708618, "learning_rate": 0.0005816471031605163, "loss": 3.565, "step": 6851 }, { "epoch": 0.34, "grad_norm": 0.5394172072410583, "learning_rate": 0.0005816418003697324, "loss": 3.2722, "step": 6852 }, { "epoch": 0.34, "grad_norm": 0.5643725395202637, "learning_rate": 0.0005816364968371584, "loss": 3.2096, "step": 6853 }, { "epoch": 0.34, "grad_norm": 0.5298509001731873, "learning_rate": 0.000581631192562808, "loss": 3.379, "step": 6854 }, { "epoch": 0.34, "grad_norm": 0.5548331141471863, "learning_rate": 0.0005816258875466953, "loss": 3.1849, "step": 6855 }, { "epoch": 0.34, "grad_norm": 0.4925016760826111, "learning_rate": 0.0005816205817888342, "loss": 3.3093, "step": 6856 }, { "epoch": 0.34, "grad_norm": 0.5486854910850525, "learning_rate": 0.0005816152752892387, "loss": 3.3507, "step": 6857 }, { "epoch": 0.34, "grad_norm": 0.5491653084754944, "learning_rate": 0.0005816099680479226, "loss": 3.3045, "step": 6858 }, { "epoch": 0.34, "grad_norm": 0.5340956449508667, "learning_rate": 0.0005816046600649002, "loss": 3.3908, "step": 6859 }, { "epoch": 0.34, "grad_norm": 0.5760660767555237, "learning_rate": 0.0005815993513401853, "loss": 3.2897, "step": 6860 }, { "epoch": 0.34, "grad_norm": 0.5405675768852234, "learning_rate": 0.0005815940418737918, "loss": 3.3984, "step": 6861 }, { "epoch": 0.34, "grad_norm": 0.5545721054077148, "learning_rate": 0.0005815887316657339, "loss": 3.6153, "step": 6862 }, { "epoch": 0.34, "grad_norm": 0.5303523540496826, "learning_rate": 0.0005815834207160255, "loss": 3.4198, "step": 6863 }, { "epoch": 0.34, "grad_norm": 0.5388019680976868, "learning_rate": 0.0005815781090246805, "loss": 3.3454, "step": 6864 }, { "epoch": 0.34, "grad_norm": 0.5352962613105774, "learning_rate": 0.0005815727965917129, "loss": 3.2491, "step": 6865 }, { "epoch": 0.34, "grad_norm": 0.5232275128364563, "learning_rate": 0.0005815674834171368, "loss": 3.3865, "step": 6866 }, { "epoch": 0.34, "grad_norm": 0.5305415987968445, "learning_rate": 0.0005815621695009662, "loss": 3.25, "step": 6867 }, { "epoch": 0.34, "grad_norm": 0.5452413558959961, "learning_rate": 0.000581556854843215, "loss": 3.3942, "step": 6868 }, { "epoch": 0.34, "grad_norm": 0.5450773239135742, "learning_rate": 0.0005815515394438971, "loss": 3.3143, "step": 6869 }, { "epoch": 0.34, "grad_norm": 0.541786789894104, "learning_rate": 0.0005815462233030267, "loss": 3.5092, "step": 6870 }, { "epoch": 0.34, "grad_norm": 0.5301926136016846, "learning_rate": 0.0005815409064206178, "loss": 3.5044, "step": 6871 }, { "epoch": 0.34, "grad_norm": 0.5375513434410095, "learning_rate": 0.0005815355887966843, "loss": 3.2972, "step": 6872 }, { "epoch": 0.34, "grad_norm": 0.5402582883834839, "learning_rate": 0.0005815302704312403, "loss": 3.2579, "step": 6873 }, { "epoch": 0.34, "grad_norm": 0.5533377528190613, "learning_rate": 0.0005815249513242996, "loss": 3.5222, "step": 6874 }, { "epoch": 0.34, "grad_norm": 0.5436393022537231, "learning_rate": 0.0005815196314758765, "loss": 3.1836, "step": 6875 }, { "epoch": 0.34, "grad_norm": 0.5396994948387146, "learning_rate": 0.0005815143108859848, "loss": 3.374, "step": 6876 }, { "epoch": 0.34, "grad_norm": 0.5397756099700928, "learning_rate": 0.0005815089895546386, "loss": 3.2155, "step": 6877 }, { "epoch": 0.34, "grad_norm": 0.5924909710884094, "learning_rate": 0.0005815036674818519, "loss": 3.35, "step": 6878 }, { "epoch": 0.34, "grad_norm": 0.5628549456596375, "learning_rate": 0.0005814983446676388, "loss": 3.44, "step": 6879 }, { "epoch": 0.34, "grad_norm": 0.5356522798538208, "learning_rate": 0.0005814930211120132, "loss": 3.2106, "step": 6880 }, { "epoch": 0.34, "grad_norm": 0.5571137070655823, "learning_rate": 0.0005814876968149891, "loss": 3.4285, "step": 6881 }, { "epoch": 0.34, "grad_norm": 0.579041600227356, "learning_rate": 0.0005814823717765806, "loss": 3.3484, "step": 6882 }, { "epoch": 0.34, "grad_norm": 0.4999987781047821, "learning_rate": 0.0005814770459968016, "loss": 3.3225, "step": 6883 }, { "epoch": 0.34, "grad_norm": 0.5245411396026611, "learning_rate": 0.0005814717194756663, "loss": 3.3605, "step": 6884 }, { "epoch": 0.34, "grad_norm": 0.5306369066238403, "learning_rate": 0.0005814663922131888, "loss": 3.4097, "step": 6885 }, { "epoch": 0.34, "grad_norm": 0.5047025084495544, "learning_rate": 0.0005814610642093828, "loss": 3.287, "step": 6886 }, { "epoch": 0.34, "grad_norm": 0.5388005971908569, "learning_rate": 0.0005814557354642627, "loss": 3.2558, "step": 6887 }, { "epoch": 0.34, "grad_norm": 0.5090034008026123, "learning_rate": 0.0005814504059778422, "loss": 3.2232, "step": 6888 }, { "epoch": 0.34, "grad_norm": 0.523379921913147, "learning_rate": 0.0005814450757501355, "loss": 3.2021, "step": 6889 }, { "epoch": 0.34, "grad_norm": 0.5205253958702087, "learning_rate": 0.0005814397447811568, "loss": 3.5486, "step": 6890 }, { "epoch": 0.34, "grad_norm": 0.5232972502708435, "learning_rate": 0.0005814344130709198, "loss": 3.3859, "step": 6891 }, { "epoch": 0.34, "grad_norm": 0.548040509223938, "learning_rate": 0.0005814290806194388, "loss": 3.2865, "step": 6892 }, { "epoch": 0.34, "grad_norm": 0.5398008227348328, "learning_rate": 0.0005814237474267277, "loss": 3.3062, "step": 6893 }, { "epoch": 0.34, "grad_norm": 0.5877861380577087, "learning_rate": 0.0005814184134928008, "loss": 3.3348, "step": 6894 }, { "epoch": 0.34, "grad_norm": 0.5505077242851257, "learning_rate": 0.0005814130788176718, "loss": 3.3898, "step": 6895 }, { "epoch": 0.34, "grad_norm": 0.5634656548500061, "learning_rate": 0.0005814077434013549, "loss": 3.2064, "step": 6896 }, { "epoch": 0.34, "grad_norm": 0.5058372616767883, "learning_rate": 0.0005814024072438642, "loss": 3.3888, "step": 6897 }, { "epoch": 0.34, "grad_norm": 0.5141379237174988, "learning_rate": 0.0005813970703452138, "loss": 3.174, "step": 6898 }, { "epoch": 0.34, "grad_norm": 0.5080946683883667, "learning_rate": 0.0005813917327054175, "loss": 3.1621, "step": 6899 }, { "epoch": 0.34, "grad_norm": 0.533840000629425, "learning_rate": 0.0005813863943244897, "loss": 3.4227, "step": 6900 }, { "epoch": 0.34, "grad_norm": 0.5628681182861328, "learning_rate": 0.0005813810552024441, "loss": 3.1773, "step": 6901 }, { "epoch": 0.34, "grad_norm": 0.5388134717941284, "learning_rate": 0.0005813757153392951, "loss": 3.5305, "step": 6902 }, { "epoch": 0.34, "grad_norm": 0.5274122953414917, "learning_rate": 0.0005813703747350566, "loss": 3.3009, "step": 6903 }, { "epoch": 0.34, "grad_norm": 0.5223855972290039, "learning_rate": 0.0005813650333897426, "loss": 3.3918, "step": 6904 }, { "epoch": 0.34, "grad_norm": 0.5121346712112427, "learning_rate": 0.0005813596913033673, "loss": 3.3949, "step": 6905 }, { "epoch": 0.34, "grad_norm": 0.5447664260864258, "learning_rate": 0.0005813543484759448, "loss": 3.3467, "step": 6906 }, { "epoch": 0.34, "grad_norm": 0.5025432705879211, "learning_rate": 0.000581349004907489, "loss": 3.388, "step": 6907 }, { "epoch": 0.34, "grad_norm": 0.5353335738182068, "learning_rate": 0.0005813436605980142, "loss": 3.2766, "step": 6908 }, { "epoch": 0.34, "grad_norm": 0.503914475440979, "learning_rate": 0.0005813383155475342, "loss": 3.2077, "step": 6909 }, { "epoch": 0.34, "grad_norm": 0.5467348098754883, "learning_rate": 0.0005813329697560633, "loss": 3.2751, "step": 6910 }, { "epoch": 0.34, "grad_norm": 0.5445327162742615, "learning_rate": 0.0005813276232236155, "loss": 3.3215, "step": 6911 }, { "epoch": 0.34, "grad_norm": 0.5562853217124939, "learning_rate": 0.0005813222759502047, "loss": 3.4057, "step": 6912 }, { "epoch": 0.34, "grad_norm": 0.5421975255012512, "learning_rate": 0.0005813169279358454, "loss": 3.2963, "step": 6913 }, { "epoch": 0.34, "grad_norm": 0.5326264500617981, "learning_rate": 0.0005813115791805513, "loss": 3.2552, "step": 6914 }, { "epoch": 0.34, "grad_norm": 0.5453374981880188, "learning_rate": 0.0005813062296843368, "loss": 3.5338, "step": 6915 }, { "epoch": 0.34, "grad_norm": 0.531548023223877, "learning_rate": 0.0005813008794472158, "loss": 3.3438, "step": 6916 }, { "epoch": 0.34, "grad_norm": 0.586500346660614, "learning_rate": 0.0005812955284692022, "loss": 3.2636, "step": 6917 }, { "epoch": 0.34, "grad_norm": 0.523780345916748, "learning_rate": 0.0005812901767503105, "loss": 3.4048, "step": 6918 }, { "epoch": 0.34, "grad_norm": 0.5507670640945435, "learning_rate": 0.0005812848242905547, "loss": 3.3771, "step": 6919 }, { "epoch": 0.34, "grad_norm": 0.5263717174530029, "learning_rate": 0.0005812794710899486, "loss": 3.1895, "step": 6920 }, { "epoch": 0.34, "grad_norm": 0.5932714939117432, "learning_rate": 0.0005812741171485066, "loss": 3.3006, "step": 6921 }, { "epoch": 0.34, "grad_norm": 0.5689055919647217, "learning_rate": 0.0005812687624662427, "loss": 3.3535, "step": 6922 }, { "epoch": 0.34, "grad_norm": 0.5490853786468506, "learning_rate": 0.000581263407043171, "loss": 3.2321, "step": 6923 }, { "epoch": 0.34, "grad_norm": 0.5083225965499878, "learning_rate": 0.0005812580508793056, "loss": 3.2538, "step": 6924 }, { "epoch": 0.34, "grad_norm": 0.5350843071937561, "learning_rate": 0.0005812526939746607, "loss": 3.351, "step": 6925 }, { "epoch": 0.34, "grad_norm": 0.5658883452415466, "learning_rate": 0.0005812473363292503, "loss": 3.2112, "step": 6926 }, { "epoch": 0.34, "grad_norm": 0.5066301822662354, "learning_rate": 0.0005812419779430885, "loss": 3.5274, "step": 6927 }, { "epoch": 0.34, "grad_norm": 0.5520161986351013, "learning_rate": 0.0005812366188161894, "loss": 3.1782, "step": 6928 }, { "epoch": 0.34, "grad_norm": 0.5627199411392212, "learning_rate": 0.0005812312589485673, "loss": 3.2834, "step": 6929 }, { "epoch": 0.34, "grad_norm": 0.544322669506073, "learning_rate": 0.0005812258983402363, "loss": 3.5101, "step": 6930 }, { "epoch": 0.34, "grad_norm": 0.5476049184799194, "learning_rate": 0.0005812205369912102, "loss": 3.2291, "step": 6931 }, { "epoch": 0.34, "grad_norm": 0.5459288358688354, "learning_rate": 0.0005812151749015034, "loss": 3.2593, "step": 6932 }, { "epoch": 0.34, "grad_norm": 0.533470094203949, "learning_rate": 0.0005812098120711299, "loss": 3.3264, "step": 6933 }, { "epoch": 0.34, "grad_norm": 0.5248565077781677, "learning_rate": 0.0005812044485001039, "loss": 3.2764, "step": 6934 }, { "epoch": 0.34, "grad_norm": 0.5313982963562012, "learning_rate": 0.0005811990841884395, "loss": 3.4671, "step": 6935 }, { "epoch": 0.34, "grad_norm": 0.695208728313446, "learning_rate": 0.0005811937191361508, "loss": 3.5434, "step": 6936 }, { "epoch": 0.34, "grad_norm": 0.5338233709335327, "learning_rate": 0.0005811883533432521, "loss": 3.249, "step": 6937 }, { "epoch": 0.34, "grad_norm": 0.5622416138648987, "learning_rate": 0.0005811829868097572, "loss": 3.4326, "step": 6938 }, { "epoch": 0.34, "grad_norm": 0.5586525797843933, "learning_rate": 0.0005811776195356805, "loss": 3.3476, "step": 6939 }, { "epoch": 0.34, "grad_norm": 0.5135695934295654, "learning_rate": 0.0005811722515210361, "loss": 3.3332, "step": 6940 }, { "epoch": 0.34, "grad_norm": 0.5429893732070923, "learning_rate": 0.0005811668827658381, "loss": 3.2199, "step": 6941 }, { "epoch": 0.34, "grad_norm": 0.6040959358215332, "learning_rate": 0.0005811615132701005, "loss": 3.493, "step": 6942 }, { "epoch": 0.34, "grad_norm": 0.5325751304626465, "learning_rate": 0.0005811561430338378, "loss": 3.2832, "step": 6943 }, { "epoch": 0.34, "grad_norm": 0.5440836548805237, "learning_rate": 0.0005811507720570638, "loss": 3.2621, "step": 6944 }, { "epoch": 0.34, "grad_norm": 0.5436229109764099, "learning_rate": 0.0005811454003397928, "loss": 3.3883, "step": 6945 }, { "epoch": 0.34, "grad_norm": 0.5637691020965576, "learning_rate": 0.0005811400278820391, "loss": 3.4486, "step": 6946 }, { "epoch": 0.34, "grad_norm": 0.5535193085670471, "learning_rate": 0.0005811346546838165, "loss": 3.4498, "step": 6947 }, { "epoch": 0.34, "grad_norm": 0.5341395139694214, "learning_rate": 0.0005811292807451393, "loss": 3.4532, "step": 6948 }, { "epoch": 0.34, "grad_norm": 0.6911645531654358, "learning_rate": 0.0005811239060660217, "loss": 3.2766, "step": 6949 }, { "epoch": 0.34, "grad_norm": 0.5011906027793884, "learning_rate": 0.0005811185306464779, "loss": 3.1654, "step": 6950 }, { "epoch": 0.34, "grad_norm": 0.5271753668785095, "learning_rate": 0.0005811131544865218, "loss": 3.3565, "step": 6951 }, { "epoch": 0.34, "grad_norm": 0.5180072784423828, "learning_rate": 0.000581107777586168, "loss": 3.4378, "step": 6952 }, { "epoch": 0.34, "grad_norm": 0.541093111038208, "learning_rate": 0.0005811023999454303, "loss": 3.2264, "step": 6953 }, { "epoch": 0.34, "grad_norm": 0.5188407897949219, "learning_rate": 0.000581097021564323, "loss": 3.2942, "step": 6954 }, { "epoch": 0.34, "grad_norm": 0.5392166376113892, "learning_rate": 0.0005810916424428602, "loss": 3.5065, "step": 6955 }, { "epoch": 0.34, "grad_norm": 0.5347229242324829, "learning_rate": 0.0005810862625810562, "loss": 3.2041, "step": 6956 }, { "epoch": 0.34, "grad_norm": 0.6093183159828186, "learning_rate": 0.000581080881978925, "loss": 3.5634, "step": 6957 }, { "epoch": 0.34, "grad_norm": 0.5233674645423889, "learning_rate": 0.0005810755006364809, "loss": 3.2121, "step": 6958 }, { "epoch": 0.34, "grad_norm": 0.5367003679275513, "learning_rate": 0.0005810701185537379, "loss": 3.4925, "step": 6959 }, { "epoch": 0.34, "grad_norm": 0.5209534764289856, "learning_rate": 0.0005810647357307105, "loss": 3.292, "step": 6960 }, { "epoch": 0.34, "grad_norm": 0.5548787713050842, "learning_rate": 0.0005810593521674125, "loss": 3.1859, "step": 6961 }, { "epoch": 0.34, "grad_norm": 0.5430557131767273, "learning_rate": 0.0005810539678638584, "loss": 3.4861, "step": 6962 }, { "epoch": 0.34, "grad_norm": 0.5306611657142639, "learning_rate": 0.0005810485828200622, "loss": 3.4295, "step": 6963 }, { "epoch": 0.34, "grad_norm": 0.6463212966918945, "learning_rate": 0.000581043197036038, "loss": 3.4153, "step": 6964 }, { "epoch": 0.34, "grad_norm": 0.5019036531448364, "learning_rate": 0.0005810378105118002, "loss": 3.3514, "step": 6965 }, { "epoch": 0.34, "grad_norm": 0.5331908464431763, "learning_rate": 0.0005810324232473629, "loss": 3.4269, "step": 6966 }, { "epoch": 0.34, "grad_norm": 0.5343552827835083, "learning_rate": 0.0005810270352427403, "loss": 3.5078, "step": 6967 }, { "epoch": 0.34, "grad_norm": 0.561147928237915, "learning_rate": 0.0005810216464979466, "loss": 3.233, "step": 6968 }, { "epoch": 0.34, "grad_norm": 0.5656723380088806, "learning_rate": 0.0005810162570129958, "loss": 3.1724, "step": 6969 }, { "epoch": 0.34, "grad_norm": 0.5117201805114746, "learning_rate": 0.0005810108667879025, "loss": 3.3606, "step": 6970 }, { "epoch": 0.34, "grad_norm": 0.5727672576904297, "learning_rate": 0.0005810054758226805, "loss": 3.4015, "step": 6971 }, { "epoch": 0.34, "grad_norm": 0.594057559967041, "learning_rate": 0.0005810000841173442, "loss": 3.4109, "step": 6972 }, { "epoch": 0.34, "grad_norm": 0.5242696404457092, "learning_rate": 0.0005809946916719077, "loss": 3.1948, "step": 6973 }, { "epoch": 0.34, "grad_norm": 0.5410612225532532, "learning_rate": 0.0005809892984863854, "loss": 3.3574, "step": 6974 }, { "epoch": 0.34, "grad_norm": 0.5484248995780945, "learning_rate": 0.0005809839045607913, "loss": 3.1889, "step": 6975 }, { "epoch": 0.34, "grad_norm": 0.5360888242721558, "learning_rate": 0.0005809785098951396, "loss": 3.3528, "step": 6976 }, { "epoch": 0.34, "grad_norm": 0.6561615467071533, "learning_rate": 0.0005809731144894445, "loss": 3.3181, "step": 6977 }, { "epoch": 0.34, "grad_norm": 0.5585454106330872, "learning_rate": 0.0005809677183437206, "loss": 3.3861, "step": 6978 }, { "epoch": 0.34, "grad_norm": 0.539271354675293, "learning_rate": 0.0005809623214579816, "loss": 3.3186, "step": 6979 }, { "epoch": 0.34, "grad_norm": 0.5142511129379272, "learning_rate": 0.0005809569238322419, "loss": 3.296, "step": 6980 }, { "epoch": 0.34, "grad_norm": 0.5410292148590088, "learning_rate": 0.0005809515254665158, "loss": 3.3056, "step": 6981 }, { "epoch": 0.34, "grad_norm": 0.5308331847190857, "learning_rate": 0.0005809461263608175, "loss": 3.3361, "step": 6982 }, { "epoch": 0.34, "grad_norm": 0.546271562576294, "learning_rate": 0.000580940726515161, "loss": 3.4194, "step": 6983 }, { "epoch": 0.34, "grad_norm": 0.566936731338501, "learning_rate": 0.0005809353259295608, "loss": 3.3992, "step": 6984 }, { "epoch": 0.34, "grad_norm": 0.5126430988311768, "learning_rate": 0.000580929924604031, "loss": 3.2163, "step": 6985 }, { "epoch": 0.34, "grad_norm": 0.4886528551578522, "learning_rate": 0.000580924522538586, "loss": 3.3871, "step": 6986 }, { "epoch": 0.34, "grad_norm": 0.5845642685890198, "learning_rate": 0.0005809191197332397, "loss": 3.4001, "step": 6987 }, { "epoch": 0.34, "grad_norm": 0.5310481786727905, "learning_rate": 0.0005809137161880065, "loss": 3.5392, "step": 6988 }, { "epoch": 0.34, "grad_norm": 0.5669585466384888, "learning_rate": 0.0005809083119029006, "loss": 3.4265, "step": 6989 }, { "epoch": 0.34, "grad_norm": 0.5161418914794922, "learning_rate": 0.0005809029068779365, "loss": 3.3468, "step": 6990 }, { "epoch": 0.34, "grad_norm": 0.5583595037460327, "learning_rate": 0.0005808975011131279, "loss": 3.4198, "step": 6991 }, { "epoch": 0.34, "grad_norm": 0.5388359427452087, "learning_rate": 0.0005808920946084896, "loss": 3.4827, "step": 6992 }, { "epoch": 0.34, "grad_norm": 0.5691794157028198, "learning_rate": 0.0005808866873640353, "loss": 3.5184, "step": 6993 }, { "epoch": 0.34, "grad_norm": 0.6655071973800659, "learning_rate": 0.0005808812793797797, "loss": 3.3542, "step": 6994 }, { "epoch": 0.34, "grad_norm": 0.5387119054794312, "learning_rate": 0.000580875870655737, "loss": 3.2979, "step": 6995 }, { "epoch": 0.34, "grad_norm": 0.6775738596916199, "learning_rate": 0.0005808704611919211, "loss": 3.3308, "step": 6996 }, { "epoch": 0.34, "grad_norm": 0.537527859210968, "learning_rate": 0.0005808650509883465, "loss": 3.4031, "step": 6997 }, { "epoch": 0.34, "grad_norm": 0.5527935028076172, "learning_rate": 0.0005808596400450275, "loss": 3.251, "step": 6998 }, { "epoch": 0.34, "grad_norm": 0.5826919674873352, "learning_rate": 0.0005808542283619781, "loss": 3.3252, "step": 6999 }, { "epoch": 0.34, "grad_norm": 0.5161682367324829, "learning_rate": 0.0005808488159392129, "loss": 3.0676, "step": 7000 }, { "epoch": 0.34, "grad_norm": 0.5332803130149841, "learning_rate": 0.0005808434027767459, "loss": 3.5714, "step": 7001 }, { "epoch": 0.34, "grad_norm": 0.5207781791687012, "learning_rate": 0.0005808379888745914, "loss": 3.3575, "step": 7002 }, { "epoch": 0.34, "grad_norm": 0.5248399376869202, "learning_rate": 0.0005808325742327636, "loss": 3.0861, "step": 7003 }, { "epoch": 0.34, "grad_norm": 0.547214686870575, "learning_rate": 0.0005808271588512771, "loss": 3.4018, "step": 7004 }, { "epoch": 0.34, "grad_norm": 0.49481138586997986, "learning_rate": 0.0005808217427301456, "loss": 3.2186, "step": 7005 }, { "epoch": 0.34, "grad_norm": 0.567725658416748, "learning_rate": 0.0005808163258693839, "loss": 3.5048, "step": 7006 }, { "epoch": 0.34, "grad_norm": 0.6113198399543762, "learning_rate": 0.000580810908269006, "loss": 3.2416, "step": 7007 }, { "epoch": 0.34, "grad_norm": 0.5307554006576538, "learning_rate": 0.0005808054899290262, "loss": 3.2915, "step": 7008 }, { "epoch": 0.34, "grad_norm": 0.5398176312446594, "learning_rate": 0.0005808000708494587, "loss": 3.2426, "step": 7009 }, { "epoch": 0.34, "grad_norm": 0.5282324552536011, "learning_rate": 0.0005807946510303179, "loss": 3.5821, "step": 7010 }, { "epoch": 0.34, "grad_norm": 0.5628719925880432, "learning_rate": 0.0005807892304716181, "loss": 3.5204, "step": 7011 }, { "epoch": 0.34, "grad_norm": 0.5679787397384644, "learning_rate": 0.0005807838091733734, "loss": 3.3541, "step": 7012 }, { "epoch": 0.34, "grad_norm": 0.5467495322227478, "learning_rate": 0.0005807783871355982, "loss": 3.2906, "step": 7013 }, { "epoch": 0.34, "grad_norm": 0.5178360939025879, "learning_rate": 0.0005807729643583069, "loss": 3.3828, "step": 7014 }, { "epoch": 0.34, "grad_norm": 0.5609511137008667, "learning_rate": 0.0005807675408415134, "loss": 3.3343, "step": 7015 }, { "epoch": 0.34, "grad_norm": 0.5502193570137024, "learning_rate": 0.0005807621165852325, "loss": 3.3203, "step": 7016 }, { "epoch": 0.34, "grad_norm": 0.5327548980712891, "learning_rate": 0.000580756691589478, "loss": 3.3419, "step": 7017 }, { "epoch": 0.34, "grad_norm": 0.506458044052124, "learning_rate": 0.0005807512658542646, "loss": 3.2494, "step": 7018 }, { "epoch": 0.34, "grad_norm": 0.5278050303459167, "learning_rate": 0.0005807458393796062, "loss": 3.3536, "step": 7019 }, { "epoch": 0.34, "grad_norm": 0.530926525592804, "learning_rate": 0.0005807404121655175, "loss": 3.3645, "step": 7020 }, { "epoch": 0.34, "grad_norm": 0.6142529845237732, "learning_rate": 0.0005807349842120124, "loss": 3.3903, "step": 7021 }, { "epoch": 0.34, "grad_norm": 0.5566715002059937, "learning_rate": 0.0005807295555191055, "loss": 3.3908, "step": 7022 }, { "epoch": 0.34, "grad_norm": 0.5471014380455017, "learning_rate": 0.0005807241260868109, "loss": 3.4871, "step": 7023 }, { "epoch": 0.34, "grad_norm": 0.584735095500946, "learning_rate": 0.000580718695915143, "loss": 3.3576, "step": 7024 }, { "epoch": 0.34, "grad_norm": 0.5210456848144531, "learning_rate": 0.0005807132650041162, "loss": 3.3165, "step": 7025 }, { "epoch": 0.34, "grad_norm": 0.5415894985198975, "learning_rate": 0.0005807078333537445, "loss": 3.4889, "step": 7026 }, { "epoch": 0.34, "grad_norm": 0.519156813621521, "learning_rate": 0.0005807024009640425, "loss": 3.3567, "step": 7027 }, { "epoch": 0.34, "grad_norm": 0.5499240159988403, "learning_rate": 0.0005806969678350243, "loss": 3.3908, "step": 7028 }, { "epoch": 0.34, "grad_norm": 0.5356557369232178, "learning_rate": 0.0005806915339667044, "loss": 3.1221, "step": 7029 }, { "epoch": 0.34, "grad_norm": 0.562337338924408, "learning_rate": 0.000580686099359097, "loss": 3.1642, "step": 7030 }, { "epoch": 0.34, "grad_norm": 0.5245587229728699, "learning_rate": 0.0005806806640122164, "loss": 3.277, "step": 7031 }, { "epoch": 0.34, "grad_norm": 0.548549473285675, "learning_rate": 0.000580675227926077, "loss": 3.497, "step": 7032 }, { "epoch": 0.34, "grad_norm": 0.5365231037139893, "learning_rate": 0.0005806697911006931, "loss": 3.257, "step": 7033 }, { "epoch": 0.34, "grad_norm": 0.5649599432945251, "learning_rate": 0.0005806643535360789, "loss": 3.2649, "step": 7034 }, { "epoch": 0.34, "grad_norm": 0.5196275115013123, "learning_rate": 0.0005806589152322489, "loss": 3.2268, "step": 7035 }, { "epoch": 0.34, "grad_norm": 0.509590744972229, "learning_rate": 0.0005806534761892172, "loss": 3.2333, "step": 7036 }, { "epoch": 0.34, "grad_norm": 0.5706533789634705, "learning_rate": 0.0005806480364069983, "loss": 3.011, "step": 7037 }, { "epoch": 0.34, "grad_norm": 0.5456781983375549, "learning_rate": 0.0005806425958856065, "loss": 3.5429, "step": 7038 }, { "epoch": 0.34, "grad_norm": 0.5550026893615723, "learning_rate": 0.0005806371546250562, "loss": 3.4187, "step": 7039 }, { "epoch": 0.35, "grad_norm": 0.5485456585884094, "learning_rate": 0.0005806317126253616, "loss": 3.2612, "step": 7040 }, { "epoch": 0.35, "grad_norm": 0.5675588250160217, "learning_rate": 0.0005806262698865369, "loss": 3.4519, "step": 7041 }, { "epoch": 0.35, "grad_norm": 0.5755375623703003, "learning_rate": 0.0005806208264085968, "loss": 3.2874, "step": 7042 }, { "epoch": 0.35, "grad_norm": 0.5225034952163696, "learning_rate": 0.0005806153821915554, "loss": 3.317, "step": 7043 }, { "epoch": 0.35, "grad_norm": 0.5519276261329651, "learning_rate": 0.0005806099372354271, "loss": 3.4839, "step": 7044 }, { "epoch": 0.35, "grad_norm": 0.5228083729743958, "learning_rate": 0.0005806044915402262, "loss": 3.334, "step": 7045 }, { "epoch": 0.35, "grad_norm": 0.5322780013084412, "learning_rate": 0.0005805990451059671, "loss": 3.397, "step": 7046 }, { "epoch": 0.35, "grad_norm": 0.5328549742698669, "learning_rate": 0.0005805935979326641, "loss": 3.4732, "step": 7047 }, { "epoch": 0.35, "grad_norm": 0.48566094040870667, "learning_rate": 0.0005805881500203316, "loss": 3.3144, "step": 7048 }, { "epoch": 0.35, "grad_norm": 0.5601107478141785, "learning_rate": 0.0005805827013689839, "loss": 3.3341, "step": 7049 }, { "epoch": 0.35, "grad_norm": 0.5418280363082886, "learning_rate": 0.0005805772519786353, "loss": 3.4561, "step": 7050 }, { "epoch": 0.35, "grad_norm": 0.5339364409446716, "learning_rate": 0.0005805718018493003, "loss": 3.4457, "step": 7051 }, { "epoch": 0.35, "grad_norm": 0.6148756146430969, "learning_rate": 0.0005805663509809932, "loss": 3.3762, "step": 7052 }, { "epoch": 0.35, "grad_norm": 0.5323485136032104, "learning_rate": 0.0005805608993737282, "loss": 3.3379, "step": 7053 }, { "epoch": 0.35, "grad_norm": 0.5185231566429138, "learning_rate": 0.0005805554470275199, "loss": 3.1767, "step": 7054 }, { "epoch": 0.35, "grad_norm": 0.54229736328125, "learning_rate": 0.0005805499939423826, "loss": 3.3344, "step": 7055 }, { "epoch": 0.35, "grad_norm": 0.5611851215362549, "learning_rate": 0.0005805445401183305, "loss": 3.4762, "step": 7056 }, { "epoch": 0.35, "grad_norm": 0.5249059200286865, "learning_rate": 0.000580539085555378, "loss": 3.4588, "step": 7057 }, { "epoch": 0.35, "grad_norm": 0.5460208654403687, "learning_rate": 0.0005805336302535397, "loss": 3.3557, "step": 7058 }, { "epoch": 0.35, "grad_norm": 0.6470723152160645, "learning_rate": 0.0005805281742128297, "loss": 3.0215, "step": 7059 }, { "epoch": 0.35, "grad_norm": 0.5494178533554077, "learning_rate": 0.0005805227174332625, "loss": 3.3288, "step": 7060 }, { "epoch": 0.35, "grad_norm": 0.5545464754104614, "learning_rate": 0.0005805172599148525, "loss": 3.222, "step": 7061 }, { "epoch": 0.35, "grad_norm": 0.5664929747581482, "learning_rate": 0.0005805118016576139, "loss": 3.2427, "step": 7062 }, { "epoch": 0.35, "grad_norm": 0.589393138885498, "learning_rate": 0.0005805063426615613, "loss": 3.3364, "step": 7063 }, { "epoch": 0.35, "grad_norm": 0.5010703802108765, "learning_rate": 0.000580500882926709, "loss": 3.4066, "step": 7064 }, { "epoch": 0.35, "grad_norm": 0.5556477904319763, "learning_rate": 0.0005804954224530712, "loss": 3.4683, "step": 7065 }, { "epoch": 0.35, "grad_norm": 0.5511776208877563, "learning_rate": 0.0005804899612406627, "loss": 3.3322, "step": 7066 }, { "epoch": 0.35, "grad_norm": 0.5432040095329285, "learning_rate": 0.0005804844992894975, "loss": 3.2741, "step": 7067 }, { "epoch": 0.35, "grad_norm": 0.5588909983634949, "learning_rate": 0.00058047903659959, "loss": 3.6012, "step": 7068 }, { "epoch": 0.35, "grad_norm": 0.5545442700386047, "learning_rate": 0.0005804735731709548, "loss": 3.373, "step": 7069 }, { "epoch": 0.35, "grad_norm": 0.5283875465393066, "learning_rate": 0.0005804681090036062, "loss": 3.2354, "step": 7070 }, { "epoch": 0.35, "grad_norm": 0.4935014843940735, "learning_rate": 0.0005804626440975585, "loss": 3.3775, "step": 7071 }, { "epoch": 0.35, "grad_norm": 0.5301677584648132, "learning_rate": 0.0005804571784528262, "loss": 3.3929, "step": 7072 }, { "epoch": 0.35, "grad_norm": 0.5176547169685364, "learning_rate": 0.0005804517120694238, "loss": 3.4168, "step": 7073 }, { "epoch": 0.35, "grad_norm": 0.5569431781768799, "learning_rate": 0.0005804462449473654, "loss": 3.4039, "step": 7074 }, { "epoch": 0.35, "grad_norm": 0.5683629512786865, "learning_rate": 0.0005804407770866656, "loss": 3.4013, "step": 7075 }, { "epoch": 0.35, "grad_norm": 0.5122573375701904, "learning_rate": 0.0005804353084873388, "loss": 3.2245, "step": 7076 }, { "epoch": 0.35, "grad_norm": 0.5609269142150879, "learning_rate": 0.0005804298391493993, "loss": 3.2241, "step": 7077 }, { "epoch": 0.35, "grad_norm": 0.5435025691986084, "learning_rate": 0.0005804243690728617, "loss": 3.2807, "step": 7078 }, { "epoch": 0.35, "grad_norm": 0.5618283152580261, "learning_rate": 0.0005804188982577402, "loss": 3.1999, "step": 7079 }, { "epoch": 0.35, "grad_norm": 0.5037075877189636, "learning_rate": 0.0005804134267040494, "loss": 3.3382, "step": 7080 }, { "epoch": 0.35, "grad_norm": 0.5446479916572571, "learning_rate": 0.0005804079544118034, "loss": 3.1628, "step": 7081 }, { "epoch": 0.35, "grad_norm": 0.5279077291488647, "learning_rate": 0.0005804024813810169, "loss": 3.2108, "step": 7082 }, { "epoch": 0.35, "grad_norm": 0.556573748588562, "learning_rate": 0.0005803970076117043, "loss": 3.47, "step": 7083 }, { "epoch": 0.35, "grad_norm": 0.5409570932388306, "learning_rate": 0.0005803915331038799, "loss": 3.2393, "step": 7084 }, { "epoch": 0.35, "grad_norm": 0.5191570520401001, "learning_rate": 0.0005803860578575581, "loss": 3.3679, "step": 7085 }, { "epoch": 0.35, "grad_norm": 0.5882177352905273, "learning_rate": 0.0005803805818727535, "loss": 3.1786, "step": 7086 }, { "epoch": 0.35, "grad_norm": 0.5755952000617981, "learning_rate": 0.0005803751051494803, "loss": 3.4671, "step": 7087 }, { "epoch": 0.35, "grad_norm": 0.5264267325401306, "learning_rate": 0.000580369627687753, "loss": 3.1961, "step": 7088 }, { "epoch": 0.35, "grad_norm": 0.5818674564361572, "learning_rate": 0.0005803641494875861, "loss": 3.334, "step": 7089 }, { "epoch": 0.35, "grad_norm": 0.529707670211792, "learning_rate": 0.000580358670548994, "loss": 3.3026, "step": 7090 }, { "epoch": 0.35, "grad_norm": 0.5402799844741821, "learning_rate": 0.0005803531908719912, "loss": 3.4426, "step": 7091 }, { "epoch": 0.35, "grad_norm": 0.5018097162246704, "learning_rate": 0.000580347710456592, "loss": 3.1922, "step": 7092 }, { "epoch": 0.35, "grad_norm": 0.5327255129814148, "learning_rate": 0.0005803422293028109, "loss": 3.2424, "step": 7093 }, { "epoch": 0.35, "grad_norm": 0.5543902516365051, "learning_rate": 0.0005803367474106623, "loss": 3.463, "step": 7094 }, { "epoch": 0.35, "grad_norm": 0.5371736288070679, "learning_rate": 0.0005803312647801607, "loss": 3.2514, "step": 7095 }, { "epoch": 0.35, "grad_norm": 0.5503165125846863, "learning_rate": 0.0005803257814113204, "loss": 3.5273, "step": 7096 }, { "epoch": 0.35, "grad_norm": 0.5683706998825073, "learning_rate": 0.0005803202973041561, "loss": 3.189, "step": 7097 }, { "epoch": 0.35, "grad_norm": 0.5162700414657593, "learning_rate": 0.0005803148124586819, "loss": 3.3167, "step": 7098 }, { "epoch": 0.35, "grad_norm": 0.49133065342903137, "learning_rate": 0.0005803093268749125, "loss": 3.1603, "step": 7099 }, { "epoch": 0.35, "grad_norm": 0.5067523717880249, "learning_rate": 0.0005803038405528622, "loss": 3.4655, "step": 7100 }, { "epoch": 0.35, "grad_norm": 0.5425522923469543, "learning_rate": 0.0005802983534925457, "loss": 3.4017, "step": 7101 }, { "epoch": 0.35, "grad_norm": 0.5662040114402771, "learning_rate": 0.0005802928656939773, "loss": 3.4728, "step": 7102 }, { "epoch": 0.35, "grad_norm": 0.5476053953170776, "learning_rate": 0.0005802873771571712, "loss": 3.5488, "step": 7103 }, { "epoch": 0.35, "grad_norm": 0.5239982604980469, "learning_rate": 0.0005802818878821424, "loss": 3.3556, "step": 7104 }, { "epoch": 0.35, "grad_norm": 0.5249889492988586, "learning_rate": 0.0005802763978689048, "loss": 3.5085, "step": 7105 }, { "epoch": 0.35, "grad_norm": 0.5688517689704895, "learning_rate": 0.0005802709071174732, "loss": 3.3199, "step": 7106 }, { "epoch": 0.35, "grad_norm": 0.5531274080276489, "learning_rate": 0.0005802654156278619, "loss": 3.3684, "step": 7107 }, { "epoch": 0.35, "grad_norm": 0.5954955220222473, "learning_rate": 0.0005802599234000855, "loss": 3.3379, "step": 7108 }, { "epoch": 0.35, "grad_norm": 0.5915130376815796, "learning_rate": 0.0005802544304341584, "loss": 3.4219, "step": 7109 }, { "epoch": 0.35, "grad_norm": 0.5060703754425049, "learning_rate": 0.000580248936730095, "loss": 3.2657, "step": 7110 }, { "epoch": 0.35, "grad_norm": 0.5294564366340637, "learning_rate": 0.0005802434422879099, "loss": 3.4222, "step": 7111 }, { "epoch": 0.35, "grad_norm": 0.5669923424720764, "learning_rate": 0.0005802379471076175, "loss": 3.4991, "step": 7112 }, { "epoch": 0.35, "grad_norm": 0.550014317035675, "learning_rate": 0.0005802324511892323, "loss": 3.3429, "step": 7113 }, { "epoch": 0.35, "grad_norm": 0.5213134288787842, "learning_rate": 0.0005802269545327688, "loss": 3.4838, "step": 7114 }, { "epoch": 0.35, "grad_norm": 0.5911935567855835, "learning_rate": 0.0005802214571382413, "loss": 3.267, "step": 7115 }, { "epoch": 0.35, "grad_norm": 0.5569819808006287, "learning_rate": 0.0005802159590056644, "loss": 3.4411, "step": 7116 }, { "epoch": 0.35, "grad_norm": 0.5387932658195496, "learning_rate": 0.0005802104601350528, "loss": 3.5086, "step": 7117 }, { "epoch": 0.35, "grad_norm": 0.5420069694519043, "learning_rate": 0.0005802049605264205, "loss": 3.3445, "step": 7118 }, { "epoch": 0.35, "grad_norm": 0.5610042214393616, "learning_rate": 0.0005801994601797825, "loss": 3.3451, "step": 7119 }, { "epoch": 0.35, "grad_norm": 0.5179895162582397, "learning_rate": 0.000580193959095153, "loss": 3.3824, "step": 7120 }, { "epoch": 0.35, "grad_norm": 0.545372486114502, "learning_rate": 0.0005801884572725464, "loss": 3.5493, "step": 7121 }, { "epoch": 0.35, "grad_norm": 0.5903968214988708, "learning_rate": 0.0005801829547119775, "loss": 3.4681, "step": 7122 }, { "epoch": 0.35, "grad_norm": 0.6884878873825073, "learning_rate": 0.0005801774514134605, "loss": 3.4401, "step": 7123 }, { "epoch": 0.35, "grad_norm": 0.5330374836921692, "learning_rate": 0.00058017194737701, "loss": 3.5123, "step": 7124 }, { "epoch": 0.35, "grad_norm": 0.5485833883285522, "learning_rate": 0.0005801664426026405, "loss": 3.4519, "step": 7125 }, { "epoch": 0.35, "grad_norm": 0.6042609214782715, "learning_rate": 0.0005801609370903666, "loss": 3.3484, "step": 7126 }, { "epoch": 0.35, "grad_norm": 0.5192358493804932, "learning_rate": 0.0005801554308402028, "loss": 3.3385, "step": 7127 }, { "epoch": 0.35, "grad_norm": 0.5295864343643188, "learning_rate": 0.0005801499238521634, "loss": 3.4082, "step": 7128 }, { "epoch": 0.35, "grad_norm": 0.5500878691673279, "learning_rate": 0.000580144416126263, "loss": 3.2593, "step": 7129 }, { "epoch": 0.35, "grad_norm": 0.5443781614303589, "learning_rate": 0.0005801389076625161, "loss": 3.3759, "step": 7130 }, { "epoch": 0.35, "grad_norm": 0.5370346307754517, "learning_rate": 0.0005801333984609372, "loss": 3.1128, "step": 7131 }, { "epoch": 0.35, "grad_norm": 0.5285878777503967, "learning_rate": 0.0005801278885215409, "loss": 3.3864, "step": 7132 }, { "epoch": 0.35, "grad_norm": 0.6192882657051086, "learning_rate": 0.0005801223778443417, "loss": 3.2203, "step": 7133 }, { "epoch": 0.35, "grad_norm": 0.5949166417121887, "learning_rate": 0.000580116866429354, "loss": 3.527, "step": 7134 }, { "epoch": 0.35, "grad_norm": 0.5258962512016296, "learning_rate": 0.0005801113542765925, "loss": 3.4491, "step": 7135 }, { "epoch": 0.35, "grad_norm": 0.524845540523529, "learning_rate": 0.0005801058413860714, "loss": 3.2764, "step": 7136 }, { "epoch": 0.35, "grad_norm": 0.5003640651702881, "learning_rate": 0.0005801003277578055, "loss": 3.3281, "step": 7137 }, { "epoch": 0.35, "grad_norm": 0.5418901443481445, "learning_rate": 0.0005800948133918094, "loss": 3.2208, "step": 7138 }, { "epoch": 0.35, "grad_norm": 0.5504566431045532, "learning_rate": 0.0005800892982880973, "loss": 3.1584, "step": 7139 }, { "epoch": 0.35, "grad_norm": 0.5194109082221985, "learning_rate": 0.000580083782446684, "loss": 3.641, "step": 7140 }, { "epoch": 0.35, "grad_norm": 0.5327237248420715, "learning_rate": 0.0005800782658675838, "loss": 3.1637, "step": 7141 }, { "epoch": 0.35, "grad_norm": 0.6268619894981384, "learning_rate": 0.0005800727485508114, "loss": 3.264, "step": 7142 }, { "epoch": 0.35, "grad_norm": 0.5085632801055908, "learning_rate": 0.0005800672304963813, "loss": 3.5367, "step": 7143 }, { "epoch": 0.35, "grad_norm": 0.6155527830123901, "learning_rate": 0.000580061711704308, "loss": 3.3654, "step": 7144 }, { "epoch": 0.35, "grad_norm": 0.5261454582214355, "learning_rate": 0.0005800561921746061, "loss": 3.4477, "step": 7145 }, { "epoch": 0.35, "grad_norm": 0.5312758684158325, "learning_rate": 0.0005800506719072899, "loss": 3.4262, "step": 7146 }, { "epoch": 0.35, "grad_norm": 0.5167414546012878, "learning_rate": 0.0005800451509023744, "loss": 3.4457, "step": 7147 }, { "epoch": 0.35, "grad_norm": 0.5222433805465698, "learning_rate": 0.0005800396291598737, "loss": 3.4803, "step": 7148 }, { "epoch": 0.35, "grad_norm": 0.49773696064949036, "learning_rate": 0.0005800341066798025, "loss": 3.1294, "step": 7149 }, { "epoch": 0.35, "grad_norm": 0.5385370254516602, "learning_rate": 0.0005800285834621754, "loss": 3.4034, "step": 7150 }, { "epoch": 0.35, "grad_norm": 0.5338417887687683, "learning_rate": 0.000580023059507007, "loss": 3.1577, "step": 7151 }, { "epoch": 0.35, "grad_norm": 0.5068842768669128, "learning_rate": 0.0005800175348143116, "loss": 3.5412, "step": 7152 }, { "epoch": 0.35, "grad_norm": 0.49130746722221375, "learning_rate": 0.0005800120093841039, "loss": 3.4468, "step": 7153 }, { "epoch": 0.35, "grad_norm": 0.5758814215660095, "learning_rate": 0.0005800064832163985, "loss": 3.3889, "step": 7154 }, { "epoch": 0.35, "grad_norm": 0.5376800298690796, "learning_rate": 0.00058000095631121, "loss": 3.4071, "step": 7155 }, { "epoch": 0.35, "grad_norm": 0.5141198635101318, "learning_rate": 0.0005799954286685527, "loss": 3.5247, "step": 7156 }, { "epoch": 0.35, "grad_norm": 0.5330366492271423, "learning_rate": 0.0005799899002884415, "loss": 3.0816, "step": 7157 }, { "epoch": 0.35, "grad_norm": 0.4993706941604614, "learning_rate": 0.0005799843711708908, "loss": 3.3283, "step": 7158 }, { "epoch": 0.35, "grad_norm": 0.520387589931488, "learning_rate": 0.0005799788413159149, "loss": 3.4417, "step": 7159 }, { "epoch": 0.35, "grad_norm": 0.5464620590209961, "learning_rate": 0.0005799733107235288, "loss": 3.5151, "step": 7160 }, { "epoch": 0.35, "grad_norm": 0.5394158363342285, "learning_rate": 0.0005799677793937469, "loss": 3.4495, "step": 7161 }, { "epoch": 0.35, "grad_norm": 0.505730390548706, "learning_rate": 0.0005799622473265837, "loss": 3.3232, "step": 7162 }, { "epoch": 0.35, "grad_norm": 0.5446467995643616, "learning_rate": 0.0005799567145220539, "loss": 3.3166, "step": 7163 }, { "epoch": 0.35, "grad_norm": 0.5053635835647583, "learning_rate": 0.0005799511809801719, "loss": 3.2405, "step": 7164 }, { "epoch": 0.35, "grad_norm": 0.5116280317306519, "learning_rate": 0.0005799456467009523, "loss": 3.0451, "step": 7165 }, { "epoch": 0.35, "grad_norm": 0.575804591178894, "learning_rate": 0.0005799401116844099, "loss": 3.4171, "step": 7166 }, { "epoch": 0.35, "grad_norm": 0.5386016368865967, "learning_rate": 0.000579934575930559, "loss": 3.5537, "step": 7167 }, { "epoch": 0.35, "grad_norm": 0.58470219373703, "learning_rate": 0.0005799290394394144, "loss": 3.4352, "step": 7168 }, { "epoch": 0.35, "grad_norm": 0.5334919691085815, "learning_rate": 0.0005799235022109906, "loss": 3.4018, "step": 7169 }, { "epoch": 0.35, "grad_norm": 0.513592004776001, "learning_rate": 0.000579917964245302, "loss": 3.5992, "step": 7170 }, { "epoch": 0.35, "grad_norm": 0.5190378427505493, "learning_rate": 0.0005799124255423634, "loss": 3.3433, "step": 7171 }, { "epoch": 0.35, "grad_norm": 0.5345826148986816, "learning_rate": 0.0005799068861021895, "loss": 3.469, "step": 7172 }, { "epoch": 0.35, "grad_norm": 0.5253705382347107, "learning_rate": 0.0005799013459247946, "loss": 3.2886, "step": 7173 }, { "epoch": 0.35, "grad_norm": 0.5313470363616943, "learning_rate": 0.0005798958050101935, "loss": 3.2108, "step": 7174 }, { "epoch": 0.35, "grad_norm": 0.545667290687561, "learning_rate": 0.0005798902633584006, "loss": 3.5013, "step": 7175 }, { "epoch": 0.35, "grad_norm": 0.5079378485679626, "learning_rate": 0.0005798847209694308, "loss": 3.3649, "step": 7176 }, { "epoch": 0.35, "grad_norm": 0.555418074131012, "learning_rate": 0.0005798791778432984, "loss": 3.406, "step": 7177 }, { "epoch": 0.35, "grad_norm": 0.5127460956573486, "learning_rate": 0.0005798736339800181, "loss": 3.333, "step": 7178 }, { "epoch": 0.35, "grad_norm": 0.5296385288238525, "learning_rate": 0.0005798680893796045, "loss": 3.2033, "step": 7179 }, { "epoch": 0.35, "grad_norm": 0.5440773367881775, "learning_rate": 0.0005798625440420721, "loss": 3.533, "step": 7180 }, { "epoch": 0.35, "grad_norm": 0.5760077834129333, "learning_rate": 0.0005798569979674358, "loss": 3.3711, "step": 7181 }, { "epoch": 0.35, "grad_norm": 0.550422728061676, "learning_rate": 0.00057985145115571, "loss": 3.3078, "step": 7182 }, { "epoch": 0.35, "grad_norm": 0.5506702661514282, "learning_rate": 0.0005798459036069094, "loss": 3.2914, "step": 7183 }, { "epoch": 0.35, "grad_norm": 0.5331199169158936, "learning_rate": 0.0005798403553210484, "loss": 3.2955, "step": 7184 }, { "epoch": 0.35, "grad_norm": 0.5018191337585449, "learning_rate": 0.0005798348062981419, "loss": 3.2215, "step": 7185 }, { "epoch": 0.35, "grad_norm": 0.5439038872718811, "learning_rate": 0.0005798292565382042, "loss": 3.3194, "step": 7186 }, { "epoch": 0.35, "grad_norm": 0.5371338725090027, "learning_rate": 0.0005798237060412502, "loss": 3.4208, "step": 7187 }, { "epoch": 0.35, "grad_norm": 0.5246415138244629, "learning_rate": 0.0005798181548072943, "loss": 3.2842, "step": 7188 }, { "epoch": 0.35, "grad_norm": 0.519935131072998, "learning_rate": 0.0005798126028363514, "loss": 3.1714, "step": 7189 }, { "epoch": 0.35, "grad_norm": 0.5213209390640259, "learning_rate": 0.0005798070501284359, "loss": 3.1316, "step": 7190 }, { "epoch": 0.35, "grad_norm": 0.5461356043815613, "learning_rate": 0.0005798014966835625, "loss": 3.3031, "step": 7191 }, { "epoch": 0.35, "grad_norm": 0.5040096044540405, "learning_rate": 0.0005797959425017457, "loss": 3.3874, "step": 7192 }, { "epoch": 0.35, "grad_norm": 0.5159637331962585, "learning_rate": 0.0005797903875830004, "loss": 3.2251, "step": 7193 }, { "epoch": 0.35, "grad_norm": 0.5395895838737488, "learning_rate": 0.000579784831927341, "loss": 3.4672, "step": 7194 }, { "epoch": 0.35, "grad_norm": 0.5256237387657166, "learning_rate": 0.000579779275534782, "loss": 3.4937, "step": 7195 }, { "epoch": 0.35, "grad_norm": 0.5334175825119019, "learning_rate": 0.0005797737184053385, "loss": 3.2622, "step": 7196 }, { "epoch": 0.35, "grad_norm": 0.5539845824241638, "learning_rate": 0.0005797681605390248, "loss": 3.3592, "step": 7197 }, { "epoch": 0.35, "grad_norm": 0.5329002737998962, "learning_rate": 0.0005797626019358556, "loss": 3.2035, "step": 7198 }, { "epoch": 0.35, "grad_norm": 0.5471001267433167, "learning_rate": 0.0005797570425958454, "loss": 3.4235, "step": 7199 }, { "epoch": 0.35, "grad_norm": 0.5086562633514404, "learning_rate": 0.0005797514825190092, "loss": 3.2384, "step": 7200 }, { "epoch": 0.35, "grad_norm": 0.5643491744995117, "learning_rate": 0.0005797459217053613, "loss": 3.4946, "step": 7201 }, { "epoch": 0.35, "grad_norm": 0.5237635374069214, "learning_rate": 0.0005797403601549166, "loss": 3.3411, "step": 7202 }, { "epoch": 0.35, "grad_norm": 0.5038819909095764, "learning_rate": 0.0005797347978676895, "loss": 3.5791, "step": 7203 }, { "epoch": 0.35, "grad_norm": 0.49819880723953247, "learning_rate": 0.0005797292348436949, "loss": 3.4183, "step": 7204 }, { "epoch": 0.35, "grad_norm": 0.5045859813690186, "learning_rate": 0.0005797236710829473, "loss": 3.3744, "step": 7205 }, { "epoch": 0.35, "grad_norm": 0.5460410714149475, "learning_rate": 0.0005797181065854613, "loss": 3.3562, "step": 7206 }, { "epoch": 0.35, "grad_norm": 0.5919173955917358, "learning_rate": 0.0005797125413512517, "loss": 3.3013, "step": 7207 }, { "epoch": 0.35, "grad_norm": 0.5470105409622192, "learning_rate": 0.0005797069753803332, "loss": 3.2812, "step": 7208 }, { "epoch": 0.35, "grad_norm": 0.5546271204948425, "learning_rate": 0.0005797014086727201, "loss": 3.5578, "step": 7209 }, { "epoch": 0.35, "grad_norm": 0.5407612323760986, "learning_rate": 0.0005796958412284275, "loss": 3.5534, "step": 7210 }, { "epoch": 0.35, "grad_norm": 0.5482301115989685, "learning_rate": 0.0005796902730474698, "loss": 3.2728, "step": 7211 }, { "epoch": 0.35, "grad_norm": 0.5478767156600952, "learning_rate": 0.0005796847041298619, "loss": 3.1991, "step": 7212 }, { "epoch": 0.35, "grad_norm": 0.5303657054901123, "learning_rate": 0.0005796791344756182, "loss": 3.3981, "step": 7213 }, { "epoch": 0.35, "grad_norm": 0.5501422882080078, "learning_rate": 0.0005796735640847535, "loss": 3.2946, "step": 7214 }, { "epoch": 0.35, "grad_norm": 0.5230575203895569, "learning_rate": 0.0005796679929572826, "loss": 3.2646, "step": 7215 }, { "epoch": 0.35, "grad_norm": 0.5465124249458313, "learning_rate": 0.0005796624210932197, "loss": 3.0865, "step": 7216 }, { "epoch": 0.35, "grad_norm": 0.5589669346809387, "learning_rate": 0.00057965684849258, "loss": 3.1481, "step": 7217 }, { "epoch": 0.35, "grad_norm": 0.5898823142051697, "learning_rate": 0.000579651275155378, "loss": 3.25, "step": 7218 }, { "epoch": 0.35, "grad_norm": 0.5371426939964294, "learning_rate": 0.0005796457010816284, "loss": 3.1897, "step": 7219 }, { "epoch": 0.35, "grad_norm": 0.5486535429954529, "learning_rate": 0.0005796401262713457, "loss": 3.1218, "step": 7220 }, { "epoch": 0.35, "grad_norm": 0.5335965156555176, "learning_rate": 0.0005796345507245448, "loss": 3.2851, "step": 7221 }, { "epoch": 0.35, "grad_norm": 0.5595109462738037, "learning_rate": 0.0005796289744412404, "loss": 3.4765, "step": 7222 }, { "epoch": 0.35, "grad_norm": 0.5471145510673523, "learning_rate": 0.000579623397421447, "loss": 3.4348, "step": 7223 }, { "epoch": 0.35, "grad_norm": 0.5449061393737793, "learning_rate": 0.0005796178196651794, "loss": 3.2696, "step": 7224 }, { "epoch": 0.35, "grad_norm": 0.5270853042602539, "learning_rate": 0.0005796122411724523, "loss": 3.5372, "step": 7225 }, { "epoch": 0.35, "grad_norm": 0.5229194164276123, "learning_rate": 0.0005796066619432803, "loss": 3.3925, "step": 7226 }, { "epoch": 0.35, "grad_norm": 0.564228892326355, "learning_rate": 0.0005796010819776782, "loss": 3.325, "step": 7227 }, { "epoch": 0.35, "grad_norm": 0.55915766954422, "learning_rate": 0.0005795955012756607, "loss": 3.4594, "step": 7228 }, { "epoch": 0.35, "grad_norm": 0.513458251953125, "learning_rate": 0.0005795899198372423, "loss": 3.3471, "step": 7229 }, { "epoch": 0.35, "grad_norm": 0.5892640948295593, "learning_rate": 0.0005795843376624381, "loss": 3.2332, "step": 7230 }, { "epoch": 0.35, "grad_norm": 0.5395854115486145, "learning_rate": 0.0005795787547512624, "loss": 3.1295, "step": 7231 }, { "epoch": 0.35, "grad_norm": 0.5873109698295593, "learning_rate": 0.0005795731711037301, "loss": 3.2344, "step": 7232 }, { "epoch": 0.35, "grad_norm": 0.5463156700134277, "learning_rate": 0.0005795675867198559, "loss": 3.2807, "step": 7233 }, { "epoch": 0.35, "grad_norm": 0.5152543783187866, "learning_rate": 0.0005795620015996545, "loss": 3.3083, "step": 7234 }, { "epoch": 0.35, "grad_norm": 0.630656361579895, "learning_rate": 0.0005795564157431405, "loss": 3.5325, "step": 7235 }, { "epoch": 0.35, "grad_norm": 0.5535502433776855, "learning_rate": 0.0005795508291503288, "loss": 3.4074, "step": 7236 }, { "epoch": 0.35, "grad_norm": 0.5285792946815491, "learning_rate": 0.0005795452418212339, "loss": 3.3968, "step": 7237 }, { "epoch": 0.35, "grad_norm": 0.548895537853241, "learning_rate": 0.0005795396537558707, "loss": 3.3346, "step": 7238 }, { "epoch": 0.35, "grad_norm": 0.5185685157775879, "learning_rate": 0.0005795340649542539, "loss": 3.5812, "step": 7239 }, { "epoch": 0.35, "grad_norm": 0.5098907351493835, "learning_rate": 0.0005795284754163981, "loss": 3.3364, "step": 7240 }, { "epoch": 0.35, "grad_norm": 0.5207344889640808, "learning_rate": 0.0005795228851423182, "loss": 3.1888, "step": 7241 }, { "epoch": 0.35, "grad_norm": 0.5404914021492004, "learning_rate": 0.0005795172941320287, "loss": 3.4547, "step": 7242 }, { "epoch": 0.35, "grad_norm": 0.6177796125411987, "learning_rate": 0.0005795117023855446, "loss": 3.2094, "step": 7243 }, { "epoch": 0.36, "grad_norm": 0.5546385049819946, "learning_rate": 0.0005795061099028802, "loss": 3.4063, "step": 7244 }, { "epoch": 0.36, "grad_norm": 0.5809313058853149, "learning_rate": 0.0005795005166840507, "loss": 3.3204, "step": 7245 }, { "epoch": 0.36, "grad_norm": 0.6135513186454773, "learning_rate": 0.0005794949227290705, "loss": 3.2965, "step": 7246 }, { "epoch": 0.36, "grad_norm": 0.5403055548667908, "learning_rate": 0.0005794893280379546, "loss": 3.3973, "step": 7247 }, { "epoch": 0.36, "grad_norm": 0.5268582105636597, "learning_rate": 0.0005794837326107175, "loss": 3.4849, "step": 7248 }, { "epoch": 0.36, "grad_norm": 0.5323728919029236, "learning_rate": 0.0005794781364473741, "loss": 3.5033, "step": 7249 }, { "epoch": 0.36, "grad_norm": 0.5301493406295776, "learning_rate": 0.000579472539547939, "loss": 3.3231, "step": 7250 }, { "epoch": 0.36, "grad_norm": 0.5442898273468018, "learning_rate": 0.0005794669419124271, "loss": 3.1438, "step": 7251 }, { "epoch": 0.36, "grad_norm": 0.6318520903587341, "learning_rate": 0.0005794613435408531, "loss": 3.1192, "step": 7252 }, { "epoch": 0.36, "grad_norm": 0.5194680094718933, "learning_rate": 0.0005794557444332316, "loss": 3.3424, "step": 7253 }, { "epoch": 0.36, "grad_norm": 0.5083394646644592, "learning_rate": 0.0005794501445895774, "loss": 3.1855, "step": 7254 }, { "epoch": 0.36, "grad_norm": 0.512860119342804, "learning_rate": 0.0005794445440099054, "loss": 3.291, "step": 7255 }, { "epoch": 0.36, "grad_norm": 0.5411530137062073, "learning_rate": 0.0005794389426942302, "loss": 3.3483, "step": 7256 }, { "epoch": 0.36, "grad_norm": 0.5959362387657166, "learning_rate": 0.0005794333406425667, "loss": 3.4056, "step": 7257 }, { "epoch": 0.36, "grad_norm": 0.596243143081665, "learning_rate": 0.0005794277378549296, "loss": 3.4625, "step": 7258 }, { "epoch": 0.36, "grad_norm": 0.523287832736969, "learning_rate": 0.0005794221343313334, "loss": 3.4901, "step": 7259 }, { "epoch": 0.36, "grad_norm": 0.5272862315177917, "learning_rate": 0.0005794165300717932, "loss": 3.3664, "step": 7260 }, { "epoch": 0.36, "grad_norm": 0.5093209147453308, "learning_rate": 0.0005794109250763236, "loss": 3.3563, "step": 7261 }, { "epoch": 0.36, "grad_norm": 0.5650598406791687, "learning_rate": 0.0005794053193449394, "loss": 3.0961, "step": 7262 }, { "epoch": 0.36, "grad_norm": 0.5295237898826599, "learning_rate": 0.0005793997128776554, "loss": 3.4869, "step": 7263 }, { "epoch": 0.36, "grad_norm": 0.5343896150588989, "learning_rate": 0.0005793941056744863, "loss": 3.316, "step": 7264 }, { "epoch": 0.36, "grad_norm": 0.5440590977668762, "learning_rate": 0.000579388497735447, "loss": 3.2155, "step": 7265 }, { "epoch": 0.36, "grad_norm": 0.4939824938774109, "learning_rate": 0.000579382889060552, "loss": 3.3011, "step": 7266 }, { "epoch": 0.36, "grad_norm": 0.49455365538597107, "learning_rate": 0.0005793772796498163, "loss": 3.4065, "step": 7267 }, { "epoch": 0.36, "grad_norm": 0.5292657613754272, "learning_rate": 0.0005793716695032546, "loss": 3.4599, "step": 7268 }, { "epoch": 0.36, "grad_norm": 0.5179243683815002, "learning_rate": 0.0005793660586208818, "loss": 3.5574, "step": 7269 }, { "epoch": 0.36, "grad_norm": 0.5624231100082397, "learning_rate": 0.0005793604470027124, "loss": 3.2575, "step": 7270 }, { "epoch": 0.36, "grad_norm": 0.5110817551612854, "learning_rate": 0.0005793548346487614, "loss": 3.4008, "step": 7271 }, { "epoch": 0.36, "grad_norm": 0.5481024384498596, "learning_rate": 0.0005793492215590435, "loss": 3.1985, "step": 7272 }, { "epoch": 0.36, "grad_norm": 0.5263863205909729, "learning_rate": 0.0005793436077335736, "loss": 3.2327, "step": 7273 }, { "epoch": 0.36, "grad_norm": 0.5095751881599426, "learning_rate": 0.0005793379931723664, "loss": 3.5152, "step": 7274 }, { "epoch": 0.36, "grad_norm": 0.5422204732894897, "learning_rate": 0.0005793323778754367, "loss": 3.341, "step": 7275 }, { "epoch": 0.36, "grad_norm": 0.5029591917991638, "learning_rate": 0.0005793267618427991, "loss": 3.4303, "step": 7276 }, { "epoch": 0.36, "grad_norm": 0.5158606767654419, "learning_rate": 0.0005793211450744688, "loss": 3.4689, "step": 7277 }, { "epoch": 0.36, "grad_norm": 0.565763533115387, "learning_rate": 0.0005793155275704601, "loss": 3.2644, "step": 7278 }, { "epoch": 0.36, "grad_norm": 0.5302228331565857, "learning_rate": 0.0005793099093307883, "loss": 3.4085, "step": 7279 }, { "epoch": 0.36, "grad_norm": 0.5519428849220276, "learning_rate": 0.0005793042903554679, "loss": 3.349, "step": 7280 }, { "epoch": 0.36, "grad_norm": 0.5244438648223877, "learning_rate": 0.0005792986706445137, "loss": 3.2967, "step": 7281 }, { "epoch": 0.36, "grad_norm": 0.5649428367614746, "learning_rate": 0.0005792930501979406, "loss": 3.2556, "step": 7282 }, { "epoch": 0.36, "grad_norm": 0.512695848941803, "learning_rate": 0.0005792874290157633, "loss": 3.3095, "step": 7283 }, { "epoch": 0.36, "grad_norm": 0.5236402750015259, "learning_rate": 0.0005792818070979967, "loss": 3.4541, "step": 7284 }, { "epoch": 0.36, "grad_norm": 0.5257241725921631, "learning_rate": 0.0005792761844446555, "loss": 3.3157, "step": 7285 }, { "epoch": 0.36, "grad_norm": 0.5090532898902893, "learning_rate": 0.0005792705610557546, "loss": 3.4076, "step": 7286 }, { "epoch": 0.36, "grad_norm": 0.5210464000701904, "learning_rate": 0.0005792649369313088, "loss": 3.1769, "step": 7287 }, { "epoch": 0.36, "grad_norm": 0.5331592559814453, "learning_rate": 0.0005792593120713329, "loss": 3.4404, "step": 7288 }, { "epoch": 0.36, "grad_norm": 0.5243934988975525, "learning_rate": 0.0005792536864758418, "loss": 3.2593, "step": 7289 }, { "epoch": 0.36, "grad_norm": 0.5213882327079773, "learning_rate": 0.0005792480601448502, "loss": 3.3763, "step": 7290 }, { "epoch": 0.36, "grad_norm": 0.5196213722229004, "learning_rate": 0.0005792424330783729, "loss": 3.3627, "step": 7291 }, { "epoch": 0.36, "grad_norm": 0.5319061875343323, "learning_rate": 0.0005792368052764248, "loss": 3.3825, "step": 7292 }, { "epoch": 0.36, "grad_norm": 0.5190185308456421, "learning_rate": 0.0005792311767390207, "loss": 3.3911, "step": 7293 }, { "epoch": 0.36, "grad_norm": 0.5538293123245239, "learning_rate": 0.0005792255474661753, "loss": 3.5731, "step": 7294 }, { "epoch": 0.36, "grad_norm": 0.5294688940048218, "learning_rate": 0.0005792199174579038, "loss": 3.4866, "step": 7295 }, { "epoch": 0.36, "grad_norm": 0.5441796183586121, "learning_rate": 0.0005792142867142206, "loss": 3.4322, "step": 7296 }, { "epoch": 0.36, "grad_norm": 0.49350860714912415, "learning_rate": 0.0005792086552351407, "loss": 3.1275, "step": 7297 }, { "epoch": 0.36, "grad_norm": 0.569159746170044, "learning_rate": 0.0005792030230206789, "loss": 3.3663, "step": 7298 }, { "epoch": 0.36, "grad_norm": 0.5132843255996704, "learning_rate": 0.0005791973900708502, "loss": 3.1675, "step": 7299 }, { "epoch": 0.36, "grad_norm": 0.6056221723556519, "learning_rate": 0.0005791917563856692, "loss": 3.4195, "step": 7300 }, { "epoch": 0.36, "grad_norm": 0.532596230506897, "learning_rate": 0.000579186121965151, "loss": 3.183, "step": 7301 }, { "epoch": 0.36, "grad_norm": 0.5601078867912292, "learning_rate": 0.0005791804868093101, "loss": 3.228, "step": 7302 }, { "epoch": 0.36, "grad_norm": 0.5013242959976196, "learning_rate": 0.0005791748509181616, "loss": 3.269, "step": 7303 }, { "epoch": 0.36, "grad_norm": 0.5339405536651611, "learning_rate": 0.0005791692142917203, "loss": 3.416, "step": 7304 }, { "epoch": 0.36, "grad_norm": 0.5372835993766785, "learning_rate": 0.000579163576930001, "loss": 3.4051, "step": 7305 }, { "epoch": 0.36, "grad_norm": 0.5756890773773193, "learning_rate": 0.0005791579388330186, "loss": 3.5151, "step": 7306 }, { "epoch": 0.36, "grad_norm": 0.5298717617988586, "learning_rate": 0.0005791523000007878, "loss": 3.3441, "step": 7307 }, { "epoch": 0.36, "grad_norm": 0.5848795175552368, "learning_rate": 0.0005791466604333237, "loss": 3.4685, "step": 7308 }, { "epoch": 0.36, "grad_norm": 0.5432910323143005, "learning_rate": 0.0005791410201306409, "loss": 3.4025, "step": 7309 }, { "epoch": 0.36, "grad_norm": 0.5559453964233398, "learning_rate": 0.0005791353790927545, "loss": 3.3999, "step": 7310 }, { "epoch": 0.36, "grad_norm": 0.5492426753044128, "learning_rate": 0.0005791297373196791, "loss": 3.4373, "step": 7311 }, { "epoch": 0.36, "grad_norm": 0.5173240900039673, "learning_rate": 0.0005791240948114297, "loss": 3.2916, "step": 7312 }, { "epoch": 0.36, "grad_norm": 0.5640772581100464, "learning_rate": 0.0005791184515680213, "loss": 3.447, "step": 7313 }, { "epoch": 0.36, "grad_norm": 0.5233004093170166, "learning_rate": 0.0005791128075894685, "loss": 3.5497, "step": 7314 }, { "epoch": 0.36, "grad_norm": 0.5180065631866455, "learning_rate": 0.0005791071628757862, "loss": 3.3526, "step": 7315 }, { "epoch": 0.36, "grad_norm": 0.5473960041999817, "learning_rate": 0.0005791015174269895, "loss": 3.1132, "step": 7316 }, { "epoch": 0.36, "grad_norm": 0.5282230377197266, "learning_rate": 0.0005790958712430931, "loss": 3.4481, "step": 7317 }, { "epoch": 0.36, "grad_norm": 0.5316844582557678, "learning_rate": 0.0005790902243241119, "loss": 3.4213, "step": 7318 }, { "epoch": 0.36, "grad_norm": 0.5286957621574402, "learning_rate": 0.0005790845766700608, "loss": 3.5678, "step": 7319 }, { "epoch": 0.36, "grad_norm": 0.49466997385025024, "learning_rate": 0.0005790789282809545, "loss": 3.1566, "step": 7320 }, { "epoch": 0.36, "grad_norm": 0.5194963812828064, "learning_rate": 0.0005790732791568081, "loss": 3.2, "step": 7321 }, { "epoch": 0.36, "grad_norm": 0.5403347611427307, "learning_rate": 0.0005790676292976363, "loss": 3.5393, "step": 7322 }, { "epoch": 0.36, "grad_norm": 0.5521409511566162, "learning_rate": 0.0005790619787034542, "loss": 3.1492, "step": 7323 }, { "epoch": 0.36, "grad_norm": 0.5029959678649902, "learning_rate": 0.0005790563273742764, "loss": 3.2783, "step": 7324 }, { "epoch": 0.36, "grad_norm": 0.5743463039398193, "learning_rate": 0.0005790506753101181, "loss": 3.3343, "step": 7325 }, { "epoch": 0.36, "grad_norm": 0.5631399154663086, "learning_rate": 0.0005790450225109939, "loss": 3.4623, "step": 7326 }, { "epoch": 0.36, "grad_norm": 0.5515002608299255, "learning_rate": 0.0005790393689769188, "loss": 3.5293, "step": 7327 }, { "epoch": 0.36, "grad_norm": 0.5547970533370972, "learning_rate": 0.0005790337147079078, "loss": 3.3389, "step": 7328 }, { "epoch": 0.36, "grad_norm": 0.5851017832756042, "learning_rate": 0.0005790280597039757, "loss": 3.4403, "step": 7329 }, { "epoch": 0.36, "grad_norm": 0.5544925332069397, "learning_rate": 0.0005790224039651374, "loss": 3.4946, "step": 7330 }, { "epoch": 0.36, "grad_norm": 0.5337487459182739, "learning_rate": 0.0005790167474914077, "loss": 3.379, "step": 7331 }, { "epoch": 0.36, "grad_norm": 0.5269622206687927, "learning_rate": 0.0005790110902828017, "loss": 3.2976, "step": 7332 }, { "epoch": 0.36, "grad_norm": 0.5398240089416504, "learning_rate": 0.000579005432339334, "loss": 3.4284, "step": 7333 }, { "epoch": 0.36, "grad_norm": 0.5220947861671448, "learning_rate": 0.0005789997736610199, "loss": 3.2636, "step": 7334 }, { "epoch": 0.36, "grad_norm": 0.5519540309906006, "learning_rate": 0.000578994114247874, "loss": 3.1985, "step": 7335 }, { "epoch": 0.36, "grad_norm": 0.604718029499054, "learning_rate": 0.0005789884540999112, "loss": 3.5494, "step": 7336 }, { "epoch": 0.36, "grad_norm": 0.5395439863204956, "learning_rate": 0.0005789827932171466, "loss": 3.4658, "step": 7337 }, { "epoch": 0.36, "grad_norm": 0.5418725609779358, "learning_rate": 0.0005789771315995951, "loss": 3.4172, "step": 7338 }, { "epoch": 0.36, "grad_norm": 0.5764614343643188, "learning_rate": 0.0005789714692472714, "loss": 3.2426, "step": 7339 }, { "epoch": 0.36, "grad_norm": 0.5681049227714539, "learning_rate": 0.0005789658061601905, "loss": 3.3262, "step": 7340 }, { "epoch": 0.36, "grad_norm": 0.5404443144798279, "learning_rate": 0.0005789601423383675, "loss": 3.6112, "step": 7341 }, { "epoch": 0.36, "grad_norm": 0.5424375534057617, "learning_rate": 0.000578954477781817, "loss": 3.5564, "step": 7342 }, { "epoch": 0.36, "grad_norm": 0.5254672765731812, "learning_rate": 0.0005789488124905542, "loss": 3.271, "step": 7343 }, { "epoch": 0.36, "grad_norm": 0.5020593404769897, "learning_rate": 0.000578943146464594, "loss": 3.3267, "step": 7344 }, { "epoch": 0.36, "grad_norm": 0.5463967323303223, "learning_rate": 0.0005789374797039511, "loss": 3.4191, "step": 7345 }, { "epoch": 0.36, "grad_norm": 0.5019806623458862, "learning_rate": 0.0005789318122086406, "loss": 3.2696, "step": 7346 }, { "epoch": 0.36, "grad_norm": 0.5514928698539734, "learning_rate": 0.0005789261439786774, "loss": 3.1895, "step": 7347 }, { "epoch": 0.36, "grad_norm": 0.512698769569397, "learning_rate": 0.0005789204750140764, "loss": 3.4189, "step": 7348 }, { "epoch": 0.36, "grad_norm": 0.5055906772613525, "learning_rate": 0.0005789148053148525, "loss": 3.2841, "step": 7349 }, { "epoch": 0.36, "grad_norm": 0.527269721031189, "learning_rate": 0.0005789091348810208, "loss": 3.5163, "step": 7350 }, { "epoch": 0.36, "grad_norm": 0.5826881527900696, "learning_rate": 0.0005789034637125961, "loss": 3.4445, "step": 7351 }, { "epoch": 0.36, "grad_norm": 0.5765799283981323, "learning_rate": 0.0005788977918095932, "loss": 3.2363, "step": 7352 }, { "epoch": 0.36, "grad_norm": 0.5874758362770081, "learning_rate": 0.0005788921191720273, "loss": 3.5972, "step": 7353 }, { "epoch": 0.36, "grad_norm": 0.5443326234817505, "learning_rate": 0.0005788864457999133, "loss": 3.3818, "step": 7354 }, { "epoch": 0.36, "grad_norm": 0.5976613759994507, "learning_rate": 0.000578880771693266, "loss": 3.1729, "step": 7355 }, { "epoch": 0.36, "grad_norm": 0.571810781955719, "learning_rate": 0.0005788750968521003, "loss": 3.3752, "step": 7356 }, { "epoch": 0.36, "grad_norm": 0.5518274307250977, "learning_rate": 0.0005788694212764314, "loss": 3.3239, "step": 7357 }, { "epoch": 0.36, "grad_norm": 0.5681546330451965, "learning_rate": 0.0005788637449662742, "loss": 3.3544, "step": 7358 }, { "epoch": 0.36, "grad_norm": 0.5265949368476868, "learning_rate": 0.0005788580679216434, "loss": 3.3936, "step": 7359 }, { "epoch": 0.36, "grad_norm": 0.5548690557479858, "learning_rate": 0.0005788523901425541, "loss": 3.2901, "step": 7360 }, { "epoch": 0.36, "grad_norm": 0.5343828201293945, "learning_rate": 0.0005788467116290214, "loss": 3.2131, "step": 7361 }, { "epoch": 0.36, "grad_norm": 0.5076448321342468, "learning_rate": 0.0005788410323810602, "loss": 3.3964, "step": 7362 }, { "epoch": 0.36, "grad_norm": 0.49037474393844604, "learning_rate": 0.0005788353523986852, "loss": 3.2415, "step": 7363 }, { "epoch": 0.36, "grad_norm": 0.5263360142707825, "learning_rate": 0.0005788296716819116, "loss": 3.4041, "step": 7364 }, { "epoch": 0.36, "grad_norm": 0.5553779006004333, "learning_rate": 0.0005788239902307543, "loss": 3.3971, "step": 7365 }, { "epoch": 0.36, "grad_norm": 0.5806408524513245, "learning_rate": 0.0005788183080452283, "loss": 3.1735, "step": 7366 }, { "epoch": 0.36, "grad_norm": 0.5436879396438599, "learning_rate": 0.0005788126251253486, "loss": 3.5634, "step": 7367 }, { "epoch": 0.36, "grad_norm": 0.5230838656425476, "learning_rate": 0.00057880694147113, "loss": 3.2974, "step": 7368 }, { "epoch": 0.36, "grad_norm": 0.630568265914917, "learning_rate": 0.0005788012570825875, "loss": 3.2782, "step": 7369 }, { "epoch": 0.36, "grad_norm": 0.5829651951789856, "learning_rate": 0.0005787955719597362, "loss": 3.0793, "step": 7370 }, { "epoch": 0.36, "grad_norm": 0.5027291774749756, "learning_rate": 0.0005787898861025912, "loss": 3.4173, "step": 7371 }, { "epoch": 0.36, "grad_norm": 0.5039538145065308, "learning_rate": 0.0005787841995111671, "loss": 3.2034, "step": 7372 }, { "epoch": 0.36, "grad_norm": 0.5734065175056458, "learning_rate": 0.0005787785121854791, "loss": 3.459, "step": 7373 }, { "epoch": 0.36, "grad_norm": 0.5559008717536926, "learning_rate": 0.0005787728241255422, "loss": 3.1911, "step": 7374 }, { "epoch": 0.36, "grad_norm": 0.5591650605201721, "learning_rate": 0.0005787671353313712, "loss": 3.1949, "step": 7375 }, { "epoch": 0.36, "grad_norm": 0.52269047498703, "learning_rate": 0.0005787614458029813, "loss": 3.3678, "step": 7376 }, { "epoch": 0.36, "grad_norm": 0.5631672143936157, "learning_rate": 0.0005787557555403875, "loss": 3.3729, "step": 7377 }, { "epoch": 0.36, "grad_norm": 0.5107264518737793, "learning_rate": 0.0005787500645436047, "loss": 3.3954, "step": 7378 }, { "epoch": 0.36, "grad_norm": 0.5732921361923218, "learning_rate": 0.0005787443728126478, "loss": 3.4472, "step": 7379 }, { "epoch": 0.36, "grad_norm": 0.5226930975914001, "learning_rate": 0.0005787386803475318, "loss": 3.4802, "step": 7380 }, { "epoch": 0.36, "grad_norm": 0.5663575530052185, "learning_rate": 0.0005787329871482717, "loss": 3.3462, "step": 7381 }, { "epoch": 0.36, "grad_norm": 0.5493120551109314, "learning_rate": 0.0005787272932148827, "loss": 3.2889, "step": 7382 }, { "epoch": 0.36, "grad_norm": 0.5059551000595093, "learning_rate": 0.0005787215985473797, "loss": 3.3501, "step": 7383 }, { "epoch": 0.36, "grad_norm": 0.5528244972229004, "learning_rate": 0.0005787159031457776, "loss": 3.3196, "step": 7384 }, { "epoch": 0.36, "grad_norm": 0.519409716129303, "learning_rate": 0.0005787102070100914, "loss": 3.2856, "step": 7385 }, { "epoch": 0.36, "grad_norm": 0.5072803497314453, "learning_rate": 0.0005787045101403362, "loss": 3.4766, "step": 7386 }, { "epoch": 0.36, "grad_norm": 0.5262261033058167, "learning_rate": 0.0005786988125365269, "loss": 3.3746, "step": 7387 }, { "epoch": 0.36, "grad_norm": 0.5350049734115601, "learning_rate": 0.0005786931141986786, "loss": 3.2329, "step": 7388 }, { "epoch": 0.36, "grad_norm": 0.5463743805885315, "learning_rate": 0.0005786874151268062, "loss": 3.2722, "step": 7389 }, { "epoch": 0.36, "grad_norm": 0.5149025321006775, "learning_rate": 0.0005786817153209249, "loss": 3.243, "step": 7390 }, { "epoch": 0.36, "grad_norm": 0.5367695093154907, "learning_rate": 0.0005786760147810496, "loss": 3.4026, "step": 7391 }, { "epoch": 0.36, "grad_norm": 0.5625688433647156, "learning_rate": 0.0005786703135071952, "loss": 3.3234, "step": 7392 }, { "epoch": 0.36, "grad_norm": 0.5635698437690735, "learning_rate": 0.000578664611499377, "loss": 3.3182, "step": 7393 }, { "epoch": 0.36, "grad_norm": 0.5578081607818604, "learning_rate": 0.0005786589087576097, "loss": 3.1293, "step": 7394 }, { "epoch": 0.36, "grad_norm": 0.5266342163085938, "learning_rate": 0.0005786532052819085, "loss": 3.4332, "step": 7395 }, { "epoch": 0.36, "grad_norm": 0.524929940700531, "learning_rate": 0.0005786475010722883, "loss": 3.1655, "step": 7396 }, { "epoch": 0.36, "grad_norm": 0.521431028842926, "learning_rate": 0.0005786417961287643, "loss": 3.2723, "step": 7397 }, { "epoch": 0.36, "grad_norm": 0.5528025031089783, "learning_rate": 0.0005786360904513515, "loss": 3.495, "step": 7398 }, { "epoch": 0.36, "grad_norm": 0.47876498103141785, "learning_rate": 0.0005786303840400647, "loss": 3.2928, "step": 7399 }, { "epoch": 0.36, "grad_norm": 0.5592080354690552, "learning_rate": 0.0005786246768949193, "loss": 3.1918, "step": 7400 }, { "epoch": 0.36, "grad_norm": 0.5398855209350586, "learning_rate": 0.0005786189690159299, "loss": 3.3853, "step": 7401 }, { "epoch": 0.36, "grad_norm": 0.5718713402748108, "learning_rate": 0.0005786132604031119, "loss": 3.3739, "step": 7402 }, { "epoch": 0.36, "grad_norm": 0.5837875604629517, "learning_rate": 0.0005786075510564801, "loss": 3.3774, "step": 7403 }, { "epoch": 0.36, "grad_norm": 0.5221856236457825, "learning_rate": 0.0005786018409760497, "loss": 3.4436, "step": 7404 }, { "epoch": 0.36, "grad_norm": 0.5874915719032288, "learning_rate": 0.0005785961301618356, "loss": 3.3879, "step": 7405 }, { "epoch": 0.36, "grad_norm": 0.5608260035514832, "learning_rate": 0.000578590418613853, "loss": 3.3331, "step": 7406 }, { "epoch": 0.36, "grad_norm": 0.5786241292953491, "learning_rate": 0.0005785847063321168, "loss": 3.2905, "step": 7407 }, { "epoch": 0.36, "grad_norm": 0.5414553284645081, "learning_rate": 0.0005785789933166422, "loss": 3.2087, "step": 7408 }, { "epoch": 0.36, "grad_norm": 0.5226951837539673, "learning_rate": 0.000578573279567444, "loss": 3.4015, "step": 7409 }, { "epoch": 0.36, "grad_norm": 0.5430212616920471, "learning_rate": 0.0005785675650845374, "loss": 3.208, "step": 7410 }, { "epoch": 0.36, "grad_norm": 0.5425985455513, "learning_rate": 0.0005785618498679375, "loss": 3.3109, "step": 7411 }, { "epoch": 0.36, "grad_norm": 0.5451919436454773, "learning_rate": 0.0005785561339176593, "loss": 3.1937, "step": 7412 }, { "epoch": 0.36, "grad_norm": 0.5355823636054993, "learning_rate": 0.0005785504172337178, "loss": 3.2868, "step": 7413 }, { "epoch": 0.36, "grad_norm": 0.4789738059043884, "learning_rate": 0.0005785446998161282, "loss": 3.4312, "step": 7414 }, { "epoch": 0.36, "grad_norm": 0.5494072437286377, "learning_rate": 0.0005785389816649054, "loss": 3.481, "step": 7415 }, { "epoch": 0.36, "grad_norm": 0.529240608215332, "learning_rate": 0.0005785332627800645, "loss": 3.231, "step": 7416 }, { "epoch": 0.36, "grad_norm": 0.5651838779449463, "learning_rate": 0.0005785275431616207, "loss": 3.3466, "step": 7417 }, { "epoch": 0.36, "grad_norm": 0.5189810395240784, "learning_rate": 0.0005785218228095889, "loss": 3.2454, "step": 7418 }, { "epoch": 0.36, "grad_norm": 0.5465260744094849, "learning_rate": 0.0005785161017239842, "loss": 3.3845, "step": 7419 }, { "epoch": 0.36, "grad_norm": 0.5511559844017029, "learning_rate": 0.0005785103799048218, "loss": 3.4355, "step": 7420 }, { "epoch": 0.36, "grad_norm": 0.5183231830596924, "learning_rate": 0.0005785046573521165, "loss": 3.3434, "step": 7421 }, { "epoch": 0.36, "grad_norm": 0.5259350538253784, "learning_rate": 0.0005784989340658837, "loss": 3.3595, "step": 7422 }, { "epoch": 0.36, "grad_norm": 0.5619202852249146, "learning_rate": 0.000578493210046138, "loss": 3.4631, "step": 7423 }, { "epoch": 0.36, "grad_norm": 0.5505042672157288, "learning_rate": 0.000578487485292895, "loss": 3.3808, "step": 7424 }, { "epoch": 0.36, "grad_norm": 0.5139535665512085, "learning_rate": 0.0005784817598061696, "loss": 3.2009, "step": 7425 }, { "epoch": 0.36, "grad_norm": 0.580228328704834, "learning_rate": 0.0005784760335859769, "loss": 3.1475, "step": 7426 }, { "epoch": 0.36, "grad_norm": 0.535790205001831, "learning_rate": 0.0005784703066323317, "loss": 3.2759, "step": 7427 }, { "epoch": 0.36, "grad_norm": 0.5042516589164734, "learning_rate": 0.0005784645789452494, "loss": 3.1972, "step": 7428 }, { "epoch": 0.36, "grad_norm": 0.5909183025360107, "learning_rate": 0.000578458850524745, "loss": 3.3301, "step": 7429 }, { "epoch": 0.36, "grad_norm": 0.5318119525909424, "learning_rate": 0.0005784531213708336, "loss": 3.4631, "step": 7430 }, { "epoch": 0.36, "grad_norm": 0.4836869537830353, "learning_rate": 0.0005784473914835302, "loss": 3.5986, "step": 7431 }, { "epoch": 0.36, "grad_norm": 0.5234432816505432, "learning_rate": 0.0005784416608628501, "loss": 3.1308, "step": 7432 }, { "epoch": 0.36, "grad_norm": 0.6105952858924866, "learning_rate": 0.0005784359295088081, "loss": 3.3292, "step": 7433 }, { "epoch": 0.36, "grad_norm": 0.600591242313385, "learning_rate": 0.0005784301974214195, "loss": 3.3737, "step": 7434 }, { "epoch": 0.36, "grad_norm": 0.48450472950935364, "learning_rate": 0.0005784244646006993, "loss": 3.2875, "step": 7435 }, { "epoch": 0.36, "grad_norm": 0.6669772863388062, "learning_rate": 0.0005784187310466628, "loss": 3.239, "step": 7436 }, { "epoch": 0.36, "grad_norm": 0.5766210556030273, "learning_rate": 0.0005784129967593249, "loss": 3.2816, "step": 7437 }, { "epoch": 0.36, "grad_norm": 0.5472173690795898, "learning_rate": 0.0005784072617387006, "loss": 3.3769, "step": 7438 }, { "epoch": 0.36, "grad_norm": 0.5345730185508728, "learning_rate": 0.0005784015259848053, "loss": 3.4177, "step": 7439 }, { "epoch": 0.36, "grad_norm": 0.5472462177276611, "learning_rate": 0.0005783957894976538, "loss": 3.3872, "step": 7440 }, { "epoch": 0.36, "grad_norm": 0.5371647477149963, "learning_rate": 0.0005783900522772615, "loss": 3.4841, "step": 7441 }, { "epoch": 0.36, "grad_norm": 0.5290404558181763, "learning_rate": 0.0005783843143236433, "loss": 3.2963, "step": 7442 }, { "epoch": 0.36, "grad_norm": 0.5215094685554504, "learning_rate": 0.0005783785756368145, "loss": 3.3373, "step": 7443 }, { "epoch": 0.36, "grad_norm": 0.61955726146698, "learning_rate": 0.0005783728362167901, "loss": 3.3818, "step": 7444 }, { "epoch": 0.36, "grad_norm": 0.5387982130050659, "learning_rate": 0.0005783670960635851, "loss": 3.3528, "step": 7445 }, { "epoch": 0.36, "grad_norm": 0.553604245185852, "learning_rate": 0.0005783613551772149, "loss": 3.3234, "step": 7446 }, { "epoch": 0.36, "grad_norm": 0.5161128640174866, "learning_rate": 0.0005783556135576942, "loss": 3.2385, "step": 7447 }, { "epoch": 0.37, "grad_norm": 0.5468826293945312, "learning_rate": 0.0005783498712050386, "loss": 3.3768, "step": 7448 }, { "epoch": 0.37, "grad_norm": 0.5205458402633667, "learning_rate": 0.000578344128119263, "loss": 3.4234, "step": 7449 }, { "epoch": 0.37, "grad_norm": 0.5659615397453308, "learning_rate": 0.0005783383843003825, "loss": 3.2642, "step": 7450 }, { "epoch": 0.37, "grad_norm": 0.6167935132980347, "learning_rate": 0.0005783326397484123, "loss": 3.4605, "step": 7451 }, { "epoch": 0.37, "grad_norm": 0.5576913356781006, "learning_rate": 0.0005783268944633675, "loss": 3.4278, "step": 7452 }, { "epoch": 0.37, "grad_norm": 0.5447071194648743, "learning_rate": 0.0005783211484452632, "loss": 3.2907, "step": 7453 }, { "epoch": 0.37, "grad_norm": 0.49073418974876404, "learning_rate": 0.0005783154016941145, "loss": 3.3534, "step": 7454 }, { "epoch": 0.37, "grad_norm": 0.5985206365585327, "learning_rate": 0.0005783096542099366, "loss": 3.1996, "step": 7455 }, { "epoch": 0.37, "grad_norm": 0.5504509806632996, "learning_rate": 0.0005783039059927448, "loss": 3.3993, "step": 7456 }, { "epoch": 0.37, "grad_norm": 0.5634692907333374, "learning_rate": 0.0005782981570425539, "loss": 3.3579, "step": 7457 }, { "epoch": 0.37, "grad_norm": 0.49616754055023193, "learning_rate": 0.0005782924073593792, "loss": 3.3781, "step": 7458 }, { "epoch": 0.37, "grad_norm": 0.533212423324585, "learning_rate": 0.0005782866569432359, "loss": 3.1977, "step": 7459 }, { "epoch": 0.37, "grad_norm": 0.5390735864639282, "learning_rate": 0.0005782809057941392, "loss": 3.358, "step": 7460 }, { "epoch": 0.37, "grad_norm": 0.5744284391403198, "learning_rate": 0.000578275153912104, "loss": 3.3979, "step": 7461 }, { "epoch": 0.37, "grad_norm": 0.5401898622512817, "learning_rate": 0.0005782694012971458, "loss": 3.557, "step": 7462 }, { "epoch": 0.37, "grad_norm": 0.5606440305709839, "learning_rate": 0.0005782636479492793, "loss": 3.2477, "step": 7463 }, { "epoch": 0.37, "grad_norm": 0.48706990480422974, "learning_rate": 0.0005782578938685201, "loss": 3.5734, "step": 7464 }, { "epoch": 0.37, "grad_norm": 0.5521231293678284, "learning_rate": 0.0005782521390548831, "loss": 3.3121, "step": 7465 }, { "epoch": 0.37, "grad_norm": 0.5380448698997498, "learning_rate": 0.0005782463835083834, "loss": 3.4478, "step": 7466 }, { "epoch": 0.37, "grad_norm": 0.5551396608352661, "learning_rate": 0.0005782406272290364, "loss": 3.2938, "step": 7467 }, { "epoch": 0.37, "grad_norm": 0.5721957683563232, "learning_rate": 0.0005782348702168572, "loss": 3.3066, "step": 7468 }, { "epoch": 0.37, "grad_norm": 0.5749885439872742, "learning_rate": 0.0005782291124718608, "loss": 3.3865, "step": 7469 }, { "epoch": 0.37, "grad_norm": 0.5412680506706238, "learning_rate": 0.0005782233539940625, "loss": 3.1974, "step": 7470 }, { "epoch": 0.37, "grad_norm": 0.530744731426239, "learning_rate": 0.0005782175947834774, "loss": 3.42, "step": 7471 }, { "epoch": 0.37, "grad_norm": 0.5663665533065796, "learning_rate": 0.0005782118348401207, "loss": 3.4735, "step": 7472 }, { "epoch": 0.37, "grad_norm": 0.5773357152938843, "learning_rate": 0.0005782060741640075, "loss": 3.287, "step": 7473 }, { "epoch": 0.37, "grad_norm": 0.5321745872497559, "learning_rate": 0.000578200312755153, "loss": 3.2528, "step": 7474 }, { "epoch": 0.37, "grad_norm": 0.543752908706665, "learning_rate": 0.0005781945506135726, "loss": 3.1304, "step": 7475 }, { "epoch": 0.37, "grad_norm": 0.5263063311576843, "learning_rate": 0.0005781887877392812, "loss": 3.5356, "step": 7476 }, { "epoch": 0.37, "grad_norm": 0.5132307410240173, "learning_rate": 0.0005781830241322942, "loss": 3.0732, "step": 7477 }, { "epoch": 0.37, "grad_norm": 0.5528070330619812, "learning_rate": 0.0005781772597926266, "loss": 3.5516, "step": 7478 }, { "epoch": 0.37, "grad_norm": 0.5620551109313965, "learning_rate": 0.0005781714947202935, "loss": 3.5392, "step": 7479 }, { "epoch": 0.37, "grad_norm": 0.5856842398643494, "learning_rate": 0.0005781657289153103, "loss": 3.4985, "step": 7480 }, { "epoch": 0.37, "grad_norm": 0.49245211482048035, "learning_rate": 0.0005781599623776922, "loss": 3.2934, "step": 7481 }, { "epoch": 0.37, "grad_norm": 0.5112742781639099, "learning_rate": 0.0005781541951074541, "loss": 3.39, "step": 7482 }, { "epoch": 0.37, "grad_norm": 0.567556619644165, "learning_rate": 0.0005781484271046115, "loss": 3.3748, "step": 7483 }, { "epoch": 0.37, "grad_norm": 0.5500913262367249, "learning_rate": 0.0005781426583691794, "loss": 3.1794, "step": 7484 }, { "epoch": 0.37, "grad_norm": 0.588733434677124, "learning_rate": 0.0005781368889011731, "loss": 3.3915, "step": 7485 }, { "epoch": 0.37, "grad_norm": 0.5132182240486145, "learning_rate": 0.0005781311187006079, "loss": 3.2548, "step": 7486 }, { "epoch": 0.37, "grad_norm": 0.5105894804000854, "learning_rate": 0.0005781253477674987, "loss": 3.4658, "step": 7487 }, { "epoch": 0.37, "grad_norm": 0.5359848737716675, "learning_rate": 0.0005781195761018609, "loss": 3.2688, "step": 7488 }, { "epoch": 0.37, "grad_norm": 0.5214976072311401, "learning_rate": 0.0005781138037037096, "loss": 3.3225, "step": 7489 }, { "epoch": 0.37, "grad_norm": 0.5228166580200195, "learning_rate": 0.0005781080305730603, "loss": 3.2782, "step": 7490 }, { "epoch": 0.37, "grad_norm": 0.5464044809341431, "learning_rate": 0.0005781022567099277, "loss": 3.2469, "step": 7491 }, { "epoch": 0.37, "grad_norm": 0.5573117733001709, "learning_rate": 0.0005780964821143274, "loss": 3.0187, "step": 7492 }, { "epoch": 0.37, "grad_norm": 0.5702837109565735, "learning_rate": 0.0005780907067862744, "loss": 3.2428, "step": 7493 }, { "epoch": 0.37, "grad_norm": 0.6031702160835266, "learning_rate": 0.0005780849307257842, "loss": 3.4579, "step": 7494 }, { "epoch": 0.37, "grad_norm": 0.5179644823074341, "learning_rate": 0.0005780791539328716, "loss": 3.1622, "step": 7495 }, { "epoch": 0.37, "grad_norm": 0.5007571578025818, "learning_rate": 0.0005780733764075521, "loss": 3.4954, "step": 7496 }, { "epoch": 0.37, "grad_norm": 0.5171201825141907, "learning_rate": 0.0005780675981498409, "loss": 3.379, "step": 7497 }, { "epoch": 0.37, "grad_norm": 0.5398955941200256, "learning_rate": 0.0005780618191597531, "loss": 3.3197, "step": 7498 }, { "epoch": 0.37, "grad_norm": 0.5368615984916687, "learning_rate": 0.000578056039437304, "loss": 3.5917, "step": 7499 }, { "epoch": 0.37, "grad_norm": 0.5254148244857788, "learning_rate": 0.0005780502589825087, "loss": 3.3423, "step": 7500 }, { "epoch": 0.37, "grad_norm": 0.49917274713516235, "learning_rate": 0.0005780444777953827, "loss": 3.3729, "step": 7501 }, { "epoch": 0.37, "grad_norm": 0.579575777053833, "learning_rate": 0.000578038695875941, "loss": 3.1839, "step": 7502 }, { "epoch": 0.37, "grad_norm": 0.5500216484069824, "learning_rate": 0.0005780329132241989, "loss": 3.4427, "step": 7503 }, { "epoch": 0.37, "grad_norm": 0.5117226243019104, "learning_rate": 0.0005780271298401715, "loss": 3.043, "step": 7504 }, { "epoch": 0.37, "grad_norm": 0.57242751121521, "learning_rate": 0.0005780213457238743, "loss": 3.2203, "step": 7505 }, { "epoch": 0.37, "grad_norm": 0.538989245891571, "learning_rate": 0.0005780155608753223, "loss": 3.4584, "step": 7506 }, { "epoch": 0.37, "grad_norm": 0.5599838495254517, "learning_rate": 0.0005780097752945307, "loss": 3.4769, "step": 7507 }, { "epoch": 0.37, "grad_norm": 0.5817453265190125, "learning_rate": 0.0005780039889815151, "loss": 3.4676, "step": 7508 }, { "epoch": 0.37, "grad_norm": 0.5717339515686035, "learning_rate": 0.0005779982019362903, "loss": 3.1948, "step": 7509 }, { "epoch": 0.37, "grad_norm": 0.5515158772468567, "learning_rate": 0.0005779924141588718, "loss": 3.3845, "step": 7510 }, { "epoch": 0.37, "grad_norm": 0.5656583905220032, "learning_rate": 0.0005779866256492749, "loss": 3.3043, "step": 7511 }, { "epoch": 0.37, "grad_norm": 0.5461367964744568, "learning_rate": 0.0005779808364075146, "loss": 3.2265, "step": 7512 }, { "epoch": 0.37, "grad_norm": 0.5367287397384644, "learning_rate": 0.0005779750464336062, "loss": 3.3777, "step": 7513 }, { "epoch": 0.37, "grad_norm": 0.5329413414001465, "learning_rate": 0.0005779692557275651, "loss": 3.3883, "step": 7514 }, { "epoch": 0.37, "grad_norm": 0.5147910714149475, "learning_rate": 0.0005779634642894066, "loss": 3.1705, "step": 7515 }, { "epoch": 0.37, "grad_norm": 0.5768789052963257, "learning_rate": 0.0005779576721191457, "loss": 3.2288, "step": 7516 }, { "epoch": 0.37, "grad_norm": 0.5086293816566467, "learning_rate": 0.0005779518792167978, "loss": 3.3029, "step": 7517 }, { "epoch": 0.37, "grad_norm": 0.5314181447029114, "learning_rate": 0.0005779460855823782, "loss": 3.5192, "step": 7518 }, { "epoch": 0.37, "grad_norm": 0.5063535571098328, "learning_rate": 0.0005779402912159021, "loss": 3.3481, "step": 7519 }, { "epoch": 0.37, "grad_norm": 0.5236873030662537, "learning_rate": 0.0005779344961173847, "loss": 3.3957, "step": 7520 }, { "epoch": 0.37, "grad_norm": 0.5629249215126038, "learning_rate": 0.0005779287002868413, "loss": 3.2262, "step": 7521 }, { "epoch": 0.37, "grad_norm": 0.6165677905082703, "learning_rate": 0.0005779229037242873, "loss": 3.184, "step": 7522 }, { "epoch": 0.37, "grad_norm": 0.5311621427536011, "learning_rate": 0.0005779171064297378, "loss": 3.3883, "step": 7523 }, { "epoch": 0.37, "grad_norm": 0.5682266354560852, "learning_rate": 0.0005779113084032082, "loss": 3.4386, "step": 7524 }, { "epoch": 0.37, "grad_norm": 0.5127180814743042, "learning_rate": 0.0005779055096447136, "loss": 3.3266, "step": 7525 }, { "epoch": 0.37, "grad_norm": 0.49226856231689453, "learning_rate": 0.0005778997101542694, "loss": 3.3149, "step": 7526 }, { "epoch": 0.37, "grad_norm": 0.5309831500053406, "learning_rate": 0.0005778939099318908, "loss": 3.2658, "step": 7527 }, { "epoch": 0.37, "grad_norm": 0.5468692779541016, "learning_rate": 0.0005778881089775933, "loss": 3.3354, "step": 7528 }, { "epoch": 0.37, "grad_norm": 0.5492680072784424, "learning_rate": 0.0005778823072913918, "loss": 3.3651, "step": 7529 }, { "epoch": 0.37, "grad_norm": 0.5209020972251892, "learning_rate": 0.0005778765048733019, "loss": 3.3828, "step": 7530 }, { "epoch": 0.37, "grad_norm": 0.5251026153564453, "learning_rate": 0.0005778707017233387, "loss": 3.4814, "step": 7531 }, { "epoch": 0.37, "grad_norm": 0.5343338847160339, "learning_rate": 0.0005778648978415176, "loss": 3.3883, "step": 7532 }, { "epoch": 0.37, "grad_norm": 0.5628687143325806, "learning_rate": 0.0005778590932278537, "loss": 3.4574, "step": 7533 }, { "epoch": 0.37, "grad_norm": 0.5103763341903687, "learning_rate": 0.0005778532878823625, "loss": 3.1286, "step": 7534 }, { "epoch": 0.37, "grad_norm": 0.5265288949012756, "learning_rate": 0.0005778474818050593, "loss": 3.2203, "step": 7535 }, { "epoch": 0.37, "grad_norm": 0.5499712824821472, "learning_rate": 0.0005778416749959592, "loss": 3.1883, "step": 7536 }, { "epoch": 0.37, "grad_norm": 0.5209925174713135, "learning_rate": 0.0005778358674550778, "loss": 3.2346, "step": 7537 }, { "epoch": 0.37, "grad_norm": 0.5306970477104187, "learning_rate": 0.00057783005918243, "loss": 3.3883, "step": 7538 }, { "epoch": 0.37, "grad_norm": 0.506366491317749, "learning_rate": 0.0005778242501780313, "loss": 3.2689, "step": 7539 }, { "epoch": 0.37, "grad_norm": 0.5261942744255066, "learning_rate": 0.0005778184404418971, "loss": 3.5414, "step": 7540 }, { "epoch": 0.37, "grad_norm": 0.539635181427002, "learning_rate": 0.0005778126299740425, "loss": 3.2619, "step": 7541 }, { "epoch": 0.37, "grad_norm": 0.5381528735160828, "learning_rate": 0.0005778068187744829, "loss": 3.2378, "step": 7542 }, { "epoch": 0.37, "grad_norm": 0.5434399843215942, "learning_rate": 0.0005778010068432336, "loss": 3.1842, "step": 7543 }, { "epoch": 0.37, "grad_norm": 0.5563649535179138, "learning_rate": 0.0005777951941803099, "loss": 3.3198, "step": 7544 }, { "epoch": 0.37, "grad_norm": 0.5185420513153076, "learning_rate": 0.0005777893807857273, "loss": 3.3414, "step": 7545 }, { "epoch": 0.37, "grad_norm": 0.5348244905471802, "learning_rate": 0.0005777835666595007, "loss": 3.2746, "step": 7546 }, { "epoch": 0.37, "grad_norm": 0.5372639894485474, "learning_rate": 0.0005777777518016458, "loss": 3.2459, "step": 7547 }, { "epoch": 0.37, "grad_norm": 0.5764162540435791, "learning_rate": 0.0005777719362121777, "loss": 3.3808, "step": 7548 }, { "epoch": 0.37, "grad_norm": 0.533909022808075, "learning_rate": 0.0005777661198911118, "loss": 3.4382, "step": 7549 }, { "epoch": 0.37, "grad_norm": 0.5675473809242249, "learning_rate": 0.0005777603028384634, "loss": 3.1793, "step": 7550 }, { "epoch": 0.37, "grad_norm": 0.5419423580169678, "learning_rate": 0.0005777544850542477, "loss": 3.309, "step": 7551 }, { "epoch": 0.37, "grad_norm": 0.5336681008338928, "learning_rate": 0.0005777486665384802, "loss": 3.4056, "step": 7552 }, { "epoch": 0.37, "grad_norm": 0.5690992474555969, "learning_rate": 0.0005777428472911763, "loss": 3.4831, "step": 7553 }, { "epoch": 0.37, "grad_norm": 0.5622754693031311, "learning_rate": 0.000577737027312351, "loss": 3.3529, "step": 7554 }, { "epoch": 0.37, "grad_norm": 0.5477057695388794, "learning_rate": 0.0005777312066020199, "loss": 3.4004, "step": 7555 }, { "epoch": 0.37, "grad_norm": 0.5189183950424194, "learning_rate": 0.0005777253851601984, "loss": 3.319, "step": 7556 }, { "epoch": 0.37, "grad_norm": 0.5486992001533508, "learning_rate": 0.0005777195629869015, "loss": 3.3856, "step": 7557 }, { "epoch": 0.37, "grad_norm": 0.5244187712669373, "learning_rate": 0.0005777137400821448, "loss": 3.2891, "step": 7558 }, { "epoch": 0.37, "grad_norm": 0.5623378157615662, "learning_rate": 0.0005777079164459436, "loss": 3.3083, "step": 7559 }, { "epoch": 0.37, "grad_norm": 0.8148874044418335, "learning_rate": 0.000577702092078313, "loss": 3.3492, "step": 7560 }, { "epoch": 0.37, "grad_norm": 0.5145521759986877, "learning_rate": 0.0005776962669792687, "loss": 3.2571, "step": 7561 }, { "epoch": 0.37, "grad_norm": 0.5764961242675781, "learning_rate": 0.0005776904411488259, "loss": 3.4779, "step": 7562 }, { "epoch": 0.37, "grad_norm": 0.518203616142273, "learning_rate": 0.0005776846145869997, "loss": 3.4448, "step": 7563 }, { "epoch": 0.37, "grad_norm": 0.6169568300247192, "learning_rate": 0.0005776787872938059, "loss": 3.3586, "step": 7564 }, { "epoch": 0.37, "grad_norm": 0.47566235065460205, "learning_rate": 0.0005776729592692596, "loss": 3.5285, "step": 7565 }, { "epoch": 0.37, "grad_norm": 0.5244961380958557, "learning_rate": 0.0005776671305133761, "loss": 3.3522, "step": 7566 }, { "epoch": 0.37, "grad_norm": 0.5160118937492371, "learning_rate": 0.0005776613010261708, "loss": 3.2136, "step": 7567 }, { "epoch": 0.37, "grad_norm": 0.6301264762878418, "learning_rate": 0.0005776554708076591, "loss": 3.1727, "step": 7568 }, { "epoch": 0.37, "grad_norm": 0.48011279106140137, "learning_rate": 0.0005776496398578562, "loss": 3.3688, "step": 7569 }, { "epoch": 0.37, "grad_norm": 0.5248762369155884, "learning_rate": 0.0005776438081767778, "loss": 3.159, "step": 7570 }, { "epoch": 0.37, "grad_norm": 0.5746804475784302, "learning_rate": 0.000577637975764439, "loss": 3.5644, "step": 7571 }, { "epoch": 0.37, "grad_norm": 0.5382371544837952, "learning_rate": 0.0005776321426208551, "loss": 3.3489, "step": 7572 }, { "epoch": 0.37, "grad_norm": 0.5431787371635437, "learning_rate": 0.0005776263087460416, "loss": 3.0477, "step": 7573 }, { "epoch": 0.37, "grad_norm": 0.5550752878189087, "learning_rate": 0.0005776204741400138, "loss": 3.3814, "step": 7574 }, { "epoch": 0.37, "grad_norm": 0.5394303798675537, "learning_rate": 0.0005776146388027872, "loss": 3.32, "step": 7575 }, { "epoch": 0.37, "grad_norm": 0.5841784477233887, "learning_rate": 0.000577608802734377, "loss": 3.4877, "step": 7576 }, { "epoch": 0.37, "grad_norm": 0.5106384754180908, "learning_rate": 0.0005776029659347986, "loss": 3.2927, "step": 7577 }, { "epoch": 0.37, "grad_norm": 0.5529481172561646, "learning_rate": 0.0005775971284040675, "loss": 3.414, "step": 7578 }, { "epoch": 0.37, "grad_norm": 0.5255731344223022, "learning_rate": 0.0005775912901421989, "loss": 3.2171, "step": 7579 }, { "epoch": 0.37, "grad_norm": 0.4985384941101074, "learning_rate": 0.0005775854511492084, "loss": 3.4437, "step": 7580 }, { "epoch": 0.37, "grad_norm": 0.5763261318206787, "learning_rate": 0.0005775796114251111, "loss": 3.3172, "step": 7581 }, { "epoch": 0.37, "grad_norm": 0.5372400879859924, "learning_rate": 0.0005775737709699227, "loss": 3.5312, "step": 7582 }, { "epoch": 0.37, "grad_norm": 0.5711855888366699, "learning_rate": 0.0005775679297836582, "loss": 3.3675, "step": 7583 }, { "epoch": 0.37, "grad_norm": 0.5218439698219299, "learning_rate": 0.0005775620878663333, "loss": 3.2173, "step": 7584 }, { "epoch": 0.37, "grad_norm": 0.5360540151596069, "learning_rate": 0.0005775562452179632, "loss": 3.2145, "step": 7585 }, { "epoch": 0.37, "grad_norm": 0.5195031762123108, "learning_rate": 0.0005775504018385635, "loss": 3.5188, "step": 7586 }, { "epoch": 0.37, "grad_norm": 0.5842971801757812, "learning_rate": 0.0005775445577281494, "loss": 3.3013, "step": 7587 }, { "epoch": 0.37, "grad_norm": 0.5489726662635803, "learning_rate": 0.0005775387128867363, "loss": 3.1806, "step": 7588 }, { "epoch": 0.37, "grad_norm": 0.529917299747467, "learning_rate": 0.0005775328673143398, "loss": 3.1544, "step": 7589 }, { "epoch": 0.37, "grad_norm": 0.5455032587051392, "learning_rate": 0.0005775270210109751, "loss": 3.2344, "step": 7590 }, { "epoch": 0.37, "grad_norm": 0.5468717217445374, "learning_rate": 0.0005775211739766574, "loss": 3.3418, "step": 7591 }, { "epoch": 0.37, "grad_norm": 0.5684468746185303, "learning_rate": 0.0005775153262114025, "loss": 3.2833, "step": 7592 }, { "epoch": 0.37, "grad_norm": 0.502007246017456, "learning_rate": 0.0005775094777152257, "loss": 3.2564, "step": 7593 }, { "epoch": 0.37, "grad_norm": 0.5651172995567322, "learning_rate": 0.0005775036284881422, "loss": 3.4975, "step": 7594 }, { "epoch": 0.37, "grad_norm": 0.525607705116272, "learning_rate": 0.0005774977785301677, "loss": 3.3124, "step": 7595 }, { "epoch": 0.37, "grad_norm": 0.5294498205184937, "learning_rate": 0.0005774919278413174, "loss": 3.3201, "step": 7596 }, { "epoch": 0.37, "grad_norm": 0.5207985639572144, "learning_rate": 0.0005774860764216068, "loss": 3.4086, "step": 7597 }, { "epoch": 0.37, "grad_norm": 0.5538904666900635, "learning_rate": 0.0005774802242710513, "loss": 3.2833, "step": 7598 }, { "epoch": 0.37, "grad_norm": 0.5455402135848999, "learning_rate": 0.0005774743713896661, "loss": 3.3564, "step": 7599 }, { "epoch": 0.37, "grad_norm": 0.48715144395828247, "learning_rate": 0.000577468517777467, "loss": 3.3534, "step": 7600 }, { "epoch": 0.37, "grad_norm": 0.520661473274231, "learning_rate": 0.0005774626634344692, "loss": 3.5091, "step": 7601 }, { "epoch": 0.37, "grad_norm": 0.5360146760940552, "learning_rate": 0.000577456808360688, "loss": 3.4542, "step": 7602 }, { "epoch": 0.37, "grad_norm": 0.511243462562561, "learning_rate": 0.0005774509525561392, "loss": 3.2027, "step": 7603 }, { "epoch": 0.37, "grad_norm": 0.5435131788253784, "learning_rate": 0.0005774450960208378, "loss": 3.2947, "step": 7604 }, { "epoch": 0.37, "grad_norm": 0.5104213953018188, "learning_rate": 0.0005774392387547995, "loss": 3.3127, "step": 7605 }, { "epoch": 0.37, "grad_norm": 0.5034766793251038, "learning_rate": 0.0005774333807580395, "loss": 3.3856, "step": 7606 }, { "epoch": 0.37, "grad_norm": 0.5869839787483215, "learning_rate": 0.0005774275220305735, "loss": 3.308, "step": 7607 }, { "epoch": 0.37, "grad_norm": 0.4974309206008911, "learning_rate": 0.0005774216625724168, "loss": 3.4021, "step": 7608 }, { "epoch": 0.37, "grad_norm": 0.5040432810783386, "learning_rate": 0.0005774158023835848, "loss": 2.9991, "step": 7609 }, { "epoch": 0.37, "grad_norm": 0.5178181529045105, "learning_rate": 0.000577409941464093, "loss": 3.2486, "step": 7610 }, { "epoch": 0.37, "grad_norm": 0.5407399535179138, "learning_rate": 0.0005774040798139567, "loss": 3.3858, "step": 7611 }, { "epoch": 0.37, "grad_norm": 0.5059396028518677, "learning_rate": 0.0005773982174331915, "loss": 3.431, "step": 7612 }, { "epoch": 0.37, "grad_norm": 0.5154821276664734, "learning_rate": 0.0005773923543218128, "loss": 3.2791, "step": 7613 }, { "epoch": 0.37, "grad_norm": 0.5808987021446228, "learning_rate": 0.000577386490479836, "loss": 3.3297, "step": 7614 }, { "epoch": 0.37, "grad_norm": 0.5294091701507568, "learning_rate": 0.0005773806259072766, "loss": 3.3628, "step": 7615 }, { "epoch": 0.37, "grad_norm": 0.49198436737060547, "learning_rate": 0.00057737476060415, "loss": 3.441, "step": 7616 }, { "epoch": 0.37, "grad_norm": 0.5616235733032227, "learning_rate": 0.0005773688945704717, "loss": 3.4603, "step": 7617 }, { "epoch": 0.37, "grad_norm": 0.5396645665168762, "learning_rate": 0.0005773630278062571, "loss": 3.3925, "step": 7618 }, { "epoch": 0.37, "grad_norm": 0.5407871007919312, "learning_rate": 0.0005773571603115216, "loss": 3.4043, "step": 7619 }, { "epoch": 0.37, "grad_norm": 0.5107448697090149, "learning_rate": 0.0005773512920862808, "loss": 3.4727, "step": 7620 }, { "epoch": 0.37, "grad_norm": 0.5386175513267517, "learning_rate": 0.00057734542313055, "loss": 3.2539, "step": 7621 }, { "epoch": 0.37, "grad_norm": 0.5494099855422974, "learning_rate": 0.0005773395534443448, "loss": 3.3771, "step": 7622 }, { "epoch": 0.37, "grad_norm": 0.5173628926277161, "learning_rate": 0.0005773336830276805, "loss": 3.379, "step": 7623 }, { "epoch": 0.37, "grad_norm": 0.5712496638298035, "learning_rate": 0.0005773278118805727, "loss": 3.1727, "step": 7624 }, { "epoch": 0.37, "grad_norm": 0.524523138999939, "learning_rate": 0.0005773219400030369, "loss": 3.1427, "step": 7625 }, { "epoch": 0.37, "grad_norm": 0.5556202530860901, "learning_rate": 0.0005773160673950883, "loss": 3.4616, "step": 7626 }, { "epoch": 0.37, "grad_norm": 0.5599361658096313, "learning_rate": 0.0005773101940567427, "loss": 3.3032, "step": 7627 }, { "epoch": 0.37, "grad_norm": 0.5155829191207886, "learning_rate": 0.0005773043199880154, "loss": 3.3193, "step": 7628 }, { "epoch": 0.37, "grad_norm": 0.5319119095802307, "learning_rate": 0.0005772984451889219, "loss": 3.3039, "step": 7629 }, { "epoch": 0.37, "grad_norm": 0.5668798089027405, "learning_rate": 0.0005772925696594776, "loss": 3.3281, "step": 7630 }, { "epoch": 0.37, "grad_norm": 0.5063618421554565, "learning_rate": 0.000577286693399698, "loss": 3.2928, "step": 7631 }, { "epoch": 0.37, "grad_norm": 0.5319681763648987, "learning_rate": 0.0005772808164095988, "loss": 3.2887, "step": 7632 }, { "epoch": 0.37, "grad_norm": 0.5006822347640991, "learning_rate": 0.0005772749386891951, "loss": 3.5356, "step": 7633 }, { "epoch": 0.37, "grad_norm": 0.5074585676193237, "learning_rate": 0.0005772690602385026, "loss": 3.4013, "step": 7634 }, { "epoch": 0.37, "grad_norm": 0.5565813779830933, "learning_rate": 0.0005772631810575369, "loss": 3.4353, "step": 7635 }, { "epoch": 0.37, "grad_norm": 0.549435019493103, "learning_rate": 0.0005772573011463131, "loss": 3.0845, "step": 7636 }, { "epoch": 0.37, "grad_norm": 0.5290880799293518, "learning_rate": 0.0005772514205048472, "loss": 3.459, "step": 7637 }, { "epoch": 0.37, "grad_norm": 0.5667818784713745, "learning_rate": 0.0005772455391331542, "loss": 3.2927, "step": 7638 }, { "epoch": 0.37, "grad_norm": 0.5045775175094604, "learning_rate": 0.0005772396570312499, "loss": 3.2672, "step": 7639 }, { "epoch": 0.37, "grad_norm": 0.5503361821174622, "learning_rate": 0.0005772337741991497, "loss": 3.2494, "step": 7640 }, { "epoch": 0.37, "grad_norm": 0.5238841772079468, "learning_rate": 0.0005772278906368692, "loss": 3.2805, "step": 7641 }, { "epoch": 0.37, "grad_norm": 0.5158736109733582, "learning_rate": 0.0005772220063444236, "loss": 3.3447, "step": 7642 }, { "epoch": 0.37, "grad_norm": 0.5552991032600403, "learning_rate": 0.0005772161213218286, "loss": 3.1852, "step": 7643 }, { "epoch": 0.37, "grad_norm": 0.5120370388031006, "learning_rate": 0.0005772102355690998, "loss": 3.2797, "step": 7644 }, { "epoch": 0.37, "grad_norm": 0.47802504897117615, "learning_rate": 0.0005772043490862525, "loss": 3.2269, "step": 7645 }, { "epoch": 0.37, "grad_norm": 0.5293357372283936, "learning_rate": 0.0005771984618733024, "loss": 3.5585, "step": 7646 }, { "epoch": 0.37, "grad_norm": 0.5576381683349609, "learning_rate": 0.0005771925739302649, "loss": 3.5653, "step": 7647 }, { "epoch": 0.37, "grad_norm": 0.6383746266365051, "learning_rate": 0.0005771866852571554, "loss": 3.4087, "step": 7648 }, { "epoch": 0.37, "grad_norm": 0.5233350992202759, "learning_rate": 0.0005771807958539895, "loss": 3.5551, "step": 7649 }, { "epoch": 0.37, "grad_norm": 0.6100677847862244, "learning_rate": 0.0005771749057207828, "loss": 3.2505, "step": 7650 }, { "epoch": 0.37, "grad_norm": 0.49944281578063965, "learning_rate": 0.0005771690148575508, "loss": 3.3978, "step": 7651 }, { "epoch": 0.38, "grad_norm": 0.5267223119735718, "learning_rate": 0.0005771631232643088, "loss": 3.1858, "step": 7652 }, { "epoch": 0.38, "grad_norm": 0.5045062899589539, "learning_rate": 0.0005771572309410726, "loss": 3.4002, "step": 7653 }, { "epoch": 0.38, "grad_norm": 0.5398784279823303, "learning_rate": 0.0005771513378878576, "loss": 3.6054, "step": 7654 }, { "epoch": 0.38, "grad_norm": 0.5356148481369019, "learning_rate": 0.0005771454441046793, "loss": 3.5048, "step": 7655 }, { "epoch": 0.38, "grad_norm": 0.5511022806167603, "learning_rate": 0.0005771395495915531, "loss": 3.1728, "step": 7656 }, { "epoch": 0.38, "grad_norm": 0.4920971691608429, "learning_rate": 0.0005771336543484948, "loss": 3.2441, "step": 7657 }, { "epoch": 0.38, "grad_norm": 0.5204853415489197, "learning_rate": 0.0005771277583755198, "loss": 3.4215, "step": 7658 }, { "epoch": 0.38, "grad_norm": 0.5585867762565613, "learning_rate": 0.0005771218616726436, "loss": 3.2756, "step": 7659 }, { "epoch": 0.38, "grad_norm": 0.5565081238746643, "learning_rate": 0.0005771159642398817, "loss": 3.4225, "step": 7660 }, { "epoch": 0.38, "grad_norm": 0.5324299335479736, "learning_rate": 0.0005771100660772497, "loss": 3.127, "step": 7661 }, { "epoch": 0.38, "grad_norm": 0.516318678855896, "learning_rate": 0.0005771041671847632, "loss": 3.4088, "step": 7662 }, { "epoch": 0.38, "grad_norm": 0.5677664279937744, "learning_rate": 0.0005770982675624374, "loss": 3.3326, "step": 7663 }, { "epoch": 0.38, "grad_norm": 0.5322689414024353, "learning_rate": 0.0005770923672102883, "loss": 3.3283, "step": 7664 }, { "epoch": 0.38, "grad_norm": 0.6116513013839722, "learning_rate": 0.0005770864661283312, "loss": 3.3218, "step": 7665 }, { "epoch": 0.38, "grad_norm": 0.5484536290168762, "learning_rate": 0.0005770805643165819, "loss": 3.2955, "step": 7666 }, { "epoch": 0.38, "grad_norm": 0.5231142044067383, "learning_rate": 0.0005770746617750553, "loss": 3.4475, "step": 7667 }, { "epoch": 0.38, "grad_norm": 0.5095486640930176, "learning_rate": 0.0005770687585037676, "loss": 3.4287, "step": 7668 }, { "epoch": 0.38, "grad_norm": 0.544468343257904, "learning_rate": 0.0005770628545027341, "loss": 3.3266, "step": 7669 }, { "epoch": 0.38, "grad_norm": 0.5009661912918091, "learning_rate": 0.0005770569497719703, "loss": 3.4781, "step": 7670 }, { "epoch": 0.38, "grad_norm": 0.510226845741272, "learning_rate": 0.0005770510443114918, "loss": 3.3332, "step": 7671 }, { "epoch": 0.38, "grad_norm": 0.5290097594261169, "learning_rate": 0.0005770451381213144, "loss": 3.4185, "step": 7672 }, { "epoch": 0.38, "grad_norm": 0.5284835696220398, "learning_rate": 0.0005770392312014532, "loss": 3.2657, "step": 7673 }, { "epoch": 0.38, "grad_norm": 0.4978867471218109, "learning_rate": 0.000577033323551924, "loss": 3.3484, "step": 7674 }, { "epoch": 0.38, "grad_norm": 0.5280537009239197, "learning_rate": 0.0005770274151727424, "loss": 3.3948, "step": 7675 }, { "epoch": 0.38, "grad_norm": 0.5177137851715088, "learning_rate": 0.0005770215060639239, "loss": 3.2166, "step": 7676 }, { "epoch": 0.38, "grad_norm": 0.5431517362594604, "learning_rate": 0.000577015596225484, "loss": 3.1223, "step": 7677 }, { "epoch": 0.38, "grad_norm": 0.5485773682594299, "learning_rate": 0.0005770096856574383, "loss": 3.2481, "step": 7678 }, { "epoch": 0.38, "grad_norm": 0.5501081347465515, "learning_rate": 0.0005770037743598025, "loss": 3.5521, "step": 7679 }, { "epoch": 0.38, "grad_norm": 0.5404449701309204, "learning_rate": 0.000576997862332592, "loss": 3.4757, "step": 7680 }, { "epoch": 0.38, "grad_norm": 0.5793304443359375, "learning_rate": 0.0005769919495758225, "loss": 3.1084, "step": 7681 }, { "epoch": 0.38, "grad_norm": 0.559897243976593, "learning_rate": 0.0005769860360895096, "loss": 3.0807, "step": 7682 }, { "epoch": 0.38, "grad_norm": 0.5876379609107971, "learning_rate": 0.0005769801218736686, "loss": 3.2023, "step": 7683 }, { "epoch": 0.38, "grad_norm": 0.548350989818573, "learning_rate": 0.0005769742069283154, "loss": 3.2818, "step": 7684 }, { "epoch": 0.38, "grad_norm": 0.5384724140167236, "learning_rate": 0.0005769682912534653, "loss": 3.2043, "step": 7685 }, { "epoch": 0.38, "grad_norm": 0.5407583713531494, "learning_rate": 0.0005769623748491342, "loss": 3.3275, "step": 7686 }, { "epoch": 0.38, "grad_norm": 0.5203924179077148, "learning_rate": 0.0005769564577153374, "loss": 3.3591, "step": 7687 }, { "epoch": 0.38, "grad_norm": 0.5495003461837769, "learning_rate": 0.0005769505398520907, "loss": 3.2688, "step": 7688 }, { "epoch": 0.38, "grad_norm": 0.5348663926124573, "learning_rate": 0.0005769446212594094, "loss": 3.3437, "step": 7689 }, { "epoch": 0.38, "grad_norm": 0.5982791185379028, "learning_rate": 0.0005769387019373094, "loss": 3.398, "step": 7690 }, { "epoch": 0.38, "grad_norm": 0.5527102947235107, "learning_rate": 0.0005769327818858062, "loss": 3.4327, "step": 7691 }, { "epoch": 0.38, "grad_norm": 0.5997951626777649, "learning_rate": 0.0005769268611049152, "loss": 3.4901, "step": 7692 }, { "epoch": 0.38, "grad_norm": 0.5478419065475464, "learning_rate": 0.0005769209395946522, "loss": 3.3142, "step": 7693 }, { "epoch": 0.38, "grad_norm": 0.49550652503967285, "learning_rate": 0.0005769150173550328, "loss": 3.5203, "step": 7694 }, { "epoch": 0.38, "grad_norm": 0.5617064833641052, "learning_rate": 0.0005769090943860724, "loss": 3.5058, "step": 7695 }, { "epoch": 0.38, "grad_norm": 0.4888365566730499, "learning_rate": 0.0005769031706877869, "loss": 3.4375, "step": 7696 }, { "epoch": 0.38, "grad_norm": 0.5538713335990906, "learning_rate": 0.0005768972462601916, "loss": 3.3487, "step": 7697 }, { "epoch": 0.38, "grad_norm": 0.5713244080543518, "learning_rate": 0.0005768913211033024, "loss": 3.2288, "step": 7698 }, { "epoch": 0.38, "grad_norm": 0.5322604775428772, "learning_rate": 0.0005768853952171346, "loss": 3.5587, "step": 7699 }, { "epoch": 0.38, "grad_norm": 0.5296763777732849, "learning_rate": 0.000576879468601704, "loss": 3.3886, "step": 7700 }, { "epoch": 0.38, "grad_norm": 0.581745982170105, "learning_rate": 0.0005768735412570262, "loss": 3.3111, "step": 7701 }, { "epoch": 0.38, "grad_norm": 0.5159407258033752, "learning_rate": 0.0005768676131831168, "loss": 3.2317, "step": 7702 }, { "epoch": 0.38, "grad_norm": 0.527847945690155, "learning_rate": 0.0005768616843799913, "loss": 3.2123, "step": 7703 }, { "epoch": 0.38, "grad_norm": 0.5626876354217529, "learning_rate": 0.0005768557548476654, "loss": 3.1141, "step": 7704 }, { "epoch": 0.38, "grad_norm": 0.5151354670524597, "learning_rate": 0.0005768498245861548, "loss": 3.2901, "step": 7705 }, { "epoch": 0.38, "grad_norm": 0.538934051990509, "learning_rate": 0.000576843893595475, "loss": 3.3142, "step": 7706 }, { "epoch": 0.38, "grad_norm": 0.5391485095024109, "learning_rate": 0.0005768379618756417, "loss": 3.1658, "step": 7707 }, { "epoch": 0.38, "grad_norm": 0.5370870232582092, "learning_rate": 0.0005768320294266705, "loss": 3.2983, "step": 7708 }, { "epoch": 0.38, "grad_norm": 0.5493991374969482, "learning_rate": 0.0005768260962485769, "loss": 3.3358, "step": 7709 }, { "epoch": 0.38, "grad_norm": 0.5690867900848389, "learning_rate": 0.0005768201623413768, "loss": 3.4018, "step": 7710 }, { "epoch": 0.38, "grad_norm": 0.5145018100738525, "learning_rate": 0.0005768142277050856, "loss": 3.3555, "step": 7711 }, { "epoch": 0.38, "grad_norm": 0.526140034198761, "learning_rate": 0.000576808292339719, "loss": 3.4113, "step": 7712 }, { "epoch": 0.38, "grad_norm": 0.5112320780754089, "learning_rate": 0.0005768023562452926, "loss": 3.3697, "step": 7713 }, { "epoch": 0.38, "grad_norm": 0.5290682315826416, "learning_rate": 0.0005767964194218221, "loss": 3.5244, "step": 7714 }, { "epoch": 0.38, "grad_norm": 0.5239760279655457, "learning_rate": 0.0005767904818693231, "loss": 3.4218, "step": 7715 }, { "epoch": 0.38, "grad_norm": 0.5848803520202637, "learning_rate": 0.0005767845435878113, "loss": 3.298, "step": 7716 }, { "epoch": 0.38, "grad_norm": 0.5328184366226196, "learning_rate": 0.0005767786045773021, "loss": 3.2251, "step": 7717 }, { "epoch": 0.38, "grad_norm": 0.5391929745674133, "learning_rate": 0.0005767726648378115, "loss": 3.2584, "step": 7718 }, { "epoch": 0.38, "grad_norm": 0.5395445823669434, "learning_rate": 0.0005767667243693548, "loss": 3.3085, "step": 7719 }, { "epoch": 0.38, "grad_norm": 0.5276076197624207, "learning_rate": 0.0005767607831719479, "loss": 3.3174, "step": 7720 }, { "epoch": 0.38, "grad_norm": 0.4999656677246094, "learning_rate": 0.0005767548412456064, "loss": 3.3946, "step": 7721 }, { "epoch": 0.38, "grad_norm": 0.5640438795089722, "learning_rate": 0.0005767488985903459, "loss": 3.4772, "step": 7722 }, { "epoch": 0.38, "grad_norm": 0.5889086723327637, "learning_rate": 0.000576742955206182, "loss": 3.4004, "step": 7723 }, { "epoch": 0.38, "grad_norm": 0.5023418068885803, "learning_rate": 0.0005767370110931306, "loss": 3.4566, "step": 7724 }, { "epoch": 0.38, "grad_norm": 0.5705462694168091, "learning_rate": 0.000576731066251207, "loss": 3.2547, "step": 7725 }, { "epoch": 0.38, "grad_norm": 0.6652705073356628, "learning_rate": 0.000576725120680427, "loss": 3.1061, "step": 7726 }, { "epoch": 0.38, "grad_norm": 0.5656840205192566, "learning_rate": 0.0005767191743808064, "loss": 3.1697, "step": 7727 }, { "epoch": 0.38, "grad_norm": 0.5446447134017944, "learning_rate": 0.0005767132273523606, "loss": 3.2713, "step": 7728 }, { "epoch": 0.38, "grad_norm": 0.5088135004043579, "learning_rate": 0.0005767072795951056, "loss": 3.439, "step": 7729 }, { "epoch": 0.38, "grad_norm": 0.573800802230835, "learning_rate": 0.0005767013311090567, "loss": 3.5233, "step": 7730 }, { "epoch": 0.38, "grad_norm": 0.5233531594276428, "learning_rate": 0.0005766953818942299, "loss": 3.368, "step": 7731 }, { "epoch": 0.38, "grad_norm": 0.5602032542228699, "learning_rate": 0.0005766894319506406, "loss": 3.5343, "step": 7732 }, { "epoch": 0.38, "grad_norm": 0.5444862246513367, "learning_rate": 0.0005766834812783047, "loss": 3.3899, "step": 7733 }, { "epoch": 0.38, "grad_norm": 0.5610346794128418, "learning_rate": 0.0005766775298772377, "loss": 3.263, "step": 7734 }, { "epoch": 0.38, "grad_norm": 0.5168694257736206, "learning_rate": 0.0005766715777474553, "loss": 3.3323, "step": 7735 }, { "epoch": 0.38, "grad_norm": 0.5316835641860962, "learning_rate": 0.0005766656248889732, "loss": 3.4178, "step": 7736 }, { "epoch": 0.38, "grad_norm": 0.5672079920768738, "learning_rate": 0.0005766596713018072, "loss": 3.3652, "step": 7737 }, { "epoch": 0.38, "grad_norm": 0.5037599205970764, "learning_rate": 0.0005766537169859728, "loss": 3.3349, "step": 7738 }, { "epoch": 0.38, "grad_norm": 0.538493812084198, "learning_rate": 0.0005766477619414858, "loss": 3.4587, "step": 7739 }, { "epoch": 0.38, "grad_norm": 0.5200104713439941, "learning_rate": 0.0005766418061683618, "loss": 3.3614, "step": 7740 }, { "epoch": 0.38, "grad_norm": 0.5429470539093018, "learning_rate": 0.0005766358496666165, "loss": 3.2853, "step": 7741 }, { "epoch": 0.38, "grad_norm": 0.5296519994735718, "learning_rate": 0.0005766298924362656, "loss": 3.3077, "step": 7742 }, { "epoch": 0.38, "grad_norm": 0.5855775475502014, "learning_rate": 0.0005766239344773249, "loss": 3.3771, "step": 7743 }, { "epoch": 0.38, "grad_norm": 0.6540366411209106, "learning_rate": 0.0005766179757898098, "loss": 3.432, "step": 7744 }, { "epoch": 0.38, "grad_norm": 0.5268910527229309, "learning_rate": 0.0005766120163737364, "loss": 3.3832, "step": 7745 }, { "epoch": 0.38, "grad_norm": 0.5402625799179077, "learning_rate": 0.00057660605622912, "loss": 3.2677, "step": 7746 }, { "epoch": 0.38, "grad_norm": 0.545888364315033, "learning_rate": 0.0005766000953559767, "loss": 3.1326, "step": 7747 }, { "epoch": 0.38, "grad_norm": 0.5113720893859863, "learning_rate": 0.0005765941337543218, "loss": 3.3382, "step": 7748 }, { "epoch": 0.38, "grad_norm": 0.5178648829460144, "learning_rate": 0.0005765881714241714, "loss": 3.3942, "step": 7749 }, { "epoch": 0.38, "grad_norm": 0.5671051740646362, "learning_rate": 0.0005765822083655407, "loss": 3.3048, "step": 7750 }, { "epoch": 0.38, "grad_norm": 0.501137375831604, "learning_rate": 0.0005765762445784459, "loss": 3.2334, "step": 7751 }, { "epoch": 0.38, "grad_norm": 0.5226545333862305, "learning_rate": 0.0005765702800629023, "loss": 3.3512, "step": 7752 }, { "epoch": 0.38, "grad_norm": 0.5014787912368774, "learning_rate": 0.000576564314818926, "loss": 3.318, "step": 7753 }, { "epoch": 0.38, "grad_norm": 0.5254672169685364, "learning_rate": 0.0005765583488465324, "loss": 3.3825, "step": 7754 }, { "epoch": 0.38, "grad_norm": 0.5159661173820496, "learning_rate": 0.0005765523821457374, "loss": 3.3095, "step": 7755 }, { "epoch": 0.38, "grad_norm": 0.5051149725914001, "learning_rate": 0.0005765464147165566, "loss": 3.2341, "step": 7756 }, { "epoch": 0.38, "grad_norm": 0.5197758078575134, "learning_rate": 0.0005765404465590059, "loss": 3.2489, "step": 7757 }, { "epoch": 0.38, "grad_norm": 0.5094234943389893, "learning_rate": 0.0005765344776731008, "loss": 3.2445, "step": 7758 }, { "epoch": 0.38, "grad_norm": 0.5342963337898254, "learning_rate": 0.000576528508058857, "loss": 3.5146, "step": 7759 }, { "epoch": 0.38, "grad_norm": 0.5151779055595398, "learning_rate": 0.0005765225377162904, "loss": 3.1479, "step": 7760 }, { "epoch": 0.38, "grad_norm": 0.5261240601539612, "learning_rate": 0.0005765165666454167, "loss": 3.0576, "step": 7761 }, { "epoch": 0.38, "grad_norm": 0.5607361793518066, "learning_rate": 0.0005765105948462516, "loss": 3.211, "step": 7762 }, { "epoch": 0.38, "grad_norm": 0.534424364566803, "learning_rate": 0.0005765046223188108, "loss": 3.3502, "step": 7763 }, { "epoch": 0.38, "grad_norm": 0.5433931946754456, "learning_rate": 0.00057649864906311, "loss": 3.3541, "step": 7764 }, { "epoch": 0.38, "grad_norm": 0.5713850855827332, "learning_rate": 0.0005764926750791649, "loss": 3.1849, "step": 7765 }, { "epoch": 0.38, "grad_norm": 0.5363174676895142, "learning_rate": 0.0005764867003669914, "loss": 3.297, "step": 7766 }, { "epoch": 0.38, "grad_norm": 0.5349758267402649, "learning_rate": 0.0005764807249266052, "loss": 3.2353, "step": 7767 }, { "epoch": 0.38, "grad_norm": 0.5053718686103821, "learning_rate": 0.0005764747487580218, "loss": 3.0331, "step": 7768 }, { "epoch": 0.38, "grad_norm": 0.5225927233695984, "learning_rate": 0.0005764687718612572, "loss": 3.2927, "step": 7769 }, { "epoch": 0.38, "grad_norm": 0.5191579461097717, "learning_rate": 0.0005764627942363271, "loss": 3.2522, "step": 7770 }, { "epoch": 0.38, "grad_norm": 0.530640721321106, "learning_rate": 0.0005764568158832472, "loss": 3.1888, "step": 7771 }, { "epoch": 0.38, "grad_norm": 0.5546700358390808, "learning_rate": 0.0005764508368020333, "loss": 2.9471, "step": 7772 }, { "epoch": 0.38, "grad_norm": 0.541641354560852, "learning_rate": 0.0005764448569927009, "loss": 3.1619, "step": 7773 }, { "epoch": 0.38, "grad_norm": 0.5144984722137451, "learning_rate": 0.0005764388764552662, "loss": 3.1562, "step": 7774 }, { "epoch": 0.38, "grad_norm": 0.5483419299125671, "learning_rate": 0.0005764328951897446, "loss": 3.5068, "step": 7775 }, { "epoch": 0.38, "grad_norm": 0.5148709416389465, "learning_rate": 0.0005764269131961519, "loss": 3.3056, "step": 7776 }, { "epoch": 0.38, "grad_norm": 0.535964846611023, "learning_rate": 0.000576420930474504, "loss": 3.4626, "step": 7777 }, { "epoch": 0.38, "grad_norm": 0.5585061311721802, "learning_rate": 0.0005764149470248166, "loss": 3.3747, "step": 7778 }, { "epoch": 0.38, "grad_norm": 0.5796385407447815, "learning_rate": 0.0005764089628471054, "loss": 3.3358, "step": 7779 }, { "epoch": 0.38, "grad_norm": 0.5621188282966614, "learning_rate": 0.000576402977941386, "loss": 3.5115, "step": 7780 }, { "epoch": 0.38, "grad_norm": 0.5454630255699158, "learning_rate": 0.0005763969923076746, "loss": 3.4531, "step": 7781 }, { "epoch": 0.38, "grad_norm": 0.5305492877960205, "learning_rate": 0.0005763910059459865, "loss": 3.3743, "step": 7782 }, { "epoch": 0.38, "grad_norm": 0.5111590027809143, "learning_rate": 0.0005763850188563378, "loss": 3.1609, "step": 7783 }, { "epoch": 0.38, "grad_norm": 0.520412266254425, "learning_rate": 0.0005763790310387441, "loss": 3.5552, "step": 7784 }, { "epoch": 0.38, "grad_norm": 0.5311888456344604, "learning_rate": 0.0005763730424932213, "loss": 3.3538, "step": 7785 }, { "epoch": 0.38, "grad_norm": 0.5347929000854492, "learning_rate": 0.0005763670532197851, "loss": 3.4564, "step": 7786 }, { "epoch": 0.38, "grad_norm": 0.5463716983795166, "learning_rate": 0.0005763610632184512, "loss": 3.3186, "step": 7787 }, { "epoch": 0.38, "grad_norm": 0.5599271655082703, "learning_rate": 0.0005763550724892355, "loss": 3.3339, "step": 7788 }, { "epoch": 0.38, "grad_norm": 0.545688271522522, "learning_rate": 0.0005763490810321537, "loss": 3.3977, "step": 7789 }, { "epoch": 0.38, "grad_norm": 0.5224574208259583, "learning_rate": 0.0005763430888472217, "loss": 3.2594, "step": 7790 }, { "epoch": 0.38, "grad_norm": 0.49941286444664, "learning_rate": 0.0005763370959344549, "loss": 3.185, "step": 7791 }, { "epoch": 0.38, "grad_norm": 0.5086252689361572, "learning_rate": 0.0005763311022938696, "loss": 3.3799, "step": 7792 }, { "epoch": 0.38, "grad_norm": 0.5339295268058777, "learning_rate": 0.0005763251079254814, "loss": 3.5027, "step": 7793 }, { "epoch": 0.38, "grad_norm": 0.5528396368026733, "learning_rate": 0.000576319112829306, "loss": 3.3822, "step": 7794 }, { "epoch": 0.38, "grad_norm": 0.5613994002342224, "learning_rate": 0.0005763131170053591, "loss": 3.439, "step": 7795 }, { "epoch": 0.38, "grad_norm": 0.5107088685035706, "learning_rate": 0.0005763071204536568, "loss": 3.243, "step": 7796 }, { "epoch": 0.38, "grad_norm": 0.5415818691253662, "learning_rate": 0.0005763011231742146, "loss": 3.1863, "step": 7797 }, { "epoch": 0.38, "grad_norm": 0.550011396408081, "learning_rate": 0.0005762951251670485, "loss": 3.3939, "step": 7798 }, { "epoch": 0.38, "grad_norm": 0.5543647408485413, "learning_rate": 0.0005762891264321742, "loss": 3.4366, "step": 7799 }, { "epoch": 0.38, "grad_norm": 0.5226086974143982, "learning_rate": 0.0005762831269696075, "loss": 3.3135, "step": 7800 }, { "epoch": 0.38, "grad_norm": 0.5125158429145813, "learning_rate": 0.0005762771267793642, "loss": 3.5191, "step": 7801 }, { "epoch": 0.38, "grad_norm": 0.485068678855896, "learning_rate": 0.0005762711258614602, "loss": 3.5, "step": 7802 }, { "epoch": 0.38, "grad_norm": 0.49382463097572327, "learning_rate": 0.0005762651242159111, "loss": 3.3202, "step": 7803 }, { "epoch": 0.38, "grad_norm": 0.5490413904190063, "learning_rate": 0.0005762591218427328, "loss": 3.1602, "step": 7804 }, { "epoch": 0.38, "grad_norm": 0.496092826128006, "learning_rate": 0.0005762531187419413, "loss": 3.511, "step": 7805 }, { "epoch": 0.38, "grad_norm": 0.5007348656654358, "learning_rate": 0.0005762471149135522, "loss": 3.2596, "step": 7806 }, { "epoch": 0.38, "grad_norm": 0.5107442736625671, "learning_rate": 0.0005762411103575813, "loss": 3.3431, "step": 7807 }, { "epoch": 0.38, "grad_norm": 0.5263485908508301, "learning_rate": 0.0005762351050740445, "loss": 3.3426, "step": 7808 }, { "epoch": 0.38, "grad_norm": 0.5867173671722412, "learning_rate": 0.0005762290990629576, "loss": 3.3641, "step": 7809 }, { "epoch": 0.38, "grad_norm": 0.5284026861190796, "learning_rate": 0.0005762230923243365, "loss": 3.2272, "step": 7810 }, { "epoch": 0.38, "grad_norm": 0.5352510809898376, "learning_rate": 0.0005762170848581968, "loss": 3.447, "step": 7811 }, { "epoch": 0.38, "grad_norm": 0.4992859959602356, "learning_rate": 0.0005762110766645546, "loss": 3.4186, "step": 7812 }, { "epoch": 0.38, "grad_norm": 0.514480471611023, "learning_rate": 0.0005762050677434254, "loss": 3.2831, "step": 7813 }, { "epoch": 0.38, "grad_norm": 0.5168455243110657, "learning_rate": 0.0005761990580948254, "loss": 3.3549, "step": 7814 }, { "epoch": 0.38, "grad_norm": 0.5284212827682495, "learning_rate": 0.0005761930477187701, "loss": 3.3554, "step": 7815 }, { "epoch": 0.38, "grad_norm": 0.5353116393089294, "learning_rate": 0.0005761870366152755, "loss": 3.2887, "step": 7816 }, { "epoch": 0.38, "grad_norm": 0.5134118795394897, "learning_rate": 0.0005761810247843574, "loss": 3.4749, "step": 7817 }, { "epoch": 0.38, "grad_norm": 0.49037235975265503, "learning_rate": 0.0005761750122260317, "loss": 3.1841, "step": 7818 }, { "epoch": 0.38, "grad_norm": 0.5198505520820618, "learning_rate": 0.0005761689989403141, "loss": 3.4457, "step": 7819 }, { "epoch": 0.38, "grad_norm": 0.5264098048210144, "learning_rate": 0.0005761629849272205, "loss": 3.3493, "step": 7820 }, { "epoch": 0.38, "grad_norm": 0.5358461141586304, "learning_rate": 0.0005761569701867668, "loss": 3.309, "step": 7821 }, { "epoch": 0.38, "grad_norm": 0.5349216461181641, "learning_rate": 0.0005761509547189688, "loss": 3.2732, "step": 7822 }, { "epoch": 0.38, "grad_norm": 0.498106986284256, "learning_rate": 0.0005761449385238422, "loss": 3.1092, "step": 7823 }, { "epoch": 0.38, "grad_norm": 0.5381754040718079, "learning_rate": 0.0005761389216014031, "loss": 3.3537, "step": 7824 }, { "epoch": 0.38, "grad_norm": 0.535024106502533, "learning_rate": 0.0005761329039516671, "loss": 3.269, "step": 7825 }, { "epoch": 0.38, "grad_norm": 0.5659539103507996, "learning_rate": 0.0005761268855746503, "loss": 3.3276, "step": 7826 }, { "epoch": 0.38, "grad_norm": 0.5511694550514221, "learning_rate": 0.0005761208664703684, "loss": 3.3624, "step": 7827 }, { "epoch": 0.38, "grad_norm": 0.5096381902694702, "learning_rate": 0.0005761148466388373, "loss": 3.2467, "step": 7828 }, { "epoch": 0.38, "grad_norm": 0.5487003326416016, "learning_rate": 0.0005761088260800728, "loss": 3.361, "step": 7829 }, { "epoch": 0.38, "grad_norm": 0.5437702536582947, "learning_rate": 0.0005761028047940907, "loss": 3.4645, "step": 7830 }, { "epoch": 0.38, "grad_norm": 0.5526220798492432, "learning_rate": 0.0005760967827809072, "loss": 3.1801, "step": 7831 }, { "epoch": 0.38, "grad_norm": 0.5293183326721191, "learning_rate": 0.0005760907600405377, "loss": 3.2272, "step": 7832 }, { "epoch": 0.38, "grad_norm": 0.5198914408683777, "learning_rate": 0.0005760847365729984, "loss": 3.3003, "step": 7833 }, { "epoch": 0.38, "grad_norm": 0.5536202788352966, "learning_rate": 0.0005760787123783049, "loss": 2.9891, "step": 7834 }, { "epoch": 0.38, "grad_norm": 0.5597622990608215, "learning_rate": 0.0005760726874564732, "loss": 3.0627, "step": 7835 }, { "epoch": 0.38, "grad_norm": 0.5665072202682495, "learning_rate": 0.0005760666618075192, "loss": 3.2544, "step": 7836 }, { "epoch": 0.38, "grad_norm": 0.6050432324409485, "learning_rate": 0.0005760606354314588, "loss": 3.3163, "step": 7837 }, { "epoch": 0.38, "grad_norm": 0.5545769929885864, "learning_rate": 0.0005760546083283079, "loss": 3.1321, "step": 7838 }, { "epoch": 0.38, "grad_norm": 0.5636788010597229, "learning_rate": 0.0005760485804980821, "loss": 3.1955, "step": 7839 }, { "epoch": 0.38, "grad_norm": 0.5258702635765076, "learning_rate": 0.0005760425519407976, "loss": 3.274, "step": 7840 }, { "epoch": 0.38, "grad_norm": 0.5311688184738159, "learning_rate": 0.0005760365226564701, "loss": 3.2431, "step": 7841 }, { "epoch": 0.38, "grad_norm": 0.5252965092658997, "learning_rate": 0.0005760304926451155, "loss": 3.3003, "step": 7842 }, { "epoch": 0.38, "grad_norm": 0.5277718901634216, "learning_rate": 0.0005760244619067498, "loss": 3.3049, "step": 7843 }, { "epoch": 0.38, "grad_norm": 0.5588723421096802, "learning_rate": 0.0005760184304413887, "loss": 3.0993, "step": 7844 }, { "epoch": 0.38, "grad_norm": 0.5348266363143921, "learning_rate": 0.0005760123982490481, "loss": 3.3248, "step": 7845 }, { "epoch": 0.38, "grad_norm": 0.5221610069274902, "learning_rate": 0.0005760063653297441, "loss": 3.1574, "step": 7846 }, { "epoch": 0.38, "grad_norm": 0.5521312355995178, "learning_rate": 0.0005760003316834924, "loss": 3.2215, "step": 7847 }, { "epoch": 0.38, "grad_norm": 0.570512056350708, "learning_rate": 0.0005759942973103089, "loss": 3.478, "step": 7848 }, { "epoch": 0.38, "grad_norm": 0.5131711363792419, "learning_rate": 0.0005759882622102096, "loss": 3.3638, "step": 7849 }, { "epoch": 0.38, "grad_norm": 0.5566141605377197, "learning_rate": 0.0005759822263832103, "loss": 3.0577, "step": 7850 }, { "epoch": 0.38, "grad_norm": 0.5762760043144226, "learning_rate": 0.0005759761898293269, "loss": 3.275, "step": 7851 }, { "epoch": 0.38, "grad_norm": 0.5507886409759521, "learning_rate": 0.0005759701525485754, "loss": 3.1812, "step": 7852 }, { "epoch": 0.38, "grad_norm": 0.5555328726768494, "learning_rate": 0.0005759641145409716, "loss": 3.3363, "step": 7853 }, { "epoch": 0.38, "grad_norm": 0.5610153079032898, "learning_rate": 0.0005759580758065315, "loss": 3.3007, "step": 7854 }, { "epoch": 0.38, "grad_norm": 0.5262787938117981, "learning_rate": 0.0005759520363452709, "loss": 3.5069, "step": 7855 }, { "epoch": 0.39, "grad_norm": 0.5241034030914307, "learning_rate": 0.0005759459961572057, "loss": 3.3264, "step": 7856 }, { "epoch": 0.39, "grad_norm": 0.5599466562271118, "learning_rate": 0.0005759399552423518, "loss": 3.4544, "step": 7857 }, { "epoch": 0.39, "grad_norm": 0.5439044833183289, "learning_rate": 0.0005759339136007253, "loss": 3.3155, "step": 7858 }, { "epoch": 0.39, "grad_norm": 0.5432597994804382, "learning_rate": 0.0005759278712323419, "loss": 3.2769, "step": 7859 }, { "epoch": 0.39, "grad_norm": 0.550878643989563, "learning_rate": 0.0005759218281372175, "loss": 3.5132, "step": 7860 }, { "epoch": 0.39, "grad_norm": 0.5362837910652161, "learning_rate": 0.0005759157843153683, "loss": 3.2445, "step": 7861 }, { "epoch": 0.39, "grad_norm": 0.5157976150512695, "learning_rate": 0.00057590973976681, "loss": 3.3654, "step": 7862 }, { "epoch": 0.39, "grad_norm": 0.5195072293281555, "learning_rate": 0.0005759036944915585, "loss": 3.2124, "step": 7863 }, { "epoch": 0.39, "grad_norm": 0.5618158578872681, "learning_rate": 0.0005758976484896296, "loss": 3.1182, "step": 7864 }, { "epoch": 0.39, "grad_norm": 0.5115683078765869, "learning_rate": 0.0005758916017610396, "loss": 3.4023, "step": 7865 }, { "epoch": 0.39, "grad_norm": 0.5243606567382812, "learning_rate": 0.0005758855543058042, "loss": 3.2927, "step": 7866 }, { "epoch": 0.39, "grad_norm": 0.5262304544448853, "learning_rate": 0.0005758795061239393, "loss": 3.3996, "step": 7867 }, { "epoch": 0.39, "grad_norm": 0.5342805981636047, "learning_rate": 0.0005758734572154609, "loss": 3.443, "step": 7868 }, { "epoch": 0.39, "grad_norm": 0.49876853823661804, "learning_rate": 0.0005758674075803848, "loss": 3.5409, "step": 7869 }, { "epoch": 0.39, "grad_norm": 0.534772515296936, "learning_rate": 0.000575861357218727, "loss": 3.3427, "step": 7870 }, { "epoch": 0.39, "grad_norm": 0.5307782292366028, "learning_rate": 0.0005758553061305038, "loss": 3.3184, "step": 7871 }, { "epoch": 0.39, "grad_norm": 0.5947427153587341, "learning_rate": 0.0005758492543157305, "loss": 3.1305, "step": 7872 }, { "epoch": 0.39, "grad_norm": 0.5112699270248413, "learning_rate": 0.0005758432017744235, "loss": 3.2861, "step": 7873 }, { "epoch": 0.39, "grad_norm": 0.5101796388626099, "learning_rate": 0.0005758371485065986, "loss": 3.3975, "step": 7874 }, { "epoch": 0.39, "grad_norm": 0.5383381247520447, "learning_rate": 0.0005758310945122717, "loss": 3.2371, "step": 7875 }, { "epoch": 0.39, "grad_norm": 0.5568742156028748, "learning_rate": 0.0005758250397914587, "loss": 3.3188, "step": 7876 }, { "epoch": 0.39, "grad_norm": 0.5260884761810303, "learning_rate": 0.0005758189843441757, "loss": 3.3591, "step": 7877 }, { "epoch": 0.39, "grad_norm": 0.5004744529724121, "learning_rate": 0.0005758129281704386, "loss": 3.2018, "step": 7878 }, { "epoch": 0.39, "grad_norm": 0.5695815682411194, "learning_rate": 0.0005758068712702633, "loss": 3.0783, "step": 7879 }, { "epoch": 0.39, "grad_norm": 0.5590498447418213, "learning_rate": 0.0005758008136436658, "loss": 3.3908, "step": 7880 }, { "epoch": 0.39, "grad_norm": 0.5661531090736389, "learning_rate": 0.0005757947552906621, "loss": 3.2036, "step": 7881 }, { "epoch": 0.39, "grad_norm": 0.5372530817985535, "learning_rate": 0.0005757886962112679, "loss": 3.3096, "step": 7882 }, { "epoch": 0.39, "grad_norm": 0.5631973147392273, "learning_rate": 0.0005757826364054995, "loss": 3.3011, "step": 7883 }, { "epoch": 0.39, "grad_norm": 0.5350229740142822, "learning_rate": 0.0005757765758733727, "loss": 3.4572, "step": 7884 }, { "epoch": 0.39, "grad_norm": 0.5403389930725098, "learning_rate": 0.0005757705146149034, "loss": 3.3266, "step": 7885 }, { "epoch": 0.39, "grad_norm": 0.5325131416320801, "learning_rate": 0.0005757644526301078, "loss": 3.3954, "step": 7886 }, { "epoch": 0.39, "grad_norm": 0.5278269052505493, "learning_rate": 0.0005757583899190016, "loss": 3.3926, "step": 7887 }, { "epoch": 0.39, "grad_norm": 0.5290004014968872, "learning_rate": 0.0005757523264816009, "loss": 3.5282, "step": 7888 }, { "epoch": 0.39, "grad_norm": 0.5585773587226868, "learning_rate": 0.0005757462623179215, "loss": 3.5418, "step": 7889 }, { "epoch": 0.39, "grad_norm": 0.5469244122505188, "learning_rate": 0.0005757401974279796, "loss": 3.2753, "step": 7890 }, { "epoch": 0.39, "grad_norm": 0.6067792177200317, "learning_rate": 0.0005757341318117911, "loss": 3.3368, "step": 7891 }, { "epoch": 0.39, "grad_norm": 0.5292524099349976, "learning_rate": 0.0005757280654693721, "loss": 3.2878, "step": 7892 }, { "epoch": 0.39, "grad_norm": 0.6546759605407715, "learning_rate": 0.0005757219984007382, "loss": 3.1904, "step": 7893 }, { "epoch": 0.39, "grad_norm": 0.5419657826423645, "learning_rate": 0.0005757159306059057, "loss": 3.4508, "step": 7894 }, { "epoch": 0.39, "grad_norm": 0.5196870565414429, "learning_rate": 0.0005757098620848905, "loss": 3.4328, "step": 7895 }, { "epoch": 0.39, "grad_norm": 0.5152660608291626, "learning_rate": 0.0005757037928377087, "loss": 3.3568, "step": 7896 }, { "epoch": 0.39, "grad_norm": 0.5874919891357422, "learning_rate": 0.0005756977228643761, "loss": 3.4204, "step": 7897 }, { "epoch": 0.39, "grad_norm": 0.5220939517021179, "learning_rate": 0.0005756916521649088, "loss": 3.5503, "step": 7898 }, { "epoch": 0.39, "grad_norm": 0.5331912636756897, "learning_rate": 0.0005756855807393227, "loss": 3.2373, "step": 7899 }, { "epoch": 0.39, "grad_norm": 0.5303639769554138, "learning_rate": 0.0005756795085876338, "loss": 3.2296, "step": 7900 }, { "epoch": 0.39, "grad_norm": 0.522990882396698, "learning_rate": 0.0005756734357098581, "loss": 3.2013, "step": 7901 }, { "epoch": 0.39, "grad_norm": 0.5241119861602783, "learning_rate": 0.0005756673621060117, "loss": 3.0806, "step": 7902 }, { "epoch": 0.39, "grad_norm": 0.5303024649620056, "learning_rate": 0.0005756612877761105, "loss": 3.3074, "step": 7903 }, { "epoch": 0.39, "grad_norm": 0.5631526112556458, "learning_rate": 0.0005756552127201706, "loss": 3.3211, "step": 7904 }, { "epoch": 0.39, "grad_norm": 0.5668905377388, "learning_rate": 0.0005756491369382078, "loss": 3.2826, "step": 7905 }, { "epoch": 0.39, "grad_norm": 0.5107238292694092, "learning_rate": 0.0005756430604302383, "loss": 3.2575, "step": 7906 }, { "epoch": 0.39, "grad_norm": 0.5243762731552124, "learning_rate": 0.0005756369831962779, "loss": 3.294, "step": 7907 }, { "epoch": 0.39, "grad_norm": 0.5488375425338745, "learning_rate": 0.0005756309052363429, "loss": 3.6128, "step": 7908 }, { "epoch": 0.39, "grad_norm": 0.5057998299598694, "learning_rate": 0.0005756248265504491, "loss": 3.294, "step": 7909 }, { "epoch": 0.39, "grad_norm": 0.544693648815155, "learning_rate": 0.0005756187471386126, "loss": 3.2293, "step": 7910 }, { "epoch": 0.39, "grad_norm": 0.5097287893295288, "learning_rate": 0.0005756126670008492, "loss": 3.1767, "step": 7911 }, { "epoch": 0.39, "grad_norm": 0.5124967098236084, "learning_rate": 0.0005756065861371751, "loss": 3.0868, "step": 7912 }, { "epoch": 0.39, "grad_norm": 0.5405759215354919, "learning_rate": 0.0005756005045476064, "loss": 3.4881, "step": 7913 }, { "epoch": 0.39, "grad_norm": 0.5262176394462585, "learning_rate": 0.000575594422232159, "loss": 3.4049, "step": 7914 }, { "epoch": 0.39, "grad_norm": 0.5306533575057983, "learning_rate": 0.0005755883391908489, "loss": 3.3627, "step": 7915 }, { "epoch": 0.39, "grad_norm": 0.5325959920883179, "learning_rate": 0.0005755822554236921, "loss": 3.4184, "step": 7916 }, { "epoch": 0.39, "grad_norm": 0.5592959523200989, "learning_rate": 0.0005755761709307048, "loss": 3.1438, "step": 7917 }, { "epoch": 0.39, "grad_norm": 0.5043314695358276, "learning_rate": 0.0005755700857119028, "loss": 3.2669, "step": 7918 }, { "epoch": 0.39, "grad_norm": 0.5433175563812256, "learning_rate": 0.0005755639997673022, "loss": 3.2691, "step": 7919 }, { "epoch": 0.39, "grad_norm": 0.5013863444328308, "learning_rate": 0.0005755579130969191, "loss": 3.1457, "step": 7920 }, { "epoch": 0.39, "grad_norm": 0.5583698749542236, "learning_rate": 0.0005755518257007696, "loss": 3.6578, "step": 7921 }, { "epoch": 0.39, "grad_norm": 0.5930216312408447, "learning_rate": 0.0005755457375788694, "loss": 3.2728, "step": 7922 }, { "epoch": 0.39, "grad_norm": 0.5604255795478821, "learning_rate": 0.000575539648731235, "loss": 3.2173, "step": 7923 }, { "epoch": 0.39, "grad_norm": 0.5618053674697876, "learning_rate": 0.000575533559157882, "loss": 3.3222, "step": 7924 }, { "epoch": 0.39, "grad_norm": 0.5377988219261169, "learning_rate": 0.0005755274688588268, "loss": 3.3963, "step": 7925 }, { "epoch": 0.39, "grad_norm": 0.5500739216804504, "learning_rate": 0.0005755213778340852, "loss": 3.6868, "step": 7926 }, { "epoch": 0.39, "grad_norm": 0.5406518578529358, "learning_rate": 0.0005755152860836733, "loss": 3.374, "step": 7927 }, { "epoch": 0.39, "grad_norm": 0.5654966235160828, "learning_rate": 0.0005755091936076071, "loss": 3.3569, "step": 7928 }, { "epoch": 0.39, "grad_norm": 0.5690451860427856, "learning_rate": 0.0005755031004059028, "loss": 3.2013, "step": 7929 }, { "epoch": 0.39, "grad_norm": 0.6072752475738525, "learning_rate": 0.0005754970064785763, "loss": 3.3478, "step": 7930 }, { "epoch": 0.39, "grad_norm": 0.5231303572654724, "learning_rate": 0.0005754909118256438, "loss": 3.1507, "step": 7931 }, { "epoch": 0.39, "grad_norm": 0.5188618898391724, "learning_rate": 0.0005754848164471211, "loss": 3.3347, "step": 7932 }, { "epoch": 0.39, "grad_norm": 0.5124830007553101, "learning_rate": 0.0005754787203430245, "loss": 3.5574, "step": 7933 }, { "epoch": 0.39, "grad_norm": 0.5399743914604187, "learning_rate": 0.0005754726235133701, "loss": 3.4946, "step": 7934 }, { "epoch": 0.39, "grad_norm": 0.526694655418396, "learning_rate": 0.0005754665259581736, "loss": 3.2302, "step": 7935 }, { "epoch": 0.39, "grad_norm": 0.5251884460449219, "learning_rate": 0.0005754604276774515, "loss": 3.5666, "step": 7936 }, { "epoch": 0.39, "grad_norm": 0.5409374237060547, "learning_rate": 0.0005754543286712193, "loss": 3.2799, "step": 7937 }, { "epoch": 0.39, "grad_norm": 0.49520036578178406, "learning_rate": 0.0005754482289394938, "loss": 3.3421, "step": 7938 }, { "epoch": 0.39, "grad_norm": 0.5165755152702332, "learning_rate": 0.0005754421284822905, "loss": 3.0666, "step": 7939 }, { "epoch": 0.39, "grad_norm": 0.5416868329048157, "learning_rate": 0.0005754360272996256, "loss": 3.2758, "step": 7940 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.0005754299253915153, "loss": 3.2919, "step": 7941 }, { "epoch": 0.39, "grad_norm": 0.5226501822471619, "learning_rate": 0.0005754238227579755, "loss": 3.2847, "step": 7942 }, { "epoch": 0.39, "grad_norm": 0.5210117101669312, "learning_rate": 0.0005754177193990225, "loss": 3.4445, "step": 7943 }, { "epoch": 0.39, "grad_norm": 0.5392207503318787, "learning_rate": 0.0005754116153146721, "loss": 3.2395, "step": 7944 }, { "epoch": 0.39, "grad_norm": 0.5218811631202698, "learning_rate": 0.0005754055105049406, "loss": 3.3672, "step": 7945 }, { "epoch": 0.39, "grad_norm": 0.5551583170890808, "learning_rate": 0.0005753994049698439, "loss": 3.483, "step": 7946 }, { "epoch": 0.39, "grad_norm": 0.5441166758537292, "learning_rate": 0.0005753932987093983, "loss": 3.2976, "step": 7947 }, { "epoch": 0.39, "grad_norm": 0.5118980407714844, "learning_rate": 0.0005753871917236195, "loss": 3.258, "step": 7948 }, { "epoch": 0.39, "grad_norm": 0.51288902759552, "learning_rate": 0.000575381084012524, "loss": 3.1806, "step": 7949 }, { "epoch": 0.39, "grad_norm": 0.5351516008377075, "learning_rate": 0.0005753749755761277, "loss": 3.2706, "step": 7950 }, { "epoch": 0.39, "grad_norm": 0.5109082460403442, "learning_rate": 0.0005753688664144467, "loss": 3.0689, "step": 7951 }, { "epoch": 0.39, "grad_norm": 0.49430152773857117, "learning_rate": 0.0005753627565274972, "loss": 3.5631, "step": 7952 }, { "epoch": 0.39, "grad_norm": 0.5566909909248352, "learning_rate": 0.0005753566459152952, "loss": 3.3404, "step": 7953 }, { "epoch": 0.39, "grad_norm": 0.576958179473877, "learning_rate": 0.0005753505345778567, "loss": 3.2608, "step": 7954 }, { "epoch": 0.39, "grad_norm": 0.5318806767463684, "learning_rate": 0.0005753444225151978, "loss": 3.2429, "step": 7955 }, { "epoch": 0.39, "grad_norm": 0.5288330316543579, "learning_rate": 0.0005753383097273348, "loss": 3.4162, "step": 7956 }, { "epoch": 0.39, "grad_norm": 0.5240846276283264, "learning_rate": 0.0005753321962142837, "loss": 3.142, "step": 7957 }, { "epoch": 0.39, "grad_norm": 0.5280718207359314, "learning_rate": 0.0005753260819760605, "loss": 3.2296, "step": 7958 }, { "epoch": 0.39, "grad_norm": 0.5125327110290527, "learning_rate": 0.0005753199670126815, "loss": 3.3329, "step": 7959 }, { "epoch": 0.39, "grad_norm": 0.5324898362159729, "learning_rate": 0.0005753138513241626, "loss": 3.3372, "step": 7960 }, { "epoch": 0.39, "grad_norm": 0.519921064376831, "learning_rate": 0.00057530773491052, "loss": 3.653, "step": 7961 }, { "epoch": 0.39, "grad_norm": 0.5081679821014404, "learning_rate": 0.0005753016177717699, "loss": 3.2574, "step": 7962 }, { "epoch": 0.39, "grad_norm": 0.5309244990348816, "learning_rate": 0.0005752954999079282, "loss": 3.3269, "step": 7963 }, { "epoch": 0.39, "grad_norm": 0.5274438261985779, "learning_rate": 0.0005752893813190112, "loss": 3.3909, "step": 7964 }, { "epoch": 0.39, "grad_norm": 0.5856743454933167, "learning_rate": 0.000575283262005035, "loss": 3.2714, "step": 7965 }, { "epoch": 0.39, "grad_norm": 0.528982400894165, "learning_rate": 0.0005752771419660156, "loss": 3.2405, "step": 7966 }, { "epoch": 0.39, "grad_norm": 0.5737332701683044, "learning_rate": 0.0005752710212019692, "loss": 3.5454, "step": 7967 }, { "epoch": 0.39, "grad_norm": 0.5274087190628052, "learning_rate": 0.000575264899712912, "loss": 3.3418, "step": 7968 }, { "epoch": 0.39, "grad_norm": 0.5080131888389587, "learning_rate": 0.0005752587774988598, "loss": 3.3738, "step": 7969 }, { "epoch": 0.39, "grad_norm": 0.4994853138923645, "learning_rate": 0.0005752526545598291, "loss": 3.4255, "step": 7970 }, { "epoch": 0.39, "grad_norm": 0.5325427651405334, "learning_rate": 0.0005752465308958358, "loss": 3.3933, "step": 7971 }, { "epoch": 0.39, "grad_norm": 0.5056576728820801, "learning_rate": 0.0005752404065068963, "loss": 3.4345, "step": 7972 }, { "epoch": 0.39, "grad_norm": 0.5503882765769958, "learning_rate": 0.0005752342813930264, "loss": 3.1064, "step": 7973 }, { "epoch": 0.39, "grad_norm": 0.537910521030426, "learning_rate": 0.0005752281555542423, "loss": 3.264, "step": 7974 }, { "epoch": 0.39, "grad_norm": 0.5261189937591553, "learning_rate": 0.0005752220289905604, "loss": 3.375, "step": 7975 }, { "epoch": 0.39, "grad_norm": 0.5032466650009155, "learning_rate": 0.0005752159017019965, "loss": 3.2479, "step": 7976 }, { "epoch": 0.39, "grad_norm": 0.5199475884437561, "learning_rate": 0.0005752097736885669, "loss": 3.4072, "step": 7977 }, { "epoch": 0.39, "grad_norm": 0.5200490951538086, "learning_rate": 0.0005752036449502878, "loss": 3.2218, "step": 7978 }, { "epoch": 0.39, "grad_norm": 0.5323976874351501, "learning_rate": 0.0005751975154871752, "loss": 3.3384, "step": 7979 }, { "epoch": 0.39, "grad_norm": 0.5439580082893372, "learning_rate": 0.0005751913852992454, "loss": 3.3075, "step": 7980 }, { "epoch": 0.39, "grad_norm": 0.5520688891410828, "learning_rate": 0.0005751852543865143, "loss": 3.2518, "step": 7981 }, { "epoch": 0.39, "grad_norm": 0.5149745345115662, "learning_rate": 0.0005751791227489983, "loss": 3.2855, "step": 7982 }, { "epoch": 0.39, "grad_norm": 0.6031532287597656, "learning_rate": 0.0005751729903867134, "loss": 2.9604, "step": 7983 }, { "epoch": 0.39, "grad_norm": 0.5211678147315979, "learning_rate": 0.0005751668572996758, "loss": 3.5004, "step": 7984 }, { "epoch": 0.39, "grad_norm": 0.5391950011253357, "learning_rate": 0.0005751607234879017, "loss": 3.3339, "step": 7985 }, { "epoch": 0.39, "grad_norm": 0.5196065306663513, "learning_rate": 0.0005751545889514072, "loss": 3.2076, "step": 7986 }, { "epoch": 0.39, "grad_norm": 0.5143944025039673, "learning_rate": 0.0005751484536902084, "loss": 3.1234, "step": 7987 }, { "epoch": 0.39, "grad_norm": 0.5726907253265381, "learning_rate": 0.0005751423177043216, "loss": 3.3739, "step": 7988 }, { "epoch": 0.39, "grad_norm": 0.5271036028862, "learning_rate": 0.0005751361809937629, "loss": 3.4486, "step": 7989 }, { "epoch": 0.39, "grad_norm": 0.5144461393356323, "learning_rate": 0.0005751300435585483, "loss": 3.5016, "step": 7990 }, { "epoch": 0.39, "grad_norm": 0.5567069053649902, "learning_rate": 0.0005751239053986944, "loss": 3.3089, "step": 7991 }, { "epoch": 0.39, "grad_norm": 0.5412687659263611, "learning_rate": 0.0005751177665142169, "loss": 3.2718, "step": 7992 }, { "epoch": 0.39, "grad_norm": 0.5389328598976135, "learning_rate": 0.0005751116269051321, "loss": 3.1432, "step": 7993 }, { "epoch": 0.39, "grad_norm": 0.5082566142082214, "learning_rate": 0.0005751054865714562, "loss": 3.3403, "step": 7994 }, { "epoch": 0.39, "grad_norm": 0.51983243227005, "learning_rate": 0.0005750993455132055, "loss": 3.2146, "step": 7995 }, { "epoch": 0.39, "grad_norm": 0.5300633311271667, "learning_rate": 0.0005750932037303961, "loss": 3.234, "step": 7996 }, { "epoch": 0.39, "grad_norm": 0.5436065196990967, "learning_rate": 0.0005750870612230442, "loss": 3.2246, "step": 7997 }, { "epoch": 0.39, "grad_norm": 0.5409985184669495, "learning_rate": 0.0005750809179911657, "loss": 3.1161, "step": 7998 }, { "epoch": 0.39, "grad_norm": 0.5061076879501343, "learning_rate": 0.0005750747740347771, "loss": 3.2811, "step": 7999 }, { "epoch": 0.39, "grad_norm": 0.5530107617378235, "learning_rate": 0.0005750686293538945, "loss": 3.2673, "step": 8000 }, { "epoch": 0.39, "grad_norm": 0.5269939303398132, "learning_rate": 0.0005750624839485341, "loss": 3.3215, "step": 8001 }, { "epoch": 0.39, "grad_norm": 0.5110950469970703, "learning_rate": 0.000575056337818712, "loss": 3.5774, "step": 8002 }, { "epoch": 0.39, "grad_norm": 0.5932079553604126, "learning_rate": 0.0005750501909644445, "loss": 3.5213, "step": 8003 }, { "epoch": 0.39, "grad_norm": 0.5554304122924805, "learning_rate": 0.0005750440433857477, "loss": 3.2986, "step": 8004 }, { "epoch": 0.39, "grad_norm": 0.5720034241676331, "learning_rate": 0.0005750378950826378, "loss": 3.3181, "step": 8005 }, { "epoch": 0.39, "grad_norm": 0.5665091276168823, "learning_rate": 0.000575031746055131, "loss": 2.9911, "step": 8006 }, { "epoch": 0.39, "grad_norm": 0.5483243465423584, "learning_rate": 0.0005750255963032436, "loss": 3.3377, "step": 8007 }, { "epoch": 0.39, "grad_norm": 0.5081855058670044, "learning_rate": 0.0005750194458269916, "loss": 3.2402, "step": 8008 }, { "epoch": 0.39, "grad_norm": 0.5034002065658569, "learning_rate": 0.0005750132946263914, "loss": 3.2496, "step": 8009 }, { "epoch": 0.39, "grad_norm": 0.6338850259780884, "learning_rate": 0.0005750071427014591, "loss": 3.3337, "step": 8010 }, { "epoch": 0.39, "grad_norm": 0.5657280683517456, "learning_rate": 0.0005750009900522109, "loss": 3.4191, "step": 8011 }, { "epoch": 0.39, "grad_norm": 0.5098117589950562, "learning_rate": 0.000574994836678663, "loss": 3.3851, "step": 8012 }, { "epoch": 0.39, "grad_norm": 0.5298253297805786, "learning_rate": 0.0005749886825808316, "loss": 3.3719, "step": 8013 }, { "epoch": 0.39, "grad_norm": 0.5312026143074036, "learning_rate": 0.000574982527758733, "loss": 3.4547, "step": 8014 }, { "epoch": 0.39, "grad_norm": 0.5396552085876465, "learning_rate": 0.0005749763722123832, "loss": 3.2734, "step": 8015 }, { "epoch": 0.39, "grad_norm": 0.5509809255599976, "learning_rate": 0.0005749702159417988, "loss": 3.4702, "step": 8016 }, { "epoch": 0.39, "grad_norm": 0.5270836353302002, "learning_rate": 0.0005749640589469956, "loss": 3.3203, "step": 8017 }, { "epoch": 0.39, "grad_norm": 0.540736973285675, "learning_rate": 0.00057495790122799, "loss": 3.1879, "step": 8018 }, { "epoch": 0.39, "grad_norm": 0.531613290309906, "learning_rate": 0.0005749517427847982, "loss": 3.5024, "step": 8019 }, { "epoch": 0.39, "grad_norm": 0.5209075808525085, "learning_rate": 0.0005749455836174365, "loss": 3.5551, "step": 8020 }, { "epoch": 0.39, "grad_norm": 0.536415159702301, "learning_rate": 0.0005749394237259209, "loss": 3.2383, "step": 8021 }, { "epoch": 0.39, "grad_norm": 0.5413359999656677, "learning_rate": 0.0005749332631102679, "loss": 3.1601, "step": 8022 }, { "epoch": 0.39, "grad_norm": 0.5244982242584229, "learning_rate": 0.0005749271017704935, "loss": 3.2752, "step": 8023 }, { "epoch": 0.39, "grad_norm": 0.5341219305992126, "learning_rate": 0.0005749209397066142, "loss": 3.1301, "step": 8024 }, { "epoch": 0.39, "grad_norm": 0.53799968957901, "learning_rate": 0.0005749147769186459, "loss": 3.2056, "step": 8025 }, { "epoch": 0.39, "grad_norm": 0.5490437746047974, "learning_rate": 0.0005749086134066051, "loss": 3.5046, "step": 8026 }, { "epoch": 0.39, "grad_norm": 0.5136128664016724, "learning_rate": 0.0005749024491705078, "loss": 3.2401, "step": 8027 }, { "epoch": 0.39, "grad_norm": 0.566075325012207, "learning_rate": 0.0005748962842103703, "loss": 3.2781, "step": 8028 }, { "epoch": 0.39, "grad_norm": 0.5798686742782593, "learning_rate": 0.0005748901185262091, "loss": 3.3664, "step": 8029 }, { "epoch": 0.39, "grad_norm": 0.541175365447998, "learning_rate": 0.0005748839521180401, "loss": 3.4175, "step": 8030 }, { "epoch": 0.39, "grad_norm": 0.5209834575653076, "learning_rate": 0.0005748777849858797, "loss": 3.3311, "step": 8031 }, { "epoch": 0.39, "grad_norm": 0.5575250387191772, "learning_rate": 0.0005748716171297441, "loss": 3.3556, "step": 8032 }, { "epoch": 0.39, "grad_norm": 0.5445090532302856, "learning_rate": 0.0005748654485496496, "loss": 3.2137, "step": 8033 }, { "epoch": 0.39, "grad_norm": 0.5050318241119385, "learning_rate": 0.0005748592792456124, "loss": 3.5883, "step": 8034 }, { "epoch": 0.39, "grad_norm": 0.5381130576133728, "learning_rate": 0.0005748531092176487, "loss": 3.3816, "step": 8035 }, { "epoch": 0.39, "grad_norm": 0.5211299657821655, "learning_rate": 0.0005748469384657749, "loss": 3.405, "step": 8036 }, { "epoch": 0.39, "grad_norm": 0.543992817401886, "learning_rate": 0.0005748407669900071, "loss": 3.1935, "step": 8037 }, { "epoch": 0.39, "grad_norm": 0.5342070460319519, "learning_rate": 0.0005748345947903615, "loss": 3.2672, "step": 8038 }, { "epoch": 0.39, "grad_norm": 0.5374672412872314, "learning_rate": 0.0005748284218668546, "loss": 3.6805, "step": 8039 }, { "epoch": 0.39, "grad_norm": 0.5109804272651672, "learning_rate": 0.0005748222482195026, "loss": 3.3104, "step": 8040 }, { "epoch": 0.39, "grad_norm": 0.4903632402420044, "learning_rate": 0.0005748160738483216, "loss": 3.3117, "step": 8041 }, { "epoch": 0.39, "grad_norm": 0.525119423866272, "learning_rate": 0.0005748098987533279, "loss": 3.3496, "step": 8042 }, { "epoch": 0.39, "grad_norm": 0.5801721215248108, "learning_rate": 0.0005748037229345379, "loss": 3.2369, "step": 8043 }, { "epoch": 0.39, "grad_norm": 0.514396071434021, "learning_rate": 0.0005747975463919677, "loss": 3.2263, "step": 8044 }, { "epoch": 0.39, "grad_norm": 0.5463517904281616, "learning_rate": 0.0005747913691256338, "loss": 3.296, "step": 8045 }, { "epoch": 0.39, "grad_norm": 0.5206927061080933, "learning_rate": 0.0005747851911355521, "loss": 3.3216, "step": 8046 }, { "epoch": 0.39, "grad_norm": 0.5000367760658264, "learning_rate": 0.0005747790124217393, "loss": 3.2014, "step": 8047 }, { "epoch": 0.39, "grad_norm": 0.5487034320831299, "learning_rate": 0.0005747728329842113, "loss": 3.0561, "step": 8048 }, { "epoch": 0.39, "grad_norm": 0.5461079478263855, "learning_rate": 0.0005747666528229847, "loss": 3.3097, "step": 8049 }, { "epoch": 0.39, "grad_norm": 0.4785720407962799, "learning_rate": 0.0005747604719380754, "loss": 3.4331, "step": 8050 }, { "epoch": 0.39, "grad_norm": 0.5695480704307556, "learning_rate": 0.0005747542903295, "loss": 3.2418, "step": 8051 }, { "epoch": 0.39, "grad_norm": 0.5708943009376526, "learning_rate": 0.0005747481079972749, "loss": 3.3734, "step": 8052 }, { "epoch": 0.39, "grad_norm": 0.5381008386611938, "learning_rate": 0.0005747419249414159, "loss": 3.4608, "step": 8053 }, { "epoch": 0.39, "grad_norm": 0.5541656017303467, "learning_rate": 0.0005747357411619396, "loss": 3.3497, "step": 8054 }, { "epoch": 0.39, "grad_norm": 0.5573221445083618, "learning_rate": 0.0005747295566588622, "loss": 3.3627, "step": 8055 }, { "epoch": 0.39, "grad_norm": 0.5886656641960144, "learning_rate": 0.0005747233714322001, "loss": 3.399, "step": 8056 }, { "epoch": 0.39, "grad_norm": 0.5136348605155945, "learning_rate": 0.0005747171854819694, "loss": 3.3561, "step": 8057 }, { "epoch": 0.39, "grad_norm": 0.5364802479743958, "learning_rate": 0.0005747109988081865, "loss": 3.4321, "step": 8058 }, { "epoch": 0.39, "grad_norm": 0.5847378969192505, "learning_rate": 0.000574704811410868, "loss": 3.3716, "step": 8059 }, { "epoch": 0.4, "grad_norm": 0.5139754414558411, "learning_rate": 0.0005746986232900295, "loss": 3.3843, "step": 8060 }, { "epoch": 0.4, "grad_norm": 0.5183747410774231, "learning_rate": 0.0005746924344456879, "loss": 3.3141, "step": 8061 }, { "epoch": 0.4, "grad_norm": 0.5344926714897156, "learning_rate": 0.0005746862448778593, "loss": 3.4683, "step": 8062 }, { "epoch": 0.4, "grad_norm": 0.5372681021690369, "learning_rate": 0.0005746800545865599, "loss": 3.4784, "step": 8063 }, { "epoch": 0.4, "grad_norm": 0.5549917817115784, "learning_rate": 0.0005746738635718061, "loss": 3.2525, "step": 8064 }, { "epoch": 0.4, "grad_norm": 0.5214190483093262, "learning_rate": 0.0005746676718336143, "loss": 3.4208, "step": 8065 }, { "epoch": 0.4, "grad_norm": 0.5115858912467957, "learning_rate": 0.0005746614793720007, "loss": 3.1698, "step": 8066 }, { "epoch": 0.4, "grad_norm": 0.571573793888092, "learning_rate": 0.0005746552861869815, "loss": 3.3276, "step": 8067 }, { "epoch": 0.4, "grad_norm": 0.5278741121292114, "learning_rate": 0.0005746490922785733, "loss": 3.3125, "step": 8068 }, { "epoch": 0.4, "grad_norm": 0.5263270139694214, "learning_rate": 0.0005746428976467922, "loss": 3.39, "step": 8069 }, { "epoch": 0.4, "grad_norm": 0.553209125995636, "learning_rate": 0.0005746367022916545, "loss": 3.1092, "step": 8070 }, { "epoch": 0.4, "grad_norm": 0.5743881464004517, "learning_rate": 0.0005746305062131765, "loss": 3.3103, "step": 8071 }, { "epoch": 0.4, "grad_norm": 0.5375149846076965, "learning_rate": 0.0005746243094113748, "loss": 3.4892, "step": 8072 }, { "epoch": 0.4, "grad_norm": 0.5758310556411743, "learning_rate": 0.0005746181118862655, "loss": 3.199, "step": 8073 }, { "epoch": 0.4, "grad_norm": 0.522111177444458, "learning_rate": 0.0005746119136378648, "loss": 3.2247, "step": 8074 }, { "epoch": 0.4, "grad_norm": 0.5190290808677673, "learning_rate": 0.0005746057146661892, "loss": 3.1963, "step": 8075 }, { "epoch": 0.4, "grad_norm": 0.5556468367576599, "learning_rate": 0.0005745995149712552, "loss": 3.0931, "step": 8076 }, { "epoch": 0.4, "grad_norm": 0.5247904658317566, "learning_rate": 0.0005745933145530787, "loss": 3.28, "step": 8077 }, { "epoch": 0.4, "grad_norm": 0.5039224624633789, "learning_rate": 0.0005745871134116763, "loss": 3.3245, "step": 8078 }, { "epoch": 0.4, "grad_norm": 0.5405135750770569, "learning_rate": 0.0005745809115470643, "loss": 3.1882, "step": 8079 }, { "epoch": 0.4, "grad_norm": 0.5452626347541809, "learning_rate": 0.000574574708959259, "loss": 3.3268, "step": 8080 }, { "epoch": 0.4, "grad_norm": 0.5153411626815796, "learning_rate": 0.000574568505648277, "loss": 3.3195, "step": 8081 }, { "epoch": 0.4, "grad_norm": 0.5305858254432678, "learning_rate": 0.0005745623016141341, "loss": 3.4117, "step": 8082 }, { "epoch": 0.4, "grad_norm": 0.5426940321922302, "learning_rate": 0.000574556096856847, "loss": 3.1285, "step": 8083 }, { "epoch": 0.4, "grad_norm": 0.4963018596172333, "learning_rate": 0.000574549891376432, "loss": 3.2248, "step": 8084 }, { "epoch": 0.4, "grad_norm": 0.5105463266372681, "learning_rate": 0.0005745436851729055, "loss": 3.2956, "step": 8085 }, { "epoch": 0.4, "grad_norm": 0.5522632598876953, "learning_rate": 0.0005745374782462837, "loss": 3.0467, "step": 8086 }, { "epoch": 0.4, "grad_norm": 0.5475056767463684, "learning_rate": 0.000574531270596583, "loss": 3.382, "step": 8087 }, { "epoch": 0.4, "grad_norm": 0.501108705997467, "learning_rate": 0.0005745250622238198, "loss": 3.5365, "step": 8088 }, { "epoch": 0.4, "grad_norm": 0.5344222187995911, "learning_rate": 0.0005745188531280105, "loss": 3.2848, "step": 8089 }, { "epoch": 0.4, "grad_norm": 0.5425450801849365, "learning_rate": 0.0005745126433091712, "loss": 3.5062, "step": 8090 }, { "epoch": 0.4, "grad_norm": 0.5173064470291138, "learning_rate": 0.0005745064327673185, "loss": 3.1953, "step": 8091 }, { "epoch": 0.4, "grad_norm": 0.5049318671226501, "learning_rate": 0.0005745002215024687, "loss": 3.4188, "step": 8092 }, { "epoch": 0.4, "grad_norm": 0.5095267295837402, "learning_rate": 0.0005744940095146381, "loss": 3.5542, "step": 8093 }, { "epoch": 0.4, "grad_norm": 0.5197057127952576, "learning_rate": 0.0005744877968038432, "loss": 3.341, "step": 8094 }, { "epoch": 0.4, "grad_norm": 0.4955214262008667, "learning_rate": 0.0005744815833701003, "loss": 3.3152, "step": 8095 }, { "epoch": 0.4, "grad_norm": 0.5291468501091003, "learning_rate": 0.0005744753692134256, "loss": 3.3581, "step": 8096 }, { "epoch": 0.4, "grad_norm": 0.5096137523651123, "learning_rate": 0.0005744691543338357, "loss": 3.4981, "step": 8097 }, { "epoch": 0.4, "grad_norm": 0.47097456455230713, "learning_rate": 0.0005744629387313469, "loss": 3.2238, "step": 8098 }, { "epoch": 0.4, "grad_norm": 0.4965920150279999, "learning_rate": 0.0005744567224059754, "loss": 3.5343, "step": 8099 }, { "epoch": 0.4, "grad_norm": 0.5325508713722229, "learning_rate": 0.0005744505053577379, "loss": 3.2636, "step": 8100 }, { "epoch": 0.4, "grad_norm": 0.5449921488761902, "learning_rate": 0.0005744442875866504, "loss": 3.4296, "step": 8101 }, { "epoch": 0.4, "grad_norm": 0.5376118421554565, "learning_rate": 0.0005744380690927295, "loss": 3.2763, "step": 8102 }, { "epoch": 0.4, "grad_norm": 0.5433290600776672, "learning_rate": 0.0005744318498759917, "loss": 3.2533, "step": 8103 }, { "epoch": 0.4, "grad_norm": 0.5353100299835205, "learning_rate": 0.000574425629936453, "loss": 3.3458, "step": 8104 }, { "epoch": 0.4, "grad_norm": 0.5136173963546753, "learning_rate": 0.0005744194092741302, "loss": 3.3949, "step": 8105 }, { "epoch": 0.4, "grad_norm": 0.5367599725723267, "learning_rate": 0.0005744131878890394, "loss": 3.2921, "step": 8106 }, { "epoch": 0.4, "grad_norm": 0.5330642461776733, "learning_rate": 0.0005744069657811971, "loss": 3.3031, "step": 8107 }, { "epoch": 0.4, "grad_norm": 0.5547467470169067, "learning_rate": 0.0005744007429506195, "loss": 3.5355, "step": 8108 }, { "epoch": 0.4, "grad_norm": 0.47983911633491516, "learning_rate": 0.0005743945193973233, "loss": 3.4107, "step": 8109 }, { "epoch": 0.4, "grad_norm": 0.5580479502677917, "learning_rate": 0.0005743882951213247, "loss": 3.2559, "step": 8110 }, { "epoch": 0.4, "grad_norm": 0.5408802628517151, "learning_rate": 0.0005743820701226402, "loss": 3.2648, "step": 8111 }, { "epoch": 0.4, "grad_norm": 0.5535223484039307, "learning_rate": 0.0005743758444012861, "loss": 3.3305, "step": 8112 }, { "epoch": 0.4, "grad_norm": 0.5365628600120544, "learning_rate": 0.0005743696179572788, "loss": 3.4065, "step": 8113 }, { "epoch": 0.4, "grad_norm": 0.6094711422920227, "learning_rate": 0.0005743633907906348, "loss": 3.5076, "step": 8114 }, { "epoch": 0.4, "grad_norm": 0.5201326608657837, "learning_rate": 0.0005743571629013704, "loss": 3.4203, "step": 8115 }, { "epoch": 0.4, "grad_norm": 0.5079147219657898, "learning_rate": 0.000574350934289502, "loss": 3.2888, "step": 8116 }, { "epoch": 0.4, "grad_norm": 0.5209977030754089, "learning_rate": 0.0005743447049550461, "loss": 3.292, "step": 8117 }, { "epoch": 0.4, "grad_norm": 0.524752140045166, "learning_rate": 0.000574338474898019, "loss": 3.129, "step": 8118 }, { "epoch": 0.4, "grad_norm": 0.5393475890159607, "learning_rate": 0.0005743322441184372, "loss": 3.1805, "step": 8119 }, { "epoch": 0.4, "grad_norm": 0.5741223096847534, "learning_rate": 0.000574326012616317, "loss": 3.3835, "step": 8120 }, { "epoch": 0.4, "grad_norm": 0.531087338924408, "learning_rate": 0.0005743197803916749, "loss": 3.4494, "step": 8121 }, { "epoch": 0.4, "grad_norm": 0.5499753952026367, "learning_rate": 0.0005743135474445273, "loss": 3.1866, "step": 8122 }, { "epoch": 0.4, "grad_norm": 0.5074490904808044, "learning_rate": 0.0005743073137748906, "loss": 3.3362, "step": 8123 }, { "epoch": 0.4, "grad_norm": 0.4964973032474518, "learning_rate": 0.0005743010793827813, "loss": 3.1807, "step": 8124 }, { "epoch": 0.4, "grad_norm": 0.4979309141635895, "learning_rate": 0.0005742948442682157, "loss": 3.3788, "step": 8125 }, { "epoch": 0.4, "grad_norm": 0.5132046341896057, "learning_rate": 0.0005742886084312102, "loss": 3.2557, "step": 8126 }, { "epoch": 0.4, "grad_norm": 0.5504202246665955, "learning_rate": 0.0005742823718717814, "loss": 3.3969, "step": 8127 }, { "epoch": 0.4, "grad_norm": 0.5701044797897339, "learning_rate": 0.0005742761345899455, "loss": 3.2887, "step": 8128 }, { "epoch": 0.4, "grad_norm": 0.5031047463417053, "learning_rate": 0.0005742698965857192, "loss": 3.2875, "step": 8129 }, { "epoch": 0.4, "grad_norm": 0.512702465057373, "learning_rate": 0.0005742636578591186, "loss": 3.2581, "step": 8130 }, { "epoch": 0.4, "grad_norm": 0.5461293458938599, "learning_rate": 0.0005742574184101605, "loss": 3.3255, "step": 8131 }, { "epoch": 0.4, "grad_norm": 0.5259397625923157, "learning_rate": 0.000574251178238861, "loss": 2.9756, "step": 8132 }, { "epoch": 0.4, "grad_norm": 0.5428778529167175, "learning_rate": 0.0005742449373452367, "loss": 3.2215, "step": 8133 }, { "epoch": 0.4, "grad_norm": 0.5212182402610779, "learning_rate": 0.0005742386957293041, "loss": 3.4854, "step": 8134 }, { "epoch": 0.4, "grad_norm": 0.537875235080719, "learning_rate": 0.0005742324533910794, "loss": 3.467, "step": 8135 }, { "epoch": 0.4, "grad_norm": 0.5199194550514221, "learning_rate": 0.0005742262103305793, "loss": 3.432, "step": 8136 }, { "epoch": 0.4, "grad_norm": 0.498753160238266, "learning_rate": 0.0005742199665478201, "loss": 3.2888, "step": 8137 }, { "epoch": 0.4, "grad_norm": 0.5349212288856506, "learning_rate": 0.0005742137220428184, "loss": 3.3626, "step": 8138 }, { "epoch": 0.4, "grad_norm": 0.5592776536941528, "learning_rate": 0.0005742074768155903, "loss": 3.5535, "step": 8139 }, { "epoch": 0.4, "grad_norm": 0.5438594818115234, "learning_rate": 0.0005742012308661527, "loss": 3.3263, "step": 8140 }, { "epoch": 0.4, "grad_norm": 0.5591511726379395, "learning_rate": 0.0005741949841945217, "loss": 3.191, "step": 8141 }, { "epoch": 0.4, "grad_norm": 0.5086604356765747, "learning_rate": 0.000574188736800714, "loss": 3.4601, "step": 8142 }, { "epoch": 0.4, "grad_norm": 0.5508185029029846, "learning_rate": 0.0005741824886847458, "loss": 3.2594, "step": 8143 }, { "epoch": 0.4, "grad_norm": 0.5638971328735352, "learning_rate": 0.0005741762398466337, "loss": 3.366, "step": 8144 }, { "epoch": 0.4, "grad_norm": 0.5249466300010681, "learning_rate": 0.0005741699902863943, "loss": 3.3142, "step": 8145 }, { "epoch": 0.4, "grad_norm": 0.5074349045753479, "learning_rate": 0.0005741637400040438, "loss": 3.4865, "step": 8146 }, { "epoch": 0.4, "grad_norm": 0.5369219779968262, "learning_rate": 0.0005741574889995987, "loss": 3.2189, "step": 8147 }, { "epoch": 0.4, "grad_norm": 0.546882688999176, "learning_rate": 0.0005741512372730757, "loss": 3.2913, "step": 8148 }, { "epoch": 0.4, "grad_norm": 0.5176022052764893, "learning_rate": 0.000574144984824491, "loss": 3.2776, "step": 8149 }, { "epoch": 0.4, "grad_norm": 0.5151086449623108, "learning_rate": 0.0005741387316538612, "loss": 3.3433, "step": 8150 }, { "epoch": 0.4, "grad_norm": 0.5456839799880981, "learning_rate": 0.0005741324777612027, "loss": 3.3781, "step": 8151 }, { "epoch": 0.4, "grad_norm": 0.591968834400177, "learning_rate": 0.0005741262231465321, "loss": 3.3347, "step": 8152 }, { "epoch": 0.4, "grad_norm": 0.5454823970794678, "learning_rate": 0.0005741199678098657, "loss": 3.6877, "step": 8153 }, { "epoch": 0.4, "grad_norm": 0.5745152831077576, "learning_rate": 0.00057411371175122, "loss": 3.3773, "step": 8154 }, { "epoch": 0.4, "grad_norm": 0.5442945957183838, "learning_rate": 0.0005741074549706117, "loss": 3.4401, "step": 8155 }, { "epoch": 0.4, "grad_norm": 0.5125526785850525, "learning_rate": 0.000574101197468057, "loss": 3.3032, "step": 8156 }, { "epoch": 0.4, "grad_norm": 0.5886013507843018, "learning_rate": 0.0005740949392435726, "loss": 3.3589, "step": 8157 }, { "epoch": 0.4, "grad_norm": 0.5841769576072693, "learning_rate": 0.0005740886802971748, "loss": 3.1734, "step": 8158 }, { "epoch": 0.4, "grad_norm": 0.5075914263725281, "learning_rate": 0.0005740824206288801, "loss": 3.2588, "step": 8159 }, { "epoch": 0.4, "grad_norm": 0.5713216662406921, "learning_rate": 0.0005740761602387052, "loss": 3.1835, "step": 8160 }, { "epoch": 0.4, "grad_norm": 0.5094101428985596, "learning_rate": 0.0005740698991266664, "loss": 3.5436, "step": 8161 }, { "epoch": 0.4, "grad_norm": 0.49177950620651245, "learning_rate": 0.0005740636372927802, "loss": 3.3805, "step": 8162 }, { "epoch": 0.4, "grad_norm": 0.5621153116226196, "learning_rate": 0.0005740573747370632, "loss": 3.1933, "step": 8163 }, { "epoch": 0.4, "grad_norm": 0.5192760229110718, "learning_rate": 0.0005740511114595317, "loss": 3.2768, "step": 8164 }, { "epoch": 0.4, "grad_norm": 0.5471800565719604, "learning_rate": 0.0005740448474602024, "loss": 3.3567, "step": 8165 }, { "epoch": 0.4, "grad_norm": 0.541714608669281, "learning_rate": 0.0005740385827390916, "loss": 3.1748, "step": 8166 }, { "epoch": 0.4, "grad_norm": 0.4933028519153595, "learning_rate": 0.000574032317296216, "loss": 3.3155, "step": 8167 }, { "epoch": 0.4, "grad_norm": 0.5285734534263611, "learning_rate": 0.0005740260511315921, "loss": 3.0752, "step": 8168 }, { "epoch": 0.4, "grad_norm": 0.5299739837646484, "learning_rate": 0.0005740197842452362, "loss": 3.3437, "step": 8169 }, { "epoch": 0.4, "grad_norm": 0.5052409768104553, "learning_rate": 0.000574013516637165, "loss": 3.4159, "step": 8170 }, { "epoch": 0.4, "grad_norm": 0.522113561630249, "learning_rate": 0.0005740072483073948, "loss": 3.3624, "step": 8171 }, { "epoch": 0.4, "grad_norm": 0.5488013625144958, "learning_rate": 0.0005740009792559424, "loss": 3.338, "step": 8172 }, { "epoch": 0.4, "grad_norm": 0.5112448334693909, "learning_rate": 0.000573994709482824, "loss": 3.3995, "step": 8173 }, { "epoch": 0.4, "grad_norm": 0.5400604605674744, "learning_rate": 0.0005739884389880564, "loss": 3.0325, "step": 8174 }, { "epoch": 0.4, "grad_norm": 0.5453541874885559, "learning_rate": 0.000573982167771656, "loss": 3.333, "step": 8175 }, { "epoch": 0.4, "grad_norm": 0.5471153855323792, "learning_rate": 0.0005739758958336392, "loss": 3.2572, "step": 8176 }, { "epoch": 0.4, "grad_norm": 0.5193156003952026, "learning_rate": 0.0005739696231740226, "loss": 3.1392, "step": 8177 }, { "epoch": 0.4, "grad_norm": 0.5482691526412964, "learning_rate": 0.0005739633497928228, "loss": 3.2852, "step": 8178 }, { "epoch": 0.4, "grad_norm": 0.5617150664329529, "learning_rate": 0.0005739570756900563, "loss": 3.3936, "step": 8179 }, { "epoch": 0.4, "grad_norm": 0.5217198133468628, "learning_rate": 0.0005739508008657394, "loss": 3.2155, "step": 8180 }, { "epoch": 0.4, "grad_norm": 0.5516659021377563, "learning_rate": 0.000573944525319889, "loss": 3.2566, "step": 8181 }, { "epoch": 0.4, "grad_norm": 0.5190207362174988, "learning_rate": 0.0005739382490525214, "loss": 3.6173, "step": 8182 }, { "epoch": 0.4, "grad_norm": 0.5515297055244446, "learning_rate": 0.0005739319720636532, "loss": 3.1461, "step": 8183 }, { "epoch": 0.4, "grad_norm": 0.4930954873561859, "learning_rate": 0.0005739256943533007, "loss": 3.3663, "step": 8184 }, { "epoch": 0.4, "grad_norm": 0.5274080038070679, "learning_rate": 0.0005739194159214808, "loss": 3.5295, "step": 8185 }, { "epoch": 0.4, "grad_norm": 0.5591486096382141, "learning_rate": 0.00057391313676821, "loss": 3.4101, "step": 8186 }, { "epoch": 0.4, "grad_norm": 0.5552020072937012, "learning_rate": 0.0005739068568935046, "loss": 3.3131, "step": 8187 }, { "epoch": 0.4, "grad_norm": 0.5581715106964111, "learning_rate": 0.0005739005762973813, "loss": 3.1061, "step": 8188 }, { "epoch": 0.4, "grad_norm": 0.5154845714569092, "learning_rate": 0.0005738942949798564, "loss": 3.3115, "step": 8189 }, { "epoch": 0.4, "grad_norm": 0.49115267395973206, "learning_rate": 0.0005738880129409469, "loss": 3.395, "step": 8190 }, { "epoch": 0.4, "grad_norm": 0.5890899896621704, "learning_rate": 0.0005738817301806689, "loss": 3.1336, "step": 8191 }, { "epoch": 0.4, "grad_norm": 0.5730346441268921, "learning_rate": 0.0005738754466990393, "loss": 3.2823, "step": 8192 }, { "epoch": 0.4, "grad_norm": 0.5348773002624512, "learning_rate": 0.0005738691624960743, "loss": 3.3428, "step": 8193 }, { "epoch": 0.4, "grad_norm": 0.5359728336334229, "learning_rate": 0.0005738628775717907, "loss": 3.3814, "step": 8194 }, { "epoch": 0.4, "grad_norm": 0.5193948149681091, "learning_rate": 0.000573856591926205, "loss": 3.3435, "step": 8195 }, { "epoch": 0.4, "grad_norm": 0.5473117232322693, "learning_rate": 0.0005738503055593338, "loss": 3.5604, "step": 8196 }, { "epoch": 0.4, "grad_norm": 0.5555245280265808, "learning_rate": 0.0005738440184711934, "loss": 3.3051, "step": 8197 }, { "epoch": 0.4, "grad_norm": 0.6440548896789551, "learning_rate": 0.0005738377306618008, "loss": 3.357, "step": 8198 }, { "epoch": 0.4, "grad_norm": 0.4919479489326477, "learning_rate": 0.0005738314421311722, "loss": 3.1436, "step": 8199 }, { "epoch": 0.4, "grad_norm": 0.5028539299964905, "learning_rate": 0.0005738251528793244, "loss": 3.3776, "step": 8200 }, { "epoch": 0.4, "grad_norm": 0.5393859148025513, "learning_rate": 0.0005738188629062737, "loss": 3.3237, "step": 8201 }, { "epoch": 0.4, "grad_norm": 0.5415965914726257, "learning_rate": 0.0005738125722120368, "loss": 3.3788, "step": 8202 }, { "epoch": 0.4, "grad_norm": 0.5266854763031006, "learning_rate": 0.0005738062807966303, "loss": 3.3418, "step": 8203 }, { "epoch": 0.4, "grad_norm": 0.5485467314720154, "learning_rate": 0.0005737999886600709, "loss": 3.3907, "step": 8204 }, { "epoch": 0.4, "grad_norm": 0.5207034945487976, "learning_rate": 0.0005737936958023749, "loss": 3.3886, "step": 8205 }, { "epoch": 0.4, "grad_norm": 0.5115718841552734, "learning_rate": 0.0005737874022235589, "loss": 3.4131, "step": 8206 }, { "epoch": 0.4, "grad_norm": 0.52555251121521, "learning_rate": 0.0005737811079236397, "loss": 3.308, "step": 8207 }, { "epoch": 0.4, "grad_norm": 0.5632022023200989, "learning_rate": 0.0005737748129026338, "loss": 3.2812, "step": 8208 }, { "epoch": 0.4, "grad_norm": 0.5216935873031616, "learning_rate": 0.0005737685171605577, "loss": 3.3546, "step": 8209 }, { "epoch": 0.4, "grad_norm": 0.5155556201934814, "learning_rate": 0.0005737622206974279, "loss": 3.3297, "step": 8210 }, { "epoch": 0.4, "grad_norm": 0.5012245178222656, "learning_rate": 0.0005737559235132612, "loss": 3.4148, "step": 8211 }, { "epoch": 0.4, "grad_norm": 0.8418143391609192, "learning_rate": 0.000573749625608074, "loss": 3.4831, "step": 8212 }, { "epoch": 0.4, "grad_norm": 0.5532170534133911, "learning_rate": 0.0005737433269818831, "loss": 3.3624, "step": 8213 }, { "epoch": 0.4, "grad_norm": 0.5478039979934692, "learning_rate": 0.0005737370276347049, "loss": 3.1837, "step": 8214 }, { "epoch": 0.4, "grad_norm": 0.5057279467582703, "learning_rate": 0.0005737307275665561, "loss": 3.2443, "step": 8215 }, { "epoch": 0.4, "grad_norm": 0.5062552690505981, "learning_rate": 0.0005737244267774531, "loss": 3.423, "step": 8216 }, { "epoch": 0.4, "grad_norm": 0.5578494071960449, "learning_rate": 0.0005737181252674127, "loss": 3.2908, "step": 8217 }, { "epoch": 0.4, "grad_norm": 0.5365298390388489, "learning_rate": 0.0005737118230364515, "loss": 3.4519, "step": 8218 }, { "epoch": 0.4, "grad_norm": 0.4922030568122864, "learning_rate": 0.0005737055200845861, "loss": 3.3189, "step": 8219 }, { "epoch": 0.4, "grad_norm": 0.5830522775650024, "learning_rate": 0.0005736992164118328, "loss": 3.1601, "step": 8220 }, { "epoch": 0.4, "grad_norm": 0.5145552158355713, "learning_rate": 0.0005736929120182086, "loss": 3.4044, "step": 8221 }, { "epoch": 0.4, "grad_norm": 0.5255774259567261, "learning_rate": 0.0005736866069037299, "loss": 3.2494, "step": 8222 }, { "epoch": 0.4, "grad_norm": 0.518876314163208, "learning_rate": 0.0005736803010684134, "loss": 3.1429, "step": 8223 }, { "epoch": 0.4, "grad_norm": 0.49960964918136597, "learning_rate": 0.0005736739945122756, "loss": 3.1982, "step": 8224 }, { "epoch": 0.4, "grad_norm": 0.5569823980331421, "learning_rate": 0.0005736676872353331, "loss": 3.2566, "step": 8225 }, { "epoch": 0.4, "grad_norm": 0.5008841753005981, "learning_rate": 0.0005736613792376027, "loss": 3.3866, "step": 8226 }, { "epoch": 0.4, "grad_norm": 0.5104358196258545, "learning_rate": 0.0005736550705191009, "loss": 3.291, "step": 8227 }, { "epoch": 0.4, "grad_norm": 0.5353780388832092, "learning_rate": 0.0005736487610798443, "loss": 3.2402, "step": 8228 }, { "epoch": 0.4, "grad_norm": 0.5313364863395691, "learning_rate": 0.0005736424509198495, "loss": 3.5111, "step": 8229 }, { "epoch": 0.4, "grad_norm": 0.520453155040741, "learning_rate": 0.0005736361400391332, "loss": 3.4149, "step": 8230 }, { "epoch": 0.4, "grad_norm": 0.5445863008499146, "learning_rate": 0.0005736298284377119, "loss": 3.2695, "step": 8231 }, { "epoch": 0.4, "grad_norm": 0.5336853265762329, "learning_rate": 0.0005736235161156023, "loss": 3.4905, "step": 8232 }, { "epoch": 0.4, "grad_norm": 0.5520970225334167, "learning_rate": 0.000573617203072821, "loss": 3.3523, "step": 8233 }, { "epoch": 0.4, "grad_norm": 0.5282114744186401, "learning_rate": 0.0005736108893093847, "loss": 3.2539, "step": 8234 }, { "epoch": 0.4, "grad_norm": 0.560897946357727, "learning_rate": 0.00057360457482531, "loss": 3.3655, "step": 8235 }, { "epoch": 0.4, "grad_norm": 0.5395894646644592, "learning_rate": 0.0005735982596206133, "loss": 3.3413, "step": 8236 }, { "epoch": 0.4, "grad_norm": 0.5417824983596802, "learning_rate": 0.0005735919436953117, "loss": 3.2808, "step": 8237 }, { "epoch": 0.4, "grad_norm": 0.5005999207496643, "learning_rate": 0.0005735856270494214, "loss": 3.3514, "step": 8238 }, { "epoch": 0.4, "grad_norm": 0.5334196090698242, "learning_rate": 0.0005735793096829593, "loss": 3.4646, "step": 8239 }, { "epoch": 0.4, "grad_norm": 0.571017861366272, "learning_rate": 0.0005735729915959419, "loss": 3.3096, "step": 8240 }, { "epoch": 0.4, "grad_norm": 0.560081422328949, "learning_rate": 0.0005735666727883861, "loss": 3.3187, "step": 8241 }, { "epoch": 0.4, "grad_norm": 0.5151913166046143, "learning_rate": 0.000573560353260308, "loss": 3.3688, "step": 8242 }, { "epoch": 0.4, "grad_norm": 0.5449792742729187, "learning_rate": 0.0005735540330117247, "loss": 3.1677, "step": 8243 }, { "epoch": 0.4, "grad_norm": 0.530566394329071, "learning_rate": 0.0005735477120426528, "loss": 3.0801, "step": 8244 }, { "epoch": 0.4, "grad_norm": 0.5325320363044739, "learning_rate": 0.0005735413903531089, "loss": 3.2273, "step": 8245 }, { "epoch": 0.4, "grad_norm": 0.5118095874786377, "learning_rate": 0.0005735350679431095, "loss": 3.2628, "step": 8246 }, { "epoch": 0.4, "grad_norm": 0.5123176574707031, "learning_rate": 0.0005735287448126714, "loss": 3.2, "step": 8247 }, { "epoch": 0.4, "grad_norm": 0.5000588297843933, "learning_rate": 0.0005735224209618113, "loss": 3.2256, "step": 8248 }, { "epoch": 0.4, "grad_norm": 0.6550585627555847, "learning_rate": 0.0005735160963905458, "loss": 3.3236, "step": 8249 }, { "epoch": 0.4, "grad_norm": 0.5188483595848083, "learning_rate": 0.0005735097710988914, "loss": 3.2484, "step": 8250 }, { "epoch": 0.4, "grad_norm": 0.5245037078857422, "learning_rate": 0.000573503445086865, "loss": 3.1947, "step": 8251 }, { "epoch": 0.4, "grad_norm": 0.5324363708496094, "learning_rate": 0.0005734971183544832, "loss": 3.2969, "step": 8252 }, { "epoch": 0.4, "grad_norm": 0.5693351626396179, "learning_rate": 0.0005734907909017624, "loss": 3.4193, "step": 8253 }, { "epoch": 0.4, "grad_norm": 0.5394246578216553, "learning_rate": 0.0005734844627287197, "loss": 3.3065, "step": 8254 }, { "epoch": 0.4, "grad_norm": 0.5450414419174194, "learning_rate": 0.0005734781338353716, "loss": 3.4544, "step": 8255 }, { "epoch": 0.4, "grad_norm": 0.5223867297172546, "learning_rate": 0.0005734718042217345, "loss": 3.3402, "step": 8256 }, { "epoch": 0.4, "grad_norm": 0.5268417000770569, "learning_rate": 0.0005734654738878256, "loss": 3.254, "step": 8257 }, { "epoch": 0.4, "grad_norm": 0.5623879432678223, "learning_rate": 0.0005734591428336611, "loss": 3.2588, "step": 8258 }, { "epoch": 0.4, "grad_norm": 0.5085955858230591, "learning_rate": 0.0005734528110592577, "loss": 3.3621, "step": 8259 }, { "epoch": 0.4, "grad_norm": 0.5223268270492554, "learning_rate": 0.0005734464785646325, "loss": 3.201, "step": 8260 }, { "epoch": 0.4, "grad_norm": 0.4991510808467865, "learning_rate": 0.0005734401453498018, "loss": 3.2424, "step": 8261 }, { "epoch": 0.4, "grad_norm": 0.5398638248443604, "learning_rate": 0.0005734338114147823, "loss": 3.4962, "step": 8262 }, { "epoch": 0.4, "grad_norm": 0.5050841569900513, "learning_rate": 0.0005734274767595908, "loss": 3.4429, "step": 8263 }, { "epoch": 0.4, "grad_norm": 0.5685188174247742, "learning_rate": 0.000573421141384244, "loss": 3.1578, "step": 8264 }, { "epoch": 0.41, "grad_norm": 0.5070170164108276, "learning_rate": 0.0005734148052887585, "loss": 3.3676, "step": 8265 }, { "epoch": 0.41, "grad_norm": 0.5665714144706726, "learning_rate": 0.0005734084684731511, "loss": 3.2815, "step": 8266 }, { "epoch": 0.41, "grad_norm": 0.498685359954834, "learning_rate": 0.0005734021309374383, "loss": 3.3352, "step": 8267 }, { "epoch": 0.41, "grad_norm": 0.509119987487793, "learning_rate": 0.000573395792681637, "loss": 3.2356, "step": 8268 }, { "epoch": 0.41, "grad_norm": 0.5538164973258972, "learning_rate": 0.0005733894537057638, "loss": 3.2961, "step": 8269 }, { "epoch": 0.41, "grad_norm": 0.5612331628799438, "learning_rate": 0.0005733831140098353, "loss": 3.3455, "step": 8270 }, { "epoch": 0.41, "grad_norm": 0.5053423047065735, "learning_rate": 0.0005733767735938683, "loss": 3.3879, "step": 8271 }, { "epoch": 0.41, "grad_norm": 0.56037837266922, "learning_rate": 0.0005733704324578795, "loss": 3.0991, "step": 8272 }, { "epoch": 0.41, "grad_norm": 0.5380472540855408, "learning_rate": 0.0005733640906018856, "loss": 3.2985, "step": 8273 }, { "epoch": 0.41, "grad_norm": 0.5246527791023254, "learning_rate": 0.0005733577480259034, "loss": 3.3638, "step": 8274 }, { "epoch": 0.41, "grad_norm": 0.5476314425468445, "learning_rate": 0.0005733514047299494, "loss": 3.4797, "step": 8275 }, { "epoch": 0.41, "grad_norm": 0.5380063652992249, "learning_rate": 0.0005733450607140404, "loss": 3.3618, "step": 8276 }, { "epoch": 0.41, "grad_norm": 0.5321431159973145, "learning_rate": 0.0005733387159781931, "loss": 3.3288, "step": 8277 }, { "epoch": 0.41, "grad_norm": 0.6225263476371765, "learning_rate": 0.0005733323705224243, "loss": 3.3563, "step": 8278 }, { "epoch": 0.41, "grad_norm": 0.5668192505836487, "learning_rate": 0.0005733260243467506, "loss": 3.2277, "step": 8279 }, { "epoch": 0.41, "grad_norm": 0.5005346536636353, "learning_rate": 0.0005733196774511888, "loss": 3.4365, "step": 8280 }, { "epoch": 0.41, "grad_norm": 0.597763180732727, "learning_rate": 0.0005733133298357555, "loss": 3.102, "step": 8281 }, { "epoch": 0.41, "grad_norm": 0.5184804797172546, "learning_rate": 0.0005733069815004675, "loss": 3.5517, "step": 8282 }, { "epoch": 0.41, "grad_norm": 0.540427565574646, "learning_rate": 0.0005733006324453415, "loss": 3.3953, "step": 8283 }, { "epoch": 0.41, "grad_norm": 0.595478355884552, "learning_rate": 0.0005732942826703943, "loss": 3.3344, "step": 8284 }, { "epoch": 0.41, "grad_norm": 0.551795482635498, "learning_rate": 0.0005732879321756426, "loss": 3.3396, "step": 8285 }, { "epoch": 0.41, "grad_norm": 0.5238455533981323, "learning_rate": 0.0005732815809611029, "loss": 3.5336, "step": 8286 }, { "epoch": 0.41, "grad_norm": 0.5102473497390747, "learning_rate": 0.0005732752290267921, "loss": 3.5597, "step": 8287 }, { "epoch": 0.41, "grad_norm": 0.5258164405822754, "learning_rate": 0.0005732688763727273, "loss": 3.4711, "step": 8288 }, { "epoch": 0.41, "grad_norm": 0.5059826970100403, "learning_rate": 0.0005732625229989245, "loss": 3.2522, "step": 8289 }, { "epoch": 0.41, "grad_norm": 0.5301188826560974, "learning_rate": 0.0005732561689054009, "loss": 3.5621, "step": 8290 }, { "epoch": 0.41, "grad_norm": 0.5123043060302734, "learning_rate": 0.0005732498140921732, "loss": 3.4555, "step": 8291 }, { "epoch": 0.41, "grad_norm": 0.5111595988273621, "learning_rate": 0.000573243458559258, "loss": 3.3539, "step": 8292 }, { "epoch": 0.41, "grad_norm": 0.6053318977355957, "learning_rate": 0.0005732371023066721, "loss": 3.4818, "step": 8293 }, { "epoch": 0.41, "grad_norm": 0.5144967436790466, "learning_rate": 0.0005732307453344323, "loss": 3.4808, "step": 8294 }, { "epoch": 0.41, "grad_norm": 0.5194787383079529, "learning_rate": 0.0005732243876425554, "loss": 3.1705, "step": 8295 }, { "epoch": 0.41, "grad_norm": 0.5327216386795044, "learning_rate": 0.0005732180292310579, "loss": 3.1993, "step": 8296 }, { "epoch": 0.41, "grad_norm": 0.49364081025123596, "learning_rate": 0.0005732116700999568, "loss": 3.4751, "step": 8297 }, { "epoch": 0.41, "grad_norm": 0.5248584747314453, "learning_rate": 0.0005732053102492687, "loss": 3.7122, "step": 8298 }, { "epoch": 0.41, "grad_norm": 0.5066429376602173, "learning_rate": 0.0005731989496790105, "loss": 3.073, "step": 8299 }, { "epoch": 0.41, "grad_norm": 0.5377814173698425, "learning_rate": 0.0005731925883891986, "loss": 3.3839, "step": 8300 }, { "epoch": 0.41, "grad_norm": 0.5316858291625977, "learning_rate": 0.0005731862263798502, "loss": 3.0971, "step": 8301 }, { "epoch": 0.41, "grad_norm": 0.5127855539321899, "learning_rate": 0.0005731798636509817, "loss": 3.1861, "step": 8302 }, { "epoch": 0.41, "grad_norm": 0.5574333071708679, "learning_rate": 0.0005731735002026102, "loss": 3.3133, "step": 8303 }, { "epoch": 0.41, "grad_norm": 0.5213868021965027, "learning_rate": 0.0005731671360347521, "loss": 3.1549, "step": 8304 }, { "epoch": 0.41, "grad_norm": 0.5839649438858032, "learning_rate": 0.0005731607711474244, "loss": 3.3224, "step": 8305 }, { "epoch": 0.41, "grad_norm": 0.524940013885498, "learning_rate": 0.0005731544055406439, "loss": 3.5019, "step": 8306 }, { "epoch": 0.41, "grad_norm": 0.5284970998764038, "learning_rate": 0.0005731480392144272, "loss": 3.2947, "step": 8307 }, { "epoch": 0.41, "grad_norm": 0.5189017057418823, "learning_rate": 0.000573141672168791, "loss": 3.0949, "step": 8308 }, { "epoch": 0.41, "grad_norm": 0.5156539678573608, "learning_rate": 0.0005731353044037524, "loss": 3.3861, "step": 8309 }, { "epoch": 0.41, "grad_norm": 0.49928221106529236, "learning_rate": 0.0005731289359193278, "loss": 3.4869, "step": 8310 }, { "epoch": 0.41, "grad_norm": 0.546218991279602, "learning_rate": 0.0005731225667155343, "loss": 3.2345, "step": 8311 }, { "epoch": 0.41, "grad_norm": 0.5048373937606812, "learning_rate": 0.0005731161967923885, "loss": 3.1884, "step": 8312 }, { "epoch": 0.41, "grad_norm": 0.5357444286346436, "learning_rate": 0.0005731098261499071, "loss": 3.3281, "step": 8313 }, { "epoch": 0.41, "grad_norm": 0.5371036529541016, "learning_rate": 0.0005731034547881071, "loss": 3.3279, "step": 8314 }, { "epoch": 0.41, "grad_norm": 0.539368748664856, "learning_rate": 0.000573097082707005, "loss": 3.4323, "step": 8315 }, { "epoch": 0.41, "grad_norm": 0.48763561248779297, "learning_rate": 0.0005730907099066179, "loss": 3.5516, "step": 8316 }, { "epoch": 0.41, "grad_norm": 0.5571647882461548, "learning_rate": 0.0005730843363869624, "loss": 3.4282, "step": 8317 }, { "epoch": 0.41, "grad_norm": 0.51971834897995, "learning_rate": 0.0005730779621480552, "loss": 3.2306, "step": 8318 }, { "epoch": 0.41, "grad_norm": 0.5173705816268921, "learning_rate": 0.0005730715871899133, "loss": 3.3458, "step": 8319 }, { "epoch": 0.41, "grad_norm": 0.5432358980178833, "learning_rate": 0.0005730652115125533, "loss": 3.3075, "step": 8320 }, { "epoch": 0.41, "grad_norm": 0.5046690106391907, "learning_rate": 0.0005730588351159922, "loss": 3.2926, "step": 8321 }, { "epoch": 0.41, "grad_norm": 0.5098605155944824, "learning_rate": 0.0005730524580002468, "loss": 3.21, "step": 8322 }, { "epoch": 0.41, "grad_norm": 0.5081766247749329, "learning_rate": 0.0005730460801653335, "loss": 3.401, "step": 8323 }, { "epoch": 0.41, "grad_norm": 0.5278263092041016, "learning_rate": 0.0005730397016112694, "loss": 3.2034, "step": 8324 }, { "epoch": 0.41, "grad_norm": 0.5464687347412109, "learning_rate": 0.0005730333223380714, "loss": 3.4792, "step": 8325 }, { "epoch": 0.41, "grad_norm": 0.5445109009742737, "learning_rate": 0.0005730269423457561, "loss": 3.5362, "step": 8326 }, { "epoch": 0.41, "grad_norm": 0.5181647539138794, "learning_rate": 0.0005730205616343405, "loss": 3.1952, "step": 8327 }, { "epoch": 0.41, "grad_norm": 0.5573515892028809, "learning_rate": 0.0005730141802038413, "loss": 3.1039, "step": 8328 }, { "epoch": 0.41, "grad_norm": 0.5178354978561401, "learning_rate": 0.0005730077980542751, "loss": 3.536, "step": 8329 }, { "epoch": 0.41, "grad_norm": 0.5695675015449524, "learning_rate": 0.000573001415185659, "loss": 3.3917, "step": 8330 }, { "epoch": 0.41, "grad_norm": 0.5261296629905701, "learning_rate": 0.0005729950315980098, "loss": 3.4801, "step": 8331 }, { "epoch": 0.41, "grad_norm": 0.5165058374404907, "learning_rate": 0.0005729886472913441, "loss": 3.3878, "step": 8332 }, { "epoch": 0.41, "grad_norm": 0.513191819190979, "learning_rate": 0.0005729822622656788, "loss": 3.4338, "step": 8333 }, { "epoch": 0.41, "grad_norm": 0.5528197884559631, "learning_rate": 0.0005729758765210309, "loss": 3.5439, "step": 8334 }, { "epoch": 0.41, "grad_norm": 0.5654256343841553, "learning_rate": 0.000572969490057417, "loss": 3.2453, "step": 8335 }, { "epoch": 0.41, "grad_norm": 0.5060741901397705, "learning_rate": 0.0005729631028748539, "loss": 3.3054, "step": 8336 }, { "epoch": 0.41, "grad_norm": 0.5616364479064941, "learning_rate": 0.0005729567149733587, "loss": 3.2475, "step": 8337 }, { "epoch": 0.41, "grad_norm": 0.4996776878833771, "learning_rate": 0.0005729503263529479, "loss": 3.3329, "step": 8338 }, { "epoch": 0.41, "grad_norm": 0.519364595413208, "learning_rate": 0.0005729439370136387, "loss": 3.1648, "step": 8339 }, { "epoch": 0.41, "grad_norm": 0.5395146012306213, "learning_rate": 0.0005729375469554474, "loss": 3.2874, "step": 8340 }, { "epoch": 0.41, "grad_norm": 0.5185738205909729, "learning_rate": 0.0005729311561783913, "loss": 3.092, "step": 8341 }, { "epoch": 0.41, "grad_norm": 0.5481864809989929, "learning_rate": 0.0005729247646824871, "loss": 3.2024, "step": 8342 }, { "epoch": 0.41, "grad_norm": 0.5273303389549255, "learning_rate": 0.0005729183724677515, "loss": 3.1049, "step": 8343 }, { "epoch": 0.41, "grad_norm": 0.5579976439476013, "learning_rate": 0.0005729119795342014, "loss": 3.2024, "step": 8344 }, { "epoch": 0.41, "grad_norm": 0.5214023590087891, "learning_rate": 0.0005729055858818537, "loss": 3.4417, "step": 8345 }, { "epoch": 0.41, "grad_norm": 0.5415693521499634, "learning_rate": 0.0005728991915107252, "loss": 3.4384, "step": 8346 }, { "epoch": 0.41, "grad_norm": 0.5267033576965332, "learning_rate": 0.0005728927964208329, "loss": 3.1189, "step": 8347 }, { "epoch": 0.41, "grad_norm": 0.5356072783470154, "learning_rate": 0.0005728864006121934, "loss": 3.3506, "step": 8348 }, { "epoch": 0.41, "grad_norm": 0.5404013395309448, "learning_rate": 0.0005728800040848235, "loss": 3.3664, "step": 8349 }, { "epoch": 0.41, "grad_norm": 0.5271034836769104, "learning_rate": 0.0005728736068387404, "loss": 3.3377, "step": 8350 }, { "epoch": 0.41, "grad_norm": 0.5246227383613586, "learning_rate": 0.0005728672088739606, "loss": 3.4265, "step": 8351 }, { "epoch": 0.41, "grad_norm": 0.4726375937461853, "learning_rate": 0.0005728608101905012, "loss": 3.2613, "step": 8352 }, { "epoch": 0.41, "grad_norm": 0.5876821875572205, "learning_rate": 0.0005728544107883788, "loss": 3.246, "step": 8353 }, { "epoch": 0.41, "grad_norm": 0.5646312832832336, "learning_rate": 0.0005728480106676105, "loss": 3.5403, "step": 8354 }, { "epoch": 0.41, "grad_norm": 0.5237587690353394, "learning_rate": 0.000572841609828213, "loss": 3.3111, "step": 8355 }, { "epoch": 0.41, "grad_norm": 0.5066969394683838, "learning_rate": 0.0005728352082702032, "loss": 3.2042, "step": 8356 }, { "epoch": 0.41, "grad_norm": 0.5408262610435486, "learning_rate": 0.000572828805993598, "loss": 3.1741, "step": 8357 }, { "epoch": 0.41, "grad_norm": 0.4968174695968628, "learning_rate": 0.0005728224029984142, "loss": 3.2532, "step": 8358 }, { "epoch": 0.41, "grad_norm": 0.5312142372131348, "learning_rate": 0.0005728159992846687, "loss": 3.1769, "step": 8359 }, { "epoch": 0.41, "grad_norm": 0.5489519834518433, "learning_rate": 0.0005728095948523784, "loss": 3.2044, "step": 8360 }, { "epoch": 0.41, "grad_norm": 0.5215625166893005, "learning_rate": 0.0005728031897015601, "loss": 3.2395, "step": 8361 }, { "epoch": 0.41, "grad_norm": 0.4845183491706848, "learning_rate": 0.0005727967838322307, "loss": 3.3525, "step": 8362 }, { "epoch": 0.41, "grad_norm": 0.5335796475410461, "learning_rate": 0.0005727903772444071, "loss": 3.3049, "step": 8363 }, { "epoch": 0.41, "grad_norm": 0.5164490342140198, "learning_rate": 0.0005727839699381062, "loss": 3.1817, "step": 8364 }, { "epoch": 0.41, "grad_norm": 0.5204142332077026, "learning_rate": 0.0005727775619133446, "loss": 3.0639, "step": 8365 }, { "epoch": 0.41, "grad_norm": 0.5255699753761292, "learning_rate": 0.0005727711531701396, "loss": 3.285, "step": 8366 }, { "epoch": 0.41, "grad_norm": 0.527921736240387, "learning_rate": 0.0005727647437085078, "loss": 3.4429, "step": 8367 }, { "epoch": 0.41, "grad_norm": 0.5462525486946106, "learning_rate": 0.0005727583335284662, "loss": 3.5266, "step": 8368 }, { "epoch": 0.41, "grad_norm": 0.5473728179931641, "learning_rate": 0.0005727519226300317, "loss": 3.3949, "step": 8369 }, { "epoch": 0.41, "grad_norm": 0.5657687187194824, "learning_rate": 0.000572745511013221, "loss": 3.1723, "step": 8370 }, { "epoch": 0.41, "grad_norm": 0.5343332886695862, "learning_rate": 0.0005727390986780513, "loss": 3.2042, "step": 8371 }, { "epoch": 0.41, "grad_norm": 0.4889446794986725, "learning_rate": 0.0005727326856245391, "loss": 3.2929, "step": 8372 }, { "epoch": 0.41, "grad_norm": 0.5153390169143677, "learning_rate": 0.0005727262718527014, "loss": 3.5404, "step": 8373 }, { "epoch": 0.41, "grad_norm": 0.5464727282524109, "learning_rate": 0.0005727198573625555, "loss": 3.449, "step": 8374 }, { "epoch": 0.41, "grad_norm": 0.5315839648246765, "learning_rate": 0.0005727134421541179, "loss": 3.2474, "step": 8375 }, { "epoch": 0.41, "grad_norm": 0.5293451547622681, "learning_rate": 0.0005727070262274054, "loss": 3.3761, "step": 8376 }, { "epoch": 0.41, "grad_norm": 0.5146985054016113, "learning_rate": 0.0005727006095824352, "loss": 3.2667, "step": 8377 }, { "epoch": 0.41, "grad_norm": 0.5047792196273804, "learning_rate": 0.0005726941922192242, "loss": 3.4058, "step": 8378 }, { "epoch": 0.41, "grad_norm": 0.5004801154136658, "learning_rate": 0.0005726877741377889, "loss": 3.2643, "step": 8379 }, { "epoch": 0.41, "grad_norm": 0.5195822715759277, "learning_rate": 0.0005726813553381467, "loss": 3.354, "step": 8380 }, { "epoch": 0.41, "grad_norm": 0.5108522772789001, "learning_rate": 0.0005726749358203142, "loss": 3.2618, "step": 8381 }, { "epoch": 0.41, "grad_norm": 0.5223830342292786, "learning_rate": 0.0005726685155843083, "loss": 3.3946, "step": 8382 }, { "epoch": 0.41, "grad_norm": 0.49974021315574646, "learning_rate": 0.0005726620946301462, "loss": 3.1381, "step": 8383 }, { "epoch": 0.41, "grad_norm": 0.5291284918785095, "learning_rate": 0.0005726556729578445, "loss": 3.3743, "step": 8384 }, { "epoch": 0.41, "grad_norm": 0.5420185327529907, "learning_rate": 0.0005726492505674204, "loss": 3.245, "step": 8385 }, { "epoch": 0.41, "grad_norm": 0.5729690790176392, "learning_rate": 0.0005726428274588904, "loss": 3.1877, "step": 8386 }, { "epoch": 0.41, "grad_norm": 0.5272477269172668, "learning_rate": 0.0005726364036322718, "loss": 3.1366, "step": 8387 }, { "epoch": 0.41, "grad_norm": 0.5126119256019592, "learning_rate": 0.0005726299790875814, "loss": 3.2684, "step": 8388 }, { "epoch": 0.41, "grad_norm": 0.5218641757965088, "learning_rate": 0.000572623553824836, "loss": 3.1348, "step": 8389 }, { "epoch": 0.41, "grad_norm": 0.505780041217804, "learning_rate": 0.0005726171278440527, "loss": 3.2774, "step": 8390 }, { "epoch": 0.41, "grad_norm": 0.5334300994873047, "learning_rate": 0.0005726107011452484, "loss": 3.3003, "step": 8391 }, { "epoch": 0.41, "grad_norm": 0.5299844741821289, "learning_rate": 0.0005726042737284399, "loss": 3.1663, "step": 8392 }, { "epoch": 0.41, "grad_norm": 0.5310911536216736, "learning_rate": 0.0005725978455936443, "loss": 3.3384, "step": 8393 }, { "epoch": 0.41, "grad_norm": 0.5203625559806824, "learning_rate": 0.0005725914167408784, "loss": 3.3432, "step": 8394 }, { "epoch": 0.41, "grad_norm": 0.5236132144927979, "learning_rate": 0.000572584987170159, "loss": 3.2808, "step": 8395 }, { "epoch": 0.41, "grad_norm": 0.5143657326698303, "learning_rate": 0.0005725785568815034, "loss": 3.4455, "step": 8396 }, { "epoch": 0.41, "grad_norm": 0.5301834344863892, "learning_rate": 0.0005725721258749282, "loss": 3.3219, "step": 8397 }, { "epoch": 0.41, "grad_norm": 0.5046020150184631, "learning_rate": 0.0005725656941504507, "loss": 3.4859, "step": 8398 }, { "epoch": 0.41, "grad_norm": 0.49806228280067444, "learning_rate": 0.0005725592617080873, "loss": 3.3874, "step": 8399 }, { "epoch": 0.41, "grad_norm": 0.5187866687774658, "learning_rate": 0.0005725528285478555, "loss": 3.5443, "step": 8400 }, { "epoch": 0.41, "grad_norm": 0.5415318608283997, "learning_rate": 0.000572546394669772, "loss": 3.2546, "step": 8401 }, { "epoch": 0.41, "grad_norm": 0.4997556209564209, "learning_rate": 0.0005725399600738537, "loss": 3.4427, "step": 8402 }, { "epoch": 0.41, "grad_norm": 0.5891625285148621, "learning_rate": 0.0005725335247601176, "loss": 3.5401, "step": 8403 }, { "epoch": 0.41, "grad_norm": 0.6055923104286194, "learning_rate": 0.0005725270887285806, "loss": 3.1657, "step": 8404 }, { "epoch": 0.41, "grad_norm": 0.5357984304428101, "learning_rate": 0.0005725206519792597, "loss": 3.3858, "step": 8405 }, { "epoch": 0.41, "grad_norm": 0.5380067825317383, "learning_rate": 0.0005725142145121719, "loss": 3.2251, "step": 8406 }, { "epoch": 0.41, "grad_norm": 0.5393832325935364, "learning_rate": 0.0005725077763273341, "loss": 3.3881, "step": 8407 }, { "epoch": 0.41, "grad_norm": 0.4998052716255188, "learning_rate": 0.0005725013374247633, "loss": 3.5416, "step": 8408 }, { "epoch": 0.41, "grad_norm": 0.5088112950325012, "learning_rate": 0.0005724948978044763, "loss": 3.5218, "step": 8409 }, { "epoch": 0.41, "grad_norm": 0.5030485987663269, "learning_rate": 0.0005724884574664903, "loss": 3.4079, "step": 8410 }, { "epoch": 0.41, "grad_norm": 0.5525428652763367, "learning_rate": 0.000572482016410822, "loss": 3.2823, "step": 8411 }, { "epoch": 0.41, "grad_norm": 0.5205178260803223, "learning_rate": 0.0005724755746374887, "loss": 3.3983, "step": 8412 }, { "epoch": 0.41, "grad_norm": 0.5077677369117737, "learning_rate": 0.0005724691321465071, "loss": 3.2847, "step": 8413 }, { "epoch": 0.41, "grad_norm": 0.568877100944519, "learning_rate": 0.0005724626889378942, "loss": 3.323, "step": 8414 }, { "epoch": 0.41, "grad_norm": 0.563330352306366, "learning_rate": 0.0005724562450116669, "loss": 3.1247, "step": 8415 }, { "epoch": 0.41, "grad_norm": 0.5267123579978943, "learning_rate": 0.0005724498003678425, "loss": 3.296, "step": 8416 }, { "epoch": 0.41, "grad_norm": 0.483675479888916, "learning_rate": 0.0005724433550064376, "loss": 3.4229, "step": 8417 }, { "epoch": 0.41, "grad_norm": 0.5056666731834412, "learning_rate": 0.0005724369089274695, "loss": 3.3706, "step": 8418 }, { "epoch": 0.41, "grad_norm": 0.5181419253349304, "learning_rate": 0.000572430462130955, "loss": 3.2534, "step": 8419 }, { "epoch": 0.41, "grad_norm": 0.502122700214386, "learning_rate": 0.000572424014616911, "loss": 3.5067, "step": 8420 }, { "epoch": 0.41, "grad_norm": 0.5571039319038391, "learning_rate": 0.0005724175663853545, "loss": 2.9924, "step": 8421 }, { "epoch": 0.41, "grad_norm": 0.5149720907211304, "learning_rate": 0.0005724111174363026, "loss": 3.1874, "step": 8422 }, { "epoch": 0.41, "grad_norm": 0.5191240310668945, "learning_rate": 0.0005724046677697724, "loss": 3.4348, "step": 8423 }, { "epoch": 0.41, "grad_norm": 0.5127260684967041, "learning_rate": 0.0005723982173857806, "loss": 3.1189, "step": 8424 }, { "epoch": 0.41, "grad_norm": 0.5314476490020752, "learning_rate": 0.0005723917662843444, "loss": 3.1581, "step": 8425 }, { "epoch": 0.41, "grad_norm": 0.5084034204483032, "learning_rate": 0.0005723853144654806, "loss": 3.2397, "step": 8426 }, { "epoch": 0.41, "grad_norm": 0.4968039095401764, "learning_rate": 0.0005723788619292064, "loss": 3.2453, "step": 8427 }, { "epoch": 0.41, "grad_norm": 0.5596404671669006, "learning_rate": 0.0005723724086755386, "loss": 3.329, "step": 8428 }, { "epoch": 0.41, "grad_norm": 0.5200359225273132, "learning_rate": 0.0005723659547044944, "loss": 3.4476, "step": 8429 }, { "epoch": 0.41, "grad_norm": 0.48586541414260864, "learning_rate": 0.0005723595000160906, "loss": 3.3514, "step": 8430 }, { "epoch": 0.41, "grad_norm": 0.5171741843223572, "learning_rate": 0.0005723530446103443, "loss": 3.2959, "step": 8431 }, { "epoch": 0.41, "grad_norm": 0.4921620190143585, "learning_rate": 0.0005723465884872726, "loss": 3.3176, "step": 8432 }, { "epoch": 0.41, "grad_norm": 0.5125603675842285, "learning_rate": 0.0005723401316468923, "loss": 3.377, "step": 8433 }, { "epoch": 0.41, "grad_norm": 0.5400583744049072, "learning_rate": 0.0005723336740892206, "loss": 3.1951, "step": 8434 }, { "epoch": 0.41, "grad_norm": 0.5424138903617859, "learning_rate": 0.0005723272158142744, "loss": 3.1451, "step": 8435 }, { "epoch": 0.41, "grad_norm": 0.5870009064674377, "learning_rate": 0.0005723207568220707, "loss": 3.5986, "step": 8436 }, { "epoch": 0.41, "grad_norm": 0.5539098381996155, "learning_rate": 0.0005723142971126265, "loss": 3.2467, "step": 8437 }, { "epoch": 0.41, "grad_norm": 0.520626425743103, "learning_rate": 0.0005723078366859588, "loss": 3.4634, "step": 8438 }, { "epoch": 0.41, "grad_norm": 0.501064658164978, "learning_rate": 0.0005723013755420847, "loss": 3.1619, "step": 8439 }, { "epoch": 0.41, "grad_norm": 0.5047374963760376, "learning_rate": 0.0005722949136810212, "loss": 3.2784, "step": 8440 }, { "epoch": 0.41, "grad_norm": 0.5190777778625488, "learning_rate": 0.0005722884511027853, "loss": 3.4443, "step": 8441 }, { "epoch": 0.41, "grad_norm": 0.5325513482093811, "learning_rate": 0.0005722819878073942, "loss": 3.2189, "step": 8442 }, { "epoch": 0.41, "grad_norm": 0.5083960294723511, "learning_rate": 0.0005722755237948645, "loss": 3.4314, "step": 8443 }, { "epoch": 0.41, "grad_norm": 0.5294110178947449, "learning_rate": 0.0005722690590652136, "loss": 3.4491, "step": 8444 }, { "epoch": 0.41, "grad_norm": 0.5367835164070129, "learning_rate": 0.0005722625936184583, "loss": 3.3437, "step": 8445 }, { "epoch": 0.41, "grad_norm": 0.5289018750190735, "learning_rate": 0.0005722561274546158, "loss": 3.4518, "step": 8446 }, { "epoch": 0.41, "grad_norm": 0.5518658757209778, "learning_rate": 0.0005722496605737031, "loss": 3.4819, "step": 8447 }, { "epoch": 0.41, "grad_norm": 0.5337439179420471, "learning_rate": 0.0005722431929757371, "loss": 3.3336, "step": 8448 }, { "epoch": 0.41, "grad_norm": 0.5443679094314575, "learning_rate": 0.0005722367246607349, "loss": 3.2325, "step": 8449 }, { "epoch": 0.41, "grad_norm": 0.5037354230880737, "learning_rate": 0.0005722302556287137, "loss": 3.1895, "step": 8450 }, { "epoch": 0.41, "grad_norm": 0.51902174949646, "learning_rate": 0.0005722237858796903, "loss": 3.3569, "step": 8451 }, { "epoch": 0.41, "grad_norm": 0.5206155776977539, "learning_rate": 0.0005722173154136818, "loss": 3.2948, "step": 8452 }, { "epoch": 0.41, "grad_norm": 0.5317664742469788, "learning_rate": 0.0005722108442307054, "loss": 3.4704, "step": 8453 }, { "epoch": 0.41, "grad_norm": 0.527557909488678, "learning_rate": 0.000572204372330778, "loss": 3.2911, "step": 8454 }, { "epoch": 0.41, "grad_norm": 0.5252202153205872, "learning_rate": 0.0005721978997139165, "loss": 3.3554, "step": 8455 }, { "epoch": 0.41, "grad_norm": 0.5341652035713196, "learning_rate": 0.0005721914263801382, "loss": 3.0762, "step": 8456 }, { "epoch": 0.41, "grad_norm": 0.5101057887077332, "learning_rate": 0.0005721849523294602, "loss": 3.3628, "step": 8457 }, { "epoch": 0.41, "grad_norm": 0.5079684853553772, "learning_rate": 0.0005721784775618993, "loss": 3.4156, "step": 8458 }, { "epoch": 0.41, "grad_norm": 0.5565444231033325, "learning_rate": 0.0005721720020774727, "loss": 3.3037, "step": 8459 }, { "epoch": 0.41, "grad_norm": 0.5274984240531921, "learning_rate": 0.0005721655258761973, "loss": 3.2268, "step": 8460 }, { "epoch": 0.41, "grad_norm": 0.5040982365608215, "learning_rate": 0.0005721590489580904, "loss": 3.4673, "step": 8461 }, { "epoch": 0.41, "grad_norm": 0.5225953459739685, "learning_rate": 0.000572152571323169, "loss": 3.4349, "step": 8462 }, { "epoch": 0.41, "grad_norm": 0.5098921656608582, "learning_rate": 0.0005721460929714501, "loss": 3.4011, "step": 8463 }, { "epoch": 0.41, "grad_norm": 0.5205847024917603, "learning_rate": 0.0005721396139029507, "loss": 3.2664, "step": 8464 }, { "epoch": 0.41, "grad_norm": 0.5714619755744934, "learning_rate": 0.000572133134117688, "loss": 3.2306, "step": 8465 }, { "epoch": 0.41, "grad_norm": 0.5325881838798523, "learning_rate": 0.0005721266536156789, "loss": 3.219, "step": 8466 }, { "epoch": 0.41, "grad_norm": 0.5481316447257996, "learning_rate": 0.0005721201723969407, "loss": 3.3076, "step": 8467 }, { "epoch": 0.41, "grad_norm": 0.4882158637046814, "learning_rate": 0.0005721136904614901, "loss": 3.3831, "step": 8468 }, { "epoch": 0.42, "grad_norm": 0.5075830817222595, "learning_rate": 0.0005721072078093447, "loss": 3.3366, "step": 8469 }, { "epoch": 0.42, "grad_norm": 0.5224390029907227, "learning_rate": 0.0005721007244405211, "loss": 3.4743, "step": 8470 }, { "epoch": 0.42, "grad_norm": 0.5543192028999329, "learning_rate": 0.0005720942403550366, "loss": 3.2743, "step": 8471 }, { "epoch": 0.42, "grad_norm": 0.572250485420227, "learning_rate": 0.0005720877555529082, "loss": 3.2516, "step": 8472 }, { "epoch": 0.42, "grad_norm": 0.5291645526885986, "learning_rate": 0.0005720812700341531, "loss": 3.2301, "step": 8473 }, { "epoch": 0.42, "grad_norm": 0.5106573104858398, "learning_rate": 0.0005720747837987882, "loss": 3.3192, "step": 8474 }, { "epoch": 0.42, "grad_norm": 0.5043050646781921, "learning_rate": 0.0005720682968468308, "loss": 3.3815, "step": 8475 }, { "epoch": 0.42, "grad_norm": 0.5045797824859619, "learning_rate": 0.0005720618091782978, "loss": 3.2098, "step": 8476 }, { "epoch": 0.42, "grad_norm": 0.5489840507507324, "learning_rate": 0.0005720553207932064, "loss": 3.3597, "step": 8477 }, { "epoch": 0.42, "grad_norm": 0.5759950876235962, "learning_rate": 0.0005720488316915736, "loss": 3.2866, "step": 8478 }, { "epoch": 0.42, "grad_norm": 0.4919251799583435, "learning_rate": 0.0005720423418734164, "loss": 3.3322, "step": 8479 }, { "epoch": 0.42, "grad_norm": 0.5042932033538818, "learning_rate": 0.0005720358513387522, "loss": 3.3563, "step": 8480 }, { "epoch": 0.42, "grad_norm": 0.5451899766921997, "learning_rate": 0.0005720293600875979, "loss": 3.3275, "step": 8481 }, { "epoch": 0.42, "grad_norm": 0.5713815689086914, "learning_rate": 0.0005720228681199707, "loss": 3.2709, "step": 8482 }, { "epoch": 0.42, "grad_norm": 0.5565445423126221, "learning_rate": 0.0005720163754358874, "loss": 3.3623, "step": 8483 }, { "epoch": 0.42, "grad_norm": 0.5319048166275024, "learning_rate": 0.0005720098820353655, "loss": 3.2325, "step": 8484 }, { "epoch": 0.42, "grad_norm": 0.5170350670814514, "learning_rate": 0.0005720033879184219, "loss": 3.2892, "step": 8485 }, { "epoch": 0.42, "grad_norm": 0.5210342407226562, "learning_rate": 0.0005719968930850736, "loss": 3.2944, "step": 8486 }, { "epoch": 0.42, "grad_norm": 0.5216629505157471, "learning_rate": 0.000571990397535338, "loss": 3.4078, "step": 8487 }, { "epoch": 0.42, "grad_norm": 0.5704594254493713, "learning_rate": 0.0005719839012692319, "loss": 3.3264, "step": 8488 }, { "epoch": 0.42, "grad_norm": 0.5190528035163879, "learning_rate": 0.0005719774042867726, "loss": 3.11, "step": 8489 }, { "epoch": 0.42, "grad_norm": 0.5578228235244751, "learning_rate": 0.0005719709065879771, "loss": 3.4529, "step": 8490 }, { "epoch": 0.42, "grad_norm": 0.5134179592132568, "learning_rate": 0.0005719644081728627, "loss": 3.1287, "step": 8491 }, { "epoch": 0.42, "grad_norm": 0.5582857131958008, "learning_rate": 0.0005719579090414464, "loss": 3.1355, "step": 8492 }, { "epoch": 0.42, "grad_norm": 0.5763298273086548, "learning_rate": 0.0005719514091937451, "loss": 3.2548, "step": 8493 }, { "epoch": 0.42, "grad_norm": 0.5173172950744629, "learning_rate": 0.0005719449086297762, "loss": 3.3526, "step": 8494 }, { "epoch": 0.42, "grad_norm": 1.0055021047592163, "learning_rate": 0.0005719384073495569, "loss": 3.1682, "step": 8495 }, { "epoch": 0.42, "grad_norm": 0.49112802743911743, "learning_rate": 0.000571931905353104, "loss": 3.5877, "step": 8496 }, { "epoch": 0.42, "grad_norm": 0.5321609377861023, "learning_rate": 0.0005719254026404349, "loss": 3.2739, "step": 8497 }, { "epoch": 0.42, "grad_norm": 0.5343760848045349, "learning_rate": 0.0005719188992115667, "loss": 3.4689, "step": 8498 }, { "epoch": 0.42, "grad_norm": 0.5921098589897156, "learning_rate": 0.0005719123950665162, "loss": 3.229, "step": 8499 }, { "epoch": 0.42, "grad_norm": 0.5534712672233582, "learning_rate": 0.0005719058902053009, "loss": 3.2989, "step": 8500 }, { "epoch": 0.42, "grad_norm": 0.513491690158844, "learning_rate": 0.000571899384627938, "loss": 3.183, "step": 8501 }, { "epoch": 0.42, "grad_norm": 0.5323536396026611, "learning_rate": 0.0005718928783344442, "loss": 3.2725, "step": 8502 }, { "epoch": 0.42, "grad_norm": 0.6444754004478455, "learning_rate": 0.0005718863713248371, "loss": 3.3666, "step": 8503 }, { "epoch": 0.42, "grad_norm": 0.48075416684150696, "learning_rate": 0.0005718798635991334, "loss": 3.1333, "step": 8504 }, { "epoch": 0.42, "grad_norm": 0.507790207862854, "learning_rate": 0.0005718733551573506, "loss": 3.3297, "step": 8505 }, { "epoch": 0.42, "grad_norm": 0.5401341915130615, "learning_rate": 0.0005718668459995056, "loss": 3.3617, "step": 8506 }, { "epoch": 0.42, "grad_norm": 0.5216659307479858, "learning_rate": 0.0005718603361256157, "loss": 3.3293, "step": 8507 }, { "epoch": 0.42, "grad_norm": 0.5784571170806885, "learning_rate": 0.0005718538255356981, "loss": 3.2978, "step": 8508 }, { "epoch": 0.42, "grad_norm": 0.5394245982170105, "learning_rate": 0.0005718473142297697, "loss": 3.1971, "step": 8509 }, { "epoch": 0.42, "grad_norm": 0.547707200050354, "learning_rate": 0.0005718408022078479, "loss": 3.4488, "step": 8510 }, { "epoch": 0.42, "grad_norm": 0.5773078203201294, "learning_rate": 0.0005718342894699497, "loss": 3.4022, "step": 8511 }, { "epoch": 0.42, "grad_norm": 0.5116841197013855, "learning_rate": 0.0005718277760160922, "loss": 3.5378, "step": 8512 }, { "epoch": 0.42, "grad_norm": 0.5021461844444275, "learning_rate": 0.0005718212618462928, "loss": 3.2106, "step": 8513 }, { "epoch": 0.42, "grad_norm": 0.5065045952796936, "learning_rate": 0.0005718147469605684, "loss": 3.2924, "step": 8514 }, { "epoch": 0.42, "grad_norm": 0.5146911144256592, "learning_rate": 0.0005718082313589363, "loss": 3.4854, "step": 8515 }, { "epoch": 0.42, "grad_norm": 0.5704720616340637, "learning_rate": 0.0005718017150414137, "loss": 3.1635, "step": 8516 }, { "epoch": 0.42, "grad_norm": 0.5921428799629211, "learning_rate": 0.0005717951980080176, "loss": 3.3375, "step": 8517 }, { "epoch": 0.42, "grad_norm": 0.5015804767608643, "learning_rate": 0.0005717886802587653, "loss": 3.2446, "step": 8518 }, { "epoch": 0.42, "grad_norm": 0.5033897161483765, "learning_rate": 0.0005717821617936739, "loss": 3.4058, "step": 8519 }, { "epoch": 0.42, "grad_norm": 0.5112386345863342, "learning_rate": 0.0005717756426127606, "loss": 3.4671, "step": 8520 }, { "epoch": 0.42, "grad_norm": 0.5142545104026794, "learning_rate": 0.0005717691227160426, "loss": 3.3429, "step": 8521 }, { "epoch": 0.42, "grad_norm": 0.5595449209213257, "learning_rate": 0.000571762602103537, "loss": 3.4415, "step": 8522 }, { "epoch": 0.42, "grad_norm": 0.5375891327857971, "learning_rate": 0.000571756080775261, "loss": 3.2085, "step": 8523 }, { "epoch": 0.42, "grad_norm": 0.5820460319519043, "learning_rate": 0.0005717495587312318, "loss": 3.2163, "step": 8524 }, { "epoch": 0.42, "grad_norm": 0.5265014171600342, "learning_rate": 0.0005717430359714666, "loss": 3.0536, "step": 8525 }, { "epoch": 0.42, "grad_norm": 0.5012522339820862, "learning_rate": 0.0005717365124959824, "loss": 3.2223, "step": 8526 }, { "epoch": 0.42, "grad_norm": 0.5129319429397583, "learning_rate": 0.0005717299883047967, "loss": 3.1573, "step": 8527 }, { "epoch": 0.42, "grad_norm": 0.5185990333557129, "learning_rate": 0.0005717234633979265, "loss": 3.1524, "step": 8528 }, { "epoch": 0.42, "grad_norm": 0.5384962558746338, "learning_rate": 0.0005717169377753888, "loss": 3.2809, "step": 8529 }, { "epoch": 0.42, "grad_norm": 0.5573104023933411, "learning_rate": 0.0005717104114372012, "loss": 3.3692, "step": 8530 }, { "epoch": 0.42, "grad_norm": 0.5535567402839661, "learning_rate": 0.0005717038843833805, "loss": 3.2744, "step": 8531 }, { "epoch": 0.42, "grad_norm": 0.543593168258667, "learning_rate": 0.0005716973566139441, "loss": 3.3109, "step": 8532 }, { "epoch": 0.42, "grad_norm": 0.5009624361991882, "learning_rate": 0.0005716908281289092, "loss": 3.4917, "step": 8533 }, { "epoch": 0.42, "grad_norm": 0.5481317043304443, "learning_rate": 0.000571684298928293, "loss": 3.3068, "step": 8534 }, { "epoch": 0.42, "grad_norm": 0.538657546043396, "learning_rate": 0.0005716777690121125, "loss": 3.409, "step": 8535 }, { "epoch": 0.42, "grad_norm": 0.5731958150863647, "learning_rate": 0.0005716712383803851, "loss": 3.1841, "step": 8536 }, { "epoch": 0.42, "grad_norm": 0.5232669115066528, "learning_rate": 0.000571664707033128, "loss": 3.231, "step": 8537 }, { "epoch": 0.42, "grad_norm": 0.5260790586471558, "learning_rate": 0.0005716581749703583, "loss": 3.3928, "step": 8538 }, { "epoch": 0.42, "grad_norm": 0.5380903482437134, "learning_rate": 0.0005716516421920932, "loss": 3.2905, "step": 8539 }, { "epoch": 0.42, "grad_norm": 0.5121652483940125, "learning_rate": 0.00057164510869835, "loss": 3.3719, "step": 8540 }, { "epoch": 0.42, "grad_norm": 0.5237699747085571, "learning_rate": 0.0005716385744891459, "loss": 3.273, "step": 8541 }, { "epoch": 0.42, "grad_norm": 0.6156141757965088, "learning_rate": 0.000571632039564498, "loss": 3.1406, "step": 8542 }, { "epoch": 0.42, "grad_norm": 0.48015645146369934, "learning_rate": 0.0005716255039244235, "loss": 3.4731, "step": 8543 }, { "epoch": 0.42, "grad_norm": 0.5515682697296143, "learning_rate": 0.0005716189675689399, "loss": 3.1267, "step": 8544 }, { "epoch": 0.42, "grad_norm": 0.5330041646957397, "learning_rate": 0.0005716124304980642, "loss": 3.2132, "step": 8545 }, { "epoch": 0.42, "grad_norm": 0.537356972694397, "learning_rate": 0.0005716058927118135, "loss": 3.3974, "step": 8546 }, { "epoch": 0.42, "grad_norm": 0.5180966854095459, "learning_rate": 0.0005715993542102052, "loss": 3.3525, "step": 8547 }, { "epoch": 0.42, "grad_norm": 0.5257582664489746, "learning_rate": 0.0005715928149932565, "loss": 3.2291, "step": 8548 }, { "epoch": 0.42, "grad_norm": 0.5318938493728638, "learning_rate": 0.0005715862750609845, "loss": 2.958, "step": 8549 }, { "epoch": 0.42, "grad_norm": 0.5303199887275696, "learning_rate": 0.0005715797344134067, "loss": 3.3393, "step": 8550 }, { "epoch": 0.42, "grad_norm": 0.5268381834030151, "learning_rate": 0.0005715731930505401, "loss": 3.3426, "step": 8551 }, { "epoch": 0.42, "grad_norm": 0.6098791360855103, "learning_rate": 0.0005715666509724019, "loss": 3.0801, "step": 8552 }, { "epoch": 0.42, "grad_norm": 0.5364266037940979, "learning_rate": 0.0005715601081790094, "loss": 3.0393, "step": 8553 }, { "epoch": 0.42, "grad_norm": 0.5054658651351929, "learning_rate": 0.0005715535646703798, "loss": 3.3621, "step": 8554 }, { "epoch": 0.42, "grad_norm": 0.5238931179046631, "learning_rate": 0.0005715470204465305, "loss": 3.2215, "step": 8555 }, { "epoch": 0.42, "grad_norm": 0.5390506386756897, "learning_rate": 0.0005715404755074785, "loss": 3.4286, "step": 8556 }, { "epoch": 0.42, "grad_norm": 0.5367797613143921, "learning_rate": 0.0005715339298532412, "loss": 3.428, "step": 8557 }, { "epoch": 0.42, "grad_norm": 0.5821202397346497, "learning_rate": 0.0005715273834838358, "loss": 3.3585, "step": 8558 }, { "epoch": 0.42, "grad_norm": 0.523921549320221, "learning_rate": 0.0005715208363992794, "loss": 3.2719, "step": 8559 }, { "epoch": 0.42, "grad_norm": 0.5809316635131836, "learning_rate": 0.0005715142885995895, "loss": 3.1228, "step": 8560 }, { "epoch": 0.42, "grad_norm": 0.5245625376701355, "learning_rate": 0.0005715077400847832, "loss": 3.771, "step": 8561 }, { "epoch": 0.42, "grad_norm": 0.5554155707359314, "learning_rate": 0.0005715011908548778, "loss": 3.1731, "step": 8562 }, { "epoch": 0.42, "grad_norm": 0.530573308467865, "learning_rate": 0.0005714946409098905, "loss": 3.2805, "step": 8563 }, { "epoch": 0.42, "grad_norm": 0.5505214333534241, "learning_rate": 0.0005714880902498385, "loss": 3.2432, "step": 8564 }, { "epoch": 0.42, "grad_norm": 0.5658882260322571, "learning_rate": 0.0005714815388747391, "loss": 3.2784, "step": 8565 }, { "epoch": 0.42, "grad_norm": 0.5260425209999084, "learning_rate": 0.0005714749867846097, "loss": 3.2037, "step": 8566 }, { "epoch": 0.42, "grad_norm": 0.4990243911743164, "learning_rate": 0.0005714684339794674, "loss": 3.3172, "step": 8567 }, { "epoch": 0.42, "grad_norm": 0.5792827606201172, "learning_rate": 0.0005714618804593295, "loss": 3.5224, "step": 8568 }, { "epoch": 0.42, "grad_norm": 0.5181724429130554, "learning_rate": 0.0005714553262242131, "loss": 3.3583, "step": 8569 }, { "epoch": 0.42, "grad_norm": 0.564866304397583, "learning_rate": 0.0005714487712741357, "loss": 3.4391, "step": 8570 }, { "epoch": 0.42, "grad_norm": 0.4975354075431824, "learning_rate": 0.0005714422156091146, "loss": 3.2768, "step": 8571 }, { "epoch": 0.42, "grad_norm": 0.5749588012695312, "learning_rate": 0.0005714356592291668, "loss": 3.2569, "step": 8572 }, { "epoch": 0.42, "grad_norm": 0.49790215492248535, "learning_rate": 0.0005714291021343097, "loss": 3.2321, "step": 8573 }, { "epoch": 0.42, "grad_norm": 0.5509989857673645, "learning_rate": 0.0005714225443245607, "loss": 3.2786, "step": 8574 }, { "epoch": 0.42, "grad_norm": 0.5187534093856812, "learning_rate": 0.0005714159857999368, "loss": 3.241, "step": 8575 }, { "epoch": 0.42, "grad_norm": 0.5033349394798279, "learning_rate": 0.0005714094265604556, "loss": 3.3021, "step": 8576 }, { "epoch": 0.42, "grad_norm": 0.5202603936195374, "learning_rate": 0.0005714028666061341, "loss": 3.1149, "step": 8577 }, { "epoch": 0.42, "grad_norm": 0.544248104095459, "learning_rate": 0.0005713963059369898, "loss": 3.5272, "step": 8578 }, { "epoch": 0.42, "grad_norm": 0.5000052452087402, "learning_rate": 0.0005713897445530396, "loss": 3.278, "step": 8579 }, { "epoch": 0.42, "grad_norm": 0.5294511318206787, "learning_rate": 0.0005713831824543013, "loss": 3.3534, "step": 8580 }, { "epoch": 0.42, "grad_norm": 0.5449314713478088, "learning_rate": 0.0005713766196407919, "loss": 3.1439, "step": 8581 }, { "epoch": 0.42, "grad_norm": 0.5595104694366455, "learning_rate": 0.0005713700561125286, "loss": 3.2132, "step": 8582 }, { "epoch": 0.42, "grad_norm": 0.5275298953056335, "learning_rate": 0.0005713634918695288, "loss": 3.5211, "step": 8583 }, { "epoch": 0.42, "grad_norm": 0.5486336946487427, "learning_rate": 0.0005713569269118099, "loss": 3.3394, "step": 8584 }, { "epoch": 0.42, "grad_norm": 0.5621734261512756, "learning_rate": 0.0005713503612393889, "loss": 3.2691, "step": 8585 }, { "epoch": 0.42, "grad_norm": 0.5324231386184692, "learning_rate": 0.0005713437948522834, "loss": 3.1905, "step": 8586 }, { "epoch": 0.42, "grad_norm": 0.5473950505256653, "learning_rate": 0.0005713372277505106, "loss": 3.2829, "step": 8587 }, { "epoch": 0.42, "grad_norm": 0.5286551713943481, "learning_rate": 0.0005713306599340877, "loss": 3.2689, "step": 8588 }, { "epoch": 0.42, "grad_norm": 0.5888059139251709, "learning_rate": 0.000571324091403032, "loss": 3.4355, "step": 8589 }, { "epoch": 0.42, "grad_norm": 0.5260636210441589, "learning_rate": 0.000571317522157361, "loss": 3.4292, "step": 8590 }, { "epoch": 0.42, "grad_norm": 0.5505531430244446, "learning_rate": 0.0005713109521970918, "loss": 3.3409, "step": 8591 }, { "epoch": 0.42, "grad_norm": 0.5252068638801575, "learning_rate": 0.0005713043815222418, "loss": 3.3137, "step": 8592 }, { "epoch": 0.42, "grad_norm": 0.5074268579483032, "learning_rate": 0.0005712978101328281, "loss": 3.2255, "step": 8593 }, { "epoch": 0.42, "grad_norm": 0.4985024631023407, "learning_rate": 0.0005712912380288683, "loss": 3.3656, "step": 8594 }, { "epoch": 0.42, "grad_norm": 0.5729339122772217, "learning_rate": 0.0005712846652103796, "loss": 3.1283, "step": 8595 }, { "epoch": 0.42, "grad_norm": 0.5986528396606445, "learning_rate": 0.0005712780916773794, "loss": 3.2123, "step": 8596 }, { "epoch": 0.42, "grad_norm": 0.5517857670783997, "learning_rate": 0.0005712715174298848, "loss": 3.3586, "step": 8597 }, { "epoch": 0.42, "grad_norm": 0.5476078391075134, "learning_rate": 0.0005712649424679132, "loss": 3.2001, "step": 8598 }, { "epoch": 0.42, "grad_norm": 0.5720165371894836, "learning_rate": 0.000571258366791482, "loss": 2.9817, "step": 8599 }, { "epoch": 0.42, "grad_norm": 0.5489450693130493, "learning_rate": 0.0005712517904006085, "loss": 3.4279, "step": 8600 }, { "epoch": 0.42, "grad_norm": 0.5727310180664062, "learning_rate": 0.0005712452132953099, "loss": 3.1009, "step": 8601 }, { "epoch": 0.42, "grad_norm": 0.5166281461715698, "learning_rate": 0.0005712386354756037, "loss": 3.3062, "step": 8602 }, { "epoch": 0.42, "grad_norm": 0.5071576237678528, "learning_rate": 0.0005712320569415071, "loss": 3.4458, "step": 8603 }, { "epoch": 0.42, "grad_norm": 0.5760843753814697, "learning_rate": 0.0005712254776930374, "loss": 3.3133, "step": 8604 }, { "epoch": 0.42, "grad_norm": 0.5532389283180237, "learning_rate": 0.0005712188977302121, "loss": 3.1873, "step": 8605 }, { "epoch": 0.42, "grad_norm": 0.5423989295959473, "learning_rate": 0.0005712123170530484, "loss": 3.0871, "step": 8606 }, { "epoch": 0.42, "grad_norm": 0.5491202473640442, "learning_rate": 0.0005712057356615637, "loss": 3.176, "step": 8607 }, { "epoch": 0.42, "grad_norm": 0.5447011590003967, "learning_rate": 0.0005711991535557751, "loss": 3.1241, "step": 8608 }, { "epoch": 0.42, "grad_norm": 0.5630657076835632, "learning_rate": 0.0005711925707357002, "loss": 3.3227, "step": 8609 }, { "epoch": 0.42, "grad_norm": 0.4960578978061676, "learning_rate": 0.0005711859872013563, "loss": 3.3174, "step": 8610 }, { "epoch": 0.42, "grad_norm": 0.5437517166137695, "learning_rate": 0.0005711794029527607, "loss": 3.3928, "step": 8611 }, { "epoch": 0.42, "grad_norm": 0.5181750059127808, "learning_rate": 0.0005711728179899308, "loss": 3.3892, "step": 8612 }, { "epoch": 0.42, "grad_norm": 0.5108785033226013, "learning_rate": 0.0005711662323128838, "loss": 3.3299, "step": 8613 }, { "epoch": 0.42, "grad_norm": 0.4989277720451355, "learning_rate": 0.0005711596459216372, "loss": 3.3234, "step": 8614 }, { "epoch": 0.42, "grad_norm": 0.5421345233917236, "learning_rate": 0.0005711530588162082, "loss": 3.3554, "step": 8615 }, { "epoch": 0.42, "grad_norm": 0.5189564228057861, "learning_rate": 0.0005711464709966142, "loss": 3.2916, "step": 8616 }, { "epoch": 0.42, "grad_norm": 0.5410189628601074, "learning_rate": 0.0005711398824628727, "loss": 3.3517, "step": 8617 }, { "epoch": 0.42, "grad_norm": 0.5351953506469727, "learning_rate": 0.0005711332932150008, "loss": 3.1984, "step": 8618 }, { "epoch": 0.42, "grad_norm": 0.5187792181968689, "learning_rate": 0.0005711267032530161, "loss": 3.3928, "step": 8619 }, { "epoch": 0.42, "grad_norm": 0.5344939231872559, "learning_rate": 0.0005711201125769358, "loss": 3.3316, "step": 8620 }, { "epoch": 0.42, "grad_norm": 0.5304107666015625, "learning_rate": 0.0005711135211867773, "loss": 3.4969, "step": 8621 }, { "epoch": 0.42, "grad_norm": 0.5163986682891846, "learning_rate": 0.0005711069290825579, "loss": 3.117, "step": 8622 }, { "epoch": 0.42, "grad_norm": 0.5484994649887085, "learning_rate": 0.0005711003362642951, "loss": 3.3076, "step": 8623 }, { "epoch": 0.42, "grad_norm": 0.5722675323486328, "learning_rate": 0.0005710937427320062, "loss": 3.437, "step": 8624 }, { "epoch": 0.42, "grad_norm": 0.5870904326438904, "learning_rate": 0.0005710871484857085, "loss": 3.3008, "step": 8625 }, { "epoch": 0.42, "grad_norm": 0.5617459416389465, "learning_rate": 0.0005710805535254195, "loss": 3.244, "step": 8626 }, { "epoch": 0.42, "grad_norm": 0.5233004689216614, "learning_rate": 0.0005710739578511564, "loss": 3.3681, "step": 8627 }, { "epoch": 0.42, "grad_norm": 0.5104897618293762, "learning_rate": 0.0005710673614629367, "loss": 3.3192, "step": 8628 }, { "epoch": 0.42, "grad_norm": 0.5213035345077515, "learning_rate": 0.0005710607643607778, "loss": 3.4488, "step": 8629 }, { "epoch": 0.42, "grad_norm": 0.5398159623146057, "learning_rate": 0.0005710541665446969, "loss": 3.4134, "step": 8630 }, { "epoch": 0.42, "grad_norm": 0.5116217136383057, "learning_rate": 0.0005710475680147115, "loss": 3.1915, "step": 8631 }, { "epoch": 0.42, "grad_norm": 0.5108386278152466, "learning_rate": 0.0005710409687708391, "loss": 3.2113, "step": 8632 }, { "epoch": 0.42, "grad_norm": 0.5582148432731628, "learning_rate": 0.0005710343688130968, "loss": 3.2279, "step": 8633 }, { "epoch": 0.42, "grad_norm": 0.5281783938407898, "learning_rate": 0.0005710277681415022, "loss": 3.4379, "step": 8634 }, { "epoch": 0.42, "grad_norm": 0.4925077259540558, "learning_rate": 0.0005710211667560726, "loss": 3.2618, "step": 8635 }, { "epoch": 0.42, "grad_norm": 0.5662412047386169, "learning_rate": 0.0005710145646568254, "loss": 3.2086, "step": 8636 }, { "epoch": 0.42, "grad_norm": 0.528595507144928, "learning_rate": 0.0005710079618437781, "loss": 3.452, "step": 8637 }, { "epoch": 0.42, "grad_norm": 0.5423139333724976, "learning_rate": 0.000571001358316948, "loss": 3.2282, "step": 8638 }, { "epoch": 0.42, "grad_norm": 0.5846478939056396, "learning_rate": 0.0005709947540763524, "loss": 3.3114, "step": 8639 }, { "epoch": 0.42, "grad_norm": 0.543500542640686, "learning_rate": 0.0005709881491220087, "loss": 3.2656, "step": 8640 }, { "epoch": 0.42, "grad_norm": 0.5225181579589844, "learning_rate": 0.0005709815434539344, "loss": 3.2979, "step": 8641 }, { "epoch": 0.42, "grad_norm": 0.5145890712738037, "learning_rate": 0.0005709749370721469, "loss": 3.0179, "step": 8642 }, { "epoch": 0.42, "grad_norm": 0.527047336101532, "learning_rate": 0.0005709683299766635, "loss": 3.1937, "step": 8643 }, { "epoch": 0.42, "grad_norm": 0.558195948600769, "learning_rate": 0.0005709617221675017, "loss": 3.3409, "step": 8644 }, { "epoch": 0.42, "grad_norm": 0.5157774686813354, "learning_rate": 0.000570955113644679, "loss": 3.4396, "step": 8645 }, { "epoch": 0.42, "grad_norm": 0.5344122052192688, "learning_rate": 0.0005709485044082125, "loss": 3.3056, "step": 8646 }, { "epoch": 0.42, "grad_norm": 0.5069317817687988, "learning_rate": 0.0005709418944581199, "loss": 3.2913, "step": 8647 }, { "epoch": 0.42, "grad_norm": 0.5138603448867798, "learning_rate": 0.0005709352837944184, "loss": 3.3517, "step": 8648 }, { "epoch": 0.42, "grad_norm": 0.5492541193962097, "learning_rate": 0.0005709286724171256, "loss": 3.3809, "step": 8649 }, { "epoch": 0.42, "grad_norm": 0.6118084192276001, "learning_rate": 0.0005709220603262587, "loss": 3.1827, "step": 8650 }, { "epoch": 0.42, "grad_norm": 0.522902250289917, "learning_rate": 0.0005709154475218354, "loss": 3.2828, "step": 8651 }, { "epoch": 0.42, "grad_norm": 0.5477918386459351, "learning_rate": 0.0005709088340038729, "loss": 3.431, "step": 8652 }, { "epoch": 0.42, "grad_norm": 0.577610433101654, "learning_rate": 0.0005709022197723886, "loss": 3.0517, "step": 8653 }, { "epoch": 0.42, "grad_norm": 0.563598096370697, "learning_rate": 0.0005708956048273999, "loss": 3.1275, "step": 8654 }, { "epoch": 0.42, "grad_norm": 0.5188009738922119, "learning_rate": 0.0005708889891689245, "loss": 3.1314, "step": 8655 }, { "epoch": 0.42, "grad_norm": 0.5175809264183044, "learning_rate": 0.0005708823727969796, "loss": 3.1442, "step": 8656 }, { "epoch": 0.42, "grad_norm": 0.5397357940673828, "learning_rate": 0.0005708757557115826, "loss": 3.1639, "step": 8657 }, { "epoch": 0.42, "grad_norm": 1.0455191135406494, "learning_rate": 0.000570869137912751, "loss": 3.2676, "step": 8658 }, { "epoch": 0.42, "grad_norm": 0.571491003036499, "learning_rate": 0.0005708625194005023, "loss": 3.4802, "step": 8659 }, { "epoch": 0.42, "grad_norm": 0.5649874210357666, "learning_rate": 0.0005708559001748538, "loss": 3.1757, "step": 8660 }, { "epoch": 0.42, "grad_norm": 0.5234768986701965, "learning_rate": 0.000570849280235823, "loss": 3.3455, "step": 8661 }, { "epoch": 0.42, "grad_norm": 0.5115209817886353, "learning_rate": 0.0005708426595834273, "loss": 3.167, "step": 8662 }, { "epoch": 0.42, "grad_norm": 0.5697651505470276, "learning_rate": 0.0005708360382176841, "loss": 3.4317, "step": 8663 }, { "epoch": 0.42, "grad_norm": 0.5905675292015076, "learning_rate": 0.0005708294161386109, "loss": 3.2357, "step": 8664 }, { "epoch": 0.42, "grad_norm": 0.5598018765449524, "learning_rate": 0.0005708227933462252, "loss": 3.2358, "step": 8665 }, { "epoch": 0.42, "grad_norm": 0.5009368658065796, "learning_rate": 0.0005708161698405444, "loss": 3.2861, "step": 8666 }, { "epoch": 0.42, "grad_norm": 0.5382165312767029, "learning_rate": 0.0005708095456215859, "loss": 3.3066, "step": 8667 }, { "epoch": 0.42, "grad_norm": 0.5542304515838623, "learning_rate": 0.0005708029206893672, "loss": 3.4175, "step": 8668 }, { "epoch": 0.42, "grad_norm": 0.5310438871383667, "learning_rate": 0.0005707962950439057, "loss": 3.6001, "step": 8669 }, { "epoch": 0.42, "grad_norm": 0.5058013200759888, "learning_rate": 0.0005707896686852189, "loss": 3.3482, "step": 8670 }, { "epoch": 0.42, "grad_norm": 0.5328607559204102, "learning_rate": 0.0005707830416133243, "loss": 3.2293, "step": 8671 }, { "epoch": 0.42, "grad_norm": 0.4994487464427948, "learning_rate": 0.0005707764138282391, "loss": 3.3414, "step": 8672 }, { "epoch": 0.43, "grad_norm": 0.547566294670105, "learning_rate": 0.0005707697853299811, "loss": 3.4422, "step": 8673 }, { "epoch": 0.43, "grad_norm": 0.5558493137359619, "learning_rate": 0.0005707631561185675, "loss": 3.3349, "step": 8674 }, { "epoch": 0.43, "grad_norm": 0.5128504633903503, "learning_rate": 0.0005707565261940158, "loss": 3.3234, "step": 8675 }, { "epoch": 0.43, "grad_norm": 0.543178141117096, "learning_rate": 0.0005707498955563437, "loss": 3.3392, "step": 8676 }, { "epoch": 0.43, "grad_norm": 0.5692942142486572, "learning_rate": 0.0005707432642055683, "loss": 3.4206, "step": 8677 }, { "epoch": 0.43, "grad_norm": 0.49869203567504883, "learning_rate": 0.0005707366321417073, "loss": 3.104, "step": 8678 }, { "epoch": 0.43, "grad_norm": 0.5554705262184143, "learning_rate": 0.0005707299993647782, "loss": 3.2966, "step": 8679 }, { "epoch": 0.43, "grad_norm": 0.5459063053131104, "learning_rate": 0.0005707233658747983, "loss": 3.4036, "step": 8680 }, { "epoch": 0.43, "grad_norm": 0.5476892590522766, "learning_rate": 0.0005707167316717852, "loss": 3.3779, "step": 8681 }, { "epoch": 0.43, "grad_norm": 0.5351525545120239, "learning_rate": 0.0005707100967557563, "loss": 3.26, "step": 8682 }, { "epoch": 0.43, "grad_norm": 0.5344018936157227, "learning_rate": 0.0005707034611267291, "loss": 3.2521, "step": 8683 }, { "epoch": 0.43, "grad_norm": 0.5126794576644897, "learning_rate": 0.0005706968247847212, "loss": 3.086, "step": 8684 }, { "epoch": 0.43, "grad_norm": 0.5328701734542847, "learning_rate": 0.0005706901877297498, "loss": 3.2423, "step": 8685 }, { "epoch": 0.43, "grad_norm": 0.6240466237068176, "learning_rate": 0.0005706835499618326, "loss": 3.3378, "step": 8686 }, { "epoch": 0.43, "grad_norm": 0.49643224477767944, "learning_rate": 0.000570676911480987, "loss": 3.2525, "step": 8687 }, { "epoch": 0.43, "grad_norm": 0.5192905068397522, "learning_rate": 0.0005706702722872305, "loss": 3.369, "step": 8688 }, { "epoch": 0.43, "grad_norm": 0.5646792650222778, "learning_rate": 0.0005706636323805807, "loss": 3.3023, "step": 8689 }, { "epoch": 0.43, "grad_norm": 0.5123174786567688, "learning_rate": 0.0005706569917610548, "loss": 3.2993, "step": 8690 }, { "epoch": 0.43, "grad_norm": 0.5409421324729919, "learning_rate": 0.0005706503504286707, "loss": 3.163, "step": 8691 }, { "epoch": 0.43, "grad_norm": 0.5075324773788452, "learning_rate": 0.0005706437083834456, "loss": 3.4438, "step": 8692 }, { "epoch": 0.43, "grad_norm": 0.4756852686405182, "learning_rate": 0.000570637065625397, "loss": 3.3624, "step": 8693 }, { "epoch": 0.43, "grad_norm": 0.5012529492378235, "learning_rate": 0.0005706304221545424, "loss": 3.2455, "step": 8694 }, { "epoch": 0.43, "grad_norm": 0.5726290345191956, "learning_rate": 0.0005706237779708994, "loss": 3.11, "step": 8695 }, { "epoch": 0.43, "grad_norm": 0.5796265006065369, "learning_rate": 0.0005706171330744854, "loss": 3.2597, "step": 8696 }, { "epoch": 0.43, "grad_norm": 0.5194861888885498, "learning_rate": 0.000570610487465318, "loss": 3.0619, "step": 8697 }, { "epoch": 0.43, "grad_norm": 0.5499964952468872, "learning_rate": 0.0005706038411434147, "loss": 3.4847, "step": 8698 }, { "epoch": 0.43, "grad_norm": 0.5542723536491394, "learning_rate": 0.000570597194108793, "loss": 3.3495, "step": 8699 }, { "epoch": 0.43, "grad_norm": 0.5347164273262024, "learning_rate": 0.0005705905463614702, "loss": 3.288, "step": 8700 }, { "epoch": 0.43, "grad_norm": 0.5562871098518372, "learning_rate": 0.0005705838979014642, "loss": 3.3801, "step": 8701 }, { "epoch": 0.43, "grad_norm": 0.5251455307006836, "learning_rate": 0.0005705772487287921, "loss": 3.3115, "step": 8702 }, { "epoch": 0.43, "grad_norm": 0.526947021484375, "learning_rate": 0.0005705705988434716, "loss": 3.2882, "step": 8703 }, { "epoch": 0.43, "grad_norm": 0.5250048637390137, "learning_rate": 0.0005705639482455204, "loss": 3.1308, "step": 8704 }, { "epoch": 0.43, "grad_norm": 0.5624373555183411, "learning_rate": 0.0005705572969349556, "loss": 3.2379, "step": 8705 }, { "epoch": 0.43, "grad_norm": 0.600304365158081, "learning_rate": 0.000570550644911795, "loss": 3.2652, "step": 8706 }, { "epoch": 0.43, "grad_norm": 0.5497757196426392, "learning_rate": 0.0005705439921760562, "loss": 3.2458, "step": 8707 }, { "epoch": 0.43, "grad_norm": 0.5430121421813965, "learning_rate": 0.0005705373387277566, "loss": 3.4981, "step": 8708 }, { "epoch": 0.43, "grad_norm": 0.5202553868293762, "learning_rate": 0.0005705306845669137, "loss": 3.3059, "step": 8709 }, { "epoch": 0.43, "grad_norm": 0.53725266456604, "learning_rate": 0.0005705240296935448, "loss": 3.321, "step": 8710 }, { "epoch": 0.43, "grad_norm": 0.5087431073188782, "learning_rate": 0.000570517374107668, "loss": 3.255, "step": 8711 }, { "epoch": 0.43, "grad_norm": 0.5445797443389893, "learning_rate": 0.0005705107178093004, "loss": 3.2109, "step": 8712 }, { "epoch": 0.43, "grad_norm": 0.563112199306488, "learning_rate": 0.0005705040607984595, "loss": 3.322, "step": 8713 }, { "epoch": 0.43, "grad_norm": 0.544439435005188, "learning_rate": 0.000570497403075163, "loss": 3.0042, "step": 8714 }, { "epoch": 0.43, "grad_norm": 0.5439892411231995, "learning_rate": 0.0005704907446394285, "loss": 3.3405, "step": 8715 }, { "epoch": 0.43, "grad_norm": 0.554537296295166, "learning_rate": 0.0005704840854912732, "loss": 3.3383, "step": 8716 }, { "epoch": 0.43, "grad_norm": 0.506321907043457, "learning_rate": 0.0005704774256307151, "loss": 3.3718, "step": 8717 }, { "epoch": 0.43, "grad_norm": 0.5402343273162842, "learning_rate": 0.0005704707650577716, "loss": 3.1319, "step": 8718 }, { "epoch": 0.43, "grad_norm": 0.5468218326568604, "learning_rate": 0.0005704641037724599, "loss": 3.2299, "step": 8719 }, { "epoch": 0.43, "grad_norm": 0.5793018341064453, "learning_rate": 0.000570457441774798, "loss": 3.1532, "step": 8720 }, { "epoch": 0.43, "grad_norm": 0.5519994497299194, "learning_rate": 0.0005704507790648031, "loss": 3.2421, "step": 8721 }, { "epoch": 0.43, "grad_norm": 0.5348733067512512, "learning_rate": 0.0005704441156424931, "loss": 3.1543, "step": 8722 }, { "epoch": 0.43, "grad_norm": 0.5524995923042297, "learning_rate": 0.0005704374515078853, "loss": 3.2966, "step": 8723 }, { "epoch": 0.43, "grad_norm": 0.5177032351493835, "learning_rate": 0.0005704307866609971, "loss": 3.4132, "step": 8724 }, { "epoch": 0.43, "grad_norm": 0.5006693005561829, "learning_rate": 0.0005704241211018464, "loss": 3.2054, "step": 8725 }, { "epoch": 0.43, "grad_norm": 0.5278719663619995, "learning_rate": 0.0005704174548304506, "loss": 3.2603, "step": 8726 }, { "epoch": 0.43, "grad_norm": 0.4858294129371643, "learning_rate": 0.0005704107878468272, "loss": 3.3033, "step": 8727 }, { "epoch": 0.43, "grad_norm": 0.5394824147224426, "learning_rate": 0.0005704041201509939, "loss": 3.3758, "step": 8728 }, { "epoch": 0.43, "grad_norm": 0.5565510988235474, "learning_rate": 0.0005703974517429681, "loss": 3.0167, "step": 8729 }, { "epoch": 0.43, "grad_norm": 0.533441960811615, "learning_rate": 0.0005703907826227676, "loss": 3.5257, "step": 8730 }, { "epoch": 0.43, "grad_norm": 0.5071123242378235, "learning_rate": 0.0005703841127904097, "loss": 3.347, "step": 8731 }, { "epoch": 0.43, "grad_norm": 0.5355621576309204, "learning_rate": 0.000570377442245912, "loss": 3.074, "step": 8732 }, { "epoch": 0.43, "grad_norm": 0.5217567086219788, "learning_rate": 0.0005703707709892923, "loss": 3.4603, "step": 8733 }, { "epoch": 0.43, "grad_norm": 0.5563116669654846, "learning_rate": 0.0005703640990205681, "loss": 3.2609, "step": 8734 }, { "epoch": 0.43, "grad_norm": 0.5034108757972717, "learning_rate": 0.0005703574263397566, "loss": 2.9131, "step": 8735 }, { "epoch": 0.43, "grad_norm": 0.5095783472061157, "learning_rate": 0.000570350752946876, "loss": 3.3354, "step": 8736 }, { "epoch": 0.43, "grad_norm": 0.542126476764679, "learning_rate": 0.0005703440788419435, "loss": 3.272, "step": 8737 }, { "epoch": 0.43, "grad_norm": 0.5572932362556458, "learning_rate": 0.0005703374040249765, "loss": 3.3902, "step": 8738 }, { "epoch": 0.43, "grad_norm": 0.5626958012580872, "learning_rate": 0.000570330728495993, "loss": 3.2746, "step": 8739 }, { "epoch": 0.43, "grad_norm": 0.5667341947555542, "learning_rate": 0.0005703240522550102, "loss": 3.3586, "step": 8740 }, { "epoch": 0.43, "grad_norm": 0.556536078453064, "learning_rate": 0.0005703173753020461, "loss": 3.227, "step": 8741 }, { "epoch": 0.43, "grad_norm": 0.530508816242218, "learning_rate": 0.0005703106976371179, "loss": 3.5442, "step": 8742 }, { "epoch": 0.43, "grad_norm": 0.5346199870109558, "learning_rate": 0.0005703040192602435, "loss": 3.5053, "step": 8743 }, { "epoch": 0.43, "grad_norm": 0.5241973996162415, "learning_rate": 0.0005702973401714402, "loss": 3.4193, "step": 8744 }, { "epoch": 0.43, "grad_norm": 0.590114414691925, "learning_rate": 0.0005702906603707256, "loss": 3.1247, "step": 8745 }, { "epoch": 0.43, "grad_norm": 0.5349878668785095, "learning_rate": 0.0005702839798581176, "loss": 3.3134, "step": 8746 }, { "epoch": 0.43, "grad_norm": 0.598888099193573, "learning_rate": 0.0005702772986336337, "loss": 3.546, "step": 8747 }, { "epoch": 0.43, "grad_norm": 0.4993423521518707, "learning_rate": 0.0005702706166972912, "loss": 3.3099, "step": 8748 }, { "epoch": 0.43, "grad_norm": 0.5180084705352783, "learning_rate": 0.000570263934049108, "loss": 3.175, "step": 8749 }, { "epoch": 0.43, "grad_norm": 0.5331127643585205, "learning_rate": 0.0005702572506891017, "loss": 3.0252, "step": 8750 }, { "epoch": 0.43, "grad_norm": 0.5388214588165283, "learning_rate": 0.0005702505666172897, "loss": 3.2869, "step": 8751 }, { "epoch": 0.43, "grad_norm": 0.5273463129997253, "learning_rate": 0.0005702438818336897, "loss": 3.2632, "step": 8752 }, { "epoch": 0.43, "grad_norm": 0.5084626078605652, "learning_rate": 0.0005702371963383194, "loss": 3.3897, "step": 8753 }, { "epoch": 0.43, "grad_norm": 0.5808398127555847, "learning_rate": 0.0005702305101311963, "loss": 3.3616, "step": 8754 }, { "epoch": 0.43, "grad_norm": 0.5917160511016846, "learning_rate": 0.000570223823212338, "loss": 3.1535, "step": 8755 }, { "epoch": 0.43, "grad_norm": 0.5147940516471863, "learning_rate": 0.0005702171355817623, "loss": 3.1924, "step": 8756 }, { "epoch": 0.43, "grad_norm": 0.5295524597167969, "learning_rate": 0.0005702104472394866, "loss": 3.2706, "step": 8757 }, { "epoch": 0.43, "grad_norm": 0.5461099743843079, "learning_rate": 0.0005702037581855285, "loss": 3.4574, "step": 8758 }, { "epoch": 0.43, "grad_norm": 0.5407048463821411, "learning_rate": 0.0005701970684199057, "loss": 3.1396, "step": 8759 }, { "epoch": 0.43, "grad_norm": 0.5329182744026184, "learning_rate": 0.000570190377942636, "loss": 3.1328, "step": 8760 }, { "epoch": 0.43, "grad_norm": 0.5270434617996216, "learning_rate": 0.0005701836867537367, "loss": 3.4657, "step": 8761 }, { "epoch": 0.43, "grad_norm": 0.5661664009094238, "learning_rate": 0.0005701769948532257, "loss": 3.1477, "step": 8762 }, { "epoch": 0.43, "grad_norm": 0.526901364326477, "learning_rate": 0.0005701703022411203, "loss": 3.3944, "step": 8763 }, { "epoch": 0.43, "grad_norm": 0.5473800897598267, "learning_rate": 0.0005701636089174384, "loss": 3.4588, "step": 8764 }, { "epoch": 0.43, "grad_norm": 0.5030317902565002, "learning_rate": 0.0005701569148821976, "loss": 3.2712, "step": 8765 }, { "epoch": 0.43, "grad_norm": 0.5436576008796692, "learning_rate": 0.0005701502201354154, "loss": 3.2773, "step": 8766 }, { "epoch": 0.43, "grad_norm": 0.5438995361328125, "learning_rate": 0.0005701435246771095, "loss": 3.1112, "step": 8767 }, { "epoch": 0.43, "grad_norm": 0.51812344789505, "learning_rate": 0.0005701368285072977, "loss": 3.4097, "step": 8768 }, { "epoch": 0.43, "grad_norm": 0.5479652285575867, "learning_rate": 0.0005701301316259973, "loss": 3.1205, "step": 8769 }, { "epoch": 0.43, "grad_norm": 0.5035589337348938, "learning_rate": 0.0005701234340332262, "loss": 3.3524, "step": 8770 }, { "epoch": 0.43, "grad_norm": 0.5446529388427734, "learning_rate": 0.000570116735729002, "loss": 3.2749, "step": 8771 }, { "epoch": 0.43, "grad_norm": 0.5573705434799194, "learning_rate": 0.0005701100367133422, "loss": 3.4812, "step": 8772 }, { "epoch": 0.43, "grad_norm": 0.517190158367157, "learning_rate": 0.0005701033369862647, "loss": 3.1171, "step": 8773 }, { "epoch": 0.43, "grad_norm": 0.5003644824028015, "learning_rate": 0.0005700966365477869, "loss": 3.3361, "step": 8774 }, { "epoch": 0.43, "grad_norm": 0.6054016947746277, "learning_rate": 0.0005700899353979265, "loss": 3.0853, "step": 8775 }, { "epoch": 0.43, "grad_norm": 0.46385738253593445, "learning_rate": 0.0005700832335367012, "loss": 3.4381, "step": 8776 }, { "epoch": 0.43, "grad_norm": 0.495850145816803, "learning_rate": 0.0005700765309641287, "loss": 3.2427, "step": 8777 }, { "epoch": 0.43, "grad_norm": 0.5276760458946228, "learning_rate": 0.0005700698276802266, "loss": 3.2379, "step": 8778 }, { "epoch": 0.43, "grad_norm": 0.5193692445755005, "learning_rate": 0.0005700631236850124, "loss": 3.1412, "step": 8779 }, { "epoch": 0.43, "grad_norm": 0.537988543510437, "learning_rate": 0.0005700564189785041, "loss": 3.1968, "step": 8780 }, { "epoch": 0.43, "grad_norm": 0.5210827589035034, "learning_rate": 0.0005700497135607191, "loss": 3.3336, "step": 8781 }, { "epoch": 0.43, "grad_norm": 0.5374914407730103, "learning_rate": 0.0005700430074316751, "loss": 3.2019, "step": 8782 }, { "epoch": 0.43, "grad_norm": 0.5302662253379822, "learning_rate": 0.0005700363005913898, "loss": 3.2672, "step": 8783 }, { "epoch": 0.43, "grad_norm": 0.5213940143585205, "learning_rate": 0.0005700295930398809, "loss": 3.3535, "step": 8784 }, { "epoch": 0.43, "grad_norm": 0.509855329990387, "learning_rate": 0.000570022884777166, "loss": 3.3465, "step": 8785 }, { "epoch": 0.43, "grad_norm": 0.5186008214950562, "learning_rate": 0.0005700161758032628, "loss": 3.3766, "step": 8786 }, { "epoch": 0.43, "grad_norm": 0.558765709400177, "learning_rate": 0.0005700094661181889, "loss": 3.1944, "step": 8787 }, { "epoch": 0.43, "grad_norm": 0.5422574877738953, "learning_rate": 0.000570002755721962, "loss": 3.2641, "step": 8788 }, { "epoch": 0.43, "grad_norm": 0.5169627070426941, "learning_rate": 0.0005699960446145999, "loss": 3.2564, "step": 8789 }, { "epoch": 0.43, "grad_norm": 0.5492352843284607, "learning_rate": 0.0005699893327961201, "loss": 3.0885, "step": 8790 }, { "epoch": 0.43, "grad_norm": 0.553865909576416, "learning_rate": 0.0005699826202665405, "loss": 3.4303, "step": 8791 }, { "epoch": 0.43, "grad_norm": 0.5586493611335754, "learning_rate": 0.0005699759070258785, "loss": 3.1598, "step": 8792 }, { "epoch": 0.43, "grad_norm": 0.5349708199501038, "learning_rate": 0.000569969193074152, "loss": 3.3447, "step": 8793 }, { "epoch": 0.43, "grad_norm": 0.5063890814781189, "learning_rate": 0.0005699624784113785, "loss": 3.3572, "step": 8794 }, { "epoch": 0.43, "grad_norm": 0.5007393956184387, "learning_rate": 0.0005699557630375759, "loss": 3.3862, "step": 8795 }, { "epoch": 0.43, "grad_norm": 0.4956451654434204, "learning_rate": 0.0005699490469527617, "loss": 3.3052, "step": 8796 }, { "epoch": 0.43, "grad_norm": 0.5122515559196472, "learning_rate": 0.0005699423301569536, "loss": 3.3706, "step": 8797 }, { "epoch": 0.43, "grad_norm": 0.5375229716300964, "learning_rate": 0.0005699356126501695, "loss": 3.3361, "step": 8798 }, { "epoch": 0.43, "grad_norm": 0.48476994037628174, "learning_rate": 0.0005699288944324268, "loss": 3.3599, "step": 8799 }, { "epoch": 0.43, "grad_norm": 0.5187641978263855, "learning_rate": 0.0005699221755037435, "loss": 3.2034, "step": 8800 }, { "epoch": 0.43, "grad_norm": 0.5315556526184082, "learning_rate": 0.000569915455864137, "loss": 3.5175, "step": 8801 }, { "epoch": 0.43, "grad_norm": 0.5142885446548462, "learning_rate": 0.0005699087355136252, "loss": 3.1606, "step": 8802 }, { "epoch": 0.43, "grad_norm": 0.5275764465332031, "learning_rate": 0.0005699020144522257, "loss": 3.4446, "step": 8803 }, { "epoch": 0.43, "grad_norm": 0.5340292453765869, "learning_rate": 0.0005698952926799563, "loss": 3.3616, "step": 8804 }, { "epoch": 0.43, "grad_norm": 0.50909823179245, "learning_rate": 0.0005698885701968347, "loss": 3.388, "step": 8805 }, { "epoch": 0.43, "grad_norm": 0.5391430258750916, "learning_rate": 0.0005698818470028784, "loss": 3.2484, "step": 8806 }, { "epoch": 0.43, "grad_norm": 0.5389807224273682, "learning_rate": 0.0005698751230981053, "loss": 3.1581, "step": 8807 }, { "epoch": 0.43, "grad_norm": 0.4988233745098114, "learning_rate": 0.0005698683984825331, "loss": 3.0371, "step": 8808 }, { "epoch": 0.43, "grad_norm": 0.5417897701263428, "learning_rate": 0.0005698616731561794, "loss": 3.3176, "step": 8809 }, { "epoch": 0.43, "grad_norm": 0.5258243680000305, "learning_rate": 0.0005698549471190621, "loss": 3.5196, "step": 8810 }, { "epoch": 0.43, "grad_norm": 0.48488008975982666, "learning_rate": 0.0005698482203711988, "loss": 3.2598, "step": 8811 }, { "epoch": 0.43, "grad_norm": 0.48625877499580383, "learning_rate": 0.0005698414929126071, "loss": 3.3822, "step": 8812 }, { "epoch": 0.43, "grad_norm": 0.5102300047874451, "learning_rate": 0.0005698347647433049, "loss": 3.3212, "step": 8813 }, { "epoch": 0.43, "grad_norm": 0.5392574667930603, "learning_rate": 0.0005698280358633099, "loss": 3.1794, "step": 8814 }, { "epoch": 0.43, "grad_norm": 0.5915513038635254, "learning_rate": 0.0005698213062726397, "loss": 3.3302, "step": 8815 }, { "epoch": 0.43, "grad_norm": 0.5354930758476257, "learning_rate": 0.0005698145759713122, "loss": 3.2355, "step": 8816 }, { "epoch": 0.43, "grad_norm": 0.5436181426048279, "learning_rate": 0.000569807844959345, "loss": 3.3668, "step": 8817 }, { "epoch": 0.43, "grad_norm": 0.5166468024253845, "learning_rate": 0.0005698011132367558, "loss": 3.1715, "step": 8818 }, { "epoch": 0.43, "grad_norm": 0.5697653889656067, "learning_rate": 0.0005697943808035625, "loss": 3.271, "step": 8819 }, { "epoch": 0.43, "grad_norm": 0.5263750553131104, "learning_rate": 0.0005697876476597826, "loss": 3.5375, "step": 8820 }, { "epoch": 0.43, "grad_norm": 0.5435929894447327, "learning_rate": 0.0005697809138054341, "loss": 3.3333, "step": 8821 }, { "epoch": 0.43, "grad_norm": 0.5712611079216003, "learning_rate": 0.0005697741792405344, "loss": 3.453, "step": 8822 }, { "epoch": 0.43, "grad_norm": 0.5347647666931152, "learning_rate": 0.0005697674439651017, "loss": 3.164, "step": 8823 }, { "epoch": 0.43, "grad_norm": 0.549929141998291, "learning_rate": 0.0005697607079791533, "loss": 3.4472, "step": 8824 }, { "epoch": 0.43, "grad_norm": 0.5421603918075562, "learning_rate": 0.0005697539712827071, "loss": 3.0966, "step": 8825 }, { "epoch": 0.43, "grad_norm": 0.5252081751823425, "learning_rate": 0.0005697472338757808, "loss": 3.1905, "step": 8826 }, { "epoch": 0.43, "grad_norm": 0.5157675743103027, "learning_rate": 0.0005697404957583923, "loss": 3.2351, "step": 8827 }, { "epoch": 0.43, "grad_norm": 0.5952437520027161, "learning_rate": 0.0005697337569305594, "loss": 3.0903, "step": 8828 }, { "epoch": 0.43, "grad_norm": 0.5244645476341248, "learning_rate": 0.0005697270173922994, "loss": 3.2001, "step": 8829 }, { "epoch": 0.43, "grad_norm": 0.5517503619194031, "learning_rate": 0.0005697202771436305, "loss": 3.1902, "step": 8830 }, { "epoch": 0.43, "grad_norm": 0.5599244236946106, "learning_rate": 0.0005697135361845703, "loss": 3.2742, "step": 8831 }, { "epoch": 0.43, "grad_norm": 0.5345979332923889, "learning_rate": 0.0005697067945151365, "loss": 3.5683, "step": 8832 }, { "epoch": 0.43, "grad_norm": 0.5085006356239319, "learning_rate": 0.000569700052135347, "loss": 3.2911, "step": 8833 }, { "epoch": 0.43, "grad_norm": 0.5625810027122498, "learning_rate": 0.0005696933090452193, "loss": 3.1879, "step": 8834 }, { "epoch": 0.43, "grad_norm": 0.5771826505661011, "learning_rate": 0.0005696865652447715, "loss": 3.2465, "step": 8835 }, { "epoch": 0.43, "grad_norm": 0.5013545155525208, "learning_rate": 0.0005696798207340211, "loss": 3.1855, "step": 8836 }, { "epoch": 0.43, "grad_norm": 0.5615394115447998, "learning_rate": 0.000569673075512986, "loss": 3.4138, "step": 8837 }, { "epoch": 0.43, "grad_norm": 0.5521506071090698, "learning_rate": 0.0005696663295816839, "loss": 3.1211, "step": 8838 }, { "epoch": 0.43, "grad_norm": 0.713085412979126, "learning_rate": 0.0005696595829401325, "loss": 3.1726, "step": 8839 }, { "epoch": 0.43, "grad_norm": 0.5880343914031982, "learning_rate": 0.0005696528355883497, "loss": 3.3201, "step": 8840 }, { "epoch": 0.43, "grad_norm": 0.6011033654212952, "learning_rate": 0.0005696460875263534, "loss": 3.243, "step": 8841 }, { "epoch": 0.43, "grad_norm": 0.5444574356079102, "learning_rate": 0.000569639338754161, "loss": 3.5191, "step": 8842 }, { "epoch": 0.43, "grad_norm": 0.5401180386543274, "learning_rate": 0.0005696325892717906, "loss": 3.2096, "step": 8843 }, { "epoch": 0.43, "grad_norm": 0.5681575536727905, "learning_rate": 0.0005696258390792598, "loss": 3.2179, "step": 8844 }, { "epoch": 0.43, "grad_norm": 0.505577027797699, "learning_rate": 0.0005696190881765864, "loss": 3.5161, "step": 8845 }, { "epoch": 0.43, "grad_norm": 0.5092687606811523, "learning_rate": 0.0005696123365637882, "loss": 3.3775, "step": 8846 }, { "epoch": 0.43, "grad_norm": 0.4876072108745575, "learning_rate": 0.000569605584240883, "loss": 3.3778, "step": 8847 }, { "epoch": 0.43, "grad_norm": 0.5532077550888062, "learning_rate": 0.0005695988312078886, "loss": 3.2693, "step": 8848 }, { "epoch": 0.43, "grad_norm": 0.5383113622665405, "learning_rate": 0.0005695920774648227, "loss": 3.2532, "step": 8849 }, { "epoch": 0.43, "grad_norm": 0.5278736352920532, "learning_rate": 0.0005695853230117033, "loss": 3.286, "step": 8850 }, { "epoch": 0.43, "grad_norm": 0.5102056264877319, "learning_rate": 0.000569578567848548, "loss": 3.493, "step": 8851 }, { "epoch": 0.43, "grad_norm": 0.5252404808998108, "learning_rate": 0.0005695718119753746, "loss": 3.2592, "step": 8852 }, { "epoch": 0.43, "grad_norm": 0.5343819856643677, "learning_rate": 0.0005695650553922009, "loss": 3.2568, "step": 8853 }, { "epoch": 0.43, "grad_norm": 0.5407352447509766, "learning_rate": 0.0005695582980990448, "loss": 3.2887, "step": 8854 }, { "epoch": 0.43, "grad_norm": 0.5392612218856812, "learning_rate": 0.0005695515400959239, "loss": 3.4683, "step": 8855 }, { "epoch": 0.43, "grad_norm": 0.5008077025413513, "learning_rate": 0.0005695447813828562, "loss": 3.4033, "step": 8856 }, { "epoch": 0.43, "grad_norm": 0.5120012164115906, "learning_rate": 0.0005695380219598594, "loss": 3.3343, "step": 8857 }, { "epoch": 0.43, "grad_norm": 0.545326292514801, "learning_rate": 0.0005695312618269513, "loss": 3.2657, "step": 8858 }, { "epoch": 0.43, "grad_norm": 0.550786018371582, "learning_rate": 0.0005695245009841497, "loss": 3.4312, "step": 8859 }, { "epoch": 0.43, "grad_norm": 0.5332869291305542, "learning_rate": 0.0005695177394314725, "loss": 3.4009, "step": 8860 }, { "epoch": 0.43, "grad_norm": 0.5279282927513123, "learning_rate": 0.0005695109771689375, "loss": 3.1493, "step": 8861 }, { "epoch": 0.43, "grad_norm": 0.5188350081443787, "learning_rate": 0.0005695042141965624, "loss": 3.4996, "step": 8862 }, { "epoch": 0.43, "grad_norm": 0.5265781283378601, "learning_rate": 0.000569497450514365, "loss": 3.4578, "step": 8863 }, { "epoch": 0.43, "grad_norm": 0.5063434839248657, "learning_rate": 0.0005694906861223632, "loss": 3.4383, "step": 8864 }, { "epoch": 0.43, "grad_norm": 0.5559127926826477, "learning_rate": 0.0005694839210205749, "loss": 3.2548, "step": 8865 }, { "epoch": 0.43, "grad_norm": 0.5212278962135315, "learning_rate": 0.0005694771552090177, "loss": 3.1999, "step": 8866 }, { "epoch": 0.43, "grad_norm": 0.5368652939796448, "learning_rate": 0.0005694703886877097, "loss": 3.1888, "step": 8867 }, { "epoch": 0.43, "grad_norm": 0.5175959467887878, "learning_rate": 0.0005694636214566684, "loss": 3.3606, "step": 8868 }, { "epoch": 0.43, "grad_norm": 0.5428808927536011, "learning_rate": 0.0005694568535159118, "loss": 3.2056, "step": 8869 }, { "epoch": 0.43, "grad_norm": 0.5059577822685242, "learning_rate": 0.0005694500848654578, "loss": 3.1956, "step": 8870 }, { "epoch": 0.43, "grad_norm": 0.5455737113952637, "learning_rate": 0.000569443315505324, "loss": 3.5257, "step": 8871 }, { "epoch": 0.43, "grad_norm": 0.5467769503593445, "learning_rate": 0.0005694365454355284, "loss": 3.4259, "step": 8872 }, { "epoch": 0.43, "grad_norm": 0.5153681635856628, "learning_rate": 0.0005694297746560888, "loss": 3.5655, "step": 8873 }, { "epoch": 0.43, "grad_norm": 0.4906521141529083, "learning_rate": 0.0005694230031670231, "loss": 3.5068, "step": 8874 }, { "epoch": 0.43, "grad_norm": 0.5056790113449097, "learning_rate": 0.0005694162309683489, "loss": 3.2567, "step": 8875 }, { "epoch": 0.43, "grad_norm": 0.5721079111099243, "learning_rate": 0.0005694094580600843, "loss": 3.158, "step": 8876 }, { "epoch": 0.44, "grad_norm": 0.5457658767700195, "learning_rate": 0.0005694026844422471, "loss": 3.2658, "step": 8877 }, { "epoch": 0.44, "grad_norm": 0.5211271047592163, "learning_rate": 0.000569395910114855, "loss": 3.3445, "step": 8878 }, { "epoch": 0.44, "grad_norm": 0.5187593102455139, "learning_rate": 0.000569389135077926, "loss": 3.2324, "step": 8879 }, { "epoch": 0.44, "grad_norm": 0.49315252900123596, "learning_rate": 0.0005693823593314778, "loss": 3.2866, "step": 8880 }, { "epoch": 0.44, "grad_norm": 0.5174716711044312, "learning_rate": 0.0005693755828755283, "loss": 3.0987, "step": 8881 }, { "epoch": 0.44, "grad_norm": 0.5270132422447205, "learning_rate": 0.0005693688057100953, "loss": 3.3338, "step": 8882 }, { "epoch": 0.44, "grad_norm": 0.5709814429283142, "learning_rate": 0.0005693620278351968, "loss": 3.2527, "step": 8883 }, { "epoch": 0.44, "grad_norm": 0.50661700963974, "learning_rate": 0.0005693552492508505, "loss": 3.1687, "step": 8884 }, { "epoch": 0.44, "grad_norm": 0.5201055407524109, "learning_rate": 0.0005693484699570744, "loss": 3.2431, "step": 8885 }, { "epoch": 0.44, "grad_norm": 0.5174275040626526, "learning_rate": 0.0005693416899538861, "loss": 3.1476, "step": 8886 }, { "epoch": 0.44, "grad_norm": 0.5571829676628113, "learning_rate": 0.0005693349092413038, "loss": 3.349, "step": 8887 }, { "epoch": 0.44, "grad_norm": 0.5371866226196289, "learning_rate": 0.0005693281278193452, "loss": 3.1509, "step": 8888 }, { "epoch": 0.44, "grad_norm": 0.5208328366279602, "learning_rate": 0.000569321345688028, "loss": 3.4085, "step": 8889 }, { "epoch": 0.44, "grad_norm": 0.5333773493766785, "learning_rate": 0.0005693145628473703, "loss": 3.3804, "step": 8890 }, { "epoch": 0.44, "grad_norm": 0.5705960392951965, "learning_rate": 0.0005693077792973899, "loss": 3.4182, "step": 8891 }, { "epoch": 0.44, "grad_norm": 0.5110260844230652, "learning_rate": 0.0005693009950381046, "loss": 3.2802, "step": 8892 }, { "epoch": 0.44, "grad_norm": 0.4911125600337982, "learning_rate": 0.0005692942100695322, "loss": 3.244, "step": 8893 }, { "epoch": 0.44, "grad_norm": 0.6147732138633728, "learning_rate": 0.0005692874243916908, "loss": 3.2172, "step": 8894 }, { "epoch": 0.44, "grad_norm": 0.5848454236984253, "learning_rate": 0.0005692806380045981, "loss": 3.4469, "step": 8895 }, { "epoch": 0.44, "grad_norm": 0.5394555330276489, "learning_rate": 0.000569273850908272, "loss": 3.4887, "step": 8896 }, { "epoch": 0.44, "grad_norm": 0.5056954622268677, "learning_rate": 0.0005692670631027304, "loss": 3.3544, "step": 8897 }, { "epoch": 0.44, "grad_norm": 0.4951151907444, "learning_rate": 0.0005692602745879913, "loss": 3.2587, "step": 8898 }, { "epoch": 0.44, "grad_norm": 0.5151358246803284, "learning_rate": 0.0005692534853640723, "loss": 3.3862, "step": 8899 }, { "epoch": 0.44, "grad_norm": 0.5135519504547119, "learning_rate": 0.0005692466954309915, "loss": 3.2097, "step": 8900 }, { "epoch": 0.44, "grad_norm": 0.5229550004005432, "learning_rate": 0.0005692399047887667, "loss": 3.291, "step": 8901 }, { "epoch": 0.44, "grad_norm": 0.5192024111747742, "learning_rate": 0.000569233113437416, "loss": 3.2481, "step": 8902 }, { "epoch": 0.44, "grad_norm": 0.5763697624206543, "learning_rate": 0.0005692263213769569, "loss": 3.1955, "step": 8903 }, { "epoch": 0.44, "grad_norm": 0.5331725478172302, "learning_rate": 0.0005692195286074075, "loss": 3.2931, "step": 8904 }, { "epoch": 0.44, "grad_norm": 0.5367273092269897, "learning_rate": 0.0005692127351287857, "loss": 3.2269, "step": 8905 }, { "epoch": 0.44, "grad_norm": 0.5095797777175903, "learning_rate": 0.0005692059409411094, "loss": 3.3072, "step": 8906 }, { "epoch": 0.44, "grad_norm": 0.5251290202140808, "learning_rate": 0.0005691991460443964, "loss": 3.3444, "step": 8907 }, { "epoch": 0.44, "grad_norm": 0.5581363439559937, "learning_rate": 0.0005691923504386646, "loss": 3.3821, "step": 8908 }, { "epoch": 0.44, "grad_norm": 0.536482572555542, "learning_rate": 0.000569185554123932, "loss": 3.3329, "step": 8909 }, { "epoch": 0.44, "grad_norm": 0.5598181486129761, "learning_rate": 0.0005691787571002165, "loss": 3.3854, "step": 8910 }, { "epoch": 0.44, "grad_norm": 0.5411391854286194, "learning_rate": 0.000569171959367536, "loss": 3.2254, "step": 8911 }, { "epoch": 0.44, "grad_norm": 0.5239635109901428, "learning_rate": 0.0005691651609259083, "loss": 3.4488, "step": 8912 }, { "epoch": 0.44, "grad_norm": 0.5380442142486572, "learning_rate": 0.0005691583617753514, "loss": 3.601, "step": 8913 }, { "epoch": 0.44, "grad_norm": 0.5128017663955688, "learning_rate": 0.0005691515619158831, "loss": 3.39, "step": 8914 }, { "epoch": 0.44, "grad_norm": 0.5668172240257263, "learning_rate": 0.0005691447613475214, "loss": 3.4607, "step": 8915 }, { "epoch": 0.44, "grad_norm": 0.5155226588249207, "learning_rate": 0.0005691379600702843, "loss": 3.1785, "step": 8916 }, { "epoch": 0.44, "grad_norm": 0.53493332862854, "learning_rate": 0.0005691311580841896, "loss": 3.3619, "step": 8917 }, { "epoch": 0.44, "grad_norm": 0.5263345241546631, "learning_rate": 0.0005691243553892551, "loss": 3.3008, "step": 8918 }, { "epoch": 0.44, "grad_norm": 0.5928928852081299, "learning_rate": 0.0005691175519854989, "loss": 3.1524, "step": 8919 }, { "epoch": 0.44, "grad_norm": 0.5617950558662415, "learning_rate": 0.0005691107478729389, "loss": 3.0835, "step": 8920 }, { "epoch": 0.44, "grad_norm": 0.5107870697975159, "learning_rate": 0.0005691039430515929, "loss": 3.248, "step": 8921 }, { "epoch": 0.44, "grad_norm": 0.5091202259063721, "learning_rate": 0.000569097137521479, "loss": 3.3436, "step": 8922 }, { "epoch": 0.44, "grad_norm": 0.5600295066833496, "learning_rate": 0.000569090331282615, "loss": 3.1758, "step": 8923 }, { "epoch": 0.44, "grad_norm": 0.5212362408638, "learning_rate": 0.0005690835243350188, "loss": 3.4895, "step": 8924 }, { "epoch": 0.44, "grad_norm": 0.5386121273040771, "learning_rate": 0.0005690767166787084, "loss": 3.277, "step": 8925 }, { "epoch": 0.44, "grad_norm": 0.5397125482559204, "learning_rate": 0.0005690699083137018, "loss": 3.354, "step": 8926 }, { "epoch": 0.44, "grad_norm": 0.5409417152404785, "learning_rate": 0.0005690630992400168, "loss": 3.2076, "step": 8927 }, { "epoch": 0.44, "grad_norm": 0.5249380469322205, "learning_rate": 0.0005690562894576713, "loss": 3.3417, "step": 8928 }, { "epoch": 0.44, "grad_norm": 0.4952598810195923, "learning_rate": 0.0005690494789666834, "loss": 3.2278, "step": 8929 }, { "epoch": 0.44, "grad_norm": 0.5684417486190796, "learning_rate": 0.0005690426677670709, "loss": 3.0598, "step": 8930 }, { "epoch": 0.44, "grad_norm": 0.5358139872550964, "learning_rate": 0.0005690358558588519, "loss": 3.4284, "step": 8931 }, { "epoch": 0.44, "grad_norm": 0.5555673241615295, "learning_rate": 0.0005690290432420441, "loss": 3.2636, "step": 8932 }, { "epoch": 0.44, "grad_norm": 0.5253645777702332, "learning_rate": 0.0005690222299166656, "loss": 3.4941, "step": 8933 }, { "epoch": 0.44, "grad_norm": 0.5221039056777954, "learning_rate": 0.0005690154158827344, "loss": 3.3307, "step": 8934 }, { "epoch": 0.44, "grad_norm": 0.5121043920516968, "learning_rate": 0.0005690086011402683, "loss": 3.2696, "step": 8935 }, { "epoch": 0.44, "grad_norm": 0.5969629883766174, "learning_rate": 0.0005690017856892853, "loss": 3.1725, "step": 8936 }, { "epoch": 0.44, "grad_norm": 0.5254464745521545, "learning_rate": 0.0005689949695298034, "loss": 3.3222, "step": 8937 }, { "epoch": 0.44, "grad_norm": 0.5058313608169556, "learning_rate": 0.0005689881526618405, "loss": 3.3466, "step": 8938 }, { "epoch": 0.44, "grad_norm": 0.49000418186187744, "learning_rate": 0.0005689813350854147, "loss": 3.4226, "step": 8939 }, { "epoch": 0.44, "grad_norm": 0.5364489555358887, "learning_rate": 0.0005689745168005437, "loss": 3.3661, "step": 8940 }, { "epoch": 0.44, "grad_norm": 0.5190027952194214, "learning_rate": 0.0005689676978072456, "loss": 3.2002, "step": 8941 }, { "epoch": 0.44, "grad_norm": 0.5191941261291504, "learning_rate": 0.0005689608781055383, "loss": 3.0918, "step": 8942 }, { "epoch": 0.44, "grad_norm": 0.5378019213676453, "learning_rate": 0.0005689540576954399, "loss": 3.3341, "step": 8943 }, { "epoch": 0.44, "grad_norm": 0.6198469400405884, "learning_rate": 0.0005689472365769683, "loss": 3.3351, "step": 8944 }, { "epoch": 0.44, "grad_norm": 0.6274257302284241, "learning_rate": 0.0005689404147501413, "loss": 3.1836, "step": 8945 }, { "epoch": 0.44, "grad_norm": 0.5766898393630981, "learning_rate": 0.000568933592214977, "loss": 3.3648, "step": 8946 }, { "epoch": 0.44, "grad_norm": 0.5012130737304688, "learning_rate": 0.0005689267689714934, "loss": 3.3627, "step": 8947 }, { "epoch": 0.44, "grad_norm": 0.5235632061958313, "learning_rate": 0.0005689199450197086, "loss": 3.3629, "step": 8948 }, { "epoch": 0.44, "grad_norm": 0.4902224838733673, "learning_rate": 0.0005689131203596404, "loss": 3.4249, "step": 8949 }, { "epoch": 0.44, "grad_norm": 0.555653989315033, "learning_rate": 0.0005689062949913067, "loss": 3.2133, "step": 8950 }, { "epoch": 0.44, "grad_norm": 0.49776995182037354, "learning_rate": 0.0005688994689147256, "loss": 3.3663, "step": 8951 }, { "epoch": 0.44, "grad_norm": 0.5030352473258972, "learning_rate": 0.000568892642129915, "loss": 3.1925, "step": 8952 }, { "epoch": 0.44, "grad_norm": 0.4929159879684448, "learning_rate": 0.000568885814636893, "loss": 3.2832, "step": 8953 }, { "epoch": 0.44, "grad_norm": 0.5239042639732361, "learning_rate": 0.0005688789864356775, "loss": 3.2955, "step": 8954 }, { "epoch": 0.44, "grad_norm": 0.5698967576026917, "learning_rate": 0.0005688721575262865, "loss": 3.205, "step": 8955 }, { "epoch": 0.44, "grad_norm": 0.5118188858032227, "learning_rate": 0.0005688653279087379, "loss": 3.1984, "step": 8956 }, { "epoch": 0.44, "grad_norm": 0.6158947944641113, "learning_rate": 0.0005688584975830498, "loss": 3.1382, "step": 8957 }, { "epoch": 0.44, "grad_norm": 0.571174144744873, "learning_rate": 0.0005688516665492403, "loss": 3.1952, "step": 8958 }, { "epoch": 0.44, "grad_norm": 0.5016655921936035, "learning_rate": 0.0005688448348073271, "loss": 3.2333, "step": 8959 }, { "epoch": 0.44, "grad_norm": 0.5244669914245605, "learning_rate": 0.0005688380023573284, "loss": 3.3077, "step": 8960 }, { "epoch": 0.44, "grad_norm": 0.542945146560669, "learning_rate": 0.0005688311691992621, "loss": 3.1278, "step": 8961 }, { "epoch": 0.44, "grad_norm": 0.5659850835800171, "learning_rate": 0.0005688243353331462, "loss": 3.2479, "step": 8962 }, { "epoch": 0.44, "grad_norm": 0.5096282362937927, "learning_rate": 0.0005688175007589989, "loss": 3.3169, "step": 8963 }, { "epoch": 0.44, "grad_norm": 0.5422225594520569, "learning_rate": 0.0005688106654768379, "loss": 3.412, "step": 8964 }, { "epoch": 0.44, "grad_norm": 0.5580344200134277, "learning_rate": 0.0005688038294866814, "loss": 3.2899, "step": 8965 }, { "epoch": 0.44, "grad_norm": 0.5104421973228455, "learning_rate": 0.0005687969927885474, "loss": 3.1282, "step": 8966 }, { "epoch": 0.44, "grad_norm": 0.5075674057006836, "learning_rate": 0.0005687901553824537, "loss": 3.4159, "step": 8967 }, { "epoch": 0.44, "grad_norm": 0.5359368324279785, "learning_rate": 0.0005687833172684186, "loss": 3.4478, "step": 8968 }, { "epoch": 0.44, "grad_norm": 0.5409534573554993, "learning_rate": 0.0005687764784464599, "loss": 3.3085, "step": 8969 }, { "epoch": 0.44, "grad_norm": 0.5197051763534546, "learning_rate": 0.0005687696389165956, "loss": 3.0945, "step": 8970 }, { "epoch": 0.44, "grad_norm": 0.5289903283119202, "learning_rate": 0.000568762798678844, "loss": 3.4001, "step": 8971 }, { "epoch": 0.44, "grad_norm": 0.5642632842063904, "learning_rate": 0.0005687559577332228, "loss": 3.5348, "step": 8972 }, { "epoch": 0.44, "grad_norm": 0.6010560989379883, "learning_rate": 0.0005687491160797501, "loss": 3.2148, "step": 8973 }, { "epoch": 0.44, "grad_norm": 0.5142990350723267, "learning_rate": 0.000568742273718444, "loss": 3.2754, "step": 8974 }, { "epoch": 0.44, "grad_norm": 0.5444654822349548, "learning_rate": 0.0005687354306493225, "loss": 3.4776, "step": 8975 }, { "epoch": 0.44, "grad_norm": 0.5075336694717407, "learning_rate": 0.0005687285868724035, "loss": 3.1729, "step": 8976 }, { "epoch": 0.44, "grad_norm": 0.5050414204597473, "learning_rate": 0.0005687217423877051, "loss": 3.244, "step": 8977 }, { "epoch": 0.44, "grad_norm": 0.49637553095817566, "learning_rate": 0.0005687148971952455, "loss": 3.3921, "step": 8978 }, { "epoch": 0.44, "grad_norm": 0.5068024396896362, "learning_rate": 0.0005687080512950426, "loss": 3.228, "step": 8979 }, { "epoch": 0.44, "grad_norm": 0.512059211730957, "learning_rate": 0.0005687012046871143, "loss": 3.2777, "step": 8980 }, { "epoch": 0.44, "grad_norm": 0.5337809920310974, "learning_rate": 0.0005686943573714787, "loss": 3.4469, "step": 8981 }, { "epoch": 0.44, "grad_norm": 0.5400496125221252, "learning_rate": 0.0005686875093481539, "loss": 3.2248, "step": 8982 }, { "epoch": 0.44, "grad_norm": 0.5225552916526794, "learning_rate": 0.0005686806606171579, "loss": 3.4751, "step": 8983 }, { "epoch": 0.44, "grad_norm": 0.5124357342720032, "learning_rate": 0.0005686738111785088, "loss": 3.1647, "step": 8984 }, { "epoch": 0.44, "grad_norm": 0.5976647734642029, "learning_rate": 0.0005686669610322246, "loss": 3.3468, "step": 8985 }, { "epoch": 0.44, "grad_norm": 0.5431848764419556, "learning_rate": 0.0005686601101783232, "loss": 3.3075, "step": 8986 }, { "epoch": 0.44, "grad_norm": 0.522193193435669, "learning_rate": 0.000568653258616823, "loss": 3.2333, "step": 8987 }, { "epoch": 0.44, "grad_norm": 0.5003888607025146, "learning_rate": 0.0005686464063477416, "loss": 3.4794, "step": 8988 }, { "epoch": 0.44, "grad_norm": 0.5476301312446594, "learning_rate": 0.0005686395533710973, "loss": 3.1188, "step": 8989 }, { "epoch": 0.44, "grad_norm": 0.5447575449943542, "learning_rate": 0.0005686326996869083, "loss": 3.3467, "step": 8990 }, { "epoch": 0.44, "grad_norm": 0.5251429677009583, "learning_rate": 0.0005686258452951923, "loss": 3.2945, "step": 8991 }, { "epoch": 0.44, "grad_norm": 0.5147581100463867, "learning_rate": 0.0005686189901959675, "loss": 3.4831, "step": 8992 }, { "epoch": 0.44, "grad_norm": 0.5492880344390869, "learning_rate": 0.000568612134389252, "loss": 3.4656, "step": 8993 }, { "epoch": 0.44, "grad_norm": 0.511838436126709, "learning_rate": 0.000568605277875064, "loss": 3.2636, "step": 8994 }, { "epoch": 0.44, "grad_norm": 0.5325186848640442, "learning_rate": 0.0005685984206534212, "loss": 3.3853, "step": 8995 }, { "epoch": 0.44, "grad_norm": 0.528266966342926, "learning_rate": 0.000568591562724342, "loss": 3.3297, "step": 8996 }, { "epoch": 0.44, "grad_norm": 0.5761194229125977, "learning_rate": 0.0005685847040878441, "loss": 3.1765, "step": 8997 }, { "epoch": 0.44, "grad_norm": 0.4952467978000641, "learning_rate": 0.000568577844743946, "loss": 3.4474, "step": 8998 }, { "epoch": 0.44, "grad_norm": 0.5307065844535828, "learning_rate": 0.0005685709846926654, "loss": 3.3741, "step": 8999 }, { "epoch": 0.44, "grad_norm": 0.5305284857749939, "learning_rate": 0.0005685641239340206, "loss": 3.2478, "step": 9000 }, { "epoch": 0.44, "grad_norm": 0.5165554881095886, "learning_rate": 0.0005685572624680295, "loss": 3.1295, "step": 9001 }, { "epoch": 0.44, "grad_norm": 0.5124072432518005, "learning_rate": 0.0005685504002947104, "loss": 3.3352, "step": 9002 }, { "epoch": 0.44, "grad_norm": 0.5218518376350403, "learning_rate": 0.0005685435374140811, "loss": 3.2078, "step": 9003 }, { "epoch": 0.44, "grad_norm": 0.574737548828125, "learning_rate": 0.0005685366738261598, "loss": 3.2878, "step": 9004 }, { "epoch": 0.44, "grad_norm": 0.5492660403251648, "learning_rate": 0.0005685298095309646, "loss": 3.2467, "step": 9005 }, { "epoch": 0.44, "grad_norm": 0.5594704151153564, "learning_rate": 0.0005685229445285137, "loss": 3.4172, "step": 9006 }, { "epoch": 0.44, "grad_norm": 0.5028058290481567, "learning_rate": 0.000568516078818825, "loss": 3.3881, "step": 9007 }, { "epoch": 0.44, "grad_norm": 0.5461245179176331, "learning_rate": 0.0005685092124019165, "loss": 3.5241, "step": 9008 }, { "epoch": 0.44, "grad_norm": 0.5365086793899536, "learning_rate": 0.0005685023452778065, "loss": 3.3182, "step": 9009 }, { "epoch": 0.44, "grad_norm": 0.530041515827179, "learning_rate": 0.0005684954774465129, "loss": 3.4351, "step": 9010 }, { "epoch": 0.44, "grad_norm": 0.5450629591941833, "learning_rate": 0.000568488608908054, "loss": 3.386, "step": 9011 }, { "epoch": 0.44, "grad_norm": 0.5048748254776001, "learning_rate": 0.0005684817396624476, "loss": 3.6001, "step": 9012 }, { "epoch": 0.44, "grad_norm": 0.5259529948234558, "learning_rate": 0.0005684748697097121, "loss": 3.0533, "step": 9013 }, { "epoch": 0.44, "grad_norm": 0.5435417294502258, "learning_rate": 0.0005684679990498655, "loss": 3.4664, "step": 9014 }, { "epoch": 0.44, "grad_norm": 0.5544543266296387, "learning_rate": 0.0005684611276829259, "loss": 3.151, "step": 9015 }, { "epoch": 0.44, "grad_norm": 0.536688506603241, "learning_rate": 0.0005684542556089112, "loss": 3.3264, "step": 9016 }, { "epoch": 0.44, "grad_norm": 0.5237370133399963, "learning_rate": 0.0005684473828278398, "loss": 3.3703, "step": 9017 }, { "epoch": 0.44, "grad_norm": 0.5127825140953064, "learning_rate": 0.0005684405093397296, "loss": 3.2351, "step": 9018 }, { "epoch": 0.44, "grad_norm": 0.5225836634635925, "learning_rate": 0.0005684336351445987, "loss": 3.2658, "step": 9019 }, { "epoch": 0.44, "grad_norm": 0.5963146686553955, "learning_rate": 0.0005684267602424652, "loss": 3.2942, "step": 9020 }, { "epoch": 0.44, "grad_norm": 0.5185006260871887, "learning_rate": 0.0005684198846333475, "loss": 3.1149, "step": 9021 }, { "epoch": 0.44, "grad_norm": 0.5315390229225159, "learning_rate": 0.0005684130083172634, "loss": 3.1857, "step": 9022 }, { "epoch": 0.44, "grad_norm": 0.5717655420303345, "learning_rate": 0.0005684061312942309, "loss": 3.3572, "step": 9023 }, { "epoch": 0.44, "grad_norm": 0.559047520160675, "learning_rate": 0.0005683992535642686, "loss": 3.3638, "step": 9024 }, { "epoch": 0.44, "grad_norm": 0.5396157503128052, "learning_rate": 0.0005683923751273941, "loss": 3.2502, "step": 9025 }, { "epoch": 0.44, "grad_norm": 0.5662112832069397, "learning_rate": 0.0005683854959836259, "loss": 3.244, "step": 9026 }, { "epoch": 0.44, "grad_norm": 0.49871206283569336, "learning_rate": 0.0005683786161329819, "loss": 3.1904, "step": 9027 }, { "epoch": 0.44, "grad_norm": 0.5973621606826782, "learning_rate": 0.0005683717355754802, "loss": 3.2459, "step": 9028 }, { "epoch": 0.44, "grad_norm": 0.5342198014259338, "learning_rate": 0.000568364854311139, "loss": 3.1354, "step": 9029 }, { "epoch": 0.44, "grad_norm": 0.54073566198349, "learning_rate": 0.0005683579723399765, "loss": 3.2937, "step": 9030 }, { "epoch": 0.44, "grad_norm": 0.5744414329528809, "learning_rate": 0.0005683510896620108, "loss": 3.2309, "step": 9031 }, { "epoch": 0.44, "grad_norm": 0.5294274687767029, "learning_rate": 0.0005683442062772598, "loss": 3.1342, "step": 9032 }, { "epoch": 0.44, "grad_norm": 0.5062122941017151, "learning_rate": 0.0005683373221857419, "loss": 3.3708, "step": 9033 }, { "epoch": 0.44, "grad_norm": 0.5301145315170288, "learning_rate": 0.0005683304373874752, "loss": 3.3389, "step": 9034 }, { "epoch": 0.44, "grad_norm": 0.5067840814590454, "learning_rate": 0.0005683235518824777, "loss": 3.3437, "step": 9035 }, { "epoch": 0.44, "grad_norm": 0.5595483183860779, "learning_rate": 0.0005683166656707676, "loss": 3.315, "step": 9036 }, { "epoch": 0.44, "grad_norm": 0.5125157833099365, "learning_rate": 0.0005683097787523631, "loss": 3.3297, "step": 9037 }, { "epoch": 0.44, "grad_norm": 0.5199974179267883, "learning_rate": 0.0005683028911272822, "loss": 3.23, "step": 9038 }, { "epoch": 0.44, "grad_norm": 0.5390259027481079, "learning_rate": 0.0005682960027955431, "loss": 3.2136, "step": 9039 }, { "epoch": 0.44, "grad_norm": 0.500478982925415, "learning_rate": 0.0005682891137571641, "loss": 3.3919, "step": 9040 }, { "epoch": 0.44, "grad_norm": 0.5096060037612915, "learning_rate": 0.0005682822240121631, "loss": 3.5207, "step": 9041 }, { "epoch": 0.44, "grad_norm": 0.4984170198440552, "learning_rate": 0.0005682753335605583, "loss": 3.3042, "step": 9042 }, { "epoch": 0.44, "grad_norm": 0.5067201852798462, "learning_rate": 0.000568268442402368, "loss": 3.231, "step": 9043 }, { "epoch": 0.44, "grad_norm": 0.5161800384521484, "learning_rate": 0.0005682615505376103, "loss": 3.296, "step": 9044 }, { "epoch": 0.44, "grad_norm": 0.5189632773399353, "learning_rate": 0.0005682546579663032, "loss": 3.2647, "step": 9045 }, { "epoch": 0.44, "grad_norm": 0.5524479150772095, "learning_rate": 0.000568247764688465, "loss": 3.1366, "step": 9046 }, { "epoch": 0.44, "grad_norm": 0.5149971842765808, "learning_rate": 0.0005682408707041138, "loss": 3.3692, "step": 9047 }, { "epoch": 0.44, "grad_norm": 0.5103289484977722, "learning_rate": 0.0005682339760132677, "loss": 3.6341, "step": 9048 }, { "epoch": 0.44, "grad_norm": 0.5169664621353149, "learning_rate": 0.000568227080615945, "loss": 3.3884, "step": 9049 }, { "epoch": 0.44, "grad_norm": 0.5336402058601379, "learning_rate": 0.0005682201845121638, "loss": 3.3195, "step": 9050 }, { "epoch": 0.44, "grad_norm": 0.4936128556728363, "learning_rate": 0.0005682132877019424, "loss": 3.2705, "step": 9051 }, { "epoch": 0.44, "grad_norm": 0.49011480808258057, "learning_rate": 0.0005682063901852986, "loss": 3.3899, "step": 9052 }, { "epoch": 0.44, "grad_norm": 0.5118272304534912, "learning_rate": 0.0005681994919622508, "loss": 3.3134, "step": 9053 }, { "epoch": 0.44, "grad_norm": 0.5226544737815857, "learning_rate": 0.0005681925930328172, "loss": 3.2044, "step": 9054 }, { "epoch": 0.44, "grad_norm": 0.5248229503631592, "learning_rate": 0.0005681856933970159, "loss": 3.0951, "step": 9055 }, { "epoch": 0.44, "grad_norm": 0.533872127532959, "learning_rate": 0.0005681787930548652, "loss": 3.3359, "step": 9056 }, { "epoch": 0.44, "grad_norm": 0.5588023066520691, "learning_rate": 0.0005681718920063831, "loss": 3.3904, "step": 9057 }, { "epoch": 0.44, "grad_norm": 0.5113193988800049, "learning_rate": 0.0005681649902515878, "loss": 3.3799, "step": 9058 }, { "epoch": 0.44, "grad_norm": 0.5519219040870667, "learning_rate": 0.0005681580877904977, "loss": 3.2737, "step": 9059 }, { "epoch": 0.44, "grad_norm": 0.5289116501808167, "learning_rate": 0.0005681511846231306, "loss": 3.4338, "step": 9060 }, { "epoch": 0.44, "grad_norm": 0.6075866222381592, "learning_rate": 0.000568144280749505, "loss": 3.1228, "step": 9061 }, { "epoch": 0.44, "grad_norm": 0.5013885498046875, "learning_rate": 0.0005681373761696389, "loss": 3.1611, "step": 9062 }, { "epoch": 0.44, "grad_norm": 0.5552061200141907, "learning_rate": 0.0005681304708835506, "loss": 3.2345, "step": 9063 }, { "epoch": 0.44, "grad_norm": 0.5864258408546448, "learning_rate": 0.0005681235648912581, "loss": 3.2384, "step": 9064 }, { "epoch": 0.44, "grad_norm": 0.5120783448219299, "learning_rate": 0.00056811665819278, "loss": 3.3637, "step": 9065 }, { "epoch": 0.44, "grad_norm": 0.49182936549186707, "learning_rate": 0.0005681097507881342, "loss": 3.3427, "step": 9066 }, { "epoch": 0.44, "grad_norm": 0.49345386028289795, "learning_rate": 0.0005681028426773387, "loss": 3.2404, "step": 9067 }, { "epoch": 0.44, "grad_norm": 0.5188148617744446, "learning_rate": 0.0005680959338604121, "loss": 3.4539, "step": 9068 }, { "epoch": 0.44, "grad_norm": 0.5250046849250793, "learning_rate": 0.0005680890243373722, "loss": 3.4335, "step": 9069 }, { "epoch": 0.44, "grad_norm": 0.5325458645820618, "learning_rate": 0.0005680821141082375, "loss": 3.18, "step": 9070 }, { "epoch": 0.44, "grad_norm": 0.5744656324386597, "learning_rate": 0.0005680752031730262, "loss": 3.3223, "step": 9071 }, { "epoch": 0.44, "grad_norm": 0.5349627733230591, "learning_rate": 0.0005680682915317562, "loss": 3.1908, "step": 9072 }, { "epoch": 0.44, "grad_norm": 0.5127280354499817, "learning_rate": 0.0005680613791844461, "loss": 3.29, "step": 9073 }, { "epoch": 0.44, "grad_norm": 0.5042176246643066, "learning_rate": 0.000568054466131114, "loss": 3.3658, "step": 9074 }, { "epoch": 0.44, "grad_norm": 0.5064307451248169, "learning_rate": 0.0005680475523717778, "loss": 3.3205, "step": 9075 }, { "epoch": 0.44, "grad_norm": 0.48436474800109863, "learning_rate": 0.0005680406379064561, "loss": 3.2207, "step": 9076 }, { "epoch": 0.44, "grad_norm": 0.5284563302993774, "learning_rate": 0.0005680337227351668, "loss": 3.3994, "step": 9077 }, { "epoch": 0.44, "grad_norm": 0.5303294658660889, "learning_rate": 0.0005680268068579282, "loss": 3.4003, "step": 9078 }, { "epoch": 0.44, "grad_norm": 0.5509895086288452, "learning_rate": 0.0005680198902747589, "loss": 3.2329, "step": 9079 }, { "epoch": 0.44, "grad_norm": 0.5539824366569519, "learning_rate": 0.0005680129729856765, "loss": 3.1104, "step": 9080 }, { "epoch": 0.45, "grad_norm": 0.6219602823257446, "learning_rate": 0.0005680060549906996, "loss": 3.4123, "step": 9081 }, { "epoch": 0.45, "grad_norm": 0.5088189244270325, "learning_rate": 0.0005679991362898463, "loss": 3.2834, "step": 9082 }, { "epoch": 0.45, "grad_norm": 0.557954728603363, "learning_rate": 0.0005679922168831348, "loss": 3.2455, "step": 9083 }, { "epoch": 0.45, "grad_norm": 0.6321285367012024, "learning_rate": 0.0005679852967705836, "loss": 3.1699, "step": 9084 }, { "epoch": 0.45, "grad_norm": 0.585474967956543, "learning_rate": 0.0005679783759522105, "loss": 3.1827, "step": 9085 }, { "epoch": 0.45, "grad_norm": 0.5444757342338562, "learning_rate": 0.000567971454428034, "loss": 3.3533, "step": 9086 }, { "epoch": 0.45, "grad_norm": 0.4752892553806305, "learning_rate": 0.0005679645321980723, "loss": 3.256, "step": 9087 }, { "epoch": 0.45, "grad_norm": 0.5205745100975037, "learning_rate": 0.0005679576092623435, "loss": 3.4033, "step": 9088 }, { "epoch": 0.45, "grad_norm": 0.5516600608825684, "learning_rate": 0.000567950685620866, "loss": 3.3387, "step": 9089 }, { "epoch": 0.45, "grad_norm": 0.5286070704460144, "learning_rate": 0.0005679437612736578, "loss": 3.2433, "step": 9090 }, { "epoch": 0.45, "grad_norm": 0.5709437131881714, "learning_rate": 0.0005679368362207375, "loss": 3.2315, "step": 9091 }, { "epoch": 0.45, "grad_norm": 0.5004037022590637, "learning_rate": 0.0005679299104621231, "loss": 3.3357, "step": 9092 }, { "epoch": 0.45, "grad_norm": 0.5113794207572937, "learning_rate": 0.0005679229839978329, "loss": 3.2806, "step": 9093 }, { "epoch": 0.45, "grad_norm": 0.5075574517250061, "learning_rate": 0.0005679160568278851, "loss": 3.6071, "step": 9094 }, { "epoch": 0.45, "grad_norm": 0.5023056268692017, "learning_rate": 0.0005679091289522979, "loss": 3.3308, "step": 9095 }, { "epoch": 0.45, "grad_norm": 0.5051378607749939, "learning_rate": 0.0005679022003710896, "loss": 3.4196, "step": 9096 }, { "epoch": 0.45, "grad_norm": 0.5265752077102661, "learning_rate": 0.0005678952710842785, "loss": 3.5432, "step": 9097 }, { "epoch": 0.45, "grad_norm": 0.49984413385391235, "learning_rate": 0.0005678883410918828, "loss": 3.4198, "step": 9098 }, { "epoch": 0.45, "grad_norm": 0.5776906609535217, "learning_rate": 0.0005678814103939209, "loss": 3.4116, "step": 9099 }, { "epoch": 0.45, "grad_norm": 0.5122084617614746, "learning_rate": 0.0005678744789904108, "loss": 3.3746, "step": 9100 }, { "epoch": 0.45, "grad_norm": 0.5504657626152039, "learning_rate": 0.0005678675468813709, "loss": 3.4364, "step": 9101 }, { "epoch": 0.45, "grad_norm": 0.5166839361190796, "learning_rate": 0.0005678606140668194, "loss": 3.4628, "step": 9102 }, { "epoch": 0.45, "grad_norm": 0.4547307789325714, "learning_rate": 0.0005678536805467746, "loss": 3.5267, "step": 9103 }, { "epoch": 0.45, "grad_norm": 0.5224431157112122, "learning_rate": 0.0005678467463212549, "loss": 3.2876, "step": 9104 }, { "epoch": 0.45, "grad_norm": 0.501089334487915, "learning_rate": 0.0005678398113902782, "loss": 3.4004, "step": 9105 }, { "epoch": 0.45, "grad_norm": 0.5165853500366211, "learning_rate": 0.0005678328757538631, "loss": 3.3895, "step": 9106 }, { "epoch": 0.45, "grad_norm": 0.5630010366439819, "learning_rate": 0.0005678259394120277, "loss": 3.4256, "step": 9107 }, { "epoch": 0.45, "grad_norm": 0.5082736015319824, "learning_rate": 0.0005678190023647903, "loss": 3.3324, "step": 9108 }, { "epoch": 0.45, "grad_norm": 0.5279152989387512, "learning_rate": 0.0005678120646121694, "loss": 3.4226, "step": 9109 }, { "epoch": 0.45, "grad_norm": 0.5158643126487732, "learning_rate": 0.0005678051261541828, "loss": 3.1352, "step": 9110 }, { "epoch": 0.45, "grad_norm": 0.5294057130813599, "learning_rate": 0.0005677981869908491, "loss": 3.0135, "step": 9111 }, { "epoch": 0.45, "grad_norm": 0.5090487599372864, "learning_rate": 0.0005677912471221866, "loss": 3.396, "step": 9112 }, { "epoch": 0.45, "grad_norm": 0.5615133047103882, "learning_rate": 0.0005677843065482135, "loss": 3.1692, "step": 9113 }, { "epoch": 0.45, "grad_norm": 0.5106341242790222, "learning_rate": 0.000567777365268948, "loss": 3.3025, "step": 9114 }, { "epoch": 0.45, "grad_norm": 0.5407706499099731, "learning_rate": 0.0005677704232844084, "loss": 3.2903, "step": 9115 }, { "epoch": 0.45, "grad_norm": 0.5446071028709412, "learning_rate": 0.0005677634805946131, "loss": 3.311, "step": 9116 }, { "epoch": 0.45, "grad_norm": 0.5780032277107239, "learning_rate": 0.0005677565371995804, "loss": 3.4569, "step": 9117 }, { "epoch": 0.45, "grad_norm": 0.49284398555755615, "learning_rate": 0.0005677495930993284, "loss": 3.1733, "step": 9118 }, { "epoch": 0.45, "grad_norm": 0.48607781529426575, "learning_rate": 0.0005677426482938756, "loss": 3.3819, "step": 9119 }, { "epoch": 0.45, "grad_norm": 0.520286500453949, "learning_rate": 0.0005677357027832401, "loss": 3.0716, "step": 9120 }, { "epoch": 0.45, "grad_norm": 0.5000066757202148, "learning_rate": 0.0005677287565674404, "loss": 3.2683, "step": 9121 }, { "epoch": 0.45, "grad_norm": 0.5120892524719238, "learning_rate": 0.0005677218096464946, "loss": 3.349, "step": 9122 }, { "epoch": 0.45, "grad_norm": 0.5167591571807861, "learning_rate": 0.0005677148620204211, "loss": 3.4684, "step": 9123 }, { "epoch": 0.45, "grad_norm": 0.50194251537323, "learning_rate": 0.0005677079136892381, "loss": 3.2571, "step": 9124 }, { "epoch": 0.45, "grad_norm": 0.5270057320594788, "learning_rate": 0.000567700964652964, "loss": 3.3654, "step": 9125 }, { "epoch": 0.45, "grad_norm": 0.5042098760604858, "learning_rate": 0.0005676940149116172, "loss": 3.3709, "step": 9126 }, { "epoch": 0.45, "grad_norm": 0.4862194061279297, "learning_rate": 0.0005676870644652158, "loss": 3.3156, "step": 9127 }, { "epoch": 0.45, "grad_norm": 0.5414255261421204, "learning_rate": 0.0005676801133137782, "loss": 3.191, "step": 9128 }, { "epoch": 0.45, "grad_norm": 0.5115101933479309, "learning_rate": 0.0005676731614573228, "loss": 3.276, "step": 9129 }, { "epoch": 0.45, "grad_norm": 0.4912344515323639, "learning_rate": 0.0005676662088958676, "loss": 3.393, "step": 9130 }, { "epoch": 0.45, "grad_norm": 0.5179233551025391, "learning_rate": 0.0005676592556294312, "loss": 3.2392, "step": 9131 }, { "epoch": 0.45, "grad_norm": 0.5112375617027283, "learning_rate": 0.0005676523016580319, "loss": 3.3681, "step": 9132 }, { "epoch": 0.45, "grad_norm": 0.5025057792663574, "learning_rate": 0.0005676453469816878, "loss": 3.3225, "step": 9133 }, { "epoch": 0.45, "grad_norm": 0.4907090961933136, "learning_rate": 0.0005676383916004174, "loss": 3.3381, "step": 9134 }, { "epoch": 0.45, "grad_norm": 0.5072884559631348, "learning_rate": 0.0005676314355142392, "loss": 3.2997, "step": 9135 }, { "epoch": 0.45, "grad_norm": 0.5344531536102295, "learning_rate": 0.0005676244787231711, "loss": 3.1497, "step": 9136 }, { "epoch": 0.45, "grad_norm": 0.5543556809425354, "learning_rate": 0.0005676175212272317, "loss": 3.5104, "step": 9137 }, { "epoch": 0.45, "grad_norm": 0.5010618567466736, "learning_rate": 0.0005676105630264392, "loss": 3.6381, "step": 9138 }, { "epoch": 0.45, "grad_norm": 0.5100370645523071, "learning_rate": 0.000567603604120812, "loss": 3.1735, "step": 9139 }, { "epoch": 0.45, "grad_norm": 0.5232434868812561, "learning_rate": 0.0005675966445103684, "loss": 3.5975, "step": 9140 }, { "epoch": 0.45, "grad_norm": 0.5133650898933411, "learning_rate": 0.0005675896841951268, "loss": 3.4042, "step": 9141 }, { "epoch": 0.45, "grad_norm": 0.526075541973114, "learning_rate": 0.0005675827231751055, "loss": 3.4472, "step": 9142 }, { "epoch": 0.45, "grad_norm": 0.5056864619255066, "learning_rate": 0.0005675757614503227, "loss": 3.1239, "step": 9143 }, { "epoch": 0.45, "grad_norm": 0.5270462036132812, "learning_rate": 0.0005675687990207969, "loss": 3.4077, "step": 9144 }, { "epoch": 0.45, "grad_norm": 0.5691565275192261, "learning_rate": 0.0005675618358865463, "loss": 3.3887, "step": 9145 }, { "epoch": 0.45, "grad_norm": 0.47389543056488037, "learning_rate": 0.0005675548720475894, "loss": 3.3965, "step": 9146 }, { "epoch": 0.45, "grad_norm": 0.5528371930122375, "learning_rate": 0.0005675479075039444, "loss": 3.1438, "step": 9147 }, { "epoch": 0.45, "grad_norm": 0.48816508054733276, "learning_rate": 0.0005675409422556297, "loss": 3.2555, "step": 9148 }, { "epoch": 0.45, "grad_norm": 0.5429815649986267, "learning_rate": 0.0005675339763026638, "loss": 3.1632, "step": 9149 }, { "epoch": 0.45, "grad_norm": 0.5055248141288757, "learning_rate": 0.0005675270096450648, "loss": 3.3858, "step": 9150 }, { "epoch": 0.45, "grad_norm": 0.5444815158843994, "learning_rate": 0.000567520042282851, "loss": 3.1408, "step": 9151 }, { "epoch": 0.45, "grad_norm": 0.5034661889076233, "learning_rate": 0.000567513074216041, "loss": 3.4106, "step": 9152 }, { "epoch": 0.45, "grad_norm": 0.5205017924308777, "learning_rate": 0.0005675061054446531, "loss": 3.3771, "step": 9153 }, { "epoch": 0.45, "grad_norm": 0.5044941306114197, "learning_rate": 0.0005674991359687055, "loss": 3.1945, "step": 9154 }, { "epoch": 0.45, "grad_norm": 0.5016582608222961, "learning_rate": 0.0005674921657882168, "loss": 3.1752, "step": 9155 }, { "epoch": 0.45, "grad_norm": 0.524653434753418, "learning_rate": 0.0005674851949032052, "loss": 3.1112, "step": 9156 }, { "epoch": 0.45, "grad_norm": 0.551464319229126, "learning_rate": 0.0005674782233136889, "loss": 3.4564, "step": 9157 }, { "epoch": 0.45, "grad_norm": 0.6055057644844055, "learning_rate": 0.0005674712510196865, "loss": 3.235, "step": 9158 }, { "epoch": 0.45, "grad_norm": 0.5390385389328003, "learning_rate": 0.0005674642780212165, "loss": 3.2609, "step": 9159 }, { "epoch": 0.45, "grad_norm": 0.5527321100234985, "learning_rate": 0.0005674573043182969, "loss": 3.1562, "step": 9160 }, { "epoch": 0.45, "grad_norm": 0.5559296607971191, "learning_rate": 0.0005674503299109462, "loss": 3.1802, "step": 9161 }, { "epoch": 0.45, "grad_norm": 0.5836023092269897, "learning_rate": 0.0005674433547991828, "loss": 3.2304, "step": 9162 }, { "epoch": 0.45, "grad_norm": 0.5389158725738525, "learning_rate": 0.0005674363789830251, "loss": 3.1442, "step": 9163 }, { "epoch": 0.45, "grad_norm": 0.48464417457580566, "learning_rate": 0.0005674294024624914, "loss": 3.4488, "step": 9164 }, { "epoch": 0.45, "grad_norm": 0.5225753784179688, "learning_rate": 0.0005674224252376003, "loss": 3.2688, "step": 9165 }, { "epoch": 0.45, "grad_norm": 0.516726553440094, "learning_rate": 0.0005674154473083699, "loss": 3.3803, "step": 9166 }, { "epoch": 0.45, "grad_norm": 0.5179518461227417, "learning_rate": 0.0005674084686748186, "loss": 3.438, "step": 9167 }, { "epoch": 0.45, "grad_norm": 0.5502138733863831, "learning_rate": 0.000567401489336965, "loss": 3.1833, "step": 9168 }, { "epoch": 0.45, "grad_norm": 0.523209273815155, "learning_rate": 0.0005673945092948273, "loss": 3.2305, "step": 9169 }, { "epoch": 0.45, "grad_norm": 0.48779064416885376, "learning_rate": 0.0005673875285484238, "loss": 3.3943, "step": 9170 }, { "epoch": 0.45, "grad_norm": 0.5196689963340759, "learning_rate": 0.0005673805470977732, "loss": 3.2378, "step": 9171 }, { "epoch": 0.45, "grad_norm": 0.5514273643493652, "learning_rate": 0.0005673735649428934, "loss": 3.2909, "step": 9172 }, { "epoch": 0.45, "grad_norm": 0.5025139451026917, "learning_rate": 0.0005673665820838034, "loss": 3.3022, "step": 9173 }, { "epoch": 0.45, "grad_norm": 0.5027960538864136, "learning_rate": 0.000567359598520521, "loss": 3.2242, "step": 9174 }, { "epoch": 0.45, "grad_norm": 0.5085102319717407, "learning_rate": 0.0005673526142530651, "loss": 3.2399, "step": 9175 }, { "epoch": 0.45, "grad_norm": 0.5565027594566345, "learning_rate": 0.0005673456292814539, "loss": 3.2157, "step": 9176 }, { "epoch": 0.45, "grad_norm": 0.5032663941383362, "learning_rate": 0.0005673386436057056, "loss": 3.2285, "step": 9177 }, { "epoch": 0.45, "grad_norm": 0.5043344497680664, "learning_rate": 0.0005673316572258389, "loss": 3.3707, "step": 9178 }, { "epoch": 0.45, "grad_norm": 0.5071136951446533, "learning_rate": 0.000567324670141872, "loss": 3.1895, "step": 9179 }, { "epoch": 0.45, "grad_norm": 0.4878983795642853, "learning_rate": 0.0005673176823538232, "loss": 3.1579, "step": 9180 }, { "epoch": 0.45, "grad_norm": 0.5063034892082214, "learning_rate": 0.0005673106938617113, "loss": 3.3481, "step": 9181 }, { "epoch": 0.45, "grad_norm": 0.5148180723190308, "learning_rate": 0.0005673037046655544, "loss": 3.4275, "step": 9182 }, { "epoch": 0.45, "grad_norm": 0.529994547367096, "learning_rate": 0.0005672967147653709, "loss": 3.321, "step": 9183 }, { "epoch": 0.45, "grad_norm": 0.5083233118057251, "learning_rate": 0.0005672897241611795, "loss": 3.2562, "step": 9184 }, { "epoch": 0.45, "grad_norm": 0.5259013772010803, "learning_rate": 0.0005672827328529982, "loss": 3.4605, "step": 9185 }, { "epoch": 0.45, "grad_norm": 0.5973625779151917, "learning_rate": 0.0005672757408408458, "loss": 3.2049, "step": 9186 }, { "epoch": 0.45, "grad_norm": 0.5016379952430725, "learning_rate": 0.0005672687481247404, "loss": 2.9897, "step": 9187 }, { "epoch": 0.45, "grad_norm": 0.4864937961101532, "learning_rate": 0.0005672617547047006, "loss": 3.4482, "step": 9188 }, { "epoch": 0.45, "grad_norm": 0.5325981974601746, "learning_rate": 0.0005672547605807449, "loss": 3.1332, "step": 9189 }, { "epoch": 0.45, "grad_norm": 0.5618669390678406, "learning_rate": 0.0005672477657528913, "loss": 3.6335, "step": 9190 }, { "epoch": 0.45, "grad_norm": 0.5770689249038696, "learning_rate": 0.0005672407702211588, "loss": 3.4217, "step": 9191 }, { "epoch": 0.45, "grad_norm": 0.5329024791717529, "learning_rate": 0.0005672337739855654, "loss": 3.2829, "step": 9192 }, { "epoch": 0.45, "grad_norm": 0.5350424647331238, "learning_rate": 0.0005672267770461297, "loss": 3.2882, "step": 9193 }, { "epoch": 0.45, "grad_norm": 0.5432996153831482, "learning_rate": 0.00056721977940287, "loss": 3.2945, "step": 9194 }, { "epoch": 0.45, "grad_norm": 0.5327097773551941, "learning_rate": 0.000567212781055805, "loss": 3.4755, "step": 9195 }, { "epoch": 0.45, "grad_norm": 0.5117045044898987, "learning_rate": 0.000567205782004953, "loss": 3.1168, "step": 9196 }, { "epoch": 0.45, "grad_norm": 0.5237148404121399, "learning_rate": 0.0005671987822503322, "loss": 3.2491, "step": 9197 }, { "epoch": 0.45, "grad_norm": 0.5158869624137878, "learning_rate": 0.0005671917817919613, "loss": 3.2337, "step": 9198 }, { "epoch": 0.45, "grad_norm": 0.5397765040397644, "learning_rate": 0.0005671847806298587, "loss": 3.3685, "step": 9199 }, { "epoch": 0.45, "grad_norm": 0.5293693542480469, "learning_rate": 0.0005671777787640428, "loss": 3.0961, "step": 9200 }, { "epoch": 0.45, "grad_norm": 0.522372305393219, "learning_rate": 0.000567170776194532, "loss": 3.3425, "step": 9201 }, { "epoch": 0.45, "grad_norm": 0.5064888596534729, "learning_rate": 0.000567163772921345, "loss": 3.3638, "step": 9202 }, { "epoch": 0.45, "grad_norm": 0.5614068508148193, "learning_rate": 0.0005671567689444997, "loss": 3.305, "step": 9203 }, { "epoch": 0.45, "grad_norm": 0.5083727240562439, "learning_rate": 0.0005671497642640151, "loss": 3.1742, "step": 9204 }, { "epoch": 0.45, "grad_norm": 0.5845643281936646, "learning_rate": 0.0005671427588799094, "loss": 3.2797, "step": 9205 }, { "epoch": 0.45, "grad_norm": 0.5405354499816895, "learning_rate": 0.000567135752792201, "loss": 3.4243, "step": 9206 }, { "epoch": 0.45, "grad_norm": 0.5075390934944153, "learning_rate": 0.0005671287460009086, "loss": 3.3293, "step": 9207 }, { "epoch": 0.45, "grad_norm": 0.5443362593650818, "learning_rate": 0.0005671217385060504, "loss": 3.3097, "step": 9208 }, { "epoch": 0.45, "grad_norm": 0.5043942332267761, "learning_rate": 0.0005671147303076449, "loss": 3.2578, "step": 9209 }, { "epoch": 0.45, "grad_norm": 0.5327485799789429, "learning_rate": 0.0005671077214057107, "loss": 3.3638, "step": 9210 }, { "epoch": 0.45, "grad_norm": 0.5666617751121521, "learning_rate": 0.0005671007118002662, "loss": 3.3362, "step": 9211 }, { "epoch": 0.45, "grad_norm": 0.5105660557746887, "learning_rate": 0.0005670937014913297, "loss": 3.3562, "step": 9212 }, { "epoch": 0.45, "grad_norm": 0.5232953429222107, "learning_rate": 0.0005670866904789199, "loss": 3.3715, "step": 9213 }, { "epoch": 0.45, "grad_norm": 0.5395106077194214, "learning_rate": 0.0005670796787630552, "loss": 3.213, "step": 9214 }, { "epoch": 0.45, "grad_norm": 0.48984065651893616, "learning_rate": 0.0005670726663437539, "loss": 3.3097, "step": 9215 }, { "epoch": 0.45, "grad_norm": 0.5296399593353271, "learning_rate": 0.0005670656532210346, "loss": 3.3367, "step": 9216 }, { "epoch": 0.45, "grad_norm": 0.4997063875198364, "learning_rate": 0.0005670586393949159, "loss": 3.4825, "step": 9217 }, { "epoch": 0.45, "grad_norm": 0.5573795437812805, "learning_rate": 0.000567051624865416, "loss": 3.2277, "step": 9218 }, { "epoch": 0.45, "grad_norm": 0.5867475271224976, "learning_rate": 0.0005670446096325536, "loss": 3.4567, "step": 9219 }, { "epoch": 0.45, "grad_norm": 0.5648623704910278, "learning_rate": 0.000567037593696347, "loss": 3.3481, "step": 9220 }, { "epoch": 0.45, "grad_norm": 0.5602989792823792, "learning_rate": 0.0005670305770568148, "loss": 3.4067, "step": 9221 }, { "epoch": 0.45, "grad_norm": 0.5342902541160583, "learning_rate": 0.0005670235597139755, "loss": 3.1211, "step": 9222 }, { "epoch": 0.45, "grad_norm": 0.5073335766792297, "learning_rate": 0.0005670165416678476, "loss": 3.3586, "step": 9223 }, { "epoch": 0.45, "grad_norm": 0.5396925806999207, "learning_rate": 0.0005670095229184494, "loss": 3.4748, "step": 9224 }, { "epoch": 0.45, "grad_norm": 0.5194887518882751, "learning_rate": 0.0005670025034657995, "loss": 3.4222, "step": 9225 }, { "epoch": 0.45, "grad_norm": 0.56588214635849, "learning_rate": 0.0005669954833099164, "loss": 3.3498, "step": 9226 }, { "epoch": 0.45, "grad_norm": 0.5211643576622009, "learning_rate": 0.0005669884624508186, "loss": 3.431, "step": 9227 }, { "epoch": 0.45, "grad_norm": 0.5741928219795227, "learning_rate": 0.0005669814408885246, "loss": 3.4504, "step": 9228 }, { "epoch": 0.45, "grad_norm": 0.5376482009887695, "learning_rate": 0.0005669744186230528, "loss": 3.3266, "step": 9229 }, { "epoch": 0.45, "grad_norm": 0.5111083388328552, "learning_rate": 0.0005669673956544219, "loss": 3.049, "step": 9230 }, { "epoch": 0.45, "grad_norm": 0.5390723943710327, "learning_rate": 0.0005669603719826501, "loss": 3.1554, "step": 9231 }, { "epoch": 0.45, "grad_norm": 0.5268939733505249, "learning_rate": 0.0005669533476077561, "loss": 3.4162, "step": 9232 }, { "epoch": 0.45, "grad_norm": 0.528060793876648, "learning_rate": 0.0005669463225297585, "loss": 3.2039, "step": 9233 }, { "epoch": 0.45, "grad_norm": 0.5250667333602905, "learning_rate": 0.0005669392967486755, "loss": 3.2844, "step": 9234 }, { "epoch": 0.45, "grad_norm": 0.557022213935852, "learning_rate": 0.0005669322702645257, "loss": 3.3297, "step": 9235 }, { "epoch": 0.45, "grad_norm": 0.5029047131538391, "learning_rate": 0.0005669252430773279, "loss": 3.2711, "step": 9236 }, { "epoch": 0.45, "grad_norm": 0.5264648199081421, "learning_rate": 0.0005669182151871003, "loss": 3.3115, "step": 9237 }, { "epoch": 0.45, "grad_norm": 0.5282554626464844, "learning_rate": 0.0005669111865938614, "loss": 3.3636, "step": 9238 }, { "epoch": 0.45, "grad_norm": 0.4783882200717926, "learning_rate": 0.0005669041572976299, "loss": 3.4196, "step": 9239 }, { "epoch": 0.45, "grad_norm": 0.5388532280921936, "learning_rate": 0.0005668971272984242, "loss": 3.3577, "step": 9240 }, { "epoch": 0.45, "grad_norm": 0.5190770030021667, "learning_rate": 0.0005668900965962627, "loss": 3.252, "step": 9241 }, { "epoch": 0.45, "grad_norm": 0.4901539087295532, "learning_rate": 0.0005668830651911642, "loss": 3.4138, "step": 9242 }, { "epoch": 0.45, "grad_norm": 0.5431410074234009, "learning_rate": 0.000566876033083147, "loss": 3.2324, "step": 9243 }, { "epoch": 0.45, "grad_norm": 0.5126324892044067, "learning_rate": 0.0005668690002722297, "loss": 3.2226, "step": 9244 }, { "epoch": 0.45, "grad_norm": 0.5156897306442261, "learning_rate": 0.0005668619667584309, "loss": 3.2097, "step": 9245 }, { "epoch": 0.45, "grad_norm": 0.49389657378196716, "learning_rate": 0.000566854932541769, "loss": 3.284, "step": 9246 }, { "epoch": 0.45, "grad_norm": 0.5490319728851318, "learning_rate": 0.0005668478976222624, "loss": 3.4974, "step": 9247 }, { "epoch": 0.45, "grad_norm": 0.5475131273269653, "learning_rate": 0.00056684086199993, "loss": 3.4208, "step": 9248 }, { "epoch": 0.45, "grad_norm": 0.5155820846557617, "learning_rate": 0.00056683382567479, "loss": 3.4763, "step": 9249 }, { "epoch": 0.45, "grad_norm": 0.5059611201286316, "learning_rate": 0.0005668267886468611, "loss": 3.223, "step": 9250 }, { "epoch": 0.45, "grad_norm": 0.5430670380592346, "learning_rate": 0.0005668197509161618, "loss": 3.1563, "step": 9251 }, { "epoch": 0.45, "grad_norm": 0.5147794485092163, "learning_rate": 0.0005668127124827106, "loss": 3.218, "step": 9252 }, { "epoch": 0.45, "grad_norm": 0.5046300888061523, "learning_rate": 0.0005668056733465262, "loss": 3.118, "step": 9253 }, { "epoch": 0.45, "grad_norm": 0.5530743598937988, "learning_rate": 0.0005667986335076269, "loss": 3.2957, "step": 9254 }, { "epoch": 0.45, "grad_norm": 0.5372881889343262, "learning_rate": 0.0005667915929660313, "loss": 3.3998, "step": 9255 }, { "epoch": 0.45, "grad_norm": 0.5331935286521912, "learning_rate": 0.000566784551721758, "loss": 3.4255, "step": 9256 }, { "epoch": 0.45, "grad_norm": 0.5113346576690674, "learning_rate": 0.0005667775097748255, "loss": 3.1996, "step": 9257 }, { "epoch": 0.45, "grad_norm": 0.4899602234363556, "learning_rate": 0.0005667704671252525, "loss": 3.484, "step": 9258 }, { "epoch": 0.45, "grad_norm": 0.535860538482666, "learning_rate": 0.0005667634237730573, "loss": 3.1899, "step": 9259 }, { "epoch": 0.45, "grad_norm": 0.5384031534194946, "learning_rate": 0.0005667563797182586, "loss": 3.3316, "step": 9260 }, { "epoch": 0.45, "grad_norm": 0.5427521467208862, "learning_rate": 0.0005667493349608751, "loss": 3.3506, "step": 9261 }, { "epoch": 0.45, "grad_norm": 0.543279230594635, "learning_rate": 0.000566742289500925, "loss": 3.2267, "step": 9262 }, { "epoch": 0.45, "grad_norm": 0.5359872579574585, "learning_rate": 0.0005667352433384272, "loss": 3.2173, "step": 9263 }, { "epoch": 0.45, "grad_norm": 0.5161938667297363, "learning_rate": 0.0005667281964734, "loss": 3.2177, "step": 9264 }, { "epoch": 0.45, "grad_norm": 0.5487397313117981, "learning_rate": 0.0005667211489058621, "loss": 3.3548, "step": 9265 }, { "epoch": 0.45, "grad_norm": 0.5496209263801575, "learning_rate": 0.0005667141006358321, "loss": 3.1292, "step": 9266 }, { "epoch": 0.45, "grad_norm": 0.5191566348075867, "learning_rate": 0.0005667070516633283, "loss": 3.31, "step": 9267 }, { "epoch": 0.45, "grad_norm": 0.5273240804672241, "learning_rate": 0.0005667000019883696, "loss": 2.9271, "step": 9268 }, { "epoch": 0.45, "grad_norm": 0.5322637557983398, "learning_rate": 0.0005666929516109744, "loss": 3.2176, "step": 9269 }, { "epoch": 0.45, "grad_norm": 0.5463858246803284, "learning_rate": 0.0005666859005311612, "loss": 3.1465, "step": 9270 }, { "epoch": 0.45, "grad_norm": 0.5420017838478088, "learning_rate": 0.0005666788487489488, "loss": 3.1499, "step": 9271 }, { "epoch": 0.45, "grad_norm": 0.5984858274459839, "learning_rate": 0.0005666717962643555, "loss": 3.293, "step": 9272 }, { "epoch": 0.45, "grad_norm": 0.5361172556877136, "learning_rate": 0.0005666647430774001, "loss": 3.3239, "step": 9273 }, { "epoch": 0.45, "grad_norm": 0.5419718623161316, "learning_rate": 0.000566657689188101, "loss": 3.3363, "step": 9274 }, { "epoch": 0.45, "grad_norm": 0.6194941997528076, "learning_rate": 0.000566650634596477, "loss": 3.2487, "step": 9275 }, { "epoch": 0.45, "grad_norm": 0.5028418302536011, "learning_rate": 0.0005666435793025465, "loss": 3.5507, "step": 9276 }, { "epoch": 0.45, "grad_norm": 0.5127885937690735, "learning_rate": 0.0005666365233063281, "loss": 3.4255, "step": 9277 }, { "epoch": 0.45, "grad_norm": 0.5219062566757202, "learning_rate": 0.0005666294666078404, "loss": 3.1941, "step": 9278 }, { "epoch": 0.45, "grad_norm": 0.515256404876709, "learning_rate": 0.0005666224092071019, "loss": 3.3369, "step": 9279 }, { "epoch": 0.45, "grad_norm": 0.5556721687316895, "learning_rate": 0.0005666153511041315, "loss": 3.3815, "step": 9280 }, { "epoch": 0.45, "grad_norm": 0.5763710737228394, "learning_rate": 0.0005666082922989475, "loss": 3.3639, "step": 9281 }, { "epoch": 0.45, "grad_norm": 0.5201305747032166, "learning_rate": 0.0005666012327915686, "loss": 3.2809, "step": 9282 }, { "epoch": 0.45, "grad_norm": 0.5183270573616028, "learning_rate": 0.0005665941725820133, "loss": 3.1675, "step": 9283 }, { "epoch": 0.45, "grad_norm": 0.533555805683136, "learning_rate": 0.0005665871116703003, "loss": 3.195, "step": 9284 }, { "epoch": 0.46, "grad_norm": 0.5322220325469971, "learning_rate": 0.000566580050056448, "loss": 3.3518, "step": 9285 }, { "epoch": 0.46, "grad_norm": 0.5173676013946533, "learning_rate": 0.0005665729877404753, "loss": 3.3363, "step": 9286 }, { "epoch": 0.46, "grad_norm": 0.5588550567626953, "learning_rate": 0.0005665659247224006, "loss": 3.3185, "step": 9287 }, { "epoch": 0.46, "grad_norm": 0.5288047790527344, "learning_rate": 0.0005665588610022426, "loss": 3.2531, "step": 9288 }, { "epoch": 0.46, "grad_norm": 0.4969140589237213, "learning_rate": 0.0005665517965800199, "loss": 3.3127, "step": 9289 }, { "epoch": 0.46, "grad_norm": 0.5432141423225403, "learning_rate": 0.000566544731455751, "loss": 3.082, "step": 9290 }, { "epoch": 0.46, "grad_norm": 0.5413832664489746, "learning_rate": 0.0005665376656294545, "loss": 3.3992, "step": 9291 }, { "epoch": 0.46, "grad_norm": 0.5215909481048584, "learning_rate": 0.0005665305991011492, "loss": 3.5054, "step": 9292 }, { "epoch": 0.46, "grad_norm": 0.5591144561767578, "learning_rate": 0.0005665235318708537, "loss": 3.3934, "step": 9293 }, { "epoch": 0.46, "grad_norm": 0.6027560234069824, "learning_rate": 0.0005665164639385863, "loss": 3.252, "step": 9294 }, { "epoch": 0.46, "grad_norm": 0.5260102152824402, "learning_rate": 0.0005665093953043658, "loss": 3.2683, "step": 9295 }, { "epoch": 0.46, "grad_norm": 0.525118887424469, "learning_rate": 0.0005665023259682111, "loss": 3.277, "step": 9296 }, { "epoch": 0.46, "grad_norm": 0.4957008957862854, "learning_rate": 0.0005664952559301403, "loss": 3.1469, "step": 9297 }, { "epoch": 0.46, "grad_norm": 0.5985009074211121, "learning_rate": 0.0005664881851901725, "loss": 3.195, "step": 9298 }, { "epoch": 0.46, "grad_norm": 0.5201311111450195, "learning_rate": 0.0005664811137483259, "loss": 3.4488, "step": 9299 }, { "epoch": 0.46, "grad_norm": 0.5100109577178955, "learning_rate": 0.0005664740416046195, "loss": 3.1562, "step": 9300 }, { "epoch": 0.46, "grad_norm": 0.49282416701316833, "learning_rate": 0.0005664669687590717, "loss": 3.3542, "step": 9301 }, { "epoch": 0.46, "grad_norm": 0.5214360356330872, "learning_rate": 0.0005664598952117012, "loss": 3.2439, "step": 9302 }, { "epoch": 0.46, "grad_norm": 0.5333632826805115, "learning_rate": 0.0005664528209625265, "loss": 3.2928, "step": 9303 }, { "epoch": 0.46, "grad_norm": 0.5121089816093445, "learning_rate": 0.0005664457460115665, "loss": 3.3875, "step": 9304 }, { "epoch": 0.46, "grad_norm": 0.48593372106552124, "learning_rate": 0.0005664386703588396, "loss": 3.3829, "step": 9305 }, { "epoch": 0.46, "grad_norm": 0.5539550185203552, "learning_rate": 0.0005664315940043645, "loss": 3.427, "step": 9306 }, { "epoch": 0.46, "grad_norm": 0.5157627463340759, "learning_rate": 0.00056642451694816, "loss": 3.3568, "step": 9307 }, { "epoch": 0.46, "grad_norm": 0.5772610902786255, "learning_rate": 0.0005664174391902444, "loss": 3.0817, "step": 9308 }, { "epoch": 0.46, "grad_norm": 0.5100268125534058, "learning_rate": 0.0005664103607306367, "loss": 3.1593, "step": 9309 }, { "epoch": 0.46, "grad_norm": 0.5215173363685608, "learning_rate": 0.0005664032815693553, "loss": 3.1953, "step": 9310 }, { "epoch": 0.46, "grad_norm": 0.5628623962402344, "learning_rate": 0.000566396201706419, "loss": 3.1241, "step": 9311 }, { "epoch": 0.46, "grad_norm": 0.5145275592803955, "learning_rate": 0.0005663891211418463, "loss": 3.2781, "step": 9312 }, { "epoch": 0.46, "grad_norm": 0.545166015625, "learning_rate": 0.0005663820398756559, "loss": 3.3879, "step": 9313 }, { "epoch": 0.46, "grad_norm": 0.5119915008544922, "learning_rate": 0.0005663749579078665, "loss": 3.0349, "step": 9314 }, { "epoch": 0.46, "grad_norm": 0.5298326015472412, "learning_rate": 0.0005663678752384968, "loss": 3.3834, "step": 9315 }, { "epoch": 0.46, "grad_norm": 0.5123884081840515, "learning_rate": 0.0005663607918675654, "loss": 3.3531, "step": 9316 }, { "epoch": 0.46, "grad_norm": 0.5344944000244141, "learning_rate": 0.0005663537077950908, "loss": 3.4021, "step": 9317 }, { "epoch": 0.46, "grad_norm": 0.5218607187271118, "learning_rate": 0.0005663466230210919, "loss": 3.1947, "step": 9318 }, { "epoch": 0.46, "grad_norm": 0.5089520215988159, "learning_rate": 0.0005663395375455872, "loss": 3.3224, "step": 9319 }, { "epoch": 0.46, "grad_norm": 0.5154445171356201, "learning_rate": 0.0005663324513685954, "loss": 3.4781, "step": 9320 }, { "epoch": 0.46, "grad_norm": 0.4960334002971649, "learning_rate": 0.0005663253644901351, "loss": 3.316, "step": 9321 }, { "epoch": 0.46, "grad_norm": 0.5327979326248169, "learning_rate": 0.0005663182769102252, "loss": 3.3396, "step": 9322 }, { "epoch": 0.46, "grad_norm": 0.5505029559135437, "learning_rate": 0.0005663111886288842, "loss": 3.2301, "step": 9323 }, { "epoch": 0.46, "grad_norm": 0.5501486659049988, "learning_rate": 0.0005663040996461308, "loss": 3.4768, "step": 9324 }, { "epoch": 0.46, "grad_norm": 0.5302287936210632, "learning_rate": 0.0005662970099619835, "loss": 3.1638, "step": 9325 }, { "epoch": 0.46, "grad_norm": 0.5448341965675354, "learning_rate": 0.0005662899195764612, "loss": 3.17, "step": 9326 }, { "epoch": 0.46, "grad_norm": 0.5440930724143982, "learning_rate": 0.0005662828284895826, "loss": 3.4745, "step": 9327 }, { "epoch": 0.46, "grad_norm": 0.5151364803314209, "learning_rate": 0.0005662757367013662, "loss": 3.1387, "step": 9328 }, { "epoch": 0.46, "grad_norm": 0.4982331395149231, "learning_rate": 0.0005662686442118308, "loss": 3.285, "step": 9329 }, { "epoch": 0.46, "grad_norm": 0.5508311986923218, "learning_rate": 0.0005662615510209949, "loss": 3.2912, "step": 9330 }, { "epoch": 0.46, "grad_norm": 0.4659712016582489, "learning_rate": 0.0005662544571288775, "loss": 3.4545, "step": 9331 }, { "epoch": 0.46, "grad_norm": 0.5672176480293274, "learning_rate": 0.0005662473625354969, "loss": 3.2206, "step": 9332 }, { "epoch": 0.46, "grad_norm": 0.5322652459144592, "learning_rate": 0.0005662402672408722, "loss": 3.2292, "step": 9333 }, { "epoch": 0.46, "grad_norm": 0.5741870403289795, "learning_rate": 0.0005662331712450216, "loss": 3.2488, "step": 9334 }, { "epoch": 0.46, "grad_norm": 0.5742843151092529, "learning_rate": 0.0005662260745479643, "loss": 3.36, "step": 9335 }, { "epoch": 0.46, "grad_norm": 0.5701401233673096, "learning_rate": 0.0005662189771497187, "loss": 3.166, "step": 9336 }, { "epoch": 0.46, "grad_norm": 0.5188326239585876, "learning_rate": 0.0005662118790503035, "loss": 3.4782, "step": 9337 }, { "epoch": 0.46, "grad_norm": 0.5277048945426941, "learning_rate": 0.0005662047802497376, "loss": 3.4702, "step": 9338 }, { "epoch": 0.46, "grad_norm": 0.5392853021621704, "learning_rate": 0.0005661976807480394, "loss": 3.5907, "step": 9339 }, { "epoch": 0.46, "grad_norm": 0.5241400599479675, "learning_rate": 0.0005661905805452277, "loss": 3.5178, "step": 9340 }, { "epoch": 0.46, "grad_norm": 0.5726556181907654, "learning_rate": 0.0005661834796413214, "loss": 3.3445, "step": 9341 }, { "epoch": 0.46, "grad_norm": 0.5296681523323059, "learning_rate": 0.0005661763780363389, "loss": 3.3437, "step": 9342 }, { "epoch": 0.46, "grad_norm": 0.5572839379310608, "learning_rate": 0.0005661692757302991, "loss": 3.4079, "step": 9343 }, { "epoch": 0.46, "grad_norm": 0.5110607147216797, "learning_rate": 0.0005661621727232206, "loss": 3.2675, "step": 9344 }, { "epoch": 0.46, "grad_norm": 0.4931510388851166, "learning_rate": 0.0005661550690151222, "loss": 3.3693, "step": 9345 }, { "epoch": 0.46, "grad_norm": 0.5317081212997437, "learning_rate": 0.0005661479646060227, "loss": 3.2439, "step": 9346 }, { "epoch": 0.46, "grad_norm": 0.5172111392021179, "learning_rate": 0.0005661408594959405, "loss": 3.3955, "step": 9347 }, { "epoch": 0.46, "grad_norm": 0.567136824131012, "learning_rate": 0.0005661337536848946, "loss": 3.4301, "step": 9348 }, { "epoch": 0.46, "grad_norm": 0.5417160987854004, "learning_rate": 0.0005661266471729035, "loss": 3.268, "step": 9349 }, { "epoch": 0.46, "grad_norm": 0.5237059593200684, "learning_rate": 0.0005661195399599861, "loss": 3.0861, "step": 9350 }, { "epoch": 0.46, "grad_norm": 0.5109018087387085, "learning_rate": 0.0005661124320461611, "loss": 3.3038, "step": 9351 }, { "epoch": 0.46, "grad_norm": 0.4800640940666199, "learning_rate": 0.0005661053234314471, "loss": 3.2938, "step": 9352 }, { "epoch": 0.46, "grad_norm": 0.5349900126457214, "learning_rate": 0.000566098214115863, "loss": 3.1207, "step": 9353 }, { "epoch": 0.46, "grad_norm": 0.49598759412765503, "learning_rate": 0.0005660911040994272, "loss": 3.2266, "step": 9354 }, { "epoch": 0.46, "grad_norm": 0.5334964394569397, "learning_rate": 0.0005660839933821588, "loss": 3.2708, "step": 9355 }, { "epoch": 0.46, "grad_norm": 0.6042339205741882, "learning_rate": 0.0005660768819640764, "loss": 3.1651, "step": 9356 }, { "epoch": 0.46, "grad_norm": 0.5126491785049438, "learning_rate": 0.0005660697698451985, "loss": 3.1133, "step": 9357 }, { "epoch": 0.46, "grad_norm": 0.5190067291259766, "learning_rate": 0.0005660626570255442, "loss": 3.5437, "step": 9358 }, { "epoch": 0.46, "grad_norm": 0.5068770051002502, "learning_rate": 0.0005660555435051321, "loss": 3.4367, "step": 9359 }, { "epoch": 0.46, "grad_norm": 0.5406444668769836, "learning_rate": 0.0005660484292839807, "loss": 3.3291, "step": 9360 }, { "epoch": 0.46, "grad_norm": 0.46386462450027466, "learning_rate": 0.0005660413143621091, "loss": 3.2917, "step": 9361 }, { "epoch": 0.46, "grad_norm": 0.5372816920280457, "learning_rate": 0.000566034198739536, "loss": 3.2199, "step": 9362 }, { "epoch": 0.46, "grad_norm": 0.5300079584121704, "learning_rate": 0.0005660270824162798, "loss": 3.2802, "step": 9363 }, { "epoch": 0.46, "grad_norm": 0.5637562274932861, "learning_rate": 0.0005660199653923594, "loss": 3.2477, "step": 9364 }, { "epoch": 0.46, "grad_norm": 0.5250990390777588, "learning_rate": 0.0005660128476677939, "loss": 3.078, "step": 9365 }, { "epoch": 0.46, "grad_norm": 0.5225361585617065, "learning_rate": 0.0005660057292426016, "loss": 3.4664, "step": 9366 }, { "epoch": 0.46, "grad_norm": 0.4957283139228821, "learning_rate": 0.0005659986101168013, "loss": 3.3938, "step": 9367 }, { "epoch": 0.46, "grad_norm": 0.5098510384559631, "learning_rate": 0.0005659914902904121, "loss": 3.2841, "step": 9368 }, { "epoch": 0.46, "grad_norm": 0.5317561030387878, "learning_rate": 0.0005659843697634522, "loss": 3.2659, "step": 9369 }, { "epoch": 0.46, "grad_norm": 0.4984907805919647, "learning_rate": 0.0005659772485359409, "loss": 3.1747, "step": 9370 }, { "epoch": 0.46, "grad_norm": 0.5244981646537781, "learning_rate": 0.0005659701266078966, "loss": 3.2435, "step": 9371 }, { "epoch": 0.46, "grad_norm": 0.5228347182273865, "learning_rate": 0.0005659630039793383, "loss": 3.2482, "step": 9372 }, { "epoch": 0.46, "grad_norm": 0.5450955629348755, "learning_rate": 0.0005659558806502845, "loss": 3.3684, "step": 9373 }, { "epoch": 0.46, "grad_norm": 0.5306031107902527, "learning_rate": 0.0005659487566207542, "loss": 2.9565, "step": 9374 }, { "epoch": 0.46, "grad_norm": 0.5437656044960022, "learning_rate": 0.000565941631890766, "loss": 3.2759, "step": 9375 }, { "epoch": 0.46, "grad_norm": 0.5219473838806152, "learning_rate": 0.0005659345064603387, "loss": 3.3767, "step": 9376 }, { "epoch": 0.46, "grad_norm": 0.5270979404449463, "learning_rate": 0.000565927380329491, "loss": 3.4866, "step": 9377 }, { "epoch": 0.46, "grad_norm": 0.5120222568511963, "learning_rate": 0.0005659202534982419, "loss": 3.1391, "step": 9378 }, { "epoch": 0.46, "grad_norm": 0.520464301109314, "learning_rate": 0.00056591312596661, "loss": 3.2149, "step": 9379 }, { "epoch": 0.46, "grad_norm": 0.5516645908355713, "learning_rate": 0.0005659059977346141, "loss": 3.2735, "step": 9380 }, { "epoch": 0.46, "grad_norm": 0.519219696521759, "learning_rate": 0.000565898868802273, "loss": 3.242, "step": 9381 }, { "epoch": 0.46, "grad_norm": 0.5575689077377319, "learning_rate": 0.0005658917391696054, "loss": 3.366, "step": 9382 }, { "epoch": 0.46, "grad_norm": 0.5153242349624634, "learning_rate": 0.0005658846088366302, "loss": 3.2188, "step": 9383 }, { "epoch": 0.46, "grad_norm": 0.5371017456054688, "learning_rate": 0.000565877477803366, "loss": 3.3019, "step": 9384 }, { "epoch": 0.46, "grad_norm": 0.57710200548172, "learning_rate": 0.0005658703460698318, "loss": 3.4646, "step": 9385 }, { "epoch": 0.46, "grad_norm": 0.5319957137107849, "learning_rate": 0.0005658632136360461, "loss": 3.1126, "step": 9386 }, { "epoch": 0.46, "grad_norm": 0.5224001407623291, "learning_rate": 0.0005658560805020281, "loss": 3.384, "step": 9387 }, { "epoch": 0.46, "grad_norm": 0.5222063660621643, "learning_rate": 0.0005658489466677963, "loss": 3.3125, "step": 9388 }, { "epoch": 0.46, "grad_norm": 0.5301948189735413, "learning_rate": 0.0005658418121333694, "loss": 3.3855, "step": 9389 }, { "epoch": 0.46, "grad_norm": 0.506697952747345, "learning_rate": 0.0005658346768987664, "loss": 3.4046, "step": 9390 }, { "epoch": 0.46, "grad_norm": 0.5180991291999817, "learning_rate": 0.0005658275409640062, "loss": 3.2995, "step": 9391 }, { "epoch": 0.46, "grad_norm": 0.5402610898017883, "learning_rate": 0.0005658204043291072, "loss": 3.412, "step": 9392 }, { "epoch": 0.46, "grad_norm": 0.5179954171180725, "learning_rate": 0.0005658132669940885, "loss": 3.4606, "step": 9393 }, { "epoch": 0.46, "grad_norm": 0.5407754778862, "learning_rate": 0.0005658061289589687, "loss": 3.0892, "step": 9394 }, { "epoch": 0.46, "grad_norm": 0.5019816756248474, "learning_rate": 0.0005657989902237669, "loss": 3.2621, "step": 9395 }, { "epoch": 0.46, "grad_norm": 0.5457536578178406, "learning_rate": 0.0005657918507885016, "loss": 3.1853, "step": 9396 }, { "epoch": 0.46, "grad_norm": 0.5361157059669495, "learning_rate": 0.0005657847106531916, "loss": 3.1843, "step": 9397 }, { "epoch": 0.46, "grad_norm": 0.5209142565727234, "learning_rate": 0.000565777569817856, "loss": 3.4549, "step": 9398 }, { "epoch": 0.46, "grad_norm": 0.49910151958465576, "learning_rate": 0.0005657704282825133, "loss": 3.4455, "step": 9399 }, { "epoch": 0.46, "grad_norm": 0.5092660188674927, "learning_rate": 0.0005657632860471826, "loss": 3.1643, "step": 9400 }, { "epoch": 0.46, "grad_norm": 0.5319451093673706, "learning_rate": 0.0005657561431118824, "loss": 3.1362, "step": 9401 }, { "epoch": 0.46, "grad_norm": 0.5203292369842529, "learning_rate": 0.0005657489994766318, "loss": 3.3171, "step": 9402 }, { "epoch": 0.46, "grad_norm": 0.5382462739944458, "learning_rate": 0.0005657418551414494, "loss": 3.3471, "step": 9403 }, { "epoch": 0.46, "grad_norm": 0.5401602387428284, "learning_rate": 0.0005657347101063541, "loss": 3.415, "step": 9404 }, { "epoch": 0.46, "grad_norm": 0.5223503112792969, "learning_rate": 0.0005657275643713648, "loss": 3.2212, "step": 9405 }, { "epoch": 0.46, "grad_norm": 0.4994029998779297, "learning_rate": 0.0005657204179365001, "loss": 3.1509, "step": 9406 }, { "epoch": 0.46, "grad_norm": 0.5694056749343872, "learning_rate": 0.000565713270801779, "loss": 3.3337, "step": 9407 }, { "epoch": 0.46, "grad_norm": 0.5182304978370667, "learning_rate": 0.0005657061229672203, "loss": 3.4504, "step": 9408 }, { "epoch": 0.46, "grad_norm": 0.5066438317298889, "learning_rate": 0.0005656989744328428, "loss": 2.98, "step": 9409 }, { "epoch": 0.46, "grad_norm": 0.5442813634872437, "learning_rate": 0.0005656918251986654, "loss": 3.2891, "step": 9410 }, { "epoch": 0.46, "grad_norm": 0.5112155675888062, "learning_rate": 0.0005656846752647068, "loss": 3.4086, "step": 9411 }, { "epoch": 0.46, "grad_norm": 0.4860506057739258, "learning_rate": 0.0005656775246309859, "loss": 3.1496, "step": 9412 }, { "epoch": 0.46, "grad_norm": 0.5639499425888062, "learning_rate": 0.0005656703732975215, "loss": 3.4534, "step": 9413 }, { "epoch": 0.46, "grad_norm": 0.49762552976608276, "learning_rate": 0.0005656632212643326, "loss": 3.0931, "step": 9414 }, { "epoch": 0.46, "grad_norm": 0.5720370411872864, "learning_rate": 0.0005656560685314378, "loss": 3.3915, "step": 9415 }, { "epoch": 0.46, "grad_norm": 0.5175341963768005, "learning_rate": 0.000565648915098856, "loss": 3.3709, "step": 9416 }, { "epoch": 0.46, "grad_norm": 0.5201534032821655, "learning_rate": 0.0005656417609666061, "loss": 3.2131, "step": 9417 }, { "epoch": 0.46, "grad_norm": 0.5602389574050903, "learning_rate": 0.000565634606134707, "loss": 3.4146, "step": 9418 }, { "epoch": 0.46, "grad_norm": 0.5673112869262695, "learning_rate": 0.0005656274506031775, "loss": 3.29, "step": 9419 }, { "epoch": 0.46, "grad_norm": 0.5292515754699707, "learning_rate": 0.0005656202943720363, "loss": 3.2578, "step": 9420 }, { "epoch": 0.46, "grad_norm": 0.4988608658313751, "learning_rate": 0.0005656131374413024, "loss": 3.1482, "step": 9421 }, { "epoch": 0.46, "grad_norm": 0.5025928020477295, "learning_rate": 0.0005656059798109947, "loss": 3.3671, "step": 9422 }, { "epoch": 0.46, "grad_norm": 0.5336331725120544, "learning_rate": 0.0005655988214811318, "loss": 3.1554, "step": 9423 }, { "epoch": 0.46, "grad_norm": 0.4792225956916809, "learning_rate": 0.0005655916624517328, "loss": 3.3616, "step": 9424 }, { "epoch": 0.46, "grad_norm": 0.5044949650764465, "learning_rate": 0.0005655845027228164, "loss": 3.2776, "step": 9425 }, { "epoch": 0.46, "grad_norm": 0.519375205039978, "learning_rate": 0.0005655773422944017, "loss": 3.4197, "step": 9426 }, { "epoch": 0.46, "grad_norm": 0.5419018268585205, "learning_rate": 0.0005655701811665073, "loss": 3.1409, "step": 9427 }, { "epoch": 0.46, "grad_norm": 0.5148293972015381, "learning_rate": 0.0005655630193391522, "loss": 3.2837, "step": 9428 }, { "epoch": 0.46, "grad_norm": 0.522705078125, "learning_rate": 0.0005655558568123551, "loss": 3.2122, "step": 9429 }, { "epoch": 0.46, "grad_norm": 0.4861201345920563, "learning_rate": 0.0005655486935861352, "loss": 3.1855, "step": 9430 }, { "epoch": 0.46, "grad_norm": 0.4865737557411194, "learning_rate": 0.0005655415296605109, "loss": 3.1497, "step": 9431 }, { "epoch": 0.46, "grad_norm": 0.5147886276245117, "learning_rate": 0.0005655343650355014, "loss": 3.5226, "step": 9432 }, { "epoch": 0.46, "grad_norm": 0.5273894667625427, "learning_rate": 0.0005655271997111256, "loss": 3.3109, "step": 9433 }, { "epoch": 0.46, "grad_norm": 0.5883224010467529, "learning_rate": 0.0005655200336874021, "loss": 3.2307, "step": 9434 }, { "epoch": 0.46, "grad_norm": 0.535631537437439, "learning_rate": 0.00056551286696435, "loss": 3.1471, "step": 9435 }, { "epoch": 0.46, "grad_norm": 0.5643936991691589, "learning_rate": 0.0005655056995419881, "loss": 3.1523, "step": 9436 }, { "epoch": 0.46, "grad_norm": 0.5332732200622559, "learning_rate": 0.0005654985314203354, "loss": 3.3777, "step": 9437 }, { "epoch": 0.46, "grad_norm": 0.4967159628868103, "learning_rate": 0.0005654913625994105, "loss": 3.5025, "step": 9438 }, { "epoch": 0.46, "grad_norm": 0.5050669312477112, "learning_rate": 0.0005654841930792325, "loss": 3.2085, "step": 9439 }, { "epoch": 0.46, "grad_norm": 0.5131103992462158, "learning_rate": 0.0005654770228598202, "loss": 3.3931, "step": 9440 }, { "epoch": 0.46, "grad_norm": 0.5723204612731934, "learning_rate": 0.0005654698519411925, "loss": 3.2395, "step": 9441 }, { "epoch": 0.46, "grad_norm": 0.521449089050293, "learning_rate": 0.0005654626803233684, "loss": 3.2252, "step": 9442 }, { "epoch": 0.46, "grad_norm": 0.559038519859314, "learning_rate": 0.0005654555080063665, "loss": 3.2708, "step": 9443 }, { "epoch": 0.46, "grad_norm": 0.5154927968978882, "learning_rate": 0.000565448334990206, "loss": 3.03, "step": 9444 }, { "epoch": 0.46, "grad_norm": 0.6243354678153992, "learning_rate": 0.0005654411612749058, "loss": 3.3827, "step": 9445 }, { "epoch": 0.46, "grad_norm": 0.6051532030105591, "learning_rate": 0.0005654339868604846, "loss": 3.2296, "step": 9446 }, { "epoch": 0.46, "grad_norm": 0.5143436789512634, "learning_rate": 0.0005654268117469613, "loss": 3.2727, "step": 9447 }, { "epoch": 0.46, "grad_norm": 0.485236257314682, "learning_rate": 0.0005654196359343548, "loss": 3.2742, "step": 9448 }, { "epoch": 0.46, "grad_norm": 0.536080539226532, "learning_rate": 0.0005654124594226841, "loss": 3.2754, "step": 9449 }, { "epoch": 0.46, "grad_norm": 0.648050844669342, "learning_rate": 0.0005654052822119681, "loss": 3.0665, "step": 9450 }, { "epoch": 0.46, "grad_norm": 0.5583487749099731, "learning_rate": 0.0005653981043022257, "loss": 3.2576, "step": 9451 }, { "epoch": 0.46, "grad_norm": 0.5086358785629272, "learning_rate": 0.0005653909256934757, "loss": 3.3566, "step": 9452 }, { "epoch": 0.46, "grad_norm": 0.5181884765625, "learning_rate": 0.0005653837463857371, "loss": 3.1578, "step": 9453 }, { "epoch": 0.46, "grad_norm": 0.4989009499549866, "learning_rate": 0.0005653765663790288, "loss": 2.9139, "step": 9454 }, { "epoch": 0.46, "grad_norm": 0.5821312069892883, "learning_rate": 0.0005653693856733698, "loss": 3.2885, "step": 9455 }, { "epoch": 0.46, "grad_norm": 0.5479455590248108, "learning_rate": 0.0005653622042687788, "loss": 3.0113, "step": 9456 }, { "epoch": 0.46, "grad_norm": 0.5062962770462036, "learning_rate": 0.0005653550221652747, "loss": 3.2562, "step": 9457 }, { "epoch": 0.46, "grad_norm": 0.5756667256355286, "learning_rate": 0.0005653478393628767, "loss": 3.1515, "step": 9458 }, { "epoch": 0.46, "grad_norm": 0.5819968581199646, "learning_rate": 0.0005653406558616034, "loss": 3.3712, "step": 9459 }, { "epoch": 0.46, "grad_norm": 0.53989577293396, "learning_rate": 0.0005653334716614741, "loss": 3.1972, "step": 9460 }, { "epoch": 0.46, "grad_norm": 0.5572423934936523, "learning_rate": 0.0005653262867625074, "loss": 3.3386, "step": 9461 }, { "epoch": 0.46, "grad_norm": 0.5422435998916626, "learning_rate": 0.0005653191011647223, "loss": 3.1865, "step": 9462 }, { "epoch": 0.46, "grad_norm": 0.5119712352752686, "learning_rate": 0.0005653119148681378, "loss": 3.346, "step": 9463 }, { "epoch": 0.46, "grad_norm": 0.5083188414573669, "learning_rate": 0.0005653047278727728, "loss": 3.3056, "step": 9464 }, { "epoch": 0.46, "grad_norm": 0.5463467836380005, "learning_rate": 0.0005652975401786461, "loss": 3.3122, "step": 9465 }, { "epoch": 0.46, "grad_norm": 0.5052841305732727, "learning_rate": 0.0005652903517857768, "loss": 3.3023, "step": 9466 }, { "epoch": 0.46, "grad_norm": 0.5321139097213745, "learning_rate": 0.0005652831626941838, "loss": 3.2202, "step": 9467 }, { "epoch": 0.46, "grad_norm": 0.5242906808853149, "learning_rate": 0.0005652759729038859, "loss": 3.1611, "step": 9468 }, { "epoch": 0.46, "grad_norm": 0.5150046944618225, "learning_rate": 0.0005652687824149022, "loss": 3.3922, "step": 9469 }, { "epoch": 0.46, "grad_norm": 0.5529156923294067, "learning_rate": 0.0005652615912272516, "loss": 3.2372, "step": 9470 }, { "epoch": 0.46, "grad_norm": 0.4883171021938324, "learning_rate": 0.000565254399340953, "loss": 3.1305, "step": 9471 }, { "epoch": 0.46, "grad_norm": 0.5113022923469543, "learning_rate": 0.0005652472067560254, "loss": 3.4073, "step": 9472 }, { "epoch": 0.46, "grad_norm": 0.5019404292106628, "learning_rate": 0.0005652400134724877, "loss": 3.2087, "step": 9473 }, { "epoch": 0.46, "grad_norm": 0.5218349695205688, "learning_rate": 0.0005652328194903587, "loss": 3.2981, "step": 9474 }, { "epoch": 0.46, "grad_norm": 0.6199938058853149, "learning_rate": 0.0005652256248096577, "loss": 3.3046, "step": 9475 }, { "epoch": 0.46, "grad_norm": 0.5042975544929504, "learning_rate": 0.0005652184294304034, "loss": 3.3814, "step": 9476 }, { "epoch": 0.46, "grad_norm": 0.5005037784576416, "learning_rate": 0.0005652112333526147, "loss": 3.316, "step": 9477 }, { "epoch": 0.46, "grad_norm": 0.507548451423645, "learning_rate": 0.0005652040365763107, "loss": 3.2449, "step": 9478 }, { "epoch": 0.46, "grad_norm": 0.5326326489448547, "learning_rate": 0.0005651968391015104, "loss": 3.333, "step": 9479 }, { "epoch": 0.46, "grad_norm": 0.5457335114479065, "learning_rate": 0.0005651896409282326, "loss": 3.2439, "step": 9480 }, { "epoch": 0.46, "grad_norm": 0.5438616275787354, "learning_rate": 0.0005651824420564962, "loss": 3.0847, "step": 9481 }, { "epoch": 0.46, "grad_norm": 0.5413506627082825, "learning_rate": 0.0005651752424863205, "loss": 3.3258, "step": 9482 }, { "epoch": 0.46, "grad_norm": 0.5376286506652832, "learning_rate": 0.0005651680422177241, "loss": 3.0409, "step": 9483 }, { "epoch": 0.46, "grad_norm": 0.5599796772003174, "learning_rate": 0.0005651608412507262, "loss": 3.2176, "step": 9484 }, { "epoch": 0.46, "grad_norm": 0.5151841044425964, "learning_rate": 0.0005651536395853456, "loss": 3.1449, "step": 9485 }, { "epoch": 0.46, "grad_norm": 0.5488869547843933, "learning_rate": 0.0005651464372216015, "loss": 3.2857, "step": 9486 }, { "epoch": 0.46, "grad_norm": 0.5191092491149902, "learning_rate": 0.0005651392341595125, "loss": 3.3297, "step": 9487 }, { "epoch": 0.46, "grad_norm": 0.5657668709754944, "learning_rate": 0.0005651320303990978, "loss": 3.1369, "step": 9488 }, { "epoch": 0.47, "grad_norm": 0.5233826637268066, "learning_rate": 0.0005651248259403765, "loss": 3.2528, "step": 9489 }, { "epoch": 0.47, "grad_norm": 0.49316510558128357, "learning_rate": 0.0005651176207833673, "loss": 3.2797, "step": 9490 }, { "epoch": 0.47, "grad_norm": 0.5391998291015625, "learning_rate": 0.0005651104149280894, "loss": 3.2407, "step": 9491 }, { "epoch": 0.47, "grad_norm": 0.7097063660621643, "learning_rate": 0.0005651032083745616, "loss": 3.3675, "step": 9492 }, { "epoch": 0.47, "grad_norm": 0.5312466621398926, "learning_rate": 0.0005650960011228031, "loss": 3.2063, "step": 9493 }, { "epoch": 0.47, "grad_norm": 0.5219338536262512, "learning_rate": 0.0005650887931728326, "loss": 3.3838, "step": 9494 }, { "epoch": 0.47, "grad_norm": 0.5633711218833923, "learning_rate": 0.0005650815845246694, "loss": 3.084, "step": 9495 }, { "epoch": 0.47, "grad_norm": 0.5363718867301941, "learning_rate": 0.0005650743751783321, "loss": 3.2394, "step": 9496 }, { "epoch": 0.47, "grad_norm": 0.5066879987716675, "learning_rate": 0.0005650671651338401, "loss": 3.3002, "step": 9497 }, { "epoch": 0.47, "grad_norm": 0.5164818167686462, "learning_rate": 0.0005650599543912121, "loss": 2.9735, "step": 9498 }, { "epoch": 0.47, "grad_norm": 0.5876210331916809, "learning_rate": 0.0005650527429504673, "loss": 3.4418, "step": 9499 }, { "epoch": 0.47, "grad_norm": 0.4980337917804718, "learning_rate": 0.0005650455308116245, "loss": 3.3927, "step": 9500 }, { "epoch": 0.47, "grad_norm": 0.5478733777999878, "learning_rate": 0.0005650383179747028, "loss": 3.2894, "step": 9501 }, { "epoch": 0.47, "grad_norm": 0.5227766036987305, "learning_rate": 0.0005650311044397212, "loss": 3.3741, "step": 9502 }, { "epoch": 0.47, "grad_norm": 0.4926789700984955, "learning_rate": 0.0005650238902066987, "loss": 3.3118, "step": 9503 }, { "epoch": 0.47, "grad_norm": 0.5030087828636169, "learning_rate": 0.0005650166752756542, "loss": 3.4107, "step": 9504 }, { "epoch": 0.47, "grad_norm": 0.5236660838127136, "learning_rate": 0.0005650094596466068, "loss": 3.2949, "step": 9505 }, { "epoch": 0.47, "grad_norm": 0.5276412963867188, "learning_rate": 0.0005650022433195755, "loss": 3.2227, "step": 9506 }, { "epoch": 0.47, "grad_norm": 0.5344411730766296, "learning_rate": 0.0005649950262945794, "loss": 3.1652, "step": 9507 }, { "epoch": 0.47, "grad_norm": 0.5086687207221985, "learning_rate": 0.0005649878085716372, "loss": 3.1716, "step": 9508 }, { "epoch": 0.47, "grad_norm": 0.5827347636222839, "learning_rate": 0.0005649805901507682, "loss": 3.265, "step": 9509 }, { "epoch": 0.47, "grad_norm": 0.5286895036697388, "learning_rate": 0.0005649733710319913, "loss": 3.3054, "step": 9510 }, { "epoch": 0.47, "grad_norm": 0.5271221399307251, "learning_rate": 0.0005649661512153256, "loss": 3.2053, "step": 9511 }, { "epoch": 0.47, "grad_norm": 0.4985242784023285, "learning_rate": 0.00056495893070079, "loss": 3.1904, "step": 9512 }, { "epoch": 0.47, "grad_norm": 0.5197968482971191, "learning_rate": 0.0005649517094884036, "loss": 3.2541, "step": 9513 }, { "epoch": 0.47, "grad_norm": 0.5603257417678833, "learning_rate": 0.0005649444875781853, "loss": 3.3489, "step": 9514 }, { "epoch": 0.47, "grad_norm": 0.5142351388931274, "learning_rate": 0.0005649372649701544, "loss": 3.2883, "step": 9515 }, { "epoch": 0.47, "grad_norm": 0.6001418232917786, "learning_rate": 0.0005649300416643296, "loss": 3.1696, "step": 9516 }, { "epoch": 0.47, "grad_norm": 0.5239596962928772, "learning_rate": 0.0005649228176607301, "loss": 3.3235, "step": 9517 }, { "epoch": 0.47, "grad_norm": 0.48557403683662415, "learning_rate": 0.0005649155929593748, "loss": 3.1674, "step": 9518 }, { "epoch": 0.47, "grad_norm": 0.5428034663200378, "learning_rate": 0.0005649083675602829, "loss": 3.1312, "step": 9519 }, { "epoch": 0.47, "grad_norm": 0.5654087662696838, "learning_rate": 0.0005649011414634733, "loss": 3.5056, "step": 9520 }, { "epoch": 0.47, "grad_norm": 0.4970563054084778, "learning_rate": 0.000564893914668965, "loss": 3.3086, "step": 9521 }, { "epoch": 0.47, "grad_norm": 0.5339492559432983, "learning_rate": 0.0005648866871767772, "loss": 3.2905, "step": 9522 }, { "epoch": 0.47, "grad_norm": 0.5164199471473694, "learning_rate": 0.0005648794589869289, "loss": 3.1285, "step": 9523 }, { "epoch": 0.47, "grad_norm": 0.476354718208313, "learning_rate": 0.000564872230099439, "loss": 3.2268, "step": 9524 }, { "epoch": 0.47, "grad_norm": 0.5380869507789612, "learning_rate": 0.0005648650005143267, "loss": 3.0886, "step": 9525 }, { "epoch": 0.47, "grad_norm": 0.5686689019203186, "learning_rate": 0.0005648577702316108, "loss": 3.353, "step": 9526 }, { "epoch": 0.47, "grad_norm": 0.5547523498535156, "learning_rate": 0.0005648505392513107, "loss": 3.2541, "step": 9527 }, { "epoch": 0.47, "grad_norm": 0.5288465619087219, "learning_rate": 0.0005648433075734451, "loss": 3.3771, "step": 9528 }, { "epoch": 0.47, "grad_norm": 0.5291219353675842, "learning_rate": 0.0005648360751980332, "loss": 3.2426, "step": 9529 }, { "epoch": 0.47, "grad_norm": 0.5199901461601257, "learning_rate": 0.0005648288421250942, "loss": 3.245, "step": 9530 }, { "epoch": 0.47, "grad_norm": 0.5697982311248779, "learning_rate": 0.0005648216083546469, "loss": 3.3015, "step": 9531 }, { "epoch": 0.47, "grad_norm": 0.5655345916748047, "learning_rate": 0.0005648143738867104, "loss": 3.3584, "step": 9532 }, { "epoch": 0.47, "grad_norm": 0.5142018795013428, "learning_rate": 0.0005648071387213039, "loss": 3.2386, "step": 9533 }, { "epoch": 0.47, "grad_norm": 0.5273463129997253, "learning_rate": 0.0005647999028584463, "loss": 3.211, "step": 9534 }, { "epoch": 0.47, "grad_norm": 0.58909672498703, "learning_rate": 0.0005647926662981568, "loss": 3.4849, "step": 9535 }, { "epoch": 0.47, "grad_norm": 0.7461726665496826, "learning_rate": 0.0005647854290404543, "loss": 3.036, "step": 9536 }, { "epoch": 0.47, "grad_norm": 0.5585423707962036, "learning_rate": 0.0005647781910853579, "loss": 3.2072, "step": 9537 }, { "epoch": 0.47, "grad_norm": 0.5416821241378784, "learning_rate": 0.0005647709524328867, "loss": 3.3082, "step": 9538 }, { "epoch": 0.47, "grad_norm": 0.5571542382240295, "learning_rate": 0.0005647637130830599, "loss": 3.4454, "step": 9539 }, { "epoch": 0.47, "grad_norm": 0.5479061007499695, "learning_rate": 0.0005647564730358963, "loss": 3.3225, "step": 9540 }, { "epoch": 0.47, "grad_norm": 0.5047709941864014, "learning_rate": 0.0005647492322914152, "loss": 3.4261, "step": 9541 }, { "epoch": 0.47, "grad_norm": 0.5049424171447754, "learning_rate": 0.0005647419908496355, "loss": 3.3516, "step": 9542 }, { "epoch": 0.47, "grad_norm": 0.5358158349990845, "learning_rate": 0.0005647347487105764, "loss": 3.309, "step": 9543 }, { "epoch": 0.47, "grad_norm": 0.5347208380699158, "learning_rate": 0.0005647275058742569, "loss": 3.3036, "step": 9544 }, { "epoch": 0.47, "grad_norm": 0.5153633952140808, "learning_rate": 0.000564720262340696, "loss": 3.3403, "step": 9545 }, { "epoch": 0.47, "grad_norm": 0.5642601847648621, "learning_rate": 0.0005647130181099131, "loss": 3.3031, "step": 9546 }, { "epoch": 0.47, "grad_norm": 0.5254157185554504, "learning_rate": 0.0005647057731819269, "loss": 3.3091, "step": 9547 }, { "epoch": 0.47, "grad_norm": 0.5136579275131226, "learning_rate": 0.0005646985275567566, "loss": 3.1616, "step": 9548 }, { "epoch": 0.47, "grad_norm": 0.504160463809967, "learning_rate": 0.0005646912812344214, "loss": 3.2302, "step": 9549 }, { "epoch": 0.47, "grad_norm": 0.5003314018249512, "learning_rate": 0.0005646840342149403, "loss": 3.0769, "step": 9550 }, { "epoch": 0.47, "grad_norm": 0.5534130334854126, "learning_rate": 0.0005646767864983325, "loss": 3.3755, "step": 9551 }, { "epoch": 0.47, "grad_norm": 0.5688060522079468, "learning_rate": 0.0005646695380846168, "loss": 3.3327, "step": 9552 }, { "epoch": 0.47, "grad_norm": 0.5155788660049438, "learning_rate": 0.0005646622889738125, "loss": 3.2615, "step": 9553 }, { "epoch": 0.47, "grad_norm": 0.5471298098564148, "learning_rate": 0.0005646550391659387, "loss": 3.3885, "step": 9554 }, { "epoch": 0.47, "grad_norm": 0.5412099957466125, "learning_rate": 0.0005646477886610145, "loss": 3.215, "step": 9555 }, { "epoch": 0.47, "grad_norm": 0.4741232693195343, "learning_rate": 0.0005646405374590589, "loss": 3.4638, "step": 9556 }, { "epoch": 0.47, "grad_norm": 0.5171995162963867, "learning_rate": 0.0005646332855600911, "loss": 3.1037, "step": 9557 }, { "epoch": 0.47, "grad_norm": 0.48581480979919434, "learning_rate": 0.0005646260329641302, "loss": 3.3405, "step": 9558 }, { "epoch": 0.47, "grad_norm": 0.5740549564361572, "learning_rate": 0.0005646187796711951, "loss": 2.8654, "step": 9559 }, { "epoch": 0.47, "grad_norm": 0.5170774459838867, "learning_rate": 0.0005646115256813053, "loss": 3.2295, "step": 9560 }, { "epoch": 0.47, "grad_norm": 0.5482763648033142, "learning_rate": 0.0005646042709944794, "loss": 3.3797, "step": 9561 }, { "epoch": 0.47, "grad_norm": 0.4993712604045868, "learning_rate": 0.0005645970156107369, "loss": 3.3476, "step": 9562 }, { "epoch": 0.47, "grad_norm": 0.5390969514846802, "learning_rate": 0.0005645897595300967, "loss": 3.2817, "step": 9563 }, { "epoch": 0.47, "grad_norm": 0.53498375415802, "learning_rate": 0.0005645825027525781, "loss": 3.3194, "step": 9564 }, { "epoch": 0.47, "grad_norm": 0.528544008731842, "learning_rate": 0.0005645752452782001, "loss": 3.3329, "step": 9565 }, { "epoch": 0.47, "grad_norm": 0.5482951402664185, "learning_rate": 0.0005645679871069817, "loss": 3.2478, "step": 9566 }, { "epoch": 0.47, "grad_norm": 0.5523584485054016, "learning_rate": 0.0005645607282389423, "loss": 3.0492, "step": 9567 }, { "epoch": 0.47, "grad_norm": 0.5092670321464539, "learning_rate": 0.0005645534686741009, "loss": 3.1282, "step": 9568 }, { "epoch": 0.47, "grad_norm": 0.5134148597717285, "learning_rate": 0.0005645462084124765, "loss": 3.2474, "step": 9569 }, { "epoch": 0.47, "grad_norm": 0.5111011862754822, "learning_rate": 0.0005645389474540882, "loss": 3.1094, "step": 9570 }, { "epoch": 0.47, "grad_norm": 0.5071569085121155, "learning_rate": 0.0005645316857989553, "loss": 3.3619, "step": 9571 }, { "epoch": 0.47, "grad_norm": 0.49778759479522705, "learning_rate": 0.0005645244234470969, "loss": 3.1761, "step": 9572 }, { "epoch": 0.47, "grad_norm": 0.5030775666236877, "learning_rate": 0.0005645171603985321, "loss": 3.3071, "step": 9573 }, { "epoch": 0.47, "grad_norm": 0.5095931887626648, "learning_rate": 0.0005645098966532798, "loss": 3.4006, "step": 9574 }, { "epoch": 0.47, "grad_norm": 0.5154526233673096, "learning_rate": 0.0005645026322113596, "loss": 3.3653, "step": 9575 }, { "epoch": 0.47, "grad_norm": 0.4855976700782776, "learning_rate": 0.0005644953670727902, "loss": 3.4481, "step": 9576 }, { "epoch": 0.47, "grad_norm": 0.5199698805809021, "learning_rate": 0.0005644881012375909, "loss": 3.3039, "step": 9577 }, { "epoch": 0.47, "grad_norm": 0.5196504592895508, "learning_rate": 0.0005644808347057809, "loss": 3.4343, "step": 9578 }, { "epoch": 0.47, "grad_norm": 0.5275589227676392, "learning_rate": 0.0005644735674773793, "loss": 3.3105, "step": 9579 }, { "epoch": 0.47, "grad_norm": 0.4889765977859497, "learning_rate": 0.0005644662995524051, "loss": 3.2342, "step": 9580 }, { "epoch": 0.47, "grad_norm": 0.5421981811523438, "learning_rate": 0.0005644590309308778, "loss": 3.4189, "step": 9581 }, { "epoch": 0.47, "grad_norm": 0.5198723077774048, "learning_rate": 0.0005644517616128161, "loss": 3.3944, "step": 9582 }, { "epoch": 0.47, "grad_norm": 0.518722653388977, "learning_rate": 0.0005644444915982394, "loss": 3.4588, "step": 9583 }, { "epoch": 0.47, "grad_norm": 0.5175428986549377, "learning_rate": 0.0005644372208871668, "loss": 3.2401, "step": 9584 }, { "epoch": 0.47, "grad_norm": 0.5273626446723938, "learning_rate": 0.0005644299494796175, "loss": 3.3321, "step": 9585 }, { "epoch": 0.47, "grad_norm": 0.5376038551330566, "learning_rate": 0.0005644226773756107, "loss": 3.1802, "step": 9586 }, { "epoch": 0.47, "grad_norm": 0.5051103234291077, "learning_rate": 0.0005644154045751652, "loss": 3.2906, "step": 9587 }, { "epoch": 0.47, "grad_norm": 0.5152342319488525, "learning_rate": 0.0005644081310783006, "loss": 3.231, "step": 9588 }, { "epoch": 0.47, "grad_norm": 0.490278959274292, "learning_rate": 0.0005644008568850359, "loss": 3.333, "step": 9589 }, { "epoch": 0.47, "grad_norm": 0.5564194321632385, "learning_rate": 0.0005643935819953901, "loss": 3.1443, "step": 9590 }, { "epoch": 0.47, "grad_norm": 0.5331935286521912, "learning_rate": 0.0005643863064093825, "loss": 3.2971, "step": 9591 }, { "epoch": 0.47, "grad_norm": 0.5207473635673523, "learning_rate": 0.0005643790301270323, "loss": 3.3384, "step": 9592 }, { "epoch": 0.47, "grad_norm": 0.5474972724914551, "learning_rate": 0.0005643717531483586, "loss": 3.2845, "step": 9593 }, { "epoch": 0.47, "grad_norm": 0.534555196762085, "learning_rate": 0.0005643644754733805, "loss": 3.2346, "step": 9594 }, { "epoch": 0.47, "grad_norm": 0.5441542267799377, "learning_rate": 0.0005643571971021174, "loss": 3.3468, "step": 9595 }, { "epoch": 0.47, "grad_norm": 0.5864543914794922, "learning_rate": 0.0005643499180345882, "loss": 3.3281, "step": 9596 }, { "epoch": 0.47, "grad_norm": 0.5105965733528137, "learning_rate": 0.0005643426382708124, "loss": 3.2396, "step": 9597 }, { "epoch": 0.47, "grad_norm": 0.5301392674446106, "learning_rate": 0.0005643353578108088, "loss": 3.3751, "step": 9598 }, { "epoch": 0.47, "grad_norm": 0.5022128820419312, "learning_rate": 0.0005643280766545967, "loss": 3.393, "step": 9599 }, { "epoch": 0.47, "grad_norm": 0.5763443112373352, "learning_rate": 0.0005643207948021954, "loss": 3.0997, "step": 9600 }, { "epoch": 0.47, "grad_norm": 0.5480446815490723, "learning_rate": 0.000564313512253624, "loss": 3.2441, "step": 9601 }, { "epoch": 0.47, "grad_norm": 0.5295218229293823, "learning_rate": 0.0005643062290089017, "loss": 3.4164, "step": 9602 }, { "epoch": 0.47, "grad_norm": 0.524186909198761, "learning_rate": 0.0005642989450680474, "loss": 3.4485, "step": 9603 }, { "epoch": 0.47, "grad_norm": 0.5457528829574585, "learning_rate": 0.0005642916604310809, "loss": 3.5014, "step": 9604 }, { "epoch": 0.47, "grad_norm": 0.5163187384605408, "learning_rate": 0.0005642843750980209, "loss": 3.359, "step": 9605 }, { "epoch": 0.47, "grad_norm": 0.5899530053138733, "learning_rate": 0.0005642770890688866, "loss": 3.4003, "step": 9606 }, { "epoch": 0.47, "grad_norm": 0.5038844347000122, "learning_rate": 0.0005642698023436974, "loss": 3.2742, "step": 9607 }, { "epoch": 0.47, "grad_norm": 0.5313959717750549, "learning_rate": 0.0005642625149224724, "loss": 3.1566, "step": 9608 }, { "epoch": 0.47, "grad_norm": 0.5297811627388, "learning_rate": 0.0005642552268052309, "loss": 3.3764, "step": 9609 }, { "epoch": 0.47, "grad_norm": 0.5239699482917786, "learning_rate": 0.0005642479379919918, "loss": 3.2483, "step": 9610 }, { "epoch": 0.47, "grad_norm": 0.5261852145195007, "learning_rate": 0.0005642406484827746, "loss": 3.2288, "step": 9611 }, { "epoch": 0.47, "grad_norm": 0.5086889266967773, "learning_rate": 0.0005642333582775984, "loss": 3.2811, "step": 9612 }, { "epoch": 0.47, "grad_norm": 0.5237385034561157, "learning_rate": 0.0005642260673764822, "loss": 3.2338, "step": 9613 }, { "epoch": 0.47, "grad_norm": 0.5552691221237183, "learning_rate": 0.0005642187757794456, "loss": 3.2998, "step": 9614 }, { "epoch": 0.47, "grad_norm": 0.5657722353935242, "learning_rate": 0.0005642114834865076, "loss": 3.2386, "step": 9615 }, { "epoch": 0.47, "grad_norm": 0.49646058678627014, "learning_rate": 0.0005642041904976873, "loss": 3.404, "step": 9616 }, { "epoch": 0.47, "grad_norm": 0.5342143177986145, "learning_rate": 0.000564196896813004, "loss": 3.1347, "step": 9617 }, { "epoch": 0.47, "grad_norm": 0.49228495359420776, "learning_rate": 0.0005641896024324769, "loss": 3.4549, "step": 9618 }, { "epoch": 0.47, "grad_norm": 0.5277411937713623, "learning_rate": 0.0005641823073561253, "loss": 3.385, "step": 9619 }, { "epoch": 0.47, "grad_norm": 0.5513384938240051, "learning_rate": 0.0005641750115839685, "loss": 3.2151, "step": 9620 }, { "epoch": 0.47, "grad_norm": 0.5188488960266113, "learning_rate": 0.0005641677151160253, "loss": 3.2605, "step": 9621 }, { "epoch": 0.47, "grad_norm": 0.5301359295845032, "learning_rate": 0.0005641604179523153, "loss": 3.0863, "step": 9622 }, { "epoch": 0.47, "grad_norm": 0.5934592485427856, "learning_rate": 0.0005641531200928575, "loss": 3.33, "step": 9623 }, { "epoch": 0.47, "grad_norm": 0.5196259617805481, "learning_rate": 0.0005641458215376713, "loss": 3.1144, "step": 9624 }, { "epoch": 0.47, "grad_norm": 0.5472568273544312, "learning_rate": 0.0005641385222867758, "loss": 3.197, "step": 9625 }, { "epoch": 0.47, "grad_norm": 0.5343920588493347, "learning_rate": 0.0005641312223401904, "loss": 3.3338, "step": 9626 }, { "epoch": 0.47, "grad_norm": 0.5346314907073975, "learning_rate": 0.000564123921697934, "loss": 3.0962, "step": 9627 }, { "epoch": 0.47, "grad_norm": 0.5246593356132507, "learning_rate": 0.0005641166203600262, "loss": 3.342, "step": 9628 }, { "epoch": 0.47, "grad_norm": 0.5096229910850525, "learning_rate": 0.000564109318326486, "loss": 3.1208, "step": 9629 }, { "epoch": 0.47, "grad_norm": 0.5147344470024109, "learning_rate": 0.0005641020155973326, "loss": 3.4918, "step": 9630 }, { "epoch": 0.47, "grad_norm": 0.5902569890022278, "learning_rate": 0.0005640947121725853, "loss": 3.4023, "step": 9631 }, { "epoch": 0.47, "grad_norm": 0.5098241567611694, "learning_rate": 0.0005640874080522635, "loss": 3.2977, "step": 9632 }, { "epoch": 0.47, "grad_norm": 0.5633296966552734, "learning_rate": 0.0005640801032363862, "loss": 3.3381, "step": 9633 }, { "epoch": 0.47, "grad_norm": 0.5749189257621765, "learning_rate": 0.0005640727977249728, "loss": 3.3493, "step": 9634 }, { "epoch": 0.47, "grad_norm": 0.5577384829521179, "learning_rate": 0.0005640654915180424, "loss": 3.2943, "step": 9635 }, { "epoch": 0.47, "grad_norm": 0.5394460558891296, "learning_rate": 0.0005640581846156143, "loss": 3.1971, "step": 9636 }, { "epoch": 0.47, "grad_norm": 0.6581524610519409, "learning_rate": 0.0005640508770177079, "loss": 3.4347, "step": 9637 }, { "epoch": 0.47, "grad_norm": 0.5236822962760925, "learning_rate": 0.0005640435687243421, "loss": 3.273, "step": 9638 }, { "epoch": 0.47, "grad_norm": 0.5210537910461426, "learning_rate": 0.0005640362597355365, "loss": 3.2335, "step": 9639 }, { "epoch": 0.47, "grad_norm": 0.5516870021820068, "learning_rate": 0.0005640289500513101, "loss": 3.3469, "step": 9640 }, { "epoch": 0.47, "grad_norm": 0.5240686535835266, "learning_rate": 0.0005640216396716824, "loss": 3.5318, "step": 9641 }, { "epoch": 0.47, "grad_norm": 0.48787635564804077, "learning_rate": 0.0005640143285966724, "loss": 3.423, "step": 9642 }, { "epoch": 0.47, "grad_norm": 0.5234872102737427, "learning_rate": 0.0005640070168262996, "loss": 3.292, "step": 9643 }, { "epoch": 0.47, "grad_norm": 0.5378441214561462, "learning_rate": 0.000563999704360583, "loss": 3.2868, "step": 9644 }, { "epoch": 0.47, "grad_norm": 0.5410236716270447, "learning_rate": 0.000563992391199542, "loss": 3.2939, "step": 9645 }, { "epoch": 0.47, "grad_norm": 0.5149595737457275, "learning_rate": 0.0005639850773431959, "loss": 3.3569, "step": 9646 }, { "epoch": 0.47, "grad_norm": 0.5150244235992432, "learning_rate": 0.0005639777627915639, "loss": 3.2811, "step": 9647 }, { "epoch": 0.47, "grad_norm": 0.5107737183570862, "learning_rate": 0.0005639704475446653, "loss": 3.2627, "step": 9648 }, { "epoch": 0.47, "grad_norm": 0.5012353658676147, "learning_rate": 0.0005639631316025193, "loss": 3.2458, "step": 9649 }, { "epoch": 0.47, "grad_norm": 0.5054120421409607, "learning_rate": 0.0005639558149651452, "loss": 3.141, "step": 9650 }, { "epoch": 0.47, "grad_norm": 0.5207725763320923, "learning_rate": 0.0005639484976325623, "loss": 3.3233, "step": 9651 }, { "epoch": 0.47, "grad_norm": 0.5636855363845825, "learning_rate": 0.0005639411796047898, "loss": 3.1278, "step": 9652 }, { "epoch": 0.47, "grad_norm": 0.5159328579902649, "learning_rate": 0.0005639338608818471, "loss": 3.422, "step": 9653 }, { "epoch": 0.47, "grad_norm": 0.5192365050315857, "learning_rate": 0.0005639265414637534, "loss": 3.2066, "step": 9654 }, { "epoch": 0.47, "grad_norm": 0.5930947065353394, "learning_rate": 0.0005639192213505279, "loss": 3.2735, "step": 9655 }, { "epoch": 0.47, "grad_norm": 0.5183570981025696, "learning_rate": 0.0005639119005421901, "loss": 3.1819, "step": 9656 }, { "epoch": 0.47, "grad_norm": 0.5415933132171631, "learning_rate": 0.000563904579038759, "loss": 3.3708, "step": 9657 }, { "epoch": 0.47, "grad_norm": 0.5286021828651428, "learning_rate": 0.0005638972568402542, "loss": 3.3341, "step": 9658 }, { "epoch": 0.47, "grad_norm": 0.523844838142395, "learning_rate": 0.0005638899339466948, "loss": 3.3128, "step": 9659 }, { "epoch": 0.47, "grad_norm": 0.5575231909751892, "learning_rate": 0.0005638826103580999, "loss": 3.2247, "step": 9660 }, { "epoch": 0.47, "grad_norm": 0.5189796686172485, "learning_rate": 0.0005638752860744891, "loss": 3.3183, "step": 9661 }, { "epoch": 0.47, "grad_norm": 0.5277138352394104, "learning_rate": 0.0005638679610958817, "loss": 3.2104, "step": 9662 }, { "epoch": 0.47, "grad_norm": 0.500163733959198, "learning_rate": 0.0005638606354222967, "loss": 3.3856, "step": 9663 }, { "epoch": 0.47, "grad_norm": 0.555261492729187, "learning_rate": 0.0005638533090537536, "loss": 3.3742, "step": 9664 }, { "epoch": 0.47, "grad_norm": 0.5257711410522461, "learning_rate": 0.0005638459819902718, "loss": 3.2025, "step": 9665 }, { "epoch": 0.47, "grad_norm": 0.5524579286575317, "learning_rate": 0.0005638386542318703, "loss": 3.2449, "step": 9666 }, { "epoch": 0.47, "grad_norm": 0.561862051486969, "learning_rate": 0.0005638313257785685, "loss": 3.1967, "step": 9667 }, { "epoch": 0.47, "grad_norm": 0.5200432538986206, "learning_rate": 0.0005638239966303859, "loss": 3.2598, "step": 9668 }, { "epoch": 0.47, "grad_norm": 0.524965226650238, "learning_rate": 0.0005638166667873417, "loss": 3.4027, "step": 9669 }, { "epoch": 0.47, "grad_norm": 0.5578727722167969, "learning_rate": 0.0005638093362494551, "loss": 3.0414, "step": 9670 }, { "epoch": 0.47, "grad_norm": 0.5654264092445374, "learning_rate": 0.0005638020050167456, "loss": 3.3246, "step": 9671 }, { "epoch": 0.47, "grad_norm": 0.5263561606407166, "learning_rate": 0.0005637946730892323, "loss": 3.2548, "step": 9672 }, { "epoch": 0.47, "grad_norm": 0.5265440940856934, "learning_rate": 0.0005637873404669345, "loss": 3.1688, "step": 9673 }, { "epoch": 0.47, "grad_norm": 0.5504220128059387, "learning_rate": 0.0005637800071498717, "loss": 3.1825, "step": 9674 }, { "epoch": 0.47, "grad_norm": 0.5497219562530518, "learning_rate": 0.0005637726731380631, "loss": 3.1871, "step": 9675 }, { "epoch": 0.47, "grad_norm": 0.5197966694831848, "learning_rate": 0.0005637653384315281, "loss": 3.5599, "step": 9676 }, { "epoch": 0.47, "grad_norm": 0.5388951897621155, "learning_rate": 0.0005637580030302859, "loss": 3.3868, "step": 9677 }, { "epoch": 0.47, "grad_norm": 0.5066190361976624, "learning_rate": 0.000563750666934356, "loss": 3.1101, "step": 9678 }, { "epoch": 0.47, "grad_norm": 0.5020130276679993, "learning_rate": 0.0005637433301437575, "loss": 3.2261, "step": 9679 }, { "epoch": 0.47, "grad_norm": 0.5464184880256653, "learning_rate": 0.0005637359926585099, "loss": 3.2281, "step": 9680 }, { "epoch": 0.47, "grad_norm": 0.5577077865600586, "learning_rate": 0.0005637286544786323, "loss": 3.3123, "step": 9681 }, { "epoch": 0.47, "grad_norm": 0.564420759677887, "learning_rate": 0.0005637213156041443, "loss": 3.0541, "step": 9682 }, { "epoch": 0.47, "grad_norm": 0.4688968360424042, "learning_rate": 0.000563713976035065, "loss": 3.2077, "step": 9683 }, { "epoch": 0.47, "grad_norm": 0.4943731129169464, "learning_rate": 0.000563706635771414, "loss": 3.2392, "step": 9684 }, { "epoch": 0.47, "grad_norm": 0.5055580139160156, "learning_rate": 0.0005636992948132103, "loss": 3.3449, "step": 9685 }, { "epoch": 0.47, "grad_norm": 0.4915640950202942, "learning_rate": 0.0005636919531604736, "loss": 3.3726, "step": 9686 }, { "epoch": 0.47, "grad_norm": 0.4966067969799042, "learning_rate": 0.0005636846108132229, "loss": 3.322, "step": 9687 }, { "epoch": 0.47, "grad_norm": 0.5646604299545288, "learning_rate": 0.0005636772677714777, "loss": 3.1713, "step": 9688 }, { "epoch": 0.47, "grad_norm": 0.5298094153404236, "learning_rate": 0.0005636699240352574, "loss": 3.2453, "step": 9689 }, { "epoch": 0.47, "grad_norm": 0.48152562975883484, "learning_rate": 0.0005636625796045813, "loss": 3.3521, "step": 9690 }, { "epoch": 0.47, "grad_norm": 0.5157354474067688, "learning_rate": 0.0005636552344794685, "loss": 3.3635, "step": 9691 }, { "epoch": 0.47, "grad_norm": 0.5314420461654663, "learning_rate": 0.0005636478886599387, "loss": 3.1824, "step": 9692 }, { "epoch": 0.48, "grad_norm": 0.49262553453445435, "learning_rate": 0.0005636405421460112, "loss": 3.0643, "step": 9693 }, { "epoch": 0.48, "grad_norm": 0.5675431489944458, "learning_rate": 0.000563633194937705, "loss": 3.4652, "step": 9694 }, { "epoch": 0.48, "grad_norm": 0.5083454847335815, "learning_rate": 0.0005636258470350399, "loss": 3.1061, "step": 9695 }, { "epoch": 0.48, "grad_norm": 0.5256954431533813, "learning_rate": 0.0005636184984380349, "loss": 3.2333, "step": 9696 }, { "epoch": 0.48, "grad_norm": 0.5341222286224365, "learning_rate": 0.0005636111491467097, "loss": 3.3005, "step": 9697 }, { "epoch": 0.48, "grad_norm": 0.5235113501548767, "learning_rate": 0.0005636037991610833, "loss": 3.3963, "step": 9698 }, { "epoch": 0.48, "grad_norm": 0.5327295064926147, "learning_rate": 0.0005635964484811753, "loss": 3.0668, "step": 9699 }, { "epoch": 0.48, "grad_norm": 0.5342981815338135, "learning_rate": 0.000563589097107005, "loss": 3.271, "step": 9700 }, { "epoch": 0.48, "grad_norm": 0.5108175277709961, "learning_rate": 0.0005635817450385918, "loss": 3.4472, "step": 9701 }, { "epoch": 0.48, "grad_norm": 0.5050833225250244, "learning_rate": 0.0005635743922759548, "loss": 3.4441, "step": 9702 }, { "epoch": 0.48, "grad_norm": 0.5103448033332825, "learning_rate": 0.0005635670388191137, "loss": 3.4166, "step": 9703 }, { "epoch": 0.48, "grad_norm": 0.5414007902145386, "learning_rate": 0.0005635596846680878, "loss": 3.2219, "step": 9704 }, { "epoch": 0.48, "grad_norm": 0.5395253300666809, "learning_rate": 0.0005635523298228964, "loss": 3.3887, "step": 9705 }, { "epoch": 0.48, "grad_norm": 0.5145513415336609, "learning_rate": 0.0005635449742835588, "loss": 3.3582, "step": 9706 }, { "epoch": 0.48, "grad_norm": 0.5107079148292542, "learning_rate": 0.0005635376180500945, "loss": 3.2202, "step": 9707 }, { "epoch": 0.48, "grad_norm": 0.496855229139328, "learning_rate": 0.0005635302611225228, "loss": 3.3866, "step": 9708 }, { "epoch": 0.48, "grad_norm": 0.5205982327461243, "learning_rate": 0.0005635229035008632, "loss": 3.0983, "step": 9709 }, { "epoch": 0.48, "grad_norm": 0.5210973024368286, "learning_rate": 0.0005635155451851349, "loss": 3.2638, "step": 9710 }, { "epoch": 0.48, "grad_norm": 0.5027773380279541, "learning_rate": 0.0005635081861753575, "loss": 3.3372, "step": 9711 }, { "epoch": 0.48, "grad_norm": 0.5695503950119019, "learning_rate": 0.0005635008264715501, "loss": 3.3357, "step": 9712 }, { "epoch": 0.48, "grad_norm": 0.5400940179824829, "learning_rate": 0.0005634934660737323, "loss": 3.0366, "step": 9713 }, { "epoch": 0.48, "grad_norm": 0.6115588545799255, "learning_rate": 0.0005634861049819234, "loss": 3.3567, "step": 9714 }, { "epoch": 0.48, "grad_norm": 0.5351577401161194, "learning_rate": 0.0005634787431961428, "loss": 3.1951, "step": 9715 }, { "epoch": 0.48, "grad_norm": 0.5367692112922668, "learning_rate": 0.00056347138071641, "loss": 3.216, "step": 9716 }, { "epoch": 0.48, "grad_norm": 0.5334668159484863, "learning_rate": 0.0005634640175427441, "loss": 3.4465, "step": 9717 }, { "epoch": 0.48, "grad_norm": 0.5270911455154419, "learning_rate": 0.0005634566536751648, "loss": 3.2577, "step": 9718 }, { "epoch": 0.48, "grad_norm": 0.547909677028656, "learning_rate": 0.0005634492891136914, "loss": 3.154, "step": 9719 }, { "epoch": 0.48, "grad_norm": 0.5077096819877625, "learning_rate": 0.0005634419238583433, "loss": 3.2528, "step": 9720 }, { "epoch": 0.48, "grad_norm": 0.5063366889953613, "learning_rate": 0.0005634345579091398, "loss": 3.2683, "step": 9721 }, { "epoch": 0.48, "grad_norm": 0.5194924473762512, "learning_rate": 0.0005634271912661003, "loss": 3.1052, "step": 9722 }, { "epoch": 0.48, "grad_norm": 0.4994848966598511, "learning_rate": 0.0005634198239292444, "loss": 3.3142, "step": 9723 }, { "epoch": 0.48, "grad_norm": 0.5550405383110046, "learning_rate": 0.0005634124558985913, "loss": 3.217, "step": 9724 }, { "epoch": 0.48, "grad_norm": 0.5168113708496094, "learning_rate": 0.0005634050871741606, "loss": 3.0289, "step": 9725 }, { "epoch": 0.48, "grad_norm": 0.5467432737350464, "learning_rate": 0.0005633977177559715, "loss": 3.2294, "step": 9726 }, { "epoch": 0.48, "grad_norm": 0.5153245329856873, "learning_rate": 0.0005633903476440434, "loss": 3.3251, "step": 9727 }, { "epoch": 0.48, "grad_norm": 0.5289653539657593, "learning_rate": 0.0005633829768383961, "loss": 3.0288, "step": 9728 }, { "epoch": 0.48, "grad_norm": 0.5196574330329895, "learning_rate": 0.0005633756053390485, "loss": 3.0948, "step": 9729 }, { "epoch": 0.48, "grad_norm": 0.5112874507904053, "learning_rate": 0.0005633682331460204, "loss": 3.3054, "step": 9730 }, { "epoch": 0.48, "grad_norm": 0.5316709876060486, "learning_rate": 0.000563360860259331, "loss": 3.2422, "step": 9731 }, { "epoch": 0.48, "grad_norm": 0.5173203349113464, "learning_rate": 0.0005633534866789997, "loss": 3.3232, "step": 9732 }, { "epoch": 0.48, "grad_norm": 0.5282646417617798, "learning_rate": 0.000563346112405046, "loss": 3.2662, "step": 9733 }, { "epoch": 0.48, "grad_norm": 0.598261296749115, "learning_rate": 0.0005633387374374894, "loss": 2.9985, "step": 9734 }, { "epoch": 0.48, "grad_norm": 0.4993882179260254, "learning_rate": 0.0005633313617763493, "loss": 3.2471, "step": 9735 }, { "epoch": 0.48, "grad_norm": 0.5630433559417725, "learning_rate": 0.0005633239854216449, "loss": 3.095, "step": 9736 }, { "epoch": 0.48, "grad_norm": 0.47922441363334656, "learning_rate": 0.0005633166083733959, "loss": 3.1201, "step": 9737 }, { "epoch": 0.48, "grad_norm": 0.5252969264984131, "learning_rate": 0.0005633092306316216, "loss": 3.2614, "step": 9738 }, { "epoch": 0.48, "grad_norm": 0.513140857219696, "learning_rate": 0.0005633018521963415, "loss": 3.1924, "step": 9739 }, { "epoch": 0.48, "grad_norm": 0.6242732405662537, "learning_rate": 0.0005632944730675749, "loss": 3.2182, "step": 9740 }, { "epoch": 0.48, "grad_norm": 0.5124731659889221, "learning_rate": 0.0005632870932453415, "loss": 3.3061, "step": 9741 }, { "epoch": 0.48, "grad_norm": 0.49745190143585205, "learning_rate": 0.0005632797127296605, "loss": 3.3783, "step": 9742 }, { "epoch": 0.48, "grad_norm": 0.5272019505500793, "learning_rate": 0.0005632723315205513, "loss": 3.2325, "step": 9743 }, { "epoch": 0.48, "grad_norm": 0.5917782187461853, "learning_rate": 0.0005632649496180336, "loss": 3.1377, "step": 9744 }, { "epoch": 0.48, "grad_norm": 0.5247296094894409, "learning_rate": 0.0005632575670221266, "loss": 3.1735, "step": 9745 }, { "epoch": 0.48, "grad_norm": 0.546837329864502, "learning_rate": 0.0005632501837328498, "loss": 3.6283, "step": 9746 }, { "epoch": 0.48, "grad_norm": 0.5424903035163879, "learning_rate": 0.0005632427997502227, "loss": 3.3488, "step": 9747 }, { "epoch": 0.48, "grad_norm": 0.5044673681259155, "learning_rate": 0.0005632354150742648, "loss": 3.2579, "step": 9748 }, { "epoch": 0.48, "grad_norm": 0.5371702909469604, "learning_rate": 0.0005632280297049954, "loss": 3.1453, "step": 9749 }, { "epoch": 0.48, "grad_norm": 0.5304502844810486, "learning_rate": 0.0005632206436424342, "loss": 3.2807, "step": 9750 }, { "epoch": 0.48, "grad_norm": 0.6061875820159912, "learning_rate": 0.0005632132568866002, "loss": 2.9629, "step": 9751 }, { "epoch": 0.48, "grad_norm": 0.5296114683151245, "learning_rate": 0.0005632058694375134, "loss": 3.2677, "step": 9752 }, { "epoch": 0.48, "grad_norm": 0.5466916561126709, "learning_rate": 0.0005631984812951928, "loss": 3.4509, "step": 9753 }, { "epoch": 0.48, "grad_norm": 0.5157197117805481, "learning_rate": 0.0005631910924596582, "loss": 3.1786, "step": 9754 }, { "epoch": 0.48, "grad_norm": 0.5921932458877563, "learning_rate": 0.0005631837029309288, "loss": 3.1227, "step": 9755 }, { "epoch": 0.48, "grad_norm": 0.4948883652687073, "learning_rate": 0.0005631763127090242, "loss": 3.3028, "step": 9756 }, { "epoch": 0.48, "grad_norm": 0.543792724609375, "learning_rate": 0.0005631689217939639, "loss": 3.1953, "step": 9757 }, { "epoch": 0.48, "grad_norm": 0.4762703478336334, "learning_rate": 0.0005631615301857673, "loss": 3.0992, "step": 9758 }, { "epoch": 0.48, "grad_norm": 0.5200537443161011, "learning_rate": 0.0005631541378844538, "loss": 3.2991, "step": 9759 }, { "epoch": 0.48, "grad_norm": 0.5312259793281555, "learning_rate": 0.000563146744890043, "loss": 3.2549, "step": 9760 }, { "epoch": 0.48, "grad_norm": 0.547489583492279, "learning_rate": 0.0005631393512025544, "loss": 3.392, "step": 9761 }, { "epoch": 0.48, "grad_norm": 0.5420605540275574, "learning_rate": 0.0005631319568220072, "loss": 3.5638, "step": 9762 }, { "epoch": 0.48, "grad_norm": 0.5814469456672668, "learning_rate": 0.0005631245617484211, "loss": 3.1061, "step": 9763 }, { "epoch": 0.48, "grad_norm": 0.48665496706962585, "learning_rate": 0.0005631171659818158, "loss": 3.3398, "step": 9764 }, { "epoch": 0.48, "grad_norm": 0.506272554397583, "learning_rate": 0.0005631097695222103, "loss": 3.3197, "step": 9765 }, { "epoch": 0.48, "grad_norm": 0.49559804797172546, "learning_rate": 0.0005631023723696243, "loss": 3.3218, "step": 9766 }, { "epoch": 0.48, "grad_norm": 0.5244449377059937, "learning_rate": 0.0005630949745240773, "loss": 3.3369, "step": 9767 }, { "epoch": 0.48, "grad_norm": 0.5261397957801819, "learning_rate": 0.0005630875759855889, "loss": 3.3897, "step": 9768 }, { "epoch": 0.48, "grad_norm": 0.5492829084396362, "learning_rate": 0.0005630801767541782, "loss": 3.1185, "step": 9769 }, { "epoch": 0.48, "grad_norm": 0.5077092051506042, "learning_rate": 0.0005630727768298652, "loss": 3.4211, "step": 9770 }, { "epoch": 0.48, "grad_norm": 0.5366680026054382, "learning_rate": 0.000563065376212669, "loss": 3.2115, "step": 9771 }, { "epoch": 0.48, "grad_norm": 0.5005910992622375, "learning_rate": 0.0005630579749026093, "loss": 3.2202, "step": 9772 }, { "epoch": 0.48, "grad_norm": 0.5190508365631104, "learning_rate": 0.0005630505728997055, "loss": 3.3077, "step": 9773 }, { "epoch": 0.48, "grad_norm": 0.5151124000549316, "learning_rate": 0.000563043170203977, "loss": 3.2981, "step": 9774 }, { "epoch": 0.48, "grad_norm": 0.5408289432525635, "learning_rate": 0.0005630357668154435, "loss": 3.288, "step": 9775 }, { "epoch": 0.48, "grad_norm": 0.5253680944442749, "learning_rate": 0.0005630283627341245, "loss": 3.4114, "step": 9776 }, { "epoch": 0.48, "grad_norm": 0.5189976096153259, "learning_rate": 0.0005630209579600393, "loss": 3.1448, "step": 9777 }, { "epoch": 0.48, "grad_norm": 0.5061872601509094, "learning_rate": 0.0005630135524932076, "loss": 3.1726, "step": 9778 }, { "epoch": 0.48, "grad_norm": 0.5295987725257874, "learning_rate": 0.0005630061463336488, "loss": 3.1084, "step": 9779 }, { "epoch": 0.48, "grad_norm": 0.5112553238868713, "learning_rate": 0.0005629987394813824, "loss": 3.4138, "step": 9780 }, { "epoch": 0.48, "grad_norm": 0.5010356903076172, "learning_rate": 0.0005629913319364279, "loss": 3.0226, "step": 9781 }, { "epoch": 0.48, "grad_norm": 0.497075617313385, "learning_rate": 0.0005629839236988048, "loss": 3.3942, "step": 9782 }, { "epoch": 0.48, "grad_norm": 0.5749728679656982, "learning_rate": 0.0005629765147685328, "loss": 3.2406, "step": 9783 }, { "epoch": 0.48, "grad_norm": 0.5109654664993286, "learning_rate": 0.0005629691051456312, "loss": 3.4464, "step": 9784 }, { "epoch": 0.48, "grad_norm": 0.4870873689651489, "learning_rate": 0.0005629616948301196, "loss": 3.3477, "step": 9785 }, { "epoch": 0.48, "grad_norm": 0.5036649107933044, "learning_rate": 0.0005629542838220175, "loss": 3.3398, "step": 9786 }, { "epoch": 0.48, "grad_norm": 0.4647713303565979, "learning_rate": 0.0005629468721213444, "loss": 3.4662, "step": 9787 }, { "epoch": 0.48, "grad_norm": 0.5171986818313599, "learning_rate": 0.0005629394597281199, "loss": 3.3284, "step": 9788 }, { "epoch": 0.48, "grad_norm": 0.5243804454803467, "learning_rate": 0.0005629320466423634, "loss": 3.3182, "step": 9789 }, { "epoch": 0.48, "grad_norm": 0.5532283186912537, "learning_rate": 0.0005629246328640945, "loss": 3.3757, "step": 9790 }, { "epoch": 0.48, "grad_norm": 0.5032833814620972, "learning_rate": 0.0005629172183933327, "loss": 3.3534, "step": 9791 }, { "epoch": 0.48, "grad_norm": 0.5246580839157104, "learning_rate": 0.0005629098032300978, "loss": 3.408, "step": 9792 }, { "epoch": 0.48, "grad_norm": 0.5527105927467346, "learning_rate": 0.0005629023873744087, "loss": 3.2838, "step": 9793 }, { "epoch": 0.48, "grad_norm": 0.5229583978652954, "learning_rate": 0.0005628949708262856, "loss": 3.3617, "step": 9794 }, { "epoch": 0.48, "grad_norm": 0.5168598294258118, "learning_rate": 0.0005628875535857476, "loss": 3.4933, "step": 9795 }, { "epoch": 0.48, "grad_norm": 0.4852995276451111, "learning_rate": 0.0005628801356528144, "loss": 3.2868, "step": 9796 }, { "epoch": 0.48, "grad_norm": 0.5106096267700195, "learning_rate": 0.0005628727170275055, "loss": 3.188, "step": 9797 }, { "epoch": 0.48, "grad_norm": 0.5226311683654785, "learning_rate": 0.0005628652977098405, "loss": 3.4544, "step": 9798 }, { "epoch": 0.48, "grad_norm": 0.5285156965255737, "learning_rate": 0.0005628578776998389, "loss": 2.9966, "step": 9799 }, { "epoch": 0.48, "grad_norm": 0.5192917585372925, "learning_rate": 0.0005628504569975201, "loss": 3.5259, "step": 9800 }, { "epoch": 0.48, "grad_norm": 0.5123999714851379, "learning_rate": 0.0005628430356029039, "loss": 3.1751, "step": 9801 }, { "epoch": 0.48, "grad_norm": 0.484468013048172, "learning_rate": 0.0005628356135160097, "loss": 3.3334, "step": 9802 }, { "epoch": 0.48, "grad_norm": 0.5368399620056152, "learning_rate": 0.0005628281907368571, "loss": 3.4312, "step": 9803 }, { "epoch": 0.48, "grad_norm": 0.5847996473312378, "learning_rate": 0.0005628207672654656, "loss": 3.4378, "step": 9804 }, { "epoch": 0.48, "grad_norm": 0.487942099571228, "learning_rate": 0.0005628133431018548, "loss": 3.1906, "step": 9805 }, { "epoch": 0.48, "grad_norm": 0.4921421706676483, "learning_rate": 0.0005628059182460442, "loss": 3.1767, "step": 9806 }, { "epoch": 0.48, "grad_norm": 0.5178766250610352, "learning_rate": 0.0005627984926980535, "loss": 3.2761, "step": 9807 }, { "epoch": 0.48, "grad_norm": 0.5124492049217224, "learning_rate": 0.000562791066457902, "loss": 3.4485, "step": 9808 }, { "epoch": 0.48, "grad_norm": 0.5111625790596008, "learning_rate": 0.0005627836395256095, "loss": 3.3804, "step": 9809 }, { "epoch": 0.48, "grad_norm": 0.5112327337265015, "learning_rate": 0.0005627762119011955, "loss": 3.3067, "step": 9810 }, { "epoch": 0.48, "grad_norm": 0.5200767517089844, "learning_rate": 0.0005627687835846794, "loss": 3.2413, "step": 9811 }, { "epoch": 0.48, "grad_norm": 0.5200941562652588, "learning_rate": 0.000562761354576081, "loss": 3.3482, "step": 9812 }, { "epoch": 0.48, "grad_norm": 0.5365529656410217, "learning_rate": 0.0005627539248754196, "loss": 3.281, "step": 9813 }, { "epoch": 0.48, "grad_norm": 0.5436131954193115, "learning_rate": 0.0005627464944827151, "loss": 3.3693, "step": 9814 }, { "epoch": 0.48, "grad_norm": 0.545846164226532, "learning_rate": 0.0005627390633979869, "loss": 3.3813, "step": 9815 }, { "epoch": 0.48, "grad_norm": 0.5612288117408752, "learning_rate": 0.0005627316316212544, "loss": 3.1317, "step": 9816 }, { "epoch": 0.48, "grad_norm": 0.5418693423271179, "learning_rate": 0.0005627241991525376, "loss": 3.3768, "step": 9817 }, { "epoch": 0.48, "grad_norm": 0.5217850804328918, "learning_rate": 0.0005627167659918557, "loss": 3.2675, "step": 9818 }, { "epoch": 0.48, "grad_norm": 0.5639944076538086, "learning_rate": 0.0005627093321392283, "loss": 3.313, "step": 9819 }, { "epoch": 0.48, "grad_norm": 0.5243082642555237, "learning_rate": 0.0005627018975946752, "loss": 3.4244, "step": 9820 }, { "epoch": 0.48, "grad_norm": 0.4913342595100403, "learning_rate": 0.0005626944623582158, "loss": 3.3463, "step": 9821 }, { "epoch": 0.48, "grad_norm": 0.5058574080467224, "learning_rate": 0.0005626870264298698, "loss": 3.402, "step": 9822 }, { "epoch": 0.48, "grad_norm": 0.5007573962211609, "learning_rate": 0.0005626795898096568, "loss": 3.3769, "step": 9823 }, { "epoch": 0.48, "grad_norm": 0.5387548804283142, "learning_rate": 0.0005626721524975962, "loss": 3.2397, "step": 9824 }, { "epoch": 0.48, "grad_norm": 0.5633268356323242, "learning_rate": 0.0005626647144937076, "loss": 3.4537, "step": 9825 }, { "epoch": 0.48, "grad_norm": 0.48922309279441833, "learning_rate": 0.0005626572757980109, "loss": 3.2293, "step": 9826 }, { "epoch": 0.48, "grad_norm": 0.5135512948036194, "learning_rate": 0.0005626498364105254, "loss": 3.2582, "step": 9827 }, { "epoch": 0.48, "grad_norm": 0.5401769280433655, "learning_rate": 0.0005626423963312707, "loss": 3.4899, "step": 9828 }, { "epoch": 0.48, "grad_norm": 0.5234643816947937, "learning_rate": 0.0005626349555602666, "loss": 3.3206, "step": 9829 }, { "epoch": 0.48, "grad_norm": 0.5322685837745667, "learning_rate": 0.0005626275140975326, "loss": 3.2296, "step": 9830 }, { "epoch": 0.48, "grad_norm": 0.5047280192375183, "learning_rate": 0.0005626200719430881, "loss": 3.2615, "step": 9831 }, { "epoch": 0.48, "grad_norm": 0.5207504630088806, "learning_rate": 0.0005626126290969529, "loss": 3.2849, "step": 9832 }, { "epoch": 0.48, "grad_norm": 0.4990766942501068, "learning_rate": 0.0005626051855591467, "loss": 3.21, "step": 9833 }, { "epoch": 0.48, "grad_norm": 0.5085784792900085, "learning_rate": 0.0005625977413296889, "loss": 3.4235, "step": 9834 }, { "epoch": 0.48, "grad_norm": 0.5034537315368652, "learning_rate": 0.0005625902964085992, "loss": 3.2626, "step": 9835 }, { "epoch": 0.48, "grad_norm": 0.6476481556892395, "learning_rate": 0.0005625828507958973, "loss": 3.1521, "step": 9836 }, { "epoch": 0.48, "grad_norm": 0.5102798938751221, "learning_rate": 0.0005625754044916025, "loss": 3.2906, "step": 9837 }, { "epoch": 0.48, "grad_norm": 0.510310173034668, "learning_rate": 0.0005625679574957349, "loss": 2.9805, "step": 9838 }, { "epoch": 0.48, "grad_norm": 0.5330062508583069, "learning_rate": 0.0005625605098083135, "loss": 3.287, "step": 9839 }, { "epoch": 0.48, "grad_norm": 0.5602695941925049, "learning_rate": 0.0005625530614293584, "loss": 3.1953, "step": 9840 }, { "epoch": 0.48, "grad_norm": 0.5196672677993774, "learning_rate": 0.0005625456123588892, "loss": 3.2402, "step": 9841 }, { "epoch": 0.48, "grad_norm": 0.6540876626968384, "learning_rate": 0.0005625381625969252, "loss": 3.3599, "step": 9842 }, { "epoch": 0.48, "grad_norm": 0.5737609267234802, "learning_rate": 0.0005625307121434862, "loss": 3.2897, "step": 9843 }, { "epoch": 0.48, "grad_norm": 0.5284690260887146, "learning_rate": 0.0005625232609985919, "loss": 3.2414, "step": 9844 }, { "epoch": 0.48, "grad_norm": 0.5034304261207581, "learning_rate": 0.0005625158091622619, "loss": 3.2033, "step": 9845 }, { "epoch": 0.48, "grad_norm": 0.49428412318229675, "learning_rate": 0.0005625083566345158, "loss": 3.1545, "step": 9846 }, { "epoch": 0.48, "grad_norm": 0.5276312828063965, "learning_rate": 0.0005625009034153732, "loss": 3.1025, "step": 9847 }, { "epoch": 0.48, "grad_norm": 0.5318641066551208, "learning_rate": 0.0005624934495048535, "loss": 3.2542, "step": 9848 }, { "epoch": 0.48, "grad_norm": 0.5393041372299194, "learning_rate": 0.0005624859949029768, "loss": 3.2845, "step": 9849 }, { "epoch": 0.48, "grad_norm": 0.5345759391784668, "learning_rate": 0.0005624785396097625, "loss": 3.2756, "step": 9850 }, { "epoch": 0.48, "grad_norm": 0.5293691754341125, "learning_rate": 0.0005624710836252302, "loss": 3.2409, "step": 9851 }, { "epoch": 0.48, "grad_norm": 0.5240424275398254, "learning_rate": 0.0005624636269493995, "loss": 3.1066, "step": 9852 }, { "epoch": 0.48, "grad_norm": 0.5183370113372803, "learning_rate": 0.0005624561695822903, "loss": 3.2444, "step": 9853 }, { "epoch": 0.48, "grad_norm": 0.5233088731765747, "learning_rate": 0.0005624487115239219, "loss": 3.0997, "step": 9854 }, { "epoch": 0.48, "grad_norm": 0.5566701889038086, "learning_rate": 0.0005624412527743142, "loss": 3.6284, "step": 9855 }, { "epoch": 0.48, "grad_norm": 0.5460590720176697, "learning_rate": 0.0005624337933334867, "loss": 3.2746, "step": 9856 }, { "epoch": 0.48, "grad_norm": 0.5318386554718018, "learning_rate": 0.0005624263332014591, "loss": 3.38, "step": 9857 }, { "epoch": 0.48, "grad_norm": 0.5089846253395081, "learning_rate": 0.0005624188723782511, "loss": 3.2773, "step": 9858 }, { "epoch": 0.48, "grad_norm": 0.5393161177635193, "learning_rate": 0.0005624114108638822, "loss": 2.969, "step": 9859 }, { "epoch": 0.48, "grad_norm": 0.5289279818534851, "learning_rate": 0.0005624039486583721, "loss": 3.1732, "step": 9860 }, { "epoch": 0.48, "grad_norm": 0.5223667621612549, "learning_rate": 0.0005623964857617407, "loss": 3.0991, "step": 9861 }, { "epoch": 0.48, "grad_norm": 0.5194090604782104, "learning_rate": 0.0005623890221740074, "loss": 3.1106, "step": 9862 }, { "epoch": 0.48, "grad_norm": 0.5405828952789307, "learning_rate": 0.0005623815578951918, "loss": 2.9179, "step": 9863 }, { "epoch": 0.48, "grad_norm": 0.49917301535606384, "learning_rate": 0.0005623740929253136, "loss": 3.1937, "step": 9864 }, { "epoch": 0.48, "grad_norm": 0.5121216773986816, "learning_rate": 0.0005623666272643927, "loss": 3.483, "step": 9865 }, { "epoch": 0.48, "grad_norm": 0.5729875564575195, "learning_rate": 0.0005623591609124486, "loss": 3.2528, "step": 9866 }, { "epoch": 0.48, "grad_norm": 0.47562557458877563, "learning_rate": 0.0005623516938695009, "loss": 3.1612, "step": 9867 }, { "epoch": 0.48, "grad_norm": 0.5255392789840698, "learning_rate": 0.0005623442261355694, "loss": 3.2327, "step": 9868 }, { "epoch": 0.48, "grad_norm": 0.5365827083587646, "learning_rate": 0.0005623367577106736, "loss": 3.2066, "step": 9869 }, { "epoch": 0.48, "grad_norm": 0.5231798887252808, "learning_rate": 0.0005623292885948333, "loss": 3.2322, "step": 9870 }, { "epoch": 0.48, "grad_norm": 0.5149793028831482, "learning_rate": 0.0005623218187880682, "loss": 3.4431, "step": 9871 }, { "epoch": 0.48, "grad_norm": 0.5159502625465393, "learning_rate": 0.0005623143482903979, "loss": 3.103, "step": 9872 }, { "epoch": 0.48, "grad_norm": 0.523520290851593, "learning_rate": 0.0005623068771018419, "loss": 3.2667, "step": 9873 }, { "epoch": 0.48, "grad_norm": 0.5203685760498047, "learning_rate": 0.0005622994052224203, "loss": 3.3393, "step": 9874 }, { "epoch": 0.48, "grad_norm": 0.5236057639122009, "learning_rate": 0.0005622919326521525, "loss": 3.3034, "step": 9875 }, { "epoch": 0.48, "grad_norm": 0.5350485444068909, "learning_rate": 0.0005622844593910583, "loss": 3.4209, "step": 9876 }, { "epoch": 0.48, "grad_norm": 0.48950937390327454, "learning_rate": 0.0005622769854391571, "loss": 3.3428, "step": 9877 }, { "epoch": 0.48, "grad_norm": 0.5686964988708496, "learning_rate": 0.0005622695107964689, "loss": 3.0339, "step": 9878 }, { "epoch": 0.48, "grad_norm": 0.5592756271362305, "learning_rate": 0.0005622620354630133, "loss": 3.3535, "step": 9879 }, { "epoch": 0.48, "grad_norm": 0.4988493025302887, "learning_rate": 0.00056225455943881, "loss": 3.3169, "step": 9880 }, { "epoch": 0.48, "grad_norm": 0.5318952202796936, "learning_rate": 0.0005622470827238786, "loss": 3.171, "step": 9881 }, { "epoch": 0.48, "grad_norm": 0.5253759622573853, "learning_rate": 0.000562239605318239, "loss": 3.4359, "step": 9882 }, { "epoch": 0.48, "grad_norm": 0.4880097806453705, "learning_rate": 0.0005622321272219105, "loss": 3.3283, "step": 9883 }, { "epoch": 0.48, "grad_norm": 0.49652519822120667, "learning_rate": 0.0005622246484349132, "loss": 3.1138, "step": 9884 }, { "epoch": 0.48, "grad_norm": 0.554382860660553, "learning_rate": 0.0005622171689572666, "loss": 3.1591, "step": 9885 }, { "epoch": 0.48, "grad_norm": 0.5065361857414246, "learning_rate": 0.0005622096887889905, "loss": 3.1288, "step": 9886 }, { "epoch": 0.48, "grad_norm": 0.5004228353500366, "learning_rate": 0.0005622022079301045, "loss": 3.1548, "step": 9887 }, { "epoch": 0.48, "grad_norm": 0.5251879096031189, "learning_rate": 0.0005621947263806284, "loss": 3.2474, "step": 9888 }, { "epoch": 0.48, "grad_norm": 0.5283453464508057, "learning_rate": 0.0005621872441405818, "loss": 3.3388, "step": 9889 }, { "epoch": 0.48, "grad_norm": 0.5356844067573547, "learning_rate": 0.0005621797612099845, "loss": 3.3577, "step": 9890 }, { "epoch": 0.48, "grad_norm": 0.5235812067985535, "learning_rate": 0.0005621722775888561, "loss": 3.1009, "step": 9891 }, { "epoch": 0.48, "grad_norm": 0.5591863989830017, "learning_rate": 0.0005621647932772164, "loss": 3.3538, "step": 9892 }, { "epoch": 0.48, "grad_norm": 0.48452454805374146, "learning_rate": 0.000562157308275085, "loss": 3.429, "step": 9893 }, { "epoch": 0.48, "grad_norm": 0.5546316504478455, "learning_rate": 0.0005621498225824818, "loss": 3.214, "step": 9894 }, { "epoch": 0.48, "grad_norm": 0.48921647667884827, "learning_rate": 0.0005621423361994264, "loss": 3.2568, "step": 9895 }, { "epoch": 0.48, "grad_norm": 0.5067411661148071, "learning_rate": 0.0005621348491259386, "loss": 3.5054, "step": 9896 }, { "epoch": 0.49, "grad_norm": 0.5032419562339783, "learning_rate": 0.000562127361362038, "loss": 3.3825, "step": 9897 }, { "epoch": 0.49, "grad_norm": 0.48625296354293823, "learning_rate": 0.0005621198729077444, "loss": 3.0214, "step": 9898 }, { "epoch": 0.49, "grad_norm": 0.5369133949279785, "learning_rate": 0.0005621123837630776, "loss": 3.1147, "step": 9899 }, { "epoch": 0.49, "grad_norm": 0.5231306552886963, "learning_rate": 0.000562104893928057, "loss": 3.3092, "step": 9900 }, { "epoch": 0.49, "grad_norm": 0.5136045217514038, "learning_rate": 0.0005620974034027026, "loss": 3.3411, "step": 9901 }, { "epoch": 0.49, "grad_norm": 0.5047800540924072, "learning_rate": 0.0005620899121870342, "loss": 3.3716, "step": 9902 }, { "epoch": 0.49, "grad_norm": 0.5415762662887573, "learning_rate": 0.0005620824202810713, "loss": 3.2862, "step": 9903 }, { "epoch": 0.49, "grad_norm": 0.5388754606246948, "learning_rate": 0.0005620749276848339, "loss": 3.3603, "step": 9904 }, { "epoch": 0.49, "grad_norm": 0.4989524781703949, "learning_rate": 0.0005620674343983415, "loss": 3.2319, "step": 9905 }, { "epoch": 0.49, "grad_norm": 0.5338003039360046, "learning_rate": 0.0005620599404216138, "loss": 3.2099, "step": 9906 }, { "epoch": 0.49, "grad_norm": 0.483467698097229, "learning_rate": 0.0005620524457546708, "loss": 3.3824, "step": 9907 }, { "epoch": 0.49, "grad_norm": 0.5273773670196533, "learning_rate": 0.000562044950397532, "loss": 3.2784, "step": 9908 }, { "epoch": 0.49, "grad_norm": 0.5121115446090698, "learning_rate": 0.0005620374543502173, "loss": 3.2377, "step": 9909 }, { "epoch": 0.49, "grad_norm": 0.5008372068405151, "learning_rate": 0.0005620299576127463, "loss": 3.3862, "step": 9910 }, { "epoch": 0.49, "grad_norm": 0.49955323338508606, "learning_rate": 0.0005620224601851389, "loss": 3.4437, "step": 9911 }, { "epoch": 0.49, "grad_norm": 0.5031404495239258, "learning_rate": 0.0005620149620674147, "loss": 3.3564, "step": 9912 }, { "epoch": 0.49, "grad_norm": 0.5203202962875366, "learning_rate": 0.0005620074632595936, "loss": 3.1263, "step": 9913 }, { "epoch": 0.49, "grad_norm": 0.48752161860466003, "learning_rate": 0.0005619999637616951, "loss": 3.476, "step": 9914 }, { "epoch": 0.49, "grad_norm": 0.5217284560203552, "learning_rate": 0.0005619924635737393, "loss": 3.3237, "step": 9915 }, { "epoch": 0.49, "grad_norm": 0.5151270031929016, "learning_rate": 0.0005619849626957457, "loss": 3.2331, "step": 9916 }, { "epoch": 0.49, "grad_norm": 0.549656093120575, "learning_rate": 0.0005619774611277342, "loss": 3.2498, "step": 9917 }, { "epoch": 0.49, "grad_norm": 0.568009078502655, "learning_rate": 0.0005619699588697243, "loss": 3.3618, "step": 9918 }, { "epoch": 0.49, "grad_norm": 0.4984912872314453, "learning_rate": 0.0005619624559217361, "loss": 3.3981, "step": 9919 }, { "epoch": 0.49, "grad_norm": 0.5053259134292603, "learning_rate": 0.0005619549522837891, "loss": 3.5771, "step": 9920 }, { "epoch": 0.49, "grad_norm": 0.5003093481063843, "learning_rate": 0.0005619474479559033, "loss": 3.3099, "step": 9921 }, { "epoch": 0.49, "grad_norm": 0.5107256770133972, "learning_rate": 0.0005619399429380983, "loss": 3.2729, "step": 9922 }, { "epoch": 0.49, "grad_norm": 0.5822942852973938, "learning_rate": 0.0005619324372303938, "loss": 3.1418, "step": 9923 }, { "epoch": 0.49, "grad_norm": 0.6355964541435242, "learning_rate": 0.0005619249308328098, "loss": 3.2895, "step": 9924 }, { "epoch": 0.49, "grad_norm": 0.5443882942199707, "learning_rate": 0.0005619174237453658, "loss": 3.3586, "step": 9925 }, { "epoch": 0.49, "grad_norm": 0.5277007818222046, "learning_rate": 0.0005619099159680818, "loss": 3.2578, "step": 9926 }, { "epoch": 0.49, "grad_norm": 0.49704602360725403, "learning_rate": 0.0005619024075009775, "loss": 3.2784, "step": 9927 }, { "epoch": 0.49, "grad_norm": 0.5114836692810059, "learning_rate": 0.0005618948983440727, "loss": 3.0513, "step": 9928 }, { "epoch": 0.49, "grad_norm": 0.5052823424339294, "learning_rate": 0.0005618873884973871, "loss": 3.4613, "step": 9929 }, { "epoch": 0.49, "grad_norm": 0.5057597756385803, "learning_rate": 0.0005618798779609405, "loss": 3.2938, "step": 9930 }, { "epoch": 0.49, "grad_norm": 0.5410943031311035, "learning_rate": 0.0005618723667347526, "loss": 3.2756, "step": 9931 }, { "epoch": 0.49, "grad_norm": 0.5320307612419128, "learning_rate": 0.0005618648548188434, "loss": 3.2589, "step": 9932 }, { "epoch": 0.49, "grad_norm": 0.5032922029495239, "learning_rate": 0.0005618573422132327, "loss": 3.308, "step": 9933 }, { "epoch": 0.49, "grad_norm": 0.49512460827827454, "learning_rate": 0.0005618498289179399, "loss": 3.2621, "step": 9934 }, { "epoch": 0.49, "grad_norm": 0.5111963152885437, "learning_rate": 0.0005618423149329853, "loss": 3.2512, "step": 9935 }, { "epoch": 0.49, "grad_norm": 0.5099007487297058, "learning_rate": 0.0005618348002583883, "loss": 3.4597, "step": 9936 }, { "epoch": 0.49, "grad_norm": 0.5154657363891602, "learning_rate": 0.0005618272848941687, "loss": 3.4915, "step": 9937 }, { "epoch": 0.49, "grad_norm": 0.5354853272438049, "learning_rate": 0.0005618197688403466, "loss": 3.3357, "step": 9938 }, { "epoch": 0.49, "grad_norm": 0.5375204682350159, "learning_rate": 0.0005618122520969416, "loss": 3.2745, "step": 9939 }, { "epoch": 0.49, "grad_norm": 0.5439879894256592, "learning_rate": 0.0005618047346639735, "loss": 3.3741, "step": 9940 }, { "epoch": 0.49, "grad_norm": 0.5084054470062256, "learning_rate": 0.0005617972165414621, "loss": 3.4072, "step": 9941 }, { "epoch": 0.49, "grad_norm": 0.5047259330749512, "learning_rate": 0.0005617896977294271, "loss": 3.1825, "step": 9942 }, { "epoch": 0.49, "grad_norm": 0.5263615846633911, "learning_rate": 0.0005617821782278886, "loss": 3.3154, "step": 9943 }, { "epoch": 0.49, "grad_norm": 0.5237258076667786, "learning_rate": 0.0005617746580368661, "loss": 3.3176, "step": 9944 }, { "epoch": 0.49, "grad_norm": 0.535914957523346, "learning_rate": 0.0005617671371563797, "loss": 3.2534, "step": 9945 }, { "epoch": 0.49, "grad_norm": 0.5265949368476868, "learning_rate": 0.0005617596155864489, "loss": 3.3145, "step": 9946 }, { "epoch": 0.49, "grad_norm": 0.49210605025291443, "learning_rate": 0.0005617520933270937, "loss": 3.4768, "step": 9947 }, { "epoch": 0.49, "grad_norm": 0.5354414582252502, "learning_rate": 0.0005617445703783337, "loss": 3.0916, "step": 9948 }, { "epoch": 0.49, "grad_norm": 0.525729775428772, "learning_rate": 0.0005617370467401891, "loss": 3.5307, "step": 9949 }, { "epoch": 0.49, "grad_norm": 0.5057360529899597, "learning_rate": 0.0005617295224126794, "loss": 3.1895, "step": 9950 }, { "epoch": 0.49, "grad_norm": 0.5312747955322266, "learning_rate": 0.0005617219973958244, "loss": 3.3153, "step": 9951 }, { "epoch": 0.49, "grad_norm": 0.5353907942771912, "learning_rate": 0.0005617144716896441, "loss": 3.3866, "step": 9952 }, { "epoch": 0.49, "grad_norm": 0.4996834397315979, "learning_rate": 0.0005617069452941584, "loss": 3.0625, "step": 9953 }, { "epoch": 0.49, "grad_norm": 0.5687936544418335, "learning_rate": 0.0005616994182093869, "loss": 3.1249, "step": 9954 }, { "epoch": 0.49, "grad_norm": 0.5160571336746216, "learning_rate": 0.0005616918904353494, "loss": 3.2989, "step": 9955 }, { "epoch": 0.49, "grad_norm": 0.5176610350608826, "learning_rate": 0.0005616843619720658, "loss": 3.2189, "step": 9956 }, { "epoch": 0.49, "grad_norm": 0.5297330021858215, "learning_rate": 0.000561676832819556, "loss": 3.2166, "step": 9957 }, { "epoch": 0.49, "grad_norm": 0.5623044371604919, "learning_rate": 0.00056166930297784, "loss": 3.4027, "step": 9958 }, { "epoch": 0.49, "grad_norm": 0.5169060230255127, "learning_rate": 0.0005616617724469371, "loss": 3.3397, "step": 9959 }, { "epoch": 0.49, "grad_norm": 0.5446101427078247, "learning_rate": 0.0005616542412268677, "loss": 3.3629, "step": 9960 }, { "epoch": 0.49, "grad_norm": 0.5260056853294373, "learning_rate": 0.0005616467093176511, "loss": 3.0683, "step": 9961 }, { "epoch": 0.49, "grad_norm": 0.5235419869422913, "learning_rate": 0.0005616391767193077, "loss": 3.1902, "step": 9962 }, { "epoch": 0.49, "grad_norm": 0.514287531375885, "learning_rate": 0.0005616316434318569, "loss": 3.2299, "step": 9963 }, { "epoch": 0.49, "grad_norm": 0.5356868505477905, "learning_rate": 0.0005616241094553188, "loss": 3.4199, "step": 9964 }, { "epoch": 0.49, "grad_norm": 0.5147073864936829, "learning_rate": 0.000561616574789713, "loss": 3.2718, "step": 9965 }, { "epoch": 0.49, "grad_norm": 0.5343377590179443, "learning_rate": 0.0005616090394350596, "loss": 3.2803, "step": 9966 }, { "epoch": 0.49, "grad_norm": 0.5024563670158386, "learning_rate": 0.0005616015033913784, "loss": 3.204, "step": 9967 }, { "epoch": 0.49, "grad_norm": 0.5427229404449463, "learning_rate": 0.0005615939666586891, "loss": 3.395, "step": 9968 }, { "epoch": 0.49, "grad_norm": 0.5278394222259521, "learning_rate": 0.0005615864292370116, "loss": 3.07, "step": 9969 }, { "epoch": 0.49, "grad_norm": 0.5011979937553406, "learning_rate": 0.0005615788911263659, "loss": 3.3395, "step": 9970 }, { "epoch": 0.49, "grad_norm": 0.5226938724517822, "learning_rate": 0.0005615713523267716, "loss": 3.1389, "step": 9971 }, { "epoch": 0.49, "grad_norm": 0.5529884696006775, "learning_rate": 0.0005615638128382488, "loss": 3.2872, "step": 9972 }, { "epoch": 0.49, "grad_norm": 0.5180261731147766, "learning_rate": 0.0005615562726608173, "loss": 2.9213, "step": 9973 }, { "epoch": 0.49, "grad_norm": 0.49697425961494446, "learning_rate": 0.0005615487317944969, "loss": 3.3555, "step": 9974 }, { "epoch": 0.49, "grad_norm": 0.5160676836967468, "learning_rate": 0.0005615411902393073, "loss": 3.175, "step": 9975 }, { "epoch": 0.49, "grad_norm": 0.49578121304512024, "learning_rate": 0.0005615336479952687, "loss": 3.1123, "step": 9976 }, { "epoch": 0.49, "grad_norm": 0.5369651317596436, "learning_rate": 0.0005615261050624007, "loss": 3.2317, "step": 9977 }, { "epoch": 0.49, "grad_norm": 0.557094156742096, "learning_rate": 0.0005615185614407234, "loss": 3.2406, "step": 9978 }, { "epoch": 0.49, "grad_norm": 0.5667169094085693, "learning_rate": 0.0005615110171302565, "loss": 3.2702, "step": 9979 }, { "epoch": 0.49, "grad_norm": 0.5118554830551147, "learning_rate": 0.0005615034721310199, "loss": 3.2947, "step": 9980 }, { "epoch": 0.49, "grad_norm": 0.4968426823616028, "learning_rate": 0.0005614959264430335, "loss": 3.251, "step": 9981 }, { "epoch": 0.49, "grad_norm": 0.5141085386276245, "learning_rate": 0.000561488380066317, "loss": 3.1599, "step": 9982 }, { "epoch": 0.49, "grad_norm": 0.49446555972099304, "learning_rate": 0.0005614808330008906, "loss": 3.3511, "step": 9983 }, { "epoch": 0.49, "grad_norm": 0.5145710110664368, "learning_rate": 0.0005614732852467741, "loss": 3.2855, "step": 9984 }, { "epoch": 0.49, "grad_norm": 0.5225349068641663, "learning_rate": 0.0005614657368039871, "loss": 3.3159, "step": 9985 }, { "epoch": 0.49, "grad_norm": 0.5085193514823914, "learning_rate": 0.0005614581876725497, "loss": 3.1745, "step": 9986 }, { "epoch": 0.49, "grad_norm": 0.5428699851036072, "learning_rate": 0.0005614506378524818, "loss": 3.2747, "step": 9987 }, { "epoch": 0.49, "grad_norm": 0.495169073343277, "learning_rate": 0.0005614430873438032, "loss": 3.3072, "step": 9988 }, { "epoch": 0.49, "grad_norm": 0.558083176612854, "learning_rate": 0.0005614355361465338, "loss": 3.3659, "step": 9989 }, { "epoch": 0.49, "grad_norm": 0.5748363733291626, "learning_rate": 0.0005614279842606936, "loss": 3.3423, "step": 9990 }, { "epoch": 0.49, "grad_norm": 0.5683549046516418, "learning_rate": 0.0005614204316863023, "loss": 3.1963, "step": 9991 }, { "epoch": 0.49, "grad_norm": 0.576992392539978, "learning_rate": 0.00056141287842338, "loss": 3.1108, "step": 9992 }, { "epoch": 0.49, "grad_norm": 0.545221209526062, "learning_rate": 0.0005614053244719464, "loss": 3.2083, "step": 9993 }, { "epoch": 0.49, "grad_norm": 0.5298340320587158, "learning_rate": 0.0005613977698320215, "loss": 3.231, "step": 9994 }, { "epoch": 0.49, "grad_norm": 0.5359015464782715, "learning_rate": 0.0005613902145036253, "loss": 3.2543, "step": 9995 }, { "epoch": 0.49, "grad_norm": 0.518440842628479, "learning_rate": 0.0005613826584867775, "loss": 3.2654, "step": 9996 }, { "epoch": 0.49, "grad_norm": 0.5107799768447876, "learning_rate": 0.000561375101781498, "loss": 3.2014, "step": 9997 }, { "epoch": 0.49, "grad_norm": 0.5043929219245911, "learning_rate": 0.0005613675443878069, "loss": 3.167, "step": 9998 }, { "epoch": 0.49, "grad_norm": 0.5088926553726196, "learning_rate": 0.0005613599863057238, "loss": 3.1162, "step": 9999 }, { "epoch": 0.49, "grad_norm": 0.5218754410743713, "learning_rate": 0.000561352427535269, "loss": 3.3379, "step": 10000 }, { "epoch": 0.49, "grad_norm": 0.521481990814209, "learning_rate": 0.0005613448680764621, "loss": 3.3661, "step": 10001 }, { "epoch": 0.49, "grad_norm": 0.5064173936843872, "learning_rate": 0.0005613373079293232, "loss": 3.3448, "step": 10002 }, { "epoch": 0.49, "grad_norm": 0.5323746204376221, "learning_rate": 0.0005613297470938721, "loss": 3.0224, "step": 10003 }, { "epoch": 0.49, "grad_norm": 0.5698615312576294, "learning_rate": 0.0005613221855701287, "loss": 3.3162, "step": 10004 }, { "epoch": 0.49, "grad_norm": 0.5549126267433167, "learning_rate": 0.000561314623358113, "loss": 2.9439, "step": 10005 }, { "epoch": 0.49, "grad_norm": 0.5148659944534302, "learning_rate": 0.0005613070604578448, "loss": 3.1785, "step": 10006 }, { "epoch": 0.49, "grad_norm": 0.5279960632324219, "learning_rate": 0.0005612994968693442, "loss": 3.1706, "step": 10007 }, { "epoch": 0.49, "grad_norm": 0.5391876697540283, "learning_rate": 0.0005612919325926308, "loss": 3.3005, "step": 10008 }, { "epoch": 0.49, "grad_norm": 0.5231080651283264, "learning_rate": 0.0005612843676277249, "loss": 3.3432, "step": 10009 }, { "epoch": 0.49, "grad_norm": 0.5668114423751831, "learning_rate": 0.0005612768019746464, "loss": 3.3572, "step": 10010 }, { "epoch": 0.49, "grad_norm": 0.5466375946998596, "learning_rate": 0.0005612692356334149, "loss": 3.2859, "step": 10011 }, { "epoch": 0.49, "grad_norm": 0.5328760147094727, "learning_rate": 0.0005612616686040505, "loss": 3.1786, "step": 10012 }, { "epoch": 0.49, "grad_norm": 0.5143555998802185, "learning_rate": 0.0005612541008865733, "loss": 3.2706, "step": 10013 }, { "epoch": 0.49, "grad_norm": 0.5146706104278564, "learning_rate": 0.0005612465324810029, "loss": 3.4123, "step": 10014 }, { "epoch": 0.49, "grad_norm": 0.4874396324157715, "learning_rate": 0.0005612389633873594, "loss": 3.3607, "step": 10015 }, { "epoch": 0.49, "grad_norm": 0.5126667022705078, "learning_rate": 0.0005612313936056629, "loss": 3.2169, "step": 10016 }, { "epoch": 0.49, "grad_norm": 0.5450606346130371, "learning_rate": 0.0005612238231359331, "loss": 3.3098, "step": 10017 }, { "epoch": 0.49, "grad_norm": 0.5393866896629333, "learning_rate": 0.0005612162519781901, "loss": 3.3113, "step": 10018 }, { "epoch": 0.49, "grad_norm": 0.5736554265022278, "learning_rate": 0.0005612086801324536, "loss": 3.3441, "step": 10019 }, { "epoch": 0.49, "grad_norm": 0.5407893061637878, "learning_rate": 0.0005612011075987439, "loss": 3.3276, "step": 10020 }, { "epoch": 0.49, "grad_norm": 0.5435847043991089, "learning_rate": 0.0005611935343770806, "loss": 3.1054, "step": 10021 }, { "epoch": 0.49, "grad_norm": 0.5143806338310242, "learning_rate": 0.0005611859604674839, "loss": 3.2321, "step": 10022 }, { "epoch": 0.49, "grad_norm": 0.5631576776504517, "learning_rate": 0.0005611783858699736, "loss": 3.2555, "step": 10023 }, { "epoch": 0.49, "grad_norm": 0.5366818904876709, "learning_rate": 0.0005611708105845697, "loss": 3.1924, "step": 10024 }, { "epoch": 0.49, "grad_norm": 0.5211443901062012, "learning_rate": 0.0005611632346112921, "loss": 3.3326, "step": 10025 }, { "epoch": 0.49, "grad_norm": 0.5157060623168945, "learning_rate": 0.000561155657950161, "loss": 3.3635, "step": 10026 }, { "epoch": 0.49, "grad_norm": 0.5200645327568054, "learning_rate": 0.0005611480806011959, "loss": 3.3153, "step": 10027 }, { "epoch": 0.49, "grad_norm": 0.5269324779510498, "learning_rate": 0.0005611405025644171, "loss": 3.2965, "step": 10028 }, { "epoch": 0.49, "grad_norm": 0.4967326521873474, "learning_rate": 0.0005611329238398446, "loss": 3.1998, "step": 10029 }, { "epoch": 0.49, "grad_norm": 0.5370751619338989, "learning_rate": 0.0005611253444274981, "loss": 3.2197, "step": 10030 }, { "epoch": 0.49, "grad_norm": 0.581263542175293, "learning_rate": 0.0005611177643273977, "loss": 3.2003, "step": 10031 }, { "epoch": 0.49, "grad_norm": 0.5175623297691345, "learning_rate": 0.0005611101835395634, "loss": 3.2288, "step": 10032 }, { "epoch": 0.49, "grad_norm": 0.5144805312156677, "learning_rate": 0.0005611026020640151, "loss": 3.2167, "step": 10033 }, { "epoch": 0.49, "grad_norm": 0.5305891633033752, "learning_rate": 0.0005610950199007728, "loss": 3.3823, "step": 10034 }, { "epoch": 0.49, "grad_norm": 0.5469315052032471, "learning_rate": 0.0005610874370498566, "loss": 3.0891, "step": 10035 }, { "epoch": 0.49, "grad_norm": 0.54054856300354, "learning_rate": 0.0005610798535112862, "loss": 3.3481, "step": 10036 }, { "epoch": 0.49, "grad_norm": 0.5348857641220093, "learning_rate": 0.0005610722692850817, "loss": 3.3687, "step": 10037 }, { "epoch": 0.49, "grad_norm": 0.5133238434791565, "learning_rate": 0.0005610646843712631, "loss": 3.3272, "step": 10038 }, { "epoch": 0.49, "grad_norm": 0.5264869928359985, "learning_rate": 0.0005610570987698504, "loss": 3.1792, "step": 10039 }, { "epoch": 0.49, "grad_norm": 0.5270009636878967, "learning_rate": 0.0005610495124808635, "loss": 3.1658, "step": 10040 }, { "epoch": 0.49, "grad_norm": 0.5224774479866028, "learning_rate": 0.0005610419255043225, "loss": 3.1901, "step": 10041 }, { "epoch": 0.49, "grad_norm": 0.5436100363731384, "learning_rate": 0.0005610343378402473, "loss": 3.2561, "step": 10042 }, { "epoch": 0.49, "grad_norm": 0.5261750817298889, "learning_rate": 0.0005610267494886578, "loss": 3.3767, "step": 10043 }, { "epoch": 0.49, "grad_norm": 0.5026256442070007, "learning_rate": 0.0005610191604495741, "loss": 3.0808, "step": 10044 }, { "epoch": 0.49, "grad_norm": 0.5132651329040527, "learning_rate": 0.0005610115707230162, "loss": 3.2861, "step": 10045 }, { "epoch": 0.49, "grad_norm": 0.5228791236877441, "learning_rate": 0.0005610039803090041, "loss": 3.1605, "step": 10046 }, { "epoch": 0.49, "grad_norm": 0.5274104475975037, "learning_rate": 0.0005609963892075577, "loss": 3.2428, "step": 10047 }, { "epoch": 0.49, "grad_norm": 0.53509920835495, "learning_rate": 0.0005609887974186969, "loss": 3.4265, "step": 10048 }, { "epoch": 0.49, "grad_norm": 0.5528433918952942, "learning_rate": 0.000560981204942442, "loss": 3.2642, "step": 10049 }, { "epoch": 0.49, "grad_norm": 0.5362628102302551, "learning_rate": 0.0005609736117788129, "loss": 3.0798, "step": 10050 }, { "epoch": 0.49, "grad_norm": 0.5504554510116577, "learning_rate": 0.0005609660179278295, "loss": 3.1879, "step": 10051 }, { "epoch": 0.49, "grad_norm": 0.5313317179679871, "learning_rate": 0.0005609584233895117, "loss": 3.3346, "step": 10052 }, { "epoch": 0.49, "grad_norm": 0.5195586085319519, "learning_rate": 0.0005609508281638798, "loss": 3.3072, "step": 10053 }, { "epoch": 0.49, "grad_norm": 0.5247373580932617, "learning_rate": 0.0005609432322509534, "loss": 3.222, "step": 10054 }, { "epoch": 0.49, "grad_norm": 0.5279762744903564, "learning_rate": 0.0005609356356507529, "loss": 3.2947, "step": 10055 }, { "epoch": 0.49, "grad_norm": 0.509883463382721, "learning_rate": 0.0005609280383632981, "loss": 3.4618, "step": 10056 }, { "epoch": 0.49, "grad_norm": 0.5072588324546814, "learning_rate": 0.0005609204403886092, "loss": 3.0462, "step": 10057 }, { "epoch": 0.49, "grad_norm": 0.5225916504859924, "learning_rate": 0.000560912841726706, "loss": 3.3883, "step": 10058 }, { "epoch": 0.49, "grad_norm": 0.5001477599143982, "learning_rate": 0.0005609052423776085, "loss": 3.3117, "step": 10059 }, { "epoch": 0.49, "grad_norm": 0.5187198519706726, "learning_rate": 0.0005608976423413369, "loss": 3.2966, "step": 10060 }, { "epoch": 0.49, "grad_norm": 0.5253342390060425, "learning_rate": 0.0005608900416179111, "loss": 3.3214, "step": 10061 }, { "epoch": 0.49, "grad_norm": 0.5546568632125854, "learning_rate": 0.0005608824402073512, "loss": 3.4283, "step": 10062 }, { "epoch": 0.49, "grad_norm": 0.5639400482177734, "learning_rate": 0.0005608748381096771, "loss": 3.3312, "step": 10063 }, { "epoch": 0.49, "grad_norm": 0.5478060245513916, "learning_rate": 0.0005608672353249089, "loss": 3.4401, "step": 10064 }, { "epoch": 0.49, "grad_norm": 0.5374152660369873, "learning_rate": 0.0005608596318530665, "loss": 3.4056, "step": 10065 }, { "epoch": 0.49, "grad_norm": 0.5391674041748047, "learning_rate": 0.0005608520276941701, "loss": 3.2503, "step": 10066 }, { "epoch": 0.49, "grad_norm": 0.504303514957428, "learning_rate": 0.0005608444228482398, "loss": 3.4028, "step": 10067 }, { "epoch": 0.49, "grad_norm": 0.5163530111312866, "learning_rate": 0.0005608368173152953, "loss": 3.2145, "step": 10068 }, { "epoch": 0.49, "grad_norm": 0.5681028366088867, "learning_rate": 0.0005608292110953569, "loss": 3.2051, "step": 10069 }, { "epoch": 0.49, "grad_norm": 0.5483840703964233, "learning_rate": 0.0005608216041884446, "loss": 3.1229, "step": 10070 }, { "epoch": 0.49, "grad_norm": 0.5046508312225342, "learning_rate": 0.0005608139965945783, "loss": 3.1301, "step": 10071 }, { "epoch": 0.49, "grad_norm": 0.5168729424476624, "learning_rate": 0.0005608063883137782, "loss": 3.4912, "step": 10072 }, { "epoch": 0.49, "grad_norm": 0.5135037302970886, "learning_rate": 0.0005607987793460642, "loss": 3.4096, "step": 10073 }, { "epoch": 0.49, "grad_norm": 0.524517834186554, "learning_rate": 0.0005607911696914565, "loss": 3.1052, "step": 10074 }, { "epoch": 0.49, "grad_norm": 0.4915313422679901, "learning_rate": 0.0005607835593499749, "loss": 3.2567, "step": 10075 }, { "epoch": 0.49, "grad_norm": 0.503527045249939, "learning_rate": 0.0005607759483216398, "loss": 3.0298, "step": 10076 }, { "epoch": 0.49, "grad_norm": 0.5238627791404724, "learning_rate": 0.0005607683366064709, "loss": 3.1529, "step": 10077 }, { "epoch": 0.49, "grad_norm": 0.5229913592338562, "learning_rate": 0.0005607607242044884, "loss": 3.3084, "step": 10078 }, { "epoch": 0.49, "grad_norm": 0.5081816911697388, "learning_rate": 0.0005607531111157123, "loss": 3.2387, "step": 10079 }, { "epoch": 0.49, "grad_norm": 0.5463985800743103, "learning_rate": 0.0005607454973401627, "loss": 3.1542, "step": 10080 }, { "epoch": 0.49, "grad_norm": 0.5712737441062927, "learning_rate": 0.0005607378828778598, "loss": 3.3672, "step": 10081 }, { "epoch": 0.49, "grad_norm": 0.48129820823669434, "learning_rate": 0.0005607302677288233, "loss": 3.5522, "step": 10082 }, { "epoch": 0.49, "grad_norm": 0.5331260561943054, "learning_rate": 0.0005607226518930735, "loss": 3.2012, "step": 10083 }, { "epoch": 0.49, "grad_norm": 0.5193986296653748, "learning_rate": 0.0005607150353706305, "loss": 3.3374, "step": 10084 }, { "epoch": 0.49, "grad_norm": 0.5116517543792725, "learning_rate": 0.0005607074181615141, "loss": 3.2151, "step": 10085 }, { "epoch": 0.49, "grad_norm": 0.5041857957839966, "learning_rate": 0.0005606998002657447, "loss": 3.2215, "step": 10086 }, { "epoch": 0.49, "grad_norm": 0.5375064015388489, "learning_rate": 0.0005606921816833422, "loss": 3.3488, "step": 10087 }, { "epoch": 0.49, "grad_norm": 0.5213835835456848, "learning_rate": 0.0005606845624143266, "loss": 3.346, "step": 10088 }, { "epoch": 0.49, "grad_norm": 0.5121163725852966, "learning_rate": 0.000560676942458718, "loss": 3.0301, "step": 10089 }, { "epoch": 0.49, "grad_norm": 0.5614414811134338, "learning_rate": 0.0005606693218165366, "loss": 3.2334, "step": 10090 }, { "epoch": 0.49, "grad_norm": 0.5325416326522827, "learning_rate": 0.0005606617004878024, "loss": 3.3862, "step": 10091 }, { "epoch": 0.49, "grad_norm": 0.5521125793457031, "learning_rate": 0.0005606540784725353, "loss": 3.3762, "step": 10092 }, { "epoch": 0.49, "grad_norm": 0.5565793514251709, "learning_rate": 0.0005606464557707555, "loss": 3.1573, "step": 10093 }, { "epoch": 0.49, "grad_norm": 0.5189270973205566, "learning_rate": 0.0005606388323824832, "loss": 3.4819, "step": 10094 }, { "epoch": 0.49, "grad_norm": 0.5117472410202026, "learning_rate": 0.0005606312083077384, "loss": 3.2461, "step": 10095 }, { "epoch": 0.49, "grad_norm": 0.5243008732795715, "learning_rate": 0.0005606235835465412, "loss": 3.0837, "step": 10096 }, { "epoch": 0.49, "grad_norm": 0.5307744145393372, "learning_rate": 0.0005606159580989115, "loss": 3.2632, "step": 10097 }, { "epoch": 0.49, "grad_norm": 0.5260905027389526, "learning_rate": 0.0005606083319648695, "loss": 3.2347, "step": 10098 }, { "epoch": 0.49, "grad_norm": 0.5161594152450562, "learning_rate": 0.0005606007051444354, "loss": 3.214, "step": 10099 }, { "epoch": 0.49, "grad_norm": 0.5047897100448608, "learning_rate": 0.0005605930776376293, "loss": 3.1914, "step": 10100 }, { "epoch": 0.5, "grad_norm": 0.4992629885673523, "learning_rate": 0.000560585449444471, "loss": 3.4533, "step": 10101 }, { "epoch": 0.5, "grad_norm": 0.4943458139896393, "learning_rate": 0.0005605778205649808, "loss": 3.217, "step": 10102 }, { "epoch": 0.5, "grad_norm": 0.5585146546363831, "learning_rate": 0.0005605701909991789, "loss": 3.0695, "step": 10103 }, { "epoch": 0.5, "grad_norm": 0.5517671704292297, "learning_rate": 0.0005605625607470851, "loss": 3.0032, "step": 10104 }, { "epoch": 0.5, "grad_norm": 0.5213403105735779, "learning_rate": 0.0005605549298087197, "loss": 3.3822, "step": 10105 }, { "epoch": 0.5, "grad_norm": 0.5177978277206421, "learning_rate": 0.0005605472981841028, "loss": 3.4062, "step": 10106 }, { "epoch": 0.5, "grad_norm": 0.5317561030387878, "learning_rate": 0.0005605396658732545, "loss": 3.2718, "step": 10107 }, { "epoch": 0.5, "grad_norm": 0.5072512626647949, "learning_rate": 0.0005605320328761948, "loss": 3.3612, "step": 10108 }, { "epoch": 0.5, "grad_norm": 0.519949734210968, "learning_rate": 0.000560524399192944, "loss": 3.4358, "step": 10109 }, { "epoch": 0.5, "grad_norm": 0.4792541563510895, "learning_rate": 0.0005605167648235218, "loss": 3.2888, "step": 10110 }, { "epoch": 0.5, "grad_norm": 0.5226483345031738, "learning_rate": 0.0005605091297679488, "loss": 3.1335, "step": 10111 }, { "epoch": 0.5, "grad_norm": 0.5207281112670898, "learning_rate": 0.0005605014940262449, "loss": 3.3549, "step": 10112 }, { "epoch": 0.5, "grad_norm": 0.5150095224380493, "learning_rate": 0.0005604938575984301, "loss": 3.0484, "step": 10113 }, { "epoch": 0.5, "grad_norm": 0.5432024002075195, "learning_rate": 0.0005604862204845245, "loss": 3.1677, "step": 10114 }, { "epoch": 0.5, "grad_norm": 0.5010020732879639, "learning_rate": 0.0005604785826845486, "loss": 3.2577, "step": 10115 }, { "epoch": 0.5, "grad_norm": 0.49979767203330994, "learning_rate": 0.0005604709441985222, "loss": 3.3097, "step": 10116 }, { "epoch": 0.5, "grad_norm": 0.5717746615409851, "learning_rate": 0.0005604633050264652, "loss": 3.1078, "step": 10117 }, { "epoch": 0.5, "grad_norm": 0.530775785446167, "learning_rate": 0.0005604556651683982, "loss": 3.0593, "step": 10118 }, { "epoch": 0.5, "grad_norm": 0.5330826640129089, "learning_rate": 0.000560448024624341, "loss": 3.3647, "step": 10119 }, { "epoch": 0.5, "grad_norm": 0.4870454668998718, "learning_rate": 0.000560440383394314, "loss": 3.3083, "step": 10120 }, { "epoch": 0.5, "grad_norm": 0.5176739692687988, "learning_rate": 0.000560432741478337, "loss": 3.3033, "step": 10121 }, { "epoch": 0.5, "grad_norm": 0.5079305768013, "learning_rate": 0.0005604250988764303, "loss": 3.3905, "step": 10122 }, { "epoch": 0.5, "grad_norm": 0.5102031826972961, "learning_rate": 0.000560417455588614, "loss": 3.2952, "step": 10123 }, { "epoch": 0.5, "grad_norm": 0.5234565138816833, "learning_rate": 0.0005604098116149083, "loss": 3.4796, "step": 10124 }, { "epoch": 0.5, "grad_norm": 0.48953551054000854, "learning_rate": 0.0005604021669553332, "loss": 3.135, "step": 10125 }, { "epoch": 0.5, "grad_norm": 0.5201693773269653, "learning_rate": 0.000560394521609909, "loss": 3.2396, "step": 10126 }, { "epoch": 0.5, "grad_norm": 0.5162103176116943, "learning_rate": 0.0005603868755786557, "loss": 3.3356, "step": 10127 }, { "epoch": 0.5, "grad_norm": 0.535090982913971, "learning_rate": 0.0005603792288615935, "loss": 3.2915, "step": 10128 }, { "epoch": 0.5, "grad_norm": 0.5197641253471375, "learning_rate": 0.0005603715814587425, "loss": 3.1518, "step": 10129 }, { "epoch": 0.5, "grad_norm": 0.5615101456642151, "learning_rate": 0.0005603639333701228, "loss": 3.3298, "step": 10130 }, { "epoch": 0.5, "grad_norm": 0.5671878457069397, "learning_rate": 0.0005603562845957548, "loss": 3.2195, "step": 10131 }, { "epoch": 0.5, "grad_norm": 0.654164731502533, "learning_rate": 0.0005603486351356582, "loss": 3.2873, "step": 10132 }, { "epoch": 0.5, "grad_norm": 0.5772500038146973, "learning_rate": 0.0005603409849898535, "loss": 3.2613, "step": 10133 }, { "epoch": 0.5, "grad_norm": 0.5550999641418457, "learning_rate": 0.0005603333341583607, "loss": 3.2879, "step": 10134 }, { "epoch": 0.5, "grad_norm": 0.49901139736175537, "learning_rate": 0.0005603256826412002, "loss": 3.1976, "step": 10135 }, { "epoch": 0.5, "grad_norm": 0.5248491764068604, "learning_rate": 0.0005603180304383917, "loss": 3.4905, "step": 10136 }, { "epoch": 0.5, "grad_norm": 0.5227196216583252, "learning_rate": 0.0005603103775499559, "loss": 3.3214, "step": 10137 }, { "epoch": 0.5, "grad_norm": 0.5142403841018677, "learning_rate": 0.0005603027239759124, "loss": 3.1934, "step": 10138 }, { "epoch": 0.5, "grad_norm": 0.5276763439178467, "learning_rate": 0.0005602950697162817, "loss": 3.1553, "step": 10139 }, { "epoch": 0.5, "grad_norm": 0.4911149740219116, "learning_rate": 0.0005602874147710838, "loss": 3.3715, "step": 10140 }, { "epoch": 0.5, "grad_norm": 0.5246410965919495, "learning_rate": 0.0005602797591403391, "loss": 3.1999, "step": 10141 }, { "epoch": 0.5, "grad_norm": 0.5695706605911255, "learning_rate": 0.0005602721028240675, "loss": 3.4837, "step": 10142 }, { "epoch": 0.5, "grad_norm": 0.5213264226913452, "learning_rate": 0.0005602644458222893, "loss": 3.258, "step": 10143 }, { "epoch": 0.5, "grad_norm": 0.5553370714187622, "learning_rate": 0.0005602567881350248, "loss": 3.3955, "step": 10144 }, { "epoch": 0.5, "grad_norm": 0.5400976538658142, "learning_rate": 0.0005602491297622939, "loss": 3.2611, "step": 10145 }, { "epoch": 0.5, "grad_norm": 0.5228300094604492, "learning_rate": 0.0005602414707041168, "loss": 3.2218, "step": 10146 }, { "epoch": 0.5, "grad_norm": 0.5486548542976379, "learning_rate": 0.0005602338109605137, "loss": 3.2305, "step": 10147 }, { "epoch": 0.5, "grad_norm": 0.5071641802787781, "learning_rate": 0.0005602261505315049, "loss": 3.2951, "step": 10148 }, { "epoch": 0.5, "grad_norm": 0.5746123790740967, "learning_rate": 0.0005602184894171106, "loss": 3.3812, "step": 10149 }, { "epoch": 0.5, "grad_norm": 0.5090572237968445, "learning_rate": 0.0005602108276173507, "loss": 3.2406, "step": 10150 }, { "epoch": 0.5, "grad_norm": 0.5613428354263306, "learning_rate": 0.0005602031651322457, "loss": 3.248, "step": 10151 }, { "epoch": 0.5, "grad_norm": 0.49390625953674316, "learning_rate": 0.0005601955019618156, "loss": 3.1618, "step": 10152 }, { "epoch": 0.5, "grad_norm": 0.5180720686912537, "learning_rate": 0.0005601878381060807, "loss": 3.3309, "step": 10153 }, { "epoch": 0.5, "grad_norm": 0.4854278266429901, "learning_rate": 0.0005601801735650609, "loss": 3.4998, "step": 10154 }, { "epoch": 0.5, "grad_norm": 0.539308488368988, "learning_rate": 0.0005601725083387767, "loss": 3.3767, "step": 10155 }, { "epoch": 0.5, "grad_norm": 0.5451698899269104, "learning_rate": 0.0005601648424272482, "loss": 3.0673, "step": 10156 }, { "epoch": 0.5, "grad_norm": 0.5229968428611755, "learning_rate": 0.0005601571758304957, "loss": 3.2044, "step": 10157 }, { "epoch": 0.5, "grad_norm": 0.5304858088493347, "learning_rate": 0.0005601495085485391, "loss": 3.4131, "step": 10158 }, { "epoch": 0.5, "grad_norm": 0.5545448064804077, "learning_rate": 0.0005601418405813989, "loss": 3.3089, "step": 10159 }, { "epoch": 0.5, "grad_norm": 0.47743499279022217, "learning_rate": 0.000560134171929095, "loss": 3.2294, "step": 10160 }, { "epoch": 0.5, "grad_norm": 0.5139665007591248, "learning_rate": 0.0005601265025916478, "loss": 3.2909, "step": 10161 }, { "epoch": 0.5, "grad_norm": 0.54677414894104, "learning_rate": 0.0005601188325690774, "loss": 3.0912, "step": 10162 }, { "epoch": 0.5, "grad_norm": 0.5394816994667053, "learning_rate": 0.0005601111618614041, "loss": 3.1764, "step": 10163 }, { "epoch": 0.5, "grad_norm": 0.5438968539237976, "learning_rate": 0.0005601034904686482, "loss": 3.1558, "step": 10164 }, { "epoch": 0.5, "grad_norm": 0.5133037567138672, "learning_rate": 0.0005600958183908296, "loss": 3.2835, "step": 10165 }, { "epoch": 0.5, "grad_norm": 0.5024428963661194, "learning_rate": 0.0005600881456279687, "loss": 3.1066, "step": 10166 }, { "epoch": 0.5, "grad_norm": 0.5286892652511597, "learning_rate": 0.0005600804721800857, "loss": 3.1596, "step": 10167 }, { "epoch": 0.5, "grad_norm": 0.5346733927726746, "learning_rate": 0.0005600727980472009, "loss": 3.4603, "step": 10168 }, { "epoch": 0.5, "grad_norm": 0.51430344581604, "learning_rate": 0.0005600651232293343, "loss": 3.3469, "step": 10169 }, { "epoch": 0.5, "grad_norm": 0.4911240041255951, "learning_rate": 0.0005600574477265062, "loss": 3.4604, "step": 10170 }, { "epoch": 0.5, "grad_norm": 0.5240756273269653, "learning_rate": 0.0005600497715387368, "loss": 3.4024, "step": 10171 }, { "epoch": 0.5, "grad_norm": 0.5592271685600281, "learning_rate": 0.0005600420946660464, "loss": 2.9897, "step": 10172 }, { "epoch": 0.5, "grad_norm": 0.49334201216697693, "learning_rate": 0.0005600344171084551, "loss": 3.098, "step": 10173 }, { "epoch": 0.5, "grad_norm": 0.5433202385902405, "learning_rate": 0.0005600267388659833, "loss": 3.0928, "step": 10174 }, { "epoch": 0.5, "grad_norm": 0.5227448344230652, "learning_rate": 0.0005600190599386511, "loss": 3.0703, "step": 10175 }, { "epoch": 0.5, "grad_norm": 0.5543997287750244, "learning_rate": 0.0005600113803264787, "loss": 3.0732, "step": 10176 }, { "epoch": 0.5, "grad_norm": 0.4942059814929962, "learning_rate": 0.0005600037000294863, "loss": 3.2383, "step": 10177 }, { "epoch": 0.5, "grad_norm": 0.5727730393409729, "learning_rate": 0.0005599960190476943, "loss": 3.1809, "step": 10178 }, { "epoch": 0.5, "grad_norm": 0.5260288119316101, "learning_rate": 0.0005599883373811228, "loss": 3.3584, "step": 10179 }, { "epoch": 0.5, "grad_norm": 0.5285677313804626, "learning_rate": 0.0005599806550297921, "loss": 3.2441, "step": 10180 }, { "epoch": 0.5, "grad_norm": 0.5523196458816528, "learning_rate": 0.0005599729719937222, "loss": 3.2402, "step": 10181 }, { "epoch": 0.5, "grad_norm": 0.5142799019813538, "learning_rate": 0.0005599652882729337, "loss": 3.1881, "step": 10182 }, { "epoch": 0.5, "grad_norm": 0.5352577567100525, "learning_rate": 0.0005599576038674466, "loss": 3.3204, "step": 10183 }, { "epoch": 0.5, "grad_norm": 0.5273776650428772, "learning_rate": 0.0005599499187772813, "loss": 3.105, "step": 10184 }, { "epoch": 0.5, "grad_norm": 0.5525439381599426, "learning_rate": 0.0005599422330024578, "loss": 3.2628, "step": 10185 }, { "epoch": 0.5, "grad_norm": 0.5069778561592102, "learning_rate": 0.0005599345465429965, "loss": 3.0637, "step": 10186 }, { "epoch": 0.5, "grad_norm": 0.5232580900192261, "learning_rate": 0.0005599268593989177, "loss": 3.1459, "step": 10187 }, { "epoch": 0.5, "grad_norm": 0.5141770839691162, "learning_rate": 0.0005599191715702416, "loss": 3.2485, "step": 10188 }, { "epoch": 0.5, "grad_norm": 0.5252525210380554, "learning_rate": 0.0005599114830569884, "loss": 3.5268, "step": 10189 }, { "epoch": 0.5, "grad_norm": 0.552005410194397, "learning_rate": 0.0005599037938591782, "loss": 3.3599, "step": 10190 }, { "epoch": 0.5, "grad_norm": 0.4965636134147644, "learning_rate": 0.0005598961039768316, "loss": 3.1345, "step": 10191 }, { "epoch": 0.5, "grad_norm": 0.5195544362068176, "learning_rate": 0.0005598884134099687, "loss": 3.2845, "step": 10192 }, { "epoch": 0.5, "grad_norm": 0.5534344911575317, "learning_rate": 0.0005598807221586097, "loss": 3.2648, "step": 10193 }, { "epoch": 0.5, "grad_norm": 0.5047644376754761, "learning_rate": 0.0005598730302227749, "loss": 3.363, "step": 10194 }, { "epoch": 0.5, "grad_norm": 0.5294030904769897, "learning_rate": 0.0005598653376024845, "loss": 3.3243, "step": 10195 }, { "epoch": 0.5, "grad_norm": 0.4891115725040436, "learning_rate": 0.0005598576442977588, "loss": 3.1471, "step": 10196 }, { "epoch": 0.5, "grad_norm": 0.4774000942707062, "learning_rate": 0.0005598499503086182, "loss": 3.5423, "step": 10197 }, { "epoch": 0.5, "grad_norm": 0.5198843479156494, "learning_rate": 0.0005598422556350827, "loss": 3.0888, "step": 10198 }, { "epoch": 0.5, "grad_norm": 0.5236388444900513, "learning_rate": 0.0005598345602771728, "loss": 3.2707, "step": 10199 }, { "epoch": 0.5, "grad_norm": 0.5683999061584473, "learning_rate": 0.0005598268642349086, "loss": 3.4979, "step": 10200 }, { "epoch": 0.5, "grad_norm": 0.5080311298370361, "learning_rate": 0.0005598191675083105, "loss": 3.2974, "step": 10201 }, { "epoch": 0.5, "grad_norm": 0.5282857418060303, "learning_rate": 0.0005598114700973988, "loss": 3.2676, "step": 10202 }, { "epoch": 0.5, "grad_norm": 0.5361818671226501, "learning_rate": 0.0005598037720021935, "loss": 3.1697, "step": 10203 }, { "epoch": 0.5, "grad_norm": 0.5248768925666809, "learning_rate": 0.0005597960732227152, "loss": 3.2677, "step": 10204 }, { "epoch": 0.5, "grad_norm": 0.5688296556472778, "learning_rate": 0.000559788373758984, "loss": 3.2665, "step": 10205 }, { "epoch": 0.5, "grad_norm": 0.5076455473899841, "learning_rate": 0.0005597806736110202, "loss": 3.2625, "step": 10206 }, { "epoch": 0.5, "grad_norm": 0.5241527557373047, "learning_rate": 0.0005597729727788442, "loss": 3.3101, "step": 10207 }, { "epoch": 0.5, "grad_norm": 0.561117947101593, "learning_rate": 0.0005597652712624761, "loss": 3.3028, "step": 10208 }, { "epoch": 0.5, "grad_norm": 0.49607518315315247, "learning_rate": 0.0005597575690619362, "loss": 3.3582, "step": 10209 }, { "epoch": 0.5, "grad_norm": 0.5208153128623962, "learning_rate": 0.000559749866177245, "loss": 3.2614, "step": 10210 }, { "epoch": 0.5, "grad_norm": 0.47716373205184937, "learning_rate": 0.0005597421626084225, "loss": 3.0946, "step": 10211 }, { "epoch": 0.5, "grad_norm": 0.5305400490760803, "learning_rate": 0.0005597344583554893, "loss": 3.3251, "step": 10212 }, { "epoch": 0.5, "grad_norm": 0.49316057562828064, "learning_rate": 0.0005597267534184654, "loss": 3.4436, "step": 10213 }, { "epoch": 0.5, "grad_norm": 0.5044198036193848, "learning_rate": 0.0005597190477973712, "loss": 3.4866, "step": 10214 }, { "epoch": 0.5, "grad_norm": 0.5129861831665039, "learning_rate": 0.0005597113414922271, "loss": 3.1051, "step": 10215 }, { "epoch": 0.5, "grad_norm": 0.5174407362937927, "learning_rate": 0.0005597036345030532, "loss": 3.2929, "step": 10216 }, { "epoch": 0.5, "grad_norm": 0.5471522808074951, "learning_rate": 0.00055969592682987, "loss": 3.3473, "step": 10217 }, { "epoch": 0.5, "grad_norm": 0.5389727354049683, "learning_rate": 0.0005596882184726976, "loss": 3.4021, "step": 10218 }, { "epoch": 0.5, "grad_norm": 0.5008918642997742, "learning_rate": 0.0005596805094315565, "loss": 3.2045, "step": 10219 }, { "epoch": 0.5, "grad_norm": 0.5358201265335083, "learning_rate": 0.0005596727997064669, "loss": 3.5891, "step": 10220 }, { "epoch": 0.5, "grad_norm": 0.5327726602554321, "learning_rate": 0.0005596650892974492, "loss": 3.3117, "step": 10221 }, { "epoch": 0.5, "grad_norm": 0.549594521522522, "learning_rate": 0.0005596573782045236, "loss": 3.2777, "step": 10222 }, { "epoch": 0.5, "grad_norm": 0.4677071273326874, "learning_rate": 0.0005596496664277104, "loss": 3.1865, "step": 10223 }, { "epoch": 0.5, "grad_norm": 0.5113584995269775, "learning_rate": 0.0005596419539670299, "loss": 3.4198, "step": 10224 }, { "epoch": 0.5, "grad_norm": 0.5231432914733887, "learning_rate": 0.0005596342408225024, "loss": 3.2867, "step": 10225 }, { "epoch": 0.5, "grad_norm": 0.5117649435997009, "learning_rate": 0.0005596265269941485, "loss": 3.266, "step": 10226 }, { "epoch": 0.5, "grad_norm": 0.5720617771148682, "learning_rate": 0.0005596188124819881, "loss": 3.4351, "step": 10227 }, { "epoch": 0.5, "grad_norm": 0.5223726034164429, "learning_rate": 0.0005596110972860419, "loss": 3.2824, "step": 10228 }, { "epoch": 0.5, "grad_norm": 0.5558088421821594, "learning_rate": 0.0005596033814063299, "loss": 3.3895, "step": 10229 }, { "epoch": 0.5, "grad_norm": 0.5451314449310303, "learning_rate": 0.0005595956648428726, "loss": 3.3244, "step": 10230 }, { "epoch": 0.5, "grad_norm": 0.5551238059997559, "learning_rate": 0.0005595879475956902, "loss": 3.2968, "step": 10231 }, { "epoch": 0.5, "grad_norm": 0.5394368171691895, "learning_rate": 0.0005595802296648031, "loss": 3.0322, "step": 10232 }, { "epoch": 0.5, "grad_norm": 0.5566186904907227, "learning_rate": 0.0005595725110502317, "loss": 3.4952, "step": 10233 }, { "epoch": 0.5, "grad_norm": 0.508893609046936, "learning_rate": 0.0005595647917519962, "loss": 3.338, "step": 10234 }, { "epoch": 0.5, "grad_norm": 0.5071843266487122, "learning_rate": 0.0005595570717701172, "loss": 3.3411, "step": 10235 }, { "epoch": 0.5, "grad_norm": 0.5460813045501709, "learning_rate": 0.0005595493511046146, "loss": 3.3642, "step": 10236 }, { "epoch": 0.5, "grad_norm": 0.5000070929527283, "learning_rate": 0.000559541629755509, "loss": 3.4009, "step": 10237 }, { "epoch": 0.5, "grad_norm": 0.5307058691978455, "learning_rate": 0.0005595339077228207, "loss": 3.2637, "step": 10238 }, { "epoch": 0.5, "grad_norm": 0.5203930735588074, "learning_rate": 0.0005595261850065701, "loss": 3.3635, "step": 10239 }, { "epoch": 0.5, "grad_norm": 0.5376899242401123, "learning_rate": 0.0005595184616067775, "loss": 3.3816, "step": 10240 }, { "epoch": 0.5, "grad_norm": 0.661049485206604, "learning_rate": 0.0005595107375234631, "loss": 3.3454, "step": 10241 }, { "epoch": 0.5, "grad_norm": 0.505993664264679, "learning_rate": 0.0005595030127566474, "loss": 3.274, "step": 10242 }, { "epoch": 0.5, "grad_norm": 0.4986911118030548, "learning_rate": 0.0005594952873063507, "loss": 3.2775, "step": 10243 }, { "epoch": 0.5, "grad_norm": 0.5380048155784607, "learning_rate": 0.0005594875611725934, "loss": 3.2546, "step": 10244 }, { "epoch": 0.5, "grad_norm": 0.5448837280273438, "learning_rate": 0.0005594798343553959, "loss": 3.0941, "step": 10245 }, { "epoch": 0.5, "grad_norm": 0.5074895024299622, "learning_rate": 0.0005594721068547783, "loss": 3.5444, "step": 10246 }, { "epoch": 0.5, "grad_norm": 0.5164600014686584, "learning_rate": 0.0005594643786707611, "loss": 3.3968, "step": 10247 }, { "epoch": 0.5, "grad_norm": 0.49650079011917114, "learning_rate": 0.0005594566498033647, "loss": 3.4343, "step": 10248 }, { "epoch": 0.5, "grad_norm": 0.4930596351623535, "learning_rate": 0.0005594489202526094, "loss": 3.3165, "step": 10249 }, { "epoch": 0.5, "grad_norm": 0.5038466453552246, "learning_rate": 0.0005594411900185156, "loss": 3.3183, "step": 10250 }, { "epoch": 0.5, "grad_norm": 0.5192158818244934, "learning_rate": 0.0005594334591011036, "loss": 3.2953, "step": 10251 }, { "epoch": 0.5, "grad_norm": 0.5114601254463196, "learning_rate": 0.0005594257275003938, "loss": 3.4671, "step": 10252 }, { "epoch": 0.5, "grad_norm": 0.5503695011138916, "learning_rate": 0.0005594179952164067, "loss": 3.4004, "step": 10253 }, { "epoch": 0.5, "grad_norm": 0.49898719787597656, "learning_rate": 0.0005594102622491624, "loss": 3.2809, "step": 10254 }, { "epoch": 0.5, "grad_norm": 0.5273557305335999, "learning_rate": 0.0005594025285986814, "loss": 3.3543, "step": 10255 }, { "epoch": 0.5, "grad_norm": 0.5539382100105286, "learning_rate": 0.0005593947942649841, "loss": 3.2133, "step": 10256 }, { "epoch": 0.5, "grad_norm": 0.5285389423370361, "learning_rate": 0.0005593870592480908, "loss": 3.2378, "step": 10257 }, { "epoch": 0.5, "grad_norm": 0.504951000213623, "learning_rate": 0.000559379323548022, "loss": 3.3102, "step": 10258 }, { "epoch": 0.5, "grad_norm": 0.5255095362663269, "learning_rate": 0.0005593715871647978, "loss": 3.2966, "step": 10259 }, { "epoch": 0.5, "grad_norm": 0.48558107018470764, "learning_rate": 0.0005593638500984389, "loss": 3.4101, "step": 10260 }, { "epoch": 0.5, "grad_norm": 0.5173567533493042, "learning_rate": 0.0005593561123489654, "loss": 3.4406, "step": 10261 }, { "epoch": 0.5, "grad_norm": 0.5314114093780518, "learning_rate": 0.0005593483739163979, "loss": 3.244, "step": 10262 }, { "epoch": 0.5, "grad_norm": 0.502029538154602, "learning_rate": 0.0005593406348007567, "loss": 3.39, "step": 10263 }, { "epoch": 0.5, "grad_norm": 0.5283157825469971, "learning_rate": 0.0005593328950020623, "loss": 3.2418, "step": 10264 }, { "epoch": 0.5, "grad_norm": 0.5121799111366272, "learning_rate": 0.0005593251545203348, "loss": 3.2263, "step": 10265 }, { "epoch": 0.5, "grad_norm": 0.517483651638031, "learning_rate": 0.0005593174133555948, "loss": 3.3841, "step": 10266 }, { "epoch": 0.5, "grad_norm": 0.5155675411224365, "learning_rate": 0.0005593096715078626, "loss": 3.0287, "step": 10267 }, { "epoch": 0.5, "grad_norm": 0.5659499168395996, "learning_rate": 0.0005593019289771587, "loss": 3.11, "step": 10268 }, { "epoch": 0.5, "grad_norm": 0.5271885395050049, "learning_rate": 0.0005592941857635034, "loss": 3.3701, "step": 10269 }, { "epoch": 0.5, "grad_norm": 0.5610504746437073, "learning_rate": 0.0005592864418669171, "loss": 3.2141, "step": 10270 }, { "epoch": 0.5, "grad_norm": 0.5438143014907837, "learning_rate": 0.0005592786972874203, "loss": 3.1933, "step": 10271 }, { "epoch": 0.5, "grad_norm": 0.5469740033149719, "learning_rate": 0.0005592709520250331, "loss": 3.3807, "step": 10272 }, { "epoch": 0.5, "grad_norm": 0.530945360660553, "learning_rate": 0.0005592632060797762, "loss": 3.3132, "step": 10273 }, { "epoch": 0.5, "grad_norm": 0.5154049396514893, "learning_rate": 0.00055925545945167, "loss": 3.2693, "step": 10274 }, { "epoch": 0.5, "grad_norm": 0.5086045861244202, "learning_rate": 0.0005592477121407347, "loss": 3.267, "step": 10275 }, { "epoch": 0.5, "grad_norm": 0.5159934163093567, "learning_rate": 0.000559239964146991, "loss": 3.1684, "step": 10276 }, { "epoch": 0.5, "grad_norm": 0.4987911283969879, "learning_rate": 0.000559232215470459, "loss": 3.238, "step": 10277 }, { "epoch": 0.5, "grad_norm": 0.5027976632118225, "learning_rate": 0.0005592244661111592, "loss": 3.2168, "step": 10278 }, { "epoch": 0.5, "grad_norm": 0.5337111353874207, "learning_rate": 0.0005592167160691121, "loss": 3.2408, "step": 10279 }, { "epoch": 0.5, "grad_norm": 0.4724106192588806, "learning_rate": 0.000559208965344338, "loss": 3.201, "step": 10280 }, { "epoch": 0.5, "grad_norm": 0.5122838616371155, "learning_rate": 0.0005592012139368574, "loss": 3.1215, "step": 10281 }, { "epoch": 0.5, "grad_norm": 0.5199552774429321, "learning_rate": 0.0005591934618466908, "loss": 3.2236, "step": 10282 }, { "epoch": 0.5, "grad_norm": 0.506280243396759, "learning_rate": 0.0005591857090738584, "loss": 3.0693, "step": 10283 }, { "epoch": 0.5, "grad_norm": 0.5122705101966858, "learning_rate": 0.0005591779556183807, "loss": 3.2787, "step": 10284 }, { "epoch": 0.5, "grad_norm": 0.47597208619117737, "learning_rate": 0.0005591702014802782, "loss": 3.3259, "step": 10285 }, { "epoch": 0.5, "grad_norm": 0.5174579620361328, "learning_rate": 0.0005591624466595712, "loss": 3.2819, "step": 10286 }, { "epoch": 0.5, "grad_norm": 0.5105884075164795, "learning_rate": 0.0005591546911562802, "loss": 3.4321, "step": 10287 }, { "epoch": 0.5, "grad_norm": 0.5002108812332153, "learning_rate": 0.0005591469349704257, "loss": 3.2793, "step": 10288 }, { "epoch": 0.5, "grad_norm": 0.5126345157623291, "learning_rate": 0.000559139178102028, "loss": 3.4978, "step": 10289 }, { "epoch": 0.5, "grad_norm": 0.5179390907287598, "learning_rate": 0.0005591314205511075, "loss": 3.3352, "step": 10290 }, { "epoch": 0.5, "grad_norm": 0.49281585216522217, "learning_rate": 0.0005591236623176849, "loss": 3.2815, "step": 10291 }, { "epoch": 0.5, "grad_norm": 0.5143146514892578, "learning_rate": 0.0005591159034017803, "loss": 3.4356, "step": 10292 }, { "epoch": 0.5, "grad_norm": 0.5127323865890503, "learning_rate": 0.0005591081438034143, "loss": 3.2153, "step": 10293 }, { "epoch": 0.5, "grad_norm": 0.5019605755805969, "learning_rate": 0.0005591003835226073, "loss": 3.3087, "step": 10294 }, { "epoch": 0.5, "grad_norm": 0.5273397564888, "learning_rate": 0.0005590926225593798, "loss": 3.2319, "step": 10295 }, { "epoch": 0.5, "grad_norm": 0.528471052646637, "learning_rate": 0.0005590848609137521, "loss": 3.3845, "step": 10296 }, { "epoch": 0.5, "grad_norm": 0.5653498768806458, "learning_rate": 0.0005590770985857448, "loss": 3.2638, "step": 10297 }, { "epoch": 0.5, "grad_norm": 0.5153144001960754, "learning_rate": 0.0005590693355753784, "loss": 3.3797, "step": 10298 }, { "epoch": 0.5, "grad_norm": 0.5238440632820129, "learning_rate": 0.0005590615718826731, "loss": 3.3346, "step": 10299 }, { "epoch": 0.5, "grad_norm": 0.5217507481575012, "learning_rate": 0.0005590538075076494, "loss": 3.0556, "step": 10300 }, { "epoch": 0.5, "grad_norm": 0.5109837055206299, "learning_rate": 0.000559046042450328, "loss": 3.4186, "step": 10301 }, { "epoch": 0.5, "grad_norm": 0.5007416009902954, "learning_rate": 0.0005590382767107291, "loss": 3.403, "step": 10302 }, { "epoch": 0.5, "grad_norm": 0.5148555040359497, "learning_rate": 0.0005590305102888732, "loss": 3.2634, "step": 10303 }, { "epoch": 0.5, "grad_norm": 0.5685636401176453, "learning_rate": 0.0005590227431847809, "loss": 3.3305, "step": 10304 }, { "epoch": 0.51, "grad_norm": 0.5556630492210388, "learning_rate": 0.0005590149753984724, "loss": 3.3192, "step": 10305 }, { "epoch": 0.51, "grad_norm": 0.545971691608429, "learning_rate": 0.0005590072069299684, "loss": 3.1871, "step": 10306 }, { "epoch": 0.51, "grad_norm": 0.5093954801559448, "learning_rate": 0.0005589994377792893, "loss": 3.3295, "step": 10307 }, { "epoch": 0.51, "grad_norm": 0.5161018967628479, "learning_rate": 0.0005589916679464554, "loss": 3.2058, "step": 10308 }, { "epoch": 0.51, "grad_norm": 0.527915358543396, "learning_rate": 0.0005589838974314874, "loss": 3.2002, "step": 10309 }, { "epoch": 0.51, "grad_norm": 0.5070471167564392, "learning_rate": 0.0005589761262344056, "loss": 3.0882, "step": 10310 }, { "epoch": 0.51, "grad_norm": 0.4909028708934784, "learning_rate": 0.0005589683543552305, "loss": 3.2828, "step": 10311 }, { "epoch": 0.51, "grad_norm": 0.5422300696372986, "learning_rate": 0.0005589605817939826, "loss": 3.3773, "step": 10312 }, { "epoch": 0.51, "grad_norm": 0.5772907137870789, "learning_rate": 0.0005589528085506824, "loss": 3.2922, "step": 10313 }, { "epoch": 0.51, "grad_norm": 0.5355245471000671, "learning_rate": 0.0005589450346253504, "loss": 3.1342, "step": 10314 }, { "epoch": 0.51, "grad_norm": 0.5125998258590698, "learning_rate": 0.0005589372600180069, "loss": 3.4883, "step": 10315 }, { "epoch": 0.51, "grad_norm": 0.5136677622795105, "learning_rate": 0.0005589294847286726, "loss": 3.3296, "step": 10316 }, { "epoch": 0.51, "grad_norm": 0.514228105545044, "learning_rate": 0.0005589217087573678, "loss": 3.2156, "step": 10317 }, { "epoch": 0.51, "grad_norm": 0.5587817430496216, "learning_rate": 0.000558913932104113, "loss": 3.2759, "step": 10318 }, { "epoch": 0.51, "grad_norm": 0.6269665360450745, "learning_rate": 0.0005589061547689288, "loss": 3.2579, "step": 10319 }, { "epoch": 0.51, "grad_norm": 0.5368645787239075, "learning_rate": 0.0005588983767518356, "loss": 3.3245, "step": 10320 }, { "epoch": 0.51, "grad_norm": 0.5467808842658997, "learning_rate": 0.0005588905980528539, "loss": 3.272, "step": 10321 }, { "epoch": 0.51, "grad_norm": 0.5453903079032898, "learning_rate": 0.0005588828186720041, "loss": 3.2743, "step": 10322 }, { "epoch": 0.51, "grad_norm": 0.5781856179237366, "learning_rate": 0.000558875038609307, "loss": 3.2912, "step": 10323 }, { "epoch": 0.51, "grad_norm": 0.6446077227592468, "learning_rate": 0.0005588672578647827, "loss": 3.2426, "step": 10324 }, { "epoch": 0.51, "grad_norm": 0.5255220532417297, "learning_rate": 0.0005588594764384519, "loss": 3.4687, "step": 10325 }, { "epoch": 0.51, "grad_norm": 0.5455769896507263, "learning_rate": 0.0005588516943303352, "loss": 3.2468, "step": 10326 }, { "epoch": 0.51, "grad_norm": 0.5492979288101196, "learning_rate": 0.0005588439115404527, "loss": 3.4958, "step": 10327 }, { "epoch": 0.51, "grad_norm": 0.5501328110694885, "learning_rate": 0.0005588361280688252, "loss": 3.2044, "step": 10328 }, { "epoch": 0.51, "grad_norm": 0.5918759107589722, "learning_rate": 0.0005588283439154733, "loss": 3.4665, "step": 10329 }, { "epoch": 0.51, "grad_norm": 0.5502012968063354, "learning_rate": 0.0005588205590804173, "loss": 3.2085, "step": 10330 }, { "epoch": 0.51, "grad_norm": 0.49773234128952026, "learning_rate": 0.0005588127735636776, "loss": 3.3999, "step": 10331 }, { "epoch": 0.51, "grad_norm": 0.542114794254303, "learning_rate": 0.0005588049873652751, "loss": 3.321, "step": 10332 }, { "epoch": 0.51, "grad_norm": 0.5828530788421631, "learning_rate": 0.00055879720048523, "loss": 3.2922, "step": 10333 }, { "epoch": 0.51, "grad_norm": 0.5640475153923035, "learning_rate": 0.0005587894129235628, "loss": 3.1788, "step": 10334 }, { "epoch": 0.51, "grad_norm": 0.5116139650344849, "learning_rate": 0.0005587816246802941, "loss": 3.1212, "step": 10335 }, { "epoch": 0.51, "grad_norm": 0.4903753399848938, "learning_rate": 0.0005587738357554445, "loss": 3.1048, "step": 10336 }, { "epoch": 0.51, "grad_norm": 0.5303298234939575, "learning_rate": 0.0005587660461490343, "loss": 3.1958, "step": 10337 }, { "epoch": 0.51, "grad_norm": 0.5109275579452515, "learning_rate": 0.0005587582558610843, "loss": 3.3446, "step": 10338 }, { "epoch": 0.51, "grad_norm": 0.5509363412857056, "learning_rate": 0.0005587504648916147, "loss": 3.2626, "step": 10339 }, { "epoch": 0.51, "grad_norm": 0.510272204875946, "learning_rate": 0.0005587426732406463, "loss": 3.3572, "step": 10340 }, { "epoch": 0.51, "grad_norm": 0.5146096348762512, "learning_rate": 0.0005587348809081993, "loss": 3.1036, "step": 10341 }, { "epoch": 0.51, "grad_norm": 0.48640674352645874, "learning_rate": 0.0005587270878942947, "loss": 3.4293, "step": 10342 }, { "epoch": 0.51, "grad_norm": 0.5620465278625488, "learning_rate": 0.0005587192941989525, "loss": 3.2765, "step": 10343 }, { "epoch": 0.51, "grad_norm": 0.5385037064552307, "learning_rate": 0.0005587114998221935, "loss": 3.0954, "step": 10344 }, { "epoch": 0.51, "grad_norm": 0.547918975353241, "learning_rate": 0.0005587037047640383, "loss": 3.2205, "step": 10345 }, { "epoch": 0.51, "grad_norm": 0.5199856758117676, "learning_rate": 0.0005586959090245074, "loss": 3.3202, "step": 10346 }, { "epoch": 0.51, "grad_norm": 0.49615347385406494, "learning_rate": 0.000558688112603621, "loss": 3.1684, "step": 10347 }, { "epoch": 0.51, "grad_norm": 0.5756198763847351, "learning_rate": 0.0005586803155014001, "loss": 3.2479, "step": 10348 }, { "epoch": 0.51, "grad_norm": 0.5044625997543335, "learning_rate": 0.000558672517717865, "loss": 3.2301, "step": 10349 }, { "epoch": 0.51, "grad_norm": 0.5165237784385681, "learning_rate": 0.0005586647192530362, "loss": 3.1831, "step": 10350 }, { "epoch": 0.51, "grad_norm": 0.5043236613273621, "learning_rate": 0.0005586569201069344, "loss": 3.2906, "step": 10351 }, { "epoch": 0.51, "grad_norm": 0.5174095034599304, "learning_rate": 0.00055864912027958, "loss": 3.3525, "step": 10352 }, { "epoch": 0.51, "grad_norm": 0.539576530456543, "learning_rate": 0.0005586413197709936, "loss": 3.2209, "step": 10353 }, { "epoch": 0.51, "grad_norm": 0.49933311343193054, "learning_rate": 0.0005586335185811957, "loss": 3.3751, "step": 10354 }, { "epoch": 0.51, "grad_norm": 0.48638248443603516, "learning_rate": 0.0005586257167102069, "loss": 3.1653, "step": 10355 }, { "epoch": 0.51, "grad_norm": 0.5505256652832031, "learning_rate": 0.0005586179141580478, "loss": 2.9672, "step": 10356 }, { "epoch": 0.51, "grad_norm": 0.5323838591575623, "learning_rate": 0.0005586101109247389, "loss": 3.0727, "step": 10357 }, { "epoch": 0.51, "grad_norm": 0.5171400904655457, "learning_rate": 0.0005586023070103006, "loss": 3.1937, "step": 10358 }, { "epoch": 0.51, "grad_norm": 0.5195598006248474, "learning_rate": 0.0005585945024147537, "loss": 3.6333, "step": 10359 }, { "epoch": 0.51, "grad_norm": 0.49022892117500305, "learning_rate": 0.0005585866971381186, "loss": 3.1178, "step": 10360 }, { "epoch": 0.51, "grad_norm": 0.5279939770698547, "learning_rate": 0.000558578891180416, "loss": 3.2818, "step": 10361 }, { "epoch": 0.51, "grad_norm": 0.5240474343299866, "learning_rate": 0.0005585710845416663, "loss": 3.2201, "step": 10362 }, { "epoch": 0.51, "grad_norm": 0.5336095094680786, "learning_rate": 0.0005585632772218901, "loss": 3.1314, "step": 10363 }, { "epoch": 0.51, "grad_norm": 0.4804680347442627, "learning_rate": 0.000558555469221108, "loss": 3.27, "step": 10364 }, { "epoch": 0.51, "grad_norm": 0.5072349309921265, "learning_rate": 0.0005585476605393406, "loss": 3.3179, "step": 10365 }, { "epoch": 0.51, "grad_norm": 0.6415109634399414, "learning_rate": 0.0005585398511766084, "loss": 3.493, "step": 10366 }, { "epoch": 0.51, "grad_norm": 0.49252402782440186, "learning_rate": 0.0005585320411329321, "loss": 3.1779, "step": 10367 }, { "epoch": 0.51, "grad_norm": 0.5037877559661865, "learning_rate": 0.000558524230408332, "loss": 3.2647, "step": 10368 }, { "epoch": 0.51, "grad_norm": 0.5191027522087097, "learning_rate": 0.0005585164190028287, "loss": 3.3537, "step": 10369 }, { "epoch": 0.51, "grad_norm": 0.49495670199394226, "learning_rate": 0.0005585086069164432, "loss": 3.3338, "step": 10370 }, { "epoch": 0.51, "grad_norm": 0.5146889090538025, "learning_rate": 0.0005585007941491957, "loss": 3.2084, "step": 10371 }, { "epoch": 0.51, "grad_norm": 0.5505595207214355, "learning_rate": 0.0005584929807011068, "loss": 3.2504, "step": 10372 }, { "epoch": 0.51, "grad_norm": 0.4919992983341217, "learning_rate": 0.0005584851665721972, "loss": 3.2404, "step": 10373 }, { "epoch": 0.51, "grad_norm": 0.49141108989715576, "learning_rate": 0.0005584773517624873, "loss": 3.3036, "step": 10374 }, { "epoch": 0.51, "grad_norm": 0.5041980743408203, "learning_rate": 0.0005584695362719979, "loss": 3.4077, "step": 10375 }, { "epoch": 0.51, "grad_norm": 0.5285553336143494, "learning_rate": 0.0005584617201007494, "loss": 3.3163, "step": 10376 }, { "epoch": 0.51, "grad_norm": 0.5297923684120178, "learning_rate": 0.0005584539032487626, "loss": 3.0486, "step": 10377 }, { "epoch": 0.51, "grad_norm": 0.4896916151046753, "learning_rate": 0.0005584460857160578, "loss": 3.5066, "step": 10378 }, { "epoch": 0.51, "grad_norm": 0.528682291507721, "learning_rate": 0.0005584382675026558, "loss": 3.2385, "step": 10379 }, { "epoch": 0.51, "grad_norm": 0.4898264706134796, "learning_rate": 0.0005584304486085772, "loss": 3.1054, "step": 10380 }, { "epoch": 0.51, "grad_norm": 0.5090500116348267, "learning_rate": 0.0005584226290338425, "loss": 3.3509, "step": 10381 }, { "epoch": 0.51, "grad_norm": 0.5073500871658325, "learning_rate": 0.0005584148087784724, "loss": 3.526, "step": 10382 }, { "epoch": 0.51, "grad_norm": 0.5196703672409058, "learning_rate": 0.0005584069878424873, "loss": 3.2417, "step": 10383 }, { "epoch": 0.51, "grad_norm": 0.4907332956790924, "learning_rate": 0.0005583991662259082, "loss": 3.2636, "step": 10384 }, { "epoch": 0.51, "grad_norm": 0.47070619463920593, "learning_rate": 0.000558391343928755, "loss": 3.2755, "step": 10385 }, { "epoch": 0.51, "grad_norm": 0.5050802230834961, "learning_rate": 0.000558383520951049, "loss": 3.3161, "step": 10386 }, { "epoch": 0.51, "grad_norm": 0.5075258612632751, "learning_rate": 0.0005583756972928106, "loss": 3.1821, "step": 10387 }, { "epoch": 0.51, "grad_norm": 0.5390806198120117, "learning_rate": 0.0005583678729540602, "loss": 3.2685, "step": 10388 }, { "epoch": 0.51, "grad_norm": 0.5114220380783081, "learning_rate": 0.0005583600479348185, "loss": 3.1876, "step": 10389 }, { "epoch": 0.51, "grad_norm": 0.5140728950500488, "learning_rate": 0.0005583522222351062, "loss": 3.2796, "step": 10390 }, { "epoch": 0.51, "grad_norm": 0.5219741463661194, "learning_rate": 0.0005583443958549439, "loss": 3.3553, "step": 10391 }, { "epoch": 0.51, "grad_norm": 0.5452612042427063, "learning_rate": 0.0005583365687943521, "loss": 3.4334, "step": 10392 }, { "epoch": 0.51, "grad_norm": 0.6287804841995239, "learning_rate": 0.0005583287410533516, "loss": 3.1848, "step": 10393 }, { "epoch": 0.51, "grad_norm": 0.5615096092224121, "learning_rate": 0.0005583209126319629, "loss": 3.4768, "step": 10394 }, { "epoch": 0.51, "grad_norm": 0.5138780474662781, "learning_rate": 0.0005583130835302066, "loss": 3.3886, "step": 10395 }, { "epoch": 0.51, "grad_norm": 0.5071299076080322, "learning_rate": 0.0005583052537481034, "loss": 3.3302, "step": 10396 }, { "epoch": 0.51, "grad_norm": 0.5131478905677795, "learning_rate": 0.0005582974232856738, "loss": 3.4358, "step": 10397 }, { "epoch": 0.51, "grad_norm": 0.5130304098129272, "learning_rate": 0.0005582895921429385, "loss": 3.1993, "step": 10398 }, { "epoch": 0.51, "grad_norm": 0.5018404722213745, "learning_rate": 0.0005582817603199182, "loss": 3.4178, "step": 10399 }, { "epoch": 0.51, "grad_norm": 0.5256871581077576, "learning_rate": 0.0005582739278166334, "loss": 3.0488, "step": 10400 }, { "epoch": 0.51, "grad_norm": 0.5261520743370056, "learning_rate": 0.0005582660946331047, "loss": 3.1773, "step": 10401 }, { "epoch": 0.51, "grad_norm": 0.5217165946960449, "learning_rate": 0.0005582582607693529, "loss": 3.1666, "step": 10402 }, { "epoch": 0.51, "grad_norm": 0.5378689169883728, "learning_rate": 0.0005582504262253986, "loss": 3.199, "step": 10403 }, { "epoch": 0.51, "grad_norm": 0.5194031596183777, "learning_rate": 0.0005582425910012624, "loss": 3.3709, "step": 10404 }, { "epoch": 0.51, "grad_norm": 0.5214276909828186, "learning_rate": 0.0005582347550969648, "loss": 3.2128, "step": 10405 }, { "epoch": 0.51, "grad_norm": 0.5072099566459656, "learning_rate": 0.0005582269185125264, "loss": 3.1792, "step": 10406 }, { "epoch": 0.51, "grad_norm": 0.6110155582427979, "learning_rate": 0.0005582190812479683, "loss": 3.3741, "step": 10407 }, { "epoch": 0.51, "grad_norm": 0.517043948173523, "learning_rate": 0.0005582112433033107, "loss": 3.3555, "step": 10408 }, { "epoch": 0.51, "grad_norm": 0.5081747174263, "learning_rate": 0.0005582034046785745, "loss": 3.4135, "step": 10409 }, { "epoch": 0.51, "grad_norm": 0.5313897132873535, "learning_rate": 0.0005581955653737801, "loss": 3.2883, "step": 10410 }, { "epoch": 0.51, "grad_norm": 0.5237902998924255, "learning_rate": 0.0005581877253889482, "loss": 3.3033, "step": 10411 }, { "epoch": 0.51, "grad_norm": 0.49405860900878906, "learning_rate": 0.0005581798847240997, "loss": 3.0508, "step": 10412 }, { "epoch": 0.51, "grad_norm": 0.49449262022972107, "learning_rate": 0.000558172043379255, "loss": 3.268, "step": 10413 }, { "epoch": 0.51, "grad_norm": 0.5103479623794556, "learning_rate": 0.0005581642013544347, "loss": 3.18, "step": 10414 }, { "epoch": 0.51, "grad_norm": 0.4870624244213104, "learning_rate": 0.0005581563586496597, "loss": 3.2905, "step": 10415 }, { "epoch": 0.51, "grad_norm": 0.5494616031646729, "learning_rate": 0.0005581485152649506, "loss": 3.3204, "step": 10416 }, { "epoch": 0.51, "grad_norm": 0.5629361271858215, "learning_rate": 0.0005581406712003279, "loss": 3.3171, "step": 10417 }, { "epoch": 0.51, "grad_norm": 0.48867589235305786, "learning_rate": 0.0005581328264558123, "loss": 3.3654, "step": 10418 }, { "epoch": 0.51, "grad_norm": 0.5334605574607849, "learning_rate": 0.0005581249810314245, "loss": 3.0235, "step": 10419 }, { "epoch": 0.51, "grad_norm": 0.5167070031166077, "learning_rate": 0.0005581171349271853, "loss": 3.2379, "step": 10420 }, { "epoch": 0.51, "grad_norm": 0.5437859892845154, "learning_rate": 0.0005581092881431152, "loss": 3.2161, "step": 10421 }, { "epoch": 0.51, "grad_norm": 0.4782818853855133, "learning_rate": 0.0005581014406792348, "loss": 3.2091, "step": 10422 }, { "epoch": 0.51, "grad_norm": 0.5339621901512146, "learning_rate": 0.0005580935925355652, "loss": 3.1981, "step": 10423 }, { "epoch": 0.51, "grad_norm": 0.5722216963768005, "learning_rate": 0.0005580857437121264, "loss": 3.2892, "step": 10424 }, { "epoch": 0.51, "grad_norm": 0.5274963974952698, "learning_rate": 0.0005580778942089396, "loss": 3.4175, "step": 10425 }, { "epoch": 0.51, "grad_norm": 0.5084352493286133, "learning_rate": 0.0005580700440260253, "loss": 3.3634, "step": 10426 }, { "epoch": 0.51, "grad_norm": 0.49948909878730774, "learning_rate": 0.000558062193163404, "loss": 3.2063, "step": 10427 }, { "epoch": 0.51, "grad_norm": 0.5016568899154663, "learning_rate": 0.0005580543416210967, "loss": 3.0554, "step": 10428 }, { "epoch": 0.51, "grad_norm": 0.49527838826179504, "learning_rate": 0.000558046489399124, "loss": 3.0898, "step": 10429 }, { "epoch": 0.51, "grad_norm": 0.5031401515007019, "learning_rate": 0.0005580386364975064, "loss": 3.2988, "step": 10430 }, { "epoch": 0.51, "grad_norm": 0.5411937832832336, "learning_rate": 0.0005580307829162646, "loss": 3.3959, "step": 10431 }, { "epoch": 0.51, "grad_norm": 0.48796623945236206, "learning_rate": 0.0005580229286554195, "loss": 3.2403, "step": 10432 }, { "epoch": 0.51, "grad_norm": 0.502121090888977, "learning_rate": 0.0005580150737149916, "loss": 3.2067, "step": 10433 }, { "epoch": 0.51, "grad_norm": 0.5142828822135925, "learning_rate": 0.0005580072180950017, "loss": 3.2503, "step": 10434 }, { "epoch": 0.51, "grad_norm": 0.4943016767501831, "learning_rate": 0.0005579993617954704, "loss": 3.4234, "step": 10435 }, { "epoch": 0.51, "grad_norm": 0.4971558153629303, "learning_rate": 0.0005579915048164186, "loss": 3.2003, "step": 10436 }, { "epoch": 0.51, "grad_norm": 0.508538544178009, "learning_rate": 0.0005579836471578665, "loss": 3.299, "step": 10437 }, { "epoch": 0.51, "grad_norm": 0.5330371856689453, "learning_rate": 0.0005579757888198353, "loss": 3.2828, "step": 10438 }, { "epoch": 0.51, "grad_norm": 0.48706308007240295, "learning_rate": 0.0005579679298023456, "loss": 3.369, "step": 10439 }, { "epoch": 0.51, "grad_norm": 0.519385039806366, "learning_rate": 0.0005579600701054179, "loss": 3.3164, "step": 10440 }, { "epoch": 0.51, "grad_norm": 0.504608154296875, "learning_rate": 0.0005579522097290732, "loss": 3.338, "step": 10441 }, { "epoch": 0.51, "grad_norm": 0.5118958353996277, "learning_rate": 0.0005579443486733318, "loss": 3.3114, "step": 10442 }, { "epoch": 0.51, "grad_norm": 0.49348047375679016, "learning_rate": 0.0005579364869382148, "loss": 3.2707, "step": 10443 }, { "epoch": 0.51, "grad_norm": 0.5011881589889526, "learning_rate": 0.0005579286245237426, "loss": 3.2466, "step": 10444 }, { "epoch": 0.51, "grad_norm": 0.49209505319595337, "learning_rate": 0.0005579207614299361, "loss": 3.1962, "step": 10445 }, { "epoch": 0.51, "grad_norm": 0.5165194869041443, "learning_rate": 0.000557912897656816, "loss": 3.2352, "step": 10446 }, { "epoch": 0.51, "grad_norm": 0.48565101623535156, "learning_rate": 0.0005579050332044029, "loss": 3.4099, "step": 10447 }, { "epoch": 0.51, "grad_norm": 0.5325493812561035, "learning_rate": 0.0005578971680727177, "loss": 3.5164, "step": 10448 }, { "epoch": 0.51, "grad_norm": 0.5225394368171692, "learning_rate": 0.0005578893022617808, "loss": 3.5059, "step": 10449 }, { "epoch": 0.51, "grad_norm": 0.5450068712234497, "learning_rate": 0.0005578814357716132, "loss": 3.3856, "step": 10450 }, { "epoch": 0.51, "grad_norm": 0.4797419011592865, "learning_rate": 0.0005578735686022355, "loss": 3.2904, "step": 10451 }, { "epoch": 0.51, "grad_norm": 0.5698765516281128, "learning_rate": 0.0005578657007536685, "loss": 3.274, "step": 10452 }, { "epoch": 0.51, "grad_norm": 0.5215989947319031, "learning_rate": 0.0005578578322259328, "loss": 3.422, "step": 10453 }, { "epoch": 0.51, "grad_norm": 0.5165630578994751, "learning_rate": 0.0005578499630190493, "loss": 3.3273, "step": 10454 }, { "epoch": 0.51, "grad_norm": 0.5286875367164612, "learning_rate": 0.0005578420931330385, "loss": 3.302, "step": 10455 }, { "epoch": 0.51, "grad_norm": 0.5140888690948486, "learning_rate": 0.0005578342225679213, "loss": 3.1989, "step": 10456 }, { "epoch": 0.51, "grad_norm": 0.4854901432991028, "learning_rate": 0.0005578263513237185, "loss": 3.417, "step": 10457 }, { "epoch": 0.51, "grad_norm": 0.5346885323524475, "learning_rate": 0.0005578184794004506, "loss": 3.4183, "step": 10458 }, { "epoch": 0.51, "grad_norm": 0.5289451479911804, "learning_rate": 0.0005578106067981384, "loss": 3.2724, "step": 10459 }, { "epoch": 0.51, "grad_norm": 0.536060631275177, "learning_rate": 0.0005578027335168027, "loss": 3.3203, "step": 10460 }, { "epoch": 0.51, "grad_norm": 0.5051462650299072, "learning_rate": 0.0005577948595564643, "loss": 3.2259, "step": 10461 }, { "epoch": 0.51, "grad_norm": 0.483619749546051, "learning_rate": 0.0005577869849171437, "loss": 3.3036, "step": 10462 }, { "epoch": 0.51, "grad_norm": 0.5087090730667114, "learning_rate": 0.0005577791095988619, "loss": 3.4634, "step": 10463 }, { "epoch": 0.51, "grad_norm": 0.5173831582069397, "learning_rate": 0.0005577712336016395, "loss": 3.4216, "step": 10464 }, { "epoch": 0.51, "grad_norm": 0.504951536655426, "learning_rate": 0.0005577633569254974, "loss": 3.3021, "step": 10465 }, { "epoch": 0.51, "grad_norm": 0.5104922652244568, "learning_rate": 0.0005577554795704561, "loss": 3.2196, "step": 10466 }, { "epoch": 0.51, "grad_norm": 0.4943409264087677, "learning_rate": 0.0005577476015365365, "loss": 3.1075, "step": 10467 }, { "epoch": 0.51, "grad_norm": 0.5140161514282227, "learning_rate": 0.0005577397228237593, "loss": 3.2358, "step": 10468 }, { "epoch": 0.51, "grad_norm": 0.5718799829483032, "learning_rate": 0.0005577318434321453, "loss": 3.1215, "step": 10469 }, { "epoch": 0.51, "grad_norm": 0.5070701241493225, "learning_rate": 0.0005577239633617153, "loss": 3.1489, "step": 10470 }, { "epoch": 0.51, "grad_norm": 0.5295852422714233, "learning_rate": 0.0005577160826124899, "loss": 3.2577, "step": 10471 }, { "epoch": 0.51, "grad_norm": 0.5276656150817871, "learning_rate": 0.0005577082011844898, "loss": 3.2257, "step": 10472 }, { "epoch": 0.51, "grad_norm": 0.6001366376876831, "learning_rate": 0.0005577003190777361, "loss": 3.268, "step": 10473 }, { "epoch": 0.51, "grad_norm": 0.5033782720565796, "learning_rate": 0.0005576924362922493, "loss": 3.3577, "step": 10474 }, { "epoch": 0.51, "grad_norm": 0.5481687784194946, "learning_rate": 0.0005576845528280503, "loss": 3.1024, "step": 10475 }, { "epoch": 0.51, "grad_norm": 0.5187335014343262, "learning_rate": 0.0005576766686851595, "loss": 3.2345, "step": 10476 }, { "epoch": 0.51, "grad_norm": 0.5212278962135315, "learning_rate": 0.0005576687838635983, "loss": 3.2455, "step": 10477 }, { "epoch": 0.51, "grad_norm": 0.5137622356414795, "learning_rate": 0.0005576608983633868, "loss": 3.3298, "step": 10478 }, { "epoch": 0.51, "grad_norm": 0.544165313243866, "learning_rate": 0.0005576530121845463, "loss": 3.0631, "step": 10479 }, { "epoch": 0.51, "grad_norm": 0.5154332518577576, "learning_rate": 0.0005576451253270973, "loss": 3.3821, "step": 10480 }, { "epoch": 0.51, "grad_norm": 0.5267524719238281, "learning_rate": 0.0005576372377910605, "loss": 3.2283, "step": 10481 }, { "epoch": 0.51, "grad_norm": 0.5340297818183899, "learning_rate": 0.000557629349576457, "loss": 3.1936, "step": 10482 }, { "epoch": 0.51, "grad_norm": 0.5131736397743225, "learning_rate": 0.0005576214606833073, "loss": 3.2363, "step": 10483 }, { "epoch": 0.51, "grad_norm": 0.5175976753234863, "learning_rate": 0.0005576135711116322, "loss": 3.3296, "step": 10484 }, { "epoch": 0.51, "grad_norm": 0.5071966052055359, "learning_rate": 0.0005576056808614526, "loss": 3.3664, "step": 10485 }, { "epoch": 0.51, "grad_norm": 0.5538497567176819, "learning_rate": 0.0005575977899327892, "loss": 3.1944, "step": 10486 }, { "epoch": 0.51, "grad_norm": 0.5503365993499756, "learning_rate": 0.0005575898983256627, "loss": 3.2746, "step": 10487 }, { "epoch": 0.51, "grad_norm": 0.5471153855323792, "learning_rate": 0.000557582006040094, "loss": 3.4136, "step": 10488 }, { "epoch": 0.51, "grad_norm": 0.5127497911453247, "learning_rate": 0.000557574113076104, "loss": 3.4066, "step": 10489 }, { "epoch": 0.51, "grad_norm": 0.5495885610580444, "learning_rate": 0.0005575662194337133, "loss": 3.1525, "step": 10490 }, { "epoch": 0.51, "grad_norm": 0.5071145296096802, "learning_rate": 0.0005575583251129426, "loss": 3.3041, "step": 10491 }, { "epoch": 0.51, "grad_norm": 0.5305414199829102, "learning_rate": 0.000557550430113813, "loss": 3.3928, "step": 10492 }, { "epoch": 0.51, "grad_norm": 0.5200974345207214, "learning_rate": 0.0005575425344363452, "loss": 3.4053, "step": 10493 }, { "epoch": 0.51, "grad_norm": 0.5152339339256287, "learning_rate": 0.0005575346380805598, "loss": 3.306, "step": 10494 }, { "epoch": 0.51, "grad_norm": 0.5076627135276794, "learning_rate": 0.0005575267410464778, "loss": 3.414, "step": 10495 }, { "epoch": 0.51, "grad_norm": 0.5367875695228577, "learning_rate": 0.0005575188433341198, "loss": 3.3654, "step": 10496 }, { "epoch": 0.51, "grad_norm": 0.5150578618049622, "learning_rate": 0.0005575109449435068, "loss": 3.2771, "step": 10497 }, { "epoch": 0.51, "grad_norm": 0.5308785438537598, "learning_rate": 0.0005575030458746595, "loss": 3.4856, "step": 10498 }, { "epoch": 0.51, "grad_norm": 0.5408422946929932, "learning_rate": 0.0005574951461275989, "loss": 3.018, "step": 10499 }, { "epoch": 0.51, "grad_norm": 0.5197494626045227, "learning_rate": 0.0005574872457023455, "loss": 3.2124, "step": 10500 }, { "epoch": 0.51, "grad_norm": 0.5191333293914795, "learning_rate": 0.0005574793445989202, "loss": 3.4525, "step": 10501 }, { "epoch": 0.51, "grad_norm": 0.5601697564125061, "learning_rate": 0.000557471442817344, "loss": 3.3425, "step": 10502 }, { "epoch": 0.51, "grad_norm": 0.5880317687988281, "learning_rate": 0.0005574635403576374, "loss": 3.363, "step": 10503 }, { "epoch": 0.51, "grad_norm": 0.528934121131897, "learning_rate": 0.0005574556372198215, "loss": 3.2255, "step": 10504 }, { "epoch": 0.51, "grad_norm": 0.5256865620613098, "learning_rate": 0.000557447733403917, "loss": 3.3382, "step": 10505 }, { "epoch": 0.51, "grad_norm": 0.534885823726654, "learning_rate": 0.0005574398289099448, "loss": 3.1408, "step": 10506 }, { "epoch": 0.51, "grad_norm": 0.5375804901123047, "learning_rate": 0.0005574319237379255, "loss": 3.2966, "step": 10507 }, { "epoch": 0.51, "grad_norm": 0.5356113314628601, "learning_rate": 0.00055742401788788, "loss": 2.8656, "step": 10508 }, { "epoch": 0.52, "grad_norm": 0.5177241563796997, "learning_rate": 0.0005574161113598293, "loss": 3.3429, "step": 10509 }, { "epoch": 0.52, "grad_norm": 0.619787871837616, "learning_rate": 0.0005574082041537941, "loss": 3.3243, "step": 10510 }, { "epoch": 0.52, "grad_norm": 0.5078191161155701, "learning_rate": 0.0005574002962697953, "loss": 3.2323, "step": 10511 }, { "epoch": 0.52, "grad_norm": 0.49133309721946716, "learning_rate": 0.0005573923877078534, "loss": 3.2236, "step": 10512 }, { "epoch": 0.52, "grad_norm": 0.5388320088386536, "learning_rate": 0.0005573844784679897, "loss": 3.1184, "step": 10513 }, { "epoch": 0.52, "grad_norm": 0.5303979516029358, "learning_rate": 0.0005573765685502247, "loss": 3.5108, "step": 10514 }, { "epoch": 0.52, "grad_norm": 0.5063710808753967, "learning_rate": 0.0005573686579545795, "loss": 3.2074, "step": 10515 }, { "epoch": 0.52, "grad_norm": 0.533225417137146, "learning_rate": 0.0005573607466810747, "loss": 2.9851, "step": 10516 }, { "epoch": 0.52, "grad_norm": 0.48221254348754883, "learning_rate": 0.0005573528347297312, "loss": 3.3277, "step": 10517 }, { "epoch": 0.52, "grad_norm": 0.4855428636074066, "learning_rate": 0.0005573449221005699, "loss": 3.3781, "step": 10518 }, { "epoch": 0.52, "grad_norm": 0.5387424826622009, "learning_rate": 0.0005573370087936117, "loss": 3.4274, "step": 10519 }, { "epoch": 0.52, "grad_norm": 0.5350310802459717, "learning_rate": 0.0005573290948088772, "loss": 3.2629, "step": 10520 }, { "epoch": 0.52, "grad_norm": 0.5230748653411865, "learning_rate": 0.0005573211801463874, "loss": 3.1636, "step": 10521 }, { "epoch": 0.52, "grad_norm": 0.5564438104629517, "learning_rate": 0.000557313264806163, "loss": 3.2858, "step": 10522 }, { "epoch": 0.52, "grad_norm": 0.4955500066280365, "learning_rate": 0.0005573053487882252, "loss": 3.2284, "step": 10523 }, { "epoch": 0.52, "grad_norm": 0.5798243284225464, "learning_rate": 0.0005572974320925946, "loss": 3.3389, "step": 10524 }, { "epoch": 0.52, "grad_norm": 0.5217620134353638, "learning_rate": 0.000557289514719292, "loss": 3.2003, "step": 10525 }, { "epoch": 0.52, "grad_norm": 0.49824416637420654, "learning_rate": 0.0005572815966683385, "loss": 3.1126, "step": 10526 }, { "epoch": 0.52, "grad_norm": 0.5172311663627625, "learning_rate": 0.0005572736779397546, "loss": 3.2446, "step": 10527 }, { "epoch": 0.52, "grad_norm": 0.5369932651519775, "learning_rate": 0.0005572657585335614, "loss": 3.3792, "step": 10528 }, { "epoch": 0.52, "grad_norm": 0.5192260146141052, "learning_rate": 0.0005572578384497797, "loss": 3.4216, "step": 10529 }, { "epoch": 0.52, "grad_norm": 0.5092270374298096, "learning_rate": 0.0005572499176884305, "loss": 3.4087, "step": 10530 }, { "epoch": 0.52, "grad_norm": 0.5477033853530884, "learning_rate": 0.0005572419962495344, "loss": 3.2542, "step": 10531 }, { "epoch": 0.52, "grad_norm": 0.49869391322135925, "learning_rate": 0.0005572340741331124, "loss": 3.5372, "step": 10532 }, { "epoch": 0.52, "grad_norm": 0.5513463616371155, "learning_rate": 0.0005572261513391854, "loss": 3.2842, "step": 10533 }, { "epoch": 0.52, "grad_norm": 0.5208010673522949, "learning_rate": 0.0005572182278677741, "loss": 3.2494, "step": 10534 }, { "epoch": 0.52, "grad_norm": 0.5104653239250183, "learning_rate": 0.0005572103037188996, "loss": 3.4666, "step": 10535 }, { "epoch": 0.52, "grad_norm": 0.49619725346565247, "learning_rate": 0.0005572023788925827, "loss": 3.3879, "step": 10536 }, { "epoch": 0.52, "grad_norm": 0.5280769467353821, "learning_rate": 0.000557194453388844, "loss": 3.2656, "step": 10537 }, { "epoch": 0.52, "grad_norm": 0.5171984434127808, "learning_rate": 0.0005571865272077049, "loss": 3.3996, "step": 10538 }, { "epoch": 0.52, "grad_norm": 0.4976312816143036, "learning_rate": 0.0005571786003491858, "loss": 3.3502, "step": 10539 }, { "epoch": 0.52, "grad_norm": 0.5273638963699341, "learning_rate": 0.0005571706728133078, "loss": 3.3182, "step": 10540 }, { "epoch": 0.52, "grad_norm": 0.5085924863815308, "learning_rate": 0.0005571627446000917, "loss": 3.1397, "step": 10541 }, { "epoch": 0.52, "grad_norm": 0.5273411870002747, "learning_rate": 0.0005571548157095585, "loss": 3.1939, "step": 10542 }, { "epoch": 0.52, "grad_norm": 0.5445072054862976, "learning_rate": 0.000557146886141729, "loss": 3.1008, "step": 10543 }, { "epoch": 0.52, "grad_norm": 0.5404632091522217, "learning_rate": 0.0005571389558966241, "loss": 3.4246, "step": 10544 }, { "epoch": 0.52, "grad_norm": 0.5002350211143494, "learning_rate": 0.0005571310249742647, "loss": 3.4459, "step": 10545 }, { "epoch": 0.52, "grad_norm": 0.5209913849830627, "learning_rate": 0.0005571230933746716, "loss": 3.0648, "step": 10546 }, { "epoch": 0.52, "grad_norm": 0.5363181233406067, "learning_rate": 0.0005571151610978658, "loss": 3.2458, "step": 10547 }, { "epoch": 0.52, "grad_norm": 0.5626364350318909, "learning_rate": 0.0005571072281438681, "loss": 2.9742, "step": 10548 }, { "epoch": 0.52, "grad_norm": 0.5303053259849548, "learning_rate": 0.0005570992945126994, "loss": 3.2247, "step": 10549 }, { "epoch": 0.52, "grad_norm": 0.5701689720153809, "learning_rate": 0.0005570913602043808, "loss": 3.2445, "step": 10550 }, { "epoch": 0.52, "grad_norm": 0.49203193187713623, "learning_rate": 0.0005570834252189329, "loss": 3.265, "step": 10551 }, { "epoch": 0.52, "grad_norm": 0.5791632533073425, "learning_rate": 0.0005570754895563767, "loss": 3.2336, "step": 10552 }, { "epoch": 0.52, "grad_norm": 0.5477674007415771, "learning_rate": 0.0005570675532167333, "loss": 3.1324, "step": 10553 }, { "epoch": 0.52, "grad_norm": 0.49030929803848267, "learning_rate": 0.0005570596162000233, "loss": 3.076, "step": 10554 }, { "epoch": 0.52, "grad_norm": 0.5654690861701965, "learning_rate": 0.0005570516785062678, "loss": 3.1474, "step": 10555 }, { "epoch": 0.52, "grad_norm": 0.5409621000289917, "learning_rate": 0.0005570437401354877, "loss": 3.0283, "step": 10556 }, { "epoch": 0.52, "grad_norm": 0.4912637174129486, "learning_rate": 0.0005570358010877038, "loss": 3.0622, "step": 10557 }, { "epoch": 0.52, "grad_norm": 0.5478519797325134, "learning_rate": 0.000557027861362937, "loss": 3.1291, "step": 10558 }, { "epoch": 0.52, "grad_norm": 0.5594099760055542, "learning_rate": 0.0005570199209612084, "loss": 3.2931, "step": 10559 }, { "epoch": 0.52, "grad_norm": 0.5263882279396057, "learning_rate": 0.0005570119798825388, "loss": 3.3384, "step": 10560 }, { "epoch": 0.52, "grad_norm": 0.5052241683006287, "learning_rate": 0.000557004038126949, "loss": 3.4222, "step": 10561 }, { "epoch": 0.52, "grad_norm": 0.5351318120956421, "learning_rate": 0.0005569960956944601, "loss": 3.2503, "step": 10562 }, { "epoch": 0.52, "grad_norm": 0.5089155435562134, "learning_rate": 0.0005569881525850929, "loss": 3.1974, "step": 10563 }, { "epoch": 0.52, "grad_norm": 0.5822809338569641, "learning_rate": 0.0005569802087988684, "loss": 3.2786, "step": 10564 }, { "epoch": 0.52, "grad_norm": 0.5087013840675354, "learning_rate": 0.0005569722643358075, "loss": 3.3187, "step": 10565 }, { "epoch": 0.52, "grad_norm": 0.5044753551483154, "learning_rate": 0.000556964319195931, "loss": 3.4414, "step": 10566 }, { "epoch": 0.52, "grad_norm": 0.5216333866119385, "learning_rate": 0.0005569563733792601, "loss": 3.3294, "step": 10567 }, { "epoch": 0.52, "grad_norm": 0.5563536882400513, "learning_rate": 0.0005569484268858155, "loss": 2.9519, "step": 10568 }, { "epoch": 0.52, "grad_norm": 0.5203812122344971, "learning_rate": 0.000556940479715618, "loss": 3.3726, "step": 10569 }, { "epoch": 0.52, "grad_norm": 0.545968770980835, "learning_rate": 0.000556932531868689, "loss": 3.1397, "step": 10570 }, { "epoch": 0.52, "grad_norm": 0.5787644386291504, "learning_rate": 0.000556924583345049, "loss": 3.2113, "step": 10571 }, { "epoch": 0.52, "grad_norm": 0.5733756422996521, "learning_rate": 0.0005569166341447192, "loss": 3.5598, "step": 10572 }, { "epoch": 0.52, "grad_norm": 0.541483998298645, "learning_rate": 0.0005569086842677203, "loss": 3.2303, "step": 10573 }, { "epoch": 0.52, "grad_norm": 0.549351692199707, "learning_rate": 0.0005569007337140736, "loss": 3.354, "step": 10574 }, { "epoch": 0.52, "grad_norm": 0.5365926623344421, "learning_rate": 0.0005568927824837996, "loss": 3.2604, "step": 10575 }, { "epoch": 0.52, "grad_norm": 0.5326177477836609, "learning_rate": 0.0005568848305769195, "loss": 3.2394, "step": 10576 }, { "epoch": 0.52, "grad_norm": 0.523600697517395, "learning_rate": 0.0005568768779934542, "loss": 3.1083, "step": 10577 }, { "epoch": 0.52, "grad_norm": 0.5898261666297913, "learning_rate": 0.0005568689247334247, "loss": 3.1423, "step": 10578 }, { "epoch": 0.52, "grad_norm": 0.5717616677284241, "learning_rate": 0.0005568609707968518, "loss": 3.1968, "step": 10579 }, { "epoch": 0.52, "grad_norm": 0.49297675490379333, "learning_rate": 0.0005568530161837566, "loss": 3.2594, "step": 10580 }, { "epoch": 0.52, "grad_norm": 0.505689263343811, "learning_rate": 0.0005568450608941599, "loss": 3.187, "step": 10581 }, { "epoch": 0.52, "grad_norm": 0.5439349412918091, "learning_rate": 0.0005568371049280827, "loss": 3.2561, "step": 10582 }, { "epoch": 0.52, "grad_norm": 0.5405676960945129, "learning_rate": 0.0005568291482855462, "loss": 3.353, "step": 10583 }, { "epoch": 0.52, "grad_norm": 0.4949883818626404, "learning_rate": 0.000556821190966571, "loss": 3.2305, "step": 10584 }, { "epoch": 0.52, "grad_norm": 0.5206522941589355, "learning_rate": 0.0005568132329711783, "loss": 3.2696, "step": 10585 }, { "epoch": 0.52, "grad_norm": 0.5137814283370972, "learning_rate": 0.0005568052742993889, "loss": 3.1519, "step": 10586 }, { "epoch": 0.52, "grad_norm": 0.577721118927002, "learning_rate": 0.0005567973149512239, "loss": 3.2501, "step": 10587 }, { "epoch": 0.52, "grad_norm": 0.4995252192020416, "learning_rate": 0.0005567893549267042, "loss": 3.2882, "step": 10588 }, { "epoch": 0.52, "grad_norm": 0.5364012122154236, "learning_rate": 0.0005567813942258506, "loss": 3.3206, "step": 10589 }, { "epoch": 0.52, "grad_norm": 0.5193110108375549, "learning_rate": 0.0005567734328486844, "loss": 3.1889, "step": 10590 }, { "epoch": 0.52, "grad_norm": 0.510722815990448, "learning_rate": 0.0005567654707952262, "loss": 3.3038, "step": 10591 }, { "epoch": 0.52, "grad_norm": 0.5020973682403564, "learning_rate": 0.0005567575080654974, "loss": 3.25, "step": 10592 }, { "epoch": 0.52, "grad_norm": 0.5226955413818359, "learning_rate": 0.0005567495446595187, "loss": 3.3096, "step": 10593 }, { "epoch": 0.52, "grad_norm": 0.5112230777740479, "learning_rate": 0.000556741580577311, "loss": 3.2976, "step": 10594 }, { "epoch": 0.52, "grad_norm": 0.5107260942459106, "learning_rate": 0.0005567336158188955, "loss": 3.3089, "step": 10595 }, { "epoch": 0.52, "grad_norm": 0.5062245726585388, "learning_rate": 0.0005567256503842929, "loss": 3.1591, "step": 10596 }, { "epoch": 0.52, "grad_norm": 0.625127375125885, "learning_rate": 0.0005567176842735244, "loss": 3.3238, "step": 10597 }, { "epoch": 0.52, "grad_norm": 0.48830580711364746, "learning_rate": 0.000556709717486611, "loss": 3.2201, "step": 10598 }, { "epoch": 0.52, "grad_norm": 0.5055554509162903, "learning_rate": 0.0005567017500235736, "loss": 3.2346, "step": 10599 }, { "epoch": 0.52, "grad_norm": 0.558253824710846, "learning_rate": 0.0005566937818844332, "loss": 3.3789, "step": 10600 }, { "epoch": 0.52, "grad_norm": 0.5219190120697021, "learning_rate": 0.0005566858130692107, "loss": 3.1847, "step": 10601 }, { "epoch": 0.52, "grad_norm": 0.5320719480514526, "learning_rate": 0.0005566778435779272, "loss": 3.3797, "step": 10602 }, { "epoch": 0.52, "grad_norm": 0.5657633543014526, "learning_rate": 0.0005566698734106037, "loss": 3.2031, "step": 10603 }, { "epoch": 0.52, "grad_norm": 0.6036418080329895, "learning_rate": 0.0005566619025672611, "loss": 3.3975, "step": 10604 }, { "epoch": 0.52, "grad_norm": 0.5115952491760254, "learning_rate": 0.0005566539310479206, "loss": 3.058, "step": 10605 }, { "epoch": 0.52, "grad_norm": 0.5792054533958435, "learning_rate": 0.0005566459588526028, "loss": 3.3311, "step": 10606 }, { "epoch": 0.52, "grad_norm": 0.5313816070556641, "learning_rate": 0.000556637985981329, "loss": 3.428, "step": 10607 }, { "epoch": 0.52, "grad_norm": 0.5156925320625305, "learning_rate": 0.0005566300124341203, "loss": 3.2343, "step": 10608 }, { "epoch": 0.52, "grad_norm": 0.5339927077293396, "learning_rate": 0.0005566220382109974, "loss": 3.0971, "step": 10609 }, { "epoch": 0.52, "grad_norm": 0.5031298995018005, "learning_rate": 0.0005566140633119814, "loss": 3.2465, "step": 10610 }, { "epoch": 0.52, "grad_norm": 0.5161144137382507, "learning_rate": 0.0005566060877370934, "loss": 3.1803, "step": 10611 }, { "epoch": 0.52, "grad_norm": 0.49942952394485474, "learning_rate": 0.0005565981114863544, "loss": 3.3168, "step": 10612 }, { "epoch": 0.52, "grad_norm": 0.5302490592002869, "learning_rate": 0.0005565901345597853, "loss": 3.2728, "step": 10613 }, { "epoch": 0.52, "grad_norm": 0.49967092275619507, "learning_rate": 0.0005565821569574072, "loss": 3.3966, "step": 10614 }, { "epoch": 0.52, "grad_norm": 0.516776978969574, "learning_rate": 0.000556574178679241, "loss": 3.1585, "step": 10615 }, { "epoch": 0.52, "grad_norm": 0.5194513201713562, "learning_rate": 0.0005565661997253079, "loss": 3.3124, "step": 10616 }, { "epoch": 0.52, "grad_norm": 0.5056691765785217, "learning_rate": 0.0005565582200956288, "loss": 3.3491, "step": 10617 }, { "epoch": 0.52, "grad_norm": 0.5017831921577454, "learning_rate": 0.0005565502397902246, "loss": 3.2153, "step": 10618 }, { "epoch": 0.52, "grad_norm": 0.5000698566436768, "learning_rate": 0.0005565422588091165, "loss": 3.4327, "step": 10619 }, { "epoch": 0.52, "grad_norm": 0.5435037016868591, "learning_rate": 0.0005565342771523255, "loss": 3.0531, "step": 10620 }, { "epoch": 0.52, "grad_norm": 0.5276373624801636, "learning_rate": 0.0005565262948198726, "loss": 3.4252, "step": 10621 }, { "epoch": 0.52, "grad_norm": 0.5036376118659973, "learning_rate": 0.0005565183118117787, "loss": 3.1393, "step": 10622 }, { "epoch": 0.52, "grad_norm": 0.5199164748191833, "learning_rate": 0.0005565103281280652, "loss": 3.2729, "step": 10623 }, { "epoch": 0.52, "grad_norm": 0.5530470013618469, "learning_rate": 0.0005565023437687526, "loss": 3.4069, "step": 10624 }, { "epoch": 0.52, "grad_norm": 0.5330382585525513, "learning_rate": 0.0005564943587338622, "loss": 3.2335, "step": 10625 }, { "epoch": 0.52, "grad_norm": 0.530340313911438, "learning_rate": 0.0005564863730234151, "loss": 3.1747, "step": 10626 }, { "epoch": 0.52, "grad_norm": 0.5398244261741638, "learning_rate": 0.0005564783866374323, "loss": 3.407, "step": 10627 }, { "epoch": 0.52, "grad_norm": 0.4917842447757721, "learning_rate": 0.0005564703995759347, "loss": 3.144, "step": 10628 }, { "epoch": 0.52, "grad_norm": 0.5320751667022705, "learning_rate": 0.0005564624118389435, "loss": 3.1363, "step": 10629 }, { "epoch": 0.52, "grad_norm": 0.5649660229682922, "learning_rate": 0.0005564544234264797, "loss": 3.283, "step": 10630 }, { "epoch": 0.52, "grad_norm": 0.5005009174346924, "learning_rate": 0.0005564464343385642, "loss": 3.2569, "step": 10631 }, { "epoch": 0.52, "grad_norm": 0.5064327120780945, "learning_rate": 0.0005564384445752181, "loss": 3.066, "step": 10632 }, { "epoch": 0.52, "grad_norm": 0.5261727571487427, "learning_rate": 0.0005564304541364626, "loss": 3.186, "step": 10633 }, { "epoch": 0.52, "grad_norm": 0.49180030822753906, "learning_rate": 0.0005564224630223186, "loss": 3.3107, "step": 10634 }, { "epoch": 0.52, "grad_norm": 0.5282382369041443, "learning_rate": 0.0005564144712328072, "loss": 3.2852, "step": 10635 }, { "epoch": 0.52, "grad_norm": 0.5133998394012451, "learning_rate": 0.0005564064787679494, "loss": 3.2587, "step": 10636 }, { "epoch": 0.52, "grad_norm": 0.5127143859863281, "learning_rate": 0.0005563984856277662, "loss": 3.475, "step": 10637 }, { "epoch": 0.52, "grad_norm": 0.5018812417984009, "learning_rate": 0.000556390491812279, "loss": 2.9832, "step": 10638 }, { "epoch": 0.52, "grad_norm": 0.5890460014343262, "learning_rate": 0.0005563824973215083, "loss": 3.3091, "step": 10639 }, { "epoch": 0.52, "grad_norm": 0.5279850363731384, "learning_rate": 0.0005563745021554756, "loss": 3.4503, "step": 10640 }, { "epoch": 0.52, "grad_norm": 0.5249155759811401, "learning_rate": 0.0005563665063142018, "loss": 3.0548, "step": 10641 }, { "epoch": 0.52, "grad_norm": 0.5137816667556763, "learning_rate": 0.0005563585097977079, "loss": 3.4102, "step": 10642 }, { "epoch": 0.52, "grad_norm": 0.5169107913970947, "learning_rate": 0.000556350512606015, "loss": 3.3964, "step": 10643 }, { "epoch": 0.52, "grad_norm": 0.5247167944908142, "learning_rate": 0.0005563425147391442, "loss": 3.1934, "step": 10644 }, { "epoch": 0.52, "grad_norm": 0.5554961562156677, "learning_rate": 0.0005563345161971165, "loss": 3.6094, "step": 10645 }, { "epoch": 0.52, "grad_norm": 0.5095405578613281, "learning_rate": 0.0005563265169799532, "loss": 3.5217, "step": 10646 }, { "epoch": 0.52, "grad_norm": 0.5305556058883667, "learning_rate": 0.0005563185170876751, "loss": 3.1786, "step": 10647 }, { "epoch": 0.52, "grad_norm": 0.5047399997711182, "learning_rate": 0.0005563105165203034, "loss": 3.1298, "step": 10648 }, { "epoch": 0.52, "grad_norm": 0.5853273272514343, "learning_rate": 0.0005563025152778591, "loss": 3.0866, "step": 10649 }, { "epoch": 0.52, "grad_norm": 0.5093114972114563, "learning_rate": 0.0005562945133603633, "loss": 3.273, "step": 10650 }, { "epoch": 0.52, "grad_norm": 0.4893055558204651, "learning_rate": 0.000556286510767837, "loss": 3.3782, "step": 10651 }, { "epoch": 0.52, "grad_norm": 0.6072062849998474, "learning_rate": 0.0005562785075003013, "loss": 3.24, "step": 10652 }, { "epoch": 0.52, "grad_norm": 0.49338966608047485, "learning_rate": 0.0005562705035577775, "loss": 3.4271, "step": 10653 }, { "epoch": 0.52, "grad_norm": 0.5136591792106628, "learning_rate": 0.0005562624989402864, "loss": 3.2597, "step": 10654 }, { "epoch": 0.52, "grad_norm": 0.5117157101631165, "learning_rate": 0.0005562544936478492, "loss": 3.2494, "step": 10655 }, { "epoch": 0.52, "grad_norm": 0.4980575442314148, "learning_rate": 0.0005562464876804871, "loss": 3.3992, "step": 10656 }, { "epoch": 0.52, "grad_norm": 0.5555065870285034, "learning_rate": 0.000556238481038221, "loss": 3.2717, "step": 10657 }, { "epoch": 0.52, "grad_norm": 0.5585774779319763, "learning_rate": 0.000556230473721072, "loss": 3.2388, "step": 10658 }, { "epoch": 0.52, "grad_norm": 0.6022112965583801, "learning_rate": 0.0005562224657290613, "loss": 3.0722, "step": 10659 }, { "epoch": 0.52, "grad_norm": 0.5127832889556885, "learning_rate": 0.0005562144570622099, "loss": 3.3021, "step": 10660 }, { "epoch": 0.52, "grad_norm": 0.5011223554611206, "learning_rate": 0.000556206447720539, "loss": 3.3003, "step": 10661 }, { "epoch": 0.52, "grad_norm": 0.5553777813911438, "learning_rate": 0.0005561984377040695, "loss": 3.1519, "step": 10662 }, { "epoch": 0.52, "grad_norm": 0.5582361817359924, "learning_rate": 0.0005561904270128227, "loss": 3.1381, "step": 10663 }, { "epoch": 0.52, "grad_norm": 0.5032232403755188, "learning_rate": 0.0005561824156468196, "loss": 3.1779, "step": 10664 }, { "epoch": 0.52, "grad_norm": 0.516594409942627, "learning_rate": 0.0005561744036060814, "loss": 3.3612, "step": 10665 }, { "epoch": 0.52, "grad_norm": 0.5512834191322327, "learning_rate": 0.0005561663908906291, "loss": 3.2972, "step": 10666 }, { "epoch": 0.52, "grad_norm": 0.5587075352668762, "learning_rate": 0.0005561583775004837, "loss": 3.1885, "step": 10667 }, { "epoch": 0.52, "grad_norm": 0.6395102739334106, "learning_rate": 0.0005561503634356666, "loss": 3.368, "step": 10668 }, { "epoch": 0.52, "grad_norm": 0.572404146194458, "learning_rate": 0.0005561423486961987, "loss": 3.0949, "step": 10669 }, { "epoch": 0.52, "grad_norm": 0.5496918559074402, "learning_rate": 0.000556134333282101, "loss": 3.1419, "step": 10670 }, { "epoch": 0.52, "grad_norm": 0.5133854150772095, "learning_rate": 0.0005561263171933949, "loss": 3.3315, "step": 10671 }, { "epoch": 0.52, "grad_norm": 0.5696301460266113, "learning_rate": 0.0005561183004301015, "loss": 3.1278, "step": 10672 }, { "epoch": 0.52, "grad_norm": 0.5422386527061462, "learning_rate": 0.0005561102829922415, "loss": 3.2289, "step": 10673 }, { "epoch": 0.52, "grad_norm": 0.5507988333702087, "learning_rate": 0.0005561022648798364, "loss": 3.2526, "step": 10674 }, { "epoch": 0.52, "grad_norm": 0.5414395332336426, "learning_rate": 0.0005560942460929074, "loss": 3.0932, "step": 10675 }, { "epoch": 0.52, "grad_norm": 0.611114501953125, "learning_rate": 0.0005560862266314752, "loss": 3.0416, "step": 10676 }, { "epoch": 0.52, "grad_norm": 0.5457938313484192, "learning_rate": 0.0005560782064955613, "loss": 3.1892, "step": 10677 }, { "epoch": 0.52, "grad_norm": 0.5073613524436951, "learning_rate": 0.0005560701856851866, "loss": 3.3985, "step": 10678 }, { "epoch": 0.52, "grad_norm": 0.5243719220161438, "learning_rate": 0.0005560621642003723, "loss": 3.1556, "step": 10679 }, { "epoch": 0.52, "grad_norm": 0.5766971707344055, "learning_rate": 0.0005560541420411398, "loss": 3.3581, "step": 10680 }, { "epoch": 0.52, "grad_norm": 0.5084385871887207, "learning_rate": 0.0005560461192075097, "loss": 3.1972, "step": 10681 }, { "epoch": 0.52, "grad_norm": 0.5689043402671814, "learning_rate": 0.0005560380956995035, "loss": 3.2245, "step": 10682 }, { "epoch": 0.52, "grad_norm": 0.506496787071228, "learning_rate": 0.0005560300715171421, "loss": 3.2089, "step": 10683 }, { "epoch": 0.52, "grad_norm": 0.5448225736618042, "learning_rate": 0.0005560220466604469, "loss": 3.1088, "step": 10684 }, { "epoch": 0.52, "grad_norm": 0.5289160013198853, "learning_rate": 0.0005560140211294389, "loss": 3.3693, "step": 10685 }, { "epoch": 0.52, "grad_norm": 0.5228204131126404, "learning_rate": 0.0005560059949241392, "loss": 3.4617, "step": 10686 }, { "epoch": 0.52, "grad_norm": 0.49474701285362244, "learning_rate": 0.000555997968044569, "loss": 3.2549, "step": 10687 }, { "epoch": 0.52, "grad_norm": 0.5213100910186768, "learning_rate": 0.0005559899404907493, "loss": 3.2791, "step": 10688 }, { "epoch": 0.52, "grad_norm": 0.5579019784927368, "learning_rate": 0.0005559819122627016, "loss": 3.0853, "step": 10689 }, { "epoch": 0.52, "grad_norm": 0.5246168375015259, "learning_rate": 0.0005559738833604466, "loss": 3.1685, "step": 10690 }, { "epoch": 0.52, "grad_norm": 0.5287438631057739, "learning_rate": 0.0005559658537840058, "loss": 3.2581, "step": 10691 }, { "epoch": 0.52, "grad_norm": 0.576284646987915, "learning_rate": 0.0005559578235334002, "loss": 3.3451, "step": 10692 }, { "epoch": 0.52, "grad_norm": 0.5133945345878601, "learning_rate": 0.0005559497926086508, "loss": 3.2493, "step": 10693 }, { "epoch": 0.52, "grad_norm": 0.5034992098808289, "learning_rate": 0.000555941761009779, "loss": 3.3255, "step": 10694 }, { "epoch": 0.52, "grad_norm": 0.5439068078994751, "learning_rate": 0.0005559337287368058, "loss": 3.2783, "step": 10695 }, { "epoch": 0.52, "grad_norm": 0.5081217288970947, "learning_rate": 0.0005559256957897525, "loss": 3.2928, "step": 10696 }, { "epoch": 0.52, "grad_norm": 0.5664759278297424, "learning_rate": 0.0005559176621686402, "loss": 3.2541, "step": 10697 }, { "epoch": 0.52, "grad_norm": 0.5075600743293762, "learning_rate": 0.0005559096278734899, "loss": 3.1877, "step": 10698 }, { "epoch": 0.52, "grad_norm": 0.5304969549179077, "learning_rate": 0.000555901592904323, "loss": 3.347, "step": 10699 }, { "epoch": 0.52, "grad_norm": 0.5134689211845398, "learning_rate": 0.0005558935572611605, "loss": 3.2153, "step": 10700 }, { "epoch": 0.52, "grad_norm": 0.5504531264305115, "learning_rate": 0.0005558855209440236, "loss": 3.1492, "step": 10701 }, { "epoch": 0.52, "grad_norm": 0.5000514984130859, "learning_rate": 0.0005558774839529335, "loss": 3.472, "step": 10702 }, { "epoch": 0.52, "grad_norm": 0.532200276851654, "learning_rate": 0.0005558694462879113, "loss": 3.1464, "step": 10703 }, { "epoch": 0.52, "grad_norm": 0.5475481152534485, "learning_rate": 0.0005558614079489784, "loss": 2.9702, "step": 10704 }, { "epoch": 0.52, "grad_norm": 0.5486966967582703, "learning_rate": 0.0005558533689361557, "loss": 3.5168, "step": 10705 }, { "epoch": 0.52, "grad_norm": 0.5467349886894226, "learning_rate": 0.0005558453292494644, "loss": 3.4869, "step": 10706 }, { "epoch": 0.52, "grad_norm": 0.720486581325531, "learning_rate": 0.0005558372888889258, "loss": 3.0787, "step": 10707 }, { "epoch": 0.52, "grad_norm": 0.5721949338912964, "learning_rate": 0.0005558292478545611, "loss": 3.5098, "step": 10708 }, { "epoch": 0.52, "grad_norm": 0.4928334355354309, "learning_rate": 0.0005558212061463912, "loss": 3.5137, "step": 10709 }, { "epoch": 0.52, "grad_norm": 0.5080165863037109, "learning_rate": 0.0005558131637644376, "loss": 3.3198, "step": 10710 }, { "epoch": 0.52, "grad_norm": 0.5456277132034302, "learning_rate": 0.0005558051207087214, "loss": 3.2869, "step": 10711 }, { "epoch": 0.52, "grad_norm": 0.48766282200813293, "learning_rate": 0.0005557970769792636, "loss": 3.1597, "step": 10712 }, { "epoch": 0.53, "grad_norm": 0.5692100524902344, "learning_rate": 0.0005557890325760856, "loss": 3.2611, "step": 10713 }, { "epoch": 0.53, "grad_norm": 0.5719293355941772, "learning_rate": 0.0005557809874992086, "loss": 3.0606, "step": 10714 }, { "epoch": 0.53, "grad_norm": 0.5092300772666931, "learning_rate": 0.0005557729417486536, "loss": 3.1642, "step": 10715 }, { "epoch": 0.53, "grad_norm": 0.5151323676109314, "learning_rate": 0.0005557648953244419, "loss": 3.2684, "step": 10716 }, { "epoch": 0.53, "grad_norm": 0.48842179775238037, "learning_rate": 0.0005557568482265947, "loss": 3.3302, "step": 10717 }, { "epoch": 0.53, "grad_norm": 0.5149058699607849, "learning_rate": 0.0005557488004551332, "loss": 3.3089, "step": 10718 }, { "epoch": 0.53, "grad_norm": 0.5239120125770569, "learning_rate": 0.0005557407520100785, "loss": 3.2215, "step": 10719 }, { "epoch": 0.53, "grad_norm": 0.5721972584724426, "learning_rate": 0.000555732702891452, "loss": 3.2324, "step": 10720 }, { "epoch": 0.53, "grad_norm": 0.5425095558166504, "learning_rate": 0.0005557246530992748, "loss": 3.3185, "step": 10721 }, { "epoch": 0.53, "grad_norm": 0.5574762225151062, "learning_rate": 0.000555716602633568, "loss": 3.4277, "step": 10722 }, { "epoch": 0.53, "grad_norm": 0.5534297823905945, "learning_rate": 0.0005557085514943529, "loss": 3.3156, "step": 10723 }, { "epoch": 0.53, "grad_norm": 0.5003551244735718, "learning_rate": 0.0005557004996816507, "loss": 3.3441, "step": 10724 }, { "epoch": 0.53, "grad_norm": 0.5311870574951172, "learning_rate": 0.0005556924471954826, "loss": 3.235, "step": 10725 }, { "epoch": 0.53, "grad_norm": 0.5108062624931335, "learning_rate": 0.0005556843940358698, "loss": 3.1934, "step": 10726 }, { "epoch": 0.53, "grad_norm": 0.5077083110809326, "learning_rate": 0.0005556763402028334, "loss": 3.2491, "step": 10727 }, { "epoch": 0.53, "grad_norm": 0.5326809883117676, "learning_rate": 0.0005556682856963949, "loss": 3.3736, "step": 10728 }, { "epoch": 0.53, "grad_norm": 0.49083372950553894, "learning_rate": 0.0005556602305165752, "loss": 3.0318, "step": 10729 }, { "epoch": 0.53, "grad_norm": 0.49490422010421753, "learning_rate": 0.0005556521746633957, "loss": 3.1, "step": 10730 }, { "epoch": 0.53, "grad_norm": 0.48100271821022034, "learning_rate": 0.0005556441181368776, "loss": 3.2817, "step": 10731 }, { "epoch": 0.53, "grad_norm": 0.5259497761726379, "learning_rate": 0.0005556360609370421, "loss": 3.3625, "step": 10732 }, { "epoch": 0.53, "grad_norm": 0.5028544068336487, "learning_rate": 0.0005556280030639103, "loss": 3.2937, "step": 10733 }, { "epoch": 0.53, "grad_norm": 0.628638744354248, "learning_rate": 0.0005556199445175037, "loss": 3.2895, "step": 10734 }, { "epoch": 0.53, "grad_norm": 0.517248809337616, "learning_rate": 0.0005556118852978433, "loss": 3.4441, "step": 10735 }, { "epoch": 0.53, "grad_norm": 0.5241116285324097, "learning_rate": 0.0005556038254049503, "loss": 3.071, "step": 10736 }, { "epoch": 0.53, "grad_norm": 0.5196915864944458, "learning_rate": 0.0005555957648388461, "loss": 3.4301, "step": 10737 }, { "epoch": 0.53, "grad_norm": 0.5392457246780396, "learning_rate": 0.000555587703599552, "loss": 3.0426, "step": 10738 }, { "epoch": 0.53, "grad_norm": 0.49311745166778564, "learning_rate": 0.0005555796416870888, "loss": 3.3097, "step": 10739 }, { "epoch": 0.53, "grad_norm": 0.643158495426178, "learning_rate": 0.0005555715791014781, "loss": 3.2897, "step": 10740 }, { "epoch": 0.53, "grad_norm": 0.5130305290222168, "learning_rate": 0.0005555635158427412, "loss": 3.2176, "step": 10741 }, { "epoch": 0.53, "grad_norm": 0.5505951046943665, "learning_rate": 0.000555555451910899, "loss": 3.2898, "step": 10742 }, { "epoch": 0.53, "grad_norm": 0.5373878479003906, "learning_rate": 0.000555547387305973, "loss": 3.2094, "step": 10743 }, { "epoch": 0.53, "grad_norm": 0.4722157418727875, "learning_rate": 0.0005555393220279843, "loss": 3.1176, "step": 10744 }, { "epoch": 0.53, "grad_norm": 0.5164155960083008, "learning_rate": 0.0005555312560769542, "loss": 3.1844, "step": 10745 }, { "epoch": 0.53, "grad_norm": 0.49586886167526245, "learning_rate": 0.000555523189452904, "loss": 3.0007, "step": 10746 }, { "epoch": 0.53, "grad_norm": 0.5387834906578064, "learning_rate": 0.0005555151221558549, "loss": 3.2779, "step": 10747 }, { "epoch": 0.53, "grad_norm": 0.5266426205635071, "learning_rate": 0.0005555070541858281, "loss": 3.1793, "step": 10748 }, { "epoch": 0.53, "grad_norm": 0.5374503135681152, "learning_rate": 0.000555498985542845, "loss": 3.2667, "step": 10749 }, { "epoch": 0.53, "grad_norm": 0.5462484359741211, "learning_rate": 0.0005554909162269266, "loss": 3.189, "step": 10750 }, { "epoch": 0.53, "grad_norm": 0.5269687175750732, "learning_rate": 0.0005554828462380945, "loss": 3.3992, "step": 10751 }, { "epoch": 0.53, "grad_norm": 0.5425649881362915, "learning_rate": 0.0005554747755763695, "loss": 3.2771, "step": 10752 }, { "epoch": 0.53, "grad_norm": 0.5352199077606201, "learning_rate": 0.0005554667042417733, "loss": 3.5624, "step": 10753 }, { "epoch": 0.53, "grad_norm": 0.5189509987831116, "learning_rate": 0.000555458632234327, "loss": 3.2905, "step": 10754 }, { "epoch": 0.53, "grad_norm": 0.5382845401763916, "learning_rate": 0.0005554505595540516, "loss": 3.1669, "step": 10755 }, { "epoch": 0.53, "grad_norm": 0.5350643396377563, "learning_rate": 0.0005554424862009688, "loss": 3.202, "step": 10756 }, { "epoch": 0.53, "grad_norm": 0.5131836533546448, "learning_rate": 0.0005554344121750994, "loss": 3.1861, "step": 10757 }, { "epoch": 0.53, "grad_norm": 0.5483985543251038, "learning_rate": 0.0005554263374764651, "loss": 3.2958, "step": 10758 }, { "epoch": 0.53, "grad_norm": 0.5102946758270264, "learning_rate": 0.000555418262105087, "loss": 3.2614, "step": 10759 }, { "epoch": 0.53, "grad_norm": 0.5368705987930298, "learning_rate": 0.0005554101860609864, "loss": 3.3872, "step": 10760 }, { "epoch": 0.53, "grad_norm": 0.49759748578071594, "learning_rate": 0.0005554021093441844, "loss": 3.3422, "step": 10761 }, { "epoch": 0.53, "grad_norm": 0.5840714573860168, "learning_rate": 0.0005553940319547024, "loss": 3.3937, "step": 10762 }, { "epoch": 0.53, "grad_norm": 0.5512921214103699, "learning_rate": 0.0005553859538925617, "loss": 3.3002, "step": 10763 }, { "epoch": 0.53, "grad_norm": 0.49647557735443115, "learning_rate": 0.0005553778751577836, "loss": 3.3018, "step": 10764 }, { "epoch": 0.53, "grad_norm": 0.550879955291748, "learning_rate": 0.0005553697957503893, "loss": 3.3197, "step": 10765 }, { "epoch": 0.53, "grad_norm": 0.5185007452964783, "learning_rate": 0.0005553617156704001, "loss": 3.2382, "step": 10766 }, { "epoch": 0.53, "grad_norm": 0.510525643825531, "learning_rate": 0.0005553536349178372, "loss": 3.3413, "step": 10767 }, { "epoch": 0.53, "grad_norm": 0.5063631534576416, "learning_rate": 0.0005553455534927221, "loss": 3.1335, "step": 10768 }, { "epoch": 0.53, "grad_norm": 0.5001246929168701, "learning_rate": 0.000555337471395076, "loss": 3.1726, "step": 10769 }, { "epoch": 0.53, "grad_norm": 0.5542845129966736, "learning_rate": 0.00055532938862492, "loss": 3.1714, "step": 10770 }, { "epoch": 0.53, "grad_norm": 0.5402297377586365, "learning_rate": 0.0005553213051822755, "loss": 3.2763, "step": 10771 }, { "epoch": 0.53, "grad_norm": 0.5092653632164001, "learning_rate": 0.0005553132210671639, "loss": 3.2897, "step": 10772 }, { "epoch": 0.53, "grad_norm": 0.5712873935699463, "learning_rate": 0.0005553051362796064, "loss": 3.3383, "step": 10773 }, { "epoch": 0.53, "grad_norm": 0.5029581189155579, "learning_rate": 0.0005552970508196243, "loss": 3.1195, "step": 10774 }, { "epoch": 0.53, "grad_norm": 0.558640718460083, "learning_rate": 0.0005552889646872389, "loss": 3.1996, "step": 10775 }, { "epoch": 0.53, "grad_norm": 0.5104977488517761, "learning_rate": 0.0005552808778824715, "loss": 3.1922, "step": 10776 }, { "epoch": 0.53, "grad_norm": 0.4940243363380432, "learning_rate": 0.0005552727904053435, "loss": 2.9714, "step": 10777 }, { "epoch": 0.53, "grad_norm": 0.5271364450454712, "learning_rate": 0.0005552647022558759, "loss": 3.2721, "step": 10778 }, { "epoch": 0.53, "grad_norm": 0.5337794423103333, "learning_rate": 0.0005552566134340904, "loss": 3.3958, "step": 10779 }, { "epoch": 0.53, "grad_norm": 0.4681585133075714, "learning_rate": 0.000555248523940008, "loss": 3.3627, "step": 10780 }, { "epoch": 0.53, "grad_norm": 0.5498515367507935, "learning_rate": 0.0005552404337736501, "loss": 3.3218, "step": 10781 }, { "epoch": 0.53, "grad_norm": 0.4920918941497803, "learning_rate": 0.000555232342935038, "loss": 3.2476, "step": 10782 }, { "epoch": 0.53, "grad_norm": 0.5251589417457581, "learning_rate": 0.0005552242514241931, "loss": 3.4136, "step": 10783 }, { "epoch": 0.53, "grad_norm": 0.49055927991867065, "learning_rate": 0.0005552161592411366, "loss": 3.4924, "step": 10784 }, { "epoch": 0.53, "grad_norm": 0.5254443287849426, "learning_rate": 0.0005552080663858899, "loss": 3.1118, "step": 10785 }, { "epoch": 0.53, "grad_norm": 0.5273293852806091, "learning_rate": 0.0005551999728584742, "loss": 3.2376, "step": 10786 }, { "epoch": 0.53, "grad_norm": 0.46797534823417664, "learning_rate": 0.000555191878658911, "loss": 3.1969, "step": 10787 }, { "epoch": 0.53, "grad_norm": 0.5711906552314758, "learning_rate": 0.0005551837837872213, "loss": 3.3446, "step": 10788 }, { "epoch": 0.53, "grad_norm": 0.5303103923797607, "learning_rate": 0.0005551756882434268, "loss": 3.1252, "step": 10789 }, { "epoch": 0.53, "grad_norm": 0.515155017375946, "learning_rate": 0.0005551675920275486, "loss": 3.2225, "step": 10790 }, { "epoch": 0.53, "grad_norm": 0.5145278573036194, "learning_rate": 0.0005551594951396081, "loss": 3.5122, "step": 10791 }, { "epoch": 0.53, "grad_norm": 0.4963783025741577, "learning_rate": 0.0005551513975796265, "loss": 3.4213, "step": 10792 }, { "epoch": 0.53, "grad_norm": 0.5196395516395569, "learning_rate": 0.0005551432993476254, "loss": 3.2661, "step": 10793 }, { "epoch": 0.53, "grad_norm": 0.5125848054885864, "learning_rate": 0.0005551352004436258, "loss": 3.2938, "step": 10794 }, { "epoch": 0.53, "grad_norm": 0.5345282554626465, "learning_rate": 0.0005551271008676492, "loss": 3.1414, "step": 10795 }, { "epoch": 0.53, "grad_norm": 0.4970857799053192, "learning_rate": 0.0005551190006197169, "loss": 3.3573, "step": 10796 }, { "epoch": 0.53, "grad_norm": 0.49079298973083496, "learning_rate": 0.0005551108996998503, "loss": 3.3713, "step": 10797 }, { "epoch": 0.53, "grad_norm": 0.502100944519043, "learning_rate": 0.0005551027981080707, "loss": 3.2045, "step": 10798 }, { "epoch": 0.53, "grad_norm": 0.5889394879341125, "learning_rate": 0.0005550946958443994, "loss": 3.3565, "step": 10799 }, { "epoch": 0.53, "grad_norm": 0.5061367154121399, "learning_rate": 0.0005550865929088577, "loss": 3.349, "step": 10800 }, { "epoch": 0.53, "grad_norm": 0.5013979077339172, "learning_rate": 0.000555078489301467, "loss": 3.3525, "step": 10801 }, { "epoch": 0.53, "grad_norm": 0.49582046270370483, "learning_rate": 0.0005550703850222487, "loss": 3.2322, "step": 10802 }, { "epoch": 0.53, "grad_norm": 0.5003193020820618, "learning_rate": 0.0005550622800712242, "loss": 3.3309, "step": 10803 }, { "epoch": 0.53, "grad_norm": 0.5031837821006775, "learning_rate": 0.0005550541744484147, "loss": 3.1939, "step": 10804 }, { "epoch": 0.53, "grad_norm": 0.5380825400352478, "learning_rate": 0.0005550460681538415, "loss": 3.2945, "step": 10805 }, { "epoch": 0.53, "grad_norm": 0.5140364766120911, "learning_rate": 0.0005550379611875261, "loss": 3.3461, "step": 10806 }, { "epoch": 0.53, "grad_norm": 0.5664733052253723, "learning_rate": 0.0005550298535494898, "loss": 3.0924, "step": 10807 }, { "epoch": 0.53, "grad_norm": 0.5468794703483582, "learning_rate": 0.000555021745239754, "loss": 3.0776, "step": 10808 }, { "epoch": 0.53, "grad_norm": 0.5097938776016235, "learning_rate": 0.00055501363625834, "loss": 3.4317, "step": 10809 }, { "epoch": 0.53, "grad_norm": 0.5171865820884705, "learning_rate": 0.000555005526605269, "loss": 3.1843, "step": 10810 }, { "epoch": 0.53, "grad_norm": 0.544694721698761, "learning_rate": 0.0005549974162805626, "loss": 3.1868, "step": 10811 }, { "epoch": 0.53, "grad_norm": 0.4788232147693634, "learning_rate": 0.0005549893052842421, "loss": 3.4821, "step": 10812 }, { "epoch": 0.53, "grad_norm": 0.5630718469619751, "learning_rate": 0.0005549811936163288, "loss": 3.2703, "step": 10813 }, { "epoch": 0.53, "grad_norm": 0.5108457207679749, "learning_rate": 0.0005549730812768441, "loss": 3.2386, "step": 10814 }, { "epoch": 0.53, "grad_norm": 0.5172486305236816, "learning_rate": 0.0005549649682658096, "loss": 3.4601, "step": 10815 }, { "epoch": 0.53, "grad_norm": 0.5107886791229248, "learning_rate": 0.0005549568545832462, "loss": 3.1461, "step": 10816 }, { "epoch": 0.53, "grad_norm": 0.5882880091667175, "learning_rate": 0.0005549487402291756, "loss": 3.2976, "step": 10817 }, { "epoch": 0.53, "grad_norm": 0.5461258292198181, "learning_rate": 0.000554940625203619, "loss": 3.3833, "step": 10818 }, { "epoch": 0.53, "grad_norm": 0.5405899882316589, "learning_rate": 0.0005549325095065979, "loss": 3.3489, "step": 10819 }, { "epoch": 0.53, "grad_norm": 0.5153059959411621, "learning_rate": 0.0005549243931381336, "loss": 2.9776, "step": 10820 }, { "epoch": 0.53, "grad_norm": 0.49118930101394653, "learning_rate": 0.0005549162760982477, "loss": 3.0449, "step": 10821 }, { "epoch": 0.53, "grad_norm": 0.5056452751159668, "learning_rate": 0.0005549081583869612, "loss": 3.4947, "step": 10822 }, { "epoch": 0.53, "grad_norm": 0.5282566547393799, "learning_rate": 0.0005549000400042958, "loss": 3.3881, "step": 10823 }, { "epoch": 0.53, "grad_norm": 0.5301047563552856, "learning_rate": 0.0005548919209502726, "loss": 3.1563, "step": 10824 }, { "epoch": 0.53, "grad_norm": 0.5251234769821167, "learning_rate": 0.0005548838012249132, "loss": 3.2803, "step": 10825 }, { "epoch": 0.53, "grad_norm": 0.523571252822876, "learning_rate": 0.000554875680828239, "loss": 3.318, "step": 10826 }, { "epoch": 0.53, "grad_norm": 0.5182716846466064, "learning_rate": 0.0005548675597602711, "loss": 3.3921, "step": 10827 }, { "epoch": 0.53, "grad_norm": 0.5157158970832825, "learning_rate": 0.0005548594380210313, "loss": 3.0689, "step": 10828 }, { "epoch": 0.53, "grad_norm": 0.5221959948539734, "learning_rate": 0.0005548513156105407, "loss": 3.1948, "step": 10829 }, { "epoch": 0.53, "grad_norm": 0.49166935682296753, "learning_rate": 0.0005548431925288208, "loss": 3.1451, "step": 10830 }, { "epoch": 0.53, "grad_norm": 0.5118061900138855, "learning_rate": 0.000554835068775893, "loss": 3.2432, "step": 10831 }, { "epoch": 0.53, "grad_norm": 0.49349334836006165, "learning_rate": 0.0005548269443517787, "loss": 3.2898, "step": 10832 }, { "epoch": 0.53, "grad_norm": 0.5066019892692566, "learning_rate": 0.0005548188192564991, "loss": 3.2891, "step": 10833 }, { "epoch": 0.53, "grad_norm": 0.5938020944595337, "learning_rate": 0.000554810693490076, "loss": 3.2116, "step": 10834 }, { "epoch": 0.53, "grad_norm": 0.5334991216659546, "learning_rate": 0.0005548025670525304, "loss": 3.1081, "step": 10835 }, { "epoch": 0.53, "grad_norm": 0.502778947353363, "learning_rate": 0.000554794439943884, "loss": 3.233, "step": 10836 }, { "epoch": 0.53, "grad_norm": 0.5045210123062134, "learning_rate": 0.0005547863121641581, "loss": 3.314, "step": 10837 }, { "epoch": 0.53, "grad_norm": 0.5293318033218384, "learning_rate": 0.000554778183713374, "loss": 3.3784, "step": 10838 }, { "epoch": 0.53, "grad_norm": 0.518131673336029, "learning_rate": 0.0005547700545915531, "loss": 3.3426, "step": 10839 }, { "epoch": 0.53, "grad_norm": 0.4948231279850006, "learning_rate": 0.0005547619247987171, "loss": 3.0953, "step": 10840 }, { "epoch": 0.53, "grad_norm": 0.5470211505889893, "learning_rate": 0.0005547537943348871, "loss": 3.1605, "step": 10841 }, { "epoch": 0.53, "grad_norm": 0.5280324816703796, "learning_rate": 0.0005547456632000846, "loss": 3.2811, "step": 10842 }, { "epoch": 0.53, "grad_norm": 0.5659511089324951, "learning_rate": 0.0005547375313943312, "loss": 3.2445, "step": 10843 }, { "epoch": 0.53, "grad_norm": 0.5293766856193542, "learning_rate": 0.000554729398917648, "loss": 3.2949, "step": 10844 }, { "epoch": 0.53, "grad_norm": 0.5941742062568665, "learning_rate": 0.0005547212657700568, "loss": 3.202, "step": 10845 }, { "epoch": 0.53, "grad_norm": 0.5199886560440063, "learning_rate": 0.0005547131319515787, "loss": 3.1408, "step": 10846 }, { "epoch": 0.53, "grad_norm": 0.5088639259338379, "learning_rate": 0.0005547049974622351, "loss": 3.2688, "step": 10847 }, { "epoch": 0.53, "grad_norm": 0.5248528718948364, "learning_rate": 0.0005546968623020477, "loss": 3.2334, "step": 10848 }, { "epoch": 0.53, "grad_norm": 0.5154558420181274, "learning_rate": 0.0005546887264710377, "loss": 3.2585, "step": 10849 }, { "epoch": 0.53, "grad_norm": 0.5221732258796692, "learning_rate": 0.0005546805899692267, "loss": 3.2253, "step": 10850 }, { "epoch": 0.53, "grad_norm": 0.5415873527526855, "learning_rate": 0.000554672452796636, "loss": 3.1749, "step": 10851 }, { "epoch": 0.53, "grad_norm": 0.4975447952747345, "learning_rate": 0.000554664314953287, "loss": 3.0391, "step": 10852 }, { "epoch": 0.53, "grad_norm": 0.4918574094772339, "learning_rate": 0.0005546561764392014, "loss": 3.1746, "step": 10853 }, { "epoch": 0.53, "grad_norm": 0.516654372215271, "learning_rate": 0.0005546480372544003, "loss": 3.2713, "step": 10854 }, { "epoch": 0.53, "grad_norm": 0.5436745285987854, "learning_rate": 0.0005546398973989053, "loss": 3.0698, "step": 10855 }, { "epoch": 0.53, "grad_norm": 0.5498283505439758, "learning_rate": 0.0005546317568727379, "loss": 3.3623, "step": 10856 }, { "epoch": 0.53, "grad_norm": 0.5308879613876343, "learning_rate": 0.0005546236156759194, "loss": 3.2788, "step": 10857 }, { "epoch": 0.53, "grad_norm": 0.535848081111908, "learning_rate": 0.0005546154738084711, "loss": 3.3993, "step": 10858 }, { "epoch": 0.53, "grad_norm": 0.6059902310371399, "learning_rate": 0.0005546073312704149, "loss": 3.2072, "step": 10859 }, { "epoch": 0.53, "grad_norm": 0.5028658509254456, "learning_rate": 0.0005545991880617719, "loss": 3.2419, "step": 10860 }, { "epoch": 0.53, "grad_norm": 0.5084434747695923, "learning_rate": 0.0005545910441825636, "loss": 3.2826, "step": 10861 }, { "epoch": 0.53, "grad_norm": 0.5181224942207336, "learning_rate": 0.0005545828996328116, "loss": 3.1966, "step": 10862 }, { "epoch": 0.53, "grad_norm": 0.5151437520980835, "learning_rate": 0.0005545747544125371, "loss": 3.169, "step": 10863 }, { "epoch": 0.53, "grad_norm": 0.4924545884132385, "learning_rate": 0.0005545666085217619, "loss": 3.254, "step": 10864 }, { "epoch": 0.53, "grad_norm": 0.5671659708023071, "learning_rate": 0.000554558461960507, "loss": 3.4059, "step": 10865 }, { "epoch": 0.53, "grad_norm": 0.5134097337722778, "learning_rate": 0.0005545503147287941, "loss": 3.1668, "step": 10866 }, { "epoch": 0.53, "grad_norm": 0.5386476516723633, "learning_rate": 0.0005545421668266448, "loss": 3.2502, "step": 10867 }, { "epoch": 0.53, "grad_norm": 0.5015349984169006, "learning_rate": 0.0005545340182540805, "loss": 3.3447, "step": 10868 }, { "epoch": 0.53, "grad_norm": 0.4915396273136139, "learning_rate": 0.0005545258690111224, "loss": 3.302, "step": 10869 }, { "epoch": 0.53, "grad_norm": 0.5859756469726562, "learning_rate": 0.0005545177190977923, "loss": 3.1863, "step": 10870 }, { "epoch": 0.53, "grad_norm": 0.500820517539978, "learning_rate": 0.0005545095685141113, "loss": 3.3541, "step": 10871 }, { "epoch": 0.53, "grad_norm": 0.5108267068862915, "learning_rate": 0.0005545014172601014, "loss": 3.0616, "step": 10872 }, { "epoch": 0.53, "grad_norm": 0.5519191026687622, "learning_rate": 0.0005544932653357835, "loss": 3.3225, "step": 10873 }, { "epoch": 0.53, "grad_norm": 0.4729250967502594, "learning_rate": 0.0005544851127411793, "loss": 3.2311, "step": 10874 }, { "epoch": 0.53, "grad_norm": 0.5374777317047119, "learning_rate": 0.0005544769594763104, "loss": 3.1297, "step": 10875 }, { "epoch": 0.53, "grad_norm": 0.4953976273536682, "learning_rate": 0.0005544688055411981, "loss": 3.0189, "step": 10876 }, { "epoch": 0.53, "grad_norm": 0.6086330413818359, "learning_rate": 0.0005544606509358639, "loss": 3.0814, "step": 10877 }, { "epoch": 0.53, "grad_norm": 0.5771964192390442, "learning_rate": 0.0005544524956603294, "loss": 3.194, "step": 10878 }, { "epoch": 0.53, "grad_norm": 0.5086356997489929, "learning_rate": 0.000554444339714616, "loss": 3.1714, "step": 10879 }, { "epoch": 0.53, "grad_norm": 0.4892556071281433, "learning_rate": 0.0005544361830987451, "loss": 3.2823, "step": 10880 }, { "epoch": 0.53, "grad_norm": 0.5008576512336731, "learning_rate": 0.0005544280258127383, "loss": 3.2336, "step": 10881 }, { "epoch": 0.53, "grad_norm": 0.5014019012451172, "learning_rate": 0.0005544198678566171, "loss": 3.321, "step": 10882 }, { "epoch": 0.53, "grad_norm": 0.5316650867462158, "learning_rate": 0.0005544117092304029, "loss": 3.4712, "step": 10883 }, { "epoch": 0.53, "grad_norm": 0.4876498878002167, "learning_rate": 0.0005544035499341172, "loss": 3.4199, "step": 10884 }, { "epoch": 0.53, "grad_norm": 0.5168114304542542, "learning_rate": 0.0005543953899677815, "loss": 3.1632, "step": 10885 }, { "epoch": 0.53, "grad_norm": 0.5220661163330078, "learning_rate": 0.0005543872293314174, "loss": 3.0831, "step": 10886 }, { "epoch": 0.53, "grad_norm": 0.4988704025745392, "learning_rate": 0.0005543790680250461, "loss": 3.2857, "step": 10887 }, { "epoch": 0.53, "grad_norm": 0.4903241991996765, "learning_rate": 0.0005543709060486895, "loss": 3.2408, "step": 10888 }, { "epoch": 0.53, "grad_norm": 0.5027443170547485, "learning_rate": 0.0005543627434023688, "loss": 3.2762, "step": 10889 }, { "epoch": 0.53, "grad_norm": 0.4971694350242615, "learning_rate": 0.0005543545800861055, "loss": 3.1649, "step": 10890 }, { "epoch": 0.53, "grad_norm": 0.5153796672821045, "learning_rate": 0.0005543464160999214, "loss": 3.3079, "step": 10891 }, { "epoch": 0.53, "grad_norm": 0.5482482314109802, "learning_rate": 0.0005543382514438376, "loss": 3.0886, "step": 10892 }, { "epoch": 0.53, "grad_norm": 0.5379815101623535, "learning_rate": 0.0005543300861178759, "loss": 3.1402, "step": 10893 }, { "epoch": 0.53, "grad_norm": 0.5130733251571655, "learning_rate": 0.0005543219201220576, "loss": 3.5, "step": 10894 }, { "epoch": 0.53, "grad_norm": 0.47875428199768066, "learning_rate": 0.0005543137534564044, "loss": 3.1322, "step": 10895 }, { "epoch": 0.53, "grad_norm": 0.5776912569999695, "learning_rate": 0.0005543055861209376, "loss": 3.1212, "step": 10896 }, { "epoch": 0.53, "grad_norm": 0.5165498852729797, "learning_rate": 0.0005542974181156789, "loss": 3.1643, "step": 10897 }, { "epoch": 0.53, "grad_norm": 0.5151985883712769, "learning_rate": 0.0005542892494406499, "loss": 3.0556, "step": 10898 }, { "epoch": 0.53, "grad_norm": 0.5367438793182373, "learning_rate": 0.0005542810800958718, "loss": 3.2911, "step": 10899 }, { "epoch": 0.53, "grad_norm": 0.49575287103652954, "learning_rate": 0.0005542729100813662, "loss": 3.3014, "step": 10900 }, { "epoch": 0.53, "grad_norm": 0.5376871824264526, "learning_rate": 0.0005542647393971548, "loss": 3.2976, "step": 10901 }, { "epoch": 0.53, "grad_norm": 0.5368502140045166, "learning_rate": 0.000554256568043259, "loss": 3.4037, "step": 10902 }, { "epoch": 0.53, "grad_norm": 0.5264731049537659, "learning_rate": 0.0005542483960197002, "loss": 3.3177, "step": 10903 }, { "epoch": 0.53, "grad_norm": 0.5258291363716125, "learning_rate": 0.0005542402233265003, "loss": 3.0652, "step": 10904 }, { "epoch": 0.53, "grad_norm": 0.5016001462936401, "learning_rate": 0.0005542320499636804, "loss": 3.3955, "step": 10905 }, { "epoch": 0.53, "grad_norm": 0.5093668103218079, "learning_rate": 0.0005542238759312623, "loss": 3.1226, "step": 10906 }, { "epoch": 0.53, "grad_norm": 0.504332959651947, "learning_rate": 0.0005542157012292673, "loss": 3.2307, "step": 10907 }, { "epoch": 0.53, "grad_norm": 0.5304985642433167, "learning_rate": 0.0005542075258577172, "loss": 3.1603, "step": 10908 }, { "epoch": 0.53, "grad_norm": 0.5355924367904663, "learning_rate": 0.0005541993498166334, "loss": 3.4154, "step": 10909 }, { "epoch": 0.53, "grad_norm": 0.598781943321228, "learning_rate": 0.0005541911731060374, "loss": 3.4022, "step": 10910 }, { "epoch": 0.53, "grad_norm": 0.5250735282897949, "learning_rate": 0.0005541829957259508, "loss": 3.0233, "step": 10911 }, { "epoch": 0.53, "grad_norm": 0.5747032761573792, "learning_rate": 0.0005541748176763951, "loss": 3.1773, "step": 10912 }, { "epoch": 0.53, "grad_norm": 0.5299064517021179, "learning_rate": 0.0005541666389573918, "loss": 2.9314, "step": 10913 }, { "epoch": 0.53, "grad_norm": 0.4818550646305084, "learning_rate": 0.0005541584595689624, "loss": 3.2298, "step": 10914 }, { "epoch": 0.53, "grad_norm": 0.5289633274078369, "learning_rate": 0.0005541502795111288, "loss": 3.26, "step": 10915 }, { "epoch": 0.53, "grad_norm": 0.5091275572776794, "learning_rate": 0.000554142098783912, "loss": 3.2175, "step": 10916 }, { "epoch": 0.54, "grad_norm": 0.5333389639854431, "learning_rate": 0.000554133917387334, "loss": 3.6551, "step": 10917 }, { "epoch": 0.54, "grad_norm": 0.4829789400100708, "learning_rate": 0.0005541257353214161, "loss": 3.1711, "step": 10918 }, { "epoch": 0.54, "grad_norm": 0.5595918297767639, "learning_rate": 0.00055411755258618, "loss": 3.2935, "step": 10919 }, { "epoch": 0.54, "grad_norm": 0.5571411848068237, "learning_rate": 0.000554109369181647, "loss": 3.2141, "step": 10920 }, { "epoch": 0.54, "grad_norm": 0.534190833568573, "learning_rate": 0.000554101185107839, "loss": 3.3717, "step": 10921 }, { "epoch": 0.54, "grad_norm": 0.502724289894104, "learning_rate": 0.0005540930003647773, "loss": 3.2407, "step": 10922 }, { "epoch": 0.54, "grad_norm": 0.5559344291687012, "learning_rate": 0.0005540848149524835, "loss": 3.1769, "step": 10923 }, { "epoch": 0.54, "grad_norm": 0.5261440277099609, "learning_rate": 0.0005540766288709792, "loss": 3.3852, "step": 10924 }, { "epoch": 0.54, "grad_norm": 0.5682399272918701, "learning_rate": 0.000554068442120286, "loss": 3.2593, "step": 10925 }, { "epoch": 0.54, "grad_norm": 0.4791615605354309, "learning_rate": 0.0005540602547004255, "loss": 3.428, "step": 10926 }, { "epoch": 0.54, "grad_norm": 0.5266600847244263, "learning_rate": 0.000554052066611419, "loss": 3.3388, "step": 10927 }, { "epoch": 0.54, "grad_norm": 0.5460447072982788, "learning_rate": 0.0005540438778532885, "loss": 3.0518, "step": 10928 }, { "epoch": 0.54, "grad_norm": 0.534444272518158, "learning_rate": 0.0005540356884260551, "loss": 3.2383, "step": 10929 }, { "epoch": 0.54, "grad_norm": 0.5190091729164124, "learning_rate": 0.0005540274983297407, "loss": 3.2742, "step": 10930 }, { "epoch": 0.54, "grad_norm": 0.5553072094917297, "learning_rate": 0.0005540193075643668, "loss": 3.304, "step": 10931 }, { "epoch": 0.54, "grad_norm": 0.5311570167541504, "learning_rate": 0.0005540111161299547, "loss": 3.3071, "step": 10932 }, { "epoch": 0.54, "grad_norm": 0.523281455039978, "learning_rate": 0.0005540029240265265, "loss": 3.2171, "step": 10933 }, { "epoch": 0.54, "grad_norm": 0.5270448327064514, "learning_rate": 0.0005539947312541033, "loss": 3.3145, "step": 10934 }, { "epoch": 0.54, "grad_norm": 0.5168893933296204, "learning_rate": 0.0005539865378127069, "loss": 3.2543, "step": 10935 }, { "epoch": 0.54, "grad_norm": 0.5505146384239197, "learning_rate": 0.0005539783437023588, "loss": 3.0689, "step": 10936 }, { "epoch": 0.54, "grad_norm": 0.48953521251678467, "learning_rate": 0.0005539701489230807, "loss": 3.2071, "step": 10937 }, { "epoch": 0.54, "grad_norm": 0.5777466297149658, "learning_rate": 0.000553961953474894, "loss": 3.228, "step": 10938 }, { "epoch": 0.54, "grad_norm": 0.538305938243866, "learning_rate": 0.0005539537573578205, "loss": 3.2121, "step": 10939 }, { "epoch": 0.54, "grad_norm": 0.5043644905090332, "learning_rate": 0.0005539455605718817, "loss": 3.3311, "step": 10940 }, { "epoch": 0.54, "grad_norm": 0.5230538845062256, "learning_rate": 0.000553937363117099, "loss": 3.0433, "step": 10941 }, { "epoch": 0.54, "grad_norm": 0.49755704402923584, "learning_rate": 0.0005539291649934943, "loss": 3.2049, "step": 10942 }, { "epoch": 0.54, "grad_norm": 0.49073851108551025, "learning_rate": 0.0005539209662010889, "loss": 3.0794, "step": 10943 }, { "epoch": 0.54, "grad_norm": 0.5318127274513245, "learning_rate": 0.0005539127667399048, "loss": 3.3468, "step": 10944 }, { "epoch": 0.54, "grad_norm": 0.5030101537704468, "learning_rate": 0.0005539045666099632, "loss": 3.3754, "step": 10945 }, { "epoch": 0.54, "grad_norm": 0.5165680646896362, "learning_rate": 0.0005538963658112858, "loss": 3.102, "step": 10946 }, { "epoch": 0.54, "grad_norm": 0.48056358098983765, "learning_rate": 0.0005538881643438943, "loss": 3.3966, "step": 10947 }, { "epoch": 0.54, "grad_norm": 0.517524242401123, "learning_rate": 0.0005538799622078101, "loss": 3.3516, "step": 10948 }, { "epoch": 0.54, "grad_norm": 0.5031284689903259, "learning_rate": 0.000553871759403055, "loss": 3.0759, "step": 10949 }, { "epoch": 0.54, "grad_norm": 0.5510088801383972, "learning_rate": 0.0005538635559296507, "loss": 3.1852, "step": 10950 }, { "epoch": 0.54, "grad_norm": 0.48190462589263916, "learning_rate": 0.0005538553517876185, "loss": 3.5855, "step": 10951 }, { "epoch": 0.54, "grad_norm": 0.518423318862915, "learning_rate": 0.0005538471469769802, "loss": 3.4513, "step": 10952 }, { "epoch": 0.54, "grad_norm": 0.5671064257621765, "learning_rate": 0.0005538389414977573, "loss": 3.1931, "step": 10953 }, { "epoch": 0.54, "grad_norm": 0.5123745799064636, "learning_rate": 0.0005538307353499715, "loss": 3.2022, "step": 10954 }, { "epoch": 0.54, "grad_norm": 0.4641689658164978, "learning_rate": 0.0005538225285336445, "loss": 3.3417, "step": 10955 }, { "epoch": 0.54, "grad_norm": 0.5528790950775146, "learning_rate": 0.0005538143210487977, "loss": 3.3302, "step": 10956 }, { "epoch": 0.54, "grad_norm": 0.5319774150848389, "learning_rate": 0.000553806112895453, "loss": 3.1984, "step": 10957 }, { "epoch": 0.54, "grad_norm": 0.5352564454078674, "learning_rate": 0.0005537979040736317, "loss": 3.2959, "step": 10958 }, { "epoch": 0.54, "grad_norm": 0.5923522710800171, "learning_rate": 0.0005537896945833555, "loss": 3.2073, "step": 10959 }, { "epoch": 0.54, "grad_norm": 0.5047934651374817, "learning_rate": 0.0005537814844246462, "loss": 3.5024, "step": 10960 }, { "epoch": 0.54, "grad_norm": 0.5356642007827759, "learning_rate": 0.0005537732735975252, "loss": 3.2996, "step": 10961 }, { "epoch": 0.54, "grad_norm": 0.5496769547462463, "learning_rate": 0.0005537650621020143, "loss": 3.2524, "step": 10962 }, { "epoch": 0.54, "grad_norm": 0.5538159608840942, "learning_rate": 0.0005537568499381351, "loss": 3.1377, "step": 10963 }, { "epoch": 0.54, "grad_norm": 0.5073235630989075, "learning_rate": 0.0005537486371059092, "loss": 3.4908, "step": 10964 }, { "epoch": 0.54, "grad_norm": 0.5352749228477478, "learning_rate": 0.0005537404236053582, "loss": 3.3659, "step": 10965 }, { "epoch": 0.54, "grad_norm": 0.501549482345581, "learning_rate": 0.0005537322094365038, "loss": 3.4548, "step": 10966 }, { "epoch": 0.54, "grad_norm": 0.5620731115341187, "learning_rate": 0.0005537239945993675, "loss": 3.1772, "step": 10967 }, { "epoch": 0.54, "grad_norm": 0.5060946345329285, "learning_rate": 0.0005537157790939711, "loss": 3.1628, "step": 10968 }, { "epoch": 0.54, "grad_norm": 0.5136206150054932, "learning_rate": 0.0005537075629203361, "loss": 3.1592, "step": 10969 }, { "epoch": 0.54, "grad_norm": 0.5491225123405457, "learning_rate": 0.0005536993460784843, "loss": 3.1226, "step": 10970 }, { "epoch": 0.54, "grad_norm": 0.5510766506195068, "learning_rate": 0.0005536911285684372, "loss": 3.2825, "step": 10971 }, { "epoch": 0.54, "grad_norm": 0.59326171875, "learning_rate": 0.0005536829103902164, "loss": 3.2038, "step": 10972 }, { "epoch": 0.54, "grad_norm": 0.5343856811523438, "learning_rate": 0.0005536746915438438, "loss": 3.2069, "step": 10973 }, { "epoch": 0.54, "grad_norm": 0.524446964263916, "learning_rate": 0.0005536664720293408, "loss": 3.1553, "step": 10974 }, { "epoch": 0.54, "grad_norm": 0.5818180441856384, "learning_rate": 0.0005536582518467292, "loss": 3.2898, "step": 10975 }, { "epoch": 0.54, "grad_norm": 0.49687114357948303, "learning_rate": 0.0005536500309960304, "loss": 3.4254, "step": 10976 }, { "epoch": 0.54, "grad_norm": 0.5242406725883484, "learning_rate": 0.0005536418094772664, "loss": 3.1369, "step": 10977 }, { "epoch": 0.54, "grad_norm": 0.5363040566444397, "learning_rate": 0.0005536335872904587, "loss": 3.2857, "step": 10978 }, { "epoch": 0.54, "grad_norm": 0.5277572870254517, "learning_rate": 0.0005536253644356289, "loss": 3.2198, "step": 10979 }, { "epoch": 0.54, "grad_norm": 0.5251345634460449, "learning_rate": 0.0005536171409127986, "loss": 3.3776, "step": 10980 }, { "epoch": 0.54, "grad_norm": 0.4895996153354645, "learning_rate": 0.0005536089167219897, "loss": 3.1118, "step": 10981 }, { "epoch": 0.54, "grad_norm": 0.550209105014801, "learning_rate": 0.0005536006918632236, "loss": 3.3635, "step": 10982 }, { "epoch": 0.54, "grad_norm": 0.5238742232322693, "learning_rate": 0.0005535924663365221, "loss": 3.37, "step": 10983 }, { "epoch": 0.54, "grad_norm": 0.5147226452827454, "learning_rate": 0.0005535842401419071, "loss": 3.3436, "step": 10984 }, { "epoch": 0.54, "grad_norm": 0.5709726214408875, "learning_rate": 0.0005535760132793997, "loss": 3.2941, "step": 10985 }, { "epoch": 0.54, "grad_norm": 0.5367663502693176, "learning_rate": 0.000553567785749022, "loss": 3.2, "step": 10986 }, { "epoch": 0.54, "grad_norm": 0.5051316022872925, "learning_rate": 0.0005535595575507955, "loss": 3.2523, "step": 10987 }, { "epoch": 0.54, "grad_norm": 0.502051830291748, "learning_rate": 0.0005535513286847421, "loss": 3.2888, "step": 10988 }, { "epoch": 0.54, "grad_norm": 0.5447306632995605, "learning_rate": 0.0005535430991508831, "loss": 3.2808, "step": 10989 }, { "epoch": 0.54, "grad_norm": 0.5220550894737244, "learning_rate": 0.0005535348689492404, "loss": 3.1534, "step": 10990 }, { "epoch": 0.54, "grad_norm": 0.6093209385871887, "learning_rate": 0.0005535266380798358, "loss": 3.1002, "step": 10991 }, { "epoch": 0.54, "grad_norm": 0.51113361120224, "learning_rate": 0.0005535184065426907, "loss": 3.2006, "step": 10992 }, { "epoch": 0.54, "grad_norm": 0.5078108310699463, "learning_rate": 0.000553510174337827, "loss": 3.1692, "step": 10993 }, { "epoch": 0.54, "grad_norm": 0.5096676349639893, "learning_rate": 0.0005535019414652662, "loss": 3.2649, "step": 10994 }, { "epoch": 0.54, "grad_norm": 0.6298714280128479, "learning_rate": 0.0005534937079250301, "loss": 3.0877, "step": 10995 }, { "epoch": 0.54, "grad_norm": 0.5195075869560242, "learning_rate": 0.0005534854737171402, "loss": 3.3914, "step": 10996 }, { "epoch": 0.54, "grad_norm": 0.5262744426727295, "learning_rate": 0.0005534772388416186, "loss": 3.3941, "step": 10997 }, { "epoch": 0.54, "grad_norm": 0.5323526263237, "learning_rate": 0.0005534690032984866, "loss": 3.2896, "step": 10998 }, { "epoch": 0.54, "grad_norm": 0.567234456539154, "learning_rate": 0.0005534607670877661, "loss": 3.1214, "step": 10999 }, { "epoch": 0.54, "grad_norm": 0.49547919631004333, "learning_rate": 0.0005534525302094787, "loss": 3.4712, "step": 11000 }, { "epoch": 0.54, "grad_norm": 0.5253562331199646, "learning_rate": 0.0005534442926636461, "loss": 3.4296, "step": 11001 }, { "epoch": 0.54, "grad_norm": 0.536155104637146, "learning_rate": 0.00055343605445029, "loss": 3.2442, "step": 11002 }, { "epoch": 0.54, "grad_norm": 0.5386752486228943, "learning_rate": 0.0005534278155694321, "loss": 3.1203, "step": 11003 }, { "epoch": 0.54, "grad_norm": 0.5196104049682617, "learning_rate": 0.0005534195760210941, "loss": 3.2681, "step": 11004 }, { "epoch": 0.54, "grad_norm": 0.5381636023521423, "learning_rate": 0.0005534113358052977, "loss": 3.2436, "step": 11005 }, { "epoch": 0.54, "grad_norm": 0.5484285950660706, "learning_rate": 0.0005534030949220646, "loss": 3.2857, "step": 11006 }, { "epoch": 0.54, "grad_norm": 0.5643191933631897, "learning_rate": 0.0005533948533714166, "loss": 3.2674, "step": 11007 }, { "epoch": 0.54, "grad_norm": 0.5526049733161926, "learning_rate": 0.0005533866111533753, "loss": 3.2277, "step": 11008 }, { "epoch": 0.54, "grad_norm": 0.5223939418792725, "learning_rate": 0.0005533783682679624, "loss": 3.3033, "step": 11009 }, { "epoch": 0.54, "grad_norm": 0.5286209583282471, "learning_rate": 0.0005533701247151996, "loss": 3.2965, "step": 11010 }, { "epoch": 0.54, "grad_norm": 0.5266342759132385, "learning_rate": 0.0005533618804951087, "loss": 3.2701, "step": 11011 }, { "epoch": 0.54, "grad_norm": 0.5430400967597961, "learning_rate": 0.0005533536356077113, "loss": 3.0469, "step": 11012 }, { "epoch": 0.54, "grad_norm": 0.49393025040626526, "learning_rate": 0.0005533453900530294, "loss": 3.1746, "step": 11013 }, { "epoch": 0.54, "grad_norm": 0.5422281622886658, "learning_rate": 0.0005533371438310843, "loss": 3.3271, "step": 11014 }, { "epoch": 0.54, "grad_norm": 0.5507173538208008, "learning_rate": 0.000553328896941898, "loss": 3.2458, "step": 11015 }, { "epoch": 0.54, "grad_norm": 0.4943542778491974, "learning_rate": 0.000553320649385492, "loss": 3.2087, "step": 11016 }, { "epoch": 0.54, "grad_norm": 0.5512304902076721, "learning_rate": 0.0005533124011618884, "loss": 3.1788, "step": 11017 }, { "epoch": 0.54, "grad_norm": 0.45192670822143555, "learning_rate": 0.0005533041522711085, "loss": 3.1242, "step": 11018 }, { "epoch": 0.54, "grad_norm": 0.5453776717185974, "learning_rate": 0.0005532959027131743, "loss": 3.3347, "step": 11019 }, { "epoch": 0.54, "grad_norm": 0.5177527070045471, "learning_rate": 0.0005532876524881075, "loss": 3.2072, "step": 11020 }, { "epoch": 0.54, "grad_norm": 0.5608527660369873, "learning_rate": 0.0005532794015959296, "loss": 3.2669, "step": 11021 }, { "epoch": 0.54, "grad_norm": 0.5439431667327881, "learning_rate": 0.0005532711500366625, "loss": 3.1555, "step": 11022 }, { "epoch": 0.54, "grad_norm": 0.5050414204597473, "learning_rate": 0.000553262897810328, "loss": 3.3673, "step": 11023 }, { "epoch": 0.54, "grad_norm": 0.6056579947471619, "learning_rate": 0.0005532546449169478, "loss": 3.0847, "step": 11024 }, { "epoch": 0.54, "grad_norm": 0.50806725025177, "learning_rate": 0.0005532463913565436, "loss": 3.1318, "step": 11025 }, { "epoch": 0.54, "grad_norm": 0.47920647263526917, "learning_rate": 0.0005532381371291372, "loss": 3.307, "step": 11026 }, { "epoch": 0.54, "grad_norm": 0.49545514583587646, "learning_rate": 0.0005532298822347501, "loss": 3.369, "step": 11027 }, { "epoch": 0.54, "grad_norm": 0.4789004921913147, "learning_rate": 0.0005532216266734044, "loss": 3.2807, "step": 11028 }, { "epoch": 0.54, "grad_norm": 0.5694847106933594, "learning_rate": 0.0005532133704451216, "loss": 3.2739, "step": 11029 }, { "epoch": 0.54, "grad_norm": 0.5591051578521729, "learning_rate": 0.0005532051135499236, "loss": 3.2335, "step": 11030 }, { "epoch": 0.54, "grad_norm": 0.5124754309654236, "learning_rate": 0.000553196855987832, "loss": 3.0694, "step": 11031 }, { "epoch": 0.54, "grad_norm": 0.5239121317863464, "learning_rate": 0.0005531885977588686, "loss": 3.3521, "step": 11032 }, { "epoch": 0.54, "grad_norm": 0.5368714332580566, "learning_rate": 0.0005531803388630551, "loss": 3.2614, "step": 11033 }, { "epoch": 0.54, "grad_norm": 0.5138024687767029, "learning_rate": 0.0005531720793004135, "loss": 3.2224, "step": 11034 }, { "epoch": 0.54, "grad_norm": 0.5147234201431274, "learning_rate": 0.0005531638190709651, "loss": 3.2612, "step": 11035 }, { "epoch": 0.54, "grad_norm": 0.5324978828430176, "learning_rate": 0.0005531555581747321, "loss": 3.4883, "step": 11036 }, { "epoch": 0.54, "grad_norm": 0.501106321811676, "learning_rate": 0.0005531472966117361, "loss": 3.2922, "step": 11037 }, { "epoch": 0.54, "grad_norm": 0.5278347134590149, "learning_rate": 0.0005531390343819987, "loss": 3.2189, "step": 11038 }, { "epoch": 0.54, "grad_norm": 0.5315659046173096, "learning_rate": 0.0005531307714855419, "loss": 3.2379, "step": 11039 }, { "epoch": 0.54, "grad_norm": 0.523786187171936, "learning_rate": 0.0005531225079223874, "loss": 3.2896, "step": 11040 }, { "epoch": 0.54, "grad_norm": 0.5235302448272705, "learning_rate": 0.0005531142436925569, "loss": 3.5168, "step": 11041 }, { "epoch": 0.54, "grad_norm": 0.492496132850647, "learning_rate": 0.0005531059787960721, "loss": 3.2794, "step": 11042 }, { "epoch": 0.54, "grad_norm": 0.5163391828536987, "learning_rate": 0.0005530977132329549, "loss": 3.0546, "step": 11043 }, { "epoch": 0.54, "grad_norm": 0.5126833915710449, "learning_rate": 0.0005530894470032271, "loss": 3.1981, "step": 11044 }, { "epoch": 0.54, "grad_norm": 0.5161365270614624, "learning_rate": 0.0005530811801069104, "loss": 3.2133, "step": 11045 }, { "epoch": 0.54, "grad_norm": 0.5676143169403076, "learning_rate": 0.0005530729125440265, "loss": 3.117, "step": 11046 }, { "epoch": 0.54, "grad_norm": 0.5352276563644409, "learning_rate": 0.0005530646443145973, "loss": 3.2627, "step": 11047 }, { "epoch": 0.54, "grad_norm": 0.5018179416656494, "learning_rate": 0.0005530563754186444, "loss": 3.2552, "step": 11048 }, { "epoch": 0.54, "grad_norm": 0.5012510418891907, "learning_rate": 0.0005530481058561899, "loss": 3.2785, "step": 11049 }, { "epoch": 0.54, "grad_norm": 0.5244871973991394, "learning_rate": 0.0005530398356272554, "loss": 3.3138, "step": 11050 }, { "epoch": 0.54, "grad_norm": 0.5109242796897888, "learning_rate": 0.0005530315647318626, "loss": 3.3587, "step": 11051 }, { "epoch": 0.54, "grad_norm": 0.5416744351387024, "learning_rate": 0.0005530232931700333, "loss": 3.182, "step": 11052 }, { "epoch": 0.54, "grad_norm": 0.5090746283531189, "learning_rate": 0.0005530150209417894, "loss": 3.1963, "step": 11053 }, { "epoch": 0.54, "grad_norm": 0.5169641971588135, "learning_rate": 0.0005530067480471526, "loss": 3.3494, "step": 11054 }, { "epoch": 0.54, "grad_norm": 0.4892723560333252, "learning_rate": 0.0005529984744861448, "loss": 3.3338, "step": 11055 }, { "epoch": 0.54, "grad_norm": 0.5152621865272522, "learning_rate": 0.0005529902002587877, "loss": 3.3415, "step": 11056 }, { "epoch": 0.54, "grad_norm": 0.4981408417224884, "learning_rate": 0.000552981925365103, "loss": 3.2972, "step": 11057 }, { "epoch": 0.54, "grad_norm": 0.5364402532577515, "learning_rate": 0.0005529736498051127, "loss": 3.2753, "step": 11058 }, { "epoch": 0.54, "grad_norm": 0.4937974214553833, "learning_rate": 0.0005529653735788383, "loss": 3.2908, "step": 11059 }, { "epoch": 0.54, "grad_norm": 0.5432580709457397, "learning_rate": 0.0005529570966863021, "loss": 3.2897, "step": 11060 }, { "epoch": 0.54, "grad_norm": 0.5275623798370361, "learning_rate": 0.0005529488191275253, "loss": 3.3915, "step": 11061 }, { "epoch": 0.54, "grad_norm": 0.5411865711212158, "learning_rate": 0.0005529405409025302, "loss": 3.2582, "step": 11062 }, { "epoch": 0.54, "grad_norm": 0.5429436564445496, "learning_rate": 0.0005529322620113382, "loss": 3.267, "step": 11063 }, { "epoch": 0.54, "grad_norm": 0.5083034038543701, "learning_rate": 0.0005529239824539715, "loss": 3.3395, "step": 11064 }, { "epoch": 0.54, "grad_norm": 0.49724969267845154, "learning_rate": 0.0005529157022304516, "loss": 3.0817, "step": 11065 }, { "epoch": 0.54, "grad_norm": 0.48776689171791077, "learning_rate": 0.0005529074213408004, "loss": 3.2054, "step": 11066 }, { "epoch": 0.54, "grad_norm": 0.4902147054672241, "learning_rate": 0.0005528991397850397, "loss": 3.3422, "step": 11067 }, { "epoch": 0.54, "grad_norm": 0.5294910073280334, "learning_rate": 0.0005528908575631914, "loss": 3.3306, "step": 11068 }, { "epoch": 0.54, "grad_norm": 0.5070090293884277, "learning_rate": 0.0005528825746752773, "loss": 3.4023, "step": 11069 }, { "epoch": 0.54, "grad_norm": 0.5285083055496216, "learning_rate": 0.0005528742911213191, "loss": 3.2324, "step": 11070 }, { "epoch": 0.54, "grad_norm": 0.513079822063446, "learning_rate": 0.0005528660069013387, "loss": 3.2539, "step": 11071 }, { "epoch": 0.54, "grad_norm": 0.5381377339363098, "learning_rate": 0.0005528577220153579, "loss": 3.4106, "step": 11072 }, { "epoch": 0.54, "grad_norm": 0.4675285518169403, "learning_rate": 0.0005528494364633985, "loss": 3.4012, "step": 11073 }, { "epoch": 0.54, "grad_norm": 0.5249191522598267, "learning_rate": 0.0005528411502454824, "loss": 3.1627, "step": 11074 }, { "epoch": 0.54, "grad_norm": 0.49168458580970764, "learning_rate": 0.0005528328633616313, "loss": 3.2428, "step": 11075 }, { "epoch": 0.54, "grad_norm": 0.4863361716270447, "learning_rate": 0.0005528245758118671, "loss": 3.2086, "step": 11076 }, { "epoch": 0.54, "grad_norm": 0.5234729647636414, "learning_rate": 0.0005528162875962117, "loss": 3.4566, "step": 11077 }, { "epoch": 0.54, "grad_norm": 0.5098854303359985, "learning_rate": 0.0005528079987146868, "loss": 3.3018, "step": 11078 }, { "epoch": 0.54, "grad_norm": 0.5080091953277588, "learning_rate": 0.0005527997091673143, "loss": 3.3924, "step": 11079 }, { "epoch": 0.54, "grad_norm": 0.5155888795852661, "learning_rate": 0.000552791418954116, "loss": 3.1819, "step": 11080 }, { "epoch": 0.54, "grad_norm": 0.5143377780914307, "learning_rate": 0.0005527831280751138, "loss": 3.5005, "step": 11081 }, { "epoch": 0.54, "grad_norm": 0.5565736293792725, "learning_rate": 0.0005527748365303295, "loss": 3.198, "step": 11082 }, { "epoch": 0.54, "grad_norm": 0.5252052545547485, "learning_rate": 0.0005527665443197849, "loss": 3.1299, "step": 11083 }, { "epoch": 0.54, "grad_norm": 0.5647584199905396, "learning_rate": 0.0005527582514435017, "loss": 3.5219, "step": 11084 }, { "epoch": 0.54, "grad_norm": 0.581468939781189, "learning_rate": 0.0005527499579015021, "loss": 3.3298, "step": 11085 }, { "epoch": 0.54, "grad_norm": 0.5266270041465759, "learning_rate": 0.0005527416636938077, "loss": 3.1852, "step": 11086 }, { "epoch": 0.54, "grad_norm": 0.5025612115859985, "learning_rate": 0.0005527333688204405, "loss": 3.3013, "step": 11087 }, { "epoch": 0.54, "grad_norm": 0.5126590132713318, "learning_rate": 0.0005527250732814222, "loss": 3.4074, "step": 11088 }, { "epoch": 0.54, "grad_norm": 0.5125699043273926, "learning_rate": 0.0005527167770767746, "loss": 3.0508, "step": 11089 }, { "epoch": 0.54, "grad_norm": 0.5985158085823059, "learning_rate": 0.0005527084802065197, "loss": 3.5684, "step": 11090 }, { "epoch": 0.54, "grad_norm": 0.4821082353591919, "learning_rate": 0.0005527001826706793, "loss": 3.2999, "step": 11091 }, { "epoch": 0.54, "grad_norm": 0.5150079131126404, "learning_rate": 0.0005526918844692752, "loss": 3.3448, "step": 11092 }, { "epoch": 0.54, "grad_norm": 0.5428050756454468, "learning_rate": 0.0005526835856023294, "loss": 3.0969, "step": 11093 }, { "epoch": 0.54, "grad_norm": 0.5639786720275879, "learning_rate": 0.0005526752860698636, "loss": 3.2585, "step": 11094 }, { "epoch": 0.54, "grad_norm": 0.5006402730941772, "learning_rate": 0.0005526669858718998, "loss": 3.2274, "step": 11095 }, { "epoch": 0.54, "grad_norm": 0.5905807614326477, "learning_rate": 0.0005526586850084596, "loss": 3.3441, "step": 11096 }, { "epoch": 0.54, "grad_norm": 0.5085511207580566, "learning_rate": 0.0005526503834795654, "loss": 3.416, "step": 11097 }, { "epoch": 0.54, "grad_norm": 0.54239422082901, "learning_rate": 0.0005526420812852384, "loss": 2.8477, "step": 11098 }, { "epoch": 0.54, "grad_norm": 0.5083803534507751, "learning_rate": 0.0005526337784255009, "loss": 3.3743, "step": 11099 }, { "epoch": 0.54, "grad_norm": 0.4984433352947235, "learning_rate": 0.0005526254749003746, "loss": 3.4834, "step": 11100 }, { "epoch": 0.54, "grad_norm": 0.5210543870925903, "learning_rate": 0.0005526171707098814, "loss": 3.336, "step": 11101 }, { "epoch": 0.54, "grad_norm": 0.5217922925949097, "learning_rate": 0.0005526088658540433, "loss": 3.1881, "step": 11102 }, { "epoch": 0.54, "grad_norm": 0.4963856339454651, "learning_rate": 0.000552600560332882, "loss": 3.1313, "step": 11103 }, { "epoch": 0.54, "grad_norm": 0.5491219758987427, "learning_rate": 0.0005525922541464195, "loss": 3.2162, "step": 11104 }, { "epoch": 0.54, "grad_norm": 0.5014486908912659, "learning_rate": 0.0005525839472946774, "loss": 3.2393, "step": 11105 }, { "epoch": 0.54, "grad_norm": 0.5988680720329285, "learning_rate": 0.000552575639777678, "loss": 3.3308, "step": 11106 }, { "epoch": 0.54, "grad_norm": 0.5203366279602051, "learning_rate": 0.0005525673315954428, "loss": 3.3049, "step": 11107 }, { "epoch": 0.54, "grad_norm": 0.5146699547767639, "learning_rate": 0.0005525590227479941, "loss": 3.0487, "step": 11108 }, { "epoch": 0.54, "grad_norm": 0.5197408199310303, "learning_rate": 0.0005525507132353533, "loss": 3.1794, "step": 11109 }, { "epoch": 0.54, "grad_norm": 0.5432579517364502, "learning_rate": 0.0005525424030575427, "loss": 3.0737, "step": 11110 }, { "epoch": 0.54, "grad_norm": 0.5454299449920654, "learning_rate": 0.000552534092214584, "loss": 3.1482, "step": 11111 }, { "epoch": 0.54, "grad_norm": 0.5839552283287048, "learning_rate": 0.000552525780706499, "loss": 3.2161, "step": 11112 }, { "epoch": 0.54, "grad_norm": 0.5181378126144409, "learning_rate": 0.0005525174685333098, "loss": 3.2954, "step": 11113 }, { "epoch": 0.54, "grad_norm": 0.5586148500442505, "learning_rate": 0.0005525091556950381, "loss": 3.1837, "step": 11114 }, { "epoch": 0.54, "grad_norm": 0.5175696015357971, "learning_rate": 0.000552500842191706, "loss": 3.1286, "step": 11115 }, { "epoch": 0.54, "grad_norm": 0.5837456583976746, "learning_rate": 0.0005524925280233351, "loss": 3.1588, "step": 11116 }, { "epoch": 0.54, "grad_norm": 0.5126967430114746, "learning_rate": 0.0005524842131899476, "loss": 3.4051, "step": 11117 }, { "epoch": 0.54, "grad_norm": 0.49251213669776917, "learning_rate": 0.0005524758976915652, "loss": 3.2309, "step": 11118 }, { "epoch": 0.54, "grad_norm": 0.5178259015083313, "learning_rate": 0.0005524675815282099, "loss": 3.2385, "step": 11119 }, { "epoch": 0.54, "grad_norm": 0.5161910653114319, "learning_rate": 0.0005524592646999035, "loss": 3.1784, "step": 11120 }, { "epoch": 0.55, "grad_norm": 0.5630325078964233, "learning_rate": 0.0005524509472066683, "loss": 3.0025, "step": 11121 }, { "epoch": 0.55, "grad_norm": 0.5052022337913513, "learning_rate": 0.0005524426290485255, "loss": 3.1637, "step": 11122 }, { "epoch": 0.55, "grad_norm": 0.5926064252853394, "learning_rate": 0.0005524343102254976, "loss": 3.3637, "step": 11123 }, { "epoch": 0.55, "grad_norm": 0.5273416638374329, "learning_rate": 0.0005524259907376063, "loss": 3.2147, "step": 11124 }, { "epoch": 0.55, "grad_norm": 0.552270233631134, "learning_rate": 0.0005524176705848735, "loss": 3.1198, "step": 11125 }, { "epoch": 0.55, "grad_norm": 0.5399708151817322, "learning_rate": 0.0005524093497673211, "loss": 3.0797, "step": 11126 }, { "epoch": 0.55, "grad_norm": 0.5599607229232788, "learning_rate": 0.000552401028284971, "loss": 3.0212, "step": 11127 }, { "epoch": 0.55, "grad_norm": 0.536972165107727, "learning_rate": 0.0005523927061378453, "loss": 2.9896, "step": 11128 }, { "epoch": 0.55, "grad_norm": 0.5732285976409912, "learning_rate": 0.0005523843833259658, "loss": 3.1458, "step": 11129 }, { "epoch": 0.55, "grad_norm": 0.5276511311531067, "learning_rate": 0.0005523760598493544, "loss": 3.5314, "step": 11130 }, { "epoch": 0.55, "grad_norm": 0.506087064743042, "learning_rate": 0.0005523677357080329, "loss": 3.3586, "step": 11131 }, { "epoch": 0.55, "grad_norm": 0.5099312663078308, "learning_rate": 0.0005523594109020233, "loss": 3.328, "step": 11132 }, { "epoch": 0.55, "grad_norm": 0.51421719789505, "learning_rate": 0.0005523510854313478, "loss": 3.3486, "step": 11133 }, { "epoch": 0.55, "grad_norm": 0.5033894181251526, "learning_rate": 0.000552342759296028, "loss": 3.2523, "step": 11134 }, { "epoch": 0.55, "grad_norm": 0.5557559132575989, "learning_rate": 0.000552334432496086, "loss": 3.4129, "step": 11135 }, { "epoch": 0.55, "grad_norm": 0.5153203010559082, "learning_rate": 0.0005523261050315435, "loss": 3.4043, "step": 11136 }, { "epoch": 0.55, "grad_norm": 0.5677420496940613, "learning_rate": 0.0005523177769024228, "loss": 3.0835, "step": 11137 }, { "epoch": 0.55, "grad_norm": 0.555735170841217, "learning_rate": 0.0005523094481087455, "loss": 3.2663, "step": 11138 }, { "epoch": 0.55, "grad_norm": 0.5250891447067261, "learning_rate": 0.0005523011186505338, "loss": 3.1598, "step": 11139 }, { "epoch": 0.55, "grad_norm": 0.501929759979248, "learning_rate": 0.0005522927885278094, "loss": 3.3389, "step": 11140 }, { "epoch": 0.55, "grad_norm": 0.5323585271835327, "learning_rate": 0.0005522844577405943, "loss": 3.2071, "step": 11141 }, { "epoch": 0.55, "grad_norm": 0.5253964066505432, "learning_rate": 0.0005522761262889106, "loss": 3.0697, "step": 11142 }, { "epoch": 0.55, "grad_norm": 0.5121307969093323, "learning_rate": 0.0005522677941727801, "loss": 3.1454, "step": 11143 }, { "epoch": 0.55, "grad_norm": 0.5765665173530579, "learning_rate": 0.0005522594613922248, "loss": 3.3376, "step": 11144 }, { "epoch": 0.55, "grad_norm": 0.5566869974136353, "learning_rate": 0.0005522511279472666, "loss": 3.2357, "step": 11145 }, { "epoch": 0.55, "grad_norm": 0.540515124797821, "learning_rate": 0.0005522427938379275, "loss": 3.2131, "step": 11146 }, { "epoch": 0.55, "grad_norm": 0.5782142877578735, "learning_rate": 0.0005522344590642294, "loss": 3.3415, "step": 11147 }, { "epoch": 0.55, "grad_norm": 0.5115688443183899, "learning_rate": 0.0005522261236261943, "loss": 3.1543, "step": 11148 }, { "epoch": 0.55, "grad_norm": 0.5313143730163574, "learning_rate": 0.0005522177875238441, "loss": 3.3948, "step": 11149 }, { "epoch": 0.55, "grad_norm": 0.563582718372345, "learning_rate": 0.0005522094507572009, "loss": 3.2437, "step": 11150 }, { "epoch": 0.55, "grad_norm": 0.514954686164856, "learning_rate": 0.0005522011133262864, "loss": 3.3182, "step": 11151 }, { "epoch": 0.55, "grad_norm": 0.5384813547134399, "learning_rate": 0.0005521927752311227, "loss": 3.309, "step": 11152 }, { "epoch": 0.55, "grad_norm": 0.5301558971405029, "learning_rate": 0.0005521844364717319, "loss": 3.1738, "step": 11153 }, { "epoch": 0.55, "grad_norm": 0.5261275768280029, "learning_rate": 0.0005521760970481357, "loss": 3.2791, "step": 11154 }, { "epoch": 0.55, "grad_norm": 0.539004385471344, "learning_rate": 0.0005521677569603563, "loss": 3.172, "step": 11155 }, { "epoch": 0.55, "grad_norm": 0.5053558945655823, "learning_rate": 0.0005521594162084155, "loss": 3.207, "step": 11156 }, { "epoch": 0.55, "grad_norm": 0.4946291744709015, "learning_rate": 0.0005521510747923353, "loss": 3.1878, "step": 11157 }, { "epoch": 0.55, "grad_norm": 0.5328413844108582, "learning_rate": 0.0005521427327121376, "loss": 3.2071, "step": 11158 }, { "epoch": 0.55, "grad_norm": 0.49959659576416016, "learning_rate": 0.0005521343899678447, "loss": 3.3371, "step": 11159 }, { "epoch": 0.55, "grad_norm": 0.535815417766571, "learning_rate": 0.0005521260465594782, "loss": 3.2255, "step": 11160 }, { "epoch": 0.55, "grad_norm": 0.5187474489212036, "learning_rate": 0.0005521177024870602, "loss": 3.2528, "step": 11161 }, { "epoch": 0.55, "grad_norm": 0.5385481715202332, "learning_rate": 0.0005521093577506128, "loss": 3.2155, "step": 11162 }, { "epoch": 0.55, "grad_norm": 0.5287864804267883, "learning_rate": 0.0005521010123501578, "loss": 3.2346, "step": 11163 }, { "epoch": 0.55, "grad_norm": 0.501727819442749, "learning_rate": 0.0005520926662857174, "loss": 3.2213, "step": 11164 }, { "epoch": 0.55, "grad_norm": 0.5046778321266174, "learning_rate": 0.0005520843195573132, "loss": 3.3777, "step": 11165 }, { "epoch": 0.55, "grad_norm": 0.5383339524269104, "learning_rate": 0.0005520759721649676, "loss": 3.3155, "step": 11166 }, { "epoch": 0.55, "grad_norm": 0.49202319979667664, "learning_rate": 0.0005520676241087023, "loss": 3.0838, "step": 11167 }, { "epoch": 0.55, "grad_norm": 0.4806743562221527, "learning_rate": 0.0005520592753885394, "loss": 3.3125, "step": 11168 }, { "epoch": 0.55, "grad_norm": 0.5282479524612427, "learning_rate": 0.0005520509260045009, "loss": 3.1906, "step": 11169 }, { "epoch": 0.55, "grad_norm": 0.5135144591331482, "learning_rate": 0.0005520425759566087, "loss": 3.1592, "step": 11170 }, { "epoch": 0.55, "grad_norm": 0.5296515822410583, "learning_rate": 0.000552034225244885, "loss": 3.3004, "step": 11171 }, { "epoch": 0.55, "grad_norm": 0.5206128358840942, "learning_rate": 0.0005520258738693516, "loss": 3.308, "step": 11172 }, { "epoch": 0.55, "grad_norm": 0.5016621351242065, "learning_rate": 0.0005520175218300305, "loss": 3.2374, "step": 11173 }, { "epoch": 0.55, "grad_norm": 0.5277829170227051, "learning_rate": 0.0005520091691269438, "loss": 3.2679, "step": 11174 }, { "epoch": 0.55, "grad_norm": 0.48501524329185486, "learning_rate": 0.0005520008157601134, "loss": 3.3124, "step": 11175 }, { "epoch": 0.55, "grad_norm": 0.5397695899009705, "learning_rate": 0.0005519924617295613, "loss": 3.0827, "step": 11176 }, { "epoch": 0.55, "grad_norm": 0.4927702844142914, "learning_rate": 0.0005519841070353097, "loss": 3.2994, "step": 11177 }, { "epoch": 0.55, "grad_norm": 0.48005515336990356, "learning_rate": 0.0005519757516773804, "loss": 3.4319, "step": 11178 }, { "epoch": 0.55, "grad_norm": 0.5230315327644348, "learning_rate": 0.0005519673956557954, "loss": 3.2013, "step": 11179 }, { "epoch": 0.55, "grad_norm": 0.5246223211288452, "learning_rate": 0.0005519590389705769, "loss": 3.2824, "step": 11180 }, { "epoch": 0.55, "grad_norm": 0.5252644419670105, "learning_rate": 0.0005519506816217466, "loss": 3.323, "step": 11181 }, { "epoch": 0.55, "grad_norm": 0.5019978284835815, "learning_rate": 0.0005519423236093268, "loss": 3.3634, "step": 11182 }, { "epoch": 0.55, "grad_norm": 0.5068698525428772, "learning_rate": 0.0005519339649333394, "loss": 3.2813, "step": 11183 }, { "epoch": 0.55, "grad_norm": 0.5238425731658936, "learning_rate": 0.0005519256055938064, "loss": 3.2488, "step": 11184 }, { "epoch": 0.55, "grad_norm": 0.51482093334198, "learning_rate": 0.0005519172455907499, "loss": 3.3138, "step": 11185 }, { "epoch": 0.55, "grad_norm": 0.5190244317054749, "learning_rate": 0.0005519088849241918, "loss": 3.246, "step": 11186 }, { "epoch": 0.55, "grad_norm": 0.49023398756980896, "learning_rate": 0.0005519005235941542, "loss": 3.2134, "step": 11187 }, { "epoch": 0.55, "grad_norm": 0.5314106345176697, "learning_rate": 0.0005518921616006591, "loss": 3.134, "step": 11188 }, { "epoch": 0.55, "grad_norm": 0.4904386103153229, "learning_rate": 0.0005518837989437285, "loss": 3.3674, "step": 11189 }, { "epoch": 0.55, "grad_norm": 0.49414244294166565, "learning_rate": 0.0005518754356233845, "loss": 3.0914, "step": 11190 }, { "epoch": 0.55, "grad_norm": 0.5230140089988708, "learning_rate": 0.000551867071639649, "loss": 3.1609, "step": 11191 }, { "epoch": 0.55, "grad_norm": 0.5059942007064819, "learning_rate": 0.0005518587069925442, "loss": 3.2041, "step": 11192 }, { "epoch": 0.55, "grad_norm": 0.5030388832092285, "learning_rate": 0.000551850341682092, "loss": 3.1284, "step": 11193 }, { "epoch": 0.55, "grad_norm": 0.5302931666374207, "learning_rate": 0.0005518419757083145, "loss": 3.2589, "step": 11194 }, { "epoch": 0.55, "grad_norm": 0.5348262786865234, "learning_rate": 0.0005518336090712337, "loss": 3.3606, "step": 11195 }, { "epoch": 0.55, "grad_norm": 0.5146247148513794, "learning_rate": 0.0005518252417708716, "loss": 3.214, "step": 11196 }, { "epoch": 0.55, "grad_norm": 0.5075222253799438, "learning_rate": 0.0005518168738072502, "loss": 3.259, "step": 11197 }, { "epoch": 0.55, "grad_norm": 0.5404617190361023, "learning_rate": 0.0005518085051803918, "loss": 3.3196, "step": 11198 }, { "epoch": 0.55, "grad_norm": 0.5376437306404114, "learning_rate": 0.0005518001358903182, "loss": 3.0802, "step": 11199 }, { "epoch": 0.55, "grad_norm": 0.5305649638175964, "learning_rate": 0.0005517917659370513, "loss": 3.2274, "step": 11200 }, { "epoch": 0.55, "grad_norm": 0.533257246017456, "learning_rate": 0.0005517833953206135, "loss": 3.1717, "step": 11201 }, { "epoch": 0.55, "grad_norm": 0.5252943634986877, "learning_rate": 0.0005517750240410268, "loss": 3.0743, "step": 11202 }, { "epoch": 0.55, "grad_norm": 0.4910027086734772, "learning_rate": 0.000551766652098313, "loss": 3.5248, "step": 11203 }, { "epoch": 0.55, "grad_norm": 0.5113924741744995, "learning_rate": 0.0005517582794924943, "loss": 3.1122, "step": 11204 }, { "epoch": 0.55, "grad_norm": 0.5466858744621277, "learning_rate": 0.0005517499062235928, "loss": 3.2754, "step": 11205 }, { "epoch": 0.55, "grad_norm": 0.5288564562797546, "learning_rate": 0.0005517415322916305, "loss": 3.3129, "step": 11206 }, { "epoch": 0.55, "grad_norm": 0.5409068465232849, "learning_rate": 0.0005517331576966294, "loss": 3.2236, "step": 11207 }, { "epoch": 0.55, "grad_norm": 0.581696093082428, "learning_rate": 0.0005517247824386117, "loss": 3.5911, "step": 11208 }, { "epoch": 0.55, "grad_norm": 0.4891887903213501, "learning_rate": 0.0005517164065175993, "loss": 3.3352, "step": 11209 }, { "epoch": 0.55, "grad_norm": 0.5119558572769165, "learning_rate": 0.0005517080299336143, "loss": 3.1011, "step": 11210 }, { "epoch": 0.55, "grad_norm": 0.5541062355041504, "learning_rate": 0.0005516996526866788, "loss": 3.2571, "step": 11211 }, { "epoch": 0.55, "grad_norm": 0.5030975341796875, "learning_rate": 0.0005516912747768149, "loss": 3.1197, "step": 11212 }, { "epoch": 0.55, "grad_norm": 0.5093387365341187, "learning_rate": 0.0005516828962040446, "loss": 3.2713, "step": 11213 }, { "epoch": 0.55, "grad_norm": 0.525682806968689, "learning_rate": 0.00055167451696839, "loss": 3.2748, "step": 11214 }, { "epoch": 0.55, "grad_norm": 0.565834105014801, "learning_rate": 0.0005516661370698732, "loss": 3.0386, "step": 11215 }, { "epoch": 0.55, "grad_norm": 0.5513453483581543, "learning_rate": 0.0005516577565085162, "loss": 3.2004, "step": 11216 }, { "epoch": 0.55, "grad_norm": 0.5197407007217407, "learning_rate": 0.0005516493752843411, "loss": 3.1911, "step": 11217 }, { "epoch": 0.55, "grad_norm": 0.5003480315208435, "learning_rate": 0.00055164099339737, "loss": 3.3038, "step": 11218 }, { "epoch": 0.55, "grad_norm": 0.5131300687789917, "learning_rate": 0.0005516326108476248, "loss": 3.2264, "step": 11219 }, { "epoch": 0.55, "grad_norm": 0.528200626373291, "learning_rate": 0.0005516242276351279, "loss": 3.1164, "step": 11220 }, { "epoch": 0.55, "grad_norm": 0.5266100764274597, "learning_rate": 0.0005516158437599011, "loss": 3.2815, "step": 11221 }, { "epoch": 0.55, "grad_norm": 0.5277635455131531, "learning_rate": 0.0005516074592219666, "loss": 3.2759, "step": 11222 }, { "epoch": 0.55, "grad_norm": 0.5396936535835266, "learning_rate": 0.0005515990740213466, "loss": 3.2679, "step": 11223 }, { "epoch": 0.55, "grad_norm": 0.5293653607368469, "learning_rate": 0.000551590688158063, "loss": 3.111, "step": 11224 }, { "epoch": 0.55, "grad_norm": 0.539237380027771, "learning_rate": 0.0005515823016321379, "loss": 3.2653, "step": 11225 }, { "epoch": 0.55, "grad_norm": 0.5173228979110718, "learning_rate": 0.0005515739144435934, "loss": 3.2804, "step": 11226 }, { "epoch": 0.55, "grad_norm": 0.569195032119751, "learning_rate": 0.0005515655265924518, "loss": 3.2265, "step": 11227 }, { "epoch": 0.55, "grad_norm": 0.5007268786430359, "learning_rate": 0.0005515571380787348, "loss": 3.4115, "step": 11228 }, { "epoch": 0.55, "grad_norm": 0.5260988473892212, "learning_rate": 0.0005515487489024649, "loss": 3.2786, "step": 11229 }, { "epoch": 0.55, "grad_norm": 0.5149096250534058, "learning_rate": 0.0005515403590636639, "loss": 3.382, "step": 11230 }, { "epoch": 0.55, "grad_norm": 0.5453810691833496, "learning_rate": 0.000551531968562354, "loss": 3.4435, "step": 11231 }, { "epoch": 0.55, "grad_norm": 0.5204135775566101, "learning_rate": 0.0005515235773985573, "loss": 3.2987, "step": 11232 }, { "epoch": 0.55, "grad_norm": 0.5300283432006836, "learning_rate": 0.000551515185572296, "loss": 3.264, "step": 11233 }, { "epoch": 0.55, "grad_norm": 0.5270791053771973, "learning_rate": 0.000551506793083592, "loss": 3.3112, "step": 11234 }, { "epoch": 0.55, "grad_norm": 0.5587947368621826, "learning_rate": 0.0005514983999324676, "loss": 3.2582, "step": 11235 }, { "epoch": 0.55, "grad_norm": 0.5228792428970337, "learning_rate": 0.0005514900061189447, "loss": 3.2563, "step": 11236 }, { "epoch": 0.55, "grad_norm": 0.507921040058136, "learning_rate": 0.0005514816116430456, "loss": 3.4942, "step": 11237 }, { "epoch": 0.55, "grad_norm": 0.5501565337181091, "learning_rate": 0.0005514732165047923, "loss": 3.2608, "step": 11238 }, { "epoch": 0.55, "grad_norm": 0.5317707657814026, "learning_rate": 0.000551464820704207, "loss": 3.058, "step": 11239 }, { "epoch": 0.55, "grad_norm": 0.5452500581741333, "learning_rate": 0.0005514564242413118, "loss": 2.9297, "step": 11240 }, { "epoch": 0.55, "grad_norm": 0.5187104344367981, "learning_rate": 0.0005514480271161287, "loss": 3.1697, "step": 11241 }, { "epoch": 0.55, "grad_norm": 0.5452839732170105, "learning_rate": 0.0005514396293286798, "loss": 3.2642, "step": 11242 }, { "epoch": 0.55, "grad_norm": 0.5251529812812805, "learning_rate": 0.0005514312308789875, "loss": 3.0369, "step": 11243 }, { "epoch": 0.55, "grad_norm": 0.5177854299545288, "learning_rate": 0.0005514228317670736, "loss": 3.2987, "step": 11244 }, { "epoch": 0.55, "grad_norm": 0.5245817303657532, "learning_rate": 0.0005514144319929604, "loss": 3.1808, "step": 11245 }, { "epoch": 0.55, "grad_norm": 0.5251028537750244, "learning_rate": 0.0005514060315566699, "loss": 3.0786, "step": 11246 }, { "epoch": 0.55, "grad_norm": 0.513904333114624, "learning_rate": 0.0005513976304582243, "loss": 3.1704, "step": 11247 }, { "epoch": 0.55, "grad_norm": 0.5236141681671143, "learning_rate": 0.0005513892286976458, "loss": 3.3471, "step": 11248 }, { "epoch": 0.55, "grad_norm": 0.5166938304901123, "learning_rate": 0.0005513808262749564, "loss": 3.2649, "step": 11249 }, { "epoch": 0.55, "grad_norm": 0.5092709064483643, "learning_rate": 0.0005513724231901783, "loss": 3.2016, "step": 11250 }, { "epoch": 0.55, "grad_norm": 0.5671427845954895, "learning_rate": 0.0005513640194433336, "loss": 3.0024, "step": 11251 }, { "epoch": 0.55, "grad_norm": 0.4978698194026947, "learning_rate": 0.0005513556150344445, "loss": 3.2126, "step": 11252 }, { "epoch": 0.55, "grad_norm": 0.5027517676353455, "learning_rate": 0.000551347209963533, "loss": 3.2967, "step": 11253 }, { "epoch": 0.55, "grad_norm": 0.5220968723297119, "learning_rate": 0.0005513388042306214, "loss": 3.4618, "step": 11254 }, { "epoch": 0.55, "grad_norm": 0.49995917081832886, "learning_rate": 0.0005513303978357317, "loss": 3.2012, "step": 11255 }, { "epoch": 0.55, "grad_norm": 0.5419469475746155, "learning_rate": 0.0005513219907788861, "loss": 3.134, "step": 11256 }, { "epoch": 0.55, "grad_norm": 0.5099649429321289, "learning_rate": 0.0005513135830601068, "loss": 3.1283, "step": 11257 }, { "epoch": 0.55, "grad_norm": 0.5278074741363525, "learning_rate": 0.0005513051746794159, "loss": 3.3438, "step": 11258 }, { "epoch": 0.55, "grad_norm": 0.5176796317100525, "learning_rate": 0.0005512967656368355, "loss": 3.0982, "step": 11259 }, { "epoch": 0.55, "grad_norm": 0.5078525543212891, "learning_rate": 0.0005512883559323878, "loss": 3.4361, "step": 11260 }, { "epoch": 0.55, "grad_norm": 0.4989159405231476, "learning_rate": 0.0005512799455660948, "loss": 3.2102, "step": 11261 }, { "epoch": 0.55, "grad_norm": 0.5165753960609436, "learning_rate": 0.000551271534537979, "loss": 3.2573, "step": 11262 }, { "epoch": 0.55, "grad_norm": 0.5196847319602966, "learning_rate": 0.0005512631228480622, "loss": 3.3376, "step": 11263 }, { "epoch": 0.55, "grad_norm": 0.49779194593429565, "learning_rate": 0.0005512547104963668, "loss": 3.5134, "step": 11264 }, { "epoch": 0.55, "grad_norm": 0.5193706750869751, "learning_rate": 0.0005512462974829147, "loss": 3.2742, "step": 11265 }, { "epoch": 0.55, "grad_norm": 0.5080481171607971, "learning_rate": 0.0005512378838077283, "loss": 3.1623, "step": 11266 }, { "epoch": 0.55, "grad_norm": 0.5434474945068359, "learning_rate": 0.0005512294694708296, "loss": 3.3106, "step": 11267 }, { "epoch": 0.55, "grad_norm": 0.4940679967403412, "learning_rate": 0.0005512210544722408, "loss": 3.2186, "step": 11268 }, { "epoch": 0.55, "grad_norm": 0.5241498947143555, "learning_rate": 0.0005512126388119842, "loss": 3.1095, "step": 11269 }, { "epoch": 0.55, "grad_norm": 0.5188072323799133, "learning_rate": 0.0005512042224900818, "loss": 3.3153, "step": 11270 }, { "epoch": 0.55, "grad_norm": 0.492482990026474, "learning_rate": 0.000551195805506556, "loss": 3.2382, "step": 11271 }, { "epoch": 0.55, "grad_norm": 0.49130314588546753, "learning_rate": 0.0005511873878614286, "loss": 3.236, "step": 11272 }, { "epoch": 0.55, "grad_norm": 0.517808735370636, "learning_rate": 0.0005511789695547219, "loss": 3.3371, "step": 11273 }, { "epoch": 0.55, "grad_norm": 0.5449178218841553, "learning_rate": 0.0005511705505864582, "loss": 3.0906, "step": 11274 }, { "epoch": 0.55, "grad_norm": 0.5152904987335205, "learning_rate": 0.0005511621309566597, "loss": 3.2692, "step": 11275 }, { "epoch": 0.55, "grad_norm": 0.554939866065979, "learning_rate": 0.0005511537106653484, "loss": 3.3965, "step": 11276 }, { "epoch": 0.55, "grad_norm": 0.5003484487533569, "learning_rate": 0.0005511452897125467, "loss": 3.1112, "step": 11277 }, { "epoch": 0.55, "grad_norm": 0.5301329493522644, "learning_rate": 0.0005511368680982765, "loss": 3.3394, "step": 11278 }, { "epoch": 0.55, "grad_norm": 0.4994280934333801, "learning_rate": 0.0005511284458225601, "loss": 3.1309, "step": 11279 }, { "epoch": 0.55, "grad_norm": 0.5375825762748718, "learning_rate": 0.0005511200228854197, "loss": 3.2027, "step": 11280 }, { "epoch": 0.55, "grad_norm": 0.532985270023346, "learning_rate": 0.0005511115992868776, "loss": 3.2824, "step": 11281 }, { "epoch": 0.55, "grad_norm": 0.48315703868865967, "learning_rate": 0.0005511031750269558, "loss": 3.2923, "step": 11282 }, { "epoch": 0.55, "grad_norm": 0.5368021726608276, "learning_rate": 0.0005510947501056766, "loss": 3.2745, "step": 11283 }, { "epoch": 0.55, "grad_norm": 0.5001078844070435, "learning_rate": 0.0005510863245230621, "loss": 3.2778, "step": 11284 }, { "epoch": 0.55, "grad_norm": 0.49867165088653564, "learning_rate": 0.0005510778982791345, "loss": 3.0507, "step": 11285 }, { "epoch": 0.55, "grad_norm": 0.5319203734397888, "learning_rate": 0.0005510694713739162, "loss": 3.1747, "step": 11286 }, { "epoch": 0.55, "grad_norm": 0.5031033158302307, "learning_rate": 0.0005510610438074291, "loss": 3.2447, "step": 11287 }, { "epoch": 0.55, "grad_norm": 0.4784884452819824, "learning_rate": 0.0005510526155796956, "loss": 3.4212, "step": 11288 }, { "epoch": 0.55, "grad_norm": 0.5477344989776611, "learning_rate": 0.0005510441866907377, "loss": 3.1972, "step": 11289 }, { "epoch": 0.55, "grad_norm": 0.49922096729278564, "learning_rate": 0.000551035757140578, "loss": 3.3556, "step": 11290 }, { "epoch": 0.55, "grad_norm": 0.4894360899925232, "learning_rate": 0.0005510273269292382, "loss": 3.2473, "step": 11291 }, { "epoch": 0.55, "grad_norm": 0.5014991760253906, "learning_rate": 0.0005510188960567408, "loss": 3.2122, "step": 11292 }, { "epoch": 0.55, "grad_norm": 0.5156797170639038, "learning_rate": 0.0005510104645231079, "loss": 3.3523, "step": 11293 }, { "epoch": 0.55, "grad_norm": 0.510955274105072, "learning_rate": 0.0005510020323283618, "loss": 3.1125, "step": 11294 }, { "epoch": 0.55, "grad_norm": 0.5010038614273071, "learning_rate": 0.0005509935994725245, "loss": 3.2618, "step": 11295 }, { "epoch": 0.55, "grad_norm": 0.5552043318748474, "learning_rate": 0.0005509851659556185, "loss": 3.0575, "step": 11296 }, { "epoch": 0.55, "grad_norm": 0.48965537548065186, "learning_rate": 0.0005509767317776659, "loss": 3.1812, "step": 11297 }, { "epoch": 0.55, "grad_norm": 0.521115243434906, "learning_rate": 0.0005509682969386888, "loss": 3.2456, "step": 11298 }, { "epoch": 0.55, "grad_norm": 0.544768214225769, "learning_rate": 0.0005509598614387096, "loss": 3.3468, "step": 11299 }, { "epoch": 0.55, "grad_norm": 0.5138852596282959, "learning_rate": 0.0005509514252777503, "loss": 3.4115, "step": 11300 }, { "epoch": 0.55, "grad_norm": 0.5543062090873718, "learning_rate": 0.0005509429884558334, "loss": 3.3333, "step": 11301 }, { "epoch": 0.55, "grad_norm": 0.5786649584770203, "learning_rate": 0.0005509345509729808, "loss": 3.0643, "step": 11302 }, { "epoch": 0.55, "grad_norm": 0.5284539461135864, "learning_rate": 0.000550926112829215, "loss": 3.4618, "step": 11303 }, { "epoch": 0.55, "grad_norm": 0.5231096744537354, "learning_rate": 0.000550917674024558, "loss": 3.213, "step": 11304 }, { "epoch": 0.55, "grad_norm": 0.5191011428833008, "learning_rate": 0.0005509092345590323, "loss": 3.1695, "step": 11305 }, { "epoch": 0.55, "grad_norm": 0.5704165101051331, "learning_rate": 0.0005509007944326598, "loss": 3.2244, "step": 11306 }, { "epoch": 0.55, "grad_norm": 0.5259694457054138, "learning_rate": 0.000550892353645463, "loss": 3.0671, "step": 11307 }, { "epoch": 0.55, "grad_norm": 0.5437265634536743, "learning_rate": 0.0005508839121974641, "loss": 3.2964, "step": 11308 }, { "epoch": 0.55, "grad_norm": 0.5166676044464111, "learning_rate": 0.0005508754700886851, "loss": 3.16, "step": 11309 }, { "epoch": 0.55, "grad_norm": 0.5371460318565369, "learning_rate": 0.0005508670273191485, "loss": 3.0633, "step": 11310 }, { "epoch": 0.55, "grad_norm": 0.5051896572113037, "learning_rate": 0.0005508585838888764, "loss": 3.2427, "step": 11311 }, { "epoch": 0.55, "grad_norm": 0.6367805600166321, "learning_rate": 0.000550850139797891, "loss": 3.3534, "step": 11312 }, { "epoch": 0.55, "grad_norm": 0.5592988729476929, "learning_rate": 0.0005508416950462146, "loss": 3.0948, "step": 11313 }, { "epoch": 0.55, "grad_norm": 0.5036086440086365, "learning_rate": 0.0005508332496338696, "loss": 3.1853, "step": 11314 }, { "epoch": 0.55, "grad_norm": 0.49412891268730164, "learning_rate": 0.000550824803560878, "loss": 3.1354, "step": 11315 }, { "epoch": 0.55, "grad_norm": 0.5339794158935547, "learning_rate": 0.0005508163568272622, "loss": 3.2264, "step": 11316 }, { "epoch": 0.55, "grad_norm": 0.5197786092758179, "learning_rate": 0.0005508079094330443, "loss": 3.388, "step": 11317 }, { "epoch": 0.55, "grad_norm": 0.5386886596679688, "learning_rate": 0.0005507994613782466, "loss": 3.0609, "step": 11318 }, { "epoch": 0.55, "grad_norm": 0.5509684085845947, "learning_rate": 0.0005507910126628915, "loss": 3.131, "step": 11319 }, { "epoch": 0.55, "grad_norm": 0.5496838688850403, "learning_rate": 0.0005507825632870011, "loss": 3.4422, "step": 11320 }, { "epoch": 0.55, "grad_norm": 0.5109300017356873, "learning_rate": 0.0005507741132505977, "loss": 3.2252, "step": 11321 }, { "epoch": 0.55, "grad_norm": 0.48941949009895325, "learning_rate": 0.0005507656625537035, "loss": 3.2382, "step": 11322 }, { "epoch": 0.55, "grad_norm": 0.545982301235199, "learning_rate": 0.0005507572111963408, "loss": 3.2181, "step": 11323 }, { "epoch": 0.55, "grad_norm": 0.5077481269836426, "learning_rate": 0.000550748759178532, "loss": 3.2736, "step": 11324 }, { "epoch": 0.56, "grad_norm": 0.5178395509719849, "learning_rate": 0.0005507403065002991, "loss": 3.249, "step": 11325 }, { "epoch": 0.56, "grad_norm": 0.5448976159095764, "learning_rate": 0.0005507318531616645, "loss": 3.2074, "step": 11326 }, { "epoch": 0.56, "grad_norm": 0.4983932375907898, "learning_rate": 0.0005507233991626504, "loss": 3.309, "step": 11327 }, { "epoch": 0.56, "grad_norm": 0.5093890428543091, "learning_rate": 0.0005507149445032793, "loss": 3.2261, "step": 11328 }, { "epoch": 0.56, "grad_norm": 0.4824458658695221, "learning_rate": 0.0005507064891835732, "loss": 3.1697, "step": 11329 }, { "epoch": 0.56, "grad_norm": 0.5757153034210205, "learning_rate": 0.0005506980332035543, "loss": 3.1732, "step": 11330 }, { "epoch": 0.56, "grad_norm": 0.5348081588745117, "learning_rate": 0.0005506895765632452, "loss": 2.9841, "step": 11331 }, { "epoch": 0.56, "grad_norm": 0.526430070400238, "learning_rate": 0.0005506811192626679, "loss": 3.1298, "step": 11332 }, { "epoch": 0.56, "grad_norm": 0.5291725993156433, "learning_rate": 0.0005506726613018449, "loss": 3.2061, "step": 11333 }, { "epoch": 0.56, "grad_norm": 0.5114479064941406, "learning_rate": 0.0005506642026807983, "loss": 3.3891, "step": 11334 }, { "epoch": 0.56, "grad_norm": 0.544650673866272, "learning_rate": 0.0005506557433995505, "loss": 3.2341, "step": 11335 }, { "epoch": 0.56, "grad_norm": 0.5388392806053162, "learning_rate": 0.0005506472834581236, "loss": 3.1413, "step": 11336 }, { "epoch": 0.56, "grad_norm": 0.5075357556343079, "learning_rate": 0.0005506388228565401, "loss": 3.1682, "step": 11337 }, { "epoch": 0.56, "grad_norm": 0.5130732655525208, "learning_rate": 0.000550630361594822, "loss": 3.2566, "step": 11338 }, { "epoch": 0.56, "grad_norm": 0.5059003233909607, "learning_rate": 0.0005506218996729919, "loss": 3.1209, "step": 11339 }, { "epoch": 0.56, "grad_norm": 0.5154292583465576, "learning_rate": 0.0005506134370910719, "loss": 3.1587, "step": 11340 }, { "epoch": 0.56, "grad_norm": 0.5048404932022095, "learning_rate": 0.0005506049738490844, "loss": 3.2718, "step": 11341 }, { "epoch": 0.56, "grad_norm": 0.6062512397766113, "learning_rate": 0.0005505965099470516, "loss": 3.2463, "step": 11342 }, { "epoch": 0.56, "grad_norm": 0.5517935156822205, "learning_rate": 0.0005505880453849958, "loss": 3.2156, "step": 11343 }, { "epoch": 0.56, "grad_norm": 0.49947217106819153, "learning_rate": 0.0005505795801629393, "loss": 3.1784, "step": 11344 }, { "epoch": 0.56, "grad_norm": 0.4772196114063263, "learning_rate": 0.0005505711142809043, "loss": 3.2223, "step": 11345 }, { "epoch": 0.56, "grad_norm": 0.49474474787712097, "learning_rate": 0.0005505626477389134, "loss": 3.4315, "step": 11346 }, { "epoch": 0.56, "grad_norm": 0.527829110622406, "learning_rate": 0.0005505541805369888, "loss": 3.2979, "step": 11347 }, { "epoch": 0.56, "grad_norm": 0.5038018822669983, "learning_rate": 0.0005505457126751524, "loss": 3.0596, "step": 11348 }, { "epoch": 0.56, "grad_norm": 0.5035970211029053, "learning_rate": 0.000550537244153427, "loss": 3.267, "step": 11349 }, { "epoch": 0.56, "grad_norm": 0.5311546325683594, "learning_rate": 0.0005505287749718348, "loss": 3.2461, "step": 11350 }, { "epoch": 0.56, "grad_norm": 0.5109077095985413, "learning_rate": 0.0005505203051303978, "loss": 3.3966, "step": 11351 }, { "epoch": 0.56, "grad_norm": 0.5348523855209351, "learning_rate": 0.0005505118346291386, "loss": 3.3345, "step": 11352 }, { "epoch": 0.56, "grad_norm": 0.5685103535652161, "learning_rate": 0.0005505033634680795, "loss": 3.1709, "step": 11353 }, { "epoch": 0.56, "grad_norm": 0.533107578754425, "learning_rate": 0.0005504948916472427, "loss": 3.1036, "step": 11354 }, { "epoch": 0.56, "grad_norm": 0.5173946619033813, "learning_rate": 0.0005504864191666506, "loss": 3.3519, "step": 11355 }, { "epoch": 0.56, "grad_norm": 0.5562966465950012, "learning_rate": 0.0005504779460263255, "loss": 3.2072, "step": 11356 }, { "epoch": 0.56, "grad_norm": 0.5066145062446594, "learning_rate": 0.0005504694722262897, "loss": 3.1163, "step": 11357 }, { "epoch": 0.56, "grad_norm": 0.5840992331504822, "learning_rate": 0.0005504609977665655, "loss": 3.5294, "step": 11358 }, { "epoch": 0.56, "grad_norm": 0.5080714225769043, "learning_rate": 0.0005504525226471752, "loss": 3.2118, "step": 11359 }, { "epoch": 0.56, "grad_norm": 0.5689548254013062, "learning_rate": 0.0005504440468681412, "loss": 3.426, "step": 11360 }, { "epoch": 0.56, "grad_norm": 0.5232775211334229, "learning_rate": 0.0005504355704294857, "loss": 3.3447, "step": 11361 }, { "epoch": 0.56, "grad_norm": 0.530229926109314, "learning_rate": 0.0005504270933312311, "loss": 3.1767, "step": 11362 }, { "epoch": 0.56, "grad_norm": 0.4973139762878418, "learning_rate": 0.0005504186155733998, "loss": 3.5491, "step": 11363 }, { "epoch": 0.56, "grad_norm": 0.5430886149406433, "learning_rate": 0.0005504101371560141, "loss": 3.0946, "step": 11364 }, { "epoch": 0.56, "grad_norm": 0.5411336421966553, "learning_rate": 0.0005504016580790963, "loss": 3.306, "step": 11365 }, { "epoch": 0.56, "grad_norm": 0.5088608860969543, "learning_rate": 0.0005503931783426686, "loss": 3.3453, "step": 11366 }, { "epoch": 0.56, "grad_norm": 0.5100044012069702, "learning_rate": 0.0005503846979467535, "loss": 3.177, "step": 11367 }, { "epoch": 0.56, "grad_norm": 0.49586084485054016, "learning_rate": 0.0005503762168913733, "loss": 3.3249, "step": 11368 }, { "epoch": 0.56, "grad_norm": 0.5439696311950684, "learning_rate": 0.0005503677351765503, "loss": 3.2196, "step": 11369 }, { "epoch": 0.56, "grad_norm": 0.5079057216644287, "learning_rate": 0.0005503592528023069, "loss": 3.3114, "step": 11370 }, { "epoch": 0.56, "grad_norm": 0.5126676559448242, "learning_rate": 0.0005503507697686654, "loss": 3.4438, "step": 11371 }, { "epoch": 0.56, "grad_norm": 0.5350159406661987, "learning_rate": 0.0005503422860756482, "loss": 3.2688, "step": 11372 }, { "epoch": 0.56, "grad_norm": 0.5482049584388733, "learning_rate": 0.0005503338017232776, "loss": 3.2605, "step": 11373 }, { "epoch": 0.56, "grad_norm": 0.5451943278312683, "learning_rate": 0.0005503253167115759, "loss": 3.1051, "step": 11374 }, { "epoch": 0.56, "grad_norm": 0.610254168510437, "learning_rate": 0.0005503168310405655, "loss": 3.2985, "step": 11375 }, { "epoch": 0.56, "grad_norm": 0.5316271185874939, "learning_rate": 0.0005503083447102688, "loss": 3.3086, "step": 11376 }, { "epoch": 0.56, "grad_norm": 0.5323793292045593, "learning_rate": 0.000550299857720708, "loss": 3.2379, "step": 11377 }, { "epoch": 0.56, "grad_norm": 0.5014329552650452, "learning_rate": 0.0005502913700719056, "loss": 3.2504, "step": 11378 }, { "epoch": 0.56, "grad_norm": 0.548941433429718, "learning_rate": 0.0005502828817638838, "loss": 3.0864, "step": 11379 }, { "epoch": 0.56, "grad_norm": 0.5736619234085083, "learning_rate": 0.0005502743927966652, "loss": 3.2717, "step": 11380 }, { "epoch": 0.56, "grad_norm": 0.5364370942115784, "learning_rate": 0.0005502659031702721, "loss": 3.0777, "step": 11381 }, { "epoch": 0.56, "grad_norm": 0.6484993100166321, "learning_rate": 0.0005502574128847266, "loss": 3.3747, "step": 11382 }, { "epoch": 0.56, "grad_norm": 0.5159845948219299, "learning_rate": 0.0005502489219400512, "loss": 3.377, "step": 11383 }, { "epoch": 0.56, "grad_norm": 0.48935675621032715, "learning_rate": 0.0005502404303362684, "loss": 3.2342, "step": 11384 }, { "epoch": 0.56, "grad_norm": 0.5167993307113647, "learning_rate": 0.0005502319380734005, "loss": 3.5881, "step": 11385 }, { "epoch": 0.56, "grad_norm": 0.5177289843559265, "learning_rate": 0.0005502234451514697, "loss": 3.392, "step": 11386 }, { "epoch": 0.56, "grad_norm": 0.5180376172065735, "learning_rate": 0.0005502149515704985, "loss": 2.9029, "step": 11387 }, { "epoch": 0.56, "grad_norm": 0.5249106884002686, "learning_rate": 0.0005502064573305094, "loss": 3.2103, "step": 11388 }, { "epoch": 0.56, "grad_norm": 0.5171917676925659, "learning_rate": 0.0005501979624315247, "loss": 3.109, "step": 11389 }, { "epoch": 0.56, "grad_norm": 0.5251325368881226, "learning_rate": 0.0005501894668735666, "loss": 3.3081, "step": 11390 }, { "epoch": 0.56, "grad_norm": 0.518473207950592, "learning_rate": 0.0005501809706566575, "loss": 3.0674, "step": 11391 }, { "epoch": 0.56, "grad_norm": 0.5217577815055847, "learning_rate": 0.00055017247378082, "loss": 3.2443, "step": 11392 }, { "epoch": 0.56, "grad_norm": 0.498160719871521, "learning_rate": 0.0005501639762460764, "loss": 3.1407, "step": 11393 }, { "epoch": 0.56, "grad_norm": 0.5372119545936584, "learning_rate": 0.000550155478052449, "loss": 3.3184, "step": 11394 }, { "epoch": 0.56, "grad_norm": 0.550507664680481, "learning_rate": 0.0005501469791999601, "loss": 3.2081, "step": 11395 }, { "epoch": 0.56, "grad_norm": 0.5311989784240723, "learning_rate": 0.0005501384796886323, "loss": 3.3632, "step": 11396 }, { "epoch": 0.56, "grad_norm": 0.5252476334571838, "learning_rate": 0.0005501299795184878, "loss": 3.1677, "step": 11397 }, { "epoch": 0.56, "grad_norm": 0.5009608268737793, "learning_rate": 0.0005501214786895491, "loss": 3.2492, "step": 11398 }, { "epoch": 0.56, "grad_norm": 0.5100443959236145, "learning_rate": 0.0005501129772018387, "loss": 3.4272, "step": 11399 }, { "epoch": 0.56, "grad_norm": 0.5339254140853882, "learning_rate": 0.0005501044750553788, "loss": 3.2937, "step": 11400 }, { "epoch": 0.56, "grad_norm": 0.5647916793823242, "learning_rate": 0.0005500959722501917, "loss": 3.2429, "step": 11401 }, { "epoch": 0.56, "grad_norm": 0.48013341426849365, "learning_rate": 0.0005500874687863, "loss": 3.2782, "step": 11402 }, { "epoch": 0.56, "grad_norm": 0.5085362195968628, "learning_rate": 0.000550078964663726, "loss": 3.148, "step": 11403 }, { "epoch": 0.56, "grad_norm": 0.5075708031654358, "learning_rate": 0.0005500704598824923, "loss": 3.2333, "step": 11404 }, { "epoch": 0.56, "grad_norm": 0.5158693194389343, "learning_rate": 0.0005500619544426211, "loss": 3.276, "step": 11405 }, { "epoch": 0.56, "grad_norm": 0.5510281920433044, "learning_rate": 0.0005500534483441347, "loss": 3.0602, "step": 11406 }, { "epoch": 0.56, "grad_norm": 0.492349237203598, "learning_rate": 0.0005500449415870558, "loss": 3.3378, "step": 11407 }, { "epoch": 0.56, "grad_norm": 0.502224862575531, "learning_rate": 0.0005500364341714066, "loss": 2.86, "step": 11408 }, { "epoch": 0.56, "grad_norm": 0.5376916527748108, "learning_rate": 0.0005500279260972095, "loss": 3.1087, "step": 11409 }, { "epoch": 0.56, "grad_norm": 0.5204702615737915, "learning_rate": 0.0005500194173644869, "loss": 3.235, "step": 11410 }, { "epoch": 0.56, "grad_norm": 0.5284196734428406, "learning_rate": 0.0005500109079732613, "loss": 3.2917, "step": 11411 }, { "epoch": 0.56, "grad_norm": 0.5423819422721863, "learning_rate": 0.0005500023979235552, "loss": 3.2564, "step": 11412 }, { "epoch": 0.56, "grad_norm": 0.7053835988044739, "learning_rate": 0.0005499938872153908, "loss": 3.2405, "step": 11413 }, { "epoch": 0.56, "grad_norm": 0.5075749754905701, "learning_rate": 0.0005499853758487907, "loss": 3.2424, "step": 11414 }, { "epoch": 0.56, "grad_norm": 0.5351574420928955, "learning_rate": 0.0005499768638237771, "loss": 3.2685, "step": 11415 }, { "epoch": 0.56, "grad_norm": 0.49611949920654297, "learning_rate": 0.0005499683511403727, "loss": 3.2747, "step": 11416 }, { "epoch": 0.56, "grad_norm": 0.5512820482254028, "learning_rate": 0.0005499598377985997, "loss": 3.2599, "step": 11417 }, { "epoch": 0.56, "grad_norm": 0.5308202505111694, "learning_rate": 0.0005499513237984807, "loss": 3.134, "step": 11418 }, { "epoch": 0.56, "grad_norm": 0.5064956545829773, "learning_rate": 0.0005499428091400378, "loss": 3.2349, "step": 11419 }, { "epoch": 0.56, "grad_norm": 0.5472379326820374, "learning_rate": 0.0005499342938232938, "loss": 3.1336, "step": 11420 }, { "epoch": 0.56, "grad_norm": 0.5334680676460266, "learning_rate": 0.0005499257778482709, "loss": 3.2006, "step": 11421 }, { "epoch": 0.56, "grad_norm": 0.5283361077308655, "learning_rate": 0.0005499172612149916, "loss": 3.5046, "step": 11422 }, { "epoch": 0.56, "grad_norm": 0.5037579536437988, "learning_rate": 0.0005499087439234784, "loss": 3.2281, "step": 11423 }, { "epoch": 0.56, "grad_norm": 0.5362147688865662, "learning_rate": 0.0005499002259737536, "loss": 3.2373, "step": 11424 }, { "epoch": 0.56, "grad_norm": 0.5454772710800171, "learning_rate": 0.0005498917073658397, "loss": 3.1557, "step": 11425 }, { "epoch": 0.56, "grad_norm": 0.5577088594436646, "learning_rate": 0.0005498831880997591, "loss": 3.3548, "step": 11426 }, { "epoch": 0.56, "grad_norm": 0.688560962677002, "learning_rate": 0.0005498746681755343, "loss": 3.0268, "step": 11427 }, { "epoch": 0.56, "grad_norm": 0.5097750425338745, "learning_rate": 0.0005498661475931879, "loss": 3.438, "step": 11428 }, { "epoch": 0.56, "grad_norm": 0.5364587903022766, "learning_rate": 0.000549857626352742, "loss": 3.2104, "step": 11429 }, { "epoch": 0.56, "grad_norm": 0.5260832905769348, "learning_rate": 0.0005498491044542191, "loss": 3.4208, "step": 11430 }, { "epoch": 0.56, "grad_norm": 0.49136224389076233, "learning_rate": 0.0005498405818976418, "loss": 3.4387, "step": 11431 }, { "epoch": 0.56, "grad_norm": 0.5791602730751038, "learning_rate": 0.0005498320586830326, "loss": 3.4698, "step": 11432 }, { "epoch": 0.56, "grad_norm": 0.5048266053199768, "learning_rate": 0.0005498235348104137, "loss": 3.1668, "step": 11433 }, { "epoch": 0.56, "grad_norm": 0.5733696818351746, "learning_rate": 0.0005498150102798078, "loss": 3.261, "step": 11434 }, { "epoch": 0.56, "grad_norm": 0.5145251750946045, "learning_rate": 0.0005498064850912373, "loss": 3.3864, "step": 11435 }, { "epoch": 0.56, "grad_norm": 0.5857335329055786, "learning_rate": 0.0005497979592447244, "loss": 3.1253, "step": 11436 }, { "epoch": 0.56, "grad_norm": 0.5221096277236938, "learning_rate": 0.0005497894327402918, "loss": 3.1466, "step": 11437 }, { "epoch": 0.56, "grad_norm": 0.4997885525226593, "learning_rate": 0.0005497809055779619, "loss": 3.1797, "step": 11438 }, { "epoch": 0.56, "grad_norm": 0.5484127998352051, "learning_rate": 0.0005497723777577573, "loss": 3.1413, "step": 11439 }, { "epoch": 0.56, "grad_norm": 0.5325717926025391, "learning_rate": 0.0005497638492797002, "loss": 3.3583, "step": 11440 }, { "epoch": 0.56, "grad_norm": 0.5324811339378357, "learning_rate": 0.0005497553201438132, "loss": 3.111, "step": 11441 }, { "epoch": 0.56, "grad_norm": 0.5136557221412659, "learning_rate": 0.0005497467903501188, "loss": 3.2754, "step": 11442 }, { "epoch": 0.56, "grad_norm": 0.5221689343452454, "learning_rate": 0.0005497382598986394, "loss": 3.3697, "step": 11443 }, { "epoch": 0.56, "grad_norm": 0.5226287245750427, "learning_rate": 0.0005497297287893975, "loss": 3.5221, "step": 11444 }, { "epoch": 0.56, "grad_norm": 0.5215665102005005, "learning_rate": 0.0005497211970224156, "loss": 3.2144, "step": 11445 }, { "epoch": 0.56, "grad_norm": 0.5560000538825989, "learning_rate": 0.000549712664597716, "loss": 3.0434, "step": 11446 }, { "epoch": 0.56, "grad_norm": 0.5306897163391113, "learning_rate": 0.0005497041315153215, "loss": 3.2474, "step": 11447 }, { "epoch": 0.56, "grad_norm": 0.5063650012016296, "learning_rate": 0.0005496955977752541, "loss": 3.2657, "step": 11448 }, { "epoch": 0.56, "grad_norm": 0.5555657148361206, "learning_rate": 0.0005496870633775367, "loss": 3.2211, "step": 11449 }, { "epoch": 0.56, "grad_norm": 0.5415468811988831, "learning_rate": 0.0005496785283221917, "loss": 3.3836, "step": 11450 }, { "epoch": 0.56, "grad_norm": 0.6452375054359436, "learning_rate": 0.0005496699926092415, "loss": 3.2545, "step": 11451 }, { "epoch": 0.56, "grad_norm": 0.5206488966941833, "learning_rate": 0.0005496614562387085, "loss": 3.2159, "step": 11452 }, { "epoch": 0.56, "grad_norm": 0.4810924232006073, "learning_rate": 0.0005496529192106153, "loss": 3.2473, "step": 11453 }, { "epoch": 0.56, "grad_norm": 0.5150770545005798, "learning_rate": 0.0005496443815249843, "loss": 3.1742, "step": 11454 }, { "epoch": 0.56, "grad_norm": 0.5128140449523926, "learning_rate": 0.0005496358431818381, "loss": 3.0848, "step": 11455 }, { "epoch": 0.56, "grad_norm": 0.5324752926826477, "learning_rate": 0.0005496273041811991, "loss": 3.066, "step": 11456 }, { "epoch": 0.56, "grad_norm": 0.5085709095001221, "learning_rate": 0.0005496187645230898, "loss": 3.216, "step": 11457 }, { "epoch": 0.56, "grad_norm": 0.500564455986023, "learning_rate": 0.0005496102242075328, "loss": 3.2649, "step": 11458 }, { "epoch": 0.56, "grad_norm": 0.5043260455131531, "learning_rate": 0.0005496016832345505, "loss": 3.0378, "step": 11459 }, { "epoch": 0.56, "grad_norm": 0.496572345495224, "learning_rate": 0.0005495931416041654, "loss": 3.1889, "step": 11460 }, { "epoch": 0.56, "grad_norm": 0.5041834712028503, "learning_rate": 0.0005495845993163999, "loss": 3.3908, "step": 11461 }, { "epoch": 0.56, "grad_norm": 0.50934898853302, "learning_rate": 0.0005495760563712768, "loss": 3.3334, "step": 11462 }, { "epoch": 0.56, "grad_norm": 0.5718405246734619, "learning_rate": 0.0005495675127688181, "loss": 3.1535, "step": 11463 }, { "epoch": 0.56, "grad_norm": 0.48958179354667664, "learning_rate": 0.0005495589685090468, "loss": 3.0113, "step": 11464 }, { "epoch": 0.56, "grad_norm": 0.5216549038887024, "learning_rate": 0.0005495504235919852, "loss": 3.3366, "step": 11465 }, { "epoch": 0.56, "grad_norm": 0.4936378300189972, "learning_rate": 0.0005495418780176558, "loss": 3.3092, "step": 11466 }, { "epoch": 0.56, "grad_norm": 0.5067612528800964, "learning_rate": 0.0005495333317860812, "loss": 3.3701, "step": 11467 }, { "epoch": 0.56, "grad_norm": 0.5563675165176392, "learning_rate": 0.0005495247848972837, "loss": 3.2486, "step": 11468 }, { "epoch": 0.56, "grad_norm": 0.5286310911178589, "learning_rate": 0.0005495162373512859, "loss": 3.2765, "step": 11469 }, { "epoch": 0.56, "grad_norm": 0.5466699004173279, "learning_rate": 0.0005495076891481104, "loss": 3.1424, "step": 11470 }, { "epoch": 0.56, "grad_norm": 0.5529357194900513, "learning_rate": 0.0005494991402877797, "loss": 3.1241, "step": 11471 }, { "epoch": 0.56, "grad_norm": 0.5391454696655273, "learning_rate": 0.0005494905907703164, "loss": 3.1807, "step": 11472 }, { "epoch": 0.56, "grad_norm": 0.5178135633468628, "learning_rate": 0.0005494820405957427, "loss": 3.2315, "step": 11473 }, { "epoch": 0.56, "grad_norm": 0.5371395945549011, "learning_rate": 0.0005494734897640815, "loss": 3.3146, "step": 11474 }, { "epoch": 0.56, "grad_norm": 0.4930516183376312, "learning_rate": 0.0005494649382753549, "loss": 3.1017, "step": 11475 }, { "epoch": 0.56, "grad_norm": 0.518638551235199, "learning_rate": 0.0005494563861295858, "loss": 3.098, "step": 11476 }, { "epoch": 0.56, "grad_norm": 0.515612781047821, "learning_rate": 0.0005494478333267966, "loss": 3.2221, "step": 11477 }, { "epoch": 0.56, "grad_norm": 0.5380525588989258, "learning_rate": 0.0005494392798670099, "loss": 3.3513, "step": 11478 }, { "epoch": 0.56, "grad_norm": 0.5430471301078796, "learning_rate": 0.0005494307257502479, "loss": 3.0634, "step": 11479 }, { "epoch": 0.56, "grad_norm": 0.5702585577964783, "learning_rate": 0.0005494221709765335, "loss": 3.1669, "step": 11480 }, { "epoch": 0.56, "grad_norm": 0.5334585905075073, "learning_rate": 0.0005494136155458892, "loss": 3.167, "step": 11481 }, { "epoch": 0.56, "grad_norm": 0.5363363027572632, "learning_rate": 0.0005494050594583373, "loss": 3.2432, "step": 11482 }, { "epoch": 0.56, "grad_norm": 0.5021289587020874, "learning_rate": 0.0005493965027139005, "loss": 3.4602, "step": 11483 }, { "epoch": 0.56, "grad_norm": 0.5429815649986267, "learning_rate": 0.0005493879453126013, "loss": 3.1518, "step": 11484 }, { "epoch": 0.56, "grad_norm": 0.48367413878440857, "learning_rate": 0.0005493793872544625, "loss": 3.1878, "step": 11485 }, { "epoch": 0.56, "grad_norm": 0.5013885498046875, "learning_rate": 0.0005493708285395061, "loss": 3.0526, "step": 11486 }, { "epoch": 0.56, "grad_norm": 0.5268964767456055, "learning_rate": 0.000549362269167755, "loss": 3.109, "step": 11487 }, { "epoch": 0.56, "grad_norm": 0.5033860802650452, "learning_rate": 0.0005493537091392316, "loss": 3.3082, "step": 11488 }, { "epoch": 0.56, "grad_norm": 0.535088300704956, "learning_rate": 0.0005493451484539586, "loss": 3.2002, "step": 11489 }, { "epoch": 0.56, "grad_norm": 0.49352458119392395, "learning_rate": 0.0005493365871119584, "loss": 3.4408, "step": 11490 }, { "epoch": 0.56, "grad_norm": 0.5212691426277161, "learning_rate": 0.0005493280251132538, "loss": 3.0987, "step": 11491 }, { "epoch": 0.56, "grad_norm": 0.500873327255249, "learning_rate": 0.000549319462457867, "loss": 3.3256, "step": 11492 }, { "epoch": 0.56, "grad_norm": 0.5535351037979126, "learning_rate": 0.0005493108991458207, "loss": 3.1447, "step": 11493 }, { "epoch": 0.56, "grad_norm": 0.495970219373703, "learning_rate": 0.0005493023351771376, "loss": 3.2712, "step": 11494 }, { "epoch": 0.56, "grad_norm": 0.5443682670593262, "learning_rate": 0.0005492937705518401, "loss": 3.0544, "step": 11495 }, { "epoch": 0.56, "grad_norm": 0.5153055787086487, "learning_rate": 0.0005492852052699507, "loss": 3.3102, "step": 11496 }, { "epoch": 0.56, "grad_norm": 0.48461559414863586, "learning_rate": 0.0005492766393314921, "loss": 3.2207, "step": 11497 }, { "epoch": 0.56, "grad_norm": 0.541000247001648, "learning_rate": 0.0005492680727364868, "loss": 3.2273, "step": 11498 }, { "epoch": 0.56, "grad_norm": 0.53602534532547, "learning_rate": 0.0005492595054849572, "loss": 3.2255, "step": 11499 }, { "epoch": 0.56, "grad_norm": 0.46997231245040894, "learning_rate": 0.0005492509375769264, "loss": 3.197, "step": 11500 }, { "epoch": 0.56, "grad_norm": 0.4768153429031372, "learning_rate": 0.0005492423690124164, "loss": 3.2706, "step": 11501 }, { "epoch": 0.56, "grad_norm": 0.5005391836166382, "learning_rate": 0.0005492337997914499, "loss": 3.0595, "step": 11502 }, { "epoch": 0.56, "grad_norm": 0.5174161195755005, "learning_rate": 0.0005492252299140497, "loss": 3.493, "step": 11503 }, { "epoch": 0.56, "grad_norm": 0.5118815898895264, "learning_rate": 0.000549216659380238, "loss": 3.2988, "step": 11504 }, { "epoch": 0.56, "grad_norm": 0.5185562372207642, "learning_rate": 0.0005492080881900377, "loss": 3.3501, "step": 11505 }, { "epoch": 0.56, "grad_norm": 0.5072935819625854, "learning_rate": 0.0005491995163434712, "loss": 3.4849, "step": 11506 }, { "epoch": 0.56, "grad_norm": 0.49706950783729553, "learning_rate": 0.0005491909438405612, "loss": 3.4126, "step": 11507 }, { "epoch": 0.56, "grad_norm": 0.5042622685432434, "learning_rate": 0.0005491823706813303, "loss": 3.1778, "step": 11508 }, { "epoch": 0.56, "grad_norm": 0.5073032379150391, "learning_rate": 0.0005491737968658009, "loss": 3.041, "step": 11509 }, { "epoch": 0.56, "grad_norm": 0.5095604658126831, "learning_rate": 0.0005491652223939956, "loss": 3.3084, "step": 11510 }, { "epoch": 0.56, "grad_norm": 0.5435943603515625, "learning_rate": 0.0005491566472659372, "loss": 3.1515, "step": 11511 }, { "epoch": 0.56, "grad_norm": 0.51333087682724, "learning_rate": 0.0005491480714816479, "loss": 3.2505, "step": 11512 }, { "epoch": 0.56, "grad_norm": 0.528915286064148, "learning_rate": 0.0005491394950411508, "loss": 3.1406, "step": 11513 }, { "epoch": 0.56, "grad_norm": 0.5109853744506836, "learning_rate": 0.000549130917944468, "loss": 3.2142, "step": 11514 }, { "epoch": 0.56, "grad_norm": 0.5281078219413757, "learning_rate": 0.0005491223401916225, "loss": 3.0415, "step": 11515 }, { "epoch": 0.56, "grad_norm": 0.506549596786499, "learning_rate": 0.0005491137617826366, "loss": 3.6062, "step": 11516 }, { "epoch": 0.56, "grad_norm": 0.5103456974029541, "learning_rate": 0.000549105182717533, "loss": 3.3239, "step": 11517 }, { "epoch": 0.56, "grad_norm": 0.5172756910324097, "learning_rate": 0.0005490966029963343, "loss": 3.1329, "step": 11518 }, { "epoch": 0.56, "grad_norm": 0.5389090776443481, "learning_rate": 0.0005490880226190632, "loss": 3.1909, "step": 11519 }, { "epoch": 0.56, "grad_norm": 0.5944215059280396, "learning_rate": 0.000549079441585742, "loss": 3.2218, "step": 11520 }, { "epoch": 0.56, "grad_norm": 0.525435745716095, "learning_rate": 0.0005490708598963936, "loss": 3.2495, "step": 11521 }, { "epoch": 0.56, "grad_norm": 0.511767566204071, "learning_rate": 0.0005490622775510404, "loss": 3.2823, "step": 11522 }, { "epoch": 0.56, "grad_norm": 0.5037621855735779, "learning_rate": 0.0005490536945497052, "loss": 3.3753, "step": 11523 }, { "epoch": 0.56, "grad_norm": 0.5477989315986633, "learning_rate": 0.0005490451108924104, "loss": 3.299, "step": 11524 }, { "epoch": 0.56, "grad_norm": 0.5245879292488098, "learning_rate": 0.0005490365265791787, "loss": 3.1515, "step": 11525 }, { "epoch": 0.56, "grad_norm": 0.48814061284065247, "learning_rate": 0.0005490279416100328, "loss": 3.154, "step": 11526 }, { "epoch": 0.56, "grad_norm": 0.5687476992607117, "learning_rate": 0.0005490193559849953, "loss": 3.1192, "step": 11527 }, { "epoch": 0.56, "grad_norm": 0.49493011832237244, "learning_rate": 0.0005490107697040886, "loss": 3.0917, "step": 11528 }, { "epoch": 0.57, "grad_norm": 0.5287251472473145, "learning_rate": 0.0005490021827673355, "loss": 3.2482, "step": 11529 }, { "epoch": 0.57, "grad_norm": 0.5165507197380066, "learning_rate": 0.0005489935951747586, "loss": 3.2542, "step": 11530 }, { "epoch": 0.57, "grad_norm": 0.48569759726524353, "learning_rate": 0.0005489850069263804, "loss": 3.2343, "step": 11531 }, { "epoch": 0.57, "grad_norm": 0.5181214809417725, "learning_rate": 0.0005489764180222237, "loss": 3.5893, "step": 11532 }, { "epoch": 0.57, "grad_norm": 0.5267981886863708, "learning_rate": 0.0005489678284623109, "loss": 3.2945, "step": 11533 }, { "epoch": 0.57, "grad_norm": 0.5052371025085449, "learning_rate": 0.0005489592382466649, "loss": 3.2504, "step": 11534 }, { "epoch": 0.57, "grad_norm": 0.5270872116088867, "learning_rate": 0.0005489506473753082, "loss": 3.1425, "step": 11535 }, { "epoch": 0.57, "grad_norm": 0.5012282133102417, "learning_rate": 0.0005489420558482634, "loss": 3.3208, "step": 11536 }, { "epoch": 0.57, "grad_norm": 0.5245894193649292, "learning_rate": 0.0005489334636655529, "loss": 3.1897, "step": 11537 }, { "epoch": 0.57, "grad_norm": 0.5544889569282532, "learning_rate": 0.0005489248708271999, "loss": 3.4318, "step": 11538 }, { "epoch": 0.57, "grad_norm": 0.5470820069313049, "learning_rate": 0.0005489162773332265, "loss": 3.0173, "step": 11539 }, { "epoch": 0.57, "grad_norm": 0.5013363361358643, "learning_rate": 0.0005489076831836555, "loss": 3.3224, "step": 11540 }, { "epoch": 0.57, "grad_norm": 0.514118492603302, "learning_rate": 0.0005488990883785097, "loss": 3.0869, "step": 11541 }, { "epoch": 0.57, "grad_norm": 0.5037225484848022, "learning_rate": 0.0005488904929178115, "loss": 3.1977, "step": 11542 }, { "epoch": 0.57, "grad_norm": 0.5562716722488403, "learning_rate": 0.0005488818968015836, "loss": 3.3027, "step": 11543 }, { "epoch": 0.57, "grad_norm": 0.4860294759273529, "learning_rate": 0.0005488733000298488, "loss": 3.2274, "step": 11544 }, { "epoch": 0.57, "grad_norm": 0.5283397436141968, "learning_rate": 0.0005488647026026296, "loss": 3.3057, "step": 11545 }, { "epoch": 0.57, "grad_norm": 0.5059245824813843, "learning_rate": 0.0005488561045199487, "loss": 3.3249, "step": 11546 }, { "epoch": 0.57, "grad_norm": 0.5175250172615051, "learning_rate": 0.0005488475057818286, "loss": 3.2407, "step": 11547 }, { "epoch": 0.57, "grad_norm": 0.505158007144928, "learning_rate": 0.0005488389063882922, "loss": 3.3951, "step": 11548 }, { "epoch": 0.57, "grad_norm": 0.48427003622055054, "learning_rate": 0.0005488303063393619, "loss": 3.2815, "step": 11549 }, { "epoch": 0.57, "grad_norm": 0.49035337567329407, "learning_rate": 0.0005488217056350605, "loss": 3.3064, "step": 11550 }, { "epoch": 0.57, "grad_norm": 0.5698608756065369, "learning_rate": 0.0005488131042754107, "loss": 3.2583, "step": 11551 }, { "epoch": 0.57, "grad_norm": 0.5469956994056702, "learning_rate": 0.0005488045022604349, "loss": 3.324, "step": 11552 }, { "epoch": 0.57, "grad_norm": 0.5337932109832764, "learning_rate": 0.000548795899590156, "loss": 3.0973, "step": 11553 }, { "epoch": 0.57, "grad_norm": 0.5492282509803772, "learning_rate": 0.0005487872962645966, "loss": 3.218, "step": 11554 }, { "epoch": 0.57, "grad_norm": 0.5477828979492188, "learning_rate": 0.0005487786922837793, "loss": 3.224, "step": 11555 }, { "epoch": 0.57, "grad_norm": 0.4647465944290161, "learning_rate": 0.0005487700876477268, "loss": 3.1174, "step": 11556 }, { "epoch": 0.57, "grad_norm": 0.5192469954490662, "learning_rate": 0.0005487614823564619, "loss": 3.1721, "step": 11557 }, { "epoch": 0.57, "grad_norm": 0.5096564888954163, "learning_rate": 0.000548752876410007, "loss": 3.0154, "step": 11558 }, { "epoch": 0.57, "grad_norm": 0.5024121999740601, "learning_rate": 0.0005487442698083848, "loss": 3.3643, "step": 11559 }, { "epoch": 0.57, "grad_norm": 0.5501764416694641, "learning_rate": 0.0005487356625516183, "loss": 3.3475, "step": 11560 }, { "epoch": 0.57, "grad_norm": 0.5160467624664307, "learning_rate": 0.0005487270546397299, "loss": 3.1848, "step": 11561 }, { "epoch": 0.57, "grad_norm": 0.5589426159858704, "learning_rate": 0.0005487184460727422, "loss": 2.9357, "step": 11562 }, { "epoch": 0.57, "grad_norm": 0.5194653868675232, "learning_rate": 0.000548709836850678, "loss": 3.4209, "step": 11563 }, { "epoch": 0.57, "grad_norm": 0.5185741782188416, "learning_rate": 0.0005487012269735599, "loss": 3.1228, "step": 11564 }, { "epoch": 0.57, "grad_norm": 0.4845937192440033, "learning_rate": 0.0005486926164414108, "loss": 3.3671, "step": 11565 }, { "epoch": 0.57, "grad_norm": 0.561070442199707, "learning_rate": 0.0005486840052542531, "loss": 3.0674, "step": 11566 }, { "epoch": 0.57, "grad_norm": 0.5006464719772339, "learning_rate": 0.0005486753934121095, "loss": 3.2313, "step": 11567 }, { "epoch": 0.57, "grad_norm": 0.49923497438430786, "learning_rate": 0.0005486667809150029, "loss": 3.453, "step": 11568 }, { "epoch": 0.57, "grad_norm": 0.5308060050010681, "learning_rate": 0.0005486581677629558, "loss": 3.2375, "step": 11569 }, { "epoch": 0.57, "grad_norm": 0.5206416249275208, "learning_rate": 0.000548649553955991, "loss": 3.1938, "step": 11570 }, { "epoch": 0.57, "grad_norm": 0.5626609325408936, "learning_rate": 0.0005486409394941311, "loss": 3.2765, "step": 11571 }, { "epoch": 0.57, "grad_norm": 0.5191032290458679, "learning_rate": 0.0005486323243773988, "loss": 3.3281, "step": 11572 }, { "epoch": 0.57, "grad_norm": 0.503855288028717, "learning_rate": 0.0005486237086058169, "loss": 3.0798, "step": 11573 }, { "epoch": 0.57, "grad_norm": 0.5209179520606995, "learning_rate": 0.0005486150921794079, "loss": 3.1159, "step": 11574 }, { "epoch": 0.57, "grad_norm": 0.514619767665863, "learning_rate": 0.0005486064750981946, "loss": 3.2129, "step": 11575 }, { "epoch": 0.57, "grad_norm": 0.5100206136703491, "learning_rate": 0.0005485978573621997, "loss": 3.2254, "step": 11576 }, { "epoch": 0.57, "grad_norm": 0.48534801602363586, "learning_rate": 0.0005485892389714459, "loss": 3.2878, "step": 11577 }, { "epoch": 0.57, "grad_norm": 0.5108082294464111, "learning_rate": 0.000548580619925956, "loss": 3.3976, "step": 11578 }, { "epoch": 0.57, "grad_norm": 0.5104441046714783, "learning_rate": 0.0005485720002257524, "loss": 3.0612, "step": 11579 }, { "epoch": 0.57, "grad_norm": 0.5448250770568848, "learning_rate": 0.000548563379870858, "loss": 3.1129, "step": 11580 }, { "epoch": 0.57, "grad_norm": 0.4960390329360962, "learning_rate": 0.0005485547588612956, "loss": 3.0571, "step": 11581 }, { "epoch": 0.57, "grad_norm": 0.515153706073761, "learning_rate": 0.0005485461371970878, "loss": 3.1635, "step": 11582 }, { "epoch": 0.57, "grad_norm": 0.5046236515045166, "learning_rate": 0.0005485375148782572, "loss": 3.1507, "step": 11583 }, { "epoch": 0.57, "grad_norm": 0.5140905976295471, "learning_rate": 0.0005485288919048266, "loss": 3.2966, "step": 11584 }, { "epoch": 0.57, "grad_norm": 0.5744099617004395, "learning_rate": 0.0005485202682768189, "loss": 3.2489, "step": 11585 }, { "epoch": 0.57, "grad_norm": 0.49185919761657715, "learning_rate": 0.0005485116439942566, "loss": 3.2854, "step": 11586 }, { "epoch": 0.57, "grad_norm": 0.5120055675506592, "learning_rate": 0.0005485030190571623, "loss": 3.2128, "step": 11587 }, { "epoch": 0.57, "grad_norm": 0.5046406388282776, "learning_rate": 0.000548494393465559, "loss": 2.9364, "step": 11588 }, { "epoch": 0.57, "grad_norm": 0.5551308393478394, "learning_rate": 0.0005484857672194693, "loss": 3.1919, "step": 11589 }, { "epoch": 0.57, "grad_norm": 0.5424243807792664, "learning_rate": 0.0005484771403189158, "loss": 3.4059, "step": 11590 }, { "epoch": 0.57, "grad_norm": 0.5322892069816589, "learning_rate": 0.0005484685127639215, "loss": 3.1558, "step": 11591 }, { "epoch": 0.57, "grad_norm": 0.5228599905967712, "learning_rate": 0.0005484598845545089, "loss": 3.4993, "step": 11592 }, { "epoch": 0.57, "grad_norm": 0.5182601809501648, "learning_rate": 0.0005484512556907007, "loss": 3.2109, "step": 11593 }, { "epoch": 0.57, "grad_norm": 0.5158981084823608, "learning_rate": 0.0005484426261725198, "loss": 3.4712, "step": 11594 }, { "epoch": 0.57, "grad_norm": 0.565634548664093, "learning_rate": 0.0005484339959999887, "loss": 3.5278, "step": 11595 }, { "epoch": 0.57, "grad_norm": 0.5130377411842346, "learning_rate": 0.0005484253651731305, "loss": 3.1502, "step": 11596 }, { "epoch": 0.57, "grad_norm": 0.5539126992225647, "learning_rate": 0.0005484167336919675, "loss": 3.3652, "step": 11597 }, { "epoch": 0.57, "grad_norm": 0.5684749484062195, "learning_rate": 0.0005484081015565226, "loss": 3.0461, "step": 11598 }, { "epoch": 0.57, "grad_norm": 0.5369781851768494, "learning_rate": 0.0005483994687668187, "loss": 3.3655, "step": 11599 }, { "epoch": 0.57, "grad_norm": 0.5240543484687805, "learning_rate": 0.0005483908353228784, "loss": 3.387, "step": 11600 }, { "epoch": 0.57, "grad_norm": 0.520671010017395, "learning_rate": 0.0005483822012247244, "loss": 3.1823, "step": 11601 }, { "epoch": 0.57, "grad_norm": 0.4907285273075104, "learning_rate": 0.0005483735664723795, "loss": 3.2514, "step": 11602 }, { "epoch": 0.57, "grad_norm": 0.519844651222229, "learning_rate": 0.0005483649310658665, "loss": 3.4297, "step": 11603 }, { "epoch": 0.57, "grad_norm": 0.5508689880371094, "learning_rate": 0.0005483562950052079, "loss": 3.2559, "step": 11604 }, { "epoch": 0.57, "grad_norm": 0.595680832862854, "learning_rate": 0.0005483476582904268, "loss": 3.2321, "step": 11605 }, { "epoch": 0.57, "grad_norm": 0.4791205823421478, "learning_rate": 0.0005483390209215456, "loss": 3.0654, "step": 11606 }, { "epoch": 0.57, "grad_norm": 0.4992697238922119, "learning_rate": 0.0005483303828985873, "loss": 3.3038, "step": 11607 }, { "epoch": 0.57, "grad_norm": 0.5372535586357117, "learning_rate": 0.0005483217442215745, "loss": 3.349, "step": 11608 }, { "epoch": 0.57, "grad_norm": 0.5181151032447815, "learning_rate": 0.0005483131048905301, "loss": 3.1583, "step": 11609 }, { "epoch": 0.57, "grad_norm": 0.5052180886268616, "learning_rate": 0.0005483044649054767, "loss": 3.4163, "step": 11610 }, { "epoch": 0.57, "grad_norm": 0.5012765526771545, "learning_rate": 0.0005482958242664373, "loss": 3.4786, "step": 11611 }, { "epoch": 0.57, "grad_norm": 0.5442771911621094, "learning_rate": 0.0005482871829734342, "loss": 3.1562, "step": 11612 }, { "epoch": 0.57, "grad_norm": 0.5303670167922974, "learning_rate": 0.0005482785410264907, "loss": 3.2916, "step": 11613 }, { "epoch": 0.57, "grad_norm": 0.5204977989196777, "learning_rate": 0.0005482698984256291, "loss": 3.2238, "step": 11614 }, { "epoch": 0.57, "grad_norm": 0.4961937367916107, "learning_rate": 0.0005482612551708725, "loss": 3.3505, "step": 11615 }, { "epoch": 0.57, "grad_norm": 0.5263996124267578, "learning_rate": 0.0005482526112622435, "loss": 3.1029, "step": 11616 }, { "epoch": 0.57, "grad_norm": 0.5191607475280762, "learning_rate": 0.0005482439666997648, "loss": 3.1744, "step": 11617 }, { "epoch": 0.57, "grad_norm": 0.5280757546424866, "learning_rate": 0.0005482353214834594, "loss": 3.1277, "step": 11618 }, { "epoch": 0.57, "grad_norm": 0.583758533000946, "learning_rate": 0.0005482266756133498, "loss": 3.2388, "step": 11619 }, { "epoch": 0.57, "grad_norm": 0.49690064787864685, "learning_rate": 0.0005482180290894592, "loss": 3.2565, "step": 11620 }, { "epoch": 0.57, "grad_norm": 0.4999449849128723, "learning_rate": 0.0005482093819118098, "loss": 3.1643, "step": 11621 }, { "epoch": 0.57, "grad_norm": 0.4750107228755951, "learning_rate": 0.0005482007340804248, "loss": 2.9841, "step": 11622 }, { "epoch": 0.57, "grad_norm": 0.5504389405250549, "learning_rate": 0.0005481920855953268, "loss": 3.0009, "step": 11623 }, { "epoch": 0.57, "grad_norm": 0.5175572037696838, "learning_rate": 0.0005481834364565386, "loss": 3.3435, "step": 11624 }, { "epoch": 0.57, "grad_norm": 0.5125237107276917, "learning_rate": 0.000548174786664083, "loss": 3.5311, "step": 11625 }, { "epoch": 0.57, "grad_norm": 0.556337296962738, "learning_rate": 0.0005481661362179827, "loss": 3.2554, "step": 11626 }, { "epoch": 0.57, "grad_norm": 0.5328450202941895, "learning_rate": 0.0005481574851182606, "loss": 3.2114, "step": 11627 }, { "epoch": 0.57, "grad_norm": 0.5400496125221252, "learning_rate": 0.0005481488333649395, "loss": 3.2479, "step": 11628 }, { "epoch": 0.57, "grad_norm": 0.529948890209198, "learning_rate": 0.0005481401809580421, "loss": 3.2482, "step": 11629 }, { "epoch": 0.57, "grad_norm": 0.5066934823989868, "learning_rate": 0.0005481315278975911, "loss": 3.4325, "step": 11630 }, { "epoch": 0.57, "grad_norm": 0.4977806806564331, "learning_rate": 0.0005481228741836096, "loss": 3.3261, "step": 11631 }, { "epoch": 0.57, "grad_norm": 0.526506245136261, "learning_rate": 0.0005481142198161201, "loss": 3.1154, "step": 11632 }, { "epoch": 0.57, "grad_norm": 0.6684457659721375, "learning_rate": 0.0005481055647951456, "loss": 3.2972, "step": 11633 }, { "epoch": 0.57, "grad_norm": 0.49738237261772156, "learning_rate": 0.0005480969091207086, "loss": 3.2738, "step": 11634 }, { "epoch": 0.57, "grad_norm": 0.5682615637779236, "learning_rate": 0.0005480882527928322, "loss": 3.3088, "step": 11635 }, { "epoch": 0.57, "grad_norm": 0.5242875814437866, "learning_rate": 0.0005480795958115391, "loss": 3.2557, "step": 11636 }, { "epoch": 0.57, "grad_norm": 0.5072360038757324, "learning_rate": 0.0005480709381768521, "loss": 3.0437, "step": 11637 }, { "epoch": 0.57, "grad_norm": 0.5002800822257996, "learning_rate": 0.000548062279888794, "loss": 3.2534, "step": 11638 }, { "epoch": 0.57, "grad_norm": 0.5526995062828064, "learning_rate": 0.0005480536209473874, "loss": 3.0043, "step": 11639 }, { "epoch": 0.57, "grad_norm": 0.5290579199790955, "learning_rate": 0.0005480449613526555, "loss": 3.2353, "step": 11640 }, { "epoch": 0.57, "grad_norm": 0.5533804893493652, "learning_rate": 0.0005480363011046208, "loss": 3.3581, "step": 11641 }, { "epoch": 0.57, "grad_norm": 0.4938673973083496, "learning_rate": 0.0005480276402033064, "loss": 3.2501, "step": 11642 }, { "epoch": 0.57, "grad_norm": 0.5148259401321411, "learning_rate": 0.0005480189786487348, "loss": 3.3157, "step": 11643 }, { "epoch": 0.57, "grad_norm": 0.5525953769683838, "learning_rate": 0.0005480103164409289, "loss": 3.0652, "step": 11644 }, { "epoch": 0.57, "grad_norm": 0.5157591104507446, "learning_rate": 0.0005480016535799117, "loss": 3.2691, "step": 11645 }, { "epoch": 0.57, "grad_norm": 0.4905431866645813, "learning_rate": 0.0005479929900657057, "loss": 3.1825, "step": 11646 }, { "epoch": 0.57, "grad_norm": 0.49346473813056946, "learning_rate": 0.000547984325898334, "loss": 3.2634, "step": 11647 }, { "epoch": 0.57, "grad_norm": 0.5140097141265869, "learning_rate": 0.0005479756610778194, "loss": 3.1073, "step": 11648 }, { "epoch": 0.57, "grad_norm": 0.4972335398197174, "learning_rate": 0.0005479669956041844, "loss": 3.2337, "step": 11649 }, { "epoch": 0.57, "grad_norm": 0.5696159601211548, "learning_rate": 0.0005479583294774522, "loss": 3.2024, "step": 11650 }, { "epoch": 0.57, "grad_norm": 0.5201365947723389, "learning_rate": 0.0005479496626976455, "loss": 3.4275, "step": 11651 }, { "epoch": 0.57, "grad_norm": 0.5507444143295288, "learning_rate": 0.000547940995264787, "loss": 3.0116, "step": 11652 }, { "epoch": 0.57, "grad_norm": 0.5264685153961182, "learning_rate": 0.0005479323271788997, "loss": 3.2018, "step": 11653 }, { "epoch": 0.57, "grad_norm": 0.5618388652801514, "learning_rate": 0.0005479236584400065, "loss": 3.2548, "step": 11654 }, { "epoch": 0.57, "grad_norm": 0.5223483443260193, "learning_rate": 0.0005479149890481299, "loss": 3.2424, "step": 11655 }, { "epoch": 0.57, "grad_norm": 0.5244974493980408, "learning_rate": 0.000547906319003293, "loss": 3.221, "step": 11656 }, { "epoch": 0.57, "grad_norm": 0.5352416634559631, "learning_rate": 0.0005478976483055185, "loss": 3.1539, "step": 11657 }, { "epoch": 0.57, "grad_norm": 0.5786653757095337, "learning_rate": 0.0005478889769548295, "loss": 3.2923, "step": 11658 }, { "epoch": 0.57, "grad_norm": 0.5236338376998901, "learning_rate": 0.0005478803049512484, "loss": 3.1554, "step": 11659 }, { "epoch": 0.57, "grad_norm": 0.529576301574707, "learning_rate": 0.0005478716322947985, "loss": 3.2727, "step": 11660 }, { "epoch": 0.57, "grad_norm": 0.5126217007637024, "learning_rate": 0.0005478629589855022, "loss": 3.3197, "step": 11661 }, { "epoch": 0.57, "grad_norm": 0.5071096420288086, "learning_rate": 0.0005478542850233827, "loss": 3.4426, "step": 11662 }, { "epoch": 0.57, "grad_norm": 0.5255385637283325, "learning_rate": 0.0005478456104084627, "loss": 3.2004, "step": 11663 }, { "epoch": 0.57, "grad_norm": 0.5327421426773071, "learning_rate": 0.0005478369351407651, "loss": 3.1227, "step": 11664 }, { "epoch": 0.57, "grad_norm": 0.5336896181106567, "learning_rate": 0.0005478282592203126, "loss": 3.2945, "step": 11665 }, { "epoch": 0.57, "grad_norm": 0.4557580351829529, "learning_rate": 0.0005478195826471282, "loss": 3.2835, "step": 11666 }, { "epoch": 0.57, "grad_norm": 0.5098157525062561, "learning_rate": 0.0005478109054212349, "loss": 3.3144, "step": 11667 }, { "epoch": 0.57, "grad_norm": 0.5083901286125183, "learning_rate": 0.0005478022275426551, "loss": 3.1161, "step": 11668 }, { "epoch": 0.57, "grad_norm": 0.5286469459533691, "learning_rate": 0.000547793549011412, "loss": 3.2938, "step": 11669 }, { "epoch": 0.57, "grad_norm": 0.5404655933380127, "learning_rate": 0.0005477848698275285, "loss": 3.1669, "step": 11670 }, { "epoch": 0.57, "grad_norm": 0.5109778046607971, "learning_rate": 0.0005477761899910272, "loss": 3.2569, "step": 11671 }, { "epoch": 0.57, "grad_norm": 0.5234958529472351, "learning_rate": 0.0005477675095019312, "loss": 3.2914, "step": 11672 }, { "epoch": 0.57, "grad_norm": 0.5528802871704102, "learning_rate": 0.0005477588283602632, "loss": 3.2845, "step": 11673 }, { "epoch": 0.57, "grad_norm": 0.5386664867401123, "learning_rate": 0.0005477501465660461, "loss": 3.344, "step": 11674 }, { "epoch": 0.57, "grad_norm": 0.5150508284568787, "learning_rate": 0.0005477414641193028, "loss": 3.3954, "step": 11675 }, { "epoch": 0.57, "grad_norm": 0.4998857378959656, "learning_rate": 0.0005477327810200562, "loss": 3.3447, "step": 11676 }, { "epoch": 0.57, "grad_norm": 0.5428280234336853, "learning_rate": 0.0005477240972683292, "loss": 3.1212, "step": 11677 }, { "epoch": 0.57, "grad_norm": 0.5053800940513611, "learning_rate": 0.0005477154128641445, "loss": 3.2313, "step": 11678 }, { "epoch": 0.57, "grad_norm": 0.586479663848877, "learning_rate": 0.0005477067278075251, "loss": 3.3651, "step": 11679 }, { "epoch": 0.57, "grad_norm": 0.5042681694030762, "learning_rate": 0.000547698042098494, "loss": 3.2991, "step": 11680 }, { "epoch": 0.57, "grad_norm": 0.5273053646087646, "learning_rate": 0.0005476893557370737, "loss": 3.1918, "step": 11681 }, { "epoch": 0.57, "grad_norm": 0.5238834023475647, "learning_rate": 0.0005476806687232874, "loss": 3.3427, "step": 11682 }, { "epoch": 0.57, "grad_norm": 0.5385345220565796, "learning_rate": 0.0005476719810571579, "loss": 3.2813, "step": 11683 }, { "epoch": 0.57, "grad_norm": 0.4971548318862915, "learning_rate": 0.000547663292738708, "loss": 3.2153, "step": 11684 }, { "epoch": 0.57, "grad_norm": 0.5180023908615112, "learning_rate": 0.0005476546037679608, "loss": 3.3399, "step": 11685 }, { "epoch": 0.57, "grad_norm": 0.510409414768219, "learning_rate": 0.0005476459141449388, "loss": 3.1909, "step": 11686 }, { "epoch": 0.57, "grad_norm": 0.555425763130188, "learning_rate": 0.0005476372238696653, "loss": 3.1216, "step": 11687 }, { "epoch": 0.57, "grad_norm": 0.5270985960960388, "learning_rate": 0.0005476285329421629, "loss": 3.1847, "step": 11688 }, { "epoch": 0.57, "grad_norm": 0.5697065591812134, "learning_rate": 0.0005476198413624548, "loss": 3.1772, "step": 11689 }, { "epoch": 0.57, "grad_norm": 0.514995813369751, "learning_rate": 0.0005476111491305635, "loss": 3.3174, "step": 11690 }, { "epoch": 0.57, "grad_norm": 0.5724436044692993, "learning_rate": 0.0005476024562465121, "loss": 3.3164, "step": 11691 }, { "epoch": 0.57, "grad_norm": 0.5155641436576843, "learning_rate": 0.0005475937627103237, "loss": 3.0523, "step": 11692 }, { "epoch": 0.57, "grad_norm": 0.4877888560295105, "learning_rate": 0.0005475850685220208, "loss": 3.2307, "step": 11693 }, { "epoch": 0.57, "grad_norm": 0.49294334650039673, "learning_rate": 0.0005475763736816264, "loss": 3.0084, "step": 11694 }, { "epoch": 0.57, "grad_norm": 0.4982193410396576, "learning_rate": 0.0005475676781891636, "loss": 3.3148, "step": 11695 }, { "epoch": 0.57, "grad_norm": 0.6280152797698975, "learning_rate": 0.0005475589820446552, "loss": 3.3086, "step": 11696 }, { "epoch": 0.57, "grad_norm": 0.5116281509399414, "learning_rate": 0.000547550285248124, "loss": 3.2566, "step": 11697 }, { "epoch": 0.57, "grad_norm": 0.5089260339736938, "learning_rate": 0.000547541587799593, "loss": 3.1644, "step": 11698 }, { "epoch": 0.57, "grad_norm": 0.5232623815536499, "learning_rate": 0.0005475328896990851, "loss": 3.2846, "step": 11699 }, { "epoch": 0.57, "grad_norm": 0.5016197562217712, "learning_rate": 0.0005475241909466234, "loss": 3.0528, "step": 11700 }, { "epoch": 0.57, "grad_norm": 0.5107477307319641, "learning_rate": 0.0005475154915422304, "loss": 3.0808, "step": 11701 }, { "epoch": 0.57, "grad_norm": 0.5307403206825256, "learning_rate": 0.0005475067914859292, "loss": 3.2766, "step": 11702 }, { "epoch": 0.57, "grad_norm": 0.50713050365448, "learning_rate": 0.0005474980907777428, "loss": 3.3566, "step": 11703 }, { "epoch": 0.57, "grad_norm": 0.4886563718318939, "learning_rate": 0.0005474893894176941, "loss": 3.3369, "step": 11704 }, { "epoch": 0.57, "grad_norm": 0.5123024582862854, "learning_rate": 0.000547480687405806, "loss": 3.1893, "step": 11705 }, { "epoch": 0.57, "grad_norm": 0.510797917842865, "learning_rate": 0.0005474719847421015, "loss": 3.0718, "step": 11706 }, { "epoch": 0.57, "grad_norm": 0.558925449848175, "learning_rate": 0.0005474632814266031, "loss": 2.9562, "step": 11707 }, { "epoch": 0.57, "grad_norm": 0.5396509170532227, "learning_rate": 0.0005474545774593343, "loss": 3.4154, "step": 11708 }, { "epoch": 0.57, "grad_norm": 0.5025230050086975, "learning_rate": 0.0005474458728403176, "loss": 3.0728, "step": 11709 }, { "epoch": 0.57, "grad_norm": 0.5266216993331909, "learning_rate": 0.0005474371675695762, "loss": 3.4107, "step": 11710 }, { "epoch": 0.57, "grad_norm": 0.5457305312156677, "learning_rate": 0.000547428461647133, "loss": 3.1522, "step": 11711 }, { "epoch": 0.57, "grad_norm": 0.522469162940979, "learning_rate": 0.0005474197550730107, "loss": 3.5021, "step": 11712 }, { "epoch": 0.57, "grad_norm": 0.4818090796470642, "learning_rate": 0.0005474110478472325, "loss": 3.0651, "step": 11713 }, { "epoch": 0.57, "grad_norm": 0.520215630531311, "learning_rate": 0.0005474023399698212, "loss": 3.3468, "step": 11714 }, { "epoch": 0.57, "grad_norm": 0.5050302743911743, "learning_rate": 0.0005473936314407996, "loss": 3.144, "step": 11715 }, { "epoch": 0.57, "grad_norm": 0.5783846378326416, "learning_rate": 0.0005473849222601909, "loss": 3.2924, "step": 11716 }, { "epoch": 0.57, "grad_norm": 0.4962663948535919, "learning_rate": 0.0005473762124280179, "loss": 3.0763, "step": 11717 }, { "epoch": 0.57, "grad_norm": 0.48002344369888306, "learning_rate": 0.0005473675019443036, "loss": 3.1622, "step": 11718 }, { "epoch": 0.57, "grad_norm": 0.5629715919494629, "learning_rate": 0.0005473587908090709, "loss": 3.351, "step": 11719 }, { "epoch": 0.57, "grad_norm": 0.5525310039520264, "learning_rate": 0.0005473500790223428, "loss": 3.2598, "step": 11720 }, { "epoch": 0.57, "grad_norm": 0.47623834013938904, "learning_rate": 0.000547341366584142, "loss": 3.2051, "step": 11721 }, { "epoch": 0.57, "grad_norm": 0.546389639377594, "learning_rate": 0.0005473326534944918, "loss": 3.1717, "step": 11722 }, { "epoch": 0.57, "grad_norm": 0.4834481477737427, "learning_rate": 0.000547323939753415, "loss": 3.2224, "step": 11723 }, { "epoch": 0.57, "grad_norm": 0.5237901210784912, "learning_rate": 0.0005473152253609345, "loss": 3.1499, "step": 11724 }, { "epoch": 0.57, "grad_norm": 0.5636874437332153, "learning_rate": 0.0005473065103170733, "loss": 3.1321, "step": 11725 }, { "epoch": 0.57, "grad_norm": 0.5102803707122803, "learning_rate": 0.0005472977946218543, "loss": 3.3097, "step": 11726 }, { "epoch": 0.57, "grad_norm": 0.5490740537643433, "learning_rate": 0.0005472890782753006, "loss": 3.3188, "step": 11727 }, { "epoch": 0.57, "grad_norm": 0.5862623453140259, "learning_rate": 0.000547280361277435, "loss": 3.1409, "step": 11728 }, { "epoch": 0.57, "grad_norm": 0.48724859952926636, "learning_rate": 0.0005472716436282806, "loss": 3.4519, "step": 11729 }, { "epoch": 0.57, "grad_norm": 0.5731695294380188, "learning_rate": 0.0005472629253278601, "loss": 3.2008, "step": 11730 }, { "epoch": 0.57, "grad_norm": 0.5652557611465454, "learning_rate": 0.0005472542063761968, "loss": 3.2627, "step": 11731 }, { "epoch": 0.57, "grad_norm": 0.49549639225006104, "learning_rate": 0.0005472454867733134, "loss": 3.2696, "step": 11732 }, { "epoch": 0.58, "grad_norm": 0.5207010507583618, "learning_rate": 0.0005472367665192331, "loss": 3.2412, "step": 11733 }, { "epoch": 0.58, "grad_norm": 0.5095499157905579, "learning_rate": 0.0005472280456139786, "loss": 3.066, "step": 11734 }, { "epoch": 0.58, "grad_norm": 0.5058441162109375, "learning_rate": 0.000547219324057573, "loss": 3.3581, "step": 11735 }, { "epoch": 0.58, "grad_norm": 0.536139190196991, "learning_rate": 0.0005472106018500394, "loss": 3.2069, "step": 11736 }, { "epoch": 0.58, "grad_norm": 0.5336572527885437, "learning_rate": 0.0005472018789914007, "loss": 3.1781, "step": 11737 }, { "epoch": 0.58, "grad_norm": 0.5066656470298767, "learning_rate": 0.0005471931554816797, "loss": 3.0132, "step": 11738 }, { "epoch": 0.58, "grad_norm": 0.53529292345047, "learning_rate": 0.0005471844313208995, "loss": 3.2795, "step": 11739 }, { "epoch": 0.58, "grad_norm": 0.4876529276371002, "learning_rate": 0.0005471757065090831, "loss": 3.2349, "step": 11740 }, { "epoch": 0.58, "grad_norm": 0.5300586819648743, "learning_rate": 0.0005471669810462534, "loss": 3.1156, "step": 11741 }, { "epoch": 0.58, "grad_norm": 0.5383363962173462, "learning_rate": 0.0005471582549324336, "loss": 3.3422, "step": 11742 }, { "epoch": 0.58, "grad_norm": 0.5305922031402588, "learning_rate": 0.0005471495281676464, "loss": 3.1445, "step": 11743 }, { "epoch": 0.58, "grad_norm": 0.5225808620452881, "learning_rate": 0.000547140800751915, "loss": 3.3356, "step": 11744 }, { "epoch": 0.58, "grad_norm": 0.5275900959968567, "learning_rate": 0.0005471320726852621, "loss": 3.3428, "step": 11745 }, { "epoch": 0.58, "grad_norm": 0.553205132484436, "learning_rate": 0.000547123343967711, "loss": 3.2428, "step": 11746 }, { "epoch": 0.58, "grad_norm": 0.5640978217124939, "learning_rate": 0.0005471146145992847, "loss": 3.0525, "step": 11747 }, { "epoch": 0.58, "grad_norm": 0.5505227446556091, "learning_rate": 0.0005471058845800059, "loss": 3.059, "step": 11748 }, { "epoch": 0.58, "grad_norm": 0.5284337997436523, "learning_rate": 0.0005470971539098978, "loss": 3.3115, "step": 11749 }, { "epoch": 0.58, "grad_norm": 0.5046818256378174, "learning_rate": 0.0005470884225889834, "loss": 3.2677, "step": 11750 }, { "epoch": 0.58, "grad_norm": 0.5719025135040283, "learning_rate": 0.0005470796906172855, "loss": 3.1408, "step": 11751 }, { "epoch": 0.58, "grad_norm": 0.5198563933372498, "learning_rate": 0.0005470709579948274, "loss": 3.3498, "step": 11752 }, { "epoch": 0.58, "grad_norm": 0.5236194133758545, "learning_rate": 0.0005470622247216319, "loss": 3.255, "step": 11753 }, { "epoch": 0.58, "grad_norm": 0.5159012675285339, "learning_rate": 0.000547053490797722, "loss": 3.4876, "step": 11754 }, { "epoch": 0.58, "grad_norm": 0.4822523891925812, "learning_rate": 0.0005470447562231209, "loss": 3.2904, "step": 11755 }, { "epoch": 0.58, "grad_norm": 0.5354002118110657, "learning_rate": 0.0005470360209978513, "loss": 3.123, "step": 11756 }, { "epoch": 0.58, "grad_norm": 0.5133538246154785, "learning_rate": 0.0005470272851219364, "loss": 3.412, "step": 11757 }, { "epoch": 0.58, "grad_norm": 0.5027098655700684, "learning_rate": 0.0005470185485953992, "loss": 3.2522, "step": 11758 }, { "epoch": 0.58, "grad_norm": 0.5310078859329224, "learning_rate": 0.0005470098114182627, "loss": 3.2146, "step": 11759 }, { "epoch": 0.58, "grad_norm": 0.5589718818664551, "learning_rate": 0.0005470010735905499, "loss": 3.2976, "step": 11760 }, { "epoch": 0.58, "grad_norm": 0.5355909466743469, "learning_rate": 0.0005469923351122837, "loss": 3.2431, "step": 11761 }, { "epoch": 0.58, "grad_norm": 0.5391919016838074, "learning_rate": 0.0005469835959834873, "loss": 3.0428, "step": 11762 }, { "epoch": 0.58, "grad_norm": 0.5123609304428101, "learning_rate": 0.0005469748562041837, "loss": 3.2564, "step": 11763 }, { "epoch": 0.58, "grad_norm": 0.4937504827976227, "learning_rate": 0.0005469661157743958, "loss": 3.2483, "step": 11764 }, { "epoch": 0.58, "grad_norm": 0.5278680920600891, "learning_rate": 0.0005469573746941467, "loss": 3.2399, "step": 11765 }, { "epoch": 0.58, "grad_norm": 0.5318676233291626, "learning_rate": 0.0005469486329634593, "loss": 3.1255, "step": 11766 }, { "epoch": 0.58, "grad_norm": 0.5315147638320923, "learning_rate": 0.0005469398905823569, "loss": 3.3234, "step": 11767 }, { "epoch": 0.58, "grad_norm": 0.5593608617782593, "learning_rate": 0.0005469311475508622, "loss": 3.253, "step": 11768 }, { "epoch": 0.58, "grad_norm": 0.5003385543823242, "learning_rate": 0.0005469224038689985, "loss": 2.979, "step": 11769 }, { "epoch": 0.58, "grad_norm": 0.530859649181366, "learning_rate": 0.0005469136595367887, "loss": 3.1236, "step": 11770 }, { "epoch": 0.58, "grad_norm": 0.5268099308013916, "learning_rate": 0.0005469049145542558, "loss": 3.4322, "step": 11771 }, { "epoch": 0.58, "grad_norm": 0.5292220115661621, "learning_rate": 0.0005468961689214228, "loss": 3.2846, "step": 11772 }, { "epoch": 0.58, "grad_norm": 0.5274138450622559, "learning_rate": 0.0005468874226383128, "loss": 3.1513, "step": 11773 }, { "epoch": 0.58, "grad_norm": 0.5348718762397766, "learning_rate": 0.000546878675704949, "loss": 3.2605, "step": 11774 }, { "epoch": 0.58, "grad_norm": 0.5165255665779114, "learning_rate": 0.0005468699281213541, "loss": 3.1522, "step": 11775 }, { "epoch": 0.58, "grad_norm": 0.5076016783714294, "learning_rate": 0.0005468611798875515, "loss": 3.2869, "step": 11776 }, { "epoch": 0.58, "grad_norm": 0.5093604922294617, "learning_rate": 0.0005468524310035639, "loss": 3.0047, "step": 11777 }, { "epoch": 0.58, "grad_norm": 0.5292362570762634, "learning_rate": 0.0005468436814694146, "loss": 3.1524, "step": 11778 }, { "epoch": 0.58, "grad_norm": 0.5151748657226562, "learning_rate": 0.0005468349312851265, "loss": 3.1051, "step": 11779 }, { "epoch": 0.58, "grad_norm": 0.5270185470581055, "learning_rate": 0.0005468261804507228, "loss": 3.2477, "step": 11780 }, { "epoch": 0.58, "grad_norm": 0.5115005970001221, "learning_rate": 0.0005468174289662264, "loss": 3.265, "step": 11781 }, { "epoch": 0.58, "grad_norm": 0.5105668902397156, "learning_rate": 0.0005468086768316604, "loss": 3.189, "step": 11782 }, { "epoch": 0.58, "grad_norm": 0.4958377778530121, "learning_rate": 0.0005467999240470478, "loss": 3.2936, "step": 11783 }, { "epoch": 0.58, "grad_norm": 0.5410537719726562, "learning_rate": 0.0005467911706124117, "loss": 3.2262, "step": 11784 }, { "epoch": 0.58, "grad_norm": 0.5103792548179626, "learning_rate": 0.0005467824165277752, "loss": 3.2767, "step": 11785 }, { "epoch": 0.58, "grad_norm": 0.623610258102417, "learning_rate": 0.0005467736617931613, "loss": 3.2318, "step": 11786 }, { "epoch": 0.58, "grad_norm": 0.5323020219802856, "learning_rate": 0.000546764906408593, "loss": 3.1768, "step": 11787 }, { "epoch": 0.58, "grad_norm": 0.5189688205718994, "learning_rate": 0.0005467561503740934, "loss": 3.1885, "step": 11788 }, { "epoch": 0.58, "grad_norm": 0.5025799870491028, "learning_rate": 0.0005467473936896857, "loss": 3.3244, "step": 11789 }, { "epoch": 0.58, "grad_norm": 0.5281747579574585, "learning_rate": 0.0005467386363553927, "loss": 3.3322, "step": 11790 }, { "epoch": 0.58, "grad_norm": 0.5055098533630371, "learning_rate": 0.0005467298783712378, "loss": 3.1912, "step": 11791 }, { "epoch": 0.58, "grad_norm": 0.5109471678733826, "learning_rate": 0.0005467211197372438, "loss": 3.3548, "step": 11792 }, { "epoch": 0.58, "grad_norm": 0.49919557571411133, "learning_rate": 0.0005467123604534338, "loss": 3.2776, "step": 11793 }, { "epoch": 0.58, "grad_norm": 0.5183613300323486, "learning_rate": 0.0005467036005198311, "loss": 3.3152, "step": 11794 }, { "epoch": 0.58, "grad_norm": 0.49261006712913513, "learning_rate": 0.0005466948399364584, "loss": 3.0799, "step": 11795 }, { "epoch": 0.58, "grad_norm": 0.5078513622283936, "learning_rate": 0.0005466860787033391, "loss": 3.2225, "step": 11796 }, { "epoch": 0.58, "grad_norm": 0.5038175582885742, "learning_rate": 0.000546677316820496, "loss": 3.3051, "step": 11797 }, { "epoch": 0.58, "grad_norm": 0.5619712471961975, "learning_rate": 0.0005466685542879525, "loss": 3.2478, "step": 11798 }, { "epoch": 0.58, "grad_norm": 0.4969761371612549, "learning_rate": 0.0005466597911057314, "loss": 3.2508, "step": 11799 }, { "epoch": 0.58, "grad_norm": 0.5520473122596741, "learning_rate": 0.0005466510272738559, "loss": 3.3678, "step": 11800 }, { "epoch": 0.58, "grad_norm": 0.5152010917663574, "learning_rate": 0.000546642262792349, "loss": 3.2858, "step": 11801 }, { "epoch": 0.58, "grad_norm": 0.5011783242225647, "learning_rate": 0.000546633497661234, "loss": 3.2719, "step": 11802 }, { "epoch": 0.58, "grad_norm": 0.5367514491081238, "learning_rate": 0.0005466247318805336, "loss": 3.3372, "step": 11803 }, { "epoch": 0.58, "grad_norm": 0.5518689155578613, "learning_rate": 0.0005466159654502713, "loss": 3.2038, "step": 11804 }, { "epoch": 0.58, "grad_norm": 0.5553038716316223, "learning_rate": 0.00054660719837047, "loss": 3.082, "step": 11805 }, { "epoch": 0.58, "grad_norm": 0.5008841753005981, "learning_rate": 0.0005465984306411528, "loss": 3.0853, "step": 11806 }, { "epoch": 0.58, "grad_norm": 0.48236528038978577, "learning_rate": 0.0005465896622623427, "loss": 3.2213, "step": 11807 }, { "epoch": 0.58, "grad_norm": 0.5906662940979004, "learning_rate": 0.000546580893234063, "loss": 3.1436, "step": 11808 }, { "epoch": 0.58, "grad_norm": 0.5112290382385254, "learning_rate": 0.0005465721235563365, "loss": 3.2082, "step": 11809 }, { "epoch": 0.58, "grad_norm": 0.5187637805938721, "learning_rate": 0.0005465633532291867, "loss": 3.2795, "step": 11810 }, { "epoch": 0.58, "grad_norm": 0.5141165852546692, "learning_rate": 0.0005465545822526364, "loss": 3.191, "step": 11811 }, { "epoch": 0.58, "grad_norm": 0.6304354071617126, "learning_rate": 0.0005465458106267087, "loss": 3.3437, "step": 11812 }, { "epoch": 0.58, "grad_norm": 0.5287776589393616, "learning_rate": 0.0005465370383514269, "loss": 3.1787, "step": 11813 }, { "epoch": 0.58, "grad_norm": 0.5059935450553894, "learning_rate": 0.000546528265426814, "loss": 3.3609, "step": 11814 }, { "epoch": 0.58, "grad_norm": 0.5362207293510437, "learning_rate": 0.000546519491852893, "loss": 3.1434, "step": 11815 }, { "epoch": 0.58, "grad_norm": 0.5879662036895752, "learning_rate": 0.0005465107176296872, "loss": 3.4093, "step": 11816 }, { "epoch": 0.58, "grad_norm": 0.5185505747795105, "learning_rate": 0.0005465019427572195, "loss": 3.2697, "step": 11817 }, { "epoch": 0.58, "grad_norm": 0.5067775845527649, "learning_rate": 0.0005464931672355131, "loss": 3.4235, "step": 11818 }, { "epoch": 0.58, "grad_norm": 0.5255836248397827, "learning_rate": 0.0005464843910645913, "loss": 3.3745, "step": 11819 }, { "epoch": 0.58, "grad_norm": 0.6073008179664612, "learning_rate": 0.0005464756142444769, "loss": 3.182, "step": 11820 }, { "epoch": 0.58, "grad_norm": 0.48989614844322205, "learning_rate": 0.0005464668367751933, "loss": 3.4873, "step": 11821 }, { "epoch": 0.58, "grad_norm": 0.6135048270225525, "learning_rate": 0.0005464580586567634, "loss": 3.3022, "step": 11822 }, { "epoch": 0.58, "grad_norm": 0.5023560523986816, "learning_rate": 0.0005464492798892104, "loss": 3.2013, "step": 11823 }, { "epoch": 0.58, "grad_norm": 0.5405312180519104, "learning_rate": 0.0005464405004725574, "loss": 3.3605, "step": 11824 }, { "epoch": 0.58, "grad_norm": 0.5376933217048645, "learning_rate": 0.0005464317204068276, "loss": 3.1126, "step": 11825 }, { "epoch": 0.58, "grad_norm": 0.5398737788200378, "learning_rate": 0.0005464229396920441, "loss": 3.1471, "step": 11826 }, { "epoch": 0.58, "grad_norm": 0.5017811059951782, "learning_rate": 0.0005464141583282299, "loss": 3.3562, "step": 11827 }, { "epoch": 0.58, "grad_norm": 0.49591362476348877, "learning_rate": 0.0005464053763154083, "loss": 3.1727, "step": 11828 }, { "epoch": 0.58, "grad_norm": 0.5274151563644409, "learning_rate": 0.0005463965936536024, "loss": 3.0793, "step": 11829 }, { "epoch": 0.58, "grad_norm": 0.4952261745929718, "learning_rate": 0.0005463878103428353, "loss": 3.4623, "step": 11830 }, { "epoch": 0.58, "grad_norm": 0.5281954407691956, "learning_rate": 0.0005463790263831301, "loss": 3.3092, "step": 11831 }, { "epoch": 0.58, "grad_norm": 0.5254419445991516, "learning_rate": 0.0005463702417745099, "loss": 3.333, "step": 11832 }, { "epoch": 0.58, "grad_norm": 0.5291889309883118, "learning_rate": 0.0005463614565169979, "loss": 3.2975, "step": 11833 }, { "epoch": 0.58, "grad_norm": 0.5549447536468506, "learning_rate": 0.0005463526706106173, "loss": 3.3199, "step": 11834 }, { "epoch": 0.58, "grad_norm": 0.5520808696746826, "learning_rate": 0.0005463438840553912, "loss": 3.1179, "step": 11835 }, { "epoch": 0.58, "grad_norm": 0.5752543807029724, "learning_rate": 0.0005463350968513426, "loss": 3.4448, "step": 11836 }, { "epoch": 0.58, "grad_norm": 0.5210770964622498, "learning_rate": 0.0005463263089984948, "loss": 3.3781, "step": 11837 }, { "epoch": 0.58, "grad_norm": 0.5394595265388489, "learning_rate": 0.000546317520496871, "loss": 3.0691, "step": 11838 }, { "epoch": 0.58, "grad_norm": 0.5369167327880859, "learning_rate": 0.0005463087313464942, "loss": 3.299, "step": 11839 }, { "epoch": 0.58, "grad_norm": 0.5101680755615234, "learning_rate": 0.0005462999415473877, "loss": 3.1371, "step": 11840 }, { "epoch": 0.58, "grad_norm": 0.4878643751144409, "learning_rate": 0.0005462911510995744, "loss": 3.277, "step": 11841 }, { "epoch": 0.58, "grad_norm": 0.513389527797699, "learning_rate": 0.0005462823600030776, "loss": 3.2535, "step": 11842 }, { "epoch": 0.58, "grad_norm": 0.5329154133796692, "learning_rate": 0.0005462735682579205, "loss": 3.4121, "step": 11843 }, { "epoch": 0.58, "grad_norm": 0.4917752146720886, "learning_rate": 0.0005462647758641263, "loss": 3.3547, "step": 11844 }, { "epoch": 0.58, "grad_norm": 0.49476316571235657, "learning_rate": 0.000546255982821718, "loss": 3.0001, "step": 11845 }, { "epoch": 0.58, "grad_norm": 0.4978811740875244, "learning_rate": 0.0005462471891307189, "loss": 3.2179, "step": 11846 }, { "epoch": 0.58, "grad_norm": 0.5276978611946106, "learning_rate": 0.000546238394791152, "loss": 3.4435, "step": 11847 }, { "epoch": 0.58, "grad_norm": 0.5227974653244019, "learning_rate": 0.0005462295998030406, "loss": 3.4436, "step": 11848 }, { "epoch": 0.58, "grad_norm": 0.5526250004768372, "learning_rate": 0.0005462208041664079, "loss": 3.4602, "step": 11849 }, { "epoch": 0.58, "grad_norm": 0.5186265110969543, "learning_rate": 0.0005462120078812769, "loss": 3.1297, "step": 11850 }, { "epoch": 0.58, "grad_norm": 0.5635437965393066, "learning_rate": 0.0005462032109476709, "loss": 2.9613, "step": 11851 }, { "epoch": 0.58, "grad_norm": 0.5280086398124695, "learning_rate": 0.000546194413365613, "loss": 3.1061, "step": 11852 }, { "epoch": 0.58, "grad_norm": 0.5283723473548889, "learning_rate": 0.0005461856151351264, "loss": 3.1436, "step": 11853 }, { "epoch": 0.58, "grad_norm": 0.5015434622764587, "learning_rate": 0.0005461768162562342, "loss": 3.307, "step": 11854 }, { "epoch": 0.58, "grad_norm": 0.5084378719329834, "learning_rate": 0.0005461680167289598, "loss": 3.2405, "step": 11855 }, { "epoch": 0.58, "grad_norm": 0.5142874717712402, "learning_rate": 0.0005461592165533261, "loss": 3.2759, "step": 11856 }, { "epoch": 0.58, "grad_norm": 0.5148090720176697, "learning_rate": 0.0005461504157293563, "loss": 2.9513, "step": 11857 }, { "epoch": 0.58, "grad_norm": 0.5030063390731812, "learning_rate": 0.0005461416142570739, "loss": 3.3158, "step": 11858 }, { "epoch": 0.58, "grad_norm": 0.4925903379917145, "learning_rate": 0.0005461328121365018, "loss": 3.3714, "step": 11859 }, { "epoch": 0.58, "grad_norm": 0.5264309644699097, "learning_rate": 0.0005461240093676632, "loss": 3.271, "step": 11860 }, { "epoch": 0.58, "grad_norm": 0.5105083584785461, "learning_rate": 0.0005461152059505814, "loss": 3.2343, "step": 11861 }, { "epoch": 0.58, "grad_norm": 0.48054060339927673, "learning_rate": 0.0005461064018852794, "loss": 3.1617, "step": 11862 }, { "epoch": 0.58, "grad_norm": 0.5289759635925293, "learning_rate": 0.0005460975971717805, "loss": 3.2449, "step": 11863 }, { "epoch": 0.58, "grad_norm": 0.5321834087371826, "learning_rate": 0.000546088791810108, "loss": 3.2072, "step": 11864 }, { "epoch": 0.58, "grad_norm": 0.6084837913513184, "learning_rate": 0.0005460799858002849, "loss": 3.2506, "step": 11865 }, { "epoch": 0.58, "grad_norm": 0.5481894016265869, "learning_rate": 0.0005460711791423344, "loss": 3.1303, "step": 11866 }, { "epoch": 0.58, "grad_norm": 0.5393480658531189, "learning_rate": 0.0005460623718362799, "loss": 3.2954, "step": 11867 }, { "epoch": 0.58, "grad_norm": 0.5366723537445068, "learning_rate": 0.0005460535638821444, "loss": 3.1237, "step": 11868 }, { "epoch": 0.58, "grad_norm": 0.505320131778717, "learning_rate": 0.0005460447552799512, "loss": 3.2153, "step": 11869 }, { "epoch": 0.58, "grad_norm": 0.5377161502838135, "learning_rate": 0.0005460359460297235, "loss": 3.1654, "step": 11870 }, { "epoch": 0.58, "grad_norm": 0.49337872862815857, "learning_rate": 0.0005460271361314845, "loss": 3.2592, "step": 11871 }, { "epoch": 0.58, "grad_norm": 0.5618535280227661, "learning_rate": 0.0005460183255852573, "loss": 3.2642, "step": 11872 }, { "epoch": 0.58, "grad_norm": 0.4871905446052551, "learning_rate": 0.0005460095143910652, "loss": 3.3339, "step": 11873 }, { "epoch": 0.58, "grad_norm": 0.5062762498855591, "learning_rate": 0.0005460007025489313, "loss": 3.3657, "step": 11874 }, { "epoch": 0.58, "grad_norm": 0.530390739440918, "learning_rate": 0.0005459918900588789, "loss": 3.303, "step": 11875 }, { "epoch": 0.58, "grad_norm": 0.5175230503082275, "learning_rate": 0.0005459830769209314, "loss": 3.0723, "step": 11876 }, { "epoch": 0.58, "grad_norm": 0.5422409176826477, "learning_rate": 0.0005459742631351118, "loss": 3.2174, "step": 11877 }, { "epoch": 0.58, "grad_norm": 0.5116949081420898, "learning_rate": 0.0005459654487014431, "loss": 3.0453, "step": 11878 }, { "epoch": 0.58, "grad_norm": 0.49938473105430603, "learning_rate": 0.0005459566336199488, "loss": 3.021, "step": 11879 }, { "epoch": 0.58, "grad_norm": 0.5277251601219177, "learning_rate": 0.0005459478178906522, "loss": 3.2052, "step": 11880 }, { "epoch": 0.58, "grad_norm": 0.5035902261734009, "learning_rate": 0.0005459390015135762, "loss": 3.2446, "step": 11881 }, { "epoch": 0.58, "grad_norm": 0.5442661643028259, "learning_rate": 0.0005459301844887444, "loss": 3.1998, "step": 11882 }, { "epoch": 0.58, "grad_norm": 0.5180203914642334, "learning_rate": 0.0005459213668161797, "loss": 3.4836, "step": 11883 }, { "epoch": 0.58, "grad_norm": 0.5293683409690857, "learning_rate": 0.0005459125484959054, "loss": 3.3354, "step": 11884 }, { "epoch": 0.58, "grad_norm": 0.4947066307067871, "learning_rate": 0.0005459037295279449, "loss": 2.9769, "step": 11885 }, { "epoch": 0.58, "grad_norm": 0.5257008671760559, "learning_rate": 0.0005458949099123214, "loss": 3.3676, "step": 11886 }, { "epoch": 0.58, "grad_norm": 0.4895974397659302, "learning_rate": 0.0005458860896490577, "loss": 3.1573, "step": 11887 }, { "epoch": 0.58, "grad_norm": 0.56827712059021, "learning_rate": 0.0005458772687381776, "loss": 3.0959, "step": 11888 }, { "epoch": 0.58, "grad_norm": 0.5436473488807678, "learning_rate": 0.000545868447179704, "loss": 3.3533, "step": 11889 }, { "epoch": 0.58, "grad_norm": 0.5245484113693237, "learning_rate": 0.0005458596249736604, "loss": 3.3021, "step": 11890 }, { "epoch": 0.58, "grad_norm": 0.555914044380188, "learning_rate": 0.0005458508021200697, "loss": 3.2511, "step": 11891 }, { "epoch": 0.58, "grad_norm": 0.5292685031890869, "learning_rate": 0.0005458419786189552, "loss": 3.3015, "step": 11892 }, { "epoch": 0.58, "grad_norm": 0.5257068276405334, "learning_rate": 0.0005458331544703405, "loss": 3.1262, "step": 11893 }, { "epoch": 0.58, "grad_norm": 0.5110155940055847, "learning_rate": 0.0005458243296742485, "loss": 3.2781, "step": 11894 }, { "epoch": 0.58, "grad_norm": 0.5501812100410461, "learning_rate": 0.0005458155042307024, "loss": 3.0971, "step": 11895 }, { "epoch": 0.58, "grad_norm": 0.5484451651573181, "learning_rate": 0.0005458066781397257, "loss": 3.0403, "step": 11896 }, { "epoch": 0.58, "grad_norm": 0.5546119213104248, "learning_rate": 0.0005457978514013415, "loss": 3.3563, "step": 11897 }, { "epoch": 0.58, "grad_norm": 0.5238593220710754, "learning_rate": 0.000545789024015573, "loss": 3.3929, "step": 11898 }, { "epoch": 0.58, "grad_norm": 0.5623632669448853, "learning_rate": 0.0005457801959824435, "loss": 3.2798, "step": 11899 }, { "epoch": 0.58, "grad_norm": 0.5106022357940674, "learning_rate": 0.0005457713673019764, "loss": 3.1605, "step": 11900 }, { "epoch": 0.58, "grad_norm": 0.5641065835952759, "learning_rate": 0.0005457625379741947, "loss": 3.364, "step": 11901 }, { "epoch": 0.58, "grad_norm": 0.5487107634544373, "learning_rate": 0.0005457537079991218, "loss": 3.1397, "step": 11902 }, { "epoch": 0.58, "grad_norm": 0.5087454915046692, "learning_rate": 0.000545744877376781, "loss": 3.1786, "step": 11903 }, { "epoch": 0.58, "grad_norm": 0.5044289827346802, "learning_rate": 0.0005457360461071953, "loss": 3.2765, "step": 11904 }, { "epoch": 0.58, "grad_norm": 0.5639960169792175, "learning_rate": 0.0005457272141903884, "loss": 3.3457, "step": 11905 }, { "epoch": 0.58, "grad_norm": 0.5312978029251099, "learning_rate": 0.0005457183816263831, "loss": 3.1708, "step": 11906 }, { "epoch": 0.58, "grad_norm": 0.562142550945282, "learning_rate": 0.000545709548415203, "loss": 3.4045, "step": 11907 }, { "epoch": 0.58, "grad_norm": 0.5207469463348389, "learning_rate": 0.0005457007145568712, "loss": 3.2396, "step": 11908 }, { "epoch": 0.58, "grad_norm": 0.513844907283783, "learning_rate": 0.0005456918800514109, "loss": 3.2508, "step": 11909 }, { "epoch": 0.58, "grad_norm": 0.5175647735595703, "learning_rate": 0.0005456830448988456, "loss": 3.0191, "step": 11910 }, { "epoch": 0.58, "grad_norm": 0.5360447764396667, "learning_rate": 0.0005456742090991984, "loss": 3.2997, "step": 11911 }, { "epoch": 0.58, "grad_norm": 0.5037137866020203, "learning_rate": 0.0005456653726524926, "loss": 3.2219, "step": 11912 }, { "epoch": 0.58, "grad_norm": 0.5330907106399536, "learning_rate": 0.0005456565355587515, "loss": 3.4176, "step": 11913 }, { "epoch": 0.58, "grad_norm": 0.511675238609314, "learning_rate": 0.0005456476978179984, "loss": 3.3674, "step": 11914 }, { "epoch": 0.58, "grad_norm": 0.5179082751274109, "learning_rate": 0.0005456388594302565, "loss": 3.3472, "step": 11915 }, { "epoch": 0.58, "grad_norm": 0.5486281514167786, "learning_rate": 0.0005456300203955491, "loss": 3.4349, "step": 11916 }, { "epoch": 0.58, "grad_norm": 0.516790509223938, "learning_rate": 0.0005456211807138996, "loss": 2.9798, "step": 11917 }, { "epoch": 0.58, "grad_norm": 0.5260696411132812, "learning_rate": 0.0005456123403853311, "loss": 3.0199, "step": 11918 }, { "epoch": 0.58, "grad_norm": 0.4837647080421448, "learning_rate": 0.0005456034994098671, "loss": 3.19, "step": 11919 }, { "epoch": 0.58, "grad_norm": 0.5303565263748169, "learning_rate": 0.0005455946577875304, "loss": 3.3105, "step": 11920 }, { "epoch": 0.58, "grad_norm": 0.4975842535495758, "learning_rate": 0.0005455858155183449, "loss": 3.4884, "step": 11921 }, { "epoch": 0.58, "grad_norm": 0.5344429612159729, "learning_rate": 0.0005455769726023336, "loss": 3.1137, "step": 11922 }, { "epoch": 0.58, "grad_norm": 0.5470472574234009, "learning_rate": 0.0005455681290395199, "loss": 3.0215, "step": 11923 }, { "epoch": 0.58, "grad_norm": 0.49586766958236694, "learning_rate": 0.0005455592848299269, "loss": 3.1258, "step": 11924 }, { "epoch": 0.58, "grad_norm": 0.49502456188201904, "learning_rate": 0.000545550439973578, "loss": 3.188, "step": 11925 }, { "epoch": 0.58, "grad_norm": 0.5571303963661194, "learning_rate": 0.0005455415944704966, "loss": 3.2725, "step": 11926 }, { "epoch": 0.58, "grad_norm": 0.4836958944797516, "learning_rate": 0.0005455327483207057, "loss": 3.2533, "step": 11927 }, { "epoch": 0.58, "grad_norm": 0.49969571828842163, "learning_rate": 0.0005455239015242289, "loss": 3.3009, "step": 11928 }, { "epoch": 0.58, "grad_norm": 0.5281708240509033, "learning_rate": 0.0005455150540810894, "loss": 3.2464, "step": 11929 }, { "epoch": 0.58, "grad_norm": 0.5075505375862122, "learning_rate": 0.0005455062059913106, "loss": 3.2988, "step": 11930 }, { "epoch": 0.58, "grad_norm": 0.5302159190177917, "learning_rate": 0.0005454973572549156, "loss": 3.2058, "step": 11931 }, { "epoch": 0.58, "grad_norm": 0.5521146059036255, "learning_rate": 0.0005454885078719277, "loss": 3.3473, "step": 11932 }, { "epoch": 0.58, "grad_norm": 0.5224472880363464, "learning_rate": 0.0005454796578423705, "loss": 3.1948, "step": 11933 }, { "epoch": 0.58, "grad_norm": 0.5724395513534546, "learning_rate": 0.000545470807166267, "loss": 3.4027, "step": 11934 }, { "epoch": 0.58, "grad_norm": 0.5843696594238281, "learning_rate": 0.0005454619558436407, "loss": 3.277, "step": 11935 }, { "epoch": 0.58, "grad_norm": 0.5131762027740479, "learning_rate": 0.0005454531038745148, "loss": 3.3245, "step": 11936 }, { "epoch": 0.59, "grad_norm": 0.5358636975288391, "learning_rate": 0.0005454442512589127, "loss": 3.1341, "step": 11937 }, { "epoch": 0.59, "grad_norm": 0.5540893077850342, "learning_rate": 0.0005454353979968576, "loss": 3.2948, "step": 11938 }, { "epoch": 0.59, "grad_norm": 0.48805323243141174, "learning_rate": 0.000545426544088373, "loss": 3.2973, "step": 11939 }, { "epoch": 0.59, "grad_norm": 0.4978611171245575, "learning_rate": 0.0005454176895334822, "loss": 3.3075, "step": 11940 }, { "epoch": 0.59, "grad_norm": 0.49105244874954224, "learning_rate": 0.0005454088343322083, "loss": 3.202, "step": 11941 }, { "epoch": 0.59, "grad_norm": 0.4822368621826172, "learning_rate": 0.0005453999784845747, "loss": 3.2363, "step": 11942 }, { "epoch": 0.59, "grad_norm": 0.5346906185150146, "learning_rate": 0.0005453911219906049, "loss": 3.2, "step": 11943 }, { "epoch": 0.59, "grad_norm": 0.5279293060302734, "learning_rate": 0.0005453822648503221, "loss": 3.1051, "step": 11944 }, { "epoch": 0.59, "grad_norm": 0.5374941825866699, "learning_rate": 0.0005453734070637496, "loss": 3.2868, "step": 11945 }, { "epoch": 0.59, "grad_norm": 0.5096423625946045, "learning_rate": 0.0005453645486309109, "loss": 3.2491, "step": 11946 }, { "epoch": 0.59, "grad_norm": 0.5027688145637512, "learning_rate": 0.000545355689551829, "loss": 3.252, "step": 11947 }, { "epoch": 0.59, "grad_norm": 0.5206319689750671, "learning_rate": 0.0005453468298265275, "loss": 3.3398, "step": 11948 }, { "epoch": 0.59, "grad_norm": 0.5208166241645813, "learning_rate": 0.0005453379694550297, "loss": 3.3007, "step": 11949 }, { "epoch": 0.59, "grad_norm": 0.5166978240013123, "learning_rate": 0.0005453291084373589, "loss": 3.2176, "step": 11950 }, { "epoch": 0.59, "grad_norm": 0.5170909762382507, "learning_rate": 0.0005453202467735384, "loss": 3.1967, "step": 11951 }, { "epoch": 0.59, "grad_norm": 0.5434339642524719, "learning_rate": 0.0005453113844635916, "loss": 3.2238, "step": 11952 }, { "epoch": 0.59, "grad_norm": 0.4791112244129181, "learning_rate": 0.0005453025215075419, "loss": 3.1606, "step": 11953 }, { "epoch": 0.59, "grad_norm": 0.49351251125335693, "learning_rate": 0.0005452936579054125, "loss": 3.0225, "step": 11954 }, { "epoch": 0.59, "grad_norm": 0.5247668623924255, "learning_rate": 0.0005452847936572268, "loss": 3.0996, "step": 11955 }, { "epoch": 0.59, "grad_norm": 0.5701345801353455, "learning_rate": 0.0005452759287630081, "loss": 3.3449, "step": 11956 }, { "epoch": 0.59, "grad_norm": 0.5012340545654297, "learning_rate": 0.00054526706322278, "loss": 3.1187, "step": 11957 }, { "epoch": 0.59, "grad_norm": 0.5431010723114014, "learning_rate": 0.0005452581970365655, "loss": 3.3408, "step": 11958 }, { "epoch": 0.59, "grad_norm": 0.5214460492134094, "learning_rate": 0.0005452493302043882, "loss": 3.1243, "step": 11959 }, { "epoch": 0.59, "grad_norm": 0.49427273869514465, "learning_rate": 0.0005452404627262713, "loss": 3.08, "step": 11960 }, { "epoch": 0.59, "grad_norm": 0.5280971527099609, "learning_rate": 0.0005452315946022383, "loss": 3.253, "step": 11961 }, { "epoch": 0.59, "grad_norm": 0.5085782408714294, "learning_rate": 0.0005452227258323124, "loss": 3.1777, "step": 11962 }, { "epoch": 0.59, "grad_norm": 0.5629658102989197, "learning_rate": 0.000545213856416517, "loss": 3.2163, "step": 11963 }, { "epoch": 0.59, "grad_norm": 0.49250122904777527, "learning_rate": 0.0005452049863548756, "loss": 3.1596, "step": 11964 }, { "epoch": 0.59, "grad_norm": 0.5141192078590393, "learning_rate": 0.0005451961156474113, "loss": 3.182, "step": 11965 }, { "epoch": 0.59, "grad_norm": 0.4984816908836365, "learning_rate": 0.0005451872442941478, "loss": 3.0893, "step": 11966 }, { "epoch": 0.59, "grad_norm": 0.4933840334415436, "learning_rate": 0.0005451783722951082, "loss": 3.0771, "step": 11967 }, { "epoch": 0.59, "grad_norm": 0.564106822013855, "learning_rate": 0.000545169499650316, "loss": 3.4349, "step": 11968 }, { "epoch": 0.59, "grad_norm": 0.4997556805610657, "learning_rate": 0.0005451606263597945, "loss": 3.2064, "step": 11969 }, { "epoch": 0.59, "grad_norm": 0.4757207930088043, "learning_rate": 0.000545151752423567, "loss": 3.1864, "step": 11970 }, { "epoch": 0.59, "grad_norm": 0.4952181577682495, "learning_rate": 0.0005451428778416571, "loss": 3.0965, "step": 11971 }, { "epoch": 0.59, "grad_norm": 0.5092923045158386, "learning_rate": 0.0005451340026140879, "loss": 3.2239, "step": 11972 }, { "epoch": 0.59, "grad_norm": 0.5279220342636108, "learning_rate": 0.000545125126740883, "loss": 3.4003, "step": 11973 }, { "epoch": 0.59, "grad_norm": 0.4941803216934204, "learning_rate": 0.0005451162502220657, "loss": 3.2384, "step": 11974 }, { "epoch": 0.59, "grad_norm": 0.5303682684898376, "learning_rate": 0.0005451073730576594, "loss": 3.373, "step": 11975 }, { "epoch": 0.59, "grad_norm": 0.548751175403595, "learning_rate": 0.0005450984952476874, "loss": 3.1797, "step": 11976 }, { "epoch": 0.59, "grad_norm": 0.509413480758667, "learning_rate": 0.0005450896167921731, "loss": 3.1192, "step": 11977 }, { "epoch": 0.59, "grad_norm": 0.5299882888793945, "learning_rate": 0.00054508073769114, "loss": 2.747, "step": 11978 }, { "epoch": 0.59, "grad_norm": 0.5232790112495422, "learning_rate": 0.0005450718579446113, "loss": 3.3651, "step": 11979 }, { "epoch": 0.59, "grad_norm": 0.5589876174926758, "learning_rate": 0.0005450629775526106, "loss": 3.2521, "step": 11980 }, { "epoch": 0.59, "grad_norm": 0.4836844205856323, "learning_rate": 0.0005450540965151612, "loss": 3.0705, "step": 11981 }, { "epoch": 0.59, "grad_norm": 0.5362475514411926, "learning_rate": 0.0005450452148322864, "loss": 3.1405, "step": 11982 }, { "epoch": 0.59, "grad_norm": 0.5328556895256042, "learning_rate": 0.0005450363325040096, "loss": 3.2575, "step": 11983 }, { "epoch": 0.59, "grad_norm": 0.5442496538162231, "learning_rate": 0.0005450274495303544, "loss": 3.1584, "step": 11984 }, { "epoch": 0.59, "grad_norm": 0.5376067757606506, "learning_rate": 0.0005450185659113439, "loss": 3.3907, "step": 11985 }, { "epoch": 0.59, "grad_norm": 0.5082165598869324, "learning_rate": 0.0005450096816470017, "loss": 3.1008, "step": 11986 }, { "epoch": 0.59, "grad_norm": 0.4951854944229126, "learning_rate": 0.0005450007967373512, "loss": 3.1832, "step": 11987 }, { "epoch": 0.59, "grad_norm": 0.5797527432441711, "learning_rate": 0.0005449919111824157, "loss": 3.1662, "step": 11988 }, { "epoch": 0.59, "grad_norm": 0.5416431427001953, "learning_rate": 0.0005449830249822187, "loss": 3.1162, "step": 11989 }, { "epoch": 0.59, "grad_norm": 0.5033136010169983, "learning_rate": 0.0005449741381367836, "loss": 3.1388, "step": 11990 }, { "epoch": 0.59, "grad_norm": 0.5002380609512329, "learning_rate": 0.0005449652506461337, "loss": 3.31, "step": 11991 }, { "epoch": 0.59, "grad_norm": 0.49132394790649414, "learning_rate": 0.0005449563625102926, "loss": 3.1882, "step": 11992 }, { "epoch": 0.59, "grad_norm": 0.4861402213573456, "learning_rate": 0.0005449474737292834, "loss": 3.4393, "step": 11993 }, { "epoch": 0.59, "grad_norm": 0.5278277397155762, "learning_rate": 0.0005449385843031298, "loss": 3.2842, "step": 11994 }, { "epoch": 0.59, "grad_norm": 0.5361764430999756, "learning_rate": 0.000544929694231855, "loss": 3.2113, "step": 11995 }, { "epoch": 0.59, "grad_norm": 0.5028777122497559, "learning_rate": 0.0005449208035154827, "loss": 3.294, "step": 11996 }, { "epoch": 0.59, "grad_norm": 0.5272602438926697, "learning_rate": 0.000544911912154036, "loss": 3.3859, "step": 11997 }, { "epoch": 0.59, "grad_norm": 0.5307137966156006, "learning_rate": 0.0005449030201475385, "loss": 3.1559, "step": 11998 }, { "epoch": 0.59, "grad_norm": 0.510999858379364, "learning_rate": 0.0005448941274960136, "loss": 3.1829, "step": 11999 }, { "epoch": 0.59, "grad_norm": 0.5812216997146606, "learning_rate": 0.0005448852341994846, "loss": 3.1065, "step": 12000 }, { "epoch": 0.59, "grad_norm": 0.5590925812721252, "learning_rate": 0.0005448763402579752, "loss": 3.1302, "step": 12001 }, { "epoch": 0.59, "grad_norm": 0.5319423079490662, "learning_rate": 0.0005448674456715085, "loss": 2.961, "step": 12002 }, { "epoch": 0.59, "grad_norm": 0.5174445509910583, "learning_rate": 0.0005448585504401082, "loss": 2.9525, "step": 12003 }, { "epoch": 0.59, "grad_norm": 0.5351834893226624, "learning_rate": 0.0005448496545637975, "loss": 3.1183, "step": 12004 }, { "epoch": 0.59, "grad_norm": 0.5270566940307617, "learning_rate": 0.0005448407580425999, "loss": 3.2682, "step": 12005 }, { "epoch": 0.59, "grad_norm": 0.5064947009086609, "learning_rate": 0.000544831860876539, "loss": 3.3034, "step": 12006 }, { "epoch": 0.59, "grad_norm": 0.5744919180870056, "learning_rate": 0.0005448229630656379, "loss": 3.0479, "step": 12007 }, { "epoch": 0.59, "grad_norm": 0.5197696089744568, "learning_rate": 0.0005448140646099204, "loss": 3.0092, "step": 12008 }, { "epoch": 0.59, "grad_norm": 0.52765291929245, "learning_rate": 0.0005448051655094098, "loss": 3.1997, "step": 12009 }, { "epoch": 0.59, "grad_norm": 0.5932416915893555, "learning_rate": 0.0005447962657641294, "loss": 3.3712, "step": 12010 }, { "epoch": 0.59, "grad_norm": 0.5595377087593079, "learning_rate": 0.0005447873653741027, "loss": 3.0408, "step": 12011 }, { "epoch": 0.59, "grad_norm": 0.5186452269554138, "learning_rate": 0.0005447784643393533, "loss": 3.5143, "step": 12012 }, { "epoch": 0.59, "grad_norm": 0.5053780674934387, "learning_rate": 0.0005447695626599045, "loss": 3.2042, "step": 12013 }, { "epoch": 0.59, "grad_norm": 0.5170993208885193, "learning_rate": 0.0005447606603357798, "loss": 3.0966, "step": 12014 }, { "epoch": 0.59, "grad_norm": 0.5181581377983093, "learning_rate": 0.0005447517573670026, "loss": 3.1413, "step": 12015 }, { "epoch": 0.59, "grad_norm": 0.48989567160606384, "learning_rate": 0.0005447428537535963, "loss": 3.1706, "step": 12016 }, { "epoch": 0.59, "grad_norm": 0.5295213460922241, "learning_rate": 0.0005447339494955845, "loss": 3.2038, "step": 12017 }, { "epoch": 0.59, "grad_norm": 0.5177062749862671, "learning_rate": 0.0005447250445929906, "loss": 3.3209, "step": 12018 }, { "epoch": 0.59, "grad_norm": 0.5109267234802246, "learning_rate": 0.0005447161390458381, "loss": 3.2872, "step": 12019 }, { "epoch": 0.59, "grad_norm": 0.5133575201034546, "learning_rate": 0.0005447072328541502, "loss": 3.2706, "step": 12020 }, { "epoch": 0.59, "grad_norm": 0.5542067289352417, "learning_rate": 0.0005446983260179506, "loss": 3.3886, "step": 12021 }, { "epoch": 0.59, "grad_norm": 0.537536084651947, "learning_rate": 0.0005446894185372628, "loss": 3.4365, "step": 12022 }, { "epoch": 0.59, "grad_norm": 0.5550865530967712, "learning_rate": 0.0005446805104121101, "loss": 3.3174, "step": 12023 }, { "epoch": 0.59, "grad_norm": 0.5242879390716553, "learning_rate": 0.000544671601642516, "loss": 3.1944, "step": 12024 }, { "epoch": 0.59, "grad_norm": 0.5210665464401245, "learning_rate": 0.000544662692228504, "loss": 3.1458, "step": 12025 }, { "epoch": 0.59, "grad_norm": 0.48247066140174866, "learning_rate": 0.0005446537821700977, "loss": 3.376, "step": 12026 }, { "epoch": 0.59, "grad_norm": 0.5202993154525757, "learning_rate": 0.0005446448714673202, "loss": 3.286, "step": 12027 }, { "epoch": 0.59, "grad_norm": 0.5529611706733704, "learning_rate": 0.0005446359601201953, "loss": 3.0672, "step": 12028 }, { "epoch": 0.59, "grad_norm": 0.536654531955719, "learning_rate": 0.0005446270481287463, "loss": 3.1053, "step": 12029 }, { "epoch": 0.59, "grad_norm": 0.5165926814079285, "learning_rate": 0.0005446181354929969, "loss": 3.3346, "step": 12030 }, { "epoch": 0.59, "grad_norm": 0.5090608596801758, "learning_rate": 0.0005446092222129703, "loss": 3.1495, "step": 12031 }, { "epoch": 0.59, "grad_norm": 0.5462985038757324, "learning_rate": 0.00054460030828869, "loss": 3.0608, "step": 12032 }, { "epoch": 0.59, "grad_norm": 0.48308515548706055, "learning_rate": 0.0005445913937201797, "loss": 2.9185, "step": 12033 }, { "epoch": 0.59, "grad_norm": 0.5395585894584656, "learning_rate": 0.0005445824785074627, "loss": 3.3225, "step": 12034 }, { "epoch": 0.59, "grad_norm": 0.4958081543445587, "learning_rate": 0.0005445735626505624, "loss": 3.2987, "step": 12035 }, { "epoch": 0.59, "grad_norm": 0.5074117183685303, "learning_rate": 0.0005445646461495025, "loss": 3.2805, "step": 12036 }, { "epoch": 0.59, "grad_norm": 0.5241624712944031, "learning_rate": 0.0005445557290043064, "loss": 3.3391, "step": 12037 }, { "epoch": 0.59, "grad_norm": 0.520176112651825, "learning_rate": 0.0005445468112149977, "loss": 3.4373, "step": 12038 }, { "epoch": 0.59, "grad_norm": 0.5940264463424683, "learning_rate": 0.0005445378927815995, "loss": 3.0772, "step": 12039 }, { "epoch": 0.59, "grad_norm": 0.5212997794151306, "learning_rate": 0.0005445289737041357, "loss": 3.279, "step": 12040 }, { "epoch": 0.59, "grad_norm": 0.5169647336006165, "learning_rate": 0.0005445200539826297, "loss": 3.2505, "step": 12041 }, { "epoch": 0.59, "grad_norm": 0.5246007442474365, "learning_rate": 0.0005445111336171048, "loss": 3.4347, "step": 12042 }, { "epoch": 0.59, "grad_norm": 0.524836003780365, "learning_rate": 0.0005445022126075847, "loss": 3.0207, "step": 12043 }, { "epoch": 0.59, "grad_norm": 0.490573525428772, "learning_rate": 0.0005444932909540928, "loss": 3.2743, "step": 12044 }, { "epoch": 0.59, "grad_norm": 0.5303407907485962, "learning_rate": 0.0005444843686566528, "loss": 3.2337, "step": 12045 }, { "epoch": 0.59, "grad_norm": 0.5297519564628601, "learning_rate": 0.0005444754457152878, "loss": 3.3453, "step": 12046 }, { "epoch": 0.59, "grad_norm": 0.6097574830055237, "learning_rate": 0.0005444665221300216, "loss": 3.0906, "step": 12047 }, { "epoch": 0.59, "grad_norm": 0.5111755132675171, "learning_rate": 0.0005444575979008777, "loss": 3.4244, "step": 12048 }, { "epoch": 0.59, "grad_norm": 0.5661916136741638, "learning_rate": 0.0005444486730278794, "loss": 3.2577, "step": 12049 }, { "epoch": 0.59, "grad_norm": 0.5134075880050659, "learning_rate": 0.0005444397475110504, "loss": 3.1203, "step": 12050 }, { "epoch": 0.59, "grad_norm": 0.5311046242713928, "learning_rate": 0.0005444308213504143, "loss": 3.376, "step": 12051 }, { "epoch": 0.59, "grad_norm": 0.5159074068069458, "learning_rate": 0.0005444218945459943, "loss": 3.2704, "step": 12052 }, { "epoch": 0.59, "grad_norm": 0.5199549794197083, "learning_rate": 0.000544412967097814, "loss": 3.3863, "step": 12053 }, { "epoch": 0.59, "grad_norm": 0.5141023397445679, "learning_rate": 0.000544404039005897, "loss": 3.1942, "step": 12054 }, { "epoch": 0.59, "grad_norm": 0.5393960475921631, "learning_rate": 0.000544395110270267, "loss": 3.2088, "step": 12055 }, { "epoch": 0.59, "grad_norm": 0.5226369500160217, "learning_rate": 0.0005443861808909472, "loss": 3.2853, "step": 12056 }, { "epoch": 0.59, "grad_norm": 0.5066924691200256, "learning_rate": 0.0005443772508679611, "loss": 3.0143, "step": 12057 }, { "epoch": 0.59, "grad_norm": 0.5037077069282532, "learning_rate": 0.0005443683202013325, "loss": 3.4013, "step": 12058 }, { "epoch": 0.59, "grad_norm": 0.534941554069519, "learning_rate": 0.0005443593888910847, "loss": 3.2482, "step": 12059 }, { "epoch": 0.59, "grad_norm": 0.5512545108795166, "learning_rate": 0.0005443504569372414, "loss": 3.177, "step": 12060 }, { "epoch": 0.59, "grad_norm": 0.5608600974082947, "learning_rate": 0.000544341524339826, "loss": 3.3516, "step": 12061 }, { "epoch": 0.59, "grad_norm": 0.5026220679283142, "learning_rate": 0.0005443325910988619, "loss": 3.2668, "step": 12062 }, { "epoch": 0.59, "grad_norm": 0.5782676935195923, "learning_rate": 0.0005443236572143729, "loss": 3.1726, "step": 12063 }, { "epoch": 0.59, "grad_norm": 0.5567470788955688, "learning_rate": 0.0005443147226863824, "loss": 3.1268, "step": 12064 }, { "epoch": 0.59, "grad_norm": 0.4905277192592621, "learning_rate": 0.000544305787514914, "loss": 3.0573, "step": 12065 }, { "epoch": 0.59, "grad_norm": 0.5269580483436584, "learning_rate": 0.000544296851699991, "loss": 3.2479, "step": 12066 }, { "epoch": 0.59, "grad_norm": 0.5122688412666321, "learning_rate": 0.0005442879152416373, "loss": 3.2095, "step": 12067 }, { "epoch": 0.59, "grad_norm": 0.4919382631778717, "learning_rate": 0.0005442789781398761, "loss": 3.3365, "step": 12068 }, { "epoch": 0.59, "grad_norm": 0.5227946639060974, "learning_rate": 0.0005442700403947312, "loss": 2.9657, "step": 12069 }, { "epoch": 0.59, "grad_norm": 0.5085049867630005, "learning_rate": 0.0005442611020062259, "loss": 3.358, "step": 12070 }, { "epoch": 0.59, "grad_norm": 0.5325589776039124, "learning_rate": 0.0005442521629743839, "loss": 3.1012, "step": 12071 }, { "epoch": 0.59, "grad_norm": 0.5010466575622559, "learning_rate": 0.0005442432232992289, "loss": 3.1746, "step": 12072 }, { "epoch": 0.59, "grad_norm": 0.5115364193916321, "learning_rate": 0.0005442342829807841, "loss": 3.1592, "step": 12073 }, { "epoch": 0.59, "grad_norm": 0.4746970236301422, "learning_rate": 0.0005442253420190732, "loss": 3.3611, "step": 12074 }, { "epoch": 0.59, "grad_norm": 0.5140778422355652, "learning_rate": 0.0005442164004141196, "loss": 3.2885, "step": 12075 }, { "epoch": 0.59, "grad_norm": 0.480934202671051, "learning_rate": 0.0005442074581659472, "loss": 3.3753, "step": 12076 }, { "epoch": 0.59, "grad_norm": 0.4761304557323456, "learning_rate": 0.0005441985152745794, "loss": 3.2269, "step": 12077 }, { "epoch": 0.59, "grad_norm": 0.5257875323295593, "learning_rate": 0.0005441895717400396, "loss": 3.1432, "step": 12078 }, { "epoch": 0.59, "grad_norm": 0.5360661745071411, "learning_rate": 0.0005441806275623515, "loss": 3.1401, "step": 12079 }, { "epoch": 0.59, "grad_norm": 0.48718738555908203, "learning_rate": 0.0005441716827415385, "loss": 3.3443, "step": 12080 }, { "epoch": 0.59, "grad_norm": 0.4796687066555023, "learning_rate": 0.0005441627372776244, "loss": 3.2142, "step": 12081 }, { "epoch": 0.59, "grad_norm": 0.47105666995048523, "learning_rate": 0.0005441537911706327, "loss": 3.0479, "step": 12082 }, { "epoch": 0.59, "grad_norm": 0.5267301201820374, "learning_rate": 0.0005441448444205868, "loss": 3.3318, "step": 12083 }, { "epoch": 0.59, "grad_norm": 0.5082873106002808, "learning_rate": 0.0005441358970275104, "loss": 3.2415, "step": 12084 }, { "epoch": 0.59, "grad_norm": 0.5294603705406189, "learning_rate": 0.000544126948991427, "loss": 3.1729, "step": 12085 }, { "epoch": 0.59, "grad_norm": 0.536239504814148, "learning_rate": 0.0005441180003123603, "loss": 3.1646, "step": 12086 }, { "epoch": 0.59, "grad_norm": 0.5099353194236755, "learning_rate": 0.0005441090509903336, "loss": 3.2599, "step": 12087 }, { "epoch": 0.59, "grad_norm": 0.5732430219650269, "learning_rate": 0.0005441001010253707, "loss": 3.2231, "step": 12088 }, { "epoch": 0.59, "grad_norm": 0.5284761786460876, "learning_rate": 0.0005440911504174952, "loss": 3.5178, "step": 12089 }, { "epoch": 0.59, "grad_norm": 0.5436829924583435, "learning_rate": 0.0005440821991667306, "loss": 3.4769, "step": 12090 }, { "epoch": 0.59, "grad_norm": 0.5140102505683899, "learning_rate": 0.0005440732472731004, "loss": 3.2641, "step": 12091 }, { "epoch": 0.59, "grad_norm": 0.5060403347015381, "learning_rate": 0.0005440642947366281, "loss": 3.1533, "step": 12092 }, { "epoch": 0.59, "grad_norm": 0.5052133798599243, "learning_rate": 0.0005440553415573375, "loss": 3.3999, "step": 12093 }, { "epoch": 0.59, "grad_norm": 0.6156617999076843, "learning_rate": 0.0005440463877352522, "loss": 3.3402, "step": 12094 }, { "epoch": 0.59, "grad_norm": 0.5013222098350525, "learning_rate": 0.0005440374332703956, "loss": 3.1037, "step": 12095 }, { "epoch": 0.59, "grad_norm": 0.515588641166687, "learning_rate": 0.0005440284781627915, "loss": 3.2543, "step": 12096 }, { "epoch": 0.59, "grad_norm": 0.49556267261505127, "learning_rate": 0.0005440195224124633, "loss": 3.2262, "step": 12097 }, { "epoch": 0.59, "grad_norm": 0.528473436832428, "learning_rate": 0.0005440105660194345, "loss": 3.1852, "step": 12098 }, { "epoch": 0.59, "grad_norm": 0.5471101999282837, "learning_rate": 0.0005440016089837289, "loss": 3.2124, "step": 12099 }, { "epoch": 0.59, "grad_norm": 0.5259342193603516, "learning_rate": 0.0005439926513053701, "loss": 3.318, "step": 12100 }, { "epoch": 0.59, "grad_norm": 0.4963459372520447, "learning_rate": 0.0005439836929843815, "loss": 3.2905, "step": 12101 }, { "epoch": 0.59, "grad_norm": 0.5480161309242249, "learning_rate": 0.0005439747340207869, "loss": 3.1982, "step": 12102 }, { "epoch": 0.59, "grad_norm": 0.4799657464027405, "learning_rate": 0.0005439657744146098, "loss": 3.4103, "step": 12103 }, { "epoch": 0.59, "grad_norm": 0.4825492203235626, "learning_rate": 0.0005439568141658738, "loss": 3.2936, "step": 12104 }, { "epoch": 0.59, "grad_norm": 0.5499624609947205, "learning_rate": 0.0005439478532746024, "loss": 3.1542, "step": 12105 }, { "epoch": 0.59, "grad_norm": 0.5210323333740234, "learning_rate": 0.0005439388917408194, "loss": 3.364, "step": 12106 }, { "epoch": 0.59, "grad_norm": 0.5199629664421082, "learning_rate": 0.0005439299295645482, "loss": 2.9365, "step": 12107 }, { "epoch": 0.59, "grad_norm": 0.5032206773757935, "learning_rate": 0.0005439209667458127, "loss": 3.0098, "step": 12108 }, { "epoch": 0.59, "grad_norm": 0.5243465304374695, "learning_rate": 0.0005439120032846361, "loss": 3.1939, "step": 12109 }, { "epoch": 0.59, "grad_norm": 0.4972172975540161, "learning_rate": 0.0005439030391810424, "loss": 3.5505, "step": 12110 }, { "epoch": 0.59, "grad_norm": 0.5937232971191406, "learning_rate": 0.000543894074435055, "loss": 3.0929, "step": 12111 }, { "epoch": 0.59, "grad_norm": 0.5489984154701233, "learning_rate": 0.0005438851090466975, "loss": 3.2707, "step": 12112 }, { "epoch": 0.59, "grad_norm": 0.5671371221542358, "learning_rate": 0.0005438761430159936, "loss": 3.1847, "step": 12113 }, { "epoch": 0.59, "grad_norm": 0.5630182027816772, "learning_rate": 0.0005438671763429668, "loss": 3.1924, "step": 12114 }, { "epoch": 0.59, "grad_norm": 0.5923741459846497, "learning_rate": 0.0005438582090276408, "loss": 3.3754, "step": 12115 }, { "epoch": 0.59, "grad_norm": 0.5446171164512634, "learning_rate": 0.0005438492410700394, "loss": 3.2884, "step": 12116 }, { "epoch": 0.59, "grad_norm": 0.5040573477745056, "learning_rate": 0.0005438402724701857, "loss": 3.4074, "step": 12117 }, { "epoch": 0.59, "grad_norm": 0.5180044174194336, "learning_rate": 0.0005438313032281039, "loss": 3.0427, "step": 12118 }, { "epoch": 0.59, "grad_norm": 0.5119923949241638, "learning_rate": 0.0005438223333438173, "loss": 3.0863, "step": 12119 }, { "epoch": 0.59, "grad_norm": 0.5041249394416809, "learning_rate": 0.0005438133628173496, "loss": 3.0335, "step": 12120 }, { "epoch": 0.59, "grad_norm": 0.5423731207847595, "learning_rate": 0.0005438043916487245, "loss": 3.2203, "step": 12121 }, { "epoch": 0.59, "grad_norm": 0.5120278000831604, "learning_rate": 0.0005437954198379654, "loss": 3.2297, "step": 12122 }, { "epoch": 0.59, "grad_norm": 0.5203986167907715, "learning_rate": 0.0005437864473850962, "loss": 3.1436, "step": 12123 }, { "epoch": 0.59, "grad_norm": 0.5062956213951111, "learning_rate": 0.0005437774742901403, "loss": 3.225, "step": 12124 }, { "epoch": 0.59, "grad_norm": 0.5282447934150696, "learning_rate": 0.0005437685005531216, "loss": 3.3557, "step": 12125 }, { "epoch": 0.59, "grad_norm": 0.5864943861961365, "learning_rate": 0.0005437595261740635, "loss": 3.3101, "step": 12126 }, { "epoch": 0.59, "grad_norm": 0.5011259913444519, "learning_rate": 0.0005437505511529897, "loss": 3.2331, "step": 12127 }, { "epoch": 0.59, "grad_norm": 0.5472565293312073, "learning_rate": 0.0005437415754899239, "loss": 3.4375, "step": 12128 }, { "epoch": 0.59, "grad_norm": 0.49552929401397705, "learning_rate": 0.0005437325991848896, "loss": 3.3217, "step": 12129 }, { "epoch": 0.59, "grad_norm": 0.5005829334259033, "learning_rate": 0.0005437236222379107, "loss": 3.3369, "step": 12130 }, { "epoch": 0.59, "grad_norm": 0.5018550753593445, "learning_rate": 0.0005437146446490107, "loss": 3.3297, "step": 12131 }, { "epoch": 0.59, "grad_norm": 0.5203503370285034, "learning_rate": 0.000543705666418213, "loss": 3.2202, "step": 12132 }, { "epoch": 0.59, "grad_norm": 0.568457841873169, "learning_rate": 0.0005436966875455417, "loss": 3.3885, "step": 12133 }, { "epoch": 0.59, "grad_norm": 0.5276675224304199, "learning_rate": 0.0005436877080310202, "loss": 3.2712, "step": 12134 }, { "epoch": 0.59, "grad_norm": 0.5073103308677673, "learning_rate": 0.000543678727874672, "loss": 3.3254, "step": 12135 }, { "epoch": 0.59, "grad_norm": 0.48787346482276917, "learning_rate": 0.0005436697470765212, "loss": 3.3454, "step": 12136 }, { "epoch": 0.59, "grad_norm": 0.5986006259918213, "learning_rate": 0.000543660765636591, "loss": 3.2068, "step": 12137 }, { "epoch": 0.59, "grad_norm": 0.5651094317436218, "learning_rate": 0.0005436517835549052, "loss": 3.1003, "step": 12138 }, { "epoch": 0.59, "grad_norm": 0.49831265211105347, "learning_rate": 0.0005436428008314876, "loss": 3.2614, "step": 12139 }, { "epoch": 0.59, "grad_norm": 0.5459215044975281, "learning_rate": 0.0005436338174663617, "loss": 3.2675, "step": 12140 }, { "epoch": 0.6, "grad_norm": 0.5546776652336121, "learning_rate": 0.0005436248334595512, "loss": 3.3903, "step": 12141 }, { "epoch": 0.6, "grad_norm": 0.518644392490387, "learning_rate": 0.0005436158488110798, "loss": 3.2527, "step": 12142 }, { "epoch": 0.6, "grad_norm": 0.5478095412254333, "learning_rate": 0.0005436068635209712, "loss": 3.0529, "step": 12143 }, { "epoch": 0.6, "grad_norm": 0.497527152299881, "learning_rate": 0.0005435978775892489, "loss": 3.2598, "step": 12144 }, { "epoch": 0.6, "grad_norm": 0.5034574270248413, "learning_rate": 0.0005435888910159367, "loss": 3.1994, "step": 12145 }, { "epoch": 0.6, "grad_norm": 0.5118417739868164, "learning_rate": 0.0005435799038010583, "loss": 3.4717, "step": 12146 }, { "epoch": 0.6, "grad_norm": 0.5360592007637024, "learning_rate": 0.0005435709159446374, "loss": 3.2304, "step": 12147 }, { "epoch": 0.6, "grad_norm": 0.4851125478744507, "learning_rate": 0.0005435619274466974, "loss": 3.1554, "step": 12148 }, { "epoch": 0.6, "grad_norm": 0.4884193539619446, "learning_rate": 0.0005435529383072622, "loss": 3.0128, "step": 12149 }, { "epoch": 0.6, "grad_norm": 0.5373410582542419, "learning_rate": 0.0005435439485263554, "loss": 3.152, "step": 12150 }, { "epoch": 0.6, "grad_norm": 0.5832005143165588, "learning_rate": 0.0005435349581040007, "loss": 3.1407, "step": 12151 }, { "epoch": 0.6, "grad_norm": 0.5295130014419556, "learning_rate": 0.0005435259670402219, "loss": 3.1837, "step": 12152 }, { "epoch": 0.6, "grad_norm": 0.5200740694999695, "learning_rate": 0.0005435169753350425, "loss": 3.1467, "step": 12153 }, { "epoch": 0.6, "grad_norm": 0.5249675512313843, "learning_rate": 0.0005435079829884862, "loss": 3.1414, "step": 12154 }, { "epoch": 0.6, "grad_norm": 0.5205420851707458, "learning_rate": 0.0005434989900005769, "loss": 3.291, "step": 12155 }, { "epoch": 0.6, "grad_norm": 0.4993976950645447, "learning_rate": 0.000543489996371338, "loss": 3.1936, "step": 12156 }, { "epoch": 0.6, "grad_norm": 0.49298644065856934, "learning_rate": 0.0005434810021007933, "loss": 3.2909, "step": 12157 }, { "epoch": 0.6, "grad_norm": 0.5064563751220703, "learning_rate": 0.0005434720071889666, "loss": 3.2605, "step": 12158 }, { "epoch": 0.6, "grad_norm": 0.4936967194080353, "learning_rate": 0.0005434630116358814, "loss": 3.1549, "step": 12159 }, { "epoch": 0.6, "grad_norm": 0.4993574917316437, "learning_rate": 0.0005434540154415615, "loss": 3.1781, "step": 12160 }, { "epoch": 0.6, "grad_norm": 0.5252702236175537, "learning_rate": 0.0005434450186060305, "loss": 3.3435, "step": 12161 }, { "epoch": 0.6, "grad_norm": 0.5163652896881104, "learning_rate": 0.0005434360211293123, "loss": 3.0923, "step": 12162 }, { "epoch": 0.6, "grad_norm": 0.5049394965171814, "learning_rate": 0.0005434270230114305, "loss": 3.1294, "step": 12163 }, { "epoch": 0.6, "grad_norm": 0.578566312789917, "learning_rate": 0.0005434180242524086, "loss": 3.3308, "step": 12164 }, { "epoch": 0.6, "grad_norm": 0.5135256052017212, "learning_rate": 0.0005434090248522706, "loss": 3.1645, "step": 12165 }, { "epoch": 0.6, "grad_norm": 0.4964609146118164, "learning_rate": 0.00054340002481104, "loss": 3.3597, "step": 12166 }, { "epoch": 0.6, "grad_norm": 0.5104126334190369, "learning_rate": 0.0005433910241287407, "loss": 2.9874, "step": 12167 }, { "epoch": 0.6, "grad_norm": 0.6239469051361084, "learning_rate": 0.0005433820228053962, "loss": 3.2153, "step": 12168 }, { "epoch": 0.6, "grad_norm": 0.4951239824295044, "learning_rate": 0.0005433730208410303, "loss": 3.0318, "step": 12169 }, { "epoch": 0.6, "grad_norm": 0.506628155708313, "learning_rate": 0.0005433640182356666, "loss": 3.3427, "step": 12170 }, { "epoch": 0.6, "grad_norm": 0.6572966575622559, "learning_rate": 0.0005433550149893291, "loss": 3.2291, "step": 12171 }, { "epoch": 0.6, "grad_norm": 0.5308460593223572, "learning_rate": 0.0005433460111020413, "loss": 3.1009, "step": 12172 }, { "epoch": 0.6, "grad_norm": 0.5671391487121582, "learning_rate": 0.0005433370065738268, "loss": 3.1697, "step": 12173 }, { "epoch": 0.6, "grad_norm": 0.5267753005027771, "learning_rate": 0.0005433280014047095, "loss": 3.2779, "step": 12174 }, { "epoch": 0.6, "grad_norm": 0.5052096247673035, "learning_rate": 0.0005433189955947131, "loss": 3.2854, "step": 12175 }, { "epoch": 0.6, "grad_norm": 0.5118441581726074, "learning_rate": 0.0005433099891438614, "loss": 3.276, "step": 12176 }, { "epoch": 0.6, "grad_norm": 0.504606306552887, "learning_rate": 0.0005433009820521779, "loss": 3.3353, "step": 12177 }, { "epoch": 0.6, "grad_norm": 0.5112560391426086, "learning_rate": 0.0005432919743196865, "loss": 3.2776, "step": 12178 }, { "epoch": 0.6, "grad_norm": 0.5266879200935364, "learning_rate": 0.0005432829659464107, "loss": 3.3279, "step": 12179 }, { "epoch": 0.6, "grad_norm": 0.5236815214157104, "learning_rate": 0.0005432739569323745, "loss": 3.1972, "step": 12180 }, { "epoch": 0.6, "grad_norm": 0.5193027257919312, "learning_rate": 0.0005432649472776015, "loss": 3.1793, "step": 12181 }, { "epoch": 0.6, "grad_norm": 0.5326626300811768, "learning_rate": 0.0005432559369821156, "loss": 3.2743, "step": 12182 }, { "epoch": 0.6, "grad_norm": 0.5254206657409668, "learning_rate": 0.0005432469260459402, "loss": 3.4511, "step": 12183 }, { "epoch": 0.6, "grad_norm": 0.5383450388908386, "learning_rate": 0.0005432379144690992, "loss": 3.1277, "step": 12184 }, { "epoch": 0.6, "grad_norm": 0.48813480138778687, "learning_rate": 0.0005432289022516164, "loss": 2.9519, "step": 12185 }, { "epoch": 0.6, "grad_norm": 0.5338407158851624, "learning_rate": 0.0005432198893935155, "loss": 3.1829, "step": 12186 }, { "epoch": 0.6, "grad_norm": 0.5344692468643188, "learning_rate": 0.0005432108758948201, "loss": 3.2572, "step": 12187 }, { "epoch": 0.6, "grad_norm": 0.5389733910560608, "learning_rate": 0.0005432018617555542, "loss": 3.1464, "step": 12188 }, { "epoch": 0.6, "grad_norm": 0.49581897258758545, "learning_rate": 0.0005431928469757415, "loss": 3.1573, "step": 12189 }, { "epoch": 0.6, "grad_norm": 0.5306205749511719, "learning_rate": 0.0005431838315554054, "loss": 3.3115, "step": 12190 }, { "epoch": 0.6, "grad_norm": 0.5148359537124634, "learning_rate": 0.0005431748154945701, "loss": 3.2685, "step": 12191 }, { "epoch": 0.6, "grad_norm": 0.5625180602073669, "learning_rate": 0.0005431657987932589, "loss": 3.1569, "step": 12192 }, { "epoch": 0.6, "grad_norm": 0.48063063621520996, "learning_rate": 0.000543156781451496, "loss": 3.3465, "step": 12193 }, { "epoch": 0.6, "grad_norm": 0.49699029326438904, "learning_rate": 0.0005431477634693048, "loss": 3.2766, "step": 12194 }, { "epoch": 0.6, "grad_norm": 0.4957977533340454, "learning_rate": 0.0005431387448467091, "loss": 3.4118, "step": 12195 }, { "epoch": 0.6, "grad_norm": 0.5320689678192139, "learning_rate": 0.000543129725583733, "loss": 3.2341, "step": 12196 }, { "epoch": 0.6, "grad_norm": 0.4783723056316376, "learning_rate": 0.0005431207056803999, "loss": 3.1873, "step": 12197 }, { "epoch": 0.6, "grad_norm": 0.5043808817863464, "learning_rate": 0.0005431116851367336, "loss": 3.0182, "step": 12198 }, { "epoch": 0.6, "grad_norm": 0.5587702393531799, "learning_rate": 0.000543102663952758, "loss": 3.2452, "step": 12199 }, { "epoch": 0.6, "grad_norm": 0.5152283310890198, "learning_rate": 0.0005430936421284967, "loss": 3.2618, "step": 12200 }, { "epoch": 0.6, "grad_norm": 0.5216172337532043, "learning_rate": 0.0005430846196639734, "loss": 3.1672, "step": 12201 }, { "epoch": 0.6, "grad_norm": 0.5034514665603638, "learning_rate": 0.0005430755965592121, "loss": 3.1348, "step": 12202 }, { "epoch": 0.6, "grad_norm": 0.5430858731269836, "learning_rate": 0.0005430665728142366, "loss": 3.2768, "step": 12203 }, { "epoch": 0.6, "grad_norm": 0.5175026059150696, "learning_rate": 0.0005430575484290704, "loss": 3.2739, "step": 12204 }, { "epoch": 0.6, "grad_norm": 0.5324364304542542, "learning_rate": 0.0005430485234037373, "loss": 3.3467, "step": 12205 }, { "epoch": 0.6, "grad_norm": 0.5634534955024719, "learning_rate": 0.0005430394977382613, "loss": 3.0756, "step": 12206 }, { "epoch": 0.6, "grad_norm": 0.5331823229789734, "learning_rate": 0.000543030471432666, "loss": 3.3163, "step": 12207 }, { "epoch": 0.6, "grad_norm": 0.5143170356750488, "learning_rate": 0.0005430214444869752, "loss": 3.16, "step": 12208 }, { "epoch": 0.6, "grad_norm": 0.5276603102684021, "learning_rate": 0.0005430124169012127, "loss": 3.182, "step": 12209 }, { "epoch": 0.6, "grad_norm": 0.5391786694526672, "learning_rate": 0.0005430033886754022, "loss": 3.0836, "step": 12210 }, { "epoch": 0.6, "grad_norm": 0.5072814226150513, "learning_rate": 0.0005429943598095677, "loss": 3.2794, "step": 12211 }, { "epoch": 0.6, "grad_norm": 0.5004345774650574, "learning_rate": 0.0005429853303037328, "loss": 3.3006, "step": 12212 }, { "epoch": 0.6, "grad_norm": 0.5201953053474426, "learning_rate": 0.0005429763001579211, "loss": 3.2392, "step": 12213 }, { "epoch": 0.6, "grad_norm": 0.6965551972389221, "learning_rate": 0.0005429672693721567, "loss": 3.0654, "step": 12214 }, { "epoch": 0.6, "grad_norm": 0.5072376132011414, "learning_rate": 0.0005429582379464634, "loss": 3.4563, "step": 12215 }, { "epoch": 0.6, "grad_norm": 0.47854286432266235, "learning_rate": 0.0005429492058808647, "loss": 3.2517, "step": 12216 }, { "epoch": 0.6, "grad_norm": 0.5493162870407104, "learning_rate": 0.0005429401731753846, "loss": 3.3376, "step": 12217 }, { "epoch": 0.6, "grad_norm": 0.5450829863548279, "learning_rate": 0.0005429311398300468, "loss": 3.2878, "step": 12218 }, { "epoch": 0.6, "grad_norm": 0.4950675368309021, "learning_rate": 0.0005429221058448752, "loss": 3.0321, "step": 12219 }, { "epoch": 0.6, "grad_norm": 0.5706702470779419, "learning_rate": 0.0005429130712198935, "loss": 3.4247, "step": 12220 }, { "epoch": 0.6, "grad_norm": 0.4959094226360321, "learning_rate": 0.0005429040359551256, "loss": 3.1983, "step": 12221 }, { "epoch": 0.6, "grad_norm": 0.5183280110359192, "learning_rate": 0.0005428950000505951, "loss": 3.0507, "step": 12222 }, { "epoch": 0.6, "grad_norm": 0.4983315169811249, "learning_rate": 0.0005428859635063258, "loss": 3.3056, "step": 12223 }, { "epoch": 0.6, "grad_norm": 0.5146493315696716, "learning_rate": 0.0005428769263223418, "loss": 3.3479, "step": 12224 }, { "epoch": 0.6, "grad_norm": 0.4943070709705353, "learning_rate": 0.0005428678884986667, "loss": 3.1996, "step": 12225 }, { "epoch": 0.6, "grad_norm": 0.49658817052841187, "learning_rate": 0.0005428588500353243, "loss": 3.1671, "step": 12226 }, { "epoch": 0.6, "grad_norm": 0.5471540689468384, "learning_rate": 0.0005428498109323384, "loss": 3.1609, "step": 12227 }, { "epoch": 0.6, "grad_norm": 0.5084481835365295, "learning_rate": 0.0005428407711897329, "loss": 3.2232, "step": 12228 }, { "epoch": 0.6, "grad_norm": 0.5170078277587891, "learning_rate": 0.0005428317308075315, "loss": 3.3208, "step": 12229 }, { "epoch": 0.6, "grad_norm": 0.4958844780921936, "learning_rate": 0.0005428226897857581, "loss": 3.4691, "step": 12230 }, { "epoch": 0.6, "grad_norm": 0.5121991634368896, "learning_rate": 0.0005428136481244363, "loss": 3.3307, "step": 12231 }, { "epoch": 0.6, "grad_norm": 0.5569097399711609, "learning_rate": 0.0005428046058235903, "loss": 3.2827, "step": 12232 }, { "epoch": 0.6, "grad_norm": 0.5006715655326843, "learning_rate": 0.0005427955628832436, "loss": 3.4895, "step": 12233 }, { "epoch": 0.6, "grad_norm": 0.5267584919929504, "learning_rate": 0.0005427865193034202, "loss": 3.0273, "step": 12234 }, { "epoch": 0.6, "grad_norm": 0.5398766994476318, "learning_rate": 0.0005427774750841437, "loss": 3.2227, "step": 12235 }, { "epoch": 0.6, "grad_norm": 0.49153032898902893, "learning_rate": 0.0005427684302254381, "loss": 3.2155, "step": 12236 }, { "epoch": 0.6, "grad_norm": 0.55174720287323, "learning_rate": 0.0005427593847273272, "loss": 3.3676, "step": 12237 }, { "epoch": 0.6, "grad_norm": 0.5181941986083984, "learning_rate": 0.0005427503385898348, "loss": 3.2572, "step": 12238 }, { "epoch": 0.6, "grad_norm": 0.5273623466491699, "learning_rate": 0.0005427412918129847, "loss": 3.0727, "step": 12239 }, { "epoch": 0.6, "grad_norm": 0.5499759912490845, "learning_rate": 0.0005427322443968007, "loss": 3.2162, "step": 12240 }, { "epoch": 0.6, "grad_norm": 0.4892823100090027, "learning_rate": 0.0005427231963413069, "loss": 3.2164, "step": 12241 }, { "epoch": 0.6, "grad_norm": 0.49758782982826233, "learning_rate": 0.0005427141476465266, "loss": 3.1819, "step": 12242 }, { "epoch": 0.6, "grad_norm": 0.5292083024978638, "learning_rate": 0.0005427050983124842, "loss": 3.241, "step": 12243 }, { "epoch": 0.6, "grad_norm": 0.5434751510620117, "learning_rate": 0.0005426960483392031, "loss": 3.2821, "step": 12244 }, { "epoch": 0.6, "grad_norm": 0.5042057037353516, "learning_rate": 0.0005426869977267075, "loss": 3.2146, "step": 12245 }, { "epoch": 0.6, "grad_norm": 0.5056325197219849, "learning_rate": 0.000542677946475021, "loss": 3.3742, "step": 12246 }, { "epoch": 0.6, "grad_norm": 0.5204561352729797, "learning_rate": 0.0005426688945841674, "loss": 3.2397, "step": 12247 }, { "epoch": 0.6, "grad_norm": 0.5179333686828613, "learning_rate": 0.0005426598420541707, "loss": 3.2824, "step": 12248 }, { "epoch": 0.6, "grad_norm": 0.5086974501609802, "learning_rate": 0.0005426507888850547, "loss": 3.2344, "step": 12249 }, { "epoch": 0.6, "grad_norm": 0.49645212292671204, "learning_rate": 0.0005426417350768431, "loss": 3.0371, "step": 12250 }, { "epoch": 0.6, "grad_norm": 0.4945175051689148, "learning_rate": 0.00054263268062956, "loss": 3.2515, "step": 12251 }, { "epoch": 0.6, "grad_norm": 0.5622349381446838, "learning_rate": 0.0005426236255432291, "loss": 3.3922, "step": 12252 }, { "epoch": 0.6, "grad_norm": 0.5245631337165833, "learning_rate": 0.0005426145698178742, "loss": 3.1202, "step": 12253 }, { "epoch": 0.6, "grad_norm": 0.5517042279243469, "learning_rate": 0.0005426055134535192, "loss": 3.0991, "step": 12254 }, { "epoch": 0.6, "grad_norm": 0.5217844843864441, "learning_rate": 0.000542596456450188, "loss": 3.2649, "step": 12255 }, { "epoch": 0.6, "grad_norm": 0.47473424673080444, "learning_rate": 0.0005425873988079045, "loss": 3.2014, "step": 12256 }, { "epoch": 0.6, "grad_norm": 0.5011487007141113, "learning_rate": 0.0005425783405266924, "loss": 3.3579, "step": 12257 }, { "epoch": 0.6, "grad_norm": 0.5660378336906433, "learning_rate": 0.0005425692816065757, "loss": 3.3866, "step": 12258 }, { "epoch": 0.6, "grad_norm": 0.5503248572349548, "learning_rate": 0.0005425602220475781, "loss": 3.3629, "step": 12259 }, { "epoch": 0.6, "grad_norm": 0.5156776309013367, "learning_rate": 0.0005425511618497237, "loss": 3.1339, "step": 12260 }, { "epoch": 0.6, "grad_norm": 0.5958825349807739, "learning_rate": 0.0005425421010130361, "loss": 3.1596, "step": 12261 }, { "epoch": 0.6, "grad_norm": 0.5076464414596558, "learning_rate": 0.0005425330395375392, "loss": 3.287, "step": 12262 }, { "epoch": 0.6, "grad_norm": 0.4932123124599457, "learning_rate": 0.0005425239774232572, "loss": 3.3516, "step": 12263 }, { "epoch": 0.6, "grad_norm": 0.5037664175033569, "learning_rate": 0.0005425149146702135, "loss": 3.1989, "step": 12264 }, { "epoch": 0.6, "grad_norm": 0.5109959244728088, "learning_rate": 0.0005425058512784323, "loss": 3.3902, "step": 12265 }, { "epoch": 0.6, "grad_norm": 0.49323901534080505, "learning_rate": 0.0005424967872479373, "loss": 3.4398, "step": 12266 }, { "epoch": 0.6, "grad_norm": 0.49260270595550537, "learning_rate": 0.0005424877225787525, "loss": 3.293, "step": 12267 }, { "epoch": 0.6, "grad_norm": 0.5285913348197937, "learning_rate": 0.0005424786572709015, "loss": 3.452, "step": 12268 }, { "epoch": 0.6, "grad_norm": 0.5172354578971863, "learning_rate": 0.0005424695913244087, "loss": 3.3493, "step": 12269 }, { "epoch": 0.6, "grad_norm": 0.5332759022712708, "learning_rate": 0.0005424605247392975, "loss": 3.1187, "step": 12270 }, { "epoch": 0.6, "grad_norm": 0.5140502452850342, "learning_rate": 0.0005424514575155919, "loss": 3.3081, "step": 12271 }, { "epoch": 0.6, "grad_norm": 0.49481186270713806, "learning_rate": 0.0005424423896533159, "loss": 3.1223, "step": 12272 }, { "epoch": 0.6, "grad_norm": 0.5081273913383484, "learning_rate": 0.0005424333211524932, "loss": 3.1317, "step": 12273 }, { "epoch": 0.6, "grad_norm": 0.4956912100315094, "learning_rate": 0.0005424242520131479, "loss": 3.4277, "step": 12274 }, { "epoch": 0.6, "grad_norm": 0.5238935947418213, "learning_rate": 0.0005424151822353038, "loss": 3.2959, "step": 12275 }, { "epoch": 0.6, "grad_norm": 0.5190920233726501, "learning_rate": 0.0005424061118189847, "loss": 3.226, "step": 12276 }, { "epoch": 0.6, "grad_norm": 0.5076124668121338, "learning_rate": 0.0005423970407642145, "loss": 3.2697, "step": 12277 }, { "epoch": 0.6, "grad_norm": 0.4859389066696167, "learning_rate": 0.0005423879690710172, "loss": 3.1284, "step": 12278 }, { "epoch": 0.6, "grad_norm": 0.5037886500358582, "learning_rate": 0.0005423788967394166, "loss": 3.3524, "step": 12279 }, { "epoch": 0.6, "grad_norm": 0.5088861584663391, "learning_rate": 0.0005423698237694368, "loss": 3.3835, "step": 12280 }, { "epoch": 0.6, "grad_norm": 0.4993892312049866, "learning_rate": 0.0005423607501611013, "loss": 3.1905, "step": 12281 }, { "epoch": 0.6, "grad_norm": 0.5249192714691162, "learning_rate": 0.0005423516759144343, "loss": 3.4946, "step": 12282 }, { "epoch": 0.6, "grad_norm": 0.5226430296897888, "learning_rate": 0.0005423426010294597, "loss": 3.1759, "step": 12283 }, { "epoch": 0.6, "grad_norm": 0.5157943964004517, "learning_rate": 0.0005423335255062013, "loss": 3.0393, "step": 12284 }, { "epoch": 0.6, "grad_norm": 0.4765145182609558, "learning_rate": 0.000542324449344683, "loss": 3.2868, "step": 12285 }, { "epoch": 0.6, "grad_norm": 0.539223849773407, "learning_rate": 0.0005423153725449287, "loss": 3.2323, "step": 12286 }, { "epoch": 0.6, "grad_norm": 0.5330362319946289, "learning_rate": 0.0005423062951069624, "loss": 3.2413, "step": 12287 }, { "epoch": 0.6, "grad_norm": 0.5498042106628418, "learning_rate": 0.000542297217030808, "loss": 2.9149, "step": 12288 }, { "epoch": 0.6, "grad_norm": 0.5433230400085449, "learning_rate": 0.0005422881383164893, "loss": 3.1215, "step": 12289 }, { "epoch": 0.6, "grad_norm": 0.5228878855705261, "learning_rate": 0.0005422790589640303, "loss": 3.2642, "step": 12290 }, { "epoch": 0.6, "grad_norm": 0.5186182260513306, "learning_rate": 0.0005422699789734548, "loss": 3.4114, "step": 12291 }, { "epoch": 0.6, "grad_norm": 0.5254524946212769, "learning_rate": 0.0005422608983447868, "loss": 3.4245, "step": 12292 }, { "epoch": 0.6, "grad_norm": 0.5335171222686768, "learning_rate": 0.0005422518170780504, "loss": 3.0748, "step": 12293 }, { "epoch": 0.6, "grad_norm": 0.5440496206283569, "learning_rate": 0.0005422427351732692, "loss": 3.0979, "step": 12294 }, { "epoch": 0.6, "grad_norm": 0.5325355529785156, "learning_rate": 0.0005422336526304673, "loss": 3.2867, "step": 12295 }, { "epoch": 0.6, "grad_norm": 0.536151647567749, "learning_rate": 0.0005422245694496685, "loss": 3.3285, "step": 12296 }, { "epoch": 0.6, "grad_norm": 0.516470730304718, "learning_rate": 0.0005422154856308969, "loss": 3.3174, "step": 12297 }, { "epoch": 0.6, "grad_norm": 0.4913516938686371, "learning_rate": 0.0005422064011741763, "loss": 3.1424, "step": 12298 }, { "epoch": 0.6, "grad_norm": 0.4844023883342743, "learning_rate": 0.0005421973160795306, "loss": 3.3294, "step": 12299 }, { "epoch": 0.6, "grad_norm": 0.5266075134277344, "learning_rate": 0.0005421882303469839, "loss": 3.1666, "step": 12300 }, { "epoch": 0.6, "grad_norm": 0.5134689807891846, "learning_rate": 0.0005421791439765599, "loss": 3.1568, "step": 12301 }, { "epoch": 0.6, "grad_norm": 0.5349195599555969, "learning_rate": 0.0005421700569682827, "loss": 3.3157, "step": 12302 }, { "epoch": 0.6, "grad_norm": 0.5083217024803162, "learning_rate": 0.0005421609693221761, "loss": 3.0988, "step": 12303 }, { "epoch": 0.6, "grad_norm": 0.49216586351394653, "learning_rate": 0.0005421518810382641, "loss": 3.3646, "step": 12304 }, { "epoch": 0.6, "grad_norm": 0.5193181037902832, "learning_rate": 0.0005421427921165708, "loss": 3.3176, "step": 12305 }, { "epoch": 0.6, "grad_norm": 0.5635499358177185, "learning_rate": 0.0005421337025571199, "loss": 3.1367, "step": 12306 }, { "epoch": 0.6, "grad_norm": 0.5421886444091797, "learning_rate": 0.0005421246123599354, "loss": 3.2597, "step": 12307 }, { "epoch": 0.6, "grad_norm": 0.49263355135917664, "learning_rate": 0.0005421155215250413, "loss": 3.3185, "step": 12308 }, { "epoch": 0.6, "grad_norm": 0.4998137652873993, "learning_rate": 0.0005421064300524615, "loss": 3.2735, "step": 12309 }, { "epoch": 0.6, "grad_norm": 0.5159027576446533, "learning_rate": 0.00054209733794222, "loss": 3.1562, "step": 12310 }, { "epoch": 0.6, "grad_norm": 0.48767736554145813, "learning_rate": 0.0005420882451943406, "loss": 3.0237, "step": 12311 }, { "epoch": 0.6, "grad_norm": 0.5056929588317871, "learning_rate": 0.0005420791518088474, "loss": 3.2519, "step": 12312 }, { "epoch": 0.6, "grad_norm": 0.5624472498893738, "learning_rate": 0.0005420700577857643, "loss": 3.0888, "step": 12313 }, { "epoch": 0.6, "grad_norm": 0.5267937183380127, "learning_rate": 0.0005420609631251154, "loss": 3.1991, "step": 12314 }, { "epoch": 0.6, "grad_norm": 0.5424567461013794, "learning_rate": 0.0005420518678269244, "loss": 3.2991, "step": 12315 }, { "epoch": 0.6, "grad_norm": 0.4930018484592438, "learning_rate": 0.0005420427718912153, "loss": 3.123, "step": 12316 }, { "epoch": 0.6, "grad_norm": 0.5012913346290588, "learning_rate": 0.0005420336753180122, "loss": 3.3452, "step": 12317 }, { "epoch": 0.6, "grad_norm": 0.5235104560852051, "learning_rate": 0.0005420245781073389, "loss": 3.1449, "step": 12318 }, { "epoch": 0.6, "grad_norm": 0.5059866905212402, "learning_rate": 0.0005420154802592195, "loss": 3.213, "step": 12319 }, { "epoch": 0.6, "grad_norm": 0.5268304944038391, "learning_rate": 0.0005420063817736778, "loss": 3.2597, "step": 12320 }, { "epoch": 0.6, "grad_norm": 0.5179598331451416, "learning_rate": 0.000541997282650738, "loss": 3.0205, "step": 12321 }, { "epoch": 0.6, "grad_norm": 0.501977264881134, "learning_rate": 0.0005419881828904239, "loss": 3.1745, "step": 12322 }, { "epoch": 0.6, "grad_norm": 0.5240980386734009, "learning_rate": 0.0005419790824927595, "loss": 3.2458, "step": 12323 }, { "epoch": 0.6, "grad_norm": 0.49430304765701294, "learning_rate": 0.0005419699814577687, "loss": 3.4038, "step": 12324 }, { "epoch": 0.6, "grad_norm": 0.5261852145195007, "learning_rate": 0.0005419608797854757, "loss": 3.3711, "step": 12325 }, { "epoch": 0.6, "grad_norm": 0.5073800683021545, "learning_rate": 0.0005419517774759041, "loss": 3.1167, "step": 12326 }, { "epoch": 0.6, "grad_norm": 0.5025413036346436, "learning_rate": 0.0005419426745290782, "loss": 3.3158, "step": 12327 }, { "epoch": 0.6, "grad_norm": 0.5052799582481384, "learning_rate": 0.0005419335709450218, "loss": 3.131, "step": 12328 }, { "epoch": 0.6, "grad_norm": 0.5107625722885132, "learning_rate": 0.000541924466723759, "loss": 3.4689, "step": 12329 }, { "epoch": 0.6, "grad_norm": 0.5340820550918579, "learning_rate": 0.0005419153618653137, "loss": 3.3352, "step": 12330 }, { "epoch": 0.6, "grad_norm": 0.5223778486251831, "learning_rate": 0.0005419062563697098, "loss": 3.2638, "step": 12331 }, { "epoch": 0.6, "grad_norm": 0.5212966203689575, "learning_rate": 0.0005418971502369716, "loss": 3.2023, "step": 12332 }, { "epoch": 0.6, "grad_norm": 0.5198991298675537, "learning_rate": 0.0005418880434671226, "loss": 3.2943, "step": 12333 }, { "epoch": 0.6, "grad_norm": 0.5650694966316223, "learning_rate": 0.0005418789360601872, "loss": 3.0997, "step": 12334 }, { "epoch": 0.6, "grad_norm": 0.5771616101264954, "learning_rate": 0.0005418698280161892, "loss": 3.07, "step": 12335 }, { "epoch": 0.6, "grad_norm": 0.5297648906707764, "learning_rate": 0.0005418607193351526, "loss": 3.3583, "step": 12336 }, { "epoch": 0.6, "grad_norm": 0.5116552710533142, "learning_rate": 0.0005418516100171015, "loss": 3.1286, "step": 12337 }, { "epoch": 0.6, "grad_norm": 0.5247451663017273, "learning_rate": 0.0005418425000620597, "loss": 2.9956, "step": 12338 }, { "epoch": 0.6, "grad_norm": 0.5303729772567749, "learning_rate": 0.0005418333894700513, "loss": 3.1847, "step": 12339 }, { "epoch": 0.6, "grad_norm": 0.5321176648139954, "learning_rate": 0.0005418242782411004, "loss": 3.2272, "step": 12340 }, { "epoch": 0.6, "grad_norm": 0.5363842844963074, "learning_rate": 0.0005418151663752307, "loss": 3.1136, "step": 12341 }, { "epoch": 0.6, "grad_norm": 0.543472409248352, "learning_rate": 0.0005418060538724666, "loss": 3.3665, "step": 12342 }, { "epoch": 0.6, "grad_norm": 0.5340068936347961, "learning_rate": 0.0005417969407328318, "loss": 3.1082, "step": 12343 }, { "epoch": 0.6, "grad_norm": 0.49749499559402466, "learning_rate": 0.0005417878269563504, "loss": 3.0852, "step": 12344 }, { "epoch": 0.6, "grad_norm": 0.5240546464920044, "learning_rate": 0.0005417787125430464, "loss": 3.1989, "step": 12345 }, { "epoch": 0.61, "grad_norm": 0.5031521320343018, "learning_rate": 0.0005417695974929438, "loss": 3.1801, "step": 12346 }, { "epoch": 0.61, "grad_norm": 0.5448225736618042, "learning_rate": 0.0005417604818060666, "loss": 3.4148, "step": 12347 }, { "epoch": 0.61, "grad_norm": 0.5050467848777771, "learning_rate": 0.0005417513654824387, "loss": 3.1923, "step": 12348 }, { "epoch": 0.61, "grad_norm": 0.5299510359764099, "learning_rate": 0.0005417422485220844, "loss": 3.1741, "step": 12349 }, { "epoch": 0.61, "grad_norm": 0.5421825647354126, "learning_rate": 0.0005417331309250275, "loss": 3.2135, "step": 12350 }, { "epoch": 0.61, "grad_norm": 0.5428870916366577, "learning_rate": 0.0005417240126912921, "loss": 3.1527, "step": 12351 }, { "epoch": 0.61, "grad_norm": 0.6039544939994812, "learning_rate": 0.0005417148938209021, "loss": 3.3517, "step": 12352 }, { "epoch": 0.61, "grad_norm": 0.5433862805366516, "learning_rate": 0.0005417057743138816, "loss": 3.2371, "step": 12353 }, { "epoch": 0.61, "grad_norm": 0.48752960562705994, "learning_rate": 0.0005416966541702546, "loss": 3.1304, "step": 12354 }, { "epoch": 0.61, "grad_norm": 0.529007613658905, "learning_rate": 0.0005416875333900451, "loss": 3.1325, "step": 12355 }, { "epoch": 0.61, "grad_norm": 0.5171936750411987, "learning_rate": 0.0005416784119732773, "loss": 3.1296, "step": 12356 }, { "epoch": 0.61, "grad_norm": 0.5062511563301086, "learning_rate": 0.0005416692899199749, "loss": 3.3456, "step": 12357 }, { "epoch": 0.61, "grad_norm": 0.5134553909301758, "learning_rate": 0.0005416601672301622, "loss": 3.0025, "step": 12358 }, { "epoch": 0.61, "grad_norm": 0.5461899042129517, "learning_rate": 0.0005416510439038631, "loss": 3.4576, "step": 12359 }, { "epoch": 0.61, "grad_norm": 0.6372771859169006, "learning_rate": 0.0005416419199411016, "loss": 3.4149, "step": 12360 }, { "epoch": 0.61, "grad_norm": 0.5487025380134583, "learning_rate": 0.0005416327953419018, "loss": 3.2196, "step": 12361 }, { "epoch": 0.61, "grad_norm": 0.5107635855674744, "learning_rate": 0.0005416236701062878, "loss": 3.1029, "step": 12362 }, { "epoch": 0.61, "grad_norm": 0.556648850440979, "learning_rate": 0.0005416145442342836, "loss": 3.2373, "step": 12363 }, { "epoch": 0.61, "grad_norm": 0.5657925605773926, "learning_rate": 0.0005416054177259131, "loss": 3.1103, "step": 12364 }, { "epoch": 0.61, "grad_norm": 0.49510687589645386, "learning_rate": 0.0005415962905812004, "loss": 3.2521, "step": 12365 }, { "epoch": 0.61, "grad_norm": 0.5163629651069641, "learning_rate": 0.0005415871628001696, "loss": 3.2231, "step": 12366 }, { "epoch": 0.61, "grad_norm": 0.5414348244667053, "learning_rate": 0.0005415780343828447, "loss": 3.0145, "step": 12367 }, { "epoch": 0.61, "grad_norm": 0.5388548374176025, "learning_rate": 0.0005415689053292497, "loss": 3.0301, "step": 12368 }, { "epoch": 0.61, "grad_norm": 0.49389225244522095, "learning_rate": 0.0005415597756394089, "loss": 3.2222, "step": 12369 }, { "epoch": 0.61, "grad_norm": 0.5336668491363525, "learning_rate": 0.0005415506453133459, "loss": 3.2878, "step": 12370 }, { "epoch": 0.61, "grad_norm": 0.500521719455719, "learning_rate": 0.000541541514351085, "loss": 3.113, "step": 12371 }, { "epoch": 0.61, "grad_norm": 0.4869195520877838, "learning_rate": 0.0005415323827526505, "loss": 3.1353, "step": 12372 }, { "epoch": 0.61, "grad_norm": 0.5147513747215271, "learning_rate": 0.000541523250518066, "loss": 3.3804, "step": 12373 }, { "epoch": 0.61, "grad_norm": 0.5055763721466064, "learning_rate": 0.0005415141176473557, "loss": 3.3596, "step": 12374 }, { "epoch": 0.61, "grad_norm": 0.5058522820472717, "learning_rate": 0.0005415049841405437, "loss": 3.0117, "step": 12375 }, { "epoch": 0.61, "grad_norm": 0.5772499442100525, "learning_rate": 0.0005414958499976541, "loss": 3.1889, "step": 12376 }, { "epoch": 0.61, "grad_norm": 0.5016292333602905, "learning_rate": 0.0005414867152187108, "loss": 3.1536, "step": 12377 }, { "epoch": 0.61, "grad_norm": 0.5072944760322571, "learning_rate": 0.0005414775798037382, "loss": 3.2119, "step": 12378 }, { "epoch": 0.61, "grad_norm": 0.5283709168434143, "learning_rate": 0.0005414684437527599, "loss": 2.9134, "step": 12379 }, { "epoch": 0.61, "grad_norm": 0.5712698101997375, "learning_rate": 0.0005414593070658004, "loss": 3.2677, "step": 12380 }, { "epoch": 0.61, "grad_norm": 0.5036458373069763, "learning_rate": 0.0005414501697428833, "loss": 3.2085, "step": 12381 }, { "epoch": 0.61, "grad_norm": 0.5280167460441589, "learning_rate": 0.0005414410317840331, "loss": 3.1025, "step": 12382 }, { "epoch": 0.61, "grad_norm": 0.548110842704773, "learning_rate": 0.0005414318931892737, "loss": 3.3154, "step": 12383 }, { "epoch": 0.61, "grad_norm": 0.5638526082038879, "learning_rate": 0.0005414227539586291, "loss": 3.189, "step": 12384 }, { "epoch": 0.61, "grad_norm": 0.519481897354126, "learning_rate": 0.0005414136140921234, "loss": 3.1508, "step": 12385 }, { "epoch": 0.61, "grad_norm": 0.49351656436920166, "learning_rate": 0.0005414044735897808, "loss": 3.2086, "step": 12386 }, { "epoch": 0.61, "grad_norm": 0.47887444496154785, "learning_rate": 0.0005413953324516251, "loss": 3.0731, "step": 12387 }, { "epoch": 0.61, "grad_norm": 0.5507675409317017, "learning_rate": 0.0005413861906776807, "loss": 3.3115, "step": 12388 }, { "epoch": 0.61, "grad_norm": 0.518807590007782, "learning_rate": 0.0005413770482679714, "loss": 3.5088, "step": 12389 }, { "epoch": 0.61, "grad_norm": 0.5343676209449768, "learning_rate": 0.0005413679052225216, "loss": 3.2752, "step": 12390 }, { "epoch": 0.61, "grad_norm": 0.5148628354072571, "learning_rate": 0.0005413587615413551, "loss": 3.1195, "step": 12391 }, { "epoch": 0.61, "grad_norm": 0.5338741540908813, "learning_rate": 0.000541349617224496, "loss": 3.0901, "step": 12392 }, { "epoch": 0.61, "grad_norm": 0.508698046207428, "learning_rate": 0.0005413404722719686, "loss": 3.3288, "step": 12393 }, { "epoch": 0.61, "grad_norm": 0.5255823731422424, "learning_rate": 0.0005413313266837968, "loss": 3.2126, "step": 12394 }, { "epoch": 0.61, "grad_norm": 0.5159070491790771, "learning_rate": 0.0005413221804600047, "loss": 3.0548, "step": 12395 }, { "epoch": 0.61, "grad_norm": 0.5951764583587646, "learning_rate": 0.0005413130336006164, "loss": 3.3417, "step": 12396 }, { "epoch": 0.61, "grad_norm": 0.5076818466186523, "learning_rate": 0.000541303886105656, "loss": 3.3209, "step": 12397 }, { "epoch": 0.61, "grad_norm": 0.5986706018447876, "learning_rate": 0.0005412947379751477, "loss": 3.2027, "step": 12398 }, { "epoch": 0.61, "grad_norm": 0.506370484828949, "learning_rate": 0.0005412855892091154, "loss": 3.1447, "step": 12399 }, { "epoch": 0.61, "grad_norm": 0.5274400115013123, "learning_rate": 0.0005412764398075833, "loss": 3.3458, "step": 12400 }, { "epoch": 0.61, "grad_norm": 0.5394174456596375, "learning_rate": 0.0005412672897705755, "loss": 3.3062, "step": 12401 }, { "epoch": 0.61, "grad_norm": 0.49921727180480957, "learning_rate": 0.0005412581390981161, "loss": 3.3473, "step": 12402 }, { "epoch": 0.61, "grad_norm": 0.5664904713630676, "learning_rate": 0.0005412489877902292, "loss": 3.2196, "step": 12403 }, { "epoch": 0.61, "grad_norm": 0.5725209712982178, "learning_rate": 0.0005412398358469389, "loss": 3.3237, "step": 12404 }, { "epoch": 0.61, "grad_norm": 0.5521411299705505, "learning_rate": 0.0005412306832682693, "loss": 3.1473, "step": 12405 }, { "epoch": 0.61, "grad_norm": 0.5140300393104553, "learning_rate": 0.0005412215300542446, "loss": 3.2564, "step": 12406 }, { "epoch": 0.61, "grad_norm": 0.5515618920326233, "learning_rate": 0.0005412123762048885, "loss": 3.029, "step": 12407 }, { "epoch": 0.61, "grad_norm": 0.5705258846282959, "learning_rate": 0.0005412032217202257, "loss": 3.2977, "step": 12408 }, { "epoch": 0.61, "grad_norm": 0.5424354672431946, "learning_rate": 0.00054119406660028, "loss": 3.1524, "step": 12409 }, { "epoch": 0.61, "grad_norm": 0.5054551959037781, "learning_rate": 0.0005411849108450756, "loss": 3.2744, "step": 12410 }, { "epoch": 0.61, "grad_norm": 0.5675023794174194, "learning_rate": 0.0005411757544546364, "loss": 3.1539, "step": 12411 }, { "epoch": 0.61, "grad_norm": 0.5317476391792297, "learning_rate": 0.0005411665974289867, "loss": 3.2369, "step": 12412 }, { "epoch": 0.61, "grad_norm": 0.5437188744544983, "learning_rate": 0.0005411574397681507, "loss": 3.3225, "step": 12413 }, { "epoch": 0.61, "grad_norm": 0.5426942706108093, "learning_rate": 0.0005411482814721523, "loss": 3.2466, "step": 12414 }, { "epoch": 0.61, "grad_norm": 0.5315844416618347, "learning_rate": 0.0005411391225410159, "loss": 3.1621, "step": 12415 }, { "epoch": 0.61, "grad_norm": 0.5135928392410278, "learning_rate": 0.0005411299629747654, "loss": 3.1404, "step": 12416 }, { "epoch": 0.61, "grad_norm": 0.5293831825256348, "learning_rate": 0.000541120802773425, "loss": 3.1356, "step": 12417 }, { "epoch": 0.61, "grad_norm": 0.5011104941368103, "learning_rate": 0.0005411116419370188, "loss": 3.281, "step": 12418 }, { "epoch": 0.61, "grad_norm": 0.524491012096405, "learning_rate": 0.0005411024804655708, "loss": 3.1678, "step": 12419 }, { "epoch": 0.61, "grad_norm": 0.5337167978286743, "learning_rate": 0.0005410933183591053, "loss": 3.1052, "step": 12420 }, { "epoch": 0.61, "grad_norm": 0.49512872099876404, "learning_rate": 0.0005410841556176465, "loss": 3.3314, "step": 12421 }, { "epoch": 0.61, "grad_norm": 0.5637658834457397, "learning_rate": 0.0005410749922412184, "loss": 3.0071, "step": 12422 }, { "epoch": 0.61, "grad_norm": 0.5093816518783569, "learning_rate": 0.0005410658282298451, "loss": 3.38, "step": 12423 }, { "epoch": 0.61, "grad_norm": 0.5493909120559692, "learning_rate": 0.0005410566635835509, "loss": 3.4419, "step": 12424 }, { "epoch": 0.61, "grad_norm": 0.6119068264961243, "learning_rate": 0.0005410474983023599, "loss": 3.3356, "step": 12425 }, { "epoch": 0.61, "grad_norm": 0.5353181958198547, "learning_rate": 0.000541038332386296, "loss": 3.2801, "step": 12426 }, { "epoch": 0.61, "grad_norm": 0.5060677528381348, "learning_rate": 0.0005410291658353837, "loss": 3.1649, "step": 12427 }, { "epoch": 0.61, "grad_norm": 0.5227444171905518, "learning_rate": 0.0005410199986496467, "loss": 3.4991, "step": 12428 }, { "epoch": 0.61, "grad_norm": 0.5346323251724243, "learning_rate": 0.0005410108308291097, "loss": 3.2646, "step": 12429 }, { "epoch": 0.61, "grad_norm": 0.5016912221908569, "learning_rate": 0.0005410016623737964, "loss": 3.328, "step": 12430 }, { "epoch": 0.61, "grad_norm": 0.5151358246803284, "learning_rate": 0.0005409924932837312, "loss": 3.2546, "step": 12431 }, { "epoch": 0.61, "grad_norm": 0.5453892350196838, "learning_rate": 0.0005409833235589381, "loss": 3.1258, "step": 12432 }, { "epoch": 0.61, "grad_norm": 0.5162804126739502, "learning_rate": 0.0005409741531994413, "loss": 3.212, "step": 12433 }, { "epoch": 0.61, "grad_norm": 0.5229930877685547, "learning_rate": 0.0005409649822052648, "loss": 2.9828, "step": 12434 }, { "epoch": 0.61, "grad_norm": 0.5364524722099304, "learning_rate": 0.0005409558105764332, "loss": 3.2532, "step": 12435 }, { "epoch": 0.61, "grad_norm": 0.5143020153045654, "learning_rate": 0.0005409466383129701, "loss": 3.1768, "step": 12436 }, { "epoch": 0.61, "grad_norm": 0.523679256439209, "learning_rate": 0.0005409374654149001, "loss": 3.3917, "step": 12437 }, { "epoch": 0.61, "grad_norm": 0.5435823798179626, "learning_rate": 0.0005409282918822472, "loss": 3.1854, "step": 12438 }, { "epoch": 0.61, "grad_norm": 0.5507246255874634, "learning_rate": 0.0005409191177150355, "loss": 3.1831, "step": 12439 }, { "epoch": 0.61, "grad_norm": 0.5211560726165771, "learning_rate": 0.0005409099429132891, "loss": 3.2665, "step": 12440 }, { "epoch": 0.61, "grad_norm": 0.49892207980155945, "learning_rate": 0.0005409007674770324, "loss": 2.9391, "step": 12441 }, { "epoch": 0.61, "grad_norm": 0.5270420908927917, "learning_rate": 0.0005408915914062894, "loss": 3.2581, "step": 12442 }, { "epoch": 0.61, "grad_norm": 0.518926203250885, "learning_rate": 0.0005408824147010844, "loss": 3.2074, "step": 12443 }, { "epoch": 0.61, "grad_norm": 0.4994022846221924, "learning_rate": 0.0005408732373614414, "loss": 3.0999, "step": 12444 }, { "epoch": 0.61, "grad_norm": 0.5447221994400024, "learning_rate": 0.0005408640593873846, "loss": 3.1956, "step": 12445 }, { "epoch": 0.61, "grad_norm": 0.5384201407432556, "learning_rate": 0.0005408548807789383, "loss": 3.3842, "step": 12446 }, { "epoch": 0.61, "grad_norm": 0.5086652040481567, "learning_rate": 0.0005408457015361266, "loss": 3.2858, "step": 12447 }, { "epoch": 0.61, "grad_norm": 0.49078619480133057, "learning_rate": 0.0005408365216589736, "loss": 3.143, "step": 12448 }, { "epoch": 0.61, "grad_norm": 0.5547512769699097, "learning_rate": 0.0005408273411475036, "loss": 3.374, "step": 12449 }, { "epoch": 0.61, "grad_norm": 0.49764230847358704, "learning_rate": 0.0005408181600017407, "loss": 3.1671, "step": 12450 }, { "epoch": 0.61, "grad_norm": 0.5081911087036133, "learning_rate": 0.0005408089782217092, "loss": 3.1078, "step": 12451 }, { "epoch": 0.61, "grad_norm": 0.5074054002761841, "learning_rate": 0.0005407997958074331, "loss": 3.1706, "step": 12452 }, { "epoch": 0.61, "grad_norm": 0.47185125946998596, "learning_rate": 0.0005407906127589368, "loss": 3.1016, "step": 12453 }, { "epoch": 0.61, "grad_norm": 0.5095272064208984, "learning_rate": 0.0005407814290762442, "loss": 3.3671, "step": 12454 }, { "epoch": 0.61, "grad_norm": 0.5170059204101562, "learning_rate": 0.0005407722447593798, "loss": 3.1382, "step": 12455 }, { "epoch": 0.61, "grad_norm": 0.5567418336868286, "learning_rate": 0.0005407630598083676, "loss": 3.2116, "step": 12456 }, { "epoch": 0.61, "grad_norm": 0.48573851585388184, "learning_rate": 0.0005407538742232317, "loss": 3.3303, "step": 12457 }, { "epoch": 0.61, "grad_norm": 0.5352392792701721, "learning_rate": 0.0005407446880039966, "loss": 2.8769, "step": 12458 }, { "epoch": 0.61, "grad_norm": 0.6195808053016663, "learning_rate": 0.0005407355011506862, "loss": 3.3433, "step": 12459 }, { "epoch": 0.61, "grad_norm": 0.5522502660751343, "learning_rate": 0.000540726313663325, "loss": 3.2809, "step": 12460 }, { "epoch": 0.61, "grad_norm": 0.5253701210021973, "learning_rate": 0.0005407171255419369, "loss": 3.3492, "step": 12461 }, { "epoch": 0.61, "grad_norm": 0.5422989130020142, "learning_rate": 0.0005407079367865462, "loss": 3.2108, "step": 12462 }, { "epoch": 0.61, "grad_norm": 0.478427916765213, "learning_rate": 0.0005406987473971772, "loss": 3.0953, "step": 12463 }, { "epoch": 0.61, "grad_norm": 0.616083562374115, "learning_rate": 0.0005406895573738539, "loss": 3.2778, "step": 12464 }, { "epoch": 0.61, "grad_norm": 0.5093544125556946, "learning_rate": 0.0005406803667166008, "loss": 3.3096, "step": 12465 }, { "epoch": 0.61, "grad_norm": 0.5077793598175049, "learning_rate": 0.0005406711754254418, "loss": 3.2769, "step": 12466 }, { "epoch": 0.61, "grad_norm": 0.52507483959198, "learning_rate": 0.0005406619835004012, "loss": 3.2368, "step": 12467 }, { "epoch": 0.61, "grad_norm": 0.500822126865387, "learning_rate": 0.0005406527909415033, "loss": 3.068, "step": 12468 }, { "epoch": 0.61, "grad_norm": 0.5564792156219482, "learning_rate": 0.0005406435977487723, "loss": 3.1324, "step": 12469 }, { "epoch": 0.61, "grad_norm": 0.5204517245292664, "learning_rate": 0.0005406344039222325, "loss": 3.0969, "step": 12470 }, { "epoch": 0.61, "grad_norm": 0.5361521244049072, "learning_rate": 0.0005406252094619079, "loss": 3.3305, "step": 12471 }, { "epoch": 0.61, "grad_norm": 0.5398157238960266, "learning_rate": 0.0005406160143678226, "loss": 3.0892, "step": 12472 }, { "epoch": 0.61, "grad_norm": 0.5248488187789917, "learning_rate": 0.0005406068186400013, "loss": 3.1276, "step": 12473 }, { "epoch": 0.61, "grad_norm": 0.5540500283241272, "learning_rate": 0.0005405976222784678, "loss": 3.1021, "step": 12474 }, { "epoch": 0.61, "grad_norm": 0.5601980686187744, "learning_rate": 0.0005405884252832465, "loss": 2.9468, "step": 12475 }, { "epoch": 0.61, "grad_norm": 0.5255300998687744, "learning_rate": 0.0005405792276543616, "loss": 3.3761, "step": 12476 }, { "epoch": 0.61, "grad_norm": 0.5468989014625549, "learning_rate": 0.0005405700293918373, "loss": 3.3636, "step": 12477 }, { "epoch": 0.61, "grad_norm": 0.5150614976882935, "learning_rate": 0.000540560830495698, "loss": 3.3567, "step": 12478 }, { "epoch": 0.61, "grad_norm": 0.555590033531189, "learning_rate": 0.0005405516309659674, "loss": 3.0297, "step": 12479 }, { "epoch": 0.61, "grad_norm": 0.5096856951713562, "learning_rate": 0.0005405424308026704, "loss": 3.2316, "step": 12480 }, { "epoch": 0.61, "grad_norm": 0.5092878341674805, "learning_rate": 0.0005405332300058308, "loss": 3.5083, "step": 12481 }, { "epoch": 0.61, "grad_norm": 0.5776546597480774, "learning_rate": 0.0005405240285754731, "loss": 3.1405, "step": 12482 }, { "epoch": 0.61, "grad_norm": 0.5385265946388245, "learning_rate": 0.0005405148265116213, "loss": 3.3177, "step": 12483 }, { "epoch": 0.61, "grad_norm": 0.5115643739700317, "learning_rate": 0.0005405056238142997, "loss": 3.3791, "step": 12484 }, { "epoch": 0.61, "grad_norm": 0.4903174340724945, "learning_rate": 0.0005404964204835327, "loss": 3.3797, "step": 12485 }, { "epoch": 0.61, "grad_norm": 0.5272238254547119, "learning_rate": 0.0005404872165193443, "loss": 3.2622, "step": 12486 }, { "epoch": 0.61, "grad_norm": 0.5495355725288391, "learning_rate": 0.0005404780119217589, "loss": 3.0021, "step": 12487 }, { "epoch": 0.61, "grad_norm": 0.5022799968719482, "learning_rate": 0.0005404688066908006, "loss": 3.26, "step": 12488 }, { "epoch": 0.61, "grad_norm": 0.5563308596611023, "learning_rate": 0.0005404596008264939, "loss": 3.3463, "step": 12489 }, { "epoch": 0.61, "grad_norm": 0.5790652632713318, "learning_rate": 0.0005404503943288628, "loss": 3.3775, "step": 12490 }, { "epoch": 0.61, "grad_norm": 0.5320955514907837, "learning_rate": 0.0005404411871979316, "loss": 3.2666, "step": 12491 }, { "epoch": 0.61, "grad_norm": 0.5261737704277039, "learning_rate": 0.0005404319794337246, "loss": 3.1438, "step": 12492 }, { "epoch": 0.61, "grad_norm": 0.4732470214366913, "learning_rate": 0.0005404227710362661, "loss": 3.1054, "step": 12493 }, { "epoch": 0.61, "grad_norm": 0.5220340490341187, "learning_rate": 0.0005404135620055803, "loss": 3.2321, "step": 12494 }, { "epoch": 0.61, "grad_norm": 0.4781638979911804, "learning_rate": 0.0005404043523416914, "loss": 3.1854, "step": 12495 }, { "epoch": 0.61, "grad_norm": 0.5066351294517517, "learning_rate": 0.0005403951420446237, "loss": 3.2492, "step": 12496 }, { "epoch": 0.61, "grad_norm": 0.49951955676078796, "learning_rate": 0.0005403859311144015, "loss": 3.1887, "step": 12497 }, { "epoch": 0.61, "grad_norm": 0.5178000330924988, "learning_rate": 0.000540376719551049, "loss": 3.2451, "step": 12498 }, { "epoch": 0.61, "grad_norm": 0.5040697455406189, "learning_rate": 0.0005403675073545906, "loss": 3.2617, "step": 12499 }, { "epoch": 0.61, "grad_norm": 0.48904526233673096, "learning_rate": 0.0005403582945250503, "loss": 3.0489, "step": 12500 }, { "epoch": 0.61, "grad_norm": 0.5385371446609497, "learning_rate": 0.0005403490810624525, "loss": 3.1477, "step": 12501 }, { "epoch": 0.61, "grad_norm": 0.5342295169830322, "learning_rate": 0.0005403398669668215, "loss": 3.1246, "step": 12502 }, { "epoch": 0.61, "grad_norm": 0.5917371511459351, "learning_rate": 0.0005403306522381815, "loss": 3.241, "step": 12503 }, { "epoch": 0.61, "grad_norm": 0.5648188591003418, "learning_rate": 0.0005403214368765569, "loss": 3.3202, "step": 12504 }, { "epoch": 0.61, "grad_norm": 0.534196674823761, "learning_rate": 0.0005403122208819718, "loss": 3.3616, "step": 12505 }, { "epoch": 0.61, "grad_norm": 0.5107386112213135, "learning_rate": 0.0005403030042544506, "loss": 3.179, "step": 12506 }, { "epoch": 0.61, "grad_norm": 0.5261774659156799, "learning_rate": 0.0005402937869940177, "loss": 3.5787, "step": 12507 }, { "epoch": 0.61, "grad_norm": 0.4821683168411255, "learning_rate": 0.000540284569100697, "loss": 3.2806, "step": 12508 }, { "epoch": 0.61, "grad_norm": 0.5442585349082947, "learning_rate": 0.0005402753505745131, "loss": 3.3793, "step": 12509 }, { "epoch": 0.61, "grad_norm": 0.5089293122291565, "learning_rate": 0.0005402661314154901, "loss": 3.3455, "step": 12510 }, { "epoch": 0.61, "grad_norm": 0.5478642582893372, "learning_rate": 0.0005402569116236525, "loss": 3.1172, "step": 12511 }, { "epoch": 0.61, "grad_norm": 0.4844317138195038, "learning_rate": 0.0005402476911990242, "loss": 3.3636, "step": 12512 }, { "epoch": 0.61, "grad_norm": 0.5121679902076721, "learning_rate": 0.0005402384701416298, "loss": 3.1472, "step": 12513 }, { "epoch": 0.61, "grad_norm": 0.4980253279209137, "learning_rate": 0.0005402292484514935, "loss": 3.3732, "step": 12514 }, { "epoch": 0.61, "grad_norm": 0.49577051401138306, "learning_rate": 0.0005402200261286396, "loss": 3.2947, "step": 12515 }, { "epoch": 0.61, "grad_norm": 0.5190374255180359, "learning_rate": 0.0005402108031730923, "loss": 3.0081, "step": 12516 }, { "epoch": 0.61, "grad_norm": 0.5585235953330994, "learning_rate": 0.0005402015795848762, "loss": 3.2278, "step": 12517 }, { "epoch": 0.61, "grad_norm": 0.5044002532958984, "learning_rate": 0.0005401923553640152, "loss": 3.397, "step": 12518 }, { "epoch": 0.61, "grad_norm": 0.5685398578643799, "learning_rate": 0.0005401831305105339, "loss": 3.2287, "step": 12519 }, { "epoch": 0.61, "grad_norm": 0.5105231404304504, "learning_rate": 0.0005401739050244563, "loss": 3.2115, "step": 12520 }, { "epoch": 0.61, "grad_norm": 0.49602973461151123, "learning_rate": 0.000540164678905807, "loss": 3.2298, "step": 12521 }, { "epoch": 0.61, "grad_norm": 0.5373128056526184, "learning_rate": 0.00054015545215461, "loss": 2.917, "step": 12522 }, { "epoch": 0.61, "grad_norm": 0.5047098994255066, "learning_rate": 0.0005401462247708899, "loss": 3.4197, "step": 12523 }, { "epoch": 0.61, "grad_norm": 0.5524264574050903, "learning_rate": 0.0005401369967546708, "loss": 3.0935, "step": 12524 }, { "epoch": 0.61, "grad_norm": 0.5501293540000916, "learning_rate": 0.000540127768105977, "loss": 3.1648, "step": 12525 }, { "epoch": 0.61, "grad_norm": 0.5153632760047913, "learning_rate": 0.0005401185388248329, "loss": 3.1659, "step": 12526 }, { "epoch": 0.61, "grad_norm": 0.48313668370246887, "learning_rate": 0.0005401093089112628, "loss": 3.0783, "step": 12527 }, { "epoch": 0.61, "grad_norm": 0.5056579113006592, "learning_rate": 0.0005401000783652911, "loss": 3.2297, "step": 12528 }, { "epoch": 0.61, "grad_norm": 0.5305315256118774, "learning_rate": 0.0005400908471869419, "loss": 3.1893, "step": 12529 }, { "epoch": 0.61, "grad_norm": 0.5503982901573181, "learning_rate": 0.0005400816153762396, "loss": 3.1965, "step": 12530 }, { "epoch": 0.61, "grad_norm": 0.4872680902481079, "learning_rate": 0.0005400723829332085, "loss": 3.1287, "step": 12531 }, { "epoch": 0.61, "grad_norm": 0.5678309798240662, "learning_rate": 0.000540063149857873, "loss": 3.1886, "step": 12532 }, { "epoch": 0.61, "grad_norm": 0.4857582151889801, "learning_rate": 0.0005400539161502574, "loss": 3.208, "step": 12533 }, { "epoch": 0.61, "grad_norm": 0.5303822755813599, "learning_rate": 0.000540044681810386, "loss": 3.339, "step": 12534 }, { "epoch": 0.61, "grad_norm": 0.5294134616851807, "learning_rate": 0.000540035446838283, "loss": 3.2367, "step": 12535 }, { "epoch": 0.61, "grad_norm": 0.49902549386024475, "learning_rate": 0.0005400262112339728, "loss": 3.2169, "step": 12536 }, { "epoch": 0.61, "grad_norm": 0.4956739544868469, "learning_rate": 0.0005400169749974798, "loss": 3.1918, "step": 12537 }, { "epoch": 0.61, "grad_norm": 0.5438934564590454, "learning_rate": 0.0005400077381288284, "loss": 3.1434, "step": 12538 }, { "epoch": 0.61, "grad_norm": 0.4825858771800995, "learning_rate": 0.0005399985006280427, "loss": 3.0899, "step": 12539 }, { "epoch": 0.61, "grad_norm": 0.7768726944923401, "learning_rate": 0.0005399892624951472, "loss": 3.2963, "step": 12540 }, { "epoch": 0.61, "grad_norm": 0.5147074460983276, "learning_rate": 0.0005399800237301661, "loss": 3.1657, "step": 12541 }, { "epoch": 0.61, "grad_norm": 0.5385922789573669, "learning_rate": 0.0005399707843331238, "loss": 3.1703, "step": 12542 }, { "epoch": 0.61, "grad_norm": 0.5428267121315002, "learning_rate": 0.0005399615443040447, "loss": 3.0775, "step": 12543 }, { "epoch": 0.61, "grad_norm": 0.5166685581207275, "learning_rate": 0.000539952303642953, "loss": 3.2899, "step": 12544 }, { "epoch": 0.61, "grad_norm": 0.5144116282463074, "learning_rate": 0.0005399430623498732, "loss": 3.3296, "step": 12545 }, { "epoch": 0.61, "grad_norm": 0.49154841899871826, "learning_rate": 0.0005399338204248295, "loss": 3.0634, "step": 12546 }, { "epoch": 0.61, "grad_norm": 0.5445835590362549, "learning_rate": 0.0005399245778678464, "loss": 3.3533, "step": 12547 }, { "epoch": 0.61, "grad_norm": 0.4968005418777466, "learning_rate": 0.0005399153346789479, "loss": 3.3812, "step": 12548 }, { "epoch": 0.61, "grad_norm": 0.5119567513465881, "learning_rate": 0.0005399060908581587, "loss": 3.2191, "step": 12549 }, { "epoch": 0.62, "grad_norm": 0.5225988626480103, "learning_rate": 0.0005398968464055031, "loss": 3.2854, "step": 12550 }, { "epoch": 0.62, "grad_norm": 0.5341635942459106, "learning_rate": 0.0005398876013210053, "loss": 3.2697, "step": 12551 }, { "epoch": 0.62, "grad_norm": 0.5326067805290222, "learning_rate": 0.0005398783556046897, "loss": 3.535, "step": 12552 }, { "epoch": 0.62, "grad_norm": 0.5414754152297974, "learning_rate": 0.0005398691092565808, "loss": 3.3331, "step": 12553 }, { "epoch": 0.62, "grad_norm": 0.5281270146369934, "learning_rate": 0.0005398598622767027, "loss": 3.454, "step": 12554 }, { "epoch": 0.62, "grad_norm": 0.5076268911361694, "learning_rate": 0.0005398506146650799, "loss": 3.4165, "step": 12555 }, { "epoch": 0.62, "grad_norm": 0.5204314589500427, "learning_rate": 0.0005398413664217368, "loss": 3.1195, "step": 12556 }, { "epoch": 0.62, "grad_norm": 0.5105859041213989, "learning_rate": 0.0005398321175466977, "loss": 3.2604, "step": 12557 }, { "epoch": 0.62, "grad_norm": 0.5473527908325195, "learning_rate": 0.0005398228680399869, "loss": 3.1417, "step": 12558 }, { "epoch": 0.62, "grad_norm": 0.4897322356700897, "learning_rate": 0.0005398136179016288, "loss": 3.2681, "step": 12559 }, { "epoch": 0.62, "grad_norm": 0.532136857509613, "learning_rate": 0.000539804367131648, "loss": 3.2423, "step": 12560 }, { "epoch": 0.62, "grad_norm": 0.532255232334137, "learning_rate": 0.0005397951157300684, "loss": 3.3202, "step": 12561 }, { "epoch": 0.62, "grad_norm": 0.531471312046051, "learning_rate": 0.0005397858636969148, "loss": 3.0657, "step": 12562 }, { "epoch": 0.62, "grad_norm": 0.5550201535224915, "learning_rate": 0.0005397766110322112, "loss": 3.203, "step": 12563 }, { "epoch": 0.62, "grad_norm": 0.519936740398407, "learning_rate": 0.0005397673577359822, "loss": 3.2974, "step": 12564 }, { "epoch": 0.62, "grad_norm": 0.4963712990283966, "learning_rate": 0.0005397581038082521, "loss": 3.2021, "step": 12565 }, { "epoch": 0.62, "grad_norm": 0.5781660676002502, "learning_rate": 0.0005397488492490455, "loss": 3.1871, "step": 12566 }, { "epoch": 0.62, "grad_norm": 0.49295535683631897, "learning_rate": 0.0005397395940583864, "loss": 3.127, "step": 12567 }, { "epoch": 0.62, "grad_norm": 0.5267174243927002, "learning_rate": 0.0005397303382362994, "loss": 2.8856, "step": 12568 }, { "epoch": 0.62, "grad_norm": 0.5607599020004272, "learning_rate": 0.0005397210817828088, "loss": 3.1008, "step": 12569 }, { "epoch": 0.62, "grad_norm": 0.5201016068458557, "learning_rate": 0.000539711824697939, "loss": 3.1056, "step": 12570 }, { "epoch": 0.62, "grad_norm": 0.5651845932006836, "learning_rate": 0.0005397025669817144, "loss": 3.2542, "step": 12571 }, { "epoch": 0.62, "grad_norm": 0.5444151163101196, "learning_rate": 0.0005396933086341593, "loss": 3.0831, "step": 12572 }, { "epoch": 0.62, "grad_norm": 0.5270144939422607, "learning_rate": 0.0005396840496552982, "loss": 2.9548, "step": 12573 }, { "epoch": 0.62, "grad_norm": 0.5137320756912231, "learning_rate": 0.0005396747900451555, "loss": 3.1978, "step": 12574 }, { "epoch": 0.62, "grad_norm": 0.5519542694091797, "learning_rate": 0.0005396655298037555, "loss": 3.2202, "step": 12575 }, { "epoch": 0.62, "grad_norm": 0.5144039392471313, "learning_rate": 0.0005396562689311226, "loss": 3.1693, "step": 12576 }, { "epoch": 0.62, "grad_norm": 0.5353249907493591, "learning_rate": 0.0005396470074272812, "loss": 3.404, "step": 12577 }, { "epoch": 0.62, "grad_norm": 0.5302972793579102, "learning_rate": 0.0005396377452922558, "loss": 3.4043, "step": 12578 }, { "epoch": 0.62, "grad_norm": 0.4982793927192688, "learning_rate": 0.0005396284825260705, "loss": 3.2489, "step": 12579 }, { "epoch": 0.62, "grad_norm": 0.5199756026268005, "learning_rate": 0.0005396192191287502, "loss": 3.1789, "step": 12580 }, { "epoch": 0.62, "grad_norm": 0.5148780345916748, "learning_rate": 0.0005396099551003187, "loss": 3.2521, "step": 12581 }, { "epoch": 0.62, "grad_norm": 0.5244265198707581, "learning_rate": 0.0005396006904408009, "loss": 3.3569, "step": 12582 }, { "epoch": 0.62, "grad_norm": 0.5343596935272217, "learning_rate": 0.0005395914251502208, "loss": 3.3348, "step": 12583 }, { "epoch": 0.62, "grad_norm": 0.514910101890564, "learning_rate": 0.0005395821592286031, "loss": 3.4142, "step": 12584 }, { "epoch": 0.62, "grad_norm": 0.5111855864524841, "learning_rate": 0.0005395728926759721, "loss": 3.0777, "step": 12585 }, { "epoch": 0.62, "grad_norm": 0.5080044269561768, "learning_rate": 0.0005395636254923522, "loss": 3.3565, "step": 12586 }, { "epoch": 0.62, "grad_norm": 0.47200343012809753, "learning_rate": 0.0005395543576777679, "loss": 3.1417, "step": 12587 }, { "epoch": 0.62, "grad_norm": 0.5408815741539001, "learning_rate": 0.0005395450892322433, "loss": 3.3413, "step": 12588 }, { "epoch": 0.62, "grad_norm": 0.5425565242767334, "learning_rate": 0.0005395358201558032, "loss": 3.2498, "step": 12589 }, { "epoch": 0.62, "grad_norm": 0.5002942681312561, "learning_rate": 0.0005395265504484719, "loss": 3.2378, "step": 12590 }, { "epoch": 0.62, "grad_norm": 0.5082552433013916, "learning_rate": 0.0005395172801102736, "loss": 3.2024, "step": 12591 }, { "epoch": 0.62, "grad_norm": 0.5224912166595459, "learning_rate": 0.000539508009141233, "loss": 3.215, "step": 12592 }, { "epoch": 0.62, "grad_norm": 0.7015027403831482, "learning_rate": 0.0005394987375413745, "loss": 3.2016, "step": 12593 }, { "epoch": 0.62, "grad_norm": 0.4700780510902405, "learning_rate": 0.0005394894653107222, "loss": 3.3461, "step": 12594 }, { "epoch": 0.62, "grad_norm": 0.49604517221450806, "learning_rate": 0.0005394801924493008, "loss": 3.2246, "step": 12595 }, { "epoch": 0.62, "grad_norm": 0.5033439993858337, "learning_rate": 0.0005394709189571347, "loss": 3.2227, "step": 12596 }, { "epoch": 0.62, "grad_norm": 0.5192400217056274, "learning_rate": 0.0005394616448342483, "loss": 3.3658, "step": 12597 }, { "epoch": 0.62, "grad_norm": 0.5239402055740356, "learning_rate": 0.0005394523700806659, "loss": 2.9431, "step": 12598 }, { "epoch": 0.62, "grad_norm": 0.5193118453025818, "learning_rate": 0.0005394430946964122, "loss": 3.3063, "step": 12599 }, { "epoch": 0.62, "grad_norm": 0.5492883324623108, "learning_rate": 0.0005394338186815114, "loss": 3.1588, "step": 12600 }, { "epoch": 0.62, "grad_norm": 0.48858433961868286, "learning_rate": 0.000539424542035988, "loss": 3.3016, "step": 12601 }, { "epoch": 0.62, "grad_norm": 0.5181791186332703, "learning_rate": 0.0005394152647598664, "loss": 3.2007, "step": 12602 }, { "epoch": 0.62, "grad_norm": 0.5329108238220215, "learning_rate": 0.0005394059868531711, "loss": 3.1491, "step": 12603 }, { "epoch": 0.62, "grad_norm": 0.5719881653785706, "learning_rate": 0.0005393967083159266, "loss": 3.2346, "step": 12604 }, { "epoch": 0.62, "grad_norm": 0.509117841720581, "learning_rate": 0.0005393874291481571, "loss": 3.1653, "step": 12605 }, { "epoch": 0.62, "grad_norm": 0.5386404395103455, "learning_rate": 0.0005393781493498872, "loss": 3.3633, "step": 12606 }, { "epoch": 0.62, "grad_norm": 0.5088648200035095, "learning_rate": 0.0005393688689211413, "loss": 3.1949, "step": 12607 }, { "epoch": 0.62, "grad_norm": 0.5584010481834412, "learning_rate": 0.000539359587861944, "loss": 3.4235, "step": 12608 }, { "epoch": 0.62, "grad_norm": 0.6642793416976929, "learning_rate": 0.0005393503061723196, "loss": 3.3751, "step": 12609 }, { "epoch": 0.62, "grad_norm": 0.5091199278831482, "learning_rate": 0.0005393410238522924, "loss": 3.1341, "step": 12610 }, { "epoch": 0.62, "grad_norm": 0.5205773711204529, "learning_rate": 0.0005393317409018871, "loss": 3.2806, "step": 12611 }, { "epoch": 0.62, "grad_norm": 0.5145056247711182, "learning_rate": 0.0005393224573211281, "loss": 3.3279, "step": 12612 }, { "epoch": 0.62, "grad_norm": 0.5004215240478516, "learning_rate": 0.0005393131731100398, "loss": 3.3095, "step": 12613 }, { "epoch": 0.62, "grad_norm": 0.5290515422821045, "learning_rate": 0.0005393038882686466, "loss": 3.2428, "step": 12614 }, { "epoch": 0.62, "grad_norm": 0.5009429454803467, "learning_rate": 0.000539294602796973, "loss": 3.0593, "step": 12615 }, { "epoch": 0.62, "grad_norm": 0.6031820178031921, "learning_rate": 0.0005392853166950436, "loss": 3.1133, "step": 12616 }, { "epoch": 0.62, "grad_norm": 0.5044199824333191, "learning_rate": 0.0005392760299628825, "loss": 3.3295, "step": 12617 }, { "epoch": 0.62, "grad_norm": 0.582173228263855, "learning_rate": 0.0005392667426005146, "loss": 3.172, "step": 12618 }, { "epoch": 0.62, "grad_norm": 0.5244518518447876, "learning_rate": 0.000539257454607964, "loss": 3.4616, "step": 12619 }, { "epoch": 0.62, "grad_norm": 0.5106006264686584, "learning_rate": 0.0005392481659852554, "loss": 3.453, "step": 12620 }, { "epoch": 0.62, "grad_norm": 0.5275260806083679, "learning_rate": 0.0005392388767324132, "loss": 3.2022, "step": 12621 }, { "epoch": 0.62, "grad_norm": 0.6484015583992004, "learning_rate": 0.0005392295868494617, "loss": 3.1695, "step": 12622 }, { "epoch": 0.62, "grad_norm": 0.5001370310783386, "learning_rate": 0.0005392202963364256, "loss": 3.1173, "step": 12623 }, { "epoch": 0.62, "grad_norm": 0.5128294229507446, "learning_rate": 0.0005392110051933293, "loss": 3.3133, "step": 12624 }, { "epoch": 0.62, "grad_norm": 0.48935312032699585, "learning_rate": 0.0005392017134201973, "loss": 3.2482, "step": 12625 }, { "epoch": 0.62, "grad_norm": 0.546270489692688, "learning_rate": 0.0005391924210170539, "loss": 3.2341, "step": 12626 }, { "epoch": 0.62, "grad_norm": 0.5206765532493591, "learning_rate": 0.0005391831279839237, "loss": 3.1757, "step": 12627 }, { "epoch": 0.62, "grad_norm": 0.5980457067489624, "learning_rate": 0.0005391738343208313, "loss": 3.1083, "step": 12628 }, { "epoch": 0.62, "grad_norm": 0.5202022194862366, "learning_rate": 0.0005391645400278009, "loss": 3.256, "step": 12629 }, { "epoch": 0.62, "grad_norm": 0.5010168552398682, "learning_rate": 0.0005391552451048571, "loss": 3.1127, "step": 12630 }, { "epoch": 0.62, "grad_norm": 0.5590848326683044, "learning_rate": 0.0005391459495520247, "loss": 2.9533, "step": 12631 }, { "epoch": 0.62, "grad_norm": 0.49834656715393066, "learning_rate": 0.0005391366533693276, "loss": 3.1844, "step": 12632 }, { "epoch": 0.62, "grad_norm": 0.5144838690757751, "learning_rate": 0.0005391273565567906, "loss": 3.1728, "step": 12633 }, { "epoch": 0.62, "grad_norm": 0.5475999712944031, "learning_rate": 0.0005391180591144383, "loss": 3.3422, "step": 12634 }, { "epoch": 0.62, "grad_norm": 0.5091504454612732, "learning_rate": 0.000539108761042295, "loss": 3.1376, "step": 12635 }, { "epoch": 0.62, "grad_norm": 0.5275076031684875, "learning_rate": 0.0005390994623403853, "loss": 3.3421, "step": 12636 }, { "epoch": 0.62, "grad_norm": 0.530569851398468, "learning_rate": 0.0005390901630087336, "loss": 3.1941, "step": 12637 }, { "epoch": 0.62, "grad_norm": 0.5221152901649475, "learning_rate": 0.0005390808630473643, "loss": 3.2616, "step": 12638 }, { "epoch": 0.62, "grad_norm": 0.5298863649368286, "learning_rate": 0.0005390715624563021, "loss": 3.2261, "step": 12639 }, { "epoch": 0.62, "grad_norm": 0.513576865196228, "learning_rate": 0.0005390622612355715, "loss": 3.2288, "step": 12640 }, { "epoch": 0.62, "grad_norm": 0.4933810234069824, "learning_rate": 0.0005390529593851969, "loss": 3.261, "step": 12641 }, { "epoch": 0.62, "grad_norm": 0.5063499808311462, "learning_rate": 0.0005390436569052027, "loss": 3.1588, "step": 12642 }, { "epoch": 0.62, "grad_norm": 0.4862244725227356, "learning_rate": 0.0005390343537956135, "loss": 3.3587, "step": 12643 }, { "epoch": 0.62, "grad_norm": 0.49645307660102844, "learning_rate": 0.0005390250500564539, "loss": 3.0493, "step": 12644 }, { "epoch": 0.62, "grad_norm": 0.506386399269104, "learning_rate": 0.0005390157456877483, "loss": 3.3449, "step": 12645 }, { "epoch": 0.62, "grad_norm": 0.5540480613708496, "learning_rate": 0.0005390064406895212, "loss": 3.3795, "step": 12646 }, { "epoch": 0.62, "grad_norm": 0.5100905895233154, "learning_rate": 0.0005389971350617972, "loss": 3.2195, "step": 12647 }, { "epoch": 0.62, "grad_norm": 0.5141370296478271, "learning_rate": 0.0005389878288046007, "loss": 3.3391, "step": 12648 }, { "epoch": 0.62, "grad_norm": 0.49485722184181213, "learning_rate": 0.0005389785219179562, "loss": 3.0948, "step": 12649 }, { "epoch": 0.62, "grad_norm": 0.5505667328834534, "learning_rate": 0.0005389692144018883, "loss": 3.1628, "step": 12650 }, { "epoch": 0.62, "grad_norm": 0.5638545751571655, "learning_rate": 0.0005389599062564216, "loss": 3.0784, "step": 12651 }, { "epoch": 0.62, "grad_norm": 0.5764116048812866, "learning_rate": 0.0005389505974815803, "loss": 3.0416, "step": 12652 }, { "epoch": 0.62, "grad_norm": 0.516578197479248, "learning_rate": 0.0005389412880773891, "loss": 3.4401, "step": 12653 }, { "epoch": 0.62, "grad_norm": 0.5281229019165039, "learning_rate": 0.0005389319780438728, "loss": 3.1235, "step": 12654 }, { "epoch": 0.62, "grad_norm": 0.5611839294433594, "learning_rate": 0.0005389226673810554, "loss": 3.2922, "step": 12655 }, { "epoch": 0.62, "grad_norm": 0.5751029849052429, "learning_rate": 0.0005389133560889617, "loss": 3.0524, "step": 12656 }, { "epoch": 0.62, "grad_norm": 0.5245303511619568, "learning_rate": 0.0005389040441676164, "loss": 3.2482, "step": 12657 }, { "epoch": 0.62, "grad_norm": 0.5082685351371765, "learning_rate": 0.0005388947316170437, "loss": 3.2612, "step": 12658 }, { "epoch": 0.62, "grad_norm": 0.5107821822166443, "learning_rate": 0.0005388854184372682, "loss": 3.3678, "step": 12659 }, { "epoch": 0.62, "grad_norm": 0.5098000168800354, "learning_rate": 0.0005388761046283146, "loss": 3.1828, "step": 12660 }, { "epoch": 0.62, "grad_norm": 0.506803035736084, "learning_rate": 0.0005388667901902071, "loss": 3.3027, "step": 12661 }, { "epoch": 0.62, "grad_norm": 0.5258462429046631, "learning_rate": 0.0005388574751229707, "loss": 3.2059, "step": 12662 }, { "epoch": 0.62, "grad_norm": 0.5080270767211914, "learning_rate": 0.0005388481594266295, "loss": 3.2258, "step": 12663 }, { "epoch": 0.62, "grad_norm": 0.5246100425720215, "learning_rate": 0.0005388388431012083, "loss": 3.2327, "step": 12664 }, { "epoch": 0.62, "grad_norm": 0.5078802108764648, "learning_rate": 0.0005388295261467315, "loss": 3.1975, "step": 12665 }, { "epoch": 0.62, "grad_norm": 0.5111520290374756, "learning_rate": 0.0005388202085632237, "loss": 3.2364, "step": 12666 }, { "epoch": 0.62, "grad_norm": 0.5226684212684631, "learning_rate": 0.0005388108903507095, "loss": 3.118, "step": 12667 }, { "epoch": 0.62, "grad_norm": 0.5708491802215576, "learning_rate": 0.0005388015715092133, "loss": 3.3362, "step": 12668 }, { "epoch": 0.62, "grad_norm": 0.5695480108261108, "learning_rate": 0.0005387922520387597, "loss": 3.2954, "step": 12669 }, { "epoch": 0.62, "grad_norm": 0.4956531822681427, "learning_rate": 0.0005387829319393735, "loss": 3.1396, "step": 12670 }, { "epoch": 0.62, "grad_norm": 0.5610358715057373, "learning_rate": 0.0005387736112110787, "loss": 3.1429, "step": 12671 }, { "epoch": 0.62, "grad_norm": 0.5279606580734253, "learning_rate": 0.0005387642898539004, "loss": 3.1699, "step": 12672 }, { "epoch": 0.62, "grad_norm": 0.5159085988998413, "learning_rate": 0.0005387549678678627, "loss": 3.1701, "step": 12673 }, { "epoch": 0.62, "grad_norm": 0.5232102870941162, "learning_rate": 0.0005387456452529904, "loss": 3.2938, "step": 12674 }, { "epoch": 0.62, "grad_norm": 0.5625350475311279, "learning_rate": 0.0005387363220093082, "loss": 3.1154, "step": 12675 }, { "epoch": 0.62, "grad_norm": 0.524451732635498, "learning_rate": 0.0005387269981368403, "loss": 3.2826, "step": 12676 }, { "epoch": 0.62, "grad_norm": 0.5030291080474854, "learning_rate": 0.0005387176736356116, "loss": 3.0269, "step": 12677 }, { "epoch": 0.62, "grad_norm": 0.5304322242736816, "learning_rate": 0.0005387083485056463, "loss": 3.3728, "step": 12678 }, { "epoch": 0.62, "grad_norm": 0.5527458786964417, "learning_rate": 0.0005386990227469693, "loss": 3.2158, "step": 12679 }, { "epoch": 0.62, "grad_norm": 0.5265601277351379, "learning_rate": 0.0005386896963596049, "loss": 3.3758, "step": 12680 }, { "epoch": 0.62, "grad_norm": 0.5212921500205994, "learning_rate": 0.0005386803693435778, "loss": 3.3079, "step": 12681 }, { "epoch": 0.62, "grad_norm": 0.5151424407958984, "learning_rate": 0.0005386710416989126, "loss": 3.2643, "step": 12682 }, { "epoch": 0.62, "grad_norm": 0.5002523064613342, "learning_rate": 0.0005386617134256337, "loss": 3.2302, "step": 12683 }, { "epoch": 0.62, "grad_norm": 0.5380643010139465, "learning_rate": 0.0005386523845237659, "loss": 3.1046, "step": 12684 }, { "epoch": 0.62, "grad_norm": 0.5118758082389832, "learning_rate": 0.0005386430549933336, "loss": 3.192, "step": 12685 }, { "epoch": 0.62, "grad_norm": 0.5095431804656982, "learning_rate": 0.0005386337248343613, "loss": 3.3801, "step": 12686 }, { "epoch": 0.62, "grad_norm": 0.5566089749336243, "learning_rate": 0.0005386243940468738, "loss": 3.0442, "step": 12687 }, { "epoch": 0.62, "grad_norm": 0.4650208055973053, "learning_rate": 0.0005386150626308957, "loss": 3.0336, "step": 12688 }, { "epoch": 0.62, "grad_norm": 0.5460222959518433, "learning_rate": 0.0005386057305864513, "loss": 3.206, "step": 12689 }, { "epoch": 0.62, "grad_norm": 0.5706539154052734, "learning_rate": 0.0005385963979135653, "loss": 3.3201, "step": 12690 }, { "epoch": 0.62, "grad_norm": 0.5206764936447144, "learning_rate": 0.0005385870646122624, "loss": 3.2854, "step": 12691 }, { "epoch": 0.62, "grad_norm": 0.4984837770462036, "learning_rate": 0.000538577730682567, "loss": 3.2708, "step": 12692 }, { "epoch": 0.62, "grad_norm": 0.5224167108535767, "learning_rate": 0.0005385683961245039, "loss": 3.2557, "step": 12693 }, { "epoch": 0.62, "grad_norm": 0.5294681787490845, "learning_rate": 0.0005385590609380974, "loss": 3.1294, "step": 12694 }, { "epoch": 0.62, "grad_norm": 0.544699490070343, "learning_rate": 0.0005385497251233724, "loss": 3.3876, "step": 12695 }, { "epoch": 0.62, "grad_norm": 0.5481709241867065, "learning_rate": 0.0005385403886803532, "loss": 3.2215, "step": 12696 }, { "epoch": 0.62, "grad_norm": 0.4763781428337097, "learning_rate": 0.0005385310516090646, "loss": 3.4355, "step": 12697 }, { "epoch": 0.62, "grad_norm": 0.48930293321609497, "learning_rate": 0.0005385217139095311, "loss": 3.1579, "step": 12698 }, { "epoch": 0.62, "grad_norm": 0.48442885279655457, "learning_rate": 0.0005385123755817773, "loss": 3.3198, "step": 12699 }, { "epoch": 0.62, "grad_norm": 0.551783561706543, "learning_rate": 0.0005385030366258278, "loss": 3.3245, "step": 12700 }, { "epoch": 0.62, "grad_norm": 0.4957975149154663, "learning_rate": 0.0005384936970417073, "loss": 3.1174, "step": 12701 }, { "epoch": 0.62, "grad_norm": 0.5431187152862549, "learning_rate": 0.0005384843568294401, "loss": 3.0538, "step": 12702 }, { "epoch": 0.62, "grad_norm": 0.5256251096725464, "learning_rate": 0.0005384750159890512, "loss": 3.1746, "step": 12703 }, { "epoch": 0.62, "grad_norm": 0.5222598910331726, "learning_rate": 0.0005384656745205649, "loss": 3.4051, "step": 12704 }, { "epoch": 0.62, "grad_norm": 0.5458401441574097, "learning_rate": 0.000538456332424006, "loss": 3.1197, "step": 12705 }, { "epoch": 0.62, "grad_norm": 0.4959386885166168, "learning_rate": 0.0005384469896993989, "loss": 3.2583, "step": 12706 }, { "epoch": 0.62, "grad_norm": 0.5014752745628357, "learning_rate": 0.0005384376463467683, "loss": 3.1556, "step": 12707 }, { "epoch": 0.62, "grad_norm": 0.491812527179718, "learning_rate": 0.0005384283023661389, "loss": 3.2109, "step": 12708 }, { "epoch": 0.62, "grad_norm": 0.5517925024032593, "learning_rate": 0.0005384189577575352, "loss": 3.2206, "step": 12709 }, { "epoch": 0.62, "grad_norm": 0.504330039024353, "learning_rate": 0.000538409612520982, "loss": 3.1117, "step": 12710 }, { "epoch": 0.62, "grad_norm": 0.5130345225334167, "learning_rate": 0.0005384002666565036, "loss": 3.1259, "step": 12711 }, { "epoch": 0.62, "grad_norm": 0.511404275894165, "learning_rate": 0.0005383909201641247, "loss": 3.2433, "step": 12712 }, { "epoch": 0.62, "grad_norm": 0.5336930155754089, "learning_rate": 0.0005383815730438702, "loss": 3.3069, "step": 12713 }, { "epoch": 0.62, "grad_norm": 0.5323059558868408, "learning_rate": 0.0005383722252957644, "loss": 3.0323, "step": 12714 }, { "epoch": 0.62, "grad_norm": 0.540348470211029, "learning_rate": 0.000538362876919832, "loss": 2.9597, "step": 12715 }, { "epoch": 0.62, "grad_norm": 0.5608701109886169, "learning_rate": 0.0005383535279160978, "loss": 3.2359, "step": 12716 }, { "epoch": 0.62, "grad_norm": 0.5046235918998718, "learning_rate": 0.0005383441782845863, "loss": 3.0872, "step": 12717 }, { "epoch": 0.62, "grad_norm": 0.5153566002845764, "learning_rate": 0.0005383348280253219, "loss": 3.1468, "step": 12718 }, { "epoch": 0.62, "grad_norm": 0.521328866481781, "learning_rate": 0.0005383254771383296, "loss": 3.3589, "step": 12719 }, { "epoch": 0.62, "grad_norm": 0.505582332611084, "learning_rate": 0.0005383161256236337, "loss": 3.3211, "step": 12720 }, { "epoch": 0.62, "grad_norm": 0.5521395206451416, "learning_rate": 0.0005383067734812592, "loss": 3.3091, "step": 12721 }, { "epoch": 0.62, "grad_norm": 0.5566513538360596, "learning_rate": 0.0005382974207112304, "loss": 3.1199, "step": 12722 }, { "epoch": 0.62, "grad_norm": 0.5230540633201599, "learning_rate": 0.000538288067313572, "loss": 3.3551, "step": 12723 }, { "epoch": 0.62, "grad_norm": 0.5009475350379944, "learning_rate": 0.0005382787132883087, "loss": 3.4286, "step": 12724 }, { "epoch": 0.62, "grad_norm": 0.5285469889640808, "learning_rate": 0.0005382693586354653, "loss": 3.1925, "step": 12725 }, { "epoch": 0.62, "grad_norm": 0.5307457447052002, "learning_rate": 0.000538260003355066, "loss": 3.2882, "step": 12726 }, { "epoch": 0.62, "grad_norm": 0.5530621409416199, "learning_rate": 0.0005382506474471359, "loss": 3.1743, "step": 12727 }, { "epoch": 0.62, "grad_norm": 0.5210942625999451, "learning_rate": 0.0005382412909116993, "loss": 3.0778, "step": 12728 }, { "epoch": 0.62, "grad_norm": 0.4954610764980316, "learning_rate": 0.0005382319337487812, "loss": 3.3454, "step": 12729 }, { "epoch": 0.62, "grad_norm": 0.5154182314872742, "learning_rate": 0.0005382225759584058, "loss": 3.3812, "step": 12730 }, { "epoch": 0.62, "grad_norm": 0.5248116850852966, "learning_rate": 0.0005382132175405982, "loss": 3.4016, "step": 12731 }, { "epoch": 0.62, "grad_norm": 0.524318277835846, "learning_rate": 0.0005382038584953828, "loss": 3.1419, "step": 12732 }, { "epoch": 0.62, "grad_norm": 0.5205380916595459, "learning_rate": 0.0005381944988227842, "loss": 3.2683, "step": 12733 }, { "epoch": 0.62, "grad_norm": 0.5063703060150146, "learning_rate": 0.000538185138522827, "loss": 3.3365, "step": 12734 }, { "epoch": 0.62, "grad_norm": 0.5397539734840393, "learning_rate": 0.0005381757775955362, "loss": 3.23, "step": 12735 }, { "epoch": 0.62, "grad_norm": 0.5201455950737, "learning_rate": 0.0005381664160409362, "loss": 3.2598, "step": 12736 }, { "epoch": 0.62, "grad_norm": 0.5498931407928467, "learning_rate": 0.0005381570538590517, "loss": 3.2865, "step": 12737 }, { "epoch": 0.62, "grad_norm": 0.4814511239528656, "learning_rate": 0.0005381476910499073, "loss": 3.4717, "step": 12738 }, { "epoch": 0.62, "grad_norm": 0.49921301007270813, "learning_rate": 0.0005381383276135277, "loss": 3.3063, "step": 12739 }, { "epoch": 0.62, "grad_norm": 0.5252735614776611, "learning_rate": 0.0005381289635499376, "loss": 3.1865, "step": 12740 }, { "epoch": 0.62, "grad_norm": 0.5268913507461548, "learning_rate": 0.0005381195988591617, "loss": 3.1435, "step": 12741 }, { "epoch": 0.62, "grad_norm": 0.4931754171848297, "learning_rate": 0.0005381102335412245, "loss": 3.4524, "step": 12742 }, { "epoch": 0.62, "grad_norm": 0.5258716940879822, "learning_rate": 0.0005381008675961509, "loss": 3.3626, "step": 12743 }, { "epoch": 0.62, "grad_norm": 0.5764732360839844, "learning_rate": 0.0005380915010239654, "loss": 3.3538, "step": 12744 }, { "epoch": 0.62, "grad_norm": 0.49837490916252136, "learning_rate": 0.0005380821338246926, "loss": 3.1224, "step": 12745 }, { "epoch": 0.62, "grad_norm": 0.5089791417121887, "learning_rate": 0.0005380727659983573, "loss": 3.1517, "step": 12746 }, { "epoch": 0.62, "grad_norm": 0.5005397796630859, "learning_rate": 0.0005380633975449842, "loss": 3.1022, "step": 12747 }, { "epoch": 0.62, "grad_norm": 0.5430983901023865, "learning_rate": 0.0005380540284645979, "loss": 3.3593, "step": 12748 }, { "epoch": 0.62, "grad_norm": 0.5346560478210449, "learning_rate": 0.0005380446587572231, "loss": 3.2986, "step": 12749 }, { "epoch": 0.62, "grad_norm": 0.5033416152000427, "learning_rate": 0.0005380352884228846, "loss": 3.4088, "step": 12750 }, { "epoch": 0.62, "grad_norm": 0.49493205547332764, "learning_rate": 0.0005380259174616068, "loss": 3.3954, "step": 12751 }, { "epoch": 0.62, "grad_norm": 0.49495062232017517, "learning_rate": 0.0005380165458734147, "loss": 3.3133, "step": 12752 }, { "epoch": 0.62, "grad_norm": 0.5404918193817139, "learning_rate": 0.0005380071736583327, "loss": 3.1616, "step": 12753 }, { "epoch": 0.63, "grad_norm": 0.5051382184028625, "learning_rate": 0.0005379978008163857, "loss": 3.103, "step": 12754 }, { "epoch": 0.63, "grad_norm": 0.5010060667991638, "learning_rate": 0.0005379884273475982, "loss": 3.4863, "step": 12755 }, { "epoch": 0.63, "grad_norm": 0.4935106933116913, "learning_rate": 0.0005379790532519951, "loss": 3.2361, "step": 12756 }, { "epoch": 0.63, "grad_norm": 0.5263615846633911, "learning_rate": 0.0005379696785296008, "loss": 3.1684, "step": 12757 }, { "epoch": 0.63, "grad_norm": 0.5205957293510437, "learning_rate": 0.0005379603031804404, "loss": 3.1844, "step": 12758 }, { "epoch": 0.63, "grad_norm": 0.5138325095176697, "learning_rate": 0.0005379509272045381, "loss": 3.2793, "step": 12759 }, { "epoch": 0.63, "grad_norm": 0.5030960440635681, "learning_rate": 0.000537941550601919, "loss": 3.3753, "step": 12760 }, { "epoch": 0.63, "grad_norm": 0.49174514412879944, "learning_rate": 0.0005379321733726077, "loss": 3.2894, "step": 12761 }, { "epoch": 0.63, "grad_norm": 0.5075502395629883, "learning_rate": 0.0005379227955166287, "loss": 3.1492, "step": 12762 }, { "epoch": 0.63, "grad_norm": 0.47649258375167847, "learning_rate": 0.0005379134170340069, "loss": 3.2046, "step": 12763 }, { "epoch": 0.63, "grad_norm": 0.5589198470115662, "learning_rate": 0.000537904037924767, "loss": 3.3309, "step": 12764 }, { "epoch": 0.63, "grad_norm": 0.47373253107070923, "learning_rate": 0.0005378946581889336, "loss": 3.1731, "step": 12765 }, { "epoch": 0.63, "grad_norm": 0.5284726023674011, "learning_rate": 0.0005378852778265315, "loss": 3.2825, "step": 12766 }, { "epoch": 0.63, "grad_norm": 0.5329920053482056, "learning_rate": 0.0005378758968375854, "loss": 3.1192, "step": 12767 }, { "epoch": 0.63, "grad_norm": 0.5024005770683289, "learning_rate": 0.0005378665152221198, "loss": 3.2978, "step": 12768 }, { "epoch": 0.63, "grad_norm": 0.49619409441947937, "learning_rate": 0.0005378571329801596, "loss": 3.3436, "step": 12769 }, { "epoch": 0.63, "grad_norm": 0.5017001628875732, "learning_rate": 0.0005378477501117296, "loss": 3.3643, "step": 12770 }, { "epoch": 0.63, "grad_norm": 0.5197016596794128, "learning_rate": 0.0005378383666168545, "loss": 3.1311, "step": 12771 }, { "epoch": 0.63, "grad_norm": 0.5079033374786377, "learning_rate": 0.0005378289824955587, "loss": 3.3026, "step": 12772 }, { "epoch": 0.63, "grad_norm": 0.4886915981769562, "learning_rate": 0.0005378195977478672, "loss": 3.2003, "step": 12773 }, { "epoch": 0.63, "grad_norm": 0.51219242811203, "learning_rate": 0.0005378102123738046, "loss": 3.3336, "step": 12774 }, { "epoch": 0.63, "grad_norm": 0.5035862326622009, "learning_rate": 0.0005378008263733959, "loss": 3.1417, "step": 12775 }, { "epoch": 0.63, "grad_norm": 0.5228604078292847, "learning_rate": 0.0005377914397466653, "loss": 3.2611, "step": 12776 }, { "epoch": 0.63, "grad_norm": 0.5679159760475159, "learning_rate": 0.000537782052493638, "loss": 3.4281, "step": 12777 }, { "epoch": 0.63, "grad_norm": 0.5072061419487, "learning_rate": 0.0005377726646143384, "loss": 3.1225, "step": 12778 }, { "epoch": 0.63, "grad_norm": 0.4947572350502014, "learning_rate": 0.0005377632761087915, "loss": 3.2278, "step": 12779 }, { "epoch": 0.63, "grad_norm": 0.5147088170051575, "learning_rate": 0.0005377538869770218, "loss": 3.0943, "step": 12780 }, { "epoch": 0.63, "grad_norm": 0.4995141923427582, "learning_rate": 0.0005377444972190541, "loss": 3.4808, "step": 12781 }, { "epoch": 0.63, "grad_norm": 0.5687293410301208, "learning_rate": 0.0005377351068349132, "loss": 3.3684, "step": 12782 }, { "epoch": 0.63, "grad_norm": 0.4961227476596832, "learning_rate": 0.0005377257158246237, "loss": 3.145, "step": 12783 }, { "epoch": 0.63, "grad_norm": 0.5262210965156555, "learning_rate": 0.0005377163241882105, "loss": 3.2739, "step": 12784 }, { "epoch": 0.63, "grad_norm": 0.47961992025375366, "learning_rate": 0.0005377069319256983, "loss": 3.0336, "step": 12785 }, { "epoch": 0.63, "grad_norm": 0.5090808272361755, "learning_rate": 0.0005376975390371116, "loss": 3.2362, "step": 12786 }, { "epoch": 0.63, "grad_norm": 0.5485349297523499, "learning_rate": 0.0005376881455224755, "loss": 2.8677, "step": 12787 }, { "epoch": 0.63, "grad_norm": 0.5236116647720337, "learning_rate": 0.0005376787513818145, "loss": 3.3714, "step": 12788 }, { "epoch": 0.63, "grad_norm": 0.5215224623680115, "learning_rate": 0.0005376693566151535, "loss": 3.1176, "step": 12789 }, { "epoch": 0.63, "grad_norm": 1.1414709091186523, "learning_rate": 0.0005376599612225171, "loss": 3.4685, "step": 12790 }, { "epoch": 0.63, "grad_norm": 0.5067881345748901, "learning_rate": 0.00053765056520393, "loss": 3.2304, "step": 12791 }, { "epoch": 0.63, "grad_norm": 0.5385640263557434, "learning_rate": 0.0005376411685594171, "loss": 3.2995, "step": 12792 }, { "epoch": 0.63, "grad_norm": 0.6865431666374207, "learning_rate": 0.0005376317712890032, "loss": 3.2136, "step": 12793 }, { "epoch": 0.63, "grad_norm": 0.5289443135261536, "learning_rate": 0.0005376223733927129, "loss": 3.317, "step": 12794 }, { "epoch": 0.63, "grad_norm": 0.5437031984329224, "learning_rate": 0.0005376129748705709, "loss": 3.1993, "step": 12795 }, { "epoch": 0.63, "grad_norm": 0.53926020860672, "learning_rate": 0.0005376035757226022, "loss": 3.2593, "step": 12796 }, { "epoch": 0.63, "grad_norm": 0.6048049330711365, "learning_rate": 0.0005375941759488313, "loss": 3.2746, "step": 12797 }, { "epoch": 0.63, "grad_norm": 0.5292719602584839, "learning_rate": 0.0005375847755492831, "loss": 3.1617, "step": 12798 }, { "epoch": 0.63, "grad_norm": 0.5354344844818115, "learning_rate": 0.0005375753745239823, "loss": 3.1443, "step": 12799 }, { "epoch": 0.63, "grad_norm": 0.5445627570152283, "learning_rate": 0.0005375659728729537, "loss": 3.2476, "step": 12800 }, { "epoch": 0.63, "grad_norm": 0.5120712518692017, "learning_rate": 0.0005375565705962222, "loss": 3.1865, "step": 12801 }, { "epoch": 0.63, "grad_norm": 0.5278573036193848, "learning_rate": 0.0005375471676938123, "loss": 3.3421, "step": 12802 }, { "epoch": 0.63, "grad_norm": 0.4721059501171112, "learning_rate": 0.0005375377641657487, "loss": 3.3415, "step": 12803 }, { "epoch": 0.63, "grad_norm": 0.5143515467643738, "learning_rate": 0.0005375283600120565, "loss": 3.4199, "step": 12804 }, { "epoch": 0.63, "grad_norm": 0.5790268778800964, "learning_rate": 0.0005375189552327604, "loss": 3.0928, "step": 12805 }, { "epoch": 0.63, "grad_norm": 0.5175043940544128, "learning_rate": 0.000537509549827885, "loss": 3.5718, "step": 12806 }, { "epoch": 0.63, "grad_norm": 0.5538207292556763, "learning_rate": 0.0005375001437974552, "loss": 3.1262, "step": 12807 }, { "epoch": 0.63, "grad_norm": 0.49472522735595703, "learning_rate": 0.0005374907371414956, "loss": 3.3913, "step": 12808 }, { "epoch": 0.63, "grad_norm": 0.5046636462211609, "learning_rate": 0.0005374813298600312, "loss": 3.2723, "step": 12809 }, { "epoch": 0.63, "grad_norm": 0.538169264793396, "learning_rate": 0.0005374719219530867, "loss": 3.3067, "step": 12810 }, { "epoch": 0.63, "grad_norm": 0.5092310309410095, "learning_rate": 0.0005374625134206868, "loss": 3.0894, "step": 12811 }, { "epoch": 0.63, "grad_norm": 0.5437710881233215, "learning_rate": 0.0005374531042628564, "loss": 3.0988, "step": 12812 }, { "epoch": 0.63, "grad_norm": 0.5265883207321167, "learning_rate": 0.0005374436944796202, "loss": 3.1499, "step": 12813 }, { "epoch": 0.63, "grad_norm": 0.5095208287239075, "learning_rate": 0.0005374342840710029, "loss": 3.3266, "step": 12814 }, { "epoch": 0.63, "grad_norm": 0.5680007338523865, "learning_rate": 0.0005374248730370295, "loss": 3.3421, "step": 12815 }, { "epoch": 0.63, "grad_norm": 0.5302740931510925, "learning_rate": 0.0005374154613777246, "loss": 3.314, "step": 12816 }, { "epoch": 0.63, "grad_norm": 0.5018924474716187, "learning_rate": 0.0005374060490931132, "loss": 3.4018, "step": 12817 }, { "epoch": 0.63, "grad_norm": 0.5072476267814636, "learning_rate": 0.0005373966361832199, "loss": 3.1629, "step": 12818 }, { "epoch": 0.63, "grad_norm": 0.5976406931877136, "learning_rate": 0.0005373872226480695, "loss": 3.2877, "step": 12819 }, { "epoch": 0.63, "grad_norm": 0.5225663781166077, "learning_rate": 0.0005373778084876869, "loss": 3.3186, "step": 12820 }, { "epoch": 0.63, "grad_norm": 0.5068392753601074, "learning_rate": 0.0005373683937020967, "loss": 3.1388, "step": 12821 }, { "epoch": 0.63, "grad_norm": 0.5025680065155029, "learning_rate": 0.000537358978291324, "loss": 3.3463, "step": 12822 }, { "epoch": 0.63, "grad_norm": 0.5425643920898438, "learning_rate": 0.0005373495622553934, "loss": 3.1294, "step": 12823 }, { "epoch": 0.63, "grad_norm": 0.5345323085784912, "learning_rate": 0.0005373401455943298, "loss": 3.2662, "step": 12824 }, { "epoch": 0.63, "grad_norm": 0.5626019239425659, "learning_rate": 0.0005373307283081577, "loss": 3.4571, "step": 12825 }, { "epoch": 0.63, "grad_norm": 0.5290101170539856, "learning_rate": 0.0005373213103969024, "loss": 3.3035, "step": 12826 }, { "epoch": 0.63, "grad_norm": 0.5548025965690613, "learning_rate": 0.0005373118918605883, "loss": 3.1378, "step": 12827 }, { "epoch": 0.63, "grad_norm": 0.5364861488342285, "learning_rate": 0.0005373024726992403, "loss": 3.3674, "step": 12828 }, { "epoch": 0.63, "grad_norm": 0.5761748552322388, "learning_rate": 0.0005372930529128833, "loss": 3.2908, "step": 12829 }, { "epoch": 0.63, "grad_norm": 0.5271551012992859, "learning_rate": 0.0005372836325015422, "loss": 3.2864, "step": 12830 }, { "epoch": 0.63, "grad_norm": 0.5387271046638489, "learning_rate": 0.0005372742114652415, "loss": 3.0969, "step": 12831 }, { "epoch": 0.63, "grad_norm": 0.48338204622268677, "learning_rate": 0.0005372647898040062, "loss": 3.1388, "step": 12832 }, { "epoch": 0.63, "grad_norm": 0.5032204389572144, "learning_rate": 0.0005372553675178612, "loss": 3.0124, "step": 12833 }, { "epoch": 0.63, "grad_norm": 0.5275563597679138, "learning_rate": 0.0005372459446068312, "loss": 3.3673, "step": 12834 }, { "epoch": 0.63, "grad_norm": 0.5216962695121765, "learning_rate": 0.000537236521070941, "loss": 3.2244, "step": 12835 }, { "epoch": 0.63, "grad_norm": 0.4898160398006439, "learning_rate": 0.0005372270969102156, "loss": 3.2835, "step": 12836 }, { "epoch": 0.63, "grad_norm": 0.5112550854682922, "learning_rate": 0.0005372176721246795, "loss": 3.2138, "step": 12837 }, { "epoch": 0.63, "grad_norm": 0.541236937046051, "learning_rate": 0.0005372082467143578, "loss": 3.1371, "step": 12838 }, { "epoch": 0.63, "grad_norm": 0.5265771150588989, "learning_rate": 0.0005371988206792752, "loss": 3.2119, "step": 12839 }, { "epoch": 0.63, "grad_norm": 0.5151164531707764, "learning_rate": 0.0005371893940194566, "loss": 3.1975, "step": 12840 }, { "epoch": 0.63, "grad_norm": 0.4924558997154236, "learning_rate": 0.0005371799667349267, "loss": 3.1876, "step": 12841 }, { "epoch": 0.63, "grad_norm": 0.5264354348182678, "learning_rate": 0.0005371705388257105, "loss": 3.152, "step": 12842 }, { "epoch": 0.63, "grad_norm": 0.5298112034797668, "learning_rate": 0.0005371611102918327, "loss": 3.111, "step": 12843 }, { "epoch": 0.63, "grad_norm": 0.5277727842330933, "learning_rate": 0.0005371516811333182, "loss": 3.1264, "step": 12844 }, { "epoch": 0.63, "grad_norm": 0.5544726252555847, "learning_rate": 0.0005371422513501919, "loss": 3.0475, "step": 12845 }, { "epoch": 0.63, "grad_norm": 0.4931381940841675, "learning_rate": 0.0005371328209424783, "loss": 3.3639, "step": 12846 }, { "epoch": 0.63, "grad_norm": 0.4993634819984436, "learning_rate": 0.0005371233899102027, "loss": 3.1516, "step": 12847 }, { "epoch": 0.63, "grad_norm": 0.5138599276542664, "learning_rate": 0.0005371139582533896, "loss": 3.198, "step": 12848 }, { "epoch": 0.63, "grad_norm": 0.5093522071838379, "learning_rate": 0.000537104525972064, "loss": 3.03, "step": 12849 }, { "epoch": 0.63, "grad_norm": 0.4839041531085968, "learning_rate": 0.0005370950930662508, "loss": 3.2278, "step": 12850 }, { "epoch": 0.63, "grad_norm": 0.48701995611190796, "learning_rate": 0.0005370856595359746, "loss": 3.2649, "step": 12851 }, { "epoch": 0.63, "grad_norm": 0.5446614027023315, "learning_rate": 0.0005370762253812605, "loss": 3.0454, "step": 12852 }, { "epoch": 0.63, "grad_norm": 0.500033438205719, "learning_rate": 0.0005370667906021332, "loss": 3.0629, "step": 12853 }, { "epoch": 0.63, "grad_norm": 0.5227843523025513, "learning_rate": 0.0005370573551986178, "loss": 3.3169, "step": 12854 }, { "epoch": 0.63, "grad_norm": 0.4906866252422333, "learning_rate": 0.0005370479191707387, "loss": 3.2001, "step": 12855 }, { "epoch": 0.63, "grad_norm": 0.5372707843780518, "learning_rate": 0.0005370384825185211, "loss": 3.0848, "step": 12856 }, { "epoch": 0.63, "grad_norm": 0.5140169262886047, "learning_rate": 0.0005370290452419898, "loss": 3.3082, "step": 12857 }, { "epoch": 0.63, "grad_norm": 0.5246527791023254, "learning_rate": 0.0005370196073411696, "loss": 3.0989, "step": 12858 }, { "epoch": 0.63, "grad_norm": 0.5074948668479919, "learning_rate": 0.0005370101688160852, "loss": 3.1239, "step": 12859 }, { "epoch": 0.63, "grad_norm": 0.5050709843635559, "learning_rate": 0.0005370007296667617, "loss": 3.1019, "step": 12860 }, { "epoch": 0.63, "grad_norm": 0.5377639532089233, "learning_rate": 0.000536991289893224, "loss": 3.1435, "step": 12861 }, { "epoch": 0.63, "grad_norm": 0.526069164276123, "learning_rate": 0.0005369818494954968, "loss": 3.48, "step": 12862 }, { "epoch": 0.63, "grad_norm": 0.47031062841415405, "learning_rate": 0.0005369724084736051, "loss": 3.3851, "step": 12863 }, { "epoch": 0.63, "grad_norm": 0.510369598865509, "learning_rate": 0.0005369629668275736, "loss": 3.2161, "step": 12864 }, { "epoch": 0.63, "grad_norm": 0.5503350496292114, "learning_rate": 0.0005369535245574272, "loss": 3.0545, "step": 12865 }, { "epoch": 0.63, "grad_norm": 0.48866593837738037, "learning_rate": 0.0005369440816631909, "loss": 3.3215, "step": 12866 }, { "epoch": 0.63, "grad_norm": 0.553887665271759, "learning_rate": 0.0005369346381448894, "loss": 3.1592, "step": 12867 }, { "epoch": 0.63, "grad_norm": 0.5026930570602417, "learning_rate": 0.0005369251940025478, "loss": 3.1913, "step": 12868 }, { "epoch": 0.63, "grad_norm": 0.49965840578079224, "learning_rate": 0.0005369157492361907, "loss": 3.1695, "step": 12869 }, { "epoch": 0.63, "grad_norm": 0.5119639039039612, "learning_rate": 0.0005369063038458432, "loss": 3.1687, "step": 12870 }, { "epoch": 0.63, "grad_norm": 0.5644335746765137, "learning_rate": 0.00053689685783153, "loss": 3.3533, "step": 12871 }, { "epoch": 0.63, "grad_norm": 0.5100526213645935, "learning_rate": 0.0005368874111932761, "loss": 3.2576, "step": 12872 }, { "epoch": 0.63, "grad_norm": 0.5507298111915588, "learning_rate": 0.0005368779639311064, "loss": 3.2868, "step": 12873 }, { "epoch": 0.63, "grad_norm": 0.519503116607666, "learning_rate": 0.0005368685160450457, "loss": 3.2348, "step": 12874 }, { "epoch": 0.63, "grad_norm": 0.5227647423744202, "learning_rate": 0.000536859067535119, "loss": 3.1734, "step": 12875 }, { "epoch": 0.63, "grad_norm": 0.5177075862884521, "learning_rate": 0.000536849618401351, "loss": 2.9512, "step": 12876 }, { "epoch": 0.63, "grad_norm": 0.49735453724861145, "learning_rate": 0.0005368401686437667, "loss": 3.137, "step": 12877 }, { "epoch": 0.63, "grad_norm": 0.5045885443687439, "learning_rate": 0.0005368307182623909, "loss": 3.2829, "step": 12878 }, { "epoch": 0.63, "grad_norm": 0.4974973499774933, "learning_rate": 0.0005368212672572487, "loss": 3.4202, "step": 12879 }, { "epoch": 0.63, "grad_norm": 0.49687159061431885, "learning_rate": 0.0005368118156283648, "loss": 3.3512, "step": 12880 }, { "epoch": 0.63, "grad_norm": 0.5176152586936951, "learning_rate": 0.0005368023633757642, "loss": 3.3083, "step": 12881 }, { "epoch": 0.63, "grad_norm": 0.4925430417060852, "learning_rate": 0.0005367929104994717, "loss": 3.1931, "step": 12882 }, { "epoch": 0.63, "grad_norm": 0.5005999207496643, "learning_rate": 0.0005367834569995122, "loss": 3.2917, "step": 12883 }, { "epoch": 0.63, "grad_norm": 0.5244042277336121, "learning_rate": 0.0005367740028759108, "loss": 3.0702, "step": 12884 }, { "epoch": 0.63, "grad_norm": 0.567849338054657, "learning_rate": 0.0005367645481286921, "loss": 3.2763, "step": 12885 }, { "epoch": 0.63, "grad_norm": 0.5182596445083618, "learning_rate": 0.0005367550927578812, "loss": 3.2015, "step": 12886 }, { "epoch": 0.63, "grad_norm": 0.5369324684143066, "learning_rate": 0.0005367456367635029, "loss": 3.1494, "step": 12887 }, { "epoch": 0.63, "grad_norm": 0.5310854911804199, "learning_rate": 0.0005367361801455823, "loss": 3.4733, "step": 12888 }, { "epoch": 0.63, "grad_norm": 0.4995528757572174, "learning_rate": 0.0005367267229041441, "loss": 3.316, "step": 12889 }, { "epoch": 0.63, "grad_norm": 0.5234096646308899, "learning_rate": 0.000536717265039213, "loss": 3.3181, "step": 12890 }, { "epoch": 0.63, "grad_norm": 0.5279734134674072, "learning_rate": 0.0005367078065508146, "loss": 3.3797, "step": 12891 }, { "epoch": 0.63, "grad_norm": 0.563451886177063, "learning_rate": 0.0005366983474389732, "loss": 3.2402, "step": 12892 }, { "epoch": 0.63, "grad_norm": 0.5074998140335083, "learning_rate": 0.0005366888877037138, "loss": 3.2033, "step": 12893 }, { "epoch": 0.63, "grad_norm": 0.5854350924491882, "learning_rate": 0.0005366794273450615, "loss": 3.3255, "step": 12894 }, { "epoch": 0.63, "grad_norm": 0.5305065512657166, "learning_rate": 0.0005366699663630413, "loss": 3.1053, "step": 12895 }, { "epoch": 0.63, "grad_norm": 0.5451061725616455, "learning_rate": 0.0005366605047576778, "loss": 3.2225, "step": 12896 }, { "epoch": 0.63, "grad_norm": 0.48388218879699707, "learning_rate": 0.000536651042528996, "loss": 3.3632, "step": 12897 }, { "epoch": 0.63, "grad_norm": 0.5109940767288208, "learning_rate": 0.000536641579677021, "loss": 3.1846, "step": 12898 }, { "epoch": 0.63, "grad_norm": 0.5055460333824158, "learning_rate": 0.0005366321162017778, "loss": 3.1477, "step": 12899 }, { "epoch": 0.63, "grad_norm": 0.5371001958847046, "learning_rate": 0.0005366226521032908, "loss": 3.1253, "step": 12900 }, { "epoch": 0.63, "grad_norm": 0.5191012024879456, "learning_rate": 0.0005366131873815855, "loss": 3.2262, "step": 12901 }, { "epoch": 0.63, "grad_norm": 0.5225028991699219, "learning_rate": 0.0005366037220366866, "loss": 3.5542, "step": 12902 }, { "epoch": 0.63, "grad_norm": 0.49432340264320374, "learning_rate": 0.0005365942560686189, "loss": 3.3985, "step": 12903 }, { "epoch": 0.63, "grad_norm": 0.5132206082344055, "learning_rate": 0.0005365847894774076, "loss": 3.161, "step": 12904 }, { "epoch": 0.63, "grad_norm": 0.550641655921936, "learning_rate": 0.0005365753222630774, "loss": 3.4166, "step": 12905 }, { "epoch": 0.63, "grad_norm": 0.5244418382644653, "learning_rate": 0.0005365658544256533, "loss": 3.1923, "step": 12906 }, { "epoch": 0.63, "grad_norm": 0.5510836839675903, "learning_rate": 0.0005365563859651604, "loss": 3.1081, "step": 12907 }, { "epoch": 0.63, "grad_norm": 0.5539757013320923, "learning_rate": 0.0005365469168816235, "loss": 3.3491, "step": 12908 }, { "epoch": 0.63, "grad_norm": 0.5009015798568726, "learning_rate": 0.0005365374471750674, "loss": 3.2821, "step": 12909 }, { "epoch": 0.63, "grad_norm": 0.4771251976490021, "learning_rate": 0.0005365279768455173, "loss": 3.1718, "step": 12910 }, { "epoch": 0.63, "grad_norm": 0.5260068774223328, "learning_rate": 0.000536518505892998, "loss": 3.1779, "step": 12911 }, { "epoch": 0.63, "grad_norm": 0.526455283164978, "learning_rate": 0.0005365090343175345, "loss": 3.057, "step": 12912 }, { "epoch": 0.63, "grad_norm": 0.5597702860832214, "learning_rate": 0.0005364995621191516, "loss": 3.1074, "step": 12913 }, { "epoch": 0.63, "grad_norm": 0.5866166353225708, "learning_rate": 0.0005364900892978746, "loss": 3.0222, "step": 12914 }, { "epoch": 0.63, "grad_norm": 0.49002647399902344, "learning_rate": 0.000536480615853728, "loss": 3.3856, "step": 12915 }, { "epoch": 0.63, "grad_norm": 0.5181958675384521, "learning_rate": 0.0005364711417867371, "loss": 3.288, "step": 12916 }, { "epoch": 0.63, "grad_norm": 0.5336577892303467, "learning_rate": 0.0005364616670969266, "loss": 3.3546, "step": 12917 }, { "epoch": 0.63, "grad_norm": 0.5243266224861145, "learning_rate": 0.0005364521917843217, "loss": 3.3334, "step": 12918 }, { "epoch": 0.63, "grad_norm": 0.5071207880973816, "learning_rate": 0.0005364427158489472, "loss": 3.1865, "step": 12919 }, { "epoch": 0.63, "grad_norm": 0.5273388028144836, "learning_rate": 0.000536433239290828, "loss": 3.3596, "step": 12920 }, { "epoch": 0.63, "grad_norm": 0.5106172561645508, "learning_rate": 0.0005364237621099893, "loss": 3.2821, "step": 12921 }, { "epoch": 0.63, "grad_norm": 0.5156787037849426, "learning_rate": 0.0005364142843064558, "loss": 3.2522, "step": 12922 }, { "epoch": 0.63, "grad_norm": 0.593102216720581, "learning_rate": 0.0005364048058802527, "loss": 3.1004, "step": 12923 }, { "epoch": 0.63, "grad_norm": 0.5650702118873596, "learning_rate": 0.0005363953268314048, "loss": 3.3883, "step": 12924 }, { "epoch": 0.63, "grad_norm": 0.5169339179992676, "learning_rate": 0.0005363858471599369, "loss": 3.2, "step": 12925 }, { "epoch": 0.63, "grad_norm": 0.5006090998649597, "learning_rate": 0.0005363763668658744, "loss": 3.3361, "step": 12926 }, { "epoch": 0.63, "grad_norm": 0.533066987991333, "learning_rate": 0.000536366885949242, "loss": 3.225, "step": 12927 }, { "epoch": 0.63, "grad_norm": 0.5449780225753784, "learning_rate": 0.0005363574044100647, "loss": 3.1673, "step": 12928 }, { "epoch": 0.63, "grad_norm": 0.49450409412384033, "learning_rate": 0.0005363479222483674, "loss": 3.3605, "step": 12929 }, { "epoch": 0.63, "grad_norm": 0.49364954233169556, "learning_rate": 0.0005363384394641753, "loss": 3.1942, "step": 12930 }, { "epoch": 0.63, "grad_norm": 0.4991125762462616, "learning_rate": 0.0005363289560575131, "loss": 3.0315, "step": 12931 }, { "epoch": 0.63, "grad_norm": 0.6006268858909607, "learning_rate": 0.000536319472028406, "loss": 2.9982, "step": 12932 }, { "epoch": 0.63, "grad_norm": 0.49599677324295044, "learning_rate": 0.0005363099873768787, "loss": 3.0228, "step": 12933 }, { "epoch": 0.63, "grad_norm": 0.5204678177833557, "learning_rate": 0.0005363005021029566, "loss": 3.0848, "step": 12934 }, { "epoch": 0.63, "grad_norm": 0.5052371621131897, "learning_rate": 0.0005362910162066644, "loss": 3.0953, "step": 12935 }, { "epoch": 0.63, "grad_norm": 0.5127009153366089, "learning_rate": 0.0005362815296880272, "loss": 3.5021, "step": 12936 }, { "epoch": 0.63, "grad_norm": 0.5664146542549133, "learning_rate": 0.0005362720425470698, "loss": 3.1467, "step": 12937 }, { "epoch": 0.63, "grad_norm": 0.5321229100227356, "learning_rate": 0.0005362625547838173, "loss": 3.3785, "step": 12938 }, { "epoch": 0.63, "grad_norm": 0.5562167167663574, "learning_rate": 0.0005362530663982948, "loss": 3.4932, "step": 12939 }, { "epoch": 0.63, "grad_norm": 0.508590817451477, "learning_rate": 0.0005362435773905271, "loss": 3.2824, "step": 12940 }, { "epoch": 0.63, "grad_norm": 0.511133074760437, "learning_rate": 0.0005362340877605394, "loss": 3.0221, "step": 12941 }, { "epoch": 0.63, "grad_norm": 0.5249322652816772, "learning_rate": 0.0005362245975083566, "loss": 3.1149, "step": 12942 }, { "epoch": 0.63, "grad_norm": 0.5249238014221191, "learning_rate": 0.0005362151066340035, "loss": 3.0869, "step": 12943 }, { "epoch": 0.63, "grad_norm": 0.5448753237724304, "learning_rate": 0.0005362056151375054, "loss": 3.0745, "step": 12944 }, { "epoch": 0.63, "grad_norm": 0.49821531772613525, "learning_rate": 0.0005361961230188871, "loss": 3.2544, "step": 12945 }, { "epoch": 0.63, "grad_norm": 0.5204089879989624, "learning_rate": 0.0005361866302781736, "loss": 3.0721, "step": 12946 }, { "epoch": 0.63, "grad_norm": 0.5314644575119019, "learning_rate": 0.0005361771369153901, "loss": 3.1426, "step": 12947 }, { "epoch": 0.63, "grad_norm": 0.6273903250694275, "learning_rate": 0.0005361676429305615, "loss": 3.1625, "step": 12948 }, { "epoch": 0.63, "grad_norm": 0.510073184967041, "learning_rate": 0.0005361581483237127, "loss": 3.3129, "step": 12949 }, { "epoch": 0.63, "grad_norm": 0.5302249193191528, "learning_rate": 0.0005361486530948688, "loss": 3.4637, "step": 12950 }, { "epoch": 0.63, "grad_norm": 0.49914729595184326, "learning_rate": 0.0005361391572440547, "loss": 3.4225, "step": 12951 }, { "epoch": 0.63, "grad_norm": 0.5495375394821167, "learning_rate": 0.0005361296607712956, "loss": 3.0738, "step": 12952 }, { "epoch": 0.63, "grad_norm": 0.5367246270179749, "learning_rate": 0.0005361201636766165, "loss": 3.1163, "step": 12953 }, { "epoch": 0.63, "grad_norm": 0.5100537538528442, "learning_rate": 0.0005361106659600423, "loss": 3.1204, "step": 12954 }, { "epoch": 0.63, "grad_norm": 0.49203306436538696, "learning_rate": 0.000536101167621598, "loss": 3.3834, "step": 12955 }, { "epoch": 0.63, "grad_norm": 0.5398481488227844, "learning_rate": 0.0005360916686613087, "loss": 3.2211, "step": 12956 }, { "epoch": 0.63, "grad_norm": 0.5405081510543823, "learning_rate": 0.0005360821690791992, "loss": 3.1053, "step": 12957 }, { "epoch": 0.64, "grad_norm": 0.5752179622650146, "learning_rate": 0.000536072668875295, "loss": 2.9739, "step": 12958 }, { "epoch": 0.64, "grad_norm": 0.4982735514640808, "learning_rate": 0.0005360631680496206, "loss": 3.289, "step": 12959 }, { "epoch": 0.64, "grad_norm": 0.500749409198761, "learning_rate": 0.0005360536666022014, "loss": 3.1595, "step": 12960 }, { "epoch": 0.64, "grad_norm": 0.5099477171897888, "learning_rate": 0.000536044164533062, "loss": 3.2093, "step": 12961 }, { "epoch": 0.64, "grad_norm": 0.5565930604934692, "learning_rate": 0.000536034661842228, "loss": 3.0367, "step": 12962 }, { "epoch": 0.64, "grad_norm": 0.5251843929290771, "learning_rate": 0.0005360251585297239, "loss": 3.1134, "step": 12963 }, { "epoch": 0.64, "grad_norm": 0.5190328359603882, "learning_rate": 0.0005360156545955752, "loss": 3.377, "step": 12964 }, { "epoch": 0.64, "grad_norm": 0.4984540343284607, "learning_rate": 0.0005360061500398065, "loss": 3.4222, "step": 12965 }, { "epoch": 0.64, "grad_norm": 0.5332692861557007, "learning_rate": 0.0005359966448624431, "loss": 3.0324, "step": 12966 }, { "epoch": 0.64, "grad_norm": 0.5812960863113403, "learning_rate": 0.00053598713906351, "loss": 3.3179, "step": 12967 }, { "epoch": 0.64, "grad_norm": 0.498300164937973, "learning_rate": 0.0005359776326430321, "loss": 3.3108, "step": 12968 }, { "epoch": 0.64, "grad_norm": 0.4898775517940521, "learning_rate": 0.0005359681256010345, "loss": 3.215, "step": 12969 }, { "epoch": 0.64, "grad_norm": 0.5427769422531128, "learning_rate": 0.0005359586179375424, "loss": 3.2795, "step": 12970 }, { "epoch": 0.64, "grad_norm": 0.517987072467804, "learning_rate": 0.0005359491096525806, "loss": 2.9976, "step": 12971 }, { "epoch": 0.64, "grad_norm": 0.5325692296028137, "learning_rate": 0.0005359396007461743, "loss": 3.327, "step": 12972 }, { "epoch": 0.64, "grad_norm": 0.5042824149131775, "learning_rate": 0.0005359300912183485, "loss": 3.1492, "step": 12973 }, { "epoch": 0.64, "grad_norm": 0.5307673811912537, "learning_rate": 0.0005359205810691282, "loss": 3.3302, "step": 12974 }, { "epoch": 0.64, "grad_norm": 0.5268490314483643, "learning_rate": 0.0005359110702985385, "loss": 3.3567, "step": 12975 }, { "epoch": 0.64, "grad_norm": 0.5066551566123962, "learning_rate": 0.0005359015589066046, "loss": 3.2215, "step": 12976 }, { "epoch": 0.64, "grad_norm": 0.503842294216156, "learning_rate": 0.0005358920468933511, "loss": 3.2734, "step": 12977 }, { "epoch": 0.64, "grad_norm": 0.5022884011268616, "learning_rate": 0.0005358825342588035, "loss": 3.2803, "step": 12978 }, { "epoch": 0.64, "grad_norm": 0.5310874581336975, "learning_rate": 0.0005358730210029869, "loss": 3.2037, "step": 12979 }, { "epoch": 0.64, "grad_norm": 0.509129524230957, "learning_rate": 0.0005358635071259259, "loss": 3.3409, "step": 12980 }, { "epoch": 0.64, "grad_norm": 0.548210620880127, "learning_rate": 0.0005358539926276459, "loss": 3.2471, "step": 12981 }, { "epoch": 0.64, "grad_norm": 0.5100131630897522, "learning_rate": 0.0005358444775081718, "loss": 3.1354, "step": 12982 }, { "epoch": 0.64, "grad_norm": 0.5340960621833801, "learning_rate": 0.0005358349617675289, "loss": 3.2023, "step": 12983 }, { "epoch": 0.64, "grad_norm": 0.5364866256713867, "learning_rate": 0.000535825445405742, "loss": 3.2475, "step": 12984 }, { "epoch": 0.64, "grad_norm": 0.5168478488922119, "learning_rate": 0.0005358159284228363, "loss": 3.0523, "step": 12985 }, { "epoch": 0.64, "grad_norm": 0.49745577573776245, "learning_rate": 0.0005358064108188366, "loss": 3.1681, "step": 12986 }, { "epoch": 0.64, "grad_norm": 0.5580443739891052, "learning_rate": 0.0005357968925937685, "loss": 3.4102, "step": 12987 }, { "epoch": 0.64, "grad_norm": 0.5218451023101807, "learning_rate": 0.0005357873737476565, "loss": 3.279, "step": 12988 }, { "epoch": 0.64, "grad_norm": 0.5404812693595886, "learning_rate": 0.0005357778542805262, "loss": 3.4815, "step": 12989 }, { "epoch": 0.64, "grad_norm": 0.5017077326774597, "learning_rate": 0.0005357683341924023, "loss": 3.3535, "step": 12990 }, { "epoch": 0.64, "grad_norm": 0.49366968870162964, "learning_rate": 0.00053575881348331, "loss": 3.1195, "step": 12991 }, { "epoch": 0.64, "grad_norm": 0.5663856863975525, "learning_rate": 0.0005357492921532743, "loss": 3.3108, "step": 12992 }, { "epoch": 0.64, "grad_norm": 0.5067052841186523, "learning_rate": 0.0005357397702023204, "loss": 3.3169, "step": 12993 }, { "epoch": 0.64, "grad_norm": 0.5267317295074463, "learning_rate": 0.0005357302476304732, "loss": 3.0451, "step": 12994 }, { "epoch": 0.64, "grad_norm": 0.5492493510246277, "learning_rate": 0.000535720724437758, "loss": 3.091, "step": 12995 }, { "epoch": 0.64, "grad_norm": 0.4951799511909485, "learning_rate": 0.0005357112006241998, "loss": 3.1907, "step": 12996 }, { "epoch": 0.64, "grad_norm": 0.5452933311462402, "learning_rate": 0.0005357016761898236, "loss": 3.1006, "step": 12997 }, { "epoch": 0.64, "grad_norm": 0.5037853121757507, "learning_rate": 0.0005356921511346545, "loss": 3.2203, "step": 12998 }, { "epoch": 0.64, "grad_norm": 0.48112764954566956, "learning_rate": 0.0005356826254587177, "loss": 3.2942, "step": 12999 }, { "epoch": 0.64, "grad_norm": 0.49758175015449524, "learning_rate": 0.0005356730991620382, "loss": 3.2869, "step": 13000 }, { "epoch": 0.64, "grad_norm": 0.5027998089790344, "learning_rate": 0.0005356635722446412, "loss": 3.1453, "step": 13001 }, { "epoch": 0.64, "grad_norm": 0.5159273147583008, "learning_rate": 0.0005356540447065516, "loss": 3.162, "step": 13002 }, { "epoch": 0.64, "grad_norm": 0.5522950887680054, "learning_rate": 0.0005356445165477947, "loss": 3.164, "step": 13003 }, { "epoch": 0.64, "grad_norm": 0.5712302923202515, "learning_rate": 0.0005356349877683954, "loss": 3.1857, "step": 13004 }, { "epoch": 0.64, "grad_norm": 0.4907442331314087, "learning_rate": 0.0005356254583683789, "loss": 3.4676, "step": 13005 }, { "epoch": 0.64, "grad_norm": 0.49316293001174927, "learning_rate": 0.0005356159283477703, "loss": 3.274, "step": 13006 }, { "epoch": 0.64, "grad_norm": 0.5303720831871033, "learning_rate": 0.0005356063977065948, "loss": 3.1756, "step": 13007 }, { "epoch": 0.64, "grad_norm": 0.4843452572822571, "learning_rate": 0.0005355968664448772, "loss": 3.0976, "step": 13008 }, { "epoch": 0.64, "grad_norm": 0.5406621098518372, "learning_rate": 0.0005355873345626429, "loss": 3.216, "step": 13009 }, { "epoch": 0.64, "grad_norm": 0.5066521167755127, "learning_rate": 0.0005355778020599168, "loss": 3.3491, "step": 13010 }, { "epoch": 0.64, "grad_norm": 0.5344280004501343, "learning_rate": 0.0005355682689367243, "loss": 3.3312, "step": 13011 }, { "epoch": 0.64, "grad_norm": 0.5455307364463806, "learning_rate": 0.0005355587351930902, "loss": 3.2931, "step": 13012 }, { "epoch": 0.64, "grad_norm": 0.46568596363067627, "learning_rate": 0.0005355492008290397, "loss": 3.2364, "step": 13013 }, { "epoch": 0.64, "grad_norm": 0.4989795684814453, "learning_rate": 0.000535539665844598, "loss": 3.3095, "step": 13014 }, { "epoch": 0.64, "grad_norm": 0.5595158338546753, "learning_rate": 0.0005355301302397901, "loss": 3.3839, "step": 13015 }, { "epoch": 0.64, "grad_norm": 0.4899807870388031, "learning_rate": 0.0005355205940146412, "loss": 3.3119, "step": 13016 }, { "epoch": 0.64, "grad_norm": 0.48893171548843384, "learning_rate": 0.0005355110571691764, "loss": 3.2628, "step": 13017 }, { "epoch": 0.64, "grad_norm": 0.5232262015342712, "learning_rate": 0.0005355015197034207, "loss": 3.281, "step": 13018 }, { "epoch": 0.64, "grad_norm": 0.5392224788665771, "learning_rate": 0.0005354919816173995, "loss": 3.2029, "step": 13019 }, { "epoch": 0.64, "grad_norm": 0.5342455506324768, "learning_rate": 0.0005354824429111376, "loss": 3.2832, "step": 13020 }, { "epoch": 0.64, "grad_norm": 0.48179611563682556, "learning_rate": 0.0005354729035846603, "loss": 3.0406, "step": 13021 }, { "epoch": 0.64, "grad_norm": 0.4980888068675995, "learning_rate": 0.0005354633636379927, "loss": 3.2632, "step": 13022 }, { "epoch": 0.64, "grad_norm": 0.5115153193473816, "learning_rate": 0.0005354538230711598, "loss": 3.1863, "step": 13023 }, { "epoch": 0.64, "grad_norm": 0.5516042709350586, "learning_rate": 0.0005354442818841869, "loss": 3.2411, "step": 13024 }, { "epoch": 0.64, "grad_norm": 0.4984360933303833, "learning_rate": 0.0005354347400770992, "loss": 3.1063, "step": 13025 }, { "epoch": 0.64, "grad_norm": 0.5270858407020569, "learning_rate": 0.0005354251976499217, "loss": 3.2525, "step": 13026 }, { "epoch": 0.64, "grad_norm": 0.5250088572502136, "learning_rate": 0.0005354156546026794, "loss": 3.3425, "step": 13027 }, { "epoch": 0.64, "grad_norm": 0.5461434125900269, "learning_rate": 0.0005354061109353976, "loss": 3.3327, "step": 13028 }, { "epoch": 0.64, "grad_norm": 0.5004782676696777, "learning_rate": 0.0005353965666481015, "loss": 3.1885, "step": 13029 }, { "epoch": 0.64, "grad_norm": 0.5391425490379333, "learning_rate": 0.0005353870217408161, "loss": 3.0705, "step": 13030 }, { "epoch": 0.64, "grad_norm": 0.5242264866828918, "learning_rate": 0.0005353774762135666, "loss": 3.1418, "step": 13031 }, { "epoch": 0.64, "grad_norm": 0.5164439678192139, "learning_rate": 0.000535367930066378, "loss": 3.2949, "step": 13032 }, { "epoch": 0.64, "grad_norm": 0.5032666921615601, "learning_rate": 0.0005353583832992758, "loss": 3.257, "step": 13033 }, { "epoch": 0.64, "grad_norm": 0.5135819315910339, "learning_rate": 0.0005353488359122848, "loss": 3.1992, "step": 13034 }, { "epoch": 0.64, "grad_norm": 0.5079220533370972, "learning_rate": 0.0005353392879054302, "loss": 3.2749, "step": 13035 }, { "epoch": 0.64, "grad_norm": 0.5061142444610596, "learning_rate": 0.0005353297392787373, "loss": 3.1821, "step": 13036 }, { "epoch": 0.64, "grad_norm": 0.5289643406867981, "learning_rate": 0.000535320190032231, "loss": 3.29, "step": 13037 }, { "epoch": 0.64, "grad_norm": 0.5101862549781799, "learning_rate": 0.0005353106401659367, "loss": 3.2464, "step": 13038 }, { "epoch": 0.64, "grad_norm": 0.5174379348754883, "learning_rate": 0.0005353010896798796, "loss": 3.1531, "step": 13039 }, { "epoch": 0.64, "grad_norm": 0.5504981875419617, "learning_rate": 0.0005352915385740845, "loss": 2.8933, "step": 13040 }, { "epoch": 0.64, "grad_norm": 0.534186840057373, "learning_rate": 0.0005352819868485769, "loss": 3.2163, "step": 13041 }, { "epoch": 0.64, "grad_norm": 0.5198753476142883, "learning_rate": 0.0005352724345033818, "loss": 3.1988, "step": 13042 }, { "epoch": 0.64, "grad_norm": 0.5213879942893982, "learning_rate": 0.0005352628815385244, "loss": 3.1679, "step": 13043 }, { "epoch": 0.64, "grad_norm": 0.5233698487281799, "learning_rate": 0.0005352533279540298, "loss": 3.0413, "step": 13044 }, { "epoch": 0.64, "grad_norm": 0.5285971164703369, "learning_rate": 0.0005352437737499232, "loss": 3.0021, "step": 13045 }, { "epoch": 0.64, "grad_norm": 0.5171172618865967, "learning_rate": 0.0005352342189262298, "loss": 3.1727, "step": 13046 }, { "epoch": 0.64, "grad_norm": 0.5061637163162231, "learning_rate": 0.0005352246634829748, "loss": 3.5304, "step": 13047 }, { "epoch": 0.64, "grad_norm": 0.535328209400177, "learning_rate": 0.0005352151074201832, "loss": 3.1083, "step": 13048 }, { "epoch": 0.64, "grad_norm": 0.5753775835037231, "learning_rate": 0.0005352055507378804, "loss": 3.189, "step": 13049 }, { "epoch": 0.64, "grad_norm": 0.5696597695350647, "learning_rate": 0.0005351959934360913, "loss": 3.121, "step": 13050 }, { "epoch": 0.64, "grad_norm": 0.5169010758399963, "learning_rate": 0.0005351864355148413, "loss": 3.13, "step": 13051 }, { "epoch": 0.64, "grad_norm": 0.6170527935028076, "learning_rate": 0.0005351768769741556, "loss": 3.1525, "step": 13052 }, { "epoch": 0.64, "grad_norm": 0.4995497465133667, "learning_rate": 0.000535167317814059, "loss": 3.2603, "step": 13053 }, { "epoch": 0.64, "grad_norm": 0.5279785990715027, "learning_rate": 0.0005351577580345771, "loss": 3.3865, "step": 13054 }, { "epoch": 0.64, "grad_norm": 0.5221437811851501, "learning_rate": 0.000535148197635735, "loss": 3.3428, "step": 13055 }, { "epoch": 0.64, "grad_norm": 0.5271676182746887, "learning_rate": 0.0005351386366175577, "loss": 3.1217, "step": 13056 }, { "epoch": 0.64, "grad_norm": 0.5257646441459656, "learning_rate": 0.0005351290749800705, "loss": 3.2126, "step": 13057 }, { "epoch": 0.64, "grad_norm": 0.5410515666007996, "learning_rate": 0.0005351195127232986, "loss": 3.0734, "step": 13058 }, { "epoch": 0.64, "grad_norm": 0.5229355692863464, "learning_rate": 0.0005351099498472671, "loss": 3.1239, "step": 13059 }, { "epoch": 0.64, "grad_norm": 0.5615711212158203, "learning_rate": 0.0005351003863520013, "loss": 3.0884, "step": 13060 }, { "epoch": 0.64, "grad_norm": 0.5424404740333557, "learning_rate": 0.0005350908222375263, "loss": 3.2056, "step": 13061 }, { "epoch": 0.64, "grad_norm": 0.537496030330658, "learning_rate": 0.0005350812575038673, "loss": 3.3297, "step": 13062 }, { "epoch": 0.64, "grad_norm": 0.49330636858940125, "learning_rate": 0.0005350716921510495, "loss": 3.385, "step": 13063 }, { "epoch": 0.64, "grad_norm": 0.5436633229255676, "learning_rate": 0.0005350621261790982, "loss": 3.198, "step": 13064 }, { "epoch": 0.64, "grad_norm": 0.4999907910823822, "learning_rate": 0.0005350525595880384, "loss": 3.4466, "step": 13065 }, { "epoch": 0.64, "grad_norm": 0.5513013005256653, "learning_rate": 0.0005350429923778954, "loss": 3.4655, "step": 13066 }, { "epoch": 0.64, "grad_norm": 0.5286356806755066, "learning_rate": 0.0005350334245486943, "loss": 3.3866, "step": 13067 }, { "epoch": 0.64, "grad_norm": 0.5245810747146606, "learning_rate": 0.0005350238561004606, "loss": 2.9812, "step": 13068 }, { "epoch": 0.64, "grad_norm": 0.534548282623291, "learning_rate": 0.0005350142870332192, "loss": 3.1293, "step": 13069 }, { "epoch": 0.64, "grad_norm": 0.5076059699058533, "learning_rate": 0.0005350047173469953, "loss": 3.3001, "step": 13070 }, { "epoch": 0.64, "grad_norm": 0.5420432686805725, "learning_rate": 0.0005349951470418144, "loss": 3.0998, "step": 13071 }, { "epoch": 0.64, "grad_norm": 0.5068901777267456, "learning_rate": 0.0005349855761177014, "loss": 3.2598, "step": 13072 }, { "epoch": 0.64, "grad_norm": 0.5439526438713074, "learning_rate": 0.0005349760045746816, "loss": 3.4108, "step": 13073 }, { "epoch": 0.64, "grad_norm": 0.4926943778991699, "learning_rate": 0.0005349664324127803, "loss": 3.2626, "step": 13074 }, { "epoch": 0.64, "grad_norm": 0.5149872899055481, "learning_rate": 0.0005349568596320225, "loss": 3.2164, "step": 13075 }, { "epoch": 0.64, "grad_norm": 0.5110384225845337, "learning_rate": 0.0005349472862324337, "loss": 3.1819, "step": 13076 }, { "epoch": 0.64, "grad_norm": 0.5247209072113037, "learning_rate": 0.0005349377122140388, "loss": 3.2829, "step": 13077 }, { "epoch": 0.64, "grad_norm": 0.553516149520874, "learning_rate": 0.0005349281375768634, "loss": 3.2516, "step": 13078 }, { "epoch": 0.64, "grad_norm": 0.5458715558052063, "learning_rate": 0.0005349185623209324, "loss": 3.2085, "step": 13079 }, { "epoch": 0.64, "grad_norm": 0.4971589744091034, "learning_rate": 0.0005349089864462711, "loss": 3.255, "step": 13080 }, { "epoch": 0.64, "grad_norm": 0.5303791761398315, "learning_rate": 0.0005348994099529047, "loss": 3.2376, "step": 13081 }, { "epoch": 0.64, "grad_norm": 0.5025544762611389, "learning_rate": 0.0005348898328408584, "loss": 3.2325, "step": 13082 }, { "epoch": 0.64, "grad_norm": 0.49780189990997314, "learning_rate": 0.0005348802551101578, "loss": 2.9287, "step": 13083 }, { "epoch": 0.64, "grad_norm": 0.5410318970680237, "learning_rate": 0.0005348706767608275, "loss": 3.2634, "step": 13084 }, { "epoch": 0.64, "grad_norm": 0.5140213966369629, "learning_rate": 0.0005348610977928931, "loss": 3.1781, "step": 13085 }, { "epoch": 0.64, "grad_norm": 0.5628545880317688, "learning_rate": 0.0005348515182063799, "loss": 3.239, "step": 13086 }, { "epoch": 0.64, "grad_norm": 0.5181366205215454, "learning_rate": 0.0005348419380013128, "loss": 3.2054, "step": 13087 }, { "epoch": 0.64, "grad_norm": 0.5096615552902222, "learning_rate": 0.0005348323571777174, "loss": 3.1634, "step": 13088 }, { "epoch": 0.64, "grad_norm": 0.5191645622253418, "learning_rate": 0.0005348227757356187, "loss": 3.2104, "step": 13089 }, { "epoch": 0.64, "grad_norm": 0.5057798624038696, "learning_rate": 0.0005348131936750419, "loss": 3.177, "step": 13090 }, { "epoch": 0.64, "grad_norm": 0.4993162453174591, "learning_rate": 0.0005348036109960125, "loss": 3.3898, "step": 13091 }, { "epoch": 0.64, "grad_norm": 0.5280262231826782, "learning_rate": 0.0005347940276985555, "loss": 3.3019, "step": 13092 }, { "epoch": 0.64, "grad_norm": 0.5497944951057434, "learning_rate": 0.0005347844437826962, "loss": 3.2199, "step": 13093 }, { "epoch": 0.64, "grad_norm": 0.47965291142463684, "learning_rate": 0.0005347748592484599, "loss": 3.2582, "step": 13094 }, { "epoch": 0.64, "grad_norm": 0.5250326991081238, "learning_rate": 0.0005347652740958718, "loss": 3.3034, "step": 13095 }, { "epoch": 0.64, "grad_norm": 0.5235711336135864, "learning_rate": 0.0005347556883249572, "loss": 3.5181, "step": 13096 }, { "epoch": 0.64, "grad_norm": 0.4954131245613098, "learning_rate": 0.0005347461019357412, "loss": 3.2653, "step": 13097 }, { "epoch": 0.64, "grad_norm": 0.5206896066665649, "learning_rate": 0.0005347365149282492, "loss": 3.1443, "step": 13098 }, { "epoch": 0.64, "grad_norm": 0.5158017873764038, "learning_rate": 0.0005347269273025064, "loss": 3.2635, "step": 13099 }, { "epoch": 0.64, "grad_norm": 0.5218068957328796, "learning_rate": 0.0005347173390585381, "loss": 3.2463, "step": 13100 }, { "epoch": 0.64, "grad_norm": 0.5204295516014099, "learning_rate": 0.0005347077501963694, "loss": 3.2756, "step": 13101 }, { "epoch": 0.64, "grad_norm": 0.5045682191848755, "learning_rate": 0.0005346981607160257, "loss": 3.2225, "step": 13102 }, { "epoch": 0.64, "grad_norm": 0.5521380305290222, "learning_rate": 0.0005346885706175321, "loss": 3.0098, "step": 13103 }, { "epoch": 0.64, "grad_norm": 0.5172122716903687, "learning_rate": 0.0005346789799009141, "loss": 3.2474, "step": 13104 }, { "epoch": 0.64, "grad_norm": 0.5108321905136108, "learning_rate": 0.0005346693885661968, "loss": 3.132, "step": 13105 }, { "epoch": 0.64, "grad_norm": 0.4934992790222168, "learning_rate": 0.0005346597966134056, "loss": 3.0254, "step": 13106 }, { "epoch": 0.64, "grad_norm": 0.5101711750030518, "learning_rate": 0.0005346502040425655, "loss": 3.4279, "step": 13107 }, { "epoch": 0.64, "grad_norm": 0.5210503935813904, "learning_rate": 0.000534640610853702, "loss": 3.2178, "step": 13108 }, { "epoch": 0.64, "grad_norm": 0.496926873922348, "learning_rate": 0.0005346310170468402, "loss": 3.1698, "step": 13109 }, { "epoch": 0.64, "grad_norm": 0.49992474913597107, "learning_rate": 0.0005346214226220055, "loss": 3.2576, "step": 13110 }, { "epoch": 0.64, "grad_norm": 0.5001792311668396, "learning_rate": 0.0005346118275792232, "loss": 3.4211, "step": 13111 }, { "epoch": 0.64, "grad_norm": 0.5183314681053162, "learning_rate": 0.0005346022319185185, "loss": 3.19, "step": 13112 }, { "epoch": 0.64, "grad_norm": 0.4935111403465271, "learning_rate": 0.0005345926356399166, "loss": 3.3512, "step": 13113 }, { "epoch": 0.64, "grad_norm": 0.5626166462898254, "learning_rate": 0.0005345830387434428, "loss": 3.3638, "step": 13114 }, { "epoch": 0.64, "grad_norm": 0.5216866731643677, "learning_rate": 0.0005345734412291226, "loss": 3.244, "step": 13115 }, { "epoch": 0.64, "grad_norm": 0.5332128405570984, "learning_rate": 0.000534563843096981, "loss": 3.2552, "step": 13116 }, { "epoch": 0.64, "grad_norm": 0.47880974411964417, "learning_rate": 0.0005345542443470434, "loss": 3.1028, "step": 13117 }, { "epoch": 0.64, "grad_norm": 0.4967505931854248, "learning_rate": 0.000534544644979335, "loss": 3.004, "step": 13118 }, { "epoch": 0.64, "grad_norm": 0.5178172588348389, "learning_rate": 0.0005345350449938811, "loss": 3.2698, "step": 13119 }, { "epoch": 0.64, "grad_norm": 0.5368995070457458, "learning_rate": 0.0005345254443907072, "loss": 3.3812, "step": 13120 }, { "epoch": 0.64, "grad_norm": 0.4906540513038635, "learning_rate": 0.0005345158431698383, "loss": 3.1979, "step": 13121 }, { "epoch": 0.64, "grad_norm": 0.5134233236312866, "learning_rate": 0.0005345062413312998, "loss": 3.3588, "step": 13122 }, { "epoch": 0.64, "grad_norm": 0.5044988989830017, "learning_rate": 0.0005344966388751171, "loss": 3.1731, "step": 13123 }, { "epoch": 0.64, "grad_norm": 0.5189638137817383, "learning_rate": 0.0005344870358013152, "loss": 3.1494, "step": 13124 }, { "epoch": 0.64, "grad_norm": 0.6163929104804993, "learning_rate": 0.0005344774321099197, "loss": 3.2465, "step": 13125 }, { "epoch": 0.64, "grad_norm": 0.5382804870605469, "learning_rate": 0.0005344678278009557, "loss": 3.1525, "step": 13126 }, { "epoch": 0.64, "grad_norm": 0.5891967415809631, "learning_rate": 0.0005344582228744486, "loss": 3.1272, "step": 13127 }, { "epoch": 0.64, "grad_norm": 0.5315770506858826, "learning_rate": 0.0005344486173304236, "loss": 2.9923, "step": 13128 }, { "epoch": 0.64, "grad_norm": 0.5364977121353149, "learning_rate": 0.0005344390111689061, "loss": 3.2317, "step": 13129 }, { "epoch": 0.64, "grad_norm": 0.5116824507713318, "learning_rate": 0.0005344294043899215, "loss": 3.1648, "step": 13130 }, { "epoch": 0.64, "grad_norm": 0.5573520660400391, "learning_rate": 0.0005344197969934949, "loss": 3.353, "step": 13131 }, { "epoch": 0.64, "grad_norm": 0.5192342400550842, "learning_rate": 0.0005344101889796516, "loss": 3.1479, "step": 13132 }, { "epoch": 0.64, "grad_norm": 0.49575215578079224, "learning_rate": 0.0005344005803484171, "loss": 3.4167, "step": 13133 }, { "epoch": 0.64, "grad_norm": 0.5160652995109558, "learning_rate": 0.0005343909710998164, "loss": 3.2124, "step": 13134 }, { "epoch": 0.64, "grad_norm": 0.5108956098556519, "learning_rate": 0.0005343813612338751, "loss": 3.3316, "step": 13135 }, { "epoch": 0.64, "grad_norm": 0.528973400592804, "learning_rate": 0.0005343717507506184, "loss": 3.0333, "step": 13136 }, { "epoch": 0.64, "grad_norm": 0.5308817028999329, "learning_rate": 0.0005343621396500716, "loss": 3.1145, "step": 13137 }, { "epoch": 0.64, "grad_norm": 0.5422894358634949, "learning_rate": 0.00053435252793226, "loss": 3.2858, "step": 13138 }, { "epoch": 0.64, "grad_norm": 0.4965519607067108, "learning_rate": 0.000534342915597209, "loss": 3.2758, "step": 13139 }, { "epoch": 0.64, "grad_norm": 0.5743324160575867, "learning_rate": 0.0005343333026449438, "loss": 3.2823, "step": 13140 }, { "epoch": 0.64, "grad_norm": 0.4840207099914551, "learning_rate": 0.0005343236890754898, "loss": 3.0271, "step": 13141 }, { "epoch": 0.64, "grad_norm": 0.5335875749588013, "learning_rate": 0.0005343140748888724, "loss": 3.2321, "step": 13142 }, { "epoch": 0.64, "grad_norm": 0.5198099613189697, "learning_rate": 0.0005343044600851166, "loss": 3.1172, "step": 13143 }, { "epoch": 0.64, "grad_norm": 0.5045928359031677, "learning_rate": 0.0005342948446642481, "loss": 3.2568, "step": 13144 }, { "epoch": 0.64, "grad_norm": 0.49999576807022095, "learning_rate": 0.0005342852286262921, "loss": 3.2756, "step": 13145 }, { "epoch": 0.64, "grad_norm": 0.5080724954605103, "learning_rate": 0.0005342756119712737, "loss": 3.3996, "step": 13146 }, { "epoch": 0.64, "grad_norm": 0.48446428775787354, "learning_rate": 0.0005342659946992187, "loss": 3.1447, "step": 13147 }, { "epoch": 0.64, "grad_norm": 0.5702810883522034, "learning_rate": 0.000534256376810152, "loss": 3.2459, "step": 13148 }, { "epoch": 0.64, "grad_norm": 0.5255053639411926, "learning_rate": 0.0005342467583040991, "loss": 3.0702, "step": 13149 }, { "epoch": 0.64, "grad_norm": 0.5071660280227661, "learning_rate": 0.0005342371391810853, "loss": 3.3868, "step": 13150 }, { "epoch": 0.64, "grad_norm": 0.5754165053367615, "learning_rate": 0.000534227519441136, "loss": 3.2203, "step": 13151 }, { "epoch": 0.64, "grad_norm": 0.5076290965080261, "learning_rate": 0.0005342178990842765, "loss": 3.4923, "step": 13152 }, { "epoch": 0.64, "grad_norm": 0.5148435831069946, "learning_rate": 0.0005342082781105321, "loss": 3.3192, "step": 13153 }, { "epoch": 0.64, "grad_norm": 0.5352534651756287, "learning_rate": 0.0005341986565199281, "loss": 3.2992, "step": 13154 }, { "epoch": 0.64, "grad_norm": 0.5055733323097229, "learning_rate": 0.0005341890343124902, "loss": 3.1765, "step": 13155 }, { "epoch": 0.64, "grad_norm": 0.5165096521377563, "learning_rate": 0.0005341794114882431, "loss": 3.4136, "step": 13156 }, { "epoch": 0.64, "grad_norm": 0.53326416015625, "learning_rate": 0.0005341697880472126, "loss": 3.2336, "step": 13157 }, { "epoch": 0.64, "grad_norm": 0.512313187122345, "learning_rate": 0.000534160163989424, "loss": 3.2201, "step": 13158 }, { "epoch": 0.64, "grad_norm": 0.5026753544807434, "learning_rate": 0.0005341505393149026, "loss": 3.3028, "step": 13159 }, { "epoch": 0.64, "grad_norm": 0.4956597685813904, "learning_rate": 0.0005341409140236738, "loss": 3.1856, "step": 13160 }, { "epoch": 0.64, "grad_norm": 0.5383108854293823, "learning_rate": 0.0005341312881157628, "loss": 3.2079, "step": 13161 }, { "epoch": 0.65, "grad_norm": 0.5031716823577881, "learning_rate": 0.000534121661591195, "loss": 3.363, "step": 13162 }, { "epoch": 0.65, "grad_norm": 0.564416766166687, "learning_rate": 0.0005341120344499959, "loss": 3.1272, "step": 13163 }, { "epoch": 0.65, "grad_norm": 0.5095632672309875, "learning_rate": 0.0005341024066921907, "loss": 3.2337, "step": 13164 }, { "epoch": 0.65, "grad_norm": 0.520420253276825, "learning_rate": 0.0005340927783178049, "loss": 3.2769, "step": 13165 }, { "epoch": 0.65, "grad_norm": 0.5509207248687744, "learning_rate": 0.0005340831493268637, "loss": 3.1734, "step": 13166 }, { "epoch": 0.65, "grad_norm": 0.5348342657089233, "learning_rate": 0.0005340735197193924, "loss": 3.1599, "step": 13167 }, { "epoch": 0.65, "grad_norm": 0.5090433955192566, "learning_rate": 0.0005340638894954167, "loss": 3.352, "step": 13168 }, { "epoch": 0.65, "grad_norm": 0.5051810145378113, "learning_rate": 0.0005340542586549617, "loss": 3.0708, "step": 13169 }, { "epoch": 0.65, "grad_norm": 0.5810332298278809, "learning_rate": 0.0005340446271980528, "loss": 3.34, "step": 13170 }, { "epoch": 0.65, "grad_norm": 0.5068036913871765, "learning_rate": 0.0005340349951247156, "loss": 3.2238, "step": 13171 }, { "epoch": 0.65, "grad_norm": 0.5000148415565491, "learning_rate": 0.000534025362434975, "loss": 3.3234, "step": 13172 }, { "epoch": 0.65, "grad_norm": 0.5451956987380981, "learning_rate": 0.0005340157291288568, "loss": 2.9447, "step": 13173 }, { "epoch": 0.65, "grad_norm": 0.524332582950592, "learning_rate": 0.000534006095206386, "loss": 3.1001, "step": 13174 }, { "epoch": 0.65, "grad_norm": 0.48256099224090576, "learning_rate": 0.0005339964606675883, "loss": 3.1865, "step": 13175 }, { "epoch": 0.65, "grad_norm": 0.5315820574760437, "learning_rate": 0.000533986825512489, "loss": 3.0933, "step": 13176 }, { "epoch": 0.65, "grad_norm": 0.5178393721580505, "learning_rate": 0.0005339771897411134, "loss": 3.1253, "step": 13177 }, { "epoch": 0.65, "grad_norm": 0.48656055331230164, "learning_rate": 0.0005339675533534869, "loss": 3.2523, "step": 13178 }, { "epoch": 0.65, "grad_norm": 0.5785982608795166, "learning_rate": 0.000533957916349635, "loss": 3.1421, "step": 13179 }, { "epoch": 0.65, "grad_norm": 0.5141577124595642, "learning_rate": 0.0005339482787295828, "loss": 3.4436, "step": 13180 }, { "epoch": 0.65, "grad_norm": 0.5167346000671387, "learning_rate": 0.000533938640493356, "loss": 3.1199, "step": 13181 }, { "epoch": 0.65, "grad_norm": 0.5306900143623352, "learning_rate": 0.0005339290016409797, "loss": 3.3397, "step": 13182 }, { "epoch": 0.65, "grad_norm": 0.5225565433502197, "learning_rate": 0.0005339193621724795, "loss": 3.1148, "step": 13183 }, { "epoch": 0.65, "grad_norm": 0.5168359875679016, "learning_rate": 0.0005339097220878808, "loss": 3.0261, "step": 13184 }, { "epoch": 0.65, "grad_norm": 0.5109102725982666, "learning_rate": 0.0005339000813872088, "loss": 3.1654, "step": 13185 }, { "epoch": 0.65, "grad_norm": 0.5324611663818359, "learning_rate": 0.0005338904400704891, "loss": 3.291, "step": 13186 }, { "epoch": 0.65, "grad_norm": 0.5554729104042053, "learning_rate": 0.0005338807981377469, "loss": 3.4504, "step": 13187 }, { "epoch": 0.65, "grad_norm": 0.5127044320106506, "learning_rate": 0.0005338711555890077, "loss": 3.3947, "step": 13188 }, { "epoch": 0.65, "grad_norm": 0.5299892425537109, "learning_rate": 0.000533861512424297, "loss": 3.148, "step": 13189 }, { "epoch": 0.65, "grad_norm": 0.5451264381408691, "learning_rate": 0.0005338518686436399, "loss": 3.4564, "step": 13190 }, { "epoch": 0.65, "grad_norm": 0.5090279579162598, "learning_rate": 0.0005338422242470621, "loss": 3.114, "step": 13191 }, { "epoch": 0.65, "grad_norm": 0.519496500492096, "learning_rate": 0.0005338325792345888, "loss": 2.9807, "step": 13192 }, { "epoch": 0.65, "grad_norm": 0.5295326113700867, "learning_rate": 0.0005338229336062456, "loss": 3.3167, "step": 13193 }, { "epoch": 0.65, "grad_norm": 0.5315036177635193, "learning_rate": 0.0005338132873620577, "loss": 3.0778, "step": 13194 }, { "epoch": 0.65, "grad_norm": 0.5226171016693115, "learning_rate": 0.0005338036405020507, "loss": 3.1866, "step": 13195 }, { "epoch": 0.65, "grad_norm": 0.49867236614227295, "learning_rate": 0.0005337939930262497, "loss": 3.1654, "step": 13196 }, { "epoch": 0.65, "grad_norm": 0.5161440372467041, "learning_rate": 0.0005337843449346807, "loss": 3.1951, "step": 13197 }, { "epoch": 0.65, "grad_norm": 0.49722862243652344, "learning_rate": 0.0005337746962273684, "loss": 3.2434, "step": 13198 }, { "epoch": 0.65, "grad_norm": 0.5370376706123352, "learning_rate": 0.0005337650469043387, "loss": 3.2291, "step": 13199 }, { "epoch": 0.65, "grad_norm": 0.5049730539321899, "learning_rate": 0.0005337553969656166, "loss": 3.291, "step": 13200 }, { "epoch": 0.65, "grad_norm": 0.5508888363838196, "learning_rate": 0.000533745746411228, "loss": 3.1669, "step": 13201 }, { "epoch": 0.65, "grad_norm": 0.49294355511665344, "learning_rate": 0.0005337360952411981, "loss": 3.0508, "step": 13202 }, { "epoch": 0.65, "grad_norm": 0.5044154524803162, "learning_rate": 0.0005337264434555522, "loss": 3.2915, "step": 13203 }, { "epoch": 0.65, "grad_norm": 0.5183781385421753, "learning_rate": 0.000533716791054316, "loss": 3.0519, "step": 13204 }, { "epoch": 0.65, "grad_norm": 0.559165358543396, "learning_rate": 0.0005337071380375146, "loss": 3.2492, "step": 13205 }, { "epoch": 0.65, "grad_norm": 0.5487591624259949, "learning_rate": 0.0005336974844051736, "loss": 3.2115, "step": 13206 }, { "epoch": 0.65, "grad_norm": 0.5180480480194092, "learning_rate": 0.0005336878301573185, "loss": 3.2974, "step": 13207 }, { "epoch": 0.65, "grad_norm": 0.4945024251937866, "learning_rate": 0.0005336781752939744, "loss": 3.1486, "step": 13208 }, { "epoch": 0.65, "grad_norm": 0.4666735529899597, "learning_rate": 0.0005336685198151672, "loss": 3.1456, "step": 13209 }, { "epoch": 0.65, "grad_norm": 0.5270425081253052, "learning_rate": 0.0005336588637209219, "loss": 3.1666, "step": 13210 }, { "epoch": 0.65, "grad_norm": 0.5256056785583496, "learning_rate": 0.0005336492070112642, "loss": 3.1653, "step": 13211 }, { "epoch": 0.65, "grad_norm": 0.4974687695503235, "learning_rate": 0.0005336395496862195, "loss": 3.209, "step": 13212 }, { "epoch": 0.65, "grad_norm": 0.5208483934402466, "learning_rate": 0.0005336298917458131, "loss": 3.1261, "step": 13213 }, { "epoch": 0.65, "grad_norm": 0.5191110372543335, "learning_rate": 0.0005336202331900707, "loss": 3.0473, "step": 13214 }, { "epoch": 0.65, "grad_norm": 0.5726637244224548, "learning_rate": 0.0005336105740190175, "loss": 2.9428, "step": 13215 }, { "epoch": 0.65, "grad_norm": 0.520617663860321, "learning_rate": 0.000533600914232679, "loss": 3.0333, "step": 13216 }, { "epoch": 0.65, "grad_norm": 0.51925128698349, "learning_rate": 0.0005335912538310805, "loss": 3.0626, "step": 13217 }, { "epoch": 0.65, "grad_norm": 0.5106421113014221, "learning_rate": 0.0005335815928142478, "loss": 3.2478, "step": 13218 }, { "epoch": 0.65, "grad_norm": 0.4910167455673218, "learning_rate": 0.000533571931182206, "loss": 3.4039, "step": 13219 }, { "epoch": 0.65, "grad_norm": 0.5045503973960876, "learning_rate": 0.0005335622689349807, "loss": 3.1105, "step": 13220 }, { "epoch": 0.65, "grad_norm": 0.53183513879776, "learning_rate": 0.0005335526060725975, "loss": 3.3134, "step": 13221 }, { "epoch": 0.65, "grad_norm": 0.5390097498893738, "learning_rate": 0.0005335429425950814, "loss": 3.1459, "step": 13222 }, { "epoch": 0.65, "grad_norm": 0.53877192735672, "learning_rate": 0.0005335332785024583, "loss": 3.1017, "step": 13223 }, { "epoch": 0.65, "grad_norm": 0.5238897800445557, "learning_rate": 0.0005335236137947536, "loss": 3.1823, "step": 13224 }, { "epoch": 0.65, "grad_norm": 0.5160094499588013, "learning_rate": 0.0005335139484719925, "loss": 3.0258, "step": 13225 }, { "epoch": 0.65, "grad_norm": 0.512133002281189, "learning_rate": 0.0005335042825342005, "loss": 3.1001, "step": 13226 }, { "epoch": 0.65, "grad_norm": 0.4885791838169098, "learning_rate": 0.0005334946159814033, "loss": 3.3233, "step": 13227 }, { "epoch": 0.65, "grad_norm": 0.5112274289131165, "learning_rate": 0.0005334849488136263, "loss": 3.269, "step": 13228 }, { "epoch": 0.65, "grad_norm": 0.4955732524394989, "learning_rate": 0.0005334752810308948, "loss": 3.3469, "step": 13229 }, { "epoch": 0.65, "grad_norm": 0.5273498296737671, "learning_rate": 0.0005334656126332343, "loss": 3.2339, "step": 13230 }, { "epoch": 0.65, "grad_norm": 0.5362980961799622, "learning_rate": 0.0005334559436206702, "loss": 3.2441, "step": 13231 }, { "epoch": 0.65, "grad_norm": 0.5081391334533691, "learning_rate": 0.0005334462739932282, "loss": 3.2659, "step": 13232 }, { "epoch": 0.65, "grad_norm": 0.5640556812286377, "learning_rate": 0.0005334366037509337, "loss": 3.192, "step": 13233 }, { "epoch": 0.65, "grad_norm": 0.5298064351081848, "learning_rate": 0.000533426932893812, "loss": 3.1988, "step": 13234 }, { "epoch": 0.65, "grad_norm": 0.5060878396034241, "learning_rate": 0.0005334172614218887, "loss": 3.3089, "step": 13235 }, { "epoch": 0.65, "grad_norm": 0.5255646705627441, "learning_rate": 0.0005334075893351893, "loss": 3.212, "step": 13236 }, { "epoch": 0.65, "grad_norm": 0.5083935856819153, "learning_rate": 0.0005333979166337393, "loss": 3.2419, "step": 13237 }, { "epoch": 0.65, "grad_norm": 0.5213572382926941, "learning_rate": 0.000533388243317564, "loss": 3.3522, "step": 13238 }, { "epoch": 0.65, "grad_norm": 0.495538592338562, "learning_rate": 0.000533378569386689, "loss": 3.3924, "step": 13239 }, { "epoch": 0.65, "grad_norm": 0.5392500758171082, "learning_rate": 0.0005333688948411398, "loss": 2.9577, "step": 13240 }, { "epoch": 0.65, "grad_norm": 0.5462858080863953, "learning_rate": 0.0005333592196809418, "loss": 3.1583, "step": 13241 }, { "epoch": 0.65, "grad_norm": 0.584762454032898, "learning_rate": 0.0005333495439061206, "loss": 3.1343, "step": 13242 }, { "epoch": 0.65, "grad_norm": 0.520191490650177, "learning_rate": 0.0005333398675167015, "loss": 3.3106, "step": 13243 }, { "epoch": 0.65, "grad_norm": 0.5066478252410889, "learning_rate": 0.0005333301905127101, "loss": 3.3081, "step": 13244 }, { "epoch": 0.65, "grad_norm": 0.5514070391654968, "learning_rate": 0.000533320512894172, "loss": 3.1183, "step": 13245 }, { "epoch": 0.65, "grad_norm": 0.5034546256065369, "learning_rate": 0.0005333108346611124, "loss": 3.0596, "step": 13246 }, { "epoch": 0.65, "grad_norm": 0.5097323656082153, "learning_rate": 0.0005333011558135572, "loss": 3.1469, "step": 13247 }, { "epoch": 0.65, "grad_norm": 0.5640053153038025, "learning_rate": 0.0005332914763515314, "loss": 3.1606, "step": 13248 }, { "epoch": 0.65, "grad_norm": 0.5292096138000488, "learning_rate": 0.0005332817962750609, "loss": 3.1932, "step": 13249 }, { "epoch": 0.65, "grad_norm": 0.4695495367050171, "learning_rate": 0.0005332721155841711, "loss": 3.147, "step": 13250 }, { "epoch": 0.65, "grad_norm": 0.512503445148468, "learning_rate": 0.0005332624342788873, "loss": 3.1208, "step": 13251 }, { "epoch": 0.65, "grad_norm": 0.5124956965446472, "learning_rate": 0.0005332527523592353, "loss": 3.1443, "step": 13252 }, { "epoch": 0.65, "grad_norm": 0.5241851210594177, "learning_rate": 0.0005332430698252403, "loss": 3.1076, "step": 13253 }, { "epoch": 0.65, "grad_norm": 0.5366250872612, "learning_rate": 0.0005332333866769279, "loss": 3.1135, "step": 13254 }, { "epoch": 0.65, "grad_norm": 0.506049394607544, "learning_rate": 0.0005332237029143238, "loss": 3.411, "step": 13255 }, { "epoch": 0.65, "grad_norm": 0.5145257711410522, "learning_rate": 0.0005332140185374532, "loss": 3.2594, "step": 13256 }, { "epoch": 0.65, "grad_norm": 0.5252572894096375, "learning_rate": 0.0005332043335463419, "loss": 3.2745, "step": 13257 }, { "epoch": 0.65, "grad_norm": 0.4891444146633148, "learning_rate": 0.0005331946479410152, "loss": 3.124, "step": 13258 }, { "epoch": 0.65, "grad_norm": 0.569198727607727, "learning_rate": 0.0005331849617214987, "loss": 3.3658, "step": 13259 }, { "epoch": 0.65, "grad_norm": 0.627887487411499, "learning_rate": 0.0005331752748878179, "loss": 3.0741, "step": 13260 }, { "epoch": 0.65, "grad_norm": 0.5502414703369141, "learning_rate": 0.0005331655874399982, "loss": 3.1345, "step": 13261 }, { "epoch": 0.65, "grad_norm": 0.5486642122268677, "learning_rate": 0.0005331558993780653, "loss": 3.1981, "step": 13262 }, { "epoch": 0.65, "grad_norm": 0.5511811375617981, "learning_rate": 0.0005331462107020446, "loss": 3.1109, "step": 13263 }, { "epoch": 0.65, "grad_norm": 0.5167766809463501, "learning_rate": 0.0005331365214119617, "loss": 3.2288, "step": 13264 }, { "epoch": 0.65, "grad_norm": 0.5129167437553406, "learning_rate": 0.000533126831507842, "loss": 3.3583, "step": 13265 }, { "epoch": 0.65, "grad_norm": 0.5186481475830078, "learning_rate": 0.0005331171409897112, "loss": 3.18, "step": 13266 }, { "epoch": 0.65, "grad_norm": 0.5730112195014954, "learning_rate": 0.0005331074498575946, "loss": 3.0874, "step": 13267 }, { "epoch": 0.65, "grad_norm": 0.5106200575828552, "learning_rate": 0.000533097758111518, "loss": 3.2888, "step": 13268 }, { "epoch": 0.65, "grad_norm": 0.5146166682243347, "learning_rate": 0.0005330880657515066, "loss": 3.0715, "step": 13269 }, { "epoch": 0.65, "grad_norm": 0.49323368072509766, "learning_rate": 0.0005330783727775861, "loss": 3.1591, "step": 13270 }, { "epoch": 0.65, "grad_norm": 0.5173508524894714, "learning_rate": 0.000533068679189782, "loss": 3.092, "step": 13271 }, { "epoch": 0.65, "grad_norm": 0.51826411485672, "learning_rate": 0.00053305898498812, "loss": 3.228, "step": 13272 }, { "epoch": 0.65, "grad_norm": 0.5417203307151794, "learning_rate": 0.0005330492901726255, "loss": 3.1268, "step": 13273 }, { "epoch": 0.65, "grad_norm": 0.5598623752593994, "learning_rate": 0.0005330395947433238, "loss": 3.1918, "step": 13274 }, { "epoch": 0.65, "grad_norm": 0.5296295881271362, "learning_rate": 0.0005330298987002408, "loss": 3.1156, "step": 13275 }, { "epoch": 0.65, "grad_norm": 0.4983462989330292, "learning_rate": 0.0005330202020434019, "loss": 3.0815, "step": 13276 }, { "epoch": 0.65, "grad_norm": 0.604824423789978, "learning_rate": 0.0005330105047728326, "loss": 3.241, "step": 13277 }, { "epoch": 0.65, "grad_norm": 0.5088874101638794, "learning_rate": 0.0005330008068885585, "loss": 3.255, "step": 13278 }, { "epoch": 0.65, "grad_norm": 0.5178427696228027, "learning_rate": 0.0005329911083906051, "loss": 3.2774, "step": 13279 }, { "epoch": 0.65, "grad_norm": 0.510136604309082, "learning_rate": 0.0005329814092789979, "loss": 3.4761, "step": 13280 }, { "epoch": 0.65, "grad_norm": 0.48275598883628845, "learning_rate": 0.0005329717095537627, "loss": 3.2948, "step": 13281 }, { "epoch": 0.65, "grad_norm": 0.5280422568321228, "learning_rate": 0.0005329620092149247, "loss": 3.0466, "step": 13282 }, { "epoch": 0.65, "grad_norm": 0.5315813422203064, "learning_rate": 0.0005329523082625097, "loss": 3.0598, "step": 13283 }, { "epoch": 0.65, "grad_norm": 0.49926483631134033, "learning_rate": 0.0005329426066965431, "loss": 3.2156, "step": 13284 }, { "epoch": 0.65, "grad_norm": 0.5536932349205017, "learning_rate": 0.0005329329045170503, "loss": 3.3147, "step": 13285 }, { "epoch": 0.65, "grad_norm": 0.5208083391189575, "learning_rate": 0.0005329232017240573, "loss": 3.1502, "step": 13286 }, { "epoch": 0.65, "grad_norm": 0.5383280515670776, "learning_rate": 0.0005329134983175894, "loss": 3.2438, "step": 13287 }, { "epoch": 0.65, "grad_norm": 0.5244266986846924, "learning_rate": 0.0005329037942976721, "loss": 3.4281, "step": 13288 }, { "epoch": 0.65, "grad_norm": 0.47546955943107605, "learning_rate": 0.0005328940896643311, "loss": 3.2289, "step": 13289 }, { "epoch": 0.65, "grad_norm": 0.5083588361740112, "learning_rate": 0.0005328843844175918, "loss": 3.138, "step": 13290 }, { "epoch": 0.65, "grad_norm": 0.522491455078125, "learning_rate": 0.0005328746785574799, "loss": 3.1468, "step": 13291 }, { "epoch": 0.65, "grad_norm": 0.49923422932624817, "learning_rate": 0.0005328649720840209, "loss": 3.1271, "step": 13292 }, { "epoch": 0.65, "grad_norm": 0.5488773584365845, "learning_rate": 0.0005328552649972405, "loss": 3.1722, "step": 13293 }, { "epoch": 0.65, "grad_norm": 0.5081570744514465, "learning_rate": 0.0005328455572971639, "loss": 3.3069, "step": 13294 }, { "epoch": 0.65, "grad_norm": 0.48642170429229736, "learning_rate": 0.0005328358489838171, "loss": 3.3906, "step": 13295 }, { "epoch": 0.65, "grad_norm": 0.5340069532394409, "learning_rate": 0.0005328261400572254, "loss": 3.1015, "step": 13296 }, { "epoch": 0.65, "grad_norm": 0.5175917744636536, "learning_rate": 0.0005328164305174146, "loss": 3.2171, "step": 13297 }, { "epoch": 0.65, "grad_norm": 0.5808233022689819, "learning_rate": 0.00053280672036441, "loss": 3.2978, "step": 13298 }, { "epoch": 0.65, "grad_norm": 0.4955079257488251, "learning_rate": 0.0005327970095982372, "loss": 3.1519, "step": 13299 }, { "epoch": 0.65, "grad_norm": 0.6096027493476868, "learning_rate": 0.0005327872982189221, "loss": 3.2015, "step": 13300 }, { "epoch": 0.65, "grad_norm": 0.5957887172698975, "learning_rate": 0.0005327775862264899, "loss": 3.286, "step": 13301 }, { "epoch": 0.65, "grad_norm": 0.5077028274536133, "learning_rate": 0.0005327678736209664, "loss": 3.1815, "step": 13302 }, { "epoch": 0.65, "grad_norm": 0.5380714535713196, "learning_rate": 0.0005327581604023772, "loss": 3.4044, "step": 13303 }, { "epoch": 0.65, "grad_norm": 0.6547355651855469, "learning_rate": 0.0005327484465707477, "loss": 3.2944, "step": 13304 }, { "epoch": 0.65, "grad_norm": 0.5356435179710388, "learning_rate": 0.0005327387321261035, "loss": 2.9592, "step": 13305 }, { "epoch": 0.65, "grad_norm": 0.48445892333984375, "learning_rate": 0.0005327290170684705, "loss": 3.4262, "step": 13306 }, { "epoch": 0.65, "grad_norm": 0.5008205771446228, "learning_rate": 0.0005327193013978739, "loss": 3.0121, "step": 13307 }, { "epoch": 0.65, "grad_norm": 0.5113664269447327, "learning_rate": 0.0005327095851143394, "loss": 3.3788, "step": 13308 }, { "epoch": 0.65, "grad_norm": 0.5016259551048279, "learning_rate": 0.0005326998682178927, "loss": 3.0934, "step": 13309 }, { "epoch": 0.65, "grad_norm": 0.48941028118133545, "learning_rate": 0.0005326901507085594, "loss": 3.364, "step": 13310 }, { "epoch": 0.65, "grad_norm": 0.49986913800239563, "learning_rate": 0.000532680432586365, "loss": 3.3927, "step": 13311 }, { "epoch": 0.65, "grad_norm": 0.5154104828834534, "learning_rate": 0.0005326707138513351, "loss": 3.3246, "step": 13312 }, { "epoch": 0.65, "grad_norm": 0.5213533639907837, "learning_rate": 0.0005326609945034953, "loss": 3.1087, "step": 13313 }, { "epoch": 0.65, "grad_norm": 0.5237452387809753, "learning_rate": 0.0005326512745428713, "loss": 3.347, "step": 13314 }, { "epoch": 0.65, "grad_norm": 0.49681347608566284, "learning_rate": 0.0005326415539694885, "loss": 3.1432, "step": 13315 }, { "epoch": 0.65, "grad_norm": 0.5009694695472717, "learning_rate": 0.0005326318327833726, "loss": 3.307, "step": 13316 }, { "epoch": 0.65, "grad_norm": 0.5398823618888855, "learning_rate": 0.0005326221109845493, "loss": 3.1889, "step": 13317 }, { "epoch": 0.65, "grad_norm": 0.4797592759132385, "learning_rate": 0.0005326123885730441, "loss": 3.2475, "step": 13318 }, { "epoch": 0.65, "grad_norm": 0.5444583296775818, "learning_rate": 0.0005326026655488827, "loss": 3.0342, "step": 13319 }, { "epoch": 0.65, "grad_norm": 0.5243876576423645, "learning_rate": 0.0005325929419120906, "loss": 3.2708, "step": 13320 }, { "epoch": 0.65, "grad_norm": 0.46254366636276245, "learning_rate": 0.0005325832176626934, "loss": 2.979, "step": 13321 }, { "epoch": 0.65, "grad_norm": 0.5210396647453308, "learning_rate": 0.0005325734928007168, "loss": 3.1684, "step": 13322 }, { "epoch": 0.65, "grad_norm": 0.5120729207992554, "learning_rate": 0.0005325637673261864, "loss": 3.1615, "step": 13323 }, { "epoch": 0.65, "grad_norm": 0.5329714417457581, "learning_rate": 0.0005325540412391279, "loss": 3.3009, "step": 13324 }, { "epoch": 0.65, "grad_norm": 0.5156061053276062, "learning_rate": 0.0005325443145395666, "loss": 3.1694, "step": 13325 }, { "epoch": 0.65, "grad_norm": 0.5308995842933655, "learning_rate": 0.0005325345872275285, "loss": 3.163, "step": 13326 }, { "epoch": 0.65, "grad_norm": 0.5136690735816956, "learning_rate": 0.0005325248593030389, "loss": 3.5507, "step": 13327 }, { "epoch": 0.65, "grad_norm": 0.5340861082077026, "learning_rate": 0.0005325151307661237, "loss": 3.4215, "step": 13328 }, { "epoch": 0.65, "grad_norm": 0.6588315367698669, "learning_rate": 0.0005325054016168083, "loss": 3.4133, "step": 13329 }, { "epoch": 0.65, "grad_norm": 0.49800702929496765, "learning_rate": 0.0005324956718551185, "loss": 3.2623, "step": 13330 }, { "epoch": 0.65, "grad_norm": 0.5641326308250427, "learning_rate": 0.0005324859414810798, "loss": 3.0368, "step": 13331 }, { "epoch": 0.65, "grad_norm": 0.49139222502708435, "learning_rate": 0.0005324762104947179, "loss": 3.432, "step": 13332 }, { "epoch": 0.65, "grad_norm": 0.5017745494842529, "learning_rate": 0.0005324664788960583, "loss": 3.1099, "step": 13333 }, { "epoch": 0.65, "grad_norm": 0.5510955452919006, "learning_rate": 0.0005324567466851269, "loss": 3.08, "step": 13334 }, { "epoch": 0.65, "grad_norm": 0.5165101885795593, "learning_rate": 0.0005324470138619492, "loss": 3.2788, "step": 13335 }, { "epoch": 0.65, "grad_norm": 0.5383206605911255, "learning_rate": 0.0005324372804265505, "loss": 3.1441, "step": 13336 }, { "epoch": 0.65, "grad_norm": 0.5414730906486511, "learning_rate": 0.000532427546378957, "loss": 3.2794, "step": 13337 }, { "epoch": 0.65, "grad_norm": 0.5007972717285156, "learning_rate": 0.000532417811719194, "loss": 3.2736, "step": 13338 }, { "epoch": 0.65, "grad_norm": 0.543908953666687, "learning_rate": 0.0005324080764472871, "loss": 2.9989, "step": 13339 }, { "epoch": 0.65, "grad_norm": 0.5582568049430847, "learning_rate": 0.0005323983405632623, "loss": 3.1517, "step": 13340 }, { "epoch": 0.65, "grad_norm": 0.4982832968235016, "learning_rate": 0.0005323886040671448, "loss": 3.2608, "step": 13341 }, { "epoch": 0.65, "grad_norm": 0.49134111404418945, "learning_rate": 0.0005323788669589606, "loss": 3.1525, "step": 13342 }, { "epoch": 0.65, "grad_norm": 0.5180835723876953, "learning_rate": 0.000532369129238735, "loss": 3.0439, "step": 13343 }, { "epoch": 0.65, "grad_norm": 0.49943089485168457, "learning_rate": 0.000532359390906494, "loss": 3.311, "step": 13344 }, { "epoch": 0.65, "grad_norm": 0.5632797479629517, "learning_rate": 0.0005323496519622629, "loss": 3.1605, "step": 13345 }, { "epoch": 0.65, "grad_norm": 0.4958808720111847, "learning_rate": 0.0005323399124060677, "loss": 3.3853, "step": 13346 }, { "epoch": 0.65, "grad_norm": 0.5369716286659241, "learning_rate": 0.0005323301722379338, "loss": 3.6639, "step": 13347 }, { "epoch": 0.65, "grad_norm": 0.5046685934066772, "learning_rate": 0.000532320431457887, "loss": 3.2543, "step": 13348 }, { "epoch": 0.65, "grad_norm": 0.528698205947876, "learning_rate": 0.0005323106900659529, "loss": 2.9939, "step": 13349 }, { "epoch": 0.65, "grad_norm": 0.5998996496200562, "learning_rate": 0.0005323009480621571, "loss": 3.2342, "step": 13350 }, { "epoch": 0.65, "grad_norm": 0.5694158673286438, "learning_rate": 0.0005322912054465253, "loss": 3.0568, "step": 13351 }, { "epoch": 0.65, "grad_norm": 0.4973612129688263, "learning_rate": 0.0005322814622190831, "loss": 3.1143, "step": 13352 }, { "epoch": 0.65, "grad_norm": 0.5280294418334961, "learning_rate": 0.0005322717183798564, "loss": 3.1217, "step": 13353 }, { "epoch": 0.65, "grad_norm": 0.507739245891571, "learning_rate": 0.0005322619739288706, "loss": 3.0354, "step": 13354 }, { "epoch": 0.65, "grad_norm": 0.5100513696670532, "learning_rate": 0.0005322522288661515, "loss": 3.0251, "step": 13355 }, { "epoch": 0.65, "grad_norm": 0.49843087792396545, "learning_rate": 0.0005322424831917247, "loss": 3.152, "step": 13356 }, { "epoch": 0.65, "grad_norm": 0.5560824275016785, "learning_rate": 0.000532232736905616, "loss": 3.3028, "step": 13357 }, { "epoch": 0.65, "grad_norm": 0.5524668097496033, "learning_rate": 0.0005322229900078507, "loss": 3.2832, "step": 13358 }, { "epoch": 0.65, "grad_norm": 0.5191108584403992, "learning_rate": 0.0005322132424984549, "loss": 3.136, "step": 13359 }, { "epoch": 0.65, "grad_norm": 0.49663254618644714, "learning_rate": 0.0005322034943774542, "loss": 3.0543, "step": 13360 }, { "epoch": 0.65, "grad_norm": 0.5162781476974487, "learning_rate": 0.0005321937456448741, "loss": 3.2929, "step": 13361 }, { "epoch": 0.65, "grad_norm": 0.5527589321136475, "learning_rate": 0.0005321839963007402, "loss": 3.4472, "step": 13362 }, { "epoch": 0.65, "grad_norm": 0.539014458656311, "learning_rate": 0.0005321742463450786, "loss": 3.2797, "step": 13363 }, { "epoch": 0.65, "grad_norm": 0.5228904485702515, "learning_rate": 0.0005321644957779146, "loss": 3.2703, "step": 13364 }, { "epoch": 0.65, "grad_norm": 0.5191890597343445, "learning_rate": 0.000532154744599274, "loss": 3.118, "step": 13365 }, { "epoch": 0.66, "grad_norm": 0.5481828451156616, "learning_rate": 0.0005321449928091825, "loss": 3.2102, "step": 13366 }, { "epoch": 0.66, "grad_norm": 0.5154120326042175, "learning_rate": 0.0005321352404076659, "loss": 3.273, "step": 13367 }, { "epoch": 0.66, "grad_norm": 0.5242748260498047, "learning_rate": 0.0005321254873947495, "loss": 3.2006, "step": 13368 }, { "epoch": 0.66, "grad_norm": 0.5365203619003296, "learning_rate": 0.0005321157337704594, "loss": 2.8698, "step": 13369 }, { "epoch": 0.66, "grad_norm": 0.5272378325462341, "learning_rate": 0.000532105979534821, "loss": 3.1841, "step": 13370 }, { "epoch": 0.66, "grad_norm": 0.5126152634620667, "learning_rate": 0.0005320962246878602, "loss": 3.1408, "step": 13371 }, { "epoch": 0.66, "grad_norm": 0.5221778154373169, "learning_rate": 0.0005320864692296026, "loss": 3.2598, "step": 13372 }, { "epoch": 0.66, "grad_norm": 0.5323341488838196, "learning_rate": 0.000532076713160074, "loss": 3.3795, "step": 13373 }, { "epoch": 0.66, "grad_norm": 0.5273595452308655, "learning_rate": 0.0005320669564792999, "loss": 3.5628, "step": 13374 }, { "epoch": 0.66, "grad_norm": 0.5162096619606018, "learning_rate": 0.0005320571991873061, "loss": 3.1631, "step": 13375 }, { "epoch": 0.66, "grad_norm": 0.49325329065322876, "learning_rate": 0.0005320474412841183, "loss": 3.4696, "step": 13376 }, { "epoch": 0.66, "grad_norm": 0.5000008344650269, "learning_rate": 0.0005320376827697622, "loss": 3.1889, "step": 13377 }, { "epoch": 0.66, "grad_norm": 0.5023176074028015, "learning_rate": 0.0005320279236442635, "loss": 3.2494, "step": 13378 }, { "epoch": 0.66, "grad_norm": 0.4893326759338379, "learning_rate": 0.000532018163907648, "loss": 3.1743, "step": 13379 }, { "epoch": 0.66, "grad_norm": 0.5529983639717102, "learning_rate": 0.0005320084035599413, "loss": 3.0792, "step": 13380 }, { "epoch": 0.66, "grad_norm": 0.5161319375038147, "learning_rate": 0.000531998642601169, "loss": 3.2193, "step": 13381 }, { "epoch": 0.66, "grad_norm": 0.4804127812385559, "learning_rate": 0.000531988881031357, "loss": 3.3955, "step": 13382 }, { "epoch": 0.66, "grad_norm": 0.5218368172645569, "learning_rate": 0.0005319791188505309, "loss": 3.2941, "step": 13383 }, { "epoch": 0.66, "grad_norm": 0.5038884878158569, "learning_rate": 0.0005319693560587164, "loss": 3.384, "step": 13384 }, { "epoch": 0.66, "grad_norm": 0.503178060054779, "learning_rate": 0.0005319595926559392, "loss": 3.2356, "step": 13385 }, { "epoch": 0.66, "grad_norm": 0.48960548639297485, "learning_rate": 0.0005319498286422252, "loss": 3.1029, "step": 13386 }, { "epoch": 0.66, "grad_norm": 0.5522580146789551, "learning_rate": 0.0005319400640176, "loss": 3.2378, "step": 13387 }, { "epoch": 0.66, "grad_norm": 0.5147445201873779, "learning_rate": 0.0005319302987820894, "loss": 3.3786, "step": 13388 }, { "epoch": 0.66, "grad_norm": 0.5028669238090515, "learning_rate": 0.0005319205329357188, "loss": 3.2985, "step": 13389 }, { "epoch": 0.66, "grad_norm": 0.5062679648399353, "learning_rate": 0.0005319107664785144, "loss": 3.2392, "step": 13390 }, { "epoch": 0.66, "grad_norm": 0.5048500895500183, "learning_rate": 0.0005319009994105014, "loss": 3.3131, "step": 13391 }, { "epoch": 0.66, "grad_norm": 0.5031437277793884, "learning_rate": 0.000531891231731706, "loss": 3.485, "step": 13392 }, { "epoch": 0.66, "grad_norm": 0.5089054703712463, "learning_rate": 0.0005318814634421537, "loss": 3.082, "step": 13393 }, { "epoch": 0.66, "grad_norm": 0.5406572222709656, "learning_rate": 0.0005318716945418701, "loss": 3.1211, "step": 13394 }, { "epoch": 0.66, "grad_norm": 0.5037758350372314, "learning_rate": 0.0005318619250308812, "loss": 3.055, "step": 13395 }, { "epoch": 0.66, "grad_norm": 0.48916196823120117, "learning_rate": 0.0005318521549092126, "loss": 3.3232, "step": 13396 }, { "epoch": 0.66, "grad_norm": 0.5082511305809021, "learning_rate": 0.0005318423841768901, "loss": 3.4669, "step": 13397 }, { "epoch": 0.66, "grad_norm": 0.5099027156829834, "learning_rate": 0.0005318326128339393, "loss": 3.3092, "step": 13398 }, { "epoch": 0.66, "grad_norm": 0.5375298261642456, "learning_rate": 0.0005318228408803861, "loss": 3.4794, "step": 13399 }, { "epoch": 0.66, "grad_norm": 0.5250174403190613, "learning_rate": 0.0005318130683162561, "loss": 3.2933, "step": 13400 }, { "epoch": 0.66, "grad_norm": 0.5176973938941956, "learning_rate": 0.0005318032951415751, "loss": 3.2749, "step": 13401 }, { "epoch": 0.66, "grad_norm": 0.5219639539718628, "learning_rate": 0.0005317935213563687, "loss": 3.4369, "step": 13402 }, { "epoch": 0.66, "grad_norm": 0.49903932213783264, "learning_rate": 0.000531783746960663, "loss": 3.1772, "step": 13403 }, { "epoch": 0.66, "grad_norm": 0.5053322911262512, "learning_rate": 0.0005317739719544834, "loss": 3.415, "step": 13404 }, { "epoch": 0.66, "grad_norm": 0.5507119297981262, "learning_rate": 0.0005317641963378557, "loss": 3.0963, "step": 13405 }, { "epoch": 0.66, "grad_norm": 0.5222508311271667, "learning_rate": 0.0005317544201108058, "loss": 3.2718, "step": 13406 }, { "epoch": 0.66, "grad_norm": 0.509367048740387, "learning_rate": 0.0005317446432733594, "loss": 3.0838, "step": 13407 }, { "epoch": 0.66, "grad_norm": 0.5182288885116577, "learning_rate": 0.0005317348658255421, "loss": 3.1657, "step": 13408 }, { "epoch": 0.66, "grad_norm": 0.5248708128929138, "learning_rate": 0.0005317250877673798, "loss": 3.3512, "step": 13409 }, { "epoch": 0.66, "grad_norm": 0.5264621376991272, "learning_rate": 0.0005317153090988983, "loss": 3.1901, "step": 13410 }, { "epoch": 0.66, "grad_norm": 0.5019949078559875, "learning_rate": 0.0005317055298201232, "loss": 3.4335, "step": 13411 }, { "epoch": 0.66, "grad_norm": 0.5084396004676819, "learning_rate": 0.0005316957499310802, "loss": 3.3224, "step": 13412 }, { "epoch": 0.66, "grad_norm": 0.5858707427978516, "learning_rate": 0.0005316859694317954, "loss": 3.0917, "step": 13413 }, { "epoch": 0.66, "grad_norm": 0.5144307613372803, "learning_rate": 0.0005316761883222943, "loss": 3.4099, "step": 13414 }, { "epoch": 0.66, "grad_norm": 0.5141854286193848, "learning_rate": 0.0005316664066026026, "loss": 3.2635, "step": 13415 }, { "epoch": 0.66, "grad_norm": 0.5495926737785339, "learning_rate": 0.0005316566242727463, "loss": 3.3313, "step": 13416 }, { "epoch": 0.66, "grad_norm": 0.5578013062477112, "learning_rate": 0.000531646841332751, "loss": 3.4108, "step": 13417 }, { "epoch": 0.66, "grad_norm": 0.5008851885795593, "learning_rate": 0.0005316370577826424, "loss": 3.2526, "step": 13418 }, { "epoch": 0.66, "grad_norm": 0.5154222846031189, "learning_rate": 0.0005316272736224464, "loss": 3.2379, "step": 13419 }, { "epoch": 0.66, "grad_norm": 0.527387261390686, "learning_rate": 0.0005316174888521888, "loss": 3.3232, "step": 13420 }, { "epoch": 0.66, "grad_norm": 0.5717757344245911, "learning_rate": 0.0005316077034718952, "loss": 3.1741, "step": 13421 }, { "epoch": 0.66, "grad_norm": 0.5230823159217834, "learning_rate": 0.0005315979174815916, "loss": 3.273, "step": 13422 }, { "epoch": 0.66, "grad_norm": 0.521920382976532, "learning_rate": 0.0005315881308813038, "loss": 3.2103, "step": 13423 }, { "epoch": 0.66, "grad_norm": 0.5073776245117188, "learning_rate": 0.0005315783436710572, "loss": 3.25, "step": 13424 }, { "epoch": 0.66, "grad_norm": 0.5398444533348083, "learning_rate": 0.0005315685558508779, "loss": 3.1903, "step": 13425 }, { "epoch": 0.66, "grad_norm": 0.4858630299568176, "learning_rate": 0.0005315587674207914, "loss": 3.2599, "step": 13426 }, { "epoch": 0.66, "grad_norm": 0.5114261507987976, "learning_rate": 0.0005315489783808239, "loss": 3.1523, "step": 13427 }, { "epoch": 0.66, "grad_norm": 0.5169461965560913, "learning_rate": 0.0005315391887310009, "loss": 3.1635, "step": 13428 }, { "epoch": 0.66, "grad_norm": 0.49738162755966187, "learning_rate": 0.0005315293984713482, "loss": 3.0964, "step": 13429 }, { "epoch": 0.66, "grad_norm": 0.5174451470375061, "learning_rate": 0.0005315196076018917, "loss": 3.2218, "step": 13430 }, { "epoch": 0.66, "grad_norm": 0.5436557531356812, "learning_rate": 0.000531509816122657, "loss": 3.0565, "step": 13431 }, { "epoch": 0.66, "grad_norm": 0.5209143161773682, "learning_rate": 0.0005315000240336702, "loss": 3.1762, "step": 13432 }, { "epoch": 0.66, "grad_norm": 0.5072414875030518, "learning_rate": 0.0005314902313349566, "loss": 3.4239, "step": 13433 }, { "epoch": 0.66, "grad_norm": 0.4978218674659729, "learning_rate": 0.0005314804380265425, "loss": 3.3271, "step": 13434 }, { "epoch": 0.66, "grad_norm": 0.5045718550682068, "learning_rate": 0.0005314706441084535, "loss": 3.0421, "step": 13435 }, { "epoch": 0.66, "grad_norm": 0.5093064904212952, "learning_rate": 0.0005314608495807151, "loss": 3.2584, "step": 13436 }, { "epoch": 0.66, "grad_norm": 0.5230816602706909, "learning_rate": 0.0005314510544433536, "loss": 3.1959, "step": 13437 }, { "epoch": 0.66, "grad_norm": 0.547500491142273, "learning_rate": 0.0005314412586963945, "loss": 3.1111, "step": 13438 }, { "epoch": 0.66, "grad_norm": 0.5420114398002625, "learning_rate": 0.0005314314623398637, "loss": 3.2622, "step": 13439 }, { "epoch": 0.66, "grad_norm": 0.5222113728523254, "learning_rate": 0.0005314216653737869, "loss": 3.1762, "step": 13440 }, { "epoch": 0.66, "grad_norm": 0.5479421615600586, "learning_rate": 0.00053141186779819, "loss": 3.0739, "step": 13441 }, { "epoch": 0.66, "grad_norm": 0.5246508121490479, "learning_rate": 0.0005314020696130989, "loss": 3.2307, "step": 13442 }, { "epoch": 0.66, "grad_norm": 0.5006526112556458, "learning_rate": 0.0005313922708185391, "loss": 3.3417, "step": 13443 }, { "epoch": 0.66, "grad_norm": 0.5077061653137207, "learning_rate": 0.0005313824714145367, "loss": 3.0799, "step": 13444 }, { "epoch": 0.66, "grad_norm": 0.4841029942035675, "learning_rate": 0.0005313726714011173, "loss": 3.1433, "step": 13445 }, { "epoch": 0.66, "grad_norm": 0.5092795491218567, "learning_rate": 0.000531362870778307, "loss": 3.2824, "step": 13446 }, { "epoch": 0.66, "grad_norm": 0.5002308487892151, "learning_rate": 0.0005313530695461313, "loss": 3.2313, "step": 13447 }, { "epoch": 0.66, "grad_norm": 0.5429970026016235, "learning_rate": 0.0005313432677046162, "loss": 3.2798, "step": 13448 }, { "epoch": 0.66, "grad_norm": 0.5142826437950134, "learning_rate": 0.0005313334652537873, "loss": 3.0374, "step": 13449 }, { "epoch": 0.66, "grad_norm": 0.5216500759124756, "learning_rate": 0.0005313236621936707, "loss": 3.401, "step": 13450 }, { "epoch": 0.66, "grad_norm": 0.5026384592056274, "learning_rate": 0.0005313138585242921, "loss": 3.2493, "step": 13451 }, { "epoch": 0.66, "grad_norm": 0.5115340352058411, "learning_rate": 0.0005313040542456772, "loss": 3.0881, "step": 13452 }, { "epoch": 0.66, "grad_norm": 0.5016939043998718, "learning_rate": 0.0005312942493578519, "loss": 3.0879, "step": 13453 }, { "epoch": 0.66, "grad_norm": 0.459194540977478, "learning_rate": 0.0005312844438608423, "loss": 2.9753, "step": 13454 }, { "epoch": 0.66, "grad_norm": 0.5059602856636047, "learning_rate": 0.0005312746377546739, "loss": 3.1455, "step": 13455 }, { "epoch": 0.66, "grad_norm": 0.49334749579429626, "learning_rate": 0.0005312648310393726, "loss": 3.1937, "step": 13456 }, { "epoch": 0.66, "grad_norm": 0.5341232419013977, "learning_rate": 0.0005312550237149641, "loss": 3.2977, "step": 13457 }, { "epoch": 0.66, "grad_norm": 0.5050813555717468, "learning_rate": 0.0005312452157814746, "loss": 3.3302, "step": 13458 }, { "epoch": 0.66, "grad_norm": 0.5001168251037598, "learning_rate": 0.0005312354072389296, "loss": 3.0763, "step": 13459 }, { "epoch": 0.66, "grad_norm": 0.5147970914840698, "learning_rate": 0.000531225598087355, "loss": 3.235, "step": 13460 }, { "epoch": 0.66, "grad_norm": 0.529277503490448, "learning_rate": 0.0005312157883267767, "loss": 3.2548, "step": 13461 }, { "epoch": 0.66, "grad_norm": 0.49942076206207275, "learning_rate": 0.0005312059779572205, "loss": 3.106, "step": 13462 }, { "epoch": 0.66, "grad_norm": 0.5275735259056091, "learning_rate": 0.0005311961669787124, "loss": 3.0986, "step": 13463 }, { "epoch": 0.66, "grad_norm": 0.5004652738571167, "learning_rate": 0.0005311863553912778, "loss": 3.2899, "step": 13464 }, { "epoch": 0.66, "grad_norm": 0.5354148149490356, "learning_rate": 0.0005311765431949431, "loss": 3.2583, "step": 13465 }, { "epoch": 0.66, "grad_norm": 0.5163960456848145, "learning_rate": 0.0005311667303897337, "loss": 3.1102, "step": 13466 }, { "epoch": 0.66, "grad_norm": 0.5226514935493469, "learning_rate": 0.0005311569169756757, "loss": 3.1032, "step": 13467 }, { "epoch": 0.66, "grad_norm": 0.5380831956863403, "learning_rate": 0.000531147102952795, "loss": 3.3771, "step": 13468 }, { "epoch": 0.66, "grad_norm": 0.5179706811904907, "learning_rate": 0.0005311372883211171, "loss": 3.2395, "step": 13469 }, { "epoch": 0.66, "grad_norm": 0.5135579705238342, "learning_rate": 0.0005311274730806681, "loss": 3.3656, "step": 13470 }, { "epoch": 0.66, "grad_norm": 0.500043511390686, "learning_rate": 0.0005311176572314739, "loss": 3.354, "step": 13471 }, { "epoch": 0.66, "grad_norm": 0.5214130282402039, "learning_rate": 0.0005311078407735602, "loss": 3.2447, "step": 13472 }, { "epoch": 0.66, "grad_norm": 0.5328757762908936, "learning_rate": 0.000531098023706953, "loss": 3.0472, "step": 13473 }, { "epoch": 0.66, "grad_norm": 0.49766138195991516, "learning_rate": 0.000531088206031678, "loss": 3.2254, "step": 13474 }, { "epoch": 0.66, "grad_norm": 0.5517077445983887, "learning_rate": 0.0005310783877477612, "loss": 3.3586, "step": 13475 }, { "epoch": 0.66, "grad_norm": 0.5423430800437927, "learning_rate": 0.0005310685688552284, "loss": 3.0909, "step": 13476 }, { "epoch": 0.66, "grad_norm": 0.519158124923706, "learning_rate": 0.0005310587493541054, "loss": 3.1953, "step": 13477 }, { "epoch": 0.66, "grad_norm": 0.5957249999046326, "learning_rate": 0.0005310489292444182, "loss": 3.1486, "step": 13478 }, { "epoch": 0.66, "grad_norm": 0.5332064032554626, "learning_rate": 0.0005310391085261926, "loss": 3.3535, "step": 13479 }, { "epoch": 0.66, "grad_norm": 0.5151717066764832, "learning_rate": 0.0005310292871994544, "loss": 3.2729, "step": 13480 }, { "epoch": 0.66, "grad_norm": 0.5478836297988892, "learning_rate": 0.0005310194652642295, "loss": 3.1139, "step": 13481 }, { "epoch": 0.66, "grad_norm": 0.5319055318832397, "learning_rate": 0.0005310096427205437, "loss": 3.1867, "step": 13482 }, { "epoch": 0.66, "grad_norm": 0.5295076966285706, "learning_rate": 0.0005309998195684231, "loss": 3.4644, "step": 13483 }, { "epoch": 0.66, "grad_norm": 0.5261130332946777, "learning_rate": 0.0005309899958078934, "loss": 3.1089, "step": 13484 }, { "epoch": 0.66, "grad_norm": 0.504030168056488, "learning_rate": 0.0005309801714389805, "loss": 3.1605, "step": 13485 }, { "epoch": 0.66, "grad_norm": 0.5086562037467957, "learning_rate": 0.0005309703464617103, "loss": 3.2354, "step": 13486 }, { "epoch": 0.66, "grad_norm": 0.49703478813171387, "learning_rate": 0.0005309605208761087, "loss": 3.2152, "step": 13487 }, { "epoch": 0.66, "grad_norm": 0.5037059783935547, "learning_rate": 0.0005309506946822015, "loss": 3.1068, "step": 13488 }, { "epoch": 0.66, "grad_norm": 0.49587151408195496, "learning_rate": 0.0005309408678800145, "loss": 3.2943, "step": 13489 }, { "epoch": 0.66, "grad_norm": 0.5075664520263672, "learning_rate": 0.0005309310404695739, "loss": 3.2823, "step": 13490 }, { "epoch": 0.66, "grad_norm": 0.5008799433708191, "learning_rate": 0.0005309212124509052, "loss": 3.1233, "step": 13491 }, { "epoch": 0.66, "grad_norm": 0.49640950560569763, "learning_rate": 0.0005309113838240346, "loss": 3.316, "step": 13492 }, { "epoch": 0.66, "grad_norm": 0.5615620613098145, "learning_rate": 0.0005309015545889878, "loss": 3.3059, "step": 13493 }, { "epoch": 0.66, "grad_norm": 0.5070755481719971, "learning_rate": 0.0005308917247457907, "loss": 3.2173, "step": 13494 }, { "epoch": 0.66, "grad_norm": 0.48888513445854187, "learning_rate": 0.0005308818942944693, "loss": 3.1687, "step": 13495 }, { "epoch": 0.66, "grad_norm": 0.5695043802261353, "learning_rate": 0.0005308720632350494, "loss": 3.2426, "step": 13496 }, { "epoch": 0.66, "grad_norm": 0.533906102180481, "learning_rate": 0.000530862231567557, "loss": 3.1716, "step": 13497 }, { "epoch": 0.66, "grad_norm": 0.5260034203529358, "learning_rate": 0.0005308523992920178, "loss": 3.2352, "step": 13498 }, { "epoch": 0.66, "grad_norm": 0.5359194278717041, "learning_rate": 0.0005308425664084579, "loss": 3.2188, "step": 13499 }, { "epoch": 0.66, "grad_norm": 0.49645182490348816, "learning_rate": 0.0005308327329169029, "loss": 3.2868, "step": 13500 }, { "epoch": 0.66, "grad_norm": 0.5202005505561829, "learning_rate": 0.0005308228988173792, "loss": 3.3061, "step": 13501 }, { "epoch": 0.66, "grad_norm": 0.5067334771156311, "learning_rate": 0.0005308130641099122, "loss": 3.0534, "step": 13502 }, { "epoch": 0.66, "grad_norm": 0.5037301778793335, "learning_rate": 0.0005308032287945281, "loss": 3.3825, "step": 13503 }, { "epoch": 0.66, "grad_norm": 0.6401285529136658, "learning_rate": 0.0005307933928712527, "loss": 3.1316, "step": 13504 }, { "epoch": 0.66, "grad_norm": 0.516437292098999, "learning_rate": 0.000530783556340112, "loss": 3.095, "step": 13505 }, { "epoch": 0.66, "grad_norm": 0.5353245139122009, "learning_rate": 0.0005307737192011316, "loss": 3.0556, "step": 13506 }, { "epoch": 0.66, "grad_norm": 0.5425240993499756, "learning_rate": 0.0005307638814543378, "loss": 3.2073, "step": 13507 }, { "epoch": 0.66, "grad_norm": 0.5350566506385803, "learning_rate": 0.0005307540430997563, "loss": 3.1884, "step": 13508 }, { "epoch": 0.66, "grad_norm": 0.5366530418395996, "learning_rate": 0.0005307442041374131, "loss": 3.2401, "step": 13509 }, { "epoch": 0.66, "grad_norm": 0.5320677161216736, "learning_rate": 0.0005307343645673342, "loss": 3.1042, "step": 13510 }, { "epoch": 0.66, "grad_norm": 0.5887209177017212, "learning_rate": 0.0005307245243895451, "loss": 3.0003, "step": 13511 }, { "epoch": 0.66, "grad_norm": 0.5200484991073608, "learning_rate": 0.0005307146836040722, "loss": 3.3779, "step": 13512 }, { "epoch": 0.66, "grad_norm": 0.5377318263053894, "learning_rate": 0.0005307048422109412, "loss": 3.0319, "step": 13513 }, { "epoch": 0.66, "grad_norm": 0.5469724535942078, "learning_rate": 0.000530695000210178, "loss": 3.1924, "step": 13514 }, { "epoch": 0.66, "grad_norm": 0.5550596117973328, "learning_rate": 0.0005306851576018086, "loss": 3.2543, "step": 13515 }, { "epoch": 0.66, "grad_norm": 0.49396204948425293, "learning_rate": 0.0005306753143858588, "loss": 3.2256, "step": 13516 }, { "epoch": 0.66, "grad_norm": 0.49809572100639343, "learning_rate": 0.0005306654705623547, "loss": 3.362, "step": 13517 }, { "epoch": 0.66, "grad_norm": 0.4874090254306793, "learning_rate": 0.0005306556261313222, "loss": 3.0973, "step": 13518 }, { "epoch": 0.66, "grad_norm": 0.5111352801322937, "learning_rate": 0.0005306457810927872, "loss": 3.4853, "step": 13519 }, { "epoch": 0.66, "grad_norm": 0.5032990574836731, "learning_rate": 0.0005306359354467754, "loss": 3.2901, "step": 13520 }, { "epoch": 0.66, "grad_norm": 0.5103004574775696, "learning_rate": 0.0005306260891933131, "loss": 3.3242, "step": 13521 }, { "epoch": 0.66, "grad_norm": 0.5028958916664124, "learning_rate": 0.000530616242332426, "loss": 3.232, "step": 13522 }, { "epoch": 0.66, "grad_norm": 0.5463312864303589, "learning_rate": 0.0005306063948641401, "loss": 3.2748, "step": 13523 }, { "epoch": 0.66, "grad_norm": 0.5075322985649109, "learning_rate": 0.0005305965467884813, "loss": 3.1649, "step": 13524 }, { "epoch": 0.66, "grad_norm": 0.5177689790725708, "learning_rate": 0.0005305866981054757, "loss": 3.1198, "step": 13525 }, { "epoch": 0.66, "grad_norm": 0.5131239295005798, "learning_rate": 0.000530576848815149, "loss": 3.29, "step": 13526 }, { "epoch": 0.66, "grad_norm": 0.4989679455757141, "learning_rate": 0.0005305669989175273, "loss": 3.0434, "step": 13527 }, { "epoch": 0.66, "grad_norm": 0.5141971111297607, "learning_rate": 0.0005305571484126365, "loss": 3.1655, "step": 13528 }, { "epoch": 0.66, "grad_norm": 0.5180733799934387, "learning_rate": 0.0005305472973005025, "loss": 3.3093, "step": 13529 }, { "epoch": 0.66, "grad_norm": 0.5635950565338135, "learning_rate": 0.0005305374455811514, "loss": 3.1966, "step": 13530 }, { "epoch": 0.66, "grad_norm": 0.5000056624412537, "learning_rate": 0.0005305275932546089, "loss": 3.1471, "step": 13531 }, { "epoch": 0.66, "grad_norm": 0.5147111415863037, "learning_rate": 0.0005305177403209011, "loss": 3.0226, "step": 13532 }, { "epoch": 0.66, "grad_norm": 0.48060378432273865, "learning_rate": 0.0005305078867800541, "loss": 3.1014, "step": 13533 }, { "epoch": 0.66, "grad_norm": 0.569724440574646, "learning_rate": 0.0005304980326320935, "loss": 3.2077, "step": 13534 }, { "epoch": 0.66, "grad_norm": 0.5264111161231995, "learning_rate": 0.0005304881778770455, "loss": 3.0713, "step": 13535 }, { "epoch": 0.66, "grad_norm": 0.512325644493103, "learning_rate": 0.000530478322514936, "loss": 3.3984, "step": 13536 }, { "epoch": 0.66, "grad_norm": 0.5474256873130798, "learning_rate": 0.000530468466545791, "loss": 3.0077, "step": 13537 }, { "epoch": 0.66, "grad_norm": 0.5356321334838867, "learning_rate": 0.0005304586099696364, "loss": 3.3486, "step": 13538 }, { "epoch": 0.66, "grad_norm": 0.5049594044685364, "learning_rate": 0.0005304487527864982, "loss": 3.3997, "step": 13539 }, { "epoch": 0.66, "grad_norm": 0.5442296862602234, "learning_rate": 0.0005304388949964022, "loss": 3.3842, "step": 13540 }, { "epoch": 0.66, "grad_norm": 0.5284900665283203, "learning_rate": 0.0005304290365993747, "loss": 3.0788, "step": 13541 }, { "epoch": 0.66, "grad_norm": 0.5183324217796326, "learning_rate": 0.0005304191775954414, "loss": 3.157, "step": 13542 }, { "epoch": 0.66, "grad_norm": 0.49122077226638794, "learning_rate": 0.0005304093179846281, "loss": 3.0097, "step": 13543 }, { "epoch": 0.66, "grad_norm": 0.5494361519813538, "learning_rate": 0.0005303994577669612, "loss": 3.0168, "step": 13544 }, { "epoch": 0.66, "grad_norm": 0.5453931093215942, "learning_rate": 0.0005303895969424665, "loss": 3.2503, "step": 13545 }, { "epoch": 0.66, "grad_norm": 0.5414052605628967, "learning_rate": 0.0005303797355111699, "loss": 3.0531, "step": 13546 }, { "epoch": 0.66, "grad_norm": 0.5246322751045227, "learning_rate": 0.0005303698734730974, "loss": 3.3351, "step": 13547 }, { "epoch": 0.66, "grad_norm": 0.5333435535430908, "learning_rate": 0.0005303600108282749, "loss": 3.4273, "step": 13548 }, { "epoch": 0.66, "grad_norm": 0.511567234992981, "learning_rate": 0.0005303501475767287, "loss": 3.1325, "step": 13549 }, { "epoch": 0.66, "grad_norm": 0.4871523082256317, "learning_rate": 0.0005303402837184844, "loss": 3.2711, "step": 13550 }, { "epoch": 0.66, "grad_norm": 0.5091209411621094, "learning_rate": 0.0005303304192535681, "loss": 3.3666, "step": 13551 }, { "epoch": 0.66, "grad_norm": 0.5344579815864563, "learning_rate": 0.0005303205541820058, "loss": 3.1501, "step": 13552 }, { "epoch": 0.66, "grad_norm": 0.5157179832458496, "learning_rate": 0.0005303106885038235, "loss": 3.2942, "step": 13553 }, { "epoch": 0.66, "grad_norm": 0.4973103404045105, "learning_rate": 0.0005303008222190472, "loss": 3.1057, "step": 13554 }, { "epoch": 0.66, "grad_norm": 0.5082616209983826, "learning_rate": 0.0005302909553277029, "loss": 3.079, "step": 13555 }, { "epoch": 0.66, "grad_norm": 0.491454154253006, "learning_rate": 0.0005302810878298165, "loss": 3.2827, "step": 13556 }, { "epoch": 0.66, "grad_norm": 0.5277991890907288, "learning_rate": 0.000530271219725414, "loss": 3.3536, "step": 13557 }, { "epoch": 0.66, "grad_norm": 0.49939367175102234, "learning_rate": 0.0005302613510145215, "loss": 3.1075, "step": 13558 }, { "epoch": 0.66, "grad_norm": 0.5459829568862915, "learning_rate": 0.0005302514816971648, "loss": 3.2634, "step": 13559 }, { "epoch": 0.66, "grad_norm": 0.5232791304588318, "learning_rate": 0.0005302416117733701, "loss": 3.1575, "step": 13560 }, { "epoch": 0.66, "grad_norm": 0.5249155163764954, "learning_rate": 0.0005302317412431632, "loss": 3.1753, "step": 13561 }, { "epoch": 0.66, "grad_norm": 0.5318909883499146, "learning_rate": 0.0005302218701065703, "loss": 3.177, "step": 13562 }, { "epoch": 0.66, "grad_norm": 0.49357926845550537, "learning_rate": 0.0005302119983636174, "loss": 3.2127, "step": 13563 }, { "epoch": 0.66, "grad_norm": 0.5603306293487549, "learning_rate": 0.0005302021260143303, "loss": 3.2759, "step": 13564 }, { "epoch": 0.66, "grad_norm": 0.49928614497184753, "learning_rate": 0.0005301922530587351, "loss": 3.3354, "step": 13565 }, { "epoch": 0.66, "grad_norm": 0.5590007901191711, "learning_rate": 0.0005301823794968577, "loss": 3.1456, "step": 13566 }, { "epoch": 0.66, "grad_norm": 0.5050297975540161, "learning_rate": 0.0005301725053287243, "loss": 3.1094, "step": 13567 }, { "epoch": 0.66, "grad_norm": 0.532940149307251, "learning_rate": 0.0005301626305543608, "loss": 3.2555, "step": 13568 }, { "epoch": 0.66, "grad_norm": 0.511152982711792, "learning_rate": 0.0005301527551737933, "loss": 3.3773, "step": 13569 }, { "epoch": 0.67, "grad_norm": 0.5149011611938477, "learning_rate": 0.0005301428791870476, "loss": 3.2087, "step": 13570 }, { "epoch": 0.67, "grad_norm": 0.5097518563270569, "learning_rate": 0.00053013300259415, "loss": 3.2675, "step": 13571 }, { "epoch": 0.67, "grad_norm": 0.5337220430374146, "learning_rate": 0.0005301231253951263, "loss": 3.3959, "step": 13572 }, { "epoch": 0.67, "grad_norm": 0.5316744446754456, "learning_rate": 0.0005301132475900026, "loss": 3.4082, "step": 13573 }, { "epoch": 0.67, "grad_norm": 0.48296254873275757, "learning_rate": 0.0005301033691788048, "loss": 3.2966, "step": 13574 }, { "epoch": 0.67, "grad_norm": 0.5001648664474487, "learning_rate": 0.0005300934901615591, "loss": 3.2072, "step": 13575 }, { "epoch": 0.67, "grad_norm": 0.5572800040245056, "learning_rate": 0.0005300836105382914, "loss": 3.2663, "step": 13576 }, { "epoch": 0.67, "grad_norm": 0.5312752723693848, "learning_rate": 0.0005300737303090277, "loss": 3.2437, "step": 13577 }, { "epoch": 0.67, "grad_norm": 0.528778076171875, "learning_rate": 0.0005300638494737941, "loss": 3.2888, "step": 13578 }, { "epoch": 0.67, "grad_norm": 0.5192068219184875, "learning_rate": 0.0005300539680326168, "loss": 3.2725, "step": 13579 }, { "epoch": 0.67, "grad_norm": 0.5534399747848511, "learning_rate": 0.0005300440859855214, "loss": 3.1588, "step": 13580 }, { "epoch": 0.67, "grad_norm": 0.5094475746154785, "learning_rate": 0.0005300342033325342, "loss": 3.4536, "step": 13581 }, { "epoch": 0.67, "grad_norm": 0.48778852820396423, "learning_rate": 0.0005300243200736811, "loss": 3.2469, "step": 13582 }, { "epoch": 0.67, "grad_norm": 0.47840654850006104, "learning_rate": 0.0005300144362089883, "loss": 3.2045, "step": 13583 }, { "epoch": 0.67, "grad_norm": 0.5369465947151184, "learning_rate": 0.0005300045517384818, "loss": 3.1993, "step": 13584 }, { "epoch": 0.67, "grad_norm": 0.5591325759887695, "learning_rate": 0.0005299946666621875, "loss": 3.1054, "step": 13585 }, { "epoch": 0.67, "grad_norm": 0.5063976645469666, "learning_rate": 0.0005299847809801314, "loss": 2.9926, "step": 13586 }, { "epoch": 0.67, "grad_norm": 0.5150634050369263, "learning_rate": 0.0005299748946923399, "loss": 3.2566, "step": 13587 }, { "epoch": 0.67, "grad_norm": 0.5185797214508057, "learning_rate": 0.0005299650077988386, "loss": 3.0262, "step": 13588 }, { "epoch": 0.67, "grad_norm": 0.5387712717056274, "learning_rate": 0.0005299551202996537, "loss": 3.0643, "step": 13589 }, { "epoch": 0.67, "grad_norm": 0.5186799764633179, "learning_rate": 0.0005299452321948114, "loss": 3.2493, "step": 13590 }, { "epoch": 0.67, "grad_norm": 0.5232559442520142, "learning_rate": 0.0005299353434843376, "loss": 3.5282, "step": 13591 }, { "epoch": 0.67, "grad_norm": 0.5263158679008484, "learning_rate": 0.0005299254541682583, "loss": 3.0349, "step": 13592 }, { "epoch": 0.67, "grad_norm": 0.5348055362701416, "learning_rate": 0.0005299155642465996, "loss": 3.1144, "step": 13593 }, { "epoch": 0.67, "grad_norm": 0.515724241733551, "learning_rate": 0.0005299056737193876, "loss": 3.3351, "step": 13594 }, { "epoch": 0.67, "grad_norm": 0.5149891972541809, "learning_rate": 0.0005298957825866482, "loss": 3.1768, "step": 13595 }, { "epoch": 0.67, "grad_norm": 0.509054958820343, "learning_rate": 0.0005298858908484076, "loss": 3.3885, "step": 13596 }, { "epoch": 0.67, "grad_norm": 0.5293143391609192, "learning_rate": 0.0005298759985046919, "loss": 3.3963, "step": 13597 }, { "epoch": 0.67, "grad_norm": 0.5564351677894592, "learning_rate": 0.0005298661055555269, "loss": 3.2376, "step": 13598 }, { "epoch": 0.67, "grad_norm": 0.5585691928863525, "learning_rate": 0.000529856212000939, "loss": 3.3014, "step": 13599 }, { "epoch": 0.67, "grad_norm": 0.5429052114486694, "learning_rate": 0.000529846317840954, "loss": 3.2396, "step": 13600 }, { "epoch": 0.67, "grad_norm": 0.5204060673713684, "learning_rate": 0.000529836423075598, "loss": 3.3343, "step": 13601 }, { "epoch": 0.67, "grad_norm": 0.4938051998615265, "learning_rate": 0.0005298265277048971, "loss": 3.2435, "step": 13602 }, { "epoch": 0.67, "grad_norm": 0.4976741373538971, "learning_rate": 0.0005298166317288774, "loss": 3.2764, "step": 13603 }, { "epoch": 0.67, "grad_norm": 0.5170921683311462, "learning_rate": 0.0005298067351475649, "loss": 3.1395, "step": 13604 }, { "epoch": 0.67, "grad_norm": 0.4929213225841522, "learning_rate": 0.0005297968379609858, "loss": 3.2229, "step": 13605 }, { "epoch": 0.67, "grad_norm": 0.5679933428764343, "learning_rate": 0.0005297869401691658, "loss": 3.3445, "step": 13606 }, { "epoch": 0.67, "grad_norm": 0.5538800954818726, "learning_rate": 0.0005297770417721314, "loss": 3.2866, "step": 13607 }, { "epoch": 0.67, "grad_norm": 0.5196000933647156, "learning_rate": 0.0005297671427699084, "loss": 3.204, "step": 13608 }, { "epoch": 0.67, "grad_norm": 0.5372135043144226, "learning_rate": 0.0005297572431625229, "loss": 3.2732, "step": 13609 }, { "epoch": 0.67, "grad_norm": 0.5452011227607727, "learning_rate": 0.0005297473429500013, "loss": 3.1766, "step": 13610 }, { "epoch": 0.67, "grad_norm": 0.4952123463153839, "learning_rate": 0.0005297374421323692, "loss": 3.1995, "step": 13611 }, { "epoch": 0.67, "grad_norm": 0.506626307964325, "learning_rate": 0.000529727540709653, "loss": 3.1581, "step": 13612 }, { "epoch": 0.67, "grad_norm": 0.5172677636146545, "learning_rate": 0.0005297176386818786, "loss": 2.8921, "step": 13613 }, { "epoch": 0.67, "grad_norm": 0.5049740076065063, "learning_rate": 0.0005297077360490722, "loss": 3.208, "step": 13614 }, { "epoch": 0.67, "grad_norm": 0.5029763579368591, "learning_rate": 0.0005296978328112598, "loss": 3.1353, "step": 13615 }, { "epoch": 0.67, "grad_norm": 0.5007016658782959, "learning_rate": 0.0005296879289684675, "loss": 3.1918, "step": 13616 }, { "epoch": 0.67, "grad_norm": 0.5236932039260864, "learning_rate": 0.0005296780245207215, "loss": 3.0716, "step": 13617 }, { "epoch": 0.67, "grad_norm": 0.5022386908531189, "learning_rate": 0.0005296681194680477, "loss": 3.3136, "step": 13618 }, { "epoch": 0.67, "grad_norm": 0.50746089220047, "learning_rate": 0.0005296582138104723, "loss": 3.3146, "step": 13619 }, { "epoch": 0.67, "grad_norm": 0.482505738735199, "learning_rate": 0.0005296483075480213, "loss": 3.3466, "step": 13620 }, { "epoch": 0.67, "grad_norm": 0.49940067529678345, "learning_rate": 0.0005296384006807209, "loss": 3.2996, "step": 13621 }, { "epoch": 0.67, "grad_norm": 0.49896591901779175, "learning_rate": 0.0005296284932085972, "loss": 3.1868, "step": 13622 }, { "epoch": 0.67, "grad_norm": 0.5380591750144958, "learning_rate": 0.0005296185851316761, "loss": 3.0733, "step": 13623 }, { "epoch": 0.67, "grad_norm": 0.5520894527435303, "learning_rate": 0.0005296086764499839, "loss": 3.1411, "step": 13624 }, { "epoch": 0.67, "grad_norm": 0.5142804980278015, "learning_rate": 0.0005295987671635468, "loss": 3.4073, "step": 13625 }, { "epoch": 0.67, "grad_norm": 0.5468404293060303, "learning_rate": 0.0005295888572723906, "loss": 3.0009, "step": 13626 }, { "epoch": 0.67, "grad_norm": 0.5099403858184814, "learning_rate": 0.0005295789467765414, "loss": 3.3411, "step": 13627 }, { "epoch": 0.67, "grad_norm": 0.4918704330921173, "learning_rate": 0.0005295690356760256, "loss": 3.0391, "step": 13628 }, { "epoch": 0.67, "grad_norm": 0.5046103596687317, "learning_rate": 0.0005295591239708691, "loss": 3.4971, "step": 13629 }, { "epoch": 0.67, "grad_norm": 0.5209892392158508, "learning_rate": 0.0005295492116610982, "loss": 2.7844, "step": 13630 }, { "epoch": 0.67, "grad_norm": 0.5438646078109741, "learning_rate": 0.0005295392987467387, "loss": 3.1362, "step": 13631 }, { "epoch": 0.67, "grad_norm": 0.5239036083221436, "learning_rate": 0.0005295293852278168, "loss": 3.3859, "step": 13632 }, { "epoch": 0.67, "grad_norm": 0.529381275177002, "learning_rate": 0.0005295194711043588, "loss": 3.2098, "step": 13633 }, { "epoch": 0.67, "grad_norm": 0.5509489178657532, "learning_rate": 0.0005295095563763907, "loss": 3.2301, "step": 13634 }, { "epoch": 0.67, "grad_norm": 0.5186215043067932, "learning_rate": 0.0005294996410439384, "loss": 3.159, "step": 13635 }, { "epoch": 0.67, "grad_norm": 0.5146812796592712, "learning_rate": 0.0005294897251070283, "loss": 3.2299, "step": 13636 }, { "epoch": 0.67, "grad_norm": 0.5205144882202148, "learning_rate": 0.0005294798085656865, "loss": 3.1246, "step": 13637 }, { "epoch": 0.67, "grad_norm": 0.5245778560638428, "learning_rate": 0.0005294698914199391, "loss": 3.3632, "step": 13638 }, { "epoch": 0.67, "grad_norm": 0.5007055997848511, "learning_rate": 0.0005294599736698121, "loss": 3.1628, "step": 13639 }, { "epoch": 0.67, "grad_norm": 0.5703233480453491, "learning_rate": 0.0005294500553153316, "loss": 3.3017, "step": 13640 }, { "epoch": 0.67, "grad_norm": 0.5297021269798279, "learning_rate": 0.0005294401363565239, "loss": 3.3155, "step": 13641 }, { "epoch": 0.67, "grad_norm": 0.5470933318138123, "learning_rate": 0.000529430216793415, "loss": 3.1739, "step": 13642 }, { "epoch": 0.67, "grad_norm": 0.5373654961585999, "learning_rate": 0.0005294202966260312, "loss": 3.4286, "step": 13643 }, { "epoch": 0.67, "grad_norm": 0.5070244669914246, "learning_rate": 0.0005294103758543983, "loss": 3.1327, "step": 13644 }, { "epoch": 0.67, "grad_norm": 0.5400650501251221, "learning_rate": 0.0005294004544785428, "loss": 3.1624, "step": 13645 }, { "epoch": 0.67, "grad_norm": 0.5214609503746033, "learning_rate": 0.0005293905324984905, "loss": 3.1664, "step": 13646 }, { "epoch": 0.67, "grad_norm": 0.5696268081665039, "learning_rate": 0.0005293806099142677, "loss": 3.1741, "step": 13647 }, { "epoch": 0.67, "grad_norm": 0.5091151595115662, "learning_rate": 0.0005293706867259006, "loss": 3.3379, "step": 13648 }, { "epoch": 0.67, "grad_norm": 0.5270219445228577, "learning_rate": 0.0005293607629334152, "loss": 3.1869, "step": 13649 }, { "epoch": 0.67, "grad_norm": 0.5341525077819824, "learning_rate": 0.0005293508385368378, "loss": 3.1583, "step": 13650 }, { "epoch": 0.67, "grad_norm": 0.5189785361289978, "learning_rate": 0.0005293409135361943, "loss": 3.286, "step": 13651 }, { "epoch": 0.67, "grad_norm": 0.5022042393684387, "learning_rate": 0.0005293309879315111, "loss": 3.2813, "step": 13652 }, { "epoch": 0.67, "grad_norm": 0.4763084053993225, "learning_rate": 0.0005293210617228141, "loss": 3.3161, "step": 13653 }, { "epoch": 0.67, "grad_norm": 0.510564386844635, "learning_rate": 0.0005293111349101296, "loss": 3.1931, "step": 13654 }, { "epoch": 0.67, "grad_norm": 0.531646728515625, "learning_rate": 0.0005293012074934836, "loss": 3.2027, "step": 13655 }, { "epoch": 0.67, "grad_norm": 0.5853457450866699, "learning_rate": 0.0005292912794729025, "loss": 3.1992, "step": 13656 }, { "epoch": 0.67, "grad_norm": 0.5224567651748657, "learning_rate": 0.0005292813508484122, "loss": 3.3783, "step": 13657 }, { "epoch": 0.67, "grad_norm": 0.5682322978973389, "learning_rate": 0.000529271421620039, "loss": 3.0409, "step": 13658 }, { "epoch": 0.67, "grad_norm": 0.48867014050483704, "learning_rate": 0.000529261491787809, "loss": 3.2267, "step": 13659 }, { "epoch": 0.67, "grad_norm": 0.5091564059257507, "learning_rate": 0.0005292515613517483, "loss": 3.2667, "step": 13660 }, { "epoch": 0.67, "grad_norm": 0.5306949615478516, "learning_rate": 0.0005292416303118832, "loss": 3.027, "step": 13661 }, { "epoch": 0.67, "grad_norm": 0.48959165811538696, "learning_rate": 0.0005292316986682396, "loss": 3.1372, "step": 13662 }, { "epoch": 0.67, "grad_norm": 0.5132994055747986, "learning_rate": 0.000529221766420844, "loss": 3.0894, "step": 13663 }, { "epoch": 0.67, "grad_norm": 0.48111510276794434, "learning_rate": 0.0005292118335697223, "loss": 3.4498, "step": 13664 }, { "epoch": 0.67, "grad_norm": 0.522191047668457, "learning_rate": 0.0005292019001149008, "loss": 3.1612, "step": 13665 }, { "epoch": 0.67, "grad_norm": 0.4841899275779724, "learning_rate": 0.0005291919660564055, "loss": 3.299, "step": 13666 }, { "epoch": 0.67, "grad_norm": 0.5242030620574951, "learning_rate": 0.0005291820313942627, "loss": 3.4106, "step": 13667 }, { "epoch": 0.67, "grad_norm": 0.5671795010566711, "learning_rate": 0.0005291720961284986, "loss": 3.073, "step": 13668 }, { "epoch": 0.67, "grad_norm": 0.537661612033844, "learning_rate": 0.0005291621602591393, "loss": 3.2446, "step": 13669 }, { "epoch": 0.67, "grad_norm": 0.530839204788208, "learning_rate": 0.0005291522237862109, "loss": 3.369, "step": 13670 }, { "epoch": 0.67, "grad_norm": 0.5125889182090759, "learning_rate": 0.0005291422867097397, "loss": 3.1252, "step": 13671 }, { "epoch": 0.67, "grad_norm": 0.5530248880386353, "learning_rate": 0.0005291323490297518, "loss": 3.025, "step": 13672 }, { "epoch": 0.67, "grad_norm": 0.5228330492973328, "learning_rate": 0.0005291224107462734, "loss": 3.2608, "step": 13673 }, { "epoch": 0.67, "grad_norm": 0.5220538973808289, "learning_rate": 0.0005291124718593307, "loss": 3.1958, "step": 13674 }, { "epoch": 0.67, "grad_norm": 0.5133023858070374, "learning_rate": 0.0005291025323689497, "loss": 3.341, "step": 13675 }, { "epoch": 0.67, "grad_norm": 0.5261237621307373, "learning_rate": 0.0005290925922751569, "loss": 3.1028, "step": 13676 }, { "epoch": 0.67, "grad_norm": 0.5116633176803589, "learning_rate": 0.0005290826515779782, "loss": 3.0981, "step": 13677 }, { "epoch": 0.67, "grad_norm": 0.4918628931045532, "learning_rate": 0.00052907271027744, "loss": 3.0695, "step": 13678 }, { "epoch": 0.67, "grad_norm": 0.5067062973976135, "learning_rate": 0.0005290627683735682, "loss": 3.1897, "step": 13679 }, { "epoch": 0.67, "grad_norm": 0.5566603541374207, "learning_rate": 0.0005290528258663892, "loss": 3.2876, "step": 13680 }, { "epoch": 0.67, "grad_norm": 0.5340036153793335, "learning_rate": 0.0005290428827559292, "loss": 3.0107, "step": 13681 }, { "epoch": 0.67, "grad_norm": 0.5094714164733887, "learning_rate": 0.0005290329390422142, "loss": 3.1796, "step": 13682 }, { "epoch": 0.67, "grad_norm": 0.5311017632484436, "learning_rate": 0.0005290229947252707, "loss": 3.1173, "step": 13683 }, { "epoch": 0.67, "grad_norm": 0.5469943881034851, "learning_rate": 0.0005290130498051246, "loss": 3.2645, "step": 13684 }, { "epoch": 0.67, "grad_norm": 0.49231579899787903, "learning_rate": 0.0005290031042818022, "loss": 3.0617, "step": 13685 }, { "epoch": 0.67, "grad_norm": 0.5386189222335815, "learning_rate": 0.0005289931581553297, "loss": 3.2606, "step": 13686 }, { "epoch": 0.67, "grad_norm": 0.5233540534973145, "learning_rate": 0.0005289832114257333, "loss": 3.2946, "step": 13687 }, { "epoch": 0.67, "grad_norm": 0.5471063256263733, "learning_rate": 0.0005289732640930393, "loss": 3.0275, "step": 13688 }, { "epoch": 0.67, "grad_norm": 0.5296842455863953, "learning_rate": 0.0005289633161572737, "loss": 3.3652, "step": 13689 }, { "epoch": 0.67, "grad_norm": 0.541083037853241, "learning_rate": 0.0005289533676184627, "loss": 3.2787, "step": 13690 }, { "epoch": 0.67, "grad_norm": 0.527202308177948, "learning_rate": 0.0005289434184766326, "loss": 3.1486, "step": 13691 }, { "epoch": 0.67, "grad_norm": 0.5069360136985779, "learning_rate": 0.0005289334687318098, "loss": 3.2396, "step": 13692 }, { "epoch": 0.67, "grad_norm": 0.5278681516647339, "learning_rate": 0.00052892351838402, "loss": 3.3923, "step": 13693 }, { "epoch": 0.67, "grad_norm": 0.5264055728912354, "learning_rate": 0.0005289135674332899, "loss": 3.342, "step": 13694 }, { "epoch": 0.67, "grad_norm": 0.5079107284545898, "learning_rate": 0.0005289036158796455, "loss": 3.0714, "step": 13695 }, { "epoch": 0.67, "grad_norm": 0.5393988490104675, "learning_rate": 0.000528893663723113, "loss": 3.156, "step": 13696 }, { "epoch": 0.67, "grad_norm": 0.5057147145271301, "learning_rate": 0.0005288837109637187, "loss": 3.2109, "step": 13697 }, { "epoch": 0.67, "grad_norm": 0.5622931718826294, "learning_rate": 0.0005288737576014887, "loss": 3.3168, "step": 13698 }, { "epoch": 0.67, "grad_norm": 0.5340046286582947, "learning_rate": 0.0005288638036364493, "loss": 3.1038, "step": 13699 }, { "epoch": 0.67, "grad_norm": 0.5570096969604492, "learning_rate": 0.0005288538490686267, "loss": 3.2489, "step": 13700 }, { "epoch": 0.67, "grad_norm": 0.5238069891929626, "learning_rate": 0.0005288438938980471, "loss": 3.2243, "step": 13701 }, { "epoch": 0.67, "grad_norm": 0.48438480496406555, "learning_rate": 0.0005288339381247367, "loss": 3.119, "step": 13702 }, { "epoch": 0.67, "grad_norm": 0.5663520693778992, "learning_rate": 0.0005288239817487217, "loss": 3.2317, "step": 13703 }, { "epoch": 0.67, "grad_norm": 0.5262908935546875, "learning_rate": 0.0005288140247700285, "loss": 3.1207, "step": 13704 }, { "epoch": 0.67, "grad_norm": 0.5518969893455505, "learning_rate": 0.0005288040671886831, "loss": 3.1587, "step": 13705 }, { "epoch": 0.67, "grad_norm": 0.5191221833229065, "learning_rate": 0.0005287941090047118, "loss": 3.2533, "step": 13706 }, { "epoch": 0.67, "grad_norm": 0.5094031095504761, "learning_rate": 0.0005287841502181409, "loss": 3.2238, "step": 13707 }, { "epoch": 0.67, "grad_norm": 0.512729823589325, "learning_rate": 0.0005287741908289967, "loss": 3.2058, "step": 13708 }, { "epoch": 0.67, "grad_norm": 0.5242785215377808, "learning_rate": 0.0005287642308373051, "loss": 3.2444, "step": 13709 }, { "epoch": 0.67, "grad_norm": 0.5172874927520752, "learning_rate": 0.0005287542702430926, "loss": 3.3319, "step": 13710 }, { "epoch": 0.67, "grad_norm": 0.48693451285362244, "learning_rate": 0.0005287443090463854, "loss": 3.3414, "step": 13711 }, { "epoch": 0.67, "grad_norm": 0.5203180313110352, "learning_rate": 0.0005287343472472097, "loss": 3.1379, "step": 13712 }, { "epoch": 0.67, "grad_norm": 0.5015645623207092, "learning_rate": 0.0005287243848455918, "loss": 3.2083, "step": 13713 }, { "epoch": 0.67, "grad_norm": 0.5029230117797852, "learning_rate": 0.0005287144218415579, "loss": 2.9887, "step": 13714 }, { "epoch": 0.67, "grad_norm": 0.523784875869751, "learning_rate": 0.0005287044582351341, "loss": 2.9404, "step": 13715 }, { "epoch": 0.67, "grad_norm": 0.49073922634124756, "learning_rate": 0.000528694494026347, "loss": 3.119, "step": 13716 }, { "epoch": 0.67, "grad_norm": 0.5473636984825134, "learning_rate": 0.0005286845292152224, "loss": 3.2291, "step": 13717 }, { "epoch": 0.67, "grad_norm": 0.4929855763912201, "learning_rate": 0.0005286745638017868, "loss": 3.1537, "step": 13718 }, { "epoch": 0.67, "grad_norm": 0.5156190991401672, "learning_rate": 0.0005286645977860664, "loss": 3.1542, "step": 13719 }, { "epoch": 0.67, "grad_norm": 0.4985933303833008, "learning_rate": 0.0005286546311680876, "loss": 3.0746, "step": 13720 }, { "epoch": 0.67, "grad_norm": 0.5488167405128479, "learning_rate": 0.0005286446639478764, "loss": 3.2484, "step": 13721 }, { "epoch": 0.67, "grad_norm": 0.5215466022491455, "learning_rate": 0.0005286346961254591, "loss": 3.024, "step": 13722 }, { "epoch": 0.67, "grad_norm": 0.5182087421417236, "learning_rate": 0.0005286247277008621, "loss": 3.2172, "step": 13723 }, { "epoch": 0.67, "grad_norm": 0.5041150450706482, "learning_rate": 0.0005286147586741115, "loss": 3.0219, "step": 13724 }, { "epoch": 0.67, "grad_norm": 0.5143710970878601, "learning_rate": 0.0005286047890452337, "loss": 3.1054, "step": 13725 }, { "epoch": 0.67, "grad_norm": 0.5302610993385315, "learning_rate": 0.0005285948188142549, "loss": 3.19, "step": 13726 }, { "epoch": 0.67, "grad_norm": 0.5241024494171143, "learning_rate": 0.0005285848479812012, "loss": 3.2024, "step": 13727 }, { "epoch": 0.67, "grad_norm": 0.5108481645584106, "learning_rate": 0.0005285748765460991, "loss": 3.3537, "step": 13728 }, { "epoch": 0.67, "grad_norm": 0.4921933710575104, "learning_rate": 0.0005285649045089748, "loss": 3.1969, "step": 13729 }, { "epoch": 0.67, "grad_norm": 0.5298369526863098, "learning_rate": 0.0005285549318698544, "loss": 3.1773, "step": 13730 }, { "epoch": 0.67, "grad_norm": 0.5141018033027649, "learning_rate": 0.0005285449586287644, "loss": 3.2114, "step": 13731 }, { "epoch": 0.67, "grad_norm": 0.5361664295196533, "learning_rate": 0.0005285349847857309, "loss": 3.0577, "step": 13732 }, { "epoch": 0.67, "grad_norm": 0.5357168912887573, "learning_rate": 0.0005285250103407804, "loss": 3.2206, "step": 13733 }, { "epoch": 0.67, "grad_norm": 0.5472400784492493, "learning_rate": 0.0005285150352939388, "loss": 3.0433, "step": 13734 }, { "epoch": 0.67, "grad_norm": 0.49933162331581116, "learning_rate": 0.0005285050596452326, "loss": 3.4927, "step": 13735 }, { "epoch": 0.67, "grad_norm": 0.5269633531570435, "learning_rate": 0.0005284950833946882, "loss": 3.1318, "step": 13736 }, { "epoch": 0.67, "grad_norm": 0.5392086505889893, "learning_rate": 0.0005284851065423316, "loss": 3.3404, "step": 13737 }, { "epoch": 0.67, "grad_norm": 0.4980263411998749, "learning_rate": 0.0005284751290881893, "loss": 3.2672, "step": 13738 }, { "epoch": 0.67, "grad_norm": 0.49644413590431213, "learning_rate": 0.0005284651510322874, "loss": 3.2669, "step": 13739 }, { "epoch": 0.67, "grad_norm": 0.5201795697212219, "learning_rate": 0.0005284551723746523, "loss": 3.4163, "step": 13740 }, { "epoch": 0.67, "grad_norm": 0.5354999899864197, "learning_rate": 0.0005284451931153102, "loss": 3.2036, "step": 13741 }, { "epoch": 0.67, "grad_norm": 0.5267964601516724, "learning_rate": 0.0005284352132542873, "loss": 3.118, "step": 13742 }, { "epoch": 0.67, "grad_norm": 0.5029120445251465, "learning_rate": 0.0005284252327916102, "loss": 3.2955, "step": 13743 }, { "epoch": 0.67, "grad_norm": 0.5081202983856201, "learning_rate": 0.000528415251727305, "loss": 3.2133, "step": 13744 }, { "epoch": 0.67, "grad_norm": 0.517963707447052, "learning_rate": 0.000528405270061398, "loss": 2.9726, "step": 13745 }, { "epoch": 0.67, "grad_norm": 0.55925452709198, "learning_rate": 0.0005283952877939153, "loss": 3.3627, "step": 13746 }, { "epoch": 0.67, "grad_norm": 0.5444297790527344, "learning_rate": 0.0005283853049248834, "loss": 3.1062, "step": 13747 }, { "epoch": 0.67, "grad_norm": 0.48813849687576294, "learning_rate": 0.0005283753214543287, "loss": 3.2675, "step": 13748 }, { "epoch": 0.67, "grad_norm": 0.5234923362731934, "learning_rate": 0.0005283653373822773, "loss": 3.2118, "step": 13749 }, { "epoch": 0.67, "grad_norm": 0.487904816865921, "learning_rate": 0.0005283553527087556, "loss": 3.2319, "step": 13750 }, { "epoch": 0.67, "grad_norm": 0.5277583599090576, "learning_rate": 0.0005283453674337898, "loss": 3.3372, "step": 13751 }, { "epoch": 0.67, "grad_norm": 0.5574771165847778, "learning_rate": 0.0005283353815574063, "loss": 3.1972, "step": 13752 }, { "epoch": 0.67, "grad_norm": 0.5329998731613159, "learning_rate": 0.0005283253950796312, "loss": 3.1427, "step": 13753 }, { "epoch": 0.67, "grad_norm": 0.4901135265827179, "learning_rate": 0.000528315408000491, "loss": 3.5074, "step": 13754 }, { "epoch": 0.67, "grad_norm": 0.524285078048706, "learning_rate": 0.000528305420320012, "loss": 3.2045, "step": 13755 }, { "epoch": 0.67, "grad_norm": 0.5120623707771301, "learning_rate": 0.0005282954320382205, "loss": 3.2821, "step": 13756 }, { "epoch": 0.67, "grad_norm": 0.4949445426464081, "learning_rate": 0.0005282854431551428, "loss": 3.238, "step": 13757 }, { "epoch": 0.67, "grad_norm": 0.5326241850852966, "learning_rate": 0.0005282754536708051, "loss": 3.193, "step": 13758 }, { "epoch": 0.67, "grad_norm": 0.5440043807029724, "learning_rate": 0.0005282654635852339, "loss": 3.0779, "step": 13759 }, { "epoch": 0.67, "grad_norm": 0.5041990876197815, "learning_rate": 0.0005282554728984551, "loss": 3.1848, "step": 13760 }, { "epoch": 0.67, "grad_norm": 0.5535484552383423, "learning_rate": 0.0005282454816104956, "loss": 3.2104, "step": 13761 }, { "epoch": 0.67, "grad_norm": 0.503193736076355, "learning_rate": 0.0005282354897213812, "loss": 3.0778, "step": 13762 }, { "epoch": 0.67, "grad_norm": 0.49220454692840576, "learning_rate": 0.0005282254972311386, "loss": 3.2279, "step": 13763 }, { "epoch": 0.67, "grad_norm": 0.516409695148468, "learning_rate": 0.000528215504139794, "loss": 3.0826, "step": 13764 }, { "epoch": 0.67, "grad_norm": 0.5269769430160522, "learning_rate": 0.0005282055104473736, "loss": 3.3278, "step": 13765 }, { "epoch": 0.67, "grad_norm": 0.5283230543136597, "learning_rate": 0.0005281955161539039, "loss": 3.1647, "step": 13766 }, { "epoch": 0.67, "grad_norm": 0.5059794187545776, "learning_rate": 0.0005281855212594111, "loss": 3.2336, "step": 13767 }, { "epoch": 0.67, "grad_norm": 0.53988116979599, "learning_rate": 0.0005281755257639214, "loss": 3.0983, "step": 13768 }, { "epoch": 0.67, "grad_norm": 0.520753800868988, "learning_rate": 0.0005281655296674615, "loss": 3.1827, "step": 13769 }, { "epoch": 0.67, "grad_norm": 0.5291000008583069, "learning_rate": 0.0005281555329700574, "loss": 3.2596, "step": 13770 }, { "epoch": 0.67, "grad_norm": 0.5507010817527771, "learning_rate": 0.0005281455356717355, "loss": 3.1211, "step": 13771 }, { "epoch": 0.67, "grad_norm": 0.49522608518600464, "learning_rate": 0.0005281355377725223, "loss": 3.1505, "step": 13772 }, { "epoch": 0.67, "grad_norm": 0.5150662660598755, "learning_rate": 0.0005281255392724438, "loss": 3.3192, "step": 13773 }, { "epoch": 0.68, "grad_norm": 0.5103153586387634, "learning_rate": 0.0005281155401715267, "loss": 3.0925, "step": 13774 }, { "epoch": 0.68, "grad_norm": 0.5034166574478149, "learning_rate": 0.000528105540469797, "loss": 3.3163, "step": 13775 }, { "epoch": 0.68, "grad_norm": 0.5109154582023621, "learning_rate": 0.0005280955401672814, "loss": 3.3177, "step": 13776 }, { "epoch": 0.68, "grad_norm": 0.4964618682861328, "learning_rate": 0.0005280855392640059, "loss": 3.0376, "step": 13777 }, { "epoch": 0.68, "grad_norm": 0.47001469135284424, "learning_rate": 0.000528075537759997, "loss": 3.3065, "step": 13778 }, { "epoch": 0.68, "grad_norm": 0.49114564061164856, "learning_rate": 0.0005280655356552811, "loss": 3.3961, "step": 13779 }, { "epoch": 0.68, "grad_norm": 0.5325284600257874, "learning_rate": 0.0005280555329498845, "loss": 3.2993, "step": 13780 }, { "epoch": 0.68, "grad_norm": 0.5186975598335266, "learning_rate": 0.0005280455296438333, "loss": 3.1416, "step": 13781 }, { "epoch": 0.68, "grad_norm": 0.5972892045974731, "learning_rate": 0.0005280355257371544, "loss": 3.4019, "step": 13782 }, { "epoch": 0.68, "grad_norm": 0.5253905653953552, "learning_rate": 0.0005280255212298735, "loss": 3.1386, "step": 13783 }, { "epoch": 0.68, "grad_norm": 0.5274996161460876, "learning_rate": 0.0005280155161220174, "loss": 3.2277, "step": 13784 }, { "epoch": 0.68, "grad_norm": 0.5044057369232178, "learning_rate": 0.0005280055104136123, "loss": 3.2136, "step": 13785 }, { "epoch": 0.68, "grad_norm": 0.49711552262306213, "learning_rate": 0.0005279955041046845, "loss": 3.3523, "step": 13786 }, { "epoch": 0.68, "grad_norm": 0.5024986267089844, "learning_rate": 0.0005279854971952606, "loss": 3.0944, "step": 13787 }, { "epoch": 0.68, "grad_norm": 0.5386544466018677, "learning_rate": 0.0005279754896853667, "loss": 3.4201, "step": 13788 }, { "epoch": 0.68, "grad_norm": 0.5329682230949402, "learning_rate": 0.0005279654815750291, "loss": 3.145, "step": 13789 }, { "epoch": 0.68, "grad_norm": 0.5430646538734436, "learning_rate": 0.0005279554728642744, "loss": 3.4528, "step": 13790 }, { "epoch": 0.68, "grad_norm": 0.5068038105964661, "learning_rate": 0.0005279454635531289, "loss": 3.2641, "step": 13791 }, { "epoch": 0.68, "grad_norm": 0.4926636219024658, "learning_rate": 0.0005279354536416188, "loss": 3.1893, "step": 13792 }, { "epoch": 0.68, "grad_norm": 0.5283586382865906, "learning_rate": 0.0005279254431297707, "loss": 3.173, "step": 13793 }, { "epoch": 0.68, "grad_norm": 0.5072885155677795, "learning_rate": 0.0005279154320176107, "loss": 3.0823, "step": 13794 }, { "epoch": 0.68, "grad_norm": 0.4954681694507599, "learning_rate": 0.0005279054203051655, "loss": 3.2519, "step": 13795 }, { "epoch": 0.68, "grad_norm": 0.50550377368927, "learning_rate": 0.0005278954079924611, "loss": 3.2629, "step": 13796 }, { "epoch": 0.68, "grad_norm": 0.5151498913764954, "learning_rate": 0.0005278853950795242, "loss": 3.0185, "step": 13797 }, { "epoch": 0.68, "grad_norm": 0.5135114192962646, "learning_rate": 0.0005278753815663811, "loss": 3.1437, "step": 13798 }, { "epoch": 0.68, "grad_norm": 0.49339157342910767, "learning_rate": 0.0005278653674530579, "loss": 3.2054, "step": 13799 }, { "epoch": 0.68, "grad_norm": 0.5128827691078186, "learning_rate": 0.0005278553527395813, "loss": 3.1548, "step": 13800 }, { "epoch": 0.68, "grad_norm": 0.5171065330505371, "learning_rate": 0.0005278453374259776, "loss": 3.2015, "step": 13801 }, { "epoch": 0.68, "grad_norm": 0.5121150612831116, "learning_rate": 0.0005278353215122729, "loss": 3.3808, "step": 13802 }, { "epoch": 0.68, "grad_norm": 0.5693663954734802, "learning_rate": 0.0005278253049984939, "loss": 3.0008, "step": 13803 }, { "epoch": 0.68, "grad_norm": 0.506459653377533, "learning_rate": 0.000527815287884667, "loss": 3.16, "step": 13804 }, { "epoch": 0.68, "grad_norm": 0.5166449546813965, "learning_rate": 0.0005278052701708184, "loss": 3.1424, "step": 13805 }, { "epoch": 0.68, "grad_norm": 0.5361778736114502, "learning_rate": 0.0005277952518569746, "loss": 3.4089, "step": 13806 }, { "epoch": 0.68, "grad_norm": 0.5384201407432556, "learning_rate": 0.000527785232943162, "loss": 3.3915, "step": 13807 }, { "epoch": 0.68, "grad_norm": 0.581087052822113, "learning_rate": 0.0005277752134294067, "loss": 3.2422, "step": 13808 }, { "epoch": 0.68, "grad_norm": 0.4882732331752777, "learning_rate": 0.0005277651933157355, "loss": 3.3192, "step": 13809 }, { "epoch": 0.68, "grad_norm": 0.5098223686218262, "learning_rate": 0.0005277551726021746, "loss": 3.0037, "step": 13810 }, { "epoch": 0.68, "grad_norm": 0.522487223148346, "learning_rate": 0.0005277451512887504, "loss": 3.3347, "step": 13811 }, { "epoch": 0.68, "grad_norm": 0.5181809663772583, "learning_rate": 0.0005277351293754894, "loss": 3.2624, "step": 13812 }, { "epoch": 0.68, "grad_norm": 0.5491748452186584, "learning_rate": 0.0005277251068624178, "loss": 3.2306, "step": 13813 }, { "epoch": 0.68, "grad_norm": 0.5368053913116455, "learning_rate": 0.0005277150837495621, "loss": 3.2325, "step": 13814 }, { "epoch": 0.68, "grad_norm": 0.5411176085472107, "learning_rate": 0.0005277050600369487, "loss": 3.1715, "step": 13815 }, { "epoch": 0.68, "grad_norm": 0.49559369683265686, "learning_rate": 0.000527695035724604, "loss": 3.0924, "step": 13816 }, { "epoch": 0.68, "grad_norm": 0.5154446363449097, "learning_rate": 0.0005276850108125544, "loss": 3.6344, "step": 13817 }, { "epoch": 0.68, "grad_norm": 0.5071130990982056, "learning_rate": 0.0005276749853008263, "loss": 3.0258, "step": 13818 }, { "epoch": 0.68, "grad_norm": 0.5003767013549805, "learning_rate": 0.0005276649591894459, "loss": 3.0506, "step": 13819 }, { "epoch": 0.68, "grad_norm": 0.5963944792747498, "learning_rate": 0.0005276549324784401, "loss": 3.0792, "step": 13820 }, { "epoch": 0.68, "grad_norm": 0.5032363533973694, "learning_rate": 0.0005276449051678349, "loss": 3.1895, "step": 13821 }, { "epoch": 0.68, "grad_norm": 0.48575350642204285, "learning_rate": 0.000527634877257657, "loss": 3.0814, "step": 13822 }, { "epoch": 0.68, "grad_norm": 0.5336856245994568, "learning_rate": 0.0005276248487479325, "loss": 3.0062, "step": 13823 }, { "epoch": 0.68, "grad_norm": 0.519405722618103, "learning_rate": 0.0005276148196386879, "loss": 3.1346, "step": 13824 }, { "epoch": 0.68, "grad_norm": 0.5641560554504395, "learning_rate": 0.0005276047899299497, "loss": 3.1063, "step": 13825 }, { "epoch": 0.68, "grad_norm": 0.49971362948417664, "learning_rate": 0.0005275947596217442, "loss": 3.1934, "step": 13826 }, { "epoch": 0.68, "grad_norm": 0.5345155596733093, "learning_rate": 0.0005275847287140981, "loss": 3.1373, "step": 13827 }, { "epoch": 0.68, "grad_norm": 0.5394874811172485, "learning_rate": 0.0005275746972070376, "loss": 3.301, "step": 13828 }, { "epoch": 0.68, "grad_norm": 0.5135223865509033, "learning_rate": 0.0005275646651005891, "loss": 3.301, "step": 13829 }, { "epoch": 0.68, "grad_norm": 0.5040108561515808, "learning_rate": 0.000527554632394779, "loss": 3.2598, "step": 13830 }, { "epoch": 0.68, "grad_norm": 0.5344460010528564, "learning_rate": 0.0005275445990896338, "loss": 3.1245, "step": 13831 }, { "epoch": 0.68, "grad_norm": 0.4787933826446533, "learning_rate": 0.00052753456518518, "loss": 3.3899, "step": 13832 }, { "epoch": 0.68, "grad_norm": 0.5142536759376526, "learning_rate": 0.0005275245306814439, "loss": 3.1408, "step": 13833 }, { "epoch": 0.68, "grad_norm": 0.5277648568153381, "learning_rate": 0.000527514495578452, "loss": 3.1087, "step": 13834 }, { "epoch": 0.68, "grad_norm": 0.5267074704170227, "learning_rate": 0.0005275044598762307, "loss": 3.192, "step": 13835 }, { "epoch": 0.68, "grad_norm": 0.5166609883308411, "learning_rate": 0.0005274944235748064, "loss": 3.3189, "step": 13836 }, { "epoch": 0.68, "grad_norm": 0.5297030210494995, "learning_rate": 0.0005274843866742056, "loss": 3.0285, "step": 13837 }, { "epoch": 0.68, "grad_norm": 0.5255849361419678, "learning_rate": 0.0005274743491744548, "loss": 3.2142, "step": 13838 }, { "epoch": 0.68, "grad_norm": 0.49405017495155334, "learning_rate": 0.0005274643110755801, "loss": 3.3999, "step": 13839 }, { "epoch": 0.68, "grad_norm": 0.4815720319747925, "learning_rate": 0.0005274542723776083, "loss": 3.4118, "step": 13840 }, { "epoch": 0.68, "grad_norm": 0.5125225186347961, "learning_rate": 0.0005274442330805658, "loss": 3.1375, "step": 13841 }, { "epoch": 0.68, "grad_norm": 0.5525969862937927, "learning_rate": 0.0005274341931844788, "loss": 3.3452, "step": 13842 }, { "epoch": 0.68, "grad_norm": 0.5333914756774902, "learning_rate": 0.0005274241526893741, "loss": 3.3247, "step": 13843 }, { "epoch": 0.68, "grad_norm": 0.5027710199356079, "learning_rate": 0.0005274141115952779, "loss": 3.228, "step": 13844 }, { "epoch": 0.68, "grad_norm": 0.5438841581344604, "learning_rate": 0.0005274040699022167, "loss": 3.2234, "step": 13845 }, { "epoch": 0.68, "grad_norm": 0.5225579142570496, "learning_rate": 0.0005273940276102168, "loss": 3.165, "step": 13846 }, { "epoch": 0.68, "grad_norm": 0.5789764523506165, "learning_rate": 0.000527383984719305, "loss": 3.1446, "step": 13847 }, { "epoch": 0.68, "grad_norm": 0.5264090895652771, "learning_rate": 0.0005273739412295073, "loss": 3.2548, "step": 13848 }, { "epoch": 0.68, "grad_norm": 0.5331341028213501, "learning_rate": 0.0005273638971408506, "loss": 3.1594, "step": 13849 }, { "epoch": 0.68, "grad_norm": 0.5145845413208008, "learning_rate": 0.0005273538524533612, "loss": 3.1882, "step": 13850 }, { "epoch": 0.68, "grad_norm": 0.5996436476707458, "learning_rate": 0.0005273438071670653, "loss": 3.359, "step": 13851 }, { "epoch": 0.68, "grad_norm": 0.5678809881210327, "learning_rate": 0.0005273337612819897, "loss": 3.3849, "step": 13852 }, { "epoch": 0.68, "grad_norm": 0.49239474534988403, "learning_rate": 0.0005273237147981607, "loss": 3.1734, "step": 13853 }, { "epoch": 0.68, "grad_norm": 0.5626592040061951, "learning_rate": 0.0005273136677156048, "loss": 3.0927, "step": 13854 }, { "epoch": 0.68, "grad_norm": 0.49832597374916077, "learning_rate": 0.0005273036200343484, "loss": 3.3677, "step": 13855 }, { "epoch": 0.68, "grad_norm": 0.6042376160621643, "learning_rate": 0.000527293571754418, "loss": 3.2694, "step": 13856 }, { "epoch": 0.68, "grad_norm": 0.5671773552894592, "learning_rate": 0.0005272835228758401, "loss": 2.8949, "step": 13857 }, { "epoch": 0.68, "grad_norm": 0.5112889409065247, "learning_rate": 0.0005272734733986411, "loss": 3.0336, "step": 13858 }, { "epoch": 0.68, "grad_norm": 0.5109499096870422, "learning_rate": 0.0005272634233228476, "loss": 3.3543, "step": 13859 }, { "epoch": 0.68, "grad_norm": 0.5124879479408264, "learning_rate": 0.000527253372648486, "loss": 3.2364, "step": 13860 }, { "epoch": 0.68, "grad_norm": 0.5452317595481873, "learning_rate": 0.0005272433213755827, "loss": 3.1102, "step": 13861 }, { "epoch": 0.68, "grad_norm": 0.5369084477424622, "learning_rate": 0.0005272332695041642, "loss": 3.2783, "step": 13862 }, { "epoch": 0.68, "grad_norm": 0.5925998091697693, "learning_rate": 0.0005272232170342569, "loss": 3.0603, "step": 13863 }, { "epoch": 0.68, "grad_norm": 0.5368659496307373, "learning_rate": 0.0005272131639658876, "loss": 3.1947, "step": 13864 }, { "epoch": 0.68, "grad_norm": 0.516013503074646, "learning_rate": 0.0005272031102990824, "loss": 3.3695, "step": 13865 }, { "epoch": 0.68, "grad_norm": 0.5412189960479736, "learning_rate": 0.000527193056033868, "loss": 2.8892, "step": 13866 }, { "epoch": 0.68, "grad_norm": 0.5142117142677307, "learning_rate": 0.0005271830011702708, "loss": 3.292, "step": 13867 }, { "epoch": 0.68, "grad_norm": 0.4763852059841156, "learning_rate": 0.0005271729457083173, "loss": 3.2973, "step": 13868 }, { "epoch": 0.68, "grad_norm": 0.5418897271156311, "learning_rate": 0.000527162889648034, "loss": 3.2097, "step": 13869 }, { "epoch": 0.68, "grad_norm": 0.5203564763069153, "learning_rate": 0.0005271528329894473, "loss": 3.0707, "step": 13870 }, { "epoch": 0.68, "grad_norm": 0.5115311741828918, "learning_rate": 0.0005271427757325839, "loss": 3.2188, "step": 13871 }, { "epoch": 0.68, "grad_norm": 0.5365582704544067, "learning_rate": 0.00052713271787747, "loss": 3.2721, "step": 13872 }, { "epoch": 0.68, "grad_norm": 0.529913067817688, "learning_rate": 0.0005271226594241323, "loss": 3.2094, "step": 13873 }, { "epoch": 0.68, "grad_norm": 0.5000056028366089, "learning_rate": 0.0005271126003725972, "loss": 2.963, "step": 13874 }, { "epoch": 0.68, "grad_norm": 0.5235258936882019, "learning_rate": 0.0005271025407228912, "loss": 3.294, "step": 13875 }, { "epoch": 0.68, "grad_norm": 0.5482226610183716, "learning_rate": 0.0005270924804750408, "loss": 3.2941, "step": 13876 }, { "epoch": 0.68, "grad_norm": 0.5100428462028503, "learning_rate": 0.0005270824196290726, "loss": 3.2611, "step": 13877 }, { "epoch": 0.68, "grad_norm": 0.5015908479690552, "learning_rate": 0.000527072358185013, "loss": 3.1427, "step": 13878 }, { "epoch": 0.68, "grad_norm": 0.5557044744491577, "learning_rate": 0.0005270622961428883, "loss": 3.1989, "step": 13879 }, { "epoch": 0.68, "grad_norm": 0.529284656047821, "learning_rate": 0.0005270522335027255, "loss": 3.1541, "step": 13880 }, { "epoch": 0.68, "grad_norm": 0.5141466856002808, "learning_rate": 0.0005270421702645506, "loss": 3.3865, "step": 13881 }, { "epoch": 0.68, "grad_norm": 0.5499520897865295, "learning_rate": 0.0005270321064283905, "loss": 3.1756, "step": 13882 }, { "epoch": 0.68, "grad_norm": 0.5323184728622437, "learning_rate": 0.0005270220419942714, "loss": 3.2529, "step": 13883 }, { "epoch": 0.68, "grad_norm": 0.5083839893341064, "learning_rate": 0.00052701197696222, "loss": 3.2344, "step": 13884 }, { "epoch": 0.68, "grad_norm": 0.5136963129043579, "learning_rate": 0.0005270019113322626, "loss": 3.2175, "step": 13885 }, { "epoch": 0.68, "grad_norm": 0.5176054239273071, "learning_rate": 0.000526991845104426, "loss": 3.0358, "step": 13886 }, { "epoch": 0.68, "grad_norm": 0.49073687195777893, "learning_rate": 0.0005269817782787365, "loss": 3.1841, "step": 13887 }, { "epoch": 0.68, "grad_norm": 0.495728999376297, "learning_rate": 0.0005269717108552208, "loss": 3.3771, "step": 13888 }, { "epoch": 0.68, "grad_norm": 0.5247365832328796, "learning_rate": 0.0005269616428339052, "loss": 3.1361, "step": 13889 }, { "epoch": 0.68, "grad_norm": 0.5086815357208252, "learning_rate": 0.0005269515742148163, "loss": 3.1449, "step": 13890 }, { "epoch": 0.68, "grad_norm": 0.5526536703109741, "learning_rate": 0.0005269415049979807, "loss": 3.1725, "step": 13891 }, { "epoch": 0.68, "grad_norm": 0.5365892052650452, "learning_rate": 0.0005269314351834247, "loss": 3.2791, "step": 13892 }, { "epoch": 0.68, "grad_norm": 0.5318396687507629, "learning_rate": 0.000526921364771175, "loss": 3.0982, "step": 13893 }, { "epoch": 0.68, "grad_norm": 0.5801922082901001, "learning_rate": 0.0005269112937612582, "loss": 3.0354, "step": 13894 }, { "epoch": 0.68, "grad_norm": 0.4854373335838318, "learning_rate": 0.0005269012221537008, "loss": 3.0311, "step": 13895 }, { "epoch": 0.68, "grad_norm": 0.5157737731933594, "learning_rate": 0.0005268911499485291, "loss": 3.2959, "step": 13896 }, { "epoch": 0.68, "grad_norm": 0.5271645784378052, "learning_rate": 0.0005268810771457698, "loss": 3.4125, "step": 13897 }, { "epoch": 0.68, "grad_norm": 0.5193085074424744, "learning_rate": 0.0005268710037454494, "loss": 3.1424, "step": 13898 }, { "epoch": 0.68, "grad_norm": 0.5354040861129761, "learning_rate": 0.0005268609297475944, "loss": 3.0197, "step": 13899 }, { "epoch": 0.68, "grad_norm": 0.5036852955818176, "learning_rate": 0.0005268508551522315, "loss": 3.3795, "step": 13900 }, { "epoch": 0.68, "grad_norm": 0.527779757976532, "learning_rate": 0.000526840779959387, "loss": 3.1321, "step": 13901 }, { "epoch": 0.68, "grad_norm": 0.5030642747879028, "learning_rate": 0.0005268307041690877, "loss": 3.3091, "step": 13902 }, { "epoch": 0.68, "grad_norm": 0.4996885061264038, "learning_rate": 0.0005268206277813598, "loss": 3.2489, "step": 13903 }, { "epoch": 0.68, "grad_norm": 0.5028959512710571, "learning_rate": 0.0005268105507962301, "loss": 3.2326, "step": 13904 }, { "epoch": 0.68, "grad_norm": 0.49086108803749084, "learning_rate": 0.000526800473213725, "loss": 3.3486, "step": 13905 }, { "epoch": 0.68, "grad_norm": 0.508350133895874, "learning_rate": 0.0005267903950338711, "loss": 3.2829, "step": 13906 }, { "epoch": 0.68, "grad_norm": 0.54703289270401, "learning_rate": 0.000526780316256695, "loss": 3.3257, "step": 13907 }, { "epoch": 0.68, "grad_norm": 0.5806416273117065, "learning_rate": 0.0005267702368822232, "loss": 3.2072, "step": 13908 }, { "epoch": 0.68, "grad_norm": 0.4797351062297821, "learning_rate": 0.0005267601569104823, "loss": 3.5585, "step": 13909 }, { "epoch": 0.68, "grad_norm": 0.5201489329338074, "learning_rate": 0.0005267500763414986, "loss": 3.0275, "step": 13910 }, { "epoch": 0.68, "grad_norm": 0.5400049686431885, "learning_rate": 0.000526739995175299, "loss": 3.2039, "step": 13911 }, { "epoch": 0.68, "grad_norm": 0.5291928052902222, "learning_rate": 0.0005267299134119098, "loss": 3.1484, "step": 13912 }, { "epoch": 0.68, "grad_norm": 0.49172693490982056, "learning_rate": 0.0005267198310513577, "loss": 3.0451, "step": 13913 }, { "epoch": 0.68, "grad_norm": 0.5199759006500244, "learning_rate": 0.0005267097480936691, "loss": 3.1473, "step": 13914 }, { "epoch": 0.68, "grad_norm": 0.5097964406013489, "learning_rate": 0.0005266996645388708, "loss": 3.0738, "step": 13915 }, { "epoch": 0.68, "grad_norm": 0.5464935898780823, "learning_rate": 0.0005266895803869891, "loss": 3.2276, "step": 13916 }, { "epoch": 0.68, "grad_norm": 0.4840008616447449, "learning_rate": 0.0005266794956380507, "loss": 3.1427, "step": 13917 }, { "epoch": 0.68, "grad_norm": 0.5694568753242493, "learning_rate": 0.0005266694102920822, "loss": 3.1599, "step": 13918 }, { "epoch": 0.68, "grad_norm": 0.5026246905326843, "learning_rate": 0.00052665932434911, "loss": 3.2966, "step": 13919 }, { "epoch": 0.68, "grad_norm": 0.5052033066749573, "learning_rate": 0.0005266492378091608, "loss": 3.3566, "step": 13920 }, { "epoch": 0.68, "grad_norm": 0.5257059931755066, "learning_rate": 0.0005266391506722611, "loss": 3.1304, "step": 13921 }, { "epoch": 0.68, "grad_norm": 0.5384652018547058, "learning_rate": 0.0005266290629384376, "loss": 3.2555, "step": 13922 }, { "epoch": 0.68, "grad_norm": 0.5102568864822388, "learning_rate": 0.0005266189746077167, "loss": 3.2415, "step": 13923 }, { "epoch": 0.68, "grad_norm": 0.5261817574501038, "learning_rate": 0.000526608885680125, "loss": 3.3374, "step": 13924 }, { "epoch": 0.68, "grad_norm": 0.4975242018699646, "learning_rate": 0.0005265987961556891, "loss": 3.2718, "step": 13925 }, { "epoch": 0.68, "grad_norm": 0.5032918453216553, "learning_rate": 0.0005265887060344356, "loss": 3.2327, "step": 13926 }, { "epoch": 0.68, "grad_norm": 0.6491032242774963, "learning_rate": 0.000526578615316391, "loss": 3.183, "step": 13927 }, { "epoch": 0.68, "grad_norm": 0.5199459791183472, "learning_rate": 0.0005265685240015821, "loss": 3.2972, "step": 13928 }, { "epoch": 0.68, "grad_norm": 0.5117924809455872, "learning_rate": 0.0005265584320900353, "loss": 3.1965, "step": 13929 }, { "epoch": 0.68, "grad_norm": 0.5264638662338257, "learning_rate": 0.000526548339581777, "loss": 3.0494, "step": 13930 }, { "epoch": 0.68, "grad_norm": 0.5094579458236694, "learning_rate": 0.0005265382464768341, "loss": 3.18, "step": 13931 }, { "epoch": 0.68, "grad_norm": 0.4896301329135895, "learning_rate": 0.0005265281527752331, "loss": 3.3233, "step": 13932 }, { "epoch": 0.68, "grad_norm": 0.4931809604167938, "learning_rate": 0.0005265180584770004, "loss": 3.3084, "step": 13933 }, { "epoch": 0.68, "grad_norm": 0.5263796448707581, "learning_rate": 0.0005265079635821628, "loss": 3.3991, "step": 13934 }, { "epoch": 0.68, "grad_norm": 0.5130126476287842, "learning_rate": 0.0005264978680907469, "loss": 3.2796, "step": 13935 }, { "epoch": 0.68, "grad_norm": 0.5254062414169312, "learning_rate": 0.000526487772002779, "loss": 3.2386, "step": 13936 }, { "epoch": 0.68, "grad_norm": 0.5328313708305359, "learning_rate": 0.0005264776753182861, "loss": 3.0323, "step": 13937 }, { "epoch": 0.68, "grad_norm": 0.4820843040943146, "learning_rate": 0.0005264675780372945, "loss": 3.3463, "step": 13938 }, { "epoch": 0.68, "grad_norm": 0.5069538950920105, "learning_rate": 0.0005264574801598309, "loss": 3.1807, "step": 13939 }, { "epoch": 0.68, "grad_norm": 0.5414431691169739, "learning_rate": 0.0005264473816859219, "loss": 3.4286, "step": 13940 }, { "epoch": 0.68, "grad_norm": 0.5100494623184204, "learning_rate": 0.0005264372826155941, "loss": 3.1482, "step": 13941 }, { "epoch": 0.68, "grad_norm": 0.5338406562805176, "learning_rate": 0.000526427182948874, "loss": 3.3412, "step": 13942 }, { "epoch": 0.68, "grad_norm": 0.48375827074050903, "learning_rate": 0.0005264170826857883, "loss": 3.1934, "step": 13943 }, { "epoch": 0.68, "grad_norm": 0.5023521184921265, "learning_rate": 0.0005264069818263636, "loss": 3.1973, "step": 13944 }, { "epoch": 0.68, "grad_norm": 0.5352455377578735, "learning_rate": 0.0005263968803706265, "loss": 3.0357, "step": 13945 }, { "epoch": 0.68, "grad_norm": 0.4851732552051544, "learning_rate": 0.0005263867783186036, "loss": 3.5018, "step": 13946 }, { "epoch": 0.68, "grad_norm": 0.5262126326560974, "learning_rate": 0.0005263766756703213, "loss": 3.0649, "step": 13947 }, { "epoch": 0.68, "grad_norm": 0.51038658618927, "learning_rate": 0.0005263665724258066, "loss": 3.3052, "step": 13948 }, { "epoch": 0.68, "grad_norm": 0.49130168557167053, "learning_rate": 0.000526356468585086, "loss": 3.0464, "step": 13949 }, { "epoch": 0.68, "grad_norm": 0.525030791759491, "learning_rate": 0.0005263463641481858, "loss": 3.4954, "step": 13950 }, { "epoch": 0.68, "grad_norm": 0.5531750917434692, "learning_rate": 0.000526336259115133, "loss": 3.3464, "step": 13951 }, { "epoch": 0.68, "grad_norm": 0.5086334347724915, "learning_rate": 0.0005263261534859539, "loss": 3.421, "step": 13952 }, { "epoch": 0.68, "grad_norm": 0.5100414156913757, "learning_rate": 0.0005263160472606754, "loss": 3.261, "step": 13953 }, { "epoch": 0.68, "grad_norm": 0.5146250128746033, "learning_rate": 0.0005263059404393239, "loss": 3.0504, "step": 13954 }, { "epoch": 0.68, "grad_norm": 0.5014898180961609, "learning_rate": 0.0005262958330219262, "loss": 3.3744, "step": 13955 }, { "epoch": 0.68, "grad_norm": 0.5280792117118835, "learning_rate": 0.0005262857250085088, "loss": 3.1879, "step": 13956 }, { "epoch": 0.68, "grad_norm": 0.48315441608428955, "learning_rate": 0.0005262756163990982, "loss": 3.1047, "step": 13957 }, { "epoch": 0.68, "grad_norm": 0.508493959903717, "learning_rate": 0.0005262655071937214, "loss": 3.3101, "step": 13958 }, { "epoch": 0.68, "grad_norm": 0.5163034796714783, "learning_rate": 0.0005262553973924047, "loss": 3.4717, "step": 13959 }, { "epoch": 0.68, "grad_norm": 0.5181001424789429, "learning_rate": 0.0005262452869951749, "loss": 3.2164, "step": 13960 }, { "epoch": 0.68, "grad_norm": 0.526516854763031, "learning_rate": 0.0005262351760020584, "loss": 3.2027, "step": 13961 }, { "epoch": 0.68, "grad_norm": 0.5265016555786133, "learning_rate": 0.000526225064413082, "loss": 3.2067, "step": 13962 }, { "epoch": 0.68, "grad_norm": 0.508037269115448, "learning_rate": 0.0005262149522282724, "loss": 3.2605, "step": 13963 }, { "epoch": 0.68, "grad_norm": 0.5375155806541443, "learning_rate": 0.0005262048394476562, "loss": 3.3783, "step": 13964 }, { "epoch": 0.68, "grad_norm": 0.5235868096351624, "learning_rate": 0.0005261947260712599, "loss": 3.3954, "step": 13965 }, { "epoch": 0.68, "grad_norm": 0.5014271140098572, "learning_rate": 0.0005261846120991103, "loss": 3.2368, "step": 13966 }, { "epoch": 0.68, "grad_norm": 0.5021702647209167, "learning_rate": 0.0005261744975312339, "loss": 3.1191, "step": 13967 }, { "epoch": 0.68, "grad_norm": 0.5597658753395081, "learning_rate": 0.0005261643823676574, "loss": 3.0137, "step": 13968 }, { "epoch": 0.68, "grad_norm": 0.4978190064430237, "learning_rate": 0.0005261542666084074, "loss": 3.3998, "step": 13969 }, { "epoch": 0.68, "grad_norm": 0.5252299308776855, "learning_rate": 0.0005261441502535107, "loss": 3.0559, "step": 13970 }, { "epoch": 0.68, "grad_norm": 0.5657221674919128, "learning_rate": 0.0005261340333029938, "loss": 3.1933, "step": 13971 }, { "epoch": 0.68, "grad_norm": 0.5250703692436218, "learning_rate": 0.0005261239157568833, "loss": 3.3627, "step": 13972 }, { "epoch": 0.68, "grad_norm": 0.5377254486083984, "learning_rate": 0.0005261137976152059, "loss": 3.1583, "step": 13973 }, { "epoch": 0.68, "grad_norm": 0.49435439705848694, "learning_rate": 0.0005261036788779884, "loss": 3.2259, "step": 13974 }, { "epoch": 0.68, "grad_norm": 0.5244966149330139, "learning_rate": 0.0005260935595452573, "loss": 3.2558, "step": 13975 }, { "epoch": 0.68, "grad_norm": 0.4984242916107178, "learning_rate": 0.0005260834396170393, "loss": 3.1271, "step": 13976 }, { "epoch": 0.68, "grad_norm": 0.5036216974258423, "learning_rate": 0.000526073319093361, "loss": 3.2616, "step": 13977 }, { "epoch": 0.69, "grad_norm": 0.5375218987464905, "learning_rate": 0.000526063197974249, "loss": 3.0446, "step": 13978 }, { "epoch": 0.69, "grad_norm": 0.5373899936676025, "learning_rate": 0.00052605307625973, "loss": 3.123, "step": 13979 }, { "epoch": 0.69, "grad_norm": 0.5408071279525757, "learning_rate": 0.0005260429539498308, "loss": 3.0805, "step": 13980 }, { "epoch": 0.69, "grad_norm": 0.5188453197479248, "learning_rate": 0.000526032831044578, "loss": 3.1694, "step": 13981 }, { "epoch": 0.69, "grad_norm": 0.5271825194358826, "learning_rate": 0.0005260227075439983, "loss": 3.1811, "step": 13982 }, { "epoch": 0.69, "grad_norm": 0.47875532507896423, "learning_rate": 0.0005260125834481181, "loss": 3.494, "step": 13983 }, { "epoch": 0.69, "grad_norm": 0.5747538208961487, "learning_rate": 0.0005260024587569644, "loss": 3.1184, "step": 13984 }, { "epoch": 0.69, "grad_norm": 0.5151486992835999, "learning_rate": 0.0005259923334705636, "loss": 2.9681, "step": 13985 }, { "epoch": 0.69, "grad_norm": 0.534481406211853, "learning_rate": 0.0005259822075889426, "loss": 3.2856, "step": 13986 }, { "epoch": 0.69, "grad_norm": 0.51065993309021, "learning_rate": 0.0005259720811121278, "loss": 3.1268, "step": 13987 }, { "epoch": 0.69, "grad_norm": 0.5007278323173523, "learning_rate": 0.0005259619540401462, "loss": 3.0136, "step": 13988 }, { "epoch": 0.69, "grad_norm": 0.5178660154342651, "learning_rate": 0.0005259518263730242, "loss": 3.1685, "step": 13989 }, { "epoch": 0.69, "grad_norm": 0.5047518610954285, "learning_rate": 0.0005259416981107886, "loss": 3.2531, "step": 13990 }, { "epoch": 0.69, "grad_norm": 0.5168188810348511, "learning_rate": 0.000525931569253466, "loss": 3.3957, "step": 13991 }, { "epoch": 0.69, "grad_norm": 0.5354238152503967, "learning_rate": 0.0005259214398010833, "loss": 3.3066, "step": 13992 }, { "epoch": 0.69, "grad_norm": 0.5306437611579895, "learning_rate": 0.0005259113097536667, "loss": 3.2913, "step": 13993 }, { "epoch": 0.69, "grad_norm": 0.5510126948356628, "learning_rate": 0.0005259011791112435, "loss": 3.0978, "step": 13994 }, { "epoch": 0.69, "grad_norm": 0.5774776339530945, "learning_rate": 0.00052589104787384, "loss": 3.133, "step": 13995 }, { "epoch": 0.69, "grad_norm": 0.5207593441009521, "learning_rate": 0.0005258809160414827, "loss": 3.1222, "step": 13996 }, { "epoch": 0.69, "grad_norm": 0.5279645919799805, "learning_rate": 0.0005258707836141989, "loss": 3.2507, "step": 13997 }, { "epoch": 0.69, "grad_norm": 0.5136027336120605, "learning_rate": 0.0005258606505920147, "loss": 3.4416, "step": 13998 }, { "epoch": 0.69, "grad_norm": 0.49309471249580383, "learning_rate": 0.0005258505169749571, "loss": 3.1352, "step": 13999 }, { "epoch": 0.69, "grad_norm": 0.5222823023796082, "learning_rate": 0.0005258403827630527, "loss": 3.3458, "step": 14000 }, { "epoch": 0.69, "grad_norm": 0.4934551417827606, "learning_rate": 0.0005258302479563282, "loss": 3.129, "step": 14001 }, { "epoch": 0.69, "grad_norm": 0.5131190419197083, "learning_rate": 0.0005258201125548103, "loss": 3.2386, "step": 14002 }, { "epoch": 0.69, "grad_norm": 0.6161550879478455, "learning_rate": 0.0005258099765585256, "loss": 3.0293, "step": 14003 }, { "epoch": 0.69, "grad_norm": 0.5266488194465637, "learning_rate": 0.0005257998399675009, "loss": 3.2425, "step": 14004 }, { "epoch": 0.69, "grad_norm": 0.5947927832603455, "learning_rate": 0.0005257897027817629, "loss": 3.2695, "step": 14005 }, { "epoch": 0.69, "grad_norm": 0.5191298723220825, "learning_rate": 0.0005257795650013382, "loss": 3.0016, "step": 14006 }, { "epoch": 0.69, "grad_norm": 0.5333731770515442, "learning_rate": 0.0005257694266262537, "loss": 3.1303, "step": 14007 }, { "epoch": 0.69, "grad_norm": 0.5200604200363159, "learning_rate": 0.000525759287656536, "loss": 3.3124, "step": 14008 }, { "epoch": 0.69, "grad_norm": 0.5588985681533813, "learning_rate": 0.0005257491480922117, "loss": 3.262, "step": 14009 }, { "epoch": 0.69, "grad_norm": 0.49545398354530334, "learning_rate": 0.0005257390079333077, "loss": 3.2407, "step": 14010 }, { "epoch": 0.69, "grad_norm": 0.550879180431366, "learning_rate": 0.0005257288671798504, "loss": 3.0096, "step": 14011 }, { "epoch": 0.69, "grad_norm": 0.5142801403999329, "learning_rate": 0.0005257187258318668, "loss": 3.261, "step": 14012 }, { "epoch": 0.69, "grad_norm": 0.5323050022125244, "learning_rate": 0.0005257085838893836, "loss": 3.0495, "step": 14013 }, { "epoch": 0.69, "grad_norm": 0.5185598731040955, "learning_rate": 0.0005256984413524274, "loss": 3.1516, "step": 14014 }, { "epoch": 0.69, "grad_norm": 0.5257108807563782, "learning_rate": 0.0005256882982210249, "loss": 3.1726, "step": 14015 }, { "epoch": 0.69, "grad_norm": 0.5254387259483337, "learning_rate": 0.0005256781544952028, "loss": 3.2881, "step": 14016 }, { "epoch": 0.69, "grad_norm": 0.514013409614563, "learning_rate": 0.000525668010174988, "loss": 3.06, "step": 14017 }, { "epoch": 0.69, "grad_norm": 0.5110748410224915, "learning_rate": 0.000525657865260407, "loss": 3.283, "step": 14018 }, { "epoch": 0.69, "grad_norm": 0.5237401127815247, "learning_rate": 0.0005256477197514866, "loss": 3.1188, "step": 14019 }, { "epoch": 0.69, "grad_norm": 0.4896359145641327, "learning_rate": 0.0005256375736482536, "loss": 3.2941, "step": 14020 }, { "epoch": 0.69, "grad_norm": 0.5119338035583496, "learning_rate": 0.0005256274269507346, "loss": 3.1067, "step": 14021 }, { "epoch": 0.69, "grad_norm": 0.5138943791389465, "learning_rate": 0.0005256172796589564, "loss": 3.2943, "step": 14022 }, { "epoch": 0.69, "grad_norm": 0.5650306940078735, "learning_rate": 0.0005256071317729457, "loss": 3.0597, "step": 14023 }, { "epoch": 0.69, "grad_norm": 0.5293331146240234, "learning_rate": 0.0005255969832927293, "loss": 3.2667, "step": 14024 }, { "epoch": 0.69, "grad_norm": 0.5199422836303711, "learning_rate": 0.0005255868342183338, "loss": 3.1008, "step": 14025 }, { "epoch": 0.69, "grad_norm": 0.53669273853302, "learning_rate": 0.000525576684549786, "loss": 3.1676, "step": 14026 }, { "epoch": 0.69, "grad_norm": 0.5880951285362244, "learning_rate": 0.0005255665342871126, "loss": 3.3137, "step": 14027 }, { "epoch": 0.69, "grad_norm": 0.5066351890563965, "learning_rate": 0.0005255563834303404, "loss": 3.1104, "step": 14028 }, { "epoch": 0.69, "grad_norm": 0.495430052280426, "learning_rate": 0.000525546231979496, "loss": 3.0237, "step": 14029 }, { "epoch": 0.69, "grad_norm": 0.47963449358940125, "learning_rate": 0.0005255360799346064, "loss": 3.1957, "step": 14030 }, { "epoch": 0.69, "grad_norm": 0.48525768518447876, "learning_rate": 0.0005255259272956981, "loss": 3.1917, "step": 14031 }, { "epoch": 0.69, "grad_norm": 0.5137525796890259, "learning_rate": 0.000525515774062798, "loss": 2.9276, "step": 14032 }, { "epoch": 0.69, "grad_norm": 0.4915049076080322, "learning_rate": 0.0005255056202359325, "loss": 2.9671, "step": 14033 }, { "epoch": 0.69, "grad_norm": 0.5103715062141418, "learning_rate": 0.0005254954658151288, "loss": 3.0266, "step": 14034 }, { "epoch": 0.69, "grad_norm": 0.5029316544532776, "learning_rate": 0.0005254853108004134, "loss": 3.164, "step": 14035 }, { "epoch": 0.69, "grad_norm": 0.5544018149375916, "learning_rate": 0.0005254751551918131, "loss": 3.2264, "step": 14036 }, { "epoch": 0.69, "grad_norm": 0.5042638182640076, "learning_rate": 0.0005254649989893547, "loss": 2.8606, "step": 14037 }, { "epoch": 0.69, "grad_norm": 0.49873411655426025, "learning_rate": 0.0005254548421930647, "loss": 3.2909, "step": 14038 }, { "epoch": 0.69, "grad_norm": 0.5256275534629822, "learning_rate": 0.0005254446848029701, "loss": 3.1487, "step": 14039 }, { "epoch": 0.69, "grad_norm": 0.5733166337013245, "learning_rate": 0.0005254345268190976, "loss": 2.9514, "step": 14040 }, { "epoch": 0.69, "grad_norm": 0.4876699447631836, "learning_rate": 0.000525424368241474, "loss": 3.1855, "step": 14041 }, { "epoch": 0.69, "grad_norm": 0.5447889566421509, "learning_rate": 0.000525414209070126, "loss": 3.2617, "step": 14042 }, { "epoch": 0.69, "grad_norm": 0.48302027583122253, "learning_rate": 0.0005254040493050802, "loss": 3.1729, "step": 14043 }, { "epoch": 0.69, "grad_norm": 0.4758540093898773, "learning_rate": 0.0005253938889463638, "loss": 3.3485, "step": 14044 }, { "epoch": 0.69, "grad_norm": 0.489118754863739, "learning_rate": 0.000525383727994003, "loss": 3.2733, "step": 14045 }, { "epoch": 0.69, "grad_norm": 0.5639674067497253, "learning_rate": 0.000525373566448025, "loss": 3.0852, "step": 14046 }, { "epoch": 0.69, "grad_norm": 0.5075635313987732, "learning_rate": 0.0005253634043084562, "loss": 3.3726, "step": 14047 }, { "epoch": 0.69, "grad_norm": 0.5024601817131042, "learning_rate": 0.0005253532415753238, "loss": 3.2864, "step": 14048 }, { "epoch": 0.69, "grad_norm": 0.5034321546554565, "learning_rate": 0.0005253430782486542, "loss": 3.3487, "step": 14049 }, { "epoch": 0.69, "grad_norm": 0.5592278242111206, "learning_rate": 0.0005253329143284744, "loss": 3.247, "step": 14050 }, { "epoch": 0.69, "grad_norm": 0.5167191028594971, "learning_rate": 0.000525322749814811, "loss": 3.0507, "step": 14051 }, { "epoch": 0.69, "grad_norm": 0.49541527032852173, "learning_rate": 0.0005253125847076908, "loss": 3.1837, "step": 14052 }, { "epoch": 0.69, "grad_norm": 0.5572511553764343, "learning_rate": 0.0005253024190071407, "loss": 3.1698, "step": 14053 }, { "epoch": 0.69, "grad_norm": 0.47061023116111755, "learning_rate": 0.0005252922527131873, "loss": 3.1254, "step": 14054 }, { "epoch": 0.69, "grad_norm": 0.5373715162277222, "learning_rate": 0.0005252820858258576, "loss": 3.251, "step": 14055 }, { "epoch": 0.69, "grad_norm": 0.5261591672897339, "learning_rate": 0.000525271918345178, "loss": 3.2149, "step": 14056 }, { "epoch": 0.69, "grad_norm": 0.525307834148407, "learning_rate": 0.0005252617502711758, "loss": 3.1451, "step": 14057 }, { "epoch": 0.69, "grad_norm": 0.5310854315757751, "learning_rate": 0.0005252515816038773, "loss": 3.1049, "step": 14058 }, { "epoch": 0.69, "grad_norm": 0.5353348255157471, "learning_rate": 0.0005252414123433095, "loss": 3.3399, "step": 14059 }, { "epoch": 0.69, "grad_norm": 0.5103008151054382, "learning_rate": 0.0005252312424894993, "loss": 3.5181, "step": 14060 }, { "epoch": 0.69, "grad_norm": 0.5458347201347351, "learning_rate": 0.0005252210720424733, "loss": 3.3634, "step": 14061 }, { "epoch": 0.69, "grad_norm": 0.4898107647895813, "learning_rate": 0.0005252109010022583, "loss": 3.2373, "step": 14062 }, { "epoch": 0.69, "grad_norm": 0.5220617055892944, "learning_rate": 0.0005252007293688812, "loss": 3.1624, "step": 14063 }, { "epoch": 0.69, "grad_norm": 0.5146247148513794, "learning_rate": 0.0005251905571423686, "loss": 3.045, "step": 14064 }, { "epoch": 0.69, "grad_norm": 0.5100995302200317, "learning_rate": 0.0005251803843227475, "loss": 3.2578, "step": 14065 }, { "epoch": 0.69, "grad_norm": 0.490809828042984, "learning_rate": 0.0005251702109100447, "loss": 3.16, "step": 14066 }, { "epoch": 0.69, "grad_norm": 0.5795995593070984, "learning_rate": 0.0005251600369042868, "loss": 3.0737, "step": 14067 }, { "epoch": 0.69, "grad_norm": 0.5179703831672668, "learning_rate": 0.0005251498623055006, "loss": 3.2451, "step": 14068 }, { "epoch": 0.69, "grad_norm": 0.518638014793396, "learning_rate": 0.0005251396871137131, "loss": 3.3138, "step": 14069 }, { "epoch": 0.69, "grad_norm": 0.49439534544944763, "learning_rate": 0.000525129511328951, "loss": 3.1177, "step": 14070 }, { "epoch": 0.69, "grad_norm": 0.5000263452529907, "learning_rate": 0.0005251193349512411, "loss": 3.3391, "step": 14071 }, { "epoch": 0.69, "grad_norm": 0.513812780380249, "learning_rate": 0.0005251091579806101, "loss": 3.2079, "step": 14072 }, { "epoch": 0.69, "grad_norm": 0.5052744746208191, "learning_rate": 0.000525098980417085, "loss": 2.9954, "step": 14073 }, { "epoch": 0.69, "grad_norm": 0.48377129435539246, "learning_rate": 0.0005250888022606925, "loss": 3.0341, "step": 14074 }, { "epoch": 0.69, "grad_norm": 0.5143635869026184, "learning_rate": 0.0005250786235114594, "loss": 3.0281, "step": 14075 }, { "epoch": 0.69, "grad_norm": 0.5064332485198975, "learning_rate": 0.0005250684441694125, "loss": 3.2199, "step": 14076 }, { "epoch": 0.69, "grad_norm": 0.5439924001693726, "learning_rate": 0.0005250582642345786, "loss": 3.0897, "step": 14077 }, { "epoch": 0.69, "grad_norm": 0.50617915391922, "learning_rate": 0.0005250480837069846, "loss": 3.278, "step": 14078 }, { "epoch": 0.69, "grad_norm": 0.48605844378471375, "learning_rate": 0.0005250379025866572, "loss": 3.1856, "step": 14079 }, { "epoch": 0.69, "grad_norm": 0.49857380986213684, "learning_rate": 0.0005250277208736233, "loss": 3.1719, "step": 14080 }, { "epoch": 0.69, "grad_norm": 0.6834864020347595, "learning_rate": 0.0005250175385679098, "loss": 3.2893, "step": 14081 }, { "epoch": 0.69, "grad_norm": 0.5134391188621521, "learning_rate": 0.0005250073556695433, "loss": 3.1539, "step": 14082 }, { "epoch": 0.69, "grad_norm": 0.5330235958099365, "learning_rate": 0.0005249971721785506, "loss": 3.1599, "step": 14083 }, { "epoch": 0.69, "grad_norm": 0.5358902812004089, "learning_rate": 0.0005249869880949589, "loss": 3.1398, "step": 14084 }, { "epoch": 0.69, "grad_norm": 0.5489894151687622, "learning_rate": 0.0005249768034187947, "loss": 3.195, "step": 14085 }, { "epoch": 0.69, "grad_norm": 0.505240261554718, "learning_rate": 0.0005249666181500848, "loss": 3.2595, "step": 14086 }, { "epoch": 0.69, "grad_norm": 0.5279876589775085, "learning_rate": 0.0005249564322888562, "loss": 3.1531, "step": 14087 }, { "epoch": 0.69, "grad_norm": 0.5321412682533264, "learning_rate": 0.0005249462458351358, "loss": 3.1561, "step": 14088 }, { "epoch": 0.69, "grad_norm": 0.506250262260437, "learning_rate": 0.00052493605878895, "loss": 3.286, "step": 14089 }, { "epoch": 0.69, "grad_norm": 0.528944730758667, "learning_rate": 0.0005249258711503261, "loss": 2.914, "step": 14090 }, { "epoch": 0.69, "grad_norm": 0.5110275149345398, "learning_rate": 0.0005249156829192906, "loss": 3.1901, "step": 14091 }, { "epoch": 0.69, "grad_norm": 0.4941984713077545, "learning_rate": 0.0005249054940958707, "loss": 3.1032, "step": 14092 }, { "epoch": 0.69, "grad_norm": 0.5150231719017029, "learning_rate": 0.0005248953046800928, "loss": 3.1427, "step": 14093 }, { "epoch": 0.69, "grad_norm": 0.4935740530490875, "learning_rate": 0.000524885114671984, "loss": 3.1807, "step": 14094 }, { "epoch": 0.69, "grad_norm": 0.5428426265716553, "learning_rate": 0.0005248749240715712, "loss": 3.2272, "step": 14095 }, { "epoch": 0.69, "grad_norm": 0.4966670274734497, "learning_rate": 0.000524864732878881, "loss": 3.1482, "step": 14096 }, { "epoch": 0.69, "grad_norm": 0.5251358151435852, "learning_rate": 0.0005248545410939404, "loss": 3.2162, "step": 14097 }, { "epoch": 0.69, "grad_norm": 0.5258474946022034, "learning_rate": 0.0005248443487167762, "loss": 3.1545, "step": 14098 }, { "epoch": 0.69, "grad_norm": 0.5385447144508362, "learning_rate": 0.0005248341557474152, "loss": 3.1439, "step": 14099 }, { "epoch": 0.69, "grad_norm": 0.5289866328239441, "learning_rate": 0.0005248239621858845, "loss": 3.294, "step": 14100 }, { "epoch": 0.69, "grad_norm": 0.5030156373977661, "learning_rate": 0.0005248137680322106, "loss": 3.2187, "step": 14101 }, { "epoch": 0.69, "grad_norm": 0.48551294207572937, "learning_rate": 0.0005248035732864205, "loss": 3.4024, "step": 14102 }, { "epoch": 0.69, "grad_norm": 0.5139172673225403, "learning_rate": 0.000524793377948541, "loss": 3.0436, "step": 14103 }, { "epoch": 0.69, "grad_norm": 0.529897928237915, "learning_rate": 0.0005247831820185991, "loss": 3.2084, "step": 14104 }, { "epoch": 0.69, "grad_norm": 0.5231662392616272, "learning_rate": 0.0005247729854966215, "loss": 3.0257, "step": 14105 }, { "epoch": 0.69, "grad_norm": 0.569053053855896, "learning_rate": 0.0005247627883826353, "loss": 3.2904, "step": 14106 }, { "epoch": 0.69, "grad_norm": 0.49440523982048035, "learning_rate": 0.0005247525906766669, "loss": 3.3116, "step": 14107 }, { "epoch": 0.69, "grad_norm": 0.5238437056541443, "learning_rate": 0.0005247423923787437, "loss": 3.3411, "step": 14108 }, { "epoch": 0.69, "grad_norm": 0.5570396780967712, "learning_rate": 0.0005247321934888921, "loss": 3.2735, "step": 14109 }, { "epoch": 0.69, "grad_norm": 0.5721766352653503, "learning_rate": 0.0005247219940071392, "loss": 3.2418, "step": 14110 }, { "epoch": 0.69, "grad_norm": 0.5046465396881104, "learning_rate": 0.0005247117939335119, "loss": 3.1536, "step": 14111 }, { "epoch": 0.69, "grad_norm": 0.522420346736908, "learning_rate": 0.0005247015932680368, "loss": 3.1374, "step": 14112 }, { "epoch": 0.69, "grad_norm": 0.5400798320770264, "learning_rate": 0.0005246913920107411, "loss": 3.3477, "step": 14113 }, { "epoch": 0.69, "grad_norm": 0.5389352440834045, "learning_rate": 0.0005246811901616514, "loss": 3.0954, "step": 14114 }, { "epoch": 0.69, "grad_norm": 0.6275335550308228, "learning_rate": 0.0005246709877207948, "loss": 3.3585, "step": 14115 }, { "epoch": 0.69, "grad_norm": 0.4925820827484131, "learning_rate": 0.000524660784688198, "loss": 3.2751, "step": 14116 }, { "epoch": 0.69, "grad_norm": 0.5348528027534485, "learning_rate": 0.000524650581063888, "loss": 3.2773, "step": 14117 }, { "epoch": 0.69, "grad_norm": 0.5362456440925598, "learning_rate": 0.0005246403768478916, "loss": 3.1904, "step": 14118 }, { "epoch": 0.69, "grad_norm": 0.5220507979393005, "learning_rate": 0.0005246301720402356, "loss": 3.2062, "step": 14119 }, { "epoch": 0.69, "grad_norm": 0.5358459949493408, "learning_rate": 0.0005246199666409469, "loss": 3.21, "step": 14120 }, { "epoch": 0.69, "grad_norm": 0.5036259293556213, "learning_rate": 0.0005246097606500526, "loss": 3.1651, "step": 14121 }, { "epoch": 0.69, "grad_norm": 0.5509597063064575, "learning_rate": 0.0005245995540675793, "loss": 3.1898, "step": 14122 }, { "epoch": 0.69, "grad_norm": 0.5109611749649048, "learning_rate": 0.000524589346893554, "loss": 3.3053, "step": 14123 }, { "epoch": 0.69, "grad_norm": 0.563474714756012, "learning_rate": 0.0005245791391280037, "loss": 3.2851, "step": 14124 }, { "epoch": 0.69, "grad_norm": 0.49557414650917053, "learning_rate": 0.000524568930770955, "loss": 3.2507, "step": 14125 }, { "epoch": 0.69, "grad_norm": 0.4812037944793701, "learning_rate": 0.0005245587218224351, "loss": 3.3388, "step": 14126 }, { "epoch": 0.69, "grad_norm": 0.5291796326637268, "learning_rate": 0.0005245485122824707, "loss": 3.0031, "step": 14127 }, { "epoch": 0.69, "grad_norm": 0.5801912546157837, "learning_rate": 0.0005245383021510887, "loss": 3.3179, "step": 14128 }, { "epoch": 0.69, "grad_norm": 0.4930591881275177, "learning_rate": 0.0005245280914283161, "loss": 3.2406, "step": 14129 }, { "epoch": 0.69, "grad_norm": 0.5279679894447327, "learning_rate": 0.0005245178801141796, "loss": 3.3534, "step": 14130 }, { "epoch": 0.69, "grad_norm": 0.4926796555519104, "learning_rate": 0.0005245076682087064, "loss": 3.3052, "step": 14131 }, { "epoch": 0.69, "grad_norm": 0.6082884073257446, "learning_rate": 0.000524497455711923, "loss": 3.1572, "step": 14132 }, { "epoch": 0.69, "grad_norm": 0.4980970025062561, "learning_rate": 0.0005244872426238567, "loss": 3.0342, "step": 14133 }, { "epoch": 0.69, "grad_norm": 0.5105233192443848, "learning_rate": 0.000524477028944534, "loss": 3.1551, "step": 14134 }, { "epoch": 0.69, "grad_norm": 0.5064019560813904, "learning_rate": 0.0005244668146739822, "loss": 3.1884, "step": 14135 }, { "epoch": 0.69, "grad_norm": 0.5264427661895752, "learning_rate": 0.0005244565998122278, "loss": 3.3935, "step": 14136 }, { "epoch": 0.69, "grad_norm": 0.5206676721572876, "learning_rate": 0.0005244463843592981, "loss": 3.1003, "step": 14137 }, { "epoch": 0.69, "grad_norm": 0.5028414726257324, "learning_rate": 0.0005244361683152197, "loss": 3.3872, "step": 14138 }, { "epoch": 0.69, "grad_norm": 0.5728408694267273, "learning_rate": 0.0005244259516800198, "loss": 3.1309, "step": 14139 }, { "epoch": 0.69, "grad_norm": 0.5234643816947937, "learning_rate": 0.0005244157344537249, "loss": 3.1336, "step": 14140 }, { "epoch": 0.69, "grad_norm": 0.5184975862503052, "learning_rate": 0.0005244055166363623, "loss": 3.2717, "step": 14141 }, { "epoch": 0.69, "grad_norm": 0.5001710057258606, "learning_rate": 0.0005243952982279587, "loss": 3.249, "step": 14142 }, { "epoch": 0.69, "grad_norm": 0.5176953673362732, "learning_rate": 0.0005243850792285411, "loss": 3.1428, "step": 14143 }, { "epoch": 0.69, "grad_norm": 0.5145552754402161, "learning_rate": 0.0005243748596381364, "loss": 3.0941, "step": 14144 }, { "epoch": 0.69, "grad_norm": 0.5094621777534485, "learning_rate": 0.0005243646394567714, "loss": 3.3651, "step": 14145 }, { "epoch": 0.69, "grad_norm": 0.4982603192329407, "learning_rate": 0.0005243544186844733, "loss": 3.4314, "step": 14146 }, { "epoch": 0.69, "grad_norm": 0.48291024565696716, "learning_rate": 0.0005243441973212687, "loss": 3.4426, "step": 14147 }, { "epoch": 0.69, "grad_norm": 0.5711972713470459, "learning_rate": 0.0005243339753671847, "loss": 3.273, "step": 14148 }, { "epoch": 0.69, "grad_norm": 0.5552250146865845, "learning_rate": 0.0005243237528222482, "loss": 3.2662, "step": 14149 }, { "epoch": 0.69, "grad_norm": 0.5337475538253784, "learning_rate": 0.0005243135296864861, "loss": 3.1347, "step": 14150 }, { "epoch": 0.69, "grad_norm": 0.4788720905780792, "learning_rate": 0.0005243033059599253, "loss": 3.1474, "step": 14151 }, { "epoch": 0.69, "grad_norm": 0.5156588554382324, "learning_rate": 0.0005242930816425928, "loss": 3.2473, "step": 14152 }, { "epoch": 0.69, "grad_norm": 0.49395278096199036, "learning_rate": 0.0005242828567345154, "loss": 3.2504, "step": 14153 }, { "epoch": 0.69, "grad_norm": 0.5037361979484558, "learning_rate": 0.0005242726312357202, "loss": 3.1234, "step": 14154 }, { "epoch": 0.69, "grad_norm": 0.5027718544006348, "learning_rate": 0.0005242624051462341, "loss": 3.1226, "step": 14155 }, { "epoch": 0.69, "grad_norm": 0.5126724243164062, "learning_rate": 0.0005242521784660839, "loss": 3.1279, "step": 14156 }, { "epoch": 0.69, "grad_norm": 0.5130273103713989, "learning_rate": 0.0005242419511952966, "loss": 3.2485, "step": 14157 }, { "epoch": 0.69, "grad_norm": 0.5258603692054749, "learning_rate": 0.0005242317233338992, "loss": 3.1893, "step": 14158 }, { "epoch": 0.69, "grad_norm": 0.503662645816803, "learning_rate": 0.0005242214948819186, "loss": 3.2815, "step": 14159 }, { "epoch": 0.69, "grad_norm": 0.4841233789920807, "learning_rate": 0.0005242112658393817, "loss": 3.1095, "step": 14160 }, { "epoch": 0.69, "grad_norm": 0.5432723760604858, "learning_rate": 0.0005242010362063156, "loss": 3.4093, "step": 14161 }, { "epoch": 0.69, "grad_norm": 0.5304633378982544, "learning_rate": 0.0005241908059827469, "loss": 3.2416, "step": 14162 }, { "epoch": 0.69, "grad_norm": 0.5919955372810364, "learning_rate": 0.0005241805751687029, "loss": 3.2266, "step": 14163 }, { "epoch": 0.69, "grad_norm": 0.5040702819824219, "learning_rate": 0.0005241703437642104, "loss": 3.0357, "step": 14164 }, { "epoch": 0.69, "grad_norm": 0.5021560788154602, "learning_rate": 0.0005241601117692964, "loss": 3.1437, "step": 14165 }, { "epoch": 0.69, "grad_norm": 0.513043224811554, "learning_rate": 0.0005241498791839877, "loss": 3.0822, "step": 14166 }, { "epoch": 0.69, "grad_norm": 0.4927295446395874, "learning_rate": 0.0005241396460083114, "loss": 3.3873, "step": 14167 }, { "epoch": 0.69, "grad_norm": 0.49543529748916626, "learning_rate": 0.0005241294122422945, "loss": 3.126, "step": 14168 }, { "epoch": 0.69, "grad_norm": 0.528400719165802, "learning_rate": 0.0005241191778859637, "loss": 3.4217, "step": 14169 }, { "epoch": 0.69, "grad_norm": 0.5334087014198303, "learning_rate": 0.0005241089429393462, "loss": 3.2066, "step": 14170 }, { "epoch": 0.69, "grad_norm": 0.4597061574459076, "learning_rate": 0.0005240987074024689, "loss": 3.4921, "step": 14171 }, { "epoch": 0.69, "grad_norm": 0.5277274250984192, "learning_rate": 0.0005240884712753588, "loss": 3.4269, "step": 14172 }, { "epoch": 0.69, "grad_norm": 0.5084794759750366, "learning_rate": 0.0005240782345580427, "loss": 3.0579, "step": 14173 }, { "epoch": 0.69, "grad_norm": 0.5290378332138062, "learning_rate": 0.0005240679972505476, "loss": 3.2095, "step": 14174 }, { "epoch": 0.69, "grad_norm": 0.5595150589942932, "learning_rate": 0.0005240577593529006, "loss": 3.1068, "step": 14175 }, { "epoch": 0.69, "grad_norm": 0.4827249050140381, "learning_rate": 0.0005240475208651286, "loss": 3.3068, "step": 14176 }, { "epoch": 0.69, "grad_norm": 0.5036142468452454, "learning_rate": 0.0005240372817872585, "loss": 3.2103, "step": 14177 }, { "epoch": 0.69, "grad_norm": 0.5806594491004944, "learning_rate": 0.0005240270421193173, "loss": 3.1885, "step": 14178 }, { "epoch": 0.69, "grad_norm": 0.5014117360115051, "learning_rate": 0.0005240168018613321, "loss": 3.2471, "step": 14179 }, { "epoch": 0.69, "grad_norm": 0.5045899152755737, "learning_rate": 0.0005240065610133296, "loss": 3.3355, "step": 14180 }, { "epoch": 0.69, "grad_norm": 0.5164101719856262, "learning_rate": 0.000523996319575337, "loss": 3.111, "step": 14181 }, { "epoch": 0.7, "grad_norm": 0.504895031452179, "learning_rate": 0.0005239860775473811, "loss": 3.2782, "step": 14182 }, { "epoch": 0.7, "grad_norm": 0.5253392457962036, "learning_rate": 0.0005239758349294891, "loss": 3.3066, "step": 14183 }, { "epoch": 0.7, "grad_norm": 0.5302526950836182, "learning_rate": 0.0005239655917216879, "loss": 3.176, "step": 14184 }, { "epoch": 0.7, "grad_norm": 0.5363248586654663, "learning_rate": 0.0005239553479240044, "loss": 3.1524, "step": 14185 }, { "epoch": 0.7, "grad_norm": 0.5327321290969849, "learning_rate": 0.0005239451035364656, "loss": 3.3454, "step": 14186 }, { "epoch": 0.7, "grad_norm": 0.5333127975463867, "learning_rate": 0.0005239348585590985, "loss": 3.0064, "step": 14187 }, { "epoch": 0.7, "grad_norm": 0.5116428732872009, "learning_rate": 0.0005239246129919299, "loss": 3.0169, "step": 14188 }, { "epoch": 0.7, "grad_norm": 0.5070028305053711, "learning_rate": 0.0005239143668349872, "loss": 3.157, "step": 14189 }, { "epoch": 0.7, "grad_norm": 0.5059760808944702, "learning_rate": 0.0005239041200882971, "loss": 3.4573, "step": 14190 }, { "epoch": 0.7, "grad_norm": 0.5042681097984314, "learning_rate": 0.0005238938727518867, "loss": 3.2877, "step": 14191 }, { "epoch": 0.7, "grad_norm": 0.8668105602264404, "learning_rate": 0.0005238836248257829, "loss": 3.3942, "step": 14192 }, { "epoch": 0.7, "grad_norm": 0.5118007659912109, "learning_rate": 0.0005238733763100126, "loss": 3.0267, "step": 14193 }, { "epoch": 0.7, "grad_norm": 0.527990460395813, "learning_rate": 0.0005238631272046029, "loss": 3.4337, "step": 14194 }, { "epoch": 0.7, "grad_norm": 0.5032305717468262, "learning_rate": 0.0005238528775095808, "loss": 3.2735, "step": 14195 }, { "epoch": 0.7, "grad_norm": 0.48045584559440613, "learning_rate": 0.0005238426272249735, "loss": 3.2052, "step": 14196 }, { "epoch": 0.7, "grad_norm": 0.5078832507133484, "learning_rate": 0.0005238323763508077, "loss": 2.9881, "step": 14197 }, { "epoch": 0.7, "grad_norm": 0.49898645281791687, "learning_rate": 0.0005238221248871105, "loss": 3.0997, "step": 14198 }, { "epoch": 0.7, "grad_norm": 0.5208724737167358, "learning_rate": 0.0005238118728339089, "loss": 3.1622, "step": 14199 }, { "epoch": 0.7, "grad_norm": 0.49348101019859314, "learning_rate": 0.0005238016201912298, "loss": 3.2991, "step": 14200 }, { "epoch": 0.7, "grad_norm": 0.5580134391784668, "learning_rate": 0.0005237913669591004, "loss": 3.2781, "step": 14201 }, { "epoch": 0.7, "grad_norm": 0.5383898615837097, "learning_rate": 0.0005237811131375476, "loss": 3.2633, "step": 14202 }, { "epoch": 0.7, "grad_norm": 0.5041082501411438, "learning_rate": 0.0005237708587265984, "loss": 3.2228, "step": 14203 }, { "epoch": 0.7, "grad_norm": 0.5141589641571045, "learning_rate": 0.0005237606037262799, "loss": 3.0724, "step": 14204 }, { "epoch": 0.7, "grad_norm": 0.509434700012207, "learning_rate": 0.0005237503481366189, "loss": 3.1054, "step": 14205 }, { "epoch": 0.7, "grad_norm": 0.5451233386993408, "learning_rate": 0.0005237400919576426, "loss": 3.0023, "step": 14206 }, { "epoch": 0.7, "grad_norm": 0.5041611790657043, "learning_rate": 0.000523729835189378, "loss": 3.2864, "step": 14207 }, { "epoch": 0.7, "grad_norm": 0.48671042919158936, "learning_rate": 0.0005237195778318521, "loss": 3.2833, "step": 14208 }, { "epoch": 0.7, "grad_norm": 0.5079613924026489, "learning_rate": 0.0005237093198850917, "loss": 3.0954, "step": 14209 }, { "epoch": 0.7, "grad_norm": 0.5360010862350464, "learning_rate": 0.0005236990613491242, "loss": 3.2204, "step": 14210 }, { "epoch": 0.7, "grad_norm": 0.5029088258743286, "learning_rate": 0.0005236888022239763, "loss": 3.1848, "step": 14211 }, { "epoch": 0.7, "grad_norm": 0.5785709023475647, "learning_rate": 0.0005236785425096753, "loss": 3.1963, "step": 14212 }, { "epoch": 0.7, "grad_norm": 0.5002698302268982, "learning_rate": 0.0005236682822062479, "loss": 3.2937, "step": 14213 }, { "epoch": 0.7, "grad_norm": 0.46872082352638245, "learning_rate": 0.0005236580213137214, "loss": 3.2813, "step": 14214 }, { "epoch": 0.7, "grad_norm": 0.5846667289733887, "learning_rate": 0.0005236477598321227, "loss": 3.2123, "step": 14215 }, { "epoch": 0.7, "grad_norm": 0.5419370532035828, "learning_rate": 0.0005236374977614788, "loss": 3.1255, "step": 14216 }, { "epoch": 0.7, "grad_norm": 0.5296844840049744, "learning_rate": 0.0005236272351018167, "loss": 3.2611, "step": 14217 }, { "epoch": 0.7, "grad_norm": 0.5379822254180908, "learning_rate": 0.0005236169718531637, "loss": 3.15, "step": 14218 }, { "epoch": 0.7, "grad_norm": 0.5059786438941956, "learning_rate": 0.0005236067080155465, "loss": 3.3715, "step": 14219 }, { "epoch": 0.7, "grad_norm": 0.48545315861701965, "learning_rate": 0.0005235964435889923, "loss": 3.2246, "step": 14220 }, { "epoch": 0.7, "grad_norm": 0.5349769592285156, "learning_rate": 0.0005235861785735281, "loss": 3.1605, "step": 14221 }, { "epoch": 0.7, "grad_norm": 0.5449730157852173, "learning_rate": 0.0005235759129691809, "loss": 3.2828, "step": 14222 }, { "epoch": 0.7, "grad_norm": 0.5503379106521606, "learning_rate": 0.0005235656467759778, "loss": 3.2192, "step": 14223 }, { "epoch": 0.7, "grad_norm": 0.5100115537643433, "learning_rate": 0.0005235553799939458, "loss": 3.2576, "step": 14224 }, { "epoch": 0.7, "grad_norm": 0.5590293407440186, "learning_rate": 0.0005235451126231119, "loss": 3.2798, "step": 14225 }, { "epoch": 0.7, "grad_norm": 0.5304601788520813, "learning_rate": 0.0005235348446635034, "loss": 3.0178, "step": 14226 }, { "epoch": 0.7, "grad_norm": 0.5042637586593628, "learning_rate": 0.0005235245761151468, "loss": 3.1506, "step": 14227 }, { "epoch": 0.7, "grad_norm": 0.5219356417655945, "learning_rate": 0.0005235143069780698, "loss": 3.2972, "step": 14228 }, { "epoch": 0.7, "grad_norm": 0.5481365919113159, "learning_rate": 0.000523504037252299, "loss": 3.2286, "step": 14229 }, { "epoch": 0.7, "grad_norm": 0.48297563195228577, "learning_rate": 0.0005234937669378615, "loss": 3.3976, "step": 14230 }, { "epoch": 0.7, "grad_norm": 0.5044030547142029, "learning_rate": 0.0005234834960347846, "loss": 3.2173, "step": 14231 }, { "epoch": 0.7, "grad_norm": 0.5015493035316467, "learning_rate": 0.0005234732245430952, "loss": 3.3385, "step": 14232 }, { "epoch": 0.7, "grad_norm": 0.5298082232475281, "learning_rate": 0.0005234629524628201, "loss": 3.3677, "step": 14233 }, { "epoch": 0.7, "grad_norm": 0.5369641780853271, "learning_rate": 0.0005234526797939868, "loss": 3.42, "step": 14234 }, { "epoch": 0.7, "grad_norm": 0.6291206479072571, "learning_rate": 0.000523442406536622, "loss": 3.1479, "step": 14235 }, { "epoch": 0.7, "grad_norm": 0.4945876896381378, "learning_rate": 0.000523432132690753, "loss": 3.1374, "step": 14236 }, { "epoch": 0.7, "grad_norm": 0.519692063331604, "learning_rate": 0.0005234218582564067, "loss": 3.0743, "step": 14237 }, { "epoch": 0.7, "grad_norm": 0.5264421701431274, "learning_rate": 0.0005234115832336103, "loss": 3.3572, "step": 14238 }, { "epoch": 0.7, "grad_norm": 0.49894416332244873, "learning_rate": 0.0005234013076223907, "loss": 3.1509, "step": 14239 }, { "epoch": 0.7, "grad_norm": 0.519243597984314, "learning_rate": 0.0005233910314227751, "loss": 3.2018, "step": 14240 }, { "epoch": 0.7, "grad_norm": 0.5167336463928223, "learning_rate": 0.0005233807546347906, "loss": 3.3301, "step": 14241 }, { "epoch": 0.7, "grad_norm": 0.5419256687164307, "learning_rate": 0.000523370477258464, "loss": 3.3605, "step": 14242 }, { "epoch": 0.7, "grad_norm": 0.5188645124435425, "learning_rate": 0.0005233601992938228, "loss": 3.3297, "step": 14243 }, { "epoch": 0.7, "grad_norm": 0.5342188477516174, "learning_rate": 0.0005233499207408936, "loss": 3.2517, "step": 14244 }, { "epoch": 0.7, "grad_norm": 0.5165455341339111, "learning_rate": 0.0005233396415997038, "loss": 3.2128, "step": 14245 }, { "epoch": 0.7, "grad_norm": 0.5181511640548706, "learning_rate": 0.0005233293618702804, "loss": 3.0307, "step": 14246 }, { "epoch": 0.7, "grad_norm": 0.5308560729026794, "learning_rate": 0.0005233190815526502, "loss": 3.439, "step": 14247 }, { "epoch": 0.7, "grad_norm": 0.4866609573364258, "learning_rate": 0.0005233088006468407, "loss": 3.0744, "step": 14248 }, { "epoch": 0.7, "grad_norm": 0.5222994089126587, "learning_rate": 0.0005232985191528788, "loss": 3.1265, "step": 14249 }, { "epoch": 0.7, "grad_norm": 0.495818555355072, "learning_rate": 0.0005232882370707916, "loss": 3.4966, "step": 14250 }, { "epoch": 0.7, "grad_norm": 0.5098391175270081, "learning_rate": 0.0005232779544006061, "loss": 3.2485, "step": 14251 }, { "epoch": 0.7, "grad_norm": 0.5358620882034302, "learning_rate": 0.0005232676711423495, "loss": 3.2363, "step": 14252 }, { "epoch": 0.7, "grad_norm": 0.5330485105514526, "learning_rate": 0.0005232573872960488, "loss": 3.2469, "step": 14253 }, { "epoch": 0.7, "grad_norm": 0.5173335075378418, "learning_rate": 0.0005232471028617311, "loss": 3.0878, "step": 14254 }, { "epoch": 0.7, "grad_norm": 0.510068953037262, "learning_rate": 0.0005232368178394234, "loss": 3.1082, "step": 14255 }, { "epoch": 0.7, "grad_norm": 0.567221462726593, "learning_rate": 0.000523226532229153, "loss": 3.1399, "step": 14256 }, { "epoch": 0.7, "grad_norm": 0.5452208518981934, "learning_rate": 0.0005232162460309468, "loss": 3.119, "step": 14257 }, { "epoch": 0.7, "grad_norm": 0.5163204669952393, "learning_rate": 0.000523205959244832, "loss": 3.2187, "step": 14258 }, { "epoch": 0.7, "grad_norm": 0.4994533061981201, "learning_rate": 0.0005231956718708357, "loss": 3.1825, "step": 14259 }, { "epoch": 0.7, "grad_norm": 0.5001878142356873, "learning_rate": 0.0005231853839089849, "loss": 3.2424, "step": 14260 }, { "epoch": 0.7, "grad_norm": 0.5246791243553162, "learning_rate": 0.0005231750953593068, "loss": 3.2498, "step": 14261 }, { "epoch": 0.7, "grad_norm": 0.4972473084926605, "learning_rate": 0.0005231648062218285, "loss": 3.193, "step": 14262 }, { "epoch": 0.7, "grad_norm": 0.5774008631706238, "learning_rate": 0.0005231545164965769, "loss": 3.0555, "step": 14263 }, { "epoch": 0.7, "grad_norm": 0.5512023568153381, "learning_rate": 0.0005231442261835794, "loss": 3.2012, "step": 14264 }, { "epoch": 0.7, "grad_norm": 0.5204340815544128, "learning_rate": 0.0005231339352828628, "loss": 3.0874, "step": 14265 }, { "epoch": 0.7, "grad_norm": 0.5237892866134644, "learning_rate": 0.0005231236437944545, "loss": 3.1603, "step": 14266 }, { "epoch": 0.7, "grad_norm": 0.5312855839729309, "learning_rate": 0.0005231133517183814, "loss": 3.1177, "step": 14267 }, { "epoch": 0.7, "grad_norm": 0.48745495080947876, "learning_rate": 0.0005231030590546707, "loss": 3.268, "step": 14268 }, { "epoch": 0.7, "grad_norm": 0.5135250091552734, "learning_rate": 0.0005230927658033494, "loss": 3.395, "step": 14269 }, { "epoch": 0.7, "grad_norm": 0.5581345558166504, "learning_rate": 0.0005230824719644448, "loss": 3.1017, "step": 14270 }, { "epoch": 0.7, "grad_norm": 0.47780802845954895, "learning_rate": 0.0005230721775379837, "loss": 3.1137, "step": 14271 }, { "epoch": 0.7, "grad_norm": 0.49343234300613403, "learning_rate": 0.0005230618825239937, "loss": 3.2004, "step": 14272 }, { "epoch": 0.7, "grad_norm": 0.5608261823654175, "learning_rate": 0.0005230515869225013, "loss": 3.1291, "step": 14273 }, { "epoch": 0.7, "grad_norm": 0.510046124458313, "learning_rate": 0.0005230412907335343, "loss": 3.3241, "step": 14274 }, { "epoch": 0.7, "grad_norm": 0.506389856338501, "learning_rate": 0.0005230309939571193, "loss": 3.1, "step": 14275 }, { "epoch": 0.7, "grad_norm": 0.4831939935684204, "learning_rate": 0.0005230206965932836, "loss": 3.2499, "step": 14276 }, { "epoch": 0.7, "grad_norm": 0.506610095500946, "learning_rate": 0.0005230103986420542, "loss": 3.371, "step": 14277 }, { "epoch": 0.7, "grad_norm": 0.49668189883232117, "learning_rate": 0.0005230001001034585, "loss": 3.105, "step": 14278 }, { "epoch": 0.7, "grad_norm": 0.5084254145622253, "learning_rate": 0.0005229898009775233, "loss": 3.1564, "step": 14279 }, { "epoch": 0.7, "grad_norm": 0.5354201793670654, "learning_rate": 0.000522979501264276, "loss": 3.2659, "step": 14280 }, { "epoch": 0.7, "grad_norm": 0.5142485499382019, "learning_rate": 0.0005229692009637437, "loss": 3.0802, "step": 14281 }, { "epoch": 0.7, "grad_norm": 0.5173102617263794, "learning_rate": 0.0005229589000759531, "loss": 3.0241, "step": 14282 }, { "epoch": 0.7, "grad_norm": 0.5071712732315063, "learning_rate": 0.0005229485986009319, "loss": 3.2652, "step": 14283 }, { "epoch": 0.7, "grad_norm": 0.4926835596561432, "learning_rate": 0.0005229382965387069, "loss": 3.1913, "step": 14284 }, { "epoch": 0.7, "grad_norm": 0.503774106502533, "learning_rate": 0.0005229279938893055, "loss": 3.2452, "step": 14285 }, { "epoch": 0.7, "grad_norm": 0.4997362196445465, "learning_rate": 0.0005229176906527545, "loss": 3.3595, "step": 14286 }, { "epoch": 0.7, "grad_norm": 0.5188296437263489, "learning_rate": 0.0005229073868290813, "loss": 3.1387, "step": 14287 }, { "epoch": 0.7, "grad_norm": 0.5256169438362122, "learning_rate": 0.000522897082418313, "loss": 3.1277, "step": 14288 }, { "epoch": 0.7, "grad_norm": 0.5132431387901306, "learning_rate": 0.0005228867774204765, "loss": 3.1984, "step": 14289 }, { "epoch": 0.7, "grad_norm": 0.5352613925933838, "learning_rate": 0.0005228764718355993, "loss": 3.301, "step": 14290 }, { "epoch": 0.7, "grad_norm": 0.5036006569862366, "learning_rate": 0.0005228661656637082, "loss": 3.2298, "step": 14291 }, { "epoch": 0.7, "grad_norm": 0.4817676544189453, "learning_rate": 0.0005228558589048306, "loss": 3.3918, "step": 14292 }, { "epoch": 0.7, "grad_norm": 0.528864324092865, "learning_rate": 0.0005228455515589935, "loss": 3.0981, "step": 14293 }, { "epoch": 0.7, "grad_norm": 0.5127184391021729, "learning_rate": 0.0005228352436262243, "loss": 3.3858, "step": 14294 }, { "epoch": 0.7, "grad_norm": 0.5853077173233032, "learning_rate": 0.0005228249351065498, "loss": 3.2205, "step": 14295 }, { "epoch": 0.7, "grad_norm": 0.5480331182479858, "learning_rate": 0.0005228146259999972, "loss": 3.0766, "step": 14296 }, { "epoch": 0.7, "grad_norm": 0.5229378342628479, "learning_rate": 0.000522804316306594, "loss": 3.0965, "step": 14297 }, { "epoch": 0.7, "grad_norm": 0.5020848512649536, "learning_rate": 0.0005227940060263669, "loss": 3.3201, "step": 14298 }, { "epoch": 0.7, "grad_norm": 0.5315490365028381, "learning_rate": 0.0005227836951593434, "loss": 3.2358, "step": 14299 }, { "epoch": 0.7, "grad_norm": 0.5193626880645752, "learning_rate": 0.0005227733837055504, "loss": 3.0192, "step": 14300 }, { "epoch": 0.7, "grad_norm": 0.5127216577529907, "learning_rate": 0.0005227630716650152, "loss": 3.179, "step": 14301 }, { "epoch": 0.7, "grad_norm": 0.5541197061538696, "learning_rate": 0.0005227527590377651, "loss": 3.1317, "step": 14302 }, { "epoch": 0.7, "grad_norm": 0.5353065133094788, "learning_rate": 0.0005227424458238269, "loss": 3.1438, "step": 14303 }, { "epoch": 0.7, "grad_norm": 0.4878837764263153, "learning_rate": 0.0005227321320232281, "loss": 3.3127, "step": 14304 }, { "epoch": 0.7, "grad_norm": 0.5021807551383972, "learning_rate": 0.0005227218176359957, "loss": 3.2301, "step": 14305 }, { "epoch": 0.7, "grad_norm": 0.4997186064720154, "learning_rate": 0.0005227115026621568, "loss": 3.1486, "step": 14306 }, { "epoch": 0.7, "grad_norm": 0.5215864181518555, "learning_rate": 0.0005227011871017388, "loss": 3.1568, "step": 14307 }, { "epoch": 0.7, "grad_norm": 0.5061655044555664, "learning_rate": 0.0005226908709547687, "loss": 3.1592, "step": 14308 }, { "epoch": 0.7, "grad_norm": 0.5245349407196045, "learning_rate": 0.0005226805542212737, "loss": 3.3514, "step": 14309 }, { "epoch": 0.7, "grad_norm": 0.4831959307193756, "learning_rate": 0.000522670236901281, "loss": 3.2534, "step": 14310 }, { "epoch": 0.7, "grad_norm": 0.47621917724609375, "learning_rate": 0.0005226599189948176, "loss": 3.0129, "step": 14311 }, { "epoch": 0.7, "grad_norm": 0.5170857310295105, "learning_rate": 0.000522649600501911, "loss": 3.073, "step": 14312 }, { "epoch": 0.7, "grad_norm": 0.5005858540534973, "learning_rate": 0.0005226392814225881, "loss": 3.114, "step": 14313 }, { "epoch": 0.7, "grad_norm": 0.5574570894241333, "learning_rate": 0.0005226289617568763, "loss": 3.3234, "step": 14314 }, { "epoch": 0.7, "grad_norm": 0.5117168426513672, "learning_rate": 0.0005226186415048026, "loss": 3.1659, "step": 14315 }, { "epoch": 0.7, "grad_norm": 0.49425193667411804, "learning_rate": 0.0005226083206663941, "loss": 2.9738, "step": 14316 }, { "epoch": 0.7, "grad_norm": 0.5152395963668823, "learning_rate": 0.0005225979992416783, "loss": 3.2, "step": 14317 }, { "epoch": 0.7, "grad_norm": 0.5114201903343201, "learning_rate": 0.0005225876772306822, "loss": 3.2103, "step": 14318 }, { "epoch": 0.7, "grad_norm": 0.48314422369003296, "learning_rate": 0.0005225773546334328, "loss": 3.2442, "step": 14319 }, { "epoch": 0.7, "grad_norm": 0.5234989523887634, "learning_rate": 0.0005225670314499577, "loss": 3.1864, "step": 14320 }, { "epoch": 0.7, "grad_norm": 0.5077502727508545, "learning_rate": 0.0005225567076802838, "loss": 3.2501, "step": 14321 }, { "epoch": 0.7, "grad_norm": 0.4953576624393463, "learning_rate": 0.0005225463833244384, "loss": 3.2259, "step": 14322 }, { "epoch": 0.7, "grad_norm": 0.5094617605209351, "learning_rate": 0.0005225360583824487, "loss": 3.1856, "step": 14323 }, { "epoch": 0.7, "grad_norm": 0.5310658812522888, "learning_rate": 0.0005225257328543417, "loss": 3.2715, "step": 14324 }, { "epoch": 0.7, "grad_norm": 0.5143598318099976, "learning_rate": 0.0005225154067401448, "loss": 3.2914, "step": 14325 }, { "epoch": 0.7, "grad_norm": 0.5416876673698425, "learning_rate": 0.0005225050800398851, "loss": 3.3223, "step": 14326 }, { "epoch": 0.7, "grad_norm": 0.5177731513977051, "learning_rate": 0.00052249475275359, "loss": 3.1994, "step": 14327 }, { "epoch": 0.7, "grad_norm": 0.4947225749492645, "learning_rate": 0.0005224844248812864, "loss": 3.2301, "step": 14328 }, { "epoch": 0.7, "grad_norm": 0.5090954303741455, "learning_rate": 0.0005224740964230017, "loss": 3.1208, "step": 14329 }, { "epoch": 0.7, "grad_norm": 0.5198858976364136, "learning_rate": 0.0005224637673787631, "loss": 3.2342, "step": 14330 }, { "epoch": 0.7, "grad_norm": 0.5442454814910889, "learning_rate": 0.0005224534377485977, "loss": 3.0242, "step": 14331 }, { "epoch": 0.7, "grad_norm": 0.6403926610946655, "learning_rate": 0.0005224431075325327, "loss": 3.0215, "step": 14332 }, { "epoch": 0.7, "grad_norm": 0.49326714873313904, "learning_rate": 0.0005224327767305954, "loss": 3.4625, "step": 14333 }, { "epoch": 0.7, "grad_norm": 0.527711808681488, "learning_rate": 0.000522422445342813, "loss": 3.3671, "step": 14334 }, { "epoch": 0.7, "grad_norm": 0.5429787039756775, "learning_rate": 0.0005224121133692127, "loss": 3.3924, "step": 14335 }, { "epoch": 0.7, "grad_norm": 0.5077711343765259, "learning_rate": 0.0005224017808098217, "loss": 3.2152, "step": 14336 }, { "epoch": 0.7, "grad_norm": 0.48497462272644043, "learning_rate": 0.0005223914476646671, "loss": 3.1022, "step": 14337 }, { "epoch": 0.7, "grad_norm": 0.5042962431907654, "learning_rate": 0.0005223811139337763, "loss": 3.3529, "step": 14338 }, { "epoch": 0.7, "grad_norm": 0.5740790963172913, "learning_rate": 0.0005223707796171765, "loss": 3.0065, "step": 14339 }, { "epoch": 0.7, "grad_norm": 0.5047106742858887, "learning_rate": 0.0005223604447148947, "loss": 3.2321, "step": 14340 }, { "epoch": 0.7, "grad_norm": 0.5074807405471802, "learning_rate": 0.0005223501092269584, "loss": 3.2166, "step": 14341 }, { "epoch": 0.7, "grad_norm": 0.49849480390548706, "learning_rate": 0.0005223397731533947, "loss": 2.8486, "step": 14342 }, { "epoch": 0.7, "grad_norm": 0.4887546896934509, "learning_rate": 0.0005223294364942309, "loss": 3.2793, "step": 14343 }, { "epoch": 0.7, "grad_norm": 0.5052506923675537, "learning_rate": 0.0005223190992494941, "loss": 3.2666, "step": 14344 }, { "epoch": 0.7, "grad_norm": 0.530553936958313, "learning_rate": 0.0005223087614192116, "loss": 3.3803, "step": 14345 }, { "epoch": 0.7, "grad_norm": 0.5107871294021606, "learning_rate": 0.0005222984230034105, "loss": 3.2682, "step": 14346 }, { "epoch": 0.7, "grad_norm": 0.5242406725883484, "learning_rate": 0.0005222880840021183, "loss": 3.1691, "step": 14347 }, { "epoch": 0.7, "grad_norm": 0.5011206269264221, "learning_rate": 0.0005222777444153618, "loss": 3.5911, "step": 14348 }, { "epoch": 0.7, "grad_norm": 0.5178399085998535, "learning_rate": 0.0005222674042431688, "loss": 3.2566, "step": 14349 }, { "epoch": 0.7, "grad_norm": 0.5099117159843445, "learning_rate": 0.000522257063485566, "loss": 3.159, "step": 14350 }, { "epoch": 0.7, "grad_norm": 0.5087069869041443, "learning_rate": 0.000522246722142581, "loss": 3.0981, "step": 14351 }, { "epoch": 0.7, "grad_norm": 0.49150705337524414, "learning_rate": 0.0005222363802142409, "loss": 3.2154, "step": 14352 }, { "epoch": 0.7, "grad_norm": 0.5360817313194275, "learning_rate": 0.0005222260377005729, "loss": 3.0334, "step": 14353 }, { "epoch": 0.7, "grad_norm": 0.7099424600601196, "learning_rate": 0.0005222156946016043, "loss": 3.1963, "step": 14354 }, { "epoch": 0.7, "grad_norm": 0.5025527477264404, "learning_rate": 0.0005222053509173623, "loss": 3.0433, "step": 14355 }, { "epoch": 0.7, "grad_norm": 0.543745219707489, "learning_rate": 0.0005221950066478742, "loss": 3.0136, "step": 14356 }, { "epoch": 0.7, "grad_norm": 0.5021082758903503, "learning_rate": 0.0005221846617931672, "loss": 2.9967, "step": 14357 }, { "epoch": 0.7, "grad_norm": 0.5673491358757019, "learning_rate": 0.0005221743163532686, "loss": 3.2684, "step": 14358 }, { "epoch": 0.7, "grad_norm": 0.5165929198265076, "learning_rate": 0.0005221639703282057, "loss": 3.236, "step": 14359 }, { "epoch": 0.7, "grad_norm": 0.5241513848304749, "learning_rate": 0.0005221536237180054, "loss": 3.178, "step": 14360 }, { "epoch": 0.7, "grad_norm": 0.48851609230041504, "learning_rate": 0.0005221432765226955, "loss": 3.2317, "step": 14361 }, { "epoch": 0.7, "grad_norm": 0.5319517254829407, "learning_rate": 0.0005221329287423027, "loss": 3.3219, "step": 14362 }, { "epoch": 0.7, "grad_norm": 0.5038327574729919, "learning_rate": 0.0005221225803768546, "loss": 3.2557, "step": 14363 }, { "epoch": 0.7, "grad_norm": 0.5223231911659241, "learning_rate": 0.0005221122314263785, "loss": 3.1857, "step": 14364 }, { "epoch": 0.7, "grad_norm": 0.4955352544784546, "learning_rate": 0.0005221018818909014, "loss": 3.1251, "step": 14365 }, { "epoch": 0.7, "grad_norm": 0.5417662262916565, "learning_rate": 0.0005220915317704507, "loss": 3.0374, "step": 14366 }, { "epoch": 0.7, "grad_norm": 0.4988352060317993, "learning_rate": 0.0005220811810650537, "loss": 3.1441, "step": 14367 }, { "epoch": 0.7, "grad_norm": 0.4997967779636383, "learning_rate": 0.0005220708297747375, "loss": 3.267, "step": 14368 }, { "epoch": 0.7, "grad_norm": 0.5113928914070129, "learning_rate": 0.0005220604778995296, "loss": 3.1211, "step": 14369 }, { "epoch": 0.7, "grad_norm": 0.5090401768684387, "learning_rate": 0.0005220501254394571, "loss": 3.2306, "step": 14370 }, { "epoch": 0.7, "grad_norm": 0.5390316843986511, "learning_rate": 0.0005220397723945473, "loss": 3.3519, "step": 14371 }, { "epoch": 0.7, "grad_norm": 0.5376928448677063, "learning_rate": 0.0005220294187648275, "loss": 3.0629, "step": 14372 }, { "epoch": 0.7, "grad_norm": 0.5060685276985168, "learning_rate": 0.0005220190645503248, "loss": 3.2316, "step": 14373 }, { "epoch": 0.7, "grad_norm": 0.5190165638923645, "learning_rate": 0.0005220087097510668, "loss": 3.1309, "step": 14374 }, { "epoch": 0.7, "grad_norm": 0.5150943398475647, "learning_rate": 0.0005219983543670805, "loss": 3.2837, "step": 14375 }, { "epoch": 0.7, "grad_norm": 0.4947223663330078, "learning_rate": 0.0005219879983983933, "loss": 3.376, "step": 14376 }, { "epoch": 0.7, "grad_norm": 0.48892343044281006, "learning_rate": 0.0005219776418450323, "loss": 3.24, "step": 14377 }, { "epoch": 0.7, "grad_norm": 0.5334641337394714, "learning_rate": 0.0005219672847070251, "loss": 3.2448, "step": 14378 }, { "epoch": 0.7, "grad_norm": 0.5088352560997009, "learning_rate": 0.0005219569269843987, "loss": 3.1581, "step": 14379 }, { "epoch": 0.7, "grad_norm": 0.49996280670166016, "learning_rate": 0.0005219465686771805, "loss": 3.2747, "step": 14380 }, { "epoch": 0.7, "grad_norm": 0.5581712126731873, "learning_rate": 0.0005219362097853977, "loss": 3.135, "step": 14381 }, { "epoch": 0.7, "grad_norm": 0.5300365686416626, "learning_rate": 0.0005219258503090776, "loss": 3.2328, "step": 14382 }, { "epoch": 0.7, "grad_norm": 0.4860009551048279, "learning_rate": 0.0005219154902482476, "loss": 3.2522, "step": 14383 }, { "epoch": 0.7, "grad_norm": 0.5062441825866699, "learning_rate": 0.0005219051296029349, "loss": 3.3731, "step": 14384 }, { "epoch": 0.7, "grad_norm": 0.5869806408882141, "learning_rate": 0.0005218947683731667, "loss": 3.0537, "step": 14385 }, { "epoch": 0.71, "grad_norm": 0.5176935791969299, "learning_rate": 0.0005218844065589704, "loss": 2.9489, "step": 14386 }, { "epoch": 0.71, "grad_norm": 0.5050328373908997, "learning_rate": 0.0005218740441603735, "loss": 3.3333, "step": 14387 }, { "epoch": 0.71, "grad_norm": 0.5312533378601074, "learning_rate": 0.0005218636811774029, "loss": 3.2162, "step": 14388 }, { "epoch": 0.71, "grad_norm": 0.5098238587379456, "learning_rate": 0.000521853317610086, "loss": 3.1103, "step": 14389 }, { "epoch": 0.71, "grad_norm": 0.48738306760787964, "learning_rate": 0.0005218429534584502, "loss": 3.2526, "step": 14390 }, { "epoch": 0.71, "grad_norm": 0.5178094506263733, "learning_rate": 0.0005218325887225228, "loss": 3.2081, "step": 14391 }, { "epoch": 0.71, "grad_norm": 0.5126038789749146, "learning_rate": 0.000521822223402331, "loss": 3.1075, "step": 14392 }, { "epoch": 0.71, "grad_norm": 0.49259766936302185, "learning_rate": 0.0005218118574979023, "loss": 3.2309, "step": 14393 }, { "epoch": 0.71, "grad_norm": 0.5296828746795654, "learning_rate": 0.0005218014910092636, "loss": 3.294, "step": 14394 }, { "epoch": 0.71, "grad_norm": 0.5134131908416748, "learning_rate": 0.0005217911239364427, "loss": 3.2609, "step": 14395 }, { "epoch": 0.71, "grad_norm": 0.5110315680503845, "learning_rate": 0.0005217807562794666, "loss": 3.2463, "step": 14396 }, { "epoch": 0.71, "grad_norm": 0.5391361713409424, "learning_rate": 0.0005217703880383626, "loss": 2.8263, "step": 14397 }, { "epoch": 0.71, "grad_norm": 0.5444517135620117, "learning_rate": 0.0005217600192131582, "loss": 3.1325, "step": 14398 }, { "epoch": 0.71, "grad_norm": 0.5090447664260864, "learning_rate": 0.0005217496498038805, "loss": 2.9955, "step": 14399 }, { "epoch": 0.71, "grad_norm": 0.5326402187347412, "learning_rate": 0.0005217392798105569, "loss": 2.8706, "step": 14400 }, { "epoch": 0.71, "grad_norm": 0.586733877658844, "learning_rate": 0.0005217289092332147, "loss": 3.1483, "step": 14401 }, { "epoch": 0.71, "grad_norm": 0.534562349319458, "learning_rate": 0.0005217185380718812, "loss": 3.0548, "step": 14402 }, { "epoch": 0.71, "grad_norm": 0.49788811802864075, "learning_rate": 0.0005217081663265838, "loss": 3.1239, "step": 14403 }, { "epoch": 0.71, "grad_norm": 0.5514092445373535, "learning_rate": 0.0005216977939973498, "loss": 3.2406, "step": 14404 }, { "epoch": 0.71, "grad_norm": 0.5975626707077026, "learning_rate": 0.0005216874210842064, "loss": 3.0378, "step": 14405 }, { "epoch": 0.71, "grad_norm": 0.5058104395866394, "learning_rate": 0.0005216770475871811, "loss": 3.1995, "step": 14406 }, { "epoch": 0.71, "grad_norm": 0.5320433974266052, "learning_rate": 0.0005216666735063009, "loss": 2.9181, "step": 14407 }, { "epoch": 0.71, "grad_norm": 0.5036849975585938, "learning_rate": 0.0005216562988415935, "loss": 3.3894, "step": 14408 }, { "epoch": 0.71, "grad_norm": 0.5307196378707886, "learning_rate": 0.0005216459235930861, "loss": 3.0559, "step": 14409 }, { "epoch": 0.71, "grad_norm": 0.5142438411712646, "learning_rate": 0.0005216355477608059, "loss": 3.0828, "step": 14410 }, { "epoch": 0.71, "grad_norm": 0.5658327341079712, "learning_rate": 0.0005216251713447804, "loss": 3.2546, "step": 14411 }, { "epoch": 0.71, "grad_norm": 0.5084229111671448, "learning_rate": 0.0005216147943450368, "loss": 3.3371, "step": 14412 }, { "epoch": 0.71, "grad_norm": 0.5430961847305298, "learning_rate": 0.0005216044167616025, "loss": 3.2217, "step": 14413 }, { "epoch": 0.71, "grad_norm": 0.5450170636177063, "learning_rate": 0.0005215940385945048, "loss": 3.1991, "step": 14414 }, { "epoch": 0.71, "grad_norm": 0.49537381529808044, "learning_rate": 0.000521583659843771, "loss": 3.4065, "step": 14415 }, { "epoch": 0.71, "grad_norm": 0.5271323919296265, "learning_rate": 0.0005215732805094286, "loss": 3.2777, "step": 14416 }, { "epoch": 0.71, "grad_norm": 0.5401394367218018, "learning_rate": 0.0005215629005915047, "loss": 3.1833, "step": 14417 }, { "epoch": 0.71, "grad_norm": 0.5234045386314392, "learning_rate": 0.0005215525200900269, "loss": 3.0229, "step": 14418 }, { "epoch": 0.71, "grad_norm": 0.5828486680984497, "learning_rate": 0.0005215421390050224, "loss": 3.1473, "step": 14419 }, { "epoch": 0.71, "grad_norm": 0.5277825593948364, "learning_rate": 0.0005215317573365185, "loss": 3.0845, "step": 14420 }, { "epoch": 0.71, "grad_norm": 0.5510663390159607, "learning_rate": 0.0005215213750845425, "loss": 3.2722, "step": 14421 }, { "epoch": 0.71, "grad_norm": 0.5259878039360046, "learning_rate": 0.0005215109922491218, "loss": 3.0543, "step": 14422 }, { "epoch": 0.71, "grad_norm": 0.4986727237701416, "learning_rate": 0.000521500608830284, "loss": 3.2939, "step": 14423 }, { "epoch": 0.71, "grad_norm": 0.558155357837677, "learning_rate": 0.000521490224828056, "loss": 3.0415, "step": 14424 }, { "epoch": 0.71, "grad_norm": 0.5337893962860107, "learning_rate": 0.0005214798402424655, "loss": 2.949, "step": 14425 }, { "epoch": 0.71, "grad_norm": 0.5568438172340393, "learning_rate": 0.0005214694550735396, "loss": 3.4036, "step": 14426 }, { "epoch": 0.71, "grad_norm": 0.52046138048172, "learning_rate": 0.000521459069321306, "loss": 3.054, "step": 14427 }, { "epoch": 0.71, "grad_norm": 0.4952624440193176, "learning_rate": 0.0005214486829857916, "loss": 3.2659, "step": 14428 }, { "epoch": 0.71, "grad_norm": 0.584016740322113, "learning_rate": 0.0005214382960670241, "loss": 3.0175, "step": 14429 }, { "epoch": 0.71, "grad_norm": 0.5094707608222961, "learning_rate": 0.0005214279085650308, "loss": 3.0569, "step": 14430 }, { "epoch": 0.71, "grad_norm": 0.5051601529121399, "learning_rate": 0.0005214175204798388, "loss": 3.0209, "step": 14431 }, { "epoch": 0.71, "grad_norm": 0.4987492263317108, "learning_rate": 0.0005214071318114759, "loss": 3.2559, "step": 14432 }, { "epoch": 0.71, "grad_norm": 0.4821774661540985, "learning_rate": 0.0005213967425599692, "loss": 3.23, "step": 14433 }, { "epoch": 0.71, "grad_norm": 0.5031430125236511, "learning_rate": 0.0005213863527253459, "loss": 3.2185, "step": 14434 }, { "epoch": 0.71, "grad_norm": 0.52392578125, "learning_rate": 0.0005213759623076337, "loss": 3.1812, "step": 14435 }, { "epoch": 0.71, "grad_norm": 0.4971693754196167, "learning_rate": 0.0005213655713068598, "loss": 3.0771, "step": 14436 }, { "epoch": 0.71, "grad_norm": 0.5507715344429016, "learning_rate": 0.0005213551797230516, "loss": 3.0654, "step": 14437 }, { "epoch": 0.71, "grad_norm": 0.5817131996154785, "learning_rate": 0.0005213447875562365, "loss": 3.1453, "step": 14438 }, { "epoch": 0.71, "grad_norm": 0.5093923211097717, "learning_rate": 0.0005213343948064417, "loss": 2.9989, "step": 14439 }, { "epoch": 0.71, "grad_norm": 0.5584835410118103, "learning_rate": 0.0005213240014736947, "loss": 3.1431, "step": 14440 }, { "epoch": 0.71, "grad_norm": 0.534837007522583, "learning_rate": 0.000521313607558023, "loss": 3.1652, "step": 14441 }, { "epoch": 0.71, "grad_norm": 0.532855749130249, "learning_rate": 0.0005213032130594537, "loss": 3.1471, "step": 14442 }, { "epoch": 0.71, "grad_norm": 0.5114381909370422, "learning_rate": 0.0005212928179780144, "loss": 3.1387, "step": 14443 }, { "epoch": 0.71, "grad_norm": 0.5551025867462158, "learning_rate": 0.0005212824223137325, "loss": 3.2062, "step": 14444 }, { "epoch": 0.71, "grad_norm": 0.5081254839897156, "learning_rate": 0.0005212720260666352, "loss": 3.4413, "step": 14445 }, { "epoch": 0.71, "grad_norm": 0.5733745098114014, "learning_rate": 0.00052126162923675, "loss": 3.2314, "step": 14446 }, { "epoch": 0.71, "grad_norm": 0.5209436416625977, "learning_rate": 0.0005212512318241042, "loss": 3.4682, "step": 14447 }, { "epoch": 0.71, "grad_norm": 0.5001239776611328, "learning_rate": 0.0005212408338287252, "loss": 3.1519, "step": 14448 }, { "epoch": 0.71, "grad_norm": 0.5506751537322998, "learning_rate": 0.0005212304352506405, "loss": 2.9797, "step": 14449 }, { "epoch": 0.71, "grad_norm": 0.5498661398887634, "learning_rate": 0.0005212200360898775, "loss": 3.0644, "step": 14450 }, { "epoch": 0.71, "grad_norm": 0.5015347599983215, "learning_rate": 0.0005212096363464633, "loss": 3.1071, "step": 14451 }, { "epoch": 0.71, "grad_norm": 0.5187351107597351, "learning_rate": 0.0005211992360204256, "loss": 3.1041, "step": 14452 }, { "epoch": 0.71, "grad_norm": 0.5389336943626404, "learning_rate": 0.0005211888351117917, "loss": 3.1903, "step": 14453 }, { "epoch": 0.71, "grad_norm": 0.5327998995780945, "learning_rate": 0.0005211784336205889, "loss": 3.3395, "step": 14454 }, { "epoch": 0.71, "grad_norm": 0.5216488838195801, "learning_rate": 0.0005211680315468448, "loss": 3.2868, "step": 14455 }, { "epoch": 0.71, "grad_norm": 0.5144593715667725, "learning_rate": 0.0005211576288905865, "loss": 3.35, "step": 14456 }, { "epoch": 0.71, "grad_norm": 0.5322148203849792, "learning_rate": 0.0005211472256518416, "loss": 3.0846, "step": 14457 }, { "epoch": 0.71, "grad_norm": 0.4917519688606262, "learning_rate": 0.0005211368218306377, "loss": 3.1299, "step": 14458 }, { "epoch": 0.71, "grad_norm": 0.5577586889266968, "learning_rate": 0.0005211264174270016, "loss": 3.156, "step": 14459 }, { "epoch": 0.71, "grad_norm": 0.5201316475868225, "learning_rate": 0.0005211160124409613, "loss": 3.2895, "step": 14460 }, { "epoch": 0.71, "grad_norm": 0.49748873710632324, "learning_rate": 0.000521105606872544, "loss": 3.2544, "step": 14461 }, { "epoch": 0.71, "grad_norm": 0.5247087478637695, "learning_rate": 0.000521095200721777, "loss": 3.1305, "step": 14462 }, { "epoch": 0.71, "grad_norm": 0.5503666996955872, "learning_rate": 0.0005210847939886878, "loss": 3.2872, "step": 14463 }, { "epoch": 0.71, "grad_norm": 0.5127733945846558, "learning_rate": 0.0005210743866733039, "loss": 3.3832, "step": 14464 }, { "epoch": 0.71, "grad_norm": 0.5337607860565186, "learning_rate": 0.0005210639787756525, "loss": 3.2879, "step": 14465 }, { "epoch": 0.71, "grad_norm": 0.4939156770706177, "learning_rate": 0.0005210535702957612, "loss": 3.2596, "step": 14466 }, { "epoch": 0.71, "grad_norm": 0.5159709453582764, "learning_rate": 0.0005210431612336573, "loss": 3.3191, "step": 14467 }, { "epoch": 0.71, "grad_norm": 0.5352545976638794, "learning_rate": 0.0005210327515893683, "loss": 3.1018, "step": 14468 }, { "epoch": 0.71, "grad_norm": 0.5087549686431885, "learning_rate": 0.0005210223413629214, "loss": 3.0488, "step": 14469 }, { "epoch": 0.71, "grad_norm": 0.5398578643798828, "learning_rate": 0.0005210119305543443, "loss": 3.1926, "step": 14470 }, { "epoch": 0.71, "grad_norm": 0.9343258738517761, "learning_rate": 0.0005210015191636643, "loss": 3.0479, "step": 14471 }, { "epoch": 0.71, "grad_norm": 0.5741486549377441, "learning_rate": 0.0005209911071909089, "loss": 3.1191, "step": 14472 }, { "epoch": 0.71, "grad_norm": 0.5154914855957031, "learning_rate": 0.0005209806946361055, "loss": 3.2, "step": 14473 }, { "epoch": 0.71, "grad_norm": 0.5743831992149353, "learning_rate": 0.0005209702814992813, "loss": 3.374, "step": 14474 }, { "epoch": 0.71, "grad_norm": 0.5344333648681641, "learning_rate": 0.000520959867780464, "loss": 3.467, "step": 14475 }, { "epoch": 0.71, "grad_norm": 0.5519991517066956, "learning_rate": 0.0005209494534796809, "loss": 2.9471, "step": 14476 }, { "epoch": 0.71, "grad_norm": 0.5173640251159668, "learning_rate": 0.0005209390385969595, "loss": 3.24, "step": 14477 }, { "epoch": 0.71, "grad_norm": 0.5470741987228394, "learning_rate": 0.0005209286231323271, "loss": 3.1831, "step": 14478 }, { "epoch": 0.71, "grad_norm": 0.6194097399711609, "learning_rate": 0.0005209182070858114, "loss": 3.1974, "step": 14479 }, { "epoch": 0.71, "grad_norm": 0.4889790415763855, "learning_rate": 0.0005209077904574396, "loss": 2.9937, "step": 14480 }, { "epoch": 0.71, "grad_norm": 0.5154456496238708, "learning_rate": 0.0005208973732472391, "loss": 3.2068, "step": 14481 }, { "epoch": 0.71, "grad_norm": 0.4870891273021698, "learning_rate": 0.0005208869554552375, "loss": 3.0451, "step": 14482 }, { "epoch": 0.71, "grad_norm": 0.5939205288887024, "learning_rate": 0.0005208765370814622, "loss": 3.2206, "step": 14483 }, { "epoch": 0.71, "grad_norm": 0.4910772442817688, "learning_rate": 0.0005208661181259406, "loss": 3.1084, "step": 14484 }, { "epoch": 0.71, "grad_norm": 0.5324827432632446, "learning_rate": 0.0005208556985887001, "loss": 3.1209, "step": 14485 }, { "epoch": 0.71, "grad_norm": 0.497478723526001, "learning_rate": 0.0005208452784697681, "loss": 3.2166, "step": 14486 }, { "epoch": 0.71, "grad_norm": 0.5420011878013611, "learning_rate": 0.0005208348577691723, "loss": 3.2863, "step": 14487 }, { "epoch": 0.71, "grad_norm": 0.5050036311149597, "learning_rate": 0.0005208244364869399, "loss": 3.0082, "step": 14488 }, { "epoch": 0.71, "grad_norm": 0.4819994568824768, "learning_rate": 0.0005208140146230985, "loss": 3.2324, "step": 14489 }, { "epoch": 0.71, "grad_norm": 0.5056858658790588, "learning_rate": 0.0005208035921776755, "loss": 3.2479, "step": 14490 }, { "epoch": 0.71, "grad_norm": 0.5183698534965515, "learning_rate": 0.0005207931691506982, "loss": 3.206, "step": 14491 }, { "epoch": 0.71, "grad_norm": 0.5309738516807556, "learning_rate": 0.0005207827455421943, "loss": 3.2727, "step": 14492 }, { "epoch": 0.71, "grad_norm": 0.5266025066375732, "learning_rate": 0.0005207723213521911, "loss": 3.1433, "step": 14493 }, { "epoch": 0.71, "grad_norm": 0.5019662976264954, "learning_rate": 0.0005207618965807161, "loss": 3.224, "step": 14494 }, { "epoch": 0.71, "grad_norm": 0.5374035239219666, "learning_rate": 0.0005207514712277968, "loss": 3.1059, "step": 14495 }, { "epoch": 0.71, "grad_norm": 0.5742165446281433, "learning_rate": 0.0005207410452934605, "loss": 3.2328, "step": 14496 }, { "epoch": 0.71, "grad_norm": 0.4858763813972473, "learning_rate": 0.0005207306187777348, "loss": 3.0767, "step": 14497 }, { "epoch": 0.71, "grad_norm": 0.5236046314239502, "learning_rate": 0.0005207201916806473, "loss": 3.3342, "step": 14498 }, { "epoch": 0.71, "grad_norm": 0.5222908854484558, "learning_rate": 0.000520709764002225, "loss": 3.2661, "step": 14499 }, { "epoch": 0.71, "grad_norm": 0.5480522513389587, "learning_rate": 0.0005206993357424959, "loss": 3.132, "step": 14500 }, { "epoch": 0.71, "grad_norm": 0.5132131576538086, "learning_rate": 0.0005206889069014871, "loss": 3.3595, "step": 14501 }, { "epoch": 0.71, "grad_norm": 0.494584858417511, "learning_rate": 0.0005206784774792263, "loss": 3.3245, "step": 14502 }, { "epoch": 0.71, "grad_norm": 0.4954333007335663, "learning_rate": 0.0005206680474757407, "loss": 3.2417, "step": 14503 }, { "epoch": 0.71, "grad_norm": 0.5081412196159363, "learning_rate": 0.0005206576168910581, "loss": 3.0736, "step": 14504 }, { "epoch": 0.71, "grad_norm": 0.5705395340919495, "learning_rate": 0.0005206471857252057, "loss": 3.0512, "step": 14505 }, { "epoch": 0.71, "grad_norm": 0.5100386142730713, "learning_rate": 0.000520636753978211, "loss": 3.082, "step": 14506 }, { "epoch": 0.71, "grad_norm": 0.5036544799804688, "learning_rate": 0.0005206263216501018, "loss": 3.2025, "step": 14507 }, { "epoch": 0.71, "grad_norm": 0.5994665026664734, "learning_rate": 0.0005206158887409052, "loss": 3.2004, "step": 14508 }, { "epoch": 0.71, "grad_norm": 0.50201815366745, "learning_rate": 0.0005206054552506487, "loss": 3.1203, "step": 14509 }, { "epoch": 0.71, "grad_norm": 0.5317625999450684, "learning_rate": 0.00052059502117936, "loss": 2.9033, "step": 14510 }, { "epoch": 0.71, "grad_norm": 0.5502082705497742, "learning_rate": 0.0005205845865270664, "loss": 3.1199, "step": 14511 }, { "epoch": 0.71, "grad_norm": 0.5232048034667969, "learning_rate": 0.0005205741512937955, "loss": 3.1681, "step": 14512 }, { "epoch": 0.71, "grad_norm": 0.5692151784896851, "learning_rate": 0.0005205637154795748, "loss": 3.1412, "step": 14513 }, { "epoch": 0.71, "grad_norm": 0.47583895921707153, "learning_rate": 0.0005205532790844317, "loss": 3.3911, "step": 14514 }, { "epoch": 0.71, "grad_norm": 0.5234794616699219, "learning_rate": 0.0005205428421083936, "loss": 3.0717, "step": 14515 }, { "epoch": 0.71, "grad_norm": 0.5292587876319885, "learning_rate": 0.0005205324045514882, "loss": 3.2203, "step": 14516 }, { "epoch": 0.71, "grad_norm": 0.4843212068080902, "learning_rate": 0.0005205219664137428, "loss": 3.317, "step": 14517 }, { "epoch": 0.71, "grad_norm": 0.5709378719329834, "learning_rate": 0.000520511527695185, "loss": 3.3233, "step": 14518 }, { "epoch": 0.71, "grad_norm": 0.4811621308326721, "learning_rate": 0.0005205010883958423, "loss": 3.2231, "step": 14519 }, { "epoch": 0.71, "grad_norm": 0.5761101841926575, "learning_rate": 0.0005204906485157423, "loss": 3.431, "step": 14520 }, { "epoch": 0.71, "grad_norm": 0.5028489828109741, "learning_rate": 0.0005204802080549122, "loss": 3.1521, "step": 14521 }, { "epoch": 0.71, "grad_norm": 0.5608251690864563, "learning_rate": 0.0005204697670133798, "loss": 2.9437, "step": 14522 }, { "epoch": 0.71, "grad_norm": 0.5372280478477478, "learning_rate": 0.0005204593253911724, "loss": 3.249, "step": 14523 }, { "epoch": 0.71, "grad_norm": 0.5417999029159546, "learning_rate": 0.0005204488831883174, "loss": 3.2695, "step": 14524 }, { "epoch": 0.71, "grad_norm": 0.5193753242492676, "learning_rate": 0.0005204384404048426, "loss": 3.1652, "step": 14525 }, { "epoch": 0.71, "grad_norm": 0.528472900390625, "learning_rate": 0.0005204279970407754, "loss": 3.1903, "step": 14526 }, { "epoch": 0.71, "grad_norm": 0.5966113805770874, "learning_rate": 0.0005204175530961432, "loss": 3.2464, "step": 14527 }, { "epoch": 0.71, "grad_norm": 0.5065594911575317, "learning_rate": 0.0005204071085709737, "loss": 3.4082, "step": 14528 }, { "epoch": 0.71, "grad_norm": 0.5471876859664917, "learning_rate": 0.0005203966634652942, "loss": 3.0131, "step": 14529 }, { "epoch": 0.71, "grad_norm": 0.5287893414497375, "learning_rate": 0.0005203862177791324, "loss": 3.1887, "step": 14530 }, { "epoch": 0.71, "grad_norm": 0.4954758882522583, "learning_rate": 0.0005203757715125157, "loss": 3.2469, "step": 14531 }, { "epoch": 0.71, "grad_norm": 0.4946754276752472, "learning_rate": 0.0005203653246654715, "loss": 3.3233, "step": 14532 }, { "epoch": 0.71, "grad_norm": 0.5238100290298462, "learning_rate": 0.0005203548772380276, "loss": 3.1324, "step": 14533 }, { "epoch": 0.71, "grad_norm": 0.5408486127853394, "learning_rate": 0.0005203444292302112, "loss": 3.1249, "step": 14534 }, { "epoch": 0.71, "grad_norm": 0.4843558073043823, "learning_rate": 0.0005203339806420501, "loss": 3.1595, "step": 14535 }, { "epoch": 0.71, "grad_norm": 0.5062605142593384, "learning_rate": 0.0005203235314735717, "loss": 3.1967, "step": 14536 }, { "epoch": 0.71, "grad_norm": 0.5224248766899109, "learning_rate": 0.0005203130817248035, "loss": 3.4137, "step": 14537 }, { "epoch": 0.71, "grad_norm": 0.5459402799606323, "learning_rate": 0.000520302631395773, "loss": 2.9693, "step": 14538 }, { "epoch": 0.71, "grad_norm": 0.5118008852005005, "learning_rate": 0.0005202921804865078, "loss": 3.4552, "step": 14539 }, { "epoch": 0.71, "grad_norm": 0.516243040561676, "learning_rate": 0.0005202817289970353, "loss": 3.3095, "step": 14540 }, { "epoch": 0.71, "grad_norm": 0.4901047945022583, "learning_rate": 0.0005202712769273833, "loss": 3.1625, "step": 14541 }, { "epoch": 0.71, "grad_norm": 0.5035889148712158, "learning_rate": 0.0005202608242775791, "loss": 3.2717, "step": 14542 }, { "epoch": 0.71, "grad_norm": 0.5227107405662537, "learning_rate": 0.0005202503710476502, "loss": 3.3589, "step": 14543 }, { "epoch": 0.71, "grad_norm": 0.5514225959777832, "learning_rate": 0.0005202399172376242, "loss": 3.1658, "step": 14544 }, { "epoch": 0.71, "grad_norm": 0.5554363131523132, "learning_rate": 0.0005202294628475287, "loss": 3.0967, "step": 14545 }, { "epoch": 0.71, "grad_norm": 0.47724464535713196, "learning_rate": 0.0005202190078773912, "loss": 3.2781, "step": 14546 }, { "epoch": 0.71, "grad_norm": 0.5094786882400513, "learning_rate": 0.0005202085523272393, "loss": 3.3368, "step": 14547 }, { "epoch": 0.71, "grad_norm": 0.4955451190471649, "learning_rate": 0.0005201980961971002, "loss": 3.1882, "step": 14548 }, { "epoch": 0.71, "grad_norm": 0.5031975507736206, "learning_rate": 0.0005201876394870018, "loss": 3.0168, "step": 14549 }, { "epoch": 0.71, "grad_norm": 0.5426763892173767, "learning_rate": 0.0005201771821969716, "loss": 3.14, "step": 14550 }, { "epoch": 0.71, "grad_norm": 0.4851762056350708, "learning_rate": 0.0005201667243270371, "loss": 3.2685, "step": 14551 }, { "epoch": 0.71, "grad_norm": 0.5311375856399536, "learning_rate": 0.0005201562658772258, "loss": 3.4027, "step": 14552 }, { "epoch": 0.71, "grad_norm": 0.5157365798950195, "learning_rate": 0.0005201458068475652, "loss": 3.1562, "step": 14553 }, { "epoch": 0.71, "grad_norm": 0.5381741523742676, "learning_rate": 0.0005201353472380829, "loss": 3.1626, "step": 14554 }, { "epoch": 0.71, "grad_norm": 0.532410204410553, "learning_rate": 0.0005201248870488065, "loss": 3.3552, "step": 14555 }, { "epoch": 0.71, "grad_norm": 0.5098461508750916, "learning_rate": 0.0005201144262797636, "loss": 3.1537, "step": 14556 }, { "epoch": 0.71, "grad_norm": 0.5130876898765564, "learning_rate": 0.0005201039649309815, "loss": 3.2291, "step": 14557 }, { "epoch": 0.71, "grad_norm": 0.512928307056427, "learning_rate": 0.0005200935030024881, "loss": 3.0487, "step": 14558 }, { "epoch": 0.71, "grad_norm": 0.5555957555770874, "learning_rate": 0.0005200830404943106, "loss": 3.1302, "step": 14559 }, { "epoch": 0.71, "grad_norm": 0.49892672896385193, "learning_rate": 0.0005200725774064768, "loss": 3.2873, "step": 14560 }, { "epoch": 0.71, "grad_norm": 0.5405455827713013, "learning_rate": 0.0005200621137390141, "loss": 3.1677, "step": 14561 }, { "epoch": 0.71, "grad_norm": 0.48926350474357605, "learning_rate": 0.0005200516494919502, "loss": 3.2558, "step": 14562 }, { "epoch": 0.71, "grad_norm": 0.5370481014251709, "learning_rate": 0.0005200411846653127, "loss": 3.2475, "step": 14563 }, { "epoch": 0.71, "grad_norm": 0.5003460049629211, "learning_rate": 0.000520030719259129, "loss": 3.1576, "step": 14564 }, { "epoch": 0.71, "grad_norm": 0.48471590876579285, "learning_rate": 0.0005200202532734267, "loss": 3.1899, "step": 14565 }, { "epoch": 0.71, "grad_norm": 0.5002859234809875, "learning_rate": 0.0005200097867082335, "loss": 3.2503, "step": 14566 }, { "epoch": 0.71, "grad_norm": 0.5285899639129639, "learning_rate": 0.0005199993195635768, "loss": 3.1374, "step": 14567 }, { "epoch": 0.71, "grad_norm": 0.5553933382034302, "learning_rate": 0.0005199888518394841, "loss": 3.1874, "step": 14568 }, { "epoch": 0.71, "grad_norm": 0.7586709856987, "learning_rate": 0.0005199783835359833, "loss": 3.2705, "step": 14569 }, { "epoch": 0.71, "grad_norm": 0.5392246842384338, "learning_rate": 0.0005199679146531017, "loss": 3.0995, "step": 14570 }, { "epoch": 0.71, "grad_norm": 0.5427459478378296, "learning_rate": 0.000519957445190867, "loss": 3.0778, "step": 14571 }, { "epoch": 0.71, "grad_norm": 0.5674456357955933, "learning_rate": 0.0005199469751493065, "loss": 3.1027, "step": 14572 }, { "epoch": 0.71, "grad_norm": 0.5392853021621704, "learning_rate": 0.0005199365045284482, "loss": 3.2362, "step": 14573 }, { "epoch": 0.71, "grad_norm": 0.5736402869224548, "learning_rate": 0.0005199260333283195, "loss": 3.0357, "step": 14574 }, { "epoch": 0.71, "grad_norm": 0.5310801267623901, "learning_rate": 0.0005199155615489478, "loss": 3.3081, "step": 14575 }, { "epoch": 0.71, "grad_norm": 0.5260907411575317, "learning_rate": 0.000519905089190361, "loss": 3.1554, "step": 14576 }, { "epoch": 0.71, "grad_norm": 0.4972694218158722, "learning_rate": 0.0005198946162525864, "loss": 3.1766, "step": 14577 }, { "epoch": 0.71, "grad_norm": 0.472507506608963, "learning_rate": 0.0005198841427356517, "loss": 3.1027, "step": 14578 }, { "epoch": 0.71, "grad_norm": 0.6582967042922974, "learning_rate": 0.0005198736686395846, "loss": 3.3213, "step": 14579 }, { "epoch": 0.71, "grad_norm": 0.5455946326255798, "learning_rate": 0.0005198631939644124, "loss": 3.1553, "step": 14580 }, { "epoch": 0.71, "grad_norm": 0.5628792643547058, "learning_rate": 0.000519852718710163, "loss": 3.0593, "step": 14581 }, { "epoch": 0.71, "grad_norm": 0.5313958525657654, "learning_rate": 0.0005198422428768639, "loss": 3.2098, "step": 14582 }, { "epoch": 0.71, "grad_norm": 0.5243238806724548, "learning_rate": 0.0005198317664645424, "loss": 3.2207, "step": 14583 }, { "epoch": 0.71, "grad_norm": 0.5564098358154297, "learning_rate": 0.0005198212894732266, "loss": 3.0649, "step": 14584 }, { "epoch": 0.71, "grad_norm": 0.5318445563316345, "learning_rate": 0.0005198108119029437, "loss": 3.2918, "step": 14585 }, { "epoch": 0.71, "grad_norm": 0.5206500291824341, "learning_rate": 0.0005198003337537214, "loss": 3.3708, "step": 14586 }, { "epoch": 0.71, "grad_norm": 0.5864635109901428, "learning_rate": 0.0005197898550255874, "loss": 3.2293, "step": 14587 }, { "epoch": 0.71, "grad_norm": 0.497435986995697, "learning_rate": 0.0005197793757185692, "loss": 3.2024, "step": 14588 }, { "epoch": 0.71, "grad_norm": 0.5075864195823669, "learning_rate": 0.0005197688958326944, "loss": 3.2853, "step": 14589 }, { "epoch": 0.72, "grad_norm": 0.5199374556541443, "learning_rate": 0.0005197584153679906, "loss": 3.2211, "step": 14590 }, { "epoch": 0.72, "grad_norm": 0.5085850358009338, "learning_rate": 0.0005197479343244854, "loss": 3.43, "step": 14591 }, { "epoch": 0.72, "grad_norm": 0.5507273077964783, "learning_rate": 0.0005197374527022065, "loss": 3.3325, "step": 14592 }, { "epoch": 0.72, "grad_norm": 0.5263931155204773, "learning_rate": 0.0005197269705011815, "loss": 3.1009, "step": 14593 }, { "epoch": 0.72, "grad_norm": 0.5148525834083557, "learning_rate": 0.0005197164877214379, "loss": 3.0378, "step": 14594 }, { "epoch": 0.72, "grad_norm": 0.5128089189529419, "learning_rate": 0.0005197060043630032, "loss": 3.2473, "step": 14595 }, { "epoch": 0.72, "grad_norm": 0.4934292733669281, "learning_rate": 0.0005196955204259053, "loss": 3.1324, "step": 14596 }, { "epoch": 0.72, "grad_norm": 0.5595125555992126, "learning_rate": 0.0005196850359101716, "loss": 3.201, "step": 14597 }, { "epoch": 0.72, "grad_norm": 0.5288695693016052, "learning_rate": 0.0005196745508158299, "loss": 3.0758, "step": 14598 }, { "epoch": 0.72, "grad_norm": 0.5381522178649902, "learning_rate": 0.0005196640651429078, "loss": 3.0904, "step": 14599 }, { "epoch": 0.72, "grad_norm": 0.502053439617157, "learning_rate": 0.0005196535788914326, "loss": 3.2181, "step": 14600 }, { "epoch": 0.72, "grad_norm": 0.49978283047676086, "learning_rate": 0.0005196430920614323, "loss": 3.274, "step": 14601 }, { "epoch": 0.72, "grad_norm": 0.5054643750190735, "learning_rate": 0.0005196326046529344, "loss": 3.2331, "step": 14602 }, { "epoch": 0.72, "grad_norm": 0.5227307677268982, "learning_rate": 0.0005196221166659663, "loss": 3.3232, "step": 14603 }, { "epoch": 0.72, "grad_norm": 0.5008553862571716, "learning_rate": 0.000519611628100556, "loss": 3.2415, "step": 14604 }, { "epoch": 0.72, "grad_norm": 0.5128350853919983, "learning_rate": 0.0005196011389567308, "loss": 3.219, "step": 14605 }, { "epoch": 0.72, "grad_norm": 0.5377113223075867, "learning_rate": 0.0005195906492345186, "loss": 3.0948, "step": 14606 }, { "epoch": 0.72, "grad_norm": 0.5005555152893066, "learning_rate": 0.0005195801589339468, "loss": 3.3787, "step": 14607 }, { "epoch": 0.72, "grad_norm": 0.5084679126739502, "learning_rate": 0.0005195696680550431, "loss": 3.2606, "step": 14608 }, { "epoch": 0.72, "grad_norm": 0.4670720100402832, "learning_rate": 0.0005195591765978352, "loss": 3.2102, "step": 14609 }, { "epoch": 0.72, "grad_norm": 0.5008261799812317, "learning_rate": 0.0005195486845623507, "loss": 3.3554, "step": 14610 }, { "epoch": 0.72, "grad_norm": 0.5201194286346436, "learning_rate": 0.0005195381919486171, "loss": 3.1835, "step": 14611 }, { "epoch": 0.72, "grad_norm": 0.5437430143356323, "learning_rate": 0.0005195276987566623, "loss": 3.1677, "step": 14612 }, { "epoch": 0.72, "grad_norm": 0.5281556844711304, "learning_rate": 0.0005195172049865138, "loss": 3.3595, "step": 14613 }, { "epoch": 0.72, "grad_norm": 0.5061221122741699, "learning_rate": 0.0005195067106381992, "loss": 3.2528, "step": 14614 }, { "epoch": 0.72, "grad_norm": 0.5027444362640381, "learning_rate": 0.000519496215711746, "loss": 3.115, "step": 14615 }, { "epoch": 0.72, "grad_norm": 0.5213714241981506, "learning_rate": 0.0005194857202071822, "loss": 3.3346, "step": 14616 }, { "epoch": 0.72, "grad_norm": 0.5137941837310791, "learning_rate": 0.0005194752241245352, "loss": 3.2455, "step": 14617 }, { "epoch": 0.72, "grad_norm": 0.520012617111206, "learning_rate": 0.0005194647274638327, "loss": 3.3542, "step": 14618 }, { "epoch": 0.72, "grad_norm": 0.5151776075363159, "learning_rate": 0.0005194542302251024, "loss": 3.2251, "step": 14619 }, { "epoch": 0.72, "grad_norm": 0.5593071579933167, "learning_rate": 0.0005194437324083718, "loss": 2.9776, "step": 14620 }, { "epoch": 0.72, "grad_norm": 0.4902730882167816, "learning_rate": 0.0005194332340136686, "loss": 3.2225, "step": 14621 }, { "epoch": 0.72, "grad_norm": 0.5338214039802551, "learning_rate": 0.0005194227350410205, "loss": 3.3136, "step": 14622 }, { "epoch": 0.72, "grad_norm": 0.5151000022888184, "learning_rate": 0.0005194122354904553, "loss": 3.224, "step": 14623 }, { "epoch": 0.72, "grad_norm": 0.5185823440551758, "learning_rate": 0.0005194017353620004, "loss": 3.149, "step": 14624 }, { "epoch": 0.72, "grad_norm": 0.5511698126792908, "learning_rate": 0.0005193912346556836, "loss": 3.1918, "step": 14625 }, { "epoch": 0.72, "grad_norm": 0.5243788361549377, "learning_rate": 0.0005193807333715324, "loss": 3.1177, "step": 14626 }, { "epoch": 0.72, "grad_norm": 0.5167205333709717, "learning_rate": 0.0005193702315095746, "loss": 2.923, "step": 14627 }, { "epoch": 0.72, "grad_norm": 0.6894581317901611, "learning_rate": 0.0005193597290698379, "loss": 3.3533, "step": 14628 }, { "epoch": 0.72, "grad_norm": 0.5190028548240662, "learning_rate": 0.0005193492260523499, "loss": 3.2836, "step": 14629 }, { "epoch": 0.72, "grad_norm": 0.508630633354187, "learning_rate": 0.0005193387224571381, "loss": 3.4301, "step": 14630 }, { "epoch": 0.72, "grad_norm": 0.5017585158348083, "learning_rate": 0.0005193282182842305, "loss": 3.2973, "step": 14631 }, { "epoch": 0.72, "grad_norm": 0.5430968403816223, "learning_rate": 0.0005193177135336544, "loss": 3.1365, "step": 14632 }, { "epoch": 0.72, "grad_norm": 0.5301803350448608, "learning_rate": 0.0005193072082054379, "loss": 3.2685, "step": 14633 }, { "epoch": 0.72, "grad_norm": 0.5263997912406921, "learning_rate": 0.0005192967022996083, "loss": 3.4263, "step": 14634 }, { "epoch": 0.72, "grad_norm": 0.5006689429283142, "learning_rate": 0.0005192861958161933, "loss": 3.3041, "step": 14635 }, { "epoch": 0.72, "grad_norm": 0.5174351334571838, "learning_rate": 0.0005192756887552207, "loss": 3.1699, "step": 14636 }, { "epoch": 0.72, "grad_norm": 0.6178990602493286, "learning_rate": 0.0005192651811167183, "loss": 3.1885, "step": 14637 }, { "epoch": 0.72, "grad_norm": 0.5171335339546204, "learning_rate": 0.0005192546729007135, "loss": 3.1908, "step": 14638 }, { "epoch": 0.72, "grad_norm": 0.5851891040802002, "learning_rate": 0.000519244164107234, "loss": 3.2078, "step": 14639 }, { "epoch": 0.72, "grad_norm": 0.49289432168006897, "learning_rate": 0.0005192336547363076, "loss": 3.2048, "step": 14640 }, { "epoch": 0.72, "grad_norm": 0.4947859048843384, "learning_rate": 0.0005192231447879621, "loss": 3.1892, "step": 14641 }, { "epoch": 0.72, "grad_norm": 0.5246251821517944, "learning_rate": 0.0005192126342622249, "loss": 2.9959, "step": 14642 }, { "epoch": 0.72, "grad_norm": 0.5482833385467529, "learning_rate": 0.0005192021231591238, "loss": 3.3374, "step": 14643 }, { "epoch": 0.72, "grad_norm": 0.5282585620880127, "learning_rate": 0.0005191916114786865, "loss": 3.2919, "step": 14644 }, { "epoch": 0.72, "grad_norm": 0.5180543661117554, "learning_rate": 0.0005191810992209407, "loss": 3.2924, "step": 14645 }, { "epoch": 0.72, "grad_norm": 0.5106507539749146, "learning_rate": 0.0005191705863859141, "loss": 3.0478, "step": 14646 }, { "epoch": 0.72, "grad_norm": 0.501225471496582, "learning_rate": 0.0005191600729736343, "loss": 3.3096, "step": 14647 }, { "epoch": 0.72, "grad_norm": 0.5608596205711365, "learning_rate": 0.000519149558984129, "loss": 3.1157, "step": 14648 }, { "epoch": 0.72, "grad_norm": 0.521439790725708, "learning_rate": 0.0005191390444174261, "loss": 3.2124, "step": 14649 }, { "epoch": 0.72, "grad_norm": 0.4991474747657776, "learning_rate": 0.0005191285292735529, "loss": 3.2962, "step": 14650 }, { "epoch": 0.72, "grad_norm": 0.5133585333824158, "learning_rate": 0.0005191180135525375, "loss": 3.3118, "step": 14651 }, { "epoch": 0.72, "grad_norm": 0.4970352053642273, "learning_rate": 0.0005191074972544073, "loss": 3.3128, "step": 14652 }, { "epoch": 0.72, "grad_norm": 0.5411221385002136, "learning_rate": 0.0005190969803791903, "loss": 3.2144, "step": 14653 }, { "epoch": 0.72, "grad_norm": 0.5432132482528687, "learning_rate": 0.0005190864629269139, "loss": 3.1416, "step": 14654 }, { "epoch": 0.72, "grad_norm": 0.599610447883606, "learning_rate": 0.0005190759448976059, "loss": 2.9844, "step": 14655 }, { "epoch": 0.72, "grad_norm": 0.49764925241470337, "learning_rate": 0.0005190654262912941, "loss": 3.1174, "step": 14656 }, { "epoch": 0.72, "grad_norm": 0.49829450249671936, "learning_rate": 0.000519054907108006, "loss": 3.2279, "step": 14657 }, { "epoch": 0.72, "grad_norm": 0.5487377643585205, "learning_rate": 0.0005190443873477696, "loss": 3.0803, "step": 14658 }, { "epoch": 0.72, "grad_norm": 0.5312570333480835, "learning_rate": 0.0005190338670106124, "loss": 3.2438, "step": 14659 }, { "epoch": 0.72, "grad_norm": 0.5211238265037537, "learning_rate": 0.0005190233460965621, "loss": 3.2646, "step": 14660 }, { "epoch": 0.72, "grad_norm": 0.5218424797058105, "learning_rate": 0.0005190128246056465, "loss": 3.0871, "step": 14661 }, { "epoch": 0.72, "grad_norm": 0.5989399552345276, "learning_rate": 0.0005190023025378932, "loss": 3.2677, "step": 14662 }, { "epoch": 0.72, "grad_norm": 0.5372099876403809, "learning_rate": 0.00051899177989333, "loss": 3.164, "step": 14663 }, { "epoch": 0.72, "grad_norm": 0.4821583926677704, "learning_rate": 0.0005189812566719847, "loss": 3.1532, "step": 14664 }, { "epoch": 0.72, "grad_norm": 0.5003272891044617, "learning_rate": 0.0005189707328738848, "loss": 3.3264, "step": 14665 }, { "epoch": 0.72, "grad_norm": 0.521091103553772, "learning_rate": 0.0005189602084990581, "loss": 3.1722, "step": 14666 }, { "epoch": 0.72, "grad_norm": 0.5140360593795776, "learning_rate": 0.0005189496835475325, "loss": 3.2693, "step": 14667 }, { "epoch": 0.72, "grad_norm": 0.5883575081825256, "learning_rate": 0.0005189391580193354, "loss": 3.0789, "step": 14668 }, { "epoch": 0.72, "grad_norm": 0.4931708574295044, "learning_rate": 0.0005189286319144949, "loss": 3.1453, "step": 14669 }, { "epoch": 0.72, "grad_norm": 0.5307677984237671, "learning_rate": 0.0005189181052330384, "loss": 3.3826, "step": 14670 }, { "epoch": 0.72, "grad_norm": 0.4849635362625122, "learning_rate": 0.0005189075779749937, "loss": 3.0497, "step": 14671 }, { "epoch": 0.72, "grad_norm": 0.49971646070480347, "learning_rate": 0.0005188970501403886, "loss": 3.0944, "step": 14672 }, { "epoch": 0.72, "grad_norm": 0.521876871585846, "learning_rate": 0.0005188865217292508, "loss": 3.1896, "step": 14673 }, { "epoch": 0.72, "grad_norm": 0.4860338568687439, "learning_rate": 0.000518875992741608, "loss": 3.2229, "step": 14674 }, { "epoch": 0.72, "grad_norm": 0.5542223453521729, "learning_rate": 0.0005188654631774881, "loss": 3.0625, "step": 14675 }, { "epoch": 0.72, "grad_norm": 0.5281686186790466, "learning_rate": 0.0005188549330369186, "loss": 3.1883, "step": 14676 }, { "epoch": 0.72, "grad_norm": 0.49884727597236633, "learning_rate": 0.0005188444023199272, "loss": 3.141, "step": 14677 }, { "epoch": 0.72, "grad_norm": 0.5445032715797424, "learning_rate": 0.0005188338710265419, "loss": 3.2494, "step": 14678 }, { "epoch": 0.72, "grad_norm": 0.5325313806533813, "learning_rate": 0.0005188233391567903, "loss": 3.1469, "step": 14679 }, { "epoch": 0.72, "grad_norm": 0.5177390575408936, "learning_rate": 0.0005188128067107002, "loss": 2.9586, "step": 14680 }, { "epoch": 0.72, "grad_norm": 0.5098357200622559, "learning_rate": 0.0005188022736882991, "loss": 3.1629, "step": 14681 }, { "epoch": 0.72, "grad_norm": 0.4955054521560669, "learning_rate": 0.0005187917400896149, "loss": 3.3163, "step": 14682 }, { "epoch": 0.72, "grad_norm": 0.5325889587402344, "learning_rate": 0.0005187812059146756, "loss": 3.1758, "step": 14683 }, { "epoch": 0.72, "grad_norm": 0.5700262188911438, "learning_rate": 0.0005187706711635086, "loss": 3.1304, "step": 14684 }, { "epoch": 0.72, "grad_norm": 0.517220139503479, "learning_rate": 0.0005187601358361417, "loss": 3.0101, "step": 14685 }, { "epoch": 0.72, "grad_norm": 0.5129212737083435, "learning_rate": 0.0005187495999326027, "loss": 2.9816, "step": 14686 }, { "epoch": 0.72, "grad_norm": 0.5115025043487549, "learning_rate": 0.0005187390634529194, "loss": 3.1781, "step": 14687 }, { "epoch": 0.72, "grad_norm": 0.5292808413505554, "learning_rate": 0.0005187285263971196, "loss": 3.129, "step": 14688 }, { "epoch": 0.72, "grad_norm": 0.5144400596618652, "learning_rate": 0.0005187179887652307, "loss": 3.1161, "step": 14689 }, { "epoch": 0.72, "grad_norm": 0.525768518447876, "learning_rate": 0.000518707450557281, "loss": 3.2922, "step": 14690 }, { "epoch": 0.72, "grad_norm": 0.5286670327186584, "learning_rate": 0.0005186969117732977, "loss": 3.4953, "step": 14691 }, { "epoch": 0.72, "grad_norm": 0.5114057660102844, "learning_rate": 0.000518686372413309, "loss": 3.3082, "step": 14692 }, { "epoch": 0.72, "grad_norm": 0.513731062412262, "learning_rate": 0.0005186758324773425, "loss": 3.048, "step": 14693 }, { "epoch": 0.72, "grad_norm": 0.5025843977928162, "learning_rate": 0.0005186652919654259, "loss": 3.2755, "step": 14694 }, { "epoch": 0.72, "grad_norm": 0.615983247756958, "learning_rate": 0.0005186547508775869, "loss": 3.111, "step": 14695 }, { "epoch": 0.72, "grad_norm": 0.5085950493812561, "learning_rate": 0.0005186442092138535, "loss": 3.2742, "step": 14696 }, { "epoch": 0.72, "grad_norm": 0.5130026936531067, "learning_rate": 0.0005186336669742533, "loss": 3.1467, "step": 14697 }, { "epoch": 0.72, "grad_norm": 0.5810296535491943, "learning_rate": 0.000518623124158814, "loss": 3.2415, "step": 14698 }, { "epoch": 0.72, "grad_norm": 0.5049729943275452, "learning_rate": 0.0005186125807675636, "loss": 3.09, "step": 14699 }, { "epoch": 0.72, "grad_norm": 0.5015254616737366, "learning_rate": 0.0005186020368005297, "loss": 3.0512, "step": 14700 }, { "epoch": 0.72, "grad_norm": 0.5339839458465576, "learning_rate": 0.00051859149225774, "loss": 3.2614, "step": 14701 }, { "epoch": 0.72, "grad_norm": 0.5117117762565613, "learning_rate": 0.0005185809471392226, "loss": 3.4624, "step": 14702 }, { "epoch": 0.72, "grad_norm": 0.5019668936729431, "learning_rate": 0.0005185704014450048, "loss": 3.3371, "step": 14703 }, { "epoch": 0.72, "grad_norm": 0.5273573994636536, "learning_rate": 0.0005185598551751148, "loss": 3.3187, "step": 14704 }, { "epoch": 0.72, "grad_norm": 0.5079138278961182, "learning_rate": 0.0005185493083295802, "loss": 3.2523, "step": 14705 }, { "epoch": 0.72, "grad_norm": 0.5488572120666504, "learning_rate": 0.0005185387609084286, "loss": 3.1907, "step": 14706 }, { "epoch": 0.72, "grad_norm": 0.47864294052124023, "learning_rate": 0.0005185282129116882, "loss": 2.9153, "step": 14707 }, { "epoch": 0.72, "grad_norm": 0.516621470451355, "learning_rate": 0.0005185176643393864, "loss": 3.2739, "step": 14708 }, { "epoch": 0.72, "grad_norm": 0.5150627493858337, "learning_rate": 0.0005185071151915512, "loss": 3.2769, "step": 14709 }, { "epoch": 0.72, "grad_norm": 0.5592693090438843, "learning_rate": 0.0005184965654682103, "loss": 3.2601, "step": 14710 }, { "epoch": 0.72, "grad_norm": 0.5248083472251892, "learning_rate": 0.0005184860151693914, "loss": 3.0784, "step": 14711 }, { "epoch": 0.72, "grad_norm": 0.5441768765449524, "learning_rate": 0.0005184754642951224, "loss": 3.0752, "step": 14712 }, { "epoch": 0.72, "grad_norm": 0.4891248047351837, "learning_rate": 0.0005184649128454312, "loss": 3.2164, "step": 14713 }, { "epoch": 0.72, "grad_norm": 0.5104601383209229, "learning_rate": 0.0005184543608203454, "loss": 2.9591, "step": 14714 }, { "epoch": 0.72, "grad_norm": 0.5093756318092346, "learning_rate": 0.0005184438082198929, "loss": 3.1289, "step": 14715 }, { "epoch": 0.72, "grad_norm": 0.5416193604469299, "learning_rate": 0.0005184332550441013, "loss": 3.1514, "step": 14716 }, { "epoch": 0.72, "grad_norm": 0.5121513605117798, "learning_rate": 0.0005184227012929987, "loss": 3.3509, "step": 14717 }, { "epoch": 0.72, "grad_norm": 0.5216866135597229, "learning_rate": 0.0005184121469666127, "loss": 3.1625, "step": 14718 }, { "epoch": 0.72, "grad_norm": 0.5179713368415833, "learning_rate": 0.0005184015920649711, "loss": 3.0819, "step": 14719 }, { "epoch": 0.72, "grad_norm": 0.4934249222278595, "learning_rate": 0.0005183910365881018, "loss": 3.0533, "step": 14720 }, { "epoch": 0.72, "grad_norm": 0.5014771819114685, "learning_rate": 0.0005183804805360324, "loss": 3.4464, "step": 14721 }, { "epoch": 0.72, "grad_norm": 0.4989439845085144, "learning_rate": 0.000518369923908791, "loss": 3.0768, "step": 14722 }, { "epoch": 0.72, "grad_norm": 0.5312348008155823, "learning_rate": 0.0005183593667064052, "loss": 3.1842, "step": 14723 }, { "epoch": 0.72, "grad_norm": 0.5148173570632935, "learning_rate": 0.0005183488089289029, "loss": 3.1817, "step": 14724 }, { "epoch": 0.72, "grad_norm": 0.5735609531402588, "learning_rate": 0.0005183382505763117, "loss": 3.1671, "step": 14725 }, { "epoch": 0.72, "grad_norm": 0.4953717887401581, "learning_rate": 0.0005183276916486597, "loss": 3.0436, "step": 14726 }, { "epoch": 0.72, "grad_norm": 0.5508190393447876, "learning_rate": 0.0005183171321459745, "loss": 3.1293, "step": 14727 }, { "epoch": 0.72, "grad_norm": 0.4912906289100647, "learning_rate": 0.0005183065720682842, "loss": 3.2873, "step": 14728 }, { "epoch": 0.72, "grad_norm": 0.5511575937271118, "learning_rate": 0.0005182960114156162, "loss": 3.1012, "step": 14729 }, { "epoch": 0.72, "grad_norm": 0.4874035716056824, "learning_rate": 0.0005182854501879986, "loss": 3.3522, "step": 14730 }, { "epoch": 0.72, "grad_norm": 0.5335097312927246, "learning_rate": 0.000518274888385459, "loss": 3.0398, "step": 14731 }, { "epoch": 0.72, "grad_norm": 0.5298565626144409, "learning_rate": 0.0005182643260080254, "loss": 2.8668, "step": 14732 }, { "epoch": 0.72, "grad_norm": 0.5011436939239502, "learning_rate": 0.0005182537630557257, "loss": 3.2198, "step": 14733 }, { "epoch": 0.72, "grad_norm": 0.5561090111732483, "learning_rate": 0.0005182431995285875, "loss": 2.8162, "step": 14734 }, { "epoch": 0.72, "grad_norm": 0.5102478265762329, "learning_rate": 0.0005182326354266387, "loss": 3.1414, "step": 14735 }, { "epoch": 0.72, "grad_norm": 0.472723126411438, "learning_rate": 0.0005182220707499072, "loss": 2.9028, "step": 14736 }, { "epoch": 0.72, "grad_norm": 0.5202345252037048, "learning_rate": 0.0005182115054984207, "loss": 3.2935, "step": 14737 }, { "epoch": 0.72, "grad_norm": 0.5131343007087708, "learning_rate": 0.0005182009396722071, "loss": 3.2739, "step": 14738 }, { "epoch": 0.72, "grad_norm": 0.5101212859153748, "learning_rate": 0.0005181903732712943, "loss": 3.2873, "step": 14739 }, { "epoch": 0.72, "grad_norm": 0.5080827474594116, "learning_rate": 0.0005181798062957099, "loss": 3.0049, "step": 14740 }, { "epoch": 0.72, "grad_norm": 0.526172935962677, "learning_rate": 0.000518169238745482, "loss": 3.2314, "step": 14741 }, { "epoch": 0.72, "grad_norm": 0.5397858619689941, "learning_rate": 0.0005181586706206382, "loss": 3.241, "step": 14742 }, { "epoch": 0.72, "grad_norm": 0.5016712546348572, "learning_rate": 0.0005181481019212066, "loss": 3.3635, "step": 14743 }, { "epoch": 0.72, "grad_norm": 0.5157707929611206, "learning_rate": 0.0005181375326472147, "loss": 3.2207, "step": 14744 }, { "epoch": 0.72, "grad_norm": 0.5005051493644714, "learning_rate": 0.0005181269627986908, "loss": 3.1462, "step": 14745 }, { "epoch": 0.72, "grad_norm": 0.47533586621284485, "learning_rate": 0.0005181163923756622, "loss": 3.1661, "step": 14746 }, { "epoch": 0.72, "grad_norm": 0.537551760673523, "learning_rate": 0.000518105821378157, "loss": 3.185, "step": 14747 }, { "epoch": 0.72, "grad_norm": 0.5036092400550842, "learning_rate": 0.0005180952498062032, "loss": 3.1514, "step": 14748 }, { "epoch": 0.72, "grad_norm": 0.5005072355270386, "learning_rate": 0.0005180846776598285, "loss": 3.1253, "step": 14749 }, { "epoch": 0.72, "grad_norm": 0.5564939379692078, "learning_rate": 0.0005180741049390605, "loss": 3.2356, "step": 14750 }, { "epoch": 0.72, "grad_norm": 0.584366500377655, "learning_rate": 0.0005180635316439275, "loss": 3.0674, "step": 14751 }, { "epoch": 0.72, "grad_norm": 0.5277496576309204, "learning_rate": 0.000518052957774457, "loss": 3.2329, "step": 14752 }, { "epoch": 0.72, "grad_norm": 0.5059505105018616, "learning_rate": 0.000518042383330677, "loss": 3.2356, "step": 14753 }, { "epoch": 0.72, "grad_norm": 0.5543511509895325, "learning_rate": 0.0005180318083126154, "loss": 3.39, "step": 14754 }, { "epoch": 0.72, "grad_norm": 0.537144124507904, "learning_rate": 0.0005180212327202999, "loss": 3.078, "step": 14755 }, { "epoch": 0.72, "grad_norm": 0.5184658765792847, "learning_rate": 0.0005180106565537585, "loss": 3.2036, "step": 14756 }, { "epoch": 0.72, "grad_norm": 0.5322108268737793, "learning_rate": 0.000518000079813019, "loss": 3.2073, "step": 14757 }, { "epoch": 0.72, "grad_norm": 0.511556088924408, "learning_rate": 0.0005179895024981092, "loss": 3.1481, "step": 14758 }, { "epoch": 0.72, "grad_norm": 0.5586161017417908, "learning_rate": 0.0005179789246090568, "loss": 3.3132, "step": 14759 }, { "epoch": 0.72, "grad_norm": 0.5228317379951477, "learning_rate": 0.0005179683461458901, "loss": 3.1418, "step": 14760 }, { "epoch": 0.72, "grad_norm": 0.4944567084312439, "learning_rate": 0.0005179577671086367, "loss": 3.1762, "step": 14761 }, { "epoch": 0.72, "grad_norm": 0.6001697182655334, "learning_rate": 0.0005179471874973245, "loss": 3.1919, "step": 14762 }, { "epoch": 0.72, "grad_norm": 0.5296616554260254, "learning_rate": 0.0005179366073119813, "loss": 3.1888, "step": 14763 }, { "epoch": 0.72, "grad_norm": 0.5390952825546265, "learning_rate": 0.000517926026552635, "loss": 3.0015, "step": 14764 }, { "epoch": 0.72, "grad_norm": 0.5541010499000549, "learning_rate": 0.0005179154452193135, "loss": 3.1304, "step": 14765 }, { "epoch": 0.72, "grad_norm": 0.49909520149230957, "learning_rate": 0.0005179048633120447, "loss": 3.1821, "step": 14766 }, { "epoch": 0.72, "grad_norm": 0.5527284145355225, "learning_rate": 0.0005178942808308564, "loss": 3.2751, "step": 14767 }, { "epoch": 0.72, "grad_norm": 0.5055661201477051, "learning_rate": 0.0005178836977757765, "loss": 3.2823, "step": 14768 }, { "epoch": 0.72, "grad_norm": 0.5182890892028809, "learning_rate": 0.0005178731141468329, "loss": 3.1544, "step": 14769 }, { "epoch": 0.72, "grad_norm": 0.4902885854244232, "learning_rate": 0.0005178625299440534, "loss": 3.0724, "step": 14770 }, { "epoch": 0.72, "grad_norm": 0.5040931701660156, "learning_rate": 0.0005178519451674659, "loss": 3.378, "step": 14771 }, { "epoch": 0.72, "grad_norm": 0.5123295187950134, "learning_rate": 0.0005178413598170984, "loss": 3.3552, "step": 14772 }, { "epoch": 0.72, "grad_norm": 0.533523440361023, "learning_rate": 0.0005178307738929786, "loss": 3.1523, "step": 14773 }, { "epoch": 0.72, "grad_norm": 0.5177558064460754, "learning_rate": 0.0005178201873951346, "loss": 3.139, "step": 14774 }, { "epoch": 0.72, "grad_norm": 0.5482215285301208, "learning_rate": 0.000517809600323594, "loss": 2.9881, "step": 14775 }, { "epoch": 0.72, "grad_norm": 0.5305909514427185, "learning_rate": 0.0005177990126783849, "loss": 3.1347, "step": 14776 }, { "epoch": 0.72, "grad_norm": 0.5373917818069458, "learning_rate": 0.0005177884244595352, "loss": 3.3311, "step": 14777 }, { "epoch": 0.72, "grad_norm": 0.5221949815750122, "learning_rate": 0.0005177778356670725, "loss": 3.154, "step": 14778 }, { "epoch": 0.72, "grad_norm": 0.5249523520469666, "learning_rate": 0.000517767246301025, "loss": 3.026, "step": 14779 }, { "epoch": 0.72, "grad_norm": 0.5519317388534546, "learning_rate": 0.0005177566563614205, "loss": 3.0845, "step": 14780 }, { "epoch": 0.72, "grad_norm": 0.48520752787590027, "learning_rate": 0.0005177460658482868, "loss": 3.1916, "step": 14781 }, { "epoch": 0.72, "grad_norm": 0.5459604263305664, "learning_rate": 0.0005177354747616519, "loss": 3.2222, "step": 14782 }, { "epoch": 0.72, "grad_norm": 0.4783353805541992, "learning_rate": 0.0005177248831015437, "loss": 3.1407, "step": 14783 }, { "epoch": 0.72, "grad_norm": 0.5260900259017944, "learning_rate": 0.00051771429086799, "loss": 3.2664, "step": 14784 }, { "epoch": 0.72, "grad_norm": 0.5184537768363953, "learning_rate": 0.0005177036980610188, "loss": 3.3091, "step": 14785 }, { "epoch": 0.72, "grad_norm": 0.523328959941864, "learning_rate": 0.000517693104680658, "loss": 3.2127, "step": 14786 }, { "epoch": 0.72, "grad_norm": 0.4887447953224182, "learning_rate": 0.0005176825107269353, "loss": 3.177, "step": 14787 }, { "epoch": 0.72, "grad_norm": 0.5406696200370789, "learning_rate": 0.000517671916199879, "loss": 3.3842, "step": 14788 }, { "epoch": 0.72, "grad_norm": 0.5218521952629089, "learning_rate": 0.0005176613210995166, "loss": 3.0918, "step": 14789 }, { "epoch": 0.72, "grad_norm": 0.5580902695655823, "learning_rate": 0.0005176507254258763, "loss": 3.2155, "step": 14790 }, { "epoch": 0.72, "grad_norm": 0.5313786268234253, "learning_rate": 0.0005176401291789857, "loss": 3.1422, "step": 14791 }, { "epoch": 0.72, "grad_norm": 0.5215513110160828, "learning_rate": 0.000517629532358873, "loss": 3.1058, "step": 14792 }, { "epoch": 0.72, "grad_norm": 0.490644633769989, "learning_rate": 0.000517618934965566, "loss": 3.084, "step": 14793 }, { "epoch": 0.73, "grad_norm": 0.5126657485961914, "learning_rate": 0.0005176083369990925, "loss": 2.951, "step": 14794 }, { "epoch": 0.73, "grad_norm": 0.5562359690666199, "learning_rate": 0.0005175977384594807, "loss": 3.1351, "step": 14795 }, { "epoch": 0.73, "grad_norm": 0.5439273715019226, "learning_rate": 0.0005175871393467581, "loss": 3.0672, "step": 14796 }, { "epoch": 0.73, "grad_norm": 0.5346477031707764, "learning_rate": 0.000517576539660953, "loss": 3.1833, "step": 14797 }, { "epoch": 0.73, "grad_norm": 0.5201503038406372, "learning_rate": 0.0005175659394020932, "loss": 3.2397, "step": 14798 }, { "epoch": 0.73, "grad_norm": 0.5136426091194153, "learning_rate": 0.0005175553385702065, "loss": 3.2395, "step": 14799 }, { "epoch": 0.73, "grad_norm": 0.5076440572738647, "learning_rate": 0.000517544737165321, "loss": 3.1516, "step": 14800 }, { "epoch": 0.73, "grad_norm": 0.531037449836731, "learning_rate": 0.0005175341351874645, "loss": 3.0899, "step": 14801 }, { "epoch": 0.73, "grad_norm": 0.5360001921653748, "learning_rate": 0.0005175235326366649, "loss": 3.3677, "step": 14802 }, { "epoch": 0.73, "grad_norm": 0.5268040299415588, "learning_rate": 0.0005175129295129503, "loss": 3.1516, "step": 14803 }, { "epoch": 0.73, "grad_norm": 0.5175145864486694, "learning_rate": 0.0005175023258163483, "loss": 3.1313, "step": 14804 }, { "epoch": 0.73, "grad_norm": 0.5107941627502441, "learning_rate": 0.000517491721546887, "loss": 3.1514, "step": 14805 }, { "epoch": 0.73, "grad_norm": 0.525874137878418, "learning_rate": 0.0005174811167045946, "loss": 3.1506, "step": 14806 }, { "epoch": 0.73, "grad_norm": 0.49850302934646606, "learning_rate": 0.0005174705112894987, "loss": 3.4543, "step": 14807 }, { "epoch": 0.73, "grad_norm": 0.5240212678909302, "learning_rate": 0.0005174599053016273, "loss": 3.2027, "step": 14808 }, { "epoch": 0.73, "grad_norm": 0.5271751880645752, "learning_rate": 0.0005174492987410084, "loss": 3.2392, "step": 14809 }, { "epoch": 0.73, "grad_norm": 0.4971039295196533, "learning_rate": 0.00051743869160767, "loss": 3.1203, "step": 14810 }, { "epoch": 0.73, "grad_norm": 0.5178433060646057, "learning_rate": 0.0005174280839016398, "loss": 3.1699, "step": 14811 }, { "epoch": 0.73, "grad_norm": 0.5342043042182922, "learning_rate": 0.0005174174756229458, "loss": 3.0835, "step": 14812 }, { "epoch": 0.73, "grad_norm": 0.5262744426727295, "learning_rate": 0.0005174068667716162, "loss": 3.2602, "step": 14813 }, { "epoch": 0.73, "grad_norm": 0.5131629109382629, "learning_rate": 0.0005173962573476786, "loss": 3.2553, "step": 14814 }, { "epoch": 0.73, "grad_norm": 0.5120997428894043, "learning_rate": 0.0005173856473511612, "loss": 3.2564, "step": 14815 }, { "epoch": 0.73, "grad_norm": 0.5193552374839783, "learning_rate": 0.0005173750367820917, "loss": 3.2926, "step": 14816 }, { "epoch": 0.73, "grad_norm": 0.4874288737773895, "learning_rate": 0.0005173644256404983, "loss": 3.1219, "step": 14817 }, { "epoch": 0.73, "grad_norm": 0.5450944304466248, "learning_rate": 0.0005173538139264087, "loss": 3.2068, "step": 14818 }, { "epoch": 0.73, "grad_norm": 0.5270337462425232, "learning_rate": 0.0005173432016398513, "loss": 3.3498, "step": 14819 }, { "epoch": 0.73, "grad_norm": 0.5095805525779724, "learning_rate": 0.0005173325887808535, "loss": 3.1008, "step": 14820 }, { "epoch": 0.73, "grad_norm": 0.5138509273529053, "learning_rate": 0.0005173219753494435, "loss": 3.0766, "step": 14821 }, { "epoch": 0.73, "grad_norm": 0.5434461236000061, "learning_rate": 0.0005173113613456493, "loss": 3.1396, "step": 14822 }, { "epoch": 0.73, "grad_norm": 0.5392305850982666, "learning_rate": 0.0005173007467694986, "loss": 3.1582, "step": 14823 }, { "epoch": 0.73, "grad_norm": 0.5202492475509644, "learning_rate": 0.0005172901316210197, "loss": 3.1764, "step": 14824 }, { "epoch": 0.73, "grad_norm": 0.5307274460792542, "learning_rate": 0.0005172795159002405, "loss": 3.3271, "step": 14825 }, { "epoch": 0.73, "grad_norm": 0.5426248908042908, "learning_rate": 0.0005172688996071889, "loss": 3.0903, "step": 14826 }, { "epoch": 0.73, "grad_norm": 0.5029236078262329, "learning_rate": 0.0005172582827418927, "loss": 3.386, "step": 14827 }, { "epoch": 0.73, "grad_norm": 0.522150993347168, "learning_rate": 0.0005172476653043799, "loss": 3.2013, "step": 14828 }, { "epoch": 0.73, "grad_norm": 0.5246309638023376, "learning_rate": 0.0005172370472946787, "loss": 3.2427, "step": 14829 }, { "epoch": 0.73, "grad_norm": 0.5152744650840759, "learning_rate": 0.000517226428712817, "loss": 3.1461, "step": 14830 }, { "epoch": 0.73, "grad_norm": 0.5182197690010071, "learning_rate": 0.0005172158095588227, "loss": 3.0218, "step": 14831 }, { "epoch": 0.73, "grad_norm": 0.5244132280349731, "learning_rate": 0.0005172051898327235, "loss": 3.2129, "step": 14832 }, { "epoch": 0.73, "grad_norm": 0.5407620072364807, "learning_rate": 0.0005171945695345478, "loss": 3.0372, "step": 14833 }, { "epoch": 0.73, "grad_norm": 0.5058204531669617, "learning_rate": 0.0005171839486643234, "loss": 2.9253, "step": 14834 }, { "epoch": 0.73, "grad_norm": 0.5572190284729004, "learning_rate": 0.0005171733272220783, "loss": 3.2299, "step": 14835 }, { "epoch": 0.73, "grad_norm": 0.5125101208686829, "learning_rate": 0.0005171627052078404, "loss": 3.3526, "step": 14836 }, { "epoch": 0.73, "grad_norm": 0.5376558899879456, "learning_rate": 0.0005171520826216377, "loss": 3.2156, "step": 14837 }, { "epoch": 0.73, "grad_norm": 0.5481821298599243, "learning_rate": 0.0005171414594634983, "loss": 3.2858, "step": 14838 }, { "epoch": 0.73, "grad_norm": 0.4958679974079132, "learning_rate": 0.0005171308357334501, "loss": 3.258, "step": 14839 }, { "epoch": 0.73, "grad_norm": 0.5072594285011292, "learning_rate": 0.000517120211431521, "loss": 3.2469, "step": 14840 }, { "epoch": 0.73, "grad_norm": 0.5196676850318909, "learning_rate": 0.0005171095865577391, "loss": 3.1494, "step": 14841 }, { "epoch": 0.73, "grad_norm": 0.5403569936752319, "learning_rate": 0.0005170989611121323, "loss": 3.1397, "step": 14842 }, { "epoch": 0.73, "grad_norm": 0.4967591464519501, "learning_rate": 0.0005170883350947286, "loss": 3.2614, "step": 14843 }, { "epoch": 0.73, "grad_norm": 0.5076732635498047, "learning_rate": 0.000517077708505556, "loss": 2.9743, "step": 14844 }, { "epoch": 0.73, "grad_norm": 0.5110801458358765, "learning_rate": 0.0005170670813446425, "loss": 3.1071, "step": 14845 }, { "epoch": 0.73, "grad_norm": 0.48647382855415344, "learning_rate": 0.0005170564536120161, "loss": 3.2916, "step": 14846 }, { "epoch": 0.73, "grad_norm": 0.5668714046478271, "learning_rate": 0.0005170458253077048, "loss": 3.0199, "step": 14847 }, { "epoch": 0.73, "grad_norm": 0.48818862438201904, "learning_rate": 0.0005170351964317364, "loss": 3.1778, "step": 14848 }, { "epoch": 0.73, "grad_norm": 0.5142984390258789, "learning_rate": 0.0005170245669841393, "loss": 3.3728, "step": 14849 }, { "epoch": 0.73, "grad_norm": 0.5312206149101257, "learning_rate": 0.0005170139369649412, "loss": 3.0934, "step": 14850 }, { "epoch": 0.73, "grad_norm": 0.5142450928688049, "learning_rate": 0.0005170033063741701, "loss": 3.1013, "step": 14851 }, { "epoch": 0.73, "grad_norm": 0.5436277985572815, "learning_rate": 0.0005169926752118541, "loss": 3.2664, "step": 14852 }, { "epoch": 0.73, "grad_norm": 0.545096218585968, "learning_rate": 0.0005169820434780211, "loss": 3.1664, "step": 14853 }, { "epoch": 0.73, "grad_norm": 0.5273535847663879, "learning_rate": 0.0005169714111726992, "loss": 2.9818, "step": 14854 }, { "epoch": 0.73, "grad_norm": 0.5400977730751038, "learning_rate": 0.0005169607782959163, "loss": 3.1989, "step": 14855 }, { "epoch": 0.73, "grad_norm": 0.5369027853012085, "learning_rate": 0.0005169501448477005, "loss": 3.1274, "step": 14856 }, { "epoch": 0.73, "grad_norm": 0.48228007555007935, "learning_rate": 0.0005169395108280797, "loss": 3.3068, "step": 14857 }, { "epoch": 0.73, "grad_norm": 0.562002420425415, "learning_rate": 0.0005169288762370821, "loss": 3.0936, "step": 14858 }, { "epoch": 0.73, "grad_norm": 0.5516139268875122, "learning_rate": 0.0005169182410747356, "loss": 3.0671, "step": 14859 }, { "epoch": 0.73, "grad_norm": 0.5317583680152893, "learning_rate": 0.0005169076053410681, "loss": 3.0629, "step": 14860 }, { "epoch": 0.73, "grad_norm": 0.5046291947364807, "learning_rate": 0.0005168969690361077, "loss": 3.1502, "step": 14861 }, { "epoch": 0.73, "grad_norm": 0.5158634781837463, "learning_rate": 0.0005168863321598825, "loss": 3.157, "step": 14862 }, { "epoch": 0.73, "grad_norm": 0.5672610402107239, "learning_rate": 0.0005168756947124204, "loss": 3.0771, "step": 14863 }, { "epoch": 0.73, "grad_norm": 0.5045456290245056, "learning_rate": 0.0005168650566937496, "loss": 3.2721, "step": 14864 }, { "epoch": 0.73, "grad_norm": 0.5558025240898132, "learning_rate": 0.0005168544181038978, "loss": 3.1883, "step": 14865 }, { "epoch": 0.73, "grad_norm": 0.5100647807121277, "learning_rate": 0.0005168437789428932, "loss": 3.1412, "step": 14866 }, { "epoch": 0.73, "grad_norm": 0.4676547050476074, "learning_rate": 0.0005168331392107639, "loss": 3.1914, "step": 14867 }, { "epoch": 0.73, "grad_norm": 0.5415322780609131, "learning_rate": 0.0005168224989075378, "loss": 3.0172, "step": 14868 }, { "epoch": 0.73, "grad_norm": 0.5330214500427246, "learning_rate": 0.000516811858033243, "loss": 3.2398, "step": 14869 }, { "epoch": 0.73, "grad_norm": 0.6251876354217529, "learning_rate": 0.0005168012165879074, "loss": 3.0238, "step": 14870 }, { "epoch": 0.73, "grad_norm": 0.5353378653526306, "learning_rate": 0.0005167905745715592, "loss": 3.2086, "step": 14871 }, { "epoch": 0.73, "grad_norm": 0.537301242351532, "learning_rate": 0.0005167799319842264, "loss": 2.9889, "step": 14872 }, { "epoch": 0.73, "grad_norm": 0.5349966883659363, "learning_rate": 0.0005167692888259368, "loss": 3.3271, "step": 14873 }, { "epoch": 0.73, "grad_norm": 0.5379777550697327, "learning_rate": 0.0005167586450967189, "loss": 2.9083, "step": 14874 }, { "epoch": 0.73, "grad_norm": 0.5365452766418457, "learning_rate": 0.0005167480007966002, "loss": 3.2886, "step": 14875 }, { "epoch": 0.73, "grad_norm": 0.5008100867271423, "learning_rate": 0.000516737355925609, "loss": 3.1322, "step": 14876 }, { "epoch": 0.73, "grad_norm": 0.5189768671989441, "learning_rate": 0.0005167267104837735, "loss": 3.1345, "step": 14877 }, { "epoch": 0.73, "grad_norm": 0.5536531209945679, "learning_rate": 0.0005167160644711214, "loss": 3.0225, "step": 14878 }, { "epoch": 0.73, "grad_norm": 0.4946756660938263, "learning_rate": 0.000516705417887681, "loss": 3.4478, "step": 14879 }, { "epoch": 0.73, "grad_norm": 0.5488638877868652, "learning_rate": 0.00051669477073348, "loss": 2.9697, "step": 14880 }, { "epoch": 0.73, "grad_norm": 0.520703911781311, "learning_rate": 0.0005166841230085469, "loss": 3.1385, "step": 14881 }, { "epoch": 0.73, "grad_norm": 0.5039485096931458, "learning_rate": 0.0005166734747129094, "loss": 3.1356, "step": 14882 }, { "epoch": 0.73, "grad_norm": 0.4801254868507385, "learning_rate": 0.0005166628258465958, "loss": 3.2317, "step": 14883 }, { "epoch": 0.73, "grad_norm": 0.5044119358062744, "learning_rate": 0.0005166521764096339, "loss": 2.9742, "step": 14884 }, { "epoch": 0.73, "grad_norm": 0.5282803773880005, "learning_rate": 0.0005166415264020519, "loss": 3.0444, "step": 14885 }, { "epoch": 0.73, "grad_norm": 0.5237149596214294, "learning_rate": 0.0005166308758238779, "loss": 3.3579, "step": 14886 }, { "epoch": 0.73, "grad_norm": 0.5436862111091614, "learning_rate": 0.0005166202246751397, "loss": 3.1705, "step": 14887 }, { "epoch": 0.73, "grad_norm": 0.5225011110305786, "learning_rate": 0.0005166095729558656, "loss": 3.145, "step": 14888 }, { "epoch": 0.73, "grad_norm": 0.5690309405326843, "learning_rate": 0.0005165989206660836, "loss": 2.8944, "step": 14889 }, { "epoch": 0.73, "grad_norm": 0.5105469226837158, "learning_rate": 0.0005165882678058217, "loss": 3.1488, "step": 14890 }, { "epoch": 0.73, "grad_norm": 0.5054097175598145, "learning_rate": 0.000516577614375108, "loss": 3.4983, "step": 14891 }, { "epoch": 0.73, "grad_norm": 0.570755660533905, "learning_rate": 0.0005165669603739704, "loss": 3.2432, "step": 14892 }, { "epoch": 0.73, "grad_norm": 0.5138176679611206, "learning_rate": 0.0005165563058024373, "loss": 3.1639, "step": 14893 }, { "epoch": 0.73, "grad_norm": 0.4894483685493469, "learning_rate": 0.0005165456506605365, "loss": 2.9985, "step": 14894 }, { "epoch": 0.73, "grad_norm": 0.4852317273616791, "learning_rate": 0.0005165349949482962, "loss": 3.2036, "step": 14895 }, { "epoch": 0.73, "grad_norm": 0.5104283094406128, "learning_rate": 0.0005165243386657442, "loss": 3.2489, "step": 14896 }, { "epoch": 0.73, "grad_norm": 0.5486336350440979, "learning_rate": 0.000516513681812909, "loss": 3.2546, "step": 14897 }, { "epoch": 0.73, "grad_norm": 0.5464646816253662, "learning_rate": 0.0005165030243898183, "loss": 3.1661, "step": 14898 }, { "epoch": 0.73, "grad_norm": 0.494334876537323, "learning_rate": 0.0005164923663965003, "loss": 3.3491, "step": 14899 }, { "epoch": 0.73, "grad_norm": 0.51130211353302, "learning_rate": 0.0005164817078329831, "loss": 3.4166, "step": 14900 }, { "epoch": 0.73, "grad_norm": 0.5445241928100586, "learning_rate": 0.0005164710486992947, "loss": 3.2104, "step": 14901 }, { "epoch": 0.73, "grad_norm": 0.5161874294281006, "learning_rate": 0.0005164603889954633, "loss": 3.291, "step": 14902 }, { "epoch": 0.73, "grad_norm": 0.5364497303962708, "learning_rate": 0.0005164497287215169, "loss": 3.3265, "step": 14903 }, { "epoch": 0.73, "grad_norm": 0.4792921841144562, "learning_rate": 0.0005164390678774834, "loss": 3.2952, "step": 14904 }, { "epoch": 0.73, "grad_norm": 0.5327908396720886, "learning_rate": 0.0005164284064633912, "loss": 3.1207, "step": 14905 }, { "epoch": 0.73, "grad_norm": 0.5640168190002441, "learning_rate": 0.0005164177444792683, "loss": 3.2393, "step": 14906 }, { "epoch": 0.73, "grad_norm": 0.5080165863037109, "learning_rate": 0.0005164070819251425, "loss": 3.1347, "step": 14907 }, { "epoch": 0.73, "grad_norm": 0.4810307025909424, "learning_rate": 0.0005163964188010424, "loss": 3.2237, "step": 14908 }, { "epoch": 0.73, "grad_norm": 0.5038141012191772, "learning_rate": 0.0005163857551069954, "loss": 3.2509, "step": 14909 }, { "epoch": 0.73, "grad_norm": 0.5256493091583252, "learning_rate": 0.0005163750908430303, "loss": 3.1796, "step": 14910 }, { "epoch": 0.73, "grad_norm": 0.5214911699295044, "learning_rate": 0.0005163644260091746, "loss": 3.211, "step": 14911 }, { "epoch": 0.73, "grad_norm": 0.5438799858093262, "learning_rate": 0.0005163537606054568, "loss": 3.1117, "step": 14912 }, { "epoch": 0.73, "grad_norm": 0.5240724682807922, "learning_rate": 0.0005163430946319047, "loss": 3.3282, "step": 14913 }, { "epoch": 0.73, "grad_norm": 0.5152091979980469, "learning_rate": 0.0005163324280885467, "loss": 3.1758, "step": 14914 }, { "epoch": 0.73, "grad_norm": 0.4732866585254669, "learning_rate": 0.0005163217609754105, "loss": 3.0374, "step": 14915 }, { "epoch": 0.73, "grad_norm": 0.48373696208000183, "learning_rate": 0.0005163110932925245, "loss": 3.2967, "step": 14916 }, { "epoch": 0.73, "grad_norm": 0.5080105662345886, "learning_rate": 0.0005163004250399168, "loss": 3.1916, "step": 14917 }, { "epoch": 0.73, "grad_norm": 0.4790027141571045, "learning_rate": 0.0005162897562176154, "loss": 3.2397, "step": 14918 }, { "epoch": 0.73, "grad_norm": 0.491432249546051, "learning_rate": 0.0005162790868256484, "loss": 3.2743, "step": 14919 }, { "epoch": 0.73, "grad_norm": 0.5296748876571655, "learning_rate": 0.0005162684168640439, "loss": 2.8877, "step": 14920 }, { "epoch": 0.73, "grad_norm": 0.5081189274787903, "learning_rate": 0.00051625774633283, "loss": 3.2914, "step": 14921 }, { "epoch": 0.73, "grad_norm": 0.5037716031074524, "learning_rate": 0.0005162470752320347, "loss": 3.2631, "step": 14922 }, { "epoch": 0.73, "grad_norm": 0.5493853688240051, "learning_rate": 0.0005162364035616863, "loss": 3.2527, "step": 14923 }, { "epoch": 0.73, "grad_norm": 0.48433980345726013, "learning_rate": 0.0005162257313218128, "loss": 3.1472, "step": 14924 }, { "epoch": 0.73, "grad_norm": 0.5239858627319336, "learning_rate": 0.0005162150585124425, "loss": 3.2365, "step": 14925 }, { "epoch": 0.73, "grad_norm": 0.5162671208381653, "learning_rate": 0.0005162043851336032, "loss": 3.1116, "step": 14926 }, { "epoch": 0.73, "grad_norm": 0.532788097858429, "learning_rate": 0.0005161937111853232, "loss": 3.185, "step": 14927 }, { "epoch": 0.73, "grad_norm": 0.5161779522895813, "learning_rate": 0.0005161830366676305, "loss": 3.2383, "step": 14928 }, { "epoch": 0.73, "grad_norm": 0.5137925744056702, "learning_rate": 0.0005161723615805534, "loss": 3.2344, "step": 14929 }, { "epoch": 0.73, "grad_norm": 0.5238530039787292, "learning_rate": 0.0005161616859241199, "loss": 3.2037, "step": 14930 }, { "epoch": 0.73, "grad_norm": 0.5177023410797119, "learning_rate": 0.0005161510096983581, "loss": 3.035, "step": 14931 }, { "epoch": 0.73, "grad_norm": 0.49137774109840393, "learning_rate": 0.0005161403329032961, "loss": 3.2177, "step": 14932 }, { "epoch": 0.73, "grad_norm": 0.5207857489585876, "learning_rate": 0.000516129655538962, "loss": 3.2774, "step": 14933 }, { "epoch": 0.73, "grad_norm": 0.5206941962242126, "learning_rate": 0.0005161189776053841, "loss": 3.054, "step": 14934 }, { "epoch": 0.73, "grad_norm": 0.5176581144332886, "learning_rate": 0.0005161082991025904, "loss": 3.2821, "step": 14935 }, { "epoch": 0.73, "grad_norm": 0.5066308975219727, "learning_rate": 0.000516097620030609, "loss": 3.3941, "step": 14936 }, { "epoch": 0.73, "grad_norm": 0.5001108646392822, "learning_rate": 0.000516086940389468, "loss": 3.1497, "step": 14937 }, { "epoch": 0.73, "grad_norm": 0.5629680156707764, "learning_rate": 0.0005160762601791956, "loss": 3.4361, "step": 14938 }, { "epoch": 0.73, "grad_norm": 0.5518137216567993, "learning_rate": 0.00051606557939982, "loss": 3.2953, "step": 14939 }, { "epoch": 0.73, "grad_norm": 0.5315781831741333, "learning_rate": 0.0005160548980513692, "loss": 3.1599, "step": 14940 }, { "epoch": 0.73, "grad_norm": 0.4899667501449585, "learning_rate": 0.0005160442161338713, "loss": 3.2092, "step": 14941 }, { "epoch": 0.73, "grad_norm": 0.5403282046318054, "learning_rate": 0.0005160335336473547, "loss": 3.0352, "step": 14942 }, { "epoch": 0.73, "grad_norm": 0.5120472311973572, "learning_rate": 0.0005160228505918472, "loss": 3.3255, "step": 14943 }, { "epoch": 0.73, "grad_norm": 0.5195842385292053, "learning_rate": 0.0005160121669673771, "loss": 3.2938, "step": 14944 }, { "epoch": 0.73, "grad_norm": 0.5495728254318237, "learning_rate": 0.0005160014827739726, "loss": 3.2412, "step": 14945 }, { "epoch": 0.73, "grad_norm": 0.5019406080245972, "learning_rate": 0.0005159907980116617, "loss": 3.3542, "step": 14946 }, { "epoch": 0.73, "grad_norm": 0.5161232352256775, "learning_rate": 0.0005159801126804726, "loss": 3.064, "step": 14947 }, { "epoch": 0.73, "grad_norm": 0.4725062847137451, "learning_rate": 0.0005159694267804335, "loss": 2.9515, "step": 14948 }, { "epoch": 0.73, "grad_norm": 0.5322039127349854, "learning_rate": 0.0005159587403115724, "loss": 3.1031, "step": 14949 }, { "epoch": 0.73, "grad_norm": 0.48917528986930847, "learning_rate": 0.0005159480532739176, "loss": 3.3152, "step": 14950 }, { "epoch": 0.73, "grad_norm": 0.5044992566108704, "learning_rate": 0.0005159373656674972, "loss": 3.304, "step": 14951 }, { "epoch": 0.73, "grad_norm": 0.5023238658905029, "learning_rate": 0.0005159266774923393, "loss": 2.9814, "step": 14952 }, { "epoch": 0.73, "grad_norm": 0.49678921699523926, "learning_rate": 0.0005159159887484721, "loss": 3.2713, "step": 14953 }, { "epoch": 0.73, "grad_norm": 0.5398493409156799, "learning_rate": 0.0005159052994359239, "loss": 2.8908, "step": 14954 }, { "epoch": 0.73, "grad_norm": 0.5190320014953613, "learning_rate": 0.0005158946095547225, "loss": 3.1101, "step": 14955 }, { "epoch": 0.73, "grad_norm": 0.4837747812271118, "learning_rate": 0.0005158839191048963, "loss": 3.3732, "step": 14956 }, { "epoch": 0.73, "grad_norm": 0.5262941718101501, "learning_rate": 0.0005158732280864735, "loss": 3.095, "step": 14957 }, { "epoch": 0.73, "grad_norm": 0.5336409211158752, "learning_rate": 0.000515862536499482, "loss": 3.3913, "step": 14958 }, { "epoch": 0.73, "grad_norm": 0.5454764366149902, "learning_rate": 0.0005158518443439502, "loss": 3.3525, "step": 14959 }, { "epoch": 0.73, "grad_norm": 0.536488950252533, "learning_rate": 0.0005158411516199061, "loss": 3.2649, "step": 14960 }, { "epoch": 0.73, "grad_norm": 0.5544682741165161, "learning_rate": 0.000515830458327378, "loss": 3.0752, "step": 14961 }, { "epoch": 0.73, "grad_norm": 0.5361686944961548, "learning_rate": 0.0005158197644663941, "loss": 3.163, "step": 14962 }, { "epoch": 0.73, "grad_norm": 0.5054564476013184, "learning_rate": 0.0005158090700369824, "loss": 3.0706, "step": 14963 }, { "epoch": 0.73, "grad_norm": 0.5609951019287109, "learning_rate": 0.0005157983750391711, "loss": 3.2753, "step": 14964 }, { "epoch": 0.73, "grad_norm": 0.5623591542243958, "learning_rate": 0.0005157876794729885, "loss": 3.1528, "step": 14965 }, { "epoch": 0.73, "grad_norm": 0.5499200224876404, "learning_rate": 0.0005157769833384626, "loss": 3.0385, "step": 14966 }, { "epoch": 0.73, "grad_norm": 0.47295448184013367, "learning_rate": 0.0005157662866356217, "loss": 3.0952, "step": 14967 }, { "epoch": 0.73, "grad_norm": 0.5192577838897705, "learning_rate": 0.0005157555893644939, "loss": 3.3865, "step": 14968 }, { "epoch": 0.73, "grad_norm": 0.51607346534729, "learning_rate": 0.0005157448915251074, "loss": 3.0292, "step": 14969 }, { "epoch": 0.73, "grad_norm": 0.5210020542144775, "learning_rate": 0.0005157341931174904, "loss": 3.3449, "step": 14970 }, { "epoch": 0.73, "grad_norm": 0.5266844034194946, "learning_rate": 0.0005157234941416711, "loss": 3.2106, "step": 14971 }, { "epoch": 0.73, "grad_norm": 0.6120948791503906, "learning_rate": 0.0005157127945976776, "loss": 2.9999, "step": 14972 }, { "epoch": 0.73, "grad_norm": 0.5157740116119385, "learning_rate": 0.000515702094485538, "loss": 3.1832, "step": 14973 }, { "epoch": 0.73, "grad_norm": 0.49761828780174255, "learning_rate": 0.0005156913938052808, "loss": 3.1624, "step": 14974 }, { "epoch": 0.73, "grad_norm": 0.5079160928726196, "learning_rate": 0.0005156806925569338, "loss": 3.2627, "step": 14975 }, { "epoch": 0.73, "grad_norm": 0.5453952550888062, "learning_rate": 0.0005156699907405255, "loss": 2.9036, "step": 14976 }, { "epoch": 0.73, "grad_norm": 0.5448424220085144, "learning_rate": 0.0005156592883560838, "loss": 3.1669, "step": 14977 }, { "epoch": 0.73, "grad_norm": 0.49395671486854553, "learning_rate": 0.0005156485854036371, "loss": 3.2462, "step": 14978 }, { "epoch": 0.73, "grad_norm": 0.5351203680038452, "learning_rate": 0.0005156378818832136, "loss": 3.3148, "step": 14979 }, { "epoch": 0.73, "grad_norm": 0.5157082080841064, "learning_rate": 0.0005156271777948414, "loss": 3.3561, "step": 14980 }, { "epoch": 0.73, "grad_norm": 0.5183131098747253, "learning_rate": 0.0005156164731385487, "loss": 3.1139, "step": 14981 }, { "epoch": 0.73, "grad_norm": 0.5393839478492737, "learning_rate": 0.0005156057679143636, "loss": 3.2187, "step": 14982 }, { "epoch": 0.73, "grad_norm": 0.49937084317207336, "learning_rate": 0.0005155950621223145, "loss": 3.1017, "step": 14983 }, { "epoch": 0.73, "grad_norm": 0.5061917901039124, "learning_rate": 0.0005155843557624296, "loss": 3.0941, "step": 14984 }, { "epoch": 0.73, "grad_norm": 0.5656406283378601, "learning_rate": 0.0005155736488347369, "loss": 3.0482, "step": 14985 }, { "epoch": 0.73, "grad_norm": 0.5063902735710144, "learning_rate": 0.0005155629413392646, "loss": 3.0954, "step": 14986 }, { "epoch": 0.73, "grad_norm": 0.5024520754814148, "learning_rate": 0.0005155522332760411, "loss": 3.0767, "step": 14987 }, { "epoch": 0.73, "grad_norm": 0.83547043800354, "learning_rate": 0.0005155415246450945, "loss": 3.0165, "step": 14988 }, { "epoch": 0.73, "grad_norm": 0.5294227600097656, "learning_rate": 0.000515530815446453, "loss": 3.2607, "step": 14989 }, { "epoch": 0.73, "grad_norm": 0.5271424651145935, "learning_rate": 0.0005155201056801449, "loss": 3.1893, "step": 14990 }, { "epoch": 0.73, "grad_norm": 0.5043739080429077, "learning_rate": 0.0005155093953461981, "loss": 3.1544, "step": 14991 }, { "epoch": 0.73, "grad_norm": 0.5016458034515381, "learning_rate": 0.0005154986844446411, "loss": 3.099, "step": 14992 }, { "epoch": 0.73, "grad_norm": 0.5234816670417786, "learning_rate": 0.0005154879729755022, "loss": 3.2483, "step": 14993 }, { "epoch": 0.73, "grad_norm": 0.5132349133491516, "learning_rate": 0.0005154772609388093, "loss": 3.215, "step": 14994 }, { "epoch": 0.73, "grad_norm": 0.5096055865287781, "learning_rate": 0.0005154665483345909, "loss": 3.289, "step": 14995 }, { "epoch": 0.73, "grad_norm": 0.5225401520729065, "learning_rate": 0.0005154558351628749, "loss": 3.285, "step": 14996 }, { "epoch": 0.73, "grad_norm": 0.5399203300476074, "learning_rate": 0.0005154451214236898, "loss": 3.0405, "step": 14997 }, { "epoch": 0.74, "grad_norm": 0.5019733905792236, "learning_rate": 0.0005154344071170637, "loss": 3.2109, "step": 14998 }, { "epoch": 0.74, "grad_norm": 0.5068507790565491, "learning_rate": 0.0005154236922430248, "loss": 3.0886, "step": 14999 }, { "epoch": 0.74, "grad_norm": 0.5227283835411072, "learning_rate": 0.0005154129768016013, "loss": 3.0491, "step": 15000 }, { "epoch": 0.74, "grad_norm": 0.5454354882240295, "learning_rate": 0.0005154022607928217, "loss": 3.2926, "step": 15001 }, { "epoch": 0.74, "grad_norm": 0.5321445465087891, "learning_rate": 0.0005153915442167138, "loss": 3.1433, "step": 15002 }, { "epoch": 0.74, "grad_norm": 0.5160802006721497, "learning_rate": 0.0005153808270733061, "loss": 3.0392, "step": 15003 }, { "epoch": 0.74, "grad_norm": 0.5624576210975647, "learning_rate": 0.0005153701093626267, "loss": 3.2465, "step": 15004 }, { "epoch": 0.74, "grad_norm": 0.5116503238677979, "learning_rate": 0.0005153593910847039, "loss": 3.4115, "step": 15005 }, { "epoch": 0.74, "grad_norm": 0.4815455377101898, "learning_rate": 0.000515348672239566, "loss": 3.2247, "step": 15006 }, { "epoch": 0.74, "grad_norm": 0.5008863806724548, "learning_rate": 0.0005153379528272411, "loss": 2.9155, "step": 15007 }, { "epoch": 0.74, "grad_norm": 0.5305734276771545, "learning_rate": 0.0005153272328477573, "loss": 3.3725, "step": 15008 }, { "epoch": 0.74, "grad_norm": 0.49309682846069336, "learning_rate": 0.0005153165123011432, "loss": 3.2633, "step": 15009 }, { "epoch": 0.74, "grad_norm": 0.5189629793167114, "learning_rate": 0.0005153057911874267, "loss": 3.1755, "step": 15010 }, { "epoch": 0.74, "grad_norm": 0.5381687879562378, "learning_rate": 0.0005152950695066364, "loss": 3.222, "step": 15011 }, { "epoch": 0.74, "grad_norm": 0.4915025234222412, "learning_rate": 0.0005152843472588001, "loss": 3.1748, "step": 15012 }, { "epoch": 0.74, "grad_norm": 0.5047193169593811, "learning_rate": 0.0005152736244439464, "loss": 3.1085, "step": 15013 }, { "epoch": 0.74, "grad_norm": 0.5417593717575073, "learning_rate": 0.0005152629010621033, "loss": 3.1649, "step": 15014 }, { "epoch": 0.74, "grad_norm": 0.5217851400375366, "learning_rate": 0.0005152521771132993, "loss": 3.2755, "step": 15015 }, { "epoch": 0.74, "grad_norm": 0.517413854598999, "learning_rate": 0.0005152414525975625, "loss": 3.0616, "step": 15016 }, { "epoch": 0.74, "grad_norm": 0.5072711110115051, "learning_rate": 0.0005152307275149209, "loss": 3.0893, "step": 15017 }, { "epoch": 0.74, "grad_norm": 0.5205687284469604, "learning_rate": 0.0005152200018654032, "loss": 3.1728, "step": 15018 }, { "epoch": 0.74, "grad_norm": 0.5183899402618408, "learning_rate": 0.0005152092756490373, "loss": 3.3177, "step": 15019 }, { "epoch": 0.74, "grad_norm": 0.5392817854881287, "learning_rate": 0.0005151985488658517, "loss": 3.2724, "step": 15020 }, { "epoch": 0.74, "grad_norm": 0.49955472350120544, "learning_rate": 0.0005151878215158745, "loss": 3.1084, "step": 15021 }, { "epoch": 0.74, "grad_norm": 0.5460142493247986, "learning_rate": 0.0005151770935991339, "loss": 3.2581, "step": 15022 }, { "epoch": 0.74, "grad_norm": 0.5164291262626648, "learning_rate": 0.0005151663651156584, "loss": 3.2137, "step": 15023 }, { "epoch": 0.74, "grad_norm": 0.4972623586654663, "learning_rate": 0.000515155636065476, "loss": 3.1696, "step": 15024 }, { "epoch": 0.74, "grad_norm": 0.5117320418357849, "learning_rate": 0.0005151449064486151, "loss": 2.9165, "step": 15025 }, { "epoch": 0.74, "grad_norm": 0.5329243540763855, "learning_rate": 0.000515134176265104, "loss": 3.2211, "step": 15026 }, { "epoch": 0.74, "grad_norm": 0.548600971698761, "learning_rate": 0.0005151234455149707, "loss": 3.4036, "step": 15027 }, { "epoch": 0.74, "grad_norm": 0.5310418009757996, "learning_rate": 0.0005151127141982437, "loss": 3.1994, "step": 15028 }, { "epoch": 0.74, "grad_norm": 0.5383056998252869, "learning_rate": 0.0005151019823149513, "loss": 3.2994, "step": 15029 }, { "epoch": 0.74, "grad_norm": 0.4978289008140564, "learning_rate": 0.0005150912498651216, "loss": 3.2983, "step": 15030 }, { "epoch": 0.74, "grad_norm": 0.5403746366500854, "learning_rate": 0.000515080516848783, "loss": 3.1833, "step": 15031 }, { "epoch": 0.74, "grad_norm": 0.5366250276565552, "learning_rate": 0.0005150697832659636, "loss": 3.0881, "step": 15032 }, { "epoch": 0.74, "grad_norm": 0.49605393409729004, "learning_rate": 0.0005150590491166919, "loss": 3.3317, "step": 15033 }, { "epoch": 0.74, "grad_norm": 0.5022369027137756, "learning_rate": 0.0005150483144009961, "loss": 3.1123, "step": 15034 }, { "epoch": 0.74, "grad_norm": 0.5486195683479309, "learning_rate": 0.0005150375791189043, "loss": 3.368, "step": 15035 }, { "epoch": 0.74, "grad_norm": 0.511721670627594, "learning_rate": 0.0005150268432704449, "loss": 3.1523, "step": 15036 }, { "epoch": 0.74, "grad_norm": 0.5076295137405396, "learning_rate": 0.0005150161068556463, "loss": 3.0981, "step": 15037 }, { "epoch": 0.74, "grad_norm": 0.5491887331008911, "learning_rate": 0.0005150053698745365, "loss": 3.1993, "step": 15038 }, { "epoch": 0.74, "grad_norm": 0.5024232268333435, "learning_rate": 0.000514994632327144, "loss": 3.4245, "step": 15039 }, { "epoch": 0.74, "grad_norm": 0.5337858200073242, "learning_rate": 0.0005149838942134971, "loss": 3.2517, "step": 15040 }, { "epoch": 0.74, "grad_norm": 0.5012475252151489, "learning_rate": 0.0005149731555336239, "loss": 3.2314, "step": 15041 }, { "epoch": 0.74, "grad_norm": 0.5080512166023254, "learning_rate": 0.0005149624162875528, "loss": 3.432, "step": 15042 }, { "epoch": 0.74, "grad_norm": 0.5350621938705444, "learning_rate": 0.0005149516764753121, "loss": 3.232, "step": 15043 }, { "epoch": 0.74, "grad_norm": 0.5167108774185181, "learning_rate": 0.00051494093609693, "loss": 3.1149, "step": 15044 }, { "epoch": 0.74, "grad_norm": 0.5356556177139282, "learning_rate": 0.0005149301951524348, "loss": 3.3853, "step": 15045 }, { "epoch": 0.74, "grad_norm": 0.48991358280181885, "learning_rate": 0.0005149194536418551, "loss": 3.0879, "step": 15046 }, { "epoch": 0.74, "grad_norm": 0.5196684002876282, "learning_rate": 0.0005149087115652185, "loss": 2.9087, "step": 15047 }, { "epoch": 0.74, "grad_norm": 0.4956427216529846, "learning_rate": 0.000514897968922554, "loss": 3.2385, "step": 15048 }, { "epoch": 0.74, "grad_norm": 0.48819366097450256, "learning_rate": 0.0005148872257138895, "loss": 3.2224, "step": 15049 }, { "epoch": 0.74, "grad_norm": 0.5472261905670166, "learning_rate": 0.0005148764819392535, "loss": 3.1346, "step": 15050 }, { "epoch": 0.74, "grad_norm": 0.5290123820304871, "learning_rate": 0.0005148657375986741, "loss": 3.1584, "step": 15051 }, { "epoch": 0.74, "grad_norm": 0.5149974226951599, "learning_rate": 0.0005148549926921798, "loss": 3.3616, "step": 15052 }, { "epoch": 0.74, "grad_norm": 0.5451234579086304, "learning_rate": 0.0005148442472197986, "loss": 3.145, "step": 15053 }, { "epoch": 0.74, "grad_norm": 0.4977348744869232, "learning_rate": 0.0005148335011815592, "loss": 3.1043, "step": 15054 }, { "epoch": 0.74, "grad_norm": 0.4974806010723114, "learning_rate": 0.0005148227545774898, "loss": 3.2151, "step": 15055 }, { "epoch": 0.74, "grad_norm": 0.5258920192718506, "learning_rate": 0.0005148120074076184, "loss": 3.2716, "step": 15056 }, { "epoch": 0.74, "grad_norm": 0.5211778879165649, "learning_rate": 0.0005148012596719735, "loss": 3.1984, "step": 15057 }, { "epoch": 0.74, "grad_norm": 0.5030432939529419, "learning_rate": 0.0005147905113705835, "loss": 3.1843, "step": 15058 }, { "epoch": 0.74, "grad_norm": 0.4971194267272949, "learning_rate": 0.0005147797625034766, "loss": 3.1692, "step": 15059 }, { "epoch": 0.74, "grad_norm": 0.5721365213394165, "learning_rate": 0.0005147690130706811, "loss": 3.0595, "step": 15060 }, { "epoch": 0.74, "grad_norm": 0.5956337451934814, "learning_rate": 0.0005147582630722255, "loss": 3.2497, "step": 15061 }, { "epoch": 0.74, "grad_norm": 0.5455291271209717, "learning_rate": 0.0005147475125081379, "loss": 3.286, "step": 15062 }, { "epoch": 0.74, "grad_norm": 0.49531859159469604, "learning_rate": 0.0005147367613784465, "loss": 3.2845, "step": 15063 }, { "epoch": 0.74, "grad_norm": 0.5478885769844055, "learning_rate": 0.00051472600968318, "loss": 3.4411, "step": 15064 }, { "epoch": 0.74, "grad_norm": 0.5322989821434021, "learning_rate": 0.0005147152574223665, "loss": 3.4172, "step": 15065 }, { "epoch": 0.74, "grad_norm": 0.5252341628074646, "learning_rate": 0.0005147045045960344, "loss": 3.2743, "step": 15066 }, { "epoch": 0.74, "grad_norm": 0.5184382796287537, "learning_rate": 0.0005146937512042118, "loss": 3.1589, "step": 15067 }, { "epoch": 0.74, "grad_norm": 0.5355132818222046, "learning_rate": 0.0005146829972469272, "loss": 3.0857, "step": 15068 }, { "epoch": 0.74, "grad_norm": 0.5309262275695801, "learning_rate": 0.000514672242724209, "loss": 3.3667, "step": 15069 }, { "epoch": 0.74, "grad_norm": 0.529289186000824, "learning_rate": 0.0005146614876360853, "loss": 3.4135, "step": 15070 }, { "epoch": 0.74, "grad_norm": 0.48815011978149414, "learning_rate": 0.0005146507319825846, "loss": 3.4187, "step": 15071 }, { "epoch": 0.74, "grad_norm": 0.526040256023407, "learning_rate": 0.0005146399757637352, "loss": 3.1328, "step": 15072 }, { "epoch": 0.74, "grad_norm": 0.5177284479141235, "learning_rate": 0.0005146292189795654, "loss": 3.2615, "step": 15073 }, { "epoch": 0.74, "grad_norm": 0.4890044033527374, "learning_rate": 0.0005146184616301036, "loss": 3.0297, "step": 15074 }, { "epoch": 0.74, "grad_norm": 0.511398434638977, "learning_rate": 0.000514607703715378, "loss": 3.5007, "step": 15075 }, { "epoch": 0.74, "grad_norm": 0.5235434174537659, "learning_rate": 0.000514596945235417, "loss": 3.3912, "step": 15076 }, { "epoch": 0.74, "grad_norm": 0.5152917504310608, "learning_rate": 0.0005145861861902491, "loss": 3.0959, "step": 15077 }, { "epoch": 0.74, "grad_norm": 0.555122971534729, "learning_rate": 0.0005145754265799023, "loss": 3.1187, "step": 15078 }, { "epoch": 0.74, "grad_norm": 0.5248861908912659, "learning_rate": 0.0005145646664044053, "loss": 3.3088, "step": 15079 }, { "epoch": 0.74, "grad_norm": 0.48319074511528015, "learning_rate": 0.0005145539056637861, "loss": 3.0406, "step": 15080 }, { "epoch": 0.74, "grad_norm": 0.5107579827308655, "learning_rate": 0.0005145431443580732, "loss": 2.9253, "step": 15081 }, { "epoch": 0.74, "grad_norm": 0.5582626461982727, "learning_rate": 0.000514532382487295, "loss": 3.3489, "step": 15082 }, { "epoch": 0.74, "grad_norm": 0.5222228765487671, "learning_rate": 0.00051452162005148, "loss": 3.2836, "step": 15083 }, { "epoch": 0.74, "grad_norm": 0.49412620067596436, "learning_rate": 0.0005145108570506561, "loss": 3.163, "step": 15084 }, { "epoch": 0.74, "grad_norm": 0.530225932598114, "learning_rate": 0.0005145000934848519, "loss": 3.144, "step": 15085 }, { "epoch": 0.74, "grad_norm": 0.5154251456260681, "learning_rate": 0.0005144893293540957, "loss": 3.0688, "step": 15086 }, { "epoch": 0.74, "grad_norm": 0.5582703948020935, "learning_rate": 0.0005144785646584159, "loss": 3.3622, "step": 15087 }, { "epoch": 0.74, "grad_norm": 0.5435081720352173, "learning_rate": 0.000514467799397841, "loss": 3.1106, "step": 15088 }, { "epoch": 0.74, "grad_norm": 0.5430107712745667, "learning_rate": 0.000514457033572399, "loss": 3.0035, "step": 15089 }, { "epoch": 0.74, "grad_norm": 0.543735682964325, "learning_rate": 0.0005144462671821186, "loss": 3.1732, "step": 15090 }, { "epoch": 0.74, "grad_norm": 0.5661927461624146, "learning_rate": 0.0005144355002270278, "loss": 2.9093, "step": 15091 }, { "epoch": 0.74, "grad_norm": 0.5020666122436523, "learning_rate": 0.0005144247327071553, "loss": 3.2608, "step": 15092 }, { "epoch": 0.74, "grad_norm": 0.5958285331726074, "learning_rate": 0.0005144139646225293, "loss": 3.0192, "step": 15093 }, { "epoch": 0.74, "grad_norm": 0.5555652976036072, "learning_rate": 0.0005144031959731783, "loss": 3.2023, "step": 15094 }, { "epoch": 0.74, "grad_norm": 0.5096257925033569, "learning_rate": 0.0005143924267591304, "loss": 3.2049, "step": 15095 }, { "epoch": 0.74, "grad_norm": 0.5613263249397278, "learning_rate": 0.0005143816569804141, "loss": 3.074, "step": 15096 }, { "epoch": 0.74, "grad_norm": 0.4865977168083191, "learning_rate": 0.0005143708866370579, "loss": 3.0918, "step": 15097 }, { "epoch": 0.74, "grad_norm": 0.509009838104248, "learning_rate": 0.00051436011572909, "loss": 3.2596, "step": 15098 }, { "epoch": 0.74, "grad_norm": 0.5192385315895081, "learning_rate": 0.0005143493442565387, "loss": 3.0407, "step": 15099 }, { "epoch": 0.74, "grad_norm": 0.5306475758552551, "learning_rate": 0.0005143385722194326, "loss": 3.2282, "step": 15100 }, { "epoch": 0.74, "grad_norm": 0.5092197060585022, "learning_rate": 0.0005143277996177998, "loss": 3.2562, "step": 15101 }, { "epoch": 0.74, "grad_norm": 0.49553215503692627, "learning_rate": 0.000514317026451669, "loss": 3.114, "step": 15102 }, { "epoch": 0.74, "grad_norm": 0.5059915781021118, "learning_rate": 0.0005143062527210683, "loss": 3.3094, "step": 15103 }, { "epoch": 0.74, "grad_norm": 0.4825346767902374, "learning_rate": 0.0005142954784260261, "loss": 3.2117, "step": 15104 }, { "epoch": 0.74, "grad_norm": 0.5263687968254089, "learning_rate": 0.000514284703566571, "loss": 3.0489, "step": 15105 }, { "epoch": 0.74, "grad_norm": 0.50020432472229, "learning_rate": 0.0005142739281427313, "loss": 3.2894, "step": 15106 }, { "epoch": 0.74, "grad_norm": 0.5188915133476257, "learning_rate": 0.0005142631521545351, "loss": 3.2912, "step": 15107 }, { "epoch": 0.74, "grad_norm": 0.5241820812225342, "learning_rate": 0.0005142523756020111, "loss": 3.1568, "step": 15108 }, { "epoch": 0.74, "grad_norm": 0.47457194328308105, "learning_rate": 0.0005142415984851875, "loss": 3.2036, "step": 15109 }, { "epoch": 0.74, "grad_norm": 0.46626847982406616, "learning_rate": 0.0005142308208040928, "loss": 3.0613, "step": 15110 }, { "epoch": 0.74, "grad_norm": 0.5026150941848755, "learning_rate": 0.0005142200425587555, "loss": 3.0074, "step": 15111 }, { "epoch": 0.74, "grad_norm": 0.531330406665802, "learning_rate": 0.0005142092637492036, "loss": 3.3118, "step": 15112 }, { "epoch": 0.74, "grad_norm": 0.5424619913101196, "learning_rate": 0.0005141984843754658, "loss": 3.2401, "step": 15113 }, { "epoch": 0.74, "grad_norm": 0.5372337102890015, "learning_rate": 0.0005141877044375705, "loss": 3.2051, "step": 15114 }, { "epoch": 0.74, "grad_norm": 0.5131272673606873, "learning_rate": 0.000514176923935546, "loss": 3.0717, "step": 15115 }, { "epoch": 0.74, "grad_norm": 0.4956575334072113, "learning_rate": 0.0005141661428694206, "loss": 3.1742, "step": 15116 }, { "epoch": 0.74, "grad_norm": 0.5434887409210205, "learning_rate": 0.0005141553612392229, "loss": 3.1218, "step": 15117 }, { "epoch": 0.74, "grad_norm": 0.5086133480072021, "learning_rate": 0.0005141445790449811, "loss": 3.0653, "step": 15118 }, { "epoch": 0.74, "grad_norm": 0.5095211863517761, "learning_rate": 0.0005141337962867238, "loss": 3.2132, "step": 15119 }, { "epoch": 0.74, "grad_norm": 0.5019669532775879, "learning_rate": 0.0005141230129644792, "loss": 3.1334, "step": 15120 }, { "epoch": 0.74, "grad_norm": 0.5907220244407654, "learning_rate": 0.0005141122290782758, "loss": 3.0343, "step": 15121 }, { "epoch": 0.74, "grad_norm": 0.531029462814331, "learning_rate": 0.000514101444628142, "loss": 3.2226, "step": 15122 }, { "epoch": 0.74, "grad_norm": 0.5512033104896545, "learning_rate": 0.0005140906596141063, "loss": 3.3203, "step": 15123 }, { "epoch": 0.74, "grad_norm": 0.5198848843574524, "learning_rate": 0.0005140798740361968, "loss": 3.1245, "step": 15124 }, { "epoch": 0.74, "grad_norm": 0.5568894147872925, "learning_rate": 0.0005140690878944423, "loss": 3.2085, "step": 15125 }, { "epoch": 0.74, "grad_norm": 0.5185664296150208, "learning_rate": 0.0005140583011888709, "loss": 3.3906, "step": 15126 }, { "epoch": 0.74, "grad_norm": 0.5689828991889954, "learning_rate": 0.0005140475139195112, "loss": 3.0878, "step": 15127 }, { "epoch": 0.74, "grad_norm": 0.5163256525993347, "learning_rate": 0.0005140367260863916, "loss": 3.3418, "step": 15128 }, { "epoch": 0.74, "grad_norm": 0.487690269947052, "learning_rate": 0.0005140259376895404, "loss": 3.4939, "step": 15129 }, { "epoch": 0.74, "grad_norm": 0.5142568349838257, "learning_rate": 0.000514015148728986, "loss": 3.1635, "step": 15130 }, { "epoch": 0.74, "grad_norm": 0.5546401739120483, "learning_rate": 0.000514004359204757, "loss": 3.1504, "step": 15131 }, { "epoch": 0.74, "grad_norm": 0.5494998693466187, "learning_rate": 0.0005139935691168816, "loss": 3.1133, "step": 15132 }, { "epoch": 0.74, "grad_norm": 0.5090645551681519, "learning_rate": 0.0005139827784653884, "loss": 3.1497, "step": 15133 }, { "epoch": 0.74, "grad_norm": 0.5398714542388916, "learning_rate": 0.0005139719872503057, "loss": 3.2604, "step": 15134 }, { "epoch": 0.74, "grad_norm": 0.5296069383621216, "learning_rate": 0.0005139611954716619, "loss": 3.4238, "step": 15135 }, { "epoch": 0.74, "grad_norm": 0.507793664932251, "learning_rate": 0.0005139504031294855, "loss": 3.1279, "step": 15136 }, { "epoch": 0.74, "grad_norm": 0.5212898850440979, "learning_rate": 0.000513939610223805, "loss": 3.2929, "step": 15137 }, { "epoch": 0.74, "grad_norm": 0.5241724848747253, "learning_rate": 0.0005139288167546487, "loss": 3.2657, "step": 15138 }, { "epoch": 0.74, "grad_norm": 0.5247151255607605, "learning_rate": 0.0005139180227220451, "loss": 3.2378, "step": 15139 }, { "epoch": 0.74, "grad_norm": 0.4941554665565491, "learning_rate": 0.0005139072281260226, "loss": 3.0256, "step": 15140 }, { "epoch": 0.74, "grad_norm": 0.5236513018608093, "learning_rate": 0.0005138964329666096, "loss": 3.1188, "step": 15141 }, { "epoch": 0.74, "grad_norm": 0.5333306789398193, "learning_rate": 0.0005138856372438347, "loss": 3.3072, "step": 15142 }, { "epoch": 0.74, "grad_norm": 0.5609138607978821, "learning_rate": 0.000513874840957726, "loss": 3.1602, "step": 15143 }, { "epoch": 0.74, "grad_norm": 0.49884551763534546, "learning_rate": 0.0005138640441083122, "loss": 3.1501, "step": 15144 }, { "epoch": 0.74, "grad_norm": 0.5260482430458069, "learning_rate": 0.0005138532466956216, "loss": 3.3145, "step": 15145 }, { "epoch": 0.74, "grad_norm": 0.543743908405304, "learning_rate": 0.0005138424487196829, "loss": 3.0801, "step": 15146 }, { "epoch": 0.74, "grad_norm": 0.5195195078849792, "learning_rate": 0.0005138316501805242, "loss": 3.2929, "step": 15147 }, { "epoch": 0.74, "grad_norm": 0.5003941655158997, "learning_rate": 0.0005138208510781741, "loss": 3.1971, "step": 15148 }, { "epoch": 0.74, "grad_norm": 0.5132508873939514, "learning_rate": 0.000513810051412661, "loss": 3.271, "step": 15149 }, { "epoch": 0.74, "grad_norm": 0.5287436246871948, "learning_rate": 0.0005137992511840134, "loss": 3.1374, "step": 15150 }, { "epoch": 0.74, "grad_norm": 0.5131794810295105, "learning_rate": 0.0005137884503922597, "loss": 3.1865, "step": 15151 }, { "epoch": 0.74, "grad_norm": 0.4891071021556854, "learning_rate": 0.0005137776490374284, "loss": 3.1813, "step": 15152 }, { "epoch": 0.74, "grad_norm": 0.518698513507843, "learning_rate": 0.0005137668471195478, "loss": 2.9667, "step": 15153 }, { "epoch": 0.74, "grad_norm": 0.5069079995155334, "learning_rate": 0.0005137560446386466, "loss": 3.1667, "step": 15154 }, { "epoch": 0.74, "grad_norm": 0.5019563436508179, "learning_rate": 0.0005137452415947531, "loss": 3.048, "step": 15155 }, { "epoch": 0.74, "grad_norm": 0.5977448225021362, "learning_rate": 0.0005137344379878958, "loss": 3.0489, "step": 15156 }, { "epoch": 0.74, "grad_norm": 0.526675283908844, "learning_rate": 0.000513723633818103, "loss": 3.2784, "step": 15157 }, { "epoch": 0.74, "grad_norm": 0.5488605499267578, "learning_rate": 0.0005137128290854035, "loss": 3.3835, "step": 15158 }, { "epoch": 0.74, "grad_norm": 0.5367397665977478, "learning_rate": 0.0005137020237898254, "loss": 3.3152, "step": 15159 }, { "epoch": 0.74, "grad_norm": 0.5273241400718689, "learning_rate": 0.0005136912179313973, "loss": 2.9809, "step": 15160 }, { "epoch": 0.74, "grad_norm": 0.555223822593689, "learning_rate": 0.0005136804115101475, "loss": 3.1469, "step": 15161 }, { "epoch": 0.74, "grad_norm": 0.533078670501709, "learning_rate": 0.0005136696045261049, "loss": 3.1378, "step": 15162 }, { "epoch": 0.74, "grad_norm": 0.5096789598464966, "learning_rate": 0.0005136587969792975, "loss": 3.2697, "step": 15163 }, { "epoch": 0.74, "grad_norm": 0.4871804416179657, "learning_rate": 0.000513647988869754, "loss": 3.1497, "step": 15164 }, { "epoch": 0.74, "grad_norm": 0.47410327196121216, "learning_rate": 0.0005136371801975028, "loss": 3.0985, "step": 15165 }, { "epoch": 0.74, "grad_norm": 0.5203956961631775, "learning_rate": 0.0005136263709625724, "loss": 3.1768, "step": 15166 }, { "epoch": 0.74, "grad_norm": 0.5182998180389404, "learning_rate": 0.0005136155611649912, "loss": 3.0054, "step": 15167 }, { "epoch": 0.74, "grad_norm": 0.5807198882102966, "learning_rate": 0.0005136047508047879, "loss": 3.1998, "step": 15168 }, { "epoch": 0.74, "grad_norm": 0.49342507123947144, "learning_rate": 0.0005135939398819906, "loss": 3.1338, "step": 15169 }, { "epoch": 0.74, "grad_norm": 0.5400848984718323, "learning_rate": 0.000513583128396628, "loss": 3.1402, "step": 15170 }, { "epoch": 0.74, "grad_norm": 0.7351841926574707, "learning_rate": 0.0005135723163487286, "loss": 3.0862, "step": 15171 }, { "epoch": 0.74, "grad_norm": 0.601331353187561, "learning_rate": 0.0005135615037383209, "loss": 3.1345, "step": 15172 }, { "epoch": 0.74, "grad_norm": 0.5336458086967468, "learning_rate": 0.0005135506905654331, "loss": 3.0461, "step": 15173 }, { "epoch": 0.74, "grad_norm": 0.5114117860794067, "learning_rate": 0.000513539876830094, "loss": 3.3398, "step": 15174 }, { "epoch": 0.74, "grad_norm": 0.521041750907898, "learning_rate": 0.0005135290625323319, "loss": 3.2127, "step": 15175 }, { "epoch": 0.74, "grad_norm": 0.5434523224830627, "learning_rate": 0.0005135182476721754, "loss": 3.2585, "step": 15176 }, { "epoch": 0.74, "grad_norm": 0.556186854839325, "learning_rate": 0.0005135074322496529, "loss": 3.1948, "step": 15177 }, { "epoch": 0.74, "grad_norm": 0.540410041809082, "learning_rate": 0.0005134966162647929, "loss": 2.9405, "step": 15178 }, { "epoch": 0.74, "grad_norm": 0.540989100933075, "learning_rate": 0.0005134857997176241, "loss": 3.3179, "step": 15179 }, { "epoch": 0.74, "grad_norm": 0.5502685308456421, "learning_rate": 0.0005134749826081745, "loss": 2.9779, "step": 15180 }, { "epoch": 0.74, "grad_norm": 0.5441862344741821, "learning_rate": 0.000513464164936473, "loss": 3.2497, "step": 15181 }, { "epoch": 0.74, "grad_norm": 0.5529954433441162, "learning_rate": 0.0005134533467025479, "loss": 3.1787, "step": 15182 }, { "epoch": 0.74, "grad_norm": 0.5215994715690613, "learning_rate": 0.0005134425279064279, "loss": 3.2406, "step": 15183 }, { "epoch": 0.74, "grad_norm": 0.4941001832485199, "learning_rate": 0.0005134317085481413, "loss": 2.8891, "step": 15184 }, { "epoch": 0.74, "grad_norm": 0.5015299320220947, "learning_rate": 0.0005134208886277167, "loss": 3.1398, "step": 15185 }, { "epoch": 0.74, "grad_norm": 0.4704887866973877, "learning_rate": 0.0005134100681451825, "loss": 3.2913, "step": 15186 }, { "epoch": 0.74, "grad_norm": 0.4975797235965729, "learning_rate": 0.0005133992471005672, "loss": 3.055, "step": 15187 }, { "epoch": 0.74, "grad_norm": 0.5469768047332764, "learning_rate": 0.0005133884254938994, "loss": 3.1481, "step": 15188 }, { "epoch": 0.74, "grad_norm": 0.5250169634819031, "learning_rate": 0.0005133776033252076, "loss": 3.2758, "step": 15189 }, { "epoch": 0.74, "grad_norm": 0.5461866855621338, "learning_rate": 0.0005133667805945202, "loss": 3.2773, "step": 15190 }, { "epoch": 0.74, "grad_norm": 0.501222550868988, "learning_rate": 0.0005133559573018658, "loss": 3.1817, "step": 15191 }, { "epoch": 0.74, "grad_norm": 0.5163980722427368, "learning_rate": 0.0005133451334472729, "loss": 3.3022, "step": 15192 }, { "epoch": 0.74, "grad_norm": 0.49753910303115845, "learning_rate": 0.0005133343090307699, "loss": 3.3661, "step": 15193 }, { "epoch": 0.74, "grad_norm": 0.5362524390220642, "learning_rate": 0.0005133234840523854, "loss": 3.2704, "step": 15194 }, { "epoch": 0.74, "grad_norm": 0.5319569110870361, "learning_rate": 0.000513312658512148, "loss": 3.2043, "step": 15195 }, { "epoch": 0.74, "grad_norm": 0.5191274881362915, "learning_rate": 0.0005133018324100859, "loss": 3.3014, "step": 15196 }, { "epoch": 0.74, "grad_norm": 0.6559003591537476, "learning_rate": 0.000513291005746228, "loss": 3.253, "step": 15197 }, { "epoch": 0.74, "grad_norm": 0.5224505066871643, "learning_rate": 0.0005132801785206026, "loss": 3.2519, "step": 15198 }, { "epoch": 0.74, "grad_norm": 0.5321853756904602, "learning_rate": 0.0005132693507332383, "loss": 2.9671, "step": 15199 }, { "epoch": 0.74, "grad_norm": 0.5000855326652527, "learning_rate": 0.0005132585223841635, "loss": 3.1668, "step": 15200 }, { "epoch": 0.74, "grad_norm": 0.5224664211273193, "learning_rate": 0.0005132476934734068, "loss": 3.2821, "step": 15201 }, { "epoch": 0.75, "grad_norm": 0.4886454641819, "learning_rate": 0.0005132368640009968, "loss": 3.0804, "step": 15202 }, { "epoch": 0.75, "grad_norm": 0.5113134384155273, "learning_rate": 0.0005132260339669618, "loss": 3.0912, "step": 15203 }, { "epoch": 0.75, "grad_norm": 0.5165402293205261, "learning_rate": 0.0005132152033713305, "loss": 3.1487, "step": 15204 }, { "epoch": 0.75, "grad_norm": 0.5087444186210632, "learning_rate": 0.0005132043722141314, "loss": 3.0623, "step": 15205 }, { "epoch": 0.75, "grad_norm": 0.49701055884361267, "learning_rate": 0.0005131935404953931, "loss": 3.1004, "step": 15206 }, { "epoch": 0.75, "grad_norm": 0.5303621888160706, "learning_rate": 0.000513182708215144, "loss": 3.1908, "step": 15207 }, { "epoch": 0.75, "grad_norm": 0.48327744007110596, "learning_rate": 0.0005131718753734127, "loss": 3.1967, "step": 15208 }, { "epoch": 0.75, "grad_norm": 0.5533356666564941, "learning_rate": 0.0005131610419702276, "loss": 3.2278, "step": 15209 }, { "epoch": 0.75, "grad_norm": 0.5282249450683594, "learning_rate": 0.0005131502080056174, "loss": 3.2681, "step": 15210 }, { "epoch": 0.75, "grad_norm": 0.5078888535499573, "learning_rate": 0.0005131393734796106, "loss": 3.3327, "step": 15211 }, { "epoch": 0.75, "grad_norm": 0.49346816539764404, "learning_rate": 0.0005131285383922357, "loss": 3.1853, "step": 15212 }, { "epoch": 0.75, "grad_norm": 0.5162724256515503, "learning_rate": 0.0005131177027435213, "loss": 3.0953, "step": 15213 }, { "epoch": 0.75, "grad_norm": 0.5229780673980713, "learning_rate": 0.0005131068665334957, "loss": 3.0968, "step": 15214 }, { "epoch": 0.75, "grad_norm": 0.5405413508415222, "learning_rate": 0.0005130960297621877, "loss": 2.9953, "step": 15215 }, { "epoch": 0.75, "grad_norm": 0.5310748219490051, "learning_rate": 0.000513085192429626, "loss": 3.1261, "step": 15216 }, { "epoch": 0.75, "grad_norm": 0.5561037659645081, "learning_rate": 0.0005130743545358386, "loss": 3.3495, "step": 15217 }, { "epoch": 0.75, "grad_norm": 0.5247962474822998, "learning_rate": 0.0005130635160808545, "loss": 3.3857, "step": 15218 }, { "epoch": 0.75, "grad_norm": 0.5569313168525696, "learning_rate": 0.0005130526770647021, "loss": 3.169, "step": 15219 }, { "epoch": 0.75, "grad_norm": 0.516389012336731, "learning_rate": 0.0005130418374874099, "loss": 3.2518, "step": 15220 }, { "epoch": 0.75, "grad_norm": 0.5360453128814697, "learning_rate": 0.0005130309973490066, "loss": 3.1335, "step": 15221 }, { "epoch": 0.75, "grad_norm": 0.4962994158267975, "learning_rate": 0.0005130201566495206, "loss": 3.3244, "step": 15222 }, { "epoch": 0.75, "grad_norm": 0.5042166113853455, "learning_rate": 0.0005130093153889805, "loss": 3.1213, "step": 15223 }, { "epoch": 0.75, "grad_norm": 0.504375696182251, "learning_rate": 0.0005129984735674149, "loss": 3.1488, "step": 15224 }, { "epoch": 0.75, "grad_norm": 0.5094754695892334, "learning_rate": 0.0005129876311848522, "loss": 3.4237, "step": 15225 }, { "epoch": 0.75, "grad_norm": 0.49719002842903137, "learning_rate": 0.0005129767882413211, "loss": 3.2614, "step": 15226 }, { "epoch": 0.75, "grad_norm": 0.5632519721984863, "learning_rate": 0.0005129659447368502, "loss": 3.1523, "step": 15227 }, { "epoch": 0.75, "grad_norm": 0.5078555345535278, "learning_rate": 0.0005129551006714678, "loss": 3.2035, "step": 15228 }, { "epoch": 0.75, "grad_norm": 0.5147547721862793, "learning_rate": 0.0005129442560452029, "loss": 3.3097, "step": 15229 }, { "epoch": 0.75, "grad_norm": 0.5100898742675781, "learning_rate": 0.0005129334108580837, "loss": 3.2415, "step": 15230 }, { "epoch": 0.75, "grad_norm": 0.49058064818382263, "learning_rate": 0.0005129225651101389, "loss": 3.1857, "step": 15231 }, { "epoch": 0.75, "grad_norm": 0.4862991273403168, "learning_rate": 0.0005129117188013971, "loss": 3.4175, "step": 15232 }, { "epoch": 0.75, "grad_norm": 0.5109384059906006, "learning_rate": 0.0005129008719318867, "loss": 3.3401, "step": 15233 }, { "epoch": 0.75, "grad_norm": 0.5978837013244629, "learning_rate": 0.0005128900245016365, "loss": 2.9597, "step": 15234 }, { "epoch": 0.75, "grad_norm": 0.5241730809211731, "learning_rate": 0.0005128791765106747, "loss": 3.3489, "step": 15235 }, { "epoch": 0.75, "grad_norm": 0.544468104839325, "learning_rate": 0.0005128683279590304, "loss": 3.0905, "step": 15236 }, { "epoch": 0.75, "grad_norm": 0.5095611214637756, "learning_rate": 0.0005128574788467317, "loss": 3.2927, "step": 15237 }, { "epoch": 0.75, "grad_norm": 0.6200950741767883, "learning_rate": 0.0005128466291738074, "loss": 3.3109, "step": 15238 }, { "epoch": 0.75, "grad_norm": 0.5368150472640991, "learning_rate": 0.0005128357789402861, "loss": 3.004, "step": 15239 }, { "epoch": 0.75, "grad_norm": 0.49415868520736694, "learning_rate": 0.0005128249281461963, "loss": 3.2525, "step": 15240 }, { "epoch": 0.75, "grad_norm": 0.5034611821174622, "learning_rate": 0.0005128140767915666, "loss": 3.0394, "step": 15241 }, { "epoch": 0.75, "grad_norm": 0.5524153709411621, "learning_rate": 0.0005128032248764256, "loss": 3.0271, "step": 15242 }, { "epoch": 0.75, "grad_norm": 0.537729024887085, "learning_rate": 0.0005127923724008018, "loss": 3.3173, "step": 15243 }, { "epoch": 0.75, "grad_norm": 0.5131357908248901, "learning_rate": 0.0005127815193647239, "loss": 2.9923, "step": 15244 }, { "epoch": 0.75, "grad_norm": 0.546363353729248, "learning_rate": 0.0005127706657682205, "loss": 2.9208, "step": 15245 }, { "epoch": 0.75, "grad_norm": 0.565846860408783, "learning_rate": 0.00051275981161132, "loss": 3.3611, "step": 15246 }, { "epoch": 0.75, "grad_norm": 0.5199288129806519, "learning_rate": 0.0005127489568940511, "loss": 3.185, "step": 15247 }, { "epoch": 0.75, "grad_norm": 0.5325568318367004, "learning_rate": 0.0005127381016164425, "loss": 3.0524, "step": 15248 }, { "epoch": 0.75, "grad_norm": 0.5222455263137817, "learning_rate": 0.0005127272457785225, "loss": 3.0541, "step": 15249 }, { "epoch": 0.75, "grad_norm": 0.5106666684150696, "learning_rate": 0.00051271638938032, "loss": 3.2104, "step": 15250 }, { "epoch": 0.75, "grad_norm": 0.5324909090995789, "learning_rate": 0.0005127055324218635, "loss": 3.2489, "step": 15251 }, { "epoch": 0.75, "grad_norm": 0.5093536376953125, "learning_rate": 0.0005126946749031814, "loss": 3.1664, "step": 15252 }, { "epoch": 0.75, "grad_norm": 0.5190713405609131, "learning_rate": 0.0005126838168243026, "loss": 3.1126, "step": 15253 }, { "epoch": 0.75, "grad_norm": 0.6075634360313416, "learning_rate": 0.0005126729581852556, "loss": 3.4352, "step": 15254 }, { "epoch": 0.75, "grad_norm": 0.5500290393829346, "learning_rate": 0.0005126620989860688, "loss": 3.1548, "step": 15255 }, { "epoch": 0.75, "grad_norm": 0.5063414573669434, "learning_rate": 0.000512651239226771, "loss": 3.2785, "step": 15256 }, { "epoch": 0.75, "grad_norm": 0.5023425817489624, "learning_rate": 0.0005126403789073909, "loss": 3.2736, "step": 15257 }, { "epoch": 0.75, "grad_norm": 0.5160260796546936, "learning_rate": 0.0005126295180279568, "loss": 3.3132, "step": 15258 }, { "epoch": 0.75, "grad_norm": 0.50048828125, "learning_rate": 0.0005126186565884975, "loss": 3.1487, "step": 15259 }, { "epoch": 0.75, "grad_norm": 0.49590444564819336, "learning_rate": 0.0005126077945890417, "loss": 3.2356, "step": 15260 }, { "epoch": 0.75, "grad_norm": 0.5084480047225952, "learning_rate": 0.0005125969320296178, "loss": 3.2633, "step": 15261 }, { "epoch": 0.75, "grad_norm": 0.5234677195549011, "learning_rate": 0.0005125860689102544, "loss": 3.0592, "step": 15262 }, { "epoch": 0.75, "grad_norm": 0.5146345496177673, "learning_rate": 0.0005125752052309805, "loss": 3.3137, "step": 15263 }, { "epoch": 0.75, "grad_norm": 0.5760210752487183, "learning_rate": 0.0005125643409918242, "loss": 3.3984, "step": 15264 }, { "epoch": 0.75, "grad_norm": 0.5140902996063232, "learning_rate": 0.0005125534761928144, "loss": 3.307, "step": 15265 }, { "epoch": 0.75, "grad_norm": 0.5068773031234741, "learning_rate": 0.0005125426108339795, "loss": 3.0952, "step": 15266 }, { "epoch": 0.75, "grad_norm": 0.5212466716766357, "learning_rate": 0.0005125317449153484, "loss": 3.1379, "step": 15267 }, { "epoch": 0.75, "grad_norm": 0.5364983677864075, "learning_rate": 0.0005125208784369495, "loss": 3.2761, "step": 15268 }, { "epoch": 0.75, "grad_norm": 0.4984794557094574, "learning_rate": 0.0005125100113988117, "loss": 3.1004, "step": 15269 }, { "epoch": 0.75, "grad_norm": 0.5525820851325989, "learning_rate": 0.0005124991438009632, "loss": 3.2194, "step": 15270 }, { "epoch": 0.75, "grad_norm": 0.5691623091697693, "learning_rate": 0.000512488275643433, "loss": 3.1136, "step": 15271 }, { "epoch": 0.75, "grad_norm": 0.5180755853652954, "learning_rate": 0.0005124774069262494, "loss": 3.2401, "step": 15272 }, { "epoch": 0.75, "grad_norm": 0.5265724062919617, "learning_rate": 0.0005124665376494414, "loss": 3.2386, "step": 15273 }, { "epoch": 0.75, "grad_norm": 0.48479267954826355, "learning_rate": 0.0005124556678130374, "loss": 3.1576, "step": 15274 }, { "epoch": 0.75, "grad_norm": 0.5298726558685303, "learning_rate": 0.000512444797417066, "loss": 2.8294, "step": 15275 }, { "epoch": 0.75, "grad_norm": 0.509896457195282, "learning_rate": 0.000512433926461556, "loss": 3.0053, "step": 15276 }, { "epoch": 0.75, "grad_norm": 0.5217825174331665, "learning_rate": 0.0005124230549465357, "loss": 3.2738, "step": 15277 }, { "epoch": 0.75, "grad_norm": 0.4862534999847412, "learning_rate": 0.0005124121828720341, "loss": 3.2049, "step": 15278 }, { "epoch": 0.75, "grad_norm": 0.5458556413650513, "learning_rate": 0.0005124013102380797, "loss": 3.2185, "step": 15279 }, { "epoch": 0.75, "grad_norm": 0.5805124044418335, "learning_rate": 0.0005123904370447011, "loss": 3.0137, "step": 15280 }, { "epoch": 0.75, "grad_norm": 0.521308958530426, "learning_rate": 0.000512379563291927, "loss": 3.1837, "step": 15281 }, { "epoch": 0.75, "grad_norm": 0.5270718336105347, "learning_rate": 0.000512368688979786, "loss": 3.4552, "step": 15282 }, { "epoch": 0.75, "grad_norm": 0.504327654838562, "learning_rate": 0.0005123578141083067, "loss": 3.405, "step": 15283 }, { "epoch": 0.75, "grad_norm": 0.49246448278427124, "learning_rate": 0.0005123469386775178, "loss": 3.1681, "step": 15284 }, { "epoch": 0.75, "grad_norm": 0.5137251615524292, "learning_rate": 0.0005123360626874479, "loss": 3.4152, "step": 15285 }, { "epoch": 0.75, "grad_norm": 0.477211058139801, "learning_rate": 0.0005123251861381257, "loss": 2.988, "step": 15286 }, { "epoch": 0.75, "grad_norm": 0.48780468106269836, "learning_rate": 0.0005123143090295799, "loss": 3.2013, "step": 15287 }, { "epoch": 0.75, "grad_norm": 0.535379946231842, "learning_rate": 0.0005123034313618389, "loss": 3.1484, "step": 15288 }, { "epoch": 0.75, "grad_norm": 0.5203052163124084, "learning_rate": 0.0005122925531349317, "loss": 3.1849, "step": 15289 }, { "epoch": 0.75, "grad_norm": 0.5167236924171448, "learning_rate": 0.0005122816743488866, "loss": 3.0851, "step": 15290 }, { "epoch": 0.75, "grad_norm": 0.4870116412639618, "learning_rate": 0.0005122707950037325, "loss": 3.3136, "step": 15291 }, { "epoch": 0.75, "grad_norm": 0.5424923300743103, "learning_rate": 0.0005122599150994981, "loss": 3.1071, "step": 15292 }, { "epoch": 0.75, "grad_norm": 0.5110763311386108, "learning_rate": 0.0005122490346362119, "loss": 3.0956, "step": 15293 }, { "epoch": 0.75, "grad_norm": 0.5034542083740234, "learning_rate": 0.0005122381536139025, "loss": 3.2484, "step": 15294 }, { "epoch": 0.75, "grad_norm": 0.49909499287605286, "learning_rate": 0.0005122272720325986, "loss": 3.314, "step": 15295 }, { "epoch": 0.75, "grad_norm": 0.5280368328094482, "learning_rate": 0.0005122163898923289, "loss": 3.3027, "step": 15296 }, { "epoch": 0.75, "grad_norm": 0.4968806505203247, "learning_rate": 0.0005122055071931222, "loss": 3.2456, "step": 15297 }, { "epoch": 0.75, "grad_norm": 0.5014216899871826, "learning_rate": 0.000512194623935007, "loss": 3.1594, "step": 15298 }, { "epoch": 0.75, "grad_norm": 0.535797655582428, "learning_rate": 0.000512183740118012, "loss": 3.2467, "step": 15299 }, { "epoch": 0.75, "grad_norm": 0.52153080701828, "learning_rate": 0.0005121728557421658, "loss": 2.9868, "step": 15300 }, { "epoch": 0.75, "grad_norm": 0.5368970632553101, "learning_rate": 0.0005121619708074972, "loss": 3.1427, "step": 15301 }, { "epoch": 0.75, "grad_norm": 0.5109313726425171, "learning_rate": 0.0005121510853140348, "loss": 3.0168, "step": 15302 }, { "epoch": 0.75, "grad_norm": 0.5726766586303711, "learning_rate": 0.0005121401992618073, "loss": 3.0998, "step": 15303 }, { "epoch": 0.75, "grad_norm": 0.5439186692237854, "learning_rate": 0.0005121293126508432, "loss": 3.1734, "step": 15304 }, { "epoch": 0.75, "grad_norm": 0.5013107061386108, "learning_rate": 0.0005121184254811714, "loss": 3.3949, "step": 15305 }, { "epoch": 0.75, "grad_norm": 0.5400449633598328, "learning_rate": 0.0005121075377528205, "loss": 3.0781, "step": 15306 }, { "epoch": 0.75, "grad_norm": 0.5805463790893555, "learning_rate": 0.0005120966494658192, "loss": 3.1349, "step": 15307 }, { "epoch": 0.75, "grad_norm": 0.5074591636657715, "learning_rate": 0.0005120857606201961, "loss": 3.3115, "step": 15308 }, { "epoch": 0.75, "grad_norm": 0.5145214200019836, "learning_rate": 0.00051207487121598, "loss": 3.1443, "step": 15309 }, { "epoch": 0.75, "grad_norm": 0.5054406523704529, "learning_rate": 0.0005120639812531995, "loss": 3.4488, "step": 15310 }, { "epoch": 0.75, "grad_norm": 0.5463675856590271, "learning_rate": 0.0005120530907318831, "loss": 2.9445, "step": 15311 }, { "epoch": 0.75, "grad_norm": 0.5270100235939026, "learning_rate": 0.0005120421996520597, "loss": 3.2497, "step": 15312 }, { "epoch": 0.75, "grad_norm": 0.5352219939231873, "learning_rate": 0.0005120313080137581, "loss": 3.1157, "step": 15313 }, { "epoch": 0.75, "grad_norm": 0.550274670124054, "learning_rate": 0.0005120204158170069, "loss": 3.0731, "step": 15314 }, { "epoch": 0.75, "grad_norm": 0.5238516330718994, "learning_rate": 0.0005120095230618345, "loss": 3.4092, "step": 15315 }, { "epoch": 0.75, "grad_norm": 0.4942419230937958, "learning_rate": 0.00051199862974827, "loss": 3.3665, "step": 15316 }, { "epoch": 0.75, "grad_norm": 0.489196240901947, "learning_rate": 0.0005119877358763418, "loss": 3.0608, "step": 15317 }, { "epoch": 0.75, "grad_norm": 0.5123137831687927, "learning_rate": 0.0005119768414460788, "loss": 3.3294, "step": 15318 }, { "epoch": 0.75, "grad_norm": 0.48939448595046997, "learning_rate": 0.0005119659464575096, "loss": 3.3551, "step": 15319 }, { "epoch": 0.75, "grad_norm": 0.5240415930747986, "learning_rate": 0.0005119550509106628, "loss": 3.3512, "step": 15320 }, { "epoch": 0.75, "grad_norm": 0.5232025980949402, "learning_rate": 0.0005119441548055671, "loss": 3.0429, "step": 15321 }, { "epoch": 0.75, "grad_norm": 0.5226197242736816, "learning_rate": 0.0005119332581422515, "loss": 3.2391, "step": 15322 }, { "epoch": 0.75, "grad_norm": 0.5628093481063843, "learning_rate": 0.0005119223609207443, "loss": 3.3304, "step": 15323 }, { "epoch": 0.75, "grad_norm": 0.4734574854373932, "learning_rate": 0.0005119114631410745, "loss": 3.4568, "step": 15324 }, { "epoch": 0.75, "grad_norm": 0.515416145324707, "learning_rate": 0.0005119005648032707, "loss": 3.2631, "step": 15325 }, { "epoch": 0.75, "grad_norm": 0.5349523425102234, "learning_rate": 0.0005118896659073616, "loss": 3.5373, "step": 15326 }, { "epoch": 0.75, "grad_norm": 0.562615156173706, "learning_rate": 0.0005118787664533757, "loss": 3.1333, "step": 15327 }, { "epoch": 0.75, "grad_norm": 0.49806028604507446, "learning_rate": 0.0005118678664413421, "loss": 3.2441, "step": 15328 }, { "epoch": 0.75, "grad_norm": 0.49525222182273865, "learning_rate": 0.0005118569658712893, "loss": 3.1967, "step": 15329 }, { "epoch": 0.75, "grad_norm": 0.548129141330719, "learning_rate": 0.0005118460647432461, "loss": 3.159, "step": 15330 }, { "epoch": 0.75, "grad_norm": 0.5434116721153259, "learning_rate": 0.000511835163057241, "loss": 3.2088, "step": 15331 }, { "epoch": 0.75, "grad_norm": 0.5034930109977722, "learning_rate": 0.0005118242608133028, "loss": 3.3135, "step": 15332 }, { "epoch": 0.75, "grad_norm": 0.5119454264640808, "learning_rate": 0.0005118133580114604, "loss": 3.1451, "step": 15333 }, { "epoch": 0.75, "grad_norm": 0.5325192213058472, "learning_rate": 0.0005118024546517424, "loss": 3.1097, "step": 15334 }, { "epoch": 0.75, "grad_norm": 0.5029995441436768, "learning_rate": 0.0005117915507341774, "loss": 3.3235, "step": 15335 }, { "epoch": 0.75, "grad_norm": 0.5167403221130371, "learning_rate": 0.0005117806462587942, "loss": 3.1699, "step": 15336 }, { "epoch": 0.75, "grad_norm": 0.5161690711975098, "learning_rate": 0.0005117697412256216, "loss": 3.3004, "step": 15337 }, { "epoch": 0.75, "grad_norm": 0.5440047383308411, "learning_rate": 0.0005117588356346881, "loss": 3.187, "step": 15338 }, { "epoch": 0.75, "grad_norm": 0.5046606063842773, "learning_rate": 0.0005117479294860228, "loss": 3.1708, "step": 15339 }, { "epoch": 0.75, "grad_norm": 0.5875895023345947, "learning_rate": 0.0005117370227796542, "loss": 3.21, "step": 15340 }, { "epoch": 0.75, "grad_norm": 0.5174129605293274, "learning_rate": 0.0005117261155156109, "loss": 3.0799, "step": 15341 }, { "epoch": 0.75, "grad_norm": 0.5070399045944214, "learning_rate": 0.0005117152076939218, "loss": 3.294, "step": 15342 }, { "epoch": 0.75, "grad_norm": 0.5156857967376709, "learning_rate": 0.0005117042993146156, "loss": 3.1872, "step": 15343 }, { "epoch": 0.75, "grad_norm": 0.5003373026847839, "learning_rate": 0.000511693390377721, "loss": 2.9625, "step": 15344 }, { "epoch": 0.75, "grad_norm": 0.5173194408416748, "learning_rate": 0.0005116824808832668, "loss": 3.0713, "step": 15345 }, { "epoch": 0.75, "grad_norm": 0.5497887134552002, "learning_rate": 0.0005116715708312817, "loss": 3.2665, "step": 15346 }, { "epoch": 0.75, "grad_norm": 0.5368115305900574, "learning_rate": 0.0005116606602217944, "loss": 3.3243, "step": 15347 }, { "epoch": 0.75, "grad_norm": 0.5352510213851929, "learning_rate": 0.0005116497490548335, "loss": 3.0809, "step": 15348 }, { "epoch": 0.75, "grad_norm": 0.5086609721183777, "learning_rate": 0.0005116388373304281, "loss": 3.2868, "step": 15349 }, { "epoch": 0.75, "grad_norm": 0.5148516297340393, "learning_rate": 0.0005116279250486067, "loss": 3.1258, "step": 15350 }, { "epoch": 0.75, "grad_norm": 0.5165446996688843, "learning_rate": 0.000511617012209398, "loss": 3.2221, "step": 15351 }, { "epoch": 0.75, "grad_norm": 0.5249726176261902, "learning_rate": 0.0005116060988128308, "loss": 3.2277, "step": 15352 }, { "epoch": 0.75, "grad_norm": 0.5012349486351013, "learning_rate": 0.0005115951848589339, "loss": 3.4042, "step": 15353 }, { "epoch": 0.75, "grad_norm": 0.5562968850135803, "learning_rate": 0.0005115842703477361, "loss": 3.1529, "step": 15354 }, { "epoch": 0.75, "grad_norm": 0.49537578225135803, "learning_rate": 0.0005115733552792659, "loss": 3.0784, "step": 15355 }, { "epoch": 0.75, "grad_norm": 0.5626443028450012, "learning_rate": 0.0005115624396535522, "loss": 3.1834, "step": 15356 }, { "epoch": 0.75, "grad_norm": 0.5942702889442444, "learning_rate": 0.0005115515234706238, "loss": 2.9109, "step": 15357 }, { "epoch": 0.75, "grad_norm": 0.5349305272102356, "learning_rate": 0.0005115406067305095, "loss": 3.0626, "step": 15358 }, { "epoch": 0.75, "grad_norm": 0.4917951822280884, "learning_rate": 0.0005115296894332379, "loss": 3.3152, "step": 15359 }, { "epoch": 0.75, "grad_norm": 0.5094746351242065, "learning_rate": 0.0005115187715788377, "loss": 3.1875, "step": 15360 }, { "epoch": 0.75, "grad_norm": 0.5121936798095703, "learning_rate": 0.0005115078531673379, "loss": 3.3627, "step": 15361 }, { "epoch": 0.75, "grad_norm": 0.5854499340057373, "learning_rate": 0.000511496934198767, "loss": 3.2952, "step": 15362 }, { "epoch": 0.75, "grad_norm": 0.5319592952728271, "learning_rate": 0.000511486014673154, "loss": 3.2366, "step": 15363 }, { "epoch": 0.75, "grad_norm": 0.553443193435669, "learning_rate": 0.0005114750945905275, "loss": 3.0643, "step": 15364 }, { "epoch": 0.75, "grad_norm": 0.5256295204162598, "learning_rate": 0.0005114641739509162, "loss": 3.1867, "step": 15365 }, { "epoch": 0.75, "grad_norm": 0.5130583643913269, "learning_rate": 0.0005114532527543492, "loss": 3.2345, "step": 15366 }, { "epoch": 0.75, "grad_norm": 0.5329893231391907, "learning_rate": 0.0005114423310008547, "loss": 3.4116, "step": 15367 }, { "epoch": 0.75, "grad_norm": 0.5318312644958496, "learning_rate": 0.000511431408690462, "loss": 3.2325, "step": 15368 }, { "epoch": 0.75, "grad_norm": 0.5396196842193604, "learning_rate": 0.0005114204858231997, "loss": 3.1291, "step": 15369 }, { "epoch": 0.75, "grad_norm": 0.4775935113430023, "learning_rate": 0.0005114095623990964, "loss": 3.3107, "step": 15370 }, { "epoch": 0.75, "grad_norm": 0.596052348613739, "learning_rate": 0.000511398638418181, "loss": 3.3655, "step": 15371 }, { "epoch": 0.75, "grad_norm": 0.5218362212181091, "learning_rate": 0.0005113877138804824, "loss": 3.0283, "step": 15372 }, { "epoch": 0.75, "grad_norm": 0.5000458359718323, "learning_rate": 0.0005113767887860291, "loss": 3.2683, "step": 15373 }, { "epoch": 0.75, "grad_norm": 0.5197030901908875, "learning_rate": 0.0005113658631348501, "loss": 3.058, "step": 15374 }, { "epoch": 0.75, "grad_norm": 0.5189664363861084, "learning_rate": 0.000511354936926974, "loss": 3.3382, "step": 15375 }, { "epoch": 0.75, "grad_norm": 0.571043848991394, "learning_rate": 0.0005113440101624299, "loss": 3.0839, "step": 15376 }, { "epoch": 0.75, "grad_norm": 0.5379744172096252, "learning_rate": 0.0005113330828412461, "loss": 3.1519, "step": 15377 }, { "epoch": 0.75, "grad_norm": 0.4935210943222046, "learning_rate": 0.0005113221549634517, "loss": 3.1605, "step": 15378 }, { "epoch": 0.75, "grad_norm": 0.5590223670005798, "learning_rate": 0.0005113112265290755, "loss": 3.3283, "step": 15379 }, { "epoch": 0.75, "grad_norm": 0.5487520694732666, "learning_rate": 0.0005113002975381462, "loss": 3.4678, "step": 15380 }, { "epoch": 0.75, "grad_norm": 0.5301699638366699, "learning_rate": 0.0005112893679906926, "loss": 3.0123, "step": 15381 }, { "epoch": 0.75, "grad_norm": 0.5726925730705261, "learning_rate": 0.0005112784378867432, "loss": 3.1856, "step": 15382 }, { "epoch": 0.75, "grad_norm": 0.5013753771781921, "learning_rate": 0.0005112675072263273, "loss": 3.1004, "step": 15383 }, { "epoch": 0.75, "grad_norm": 0.5306413173675537, "learning_rate": 0.0005112565760094734, "loss": 3.1682, "step": 15384 }, { "epoch": 0.75, "grad_norm": 0.4865894019603729, "learning_rate": 0.0005112456442362104, "loss": 3.1465, "step": 15385 }, { "epoch": 0.75, "grad_norm": 0.5361476540565491, "learning_rate": 0.0005112347119065669, "loss": 3.5116, "step": 15386 }, { "epoch": 0.75, "grad_norm": 0.5227615833282471, "learning_rate": 0.0005112237790205719, "loss": 3.2517, "step": 15387 }, { "epoch": 0.75, "grad_norm": 0.5013512969017029, "learning_rate": 0.000511212845578254, "loss": 2.9083, "step": 15388 }, { "epoch": 0.75, "grad_norm": 0.5280064940452576, "learning_rate": 0.0005112019115796424, "loss": 3.3894, "step": 15389 }, { "epoch": 0.75, "grad_norm": 0.5008797645568848, "learning_rate": 0.0005111909770247653, "loss": 3.4333, "step": 15390 }, { "epoch": 0.75, "grad_norm": 0.5193240642547607, "learning_rate": 0.000511180041913652, "loss": 3.1138, "step": 15391 }, { "epoch": 0.75, "grad_norm": 0.49930593371391296, "learning_rate": 0.000511169106246331, "loss": 3.3978, "step": 15392 }, { "epoch": 0.75, "grad_norm": 0.48021283745765686, "learning_rate": 0.0005111581700228313, "loss": 3.3122, "step": 15393 }, { "epoch": 0.75, "grad_norm": 0.5200793147087097, "learning_rate": 0.0005111472332431815, "loss": 3.2217, "step": 15394 }, { "epoch": 0.75, "grad_norm": 0.5211922526359558, "learning_rate": 0.0005111362959074106, "loss": 3.1195, "step": 15395 }, { "epoch": 0.75, "grad_norm": 0.5001904368400574, "learning_rate": 0.0005111253580155474, "loss": 3.1291, "step": 15396 }, { "epoch": 0.75, "grad_norm": 0.5240308046340942, "learning_rate": 0.0005111144195676206, "loss": 3.3654, "step": 15397 }, { "epoch": 0.75, "grad_norm": 0.5145796537399292, "learning_rate": 0.0005111034805636589, "loss": 3.1215, "step": 15398 }, { "epoch": 0.75, "grad_norm": 0.517951488494873, "learning_rate": 0.0005110925410036914, "loss": 3.1964, "step": 15399 }, { "epoch": 0.75, "grad_norm": 0.5089027285575867, "learning_rate": 0.0005110816008877468, "loss": 3.2889, "step": 15400 }, { "epoch": 0.75, "grad_norm": 0.5206027626991272, "learning_rate": 0.0005110706602158539, "loss": 3.2054, "step": 15401 }, { "epoch": 0.75, "grad_norm": 0.522180438041687, "learning_rate": 0.0005110597189880414, "loss": 3.2392, "step": 15402 }, { "epoch": 0.75, "grad_norm": 0.5154605507850647, "learning_rate": 0.0005110487772043383, "loss": 3.1462, "step": 15403 }, { "epoch": 0.75, "grad_norm": 0.6345930695533752, "learning_rate": 0.0005110378348647732, "loss": 3.4421, "step": 15404 }, { "epoch": 0.75, "grad_norm": 0.5428985357284546, "learning_rate": 0.0005110268919693752, "loss": 3.1896, "step": 15405 }, { "epoch": 0.76, "grad_norm": 0.5568393468856812, "learning_rate": 0.0005110159485181729, "loss": 3.1683, "step": 15406 }, { "epoch": 0.76, "grad_norm": 0.5200351476669312, "learning_rate": 0.0005110050045111953, "loss": 3.0301, "step": 15407 }, { "epoch": 0.76, "grad_norm": 0.5830309391021729, "learning_rate": 0.000510994059948471, "loss": 3.2447, "step": 15408 }, { "epoch": 0.76, "grad_norm": 0.5186257362365723, "learning_rate": 0.000510983114830029, "loss": 3.4328, "step": 15409 }, { "epoch": 0.76, "grad_norm": 0.5107519626617432, "learning_rate": 0.0005109721691558981, "loss": 3.4096, "step": 15410 }, { "epoch": 0.76, "grad_norm": 0.5244288444519043, "learning_rate": 0.0005109612229261073, "loss": 3.1377, "step": 15411 }, { "epoch": 0.76, "grad_norm": 0.47387999296188354, "learning_rate": 0.000510950276140685, "loss": 3.3397, "step": 15412 }, { "epoch": 0.76, "grad_norm": 0.5340439677238464, "learning_rate": 0.0005109393287996602, "loss": 3.1848, "step": 15413 }, { "epoch": 0.76, "grad_norm": 0.506712794303894, "learning_rate": 0.000510928380903062, "loss": 3.1481, "step": 15414 }, { "epoch": 0.76, "grad_norm": 0.5220345854759216, "learning_rate": 0.0005109174324509189, "loss": 3.0608, "step": 15415 }, { "epoch": 0.76, "grad_norm": 0.48933979868888855, "learning_rate": 0.00051090648344326, "loss": 3.0937, "step": 15416 }, { "epoch": 0.76, "grad_norm": 0.5643341541290283, "learning_rate": 0.0005108955338801139, "loss": 2.9736, "step": 15417 }, { "epoch": 0.76, "grad_norm": 0.5401406288146973, "learning_rate": 0.0005108845837615096, "loss": 3.1143, "step": 15418 }, { "epoch": 0.76, "grad_norm": 0.5696349740028381, "learning_rate": 0.0005108736330874759, "loss": 3.4066, "step": 15419 }, { "epoch": 0.76, "grad_norm": 0.5899490118026733, "learning_rate": 0.0005108626818580415, "loss": 3.0104, "step": 15420 }, { "epoch": 0.76, "grad_norm": 0.515295684337616, "learning_rate": 0.0005108517300732356, "loss": 3.2919, "step": 15421 }, { "epoch": 0.76, "grad_norm": 0.5573397874832153, "learning_rate": 0.0005108407777330867, "loss": 3.061, "step": 15422 }, { "epoch": 0.76, "grad_norm": 0.5408274531364441, "learning_rate": 0.0005108298248376238, "loss": 3.4797, "step": 15423 }, { "epoch": 0.76, "grad_norm": 0.5029387474060059, "learning_rate": 0.0005108188713868758, "loss": 3.2384, "step": 15424 }, { "epoch": 0.76, "grad_norm": 0.4932806193828583, "learning_rate": 0.0005108079173808713, "loss": 3.2024, "step": 15425 }, { "epoch": 0.76, "grad_norm": 0.530828058719635, "learning_rate": 0.0005107969628196393, "loss": 3.1832, "step": 15426 }, { "epoch": 0.76, "grad_norm": 0.528161883354187, "learning_rate": 0.0005107860077032088, "loss": 3.1924, "step": 15427 }, { "epoch": 0.76, "grad_norm": 0.5298112630844116, "learning_rate": 0.0005107750520316085, "loss": 3.2096, "step": 15428 }, { "epoch": 0.76, "grad_norm": 0.5262047648429871, "learning_rate": 0.0005107640958048673, "loss": 3.2419, "step": 15429 }, { "epoch": 0.76, "grad_norm": 0.5328362584114075, "learning_rate": 0.000510753139023014, "loss": 3.0983, "step": 15430 }, { "epoch": 0.76, "grad_norm": 0.5387527346611023, "learning_rate": 0.0005107421816860774, "loss": 3.1221, "step": 15431 }, { "epoch": 0.76, "grad_norm": 0.5194116234779358, "learning_rate": 0.0005107312237940866, "loss": 3.1749, "step": 15432 }, { "epoch": 0.76, "grad_norm": 0.506995677947998, "learning_rate": 0.0005107202653470703, "loss": 3.1372, "step": 15433 }, { "epoch": 0.76, "grad_norm": 0.5215865969657898, "learning_rate": 0.0005107093063450573, "loss": 3.1794, "step": 15434 }, { "epoch": 0.76, "grad_norm": 0.5082940459251404, "learning_rate": 0.0005106983467880765, "loss": 3.0021, "step": 15435 }, { "epoch": 0.76, "grad_norm": 0.5554096102714539, "learning_rate": 0.0005106873866761569, "loss": 3.2256, "step": 15436 }, { "epoch": 0.76, "grad_norm": 0.5048863291740417, "learning_rate": 0.0005106764260093273, "loss": 3.3262, "step": 15437 }, { "epoch": 0.76, "grad_norm": 0.507644534111023, "learning_rate": 0.0005106654647876165, "loss": 3.1868, "step": 15438 }, { "epoch": 0.76, "grad_norm": 0.5397703051567078, "learning_rate": 0.0005106545030110533, "loss": 3.0611, "step": 15439 }, { "epoch": 0.76, "grad_norm": 0.524456262588501, "learning_rate": 0.0005106435406796668, "loss": 3.3257, "step": 15440 }, { "epoch": 0.76, "grad_norm": 0.49036774039268494, "learning_rate": 0.0005106325777934857, "loss": 3.5181, "step": 15441 }, { "epoch": 0.76, "grad_norm": 0.5018916726112366, "learning_rate": 0.000510621614352539, "loss": 3.1098, "step": 15442 }, { "epoch": 0.76, "grad_norm": 0.5243989825248718, "learning_rate": 0.0005106106503568555, "loss": 3.1468, "step": 15443 }, { "epoch": 0.76, "grad_norm": 0.5382325649261475, "learning_rate": 0.000510599685806464, "loss": 3.2289, "step": 15444 }, { "epoch": 0.76, "grad_norm": 0.5662506222724915, "learning_rate": 0.0005105887207013934, "loss": 3.1284, "step": 15445 }, { "epoch": 0.76, "grad_norm": 0.5015741586685181, "learning_rate": 0.0005105777550416728, "loss": 3.3105, "step": 15446 }, { "epoch": 0.76, "grad_norm": 0.5282851457595825, "learning_rate": 0.0005105667888273309, "loss": 3.2448, "step": 15447 }, { "epoch": 0.76, "grad_norm": 0.5204810500144958, "learning_rate": 0.0005105558220583965, "loss": 3.2382, "step": 15448 }, { "epoch": 0.76, "grad_norm": 0.5286573171615601, "learning_rate": 0.0005105448547348986, "loss": 3.2456, "step": 15449 }, { "epoch": 0.76, "grad_norm": 0.5265070199966431, "learning_rate": 0.0005105338868568661, "loss": 3.2492, "step": 15450 }, { "epoch": 0.76, "grad_norm": 0.5228598713874817, "learning_rate": 0.0005105229184243279, "loss": 3.1497, "step": 15451 }, { "epoch": 0.76, "grad_norm": 0.5335772633552551, "learning_rate": 0.0005105119494373128, "loss": 3.2094, "step": 15452 }, { "epoch": 0.76, "grad_norm": 0.4976807236671448, "learning_rate": 0.0005105009798958498, "loss": 3.1789, "step": 15453 }, { "epoch": 0.76, "grad_norm": 0.5059366822242737, "learning_rate": 0.0005104900097999676, "loss": 3.2996, "step": 15454 }, { "epoch": 0.76, "grad_norm": 0.4928973913192749, "learning_rate": 0.0005104790391496953, "loss": 3.2344, "step": 15455 }, { "epoch": 0.76, "grad_norm": 0.48326247930526733, "learning_rate": 0.0005104680679450618, "loss": 3.3739, "step": 15456 }, { "epoch": 0.76, "grad_norm": 0.5389878749847412, "learning_rate": 0.0005104570961860958, "loss": 3.0607, "step": 15457 }, { "epoch": 0.76, "grad_norm": 0.5126422643661499, "learning_rate": 0.0005104461238728264, "loss": 3.2553, "step": 15458 }, { "epoch": 0.76, "grad_norm": 0.5000913143157959, "learning_rate": 0.0005104351510052823, "loss": 3.156, "step": 15459 }, { "epoch": 0.76, "grad_norm": 0.4934830665588379, "learning_rate": 0.0005104241775834926, "loss": 3.2617, "step": 15460 }, { "epoch": 0.76, "grad_norm": 0.5197305083274841, "learning_rate": 0.0005104132036074862, "loss": 3.2124, "step": 15461 }, { "epoch": 0.76, "grad_norm": 0.4591943025588989, "learning_rate": 0.0005104022290772918, "loss": 3.1996, "step": 15462 }, { "epoch": 0.76, "grad_norm": 0.5356560349464417, "learning_rate": 0.0005103912539929383, "loss": 3.1692, "step": 15463 }, { "epoch": 0.76, "grad_norm": 0.5015002489089966, "learning_rate": 0.0005103802783544549, "loss": 3.0953, "step": 15464 }, { "epoch": 0.76, "grad_norm": 0.5325064063072205, "learning_rate": 0.0005103693021618704, "loss": 3.1567, "step": 15465 }, { "epoch": 0.76, "grad_norm": 0.5010794401168823, "learning_rate": 0.0005103583254152134, "loss": 3.0977, "step": 15466 }, { "epoch": 0.76, "grad_norm": 0.644633948802948, "learning_rate": 0.0005103473481145132, "loss": 3.4053, "step": 15467 }, { "epoch": 0.76, "grad_norm": 0.5222316980361938, "learning_rate": 0.0005103363702597986, "loss": 3.3435, "step": 15468 }, { "epoch": 0.76, "grad_norm": 0.5279338955879211, "learning_rate": 0.0005103253918510984, "loss": 3.2726, "step": 15469 }, { "epoch": 0.76, "grad_norm": 0.5039032101631165, "learning_rate": 0.0005103144128884416, "loss": 3.1634, "step": 15470 }, { "epoch": 0.76, "grad_norm": 0.5194478631019592, "learning_rate": 0.0005103034333718572, "loss": 3.2469, "step": 15471 }, { "epoch": 0.76, "grad_norm": 0.5506890416145325, "learning_rate": 0.000510292453301374, "loss": 3.4793, "step": 15472 }, { "epoch": 0.76, "grad_norm": 0.5099464058876038, "learning_rate": 0.000510281472677021, "loss": 3.2517, "step": 15473 }, { "epoch": 0.76, "grad_norm": 0.5186891555786133, "learning_rate": 0.0005102704914988269, "loss": 3.1561, "step": 15474 }, { "epoch": 0.76, "grad_norm": 0.4984113276004791, "learning_rate": 0.0005102595097668209, "loss": 3.2346, "step": 15475 }, { "epoch": 0.76, "grad_norm": 0.5584531426429749, "learning_rate": 0.0005102485274810319, "loss": 3.1323, "step": 15476 }, { "epoch": 0.76, "grad_norm": 0.5731603503227234, "learning_rate": 0.0005102375446414886, "loss": 3.2928, "step": 15477 }, { "epoch": 0.76, "grad_norm": 0.4951866865158081, "learning_rate": 0.0005102265612482201, "loss": 3.2515, "step": 15478 }, { "epoch": 0.76, "grad_norm": 0.49377912282943726, "learning_rate": 0.0005102155773012553, "loss": 2.8477, "step": 15479 }, { "epoch": 0.76, "grad_norm": 0.5461580157279968, "learning_rate": 0.0005102045928006231, "loss": 3.1564, "step": 15480 }, { "epoch": 0.76, "grad_norm": 0.6521041393280029, "learning_rate": 0.0005101936077463525, "loss": 3.3003, "step": 15481 }, { "epoch": 0.76, "grad_norm": 0.48901596665382385, "learning_rate": 0.0005101826221384724, "loss": 3.1159, "step": 15482 }, { "epoch": 0.76, "grad_norm": 0.5265803933143616, "learning_rate": 0.0005101716359770117, "loss": 3.3615, "step": 15483 }, { "epoch": 0.76, "grad_norm": 0.5226854681968689, "learning_rate": 0.0005101606492619994, "loss": 3.0614, "step": 15484 }, { "epoch": 0.76, "grad_norm": 0.5515998601913452, "learning_rate": 0.0005101496619934644, "loss": 3.2238, "step": 15485 }, { "epoch": 0.76, "grad_norm": 0.487713485956192, "learning_rate": 0.0005101386741714356, "loss": 3.2274, "step": 15486 }, { "epoch": 0.76, "grad_norm": 0.49414488673210144, "learning_rate": 0.0005101276857959419, "loss": 3.247, "step": 15487 }, { "epoch": 0.76, "grad_norm": 0.5462265610694885, "learning_rate": 0.0005101166968670125, "loss": 3.2225, "step": 15488 }, { "epoch": 0.76, "grad_norm": 0.5077669620513916, "learning_rate": 0.000510105707384676, "loss": 3.2476, "step": 15489 }, { "epoch": 0.76, "grad_norm": 0.5247009992599487, "learning_rate": 0.0005100947173489615, "loss": 2.8845, "step": 15490 }, { "epoch": 0.76, "grad_norm": 0.4996735751628876, "learning_rate": 0.000510083726759898, "loss": 3.262, "step": 15491 }, { "epoch": 0.76, "grad_norm": 0.5037980675697327, "learning_rate": 0.0005100727356175145, "loss": 3.072, "step": 15492 }, { "epoch": 0.76, "grad_norm": 0.515090823173523, "learning_rate": 0.0005100617439218397, "loss": 3.2084, "step": 15493 }, { "epoch": 0.76, "grad_norm": 0.5135362148284912, "learning_rate": 0.0005100507516729027, "loss": 3.0892, "step": 15494 }, { "epoch": 0.76, "grad_norm": 0.4950462579727173, "learning_rate": 0.0005100397588707325, "loss": 3.0106, "step": 15495 }, { "epoch": 0.76, "grad_norm": 0.4800357520580292, "learning_rate": 0.000510028765515358, "loss": 3.2204, "step": 15496 }, { "epoch": 0.76, "grad_norm": 0.534273087978363, "learning_rate": 0.000510017771606808, "loss": 3.1293, "step": 15497 }, { "epoch": 0.76, "grad_norm": 0.5002960562705994, "learning_rate": 0.0005100067771451118, "loss": 3.2213, "step": 15498 }, { "epoch": 0.76, "grad_norm": 0.5246545076370239, "learning_rate": 0.0005099957821302981, "loss": 3.2642, "step": 15499 }, { "epoch": 0.76, "grad_norm": 0.5540328621864319, "learning_rate": 0.000509984786562396, "loss": 3.1108, "step": 15500 }, { "epoch": 0.76, "grad_norm": 0.49777376651763916, "learning_rate": 0.0005099737904414342, "loss": 3.2185, "step": 15501 }, { "epoch": 0.76, "grad_norm": 0.5149266719818115, "learning_rate": 0.0005099627937674421, "loss": 3.2102, "step": 15502 }, { "epoch": 0.76, "grad_norm": 0.4814930260181427, "learning_rate": 0.0005099517965404482, "loss": 3.1635, "step": 15503 }, { "epoch": 0.76, "grad_norm": 0.48603329062461853, "learning_rate": 0.0005099407987604818, "loss": 3.2968, "step": 15504 }, { "epoch": 0.76, "grad_norm": 0.4911821484565735, "learning_rate": 0.0005099298004275717, "loss": 3.3795, "step": 15505 }, { "epoch": 0.76, "grad_norm": 0.5378304719924927, "learning_rate": 0.0005099188015417468, "loss": 3.1863, "step": 15506 }, { "epoch": 0.76, "grad_norm": 0.5142403841018677, "learning_rate": 0.0005099078021030362, "loss": 3.2434, "step": 15507 }, { "epoch": 0.76, "grad_norm": 0.5013147592544556, "learning_rate": 0.000509896802111469, "loss": 3.1608, "step": 15508 }, { "epoch": 0.76, "grad_norm": 0.4968760907649994, "learning_rate": 0.000509885801567074, "loss": 3.195, "step": 15509 }, { "epoch": 0.76, "grad_norm": 0.5242990255355835, "learning_rate": 0.0005098748004698801, "loss": 3.2617, "step": 15510 }, { "epoch": 0.76, "grad_norm": 0.5505073070526123, "learning_rate": 0.0005098637988199163, "loss": 3.0103, "step": 15511 }, { "epoch": 0.76, "grad_norm": 0.5673673152923584, "learning_rate": 0.0005098527966172117, "loss": 3.1611, "step": 15512 }, { "epoch": 0.76, "grad_norm": 0.5113774538040161, "learning_rate": 0.0005098417938617952, "loss": 3.4058, "step": 15513 }, { "epoch": 0.76, "grad_norm": 0.4958935081958771, "learning_rate": 0.0005098307905536959, "loss": 3.1639, "step": 15514 }, { "epoch": 0.76, "grad_norm": 0.5038467645645142, "learning_rate": 0.0005098197866929427, "loss": 3.1384, "step": 15515 }, { "epoch": 0.76, "grad_norm": 0.493192195892334, "learning_rate": 0.0005098087822795645, "loss": 3.1471, "step": 15516 }, { "epoch": 0.76, "grad_norm": 0.5338720679283142, "learning_rate": 0.0005097977773135903, "loss": 3.2762, "step": 15517 }, { "epoch": 0.76, "grad_norm": 0.5778473615646362, "learning_rate": 0.0005097867717950492, "loss": 3.1879, "step": 15518 }, { "epoch": 0.76, "grad_norm": 0.5032196044921875, "learning_rate": 0.0005097757657239701, "loss": 3.1646, "step": 15519 }, { "epoch": 0.76, "grad_norm": 0.5262255668640137, "learning_rate": 0.000509764759100382, "loss": 3.2137, "step": 15520 }, { "epoch": 0.76, "grad_norm": 0.49957624077796936, "learning_rate": 0.0005097537519243139, "loss": 3.111, "step": 15521 }, { "epoch": 0.76, "grad_norm": 0.5052130818367004, "learning_rate": 0.0005097427441957948, "loss": 3.1133, "step": 15522 }, { "epoch": 0.76, "grad_norm": 0.48932185769081116, "learning_rate": 0.0005097317359148536, "loss": 3.3346, "step": 15523 }, { "epoch": 0.76, "grad_norm": 0.5048071146011353, "learning_rate": 0.0005097207270815194, "loss": 3.322, "step": 15524 }, { "epoch": 0.76, "grad_norm": 0.5462115406990051, "learning_rate": 0.0005097097176958212, "loss": 2.9782, "step": 15525 }, { "epoch": 0.76, "grad_norm": 0.5595006942749023, "learning_rate": 0.000509698707757788, "loss": 2.9495, "step": 15526 }, { "epoch": 0.76, "grad_norm": 0.5353357791900635, "learning_rate": 0.0005096876972674486, "loss": 3.2685, "step": 15527 }, { "epoch": 0.76, "grad_norm": 0.4759270250797272, "learning_rate": 0.0005096766862248323, "loss": 3.1232, "step": 15528 }, { "epoch": 0.76, "grad_norm": 0.5706182718276978, "learning_rate": 0.000509665674629968, "loss": 3.0231, "step": 15529 }, { "epoch": 0.76, "grad_norm": 0.5052098631858826, "learning_rate": 0.0005096546624828845, "loss": 3.12, "step": 15530 }, { "epoch": 0.76, "grad_norm": 0.5506969690322876, "learning_rate": 0.0005096436497836111, "loss": 3.1312, "step": 15531 }, { "epoch": 0.76, "grad_norm": 0.5286117792129517, "learning_rate": 0.0005096326365321767, "loss": 3.2991, "step": 15532 }, { "epoch": 0.76, "grad_norm": 0.5287132859230042, "learning_rate": 0.0005096216227286102, "loss": 3.136, "step": 15533 }, { "epoch": 0.76, "grad_norm": 0.512438952922821, "learning_rate": 0.0005096106083729406, "loss": 3.2366, "step": 15534 }, { "epoch": 0.76, "grad_norm": 0.5107895731925964, "learning_rate": 0.0005095995934651972, "loss": 3.4381, "step": 15535 }, { "epoch": 0.76, "grad_norm": 0.523353636264801, "learning_rate": 0.0005095885780054087, "loss": 3.1528, "step": 15536 }, { "epoch": 0.76, "grad_norm": 0.5121175646781921, "learning_rate": 0.0005095775619936042, "loss": 3.2085, "step": 15537 }, { "epoch": 0.76, "grad_norm": 0.5396693348884583, "learning_rate": 0.0005095665454298129, "loss": 2.9958, "step": 15538 }, { "epoch": 0.76, "grad_norm": 0.49359792470932007, "learning_rate": 0.0005095555283140635, "loss": 3.0549, "step": 15539 }, { "epoch": 0.76, "grad_norm": 0.584166944026947, "learning_rate": 0.0005095445106463852, "loss": 3.0041, "step": 15540 }, { "epoch": 0.76, "grad_norm": 0.5615528225898743, "learning_rate": 0.000509533492426807, "loss": 3.2767, "step": 15541 }, { "epoch": 0.76, "grad_norm": 0.5622826814651489, "learning_rate": 0.0005095224736553578, "loss": 3.09, "step": 15542 }, { "epoch": 0.76, "grad_norm": 0.5125862956047058, "learning_rate": 0.0005095114543320668, "loss": 3.2732, "step": 15543 }, { "epoch": 0.76, "grad_norm": 0.507877767086029, "learning_rate": 0.000509500434456963, "loss": 2.8684, "step": 15544 }, { "epoch": 0.76, "grad_norm": 0.5564316511154175, "learning_rate": 0.0005094894140300753, "loss": 3.1338, "step": 15545 }, { "epoch": 0.76, "grad_norm": 0.5386070609092712, "learning_rate": 0.0005094783930514329, "loss": 3.0938, "step": 15546 }, { "epoch": 0.76, "grad_norm": 0.4967600405216217, "learning_rate": 0.0005094673715210647, "loss": 3.153, "step": 15547 }, { "epoch": 0.76, "grad_norm": 0.4921207129955292, "learning_rate": 0.0005094563494389997, "loss": 3.1201, "step": 15548 }, { "epoch": 0.76, "grad_norm": 0.5483155250549316, "learning_rate": 0.0005094453268052671, "loss": 2.9707, "step": 15549 }, { "epoch": 0.76, "grad_norm": 0.5265492796897888, "learning_rate": 0.0005094343036198956, "loss": 3.1635, "step": 15550 }, { "epoch": 0.76, "grad_norm": 0.5425223708152771, "learning_rate": 0.0005094232798829146, "loss": 2.9743, "step": 15551 }, { "epoch": 0.76, "grad_norm": 0.5007231831550598, "learning_rate": 0.0005094122555943529, "loss": 3.2458, "step": 15552 }, { "epoch": 0.76, "grad_norm": 0.5598417520523071, "learning_rate": 0.0005094012307542398, "loss": 3.128, "step": 15553 }, { "epoch": 0.76, "grad_norm": 0.5178683996200562, "learning_rate": 0.000509390205362604, "loss": 3.3222, "step": 15554 }, { "epoch": 0.76, "grad_norm": 0.5192438364028931, "learning_rate": 0.0005093791794194747, "loss": 3.2078, "step": 15555 }, { "epoch": 0.76, "grad_norm": 0.5315037369728088, "learning_rate": 0.000509368152924881, "loss": 3.1406, "step": 15556 }, { "epoch": 0.76, "grad_norm": 0.5374244451522827, "learning_rate": 0.0005093571258788518, "loss": 3.0132, "step": 15557 }, { "epoch": 0.76, "grad_norm": 0.5307905077934265, "learning_rate": 0.0005093460982814164, "loss": 3.0444, "step": 15558 }, { "epoch": 0.76, "grad_norm": 0.5297430157661438, "learning_rate": 0.0005093350701326035, "loss": 3.3374, "step": 15559 }, { "epoch": 0.76, "grad_norm": 0.5116652846336365, "learning_rate": 0.0005093240414324423, "loss": 3.1824, "step": 15560 }, { "epoch": 0.76, "grad_norm": 0.5311102271080017, "learning_rate": 0.000509313012180962, "loss": 3.1643, "step": 15561 }, { "epoch": 0.76, "grad_norm": 0.5218523740768433, "learning_rate": 0.0005093019823781915, "loss": 3.2827, "step": 15562 }, { "epoch": 0.76, "grad_norm": 0.5624101161956787, "learning_rate": 0.0005092909520241596, "loss": 3.2227, "step": 15563 }, { "epoch": 0.76, "grad_norm": 0.5099868178367615, "learning_rate": 0.0005092799211188959, "loss": 3.1073, "step": 15564 }, { "epoch": 0.76, "grad_norm": 0.5127807259559631, "learning_rate": 0.000509268889662429, "loss": 3.2646, "step": 15565 }, { "epoch": 0.76, "grad_norm": 0.528416097164154, "learning_rate": 0.0005092578576547882, "loss": 3.1796, "step": 15566 }, { "epoch": 0.76, "grad_norm": 0.49281713366508484, "learning_rate": 0.0005092468250960025, "loss": 3.1264, "step": 15567 }, { "epoch": 0.76, "grad_norm": 0.5014824271202087, "learning_rate": 0.0005092357919861009, "loss": 3.1486, "step": 15568 }, { "epoch": 0.76, "grad_norm": 0.4935286045074463, "learning_rate": 0.0005092247583251124, "loss": 3.2687, "step": 15569 }, { "epoch": 0.76, "grad_norm": 0.520007312297821, "learning_rate": 0.0005092137241130662, "loss": 3.1096, "step": 15570 }, { "epoch": 0.76, "grad_norm": 0.5099589824676514, "learning_rate": 0.0005092026893499914, "loss": 3.2324, "step": 15571 }, { "epoch": 0.76, "grad_norm": 0.5131465196609497, "learning_rate": 0.0005091916540359169, "loss": 3.0389, "step": 15572 }, { "epoch": 0.76, "grad_norm": 0.49344658851623535, "learning_rate": 0.0005091806181708719, "loss": 3.3404, "step": 15573 }, { "epoch": 0.76, "grad_norm": 0.5237314701080322, "learning_rate": 0.0005091695817548853, "loss": 3.1772, "step": 15574 }, { "epoch": 0.76, "grad_norm": 0.5355901122093201, "learning_rate": 0.0005091585447879864, "loss": 3.2269, "step": 15575 }, { "epoch": 0.76, "grad_norm": 0.5290843844413757, "learning_rate": 0.0005091475072702041, "loss": 3.0839, "step": 15576 }, { "epoch": 0.76, "grad_norm": 0.5169116258621216, "learning_rate": 0.0005091364692015676, "loss": 3.2695, "step": 15577 }, { "epoch": 0.76, "grad_norm": 0.6524379253387451, "learning_rate": 0.0005091254305821058, "loss": 3.4017, "step": 15578 }, { "epoch": 0.76, "grad_norm": 0.5682939291000366, "learning_rate": 0.0005091143914118478, "loss": 3.2909, "step": 15579 }, { "epoch": 0.76, "grad_norm": 0.5173560976982117, "learning_rate": 0.0005091033516908228, "loss": 3.1853, "step": 15580 }, { "epoch": 0.76, "grad_norm": 0.5009781718254089, "learning_rate": 0.00050909231141906, "loss": 3.3297, "step": 15581 }, { "epoch": 0.76, "grad_norm": 0.5405966639518738, "learning_rate": 0.0005090812705965881, "loss": 3.1403, "step": 15582 }, { "epoch": 0.76, "grad_norm": 0.4971311390399933, "learning_rate": 0.0005090702292234363, "loss": 3.1587, "step": 15583 }, { "epoch": 0.76, "grad_norm": 0.5876317024230957, "learning_rate": 0.0005090591872996338, "loss": 3.1248, "step": 15584 }, { "epoch": 0.76, "grad_norm": 0.665259599685669, "learning_rate": 0.0005090481448252098, "loss": 3.1146, "step": 15585 }, { "epoch": 0.76, "grad_norm": 0.5335424542427063, "learning_rate": 0.000509037101800193, "loss": 3.1938, "step": 15586 }, { "epoch": 0.76, "grad_norm": 0.5514816045761108, "learning_rate": 0.0005090260582246128, "loss": 3.3065, "step": 15587 }, { "epoch": 0.76, "grad_norm": 0.5073038339614868, "learning_rate": 0.0005090150140984982, "loss": 3.2372, "step": 15588 }, { "epoch": 0.76, "grad_norm": 0.5548021793365479, "learning_rate": 0.0005090039694218782, "loss": 3.1944, "step": 15589 }, { "epoch": 0.76, "grad_norm": 0.5426687598228455, "learning_rate": 0.0005089929241947821, "loss": 3.3712, "step": 15590 }, { "epoch": 0.76, "grad_norm": 0.5318143963813782, "learning_rate": 0.0005089818784172388, "loss": 3.1281, "step": 15591 }, { "epoch": 0.76, "grad_norm": 0.5172745585441589, "learning_rate": 0.0005089708320892774, "loss": 3.1421, "step": 15592 }, { "epoch": 0.76, "grad_norm": 0.5684549808502197, "learning_rate": 0.0005089597852109271, "loss": 3.0396, "step": 15593 }, { "epoch": 0.76, "grad_norm": 0.5054349899291992, "learning_rate": 0.000508948737782217, "loss": 3.2081, "step": 15594 }, { "epoch": 0.76, "grad_norm": 0.4890936315059662, "learning_rate": 0.0005089376898031761, "loss": 3.3387, "step": 15595 }, { "epoch": 0.76, "grad_norm": 0.5217352509498596, "learning_rate": 0.0005089266412738334, "loss": 3.1863, "step": 15596 }, { "epoch": 0.76, "grad_norm": 0.529723048210144, "learning_rate": 0.0005089155921942183, "loss": 3.0914, "step": 15597 }, { "epoch": 0.76, "grad_norm": 0.5619720816612244, "learning_rate": 0.0005089045425643596, "loss": 3.222, "step": 15598 }, { "epoch": 0.76, "grad_norm": 0.5275721549987793, "learning_rate": 0.0005088934923842866, "loss": 3.2866, "step": 15599 }, { "epoch": 0.76, "grad_norm": 0.5122009515762329, "learning_rate": 0.0005088824416540285, "loss": 3.3297, "step": 15600 }, { "epoch": 0.76, "grad_norm": 0.5561813712120056, "learning_rate": 0.000508871390373614, "loss": 3.218, "step": 15601 }, { "epoch": 0.76, "grad_norm": 0.5213872194290161, "learning_rate": 0.0005088603385430727, "loss": 3.2862, "step": 15602 }, { "epoch": 0.76, "grad_norm": 0.532748281955719, "learning_rate": 0.0005088492861624332, "loss": 3.2017, "step": 15603 }, { "epoch": 0.76, "grad_norm": 0.5274077653884888, "learning_rate": 0.000508838233231725, "loss": 3.2662, "step": 15604 }, { "epoch": 0.76, "grad_norm": 0.5420587062835693, "learning_rate": 0.0005088271797509771, "loss": 3.2567, "step": 15605 }, { "epoch": 0.76, "grad_norm": 0.5665300488471985, "learning_rate": 0.0005088161257202186, "loss": 3.1568, "step": 15606 }, { "epoch": 0.76, "grad_norm": 0.5209993124008179, "learning_rate": 0.0005088050711394786, "loss": 3.3423, "step": 15607 }, { "epoch": 0.76, "grad_norm": 0.5469440221786499, "learning_rate": 0.0005087940160087862, "loss": 3.3472, "step": 15608 }, { "epoch": 0.76, "grad_norm": 0.5044681429862976, "learning_rate": 0.0005087829603281707, "loss": 3.0458, "step": 15609 }, { "epoch": 0.77, "grad_norm": 0.5147929787635803, "learning_rate": 0.0005087719040976609, "loss": 3.2094, "step": 15610 }, { "epoch": 0.77, "grad_norm": 0.5366151928901672, "learning_rate": 0.0005087608473172861, "loss": 3.1899, "step": 15611 }, { "epoch": 0.77, "grad_norm": 0.5306175351142883, "learning_rate": 0.0005087497899870754, "loss": 3.0799, "step": 15612 }, { "epoch": 0.77, "grad_norm": 0.5149161219596863, "learning_rate": 0.0005087387321070579, "loss": 3.1933, "step": 15613 }, { "epoch": 0.77, "grad_norm": 0.5391685366630554, "learning_rate": 0.0005087276736772628, "loss": 3.1901, "step": 15614 }, { "epoch": 0.77, "grad_norm": 0.5293787717819214, "learning_rate": 0.0005087166146977193, "loss": 2.8526, "step": 15615 }, { "epoch": 0.77, "grad_norm": 0.5558761358261108, "learning_rate": 0.0005087055551684562, "loss": 3.1358, "step": 15616 }, { "epoch": 0.77, "grad_norm": 0.5193555951118469, "learning_rate": 0.000508694495089503, "loss": 3.0509, "step": 15617 }, { "epoch": 0.77, "grad_norm": 0.49987754225730896, "learning_rate": 0.0005086834344608885, "loss": 3.1635, "step": 15618 }, { "epoch": 0.77, "grad_norm": 0.4778132736682892, "learning_rate": 0.000508672373282642, "loss": 2.9455, "step": 15619 }, { "epoch": 0.77, "grad_norm": 0.5623522996902466, "learning_rate": 0.0005086613115547928, "loss": 3.2735, "step": 15620 }, { "epoch": 0.77, "grad_norm": 0.5368536710739136, "learning_rate": 0.0005086502492773697, "loss": 3.0355, "step": 15621 }, { "epoch": 0.77, "grad_norm": 0.5360807776451111, "learning_rate": 0.0005086391864504022, "loss": 3.2697, "step": 15622 }, { "epoch": 0.77, "grad_norm": 0.5049542188644409, "learning_rate": 0.0005086281230739191, "loss": 3.2567, "step": 15623 }, { "epoch": 0.77, "grad_norm": 0.5606904625892639, "learning_rate": 0.0005086170591479496, "loss": 3.185, "step": 15624 }, { "epoch": 0.77, "grad_norm": 0.5596327185630798, "learning_rate": 0.000508605994672523, "loss": 3.1354, "step": 15625 }, { "epoch": 0.77, "grad_norm": 0.536123514175415, "learning_rate": 0.0005085949296476684, "loss": 3.1321, "step": 15626 }, { "epoch": 0.77, "grad_norm": 0.5361589789390564, "learning_rate": 0.0005085838640734149, "loss": 3.2706, "step": 15627 }, { "epoch": 0.77, "grad_norm": 0.5314226746559143, "learning_rate": 0.0005085727979497915, "loss": 3.1124, "step": 15628 }, { "epoch": 0.77, "grad_norm": 0.5757156610488892, "learning_rate": 0.0005085617312768277, "loss": 3.1707, "step": 15629 }, { "epoch": 0.77, "grad_norm": 0.5240241289138794, "learning_rate": 0.0005085506640545522, "loss": 3.105, "step": 15630 }, { "epoch": 0.77, "grad_norm": 0.5034173727035522, "learning_rate": 0.0005085395962829946, "loss": 3.1758, "step": 15631 }, { "epoch": 0.77, "grad_norm": 0.5380710363388062, "learning_rate": 0.0005085285279621838, "loss": 3.2823, "step": 15632 }, { "epoch": 0.77, "grad_norm": 0.5575569272041321, "learning_rate": 0.000508517459092149, "loss": 3.1727, "step": 15633 }, { "epoch": 0.77, "grad_norm": 0.5457764863967896, "learning_rate": 0.0005085063896729192, "loss": 3.0716, "step": 15634 }, { "epoch": 0.77, "grad_norm": 0.5306626558303833, "learning_rate": 0.0005084953197045238, "loss": 2.9403, "step": 15635 }, { "epoch": 0.77, "grad_norm": 0.5452754497528076, "learning_rate": 0.0005084842491869918, "loss": 3.3432, "step": 15636 }, { "epoch": 0.77, "grad_norm": 0.5277231931686401, "learning_rate": 0.0005084731781203525, "loss": 3.1267, "step": 15637 }, { "epoch": 0.77, "grad_norm": 0.5706171989440918, "learning_rate": 0.0005084621065046349, "loss": 3.2748, "step": 15638 }, { "epoch": 0.77, "grad_norm": 0.5383669137954712, "learning_rate": 0.0005084510343398682, "loss": 3.2757, "step": 15639 }, { "epoch": 0.77, "grad_norm": 0.5343171954154968, "learning_rate": 0.0005084399616260818, "loss": 3.24, "step": 15640 }, { "epoch": 0.77, "grad_norm": 0.49760371446609497, "learning_rate": 0.0005084288883633044, "loss": 3.049, "step": 15641 }, { "epoch": 0.77, "grad_norm": 0.5333790183067322, "learning_rate": 0.0005084178145515656, "loss": 3.1123, "step": 15642 }, { "epoch": 0.77, "grad_norm": 0.5920032858848572, "learning_rate": 0.0005084067401908942, "loss": 2.9831, "step": 15643 }, { "epoch": 0.77, "grad_norm": 0.5388069152832031, "learning_rate": 0.0005083956652813196, "loss": 3.0262, "step": 15644 }, { "epoch": 0.77, "grad_norm": 0.5454514622688293, "learning_rate": 0.0005083845898228711, "loss": 3.3217, "step": 15645 }, { "epoch": 0.77, "grad_norm": 0.5356780290603638, "learning_rate": 0.0005083735138155775, "loss": 3.1691, "step": 15646 }, { "epoch": 0.77, "grad_norm": 0.4733395278453827, "learning_rate": 0.0005083624372594683, "loss": 3.011, "step": 15647 }, { "epoch": 0.77, "grad_norm": 0.5192866921424866, "learning_rate": 0.0005083513601545725, "loss": 3.3161, "step": 15648 }, { "epoch": 0.77, "grad_norm": 0.4957810640335083, "learning_rate": 0.0005083402825009193, "loss": 3.2863, "step": 15649 }, { "epoch": 0.77, "grad_norm": 0.4861541986465454, "learning_rate": 0.0005083292042985379, "loss": 3.0497, "step": 15650 }, { "epoch": 0.77, "grad_norm": 0.5232390761375427, "learning_rate": 0.0005083181255474575, "loss": 2.973, "step": 15651 }, { "epoch": 0.77, "grad_norm": 0.5298289060592651, "learning_rate": 0.0005083070462477072, "loss": 3.3084, "step": 15652 }, { "epoch": 0.77, "grad_norm": 0.511946439743042, "learning_rate": 0.0005082959663993162, "loss": 3.1297, "step": 15653 }, { "epoch": 0.77, "grad_norm": 0.5342845320701599, "learning_rate": 0.0005082848860023137, "loss": 3.1233, "step": 15654 }, { "epoch": 0.77, "grad_norm": 0.5111925601959229, "learning_rate": 0.000508273805056729, "loss": 3.2461, "step": 15655 }, { "epoch": 0.77, "grad_norm": 0.553409993648529, "learning_rate": 0.0005082627235625911, "loss": 3.2622, "step": 15656 }, { "epoch": 0.77, "grad_norm": 0.503697395324707, "learning_rate": 0.0005082516415199293, "loss": 3.0538, "step": 15657 }, { "epoch": 0.77, "grad_norm": 0.5087273716926575, "learning_rate": 0.0005082405589287728, "loss": 3.1954, "step": 15658 }, { "epoch": 0.77, "grad_norm": 0.5421915650367737, "learning_rate": 0.0005082294757891507, "loss": 3.1112, "step": 15659 }, { "epoch": 0.77, "grad_norm": 0.5316805243492126, "learning_rate": 0.0005082183921010922, "loss": 3.1643, "step": 15660 }, { "epoch": 0.77, "grad_norm": 0.4953380227088928, "learning_rate": 0.0005082073078646266, "loss": 3.2955, "step": 15661 }, { "epoch": 0.77, "grad_norm": 0.5112192034721375, "learning_rate": 0.000508196223079783, "loss": 3.2071, "step": 15662 }, { "epoch": 0.77, "grad_norm": 0.5044480562210083, "learning_rate": 0.0005081851377465907, "loss": 3.0958, "step": 15663 }, { "epoch": 0.77, "grad_norm": 0.6140612363815308, "learning_rate": 0.0005081740518650787, "loss": 3.3124, "step": 15664 }, { "epoch": 0.77, "grad_norm": 0.5115430951118469, "learning_rate": 0.0005081629654352763, "loss": 3.1975, "step": 15665 }, { "epoch": 0.77, "grad_norm": 0.5128433108329773, "learning_rate": 0.0005081518784572127, "loss": 3.1859, "step": 15666 }, { "epoch": 0.77, "grad_norm": 0.49921858310699463, "learning_rate": 0.0005081407909309171, "loss": 3.1841, "step": 15667 }, { "epoch": 0.77, "grad_norm": 0.5513691306114197, "learning_rate": 0.0005081297028564189, "loss": 3.032, "step": 15668 }, { "epoch": 0.77, "grad_norm": 0.5237788558006287, "learning_rate": 0.0005081186142337469, "loss": 3.164, "step": 15669 }, { "epoch": 0.77, "grad_norm": 0.5211585164070129, "learning_rate": 0.0005081075250629306, "loss": 3.2778, "step": 15670 }, { "epoch": 0.77, "grad_norm": 0.5375601053237915, "learning_rate": 0.0005080964353439991, "loss": 3.2492, "step": 15671 }, { "epoch": 0.77, "grad_norm": 0.5239969491958618, "learning_rate": 0.0005080853450769817, "loss": 2.856, "step": 15672 }, { "epoch": 0.77, "grad_norm": 0.5531800389289856, "learning_rate": 0.0005080742542619075, "loss": 3.125, "step": 15673 }, { "epoch": 0.77, "grad_norm": 0.4881918728351593, "learning_rate": 0.0005080631628988058, "loss": 3.3741, "step": 15674 }, { "epoch": 0.77, "grad_norm": 0.5813001394271851, "learning_rate": 0.0005080520709877057, "loss": 3.2355, "step": 15675 }, { "epoch": 0.77, "grad_norm": 0.4898727238178253, "learning_rate": 0.0005080409785286366, "loss": 3.0873, "step": 15676 }, { "epoch": 0.77, "grad_norm": 0.4959816038608551, "learning_rate": 0.0005080298855216275, "loss": 3.1912, "step": 15677 }, { "epoch": 0.77, "grad_norm": 0.5205987691879272, "learning_rate": 0.0005080187919667078, "loss": 3.3549, "step": 15678 }, { "epoch": 0.77, "grad_norm": 0.5168250203132629, "learning_rate": 0.0005080076978639065, "loss": 3.2099, "step": 15679 }, { "epoch": 0.77, "grad_norm": 0.5724286437034607, "learning_rate": 0.000507996603213253, "loss": 3.0972, "step": 15680 }, { "epoch": 0.77, "grad_norm": 0.5305948257446289, "learning_rate": 0.0005079855080147766, "loss": 3.1634, "step": 15681 }, { "epoch": 0.77, "grad_norm": 0.5316970348358154, "learning_rate": 0.0005079744122685063, "loss": 3.1846, "step": 15682 }, { "epoch": 0.77, "grad_norm": 0.5323175191879272, "learning_rate": 0.0005079633159744715, "loss": 3.1129, "step": 15683 }, { "epoch": 0.77, "grad_norm": 0.49948516488075256, "learning_rate": 0.0005079522191327012, "loss": 3.2589, "step": 15684 }, { "epoch": 0.77, "grad_norm": 0.5208277702331543, "learning_rate": 0.0005079411217432249, "loss": 3.0243, "step": 15685 }, { "epoch": 0.77, "grad_norm": 0.5961699485778809, "learning_rate": 0.0005079300238060717, "loss": 2.9893, "step": 15686 }, { "epoch": 0.77, "grad_norm": 0.5415461659431458, "learning_rate": 0.0005079189253212708, "loss": 3.1732, "step": 15687 }, { "epoch": 0.77, "grad_norm": 0.4872366487979889, "learning_rate": 0.0005079078262888515, "loss": 3.1972, "step": 15688 }, { "epoch": 0.77, "grad_norm": 0.575570285320282, "learning_rate": 0.0005078967267088429, "loss": 2.9043, "step": 15689 }, { "epoch": 0.77, "grad_norm": 0.520625114440918, "learning_rate": 0.0005078856265812745, "loss": 3.0182, "step": 15690 }, { "epoch": 0.77, "grad_norm": 0.5354496240615845, "learning_rate": 0.0005078745259061752, "loss": 3.2163, "step": 15691 }, { "epoch": 0.77, "grad_norm": 0.5106788873672485, "learning_rate": 0.0005078634246835745, "loss": 2.838, "step": 15692 }, { "epoch": 0.77, "grad_norm": 0.5290341377258301, "learning_rate": 0.0005078523229135016, "loss": 3.2877, "step": 15693 }, { "epoch": 0.77, "grad_norm": 0.5494564175605774, "learning_rate": 0.0005078412205959856, "loss": 3.204, "step": 15694 }, { "epoch": 0.77, "grad_norm": 0.5220087766647339, "learning_rate": 0.0005078301177310557, "loss": 3.3395, "step": 15695 }, { "epoch": 0.77, "grad_norm": 0.573491096496582, "learning_rate": 0.0005078190143187415, "loss": 3.2214, "step": 15696 }, { "epoch": 0.77, "grad_norm": 0.5294093489646912, "learning_rate": 0.0005078079103590719, "loss": 3.1203, "step": 15697 }, { "epoch": 0.77, "grad_norm": 0.46979430317878723, "learning_rate": 0.0005077968058520762, "loss": 2.9733, "step": 15698 }, { "epoch": 0.77, "grad_norm": 0.49335652589797974, "learning_rate": 0.0005077857007977838, "loss": 3.4289, "step": 15699 }, { "epoch": 0.77, "grad_norm": 0.524200439453125, "learning_rate": 0.0005077745951962238, "loss": 3.0115, "step": 15700 }, { "epoch": 0.77, "grad_norm": 0.522419810295105, "learning_rate": 0.0005077634890474255, "loss": 3.1244, "step": 15701 }, { "epoch": 0.77, "grad_norm": 0.5147000551223755, "learning_rate": 0.0005077523823514183, "loss": 3.2699, "step": 15702 }, { "epoch": 0.77, "grad_norm": 0.5115140676498413, "learning_rate": 0.0005077412751082311, "loss": 3.0467, "step": 15703 }, { "epoch": 0.77, "grad_norm": 0.5686523914337158, "learning_rate": 0.0005077301673178934, "loss": 3.344, "step": 15704 }, { "epoch": 0.77, "grad_norm": 0.5637875199317932, "learning_rate": 0.0005077190589804346, "loss": 3.1739, "step": 15705 }, { "epoch": 0.77, "grad_norm": 0.5621719360351562, "learning_rate": 0.0005077079500958836, "loss": 3.2666, "step": 15706 }, { "epoch": 0.77, "grad_norm": 0.5126227736473083, "learning_rate": 0.0005076968406642699, "loss": 3.3009, "step": 15707 }, { "epoch": 0.77, "grad_norm": 0.5188931822776794, "learning_rate": 0.0005076857306856227, "loss": 3.0482, "step": 15708 }, { "epoch": 0.77, "grad_norm": 0.5244320631027222, "learning_rate": 0.0005076746201599712, "loss": 3.2294, "step": 15709 }, { "epoch": 0.77, "grad_norm": 0.5187132954597473, "learning_rate": 0.0005076635090873448, "loss": 3.364, "step": 15710 }, { "epoch": 0.77, "grad_norm": 0.4979041814804077, "learning_rate": 0.0005076523974677725, "loss": 3.2371, "step": 15711 }, { "epoch": 0.77, "grad_norm": 0.5199201703071594, "learning_rate": 0.000507641285301284, "loss": 3.3204, "step": 15712 }, { "epoch": 0.77, "grad_norm": 0.5369486212730408, "learning_rate": 0.0005076301725879082, "loss": 3.1625, "step": 15713 }, { "epoch": 0.77, "grad_norm": 0.5636427998542786, "learning_rate": 0.0005076190593276743, "loss": 3.2081, "step": 15714 }, { "epoch": 0.77, "grad_norm": 0.5089654922485352, "learning_rate": 0.0005076079455206121, "loss": 3.3712, "step": 15715 }, { "epoch": 0.77, "grad_norm": 0.48416969180107117, "learning_rate": 0.0005075968311667502, "loss": 3.1725, "step": 15716 }, { "epoch": 0.77, "grad_norm": 0.5651270151138306, "learning_rate": 0.0005075857162661184, "loss": 3.3323, "step": 15717 }, { "epoch": 0.77, "grad_norm": 0.5310222506523132, "learning_rate": 0.0005075746008187458, "loss": 3.2754, "step": 15718 }, { "epoch": 0.77, "grad_norm": 0.5395601987838745, "learning_rate": 0.0005075634848246616, "loss": 3.352, "step": 15719 }, { "epoch": 0.77, "grad_norm": 0.539057731628418, "learning_rate": 0.0005075523682838951, "loss": 3.0264, "step": 15720 }, { "epoch": 0.77, "grad_norm": 0.5499597191810608, "learning_rate": 0.0005075412511964755, "loss": 3.0386, "step": 15721 }, { "epoch": 0.77, "grad_norm": 0.5185397267341614, "learning_rate": 0.0005075301335624323, "loss": 3.2835, "step": 15722 }, { "epoch": 0.77, "grad_norm": 0.49906614422798157, "learning_rate": 0.0005075190153817948, "loss": 3.0069, "step": 15723 }, { "epoch": 0.77, "grad_norm": 0.5171295404434204, "learning_rate": 0.000507507896654592, "loss": 3.0378, "step": 15724 }, { "epoch": 0.77, "grad_norm": 0.5171312689781189, "learning_rate": 0.0005074967773808534, "loss": 3.1803, "step": 15725 }, { "epoch": 0.77, "grad_norm": 0.5338811874389648, "learning_rate": 0.0005074856575606082, "loss": 3.386, "step": 15726 }, { "epoch": 0.77, "grad_norm": 0.5010255575180054, "learning_rate": 0.0005074745371938857, "loss": 3.3692, "step": 15727 }, { "epoch": 0.77, "grad_norm": 0.6022095084190369, "learning_rate": 0.0005074634162807152, "loss": 3.1641, "step": 15728 }, { "epoch": 0.77, "grad_norm": 0.534349262714386, "learning_rate": 0.0005074522948211259, "loss": 3.0681, "step": 15729 }, { "epoch": 0.77, "grad_norm": 0.5148655772209167, "learning_rate": 0.0005074411728151473, "loss": 3.319, "step": 15730 }, { "epoch": 0.77, "grad_norm": 0.49930626153945923, "learning_rate": 0.0005074300502628085, "loss": 3.251, "step": 15731 }, { "epoch": 0.77, "grad_norm": 0.4870609939098358, "learning_rate": 0.0005074189271641388, "loss": 3.2754, "step": 15732 }, { "epoch": 0.77, "grad_norm": 0.5289334058761597, "learning_rate": 0.0005074078035191677, "loss": 3.0453, "step": 15733 }, { "epoch": 0.77, "grad_norm": 0.5236629247665405, "learning_rate": 0.0005073966793279243, "loss": 2.9417, "step": 15734 }, { "epoch": 0.77, "grad_norm": 0.5354396104812622, "learning_rate": 0.0005073855545904381, "loss": 3.1531, "step": 15735 }, { "epoch": 0.77, "grad_norm": 0.4817488193511963, "learning_rate": 0.0005073744293067382, "loss": 3.1444, "step": 15736 }, { "epoch": 0.77, "grad_norm": 0.5554103851318359, "learning_rate": 0.0005073633034768538, "loss": 3.0, "step": 15737 }, { "epoch": 0.77, "grad_norm": 0.5145366787910461, "learning_rate": 0.0005073521771008145, "loss": 3.187, "step": 15738 }, { "epoch": 0.77, "grad_norm": 0.48250049352645874, "learning_rate": 0.0005073410501786495, "loss": 3.19, "step": 15739 }, { "epoch": 0.77, "grad_norm": 0.5110865831375122, "learning_rate": 0.000507329922710388, "loss": 3.203, "step": 15740 }, { "epoch": 0.77, "grad_norm": 0.5449311137199402, "learning_rate": 0.0005073187946960594, "loss": 3.1025, "step": 15741 }, { "epoch": 0.77, "grad_norm": 0.5438368916511536, "learning_rate": 0.000507307666135693, "loss": 3.3186, "step": 15742 }, { "epoch": 0.77, "grad_norm": 0.49835509061813354, "learning_rate": 0.0005072965370293181, "loss": 3.3353, "step": 15743 }, { "epoch": 0.77, "grad_norm": 0.5272834897041321, "learning_rate": 0.000507285407376964, "loss": 3.0211, "step": 15744 }, { "epoch": 0.77, "grad_norm": 0.5524513721466064, "learning_rate": 0.0005072742771786601, "loss": 3.2202, "step": 15745 }, { "epoch": 0.77, "grad_norm": 0.5341085195541382, "learning_rate": 0.0005072631464344355, "loss": 3.0149, "step": 15746 }, { "epoch": 0.77, "grad_norm": 0.5116642117500305, "learning_rate": 0.0005072520151443197, "loss": 3.5498, "step": 15747 }, { "epoch": 0.77, "grad_norm": 0.5027918815612793, "learning_rate": 0.0005072408833083421, "loss": 3.1924, "step": 15748 }, { "epoch": 0.77, "grad_norm": 0.545111358165741, "learning_rate": 0.0005072297509265319, "loss": 3.1091, "step": 15749 }, { "epoch": 0.77, "grad_norm": 0.5268412828445435, "learning_rate": 0.0005072186179989184, "loss": 3.3507, "step": 15750 }, { "epoch": 0.77, "grad_norm": 0.49303969740867615, "learning_rate": 0.0005072074845255309, "loss": 3.1132, "step": 15751 }, { "epoch": 0.77, "grad_norm": 0.5391672849655151, "learning_rate": 0.0005071963505063988, "loss": 3.2345, "step": 15752 }, { "epoch": 0.77, "grad_norm": 0.4987182915210724, "learning_rate": 0.0005071852159415513, "loss": 3.0815, "step": 15753 }, { "epoch": 0.77, "grad_norm": 0.538796067237854, "learning_rate": 0.0005071740808310179, "loss": 2.9649, "step": 15754 }, { "epoch": 0.77, "grad_norm": 0.4982840120792389, "learning_rate": 0.0005071629451748277, "loss": 3.2089, "step": 15755 }, { "epoch": 0.77, "grad_norm": 0.49565887451171875, "learning_rate": 0.0005071518089730103, "loss": 3.3456, "step": 15756 }, { "epoch": 0.77, "grad_norm": 0.5028196573257446, "learning_rate": 0.0005071406722255948, "loss": 3.1211, "step": 15757 }, { "epoch": 0.77, "grad_norm": 0.4904448688030243, "learning_rate": 0.0005071295349326108, "loss": 3.2393, "step": 15758 }, { "epoch": 0.77, "grad_norm": 0.5175606608390808, "learning_rate": 0.0005071183970940874, "loss": 3.4368, "step": 15759 }, { "epoch": 0.77, "grad_norm": 0.5216371417045593, "learning_rate": 0.0005071072587100539, "loss": 3.0616, "step": 15760 }, { "epoch": 0.77, "grad_norm": 0.5647536516189575, "learning_rate": 0.0005070961197805399, "loss": 3.3533, "step": 15761 }, { "epoch": 0.77, "grad_norm": 0.5448745489120483, "learning_rate": 0.0005070849803055744, "loss": 3.2618, "step": 15762 }, { "epoch": 0.77, "grad_norm": 0.5875873565673828, "learning_rate": 0.0005070738402851871, "loss": 3.2061, "step": 15763 }, { "epoch": 0.77, "grad_norm": 0.5121092200279236, "learning_rate": 0.0005070626997194071, "loss": 3.2049, "step": 15764 }, { "epoch": 0.77, "grad_norm": 0.5112888813018799, "learning_rate": 0.0005070515586082638, "loss": 2.9788, "step": 15765 }, { "epoch": 0.77, "grad_norm": 0.537285327911377, "learning_rate": 0.0005070404169517865, "loss": 3.0783, "step": 15766 }, { "epoch": 0.77, "grad_norm": 0.5411680936813354, "learning_rate": 0.0005070292747500045, "loss": 2.9802, "step": 15767 }, { "epoch": 0.77, "grad_norm": 0.5896318554878235, "learning_rate": 0.0005070181320029474, "loss": 3.3174, "step": 15768 }, { "epoch": 0.77, "grad_norm": 0.5077767968177795, "learning_rate": 0.0005070069887106442, "loss": 3.1943, "step": 15769 }, { "epoch": 0.77, "grad_norm": 0.5382665395736694, "learning_rate": 0.0005069958448731247, "loss": 3.1708, "step": 15770 }, { "epoch": 0.77, "grad_norm": 0.5162548422813416, "learning_rate": 0.0005069847004904178, "loss": 3.2341, "step": 15771 }, { "epoch": 0.77, "grad_norm": 0.5016313195228577, "learning_rate": 0.0005069735555625531, "loss": 3.0787, "step": 15772 }, { "epoch": 0.77, "grad_norm": 0.5164852738380432, "learning_rate": 0.0005069624100895598, "loss": 3.2173, "step": 15773 }, { "epoch": 0.77, "grad_norm": 0.5445564389228821, "learning_rate": 0.0005069512640714673, "loss": 3.1206, "step": 15774 }, { "epoch": 0.77, "grad_norm": 0.5352805256843567, "learning_rate": 0.0005069401175083053, "loss": 3.3894, "step": 15775 }, { "epoch": 0.77, "grad_norm": 0.5283602476119995, "learning_rate": 0.0005069289704001025, "loss": 3.3639, "step": 15776 }, { "epoch": 0.77, "grad_norm": 0.5328353643417358, "learning_rate": 0.0005069178227468888, "loss": 3.1275, "step": 15777 }, { "epoch": 0.77, "grad_norm": 0.5417640805244446, "learning_rate": 0.0005069066745486934, "loss": 3.2893, "step": 15778 }, { "epoch": 0.77, "grad_norm": 0.5066238641738892, "learning_rate": 0.0005068955258055455, "loss": 3.1368, "step": 15779 }, { "epoch": 0.77, "grad_norm": 0.5763447284698486, "learning_rate": 0.0005068843765174747, "loss": 2.9799, "step": 15780 }, { "epoch": 0.77, "grad_norm": 0.5056501030921936, "learning_rate": 0.0005068732266845103, "loss": 3.3112, "step": 15781 }, { "epoch": 0.77, "grad_norm": 0.5100968480110168, "learning_rate": 0.0005068620763066816, "loss": 3.0971, "step": 15782 }, { "epoch": 0.77, "grad_norm": 0.517602264881134, "learning_rate": 0.000506850925384018, "loss": 3.315, "step": 15783 }, { "epoch": 0.77, "grad_norm": 0.5092670321464539, "learning_rate": 0.0005068397739165488, "loss": 3.2111, "step": 15784 }, { "epoch": 0.77, "grad_norm": 0.5618929266929626, "learning_rate": 0.0005068286219043035, "loss": 3.1347, "step": 15785 }, { "epoch": 0.77, "grad_norm": 0.5268994569778442, "learning_rate": 0.0005068174693473115, "loss": 3.1182, "step": 15786 }, { "epoch": 0.77, "grad_norm": 0.5237292647361755, "learning_rate": 0.000506806316245602, "loss": 3.0985, "step": 15787 }, { "epoch": 0.77, "grad_norm": 0.5168699622154236, "learning_rate": 0.0005067951625992044, "loss": 3.1803, "step": 15788 }, { "epoch": 0.77, "grad_norm": 0.47693702578544617, "learning_rate": 0.0005067840084081482, "loss": 3.1477, "step": 15789 }, { "epoch": 0.77, "grad_norm": 0.5285649299621582, "learning_rate": 0.0005067728536724627, "loss": 3.352, "step": 15790 }, { "epoch": 0.77, "grad_norm": 0.49356603622436523, "learning_rate": 0.0005067616983921774, "loss": 2.9803, "step": 15791 }, { "epoch": 0.77, "grad_norm": 0.512496829032898, "learning_rate": 0.0005067505425673215, "loss": 3.0556, "step": 15792 }, { "epoch": 0.77, "grad_norm": 0.5645531415939331, "learning_rate": 0.0005067393861979244, "loss": 3.2463, "step": 15793 }, { "epoch": 0.77, "grad_norm": 0.5089481472969055, "learning_rate": 0.0005067282292840156, "loss": 3.0876, "step": 15794 }, { "epoch": 0.77, "grad_norm": 0.5088877081871033, "learning_rate": 0.0005067170718256246, "loss": 3.4672, "step": 15795 }, { "epoch": 0.77, "grad_norm": 0.5133219957351685, "learning_rate": 0.0005067059138227803, "loss": 3.2869, "step": 15796 }, { "epoch": 0.77, "grad_norm": 0.5056898593902588, "learning_rate": 0.0005066947552755126, "loss": 3.2511, "step": 15797 }, { "epoch": 0.77, "grad_norm": 0.5130456686019897, "learning_rate": 0.0005066835961838507, "loss": 3.1219, "step": 15798 }, { "epoch": 0.77, "grad_norm": 0.5243682861328125, "learning_rate": 0.0005066724365478239, "loss": 3.0821, "step": 15799 }, { "epoch": 0.77, "grad_norm": 0.5532961487770081, "learning_rate": 0.0005066612763674617, "loss": 3.3234, "step": 15800 }, { "epoch": 0.77, "grad_norm": 0.5712582468986511, "learning_rate": 0.0005066501156427936, "loss": 3.2391, "step": 15801 }, { "epoch": 0.77, "grad_norm": 0.531348705291748, "learning_rate": 0.0005066389543738487, "loss": 3.485, "step": 15802 }, { "epoch": 0.77, "grad_norm": 0.5381166338920593, "learning_rate": 0.0005066277925606566, "loss": 3.2193, "step": 15803 }, { "epoch": 0.77, "grad_norm": 0.5312127470970154, "learning_rate": 0.0005066166302032468, "loss": 3.097, "step": 15804 }, { "epoch": 0.77, "grad_norm": 0.5262129902839661, "learning_rate": 0.0005066054673016484, "loss": 3.1406, "step": 15805 }, { "epoch": 0.77, "grad_norm": 0.5441365838050842, "learning_rate": 0.0005065943038558909, "loss": 3.0766, "step": 15806 }, { "epoch": 0.77, "grad_norm": 0.4887438714504242, "learning_rate": 0.0005065831398660039, "loss": 3.1556, "step": 15807 }, { "epoch": 0.77, "grad_norm": 0.5036865472793579, "learning_rate": 0.0005065719753320167, "loss": 3.0322, "step": 15808 }, { "epoch": 0.77, "grad_norm": 0.5196355581283569, "learning_rate": 0.0005065608102539586, "loss": 3.0775, "step": 15809 }, { "epoch": 0.77, "grad_norm": 0.5290484428405762, "learning_rate": 0.000506549644631859, "loss": 3.1921, "step": 15810 }, { "epoch": 0.77, "grad_norm": 0.5046884417533875, "learning_rate": 0.0005065384784657476, "loss": 3.0588, "step": 15811 }, { "epoch": 0.77, "grad_norm": 0.5305639505386353, "learning_rate": 0.0005065273117556534, "loss": 3.1943, "step": 15812 }, { "epoch": 0.77, "grad_norm": 0.5117027163505554, "learning_rate": 0.000506516144501606, "loss": 3.1293, "step": 15813 }, { "epoch": 0.78, "grad_norm": 0.52192223072052, "learning_rate": 0.0005065049767036349, "loss": 3.0501, "step": 15814 }, { "epoch": 0.78, "grad_norm": 0.5212222337722778, "learning_rate": 0.0005064938083617695, "loss": 3.0072, "step": 15815 }, { "epoch": 0.78, "grad_norm": 0.5260198712348938, "learning_rate": 0.0005064826394760391, "loss": 3.2323, "step": 15816 }, { "epoch": 0.78, "grad_norm": 0.5320043563842773, "learning_rate": 0.0005064714700464731, "loss": 3.187, "step": 15817 }, { "epoch": 0.78, "grad_norm": 0.5128186941146851, "learning_rate": 0.000506460300073101, "loss": 3.4666, "step": 15818 }, { "epoch": 0.78, "grad_norm": 0.5312202572822571, "learning_rate": 0.0005064491295559523, "loss": 3.3288, "step": 15819 }, { "epoch": 0.78, "grad_norm": 0.5054448843002319, "learning_rate": 0.0005064379584950562, "loss": 3.2213, "step": 15820 }, { "epoch": 0.78, "grad_norm": 0.6765322685241699, "learning_rate": 0.0005064267868904423, "loss": 3.3935, "step": 15821 }, { "epoch": 0.78, "grad_norm": 0.525917112827301, "learning_rate": 0.00050641561474214, "loss": 3.1906, "step": 15822 }, { "epoch": 0.78, "grad_norm": 0.5486940741539001, "learning_rate": 0.0005064044420501787, "loss": 3.1634, "step": 15823 }, { "epoch": 0.78, "grad_norm": 0.5462398529052734, "learning_rate": 0.0005063932688145877, "loss": 3.2199, "step": 15824 }, { "epoch": 0.78, "grad_norm": 0.5118207335472107, "learning_rate": 0.0005063820950353966, "loss": 3.217, "step": 15825 }, { "epoch": 0.78, "grad_norm": 0.5313277840614319, "learning_rate": 0.000506370920712635, "loss": 3.1615, "step": 15826 }, { "epoch": 0.78, "grad_norm": 0.49920400977134705, "learning_rate": 0.0005063597458463319, "loss": 3.3053, "step": 15827 }, { "epoch": 0.78, "grad_norm": 0.4805811047554016, "learning_rate": 0.0005063485704365169, "loss": 3.2122, "step": 15828 }, { "epoch": 0.78, "grad_norm": 0.5627545714378357, "learning_rate": 0.0005063373944832196, "loss": 3.0398, "step": 15829 }, { "epoch": 0.78, "grad_norm": 0.5166324377059937, "learning_rate": 0.0005063262179864692, "loss": 3.2995, "step": 15830 }, { "epoch": 0.78, "grad_norm": 0.8689070343971252, "learning_rate": 0.0005063150409462954, "loss": 3.2035, "step": 15831 }, { "epoch": 0.78, "grad_norm": 0.5488486289978027, "learning_rate": 0.0005063038633627274, "loss": 3.0854, "step": 15832 }, { "epoch": 0.78, "grad_norm": 0.5090166926383972, "learning_rate": 0.0005062926852357947, "loss": 3.479, "step": 15833 }, { "epoch": 0.78, "grad_norm": 0.515069842338562, "learning_rate": 0.0005062815065655269, "loss": 2.9501, "step": 15834 }, { "epoch": 0.78, "grad_norm": 0.5386884212493896, "learning_rate": 0.0005062703273519531, "loss": 3.3355, "step": 15835 }, { "epoch": 0.78, "grad_norm": 0.5605966448783875, "learning_rate": 0.0005062591475951031, "loss": 3.2428, "step": 15836 }, { "epoch": 0.78, "grad_norm": 0.580772876739502, "learning_rate": 0.0005062479672950063, "loss": 3.1849, "step": 15837 }, { "epoch": 0.78, "grad_norm": 0.5151135325431824, "learning_rate": 0.0005062367864516919, "loss": 3.139, "step": 15838 }, { "epoch": 0.78, "grad_norm": 0.5454901456832886, "learning_rate": 0.0005062256050651895, "loss": 3.0489, "step": 15839 }, { "epoch": 0.78, "grad_norm": 0.5578955411911011, "learning_rate": 0.0005062144231355285, "loss": 3.2888, "step": 15840 }, { "epoch": 0.78, "grad_norm": 0.5507684350013733, "learning_rate": 0.0005062032406627384, "loss": 3.2061, "step": 15841 }, { "epoch": 0.78, "grad_norm": 0.5147064924240112, "learning_rate": 0.0005061920576468488, "loss": 3.3796, "step": 15842 }, { "epoch": 0.78, "grad_norm": 0.5263797044754028, "learning_rate": 0.0005061808740878889, "loss": 3.0018, "step": 15843 }, { "epoch": 0.78, "grad_norm": 0.5236734747886658, "learning_rate": 0.0005061696899858883, "loss": 3.2627, "step": 15844 }, { "epoch": 0.78, "grad_norm": 0.5589058995246887, "learning_rate": 0.0005061585053408764, "loss": 3.3151, "step": 15845 }, { "epoch": 0.78, "grad_norm": 0.5322604179382324, "learning_rate": 0.0005061473201528826, "loss": 3.1628, "step": 15846 }, { "epoch": 0.78, "grad_norm": 0.5170190334320068, "learning_rate": 0.0005061361344219365, "loss": 3.2358, "step": 15847 }, { "epoch": 0.78, "grad_norm": 0.5249111652374268, "learning_rate": 0.0005061249481480675, "loss": 3.1819, "step": 15848 }, { "epoch": 0.78, "grad_norm": 0.5331235527992249, "learning_rate": 0.000506113761331305, "loss": 3.2316, "step": 15849 }, { "epoch": 0.78, "grad_norm": 0.5436180830001831, "learning_rate": 0.0005061025739716786, "loss": 2.9909, "step": 15850 }, { "epoch": 0.78, "grad_norm": 0.5154057145118713, "learning_rate": 0.0005060913860692177, "loss": 3.0543, "step": 15851 }, { "epoch": 0.78, "grad_norm": 0.5232006311416626, "learning_rate": 0.0005060801976239516, "loss": 3.1253, "step": 15852 }, { "epoch": 0.78, "grad_norm": 0.605620801448822, "learning_rate": 0.0005060690086359101, "loss": 3.3018, "step": 15853 }, { "epoch": 0.78, "grad_norm": 0.4980825185775757, "learning_rate": 0.0005060578191051225, "loss": 3.0301, "step": 15854 }, { "epoch": 0.78, "grad_norm": 0.5214237570762634, "learning_rate": 0.000506046629031618, "loss": 3.4541, "step": 15855 }, { "epoch": 0.78, "grad_norm": 0.5480488538742065, "learning_rate": 0.0005060354384154265, "loss": 3.018, "step": 15856 }, { "epoch": 0.78, "grad_norm": 0.5750018358230591, "learning_rate": 0.0005060242472565774, "loss": 3.3321, "step": 15857 }, { "epoch": 0.78, "grad_norm": 0.5459998250007629, "learning_rate": 0.0005060130555550999, "loss": 3.1318, "step": 15858 }, { "epoch": 0.78, "grad_norm": 0.5523403882980347, "learning_rate": 0.0005060018633110238, "loss": 3.311, "step": 15859 }, { "epoch": 0.78, "grad_norm": 0.5640004873275757, "learning_rate": 0.0005059906705243783, "loss": 3.1297, "step": 15860 }, { "epoch": 0.78, "grad_norm": 0.5225830674171448, "learning_rate": 0.0005059794771951931, "loss": 3.2224, "step": 15861 }, { "epoch": 0.78, "grad_norm": 0.49763786792755127, "learning_rate": 0.0005059682833234977, "loss": 3.2197, "step": 15862 }, { "epoch": 0.78, "grad_norm": 0.517906665802002, "learning_rate": 0.0005059570889093214, "loss": 3.1153, "step": 15863 }, { "epoch": 0.78, "grad_norm": 0.5416662096977234, "learning_rate": 0.0005059458939526937, "loss": 3.3586, "step": 15864 }, { "epoch": 0.78, "grad_norm": 0.48512333631515503, "learning_rate": 0.0005059346984536442, "loss": 3.1849, "step": 15865 }, { "epoch": 0.78, "grad_norm": 0.5373478531837463, "learning_rate": 0.0005059235024122024, "loss": 3.0875, "step": 15866 }, { "epoch": 0.78, "grad_norm": 0.4900364577770233, "learning_rate": 0.0005059123058283976, "loss": 3.2715, "step": 15867 }, { "epoch": 0.78, "grad_norm": 0.5320209860801697, "learning_rate": 0.0005059011087022595, "loss": 3.3284, "step": 15868 }, { "epoch": 0.78, "grad_norm": 0.5448189973831177, "learning_rate": 0.0005058899110338175, "loss": 3.1436, "step": 15869 }, { "epoch": 0.78, "grad_norm": 0.5075494647026062, "learning_rate": 0.0005058787128231011, "loss": 3.2013, "step": 15870 }, { "epoch": 0.78, "grad_norm": 0.5199207663536072, "learning_rate": 0.0005058675140701398, "loss": 3.1813, "step": 15871 }, { "epoch": 0.78, "grad_norm": 0.49894481897354126, "learning_rate": 0.000505856314774963, "loss": 3.1914, "step": 15872 }, { "epoch": 0.78, "grad_norm": 0.5516239404678345, "learning_rate": 0.0005058451149376003, "loss": 3.2814, "step": 15873 }, { "epoch": 0.78, "grad_norm": 0.5630885362625122, "learning_rate": 0.0005058339145580813, "loss": 3.219, "step": 15874 }, { "epoch": 0.78, "grad_norm": 0.5175797343254089, "learning_rate": 0.0005058227136364353, "loss": 3.3844, "step": 15875 }, { "epoch": 0.78, "grad_norm": 0.5631835460662842, "learning_rate": 0.0005058115121726918, "loss": 3.1561, "step": 15876 }, { "epoch": 0.78, "grad_norm": 0.5062498450279236, "learning_rate": 0.0005058003101668806, "loss": 3.1483, "step": 15877 }, { "epoch": 0.78, "grad_norm": 0.531692385673523, "learning_rate": 0.0005057891076190309, "loss": 3.3115, "step": 15878 }, { "epoch": 0.78, "grad_norm": 0.491470068693161, "learning_rate": 0.0005057779045291723, "loss": 3.1625, "step": 15879 }, { "epoch": 0.78, "grad_norm": 0.5266138315200806, "learning_rate": 0.0005057667008973341, "loss": 3.2485, "step": 15880 }, { "epoch": 0.78, "grad_norm": 0.5649579763412476, "learning_rate": 0.0005057554967235463, "loss": 3.2064, "step": 15881 }, { "epoch": 0.78, "grad_norm": 0.4945317804813385, "learning_rate": 0.000505744292007838, "loss": 3.077, "step": 15882 }, { "epoch": 0.78, "grad_norm": 0.5235192775726318, "learning_rate": 0.0005057330867502389, "loss": 3.2043, "step": 15883 }, { "epoch": 0.78, "grad_norm": 0.4946229159832001, "learning_rate": 0.0005057218809507782, "loss": 3.0865, "step": 15884 }, { "epoch": 0.78, "grad_norm": 0.5265135169029236, "learning_rate": 0.0005057106746094859, "loss": 3.2467, "step": 15885 }, { "epoch": 0.78, "grad_norm": 0.4959494173526764, "learning_rate": 0.0005056994677263913, "loss": 3.212, "step": 15886 }, { "epoch": 0.78, "grad_norm": 0.5413668155670166, "learning_rate": 0.0005056882603015237, "loss": 3.0447, "step": 15887 }, { "epoch": 0.78, "grad_norm": 0.4981670081615448, "learning_rate": 0.000505677052334913, "loss": 3.1375, "step": 15888 }, { "epoch": 0.78, "grad_norm": 0.5313765406608582, "learning_rate": 0.0005056658438265884, "loss": 3.1866, "step": 15889 }, { "epoch": 0.78, "grad_norm": 0.5187064409255981, "learning_rate": 0.0005056546347765796, "loss": 3.3122, "step": 15890 }, { "epoch": 0.78, "grad_norm": 0.5525301098823547, "learning_rate": 0.0005056434251849161, "loss": 3.1409, "step": 15891 }, { "epoch": 0.78, "grad_norm": 0.5722075700759888, "learning_rate": 0.0005056322150516273, "loss": 3.1696, "step": 15892 }, { "epoch": 0.78, "grad_norm": 0.5104582905769348, "learning_rate": 0.0005056210043767428, "loss": 3.09, "step": 15893 }, { "epoch": 0.78, "grad_norm": 0.5645208358764648, "learning_rate": 0.0005056097931602923, "loss": 3.2814, "step": 15894 }, { "epoch": 0.78, "grad_norm": 0.513430118560791, "learning_rate": 0.000505598581402305, "loss": 3.0936, "step": 15895 }, { "epoch": 0.78, "grad_norm": 0.5425407886505127, "learning_rate": 0.0005055873691028108, "loss": 3.3683, "step": 15896 }, { "epoch": 0.78, "grad_norm": 0.48319244384765625, "learning_rate": 0.0005055761562618388, "loss": 3.1959, "step": 15897 }, { "epoch": 0.78, "grad_norm": 0.518714964389801, "learning_rate": 0.000505564942879419, "loss": 3.0281, "step": 15898 }, { "epoch": 0.78, "grad_norm": 0.5663323998451233, "learning_rate": 0.0005055537289555806, "loss": 3.3935, "step": 15899 }, { "epoch": 0.78, "grad_norm": 0.5486282706260681, "learning_rate": 0.0005055425144903532, "loss": 3.3414, "step": 15900 }, { "epoch": 0.78, "grad_norm": 0.5473208427429199, "learning_rate": 0.0005055312994837664, "loss": 3.3134, "step": 15901 }, { "epoch": 0.78, "grad_norm": 0.5325106382369995, "learning_rate": 0.0005055200839358497, "loss": 3.1135, "step": 15902 }, { "epoch": 0.78, "grad_norm": 0.5353180170059204, "learning_rate": 0.0005055088678466327, "loss": 3.1344, "step": 15903 }, { "epoch": 0.78, "grad_norm": 0.5123947858810425, "learning_rate": 0.0005054976512161449, "loss": 3.0725, "step": 15904 }, { "epoch": 0.78, "grad_norm": 0.5428218245506287, "learning_rate": 0.0005054864340444158, "loss": 3.244, "step": 15905 }, { "epoch": 0.78, "grad_norm": 0.5174368023872375, "learning_rate": 0.000505475216331475, "loss": 3.2212, "step": 15906 }, { "epoch": 0.78, "grad_norm": 0.5281883478164673, "learning_rate": 0.0005054639980773519, "loss": 3.1813, "step": 15907 }, { "epoch": 0.78, "grad_norm": 0.564376175403595, "learning_rate": 0.0005054527792820764, "loss": 3.0156, "step": 15908 }, { "epoch": 0.78, "grad_norm": 0.5363826751708984, "learning_rate": 0.0005054415599456776, "loss": 3.3758, "step": 15909 }, { "epoch": 0.78, "grad_norm": 0.514251708984375, "learning_rate": 0.0005054303400681855, "loss": 3.2013, "step": 15910 }, { "epoch": 0.78, "grad_norm": 0.5030025839805603, "learning_rate": 0.0005054191196496293, "loss": 3.2198, "step": 15911 }, { "epoch": 0.78, "grad_norm": 0.5085748434066772, "learning_rate": 0.0005054078986900387, "loss": 3.2473, "step": 15912 }, { "epoch": 0.78, "grad_norm": 0.5087880492210388, "learning_rate": 0.0005053966771894432, "loss": 3.2511, "step": 15913 }, { "epoch": 0.78, "grad_norm": 0.550105631351471, "learning_rate": 0.0005053854551478723, "loss": 3.0949, "step": 15914 }, { "epoch": 0.78, "grad_norm": 0.5327011942863464, "learning_rate": 0.0005053742325653557, "loss": 3.2427, "step": 15915 }, { "epoch": 0.78, "grad_norm": 0.5084472894668579, "learning_rate": 0.0005053630094419228, "loss": 3.0718, "step": 15916 }, { "epoch": 0.78, "grad_norm": 0.5227506160736084, "learning_rate": 0.0005053517857776035, "loss": 3.1136, "step": 15917 }, { "epoch": 0.78, "grad_norm": 0.5210598707199097, "learning_rate": 0.000505340561572427, "loss": 3.266, "step": 15918 }, { "epoch": 0.78, "grad_norm": 0.5185865759849548, "learning_rate": 0.000505329336826423, "loss": 3.0593, "step": 15919 }, { "epoch": 0.78, "grad_norm": 0.5381935834884644, "learning_rate": 0.0005053181115396209, "loss": 3.1447, "step": 15920 }, { "epoch": 0.78, "grad_norm": 0.5370237231254578, "learning_rate": 0.0005053068857120505, "loss": 3.0868, "step": 15921 }, { "epoch": 0.78, "grad_norm": 0.5445959568023682, "learning_rate": 0.0005052956593437413, "loss": 3.2731, "step": 15922 }, { "epoch": 0.78, "grad_norm": 0.5113406181335449, "learning_rate": 0.0005052844324347228, "loss": 3.2167, "step": 15923 }, { "epoch": 0.78, "grad_norm": 0.5293589234352112, "learning_rate": 0.0005052732049850246, "loss": 3.3285, "step": 15924 }, { "epoch": 0.78, "grad_norm": 0.523661196231842, "learning_rate": 0.0005052619769946764, "loss": 3.2882, "step": 15925 }, { "epoch": 0.78, "grad_norm": 0.5041723251342773, "learning_rate": 0.0005052507484637076, "loss": 3.0888, "step": 15926 }, { "epoch": 0.78, "grad_norm": 0.5257124900817871, "learning_rate": 0.0005052395193921478, "loss": 3.2741, "step": 15927 }, { "epoch": 0.78, "grad_norm": 0.48543840646743774, "learning_rate": 0.0005052282897800266, "loss": 3.3522, "step": 15928 }, { "epoch": 0.78, "grad_norm": 0.5463740825653076, "learning_rate": 0.0005052170596273735, "loss": 3.3716, "step": 15929 }, { "epoch": 0.78, "grad_norm": 0.5675771236419678, "learning_rate": 0.0005052058289342184, "loss": 3.0156, "step": 15930 }, { "epoch": 0.78, "grad_norm": 0.511083722114563, "learning_rate": 0.0005051945977005905, "loss": 3.1242, "step": 15931 }, { "epoch": 0.78, "grad_norm": 0.5357405543327332, "learning_rate": 0.0005051833659265195, "loss": 3.2083, "step": 15932 }, { "epoch": 0.78, "grad_norm": 0.5028762221336365, "learning_rate": 0.000505172133612035, "loss": 3.1726, "step": 15933 }, { "epoch": 0.78, "grad_norm": 0.5303201675415039, "learning_rate": 0.0005051609007571666, "loss": 3.2378, "step": 15934 }, { "epoch": 0.78, "grad_norm": 0.5492925047874451, "learning_rate": 0.0005051496673619439, "loss": 3.3461, "step": 15935 }, { "epoch": 0.78, "grad_norm": 0.5268462896347046, "learning_rate": 0.0005051384334263965, "loss": 3.1529, "step": 15936 }, { "epoch": 0.78, "grad_norm": 0.5270599722862244, "learning_rate": 0.0005051271989505538, "loss": 3.1615, "step": 15937 }, { "epoch": 0.78, "grad_norm": 0.5161546468734741, "learning_rate": 0.0005051159639344456, "loss": 3.2024, "step": 15938 }, { "epoch": 0.78, "grad_norm": 0.5089823603630066, "learning_rate": 0.0005051047283781015, "loss": 3.1198, "step": 15939 }, { "epoch": 0.78, "grad_norm": 0.48041659593582153, "learning_rate": 0.000505093492281551, "loss": 3.3224, "step": 15940 }, { "epoch": 0.78, "grad_norm": 0.5333757400512695, "learning_rate": 0.0005050822556448236, "loss": 3.1934, "step": 15941 }, { "epoch": 0.78, "grad_norm": 0.5357717275619507, "learning_rate": 0.000505071018467949, "loss": 3.2549, "step": 15942 }, { "epoch": 0.78, "grad_norm": 0.5316967368125916, "learning_rate": 0.0005050597807509569, "loss": 3.0993, "step": 15943 }, { "epoch": 0.78, "grad_norm": 0.5037146210670471, "learning_rate": 0.0005050485424938769, "loss": 3.2657, "step": 15944 }, { "epoch": 0.78, "grad_norm": 0.5503755807876587, "learning_rate": 0.0005050373036967384, "loss": 3.2414, "step": 15945 }, { "epoch": 0.78, "grad_norm": 0.5169128179550171, "learning_rate": 0.0005050260643595711, "loss": 2.9953, "step": 15946 }, { "epoch": 0.78, "grad_norm": 0.5049517154693604, "learning_rate": 0.0005050148244824045, "loss": 3.2635, "step": 15947 }, { "epoch": 0.78, "grad_norm": 0.4886893928050995, "learning_rate": 0.0005050035840652684, "loss": 3.1339, "step": 15948 }, { "epoch": 0.78, "grad_norm": 0.5033433437347412, "learning_rate": 0.0005049923431081924, "loss": 3.0855, "step": 15949 }, { "epoch": 0.78, "grad_norm": 0.5404435396194458, "learning_rate": 0.0005049811016112059, "loss": 3.0479, "step": 15950 }, { "epoch": 0.78, "grad_norm": 0.5240478515625, "learning_rate": 0.0005049698595743387, "loss": 3.0661, "step": 15951 }, { "epoch": 0.78, "grad_norm": 0.5057730674743652, "learning_rate": 0.0005049586169976202, "loss": 3.3078, "step": 15952 }, { "epoch": 0.78, "grad_norm": 0.5399793982505798, "learning_rate": 0.0005049473738810803, "loss": 3.0763, "step": 15953 }, { "epoch": 0.78, "grad_norm": 0.4813418984413147, "learning_rate": 0.0005049361302247485, "loss": 3.1835, "step": 15954 }, { "epoch": 0.78, "grad_norm": 0.5555798411369324, "learning_rate": 0.0005049248860286542, "loss": 3.1501, "step": 15955 }, { "epoch": 0.78, "grad_norm": 0.5001389980316162, "learning_rate": 0.0005049136412928273, "loss": 3.143, "step": 15956 }, { "epoch": 0.78, "grad_norm": 0.5168265700340271, "learning_rate": 0.0005049023960172973, "loss": 3.2054, "step": 15957 }, { "epoch": 0.78, "grad_norm": 0.5343358516693115, "learning_rate": 0.0005048911502020938, "loss": 3.292, "step": 15958 }, { "epoch": 0.78, "grad_norm": 0.5781263113021851, "learning_rate": 0.0005048799038472465, "loss": 3.1776, "step": 15959 }, { "epoch": 0.78, "grad_norm": 0.5077539682388306, "learning_rate": 0.0005048686569527848, "loss": 3.1976, "step": 15960 }, { "epoch": 0.78, "grad_norm": 0.5165067315101624, "learning_rate": 0.0005048574095187385, "loss": 3.0778, "step": 15961 }, { "epoch": 0.78, "grad_norm": 0.5727941393852234, "learning_rate": 0.0005048461615451374, "loss": 3.2127, "step": 15962 }, { "epoch": 0.78, "grad_norm": 0.5406970381736755, "learning_rate": 0.0005048349130320108, "loss": 3.2311, "step": 15963 }, { "epoch": 0.78, "grad_norm": 0.5371522903442383, "learning_rate": 0.0005048236639793885, "loss": 3.1551, "step": 15964 }, { "epoch": 0.78, "grad_norm": 0.5899889469146729, "learning_rate": 0.0005048124143873001, "loss": 3.213, "step": 15965 }, { "epoch": 0.78, "grad_norm": 0.5044421553611755, "learning_rate": 0.0005048011642557751, "loss": 3.383, "step": 15966 }, { "epoch": 0.78, "grad_norm": 0.5018906593322754, "learning_rate": 0.0005047899135848435, "loss": 3.4013, "step": 15967 }, { "epoch": 0.78, "grad_norm": 0.47671008110046387, "learning_rate": 0.0005047786623745345, "loss": 3.3241, "step": 15968 }, { "epoch": 0.78, "grad_norm": 0.5087835788726807, "learning_rate": 0.0005047674106248779, "loss": 3.223, "step": 15969 }, { "epoch": 0.78, "grad_norm": 0.5542645454406738, "learning_rate": 0.0005047561583359034, "loss": 3.206, "step": 15970 }, { "epoch": 0.78, "grad_norm": 0.4945680499076843, "learning_rate": 0.0005047449055076407, "loss": 3.1473, "step": 15971 }, { "epoch": 0.78, "grad_norm": 0.4865614175796509, "learning_rate": 0.0005047336521401191, "loss": 3.1573, "step": 15972 }, { "epoch": 0.78, "grad_norm": 0.4855407476425171, "learning_rate": 0.0005047223982333685, "loss": 3.2028, "step": 15973 }, { "epoch": 0.78, "grad_norm": 0.5474462509155273, "learning_rate": 0.0005047111437874186, "loss": 3.4711, "step": 15974 }, { "epoch": 0.78, "grad_norm": 0.5180954933166504, "learning_rate": 0.0005046998888022988, "loss": 3.0933, "step": 15975 }, { "epoch": 0.78, "grad_norm": 0.5082975625991821, "learning_rate": 0.0005046886332780392, "loss": 3.1662, "step": 15976 }, { "epoch": 0.78, "grad_norm": 0.6138001680374146, "learning_rate": 0.0005046773772146688, "loss": 3.0991, "step": 15977 }, { "epoch": 0.78, "grad_norm": 0.5285327434539795, "learning_rate": 0.0005046661206122178, "loss": 3.1361, "step": 15978 }, { "epoch": 0.78, "grad_norm": 0.5196309685707092, "learning_rate": 0.0005046548634707155, "loss": 3.0654, "step": 15979 }, { "epoch": 0.78, "grad_norm": 0.5266782641410828, "learning_rate": 0.0005046436057901917, "loss": 3.2551, "step": 15980 }, { "epoch": 0.78, "grad_norm": 0.5191410183906555, "learning_rate": 0.0005046323475706761, "loss": 3.3328, "step": 15981 }, { "epoch": 0.78, "grad_norm": 0.5638286471366882, "learning_rate": 0.0005046210888121982, "loss": 3.324, "step": 15982 }, { "epoch": 0.78, "grad_norm": 0.5119996666908264, "learning_rate": 0.0005046098295147877, "loss": 3.0339, "step": 15983 }, { "epoch": 0.78, "grad_norm": 0.5803064703941345, "learning_rate": 0.0005045985696784743, "loss": 3.3781, "step": 15984 }, { "epoch": 0.78, "grad_norm": 0.5395102500915527, "learning_rate": 0.0005045873093032878, "loss": 3.4653, "step": 15985 }, { "epoch": 0.78, "grad_norm": 0.508126437664032, "learning_rate": 0.0005045760483892575, "loss": 3.1083, "step": 15986 }, { "epoch": 0.78, "grad_norm": 0.48580315709114075, "learning_rate": 0.0005045647869364134, "loss": 3.0414, "step": 15987 }, { "epoch": 0.78, "grad_norm": 0.5052012205123901, "learning_rate": 0.0005045535249447848, "loss": 2.9321, "step": 15988 }, { "epoch": 0.78, "grad_norm": 0.575672447681427, "learning_rate": 0.0005045422624144019, "loss": 3.2742, "step": 15989 }, { "epoch": 0.78, "grad_norm": 0.5062314867973328, "learning_rate": 0.0005045309993452939, "loss": 3.2034, "step": 15990 }, { "epoch": 0.78, "grad_norm": 0.5185468792915344, "learning_rate": 0.0005045197357374906, "loss": 3.2348, "step": 15991 }, { "epoch": 0.78, "grad_norm": 0.5151081681251526, "learning_rate": 0.0005045084715910216, "loss": 3.2609, "step": 15992 }, { "epoch": 0.78, "grad_norm": 0.4942055642604828, "learning_rate": 0.0005044972069059167, "loss": 3.2212, "step": 15993 }, { "epoch": 0.78, "grad_norm": 0.4843122065067291, "learning_rate": 0.0005044859416822056, "loss": 3.1712, "step": 15994 }, { "epoch": 0.78, "grad_norm": 0.5047922134399414, "learning_rate": 0.0005044746759199178, "loss": 3.2883, "step": 15995 }, { "epoch": 0.78, "grad_norm": 0.5109986066818237, "learning_rate": 0.0005044634096190831, "loss": 2.9292, "step": 15996 }, { "epoch": 0.78, "grad_norm": 0.5134650468826294, "learning_rate": 0.0005044521427797311, "loss": 3.1455, "step": 15997 }, { "epoch": 0.78, "grad_norm": 0.5431228280067444, "learning_rate": 0.0005044408754018915, "loss": 3.1955, "step": 15998 }, { "epoch": 0.78, "grad_norm": 0.5586522817611694, "learning_rate": 0.000504429607485594, "loss": 3.2098, "step": 15999 }, { "epoch": 0.78, "grad_norm": 0.5322924852371216, "learning_rate": 0.0005044183390308682, "loss": 3.3816, "step": 16000 }, { "epoch": 0.78, "grad_norm": 0.49801820516586304, "learning_rate": 0.000504407070037744, "loss": 3.3149, "step": 16001 }, { "epoch": 0.78, "grad_norm": 0.5596066117286682, "learning_rate": 0.0005043958005062506, "loss": 3.1123, "step": 16002 }, { "epoch": 0.78, "grad_norm": 0.5819718241691589, "learning_rate": 0.0005043845304364183, "loss": 3.3048, "step": 16003 }, { "epoch": 0.78, "grad_norm": 0.5312378406524658, "learning_rate": 0.0005043732598282763, "loss": 3.2235, "step": 16004 }, { "epoch": 0.78, "grad_norm": 0.4894445538520813, "learning_rate": 0.0005043619886818546, "loss": 3.2897, "step": 16005 }, { "epoch": 0.78, "grad_norm": 0.5514426827430725, "learning_rate": 0.0005043507169971827, "loss": 3.0689, "step": 16006 }, { "epoch": 0.78, "grad_norm": 0.5011432766914368, "learning_rate": 0.0005043394447742903, "loss": 3.2763, "step": 16007 }, { "epoch": 0.78, "grad_norm": 0.5099719762802124, "learning_rate": 0.0005043281720132072, "loss": 3.2354, "step": 16008 }, { "epoch": 0.78, "grad_norm": 0.5169076919555664, "learning_rate": 0.000504316898713963, "loss": 3.2652, "step": 16009 }, { "epoch": 0.78, "grad_norm": 0.5074393153190613, "learning_rate": 0.0005043056248765875, "loss": 3.3298, "step": 16010 }, { "epoch": 0.78, "grad_norm": 0.5243656635284424, "learning_rate": 0.0005042943505011101, "loss": 3.2304, "step": 16011 }, { "epoch": 0.78, "grad_norm": 0.5197635293006897, "learning_rate": 0.0005042830755875607, "loss": 3.1894, "step": 16012 }, { "epoch": 0.78, "grad_norm": 0.5310289263725281, "learning_rate": 0.0005042718001359691, "loss": 3.3684, "step": 16013 }, { "epoch": 0.78, "grad_norm": 0.532052218914032, "learning_rate": 0.000504260524146365, "loss": 3.1898, "step": 16014 }, { "epoch": 0.78, "grad_norm": 0.5254867672920227, "learning_rate": 0.0005042492476187778, "loss": 3.1261, "step": 16015 }, { "epoch": 0.78, "grad_norm": 0.5229167342185974, "learning_rate": 0.0005042379705532374, "loss": 3.2582, "step": 16016 }, { "epoch": 0.78, "grad_norm": 0.5486670136451721, "learning_rate": 0.0005042266929497736, "loss": 3.1984, "step": 16017 }, { "epoch": 0.79, "grad_norm": 0.5969434380531311, "learning_rate": 0.0005042154148084159, "loss": 3.3827, "step": 16018 }, { "epoch": 0.79, "grad_norm": 0.4866141378879547, "learning_rate": 0.0005042041361291941, "loss": 3.2698, "step": 16019 }, { "epoch": 0.79, "grad_norm": 0.6796237826347351, "learning_rate": 0.000504192856912138, "loss": 2.9635, "step": 16020 }, { "epoch": 0.79, "grad_norm": 0.5028907656669617, "learning_rate": 0.0005041815771572772, "loss": 3.237, "step": 16021 }, { "epoch": 0.79, "grad_norm": 0.5195736885070801, "learning_rate": 0.0005041702968646413, "loss": 3.3098, "step": 16022 }, { "epoch": 0.79, "grad_norm": 0.490695059299469, "learning_rate": 0.0005041590160342603, "loss": 3.2567, "step": 16023 }, { "epoch": 0.79, "grad_norm": 0.5087558031082153, "learning_rate": 0.0005041477346661637, "loss": 3.2146, "step": 16024 }, { "epoch": 0.79, "grad_norm": 0.5431877374649048, "learning_rate": 0.0005041364527603811, "loss": 3.2565, "step": 16025 }, { "epoch": 0.79, "grad_norm": 0.6024651527404785, "learning_rate": 0.0005041251703169425, "loss": 3.3531, "step": 16026 }, { "epoch": 0.79, "grad_norm": 0.5574399828910828, "learning_rate": 0.0005041138873358776, "loss": 3.1318, "step": 16027 }, { "epoch": 0.79, "grad_norm": 0.5236434936523438, "learning_rate": 0.0005041026038172158, "loss": 3.4343, "step": 16028 }, { "epoch": 0.79, "grad_norm": 0.5195509195327759, "learning_rate": 0.0005040913197609871, "loss": 3.097, "step": 16029 }, { "epoch": 0.79, "grad_norm": 0.5851946473121643, "learning_rate": 0.0005040800351672211, "loss": 3.3494, "step": 16030 }, { "epoch": 0.79, "grad_norm": 0.5439422130584717, "learning_rate": 0.0005040687500359476, "loss": 3.1376, "step": 16031 }, { "epoch": 0.79, "grad_norm": 0.5359077453613281, "learning_rate": 0.0005040574643671963, "loss": 3.162, "step": 16032 }, { "epoch": 0.79, "grad_norm": 0.5320112705230713, "learning_rate": 0.0005040461781609969, "loss": 3.3413, "step": 16033 }, { "epoch": 0.79, "grad_norm": 0.503227174282074, "learning_rate": 0.0005040348914173791, "loss": 3.2933, "step": 16034 }, { "epoch": 0.79, "grad_norm": 0.5737831592559814, "learning_rate": 0.0005040236041363728, "loss": 3.3733, "step": 16035 }, { "epoch": 0.79, "grad_norm": 0.4747275114059448, "learning_rate": 0.0005040123163180075, "loss": 3.1553, "step": 16036 }, { "epoch": 0.79, "grad_norm": 0.5528791546821594, "learning_rate": 0.0005040010279623129, "loss": 3.1915, "step": 16037 }, { "epoch": 0.79, "grad_norm": 0.526457667350769, "learning_rate": 0.000503989739069319, "loss": 3.3059, "step": 16038 }, { "epoch": 0.79, "grad_norm": 0.5216947197914124, "learning_rate": 0.0005039784496390554, "loss": 3.1353, "step": 16039 }, { "epoch": 0.79, "grad_norm": 0.5180486440658569, "learning_rate": 0.0005039671596715517, "loss": 3.0681, "step": 16040 }, { "epoch": 0.79, "grad_norm": 0.5172383785247803, "learning_rate": 0.0005039558691668378, "loss": 3.0283, "step": 16041 }, { "epoch": 0.79, "grad_norm": 0.5042517185211182, "learning_rate": 0.0005039445781249435, "loss": 3.1366, "step": 16042 }, { "epoch": 0.79, "grad_norm": 0.546276330947876, "learning_rate": 0.0005039332865458983, "loss": 3.0346, "step": 16043 }, { "epoch": 0.79, "grad_norm": 0.521732747554779, "learning_rate": 0.0005039219944297321, "loss": 3.2085, "step": 16044 }, { "epoch": 0.79, "grad_norm": 0.46991196274757385, "learning_rate": 0.0005039107017764747, "loss": 3.128, "step": 16045 }, { "epoch": 0.79, "grad_norm": 0.5190024971961975, "learning_rate": 0.0005038994085861556, "loss": 3.1441, "step": 16046 }, { "epoch": 0.79, "grad_norm": 0.5141576528549194, "learning_rate": 0.0005038881148588048, "loss": 2.9767, "step": 16047 }, { "epoch": 0.79, "grad_norm": 0.5428609848022461, "learning_rate": 0.000503876820594452, "loss": 3.0003, "step": 16048 }, { "epoch": 0.79, "grad_norm": 0.5093159675598145, "learning_rate": 0.0005038655257931269, "loss": 3.1081, "step": 16049 }, { "epoch": 0.79, "grad_norm": 0.5302721858024597, "learning_rate": 0.0005038542304548591, "loss": 3.2037, "step": 16050 }, { "epoch": 0.79, "grad_norm": 0.526119589805603, "learning_rate": 0.0005038429345796785, "loss": 3.4543, "step": 16051 }, { "epoch": 0.79, "grad_norm": 0.5228065848350525, "learning_rate": 0.0005038316381676149, "loss": 3.1163, "step": 16052 }, { "epoch": 0.79, "grad_norm": 0.5292603969573975, "learning_rate": 0.000503820341218698, "loss": 3.2189, "step": 16053 }, { "epoch": 0.79, "grad_norm": 0.5633078813552856, "learning_rate": 0.0005038090437329575, "loss": 3.1288, "step": 16054 }, { "epoch": 0.79, "grad_norm": 0.5860145092010498, "learning_rate": 0.0005037977457104233, "loss": 3.2443, "step": 16055 }, { "epoch": 0.79, "grad_norm": 0.614590048789978, "learning_rate": 0.000503786447151125, "loss": 3.0993, "step": 16056 }, { "epoch": 0.79, "grad_norm": 0.502687394618988, "learning_rate": 0.0005037751480550924, "loss": 3.015, "step": 16057 }, { "epoch": 0.79, "grad_norm": 0.5312213897705078, "learning_rate": 0.0005037638484223553, "loss": 3.0935, "step": 16058 }, { "epoch": 0.79, "grad_norm": 0.5091737508773804, "learning_rate": 0.0005037525482529435, "loss": 3.1461, "step": 16059 }, { "epoch": 0.79, "grad_norm": 0.5732948780059814, "learning_rate": 0.0005037412475468865, "loss": 3.1839, "step": 16060 }, { "epoch": 0.79, "grad_norm": 0.5322034955024719, "learning_rate": 0.0005037299463042144, "loss": 3.2584, "step": 16061 }, { "epoch": 0.79, "grad_norm": 0.5124638080596924, "learning_rate": 0.0005037186445249568, "loss": 3.2116, "step": 16062 }, { "epoch": 0.79, "grad_norm": 0.5579886436462402, "learning_rate": 0.0005037073422091436, "loss": 3.1975, "step": 16063 }, { "epoch": 0.79, "grad_norm": 0.5009481310844421, "learning_rate": 0.0005036960393568044, "loss": 3.136, "step": 16064 }, { "epoch": 0.79, "grad_norm": 0.5488647818565369, "learning_rate": 0.000503684735967969, "loss": 3.0356, "step": 16065 }, { "epoch": 0.79, "grad_norm": 0.5193225145339966, "learning_rate": 0.0005036734320426672, "loss": 3.2853, "step": 16066 }, { "epoch": 0.79, "grad_norm": 0.5488074421882629, "learning_rate": 0.0005036621275809288, "loss": 3.1645, "step": 16067 }, { "epoch": 0.79, "grad_norm": 0.6234692335128784, "learning_rate": 0.0005036508225827836, "loss": 2.9019, "step": 16068 }, { "epoch": 0.79, "grad_norm": 0.48300352692604065, "learning_rate": 0.0005036395170482613, "loss": 3.0794, "step": 16069 }, { "epoch": 0.79, "grad_norm": 0.5437573194503784, "learning_rate": 0.0005036282109773917, "loss": 3.2105, "step": 16070 }, { "epoch": 0.79, "grad_norm": 0.6625828742980957, "learning_rate": 0.0005036169043702045, "loss": 3.2494, "step": 16071 }, { "epoch": 0.79, "grad_norm": 0.6107515096664429, "learning_rate": 0.0005036055972267296, "loss": 3.2419, "step": 16072 }, { "epoch": 0.79, "grad_norm": 0.5302785038948059, "learning_rate": 0.0005035942895469967, "loss": 3.1094, "step": 16073 }, { "epoch": 0.79, "grad_norm": 0.6154295802116394, "learning_rate": 0.0005035829813310358, "loss": 3.1699, "step": 16074 }, { "epoch": 0.79, "grad_norm": 0.6206402778625488, "learning_rate": 0.0005035716725788763, "loss": 3.1178, "step": 16075 }, { "epoch": 0.79, "grad_norm": 0.52013099193573, "learning_rate": 0.0005035603632905484, "loss": 3.0002, "step": 16076 }, { "epoch": 0.79, "grad_norm": 0.4945783019065857, "learning_rate": 0.0005035490534660816, "loss": 3.071, "step": 16077 }, { "epoch": 0.79, "grad_norm": 0.509825587272644, "learning_rate": 0.0005035377431055057, "loss": 3.2746, "step": 16078 }, { "epoch": 0.79, "grad_norm": 0.49139195680618286, "learning_rate": 0.0005035264322088505, "loss": 3.2474, "step": 16079 }, { "epoch": 0.79, "grad_norm": 0.5229079127311707, "learning_rate": 0.000503515120776146, "loss": 3.2264, "step": 16080 }, { "epoch": 0.79, "grad_norm": 0.5331903696060181, "learning_rate": 0.0005035038088074217, "loss": 3.4112, "step": 16081 }, { "epoch": 0.79, "grad_norm": 0.5611699223518372, "learning_rate": 0.0005034924963027077, "loss": 3.0743, "step": 16082 }, { "epoch": 0.79, "grad_norm": 0.504478394985199, "learning_rate": 0.0005034811832620335, "loss": 3.1197, "step": 16083 }, { "epoch": 0.79, "grad_norm": 0.5261973142623901, "learning_rate": 0.0005034698696854291, "loss": 3.1517, "step": 16084 }, { "epoch": 0.79, "grad_norm": 0.5199773907661438, "learning_rate": 0.0005034585555729243, "loss": 3.3782, "step": 16085 }, { "epoch": 0.79, "grad_norm": 0.4901329576969147, "learning_rate": 0.0005034472409245485, "loss": 3.1992, "step": 16086 }, { "epoch": 0.79, "grad_norm": 0.5331196784973145, "learning_rate": 0.0005034359257403322, "loss": 3.1904, "step": 16087 }, { "epoch": 0.79, "grad_norm": 0.5385162830352783, "learning_rate": 0.0005034246100203046, "loss": 2.9416, "step": 16088 }, { "epoch": 0.79, "grad_norm": 0.5102599263191223, "learning_rate": 0.0005034132937644958, "loss": 3.332, "step": 16089 }, { "epoch": 0.79, "grad_norm": 0.5430083274841309, "learning_rate": 0.0005034019769729355, "loss": 3.0467, "step": 16090 }, { "epoch": 0.79, "grad_norm": 0.563103437423706, "learning_rate": 0.0005033906596456535, "loss": 3.1664, "step": 16091 }, { "epoch": 0.79, "grad_norm": 0.5284902453422546, "learning_rate": 0.0005033793417826797, "loss": 3.4442, "step": 16092 }, { "epoch": 0.79, "grad_norm": 0.477546364068985, "learning_rate": 0.0005033680233840439, "loss": 3.2581, "step": 16093 }, { "epoch": 0.79, "grad_norm": 0.5075785517692566, "learning_rate": 0.0005033567044497758, "loss": 3.1257, "step": 16094 }, { "epoch": 0.79, "grad_norm": 0.5229860544204712, "learning_rate": 0.0005033453849799053, "loss": 3.2537, "step": 16095 }, { "epoch": 0.79, "grad_norm": 0.5341659188270569, "learning_rate": 0.0005033340649744621, "loss": 3.09, "step": 16096 }, { "epoch": 0.79, "grad_norm": 0.5053170323371887, "learning_rate": 0.0005033227444334763, "loss": 3.4498, "step": 16097 }, { "epoch": 0.79, "grad_norm": 0.5055860877037048, "learning_rate": 0.0005033114233569774, "loss": 2.9695, "step": 16098 }, { "epoch": 0.79, "grad_norm": 0.4993264377117157, "learning_rate": 0.0005033001017449953, "loss": 3.0594, "step": 16099 }, { "epoch": 0.79, "grad_norm": 0.49798890948295593, "learning_rate": 0.00050328877959756, "loss": 3.2009, "step": 16100 }, { "epoch": 0.79, "grad_norm": 0.5068756341934204, "learning_rate": 0.000503277456914701, "loss": 3.3866, "step": 16101 }, { "epoch": 0.79, "grad_norm": 0.5351049900054932, "learning_rate": 0.0005032661336964484, "loss": 3.0999, "step": 16102 }, { "epoch": 0.79, "grad_norm": 0.49359703063964844, "learning_rate": 0.000503254809942832, "loss": 3.1928, "step": 16103 }, { "epoch": 0.79, "grad_norm": 0.526689887046814, "learning_rate": 0.0005032434856538814, "loss": 3.1952, "step": 16104 }, { "epoch": 0.79, "grad_norm": 0.5129401683807373, "learning_rate": 0.0005032321608296266, "loss": 2.8986, "step": 16105 }, { "epoch": 0.79, "grad_norm": 0.5190664529800415, "learning_rate": 0.0005032208354700974, "loss": 3.1921, "step": 16106 }, { "epoch": 0.79, "grad_norm": 0.5858680605888367, "learning_rate": 0.0005032095095753237, "loss": 3.2601, "step": 16107 }, { "epoch": 0.79, "grad_norm": 0.5072818994522095, "learning_rate": 0.0005031981831453352, "loss": 3.1682, "step": 16108 }, { "epoch": 0.79, "grad_norm": 0.5385854244232178, "learning_rate": 0.0005031868561801618, "loss": 3.2302, "step": 16109 }, { "epoch": 0.79, "grad_norm": 0.49237823486328125, "learning_rate": 0.0005031755286798333, "loss": 3.4822, "step": 16110 }, { "epoch": 0.79, "grad_norm": 0.5026249885559082, "learning_rate": 0.0005031642006443796, "loss": 3.3075, "step": 16111 }, { "epoch": 0.79, "grad_norm": 0.5177112817764282, "learning_rate": 0.0005031528720738306, "loss": 3.2019, "step": 16112 }, { "epoch": 0.79, "grad_norm": 0.5290037989616394, "learning_rate": 0.0005031415429682159, "loss": 3.0193, "step": 16113 }, { "epoch": 0.79, "grad_norm": 0.5207205414772034, "learning_rate": 0.0005031302133275655, "loss": 2.7481, "step": 16114 }, { "epoch": 0.79, "grad_norm": 0.5251289010047913, "learning_rate": 0.0005031188831519091, "loss": 3.2789, "step": 16115 }, { "epoch": 0.79, "grad_norm": 0.5037059187889099, "learning_rate": 0.0005031075524412769, "loss": 3.1427, "step": 16116 }, { "epoch": 0.79, "grad_norm": 0.509303629398346, "learning_rate": 0.0005030962211956982, "loss": 3.4196, "step": 16117 }, { "epoch": 0.79, "grad_norm": 0.47990882396698, "learning_rate": 0.0005030848894152034, "loss": 3.1945, "step": 16118 }, { "epoch": 0.79, "grad_norm": 0.5095180869102478, "learning_rate": 0.0005030735570998219, "loss": 3.3179, "step": 16119 }, { "epoch": 0.79, "grad_norm": 0.5448658466339111, "learning_rate": 0.0005030622242495839, "loss": 3.3563, "step": 16120 }, { "epoch": 0.79, "grad_norm": 0.5198306441307068, "learning_rate": 0.000503050890864519, "loss": 3.1658, "step": 16121 }, { "epoch": 0.79, "grad_norm": 0.5410298109054565, "learning_rate": 0.0005030395569446571, "loss": 3.2453, "step": 16122 }, { "epoch": 0.79, "grad_norm": 0.5249944925308228, "learning_rate": 0.0005030282224900281, "loss": 3.1404, "step": 16123 }, { "epoch": 0.79, "grad_norm": 0.5237621068954468, "learning_rate": 0.0005030168875006618, "loss": 3.1242, "step": 16124 }, { "epoch": 0.79, "grad_norm": 0.5496729016304016, "learning_rate": 0.0005030055519765882, "loss": 3.0199, "step": 16125 }, { "epoch": 0.79, "grad_norm": 0.5271469950675964, "learning_rate": 0.000502994215917837, "loss": 3.123, "step": 16126 }, { "epoch": 0.79, "grad_norm": 0.5577632188796997, "learning_rate": 0.000502982879324438, "loss": 3.1257, "step": 16127 }, { "epoch": 0.79, "grad_norm": 0.498698353767395, "learning_rate": 0.0005029715421964213, "loss": 3.3184, "step": 16128 }, { "epoch": 0.79, "grad_norm": 0.544201135635376, "learning_rate": 0.0005029602045338166, "loss": 2.9086, "step": 16129 }, { "epoch": 0.79, "grad_norm": 0.5301446914672852, "learning_rate": 0.0005029488663366538, "loss": 3.4906, "step": 16130 }, { "epoch": 0.79, "grad_norm": 0.5524643063545227, "learning_rate": 0.0005029375276049626, "loss": 3.4142, "step": 16131 }, { "epoch": 0.79, "grad_norm": 0.5376681685447693, "learning_rate": 0.0005029261883387732, "loss": 3.254, "step": 16132 }, { "epoch": 0.79, "grad_norm": 0.5098219513893127, "learning_rate": 0.000502914848538115, "loss": 3.3139, "step": 16133 }, { "epoch": 0.79, "grad_norm": 0.5753784775733948, "learning_rate": 0.0005029035082030184, "loss": 3.2448, "step": 16134 }, { "epoch": 0.79, "grad_norm": 0.49853768944740295, "learning_rate": 0.0005028921673335129, "loss": 3.2964, "step": 16135 }, { "epoch": 0.79, "grad_norm": 0.5184651017189026, "learning_rate": 0.0005028808259296285, "loss": 3.2924, "step": 16136 }, { "epoch": 0.79, "grad_norm": 0.5892463326454163, "learning_rate": 0.000502869483991395, "loss": 3.0275, "step": 16137 }, { "epoch": 0.79, "grad_norm": 0.4991600811481476, "learning_rate": 0.0005028581415188425, "loss": 3.1359, "step": 16138 }, { "epoch": 0.79, "grad_norm": 0.49838724732398987, "learning_rate": 0.0005028467985120006, "loss": 3.1147, "step": 16139 }, { "epoch": 0.79, "grad_norm": 0.5089886784553528, "learning_rate": 0.0005028354549708992, "loss": 2.9585, "step": 16140 }, { "epoch": 0.79, "grad_norm": 0.52250075340271, "learning_rate": 0.0005028241108955683, "loss": 3.3139, "step": 16141 }, { "epoch": 0.79, "grad_norm": 0.5590442419052124, "learning_rate": 0.0005028127662860376, "loss": 3.0677, "step": 16142 }, { "epoch": 0.79, "grad_norm": 0.5002887845039368, "learning_rate": 0.0005028014211423372, "loss": 3.018, "step": 16143 }, { "epoch": 0.79, "grad_norm": 0.5314575433731079, "learning_rate": 0.000502790075464497, "loss": 3.2391, "step": 16144 }, { "epoch": 0.79, "grad_norm": 0.4909334182739258, "learning_rate": 0.0005027787292525468, "loss": 3.0808, "step": 16145 }, { "epoch": 0.79, "grad_norm": 0.5140949487686157, "learning_rate": 0.0005027673825065163, "loss": 3.1941, "step": 16146 }, { "epoch": 0.79, "grad_norm": 0.49293190240859985, "learning_rate": 0.0005027560352264355, "loss": 3.0625, "step": 16147 }, { "epoch": 0.79, "grad_norm": 0.5335683822631836, "learning_rate": 0.0005027446874123344, "loss": 3.1745, "step": 16148 }, { "epoch": 0.79, "grad_norm": 0.5320890545845032, "learning_rate": 0.0005027333390642428, "loss": 3.0896, "step": 16149 }, { "epoch": 0.79, "grad_norm": 0.5180721879005432, "learning_rate": 0.0005027219901821906, "loss": 3.2275, "step": 16150 }, { "epoch": 0.79, "grad_norm": 0.5077602863311768, "learning_rate": 0.0005027106407662077, "loss": 2.9512, "step": 16151 }, { "epoch": 0.79, "grad_norm": 0.5526420474052429, "learning_rate": 0.0005026992908163239, "loss": 3.0982, "step": 16152 }, { "epoch": 0.79, "grad_norm": 0.5293105840682983, "learning_rate": 0.0005026879403325693, "loss": 3.2531, "step": 16153 }, { "epoch": 0.79, "grad_norm": 0.4970487058162689, "learning_rate": 0.0005026765893149736, "loss": 3.2379, "step": 16154 }, { "epoch": 0.79, "grad_norm": 0.523263156414032, "learning_rate": 0.0005026652377635669, "loss": 3.3632, "step": 16155 }, { "epoch": 0.79, "grad_norm": 0.5170415639877319, "learning_rate": 0.0005026538856783789, "loss": 3.139, "step": 16156 }, { "epoch": 0.79, "grad_norm": 0.48945000767707825, "learning_rate": 0.0005026425330594394, "loss": 3.1406, "step": 16157 }, { "epoch": 0.79, "grad_norm": 0.5514994859695435, "learning_rate": 0.0005026311799067786, "loss": 2.9625, "step": 16158 }, { "epoch": 0.79, "grad_norm": 0.5217519402503967, "learning_rate": 0.0005026198262204263, "loss": 3.0109, "step": 16159 }, { "epoch": 0.79, "grad_norm": 0.5658161044120789, "learning_rate": 0.0005026084720004125, "loss": 2.9256, "step": 16160 }, { "epoch": 0.79, "grad_norm": 0.5193248391151428, "learning_rate": 0.0005025971172467667, "loss": 3.2494, "step": 16161 }, { "epoch": 0.79, "grad_norm": 0.5272179841995239, "learning_rate": 0.0005025857619595193, "loss": 3.312, "step": 16162 }, { "epoch": 0.79, "grad_norm": 0.4956285059452057, "learning_rate": 0.0005025744061386998, "loss": 3.2104, "step": 16163 }, { "epoch": 0.79, "grad_norm": 0.521141767501831, "learning_rate": 0.0005025630497843385, "loss": 2.9773, "step": 16164 }, { "epoch": 0.79, "grad_norm": 0.525787353515625, "learning_rate": 0.000502551692896465, "loss": 3.2225, "step": 16165 }, { "epoch": 0.79, "grad_norm": 0.5088430643081665, "learning_rate": 0.0005025403354751093, "loss": 3.1288, "step": 16166 }, { "epoch": 0.79, "grad_norm": 0.5418877005577087, "learning_rate": 0.0005025289775203013, "loss": 3.2467, "step": 16167 }, { "epoch": 0.79, "grad_norm": 0.5093033313751221, "learning_rate": 0.0005025176190320711, "loss": 3.4795, "step": 16168 }, { "epoch": 0.79, "grad_norm": 0.5372544527053833, "learning_rate": 0.0005025062600104483, "loss": 3.1049, "step": 16169 }, { "epoch": 0.79, "grad_norm": 0.5223720669746399, "learning_rate": 0.0005024949004554632, "loss": 3.1735, "step": 16170 }, { "epoch": 0.79, "grad_norm": 0.5225620865821838, "learning_rate": 0.0005024835403671453, "loss": 3.1505, "step": 16171 }, { "epoch": 0.79, "grad_norm": 0.478676974773407, "learning_rate": 0.0005024721797455249, "loss": 3.1069, "step": 16172 }, { "epoch": 0.79, "grad_norm": 0.5180579423904419, "learning_rate": 0.0005024608185906315, "loss": 3.0396, "step": 16173 }, { "epoch": 0.79, "grad_norm": 0.5308378338813782, "learning_rate": 0.0005024494569024954, "loss": 3.204, "step": 16174 }, { "epoch": 0.79, "grad_norm": 0.5326209664344788, "learning_rate": 0.0005024380946811464, "loss": 3.0794, "step": 16175 }, { "epoch": 0.79, "grad_norm": 0.5528416037559509, "learning_rate": 0.0005024267319266144, "loss": 3.3221, "step": 16176 }, { "epoch": 0.79, "grad_norm": 0.5260026454925537, "learning_rate": 0.0005024153686389295, "loss": 2.9779, "step": 16177 }, { "epoch": 0.79, "grad_norm": 0.5304065346717834, "learning_rate": 0.0005024040048181213, "loss": 3.1639, "step": 16178 }, { "epoch": 0.79, "grad_norm": 0.5245885252952576, "learning_rate": 0.0005023926404642199, "loss": 3.2939, "step": 16179 }, { "epoch": 0.79, "grad_norm": 0.47927168011665344, "learning_rate": 0.0005023812755772553, "loss": 3.2998, "step": 16180 }, { "epoch": 0.79, "grad_norm": 0.5522112250328064, "learning_rate": 0.0005023699101572574, "loss": 3.3079, "step": 16181 }, { "epoch": 0.79, "grad_norm": 0.47725212574005127, "learning_rate": 0.000502358544204256, "loss": 3.0599, "step": 16182 }, { "epoch": 0.79, "grad_norm": 0.5390349626541138, "learning_rate": 0.0005023471777182813, "loss": 3.2281, "step": 16183 }, { "epoch": 0.79, "grad_norm": 0.5163893699645996, "learning_rate": 0.0005023358106993629, "loss": 2.8327, "step": 16184 }, { "epoch": 0.79, "grad_norm": 0.5547180771827698, "learning_rate": 0.000502324443147531, "loss": 3.1906, "step": 16185 }, { "epoch": 0.79, "grad_norm": 0.5237986445426941, "learning_rate": 0.0005023130750628154, "loss": 3.336, "step": 16186 }, { "epoch": 0.79, "grad_norm": 0.493787944316864, "learning_rate": 0.0005023017064452462, "loss": 3.0081, "step": 16187 }, { "epoch": 0.79, "grad_norm": 0.5115264654159546, "learning_rate": 0.0005022903372948531, "loss": 3.181, "step": 16188 }, { "epoch": 0.79, "grad_norm": 0.5147324204444885, "learning_rate": 0.0005022789676116663, "loss": 2.998, "step": 16189 }, { "epoch": 0.79, "grad_norm": 0.5190193057060242, "learning_rate": 0.0005022675973957156, "loss": 3.2676, "step": 16190 }, { "epoch": 0.79, "grad_norm": 0.522584855556488, "learning_rate": 0.000502256226647031, "loss": 3.1966, "step": 16191 }, { "epoch": 0.79, "grad_norm": 0.5173296332359314, "learning_rate": 0.0005022448553656424, "loss": 3.175, "step": 16192 }, { "epoch": 0.79, "grad_norm": 0.5209254622459412, "learning_rate": 0.0005022334835515797, "loss": 3.113, "step": 16193 }, { "epoch": 0.79, "grad_norm": 0.5002453923225403, "learning_rate": 0.000502222111204873, "loss": 3.1706, "step": 16194 }, { "epoch": 0.79, "grad_norm": 0.5135509371757507, "learning_rate": 0.0005022107383255522, "loss": 3.2399, "step": 16195 }, { "epoch": 0.79, "grad_norm": 0.5734914541244507, "learning_rate": 0.0005021993649136472, "loss": 2.9791, "step": 16196 }, { "epoch": 0.79, "grad_norm": 0.5258508324623108, "learning_rate": 0.000502187990969188, "loss": 2.8981, "step": 16197 }, { "epoch": 0.79, "grad_norm": 0.528937816619873, "learning_rate": 0.0005021766164922044, "loss": 2.9982, "step": 16198 }, { "epoch": 0.79, "grad_norm": 0.559468686580658, "learning_rate": 0.0005021652414827268, "loss": 3.1467, "step": 16199 }, { "epoch": 0.79, "grad_norm": 0.5440381169319153, "learning_rate": 0.0005021538659407845, "loss": 3.1731, "step": 16200 }, { "epoch": 0.79, "grad_norm": 0.5404337048530579, "learning_rate": 0.0005021424898664081, "loss": 3.2169, "step": 16201 }, { "epoch": 0.79, "grad_norm": 0.5026121735572815, "learning_rate": 0.0005021311132596272, "loss": 3.211, "step": 16202 }, { "epoch": 0.79, "grad_norm": 0.5181695818901062, "learning_rate": 0.0005021197361204719, "loss": 3.1374, "step": 16203 }, { "epoch": 0.79, "grad_norm": 0.5274845361709595, "learning_rate": 0.0005021083584489719, "loss": 3.118, "step": 16204 }, { "epoch": 0.79, "grad_norm": 0.5087721943855286, "learning_rate": 0.0005020969802451577, "loss": 3.1625, "step": 16205 }, { "epoch": 0.79, "grad_norm": 0.4713067412376404, "learning_rate": 0.0005020856015090587, "loss": 3.0954, "step": 16206 }, { "epoch": 0.79, "grad_norm": 0.5224311351776123, "learning_rate": 0.0005020742222407053, "loss": 3.1967, "step": 16207 }, { "epoch": 0.79, "grad_norm": 0.5140992403030396, "learning_rate": 0.0005020628424401272, "loss": 3.3444, "step": 16208 }, { "epoch": 0.79, "grad_norm": 0.568848729133606, "learning_rate": 0.0005020514621073544, "loss": 3.2186, "step": 16209 }, { "epoch": 0.79, "grad_norm": 0.5110242962837219, "learning_rate": 0.000502040081242417, "loss": 3.1196, "step": 16210 }, { "epoch": 0.79, "grad_norm": 0.5247518420219421, "learning_rate": 0.0005020286998453449, "loss": 3.2261, "step": 16211 }, { "epoch": 0.79, "grad_norm": 0.4877007007598877, "learning_rate": 0.0005020173179161681, "loss": 3.0439, "step": 16212 }, { "epoch": 0.79, "grad_norm": 0.6002936363220215, "learning_rate": 0.0005020059354549166, "loss": 3.2361, "step": 16213 }, { "epoch": 0.79, "grad_norm": 0.5274139642715454, "learning_rate": 0.0005019945524616203, "loss": 2.848, "step": 16214 }, { "epoch": 0.79, "grad_norm": 0.4768544137477875, "learning_rate": 0.0005019831689363091, "loss": 3.186, "step": 16215 }, { "epoch": 0.79, "grad_norm": 0.7357099652290344, "learning_rate": 0.0005019717848790133, "loss": 3.1149, "step": 16216 }, { "epoch": 0.79, "grad_norm": 0.4986879825592041, "learning_rate": 0.0005019604002897626, "loss": 3.2408, "step": 16217 }, { "epoch": 0.79, "grad_norm": 0.5395023822784424, "learning_rate": 0.0005019490151685871, "loss": 3.1948, "step": 16218 }, { "epoch": 0.79, "grad_norm": 0.5745142102241516, "learning_rate": 0.0005019376295155167, "loss": 3.1383, "step": 16219 }, { "epoch": 0.79, "grad_norm": 0.5301345586776733, "learning_rate": 0.0005019262433305815, "loss": 3.2496, "step": 16220 }, { "epoch": 0.79, "grad_norm": 0.4867192208766937, "learning_rate": 0.0005019148566138115, "loss": 3.2222, "step": 16221 }, { "epoch": 0.8, "grad_norm": 0.5403074622154236, "learning_rate": 0.0005019034693652365, "loss": 3.1839, "step": 16222 }, { "epoch": 0.8, "grad_norm": 0.5404332280158997, "learning_rate": 0.0005018920815848866, "loss": 3.1374, "step": 16223 }, { "epoch": 0.8, "grad_norm": 0.49627020955085754, "learning_rate": 0.0005018806932727919, "loss": 3.1589, "step": 16224 }, { "epoch": 0.8, "grad_norm": 0.516038179397583, "learning_rate": 0.0005018693044289823, "loss": 3.1485, "step": 16225 }, { "epoch": 0.8, "grad_norm": 0.49638882279396057, "learning_rate": 0.0005018579150534878, "loss": 3.1552, "step": 16226 }, { "epoch": 0.8, "grad_norm": 0.4918712079524994, "learning_rate": 0.0005018465251463384, "loss": 3.3042, "step": 16227 }, { "epoch": 0.8, "grad_norm": 0.4965533912181854, "learning_rate": 0.0005018351347075641, "loss": 2.9956, "step": 16228 }, { "epoch": 0.8, "grad_norm": 0.49797412753105164, "learning_rate": 0.0005018237437371949, "loss": 3.0222, "step": 16229 }, { "epoch": 0.8, "grad_norm": 0.5310698747634888, "learning_rate": 0.0005018123522352607, "loss": 3.1327, "step": 16230 }, { "epoch": 0.8, "grad_norm": 0.4971972107887268, "learning_rate": 0.0005018009602017918, "loss": 3.1433, "step": 16231 }, { "epoch": 0.8, "grad_norm": 0.5360703468322754, "learning_rate": 0.0005017895676368179, "loss": 3.1121, "step": 16232 }, { "epoch": 0.8, "grad_norm": 0.5207992196083069, "learning_rate": 0.0005017781745403692, "loss": 2.9475, "step": 16233 }, { "epoch": 0.8, "grad_norm": 0.49780797958374023, "learning_rate": 0.0005017667809124755, "loss": 3.2944, "step": 16234 }, { "epoch": 0.8, "grad_norm": 0.5747670531272888, "learning_rate": 0.0005017553867531669, "loss": 3.1795, "step": 16235 }, { "epoch": 0.8, "grad_norm": 0.5032791495323181, "learning_rate": 0.0005017439920624735, "loss": 3.4004, "step": 16236 }, { "epoch": 0.8, "grad_norm": 0.5304082036018372, "learning_rate": 0.0005017325968404252, "loss": 3.2782, "step": 16237 }, { "epoch": 0.8, "grad_norm": 0.5240580439567566, "learning_rate": 0.0005017212010870522, "loss": 2.8734, "step": 16238 }, { "epoch": 0.8, "grad_norm": 0.4911217987537384, "learning_rate": 0.0005017098048023843, "loss": 3.3212, "step": 16239 }, { "epoch": 0.8, "grad_norm": 0.5378339290618896, "learning_rate": 0.0005016984079864516, "loss": 3.1136, "step": 16240 }, { "epoch": 0.8, "grad_norm": 0.5092733502388, "learning_rate": 0.0005016870106392841, "loss": 3.0082, "step": 16241 }, { "epoch": 0.8, "grad_norm": 0.5283250212669373, "learning_rate": 0.0005016756127609118, "loss": 3.0912, "step": 16242 }, { "epoch": 0.8, "grad_norm": 0.5703310966491699, "learning_rate": 0.0005016642143513649, "loss": 3.0603, "step": 16243 }, { "epoch": 0.8, "grad_norm": 0.48841413855552673, "learning_rate": 0.0005016528154106731, "loss": 3.4069, "step": 16244 }, { "epoch": 0.8, "grad_norm": 0.634024977684021, "learning_rate": 0.0005016414159388667, "loss": 3.2679, "step": 16245 }, { "epoch": 0.8, "grad_norm": 0.497964084148407, "learning_rate": 0.0005016300159359755, "loss": 3.315, "step": 16246 }, { "epoch": 0.8, "grad_norm": 0.5449110865592957, "learning_rate": 0.0005016186154020297, "loss": 3.36, "step": 16247 }, { "epoch": 0.8, "grad_norm": 0.5338404774665833, "learning_rate": 0.0005016072143370593, "loss": 3.0915, "step": 16248 }, { "epoch": 0.8, "grad_norm": 0.5039170980453491, "learning_rate": 0.0005015958127410942, "loss": 3.1784, "step": 16249 }, { "epoch": 0.8, "grad_norm": 0.49808257818222046, "learning_rate": 0.0005015844106141648, "loss": 3.3481, "step": 16250 }, { "epoch": 0.8, "grad_norm": 0.555994987487793, "learning_rate": 0.0005015730079563006, "loss": 3.2594, "step": 16251 }, { "epoch": 0.8, "grad_norm": 0.5516330003738403, "learning_rate": 0.0005015616047675319, "loss": 3.1797, "step": 16252 }, { "epoch": 0.8, "grad_norm": 0.523496150970459, "learning_rate": 0.0005015502010478889, "loss": 3.1193, "step": 16253 }, { "epoch": 0.8, "grad_norm": 0.5190884470939636, "learning_rate": 0.0005015387967974012, "loss": 3.1007, "step": 16254 }, { "epoch": 0.8, "grad_norm": 0.4918883740901947, "learning_rate": 0.0005015273920160993, "loss": 3.194, "step": 16255 }, { "epoch": 0.8, "grad_norm": 0.5591009855270386, "learning_rate": 0.0005015159867040129, "loss": 3.0611, "step": 16256 }, { "epoch": 0.8, "grad_norm": 0.4997139871120453, "learning_rate": 0.0005015045808611723, "loss": 3.2017, "step": 16257 }, { "epoch": 0.8, "grad_norm": 0.63576740026474, "learning_rate": 0.0005014931744876073, "loss": 3.0073, "step": 16258 }, { "epoch": 0.8, "grad_norm": 0.5021123290061951, "learning_rate": 0.0005014817675833481, "loss": 3.3267, "step": 16259 }, { "epoch": 0.8, "grad_norm": 0.5300601720809937, "learning_rate": 0.0005014703601484248, "loss": 2.9822, "step": 16260 }, { "epoch": 0.8, "grad_norm": 0.5239089131355286, "learning_rate": 0.0005014589521828673, "loss": 3.0789, "step": 16261 }, { "epoch": 0.8, "grad_norm": 0.5151042938232422, "learning_rate": 0.0005014475436867056, "loss": 3.1529, "step": 16262 }, { "epoch": 0.8, "grad_norm": 0.5113961696624756, "learning_rate": 0.0005014361346599699, "loss": 3.0487, "step": 16263 }, { "epoch": 0.8, "grad_norm": 0.5840994119644165, "learning_rate": 0.0005014247251026901, "loss": 3.292, "step": 16264 }, { "epoch": 0.8, "grad_norm": 0.52402663230896, "learning_rate": 0.0005014133150148964, "loss": 3.2253, "step": 16265 }, { "epoch": 0.8, "grad_norm": 0.5494585037231445, "learning_rate": 0.0005014019043966189, "loss": 2.9822, "step": 16266 }, { "epoch": 0.8, "grad_norm": 0.5364586114883423, "learning_rate": 0.0005013904932478874, "loss": 3.2012, "step": 16267 }, { "epoch": 0.8, "grad_norm": 0.5051799416542053, "learning_rate": 0.0005013790815687322, "loss": 3.228, "step": 16268 }, { "epoch": 0.8, "grad_norm": 0.5270349979400635, "learning_rate": 0.0005013676693591832, "loss": 3.1309, "step": 16269 }, { "epoch": 0.8, "grad_norm": 0.523430585861206, "learning_rate": 0.0005013562566192704, "loss": 3.2644, "step": 16270 }, { "epoch": 0.8, "grad_norm": 0.4938635528087616, "learning_rate": 0.0005013448433490241, "loss": 3.3449, "step": 16271 }, { "epoch": 0.8, "grad_norm": 0.5350764989852905, "learning_rate": 0.0005013334295484742, "loss": 3.3254, "step": 16272 }, { "epoch": 0.8, "grad_norm": 0.5360183715820312, "learning_rate": 0.0005013220152176509, "loss": 2.9552, "step": 16273 }, { "epoch": 0.8, "grad_norm": 0.5047653913497925, "learning_rate": 0.000501310600356584, "loss": 3.172, "step": 16274 }, { "epoch": 0.8, "grad_norm": 0.5344395041465759, "learning_rate": 0.0005012991849653038, "loss": 3.152, "step": 16275 }, { "epoch": 0.8, "grad_norm": 0.5071520805358887, "learning_rate": 0.0005012877690438403, "loss": 3.2807, "step": 16276 }, { "epoch": 0.8, "grad_norm": 0.501033365726471, "learning_rate": 0.0005012763525922236, "loss": 3.0155, "step": 16277 }, { "epoch": 0.8, "grad_norm": 0.5294574499130249, "learning_rate": 0.0005012649356104837, "loss": 3.2542, "step": 16278 }, { "epoch": 0.8, "grad_norm": 0.565493106842041, "learning_rate": 0.0005012535180986507, "loss": 2.9964, "step": 16279 }, { "epoch": 0.8, "grad_norm": 0.5068030953407288, "learning_rate": 0.0005012421000567545, "loss": 3.2337, "step": 16280 }, { "epoch": 0.8, "grad_norm": 0.5078257918357849, "learning_rate": 0.0005012306814848255, "loss": 3.2369, "step": 16281 }, { "epoch": 0.8, "grad_norm": 0.5458744168281555, "learning_rate": 0.0005012192623828935, "loss": 3.4057, "step": 16282 }, { "epoch": 0.8, "grad_norm": 0.5356884002685547, "learning_rate": 0.0005012078427509888, "loss": 3.2353, "step": 16283 }, { "epoch": 0.8, "grad_norm": 0.5274999141693115, "learning_rate": 0.0005011964225891414, "loss": 3.2908, "step": 16284 }, { "epoch": 0.8, "grad_norm": 0.5029778480529785, "learning_rate": 0.0005011850018973813, "loss": 3.164, "step": 16285 }, { "epoch": 0.8, "grad_norm": 0.555999219417572, "learning_rate": 0.0005011735806757384, "loss": 3.0256, "step": 16286 }, { "epoch": 0.8, "grad_norm": 0.48176196217536926, "learning_rate": 0.0005011621589242433, "loss": 3.2014, "step": 16287 }, { "epoch": 0.8, "grad_norm": 0.49973785877227783, "learning_rate": 0.0005011507366429257, "loss": 3.2146, "step": 16288 }, { "epoch": 0.8, "grad_norm": 0.537611722946167, "learning_rate": 0.0005011393138318157, "loss": 2.9839, "step": 16289 }, { "epoch": 0.8, "grad_norm": 0.52414470911026, "learning_rate": 0.0005011278904909437, "loss": 3.2931, "step": 16290 }, { "epoch": 0.8, "grad_norm": 0.5327220559120178, "learning_rate": 0.0005011164666203392, "loss": 3.276, "step": 16291 }, { "epoch": 0.8, "grad_norm": 0.5477252006530762, "learning_rate": 0.0005011050422200328, "loss": 3.1397, "step": 16292 }, { "epoch": 0.8, "grad_norm": 0.5243980884552002, "learning_rate": 0.0005010936172900544, "loss": 3.3518, "step": 16293 }, { "epoch": 0.8, "grad_norm": 0.5618069171905518, "learning_rate": 0.0005010821918304342, "loss": 2.9984, "step": 16294 }, { "epoch": 0.8, "grad_norm": 0.515941321849823, "learning_rate": 0.0005010707658412021, "loss": 3.1225, "step": 16295 }, { "epoch": 0.8, "grad_norm": 0.5532481670379639, "learning_rate": 0.0005010593393223883, "loss": 3.0783, "step": 16296 }, { "epoch": 0.8, "grad_norm": 0.4828968644142151, "learning_rate": 0.0005010479122740229, "loss": 3.0916, "step": 16297 }, { "epoch": 0.8, "grad_norm": 0.5126880407333374, "learning_rate": 0.000501036484696136, "loss": 3.2333, "step": 16298 }, { "epoch": 0.8, "grad_norm": 0.5703883171081543, "learning_rate": 0.0005010250565887578, "loss": 3.1723, "step": 16299 }, { "epoch": 0.8, "grad_norm": 0.5786604881286621, "learning_rate": 0.0005010136279519182, "loss": 3.0571, "step": 16300 }, { "epoch": 0.8, "grad_norm": 0.5473567843437195, "learning_rate": 0.0005010021987856474, "loss": 3.1133, "step": 16301 }, { "epoch": 0.8, "grad_norm": 0.5086806416511536, "learning_rate": 0.0005009907690899754, "loss": 3.2605, "step": 16302 }, { "epoch": 0.8, "grad_norm": 0.5256600975990295, "learning_rate": 0.0005009793388649326, "loss": 3.1623, "step": 16303 }, { "epoch": 0.8, "grad_norm": 0.5034403800964355, "learning_rate": 0.0005009679081105487, "loss": 3.3153, "step": 16304 }, { "epoch": 0.8, "grad_norm": 0.5190585851669312, "learning_rate": 0.000500956476826854, "loss": 3.1154, "step": 16305 }, { "epoch": 0.8, "grad_norm": 0.501880407333374, "learning_rate": 0.0005009450450138787, "loss": 3.4226, "step": 16306 }, { "epoch": 0.8, "grad_norm": 0.5461406707763672, "learning_rate": 0.000500933612671653, "loss": 3.1763, "step": 16307 }, { "epoch": 0.8, "grad_norm": 0.5586167573928833, "learning_rate": 0.0005009221798002065, "loss": 3.1583, "step": 16308 }, { "epoch": 0.8, "grad_norm": 0.5345634818077087, "learning_rate": 0.0005009107463995699, "loss": 3.35, "step": 16309 }, { "epoch": 0.8, "grad_norm": 0.49737313389778137, "learning_rate": 0.0005008993124697729, "loss": 3.158, "step": 16310 }, { "epoch": 0.8, "grad_norm": 0.5575215816497803, "learning_rate": 0.0005008878780108459, "loss": 3.1295, "step": 16311 }, { "epoch": 0.8, "grad_norm": 0.5068386197090149, "learning_rate": 0.0005008764430228188, "loss": 3.2086, "step": 16312 }, { "epoch": 0.8, "grad_norm": 0.5135799050331116, "learning_rate": 0.0005008650075057218, "loss": 3.4186, "step": 16313 }, { "epoch": 0.8, "grad_norm": 0.50897216796875, "learning_rate": 0.000500853571459585, "loss": 3.1799, "step": 16314 }, { "epoch": 0.8, "grad_norm": 0.4997579753398895, "learning_rate": 0.0005008421348844387, "loss": 3.0874, "step": 16315 }, { "epoch": 0.8, "grad_norm": 0.5073021054267883, "learning_rate": 0.0005008306977803128, "loss": 3.1663, "step": 16316 }, { "epoch": 0.8, "grad_norm": 0.502220630645752, "learning_rate": 0.0005008192601472374, "loss": 3.2532, "step": 16317 }, { "epoch": 0.8, "grad_norm": 0.5047315359115601, "learning_rate": 0.0005008078219852429, "loss": 3.3159, "step": 16318 }, { "epoch": 0.8, "grad_norm": 0.5216161012649536, "learning_rate": 0.0005007963832943591, "loss": 3.3052, "step": 16319 }, { "epoch": 0.8, "grad_norm": 0.5240123867988586, "learning_rate": 0.0005007849440746163, "loss": 3.3936, "step": 16320 }, { "epoch": 0.8, "grad_norm": 0.4939900040626526, "learning_rate": 0.0005007735043260446, "loss": 3.2465, "step": 16321 }, { "epoch": 0.8, "grad_norm": 0.5109789967536926, "learning_rate": 0.0005007620640486741, "loss": 3.2318, "step": 16322 }, { "epoch": 0.8, "grad_norm": 0.5235530138015747, "learning_rate": 0.0005007506232425351, "loss": 3.0924, "step": 16323 }, { "epoch": 0.8, "grad_norm": 0.5005676746368408, "learning_rate": 0.0005007391819076574, "loss": 3.3931, "step": 16324 }, { "epoch": 0.8, "grad_norm": 0.4928778409957886, "learning_rate": 0.0005007277400440715, "loss": 3.2683, "step": 16325 }, { "epoch": 0.8, "grad_norm": 0.5503429770469666, "learning_rate": 0.0005007162976518073, "loss": 3.2935, "step": 16326 }, { "epoch": 0.8, "grad_norm": 0.526890754699707, "learning_rate": 0.0005007048547308948, "loss": 3.0424, "step": 16327 }, { "epoch": 0.8, "grad_norm": 0.5440188050270081, "learning_rate": 0.0005006934112813646, "loss": 3.2249, "step": 16328 }, { "epoch": 0.8, "grad_norm": 0.48897939920425415, "learning_rate": 0.0005006819673032465, "loss": 3.0303, "step": 16329 }, { "epoch": 0.8, "grad_norm": 0.4976714849472046, "learning_rate": 0.0005006705227965707, "loss": 3.2086, "step": 16330 }, { "epoch": 0.8, "grad_norm": 0.519629716873169, "learning_rate": 0.0005006590777613674, "loss": 3.0235, "step": 16331 }, { "epoch": 0.8, "grad_norm": 0.5517348647117615, "learning_rate": 0.0005006476321976667, "loss": 3.133, "step": 16332 }, { "epoch": 0.8, "grad_norm": 0.5207356810569763, "learning_rate": 0.0005006361861054988, "loss": 3.2264, "step": 16333 }, { "epoch": 0.8, "grad_norm": 0.521043062210083, "learning_rate": 0.0005006247394848938, "loss": 3.0017, "step": 16334 }, { "epoch": 0.8, "grad_norm": 0.519502580165863, "learning_rate": 0.0005006132923358818, "loss": 3.2157, "step": 16335 }, { "epoch": 0.8, "grad_norm": 0.47964170575141907, "learning_rate": 0.0005006018446584928, "loss": 3.2735, "step": 16336 }, { "epoch": 0.8, "grad_norm": 0.515812337398529, "learning_rate": 0.0005005903964527573, "loss": 3.256, "step": 16337 }, { "epoch": 0.8, "grad_norm": 0.5045713782310486, "learning_rate": 0.0005005789477187055, "loss": 3.1528, "step": 16338 }, { "epoch": 0.8, "grad_norm": 0.5392544865608215, "learning_rate": 0.0005005674984563672, "loss": 3.0644, "step": 16339 }, { "epoch": 0.8, "grad_norm": 0.5179302096366882, "learning_rate": 0.0005005560486657726, "loss": 3.1746, "step": 16340 }, { "epoch": 0.8, "grad_norm": 0.5171295404434204, "learning_rate": 0.0005005445983469522, "loss": 3.0787, "step": 16341 }, { "epoch": 0.8, "grad_norm": 0.5172526240348816, "learning_rate": 0.0005005331474999357, "loss": 3.0268, "step": 16342 }, { "epoch": 0.8, "grad_norm": 0.5140368938446045, "learning_rate": 0.0005005216961247535, "loss": 3.5288, "step": 16343 }, { "epoch": 0.8, "grad_norm": 0.525821328163147, "learning_rate": 0.0005005102442214358, "loss": 3.0389, "step": 16344 }, { "epoch": 0.8, "grad_norm": 0.5332549214363098, "learning_rate": 0.0005004987917900127, "loss": 3.2078, "step": 16345 }, { "epoch": 0.8, "grad_norm": 0.5117021203041077, "learning_rate": 0.0005004873388305145, "loss": 3.1699, "step": 16346 }, { "epoch": 0.8, "grad_norm": 0.526621401309967, "learning_rate": 0.000500475885342971, "loss": 3.0896, "step": 16347 }, { "epoch": 0.8, "grad_norm": 0.4984114468097687, "learning_rate": 0.0005004644313274127, "loss": 3.1271, "step": 16348 }, { "epoch": 0.8, "grad_norm": 0.5194051861763, "learning_rate": 0.0005004529767838697, "loss": 3.4425, "step": 16349 }, { "epoch": 0.8, "grad_norm": 0.530343234539032, "learning_rate": 0.0005004415217123722, "loss": 2.962, "step": 16350 }, { "epoch": 0.8, "grad_norm": 0.5183805227279663, "learning_rate": 0.0005004300661129502, "loss": 3.2518, "step": 16351 }, { "epoch": 0.8, "grad_norm": 0.5521455407142639, "learning_rate": 0.000500418609985634, "loss": 3.1005, "step": 16352 }, { "epoch": 0.8, "grad_norm": 0.5434244871139526, "learning_rate": 0.0005004071533304538, "loss": 3.0559, "step": 16353 }, { "epoch": 0.8, "grad_norm": 0.5996847152709961, "learning_rate": 0.0005003956961474397, "loss": 3.0903, "step": 16354 }, { "epoch": 0.8, "grad_norm": 0.5073933005332947, "learning_rate": 0.0005003842384366219, "loss": 3.0769, "step": 16355 }, { "epoch": 0.8, "grad_norm": 0.5215885043144226, "learning_rate": 0.0005003727801980305, "loss": 3.2447, "step": 16356 }, { "epoch": 0.8, "grad_norm": 0.5436731576919556, "learning_rate": 0.000500361321431696, "loss": 3.1657, "step": 16357 }, { "epoch": 0.8, "grad_norm": 0.5639539361000061, "learning_rate": 0.000500349862137648, "loss": 3.0724, "step": 16358 }, { "epoch": 0.8, "grad_norm": 0.5657851099967957, "learning_rate": 0.0005003384023159173, "loss": 3.2198, "step": 16359 }, { "epoch": 0.8, "grad_norm": 0.5688062906265259, "learning_rate": 0.0005003269419665336, "loss": 3.0992, "step": 16360 }, { "epoch": 0.8, "grad_norm": 0.5081748366355896, "learning_rate": 0.0005003154810895275, "loss": 3.1934, "step": 16361 }, { "epoch": 0.8, "grad_norm": 0.5793516039848328, "learning_rate": 0.0005003040196849288, "loss": 3.1196, "step": 16362 }, { "epoch": 0.8, "grad_norm": 0.5692729353904724, "learning_rate": 0.000500292557752768, "loss": 3.0594, "step": 16363 }, { "epoch": 0.8, "grad_norm": 0.5625926852226257, "learning_rate": 0.0005002810952930751, "loss": 3.3912, "step": 16364 }, { "epoch": 0.8, "grad_norm": 0.5317186117172241, "learning_rate": 0.0005002696323058803, "loss": 3.2598, "step": 16365 }, { "epoch": 0.8, "grad_norm": 0.5381388068199158, "learning_rate": 0.0005002581687912138, "loss": 3.1774, "step": 16366 }, { "epoch": 0.8, "grad_norm": 0.5291975736618042, "learning_rate": 0.0005002467047491058, "loss": 3.1837, "step": 16367 }, { "epoch": 0.8, "grad_norm": 0.5130248665809631, "learning_rate": 0.0005002352401795866, "loss": 3.1678, "step": 16368 }, { "epoch": 0.8, "grad_norm": 0.4932388365268707, "learning_rate": 0.0005002237750826863, "loss": 3.2429, "step": 16369 }, { "epoch": 0.8, "grad_norm": 0.5155643224716187, "learning_rate": 0.0005002123094584351, "loss": 3.3085, "step": 16370 }, { "epoch": 0.8, "grad_norm": 0.5444445013999939, "learning_rate": 0.0005002008433068633, "loss": 3.2845, "step": 16371 }, { "epoch": 0.8, "grad_norm": 0.5338315367698669, "learning_rate": 0.0005001893766280009, "loss": 3.1683, "step": 16372 }, { "epoch": 0.8, "grad_norm": 0.535651445388794, "learning_rate": 0.0005001779094218782, "loss": 3.0012, "step": 16373 }, { "epoch": 0.8, "grad_norm": 0.5226747989654541, "learning_rate": 0.0005001664416885254, "loss": 2.9169, "step": 16374 }, { "epoch": 0.8, "grad_norm": 0.526677131652832, "learning_rate": 0.0005001549734279729, "loss": 3.1332, "step": 16375 }, { "epoch": 0.8, "grad_norm": 0.540988028049469, "learning_rate": 0.0005001435046402505, "loss": 3.3839, "step": 16376 }, { "epoch": 0.8, "grad_norm": 0.5214574337005615, "learning_rate": 0.0005001320353253887, "loss": 3.2017, "step": 16377 }, { "epoch": 0.8, "grad_norm": 0.49261197447776794, "learning_rate": 0.0005001205654834176, "loss": 3.2731, "step": 16378 }, { "epoch": 0.8, "grad_norm": 0.507203996181488, "learning_rate": 0.0005001090951143675, "loss": 3.0643, "step": 16379 }, { "epoch": 0.8, "grad_norm": 0.5074104070663452, "learning_rate": 0.0005000976242182687, "loss": 3.1289, "step": 16380 }, { "epoch": 0.8, "grad_norm": 0.533085823059082, "learning_rate": 0.000500086152795151, "loss": 3.1851, "step": 16381 }, { "epoch": 0.8, "grad_norm": 0.5225232243537903, "learning_rate": 0.000500074680845045, "loss": 3.1941, "step": 16382 }, { "epoch": 0.8, "grad_norm": 0.5232262015342712, "learning_rate": 0.0005000632083679808, "loss": 3.1863, "step": 16383 }, { "epoch": 0.8, "grad_norm": 0.499962717294693, "learning_rate": 0.0005000517353639886, "loss": 3.2258, "step": 16384 }, { "epoch": 0.8, "grad_norm": 0.4960383176803589, "learning_rate": 0.0005000402618330986, "loss": 3.4133, "step": 16385 }, { "epoch": 0.8, "grad_norm": 0.5370568633079529, "learning_rate": 0.0005000287877753411, "loss": 3.0247, "step": 16386 }, { "epoch": 0.8, "grad_norm": 0.5205273032188416, "learning_rate": 0.0005000173131907463, "loss": 3.028, "step": 16387 }, { "epoch": 0.8, "grad_norm": 0.5405469536781311, "learning_rate": 0.0005000058380793443, "loss": 3.2769, "step": 16388 }, { "epoch": 0.8, "grad_norm": 0.5633376836776733, "learning_rate": 0.0004999943624411655, "loss": 3.2323, "step": 16389 }, { "epoch": 0.8, "grad_norm": 0.5104237198829651, "learning_rate": 0.00049998288627624, "loss": 3.2026, "step": 16390 }, { "epoch": 0.8, "grad_norm": 0.5359808206558228, "learning_rate": 0.000499971409584598, "loss": 2.988, "step": 16391 }, { "epoch": 0.8, "grad_norm": 0.5244869589805603, "learning_rate": 0.0004999599323662699, "loss": 3.309, "step": 16392 }, { "epoch": 0.8, "grad_norm": 0.5385964512825012, "learning_rate": 0.0004999484546212858, "loss": 3.0099, "step": 16393 }, { "epoch": 0.8, "grad_norm": 0.5380749702453613, "learning_rate": 0.0004999369763496759, "loss": 3.094, "step": 16394 }, { "epoch": 0.8, "grad_norm": 0.518419086933136, "learning_rate": 0.0004999254975514705, "loss": 3.0281, "step": 16395 }, { "epoch": 0.8, "grad_norm": 0.5032293200492859, "learning_rate": 0.0004999140182266997, "loss": 3.1709, "step": 16396 }, { "epoch": 0.8, "grad_norm": 0.5485716462135315, "learning_rate": 0.0004999025383753941, "loss": 3.0291, "step": 16397 }, { "epoch": 0.8, "grad_norm": 0.5190295577049255, "learning_rate": 0.0004998910579975835, "loss": 3.1124, "step": 16398 }, { "epoch": 0.8, "grad_norm": 0.5125435590744019, "learning_rate": 0.0004998795770932984, "loss": 3.0552, "step": 16399 }, { "epoch": 0.8, "grad_norm": 0.5453558564186096, "learning_rate": 0.000499868095662569, "loss": 3.0534, "step": 16400 }, { "epoch": 0.8, "grad_norm": 0.49550220370292664, "learning_rate": 0.0004998566137054255, "loss": 3.3442, "step": 16401 }, { "epoch": 0.8, "grad_norm": 0.4840456247329712, "learning_rate": 0.000499845131221898, "loss": 3.1657, "step": 16402 }, { "epoch": 0.8, "grad_norm": 0.5119274258613586, "learning_rate": 0.000499833648212017, "loss": 3.4076, "step": 16403 }, { "epoch": 0.8, "grad_norm": 0.48557212948799133, "learning_rate": 0.0004998221646758125, "loss": 3.3897, "step": 16404 }, { "epoch": 0.8, "grad_norm": 0.516492486000061, "learning_rate": 0.000499810680613315, "loss": 3.04, "step": 16405 }, { "epoch": 0.8, "grad_norm": 0.5236262083053589, "learning_rate": 0.0004997991960245547, "loss": 3.4405, "step": 16406 }, { "epoch": 0.8, "grad_norm": 0.5504629015922546, "learning_rate": 0.0004997877109095616, "loss": 3.0354, "step": 16407 }, { "epoch": 0.8, "grad_norm": 0.5525025725364685, "learning_rate": 0.0004997762252683663, "loss": 3.1878, "step": 16408 }, { "epoch": 0.8, "grad_norm": 0.5103166699409485, "learning_rate": 0.0004997647391009986, "loss": 3.0423, "step": 16409 }, { "epoch": 0.8, "grad_norm": 0.5244056582450867, "learning_rate": 0.0004997532524074892, "loss": 3.3401, "step": 16410 }, { "epoch": 0.8, "grad_norm": 0.5187060236930847, "learning_rate": 0.0004997417651878682, "loss": 3.2776, "step": 16411 }, { "epoch": 0.8, "grad_norm": 0.5291786789894104, "learning_rate": 0.0004997302774421658, "loss": 3.0764, "step": 16412 }, { "epoch": 0.8, "grad_norm": 0.509762704372406, "learning_rate": 0.0004997187891704123, "loss": 3.2449, "step": 16413 }, { "epoch": 0.8, "grad_norm": 0.5269352793693542, "learning_rate": 0.0004997073003726378, "loss": 3.169, "step": 16414 }, { "epoch": 0.8, "grad_norm": 0.526393473148346, "learning_rate": 0.0004996958110488729, "loss": 3.1764, "step": 16415 }, { "epoch": 0.8, "grad_norm": 0.5143744945526123, "learning_rate": 0.0004996843211991477, "loss": 3.1803, "step": 16416 }, { "epoch": 0.8, "grad_norm": 0.526218056678772, "learning_rate": 0.0004996728308234923, "loss": 3.1629, "step": 16417 }, { "epoch": 0.8, "grad_norm": 0.46396979689598083, "learning_rate": 0.000499661339921937, "loss": 3.2116, "step": 16418 }, { "epoch": 0.8, "grad_norm": 0.49755245447158813, "learning_rate": 0.0004996498484945123, "loss": 3.2634, "step": 16419 }, { "epoch": 0.8, "grad_norm": 0.5120879411697388, "learning_rate": 0.0004996383565412483, "loss": 3.1099, "step": 16420 }, { "epoch": 0.8, "grad_norm": 0.5071130394935608, "learning_rate": 0.0004996268640621753, "loss": 3.1576, "step": 16421 }, { "epoch": 0.8, "grad_norm": 0.5468621850013733, "learning_rate": 0.0004996153710573236, "loss": 3.1405, "step": 16422 }, { "epoch": 0.8, "grad_norm": 0.5439679622650146, "learning_rate": 0.0004996038775267233, "loss": 3.0414, "step": 16423 }, { "epoch": 0.8, "grad_norm": 0.5470306277275085, "learning_rate": 0.0004995923834704049, "loss": 3.2407, "step": 16424 }, { "epoch": 0.8, "grad_norm": 0.4964117109775543, "learning_rate": 0.0004995808888883986, "loss": 3.0354, "step": 16425 }, { "epoch": 0.8, "grad_norm": 0.5191156268119812, "learning_rate": 0.0004995693937807345, "loss": 3.2839, "step": 16426 }, { "epoch": 0.81, "grad_norm": 0.5142076015472412, "learning_rate": 0.0004995578981474431, "loss": 3.212, "step": 16427 }, { "epoch": 0.81, "grad_norm": 0.4969421923160553, "learning_rate": 0.0004995464019885548, "loss": 2.959, "step": 16428 }, { "epoch": 0.81, "grad_norm": 0.5990926623344421, "learning_rate": 0.0004995349053040993, "loss": 3.2161, "step": 16429 }, { "epoch": 0.81, "grad_norm": 0.5438251495361328, "learning_rate": 0.0004995234080941075, "loss": 3.2263, "step": 16430 }, { "epoch": 0.81, "grad_norm": 0.5286610722541809, "learning_rate": 0.0004995119103586095, "loss": 3.1011, "step": 16431 }, { "epoch": 0.81, "grad_norm": 0.5550453662872314, "learning_rate": 0.0004995004120976354, "loss": 3.2373, "step": 16432 }, { "epoch": 0.81, "grad_norm": 0.5107359290122986, "learning_rate": 0.0004994889133112156, "loss": 3.2337, "step": 16433 }, { "epoch": 0.81, "grad_norm": 0.5204411149024963, "learning_rate": 0.0004994774139993805, "loss": 3.3439, "step": 16434 }, { "epoch": 0.81, "grad_norm": 0.5322909355163574, "learning_rate": 0.0004994659141621602, "loss": 3.5071, "step": 16435 }, { "epoch": 0.81, "grad_norm": 0.5268922448158264, "learning_rate": 0.0004994544137995851, "loss": 3.0943, "step": 16436 }, { "epoch": 0.81, "grad_norm": 0.5423510074615479, "learning_rate": 0.0004994429129116854, "loss": 3.2312, "step": 16437 }, { "epoch": 0.81, "grad_norm": 0.5225160121917725, "learning_rate": 0.0004994314114984915, "loss": 3.1335, "step": 16438 }, { "epoch": 0.81, "grad_norm": 0.5021830797195435, "learning_rate": 0.0004994199095600337, "loss": 2.975, "step": 16439 }, { "epoch": 0.81, "grad_norm": 0.5276130437850952, "learning_rate": 0.0004994084070963421, "loss": 3.2795, "step": 16440 }, { "epoch": 0.81, "grad_norm": 0.5111821889877319, "learning_rate": 0.0004993969041074473, "loss": 2.9674, "step": 16441 }, { "epoch": 0.81, "grad_norm": 0.5199219584465027, "learning_rate": 0.0004993854005933793, "loss": 3.1355, "step": 16442 }, { "epoch": 0.81, "grad_norm": 0.5186803340911865, "learning_rate": 0.0004993738965541686, "loss": 3.2157, "step": 16443 }, { "epoch": 0.81, "grad_norm": 0.5261385440826416, "learning_rate": 0.0004993623919898454, "loss": 3.2304, "step": 16444 }, { "epoch": 0.81, "grad_norm": 0.5131444334983826, "learning_rate": 0.00049935088690044, "loss": 3.1116, "step": 16445 }, { "epoch": 0.81, "grad_norm": 0.5108278393745422, "learning_rate": 0.0004993393812859827, "loss": 3.322, "step": 16446 }, { "epoch": 0.81, "grad_norm": 0.5285989046096802, "learning_rate": 0.0004993278751465039, "loss": 3.3048, "step": 16447 }, { "epoch": 0.81, "grad_norm": 0.5042722225189209, "learning_rate": 0.0004993163684820338, "loss": 3.0386, "step": 16448 }, { "epoch": 0.81, "grad_norm": 0.5362065434455872, "learning_rate": 0.0004993048612926028, "loss": 3.0104, "step": 16449 }, { "epoch": 0.81, "grad_norm": 0.49889418482780457, "learning_rate": 0.0004992933535782411, "loss": 3.0916, "step": 16450 }, { "epoch": 0.81, "grad_norm": 0.521359920501709, "learning_rate": 0.0004992818453389791, "loss": 3.1019, "step": 16451 }, { "epoch": 0.81, "grad_norm": 0.5698845386505127, "learning_rate": 0.0004992703365748471, "loss": 3.1863, "step": 16452 }, { "epoch": 0.81, "grad_norm": 0.45893803238868713, "learning_rate": 0.0004992588272858753, "loss": 3.2055, "step": 16453 }, { "epoch": 0.81, "grad_norm": 0.5086631178855896, "learning_rate": 0.0004992473174720942, "loss": 3.2734, "step": 16454 }, { "epoch": 0.81, "grad_norm": 0.507537305355072, "learning_rate": 0.0004992358071335338, "loss": 3.327, "step": 16455 }, { "epoch": 0.81, "grad_norm": 0.5071486830711365, "learning_rate": 0.0004992242962702248, "loss": 3.1988, "step": 16456 }, { "epoch": 0.81, "grad_norm": 0.5107549428939819, "learning_rate": 0.0004992127848821973, "loss": 3.1467, "step": 16457 }, { "epoch": 0.81, "grad_norm": 0.5230646729469299, "learning_rate": 0.0004992012729694817, "loss": 3.1563, "step": 16458 }, { "epoch": 0.81, "grad_norm": 0.5297747850418091, "learning_rate": 0.0004991897605321082, "loss": 3.1184, "step": 16459 }, { "epoch": 0.81, "grad_norm": 0.5269367098808289, "learning_rate": 0.0004991782475701073, "loss": 2.8786, "step": 16460 }, { "epoch": 0.81, "grad_norm": 0.4930148720741272, "learning_rate": 0.0004991667340835093, "loss": 3.0121, "step": 16461 }, { "epoch": 0.81, "grad_norm": 0.5489005446434021, "learning_rate": 0.0004991552200723443, "loss": 3.0634, "step": 16462 }, { "epoch": 0.81, "grad_norm": 0.5134395360946655, "learning_rate": 0.0004991437055366428, "loss": 3.2313, "step": 16463 }, { "epoch": 0.81, "grad_norm": 0.5260294079780579, "learning_rate": 0.0004991321904764352, "loss": 3.0926, "step": 16464 }, { "epoch": 0.81, "grad_norm": 0.5649300217628479, "learning_rate": 0.0004991206748917517, "loss": 3.2247, "step": 16465 }, { "epoch": 0.81, "grad_norm": 0.5012965798377991, "learning_rate": 0.0004991091587826225, "loss": 3.1459, "step": 16466 }, { "epoch": 0.81, "grad_norm": 0.49580734968185425, "learning_rate": 0.0004990976421490783, "loss": 3.1876, "step": 16467 }, { "epoch": 0.81, "grad_norm": 0.54473477602005, "learning_rate": 0.0004990861249911492, "loss": 3.3208, "step": 16468 }, { "epoch": 0.81, "grad_norm": 0.5577808022499084, "learning_rate": 0.0004990746073088655, "loss": 3.0318, "step": 16469 }, { "epoch": 0.81, "grad_norm": 0.5220108032226562, "learning_rate": 0.0004990630891022576, "loss": 3.07, "step": 16470 }, { "epoch": 0.81, "grad_norm": 0.4759686291217804, "learning_rate": 0.0004990515703713559, "loss": 3.1017, "step": 16471 }, { "epoch": 0.81, "grad_norm": 0.5046111345291138, "learning_rate": 0.0004990400511161907, "loss": 3.1224, "step": 16472 }, { "epoch": 0.81, "grad_norm": 0.5120763182640076, "learning_rate": 0.0004990285313367922, "loss": 3.2141, "step": 16473 }, { "epoch": 0.81, "grad_norm": 0.5458747148513794, "learning_rate": 0.0004990170110331908, "loss": 3.126, "step": 16474 }, { "epoch": 0.81, "grad_norm": 0.540588915348053, "learning_rate": 0.000499005490205417, "loss": 3.1728, "step": 16475 }, { "epoch": 0.81, "grad_norm": 0.5352221727371216, "learning_rate": 0.000498993968853501, "loss": 3.1001, "step": 16476 }, { "epoch": 0.81, "grad_norm": 0.5034423470497131, "learning_rate": 0.0004989824469774732, "loss": 3.1839, "step": 16477 }, { "epoch": 0.81, "grad_norm": 0.5104349255561829, "learning_rate": 0.0004989709245773639, "loss": 3.1802, "step": 16478 }, { "epoch": 0.81, "grad_norm": 0.5068408846855164, "learning_rate": 0.0004989594016532036, "loss": 3.0528, "step": 16479 }, { "epoch": 0.81, "grad_norm": 0.5584886074066162, "learning_rate": 0.0004989478782050224, "loss": 3.1633, "step": 16480 }, { "epoch": 0.81, "grad_norm": 0.4965101480484009, "learning_rate": 0.0004989363542328508, "loss": 2.9951, "step": 16481 }, { "epoch": 0.81, "grad_norm": 0.5258487462997437, "learning_rate": 0.0004989248297367191, "loss": 3.2203, "step": 16482 }, { "epoch": 0.81, "grad_norm": 0.49109527468681335, "learning_rate": 0.0004989133047166577, "loss": 3.2713, "step": 16483 }, { "epoch": 0.81, "grad_norm": 0.49905163049697876, "learning_rate": 0.0004989017791726971, "loss": 3.117, "step": 16484 }, { "epoch": 0.81, "grad_norm": 0.5777082443237305, "learning_rate": 0.0004988902531048673, "loss": 3.1115, "step": 16485 }, { "epoch": 0.81, "grad_norm": 0.5189237594604492, "learning_rate": 0.0004988787265131989, "loss": 3.0824, "step": 16486 }, { "epoch": 0.81, "grad_norm": 0.569271981716156, "learning_rate": 0.0004988671993977221, "loss": 2.9983, "step": 16487 }, { "epoch": 0.81, "grad_norm": 0.5757372379302979, "learning_rate": 0.0004988556717584676, "loss": 3.0703, "step": 16488 }, { "epoch": 0.81, "grad_norm": 0.5193915963172913, "learning_rate": 0.0004988441435954654, "loss": 3.1659, "step": 16489 }, { "epoch": 0.81, "grad_norm": 0.5080418586730957, "learning_rate": 0.000498832614908746, "loss": 3.1788, "step": 16490 }, { "epoch": 0.81, "grad_norm": 0.5090472102165222, "learning_rate": 0.0004988210856983398, "loss": 3.1489, "step": 16491 }, { "epoch": 0.81, "grad_norm": 0.466819167137146, "learning_rate": 0.000498809555964277, "loss": 3.1436, "step": 16492 }, { "epoch": 0.81, "grad_norm": 0.5463626980781555, "learning_rate": 0.0004987980257065883, "loss": 3.2058, "step": 16493 }, { "epoch": 0.81, "grad_norm": 0.5318609476089478, "learning_rate": 0.0004987864949253037, "loss": 3.333, "step": 16494 }, { "epoch": 0.81, "grad_norm": 0.5022921562194824, "learning_rate": 0.0004987749636204537, "loss": 3.0602, "step": 16495 }, { "epoch": 0.81, "grad_norm": 0.49121004343032837, "learning_rate": 0.0004987634317920688, "loss": 3.3215, "step": 16496 }, { "epoch": 0.81, "grad_norm": 0.5183935761451721, "learning_rate": 0.0004987518994401792, "loss": 3.2748, "step": 16497 }, { "epoch": 0.81, "grad_norm": 0.4942324757575989, "learning_rate": 0.0004987403665648155, "loss": 3.0571, "step": 16498 }, { "epoch": 0.81, "grad_norm": 0.5025029182434082, "learning_rate": 0.0004987288331660078, "loss": 3.2539, "step": 16499 }, { "epoch": 0.81, "grad_norm": 0.5152884125709534, "learning_rate": 0.0004987172992437866, "loss": 3.2326, "step": 16500 }, { "epoch": 0.81, "grad_norm": 0.5195552706718445, "learning_rate": 0.0004987057647981824, "loss": 3.0311, "step": 16501 }, { "epoch": 0.81, "grad_norm": 0.5349781513214111, "learning_rate": 0.0004986942298292253, "loss": 3.1752, "step": 16502 }, { "epoch": 0.81, "grad_norm": 0.5725705027580261, "learning_rate": 0.0004986826943369459, "loss": 3.2543, "step": 16503 }, { "epoch": 0.81, "grad_norm": 0.6165834069252014, "learning_rate": 0.0004986711583213745, "loss": 3.3868, "step": 16504 }, { "epoch": 0.81, "grad_norm": 0.5173270106315613, "learning_rate": 0.0004986596217825415, "loss": 3.1788, "step": 16505 }, { "epoch": 0.81, "grad_norm": 0.5284629464149475, "learning_rate": 0.0004986480847204772, "loss": 3.2897, "step": 16506 }, { "epoch": 0.81, "grad_norm": 0.5097475647926331, "learning_rate": 0.0004986365471352122, "loss": 2.8815, "step": 16507 }, { "epoch": 0.81, "grad_norm": 0.5338273048400879, "learning_rate": 0.0004986250090267768, "loss": 3.2031, "step": 16508 }, { "epoch": 0.81, "grad_norm": 0.5456317663192749, "learning_rate": 0.0004986134703952013, "loss": 2.9568, "step": 16509 }, { "epoch": 0.81, "grad_norm": 0.55818772315979, "learning_rate": 0.0004986019312405161, "loss": 3.1333, "step": 16510 }, { "epoch": 0.81, "grad_norm": 0.5008224844932556, "learning_rate": 0.0004985903915627517, "loss": 3.2126, "step": 16511 }, { "epoch": 0.81, "grad_norm": 0.4851469099521637, "learning_rate": 0.0004985788513619384, "loss": 3.0776, "step": 16512 }, { "epoch": 0.81, "grad_norm": 0.5189734697341919, "learning_rate": 0.0004985673106381066, "loss": 3.0294, "step": 16513 }, { "epoch": 0.81, "grad_norm": 0.5031270980834961, "learning_rate": 0.0004985557693912867, "loss": 3.4658, "step": 16514 }, { "epoch": 0.81, "grad_norm": 0.5297790169715881, "learning_rate": 0.0004985442276215092, "loss": 3.2422, "step": 16515 }, { "epoch": 0.81, "grad_norm": 0.536777138710022, "learning_rate": 0.0004985326853288044, "loss": 3.1539, "step": 16516 }, { "epoch": 0.81, "grad_norm": 0.5064894556999207, "learning_rate": 0.0004985211425132027, "loss": 3.1133, "step": 16517 }, { "epoch": 0.81, "grad_norm": 0.511317789554596, "learning_rate": 0.0004985095991747345, "loss": 3.1467, "step": 16518 }, { "epoch": 0.81, "grad_norm": 0.49673980474472046, "learning_rate": 0.0004984980553134302, "loss": 3.229, "step": 16519 }, { "epoch": 0.81, "grad_norm": 0.5076729655265808, "learning_rate": 0.0004984865109293203, "loss": 3.3867, "step": 16520 }, { "epoch": 0.81, "grad_norm": 0.5597838163375854, "learning_rate": 0.0004984749660224351, "loss": 3.2826, "step": 16521 }, { "epoch": 0.81, "grad_norm": 0.5371355414390564, "learning_rate": 0.0004984634205928049, "loss": 3.2134, "step": 16522 }, { "epoch": 0.81, "grad_norm": 0.5134449005126953, "learning_rate": 0.0004984518746404604, "loss": 3.2228, "step": 16523 }, { "epoch": 0.81, "grad_norm": 0.514105498790741, "learning_rate": 0.0004984403281654318, "loss": 2.9934, "step": 16524 }, { "epoch": 0.81, "grad_norm": 0.5345868468284607, "learning_rate": 0.0004984287811677496, "loss": 3.3375, "step": 16525 }, { "epoch": 0.81, "grad_norm": 0.5686771273612976, "learning_rate": 0.0004984172336474441, "loss": 3.2227, "step": 16526 }, { "epoch": 0.81, "grad_norm": 0.4849824905395508, "learning_rate": 0.000498405685604546, "loss": 3.2113, "step": 16527 }, { "epoch": 0.81, "grad_norm": 0.5313514471054077, "learning_rate": 0.0004983941370390853, "loss": 3.3788, "step": 16528 }, { "epoch": 0.81, "grad_norm": 0.5120206475257874, "learning_rate": 0.0004983825879510927, "loss": 3.1637, "step": 16529 }, { "epoch": 0.81, "grad_norm": 0.5304714441299438, "learning_rate": 0.0004983710383405984, "loss": 2.988, "step": 16530 }, { "epoch": 0.81, "grad_norm": 0.5290876030921936, "learning_rate": 0.0004983594882076333, "loss": 3.3558, "step": 16531 }, { "epoch": 0.81, "grad_norm": 0.5857232809066772, "learning_rate": 0.0004983479375522272, "loss": 3.102, "step": 16532 }, { "epoch": 0.81, "grad_norm": 0.7322335839271545, "learning_rate": 0.0004983363863744108, "loss": 3.0765, "step": 16533 }, { "epoch": 0.81, "grad_norm": 0.5193672776222229, "learning_rate": 0.0004983248346742147, "loss": 3.0546, "step": 16534 }, { "epoch": 0.81, "grad_norm": 0.5496091246604919, "learning_rate": 0.000498313282451669, "loss": 2.9767, "step": 16535 }, { "epoch": 0.81, "grad_norm": 0.5549640655517578, "learning_rate": 0.0004983017297068044, "loss": 3.0501, "step": 16536 }, { "epoch": 0.81, "grad_norm": 0.49452897906303406, "learning_rate": 0.0004982901764396511, "loss": 3.1101, "step": 16537 }, { "epoch": 0.81, "grad_norm": 0.5188285112380981, "learning_rate": 0.0004982786226502396, "loss": 3.276, "step": 16538 }, { "epoch": 0.81, "grad_norm": 0.5108150243759155, "learning_rate": 0.0004982670683386006, "loss": 3.1788, "step": 16539 }, { "epoch": 0.81, "grad_norm": 0.5602210164070129, "learning_rate": 0.0004982555135047641, "loss": 3.1825, "step": 16540 }, { "epoch": 0.81, "grad_norm": 0.5105909109115601, "learning_rate": 0.0004982439581487607, "loss": 3.3291, "step": 16541 }, { "epoch": 0.81, "grad_norm": 0.5331498384475708, "learning_rate": 0.000498232402270621, "loss": 3.2307, "step": 16542 }, { "epoch": 0.81, "grad_norm": 0.5256636142730713, "learning_rate": 0.0004982208458703752, "loss": 2.8307, "step": 16543 }, { "epoch": 0.81, "grad_norm": 0.5503387451171875, "learning_rate": 0.000498209288948054, "loss": 3.2409, "step": 16544 }, { "epoch": 0.81, "grad_norm": 0.5669764280319214, "learning_rate": 0.0004981977315036876, "loss": 3.1763, "step": 16545 }, { "epoch": 0.81, "grad_norm": 0.5365728139877319, "learning_rate": 0.0004981861735373064, "loss": 3.0852, "step": 16546 }, { "epoch": 0.81, "grad_norm": 0.5224799513816833, "learning_rate": 0.000498174615048941, "loss": 3.1728, "step": 16547 }, { "epoch": 0.81, "grad_norm": 0.5562565922737122, "learning_rate": 0.0004981630560386219, "loss": 3.1805, "step": 16548 }, { "epoch": 0.81, "grad_norm": 0.5232570171356201, "learning_rate": 0.0004981514965063795, "loss": 3.1247, "step": 16549 }, { "epoch": 0.81, "grad_norm": 0.5539358258247375, "learning_rate": 0.0004981399364522441, "loss": 3.2232, "step": 16550 }, { "epoch": 0.81, "grad_norm": 0.5199190378189087, "learning_rate": 0.0004981283758762462, "loss": 3.1485, "step": 16551 }, { "epoch": 0.81, "grad_norm": 0.516223132610321, "learning_rate": 0.0004981168147784163, "loss": 3.1427, "step": 16552 }, { "epoch": 0.81, "grad_norm": 0.504279613494873, "learning_rate": 0.0004981052531587849, "loss": 3.318, "step": 16553 }, { "epoch": 0.81, "grad_norm": 0.5051873326301575, "learning_rate": 0.0004980936910173823, "loss": 3.2255, "step": 16554 }, { "epoch": 0.81, "grad_norm": 0.5611341595649719, "learning_rate": 0.0004980821283542392, "loss": 3.0024, "step": 16555 }, { "epoch": 0.81, "grad_norm": 0.5161449313163757, "learning_rate": 0.0004980705651693858, "loss": 3.0296, "step": 16556 }, { "epoch": 0.81, "grad_norm": 0.5371371507644653, "learning_rate": 0.0004980590014628526, "loss": 3.273, "step": 16557 }, { "epoch": 0.81, "grad_norm": 0.533517599105835, "learning_rate": 0.0004980474372346701, "loss": 3.2346, "step": 16558 }, { "epoch": 0.81, "grad_norm": 0.5515034794807434, "learning_rate": 0.0004980358724848688, "loss": 3.1066, "step": 16559 }, { "epoch": 0.81, "grad_norm": 0.5124566555023193, "learning_rate": 0.0004980243072134792, "loss": 3.2288, "step": 16560 }, { "epoch": 0.81, "grad_norm": 0.4827638864517212, "learning_rate": 0.0004980127414205317, "loss": 3.3534, "step": 16561 }, { "epoch": 0.81, "grad_norm": 0.5892248749732971, "learning_rate": 0.0004980011751060566, "loss": 3.207, "step": 16562 }, { "epoch": 0.81, "grad_norm": 0.5171937942504883, "learning_rate": 0.0004979896082700846, "loss": 3.2058, "step": 16563 }, { "epoch": 0.81, "grad_norm": 0.5337836146354675, "learning_rate": 0.000497978040912646, "loss": 3.3217, "step": 16564 }, { "epoch": 0.81, "grad_norm": 0.5042610764503479, "learning_rate": 0.0004979664730337714, "loss": 3.3613, "step": 16565 }, { "epoch": 0.81, "grad_norm": 0.515651285648346, "learning_rate": 0.0004979549046334913, "loss": 3.0309, "step": 16566 }, { "epoch": 0.81, "grad_norm": 0.5128003358840942, "learning_rate": 0.000497943335711836, "loss": 3.4516, "step": 16567 }, { "epoch": 0.81, "grad_norm": 0.5067887306213379, "learning_rate": 0.0004979317662688359, "loss": 3.1118, "step": 16568 }, { "epoch": 0.81, "grad_norm": 0.4763834476470947, "learning_rate": 0.0004979201963045218, "loss": 3.3369, "step": 16569 }, { "epoch": 0.81, "grad_norm": 0.5007560849189758, "learning_rate": 0.0004979086258189241, "loss": 3.1067, "step": 16570 }, { "epoch": 0.81, "grad_norm": 0.539618968963623, "learning_rate": 0.0004978970548120729, "loss": 3.2345, "step": 16571 }, { "epoch": 0.81, "grad_norm": 0.5771459341049194, "learning_rate": 0.0004978854832839992, "loss": 3.1452, "step": 16572 }, { "epoch": 0.81, "grad_norm": 0.5069820284843445, "learning_rate": 0.000497873911234733, "loss": 3.2058, "step": 16573 }, { "epoch": 0.81, "grad_norm": 0.5238664746284485, "learning_rate": 0.000497862338664305, "loss": 3.0031, "step": 16574 }, { "epoch": 0.81, "grad_norm": 0.5101388692855835, "learning_rate": 0.0004978507655727458, "loss": 3.123, "step": 16575 }, { "epoch": 0.81, "grad_norm": 0.5062222480773926, "learning_rate": 0.0004978391919600857, "loss": 3.1039, "step": 16576 }, { "epoch": 0.81, "grad_norm": 0.571029007434845, "learning_rate": 0.0004978276178263553, "loss": 3.1371, "step": 16577 }, { "epoch": 0.81, "grad_norm": 0.519842803478241, "learning_rate": 0.000497816043171585, "loss": 3.0524, "step": 16578 }, { "epoch": 0.81, "grad_norm": 0.6620784997940063, "learning_rate": 0.0004978044679958053, "loss": 3.2173, "step": 16579 }, { "epoch": 0.81, "grad_norm": 0.5876750349998474, "learning_rate": 0.0004977928922990467, "loss": 3.1815, "step": 16580 }, { "epoch": 0.81, "grad_norm": 0.5399341583251953, "learning_rate": 0.0004977813160813397, "loss": 3.3868, "step": 16581 }, { "epoch": 0.81, "grad_norm": 0.5021479725837708, "learning_rate": 0.0004977697393427148, "loss": 3.2335, "step": 16582 }, { "epoch": 0.81, "grad_norm": 0.5305050015449524, "learning_rate": 0.0004977581620832024, "loss": 3.2448, "step": 16583 }, { "epoch": 0.81, "grad_norm": 0.4928508698940277, "learning_rate": 0.0004977465843028331, "loss": 3.2395, "step": 16584 }, { "epoch": 0.81, "grad_norm": 0.49715203046798706, "learning_rate": 0.0004977350060016374, "loss": 3.1318, "step": 16585 }, { "epoch": 0.81, "grad_norm": 0.49613332748413086, "learning_rate": 0.0004977234271796458, "loss": 3.2116, "step": 16586 }, { "epoch": 0.81, "grad_norm": 0.5376517176628113, "learning_rate": 0.0004977118478368886, "loss": 3.1922, "step": 16587 }, { "epoch": 0.81, "grad_norm": 0.5719401836395264, "learning_rate": 0.0004977002679733964, "loss": 3.2298, "step": 16588 }, { "epoch": 0.81, "grad_norm": 0.5064448118209839, "learning_rate": 0.0004976886875891998, "loss": 3.2184, "step": 16589 }, { "epoch": 0.81, "grad_norm": 0.5336682200431824, "learning_rate": 0.0004976771066843293, "loss": 3.1029, "step": 16590 }, { "epoch": 0.81, "grad_norm": 0.5417354106903076, "learning_rate": 0.0004976655252588153, "loss": 3.2497, "step": 16591 }, { "epoch": 0.81, "grad_norm": 0.5155006051063538, "learning_rate": 0.0004976539433126884, "loss": 3.2171, "step": 16592 }, { "epoch": 0.81, "grad_norm": 0.5138181447982788, "learning_rate": 0.000497642360845979, "loss": 3.0924, "step": 16593 }, { "epoch": 0.81, "grad_norm": 0.5162732601165771, "learning_rate": 0.0004976307778587176, "loss": 3.218, "step": 16594 }, { "epoch": 0.81, "grad_norm": 0.5005708932876587, "learning_rate": 0.000497619194350935, "loss": 3.3464, "step": 16595 }, { "epoch": 0.81, "grad_norm": 0.5471556186676025, "learning_rate": 0.0004976076103226613, "loss": 3.286, "step": 16596 }, { "epoch": 0.81, "grad_norm": 0.5286422371864319, "learning_rate": 0.0004975960257739272, "loss": 2.9602, "step": 16597 }, { "epoch": 0.81, "grad_norm": 0.5641829967498779, "learning_rate": 0.0004975844407047632, "loss": 3.2977, "step": 16598 }, { "epoch": 0.81, "grad_norm": 0.5358396172523499, "learning_rate": 0.0004975728551151998, "loss": 3.1017, "step": 16599 }, { "epoch": 0.81, "grad_norm": 0.5861735939979553, "learning_rate": 0.0004975612690052675, "loss": 3.1687, "step": 16600 }, { "epoch": 0.81, "grad_norm": 0.5610910654067993, "learning_rate": 0.0004975496823749969, "loss": 3.1995, "step": 16601 }, { "epoch": 0.81, "grad_norm": 0.5274034738540649, "learning_rate": 0.0004975380952244185, "loss": 3.2817, "step": 16602 }, { "epoch": 0.81, "grad_norm": 0.5060899257659912, "learning_rate": 0.0004975265075535626, "loss": 3.2932, "step": 16603 }, { "epoch": 0.81, "grad_norm": 0.5002941489219666, "learning_rate": 0.0004975149193624601, "loss": 3.1504, "step": 16604 }, { "epoch": 0.81, "grad_norm": 0.48612910509109497, "learning_rate": 0.0004975033306511412, "loss": 3.1963, "step": 16605 }, { "epoch": 0.81, "grad_norm": 0.5231236815452576, "learning_rate": 0.0004974917414196365, "loss": 3.1359, "step": 16606 }, { "epoch": 0.81, "grad_norm": 0.5047218203544617, "learning_rate": 0.0004974801516679766, "loss": 3.1402, "step": 16607 }, { "epoch": 0.81, "grad_norm": 0.4852062165737152, "learning_rate": 0.000497468561396192, "loss": 3.2184, "step": 16608 }, { "epoch": 0.81, "grad_norm": 0.5307152271270752, "learning_rate": 0.0004974569706043132, "loss": 3.1907, "step": 16609 }, { "epoch": 0.81, "grad_norm": 0.5003814101219177, "learning_rate": 0.0004974453792923708, "loss": 3.0009, "step": 16610 }, { "epoch": 0.81, "grad_norm": 0.5295878052711487, "learning_rate": 0.0004974337874603952, "loss": 3.1932, "step": 16611 }, { "epoch": 0.81, "grad_norm": 0.5771824717521667, "learning_rate": 0.000497422195108417, "loss": 3.3406, "step": 16612 }, { "epoch": 0.81, "grad_norm": 0.5096525549888611, "learning_rate": 0.0004974106022364669, "loss": 3.0476, "step": 16613 }, { "epoch": 0.81, "grad_norm": 0.5477951765060425, "learning_rate": 0.000497399008844575, "loss": 3.2816, "step": 16614 }, { "epoch": 0.81, "grad_norm": 0.5155587196350098, "learning_rate": 0.0004973874149327722, "loss": 2.9737, "step": 16615 }, { "epoch": 0.81, "grad_norm": 0.5152885317802429, "learning_rate": 0.0004973758205010889, "loss": 3.2941, "step": 16616 }, { "epoch": 0.81, "grad_norm": 0.5501928925514221, "learning_rate": 0.0004973642255495558, "loss": 3.3104, "step": 16617 }, { "epoch": 0.81, "grad_norm": 0.5133585333824158, "learning_rate": 0.0004973526300782032, "loss": 3.1066, "step": 16618 }, { "epoch": 0.81, "grad_norm": 0.5019925832748413, "learning_rate": 0.0004973410340870618, "loss": 3.2807, "step": 16619 }, { "epoch": 0.81, "grad_norm": 0.5474452376365662, "learning_rate": 0.0004973294375761621, "loss": 3.2518, "step": 16620 }, { "epoch": 0.81, "grad_norm": 0.5307877063751221, "learning_rate": 0.0004973178405455347, "loss": 2.98, "step": 16621 }, { "epoch": 0.81, "grad_norm": 0.5195660591125488, "learning_rate": 0.00049730624299521, "loss": 3.2952, "step": 16622 }, { "epoch": 0.81, "grad_norm": 0.5276235938072205, "learning_rate": 0.0004972946449252187, "loss": 3.2442, "step": 16623 }, { "epoch": 0.81, "grad_norm": 0.5224148035049438, "learning_rate": 0.0004972830463355912, "loss": 3.0754, "step": 16624 }, { "epoch": 0.81, "grad_norm": 0.5097349286079407, "learning_rate": 0.0004972714472263581, "loss": 3.3027, "step": 16625 }, { "epoch": 0.81, "grad_norm": 0.511198878288269, "learning_rate": 0.00049725984759755, "loss": 3.229, "step": 16626 }, { "epoch": 0.81, "grad_norm": 0.5765299797058105, "learning_rate": 0.0004972482474491975, "loss": 3.101, "step": 16627 }, { "epoch": 0.81, "grad_norm": 0.5171129703521729, "learning_rate": 0.0004972366467813309, "loss": 3.1579, "step": 16628 }, { "epoch": 0.81, "grad_norm": 0.5174678564071655, "learning_rate": 0.0004972250455939811, "loss": 3.2538, "step": 16629 }, { "epoch": 0.81, "grad_norm": 0.5216899514198303, "learning_rate": 0.0004972134438871786, "loss": 3.0923, "step": 16630 }, { "epoch": 0.82, "grad_norm": 0.5786752104759216, "learning_rate": 0.0004972018416609536, "loss": 3.127, "step": 16631 }, { "epoch": 0.82, "grad_norm": 0.5211129784584045, "learning_rate": 0.000497190238915337, "loss": 3.1011, "step": 16632 }, { "epoch": 0.82, "grad_norm": 0.5141299366950989, "learning_rate": 0.0004971786356503592, "loss": 2.9091, "step": 16633 }, { "epoch": 0.82, "grad_norm": 0.48368707299232483, "learning_rate": 0.0004971670318660509, "loss": 3.324, "step": 16634 }, { "epoch": 0.82, "grad_norm": 0.47780829668045044, "learning_rate": 0.0004971554275624425, "loss": 3.0679, "step": 16635 }, { "epoch": 0.82, "grad_norm": 0.5046222805976868, "learning_rate": 0.0004971438227395648, "loss": 3.2293, "step": 16636 }, { "epoch": 0.82, "grad_norm": 0.5113916993141174, "learning_rate": 0.000497132217397448, "loss": 3.0101, "step": 16637 }, { "epoch": 0.82, "grad_norm": 0.5229575037956238, "learning_rate": 0.000497120611536123, "loss": 3.2457, "step": 16638 }, { "epoch": 0.82, "grad_norm": 0.512291669845581, "learning_rate": 0.0004971090051556202, "loss": 3.259, "step": 16639 }, { "epoch": 0.82, "grad_norm": 0.5554661750793457, "learning_rate": 0.0004970973982559702, "loss": 3.1555, "step": 16640 }, { "epoch": 0.82, "grad_norm": 0.5595203042030334, "learning_rate": 0.0004970857908372037, "loss": 3.1944, "step": 16641 }, { "epoch": 0.82, "grad_norm": 0.5318277478218079, "learning_rate": 0.0004970741828993511, "loss": 2.9727, "step": 16642 }, { "epoch": 0.82, "grad_norm": 0.5298823714256287, "learning_rate": 0.0004970625744424431, "loss": 2.9161, "step": 16643 }, { "epoch": 0.82, "grad_norm": 0.5190830230712891, "learning_rate": 0.0004970509654665101, "loss": 3.2831, "step": 16644 }, { "epoch": 0.82, "grad_norm": 0.5548256039619446, "learning_rate": 0.0004970393559715827, "loss": 3.3253, "step": 16645 }, { "epoch": 0.82, "grad_norm": 0.5534726977348328, "learning_rate": 0.0004970277459576917, "loss": 3.0462, "step": 16646 }, { "epoch": 0.82, "grad_norm": 0.5380357503890991, "learning_rate": 0.0004970161354248675, "loss": 3.2276, "step": 16647 }, { "epoch": 0.82, "grad_norm": 0.5422712564468384, "learning_rate": 0.0004970045243731406, "loss": 3.0344, "step": 16648 }, { "epoch": 0.82, "grad_norm": 0.5209787487983704, "learning_rate": 0.0004969929128025419, "loss": 3.4289, "step": 16649 }, { "epoch": 0.82, "grad_norm": 0.5309789776802063, "learning_rate": 0.0004969813007131016, "loss": 3.2243, "step": 16650 }, { "epoch": 0.82, "grad_norm": 0.5202519297599792, "learning_rate": 0.0004969696881048507, "loss": 3.3096, "step": 16651 }, { "epoch": 0.82, "grad_norm": 0.5129031538963318, "learning_rate": 0.0004969580749778193, "loss": 3.2491, "step": 16652 }, { "epoch": 0.82, "grad_norm": 0.5805734395980835, "learning_rate": 0.0004969464613320384, "loss": 3.1584, "step": 16653 }, { "epoch": 0.82, "grad_norm": 0.5051552057266235, "learning_rate": 0.0004969348471675384, "loss": 3.1706, "step": 16654 }, { "epoch": 0.82, "grad_norm": 0.5367906093597412, "learning_rate": 0.0004969232324843497, "loss": 3.2394, "step": 16655 }, { "epoch": 0.82, "grad_norm": 0.48859554529190063, "learning_rate": 0.0004969116172825034, "loss": 3.1854, "step": 16656 }, { "epoch": 0.82, "grad_norm": 0.5127988457679749, "learning_rate": 0.0004969000015620297, "loss": 3.1622, "step": 16657 }, { "epoch": 0.82, "grad_norm": 0.5549734830856323, "learning_rate": 0.0004968883853229592, "loss": 3.1321, "step": 16658 }, { "epoch": 0.82, "grad_norm": 0.5598774552345276, "learning_rate": 0.0004968767685653226, "loss": 3.4022, "step": 16659 }, { "epoch": 0.82, "grad_norm": 0.5506905913352966, "learning_rate": 0.0004968651512891506, "loss": 3.3344, "step": 16660 }, { "epoch": 0.82, "grad_norm": 0.5095792412757874, "learning_rate": 0.0004968535334944736, "loss": 3.1894, "step": 16661 }, { "epoch": 0.82, "grad_norm": 0.526882529258728, "learning_rate": 0.0004968419151813224, "loss": 3.3501, "step": 16662 }, { "epoch": 0.82, "grad_norm": 0.5462402105331421, "learning_rate": 0.0004968302963497273, "loss": 2.8678, "step": 16663 }, { "epoch": 0.82, "grad_norm": 0.528898298740387, "learning_rate": 0.0004968186769997191, "loss": 3.1944, "step": 16664 }, { "epoch": 0.82, "grad_norm": 0.5134194493293762, "learning_rate": 0.0004968070571313285, "loss": 3.1576, "step": 16665 }, { "epoch": 0.82, "grad_norm": 0.5237089395523071, "learning_rate": 0.0004967954367445859, "loss": 3.267, "step": 16666 }, { "epoch": 0.82, "grad_norm": 0.558527946472168, "learning_rate": 0.0004967838158395219, "loss": 3.1597, "step": 16667 }, { "epoch": 0.82, "grad_norm": 0.5071585178375244, "learning_rate": 0.0004967721944161673, "loss": 3.2747, "step": 16668 }, { "epoch": 0.82, "grad_norm": 0.5414650440216064, "learning_rate": 0.0004967605724745527, "loss": 3.1734, "step": 16669 }, { "epoch": 0.82, "grad_norm": 0.48503783345222473, "learning_rate": 0.0004967489500147086, "loss": 3.1875, "step": 16670 }, { "epoch": 0.82, "grad_norm": 0.5278720855712891, "learning_rate": 0.0004967373270366655, "loss": 3.0154, "step": 16671 }, { "epoch": 0.82, "grad_norm": 0.5245896577835083, "learning_rate": 0.0004967257035404542, "loss": 2.9901, "step": 16672 }, { "epoch": 0.82, "grad_norm": 0.5041775107383728, "learning_rate": 0.0004967140795261053, "loss": 3.1008, "step": 16673 }, { "epoch": 0.82, "grad_norm": 0.5002780556678772, "learning_rate": 0.0004967024549936493, "loss": 3.3013, "step": 16674 }, { "epoch": 0.82, "grad_norm": 0.5385328531265259, "learning_rate": 0.000496690829943117, "loss": 3.3536, "step": 16675 }, { "epoch": 0.82, "grad_norm": 0.47223567962646484, "learning_rate": 0.0004966792043745389, "loss": 3.1565, "step": 16676 }, { "epoch": 0.82, "grad_norm": 0.5765127539634705, "learning_rate": 0.0004966675782879455, "loss": 3.1308, "step": 16677 }, { "epoch": 0.82, "grad_norm": 0.5132501125335693, "learning_rate": 0.0004966559516833677, "loss": 3.2131, "step": 16678 }, { "epoch": 0.82, "grad_norm": 0.5195982456207275, "learning_rate": 0.0004966443245608359, "loss": 3.3829, "step": 16679 }, { "epoch": 0.82, "grad_norm": 0.4910086989402771, "learning_rate": 0.0004966326969203807, "loss": 3.2672, "step": 16680 }, { "epoch": 0.82, "grad_norm": 0.5135030746459961, "learning_rate": 0.0004966210687620329, "loss": 3.3715, "step": 16681 }, { "epoch": 0.82, "grad_norm": 0.5263185501098633, "learning_rate": 0.0004966094400858231, "loss": 3.0218, "step": 16682 }, { "epoch": 0.82, "grad_norm": 0.5208287239074707, "learning_rate": 0.0004965978108917818, "loss": 3.1165, "step": 16683 }, { "epoch": 0.82, "grad_norm": 0.5360139012336731, "learning_rate": 0.0004965861811799397, "loss": 3.0333, "step": 16684 }, { "epoch": 0.82, "grad_norm": 0.47904497385025024, "learning_rate": 0.0004965745509503275, "loss": 3.2553, "step": 16685 }, { "epoch": 0.82, "grad_norm": 0.4816090166568756, "learning_rate": 0.0004965629202029758, "loss": 3.1955, "step": 16686 }, { "epoch": 0.82, "grad_norm": 0.5097158551216125, "learning_rate": 0.0004965512889379151, "loss": 3.2383, "step": 16687 }, { "epoch": 0.82, "grad_norm": 0.509074330329895, "learning_rate": 0.000496539657155176, "loss": 3.2118, "step": 16688 }, { "epoch": 0.82, "grad_norm": 0.5360403060913086, "learning_rate": 0.0004965280248547895, "loss": 2.9973, "step": 16689 }, { "epoch": 0.82, "grad_norm": 0.5154137015342712, "learning_rate": 0.0004965163920367859, "loss": 3.3851, "step": 16690 }, { "epoch": 0.82, "grad_norm": 0.48965588212013245, "learning_rate": 0.0004965047587011959, "loss": 3.2216, "step": 16691 }, { "epoch": 0.82, "grad_norm": 0.4903731048107147, "learning_rate": 0.0004964931248480503, "loss": 3.2034, "step": 16692 }, { "epoch": 0.82, "grad_norm": 0.5228937268257141, "learning_rate": 0.0004964814904773795, "loss": 3.1598, "step": 16693 }, { "epoch": 0.82, "grad_norm": 0.5695022940635681, "learning_rate": 0.0004964698555892144, "loss": 3.1179, "step": 16694 }, { "epoch": 0.82, "grad_norm": 0.5156790018081665, "learning_rate": 0.0004964582201835855, "loss": 3.0333, "step": 16695 }, { "epoch": 0.82, "grad_norm": 0.5029388666152954, "learning_rate": 0.0004964465842605234, "loss": 3.1303, "step": 16696 }, { "epoch": 0.82, "grad_norm": 0.5495452880859375, "learning_rate": 0.0004964349478200588, "loss": 3.178, "step": 16697 }, { "epoch": 0.82, "grad_norm": 0.4993706941604614, "learning_rate": 0.0004964233108622224, "loss": 3.4597, "step": 16698 }, { "epoch": 0.82, "grad_norm": 0.5246551632881165, "learning_rate": 0.0004964116733870448, "loss": 3.1988, "step": 16699 }, { "epoch": 0.82, "grad_norm": 0.5039535164833069, "learning_rate": 0.0004964000353945566, "loss": 3.3829, "step": 16700 }, { "epoch": 0.82, "grad_norm": 0.5181304812431335, "learning_rate": 0.0004963883968847884, "loss": 3.318, "step": 16701 }, { "epoch": 0.82, "grad_norm": 0.5846447944641113, "learning_rate": 0.0004963767578577712, "loss": 3.1945, "step": 16702 }, { "epoch": 0.82, "grad_norm": 0.518142580986023, "learning_rate": 0.0004963651183135353, "loss": 3.2293, "step": 16703 }, { "epoch": 0.82, "grad_norm": 0.5273866057395935, "learning_rate": 0.0004963534782521115, "loss": 3.0665, "step": 16704 }, { "epoch": 0.82, "grad_norm": 0.5090498328208923, "learning_rate": 0.0004963418376735303, "loss": 3.1259, "step": 16705 }, { "epoch": 0.82, "grad_norm": 0.533167839050293, "learning_rate": 0.0004963301965778226, "loss": 3.3109, "step": 16706 }, { "epoch": 0.82, "grad_norm": 0.5366831421852112, "learning_rate": 0.0004963185549650189, "loss": 3.0486, "step": 16707 }, { "epoch": 0.82, "grad_norm": 0.5503694415092468, "learning_rate": 0.00049630691283515, "loss": 3.4465, "step": 16708 }, { "epoch": 0.82, "grad_norm": 0.5301731824874878, "learning_rate": 0.0004962952701882463, "loss": 3.219, "step": 16709 }, { "epoch": 0.82, "grad_norm": 0.4929310977458954, "learning_rate": 0.0004962836270243388, "loss": 3.2798, "step": 16710 }, { "epoch": 0.82, "grad_norm": 0.5285437703132629, "learning_rate": 0.0004962719833434579, "loss": 3.0686, "step": 16711 }, { "epoch": 0.82, "grad_norm": 0.5135146975517273, "learning_rate": 0.0004962603391456345, "loss": 3.1963, "step": 16712 }, { "epoch": 0.82, "grad_norm": 0.5494550466537476, "learning_rate": 0.000496248694430899, "loss": 3.0039, "step": 16713 }, { "epoch": 0.82, "grad_norm": 0.5301375389099121, "learning_rate": 0.0004962370491992823, "loss": 3.179, "step": 16714 }, { "epoch": 0.82, "grad_norm": 0.5424580574035645, "learning_rate": 0.000496225403450815, "loss": 3.2411, "step": 16715 }, { "epoch": 0.82, "grad_norm": 0.5308148264884949, "learning_rate": 0.0004962137571855276, "loss": 3.3935, "step": 16716 }, { "epoch": 0.82, "grad_norm": 0.5148587226867676, "learning_rate": 0.0004962021104034511, "loss": 3.0353, "step": 16717 }, { "epoch": 0.82, "grad_norm": 0.5002628564834595, "learning_rate": 0.0004961904631046158, "loss": 3.2585, "step": 16718 }, { "epoch": 0.82, "grad_norm": 0.5126210451126099, "learning_rate": 0.0004961788152890527, "loss": 3.2791, "step": 16719 }, { "epoch": 0.82, "grad_norm": 0.5162221789360046, "learning_rate": 0.0004961671669567924, "loss": 3.1034, "step": 16720 }, { "epoch": 0.82, "grad_norm": 0.4941016137599945, "learning_rate": 0.0004961555181078655, "loss": 3.2295, "step": 16721 }, { "epoch": 0.82, "grad_norm": 0.5905763506889343, "learning_rate": 0.0004961438687423027, "loss": 3.0365, "step": 16722 }, { "epoch": 0.82, "grad_norm": 0.49847185611724854, "learning_rate": 0.0004961322188601347, "loss": 3.0049, "step": 16723 }, { "epoch": 0.82, "grad_norm": 0.49443936347961426, "learning_rate": 0.0004961205684613922, "loss": 3.2913, "step": 16724 }, { "epoch": 0.82, "grad_norm": 0.5241524577140808, "learning_rate": 0.0004961089175461059, "loss": 2.907, "step": 16725 }, { "epoch": 0.82, "grad_norm": 0.5237706303596497, "learning_rate": 0.0004960972661143064, "loss": 3.1307, "step": 16726 }, { "epoch": 0.82, "grad_norm": 0.5108638405799866, "learning_rate": 0.0004960856141660244, "loss": 3.271, "step": 16727 }, { "epoch": 0.82, "grad_norm": 0.591118335723877, "learning_rate": 0.0004960739617012906, "loss": 3.2943, "step": 16728 }, { "epoch": 0.82, "grad_norm": 0.5045537352561951, "learning_rate": 0.0004960623087201358, "loss": 3.2844, "step": 16729 }, { "epoch": 0.82, "grad_norm": 0.569861650466919, "learning_rate": 0.0004960506552225906, "loss": 3.243, "step": 16730 }, { "epoch": 0.82, "grad_norm": 0.5104923844337463, "learning_rate": 0.0004960390012086858, "loss": 3.2714, "step": 16731 }, { "epoch": 0.82, "grad_norm": 0.5149794220924377, "learning_rate": 0.0004960273466784519, "loss": 2.9799, "step": 16732 }, { "epoch": 0.82, "grad_norm": 0.5185301303863525, "learning_rate": 0.0004960156916319196, "loss": 3.1996, "step": 16733 }, { "epoch": 0.82, "grad_norm": 0.5345672369003296, "learning_rate": 0.0004960040360691199, "loss": 3.1292, "step": 16734 }, { "epoch": 0.82, "grad_norm": 0.5602273941040039, "learning_rate": 0.0004959923799900831, "loss": 3.1398, "step": 16735 }, { "epoch": 0.82, "grad_norm": 0.5504186153411865, "learning_rate": 0.0004959807233948403, "loss": 3.2962, "step": 16736 }, { "epoch": 0.82, "grad_norm": 0.5613498687744141, "learning_rate": 0.0004959690662834219, "loss": 3.1052, "step": 16737 }, { "epoch": 0.82, "grad_norm": 0.5954709053039551, "learning_rate": 0.0004959574086558586, "loss": 3.1258, "step": 16738 }, { "epoch": 0.82, "grad_norm": 0.5291658639907837, "learning_rate": 0.0004959457505121813, "loss": 3.2572, "step": 16739 }, { "epoch": 0.82, "grad_norm": 0.5105869174003601, "learning_rate": 0.0004959340918524205, "loss": 3.157, "step": 16740 }, { "epoch": 0.82, "grad_norm": 0.5140098333358765, "learning_rate": 0.0004959224326766071, "loss": 3.2509, "step": 16741 }, { "epoch": 0.82, "grad_norm": 0.5428385734558105, "learning_rate": 0.0004959107729847717, "loss": 2.8749, "step": 16742 }, { "epoch": 0.82, "grad_norm": 0.5247282385826111, "learning_rate": 0.000495899112776945, "loss": 2.9758, "step": 16743 }, { "epoch": 0.82, "grad_norm": 0.5175803303718567, "learning_rate": 0.0004958874520531578, "loss": 3.2636, "step": 16744 }, { "epoch": 0.82, "grad_norm": 0.5175894498825073, "learning_rate": 0.0004958757908134407, "loss": 3.1949, "step": 16745 }, { "epoch": 0.82, "grad_norm": 0.49212566018104553, "learning_rate": 0.0004958641290578245, "loss": 3.232, "step": 16746 }, { "epoch": 0.82, "grad_norm": 0.5174520015716553, "learning_rate": 0.0004958524667863399, "loss": 3.2229, "step": 16747 }, { "epoch": 0.82, "grad_norm": 0.5180816054344177, "learning_rate": 0.0004958408039990174, "loss": 3.1441, "step": 16748 }, { "epoch": 0.82, "grad_norm": 0.5078367590904236, "learning_rate": 0.0004958291406958881, "loss": 3.1435, "step": 16749 }, { "epoch": 0.82, "grad_norm": 0.5164198875427246, "learning_rate": 0.0004958174768769825, "loss": 3.0394, "step": 16750 }, { "epoch": 0.82, "grad_norm": 0.5266910791397095, "learning_rate": 0.0004958058125423315, "loss": 3.048, "step": 16751 }, { "epoch": 0.82, "grad_norm": 0.523522675037384, "learning_rate": 0.0004957941476919654, "loss": 3.1053, "step": 16752 }, { "epoch": 0.82, "grad_norm": 0.5101291537284851, "learning_rate": 0.0004957824823259154, "loss": 3.1284, "step": 16753 }, { "epoch": 0.82, "grad_norm": 0.5190765261650085, "learning_rate": 0.0004957708164442119, "loss": 3.1728, "step": 16754 }, { "epoch": 0.82, "grad_norm": 0.5073868632316589, "learning_rate": 0.0004957591500468856, "loss": 3.3285, "step": 16755 }, { "epoch": 0.82, "grad_norm": 0.5432148575782776, "learning_rate": 0.0004957474831339678, "loss": 2.9638, "step": 16756 }, { "epoch": 0.82, "grad_norm": 0.5055432319641113, "learning_rate": 0.0004957358157054885, "loss": 3.1185, "step": 16757 }, { "epoch": 0.82, "grad_norm": 0.5079781413078308, "learning_rate": 0.0004957241477614787, "loss": 3.0659, "step": 16758 }, { "epoch": 0.82, "grad_norm": 0.5281221866607666, "learning_rate": 0.0004957124793019694, "loss": 2.9227, "step": 16759 }, { "epoch": 0.82, "grad_norm": 0.5235075950622559, "learning_rate": 0.0004957008103269908, "loss": 3.1578, "step": 16760 }, { "epoch": 0.82, "grad_norm": 0.49377989768981934, "learning_rate": 0.0004956891408365741, "loss": 3.3034, "step": 16761 }, { "epoch": 0.82, "grad_norm": 0.5380136370658875, "learning_rate": 0.0004956774708307499, "loss": 3.1188, "step": 16762 }, { "epoch": 0.82, "grad_norm": 0.5021156072616577, "learning_rate": 0.0004956658003095488, "loss": 3.0115, "step": 16763 }, { "epoch": 0.82, "grad_norm": 0.5770875811576843, "learning_rate": 0.0004956541292730017, "loss": 3.1021, "step": 16764 }, { "epoch": 0.82, "grad_norm": 0.5153395533561707, "learning_rate": 0.0004956424577211392, "loss": 3.0592, "step": 16765 }, { "epoch": 0.82, "grad_norm": 0.5426536798477173, "learning_rate": 0.0004956307856539922, "loss": 3.2771, "step": 16766 }, { "epoch": 0.82, "grad_norm": 0.5408112406730652, "learning_rate": 0.0004956191130715915, "loss": 3.2663, "step": 16767 }, { "epoch": 0.82, "grad_norm": 0.5236942768096924, "learning_rate": 0.0004956074399739674, "loss": 3.2467, "step": 16768 }, { "epoch": 0.82, "grad_norm": 0.523478627204895, "learning_rate": 0.000495595766361151, "loss": 3.16, "step": 16769 }, { "epoch": 0.82, "grad_norm": 0.5341914296150208, "learning_rate": 0.0004955840922331732, "loss": 3.1799, "step": 16770 }, { "epoch": 0.82, "grad_norm": 0.5106289982795715, "learning_rate": 0.0004955724175900644, "loss": 3.0653, "step": 16771 }, { "epoch": 0.82, "grad_norm": 0.5742872357368469, "learning_rate": 0.0004955607424318555, "loss": 3.1742, "step": 16772 }, { "epoch": 0.82, "grad_norm": 0.5103678107261658, "learning_rate": 0.0004955490667585773, "loss": 3.3389, "step": 16773 }, { "epoch": 0.82, "grad_norm": 0.5044515132904053, "learning_rate": 0.0004955373905702604, "loss": 2.9904, "step": 16774 }, { "epoch": 0.82, "grad_norm": 0.5162826776504517, "learning_rate": 0.0004955257138669357, "loss": 3.3376, "step": 16775 }, { "epoch": 0.82, "grad_norm": 0.545150101184845, "learning_rate": 0.000495514036648634, "loss": 3.0011, "step": 16776 }, { "epoch": 0.82, "grad_norm": 0.5908412337303162, "learning_rate": 0.0004955023589153858, "loss": 3.2328, "step": 16777 }, { "epoch": 0.82, "grad_norm": 0.5039973258972168, "learning_rate": 0.000495490680667222, "loss": 3.0851, "step": 16778 }, { "epoch": 0.82, "grad_norm": 0.5710932016372681, "learning_rate": 0.0004954790019041735, "loss": 3.0769, "step": 16779 }, { "epoch": 0.82, "grad_norm": 0.48418980836868286, "learning_rate": 0.0004954673226262708, "loss": 3.1852, "step": 16780 }, { "epoch": 0.82, "grad_norm": 0.540973424911499, "learning_rate": 0.0004954556428335449, "loss": 3.1554, "step": 16781 }, { "epoch": 0.82, "grad_norm": 0.5249459147453308, "learning_rate": 0.0004954439625260264, "loss": 3.1507, "step": 16782 }, { "epoch": 0.82, "grad_norm": 0.562673807144165, "learning_rate": 0.000495432281703746, "loss": 3.2198, "step": 16783 }, { "epoch": 0.82, "grad_norm": 0.49689170718193054, "learning_rate": 0.0004954206003667347, "loss": 3.1578, "step": 16784 }, { "epoch": 0.82, "grad_norm": 0.5022982954978943, "learning_rate": 0.0004954089185150233, "loss": 3.2703, "step": 16785 }, { "epoch": 0.82, "grad_norm": 0.5085593461990356, "learning_rate": 0.0004953972361486423, "loss": 3.3836, "step": 16786 }, { "epoch": 0.82, "grad_norm": 0.5071133375167847, "learning_rate": 0.0004953855532676225, "loss": 3.1033, "step": 16787 }, { "epoch": 0.82, "grad_norm": 0.5436131358146667, "learning_rate": 0.0004953738698719948, "loss": 3.074, "step": 16788 }, { "epoch": 0.82, "grad_norm": 0.5148676037788391, "learning_rate": 0.0004953621859617899, "loss": 3.1817, "step": 16789 }, { "epoch": 0.82, "grad_norm": 0.4939463138580322, "learning_rate": 0.0004953505015370387, "loss": 3.221, "step": 16790 }, { "epoch": 0.82, "grad_norm": 0.49772271513938904, "learning_rate": 0.0004953388165977717, "loss": 3.0617, "step": 16791 }, { "epoch": 0.82, "grad_norm": 0.5314618945121765, "learning_rate": 0.00049532713114402, "loss": 3.2513, "step": 16792 }, { "epoch": 0.82, "grad_norm": 0.5048306584358215, "learning_rate": 0.0004953154451758143, "loss": 3.2086, "step": 16793 }, { "epoch": 0.82, "grad_norm": 0.5072234272956848, "learning_rate": 0.0004953037586931851, "loss": 3.1794, "step": 16794 }, { "epoch": 0.82, "grad_norm": 0.5115417838096619, "learning_rate": 0.0004952920716961635, "loss": 3.2774, "step": 16795 }, { "epoch": 0.82, "grad_norm": 0.5184221267700195, "learning_rate": 0.0004952803841847802, "loss": 3.1443, "step": 16796 }, { "epoch": 0.82, "grad_norm": 0.5140826106071472, "learning_rate": 0.0004952686961590658, "loss": 3.1012, "step": 16797 }, { "epoch": 0.82, "grad_norm": 0.5001091957092285, "learning_rate": 0.0004952570076190514, "loss": 3.2363, "step": 16798 }, { "epoch": 0.82, "grad_norm": 0.5328136682510376, "learning_rate": 0.0004952453185647674, "loss": 3.1932, "step": 16799 }, { "epoch": 0.82, "grad_norm": 0.5111890435218811, "learning_rate": 0.000495233628996245, "loss": 3.1625, "step": 16800 }, { "epoch": 0.82, "grad_norm": 0.5230638980865479, "learning_rate": 0.0004952219389135149, "loss": 3.1986, "step": 16801 }, { "epoch": 0.82, "grad_norm": 0.5159973502159119, "learning_rate": 0.0004952102483166076, "loss": 3.3281, "step": 16802 }, { "epoch": 0.82, "grad_norm": 0.5228443145751953, "learning_rate": 0.000495198557205554, "loss": 3.2278, "step": 16803 }, { "epoch": 0.82, "grad_norm": 0.5011665225028992, "learning_rate": 0.0004951868655803851, "loss": 2.9619, "step": 16804 }, { "epoch": 0.82, "grad_norm": 0.5224314332008362, "learning_rate": 0.0004951751734411316, "loss": 3.0738, "step": 16805 }, { "epoch": 0.82, "grad_norm": 0.5976347327232361, "learning_rate": 0.0004951634807878241, "loss": 3.1095, "step": 16806 }, { "epoch": 0.82, "grad_norm": 0.5195080041885376, "learning_rate": 0.0004951517876204938, "loss": 2.9196, "step": 16807 }, { "epoch": 0.82, "grad_norm": 0.5941303968429565, "learning_rate": 0.000495140093939171, "loss": 2.9227, "step": 16808 }, { "epoch": 0.82, "grad_norm": 0.48703116178512573, "learning_rate": 0.0004951283997438869, "loss": 3.1429, "step": 16809 }, { "epoch": 0.82, "grad_norm": 0.4839284420013428, "learning_rate": 0.0004951167050346721, "loss": 3.3139, "step": 16810 }, { "epoch": 0.82, "grad_norm": 0.5387030243873596, "learning_rate": 0.0004951050098115574, "loss": 3.2789, "step": 16811 }, { "epoch": 0.82, "grad_norm": 0.5388344526290894, "learning_rate": 0.0004950933140745737, "loss": 3.1641, "step": 16812 }, { "epoch": 0.82, "grad_norm": 0.5535876154899597, "learning_rate": 0.0004950816178237518, "loss": 3.4473, "step": 16813 }, { "epoch": 0.82, "grad_norm": 0.49097132682800293, "learning_rate": 0.0004950699210591223, "loss": 3.133, "step": 16814 }, { "epoch": 0.82, "grad_norm": 0.5270285606384277, "learning_rate": 0.0004950582237807163, "loss": 3.2307, "step": 16815 }, { "epoch": 0.82, "grad_norm": 0.48164236545562744, "learning_rate": 0.0004950465259885645, "loss": 3.1475, "step": 16816 }, { "epoch": 0.82, "grad_norm": 0.531959593296051, "learning_rate": 0.0004950348276826977, "loss": 2.9178, "step": 16817 }, { "epoch": 0.82, "grad_norm": 0.5036345720291138, "learning_rate": 0.0004950231288631466, "loss": 3.4169, "step": 16818 }, { "epoch": 0.82, "grad_norm": 0.5073093175888062, "learning_rate": 0.0004950114295299422, "loss": 3.1668, "step": 16819 }, { "epoch": 0.82, "grad_norm": 0.5157398581504822, "learning_rate": 0.0004949997296831152, "loss": 3.0857, "step": 16820 }, { "epoch": 0.82, "grad_norm": 0.5297724008560181, "learning_rate": 0.0004949880293226964, "loss": 3.0957, "step": 16821 }, { "epoch": 0.82, "grad_norm": 0.5831009149551392, "learning_rate": 0.0004949763284487166, "loss": 3.3712, "step": 16822 }, { "epoch": 0.82, "grad_norm": 0.5394142866134644, "learning_rate": 0.0004949646270612069, "loss": 3.1525, "step": 16823 }, { "epoch": 0.82, "grad_norm": 0.5234691500663757, "learning_rate": 0.0004949529251601977, "loss": 3.28, "step": 16824 }, { "epoch": 0.82, "grad_norm": 0.5316514372825623, "learning_rate": 0.00049494122274572, "loss": 3.252, "step": 16825 }, { "epoch": 0.82, "grad_norm": 0.5195567011833191, "learning_rate": 0.0004949295198178048, "loss": 3.2669, "step": 16826 }, { "epoch": 0.82, "grad_norm": 0.5230311751365662, "learning_rate": 0.0004949178163764827, "loss": 3.2243, "step": 16827 }, { "epoch": 0.82, "grad_norm": 0.5435062646865845, "learning_rate": 0.0004949061124217845, "loss": 3.031, "step": 16828 }, { "epoch": 0.82, "grad_norm": 0.5098403096199036, "learning_rate": 0.0004948944079537413, "loss": 3.1558, "step": 16829 }, { "epoch": 0.82, "grad_norm": 0.5283143520355225, "learning_rate": 0.0004948827029723834, "loss": 3.2979, "step": 16830 }, { "epoch": 0.82, "grad_norm": 0.5382714867591858, "learning_rate": 0.0004948709974777422, "loss": 3.2907, "step": 16831 }, { "epoch": 0.82, "grad_norm": 0.5517304539680481, "learning_rate": 0.0004948592914698483, "loss": 2.9451, "step": 16832 }, { "epoch": 0.82, "grad_norm": 0.5490243434906006, "learning_rate": 0.0004948475849487325, "loss": 2.9241, "step": 16833 }, { "epoch": 0.82, "grad_norm": 0.5387569665908813, "learning_rate": 0.0004948358779144256, "loss": 3.127, "step": 16834 }, { "epoch": 0.83, "grad_norm": 0.48919016122817993, "learning_rate": 0.0004948241703669585, "loss": 3.278, "step": 16835 }, { "epoch": 0.83, "grad_norm": 0.5328850746154785, "learning_rate": 0.0004948124623063621, "loss": 3.1679, "step": 16836 }, { "epoch": 0.83, "grad_norm": 0.5232701897621155, "learning_rate": 0.0004948007537326672, "loss": 3.0376, "step": 16837 }, { "epoch": 0.83, "grad_norm": 0.49141281843185425, "learning_rate": 0.0004947890446459046, "loss": 3.1381, "step": 16838 }, { "epoch": 0.83, "grad_norm": 0.508006751537323, "learning_rate": 0.0004947773350461051, "loss": 3.0339, "step": 16839 }, { "epoch": 0.83, "grad_norm": 0.5270553827285767, "learning_rate": 0.0004947656249332995, "loss": 3.0294, "step": 16840 }, { "epoch": 0.83, "grad_norm": 0.5074304342269897, "learning_rate": 0.0004947539143075188, "loss": 3.3718, "step": 16841 }, { "epoch": 0.83, "grad_norm": 0.5027759075164795, "learning_rate": 0.0004947422031687938, "loss": 3.2201, "step": 16842 }, { "epoch": 0.83, "grad_norm": 0.5462760329246521, "learning_rate": 0.0004947304915171553, "loss": 3.1749, "step": 16843 }, { "epoch": 0.83, "grad_norm": 0.5136876106262207, "learning_rate": 0.0004947187793526341, "loss": 3.087, "step": 16844 }, { "epoch": 0.83, "grad_norm": 0.5102071762084961, "learning_rate": 0.0004947070666752612, "loss": 3.119, "step": 16845 }, { "epoch": 0.83, "grad_norm": 0.5251736044883728, "learning_rate": 0.0004946953534850672, "loss": 3.0766, "step": 16846 }, { "epoch": 0.83, "grad_norm": 0.5002880692481995, "learning_rate": 0.0004946836397820833, "loss": 3.1961, "step": 16847 }, { "epoch": 0.83, "grad_norm": 0.5180115103721619, "learning_rate": 0.0004946719255663402, "loss": 3.291, "step": 16848 }, { "epoch": 0.83, "grad_norm": 0.5141701102256775, "learning_rate": 0.0004946602108378685, "loss": 3.2056, "step": 16849 }, { "epoch": 0.83, "grad_norm": 0.5428344011306763, "learning_rate": 0.0004946484955966994, "loss": 3.204, "step": 16850 }, { "epoch": 0.83, "grad_norm": 0.5158218145370483, "learning_rate": 0.0004946367798428636, "loss": 3.2134, "step": 16851 }, { "epoch": 0.83, "grad_norm": 0.5351789593696594, "learning_rate": 0.0004946250635763919, "loss": 3.0868, "step": 16852 }, { "epoch": 0.83, "grad_norm": 0.5360104441642761, "learning_rate": 0.0004946133467973153, "loss": 2.9659, "step": 16853 }, { "epoch": 0.83, "grad_norm": 0.5146509408950806, "learning_rate": 0.0004946016295056646, "loss": 3.2547, "step": 16854 }, { "epoch": 0.83, "grad_norm": 0.5186895132064819, "learning_rate": 0.0004945899117014706, "loss": 3.1007, "step": 16855 }, { "epoch": 0.83, "grad_norm": 0.5219805836677551, "learning_rate": 0.0004945781933847644, "loss": 2.9965, "step": 16856 }, { "epoch": 0.83, "grad_norm": 0.5439258813858032, "learning_rate": 0.0004945664745555766, "loss": 3.1165, "step": 16857 }, { "epoch": 0.83, "grad_norm": 0.5208057761192322, "learning_rate": 0.0004945547552139382, "loss": 3.1639, "step": 16858 }, { "epoch": 0.83, "grad_norm": 0.5104433298110962, "learning_rate": 0.0004945430353598799, "loss": 3.1975, "step": 16859 }, { "epoch": 0.83, "grad_norm": 0.5061667561531067, "learning_rate": 0.0004945313149934327, "loss": 3.0906, "step": 16860 }, { "epoch": 0.83, "grad_norm": 0.5064367651939392, "learning_rate": 0.0004945195941146275, "loss": 3.2914, "step": 16861 }, { "epoch": 0.83, "grad_norm": 0.5224226713180542, "learning_rate": 0.0004945078727234951, "loss": 3.2299, "step": 16862 }, { "epoch": 0.83, "grad_norm": 0.5368633270263672, "learning_rate": 0.0004944961508200664, "loss": 3.2864, "step": 16863 }, { "epoch": 0.83, "grad_norm": 0.5402868986129761, "learning_rate": 0.0004944844284043723, "loss": 3.0904, "step": 16864 }, { "epoch": 0.83, "grad_norm": 0.5218072533607483, "learning_rate": 0.0004944727054764436, "loss": 3.2323, "step": 16865 }, { "epoch": 0.83, "grad_norm": 0.5438421964645386, "learning_rate": 0.0004944609820363112, "loss": 3.0236, "step": 16866 }, { "epoch": 0.83, "grad_norm": 0.5093616843223572, "learning_rate": 0.0004944492580840061, "loss": 3.2269, "step": 16867 }, { "epoch": 0.83, "grad_norm": 0.5139485001564026, "learning_rate": 0.000494437533619559, "loss": 3.1943, "step": 16868 }, { "epoch": 0.83, "grad_norm": 0.5316404700279236, "learning_rate": 0.0004944258086430009, "loss": 3.2521, "step": 16869 }, { "epoch": 0.83, "grad_norm": 0.5201852321624756, "learning_rate": 0.0004944140831543626, "loss": 3.0447, "step": 16870 }, { "epoch": 0.83, "grad_norm": 0.5127416849136353, "learning_rate": 0.000494402357153675, "loss": 3.1414, "step": 16871 }, { "epoch": 0.83, "grad_norm": 0.5286155343055725, "learning_rate": 0.0004943906306409691, "loss": 2.8942, "step": 16872 }, { "epoch": 0.83, "grad_norm": 0.4988267421722412, "learning_rate": 0.0004943789036162756, "loss": 3.2984, "step": 16873 }, { "epoch": 0.83, "grad_norm": 0.5161953568458557, "learning_rate": 0.0004943671760796255, "loss": 3.3117, "step": 16874 }, { "epoch": 0.83, "grad_norm": 0.5332638025283813, "learning_rate": 0.0004943554480310497, "loss": 3.0845, "step": 16875 }, { "epoch": 0.83, "grad_norm": 0.5356992483139038, "learning_rate": 0.000494343719470579, "loss": 2.9884, "step": 16876 }, { "epoch": 0.83, "grad_norm": 0.5340079069137573, "learning_rate": 0.0004943319903982444, "loss": 3.1248, "step": 16877 }, { "epoch": 0.83, "grad_norm": 0.5949048399925232, "learning_rate": 0.0004943202608140767, "loss": 3.074, "step": 16878 }, { "epoch": 0.83, "grad_norm": 0.4927256107330322, "learning_rate": 0.0004943085307181069, "loss": 3.1862, "step": 16879 }, { "epoch": 0.83, "grad_norm": 0.5098362565040588, "learning_rate": 0.0004942968001103656, "loss": 3.249, "step": 16880 }, { "epoch": 0.83, "grad_norm": 0.559185802936554, "learning_rate": 0.0004942850689908842, "loss": 3.3038, "step": 16881 }, { "epoch": 0.83, "grad_norm": 0.5128463506698608, "learning_rate": 0.0004942733373596932, "loss": 3.3892, "step": 16882 }, { "epoch": 0.83, "grad_norm": 0.5219104290008545, "learning_rate": 0.0004942616052168236, "loss": 3.3491, "step": 16883 }, { "epoch": 0.83, "grad_norm": 0.5195140838623047, "learning_rate": 0.0004942498725623064, "loss": 3.1738, "step": 16884 }, { "epoch": 0.83, "grad_norm": 0.634857177734375, "learning_rate": 0.0004942381393961724, "loss": 3.1203, "step": 16885 }, { "epoch": 0.83, "grad_norm": 0.5101731419563293, "learning_rate": 0.0004942264057184524, "loss": 3.2297, "step": 16886 }, { "epoch": 0.83, "grad_norm": 0.5076460242271423, "learning_rate": 0.0004942146715291775, "loss": 3.2373, "step": 16887 }, { "epoch": 0.83, "grad_norm": 0.5291589498519897, "learning_rate": 0.0004942029368283786, "loss": 2.9861, "step": 16888 }, { "epoch": 0.83, "grad_norm": 0.5351511836051941, "learning_rate": 0.0004941912016160864, "loss": 3.0871, "step": 16889 }, { "epoch": 0.83, "grad_norm": 0.5280094146728516, "learning_rate": 0.0004941794658923321, "loss": 3.2685, "step": 16890 }, { "epoch": 0.83, "grad_norm": 0.5539615750312805, "learning_rate": 0.0004941677296571463, "loss": 3.3623, "step": 16891 }, { "epoch": 0.83, "grad_norm": 0.5121122002601624, "learning_rate": 0.0004941559929105602, "loss": 3.0476, "step": 16892 }, { "epoch": 0.83, "grad_norm": 0.551249623298645, "learning_rate": 0.0004941442556526045, "loss": 2.9187, "step": 16893 }, { "epoch": 0.83, "grad_norm": 0.5004875659942627, "learning_rate": 0.0004941325178833102, "loss": 3.1174, "step": 16894 }, { "epoch": 0.83, "grad_norm": 0.5540305972099304, "learning_rate": 0.0004941207796027084, "loss": 3.2438, "step": 16895 }, { "epoch": 0.83, "grad_norm": 0.5179722905158997, "learning_rate": 0.0004941090408108296, "loss": 3.0293, "step": 16896 }, { "epoch": 0.83, "grad_norm": 0.5183007717132568, "learning_rate": 0.000494097301507705, "loss": 3.0611, "step": 16897 }, { "epoch": 0.83, "grad_norm": 0.5226312279701233, "learning_rate": 0.0004940855616933654, "loss": 3.2248, "step": 16898 }, { "epoch": 0.83, "grad_norm": 0.5023093819618225, "learning_rate": 0.0004940738213678419, "loss": 3.1283, "step": 16899 }, { "epoch": 0.83, "grad_norm": 0.5120278596878052, "learning_rate": 0.0004940620805311654, "loss": 3.4025, "step": 16900 }, { "epoch": 0.83, "grad_norm": 0.5045427083969116, "learning_rate": 0.0004940503391833664, "loss": 3.2388, "step": 16901 }, { "epoch": 0.83, "grad_norm": 0.5513672232627869, "learning_rate": 0.0004940385973244765, "loss": 3.2291, "step": 16902 }, { "epoch": 0.83, "grad_norm": 0.5159168243408203, "learning_rate": 0.0004940268549545261, "loss": 3.0269, "step": 16903 }, { "epoch": 0.83, "grad_norm": 0.5583893656730652, "learning_rate": 0.0004940151120735463, "loss": 3.2074, "step": 16904 }, { "epoch": 0.83, "grad_norm": 0.5445465445518494, "learning_rate": 0.000494003368681568, "loss": 3.1999, "step": 16905 }, { "epoch": 0.83, "grad_norm": 0.5105122327804565, "learning_rate": 0.0004939916247786223, "loss": 3.1474, "step": 16906 }, { "epoch": 0.83, "grad_norm": 0.5456762313842773, "learning_rate": 0.0004939798803647398, "loss": 3.3013, "step": 16907 }, { "epoch": 0.83, "grad_norm": 0.5078533887863159, "learning_rate": 0.0004939681354399518, "loss": 3.309, "step": 16908 }, { "epoch": 0.83, "grad_norm": 0.5101911425590515, "learning_rate": 0.000493956390004289, "loss": 2.935, "step": 16909 }, { "epoch": 0.83, "grad_norm": 0.5133745074272156, "learning_rate": 0.0004939446440577823, "loss": 3.2223, "step": 16910 }, { "epoch": 0.83, "grad_norm": 0.5557057857513428, "learning_rate": 0.000493932897600463, "loss": 3.179, "step": 16911 }, { "epoch": 0.83, "grad_norm": 0.5115715861320496, "learning_rate": 0.0004939211506323615, "loss": 3.0295, "step": 16912 }, { "epoch": 0.83, "grad_norm": 0.5192151069641113, "learning_rate": 0.0004939094031535091, "loss": 3.2356, "step": 16913 }, { "epoch": 0.83, "grad_norm": 0.5089118480682373, "learning_rate": 0.0004938976551639368, "loss": 3.5075, "step": 16914 }, { "epoch": 0.83, "grad_norm": 0.5137461423873901, "learning_rate": 0.0004938859066636751, "loss": 3.2699, "step": 16915 }, { "epoch": 0.83, "grad_norm": 0.5064414143562317, "learning_rate": 0.0004938741576527555, "loss": 3.0867, "step": 16916 }, { "epoch": 0.83, "grad_norm": 0.4723142683506012, "learning_rate": 0.0004938624081312085, "loss": 3.1705, "step": 16917 }, { "epoch": 0.83, "grad_norm": 0.5553394556045532, "learning_rate": 0.0004938506580990654, "loss": 3.4789, "step": 16918 }, { "epoch": 0.83, "grad_norm": 0.6030188202857971, "learning_rate": 0.0004938389075563568, "loss": 3.1171, "step": 16919 }, { "epoch": 0.83, "grad_norm": 0.5029276013374329, "learning_rate": 0.0004938271565031139, "loss": 2.9408, "step": 16920 }, { "epoch": 0.83, "grad_norm": 0.5751969814300537, "learning_rate": 0.0004938154049393676, "loss": 2.8694, "step": 16921 }, { "epoch": 0.83, "grad_norm": 0.5256320834159851, "learning_rate": 0.0004938036528651488, "loss": 3.1617, "step": 16922 }, { "epoch": 0.83, "grad_norm": 0.5677057504653931, "learning_rate": 0.0004937919002804885, "loss": 3.2418, "step": 16923 }, { "epoch": 0.83, "grad_norm": 0.5156370401382446, "learning_rate": 0.0004937801471854176, "loss": 3.0715, "step": 16924 }, { "epoch": 0.83, "grad_norm": 0.5339087843894958, "learning_rate": 0.000493768393579967, "loss": 3.2977, "step": 16925 }, { "epoch": 0.83, "grad_norm": 0.5484094619750977, "learning_rate": 0.000493756639464168, "loss": 3.0911, "step": 16926 }, { "epoch": 0.83, "grad_norm": 0.6116513013839722, "learning_rate": 0.0004937448848380511, "loss": 3.3455, "step": 16927 }, { "epoch": 0.83, "grad_norm": 0.503478467464447, "learning_rate": 0.0004937331297016474, "loss": 3.2488, "step": 16928 }, { "epoch": 0.83, "grad_norm": 0.5339494943618774, "learning_rate": 0.000493721374054988, "loss": 3.0493, "step": 16929 }, { "epoch": 0.83, "grad_norm": 0.5303932428359985, "learning_rate": 0.0004937096178981038, "loss": 3.1785, "step": 16930 }, { "epoch": 0.83, "grad_norm": 0.48240697383880615, "learning_rate": 0.0004936978612310257, "loss": 3.32, "step": 16931 }, { "epoch": 0.83, "grad_norm": 0.5174941420555115, "learning_rate": 0.0004936861040537848, "loss": 3.2424, "step": 16932 }, { "epoch": 0.83, "grad_norm": 0.5395123362541199, "learning_rate": 0.0004936743463664119, "loss": 3.1445, "step": 16933 }, { "epoch": 0.83, "grad_norm": 0.5444930791854858, "learning_rate": 0.0004936625881689382, "loss": 3.0873, "step": 16934 }, { "epoch": 0.83, "grad_norm": 0.4984789490699768, "learning_rate": 0.0004936508294613944, "loss": 3.2639, "step": 16935 }, { "epoch": 0.83, "grad_norm": 0.5275869965553284, "learning_rate": 0.0004936390702438115, "loss": 3.1615, "step": 16936 }, { "epoch": 0.83, "grad_norm": 0.5192875266075134, "learning_rate": 0.0004936273105162205, "loss": 3.0108, "step": 16937 }, { "epoch": 0.83, "grad_norm": 0.5731741189956665, "learning_rate": 0.0004936155502786527, "loss": 3.1182, "step": 16938 }, { "epoch": 0.83, "grad_norm": 0.48318421840667725, "learning_rate": 0.0004936037895311386, "loss": 3.2587, "step": 16939 }, { "epoch": 0.83, "grad_norm": 0.49875524640083313, "learning_rate": 0.0004935920282737095, "loss": 3.2617, "step": 16940 }, { "epoch": 0.83, "grad_norm": 0.5293728709220886, "learning_rate": 0.0004935802665063962, "loss": 3.3205, "step": 16941 }, { "epoch": 0.83, "grad_norm": 0.4939599335193634, "learning_rate": 0.0004935685042292297, "loss": 3.3076, "step": 16942 }, { "epoch": 0.83, "grad_norm": 0.5058974027633667, "learning_rate": 0.0004935567414422411, "loss": 3.1232, "step": 16943 }, { "epoch": 0.83, "grad_norm": 0.517693281173706, "learning_rate": 0.0004935449781454612, "loss": 3.3309, "step": 16944 }, { "epoch": 0.83, "grad_norm": 0.5269827842712402, "learning_rate": 0.0004935332143389212, "loss": 3.2564, "step": 16945 }, { "epoch": 0.83, "grad_norm": 0.5228087306022644, "learning_rate": 0.0004935214500226518, "loss": 3.284, "step": 16946 }, { "epoch": 0.83, "grad_norm": 0.5041084885597229, "learning_rate": 0.0004935096851966842, "loss": 3.1787, "step": 16947 }, { "epoch": 0.83, "grad_norm": 0.5007483959197998, "learning_rate": 0.0004934979198610493, "loss": 3.0264, "step": 16948 }, { "epoch": 0.83, "grad_norm": 0.5107980370521545, "learning_rate": 0.0004934861540157782, "loss": 3.1986, "step": 16949 }, { "epoch": 0.83, "grad_norm": 0.5581706166267395, "learning_rate": 0.0004934743876609018, "loss": 3.2615, "step": 16950 }, { "epoch": 0.83, "grad_norm": 0.555671215057373, "learning_rate": 0.000493462620796451, "loss": 3.0465, "step": 16951 }, { "epoch": 0.83, "grad_norm": 0.5258693695068359, "learning_rate": 0.000493450853422457, "loss": 3.0284, "step": 16952 }, { "epoch": 0.83, "grad_norm": 0.5174445509910583, "learning_rate": 0.0004934390855389506, "loss": 3.0346, "step": 16953 }, { "epoch": 0.83, "grad_norm": 0.5268626809120178, "learning_rate": 0.000493427317145963, "loss": 2.8945, "step": 16954 }, { "epoch": 0.83, "grad_norm": 0.510155439376831, "learning_rate": 0.0004934155482435249, "loss": 3.211, "step": 16955 }, { "epoch": 0.83, "grad_norm": 0.5453447699546814, "learning_rate": 0.0004934037788316676, "loss": 3.0665, "step": 16956 }, { "epoch": 0.83, "grad_norm": 0.5393405556678772, "learning_rate": 0.0004933920089104219, "loss": 3.237, "step": 16957 }, { "epoch": 0.83, "grad_norm": 0.5359326004981995, "learning_rate": 0.000493380238479819, "loss": 3.0703, "step": 16958 }, { "epoch": 0.83, "grad_norm": 0.5360733270645142, "learning_rate": 0.0004933684675398896, "loss": 3.2203, "step": 16959 }, { "epoch": 0.83, "grad_norm": 0.5361143946647644, "learning_rate": 0.000493356696090665, "loss": 3.1254, "step": 16960 }, { "epoch": 0.83, "grad_norm": 0.5372664332389832, "learning_rate": 0.000493344924132176, "loss": 3.2338, "step": 16961 }, { "epoch": 0.83, "grad_norm": 0.49514126777648926, "learning_rate": 0.0004933331516644537, "loss": 3.2379, "step": 16962 }, { "epoch": 0.83, "grad_norm": 0.5488557815551758, "learning_rate": 0.0004933213786875289, "loss": 3.0711, "step": 16963 }, { "epoch": 0.83, "grad_norm": 0.533857524394989, "learning_rate": 0.0004933096052014331, "loss": 3.0261, "step": 16964 }, { "epoch": 0.83, "grad_norm": 0.5344963669776917, "learning_rate": 0.0004932978312061969, "loss": 3.234, "step": 16965 }, { "epoch": 0.83, "grad_norm": 0.5355231761932373, "learning_rate": 0.0004932860567018513, "loss": 3.2416, "step": 16966 }, { "epoch": 0.83, "grad_norm": 0.5180875062942505, "learning_rate": 0.0004932742816884276, "loss": 3.1729, "step": 16967 }, { "epoch": 0.83, "grad_norm": 0.5027824640274048, "learning_rate": 0.0004932625061659564, "loss": 3.1361, "step": 16968 }, { "epoch": 0.83, "grad_norm": 0.5158348083496094, "learning_rate": 0.0004932507301344693, "loss": 3.0643, "step": 16969 }, { "epoch": 0.83, "grad_norm": 0.5437960028648376, "learning_rate": 0.0004932389535939966, "loss": 3.1696, "step": 16970 }, { "epoch": 0.83, "grad_norm": 0.5309322476387024, "learning_rate": 0.0004932271765445699, "loss": 3.168, "step": 16971 }, { "epoch": 0.83, "grad_norm": 0.5301249027252197, "learning_rate": 0.00049321539898622, "loss": 2.9829, "step": 16972 }, { "epoch": 0.83, "grad_norm": 0.5049616098403931, "learning_rate": 0.0004932036209189778, "loss": 3.0103, "step": 16973 }, { "epoch": 0.83, "grad_norm": 0.5803033709526062, "learning_rate": 0.0004931918423428746, "loss": 3.1533, "step": 16974 }, { "epoch": 0.83, "grad_norm": 0.519544243812561, "learning_rate": 0.0004931800632579412, "loss": 3.3152, "step": 16975 }, { "epoch": 0.83, "grad_norm": 0.5230675935745239, "learning_rate": 0.0004931682836642088, "loss": 3.3468, "step": 16976 }, { "epoch": 0.83, "grad_norm": 0.5146074295043945, "learning_rate": 0.0004931565035617081, "loss": 3.1737, "step": 16977 }, { "epoch": 0.83, "grad_norm": 0.5033275485038757, "learning_rate": 0.0004931447229504705, "loss": 3.1468, "step": 16978 }, { "epoch": 0.83, "grad_norm": 0.5531938076019287, "learning_rate": 0.0004931329418305267, "loss": 3.1175, "step": 16979 }, { "epoch": 0.83, "grad_norm": 0.5533403754234314, "learning_rate": 0.0004931211602019082, "loss": 3.017, "step": 16980 }, { "epoch": 0.83, "grad_norm": 0.5449845194816589, "learning_rate": 0.0004931093780646455, "loss": 3.333, "step": 16981 }, { "epoch": 0.83, "grad_norm": 0.4969363808631897, "learning_rate": 0.0004930975954187699, "loss": 3.1212, "step": 16982 }, { "epoch": 0.83, "grad_norm": 0.5029790997505188, "learning_rate": 0.0004930858122643124, "loss": 3.006, "step": 16983 }, { "epoch": 0.83, "grad_norm": 0.47780704498291016, "learning_rate": 0.0004930740286013041, "loss": 3.2491, "step": 16984 }, { "epoch": 0.83, "grad_norm": 0.544589102268219, "learning_rate": 0.0004930622444297758, "loss": 3.2378, "step": 16985 }, { "epoch": 0.83, "grad_norm": 0.5521456003189087, "learning_rate": 0.0004930504597497589, "loss": 3.331, "step": 16986 }, { "epoch": 0.83, "grad_norm": 0.5297691822052002, "learning_rate": 0.0004930386745612841, "loss": 3.2069, "step": 16987 }, { "epoch": 0.83, "grad_norm": 0.5114834308624268, "learning_rate": 0.0004930268888643827, "loss": 3.2844, "step": 16988 }, { "epoch": 0.83, "grad_norm": 0.5340949296951294, "learning_rate": 0.0004930151026590855, "loss": 2.9535, "step": 16989 }, { "epoch": 0.83, "grad_norm": 0.5658579468727112, "learning_rate": 0.0004930033159454237, "loss": 3.1912, "step": 16990 }, { "epoch": 0.83, "grad_norm": 0.5223037004470825, "learning_rate": 0.0004929915287234283, "loss": 3.1753, "step": 16991 }, { "epoch": 0.83, "grad_norm": 0.5227300524711609, "learning_rate": 0.0004929797409931305, "loss": 3.1405, "step": 16992 }, { "epoch": 0.83, "grad_norm": 0.5639767646789551, "learning_rate": 0.0004929679527545611, "loss": 3.0427, "step": 16993 }, { "epoch": 0.83, "grad_norm": 0.5049862265586853, "learning_rate": 0.0004929561640077512, "loss": 3.2142, "step": 16994 }, { "epoch": 0.83, "grad_norm": 0.5709805488586426, "learning_rate": 0.0004929443747527319, "loss": 3.0873, "step": 16995 }, { "epoch": 0.83, "grad_norm": 0.5158815383911133, "learning_rate": 0.0004929325849895344, "loss": 3.2739, "step": 16996 }, { "epoch": 0.83, "grad_norm": 0.5977637767791748, "learning_rate": 0.0004929207947181895, "loss": 3.2, "step": 16997 }, { "epoch": 0.83, "grad_norm": 0.5404507517814636, "learning_rate": 0.0004929090039387283, "loss": 3.1116, "step": 16998 }, { "epoch": 0.83, "grad_norm": 0.5217320919036865, "learning_rate": 0.0004928972126511819, "loss": 3.1959, "step": 16999 }, { "epoch": 0.83, "grad_norm": 0.5257030129432678, "learning_rate": 0.0004928854208555815, "loss": 3.105, "step": 17000 }, { "epoch": 0.83, "grad_norm": 0.526759922504425, "learning_rate": 0.0004928736285519579, "loss": 3.2357, "step": 17001 }, { "epoch": 0.83, "grad_norm": 0.5135772824287415, "learning_rate": 0.0004928618357403424, "loss": 3.1463, "step": 17002 }, { "epoch": 0.83, "grad_norm": 0.5537254810333252, "learning_rate": 0.0004928500424207658, "loss": 3.2612, "step": 17003 }, { "epoch": 0.83, "grad_norm": 0.5435295104980469, "learning_rate": 0.0004928382485932594, "loss": 2.9233, "step": 17004 }, { "epoch": 0.83, "grad_norm": 0.5425717830657959, "learning_rate": 0.0004928264542578541, "loss": 3.1752, "step": 17005 }, { "epoch": 0.83, "grad_norm": 0.5152084827423096, "learning_rate": 0.000492814659414581, "loss": 3.3909, "step": 17006 }, { "epoch": 0.83, "grad_norm": 0.5382000207901001, "learning_rate": 0.0004928028640634714, "loss": 3.092, "step": 17007 }, { "epoch": 0.83, "grad_norm": 0.5504263043403625, "learning_rate": 0.000492791068204556, "loss": 3.1998, "step": 17008 }, { "epoch": 0.83, "grad_norm": 0.5138139128684998, "learning_rate": 0.0004927792718378661, "loss": 3.2068, "step": 17009 }, { "epoch": 0.83, "grad_norm": 0.5308842062950134, "learning_rate": 0.0004927674749634326, "loss": 3.3046, "step": 17010 }, { "epoch": 0.83, "grad_norm": 0.5355445146560669, "learning_rate": 0.0004927556775812866, "loss": 3.2819, "step": 17011 }, { "epoch": 0.83, "grad_norm": 0.5167343020439148, "learning_rate": 0.0004927438796914595, "loss": 3.0078, "step": 17012 }, { "epoch": 0.83, "grad_norm": 0.5470657348632812, "learning_rate": 0.000492732081293982, "loss": 3.1608, "step": 17013 }, { "epoch": 0.83, "grad_norm": 0.5836805701255798, "learning_rate": 0.0004927202823888853, "loss": 3.1161, "step": 17014 }, { "epoch": 0.83, "grad_norm": 0.558231770992279, "learning_rate": 0.0004927084829762004, "loss": 3.1451, "step": 17015 }, { "epoch": 0.83, "grad_norm": 0.9510738849639893, "learning_rate": 0.0004926966830559585, "loss": 3.305, "step": 17016 }, { "epoch": 0.83, "grad_norm": 0.5469083786010742, "learning_rate": 0.0004926848826281907, "loss": 3.3076, "step": 17017 }, { "epoch": 0.83, "grad_norm": 0.530071496963501, "learning_rate": 0.0004926730816929277, "loss": 3.2547, "step": 17018 }, { "epoch": 0.83, "grad_norm": 0.5168865919113159, "learning_rate": 0.0004926612802502011, "loss": 3.1445, "step": 17019 }, { "epoch": 0.83, "grad_norm": 0.5375450849533081, "learning_rate": 0.0004926494783000418, "loss": 3.1627, "step": 17020 }, { "epoch": 0.83, "grad_norm": 0.5115398168563843, "learning_rate": 0.0004926376758424808, "loss": 3.2687, "step": 17021 }, { "epoch": 0.83, "grad_norm": 0.571524441242218, "learning_rate": 0.0004926258728775492, "loss": 3.0509, "step": 17022 }, { "epoch": 0.83, "grad_norm": 0.5220298767089844, "learning_rate": 0.0004926140694052782, "loss": 3.0722, "step": 17023 }, { "epoch": 0.83, "grad_norm": 0.5590171217918396, "learning_rate": 0.0004926022654256989, "loss": 3.2197, "step": 17024 }, { "epoch": 0.83, "grad_norm": 0.5292062163352966, "learning_rate": 0.0004925904609388421, "loss": 3.2796, "step": 17025 }, { "epoch": 0.83, "grad_norm": 0.4746219515800476, "learning_rate": 0.0004925786559447391, "loss": 3.2434, "step": 17026 }, { "epoch": 0.83, "grad_norm": 0.5537968277931213, "learning_rate": 0.0004925668504434211, "loss": 3.0872, "step": 17027 }, { "epoch": 0.83, "grad_norm": 0.5214465260505676, "learning_rate": 0.0004925550444349191, "loss": 3.0574, "step": 17028 }, { "epoch": 0.83, "grad_norm": 0.532576858997345, "learning_rate": 0.0004925432379192641, "loss": 3.3728, "step": 17029 }, { "epoch": 0.83, "grad_norm": 0.5130587816238403, "learning_rate": 0.0004925314308964872, "loss": 3.3297, "step": 17030 }, { "epoch": 0.83, "grad_norm": 0.5362662076950073, "learning_rate": 0.0004925196233666196, "loss": 3.3526, "step": 17031 }, { "epoch": 0.83, "grad_norm": 0.555117666721344, "learning_rate": 0.0004925078153296924, "loss": 3.2804, "step": 17032 }, { "epoch": 0.83, "grad_norm": 0.5339804291725159, "learning_rate": 0.0004924960067857367, "loss": 3.2155, "step": 17033 }, { "epoch": 0.83, "grad_norm": 0.5521584749221802, "learning_rate": 0.0004924841977347835, "loss": 3.2564, "step": 17034 }, { "epoch": 0.83, "grad_norm": 0.6111563444137573, "learning_rate": 0.0004924723881768639, "loss": 2.9852, "step": 17035 }, { "epoch": 0.83, "grad_norm": 0.5429837107658386, "learning_rate": 0.0004924605781120092, "loss": 3.0712, "step": 17036 }, { "epoch": 0.83, "grad_norm": 0.5211032629013062, "learning_rate": 0.0004924487675402504, "loss": 3.0123, "step": 17037 }, { "epoch": 0.83, "grad_norm": 0.5365507006645203, "learning_rate": 0.0004924369564616185, "loss": 3.0002, "step": 17038 }, { "epoch": 0.84, "grad_norm": 0.5042828321456909, "learning_rate": 0.0004924251448761446, "loss": 3.0771, "step": 17039 }, { "epoch": 0.84, "grad_norm": 0.5612789988517761, "learning_rate": 0.0004924133327838601, "loss": 3.0335, "step": 17040 }, { "epoch": 0.84, "grad_norm": 0.5127493739128113, "learning_rate": 0.0004924015201847958, "loss": 3.1882, "step": 17041 }, { "epoch": 0.84, "grad_norm": 0.5296911001205444, "learning_rate": 0.000492389707078983, "loss": 3.143, "step": 17042 }, { "epoch": 0.84, "grad_norm": 0.5228750109672546, "learning_rate": 0.0004923778934664526, "loss": 3.1002, "step": 17043 }, { "epoch": 0.84, "grad_norm": 0.5364986658096313, "learning_rate": 0.000492366079347236, "loss": 3.0465, "step": 17044 }, { "epoch": 0.84, "grad_norm": 0.49486231803894043, "learning_rate": 0.000492354264721364, "loss": 3.1087, "step": 17045 }, { "epoch": 0.84, "grad_norm": 0.5611582398414612, "learning_rate": 0.0004923424495888681, "loss": 3.0845, "step": 17046 }, { "epoch": 0.84, "grad_norm": 0.6187002658843994, "learning_rate": 0.0004923306339497791, "loss": 3.0551, "step": 17047 }, { "epoch": 0.84, "grad_norm": 0.5191326141357422, "learning_rate": 0.0004923188178041282, "loss": 3.0557, "step": 17048 }, { "epoch": 0.84, "grad_norm": 0.5312925577163696, "learning_rate": 0.0004923070011519466, "loss": 3.4865, "step": 17049 }, { "epoch": 0.84, "grad_norm": 0.5266924500465393, "learning_rate": 0.0004922951839932653, "loss": 3.2505, "step": 17050 }, { "epoch": 0.84, "grad_norm": 0.5350109934806824, "learning_rate": 0.0004922833663281156, "loss": 3.3707, "step": 17051 }, { "epoch": 0.84, "grad_norm": 0.4987470507621765, "learning_rate": 0.0004922715481565284, "loss": 3.332, "step": 17052 }, { "epoch": 0.84, "grad_norm": 0.49026739597320557, "learning_rate": 0.000492259729478535, "loss": 3.1229, "step": 17053 }, { "epoch": 0.84, "grad_norm": 0.5140583515167236, "learning_rate": 0.0004922479102941665, "loss": 3.2348, "step": 17054 }, { "epoch": 0.84, "grad_norm": 0.5144451260566711, "learning_rate": 0.0004922360906034538, "loss": 3.0013, "step": 17055 }, { "epoch": 0.84, "grad_norm": 0.5055959820747375, "learning_rate": 0.0004922242704064285, "loss": 3.2636, "step": 17056 }, { "epoch": 0.84, "grad_norm": 0.5427704453468323, "learning_rate": 0.0004922124497031214, "loss": 3.36, "step": 17057 }, { "epoch": 0.84, "grad_norm": 0.5104376673698425, "learning_rate": 0.0004922006284935637, "loss": 3.3998, "step": 17058 }, { "epoch": 0.84, "grad_norm": 0.5442894697189331, "learning_rate": 0.0004921888067777865, "loss": 3.113, "step": 17059 }, { "epoch": 0.84, "grad_norm": 0.5200850963592529, "learning_rate": 0.000492176984555821, "loss": 3.0532, "step": 17060 }, { "epoch": 0.84, "grad_norm": 0.4936150312423706, "learning_rate": 0.0004921651618276982, "loss": 3.0992, "step": 17061 }, { "epoch": 0.84, "grad_norm": 0.49831634759902954, "learning_rate": 0.0004921533385934495, "loss": 3.2988, "step": 17062 }, { "epoch": 0.84, "grad_norm": 0.5185843706130981, "learning_rate": 0.0004921415148531058, "loss": 3.1793, "step": 17063 }, { "epoch": 0.84, "grad_norm": 0.5367192029953003, "learning_rate": 0.0004921296906066984, "loss": 3.2338, "step": 17064 }, { "epoch": 0.84, "grad_norm": 0.5617893934249878, "learning_rate": 0.0004921178658542582, "loss": 3.0399, "step": 17065 }, { "epoch": 0.84, "grad_norm": 0.4962318241596222, "learning_rate": 0.0004921060405958167, "loss": 3.1821, "step": 17066 }, { "epoch": 0.84, "grad_norm": 0.5281431674957275, "learning_rate": 0.0004920942148314047, "loss": 3.2091, "step": 17067 }, { "epoch": 0.84, "grad_norm": 0.5583265423774719, "learning_rate": 0.0004920823885610537, "loss": 3.1087, "step": 17068 }, { "epoch": 0.84, "grad_norm": 0.5328853726387024, "learning_rate": 0.0004920705617847945, "loss": 3.2278, "step": 17069 }, { "epoch": 0.84, "grad_norm": 0.5027572512626648, "learning_rate": 0.0004920587345026585, "loss": 3.1542, "step": 17070 }, { "epoch": 0.84, "grad_norm": 0.6189442873001099, "learning_rate": 0.0004920469067146768, "loss": 3.259, "step": 17071 }, { "epoch": 0.84, "grad_norm": 0.5087341666221619, "learning_rate": 0.0004920350784208803, "loss": 2.9372, "step": 17072 }, { "epoch": 0.84, "grad_norm": 0.5113468170166016, "learning_rate": 0.0004920232496213005, "loss": 3.241, "step": 17073 }, { "epoch": 0.84, "grad_norm": 0.5053597688674927, "learning_rate": 0.0004920114203159683, "loss": 3.2242, "step": 17074 }, { "epoch": 0.84, "grad_norm": 0.5626222491264343, "learning_rate": 0.0004919995905049153, "loss": 3.0204, "step": 17075 }, { "epoch": 0.84, "grad_norm": 0.5104367136955261, "learning_rate": 0.000491987760188172, "loss": 3.0962, "step": 17076 }, { "epoch": 0.84, "grad_norm": 0.507770299911499, "learning_rate": 0.00049197592936577, "loss": 3.3188, "step": 17077 }, { "epoch": 0.84, "grad_norm": 0.5115891695022583, "learning_rate": 0.0004919640980377404, "loss": 2.9712, "step": 17078 }, { "epoch": 0.84, "grad_norm": 0.488614946603775, "learning_rate": 0.0004919522662041141, "loss": 3.3266, "step": 17079 }, { "epoch": 0.84, "grad_norm": 0.47894060611724854, "learning_rate": 0.0004919404338649227, "loss": 3.1626, "step": 17080 }, { "epoch": 0.84, "grad_norm": 0.5004280805587769, "learning_rate": 0.000491928601020197, "loss": 3.016, "step": 17081 }, { "epoch": 0.84, "grad_norm": 0.5228533148765564, "learning_rate": 0.0004919167676699684, "loss": 3.4334, "step": 17082 }, { "epoch": 0.84, "grad_norm": 0.5518790483474731, "learning_rate": 0.0004919049338142679, "loss": 3.1275, "step": 17083 }, { "epoch": 0.84, "grad_norm": 0.49410319328308105, "learning_rate": 0.0004918930994531268, "loss": 2.9918, "step": 17084 }, { "epoch": 0.84, "grad_norm": 0.4980878233909607, "learning_rate": 0.0004918812645865762, "loss": 2.9434, "step": 17085 }, { "epoch": 0.84, "grad_norm": 0.5068234801292419, "learning_rate": 0.0004918694292146473, "loss": 3.3573, "step": 17086 }, { "epoch": 0.84, "grad_norm": 0.5471787452697754, "learning_rate": 0.000491857593337371, "loss": 3.1763, "step": 17087 }, { "epoch": 0.84, "grad_norm": 0.519198477268219, "learning_rate": 0.000491845756954779, "loss": 3.2118, "step": 17088 }, { "epoch": 0.84, "grad_norm": 0.5100758075714111, "learning_rate": 0.0004918339200669021, "loss": 3.3549, "step": 17089 }, { "epoch": 0.84, "grad_norm": 0.525219202041626, "learning_rate": 0.0004918220826737717, "loss": 3.1919, "step": 17090 }, { "epoch": 0.84, "grad_norm": 0.5719212889671326, "learning_rate": 0.0004918102447754187, "loss": 3.0999, "step": 17091 }, { "epoch": 0.84, "grad_norm": 0.5068230032920837, "learning_rate": 0.0004917984063718745, "loss": 3.3169, "step": 17092 }, { "epoch": 0.84, "grad_norm": 0.5205427408218384, "learning_rate": 0.0004917865674631702, "loss": 3.2123, "step": 17093 }, { "epoch": 0.84, "grad_norm": 0.5136102437973022, "learning_rate": 0.0004917747280493369, "loss": 3.4344, "step": 17094 }, { "epoch": 0.84, "grad_norm": 0.5310423374176025, "learning_rate": 0.0004917628881304061, "loss": 3.081, "step": 17095 }, { "epoch": 0.84, "grad_norm": 0.5061217546463013, "learning_rate": 0.0004917510477064085, "loss": 2.9725, "step": 17096 }, { "epoch": 0.84, "grad_norm": 0.5577815175056458, "learning_rate": 0.0004917392067773756, "loss": 3.2315, "step": 17097 }, { "epoch": 0.84, "grad_norm": 0.5072141289710999, "learning_rate": 0.0004917273653433388, "loss": 3.2273, "step": 17098 }, { "epoch": 0.84, "grad_norm": 0.5190002918243408, "learning_rate": 0.0004917155234043287, "loss": 3.1358, "step": 17099 }, { "epoch": 0.84, "grad_norm": 0.49393171072006226, "learning_rate": 0.000491703680960377, "loss": 3.1898, "step": 17100 }, { "epoch": 0.84, "grad_norm": 0.49661365151405334, "learning_rate": 0.0004916918380115145, "loss": 3.1599, "step": 17101 }, { "epoch": 0.84, "grad_norm": 0.524917721748352, "learning_rate": 0.0004916799945577727, "loss": 3.2517, "step": 17102 }, { "epoch": 0.84, "grad_norm": 0.5378203988075256, "learning_rate": 0.0004916681505991827, "loss": 3.2473, "step": 17103 }, { "epoch": 0.84, "grad_norm": 0.5326823592185974, "learning_rate": 0.0004916563061357756, "loss": 3.1216, "step": 17104 }, { "epoch": 0.84, "grad_norm": 0.5417553782463074, "learning_rate": 0.0004916444611675828, "loss": 3.2, "step": 17105 }, { "epoch": 0.84, "grad_norm": 0.5240073204040527, "learning_rate": 0.0004916326156946353, "loss": 3.0368, "step": 17106 }, { "epoch": 0.84, "grad_norm": 0.5561971664428711, "learning_rate": 0.0004916207697169643, "loss": 3.2357, "step": 17107 }, { "epoch": 0.84, "grad_norm": 0.5211239457130432, "learning_rate": 0.0004916089232346012, "loss": 3.2528, "step": 17108 }, { "epoch": 0.84, "grad_norm": 0.4768798053264618, "learning_rate": 0.000491597076247577, "loss": 3.3263, "step": 17109 }, { "epoch": 0.84, "grad_norm": 0.4900779128074646, "learning_rate": 0.0004915852287559231, "loss": 3.2368, "step": 17110 }, { "epoch": 0.84, "grad_norm": 0.5443329811096191, "learning_rate": 0.0004915733807596705, "loss": 2.9507, "step": 17111 }, { "epoch": 0.84, "grad_norm": 0.5072310566902161, "learning_rate": 0.0004915615322588503, "loss": 3.0643, "step": 17112 }, { "epoch": 0.84, "grad_norm": 0.5106002688407898, "learning_rate": 0.0004915496832534941, "loss": 3.0003, "step": 17113 }, { "epoch": 0.84, "grad_norm": 0.5489236116409302, "learning_rate": 0.0004915378337436328, "loss": 3.0268, "step": 17114 }, { "epoch": 0.84, "grad_norm": 0.48166120052337646, "learning_rate": 0.0004915259837292978, "loss": 3.3047, "step": 17115 }, { "epoch": 0.84, "grad_norm": 0.5533964037895203, "learning_rate": 0.0004915141332105203, "loss": 3.2744, "step": 17116 }, { "epoch": 0.84, "grad_norm": 0.5538474321365356, "learning_rate": 0.0004915022821873311, "loss": 3.2312, "step": 17117 }, { "epoch": 0.84, "grad_norm": 0.5147866010665894, "learning_rate": 0.0004914904306597621, "loss": 2.9823, "step": 17118 }, { "epoch": 0.84, "grad_norm": 0.5750763416290283, "learning_rate": 0.0004914785786278439, "loss": 3.129, "step": 17119 }, { "epoch": 0.84, "grad_norm": 0.5425081253051758, "learning_rate": 0.0004914667260916081, "loss": 3.2427, "step": 17120 }, { "epoch": 0.84, "grad_norm": 0.5620341897010803, "learning_rate": 0.0004914548730510859, "loss": 3.1747, "step": 17121 }, { "epoch": 0.84, "grad_norm": 0.5213549137115479, "learning_rate": 0.0004914430195063083, "loss": 3.005, "step": 17122 }, { "epoch": 0.84, "grad_norm": 0.5401297807693481, "learning_rate": 0.0004914311654573066, "loss": 3.2856, "step": 17123 }, { "epoch": 0.84, "grad_norm": 0.5165397524833679, "learning_rate": 0.000491419310904112, "loss": 3.1327, "step": 17124 }, { "epoch": 0.84, "grad_norm": 0.5052899718284607, "learning_rate": 0.0004914074558467559, "loss": 3.3247, "step": 17125 }, { "epoch": 0.84, "grad_norm": 0.5615530014038086, "learning_rate": 0.0004913956002852693, "loss": 3.2895, "step": 17126 }, { "epoch": 0.84, "grad_norm": 0.5121963024139404, "learning_rate": 0.0004913837442196836, "loss": 3.0851, "step": 17127 }, { "epoch": 0.84, "grad_norm": 0.5730823278427124, "learning_rate": 0.0004913718876500299, "loss": 3.1933, "step": 17128 }, { "epoch": 0.84, "grad_norm": 0.5356817245483398, "learning_rate": 0.0004913600305763396, "loss": 3.3342, "step": 17129 }, { "epoch": 0.84, "grad_norm": 0.5603740811347961, "learning_rate": 0.0004913481729986436, "loss": 3.223, "step": 17130 }, { "epoch": 0.84, "grad_norm": 0.6096175312995911, "learning_rate": 0.0004913363149169735, "loss": 3.1724, "step": 17131 }, { "epoch": 0.84, "grad_norm": 0.5367498397827148, "learning_rate": 0.0004913244563313602, "loss": 3.297, "step": 17132 }, { "epoch": 0.84, "grad_norm": 0.4697509706020355, "learning_rate": 0.0004913125972418352, "loss": 3.3864, "step": 17133 }, { "epoch": 0.84, "grad_norm": 0.5341188907623291, "learning_rate": 0.0004913007376484297, "loss": 3.1566, "step": 17134 }, { "epoch": 0.84, "grad_norm": 0.5422013401985168, "learning_rate": 0.0004912888775511748, "loss": 2.9892, "step": 17135 }, { "epoch": 0.84, "grad_norm": 0.5414844155311584, "learning_rate": 0.0004912770169501018, "loss": 3.0971, "step": 17136 }, { "epoch": 0.84, "grad_norm": 0.5191177129745483, "learning_rate": 0.000491265155845242, "loss": 3.2724, "step": 17137 }, { "epoch": 0.84, "grad_norm": 0.541873037815094, "learning_rate": 0.0004912532942366266, "loss": 3.1036, "step": 17138 }, { "epoch": 0.84, "grad_norm": 0.5198567509651184, "learning_rate": 0.0004912414321242869, "loss": 3.0269, "step": 17139 }, { "epoch": 0.84, "grad_norm": 0.5153297781944275, "learning_rate": 0.0004912295695082539, "loss": 2.9057, "step": 17140 }, { "epoch": 0.84, "grad_norm": 0.5370156764984131, "learning_rate": 0.0004912177063885591, "loss": 3.333, "step": 17141 }, { "epoch": 0.84, "grad_norm": 0.5156940817832947, "learning_rate": 0.0004912058427652337, "loss": 3.1685, "step": 17142 }, { "epoch": 0.84, "grad_norm": 0.558294415473938, "learning_rate": 0.0004911939786383089, "loss": 3.0098, "step": 17143 }, { "epoch": 0.84, "grad_norm": 0.5128892064094543, "learning_rate": 0.0004911821140078158, "loss": 3.2013, "step": 17144 }, { "epoch": 0.84, "grad_norm": 0.5227497816085815, "learning_rate": 0.0004911702488737859, "loss": 3.3653, "step": 17145 }, { "epoch": 0.84, "grad_norm": 0.5522597432136536, "learning_rate": 0.0004911583832362504, "loss": 3.0272, "step": 17146 }, { "epoch": 0.84, "grad_norm": 0.5611497759819031, "learning_rate": 0.0004911465170952405, "loss": 3.3738, "step": 17147 }, { "epoch": 0.84, "grad_norm": 0.5275722742080688, "learning_rate": 0.0004911346504507874, "loss": 3.2767, "step": 17148 }, { "epoch": 0.84, "grad_norm": 0.5308437347412109, "learning_rate": 0.0004911227833029225, "loss": 3.2512, "step": 17149 }, { "epoch": 0.84, "grad_norm": 0.5144796371459961, "learning_rate": 0.000491110915651677, "loss": 3.2331, "step": 17150 }, { "epoch": 0.84, "grad_norm": 0.5319437384605408, "learning_rate": 0.0004910990474970821, "loss": 3.0634, "step": 17151 }, { "epoch": 0.84, "grad_norm": 0.5378063917160034, "learning_rate": 0.000491087178839169, "loss": 3.1613, "step": 17152 }, { "epoch": 0.84, "grad_norm": 0.4878802001476288, "learning_rate": 0.000491075309677969, "loss": 3.1845, "step": 17153 }, { "epoch": 0.84, "grad_norm": 0.5802315473556519, "learning_rate": 0.0004910634400135136, "loss": 3.2188, "step": 17154 }, { "epoch": 0.84, "grad_norm": 0.5172027945518494, "learning_rate": 0.0004910515698458338, "loss": 3.0371, "step": 17155 }, { "epoch": 0.84, "grad_norm": 0.5362008213996887, "learning_rate": 0.0004910396991749608, "loss": 3.1302, "step": 17156 }, { "epoch": 0.84, "grad_norm": 0.50388503074646, "learning_rate": 0.0004910278280009263, "loss": 3.1382, "step": 17157 }, { "epoch": 0.84, "grad_norm": 0.5768753886222839, "learning_rate": 0.0004910159563237609, "loss": 3.0569, "step": 17158 }, { "epoch": 0.84, "grad_norm": 0.5347177386283875, "learning_rate": 0.0004910040841434964, "loss": 3.1319, "step": 17159 }, { "epoch": 0.84, "grad_norm": 0.531885027885437, "learning_rate": 0.0004909922114601639, "loss": 3.4059, "step": 17160 }, { "epoch": 0.84, "grad_norm": 0.5703318119049072, "learning_rate": 0.0004909803382737947, "loss": 3.1653, "step": 17161 }, { "epoch": 0.84, "grad_norm": 0.5395308136940002, "learning_rate": 0.0004909684645844201, "loss": 3.2048, "step": 17162 }, { "epoch": 0.84, "grad_norm": 0.5464442372322083, "learning_rate": 0.0004909565903920711, "loss": 2.963, "step": 17163 }, { "epoch": 0.84, "grad_norm": 0.5010871887207031, "learning_rate": 0.0004909447156967794, "loss": 3.3017, "step": 17164 }, { "epoch": 0.84, "grad_norm": 0.5098520517349243, "learning_rate": 0.000490932840498576, "loss": 3.076, "step": 17165 }, { "epoch": 0.84, "grad_norm": 0.6024291515350342, "learning_rate": 0.0004909209647974923, "loss": 3.2139, "step": 17166 }, { "epoch": 0.84, "grad_norm": 0.521047830581665, "learning_rate": 0.0004909090885935594, "loss": 3.1004, "step": 17167 }, { "epoch": 0.84, "grad_norm": 0.534773051738739, "learning_rate": 0.0004908972118868088, "loss": 3.2295, "step": 17168 }, { "epoch": 0.84, "grad_norm": 0.5155094265937805, "learning_rate": 0.0004908853346772716, "loss": 3.1153, "step": 17169 }, { "epoch": 0.84, "grad_norm": 0.513957142829895, "learning_rate": 0.0004908734569649793, "loss": 3.0931, "step": 17170 }, { "epoch": 0.84, "grad_norm": 0.5340112447738647, "learning_rate": 0.000490861578749963, "loss": 3.0543, "step": 17171 }, { "epoch": 0.84, "grad_norm": 0.5084630250930786, "learning_rate": 0.000490849700032254, "loss": 3.323, "step": 17172 }, { "epoch": 0.84, "grad_norm": 0.5156233310699463, "learning_rate": 0.0004908378208118836, "loss": 3.2439, "step": 17173 }, { "epoch": 0.84, "grad_norm": 0.5381580591201782, "learning_rate": 0.0004908259410888831, "loss": 3.0843, "step": 17174 }, { "epoch": 0.84, "grad_norm": 0.5627414584159851, "learning_rate": 0.0004908140608632838, "loss": 2.9431, "step": 17175 }, { "epoch": 0.84, "grad_norm": 0.5171828866004944, "learning_rate": 0.000490802180135117, "loss": 3.2593, "step": 17176 }, { "epoch": 0.84, "grad_norm": 0.5454464554786682, "learning_rate": 0.000490790298904414, "loss": 3.1044, "step": 17177 }, { "epoch": 0.84, "grad_norm": 0.4904737174510956, "learning_rate": 0.0004907784171712061, "loss": 3.242, "step": 17178 }, { "epoch": 0.84, "grad_norm": 0.5514205694198608, "learning_rate": 0.0004907665349355245, "loss": 3.2227, "step": 17179 }, { "epoch": 0.84, "grad_norm": 0.49019309878349304, "learning_rate": 0.0004907546521974006, "loss": 3.1571, "step": 17180 }, { "epoch": 0.84, "grad_norm": 0.552192747592926, "learning_rate": 0.0004907427689568656, "loss": 3.09, "step": 17181 }, { "epoch": 0.84, "grad_norm": 0.5478540658950806, "learning_rate": 0.0004907308852139508, "loss": 3.1038, "step": 17182 }, { "epoch": 0.84, "grad_norm": 0.5257436633110046, "learning_rate": 0.0004907190009686878, "loss": 3.4212, "step": 17183 }, { "epoch": 0.84, "grad_norm": 0.5158893465995789, "learning_rate": 0.0004907071162211074, "loss": 3.288, "step": 17184 }, { "epoch": 0.84, "grad_norm": 0.4981338083744049, "learning_rate": 0.0004906952309712413, "loss": 3.2846, "step": 17185 }, { "epoch": 0.84, "grad_norm": 0.5687278509140015, "learning_rate": 0.0004906833452191207, "loss": 3.3939, "step": 17186 }, { "epoch": 0.84, "grad_norm": 0.5252379179000854, "learning_rate": 0.0004906714589647767, "loss": 3.3868, "step": 17187 }, { "epoch": 0.84, "grad_norm": 0.5520195364952087, "learning_rate": 0.0004906595722082409, "loss": 3.2195, "step": 17188 }, { "epoch": 0.84, "grad_norm": 0.5275287628173828, "learning_rate": 0.0004906476849495444, "loss": 3.3285, "step": 17189 }, { "epoch": 0.84, "grad_norm": 0.5144610404968262, "learning_rate": 0.0004906357971887187, "loss": 3.1138, "step": 17190 }, { "epoch": 0.84, "grad_norm": 0.5572524070739746, "learning_rate": 0.0004906239089257949, "loss": 3.0022, "step": 17191 }, { "epoch": 0.84, "grad_norm": 0.5245261192321777, "learning_rate": 0.0004906120201608044, "loss": 3.1331, "step": 17192 }, { "epoch": 0.84, "grad_norm": 0.506518542766571, "learning_rate": 0.0004906001308937787, "loss": 3.3921, "step": 17193 }, { "epoch": 0.84, "grad_norm": 0.4986775517463684, "learning_rate": 0.0004905882411247487, "loss": 3.1723, "step": 17194 }, { "epoch": 0.84, "grad_norm": 0.5022253394126892, "learning_rate": 0.0004905763508537461, "loss": 3.1883, "step": 17195 }, { "epoch": 0.84, "grad_norm": 0.5980327129364014, "learning_rate": 0.000490564460080802, "loss": 3.1204, "step": 17196 }, { "epoch": 0.84, "grad_norm": 0.5253124833106995, "learning_rate": 0.0004905525688059479, "loss": 3.2769, "step": 17197 }, { "epoch": 0.84, "grad_norm": 0.5250275135040283, "learning_rate": 0.0004905406770292148, "loss": 3.0089, "step": 17198 }, { "epoch": 0.84, "grad_norm": 0.5560047626495361, "learning_rate": 0.0004905287847506343, "loss": 3.1203, "step": 17199 }, { "epoch": 0.84, "grad_norm": 0.5059837102890015, "learning_rate": 0.0004905168919702378, "loss": 3.0903, "step": 17200 }, { "epoch": 0.84, "grad_norm": 0.530164361000061, "learning_rate": 0.0004905049986880563, "loss": 3.2861, "step": 17201 }, { "epoch": 0.84, "grad_norm": 0.5831915736198425, "learning_rate": 0.0004904931049041214, "loss": 3.202, "step": 17202 }, { "epoch": 0.84, "grad_norm": 0.546757698059082, "learning_rate": 0.0004904812106184643, "loss": 3.1316, "step": 17203 }, { "epoch": 0.84, "grad_norm": 0.49275583028793335, "learning_rate": 0.0004904693158311162, "loss": 3.0704, "step": 17204 }, { "epoch": 0.84, "grad_norm": 0.5256943106651306, "learning_rate": 0.0004904574205421089, "loss": 3.0593, "step": 17205 }, { "epoch": 0.84, "grad_norm": 0.542165219783783, "learning_rate": 0.0004904455247514731, "loss": 3.391, "step": 17206 }, { "epoch": 0.84, "grad_norm": 0.5653906464576721, "learning_rate": 0.0004904336284592407, "loss": 3.2863, "step": 17207 }, { "epoch": 0.84, "grad_norm": 0.5180942416191101, "learning_rate": 0.0004904217316654425, "loss": 3.1246, "step": 17208 }, { "epoch": 0.84, "grad_norm": 0.49898773431777954, "learning_rate": 0.0004904098343701102, "loss": 3.1719, "step": 17209 }, { "epoch": 0.84, "grad_norm": 0.531066358089447, "learning_rate": 0.0004903979365732753, "loss": 3.3398, "step": 17210 }, { "epoch": 0.84, "grad_norm": 0.4966711103916168, "learning_rate": 0.0004903860382749686, "loss": 3.0826, "step": 17211 }, { "epoch": 0.84, "grad_norm": 0.5284520983695984, "learning_rate": 0.0004903741394752218, "loss": 3.2555, "step": 17212 }, { "epoch": 0.84, "grad_norm": 0.5519339442253113, "learning_rate": 0.000490362240174066, "loss": 3.0804, "step": 17213 }, { "epoch": 0.84, "grad_norm": 0.5141843557357788, "learning_rate": 0.0004903503403715329, "loss": 3.2965, "step": 17214 }, { "epoch": 0.84, "grad_norm": 0.5470436215400696, "learning_rate": 0.0004903384400676535, "loss": 3.0265, "step": 17215 }, { "epoch": 0.84, "grad_norm": 0.5789403319358826, "learning_rate": 0.0004903265392624594, "loss": 3.0417, "step": 17216 }, { "epoch": 0.84, "grad_norm": 0.515338659286499, "learning_rate": 0.0004903146379559818, "loss": 3.0671, "step": 17217 }, { "epoch": 0.84, "grad_norm": 0.5383449196815491, "learning_rate": 0.000490302736148252, "loss": 3.0563, "step": 17218 }, { "epoch": 0.84, "grad_norm": 0.5087341666221619, "learning_rate": 0.0004902908338393014, "loss": 3.3356, "step": 17219 }, { "epoch": 0.84, "grad_norm": 0.5621129870414734, "learning_rate": 0.0004902789310291615, "loss": 3.1718, "step": 17220 }, { "epoch": 0.84, "grad_norm": 0.49984487891197205, "learning_rate": 0.0004902670277178634, "loss": 3.2502, "step": 17221 }, { "epoch": 0.84, "grad_norm": 0.6184155941009521, "learning_rate": 0.0004902551239054386, "loss": 3.1425, "step": 17222 }, { "epoch": 0.84, "grad_norm": 0.5024511218070984, "learning_rate": 0.0004902432195919184, "loss": 3.2221, "step": 17223 }, { "epoch": 0.84, "grad_norm": 0.5568563938140869, "learning_rate": 0.0004902313147773342, "loss": 2.9693, "step": 17224 }, { "epoch": 0.84, "grad_norm": 0.5303897857666016, "learning_rate": 0.0004902194094617174, "loss": 3.2374, "step": 17225 }, { "epoch": 0.84, "grad_norm": 0.5185246467590332, "learning_rate": 0.0004902075036450992, "loss": 3.2034, "step": 17226 }, { "epoch": 0.84, "grad_norm": 0.5359919667243958, "learning_rate": 0.000490195597327511, "loss": 2.8955, "step": 17227 }, { "epoch": 0.84, "grad_norm": 0.530653178691864, "learning_rate": 0.0004901836905089842, "loss": 3.2935, "step": 17228 }, { "epoch": 0.84, "grad_norm": 0.5164836645126343, "learning_rate": 0.0004901717831895503, "loss": 3.229, "step": 17229 }, { "epoch": 0.84, "grad_norm": 0.5706772804260254, "learning_rate": 0.0004901598753692403, "loss": 3.1432, "step": 17230 }, { "epoch": 0.84, "grad_norm": 0.5535086393356323, "learning_rate": 0.0004901479670480859, "loss": 3.0893, "step": 17231 }, { "epoch": 0.84, "grad_norm": 0.5143651366233826, "learning_rate": 0.0004901360582261185, "loss": 3.1783, "step": 17232 }, { "epoch": 0.84, "grad_norm": 0.5126096606254578, "learning_rate": 0.0004901241489033692, "loss": 3.2156, "step": 17233 }, { "epoch": 0.84, "grad_norm": 0.5179157853126526, "learning_rate": 0.0004901122390798694, "loss": 3.2843, "step": 17234 }, { "epoch": 0.84, "grad_norm": 0.5556790232658386, "learning_rate": 0.0004901003287556507, "loss": 3.0277, "step": 17235 }, { "epoch": 0.84, "grad_norm": 0.5082927942276001, "learning_rate": 0.0004900884179307441, "loss": 3.1995, "step": 17236 }, { "epoch": 0.84, "grad_norm": 0.5159091353416443, "learning_rate": 0.0004900765066051814, "loss": 3.0237, "step": 17237 }, { "epoch": 0.84, "grad_norm": 0.49801966547966003, "learning_rate": 0.0004900645947789938, "loss": 3.155, "step": 17238 }, { "epoch": 0.84, "grad_norm": 0.5168977379798889, "learning_rate": 0.0004900526824522125, "loss": 3.1495, "step": 17239 }, { "epoch": 0.84, "grad_norm": 0.5264788269996643, "learning_rate": 0.000490040769624869, "loss": 2.9527, "step": 17240 }, { "epoch": 0.84, "grad_norm": 0.6261751055717468, "learning_rate": 0.0004900288562969947, "loss": 3.2426, "step": 17241 }, { "epoch": 0.84, "grad_norm": 0.5559073090553284, "learning_rate": 0.000490016942468621, "loss": 3.0032, "step": 17242 }, { "epoch": 0.85, "grad_norm": 0.5475751161575317, "learning_rate": 0.0004900050281397792, "loss": 3.396, "step": 17243 }, { "epoch": 0.85, "grad_norm": 0.5120872259140015, "learning_rate": 0.0004899931133105007, "loss": 3.1183, "step": 17244 }, { "epoch": 0.85, "grad_norm": 0.5100634694099426, "learning_rate": 0.000489981197980817, "loss": 3.1615, "step": 17245 }, { "epoch": 0.85, "grad_norm": 0.5490902066230774, "learning_rate": 0.0004899692821507593, "loss": 3.0538, "step": 17246 }, { "epoch": 0.85, "grad_norm": 0.48027950525283813, "learning_rate": 0.0004899573658203591, "loss": 3.1387, "step": 17247 }, { "epoch": 0.85, "grad_norm": 0.5834192633628845, "learning_rate": 0.0004899454489896479, "loss": 3.1026, "step": 17248 }, { "epoch": 0.85, "grad_norm": 0.5443624258041382, "learning_rate": 0.0004899335316586568, "loss": 3.1497, "step": 17249 }, { "epoch": 0.85, "grad_norm": 0.5452780723571777, "learning_rate": 0.0004899216138274172, "loss": 3.2115, "step": 17250 }, { "epoch": 0.85, "grad_norm": 0.5227807760238647, "learning_rate": 0.0004899096954959608, "loss": 3.2457, "step": 17251 }, { "epoch": 0.85, "grad_norm": 0.5504705905914307, "learning_rate": 0.0004898977766643188, "loss": 2.9626, "step": 17252 }, { "epoch": 0.85, "grad_norm": 0.5524275898933411, "learning_rate": 0.0004898858573325226, "loss": 3.2484, "step": 17253 }, { "epoch": 0.85, "grad_norm": 0.5162675380706787, "learning_rate": 0.0004898739375006036, "loss": 3.2485, "step": 17254 }, { "epoch": 0.85, "grad_norm": 0.5104801058769226, "learning_rate": 0.0004898620171685932, "loss": 3.2277, "step": 17255 }, { "epoch": 0.85, "grad_norm": 0.4923844635486603, "learning_rate": 0.0004898500963365226, "loss": 3.1075, "step": 17256 }, { "epoch": 0.85, "grad_norm": 0.5191243886947632, "learning_rate": 0.0004898381750044236, "loss": 3.007, "step": 17257 }, { "epoch": 0.85, "grad_norm": 0.531520426273346, "learning_rate": 0.0004898262531723273, "loss": 3.1071, "step": 17258 }, { "epoch": 0.85, "grad_norm": 0.5321716070175171, "learning_rate": 0.0004898143308402652, "loss": 3.2239, "step": 17259 }, { "epoch": 0.85, "grad_norm": 0.533822238445282, "learning_rate": 0.0004898024080082688, "loss": 3.0522, "step": 17260 }, { "epoch": 0.85, "grad_norm": 0.5310263633728027, "learning_rate": 0.0004897904846763692, "loss": 3.0263, "step": 17261 }, { "epoch": 0.85, "grad_norm": 0.5534727573394775, "learning_rate": 0.000489778560844598, "loss": 3.2554, "step": 17262 }, { "epoch": 0.85, "grad_norm": 0.5336495041847229, "learning_rate": 0.0004897666365129867, "loss": 3.1274, "step": 17263 }, { "epoch": 0.85, "grad_norm": 0.5278199911117554, "learning_rate": 0.0004897547116815666, "loss": 3.2348, "step": 17264 }, { "epoch": 0.85, "grad_norm": 0.5093748569488525, "learning_rate": 0.0004897427863503691, "loss": 3.1455, "step": 17265 }, { "epoch": 0.85, "grad_norm": 0.565778374671936, "learning_rate": 0.0004897308605194255, "loss": 3.0988, "step": 17266 }, { "epoch": 0.85, "grad_norm": 0.5380170941352844, "learning_rate": 0.0004897189341887673, "loss": 3.0954, "step": 17267 }, { "epoch": 0.85, "grad_norm": 0.4972584843635559, "learning_rate": 0.0004897070073584262, "loss": 3.0722, "step": 17268 }, { "epoch": 0.85, "grad_norm": 0.533125638961792, "learning_rate": 0.0004896950800284332, "loss": 3.1618, "step": 17269 }, { "epoch": 0.85, "grad_norm": 0.49823445081710815, "learning_rate": 0.0004896831521988198, "loss": 3.1106, "step": 17270 }, { "epoch": 0.85, "grad_norm": 0.5134430527687073, "learning_rate": 0.0004896712238696176, "loss": 2.9883, "step": 17271 }, { "epoch": 0.85, "grad_norm": 0.5448001027107239, "learning_rate": 0.0004896592950408579, "loss": 3.3028, "step": 17272 }, { "epoch": 0.85, "grad_norm": 0.5064616799354553, "learning_rate": 0.0004896473657125719, "loss": 3.4008, "step": 17273 }, { "epoch": 0.85, "grad_norm": 0.5209435820579529, "learning_rate": 0.0004896354358847915, "loss": 3.3331, "step": 17274 }, { "epoch": 0.85, "grad_norm": 0.5029204487800598, "learning_rate": 0.0004896235055575477, "loss": 3.2065, "step": 17275 }, { "epoch": 0.85, "grad_norm": 0.5218020677566528, "learning_rate": 0.0004896115747308722, "loss": 3.2772, "step": 17276 }, { "epoch": 0.85, "grad_norm": 0.5261409878730774, "learning_rate": 0.0004895996434047962, "loss": 3.357, "step": 17277 }, { "epoch": 0.85, "grad_norm": 0.5557669997215271, "learning_rate": 0.0004895877115793513, "loss": 3.2209, "step": 17278 }, { "epoch": 0.85, "grad_norm": 0.5135267972946167, "learning_rate": 0.0004895757792545689, "loss": 3.0166, "step": 17279 }, { "epoch": 0.85, "grad_norm": 0.4932655096054077, "learning_rate": 0.0004895638464304802, "loss": 3.1704, "step": 17280 }, { "epoch": 0.85, "grad_norm": 0.5083035230636597, "learning_rate": 0.0004895519131071169, "loss": 3.0698, "step": 17281 }, { "epoch": 0.85, "grad_norm": 0.4993206560611725, "learning_rate": 0.0004895399792845105, "loss": 3.1426, "step": 17282 }, { "epoch": 0.85, "grad_norm": 0.5377535820007324, "learning_rate": 0.000489528044962692, "loss": 3.2107, "step": 17283 }, { "epoch": 0.85, "grad_norm": 0.5332820415496826, "learning_rate": 0.0004895161101416932, "loss": 3.1717, "step": 17284 }, { "epoch": 0.85, "grad_norm": 0.6028470993041992, "learning_rate": 0.0004895041748215456, "loss": 3.222, "step": 17285 }, { "epoch": 0.85, "grad_norm": 0.5052365064620972, "learning_rate": 0.0004894922390022803, "loss": 3.1013, "step": 17286 }, { "epoch": 0.85, "grad_norm": 0.5114204287528992, "learning_rate": 0.000489480302683929, "loss": 3.2889, "step": 17287 }, { "epoch": 0.85, "grad_norm": 0.5291855931282043, "learning_rate": 0.000489468365866523, "loss": 3.0852, "step": 17288 }, { "epoch": 0.85, "grad_norm": 0.5404118895530701, "learning_rate": 0.0004894564285500938, "loss": 3.1578, "step": 17289 }, { "epoch": 0.85, "grad_norm": 0.5513167977333069, "learning_rate": 0.0004894444907346729, "loss": 3.177, "step": 17290 }, { "epoch": 0.85, "grad_norm": 0.546855628490448, "learning_rate": 0.0004894325524202915, "loss": 3.1454, "step": 17291 }, { "epoch": 0.85, "grad_norm": 0.5344764590263367, "learning_rate": 0.0004894206136069813, "loss": 3.0274, "step": 17292 }, { "epoch": 0.85, "grad_norm": 0.531498908996582, "learning_rate": 0.0004894086742947737, "loss": 2.8852, "step": 17293 }, { "epoch": 0.85, "grad_norm": 0.4873696565628052, "learning_rate": 0.0004893967344837, "loss": 3.1002, "step": 17294 }, { "epoch": 0.85, "grad_norm": 0.5418617129325867, "learning_rate": 0.0004893847941737919, "loss": 3.2382, "step": 17295 }, { "epoch": 0.85, "grad_norm": 0.5241795182228088, "learning_rate": 0.0004893728533650806, "loss": 3.2205, "step": 17296 }, { "epoch": 0.85, "grad_norm": 0.5475279092788696, "learning_rate": 0.0004893609120575976, "loss": 3.2555, "step": 17297 }, { "epoch": 0.85, "grad_norm": 0.65479975938797, "learning_rate": 0.0004893489702513745, "loss": 3.0701, "step": 17298 }, { "epoch": 0.85, "grad_norm": 0.5669695138931274, "learning_rate": 0.0004893370279464427, "loss": 3.1972, "step": 17299 }, { "epoch": 0.85, "grad_norm": 0.517507791519165, "learning_rate": 0.0004893250851428335, "loss": 3.1766, "step": 17300 }, { "epoch": 0.85, "grad_norm": 0.5156394839286804, "learning_rate": 0.0004893131418405786, "loss": 3.2759, "step": 17301 }, { "epoch": 0.85, "grad_norm": 0.48930031061172485, "learning_rate": 0.0004893011980397091, "loss": 3.1122, "step": 17302 }, { "epoch": 0.85, "grad_norm": 0.518436074256897, "learning_rate": 0.0004892892537402568, "loss": 3.1862, "step": 17303 }, { "epoch": 0.85, "grad_norm": 0.5759012699127197, "learning_rate": 0.0004892773089422531, "loss": 3.1416, "step": 17304 }, { "epoch": 0.85, "grad_norm": 0.48443731665611267, "learning_rate": 0.0004892653636457293, "loss": 2.9702, "step": 17305 }, { "epoch": 0.85, "grad_norm": 0.5327646732330322, "learning_rate": 0.000489253417850717, "loss": 3.1856, "step": 17306 }, { "epoch": 0.85, "grad_norm": 0.5462185144424438, "learning_rate": 0.0004892414715572475, "loss": 3.0689, "step": 17307 }, { "epoch": 0.85, "grad_norm": 0.535132110118866, "learning_rate": 0.0004892295247653526, "loss": 3.1475, "step": 17308 }, { "epoch": 0.85, "grad_norm": 0.5083247423171997, "learning_rate": 0.0004892175774750633, "loss": 3.0753, "step": 17309 }, { "epoch": 0.85, "grad_norm": 0.5415558815002441, "learning_rate": 0.0004892056296864116, "loss": 3.2183, "step": 17310 }, { "epoch": 0.85, "grad_norm": 0.5436738729476929, "learning_rate": 0.0004891936813994285, "loss": 3.2266, "step": 17311 }, { "epoch": 0.85, "grad_norm": 0.5327355265617371, "learning_rate": 0.0004891817326141457, "loss": 3.2321, "step": 17312 }, { "epoch": 0.85, "grad_norm": 0.5031436681747437, "learning_rate": 0.0004891697833305946, "loss": 3.0962, "step": 17313 }, { "epoch": 0.85, "grad_norm": 0.5180808305740356, "learning_rate": 0.0004891578335488066, "loss": 3.0242, "step": 17314 }, { "epoch": 0.85, "grad_norm": 0.5372978448867798, "learning_rate": 0.0004891458832688135, "loss": 2.9658, "step": 17315 }, { "epoch": 0.85, "grad_norm": 0.5055201053619385, "learning_rate": 0.0004891339324906464, "loss": 3.0476, "step": 17316 }, { "epoch": 0.85, "grad_norm": 0.5402676463127136, "learning_rate": 0.0004891219812143369, "loss": 3.0622, "step": 17317 }, { "epoch": 0.85, "grad_norm": 0.4617784023284912, "learning_rate": 0.0004891100294399166, "loss": 3.1742, "step": 17318 }, { "epoch": 0.85, "grad_norm": 0.5083367228507996, "learning_rate": 0.0004890980771674169, "loss": 3.3683, "step": 17319 }, { "epoch": 0.85, "grad_norm": 0.4868304431438446, "learning_rate": 0.0004890861243968691, "loss": 3.3446, "step": 17320 }, { "epoch": 0.85, "grad_norm": 0.5708618760108948, "learning_rate": 0.000489074171128305, "loss": 3.1252, "step": 17321 }, { "epoch": 0.85, "grad_norm": 0.5267812013626099, "learning_rate": 0.0004890622173617558, "loss": 3.1191, "step": 17322 }, { "epoch": 0.85, "grad_norm": 0.5371107459068298, "learning_rate": 0.0004890502630972532, "loss": 3.1529, "step": 17323 }, { "epoch": 0.85, "grad_norm": 0.5120932459831238, "learning_rate": 0.0004890383083348285, "loss": 3.0703, "step": 17324 }, { "epoch": 0.85, "grad_norm": 0.5467737317085266, "learning_rate": 0.0004890263530745134, "loss": 3.2532, "step": 17325 }, { "epoch": 0.85, "grad_norm": 0.4981206953525543, "learning_rate": 0.0004890143973163391, "loss": 3.1652, "step": 17326 }, { "epoch": 0.85, "grad_norm": 0.5158091187477112, "learning_rate": 0.0004890024410603372, "loss": 3.0072, "step": 17327 }, { "epoch": 0.85, "grad_norm": 0.5790042281150818, "learning_rate": 0.0004889904843065394, "loss": 3.2988, "step": 17328 }, { "epoch": 0.85, "grad_norm": 0.5327931642532349, "learning_rate": 0.0004889785270549771, "loss": 3.0204, "step": 17329 }, { "epoch": 0.85, "grad_norm": 0.5174896717071533, "learning_rate": 0.0004889665693056817, "loss": 3.1526, "step": 17330 }, { "epoch": 0.85, "grad_norm": 0.5148389339447021, "learning_rate": 0.0004889546110586847, "loss": 3.1528, "step": 17331 }, { "epoch": 0.85, "grad_norm": 0.4947047829627991, "learning_rate": 0.0004889426523140175, "loss": 2.9137, "step": 17332 }, { "epoch": 0.85, "grad_norm": 0.5458266735076904, "learning_rate": 0.0004889306930717118, "loss": 3.3774, "step": 17333 }, { "epoch": 0.85, "grad_norm": 0.4884343147277832, "learning_rate": 0.0004889187333317991, "loss": 3.116, "step": 17334 }, { "epoch": 0.85, "grad_norm": 0.5519797801971436, "learning_rate": 0.0004889067730943107, "loss": 3.1202, "step": 17335 }, { "epoch": 0.85, "grad_norm": 0.5409486889839172, "learning_rate": 0.0004888948123592783, "loss": 3.1933, "step": 17336 }, { "epoch": 0.85, "grad_norm": 0.5440702438354492, "learning_rate": 0.0004888828511267332, "loss": 3.2795, "step": 17337 }, { "epoch": 0.85, "grad_norm": 0.5199924111366272, "learning_rate": 0.0004888708893967071, "loss": 2.9574, "step": 17338 }, { "epoch": 0.85, "grad_norm": 0.5392219424247742, "learning_rate": 0.0004888589271692314, "loss": 3.3653, "step": 17339 }, { "epoch": 0.85, "grad_norm": 0.49010539054870605, "learning_rate": 0.0004888469644443377, "loss": 3.1307, "step": 17340 }, { "epoch": 0.85, "grad_norm": 0.5155744552612305, "learning_rate": 0.0004888350012220573, "loss": 3.142, "step": 17341 }, { "epoch": 0.85, "grad_norm": 0.5155385732650757, "learning_rate": 0.000488823037502422, "loss": 3.3988, "step": 17342 }, { "epoch": 0.85, "grad_norm": 0.5531888008117676, "learning_rate": 0.000488811073285463, "loss": 2.9271, "step": 17343 }, { "epoch": 0.85, "grad_norm": 0.5326036214828491, "learning_rate": 0.0004887991085712121, "loss": 3.1794, "step": 17344 }, { "epoch": 0.85, "grad_norm": 0.5249530673027039, "learning_rate": 0.0004887871433597006, "loss": 3.2291, "step": 17345 }, { "epoch": 0.85, "grad_norm": 0.5138980746269226, "learning_rate": 0.0004887751776509602, "loss": 3.0757, "step": 17346 }, { "epoch": 0.85, "grad_norm": 0.5147232413291931, "learning_rate": 0.0004887632114450222, "loss": 3.0998, "step": 17347 }, { "epoch": 0.85, "grad_norm": 0.5001515746116638, "learning_rate": 0.0004887512447419184, "loss": 3.2291, "step": 17348 }, { "epoch": 0.85, "grad_norm": 0.5295124650001526, "learning_rate": 0.00048873927754168, "loss": 3.1302, "step": 17349 }, { "epoch": 0.85, "grad_norm": 0.5005471706390381, "learning_rate": 0.0004887273098443388, "loss": 3.0511, "step": 17350 }, { "epoch": 0.85, "grad_norm": 0.5099095702171326, "learning_rate": 0.000488715341649926, "loss": 3.1537, "step": 17351 }, { "epoch": 0.85, "grad_norm": 0.5452235341072083, "learning_rate": 0.0004887033729584734, "loss": 3.231, "step": 17352 }, { "epoch": 0.85, "grad_norm": 0.5622381567955017, "learning_rate": 0.0004886914037700124, "loss": 3.0763, "step": 17353 }, { "epoch": 0.85, "grad_norm": 0.5072722434997559, "learning_rate": 0.0004886794340845746, "loss": 3.0325, "step": 17354 }, { "epoch": 0.85, "grad_norm": 0.5430617332458496, "learning_rate": 0.0004886674639021914, "loss": 3.1989, "step": 17355 }, { "epoch": 0.85, "grad_norm": 0.5264281034469604, "learning_rate": 0.0004886554932228945, "loss": 3.2848, "step": 17356 }, { "epoch": 0.85, "grad_norm": 0.5271099209785461, "learning_rate": 0.0004886435220467154, "loss": 3.2634, "step": 17357 }, { "epoch": 0.85, "grad_norm": 0.578886866569519, "learning_rate": 0.0004886315503736854, "loss": 3.2401, "step": 17358 }, { "epoch": 0.85, "grad_norm": 0.48551952838897705, "learning_rate": 0.0004886195782038364, "loss": 3.2252, "step": 17359 }, { "epoch": 0.85, "grad_norm": 0.49196016788482666, "learning_rate": 0.0004886076055371995, "loss": 3.2997, "step": 17360 }, { "epoch": 0.85, "grad_norm": 0.5155906677246094, "learning_rate": 0.0004885956323738066, "loss": 3.0486, "step": 17361 }, { "epoch": 0.85, "grad_norm": 0.5028941631317139, "learning_rate": 0.0004885836587136892, "loss": 3.1732, "step": 17362 }, { "epoch": 0.85, "grad_norm": 0.49608007073402405, "learning_rate": 0.0004885716845568786, "loss": 3.0215, "step": 17363 }, { "epoch": 0.85, "grad_norm": 0.5235550999641418, "learning_rate": 0.0004885597099034064, "loss": 3.1015, "step": 17364 }, { "epoch": 0.85, "grad_norm": 0.5136752128601074, "learning_rate": 0.0004885477347533044, "loss": 3.1078, "step": 17365 }, { "epoch": 0.85, "grad_norm": 0.49417296051979065, "learning_rate": 0.0004885357591066038, "loss": 3.1097, "step": 17366 }, { "epoch": 0.85, "grad_norm": 0.4974791705608368, "learning_rate": 0.0004885237829633363, "loss": 3.176, "step": 17367 }, { "epoch": 0.85, "grad_norm": 0.4924065172672272, "learning_rate": 0.0004885118063235335, "loss": 3.2376, "step": 17368 }, { "epoch": 0.85, "grad_norm": 0.5056686997413635, "learning_rate": 0.0004884998291872269, "loss": 3.3538, "step": 17369 }, { "epoch": 0.85, "grad_norm": 0.5538915395736694, "learning_rate": 0.0004884878515544481, "loss": 3.1879, "step": 17370 }, { "epoch": 0.85, "grad_norm": 0.5767045617103577, "learning_rate": 0.0004884758734252285, "loss": 3.1592, "step": 17371 }, { "epoch": 0.85, "grad_norm": 0.5208998918533325, "learning_rate": 0.0004884638947995996, "loss": 3.1981, "step": 17372 }, { "epoch": 0.85, "grad_norm": 0.5236579775810242, "learning_rate": 0.0004884519156775932, "loss": 3.0012, "step": 17373 }, { "epoch": 0.85, "grad_norm": 0.5991045832633972, "learning_rate": 0.0004884399360592407, "loss": 3.1331, "step": 17374 }, { "epoch": 0.85, "grad_norm": 0.5242659449577332, "learning_rate": 0.0004884279559445737, "loss": 3.3605, "step": 17375 }, { "epoch": 0.85, "grad_norm": 0.5292356610298157, "learning_rate": 0.0004884159753336237, "loss": 3.2468, "step": 17376 }, { "epoch": 0.85, "grad_norm": 0.5420272946357727, "learning_rate": 0.0004884039942264222, "loss": 2.9437, "step": 17377 }, { "epoch": 0.85, "grad_norm": 0.4963341951370239, "learning_rate": 0.000488392012623001, "loss": 3.2331, "step": 17378 }, { "epoch": 0.85, "grad_norm": 0.505173921585083, "learning_rate": 0.0004883800305233914, "loss": 3.271, "step": 17379 }, { "epoch": 0.85, "grad_norm": 0.530937671661377, "learning_rate": 0.000488368047927625, "loss": 3.2967, "step": 17380 }, { "epoch": 0.85, "grad_norm": 0.5380979776382446, "learning_rate": 0.0004883560648357335, "loss": 3.0993, "step": 17381 }, { "epoch": 0.85, "grad_norm": 0.5381925702095032, "learning_rate": 0.0004883440812477484, "loss": 3.2143, "step": 17382 }, { "epoch": 0.85, "grad_norm": 0.517504096031189, "learning_rate": 0.0004883320971637012, "loss": 3.2316, "step": 17383 }, { "epoch": 0.85, "grad_norm": 0.5057746767997742, "learning_rate": 0.0004883201125836234, "loss": 3.1969, "step": 17384 }, { "epoch": 0.85, "grad_norm": 0.5114002227783203, "learning_rate": 0.0004883081275075467, "loss": 3.135, "step": 17385 }, { "epoch": 0.85, "grad_norm": 0.5425366163253784, "learning_rate": 0.0004882961419355028, "loss": 3.0397, "step": 17386 }, { "epoch": 0.85, "grad_norm": 0.5366644859313965, "learning_rate": 0.000488284155867523, "loss": 3.3255, "step": 17387 }, { "epoch": 0.85, "grad_norm": 0.5261602401733398, "learning_rate": 0.0004882721693036389, "loss": 3.1629, "step": 17388 }, { "epoch": 0.85, "grad_norm": 0.5072944760322571, "learning_rate": 0.0004882601822438823, "loss": 3.1021, "step": 17389 }, { "epoch": 0.85, "grad_norm": 0.6098434329032898, "learning_rate": 0.0004882481946882845, "loss": 2.9911, "step": 17390 }, { "epoch": 0.85, "grad_norm": 0.535154402256012, "learning_rate": 0.0004882362066368771, "loss": 3.2225, "step": 17391 }, { "epoch": 0.85, "grad_norm": 0.508673369884491, "learning_rate": 0.00048822421808969197, "loss": 3.2243, "step": 17392 }, { "epoch": 0.85, "grad_norm": 0.5214126706123352, "learning_rate": 0.0004882122290467603, "loss": 3.1554, "step": 17393 }, { "epoch": 0.85, "grad_norm": 0.5492194294929504, "learning_rate": 0.0004882002395081139, "loss": 3.1623, "step": 17394 }, { "epoch": 0.85, "grad_norm": 0.558017909526825, "learning_rate": 0.0004881882494737843, "loss": 3.2415, "step": 17395 }, { "epoch": 0.85, "grad_norm": 0.5010644793510437, "learning_rate": 0.0004881762589438031, "loss": 3.2828, "step": 17396 }, { "epoch": 0.85, "grad_norm": 0.5531977415084839, "learning_rate": 0.00048816426791820175, "loss": 3.0989, "step": 17397 }, { "epoch": 0.85, "grad_norm": 0.5068770051002502, "learning_rate": 0.00048815227639701205, "loss": 3.3572, "step": 17398 }, { "epoch": 0.85, "grad_norm": 0.6324036121368408, "learning_rate": 0.00048814028438026535, "loss": 3.0686, "step": 17399 }, { "epoch": 0.85, "grad_norm": 0.5348002910614014, "learning_rate": 0.00048812829186799337, "loss": 3.1609, "step": 17400 }, { "epoch": 0.85, "grad_norm": 0.5051308274269104, "learning_rate": 0.00048811629886022777, "loss": 3.0573, "step": 17401 }, { "epoch": 0.85, "grad_norm": 0.5545761585235596, "learning_rate": 0.00048810430535699994, "loss": 3.0548, "step": 17402 }, { "epoch": 0.85, "grad_norm": 0.692309558391571, "learning_rate": 0.0004880923113583416, "loss": 3.1483, "step": 17403 }, { "epoch": 0.85, "grad_norm": 0.5532946586608887, "learning_rate": 0.0004880803168642843, "loss": 3.1074, "step": 17404 }, { "epoch": 0.85, "grad_norm": 0.7708467841148376, "learning_rate": 0.00048806832187485964, "loss": 3.3463, "step": 17405 }, { "epoch": 0.85, "grad_norm": 0.5050947666168213, "learning_rate": 0.00048805632639009936, "loss": 3.0551, "step": 17406 }, { "epoch": 0.85, "grad_norm": 0.49402984976768494, "learning_rate": 0.0004880443304100347, "loss": 3.1811, "step": 17407 }, { "epoch": 0.85, "grad_norm": 0.6107621192932129, "learning_rate": 0.0004880323339346976, "loss": 3.1067, "step": 17408 }, { "epoch": 0.85, "grad_norm": 0.5168527960777283, "learning_rate": 0.00048802033696411946, "loss": 3.1141, "step": 17409 }, { "epoch": 0.85, "grad_norm": 0.518570601940155, "learning_rate": 0.00048800833949833196, "loss": 2.9312, "step": 17410 }, { "epoch": 0.85, "grad_norm": 0.5209082961082458, "learning_rate": 0.0004879963415373667, "loss": 2.9536, "step": 17411 }, { "epoch": 0.85, "grad_norm": 0.49775710701942444, "learning_rate": 0.0004879843430812552, "loss": 3.173, "step": 17412 }, { "epoch": 0.85, "grad_norm": 0.5183805823326111, "learning_rate": 0.00048797234413002914, "loss": 3.2114, "step": 17413 }, { "epoch": 0.85, "grad_norm": 0.5256752967834473, "learning_rate": 0.00048796034468372016, "loss": 3.0888, "step": 17414 }, { "epoch": 0.85, "grad_norm": 0.47854384779930115, "learning_rate": 0.0004879483447423597, "loss": 3.3277, "step": 17415 }, { "epoch": 0.85, "grad_norm": 0.5341052412986755, "learning_rate": 0.00048793634430597946, "loss": 3.0255, "step": 17416 }, { "epoch": 0.85, "grad_norm": 0.5231382846832275, "learning_rate": 0.00048792434337461107, "loss": 3.367, "step": 17417 }, { "epoch": 0.85, "grad_norm": 0.5256244540214539, "learning_rate": 0.0004879123419482862, "loss": 3.3875, "step": 17418 }, { "epoch": 0.85, "grad_norm": 0.529234766960144, "learning_rate": 0.00048790034002703646, "loss": 3.075, "step": 17419 }, { "epoch": 0.85, "grad_norm": 0.6451051831245422, "learning_rate": 0.0004878883376108932, "loss": 3.0641, "step": 17420 }, { "epoch": 0.85, "grad_norm": 0.5298725366592407, "learning_rate": 0.00048787633469988834, "loss": 3.2128, "step": 17421 }, { "epoch": 0.85, "grad_norm": 0.5713647603988647, "learning_rate": 0.00048786433129405335, "loss": 3.0181, "step": 17422 }, { "epoch": 0.85, "grad_norm": 0.48418837785720825, "learning_rate": 0.0004878523273934198, "loss": 3.0541, "step": 17423 }, { "epoch": 0.85, "grad_norm": 0.5305963754653931, "learning_rate": 0.00048784032299801946, "loss": 3.2004, "step": 17424 }, { "epoch": 0.85, "grad_norm": 0.5473353266716003, "learning_rate": 0.00048782831810788383, "loss": 3.0637, "step": 17425 }, { "epoch": 0.85, "grad_norm": 0.5448347926139832, "learning_rate": 0.00048781631272304453, "loss": 3.1816, "step": 17426 }, { "epoch": 0.85, "grad_norm": 0.522763729095459, "learning_rate": 0.00048780430684353327, "loss": 3.0765, "step": 17427 }, { "epoch": 0.85, "grad_norm": 0.49944186210632324, "learning_rate": 0.0004877923004693815, "loss": 3.2881, "step": 17428 }, { "epoch": 0.85, "grad_norm": 0.522628664970398, "learning_rate": 0.0004877802936006211, "loss": 3.361, "step": 17429 }, { "epoch": 0.85, "grad_norm": 0.5693197250366211, "learning_rate": 0.00048776828623728346, "loss": 3.3028, "step": 17430 }, { "epoch": 0.85, "grad_norm": 0.5283417701721191, "learning_rate": 0.00048775627837940026, "loss": 3.2618, "step": 17431 }, { "epoch": 0.85, "grad_norm": 0.5026355385780334, "learning_rate": 0.0004877442700270032, "loss": 3.0906, "step": 17432 }, { "epoch": 0.85, "grad_norm": 0.5224277973175049, "learning_rate": 0.0004877322611801239, "loss": 2.9159, "step": 17433 }, { "epoch": 0.85, "grad_norm": 0.5417836308479309, "learning_rate": 0.0004877202518387939, "loss": 3.1032, "step": 17434 }, { "epoch": 0.85, "grad_norm": 0.5314958691596985, "learning_rate": 0.00048770824200304494, "loss": 3.1461, "step": 17435 }, { "epoch": 0.85, "grad_norm": 0.5614078640937805, "learning_rate": 0.0004876962316729085, "loss": 3.0781, "step": 17436 }, { "epoch": 0.85, "grad_norm": 0.5217527747154236, "learning_rate": 0.00048768422084841643, "loss": 3.1365, "step": 17437 }, { "epoch": 0.85, "grad_norm": 0.5421162247657776, "learning_rate": 0.00048767220952960013, "loss": 3.2526, "step": 17438 }, { "epoch": 0.85, "grad_norm": 0.647996187210083, "learning_rate": 0.0004876601977164914, "loss": 2.8944, "step": 17439 }, { "epoch": 0.85, "grad_norm": 0.5372748970985413, "learning_rate": 0.0004876481854091218, "loss": 2.9952, "step": 17440 }, { "epoch": 0.85, "grad_norm": 0.541601300239563, "learning_rate": 0.000487636172607523, "loss": 3.25, "step": 17441 }, { "epoch": 0.85, "grad_norm": 0.5672915577888489, "learning_rate": 0.0004876241593117267, "loss": 2.9457, "step": 17442 }, { "epoch": 0.85, "grad_norm": 0.530311644077301, "learning_rate": 0.0004876121455217644, "loss": 2.875, "step": 17443 }, { "epoch": 0.85, "grad_norm": 0.5655810236930847, "learning_rate": 0.0004876001312376679, "loss": 3.3092, "step": 17444 }, { "epoch": 0.85, "grad_norm": 0.5126374363899231, "learning_rate": 0.0004875881164594686, "loss": 3.0786, "step": 17445 }, { "epoch": 0.85, "grad_norm": 0.5114005208015442, "learning_rate": 0.0004875761011871984, "loss": 3.4152, "step": 17446 }, { "epoch": 0.86, "grad_norm": 0.5263176560401917, "learning_rate": 0.00048756408542088885, "loss": 3.2279, "step": 17447 }, { "epoch": 0.86, "grad_norm": 0.5074858069419861, "learning_rate": 0.0004875520691605716, "loss": 3.2816, "step": 17448 }, { "epoch": 0.86, "grad_norm": 0.5116199851036072, "learning_rate": 0.00048754005240627833, "loss": 3.3319, "step": 17449 }, { "epoch": 0.86, "grad_norm": 0.5231490135192871, "learning_rate": 0.00048752803515804063, "loss": 3.3931, "step": 17450 }, { "epoch": 0.86, "grad_norm": 0.6299848556518555, "learning_rate": 0.00048751601741589014, "loss": 2.8908, "step": 17451 }, { "epoch": 0.86, "grad_norm": 0.5070332288742065, "learning_rate": 0.00048750399917985856, "loss": 3.2715, "step": 17452 }, { "epoch": 0.86, "grad_norm": 0.6328778862953186, "learning_rate": 0.0004874919804499775, "loss": 3.2513, "step": 17453 }, { "epoch": 0.86, "grad_norm": 0.52276211977005, "learning_rate": 0.0004874799612262787, "loss": 3.0997, "step": 17454 }, { "epoch": 0.86, "grad_norm": 0.5386821031570435, "learning_rate": 0.0004874679415087938, "loss": 3.2454, "step": 17455 }, { "epoch": 0.86, "grad_norm": 0.506618320941925, "learning_rate": 0.00048745592129755433, "loss": 3.1048, "step": 17456 }, { "epoch": 0.86, "grad_norm": 0.5649548768997192, "learning_rate": 0.0004874439005925921, "loss": 2.8524, "step": 17457 }, { "epoch": 0.86, "grad_norm": 0.5274940729141235, "learning_rate": 0.00048743187939393867, "loss": 3.2162, "step": 17458 }, { "epoch": 0.86, "grad_norm": 0.546772301197052, "learning_rate": 0.0004874198577016258, "loss": 3.1506, "step": 17459 }, { "epoch": 0.86, "grad_norm": 0.5609162449836731, "learning_rate": 0.00048740783551568504, "loss": 3.3401, "step": 17460 }, { "epoch": 0.86, "grad_norm": 0.5612881183624268, "learning_rate": 0.0004873958128361481, "loss": 3.1514, "step": 17461 }, { "epoch": 0.86, "grad_norm": 0.5242128968238831, "learning_rate": 0.0004873837896630467, "loss": 2.9899, "step": 17462 }, { "epoch": 0.86, "grad_norm": 0.5031869411468506, "learning_rate": 0.00048737176599641246, "loss": 3.2779, "step": 17463 }, { "epoch": 0.86, "grad_norm": 0.5017745494842529, "learning_rate": 0.00048735974183627707, "loss": 3.2305, "step": 17464 }, { "epoch": 0.86, "grad_norm": 0.5221795439720154, "learning_rate": 0.0004873477171826721, "loss": 3.2039, "step": 17465 }, { "epoch": 0.86, "grad_norm": 0.5255293846130371, "learning_rate": 0.0004873356920356293, "loss": 3.1282, "step": 17466 }, { "epoch": 0.86, "grad_norm": 0.5047944784164429, "learning_rate": 0.00048732366639518037, "loss": 2.9352, "step": 17467 }, { "epoch": 0.86, "grad_norm": 0.5233396291732788, "learning_rate": 0.000487311640261357, "loss": 3.2509, "step": 17468 }, { "epoch": 0.86, "grad_norm": 0.505703330039978, "learning_rate": 0.0004872996136341908, "loss": 3.2049, "step": 17469 }, { "epoch": 0.86, "grad_norm": 0.5102468729019165, "learning_rate": 0.00048728758651371345, "loss": 3.1221, "step": 17470 }, { "epoch": 0.86, "grad_norm": 0.5118353962898254, "learning_rate": 0.00048727555889995655, "loss": 3.0826, "step": 17471 }, { "epoch": 0.86, "grad_norm": 0.5210556387901306, "learning_rate": 0.000487263530792952, "loss": 3.0314, "step": 17472 }, { "epoch": 0.86, "grad_norm": 0.5125072598457336, "learning_rate": 0.0004872515021927313, "loss": 3.3292, "step": 17473 }, { "epoch": 0.86, "grad_norm": 0.5282806754112244, "learning_rate": 0.00048723947309932625, "loss": 3.3225, "step": 17474 }, { "epoch": 0.86, "grad_norm": 0.4971778392791748, "learning_rate": 0.00048722744351276836, "loss": 3.2805, "step": 17475 }, { "epoch": 0.86, "grad_norm": 0.5685766339302063, "learning_rate": 0.00048721541343308946, "loss": 3.1796, "step": 17476 }, { "epoch": 0.86, "grad_norm": 0.5498575568199158, "learning_rate": 0.0004872033828603212, "loss": 3.1008, "step": 17477 }, { "epoch": 0.86, "grad_norm": 0.5463971495628357, "learning_rate": 0.0004871913517944952, "loss": 3.0532, "step": 17478 }, { "epoch": 0.86, "grad_norm": 0.5287131667137146, "learning_rate": 0.0004871793202356432, "loss": 3.3487, "step": 17479 }, { "epoch": 0.86, "grad_norm": 0.5136821866035461, "learning_rate": 0.000487167288183797, "loss": 3.2506, "step": 17480 }, { "epoch": 0.86, "grad_norm": 0.5033801794052124, "learning_rate": 0.0004871552556389881, "loss": 3.2242, "step": 17481 }, { "epoch": 0.86, "grad_norm": 0.5138252377510071, "learning_rate": 0.00048714322260124825, "loss": 3.0705, "step": 17482 }, { "epoch": 0.86, "grad_norm": 0.5219220519065857, "learning_rate": 0.00048713118907060923, "loss": 2.9397, "step": 17483 }, { "epoch": 0.86, "grad_norm": 0.5228368043899536, "learning_rate": 0.00048711915504710276, "loss": 3.2545, "step": 17484 }, { "epoch": 0.86, "grad_norm": 0.5889149308204651, "learning_rate": 0.00048710712053076036, "loss": 3.1426, "step": 17485 }, { "epoch": 0.86, "grad_norm": 0.5183678865432739, "learning_rate": 0.00048709508552161377, "loss": 3.1804, "step": 17486 }, { "epoch": 0.86, "grad_norm": 0.5020313858985901, "learning_rate": 0.0004870830500196947, "loss": 3.4664, "step": 17487 }, { "epoch": 0.86, "grad_norm": 0.522677481174469, "learning_rate": 0.00048707101402503493, "loss": 3.2824, "step": 17488 }, { "epoch": 0.86, "grad_norm": 0.5405004024505615, "learning_rate": 0.0004870589775376661, "loss": 3.1882, "step": 17489 }, { "epoch": 0.86, "grad_norm": 0.4743952453136444, "learning_rate": 0.00048704694055762005, "loss": 3.257, "step": 17490 }, { "epoch": 0.86, "grad_norm": 0.49832382798194885, "learning_rate": 0.00048703490308492825, "loss": 3.156, "step": 17491 }, { "epoch": 0.86, "grad_norm": 0.553101122379303, "learning_rate": 0.0004870228651196225, "loss": 3.1769, "step": 17492 }, { "epoch": 0.86, "grad_norm": 0.5105452537536621, "learning_rate": 0.00048701082666173447, "loss": 3.228, "step": 17493 }, { "epoch": 0.86, "grad_norm": 0.4981645345687866, "learning_rate": 0.0004869987877112961, "loss": 3.1893, "step": 17494 }, { "epoch": 0.86, "grad_norm": 0.5161097645759583, "learning_rate": 0.00048698674826833875, "loss": 3.1463, "step": 17495 }, { "epoch": 0.86, "grad_norm": 0.5108583569526672, "learning_rate": 0.00048697470833289434, "loss": 3.1373, "step": 17496 }, { "epoch": 0.86, "grad_norm": 0.5210059881210327, "learning_rate": 0.00048696266790499457, "loss": 3.2682, "step": 17497 }, { "epoch": 0.86, "grad_norm": 0.5149890184402466, "learning_rate": 0.000486950626984671, "loss": 3.1596, "step": 17498 }, { "epoch": 0.86, "grad_norm": 0.5131500959396362, "learning_rate": 0.0004869385855719555, "loss": 3.0184, "step": 17499 }, { "epoch": 0.86, "grad_norm": 0.5088669061660767, "learning_rate": 0.00048692654366687985, "loss": 3.1619, "step": 17500 }, { "epoch": 0.86, "grad_norm": 0.5249161124229431, "learning_rate": 0.0004869145012694755, "loss": 3.087, "step": 17501 }, { "epoch": 0.86, "grad_norm": 0.541541337966919, "learning_rate": 0.00048690245837977445, "loss": 3.0936, "step": 17502 }, { "epoch": 0.86, "grad_norm": 0.5092646479606628, "learning_rate": 0.0004868904149978082, "loss": 3.0542, "step": 17503 }, { "epoch": 0.86, "grad_norm": 0.5421231389045715, "learning_rate": 0.0004868783711236087, "loss": 3.3001, "step": 17504 }, { "epoch": 0.86, "grad_norm": 0.5062888860702515, "learning_rate": 0.00048686632675720736, "loss": 3.0985, "step": 17505 }, { "epoch": 0.86, "grad_norm": 0.5206896066665649, "learning_rate": 0.00048685428189863616, "loss": 3.0927, "step": 17506 }, { "epoch": 0.86, "grad_norm": 0.5333325862884521, "learning_rate": 0.0004868422365479268, "loss": 3.2301, "step": 17507 }, { "epoch": 0.86, "grad_norm": 0.5655118227005005, "learning_rate": 0.0004868301907051109, "loss": 3.1063, "step": 17508 }, { "epoch": 0.86, "grad_norm": 0.5310041904449463, "learning_rate": 0.0004868181443702202, "loss": 3.1183, "step": 17509 }, { "epoch": 0.86, "grad_norm": 0.5519410967826843, "learning_rate": 0.00048680609754328644, "loss": 3.0312, "step": 17510 }, { "epoch": 0.86, "grad_norm": 0.6148340106010437, "learning_rate": 0.0004867940502243415, "loss": 3.2461, "step": 17511 }, { "epoch": 0.86, "grad_norm": 0.512523889541626, "learning_rate": 0.00048678200241341685, "loss": 3.268, "step": 17512 }, { "epoch": 0.86, "grad_norm": 0.4837605059146881, "learning_rate": 0.0004867699541105444, "loss": 2.947, "step": 17513 }, { "epoch": 0.86, "grad_norm": 0.5407007336616516, "learning_rate": 0.00048675790531575583, "loss": 3.1923, "step": 17514 }, { "epoch": 0.86, "grad_norm": 0.495334267616272, "learning_rate": 0.00048674585602908287, "loss": 3.2698, "step": 17515 }, { "epoch": 0.86, "grad_norm": 0.5168710947036743, "learning_rate": 0.00048673380625055735, "loss": 3.2698, "step": 17516 }, { "epoch": 0.86, "grad_norm": 0.5113250613212585, "learning_rate": 0.0004867217559802109, "loss": 3.401, "step": 17517 }, { "epoch": 0.86, "grad_norm": 0.5109712481498718, "learning_rate": 0.00048670970521807517, "loss": 3.0884, "step": 17518 }, { "epoch": 0.86, "grad_norm": 0.5782906413078308, "learning_rate": 0.00048669765396418214, "loss": 3.0593, "step": 17519 }, { "epoch": 0.86, "grad_norm": 0.4731515347957611, "learning_rate": 0.00048668560221856337, "loss": 3.1659, "step": 17520 }, { "epoch": 0.86, "grad_norm": 0.5255733132362366, "learning_rate": 0.0004866735499812506, "loss": 3.1536, "step": 17521 }, { "epoch": 0.86, "grad_norm": 0.5351244807243347, "learning_rate": 0.0004866614972522757, "loss": 3.2712, "step": 17522 }, { "epoch": 0.86, "grad_norm": 0.5200554132461548, "learning_rate": 0.0004866494440316703, "loss": 3.1256, "step": 17523 }, { "epoch": 0.86, "grad_norm": 0.510212242603302, "learning_rate": 0.0004866373903194661, "loss": 3.0483, "step": 17524 }, { "epoch": 0.86, "grad_norm": 0.5115465521812439, "learning_rate": 0.0004866253361156951, "loss": 2.9676, "step": 17525 }, { "epoch": 0.86, "grad_norm": 0.5391139984130859, "learning_rate": 0.0004866132814203888, "loss": 3.2172, "step": 17526 }, { "epoch": 0.86, "grad_norm": 0.5223973393440247, "learning_rate": 0.00048660122623357904, "loss": 3.2046, "step": 17527 }, { "epoch": 0.86, "grad_norm": 0.5327551364898682, "learning_rate": 0.00048658917055529753, "loss": 3.2311, "step": 17528 }, { "epoch": 0.86, "grad_norm": 0.5152480602264404, "learning_rate": 0.0004865771143855761, "loss": 3.1152, "step": 17529 }, { "epoch": 0.86, "grad_norm": 0.5064302086830139, "learning_rate": 0.0004865650577244464, "loss": 3.3183, "step": 17530 }, { "epoch": 0.86, "grad_norm": 0.5391542911529541, "learning_rate": 0.00048655300057194036, "loss": 2.9688, "step": 17531 }, { "epoch": 0.86, "grad_norm": 0.49061742424964905, "learning_rate": 0.00048654094292808955, "loss": 3.2061, "step": 17532 }, { "epoch": 0.86, "grad_norm": 0.5557584762573242, "learning_rate": 0.00048652888479292574, "loss": 3.2288, "step": 17533 }, { "epoch": 0.86, "grad_norm": 0.5739030241966248, "learning_rate": 0.0004865168261664808, "loss": 3.1942, "step": 17534 }, { "epoch": 0.86, "grad_norm": 0.5252779126167297, "learning_rate": 0.00048650476704878643, "loss": 3.271, "step": 17535 }, { "epoch": 0.86, "grad_norm": 0.5234642028808594, "learning_rate": 0.0004864927074398744, "loss": 3.3313, "step": 17536 }, { "epoch": 0.86, "grad_norm": 0.5235569477081299, "learning_rate": 0.00048648064733977653, "loss": 3.38, "step": 17537 }, { "epoch": 0.86, "grad_norm": 0.5185587406158447, "learning_rate": 0.00048646858674852443, "loss": 3.1555, "step": 17538 }, { "epoch": 0.86, "grad_norm": 0.546230673789978, "learning_rate": 0.00048645652566615, "loss": 2.8245, "step": 17539 }, { "epoch": 0.86, "grad_norm": 0.5275043249130249, "learning_rate": 0.0004864444640926849, "loss": 3.2103, "step": 17540 }, { "epoch": 0.86, "grad_norm": 0.5376495122909546, "learning_rate": 0.00048643240202816105, "loss": 3.0776, "step": 17541 }, { "epoch": 0.86, "grad_norm": 0.5582915544509888, "learning_rate": 0.0004864203394726101, "loss": 2.9081, "step": 17542 }, { "epoch": 0.86, "grad_norm": 0.5006242990493774, "learning_rate": 0.0004864082764260639, "loss": 3.2375, "step": 17543 }, { "epoch": 0.86, "grad_norm": 0.5330790281295776, "learning_rate": 0.0004863962128885541, "loss": 3.1756, "step": 17544 }, { "epoch": 0.86, "grad_norm": 0.5119918584823608, "learning_rate": 0.00048638414886011263, "loss": 3.119, "step": 17545 }, { "epoch": 0.86, "grad_norm": 0.5270196199417114, "learning_rate": 0.0004863720843407711, "loss": 3.0816, "step": 17546 }, { "epoch": 0.86, "grad_norm": 0.525503396987915, "learning_rate": 0.0004863600193305614, "loss": 3.0303, "step": 17547 }, { "epoch": 0.86, "grad_norm": 0.528007447719574, "learning_rate": 0.00048634795382951525, "loss": 3.2578, "step": 17548 }, { "epoch": 0.86, "grad_norm": 0.495852530002594, "learning_rate": 0.0004863358878376645, "loss": 3.1074, "step": 17549 }, { "epoch": 0.86, "grad_norm": 0.5401389598846436, "learning_rate": 0.0004863238213550409, "loss": 3.3623, "step": 17550 }, { "epoch": 0.86, "grad_norm": 0.5262596607208252, "learning_rate": 0.00048631175438167606, "loss": 3.3607, "step": 17551 }, { "epoch": 0.86, "grad_norm": 0.6294898986816406, "learning_rate": 0.0004862996869176021, "loss": 3.1021, "step": 17552 }, { "epoch": 0.86, "grad_norm": 0.5093241930007935, "learning_rate": 0.0004862876189628505, "loss": 3.1727, "step": 17553 }, { "epoch": 0.86, "grad_norm": 0.51717609167099, "learning_rate": 0.0004862755505174533, "loss": 3.0077, "step": 17554 }, { "epoch": 0.86, "grad_norm": 0.5517076849937439, "learning_rate": 0.00048626348158144206, "loss": 3.1981, "step": 17555 }, { "epoch": 0.86, "grad_norm": 0.5027273893356323, "learning_rate": 0.0004862514121548486, "loss": 3.0377, "step": 17556 }, { "epoch": 0.86, "grad_norm": 0.5180482268333435, "learning_rate": 0.0004862393422377048, "loss": 3.1539, "step": 17557 }, { "epoch": 0.86, "grad_norm": 0.5299745202064514, "learning_rate": 0.00048622727183004246, "loss": 3.0539, "step": 17558 }, { "epoch": 0.86, "grad_norm": 0.49521052837371826, "learning_rate": 0.0004862152009318933, "loss": 3.3228, "step": 17559 }, { "epoch": 0.86, "grad_norm": 0.5019090175628662, "learning_rate": 0.0004862031295432892, "loss": 3.1764, "step": 17560 }, { "epoch": 0.86, "grad_norm": 0.5337850451469421, "learning_rate": 0.0004861910576642618, "loss": 3.2292, "step": 17561 }, { "epoch": 0.86, "grad_norm": 0.5519272089004517, "learning_rate": 0.000486178985294843, "loss": 3.2541, "step": 17562 }, { "epoch": 0.86, "grad_norm": 0.5433281660079956, "learning_rate": 0.0004861669124350646, "loss": 3.2232, "step": 17563 }, { "epoch": 0.86, "grad_norm": 0.5310292840003967, "learning_rate": 0.0004861548390849584, "loss": 3.0135, "step": 17564 }, { "epoch": 0.86, "grad_norm": 0.50967937707901, "learning_rate": 0.00048614276524455615, "loss": 3.193, "step": 17565 }, { "epoch": 0.86, "grad_norm": 0.5428502559661865, "learning_rate": 0.0004861306909138897, "loss": 3.2202, "step": 17566 }, { "epoch": 0.86, "grad_norm": 0.5671022534370422, "learning_rate": 0.00048611861609299087, "loss": 3.2069, "step": 17567 }, { "epoch": 0.86, "grad_norm": 0.517419695854187, "learning_rate": 0.0004861065407818914, "loss": 3.1775, "step": 17568 }, { "epoch": 0.86, "grad_norm": 0.5266711711883545, "learning_rate": 0.00048609446498062303, "loss": 3.0535, "step": 17569 }, { "epoch": 0.86, "grad_norm": 0.5126215815544128, "learning_rate": 0.00048608238868921773, "loss": 3.2225, "step": 17570 }, { "epoch": 0.86, "grad_norm": 0.5277024507522583, "learning_rate": 0.0004860703119077073, "loss": 2.9624, "step": 17571 }, { "epoch": 0.86, "grad_norm": 0.5014190077781677, "learning_rate": 0.00048605823463612334, "loss": 3.1931, "step": 17572 }, { "epoch": 0.86, "grad_norm": 0.5363627076148987, "learning_rate": 0.00048604615687449794, "loss": 3.3138, "step": 17573 }, { "epoch": 0.86, "grad_norm": 0.5196545720100403, "learning_rate": 0.00048603407862286266, "loss": 3.0961, "step": 17574 }, { "epoch": 0.86, "grad_norm": 0.49793750047683716, "learning_rate": 0.0004860219998812494, "loss": 3.2758, "step": 17575 }, { "epoch": 0.86, "grad_norm": 0.5129599571228027, "learning_rate": 0.00048600992064969014, "loss": 3.2445, "step": 17576 }, { "epoch": 0.86, "grad_norm": 0.5278726816177368, "learning_rate": 0.0004859978409282164, "loss": 3.0858, "step": 17577 }, { "epoch": 0.86, "grad_norm": 0.5560405850410461, "learning_rate": 0.0004859857607168602, "loss": 3.0362, "step": 17578 }, { "epoch": 0.86, "grad_norm": 0.5135570764541626, "learning_rate": 0.0004859736800156533, "loss": 3.0105, "step": 17579 }, { "epoch": 0.86, "grad_norm": 0.5374484062194824, "learning_rate": 0.0004859615988246275, "loss": 3.118, "step": 17580 }, { "epoch": 0.86, "grad_norm": 0.5141043066978455, "learning_rate": 0.00048594951714381465, "loss": 2.999, "step": 17581 }, { "epoch": 0.86, "grad_norm": 0.5360473990440369, "learning_rate": 0.0004859374349732466, "loss": 3.1131, "step": 17582 }, { "epoch": 0.86, "grad_norm": 0.5004850029945374, "learning_rate": 0.000485925352312955, "loss": 3.1204, "step": 17583 }, { "epoch": 0.86, "grad_norm": 0.5214194655418396, "learning_rate": 0.00048591326916297184, "loss": 3.3562, "step": 17584 }, { "epoch": 0.86, "grad_norm": 0.5029579401016235, "learning_rate": 0.000485901185523329, "loss": 3.2461, "step": 17585 }, { "epoch": 0.86, "grad_norm": 0.5304317474365234, "learning_rate": 0.0004858891013940582, "loss": 3.2182, "step": 17586 }, { "epoch": 0.86, "grad_norm": 0.5088167786598206, "learning_rate": 0.00048587701677519127, "loss": 3.1036, "step": 17587 }, { "epoch": 0.86, "grad_norm": 0.5475550889968872, "learning_rate": 0.00048586493166676004, "loss": 3.0157, "step": 17588 }, { "epoch": 0.86, "grad_norm": 0.5142987370491028, "learning_rate": 0.0004858528460687963, "loss": 2.8891, "step": 17589 }, { "epoch": 0.86, "grad_norm": 0.5690994262695312, "learning_rate": 0.000485840759981332, "loss": 3.1212, "step": 17590 }, { "epoch": 0.86, "grad_norm": 0.5271167159080505, "learning_rate": 0.00048582867340439897, "loss": 3.2301, "step": 17591 }, { "epoch": 0.86, "grad_norm": 0.512944757938385, "learning_rate": 0.00048581658633802883, "loss": 3.3691, "step": 17592 }, { "epoch": 0.86, "grad_norm": 0.5304507613182068, "learning_rate": 0.00048580449878225367, "loss": 3.3141, "step": 17593 }, { "epoch": 0.86, "grad_norm": 0.6154621839523315, "learning_rate": 0.0004857924107371051, "loss": 3.1346, "step": 17594 }, { "epoch": 0.86, "grad_norm": 0.556638777256012, "learning_rate": 0.0004857803222026151, "loss": 3.1249, "step": 17595 }, { "epoch": 0.86, "grad_norm": 0.5296390652656555, "learning_rate": 0.00048576823317881564, "loss": 2.9605, "step": 17596 }, { "epoch": 0.86, "grad_norm": 0.5081044435501099, "learning_rate": 0.00048575614366573827, "loss": 3.182, "step": 17597 }, { "epoch": 0.86, "grad_norm": 0.5104318261146545, "learning_rate": 0.0004857440536634151, "loss": 3.342, "step": 17598 }, { "epoch": 0.86, "grad_norm": 0.5356442332267761, "learning_rate": 0.0004857319631718777, "loss": 3.0014, "step": 17599 }, { "epoch": 0.86, "grad_norm": 0.48045575618743896, "learning_rate": 0.000485719872191158, "loss": 3.2048, "step": 17600 }, { "epoch": 0.86, "grad_norm": 0.4831188917160034, "learning_rate": 0.00048570778072128806, "loss": 3.4564, "step": 17601 }, { "epoch": 0.86, "grad_norm": 0.5195653438568115, "learning_rate": 0.00048569568876229944, "loss": 3.4224, "step": 17602 }, { "epoch": 0.86, "grad_norm": 0.5168091654777527, "learning_rate": 0.0004856835963142242, "loss": 3.0934, "step": 17603 }, { "epoch": 0.86, "grad_norm": 0.49368128180503845, "learning_rate": 0.0004856715033770941, "loss": 2.9334, "step": 17604 }, { "epoch": 0.86, "grad_norm": 0.5106991529464722, "learning_rate": 0.00048565940995094097, "loss": 3.1075, "step": 17605 }, { "epoch": 0.86, "grad_norm": 0.5301958918571472, "learning_rate": 0.0004856473160357967, "loss": 2.885, "step": 17606 }, { "epoch": 0.86, "grad_norm": 0.5249580144882202, "learning_rate": 0.00048563522163169314, "loss": 3.176, "step": 17607 }, { "epoch": 0.86, "grad_norm": 0.5821053385734558, "learning_rate": 0.000485623126738662, "loss": 3.2309, "step": 17608 }, { "epoch": 0.86, "grad_norm": 0.5431340932846069, "learning_rate": 0.00048561103135673546, "loss": 3.0735, "step": 17609 }, { "epoch": 0.86, "grad_norm": 0.5106194019317627, "learning_rate": 0.000485598935485945, "loss": 3.1356, "step": 17610 }, { "epoch": 0.86, "grad_norm": 0.5526049733161926, "learning_rate": 0.00048558683912632277, "loss": 3.2272, "step": 17611 }, { "epoch": 0.86, "grad_norm": 0.5209463238716125, "learning_rate": 0.00048557474227790056, "loss": 3.1295, "step": 17612 }, { "epoch": 0.86, "grad_norm": 0.5306352376937866, "learning_rate": 0.00048556264494071014, "loss": 3.151, "step": 17613 }, { "epoch": 0.86, "grad_norm": 0.483786016702652, "learning_rate": 0.00048555054711478345, "loss": 3.1528, "step": 17614 }, { "epoch": 0.86, "grad_norm": 0.5121860504150391, "learning_rate": 0.00048553844880015225, "loss": 3.2255, "step": 17615 }, { "epoch": 0.86, "grad_norm": 0.5123046040534973, "learning_rate": 0.0004855263499968486, "loss": 3.2583, "step": 17616 }, { "epoch": 0.86, "grad_norm": 0.5222660899162292, "learning_rate": 0.0004855142507049042, "loss": 3.3446, "step": 17617 }, { "epoch": 0.86, "grad_norm": 0.5142706632614136, "learning_rate": 0.00048550215092435094, "loss": 3.2527, "step": 17618 }, { "epoch": 0.86, "grad_norm": 0.5308032035827637, "learning_rate": 0.00048549005065522073, "loss": 3.3778, "step": 17619 }, { "epoch": 0.86, "grad_norm": 0.5231295228004456, "learning_rate": 0.00048547794989754544, "loss": 3.0498, "step": 17620 }, { "epoch": 0.86, "grad_norm": 0.5291686654090881, "learning_rate": 0.00048546584865135684, "loss": 3.1601, "step": 17621 }, { "epoch": 0.86, "grad_norm": 0.5070790648460388, "learning_rate": 0.00048545374691668703, "loss": 3.2343, "step": 17622 }, { "epoch": 0.86, "grad_norm": 0.5530633926391602, "learning_rate": 0.00048544164469356766, "loss": 3.076, "step": 17623 }, { "epoch": 0.86, "grad_norm": 0.4944988489151001, "learning_rate": 0.0004854295419820307, "loss": 2.9956, "step": 17624 }, { "epoch": 0.86, "grad_norm": 0.5238565802574158, "learning_rate": 0.000485417438782108, "loss": 2.9795, "step": 17625 }, { "epoch": 0.86, "grad_norm": 0.5411428809165955, "learning_rate": 0.00048540533509383143, "loss": 3.3082, "step": 17626 }, { "epoch": 0.86, "grad_norm": 0.4969344437122345, "learning_rate": 0.00048539323091723296, "loss": 3.0575, "step": 17627 }, { "epoch": 0.86, "grad_norm": 0.5270441174507141, "learning_rate": 0.00048538112625234436, "loss": 3.0456, "step": 17628 }, { "epoch": 0.86, "grad_norm": 0.5126833915710449, "learning_rate": 0.00048536902109919756, "loss": 3.183, "step": 17629 }, { "epoch": 0.86, "grad_norm": 0.5584427118301392, "learning_rate": 0.00048535691545782445, "loss": 3.2503, "step": 17630 }, { "epoch": 0.86, "grad_norm": 0.5255547761917114, "learning_rate": 0.0004853448093282569, "loss": 3.2817, "step": 17631 }, { "epoch": 0.86, "grad_norm": 0.5031391382217407, "learning_rate": 0.0004853327027105267, "loss": 3.1273, "step": 17632 }, { "epoch": 0.86, "grad_norm": 0.5100876092910767, "learning_rate": 0.0004853205956046659, "loss": 3.3041, "step": 17633 }, { "epoch": 0.86, "grad_norm": 0.5045557022094727, "learning_rate": 0.0004853084880107064, "loss": 3.3764, "step": 17634 }, { "epoch": 0.86, "grad_norm": 0.512071430683136, "learning_rate": 0.00048529637992867985, "loss": 3.2025, "step": 17635 }, { "epoch": 0.86, "grad_norm": 0.505384087562561, "learning_rate": 0.00048528427135861835, "loss": 3.1017, "step": 17636 }, { "epoch": 0.86, "grad_norm": 0.5387892127037048, "learning_rate": 0.0004852721623005538, "loss": 2.9785, "step": 17637 }, { "epoch": 0.86, "grad_norm": 0.5180559158325195, "learning_rate": 0.00048526005275451804, "loss": 3.3379, "step": 17638 }, { "epoch": 0.86, "grad_norm": 0.5065121650695801, "learning_rate": 0.0004852479427205428, "loss": 3.1839, "step": 17639 }, { "epoch": 0.86, "grad_norm": 0.5370428562164307, "learning_rate": 0.00048523583219866023, "loss": 3.4099, "step": 17640 }, { "epoch": 0.86, "grad_norm": 0.5340681076049805, "learning_rate": 0.0004852237211889022, "loss": 3.2899, "step": 17641 }, { "epoch": 0.86, "grad_norm": 0.524978518486023, "learning_rate": 0.0004852116096913004, "loss": 3.0903, "step": 17642 }, { "epoch": 0.86, "grad_norm": 0.5199117660522461, "learning_rate": 0.000485199497705887, "loss": 3.3207, "step": 17643 }, { "epoch": 0.86, "grad_norm": 0.5268123149871826, "learning_rate": 0.00048518738523269366, "loss": 3.1258, "step": 17644 }, { "epoch": 0.86, "grad_norm": 0.5221728086471558, "learning_rate": 0.0004851752722717524, "loss": 3.0099, "step": 17645 }, { "epoch": 0.86, "grad_norm": 0.5612000226974487, "learning_rate": 0.00048516315882309513, "loss": 3.1997, "step": 17646 }, { "epoch": 0.86, "grad_norm": 0.4834546148777008, "learning_rate": 0.00048515104488675373, "loss": 3.2142, "step": 17647 }, { "epoch": 0.86, "grad_norm": 0.5152857899665833, "learning_rate": 0.0004851389304627601, "loss": 3.0943, "step": 17648 }, { "epoch": 0.86, "grad_norm": 0.6461136937141418, "learning_rate": 0.0004851268155511462, "loss": 3.0051, "step": 17649 }, { "epoch": 0.86, "grad_norm": 0.5414785146713257, "learning_rate": 0.00048511470015194394, "loss": 3.1216, "step": 17650 }, { "epoch": 0.87, "grad_norm": 0.5066803693771362, "learning_rate": 0.000485102584265185, "loss": 2.9696, "step": 17651 }, { "epoch": 0.87, "grad_norm": 0.5823258757591248, "learning_rate": 0.0004850904678909016, "loss": 3.2366, "step": 17652 }, { "epoch": 0.87, "grad_norm": 0.5320606231689453, "learning_rate": 0.0004850783510291256, "loss": 2.8672, "step": 17653 }, { "epoch": 0.87, "grad_norm": 0.5367481708526611, "learning_rate": 0.0004850662336798888, "loss": 3.3241, "step": 17654 }, { "epoch": 0.87, "grad_norm": 0.5208763480186462, "learning_rate": 0.00048505411584322304, "loss": 3.123, "step": 17655 }, { "epoch": 0.87, "grad_norm": 0.5208843946456909, "learning_rate": 0.00048504199751916045, "loss": 3.0314, "step": 17656 }, { "epoch": 0.87, "grad_norm": 0.5739818811416626, "learning_rate": 0.00048502987870773287, "loss": 3.1522, "step": 17657 }, { "epoch": 0.87, "grad_norm": 0.5213282108306885, "learning_rate": 0.0004850177594089722, "loss": 3.227, "step": 17658 }, { "epoch": 0.87, "grad_norm": 0.4983675479888916, "learning_rate": 0.00048500563962291024, "loss": 3.1158, "step": 17659 }, { "epoch": 0.87, "grad_norm": 0.5267578363418579, "learning_rate": 0.00048499351934957915, "loss": 3.216, "step": 17660 }, { "epoch": 0.87, "grad_norm": 0.4949793219566345, "learning_rate": 0.0004849813985890107, "loss": 2.8112, "step": 17661 }, { "epoch": 0.87, "grad_norm": 0.5270505547523499, "learning_rate": 0.0004849692773412368, "loss": 3.1422, "step": 17662 }, { "epoch": 0.87, "grad_norm": 0.604992151260376, "learning_rate": 0.00048495715560628946, "loss": 3.2086, "step": 17663 }, { "epoch": 0.87, "grad_norm": 0.5165683627128601, "learning_rate": 0.0004849450333842006, "loss": 3.1152, "step": 17664 }, { "epoch": 0.87, "grad_norm": 0.49626970291137695, "learning_rate": 0.00048493291067500214, "loss": 3.0413, "step": 17665 }, { "epoch": 0.87, "grad_norm": 0.5042181611061096, "learning_rate": 0.0004849207874787258, "loss": 3.1962, "step": 17666 }, { "epoch": 0.87, "grad_norm": 0.5245406627655029, "learning_rate": 0.0004849086637954039, "loss": 3.0042, "step": 17667 }, { "epoch": 0.87, "grad_norm": 0.49068549275398254, "learning_rate": 0.00048489653962506806, "loss": 3.0559, "step": 17668 }, { "epoch": 0.87, "grad_norm": 0.576702892780304, "learning_rate": 0.0004848844149677504, "loss": 2.9814, "step": 17669 }, { "epoch": 0.87, "grad_norm": 0.48933812975883484, "learning_rate": 0.0004848722898234827, "loss": 3.1889, "step": 17670 }, { "epoch": 0.87, "grad_norm": 0.5341169834136963, "learning_rate": 0.00048486016419229696, "loss": 3.2209, "step": 17671 }, { "epoch": 0.87, "grad_norm": 0.5466429591178894, "learning_rate": 0.0004848480380742252, "loss": 3.189, "step": 17672 }, { "epoch": 0.87, "grad_norm": 0.5925959348678589, "learning_rate": 0.00048483591146929926, "loss": 3.4804, "step": 17673 }, { "epoch": 0.87, "grad_norm": 0.544042706489563, "learning_rate": 0.00048482378437755103, "loss": 3.2635, "step": 17674 }, { "epoch": 0.87, "grad_norm": 0.518909752368927, "learning_rate": 0.0004848116567990126, "loss": 3.3417, "step": 17675 }, { "epoch": 0.87, "grad_norm": 0.5055917501449585, "learning_rate": 0.0004847995287337158, "loss": 3.0895, "step": 17676 }, { "epoch": 0.87, "grad_norm": 0.5370952486991882, "learning_rate": 0.0004847874001816926, "loss": 3.2929, "step": 17677 }, { "epoch": 0.87, "grad_norm": 0.5014994144439697, "learning_rate": 0.00048477527114297494, "loss": 3.09, "step": 17678 }, { "epoch": 0.87, "grad_norm": 0.5466054081916809, "learning_rate": 0.00048476314161759486, "loss": 3.032, "step": 17679 }, { "epoch": 0.87, "grad_norm": 0.554060161113739, "learning_rate": 0.0004847510116055842, "loss": 3.1949, "step": 17680 }, { "epoch": 0.87, "grad_norm": 0.5110071897506714, "learning_rate": 0.00048473888110697484, "loss": 3.1546, "step": 17681 }, { "epoch": 0.87, "grad_norm": 0.5429432392120361, "learning_rate": 0.00048472675012179887, "loss": 3.1586, "step": 17682 }, { "epoch": 0.87, "grad_norm": 0.48281505703926086, "learning_rate": 0.00048471461865008816, "loss": 3.0572, "step": 17683 }, { "epoch": 0.87, "grad_norm": 0.5270022749900818, "learning_rate": 0.00048470248669187484, "loss": 3.2111, "step": 17684 }, { "epoch": 0.87, "grad_norm": 0.4998297691345215, "learning_rate": 0.0004846903542471906, "loss": 3.1914, "step": 17685 }, { "epoch": 0.87, "grad_norm": 0.5160195231437683, "learning_rate": 0.00048467822131606747, "loss": 3.1542, "step": 17686 }, { "epoch": 0.87, "grad_norm": 0.5145862698554993, "learning_rate": 0.0004846660878985375, "loss": 2.9667, "step": 17687 }, { "epoch": 0.87, "grad_norm": 0.5544316172599792, "learning_rate": 0.00048465395399463257, "loss": 3.1801, "step": 17688 }, { "epoch": 0.87, "grad_norm": 0.4879906177520752, "learning_rate": 0.00048464181960438475, "loss": 3.2567, "step": 17689 }, { "epoch": 0.87, "grad_norm": 0.4931853711605072, "learning_rate": 0.00048462968472782586, "loss": 3.2061, "step": 17690 }, { "epoch": 0.87, "grad_norm": 0.5492919683456421, "learning_rate": 0.00048461754936498787, "loss": 3.0187, "step": 17691 }, { "epoch": 0.87, "grad_norm": 0.5467280149459839, "learning_rate": 0.00048460541351590277, "loss": 3.1143, "step": 17692 }, { "epoch": 0.87, "grad_norm": 0.49284547567367554, "learning_rate": 0.0004845932771806026, "loss": 3.1644, "step": 17693 }, { "epoch": 0.87, "grad_norm": 0.5207919478416443, "learning_rate": 0.0004845811403591193, "loss": 3.1206, "step": 17694 }, { "epoch": 0.87, "grad_norm": 0.5158400535583496, "learning_rate": 0.00048456900305148475, "loss": 3.1989, "step": 17695 }, { "epoch": 0.87, "grad_norm": 0.4813472330570221, "learning_rate": 0.00048455686525773094, "loss": 2.9873, "step": 17696 }, { "epoch": 0.87, "grad_norm": 0.49759748578071594, "learning_rate": 0.0004845447269778898, "loss": 3.2696, "step": 17697 }, { "epoch": 0.87, "grad_norm": 0.5683988332748413, "learning_rate": 0.00048453258821199347, "loss": 2.8853, "step": 17698 }, { "epoch": 0.87, "grad_norm": 0.5115375518798828, "learning_rate": 0.0004845204489600738, "loss": 3.1641, "step": 17699 }, { "epoch": 0.87, "grad_norm": 0.5470658540725708, "learning_rate": 0.0004845083092221629, "loss": 3.1421, "step": 17700 }, { "epoch": 0.87, "grad_norm": 0.5110447406768799, "learning_rate": 0.0004844961689982924, "loss": 3.2852, "step": 17701 }, { "epoch": 0.87, "grad_norm": 0.5411603450775146, "learning_rate": 0.0004844840282884947, "loss": 3.1624, "step": 17702 }, { "epoch": 0.87, "grad_norm": 0.5393169522285461, "learning_rate": 0.00048447188709280144, "loss": 3.1563, "step": 17703 }, { "epoch": 0.87, "grad_norm": 0.480815052986145, "learning_rate": 0.00048445974541124474, "loss": 3.3201, "step": 17704 }, { "epoch": 0.87, "grad_norm": 0.5160706639289856, "learning_rate": 0.00048444760324385655, "loss": 3.3324, "step": 17705 }, { "epoch": 0.87, "grad_norm": 0.5325861573219299, "learning_rate": 0.0004844354605906689, "loss": 3.0338, "step": 17706 }, { "epoch": 0.87, "grad_norm": 0.4856245517730713, "learning_rate": 0.0004844233174517138, "loss": 3.1634, "step": 17707 }, { "epoch": 0.87, "grad_norm": 0.5197152495384216, "learning_rate": 0.00048441117382702316, "loss": 3.1673, "step": 17708 }, { "epoch": 0.87, "grad_norm": 0.537200927734375, "learning_rate": 0.0004843990297166289, "loss": 3.1794, "step": 17709 }, { "epoch": 0.87, "grad_norm": 0.5353251695632935, "learning_rate": 0.00048438688512056317, "loss": 3.0948, "step": 17710 }, { "epoch": 0.87, "grad_norm": 0.5023453831672668, "learning_rate": 0.0004843747400388579, "loss": 3.2727, "step": 17711 }, { "epoch": 0.87, "grad_norm": 0.5291193127632141, "learning_rate": 0.00048436259447154497, "loss": 2.969, "step": 17712 }, { "epoch": 0.87, "grad_norm": 0.5221428871154785, "learning_rate": 0.00048435044841865646, "loss": 3.1545, "step": 17713 }, { "epoch": 0.87, "grad_norm": 0.5405849814414978, "learning_rate": 0.00048433830188022433, "loss": 3.109, "step": 17714 }, { "epoch": 0.87, "grad_norm": 0.518534779548645, "learning_rate": 0.0004843261548562806, "loss": 3.2215, "step": 17715 }, { "epoch": 0.87, "grad_norm": 0.5127702951431274, "learning_rate": 0.00048431400734685724, "loss": 3.1477, "step": 17716 }, { "epoch": 0.87, "grad_norm": 0.49492380023002625, "learning_rate": 0.0004843018593519863, "loss": 3.1372, "step": 17717 }, { "epoch": 0.87, "grad_norm": 0.5577251315116882, "learning_rate": 0.0004842897108716997, "loss": 3.078, "step": 17718 }, { "epoch": 0.87, "grad_norm": 0.5143783092498779, "learning_rate": 0.0004842775619060295, "loss": 3.0365, "step": 17719 }, { "epoch": 0.87, "grad_norm": 0.5474133491516113, "learning_rate": 0.0004842654124550077, "loss": 3.0492, "step": 17720 }, { "epoch": 0.87, "grad_norm": 0.5189671516418457, "learning_rate": 0.0004842532625186661, "loss": 3.1727, "step": 17721 }, { "epoch": 0.87, "grad_norm": 0.5471231341362, "learning_rate": 0.00048424111209703706, "loss": 3.169, "step": 17722 }, { "epoch": 0.87, "grad_norm": 0.4973917603492737, "learning_rate": 0.00048422896119015233, "loss": 3.0737, "step": 17723 }, { "epoch": 0.87, "grad_norm": 0.5816762447357178, "learning_rate": 0.00048421680979804393, "loss": 3.1443, "step": 17724 }, { "epoch": 0.87, "grad_norm": 0.514470100402832, "learning_rate": 0.0004842046579207439, "loss": 3.111, "step": 17725 }, { "epoch": 0.87, "grad_norm": 0.5202110409736633, "learning_rate": 0.0004841925055582843, "loss": 3.2958, "step": 17726 }, { "epoch": 0.87, "grad_norm": 0.48983487486839294, "learning_rate": 0.0004841803527106971, "loss": 3.0307, "step": 17727 }, { "epoch": 0.87, "grad_norm": 0.5123018026351929, "learning_rate": 0.0004841681993780142, "loss": 3.0805, "step": 17728 }, { "epoch": 0.87, "grad_norm": 0.5432770252227783, "learning_rate": 0.00048415604556026787, "loss": 3.2386, "step": 17729 }, { "epoch": 0.87, "grad_norm": 0.5174310207366943, "learning_rate": 0.00048414389125748977, "loss": 3.2467, "step": 17730 }, { "epoch": 0.87, "grad_norm": 0.503591001033783, "learning_rate": 0.00048413173646971226, "loss": 3.1232, "step": 17731 }, { "epoch": 0.87, "grad_norm": 0.5538753271102905, "learning_rate": 0.00048411958119696716, "loss": 2.9704, "step": 17732 }, { "epoch": 0.87, "grad_norm": 0.5586168766021729, "learning_rate": 0.0004841074254392864, "loss": 3.0653, "step": 17733 }, { "epoch": 0.87, "grad_norm": 0.5222452282905579, "learning_rate": 0.0004840952691967022, "loss": 3.0688, "step": 17734 }, { "epoch": 0.87, "grad_norm": 0.5168008804321289, "learning_rate": 0.0004840831124692465, "loss": 3.0731, "step": 17735 }, { "epoch": 0.87, "grad_norm": 0.4989527761936188, "learning_rate": 0.00048407095525695125, "loss": 3.0839, "step": 17736 }, { "epoch": 0.87, "grad_norm": 0.5122324824333191, "learning_rate": 0.0004840587975598486, "loss": 3.1072, "step": 17737 }, { "epoch": 0.87, "grad_norm": 0.5376427173614502, "learning_rate": 0.0004840466393779704, "loss": 3.2577, "step": 17738 }, { "epoch": 0.87, "grad_norm": 0.5357086062431335, "learning_rate": 0.00048403448071134887, "loss": 3.2627, "step": 17739 }, { "epoch": 0.87, "grad_norm": 0.530827522277832, "learning_rate": 0.000484022321560016, "loss": 3.1058, "step": 17740 }, { "epoch": 0.87, "grad_norm": 0.5042960047721863, "learning_rate": 0.0004840101619240036, "loss": 3.2674, "step": 17741 }, { "epoch": 0.87, "grad_norm": 0.5273114442825317, "learning_rate": 0.00048399800180334396, "loss": 3.2993, "step": 17742 }, { "epoch": 0.87, "grad_norm": 0.5568901300430298, "learning_rate": 0.0004839858411980689, "loss": 3.0452, "step": 17743 }, { "epoch": 0.87, "grad_norm": 0.5302985906600952, "learning_rate": 0.0004839736801082106, "loss": 3.0976, "step": 17744 }, { "epoch": 0.87, "grad_norm": 0.5017320513725281, "learning_rate": 0.00048396151853380106, "loss": 3.0252, "step": 17745 }, { "epoch": 0.87, "grad_norm": 0.5245184302330017, "learning_rate": 0.00048394935647487226, "loss": 2.9773, "step": 17746 }, { "epoch": 0.87, "grad_norm": 0.5124706029891968, "learning_rate": 0.00048393719393145617, "loss": 3.0761, "step": 17747 }, { "epoch": 0.87, "grad_norm": 0.5464129447937012, "learning_rate": 0.000483925030903585, "loss": 3.1286, "step": 17748 }, { "epoch": 0.87, "grad_norm": 0.5206067562103271, "learning_rate": 0.0004839128673912907, "loss": 3.2381, "step": 17749 }, { "epoch": 0.87, "grad_norm": 0.5206215381622314, "learning_rate": 0.00048390070339460526, "loss": 3.0297, "step": 17750 }, { "epoch": 0.87, "grad_norm": 0.5299220085144043, "learning_rate": 0.0004838885389135608, "loss": 3.2162, "step": 17751 }, { "epoch": 0.87, "grad_norm": 0.5235497355461121, "learning_rate": 0.00048387637394818925, "loss": 3.0847, "step": 17752 }, { "epoch": 0.87, "grad_norm": 0.5170539021492004, "learning_rate": 0.0004838642084985228, "loss": 3.1975, "step": 17753 }, { "epoch": 0.87, "grad_norm": 0.5105252265930176, "learning_rate": 0.00048385204256459334, "loss": 3.2036, "step": 17754 }, { "epoch": 0.87, "grad_norm": 0.5179150700569153, "learning_rate": 0.00048383987614643303, "loss": 3.1076, "step": 17755 }, { "epoch": 0.87, "grad_norm": 0.5759047865867615, "learning_rate": 0.0004838277092440739, "loss": 3.0669, "step": 17756 }, { "epoch": 0.87, "grad_norm": 0.4824429452419281, "learning_rate": 0.0004838155418575479, "loss": 3.1084, "step": 17757 }, { "epoch": 0.87, "grad_norm": 0.5309851765632629, "learning_rate": 0.00048380337398688713, "loss": 3.0484, "step": 17758 }, { "epoch": 0.87, "grad_norm": 0.5082951188087463, "learning_rate": 0.00048379120563212365, "loss": 3.0945, "step": 17759 }, { "epoch": 0.87, "grad_norm": 0.5423614382743835, "learning_rate": 0.0004837790367932896, "loss": 3.0284, "step": 17760 }, { "epoch": 0.87, "grad_norm": 0.503147304058075, "learning_rate": 0.00048376686747041684, "loss": 3.1037, "step": 17761 }, { "epoch": 0.87, "grad_norm": 0.5271511077880859, "learning_rate": 0.00048375469766353754, "loss": 3.2318, "step": 17762 }, { "epoch": 0.87, "grad_norm": 0.5349052548408508, "learning_rate": 0.0004837425273726838, "loss": 3.2816, "step": 17763 }, { "epoch": 0.87, "grad_norm": 0.5517288446426392, "learning_rate": 0.0004837303565978875, "loss": 3.2532, "step": 17764 }, { "epoch": 0.87, "grad_norm": 0.5327991843223572, "learning_rate": 0.00048371818533918075, "loss": 2.9928, "step": 17765 }, { "epoch": 0.87, "grad_norm": 0.5061706304550171, "learning_rate": 0.0004837060135965958, "loss": 3.2314, "step": 17766 }, { "epoch": 0.87, "grad_norm": 0.5411424040794373, "learning_rate": 0.00048369384137016456, "loss": 3.1747, "step": 17767 }, { "epoch": 0.87, "grad_norm": 0.4953779876232147, "learning_rate": 0.000483681668659919, "loss": 3.2839, "step": 17768 }, { "epoch": 0.87, "grad_norm": 0.5827047824859619, "learning_rate": 0.0004836694954658913, "loss": 3.2348, "step": 17769 }, { "epoch": 0.87, "grad_norm": 0.5480215549468994, "learning_rate": 0.00048365732178811354, "loss": 3.1482, "step": 17770 }, { "epoch": 0.87, "grad_norm": 0.4860683083534241, "learning_rate": 0.00048364514762661774, "loss": 3.1271, "step": 17771 }, { "epoch": 0.87, "grad_norm": 0.552074134349823, "learning_rate": 0.0004836329729814359, "loss": 3.3587, "step": 17772 }, { "epoch": 0.87, "grad_norm": 0.4946533441543579, "learning_rate": 0.00048362079785260027, "loss": 3.0403, "step": 17773 }, { "epoch": 0.87, "grad_norm": 0.5436502695083618, "learning_rate": 0.00048360862224014267, "loss": 3.1044, "step": 17774 }, { "epoch": 0.87, "grad_norm": 0.5207213163375854, "learning_rate": 0.00048359644614409534, "loss": 3.1488, "step": 17775 }, { "epoch": 0.87, "grad_norm": 0.5504875779151917, "learning_rate": 0.00048358426956449026, "loss": 3.2646, "step": 17776 }, { "epoch": 0.87, "grad_norm": 0.517497718334198, "learning_rate": 0.00048357209250135964, "loss": 3.1974, "step": 17777 }, { "epoch": 0.87, "grad_norm": 0.5165881514549255, "learning_rate": 0.00048355991495473545, "loss": 2.9446, "step": 17778 }, { "epoch": 0.87, "grad_norm": 0.5115556716918945, "learning_rate": 0.0004835477369246497, "loss": 3.0173, "step": 17779 }, { "epoch": 0.87, "grad_norm": 0.5526302456855774, "learning_rate": 0.00048353555841113455, "loss": 3.1211, "step": 17780 }, { "epoch": 0.87, "grad_norm": 0.5087662935256958, "learning_rate": 0.00048352337941422207, "loss": 3.1776, "step": 17781 }, { "epoch": 0.87, "grad_norm": 0.5549590587615967, "learning_rate": 0.0004835111999339444, "loss": 3.0331, "step": 17782 }, { "epoch": 0.87, "grad_norm": 0.5352929830551147, "learning_rate": 0.00048349901997033347, "loss": 3.0673, "step": 17783 }, { "epoch": 0.87, "grad_norm": 0.5016866326332092, "learning_rate": 0.00048348683952342136, "loss": 3.2462, "step": 17784 }, { "epoch": 0.87, "grad_norm": 0.4875168800354004, "learning_rate": 0.0004834746585932404, "loss": 3.2957, "step": 17785 }, { "epoch": 0.87, "grad_norm": 0.5208585858345032, "learning_rate": 0.0004834624771798224, "loss": 3.3014, "step": 17786 }, { "epoch": 0.87, "grad_norm": 0.5304600596427917, "learning_rate": 0.00048345029528319954, "loss": 3.0373, "step": 17787 }, { "epoch": 0.87, "grad_norm": 0.5236985683441162, "learning_rate": 0.00048343811290340395, "loss": 3.3229, "step": 17788 }, { "epoch": 0.87, "grad_norm": 0.5062572360038757, "learning_rate": 0.0004834259300404676, "loss": 3.0878, "step": 17789 }, { "epoch": 0.87, "grad_norm": 0.5024277567863464, "learning_rate": 0.00048341374669442274, "loss": 3.1605, "step": 17790 }, { "epoch": 0.87, "grad_norm": 0.5190656781196594, "learning_rate": 0.0004834015628653013, "loss": 3.1117, "step": 17791 }, { "epoch": 0.87, "grad_norm": 0.514510452747345, "learning_rate": 0.0004833893785531355, "loss": 2.8972, "step": 17792 }, { "epoch": 0.87, "grad_norm": 0.4951634705066681, "learning_rate": 0.0004833771937579574, "loss": 3.242, "step": 17793 }, { "epoch": 0.87, "grad_norm": 0.5369077324867249, "learning_rate": 0.000483365008479799, "loss": 3.4729, "step": 17794 }, { "epoch": 0.87, "grad_norm": 0.5123760104179382, "learning_rate": 0.0004833528227186925, "loss": 2.9924, "step": 17795 }, { "epoch": 0.87, "grad_norm": 0.5074111223220825, "learning_rate": 0.00048334063647466986, "loss": 3.329, "step": 17796 }, { "epoch": 0.87, "grad_norm": 0.5059533715248108, "learning_rate": 0.0004833284497477634, "loss": 3.0242, "step": 17797 }, { "epoch": 0.87, "grad_norm": 0.5089158415794373, "learning_rate": 0.0004833162625380049, "loss": 3.2417, "step": 17798 }, { "epoch": 0.87, "grad_norm": 0.5231832265853882, "learning_rate": 0.0004833040748454268, "loss": 3.1011, "step": 17799 }, { "epoch": 0.87, "grad_norm": 0.5030272006988525, "learning_rate": 0.00048329188667006095, "loss": 3.251, "step": 17800 }, { "epoch": 0.87, "grad_norm": 0.516279399394989, "learning_rate": 0.0004832796980119396, "loss": 2.9739, "step": 17801 }, { "epoch": 0.87, "grad_norm": 0.4982314705848694, "learning_rate": 0.0004832675088710948, "loss": 3.1481, "step": 17802 }, { "epoch": 0.87, "grad_norm": 0.5156276822090149, "learning_rate": 0.00048325531924755865, "loss": 3.2038, "step": 17803 }, { "epoch": 0.87, "grad_norm": 0.5175352692604065, "learning_rate": 0.0004832431291413633, "loss": 3.0606, "step": 17804 }, { "epoch": 0.87, "grad_norm": 0.5309943556785583, "learning_rate": 0.0004832309385525407, "loss": 3.1735, "step": 17805 }, { "epoch": 0.87, "grad_norm": 0.5159737467765808, "learning_rate": 0.00048321874748112316, "loss": 2.9259, "step": 17806 }, { "epoch": 0.87, "grad_norm": 0.5461140275001526, "learning_rate": 0.00048320655592714267, "loss": 3.0094, "step": 17807 }, { "epoch": 0.87, "grad_norm": 0.5539809465408325, "learning_rate": 0.00048319436389063144, "loss": 3.1468, "step": 17808 }, { "epoch": 0.87, "grad_norm": 0.5527653694152832, "learning_rate": 0.00048318217137162145, "loss": 3.0324, "step": 17809 }, { "epoch": 0.87, "grad_norm": 0.5274596810340881, "learning_rate": 0.00048316997837014486, "loss": 3.2017, "step": 17810 }, { "epoch": 0.87, "grad_norm": 0.5476181507110596, "learning_rate": 0.00048315778488623376, "loss": 3.2789, "step": 17811 }, { "epoch": 0.87, "grad_norm": 0.5081568956375122, "learning_rate": 0.0004831455909199204, "loss": 3.1682, "step": 17812 }, { "epoch": 0.87, "grad_norm": 0.5192145109176636, "learning_rate": 0.00048313339647123677, "loss": 3.3376, "step": 17813 }, { "epoch": 0.87, "grad_norm": 0.5384969115257263, "learning_rate": 0.00048312120154021495, "loss": 3.1973, "step": 17814 }, { "epoch": 0.87, "grad_norm": 0.543453574180603, "learning_rate": 0.00048310900612688726, "loss": 3.2764, "step": 17815 }, { "epoch": 0.87, "grad_norm": 0.53953617811203, "learning_rate": 0.00048309681023128557, "loss": 2.9113, "step": 17816 }, { "epoch": 0.87, "grad_norm": 0.5175008177757263, "learning_rate": 0.00048308461385344214, "loss": 3.3165, "step": 17817 }, { "epoch": 0.87, "grad_norm": 0.5362492799758911, "learning_rate": 0.0004830724169933891, "loss": 3.1817, "step": 17818 }, { "epoch": 0.87, "grad_norm": 0.5375868082046509, "learning_rate": 0.0004830602196511586, "loss": 3.4691, "step": 17819 }, { "epoch": 0.87, "grad_norm": 0.486717164516449, "learning_rate": 0.0004830480218267826, "loss": 3.0631, "step": 17820 }, { "epoch": 0.87, "grad_norm": 0.5145992040634155, "learning_rate": 0.00048303582352029345, "loss": 3.1301, "step": 17821 }, { "epoch": 0.87, "grad_norm": 0.5075737833976746, "learning_rate": 0.00048302362473172307, "loss": 3.2556, "step": 17822 }, { "epoch": 0.87, "grad_norm": 0.5056702494621277, "learning_rate": 0.0004830114254611037, "loss": 3.0136, "step": 17823 }, { "epoch": 0.87, "grad_norm": 0.5289957523345947, "learning_rate": 0.00048299922570846756, "loss": 3.0611, "step": 17824 }, { "epoch": 0.87, "grad_norm": 0.5203523635864258, "learning_rate": 0.00048298702547384655, "loss": 3.1454, "step": 17825 }, { "epoch": 0.87, "grad_norm": 0.5633178949356079, "learning_rate": 0.00048297482475727295, "loss": 2.9976, "step": 17826 }, { "epoch": 0.87, "grad_norm": 0.5474057197570801, "learning_rate": 0.00048296262355877897, "loss": 3.1389, "step": 17827 }, { "epoch": 0.87, "grad_norm": 0.5257229208946228, "learning_rate": 0.0004829504218783966, "loss": 3.1458, "step": 17828 }, { "epoch": 0.87, "grad_norm": 0.519266664981842, "learning_rate": 0.000482938219716158, "loss": 3.0224, "step": 17829 }, { "epoch": 0.87, "grad_norm": 0.5056769251823425, "learning_rate": 0.0004829260170720953, "loss": 3.2842, "step": 17830 }, { "epoch": 0.87, "grad_norm": 0.5659570693969727, "learning_rate": 0.0004829138139462408, "loss": 2.9025, "step": 17831 }, { "epoch": 0.87, "grad_norm": 0.4957529306411743, "learning_rate": 0.00048290161033862636, "loss": 3.181, "step": 17832 }, { "epoch": 0.87, "grad_norm": 0.5560140609741211, "learning_rate": 0.0004828894062492844, "loss": 3.1902, "step": 17833 }, { "epoch": 0.87, "grad_norm": 0.5515144467353821, "learning_rate": 0.00048287720167824696, "loss": 2.9447, "step": 17834 }, { "epoch": 0.87, "grad_norm": 0.5933049917221069, "learning_rate": 0.00048286499662554604, "loss": 3.0005, "step": 17835 }, { "epoch": 0.87, "grad_norm": 0.5094730854034424, "learning_rate": 0.000482852791091214, "loss": 3.3128, "step": 17836 }, { "epoch": 0.87, "grad_norm": 0.4971897304058075, "learning_rate": 0.0004828405850752829, "loss": 3.2185, "step": 17837 }, { "epoch": 0.87, "grad_norm": 0.5200973749160767, "learning_rate": 0.0004828283785777848, "loss": 3.1677, "step": 17838 }, { "epoch": 0.87, "grad_norm": 0.5161073207855225, "learning_rate": 0.00048281617159875203, "loss": 3.3305, "step": 17839 }, { "epoch": 0.87, "grad_norm": 0.5352292656898499, "learning_rate": 0.0004828039641382167, "loss": 3.2175, "step": 17840 }, { "epoch": 0.87, "grad_norm": 0.49615222215652466, "learning_rate": 0.00048279175619621073, "loss": 3.2629, "step": 17841 }, { "epoch": 0.87, "grad_norm": 0.5312261581420898, "learning_rate": 0.0004827795477727666, "loss": 3.0147, "step": 17842 }, { "epoch": 0.87, "grad_norm": 0.5360621809959412, "learning_rate": 0.0004827673388679163, "loss": 3.0288, "step": 17843 }, { "epoch": 0.87, "grad_norm": 0.5203635096549988, "learning_rate": 0.000482755129481692, "loss": 3.1102, "step": 17844 }, { "epoch": 0.87, "grad_norm": 0.522244930267334, "learning_rate": 0.0004827429196141259, "loss": 3.4478, "step": 17845 }, { "epoch": 0.87, "grad_norm": 0.5168775916099548, "learning_rate": 0.00048273070926525, "loss": 3.2777, "step": 17846 }, { "epoch": 0.87, "grad_norm": 0.5495973229408264, "learning_rate": 0.0004827184984350966, "loss": 3.2903, "step": 17847 }, { "epoch": 0.87, "grad_norm": 0.537128746509552, "learning_rate": 0.0004827062871236979, "loss": 3.3552, "step": 17848 }, { "epoch": 0.87, "grad_norm": 0.5331090092658997, "learning_rate": 0.00048269407533108597, "loss": 3.1168, "step": 17849 }, { "epoch": 0.87, "grad_norm": 0.4784494936466217, "learning_rate": 0.00048268186305729305, "loss": 3.2552, "step": 17850 }, { "epoch": 0.87, "grad_norm": 0.5230741500854492, "learning_rate": 0.00048266965030235116, "loss": 3.164, "step": 17851 }, { "epoch": 0.87, "grad_norm": 0.4925912320613861, "learning_rate": 0.0004826574370662927, "loss": 3.0437, "step": 17852 }, { "epoch": 0.87, "grad_norm": 0.48744553327560425, "learning_rate": 0.00048264522334914964, "loss": 3.0296, "step": 17853 }, { "epoch": 0.87, "grad_norm": 0.4946111738681793, "learning_rate": 0.0004826330091509542, "loss": 2.9261, "step": 17854 }, { "epoch": 0.88, "grad_norm": 0.523500919342041, "learning_rate": 0.0004826207944717386, "loss": 3.0822, "step": 17855 }, { "epoch": 0.88, "grad_norm": 0.5203326344490051, "learning_rate": 0.00048260857931153487, "loss": 3.2367, "step": 17856 }, { "epoch": 0.88, "grad_norm": 0.4974472224712372, "learning_rate": 0.00048259636367037535, "loss": 3.3098, "step": 17857 }, { "epoch": 0.88, "grad_norm": 0.4856162667274475, "learning_rate": 0.00048258414754829226, "loss": 3.4445, "step": 17858 }, { "epoch": 0.88, "grad_norm": 0.5150359869003296, "learning_rate": 0.0004825719309453175, "loss": 3.2439, "step": 17859 }, { "epoch": 0.88, "grad_norm": 0.5459046959877014, "learning_rate": 0.00048255971386148346, "loss": 3.2596, "step": 17860 }, { "epoch": 0.88, "grad_norm": 0.515584409236908, "learning_rate": 0.0004825474962968223, "loss": 3.2124, "step": 17861 }, { "epoch": 0.88, "grad_norm": 0.4975724518299103, "learning_rate": 0.00048253527825136615, "loss": 3.0945, "step": 17862 }, { "epoch": 0.88, "grad_norm": 0.5150234699249268, "learning_rate": 0.00048252305972514725, "loss": 2.9796, "step": 17863 }, { "epoch": 0.88, "grad_norm": 0.5973613262176514, "learning_rate": 0.0004825108407181977, "loss": 3.1476, "step": 17864 }, { "epoch": 0.88, "grad_norm": 0.514706015586853, "learning_rate": 0.0004824986212305497, "loss": 2.8594, "step": 17865 }, { "epoch": 0.88, "grad_norm": 0.5519055128097534, "learning_rate": 0.0004824864012622355, "loss": 3.0926, "step": 17866 }, { "epoch": 0.88, "grad_norm": 0.5024924278259277, "learning_rate": 0.00048247418081328724, "loss": 3.3669, "step": 17867 }, { "epoch": 0.88, "grad_norm": 0.5211602449417114, "learning_rate": 0.000482461959883737, "loss": 3.0907, "step": 17868 }, { "epoch": 0.88, "grad_norm": 0.49125707149505615, "learning_rate": 0.0004824497384736171, "loss": 3.233, "step": 17869 }, { "epoch": 0.88, "grad_norm": 0.5268430709838867, "learning_rate": 0.00048243751658295984, "loss": 2.996, "step": 17870 }, { "epoch": 0.88, "grad_norm": 0.5411224961280823, "learning_rate": 0.00048242529421179715, "loss": 3.2701, "step": 17871 }, { "epoch": 0.88, "grad_norm": 0.534550666809082, "learning_rate": 0.00048241307136016133, "loss": 3.0247, "step": 17872 }, { "epoch": 0.88, "grad_norm": 0.5058745741844177, "learning_rate": 0.0004824008480280847, "loss": 3.3005, "step": 17873 }, { "epoch": 0.88, "grad_norm": 0.5441546440124512, "learning_rate": 0.00048238862421559923, "loss": 3.0935, "step": 17874 }, { "epoch": 0.88, "grad_norm": 0.5148599147796631, "learning_rate": 0.0004823763999227373, "loss": 3.2805, "step": 17875 }, { "epoch": 0.88, "grad_norm": 0.5039719939231873, "learning_rate": 0.00048236417514953094, "loss": 3.2335, "step": 17876 }, { "epoch": 0.88, "grad_norm": 0.5283924341201782, "learning_rate": 0.0004823519498960125, "loss": 3.0868, "step": 17877 }, { "epoch": 0.88, "grad_norm": 0.5511729717254639, "learning_rate": 0.00048233972416221417, "loss": 3.0867, "step": 17878 }, { "epoch": 0.88, "grad_norm": 0.5116291642189026, "learning_rate": 0.00048232749794816806, "loss": 3.2757, "step": 17879 }, { "epoch": 0.88, "grad_norm": 0.5327066779136658, "learning_rate": 0.00048231527125390636, "loss": 3.0075, "step": 17880 }, { "epoch": 0.88, "grad_norm": 0.5154629945755005, "learning_rate": 0.00048230304407946144, "loss": 3.2752, "step": 17881 }, { "epoch": 0.88, "grad_norm": 0.5452733635902405, "learning_rate": 0.00048229081642486523, "loss": 3.2965, "step": 17882 }, { "epoch": 0.88, "grad_norm": 0.48976922035217285, "learning_rate": 0.0004822785882901502, "loss": 3.1155, "step": 17883 }, { "epoch": 0.88, "grad_norm": 0.5699204206466675, "learning_rate": 0.00048226635967534834, "loss": 3.2289, "step": 17884 }, { "epoch": 0.88, "grad_norm": 0.5284189581871033, "learning_rate": 0.0004822541305804921, "loss": 3.3774, "step": 17885 }, { "epoch": 0.88, "grad_norm": 0.48988762497901917, "learning_rate": 0.00048224190100561355, "loss": 3.0301, "step": 17886 }, { "epoch": 0.88, "grad_norm": 0.496111124753952, "learning_rate": 0.00048222967095074476, "loss": 2.9562, "step": 17887 }, { "epoch": 0.88, "grad_norm": 0.510610044002533, "learning_rate": 0.0004822174404159182, "loss": 3.4054, "step": 17888 }, { "epoch": 0.88, "grad_norm": 0.5512583255767822, "learning_rate": 0.00048220520940116593, "loss": 3.1477, "step": 17889 }, { "epoch": 0.88, "grad_norm": 0.4915429651737213, "learning_rate": 0.00048219297790652024, "loss": 3.1844, "step": 17890 }, { "epoch": 0.88, "grad_norm": 0.5592296719551086, "learning_rate": 0.0004821807459320134, "loss": 3.2548, "step": 17891 }, { "epoch": 0.88, "grad_norm": 0.49113866686820984, "learning_rate": 0.0004821685134776773, "loss": 3.1582, "step": 17892 }, { "epoch": 0.88, "grad_norm": 0.5161522626876831, "learning_rate": 0.0004821562805435446, "loss": 3.1451, "step": 17893 }, { "epoch": 0.88, "grad_norm": 0.5243754982948303, "learning_rate": 0.00048214404712964713, "loss": 3.3366, "step": 17894 }, { "epoch": 0.88, "grad_norm": 0.5705099701881409, "learning_rate": 0.00048213181323601754, "loss": 3.019, "step": 17895 }, { "epoch": 0.88, "grad_norm": 0.48698729276657104, "learning_rate": 0.00048211957886268764, "loss": 2.9851, "step": 17896 }, { "epoch": 0.88, "grad_norm": 0.5281487703323364, "learning_rate": 0.0004821073440096898, "loss": 3.1592, "step": 17897 }, { "epoch": 0.88, "grad_norm": 0.524651288986206, "learning_rate": 0.0004820951086770563, "loss": 3.1948, "step": 17898 }, { "epoch": 0.88, "grad_norm": 0.5448842644691467, "learning_rate": 0.0004820828728648194, "loss": 3.1414, "step": 17899 }, { "epoch": 0.88, "grad_norm": 0.5146068930625916, "learning_rate": 0.0004820706365730112, "loss": 3.0062, "step": 17900 }, { "epoch": 0.88, "grad_norm": 0.49320217967033386, "learning_rate": 0.000482058399801664, "loss": 3.3239, "step": 17901 }, { "epoch": 0.88, "grad_norm": 0.5205990076065063, "learning_rate": 0.00048204616255080997, "loss": 3.2077, "step": 17902 }, { "epoch": 0.88, "grad_norm": 0.5180717706680298, "learning_rate": 0.00048203392482048136, "loss": 3.2312, "step": 17903 }, { "epoch": 0.88, "grad_norm": 0.5263290405273438, "learning_rate": 0.0004820216866107105, "loss": 3.2392, "step": 17904 }, { "epoch": 0.88, "grad_norm": 0.5504626631736755, "learning_rate": 0.00048200944792152955, "loss": 3.0838, "step": 17905 }, { "epoch": 0.88, "grad_norm": 0.5280824303627014, "learning_rate": 0.0004819972087529707, "loss": 3.2164, "step": 17906 }, { "epoch": 0.88, "grad_norm": 0.5233747959136963, "learning_rate": 0.00048198496910506624, "loss": 3.1294, "step": 17907 }, { "epoch": 0.88, "grad_norm": 0.5285931825637817, "learning_rate": 0.00048197272897784835, "loss": 3.0499, "step": 17908 }, { "epoch": 0.88, "grad_norm": 0.5670228600502014, "learning_rate": 0.0004819604883713494, "loss": 3.1822, "step": 17909 }, { "epoch": 0.88, "grad_norm": 0.5432270765304565, "learning_rate": 0.0004819482472856015, "loss": 3.041, "step": 17910 }, { "epoch": 0.88, "grad_norm": 0.5179724097251892, "learning_rate": 0.0004819360057206369, "loss": 3.327, "step": 17911 }, { "epoch": 0.88, "grad_norm": 0.5055398344993591, "learning_rate": 0.0004819237636764879, "loss": 3.2491, "step": 17912 }, { "epoch": 0.88, "grad_norm": 0.531039834022522, "learning_rate": 0.0004819115211531867, "loss": 3.1858, "step": 17913 }, { "epoch": 0.88, "grad_norm": 0.5148627161979675, "learning_rate": 0.00048189927815076565, "loss": 3.1547, "step": 17914 }, { "epoch": 0.88, "grad_norm": 0.5544278621673584, "learning_rate": 0.0004818870346692569, "loss": 3.2301, "step": 17915 }, { "epoch": 0.88, "grad_norm": 0.564547598361969, "learning_rate": 0.00048187479070869267, "loss": 3.0203, "step": 17916 }, { "epoch": 0.88, "grad_norm": 0.5364139080047607, "learning_rate": 0.0004818625462691052, "loss": 2.8439, "step": 17917 }, { "epoch": 0.88, "grad_norm": 0.5530861020088196, "learning_rate": 0.00048185030135052676, "loss": 3.1894, "step": 17918 }, { "epoch": 0.88, "grad_norm": 0.5109603404998779, "learning_rate": 0.00048183805595298975, "loss": 3.2271, "step": 17919 }, { "epoch": 0.88, "grad_norm": 0.49450230598449707, "learning_rate": 0.0004818258100765262, "loss": 3.1592, "step": 17920 }, { "epoch": 0.88, "grad_norm": 0.5180266499519348, "learning_rate": 0.0004818135637211685, "loss": 3.0048, "step": 17921 }, { "epoch": 0.88, "grad_norm": 0.5050731301307678, "learning_rate": 0.00048180131688694883, "loss": 3.2961, "step": 17922 }, { "epoch": 0.88, "grad_norm": 0.5055239200592041, "learning_rate": 0.0004817890695738994, "loss": 3.1692, "step": 17923 }, { "epoch": 0.88, "grad_norm": 0.5030365586280823, "learning_rate": 0.00048177682178205273, "loss": 3.1633, "step": 17924 }, { "epoch": 0.88, "grad_norm": 0.6658396124839783, "learning_rate": 0.00048176457351144084, "loss": 3.2588, "step": 17925 }, { "epoch": 0.88, "grad_norm": 0.5380383133888245, "learning_rate": 0.000481752324762096, "loss": 3.2116, "step": 17926 }, { "epoch": 0.88, "grad_norm": 0.507267951965332, "learning_rate": 0.00048174007553405056, "loss": 3.279, "step": 17927 }, { "epoch": 0.88, "grad_norm": 0.5685015320777893, "learning_rate": 0.0004817278258273366, "loss": 3.0932, "step": 17928 }, { "epoch": 0.88, "grad_norm": 0.5036908984184265, "learning_rate": 0.0004817155756419866, "loss": 3.1301, "step": 17929 }, { "epoch": 0.88, "grad_norm": 0.516451895236969, "learning_rate": 0.0004817033249780328, "loss": 2.9769, "step": 17930 }, { "epoch": 0.88, "grad_norm": 0.5260326862335205, "learning_rate": 0.00048169107383550744, "loss": 3.3973, "step": 17931 }, { "epoch": 0.88, "grad_norm": 0.5365659594535828, "learning_rate": 0.0004816788222144427, "loss": 3.1838, "step": 17932 }, { "epoch": 0.88, "grad_norm": 0.5538583397865295, "learning_rate": 0.0004816665701148709, "loss": 3.1503, "step": 17933 }, { "epoch": 0.88, "grad_norm": 0.5433288812637329, "learning_rate": 0.00048165431753682434, "loss": 3.3311, "step": 17934 }, { "epoch": 0.88, "grad_norm": 0.5256428718566895, "learning_rate": 0.0004816420644803352, "loss": 3.1285, "step": 17935 }, { "epoch": 0.88, "grad_norm": 0.5579695701599121, "learning_rate": 0.0004816298109454359, "loss": 3.0131, "step": 17936 }, { "epoch": 0.88, "grad_norm": 0.49737823009490967, "learning_rate": 0.00048161755693215864, "loss": 3.287, "step": 17937 }, { "epoch": 0.88, "grad_norm": 0.5029211640357971, "learning_rate": 0.00048160530244053564, "loss": 3.2576, "step": 17938 }, { "epoch": 0.88, "grad_norm": 0.5466650724411011, "learning_rate": 0.0004815930474705992, "loss": 2.9216, "step": 17939 }, { "epoch": 0.88, "grad_norm": 0.5182828307151794, "learning_rate": 0.0004815807920223816, "loss": 3.2743, "step": 17940 }, { "epoch": 0.88, "grad_norm": 0.5201794505119324, "learning_rate": 0.00048156853609591525, "loss": 3.2135, "step": 17941 }, { "epoch": 0.88, "grad_norm": 0.5152579545974731, "learning_rate": 0.0004815562796912323, "loss": 3.1489, "step": 17942 }, { "epoch": 0.88, "grad_norm": 0.5244562029838562, "learning_rate": 0.00048154402280836504, "loss": 3.016, "step": 17943 }, { "epoch": 0.88, "grad_norm": 0.5533185601234436, "learning_rate": 0.00048153176544734575, "loss": 3.2095, "step": 17944 }, { "epoch": 0.88, "grad_norm": 0.5288299918174744, "learning_rate": 0.0004815195076082067, "loss": 3.0927, "step": 17945 }, { "epoch": 0.88, "grad_norm": 0.5070801377296448, "learning_rate": 0.00048150724929098027, "loss": 3.1879, "step": 17946 }, { "epoch": 0.88, "grad_norm": 0.5345847606658936, "learning_rate": 0.0004814949904956986, "loss": 3.2283, "step": 17947 }, { "epoch": 0.88, "grad_norm": 0.5151923298835754, "learning_rate": 0.0004814827312223941, "loss": 3.1727, "step": 17948 }, { "epoch": 0.88, "grad_norm": 0.5357820987701416, "learning_rate": 0.000481470471471099, "loss": 3.2602, "step": 17949 }, { "epoch": 0.88, "grad_norm": 0.4863514304161072, "learning_rate": 0.00048145821124184556, "loss": 3.1904, "step": 17950 }, { "epoch": 0.88, "grad_norm": 0.5032357573509216, "learning_rate": 0.00048144595053466616, "loss": 3.1701, "step": 17951 }, { "epoch": 0.88, "grad_norm": 0.5471490025520325, "learning_rate": 0.00048143368934959306, "loss": 3.2434, "step": 17952 }, { "epoch": 0.88, "grad_norm": 0.4980376362800598, "learning_rate": 0.00048142142768665844, "loss": 3.0547, "step": 17953 }, { "epoch": 0.88, "grad_norm": 0.4976464509963989, "learning_rate": 0.0004814091655458947, "loss": 3.0471, "step": 17954 }, { "epoch": 0.88, "grad_norm": 0.5372846722602844, "learning_rate": 0.0004813969029273343, "loss": 3.2855, "step": 17955 }, { "epoch": 0.88, "grad_norm": 0.5928505659103394, "learning_rate": 0.00048138463983100926, "loss": 3.3892, "step": 17956 }, { "epoch": 0.88, "grad_norm": 0.5085715055465698, "learning_rate": 0.00048137237625695207, "loss": 3.1189, "step": 17957 }, { "epoch": 0.88, "grad_norm": 0.5404731631278992, "learning_rate": 0.00048136011220519486, "loss": 3.259, "step": 17958 }, { "epoch": 0.88, "grad_norm": 0.5489948987960815, "learning_rate": 0.00048134784767577, "loss": 3.0618, "step": 17959 }, { "epoch": 0.88, "grad_norm": 0.5055735111236572, "learning_rate": 0.0004813355826687099, "loss": 3.149, "step": 17960 }, { "epoch": 0.88, "grad_norm": 0.5085407495498657, "learning_rate": 0.00048132331718404663, "loss": 3.2623, "step": 17961 }, { "epoch": 0.88, "grad_norm": 0.509974479675293, "learning_rate": 0.0004813110512218127, "loss": 3.0828, "step": 17962 }, { "epoch": 0.88, "grad_norm": 0.5331966876983643, "learning_rate": 0.00048129878478204047, "loss": 3.1375, "step": 17963 }, { "epoch": 0.88, "grad_norm": 0.4968498945236206, "learning_rate": 0.000481286517864762, "loss": 3.2832, "step": 17964 }, { "epoch": 0.88, "grad_norm": 0.5317128896713257, "learning_rate": 0.0004812742504700097, "loss": 3.1265, "step": 17965 }, { "epoch": 0.88, "grad_norm": 0.5396853685379028, "learning_rate": 0.000481261982597816, "loss": 3.3518, "step": 17966 }, { "epoch": 0.88, "grad_norm": 0.5501961708068848, "learning_rate": 0.00048124971424821315, "loss": 3.2126, "step": 17967 }, { "epoch": 0.88, "grad_norm": 0.5294440388679504, "learning_rate": 0.00048123744542123345, "loss": 3.1612, "step": 17968 }, { "epoch": 0.88, "grad_norm": 0.5000289678573608, "learning_rate": 0.0004812251761169091, "loss": 3.4135, "step": 17969 }, { "epoch": 0.88, "grad_norm": 0.49727967381477356, "learning_rate": 0.00048121290633527247, "loss": 2.9709, "step": 17970 }, { "epoch": 0.88, "grad_norm": 0.5257933735847473, "learning_rate": 0.0004812006360763561, "loss": 3.5203, "step": 17971 }, { "epoch": 0.88, "grad_norm": 0.5392364263534546, "learning_rate": 0.000481188365340192, "loss": 3.163, "step": 17972 }, { "epoch": 0.88, "grad_norm": 0.5661436319351196, "learning_rate": 0.0004811760941268127, "loss": 3.0577, "step": 17973 }, { "epoch": 0.88, "grad_norm": 0.5415921211242676, "learning_rate": 0.0004811638224362503, "loss": 2.9468, "step": 17974 }, { "epoch": 0.88, "grad_norm": 0.5405935645103455, "learning_rate": 0.0004811515502685374, "loss": 3.1172, "step": 17975 }, { "epoch": 0.88, "grad_norm": 0.5455482006072998, "learning_rate": 0.00048113927762370614, "loss": 3.1385, "step": 17976 }, { "epoch": 0.88, "grad_norm": 0.5725964307785034, "learning_rate": 0.00048112700450178884, "loss": 3.0701, "step": 17977 }, { "epoch": 0.88, "grad_norm": 0.5180301070213318, "learning_rate": 0.00048111473090281797, "loss": 3.0, "step": 17978 }, { "epoch": 0.88, "grad_norm": 0.5263218879699707, "learning_rate": 0.00048110245682682557, "loss": 3.2949, "step": 17979 }, { "epoch": 0.88, "grad_norm": 0.5030066967010498, "learning_rate": 0.00048109018227384434, "loss": 3.3081, "step": 17980 }, { "epoch": 0.88, "grad_norm": 0.5428774952888489, "learning_rate": 0.0004810779072439063, "loss": 3.2514, "step": 17981 }, { "epoch": 0.88, "grad_norm": 0.5697459578514099, "learning_rate": 0.0004810656317370439, "loss": 3.3227, "step": 17982 }, { "epoch": 0.88, "grad_norm": 0.522610068321228, "learning_rate": 0.00048105335575328957, "loss": 3.135, "step": 17983 }, { "epoch": 0.88, "grad_norm": 0.529964804649353, "learning_rate": 0.0004810410792926755, "loss": 3.2755, "step": 17984 }, { "epoch": 0.88, "grad_norm": 0.5211076736450195, "learning_rate": 0.00048102880235523405, "loss": 3.3943, "step": 17985 }, { "epoch": 0.88, "grad_norm": 0.5373325347900391, "learning_rate": 0.0004810165249409976, "loss": 3.0734, "step": 17986 }, { "epoch": 0.88, "grad_norm": 0.537948727607727, "learning_rate": 0.00048100424704999845, "loss": 3.2213, "step": 17987 }, { "epoch": 0.88, "grad_norm": 0.49827033281326294, "learning_rate": 0.00048099196868226895, "loss": 3.0853, "step": 17988 }, { "epoch": 0.88, "grad_norm": 0.6188706755638123, "learning_rate": 0.0004809796898378414, "loss": 3.1362, "step": 17989 }, { "epoch": 0.88, "grad_norm": 0.5543064475059509, "learning_rate": 0.00048096741051674826, "loss": 3.0382, "step": 17990 }, { "epoch": 0.88, "grad_norm": 0.49779966473579407, "learning_rate": 0.00048095513071902174, "loss": 3.1029, "step": 17991 }, { "epoch": 0.88, "grad_norm": 0.5499799251556396, "learning_rate": 0.00048094285044469415, "loss": 3.3016, "step": 17992 }, { "epoch": 0.88, "grad_norm": 0.5513309836387634, "learning_rate": 0.00048093056969379807, "loss": 3.2205, "step": 17993 }, { "epoch": 0.88, "grad_norm": 0.49735820293426514, "learning_rate": 0.0004809182884663656, "loss": 3.2093, "step": 17994 }, { "epoch": 0.88, "grad_norm": 0.5192501544952393, "learning_rate": 0.00048090600676242923, "loss": 3.1286, "step": 17995 }, { "epoch": 0.88, "grad_norm": 0.5403336882591248, "learning_rate": 0.00048089372458202115, "loss": 3.014, "step": 17996 }, { "epoch": 0.88, "grad_norm": 0.4965198338031769, "learning_rate": 0.00048088144192517387, "loss": 3.115, "step": 17997 }, { "epoch": 0.88, "grad_norm": 0.957526683807373, "learning_rate": 0.0004808691587919197, "loss": 3.324, "step": 17998 }, { "epoch": 0.88, "grad_norm": 0.5187360048294067, "learning_rate": 0.00048085687518229105, "loss": 3.2005, "step": 17999 }, { "epoch": 0.88, "grad_norm": 0.4975668489933014, "learning_rate": 0.0004808445910963201, "loss": 3.3057, "step": 18000 }, { "epoch": 0.88, "grad_norm": 0.5819920897483826, "learning_rate": 0.00048083230653403925, "loss": 3.0514, "step": 18001 }, { "epoch": 0.88, "grad_norm": 0.5546259880065918, "learning_rate": 0.000480820021495481, "loss": 2.7754, "step": 18002 }, { "epoch": 0.88, "grad_norm": 0.49003690481185913, "learning_rate": 0.00048080773598067753, "loss": 3.2775, "step": 18003 }, { "epoch": 0.88, "grad_norm": 0.515501081943512, "learning_rate": 0.00048079544998966137, "loss": 3.2949, "step": 18004 }, { "epoch": 0.88, "grad_norm": 0.5453561544418335, "learning_rate": 0.0004807831635224647, "loss": 3.0814, "step": 18005 }, { "epoch": 0.88, "grad_norm": 0.5018565058708191, "learning_rate": 0.00048077087657912005, "loss": 3.4001, "step": 18006 }, { "epoch": 0.88, "grad_norm": 0.5057216882705688, "learning_rate": 0.00048075858915965966, "loss": 3.1185, "step": 18007 }, { "epoch": 0.88, "grad_norm": 0.5177599191665649, "learning_rate": 0.00048074630126411597, "loss": 2.9411, "step": 18008 }, { "epoch": 0.88, "grad_norm": 0.5200800895690918, "learning_rate": 0.00048073401289252133, "loss": 3.33, "step": 18009 }, { "epoch": 0.88, "grad_norm": 0.5048688054084778, "learning_rate": 0.000480721724044908, "loss": 3.3512, "step": 18010 }, { "epoch": 0.88, "grad_norm": 0.5326023697853088, "learning_rate": 0.0004807094347213085, "loss": 3.1505, "step": 18011 }, { "epoch": 0.88, "grad_norm": 0.536878228187561, "learning_rate": 0.0004806971449217551, "loss": 3.0033, "step": 18012 }, { "epoch": 0.88, "grad_norm": 0.5812287926673889, "learning_rate": 0.0004806848546462802, "loss": 2.9029, "step": 18013 }, { "epoch": 0.88, "grad_norm": 0.5413682460784912, "learning_rate": 0.00048067256389491613, "loss": 3.137, "step": 18014 }, { "epoch": 0.88, "grad_norm": 0.47277653217315674, "learning_rate": 0.00048066027266769533, "loss": 3.1877, "step": 18015 }, { "epoch": 0.88, "grad_norm": 0.5533841848373413, "learning_rate": 0.0004806479809646501, "loss": 3.2071, "step": 18016 }, { "epoch": 0.88, "grad_norm": 0.5669878721237183, "learning_rate": 0.0004806356887858129, "loss": 3.3474, "step": 18017 }, { "epoch": 0.88, "grad_norm": 0.5068527460098267, "learning_rate": 0.0004806233961312161, "loss": 3.3926, "step": 18018 }, { "epoch": 0.88, "grad_norm": 0.706555187702179, "learning_rate": 0.00048061110300089203, "loss": 3.1562, "step": 18019 }, { "epoch": 0.88, "grad_norm": 0.5449730157852173, "learning_rate": 0.00048059880939487295, "loss": 3.3523, "step": 18020 }, { "epoch": 0.88, "grad_norm": 0.5296577215194702, "learning_rate": 0.0004805865153131915, "loss": 3.0907, "step": 18021 }, { "epoch": 0.88, "grad_norm": 0.5225890278816223, "learning_rate": 0.0004805742207558799, "loss": 3.0886, "step": 18022 }, { "epoch": 0.88, "grad_norm": 0.6007885336875916, "learning_rate": 0.00048056192572297046, "loss": 3.399, "step": 18023 }, { "epoch": 0.88, "grad_norm": 0.5413122177124023, "learning_rate": 0.00048054963021449575, "loss": 3.1492, "step": 18024 }, { "epoch": 0.88, "grad_norm": 0.5432742238044739, "learning_rate": 0.00048053733423048797, "loss": 3.166, "step": 18025 }, { "epoch": 0.88, "grad_norm": 0.5184715986251831, "learning_rate": 0.0004805250377709797, "loss": 2.9957, "step": 18026 }, { "epoch": 0.88, "grad_norm": 0.5569450259208679, "learning_rate": 0.0004805127408360032, "loss": 3.1505, "step": 18027 }, { "epoch": 0.88, "grad_norm": 0.5036641359329224, "learning_rate": 0.00048050044342559087, "loss": 3.0816, "step": 18028 }, { "epoch": 0.88, "grad_norm": 0.6360530257225037, "learning_rate": 0.0004804881455397751, "loss": 3.154, "step": 18029 }, { "epoch": 0.88, "grad_norm": 0.5100707411766052, "learning_rate": 0.00048047584717858825, "loss": 3.2289, "step": 18030 }, { "epoch": 0.88, "grad_norm": 0.5365529656410217, "learning_rate": 0.00048046354834206277, "loss": 3.1688, "step": 18031 }, { "epoch": 0.88, "grad_norm": 0.5900627970695496, "learning_rate": 0.0004804512490302311, "loss": 3.2375, "step": 18032 }, { "epoch": 0.88, "grad_norm": 0.566939651966095, "learning_rate": 0.0004804389492431255, "loss": 2.9624, "step": 18033 }, { "epoch": 0.88, "grad_norm": 0.5332266688346863, "learning_rate": 0.0004804266489807785, "loss": 3.0906, "step": 18034 }, { "epoch": 0.88, "grad_norm": 0.5751016139984131, "learning_rate": 0.0004804143482432224, "loss": 3.2817, "step": 18035 }, { "epoch": 0.88, "grad_norm": 0.6242155432701111, "learning_rate": 0.0004804020470304896, "loss": 3.0636, "step": 18036 }, { "epoch": 0.88, "grad_norm": 0.5122875571250916, "learning_rate": 0.00048038974534261256, "loss": 3.0294, "step": 18037 }, { "epoch": 0.88, "grad_norm": 0.5618741512298584, "learning_rate": 0.00048037744317962357, "loss": 3.0718, "step": 18038 }, { "epoch": 0.88, "grad_norm": 0.5436500310897827, "learning_rate": 0.0004803651405415553, "loss": 3.3318, "step": 18039 }, { "epoch": 0.88, "grad_norm": 0.5078718662261963, "learning_rate": 0.0004803528374284398, "loss": 3.3323, "step": 18040 }, { "epoch": 0.88, "grad_norm": 0.5190596580505371, "learning_rate": 0.00048034053384030963, "loss": 3.4991, "step": 18041 }, { "epoch": 0.88, "grad_norm": 0.5513627529144287, "learning_rate": 0.0004803282297771973, "loss": 3.1212, "step": 18042 }, { "epoch": 0.88, "grad_norm": 0.5310901403427124, "learning_rate": 0.000480315925239135, "loss": 3.0746, "step": 18043 }, { "epoch": 0.88, "grad_norm": 0.5999692678451538, "learning_rate": 0.00048030362022615533, "loss": 3.2624, "step": 18044 }, { "epoch": 0.88, "grad_norm": 0.49754953384399414, "learning_rate": 0.00048029131473829065, "loss": 3.2649, "step": 18045 }, { "epoch": 0.88, "grad_norm": 0.5372548699378967, "learning_rate": 0.00048027900877557327, "loss": 3.2262, "step": 18046 }, { "epoch": 0.88, "grad_norm": 0.528826117515564, "learning_rate": 0.00048026670233803574, "loss": 3.2068, "step": 18047 }, { "epoch": 0.88, "grad_norm": 0.5041863322257996, "learning_rate": 0.00048025439542571035, "loss": 3.0409, "step": 18048 }, { "epoch": 0.88, "grad_norm": 0.5768441557884216, "learning_rate": 0.00048024208803862964, "loss": 3.2841, "step": 18049 }, { "epoch": 0.88, "grad_norm": 0.5213032960891724, "learning_rate": 0.00048022978017682596, "loss": 3.2703, "step": 18050 }, { "epoch": 0.88, "grad_norm": 0.5481829643249512, "learning_rate": 0.00048021747184033163, "loss": 3.1394, "step": 18051 }, { "epoch": 0.88, "grad_norm": 0.5505427122116089, "learning_rate": 0.00048020516302917923, "loss": 3.4499, "step": 18052 }, { "epoch": 0.88, "grad_norm": 0.5292855501174927, "learning_rate": 0.00048019285374340106, "loss": 3.2116, "step": 18053 }, { "epoch": 0.88, "grad_norm": 0.5407589077949524, "learning_rate": 0.00048018054398302966, "loss": 3.3331, "step": 18054 }, { "epoch": 0.88, "grad_norm": 0.5229291915893555, "learning_rate": 0.0004801682337480974, "loss": 3.3401, "step": 18055 }, { "epoch": 0.88, "grad_norm": 0.47680187225341797, "learning_rate": 0.00048015592303863653, "loss": 2.9878, "step": 18056 }, { "epoch": 0.88, "grad_norm": 0.5591782331466675, "learning_rate": 0.00048014361185467986, "loss": 3.3692, "step": 18057 }, { "epoch": 0.88, "grad_norm": 0.5464686155319214, "learning_rate": 0.0004801313001962594, "loss": 3.2056, "step": 18058 }, { "epoch": 0.89, "grad_norm": 0.5384355187416077, "learning_rate": 0.00048011898806340787, "loss": 3.3496, "step": 18059 }, { "epoch": 0.89, "grad_norm": 0.5165776610374451, "learning_rate": 0.00048010667545615753, "loss": 3.3195, "step": 18060 }, { "epoch": 0.89, "grad_norm": 0.489169716835022, "learning_rate": 0.00048009436237454083, "loss": 3.109, "step": 18061 }, { "epoch": 0.89, "grad_norm": 0.5217012763023376, "learning_rate": 0.00048008204881859034, "loss": 3.3279, "step": 18062 }, { "epoch": 0.89, "grad_norm": 0.5407010912895203, "learning_rate": 0.00048006973478833837, "loss": 3.1728, "step": 18063 }, { "epoch": 0.89, "grad_norm": 0.5479187369346619, "learning_rate": 0.0004800574202838174, "loss": 2.9849, "step": 18064 }, { "epoch": 0.89, "grad_norm": 0.5429320335388184, "learning_rate": 0.00048004510530505977, "loss": 3.1398, "step": 18065 }, { "epoch": 0.89, "grad_norm": 0.5046254992485046, "learning_rate": 0.000480032789852098, "loss": 3.1535, "step": 18066 }, { "epoch": 0.89, "grad_norm": 0.5253599882125854, "learning_rate": 0.00048002047392496443, "loss": 3.0859, "step": 18067 }, { "epoch": 0.89, "grad_norm": 0.5937367081642151, "learning_rate": 0.0004800081575236917, "loss": 3.4202, "step": 18068 }, { "epoch": 0.89, "grad_norm": 0.541233479976654, "learning_rate": 0.0004799958406483121, "loss": 3.3945, "step": 18069 }, { "epoch": 0.89, "grad_norm": 0.5336980819702148, "learning_rate": 0.00047998352329885815, "loss": 3.2491, "step": 18070 }, { "epoch": 0.89, "grad_norm": 0.5044065713882446, "learning_rate": 0.00047997120547536214, "loss": 3.2804, "step": 18071 }, { "epoch": 0.89, "grad_norm": 0.4936935305595398, "learning_rate": 0.0004799588871778566, "loss": 3.3098, "step": 18072 }, { "epoch": 0.89, "grad_norm": 0.55002760887146, "learning_rate": 0.0004799465684063741, "loss": 2.9614, "step": 18073 }, { "epoch": 0.89, "grad_norm": 0.5033633708953857, "learning_rate": 0.00047993424916094687, "loss": 3.0059, "step": 18074 }, { "epoch": 0.89, "grad_norm": 0.5659985542297363, "learning_rate": 0.00047992192944160746, "loss": 3.0396, "step": 18075 }, { "epoch": 0.89, "grad_norm": 0.5208994150161743, "learning_rate": 0.0004799096092483884, "loss": 3.1537, "step": 18076 }, { "epoch": 0.89, "grad_norm": 0.5257906913757324, "learning_rate": 0.00047989728858132194, "loss": 3.0355, "step": 18077 }, { "epoch": 0.89, "grad_norm": 0.53220534324646, "learning_rate": 0.0004798849674404407, "loss": 3.109, "step": 18078 }, { "epoch": 0.89, "grad_norm": 0.49276986718177795, "learning_rate": 0.0004798726458257771, "loss": 3.0571, "step": 18079 }, { "epoch": 0.89, "grad_norm": 0.5161966681480408, "learning_rate": 0.0004798603237373636, "loss": 3.2301, "step": 18080 }, { "epoch": 0.89, "grad_norm": 0.5360630750656128, "learning_rate": 0.0004798480011752325, "loss": 2.9056, "step": 18081 }, { "epoch": 0.89, "grad_norm": 0.5495896339416504, "learning_rate": 0.00047983567813941644, "loss": 3.0623, "step": 18082 }, { "epoch": 0.89, "grad_norm": 0.5134875178337097, "learning_rate": 0.00047982335462994785, "loss": 3.2153, "step": 18083 }, { "epoch": 0.89, "grad_norm": 0.49886998534202576, "learning_rate": 0.00047981103064685904, "loss": 3.1122, "step": 18084 }, { "epoch": 0.89, "grad_norm": 0.5390711426734924, "learning_rate": 0.00047979870619018275, "loss": 3.1859, "step": 18085 }, { "epoch": 0.89, "grad_norm": 0.5308481454849243, "learning_rate": 0.00047978638125995113, "loss": 3.2953, "step": 18086 }, { "epoch": 0.89, "grad_norm": 0.5197398662567139, "learning_rate": 0.0004797740558561968, "loss": 3.2586, "step": 18087 }, { "epoch": 0.89, "grad_norm": 0.5043891668319702, "learning_rate": 0.0004797617299789522, "loss": 3.2689, "step": 18088 }, { "epoch": 0.89, "grad_norm": 0.573706865310669, "learning_rate": 0.00047974940362824987, "loss": 3.2213, "step": 18089 }, { "epoch": 0.89, "grad_norm": 0.5655125379562378, "learning_rate": 0.00047973707680412224, "loss": 3.0872, "step": 18090 }, { "epoch": 0.89, "grad_norm": 0.5690857768058777, "learning_rate": 0.00047972474950660164, "loss": 3.0915, "step": 18091 }, { "epoch": 0.89, "grad_norm": 0.5151222348213196, "learning_rate": 0.00047971242173572065, "loss": 3.0679, "step": 18092 }, { "epoch": 0.89, "grad_norm": 0.6133147478103638, "learning_rate": 0.00047970009349151174, "loss": 3.2749, "step": 18093 }, { "epoch": 0.89, "grad_norm": 0.5890400409698486, "learning_rate": 0.0004796877647740074, "loss": 3.0752, "step": 18094 }, { "epoch": 0.89, "grad_norm": 0.5378167629241943, "learning_rate": 0.00047967543558324, "loss": 3.2817, "step": 18095 }, { "epoch": 0.89, "grad_norm": 0.5156680941581726, "learning_rate": 0.0004796631059192422, "loss": 3.079, "step": 18096 }, { "epoch": 0.89, "grad_norm": 0.5856471657752991, "learning_rate": 0.0004796507757820462, "loss": 3.1464, "step": 18097 }, { "epoch": 0.89, "grad_norm": 0.5163522362709045, "learning_rate": 0.00047963844517168473, "loss": 3.284, "step": 18098 }, { "epoch": 0.89, "grad_norm": 0.5546302199363708, "learning_rate": 0.00047962611408819015, "loss": 3.0596, "step": 18099 }, { "epoch": 0.89, "grad_norm": 0.5673893690109253, "learning_rate": 0.00047961378253159496, "loss": 3.1698, "step": 18100 }, { "epoch": 0.89, "grad_norm": 0.5053473711013794, "learning_rate": 0.0004796014505019317, "loss": 3.0218, "step": 18101 }, { "epoch": 0.89, "grad_norm": 0.5111680626869202, "learning_rate": 0.0004795891179992326, "loss": 3.1531, "step": 18102 }, { "epoch": 0.89, "grad_norm": 0.5182185769081116, "learning_rate": 0.00047957678502353045, "loss": 3.0609, "step": 18103 }, { "epoch": 0.89, "grad_norm": 0.5161014199256897, "learning_rate": 0.0004795644515748576, "loss": 2.9817, "step": 18104 }, { "epoch": 0.89, "grad_norm": 0.5604485273361206, "learning_rate": 0.0004795521176532466, "loss": 3.2267, "step": 18105 }, { "epoch": 0.89, "grad_norm": 0.5659258961677551, "learning_rate": 0.00047953978325872976, "loss": 3.256, "step": 18106 }, { "epoch": 0.89, "grad_norm": 0.5269966721534729, "learning_rate": 0.00047952744839133973, "loss": 3.1339, "step": 18107 }, { "epoch": 0.89, "grad_norm": 0.5653190612792969, "learning_rate": 0.000479515113051109, "loss": 3.0896, "step": 18108 }, { "epoch": 0.89, "grad_norm": 0.5687468647956848, "learning_rate": 0.00047950277723806994, "loss": 3.2164, "step": 18109 }, { "epoch": 0.89, "grad_norm": 0.48306307196617126, "learning_rate": 0.00047949044095225524, "loss": 3.1984, "step": 18110 }, { "epoch": 0.89, "grad_norm": 0.5953868627548218, "learning_rate": 0.00047947810419369716, "loss": 3.0912, "step": 18111 }, { "epoch": 0.89, "grad_norm": 0.5070611238479614, "learning_rate": 0.0004794657669624283, "loss": 3.1685, "step": 18112 }, { "epoch": 0.89, "grad_norm": 0.5517260432243347, "learning_rate": 0.00047945342925848116, "loss": 3.4973, "step": 18113 }, { "epoch": 0.89, "grad_norm": 0.5137361884117126, "learning_rate": 0.00047944109108188817, "loss": 3.3537, "step": 18114 }, { "epoch": 0.89, "grad_norm": 0.5287407636642456, "learning_rate": 0.00047942875243268187, "loss": 3.182, "step": 18115 }, { "epoch": 0.89, "grad_norm": 0.5217429399490356, "learning_rate": 0.0004794164133108949, "loss": 3.2714, "step": 18116 }, { "epoch": 0.89, "grad_norm": 0.48636454343795776, "learning_rate": 0.00047940407371655956, "loss": 3.1138, "step": 18117 }, { "epoch": 0.89, "grad_norm": 0.5578451156616211, "learning_rate": 0.00047939173364970833, "loss": 3.1668, "step": 18118 }, { "epoch": 0.89, "grad_norm": 0.5369355082511902, "learning_rate": 0.0004793793931103739, "loss": 3.0913, "step": 18119 }, { "epoch": 0.89, "grad_norm": 0.5531823635101318, "learning_rate": 0.0004793670520985886, "loss": 2.9706, "step": 18120 }, { "epoch": 0.89, "grad_norm": 0.5396066904067993, "learning_rate": 0.0004793547106143851, "loss": 3.2062, "step": 18121 }, { "epoch": 0.89, "grad_norm": 0.5657452344894409, "learning_rate": 0.00047934236865779576, "loss": 3.1972, "step": 18122 }, { "epoch": 0.89, "grad_norm": 0.5130107998847961, "learning_rate": 0.0004793300262288531, "loss": 3.2101, "step": 18123 }, { "epoch": 0.89, "grad_norm": 0.5585821270942688, "learning_rate": 0.00047931768332758976, "loss": 3.2184, "step": 18124 }, { "epoch": 0.89, "grad_norm": 0.5192179679870605, "learning_rate": 0.0004793053399540381, "loss": 3.1912, "step": 18125 }, { "epoch": 0.89, "grad_norm": 0.5495747327804565, "learning_rate": 0.00047929299610823065, "loss": 3.2991, "step": 18126 }, { "epoch": 0.89, "grad_norm": 0.5496625304222107, "learning_rate": 0.0004792806517902, "loss": 3.2042, "step": 18127 }, { "epoch": 0.89, "grad_norm": 0.531826913356781, "learning_rate": 0.00047926830699997853, "loss": 3.0819, "step": 18128 }, { "epoch": 0.89, "grad_norm": 0.5123338103294373, "learning_rate": 0.00047925596173759895, "loss": 2.9806, "step": 18129 }, { "epoch": 0.89, "grad_norm": 0.561798095703125, "learning_rate": 0.0004792436160030936, "loss": 3.012, "step": 18130 }, { "epoch": 0.89, "grad_norm": 0.5473758578300476, "learning_rate": 0.0004792312697964951, "loss": 3.167, "step": 18131 }, { "epoch": 0.89, "grad_norm": 0.5174449682235718, "learning_rate": 0.000479218923117836, "loss": 3.0154, "step": 18132 }, { "epoch": 0.89, "grad_norm": 0.5553672909736633, "learning_rate": 0.0004792065759671486, "loss": 3.0273, "step": 18133 }, { "epoch": 0.89, "grad_norm": 0.5386341214179993, "learning_rate": 0.0004791942283444656, "loss": 3.0787, "step": 18134 }, { "epoch": 0.89, "grad_norm": 0.4873228669166565, "learning_rate": 0.0004791818802498195, "loss": 3.0802, "step": 18135 }, { "epoch": 0.89, "grad_norm": 0.5535769462585449, "learning_rate": 0.00047916953168324284, "loss": 3.0712, "step": 18136 }, { "epoch": 0.89, "grad_norm": 0.5669405460357666, "learning_rate": 0.0004791571826447681, "loss": 2.7861, "step": 18137 }, { "epoch": 0.89, "grad_norm": 0.5906660556793213, "learning_rate": 0.0004791448331344278, "loss": 3.2052, "step": 18138 }, { "epoch": 0.89, "grad_norm": 0.5243983268737793, "learning_rate": 0.0004791324831522545, "loss": 3.0515, "step": 18139 }, { "epoch": 0.89, "grad_norm": 0.5486578345298767, "learning_rate": 0.00047912013269828073, "loss": 3.2827, "step": 18140 }, { "epoch": 0.89, "grad_norm": 0.5488114953041077, "learning_rate": 0.00047910778177253906, "loss": 3.2188, "step": 18141 }, { "epoch": 0.89, "grad_norm": 0.5279528498649597, "learning_rate": 0.00047909543037506183, "loss": 3.2853, "step": 18142 }, { "epoch": 0.89, "grad_norm": 0.5320385694503784, "learning_rate": 0.00047908307850588175, "loss": 3.0498, "step": 18143 }, { "epoch": 0.89, "grad_norm": 0.5765833258628845, "learning_rate": 0.0004790707261650313, "loss": 3.1843, "step": 18144 }, { "epoch": 0.89, "grad_norm": 0.5396232604980469, "learning_rate": 0.0004790583733525431, "loss": 3.358, "step": 18145 }, { "epoch": 0.89, "grad_norm": 0.5210521817207336, "learning_rate": 0.00047904602006844957, "loss": 3.2112, "step": 18146 }, { "epoch": 0.89, "grad_norm": 0.5123770833015442, "learning_rate": 0.00047903366631278323, "loss": 2.9842, "step": 18147 }, { "epoch": 0.89, "grad_norm": 0.5506312847137451, "learning_rate": 0.00047902131208557667, "loss": 3.1214, "step": 18148 }, { "epoch": 0.89, "grad_norm": 0.511390745639801, "learning_rate": 0.00047900895738686245, "loss": 3.147, "step": 18149 }, { "epoch": 0.89, "grad_norm": 0.561761736869812, "learning_rate": 0.0004789966022166732, "loss": 3.2444, "step": 18150 }, { "epoch": 0.89, "grad_norm": 0.5431965589523315, "learning_rate": 0.00047898424657504126, "loss": 3.0698, "step": 18151 }, { "epoch": 0.89, "grad_norm": 0.5159698128700256, "learning_rate": 0.00047897189046199924, "loss": 3.238, "step": 18152 }, { "epoch": 0.89, "grad_norm": 0.5606123208999634, "learning_rate": 0.0004789595338775797, "loss": 3.0589, "step": 18153 }, { "epoch": 0.89, "grad_norm": 0.5363898873329163, "learning_rate": 0.0004789471768218152, "loss": 3.1029, "step": 18154 }, { "epoch": 0.89, "grad_norm": 0.5742982029914856, "learning_rate": 0.00047893481929473826, "loss": 3.1311, "step": 18155 }, { "epoch": 0.89, "grad_norm": 0.5812978744506836, "learning_rate": 0.00047892246129638147, "loss": 3.1121, "step": 18156 }, { "epoch": 0.89, "grad_norm": 0.5430973768234253, "learning_rate": 0.00047891010282677735, "loss": 3.005, "step": 18157 }, { "epoch": 0.89, "grad_norm": 0.524570107460022, "learning_rate": 0.00047889774388595847, "loss": 3.1939, "step": 18158 }, { "epoch": 0.89, "grad_norm": 0.5263261198997498, "learning_rate": 0.0004788853844739573, "loss": 3.114, "step": 18159 }, { "epoch": 0.89, "grad_norm": 0.5268939137458801, "learning_rate": 0.00047887302459080644, "loss": 3.271, "step": 18160 }, { "epoch": 0.89, "grad_norm": 0.5581185817718506, "learning_rate": 0.00047886066423653855, "loss": 3.1335, "step": 18161 }, { "epoch": 0.89, "grad_norm": 0.5777899026870728, "learning_rate": 0.0004788483034111861, "loss": 3.2539, "step": 18162 }, { "epoch": 0.89, "grad_norm": 0.5424737334251404, "learning_rate": 0.0004788359421147816, "loss": 3.352, "step": 18163 }, { "epoch": 0.89, "grad_norm": 0.5313791632652283, "learning_rate": 0.0004788235803473576, "loss": 3.2132, "step": 18164 }, { "epoch": 0.89, "grad_norm": 0.5410112738609314, "learning_rate": 0.0004788112181089467, "loss": 3.1526, "step": 18165 }, { "epoch": 0.89, "grad_norm": 0.5331111550331116, "learning_rate": 0.0004787988553995815, "loss": 3.1902, "step": 18166 }, { "epoch": 0.89, "grad_norm": 0.5786098837852478, "learning_rate": 0.00047878649221929455, "loss": 3.2107, "step": 18167 }, { "epoch": 0.89, "grad_norm": 0.5680441856384277, "learning_rate": 0.00047877412856811834, "loss": 3.223, "step": 18168 }, { "epoch": 0.89, "grad_norm": 0.5643728971481323, "learning_rate": 0.0004787617644460855, "loss": 3.0881, "step": 18169 }, { "epoch": 0.89, "grad_norm": 0.526891827583313, "learning_rate": 0.0004787493998532286, "loss": 2.929, "step": 18170 }, { "epoch": 0.89, "grad_norm": 0.5148626565933228, "learning_rate": 0.00047873703478958015, "loss": 3.1915, "step": 18171 }, { "epoch": 0.89, "grad_norm": 0.5031622052192688, "learning_rate": 0.00047872466925517274, "loss": 3.0245, "step": 18172 }, { "epoch": 0.89, "grad_norm": 0.5895242094993591, "learning_rate": 0.000478712303250039, "loss": 2.8557, "step": 18173 }, { "epoch": 0.89, "grad_norm": 0.4982737898826599, "learning_rate": 0.0004786999367742114, "loss": 3.1632, "step": 18174 }, { "epoch": 0.89, "grad_norm": 0.5337832570075989, "learning_rate": 0.00047868756982772265, "loss": 3.2289, "step": 18175 }, { "epoch": 0.89, "grad_norm": 0.5422765612602234, "learning_rate": 0.0004786752024106051, "loss": 3.305, "step": 18176 }, { "epoch": 0.89, "grad_norm": 0.5369231700897217, "learning_rate": 0.0004786628345228915, "loss": 3.1646, "step": 18177 }, { "epoch": 0.89, "grad_norm": 0.49044135212898254, "learning_rate": 0.00047865046616461446, "loss": 3.1306, "step": 18178 }, { "epoch": 0.89, "grad_norm": 0.520637571811676, "learning_rate": 0.00047863809733580633, "loss": 3.1229, "step": 18179 }, { "epoch": 0.89, "grad_norm": 0.5776728987693787, "learning_rate": 0.00047862572803649995, "loss": 3.1429, "step": 18180 }, { "epoch": 0.89, "grad_norm": 0.5988162755966187, "learning_rate": 0.0004786133582667277, "loss": 3.1521, "step": 18181 }, { "epoch": 0.89, "grad_norm": 0.5374999642372131, "learning_rate": 0.00047860098802652234, "loss": 3.1968, "step": 18182 }, { "epoch": 0.89, "grad_norm": 0.5586443543434143, "learning_rate": 0.0004785886173159163, "loss": 3.1005, "step": 18183 }, { "epoch": 0.89, "grad_norm": 0.516861081123352, "learning_rate": 0.00047857624613494216, "loss": 3.184, "step": 18184 }, { "epoch": 0.89, "grad_norm": 0.49465465545654297, "learning_rate": 0.0004785638744836326, "loss": 3.3298, "step": 18185 }, { "epoch": 0.89, "grad_norm": 0.5254295468330383, "learning_rate": 0.00047855150236202006, "loss": 3.2069, "step": 18186 }, { "epoch": 0.89, "grad_norm": 0.5072837471961975, "learning_rate": 0.0004785391297701374, "loss": 3.2564, "step": 18187 }, { "epoch": 0.89, "grad_norm": 0.5069559812545776, "learning_rate": 0.00047852675670801694, "loss": 3.0861, "step": 18188 }, { "epoch": 0.89, "grad_norm": 0.527495801448822, "learning_rate": 0.0004785143831756913, "loss": 3.0807, "step": 18189 }, { "epoch": 0.89, "grad_norm": 0.4928951561450958, "learning_rate": 0.00047850200917319325, "loss": 3.0653, "step": 18190 }, { "epoch": 0.89, "grad_norm": 0.5371759533882141, "learning_rate": 0.0004784896347005552, "loss": 3.1952, "step": 18191 }, { "epoch": 0.89, "grad_norm": 0.5376279354095459, "learning_rate": 0.0004784772597578098, "loss": 3.0298, "step": 18192 }, { "epoch": 0.89, "grad_norm": 0.5507842302322388, "learning_rate": 0.00047846488434498966, "loss": 3.3163, "step": 18193 }, { "epoch": 0.89, "grad_norm": 0.5731067657470703, "learning_rate": 0.0004784525084621274, "loss": 3.1126, "step": 18194 }, { "epoch": 0.89, "grad_norm": 0.5198032855987549, "learning_rate": 0.00047844013210925556, "loss": 3.0856, "step": 18195 }, { "epoch": 0.89, "grad_norm": 0.5343044400215149, "learning_rate": 0.0004784277552864067, "loss": 3.3659, "step": 18196 }, { "epoch": 0.89, "grad_norm": 0.49065908789634705, "learning_rate": 0.00047841537799361345, "loss": 3.1321, "step": 18197 }, { "epoch": 0.89, "grad_norm": 0.521992027759552, "learning_rate": 0.0004784030002309085, "loss": 3.2767, "step": 18198 }, { "epoch": 0.89, "grad_norm": 0.5364126563072205, "learning_rate": 0.0004783906219983244, "loss": 3.3079, "step": 18199 }, { "epoch": 0.89, "grad_norm": 0.5045154690742493, "learning_rate": 0.0004783782432958937, "loss": 3.2046, "step": 18200 }, { "epoch": 0.89, "grad_norm": 0.5102838277816772, "learning_rate": 0.000478365864123649, "loss": 3.0836, "step": 18201 }, { "epoch": 0.89, "grad_norm": 0.5909935235977173, "learning_rate": 0.000478353484481623, "loss": 2.9596, "step": 18202 }, { "epoch": 0.89, "grad_norm": 0.5541250705718994, "learning_rate": 0.0004783411043698483, "loss": 3.1793, "step": 18203 }, { "epoch": 0.89, "grad_norm": 0.5354369878768921, "learning_rate": 0.00047832872378835735, "loss": 3.4151, "step": 18204 }, { "epoch": 0.89, "grad_norm": 0.5579158067703247, "learning_rate": 0.0004783163427371828, "loss": 3.2288, "step": 18205 }, { "epoch": 0.89, "grad_norm": 0.5801526308059692, "learning_rate": 0.0004783039612163575, "loss": 3.1398, "step": 18206 }, { "epoch": 0.89, "grad_norm": 0.5363543629646301, "learning_rate": 0.00047829157922591375, "loss": 3.3956, "step": 18207 }, { "epoch": 0.89, "grad_norm": 0.5684135556221008, "learning_rate": 0.00047827919676588437, "loss": 3.2425, "step": 18208 }, { "epoch": 0.89, "grad_norm": 0.5314901471138, "learning_rate": 0.0004782668138363019, "loss": 3.2665, "step": 18209 }, { "epoch": 0.89, "grad_norm": 0.5224202871322632, "learning_rate": 0.0004782544304371988, "loss": 3.0523, "step": 18210 }, { "epoch": 0.89, "grad_norm": 0.5397300124168396, "learning_rate": 0.00047824204656860794, "loss": 3.1839, "step": 18211 }, { "epoch": 0.89, "grad_norm": 0.5352901220321655, "learning_rate": 0.0004782296622305619, "loss": 3.2884, "step": 18212 }, { "epoch": 0.89, "grad_norm": 0.5520905256271362, "learning_rate": 0.0004782172774230932, "loss": 3.0375, "step": 18213 }, { "epoch": 0.89, "grad_norm": 0.5324362516403198, "learning_rate": 0.0004782048921462344, "loss": 3.2757, "step": 18214 }, { "epoch": 0.89, "grad_norm": 0.5429298281669617, "learning_rate": 0.00047819250640001833, "loss": 3.2702, "step": 18215 }, { "epoch": 0.89, "grad_norm": 0.5223371386528015, "learning_rate": 0.00047818012018447737, "loss": 3.135, "step": 18216 }, { "epoch": 0.89, "grad_norm": 0.5844689607620239, "learning_rate": 0.0004781677334996443, "loss": 3.1293, "step": 18217 }, { "epoch": 0.89, "grad_norm": 0.513873279094696, "learning_rate": 0.00047815534634555187, "loss": 3.1109, "step": 18218 }, { "epoch": 0.89, "grad_norm": 0.522002637386322, "learning_rate": 0.00047814295872223233, "loss": 3.249, "step": 18219 }, { "epoch": 0.89, "grad_norm": 0.4845925569534302, "learning_rate": 0.0004781305706297186, "loss": 3.1046, "step": 18220 }, { "epoch": 0.89, "grad_norm": 0.5436228513717651, "learning_rate": 0.00047811818206804324, "loss": 3.1851, "step": 18221 }, { "epoch": 0.89, "grad_norm": 0.4941715598106384, "learning_rate": 0.0004781057930372389, "loss": 3.3141, "step": 18222 }, { "epoch": 0.89, "grad_norm": 0.5171759128570557, "learning_rate": 0.0004780934035373381, "loss": 3.3435, "step": 18223 }, { "epoch": 0.89, "grad_norm": 0.5393630862236023, "learning_rate": 0.0004780810135683736, "loss": 3.1683, "step": 18224 }, { "epoch": 0.89, "grad_norm": 0.5018099546432495, "learning_rate": 0.0004780686231303779, "loss": 3.2249, "step": 18225 }, { "epoch": 0.89, "grad_norm": 0.5250770449638367, "learning_rate": 0.0004780562322233839, "loss": 3.3423, "step": 18226 }, { "epoch": 0.89, "grad_norm": 0.5291233658790588, "learning_rate": 0.0004780438408474239, "loss": 3.191, "step": 18227 }, { "epoch": 0.89, "grad_norm": 0.521236002445221, "learning_rate": 0.00047803144900253076, "loss": 2.9488, "step": 18228 }, { "epoch": 0.89, "grad_norm": 0.5012958645820618, "learning_rate": 0.000478019056688737, "loss": 3.3888, "step": 18229 }, { "epoch": 0.89, "grad_norm": 0.5145010352134705, "learning_rate": 0.0004780066639060753, "loss": 3.2967, "step": 18230 }, { "epoch": 0.89, "grad_norm": 0.5077473521232605, "learning_rate": 0.0004779942706545783, "loss": 3.1262, "step": 18231 }, { "epoch": 0.89, "grad_norm": 0.4998171627521515, "learning_rate": 0.00047798187693427876, "loss": 3.258, "step": 18232 }, { "epoch": 0.89, "grad_norm": 0.5533021688461304, "learning_rate": 0.000477969482745209, "loss": 3.0468, "step": 18233 }, { "epoch": 0.89, "grad_norm": 0.5420917868614197, "learning_rate": 0.0004779570880874021, "loss": 3.2048, "step": 18234 }, { "epoch": 0.89, "grad_norm": 0.5190141201019287, "learning_rate": 0.0004779446929608904, "loss": 3.2259, "step": 18235 }, { "epoch": 0.89, "grad_norm": 0.5503576993942261, "learning_rate": 0.00047793229736570647, "loss": 3.3452, "step": 18236 }, { "epoch": 0.89, "grad_norm": 0.4898211359977722, "learning_rate": 0.0004779199013018833, "loss": 3.0858, "step": 18237 }, { "epoch": 0.89, "grad_norm": 0.5298957824707031, "learning_rate": 0.0004779075047694533, "loss": 3.1858, "step": 18238 }, { "epoch": 0.89, "grad_norm": 0.5395136475563049, "learning_rate": 0.00047789510776844916, "loss": 3.1886, "step": 18239 }, { "epoch": 0.89, "grad_norm": 0.5132995247840881, "learning_rate": 0.0004778827102989035, "loss": 2.9574, "step": 18240 }, { "epoch": 0.89, "grad_norm": 0.5644649267196655, "learning_rate": 0.0004778703123608491, "loss": 3.2534, "step": 18241 }, { "epoch": 0.89, "grad_norm": 0.5382485389709473, "learning_rate": 0.0004778579139543184, "loss": 3.4383, "step": 18242 }, { "epoch": 0.89, "grad_norm": 0.485053688287735, "learning_rate": 0.0004778455150793444, "loss": 3.2485, "step": 18243 }, { "epoch": 0.89, "grad_norm": 0.49913448095321655, "learning_rate": 0.0004778331157359594, "loss": 3.0871, "step": 18244 }, { "epoch": 0.89, "grad_norm": 0.550160825252533, "learning_rate": 0.0004778207159241962, "loss": 3.0416, "step": 18245 }, { "epoch": 0.89, "grad_norm": 0.507668137550354, "learning_rate": 0.0004778083156440874, "loss": 3.0399, "step": 18246 }, { "epoch": 0.89, "grad_norm": 0.5110635757446289, "learning_rate": 0.00047779591489566583, "loss": 2.992, "step": 18247 }, { "epoch": 0.89, "grad_norm": 0.5091555714607239, "learning_rate": 0.000477783513678964, "loss": 3.0346, "step": 18248 }, { "epoch": 0.89, "grad_norm": 0.5132473111152649, "learning_rate": 0.00047777111199401464, "loss": 3.1061, "step": 18249 }, { "epoch": 0.89, "grad_norm": 0.5340614914894104, "learning_rate": 0.0004777587098408503, "loss": 3.0863, "step": 18250 }, { "epoch": 0.89, "grad_norm": 0.5357064008712769, "learning_rate": 0.0004777463072195037, "loss": 3.3543, "step": 18251 }, { "epoch": 0.89, "grad_norm": 0.5139802098274231, "learning_rate": 0.0004777339041300077, "loss": 3.2554, "step": 18252 }, { "epoch": 0.89, "grad_norm": 0.5598965287208557, "learning_rate": 0.0004777215005723947, "loss": 2.9029, "step": 18253 }, { "epoch": 0.89, "grad_norm": 0.5134537220001221, "learning_rate": 0.0004777090965466975, "loss": 3.0175, "step": 18254 }, { "epoch": 0.89, "grad_norm": 0.6165127754211426, "learning_rate": 0.0004776966920529487, "loss": 3.15, "step": 18255 }, { "epoch": 0.89, "grad_norm": 0.5549294352531433, "learning_rate": 0.00047768428709118094, "loss": 3.2095, "step": 18256 }, { "epoch": 0.89, "grad_norm": 0.5079606771469116, "learning_rate": 0.0004776718816614271, "loss": 3.1343, "step": 18257 }, { "epoch": 0.89, "grad_norm": 0.5567631125450134, "learning_rate": 0.0004776594757637197, "loss": 3.0041, "step": 18258 }, { "epoch": 0.89, "grad_norm": 0.528123140335083, "learning_rate": 0.00047764706939809143, "loss": 3.1465, "step": 18259 }, { "epoch": 0.89, "grad_norm": 0.5787830352783203, "learning_rate": 0.000477634662564575, "loss": 3.06, "step": 18260 }, { "epoch": 0.89, "grad_norm": 0.5511603355407715, "learning_rate": 0.00047762225526320297, "loss": 3.1724, "step": 18261 }, { "epoch": 0.89, "grad_norm": 0.5570658445358276, "learning_rate": 0.00047760984749400806, "loss": 3.219, "step": 18262 }, { "epoch": 0.9, "grad_norm": 0.5181900858879089, "learning_rate": 0.00047759743925702313, "loss": 3.1698, "step": 18263 }, { "epoch": 0.9, "grad_norm": 0.5297556519508362, "learning_rate": 0.00047758503055228064, "loss": 3.0683, "step": 18264 }, { "epoch": 0.9, "grad_norm": 0.5381805896759033, "learning_rate": 0.0004775726213798134, "loss": 3.1135, "step": 18265 }, { "epoch": 0.9, "grad_norm": 0.5791769027709961, "learning_rate": 0.000477560211739654, "loss": 3.2524, "step": 18266 }, { "epoch": 0.9, "grad_norm": 0.5343177914619446, "learning_rate": 0.0004775478016318352, "loss": 3.055, "step": 18267 }, { "epoch": 0.9, "grad_norm": 0.5313341021537781, "learning_rate": 0.00047753539105638965, "loss": 3.1583, "step": 18268 }, { "epoch": 0.9, "grad_norm": 0.5037796497344971, "learning_rate": 0.00047752298001335007, "loss": 3.2608, "step": 18269 }, { "epoch": 0.9, "grad_norm": 0.5314697027206421, "learning_rate": 0.0004775105685027491, "loss": 3.0916, "step": 18270 }, { "epoch": 0.9, "grad_norm": 0.5548476576805115, "learning_rate": 0.0004774981565246195, "loss": 3.1843, "step": 18271 }, { "epoch": 0.9, "grad_norm": 0.5024945735931396, "learning_rate": 0.00047748574407899394, "loss": 3.1704, "step": 18272 }, { "epoch": 0.9, "grad_norm": 0.5327395796775818, "learning_rate": 0.000477473331165905, "loss": 3.1438, "step": 18273 }, { "epoch": 0.9, "grad_norm": 0.5295423269271851, "learning_rate": 0.00047746091778538553, "loss": 3.0959, "step": 18274 }, { "epoch": 0.9, "grad_norm": 0.5354325771331787, "learning_rate": 0.0004774485039374681, "loss": 3.1872, "step": 18275 }, { "epoch": 0.9, "grad_norm": 0.5765413641929626, "learning_rate": 0.0004774360896221855, "loss": 3.0475, "step": 18276 }, { "epoch": 0.9, "grad_norm": 0.5822243690490723, "learning_rate": 0.0004774236748395704, "loss": 3.0958, "step": 18277 }, { "epoch": 0.9, "grad_norm": 0.5113519430160522, "learning_rate": 0.0004774112595896554, "loss": 3.2856, "step": 18278 }, { "epoch": 0.9, "grad_norm": 0.5153071284294128, "learning_rate": 0.00047739884387247334, "loss": 3.3148, "step": 18279 }, { "epoch": 0.9, "grad_norm": 0.5023435354232788, "learning_rate": 0.0004773864276880569, "loss": 3.1991, "step": 18280 }, { "epoch": 0.9, "grad_norm": 0.5883546471595764, "learning_rate": 0.00047737401103643866, "loss": 3.0173, "step": 18281 }, { "epoch": 0.9, "grad_norm": 0.51036536693573, "learning_rate": 0.0004773615939176515, "loss": 3.1579, "step": 18282 }, { "epoch": 0.9, "grad_norm": 0.5233753323554993, "learning_rate": 0.00047734917633172804, "loss": 3.2266, "step": 18283 }, { "epoch": 0.9, "grad_norm": 0.5153245329856873, "learning_rate": 0.00047733675827870087, "loss": 3.3655, "step": 18284 }, { "epoch": 0.9, "grad_norm": 0.5131426453590393, "learning_rate": 0.000477324339758603, "loss": 3.2084, "step": 18285 }, { "epoch": 0.9, "grad_norm": 0.532879650592804, "learning_rate": 0.00047731192077146673, "loss": 3.2726, "step": 18286 }, { "epoch": 0.9, "grad_norm": 0.547607421875, "learning_rate": 0.00047729950131732515, "loss": 3.2178, "step": 18287 }, { "epoch": 0.9, "grad_norm": 0.5815809965133667, "learning_rate": 0.0004772870813962107, "loss": 3.0909, "step": 18288 }, { "epoch": 0.9, "grad_norm": 0.5434556603431702, "learning_rate": 0.00047727466100815617, "loss": 3.1988, "step": 18289 }, { "epoch": 0.9, "grad_norm": 0.5472313761711121, "learning_rate": 0.0004772622401531944, "loss": 3.2903, "step": 18290 }, { "epoch": 0.9, "grad_norm": 0.5081930160522461, "learning_rate": 0.000477249818831358, "loss": 3.1709, "step": 18291 }, { "epoch": 0.9, "grad_norm": 0.5110074281692505, "learning_rate": 0.00047723739704267964, "loss": 3.4036, "step": 18292 }, { "epoch": 0.9, "grad_norm": 0.5390797257423401, "learning_rate": 0.0004772249747871921, "loss": 3.2653, "step": 18293 }, { "epoch": 0.9, "grad_norm": 0.5095894932746887, "learning_rate": 0.00047721255206492814, "loss": 3.3201, "step": 18294 }, { "epoch": 0.9, "grad_norm": 0.5310311913490295, "learning_rate": 0.00047720012887592035, "loss": 3.1775, "step": 18295 }, { "epoch": 0.9, "grad_norm": 0.5089913606643677, "learning_rate": 0.0004771877052202016, "loss": 3.095, "step": 18296 }, { "epoch": 0.9, "grad_norm": 0.5221785306930542, "learning_rate": 0.0004771752810978044, "loss": 2.9333, "step": 18297 }, { "epoch": 0.9, "grad_norm": 0.5876374244689941, "learning_rate": 0.0004771628565087617, "loss": 3.179, "step": 18298 }, { "epoch": 0.9, "grad_norm": 0.5047369599342346, "learning_rate": 0.0004771504314531061, "loss": 3.1365, "step": 18299 }, { "epoch": 0.9, "grad_norm": 0.5240013003349304, "learning_rate": 0.0004771380059308705, "loss": 3.3203, "step": 18300 }, { "epoch": 0.9, "grad_norm": 0.5100141763687134, "learning_rate": 0.0004771255799420873, "loss": 3.1165, "step": 18301 }, { "epoch": 0.9, "grad_norm": 0.5566077828407288, "learning_rate": 0.0004771131534867894, "loss": 3.1252, "step": 18302 }, { "epoch": 0.9, "grad_norm": 0.5329804420471191, "learning_rate": 0.00047710072656500965, "loss": 3.0706, "step": 18303 }, { "epoch": 0.9, "grad_norm": 0.5710445642471313, "learning_rate": 0.00047708829917678065, "loss": 3.086, "step": 18304 }, { "epoch": 0.9, "grad_norm": 0.5329696536064148, "learning_rate": 0.00047707587132213514, "loss": 3.0563, "step": 18305 }, { "epoch": 0.9, "grad_norm": 0.5192325115203857, "learning_rate": 0.00047706344300110586, "loss": 3.245, "step": 18306 }, { "epoch": 0.9, "grad_norm": 0.5296958088874817, "learning_rate": 0.00047705101421372556, "loss": 3.0319, "step": 18307 }, { "epoch": 0.9, "grad_norm": 0.5326006412506104, "learning_rate": 0.00047703858496002697, "loss": 3.1762, "step": 18308 }, { "epoch": 0.9, "grad_norm": 0.5101357698440552, "learning_rate": 0.0004770261552400428, "loss": 3.0583, "step": 18309 }, { "epoch": 0.9, "grad_norm": 0.5171705484390259, "learning_rate": 0.0004770137250538058, "loss": 3.0426, "step": 18310 }, { "epoch": 0.9, "grad_norm": 0.5427629351615906, "learning_rate": 0.0004770012944013487, "loss": 3.1091, "step": 18311 }, { "epoch": 0.9, "grad_norm": 0.5223405957221985, "learning_rate": 0.0004769888632827043, "loss": 3.0479, "step": 18312 }, { "epoch": 0.9, "grad_norm": 0.5293853282928467, "learning_rate": 0.0004769764316979052, "loss": 3.3125, "step": 18313 }, { "epoch": 0.9, "grad_norm": 0.5013025999069214, "learning_rate": 0.00047696399964698434, "loss": 3.0253, "step": 18314 }, { "epoch": 0.9, "grad_norm": 0.5177825093269348, "learning_rate": 0.0004769515671299743, "loss": 3.0815, "step": 18315 }, { "epoch": 0.9, "grad_norm": 0.5152139663696289, "learning_rate": 0.00047693913414690795, "loss": 3.1761, "step": 18316 }, { "epoch": 0.9, "grad_norm": 0.5001190304756165, "learning_rate": 0.000476926700697818, "loss": 3.0974, "step": 18317 }, { "epoch": 0.9, "grad_norm": 0.5169618129730225, "learning_rate": 0.00047691426678273706, "loss": 3.0948, "step": 18318 }, { "epoch": 0.9, "grad_norm": 0.49277180433273315, "learning_rate": 0.00047690183240169803, "loss": 2.9982, "step": 18319 }, { "epoch": 0.9, "grad_norm": 0.536400318145752, "learning_rate": 0.00047688939755473363, "loss": 3.2277, "step": 18320 }, { "epoch": 0.9, "grad_norm": 0.4991290271282196, "learning_rate": 0.0004768769622418766, "loss": 3.0983, "step": 18321 }, { "epoch": 0.9, "grad_norm": 0.6011770963668823, "learning_rate": 0.0004768645264631597, "loss": 3.1713, "step": 18322 }, { "epoch": 0.9, "grad_norm": 0.4987938404083252, "learning_rate": 0.00047685209021861567, "loss": 3.1428, "step": 18323 }, { "epoch": 0.9, "grad_norm": 0.507752537727356, "learning_rate": 0.0004768396535082773, "loss": 3.2872, "step": 18324 }, { "epoch": 0.9, "grad_norm": 0.5121263265609741, "learning_rate": 0.0004768272163321772, "loss": 2.9893, "step": 18325 }, { "epoch": 0.9, "grad_norm": 0.5051730275154114, "learning_rate": 0.00047681477869034836, "loss": 3.1656, "step": 18326 }, { "epoch": 0.9, "grad_norm": 0.5293095111846924, "learning_rate": 0.0004768023405828235, "loss": 2.9227, "step": 18327 }, { "epoch": 0.9, "grad_norm": 0.5156005620956421, "learning_rate": 0.0004767899020096351, "loss": 3.2264, "step": 18328 }, { "epoch": 0.9, "grad_norm": 0.5265857577323914, "learning_rate": 0.0004767774629708162, "loss": 3.1721, "step": 18329 }, { "epoch": 0.9, "grad_norm": 0.5211267471313477, "learning_rate": 0.0004767650234663995, "loss": 3.0617, "step": 18330 }, { "epoch": 0.9, "grad_norm": 0.5150598883628845, "learning_rate": 0.00047675258349641773, "loss": 3.2196, "step": 18331 }, { "epoch": 0.9, "grad_norm": 0.5152638554573059, "learning_rate": 0.00047674014306090375, "loss": 3.3045, "step": 18332 }, { "epoch": 0.9, "grad_norm": 0.5148764252662659, "learning_rate": 0.0004767277021598901, "loss": 3.0967, "step": 18333 }, { "epoch": 0.9, "grad_norm": 0.5162348747253418, "learning_rate": 0.0004767152607934098, "loss": 2.8442, "step": 18334 }, { "epoch": 0.9, "grad_norm": 0.5254623889923096, "learning_rate": 0.00047670281896149553, "loss": 3.1644, "step": 18335 }, { "epoch": 0.9, "grad_norm": 0.5040985345840454, "learning_rate": 0.00047669037666418, "loss": 2.972, "step": 18336 }, { "epoch": 0.9, "grad_norm": 0.5320616960525513, "learning_rate": 0.00047667793390149607, "loss": 3.3935, "step": 18337 }, { "epoch": 0.9, "grad_norm": 0.5272945761680603, "learning_rate": 0.0004766654906734764, "loss": 3.1817, "step": 18338 }, { "epoch": 0.9, "grad_norm": 0.5453786253929138, "learning_rate": 0.0004766530469801538, "loss": 3.1222, "step": 18339 }, { "epoch": 0.9, "grad_norm": 0.5060163140296936, "learning_rate": 0.00047664060282156113, "loss": 3.1181, "step": 18340 }, { "epoch": 0.9, "grad_norm": 0.5192004442214966, "learning_rate": 0.00047662815819773106, "loss": 3.4064, "step": 18341 }, { "epoch": 0.9, "grad_norm": 0.5070635080337524, "learning_rate": 0.00047661571310869655, "loss": 3.1392, "step": 18342 }, { "epoch": 0.9, "grad_norm": 0.5369440317153931, "learning_rate": 0.0004766032675544901, "loss": 3.0245, "step": 18343 }, { "epoch": 0.9, "grad_norm": 0.5121726393699646, "learning_rate": 0.0004765908215351446, "loss": 3.2891, "step": 18344 }, { "epoch": 0.9, "grad_norm": 0.5277031064033508, "learning_rate": 0.000476578375050693, "loss": 3.0794, "step": 18345 }, { "epoch": 0.9, "grad_norm": 0.5574108958244324, "learning_rate": 0.00047656592810116794, "loss": 2.8832, "step": 18346 }, { "epoch": 0.9, "grad_norm": 0.5462555885314941, "learning_rate": 0.0004765534806866021, "loss": 3.0332, "step": 18347 }, { "epoch": 0.9, "grad_norm": 0.5030328035354614, "learning_rate": 0.0004765410328070284, "loss": 3.347, "step": 18348 }, { "epoch": 0.9, "grad_norm": 0.5085557103157043, "learning_rate": 0.00047652858446247954, "loss": 3.1421, "step": 18349 }, { "epoch": 0.9, "grad_norm": 0.5109598636627197, "learning_rate": 0.00047651613565298853, "loss": 3.2073, "step": 18350 }, { "epoch": 0.9, "grad_norm": 0.5085839033126831, "learning_rate": 0.00047650368637858784, "loss": 3.1515, "step": 18351 }, { "epoch": 0.9, "grad_norm": 0.524103581905365, "learning_rate": 0.00047649123663931047, "loss": 3.1561, "step": 18352 }, { "epoch": 0.9, "grad_norm": 0.5291694402694702, "learning_rate": 0.0004764787864351892, "loss": 3.1722, "step": 18353 }, { "epoch": 0.9, "grad_norm": 0.590645432472229, "learning_rate": 0.0004764663357662567, "loss": 3.1482, "step": 18354 }, { "epoch": 0.9, "grad_norm": 0.5344750285148621, "learning_rate": 0.0004764538846325458, "loss": 3.1071, "step": 18355 }, { "epoch": 0.9, "grad_norm": 0.5120199918746948, "learning_rate": 0.00047644143303408935, "loss": 3.0998, "step": 18356 }, { "epoch": 0.9, "grad_norm": 0.5367302894592285, "learning_rate": 0.00047642898097092015, "loss": 3.2283, "step": 18357 }, { "epoch": 0.9, "grad_norm": 0.5890613794326782, "learning_rate": 0.000476416528443071, "loss": 3.1638, "step": 18358 }, { "epoch": 0.9, "grad_norm": 0.5009247660636902, "learning_rate": 0.0004764040754505746, "loss": 3.2533, "step": 18359 }, { "epoch": 0.9, "grad_norm": 0.528739333152771, "learning_rate": 0.00047639162199346384, "loss": 3.0933, "step": 18360 }, { "epoch": 0.9, "grad_norm": 0.5243030786514282, "learning_rate": 0.00047637916807177153, "loss": 3.1643, "step": 18361 }, { "epoch": 0.9, "grad_norm": 0.5276967287063599, "learning_rate": 0.0004763667136855304, "loss": 3.2262, "step": 18362 }, { "epoch": 0.9, "grad_norm": 0.5526394844055176, "learning_rate": 0.0004763542588347733, "loss": 3.0935, "step": 18363 }, { "epoch": 0.9, "grad_norm": 0.5326481461524963, "learning_rate": 0.00047634180351953295, "loss": 3.1979, "step": 18364 }, { "epoch": 0.9, "grad_norm": 0.5490707159042358, "learning_rate": 0.0004763293477398423, "loss": 3.137, "step": 18365 }, { "epoch": 0.9, "grad_norm": 0.5224721431732178, "learning_rate": 0.0004763168914957341, "loss": 3.255, "step": 18366 }, { "epoch": 0.9, "grad_norm": 0.5512878894805908, "learning_rate": 0.0004763044347872412, "loss": 2.9361, "step": 18367 }, { "epoch": 0.9, "grad_norm": 0.5557450652122498, "learning_rate": 0.00047629197761439617, "loss": 3.1725, "step": 18368 }, { "epoch": 0.9, "grad_norm": 0.5146492719650269, "learning_rate": 0.0004762795199772321, "loss": 3.3767, "step": 18369 }, { "epoch": 0.9, "grad_norm": 0.6718878746032715, "learning_rate": 0.00047626706187578173, "loss": 2.9582, "step": 18370 }, { "epoch": 0.9, "grad_norm": 0.5548587441444397, "learning_rate": 0.0004762546033100778, "loss": 3.1481, "step": 18371 }, { "epoch": 0.9, "grad_norm": 0.5026013255119324, "learning_rate": 0.0004762421442801532, "loss": 3.3895, "step": 18372 }, { "epoch": 0.9, "grad_norm": 0.5194470286369324, "learning_rate": 0.00047622968478604064, "loss": 3.5001, "step": 18373 }, { "epoch": 0.9, "grad_norm": 0.5245919823646545, "learning_rate": 0.00047621722482777307, "loss": 3.1301, "step": 18374 }, { "epoch": 0.9, "grad_norm": 0.5411098599433899, "learning_rate": 0.00047620476440538316, "loss": 3.0287, "step": 18375 }, { "epoch": 0.9, "grad_norm": 0.5193166732788086, "learning_rate": 0.000476192303518904, "loss": 3.1614, "step": 18376 }, { "epoch": 0.9, "grad_norm": 0.5470539927482605, "learning_rate": 0.000476179842168368, "loss": 3.184, "step": 18377 }, { "epoch": 0.9, "grad_norm": 0.5132575631141663, "learning_rate": 0.0004761673803538084, "loss": 3.0766, "step": 18378 }, { "epoch": 0.9, "grad_norm": 0.5734747648239136, "learning_rate": 0.00047615491807525764, "loss": 3.2, "step": 18379 }, { "epoch": 0.9, "grad_norm": 0.536043107509613, "learning_rate": 0.0004761424553327488, "loss": 3.2023, "step": 18380 }, { "epoch": 0.9, "grad_norm": 0.5419288277626038, "learning_rate": 0.00047612999212631464, "loss": 3.2887, "step": 18381 }, { "epoch": 0.9, "grad_norm": 0.5045163631439209, "learning_rate": 0.00047611752845598803, "loss": 3.1443, "step": 18382 }, { "epoch": 0.9, "grad_norm": 0.5081565976142883, "learning_rate": 0.0004761050643218017, "loss": 3.0771, "step": 18383 }, { "epoch": 0.9, "grad_norm": 0.5471091866493225, "learning_rate": 0.00047609259972378843, "loss": 3.1064, "step": 18384 }, { "epoch": 0.9, "grad_norm": 0.5175535082817078, "learning_rate": 0.00047608013466198125, "loss": 3.1124, "step": 18385 }, { "epoch": 0.9, "grad_norm": 0.5380799174308777, "learning_rate": 0.0004760676691364129, "loss": 2.9918, "step": 18386 }, { "epoch": 0.9, "grad_norm": 0.5299926996231079, "learning_rate": 0.0004760552031471162, "loss": 3.3698, "step": 18387 }, { "epoch": 0.9, "grad_norm": 0.5114349722862244, "learning_rate": 0.00047604273669412387, "loss": 2.9142, "step": 18388 }, { "epoch": 0.9, "grad_norm": 0.5319020748138428, "learning_rate": 0.000476030269777469, "loss": 3.3346, "step": 18389 }, { "epoch": 0.9, "grad_norm": 0.5370768904685974, "learning_rate": 0.0004760178023971841, "loss": 3.076, "step": 18390 }, { "epoch": 0.9, "grad_norm": 0.5257225036621094, "learning_rate": 0.0004760053345533023, "loss": 3.0894, "step": 18391 }, { "epoch": 0.9, "grad_norm": 0.5191155076026917, "learning_rate": 0.0004759928662458562, "loss": 3.0241, "step": 18392 }, { "epoch": 0.9, "grad_norm": 0.5163692831993103, "learning_rate": 0.0004759803974748789, "loss": 3.0367, "step": 18393 }, { "epoch": 0.9, "grad_norm": 0.5202220678329468, "learning_rate": 0.00047596792824040315, "loss": 3.1828, "step": 18394 }, { "epoch": 0.9, "grad_norm": 0.5226185321807861, "learning_rate": 0.0004759554585424616, "loss": 3.1161, "step": 18395 }, { "epoch": 0.9, "grad_norm": 0.5085725784301758, "learning_rate": 0.0004759429883810873, "loss": 3.1653, "step": 18396 }, { "epoch": 0.9, "grad_norm": 0.5269706845283508, "learning_rate": 0.000475930517756313, "loss": 3.3381, "step": 18397 }, { "epoch": 0.9, "grad_norm": 0.5295675992965698, "learning_rate": 0.00047591804666817164, "loss": 3.1354, "step": 18398 }, { "epoch": 0.9, "grad_norm": 0.5254941582679749, "learning_rate": 0.00047590557511669596, "loss": 3.4621, "step": 18399 }, { "epoch": 0.9, "grad_norm": 0.4832818806171417, "learning_rate": 0.00047589310310191873, "loss": 3.0591, "step": 18400 }, { "epoch": 0.9, "grad_norm": 0.5161293745040894, "learning_rate": 0.0004758806306238731, "loss": 3.2809, "step": 18401 }, { "epoch": 0.9, "grad_norm": 0.5065767168998718, "learning_rate": 0.0004758681576825916, "loss": 3.2523, "step": 18402 }, { "epoch": 0.9, "grad_norm": 0.48637884855270386, "learning_rate": 0.0004758556842781074, "loss": 3.2953, "step": 18403 }, { "epoch": 0.9, "grad_norm": 0.5562406778335571, "learning_rate": 0.000475843210410453, "loss": 3.0296, "step": 18404 }, { "epoch": 0.9, "grad_norm": 0.553834855556488, "learning_rate": 0.0004758307360796615, "loss": 3.3277, "step": 18405 }, { "epoch": 0.9, "grad_norm": 0.5108713507652283, "learning_rate": 0.00047581826128576557, "loss": 3.1895, "step": 18406 }, { "epoch": 0.9, "grad_norm": 0.4815329909324646, "learning_rate": 0.0004758057860287984, "loss": 3.2556, "step": 18407 }, { "epoch": 0.9, "grad_norm": 0.5623984336853027, "learning_rate": 0.00047579331030879246, "loss": 3.0558, "step": 18408 }, { "epoch": 0.9, "grad_norm": 0.5089913010597229, "learning_rate": 0.00047578083412578085, "loss": 3.1773, "step": 18409 }, { "epoch": 0.9, "grad_norm": 0.5331538319587708, "learning_rate": 0.00047576835747979626, "loss": 3.1828, "step": 18410 }, { "epoch": 0.9, "grad_norm": 0.519395112991333, "learning_rate": 0.0004757558803708717, "loss": 3.0746, "step": 18411 }, { "epoch": 0.9, "grad_norm": 0.538934051990509, "learning_rate": 0.00047574340279903993, "loss": 3.1324, "step": 18412 }, { "epoch": 0.9, "grad_norm": 0.48608413338661194, "learning_rate": 0.0004757309247643339, "loss": 3.0911, "step": 18413 }, { "epoch": 0.9, "grad_norm": 0.5462820529937744, "learning_rate": 0.0004757184462667865, "loss": 3.2101, "step": 18414 }, { "epoch": 0.9, "grad_norm": 0.5184861421585083, "learning_rate": 0.00047570596730643036, "loss": 3.2584, "step": 18415 }, { "epoch": 0.9, "grad_norm": 0.5258368253707886, "learning_rate": 0.0004756934878832987, "loss": 3.1552, "step": 18416 }, { "epoch": 0.9, "grad_norm": 0.5133891701698303, "learning_rate": 0.0004756810079974241, "loss": 3.2167, "step": 18417 }, { "epoch": 0.9, "grad_norm": 0.49524182081222534, "learning_rate": 0.0004756685276488396, "loss": 2.9747, "step": 18418 }, { "epoch": 0.9, "grad_norm": 0.5075833797454834, "learning_rate": 0.000475656046837578, "loss": 3.0269, "step": 18419 }, { "epoch": 0.9, "grad_norm": 0.4994834065437317, "learning_rate": 0.0004756435655636721, "loss": 3.2545, "step": 18420 }, { "epoch": 0.9, "grad_norm": 0.5248388648033142, "learning_rate": 0.00047563108382715487, "loss": 3.1916, "step": 18421 }, { "epoch": 0.9, "grad_norm": 0.5069358944892883, "learning_rate": 0.00047561860162805925, "loss": 3.0174, "step": 18422 }, { "epoch": 0.9, "grad_norm": 0.5556639432907104, "learning_rate": 0.0004756061189664179, "loss": 3.0031, "step": 18423 }, { "epoch": 0.9, "grad_norm": 0.49170437455177307, "learning_rate": 0.00047559363584226394, "loss": 3.2966, "step": 18424 }, { "epoch": 0.9, "grad_norm": 0.5303202867507935, "learning_rate": 0.0004755811522556301, "loss": 2.9769, "step": 18425 }, { "epoch": 0.9, "grad_norm": 0.49278274178504944, "learning_rate": 0.0004755686682065493, "loss": 3.1306, "step": 18426 }, { "epoch": 0.9, "grad_norm": 0.5985496044158936, "learning_rate": 0.0004755561836950544, "loss": 3.0919, "step": 18427 }, { "epoch": 0.9, "grad_norm": 0.5184206962585449, "learning_rate": 0.00047554369872117834, "loss": 3.1546, "step": 18428 }, { "epoch": 0.9, "grad_norm": 0.5128380656242371, "learning_rate": 0.000475531213284954, "loss": 3.2689, "step": 18429 }, { "epoch": 0.9, "grad_norm": 0.5515463948249817, "learning_rate": 0.0004755187273864141, "loss": 3.2732, "step": 18430 }, { "epoch": 0.9, "grad_norm": 0.5561820864677429, "learning_rate": 0.00047550624102559173, "loss": 3.1093, "step": 18431 }, { "epoch": 0.9, "grad_norm": 0.610331654548645, "learning_rate": 0.0004754937542025197, "loss": 3.0545, "step": 18432 }, { "epoch": 0.9, "grad_norm": 0.5147985219955444, "learning_rate": 0.0004754812669172308, "loss": 3.0679, "step": 18433 }, { "epoch": 0.9, "grad_norm": 0.553709864616394, "learning_rate": 0.0004754687791697581, "loss": 3.4146, "step": 18434 }, { "epoch": 0.9, "grad_norm": 0.5066872239112854, "learning_rate": 0.0004754562909601344, "loss": 3.041, "step": 18435 }, { "epoch": 0.9, "grad_norm": 0.5329242944717407, "learning_rate": 0.0004754438022883926, "loss": 3.0274, "step": 18436 }, { "epoch": 0.9, "grad_norm": 0.570603609085083, "learning_rate": 0.00047543131315456566, "loss": 3.26, "step": 18437 }, { "epoch": 0.9, "grad_norm": 0.5356221795082092, "learning_rate": 0.0004754188235586863, "loss": 3.054, "step": 18438 }, { "epoch": 0.9, "grad_norm": 0.5116856694221497, "learning_rate": 0.00047540633350078753, "loss": 3.0638, "step": 18439 }, { "epoch": 0.9, "grad_norm": 0.5043549537658691, "learning_rate": 0.00047539384298090227, "loss": 3.1825, "step": 18440 }, { "epoch": 0.9, "grad_norm": 0.4952833950519562, "learning_rate": 0.00047538135199906334, "loss": 3.3498, "step": 18441 }, { "epoch": 0.9, "grad_norm": 0.5181792378425598, "learning_rate": 0.0004753688605553037, "loss": 3.1791, "step": 18442 }, { "epoch": 0.9, "grad_norm": 0.5022110939025879, "learning_rate": 0.0004753563686496562, "loss": 3.0999, "step": 18443 }, { "epoch": 0.9, "grad_norm": 0.5453217625617981, "learning_rate": 0.0004753438762821539, "loss": 3.1889, "step": 18444 }, { "epoch": 0.9, "grad_norm": 0.5252522826194763, "learning_rate": 0.0004753313834528294, "loss": 3.2477, "step": 18445 }, { "epoch": 0.9, "grad_norm": 0.5274051427841187, "learning_rate": 0.0004753188901617159, "loss": 3.0772, "step": 18446 }, { "epoch": 0.9, "grad_norm": 0.5190703272819519, "learning_rate": 0.00047530639640884617, "loss": 3.3103, "step": 18447 }, { "epoch": 0.9, "grad_norm": 0.5288125872612, "learning_rate": 0.0004752939021942531, "loss": 2.9752, "step": 18448 }, { "epoch": 0.9, "grad_norm": 0.5266156792640686, "learning_rate": 0.0004752814075179696, "loss": 3.1599, "step": 18449 }, { "epoch": 0.9, "grad_norm": 0.506328284740448, "learning_rate": 0.00047526891238002867, "loss": 3.2425, "step": 18450 }, { "epoch": 0.9, "grad_norm": 0.5088960528373718, "learning_rate": 0.00047525641678046313, "loss": 3.3905, "step": 18451 }, { "epoch": 0.9, "grad_norm": 0.5209529995918274, "learning_rate": 0.00047524392071930595, "loss": 3.1773, "step": 18452 }, { "epoch": 0.9, "grad_norm": 0.524338960647583, "learning_rate": 0.0004752314241965899, "loss": 3.1845, "step": 18453 }, { "epoch": 0.9, "grad_norm": 0.5339444279670715, "learning_rate": 0.00047521892721234804, "loss": 2.802, "step": 18454 }, { "epoch": 0.9, "grad_norm": 0.5499926805496216, "learning_rate": 0.0004752064297666134, "loss": 2.874, "step": 18455 }, { "epoch": 0.9, "grad_norm": 0.5348438620567322, "learning_rate": 0.00047519393185941857, "loss": 3.202, "step": 18456 }, { "epoch": 0.9, "grad_norm": 0.5232212543487549, "learning_rate": 0.00047518143349079673, "loss": 3.4002, "step": 18457 }, { "epoch": 0.9, "grad_norm": 0.4834090769290924, "learning_rate": 0.00047516893466078067, "loss": 3.2367, "step": 18458 }, { "epoch": 0.9, "grad_norm": 0.5394415855407715, "learning_rate": 0.00047515643536940336, "loss": 2.8562, "step": 18459 }, { "epoch": 0.9, "grad_norm": 0.5132725238800049, "learning_rate": 0.0004751439356166977, "loss": 3.3076, "step": 18460 }, { "epoch": 0.9, "grad_norm": 0.5205427408218384, "learning_rate": 0.00047513143540269665, "loss": 3.3082, "step": 18461 }, { "epoch": 0.9, "grad_norm": 0.4787410497665405, "learning_rate": 0.00047511893472743306, "loss": 3.0763, "step": 18462 }, { "epoch": 0.9, "grad_norm": 0.5411839485168457, "learning_rate": 0.00047510643359093983, "loss": 3.0592, "step": 18463 }, { "epoch": 0.9, "grad_norm": 0.5531505346298218, "learning_rate": 0.0004750939319932501, "loss": 3.062, "step": 18464 }, { "epoch": 0.9, "grad_norm": 0.52873295545578, "learning_rate": 0.0004750814299343966, "loss": 3.3097, "step": 18465 }, { "epoch": 0.9, "grad_norm": 0.5306869149208069, "learning_rate": 0.00047506892741441234, "loss": 3.046, "step": 18466 }, { "epoch": 0.91, "grad_norm": 0.5929498076438904, "learning_rate": 0.0004750564244333302, "loss": 2.9961, "step": 18467 }, { "epoch": 0.91, "grad_norm": 0.5276499390602112, "learning_rate": 0.0004750439209911831, "loss": 3.1753, "step": 18468 }, { "epoch": 0.91, "grad_norm": 0.5233253836631775, "learning_rate": 0.00047503141708800394, "loss": 3.1066, "step": 18469 }, { "epoch": 0.91, "grad_norm": 0.527217447757721, "learning_rate": 0.00047501891272382584, "loss": 3.0618, "step": 18470 }, { "epoch": 0.91, "grad_norm": 0.5036908388137817, "learning_rate": 0.00047500640789868156, "loss": 2.9057, "step": 18471 }, { "epoch": 0.91, "grad_norm": 0.5078242421150208, "learning_rate": 0.000474993902612604, "loss": 3.4062, "step": 18472 }, { "epoch": 0.91, "grad_norm": 0.5183581113815308, "learning_rate": 0.0004749813968656262, "loss": 3.1168, "step": 18473 }, { "epoch": 0.91, "grad_norm": 0.5693612098693848, "learning_rate": 0.0004749688906577812, "loss": 2.7811, "step": 18474 }, { "epoch": 0.91, "grad_norm": 0.5193041563034058, "learning_rate": 0.0004749563839891017, "loss": 3.0456, "step": 18475 }, { "epoch": 0.91, "grad_norm": 0.5137484669685364, "learning_rate": 0.0004749438768596208, "loss": 3.1382, "step": 18476 }, { "epoch": 0.91, "grad_norm": 0.536248505115509, "learning_rate": 0.00047493136926937137, "loss": 2.9656, "step": 18477 }, { "epoch": 0.91, "grad_norm": 0.5261755585670471, "learning_rate": 0.0004749188612183865, "loss": 3.4115, "step": 18478 }, { "epoch": 0.91, "grad_norm": 0.5276913642883301, "learning_rate": 0.00047490635270669885, "loss": 3.3476, "step": 18479 }, { "epoch": 0.91, "grad_norm": 0.5933910012245178, "learning_rate": 0.00047489384373434156, "loss": 3.1409, "step": 18480 }, { "epoch": 0.91, "grad_norm": 0.5715004205703735, "learning_rate": 0.00047488133430134764, "loss": 3.1825, "step": 18481 }, { "epoch": 0.91, "grad_norm": 0.5107777118682861, "learning_rate": 0.0004748688244077499, "loss": 3.1458, "step": 18482 }, { "epoch": 0.91, "grad_norm": 0.49393430352211, "learning_rate": 0.00047485631405358127, "loss": 3.0858, "step": 18483 }, { "epoch": 0.91, "grad_norm": 0.514667272567749, "learning_rate": 0.0004748438032388748, "loss": 3.2898, "step": 18484 }, { "epoch": 0.91, "grad_norm": 0.5319417715072632, "learning_rate": 0.00047483129196366336, "loss": 3.3375, "step": 18485 }, { "epoch": 0.91, "grad_norm": 0.5158092975616455, "learning_rate": 0.00047481878022798005, "loss": 3.162, "step": 18486 }, { "epoch": 0.91, "grad_norm": 0.5055559277534485, "learning_rate": 0.0004748062680318576, "loss": 3.204, "step": 18487 }, { "epoch": 0.91, "grad_norm": 0.5113157033920288, "learning_rate": 0.0004747937553753291, "loss": 3.156, "step": 18488 }, { "epoch": 0.91, "grad_norm": 0.5367483496665955, "learning_rate": 0.00047478124225842764, "loss": 3.0957, "step": 18489 }, { "epoch": 0.91, "grad_norm": 0.5241609811782837, "learning_rate": 0.0004747687286811859, "loss": 3.2377, "step": 18490 }, { "epoch": 0.91, "grad_norm": 0.49339574575424194, "learning_rate": 0.0004747562146436369, "loss": 3.1479, "step": 18491 }, { "epoch": 0.91, "grad_norm": 0.512546956539154, "learning_rate": 0.0004747437001458138, "loss": 3.2149, "step": 18492 }, { "epoch": 0.91, "grad_norm": 0.5302311182022095, "learning_rate": 0.0004747311851877493, "loss": 3.1386, "step": 18493 }, { "epoch": 0.91, "grad_norm": 0.5048165321350098, "learning_rate": 0.0004747186697694766, "loss": 3.132, "step": 18494 }, { "epoch": 0.91, "grad_norm": 0.5217496156692505, "learning_rate": 0.0004747061538910285, "loss": 3.2443, "step": 18495 }, { "epoch": 0.91, "grad_norm": 0.5357189774513245, "learning_rate": 0.00047469363755243797, "loss": 3.0927, "step": 18496 }, { "epoch": 0.91, "grad_norm": 0.5418717265129089, "learning_rate": 0.00047468112075373797, "loss": 3.3158, "step": 18497 }, { "epoch": 0.91, "grad_norm": 0.5489016771316528, "learning_rate": 0.0004746686034949617, "loss": 3.3727, "step": 18498 }, { "epoch": 0.91, "grad_norm": 0.5446333885192871, "learning_rate": 0.0004746560857761418, "loss": 3.3276, "step": 18499 }, { "epoch": 0.91, "grad_norm": 0.5174359679222107, "learning_rate": 0.0004746435675973115, "loss": 2.9588, "step": 18500 }, { "epoch": 0.91, "grad_norm": 0.5231800079345703, "learning_rate": 0.0004746310489585036, "loss": 3.1466, "step": 18501 }, { "epoch": 0.91, "grad_norm": 0.5035195350646973, "learning_rate": 0.000474618529859751, "loss": 2.9377, "step": 18502 }, { "epoch": 0.91, "grad_norm": 0.5154784917831421, "learning_rate": 0.000474606010301087, "loss": 3.1582, "step": 18503 }, { "epoch": 0.91, "grad_norm": 0.5330885052680969, "learning_rate": 0.0004745934902825442, "loss": 3.1804, "step": 18504 }, { "epoch": 0.91, "grad_norm": 0.4971103370189667, "learning_rate": 0.0004745809698041558, "loss": 3.2581, "step": 18505 }, { "epoch": 0.91, "grad_norm": 0.521308422088623, "learning_rate": 0.00047456844886595485, "loss": 3.0899, "step": 18506 }, { "epoch": 0.91, "grad_norm": 0.5021144151687622, "learning_rate": 0.00047455592746797404, "loss": 3.2276, "step": 18507 }, { "epoch": 0.91, "grad_norm": 0.5008601546287537, "learning_rate": 0.0004745434056102466, "loss": 3.2421, "step": 18508 }, { "epoch": 0.91, "grad_norm": 0.5506516695022583, "learning_rate": 0.0004745308832928054, "loss": 3.3397, "step": 18509 }, { "epoch": 0.91, "grad_norm": 0.5164962410926819, "learning_rate": 0.0004745183605156835, "loss": 2.8647, "step": 18510 }, { "epoch": 0.91, "grad_norm": 0.6326226592063904, "learning_rate": 0.00047450583727891375, "loss": 3.2928, "step": 18511 }, { "epoch": 0.91, "grad_norm": 0.507206380367279, "learning_rate": 0.0004744933135825292, "loss": 3.1631, "step": 18512 }, { "epoch": 0.91, "grad_norm": 0.5537012815475464, "learning_rate": 0.00047448078942656285, "loss": 3.2021, "step": 18513 }, { "epoch": 0.91, "grad_norm": 0.5219477415084839, "learning_rate": 0.00047446826481104774, "loss": 3.1808, "step": 18514 }, { "epoch": 0.91, "grad_norm": 0.5431067943572998, "learning_rate": 0.00047445573973601673, "loss": 3.2918, "step": 18515 }, { "epoch": 0.91, "grad_norm": 0.5809006690979004, "learning_rate": 0.0004744432142015029, "loss": 3.0758, "step": 18516 }, { "epoch": 0.91, "grad_norm": 0.5215701460838318, "learning_rate": 0.0004744306882075392, "loss": 3.1764, "step": 18517 }, { "epoch": 0.91, "grad_norm": 0.4926910102367401, "learning_rate": 0.0004744181617541587, "loss": 3.2759, "step": 18518 }, { "epoch": 0.91, "grad_norm": 0.5390565991401672, "learning_rate": 0.0004744056348413942, "loss": 3.1481, "step": 18519 }, { "epoch": 0.91, "grad_norm": 0.523172914981842, "learning_rate": 0.00047439310746927896, "loss": 3.0277, "step": 18520 }, { "epoch": 0.91, "grad_norm": 0.5034345984458923, "learning_rate": 0.0004743805796378458, "loss": 3.2691, "step": 18521 }, { "epoch": 0.91, "grad_norm": 0.5197492837905884, "learning_rate": 0.00047436805134712774, "loss": 3.1503, "step": 18522 }, { "epoch": 0.91, "grad_norm": 0.5116248726844788, "learning_rate": 0.00047435552259715775, "loss": 3.1957, "step": 18523 }, { "epoch": 0.91, "grad_norm": 0.5436865091323853, "learning_rate": 0.0004743429933879689, "loss": 3.2398, "step": 18524 }, { "epoch": 0.91, "grad_norm": 0.5367627739906311, "learning_rate": 0.0004743304637195942, "loss": 3.0797, "step": 18525 }, { "epoch": 0.91, "grad_norm": 0.49198198318481445, "learning_rate": 0.0004743179335920666, "loss": 3.2193, "step": 18526 }, { "epoch": 0.91, "grad_norm": 0.5412742495536804, "learning_rate": 0.00047430540300541903, "loss": 3.1761, "step": 18527 }, { "epoch": 0.91, "grad_norm": 0.5427865386009216, "learning_rate": 0.0004742928719596846, "loss": 3.1722, "step": 18528 }, { "epoch": 0.91, "grad_norm": 0.5101169347763062, "learning_rate": 0.00047428034045489636, "loss": 3.0649, "step": 18529 }, { "epoch": 0.91, "grad_norm": 0.5221566557884216, "learning_rate": 0.00047426780849108723, "loss": 3.0671, "step": 18530 }, { "epoch": 0.91, "grad_norm": 0.5130900144577026, "learning_rate": 0.00047425527606829017, "loss": 3.2507, "step": 18531 }, { "epoch": 0.91, "grad_norm": 0.522251307964325, "learning_rate": 0.0004742427431865383, "loss": 3.1376, "step": 18532 }, { "epoch": 0.91, "grad_norm": 0.5170092582702637, "learning_rate": 0.00047423020984586455, "loss": 3.3573, "step": 18533 }, { "epoch": 0.91, "grad_norm": 0.5282794833183289, "learning_rate": 0.0004742176760463019, "loss": 3.0622, "step": 18534 }, { "epoch": 0.91, "grad_norm": 0.5373873114585876, "learning_rate": 0.0004742051417878836, "loss": 2.9846, "step": 18535 }, { "epoch": 0.91, "grad_norm": 0.5340383052825928, "learning_rate": 0.0004741926070706423, "loss": 3.3364, "step": 18536 }, { "epoch": 0.91, "grad_norm": 0.5768246650695801, "learning_rate": 0.0004741800718946113, "loss": 3.0228, "step": 18537 }, { "epoch": 0.91, "grad_norm": 0.508990466594696, "learning_rate": 0.0004741675362598235, "loss": 3.1434, "step": 18538 }, { "epoch": 0.91, "grad_norm": 0.5513434410095215, "learning_rate": 0.00047415500016631187, "loss": 3.1098, "step": 18539 }, { "epoch": 0.91, "grad_norm": 0.5139069557189941, "learning_rate": 0.00047414246361410955, "loss": 3.2203, "step": 18540 }, { "epoch": 0.91, "grad_norm": 0.5511561632156372, "learning_rate": 0.0004741299266032495, "loss": 3.1658, "step": 18541 }, { "epoch": 0.91, "grad_norm": 0.5869661569595337, "learning_rate": 0.0004741173891337647, "loss": 2.964, "step": 18542 }, { "epoch": 0.91, "grad_norm": 0.5423870086669922, "learning_rate": 0.0004741048512056882, "loss": 3.1912, "step": 18543 }, { "epoch": 0.91, "grad_norm": 0.5338352918624878, "learning_rate": 0.00047409231281905296, "loss": 2.918, "step": 18544 }, { "epoch": 0.91, "grad_norm": 0.5400632619857788, "learning_rate": 0.0004740797739738922, "loss": 3.3985, "step": 18545 }, { "epoch": 0.91, "grad_norm": 0.515967607498169, "learning_rate": 0.00047406723467023886, "loss": 3.3357, "step": 18546 }, { "epoch": 0.91, "grad_norm": 0.5285216569900513, "learning_rate": 0.0004740546949081258, "loss": 3.2011, "step": 18547 }, { "epoch": 0.91, "grad_norm": 0.5364177227020264, "learning_rate": 0.0004740421546875862, "loss": 3.1159, "step": 18548 }, { "epoch": 0.91, "grad_norm": 0.5312500596046448, "learning_rate": 0.000474029614008653, "loss": 3.2883, "step": 18549 }, { "epoch": 0.91, "grad_norm": 0.5263493657112122, "learning_rate": 0.0004740170728713594, "loss": 3.2674, "step": 18550 }, { "epoch": 0.91, "grad_norm": 0.49179357290267944, "learning_rate": 0.00047400453127573834, "loss": 3.2257, "step": 18551 }, { "epoch": 0.91, "grad_norm": 0.527242124080658, "learning_rate": 0.0004739919892218228, "loss": 3.1437, "step": 18552 }, { "epoch": 0.91, "grad_norm": 0.5284623503684998, "learning_rate": 0.0004739794467096458, "loss": 3.1903, "step": 18553 }, { "epoch": 0.91, "grad_norm": 0.5200530886650085, "learning_rate": 0.0004739669037392404, "loss": 3.1414, "step": 18554 }, { "epoch": 0.91, "grad_norm": 0.5152423977851868, "learning_rate": 0.0004739543603106397, "loss": 3.1332, "step": 18555 }, { "epoch": 0.91, "grad_norm": 0.5122321844100952, "learning_rate": 0.0004739418164238766, "loss": 3.2215, "step": 18556 }, { "epoch": 0.91, "grad_norm": 0.5178201198577881, "learning_rate": 0.0004739292720789844, "loss": 3.16, "step": 18557 }, { "epoch": 0.91, "grad_norm": 0.5422595143318176, "learning_rate": 0.00047391672727599584, "loss": 3.1908, "step": 18558 }, { "epoch": 0.91, "grad_norm": 0.5113765597343445, "learning_rate": 0.0004739041820149441, "loss": 3.1155, "step": 18559 }, { "epoch": 0.91, "grad_norm": 0.5253166556358337, "learning_rate": 0.00047389163629586235, "loss": 2.9544, "step": 18560 }, { "epoch": 0.91, "grad_norm": 0.5218266248703003, "learning_rate": 0.00047387909011878333, "loss": 2.8984, "step": 18561 }, { "epoch": 0.91, "grad_norm": 0.5512539744377136, "learning_rate": 0.00047386654348374036, "loss": 3.0539, "step": 18562 }, { "epoch": 0.91, "grad_norm": 0.5330885052680969, "learning_rate": 0.0004738539963907664, "loss": 3.091, "step": 18563 }, { "epoch": 0.91, "grad_norm": 0.5763328075408936, "learning_rate": 0.0004738414488398943, "loss": 3.1968, "step": 18564 }, { "epoch": 0.91, "grad_norm": 0.5141280889511108, "learning_rate": 0.0004738289008311575, "loss": 3.1497, "step": 18565 }, { "epoch": 0.91, "grad_norm": 0.5118435025215149, "learning_rate": 0.0004738163523645887, "loss": 2.9679, "step": 18566 }, { "epoch": 0.91, "grad_norm": 0.5282021760940552, "learning_rate": 0.00047380380344022105, "loss": 3.0492, "step": 18567 }, { "epoch": 0.91, "grad_norm": 0.6659740805625916, "learning_rate": 0.0004737912540580877, "loss": 3.1675, "step": 18568 }, { "epoch": 0.91, "grad_norm": 0.520656168460846, "learning_rate": 0.0004737787042182216, "loss": 3.2612, "step": 18569 }, { "epoch": 0.91, "grad_norm": 0.550905168056488, "learning_rate": 0.0004737661539206558, "loss": 3.0811, "step": 18570 }, { "epoch": 0.91, "grad_norm": 0.532606840133667, "learning_rate": 0.0004737536031654234, "loss": 3.0482, "step": 18571 }, { "epoch": 0.91, "grad_norm": 0.5544597506523132, "learning_rate": 0.00047374105195255756, "loss": 3.0428, "step": 18572 }, { "epoch": 0.91, "grad_norm": 0.534093976020813, "learning_rate": 0.0004737285002820912, "loss": 3.176, "step": 18573 }, { "epoch": 0.91, "grad_norm": 0.5330844521522522, "learning_rate": 0.00047371594815405726, "loss": 3.0923, "step": 18574 }, { "epoch": 0.91, "grad_norm": 0.46944698691368103, "learning_rate": 0.00047370339556848906, "loss": 3.0567, "step": 18575 }, { "epoch": 0.91, "grad_norm": 0.5329000949859619, "learning_rate": 0.0004736908425254195, "loss": 3.0776, "step": 18576 }, { "epoch": 0.91, "grad_norm": 0.5113318562507629, "learning_rate": 0.0004736782890248818, "loss": 3.0746, "step": 18577 }, { "epoch": 0.91, "grad_norm": 0.507371723651886, "learning_rate": 0.0004736657350669088, "loss": 3.1737, "step": 18578 }, { "epoch": 0.91, "grad_norm": 0.5299968719482422, "learning_rate": 0.0004736531806515337, "loss": 3.0233, "step": 18579 }, { "epoch": 0.91, "grad_norm": 0.5169973373413086, "learning_rate": 0.0004736406257787895, "loss": 3.1592, "step": 18580 }, { "epoch": 0.91, "grad_norm": 0.5072317719459534, "learning_rate": 0.0004736280704487094, "loss": 3.24, "step": 18581 }, { "epoch": 0.91, "grad_norm": 0.592707097530365, "learning_rate": 0.00047361551466132634, "loss": 3.1434, "step": 18582 }, { "epoch": 0.91, "grad_norm": 0.5373207926750183, "learning_rate": 0.00047360295841667344, "loss": 3.1949, "step": 18583 }, { "epoch": 0.91, "grad_norm": 0.555252194404602, "learning_rate": 0.0004735904017147838, "loss": 3.2368, "step": 18584 }, { "epoch": 0.91, "grad_norm": 0.5238771438598633, "learning_rate": 0.00047357784455569034, "loss": 3.2168, "step": 18585 }, { "epoch": 0.91, "grad_norm": 0.5633851289749146, "learning_rate": 0.0004735652869394263, "loss": 3.4173, "step": 18586 }, { "epoch": 0.91, "grad_norm": 0.5264574289321899, "learning_rate": 0.0004735527288660247, "loss": 3.1726, "step": 18587 }, { "epoch": 0.91, "grad_norm": 0.5027996301651001, "learning_rate": 0.00047354017033551864, "loss": 3.466, "step": 18588 }, { "epoch": 0.91, "grad_norm": 0.5182791948318481, "learning_rate": 0.00047352761134794114, "loss": 3.0939, "step": 18589 }, { "epoch": 0.91, "grad_norm": 0.4975305497646332, "learning_rate": 0.00047351505190332526, "loss": 3.3298, "step": 18590 }, { "epoch": 0.91, "grad_norm": 0.5365167856216431, "learning_rate": 0.0004735024920017042, "loss": 3.0189, "step": 18591 }, { "epoch": 0.91, "grad_norm": 0.5220871567726135, "learning_rate": 0.00047348993164311086, "loss": 3.3163, "step": 18592 }, { "epoch": 0.91, "grad_norm": 0.5201936960220337, "learning_rate": 0.00047347737082757853, "loss": 3.1742, "step": 18593 }, { "epoch": 0.91, "grad_norm": 0.5495591163635254, "learning_rate": 0.0004734648095551402, "loss": 3.2221, "step": 18594 }, { "epoch": 0.91, "grad_norm": 0.504267156124115, "learning_rate": 0.0004734522478258289, "loss": 3.4931, "step": 18595 }, { "epoch": 0.91, "grad_norm": 0.5782771706581116, "learning_rate": 0.0004734396856396778, "loss": 3.1074, "step": 18596 }, { "epoch": 0.91, "grad_norm": 0.523160457611084, "learning_rate": 0.0004734271229967199, "loss": 3.0729, "step": 18597 }, { "epoch": 0.91, "grad_norm": 0.5083732008934021, "learning_rate": 0.0004734145598969884, "loss": 3.2266, "step": 18598 }, { "epoch": 0.91, "grad_norm": 0.538865864276886, "learning_rate": 0.00047340199634051625, "loss": 3.0961, "step": 18599 }, { "epoch": 0.91, "grad_norm": 0.528105616569519, "learning_rate": 0.00047338943232733664, "loss": 3.0448, "step": 18600 }, { "epoch": 0.91, "grad_norm": 0.5887966156005859, "learning_rate": 0.00047337686785748256, "loss": 3.0425, "step": 18601 }, { "epoch": 0.91, "grad_norm": 0.49562567472457886, "learning_rate": 0.0004733643029309873, "loss": 3.3061, "step": 18602 }, { "epoch": 0.91, "grad_norm": 0.5125460028648376, "learning_rate": 0.0004733517375478838, "loss": 2.8714, "step": 18603 }, { "epoch": 0.91, "grad_norm": 0.5145890712738037, "learning_rate": 0.0004733391717082052, "loss": 3.0679, "step": 18604 }, { "epoch": 0.91, "grad_norm": 0.5396844148635864, "learning_rate": 0.00047332660541198453, "loss": 3.1425, "step": 18605 }, { "epoch": 0.91, "grad_norm": 0.5691478252410889, "learning_rate": 0.00047331403865925496, "loss": 3.3351, "step": 18606 }, { "epoch": 0.91, "grad_norm": 0.5248494148254395, "learning_rate": 0.0004733014714500496, "loss": 3.2248, "step": 18607 }, { "epoch": 0.91, "grad_norm": 0.49655720591545105, "learning_rate": 0.0004732889037844014, "loss": 3.0864, "step": 18608 }, { "epoch": 0.91, "grad_norm": 0.5340755581855774, "learning_rate": 0.0004732763356623437, "loss": 3.1839, "step": 18609 }, { "epoch": 0.91, "grad_norm": 0.5292792916297913, "learning_rate": 0.0004732637670839094, "loss": 3.3367, "step": 18610 }, { "epoch": 0.91, "grad_norm": 0.48931747674942017, "learning_rate": 0.0004732511980491318, "loss": 3.4235, "step": 18611 }, { "epoch": 0.91, "grad_norm": 0.5485087037086487, "learning_rate": 0.0004732386285580438, "loss": 3.0873, "step": 18612 }, { "epoch": 0.91, "grad_norm": 0.5549004077911377, "learning_rate": 0.0004732260586106787, "loss": 3.109, "step": 18613 }, { "epoch": 0.91, "grad_norm": 0.5397618412971497, "learning_rate": 0.00047321348820706933, "loss": 3.1704, "step": 18614 }, { "epoch": 0.91, "grad_norm": 0.5148725509643555, "learning_rate": 0.000473200917347249, "loss": 3.1237, "step": 18615 }, { "epoch": 0.91, "grad_norm": 0.49203985929489136, "learning_rate": 0.00047318834603125084, "loss": 3.1485, "step": 18616 }, { "epoch": 0.91, "grad_norm": 0.5316979289054871, "learning_rate": 0.00047317577425910794, "loss": 3.2129, "step": 18617 }, { "epoch": 0.91, "grad_norm": 0.5986040234565735, "learning_rate": 0.00047316320203085333, "loss": 3.1176, "step": 18618 }, { "epoch": 0.91, "grad_norm": 0.5387392044067383, "learning_rate": 0.0004731506293465202, "loss": 3.1098, "step": 18619 }, { "epoch": 0.91, "grad_norm": 0.5144277811050415, "learning_rate": 0.0004731380562061416, "loss": 3.1374, "step": 18620 }, { "epoch": 0.91, "grad_norm": 0.5290771126747131, "learning_rate": 0.0004731254826097507, "loss": 3.1971, "step": 18621 }, { "epoch": 0.91, "grad_norm": 0.5087745785713196, "learning_rate": 0.00047311290855738057, "loss": 3.3343, "step": 18622 }, { "epoch": 0.91, "grad_norm": 0.5245369076728821, "learning_rate": 0.00047310033404906435, "loss": 3.2556, "step": 18623 }, { "epoch": 0.91, "grad_norm": 0.5257542729377747, "learning_rate": 0.00047308775908483526, "loss": 3.2636, "step": 18624 }, { "epoch": 0.91, "grad_norm": 0.50067138671875, "learning_rate": 0.00047307518366472627, "loss": 3.2939, "step": 18625 }, { "epoch": 0.91, "grad_norm": 0.550284743309021, "learning_rate": 0.0004730626077887705, "loss": 3.1894, "step": 18626 }, { "epoch": 0.91, "grad_norm": 0.5422661900520325, "learning_rate": 0.00047305003145700115, "loss": 3.2083, "step": 18627 }, { "epoch": 0.91, "grad_norm": 0.5483496785163879, "learning_rate": 0.00047303745466945133, "loss": 3.029, "step": 18628 }, { "epoch": 0.91, "grad_norm": 0.5778025388717651, "learning_rate": 0.0004730248774261542, "loss": 3.0593, "step": 18629 }, { "epoch": 0.91, "grad_norm": 0.5487713813781738, "learning_rate": 0.0004730122997271428, "loss": 3.2694, "step": 18630 }, { "epoch": 0.91, "grad_norm": 0.5207985043525696, "learning_rate": 0.00047299972157245027, "loss": 3.1083, "step": 18631 }, { "epoch": 0.91, "grad_norm": 0.5357333421707153, "learning_rate": 0.00047298714296210983, "loss": 3.2432, "step": 18632 }, { "epoch": 0.91, "grad_norm": 0.5432522296905518, "learning_rate": 0.00047297456389615446, "loss": 3.461, "step": 18633 }, { "epoch": 0.91, "grad_norm": 0.4886643886566162, "learning_rate": 0.0004729619843746175, "loss": 3.2724, "step": 18634 }, { "epoch": 0.91, "grad_norm": 0.5570962429046631, "learning_rate": 0.00047294940439753197, "loss": 3.1144, "step": 18635 }, { "epoch": 0.91, "grad_norm": 0.559655487537384, "learning_rate": 0.00047293682396493087, "loss": 3.2365, "step": 18636 }, { "epoch": 0.91, "grad_norm": 0.5164363384246826, "learning_rate": 0.0004729242430768475, "loss": 3.0816, "step": 18637 }, { "epoch": 0.91, "grad_norm": 0.5455911159515381, "learning_rate": 0.00047291166173331497, "loss": 3.2254, "step": 18638 }, { "epoch": 0.91, "grad_norm": 0.5552606582641602, "learning_rate": 0.0004728990799343664, "loss": 3.1659, "step": 18639 }, { "epoch": 0.91, "grad_norm": 0.5630795955657959, "learning_rate": 0.00047288649768003493, "loss": 3.2478, "step": 18640 }, { "epoch": 0.91, "grad_norm": 0.5146662592887878, "learning_rate": 0.00047287391497035367, "loss": 3.1473, "step": 18641 }, { "epoch": 0.91, "grad_norm": 0.5582475066184998, "learning_rate": 0.00047286133180535585, "loss": 3.0386, "step": 18642 }, { "epoch": 0.91, "grad_norm": 0.5350977778434753, "learning_rate": 0.00047284874818507444, "loss": 3.1054, "step": 18643 }, { "epoch": 0.91, "grad_norm": 0.5022348165512085, "learning_rate": 0.0004728361641095428, "loss": 3.1659, "step": 18644 }, { "epoch": 0.91, "grad_norm": 0.5127667188644409, "learning_rate": 0.00047282357957879396, "loss": 3.0755, "step": 18645 }, { "epoch": 0.91, "grad_norm": 0.5345514416694641, "learning_rate": 0.000472810994592861, "loss": 3.3378, "step": 18646 }, { "epoch": 0.91, "grad_norm": 0.5057428479194641, "learning_rate": 0.0004727984091517772, "loss": 3.0245, "step": 18647 }, { "epoch": 0.91, "grad_norm": 0.5135453343391418, "learning_rate": 0.0004727858232555756, "loss": 3.1026, "step": 18648 }, { "epoch": 0.91, "grad_norm": 0.539706289768219, "learning_rate": 0.00047277323690428943, "loss": 3.1584, "step": 18649 }, { "epoch": 0.91, "grad_norm": 0.5512105822563171, "learning_rate": 0.00047276065009795184, "loss": 3.2228, "step": 18650 }, { "epoch": 0.91, "grad_norm": 0.4987412095069885, "learning_rate": 0.00047274806283659584, "loss": 3.3146, "step": 18651 }, { "epoch": 0.91, "grad_norm": 0.5345869064331055, "learning_rate": 0.00047273547512025483, "loss": 3.2434, "step": 18652 }, { "epoch": 0.91, "grad_norm": 0.5088310837745667, "learning_rate": 0.0004727228869489617, "loss": 3.2025, "step": 18653 }, { "epoch": 0.91, "grad_norm": 0.5335360765457153, "learning_rate": 0.00047271029832274974, "loss": 3.2062, "step": 18654 }, { "epoch": 0.91, "grad_norm": 0.5184034705162048, "learning_rate": 0.0004726977092416522, "loss": 3.2948, "step": 18655 }, { "epoch": 0.91, "grad_norm": 0.5256913304328918, "learning_rate": 0.00047268511970570203, "loss": 3.186, "step": 18656 }, { "epoch": 0.91, "grad_norm": 0.5344150066375732, "learning_rate": 0.0004726725297149325, "loss": 3.1916, "step": 18657 }, { "epoch": 0.91, "grad_norm": 0.6184183955192566, "learning_rate": 0.0004726599392693768, "loss": 3.3629, "step": 18658 }, { "epoch": 0.91, "grad_norm": 0.49395012855529785, "learning_rate": 0.00047264734836906805, "loss": 3.1883, "step": 18659 }, { "epoch": 0.91, "grad_norm": 0.532318651676178, "learning_rate": 0.00047263475701403933, "loss": 2.9763, "step": 18660 }, { "epoch": 0.91, "grad_norm": 0.6311165690422058, "learning_rate": 0.000472622165204324, "loss": 3.2059, "step": 18661 }, { "epoch": 0.91, "grad_norm": 0.5295420289039612, "learning_rate": 0.000472609572939955, "loss": 3.1236, "step": 18662 }, { "epoch": 0.91, "grad_norm": 0.5523192882537842, "learning_rate": 0.0004725969802209656, "loss": 3.2315, "step": 18663 }, { "epoch": 0.91, "grad_norm": 0.52613365650177, "learning_rate": 0.0004725843870473891, "loss": 3.1807, "step": 18664 }, { "epoch": 0.91, "grad_norm": 0.5176199078559875, "learning_rate": 0.0004725717934192584, "loss": 3.0597, "step": 18665 }, { "epoch": 0.91, "grad_norm": 0.5190128684043884, "learning_rate": 0.00047255919933660695, "loss": 2.7741, "step": 18666 }, { "epoch": 0.91, "grad_norm": 0.5189964175224304, "learning_rate": 0.0004725466047994676, "loss": 3.3417, "step": 18667 }, { "epoch": 0.91, "grad_norm": 0.5286535024642944, "learning_rate": 0.0004725340098078738, "loss": 3.1332, "step": 18668 }, { "epoch": 0.91, "grad_norm": 0.5291597247123718, "learning_rate": 0.00047252141436185857, "loss": 3.0903, "step": 18669 }, { "epoch": 0.91, "grad_norm": 0.5236563682556152, "learning_rate": 0.00047250881846145525, "loss": 3.16, "step": 18670 }, { "epoch": 0.92, "grad_norm": 0.5021608471870422, "learning_rate": 0.0004724962221066969, "loss": 3.2304, "step": 18671 }, { "epoch": 0.92, "grad_norm": 0.5602541565895081, "learning_rate": 0.0004724836252976165, "loss": 3.2892, "step": 18672 }, { "epoch": 0.92, "grad_norm": 0.5403422117233276, "learning_rate": 0.0004724710280342477, "loss": 3.4172, "step": 18673 }, { "epoch": 0.92, "grad_norm": 0.5357560515403748, "learning_rate": 0.0004724584303166232, "loss": 3.4125, "step": 18674 }, { "epoch": 0.92, "grad_norm": 0.5377302765846252, "learning_rate": 0.0004724458321447765, "loss": 3.1649, "step": 18675 }, { "epoch": 0.92, "grad_norm": 0.5543449521064758, "learning_rate": 0.0004724332335187406, "loss": 3.0616, "step": 18676 }, { "epoch": 0.92, "grad_norm": 0.5366581082344055, "learning_rate": 0.0004724206344385487, "loss": 3.1542, "step": 18677 }, { "epoch": 0.92, "grad_norm": 0.5297412276268005, "learning_rate": 0.00047240803490423413, "loss": 3.3427, "step": 18678 }, { "epoch": 0.92, "grad_norm": 0.6091488599777222, "learning_rate": 0.00047239543491582993, "loss": 3.0589, "step": 18679 }, { "epoch": 0.92, "grad_norm": 0.5463923215866089, "learning_rate": 0.0004723828344733694, "loss": 3.1833, "step": 18680 }, { "epoch": 0.92, "grad_norm": 0.5743600726127625, "learning_rate": 0.0004723702335768856, "loss": 3.0632, "step": 18681 }, { "epoch": 0.92, "grad_norm": 0.5193010568618774, "learning_rate": 0.00047235763222641176, "loss": 3.3067, "step": 18682 }, { "epoch": 0.92, "grad_norm": 0.5466593503952026, "learning_rate": 0.0004723450304219811, "loss": 3.1118, "step": 18683 }, { "epoch": 0.92, "grad_norm": 0.5027567148208618, "learning_rate": 0.0004723324281636268, "loss": 3.3245, "step": 18684 }, { "epoch": 0.92, "grad_norm": 0.5612360239028931, "learning_rate": 0.00047231982545138207, "loss": 3.0984, "step": 18685 }, { "epoch": 0.92, "grad_norm": 0.49265792965888977, "learning_rate": 0.0004723072222852801, "loss": 3.2577, "step": 18686 }, { "epoch": 0.92, "grad_norm": 0.5681557059288025, "learning_rate": 0.00047229461866535403, "loss": 3.1238, "step": 18687 }, { "epoch": 0.92, "grad_norm": 0.5673431158065796, "learning_rate": 0.00047228201459163706, "loss": 3.1261, "step": 18688 }, { "epoch": 0.92, "grad_norm": 0.5525531768798828, "learning_rate": 0.00047226941006416246, "loss": 3.3082, "step": 18689 }, { "epoch": 0.92, "grad_norm": 0.5107969641685486, "learning_rate": 0.00047225680508296346, "loss": 3.0155, "step": 18690 }, { "epoch": 0.92, "grad_norm": 0.5525048971176147, "learning_rate": 0.00047224419964807306, "loss": 3.0787, "step": 18691 }, { "epoch": 0.92, "grad_norm": 0.5003828406333923, "learning_rate": 0.00047223159375952466, "loss": 3.253, "step": 18692 }, { "epoch": 0.92, "grad_norm": 0.49926337599754333, "learning_rate": 0.0004722189874173514, "loss": 3.1447, "step": 18693 }, { "epoch": 0.92, "grad_norm": 0.49048912525177, "learning_rate": 0.00047220638062158647, "loss": 3.1531, "step": 18694 }, { "epoch": 0.92, "grad_norm": 0.5253885984420776, "learning_rate": 0.000472193773372263, "loss": 3.2023, "step": 18695 }, { "epoch": 0.92, "grad_norm": 0.5272942185401917, "learning_rate": 0.0004721811656694144, "loss": 3.1818, "step": 18696 }, { "epoch": 0.92, "grad_norm": 0.5017642378807068, "learning_rate": 0.00047216855751307365, "loss": 3.174, "step": 18697 }, { "epoch": 0.92, "grad_norm": 0.5158599615097046, "learning_rate": 0.000472155948903274, "loss": 3.2798, "step": 18698 }, { "epoch": 0.92, "grad_norm": 0.48646584153175354, "learning_rate": 0.00047214333984004885, "loss": 3.1985, "step": 18699 }, { "epoch": 0.92, "grad_norm": 0.5171990990638733, "learning_rate": 0.0004721307303234312, "loss": 3.128, "step": 18700 }, { "epoch": 0.92, "grad_norm": 0.5293672680854797, "learning_rate": 0.0004721181203534543, "loss": 3.1083, "step": 18701 }, { "epoch": 0.92, "grad_norm": 0.5768073797225952, "learning_rate": 0.00047210550993015155, "loss": 3.1399, "step": 18702 }, { "epoch": 0.92, "grad_norm": 0.5409702658653259, "learning_rate": 0.0004720928990535558, "loss": 3.3084, "step": 18703 }, { "epoch": 0.92, "grad_norm": 0.5259953737258911, "learning_rate": 0.0004720802877237006, "loss": 3.1811, "step": 18704 }, { "epoch": 0.92, "grad_norm": 0.5130147337913513, "learning_rate": 0.0004720676759406191, "loss": 2.9815, "step": 18705 }, { "epoch": 0.92, "grad_norm": 0.4972241520881653, "learning_rate": 0.0004720550637043443, "loss": 3.1822, "step": 18706 }, { "epoch": 0.92, "grad_norm": 0.5117876529693604, "learning_rate": 0.00047204245101490966, "loss": 3.1758, "step": 18707 }, { "epoch": 0.92, "grad_norm": 0.5374881625175476, "learning_rate": 0.0004720298378723482, "loss": 3.3106, "step": 18708 }, { "epoch": 0.92, "grad_norm": 0.5357831716537476, "learning_rate": 0.0004720172242766934, "loss": 3.1024, "step": 18709 }, { "epoch": 0.92, "grad_norm": 0.5341154932975769, "learning_rate": 0.0004720046102279783, "loss": 3.0437, "step": 18710 }, { "epoch": 0.92, "grad_norm": 0.541704535484314, "learning_rate": 0.00047199199572623617, "loss": 3.3368, "step": 18711 }, { "epoch": 0.92, "grad_norm": 0.5049653053283691, "learning_rate": 0.0004719793807715002, "loss": 3.1156, "step": 18712 }, { "epoch": 0.92, "grad_norm": 0.517701268196106, "learning_rate": 0.0004719667653638036, "loss": 3.1901, "step": 18713 }, { "epoch": 0.92, "grad_norm": 0.547834038734436, "learning_rate": 0.00047195414950317965, "loss": 3.1269, "step": 18714 }, { "epoch": 0.92, "grad_norm": 0.5490109920501709, "learning_rate": 0.0004719415331896616, "loss": 3.1857, "step": 18715 }, { "epoch": 0.92, "grad_norm": 0.5507968068122864, "learning_rate": 0.0004719289164232827, "loss": 3.3192, "step": 18716 }, { "epoch": 0.92, "grad_norm": 0.5235647559165955, "learning_rate": 0.000471916299204076, "loss": 3.0702, "step": 18717 }, { "epoch": 0.92, "grad_norm": 0.524595320224762, "learning_rate": 0.0004719036815320749, "loss": 3.0391, "step": 18718 }, { "epoch": 0.92, "grad_norm": 0.5237332582473755, "learning_rate": 0.0004718910634073126, "loss": 3.1249, "step": 18719 }, { "epoch": 0.92, "grad_norm": 0.541515052318573, "learning_rate": 0.0004718784448298223, "loss": 3.0022, "step": 18720 }, { "epoch": 0.92, "grad_norm": 0.5793763995170593, "learning_rate": 0.0004718658257996373, "loss": 3.1743, "step": 18721 }, { "epoch": 0.92, "grad_norm": 0.5258360505104065, "learning_rate": 0.00047185320631679074, "loss": 3.2977, "step": 18722 }, { "epoch": 0.92, "grad_norm": 0.5656243562698364, "learning_rate": 0.0004718405863813159, "loss": 3.304, "step": 18723 }, { "epoch": 0.92, "grad_norm": 0.5465549826622009, "learning_rate": 0.0004718279659932461, "loss": 3.3521, "step": 18724 }, { "epoch": 0.92, "grad_norm": 0.5304923057556152, "learning_rate": 0.0004718153451526144, "loss": 3.1136, "step": 18725 }, { "epoch": 0.92, "grad_norm": 0.5266689658164978, "learning_rate": 0.00047180272385945426, "loss": 3.258, "step": 18726 }, { "epoch": 0.92, "grad_norm": 0.5561476945877075, "learning_rate": 0.0004717901021137988, "loss": 3.1804, "step": 18727 }, { "epoch": 0.92, "grad_norm": 0.5388331413269043, "learning_rate": 0.0004717774799156812, "loss": 3.2172, "step": 18728 }, { "epoch": 0.92, "grad_norm": 0.5558831691741943, "learning_rate": 0.00047176485726513486, "loss": 3.0524, "step": 18729 }, { "epoch": 0.92, "grad_norm": 0.5594724416732788, "learning_rate": 0.00047175223416219287, "loss": 3.1281, "step": 18730 }, { "epoch": 0.92, "grad_norm": 0.5364587306976318, "learning_rate": 0.00047173961060688856, "loss": 3.0472, "step": 18731 }, { "epoch": 0.92, "grad_norm": 0.4832528531551361, "learning_rate": 0.0004717269865992552, "loss": 3.1427, "step": 18732 }, { "epoch": 0.92, "grad_norm": 0.5160096287727356, "learning_rate": 0.00047171436213932594, "loss": 3.2637, "step": 18733 }, { "epoch": 0.92, "grad_norm": 0.5533026456832886, "learning_rate": 0.0004717017372271341, "loss": 3.3568, "step": 18734 }, { "epoch": 0.92, "grad_norm": 0.6159023642539978, "learning_rate": 0.000471689111862713, "loss": 3.4341, "step": 18735 }, { "epoch": 0.92, "grad_norm": 0.5422484874725342, "learning_rate": 0.00047167648604609587, "loss": 3.0425, "step": 18736 }, { "epoch": 0.92, "grad_norm": 0.5426680445671082, "learning_rate": 0.00047166385977731584, "loss": 3.0185, "step": 18737 }, { "epoch": 0.92, "grad_norm": 0.5291516780853271, "learning_rate": 0.0004716512330564062, "loss": 3.0756, "step": 18738 }, { "epoch": 0.92, "grad_norm": 0.5165324211120605, "learning_rate": 0.0004716386058834003, "loss": 3.1437, "step": 18739 }, { "epoch": 0.92, "grad_norm": 0.5441447496414185, "learning_rate": 0.00047162597825833126, "loss": 3.1802, "step": 18740 }, { "epoch": 0.92, "grad_norm": 0.5122418403625488, "learning_rate": 0.00047161335018123255, "loss": 3.0825, "step": 18741 }, { "epoch": 0.92, "grad_norm": 0.5164358019828796, "learning_rate": 0.0004716007216521372, "loss": 3.1459, "step": 18742 }, { "epoch": 0.92, "grad_norm": 0.5052106976509094, "learning_rate": 0.0004715880926710787, "loss": 3.1541, "step": 18743 }, { "epoch": 0.92, "grad_norm": 0.51518315076828, "learning_rate": 0.0004715754632380901, "loss": 3.1712, "step": 18744 }, { "epoch": 0.92, "grad_norm": 0.5447625517845154, "learning_rate": 0.0004715628333532047, "loss": 3.2673, "step": 18745 }, { "epoch": 0.92, "grad_norm": 0.5232028961181641, "learning_rate": 0.00047155020301645596, "loss": 3.2926, "step": 18746 }, { "epoch": 0.92, "grad_norm": 0.5608052015304565, "learning_rate": 0.0004715375722278769, "loss": 3.2625, "step": 18747 }, { "epoch": 0.92, "grad_norm": 0.5659303665161133, "learning_rate": 0.0004715249409875009, "loss": 3.1281, "step": 18748 }, { "epoch": 0.92, "grad_norm": 0.49152451753616333, "learning_rate": 0.0004715123092953612, "loss": 3.1931, "step": 18749 }, { "epoch": 0.92, "grad_norm": 0.5154252052307129, "learning_rate": 0.00047149967715149114, "loss": 3.0923, "step": 18750 }, { "epoch": 0.92, "grad_norm": 0.5097779035568237, "learning_rate": 0.00047148704455592387, "loss": 2.9026, "step": 18751 }, { "epoch": 0.92, "grad_norm": 0.5418404936790466, "learning_rate": 0.00047147441150869273, "loss": 2.9388, "step": 18752 }, { "epoch": 0.92, "grad_norm": 0.5441518425941467, "learning_rate": 0.00047146177800983104, "loss": 3.1708, "step": 18753 }, { "epoch": 0.92, "grad_norm": 0.5178873538970947, "learning_rate": 0.00047144914405937194, "loss": 3.2744, "step": 18754 }, { "epoch": 0.92, "grad_norm": 0.5090039968490601, "learning_rate": 0.000471436509657349, "loss": 3.1552, "step": 18755 }, { "epoch": 0.92, "grad_norm": 0.5149325728416443, "learning_rate": 0.000471423874803795, "loss": 3.2349, "step": 18756 }, { "epoch": 0.92, "grad_norm": 0.5354869961738586, "learning_rate": 0.00047141123949874373, "loss": 3.3082, "step": 18757 }, { "epoch": 0.92, "grad_norm": 0.562247633934021, "learning_rate": 0.00047139860374222813, "loss": 3.2232, "step": 18758 }, { "epoch": 0.92, "grad_norm": 0.5432979464530945, "learning_rate": 0.00047138596753428164, "loss": 3.2849, "step": 18759 }, { "epoch": 0.92, "grad_norm": 0.5252031683921814, "learning_rate": 0.00047137333087493745, "loss": 3.0496, "step": 18760 }, { "epoch": 0.92, "grad_norm": 0.5139029622077942, "learning_rate": 0.00047136069376422894, "loss": 3.0417, "step": 18761 }, { "epoch": 0.92, "grad_norm": 0.49422696232795715, "learning_rate": 0.00047134805620218935, "loss": 3.0737, "step": 18762 }, { "epoch": 0.92, "grad_norm": 0.548893928527832, "learning_rate": 0.0004713354181888519, "loss": 3.2133, "step": 18763 }, { "epoch": 0.92, "grad_norm": 0.534731924533844, "learning_rate": 0.00047132277972425, "loss": 3.1941, "step": 18764 }, { "epoch": 0.92, "grad_norm": 0.5220165252685547, "learning_rate": 0.00047131014080841675, "loss": 3.1918, "step": 18765 }, { "epoch": 0.92, "grad_norm": 0.5755146145820618, "learning_rate": 0.00047129750144138567, "loss": 3.1908, "step": 18766 }, { "epoch": 0.92, "grad_norm": 0.508004903793335, "learning_rate": 0.0004712848616231899, "loss": 3.2737, "step": 18767 }, { "epoch": 0.92, "grad_norm": 0.5089250802993774, "learning_rate": 0.0004712722213538628, "loss": 2.9431, "step": 18768 }, { "epoch": 0.92, "grad_norm": 0.573453962802887, "learning_rate": 0.0004712595806334376, "loss": 3.064, "step": 18769 }, { "epoch": 0.92, "grad_norm": 0.5167307257652283, "learning_rate": 0.0004712469394619476, "loss": 3.0751, "step": 18770 }, { "epoch": 0.92, "grad_norm": 0.5273975729942322, "learning_rate": 0.00047123429783942614, "loss": 3.1842, "step": 18771 }, { "epoch": 0.92, "grad_norm": 0.5206418633460999, "learning_rate": 0.00047122165576590655, "loss": 3.0887, "step": 18772 }, { "epoch": 0.92, "grad_norm": 0.5117706656455994, "learning_rate": 0.00047120901324142206, "loss": 3.3396, "step": 18773 }, { "epoch": 0.92, "grad_norm": 0.5400931239128113, "learning_rate": 0.0004711963702660059, "loss": 3.1191, "step": 18774 }, { "epoch": 0.92, "grad_norm": 0.5695016384124756, "learning_rate": 0.0004711837268396915, "loss": 3.0463, "step": 18775 }, { "epoch": 0.92, "grad_norm": 0.5119388699531555, "learning_rate": 0.000471171082962512, "loss": 3.3504, "step": 18776 }, { "epoch": 0.92, "grad_norm": 0.5598418712615967, "learning_rate": 0.000471158438634501, "loss": 3.1825, "step": 18777 }, { "epoch": 0.92, "grad_norm": 0.5664933919906616, "learning_rate": 0.0004711457938556915, "loss": 3.1576, "step": 18778 }, { "epoch": 0.92, "grad_norm": 0.5691736340522766, "learning_rate": 0.000471133148626117, "loss": 3.0413, "step": 18779 }, { "epoch": 0.92, "grad_norm": 0.5121724009513855, "learning_rate": 0.00047112050294581065, "loss": 3.4054, "step": 18780 }, { "epoch": 0.92, "grad_norm": 0.5062749981880188, "learning_rate": 0.0004711078568148058, "loss": 3.1947, "step": 18781 }, { "epoch": 0.92, "grad_norm": 0.5275105237960815, "learning_rate": 0.00047109521023313585, "loss": 3.1643, "step": 18782 }, { "epoch": 0.92, "grad_norm": 0.5385158061981201, "learning_rate": 0.000471082563200834, "loss": 3.0541, "step": 18783 }, { "epoch": 0.92, "grad_norm": 0.5302549600601196, "learning_rate": 0.00047106991571793357, "loss": 2.8629, "step": 18784 }, { "epoch": 0.92, "grad_norm": 0.5567852258682251, "learning_rate": 0.0004710572677844679, "loss": 3.1464, "step": 18785 }, { "epoch": 0.92, "grad_norm": 0.5577290058135986, "learning_rate": 0.0004710446194004704, "loss": 3.2132, "step": 18786 }, { "epoch": 0.92, "grad_norm": 0.516573965549469, "learning_rate": 0.0004710319705659744, "loss": 3.3098, "step": 18787 }, { "epoch": 0.92, "grad_norm": 0.5409716367721558, "learning_rate": 0.0004710193212810129, "loss": 3.0126, "step": 18788 }, { "epoch": 0.92, "grad_norm": 0.5368973612785339, "learning_rate": 0.00047100667154561946, "loss": 3.2313, "step": 18789 }, { "epoch": 0.92, "grad_norm": 0.48436570167541504, "learning_rate": 0.0004709940213598274, "loss": 3.1132, "step": 18790 }, { "epoch": 0.92, "grad_norm": 0.5413197875022888, "learning_rate": 0.00047098137072366995, "loss": 3.1343, "step": 18791 }, { "epoch": 0.92, "grad_norm": 0.530865490436554, "learning_rate": 0.00047096871963718046, "loss": 3.0911, "step": 18792 }, { "epoch": 0.92, "grad_norm": 0.5218861699104309, "learning_rate": 0.00047095606810039237, "loss": 3.2357, "step": 18793 }, { "epoch": 0.92, "grad_norm": 0.5184247493743896, "learning_rate": 0.00047094341611333886, "loss": 3.1619, "step": 18794 }, { "epoch": 0.92, "grad_norm": 0.5146009922027588, "learning_rate": 0.00047093076367605316, "loss": 3.2411, "step": 18795 }, { "epoch": 0.92, "grad_norm": 0.5616743564605713, "learning_rate": 0.0004709181107885689, "loss": 3.233, "step": 18796 }, { "epoch": 0.92, "grad_norm": 0.5262179970741272, "learning_rate": 0.0004709054574509192, "loss": 3.3252, "step": 18797 }, { "epoch": 0.92, "grad_norm": 0.5056144595146179, "learning_rate": 0.0004708928036631374, "loss": 3.1111, "step": 18798 }, { "epoch": 0.92, "grad_norm": 0.5371179580688477, "learning_rate": 0.00047088014942525675, "loss": 3.1897, "step": 18799 }, { "epoch": 0.92, "grad_norm": 0.5067994594573975, "learning_rate": 0.0004708674947373108, "loss": 3.3786, "step": 18800 }, { "epoch": 0.92, "grad_norm": 0.5345014929771423, "learning_rate": 0.0004708548395993326, "loss": 3.2104, "step": 18801 }, { "epoch": 0.92, "grad_norm": 0.5636339783668518, "learning_rate": 0.00047084218401135576, "loss": 3.3303, "step": 18802 }, { "epoch": 0.92, "grad_norm": 0.4965422749519348, "learning_rate": 0.0004708295279734135, "loss": 3.1805, "step": 18803 }, { "epoch": 0.92, "grad_norm": 0.533908486366272, "learning_rate": 0.0004708168714855391, "loss": 3.1346, "step": 18804 }, { "epoch": 0.92, "grad_norm": 0.512013852596283, "learning_rate": 0.0004708042145477659, "loss": 3.1826, "step": 18805 }, { "epoch": 0.92, "grad_norm": 0.5449269413948059, "learning_rate": 0.0004707915571601273, "loss": 3.1899, "step": 18806 }, { "epoch": 0.92, "grad_norm": 0.5012300610542297, "learning_rate": 0.0004707788993226567, "loss": 3.4398, "step": 18807 }, { "epoch": 0.92, "grad_norm": 0.5492137670516968, "learning_rate": 0.00047076624103538725, "loss": 3.1645, "step": 18808 }, { "epoch": 0.92, "grad_norm": 0.5013618469238281, "learning_rate": 0.00047075358229835236, "loss": 3.1924, "step": 18809 }, { "epoch": 0.92, "grad_norm": 0.5345459580421448, "learning_rate": 0.00047074092311158536, "loss": 3.0099, "step": 18810 }, { "epoch": 0.92, "grad_norm": 0.5092979669570923, "learning_rate": 0.0004707282634751197, "loss": 3.1141, "step": 18811 }, { "epoch": 0.92, "grad_norm": 0.5068957805633545, "learning_rate": 0.0004707156033889887, "loss": 3.3472, "step": 18812 }, { "epoch": 0.92, "grad_norm": 0.5265266299247742, "learning_rate": 0.0004707029428532256, "loss": 2.9948, "step": 18813 }, { "epoch": 0.92, "grad_norm": 0.5371838808059692, "learning_rate": 0.00047069028186786386, "loss": 3.2557, "step": 18814 }, { "epoch": 0.92, "grad_norm": 0.5402524471282959, "learning_rate": 0.0004706776204329367, "loss": 3.1617, "step": 18815 }, { "epoch": 0.92, "grad_norm": 0.5330641269683838, "learning_rate": 0.0004706649585484775, "loss": 3.1771, "step": 18816 }, { "epoch": 0.92, "grad_norm": 0.5558748841285706, "learning_rate": 0.0004706522962145197, "loss": 3.1897, "step": 18817 }, { "epoch": 0.92, "grad_norm": 0.5087716579437256, "learning_rate": 0.0004706396334310966, "loss": 3.1177, "step": 18818 }, { "epoch": 0.92, "grad_norm": 0.5257171392440796, "learning_rate": 0.00047062697019824155, "loss": 3.0529, "step": 18819 }, { "epoch": 0.92, "grad_norm": 0.5224782228469849, "learning_rate": 0.00047061430651598783, "loss": 3.3477, "step": 18820 }, { "epoch": 0.92, "grad_norm": 0.5466150641441345, "learning_rate": 0.0004706016423843689, "loss": 2.9722, "step": 18821 }, { "epoch": 0.92, "grad_norm": 0.5087209939956665, "learning_rate": 0.0004705889778034181, "loss": 3.4086, "step": 18822 }, { "epoch": 0.92, "grad_norm": 0.5442378520965576, "learning_rate": 0.00047057631277316875, "loss": 3.4454, "step": 18823 }, { "epoch": 0.92, "grad_norm": 0.5326034426689148, "learning_rate": 0.00047056364729365424, "loss": 3.2417, "step": 18824 }, { "epoch": 0.92, "grad_norm": 0.4734261929988861, "learning_rate": 0.0004705509813649079, "loss": 3.2332, "step": 18825 }, { "epoch": 0.92, "grad_norm": 0.543708324432373, "learning_rate": 0.0004705383149869631, "loss": 3.2303, "step": 18826 }, { "epoch": 0.92, "grad_norm": 0.5420598983764648, "learning_rate": 0.00047052564815985317, "loss": 3.2229, "step": 18827 }, { "epoch": 0.92, "grad_norm": 0.543842077255249, "learning_rate": 0.00047051298088361155, "loss": 3.2, "step": 18828 }, { "epoch": 0.92, "grad_norm": 0.511565089225769, "learning_rate": 0.00047050031315827156, "loss": 3.1562, "step": 18829 }, { "epoch": 0.92, "grad_norm": 0.5324139595031738, "learning_rate": 0.0004704876449838665, "loss": 3.0808, "step": 18830 }, { "epoch": 0.92, "grad_norm": 0.5224776268005371, "learning_rate": 0.00047047497636042976, "loss": 3.131, "step": 18831 }, { "epoch": 0.92, "grad_norm": 0.525739312171936, "learning_rate": 0.00047046230728799485, "loss": 3.2305, "step": 18832 }, { "epoch": 0.92, "grad_norm": 0.5212183594703674, "learning_rate": 0.00047044963776659497, "loss": 3.0373, "step": 18833 }, { "epoch": 0.92, "grad_norm": 0.5085528492927551, "learning_rate": 0.0004704369677962636, "loss": 3.235, "step": 18834 }, { "epoch": 0.92, "grad_norm": 0.5560317635536194, "learning_rate": 0.0004704242973770339, "loss": 3.2723, "step": 18835 }, { "epoch": 0.92, "grad_norm": 0.48996028304100037, "learning_rate": 0.00047041162650893957, "loss": 3.139, "step": 18836 }, { "epoch": 0.92, "grad_norm": 0.5113476514816284, "learning_rate": 0.0004703989551920138, "loss": 3.1393, "step": 18837 }, { "epoch": 0.92, "grad_norm": 0.5235118865966797, "learning_rate": 0.0004703862834262899, "loss": 3.1353, "step": 18838 }, { "epoch": 0.92, "grad_norm": 0.523135244846344, "learning_rate": 0.0004703736112118014, "loss": 3.1062, "step": 18839 }, { "epoch": 0.92, "grad_norm": 0.5716493725776672, "learning_rate": 0.0004703609385485815, "loss": 3.2806, "step": 18840 }, { "epoch": 0.92, "grad_norm": 0.4963917136192322, "learning_rate": 0.00047034826543666374, "loss": 3.0589, "step": 18841 }, { "epoch": 0.92, "grad_norm": 0.54476398229599, "learning_rate": 0.0004703355918760814, "loss": 3.0997, "step": 18842 }, { "epoch": 0.92, "grad_norm": 0.48611146211624146, "learning_rate": 0.0004703229178668679, "loss": 3.0625, "step": 18843 }, { "epoch": 0.92, "grad_norm": 0.5003273487091064, "learning_rate": 0.0004703102434090566, "loss": 3.0927, "step": 18844 }, { "epoch": 0.92, "grad_norm": 0.5381352305412292, "learning_rate": 0.00047029756850268097, "loss": 2.7574, "step": 18845 }, { "epoch": 0.92, "grad_norm": 0.5133535265922546, "learning_rate": 0.00047028489314777423, "loss": 3.2066, "step": 18846 }, { "epoch": 0.92, "grad_norm": 0.4886670708656311, "learning_rate": 0.0004702722173443699, "loss": 2.9163, "step": 18847 }, { "epoch": 0.92, "grad_norm": 0.513353705406189, "learning_rate": 0.0004702595410925013, "loss": 3.2403, "step": 18848 }, { "epoch": 0.92, "grad_norm": 0.5126110911369324, "learning_rate": 0.00047024686439220187, "loss": 3.1207, "step": 18849 }, { "epoch": 0.92, "grad_norm": 0.525732696056366, "learning_rate": 0.0004702341872435049, "loss": 3.2541, "step": 18850 }, { "epoch": 0.92, "grad_norm": 0.5350430607795715, "learning_rate": 0.0004702215096464439, "loss": 3.1009, "step": 18851 }, { "epoch": 0.92, "grad_norm": 0.5231349468231201, "learning_rate": 0.0004702088316010522, "loss": 2.9764, "step": 18852 }, { "epoch": 0.92, "grad_norm": 0.5163970589637756, "learning_rate": 0.00047019615310736314, "loss": 2.9218, "step": 18853 }, { "epoch": 0.92, "grad_norm": 0.5017314553260803, "learning_rate": 0.00047018347416541027, "loss": 3.249, "step": 18854 }, { "epoch": 0.92, "grad_norm": 0.5135806798934937, "learning_rate": 0.00047017079477522676, "loss": 3.0667, "step": 18855 }, { "epoch": 0.92, "grad_norm": 0.4848870635032654, "learning_rate": 0.0004701581149368463, "loss": 3.2183, "step": 18856 }, { "epoch": 0.92, "grad_norm": 0.5242249369621277, "learning_rate": 0.0004701454346503019, "loss": 3.0505, "step": 18857 }, { "epoch": 0.92, "grad_norm": 0.5194023847579956, "learning_rate": 0.00047013275391562736, "loss": 3.2237, "step": 18858 }, { "epoch": 0.92, "grad_norm": 0.5167834162712097, "learning_rate": 0.00047012007273285574, "loss": 3.0208, "step": 18859 }, { "epoch": 0.92, "grad_norm": 0.5210264921188354, "learning_rate": 0.0004701073911020207, "loss": 3.0097, "step": 18860 }, { "epoch": 0.92, "grad_norm": 0.5134527087211609, "learning_rate": 0.0004700947090231555, "loss": 3.0335, "step": 18861 }, { "epoch": 0.92, "grad_norm": 0.5310779213905334, "learning_rate": 0.00047008202649629347, "loss": 3.2074, "step": 18862 }, { "epoch": 0.92, "grad_norm": 0.5398174524307251, "learning_rate": 0.0004700693435214682, "loss": 2.9155, "step": 18863 }, { "epoch": 0.92, "grad_norm": 0.5127217173576355, "learning_rate": 0.000470056660098713, "loss": 3.0767, "step": 18864 }, { "epoch": 0.92, "grad_norm": 0.4852171838283539, "learning_rate": 0.00047004397622806135, "loss": 3.2109, "step": 18865 }, { "epoch": 0.92, "grad_norm": 0.514201283454895, "learning_rate": 0.0004700312919095465, "loss": 3.2189, "step": 18866 }, { "epoch": 0.92, "grad_norm": 0.4986250698566437, "learning_rate": 0.000470018607143202, "loss": 3.2653, "step": 18867 }, { "epoch": 0.92, "grad_norm": 0.531745195388794, "learning_rate": 0.00047000592192906115, "loss": 3.1656, "step": 18868 }, { "epoch": 0.92, "grad_norm": 0.5156833529472351, "learning_rate": 0.00046999323626715746, "loss": 3.1795, "step": 18869 }, { "epoch": 0.92, "grad_norm": 0.5258499383926392, "learning_rate": 0.00046998055015752435, "loss": 3.031, "step": 18870 }, { "epoch": 0.92, "grad_norm": 0.4953664541244507, "learning_rate": 0.00046996786360019516, "loss": 3.1802, "step": 18871 }, { "epoch": 0.92, "grad_norm": 0.5456046462059021, "learning_rate": 0.00046995517659520323, "loss": 3.224, "step": 18872 }, { "epoch": 0.92, "grad_norm": 0.5381906628608704, "learning_rate": 0.00046994248914258217, "loss": 3.1333, "step": 18873 }, { "epoch": 0.92, "grad_norm": 0.5225964784622192, "learning_rate": 0.00046992980124236527, "loss": 3.2528, "step": 18874 }, { "epoch": 0.93, "grad_norm": 0.492789089679718, "learning_rate": 0.00046991711289458597, "loss": 2.9492, "step": 18875 }, { "epoch": 0.93, "grad_norm": 0.5383958220481873, "learning_rate": 0.00046990442409927777, "loss": 3.2909, "step": 18876 }, { "epoch": 0.93, "grad_norm": 0.5192603468894958, "learning_rate": 0.0004698917348564739, "loss": 3.021, "step": 18877 }, { "epoch": 0.93, "grad_norm": 0.5470327138900757, "learning_rate": 0.000469879045166208, "loss": 3.0586, "step": 18878 }, { "epoch": 0.93, "grad_norm": 0.5224716067314148, "learning_rate": 0.0004698663550285133, "loss": 2.977, "step": 18879 }, { "epoch": 0.93, "grad_norm": 0.5188048481941223, "learning_rate": 0.00046985366444342335, "loss": 3.1992, "step": 18880 }, { "epoch": 0.93, "grad_norm": 0.5498374700546265, "learning_rate": 0.0004698409734109716, "loss": 2.9874, "step": 18881 }, { "epoch": 0.93, "grad_norm": 0.5041355490684509, "learning_rate": 0.00046982828193119127, "loss": 3.0512, "step": 18882 }, { "epoch": 0.93, "grad_norm": 0.5342227220535278, "learning_rate": 0.00046981559000411604, "loss": 3.1102, "step": 18883 }, { "epoch": 0.93, "grad_norm": 0.5024885535240173, "learning_rate": 0.0004698028976297792, "loss": 3.1677, "step": 18884 }, { "epoch": 0.93, "grad_norm": 0.5300989151000977, "learning_rate": 0.0004697902048082142, "loss": 3.1714, "step": 18885 }, { "epoch": 0.93, "grad_norm": 0.5051508545875549, "learning_rate": 0.00046977751153945457, "loss": 2.9421, "step": 18886 }, { "epoch": 0.93, "grad_norm": 0.5393108129501343, "learning_rate": 0.0004697648178235335, "loss": 3.0567, "step": 18887 }, { "epoch": 0.93, "grad_norm": 0.5067991614341736, "learning_rate": 0.00046975212366048467, "loss": 3.2442, "step": 18888 }, { "epoch": 0.93, "grad_norm": 0.5369142889976501, "learning_rate": 0.00046973942905034137, "loss": 3.0943, "step": 18889 }, { "epoch": 0.93, "grad_norm": 0.5115844011306763, "learning_rate": 0.0004697267339931371, "loss": 3.1095, "step": 18890 }, { "epoch": 0.93, "grad_norm": 0.48783326148986816, "learning_rate": 0.0004697140384889053, "loss": 3.1938, "step": 18891 }, { "epoch": 0.93, "grad_norm": 0.5423147678375244, "learning_rate": 0.00046970134253767937, "loss": 3.1264, "step": 18892 }, { "epoch": 0.93, "grad_norm": 0.5428636074066162, "learning_rate": 0.00046968864613949267, "loss": 3.3557, "step": 18893 }, { "epoch": 0.93, "grad_norm": 0.5630121827125549, "learning_rate": 0.00046967594929437885, "loss": 2.9878, "step": 18894 }, { "epoch": 0.93, "grad_norm": 0.5453979969024658, "learning_rate": 0.00046966325200237117, "loss": 3.0636, "step": 18895 }, { "epoch": 0.93, "grad_norm": 0.5532796382904053, "learning_rate": 0.0004696505542635032, "loss": 3.1966, "step": 18896 }, { "epoch": 0.93, "grad_norm": 0.5100991725921631, "learning_rate": 0.00046963785607780827, "loss": 3.0472, "step": 18897 }, { "epoch": 0.93, "grad_norm": 0.5884232521057129, "learning_rate": 0.0004696251574453198, "loss": 2.9916, "step": 18898 }, { "epoch": 0.93, "grad_norm": 0.5338824391365051, "learning_rate": 0.0004696124583660715, "loss": 3.2273, "step": 18899 }, { "epoch": 0.93, "grad_norm": 0.5424486398696899, "learning_rate": 0.00046959975884009646, "loss": 3.3073, "step": 18900 }, { "epoch": 0.93, "grad_norm": 0.5141465067863464, "learning_rate": 0.0004695870588674284, "loss": 3.2295, "step": 18901 }, { "epoch": 0.93, "grad_norm": 0.5177417993545532, "learning_rate": 0.00046957435844810063, "loss": 3.2054, "step": 18902 }, { "epoch": 0.93, "grad_norm": 0.5106486082077026, "learning_rate": 0.0004695616575821466, "loss": 3.1774, "step": 18903 }, { "epoch": 0.93, "grad_norm": 0.5100563168525696, "learning_rate": 0.0004695489562695998, "loss": 3.0875, "step": 18904 }, { "epoch": 0.93, "grad_norm": 0.524345338344574, "learning_rate": 0.0004695362545104938, "loss": 3.2862, "step": 18905 }, { "epoch": 0.93, "grad_norm": 0.5443475246429443, "learning_rate": 0.0004695235523048618, "loss": 2.8944, "step": 18906 }, { "epoch": 0.93, "grad_norm": 0.5217635631561279, "learning_rate": 0.00046951084965273746, "loss": 3.171, "step": 18907 }, { "epoch": 0.93, "grad_norm": 0.5589973330497742, "learning_rate": 0.00046949814655415406, "loss": 3.0554, "step": 18908 }, { "epoch": 0.93, "grad_norm": 0.49502140283584595, "learning_rate": 0.0004694854430091452, "loss": 3.2162, "step": 18909 }, { "epoch": 0.93, "grad_norm": 0.5253203511238098, "learning_rate": 0.00046947273901774443, "loss": 3.2409, "step": 18910 }, { "epoch": 0.93, "grad_norm": 0.508833646774292, "learning_rate": 0.000469460034579985, "loss": 3.0954, "step": 18911 }, { "epoch": 0.93, "grad_norm": 0.5880780816078186, "learning_rate": 0.0004694473296959004, "loss": 3.1979, "step": 18912 }, { "epoch": 0.93, "grad_norm": 0.5805985331535339, "learning_rate": 0.0004694346243655242, "loss": 3.0149, "step": 18913 }, { "epoch": 0.93, "grad_norm": 0.5318664908409119, "learning_rate": 0.00046942191858888977, "loss": 3.1204, "step": 18914 }, { "epoch": 0.93, "grad_norm": 0.5232014060020447, "learning_rate": 0.00046940921236603064, "loss": 3.1569, "step": 18915 }, { "epoch": 0.93, "grad_norm": 0.5750177502632141, "learning_rate": 0.00046939650569698024, "loss": 2.9964, "step": 18916 }, { "epoch": 0.93, "grad_norm": 0.5180473327636719, "learning_rate": 0.00046938379858177203, "loss": 3.063, "step": 18917 }, { "epoch": 0.93, "grad_norm": 0.5332753658294678, "learning_rate": 0.0004693710910204395, "loss": 3.2211, "step": 18918 }, { "epoch": 0.93, "grad_norm": 0.5309212803840637, "learning_rate": 0.0004693583830130162, "loss": 3.2455, "step": 18919 }, { "epoch": 0.93, "grad_norm": 0.530160129070282, "learning_rate": 0.0004693456745595354, "loss": 2.9723, "step": 18920 }, { "epoch": 0.93, "grad_norm": 0.5082368850708008, "learning_rate": 0.00046933296566003076, "loss": 3.102, "step": 18921 }, { "epoch": 0.93, "grad_norm": 0.5547425746917725, "learning_rate": 0.00046932025631453564, "loss": 2.9199, "step": 18922 }, { "epoch": 0.93, "grad_norm": 0.5218966603279114, "learning_rate": 0.0004693075465230835, "loss": 3.0292, "step": 18923 }, { "epoch": 0.93, "grad_norm": 0.551936149597168, "learning_rate": 0.00046929483628570793, "loss": 3.1246, "step": 18924 }, { "epoch": 0.93, "grad_norm": 0.516350269317627, "learning_rate": 0.0004692821256024423, "loss": 3.2375, "step": 18925 }, { "epoch": 0.93, "grad_norm": 0.5745588541030884, "learning_rate": 0.00046926941447332016, "loss": 2.9728, "step": 18926 }, { "epoch": 0.93, "grad_norm": 0.5034775733947754, "learning_rate": 0.00046925670289837496, "loss": 3.3807, "step": 18927 }, { "epoch": 0.93, "grad_norm": 0.6255304217338562, "learning_rate": 0.0004692439908776402, "loss": 3.2879, "step": 18928 }, { "epoch": 0.93, "grad_norm": 0.49725767970085144, "learning_rate": 0.00046923127841114924, "loss": 3.0999, "step": 18929 }, { "epoch": 0.93, "grad_norm": 0.5189871788024902, "learning_rate": 0.0004692185654989358, "loss": 3.1402, "step": 18930 }, { "epoch": 0.93, "grad_norm": 0.5304571390151978, "learning_rate": 0.00046920585214103324, "loss": 3.2725, "step": 18931 }, { "epoch": 0.93, "grad_norm": 0.5378173589706421, "learning_rate": 0.00046919313833747485, "loss": 3.1548, "step": 18932 }, { "epoch": 0.93, "grad_norm": 0.5394854545593262, "learning_rate": 0.00046918042408829446, "loss": 3.1416, "step": 18933 }, { "epoch": 0.93, "grad_norm": 0.5187913179397583, "learning_rate": 0.0004691677093935253, "loss": 2.9619, "step": 18934 }, { "epoch": 0.93, "grad_norm": 0.5153508186340332, "learning_rate": 0.000469154994253201, "loss": 3.1343, "step": 18935 }, { "epoch": 0.93, "grad_norm": 0.5249007344245911, "learning_rate": 0.0004691422786673549, "loss": 3.223, "step": 18936 }, { "epoch": 0.93, "grad_norm": 0.514267086982727, "learning_rate": 0.00046912956263602077, "loss": 3.1555, "step": 18937 }, { "epoch": 0.93, "grad_norm": 0.515212893486023, "learning_rate": 0.0004691168461592318, "loss": 3.1127, "step": 18938 }, { "epoch": 0.93, "grad_norm": 0.48960253596305847, "learning_rate": 0.0004691041292370216, "loss": 3.1147, "step": 18939 }, { "epoch": 0.93, "grad_norm": 0.5144261717796326, "learning_rate": 0.0004690914118694237, "loss": 3.2216, "step": 18940 }, { "epoch": 0.93, "grad_norm": 0.523517906665802, "learning_rate": 0.0004690786940564716, "loss": 3.0786, "step": 18941 }, { "epoch": 0.93, "grad_norm": 0.5452386140823364, "learning_rate": 0.00046906597579819865, "loss": 3.1958, "step": 18942 }, { "epoch": 0.93, "grad_norm": 0.5395174026489258, "learning_rate": 0.0004690532570946386, "loss": 2.9997, "step": 18943 }, { "epoch": 0.93, "grad_norm": 0.5560466051101685, "learning_rate": 0.00046904053794582465, "loss": 3.3784, "step": 18944 }, { "epoch": 0.93, "grad_norm": 0.525627076625824, "learning_rate": 0.0004690278183517906, "loss": 2.9482, "step": 18945 }, { "epoch": 0.93, "grad_norm": 0.5005621314048767, "learning_rate": 0.0004690150983125697, "loss": 3.0905, "step": 18946 }, { "epoch": 0.93, "grad_norm": 0.5510935187339783, "learning_rate": 0.0004690023778281957, "loss": 3.3415, "step": 18947 }, { "epoch": 0.93, "grad_norm": 0.5250145196914673, "learning_rate": 0.0004689896568987018, "loss": 3.1714, "step": 18948 }, { "epoch": 0.93, "grad_norm": 0.521665096282959, "learning_rate": 0.0004689769355241217, "loss": 3.2226, "step": 18949 }, { "epoch": 0.93, "grad_norm": 0.6473313570022583, "learning_rate": 0.00046896421370448894, "loss": 3.2003, "step": 18950 }, { "epoch": 0.93, "grad_norm": 0.5071868300437927, "learning_rate": 0.0004689514914398369, "loss": 2.9599, "step": 18951 }, { "epoch": 0.93, "grad_norm": 0.5094021558761597, "learning_rate": 0.0004689387687301992, "loss": 2.9249, "step": 18952 }, { "epoch": 0.93, "grad_norm": 0.5259853601455688, "learning_rate": 0.0004689260455756092, "loss": 3.2138, "step": 18953 }, { "epoch": 0.93, "grad_norm": 0.4910939335823059, "learning_rate": 0.00046891332197610057, "loss": 3.1469, "step": 18954 }, { "epoch": 0.93, "grad_norm": 0.5359907746315002, "learning_rate": 0.00046890059793170676, "loss": 3.2667, "step": 18955 }, { "epoch": 0.93, "grad_norm": 0.5295614004135132, "learning_rate": 0.00046888787344246134, "loss": 2.998, "step": 18956 }, { "epoch": 0.93, "grad_norm": 0.5557469129562378, "learning_rate": 0.0004688751485083977, "loss": 3.1919, "step": 18957 }, { "epoch": 0.93, "grad_norm": 0.5100527405738831, "learning_rate": 0.0004688624231295494, "loss": 3.078, "step": 18958 }, { "epoch": 0.93, "grad_norm": 0.5492949485778809, "learning_rate": 0.00046884969730595, "loss": 3.2749, "step": 18959 }, { "epoch": 0.93, "grad_norm": 0.5385236144065857, "learning_rate": 0.00046883697103763293, "loss": 3.0871, "step": 18960 }, { "epoch": 0.93, "grad_norm": 0.5516214966773987, "learning_rate": 0.0004688242443246319, "loss": 2.9884, "step": 18961 }, { "epoch": 0.93, "grad_norm": 0.5254327654838562, "learning_rate": 0.00046881151716698027, "loss": 3.0269, "step": 18962 }, { "epoch": 0.93, "grad_norm": 0.50152587890625, "learning_rate": 0.0004687987895647115, "loss": 3.1315, "step": 18963 }, { "epoch": 0.93, "grad_norm": 0.5856966376304626, "learning_rate": 0.0004687860615178593, "loss": 2.9434, "step": 18964 }, { "epoch": 0.93, "grad_norm": 0.5326898694038391, "learning_rate": 0.00046877333302645707, "loss": 3.2463, "step": 18965 }, { "epoch": 0.93, "grad_norm": 0.5047300457954407, "learning_rate": 0.00046876060409053826, "loss": 3.0858, "step": 18966 }, { "epoch": 0.93, "grad_norm": 0.601098358631134, "learning_rate": 0.00046874787471013667, "loss": 3.0427, "step": 18967 }, { "epoch": 0.93, "grad_norm": 0.5478784441947937, "learning_rate": 0.0004687351448852855, "loss": 3.1414, "step": 18968 }, { "epoch": 0.93, "grad_norm": 0.5420882701873779, "learning_rate": 0.0004687224146160186, "loss": 3.1592, "step": 18969 }, { "epoch": 0.93, "grad_norm": 0.5226287245750427, "learning_rate": 0.0004687096839023692, "loss": 3.1503, "step": 18970 }, { "epoch": 0.93, "grad_norm": 0.5345956087112427, "learning_rate": 0.000468696952744371, "loss": 3.3743, "step": 18971 }, { "epoch": 0.93, "grad_norm": 0.5423299074172974, "learning_rate": 0.00046868422114205755, "loss": 3.0927, "step": 18972 }, { "epoch": 0.93, "grad_norm": 0.532418966293335, "learning_rate": 0.00046867148909546225, "loss": 3.2014, "step": 18973 }, { "epoch": 0.93, "grad_norm": 0.5317066311836243, "learning_rate": 0.0004686587566046187, "loss": 3.1707, "step": 18974 }, { "epoch": 0.93, "grad_norm": 0.5457773208618164, "learning_rate": 0.00046864602366956037, "loss": 2.9554, "step": 18975 }, { "epoch": 0.93, "grad_norm": 0.5188515186309814, "learning_rate": 0.00046863329029032095, "loss": 3.2574, "step": 18976 }, { "epoch": 0.93, "grad_norm": 0.5296633839607239, "learning_rate": 0.000468620556466934, "loss": 3.1508, "step": 18977 }, { "epoch": 0.93, "grad_norm": 0.5568720102310181, "learning_rate": 0.0004686078221994329, "loss": 3.2111, "step": 18978 }, { "epoch": 0.93, "grad_norm": 0.5664288997650146, "learning_rate": 0.00046859508748785126, "loss": 3.0099, "step": 18979 }, { "epoch": 0.93, "grad_norm": 0.5301545262336731, "learning_rate": 0.0004685823523322225, "loss": 3.2176, "step": 18980 }, { "epoch": 0.93, "grad_norm": 0.5368116497993469, "learning_rate": 0.0004685696167325803, "loss": 3.2631, "step": 18981 }, { "epoch": 0.93, "grad_norm": 0.534150242805481, "learning_rate": 0.0004685568806889582, "loss": 3.0123, "step": 18982 }, { "epoch": 0.93, "grad_norm": 0.4915403723716736, "learning_rate": 0.00046854414420138975, "loss": 3.3539, "step": 18983 }, { "epoch": 0.93, "grad_norm": 0.5354322195053101, "learning_rate": 0.00046853140726990845, "loss": 3.245, "step": 18984 }, { "epoch": 0.93, "grad_norm": 0.4962729811668396, "learning_rate": 0.0004685186698945478, "loss": 3.1999, "step": 18985 }, { "epoch": 0.93, "grad_norm": 0.5085036158561707, "learning_rate": 0.0004685059320753414, "loss": 3.2156, "step": 18986 }, { "epoch": 0.93, "grad_norm": 0.5053709745407104, "learning_rate": 0.00046849319381232285, "loss": 3.1658, "step": 18987 }, { "epoch": 0.93, "grad_norm": 0.5463716983795166, "learning_rate": 0.0004684804551055257, "loss": 3.1854, "step": 18988 }, { "epoch": 0.93, "grad_norm": 0.5284098386764526, "learning_rate": 0.0004684677159549834, "loss": 3.3753, "step": 18989 }, { "epoch": 0.93, "grad_norm": 0.5338199734687805, "learning_rate": 0.0004684549763607296, "loss": 3.1831, "step": 18990 }, { "epoch": 0.93, "grad_norm": 0.5269510746002197, "learning_rate": 0.0004684422363227977, "loss": 3.2542, "step": 18991 }, { "epoch": 0.93, "grad_norm": 0.5175192356109619, "learning_rate": 0.0004684294958412215, "loss": 3.0125, "step": 18992 }, { "epoch": 0.93, "grad_norm": 0.5156930088996887, "learning_rate": 0.0004684167549160344, "loss": 3.3845, "step": 18993 }, { "epoch": 0.93, "grad_norm": 0.5119640231132507, "learning_rate": 0.0004684040135472699, "loss": 3.0625, "step": 18994 }, { "epoch": 0.93, "grad_norm": 0.5390982031822205, "learning_rate": 0.00046839127173496176, "loss": 3.0787, "step": 18995 }, { "epoch": 0.93, "grad_norm": 0.5263301134109497, "learning_rate": 0.00046837852947914324, "loss": 3.0068, "step": 18996 }, { "epoch": 0.93, "grad_norm": 0.5323955416679382, "learning_rate": 0.00046836578677984824, "loss": 3.0541, "step": 18997 }, { "epoch": 0.93, "grad_norm": 0.5111451745033264, "learning_rate": 0.00046835304363711006, "loss": 3.058, "step": 18998 }, { "epoch": 0.93, "grad_norm": 0.5534102916717529, "learning_rate": 0.0004683403000509624, "loss": 3.2107, "step": 18999 }, { "epoch": 0.93, "grad_norm": 0.5770662426948547, "learning_rate": 0.0004683275560214388, "loss": 3.053, "step": 19000 }, { "epoch": 0.93, "grad_norm": 0.6135122179985046, "learning_rate": 0.00046831481154857287, "loss": 3.2861, "step": 19001 }, { "epoch": 0.93, "grad_norm": 0.5752828121185303, "learning_rate": 0.00046830206663239803, "loss": 2.9293, "step": 19002 }, { "epoch": 0.93, "grad_norm": 0.5392802953720093, "learning_rate": 0.00046828932127294796, "loss": 3.1803, "step": 19003 }, { "epoch": 0.93, "grad_norm": 0.5212385654449463, "learning_rate": 0.00046827657547025625, "loss": 3.2022, "step": 19004 }, { "epoch": 0.93, "grad_norm": 0.5026683807373047, "learning_rate": 0.00046826382922435635, "loss": 3.3249, "step": 19005 }, { "epoch": 0.93, "grad_norm": 0.536699652671814, "learning_rate": 0.000468251082535282, "loss": 3.1748, "step": 19006 }, { "epoch": 0.93, "grad_norm": 0.5301597714424133, "learning_rate": 0.0004682383354030666, "loss": 3.1808, "step": 19007 }, { "epoch": 0.93, "grad_norm": 0.5335184335708618, "learning_rate": 0.0004682255878277438, "loss": 3.2511, "step": 19008 }, { "epoch": 0.93, "grad_norm": 0.5118494033813477, "learning_rate": 0.00046821283980934725, "loss": 3.0333, "step": 19009 }, { "epoch": 0.93, "grad_norm": 0.5088841319084167, "learning_rate": 0.0004682000913479104, "loss": 3.1185, "step": 19010 }, { "epoch": 0.93, "grad_norm": 0.5405777096748352, "learning_rate": 0.00046818734244346677, "loss": 3.4098, "step": 19011 }, { "epoch": 0.93, "grad_norm": 0.5063764452934265, "learning_rate": 0.0004681745930960502, "loss": 3.2377, "step": 19012 }, { "epoch": 0.93, "grad_norm": 0.5210672616958618, "learning_rate": 0.0004681618433056941, "loss": 3.2403, "step": 19013 }, { "epoch": 0.93, "grad_norm": 0.5515353679656982, "learning_rate": 0.00046814909307243204, "loss": 3.2621, "step": 19014 }, { "epoch": 0.93, "grad_norm": 0.5045962929725647, "learning_rate": 0.00046813634239629764, "loss": 3.2523, "step": 19015 }, { "epoch": 0.93, "grad_norm": 0.4997437596321106, "learning_rate": 0.0004681235912773245, "loss": 3.0961, "step": 19016 }, { "epoch": 0.93, "grad_norm": 0.5317462682723999, "learning_rate": 0.0004681108397155461, "loss": 3.061, "step": 19017 }, { "epoch": 0.93, "grad_norm": 0.5284067988395691, "learning_rate": 0.00046809808771099614, "loss": 3.2071, "step": 19018 }, { "epoch": 0.93, "grad_norm": 0.5324392914772034, "learning_rate": 0.00046808533526370826, "loss": 3.0611, "step": 19019 }, { "epoch": 0.93, "grad_norm": 0.5281850695610046, "learning_rate": 0.0004680725823737158, "loss": 3.0396, "step": 19020 }, { "epoch": 0.93, "grad_norm": 0.5365699529647827, "learning_rate": 0.00046805982904105255, "loss": 3.0534, "step": 19021 }, { "epoch": 0.93, "grad_norm": 0.5103819966316223, "learning_rate": 0.00046804707526575215, "loss": 2.9962, "step": 19022 }, { "epoch": 0.93, "grad_norm": 0.517741322517395, "learning_rate": 0.000468034321047848, "loss": 3.0247, "step": 19023 }, { "epoch": 0.93, "grad_norm": 0.5281891822814941, "learning_rate": 0.00046802156638737385, "loss": 3.0705, "step": 19024 }, { "epoch": 0.93, "grad_norm": 0.5412678718566895, "learning_rate": 0.00046800881128436316, "loss": 2.9022, "step": 19025 }, { "epoch": 0.93, "grad_norm": 0.5153235793113708, "learning_rate": 0.0004679960557388496, "loss": 3.1095, "step": 19026 }, { "epoch": 0.93, "grad_norm": 0.5097496509552002, "learning_rate": 0.0004679832997508668, "loss": 3.0689, "step": 19027 }, { "epoch": 0.93, "grad_norm": 0.5528456568717957, "learning_rate": 0.0004679705433204483, "loss": 3.1618, "step": 19028 }, { "epoch": 0.93, "grad_norm": 0.5313249230384827, "learning_rate": 0.0004679577864476278, "loss": 3.1174, "step": 19029 }, { "epoch": 0.93, "grad_norm": 0.5198782086372375, "learning_rate": 0.00046794502913243875, "loss": 3.1929, "step": 19030 }, { "epoch": 0.93, "grad_norm": 0.5220719575881958, "learning_rate": 0.00046793227137491473, "loss": 3.0891, "step": 19031 }, { "epoch": 0.93, "grad_norm": 0.5011788606643677, "learning_rate": 0.0004679195131750895, "loss": 3.106, "step": 19032 }, { "epoch": 0.93, "grad_norm": 0.525837242603302, "learning_rate": 0.00046790675453299666, "loss": 3.1896, "step": 19033 }, { "epoch": 0.93, "grad_norm": 0.5511639714241028, "learning_rate": 0.0004678939954486696, "loss": 3.213, "step": 19034 }, { "epoch": 0.93, "grad_norm": 0.5156654715538025, "learning_rate": 0.00046788123592214224, "loss": 3.1804, "step": 19035 }, { "epoch": 0.93, "grad_norm": 0.49573853611946106, "learning_rate": 0.00046786847595344774, "loss": 3.1656, "step": 19036 }, { "epoch": 0.93, "grad_norm": 0.5024093389511108, "learning_rate": 0.00046785571554262026, "loss": 3.1114, "step": 19037 }, { "epoch": 0.93, "grad_norm": 0.5238023996353149, "learning_rate": 0.000467842954689693, "loss": 3.1527, "step": 19038 }, { "epoch": 0.93, "grad_norm": 0.5388356447219849, "learning_rate": 0.0004678301933946997, "loss": 3.2028, "step": 19039 }, { "epoch": 0.93, "grad_norm": 0.512860119342804, "learning_rate": 0.00046781743165767405, "loss": 3.0169, "step": 19040 }, { "epoch": 0.93, "grad_norm": 0.5536538362503052, "learning_rate": 0.0004678046694786495, "loss": 2.8985, "step": 19041 }, { "epoch": 0.93, "grad_norm": 0.4811255931854248, "learning_rate": 0.0004677919068576597, "loss": 3.3228, "step": 19042 }, { "epoch": 0.93, "grad_norm": 0.5000419616699219, "learning_rate": 0.00046777914379473847, "loss": 3.0492, "step": 19043 }, { "epoch": 0.93, "grad_norm": 0.5147316455841064, "learning_rate": 0.0004677663802899192, "loss": 3.2764, "step": 19044 }, { "epoch": 0.93, "grad_norm": 0.5300585031509399, "learning_rate": 0.0004677536163432355, "loss": 2.9444, "step": 19045 }, { "epoch": 0.93, "grad_norm": 0.515038788318634, "learning_rate": 0.0004677408519547211, "loss": 3.0745, "step": 19046 }, { "epoch": 0.93, "grad_norm": 0.5364577174186707, "learning_rate": 0.0004677280871244096, "loss": 3.026, "step": 19047 }, { "epoch": 0.93, "grad_norm": 0.5051068067550659, "learning_rate": 0.00046771532185233456, "loss": 3.2398, "step": 19048 }, { "epoch": 0.93, "grad_norm": 0.5928849577903748, "learning_rate": 0.00046770255613852967, "loss": 3.2081, "step": 19049 }, { "epoch": 0.93, "grad_norm": 0.5345249772071838, "learning_rate": 0.0004676897899830285, "loss": 3.0596, "step": 19050 }, { "epoch": 0.93, "grad_norm": 0.513729453086853, "learning_rate": 0.00046767702338586475, "loss": 3.086, "step": 19051 }, { "epoch": 0.93, "grad_norm": 0.5252007246017456, "learning_rate": 0.0004676642563470719, "loss": 3.015, "step": 19052 }, { "epoch": 0.93, "grad_norm": 0.5086860060691833, "learning_rate": 0.00046765148886668376, "loss": 3.1461, "step": 19053 }, { "epoch": 0.93, "grad_norm": 0.535895824432373, "learning_rate": 0.0004676387209447338, "loss": 3.1293, "step": 19054 }, { "epoch": 0.93, "grad_norm": 0.5222206711769104, "learning_rate": 0.0004676259525812558, "loss": 3.0691, "step": 19055 }, { "epoch": 0.93, "grad_norm": 0.5708796381950378, "learning_rate": 0.0004676131837762832, "loss": 3.1535, "step": 19056 }, { "epoch": 0.93, "grad_norm": 0.49365147948265076, "learning_rate": 0.0004676004145298497, "loss": 3.268, "step": 19057 }, { "epoch": 0.93, "grad_norm": 0.49543845653533936, "learning_rate": 0.0004675876448419891, "loss": 3.2324, "step": 19058 }, { "epoch": 0.93, "grad_norm": 0.5274909138679504, "learning_rate": 0.00046757487471273476, "loss": 3.0694, "step": 19059 }, { "epoch": 0.93, "grad_norm": 0.5357721447944641, "learning_rate": 0.0004675621041421206, "loss": 3.2226, "step": 19060 }, { "epoch": 0.93, "grad_norm": 0.5113617777824402, "learning_rate": 0.00046754933313018, "loss": 3.2211, "step": 19061 }, { "epoch": 0.93, "grad_norm": 0.500162661075592, "learning_rate": 0.0004675365616769467, "loss": 3.2316, "step": 19062 }, { "epoch": 0.93, "grad_norm": 0.5388630628585815, "learning_rate": 0.00046752378978245435, "loss": 3.1944, "step": 19063 }, { "epoch": 0.93, "grad_norm": 0.5528714656829834, "learning_rate": 0.00046751101744673654, "loss": 3.3388, "step": 19064 }, { "epoch": 0.93, "grad_norm": 0.5113033056259155, "learning_rate": 0.0004674982446698271, "loss": 3.0037, "step": 19065 }, { "epoch": 0.93, "grad_norm": 0.5158981084823608, "learning_rate": 0.00046748547145175943, "loss": 3.2846, "step": 19066 }, { "epoch": 0.93, "grad_norm": 0.5108698010444641, "learning_rate": 0.0004674726977925672, "loss": 3.2234, "step": 19067 }, { "epoch": 0.93, "grad_norm": 0.5266377329826355, "learning_rate": 0.00046745992369228416, "loss": 3.0511, "step": 19068 }, { "epoch": 0.93, "grad_norm": 0.5855154991149902, "learning_rate": 0.00046744714915094394, "loss": 3.1555, "step": 19069 }, { "epoch": 0.93, "grad_norm": 0.5538336038589478, "learning_rate": 0.0004674343741685801, "loss": 3.1339, "step": 19070 }, { "epoch": 0.93, "grad_norm": 0.5286270976066589, "learning_rate": 0.0004674215987452264, "loss": 3.314, "step": 19071 }, { "epoch": 0.93, "grad_norm": 0.5068775415420532, "learning_rate": 0.00046740882288091634, "loss": 3.113, "step": 19072 }, { "epoch": 0.93, "grad_norm": 0.5175677537918091, "learning_rate": 0.0004673960465756837, "loss": 3.2558, "step": 19073 }, { "epoch": 0.93, "grad_norm": 0.5037120580673218, "learning_rate": 0.00046738326982956216, "loss": 3.232, "step": 19074 }, { "epoch": 0.93, "grad_norm": 0.5219966769218445, "learning_rate": 0.00046737049264258525, "loss": 3.0916, "step": 19075 }, { "epoch": 0.93, "grad_norm": 0.5453975796699524, "learning_rate": 0.00046735771501478675, "loss": 3.1197, "step": 19076 }, { "epoch": 0.93, "grad_norm": 0.5948343873023987, "learning_rate": 0.00046734493694620006, "loss": 2.7689, "step": 19077 }, { "epoch": 0.93, "grad_norm": 0.5255204439163208, "learning_rate": 0.0004673321584368591, "loss": 3.3097, "step": 19078 }, { "epoch": 0.94, "grad_norm": 0.508298933506012, "learning_rate": 0.0004673193794867975, "loss": 3.3616, "step": 19079 }, { "epoch": 0.94, "grad_norm": 0.5323010683059692, "learning_rate": 0.0004673066000960488, "loss": 3.034, "step": 19080 }, { "epoch": 0.94, "grad_norm": 0.5434712767601013, "learning_rate": 0.00046729382026464676, "loss": 3.1091, "step": 19081 }, { "epoch": 0.94, "grad_norm": 0.5189756155014038, "learning_rate": 0.0004672810399926249, "loss": 3.1893, "step": 19082 }, { "epoch": 0.94, "grad_norm": 0.5350856184959412, "learning_rate": 0.000467268259280017, "loss": 3.1141, "step": 19083 }, { "epoch": 0.94, "grad_norm": 0.6006237864494324, "learning_rate": 0.0004672554781268568, "loss": 3.2461, "step": 19084 }, { "epoch": 0.94, "grad_norm": 0.5676541328430176, "learning_rate": 0.00046724269653317774, "loss": 3.1233, "step": 19085 }, { "epoch": 0.94, "grad_norm": 0.5647549629211426, "learning_rate": 0.00046722991449901373, "loss": 3.1782, "step": 19086 }, { "epoch": 0.94, "grad_norm": 0.5188087821006775, "learning_rate": 0.00046721713202439816, "loss": 3.049, "step": 19087 }, { "epoch": 0.94, "grad_norm": 0.4982798099517822, "learning_rate": 0.00046720434910936493, "loss": 3.1946, "step": 19088 }, { "epoch": 0.94, "grad_norm": 0.549485445022583, "learning_rate": 0.00046719156575394754, "loss": 3.2946, "step": 19089 }, { "epoch": 0.94, "grad_norm": 0.5249114632606506, "learning_rate": 0.00046717878195817985, "loss": 3.2452, "step": 19090 }, { "epoch": 0.94, "grad_norm": 0.5417079925537109, "learning_rate": 0.00046716599772209544, "loss": 3.4411, "step": 19091 }, { "epoch": 0.94, "grad_norm": 0.5425086617469788, "learning_rate": 0.00046715321304572786, "loss": 3.1668, "step": 19092 }, { "epoch": 0.94, "grad_norm": 0.5171887278556824, "learning_rate": 0.0004671404279291109, "loss": 3.158, "step": 19093 }, { "epoch": 0.94, "grad_norm": 0.5016870498657227, "learning_rate": 0.00046712764237227827, "loss": 3.2172, "step": 19094 }, { "epoch": 0.94, "grad_norm": 0.4983656406402588, "learning_rate": 0.0004671148563752636, "loss": 3.0947, "step": 19095 }, { "epoch": 0.94, "grad_norm": 0.5325701832771301, "learning_rate": 0.0004671020699381005, "loss": 3.0837, "step": 19096 }, { "epoch": 0.94, "grad_norm": 0.5779774785041809, "learning_rate": 0.0004670892830608228, "loss": 3.0161, "step": 19097 }, { "epoch": 0.94, "grad_norm": 0.53163081407547, "learning_rate": 0.000467076495743464, "loss": 3.1027, "step": 19098 }, { "epoch": 0.94, "grad_norm": 0.5103086233139038, "learning_rate": 0.0004670637079860579, "loss": 3.242, "step": 19099 }, { "epoch": 0.94, "grad_norm": 0.5351077914237976, "learning_rate": 0.00046705091978863815, "loss": 3.0781, "step": 19100 }, { "epoch": 0.94, "grad_norm": 0.5188274383544922, "learning_rate": 0.0004670381311512384, "loss": 3.1909, "step": 19101 }, { "epoch": 0.94, "grad_norm": 0.514674186706543, "learning_rate": 0.0004670253420738924, "loss": 3.1387, "step": 19102 }, { "epoch": 0.94, "grad_norm": 0.4893554449081421, "learning_rate": 0.00046701255255663374, "loss": 3.322, "step": 19103 }, { "epoch": 0.94, "grad_norm": 0.49983033537864685, "learning_rate": 0.00046699976259949614, "loss": 3.0131, "step": 19104 }, { "epoch": 0.94, "grad_norm": 0.5099627375602722, "learning_rate": 0.00046698697220251344, "loss": 2.979, "step": 19105 }, { "epoch": 0.94, "grad_norm": 0.5100765228271484, "learning_rate": 0.0004669741813657191, "loss": 2.9599, "step": 19106 }, { "epoch": 0.94, "grad_norm": 0.5214683413505554, "learning_rate": 0.00046696139008914697, "loss": 3.074, "step": 19107 }, { "epoch": 0.94, "grad_norm": 0.532818078994751, "learning_rate": 0.0004669485983728305, "loss": 3.1915, "step": 19108 }, { "epoch": 0.94, "grad_norm": 0.48344460129737854, "learning_rate": 0.00046693580621680363, "loss": 3.2483, "step": 19109 }, { "epoch": 0.94, "grad_norm": 0.5342273116111755, "learning_rate": 0.0004669230136211, "loss": 2.9076, "step": 19110 }, { "epoch": 0.94, "grad_norm": 0.5045246481895447, "learning_rate": 0.0004669102205857533, "loss": 3.2139, "step": 19111 }, { "epoch": 0.94, "grad_norm": 0.5883970856666565, "learning_rate": 0.0004668974271107972, "loss": 2.8011, "step": 19112 }, { "epoch": 0.94, "grad_norm": 0.49823927879333496, "learning_rate": 0.0004668846331962654, "loss": 3.0764, "step": 19113 }, { "epoch": 0.94, "grad_norm": 0.5665649175643921, "learning_rate": 0.00046687183884219156, "loss": 3.1699, "step": 19114 }, { "epoch": 0.94, "grad_norm": 0.5797495245933533, "learning_rate": 0.0004668590440486094, "loss": 3.0427, "step": 19115 }, { "epoch": 0.94, "grad_norm": 0.5581980347633362, "learning_rate": 0.0004668462488155527, "loss": 3.2031, "step": 19116 }, { "epoch": 0.94, "grad_norm": 0.5199931263923645, "learning_rate": 0.00046683345314305503, "loss": 3.1927, "step": 19117 }, { "epoch": 0.94, "grad_norm": 0.5535823702812195, "learning_rate": 0.00046682065703115014, "loss": 3.1794, "step": 19118 }, { "epoch": 0.94, "grad_norm": 0.539772629737854, "learning_rate": 0.0004668078604798718, "loss": 3.3153, "step": 19119 }, { "epoch": 0.94, "grad_norm": 0.5513725876808167, "learning_rate": 0.00046679506348925367, "loss": 3.0796, "step": 19120 }, { "epoch": 0.94, "grad_norm": 0.5442554950714111, "learning_rate": 0.00046678226605932936, "loss": 3.2118, "step": 19121 }, { "epoch": 0.94, "grad_norm": 0.6911001205444336, "learning_rate": 0.0004667694681901327, "loss": 3.2077, "step": 19122 }, { "epoch": 0.94, "grad_norm": 0.5045164227485657, "learning_rate": 0.0004667566698816974, "loss": 3.2937, "step": 19123 }, { "epoch": 0.94, "grad_norm": 0.5363736152648926, "learning_rate": 0.000466743871134057, "loss": 3.0034, "step": 19124 }, { "epoch": 0.94, "grad_norm": 0.530031681060791, "learning_rate": 0.0004667310719472455, "loss": 3.0441, "step": 19125 }, { "epoch": 0.94, "grad_norm": 0.5397589206695557, "learning_rate": 0.00046671827232129634, "loss": 3.2575, "step": 19126 }, { "epoch": 0.94, "grad_norm": 0.5312854051589966, "learning_rate": 0.0004667054722562433, "loss": 3.1987, "step": 19127 }, { "epoch": 0.94, "grad_norm": 0.5177079439163208, "learning_rate": 0.0004666926717521203, "loss": 3.2154, "step": 19128 }, { "epoch": 0.94, "grad_norm": 0.525133490562439, "learning_rate": 0.00046667987080896065, "loss": 3.2139, "step": 19129 }, { "epoch": 0.94, "grad_norm": 0.5109131932258606, "learning_rate": 0.0004666670694267985, "loss": 2.9454, "step": 19130 }, { "epoch": 0.94, "grad_norm": 0.557773232460022, "learning_rate": 0.00046665426760566733, "loss": 3.3336, "step": 19131 }, { "epoch": 0.94, "grad_norm": 0.534517228603363, "learning_rate": 0.00046664146534560076, "loss": 3.0634, "step": 19132 }, { "epoch": 0.94, "grad_norm": 0.5470923781394958, "learning_rate": 0.0004666286626466328, "loss": 3.1499, "step": 19133 }, { "epoch": 0.94, "grad_norm": 0.5293968319892883, "learning_rate": 0.0004666158595087969, "loss": 3.2722, "step": 19134 }, { "epoch": 0.94, "grad_norm": 0.5513948202133179, "learning_rate": 0.00046660305593212694, "loss": 2.9834, "step": 19135 }, { "epoch": 0.94, "grad_norm": 0.5250282883644104, "learning_rate": 0.00046659025191665655, "loss": 3.2274, "step": 19136 }, { "epoch": 0.94, "grad_norm": 0.5674799680709839, "learning_rate": 0.0004665774474624196, "loss": 3.1483, "step": 19137 }, { "epoch": 0.94, "grad_norm": 0.49620136618614197, "learning_rate": 0.0004665646425694496, "loss": 3.1663, "step": 19138 }, { "epoch": 0.94, "grad_norm": 0.5123617053031921, "learning_rate": 0.0004665518372377804, "loss": 3.02, "step": 19139 }, { "epoch": 0.94, "grad_norm": 0.5374035835266113, "learning_rate": 0.00046653903146744576, "loss": 3.227, "step": 19140 }, { "epoch": 0.94, "grad_norm": 0.5367292165756226, "learning_rate": 0.0004665262252584794, "loss": 3.1528, "step": 19141 }, { "epoch": 0.94, "grad_norm": 0.5225591659545898, "learning_rate": 0.00046651341861091497, "loss": 3.145, "step": 19142 }, { "epoch": 0.94, "grad_norm": 0.5538541674613953, "learning_rate": 0.00046650061152478617, "loss": 3.1249, "step": 19143 }, { "epoch": 0.94, "grad_norm": 0.5019550323486328, "learning_rate": 0.00046648780400012686, "loss": 3.125, "step": 19144 }, { "epoch": 0.94, "grad_norm": 0.487470418214798, "learning_rate": 0.00046647499603697076, "loss": 3.0214, "step": 19145 }, { "epoch": 0.94, "grad_norm": 0.5381044149398804, "learning_rate": 0.0004664621876353515, "loss": 3.0006, "step": 19146 }, { "epoch": 0.94, "grad_norm": 0.563580334186554, "learning_rate": 0.0004664493787953029, "loss": 3.2355, "step": 19147 }, { "epoch": 0.94, "grad_norm": 0.5690339207649231, "learning_rate": 0.00046643656951685867, "loss": 3.1099, "step": 19148 }, { "epoch": 0.94, "grad_norm": 0.5722571015357971, "learning_rate": 0.0004664237598000525, "loss": 3.2627, "step": 19149 }, { "epoch": 0.94, "grad_norm": 0.5227568745613098, "learning_rate": 0.00046641094964491826, "loss": 3.2356, "step": 19150 }, { "epoch": 0.94, "grad_norm": 0.5287798047065735, "learning_rate": 0.00046639813905148954, "loss": 3.0889, "step": 19151 }, { "epoch": 0.94, "grad_norm": 0.5487927198410034, "learning_rate": 0.00046638532801980017, "loss": 3.235, "step": 19152 }, { "epoch": 0.94, "grad_norm": 0.5198444724082947, "learning_rate": 0.0004663725165498839, "loss": 3.1417, "step": 19153 }, { "epoch": 0.94, "grad_norm": 0.5517452955245972, "learning_rate": 0.00046635970464177436, "loss": 3.3419, "step": 19154 }, { "epoch": 0.94, "grad_norm": 0.5759121179580688, "learning_rate": 0.0004663468922955054, "loss": 3.3514, "step": 19155 }, { "epoch": 0.94, "grad_norm": 0.5133375525474548, "learning_rate": 0.00046633407951111075, "loss": 3.1282, "step": 19156 }, { "epoch": 0.94, "grad_norm": 0.5205239653587341, "learning_rate": 0.0004663212662886242, "loss": 3.0449, "step": 19157 }, { "epoch": 0.94, "grad_norm": 0.5108022093772888, "learning_rate": 0.00046630845262807935, "loss": 3.2283, "step": 19158 }, { "epoch": 0.94, "grad_norm": 0.5807484984397888, "learning_rate": 0.00046629563852951006, "loss": 3.1741, "step": 19159 }, { "epoch": 0.94, "grad_norm": 0.5120785236358643, "learning_rate": 0.0004662828239929502, "loss": 3.148, "step": 19160 }, { "epoch": 0.94, "grad_norm": 0.5248308181762695, "learning_rate": 0.00046627000901843316, "loss": 3.2335, "step": 19161 }, { "epoch": 0.94, "grad_norm": 0.5054056644439697, "learning_rate": 0.00046625719360599314, "loss": 3.0538, "step": 19162 }, { "epoch": 0.94, "grad_norm": 0.5169258117675781, "learning_rate": 0.0004662443777556635, "loss": 3.2335, "step": 19163 }, { "epoch": 0.94, "grad_norm": 0.5012264847755432, "learning_rate": 0.0004662315614674782, "loss": 3.0468, "step": 19164 }, { "epoch": 0.94, "grad_norm": 0.5255052447319031, "learning_rate": 0.00046621874474147104, "loss": 3.2484, "step": 19165 }, { "epoch": 0.94, "grad_norm": 0.5075822472572327, "learning_rate": 0.0004662059275776756, "loss": 3.2547, "step": 19166 }, { "epoch": 0.94, "grad_norm": 0.5703867077827454, "learning_rate": 0.0004661931099761258, "loss": 3.1126, "step": 19167 }, { "epoch": 0.94, "grad_norm": 0.5306580066680908, "learning_rate": 0.0004661802919368553, "loss": 3.0482, "step": 19168 }, { "epoch": 0.94, "grad_norm": 0.5228316187858582, "learning_rate": 0.0004661674734598979, "loss": 3.1605, "step": 19169 }, { "epoch": 0.94, "grad_norm": 0.5195432901382446, "learning_rate": 0.0004661546545452873, "loss": 3.1001, "step": 19170 }, { "epoch": 0.94, "grad_norm": 0.5943834781646729, "learning_rate": 0.00046614183519305745, "loss": 3.0419, "step": 19171 }, { "epoch": 0.94, "grad_norm": 0.5026077628135681, "learning_rate": 0.00046612901540324186, "loss": 3.1679, "step": 19172 }, { "epoch": 0.94, "grad_norm": 0.5464365482330322, "learning_rate": 0.00046611619517587447, "loss": 3.3324, "step": 19173 }, { "epoch": 0.94, "grad_norm": 0.5255415439605713, "learning_rate": 0.00046610337451098895, "loss": 3.0416, "step": 19174 }, { "epoch": 0.94, "grad_norm": 0.5834014415740967, "learning_rate": 0.0004660905534086192, "loss": 3.2447, "step": 19175 }, { "epoch": 0.94, "grad_norm": 0.5191861391067505, "learning_rate": 0.0004660777318687988, "loss": 3.0245, "step": 19176 }, { "epoch": 0.94, "grad_norm": 0.523767352104187, "learning_rate": 0.00046606490989156165, "loss": 3.0654, "step": 19177 }, { "epoch": 0.94, "grad_norm": 0.5461781620979309, "learning_rate": 0.00046605208747694155, "loss": 3.1267, "step": 19178 }, { "epoch": 0.94, "grad_norm": 0.507882297039032, "learning_rate": 0.0004660392646249721, "loss": 3.1003, "step": 19179 }, { "epoch": 0.94, "grad_norm": 0.543165385723114, "learning_rate": 0.00046602644133568715, "loss": 3.0974, "step": 19180 }, { "epoch": 0.94, "grad_norm": 0.5282258987426758, "learning_rate": 0.0004660136176091207, "loss": 3.1831, "step": 19181 }, { "epoch": 0.94, "grad_norm": 0.5365543961524963, "learning_rate": 0.0004660007934453062, "loss": 3.1649, "step": 19182 }, { "epoch": 0.94, "grad_norm": 0.5323196649551392, "learning_rate": 0.0004659879688442776, "loss": 3.0073, "step": 19183 }, { "epoch": 0.94, "grad_norm": 0.5398780107498169, "learning_rate": 0.00046597514380606854, "loss": 3.0684, "step": 19184 }, { "epoch": 0.94, "grad_norm": 0.5349022746086121, "learning_rate": 0.0004659623183307129, "loss": 3.065, "step": 19185 }, { "epoch": 0.94, "grad_norm": 0.5340750217437744, "learning_rate": 0.0004659494924182446, "loss": 3.074, "step": 19186 }, { "epoch": 0.94, "grad_norm": 0.5252206921577454, "learning_rate": 0.0004659366660686972, "loss": 3.1038, "step": 19187 }, { "epoch": 0.94, "grad_norm": 0.5100142359733582, "learning_rate": 0.00046592383928210457, "loss": 3.0467, "step": 19188 }, { "epoch": 0.94, "grad_norm": 0.5399733185768127, "learning_rate": 0.00046591101205850047, "loss": 3.0333, "step": 19189 }, { "epoch": 0.94, "grad_norm": 0.5943720936775208, "learning_rate": 0.0004658981843979186, "loss": 3.072, "step": 19190 }, { "epoch": 0.94, "grad_norm": 0.5421499609947205, "learning_rate": 0.0004658853563003929, "loss": 3.3646, "step": 19191 }, { "epoch": 0.94, "grad_norm": 0.532548189163208, "learning_rate": 0.00046587252776595717, "loss": 3.0745, "step": 19192 }, { "epoch": 0.94, "grad_norm": 0.5318267345428467, "learning_rate": 0.0004658596987946451, "loss": 3.0672, "step": 19193 }, { "epoch": 0.94, "grad_norm": 0.5340205430984497, "learning_rate": 0.00046584686938649044, "loss": 3.3863, "step": 19194 }, { "epoch": 0.94, "grad_norm": 0.5387397408485413, "learning_rate": 0.00046583403954152705, "loss": 3.1411, "step": 19195 }, { "epoch": 0.94, "grad_norm": 0.5424886345863342, "learning_rate": 0.0004658212092597888, "loss": 3.3, "step": 19196 }, { "epoch": 0.94, "grad_norm": 0.5942268967628479, "learning_rate": 0.0004658083785413093, "loss": 3.0225, "step": 19197 }, { "epoch": 0.94, "grad_norm": 0.5251699090003967, "learning_rate": 0.00046579554738612245, "loss": 3.1073, "step": 19198 }, { "epoch": 0.94, "grad_norm": 0.5253727436065674, "learning_rate": 0.0004657827157942621, "loss": 3.3228, "step": 19199 }, { "epoch": 0.94, "grad_norm": 0.521644651889801, "learning_rate": 0.0004657698837657619, "loss": 3.2844, "step": 19200 }, { "epoch": 0.94, "grad_norm": 0.5412613749504089, "learning_rate": 0.00046575705130065585, "loss": 3.2638, "step": 19201 }, { "epoch": 0.94, "grad_norm": 0.5078155994415283, "learning_rate": 0.00046574421839897754, "loss": 3.1013, "step": 19202 }, { "epoch": 0.94, "grad_norm": 0.5175430178642273, "learning_rate": 0.0004657313850607609, "loss": 2.9286, "step": 19203 }, { "epoch": 0.94, "grad_norm": 0.5150618553161621, "learning_rate": 0.0004657185512860397, "loss": 3.1024, "step": 19204 }, { "epoch": 0.94, "grad_norm": 0.5550238490104675, "learning_rate": 0.0004657057170748477, "loss": 2.8348, "step": 19205 }, { "epoch": 0.94, "grad_norm": 0.4976348280906677, "learning_rate": 0.00046569288242721867, "loss": 3.1287, "step": 19206 }, { "epoch": 0.94, "grad_norm": 0.5535556674003601, "learning_rate": 0.00046568004734318655, "loss": 3.095, "step": 19207 }, { "epoch": 0.94, "grad_norm": 0.525455892086029, "learning_rate": 0.0004656672118227851, "loss": 3.2602, "step": 19208 }, { "epoch": 0.94, "grad_norm": 0.5400662422180176, "learning_rate": 0.00046565437586604805, "loss": 3.2045, "step": 19209 }, { "epoch": 0.94, "grad_norm": 0.5572945475578308, "learning_rate": 0.0004656415394730092, "loss": 3.3045, "step": 19210 }, { "epoch": 0.94, "grad_norm": 0.5140613913536072, "learning_rate": 0.00046562870264370244, "loss": 3.2455, "step": 19211 }, { "epoch": 0.94, "grad_norm": 0.5592833757400513, "learning_rate": 0.0004656158653781616, "loss": 3.1826, "step": 19212 }, { "epoch": 0.94, "grad_norm": 0.5442313551902771, "learning_rate": 0.0004656030276764205, "loss": 3.1839, "step": 19213 }, { "epoch": 0.94, "grad_norm": 0.5157129168510437, "learning_rate": 0.0004655901895385128, "loss": 3.1051, "step": 19214 }, { "epoch": 0.94, "grad_norm": 0.5172207355499268, "learning_rate": 0.00046557735096447244, "loss": 3.331, "step": 19215 }, { "epoch": 0.94, "grad_norm": 0.5096434354782104, "learning_rate": 0.0004655645119543331, "loss": 2.9974, "step": 19216 }, { "epoch": 0.94, "grad_norm": 0.5327327847480774, "learning_rate": 0.00046555167250812886, "loss": 3.1229, "step": 19217 }, { "epoch": 0.94, "grad_norm": 0.5447360873222351, "learning_rate": 0.00046553883262589324, "loss": 3.0939, "step": 19218 }, { "epoch": 0.94, "grad_norm": 0.5559058785438538, "learning_rate": 0.0004655259923076603, "loss": 3.1901, "step": 19219 }, { "epoch": 0.94, "grad_norm": 0.49087318778038025, "learning_rate": 0.0004655131515534637, "loss": 3.2312, "step": 19220 }, { "epoch": 0.94, "grad_norm": 0.5363554954528809, "learning_rate": 0.00046550031036333734, "loss": 3.0807, "step": 19221 }, { "epoch": 0.94, "grad_norm": 0.5305533409118652, "learning_rate": 0.000465487468737315, "loss": 3.0706, "step": 19222 }, { "epoch": 0.94, "grad_norm": 0.5256800651550293, "learning_rate": 0.0004654746266754306, "loss": 3.1917, "step": 19223 }, { "epoch": 0.94, "grad_norm": 0.5493904948234558, "learning_rate": 0.00046546178417771774, "loss": 3.0993, "step": 19224 }, { "epoch": 0.94, "grad_norm": 0.5530810356140137, "learning_rate": 0.0004654489412442105, "loss": 3.1231, "step": 19225 }, { "epoch": 0.94, "grad_norm": 0.5260566473007202, "learning_rate": 0.0004654360978749424, "loss": 3.1445, "step": 19226 }, { "epoch": 0.94, "grad_norm": 0.5247520804405212, "learning_rate": 0.0004654232540699476, "loss": 3.3109, "step": 19227 }, { "epoch": 0.94, "grad_norm": 0.5059221386909485, "learning_rate": 0.0004654104098292598, "loss": 3.1187, "step": 19228 }, { "epoch": 0.94, "grad_norm": 0.5202158093452454, "learning_rate": 0.00046539756515291285, "loss": 3.2282, "step": 19229 }, { "epoch": 0.94, "grad_norm": 0.542550802230835, "learning_rate": 0.0004653847200409405, "loss": 3.134, "step": 19230 }, { "epoch": 0.94, "grad_norm": 0.5520852208137512, "learning_rate": 0.00046537187449337654, "loss": 3.2093, "step": 19231 }, { "epoch": 0.94, "grad_norm": 0.5276000499725342, "learning_rate": 0.00046535902851025496, "loss": 3.0294, "step": 19232 }, { "epoch": 0.94, "grad_norm": 0.5380484461784363, "learning_rate": 0.0004653461820916096, "loss": 3.097, "step": 19233 }, { "epoch": 0.94, "grad_norm": 0.5322332382202148, "learning_rate": 0.0004653333352374741, "loss": 3.2955, "step": 19234 }, { "epoch": 0.94, "grad_norm": 0.5831676125526428, "learning_rate": 0.00046532048794788243, "loss": 3.1426, "step": 19235 }, { "epoch": 0.94, "grad_norm": 0.5310155153274536, "learning_rate": 0.00046530764022286835, "loss": 3.2932, "step": 19236 }, { "epoch": 0.94, "grad_norm": 0.48738566040992737, "learning_rate": 0.00046529479206246585, "loss": 3.0159, "step": 19237 }, { "epoch": 0.94, "grad_norm": 0.5302239060401917, "learning_rate": 0.0004652819434667088, "loss": 3.221, "step": 19238 }, { "epoch": 0.94, "grad_norm": 0.5241603851318359, "learning_rate": 0.00046526909443563074, "loss": 3.0768, "step": 19239 }, { "epoch": 0.94, "grad_norm": 0.5549540519714355, "learning_rate": 0.0004652562449692658, "loss": 3.2177, "step": 19240 }, { "epoch": 0.94, "grad_norm": 0.521250307559967, "learning_rate": 0.00046524339506764755, "loss": 3.2072, "step": 19241 }, { "epoch": 0.94, "grad_norm": 0.5026115775108337, "learning_rate": 0.0004652305447308101, "loss": 3.1384, "step": 19242 }, { "epoch": 0.94, "grad_norm": 0.5054687261581421, "learning_rate": 0.0004652176939587872, "loss": 2.9898, "step": 19243 }, { "epoch": 0.94, "grad_norm": 0.5394466519355774, "learning_rate": 0.00046520484275161273, "loss": 3.0466, "step": 19244 }, { "epoch": 0.94, "grad_norm": 0.534470796585083, "learning_rate": 0.0004651919911093204, "loss": 3.0333, "step": 19245 }, { "epoch": 0.94, "grad_norm": 0.5293780565261841, "learning_rate": 0.0004651791390319443, "loss": 3.3452, "step": 19246 }, { "epoch": 0.94, "grad_norm": 0.5452441573143005, "learning_rate": 0.00046516628651951806, "loss": 3.1538, "step": 19247 }, { "epoch": 0.94, "grad_norm": 0.49200424551963806, "learning_rate": 0.00046515343357207554, "loss": 3.3007, "step": 19248 }, { "epoch": 0.94, "grad_norm": 0.5668293833732605, "learning_rate": 0.0004651405801896507, "loss": 3.2227, "step": 19249 }, { "epoch": 0.94, "grad_norm": 0.5335699915885925, "learning_rate": 0.00046512772637227745, "loss": 3.2257, "step": 19250 }, { "epoch": 0.94, "grad_norm": 0.5185949206352234, "learning_rate": 0.00046511487211998954, "loss": 3.0215, "step": 19251 }, { "epoch": 0.94, "grad_norm": 0.5684577822685242, "learning_rate": 0.0004651020174328207, "loss": 3.1563, "step": 19252 }, { "epoch": 0.94, "grad_norm": 0.5220710635185242, "learning_rate": 0.000465089162310805, "loss": 3.2089, "step": 19253 }, { "epoch": 0.94, "grad_norm": 0.5233851075172424, "learning_rate": 0.0004650763067539762, "loss": 3.2768, "step": 19254 }, { "epoch": 0.94, "grad_norm": 0.5065687298774719, "learning_rate": 0.00046506345076236823, "loss": 3.1146, "step": 19255 }, { "epoch": 0.94, "grad_norm": 0.5225540399551392, "learning_rate": 0.0004650505943360148, "loss": 3.1571, "step": 19256 }, { "epoch": 0.94, "grad_norm": 0.5644252300262451, "learning_rate": 0.00046503773747494994, "loss": 3.2514, "step": 19257 }, { "epoch": 0.94, "grad_norm": 0.5171868205070496, "learning_rate": 0.00046502488017920743, "loss": 3.1537, "step": 19258 }, { "epoch": 0.94, "grad_norm": 0.5381227731704712, "learning_rate": 0.0004650120224488212, "loss": 3.3608, "step": 19259 }, { "epoch": 0.94, "grad_norm": 0.5154421329498291, "learning_rate": 0.000464999164283825, "loss": 3.0479, "step": 19260 }, { "epoch": 0.94, "grad_norm": 0.5348089933395386, "learning_rate": 0.00046498630568425273, "loss": 3.2822, "step": 19261 }, { "epoch": 0.94, "grad_norm": 0.518226683139801, "learning_rate": 0.0004649734466501383, "loss": 3.1494, "step": 19262 }, { "epoch": 0.94, "grad_norm": 0.5043905973434448, "learning_rate": 0.0004649605871815156, "loss": 3.3799, "step": 19263 }, { "epoch": 0.94, "grad_norm": 0.5411002039909363, "learning_rate": 0.0004649477272784184, "loss": 3.1399, "step": 19264 }, { "epoch": 0.94, "grad_norm": 0.55306476354599, "learning_rate": 0.0004649348669408807, "loss": 3.0225, "step": 19265 }, { "epoch": 0.94, "grad_norm": 0.5231731534004211, "learning_rate": 0.00046492200616893623, "loss": 3.0566, "step": 19266 }, { "epoch": 0.94, "grad_norm": 0.5047503113746643, "learning_rate": 0.00046490914496261895, "loss": 3.0925, "step": 19267 }, { "epoch": 0.94, "grad_norm": 0.4931429922580719, "learning_rate": 0.0004648962833219627, "loss": 3.3032, "step": 19268 }, { "epoch": 0.94, "grad_norm": 0.5375720858573914, "learning_rate": 0.0004648834212470015, "loss": 2.9951, "step": 19269 }, { "epoch": 0.94, "grad_norm": 0.5181878209114075, "learning_rate": 0.000464870558737769, "loss": 3.1347, "step": 19270 }, { "epoch": 0.94, "grad_norm": 0.5091689825057983, "learning_rate": 0.00046485769579429924, "loss": 3.1832, "step": 19271 }, { "epoch": 0.94, "grad_norm": 0.5010989308357239, "learning_rate": 0.000464844832416626, "loss": 3.3125, "step": 19272 }, { "epoch": 0.94, "grad_norm": 0.5260189771652222, "learning_rate": 0.0004648319686047831, "loss": 3.1826, "step": 19273 }, { "epoch": 0.94, "grad_norm": 0.5514676570892334, "learning_rate": 0.0004648191043588046, "loss": 3.2043, "step": 19274 }, { "epoch": 0.94, "grad_norm": 0.5793471932411194, "learning_rate": 0.0004648062396787243, "loss": 3.106, "step": 19275 }, { "epoch": 0.94, "grad_norm": 0.5154271125793457, "learning_rate": 0.00046479337456457615, "loss": 3.215, "step": 19276 }, { "epoch": 0.94, "grad_norm": 0.5268782377243042, "learning_rate": 0.0004647805090163939, "loss": 3.0782, "step": 19277 }, { "epoch": 0.94, "grad_norm": 0.5476327538490295, "learning_rate": 0.0004647676430342115, "loss": 3.0688, "step": 19278 }, { "epoch": 0.94, "grad_norm": 0.5455402135848999, "learning_rate": 0.00046475477661806283, "loss": 3.1699, "step": 19279 }, { "epoch": 0.94, "grad_norm": 0.5583153963088989, "learning_rate": 0.00046474190976798183, "loss": 3.0606, "step": 19280 }, { "epoch": 0.94, "grad_norm": 0.5432180762290955, "learning_rate": 0.0004647290424840023, "loss": 3.3071, "step": 19281 }, { "epoch": 0.94, "grad_norm": 0.5387990474700928, "learning_rate": 0.00046471617476615825, "loss": 3.2044, "step": 19282 }, { "epoch": 0.95, "grad_norm": 0.5935296416282654, "learning_rate": 0.0004647033066144834, "loss": 3.0468, "step": 19283 }, { "epoch": 0.95, "grad_norm": 0.5138740539550781, "learning_rate": 0.00046469043802901174, "loss": 2.9805, "step": 19284 }, { "epoch": 0.95, "grad_norm": 0.5473163723945618, "learning_rate": 0.0004646775690097772, "loss": 3.0915, "step": 19285 }, { "epoch": 0.95, "grad_norm": 0.5593499541282654, "learning_rate": 0.0004646646995568136, "loss": 3.2594, "step": 19286 }, { "epoch": 0.95, "grad_norm": 0.5125278234481812, "learning_rate": 0.0004646518296701549, "loss": 2.9646, "step": 19287 }, { "epoch": 0.95, "grad_norm": 0.5372989177703857, "learning_rate": 0.000464638959349835, "loss": 3.0846, "step": 19288 }, { "epoch": 0.95, "grad_norm": 0.5440700650215149, "learning_rate": 0.0004646260885958877, "loss": 3.2548, "step": 19289 }, { "epoch": 0.95, "grad_norm": 0.5197234749794006, "learning_rate": 0.000464613217408347, "loss": 2.9965, "step": 19290 }, { "epoch": 0.95, "grad_norm": 0.5141440629959106, "learning_rate": 0.0004646003457872468, "loss": 3.0835, "step": 19291 }, { "epoch": 0.95, "grad_norm": 0.5057777762413025, "learning_rate": 0.000464587473732621, "loss": 3.2426, "step": 19292 }, { "epoch": 0.95, "grad_norm": 0.5304477214813232, "learning_rate": 0.0004645746012445033, "loss": 3.1578, "step": 19293 }, { "epoch": 0.95, "grad_norm": 0.49843043088912964, "learning_rate": 0.00046456172832292795, "loss": 3.2066, "step": 19294 }, { "epoch": 0.95, "grad_norm": 0.5783942341804504, "learning_rate": 0.0004645488549679286, "loss": 3.3959, "step": 19295 }, { "epoch": 0.95, "grad_norm": 0.529236376285553, "learning_rate": 0.0004645359811795393, "loss": 3.1483, "step": 19296 }, { "epoch": 0.95, "grad_norm": 0.5238991379737854, "learning_rate": 0.0004645231069577938, "loss": 3.0694, "step": 19297 }, { "epoch": 0.95, "grad_norm": 0.5467185974121094, "learning_rate": 0.00046451023230272606, "loss": 3.1344, "step": 19298 }, { "epoch": 0.95, "grad_norm": 0.5267390608787537, "learning_rate": 0.00046449735721437014, "loss": 2.9927, "step": 19299 }, { "epoch": 0.95, "grad_norm": 0.5365429520606995, "learning_rate": 0.0004644844816927598, "loss": 3.0123, "step": 19300 }, { "epoch": 0.95, "grad_norm": 0.5720606446266174, "learning_rate": 0.000464471605737929, "loss": 3.1872, "step": 19301 }, { "epoch": 0.95, "grad_norm": 0.5571929216384888, "learning_rate": 0.00046445872934991163, "loss": 3.2165, "step": 19302 }, { "epoch": 0.95, "grad_norm": 0.6748465299606323, "learning_rate": 0.00046444585252874163, "loss": 2.8737, "step": 19303 }, { "epoch": 0.95, "grad_norm": 0.5558235049247742, "learning_rate": 0.00046443297527445286, "loss": 3.1197, "step": 19304 }, { "epoch": 0.95, "grad_norm": 0.5244531631469727, "learning_rate": 0.0004644200975870793, "loss": 3.2594, "step": 19305 }, { "epoch": 0.95, "grad_norm": 0.5120171904563904, "learning_rate": 0.0004644072194666549, "loss": 3.2905, "step": 19306 }, { "epoch": 0.95, "grad_norm": 0.502840518951416, "learning_rate": 0.0004643943409132135, "loss": 2.9992, "step": 19307 }, { "epoch": 0.95, "grad_norm": 0.5086363554000854, "learning_rate": 0.000464381461926789, "loss": 2.906, "step": 19308 }, { "epoch": 0.95, "grad_norm": 0.5207031965255737, "learning_rate": 0.0004643685825074154, "loss": 3.3385, "step": 19309 }, { "epoch": 0.95, "grad_norm": 0.512380838394165, "learning_rate": 0.0004643557026551266, "loss": 3.2487, "step": 19310 }, { "epoch": 0.95, "grad_norm": 0.544256865978241, "learning_rate": 0.00046434282236995655, "loss": 3.1312, "step": 19311 }, { "epoch": 0.95, "grad_norm": 0.5640758872032166, "learning_rate": 0.000464329941651939, "loss": 3.1362, "step": 19312 }, { "epoch": 0.95, "grad_norm": 0.5477793216705322, "learning_rate": 0.0004643170605011081, "loss": 2.9544, "step": 19313 }, { "epoch": 0.95, "grad_norm": 0.5070680975914001, "learning_rate": 0.00046430417891749764, "loss": 3.1279, "step": 19314 }, { "epoch": 0.95, "grad_norm": 0.5301992893218994, "learning_rate": 0.00046429129690114167, "loss": 3.2035, "step": 19315 }, { "epoch": 0.95, "grad_norm": 0.5810301899909973, "learning_rate": 0.00046427841445207394, "loss": 3.1866, "step": 19316 }, { "epoch": 0.95, "grad_norm": 0.544684112071991, "learning_rate": 0.00046426553157032855, "loss": 3.2024, "step": 19317 }, { "epoch": 0.95, "grad_norm": 0.5401524305343628, "learning_rate": 0.0004642526482559394, "loss": 2.8806, "step": 19318 }, { "epoch": 0.95, "grad_norm": 0.6001310348510742, "learning_rate": 0.0004642397645089403, "loss": 3.1245, "step": 19319 }, { "epoch": 0.95, "grad_norm": 0.5786098837852478, "learning_rate": 0.0004642268803293653, "loss": 3.1757, "step": 19320 }, { "epoch": 0.95, "grad_norm": 0.579869270324707, "learning_rate": 0.00046421399571724834, "loss": 3.1806, "step": 19321 }, { "epoch": 0.95, "grad_norm": 0.5166104435920715, "learning_rate": 0.0004642011106726233, "loss": 3.0663, "step": 19322 }, { "epoch": 0.95, "grad_norm": 0.5852053165435791, "learning_rate": 0.00046418822519552416, "loss": 3.1294, "step": 19323 }, { "epoch": 0.95, "grad_norm": 0.4941536784172058, "learning_rate": 0.0004641753392859847, "loss": 3.0502, "step": 19324 }, { "epoch": 0.95, "grad_norm": 0.5297553539276123, "learning_rate": 0.00046416245294403916, "loss": 3.356, "step": 19325 }, { "epoch": 0.95, "grad_norm": 0.5243952870368958, "learning_rate": 0.00046414956616972126, "loss": 3.1456, "step": 19326 }, { "epoch": 0.95, "grad_norm": 0.5288766026496887, "learning_rate": 0.00046413667896306495, "loss": 2.9972, "step": 19327 }, { "epoch": 0.95, "grad_norm": 0.5393479466438293, "learning_rate": 0.00046412379132410427, "loss": 2.8842, "step": 19328 }, { "epoch": 0.95, "grad_norm": 0.49066945910453796, "learning_rate": 0.0004641109032528731, "loss": 2.9538, "step": 19329 }, { "epoch": 0.95, "grad_norm": 0.509368360042572, "learning_rate": 0.0004640980147494053, "loss": 3.1996, "step": 19330 }, { "epoch": 0.95, "grad_norm": 0.5085881948471069, "learning_rate": 0.00046408512581373507, "loss": 3.1171, "step": 19331 }, { "epoch": 0.95, "grad_norm": 0.5356560349464417, "learning_rate": 0.00046407223644589606, "loss": 3.0997, "step": 19332 }, { "epoch": 0.95, "grad_norm": 0.5444095730781555, "learning_rate": 0.0004640593466459225, "loss": 2.9612, "step": 19333 }, { "epoch": 0.95, "grad_norm": 0.5353986620903015, "learning_rate": 0.00046404645641384804, "loss": 2.9857, "step": 19334 }, { "epoch": 0.95, "grad_norm": 0.5451989769935608, "learning_rate": 0.00046403356574970683, "loss": 3.2518, "step": 19335 }, { "epoch": 0.95, "grad_norm": 0.5122106671333313, "learning_rate": 0.0004640206746535328, "loss": 2.9274, "step": 19336 }, { "epoch": 0.95, "grad_norm": 0.512254536151886, "learning_rate": 0.00046400778312536, "loss": 3.2984, "step": 19337 }, { "epoch": 0.95, "grad_norm": 0.5183076858520508, "learning_rate": 0.00046399489116522204, "loss": 3.2875, "step": 19338 }, { "epoch": 0.95, "grad_norm": 0.5587219595909119, "learning_rate": 0.00046398199877315313, "loss": 3.0897, "step": 19339 }, { "epoch": 0.95, "grad_norm": 0.5293834209442139, "learning_rate": 0.00046396910594918736, "loss": 3.1424, "step": 19340 }, { "epoch": 0.95, "grad_norm": 0.5190085172653198, "learning_rate": 0.0004639562126933584, "loss": 3.1517, "step": 19341 }, { "epoch": 0.95, "grad_norm": 0.5054040551185608, "learning_rate": 0.0004639433190057004, "loss": 3.1675, "step": 19342 }, { "epoch": 0.95, "grad_norm": 0.5302183628082275, "learning_rate": 0.0004639304248862472, "loss": 3.322, "step": 19343 }, { "epoch": 0.95, "grad_norm": 0.5174694657325745, "learning_rate": 0.0004639175303350328, "loss": 3.1216, "step": 19344 }, { "epoch": 0.95, "grad_norm": 0.5287314057350159, "learning_rate": 0.00046390463535209115, "loss": 2.9587, "step": 19345 }, { "epoch": 0.95, "grad_norm": 0.49835264682769775, "learning_rate": 0.0004638917399374563, "loss": 3.1747, "step": 19346 }, { "epoch": 0.95, "grad_norm": 0.5517767071723938, "learning_rate": 0.0004638788440911621, "loss": 3.1357, "step": 19347 }, { "epoch": 0.95, "grad_norm": 0.5469534993171692, "learning_rate": 0.0004638659478132426, "loss": 3.1799, "step": 19348 }, { "epoch": 0.95, "grad_norm": 0.5432090163230896, "learning_rate": 0.0004638530511037317, "loss": 3.2207, "step": 19349 }, { "epoch": 0.95, "grad_norm": 0.6552304029464722, "learning_rate": 0.00046384015396266345, "loss": 3.1993, "step": 19350 }, { "epoch": 0.95, "grad_norm": 0.5897645354270935, "learning_rate": 0.00046382725639007165, "loss": 3.1047, "step": 19351 }, { "epoch": 0.95, "grad_norm": 0.5444059371948242, "learning_rate": 0.0004638143583859905, "loss": 3.164, "step": 19352 }, { "epoch": 0.95, "grad_norm": 0.5374630093574524, "learning_rate": 0.0004638014599504539, "loss": 2.9453, "step": 19353 }, { "epoch": 0.95, "grad_norm": 0.5767490267753601, "learning_rate": 0.0004637885610834956, "loss": 3.1528, "step": 19354 }, { "epoch": 0.95, "grad_norm": 0.5264842510223389, "learning_rate": 0.0004637756617851499, "loss": 2.9574, "step": 19355 }, { "epoch": 0.95, "grad_norm": 0.5502093434333801, "learning_rate": 0.00046376276205545053, "loss": 3.3082, "step": 19356 }, { "epoch": 0.95, "grad_norm": 0.5488525629043579, "learning_rate": 0.00046374986189443165, "loss": 2.9616, "step": 19357 }, { "epoch": 0.95, "grad_norm": 0.5013023018836975, "learning_rate": 0.0004637369613021271, "loss": 3.1773, "step": 19358 }, { "epoch": 0.95, "grad_norm": 0.5348243713378906, "learning_rate": 0.0004637240602785709, "loss": 3.1887, "step": 19359 }, { "epoch": 0.95, "grad_norm": 0.5497328042984009, "learning_rate": 0.00046371115882379706, "loss": 3.0564, "step": 19360 }, { "epoch": 0.95, "grad_norm": 0.545487105846405, "learning_rate": 0.0004636982569378395, "loss": 3.0005, "step": 19361 }, { "epoch": 0.95, "grad_norm": 0.5591291785240173, "learning_rate": 0.0004636853546207323, "loss": 3.0755, "step": 19362 }, { "epoch": 0.95, "grad_norm": 0.5495262145996094, "learning_rate": 0.0004636724518725093, "loss": 3.4243, "step": 19363 }, { "epoch": 0.95, "grad_norm": 0.5503337979316711, "learning_rate": 0.00046365954869320464, "loss": 2.9641, "step": 19364 }, { "epoch": 0.95, "grad_norm": 0.508242130279541, "learning_rate": 0.00046364664508285217, "loss": 3.2054, "step": 19365 }, { "epoch": 0.95, "grad_norm": 0.5202712416648865, "learning_rate": 0.0004636337410414859, "loss": 2.9891, "step": 19366 }, { "epoch": 0.95, "grad_norm": 0.57612144947052, "learning_rate": 0.0004636208365691399, "loss": 3.0345, "step": 19367 }, { "epoch": 0.95, "grad_norm": 0.5074007511138916, "learning_rate": 0.0004636079316658481, "loss": 3.0326, "step": 19368 }, { "epoch": 0.95, "grad_norm": 0.5342861413955688, "learning_rate": 0.0004635950263316445, "loss": 3.0882, "step": 19369 }, { "epoch": 0.95, "grad_norm": 0.531694769859314, "learning_rate": 0.00046358212056656306, "loss": 3.249, "step": 19370 }, { "epoch": 0.95, "grad_norm": 0.5404502749443054, "learning_rate": 0.00046356921437063777, "loss": 3.159, "step": 19371 }, { "epoch": 0.95, "grad_norm": 0.5267776250839233, "learning_rate": 0.00046355630774390274, "loss": 3.1012, "step": 19372 }, { "epoch": 0.95, "grad_norm": 0.5390035510063171, "learning_rate": 0.0004635434006863919, "loss": 3.223, "step": 19373 }, { "epoch": 0.95, "grad_norm": 0.49688008427619934, "learning_rate": 0.00046353049319813904, "loss": 3.1074, "step": 19374 }, { "epoch": 0.95, "grad_norm": 0.557471752166748, "learning_rate": 0.00046351758527917846, "loss": 2.9832, "step": 19375 }, { "epoch": 0.95, "grad_norm": 0.4971594214439392, "learning_rate": 0.000463504676929544, "loss": 3.2285, "step": 19376 }, { "epoch": 0.95, "grad_norm": 0.5553884506225586, "learning_rate": 0.0004634917681492697, "loss": 3.1043, "step": 19377 }, { "epoch": 0.95, "grad_norm": 0.5280371308326721, "learning_rate": 0.00046347885893838957, "loss": 3.2346, "step": 19378 }, { "epoch": 0.95, "grad_norm": 0.5034427046775818, "learning_rate": 0.0004634659492969376, "loss": 3.1326, "step": 19379 }, { "epoch": 0.95, "grad_norm": 0.5268436670303345, "learning_rate": 0.0004634530392249476, "loss": 3.2807, "step": 19380 }, { "epoch": 0.95, "grad_norm": 0.5234237909317017, "learning_rate": 0.000463440128722454, "loss": 3.1346, "step": 19381 }, { "epoch": 0.95, "grad_norm": 0.539435863494873, "learning_rate": 0.0004634272177894904, "loss": 3.1197, "step": 19382 }, { "epoch": 0.95, "grad_norm": 0.5603162050247192, "learning_rate": 0.000463414306426091, "loss": 3.0302, "step": 19383 }, { "epoch": 0.95, "grad_norm": 0.5288932919502258, "learning_rate": 0.0004634013946322898, "loss": 3.2148, "step": 19384 }, { "epoch": 0.95, "grad_norm": 0.5117092728614807, "learning_rate": 0.00046338848240812063, "loss": 3.3284, "step": 19385 }, { "epoch": 0.95, "grad_norm": 0.5178090929985046, "learning_rate": 0.0004633755697536178, "loss": 3.2783, "step": 19386 }, { "epoch": 0.95, "grad_norm": 0.5456027388572693, "learning_rate": 0.0004633626566688152, "loss": 2.9413, "step": 19387 }, { "epoch": 0.95, "grad_norm": 0.4823963940143585, "learning_rate": 0.0004633497431537467, "loss": 3.0158, "step": 19388 }, { "epoch": 0.95, "grad_norm": 0.5608025193214417, "learning_rate": 0.0004633368292084464, "loss": 3.2938, "step": 19389 }, { "epoch": 0.95, "grad_norm": 0.5916173458099365, "learning_rate": 0.0004633239148329483, "loss": 3.0889, "step": 19390 }, { "epoch": 0.95, "grad_norm": 0.5329937934875488, "learning_rate": 0.0004633110000272866, "loss": 2.8621, "step": 19391 }, { "epoch": 0.95, "grad_norm": 0.5109493732452393, "learning_rate": 0.00046329808479149496, "loss": 2.8752, "step": 19392 }, { "epoch": 0.95, "grad_norm": 0.5147404074668884, "learning_rate": 0.0004632851691256078, "loss": 3.2002, "step": 19393 }, { "epoch": 0.95, "grad_norm": 0.5089914798736572, "learning_rate": 0.00046327225302965873, "loss": 3.2138, "step": 19394 }, { "epoch": 0.95, "grad_norm": 0.5555095672607422, "learning_rate": 0.00046325933650368206, "loss": 3.2143, "step": 19395 }, { "epoch": 0.95, "grad_norm": 0.6058218479156494, "learning_rate": 0.0004632464195477118, "loss": 3.1112, "step": 19396 }, { "epoch": 0.95, "grad_norm": 0.5387574434280396, "learning_rate": 0.00046323350216178174, "loss": 3.0377, "step": 19397 }, { "epoch": 0.95, "grad_norm": 0.5902401804924011, "learning_rate": 0.00046322058434592617, "loss": 3.0203, "step": 19398 }, { "epoch": 0.95, "grad_norm": 0.5146337747573853, "learning_rate": 0.0004632076661001789, "loss": 2.8675, "step": 19399 }, { "epoch": 0.95, "grad_norm": 0.508232057094574, "learning_rate": 0.0004631947474245741, "loss": 3.2896, "step": 19400 }, { "epoch": 0.95, "grad_norm": 0.5544096827507019, "learning_rate": 0.00046318182831914565, "loss": 3.0256, "step": 19401 }, { "epoch": 0.95, "grad_norm": 0.5086759328842163, "learning_rate": 0.0004631689087839278, "loss": 3.1559, "step": 19402 }, { "epoch": 0.95, "grad_norm": 0.5778783559799194, "learning_rate": 0.0004631559888189544, "loss": 3.1122, "step": 19403 }, { "epoch": 0.95, "grad_norm": 0.5708818435668945, "learning_rate": 0.00046314306842425954, "loss": 3.1534, "step": 19404 }, { "epoch": 0.95, "grad_norm": 0.5399811863899231, "learning_rate": 0.00046313014759987716, "loss": 3.0594, "step": 19405 }, { "epoch": 0.95, "grad_norm": 0.5436029434204102, "learning_rate": 0.0004631172263458414, "loss": 3.0062, "step": 19406 }, { "epoch": 0.95, "grad_norm": 0.5369762182235718, "learning_rate": 0.0004631043046621863, "loss": 3.0197, "step": 19407 }, { "epoch": 0.95, "grad_norm": 0.5159459710121155, "learning_rate": 0.00046309138254894586, "loss": 3.0661, "step": 19408 }, { "epoch": 0.95, "grad_norm": 0.5201807618141174, "learning_rate": 0.00046307846000615406, "loss": 3.1795, "step": 19409 }, { "epoch": 0.95, "grad_norm": 0.5022982954978943, "learning_rate": 0.000463065537033845, "loss": 3.2928, "step": 19410 }, { "epoch": 0.95, "grad_norm": 0.5162597298622131, "learning_rate": 0.0004630526136320527, "loss": 3.4038, "step": 19411 }, { "epoch": 0.95, "grad_norm": 0.5503665804862976, "learning_rate": 0.0004630396898008112, "loss": 3.1428, "step": 19412 }, { "epoch": 0.95, "grad_norm": 0.5348483920097351, "learning_rate": 0.0004630267655401545, "loss": 2.9746, "step": 19413 }, { "epoch": 0.95, "grad_norm": 0.5585376024246216, "learning_rate": 0.00046301384085011666, "loss": 3.4144, "step": 19414 }, { "epoch": 0.95, "grad_norm": 0.5287220478057861, "learning_rate": 0.0004630009157307319, "loss": 3.0886, "step": 19415 }, { "epoch": 0.95, "grad_norm": 0.5285464525222778, "learning_rate": 0.00046298799018203385, "loss": 3.0322, "step": 19416 }, { "epoch": 0.95, "grad_norm": 0.5362486839294434, "learning_rate": 0.00046297506420405697, "loss": 3.1066, "step": 19417 }, { "epoch": 0.95, "grad_norm": 0.5312342643737793, "learning_rate": 0.00046296213779683506, "loss": 3.1128, "step": 19418 }, { "epoch": 0.95, "grad_norm": 0.5306200981140137, "learning_rate": 0.0004629492109604023, "loss": 3.0913, "step": 19419 }, { "epoch": 0.95, "grad_norm": 0.5508071780204773, "learning_rate": 0.0004629362836947927, "loss": 3.2167, "step": 19420 }, { "epoch": 0.95, "grad_norm": 0.5401845574378967, "learning_rate": 0.0004629233560000401, "loss": 2.9779, "step": 19421 }, { "epoch": 0.95, "grad_norm": 0.5715287327766418, "learning_rate": 0.00046291042787617896, "loss": 3.215, "step": 19422 }, { "epoch": 0.95, "grad_norm": 0.5305150747299194, "learning_rate": 0.000462897499323243, "loss": 3.1558, "step": 19423 }, { "epoch": 0.95, "grad_norm": 0.5180661082267761, "learning_rate": 0.00046288457034126645, "loss": 3.02, "step": 19424 }, { "epoch": 0.95, "grad_norm": 0.5526053309440613, "learning_rate": 0.0004628716409302832, "loss": 3.0368, "step": 19425 }, { "epoch": 0.95, "grad_norm": 0.5305153727531433, "learning_rate": 0.00046285871109032743, "loss": 3.0802, "step": 19426 }, { "epoch": 0.95, "grad_norm": 0.5597370266914368, "learning_rate": 0.00046284578082143315, "loss": 3.0115, "step": 19427 }, { "epoch": 0.95, "grad_norm": 0.5298964977264404, "learning_rate": 0.00046283285012363446, "loss": 3.121, "step": 19428 }, { "epoch": 0.95, "grad_norm": 0.4885500371456146, "learning_rate": 0.00046281991899696533, "loss": 3.2538, "step": 19429 }, { "epoch": 0.95, "grad_norm": 0.5146791934967041, "learning_rate": 0.00046280698744145994, "loss": 3.0759, "step": 19430 }, { "epoch": 0.95, "grad_norm": 0.5360423922538757, "learning_rate": 0.0004627940554571522, "loss": 3.0549, "step": 19431 }, { "epoch": 0.95, "grad_norm": 0.5529478192329407, "learning_rate": 0.0004627811230440763, "loss": 3.1961, "step": 19432 }, { "epoch": 0.95, "grad_norm": 0.5056607723236084, "learning_rate": 0.00046276819020226617, "loss": 2.9044, "step": 19433 }, { "epoch": 0.95, "grad_norm": 0.5211524367332458, "learning_rate": 0.000462755256931756, "loss": 3.2076, "step": 19434 }, { "epoch": 0.95, "grad_norm": 0.5306402444839478, "learning_rate": 0.00046274232323257984, "loss": 3.2183, "step": 19435 }, { "epoch": 0.95, "grad_norm": 0.5615695714950562, "learning_rate": 0.00046272938910477167, "loss": 3.0429, "step": 19436 }, { "epoch": 0.95, "grad_norm": 0.5487105250358582, "learning_rate": 0.0004627164545483656, "loss": 3.0273, "step": 19437 }, { "epoch": 0.95, "grad_norm": 0.5336460471153259, "learning_rate": 0.0004627035195633958, "loss": 3.3118, "step": 19438 }, { "epoch": 0.95, "grad_norm": 0.5271636247634888, "learning_rate": 0.00046269058414989613, "loss": 3.1076, "step": 19439 }, { "epoch": 0.95, "grad_norm": 0.5462597012519836, "learning_rate": 0.00046267764830790084, "loss": 3.1968, "step": 19440 }, { "epoch": 0.95, "grad_norm": 0.5113808512687683, "learning_rate": 0.000462664712037444, "loss": 3.2844, "step": 19441 }, { "epoch": 0.95, "grad_norm": 0.5376443266868591, "learning_rate": 0.0004626517753385595, "loss": 3.1548, "step": 19442 }, { "epoch": 0.95, "grad_norm": 0.49754318594932556, "learning_rate": 0.0004626388382112815, "loss": 3.083, "step": 19443 }, { "epoch": 0.95, "grad_norm": 0.4905608594417572, "learning_rate": 0.0004626259006556441, "loss": 3.3342, "step": 19444 }, { "epoch": 0.95, "grad_norm": 0.5019567012786865, "learning_rate": 0.00046261296267168146, "loss": 3.1041, "step": 19445 }, { "epoch": 0.95, "grad_norm": 0.5304019451141357, "learning_rate": 0.00046260002425942755, "loss": 3.3963, "step": 19446 }, { "epoch": 0.95, "grad_norm": 0.5429536700248718, "learning_rate": 0.0004625870854189165, "loss": 3.04, "step": 19447 }, { "epoch": 0.95, "grad_norm": 0.5398834943771362, "learning_rate": 0.0004625741461501823, "loss": 3.1989, "step": 19448 }, { "epoch": 0.95, "grad_norm": 0.5834211707115173, "learning_rate": 0.00046256120645325906, "loss": 3.161, "step": 19449 }, { "epoch": 0.95, "grad_norm": 0.5296370387077332, "learning_rate": 0.000462548266328181, "loss": 3.0605, "step": 19450 }, { "epoch": 0.95, "grad_norm": 0.5220499634742737, "learning_rate": 0.000462535325774982, "loss": 3.2008, "step": 19451 }, { "epoch": 0.95, "grad_norm": 0.5129631757736206, "learning_rate": 0.00046252238479369624, "loss": 3.1372, "step": 19452 }, { "epoch": 0.95, "grad_norm": 0.5435758233070374, "learning_rate": 0.0004625094433843579, "loss": 3.0829, "step": 19453 }, { "epoch": 0.95, "grad_norm": 0.57452392578125, "learning_rate": 0.0004624965015470008, "loss": 3.2124, "step": 19454 }, { "epoch": 0.95, "grad_norm": 0.5123562216758728, "learning_rate": 0.0004624835592816593, "loss": 3.2627, "step": 19455 }, { "epoch": 0.95, "grad_norm": 0.5587196350097656, "learning_rate": 0.00046247061658836726, "loss": 3.0844, "step": 19456 }, { "epoch": 0.95, "grad_norm": 0.5435320138931274, "learning_rate": 0.000462457673467159, "loss": 3.2731, "step": 19457 }, { "epoch": 0.95, "grad_norm": 0.5726426243782043, "learning_rate": 0.0004624447299180685, "loss": 3.237, "step": 19458 }, { "epoch": 0.95, "grad_norm": 0.5328337550163269, "learning_rate": 0.0004624317859411298, "loss": 3.0805, "step": 19459 }, { "epoch": 0.95, "grad_norm": 0.5529270172119141, "learning_rate": 0.00046241884153637696, "loss": 2.9795, "step": 19460 }, { "epoch": 0.95, "grad_norm": 0.5545976161956787, "learning_rate": 0.00046240589670384425, "loss": 3.0586, "step": 19461 }, { "epoch": 0.95, "grad_norm": 0.48107704520225525, "learning_rate": 0.0004623929514435656, "loss": 3.1515, "step": 19462 }, { "epoch": 0.95, "grad_norm": 0.576178789138794, "learning_rate": 0.0004623800057555752, "loss": 3.2103, "step": 19463 }, { "epoch": 0.95, "grad_norm": 0.5457341074943542, "learning_rate": 0.00046236705963990715, "loss": 3.2126, "step": 19464 }, { "epoch": 0.95, "grad_norm": 0.5450583696365356, "learning_rate": 0.0004623541130965955, "loss": 3.0782, "step": 19465 }, { "epoch": 0.95, "grad_norm": 0.5224042534828186, "learning_rate": 0.00046234116612567437, "loss": 3.1618, "step": 19466 }, { "epoch": 0.95, "grad_norm": 0.5329810380935669, "learning_rate": 0.00046232821872717783, "loss": 3.2908, "step": 19467 }, { "epoch": 0.95, "grad_norm": 0.5755662322044373, "learning_rate": 0.00046231527090113993, "loss": 3.0585, "step": 19468 }, { "epoch": 0.95, "grad_norm": 0.5130234956741333, "learning_rate": 0.000462302322647595, "loss": 3.0353, "step": 19469 }, { "epoch": 0.95, "grad_norm": 0.5430505275726318, "learning_rate": 0.0004622893739665769, "loss": 3.3614, "step": 19470 }, { "epoch": 0.95, "grad_norm": 0.5734212398529053, "learning_rate": 0.0004622764248581198, "loss": 3.1137, "step": 19471 }, { "epoch": 0.95, "grad_norm": 0.5490341782569885, "learning_rate": 0.00046226347532225777, "loss": 2.9489, "step": 19472 }, { "epoch": 0.95, "grad_norm": 0.506134569644928, "learning_rate": 0.0004622505253590251, "loss": 3.2452, "step": 19473 }, { "epoch": 0.95, "grad_norm": 0.5133436322212219, "learning_rate": 0.00046223757496845577, "loss": 3.1175, "step": 19474 }, { "epoch": 0.95, "grad_norm": 0.4948771595954895, "learning_rate": 0.0004622246241505839, "loss": 3.2184, "step": 19475 }, { "epoch": 0.95, "grad_norm": 0.663033664226532, "learning_rate": 0.0004622116729054435, "loss": 3.2785, "step": 19476 }, { "epoch": 0.95, "grad_norm": 0.5038350224494934, "learning_rate": 0.0004621987212330688, "loss": 3.1118, "step": 19477 }, { "epoch": 0.95, "grad_norm": 0.5808413624763489, "learning_rate": 0.00046218576913349387, "loss": 3.2685, "step": 19478 }, { "epoch": 0.95, "grad_norm": 0.5481523275375366, "learning_rate": 0.00046217281660675293, "loss": 3.1188, "step": 19479 }, { "epoch": 0.95, "grad_norm": 0.5153646469116211, "learning_rate": 0.0004621598636528799, "loss": 3.2296, "step": 19480 }, { "epoch": 0.95, "grad_norm": 0.539476215839386, "learning_rate": 0.00046214691027190904, "loss": 3.3285, "step": 19481 }, { "epoch": 0.95, "grad_norm": 0.4928472638130188, "learning_rate": 0.0004621339564638744, "loss": 3.1049, "step": 19482 }, { "epoch": 0.95, "grad_norm": 0.4804633557796478, "learning_rate": 0.00046212100222881014, "loss": 3.2119, "step": 19483 }, { "epoch": 0.95, "grad_norm": 0.524301290512085, "learning_rate": 0.0004621080475667504, "loss": 3.103, "step": 19484 }, { "epoch": 0.95, "grad_norm": 0.50786954164505, "learning_rate": 0.0004620950924777292, "loss": 3.1123, "step": 19485 }, { "epoch": 0.95, "grad_norm": 0.5153059363365173, "learning_rate": 0.0004620821369617808, "loss": 3.3104, "step": 19486 }, { "epoch": 0.96, "grad_norm": 0.5356342196464539, "learning_rate": 0.0004620691810189392, "loss": 3.2367, "step": 19487 }, { "epoch": 0.96, "grad_norm": 0.5034014582633972, "learning_rate": 0.00046205622464923847, "loss": 3.2318, "step": 19488 }, { "epoch": 0.96, "grad_norm": 0.5269924402236938, "learning_rate": 0.0004620432678527129, "loss": 3.0648, "step": 19489 }, { "epoch": 0.96, "grad_norm": 0.5155195593833923, "learning_rate": 0.00046203031062939666, "loss": 3.0857, "step": 19490 }, { "epoch": 0.96, "grad_norm": 0.48004335165023804, "learning_rate": 0.00046201735297932356, "loss": 3.1868, "step": 19491 }, { "epoch": 0.96, "grad_norm": 0.5057433843612671, "learning_rate": 0.0004620043949025281, "loss": 3.1481, "step": 19492 }, { "epoch": 0.96, "grad_norm": 0.5076611638069153, "learning_rate": 0.00046199143639904416, "loss": 3.0926, "step": 19493 }, { "epoch": 0.96, "grad_norm": 0.5741182565689087, "learning_rate": 0.000461978477468906, "loss": 3.1103, "step": 19494 }, { "epoch": 0.96, "grad_norm": 0.5225886106491089, "learning_rate": 0.00046196551811214775, "loss": 3.1027, "step": 19495 }, { "epoch": 0.96, "grad_norm": 0.5760247111320496, "learning_rate": 0.0004619525583288034, "loss": 3.2087, "step": 19496 }, { "epoch": 0.96, "grad_norm": 0.5268136858940125, "learning_rate": 0.0004619395981189072, "loss": 3.1337, "step": 19497 }, { "epoch": 0.96, "grad_norm": 0.5018905401229858, "learning_rate": 0.0004619266374824932, "loss": 3.2123, "step": 19498 }, { "epoch": 0.96, "grad_norm": 0.509712815284729, "learning_rate": 0.00046191367641959573, "loss": 3.0303, "step": 19499 }, { "epoch": 0.96, "grad_norm": 0.5171404480934143, "learning_rate": 0.00046190071493024874, "loss": 3.0558, "step": 19500 }, { "epoch": 0.96, "grad_norm": 0.5230562686920166, "learning_rate": 0.00046188775301448645, "loss": 3.175, "step": 19501 }, { "epoch": 0.96, "grad_norm": 0.5551393032073975, "learning_rate": 0.00046187479067234295, "loss": 3.0652, "step": 19502 }, { "epoch": 0.96, "grad_norm": 0.5329045653343201, "learning_rate": 0.0004618618279038524, "loss": 3.0641, "step": 19503 }, { "epoch": 0.96, "grad_norm": 0.4875706732273102, "learning_rate": 0.00046184886470904894, "loss": 3.0987, "step": 19504 }, { "epoch": 0.96, "grad_norm": 0.5308302640914917, "learning_rate": 0.00046183590108796673, "loss": 3.156, "step": 19505 }, { "epoch": 0.96, "grad_norm": 0.5178402662277222, "learning_rate": 0.0004618229370406399, "loss": 3.0371, "step": 19506 }, { "epoch": 0.96, "grad_norm": 0.5128605365753174, "learning_rate": 0.00046180997256710263, "loss": 3.2408, "step": 19507 }, { "epoch": 0.96, "grad_norm": 0.5387104749679565, "learning_rate": 0.000461797007667389, "loss": 3.2067, "step": 19508 }, { "epoch": 0.96, "grad_norm": 0.5741128325462341, "learning_rate": 0.0004617840423415332, "loss": 3.2154, "step": 19509 }, { "epoch": 0.96, "grad_norm": 0.5508107542991638, "learning_rate": 0.0004617710765895693, "loss": 3.1757, "step": 19510 }, { "epoch": 0.96, "grad_norm": 0.5864234566688538, "learning_rate": 0.00046175811041153164, "loss": 3.1171, "step": 19511 }, { "epoch": 0.96, "grad_norm": 0.5283697247505188, "learning_rate": 0.00046174514380745423, "loss": 3.4572, "step": 19512 }, { "epoch": 0.96, "grad_norm": 0.4930495619773865, "learning_rate": 0.0004617321767773711, "loss": 2.9718, "step": 19513 }, { "epoch": 0.96, "grad_norm": 0.49955621361732483, "learning_rate": 0.00046171920932131666, "loss": 3.215, "step": 19514 }, { "epoch": 0.96, "grad_norm": 0.5142083764076233, "learning_rate": 0.00046170624143932495, "loss": 3.2683, "step": 19515 }, { "epoch": 0.96, "grad_norm": 0.4969509243965149, "learning_rate": 0.0004616932731314301, "loss": 3.0456, "step": 19516 }, { "epoch": 0.96, "grad_norm": 0.5548650622367859, "learning_rate": 0.00046168030439766626, "loss": 3.0429, "step": 19517 }, { "epoch": 0.96, "grad_norm": 0.5828065276145935, "learning_rate": 0.00046166733523806774, "loss": 3.3183, "step": 19518 }, { "epoch": 0.96, "grad_norm": 0.5464107990264893, "learning_rate": 0.0004616543656526684, "loss": 3.0603, "step": 19519 }, { "epoch": 0.96, "grad_norm": 0.5539250373840332, "learning_rate": 0.0004616413956415026, "loss": 3.2492, "step": 19520 }, { "epoch": 0.96, "grad_norm": 0.5596001744270325, "learning_rate": 0.00046162842520460455, "loss": 3.1842, "step": 19521 }, { "epoch": 0.96, "grad_norm": 0.5353409647941589, "learning_rate": 0.00046161545434200837, "loss": 3.1103, "step": 19522 }, { "epoch": 0.96, "grad_norm": 0.5830059051513672, "learning_rate": 0.0004616024830537481, "loss": 3.2124, "step": 19523 }, { "epoch": 0.96, "grad_norm": 0.542768120765686, "learning_rate": 0.0004615895113398579, "loss": 3.1671, "step": 19524 }, { "epoch": 0.96, "grad_norm": 0.5151305198669434, "learning_rate": 0.00046157653920037223, "loss": 3.0929, "step": 19525 }, { "epoch": 0.96, "grad_norm": 0.5473406910896301, "learning_rate": 0.000461563566635325, "loss": 3.0294, "step": 19526 }, { "epoch": 0.96, "grad_norm": 0.5182105302810669, "learning_rate": 0.00046155059364475035, "loss": 2.9818, "step": 19527 }, { "epoch": 0.96, "grad_norm": 0.5524483323097229, "learning_rate": 0.0004615376202286825, "loss": 3.2716, "step": 19528 }, { "epoch": 0.96, "grad_norm": 0.5527673363685608, "learning_rate": 0.0004615246463871557, "loss": 3.0887, "step": 19529 }, { "epoch": 0.96, "grad_norm": 0.5192067623138428, "learning_rate": 0.0004615116721202041, "loss": 3.1964, "step": 19530 }, { "epoch": 0.96, "grad_norm": 0.5294162631034851, "learning_rate": 0.0004614986974278618, "loss": 3.1753, "step": 19531 }, { "epoch": 0.96, "grad_norm": 0.5797945261001587, "learning_rate": 0.0004614857223101631, "loss": 3.1273, "step": 19532 }, { "epoch": 0.96, "grad_norm": 0.5336905717849731, "learning_rate": 0.000461472746767142, "loss": 3.2099, "step": 19533 }, { "epoch": 0.96, "grad_norm": 0.5431013703346252, "learning_rate": 0.0004614597707988327, "loss": 3.2577, "step": 19534 }, { "epoch": 0.96, "grad_norm": 0.5563946962356567, "learning_rate": 0.0004614467944052695, "loss": 2.8293, "step": 19535 }, { "epoch": 0.96, "grad_norm": 0.5071994662284851, "learning_rate": 0.00046143381758648666, "loss": 3.0538, "step": 19536 }, { "epoch": 0.96, "grad_norm": 0.5291922092437744, "learning_rate": 0.000461420840342518, "loss": 3.0829, "step": 19537 }, { "epoch": 0.96, "grad_norm": 0.5239132642745972, "learning_rate": 0.000461407862673398, "loss": 3.1648, "step": 19538 }, { "epoch": 0.96, "grad_norm": 0.5256131887435913, "learning_rate": 0.00046139488457916074, "loss": 3.1761, "step": 19539 }, { "epoch": 0.96, "grad_norm": 0.5487732887268066, "learning_rate": 0.0004613819060598404, "loss": 3.2024, "step": 19540 }, { "epoch": 0.96, "grad_norm": 0.5254712700843811, "learning_rate": 0.00046136892711547123, "loss": 3.1627, "step": 19541 }, { "epoch": 0.96, "grad_norm": 0.5246246457099915, "learning_rate": 0.00046135594774608733, "loss": 3.0529, "step": 19542 }, { "epoch": 0.96, "grad_norm": 0.5375425219535828, "learning_rate": 0.0004613429679517229, "loss": 3.3443, "step": 19543 }, { "epoch": 0.96, "grad_norm": 0.5527751445770264, "learning_rate": 0.00046132998773241215, "loss": 2.9413, "step": 19544 }, { "epoch": 0.96, "grad_norm": 0.5222285389900208, "learning_rate": 0.00046131700708818924, "loss": 3.3199, "step": 19545 }, { "epoch": 0.96, "grad_norm": 0.577925443649292, "learning_rate": 0.0004613040260190884, "loss": 2.9484, "step": 19546 }, { "epoch": 0.96, "grad_norm": 0.5509508848190308, "learning_rate": 0.0004612910445251439, "loss": 3.2603, "step": 19547 }, { "epoch": 0.96, "grad_norm": 0.5331478714942932, "learning_rate": 0.0004612780626063897, "loss": 3.0465, "step": 19548 }, { "epoch": 0.96, "grad_norm": 0.5440930128097534, "learning_rate": 0.0004612650802628602, "loss": 2.9711, "step": 19549 }, { "epoch": 0.96, "grad_norm": 0.5174834728240967, "learning_rate": 0.0004612520974945895, "loss": 3.2217, "step": 19550 }, { "epoch": 0.96, "grad_norm": 0.5301517844200134, "learning_rate": 0.00046123911430161175, "loss": 3.1833, "step": 19551 }, { "epoch": 0.96, "grad_norm": 0.556764543056488, "learning_rate": 0.00046122613068396123, "loss": 2.9512, "step": 19552 }, { "epoch": 0.96, "grad_norm": 0.5274459719657898, "learning_rate": 0.00046121314664167214, "loss": 3.0559, "step": 19553 }, { "epoch": 0.96, "grad_norm": 0.5233287811279297, "learning_rate": 0.0004612001621747786, "loss": 3.1821, "step": 19554 }, { "epoch": 0.96, "grad_norm": 0.49667537212371826, "learning_rate": 0.00046118717728331493, "loss": 3.1545, "step": 19555 }, { "epoch": 0.96, "grad_norm": 0.5035794377326965, "learning_rate": 0.00046117419196731516, "loss": 2.9625, "step": 19556 }, { "epoch": 0.96, "grad_norm": 0.536008894443512, "learning_rate": 0.00046116120622681365, "loss": 3.3597, "step": 19557 }, { "epoch": 0.96, "grad_norm": 0.5389508008956909, "learning_rate": 0.0004611482200618446, "loss": 3.0717, "step": 19558 }, { "epoch": 0.96, "grad_norm": 0.4923076629638672, "learning_rate": 0.00046113523347244206, "loss": 3.066, "step": 19559 }, { "epoch": 0.96, "grad_norm": 0.4959174394607544, "learning_rate": 0.00046112224645864023, "loss": 3.1596, "step": 19560 }, { "epoch": 0.96, "grad_norm": 0.5146404504776001, "learning_rate": 0.0004611092590204736, "loss": 3.1807, "step": 19561 }, { "epoch": 0.96, "grad_norm": 0.5296205282211304, "learning_rate": 0.0004610962711579761, "loss": 3.0542, "step": 19562 }, { "epoch": 0.96, "grad_norm": 0.5231249928474426, "learning_rate": 0.00046108328287118203, "loss": 3.3756, "step": 19563 }, { "epoch": 0.96, "grad_norm": 0.5085944533348083, "learning_rate": 0.0004610702941601256, "loss": 3.316, "step": 19564 }, { "epoch": 0.96, "grad_norm": 0.5056644082069397, "learning_rate": 0.00046105730502484107, "loss": 3.0114, "step": 19565 }, { "epoch": 0.96, "grad_norm": 0.5329414010047913, "learning_rate": 0.00046104431546536246, "loss": 3.3503, "step": 19566 }, { "epoch": 0.96, "grad_norm": 0.5404815077781677, "learning_rate": 0.00046103132548172424, "loss": 3.1329, "step": 19567 }, { "epoch": 0.96, "grad_norm": 0.5342490673065186, "learning_rate": 0.00046101833507396044, "loss": 3.1882, "step": 19568 }, { "epoch": 0.96, "grad_norm": 0.5149338245391846, "learning_rate": 0.0004610053442421054, "loss": 3.2602, "step": 19569 }, { "epoch": 0.96, "grad_norm": 0.5321595072746277, "learning_rate": 0.0004609923529861932, "loss": 3.0032, "step": 19570 }, { "epoch": 0.96, "grad_norm": 0.5394263863563538, "learning_rate": 0.0004609793613062581, "loss": 3.2131, "step": 19571 }, { "epoch": 0.96, "grad_norm": 0.5631637573242188, "learning_rate": 0.00046096636920233444, "loss": 3.0242, "step": 19572 }, { "epoch": 0.96, "grad_norm": 0.5770866870880127, "learning_rate": 0.00046095337667445633, "loss": 3.1954, "step": 19573 }, { "epoch": 0.96, "grad_norm": 0.5115048289299011, "learning_rate": 0.00046094038372265794, "loss": 3.2293, "step": 19574 }, { "epoch": 0.96, "grad_norm": 0.5524685978889465, "learning_rate": 0.0004609273903469735, "loss": 3.208, "step": 19575 }, { "epoch": 0.96, "grad_norm": 0.5510011315345764, "learning_rate": 0.00046091439654743745, "loss": 3.228, "step": 19576 }, { "epoch": 0.96, "grad_norm": 0.5279592275619507, "learning_rate": 0.00046090140232408377, "loss": 2.9963, "step": 19577 }, { "epoch": 0.96, "grad_norm": 0.5411564111709595, "learning_rate": 0.00046088840767694674, "loss": 3.2272, "step": 19578 }, { "epoch": 0.96, "grad_norm": 0.5248085260391235, "learning_rate": 0.00046087541260606056, "loss": 3.2172, "step": 19579 }, { "epoch": 0.96, "grad_norm": 0.5476329922676086, "learning_rate": 0.0004608624171114596, "loss": 3.3069, "step": 19580 }, { "epoch": 0.96, "grad_norm": 0.5507305264472961, "learning_rate": 0.000460849421193178, "loss": 3.096, "step": 19581 }, { "epoch": 0.96, "grad_norm": 0.5406851172447205, "learning_rate": 0.00046083642485124983, "loss": 3.0172, "step": 19582 }, { "epoch": 0.96, "grad_norm": 0.5372514724731445, "learning_rate": 0.0004608234280857096, "loss": 3.1018, "step": 19583 }, { "epoch": 0.96, "grad_norm": 0.5436272621154785, "learning_rate": 0.00046081043089659144, "loss": 3.1314, "step": 19584 }, { "epoch": 0.96, "grad_norm": 0.5231220126152039, "learning_rate": 0.0004607974332839295, "loss": 3.1906, "step": 19585 }, { "epoch": 0.96, "grad_norm": 0.553106963634491, "learning_rate": 0.000460784435247758, "loss": 3.1702, "step": 19586 }, { "epoch": 0.96, "grad_norm": 0.5628478527069092, "learning_rate": 0.0004607714367881114, "loss": 3.1288, "step": 19587 }, { "epoch": 0.96, "grad_norm": 0.5031068921089172, "learning_rate": 0.0004607584379050237, "loss": 3.193, "step": 19588 }, { "epoch": 0.96, "grad_norm": 0.5432695746421814, "learning_rate": 0.00046074543859852917, "loss": 3.0016, "step": 19589 }, { "epoch": 0.96, "grad_norm": 0.5564255118370056, "learning_rate": 0.00046073243886866216, "loss": 3.1017, "step": 19590 }, { "epoch": 0.96, "grad_norm": 0.7009369134902954, "learning_rate": 0.0004607194387154567, "loss": 3.213, "step": 19591 }, { "epoch": 0.96, "grad_norm": 0.5528953671455383, "learning_rate": 0.0004607064381389473, "loss": 3.1717, "step": 19592 }, { "epoch": 0.96, "grad_norm": 0.5072028636932373, "learning_rate": 0.00046069343713916805, "loss": 3.1961, "step": 19593 }, { "epoch": 0.96, "grad_norm": 0.5909320712089539, "learning_rate": 0.0004606804357161532, "loss": 3.2262, "step": 19594 }, { "epoch": 0.96, "grad_norm": 0.5035519003868103, "learning_rate": 0.0004606674338699371, "loss": 2.93, "step": 19595 }, { "epoch": 0.96, "grad_norm": 0.5433776378631592, "learning_rate": 0.0004606544316005537, "loss": 3.5309, "step": 19596 }, { "epoch": 0.96, "grad_norm": 0.550793468952179, "learning_rate": 0.00046064142890803764, "loss": 3.2649, "step": 19597 }, { "epoch": 0.96, "grad_norm": 0.5635651350021362, "learning_rate": 0.00046062842579242284, "loss": 3.0402, "step": 19598 }, { "epoch": 0.96, "grad_norm": 0.5558289289474487, "learning_rate": 0.0004606154222537438, "loss": 3.2313, "step": 19599 }, { "epoch": 0.96, "grad_norm": 0.5349392294883728, "learning_rate": 0.0004606024182920345, "loss": 3.2041, "step": 19600 }, { "epoch": 0.96, "grad_norm": 0.48463213443756104, "learning_rate": 0.0004605894139073294, "loss": 3.0712, "step": 19601 }, { "epoch": 0.96, "grad_norm": 0.5352935194969177, "learning_rate": 0.00046057640909966276, "loss": 3.1331, "step": 19602 }, { "epoch": 0.96, "grad_norm": 0.5178282856941223, "learning_rate": 0.00046056340386906866, "loss": 3.1748, "step": 19603 }, { "epoch": 0.96, "grad_norm": 0.5154528617858887, "learning_rate": 0.0004605503982155815, "loss": 3.3167, "step": 19604 }, { "epoch": 0.96, "grad_norm": 0.525182843208313, "learning_rate": 0.0004605373921392355, "loss": 3.2507, "step": 19605 }, { "epoch": 0.96, "grad_norm": 0.549490213394165, "learning_rate": 0.0004605243856400649, "loss": 3.0423, "step": 19606 }, { "epoch": 0.96, "grad_norm": 0.546172022819519, "learning_rate": 0.00046051137871810395, "loss": 3.1048, "step": 19607 }, { "epoch": 0.96, "grad_norm": 0.5274287462234497, "learning_rate": 0.00046049837137338695, "loss": 3.0897, "step": 19608 }, { "epoch": 0.96, "grad_norm": 0.5080083012580872, "learning_rate": 0.00046048536360594815, "loss": 3.1981, "step": 19609 }, { "epoch": 0.96, "grad_norm": 0.5662698745727539, "learning_rate": 0.00046047235541582174, "loss": 3.2204, "step": 19610 }, { "epoch": 0.96, "grad_norm": 0.49622294306755066, "learning_rate": 0.00046045934680304194, "loss": 2.9424, "step": 19611 }, { "epoch": 0.96, "grad_norm": 0.5338951945304871, "learning_rate": 0.0004604463377676432, "loss": 3.0564, "step": 19612 }, { "epoch": 0.96, "grad_norm": 0.5102505683898926, "learning_rate": 0.00046043332830965973, "loss": 3.1175, "step": 19613 }, { "epoch": 0.96, "grad_norm": 0.5088198781013489, "learning_rate": 0.00046042031842912575, "loss": 3.0823, "step": 19614 }, { "epoch": 0.96, "grad_norm": 0.4949013590812683, "learning_rate": 0.0004604073081260754, "loss": 3.1718, "step": 19615 }, { "epoch": 0.96, "grad_norm": 0.529629647731781, "learning_rate": 0.00046039429740054314, "loss": 3.2467, "step": 19616 }, { "epoch": 0.96, "grad_norm": 0.5418373942375183, "learning_rate": 0.0004603812862525632, "loss": 3.0991, "step": 19617 }, { "epoch": 0.96, "grad_norm": 0.5319532155990601, "learning_rate": 0.00046036827468216976, "loss": 3.222, "step": 19618 }, { "epoch": 0.96, "grad_norm": 0.5246682167053223, "learning_rate": 0.0004603552626893972, "loss": 3.1567, "step": 19619 }, { "epoch": 0.96, "grad_norm": 0.5561453700065613, "learning_rate": 0.00046034225027427974, "loss": 3.1118, "step": 19620 }, { "epoch": 0.96, "grad_norm": 0.554530143737793, "learning_rate": 0.0004603292374368516, "loss": 3.1151, "step": 19621 }, { "epoch": 0.96, "grad_norm": 0.5948189496994019, "learning_rate": 0.0004603162241771471, "loss": 3.2595, "step": 19622 }, { "epoch": 0.96, "grad_norm": 0.5682088136672974, "learning_rate": 0.0004603032104952006, "loss": 3.2056, "step": 19623 }, { "epoch": 0.96, "grad_norm": 0.5129702091217041, "learning_rate": 0.0004602901963910463, "loss": 3.1712, "step": 19624 }, { "epoch": 0.96, "grad_norm": 0.5358684659004211, "learning_rate": 0.0004602771818647184, "loss": 3.0202, "step": 19625 }, { "epoch": 0.96, "grad_norm": 0.5510245561599731, "learning_rate": 0.0004602641669162512, "loss": 3.1402, "step": 19626 }, { "epoch": 0.96, "grad_norm": 0.5696225762367249, "learning_rate": 0.00046025115154567917, "loss": 3.0832, "step": 19627 }, { "epoch": 0.96, "grad_norm": 0.5569515824317932, "learning_rate": 0.0004602381357530364, "loss": 2.9884, "step": 19628 }, { "epoch": 0.96, "grad_norm": 0.5148624181747437, "learning_rate": 0.00046022511953835716, "loss": 3.1615, "step": 19629 }, { "epoch": 0.96, "grad_norm": 0.5084801316261292, "learning_rate": 0.00046021210290167583, "loss": 3.1071, "step": 19630 }, { "epoch": 0.96, "grad_norm": 0.5485509634017944, "learning_rate": 0.00046019908584302665, "loss": 3.0196, "step": 19631 }, { "epoch": 0.96, "grad_norm": 0.5287085175514221, "learning_rate": 0.0004601860683624439, "loss": 3.1482, "step": 19632 }, { "epoch": 0.96, "grad_norm": 0.4873697757720947, "learning_rate": 0.00046017305045996187, "loss": 3.2477, "step": 19633 }, { "epoch": 0.96, "grad_norm": 0.5111532807350159, "learning_rate": 0.0004601600321356149, "loss": 3.2722, "step": 19634 }, { "epoch": 0.96, "grad_norm": 0.5960693359375, "learning_rate": 0.00046014701338943723, "loss": 2.9375, "step": 19635 }, { "epoch": 0.96, "grad_norm": 0.5306110382080078, "learning_rate": 0.00046013399422146307, "loss": 3.0017, "step": 19636 }, { "epoch": 0.96, "grad_norm": 0.5201436877250671, "learning_rate": 0.0004601209746317268, "loss": 3.2602, "step": 19637 }, { "epoch": 0.96, "grad_norm": 0.5063363909721375, "learning_rate": 0.0004601079546202628, "loss": 3.1268, "step": 19638 }, { "epoch": 0.96, "grad_norm": 0.5271068215370178, "learning_rate": 0.00046009493418710514, "loss": 3.2246, "step": 19639 }, { "epoch": 0.96, "grad_norm": 0.527073860168457, "learning_rate": 0.00046008191333228826, "loss": 3.0653, "step": 19640 }, { "epoch": 0.96, "grad_norm": 0.5272981524467468, "learning_rate": 0.0004600688920558465, "loss": 3.215, "step": 19641 }, { "epoch": 0.96, "grad_norm": 0.5382295846939087, "learning_rate": 0.000460055870357814, "loss": 3.0829, "step": 19642 }, { "epoch": 0.96, "grad_norm": 0.5190293192863464, "learning_rate": 0.0004600428482382252, "loss": 3.1779, "step": 19643 }, { "epoch": 0.96, "grad_norm": 0.508520781993866, "learning_rate": 0.0004600298256971143, "loss": 3.2162, "step": 19644 }, { "epoch": 0.96, "grad_norm": 0.5364028215408325, "learning_rate": 0.0004600168027345156, "loss": 3.2912, "step": 19645 }, { "epoch": 0.96, "grad_norm": 0.541970431804657, "learning_rate": 0.00046000377935046357, "loss": 3.2242, "step": 19646 }, { "epoch": 0.96, "grad_norm": 0.522739052772522, "learning_rate": 0.0004599907555449922, "loss": 3.1251, "step": 19647 }, { "epoch": 0.96, "grad_norm": 0.5367051959037781, "learning_rate": 0.00045997773131813607, "loss": 3.1206, "step": 19648 }, { "epoch": 0.96, "grad_norm": 0.5582072734832764, "learning_rate": 0.0004599647066699295, "loss": 3.1991, "step": 19649 }, { "epoch": 0.96, "grad_norm": 0.5519962310791016, "learning_rate": 0.00045995168160040654, "loss": 3.0101, "step": 19650 }, { "epoch": 0.96, "grad_norm": 0.529492199420929, "learning_rate": 0.0004599386561096016, "loss": 3.188, "step": 19651 }, { "epoch": 0.96, "grad_norm": 0.5170547366142273, "learning_rate": 0.0004599256301975491, "loss": 3.1679, "step": 19652 }, { "epoch": 0.96, "grad_norm": 0.5805275440216064, "learning_rate": 0.00045991260386428327, "loss": 3.0967, "step": 19653 }, { "epoch": 0.96, "grad_norm": 0.5890424847602844, "learning_rate": 0.0004598995771098384, "loss": 3.3168, "step": 19654 }, { "epoch": 0.96, "grad_norm": 0.5336859226226807, "learning_rate": 0.0004598865499342488, "loss": 3.0317, "step": 19655 }, { "epoch": 0.96, "grad_norm": 0.6045253872871399, "learning_rate": 0.0004598735223375488, "loss": 3.1161, "step": 19656 }, { "epoch": 0.96, "grad_norm": 0.5241043567657471, "learning_rate": 0.00045986049431977265, "loss": 3.141, "step": 19657 }, { "epoch": 0.96, "grad_norm": 0.515809178352356, "learning_rate": 0.0004598474658809548, "loss": 3.1444, "step": 19658 }, { "epoch": 0.96, "grad_norm": 0.5346136093139648, "learning_rate": 0.00045983443702112946, "loss": 3.181, "step": 19659 }, { "epoch": 0.96, "grad_norm": 0.5317142605781555, "learning_rate": 0.00045982140774033104, "loss": 3.4307, "step": 19660 }, { "epoch": 0.96, "grad_norm": 0.558459997177124, "learning_rate": 0.0004598083780385938, "loss": 3.2656, "step": 19661 }, { "epoch": 0.96, "grad_norm": 0.5433655381202698, "learning_rate": 0.0004597953479159519, "loss": 3.1691, "step": 19662 }, { "epoch": 0.96, "grad_norm": 0.5130866765975952, "learning_rate": 0.0004597823173724399, "loss": 3.2532, "step": 19663 }, { "epoch": 0.96, "grad_norm": 0.5559121966362, "learning_rate": 0.0004597692864080921, "loss": 3.3065, "step": 19664 }, { "epoch": 0.96, "grad_norm": 0.4829885959625244, "learning_rate": 0.0004597562550229426, "loss": 3.2244, "step": 19665 }, { "epoch": 0.96, "grad_norm": 0.5515817403793335, "learning_rate": 0.00045974322321702595, "loss": 3.0803, "step": 19666 }, { "epoch": 0.96, "grad_norm": 0.5156745314598083, "learning_rate": 0.0004597301909903764, "loss": 3.254, "step": 19667 }, { "epoch": 0.96, "grad_norm": 0.574837863445282, "learning_rate": 0.0004597171583430282, "loss": 3.2968, "step": 19668 }, { "epoch": 0.96, "grad_norm": 0.5619298219680786, "learning_rate": 0.0004597041252750158, "loss": 3.1675, "step": 19669 }, { "epoch": 0.96, "grad_norm": 0.5266140103340149, "learning_rate": 0.00045969109178637345, "loss": 3.226, "step": 19670 }, { "epoch": 0.96, "grad_norm": 0.5088499784469604, "learning_rate": 0.00045967805787713563, "loss": 3.0859, "step": 19671 }, { "epoch": 0.96, "grad_norm": 0.5350180268287659, "learning_rate": 0.0004596650235473364, "loss": 3.1795, "step": 19672 }, { "epoch": 0.96, "grad_norm": 0.5337379574775696, "learning_rate": 0.0004596519887970102, "loss": 3.135, "step": 19673 }, { "epoch": 0.96, "grad_norm": 0.5811973810195923, "learning_rate": 0.0004596389536261914, "loss": 3.3238, "step": 19674 }, { "epoch": 0.96, "grad_norm": 0.5261113047599792, "learning_rate": 0.00045962591803491444, "loss": 3.3253, "step": 19675 }, { "epoch": 0.96, "grad_norm": 0.5248528718948364, "learning_rate": 0.00045961288202321345, "loss": 3.1994, "step": 19676 }, { "epoch": 0.96, "grad_norm": 0.510138750076294, "learning_rate": 0.0004595998455911228, "loss": 3.3404, "step": 19677 }, { "epoch": 0.96, "grad_norm": 0.5522067546844482, "learning_rate": 0.0004595868087386769, "loss": 2.9776, "step": 19678 }, { "epoch": 0.96, "grad_norm": 0.7141591906547546, "learning_rate": 0.0004595737714659101, "loss": 3.2153, "step": 19679 }, { "epoch": 0.96, "grad_norm": 0.49402374029159546, "learning_rate": 0.00045956073377285663, "loss": 3.3181, "step": 19680 }, { "epoch": 0.96, "grad_norm": 0.5236515402793884, "learning_rate": 0.00045954769565955094, "loss": 3.3036, "step": 19681 }, { "epoch": 0.96, "grad_norm": 0.525768518447876, "learning_rate": 0.0004595346571260274, "loss": 3.1389, "step": 19682 }, { "epoch": 0.96, "grad_norm": 0.6157646179199219, "learning_rate": 0.00045952161817232014, "loss": 3.1411, "step": 19683 }, { "epoch": 0.96, "grad_norm": 0.5027342438697815, "learning_rate": 0.00045950857879846366, "loss": 3.3379, "step": 19684 }, { "epoch": 0.96, "grad_norm": 0.5160840153694153, "learning_rate": 0.00045949553900449233, "loss": 3.1588, "step": 19685 }, { "epoch": 0.96, "grad_norm": 0.5430676341056824, "learning_rate": 0.00045948249879044047, "loss": 3.1516, "step": 19686 }, { "epoch": 0.96, "grad_norm": 0.5123657584190369, "learning_rate": 0.00045946945815634235, "loss": 2.9488, "step": 19687 }, { "epoch": 0.96, "grad_norm": 0.4994308054447174, "learning_rate": 0.0004594564171022323, "loss": 3.1864, "step": 19688 }, { "epoch": 0.96, "grad_norm": 0.5245607495307922, "learning_rate": 0.0004594433756281449, "loss": 3.1557, "step": 19689 }, { "epoch": 0.96, "grad_norm": 0.508391261100769, "learning_rate": 0.0004594303337341143, "loss": 3.1624, "step": 19690 }, { "epoch": 0.97, "grad_norm": 0.5643361806869507, "learning_rate": 0.00045941729142017477, "loss": 3.0144, "step": 19691 }, { "epoch": 0.97, "grad_norm": 0.5318419933319092, "learning_rate": 0.00045940424868636085, "loss": 2.9979, "step": 19692 }, { "epoch": 0.97, "grad_norm": 0.5172003507614136, "learning_rate": 0.00045939120553270676, "loss": 3.1654, "step": 19693 }, { "epoch": 0.97, "grad_norm": 0.5160081386566162, "learning_rate": 0.000459378161959247, "loss": 3.1003, "step": 19694 }, { "epoch": 0.97, "grad_norm": 0.5427367687225342, "learning_rate": 0.0004593651179660158, "loss": 3.2305, "step": 19695 }, { "epoch": 0.97, "grad_norm": 0.523469865322113, "learning_rate": 0.00045935207355304744, "loss": 3.2241, "step": 19696 }, { "epoch": 0.97, "grad_norm": 0.5474386215209961, "learning_rate": 0.0004593390287203766, "loss": 3.1163, "step": 19697 }, { "epoch": 0.97, "grad_norm": 0.5134205222129822, "learning_rate": 0.0004593259834680372, "loss": 3.2631, "step": 19698 }, { "epoch": 0.97, "grad_norm": 0.5353065729141235, "learning_rate": 0.00045931293779606397, "loss": 3.2246, "step": 19699 }, { "epoch": 0.97, "grad_norm": 0.5378754138946533, "learning_rate": 0.00045929989170449115, "loss": 3.3742, "step": 19700 }, { "epoch": 0.97, "grad_norm": 0.5078233480453491, "learning_rate": 0.00045928684519335296, "loss": 3.0678, "step": 19701 }, { "epoch": 0.97, "grad_norm": 0.534570038318634, "learning_rate": 0.00045927379826268393, "loss": 3.2471, "step": 19702 }, { "epoch": 0.97, "grad_norm": 0.5478757619857788, "learning_rate": 0.0004592607509125184, "loss": 2.8799, "step": 19703 }, { "epoch": 0.97, "grad_norm": 0.5131859183311462, "learning_rate": 0.0004592477031428906, "loss": 3.0923, "step": 19704 }, { "epoch": 0.97, "grad_norm": 0.5837509036064148, "learning_rate": 0.00045923465495383513, "loss": 3.4114, "step": 19705 }, { "epoch": 0.97, "grad_norm": 0.5130158066749573, "learning_rate": 0.0004592216063453861, "loss": 3.1598, "step": 19706 }, { "epoch": 0.97, "grad_norm": 0.5604761242866516, "learning_rate": 0.000459208557317578, "loss": 3.2225, "step": 19707 }, { "epoch": 0.97, "grad_norm": 0.5324618220329285, "learning_rate": 0.00045919550787044527, "loss": 3.1212, "step": 19708 }, { "epoch": 0.97, "grad_norm": 0.5190209150314331, "learning_rate": 0.0004591824580040222, "loss": 3.0561, "step": 19709 }, { "epoch": 0.97, "grad_norm": 0.5445383191108704, "learning_rate": 0.0004591694077183432, "loss": 3.1508, "step": 19710 }, { "epoch": 0.97, "grad_norm": 0.5293034315109253, "learning_rate": 0.0004591563570134427, "loss": 3.187, "step": 19711 }, { "epoch": 0.97, "grad_norm": 0.5551683902740479, "learning_rate": 0.0004591433058893548, "loss": 3.1458, "step": 19712 }, { "epoch": 0.97, "grad_norm": 0.5138912796974182, "learning_rate": 0.00045913025434611413, "loss": 2.9319, "step": 19713 }, { "epoch": 0.97, "grad_norm": 0.5497405529022217, "learning_rate": 0.000459117202383755, "loss": 3.1789, "step": 19714 }, { "epoch": 0.97, "grad_norm": 0.572364091873169, "learning_rate": 0.00045910415000231173, "loss": 3.2243, "step": 19715 }, { "epoch": 0.97, "grad_norm": 0.5154417753219604, "learning_rate": 0.0004590910972018188, "loss": 2.9992, "step": 19716 }, { "epoch": 0.97, "grad_norm": 0.5528554320335388, "learning_rate": 0.00045907804398231055, "loss": 3.0243, "step": 19717 }, { "epoch": 0.97, "grad_norm": 0.5160757303237915, "learning_rate": 0.0004590649903438213, "loss": 3.1319, "step": 19718 }, { "epoch": 0.97, "grad_norm": 0.4939347803592682, "learning_rate": 0.0004590519362863855, "loss": 2.9028, "step": 19719 }, { "epoch": 0.97, "grad_norm": 0.5522775650024414, "learning_rate": 0.0004590388818100375, "loss": 3.0864, "step": 19720 }, { "epoch": 0.97, "grad_norm": 0.5493162870407104, "learning_rate": 0.00045902582691481175, "loss": 3.3321, "step": 19721 }, { "epoch": 0.97, "grad_norm": 0.5284614562988281, "learning_rate": 0.00045901277160074257, "loss": 3.1432, "step": 19722 }, { "epoch": 0.97, "grad_norm": 0.5101198554039001, "learning_rate": 0.00045899971586786436, "loss": 3.3082, "step": 19723 }, { "epoch": 0.97, "grad_norm": 0.5157438516616821, "learning_rate": 0.00045898665971621133, "loss": 3.1203, "step": 19724 }, { "epoch": 0.97, "grad_norm": 0.538258969783783, "learning_rate": 0.00045897360314581823, "loss": 2.9963, "step": 19725 }, { "epoch": 0.97, "grad_norm": 0.5072723627090454, "learning_rate": 0.0004589605461567192, "loss": 3.2203, "step": 19726 }, { "epoch": 0.97, "grad_norm": 0.5226778388023376, "learning_rate": 0.0004589474887489486, "loss": 3.2614, "step": 19727 }, { "epoch": 0.97, "grad_norm": 0.5579911470413208, "learning_rate": 0.000458934430922541, "loss": 3.1676, "step": 19728 }, { "epoch": 0.97, "grad_norm": 0.514929473400116, "learning_rate": 0.0004589213726775307, "loss": 3.1178, "step": 19729 }, { "epoch": 0.97, "grad_norm": 0.5149416923522949, "learning_rate": 0.000458908314013952, "loss": 3.1532, "step": 19730 }, { "epoch": 0.97, "grad_norm": 0.5170868039131165, "learning_rate": 0.00045889525493183954, "loss": 3.1691, "step": 19731 }, { "epoch": 0.97, "grad_norm": 0.5064755082130432, "learning_rate": 0.0004588821954312274, "loss": 2.9093, "step": 19732 }, { "epoch": 0.97, "grad_norm": 0.5192744135856628, "learning_rate": 0.00045886913551215026, "loss": 3.0864, "step": 19733 }, { "epoch": 0.97, "grad_norm": 0.5007292032241821, "learning_rate": 0.0004588560751746423, "loss": 3.2911, "step": 19734 }, { "epoch": 0.97, "grad_norm": 0.5587050914764404, "learning_rate": 0.000458843014418738, "loss": 3.0655, "step": 19735 }, { "epoch": 0.97, "grad_norm": 0.5086398720741272, "learning_rate": 0.0004588299532444718, "loss": 3.1727, "step": 19736 }, { "epoch": 0.97, "grad_norm": 0.5102616548538208, "learning_rate": 0.0004588168916518781, "loss": 3.1426, "step": 19737 }, { "epoch": 0.97, "grad_norm": 0.5597113966941833, "learning_rate": 0.0004588038296409913, "loss": 3.0143, "step": 19738 }, { "epoch": 0.97, "grad_norm": 0.5260500311851501, "learning_rate": 0.00045879076721184564, "loss": 2.9833, "step": 19739 }, { "epoch": 0.97, "grad_norm": 0.5252350568771362, "learning_rate": 0.00045877770436447587, "loss": 3.433, "step": 19740 }, { "epoch": 0.97, "grad_norm": 0.5167698264122009, "learning_rate": 0.00045876464109891605, "loss": 3.3007, "step": 19741 }, { "epoch": 0.97, "grad_norm": 0.512118935585022, "learning_rate": 0.0004587515774152007, "loss": 3.1823, "step": 19742 }, { "epoch": 0.97, "grad_norm": 0.53267502784729, "learning_rate": 0.00045873851331336424, "loss": 3.4128, "step": 19743 }, { "epoch": 0.97, "grad_norm": 0.5323781371116638, "learning_rate": 0.00045872544879344115, "loss": 3.2691, "step": 19744 }, { "epoch": 0.97, "grad_norm": 0.5187110900878906, "learning_rate": 0.0004587123838554657, "loss": 2.9825, "step": 19745 }, { "epoch": 0.97, "grad_norm": 0.5188029408454895, "learning_rate": 0.0004586993184994725, "loss": 3.2929, "step": 19746 }, { "epoch": 0.97, "grad_norm": 0.5603750348091125, "learning_rate": 0.00045868625272549573, "loss": 3.1024, "step": 19747 }, { "epoch": 0.97, "grad_norm": 0.5371494889259338, "learning_rate": 0.00045867318653356994, "loss": 3.2192, "step": 19748 }, { "epoch": 0.97, "grad_norm": 0.5349953770637512, "learning_rate": 0.00045866011992372953, "loss": 3.1364, "step": 19749 }, { "epoch": 0.97, "grad_norm": 0.5063318610191345, "learning_rate": 0.0004586470528960088, "loss": 3.1449, "step": 19750 }, { "epoch": 0.97, "grad_norm": 0.5154860019683838, "learning_rate": 0.00045863398545044245, "loss": 3.0492, "step": 19751 }, { "epoch": 0.97, "grad_norm": 0.4861948490142822, "learning_rate": 0.0004586209175870646, "loss": 3.3389, "step": 19752 }, { "epoch": 0.97, "grad_norm": 0.5171583294868469, "learning_rate": 0.0004586078493059098, "loss": 3.1478, "step": 19753 }, { "epoch": 0.97, "grad_norm": 0.5161516666412354, "learning_rate": 0.0004585947806070124, "loss": 2.9258, "step": 19754 }, { "epoch": 0.97, "grad_norm": 0.5537519454956055, "learning_rate": 0.00045858171149040696, "loss": 3.1637, "step": 19755 }, { "epoch": 0.97, "grad_norm": 0.5510892271995544, "learning_rate": 0.0004585686419561277, "loss": 3.1517, "step": 19756 }, { "epoch": 0.97, "grad_norm": 0.5581437349319458, "learning_rate": 0.0004585555720042093, "loss": 3.4651, "step": 19757 }, { "epoch": 0.97, "grad_norm": 0.5405652523040771, "learning_rate": 0.00045854250163468587, "loss": 3.2142, "step": 19758 }, { "epoch": 0.97, "grad_norm": 0.536334753036499, "learning_rate": 0.00045852943084759214, "loss": 3.2114, "step": 19759 }, { "epoch": 0.97, "grad_norm": 0.5953757166862488, "learning_rate": 0.00045851635964296234, "loss": 3.0759, "step": 19760 }, { "epoch": 0.97, "grad_norm": 0.4927489161491394, "learning_rate": 0.00045850328802083095, "loss": 3.3361, "step": 19761 }, { "epoch": 0.97, "grad_norm": 0.5365741848945618, "learning_rate": 0.00045849021598123245, "loss": 3.0068, "step": 19762 }, { "epoch": 0.97, "grad_norm": 0.5352941751480103, "learning_rate": 0.0004584771435242012, "loss": 3.2842, "step": 19763 }, { "epoch": 0.97, "grad_norm": 0.5236901640892029, "learning_rate": 0.00045846407064977163, "loss": 3.0848, "step": 19764 }, { "epoch": 0.97, "grad_norm": 0.5306557416915894, "learning_rate": 0.0004584509973579782, "loss": 3.1199, "step": 19765 }, { "epoch": 0.97, "grad_norm": 0.5267027616500854, "learning_rate": 0.00045843792364885533, "loss": 3.346, "step": 19766 }, { "epoch": 0.97, "grad_norm": 0.5062419176101685, "learning_rate": 0.0004584248495224375, "loss": 3.1383, "step": 19767 }, { "epoch": 0.97, "grad_norm": 0.5400051474571228, "learning_rate": 0.00045841177497875905, "loss": 3.0832, "step": 19768 }, { "epoch": 0.97, "grad_norm": 0.5341521501541138, "learning_rate": 0.00045839870001785455, "loss": 3.1805, "step": 19769 }, { "epoch": 0.97, "grad_norm": 0.5922198295593262, "learning_rate": 0.00045838562463975833, "loss": 3.1083, "step": 19770 }, { "epoch": 0.97, "grad_norm": 0.49147966504096985, "learning_rate": 0.0004583725488445048, "loss": 3.2987, "step": 19771 }, { "epoch": 0.97, "grad_norm": 0.49468183517456055, "learning_rate": 0.00045835947263212846, "loss": 3.3835, "step": 19772 }, { "epoch": 0.97, "grad_norm": 0.5233425498008728, "learning_rate": 0.00045834639600266387, "loss": 3.1561, "step": 19773 }, { "epoch": 0.97, "grad_norm": 0.5618370175361633, "learning_rate": 0.0004583333189561453, "loss": 3.0311, "step": 19774 }, { "epoch": 0.97, "grad_norm": 0.526765763759613, "learning_rate": 0.00045832024149260707, "loss": 3.2129, "step": 19775 }, { "epoch": 0.97, "grad_norm": 0.5225813388824463, "learning_rate": 0.0004583071636120839, "loss": 3.273, "step": 19776 }, { "epoch": 0.97, "grad_norm": 0.514604389667511, "learning_rate": 0.00045829408531461023, "loss": 3.0254, "step": 19777 }, { "epoch": 0.97, "grad_norm": 0.5545125603675842, "learning_rate": 0.00045828100660022037, "loss": 3.1412, "step": 19778 }, { "epoch": 0.97, "grad_norm": 0.544165849685669, "learning_rate": 0.00045826792746894875, "loss": 3.1968, "step": 19779 }, { "epoch": 0.97, "grad_norm": 0.5067391395568848, "learning_rate": 0.0004582548479208298, "loss": 3.0627, "step": 19780 }, { "epoch": 0.97, "grad_norm": 0.4904179275035858, "learning_rate": 0.0004582417679558981, "loss": 3.0005, "step": 19781 }, { "epoch": 0.97, "grad_norm": 0.5631465315818787, "learning_rate": 0.0004582286875741881, "loss": 3.186, "step": 19782 }, { "epoch": 0.97, "grad_norm": 0.5135632753372192, "learning_rate": 0.00045821560677573414, "loss": 3.1411, "step": 19783 }, { "epoch": 0.97, "grad_norm": 0.5340128540992737, "learning_rate": 0.0004582025255605708, "loss": 2.9759, "step": 19784 }, { "epoch": 0.97, "grad_norm": 0.5625054836273193, "learning_rate": 0.0004581894439287322, "loss": 3.1578, "step": 19785 }, { "epoch": 0.97, "grad_norm": 0.5040884017944336, "learning_rate": 0.00045817636188025333, "loss": 3.1584, "step": 19786 }, { "epoch": 0.97, "grad_norm": 0.5131189823150635, "learning_rate": 0.00045816327941516823, "loss": 3.3082, "step": 19787 }, { "epoch": 0.97, "grad_norm": 0.5243518948554993, "learning_rate": 0.0004581501965335115, "loss": 3.0006, "step": 19788 }, { "epoch": 0.97, "grad_norm": 0.5257127285003662, "learning_rate": 0.0004581371132353176, "loss": 3.2521, "step": 19789 }, { "epoch": 0.97, "grad_norm": 0.5097827911376953, "learning_rate": 0.00045812402952062105, "loss": 3.0542, "step": 19790 }, { "epoch": 0.97, "grad_norm": 0.572334349155426, "learning_rate": 0.00045811094538945614, "loss": 2.9641, "step": 19791 }, { "epoch": 0.97, "grad_norm": 0.529404878616333, "learning_rate": 0.00045809786084185746, "loss": 3.0158, "step": 19792 }, { "epoch": 0.97, "grad_norm": 0.5602056980133057, "learning_rate": 0.00045808477587785945, "loss": 3.2361, "step": 19793 }, { "epoch": 0.97, "grad_norm": 0.5243390202522278, "learning_rate": 0.00045807169049749653, "loss": 3.2635, "step": 19794 }, { "epoch": 0.97, "grad_norm": 0.5974134802818298, "learning_rate": 0.0004580586047008033, "loss": 3.0509, "step": 19795 }, { "epoch": 0.97, "grad_norm": 0.4903809428215027, "learning_rate": 0.0004580455184878139, "loss": 3.148, "step": 19796 }, { "epoch": 0.97, "grad_norm": 0.5298634767532349, "learning_rate": 0.00045803243185856327, "loss": 3.2435, "step": 19797 }, { "epoch": 0.97, "grad_norm": 0.5195876955986023, "learning_rate": 0.00045801934481308547, "loss": 3.1942, "step": 19798 }, { "epoch": 0.97, "grad_norm": 0.581200361251831, "learning_rate": 0.0004580062573514153, "loss": 3.017, "step": 19799 }, { "epoch": 0.97, "grad_norm": 0.5249642729759216, "learning_rate": 0.0004579931694735869, "loss": 3.0202, "step": 19800 }, { "epoch": 0.97, "grad_norm": 0.5444178581237793, "learning_rate": 0.0004579800811796349, "loss": 3.1979, "step": 19801 }, { "epoch": 0.97, "grad_norm": 0.5659769177436829, "learning_rate": 0.00045796699246959384, "loss": 3.2618, "step": 19802 }, { "epoch": 0.97, "grad_norm": 0.5352531671524048, "learning_rate": 0.00045795390334349813, "loss": 3.0874, "step": 19803 }, { "epoch": 0.97, "grad_norm": 0.5184085965156555, "learning_rate": 0.0004579408138013822, "loss": 3.1453, "step": 19804 }, { "epoch": 0.97, "grad_norm": 0.5398754477500916, "learning_rate": 0.0004579277238432805, "loss": 3.0704, "step": 19805 }, { "epoch": 0.97, "grad_norm": 0.5079792737960815, "learning_rate": 0.00045791463346922756, "loss": 3.1852, "step": 19806 }, { "epoch": 0.97, "grad_norm": 0.521885871887207, "learning_rate": 0.00045790154267925795, "loss": 3.2204, "step": 19807 }, { "epoch": 0.97, "grad_norm": 0.5815854072570801, "learning_rate": 0.00045788845147340607, "loss": 2.9648, "step": 19808 }, { "epoch": 0.97, "grad_norm": 0.5417388081550598, "learning_rate": 0.00045787535985170643, "loss": 3.3381, "step": 19809 }, { "epoch": 0.97, "grad_norm": 0.5544579029083252, "learning_rate": 0.00045786226781419347, "loss": 3.0114, "step": 19810 }, { "epoch": 0.97, "grad_norm": 0.5227230191230774, "learning_rate": 0.00045784917536090155, "loss": 3.1927, "step": 19811 }, { "epoch": 0.97, "grad_norm": 0.5432718992233276, "learning_rate": 0.0004578360824918653, "loss": 2.9235, "step": 19812 }, { "epoch": 0.97, "grad_norm": 0.5189009308815002, "learning_rate": 0.0004578229892071193, "loss": 3.1517, "step": 19813 }, { "epoch": 0.97, "grad_norm": 0.5389320850372314, "learning_rate": 0.000457809895506698, "loss": 3.2118, "step": 19814 }, { "epoch": 0.97, "grad_norm": 0.5334956049919128, "learning_rate": 0.0004577968013906356, "loss": 2.9926, "step": 19815 }, { "epoch": 0.97, "grad_norm": 0.5629310011863708, "learning_rate": 0.0004577837068589669, "loss": 3.139, "step": 19816 }, { "epoch": 0.97, "grad_norm": 0.5584556460380554, "learning_rate": 0.0004577706119117262, "loss": 3.049, "step": 19817 }, { "epoch": 0.97, "grad_norm": 0.4945135712623596, "learning_rate": 0.0004577575165489482, "loss": 3.1779, "step": 19818 }, { "epoch": 0.97, "grad_norm": 0.5308895111083984, "learning_rate": 0.0004577444207706671, "loss": 3.3229, "step": 19819 }, { "epoch": 0.97, "grad_norm": 0.5382962226867676, "learning_rate": 0.00045773132457691766, "loss": 3.2655, "step": 19820 }, { "epoch": 0.97, "grad_norm": 0.5207489728927612, "learning_rate": 0.0004577182279677342, "loss": 3.2315, "step": 19821 }, { "epoch": 0.97, "grad_norm": 0.5437517166137695, "learning_rate": 0.0004577051309431513, "loss": 3.1617, "step": 19822 }, { "epoch": 0.97, "grad_norm": 0.5084508657455444, "learning_rate": 0.0004576920335032035, "loss": 3.1338, "step": 19823 }, { "epoch": 0.97, "grad_norm": 0.48307299613952637, "learning_rate": 0.0004576789356479253, "loss": 3.0642, "step": 19824 }, { "epoch": 0.97, "grad_norm": 0.520545244216919, "learning_rate": 0.000457665837377351, "loss": 2.956, "step": 19825 }, { "epoch": 0.97, "grad_norm": 0.5704953074455261, "learning_rate": 0.00045765273869151514, "loss": 3.1909, "step": 19826 }, { "epoch": 0.97, "grad_norm": 0.5432178974151611, "learning_rate": 0.00045763963959045246, "loss": 3.1516, "step": 19827 }, { "epoch": 0.97, "grad_norm": 0.5819136500358582, "learning_rate": 0.0004576265400741973, "loss": 3.1398, "step": 19828 }, { "epoch": 0.97, "grad_norm": 0.51753169298172, "learning_rate": 0.00045761344014278414, "loss": 2.8872, "step": 19829 }, { "epoch": 0.97, "grad_norm": 0.5441774725914001, "learning_rate": 0.0004576003397962475, "loss": 3.1017, "step": 19830 }, { "epoch": 0.97, "grad_norm": 0.5453305840492249, "learning_rate": 0.0004575872390346219, "loss": 3.1247, "step": 19831 }, { "epoch": 0.97, "grad_norm": 0.5290966033935547, "learning_rate": 0.0004575741378579419, "loss": 3.0589, "step": 19832 }, { "epoch": 0.97, "grad_norm": 0.5535355806350708, "learning_rate": 0.0004575610362662418, "loss": 3.0727, "step": 19833 }, { "epoch": 0.97, "grad_norm": 0.5289100408554077, "learning_rate": 0.00045754793425955637, "loss": 3.0748, "step": 19834 }, { "epoch": 0.97, "grad_norm": 0.681056559085846, "learning_rate": 0.00045753483183792005, "loss": 3.1107, "step": 19835 }, { "epoch": 0.97, "grad_norm": 0.514905571937561, "learning_rate": 0.0004575217290013672, "loss": 3.1522, "step": 19836 }, { "epoch": 0.97, "grad_norm": 0.5799508094787598, "learning_rate": 0.00045750862574993236, "loss": 3.1752, "step": 19837 }, { "epoch": 0.97, "grad_norm": 0.5410858988761902, "learning_rate": 0.0004574955220836503, "loss": 3.1188, "step": 19838 }, { "epoch": 0.97, "grad_norm": 0.541213870048523, "learning_rate": 0.0004574824180025553, "loss": 3.1487, "step": 19839 }, { "epoch": 0.97, "grad_norm": 0.5377730131149292, "learning_rate": 0.00045746931350668183, "loss": 3.2846, "step": 19840 }, { "epoch": 0.97, "grad_norm": 0.5022572875022888, "learning_rate": 0.0004574562085960646, "loss": 3.1793, "step": 19841 }, { "epoch": 0.97, "grad_norm": 0.535351574420929, "learning_rate": 0.0004574431032707379, "loss": 2.9624, "step": 19842 }, { "epoch": 0.97, "grad_norm": 0.5488033890724182, "learning_rate": 0.0004574299975307364, "loss": 3.1208, "step": 19843 }, { "epoch": 0.97, "grad_norm": 0.5779278874397278, "learning_rate": 0.00045741689137609467, "loss": 3.1375, "step": 19844 }, { "epoch": 0.97, "grad_norm": 0.5179200768470764, "learning_rate": 0.00045740378480684706, "loss": 2.9844, "step": 19845 }, { "epoch": 0.97, "grad_norm": 0.5455824732780457, "learning_rate": 0.00045739067782302824, "loss": 2.9835, "step": 19846 }, { "epoch": 0.97, "grad_norm": 0.5416254997253418, "learning_rate": 0.00045737757042467263, "loss": 3.4106, "step": 19847 }, { "epoch": 0.97, "grad_norm": 0.5338101387023926, "learning_rate": 0.0004573644626118147, "loss": 3.2009, "step": 19848 }, { "epoch": 0.97, "grad_norm": 0.5255735516548157, "learning_rate": 0.0004573513543844892, "loss": 3.203, "step": 19849 }, { "epoch": 0.97, "grad_norm": 0.5638295412063599, "learning_rate": 0.0004573382457427305, "loss": 3.2313, "step": 19850 }, { "epoch": 0.97, "grad_norm": 0.5044254660606384, "learning_rate": 0.0004573251366865731, "loss": 3.1181, "step": 19851 }, { "epoch": 0.97, "grad_norm": 0.675537109375, "learning_rate": 0.00045731202721605144, "loss": 3.3099, "step": 19852 }, { "epoch": 0.97, "grad_norm": 0.517725944519043, "learning_rate": 0.0004572989173312004, "loss": 3.1187, "step": 19853 }, { "epoch": 0.97, "grad_norm": 0.5297645330429077, "learning_rate": 0.0004572858070320542, "loss": 3.2745, "step": 19854 }, { "epoch": 0.97, "grad_norm": 0.5531717538833618, "learning_rate": 0.00045727269631864743, "loss": 3.0869, "step": 19855 }, { "epoch": 0.97, "grad_norm": 0.5511997938156128, "learning_rate": 0.0004572595851910147, "loss": 3.2544, "step": 19856 }, { "epoch": 0.97, "grad_norm": 0.5770429968833923, "learning_rate": 0.0004572464736491904, "loss": 3.058, "step": 19857 }, { "epoch": 0.97, "grad_norm": 0.5271993279457092, "learning_rate": 0.00045723336169320916, "loss": 3.221, "step": 19858 }, { "epoch": 0.97, "grad_norm": 0.5206511616706848, "learning_rate": 0.0004572202493231056, "loss": 3.3138, "step": 19859 }, { "epoch": 0.97, "grad_norm": 0.537216067314148, "learning_rate": 0.0004572071365389141, "loss": 3.0961, "step": 19860 }, { "epoch": 0.97, "grad_norm": 0.5249066948890686, "learning_rate": 0.00045719402334066926, "loss": 3.1989, "step": 19861 }, { "epoch": 0.97, "grad_norm": 0.49584659934043884, "learning_rate": 0.0004571809097284055, "loss": 3.3242, "step": 19862 }, { "epoch": 0.97, "grad_norm": 0.4906226694583893, "learning_rate": 0.00045716779570215767, "loss": 3.2433, "step": 19863 }, { "epoch": 0.97, "grad_norm": 0.5297616720199585, "learning_rate": 0.00045715468126196, "loss": 3.0477, "step": 19864 }, { "epoch": 0.97, "grad_norm": 0.5272489786148071, "learning_rate": 0.00045714156640784727, "loss": 3.1792, "step": 19865 }, { "epoch": 0.97, "grad_norm": 0.5244301557540894, "learning_rate": 0.0004571284511398538, "loss": 3.3991, "step": 19866 }, { "epoch": 0.97, "grad_norm": 0.5065206289291382, "learning_rate": 0.0004571153354580142, "loss": 3.0658, "step": 19867 }, { "epoch": 0.97, "grad_norm": 0.5413605570793152, "learning_rate": 0.00045710221936236305, "loss": 3.2466, "step": 19868 }, { "epoch": 0.97, "grad_norm": 0.5623077154159546, "learning_rate": 0.00045708910285293487, "loss": 3.2348, "step": 19869 }, { "epoch": 0.97, "grad_norm": 0.5285463333129883, "learning_rate": 0.0004570759859297643, "loss": 2.9883, "step": 19870 }, { "epoch": 0.97, "grad_norm": 0.530189573764801, "learning_rate": 0.0004570628685928858, "loss": 3.1729, "step": 19871 }, { "epoch": 0.97, "grad_norm": 0.5128258466720581, "learning_rate": 0.00045704975084233395, "loss": 3.0372, "step": 19872 }, { "epoch": 0.97, "grad_norm": 0.5443863868713379, "learning_rate": 0.0004570366326781432, "loss": 3.1384, "step": 19873 }, { "epoch": 0.97, "grad_norm": 0.547491729259491, "learning_rate": 0.0004570235141003482, "loss": 3.2092, "step": 19874 }, { "epoch": 0.97, "grad_norm": 0.5074962377548218, "learning_rate": 0.0004570103951089836, "loss": 3.0942, "step": 19875 }, { "epoch": 0.97, "grad_norm": 0.574950635433197, "learning_rate": 0.00045699727570408375, "loss": 3.1655, "step": 19876 }, { "epoch": 0.97, "grad_norm": 0.5343877077102661, "learning_rate": 0.00045698415588568334, "loss": 3.1291, "step": 19877 }, { "epoch": 0.97, "grad_norm": 0.5411051511764526, "learning_rate": 0.0004569710356538167, "loss": 3.2585, "step": 19878 }, { "epoch": 0.97, "grad_norm": 0.5204007625579834, "learning_rate": 0.0004569579150085188, "loss": 3.2438, "step": 19879 }, { "epoch": 0.97, "grad_norm": 0.518437385559082, "learning_rate": 0.0004569447939498238, "loss": 3.0511, "step": 19880 }, { "epoch": 0.97, "grad_norm": 0.5438820123672485, "learning_rate": 0.0004569316724777665, "loss": 3.1059, "step": 19881 }, { "epoch": 0.97, "grad_norm": 0.5277252793312073, "learning_rate": 0.00045691855059238126, "loss": 3.1655, "step": 19882 }, { "epoch": 0.97, "grad_norm": 0.4945885241031647, "learning_rate": 0.0004569054282937029, "loss": 3.0627, "step": 19883 }, { "epoch": 0.97, "grad_norm": 0.5790326595306396, "learning_rate": 0.0004568923055817657, "loss": 3.1567, "step": 19884 }, { "epoch": 0.97, "grad_norm": 0.5103229880332947, "learning_rate": 0.0004568791824566045, "loss": 3.2317, "step": 19885 }, { "epoch": 0.97, "grad_norm": 0.5120726823806763, "learning_rate": 0.00045686605891825363, "loss": 3.2693, "step": 19886 }, { "epoch": 0.97, "grad_norm": 0.5078148245811462, "learning_rate": 0.00045685293496674784, "loss": 3.2933, "step": 19887 }, { "epoch": 0.97, "grad_norm": 0.5411629676818848, "learning_rate": 0.0004568398106021215, "loss": 2.9652, "step": 19888 }, { "epoch": 0.97, "grad_norm": 0.5563063621520996, "learning_rate": 0.00045682668582440933, "loss": 3.2037, "step": 19889 }, { "epoch": 0.97, "grad_norm": 0.5265916585922241, "learning_rate": 0.0004568135606336459, "loss": 3.0914, "step": 19890 }, { "epoch": 0.97, "grad_norm": 0.5466210842132568, "learning_rate": 0.0004568004350298657, "loss": 3.1264, "step": 19891 }, { "epoch": 0.97, "grad_norm": 0.5217623710632324, "learning_rate": 0.0004567873090131033, "loss": 3.1154, "step": 19892 }, { "epoch": 0.97, "grad_norm": 0.514886736869812, "learning_rate": 0.0004567741825833933, "loss": 3.2016, "step": 19893 }, { "epoch": 0.97, "grad_norm": 0.6545686721801758, "learning_rate": 0.00045676105574077026, "loss": 3.2672, "step": 19894 }, { "epoch": 0.98, "grad_norm": 0.527996838092804, "learning_rate": 0.0004567479284852689, "loss": 3.0232, "step": 19895 }, { "epoch": 0.98, "grad_norm": 0.5357192754745483, "learning_rate": 0.0004567348008169235, "loss": 3.1696, "step": 19896 }, { "epoch": 0.98, "grad_norm": 0.5141521692276001, "learning_rate": 0.00045672167273576894, "loss": 3.205, "step": 19897 }, { "epoch": 0.98, "grad_norm": 0.5258877277374268, "learning_rate": 0.00045670854424183953, "loss": 3.2861, "step": 19898 }, { "epoch": 0.98, "grad_norm": 0.574230432510376, "learning_rate": 0.00045669541533517, "loss": 3.245, "step": 19899 }, { "epoch": 0.98, "grad_norm": 0.5562745928764343, "learning_rate": 0.00045668228601579495, "loss": 2.9874, "step": 19900 }, { "epoch": 0.98, "grad_norm": 0.5093985199928284, "learning_rate": 0.0004566691562837489, "loss": 3.2173, "step": 19901 }, { "epoch": 0.98, "grad_norm": 0.5798713564872742, "learning_rate": 0.0004566560261390664, "loss": 3.2883, "step": 19902 }, { "epoch": 0.98, "grad_norm": 0.5926944017410278, "learning_rate": 0.0004566428955817821, "loss": 3.1588, "step": 19903 }, { "epoch": 0.98, "grad_norm": 0.5162215828895569, "learning_rate": 0.0004566297646119306, "loss": 3.1708, "step": 19904 }, { "epoch": 0.98, "grad_norm": 0.5461186766624451, "learning_rate": 0.0004566166332295464, "loss": 2.9684, "step": 19905 }, { "epoch": 0.98, "grad_norm": 0.5802727341651917, "learning_rate": 0.0004566035014346641, "loss": 3.1052, "step": 19906 }, { "epoch": 0.98, "grad_norm": 0.5327712297439575, "learning_rate": 0.0004565903692273184, "loss": 3.114, "step": 19907 }, { "epoch": 0.98, "grad_norm": 0.5120739936828613, "learning_rate": 0.00045657723660754384, "loss": 3.0222, "step": 19908 }, { "epoch": 0.98, "grad_norm": 0.5327271819114685, "learning_rate": 0.0004565641035753748, "loss": 3.0759, "step": 19909 }, { "epoch": 0.98, "grad_norm": 0.524299144744873, "learning_rate": 0.00045655097013084616, "loss": 3.2712, "step": 19910 }, { "epoch": 0.98, "grad_norm": 0.5683066248893738, "learning_rate": 0.00045653783627399235, "loss": 3.2524, "step": 19911 }, { "epoch": 0.98, "grad_norm": 0.5321347117424011, "learning_rate": 0.00045652470200484806, "loss": 3.205, "step": 19912 }, { "epoch": 0.98, "grad_norm": 0.5390219688415527, "learning_rate": 0.0004565115673234478, "loss": 3.1628, "step": 19913 }, { "epoch": 0.98, "grad_norm": 0.5145443677902222, "learning_rate": 0.0004564984322298261, "loss": 3.2744, "step": 19914 }, { "epoch": 0.98, "grad_norm": 0.5551137924194336, "learning_rate": 0.0004564852967240178, "loss": 3.1102, "step": 19915 }, { "epoch": 0.98, "grad_norm": 0.524955153465271, "learning_rate": 0.00045647216080605725, "loss": 3.3276, "step": 19916 }, { "epoch": 0.98, "grad_norm": 0.5320925712585449, "learning_rate": 0.00045645902447597923, "loss": 3.3742, "step": 19917 }, { "epoch": 0.98, "grad_norm": 0.518169105052948, "learning_rate": 0.00045644588773381813, "loss": 3.1312, "step": 19918 }, { "epoch": 0.98, "grad_norm": 0.5469488501548767, "learning_rate": 0.00045643275057960877, "loss": 3.137, "step": 19919 }, { "epoch": 0.98, "grad_norm": 0.5299994945526123, "learning_rate": 0.0004564196130133856, "loss": 3.2838, "step": 19920 }, { "epoch": 0.98, "grad_norm": 0.5019270181655884, "learning_rate": 0.0004564064750351833, "loss": 3.0652, "step": 19921 }, { "epoch": 0.98, "grad_norm": 0.5183849930763245, "learning_rate": 0.00045639333664503637, "loss": 2.9966, "step": 19922 }, { "epoch": 0.98, "grad_norm": 0.5174096822738647, "learning_rate": 0.0004563801978429797, "loss": 2.9913, "step": 19923 }, { "epoch": 0.98, "grad_norm": 0.5443447828292847, "learning_rate": 0.0004563670586290475, "loss": 3.057, "step": 19924 }, { "epoch": 0.98, "grad_norm": 0.564255952835083, "learning_rate": 0.0004563539190032746, "loss": 3.0481, "step": 19925 }, { "epoch": 0.98, "grad_norm": 0.5157086253166199, "learning_rate": 0.0004563407789656956, "loss": 3.0848, "step": 19926 }, { "epoch": 0.98, "grad_norm": 0.5265598893165588, "learning_rate": 0.00045632763851634496, "loss": 3.1655, "step": 19927 }, { "epoch": 0.98, "grad_norm": 0.5157798528671265, "learning_rate": 0.00045631449765525753, "loss": 3.2964, "step": 19928 }, { "epoch": 0.98, "grad_norm": 0.5361979603767395, "learning_rate": 0.00045630135638246775, "loss": 3.1274, "step": 19929 }, { "epoch": 0.98, "grad_norm": 0.5458618998527527, "learning_rate": 0.0004562882146980103, "loss": 3.1837, "step": 19930 }, { "epoch": 0.98, "grad_norm": 0.49934300780296326, "learning_rate": 0.0004562750726019197, "loss": 3.0736, "step": 19931 }, { "epoch": 0.98, "grad_norm": 0.5094740390777588, "learning_rate": 0.00045626193009423076, "loss": 2.8927, "step": 19932 }, { "epoch": 0.98, "grad_norm": 0.5375152826309204, "learning_rate": 0.00045624878717497784, "loss": 3.2418, "step": 19933 }, { "epoch": 0.98, "grad_norm": 0.5470656156539917, "learning_rate": 0.0004562356438441957, "loss": 3.2453, "step": 19934 }, { "epoch": 0.98, "grad_norm": 0.5325232148170471, "learning_rate": 0.000456222500101919, "loss": 3.1305, "step": 19935 }, { "epoch": 0.98, "grad_norm": 0.5318748950958252, "learning_rate": 0.00045620935594818234, "loss": 3.0646, "step": 19936 }, { "epoch": 0.98, "grad_norm": 0.5223355889320374, "learning_rate": 0.0004561962113830203, "loss": 3.2699, "step": 19937 }, { "epoch": 0.98, "grad_norm": 0.5093883872032166, "learning_rate": 0.00045618306640646744, "loss": 3.2268, "step": 19938 }, { "epoch": 0.98, "grad_norm": 0.5158838033676147, "learning_rate": 0.0004561699210185584, "loss": 3.0998, "step": 19939 }, { "epoch": 0.98, "grad_norm": 0.5459488034248352, "learning_rate": 0.00045615677521932785, "loss": 3.142, "step": 19940 }, { "epoch": 0.98, "grad_norm": 0.574743390083313, "learning_rate": 0.00045614362900881053, "loss": 3.1598, "step": 19941 }, { "epoch": 0.98, "grad_norm": 0.5598559379577637, "learning_rate": 0.00045613048238704086, "loss": 3.0253, "step": 19942 }, { "epoch": 0.98, "grad_norm": 0.498945027589798, "learning_rate": 0.0004561173353540535, "loss": 3.3936, "step": 19943 }, { "epoch": 0.98, "grad_norm": 0.5641855597496033, "learning_rate": 0.0004561041879098832, "loss": 3.4009, "step": 19944 }, { "epoch": 0.98, "grad_norm": 0.5260211825370789, "learning_rate": 0.0004560910400545645, "loss": 3.2477, "step": 19945 }, { "epoch": 0.98, "grad_norm": 0.5271660089492798, "learning_rate": 0.000456077891788132, "loss": 3.2298, "step": 19946 }, { "epoch": 0.98, "grad_norm": 0.499926894903183, "learning_rate": 0.00045606474311062035, "loss": 3.2045, "step": 19947 }, { "epoch": 0.98, "grad_norm": 0.5305925011634827, "learning_rate": 0.00045605159402206435, "loss": 3.1442, "step": 19948 }, { "epoch": 0.98, "grad_norm": 0.5455648303031921, "learning_rate": 0.0004560384445224984, "loss": 3.0783, "step": 19949 }, { "epoch": 0.98, "grad_norm": 0.5389901995658875, "learning_rate": 0.0004560252946119571, "loss": 3.148, "step": 19950 }, { "epoch": 0.98, "grad_norm": 0.5463103652000427, "learning_rate": 0.00045601214429047534, "loss": 2.9844, "step": 19951 }, { "epoch": 0.98, "grad_norm": 0.5185086131095886, "learning_rate": 0.00045599899355808763, "loss": 3.1557, "step": 19952 }, { "epoch": 0.98, "grad_norm": 0.5186192393302917, "learning_rate": 0.0004559858424148285, "loss": 3.2037, "step": 19953 }, { "epoch": 0.98, "grad_norm": 0.5496533513069153, "learning_rate": 0.00045597269086073273, "loss": 3.1717, "step": 19954 }, { "epoch": 0.98, "grad_norm": 0.5932478308677673, "learning_rate": 0.00045595953889583483, "loss": 2.9012, "step": 19955 }, { "epoch": 0.98, "grad_norm": 0.5475051403045654, "learning_rate": 0.0004559463865201696, "loss": 3.2653, "step": 19956 }, { "epoch": 0.98, "grad_norm": 0.5264762043952942, "learning_rate": 0.0004559332337337716, "loss": 3.2941, "step": 19957 }, { "epoch": 0.98, "grad_norm": 0.503323495388031, "learning_rate": 0.0004559200805366755, "loss": 3.1422, "step": 19958 }, { "epoch": 0.98, "grad_norm": 0.5633005499839783, "learning_rate": 0.0004559069269289159, "loss": 3.2524, "step": 19959 }, { "epoch": 0.98, "grad_norm": 0.5244492888450623, "learning_rate": 0.0004558937729105274, "loss": 3.1133, "step": 19960 }, { "epoch": 0.98, "grad_norm": 0.5386800765991211, "learning_rate": 0.00045588061848154477, "loss": 3.2379, "step": 19961 }, { "epoch": 0.98, "grad_norm": 0.5170870423316956, "learning_rate": 0.00045586746364200255, "loss": 3.2358, "step": 19962 }, { "epoch": 0.98, "grad_norm": 0.561007559299469, "learning_rate": 0.00045585430839193547, "loss": 2.9753, "step": 19963 }, { "epoch": 0.98, "grad_norm": 0.4810415208339691, "learning_rate": 0.00045584115273137807, "loss": 3.1482, "step": 19964 }, { "epoch": 0.98, "grad_norm": 0.6551200747489929, "learning_rate": 0.0004558279966603651, "loss": 2.9541, "step": 19965 }, { "epoch": 0.98, "grad_norm": 0.5145920515060425, "learning_rate": 0.00045581484017893117, "loss": 3.125, "step": 19966 }, { "epoch": 0.98, "grad_norm": 0.4966809153556824, "learning_rate": 0.000455801683287111, "loss": 3.098, "step": 19967 }, { "epoch": 0.98, "grad_norm": 0.5068411827087402, "learning_rate": 0.00045578852598493914, "loss": 2.9166, "step": 19968 }, { "epoch": 0.98, "grad_norm": 0.5475143790245056, "learning_rate": 0.00045577536827245025, "loss": 2.9523, "step": 19969 }, { "epoch": 0.98, "grad_norm": 0.5503715872764587, "learning_rate": 0.000455762210149679, "loss": 3.1146, "step": 19970 }, { "epoch": 0.98, "grad_norm": 0.5356963276863098, "learning_rate": 0.00045574905161666014, "loss": 2.8835, "step": 19971 }, { "epoch": 0.98, "grad_norm": 0.5304535627365112, "learning_rate": 0.0004557358926734282, "loss": 3.2342, "step": 19972 }, { "epoch": 0.98, "grad_norm": 0.511310875415802, "learning_rate": 0.0004557227333200179, "loss": 3.2055, "step": 19973 }, { "epoch": 0.98, "grad_norm": 0.4973483085632324, "learning_rate": 0.000455709573556464, "loss": 3.2209, "step": 19974 }, { "epoch": 0.98, "grad_norm": 0.5432186126708984, "learning_rate": 0.00045569641338280087, "loss": 2.9227, "step": 19975 }, { "epoch": 0.98, "grad_norm": 0.5128586292266846, "learning_rate": 0.00045568325279906344, "loss": 3.4699, "step": 19976 }, { "epoch": 0.98, "grad_norm": 0.5524463057518005, "learning_rate": 0.0004556700918052864, "loss": 3.1689, "step": 19977 }, { "epoch": 0.98, "grad_norm": 0.5236881375312805, "learning_rate": 0.0004556569304015041, "loss": 3.2393, "step": 19978 }, { "epoch": 0.98, "grad_norm": 0.5629315972328186, "learning_rate": 0.0004556437685877515, "loss": 3.2885, "step": 19979 }, { "epoch": 0.98, "grad_norm": 0.5505418181419373, "learning_rate": 0.0004556306063640632, "loss": 3.1929, "step": 19980 }, { "epoch": 0.98, "grad_norm": 0.5632915496826172, "learning_rate": 0.0004556174437304737, "loss": 3.0301, "step": 19981 }, { "epoch": 0.98, "grad_norm": 0.5991239547729492, "learning_rate": 0.00045560428068701787, "loss": 3.2244, "step": 19982 }, { "epoch": 0.98, "grad_norm": 0.5407932996749878, "learning_rate": 0.00045559111723373036, "loss": 3.239, "step": 19983 }, { "epoch": 0.98, "grad_norm": 0.5531562566757202, "learning_rate": 0.00045557795337064575, "loss": 3.3026, "step": 19984 }, { "epoch": 0.98, "grad_norm": 0.5307109951972961, "learning_rate": 0.00045556478909779876, "loss": 3.2586, "step": 19985 }, { "epoch": 0.98, "grad_norm": 0.515917181968689, "learning_rate": 0.000455551624415224, "loss": 3.0974, "step": 19986 }, { "epoch": 0.98, "grad_norm": 0.5192380547523499, "learning_rate": 0.0004555384593229562, "loss": 3.2065, "step": 19987 }, { "epoch": 0.98, "grad_norm": 0.523467481136322, "learning_rate": 0.00045552529382103025, "loss": 3.0622, "step": 19988 }, { "epoch": 0.98, "grad_norm": 0.5278224945068359, "learning_rate": 0.00045551212790948036, "loss": 3.1597, "step": 19989 }, { "epoch": 0.98, "grad_norm": 0.5236921310424805, "learning_rate": 0.00045549896158834154, "loss": 3.0244, "step": 19990 }, { "epoch": 0.98, "grad_norm": 0.5542269945144653, "learning_rate": 0.0004554857948576483, "loss": 3.2861, "step": 19991 }, { "epoch": 0.98, "grad_norm": 0.5261695981025696, "learning_rate": 0.00045547262771743555, "loss": 3.0632, "step": 19992 }, { "epoch": 0.98, "grad_norm": 0.530767023563385, "learning_rate": 0.00045545946016773775, "loss": 3.2265, "step": 19993 }, { "epoch": 0.98, "grad_norm": 0.5161399841308594, "learning_rate": 0.00045544629220858966, "loss": 3.2562, "step": 19994 }, { "epoch": 0.98, "grad_norm": 0.4969814717769623, "learning_rate": 0.00045543312384002595, "loss": 3.0832, "step": 19995 }, { "epoch": 0.98, "grad_norm": 0.5116978287696838, "learning_rate": 0.0004554199550620812, "loss": 3.15, "step": 19996 }, { "epoch": 0.98, "grad_norm": 0.5162671208381653, "learning_rate": 0.00045540678587479037, "loss": 3.2172, "step": 19997 }, { "epoch": 0.98, "grad_norm": 0.5306782722473145, "learning_rate": 0.0004553936162781879, "loss": 3.1452, "step": 19998 }, { "epoch": 0.98, "grad_norm": 0.535821259021759, "learning_rate": 0.0004553804462723086, "loss": 3.2178, "step": 19999 }, { "epoch": 0.98, "grad_norm": 0.5315077900886536, "learning_rate": 0.00045536727585718706, "loss": 3.2123, "step": 20000 }, { "epoch": 0.98, "grad_norm": 0.522333025932312, "learning_rate": 0.0004553541050328579, "loss": 3.0348, "step": 20001 }, { "epoch": 0.98, "grad_norm": 0.5301592350006104, "learning_rate": 0.000455340933799356, "loss": 3.1113, "step": 20002 }, { "epoch": 0.98, "grad_norm": 0.584934651851654, "learning_rate": 0.0004553277621567161, "loss": 3.18, "step": 20003 }, { "epoch": 0.98, "grad_norm": 0.6135393977165222, "learning_rate": 0.0004553145901049727, "loss": 3.1347, "step": 20004 }, { "epoch": 0.98, "grad_norm": 0.5235838890075684, "learning_rate": 0.0004553014176441605, "loss": 2.9696, "step": 20005 }, { "epoch": 0.98, "grad_norm": 0.5571325421333313, "learning_rate": 0.0004552882447743143, "loss": 3.215, "step": 20006 }, { "epoch": 0.98, "grad_norm": 0.5705804824829102, "learning_rate": 0.0004552750714954688, "loss": 3.2719, "step": 20007 }, { "epoch": 0.98, "grad_norm": 0.5331918597221375, "learning_rate": 0.00045526189780765856, "loss": 3.2195, "step": 20008 }, { "epoch": 0.98, "grad_norm": 0.5457714200019836, "learning_rate": 0.00045524872371091834, "loss": 3.2391, "step": 20009 }, { "epoch": 0.98, "grad_norm": 0.5504556894302368, "learning_rate": 0.000455235549205283, "loss": 3.2419, "step": 20010 }, { "epoch": 0.98, "grad_norm": 0.49661940336227417, "learning_rate": 0.000455222374290787, "loss": 3.1479, "step": 20011 }, { "epoch": 0.98, "grad_norm": 0.5197951197624207, "learning_rate": 0.00045520919896746516, "loss": 3.0056, "step": 20012 }, { "epoch": 0.98, "grad_norm": 0.5557920932769775, "learning_rate": 0.00045519602323535206, "loss": 3.2958, "step": 20013 }, { "epoch": 0.98, "grad_norm": 0.53912353515625, "learning_rate": 0.0004551828470944827, "loss": 2.8655, "step": 20014 }, { "epoch": 0.98, "grad_norm": 0.5061671137809753, "learning_rate": 0.0004551696705448915, "loss": 3.2487, "step": 20015 }, { "epoch": 0.98, "grad_norm": 0.5309056639671326, "learning_rate": 0.00045515649358661317, "loss": 3.1987, "step": 20016 }, { "epoch": 0.98, "grad_norm": 0.5490389466285706, "learning_rate": 0.0004551433162196826, "loss": 3.084, "step": 20017 }, { "epoch": 0.98, "grad_norm": 0.5171396136283875, "learning_rate": 0.00045513013844413435, "loss": 3.2162, "step": 20018 }, { "epoch": 0.98, "grad_norm": 0.5325402617454529, "learning_rate": 0.00045511696026000317, "loss": 3.0539, "step": 20019 }, { "epoch": 0.98, "grad_norm": 0.5274950861930847, "learning_rate": 0.00045510378166732375, "loss": 3.3672, "step": 20020 }, { "epoch": 0.98, "grad_norm": 0.5318315625190735, "learning_rate": 0.0004550906026661309, "loss": 3.0185, "step": 20021 }, { "epoch": 0.98, "grad_norm": 0.5088624358177185, "learning_rate": 0.00045507742325645914, "loss": 3.1083, "step": 20022 }, { "epoch": 0.98, "grad_norm": 0.5656629800796509, "learning_rate": 0.0004550642434383433, "loss": 2.9348, "step": 20023 }, { "epoch": 0.98, "grad_norm": 0.5793680548667908, "learning_rate": 0.0004550510632118182, "loss": 3.0635, "step": 20024 }, { "epoch": 0.98, "grad_norm": 0.5440924167633057, "learning_rate": 0.0004550378825769184, "loss": 2.9255, "step": 20025 }, { "epoch": 0.98, "grad_norm": 0.49890273809432983, "learning_rate": 0.0004550247015336786, "loss": 3.2322, "step": 20026 }, { "epoch": 0.98, "grad_norm": 0.541521430015564, "learning_rate": 0.0004550115200821335, "loss": 2.9845, "step": 20027 }, { "epoch": 0.98, "grad_norm": 0.5471722483634949, "learning_rate": 0.00045499833822231807, "loss": 3.002, "step": 20028 }, { "epoch": 0.98, "grad_norm": 0.5366112589836121, "learning_rate": 0.0004549851559542668, "loss": 3.1757, "step": 20029 }, { "epoch": 0.98, "grad_norm": 0.5042491555213928, "learning_rate": 0.0004549719732780143, "loss": 3.3144, "step": 20030 }, { "epoch": 0.98, "grad_norm": 0.5647759437561035, "learning_rate": 0.0004549587901935957, "loss": 2.917, "step": 20031 }, { "epoch": 0.98, "grad_norm": 0.5422675013542175, "learning_rate": 0.00045494560670104525, "loss": 3.0393, "step": 20032 }, { "epoch": 0.98, "grad_norm": 0.532295823097229, "learning_rate": 0.000454932422800398, "loss": 3.0517, "step": 20033 }, { "epoch": 0.98, "grad_norm": 0.5531377792358398, "learning_rate": 0.00045491923849168856, "loss": 3.1039, "step": 20034 }, { "epoch": 0.98, "grad_norm": 0.4993976056575775, "learning_rate": 0.0004549060537749516, "loss": 2.9075, "step": 20035 }, { "epoch": 0.98, "grad_norm": 0.5766929984092712, "learning_rate": 0.00045489286865022205, "loss": 3.448, "step": 20036 }, { "epoch": 0.98, "grad_norm": 0.525661289691925, "learning_rate": 0.00045487968311753427, "loss": 3.204, "step": 20037 }, { "epoch": 0.98, "grad_norm": 0.5298983454704285, "learning_rate": 0.0004548664971769234, "loss": 3.1754, "step": 20038 }, { "epoch": 0.98, "grad_norm": 0.5623906254768372, "learning_rate": 0.000454853310828424, "loss": 3.2299, "step": 20039 }, { "epoch": 0.98, "grad_norm": 0.5601446032524109, "learning_rate": 0.0004548401240720706, "loss": 3.135, "step": 20040 }, { "epoch": 0.98, "grad_norm": 0.5393292307853699, "learning_rate": 0.0004548269369078982, "loss": 3.1141, "step": 20041 }, { "epoch": 0.98, "grad_norm": 0.5087615847587585, "learning_rate": 0.0004548137493359414, "loss": 2.9242, "step": 20042 }, { "epoch": 0.98, "grad_norm": 0.5590735673904419, "learning_rate": 0.00045480056135623515, "loss": 3.1836, "step": 20043 }, { "epoch": 0.98, "grad_norm": 0.5152249932289124, "learning_rate": 0.00045478737296881383, "loss": 3.0764, "step": 20044 }, { "epoch": 0.98, "grad_norm": 0.551261842250824, "learning_rate": 0.0004547741841737125, "loss": 3.1649, "step": 20045 }, { "epoch": 0.98, "grad_norm": 0.5181664824485779, "learning_rate": 0.0004547609949709656, "loss": 3.231, "step": 20046 }, { "epoch": 0.98, "grad_norm": 0.5038676857948303, "learning_rate": 0.00045474780536060815, "loss": 3.141, "step": 20047 }, { "epoch": 0.98, "grad_norm": 0.5354689359664917, "learning_rate": 0.00045473461534267475, "loss": 3.2463, "step": 20048 }, { "epoch": 0.98, "grad_norm": 0.5693992972373962, "learning_rate": 0.00045472142491720006, "loss": 3.0521, "step": 20049 }, { "epoch": 0.98, "grad_norm": 0.532825767993927, "learning_rate": 0.00045470823408421903, "loss": 3.1193, "step": 20050 }, { "epoch": 0.98, "grad_norm": 0.5376405715942383, "learning_rate": 0.0004546950428437662, "loss": 3.2254, "step": 20051 }, { "epoch": 0.98, "grad_norm": 0.5095952749252319, "learning_rate": 0.00045468185119587644, "loss": 2.8989, "step": 20052 }, { "epoch": 0.98, "grad_norm": 0.5750041604042053, "learning_rate": 0.00045466865914058443, "loss": 3.045, "step": 20053 }, { "epoch": 0.98, "grad_norm": 0.5071499347686768, "learning_rate": 0.00045465546667792497, "loss": 3.0452, "step": 20054 }, { "epoch": 0.98, "grad_norm": 0.49465328454971313, "learning_rate": 0.0004546422738079327, "loss": 3.1946, "step": 20055 }, { "epoch": 0.98, "grad_norm": 0.5186102986335754, "learning_rate": 0.0004546290805306426, "loss": 3.2024, "step": 20056 }, { "epoch": 0.98, "grad_norm": 0.5584270358085632, "learning_rate": 0.00045461588684608914, "loss": 3.0319, "step": 20057 }, { "epoch": 0.98, "grad_norm": 0.5318349003791809, "learning_rate": 0.0004546026927543072, "loss": 2.9715, "step": 20058 }, { "epoch": 0.98, "grad_norm": 0.5026640295982361, "learning_rate": 0.0004545894982553315, "loss": 3.2427, "step": 20059 }, { "epoch": 0.98, "grad_norm": 0.5623165965080261, "learning_rate": 0.0004545763033491968, "loss": 3.2415, "step": 20060 }, { "epoch": 0.98, "grad_norm": 0.5373814702033997, "learning_rate": 0.000454563108035938, "loss": 3.3984, "step": 20061 }, { "epoch": 0.98, "grad_norm": 0.5032324194908142, "learning_rate": 0.00045454991231558967, "loss": 3.3189, "step": 20062 }, { "epoch": 0.98, "grad_norm": 0.5357590913772583, "learning_rate": 0.00045453671618818646, "loss": 3.0099, "step": 20063 }, { "epoch": 0.98, "grad_norm": 0.5333057641983032, "learning_rate": 0.00045452351965376335, "loss": 3.1873, "step": 20064 }, { "epoch": 0.98, "grad_norm": 0.5362182855606079, "learning_rate": 0.0004545103227123552, "loss": 3.1468, "step": 20065 }, { "epoch": 0.98, "grad_norm": 0.5074638724327087, "learning_rate": 0.0004544971253639964, "loss": 3.0841, "step": 20066 }, { "epoch": 0.98, "grad_norm": 0.5193195343017578, "learning_rate": 0.000454483927608722, "loss": 2.9173, "step": 20067 }, { "epoch": 0.98, "grad_norm": 0.5239328742027283, "learning_rate": 0.0004544707294465667, "loss": 3.2981, "step": 20068 }, { "epoch": 0.98, "grad_norm": 0.5404732823371887, "learning_rate": 0.00045445753087756507, "loss": 3.056, "step": 20069 }, { "epoch": 0.98, "grad_norm": 0.6814656257629395, "learning_rate": 0.0004544443319017521, "loss": 3.009, "step": 20070 }, { "epoch": 0.98, "grad_norm": 0.5465784668922424, "learning_rate": 0.0004544311325191625, "loss": 3.3474, "step": 20071 }, { "epoch": 0.98, "grad_norm": 0.5160285234451294, "learning_rate": 0.00045441793272983107, "loss": 2.8708, "step": 20072 }, { "epoch": 0.98, "grad_norm": 0.4963335692882538, "learning_rate": 0.00045440473253379246, "loss": 3.2226, "step": 20073 }, { "epoch": 0.98, "grad_norm": 0.5127677917480469, "learning_rate": 0.00045439153193108155, "loss": 3.2884, "step": 20074 }, { "epoch": 0.98, "grad_norm": 0.5165018439292908, "learning_rate": 0.000454378330921733, "loss": 3.0943, "step": 20075 }, { "epoch": 0.98, "grad_norm": 0.5334299206733704, "learning_rate": 0.0004543651295057817, "loss": 3.1644, "step": 20076 }, { "epoch": 0.98, "grad_norm": 0.5283639430999756, "learning_rate": 0.0004543519276832624, "loss": 3.1569, "step": 20077 }, { "epoch": 0.98, "grad_norm": 0.4838026762008667, "learning_rate": 0.00045433872545420966, "loss": 3.2105, "step": 20078 }, { "epoch": 0.98, "grad_norm": 0.5402780175209045, "learning_rate": 0.00045432552281865854, "loss": 2.9989, "step": 20079 }, { "epoch": 0.98, "grad_norm": 0.49882394075393677, "learning_rate": 0.00045431231977664365, "loss": 3.1154, "step": 20080 }, { "epoch": 0.98, "grad_norm": 0.5234292149543762, "learning_rate": 0.00045429911632819986, "loss": 2.9596, "step": 20081 }, { "epoch": 0.98, "grad_norm": 0.5079872608184814, "learning_rate": 0.00045428591247336186, "loss": 3.0581, "step": 20082 }, { "epoch": 0.98, "grad_norm": 0.6135797500610352, "learning_rate": 0.0004542727082121644, "loss": 3.2326, "step": 20083 }, { "epoch": 0.98, "grad_norm": 0.5245426893234253, "learning_rate": 0.0004542595035446424, "loss": 3.0909, "step": 20084 }, { "epoch": 0.98, "grad_norm": 0.5490965247154236, "learning_rate": 0.0004542462984708305, "loss": 3.227, "step": 20085 }, { "epoch": 0.98, "grad_norm": 0.5142471790313721, "learning_rate": 0.0004542330929907636, "loss": 3.2628, "step": 20086 }, { "epoch": 0.98, "grad_norm": 0.5162678360939026, "learning_rate": 0.0004542198871044764, "loss": 2.9272, "step": 20087 }, { "epoch": 0.98, "grad_norm": 0.507563591003418, "learning_rate": 0.00045420668081200364, "loss": 3.107, "step": 20088 }, { "epoch": 0.98, "grad_norm": 0.49381065368652344, "learning_rate": 0.00045419347411338015, "loss": 3.141, "step": 20089 }, { "epoch": 0.98, "grad_norm": 0.5393127202987671, "learning_rate": 0.00045418026700864083, "loss": 3.4015, "step": 20090 }, { "epoch": 0.98, "grad_norm": 0.5078210830688477, "learning_rate": 0.00045416705949782036, "loss": 3.0461, "step": 20091 }, { "epoch": 0.98, "grad_norm": 0.5738405585289001, "learning_rate": 0.00045415385158095343, "loss": 3.1799, "step": 20092 }, { "epoch": 0.98, "grad_norm": 0.5566117167472839, "learning_rate": 0.00045414064325807497, "loss": 3.0548, "step": 20093 }, { "epoch": 0.98, "grad_norm": 0.5047556161880493, "learning_rate": 0.0004541274345292197, "loss": 3.0273, "step": 20094 }, { "epoch": 0.98, "grad_norm": 0.5348913073539734, "learning_rate": 0.00045411422539442246, "loss": 3.15, "step": 20095 }, { "epoch": 0.98, "grad_norm": 0.5044894218444824, "learning_rate": 0.0004541010158537179, "loss": 3.1082, "step": 20096 }, { "epoch": 0.98, "grad_norm": 0.5232664942741394, "learning_rate": 0.00045408780590714103, "loss": 3.1608, "step": 20097 }, { "epoch": 0.98, "grad_norm": 0.5661266446113586, "learning_rate": 0.00045407459555472647, "loss": 3.3411, "step": 20098 }, { "epoch": 0.99, "grad_norm": 0.5234664082527161, "learning_rate": 0.0004540613847965092, "loss": 3.2362, "step": 20099 }, { "epoch": 0.99, "grad_norm": 0.5506570935249329, "learning_rate": 0.0004540481736325238, "loss": 3.1079, "step": 20100 }, { "epoch": 0.99, "grad_norm": 0.5332557559013367, "learning_rate": 0.00045403496206280515, "loss": 3.0901, "step": 20101 }, { "epoch": 0.99, "grad_norm": 0.5116790533065796, "learning_rate": 0.0004540217500873881, "loss": 3.0163, "step": 20102 }, { "epoch": 0.99, "grad_norm": 0.4963262677192688, "learning_rate": 0.00045400853770630737, "loss": 3.1048, "step": 20103 }, { "epoch": 0.99, "grad_norm": 0.5719655752182007, "learning_rate": 0.0004539953249195978, "loss": 3.1882, "step": 20104 }, { "epoch": 0.99, "grad_norm": 0.6064972877502441, "learning_rate": 0.0004539821117272942, "loss": 3.1947, "step": 20105 }, { "epoch": 0.99, "grad_norm": 0.5541272163391113, "learning_rate": 0.00045396889812943136, "loss": 3.1424, "step": 20106 }, { "epoch": 0.99, "grad_norm": 0.4937479496002197, "learning_rate": 0.000453955684126044, "loss": 3.3325, "step": 20107 }, { "epoch": 0.99, "grad_norm": 0.540358304977417, "learning_rate": 0.0004539424697171671, "loss": 3.3724, "step": 20108 }, { "epoch": 0.99, "grad_norm": 0.5559720396995544, "learning_rate": 0.0004539292549028352, "loss": 2.9808, "step": 20109 }, { "epoch": 0.99, "grad_norm": 0.5821501612663269, "learning_rate": 0.0004539160396830834, "loss": 3.2358, "step": 20110 }, { "epoch": 0.99, "grad_norm": 0.5225769281387329, "learning_rate": 0.00045390282405794634, "loss": 3.2651, "step": 20111 }, { "epoch": 0.99, "grad_norm": 0.50220787525177, "learning_rate": 0.0004538896080274589, "loss": 3.1423, "step": 20112 }, { "epoch": 0.99, "grad_norm": 0.5386639833450317, "learning_rate": 0.0004538763915916559, "loss": 3.2382, "step": 20113 }, { "epoch": 0.99, "grad_norm": 0.5450523495674133, "learning_rate": 0.0004538631747505719, "loss": 3.2255, "step": 20114 }, { "epoch": 0.99, "grad_norm": 0.52878737449646, "learning_rate": 0.000453849957504242, "loss": 2.9539, "step": 20115 }, { "epoch": 0.99, "grad_norm": 0.5469775199890137, "learning_rate": 0.000453836739852701, "loss": 2.9554, "step": 20116 }, { "epoch": 0.99, "grad_norm": 0.5183457136154175, "learning_rate": 0.0004538235217959836, "loss": 3.0908, "step": 20117 }, { "epoch": 0.99, "grad_norm": 0.5041838884353638, "learning_rate": 0.0004538103033341246, "loss": 3.1683, "step": 20118 }, { "epoch": 0.99, "grad_norm": 0.5320984125137329, "learning_rate": 0.00045379708446715894, "loss": 3.1186, "step": 20119 }, { "epoch": 0.99, "grad_norm": 0.5572245717048645, "learning_rate": 0.00045378386519512125, "loss": 3.2272, "step": 20120 }, { "epoch": 0.99, "grad_norm": 0.6518499255180359, "learning_rate": 0.0004537706455180465, "loss": 3.1933, "step": 20121 }, { "epoch": 0.99, "grad_norm": 0.5835242867469788, "learning_rate": 0.0004537574254359695, "loss": 3.2654, "step": 20122 }, { "epoch": 0.99, "grad_norm": 0.5642088055610657, "learning_rate": 0.0004537442049489251, "loss": 3.2556, "step": 20123 }, { "epoch": 0.99, "grad_norm": 0.5181591510772705, "learning_rate": 0.00045373098405694786, "loss": 3.1488, "step": 20124 }, { "epoch": 0.99, "grad_norm": 0.5602161884307861, "learning_rate": 0.0004537177627600729, "loss": 3.0757, "step": 20125 }, { "epoch": 0.99, "grad_norm": 0.5879859328269958, "learning_rate": 0.00045370454105833494, "loss": 3.2037, "step": 20126 }, { "epoch": 0.99, "grad_norm": 0.5683193802833557, "learning_rate": 0.00045369131895176885, "loss": 2.9962, "step": 20127 }, { "epoch": 0.99, "grad_norm": 0.5079681873321533, "learning_rate": 0.0004536780964404093, "loss": 3.0535, "step": 20128 }, { "epoch": 0.99, "grad_norm": 0.5396766662597656, "learning_rate": 0.0004536648735242912, "loss": 3.1192, "step": 20129 }, { "epoch": 0.99, "grad_norm": 0.5396043658256531, "learning_rate": 0.0004536516502034495, "loss": 3.2537, "step": 20130 }, { "epoch": 0.99, "grad_norm": 0.5514666438102722, "learning_rate": 0.0004536384264779189, "loss": 2.9611, "step": 20131 }, { "epoch": 0.99, "grad_norm": 0.5507171750068665, "learning_rate": 0.0004536252023477343, "loss": 3.2089, "step": 20132 }, { "epoch": 0.99, "grad_norm": 0.5136435627937317, "learning_rate": 0.0004536119778129304, "loss": 3.0577, "step": 20133 }, { "epoch": 0.99, "grad_norm": 0.5072475671768188, "learning_rate": 0.00045359875287354214, "loss": 3.2917, "step": 20134 }, { "epoch": 0.99, "grad_norm": 0.5063516497612, "learning_rate": 0.00045358552752960426, "loss": 3.2465, "step": 20135 }, { "epoch": 0.99, "grad_norm": 0.545147716999054, "learning_rate": 0.00045357230178115173, "loss": 3.042, "step": 20136 }, { "epoch": 0.99, "grad_norm": 0.5409039855003357, "learning_rate": 0.0004535590756282193, "loss": 3.3088, "step": 20137 }, { "epoch": 0.99, "grad_norm": 0.5154100656509399, "learning_rate": 0.0004535458490708419, "loss": 3.2658, "step": 20138 }, { "epoch": 0.99, "grad_norm": 0.5560367703437805, "learning_rate": 0.0004535326221090542, "loss": 3.0783, "step": 20139 }, { "epoch": 0.99, "grad_norm": 0.5108457207679749, "learning_rate": 0.0004535193947428911, "loss": 3.1656, "step": 20140 }, { "epoch": 0.99, "grad_norm": 0.5491657257080078, "learning_rate": 0.0004535061669723875, "loss": 3.3704, "step": 20141 }, { "epoch": 0.99, "grad_norm": 0.5991771221160889, "learning_rate": 0.00045349293879757814, "loss": 3.3599, "step": 20142 }, { "epoch": 0.99, "grad_norm": 0.530377209186554, "learning_rate": 0.00045347971021849796, "loss": 3.3084, "step": 20143 }, { "epoch": 0.99, "grad_norm": 0.546180009841919, "learning_rate": 0.0004534664812351818, "loss": 3.4132, "step": 20144 }, { "epoch": 0.99, "grad_norm": 0.5626497864723206, "learning_rate": 0.0004534532518476644, "loss": 3.2105, "step": 20145 }, { "epoch": 0.99, "grad_norm": 0.508690357208252, "learning_rate": 0.00045344002205598074, "loss": 3.274, "step": 20146 }, { "epoch": 0.99, "grad_norm": 0.5522834062576294, "learning_rate": 0.00045342679186016554, "loss": 3.0697, "step": 20147 }, { "epoch": 0.99, "grad_norm": 0.5134068727493286, "learning_rate": 0.00045341356126025366, "loss": 3.2479, "step": 20148 }, { "epoch": 0.99, "grad_norm": 0.5544729232788086, "learning_rate": 0.00045340033025628013, "loss": 3.0962, "step": 20149 }, { "epoch": 0.99, "grad_norm": 0.5248307585716248, "learning_rate": 0.00045338709884827947, "loss": 3.262, "step": 20150 }, { "epoch": 0.99, "grad_norm": 0.5262455344200134, "learning_rate": 0.00045337386703628676, "loss": 3.1155, "step": 20151 }, { "epoch": 0.99, "grad_norm": 0.5151076912879944, "learning_rate": 0.00045336063482033696, "loss": 3.0329, "step": 20152 }, { "epoch": 0.99, "grad_norm": 0.5224588513374329, "learning_rate": 0.00045334740220046455, "loss": 3.1338, "step": 20153 }, { "epoch": 0.99, "grad_norm": 0.5186708569526672, "learning_rate": 0.0004533341691767047, "loss": 3.0215, "step": 20154 }, { "epoch": 0.99, "grad_norm": 0.5381702780723572, "learning_rate": 0.0004533209357490921, "loss": 3.2761, "step": 20155 }, { "epoch": 0.99, "grad_norm": 0.5323552489280701, "learning_rate": 0.00045330770191766176, "loss": 3.1807, "step": 20156 }, { "epoch": 0.99, "grad_norm": 0.5015830993652344, "learning_rate": 0.0004532944676824484, "loss": 3.0101, "step": 20157 }, { "epoch": 0.99, "grad_norm": 0.5259556770324707, "learning_rate": 0.0004532812330434869, "loss": 3.1135, "step": 20158 }, { "epoch": 0.99, "grad_norm": 0.544632613658905, "learning_rate": 0.00045326799800081213, "loss": 3.2611, "step": 20159 }, { "epoch": 0.99, "grad_norm": 0.5256531834602356, "learning_rate": 0.00045325476255445886, "loss": 3.0899, "step": 20160 }, { "epoch": 0.99, "grad_norm": 0.5340846180915833, "learning_rate": 0.00045324152670446217, "loss": 3.2513, "step": 20161 }, { "epoch": 0.99, "grad_norm": 0.5545790791511536, "learning_rate": 0.00045322829045085674, "loss": 2.9697, "step": 20162 }, { "epoch": 0.99, "grad_norm": 0.5166133642196655, "learning_rate": 0.00045321505379367755, "loss": 3.2083, "step": 20163 }, { "epoch": 0.99, "grad_norm": 0.5389953851699829, "learning_rate": 0.00045320181673295934, "loss": 2.8237, "step": 20164 }, { "epoch": 0.99, "grad_norm": 0.5397850871086121, "learning_rate": 0.00045318857926873697, "loss": 3.0537, "step": 20165 }, { "epoch": 0.99, "grad_norm": 0.5113707780838013, "learning_rate": 0.0004531753414010454, "loss": 3.1937, "step": 20166 }, { "epoch": 0.99, "grad_norm": 0.5229274034500122, "learning_rate": 0.00045316210312991954, "loss": 3.3204, "step": 20167 }, { "epoch": 0.99, "grad_norm": 0.5383738279342651, "learning_rate": 0.00045314886445539415, "loss": 3.3542, "step": 20168 }, { "epoch": 0.99, "grad_norm": 0.5492693185806274, "learning_rate": 0.00045313562537750403, "loss": 3.1827, "step": 20169 }, { "epoch": 0.99, "grad_norm": 0.5596176385879517, "learning_rate": 0.00045312238589628425, "loss": 3.1018, "step": 20170 }, { "epoch": 0.99, "grad_norm": 0.5374123454093933, "learning_rate": 0.00045310914601176956, "loss": 3.2086, "step": 20171 }, { "epoch": 0.99, "grad_norm": 0.5204119086265564, "learning_rate": 0.0004530959057239947, "loss": 3.0035, "step": 20172 }, { "epoch": 0.99, "grad_norm": 0.5454662442207336, "learning_rate": 0.0004530826650329948, "loss": 3.2789, "step": 20173 }, { "epoch": 0.99, "grad_norm": 0.5484021902084351, "learning_rate": 0.00045306942393880475, "loss": 3.1308, "step": 20174 }, { "epoch": 0.99, "grad_norm": 0.5319493412971497, "learning_rate": 0.00045305618244145915, "loss": 3.2038, "step": 20175 }, { "epoch": 0.99, "grad_norm": 0.529231071472168, "learning_rate": 0.000453042940540993, "loss": 3.1151, "step": 20176 }, { "epoch": 0.99, "grad_norm": 0.5511825084686279, "learning_rate": 0.0004530296982374412, "loss": 3.2654, "step": 20177 }, { "epoch": 0.99, "grad_norm": 0.5340495109558105, "learning_rate": 0.0004530164555308388, "loss": 3.2052, "step": 20178 }, { "epoch": 0.99, "grad_norm": 0.5212454795837402, "learning_rate": 0.0004530032124212203, "loss": 3.1776, "step": 20179 }, { "epoch": 0.99, "grad_norm": 0.4973335564136505, "learning_rate": 0.00045298996890862087, "loss": 3.2903, "step": 20180 }, { "epoch": 0.99, "grad_norm": 0.5556290149688721, "learning_rate": 0.0004529767249930753, "loss": 3.0608, "step": 20181 }, { "epoch": 0.99, "grad_norm": 0.5058223009109497, "learning_rate": 0.00045296348067461846, "loss": 3.2073, "step": 20182 }, { "epoch": 0.99, "grad_norm": 0.5586454272270203, "learning_rate": 0.00045295023595328526, "loss": 3.0764, "step": 20183 }, { "epoch": 0.99, "grad_norm": 0.5111760497093201, "learning_rate": 0.0004529369908291106, "loss": 2.9447, "step": 20184 }, { "epoch": 0.99, "grad_norm": 0.5140455961227417, "learning_rate": 0.00045292374530212935, "loss": 3.1685, "step": 20185 }, { "epoch": 0.99, "grad_norm": 0.5379027128219604, "learning_rate": 0.0004529104993723763, "loss": 3.1918, "step": 20186 }, { "epoch": 0.99, "grad_norm": 0.4842080771923065, "learning_rate": 0.0004528972530398864, "loss": 3.2543, "step": 20187 }, { "epoch": 0.99, "grad_norm": 0.5368223786354065, "learning_rate": 0.0004528840063046947, "loss": 3.1263, "step": 20188 }, { "epoch": 0.99, "grad_norm": 0.5022522807121277, "learning_rate": 0.0004528707591668359, "loss": 3.0206, "step": 20189 }, { "epoch": 0.99, "grad_norm": 0.5098612904548645, "learning_rate": 0.0004528575116263449, "loss": 2.9806, "step": 20190 }, { "epoch": 0.99, "grad_norm": 0.5354186296463013, "learning_rate": 0.00045284426368325664, "loss": 3.1973, "step": 20191 }, { "epoch": 0.99, "grad_norm": 0.5448538661003113, "learning_rate": 0.00045283101533760613, "loss": 3.1923, "step": 20192 }, { "epoch": 0.99, "grad_norm": 0.5186355710029602, "learning_rate": 0.00045281776658942795, "loss": 3.1251, "step": 20193 }, { "epoch": 0.99, "grad_norm": 0.5228528380393982, "learning_rate": 0.00045280451743875727, "loss": 3.1935, "step": 20194 }, { "epoch": 0.99, "grad_norm": 0.5764790177345276, "learning_rate": 0.00045279126788562885, "loss": 3.1752, "step": 20195 }, { "epoch": 0.99, "grad_norm": 0.5042545199394226, "learning_rate": 0.0004527780179300777, "loss": 3.021, "step": 20196 }, { "epoch": 0.99, "grad_norm": 0.5068015456199646, "learning_rate": 0.00045276476757213864, "loss": 3.1387, "step": 20197 }, { "epoch": 0.99, "grad_norm": 0.5311840176582336, "learning_rate": 0.00045275151681184656, "loss": 2.9341, "step": 20198 }, { "epoch": 0.99, "grad_norm": 0.57582688331604, "learning_rate": 0.0004527382656492364, "loss": 2.6663, "step": 20199 }, { "epoch": 0.99, "grad_norm": 0.5275920033454895, "learning_rate": 0.00045272501408434313, "loss": 2.8333, "step": 20200 }, { "epoch": 0.99, "grad_norm": 0.5140002965927124, "learning_rate": 0.00045271176211720133, "loss": 3.1819, "step": 20201 }, { "epoch": 0.99, "grad_norm": 0.5207107067108154, "learning_rate": 0.0004526985097478464, "loss": 3.1758, "step": 20202 }, { "epoch": 0.99, "grad_norm": 0.5530972480773926, "learning_rate": 0.0004526852569763129, "loss": 3.134, "step": 20203 }, { "epoch": 0.99, "grad_norm": 0.5513622164726257, "learning_rate": 0.00045267200380263577, "loss": 3.0947, "step": 20204 }, { "epoch": 0.99, "grad_norm": 0.5309062004089355, "learning_rate": 0.00045265875022685, "loss": 3.1286, "step": 20205 }, { "epoch": 0.99, "grad_norm": 0.5550915002822876, "learning_rate": 0.0004526454962489904, "loss": 3.1141, "step": 20206 }, { "epoch": 0.99, "grad_norm": 0.5073148608207703, "learning_rate": 0.000452632241869092, "loss": 3.1703, "step": 20207 }, { "epoch": 0.99, "grad_norm": 0.544578492641449, "learning_rate": 0.00045261898708718966, "loss": 3.1061, "step": 20208 }, { "epoch": 0.99, "grad_norm": 0.5766841173171997, "learning_rate": 0.0004526057319033182, "loss": 3.0463, "step": 20209 }, { "epoch": 0.99, "grad_norm": 0.5074905753135681, "learning_rate": 0.00045259247631751265, "loss": 2.9534, "step": 20210 }, { "epoch": 0.99, "grad_norm": 0.5443254709243774, "learning_rate": 0.00045257922032980794, "loss": 3.3291, "step": 20211 }, { "epoch": 0.99, "grad_norm": 0.505168080329895, "learning_rate": 0.0004525659639402388, "loss": 3.2416, "step": 20212 }, { "epoch": 0.99, "grad_norm": 0.5314830541610718, "learning_rate": 0.00045255270714884035, "loss": 3.1842, "step": 20213 }, { "epoch": 0.99, "grad_norm": 0.5724264979362488, "learning_rate": 0.00045253944995564746, "loss": 3.0492, "step": 20214 }, { "epoch": 0.99, "grad_norm": 0.5013821721076965, "learning_rate": 0.0004525261923606951, "loss": 2.9629, "step": 20215 }, { "epoch": 0.99, "grad_norm": 0.5013492703437805, "learning_rate": 0.0004525129343640179, "loss": 3.0972, "step": 20216 }, { "epoch": 0.99, "grad_norm": 0.5238487124443054, "learning_rate": 0.00045249967596565105, "loss": 3.1335, "step": 20217 }, { "epoch": 0.99, "grad_norm": 0.5291327238082886, "learning_rate": 0.0004524864171656295, "loss": 3.0083, "step": 20218 }, { "epoch": 0.99, "grad_norm": 0.5021530985832214, "learning_rate": 0.000452473157963988, "loss": 3.1272, "step": 20219 }, { "epoch": 0.99, "grad_norm": 0.5439477562904358, "learning_rate": 0.00045245989836076154, "loss": 3.0997, "step": 20220 }, { "epoch": 0.99, "grad_norm": 0.528767466545105, "learning_rate": 0.00045244663835598505, "loss": 3.1914, "step": 20221 }, { "epoch": 0.99, "grad_norm": 0.5145512223243713, "learning_rate": 0.00045243337794969343, "loss": 3.1779, "step": 20222 }, { "epoch": 0.99, "grad_norm": 0.5444108247756958, "learning_rate": 0.00045242011714192174, "loss": 3.1191, "step": 20223 }, { "epoch": 0.99, "grad_norm": 0.537213146686554, "learning_rate": 0.00045240685593270474, "loss": 3.0256, "step": 20224 }, { "epoch": 0.99, "grad_norm": 0.4993702471256256, "learning_rate": 0.00045239359432207733, "loss": 3.2517, "step": 20225 }, { "epoch": 0.99, "grad_norm": 0.547257125377655, "learning_rate": 0.00045238033231007464, "loss": 3.1549, "step": 20226 }, { "epoch": 0.99, "grad_norm": 0.5551975965499878, "learning_rate": 0.0004523670698967314, "loss": 3.1891, "step": 20227 }, { "epoch": 0.99, "grad_norm": 0.5113047361373901, "learning_rate": 0.00045235380708208255, "loss": 3.192, "step": 20228 }, { "epoch": 0.99, "grad_norm": 0.5420333743095398, "learning_rate": 0.0004523405438661633, "loss": 3.0101, "step": 20229 }, { "epoch": 0.99, "grad_norm": 0.5321565866470337, "learning_rate": 0.0004523272802490083, "loss": 2.9095, "step": 20230 }, { "epoch": 0.99, "grad_norm": 0.5126756429672241, "learning_rate": 0.0004523140162306525, "loss": 3.0235, "step": 20231 }, { "epoch": 0.99, "grad_norm": 0.4914495646953583, "learning_rate": 0.0004523007518111309, "loss": 3.1374, "step": 20232 }, { "epoch": 0.99, "grad_norm": 0.501433253288269, "learning_rate": 0.00045228748699047844, "loss": 2.8911, "step": 20233 }, { "epoch": 0.99, "grad_norm": 0.5397343635559082, "learning_rate": 0.00045227422176872997, "loss": 3.2692, "step": 20234 }, { "epoch": 0.99, "grad_norm": 0.5279703736305237, "learning_rate": 0.0004522609561459207, "loss": 3.2089, "step": 20235 }, { "epoch": 0.99, "grad_norm": 0.533789336681366, "learning_rate": 0.00045224769012208526, "loss": 3.0578, "step": 20236 }, { "epoch": 0.99, "grad_norm": 0.5652415752410889, "learning_rate": 0.0004522344236972587, "loss": 3.2415, "step": 20237 }, { "epoch": 0.99, "grad_norm": 0.4909104108810425, "learning_rate": 0.0004522211568714759, "loss": 3.1395, "step": 20238 }, { "epoch": 0.99, "grad_norm": 0.5707433819770813, "learning_rate": 0.000452207889644772, "loss": 3.1975, "step": 20239 }, { "epoch": 0.99, "grad_norm": 0.6347876191139221, "learning_rate": 0.0004521946220171818, "loss": 3.0497, "step": 20240 }, { "epoch": 0.99, "grad_norm": 0.5172315835952759, "learning_rate": 0.0004521813539887402, "loss": 3.335, "step": 20241 }, { "epoch": 0.99, "grad_norm": 0.563426673412323, "learning_rate": 0.00045216808555948216, "loss": 3.099, "step": 20242 }, { "epoch": 0.99, "grad_norm": 0.5538248419761658, "learning_rate": 0.0004521548167294428, "loss": 3.2189, "step": 20243 }, { "epoch": 0.99, "grad_norm": 0.5178148150444031, "learning_rate": 0.0004521415474986568, "loss": 3.0827, "step": 20244 }, { "epoch": 0.99, "grad_norm": 0.5169044733047485, "learning_rate": 0.00045212827786715934, "loss": 3.1631, "step": 20245 }, { "epoch": 0.99, "grad_norm": 0.5301280617713928, "learning_rate": 0.0004521150078349852, "loss": 3.2562, "step": 20246 }, { "epoch": 0.99, "grad_norm": 0.5236168503761292, "learning_rate": 0.00045210173740216944, "loss": 3.1808, "step": 20247 }, { "epoch": 0.99, "grad_norm": 0.6142783164978027, "learning_rate": 0.00045208846656874703, "loss": 3.0495, "step": 20248 }, { "epoch": 0.99, "grad_norm": 0.5414308309555054, "learning_rate": 0.00045207519533475274, "loss": 2.9324, "step": 20249 }, { "epoch": 0.99, "grad_norm": 0.485738068819046, "learning_rate": 0.0004520619237002218, "loss": 3.1258, "step": 20250 }, { "epoch": 0.99, "grad_norm": 0.5361891388893127, "learning_rate": 0.0004520486516651889, "loss": 3.0218, "step": 20251 }, { "epoch": 0.99, "grad_norm": 0.5489019155502319, "learning_rate": 0.00045203537922968915, "loss": 3.201, "step": 20252 }, { "epoch": 0.99, "grad_norm": 0.519248902797699, "learning_rate": 0.00045202210639375747, "loss": 3.2805, "step": 20253 }, { "epoch": 0.99, "grad_norm": 0.4982036352157593, "learning_rate": 0.0004520088331574289, "loss": 3.2192, "step": 20254 }, { "epoch": 0.99, "grad_norm": 0.5829838514328003, "learning_rate": 0.00045199555952073824, "loss": 3.2538, "step": 20255 }, { "epoch": 0.99, "grad_norm": 0.5190660953521729, "learning_rate": 0.00045198228548372056, "loss": 3.0256, "step": 20256 }, { "epoch": 0.99, "grad_norm": 0.5045883655548096, "learning_rate": 0.00045196901104641073, "loss": 3.0908, "step": 20257 }, { "epoch": 0.99, "grad_norm": 0.5397174954414368, "learning_rate": 0.0004519557362088438, "loss": 3.3517, "step": 20258 }, { "epoch": 0.99, "grad_norm": 0.5239180326461792, "learning_rate": 0.00045194246097105467, "loss": 3.358, "step": 20259 }, { "epoch": 0.99, "grad_norm": 0.52662593126297, "learning_rate": 0.0004519291853330784, "loss": 3.168, "step": 20260 }, { "epoch": 0.99, "grad_norm": 0.5203768014907837, "learning_rate": 0.00045191590929494994, "loss": 3.0684, "step": 20261 }, { "epoch": 0.99, "grad_norm": 0.5362476706504822, "learning_rate": 0.0004519026328567041, "loss": 3.1779, "step": 20262 }, { "epoch": 0.99, "grad_norm": 0.5580528974533081, "learning_rate": 0.00045188935601837604, "loss": 3.0697, "step": 20263 }, { "epoch": 0.99, "grad_norm": 0.5004633069038391, "learning_rate": 0.00045187607878000057, "loss": 3.2735, "step": 20264 }, { "epoch": 0.99, "grad_norm": 0.5355501174926758, "learning_rate": 0.0004518628011416128, "loss": 3.1514, "step": 20265 }, { "epoch": 0.99, "grad_norm": 0.5439483523368835, "learning_rate": 0.0004518495231032477, "loss": 3.2839, "step": 20266 }, { "epoch": 0.99, "grad_norm": 0.5279213190078735, "learning_rate": 0.00045183624466494006, "loss": 3.1213, "step": 20267 }, { "epoch": 0.99, "grad_norm": 0.5979596376419067, "learning_rate": 0.00045182296582672496, "loss": 3.1, "step": 20268 }, { "epoch": 0.99, "grad_norm": 0.5285710096359253, "learning_rate": 0.0004518096865886375, "loss": 3.3007, "step": 20269 }, { "epoch": 0.99, "grad_norm": 0.571806788444519, "learning_rate": 0.00045179640695071246, "loss": 3.225, "step": 20270 }, { "epoch": 0.99, "grad_norm": 0.5280594229698181, "learning_rate": 0.000451783126912985, "loss": 3.1499, "step": 20271 }, { "epoch": 0.99, "grad_norm": 0.5388377904891968, "learning_rate": 0.00045176984647548994, "loss": 3.1795, "step": 20272 }, { "epoch": 0.99, "grad_norm": 0.5278087258338928, "learning_rate": 0.00045175656563826224, "loss": 3.2452, "step": 20273 }, { "epoch": 0.99, "grad_norm": 0.5274179577827454, "learning_rate": 0.000451743284401337, "loss": 3.1447, "step": 20274 }, { "epoch": 0.99, "grad_norm": 0.5059541463851929, "learning_rate": 0.0004517300027647491, "loss": 3.0891, "step": 20275 }, { "epoch": 0.99, "grad_norm": 0.5273914337158203, "learning_rate": 0.0004517167207285337, "loss": 3.1935, "step": 20276 }, { "epoch": 0.99, "grad_norm": 0.5308375954627991, "learning_rate": 0.00045170343829272565, "loss": 3.1559, "step": 20277 }, { "epoch": 0.99, "grad_norm": 0.530507504940033, "learning_rate": 0.00045169015545735975, "loss": 2.9621, "step": 20278 }, { "epoch": 0.99, "grad_norm": 0.5165071487426758, "learning_rate": 0.0004516768722224713, "loss": 3.1421, "step": 20279 }, { "epoch": 0.99, "grad_norm": 0.5435473322868347, "learning_rate": 0.00045166358858809524, "loss": 3.0609, "step": 20280 }, { "epoch": 0.99, "grad_norm": 0.5258364081382751, "learning_rate": 0.0004516503045542664, "loss": 2.9946, "step": 20281 }, { "epoch": 0.99, "grad_norm": 0.49890580773353577, "learning_rate": 0.0004516370201210198, "loss": 3.0339, "step": 20282 }, { "epoch": 0.99, "grad_norm": 0.5879883170127869, "learning_rate": 0.0004516237352883905, "loss": 3.0634, "step": 20283 }, { "epoch": 0.99, "grad_norm": 0.5395330786705017, "learning_rate": 0.00045161045005641344, "loss": 3.2598, "step": 20284 }, { "epoch": 0.99, "grad_norm": 0.5076286196708679, "learning_rate": 0.00045159716442512367, "loss": 3.1879, "step": 20285 }, { "epoch": 0.99, "grad_norm": 0.508956253528595, "learning_rate": 0.0004515838783945562, "loss": 3.0942, "step": 20286 }, { "epoch": 0.99, "grad_norm": 0.5356499552726746, "learning_rate": 0.00045157059196474593, "loss": 3.1142, "step": 20287 }, { "epoch": 0.99, "grad_norm": 0.535220742225647, "learning_rate": 0.0004515573051357278, "loss": 2.9021, "step": 20288 }, { "epoch": 0.99, "grad_norm": 0.5434659123420715, "learning_rate": 0.000451544017907537, "loss": 3.0314, "step": 20289 }, { "epoch": 0.99, "grad_norm": 0.5554776191711426, "learning_rate": 0.0004515307302802084, "loss": 2.9867, "step": 20290 }, { "epoch": 0.99, "grad_norm": 0.5213914513587952, "learning_rate": 0.00045151744225377697, "loss": 3.2232, "step": 20291 }, { "epoch": 0.99, "grad_norm": 0.5432573556900024, "learning_rate": 0.0004515041538282778, "loss": 3.1003, "step": 20292 }, { "epoch": 0.99, "grad_norm": 0.4953993558883667, "learning_rate": 0.00045149086500374585, "loss": 3.2513, "step": 20293 }, { "epoch": 0.99, "grad_norm": 0.5025012493133545, "learning_rate": 0.0004514775757802161, "loss": 3.2112, "step": 20294 }, { "epoch": 0.99, "grad_norm": 0.5353289246559143, "learning_rate": 0.0004514642861577236, "loss": 3.0389, "step": 20295 }, { "epoch": 0.99, "grad_norm": 0.5243365168571472, "learning_rate": 0.0004514509961363033, "loss": 3.0729, "step": 20296 }, { "epoch": 0.99, "grad_norm": 0.5028917789459229, "learning_rate": 0.0004514377057159902, "loss": 3.1696, "step": 20297 }, { "epoch": 0.99, "grad_norm": 0.5022268891334534, "learning_rate": 0.0004514244148968194, "loss": 2.985, "step": 20298 }, { "epoch": 0.99, "grad_norm": 0.5356721878051758, "learning_rate": 0.00045141112367882573, "loss": 3.148, "step": 20299 }, { "epoch": 0.99, "grad_norm": 0.5337976217269897, "learning_rate": 0.0004513978320620443, "loss": 2.9342, "step": 20300 }, { "epoch": 0.99, "grad_norm": 0.5110748410224915, "learning_rate": 0.00045138454004651016, "loss": 3.2471, "step": 20301 }, { "epoch": 0.99, "grad_norm": 0.5441080331802368, "learning_rate": 0.00045137124763225834, "loss": 3.0728, "step": 20302 }, { "epoch": 1.0, "grad_norm": 0.5328307747840881, "learning_rate": 0.00045135795481932375, "loss": 3.1303, "step": 20303 }, { "epoch": 1.0, "grad_norm": 0.5662921071052551, "learning_rate": 0.00045134466160774136, "loss": 3.2292, "step": 20304 }, { "epoch": 1.0, "grad_norm": 0.5180608034133911, "learning_rate": 0.0004513313679975463, "loss": 3.303, "step": 20305 }, { "epoch": 1.0, "grad_norm": 0.5455940961837769, "learning_rate": 0.0004513180739887736, "loss": 3.3352, "step": 20306 }, { "epoch": 1.0, "grad_norm": 0.5291252732276917, "learning_rate": 0.00045130477958145817, "loss": 3.2022, "step": 20307 }, { "epoch": 1.0, "grad_norm": 0.5131491422653198, "learning_rate": 0.00045129148477563504, "loss": 3.1474, "step": 20308 }, { "epoch": 1.0, "grad_norm": 0.5317510962486267, "learning_rate": 0.0004512781895713393, "loss": 3.1086, "step": 20309 }, { "epoch": 1.0, "grad_norm": 0.5623073577880859, "learning_rate": 0.0004512648939686059, "loss": 3.0699, "step": 20310 }, { "epoch": 1.0, "grad_norm": 0.5236507654190063, "learning_rate": 0.0004512515979674698, "loss": 2.855, "step": 20311 }, { "epoch": 1.0, "grad_norm": 0.5638948082923889, "learning_rate": 0.00045123830156796626, "loss": 2.9894, "step": 20312 }, { "epoch": 1.0, "grad_norm": 0.515745222568512, "learning_rate": 0.0004512250047701301, "loss": 3.3086, "step": 20313 }, { "epoch": 1.0, "grad_norm": 0.5643942952156067, "learning_rate": 0.0004512117075739963, "loss": 3.1348, "step": 20314 }, { "epoch": 1.0, "grad_norm": 0.5207350850105286, "learning_rate": 0.0004511984099796, "loss": 3.3207, "step": 20315 }, { "epoch": 1.0, "grad_norm": 0.5619992017745972, "learning_rate": 0.0004511851119869762, "loss": 3.3062, "step": 20316 }, { "epoch": 1.0, "grad_norm": 0.5149810910224915, "learning_rate": 0.00045117181359615995, "loss": 3.1298, "step": 20317 }, { "epoch": 1.0, "grad_norm": 0.528748095035553, "learning_rate": 0.00045115851480718625, "loss": 3.1774, "step": 20318 }, { "epoch": 1.0, "grad_norm": 0.5054760575294495, "learning_rate": 0.00045114521562008994, "loss": 3.1187, "step": 20319 }, { "epoch": 1.0, "grad_norm": 0.5329738259315491, "learning_rate": 0.00045113191603490636, "loss": 3.0029, "step": 20320 }, { "epoch": 1.0, "grad_norm": 0.5501445531845093, "learning_rate": 0.0004511186160516704, "loss": 3.3807, "step": 20321 }, { "epoch": 1.0, "grad_norm": 0.5410068035125732, "learning_rate": 0.00045110531567041715, "loss": 3.0491, "step": 20322 }, { "epoch": 1.0, "grad_norm": 0.5346465110778809, "learning_rate": 0.0004510920148911814, "loss": 3.1324, "step": 20323 }, { "epoch": 1.0, "grad_norm": 0.5548122525215149, "learning_rate": 0.00045107871371399854, "loss": 3.0366, "step": 20324 }, { "epoch": 1.0, "grad_norm": 0.5406346917152405, "learning_rate": 0.0004510654121389033, "loss": 3.353, "step": 20325 }, { "epoch": 1.0, "grad_norm": 0.5303011536598206, "learning_rate": 0.0004510521101659308, "loss": 3.1889, "step": 20326 }, { "epoch": 1.0, "grad_norm": 0.5354381203651428, "learning_rate": 0.0004510388077951163, "loss": 3.0421, "step": 20327 }, { "epoch": 1.0, "grad_norm": 0.5239824056625366, "learning_rate": 0.0004510255050264945, "loss": 3.2569, "step": 20328 }, { "epoch": 1.0, "grad_norm": 0.5124732255935669, "learning_rate": 0.00045101220186010056, "loss": 3.2477, "step": 20329 }, { "epoch": 1.0, "grad_norm": 0.528676450252533, "learning_rate": 0.00045099889829596965, "loss": 3.027, "step": 20330 }, { "epoch": 1.0, "grad_norm": 0.5336599349975586, "learning_rate": 0.00045098559433413675, "loss": 2.9263, "step": 20331 }, { "epoch": 1.0, "grad_norm": 0.5987780094146729, "learning_rate": 0.00045097228997463676, "loss": 3.0757, "step": 20332 }, { "epoch": 1.0, "grad_norm": 0.5460469722747803, "learning_rate": 0.00045095898521750475, "loss": 2.9283, "step": 20333 }, { "epoch": 1.0, "grad_norm": 0.5135546326637268, "learning_rate": 0.0004509456800627759, "loss": 3.0009, "step": 20334 }, { "epoch": 1.0, "grad_norm": 0.5272367596626282, "learning_rate": 0.0004509323745104852, "loss": 3.1307, "step": 20335 }, { "epoch": 1.0, "grad_norm": 0.5386490225791931, "learning_rate": 0.00045091906856066765, "loss": 3.2579, "step": 20336 }, { "epoch": 1.0, "grad_norm": 0.54600989818573, "learning_rate": 0.00045090576221335833, "loss": 3.3436, "step": 20337 }, { "epoch": 1.0, "grad_norm": 0.5243917107582092, "learning_rate": 0.00045089245546859225, "loss": 3.1057, "step": 20338 }, { "epoch": 1.0, "grad_norm": 0.5199042558670044, "learning_rate": 0.0004508791483264045, "loss": 3.1192, "step": 20339 }, { "epoch": 1.0, "grad_norm": 0.5423737168312073, "learning_rate": 0.0004508658407868301, "loss": 3.2666, "step": 20340 }, { "epoch": 1.0, "grad_norm": 0.592741847038269, "learning_rate": 0.000450852532849904, "loss": 2.9049, "step": 20341 }, { "epoch": 1.0, "grad_norm": 0.5735978484153748, "learning_rate": 0.0004508392245156615, "loss": 3.1306, "step": 20342 }, { "epoch": 1.0, "grad_norm": 0.5209268927574158, "learning_rate": 0.0004508259157841375, "loss": 3.3385, "step": 20343 }, { "epoch": 1.0, "grad_norm": 0.5497926473617554, "learning_rate": 0.0004508126066553671, "loss": 3.0599, "step": 20344 }, { "epoch": 1.0, "grad_norm": 0.5306968092918396, "learning_rate": 0.0004507992971293852, "loss": 3.067, "step": 20345 }, { "epoch": 1.0, "grad_norm": 0.5316476821899414, "learning_rate": 0.00045078598720622707, "loss": 3.1147, "step": 20346 }, { "epoch": 1.0, "grad_norm": 0.528810977935791, "learning_rate": 0.0004507726768859277, "loss": 3.0775, "step": 20347 }, { "epoch": 1.0, "grad_norm": 0.5107991099357605, "learning_rate": 0.00045075936616852206, "loss": 3.1226, "step": 20348 }, { "epoch": 1.0, "grad_norm": 0.513142466545105, "learning_rate": 0.0004507460550540452, "loss": 3.1823, "step": 20349 }, { "epoch": 1.0, "grad_norm": 0.6255738735198975, "learning_rate": 0.0004507327435425323, "loss": 3.0949, "step": 20350 }, { "epoch": 1.0, "grad_norm": 0.5634238719940186, "learning_rate": 0.00045071943163401833, "loss": 2.9638, "step": 20351 }, { "epoch": 1.0, "grad_norm": 0.5344851016998291, "learning_rate": 0.0004507061193285384, "loss": 3.1896, "step": 20352 }, { "epoch": 1.0, "grad_norm": 0.5088251233100891, "learning_rate": 0.00045069280662612764, "loss": 3.0654, "step": 20353 }, { "epoch": 1.0, "grad_norm": 0.5598111748695374, "learning_rate": 0.00045067949352682097, "loss": 3.1859, "step": 20354 }, { "epoch": 1.0, "grad_norm": 0.5614855289459229, "learning_rate": 0.00045066618003065343, "loss": 3.1514, "step": 20355 }, { "epoch": 1.0, "grad_norm": 0.5441474318504333, "learning_rate": 0.0004506528661376603, "loss": 3.2191, "step": 20356 }, { "epoch": 1.0, "grad_norm": 0.5195251107215881, "learning_rate": 0.0004506395518478765, "loss": 3.0811, "step": 20357 }, { "epoch": 1.0, "grad_norm": 0.5556178092956543, "learning_rate": 0.00045062623716133704, "loss": 3.1629, "step": 20358 }, { "epoch": 1.0, "grad_norm": 0.5472621917724609, "learning_rate": 0.00045061292207807706, "loss": 3.049, "step": 20359 }, { "epoch": 1.0, "grad_norm": 0.5794954299926758, "learning_rate": 0.00045059960659813163, "loss": 3.0257, "step": 20360 }, { "epoch": 1.0, "grad_norm": 0.5246169567108154, "learning_rate": 0.0004505862907215359, "loss": 3.2008, "step": 20361 }, { "epoch": 1.0, "grad_norm": 0.6028974056243896, "learning_rate": 0.0004505729744483248, "loss": 3.1549, "step": 20362 }, { "epoch": 1.0, "grad_norm": 0.5344709753990173, "learning_rate": 0.0004505596577785334, "loss": 3.1225, "step": 20363 }, { "epoch": 1.0, "grad_norm": 0.49651411175727844, "learning_rate": 0.000450546340712197, "loss": 3.1713, "step": 20364 }, { "epoch": 1.0, "grad_norm": 0.5115389823913574, "learning_rate": 0.0004505330232493504, "loss": 3.1379, "step": 20365 }, { "epoch": 1.0, "grad_norm": 0.5067347288131714, "learning_rate": 0.00045051970539002875, "loss": 3.1478, "step": 20366 }, { "epoch": 1.0, "grad_norm": 0.583716630935669, "learning_rate": 0.00045050638713426723, "loss": 3.0329, "step": 20367 }, { "epoch": 1.0, "grad_norm": 0.5057085156440735, "learning_rate": 0.0004504930684821009, "loss": 3.1614, "step": 20368 }, { "epoch": 1.0, "grad_norm": 0.5527999401092529, "learning_rate": 0.00045047974943356473, "loss": 3.1671, "step": 20369 }, { "epoch": 1.0, "grad_norm": 0.549720287322998, "learning_rate": 0.00045046642998869385, "loss": 3.1481, "step": 20370 }, { "epoch": 1.0, "grad_norm": 0.5233119130134583, "learning_rate": 0.0004504531101475234, "loss": 3.2703, "step": 20371 }, { "epoch": 1.0, "grad_norm": 0.5394256711006165, "learning_rate": 0.0004504397899100883, "loss": 3.0763, "step": 20372 }, { "epoch": 1.0, "grad_norm": 0.5158621668815613, "learning_rate": 0.0004504264692764239, "loss": 3.3617, "step": 20373 }, { "epoch": 1.0, "grad_norm": 0.5600789189338684, "learning_rate": 0.000450413148246565, "loss": 3.1524, "step": 20374 }, { "epoch": 1.0, "grad_norm": 0.5463529825210571, "learning_rate": 0.00045039982682054696, "loss": 3.378, "step": 20375 }, { "epoch": 1.0, "grad_norm": 0.48953327536582947, "learning_rate": 0.0004503865049984045, "loss": 3.0364, "step": 20376 }, { "epoch": 1.0, "grad_norm": 0.5219492316246033, "learning_rate": 0.0004503731827801731, "loss": 3.0698, "step": 20377 }, { "epoch": 1.0, "grad_norm": 0.557974636554718, "learning_rate": 0.0004503598601658877, "loss": 3.2128, "step": 20378 }, { "epoch": 1.0, "grad_norm": 0.5604296922683716, "learning_rate": 0.0004503465371555833, "loss": 2.9745, "step": 20379 }, { "epoch": 1.0, "grad_norm": 0.5162230730056763, "learning_rate": 0.0004503332137492951, "loss": 3.0853, "step": 20380 }, { "epoch": 1.0, "grad_norm": 0.5778909921646118, "learning_rate": 0.00045031988994705796, "loss": 3.2235, "step": 20381 }, { "epoch": 1.0, "grad_norm": 0.5140716433525085, "learning_rate": 0.00045030656574890745, "loss": 3.2436, "step": 20382 }, { "epoch": 1.0, "grad_norm": 0.5335753560066223, "learning_rate": 0.0004502932411548781, "loss": 3.1531, "step": 20383 }, { "epoch": 1.0, "grad_norm": 0.5693366527557373, "learning_rate": 0.00045027991616500545, "loss": 3.3253, "step": 20384 }, { "epoch": 1.0, "grad_norm": 0.5139354467391968, "learning_rate": 0.0004502665907793244, "loss": 2.9069, "step": 20385 }, { "epoch": 1.0, "grad_norm": 0.49556657671928406, "learning_rate": 0.00045025326499787007, "loss": 3.2041, "step": 20386 }, { "epoch": 1.0, "grad_norm": 0.513113796710968, "learning_rate": 0.0004502399388206775, "loss": 3.2202, "step": 20387 }, { "epoch": 1.0, "grad_norm": 0.5639035701751709, "learning_rate": 0.0004502266122477819, "loss": 3.2123, "step": 20388 }, { "epoch": 1.0, "grad_norm": 0.5301312208175659, "learning_rate": 0.00045021328527921825, "loss": 3.1671, "step": 20389 }, { "epoch": 1.0, "grad_norm": 0.5456419587135315, "learning_rate": 0.00045019995791502175, "loss": 3.1206, "step": 20390 }, { "epoch": 1.0, "grad_norm": 0.5344980359077454, "learning_rate": 0.00045018663015522747, "loss": 3.1698, "step": 20391 }, { "epoch": 1.0, "grad_norm": 0.5286158919334412, "learning_rate": 0.00045017330199987053, "loss": 3.1031, "step": 20392 }, { "epoch": 1.0, "grad_norm": 0.5467382073402405, "learning_rate": 0.000450159973448986, "loss": 3.0543, "step": 20393 }, { "epoch": 1.0, "grad_norm": 0.5618236064910889, "learning_rate": 0.000450146644502609, "loss": 3.0087, "step": 20394 }, { "epoch": 1.0, "grad_norm": 0.5205358266830444, "learning_rate": 0.0004501333151607747, "loss": 3.2562, "step": 20395 }, { "epoch": 1.0, "grad_norm": 0.49830591678619385, "learning_rate": 0.0004501199854235181, "loss": 3.0565, "step": 20396 }, { "epoch": 1.0, "grad_norm": 0.5011833310127258, "learning_rate": 0.0004501066552908743, "loss": 3.174, "step": 20397 }, { "epoch": 1.0, "grad_norm": 0.5207628607749939, "learning_rate": 0.00045009332476287847, "loss": 3.16, "step": 20398 }, { "epoch": 1.0, "grad_norm": 0.5745342969894409, "learning_rate": 0.00045007999383956564, "loss": 3.434, "step": 20399 }, { "epoch": 1.0, "grad_norm": 0.5692901015281677, "learning_rate": 0.00045006666252097113, "loss": 3.283, "step": 20400 }, { "epoch": 1.0, "grad_norm": 0.47566521167755127, "learning_rate": 0.00045005333080712985, "loss": 3.0213, "step": 20401 }, { "epoch": 1.0, "grad_norm": 0.5313223004341125, "learning_rate": 0.000450039998698077, "loss": 3.1536, "step": 20402 }, { "epoch": 1.0, "grad_norm": 0.5201346278190613, "learning_rate": 0.0004500266661938476, "loss": 3.1187, "step": 20403 }, { "epoch": 1.0, "grad_norm": 0.5419844388961792, "learning_rate": 0.000450013333294477, "loss": 3.0795, "step": 20404 }, { "epoch": 1.0, "grad_norm": 0.6561049222946167, "learning_rate": 0.00045, "loss": 3.1308, "step": 20405 }, { "epoch": 1.0, "grad_norm": 0.5238326191902161, "learning_rate": 0.00044998666631045184, "loss": 3.3698, "step": 20406 }, { "epoch": 1.0, "grad_norm": 0.5477263927459717, "learning_rate": 0.0004499733322258678, "loss": 3.1224, "step": 20407 }, { "epoch": 1.0, "grad_norm": 0.5472010374069214, "learning_rate": 0.0004499599977462828, "loss": 2.8647, "step": 20408 }, { "epoch": 1.0, "grad_norm": 0.5201596021652222, "learning_rate": 0.00044994666287173196, "loss": 3.1365, "step": 20409 }, { "epoch": 1.0, "grad_norm": 0.5760728716850281, "learning_rate": 0.0004499333276022506, "loss": 2.9512, "step": 20410 }, { "epoch": 1.0, "grad_norm": 0.5273072719573975, "learning_rate": 0.0004499199919378736, "loss": 2.9243, "step": 20411 }, { "epoch": 1.0, "grad_norm": 0.5336431860923767, "learning_rate": 0.0004499066558786362, "loss": 3.16, "step": 20412 }, { "epoch": 1.0, "grad_norm": 0.5312026739120483, "learning_rate": 0.0004498933194245735, "loss": 3.1535, "step": 20413 }, { "epoch": 1.0, "grad_norm": 0.5205845832824707, "learning_rate": 0.00044987998257572075, "loss": 3.1798, "step": 20414 }, { "epoch": 1.0, "grad_norm": 0.5186797380447388, "learning_rate": 0.000449866645332113, "loss": 2.9815, "step": 20415 }, { "epoch": 1.0, "grad_norm": 0.5282081365585327, "learning_rate": 0.0004498533076937852, "loss": 3.1629, "step": 20416 }, { "epoch": 1.0, "grad_norm": 0.501063346862793, "learning_rate": 0.00044983996966077263, "loss": 3.1123, "step": 20417 }, { "epoch": 1.0, "grad_norm": 0.5373258590698242, "learning_rate": 0.0004498266312331106, "loss": 3.0061, "step": 20418 }, { "epoch": 1.0, "grad_norm": 0.5097174048423767, "learning_rate": 0.0004498132924108339, "loss": 3.1618, "step": 20419 }, { "epoch": 1.0, "grad_norm": 0.4925655126571655, "learning_rate": 0.00044979995319397787, "loss": 3.2096, "step": 20420 }, { "epoch": 1.0, "grad_norm": 0.5566980242729187, "learning_rate": 0.0004497866135825776, "loss": 2.9665, "step": 20421 }, { "epoch": 1.0, "grad_norm": 0.62166428565979, "learning_rate": 0.00044977327357666815, "loss": 3.0511, "step": 20422 }, { "epoch": 1.0, "grad_norm": 0.49404507875442505, "learning_rate": 0.00044975993317628477, "loss": 3.1312, "step": 20423 }, { "epoch": 1.0, "grad_norm": 0.5426779985427856, "learning_rate": 0.00044974659238146257, "loss": 3.3202, "step": 20424 }, { "epoch": 1.0, "grad_norm": 0.549683690071106, "learning_rate": 0.0004497332511922366, "loss": 3.1595, "step": 20425 }, { "epoch": 1.0, "grad_norm": 0.5156567692756653, "learning_rate": 0.0004497199096086421, "loss": 3.1343, "step": 20426 }, { "epoch": 1.0, "grad_norm": 0.5068504810333252, "learning_rate": 0.0004497065676307142, "loss": 3.0144, "step": 20427 }, { "epoch": 1.0, "grad_norm": 0.5446821451187134, "learning_rate": 0.00044969322525848795, "loss": 3.1761, "step": 20428 }, { "epoch": 1.0, "grad_norm": 0.5488018989562988, "learning_rate": 0.00044967988249199867, "loss": 3.261, "step": 20429 }, { "epoch": 1.0, "grad_norm": 0.513907790184021, "learning_rate": 0.0004496665393312813, "loss": 3.1929, "step": 20430 }, { "epoch": 1.0, "grad_norm": 0.5646592974662781, "learning_rate": 0.0004496531957763711, "loss": 3.0872, "step": 20431 }, { "epoch": 1.0, "grad_norm": 0.487352192401886, "learning_rate": 0.0004496398518273031, "loss": 3.044, "step": 20432 }, { "epoch": 1.0, "grad_norm": 0.48989337682724, "learning_rate": 0.00044962650748411263, "loss": 3.0768, "step": 20433 }, { "epoch": 1.0, "grad_norm": 0.8140915036201477, "learning_rate": 0.0004496131627468347, "loss": 3.0274, "step": 20434 }, { "epoch": 1.0, "grad_norm": 0.5698384046554565, "learning_rate": 0.0004495998176155045, "loss": 3.1436, "step": 20435 }, { "epoch": 1.0, "grad_norm": 0.5074122548103333, "learning_rate": 0.00044958647209015714, "loss": 3.1786, "step": 20436 }, { "epoch": 1.0, "grad_norm": 0.5354686379432678, "learning_rate": 0.0004495731261708278, "loss": 3.1297, "step": 20437 }, { "epoch": 1.0, "grad_norm": 0.5391170382499695, "learning_rate": 0.0004495597798575517, "loss": 3.0141, "step": 20438 }, { "epoch": 1.0, "grad_norm": 0.5479790568351746, "learning_rate": 0.0004495464331503638, "loss": 2.9269, "step": 20439 }, { "epoch": 1.0, "grad_norm": 0.5683173537254333, "learning_rate": 0.00044953308604929953, "loss": 3.09, "step": 20440 }, { "epoch": 1.0, "grad_norm": 0.5525175333023071, "learning_rate": 0.0004495197385543938, "loss": 3.1034, "step": 20441 }, { "epoch": 1.0, "grad_norm": 0.5185175538063049, "learning_rate": 0.0004495063906656818, "loss": 3.2782, "step": 20442 }, { "epoch": 1.0, "grad_norm": 0.5282908082008362, "learning_rate": 0.0004494930423831988, "loss": 3.0233, "step": 20443 }, { "epoch": 1.0, "grad_norm": 0.5449439883232117, "learning_rate": 0.00044947969370698, "loss": 3.0911, "step": 20444 }, { "epoch": 1.0, "grad_norm": 0.5355467796325684, "learning_rate": 0.00044946634463706035, "loss": 3.019, "step": 20445 }, { "epoch": 1.0, "grad_norm": 0.514240562915802, "learning_rate": 0.00044945299517347507, "loss": 3.3083, "step": 20446 }, { "epoch": 1.0, "grad_norm": 0.5259093642234802, "learning_rate": 0.00044943964531625946, "loss": 3.0219, "step": 20447 }, { "epoch": 1.0, "grad_norm": 0.5237060189247131, "learning_rate": 0.00044942629506544853, "loss": 3.2387, "step": 20448 }, { "epoch": 1.0, "grad_norm": 0.518451988697052, "learning_rate": 0.00044941294442107753, "loss": 3.0307, "step": 20449 }, { "epoch": 1.0, "grad_norm": 0.518038272857666, "learning_rate": 0.0004493995933831815, "loss": 3.093, "step": 20450 }, { "epoch": 1.0, "grad_norm": 0.5178418159484863, "learning_rate": 0.00044938624195179586, "loss": 3.059, "step": 20451 }, { "epoch": 1.0, "grad_norm": 0.5047763586044312, "learning_rate": 0.00044937289012695545, "loss": 3.1335, "step": 20452 }, { "epoch": 1.0, "grad_norm": 0.5251728892326355, "learning_rate": 0.0004493595379086956, "loss": 3.0702, "step": 20453 }, { "epoch": 1.0, "grad_norm": 0.5640724301338196, "learning_rate": 0.0004493461852970515, "loss": 3.1382, "step": 20454 }, { "epoch": 1.0, "grad_norm": 0.5112975835800171, "learning_rate": 0.0004493328322920584, "loss": 3.2491, "step": 20455 }, { "epoch": 1.0, "grad_norm": 0.5223793983459473, "learning_rate": 0.00044931947889375126, "loss": 3.0409, "step": 20456 }, { "epoch": 1.0, "grad_norm": 0.5379955172538757, "learning_rate": 0.00044930612510216535, "loss": 3.2273, "step": 20457 }, { "epoch": 1.0, "grad_norm": 0.49986040592193604, "learning_rate": 0.0004492927709173359, "loss": 3.0213, "step": 20458 }, { "epoch": 1.0, "grad_norm": 0.5016982555389404, "learning_rate": 0.00044927941633929796, "loss": 3.1779, "step": 20459 }, { "epoch": 1.0, "grad_norm": 0.5460448265075684, "learning_rate": 0.00044926606136808675, "loss": 3.0865, "step": 20460 }, { "epoch": 1.0, "grad_norm": 0.5995565056800842, "learning_rate": 0.00044925270600373754, "loss": 3.0536, "step": 20461 }, { "epoch": 1.0, "grad_norm": 0.5643872618675232, "learning_rate": 0.0004492393502462854, "loss": 3.1441, "step": 20462 }, { "epoch": 1.0, "grad_norm": 0.5247363448143005, "learning_rate": 0.00044922599409576543, "loss": 3.1199, "step": 20463 }, { "epoch": 1.0, "grad_norm": 0.5810442566871643, "learning_rate": 0.000449212637552213, "loss": 3.1335, "step": 20464 }, { "epoch": 1.0, "grad_norm": 0.5455927848815918, "learning_rate": 0.0004491992806156632, "loss": 2.9627, "step": 20465 }, { "epoch": 1.0, "grad_norm": 0.5521914958953857, "learning_rate": 0.00044918592328615126, "loss": 3.1621, "step": 20466 }, { "epoch": 1.0, "grad_norm": 0.5023887753486633, "learning_rate": 0.00044917256556371225, "loss": 2.9914, "step": 20467 }, { "epoch": 1.0, "grad_norm": 0.5143914222717285, "learning_rate": 0.00044915920744838136, "loss": 3.181, "step": 20468 }, { "epoch": 1.0, "grad_norm": 0.5196571350097656, "learning_rate": 0.000449145848940194, "loss": 3.1984, "step": 20469 }, { "epoch": 1.0, "grad_norm": 0.5121498107910156, "learning_rate": 0.00044913249003918505, "loss": 3.0403, "step": 20470 }, { "epoch": 1.0, "grad_norm": 0.5418211817741394, "learning_rate": 0.00044911913074538977, "loss": 3.0662, "step": 20471 }, { "epoch": 1.0, "grad_norm": 0.5008609890937805, "learning_rate": 0.00044910577105884345, "loss": 3.0714, "step": 20472 }, { "epoch": 1.0, "grad_norm": 0.4887140393257141, "learning_rate": 0.00044909241097958126, "loss": 3.1298, "step": 20473 }, { "epoch": 1.0, "grad_norm": 0.5122173428535461, "learning_rate": 0.00044907905050763834, "loss": 3.1986, "step": 20474 }, { "epoch": 1.0, "grad_norm": 0.5146946907043457, "learning_rate": 0.0004490656896430498, "loss": 3.0118, "step": 20475 }, { "epoch": 1.0, "grad_norm": 0.5460460782051086, "learning_rate": 0.00044905232838585103, "loss": 3.177, "step": 20476 }, { "epoch": 1.0, "grad_norm": 0.5463166832923889, "learning_rate": 0.0004490389667360771, "loss": 3.2155, "step": 20477 }, { "epoch": 1.0, "grad_norm": 0.5182934403419495, "learning_rate": 0.00044902560469376314, "loss": 3.1258, "step": 20478 }, { "epoch": 1.0, "grad_norm": 0.5243579149246216, "learning_rate": 0.0004490122422589445, "loss": 3.1593, "step": 20479 }, { "epoch": 1.0, "grad_norm": 0.5343524813652039, "learning_rate": 0.00044899887943165634, "loss": 3.1276, "step": 20480 }, { "epoch": 1.0, "grad_norm": 0.4999130368232727, "learning_rate": 0.0004489855162119337, "loss": 3.2389, "step": 20481 }, { "epoch": 1.0, "grad_norm": 0.5230382680892944, "learning_rate": 0.0004489721525998119, "loss": 3.0518, "step": 20482 }, { "epoch": 1.0, "grad_norm": 0.516069769859314, "learning_rate": 0.0004489587885953261, "loss": 3.1985, "step": 20483 }, { "epoch": 1.0, "grad_norm": 0.5170885324478149, "learning_rate": 0.0004489454241985116, "loss": 3.0926, "step": 20484 }, { "epoch": 1.0, "grad_norm": 0.5558387041091919, "learning_rate": 0.0004489320594094035, "loss": 3.0827, "step": 20485 }, { "epoch": 1.0, "grad_norm": 0.5676001906394958, "learning_rate": 0.000448918694228037, "loss": 3.0072, "step": 20486 }, { "epoch": 1.0, "grad_norm": 0.552216112613678, "learning_rate": 0.00044890532865444723, "loss": 3.1179, "step": 20487 }, { "epoch": 1.0, "grad_norm": 0.5196042656898499, "learning_rate": 0.0004488919626886696, "loss": 3.1181, "step": 20488 }, { "epoch": 1.0, "grad_norm": 0.5262749791145325, "learning_rate": 0.0004488785963307391, "loss": 3.0952, "step": 20489 }, { "epoch": 1.0, "grad_norm": 0.5152573585510254, "learning_rate": 0.0004488652295806911, "loss": 2.9795, "step": 20490 }, { "epoch": 1.0, "grad_norm": 0.5612766742706299, "learning_rate": 0.0004488518624385608, "loss": 3.1665, "step": 20491 }, { "epoch": 1.0, "grad_norm": 0.5625437498092651, "learning_rate": 0.0004488384949043833, "loss": 3.3981, "step": 20492 }, { "epoch": 1.0, "grad_norm": 0.5344268679618835, "learning_rate": 0.00044882512697819383, "loss": 3.0059, "step": 20493 }, { "epoch": 1.0, "grad_norm": 0.5187119841575623, "learning_rate": 0.0004488117586600275, "loss": 3.1045, "step": 20494 }, { "epoch": 1.0, "grad_norm": 0.5119081139564514, "learning_rate": 0.0004487983899499198, "loss": 3.1185, "step": 20495 }, { "epoch": 1.0, "grad_norm": 0.5100210905075073, "learning_rate": 0.00044878502084790564, "loss": 3.0568, "step": 20496 }, { "epoch": 1.0, "grad_norm": 0.5492945313453674, "learning_rate": 0.0004487716513540205, "loss": 3.2403, "step": 20497 }, { "epoch": 1.0, "grad_norm": 0.5489396452903748, "learning_rate": 0.0004487582814682994, "loss": 2.8496, "step": 20498 }, { "epoch": 1.0, "grad_norm": 0.5205833315849304, "learning_rate": 0.0004487449111907776, "loss": 2.8851, "step": 20499 }, { "epoch": 1.0, "grad_norm": 0.5621615648269653, "learning_rate": 0.0004487315405214903, "loss": 2.9772, "step": 20500 }, { "epoch": 1.0, "grad_norm": 0.5375982522964478, "learning_rate": 0.00044871816946047286, "loss": 3.1913, "step": 20501 }, { "epoch": 1.0, "grad_norm": 0.5279413461685181, "learning_rate": 0.0004487047980077604, "loss": 3.0989, "step": 20502 }, { "epoch": 1.0, "grad_norm": 0.5290868282318115, "learning_rate": 0.00044869142616338803, "loss": 3.0419, "step": 20503 }, { "epoch": 1.0, "grad_norm": 0.5295721292495728, "learning_rate": 0.00044867805392739097, "loss": 3.1698, "step": 20504 }, { "epoch": 1.0, "grad_norm": 0.531470000743866, "learning_rate": 0.00044866468129980464, "loss": 2.8806, "step": 20505 }, { "epoch": 1.0, "grad_norm": 0.5461409091949463, "learning_rate": 0.0004486513082806642, "loss": 3.332, "step": 20506 }, { "epoch": 1.0, "grad_norm": 0.5531258583068848, "learning_rate": 0.00044863793487000475, "loss": 3.2661, "step": 20507 }, { "epoch": 1.01, "grad_norm": 0.5420560240745544, "learning_rate": 0.00044862456106786166, "loss": 3.0748, "step": 20508 }, { "epoch": 1.01, "grad_norm": 0.5236428380012512, "learning_rate": 0.00044861118687427, "loss": 3.0693, "step": 20509 }, { "epoch": 1.01, "grad_norm": 0.5320883393287659, "learning_rate": 0.00044859781228926505, "loss": 3.2626, "step": 20510 }, { "epoch": 1.01, "grad_norm": 0.5048290491104126, "learning_rate": 0.0004485844373128821, "loss": 3.0668, "step": 20511 }, { "epoch": 1.01, "grad_norm": 0.5460326075553894, "learning_rate": 0.00044857106194515635, "loss": 3.1908, "step": 20512 }, { "epoch": 1.01, "grad_norm": 0.4952123463153839, "learning_rate": 0.0004485576861861231, "loss": 3.0871, "step": 20513 }, { "epoch": 1.01, "grad_norm": 0.5370456576347351, "learning_rate": 0.0004485443100358173, "loss": 3.0149, "step": 20514 }, { "epoch": 1.01, "grad_norm": 0.5347251892089844, "learning_rate": 0.0004485309334942745, "loss": 3.1556, "step": 20515 }, { "epoch": 1.01, "grad_norm": 0.5180676579475403, "learning_rate": 0.0004485175565615298, "loss": 3.0769, "step": 20516 }, { "epoch": 1.01, "grad_norm": 0.5209394097328186, "learning_rate": 0.0004485041792376184, "loss": 3.2223, "step": 20517 }, { "epoch": 1.01, "grad_norm": 0.5076389908790588, "learning_rate": 0.00044849080152257564, "loss": 3.1164, "step": 20518 }, { "epoch": 1.01, "grad_norm": 0.5367461442947388, "learning_rate": 0.00044847742341643654, "loss": 3.1111, "step": 20519 }, { "epoch": 1.01, "grad_norm": 0.5419523119926453, "learning_rate": 0.0004484640449192367, "loss": 2.9594, "step": 20520 }, { "epoch": 1.01, "grad_norm": 0.5312870144844055, "learning_rate": 0.00044845066603101103, "loss": 3.0924, "step": 20521 }, { "epoch": 1.01, "grad_norm": 0.5425320863723755, "learning_rate": 0.0004484372867517948, "loss": 3.1313, "step": 20522 }, { "epoch": 1.01, "grad_norm": 0.5226745009422302, "learning_rate": 0.00044842390708162345, "loss": 3.1729, "step": 20523 }, { "epoch": 1.01, "grad_norm": 0.5791988968849182, "learning_rate": 0.000448410527020532, "loss": 2.9848, "step": 20524 }, { "epoch": 1.01, "grad_norm": 0.5693748593330383, "learning_rate": 0.0004483971465685558, "loss": 3.1733, "step": 20525 }, { "epoch": 1.01, "grad_norm": 0.5362666249275208, "learning_rate": 0.0004483837657257301, "loss": 3.0081, "step": 20526 }, { "epoch": 1.01, "grad_norm": 0.5347479581832886, "learning_rate": 0.0004483703844920901, "loss": 3.0189, "step": 20527 }, { "epoch": 1.01, "grad_norm": 0.529528796672821, "learning_rate": 0.00044835700286767114, "loss": 2.9859, "step": 20528 }, { "epoch": 1.01, "grad_norm": 0.5108277797698975, "learning_rate": 0.0004483436208525083, "loss": 3.1131, "step": 20529 }, { "epoch": 1.01, "grad_norm": 0.5495550632476807, "learning_rate": 0.00044833023844663693, "loss": 3.2628, "step": 20530 }, { "epoch": 1.01, "grad_norm": 0.5666603446006775, "learning_rate": 0.0004483168556500922, "loss": 3.1046, "step": 20531 }, { "epoch": 1.01, "grad_norm": 0.5286827683448792, "learning_rate": 0.00044830347246290956, "loss": 2.8698, "step": 20532 }, { "epoch": 1.01, "grad_norm": 0.5373377203941345, "learning_rate": 0.000448290088885124, "loss": 2.9937, "step": 20533 }, { "epoch": 1.01, "grad_norm": 0.5299882292747498, "learning_rate": 0.00044827670491677095, "loss": 3.0146, "step": 20534 }, { "epoch": 1.01, "grad_norm": 0.5432134866714478, "learning_rate": 0.00044826332055788553, "loss": 3.3426, "step": 20535 }, { "epoch": 1.01, "grad_norm": 0.5193071961402893, "learning_rate": 0.00044824993580850313, "loss": 2.8983, "step": 20536 }, { "epoch": 1.01, "grad_norm": 0.5308823585510254, "learning_rate": 0.00044823655066865886, "loss": 3.3115, "step": 20537 }, { "epoch": 1.01, "grad_norm": 0.5253787040710449, "learning_rate": 0.0004482231651383881, "loss": 3.2114, "step": 20538 }, { "epoch": 1.01, "grad_norm": 0.583215594291687, "learning_rate": 0.000448209779217726, "loss": 3.1361, "step": 20539 }, { "epoch": 1.01, "grad_norm": 0.5469831824302673, "learning_rate": 0.0004481963929067078, "loss": 3.1252, "step": 20540 }, { "epoch": 1.01, "grad_norm": 0.5351081490516663, "learning_rate": 0.000448183006205369, "loss": 3.0161, "step": 20541 }, { "epoch": 1.01, "grad_norm": 0.4953567385673523, "learning_rate": 0.00044816961911374464, "loss": 3.3067, "step": 20542 }, { "epoch": 1.01, "grad_norm": 0.5496410727500916, "learning_rate": 0.00044815623163186994, "loss": 3.1639, "step": 20543 }, { "epoch": 1.01, "grad_norm": 0.5172861218452454, "learning_rate": 0.0004481428437597803, "loss": 2.8968, "step": 20544 }, { "epoch": 1.01, "grad_norm": 0.5500335097312927, "learning_rate": 0.0004481294554975108, "loss": 3.1042, "step": 20545 }, { "epoch": 1.01, "grad_norm": 0.5287953019142151, "learning_rate": 0.000448116066845097, "loss": 3.0327, "step": 20546 }, { "epoch": 1.01, "grad_norm": 0.5284257531166077, "learning_rate": 0.00044810267780257386, "loss": 3.2541, "step": 20547 }, { "epoch": 1.01, "grad_norm": 0.5908827781677246, "learning_rate": 0.0004480892883699768, "loss": 3.0347, "step": 20548 }, { "epoch": 1.01, "grad_norm": 0.5372423529624939, "learning_rate": 0.00044807589854734106, "loss": 2.9296, "step": 20549 }, { "epoch": 1.01, "grad_norm": 0.5452413558959961, "learning_rate": 0.0004480625083347019, "loss": 2.8282, "step": 20550 }, { "epoch": 1.01, "grad_norm": 0.5139778256416321, "learning_rate": 0.0004480491177320946, "loss": 2.9481, "step": 20551 }, { "epoch": 1.01, "grad_norm": 0.5498486757278442, "learning_rate": 0.0004480357267395544, "loss": 2.954, "step": 20552 }, { "epoch": 1.01, "grad_norm": 0.550351083278656, "learning_rate": 0.00044802233535711666, "loss": 3.1411, "step": 20553 }, { "epoch": 1.01, "grad_norm": 0.5372947454452515, "learning_rate": 0.0004480089435848165, "loss": 3.0283, "step": 20554 }, { "epoch": 1.01, "grad_norm": 0.5474216341972351, "learning_rate": 0.0004479955514226892, "loss": 3.0082, "step": 20555 }, { "epoch": 1.01, "grad_norm": 0.578901469707489, "learning_rate": 0.0004479821588707702, "loss": 3.0129, "step": 20556 }, { "epoch": 1.01, "grad_norm": 0.5624405145645142, "learning_rate": 0.0004479687659290947, "loss": 2.9875, "step": 20557 }, { "epoch": 1.01, "grad_norm": 0.5033628940582275, "learning_rate": 0.0004479553725976979, "loss": 3.0755, "step": 20558 }, { "epoch": 1.01, "grad_norm": 0.5456181764602661, "learning_rate": 0.0004479419788766151, "loss": 2.937, "step": 20559 }, { "epoch": 1.01, "grad_norm": 0.595481276512146, "learning_rate": 0.0004479285847658816, "loss": 3.1074, "step": 20560 }, { "epoch": 1.01, "grad_norm": 0.5183221101760864, "learning_rate": 0.00044791519026553267, "loss": 2.8591, "step": 20561 }, { "epoch": 1.01, "grad_norm": 0.537140965461731, "learning_rate": 0.0004479017953756037, "loss": 3.0136, "step": 20562 }, { "epoch": 1.01, "grad_norm": 0.5604956746101379, "learning_rate": 0.00044788840009612975, "loss": 3.0379, "step": 20563 }, { "epoch": 1.01, "grad_norm": 0.5302656292915344, "learning_rate": 0.0004478750044271463, "loss": 3.1423, "step": 20564 }, { "epoch": 1.01, "grad_norm": 0.5702511072158813, "learning_rate": 0.0004478616083686884, "loss": 3.0888, "step": 20565 }, { "epoch": 1.01, "grad_norm": 0.5529466271400452, "learning_rate": 0.0004478482119207916, "loss": 3.0283, "step": 20566 }, { "epoch": 1.01, "grad_norm": 0.49684593081474304, "learning_rate": 0.0004478348150834911, "loss": 3.3656, "step": 20567 }, { "epoch": 1.01, "grad_norm": 0.5289061665534973, "learning_rate": 0.00044782141785682216, "loss": 3.0752, "step": 20568 }, { "epoch": 1.01, "grad_norm": 0.527275025844574, "learning_rate": 0.00044780802024081993, "loss": 3.1675, "step": 20569 }, { "epoch": 1.01, "grad_norm": 0.5364570617675781, "learning_rate": 0.00044779462223551995, "loss": 2.9237, "step": 20570 }, { "epoch": 1.01, "grad_norm": 0.5187329053878784, "learning_rate": 0.00044778122384095723, "loss": 2.9779, "step": 20571 }, { "epoch": 1.01, "grad_norm": 0.5524115562438965, "learning_rate": 0.0004477678250571673, "loss": 3.1814, "step": 20572 }, { "epoch": 1.01, "grad_norm": 0.5237755179405212, "learning_rate": 0.00044775442588418536, "loss": 3.1823, "step": 20573 }, { "epoch": 1.01, "grad_norm": 0.5263662338256836, "learning_rate": 0.0004477410263220467, "loss": 3.1545, "step": 20574 }, { "epoch": 1.01, "grad_norm": 0.5157889127731323, "learning_rate": 0.00044772762637078665, "loss": 2.9089, "step": 20575 }, { "epoch": 1.01, "grad_norm": 0.5321006178855896, "learning_rate": 0.0004477142260304403, "loss": 3.334, "step": 20576 }, { "epoch": 1.01, "grad_norm": 0.5617817044258118, "learning_rate": 0.0004477008253010432, "loss": 2.9518, "step": 20577 }, { "epoch": 1.01, "grad_norm": 0.5136963725090027, "learning_rate": 0.00044768742418263053, "loss": 2.9438, "step": 20578 }, { "epoch": 1.01, "grad_norm": 0.5390790104866028, "learning_rate": 0.00044767402267523773, "loss": 3.1914, "step": 20579 }, { "epoch": 1.01, "grad_norm": 0.5621577501296997, "learning_rate": 0.0004476606207788999, "loss": 3.0937, "step": 20580 }, { "epoch": 1.01, "grad_norm": 0.5234653949737549, "learning_rate": 0.0004476472184936523, "loss": 3.087, "step": 20581 }, { "epoch": 1.01, "grad_norm": 0.5514974594116211, "learning_rate": 0.0004476338158195305, "loss": 2.9886, "step": 20582 }, { "epoch": 1.01, "grad_norm": 0.5614942908287048, "learning_rate": 0.0004476204127565696, "loss": 3.0167, "step": 20583 }, { "epoch": 1.01, "grad_norm": 0.5162169933319092, "learning_rate": 0.0004476070093048049, "loss": 3.1614, "step": 20584 }, { "epoch": 1.01, "grad_norm": 0.5244741439819336, "learning_rate": 0.00044759360546427175, "loss": 3.1057, "step": 20585 }, { "epoch": 1.01, "grad_norm": 0.5119442343711853, "learning_rate": 0.0004475802012350055, "loss": 2.9263, "step": 20586 }, { "epoch": 1.01, "grad_norm": 0.48682332038879395, "learning_rate": 0.0004475667966170414, "loss": 3.0508, "step": 20587 }, { "epoch": 1.01, "grad_norm": 0.5387108325958252, "learning_rate": 0.00044755339161041467, "loss": 2.8952, "step": 20588 }, { "epoch": 1.01, "grad_norm": 0.5450228452682495, "learning_rate": 0.00044753998621516075, "loss": 3.332, "step": 20589 }, { "epoch": 1.01, "grad_norm": 0.5503659844398499, "learning_rate": 0.000447526580431315, "loss": 2.9963, "step": 20590 }, { "epoch": 1.01, "grad_norm": 0.5000738501548767, "learning_rate": 0.0004475131742589125, "loss": 2.9757, "step": 20591 }, { "epoch": 1.01, "grad_norm": 0.5334386229515076, "learning_rate": 0.00044749976769798875, "loss": 3.3276, "step": 20592 }, { "epoch": 1.01, "grad_norm": 0.5666833519935608, "learning_rate": 0.00044748636074857904, "loss": 3.1172, "step": 20593 }, { "epoch": 1.01, "grad_norm": 0.5799906253814697, "learning_rate": 0.00044747295341071857, "loss": 3.0151, "step": 20594 }, { "epoch": 1.01, "grad_norm": 0.5235270857810974, "learning_rate": 0.00044745954568444266, "loss": 2.9707, "step": 20595 }, { "epoch": 1.01, "grad_norm": 0.5744423866271973, "learning_rate": 0.0004474461375697867, "loss": 2.9403, "step": 20596 }, { "epoch": 1.01, "grad_norm": 0.571638286113739, "learning_rate": 0.00044743272906678616, "loss": 3.1176, "step": 20597 }, { "epoch": 1.01, "grad_norm": 0.5430690050125122, "learning_rate": 0.00044741932017547604, "loss": 3.1179, "step": 20598 }, { "epoch": 1.01, "grad_norm": 0.5648912191390991, "learning_rate": 0.0004474059108958918, "loss": 3.1133, "step": 20599 }, { "epoch": 1.01, "grad_norm": 0.5373355746269226, "learning_rate": 0.00044739250122806883, "loss": 3.0403, "step": 20600 }, { "epoch": 1.01, "grad_norm": 0.5279290676116943, "learning_rate": 0.0004473790911720423, "loss": 3.1451, "step": 20601 }, { "epoch": 1.01, "grad_norm": 0.565265953540802, "learning_rate": 0.0004473656807278477, "loss": 3.1434, "step": 20602 }, { "epoch": 1.01, "grad_norm": 0.5297691226005554, "learning_rate": 0.00044735226989552014, "loss": 3.2683, "step": 20603 }, { "epoch": 1.01, "grad_norm": 0.5779011249542236, "learning_rate": 0.0004473388586750952, "loss": 3.1056, "step": 20604 }, { "epoch": 1.01, "grad_norm": 0.5088967084884644, "learning_rate": 0.000447325447066608, "loss": 3.2698, "step": 20605 }, { "epoch": 1.01, "grad_norm": 0.5209295153617859, "learning_rate": 0.00044731203507009386, "loss": 3.1012, "step": 20606 }, { "epoch": 1.01, "grad_norm": 0.517296552658081, "learning_rate": 0.0004472986226855882, "loss": 3.1231, "step": 20607 }, { "epoch": 1.01, "grad_norm": 0.5545491576194763, "learning_rate": 0.0004472852099131264, "loss": 3.1827, "step": 20608 }, { "epoch": 1.01, "grad_norm": 0.5324487686157227, "learning_rate": 0.00044727179675274365, "loss": 2.9018, "step": 20609 }, { "epoch": 1.01, "grad_norm": 0.5557654500007629, "learning_rate": 0.00044725838320447533, "loss": 2.9244, "step": 20610 }, { "epoch": 1.01, "grad_norm": 0.5294561982154846, "learning_rate": 0.00044724496926835673, "loss": 3.2031, "step": 20611 }, { "epoch": 1.01, "grad_norm": 0.4875947833061218, "learning_rate": 0.0004472315549444233, "loss": 2.9372, "step": 20612 }, { "epoch": 1.01, "grad_norm": 0.5307518243789673, "learning_rate": 0.00044721814023271025, "loss": 3.1983, "step": 20613 }, { "epoch": 1.01, "grad_norm": 0.5280675888061523, "learning_rate": 0.00044720472513325296, "loss": 3.1898, "step": 20614 }, { "epoch": 1.01, "grad_norm": 0.5537136793136597, "learning_rate": 0.0004471913096460867, "loss": 3.2588, "step": 20615 }, { "epoch": 1.01, "grad_norm": 0.5422837138175964, "learning_rate": 0.00044717789377124695, "loss": 3.0336, "step": 20616 }, { "epoch": 1.01, "grad_norm": 0.5528018474578857, "learning_rate": 0.0004471644775087688, "loss": 3.0397, "step": 20617 }, { "epoch": 1.01, "grad_norm": 0.5226984620094299, "learning_rate": 0.00044715106085868784, "loss": 3.2272, "step": 20618 }, { "epoch": 1.01, "grad_norm": 0.5283844470977783, "learning_rate": 0.0004471376438210394, "loss": 3.2111, "step": 20619 }, { "epoch": 1.01, "grad_norm": 0.5217439532279968, "learning_rate": 0.00044712422639585863, "loss": 3.0164, "step": 20620 }, { "epoch": 1.01, "grad_norm": 0.5724291205406189, "learning_rate": 0.0004471108085831809, "loss": 3.1398, "step": 20621 }, { "epoch": 1.01, "grad_norm": 0.5415415167808533, "learning_rate": 0.0004470973903830417, "loss": 3.0994, "step": 20622 }, { "epoch": 1.01, "grad_norm": 0.5410470366477966, "learning_rate": 0.00044708397179547626, "loss": 3.1288, "step": 20623 }, { "epoch": 1.01, "grad_norm": 0.5544204711914062, "learning_rate": 0.0004470705528205199, "loss": 3.2814, "step": 20624 }, { "epoch": 1.01, "grad_norm": 0.5386510491371155, "learning_rate": 0.0004470571334582081, "loss": 3.041, "step": 20625 }, { "epoch": 1.01, "grad_norm": 0.5222130417823792, "learning_rate": 0.00044704371370857607, "loss": 3.1223, "step": 20626 }, { "epoch": 1.01, "grad_norm": 0.5678313374519348, "learning_rate": 0.0004470302935716591, "loss": 2.9597, "step": 20627 }, { "epoch": 1.01, "grad_norm": 0.5152552127838135, "learning_rate": 0.00044701687304749276, "loss": 2.9862, "step": 20628 }, { "epoch": 1.01, "grad_norm": 0.5379000902175903, "learning_rate": 0.0004470034521361122, "loss": 3.1142, "step": 20629 }, { "epoch": 1.01, "grad_norm": 0.5807501673698425, "learning_rate": 0.0004469900308375529, "loss": 3.2559, "step": 20630 }, { "epoch": 1.01, "grad_norm": 0.5624402165412903, "learning_rate": 0.0004469766091518502, "loss": 3.1534, "step": 20631 }, { "epoch": 1.01, "grad_norm": 0.5430057048797607, "learning_rate": 0.0004469631870790392, "loss": 3.1118, "step": 20632 }, { "epoch": 1.01, "grad_norm": 0.5057716369628906, "learning_rate": 0.0004469497646191556, "loss": 2.9988, "step": 20633 }, { "epoch": 1.01, "grad_norm": 0.5503453612327576, "learning_rate": 0.0004469363417722346, "loss": 3.0555, "step": 20634 }, { "epoch": 1.01, "grad_norm": 0.5180730223655701, "learning_rate": 0.0004469229185383115, "loss": 3.1024, "step": 20635 }, { "epoch": 1.01, "grad_norm": 0.5402533411979675, "learning_rate": 0.0004469094949174217, "loss": 3.1781, "step": 20636 }, { "epoch": 1.01, "grad_norm": 0.6356052160263062, "learning_rate": 0.0004468960709096006, "loss": 3.0357, "step": 20637 }, { "epoch": 1.01, "grad_norm": 0.5212626457214355, "learning_rate": 0.0004468826465148835, "loss": 3.0721, "step": 20638 }, { "epoch": 1.01, "grad_norm": 0.5012721419334412, "learning_rate": 0.00044686922173330584, "loss": 3.0777, "step": 20639 }, { "epoch": 1.01, "grad_norm": 0.5270209908485413, "learning_rate": 0.00044685579656490287, "loss": 3.2153, "step": 20640 }, { "epoch": 1.01, "grad_norm": 0.5531798005104065, "learning_rate": 0.00044684237100971, "loss": 3.1648, "step": 20641 }, { "epoch": 1.01, "grad_norm": 0.5050953030586243, "learning_rate": 0.00044682894506776246, "loss": 3.2221, "step": 20642 }, { "epoch": 1.01, "grad_norm": 0.5169261693954468, "learning_rate": 0.0004468155187390959, "loss": 3.1671, "step": 20643 }, { "epoch": 1.01, "grad_norm": 0.5264015197753906, "learning_rate": 0.0004468020920237455, "loss": 3.0668, "step": 20644 }, { "epoch": 1.01, "grad_norm": 0.5398049354553223, "learning_rate": 0.0004467886649217466, "loss": 3.1813, "step": 20645 }, { "epoch": 1.01, "grad_norm": 0.5570565462112427, "learning_rate": 0.0004467752374331346, "loss": 2.8079, "step": 20646 }, { "epoch": 1.01, "grad_norm": 0.5235486030578613, "learning_rate": 0.0004467618095579449, "loss": 3.1429, "step": 20647 }, { "epoch": 1.01, "grad_norm": 0.5539608001708984, "learning_rate": 0.0004467483812962128, "loss": 3.1071, "step": 20648 }, { "epoch": 1.01, "grad_norm": 0.5413884520530701, "learning_rate": 0.0004467349526479738, "loss": 2.7632, "step": 20649 }, { "epoch": 1.01, "grad_norm": 0.5679776072502136, "learning_rate": 0.00044672152361326307, "loss": 3.0299, "step": 20650 }, { "epoch": 1.01, "grad_norm": 0.5429620742797852, "learning_rate": 0.0004467080941921161, "loss": 3.1816, "step": 20651 }, { "epoch": 1.01, "grad_norm": 0.5164234042167664, "learning_rate": 0.00044669466438456833, "loss": 3.2438, "step": 20652 }, { "epoch": 1.01, "grad_norm": 0.5403413772583008, "learning_rate": 0.0004466812341906549, "loss": 3.1561, "step": 20653 }, { "epoch": 1.01, "grad_norm": 0.5405500531196594, "learning_rate": 0.00044666780361041143, "loss": 3.229, "step": 20654 }, { "epoch": 1.01, "grad_norm": 0.5156397819519043, "learning_rate": 0.0004466543726438733, "loss": 3.0178, "step": 20655 }, { "epoch": 1.01, "grad_norm": 0.5206366777420044, "learning_rate": 0.00044664094129107557, "loss": 2.7869, "step": 20656 }, { "epoch": 1.01, "grad_norm": 0.5250067710876465, "learning_rate": 0.00044662750955205393, "loss": 3.0707, "step": 20657 }, { "epoch": 1.01, "grad_norm": 0.5365163087844849, "learning_rate": 0.00044661407742684355, "loss": 2.8563, "step": 20658 }, { "epoch": 1.01, "grad_norm": 0.5403412580490112, "learning_rate": 0.00044660064491548003, "loss": 3.037, "step": 20659 }, { "epoch": 1.01, "grad_norm": 0.5710760354995728, "learning_rate": 0.00044658721201799856, "loss": 2.9855, "step": 20660 }, { "epoch": 1.01, "grad_norm": 0.5208877921104431, "learning_rate": 0.00044657377873443454, "loss": 3.0927, "step": 20661 }, { "epoch": 1.01, "grad_norm": 0.5228715538978577, "learning_rate": 0.00044656034506482354, "loss": 3.0964, "step": 20662 }, { "epoch": 1.01, "grad_norm": 0.5221062302589417, "learning_rate": 0.00044654691100920067, "loss": 3.1844, "step": 20663 }, { "epoch": 1.01, "grad_norm": 0.5395482778549194, "learning_rate": 0.00044653347656760145, "loss": 3.2539, "step": 20664 }, { "epoch": 1.01, "grad_norm": 0.5604360103607178, "learning_rate": 0.00044652004174006133, "loss": 3.3412, "step": 20665 }, { "epoch": 1.01, "grad_norm": 0.5532729029655457, "learning_rate": 0.0004465066065266156, "loss": 3.1004, "step": 20666 }, { "epoch": 1.01, "grad_norm": 0.5858287811279297, "learning_rate": 0.0004464931709272996, "loss": 3.0845, "step": 20667 }, { "epoch": 1.01, "grad_norm": 0.506764829158783, "learning_rate": 0.0004464797349421488, "loss": 3.3258, "step": 20668 }, { "epoch": 1.01, "grad_norm": 0.974445641040802, "learning_rate": 0.00044646629857119854, "loss": 2.916, "step": 20669 }, { "epoch": 1.01, "grad_norm": 0.5399503111839294, "learning_rate": 0.0004464528618144843, "loss": 2.9863, "step": 20670 }, { "epoch": 1.01, "grad_norm": 0.5604903101921082, "learning_rate": 0.0004464394246720415, "loss": 3.0492, "step": 20671 }, { "epoch": 1.01, "grad_norm": 0.5462265014648438, "learning_rate": 0.00044642598714390527, "loss": 3.0305, "step": 20672 }, { "epoch": 1.01, "grad_norm": 0.512052595615387, "learning_rate": 0.00044641254923011124, "loss": 3.0729, "step": 20673 }, { "epoch": 1.01, "grad_norm": 0.521513044834137, "learning_rate": 0.0004463991109306947, "loss": 3.0554, "step": 20674 }, { "epoch": 1.01, "grad_norm": 0.5272539258003235, "learning_rate": 0.0004463856722456911, "loss": 3.171, "step": 20675 }, { "epoch": 1.01, "grad_norm": 0.5902304649353027, "learning_rate": 0.00044637223317513583, "loss": 3.1536, "step": 20676 }, { "epoch": 1.01, "grad_norm": 0.5469979643821716, "learning_rate": 0.00044635879371906427, "loss": 2.9833, "step": 20677 }, { "epoch": 1.01, "grad_norm": 0.5268833041191101, "learning_rate": 0.0004463453538775118, "loss": 3.2544, "step": 20678 }, { "epoch": 1.01, "grad_norm": 0.5354952812194824, "learning_rate": 0.0004463319136505138, "loss": 2.8549, "step": 20679 }, { "epoch": 1.01, "grad_norm": 0.6132659316062927, "learning_rate": 0.0004463184730381057, "loss": 2.9428, "step": 20680 }, { "epoch": 1.01, "grad_norm": 0.5660962462425232, "learning_rate": 0.000446305032040323, "loss": 2.9525, "step": 20681 }, { "epoch": 1.01, "grad_norm": 0.5585618019104004, "learning_rate": 0.0004462915906572009, "loss": 3.1878, "step": 20682 }, { "epoch": 1.01, "grad_norm": 0.6061045527458191, "learning_rate": 0.0004462781488887749, "loss": 3.2639, "step": 20683 }, { "epoch": 1.01, "grad_norm": 0.550226628780365, "learning_rate": 0.00044626470673508043, "loss": 3.0404, "step": 20684 }, { "epoch": 1.01, "grad_norm": 0.5399933457374573, "learning_rate": 0.00044625126419615296, "loss": 3.0385, "step": 20685 }, { "epoch": 1.01, "grad_norm": 0.5338366031646729, "learning_rate": 0.0004462378212720277, "loss": 3.1391, "step": 20686 }, { "epoch": 1.01, "grad_norm": 0.5670849084854126, "learning_rate": 0.00044622437796274016, "loss": 3.1828, "step": 20687 }, { "epoch": 1.01, "grad_norm": 0.5496315360069275, "learning_rate": 0.0004462109342683259, "loss": 2.9966, "step": 20688 }, { "epoch": 1.01, "grad_norm": 0.5669832229614258, "learning_rate": 0.00044619749018881994, "loss": 2.9435, "step": 20689 }, { "epoch": 1.01, "grad_norm": 0.5618574023246765, "learning_rate": 0.0004461840457242581, "loss": 3.1487, "step": 20690 }, { "epoch": 1.01, "grad_norm": 0.5448206067085266, "learning_rate": 0.00044617060087467556, "loss": 3.1906, "step": 20691 }, { "epoch": 1.01, "grad_norm": 0.5339007377624512, "learning_rate": 0.0004461571556401078, "loss": 3.164, "step": 20692 }, { "epoch": 1.01, "grad_norm": 0.5732952952384949, "learning_rate": 0.00044614371002059026, "loss": 3.197, "step": 20693 }, { "epoch": 1.01, "grad_norm": 0.5226903557777405, "learning_rate": 0.0004461302640161582, "loss": 3.1404, "step": 20694 }, { "epoch": 1.01, "grad_norm": 0.547990620136261, "learning_rate": 0.00044611681762684723, "loss": 3.2876, "step": 20695 }, { "epoch": 1.01, "grad_norm": 0.5328800082206726, "learning_rate": 0.0004461033708526927, "loss": 3.0255, "step": 20696 }, { "epoch": 1.01, "grad_norm": 0.5429891347885132, "learning_rate": 0.00044608992369372995, "loss": 3.4003, "step": 20697 }, { "epoch": 1.01, "grad_norm": 0.561837375164032, "learning_rate": 0.00044607647614999454, "loss": 3.0505, "step": 20698 }, { "epoch": 1.01, "grad_norm": 0.5248981714248657, "learning_rate": 0.00044606302822152176, "loss": 3.3295, "step": 20699 }, { "epoch": 1.01, "grad_norm": 0.5702486038208008, "learning_rate": 0.000446049579908347, "loss": 3.072, "step": 20700 }, { "epoch": 1.01, "grad_norm": 0.5280510187149048, "learning_rate": 0.0004460361312105058, "loss": 3.1817, "step": 20701 }, { "epoch": 1.01, "grad_norm": 0.5336136221885681, "learning_rate": 0.0004460226821280336, "loss": 3.1157, "step": 20702 }, { "epoch": 1.01, "grad_norm": 0.523258626461029, "learning_rate": 0.0004460092326609658, "loss": 3.1815, "step": 20703 }, { "epoch": 1.01, "grad_norm": 0.5246843099594116, "learning_rate": 0.00044599578280933756, "loss": 3.0741, "step": 20704 }, { "epoch": 1.01, "grad_norm": 0.5236319303512573, "learning_rate": 0.00044598233257318474, "loss": 2.9568, "step": 20705 }, { "epoch": 1.01, "grad_norm": 0.5435445308685303, "learning_rate": 0.0004459688819525425, "loss": 2.9715, "step": 20706 }, { "epoch": 1.01, "grad_norm": 0.5246100425720215, "learning_rate": 0.00044595543094744623, "loss": 3.0166, "step": 20707 }, { "epoch": 1.01, "grad_norm": 0.5761013627052307, "learning_rate": 0.00044594197955793156, "loss": 3.0693, "step": 20708 }, { "epoch": 1.01, "grad_norm": 0.5138135552406311, "learning_rate": 0.00044592852778403366, "loss": 2.9992, "step": 20709 }, { "epoch": 1.01, "grad_norm": 0.5575985312461853, "learning_rate": 0.00044591507562578825, "loss": 2.9294, "step": 20710 }, { "epoch": 1.01, "grad_norm": 0.5381579995155334, "learning_rate": 0.0004459016230832306, "loss": 3.0036, "step": 20711 }, { "epoch": 1.02, "grad_norm": 0.5545154213905334, "learning_rate": 0.00044588817015639605, "loss": 3.0897, "step": 20712 }, { "epoch": 1.02, "grad_norm": 0.507051408290863, "learning_rate": 0.00044587471684532016, "loss": 3.0543, "step": 20713 }, { "epoch": 1.02, "grad_norm": 0.5791917443275452, "learning_rate": 0.00044586126315003836, "loss": 3.2333, "step": 20714 }, { "epoch": 1.02, "grad_norm": 0.5103766322135925, "learning_rate": 0.0004458478090705861, "loss": 3.193, "step": 20715 }, { "epoch": 1.02, "grad_norm": 0.5501724481582642, "learning_rate": 0.00044583435460699875, "loss": 3.0267, "step": 20716 }, { "epoch": 1.02, "grad_norm": 0.5263850092887878, "learning_rate": 0.00044582089975931185, "loss": 3.2196, "step": 20717 }, { "epoch": 1.02, "grad_norm": 0.5362185835838318, "learning_rate": 0.0004458074445275607, "loss": 2.9511, "step": 20718 }, { "epoch": 1.02, "grad_norm": 0.535193920135498, "learning_rate": 0.0004457939889117807, "loss": 3.2362, "step": 20719 }, { "epoch": 1.02, "grad_norm": 0.5330086350440979, "learning_rate": 0.0004457805329120075, "loss": 2.9531, "step": 20720 }, { "epoch": 1.02, "grad_norm": 0.5201607942581177, "learning_rate": 0.00044576707652827646, "loss": 3.2588, "step": 20721 }, { "epoch": 1.02, "grad_norm": 0.5220875144004822, "learning_rate": 0.000445753619760623, "loss": 3.0818, "step": 20722 }, { "epoch": 1.02, "grad_norm": 0.5611007213592529, "learning_rate": 0.0004457401626090825, "loss": 3.1012, "step": 20723 }, { "epoch": 1.02, "grad_norm": 0.5594449639320374, "learning_rate": 0.0004457267050736904, "loss": 3.3061, "step": 20724 }, { "epoch": 1.02, "grad_norm": 0.5162807106971741, "learning_rate": 0.00044571324715448235, "loss": 3.0391, "step": 20725 }, { "epoch": 1.02, "grad_norm": 0.5748878717422485, "learning_rate": 0.00044569978885149354, "loss": 3.1104, "step": 20726 }, { "epoch": 1.02, "grad_norm": 0.5533029437065125, "learning_rate": 0.0004456863301647596, "loss": 3.0481, "step": 20727 }, { "epoch": 1.02, "grad_norm": 0.5461177229881287, "learning_rate": 0.00044567287109431586, "loss": 3.0085, "step": 20728 }, { "epoch": 1.02, "grad_norm": 0.5752653479576111, "learning_rate": 0.00044565941164019784, "loss": 3.1422, "step": 20729 }, { "epoch": 1.02, "grad_norm": 0.5168489813804626, "learning_rate": 0.00044564595180244095, "loss": 3.0288, "step": 20730 }, { "epoch": 1.02, "grad_norm": 0.5368278622627258, "learning_rate": 0.00044563249158108064, "loss": 3.215, "step": 20731 }, { "epoch": 1.02, "grad_norm": 0.5282604098320007, "learning_rate": 0.00044561903097615243, "loss": 3.0692, "step": 20732 }, { "epoch": 1.02, "grad_norm": 0.568565309047699, "learning_rate": 0.00044560556998769166, "loss": 3.1969, "step": 20733 }, { "epoch": 1.02, "grad_norm": 0.5004165172576904, "learning_rate": 0.0004455921086157338, "loss": 2.8507, "step": 20734 }, { "epoch": 1.02, "grad_norm": 0.5740750432014465, "learning_rate": 0.0004455786468603144, "loss": 3.0852, "step": 20735 }, { "epoch": 1.02, "grad_norm": 0.5363320112228394, "learning_rate": 0.0004455651847214689, "loss": 2.942, "step": 20736 }, { "epoch": 1.02, "grad_norm": 0.5179160237312317, "learning_rate": 0.00044555172219923263, "loss": 3.0065, "step": 20737 }, { "epoch": 1.02, "grad_norm": 0.5659471154212952, "learning_rate": 0.0004455382592936412, "loss": 3.1902, "step": 20738 }, { "epoch": 1.02, "grad_norm": 0.514021635055542, "learning_rate": 0.00044552479600473, "loss": 2.9964, "step": 20739 }, { "epoch": 1.02, "grad_norm": 0.5664601922035217, "learning_rate": 0.00044551133233253443, "loss": 2.9117, "step": 20740 }, { "epoch": 1.02, "grad_norm": 0.5283511877059937, "learning_rate": 0.0004454978682770901, "loss": 3.3048, "step": 20741 }, { "epoch": 1.02, "grad_norm": 0.5478894710540771, "learning_rate": 0.0004454844038384323, "loss": 3.247, "step": 20742 }, { "epoch": 1.02, "grad_norm": 0.5319153666496277, "learning_rate": 0.0004454709390165967, "loss": 3.2392, "step": 20743 }, { "epoch": 1.02, "grad_norm": 0.5409319400787354, "learning_rate": 0.0004454574738116186, "loss": 3.0977, "step": 20744 }, { "epoch": 1.02, "grad_norm": 0.5745616555213928, "learning_rate": 0.0004454440082235334, "loss": 3.072, "step": 20745 }, { "epoch": 1.02, "grad_norm": 0.6125838756561279, "learning_rate": 0.0004454305422523768, "loss": 3.2891, "step": 20746 }, { "epoch": 1.02, "grad_norm": 0.5295475721359253, "learning_rate": 0.00044541707589818404, "loss": 3.3818, "step": 20747 }, { "epoch": 1.02, "grad_norm": 0.588213324546814, "learning_rate": 0.0004454036091609908, "loss": 3.2082, "step": 20748 }, { "epoch": 1.02, "grad_norm": 0.5516581535339355, "learning_rate": 0.0004453901420408324, "loss": 3.3228, "step": 20749 }, { "epoch": 1.02, "grad_norm": 0.5377952456474304, "learning_rate": 0.0004453766745377442, "loss": 3.0391, "step": 20750 }, { "epoch": 1.02, "grad_norm": 0.5352370738983154, "learning_rate": 0.000445363206651762, "loss": 3.2461, "step": 20751 }, { "epoch": 1.02, "grad_norm": 0.5557248592376709, "learning_rate": 0.0004453497383829211, "loss": 2.8931, "step": 20752 }, { "epoch": 1.02, "grad_norm": 0.5509526133537292, "learning_rate": 0.00044533626973125687, "loss": 3.1886, "step": 20753 }, { "epoch": 1.02, "grad_norm": 0.5012487173080444, "learning_rate": 0.0004453228006968049, "loss": 2.9703, "step": 20754 }, { "epoch": 1.02, "grad_norm": 0.5457666516304016, "learning_rate": 0.0004453093312796006, "loss": 2.8753, "step": 20755 }, { "epoch": 1.02, "grad_norm": 0.5392427444458008, "learning_rate": 0.0004452958614796795, "loss": 3.1255, "step": 20756 }, { "epoch": 1.02, "grad_norm": 0.5236079096794128, "learning_rate": 0.0004452823912970772, "loss": 2.8528, "step": 20757 }, { "epoch": 1.02, "grad_norm": 0.5426657795906067, "learning_rate": 0.0004452689207318289, "loss": 3.0241, "step": 20758 }, { "epoch": 1.02, "grad_norm": 0.5343982577323914, "learning_rate": 0.00044525544978397025, "loss": 3.1399, "step": 20759 }, { "epoch": 1.02, "grad_norm": 0.535350501537323, "learning_rate": 0.0004452419784535367, "loss": 3.1378, "step": 20760 }, { "epoch": 1.02, "grad_norm": 0.5450648665428162, "learning_rate": 0.0004452285067405638, "loss": 3.1517, "step": 20761 }, { "epoch": 1.02, "grad_norm": 0.5389657020568848, "learning_rate": 0.0004452150346450869, "loss": 3.3121, "step": 20762 }, { "epoch": 1.02, "grad_norm": 0.5154426693916321, "learning_rate": 0.00044520156216714145, "loss": 3.1708, "step": 20763 }, { "epoch": 1.02, "grad_norm": 0.5560336112976074, "learning_rate": 0.0004451880893067632, "loss": 3.0882, "step": 20764 }, { "epoch": 1.02, "grad_norm": 0.5327739119529724, "learning_rate": 0.0004451746160639874, "loss": 3.1679, "step": 20765 }, { "epoch": 1.02, "grad_norm": 0.5418607592582703, "learning_rate": 0.0004451611424388496, "loss": 3.1208, "step": 20766 }, { "epoch": 1.02, "grad_norm": 0.57447749376297, "learning_rate": 0.0004451476684313852, "loss": 2.9681, "step": 20767 }, { "epoch": 1.02, "grad_norm": 0.5182052850723267, "learning_rate": 0.0004451341940416299, "loss": 3.2641, "step": 20768 }, { "epoch": 1.02, "grad_norm": 0.5607951283454895, "learning_rate": 0.00044512071926961904, "loss": 3.117, "step": 20769 }, { "epoch": 1.02, "grad_norm": 0.5186867713928223, "learning_rate": 0.0004451072441153881, "loss": 3.0579, "step": 20770 }, { "epoch": 1.02, "grad_norm": 0.5786254405975342, "learning_rate": 0.0004450937685789725, "loss": 3.1559, "step": 20771 }, { "epoch": 1.02, "grad_norm": 0.5199046730995178, "learning_rate": 0.0004450802926604081, "loss": 3.0105, "step": 20772 }, { "epoch": 1.02, "grad_norm": 0.5281519293785095, "learning_rate": 0.00044506681635972996, "loss": 3.0246, "step": 20773 }, { "epoch": 1.02, "grad_norm": 0.4995059072971344, "learning_rate": 0.0004450533396769737, "loss": 3.2023, "step": 20774 }, { "epoch": 1.02, "grad_norm": 0.526586651802063, "learning_rate": 0.000445039862612175, "loss": 2.8645, "step": 20775 }, { "epoch": 1.02, "grad_norm": 0.5115561485290527, "learning_rate": 0.00044502638516536906, "loss": 2.8865, "step": 20776 }, { "epoch": 1.02, "grad_norm": 0.5567725896835327, "learning_rate": 0.00044501290733659166, "loss": 3.3244, "step": 20777 }, { "epoch": 1.02, "grad_norm": 0.5572482943534851, "learning_rate": 0.00044499942912587813, "loss": 3.0157, "step": 20778 }, { "epoch": 1.02, "grad_norm": 0.5388816595077515, "learning_rate": 0.00044498595053326403, "loss": 2.9588, "step": 20779 }, { "epoch": 1.02, "grad_norm": 0.5826587677001953, "learning_rate": 0.0004449724715587849, "loss": 2.875, "step": 20780 }, { "epoch": 1.02, "grad_norm": 0.5563898682594299, "learning_rate": 0.000444958992202476, "loss": 2.9278, "step": 20781 }, { "epoch": 1.02, "grad_norm": 0.5438665747642517, "learning_rate": 0.0004449455124643731, "loss": 3.1942, "step": 20782 }, { "epoch": 1.02, "grad_norm": 0.5232053995132446, "learning_rate": 0.00044493203234451166, "loss": 3.0504, "step": 20783 }, { "epoch": 1.02, "grad_norm": 0.5295712947845459, "learning_rate": 0.00044491855184292713, "loss": 3.0075, "step": 20784 }, { "epoch": 1.02, "grad_norm": 0.5307983756065369, "learning_rate": 0.000444905070959655, "loss": 3.025, "step": 20785 }, { "epoch": 1.02, "grad_norm": 0.619365394115448, "learning_rate": 0.00044489158969473076, "loss": 2.8898, "step": 20786 }, { "epoch": 1.02, "grad_norm": 0.5883886218070984, "learning_rate": 0.00044487810804819, "loss": 2.9573, "step": 20787 }, { "epoch": 1.02, "grad_norm": 0.5858182907104492, "learning_rate": 0.0004448646260200682, "loss": 3.2317, "step": 20788 }, { "epoch": 1.02, "grad_norm": 0.5257620215415955, "learning_rate": 0.0004448511436104008, "loss": 3.1254, "step": 20789 }, { "epoch": 1.02, "grad_norm": 0.5104060173034668, "learning_rate": 0.00044483766081922346, "loss": 3.1547, "step": 20790 }, { "epoch": 1.02, "grad_norm": 0.5185402035713196, "learning_rate": 0.00044482417764657147, "loss": 3.099, "step": 20791 }, { "epoch": 1.02, "grad_norm": 0.5819932818412781, "learning_rate": 0.00044481069409248056, "loss": 3.0686, "step": 20792 }, { "epoch": 1.02, "grad_norm": 0.5629081726074219, "learning_rate": 0.00044479721015698614, "loss": 3.1719, "step": 20793 }, { "epoch": 1.02, "grad_norm": 0.5439802408218384, "learning_rate": 0.00044478372584012376, "loss": 3.1272, "step": 20794 }, { "epoch": 1.02, "grad_norm": 0.5107980370521545, "learning_rate": 0.0004447702411419289, "loss": 3.1131, "step": 20795 }, { "epoch": 1.02, "grad_norm": 0.5272974967956543, "learning_rate": 0.000444756756062437, "loss": 3.1204, "step": 20796 }, { "epoch": 1.02, "grad_norm": 0.5283223986625671, "learning_rate": 0.00044474327060168374, "loss": 3.1007, "step": 20797 }, { "epoch": 1.02, "grad_norm": 0.5268027186393738, "learning_rate": 0.00044472978475970453, "loss": 3.1367, "step": 20798 }, { "epoch": 1.02, "grad_norm": 0.5282880663871765, "learning_rate": 0.00044471629853653496, "loss": 3.08, "step": 20799 }, { "epoch": 1.02, "grad_norm": 0.6580334901809692, "learning_rate": 0.0004447028119322105, "loss": 3.0333, "step": 20800 }, { "epoch": 1.02, "grad_norm": 0.5694079995155334, "learning_rate": 0.0004446893249467666, "loss": 3.1917, "step": 20801 }, { "epoch": 1.02, "grad_norm": 0.5710819959640503, "learning_rate": 0.00044467583758023895, "loss": 3.1855, "step": 20802 }, { "epoch": 1.02, "grad_norm": 0.5371394157409668, "learning_rate": 0.000444662349832663, "loss": 3.1907, "step": 20803 }, { "epoch": 1.02, "grad_norm": 0.5172489285469055, "learning_rate": 0.0004446488617040742, "loss": 3.2504, "step": 20804 }, { "epoch": 1.02, "grad_norm": 0.531115710735321, "learning_rate": 0.0004446353731945082, "loss": 2.9735, "step": 20805 }, { "epoch": 1.02, "grad_norm": 0.49821171164512634, "learning_rate": 0.00044462188430400044, "loss": 3.0562, "step": 20806 }, { "epoch": 1.02, "grad_norm": 0.6454533338546753, "learning_rate": 0.00044460839503258643, "loss": 3.1137, "step": 20807 }, { "epoch": 1.02, "grad_norm": 0.64560866355896, "learning_rate": 0.0004445949053803018, "loss": 3.0143, "step": 20808 }, { "epoch": 1.02, "grad_norm": 0.6050903797149658, "learning_rate": 0.000444581415347182, "loss": 3.0204, "step": 20809 }, { "epoch": 1.02, "grad_norm": 0.5002428293228149, "learning_rate": 0.00044456792493326256, "loss": 2.9459, "step": 20810 }, { "epoch": 1.02, "grad_norm": 0.534598708152771, "learning_rate": 0.000444554434138579, "loss": 3.1241, "step": 20811 }, { "epoch": 1.02, "grad_norm": 0.6012741923332214, "learning_rate": 0.0004445409429631669, "loss": 3.0102, "step": 20812 }, { "epoch": 1.02, "grad_norm": 0.5511965155601501, "learning_rate": 0.0004445274514070618, "loss": 3.1177, "step": 20813 }, { "epoch": 1.02, "grad_norm": 0.5788381099700928, "learning_rate": 0.0004445139594702992, "loss": 3.2391, "step": 20814 }, { "epoch": 1.02, "grad_norm": 0.6373004913330078, "learning_rate": 0.0004445004671529147, "loss": 3.0853, "step": 20815 }, { "epoch": 1.02, "grad_norm": 0.5785526633262634, "learning_rate": 0.00044448697445494367, "loss": 3.1423, "step": 20816 }, { "epoch": 1.02, "grad_norm": 0.5426591634750366, "learning_rate": 0.00044447348137642177, "loss": 3.3273, "step": 20817 }, { "epoch": 1.02, "grad_norm": 0.5850669145584106, "learning_rate": 0.00044445998791738453, "loss": 2.9099, "step": 20818 }, { "epoch": 1.02, "grad_norm": 0.5589839816093445, "learning_rate": 0.0004444464940778676, "loss": 3.0122, "step": 20819 }, { "epoch": 1.02, "grad_norm": 0.5280645489692688, "learning_rate": 0.0004444329998579063, "loss": 3.1035, "step": 20820 }, { "epoch": 1.02, "grad_norm": 0.593413233757019, "learning_rate": 0.00044441950525753626, "loss": 3.1984, "step": 20821 }, { "epoch": 1.02, "grad_norm": 0.5713319182395935, "learning_rate": 0.00044440601027679303, "loss": 3.2343, "step": 20822 }, { "epoch": 1.02, "grad_norm": 0.5799878239631653, "learning_rate": 0.0004443925149157123, "loss": 2.8615, "step": 20823 }, { "epoch": 1.02, "grad_norm": 0.5746982097625732, "learning_rate": 0.0004443790191743293, "loss": 3.1915, "step": 20824 }, { "epoch": 1.02, "grad_norm": 0.5620375275611877, "learning_rate": 0.00044436552305267984, "loss": 2.9491, "step": 20825 }, { "epoch": 1.02, "grad_norm": 0.5882107615470886, "learning_rate": 0.00044435202655079934, "loss": 3.0635, "step": 20826 }, { "epoch": 1.02, "grad_norm": 0.5356667637825012, "learning_rate": 0.0004443385296687234, "loss": 2.9083, "step": 20827 }, { "epoch": 1.02, "grad_norm": 0.5147374868392944, "learning_rate": 0.00044432503240648757, "loss": 3.1242, "step": 20828 }, { "epoch": 1.02, "grad_norm": 0.550109326839447, "learning_rate": 0.00044431153476412737, "loss": 2.9514, "step": 20829 }, { "epoch": 1.02, "grad_norm": 0.5489082336425781, "learning_rate": 0.0004442980367416784, "loss": 3.1057, "step": 20830 }, { "epoch": 1.02, "grad_norm": 0.49882984161376953, "learning_rate": 0.0004442845383391761, "loss": 3.1469, "step": 20831 }, { "epoch": 1.02, "grad_norm": 0.6269406080245972, "learning_rate": 0.00044427103955665606, "loss": 3.0736, "step": 20832 }, { "epoch": 1.02, "grad_norm": 0.5686376690864563, "learning_rate": 0.00044425754039415394, "loss": 3.1878, "step": 20833 }, { "epoch": 1.02, "grad_norm": 0.5298441052436829, "learning_rate": 0.00044424404085170526, "loss": 3.2007, "step": 20834 }, { "epoch": 1.02, "grad_norm": 0.5424293279647827, "learning_rate": 0.00044423054092934547, "loss": 3.182, "step": 20835 }, { "epoch": 1.02, "grad_norm": 0.5576657652854919, "learning_rate": 0.0004442170406271102, "loss": 3.2624, "step": 20836 }, { "epoch": 1.02, "grad_norm": 0.5367722511291504, "learning_rate": 0.00044420353994503503, "loss": 3.0781, "step": 20837 }, { "epoch": 1.02, "grad_norm": 0.57781982421875, "learning_rate": 0.00044419003888315544, "loss": 3.0836, "step": 20838 }, { "epoch": 1.02, "grad_norm": 0.5427062511444092, "learning_rate": 0.00044417653744150705, "loss": 3.1441, "step": 20839 }, { "epoch": 1.02, "grad_norm": 0.5554705858230591, "learning_rate": 0.0004441630356201254, "loss": 2.918, "step": 20840 }, { "epoch": 1.02, "grad_norm": 0.5354554653167725, "learning_rate": 0.00044414953341904615, "loss": 2.9882, "step": 20841 }, { "epoch": 1.02, "grad_norm": 0.5082659721374512, "learning_rate": 0.0004441360308383047, "loss": 3.0436, "step": 20842 }, { "epoch": 1.02, "grad_norm": 0.5301640033721924, "learning_rate": 0.00044412252787793665, "loss": 2.9847, "step": 20843 }, { "epoch": 1.02, "grad_norm": 0.5346314907073975, "learning_rate": 0.0004441090245379776, "loss": 3.1013, "step": 20844 }, { "epoch": 1.02, "grad_norm": 0.5592143535614014, "learning_rate": 0.0004440955208184632, "loss": 3.0265, "step": 20845 }, { "epoch": 1.02, "grad_norm": 0.5529156923294067, "learning_rate": 0.00044408201671942884, "loss": 2.9318, "step": 20846 }, { "epoch": 1.02, "grad_norm": 0.5625426769256592, "learning_rate": 0.0004440685122409102, "loss": 3.1834, "step": 20847 }, { "epoch": 1.02, "grad_norm": 0.5384853482246399, "learning_rate": 0.00044405500738294284, "loss": 3.1567, "step": 20848 }, { "epoch": 1.02, "grad_norm": 0.5495803952217102, "learning_rate": 0.0004440415021455624, "loss": 3.1289, "step": 20849 }, { "epoch": 1.02, "grad_norm": 0.5577335357666016, "learning_rate": 0.0004440279965288042, "loss": 3.0094, "step": 20850 }, { "epoch": 1.02, "grad_norm": 0.6182305216789246, "learning_rate": 0.0004440144905327041, "loss": 3.1001, "step": 20851 }, { "epoch": 1.02, "grad_norm": 0.5557514429092407, "learning_rate": 0.00044400098415729754, "loss": 3.1761, "step": 20852 }, { "epoch": 1.02, "grad_norm": 0.5622000694274902, "learning_rate": 0.00044398747740261995, "loss": 2.9326, "step": 20853 }, { "epoch": 1.02, "grad_norm": 0.5277696251869202, "learning_rate": 0.00044397397026870724, "loss": 3.0355, "step": 20854 }, { "epoch": 1.02, "grad_norm": 0.5236798524856567, "learning_rate": 0.0004439604627555947, "loss": 3.1484, "step": 20855 }, { "epoch": 1.02, "grad_norm": 0.5746625661849976, "learning_rate": 0.0004439469548633181, "loss": 3.1309, "step": 20856 }, { "epoch": 1.02, "grad_norm": 0.5382353663444519, "learning_rate": 0.00044393344659191284, "loss": 3.0546, "step": 20857 }, { "epoch": 1.02, "grad_norm": 0.5459259748458862, "learning_rate": 0.00044391993794141456, "loss": 3.1935, "step": 20858 }, { "epoch": 1.02, "grad_norm": 0.5256115794181824, "learning_rate": 0.000443906428911859, "loss": 3.2811, "step": 20859 }, { "epoch": 1.02, "grad_norm": 0.5325765013694763, "learning_rate": 0.00044389291950328144, "loss": 3.2127, "step": 20860 }, { "epoch": 1.02, "grad_norm": 0.5285824537277222, "learning_rate": 0.00044387940971571773, "loss": 3.1977, "step": 20861 }, { "epoch": 1.02, "grad_norm": 0.5795076489448547, "learning_rate": 0.00044386589954920324, "loss": 3.1272, "step": 20862 }, { "epoch": 1.02, "grad_norm": 0.522125780582428, "learning_rate": 0.0004438523890037738, "loss": 3.1669, "step": 20863 }, { "epoch": 1.02, "grad_norm": 0.5119063258171082, "learning_rate": 0.0004438388780794647, "loss": 3.2521, "step": 20864 }, { "epoch": 1.02, "grad_norm": 0.5453227758407593, "learning_rate": 0.00044382536677631176, "loss": 3.1975, "step": 20865 }, { "epoch": 1.02, "grad_norm": 0.5346236228942871, "learning_rate": 0.0004438118550943504, "loss": 3.1095, "step": 20866 }, { "epoch": 1.02, "grad_norm": 0.5401219129562378, "learning_rate": 0.00044379834303361645, "loss": 3.1477, "step": 20867 }, { "epoch": 1.02, "grad_norm": 0.5600337386131287, "learning_rate": 0.0004437848305941452, "loss": 3.135, "step": 20868 }, { "epoch": 1.02, "grad_norm": 0.5327202081680298, "learning_rate": 0.00044377131777597245, "loss": 2.9688, "step": 20869 }, { "epoch": 1.02, "grad_norm": 0.557734489440918, "learning_rate": 0.0004437578045791338, "loss": 3.2244, "step": 20870 }, { "epoch": 1.02, "grad_norm": 0.5608131885528564, "learning_rate": 0.0004437442910036646, "loss": 3.2067, "step": 20871 }, { "epoch": 1.02, "grad_norm": 0.5300242304801941, "learning_rate": 0.0004437307770496007, "loss": 3.3376, "step": 20872 }, { "epoch": 1.02, "grad_norm": 0.5177008509635925, "learning_rate": 0.00044371726271697745, "loss": 3.0261, "step": 20873 }, { "epoch": 1.02, "grad_norm": 0.5148898959159851, "learning_rate": 0.0004437037480058308, "loss": 3.2279, "step": 20874 }, { "epoch": 1.02, "grad_norm": 0.5264756679534912, "learning_rate": 0.000443690232916196, "loss": 3.0305, "step": 20875 }, { "epoch": 1.02, "grad_norm": 0.5689302682876587, "learning_rate": 0.00044367671744810884, "loss": 3.263, "step": 20876 }, { "epoch": 1.02, "grad_norm": 0.5246378779411316, "learning_rate": 0.00044366320160160483, "loss": 3.2429, "step": 20877 }, { "epoch": 1.02, "grad_norm": 0.5417928695678711, "learning_rate": 0.0004436496853767196, "loss": 3.1085, "step": 20878 }, { "epoch": 1.02, "grad_norm": 0.5359458327293396, "learning_rate": 0.0004436361687734887, "loss": 3.1174, "step": 20879 }, { "epoch": 1.02, "grad_norm": 0.5729898810386658, "learning_rate": 0.00044362265179194785, "loss": 2.9681, "step": 20880 }, { "epoch": 1.02, "grad_norm": 0.5482082366943359, "learning_rate": 0.00044360913443213255, "loss": 3.1622, "step": 20881 }, { "epoch": 1.02, "grad_norm": 0.5981318354606628, "learning_rate": 0.0004435956166940784, "loss": 3.0729, "step": 20882 }, { "epoch": 1.02, "grad_norm": 0.5297590494155884, "learning_rate": 0.00044358209857782107, "loss": 2.9377, "step": 20883 }, { "epoch": 1.02, "grad_norm": 0.5387971997261047, "learning_rate": 0.00044356858008339606, "loss": 3.1172, "step": 20884 }, { "epoch": 1.02, "grad_norm": 0.5694130063056946, "learning_rate": 0.0004435550612108391, "loss": 3.0498, "step": 20885 }, { "epoch": 1.02, "grad_norm": 0.5102159380912781, "learning_rate": 0.00044354154196018576, "loss": 3.0899, "step": 20886 }, { "epoch": 1.02, "grad_norm": 0.5234137773513794, "learning_rate": 0.0004435280223314715, "loss": 3.0329, "step": 20887 }, { "epoch": 1.02, "grad_norm": 0.5225757956504822, "learning_rate": 0.00044351450232473215, "loss": 3.3125, "step": 20888 }, { "epoch": 1.02, "grad_norm": 0.5035845041275024, "learning_rate": 0.00044350098194000326, "loss": 3.1396, "step": 20889 }, { "epoch": 1.02, "grad_norm": 0.4975343942642212, "learning_rate": 0.00044348746117732035, "loss": 3.2713, "step": 20890 }, { "epoch": 1.02, "grad_norm": 0.49224379658699036, "learning_rate": 0.00044347394003671904, "loss": 3.1967, "step": 20891 }, { "epoch": 1.02, "grad_norm": 0.5111834406852722, "learning_rate": 0.0004434604185182351, "loss": 3.0417, "step": 20892 }, { "epoch": 1.02, "grad_norm": 0.5302641987800598, "learning_rate": 0.00044344689662190396, "loss": 2.9933, "step": 20893 }, { "epoch": 1.02, "grad_norm": 0.5250119566917419, "learning_rate": 0.00044343337434776125, "loss": 3.0811, "step": 20894 }, { "epoch": 1.02, "grad_norm": 0.5314276218414307, "learning_rate": 0.00044341985169584267, "loss": 3.2476, "step": 20895 }, { "epoch": 1.02, "grad_norm": 0.5408056974411011, "learning_rate": 0.0004434063286661838, "loss": 3.0161, "step": 20896 }, { "epoch": 1.02, "grad_norm": 0.5244573354721069, "learning_rate": 0.00044339280525882026, "loss": 3.2421, "step": 20897 }, { "epoch": 1.02, "grad_norm": 0.5429604053497314, "learning_rate": 0.0004433792814737877, "loss": 3.1625, "step": 20898 }, { "epoch": 1.02, "grad_norm": 0.5206112265586853, "learning_rate": 0.00044336575731112167, "loss": 3.0023, "step": 20899 }, { "epoch": 1.02, "grad_norm": 0.5225122570991516, "learning_rate": 0.0004433522327708579, "loss": 3.059, "step": 20900 }, { "epoch": 1.02, "grad_norm": 0.5603109002113342, "learning_rate": 0.0004433387078530318, "loss": 2.9529, "step": 20901 }, { "epoch": 1.02, "grad_norm": 0.536223828792572, "learning_rate": 0.00044332518255767926, "loss": 2.9272, "step": 20902 }, { "epoch": 1.02, "grad_norm": 0.5303646922111511, "learning_rate": 0.00044331165688483575, "loss": 3.1247, "step": 20903 }, { "epoch": 1.02, "grad_norm": 0.5381811261177063, "learning_rate": 0.00044329813083453685, "loss": 3.1472, "step": 20904 }, { "epoch": 1.02, "grad_norm": 0.5109459757804871, "learning_rate": 0.0004432846044068183, "loss": 3.186, "step": 20905 }, { "epoch": 1.02, "grad_norm": 0.5285735726356506, "learning_rate": 0.00044327107760171565, "loss": 3.1352, "step": 20906 }, { "epoch": 1.02, "grad_norm": 0.5155451893806458, "learning_rate": 0.0004432575504192646, "loss": 2.8308, "step": 20907 }, { "epoch": 1.02, "grad_norm": 0.5433021187782288, "learning_rate": 0.00044324402285950067, "loss": 3.295, "step": 20908 }, { "epoch": 1.02, "grad_norm": 0.5422189235687256, "learning_rate": 0.00044323049492245954, "loss": 3.0311, "step": 20909 }, { "epoch": 1.02, "grad_norm": 0.5412802696228027, "learning_rate": 0.000443216966608177, "loss": 2.9925, "step": 20910 }, { "epoch": 1.02, "grad_norm": 0.5609269142150879, "learning_rate": 0.00044320343791668835, "loss": 3.1073, "step": 20911 }, { "epoch": 1.02, "grad_norm": 0.5714790225028992, "learning_rate": 0.0004431899088480295, "loss": 2.9488, "step": 20912 }, { "epoch": 1.02, "grad_norm": 0.5238274931907654, "learning_rate": 0.00044317637940223596, "loss": 3.0765, "step": 20913 }, { "epoch": 1.02, "grad_norm": 0.5142273902893066, "learning_rate": 0.0004431628495793434, "loss": 2.9384, "step": 20914 }, { "epoch": 1.02, "grad_norm": 0.5264378786087036, "learning_rate": 0.00044314931937938746, "loss": 3.1992, "step": 20915 }, { "epoch": 1.03, "grad_norm": 0.5493513345718384, "learning_rate": 0.00044313578880240376, "loss": 3.225, "step": 20916 }, { "epoch": 1.03, "grad_norm": 0.5445050597190857, "learning_rate": 0.00044312225784842794, "loss": 3.0925, "step": 20917 }, { "epoch": 1.03, "grad_norm": 0.5303078889846802, "learning_rate": 0.0004431087265174957, "loss": 3.0849, "step": 20918 }, { "epoch": 1.03, "grad_norm": 0.5611590147018433, "learning_rate": 0.0004430951948096425, "loss": 3.0506, "step": 20919 }, { "epoch": 1.03, "grad_norm": 0.52338707447052, "learning_rate": 0.0004430816627249041, "loss": 3.1669, "step": 20920 }, { "epoch": 1.03, "grad_norm": 0.5278040170669556, "learning_rate": 0.00044306813026331627, "loss": 3.0816, "step": 20921 }, { "epoch": 1.03, "grad_norm": 0.5498781800270081, "learning_rate": 0.00044305459742491435, "loss": 3.0202, "step": 20922 }, { "epoch": 1.03, "grad_norm": 0.5435249209403992, "learning_rate": 0.00044304106420973424, "loss": 3.0002, "step": 20923 }, { "epoch": 1.03, "grad_norm": 0.5514103174209595, "learning_rate": 0.00044302753061781154, "loss": 3.0938, "step": 20924 }, { "epoch": 1.03, "grad_norm": 0.5727975368499756, "learning_rate": 0.0004430139966491817, "loss": 2.9635, "step": 20925 }, { "epoch": 1.03, "grad_norm": 0.4973636567592621, "learning_rate": 0.00044300046230388065, "loss": 3.0122, "step": 20926 }, { "epoch": 1.03, "grad_norm": 0.5237885117530823, "learning_rate": 0.0004429869275819439, "loss": 3.0556, "step": 20927 }, { "epoch": 1.03, "grad_norm": 0.5372980237007141, "learning_rate": 0.000442973392483407, "loss": 3.109, "step": 20928 }, { "epoch": 1.03, "grad_norm": 0.5056158900260925, "learning_rate": 0.00044295985700830583, "loss": 2.9878, "step": 20929 }, { "epoch": 1.03, "grad_norm": 0.5174768567085266, "learning_rate": 0.00044294632115667574, "loss": 3.0221, "step": 20930 }, { "epoch": 1.03, "grad_norm": 0.5897946953773499, "learning_rate": 0.0004429327849285527, "loss": 3.0459, "step": 20931 }, { "epoch": 1.03, "grad_norm": 0.5413453578948975, "learning_rate": 0.00044291924832397223, "loss": 3.4315, "step": 20932 }, { "epoch": 1.03, "grad_norm": 0.5361354947090149, "learning_rate": 0.0004429057113429699, "loss": 3.1492, "step": 20933 }, { "epoch": 1.03, "grad_norm": 0.5186129808425903, "learning_rate": 0.00044289217398558144, "loss": 3.0495, "step": 20934 }, { "epoch": 1.03, "grad_norm": 0.6256024241447449, "learning_rate": 0.0004428786362518424, "loss": 3.0102, "step": 20935 }, { "epoch": 1.03, "grad_norm": 0.5609905123710632, "learning_rate": 0.00044286509814178866, "loss": 3.0142, "step": 20936 }, { "epoch": 1.03, "grad_norm": 0.5775830149650574, "learning_rate": 0.00044285155965545573, "loss": 3.0254, "step": 20937 }, { "epoch": 1.03, "grad_norm": 0.5974602699279785, "learning_rate": 0.0004428380207928792, "loss": 3.2062, "step": 20938 }, { "epoch": 1.03, "grad_norm": 0.5600636005401611, "learning_rate": 0.0004428244815540949, "loss": 3.0143, "step": 20939 }, { "epoch": 1.03, "grad_norm": 0.5269203782081604, "learning_rate": 0.00044281094193913837, "loss": 3.2125, "step": 20940 }, { "epoch": 1.03, "grad_norm": 0.5174791812896729, "learning_rate": 0.0004427974019480453, "loss": 3.2041, "step": 20941 }, { "epoch": 1.03, "grad_norm": 0.5623819231987, "learning_rate": 0.00044278386158085135, "loss": 2.9925, "step": 20942 }, { "epoch": 1.03, "grad_norm": 0.5486443638801575, "learning_rate": 0.00044277032083759227, "loss": 2.9681, "step": 20943 }, { "epoch": 1.03, "grad_norm": 0.536963939666748, "learning_rate": 0.0004427567797183036, "loss": 3.0695, "step": 20944 }, { "epoch": 1.03, "grad_norm": 0.5224885940551758, "learning_rate": 0.00044274323822302095, "loss": 3.2491, "step": 20945 }, { "epoch": 1.03, "grad_norm": 0.5229834914207458, "learning_rate": 0.0004427296963517801, "loss": 3.227, "step": 20946 }, { "epoch": 1.03, "grad_norm": 0.49516236782073975, "learning_rate": 0.0004427161541046169, "loss": 3.0456, "step": 20947 }, { "epoch": 1.03, "grad_norm": 0.4853455126285553, "learning_rate": 0.00044270261148156656, "loss": 3.1123, "step": 20948 }, { "epoch": 1.03, "grad_norm": 0.547874927520752, "learning_rate": 0.00044268906848266514, "loss": 3.1893, "step": 20949 }, { "epoch": 1.03, "grad_norm": 0.5608683228492737, "learning_rate": 0.00044267552510794813, "loss": 3.2441, "step": 20950 }, { "epoch": 1.03, "grad_norm": 0.5529409646987915, "learning_rate": 0.00044266198135745126, "loss": 3.0731, "step": 20951 }, { "epoch": 1.03, "grad_norm": 0.5526501536369324, "learning_rate": 0.0004426484372312102, "loss": 2.9746, "step": 20952 }, { "epoch": 1.03, "grad_norm": 0.5399184823036194, "learning_rate": 0.0004426348927292606, "loss": 3.0924, "step": 20953 }, { "epoch": 1.03, "grad_norm": 0.5350891351699829, "learning_rate": 0.00044262134785163815, "loss": 3.1553, "step": 20954 }, { "epoch": 1.03, "grad_norm": 0.523499071598053, "learning_rate": 0.00044260780259837846, "loss": 2.9259, "step": 20955 }, { "epoch": 1.03, "grad_norm": 0.519354522228241, "learning_rate": 0.0004425942569695173, "loss": 3.0434, "step": 20956 }, { "epoch": 1.03, "grad_norm": 0.5334795117378235, "learning_rate": 0.00044258071096509033, "loss": 3.1886, "step": 20957 }, { "epoch": 1.03, "grad_norm": 0.5632496476173401, "learning_rate": 0.0004425671645851333, "loss": 2.9461, "step": 20958 }, { "epoch": 1.03, "grad_norm": 0.5479925870895386, "learning_rate": 0.0004425536178296816, "loss": 3.0682, "step": 20959 }, { "epoch": 1.03, "grad_norm": 0.5662195086479187, "learning_rate": 0.0004425400706987712, "loss": 3.1047, "step": 20960 }, { "epoch": 1.03, "grad_norm": 0.5479673147201538, "learning_rate": 0.0004425265231924377, "loss": 3.1159, "step": 20961 }, { "epoch": 1.03, "grad_norm": 0.5581120848655701, "learning_rate": 0.0004425129753107168, "loss": 3.1118, "step": 20962 }, { "epoch": 1.03, "grad_norm": 0.562798261642456, "learning_rate": 0.00044249942705364403, "loss": 3.1258, "step": 20963 }, { "epoch": 1.03, "grad_norm": 0.5359207987785339, "learning_rate": 0.0004424858784212553, "loss": 3.1239, "step": 20964 }, { "epoch": 1.03, "grad_norm": 0.502883791923523, "learning_rate": 0.0004424723294135862, "loss": 3.0139, "step": 20965 }, { "epoch": 1.03, "grad_norm": 0.5590471625328064, "learning_rate": 0.0004424587800306723, "loss": 3.0502, "step": 20966 }, { "epoch": 1.03, "grad_norm": 0.5840969085693359, "learning_rate": 0.0004424452302725495, "loss": 3.2399, "step": 20967 }, { "epoch": 1.03, "grad_norm": 0.5094526410102844, "learning_rate": 0.00044243168013925326, "loss": 3.0227, "step": 20968 }, { "epoch": 1.03, "grad_norm": 0.5850446224212646, "learning_rate": 0.0004424181296308195, "loss": 3.0581, "step": 20969 }, { "epoch": 1.03, "grad_norm": 0.5337172746658325, "learning_rate": 0.0004424045787472838, "loss": 3.1475, "step": 20970 }, { "epoch": 1.03, "grad_norm": 0.5515691041946411, "learning_rate": 0.0004423910274886817, "loss": 2.8972, "step": 20971 }, { "epoch": 1.03, "grad_norm": 0.5271448493003845, "learning_rate": 0.0004423774758550492, "loss": 3.1464, "step": 20972 }, { "epoch": 1.03, "grad_norm": 0.5401154160499573, "learning_rate": 0.0004423639238464217, "loss": 3.0886, "step": 20973 }, { "epoch": 1.03, "grad_norm": 0.5540905594825745, "learning_rate": 0.0004423503714628351, "loss": 3.0209, "step": 20974 }, { "epoch": 1.03, "grad_norm": 0.5254117250442505, "learning_rate": 0.00044233681870432497, "loss": 3.0147, "step": 20975 }, { "epoch": 1.03, "grad_norm": 0.5366216897964478, "learning_rate": 0.0004423232655709271, "loss": 3.191, "step": 20976 }, { "epoch": 1.03, "grad_norm": 0.56840580701828, "learning_rate": 0.00044230971206267716, "loss": 3.1895, "step": 20977 }, { "epoch": 1.03, "grad_norm": 0.5454578995704651, "learning_rate": 0.0004422961581796108, "loss": 3.1494, "step": 20978 }, { "epoch": 1.03, "grad_norm": 0.5930941104888916, "learning_rate": 0.0004422826039217637, "loss": 3.036, "step": 20979 }, { "epoch": 1.03, "grad_norm": 0.5133326649665833, "learning_rate": 0.00044226904928917167, "loss": 3.2738, "step": 20980 }, { "epoch": 1.03, "grad_norm": 0.5534525513648987, "learning_rate": 0.0004422554942818703, "loss": 3.0809, "step": 20981 }, { "epoch": 1.03, "grad_norm": 0.5347831845283508, "learning_rate": 0.00044224193889989534, "loss": 3.062, "step": 20982 }, { "epoch": 1.03, "grad_norm": 0.5829472541809082, "learning_rate": 0.00044222838314328253, "loss": 3.0576, "step": 20983 }, { "epoch": 1.03, "grad_norm": 0.5392614603042603, "learning_rate": 0.00044221482701206746, "loss": 3.0753, "step": 20984 }, { "epoch": 1.03, "grad_norm": 0.5648199319839478, "learning_rate": 0.000442201270506286, "loss": 2.8233, "step": 20985 }, { "epoch": 1.03, "grad_norm": 0.5383753776550293, "learning_rate": 0.00044218771362597366, "loss": 3.1036, "step": 20986 }, { "epoch": 1.03, "grad_norm": 0.5280416011810303, "learning_rate": 0.0004421741563711663, "loss": 3.2258, "step": 20987 }, { "epoch": 1.03, "grad_norm": 0.5613174438476562, "learning_rate": 0.0004421605987418996, "loss": 3.0589, "step": 20988 }, { "epoch": 1.03, "grad_norm": 0.551788866519928, "learning_rate": 0.00044214704073820913, "loss": 3.3632, "step": 20989 }, { "epoch": 1.03, "grad_norm": 0.5428652763366699, "learning_rate": 0.00044213348236013083, "loss": 2.9893, "step": 20990 }, { "epoch": 1.03, "grad_norm": 0.5227196216583252, "learning_rate": 0.00044211992360770024, "loss": 3.0317, "step": 20991 }, { "epoch": 1.03, "grad_norm": 0.4978506863117218, "learning_rate": 0.0004421063644809532, "loss": 2.8453, "step": 20992 }, { "epoch": 1.03, "grad_norm": 0.5322362780570984, "learning_rate": 0.0004420928049799252, "loss": 3.1358, "step": 20993 }, { "epoch": 1.03, "grad_norm": 0.5089728236198425, "learning_rate": 0.0004420792451046523, "loss": 3.2766, "step": 20994 }, { "epoch": 1.03, "grad_norm": 0.5534874200820923, "learning_rate": 0.00044206568485516987, "loss": 3.0792, "step": 20995 }, { "epoch": 1.03, "grad_norm": 0.530881941318512, "learning_rate": 0.00044205212423151366, "loss": 3.2128, "step": 20996 }, { "epoch": 1.03, "grad_norm": 0.5450581312179565, "learning_rate": 0.0004420385632337196, "loss": 3.2734, "step": 20997 }, { "epoch": 1.03, "grad_norm": 0.5764946937561035, "learning_rate": 0.00044202500186182344, "loss": 2.963, "step": 20998 }, { "epoch": 1.03, "grad_norm": 0.5664291381835938, "learning_rate": 0.0004420114401158606, "loss": 3.1355, "step": 20999 }, { "epoch": 1.03, "grad_norm": 0.5113416314125061, "learning_rate": 0.00044199787799586695, "loss": 3.1817, "step": 21000 }, { "epoch": 1.03, "grad_norm": 0.5444121360778809, "learning_rate": 0.0004419843155018783, "loss": 2.9416, "step": 21001 }, { "epoch": 1.03, "grad_norm": 0.5358783602714539, "learning_rate": 0.00044197075263393025, "loss": 3.1008, "step": 21002 }, { "epoch": 1.03, "grad_norm": 0.5368015170097351, "learning_rate": 0.0004419571893920586, "loss": 2.9991, "step": 21003 }, { "epoch": 1.03, "grad_norm": 0.5103992819786072, "learning_rate": 0.000441943625776299, "loss": 3.1975, "step": 21004 }, { "epoch": 1.03, "grad_norm": 0.5424513816833496, "learning_rate": 0.0004419300617866872, "loss": 3.0472, "step": 21005 }, { "epoch": 1.03, "grad_norm": 0.5450505614280701, "learning_rate": 0.00044191649742325893, "loss": 3.0328, "step": 21006 }, { "epoch": 1.03, "grad_norm": 0.5383841395378113, "learning_rate": 0.00044190293268604985, "loss": 3.1623, "step": 21007 }, { "epoch": 1.03, "grad_norm": 0.5024534463882446, "learning_rate": 0.0004418893675750958, "loss": 3.2498, "step": 21008 }, { "epoch": 1.03, "grad_norm": 0.5342257618904114, "learning_rate": 0.0004418758020904326, "loss": 3.198, "step": 21009 }, { "epoch": 1.03, "grad_norm": 0.5287760496139526, "learning_rate": 0.0004418622362320957, "loss": 3.22, "step": 21010 }, { "epoch": 1.03, "grad_norm": 0.5340474247932434, "learning_rate": 0.00044184867000012103, "loss": 3.2134, "step": 21011 }, { "epoch": 1.03, "grad_norm": 0.6150773167610168, "learning_rate": 0.00044183510339454424, "loss": 3.0173, "step": 21012 }, { "epoch": 1.03, "grad_norm": 0.5500164031982422, "learning_rate": 0.00044182153641540104, "loss": 3.0495, "step": 21013 }, { "epoch": 1.03, "grad_norm": 0.5410817861557007, "learning_rate": 0.00044180796906272726, "loss": 3.0827, "step": 21014 }, { "epoch": 1.03, "grad_norm": 0.5385421514511108, "learning_rate": 0.00044179440133655857, "loss": 2.9338, "step": 21015 }, { "epoch": 1.03, "grad_norm": 0.5334212779998779, "learning_rate": 0.0004417808332369307, "loss": 2.9631, "step": 21016 }, { "epoch": 1.03, "grad_norm": 0.5273452401161194, "learning_rate": 0.0004417672647638794, "loss": 3.2463, "step": 21017 }, { "epoch": 1.03, "grad_norm": 0.5127688050270081, "learning_rate": 0.00044175369591744045, "loss": 3.2323, "step": 21018 }, { "epoch": 1.03, "grad_norm": 0.5606804490089417, "learning_rate": 0.00044174012669764953, "loss": 3.0001, "step": 21019 }, { "epoch": 1.03, "grad_norm": 0.5266181230545044, "learning_rate": 0.00044172655710454244, "loss": 3.1192, "step": 21020 }, { "epoch": 1.03, "grad_norm": 0.5243688821792603, "learning_rate": 0.0004417129871381548, "loss": 3.2963, "step": 21021 }, { "epoch": 1.03, "grad_norm": 0.5265859365463257, "learning_rate": 0.0004416994167985224, "loss": 3.1358, "step": 21022 }, { "epoch": 1.03, "grad_norm": 0.5116739273071289, "learning_rate": 0.0004416858460856811, "loss": 2.9943, "step": 21023 }, { "epoch": 1.03, "grad_norm": 0.543095052242279, "learning_rate": 0.0004416722749996665, "loss": 2.9124, "step": 21024 }, { "epoch": 1.03, "grad_norm": 0.550938069820404, "learning_rate": 0.0004416587035405143, "loss": 3.2173, "step": 21025 }, { "epoch": 1.03, "grad_norm": 0.5226496458053589, "learning_rate": 0.00044164513170826046, "loss": 3.1445, "step": 21026 }, { "epoch": 1.03, "grad_norm": 0.5465294122695923, "learning_rate": 0.0004416315595029406, "loss": 3.1399, "step": 21027 }, { "epoch": 1.03, "grad_norm": 0.5636183619499207, "learning_rate": 0.00044161798692459044, "loss": 3.2226, "step": 21028 }, { "epoch": 1.03, "grad_norm": 0.5646752119064331, "learning_rate": 0.0004416044139732457, "loss": 3.0557, "step": 21029 }, { "epoch": 1.03, "grad_norm": 0.5204924941062927, "learning_rate": 0.0004415908406489423, "loss": 3.1327, "step": 21030 }, { "epoch": 1.03, "grad_norm": 0.5620864033699036, "learning_rate": 0.00044157726695171585, "loss": 3.0268, "step": 21031 }, { "epoch": 1.03, "grad_norm": 0.5253093838691711, "learning_rate": 0.0004415636928816021, "loss": 3.0451, "step": 21032 }, { "epoch": 1.03, "grad_norm": 0.5312303900718689, "learning_rate": 0.0004415501184386368, "loss": 3.1457, "step": 21033 }, { "epoch": 1.03, "grad_norm": 0.5458439588546753, "learning_rate": 0.0004415365436228558, "loss": 3.0072, "step": 21034 }, { "epoch": 1.03, "grad_norm": 0.5164788961410522, "learning_rate": 0.00044152296843429474, "loss": 3.0088, "step": 21035 }, { "epoch": 1.03, "grad_norm": 0.5227516293525696, "learning_rate": 0.0004415093928729894, "loss": 2.7882, "step": 21036 }, { "epoch": 1.03, "grad_norm": 0.5490872859954834, "learning_rate": 0.0004414958169389756, "loss": 3.146, "step": 21037 }, { "epoch": 1.03, "grad_norm": 0.5249037742614746, "learning_rate": 0.000441482240632289, "loss": 3.1291, "step": 21038 }, { "epoch": 1.03, "grad_norm": 0.590631902217865, "learning_rate": 0.00044146866395296545, "loss": 3.1117, "step": 21039 }, { "epoch": 1.03, "grad_norm": 0.5558299422264099, "learning_rate": 0.00044145508690104056, "loss": 2.882, "step": 21040 }, { "epoch": 1.03, "grad_norm": 0.5602800846099854, "learning_rate": 0.00044144150947655035, "loss": 2.8093, "step": 21041 }, { "epoch": 1.03, "grad_norm": 0.5326724648475647, "learning_rate": 0.00044142793167953033, "loss": 3.3363, "step": 21042 }, { "epoch": 1.03, "grad_norm": 0.5459589958190918, "learning_rate": 0.0004414143535100164, "loss": 3.1367, "step": 21043 }, { "epoch": 1.03, "grad_norm": 0.5694371461868286, "learning_rate": 0.00044140077496804427, "loss": 3.1642, "step": 21044 }, { "epoch": 1.03, "grad_norm": 0.566260039806366, "learning_rate": 0.00044138719605364967, "loss": 3.2619, "step": 21045 }, { "epoch": 1.03, "grad_norm": 0.5829527974128723, "learning_rate": 0.00044137361676686843, "loss": 3.019, "step": 21046 }, { "epoch": 1.03, "grad_norm": 0.5660526752471924, "learning_rate": 0.00044136003710773633, "loss": 3.0725, "step": 21047 }, { "epoch": 1.03, "grad_norm": 0.5259793996810913, "learning_rate": 0.000441346457076289, "loss": 3.1078, "step": 21048 }, { "epoch": 1.03, "grad_norm": 0.5162500143051147, "learning_rate": 0.0004413328766725624, "loss": 2.8862, "step": 21049 }, { "epoch": 1.03, "grad_norm": 0.560583233833313, "learning_rate": 0.00044131929589659213, "loss": 3.1804, "step": 21050 }, { "epoch": 1.03, "grad_norm": 0.5528599619865417, "learning_rate": 0.00044130571474841406, "loss": 2.9594, "step": 21051 }, { "epoch": 1.03, "grad_norm": 0.5608258843421936, "learning_rate": 0.000441292133228064, "loss": 3.2056, "step": 21052 }, { "epoch": 1.03, "grad_norm": 0.546389102935791, "learning_rate": 0.0004412785513355776, "loss": 3.0145, "step": 21053 }, { "epoch": 1.03, "grad_norm": 0.5085633397102356, "learning_rate": 0.0004412649690709907, "loss": 3.1211, "step": 21054 }, { "epoch": 1.03, "grad_norm": 0.543613851070404, "learning_rate": 0.000441251386434339, "loss": 2.996, "step": 21055 }, { "epoch": 1.03, "grad_norm": 0.5142641067504883, "learning_rate": 0.00044123780342565844, "loss": 3.1159, "step": 21056 }, { "epoch": 1.03, "grad_norm": 0.5283182859420776, "learning_rate": 0.00044122422004498466, "loss": 3.142, "step": 21057 }, { "epoch": 1.03, "grad_norm": 0.5592846274375916, "learning_rate": 0.0004412106362923533, "loss": 3.3198, "step": 21058 }, { "epoch": 1.03, "grad_norm": 0.5456278324127197, "learning_rate": 0.00044119705216780046, "loss": 3.1421, "step": 21059 }, { "epoch": 1.03, "grad_norm": 0.5718361139297485, "learning_rate": 0.0004411834676713618, "loss": 2.7871, "step": 21060 }, { "epoch": 1.03, "grad_norm": 0.5030151605606079, "learning_rate": 0.000441169882803073, "loss": 3.1338, "step": 21061 }, { "epoch": 1.03, "grad_norm": 0.5248968005180359, "learning_rate": 0.0004411562975629698, "loss": 3.273, "step": 21062 }, { "epoch": 1.03, "grad_norm": 0.5286002159118652, "learning_rate": 0.00044114271195108814, "loss": 3.218, "step": 21063 }, { "epoch": 1.03, "grad_norm": 0.511260986328125, "learning_rate": 0.0004411291259674638, "loss": 3.1222, "step": 21064 }, { "epoch": 1.03, "grad_norm": 0.5192838907241821, "learning_rate": 0.00044111553961213244, "loss": 3.0487, "step": 21065 }, { "epoch": 1.03, "grad_norm": 0.49542543292045593, "learning_rate": 0.00044110195288512996, "loss": 2.9655, "step": 21066 }, { "epoch": 1.03, "grad_norm": 0.5238027572631836, "learning_rate": 0.0004410883657864921, "loss": 3.0834, "step": 21067 }, { "epoch": 1.03, "grad_norm": 0.5153197050094604, "learning_rate": 0.00044107477831625456, "loss": 2.9757, "step": 21068 }, { "epoch": 1.03, "grad_norm": 0.5175606608390808, "learning_rate": 0.00044106119047445324, "loss": 3.0309, "step": 21069 }, { "epoch": 1.03, "grad_norm": 0.5227696299552917, "learning_rate": 0.00044104760226112386, "loss": 3.0677, "step": 21070 }, { "epoch": 1.03, "grad_norm": 0.5260235071182251, "learning_rate": 0.0004410340136763023, "loss": 3.1779, "step": 21071 }, { "epoch": 1.03, "grad_norm": 0.567876398563385, "learning_rate": 0.0004410204247200243, "loss": 2.9599, "step": 21072 }, { "epoch": 1.03, "grad_norm": 0.5913751721382141, "learning_rate": 0.0004410068353923255, "loss": 3.0299, "step": 21073 }, { "epoch": 1.03, "grad_norm": 0.5325940847396851, "learning_rate": 0.000440993245693242, "loss": 3.0769, "step": 21074 }, { "epoch": 1.03, "grad_norm": 0.5309601426124573, "learning_rate": 0.0004409796556228094, "loss": 3.043, "step": 21075 }, { "epoch": 1.03, "grad_norm": 0.5216854810714722, "learning_rate": 0.0004409660651810635, "loss": 3.264, "step": 21076 }, { "epoch": 1.03, "grad_norm": 0.5479691624641418, "learning_rate": 0.00044095247436804006, "loss": 2.9584, "step": 21077 }, { "epoch": 1.03, "grad_norm": 0.5204125642776489, "learning_rate": 0.00044093888318377507, "loss": 3.1291, "step": 21078 }, { "epoch": 1.03, "grad_norm": 0.5596486330032349, "learning_rate": 0.00044092529162830397, "loss": 3.1667, "step": 21079 }, { "epoch": 1.03, "grad_norm": 0.5593887567520142, "learning_rate": 0.0004409116997016629, "loss": 2.9974, "step": 21080 }, { "epoch": 1.03, "grad_norm": 0.5333544611930847, "learning_rate": 0.00044089810740388755, "loss": 2.9289, "step": 21081 }, { "epoch": 1.03, "grad_norm": 0.5625095367431641, "learning_rate": 0.0004408845147350137, "loss": 3.0538, "step": 21082 }, { "epoch": 1.03, "grad_norm": 0.5155503153800964, "learning_rate": 0.0004408709216950771, "loss": 2.9528, "step": 21083 }, { "epoch": 1.03, "grad_norm": 0.5525764226913452, "learning_rate": 0.00044085732828411355, "loss": 3.1537, "step": 21084 }, { "epoch": 1.03, "grad_norm": 0.5102589726448059, "learning_rate": 0.000440843734502159, "loss": 3.3095, "step": 21085 }, { "epoch": 1.03, "grad_norm": 0.5470672845840454, "learning_rate": 0.00044083014034924917, "loss": 3.2635, "step": 21086 }, { "epoch": 1.03, "grad_norm": 0.5264706015586853, "learning_rate": 0.0004408165458254198, "loss": 3.5163, "step": 21087 }, { "epoch": 1.03, "grad_norm": 0.5716114044189453, "learning_rate": 0.00044080295093070675, "loss": 3.1289, "step": 21088 }, { "epoch": 1.03, "grad_norm": 0.54709792137146, "learning_rate": 0.0004407893556651459, "loss": 3.2208, "step": 21089 }, { "epoch": 1.03, "grad_norm": 0.6111564636230469, "learning_rate": 0.0004407757600287729, "loss": 3.1601, "step": 21090 }, { "epoch": 1.03, "grad_norm": 0.5455335974693298, "learning_rate": 0.0004407621640216237, "loss": 3.0098, "step": 21091 }, { "epoch": 1.03, "grad_norm": 0.5645591020584106, "learning_rate": 0.00044074856764373397, "loss": 3.1535, "step": 21092 }, { "epoch": 1.03, "grad_norm": 0.53360915184021, "learning_rate": 0.00044073497089513976, "loss": 3.0372, "step": 21093 }, { "epoch": 1.03, "grad_norm": 0.5450435876846313, "learning_rate": 0.00044072137377587655, "loss": 2.9299, "step": 21094 }, { "epoch": 1.03, "grad_norm": 0.5204038023948669, "learning_rate": 0.0004407077762859804, "loss": 2.9236, "step": 21095 }, { "epoch": 1.03, "grad_norm": 0.5424490571022034, "learning_rate": 0.00044069417842548705, "loss": 3.2089, "step": 21096 }, { "epoch": 1.03, "grad_norm": 0.5224910378456116, "learning_rate": 0.0004406805801944323, "loss": 3.0936, "step": 21097 }, { "epoch": 1.03, "grad_norm": 0.5337421894073486, "learning_rate": 0.00044066698159285196, "loss": 3.069, "step": 21098 }, { "epoch": 1.03, "grad_norm": 0.5657770037651062, "learning_rate": 0.00044065338262078184, "loss": 3.0047, "step": 21099 }, { "epoch": 1.03, "grad_norm": 0.5155889987945557, "learning_rate": 0.00044063978327825784, "loss": 3.1635, "step": 21100 }, { "epoch": 1.03, "grad_norm": 0.560525119304657, "learning_rate": 0.00044062618356531573, "loss": 3.1346, "step": 21101 }, { "epoch": 1.03, "grad_norm": 0.5268765687942505, "learning_rate": 0.00044061258348199124, "loss": 2.9914, "step": 21102 }, { "epoch": 1.03, "grad_norm": 0.5468574166297913, "learning_rate": 0.0004405989830283203, "loss": 3.3028, "step": 21103 }, { "epoch": 1.03, "grad_norm": 0.5861000418663025, "learning_rate": 0.00044058538220433866, "loss": 3.2922, "step": 21104 }, { "epoch": 1.03, "grad_norm": 0.5265964269638062, "learning_rate": 0.0004405717810100823, "loss": 3.2175, "step": 21105 }, { "epoch": 1.03, "grad_norm": 0.5208206176757812, "learning_rate": 0.00044055817944558683, "loss": 3.2776, "step": 21106 }, { "epoch": 1.03, "grad_norm": 0.5527623891830444, "learning_rate": 0.0004405445775108882, "loss": 3.129, "step": 21107 }, { "epoch": 1.03, "grad_norm": 0.5665170550346375, "learning_rate": 0.00044053097520602223, "loss": 2.96, "step": 21108 }, { "epoch": 1.03, "grad_norm": 0.5453583002090454, "learning_rate": 0.0004405173725310246, "loss": 3.0435, "step": 21109 }, { "epoch": 1.03, "grad_norm": 0.5542186498641968, "learning_rate": 0.0004405037694859313, "loss": 3.1687, "step": 21110 }, { "epoch": 1.03, "grad_norm": 0.5576282143592834, "learning_rate": 0.0004404901660707782, "loss": 3.0296, "step": 21111 }, { "epoch": 1.03, "grad_norm": 0.5522076487541199, "learning_rate": 0.0004404765622856009, "loss": 2.9013, "step": 21112 }, { "epoch": 1.03, "grad_norm": 0.5171756744384766, "learning_rate": 0.0004404629581304355, "loss": 3.1172, "step": 21113 }, { "epoch": 1.03, "grad_norm": 0.5567202568054199, "learning_rate": 0.00044044935360531765, "loss": 2.9008, "step": 21114 }, { "epoch": 1.03, "grad_norm": 0.5733157992362976, "learning_rate": 0.0004404357487102832, "loss": 2.8591, "step": 21115 }, { "epoch": 1.03, "grad_norm": 0.5684636831283569, "learning_rate": 0.0004404221434453681, "loss": 3.2353, "step": 21116 }, { "epoch": 1.03, "grad_norm": 0.5617053508758545, "learning_rate": 0.000440408537810608, "loss": 3.1757, "step": 21117 }, { "epoch": 1.03, "grad_norm": 0.5794061422348022, "learning_rate": 0.0004403949318060389, "loss": 3.0882, "step": 21118 }, { "epoch": 1.03, "grad_norm": 0.5320546627044678, "learning_rate": 0.00044038132543169656, "loss": 3.0382, "step": 21119 }, { "epoch": 1.04, "grad_norm": 0.5199233293533325, "learning_rate": 0.00044036771868761674, "loss": 2.9563, "step": 21120 }, { "epoch": 1.04, "grad_norm": 0.5452735424041748, "learning_rate": 0.0004403541115738354, "loss": 3.1861, "step": 21121 }, { "epoch": 1.04, "grad_norm": 0.5553098917007446, "learning_rate": 0.00044034050409038847, "loss": 3.1264, "step": 21122 }, { "epoch": 1.04, "grad_norm": 0.5302225947380066, "learning_rate": 0.0004403268962373116, "loss": 3.0457, "step": 21123 }, { "epoch": 1.04, "grad_norm": 0.5287278890609741, "learning_rate": 0.00044031328801464063, "loss": 3.0917, "step": 21124 }, { "epoch": 1.04, "grad_norm": 0.5537627935409546, "learning_rate": 0.0004402996794224115, "loss": 3.0715, "step": 21125 }, { "epoch": 1.04, "grad_norm": 0.5674768686294556, "learning_rate": 0.00044028607046066, "loss": 3.1355, "step": 21126 }, { "epoch": 1.04, "grad_norm": 0.505564272403717, "learning_rate": 0.00044027246112942204, "loss": 2.8738, "step": 21127 }, { "epoch": 1.04, "grad_norm": 0.537092924118042, "learning_rate": 0.00044025885142873336, "loss": 3.241, "step": 21128 }, { "epoch": 1.04, "grad_norm": 0.514145016670227, "learning_rate": 0.0004402452413586299, "loss": 3.0004, "step": 21129 }, { "epoch": 1.04, "grad_norm": 0.5343130230903625, "learning_rate": 0.00044023163091914735, "loss": 3.1431, "step": 21130 }, { "epoch": 1.04, "grad_norm": 0.5057582855224609, "learning_rate": 0.00044021802011032175, "loss": 3.0871, "step": 21131 }, { "epoch": 1.04, "grad_norm": 0.5197306871414185, "learning_rate": 0.0004402044089321889, "loss": 3.0718, "step": 21132 }, { "epoch": 1.04, "grad_norm": 0.594575047492981, "learning_rate": 0.00044019079738478464, "loss": 3.019, "step": 21133 }, { "epoch": 1.04, "grad_norm": 0.5036117434501648, "learning_rate": 0.00044017718546814473, "loss": 2.9993, "step": 21134 }, { "epoch": 1.04, "grad_norm": 0.5579177737236023, "learning_rate": 0.0004401635731823051, "loss": 3.3196, "step": 21135 }, { "epoch": 1.04, "grad_norm": 0.5319854021072388, "learning_rate": 0.0004401499605273016, "loss": 3.0564, "step": 21136 }, { "epoch": 1.04, "grad_norm": 0.5116011500358582, "learning_rate": 0.0004401363475031701, "loss": 3.1638, "step": 21137 }, { "epoch": 1.04, "grad_norm": 0.5688881278038025, "learning_rate": 0.00044012273410994643, "loss": 3.2267, "step": 21138 }, { "epoch": 1.04, "grad_norm": 0.5254831314086914, "learning_rate": 0.0004401091203476664, "loss": 2.9605, "step": 21139 }, { "epoch": 1.04, "grad_norm": 0.5896934270858765, "learning_rate": 0.00044009550621636597, "loss": 3.0805, "step": 21140 }, { "epoch": 1.04, "grad_norm": 0.5865271091461182, "learning_rate": 0.0004400818917160809, "loss": 3.0489, "step": 21141 }, { "epoch": 1.04, "grad_norm": 0.5466059446334839, "learning_rate": 0.0004400682768468471, "loss": 3.198, "step": 21142 }, { "epoch": 1.04, "grad_norm": 0.5474870800971985, "learning_rate": 0.00044005466160870044, "loss": 3.1467, "step": 21143 }, { "epoch": 1.04, "grad_norm": 0.5172299742698669, "learning_rate": 0.0004400410460016767, "loss": 3.1705, "step": 21144 }, { "epoch": 1.04, "grad_norm": 0.5260311365127563, "learning_rate": 0.0004400274300258118, "loss": 3.1582, "step": 21145 }, { "epoch": 1.04, "grad_norm": 0.5976084470748901, "learning_rate": 0.0004400138136811417, "loss": 3.1107, "step": 21146 }, { "epoch": 1.04, "grad_norm": 0.5335716009140015, "learning_rate": 0.0004400001969677021, "loss": 3.1361, "step": 21147 }, { "epoch": 1.04, "grad_norm": 0.5297675728797913, "learning_rate": 0.0004399865798855289, "loss": 3.0556, "step": 21148 }, { "epoch": 1.04, "grad_norm": 0.5369957685470581, "learning_rate": 0.0004399729624346581, "loss": 3.162, "step": 21149 }, { "epoch": 1.04, "grad_norm": 0.5566016435623169, "learning_rate": 0.00043995934461512525, "loss": 3.0682, "step": 21150 }, { "epoch": 1.04, "grad_norm": 0.5417798757553101, "learning_rate": 0.00043994572642696656, "loss": 3.1494, "step": 21151 }, { "epoch": 1.04, "grad_norm": 0.5253806114196777, "learning_rate": 0.0004399321078702177, "loss": 2.9937, "step": 21152 }, { "epoch": 1.04, "grad_norm": 0.5396881699562073, "learning_rate": 0.00043991848894491464, "loss": 2.9225, "step": 21153 }, { "epoch": 1.04, "grad_norm": 0.5461945533752441, "learning_rate": 0.0004399048696510932, "loss": 3.1803, "step": 21154 }, { "epoch": 1.04, "grad_norm": 0.5187393426895142, "learning_rate": 0.0004398912499887893, "loss": 3.0976, "step": 21155 }, { "epoch": 1.04, "grad_norm": 0.5625729560852051, "learning_rate": 0.00043987762995803867, "loss": 2.9497, "step": 21156 }, { "epoch": 1.04, "grad_norm": 0.5398131012916565, "learning_rate": 0.00043986400955887733, "loss": 3.0063, "step": 21157 }, { "epoch": 1.04, "grad_norm": 0.5377815961837769, "learning_rate": 0.0004398503887913411, "loss": 3.0048, "step": 21158 }, { "epoch": 1.04, "grad_norm": 0.5535577535629272, "learning_rate": 0.0004398367676554659, "loss": 3.0225, "step": 21159 }, { "epoch": 1.04, "grad_norm": 0.5389450192451477, "learning_rate": 0.0004398231461512875, "loss": 3.0952, "step": 21160 }, { "epoch": 1.04, "grad_norm": 0.5288699865341187, "learning_rate": 0.0004398095242788419, "loss": 3.0028, "step": 21161 }, { "epoch": 1.04, "grad_norm": 0.5519115924835205, "learning_rate": 0.00043979590203816496, "loss": 3.0892, "step": 21162 }, { "epoch": 1.04, "grad_norm": 0.5267183780670166, "learning_rate": 0.0004397822794292924, "loss": 3.1845, "step": 21163 }, { "epoch": 1.04, "grad_norm": 0.5286177396774292, "learning_rate": 0.0004397686564522603, "loss": 3.182, "step": 21164 }, { "epoch": 1.04, "grad_norm": 0.5310975909233093, "learning_rate": 0.00043975503310710436, "loss": 3.0952, "step": 21165 }, { "epoch": 1.04, "grad_norm": 0.5732306838035583, "learning_rate": 0.0004397414093938607, "loss": 3.0944, "step": 21166 }, { "epoch": 1.04, "grad_norm": 0.5607469081878662, "learning_rate": 0.000439727785312565, "loss": 2.9405, "step": 21167 }, { "epoch": 1.04, "grad_norm": 0.5709458589553833, "learning_rate": 0.0004397141608632532, "loss": 3.1021, "step": 21168 }, { "epoch": 1.04, "grad_norm": 0.5287217497825623, "learning_rate": 0.00043970053604596126, "loss": 3.0768, "step": 21169 }, { "epoch": 1.04, "grad_norm": 0.5781598091125488, "learning_rate": 0.0004396869108607249, "loss": 3.214, "step": 21170 }, { "epoch": 1.04, "grad_norm": 0.5571904182434082, "learning_rate": 0.00043967328530758004, "loss": 3.0581, "step": 21171 }, { "epoch": 1.04, "grad_norm": 0.5215210914611816, "learning_rate": 0.0004396596593865627, "loss": 2.9378, "step": 21172 }, { "epoch": 1.04, "grad_norm": 0.6075809597969055, "learning_rate": 0.0004396460330977088, "loss": 2.9941, "step": 21173 }, { "epoch": 1.04, "grad_norm": 0.5475901365280151, "learning_rate": 0.000439632406441054, "loss": 3.277, "step": 21174 }, { "epoch": 1.04, "grad_norm": 0.5757737159729004, "learning_rate": 0.00043961877941663433, "loss": 3.2825, "step": 21175 }, { "epoch": 1.04, "grad_norm": 0.5772517919540405, "learning_rate": 0.0004396051520244857, "loss": 3.1746, "step": 21176 }, { "epoch": 1.04, "grad_norm": 0.5674006938934326, "learning_rate": 0.00043959152426464393, "loss": 3.1398, "step": 21177 }, { "epoch": 1.04, "grad_norm": 0.5191792845726013, "learning_rate": 0.00043957789613714493, "loss": 3.1744, "step": 21178 }, { "epoch": 1.04, "grad_norm": 0.5092511177062988, "learning_rate": 0.0004395642676420247, "loss": 3.0467, "step": 21179 }, { "epoch": 1.04, "grad_norm": 0.5251277089118958, "learning_rate": 0.000439550638779319, "loss": 2.8873, "step": 21180 }, { "epoch": 1.04, "grad_norm": 0.5171329975128174, "learning_rate": 0.0004395370095490637, "loss": 2.8804, "step": 21181 }, { "epoch": 1.04, "grad_norm": 0.5684592127799988, "learning_rate": 0.0004395233799512949, "loss": 3.1189, "step": 21182 }, { "epoch": 1.04, "grad_norm": 0.5765049457550049, "learning_rate": 0.0004395097499860483, "loss": 3.0344, "step": 21183 }, { "epoch": 1.04, "grad_norm": 0.5399144291877747, "learning_rate": 0.0004394961196533599, "loss": 3.1513, "step": 21184 }, { "epoch": 1.04, "grad_norm": 0.5205578207969666, "learning_rate": 0.00043948248895326556, "loss": 3.0598, "step": 21185 }, { "epoch": 1.04, "grad_norm": 0.5342963933944702, "learning_rate": 0.0004394688578858011, "loss": 3.099, "step": 21186 }, { "epoch": 1.04, "grad_norm": 0.5252129435539246, "learning_rate": 0.00043945522645100264, "loss": 3.1557, "step": 21187 }, { "epoch": 1.04, "grad_norm": 0.5322774052619934, "learning_rate": 0.0004394415946489059, "loss": 3.1597, "step": 21188 }, { "epoch": 1.04, "grad_norm": 0.5448768138885498, "learning_rate": 0.00043942796247954685, "loss": 3.07, "step": 21189 }, { "epoch": 1.04, "grad_norm": 0.5391474962234497, "learning_rate": 0.00043941432994296136, "loss": 3.2065, "step": 21190 }, { "epoch": 1.04, "grad_norm": 0.5174856185913086, "learning_rate": 0.0004394006970391854, "loss": 3.3146, "step": 21191 }, { "epoch": 1.04, "grad_norm": 0.5522015690803528, "learning_rate": 0.00043938706376825474, "loss": 2.9034, "step": 21192 }, { "epoch": 1.04, "grad_norm": 0.5845143795013428, "learning_rate": 0.0004393734301302054, "loss": 3.2134, "step": 21193 }, { "epoch": 1.04, "grad_norm": 0.5457573533058167, "learning_rate": 0.0004393597961250733, "loss": 2.9945, "step": 21194 }, { "epoch": 1.04, "grad_norm": 0.5486933588981628, "learning_rate": 0.0004393461617528944, "loss": 3.1322, "step": 21195 }, { "epoch": 1.04, "grad_norm": 0.5403074622154236, "learning_rate": 0.00043933252701370445, "loss": 3.1812, "step": 21196 }, { "epoch": 1.04, "grad_norm": 0.5348062515258789, "learning_rate": 0.00043931889190753937, "loss": 3.1889, "step": 21197 }, { "epoch": 1.04, "grad_norm": 0.5630962252616882, "learning_rate": 0.00043930525643443523, "loss": 3.1408, "step": 21198 }, { "epoch": 1.04, "grad_norm": 0.5177236795425415, "learning_rate": 0.00043929162059442786, "loss": 3.0108, "step": 21199 }, { "epoch": 1.04, "grad_norm": 0.6250644326210022, "learning_rate": 0.00043927798438755315, "loss": 2.906, "step": 21200 }, { "epoch": 1.04, "grad_norm": 0.596113920211792, "learning_rate": 0.00043926434781384703, "loss": 3.0855, "step": 21201 }, { "epoch": 1.04, "grad_norm": 0.5471876859664917, "learning_rate": 0.0004392507108733454, "loss": 2.9947, "step": 21202 }, { "epoch": 1.04, "grad_norm": 0.529731273651123, "learning_rate": 0.00043923707356608414, "loss": 3.0602, "step": 21203 }, { "epoch": 1.04, "grad_norm": 0.5773414969444275, "learning_rate": 0.0004392234358920993, "loss": 3.1828, "step": 21204 }, { "epoch": 1.04, "grad_norm": 0.5588471293449402, "learning_rate": 0.0004392097978514267, "loss": 3.2139, "step": 21205 }, { "epoch": 1.04, "grad_norm": 0.5582773089408875, "learning_rate": 0.0004391961594441024, "loss": 3.0678, "step": 21206 }, { "epoch": 1.04, "grad_norm": 0.5097718834877014, "learning_rate": 0.000439182520670162, "loss": 3.1309, "step": 21207 }, { "epoch": 1.04, "grad_norm": 0.6590524911880493, "learning_rate": 0.00043916888152964174, "loss": 2.9753, "step": 21208 }, { "epoch": 1.04, "grad_norm": 0.545197606086731, "learning_rate": 0.0004391552420225775, "loss": 3.1811, "step": 21209 }, { "epoch": 1.04, "grad_norm": 0.557826042175293, "learning_rate": 0.000439141602149005, "loss": 3.1237, "step": 21210 }, { "epoch": 1.04, "grad_norm": 0.5410374999046326, "learning_rate": 0.0004391279619089604, "loss": 3.0101, "step": 21211 }, { "epoch": 1.04, "grad_norm": 0.5548087358474731, "learning_rate": 0.0004391143213024794, "loss": 3.0225, "step": 21212 }, { "epoch": 1.04, "grad_norm": 0.5963765382766724, "learning_rate": 0.0004391006803295982, "loss": 3.2314, "step": 21213 }, { "epoch": 1.04, "grad_norm": 0.5618867874145508, "learning_rate": 0.0004390870389903525, "loss": 3.0364, "step": 21214 }, { "epoch": 1.04, "grad_norm": 0.5385682582855225, "learning_rate": 0.0004390733972847783, "loss": 3.1931, "step": 21215 }, { "epoch": 1.04, "grad_norm": 0.5348148941993713, "learning_rate": 0.0004390597552129116, "loss": 3.0025, "step": 21216 }, { "epoch": 1.04, "grad_norm": 0.5246903896331787, "learning_rate": 0.00043904611277478816, "loss": 3.2069, "step": 21217 }, { "epoch": 1.04, "grad_norm": 0.5948673486709595, "learning_rate": 0.0004390324699704441, "loss": 2.9454, "step": 21218 }, { "epoch": 1.04, "grad_norm": 0.5328468084335327, "learning_rate": 0.0004390188267999152, "loss": 3.3611, "step": 21219 }, { "epoch": 1.04, "grad_norm": 0.5216394066810608, "learning_rate": 0.0004390051832632376, "loss": 3.0009, "step": 21220 }, { "epoch": 1.04, "grad_norm": 0.7438657283782959, "learning_rate": 0.000438991539360447, "loss": 3.1814, "step": 21221 }, { "epoch": 1.04, "grad_norm": 0.5399791598320007, "learning_rate": 0.0004389778950915794, "loss": 2.9652, "step": 21222 }, { "epoch": 1.04, "grad_norm": 0.6201183795928955, "learning_rate": 0.00043896425045667083, "loss": 3.1625, "step": 21223 }, { "epoch": 1.04, "grad_norm": 0.6501622200012207, "learning_rate": 0.00043895060545575716, "loss": 2.9417, "step": 21224 }, { "epoch": 1.04, "grad_norm": 0.5394759178161621, "learning_rate": 0.00043893696008887436, "loss": 3.2379, "step": 21225 }, { "epoch": 1.04, "grad_norm": 0.5677136182785034, "learning_rate": 0.0004389233143560583, "loss": 3.1612, "step": 21226 }, { "epoch": 1.04, "grad_norm": 0.5787396430969238, "learning_rate": 0.00043890966825734504, "loss": 3.1594, "step": 21227 }, { "epoch": 1.04, "grad_norm": 0.5350275635719299, "learning_rate": 0.00043889602179277037, "loss": 3.0324, "step": 21228 }, { "epoch": 1.04, "grad_norm": 0.5961716175079346, "learning_rate": 0.0004388823749623704, "loss": 3.1801, "step": 21229 }, { "epoch": 1.04, "grad_norm": 0.5254611372947693, "learning_rate": 0.00043886872776618084, "loss": 2.9515, "step": 21230 }, { "epoch": 1.04, "grad_norm": 0.5171568393707275, "learning_rate": 0.0004388550802042379, "loss": 2.9371, "step": 21231 }, { "epoch": 1.04, "grad_norm": 0.5672535300254822, "learning_rate": 0.0004388414322765773, "loss": 2.9656, "step": 21232 }, { "epoch": 1.04, "grad_norm": 0.551892101764679, "learning_rate": 0.0004388277839832352, "loss": 3.0332, "step": 21233 }, { "epoch": 1.04, "grad_norm": 0.571571946144104, "learning_rate": 0.00043881413532424735, "loss": 3.3032, "step": 21234 }, { "epoch": 1.04, "grad_norm": 0.5441715717315674, "learning_rate": 0.0004388004862996499, "loss": 3.0702, "step": 21235 }, { "epoch": 1.04, "grad_norm": 0.5653895735740662, "learning_rate": 0.00043878683690947855, "loss": 3.2735, "step": 21236 }, { "epoch": 1.04, "grad_norm": 0.548560380935669, "learning_rate": 0.00043877318715376937, "loss": 3.2087, "step": 21237 }, { "epoch": 1.04, "grad_norm": 0.5536999702453613, "learning_rate": 0.00043875953703255844, "loss": 3.2161, "step": 21238 }, { "epoch": 1.04, "grad_norm": 0.5580829977989197, "learning_rate": 0.00043874588654588145, "loss": 3.2814, "step": 21239 }, { "epoch": 1.04, "grad_norm": 0.5434279441833496, "learning_rate": 0.00043873223569377456, "loss": 3.1529, "step": 21240 }, { "epoch": 1.04, "grad_norm": 0.5689913630485535, "learning_rate": 0.00043871858447627375, "loss": 3.1142, "step": 21241 }, { "epoch": 1.04, "grad_norm": 0.554728090763092, "learning_rate": 0.0004387049328934148, "loss": 3.0535, "step": 21242 }, { "epoch": 1.04, "grad_norm": 0.5019456148147583, "learning_rate": 0.00043869128094523377, "loss": 3.2623, "step": 21243 }, { "epoch": 1.04, "grad_norm": 0.5831273794174194, "learning_rate": 0.00043867762863176654, "loss": 3.2874, "step": 21244 }, { "epoch": 1.04, "grad_norm": 0.5366065502166748, "learning_rate": 0.0004386639759530492, "loss": 2.943, "step": 21245 }, { "epoch": 1.04, "grad_norm": 0.5083133578300476, "learning_rate": 0.00043865032290911764, "loss": 3.0425, "step": 21246 }, { "epoch": 1.04, "grad_norm": 0.5319649577140808, "learning_rate": 0.0004386366695000078, "loss": 3.1767, "step": 21247 }, { "epoch": 1.04, "grad_norm": 0.5332556366920471, "learning_rate": 0.0004386230157257556, "loss": 3.1384, "step": 21248 }, { "epoch": 1.04, "grad_norm": 0.511458694934845, "learning_rate": 0.0004386093615863972, "loss": 3.0138, "step": 21249 }, { "epoch": 1.04, "grad_norm": 0.5145825147628784, "learning_rate": 0.0004385957070819683, "loss": 3.0111, "step": 21250 }, { "epoch": 1.04, "grad_norm": 0.5127465128898621, "learning_rate": 0.00043858205221250496, "loss": 2.944, "step": 21251 }, { "epoch": 1.04, "grad_norm": 0.5706564784049988, "learning_rate": 0.0004385683969780432, "loss": 3.2836, "step": 21252 }, { "epoch": 1.04, "grad_norm": 0.5455026030540466, "learning_rate": 0.00043855474137861894, "loss": 3.0848, "step": 21253 }, { "epoch": 1.04, "grad_norm": 0.5656867623329163, "learning_rate": 0.00043854108541426813, "loss": 3.077, "step": 21254 }, { "epoch": 1.04, "grad_norm": 0.5728113651275635, "learning_rate": 0.00043852742908502676, "loss": 3.1265, "step": 21255 }, { "epoch": 1.04, "grad_norm": 0.5125572085380554, "learning_rate": 0.0004385137723909309, "loss": 3.0643, "step": 21256 }, { "epoch": 1.04, "grad_norm": 0.5141593217849731, "learning_rate": 0.00043850011533201643, "loss": 2.931, "step": 21257 }, { "epoch": 1.04, "grad_norm": 0.5243428945541382, "learning_rate": 0.0004384864579083192, "loss": 2.8332, "step": 21258 }, { "epoch": 1.04, "grad_norm": 0.5448452234268188, "learning_rate": 0.0004384728001198753, "loss": 3.0665, "step": 21259 }, { "epoch": 1.04, "grad_norm": 0.5277436375617981, "learning_rate": 0.0004384591419667208, "loss": 2.9827, "step": 21260 }, { "epoch": 1.04, "grad_norm": 0.5299006700515747, "learning_rate": 0.00043844548344889147, "loss": 2.9862, "step": 21261 }, { "epoch": 1.04, "grad_norm": 0.5536235570907593, "learning_rate": 0.00043843182456642336, "loss": 3.3202, "step": 21262 }, { "epoch": 1.04, "grad_norm": 0.5166927576065063, "learning_rate": 0.0004384181653193525, "loss": 3.0759, "step": 21263 }, { "epoch": 1.04, "grad_norm": 0.5285334587097168, "learning_rate": 0.0004384045057077149, "loss": 3.2137, "step": 21264 }, { "epoch": 1.04, "grad_norm": 0.5186501145362854, "learning_rate": 0.00043839084573154635, "loss": 2.869, "step": 21265 }, { "epoch": 1.04, "grad_norm": 0.49396657943725586, "learning_rate": 0.0004383771853908831, "loss": 2.9673, "step": 21266 }, { "epoch": 1.04, "grad_norm": 0.559752345085144, "learning_rate": 0.0004383635246857608, "loss": 3.2458, "step": 21267 }, { "epoch": 1.04, "grad_norm": 0.5300299525260925, "learning_rate": 0.0004383498636162157, "loss": 3.0761, "step": 21268 }, { "epoch": 1.04, "grad_norm": 0.5181698203086853, "learning_rate": 0.0004383362021822837, "loss": 3.0975, "step": 21269 }, { "epoch": 1.04, "grad_norm": 0.5253779888153076, "learning_rate": 0.0004383225403840007, "loss": 3.0754, "step": 21270 }, { "epoch": 1.04, "grad_norm": 0.519812822341919, "learning_rate": 0.0004383088782214029, "loss": 3.1472, "step": 21271 }, { "epoch": 1.04, "grad_norm": 0.5578188300132751, "learning_rate": 0.000438295215694526, "loss": 3.1391, "step": 21272 }, { "epoch": 1.04, "grad_norm": 0.5534501075744629, "learning_rate": 0.00043828155280340613, "loss": 2.9352, "step": 21273 }, { "epoch": 1.04, "grad_norm": 0.5486505031585693, "learning_rate": 0.00043826788954807924, "loss": 3.2344, "step": 21274 }, { "epoch": 1.04, "grad_norm": 0.5528342127799988, "learning_rate": 0.0004382542259285814, "loss": 3.014, "step": 21275 }, { "epoch": 1.04, "grad_norm": 0.5291114449501038, "learning_rate": 0.0004382405619449485, "loss": 2.9962, "step": 21276 }, { "epoch": 1.04, "grad_norm": 0.5442724227905273, "learning_rate": 0.0004382268975972166, "loss": 2.9954, "step": 21277 }, { "epoch": 1.04, "grad_norm": 0.5385951995849609, "learning_rate": 0.00043821323288542163, "loss": 3.3394, "step": 21278 }, { "epoch": 1.04, "grad_norm": 0.5655089616775513, "learning_rate": 0.00043819956780959953, "loss": 3.0762, "step": 21279 }, { "epoch": 1.04, "grad_norm": 0.5629735589027405, "learning_rate": 0.00043818590236978643, "loss": 3.1988, "step": 21280 }, { "epoch": 1.04, "grad_norm": 0.5002853274345398, "learning_rate": 0.0004381722365660183, "loss": 3.2544, "step": 21281 }, { "epoch": 1.04, "grad_norm": 0.6046683192253113, "learning_rate": 0.00043815857039833107, "loss": 3.0964, "step": 21282 }, { "epoch": 1.04, "grad_norm": 0.5325875282287598, "learning_rate": 0.0004381449038667608, "loss": 3.0506, "step": 21283 }, { "epoch": 1.04, "grad_norm": 0.568646252155304, "learning_rate": 0.0004381312369713433, "loss": 3.3053, "step": 21284 }, { "epoch": 1.04, "grad_norm": 0.5482074022293091, "learning_rate": 0.0004381175697121148, "loss": 3.158, "step": 21285 }, { "epoch": 1.04, "grad_norm": 0.5522348284721375, "learning_rate": 0.0004381039020891112, "loss": 3.0225, "step": 21286 }, { "epoch": 1.04, "grad_norm": 0.5628721714019775, "learning_rate": 0.0004380902341023685, "loss": 3.2838, "step": 21287 }, { "epoch": 1.04, "grad_norm": 0.5041287541389465, "learning_rate": 0.0004380765657519227, "loss": 3.1902, "step": 21288 }, { "epoch": 1.04, "grad_norm": 0.5522623062133789, "learning_rate": 0.0004380628970378098, "loss": 3.1488, "step": 21289 }, { "epoch": 1.04, "grad_norm": 0.5240798592567444, "learning_rate": 0.0004380492279600658, "loss": 2.9737, "step": 21290 }, { "epoch": 1.04, "grad_norm": 0.532329261302948, "learning_rate": 0.00043803555851872663, "loss": 3.1439, "step": 21291 }, { "epoch": 1.04, "grad_norm": 0.5527257919311523, "learning_rate": 0.0004380218887138284, "loss": 3.1124, "step": 21292 }, { "epoch": 1.04, "grad_norm": 0.5227271318435669, "learning_rate": 0.0004380082185454072, "loss": 3.1057, "step": 21293 }, { "epoch": 1.04, "grad_norm": 0.5554690957069397, "learning_rate": 0.00043799454801349876, "loss": 3.0577, "step": 21294 }, { "epoch": 1.04, "grad_norm": 0.5670045614242554, "learning_rate": 0.0004379808771181393, "loss": 3.0609, "step": 21295 }, { "epoch": 1.04, "grad_norm": 0.5955886840820312, "learning_rate": 0.00043796720585936475, "loss": 3.13, "step": 21296 }, { "epoch": 1.04, "grad_norm": 0.546093761920929, "learning_rate": 0.0004379535342372112, "loss": 3.1067, "step": 21297 }, { "epoch": 1.04, "grad_norm": 0.5156376361846924, "learning_rate": 0.0004379398622517145, "loss": 3.1343, "step": 21298 }, { "epoch": 1.04, "grad_norm": 0.5549337267875671, "learning_rate": 0.0004379261899029107, "loss": 3.1189, "step": 21299 }, { "epoch": 1.04, "grad_norm": 0.5297946929931641, "learning_rate": 0.000437912517190836, "loss": 2.8991, "step": 21300 }, { "epoch": 1.04, "grad_norm": 0.566015362739563, "learning_rate": 0.0004378988441155262, "loss": 2.8466, "step": 21301 }, { "epoch": 1.04, "grad_norm": 0.5280096530914307, "learning_rate": 0.0004378851706770174, "loss": 3.1138, "step": 21302 }, { "epoch": 1.04, "grad_norm": 0.5393192768096924, "learning_rate": 0.00043787149687534555, "loss": 3.0668, "step": 21303 }, { "epoch": 1.04, "grad_norm": 0.5300957560539246, "learning_rate": 0.00043785782271054676, "loss": 3.1409, "step": 21304 }, { "epoch": 1.04, "grad_norm": 0.5181828737258911, "learning_rate": 0.000437844148182657, "loss": 3.191, "step": 21305 }, { "epoch": 1.04, "grad_norm": 0.5715096592903137, "learning_rate": 0.00043783047329171225, "loss": 3.0793, "step": 21306 }, { "epoch": 1.04, "grad_norm": 0.5914278626441956, "learning_rate": 0.0004378167980377486, "loss": 3.0991, "step": 21307 }, { "epoch": 1.04, "grad_norm": 0.520348310470581, "learning_rate": 0.0004378031224208021, "loss": 2.8398, "step": 21308 }, { "epoch": 1.04, "grad_norm": 0.5296949744224548, "learning_rate": 0.00043778944644090855, "loss": 3.1946, "step": 21309 }, { "epoch": 1.04, "grad_norm": 0.5429492592811584, "learning_rate": 0.00043777577009810416, "loss": 3.0502, "step": 21310 }, { "epoch": 1.04, "grad_norm": 0.524493396282196, "learning_rate": 0.00043776209339242494, "loss": 2.9544, "step": 21311 }, { "epoch": 1.04, "grad_norm": 0.5563609004020691, "learning_rate": 0.00043774841632390684, "loss": 3.0386, "step": 21312 }, { "epoch": 1.04, "grad_norm": 0.5135151147842407, "learning_rate": 0.0004377347388925859, "loss": 3.131, "step": 21313 }, { "epoch": 1.04, "grad_norm": 0.5470807552337646, "learning_rate": 0.00043772106109849824, "loss": 3.1214, "step": 21314 }, { "epoch": 1.04, "grad_norm": 0.5473372340202332, "learning_rate": 0.0004377073829416798, "loss": 3.019, "step": 21315 }, { "epoch": 1.04, "grad_norm": 0.5544005632400513, "learning_rate": 0.00043769370442216655, "loss": 3.2527, "step": 21316 }, { "epoch": 1.04, "grad_norm": 0.5615202188491821, "learning_rate": 0.0004376800255399946, "loss": 3.1916, "step": 21317 }, { "epoch": 1.04, "grad_norm": 0.5153510570526123, "learning_rate": 0.0004376663462952, "loss": 3.1229, "step": 21318 }, { "epoch": 1.04, "grad_norm": 0.5381267666816711, "learning_rate": 0.00043765266668781874, "loss": 3.0508, "step": 21319 }, { "epoch": 1.04, "grad_norm": 0.5629384517669678, "learning_rate": 0.00043763898671788677, "loss": 2.9926, "step": 21320 }, { "epoch": 1.04, "grad_norm": 0.5090012550354004, "learning_rate": 0.00043762530638544026, "loss": 3.157, "step": 21321 }, { "epoch": 1.04, "grad_norm": 0.5678680539131165, "learning_rate": 0.00043761162569051526, "loss": 2.9729, "step": 21322 }, { "epoch": 1.04, "grad_norm": 0.5650219917297363, "learning_rate": 0.0004375979446331476, "loss": 3.3654, "step": 21323 }, { "epoch": 1.05, "grad_norm": 0.5620492100715637, "learning_rate": 0.00043758426321337355, "loss": 3.1908, "step": 21324 }, { "epoch": 1.05, "grad_norm": 0.5956307053565979, "learning_rate": 0.0004375705814312288, "loss": 2.8768, "step": 21325 }, { "epoch": 1.05, "grad_norm": 0.5379623174667358, "learning_rate": 0.00043755689928674985, "loss": 3.0727, "step": 21326 }, { "epoch": 1.05, "grad_norm": 0.5664665699005127, "learning_rate": 0.00043754321677997237, "loss": 2.7609, "step": 21327 }, { "epoch": 1.05, "grad_norm": 0.527374267578125, "learning_rate": 0.0004375295339109326, "loss": 3.1016, "step": 21328 }, { "epoch": 1.05, "grad_norm": 0.5893529653549194, "learning_rate": 0.0004375158506796665, "loss": 3.1118, "step": 21329 }, { "epoch": 1.05, "grad_norm": 0.5682183504104614, "learning_rate": 0.0004375021670862101, "loss": 3.1754, "step": 21330 }, { "epoch": 1.05, "grad_norm": 0.5709665417671204, "learning_rate": 0.0004374884831305995, "loss": 3.0515, "step": 21331 }, { "epoch": 1.05, "grad_norm": 0.5187592506408691, "learning_rate": 0.00043747479881287054, "loss": 3.2883, "step": 21332 }, { "epoch": 1.05, "grad_norm": 0.5648815035820007, "learning_rate": 0.0004374611141330596, "loss": 3.1431, "step": 21333 }, { "epoch": 1.05, "grad_norm": 0.500318169593811, "learning_rate": 0.0004374474290912025, "loss": 3.0056, "step": 21334 }, { "epoch": 1.05, "grad_norm": 0.5123018622398376, "learning_rate": 0.00043743374368733517, "loss": 3.0267, "step": 21335 }, { "epoch": 1.05, "grad_norm": 0.5368598103523254, "learning_rate": 0.00043742005792149393, "loss": 3.1508, "step": 21336 }, { "epoch": 1.05, "grad_norm": 0.5632544159889221, "learning_rate": 0.00043740637179371473, "loss": 3.1209, "step": 21337 }, { "epoch": 1.05, "grad_norm": 0.5279106497764587, "learning_rate": 0.0004373926853040336, "loss": 3.3057, "step": 21338 }, { "epoch": 1.05, "grad_norm": 0.5026139616966248, "learning_rate": 0.0004373789984524865, "loss": 3.1144, "step": 21339 }, { "epoch": 1.05, "grad_norm": 0.5417594909667969, "learning_rate": 0.0004373653112391096, "loss": 3.1992, "step": 21340 }, { "epoch": 1.05, "grad_norm": 0.5107925534248352, "learning_rate": 0.0004373516236639389, "loss": 3.1297, "step": 21341 }, { "epoch": 1.05, "grad_norm": 0.532360315322876, "learning_rate": 0.00043733793572701046, "loss": 3.104, "step": 21342 }, { "epoch": 1.05, "grad_norm": 0.5431941151618958, "learning_rate": 0.0004373242474283603, "loss": 3.0766, "step": 21343 }, { "epoch": 1.05, "grad_norm": 0.5421182513237, "learning_rate": 0.00043731055876802463, "loss": 3.2512, "step": 21344 }, { "epoch": 1.05, "grad_norm": 0.5257422924041748, "learning_rate": 0.0004372968697460392, "loss": 3.1045, "step": 21345 }, { "epoch": 1.05, "grad_norm": 0.4982752203941345, "learning_rate": 0.0004372831803624403, "loss": 3.182, "step": 21346 }, { "epoch": 1.05, "grad_norm": 0.6206594705581665, "learning_rate": 0.000437269490617264, "loss": 3.0784, "step": 21347 }, { "epoch": 1.05, "grad_norm": 0.5198937058448792, "learning_rate": 0.00043725580051054625, "loss": 3.357, "step": 21348 }, { "epoch": 1.05, "grad_norm": 0.5259478092193604, "learning_rate": 0.00043724211004232303, "loss": 3.0271, "step": 21349 }, { "epoch": 1.05, "grad_norm": 0.523259699344635, "learning_rate": 0.00043722841921263055, "loss": 3.1932, "step": 21350 }, { "epoch": 1.05, "grad_norm": 0.5149825215339661, "learning_rate": 0.0004372147280215049, "loss": 3.171, "step": 21351 }, { "epoch": 1.05, "grad_norm": 0.5463266968727112, "learning_rate": 0.00043720103646898204, "loss": 2.9304, "step": 21352 }, { "epoch": 1.05, "grad_norm": 0.5451768636703491, "learning_rate": 0.000437187344555098, "loss": 3.0214, "step": 21353 }, { "epoch": 1.05, "grad_norm": 0.5087425112724304, "learning_rate": 0.0004371736522798889, "loss": 2.9616, "step": 21354 }, { "epoch": 1.05, "grad_norm": 0.5477582216262817, "learning_rate": 0.0004371599596433909, "loss": 3.0898, "step": 21355 }, { "epoch": 1.05, "grad_norm": 0.5459511280059814, "learning_rate": 0.0004371462666456398, "loss": 3.2466, "step": 21356 }, { "epoch": 1.05, "grad_norm": 0.5397904515266418, "learning_rate": 0.00043713257328667195, "loss": 2.9689, "step": 21357 }, { "epoch": 1.05, "grad_norm": 0.528396487236023, "learning_rate": 0.00043711887956652325, "loss": 2.9729, "step": 21358 }, { "epoch": 1.05, "grad_norm": 0.5352534651756287, "learning_rate": 0.00043710518548522985, "loss": 3.2941, "step": 21359 }, { "epoch": 1.05, "grad_norm": 0.529255747795105, "learning_rate": 0.0004370914910428278, "loss": 3.1262, "step": 21360 }, { "epoch": 1.05, "grad_norm": 0.5711490511894226, "learning_rate": 0.000437077796239353, "loss": 2.9798, "step": 21361 }, { "epoch": 1.05, "grad_norm": 0.5521866083145142, "learning_rate": 0.00043706410107484183, "loss": 3.0056, "step": 21362 }, { "epoch": 1.05, "grad_norm": 0.5383753776550293, "learning_rate": 0.0004370504055493301, "loss": 2.9838, "step": 21363 }, { "epoch": 1.05, "grad_norm": 0.5527660846710205, "learning_rate": 0.000437036709662854, "loss": 3.1455, "step": 21364 }, { "epoch": 1.05, "grad_norm": 0.5740596652030945, "learning_rate": 0.0004370230134154496, "loss": 3.1023, "step": 21365 }, { "epoch": 1.05, "grad_norm": 0.5765985250473022, "learning_rate": 0.0004370093168071529, "loss": 2.9812, "step": 21366 }, { "epoch": 1.05, "grad_norm": 0.5971035957336426, "learning_rate": 0.00043699561983800007, "loss": 3.1561, "step": 21367 }, { "epoch": 1.05, "grad_norm": 0.5382588505744934, "learning_rate": 0.0004369819225080272, "loss": 3.1575, "step": 21368 }, { "epoch": 1.05, "grad_norm": 0.5307689905166626, "learning_rate": 0.0004369682248172702, "loss": 3.0418, "step": 21369 }, { "epoch": 1.05, "grad_norm": 0.551903486251831, "learning_rate": 0.00043695452676576537, "loss": 3.1686, "step": 21370 }, { "epoch": 1.05, "grad_norm": 0.5409027934074402, "learning_rate": 0.00043694082835354854, "loss": 2.9421, "step": 21371 }, { "epoch": 1.05, "grad_norm": 0.5229660868644714, "learning_rate": 0.000436927129580656, "loss": 2.9775, "step": 21372 }, { "epoch": 1.05, "grad_norm": 0.5019228458404541, "learning_rate": 0.0004369134304471238, "loss": 3.0205, "step": 21373 }, { "epoch": 1.05, "grad_norm": 0.5376384854316711, "learning_rate": 0.0004368997309529879, "loss": 3.1344, "step": 21374 }, { "epoch": 1.05, "grad_norm": 0.5281094908714294, "learning_rate": 0.0004368860310982845, "loss": 2.9118, "step": 21375 }, { "epoch": 1.05, "grad_norm": 0.5503444075584412, "learning_rate": 0.0004368723308830495, "loss": 2.9234, "step": 21376 }, { "epoch": 1.05, "grad_norm": 0.5356557369232178, "learning_rate": 0.00043685863030731934, "loss": 3.2536, "step": 21377 }, { "epoch": 1.05, "grad_norm": 0.5426025390625, "learning_rate": 0.0004368449293711298, "loss": 3.1201, "step": 21378 }, { "epoch": 1.05, "grad_norm": 0.5253592133522034, "learning_rate": 0.00043683122807451695, "loss": 3.1486, "step": 21379 }, { "epoch": 1.05, "grad_norm": 0.5157790184020996, "learning_rate": 0.0004368175264175171, "loss": 2.9994, "step": 21380 }, { "epoch": 1.05, "grad_norm": 0.5356464385986328, "learning_rate": 0.0004368038244001661, "loss": 2.9935, "step": 21381 }, { "epoch": 1.05, "grad_norm": 0.5198023915290833, "learning_rate": 0.00043679012202250023, "loss": 2.9333, "step": 21382 }, { "epoch": 1.05, "grad_norm": 0.6020377278327942, "learning_rate": 0.00043677641928455553, "loss": 3.1164, "step": 21383 }, { "epoch": 1.05, "grad_norm": 0.540126383304596, "learning_rate": 0.0004367627161863681, "loss": 3.0763, "step": 21384 }, { "epoch": 1.05, "grad_norm": 0.5544701814651489, "learning_rate": 0.00043674901272797395, "loss": 2.9656, "step": 21385 }, { "epoch": 1.05, "grad_norm": 0.57041335105896, "learning_rate": 0.0004367353089094092, "loss": 3.096, "step": 21386 }, { "epoch": 1.05, "grad_norm": 0.5330620408058167, "learning_rate": 0.0004367216047307099, "loss": 3.0942, "step": 21387 }, { "epoch": 1.05, "grad_norm": 0.5197447538375854, "learning_rate": 0.0004367079001919124, "loss": 2.9982, "step": 21388 }, { "epoch": 1.05, "grad_norm": 0.5680919885635376, "learning_rate": 0.0004366941952930524, "loss": 3.1648, "step": 21389 }, { "epoch": 1.05, "grad_norm": 0.6198605895042419, "learning_rate": 0.0004366804900341663, "loss": 3.0439, "step": 21390 }, { "epoch": 1.05, "grad_norm": 0.5180275440216064, "learning_rate": 0.00043666678441529004, "loss": 2.7934, "step": 21391 }, { "epoch": 1.05, "grad_norm": 0.5544524192810059, "learning_rate": 0.00043665307843645976, "loss": 2.8057, "step": 21392 }, { "epoch": 1.05, "grad_norm": 0.6640998125076294, "learning_rate": 0.0004366393720977116, "loss": 3.081, "step": 21393 }, { "epoch": 1.05, "grad_norm": 0.5651010870933533, "learning_rate": 0.00043662566539908165, "loss": 2.9706, "step": 21394 }, { "epoch": 1.05, "grad_norm": 0.5061413049697876, "learning_rate": 0.00043661195834060603, "loss": 3.0914, "step": 21395 }, { "epoch": 1.05, "grad_norm": 0.5113970637321472, "learning_rate": 0.00043659825092232075, "loss": 3.0878, "step": 21396 }, { "epoch": 1.05, "grad_norm": 0.521929919719696, "learning_rate": 0.0004365845431442619, "loss": 3.1446, "step": 21397 }, { "epoch": 1.05, "grad_norm": 0.5344848036766052, "learning_rate": 0.0004365708350064657, "loss": 3.2237, "step": 21398 }, { "epoch": 1.05, "grad_norm": 0.5206398963928223, "learning_rate": 0.0004365571265089683, "loss": 3.0752, "step": 21399 }, { "epoch": 1.05, "grad_norm": 0.583916187286377, "learning_rate": 0.0004365434176518057, "loss": 3.0648, "step": 21400 }, { "epoch": 1.05, "grad_norm": 0.5128787159919739, "learning_rate": 0.0004365297084350139, "loss": 3.3052, "step": 21401 }, { "epoch": 1.05, "grad_norm": 0.5522475242614746, "learning_rate": 0.00043651599885862917, "loss": 3.155, "step": 21402 }, { "epoch": 1.05, "grad_norm": 0.5218787789344788, "learning_rate": 0.00043650228892268755, "loss": 2.8891, "step": 21403 }, { "epoch": 1.05, "grad_norm": 0.5376594662666321, "learning_rate": 0.0004364885786272252, "loss": 3.1309, "step": 21404 }, { "epoch": 1.05, "grad_norm": 0.5462920069694519, "learning_rate": 0.0004364748679722782, "loss": 3.139, "step": 21405 }, { "epoch": 1.05, "grad_norm": 0.5011506080627441, "learning_rate": 0.0004364611569578827, "loss": 3.2214, "step": 21406 }, { "epoch": 1.05, "grad_norm": 0.5488290190696716, "learning_rate": 0.00043644744558407465, "loss": 2.852, "step": 21407 }, { "epoch": 1.05, "grad_norm": 0.5682697296142578, "learning_rate": 0.0004364337338508904, "loss": 3.0328, "step": 21408 }, { "epoch": 1.05, "grad_norm": 0.5012774467468262, "learning_rate": 0.0004364200217583659, "loss": 2.9092, "step": 21409 }, { "epoch": 1.05, "grad_norm": 0.5332022309303284, "learning_rate": 0.00043640630930653744, "loss": 3.1763, "step": 21410 }, { "epoch": 1.05, "grad_norm": 0.49493035674095154, "learning_rate": 0.00043639259649544084, "loss": 3.2316, "step": 21411 }, { "epoch": 1.05, "grad_norm": 0.5405813455581665, "learning_rate": 0.00043637888332511245, "loss": 3.1027, "step": 21412 }, { "epoch": 1.05, "grad_norm": 0.531501829624176, "learning_rate": 0.0004363651697955884, "loss": 3.249, "step": 21413 }, { "epoch": 1.05, "grad_norm": 0.5270764827728271, "learning_rate": 0.0004363514559069047, "loss": 2.9355, "step": 21414 }, { "epoch": 1.05, "grad_norm": 0.5820385217666626, "learning_rate": 0.00043633774165909744, "loss": 3.1851, "step": 21415 }, { "epoch": 1.05, "grad_norm": 0.5525915026664734, "learning_rate": 0.0004363240270522029, "loss": 2.9714, "step": 21416 }, { "epoch": 1.05, "grad_norm": 0.5313616991043091, "learning_rate": 0.00043631031208625704, "loss": 3.1641, "step": 21417 }, { "epoch": 1.05, "grad_norm": 0.5130735635757446, "learning_rate": 0.0004362965967612961, "loss": 3.1911, "step": 21418 }, { "epoch": 1.05, "grad_norm": 0.5332739353179932, "learning_rate": 0.0004362828810773561, "loss": 3.2662, "step": 21419 }, { "epoch": 1.05, "grad_norm": 0.4927477538585663, "learning_rate": 0.0004362691650344732, "loss": 3.204, "step": 21420 }, { "epoch": 1.05, "grad_norm": 0.5364248752593994, "learning_rate": 0.00043625544863268366, "loss": 3.1127, "step": 21421 }, { "epoch": 1.05, "grad_norm": 0.5149003267288208, "learning_rate": 0.0004362417318720234, "loss": 3.1497, "step": 21422 }, { "epoch": 1.05, "grad_norm": 0.5644574761390686, "learning_rate": 0.00043622801475252866, "loss": 3.0328, "step": 21423 }, { "epoch": 1.05, "grad_norm": 0.5710899829864502, "learning_rate": 0.0004362142972742356, "loss": 2.9797, "step": 21424 }, { "epoch": 1.05, "grad_norm": 0.5768548846244812, "learning_rate": 0.00043620057943718023, "loss": 3.2249, "step": 21425 }, { "epoch": 1.05, "grad_norm": 0.5565029382705688, "learning_rate": 0.0004361868612413988, "loss": 3.0481, "step": 21426 }, { "epoch": 1.05, "grad_norm": 0.5066186785697937, "learning_rate": 0.0004361731426869273, "loss": 3.2739, "step": 21427 }, { "epoch": 1.05, "grad_norm": 0.5121492743492126, "learning_rate": 0.000436159423773802, "loss": 3.0838, "step": 21428 }, { "epoch": 1.05, "grad_norm": 0.5542187094688416, "learning_rate": 0.000436145704502059, "loss": 3.2834, "step": 21429 }, { "epoch": 1.05, "grad_norm": 0.5797318816184998, "learning_rate": 0.0004361319848717344, "loss": 2.9999, "step": 21430 }, { "epoch": 1.05, "grad_norm": 0.5291181802749634, "learning_rate": 0.00043611826488286433, "loss": 3.0283, "step": 21431 }, { "epoch": 1.05, "grad_norm": 0.5589236617088318, "learning_rate": 0.00043610454453548505, "loss": 3.2367, "step": 21432 }, { "epoch": 1.05, "grad_norm": 0.5176942348480225, "learning_rate": 0.0004360908238296324, "loss": 3.0578, "step": 21433 }, { "epoch": 1.05, "grad_norm": 0.5708388686180115, "learning_rate": 0.0004360771027653428, "loss": 2.9649, "step": 21434 }, { "epoch": 1.05, "grad_norm": 0.5405948758125305, "learning_rate": 0.0004360633813426524, "loss": 3.1171, "step": 21435 }, { "epoch": 1.05, "grad_norm": 0.5287649631500244, "learning_rate": 0.00043604965956159716, "loss": 3.1744, "step": 21436 }, { "epoch": 1.05, "grad_norm": 0.5235514640808105, "learning_rate": 0.0004360359374222133, "loss": 2.9239, "step": 21437 }, { "epoch": 1.05, "grad_norm": 0.5278797745704651, "learning_rate": 0.0004360222149245369, "loss": 2.9237, "step": 21438 }, { "epoch": 1.05, "grad_norm": 0.5604250431060791, "learning_rate": 0.0004360084920686042, "loss": 3.0579, "step": 21439 }, { "epoch": 1.05, "grad_norm": 0.5143141150474548, "learning_rate": 0.0004359947688544514, "loss": 3.105, "step": 21440 }, { "epoch": 1.05, "grad_norm": 0.5379894971847534, "learning_rate": 0.0004359810452821145, "loss": 2.9211, "step": 21441 }, { "epoch": 1.05, "grad_norm": 0.5371367931365967, "learning_rate": 0.00043596732135162965, "loss": 3.3108, "step": 21442 }, { "epoch": 1.05, "grad_norm": 0.5497921109199524, "learning_rate": 0.00043595359706303307, "loss": 3.1547, "step": 21443 }, { "epoch": 1.05, "grad_norm": 0.5384805798530579, "learning_rate": 0.0004359398724163609, "loss": 3.2494, "step": 21444 }, { "epoch": 1.05, "grad_norm": 0.5575272440910339, "learning_rate": 0.00043592614741164924, "loss": 3.0403, "step": 21445 }, { "epoch": 1.05, "grad_norm": 0.5456324219703674, "learning_rate": 0.0004359124220489343, "loss": 3.1509, "step": 21446 }, { "epoch": 1.05, "grad_norm": 0.5280311107635498, "learning_rate": 0.00043589869632825216, "loss": 3.0561, "step": 21447 }, { "epoch": 1.05, "grad_norm": 0.542693555355072, "learning_rate": 0.000435884970249639, "loss": 2.9996, "step": 21448 }, { "epoch": 1.05, "grad_norm": 0.531836748123169, "learning_rate": 0.00043587124381313104, "loss": 3.1081, "step": 21449 }, { "epoch": 1.05, "grad_norm": 0.7221289873123169, "learning_rate": 0.0004358575170187644, "loss": 2.9935, "step": 21450 }, { "epoch": 1.05, "grad_norm": 0.5565702319145203, "learning_rate": 0.0004358437898665751, "loss": 2.9451, "step": 21451 }, { "epoch": 1.05, "grad_norm": 0.5191698670387268, "learning_rate": 0.0004358300623565994, "loss": 2.9686, "step": 21452 }, { "epoch": 1.05, "grad_norm": 0.5412079691886902, "learning_rate": 0.0004358163344888735, "loss": 3.1743, "step": 21453 }, { "epoch": 1.05, "grad_norm": 0.5316622257232666, "learning_rate": 0.00043580260626343355, "loss": 3.1001, "step": 21454 }, { "epoch": 1.05, "grad_norm": 0.5519059896469116, "learning_rate": 0.00043578887768031563, "loss": 3.2956, "step": 21455 }, { "epoch": 1.05, "grad_norm": 0.507394552230835, "learning_rate": 0.0004357751487395559, "loss": 2.9144, "step": 21456 }, { "epoch": 1.05, "grad_norm": 0.5262680649757385, "learning_rate": 0.00043576141944119066, "loss": 3.1354, "step": 21457 }, { "epoch": 1.05, "grad_norm": 0.5504863858222961, "learning_rate": 0.0004357476897852558, "loss": 3.2812, "step": 21458 }, { "epoch": 1.05, "grad_norm": 0.5362016558647156, "learning_rate": 0.0004357339597717878, "loss": 3.0618, "step": 21459 }, { "epoch": 1.05, "grad_norm": 0.5501962900161743, "learning_rate": 0.0004357202294008227, "loss": 3.2196, "step": 21460 }, { "epoch": 1.05, "grad_norm": 0.5307948589324951, "learning_rate": 0.00043570649867239657, "loss": 3.2588, "step": 21461 }, { "epoch": 1.05, "grad_norm": 0.5310550332069397, "learning_rate": 0.00043569276758654565, "loss": 3.2253, "step": 21462 }, { "epoch": 1.05, "grad_norm": 0.6042519211769104, "learning_rate": 0.00043567903614330603, "loss": 3.2064, "step": 21463 }, { "epoch": 1.05, "grad_norm": 0.5459402799606323, "learning_rate": 0.00043566530434271407, "loss": 2.8162, "step": 21464 }, { "epoch": 1.05, "grad_norm": 0.5372798442840576, "learning_rate": 0.0004356515721848057, "loss": 3.1366, "step": 21465 }, { "epoch": 1.05, "grad_norm": 0.5367051959037781, "learning_rate": 0.0004356378396696172, "loss": 3.0794, "step": 21466 }, { "epoch": 1.05, "grad_norm": 0.5590572953224182, "learning_rate": 0.0004356241067971848, "loss": 3.1426, "step": 21467 }, { "epoch": 1.05, "grad_norm": 0.5460636615753174, "learning_rate": 0.0004356103735675446, "loss": 3.1592, "step": 21468 }, { "epoch": 1.05, "grad_norm": 0.5461768507957458, "learning_rate": 0.00043559663998073264, "loss": 2.9046, "step": 21469 }, { "epoch": 1.05, "grad_norm": 0.5281131267547607, "learning_rate": 0.00043558290603678537, "loss": 2.9527, "step": 21470 }, { "epoch": 1.05, "grad_norm": 0.5545151829719543, "learning_rate": 0.00043556917173573877, "loss": 2.9797, "step": 21471 }, { "epoch": 1.05, "grad_norm": 0.5535064935684204, "learning_rate": 0.00043555543707762915, "loss": 3.0521, "step": 21472 }, { "epoch": 1.05, "grad_norm": 0.5981447100639343, "learning_rate": 0.0004355417020624925, "loss": 3.0078, "step": 21473 }, { "epoch": 1.05, "grad_norm": 0.5304649472236633, "learning_rate": 0.000435527966690365, "loss": 3.0455, "step": 21474 }, { "epoch": 1.05, "grad_norm": 0.5150699615478516, "learning_rate": 0.0004355142309612831, "loss": 3.1424, "step": 21475 }, { "epoch": 1.05, "grad_norm": 0.5696814060211182, "learning_rate": 0.0004355004948752827, "loss": 3.0476, "step": 21476 }, { "epoch": 1.05, "grad_norm": 0.6048765778541565, "learning_rate": 0.0004354867584324002, "loss": 3.0301, "step": 21477 }, { "epoch": 1.05, "grad_norm": 0.5578874349594116, "learning_rate": 0.0004354730216326714, "loss": 2.9297, "step": 21478 }, { "epoch": 1.05, "grad_norm": 0.5642783641815186, "learning_rate": 0.00043545928447613294, "loss": 3.1453, "step": 21479 }, { "epoch": 1.05, "grad_norm": 0.5326113104820251, "learning_rate": 0.00043544554696282075, "loss": 3.2642, "step": 21480 }, { "epoch": 1.05, "grad_norm": 0.54203861951828, "learning_rate": 0.00043543180909277104, "loss": 2.9685, "step": 21481 }, { "epoch": 1.05, "grad_norm": 0.5783830881118774, "learning_rate": 0.00043541807086602, "loss": 3.046, "step": 21482 }, { "epoch": 1.05, "grad_norm": 0.5656664371490479, "learning_rate": 0.0004354043322826039, "loss": 3.1215, "step": 21483 }, { "epoch": 1.05, "grad_norm": 0.5210521221160889, "learning_rate": 0.00043539059334255875, "loss": 3.3077, "step": 21484 }, { "epoch": 1.05, "grad_norm": 0.5449597239494324, "learning_rate": 0.00043537685404592086, "loss": 3.0667, "step": 21485 }, { "epoch": 1.05, "grad_norm": 0.5515075325965881, "learning_rate": 0.00043536311439272644, "loss": 3.0646, "step": 21486 }, { "epoch": 1.05, "grad_norm": 0.5687845945358276, "learning_rate": 0.00043534937438301154, "loss": 3.2882, "step": 21487 }, { "epoch": 1.05, "grad_norm": 0.6585469245910645, "learning_rate": 0.00043533563401681254, "loss": 3.0092, "step": 21488 }, { "epoch": 1.05, "grad_norm": 0.5366838574409485, "learning_rate": 0.00043532189329416543, "loss": 3.0872, "step": 21489 }, { "epoch": 1.05, "grad_norm": 0.5649664998054504, "learning_rate": 0.00043530815221510654, "loss": 2.9941, "step": 21490 }, { "epoch": 1.05, "grad_norm": 0.5187537670135498, "learning_rate": 0.000435294410779672, "loss": 3.248, "step": 21491 }, { "epoch": 1.05, "grad_norm": 0.5714203119277954, "learning_rate": 0.0004352806689878981, "loss": 3.077, "step": 21492 }, { "epoch": 1.05, "grad_norm": 0.5206428170204163, "learning_rate": 0.0004352669268398208, "loss": 2.8386, "step": 21493 }, { "epoch": 1.05, "grad_norm": 0.5927132368087769, "learning_rate": 0.00043525318433547654, "loss": 2.9447, "step": 21494 }, { "epoch": 1.05, "grad_norm": 0.5436407923698425, "learning_rate": 0.0004352394414749014, "loss": 3.2219, "step": 21495 }, { "epoch": 1.05, "grad_norm": 0.5328101515769958, "learning_rate": 0.0004352256982581316, "loss": 3.0678, "step": 21496 }, { "epoch": 1.05, "grad_norm": 0.7768601179122925, "learning_rate": 0.0004352119546852034, "loss": 3.0912, "step": 21497 }, { "epoch": 1.05, "grad_norm": 0.531445324420929, "learning_rate": 0.0004351982107561529, "loss": 3.0633, "step": 21498 }, { "epoch": 1.05, "grad_norm": 0.565925657749176, "learning_rate": 0.00043518446647101625, "loss": 3.1066, "step": 21499 }, { "epoch": 1.05, "grad_norm": 0.5696445107460022, "learning_rate": 0.0004351707218298298, "loss": 3.1754, "step": 21500 }, { "epoch": 1.05, "grad_norm": 0.5589838624000549, "learning_rate": 0.0004351569768326298, "loss": 2.9757, "step": 21501 }, { "epoch": 1.05, "grad_norm": 0.539966881275177, "learning_rate": 0.0004351432314794522, "loss": 3.2406, "step": 21502 }, { "epoch": 1.05, "grad_norm": 0.5462685823440552, "learning_rate": 0.0004351294857703333, "loss": 3.1382, "step": 21503 }, { "epoch": 1.05, "grad_norm": 0.5470060110092163, "learning_rate": 0.0004351157397053094, "loss": 3.0518, "step": 21504 }, { "epoch": 1.05, "grad_norm": 0.5174694657325745, "learning_rate": 0.0004351019932844166, "loss": 3.1323, "step": 21505 }, { "epoch": 1.05, "grad_norm": 0.533613920211792, "learning_rate": 0.00043508824650769125, "loss": 3.0013, "step": 21506 }, { "epoch": 1.05, "grad_norm": 0.5810818076133728, "learning_rate": 0.0004350744993751694, "loss": 3.3575, "step": 21507 }, { "epoch": 1.05, "grad_norm": 0.5429716110229492, "learning_rate": 0.0004350607518868873, "loss": 3.0456, "step": 21508 }, { "epoch": 1.05, "grad_norm": 0.5317225456237793, "learning_rate": 0.0004350470040428812, "loss": 3.0374, "step": 21509 }, { "epoch": 1.05, "grad_norm": 0.5231348276138306, "learning_rate": 0.0004350332558431872, "loss": 3.0947, "step": 21510 }, { "epoch": 1.05, "grad_norm": 0.5566467642784119, "learning_rate": 0.00043501950728784165, "loss": 3.2049, "step": 21511 }, { "epoch": 1.05, "grad_norm": 0.5202820301055908, "learning_rate": 0.0004350057583768808, "loss": 3.0884, "step": 21512 }, { "epoch": 1.05, "grad_norm": 0.5291932821273804, "learning_rate": 0.00043499200911034067, "loss": 2.998, "step": 21513 }, { "epoch": 1.05, "grad_norm": 0.5607914924621582, "learning_rate": 0.0004349782594882576, "loss": 3.1113, "step": 21514 }, { "epoch": 1.05, "grad_norm": 0.557773768901825, "learning_rate": 0.0004349645095106677, "loss": 3.0285, "step": 21515 }, { "epoch": 1.05, "grad_norm": 0.5368457436561584, "learning_rate": 0.00043495075917760726, "loss": 3.0034, "step": 21516 }, { "epoch": 1.05, "grad_norm": 0.526913583278656, "learning_rate": 0.00043493700848911256, "loss": 3.1358, "step": 21517 }, { "epoch": 1.05, "grad_norm": 0.5346729755401611, "learning_rate": 0.0004349232574452197, "loss": 3.1478, "step": 21518 }, { "epoch": 1.05, "grad_norm": 0.554854154586792, "learning_rate": 0.00043490950604596504, "loss": 3.1444, "step": 21519 }, { "epoch": 1.05, "grad_norm": 0.5558192133903503, "learning_rate": 0.00043489575429138454, "loss": 2.9225, "step": 21520 }, { "epoch": 1.05, "grad_norm": 0.5165724158287048, "learning_rate": 0.00043488200218151467, "loss": 3.0812, "step": 21521 }, { "epoch": 1.05, "grad_norm": 0.5703836679458618, "learning_rate": 0.0004348682497163916, "loss": 3.0469, "step": 21522 }, { "epoch": 1.05, "grad_norm": 0.5449686050415039, "learning_rate": 0.0004348544968960515, "loss": 3.1636, "step": 21523 }, { "epoch": 1.05, "grad_norm": 0.6131134033203125, "learning_rate": 0.0004348407437205306, "loss": 3.1791, "step": 21524 }, { "epoch": 1.05, "grad_norm": 0.563544750213623, "learning_rate": 0.000434826990189865, "loss": 3.2026, "step": 21525 }, { "epoch": 1.05, "grad_norm": 0.5371457934379578, "learning_rate": 0.0004348132363040913, "loss": 3.04, "step": 21526 }, { "epoch": 1.05, "grad_norm": 0.5195092558860779, "learning_rate": 0.00043479948206324537, "loss": 3.2698, "step": 21527 }, { "epoch": 1.06, "grad_norm": 0.5485871434211731, "learning_rate": 0.0004347857274673635, "loss": 3.0969, "step": 21528 }, { "epoch": 1.06, "grad_norm": 0.5748703479766846, "learning_rate": 0.000434771972516482, "loss": 3.1038, "step": 21529 }, { "epoch": 1.06, "grad_norm": 0.5171026587486267, "learning_rate": 0.0004347582172106371, "loss": 3.1868, "step": 21530 }, { "epoch": 1.06, "grad_norm": 0.5326929092407227, "learning_rate": 0.00043474446154986493, "loss": 2.8315, "step": 21531 }, { "epoch": 1.06, "grad_norm": 0.5850439667701721, "learning_rate": 0.00043473070553420184, "loss": 3.2327, "step": 21532 }, { "epoch": 1.06, "grad_norm": 0.5027980804443359, "learning_rate": 0.00043471694916368393, "loss": 3.0613, "step": 21533 }, { "epoch": 1.06, "grad_norm": 0.5234944224357605, "learning_rate": 0.0004347031924383476, "loss": 3.2077, "step": 21534 }, { "epoch": 1.06, "grad_norm": 0.5578376650810242, "learning_rate": 0.0004346894353582289, "loss": 3.0004, "step": 21535 }, { "epoch": 1.06, "grad_norm": 0.49966779351234436, "learning_rate": 0.00043467567792336413, "loss": 2.9163, "step": 21536 }, { "epoch": 1.06, "grad_norm": 0.5220000147819519, "learning_rate": 0.0004346619201337896, "loss": 3.1377, "step": 21537 }, { "epoch": 1.06, "grad_norm": 0.5169272422790527, "learning_rate": 0.0004346481619895415, "loss": 3.0862, "step": 21538 }, { "epoch": 1.06, "grad_norm": 0.5571988821029663, "learning_rate": 0.000434634403490656, "loss": 2.9343, "step": 21539 }, { "epoch": 1.06, "grad_norm": 0.5117565989494324, "learning_rate": 0.0004346206446371694, "loss": 3.1412, "step": 21540 }, { "epoch": 1.06, "grad_norm": 0.6265973448753357, "learning_rate": 0.00043460688542911797, "loss": 3.1845, "step": 21541 }, { "epoch": 1.06, "grad_norm": 0.526631772518158, "learning_rate": 0.00043459312586653794, "loss": 3.0802, "step": 21542 }, { "epoch": 1.06, "grad_norm": 0.6042065620422363, "learning_rate": 0.00043457936594946543, "loss": 3.0749, "step": 21543 }, { "epoch": 1.06, "grad_norm": 0.5184115171432495, "learning_rate": 0.0004345656056779368, "loss": 2.9749, "step": 21544 }, { "epoch": 1.06, "grad_norm": 0.560426652431488, "learning_rate": 0.0004345518450519884, "loss": 3.2708, "step": 21545 }, { "epoch": 1.06, "grad_norm": 0.5563343167304993, "learning_rate": 0.0004345380840716561, "loss": 3.0454, "step": 21546 }, { "epoch": 1.06, "grad_norm": 0.5319254398345947, "learning_rate": 0.0004345243227369765, "loss": 3.0709, "step": 21547 }, { "epoch": 1.06, "grad_norm": 0.5544840693473816, "learning_rate": 0.00043451056104798583, "loss": 3.2259, "step": 21548 }, { "epoch": 1.06, "grad_norm": 0.5176193714141846, "learning_rate": 0.00043449679900472, "loss": 3.1498, "step": 21549 }, { "epoch": 1.06, "grad_norm": 0.5405734777450562, "learning_rate": 0.0004344830366072156, "loss": 2.9556, "step": 21550 }, { "epoch": 1.06, "grad_norm": 0.5425923466682434, "learning_rate": 0.00043446927385550874, "loss": 3.234, "step": 21551 }, { "epoch": 1.06, "grad_norm": 0.5294197201728821, "learning_rate": 0.00043445551074963577, "loss": 3.239, "step": 21552 }, { "epoch": 1.06, "grad_norm": 0.6730620861053467, "learning_rate": 0.00043444174728963277, "loss": 3.1966, "step": 21553 }, { "epoch": 1.06, "grad_norm": 0.538125216960907, "learning_rate": 0.0004344279834755361, "loss": 3.1703, "step": 21554 }, { "epoch": 1.06, "grad_norm": 0.5608587861061096, "learning_rate": 0.00043441421930738203, "loss": 2.9973, "step": 21555 }, { "epoch": 1.06, "grad_norm": 0.5333779454231262, "learning_rate": 0.00043440045478520677, "loss": 3.3017, "step": 21556 }, { "epoch": 1.06, "grad_norm": 0.5401734113693237, "learning_rate": 0.00043438668990904654, "loss": 3.0712, "step": 21557 }, { "epoch": 1.06, "grad_norm": 0.5446183085441589, "learning_rate": 0.0004343729246789377, "loss": 3.1535, "step": 21558 }, { "epoch": 1.06, "grad_norm": 0.5327209234237671, "learning_rate": 0.0004343591590949164, "loss": 2.9274, "step": 21559 }, { "epoch": 1.06, "grad_norm": 0.6195545196533203, "learning_rate": 0.0004343453931570189, "loss": 3.0559, "step": 21560 }, { "epoch": 1.06, "grad_norm": 0.5407031178474426, "learning_rate": 0.0004343316268652815, "loss": 3.1942, "step": 21561 }, { "epoch": 1.06, "grad_norm": 0.5744057893753052, "learning_rate": 0.0004343178602197404, "loss": 3.0987, "step": 21562 }, { "epoch": 1.06, "grad_norm": 0.5579788684844971, "learning_rate": 0.0004343040932204321, "loss": 3.0874, "step": 21563 }, { "epoch": 1.06, "grad_norm": 0.5619020462036133, "learning_rate": 0.0004342903258673925, "loss": 3.2928, "step": 21564 }, { "epoch": 1.06, "grad_norm": 0.5300469398498535, "learning_rate": 0.00043427655816065803, "loss": 3.3128, "step": 21565 }, { "epoch": 1.06, "grad_norm": 0.5448369979858398, "learning_rate": 0.000434262790100265, "loss": 3.2438, "step": 21566 }, { "epoch": 1.06, "grad_norm": 0.5507218241691589, "learning_rate": 0.00043424902168624963, "loss": 3.1717, "step": 21567 }, { "epoch": 1.06, "grad_norm": 0.5300918221473694, "learning_rate": 0.0004342352529186481, "loss": 2.9758, "step": 21568 }, { "epoch": 1.06, "grad_norm": 0.537909746170044, "learning_rate": 0.0004342214837974968, "loss": 3.0509, "step": 21569 }, { "epoch": 1.06, "grad_norm": 0.5577003955841064, "learning_rate": 0.000434207714322832, "loss": 3.1697, "step": 21570 }, { "epoch": 1.06, "grad_norm": 0.5539290308952332, "learning_rate": 0.00043419394449468974, "loss": 2.8774, "step": 21571 }, { "epoch": 1.06, "grad_norm": 0.5374821424484253, "learning_rate": 0.0004341801743131066, "loss": 2.8361, "step": 21572 }, { "epoch": 1.06, "grad_norm": 0.5573104023933411, "learning_rate": 0.0004341664037781186, "loss": 3.016, "step": 21573 }, { "epoch": 1.06, "grad_norm": 0.5746645331382751, "learning_rate": 0.00043415263288976223, "loss": 3.2311, "step": 21574 }, { "epoch": 1.06, "grad_norm": 0.524361789226532, "learning_rate": 0.00043413886164807357, "loss": 3.2525, "step": 21575 }, { "epoch": 1.06, "grad_norm": 0.5610430836677551, "learning_rate": 0.00043412509005308895, "loss": 3.2134, "step": 21576 }, { "epoch": 1.06, "grad_norm": 0.5629414319992065, "learning_rate": 0.0004341113181048447, "loss": 3.0197, "step": 21577 }, { "epoch": 1.06, "grad_norm": 0.5457921624183655, "learning_rate": 0.00043409754580337704, "loss": 3.1535, "step": 21578 }, { "epoch": 1.06, "grad_norm": 0.5242516398429871, "learning_rate": 0.0004340837731487223, "loss": 3.0792, "step": 21579 }, { "epoch": 1.06, "grad_norm": 0.5492205023765564, "learning_rate": 0.0004340700001409167, "loss": 2.9799, "step": 21580 }, { "epoch": 1.06, "grad_norm": 0.5539306402206421, "learning_rate": 0.0004340562267799964, "loss": 2.9942, "step": 21581 }, { "epoch": 1.06, "grad_norm": 0.5252792835235596, "learning_rate": 0.00043404245306599785, "loss": 2.9845, "step": 21582 }, { "epoch": 1.06, "grad_norm": 0.5377252697944641, "learning_rate": 0.0004340286789989573, "loss": 3.0228, "step": 21583 }, { "epoch": 1.06, "grad_norm": 0.5324434638023376, "learning_rate": 0.000434014904578911, "loss": 3.0127, "step": 21584 }, { "epoch": 1.06, "grad_norm": 0.5117992162704468, "learning_rate": 0.0004340011298058953, "loss": 3.0829, "step": 21585 }, { "epoch": 1.06, "grad_norm": 0.5125967860221863, "learning_rate": 0.0004339873546799464, "loss": 3.1856, "step": 21586 }, { "epoch": 1.06, "grad_norm": 0.502491295337677, "learning_rate": 0.0004339735792011004, "loss": 3.1148, "step": 21587 }, { "epoch": 1.06, "grad_norm": 0.5541898608207703, "learning_rate": 0.000433959803369394, "loss": 3.0582, "step": 21588 }, { "epoch": 1.06, "grad_norm": 0.5374077558517456, "learning_rate": 0.0004339460271848632, "loss": 3.0841, "step": 21589 }, { "epoch": 1.06, "grad_norm": 0.5276811122894287, "learning_rate": 0.00043393225064754427, "loss": 3.1326, "step": 21590 }, { "epoch": 1.06, "grad_norm": 0.5653324723243713, "learning_rate": 0.0004339184737574736, "loss": 3.1166, "step": 21591 }, { "epoch": 1.06, "grad_norm": 0.570012092590332, "learning_rate": 0.0004339046965146875, "loss": 2.9614, "step": 21592 }, { "epoch": 1.06, "grad_norm": 1.030922770500183, "learning_rate": 0.00043389091891922216, "loss": 3.1976, "step": 21593 }, { "epoch": 1.06, "grad_norm": 0.5297044515609741, "learning_rate": 0.0004338771409711139, "loss": 3.275, "step": 21594 }, { "epoch": 1.06, "grad_norm": 0.5701044201850891, "learning_rate": 0.00043386336267039907, "loss": 2.9492, "step": 21595 }, { "epoch": 1.06, "grad_norm": 0.5452165007591248, "learning_rate": 0.0004338495840171139, "loss": 3.2421, "step": 21596 }, { "epoch": 1.06, "grad_norm": 0.5350081324577332, "learning_rate": 0.00043383580501129453, "loss": 2.9024, "step": 21597 }, { "epoch": 1.06, "grad_norm": 0.5603721737861633, "learning_rate": 0.00043382202565297757, "loss": 2.9177, "step": 21598 }, { "epoch": 1.06, "grad_norm": 0.559532642364502, "learning_rate": 0.00043380824594219916, "loss": 3.1842, "step": 21599 }, { "epoch": 1.06, "grad_norm": 0.5277261137962341, "learning_rate": 0.00043379446587899547, "loss": 3.056, "step": 21600 }, { "epoch": 1.06, "grad_norm": 0.5619999766349792, "learning_rate": 0.000433780685463403, "loss": 3.2529, "step": 21601 }, { "epoch": 1.06, "grad_norm": 0.5646684169769287, "learning_rate": 0.0004337669046954578, "loss": 3.2436, "step": 21602 }, { "epoch": 1.06, "grad_norm": 0.5427494645118713, "learning_rate": 0.0004337531235751965, "loss": 3.1102, "step": 21603 }, { "epoch": 1.06, "grad_norm": 0.5302757620811462, "learning_rate": 0.00043373934210265517, "loss": 3.152, "step": 21604 }, { "epoch": 1.06, "grad_norm": 0.5829156637191772, "learning_rate": 0.00043372556027787014, "loss": 2.9137, "step": 21605 }, { "epoch": 1.06, "grad_norm": 0.5077518820762634, "learning_rate": 0.0004337117781008777, "loss": 2.888, "step": 21606 }, { "epoch": 1.06, "grad_norm": 0.5359871983528137, "learning_rate": 0.00043369799557171413, "loss": 3.1001, "step": 21607 }, { "epoch": 1.06, "grad_norm": 0.5384306311607361, "learning_rate": 0.0004336842126904159, "loss": 3.2559, "step": 21608 }, { "epoch": 1.06, "grad_norm": 0.5284044742584229, "learning_rate": 0.00043367042945701903, "loss": 3.4134, "step": 21609 }, { "epoch": 1.06, "grad_norm": 0.5183429718017578, "learning_rate": 0.0004336566458715601, "loss": 2.9548, "step": 21610 }, { "epoch": 1.06, "grad_norm": 0.5918934345245361, "learning_rate": 0.0004336428619340752, "loss": 3.1684, "step": 21611 }, { "epoch": 1.06, "grad_norm": 0.555814266204834, "learning_rate": 0.0004336290776446007, "loss": 2.91, "step": 21612 }, { "epoch": 1.06, "grad_norm": 0.551300048828125, "learning_rate": 0.000433615293003173, "loss": 3.1364, "step": 21613 }, { "epoch": 1.06, "grad_norm": 0.5664514899253845, "learning_rate": 0.00043360150800982836, "loss": 3.1111, "step": 21614 }, { "epoch": 1.06, "grad_norm": 0.5319811105728149, "learning_rate": 0.000433587722664603, "loss": 2.9241, "step": 21615 }, { "epoch": 1.06, "grad_norm": 0.560508131980896, "learning_rate": 0.00043357393696753327, "loss": 3.0344, "step": 21616 }, { "epoch": 1.06, "grad_norm": 0.5378097891807556, "learning_rate": 0.00043356015091865555, "loss": 3.2021, "step": 21617 }, { "epoch": 1.06, "grad_norm": 0.5653480887413025, "learning_rate": 0.00043354636451800604, "loss": 3.1775, "step": 21618 }, { "epoch": 1.06, "grad_norm": 0.5392255187034607, "learning_rate": 0.00043353257776562114, "loss": 3.4296, "step": 21619 }, { "epoch": 1.06, "grad_norm": 0.5405376553535461, "learning_rate": 0.00043351879066153714, "loss": 3.2874, "step": 21620 }, { "epoch": 1.06, "grad_norm": 0.5263589024543762, "learning_rate": 0.0004335050032057904, "loss": 3.0824, "step": 21621 }, { "epoch": 1.06, "grad_norm": 0.5315642356872559, "learning_rate": 0.0004334912153984171, "loss": 3.1399, "step": 21622 }, { "epoch": 1.06, "grad_norm": 0.5322493314743042, "learning_rate": 0.0004334774272394536, "loss": 3.1131, "step": 21623 }, { "epoch": 1.06, "grad_norm": 0.5302774310112, "learning_rate": 0.0004334636387289362, "loss": 3.124, "step": 21624 }, { "epoch": 1.06, "grad_norm": 0.5446813702583313, "learning_rate": 0.00043344984986690144, "loss": 3.0245, "step": 21625 }, { "epoch": 1.06, "grad_norm": 0.5131939649581909, "learning_rate": 0.00043343606065338535, "loss": 2.8756, "step": 21626 }, { "epoch": 1.06, "grad_norm": 0.5474516153335571, "learning_rate": 0.0004334222710884243, "loss": 3.3199, "step": 21627 }, { "epoch": 1.06, "grad_norm": 0.5619019269943237, "learning_rate": 0.00043340848117205476, "loss": 3.1003, "step": 21628 }, { "epoch": 1.06, "grad_norm": 0.5801265239715576, "learning_rate": 0.0004333946909043129, "loss": 2.9848, "step": 21629 }, { "epoch": 1.06, "grad_norm": 0.5769397616386414, "learning_rate": 0.00043338090028523517, "loss": 3.037, "step": 21630 }, { "epoch": 1.06, "grad_norm": 0.5513659119606018, "learning_rate": 0.0004333671093148577, "loss": 2.9997, "step": 21631 }, { "epoch": 1.06, "grad_norm": 0.5560690760612488, "learning_rate": 0.0004333533179932171, "loss": 2.9288, "step": 21632 }, { "epoch": 1.06, "grad_norm": 0.5233303308486938, "learning_rate": 0.00043333952632034937, "loss": 3.0383, "step": 21633 }, { "epoch": 1.06, "grad_norm": 0.5534685850143433, "learning_rate": 0.00043332573429629106, "loss": 3.0307, "step": 21634 }, { "epoch": 1.06, "grad_norm": 0.5539918541908264, "learning_rate": 0.00043331194192107843, "loss": 3.0601, "step": 21635 }, { "epoch": 1.06, "grad_norm": 0.5524195432662964, "learning_rate": 0.0004332981491947478, "loss": 3.0906, "step": 21636 }, { "epoch": 1.06, "grad_norm": 0.5301859974861145, "learning_rate": 0.00043328435611733547, "loss": 3.0146, "step": 21637 }, { "epoch": 1.06, "grad_norm": 0.5274763107299805, "learning_rate": 0.0004332705626888778, "loss": 3.1862, "step": 21638 }, { "epoch": 1.06, "grad_norm": 0.6033293604850769, "learning_rate": 0.0004332567689094111, "loss": 2.8906, "step": 21639 }, { "epoch": 1.06, "grad_norm": 0.5655192732810974, "learning_rate": 0.00043324297477897176, "loss": 3.1, "step": 21640 }, { "epoch": 1.06, "grad_norm": 0.578748345375061, "learning_rate": 0.00043322918029759606, "loss": 3.1724, "step": 21641 }, { "epoch": 1.06, "grad_norm": 0.5158365368843079, "learning_rate": 0.0004332153854653203, "loss": 3.1176, "step": 21642 }, { "epoch": 1.06, "grad_norm": 0.5151905417442322, "learning_rate": 0.0004332015902821809, "loss": 3.1752, "step": 21643 }, { "epoch": 1.06, "grad_norm": 0.5332306027412415, "learning_rate": 0.0004331877947482141, "loss": 3.0743, "step": 21644 }, { "epoch": 1.06, "grad_norm": 0.576889157295227, "learning_rate": 0.0004331739988634563, "loss": 2.9053, "step": 21645 }, { "epoch": 1.06, "grad_norm": 0.5512627363204956, "learning_rate": 0.00043316020262794386, "loss": 3.171, "step": 21646 }, { "epoch": 1.06, "grad_norm": 0.5933674573898315, "learning_rate": 0.00043314640604171303, "loss": 3.1488, "step": 21647 }, { "epoch": 1.06, "grad_norm": 0.5496378540992737, "learning_rate": 0.00043313260910480015, "loss": 3.0251, "step": 21648 }, { "epoch": 1.06, "grad_norm": 0.5148006677627563, "learning_rate": 0.0004331188118172417, "loss": 3.2106, "step": 21649 }, { "epoch": 1.06, "grad_norm": 0.5798931121826172, "learning_rate": 0.0004331050141790739, "loss": 2.9724, "step": 21650 }, { "epoch": 1.06, "grad_norm": 0.567121684551239, "learning_rate": 0.0004330912161903331, "loss": 3.1009, "step": 21651 }, { "epoch": 1.06, "grad_norm": 0.5588269233703613, "learning_rate": 0.00043307741785105554, "loss": 2.9866, "step": 21652 }, { "epoch": 1.06, "grad_norm": 0.5300847291946411, "learning_rate": 0.0004330636191612778, "loss": 2.9866, "step": 21653 }, { "epoch": 1.06, "grad_norm": 0.5314716100692749, "learning_rate": 0.00043304982012103604, "loss": 3.0428, "step": 21654 }, { "epoch": 1.06, "grad_norm": 0.5971154570579529, "learning_rate": 0.0004330360207303667, "loss": 3.1492, "step": 21655 }, { "epoch": 1.06, "grad_norm": 0.7022563219070435, "learning_rate": 0.00043302222098930604, "loss": 3.0283, "step": 21656 }, { "epoch": 1.06, "grad_norm": 0.5879517793655396, "learning_rate": 0.0004330084208978905, "loss": 3.0705, "step": 21657 }, { "epoch": 1.06, "grad_norm": 0.5577943921089172, "learning_rate": 0.0004329946204561563, "loss": 3.0612, "step": 21658 }, { "epoch": 1.06, "grad_norm": 0.5404565930366516, "learning_rate": 0.0004329808196641399, "loss": 3.1377, "step": 21659 }, { "epoch": 1.06, "grad_norm": 0.5410740971565247, "learning_rate": 0.00043296701852187764, "loss": 3.3028, "step": 21660 }, { "epoch": 1.06, "grad_norm": 0.5340285897254944, "learning_rate": 0.0004329532170294059, "loss": 3.3272, "step": 21661 }, { "epoch": 1.06, "grad_norm": 0.5251579284667969, "learning_rate": 0.0004329394151867609, "loss": 3.6786, "step": 21662 }, { "epoch": 1.06, "grad_norm": 0.5060845613479614, "learning_rate": 0.00043292561299397906, "loss": 3.2364, "step": 21663 }, { "epoch": 1.06, "grad_norm": 0.568199872970581, "learning_rate": 0.0004329118104510967, "loss": 2.9753, "step": 21664 }, { "epoch": 1.06, "grad_norm": 0.5439720749855042, "learning_rate": 0.0004328980075581503, "loss": 3.0583, "step": 21665 }, { "epoch": 1.06, "grad_norm": 0.5244399309158325, "learning_rate": 0.00043288420431517613, "loss": 3.0426, "step": 21666 }, { "epoch": 1.06, "grad_norm": 0.5180443525314331, "learning_rate": 0.0004328704007222105, "loss": 3.1875, "step": 21667 }, { "epoch": 1.06, "grad_norm": 0.48462343215942383, "learning_rate": 0.00043285659677928974, "loss": 3.0099, "step": 21668 }, { "epoch": 1.06, "grad_norm": 0.5198360681533813, "learning_rate": 0.00043284279248645034, "loss": 2.9479, "step": 21669 }, { "epoch": 1.06, "grad_norm": 0.5512001514434814, "learning_rate": 0.0004328289878437286, "loss": 3.1317, "step": 21670 }, { "epoch": 1.06, "grad_norm": 0.5167283415794373, "learning_rate": 0.00043281518285116083, "loss": 3.2503, "step": 21671 }, { "epoch": 1.06, "grad_norm": 0.5513035655021667, "learning_rate": 0.0004328013775087835, "loss": 3.2889, "step": 21672 }, { "epoch": 1.06, "grad_norm": 0.5547505617141724, "learning_rate": 0.0004327875718166328, "loss": 3.1017, "step": 21673 }, { "epoch": 1.06, "grad_norm": 0.5487305521965027, "learning_rate": 0.0004327737657747452, "loss": 2.9976, "step": 21674 }, { "epoch": 1.06, "grad_norm": 0.5412231087684631, "learning_rate": 0.00043275995938315713, "loss": 3.1704, "step": 21675 }, { "epoch": 1.06, "grad_norm": 0.5449442863464355, "learning_rate": 0.0004327461526419049, "loss": 3.0803, "step": 21676 }, { "epoch": 1.06, "grad_norm": 0.566912829875946, "learning_rate": 0.0004327323455510248, "loss": 3.2645, "step": 21677 }, { "epoch": 1.06, "grad_norm": 0.551240861415863, "learning_rate": 0.00043271853811055315, "loss": 2.908, "step": 21678 }, { "epoch": 1.06, "grad_norm": 0.5297688245773315, "learning_rate": 0.00043270473032052656, "loss": 3.0444, "step": 21679 }, { "epoch": 1.06, "grad_norm": 0.5144250392913818, "learning_rate": 0.00043269092218098116, "loss": 2.945, "step": 21680 }, { "epoch": 1.06, "grad_norm": 0.5448917746543884, "learning_rate": 0.00043267711369195344, "loss": 3.1495, "step": 21681 }, { "epoch": 1.06, "grad_norm": 0.5812903642654419, "learning_rate": 0.00043266330485347977, "loss": 3.1761, "step": 21682 }, { "epoch": 1.06, "grad_norm": 0.5192112922668457, "learning_rate": 0.0004326494956655965, "loss": 3.2557, "step": 21683 }, { "epoch": 1.06, "grad_norm": 0.5966460704803467, "learning_rate": 0.00043263568612833986, "loss": 3.0654, "step": 21684 }, { "epoch": 1.06, "grad_norm": 0.5527370572090149, "learning_rate": 0.00043262187624174643, "loss": 3.0416, "step": 21685 }, { "epoch": 1.06, "grad_norm": 0.5254831910133362, "learning_rate": 0.0004326080660058525, "loss": 3.1767, "step": 21686 }, { "epoch": 1.06, "grad_norm": 0.5413762927055359, "learning_rate": 0.0004325942554206945, "loss": 3.2666, "step": 21687 }, { "epoch": 1.06, "grad_norm": 0.5316290855407715, "learning_rate": 0.0004325804444863087, "loss": 2.9736, "step": 21688 }, { "epoch": 1.06, "grad_norm": 0.5323042273521423, "learning_rate": 0.00043256663320273146, "loss": 2.9594, "step": 21689 }, { "epoch": 1.06, "grad_norm": 0.5886972546577454, "learning_rate": 0.0004325528215699993, "loss": 3.1795, "step": 21690 }, { "epoch": 1.06, "grad_norm": 0.5144338607788086, "learning_rate": 0.00043253900958814854, "loss": 2.8915, "step": 21691 }, { "epoch": 1.06, "grad_norm": 0.5302631258964539, "learning_rate": 0.0004325251972572155, "loss": 3.2102, "step": 21692 }, { "epoch": 1.06, "grad_norm": 0.534359335899353, "learning_rate": 0.00043251138457723655, "loss": 3.0445, "step": 21693 }, { "epoch": 1.06, "grad_norm": 0.5207918882369995, "learning_rate": 0.0004324975715482482, "loss": 3.1168, "step": 21694 }, { "epoch": 1.06, "grad_norm": 0.5734944343566895, "learning_rate": 0.00043248375817028664, "loss": 2.9265, "step": 21695 }, { "epoch": 1.06, "grad_norm": 0.5305697321891785, "learning_rate": 0.00043246994444338845, "loss": 3.1291, "step": 21696 }, { "epoch": 1.06, "grad_norm": 0.5317648649215698, "learning_rate": 0.00043245613036758994, "loss": 2.9974, "step": 21697 }, { "epoch": 1.06, "grad_norm": 0.5591028332710266, "learning_rate": 0.0004324423159429275, "loss": 3.0407, "step": 21698 }, { "epoch": 1.06, "grad_norm": 0.5748549103736877, "learning_rate": 0.0004324285011694374, "loss": 3.2274, "step": 21699 }, { "epoch": 1.06, "grad_norm": 0.5331709980964661, "learning_rate": 0.000432414686047156, "loss": 3.1211, "step": 21700 }, { "epoch": 1.06, "grad_norm": 0.5106818079948425, "learning_rate": 0.00043240087057612003, "loss": 2.9146, "step": 21701 }, { "epoch": 1.06, "grad_norm": 0.5366308689117432, "learning_rate": 0.0004323870547563655, "loss": 3.3333, "step": 21702 }, { "epoch": 1.06, "grad_norm": 0.5502612590789795, "learning_rate": 0.000432373238587929, "loss": 3.2715, "step": 21703 }, { "epoch": 1.06, "grad_norm": 0.5137237906455994, "learning_rate": 0.00043235942207084686, "loss": 3.1082, "step": 21704 }, { "epoch": 1.06, "grad_norm": 0.506195068359375, "learning_rate": 0.00043234560520515546, "loss": 2.997, "step": 21705 }, { "epoch": 1.06, "grad_norm": 0.5691025257110596, "learning_rate": 0.0004323317879908913, "loss": 3.1141, "step": 21706 }, { "epoch": 1.06, "grad_norm": 0.5307044386863708, "learning_rate": 0.0004323179704280905, "loss": 2.9933, "step": 21707 }, { "epoch": 1.06, "grad_norm": 0.5617866516113281, "learning_rate": 0.0004323041525167898, "loss": 2.9266, "step": 21708 }, { "epoch": 1.06, "grad_norm": 0.5357836484909058, "learning_rate": 0.0004322903342570254, "loss": 2.938, "step": 21709 }, { "epoch": 1.06, "grad_norm": 0.5504767894744873, "learning_rate": 0.0004322765156488336, "loss": 3.1159, "step": 21710 }, { "epoch": 1.06, "grad_norm": 0.537348747253418, "learning_rate": 0.00043226269669225097, "loss": 3.1031, "step": 21711 }, { "epoch": 1.06, "grad_norm": 0.5378528237342834, "learning_rate": 0.0004322488773873139, "loss": 3.1137, "step": 21712 }, { "epoch": 1.06, "grad_norm": 0.5131245255470276, "learning_rate": 0.00043223505773405874, "loss": 3.0508, "step": 21713 }, { "epoch": 1.06, "grad_norm": 0.533130943775177, "learning_rate": 0.00043222123773252187, "loss": 3.1552, "step": 21714 }, { "epoch": 1.06, "grad_norm": 0.5789387822151184, "learning_rate": 0.0004322074173827396, "loss": 3.1302, "step": 21715 }, { "epoch": 1.06, "grad_norm": 0.5390761494636536, "learning_rate": 0.0004321935966847486, "loss": 2.9585, "step": 21716 }, { "epoch": 1.06, "grad_norm": 0.5794828534126282, "learning_rate": 0.00043217977563858503, "loss": 3.1416, "step": 21717 }, { "epoch": 1.06, "grad_norm": 0.5323156118392944, "learning_rate": 0.00043216595424428535, "loss": 3.0928, "step": 21718 }, { "epoch": 1.06, "grad_norm": 0.5307153463363647, "learning_rate": 0.00043215213250188603, "loss": 3.1511, "step": 21719 }, { "epoch": 1.06, "grad_norm": 0.5392312407493591, "learning_rate": 0.00043213831041142344, "loss": 3.0964, "step": 21720 }, { "epoch": 1.06, "grad_norm": 0.5451873540878296, "learning_rate": 0.0004321244879729339, "loss": 3.0374, "step": 21721 }, { "epoch": 1.06, "grad_norm": 0.5187518000602722, "learning_rate": 0.0004321106651864539, "loss": 3.1617, "step": 21722 }, { "epoch": 1.06, "grad_norm": 0.5146177411079407, "learning_rate": 0.0004320968420520199, "loss": 2.9917, "step": 21723 }, { "epoch": 1.06, "grad_norm": 0.5389916300773621, "learning_rate": 0.00043208301856966825, "loss": 3.11, "step": 21724 }, { "epoch": 1.06, "grad_norm": 0.6462126970291138, "learning_rate": 0.0004320691947394352, "loss": 3.0173, "step": 21725 }, { "epoch": 1.06, "grad_norm": 0.5558220148086548, "learning_rate": 0.00043205537056135735, "loss": 3.0579, "step": 21726 }, { "epoch": 1.06, "grad_norm": 0.5146065354347229, "learning_rate": 0.0004320415460354712, "loss": 3.0825, "step": 21727 }, { "epoch": 1.06, "grad_norm": 0.5261913537979126, "learning_rate": 0.00043202772116181297, "loss": 3.142, "step": 21728 }, { "epoch": 1.06, "grad_norm": 0.5271784663200378, "learning_rate": 0.0004320138959404191, "loss": 3.0003, "step": 21729 }, { "epoch": 1.06, "grad_norm": 0.5543608665466309, "learning_rate": 0.00043200007037132605, "loss": 3.0188, "step": 21730 }, { "epoch": 1.06, "grad_norm": 0.5403302311897278, "learning_rate": 0.00043198624445457023, "loss": 3.0804, "step": 21731 }, { "epoch": 1.07, "grad_norm": 0.5381479263305664, "learning_rate": 0.000431972418190188, "loss": 3.0915, "step": 21732 }, { "epoch": 1.07, "grad_norm": 0.5359466075897217, "learning_rate": 0.0004319585915782159, "loss": 3.1204, "step": 21733 }, { "epoch": 1.07, "grad_norm": 0.5506048202514648, "learning_rate": 0.0004319447646186902, "loss": 3.1377, "step": 21734 }, { "epoch": 1.07, "grad_norm": 0.5816706418991089, "learning_rate": 0.00043193093731164737, "loss": 3.3217, "step": 21735 }, { "epoch": 1.07, "grad_norm": 0.543936014175415, "learning_rate": 0.00043191710965712385, "loss": 3.3028, "step": 21736 }, { "epoch": 1.07, "grad_norm": 0.5475443601608276, "learning_rate": 0.0004319032816551561, "loss": 3.1762, "step": 21737 }, { "epoch": 1.07, "grad_norm": 0.5085486173629761, "learning_rate": 0.0004318894533057805, "loss": 3.2277, "step": 21738 }, { "epoch": 1.07, "grad_norm": 0.5390211343765259, "learning_rate": 0.0004318756246090334, "loss": 3.0807, "step": 21739 }, { "epoch": 1.07, "grad_norm": 0.5398746132850647, "learning_rate": 0.00043186179556495134, "loss": 2.9588, "step": 21740 }, { "epoch": 1.07, "grad_norm": 0.5575569272041321, "learning_rate": 0.00043184796617357063, "loss": 2.9674, "step": 21741 }, { "epoch": 1.07, "grad_norm": 0.5330104231834412, "learning_rate": 0.00043183413643492784, "loss": 3.0524, "step": 21742 }, { "epoch": 1.07, "grad_norm": 0.5363730788230896, "learning_rate": 0.0004318203063490592, "loss": 3.0613, "step": 21743 }, { "epoch": 1.07, "grad_norm": 0.5895825028419495, "learning_rate": 0.0004318064759160013, "loss": 2.9534, "step": 21744 }, { "epoch": 1.07, "grad_norm": 0.560642659664154, "learning_rate": 0.00043179264513579056, "loss": 2.9325, "step": 21745 }, { "epoch": 1.07, "grad_norm": 0.588516354560852, "learning_rate": 0.0004317788140084632, "loss": 2.9729, "step": 21746 }, { "epoch": 1.07, "grad_norm": 0.5563256144523621, "learning_rate": 0.0004317649825340559, "loss": 3.0784, "step": 21747 }, { "epoch": 1.07, "grad_norm": 0.51703280210495, "learning_rate": 0.00043175115071260496, "loss": 3.132, "step": 21748 }, { "epoch": 1.07, "grad_norm": 0.6014596819877625, "learning_rate": 0.00043173731854414697, "loss": 3.1507, "step": 21749 }, { "epoch": 1.07, "grad_norm": 0.5737568736076355, "learning_rate": 0.00043172348602871815, "loss": 3.1788, "step": 21750 }, { "epoch": 1.07, "grad_norm": 0.5655453205108643, "learning_rate": 0.0004317096531663549, "loss": 3.0277, "step": 21751 }, { "epoch": 1.07, "grad_norm": 0.5275116562843323, "learning_rate": 0.000431695819957094, "loss": 3.0195, "step": 21752 }, { "epoch": 1.07, "grad_norm": 0.5256880521774292, "learning_rate": 0.00043168198640097154, "loss": 3.193, "step": 21753 }, { "epoch": 1.07, "grad_norm": 0.547167181968689, "learning_rate": 0.00043166815249802404, "loss": 2.8198, "step": 21754 }, { "epoch": 1.07, "grad_norm": 0.522355854511261, "learning_rate": 0.000431654318248288, "loss": 3.2742, "step": 21755 }, { "epoch": 1.07, "grad_norm": 0.5362539887428284, "learning_rate": 0.00043164048365179977, "loss": 3.1321, "step": 21756 }, { "epoch": 1.07, "grad_norm": 0.5473201274871826, "learning_rate": 0.00043162664870859596, "loss": 3.2113, "step": 21757 }, { "epoch": 1.07, "grad_norm": 0.5578786134719849, "learning_rate": 0.0004316128134187128, "loss": 3.3162, "step": 21758 }, { "epoch": 1.07, "grad_norm": 0.5824030041694641, "learning_rate": 0.0004315989777821868, "loss": 3.1726, "step": 21759 }, { "epoch": 1.07, "grad_norm": 0.5133621096611023, "learning_rate": 0.00043158514179905455, "loss": 3.0582, "step": 21760 }, { "epoch": 1.07, "grad_norm": 0.5308951139450073, "learning_rate": 0.00043157130546935224, "loss": 3.3032, "step": 21761 }, { "epoch": 1.07, "grad_norm": 0.5566686391830444, "learning_rate": 0.0004315574687931164, "loss": 3.3004, "step": 21762 }, { "epoch": 1.07, "grad_norm": 0.6115220189094543, "learning_rate": 0.0004315436317703836, "loss": 2.962, "step": 21763 }, { "epoch": 1.07, "grad_norm": 0.52565997838974, "learning_rate": 0.0004315297944011902, "loss": 3.1985, "step": 21764 }, { "epoch": 1.07, "grad_norm": 0.5332111716270447, "learning_rate": 0.0004315159566855726, "loss": 3.0903, "step": 21765 }, { "epoch": 1.07, "grad_norm": 0.5685824155807495, "learning_rate": 0.00043150211862356724, "loss": 2.9799, "step": 21766 }, { "epoch": 1.07, "grad_norm": 0.5400283932685852, "learning_rate": 0.0004314882802152107, "loss": 3.1849, "step": 21767 }, { "epoch": 1.07, "grad_norm": 0.5025684833526611, "learning_rate": 0.00043147444146053924, "loss": 3.1382, "step": 21768 }, { "epoch": 1.07, "grad_norm": 0.5145972371101379, "learning_rate": 0.0004314606023595894, "loss": 3.0659, "step": 21769 }, { "epoch": 1.07, "grad_norm": 0.5135764479637146, "learning_rate": 0.0004314467629123977, "loss": 3.0465, "step": 21770 }, { "epoch": 1.07, "grad_norm": 0.5721059441566467, "learning_rate": 0.00043143292311900055, "loss": 3.1543, "step": 21771 }, { "epoch": 1.07, "grad_norm": 0.5446774959564209, "learning_rate": 0.00043141908297943425, "loss": 3.3055, "step": 21772 }, { "epoch": 1.07, "grad_norm": 0.5134919285774231, "learning_rate": 0.0004314052424937355, "loss": 2.8909, "step": 21773 }, { "epoch": 1.07, "grad_norm": 0.518467903137207, "learning_rate": 0.00043139140166194057, "loss": 3.2775, "step": 21774 }, { "epoch": 1.07, "grad_norm": 0.5316968560218811, "learning_rate": 0.000431377560484086, "loss": 3.2102, "step": 21775 }, { "epoch": 1.07, "grad_norm": 0.5495380163192749, "learning_rate": 0.0004313637189602082, "loss": 3.1133, "step": 21776 }, { "epoch": 1.07, "grad_norm": 0.5162259340286255, "learning_rate": 0.00043134987709034364, "loss": 3.1323, "step": 21777 }, { "epoch": 1.07, "grad_norm": 0.5471155047416687, "learning_rate": 0.0004313360348745289, "loss": 3.0562, "step": 21778 }, { "epoch": 1.07, "grad_norm": 0.5574952960014343, "learning_rate": 0.0004313221923128001, "loss": 3.027, "step": 21779 }, { "epoch": 1.07, "grad_norm": 0.5334025025367737, "learning_rate": 0.00043130834940519405, "loss": 3.2028, "step": 21780 }, { "epoch": 1.07, "grad_norm": 0.5593041181564331, "learning_rate": 0.0004312945061517471, "loss": 3.1218, "step": 21781 }, { "epoch": 1.07, "grad_norm": 0.5666584968566895, "learning_rate": 0.00043128066255249565, "loss": 3.173, "step": 21782 }, { "epoch": 1.07, "grad_norm": 0.5271990299224854, "learning_rate": 0.0004312668186074762, "loss": 3.0105, "step": 21783 }, { "epoch": 1.07, "grad_norm": 0.5458244681358337, "learning_rate": 0.0004312529743167252, "loss": 2.9528, "step": 21784 }, { "epoch": 1.07, "grad_norm": 0.5140916705131531, "learning_rate": 0.0004312391296802792, "loss": 3.1281, "step": 21785 }, { "epoch": 1.07, "grad_norm": 0.5619597434997559, "learning_rate": 0.0004312252846981745, "loss": 2.9598, "step": 21786 }, { "epoch": 1.07, "grad_norm": 0.5158681273460388, "learning_rate": 0.0004312114393704476, "loss": 3.0605, "step": 21787 }, { "epoch": 1.07, "grad_norm": 0.6134312152862549, "learning_rate": 0.00043119759369713515, "loss": 3.008, "step": 21788 }, { "epoch": 1.07, "grad_norm": 0.5468149781227112, "learning_rate": 0.0004311837476782735, "loss": 2.905, "step": 21789 }, { "epoch": 1.07, "grad_norm": 0.5272074341773987, "learning_rate": 0.000431169901313899, "loss": 3.1477, "step": 21790 }, { "epoch": 1.07, "grad_norm": 0.5446023344993591, "learning_rate": 0.0004311560546040483, "loss": 3.1807, "step": 21791 }, { "epoch": 1.07, "grad_norm": 0.5380364656448364, "learning_rate": 0.0004311422075487578, "loss": 2.9937, "step": 21792 }, { "epoch": 1.07, "grad_norm": 0.5410694479942322, "learning_rate": 0.00043112836014806385, "loss": 3.0857, "step": 21793 }, { "epoch": 1.07, "grad_norm": 0.5312390327453613, "learning_rate": 0.00043111451240200317, "loss": 2.923, "step": 21794 }, { "epoch": 1.07, "grad_norm": 0.5283505320549011, "learning_rate": 0.000431100664310612, "loss": 3.2019, "step": 21795 }, { "epoch": 1.07, "grad_norm": 0.5480759739875793, "learning_rate": 0.000431086815873927, "loss": 3.091, "step": 21796 }, { "epoch": 1.07, "grad_norm": 0.5413346290588379, "learning_rate": 0.00043107296709198447, "loss": 3.1228, "step": 21797 }, { "epoch": 1.07, "grad_norm": 0.6117550134658813, "learning_rate": 0.00043105911796482104, "loss": 3.184, "step": 21798 }, { "epoch": 1.07, "grad_norm": 0.514468252658844, "learning_rate": 0.00043104526849247306, "loss": 3.165, "step": 21799 }, { "epoch": 1.07, "grad_norm": 0.542933464050293, "learning_rate": 0.00043103141867497713, "loss": 3.1681, "step": 21800 }, { "epoch": 1.07, "grad_norm": 0.5387585163116455, "learning_rate": 0.0004310175685123696, "loss": 3.1421, "step": 21801 }, { "epoch": 1.07, "grad_norm": 0.5503361821174622, "learning_rate": 0.00043100371800468696, "loss": 3.2455, "step": 21802 }, { "epoch": 1.07, "grad_norm": 0.5273892879486084, "learning_rate": 0.00043098986715196596, "loss": 3.1728, "step": 21803 }, { "epoch": 1.07, "grad_norm": 0.5189810395240784, "learning_rate": 0.0004309760159542426, "loss": 3.1855, "step": 21804 }, { "epoch": 1.07, "grad_norm": 0.5295484066009521, "learning_rate": 0.0004309621644115538, "loss": 3.0618, "step": 21805 }, { "epoch": 1.07, "grad_norm": 0.5467668175697327, "learning_rate": 0.0004309483125239358, "loss": 2.8678, "step": 21806 }, { "epoch": 1.07, "grad_norm": 0.5472779273986816, "learning_rate": 0.00043093446029142515, "loss": 3.0829, "step": 21807 }, { "epoch": 1.07, "grad_norm": 0.5611221194267273, "learning_rate": 0.0004309206077140583, "loss": 3.1447, "step": 21808 }, { "epoch": 1.07, "grad_norm": 0.5621599555015564, "learning_rate": 0.0004309067547918718, "loss": 3.067, "step": 21809 }, { "epoch": 1.07, "grad_norm": 0.5594969391822815, "learning_rate": 0.0004308929015249021, "loss": 3.1116, "step": 21810 }, { "epoch": 1.07, "grad_norm": 0.5223482847213745, "learning_rate": 0.0004308790479131857, "loss": 3.0363, "step": 21811 }, { "epoch": 1.07, "grad_norm": 0.5609418749809265, "learning_rate": 0.00043086519395675897, "loss": 3.0169, "step": 21812 }, { "epoch": 1.07, "grad_norm": 0.5535745024681091, "learning_rate": 0.0004308513396556585, "loss": 2.7776, "step": 21813 }, { "epoch": 1.07, "grad_norm": 0.5150181651115417, "learning_rate": 0.000430837485009921, "loss": 2.8866, "step": 21814 }, { "epoch": 1.07, "grad_norm": 0.5247113108634949, "learning_rate": 0.0004308236300195826, "loss": 3.1063, "step": 21815 }, { "epoch": 1.07, "grad_norm": 0.5388785600662231, "learning_rate": 0.00043080977468467995, "loss": 3.3039, "step": 21816 }, { "epoch": 1.07, "grad_norm": 0.6435933709144592, "learning_rate": 0.00043079591900524954, "loss": 2.9531, "step": 21817 }, { "epoch": 1.07, "grad_norm": 0.5959275960922241, "learning_rate": 0.00043078206298132783, "loss": 3.0319, "step": 21818 }, { "epoch": 1.07, "grad_norm": 0.5229818820953369, "learning_rate": 0.00043076820661295135, "loss": 3.1529, "step": 21819 }, { "epoch": 1.07, "grad_norm": 0.5054709911346436, "learning_rate": 0.0004307543499001566, "loss": 3.1894, "step": 21820 }, { "epoch": 1.07, "grad_norm": 0.5688246488571167, "learning_rate": 0.00043074049284297995, "loss": 3.086, "step": 21821 }, { "epoch": 1.07, "grad_norm": 0.598905622959137, "learning_rate": 0.00043072663544145817, "loss": 3.2127, "step": 21822 }, { "epoch": 1.07, "grad_norm": 0.5313297510147095, "learning_rate": 0.00043071277769562744, "loss": 3.0613, "step": 21823 }, { "epoch": 1.07, "grad_norm": 0.5411408543586731, "learning_rate": 0.00043069891960552446, "loss": 3.1642, "step": 21824 }, { "epoch": 1.07, "grad_norm": 0.5270294547080994, "learning_rate": 0.0004306850611711858, "loss": 3.2479, "step": 21825 }, { "epoch": 1.07, "grad_norm": 0.5298737287521362, "learning_rate": 0.00043067120239264776, "loss": 3.2503, "step": 21826 }, { "epoch": 1.07, "grad_norm": 0.544009804725647, "learning_rate": 0.0004306573432699468, "loss": 3.013, "step": 21827 }, { "epoch": 1.07, "grad_norm": 0.51810622215271, "learning_rate": 0.0004306434838031196, "loss": 3.04, "step": 21828 }, { "epoch": 1.07, "grad_norm": 0.5270453095436096, "learning_rate": 0.0004306296239922027, "loss": 2.9395, "step": 21829 }, { "epoch": 1.07, "grad_norm": 0.5867906212806702, "learning_rate": 0.0004306157638372325, "loss": 3.0299, "step": 21830 }, { "epoch": 1.07, "grad_norm": 0.5443738698959351, "learning_rate": 0.0004306019033382454, "loss": 3.091, "step": 21831 }, { "epoch": 1.07, "grad_norm": 0.5243510007858276, "learning_rate": 0.0004305880424952781, "loss": 3.0763, "step": 21832 }, { "epoch": 1.07, "grad_norm": 0.5453609824180603, "learning_rate": 0.00043057418130836706, "loss": 3.0139, "step": 21833 }, { "epoch": 1.07, "grad_norm": 0.5322545766830444, "learning_rate": 0.0004305603197775487, "loss": 2.9306, "step": 21834 }, { "epoch": 1.07, "grad_norm": 0.5745664834976196, "learning_rate": 0.00043054645790285964, "loss": 3.1827, "step": 21835 }, { "epoch": 1.07, "grad_norm": 0.5085702538490295, "learning_rate": 0.0004305325956843363, "loss": 3.0335, "step": 21836 }, { "epoch": 1.07, "grad_norm": 0.5018842816352844, "learning_rate": 0.00043051873312201526, "loss": 3.0121, "step": 21837 }, { "epoch": 1.07, "grad_norm": 0.5426105260848999, "learning_rate": 0.00043050487021593285, "loss": 3.0908, "step": 21838 }, { "epoch": 1.07, "grad_norm": 0.529456615447998, "learning_rate": 0.0004304910069661259, "loss": 3.108, "step": 21839 }, { "epoch": 1.07, "grad_norm": 0.5271927714347839, "learning_rate": 0.0004304771433726307, "loss": 3.0775, "step": 21840 }, { "epoch": 1.07, "grad_norm": 0.5279380679130554, "learning_rate": 0.00043046327943548383, "loss": 3.059, "step": 21841 }, { "epoch": 1.07, "grad_norm": 0.5312852263450623, "learning_rate": 0.0004304494151547217, "loss": 3.2413, "step": 21842 }, { "epoch": 1.07, "grad_norm": 0.525822103023529, "learning_rate": 0.00043043555053038095, "loss": 3.0476, "step": 21843 }, { "epoch": 1.07, "grad_norm": 0.5275469422340393, "learning_rate": 0.0004304216855624981, "loss": 3.1864, "step": 21844 }, { "epoch": 1.07, "grad_norm": 0.5422474145889282, "learning_rate": 0.00043040782025110966, "loss": 3.1227, "step": 21845 }, { "epoch": 1.07, "grad_norm": 0.5537338256835938, "learning_rate": 0.00043039395459625203, "loss": 3.2381, "step": 21846 }, { "epoch": 1.07, "grad_norm": 0.5338166356086731, "learning_rate": 0.000430380088597962, "loss": 3.2902, "step": 21847 }, { "epoch": 1.07, "grad_norm": 0.5591501593589783, "learning_rate": 0.00043036622225627567, "loss": 3.2828, "step": 21848 }, { "epoch": 1.07, "grad_norm": 0.5365943312644958, "learning_rate": 0.00043035235557122985, "loss": 2.9454, "step": 21849 }, { "epoch": 1.07, "grad_norm": 0.5561801195144653, "learning_rate": 0.00043033848854286106, "loss": 3.1468, "step": 21850 }, { "epoch": 1.07, "grad_norm": 0.5465043187141418, "learning_rate": 0.0004303246211712059, "loss": 3.0577, "step": 21851 }, { "epoch": 1.07, "grad_norm": 0.5761997699737549, "learning_rate": 0.0004303107534563006, "loss": 2.8597, "step": 21852 }, { "epoch": 1.07, "grad_norm": 0.5243141055107117, "learning_rate": 0.0004302968853981819, "loss": 3.114, "step": 21853 }, { "epoch": 1.07, "grad_norm": 0.5474295020103455, "learning_rate": 0.0004302830169968863, "loss": 3.0841, "step": 21854 }, { "epoch": 1.07, "grad_norm": 0.5314984321594238, "learning_rate": 0.0004302691482524503, "loss": 3.25, "step": 21855 }, { "epoch": 1.07, "grad_norm": 0.5473430156707764, "learning_rate": 0.0004302552791649105, "loss": 3.2211, "step": 21856 }, { "epoch": 1.07, "grad_norm": 0.6076376438140869, "learning_rate": 0.0004302414097343032, "loss": 3.1258, "step": 21857 }, { "epoch": 1.07, "grad_norm": 0.555774986743927, "learning_rate": 0.0004302275399606653, "loss": 3.2512, "step": 21858 }, { "epoch": 1.07, "grad_norm": 0.5176228880882263, "learning_rate": 0.0004302136698440329, "loss": 3.0976, "step": 21859 }, { "epoch": 1.07, "grad_norm": 0.5607179403305054, "learning_rate": 0.0004301997993844429, "loss": 3.1035, "step": 21860 }, { "epoch": 1.07, "grad_norm": 0.5279338955879211, "learning_rate": 0.00043018592858193165, "loss": 3.0925, "step": 21861 }, { "epoch": 1.07, "grad_norm": 0.506940484046936, "learning_rate": 0.00043017205743653577, "loss": 3.1532, "step": 21862 }, { "epoch": 1.07, "grad_norm": 0.5521911978721619, "learning_rate": 0.0004301581859482917, "loss": 3.0362, "step": 21863 }, { "epoch": 1.07, "grad_norm": 0.5607286095619202, "learning_rate": 0.0004301443141172359, "loss": 3.0133, "step": 21864 }, { "epoch": 1.07, "grad_norm": 0.5357416272163391, "learning_rate": 0.00043013044194340513, "loss": 3.1389, "step": 21865 }, { "epoch": 1.07, "grad_norm": 0.5648325681686401, "learning_rate": 0.0004301165694268359, "loss": 3.0022, "step": 21866 }, { "epoch": 1.07, "grad_norm": 0.5390356779098511, "learning_rate": 0.0004301026965675646, "loss": 3.0165, "step": 21867 }, { "epoch": 1.07, "grad_norm": 0.5444007515907288, "learning_rate": 0.00043008882336562776, "loss": 3.1716, "step": 21868 }, { "epoch": 1.07, "grad_norm": 0.5538380742073059, "learning_rate": 0.00043007494982106204, "loss": 2.798, "step": 21869 }, { "epoch": 1.07, "grad_norm": 0.5302233099937439, "learning_rate": 0.000430061075933904, "loss": 3.2093, "step": 21870 }, { "epoch": 1.07, "grad_norm": 0.5354451537132263, "learning_rate": 0.00043004720170419005, "loss": 3.1233, "step": 21871 }, { "epoch": 1.07, "grad_norm": 0.553068995475769, "learning_rate": 0.00043003332713195685, "loss": 2.9238, "step": 21872 }, { "epoch": 1.07, "grad_norm": 0.5181149840354919, "learning_rate": 0.0004300194522172409, "loss": 3.185, "step": 21873 }, { "epoch": 1.07, "grad_norm": 0.53992760181427, "learning_rate": 0.0004300055769600786, "loss": 3.1029, "step": 21874 }, { "epoch": 1.07, "grad_norm": 0.5325241088867188, "learning_rate": 0.00042999170136050674, "loss": 3.1176, "step": 21875 }, { "epoch": 1.07, "grad_norm": 0.5475149750709534, "learning_rate": 0.0004299778254185618, "loss": 3.044, "step": 21876 }, { "epoch": 1.07, "grad_norm": 0.5398291945457458, "learning_rate": 0.00042996394913428023, "loss": 3.1001, "step": 21877 }, { "epoch": 1.07, "grad_norm": 0.5641384124755859, "learning_rate": 0.0004299500725076986, "loss": 2.9772, "step": 21878 }, { "epoch": 1.07, "grad_norm": 0.5371559262275696, "learning_rate": 0.00042993619553885346, "loss": 2.9344, "step": 21879 }, { "epoch": 1.07, "grad_norm": 0.5246857404708862, "learning_rate": 0.00042992231822778154, "loss": 3.0261, "step": 21880 }, { "epoch": 1.07, "grad_norm": 0.5782071948051453, "learning_rate": 0.0004299084405745191, "loss": 3.1893, "step": 21881 }, { "epoch": 1.07, "grad_norm": 0.5363188982009888, "learning_rate": 0.0004298945625791028, "loss": 2.9895, "step": 21882 }, { "epoch": 1.07, "grad_norm": 0.5487208366394043, "learning_rate": 0.0004298806842415693, "loss": 2.9313, "step": 21883 }, { "epoch": 1.07, "grad_norm": 0.5662634968757629, "learning_rate": 0.0004298668055619551, "loss": 3.0855, "step": 21884 }, { "epoch": 1.07, "grad_norm": 0.5252002477645874, "learning_rate": 0.0004298529265402967, "loss": 2.8349, "step": 21885 }, { "epoch": 1.07, "grad_norm": 0.7216542363166809, "learning_rate": 0.00042983904717663066, "loss": 3.1169, "step": 21886 }, { "epoch": 1.07, "grad_norm": 0.5242555141448975, "learning_rate": 0.0004298251674709936, "loss": 3.0732, "step": 21887 }, { "epoch": 1.07, "grad_norm": 0.5485615730285645, "learning_rate": 0.00042981128742342197, "loss": 3.0352, "step": 21888 }, { "epoch": 1.07, "grad_norm": 0.5380790829658508, "learning_rate": 0.00042979740703395243, "loss": 3.0147, "step": 21889 }, { "epoch": 1.07, "grad_norm": 0.5466700792312622, "learning_rate": 0.0004297835263026215, "loss": 3.0625, "step": 21890 }, { "epoch": 1.07, "grad_norm": 0.6295219659805298, "learning_rate": 0.00042976964522946576, "loss": 2.8748, "step": 21891 }, { "epoch": 1.07, "grad_norm": 0.5317986011505127, "learning_rate": 0.0004297557638145217, "loss": 2.9312, "step": 21892 }, { "epoch": 1.07, "grad_norm": 0.5600071549415588, "learning_rate": 0.000429741882057826, "loss": 3.1082, "step": 21893 }, { "epoch": 1.07, "grad_norm": 0.5254099369049072, "learning_rate": 0.0004297279999594151, "loss": 2.9492, "step": 21894 }, { "epoch": 1.07, "grad_norm": 0.557667076587677, "learning_rate": 0.00042971411751932564, "loss": 2.9501, "step": 21895 }, { "epoch": 1.07, "grad_norm": 0.5373759865760803, "learning_rate": 0.0004297002347375941, "loss": 2.9849, "step": 21896 }, { "epoch": 1.07, "grad_norm": 0.5822132229804993, "learning_rate": 0.00042968635161425717, "loss": 3.1294, "step": 21897 }, { "epoch": 1.07, "grad_norm": 0.5314226150512695, "learning_rate": 0.0004296724681493513, "loss": 3.0397, "step": 21898 }, { "epoch": 1.07, "grad_norm": 0.5426365733146667, "learning_rate": 0.00042965858434291317, "loss": 2.9715, "step": 21899 }, { "epoch": 1.07, "grad_norm": 0.5367540121078491, "learning_rate": 0.00042964470019497917, "loss": 3.0716, "step": 21900 }, { "epoch": 1.07, "grad_norm": 0.5575674176216125, "learning_rate": 0.000429630815705586, "loss": 3.2273, "step": 21901 }, { "epoch": 1.07, "grad_norm": 0.5367324948310852, "learning_rate": 0.0004296169308747704, "loss": 2.9208, "step": 21902 }, { "epoch": 1.07, "grad_norm": 0.5453097224235535, "learning_rate": 0.00042960304570256855, "loss": 3.211, "step": 21903 }, { "epoch": 1.07, "grad_norm": 0.5521195530891418, "learning_rate": 0.00042958916018901724, "loss": 3.0296, "step": 21904 }, { "epoch": 1.07, "grad_norm": 0.5742548108100891, "learning_rate": 0.0004295752743341531, "loss": 2.9801, "step": 21905 }, { "epoch": 1.07, "grad_norm": 0.5651740431785583, "learning_rate": 0.00042956138813801263, "loss": 3.216, "step": 21906 }, { "epoch": 1.07, "grad_norm": 0.5569771528244019, "learning_rate": 0.0004295475016006323, "loss": 3.0902, "step": 21907 }, { "epoch": 1.07, "grad_norm": 0.5515370965003967, "learning_rate": 0.0004295336147220488, "loss": 3.2262, "step": 21908 }, { "epoch": 1.07, "grad_norm": 0.5465577244758606, "learning_rate": 0.0004295197275022988, "loss": 3.0305, "step": 21909 }, { "epoch": 1.07, "grad_norm": 0.5600085854530334, "learning_rate": 0.00042950583994141867, "loss": 3.2192, "step": 21910 }, { "epoch": 1.07, "grad_norm": 0.5199048519134521, "learning_rate": 0.00042949195203944505, "loss": 3.2271, "step": 21911 }, { "epoch": 1.07, "grad_norm": 0.5712819695472717, "learning_rate": 0.0004294780637964146, "loss": 3.1676, "step": 21912 }, { "epoch": 1.07, "grad_norm": 0.5674663782119751, "learning_rate": 0.00042946417521236394, "loss": 3.0938, "step": 21913 }, { "epoch": 1.07, "grad_norm": 0.5764085054397583, "learning_rate": 0.0004294502862873294, "loss": 3.1466, "step": 21914 }, { "epoch": 1.07, "grad_norm": 0.5495882630348206, "learning_rate": 0.0004294363970213477, "loss": 3.1355, "step": 21915 }, { "epoch": 1.07, "grad_norm": 0.5135552287101746, "learning_rate": 0.0004294225074144556, "loss": 2.932, "step": 21916 }, { "epoch": 1.07, "grad_norm": 0.5441713929176331, "learning_rate": 0.0004294086174666895, "loss": 3.0714, "step": 21917 }, { "epoch": 1.07, "grad_norm": 0.539913535118103, "learning_rate": 0.00042939472717808596, "loss": 3.0711, "step": 21918 }, { "epoch": 1.07, "grad_norm": 0.5363076329231262, "learning_rate": 0.00042938083654868155, "loss": 3.0245, "step": 21919 }, { "epoch": 1.07, "grad_norm": 0.5468325614929199, "learning_rate": 0.000429366945578513, "loss": 2.9802, "step": 21920 }, { "epoch": 1.07, "grad_norm": 0.5180367827415466, "learning_rate": 0.0004293530542676168, "loss": 3.2203, "step": 21921 }, { "epoch": 1.07, "grad_norm": 0.5398200154304504, "learning_rate": 0.00042933916261602946, "loss": 2.9909, "step": 21922 }, { "epoch": 1.07, "grad_norm": 0.5421903729438782, "learning_rate": 0.0004293252706237878, "loss": 2.8939, "step": 21923 }, { "epoch": 1.07, "grad_norm": 0.6031635999679565, "learning_rate": 0.00042931137829092824, "loss": 2.8989, "step": 21924 }, { "epoch": 1.07, "grad_norm": 0.5310101509094238, "learning_rate": 0.00042929748561748723, "loss": 2.9027, "step": 21925 }, { "epoch": 1.07, "grad_norm": 0.5500092506408691, "learning_rate": 0.0004292835926035017, "loss": 3.0154, "step": 21926 }, { "epoch": 1.07, "grad_norm": 0.5398533940315247, "learning_rate": 0.00042926969924900806, "loss": 3.1928, "step": 21927 }, { "epoch": 1.07, "grad_norm": 0.5290772914886475, "learning_rate": 0.0004292558055540429, "loss": 3.0907, "step": 21928 }, { "epoch": 1.07, "grad_norm": 0.553503692150116, "learning_rate": 0.00042924191151864276, "loss": 3.2309, "step": 21929 }, { "epoch": 1.07, "grad_norm": 0.5868078470230103, "learning_rate": 0.00042922801714284425, "loss": 3.244, "step": 21930 }, { "epoch": 1.07, "grad_norm": 0.535077691078186, "learning_rate": 0.0004292141224266842, "loss": 3.3127, "step": 21931 }, { "epoch": 1.07, "grad_norm": 0.5569257736206055, "learning_rate": 0.0004292002273701989, "loss": 3.0853, "step": 21932 }, { "epoch": 1.07, "grad_norm": 0.5465898513793945, "learning_rate": 0.00042918633197342506, "loss": 3.0265, "step": 21933 }, { "epoch": 1.07, "grad_norm": 0.5579783916473389, "learning_rate": 0.00042917243623639934, "loss": 3.087, "step": 21934 }, { "epoch": 1.07, "grad_norm": 0.5138366222381592, "learning_rate": 0.00042915854015915825, "loss": 3.2086, "step": 21935 }, { "epoch": 1.08, "grad_norm": 0.5494992136955261, "learning_rate": 0.0004291446437417383, "loss": 3.0049, "step": 21936 }, { "epoch": 1.08, "grad_norm": 0.5276719927787781, "learning_rate": 0.00042913074698417645, "loss": 3.0952, "step": 21937 }, { "epoch": 1.08, "grad_norm": 0.6109719276428223, "learning_rate": 0.00042911684988650894, "loss": 3.0782, "step": 21938 }, { "epoch": 1.08, "grad_norm": 0.5517315864562988, "learning_rate": 0.0004291029524487725, "loss": 2.9928, "step": 21939 }, { "epoch": 1.08, "grad_norm": 0.5608121752738953, "learning_rate": 0.0004290890546710038, "loss": 3.0408, "step": 21940 }, { "epoch": 1.08, "grad_norm": 0.5328395366668701, "learning_rate": 0.0004290751565532392, "loss": 3.0053, "step": 21941 }, { "epoch": 1.08, "grad_norm": 0.519293487071991, "learning_rate": 0.0004290612580955156, "loss": 3.1365, "step": 21942 }, { "epoch": 1.08, "grad_norm": 0.5312771797180176, "learning_rate": 0.0004290473592978695, "loss": 3.1626, "step": 21943 }, { "epoch": 1.08, "grad_norm": 0.5550572276115417, "learning_rate": 0.00042903346016033746, "loss": 2.9873, "step": 21944 }, { "epoch": 1.08, "grad_norm": 0.5349480509757996, "learning_rate": 0.0004290195606829561, "loss": 3.0404, "step": 21945 }, { "epoch": 1.08, "grad_norm": 0.53115314245224, "learning_rate": 0.0004290056608657621, "loss": 3.1882, "step": 21946 }, { "epoch": 1.08, "grad_norm": 0.5174546837806702, "learning_rate": 0.00042899176070879195, "loss": 3.1557, "step": 21947 }, { "epoch": 1.08, "grad_norm": 0.5151444673538208, "learning_rate": 0.0004289778602120824, "loss": 2.9821, "step": 21948 }, { "epoch": 1.08, "grad_norm": 0.5512619614601135, "learning_rate": 0.00042896395937567003, "loss": 3.2606, "step": 21949 }, { "epoch": 1.08, "grad_norm": 0.5739850997924805, "learning_rate": 0.00042895005819959124, "loss": 3.0911, "step": 21950 }, { "epoch": 1.08, "grad_norm": 0.5571601390838623, "learning_rate": 0.00042893615668388287, "loss": 3.0645, "step": 21951 }, { "epoch": 1.08, "grad_norm": 0.5094681978225708, "learning_rate": 0.00042892225482858145, "loss": 3.1872, "step": 21952 }, { "epoch": 1.08, "grad_norm": 0.531054675579071, "learning_rate": 0.00042890835263372374, "loss": 2.8496, "step": 21953 }, { "epoch": 1.08, "grad_norm": 0.5505704283714294, "learning_rate": 0.0004288944500993462, "loss": 3.0071, "step": 21954 }, { "epoch": 1.08, "grad_norm": 0.5394859313964844, "learning_rate": 0.0004288805472254855, "loss": 3.2745, "step": 21955 }, { "epoch": 1.08, "grad_norm": 0.5463074445724487, "learning_rate": 0.0004288666440121782, "loss": 3.0487, "step": 21956 }, { "epoch": 1.08, "grad_norm": 0.5753787159919739, "learning_rate": 0.00042885274045946096, "loss": 3.0229, "step": 21957 }, { "epoch": 1.08, "grad_norm": 0.5389484763145447, "learning_rate": 0.0004288388365673704, "loss": 2.9471, "step": 21958 }, { "epoch": 1.08, "grad_norm": 0.5314404964447021, "learning_rate": 0.00042882493233594316, "loss": 3.1627, "step": 21959 }, { "epoch": 1.08, "grad_norm": 0.5405857563018799, "learning_rate": 0.0004288110277652159, "loss": 3.0673, "step": 21960 }, { "epoch": 1.08, "grad_norm": 0.5381196141242981, "learning_rate": 0.0004287971228552251, "loss": 3.1375, "step": 21961 }, { "epoch": 1.08, "grad_norm": 0.5330754518508911, "learning_rate": 0.0004287832176060074, "loss": 2.9481, "step": 21962 }, { "epoch": 1.08, "grad_norm": 0.5497124791145325, "learning_rate": 0.0004287693120175996, "loss": 3.0511, "step": 21963 }, { "epoch": 1.08, "grad_norm": 0.5864923596382141, "learning_rate": 0.0004287554060900382, "loss": 3.1814, "step": 21964 }, { "epoch": 1.08, "grad_norm": 0.5531643629074097, "learning_rate": 0.0004287414998233598, "loss": 3.0753, "step": 21965 }, { "epoch": 1.08, "grad_norm": 0.5571635961532593, "learning_rate": 0.0004287275932176011, "loss": 2.926, "step": 21966 }, { "epoch": 1.08, "grad_norm": 0.5513104796409607, "learning_rate": 0.00042871368627279873, "loss": 3.1882, "step": 21967 }, { "epoch": 1.08, "grad_norm": 0.548090934753418, "learning_rate": 0.0004286997789889892, "loss": 3.265, "step": 21968 }, { "epoch": 1.08, "grad_norm": 0.5597220063209534, "learning_rate": 0.00042868587136620927, "loss": 2.9922, "step": 21969 }, { "epoch": 1.08, "grad_norm": 0.5412937998771667, "learning_rate": 0.0004286719634044955, "loss": 3.0208, "step": 21970 }, { "epoch": 1.08, "grad_norm": 0.5296668410301208, "learning_rate": 0.00042865805510388456, "loss": 3.1406, "step": 21971 }, { "epoch": 1.08, "grad_norm": 0.5780396461486816, "learning_rate": 0.000428644146464413, "loss": 3.0601, "step": 21972 }, { "epoch": 1.08, "grad_norm": 0.5567464828491211, "learning_rate": 0.00042863023748611757, "loss": 3.2744, "step": 21973 }, { "epoch": 1.08, "grad_norm": 0.5450098514556885, "learning_rate": 0.00042861632816903485, "loss": 3.0649, "step": 21974 }, { "epoch": 1.08, "grad_norm": 0.5599391460418701, "learning_rate": 0.00042860241851320157, "loss": 3.1379, "step": 21975 }, { "epoch": 1.08, "grad_norm": 0.5682699680328369, "learning_rate": 0.00042858850851865414, "loss": 3.0891, "step": 21976 }, { "epoch": 1.08, "grad_norm": 0.578801155090332, "learning_rate": 0.0004285745981854293, "loss": 2.6889, "step": 21977 }, { "epoch": 1.08, "grad_norm": 0.5470324754714966, "learning_rate": 0.0004285606875135638, "loss": 3.2862, "step": 21978 }, { "epoch": 1.08, "grad_norm": 0.5256801843643188, "learning_rate": 0.00042854677650309416, "loss": 3.3307, "step": 21979 }, { "epoch": 1.08, "grad_norm": 0.5400407910346985, "learning_rate": 0.0004285328651540571, "loss": 2.9132, "step": 21980 }, { "epoch": 1.08, "grad_norm": 0.5310783982276917, "learning_rate": 0.00042851895346648916, "loss": 3.0173, "step": 21981 }, { "epoch": 1.08, "grad_norm": 0.5397918820381165, "learning_rate": 0.00042850504144042697, "loss": 3.1176, "step": 21982 }, { "epoch": 1.08, "grad_norm": 0.5340597033500671, "learning_rate": 0.0004284911290759073, "loss": 3.2694, "step": 21983 }, { "epoch": 1.08, "grad_norm": 0.5765253901481628, "learning_rate": 0.0004284772163729667, "loss": 2.9625, "step": 21984 }, { "epoch": 1.08, "grad_norm": 0.5977330803871155, "learning_rate": 0.0004284633033316419, "loss": 2.8798, "step": 21985 }, { "epoch": 1.08, "grad_norm": 0.5429593324661255, "learning_rate": 0.0004284493899519695, "loss": 3.2659, "step": 21986 }, { "epoch": 1.08, "grad_norm": 0.5388059020042419, "learning_rate": 0.000428435476233986, "loss": 2.9844, "step": 21987 }, { "epoch": 1.08, "grad_norm": 0.5572206377983093, "learning_rate": 0.0004284215621777282, "loss": 3.0551, "step": 21988 }, { "epoch": 1.08, "grad_norm": 0.5941150784492493, "learning_rate": 0.0004284076477832328, "loss": 2.8342, "step": 21989 }, { "epoch": 1.08, "grad_norm": 0.5501981973648071, "learning_rate": 0.00042839373305053635, "loss": 3.0811, "step": 21990 }, { "epoch": 1.08, "grad_norm": 0.5432038903236389, "learning_rate": 0.00042837981797967553, "loss": 3.0867, "step": 21991 }, { "epoch": 1.08, "grad_norm": 0.5538283586502075, "learning_rate": 0.00042836590257068686, "loss": 3.3209, "step": 21992 }, { "epoch": 1.08, "grad_norm": 0.5830565094947815, "learning_rate": 0.00042835198682360725, "loss": 2.9133, "step": 21993 }, { "epoch": 1.08, "grad_norm": 0.5611438155174255, "learning_rate": 0.00042833807073847313, "loss": 2.9194, "step": 21994 }, { "epoch": 1.08, "grad_norm": 0.5672524571418762, "learning_rate": 0.0004283241543153212, "loss": 3.0636, "step": 21995 }, { "epoch": 1.08, "grad_norm": 0.5321059226989746, "learning_rate": 0.00042831023755418823, "loss": 3.1639, "step": 21996 }, { "epoch": 1.08, "grad_norm": 0.574716329574585, "learning_rate": 0.0004282963204551108, "loss": 3.1508, "step": 21997 }, { "epoch": 1.08, "grad_norm": 0.540168046951294, "learning_rate": 0.0004282824030181254, "loss": 2.825, "step": 21998 }, { "epoch": 1.08, "grad_norm": 0.5404049158096313, "learning_rate": 0.00042826848524326906, "loss": 3.2393, "step": 21999 }, { "epoch": 1.08, "grad_norm": 0.5294651389122009, "learning_rate": 0.0004282545671305781, "loss": 3.2042, "step": 22000 }, { "epoch": 1.08, "grad_norm": 0.5121399760246277, "learning_rate": 0.0004282406486800893, "loss": 3.3916, "step": 22001 }, { "epoch": 1.08, "grad_norm": 0.5601375699043274, "learning_rate": 0.0004282267298918392, "loss": 2.9623, "step": 22002 }, { "epoch": 1.08, "grad_norm": 0.5114780068397522, "learning_rate": 0.0004282128107658647, "loss": 2.9027, "step": 22003 }, { "epoch": 1.08, "grad_norm": 0.5449838638305664, "learning_rate": 0.0004281988913022024, "loss": 3.0159, "step": 22004 }, { "epoch": 1.08, "grad_norm": 0.5693271160125732, "learning_rate": 0.0004281849715008888, "loss": 3.162, "step": 22005 }, { "epoch": 1.08, "grad_norm": 0.5240155458450317, "learning_rate": 0.0004281710513619606, "loss": 3.1996, "step": 22006 }, { "epoch": 1.08, "grad_norm": 0.5236963629722595, "learning_rate": 0.0004281571308854546, "loss": 3.3674, "step": 22007 }, { "epoch": 1.08, "grad_norm": 0.581671953201294, "learning_rate": 0.00042814321007140734, "loss": 3.1306, "step": 22008 }, { "epoch": 1.08, "grad_norm": 0.5509883761405945, "learning_rate": 0.00042812928891985556, "loss": 3.1849, "step": 22009 }, { "epoch": 1.08, "grad_norm": 0.5407490134239197, "learning_rate": 0.00042811536743083583, "loss": 3.2232, "step": 22010 }, { "epoch": 1.08, "grad_norm": 0.5536962151527405, "learning_rate": 0.00042810144560438495, "loss": 2.9887, "step": 22011 }, { "epoch": 1.08, "grad_norm": 0.5936238765716553, "learning_rate": 0.0004280875234405395, "loss": 2.943, "step": 22012 }, { "epoch": 1.08, "grad_norm": 0.5407906174659729, "learning_rate": 0.0004280736009393361, "loss": 3.271, "step": 22013 }, { "epoch": 1.08, "grad_norm": 0.569972813129425, "learning_rate": 0.00042805967810081156, "loss": 3.0759, "step": 22014 }, { "epoch": 1.08, "grad_norm": 0.5183060765266418, "learning_rate": 0.00042804575492500245, "loss": 3.2847, "step": 22015 }, { "epoch": 1.08, "grad_norm": 0.5623323917388916, "learning_rate": 0.00042803183141194547, "loss": 3.2323, "step": 22016 }, { "epoch": 1.08, "grad_norm": 0.5643849968910217, "learning_rate": 0.00042801790756167733, "loss": 3.0499, "step": 22017 }, { "epoch": 1.08, "grad_norm": 0.5582993030548096, "learning_rate": 0.00042800398337423457, "loss": 3.051, "step": 22018 }, { "epoch": 1.08, "grad_norm": 0.5692091584205627, "learning_rate": 0.000427990058849654, "loss": 3.0984, "step": 22019 }, { "epoch": 1.08, "grad_norm": 0.5723261833190918, "learning_rate": 0.0004279761339879722, "loss": 3.0136, "step": 22020 }, { "epoch": 1.08, "grad_norm": 0.5091471672058105, "learning_rate": 0.0004279622087892259, "loss": 3.2192, "step": 22021 }, { "epoch": 1.08, "grad_norm": 0.5590563416481018, "learning_rate": 0.0004279482832534519, "loss": 3.0695, "step": 22022 }, { "epoch": 1.08, "grad_norm": 0.5491331815719604, "learning_rate": 0.0004279343573806866, "loss": 3.1763, "step": 22023 }, { "epoch": 1.08, "grad_norm": 0.5999693274497986, "learning_rate": 0.00042792043117096686, "loss": 2.8919, "step": 22024 }, { "epoch": 1.08, "grad_norm": 0.5291171669960022, "learning_rate": 0.0004279065046243293, "loss": 3.404, "step": 22025 }, { "epoch": 1.08, "grad_norm": 0.5340095162391663, "learning_rate": 0.00042789257774081073, "loss": 3.2515, "step": 22026 }, { "epoch": 1.08, "grad_norm": 0.5588778853416443, "learning_rate": 0.00042787865052044765, "loss": 3.3579, "step": 22027 }, { "epoch": 1.08, "grad_norm": 0.5633630156517029, "learning_rate": 0.0004278647229632767, "loss": 3.0803, "step": 22028 }, { "epoch": 1.08, "grad_norm": 0.5125625133514404, "learning_rate": 0.0004278507950693348, "loss": 2.9785, "step": 22029 }, { "epoch": 1.08, "grad_norm": 0.6125033497810364, "learning_rate": 0.00042783686683865854, "loss": 3.1076, "step": 22030 }, { "epoch": 1.08, "grad_norm": 0.5360808372497559, "learning_rate": 0.00042782293827128457, "loss": 3.1738, "step": 22031 }, { "epoch": 1.08, "grad_norm": 0.5409636497497559, "learning_rate": 0.00042780900936724957, "loss": 3.0977, "step": 22032 }, { "epoch": 1.08, "grad_norm": 0.5472274422645569, "learning_rate": 0.0004277950801265902, "loss": 2.994, "step": 22033 }, { "epoch": 1.08, "grad_norm": 0.5469160079956055, "learning_rate": 0.00042778115054934314, "loss": 3.0537, "step": 22034 }, { "epoch": 1.08, "grad_norm": 0.5636223554611206, "learning_rate": 0.0004277672206355452, "loss": 3.01, "step": 22035 }, { "epoch": 1.08, "grad_norm": 0.6423721313476562, "learning_rate": 0.000427753290385233, "loss": 2.9419, "step": 22036 }, { "epoch": 1.08, "grad_norm": 0.53389573097229, "learning_rate": 0.00042773935979844327, "loss": 3.0792, "step": 22037 }, { "epoch": 1.08, "grad_norm": 0.5442113280296326, "learning_rate": 0.00042772542887521247, "loss": 3.0606, "step": 22038 }, { "epoch": 1.08, "grad_norm": 0.5777226686477661, "learning_rate": 0.0004277114976155776, "loss": 3.2876, "step": 22039 }, { "epoch": 1.08, "grad_norm": 0.5454306602478027, "learning_rate": 0.0004276975660195752, "loss": 2.9339, "step": 22040 }, { "epoch": 1.08, "grad_norm": 0.544465959072113, "learning_rate": 0.000427683634087242, "loss": 3.0272, "step": 22041 }, { "epoch": 1.08, "grad_norm": 0.5971094369888306, "learning_rate": 0.00042766970181861466, "loss": 3.0422, "step": 22042 }, { "epoch": 1.08, "grad_norm": 0.5495122075080872, "learning_rate": 0.00042765576921372986, "loss": 3.0553, "step": 22043 }, { "epoch": 1.08, "grad_norm": 0.5605582594871521, "learning_rate": 0.00042764183627262437, "loss": 3.3398, "step": 22044 }, { "epoch": 1.08, "grad_norm": 0.564033031463623, "learning_rate": 0.00042762790299533484, "loss": 3.0244, "step": 22045 }, { "epoch": 1.08, "grad_norm": 0.5470286011695862, "learning_rate": 0.000427613969381898, "loss": 2.9283, "step": 22046 }, { "epoch": 1.08, "grad_norm": 0.5006475448608398, "learning_rate": 0.00042760003543235056, "loss": 2.9596, "step": 22047 }, { "epoch": 1.08, "grad_norm": 0.5973907709121704, "learning_rate": 0.00042758610114672905, "loss": 2.888, "step": 22048 }, { "epoch": 1.08, "grad_norm": 0.5585212111473083, "learning_rate": 0.00042757216652507046, "loss": 3.1172, "step": 22049 }, { "epoch": 1.08, "grad_norm": 0.5310104489326477, "learning_rate": 0.00042755823156741127, "loss": 3.0854, "step": 22050 }, { "epoch": 1.08, "grad_norm": 0.5287449359893799, "learning_rate": 0.00042754429627378824, "loss": 3.1736, "step": 22051 }, { "epoch": 1.08, "grad_norm": 0.5601946711540222, "learning_rate": 0.00042753036064423813, "loss": 3.1034, "step": 22052 }, { "epoch": 1.08, "grad_norm": 0.5369007587432861, "learning_rate": 0.00042751642467879746, "loss": 2.9668, "step": 22053 }, { "epoch": 1.08, "grad_norm": 0.5678867101669312, "learning_rate": 0.00042750248837750317, "loss": 3.331, "step": 22054 }, { "epoch": 1.08, "grad_norm": 0.5435790419578552, "learning_rate": 0.0004274885517403919, "loss": 2.823, "step": 22055 }, { "epoch": 1.08, "grad_norm": 0.4990702271461487, "learning_rate": 0.00042747461476750026, "loss": 2.8055, "step": 22056 }, { "epoch": 1.08, "grad_norm": 0.551609218120575, "learning_rate": 0.000427460677458865, "loss": 3.0637, "step": 22057 }, { "epoch": 1.08, "grad_norm": 0.5618946552276611, "learning_rate": 0.00042744673981452285, "loss": 3.0707, "step": 22058 }, { "epoch": 1.08, "grad_norm": 0.5400345921516418, "learning_rate": 0.0004274328018345105, "loss": 2.8357, "step": 22059 }, { "epoch": 1.08, "grad_norm": 0.5235921740531921, "learning_rate": 0.0004274188635188647, "loss": 3.0765, "step": 22060 }, { "epoch": 1.08, "grad_norm": 0.5762670636177063, "learning_rate": 0.00042740492486762215, "loss": 3.2314, "step": 22061 }, { "epoch": 1.08, "grad_norm": 0.5849509239196777, "learning_rate": 0.0004273909858808196, "loss": 2.9629, "step": 22062 }, { "epoch": 1.08, "grad_norm": 0.5751528143882751, "learning_rate": 0.0004273770465584937, "loss": 3.1594, "step": 22063 }, { "epoch": 1.08, "grad_norm": 0.5581267476081848, "learning_rate": 0.00042736310690068106, "loss": 3.0604, "step": 22064 }, { "epoch": 1.08, "grad_norm": 0.6129317283630371, "learning_rate": 0.00042734916690741854, "loss": 3.1213, "step": 22065 }, { "epoch": 1.08, "grad_norm": 0.5418512225151062, "learning_rate": 0.00042733522657874295, "loss": 3.0227, "step": 22066 }, { "epoch": 1.08, "grad_norm": 0.5475884079933167, "learning_rate": 0.0004273212859146908, "loss": 3.254, "step": 22067 }, { "epoch": 1.08, "grad_norm": 0.5630029439926147, "learning_rate": 0.00042730734491529883, "loss": 3.1402, "step": 22068 }, { "epoch": 1.08, "grad_norm": 0.5417295098304749, "learning_rate": 0.00042729340358060386, "loss": 3.2479, "step": 22069 }, { "epoch": 1.08, "grad_norm": 0.5166475772857666, "learning_rate": 0.0004272794619106426, "loss": 3.123, "step": 22070 }, { "epoch": 1.08, "grad_norm": 0.554018497467041, "learning_rate": 0.00042726551990545167, "loss": 3.0511, "step": 22071 }, { "epoch": 1.08, "grad_norm": 0.520998477935791, "learning_rate": 0.00042725157756506784, "loss": 2.9895, "step": 22072 }, { "epoch": 1.08, "grad_norm": 0.5566690564155579, "learning_rate": 0.00042723763488952796, "loss": 3.1612, "step": 22073 }, { "epoch": 1.08, "grad_norm": 0.5547829270362854, "learning_rate": 0.0004272236918788685, "loss": 3.1395, "step": 22074 }, { "epoch": 1.08, "grad_norm": 0.6837734580039978, "learning_rate": 0.00042720974853312633, "loss": 3.0537, "step": 22075 }, { "epoch": 1.08, "grad_norm": 0.6061183214187622, "learning_rate": 0.00042719580485233826, "loss": 2.9883, "step": 22076 }, { "epoch": 1.08, "grad_norm": 0.5670740604400635, "learning_rate": 0.00042718186083654094, "loss": 3.0004, "step": 22077 }, { "epoch": 1.08, "grad_norm": 0.5454275608062744, "learning_rate": 0.000427167916485771, "loss": 3.0537, "step": 22078 }, { "epoch": 1.08, "grad_norm": 0.5352046489715576, "learning_rate": 0.00042715397180006516, "loss": 3.1883, "step": 22079 }, { "epoch": 1.08, "grad_norm": 0.5673153400421143, "learning_rate": 0.0004271400267794604, "loss": 3.106, "step": 22080 }, { "epoch": 1.08, "grad_norm": 0.5358964800834656, "learning_rate": 0.0004271260814239932, "loss": 3.0141, "step": 22081 }, { "epoch": 1.08, "grad_norm": 0.6573508977890015, "learning_rate": 0.0004271121357337003, "loss": 3.0543, "step": 22082 }, { "epoch": 1.08, "grad_norm": 0.5617774128913879, "learning_rate": 0.0004270981897086186, "loss": 3.0414, "step": 22083 }, { "epoch": 1.08, "grad_norm": 0.5869306921958923, "learning_rate": 0.0004270842433487847, "loss": 2.9109, "step": 22084 }, { "epoch": 1.08, "grad_norm": 0.5464381575584412, "learning_rate": 0.00042707029665423535, "loss": 2.9973, "step": 22085 }, { "epoch": 1.08, "grad_norm": 0.5536450147628784, "learning_rate": 0.00042705634962500726, "loss": 3.1901, "step": 22086 }, { "epoch": 1.08, "grad_norm": 0.5760237574577332, "learning_rate": 0.00042704240226113725, "loss": 3.2899, "step": 22087 }, { "epoch": 1.08, "grad_norm": 0.5971628427505493, "learning_rate": 0.00042702845456266207, "loss": 2.9888, "step": 22088 }, { "epoch": 1.08, "grad_norm": 0.5124607682228088, "learning_rate": 0.0004270145065296183, "loss": 3.1622, "step": 22089 }, { "epoch": 1.08, "grad_norm": 0.5447020530700684, "learning_rate": 0.0004270005581620427, "loss": 3.1413, "step": 22090 }, { "epoch": 1.08, "grad_norm": 0.5350099802017212, "learning_rate": 0.00042698660945997215, "loss": 3.0142, "step": 22091 }, { "epoch": 1.08, "grad_norm": 0.5177395343780518, "learning_rate": 0.0004269726604234433, "loss": 3.2439, "step": 22092 }, { "epoch": 1.08, "grad_norm": 0.5653977990150452, "learning_rate": 0.0004269587110524929, "loss": 3.0023, "step": 22093 }, { "epoch": 1.08, "grad_norm": 0.5047891736030579, "learning_rate": 0.0004269447613471577, "loss": 2.987, "step": 22094 }, { "epoch": 1.08, "grad_norm": 0.540993869304657, "learning_rate": 0.00042693081130747444, "loss": 3.1142, "step": 22095 }, { "epoch": 1.08, "grad_norm": 0.5562851428985596, "learning_rate": 0.0004269168609334798, "loss": 2.9607, "step": 22096 }, { "epoch": 1.08, "grad_norm": 0.5496192574501038, "learning_rate": 0.0004269029102252106, "loss": 3.0992, "step": 22097 }, { "epoch": 1.08, "grad_norm": 0.5273379683494568, "learning_rate": 0.0004268889591827036, "loss": 2.926, "step": 22098 }, { "epoch": 1.08, "grad_norm": 0.5540835857391357, "learning_rate": 0.00042687500780599544, "loss": 3.0501, "step": 22099 }, { "epoch": 1.08, "grad_norm": 0.5160466432571411, "learning_rate": 0.0004268610560951229, "loss": 3.152, "step": 22100 }, { "epoch": 1.08, "grad_norm": 0.5677379965782166, "learning_rate": 0.0004268471040501228, "loss": 3.0895, "step": 22101 }, { "epoch": 1.08, "grad_norm": 0.5393813848495483, "learning_rate": 0.00042683315167103196, "loss": 3.0357, "step": 22102 }, { "epoch": 1.08, "grad_norm": 0.5417753458023071, "learning_rate": 0.00042681919895788683, "loss": 3.1901, "step": 22103 }, { "epoch": 1.08, "grad_norm": 0.4963914155960083, "learning_rate": 0.0004268052459107244, "loss": 3.3258, "step": 22104 }, { "epoch": 1.08, "grad_norm": 0.5311129689216614, "learning_rate": 0.0004267912925295813, "loss": 3.1505, "step": 22105 }, { "epoch": 1.08, "grad_norm": 0.5351983904838562, "learning_rate": 0.00042677733881449446, "loss": 3.0273, "step": 22106 }, { "epoch": 1.08, "grad_norm": 0.5361194610595703, "learning_rate": 0.0004267633847655004, "loss": 2.9613, "step": 22107 }, { "epoch": 1.08, "grad_norm": 0.5353830456733704, "learning_rate": 0.000426749430382636, "loss": 3.164, "step": 22108 }, { "epoch": 1.08, "grad_norm": 0.5087656378746033, "learning_rate": 0.0004267354756659381, "loss": 3.2214, "step": 22109 }, { "epoch": 1.08, "grad_norm": 0.5443871021270752, "learning_rate": 0.00042672152061544314, "loss": 3.043, "step": 22110 }, { "epoch": 1.08, "grad_norm": 0.567270815372467, "learning_rate": 0.0004267075652311882, "loss": 3.1497, "step": 22111 }, { "epoch": 1.08, "grad_norm": 0.520046055316925, "learning_rate": 0.00042669360951321, "loss": 3.1484, "step": 22112 }, { "epoch": 1.08, "grad_norm": 0.5559588670730591, "learning_rate": 0.00042667965346154514, "loss": 3.0016, "step": 22113 }, { "epoch": 1.08, "grad_norm": 0.5984733700752258, "learning_rate": 0.0004266656970762305, "loss": 3.0313, "step": 22114 }, { "epoch": 1.08, "grad_norm": 0.5269164443016052, "learning_rate": 0.0004266517403573026, "loss": 3.2197, "step": 22115 }, { "epoch": 1.08, "grad_norm": 0.6935906410217285, "learning_rate": 0.00042663778330479863, "loss": 3.2497, "step": 22116 }, { "epoch": 1.08, "grad_norm": 0.5183548927307129, "learning_rate": 0.000426623825918755, "loss": 3.3231, "step": 22117 }, { "epoch": 1.08, "grad_norm": 0.5844008326530457, "learning_rate": 0.0004266098681992086, "loss": 3.2325, "step": 22118 }, { "epoch": 1.08, "grad_norm": 0.4959857165813446, "learning_rate": 0.0004265959101461962, "loss": 3.2061, "step": 22119 }, { "epoch": 1.08, "grad_norm": 0.5572559833526611, "learning_rate": 0.0004265819517597545, "loss": 2.9519, "step": 22120 }, { "epoch": 1.08, "grad_norm": 0.5319798588752747, "learning_rate": 0.0004265679930399203, "loss": 3.1005, "step": 22121 }, { "epoch": 1.08, "grad_norm": 0.5380662679672241, "learning_rate": 0.00042655403398673036, "loss": 3.1476, "step": 22122 }, { "epoch": 1.08, "grad_norm": 0.5653159618377686, "learning_rate": 0.0004265400746002214, "loss": 3.0577, "step": 22123 }, { "epoch": 1.08, "grad_norm": 0.5704825520515442, "learning_rate": 0.0004265261148804304, "loss": 3.1806, "step": 22124 }, { "epoch": 1.08, "grad_norm": 0.5176539421081543, "learning_rate": 0.0004265121548273938, "loss": 3.1776, "step": 22125 }, { "epoch": 1.08, "grad_norm": 0.5719426870346069, "learning_rate": 0.0004264981944411486, "loss": 3.0773, "step": 22126 }, { "epoch": 1.08, "grad_norm": 0.48471060395240784, "learning_rate": 0.0004264842337217315, "loss": 3.0145, "step": 22127 }, { "epoch": 1.08, "grad_norm": 0.515109121799469, "learning_rate": 0.00042647027266917926, "loss": 3.2385, "step": 22128 }, { "epoch": 1.08, "grad_norm": 0.5453347563743591, "learning_rate": 0.0004264563112835287, "loss": 3.0087, "step": 22129 }, { "epoch": 1.08, "grad_norm": 0.5508375763893127, "learning_rate": 0.00042644234956481646, "loss": 3.0716, "step": 22130 }, { "epoch": 1.08, "grad_norm": 0.5232502818107605, "learning_rate": 0.00042642838751307953, "loss": 3.2418, "step": 22131 }, { "epoch": 1.08, "grad_norm": 0.5501592755317688, "learning_rate": 0.00042641442512835446, "loss": 3.0997, "step": 22132 }, { "epoch": 1.08, "grad_norm": 0.54051673412323, "learning_rate": 0.00042640046241067817, "loss": 3.012, "step": 22133 }, { "epoch": 1.08, "grad_norm": 0.52208411693573, "learning_rate": 0.00042638649936008736, "loss": 3.0246, "step": 22134 }, { "epoch": 1.08, "grad_norm": 0.6216174364089966, "learning_rate": 0.0004263725359766189, "loss": 3.0755, "step": 22135 }, { "epoch": 1.08, "grad_norm": 0.5394618511199951, "learning_rate": 0.00042635857226030934, "loss": 3.1135, "step": 22136 }, { "epoch": 1.08, "grad_norm": 0.5192962288856506, "learning_rate": 0.00042634460821119576, "loss": 3.096, "step": 22137 }, { "epoch": 1.08, "grad_norm": 0.5286705493927002, "learning_rate": 0.0004263306438293148, "loss": 3.0381, "step": 22138 }, { "epoch": 1.08, "grad_norm": 0.5518622398376465, "learning_rate": 0.0004263166791147032, "loss": 3.0308, "step": 22139 }, { "epoch": 1.09, "grad_norm": 0.5467196702957153, "learning_rate": 0.0004263027140673978, "loss": 3.172, "step": 22140 }, { "epoch": 1.09, "grad_norm": 0.5387907028198242, "learning_rate": 0.00042628874868743527, "loss": 3.3111, "step": 22141 }, { "epoch": 1.09, "grad_norm": 0.5448318719863892, "learning_rate": 0.00042627478297485263, "loss": 2.9398, "step": 22142 }, { "epoch": 1.09, "grad_norm": 0.57187819480896, "learning_rate": 0.0004262608169296864, "loss": 3.2116, "step": 22143 }, { "epoch": 1.09, "grad_norm": 0.6088796257972717, "learning_rate": 0.00042624685055197355, "loss": 3.0892, "step": 22144 }, { "epoch": 1.09, "grad_norm": 0.5294945240020752, "learning_rate": 0.00042623288384175073, "loss": 3.0999, "step": 22145 }, { "epoch": 1.09, "grad_norm": 0.5282583236694336, "learning_rate": 0.00042621891679905477, "loss": 3.2845, "step": 22146 }, { "epoch": 1.09, "grad_norm": 0.556050181388855, "learning_rate": 0.00042620494942392247, "loss": 3.1564, "step": 22147 }, { "epoch": 1.09, "grad_norm": 0.5315901041030884, "learning_rate": 0.0004261909817163907, "loss": 3.0353, "step": 22148 }, { "epoch": 1.09, "grad_norm": 0.5163993239402771, "learning_rate": 0.00042617701367649616, "loss": 3.0004, "step": 22149 }, { "epoch": 1.09, "grad_norm": 0.5579254031181335, "learning_rate": 0.00042616304530427565, "loss": 3.0205, "step": 22150 }, { "epoch": 1.09, "grad_norm": 0.5503444671630859, "learning_rate": 0.00042614907659976593, "loss": 3.187, "step": 22151 }, { "epoch": 1.09, "grad_norm": 0.5766018033027649, "learning_rate": 0.0004261351075630038, "loss": 3.2087, "step": 22152 }, { "epoch": 1.09, "grad_norm": 0.5432386994361877, "learning_rate": 0.00042612113819402616, "loss": 3.0861, "step": 22153 }, { "epoch": 1.09, "grad_norm": 0.5107462406158447, "learning_rate": 0.0004261071684928696, "loss": 2.9946, "step": 22154 }, { "epoch": 1.09, "grad_norm": 0.5693354606628418, "learning_rate": 0.00042609319845957115, "loss": 2.9644, "step": 22155 }, { "epoch": 1.09, "grad_norm": 0.6248636841773987, "learning_rate": 0.00042607922809416736, "loss": 3.1401, "step": 22156 }, { "epoch": 1.09, "grad_norm": 0.5272868871688843, "learning_rate": 0.0004260652573966952, "loss": 2.98, "step": 22157 }, { "epoch": 1.09, "grad_norm": 0.5685650706291199, "learning_rate": 0.00042605128636719145, "loss": 2.7644, "step": 22158 }, { "epoch": 1.09, "grad_norm": 0.5144070982933044, "learning_rate": 0.0004260373150056928, "loss": 3.078, "step": 22159 }, { "epoch": 1.09, "grad_norm": 0.5530477166175842, "learning_rate": 0.00042602334331223615, "loss": 3.1147, "step": 22160 }, { "epoch": 1.09, "grad_norm": 0.5493810772895813, "learning_rate": 0.0004260093712868583, "loss": 3.2218, "step": 22161 }, { "epoch": 1.09, "grad_norm": 0.5624595284461975, "learning_rate": 0.00042599539892959593, "loss": 3.3188, "step": 22162 }, { "epoch": 1.09, "grad_norm": 0.5464377999305725, "learning_rate": 0.00042598142624048597, "loss": 3.1378, "step": 22163 }, { "epoch": 1.09, "grad_norm": 0.5302308201789856, "learning_rate": 0.00042596745321956525, "loss": 3.4019, "step": 22164 }, { "epoch": 1.09, "grad_norm": 0.5592415928840637, "learning_rate": 0.0004259534798668705, "loss": 2.9761, "step": 22165 }, { "epoch": 1.09, "grad_norm": 0.6183668971061707, "learning_rate": 0.0004259395061824384, "loss": 2.8968, "step": 22166 }, { "epoch": 1.09, "grad_norm": 0.5616676807403564, "learning_rate": 0.00042592553216630593, "loss": 3.13, "step": 22167 }, { "epoch": 1.09, "grad_norm": 0.5477641820907593, "learning_rate": 0.0004259115578185099, "loss": 3.1877, "step": 22168 }, { "epoch": 1.09, "grad_norm": 0.5532886385917664, "learning_rate": 0.000425897583139087, "loss": 2.7935, "step": 22169 }, { "epoch": 1.09, "grad_norm": 0.5679529309272766, "learning_rate": 0.00042588360812807416, "loss": 3.2103, "step": 22170 }, { "epoch": 1.09, "grad_norm": 0.5274649262428284, "learning_rate": 0.00042586963278550807, "loss": 2.974, "step": 22171 }, { "epoch": 1.09, "grad_norm": 0.5478577613830566, "learning_rate": 0.0004258556571114256, "loss": 3.0606, "step": 22172 }, { "epoch": 1.09, "grad_norm": 0.5519079566001892, "learning_rate": 0.00042584168110586354, "loss": 3.1437, "step": 22173 }, { "epoch": 1.09, "grad_norm": 0.5189817547798157, "learning_rate": 0.00042582770476885877, "loss": 3.0276, "step": 22174 }, { "epoch": 1.09, "grad_norm": 0.544884979724884, "learning_rate": 0.00042581372810044797, "loss": 3.0585, "step": 22175 }, { "epoch": 1.09, "grad_norm": 0.5363135933876038, "learning_rate": 0.0004257997511006681, "loss": 3.2155, "step": 22176 }, { "epoch": 1.09, "grad_norm": 0.5269807577133179, "learning_rate": 0.0004257857737695558, "loss": 3.0673, "step": 22177 }, { "epoch": 1.09, "grad_norm": 0.535571813583374, "learning_rate": 0.00042577179610714795, "loss": 3.3365, "step": 22178 }, { "epoch": 1.09, "grad_norm": 0.6153188943862915, "learning_rate": 0.00042575781811348155, "loss": 2.9663, "step": 22179 }, { "epoch": 1.09, "grad_norm": 0.5156885385513306, "learning_rate": 0.0004257438397885932, "loss": 3.134, "step": 22180 }, { "epoch": 1.09, "grad_norm": 0.5604032874107361, "learning_rate": 0.00042572986113251975, "loss": 3.2063, "step": 22181 }, { "epoch": 1.09, "grad_norm": 0.5438750386238098, "learning_rate": 0.000425715882145298, "loss": 3.0467, "step": 22182 }, { "epoch": 1.09, "grad_norm": 0.5508151054382324, "learning_rate": 0.00042570190282696484, "loss": 3.2381, "step": 22183 }, { "epoch": 1.09, "grad_norm": 0.7027482986450195, "learning_rate": 0.0004256879231775571, "loss": 3.0696, "step": 22184 }, { "epoch": 1.09, "grad_norm": 0.5488677620887756, "learning_rate": 0.00042567394319711156, "loss": 2.7966, "step": 22185 }, { "epoch": 1.09, "grad_norm": 0.6230414509773254, "learning_rate": 0.000425659962885665, "loss": 3.157, "step": 22186 }, { "epoch": 1.09, "grad_norm": 0.5432460308074951, "learning_rate": 0.0004256459822432543, "loss": 3.2616, "step": 22187 }, { "epoch": 1.09, "grad_norm": 0.520793616771698, "learning_rate": 0.00042563200126991625, "loss": 3.0267, "step": 22188 }, { "epoch": 1.09, "grad_norm": 0.533034086227417, "learning_rate": 0.0004256180199656877, "loss": 3.0244, "step": 22189 }, { "epoch": 1.09, "grad_norm": 0.5230249762535095, "learning_rate": 0.0004256040383306055, "loss": 3.1633, "step": 22190 }, { "epoch": 1.09, "grad_norm": 0.5286110639572144, "learning_rate": 0.00042559005636470636, "loss": 3.0445, "step": 22191 }, { "epoch": 1.09, "grad_norm": 0.541115403175354, "learning_rate": 0.00042557607406802715, "loss": 3.1721, "step": 22192 }, { "epoch": 1.09, "grad_norm": 0.5243132710456848, "learning_rate": 0.0004255620914406048, "loss": 3.2148, "step": 22193 }, { "epoch": 1.09, "grad_norm": 0.5529786944389343, "learning_rate": 0.00042554810848247613, "loss": 3.1431, "step": 22194 }, { "epoch": 1.09, "grad_norm": 0.5576296448707581, "learning_rate": 0.00042553412519367783, "loss": 3.0609, "step": 22195 }, { "epoch": 1.09, "grad_norm": 0.5397801995277405, "learning_rate": 0.00042552014157424677, "loss": 2.8357, "step": 22196 }, { "epoch": 1.09, "grad_norm": 0.5517771244049072, "learning_rate": 0.00042550615762421986, "loss": 3.21, "step": 22197 }, { "epoch": 1.09, "grad_norm": 0.5241683721542358, "learning_rate": 0.0004254921733436339, "loss": 3.1942, "step": 22198 }, { "epoch": 1.09, "grad_norm": 0.5698009729385376, "learning_rate": 0.00042547818873252567, "loss": 3.1291, "step": 22199 }, { "epoch": 1.09, "grad_norm": 0.5370516777038574, "learning_rate": 0.000425464203790932, "loss": 3.3494, "step": 22200 }, { "epoch": 1.09, "grad_norm": 0.6023409962654114, "learning_rate": 0.0004254502185188899, "loss": 3.1343, "step": 22201 }, { "epoch": 1.09, "grad_norm": 0.5534052848815918, "learning_rate": 0.000425436232916436, "loss": 3.1015, "step": 22202 }, { "epoch": 1.09, "grad_norm": 0.5142847299575806, "learning_rate": 0.00042542224698360713, "loss": 2.9254, "step": 22203 }, { "epoch": 1.09, "grad_norm": 0.5432417392730713, "learning_rate": 0.00042540826072044036, "loss": 3.1449, "step": 22204 }, { "epoch": 1.09, "grad_norm": 0.5595294833183289, "learning_rate": 0.0004253942741269722, "loss": 3.3928, "step": 22205 }, { "epoch": 1.09, "grad_norm": 0.5585522651672363, "learning_rate": 0.00042538028720323976, "loss": 3.2472, "step": 22206 }, { "epoch": 1.09, "grad_norm": 0.6130068302154541, "learning_rate": 0.0004253662999492797, "loss": 3.0106, "step": 22207 }, { "epoch": 1.09, "grad_norm": 0.5520464777946472, "learning_rate": 0.000425352312365129, "loss": 3.2156, "step": 22208 }, { "epoch": 1.09, "grad_norm": 0.5398738384246826, "learning_rate": 0.0004253383244508243, "loss": 3.2174, "step": 22209 }, { "epoch": 1.09, "grad_norm": 0.5600001215934753, "learning_rate": 0.00042532433620640276, "loss": 3.0729, "step": 22210 }, { "epoch": 1.09, "grad_norm": 0.5530575513839722, "learning_rate": 0.00042531034763190095, "loss": 3.0908, "step": 22211 }, { "epoch": 1.09, "grad_norm": 0.550061047077179, "learning_rate": 0.0004252963587273558, "loss": 3.0893, "step": 22212 }, { "epoch": 1.09, "grad_norm": 0.5559527277946472, "learning_rate": 0.0004252823694928041, "loss": 2.8335, "step": 22213 }, { "epoch": 1.09, "grad_norm": 0.7743620276451111, "learning_rate": 0.00042526837992828287, "loss": 3.1083, "step": 22214 }, { "epoch": 1.09, "grad_norm": 0.5912230610847473, "learning_rate": 0.0004252543900338288, "loss": 3.0625, "step": 22215 }, { "epoch": 1.09, "grad_norm": 0.5378463864326477, "learning_rate": 0.0004252403998094787, "loss": 3.0251, "step": 22216 }, { "epoch": 1.09, "grad_norm": 0.563388466835022, "learning_rate": 0.0004252264092552696, "loss": 3.153, "step": 22217 }, { "epoch": 1.09, "grad_norm": 0.5485290288925171, "learning_rate": 0.0004252124183712381, "loss": 3.3472, "step": 22218 }, { "epoch": 1.09, "grad_norm": 0.5503448247909546, "learning_rate": 0.0004251984271574213, "loss": 3.1022, "step": 22219 }, { "epoch": 1.09, "grad_norm": 0.5638839602470398, "learning_rate": 0.0004251844356138559, "loss": 3.016, "step": 22220 }, { "epoch": 1.09, "grad_norm": 0.5732755064964294, "learning_rate": 0.0004251704437405788, "loss": 2.9927, "step": 22221 }, { "epoch": 1.09, "grad_norm": 0.5192015171051025, "learning_rate": 0.00042515645153762683, "loss": 3.2112, "step": 22222 }, { "epoch": 1.09, "grad_norm": 0.49721595644950867, "learning_rate": 0.0004251424590050368, "loss": 3.0738, "step": 22223 }, { "epoch": 1.09, "grad_norm": 0.5540822148323059, "learning_rate": 0.00042512846614284566, "loss": 3.1562, "step": 22224 }, { "epoch": 1.09, "grad_norm": 0.5434120893478394, "learning_rate": 0.0004251144729510903, "loss": 3.0941, "step": 22225 }, { "epoch": 1.09, "grad_norm": 0.5510311722755432, "learning_rate": 0.0004251004794298074, "loss": 2.973, "step": 22226 }, { "epoch": 1.09, "grad_norm": 0.5258462429046631, "learning_rate": 0.000425086485579034, "loss": 2.8038, "step": 22227 }, { "epoch": 1.09, "grad_norm": 0.5398142337799072, "learning_rate": 0.0004250724913988067, "loss": 3.1084, "step": 22228 }, { "epoch": 1.09, "grad_norm": 0.5346091985702515, "learning_rate": 0.00042505849688916265, "loss": 2.9255, "step": 22229 }, { "epoch": 1.09, "grad_norm": 0.5627778172492981, "learning_rate": 0.0004250445020501387, "loss": 3.1123, "step": 22230 }, { "epoch": 1.09, "grad_norm": 0.5590901970863342, "learning_rate": 0.0004250305068817715, "loss": 3.1776, "step": 22231 }, { "epoch": 1.09, "grad_norm": 0.5717828869819641, "learning_rate": 0.0004250165113840979, "loss": 3.1254, "step": 22232 }, { "epoch": 1.09, "grad_norm": 0.5412927269935608, "learning_rate": 0.00042500251555715497, "loss": 3.1597, "step": 22233 }, { "epoch": 1.09, "grad_norm": 0.5460436344146729, "learning_rate": 0.0004249885194009794, "loss": 3.062, "step": 22234 }, { "epoch": 1.09, "grad_norm": 0.5982441306114197, "learning_rate": 0.0004249745229156082, "loss": 3.0677, "step": 22235 }, { "epoch": 1.09, "grad_norm": 0.6243758201599121, "learning_rate": 0.00042496052610107817, "loss": 3.0814, "step": 22236 }, { "epoch": 1.09, "grad_norm": 0.5570179224014282, "learning_rate": 0.00042494652895742614, "loss": 3.1024, "step": 22237 }, { "epoch": 1.09, "grad_norm": 0.5606381297111511, "learning_rate": 0.00042493253148468893, "loss": 3.1431, "step": 22238 }, { "epoch": 1.09, "grad_norm": 0.5624200701713562, "learning_rate": 0.0004249185336829035, "loss": 3.0207, "step": 22239 }, { "epoch": 1.09, "grad_norm": 0.5409414768218994, "learning_rate": 0.00042490453555210677, "loss": 3.0627, "step": 22240 }, { "epoch": 1.09, "grad_norm": 0.5465520024299622, "learning_rate": 0.00042489053709233554, "loss": 3.1536, "step": 22241 }, { "epoch": 1.09, "grad_norm": 0.5893270373344421, "learning_rate": 0.00042487653830362664, "loss": 3.2021, "step": 22242 }, { "epoch": 1.09, "grad_norm": 0.5436384081840515, "learning_rate": 0.0004248625391860168, "loss": 3.0846, "step": 22243 }, { "epoch": 1.09, "grad_norm": 0.5279334187507629, "learning_rate": 0.00042484853973954334, "loss": 2.9912, "step": 22244 }, { "epoch": 1.09, "grad_norm": 0.5567160844802856, "learning_rate": 0.00042483453996424266, "loss": 3.0722, "step": 22245 }, { "epoch": 1.09, "grad_norm": 0.5820968151092529, "learning_rate": 0.00042482053986015193, "loss": 3.2603, "step": 22246 }, { "epoch": 1.09, "grad_norm": 0.5296043753623962, "learning_rate": 0.00042480653942730785, "loss": 2.8998, "step": 22247 }, { "epoch": 1.09, "grad_norm": 0.5903947353363037, "learning_rate": 0.0004247925386657475, "loss": 2.9915, "step": 22248 }, { "epoch": 1.09, "grad_norm": 0.5724281072616577, "learning_rate": 0.00042477853757550744, "loss": 3.0855, "step": 22249 }, { "epoch": 1.09, "grad_norm": 0.5451366305351257, "learning_rate": 0.0004247645361566248, "loss": 3.2043, "step": 22250 }, { "epoch": 1.09, "grad_norm": 0.5273775458335876, "learning_rate": 0.0004247505344091364, "loss": 3.0935, "step": 22251 }, { "epoch": 1.09, "grad_norm": 0.5182363390922546, "learning_rate": 0.00042473653233307914, "loss": 3.0011, "step": 22252 }, { "epoch": 1.09, "grad_norm": 0.5897952914237976, "learning_rate": 0.0004247225299284898, "loss": 3.0319, "step": 22253 }, { "epoch": 1.09, "grad_norm": 0.5534531474113464, "learning_rate": 0.00042470852719540526, "loss": 3.067, "step": 22254 }, { "epoch": 1.09, "grad_norm": 0.6196662187576294, "learning_rate": 0.0004246945241338626, "loss": 3.1267, "step": 22255 }, { "epoch": 1.09, "grad_norm": 0.5049815773963928, "learning_rate": 0.0004246805207438985, "loss": 3.0543, "step": 22256 }, { "epoch": 1.09, "grad_norm": 0.5456374287605286, "learning_rate": 0.00042466651702554984, "loss": 2.9115, "step": 22257 }, { "epoch": 1.09, "grad_norm": 0.5522916913032532, "learning_rate": 0.00042465251297885365, "loss": 3.1055, "step": 22258 }, { "epoch": 1.09, "grad_norm": 0.5380954146385193, "learning_rate": 0.0004246385086038467, "loss": 2.957, "step": 22259 }, { "epoch": 1.09, "grad_norm": 0.5323505997657776, "learning_rate": 0.00042462450390056593, "loss": 3.18, "step": 22260 }, { "epoch": 1.09, "grad_norm": 0.5911219120025635, "learning_rate": 0.0004246104988690481, "loss": 3.0147, "step": 22261 }, { "epoch": 1.09, "grad_norm": 0.5536802411079407, "learning_rate": 0.00042459649350933026, "loss": 2.9749, "step": 22262 }, { "epoch": 1.09, "grad_norm": 0.5558472275733948, "learning_rate": 0.00042458248782144937, "loss": 3.0357, "step": 22263 }, { "epoch": 1.09, "grad_norm": 0.5627835988998413, "learning_rate": 0.00042456848180544196, "loss": 3.1438, "step": 22264 }, { "epoch": 1.09, "grad_norm": 0.5418185591697693, "learning_rate": 0.00042455447546134526, "loss": 3.2752, "step": 22265 }, { "epoch": 1.09, "grad_norm": 0.5398939251899719, "learning_rate": 0.0004245404687891961, "loss": 3.2987, "step": 22266 }, { "epoch": 1.09, "grad_norm": 0.5812855362892151, "learning_rate": 0.0004245264617890312, "loss": 3.2357, "step": 22267 }, { "epoch": 1.09, "grad_norm": 0.5603014826774597, "learning_rate": 0.00042451245446088764, "loss": 3.1168, "step": 22268 }, { "epoch": 1.09, "grad_norm": 0.5525497794151306, "learning_rate": 0.00042449844680480213, "loss": 3.0883, "step": 22269 }, { "epoch": 1.09, "grad_norm": 0.5205212235450745, "learning_rate": 0.0004244844388208118, "loss": 2.9792, "step": 22270 }, { "epoch": 1.09, "grad_norm": 0.5575414896011353, "learning_rate": 0.00042447043050895334, "loss": 3.0944, "step": 22271 }, { "epoch": 1.09, "grad_norm": 0.5364728569984436, "learning_rate": 0.00042445642186926373, "loss": 2.9596, "step": 22272 }, { "epoch": 1.09, "grad_norm": 0.5745815634727478, "learning_rate": 0.0004244424129017799, "loss": 3.0151, "step": 22273 }, { "epoch": 1.09, "grad_norm": 0.5751864314079285, "learning_rate": 0.0004244284036065387, "loss": 3.1432, "step": 22274 }, { "epoch": 1.09, "grad_norm": 0.5519656538963318, "learning_rate": 0.00042441439398357696, "loss": 3.0842, "step": 22275 }, { "epoch": 1.09, "grad_norm": 0.5192199945449829, "learning_rate": 0.0004244003840329317, "loss": 3.051, "step": 22276 }, { "epoch": 1.09, "grad_norm": 0.5178653597831726, "learning_rate": 0.00042438637375463977, "loss": 2.9998, "step": 22277 }, { "epoch": 1.09, "grad_norm": 0.5251026153564453, "learning_rate": 0.00042437236314873814, "loss": 3.2947, "step": 22278 }, { "epoch": 1.09, "grad_norm": 0.5604255199432373, "learning_rate": 0.00042435835221526347, "loss": 3.0916, "step": 22279 }, { "epoch": 1.09, "grad_norm": 0.5459524393081665, "learning_rate": 0.000424344340954253, "loss": 2.81, "step": 22280 }, { "epoch": 1.09, "grad_norm": 0.5570917725563049, "learning_rate": 0.0004243303293657434, "loss": 2.95, "step": 22281 }, { "epoch": 1.09, "grad_norm": 0.5491553544998169, "learning_rate": 0.00042431631744977165, "loss": 2.8636, "step": 22282 }, { "epoch": 1.09, "grad_norm": 0.5401395559310913, "learning_rate": 0.0004243023052063746, "loss": 3.0923, "step": 22283 }, { "epoch": 1.09, "grad_norm": 0.619458019733429, "learning_rate": 0.00042428829263558925, "loss": 3.1343, "step": 22284 }, { "epoch": 1.09, "grad_norm": 0.6060642600059509, "learning_rate": 0.0004242742797374524, "loss": 2.9652, "step": 22285 }, { "epoch": 1.09, "grad_norm": 0.5287819504737854, "learning_rate": 0.0004242602665120011, "loss": 2.9648, "step": 22286 }, { "epoch": 1.09, "grad_norm": 0.5329878330230713, "learning_rate": 0.0004242462529592721, "loss": 3.0809, "step": 22287 }, { "epoch": 1.09, "grad_norm": 0.5269094705581665, "learning_rate": 0.00042423223907930244, "loss": 3.151, "step": 22288 }, { "epoch": 1.09, "grad_norm": 0.5310917496681213, "learning_rate": 0.00042421822487212897, "loss": 3.0105, "step": 22289 }, { "epoch": 1.09, "grad_norm": 0.5504202246665955, "learning_rate": 0.0004242042103377885, "loss": 3.1283, "step": 22290 }, { "epoch": 1.09, "grad_norm": 0.5809419751167297, "learning_rate": 0.00042419019547631815, "loss": 3.0451, "step": 22291 }, { "epoch": 1.09, "grad_norm": 0.5401477217674255, "learning_rate": 0.0004241761802877547, "loss": 2.8967, "step": 22292 }, { "epoch": 1.09, "grad_norm": 0.5379841327667236, "learning_rate": 0.00042416216477213507, "loss": 3.0813, "step": 22293 }, { "epoch": 1.09, "grad_norm": 0.5749803781509399, "learning_rate": 0.0004241481489294962, "loss": 3.0934, "step": 22294 }, { "epoch": 1.09, "grad_norm": 0.5080057978630066, "learning_rate": 0.000424134132759875, "loss": 3.249, "step": 22295 }, { "epoch": 1.09, "grad_norm": 0.5740684270858765, "learning_rate": 0.0004241201162633084, "loss": 3.2469, "step": 22296 }, { "epoch": 1.09, "grad_norm": 0.6056188344955444, "learning_rate": 0.0004241060994398333, "loss": 3.118, "step": 22297 }, { "epoch": 1.09, "grad_norm": 0.5314207673072815, "learning_rate": 0.0004240920822894866, "loss": 3.1676, "step": 22298 }, { "epoch": 1.09, "grad_norm": 0.5285966992378235, "learning_rate": 0.00042407806481230525, "loss": 3.1967, "step": 22299 }, { "epoch": 1.09, "grad_norm": 0.5155075192451477, "learning_rate": 0.0004240640470083261, "loss": 3.0571, "step": 22300 }, { "epoch": 1.09, "grad_norm": 0.5423807501792908, "learning_rate": 0.0004240500288775861, "loss": 2.8794, "step": 22301 }, { "epoch": 1.09, "grad_norm": 0.5311214327812195, "learning_rate": 0.00042403601042012237, "loss": 3.1135, "step": 22302 }, { "epoch": 1.09, "grad_norm": 0.530709981918335, "learning_rate": 0.00042402199163597164, "loss": 3.0584, "step": 22303 }, { "epoch": 1.09, "grad_norm": 0.5539402961730957, "learning_rate": 0.00042400797252517075, "loss": 3.2242, "step": 22304 }, { "epoch": 1.09, "grad_norm": 0.5307897925376892, "learning_rate": 0.0004239939530877567, "loss": 2.8867, "step": 22305 }, { "epoch": 1.09, "grad_norm": 0.5629311800003052, "learning_rate": 0.0004239799333237666, "loss": 3.2001, "step": 22306 }, { "epoch": 1.09, "grad_norm": 0.533023476600647, "learning_rate": 0.000423965913233237, "loss": 2.9562, "step": 22307 }, { "epoch": 1.09, "grad_norm": 0.5523614287376404, "learning_rate": 0.0004239518928162052, "loss": 3.0158, "step": 22308 }, { "epoch": 1.09, "grad_norm": 0.5242599844932556, "learning_rate": 0.00042393787207270793, "loss": 3.0386, "step": 22309 }, { "epoch": 1.09, "grad_norm": 0.5767312049865723, "learning_rate": 0.0004239238510027821, "loss": 3.1031, "step": 22310 }, { "epoch": 1.09, "grad_norm": 0.5401198863983154, "learning_rate": 0.0004239098296064648, "loss": 3.0174, "step": 22311 }, { "epoch": 1.09, "grad_norm": 0.5361244082450867, "learning_rate": 0.0004238958078837927, "loss": 3.1997, "step": 22312 }, { "epoch": 1.09, "grad_norm": 0.5413596630096436, "learning_rate": 0.000423881785834803, "loss": 3.0465, "step": 22313 }, { "epoch": 1.09, "grad_norm": 0.5148143172264099, "learning_rate": 0.0004238677634595326, "loss": 3.1441, "step": 22314 }, { "epoch": 1.09, "grad_norm": 0.5160139203071594, "learning_rate": 0.0004238537407580182, "loss": 3.0543, "step": 22315 }, { "epoch": 1.09, "grad_norm": 0.5179514288902283, "learning_rate": 0.0004238397177302969, "loss": 3.2671, "step": 22316 }, { "epoch": 1.09, "grad_norm": 0.5649704337120056, "learning_rate": 0.00042382569437640573, "loss": 3.1858, "step": 22317 }, { "epoch": 1.09, "grad_norm": 0.5431568622589111, "learning_rate": 0.0004238116706963814, "loss": 3.3245, "step": 22318 }, { "epoch": 1.09, "grad_norm": 0.6058350205421448, "learning_rate": 0.00042379764669026103, "loss": 3.1804, "step": 22319 }, { "epoch": 1.09, "grad_norm": 0.5485496520996094, "learning_rate": 0.0004237836223580814, "loss": 3.1041, "step": 22320 }, { "epoch": 1.09, "grad_norm": 0.5472593903541565, "learning_rate": 0.00042376959769987966, "loss": 3.1254, "step": 22321 }, { "epoch": 1.09, "grad_norm": 0.5176267027854919, "learning_rate": 0.00042375557271569247, "loss": 3.2074, "step": 22322 }, { "epoch": 1.09, "grad_norm": 0.5529015064239502, "learning_rate": 0.000423741547405557, "loss": 3.1134, "step": 22323 }, { "epoch": 1.09, "grad_norm": 0.5462213158607483, "learning_rate": 0.00042372752176951013, "loss": 3.1059, "step": 22324 }, { "epoch": 1.09, "grad_norm": 0.5348901152610779, "learning_rate": 0.0004237134958075887, "loss": 3.1225, "step": 22325 }, { "epoch": 1.09, "grad_norm": 0.6045992374420166, "learning_rate": 0.0004236994695198298, "loss": 3.0944, "step": 22326 }, { "epoch": 1.09, "grad_norm": 0.5294902920722961, "learning_rate": 0.0004236854429062703, "loss": 3.1684, "step": 22327 }, { "epoch": 1.09, "grad_norm": 0.5004157423973083, "learning_rate": 0.00042367141596694717, "loss": 2.921, "step": 22328 }, { "epoch": 1.09, "grad_norm": 0.5467502474784851, "learning_rate": 0.00042365738870189725, "loss": 3.0567, "step": 22329 }, { "epoch": 1.09, "grad_norm": 0.5614712238311768, "learning_rate": 0.0004236433611111576, "loss": 3.165, "step": 22330 }, { "epoch": 1.09, "grad_norm": 0.5636160373687744, "learning_rate": 0.0004236293331947651, "loss": 2.931, "step": 22331 }, { "epoch": 1.09, "grad_norm": 0.510614275932312, "learning_rate": 0.0004236153049527568, "loss": 2.879, "step": 22332 }, { "epoch": 1.09, "grad_norm": 0.5611421465873718, "learning_rate": 0.00042360127638516954, "loss": 3.0598, "step": 22333 }, { "epoch": 1.09, "grad_norm": 0.5256950855255127, "learning_rate": 0.0004235872474920403, "loss": 2.9983, "step": 22334 }, { "epoch": 1.09, "grad_norm": 0.528252899646759, "learning_rate": 0.00042357321827340603, "loss": 3.2655, "step": 22335 }, { "epoch": 1.09, "grad_norm": 0.528097927570343, "learning_rate": 0.0004235591887293037, "loss": 2.9888, "step": 22336 }, { "epoch": 1.09, "grad_norm": 0.4826788008213043, "learning_rate": 0.0004235451588597702, "loss": 3.2171, "step": 22337 }, { "epoch": 1.09, "grad_norm": 0.5288520455360413, "learning_rate": 0.00042353112866484266, "loss": 3.1955, "step": 22338 }, { "epoch": 1.09, "grad_norm": 0.5336189866065979, "learning_rate": 0.0004235170981445579, "loss": 3.0057, "step": 22339 }, { "epoch": 1.09, "grad_norm": 0.5776178240776062, "learning_rate": 0.0004235030672989527, "loss": 2.89, "step": 22340 }, { "epoch": 1.09, "grad_norm": 0.533409059047699, "learning_rate": 0.00042348903612806423, "loss": 3.0783, "step": 22341 }, { "epoch": 1.09, "grad_norm": 0.547615647315979, "learning_rate": 0.0004234750046319294, "loss": 3.1035, "step": 22342 }, { "epoch": 1.09, "grad_norm": 0.5494769215583801, "learning_rate": 0.00042346097281058536, "loss": 3.252, "step": 22343 }, { "epoch": 1.1, "grad_norm": 0.570833146572113, "learning_rate": 0.0004234469406640687, "loss": 3.049, "step": 22344 }, { "epoch": 1.1, "grad_norm": 0.5642995834350586, "learning_rate": 0.0004234329081924166, "loss": 3.1304, "step": 22345 }, { "epoch": 1.1, "grad_norm": 0.5693486332893372, "learning_rate": 0.00042341887539566595, "loss": 2.8819, "step": 22346 }, { "epoch": 1.1, "grad_norm": 0.5467201471328735, "learning_rate": 0.0004234048422738538, "loss": 3.0296, "step": 22347 }, { "epoch": 1.1, "grad_norm": 0.5357093811035156, "learning_rate": 0.00042339080882701697, "loss": 3.0394, "step": 22348 }, { "epoch": 1.1, "grad_norm": 0.5732297301292419, "learning_rate": 0.00042337677505519254, "loss": 3.1511, "step": 22349 }, { "epoch": 1.1, "grad_norm": 0.5539453029632568, "learning_rate": 0.00042336274095841744, "loss": 3.0462, "step": 22350 }, { "epoch": 1.1, "grad_norm": 0.5515852570533752, "learning_rate": 0.00042334870653672863, "loss": 2.9138, "step": 22351 }, { "epoch": 1.1, "grad_norm": 0.5160804986953735, "learning_rate": 0.00042333467179016303, "loss": 3.0142, "step": 22352 }, { "epoch": 1.1, "grad_norm": 0.5109854936599731, "learning_rate": 0.0004233206367187576, "loss": 3.1363, "step": 22353 }, { "epoch": 1.1, "grad_norm": 0.5832191705703735, "learning_rate": 0.0004233066013225495, "loss": 3.0025, "step": 22354 }, { "epoch": 1.1, "grad_norm": 0.5390937924385071, "learning_rate": 0.00042329256560157537, "loss": 3.1417, "step": 22355 }, { "epoch": 1.1, "grad_norm": 0.5653042793273926, "learning_rate": 0.0004232785295558724, "loss": 3.1148, "step": 22356 }, { "epoch": 1.1, "grad_norm": 0.5726116895675659, "learning_rate": 0.0004232644931854775, "loss": 3.2039, "step": 22357 }, { "epoch": 1.1, "grad_norm": 0.5899278521537781, "learning_rate": 0.0004232504564904278, "loss": 3.2208, "step": 22358 }, { "epoch": 1.1, "grad_norm": 0.5757837295532227, "learning_rate": 0.00042323641947075995, "loss": 3.1871, "step": 22359 }, { "epoch": 1.1, "grad_norm": 0.5611856579780579, "learning_rate": 0.0004232223821265111, "loss": 3.1254, "step": 22360 }, { "epoch": 1.1, "grad_norm": 0.568060576915741, "learning_rate": 0.00042320834445771827, "loss": 3.2821, "step": 22361 }, { "epoch": 1.1, "grad_norm": 0.5462589859962463, "learning_rate": 0.0004231943064644183, "loss": 3.0981, "step": 22362 }, { "epoch": 1.1, "grad_norm": 0.5303399562835693, "learning_rate": 0.0004231802681466483, "loss": 3.2174, "step": 22363 }, { "epoch": 1.1, "grad_norm": 0.5866662263870239, "learning_rate": 0.0004231662295044451, "loss": 2.9066, "step": 22364 }, { "epoch": 1.1, "grad_norm": 0.5583893060684204, "learning_rate": 0.0004231521905378459, "loss": 2.9209, "step": 22365 }, { "epoch": 1.1, "grad_norm": 0.5403869152069092, "learning_rate": 0.00042313815124688745, "loss": 3.0106, "step": 22366 }, { "epoch": 1.1, "grad_norm": 0.5992947220802307, "learning_rate": 0.0004231241116316068, "loss": 3.2221, "step": 22367 }, { "epoch": 1.1, "grad_norm": 0.5342088937759399, "learning_rate": 0.0004231100716920409, "loss": 2.932, "step": 22368 }, { "epoch": 1.1, "grad_norm": 0.5689389705657959, "learning_rate": 0.00042309603142822686, "loss": 3.0559, "step": 22369 }, { "epoch": 1.1, "grad_norm": 0.5494974851608276, "learning_rate": 0.0004230819908402015, "loss": 3.1754, "step": 22370 }, { "epoch": 1.1, "grad_norm": 0.5982123613357544, "learning_rate": 0.0004230679499280018, "loss": 3.2388, "step": 22371 }, { "epoch": 1.1, "grad_norm": 0.5255215167999268, "learning_rate": 0.00042305390869166494, "loss": 3.2487, "step": 22372 }, { "epoch": 1.1, "grad_norm": 0.5233165621757507, "learning_rate": 0.00042303986713122766, "loss": 3.0138, "step": 22373 }, { "epoch": 1.1, "grad_norm": 0.5595539808273315, "learning_rate": 0.0004230258252467271, "loss": 3.1675, "step": 22374 }, { "epoch": 1.1, "grad_norm": 0.5262620449066162, "learning_rate": 0.00042301178303820025, "loss": 3.3422, "step": 22375 }, { "epoch": 1.1, "grad_norm": 0.5396249890327454, "learning_rate": 0.00042299774050568394, "loss": 3.0504, "step": 22376 }, { "epoch": 1.1, "grad_norm": 0.5464133620262146, "learning_rate": 0.00042298369764921524, "loss": 3.2645, "step": 22377 }, { "epoch": 1.1, "grad_norm": 0.561158299446106, "learning_rate": 0.0004229696544688312, "loss": 3.1811, "step": 22378 }, { "epoch": 1.1, "grad_norm": 0.5148482918739319, "learning_rate": 0.0004229556109645688, "loss": 3.2374, "step": 22379 }, { "epoch": 1.1, "grad_norm": 0.5801580548286438, "learning_rate": 0.00042294156713646494, "loss": 2.9333, "step": 22380 }, { "epoch": 1.1, "grad_norm": 0.5597841739654541, "learning_rate": 0.0004229275229845567, "loss": 3.2094, "step": 22381 }, { "epoch": 1.1, "grad_norm": 0.5296369791030884, "learning_rate": 0.00042291347850888087, "loss": 2.8841, "step": 22382 }, { "epoch": 1.1, "grad_norm": 0.553082287311554, "learning_rate": 0.00042289943370947476, "loss": 3.1016, "step": 22383 }, { "epoch": 1.1, "grad_norm": 0.6039657592773438, "learning_rate": 0.0004228853885863751, "loss": 3.0928, "step": 22384 }, { "epoch": 1.1, "grad_norm": 0.5544348359107971, "learning_rate": 0.000422871343139619, "loss": 3.093, "step": 22385 }, { "epoch": 1.1, "grad_norm": 0.5134222507476807, "learning_rate": 0.00042285729736924344, "loss": 3.134, "step": 22386 }, { "epoch": 1.1, "grad_norm": 0.5257523059844971, "learning_rate": 0.00042284325127528536, "loss": 3.1072, "step": 22387 }, { "epoch": 1.1, "grad_norm": 0.5168401598930359, "learning_rate": 0.00042282920485778183, "loss": 3.0503, "step": 22388 }, { "epoch": 1.1, "grad_norm": 0.5573122501373291, "learning_rate": 0.0004228151581167698, "loss": 3.0215, "step": 22389 }, { "epoch": 1.1, "grad_norm": 0.5416275858879089, "learning_rate": 0.00042280111105228634, "loss": 3.1959, "step": 22390 }, { "epoch": 1.1, "grad_norm": 0.5634099841117859, "learning_rate": 0.00042278706366436836, "loss": 3.1155, "step": 22391 }, { "epoch": 1.1, "grad_norm": 0.58818519115448, "learning_rate": 0.0004227730159530528, "loss": 3.0849, "step": 22392 }, { "epoch": 1.1, "grad_norm": 0.5335586071014404, "learning_rate": 0.0004227589679183768, "loss": 2.8114, "step": 22393 }, { "epoch": 1.1, "grad_norm": 0.5638708472251892, "learning_rate": 0.00042274491956037735, "loss": 2.9018, "step": 22394 }, { "epoch": 1.1, "grad_norm": 0.5199167132377625, "learning_rate": 0.0004227308708790914, "loss": 2.95, "step": 22395 }, { "epoch": 1.1, "grad_norm": 0.5466358661651611, "learning_rate": 0.00042271682187455594, "loss": 3.0032, "step": 22396 }, { "epoch": 1.1, "grad_norm": 0.5106999278068542, "learning_rate": 0.000422702772546808, "loss": 3.1893, "step": 22397 }, { "epoch": 1.1, "grad_norm": 0.5572851896286011, "learning_rate": 0.00042268872289588454, "loss": 2.9937, "step": 22398 }, { "epoch": 1.1, "grad_norm": 0.5566379427909851, "learning_rate": 0.0004226746729218226, "loss": 2.8665, "step": 22399 }, { "epoch": 1.1, "grad_norm": 0.5310401916503906, "learning_rate": 0.00042266062262465913, "loss": 3.1665, "step": 22400 }, { "epoch": 1.1, "grad_norm": 0.5580877065658569, "learning_rate": 0.0004226465720044313, "loss": 3.0617, "step": 22401 }, { "epoch": 1.1, "grad_norm": 0.5510329604148865, "learning_rate": 0.0004226325210611759, "loss": 3.1422, "step": 22402 }, { "epoch": 1.1, "grad_norm": 0.6322705149650574, "learning_rate": 0.00042261846979493005, "loss": 3.0221, "step": 22403 }, { "epoch": 1.1, "grad_norm": 0.56231290102005, "learning_rate": 0.00042260441820573077, "loss": 3.0854, "step": 22404 }, { "epoch": 1.1, "grad_norm": 0.5650330781936646, "learning_rate": 0.00042259036629361515, "loss": 3.1679, "step": 22405 }, { "epoch": 1.1, "grad_norm": 0.5603417754173279, "learning_rate": 0.00042257631405862, "loss": 3.1113, "step": 22406 }, { "epoch": 1.1, "grad_norm": 0.5578677654266357, "learning_rate": 0.0004225622615007824, "loss": 3.2132, "step": 22407 }, { "epoch": 1.1, "grad_norm": 0.5346105694770813, "learning_rate": 0.00042254820862013947, "loss": 3.1515, "step": 22408 }, { "epoch": 1.1, "grad_norm": 0.5724475979804993, "learning_rate": 0.0004225341554167281, "loss": 3.0174, "step": 22409 }, { "epoch": 1.1, "grad_norm": 0.6160552501678467, "learning_rate": 0.00042252010189058534, "loss": 3.0215, "step": 22410 }, { "epoch": 1.1, "grad_norm": 0.5466728806495667, "learning_rate": 0.0004225060480417482, "loss": 3.0644, "step": 22411 }, { "epoch": 1.1, "grad_norm": 0.5517079830169678, "learning_rate": 0.00042249199387025383, "loss": 3.0097, "step": 22412 }, { "epoch": 1.1, "grad_norm": 0.5601084232330322, "learning_rate": 0.00042247793937613893, "loss": 3.0367, "step": 22413 }, { "epoch": 1.1, "grad_norm": 0.5142292976379395, "learning_rate": 0.00042246388455944076, "loss": 3.0302, "step": 22414 }, { "epoch": 1.1, "grad_norm": 0.5414049625396729, "learning_rate": 0.0004224498294201964, "loss": 3.0613, "step": 22415 }, { "epoch": 1.1, "grad_norm": 0.5946900844573975, "learning_rate": 0.0004224357739584427, "loss": 2.8341, "step": 22416 }, { "epoch": 1.1, "grad_norm": 0.5414168238639832, "learning_rate": 0.0004224217181742167, "loss": 3.109, "step": 22417 }, { "epoch": 1.1, "grad_norm": 0.5585944056510925, "learning_rate": 0.0004224076620675554, "loss": 3.1668, "step": 22418 }, { "epoch": 1.1, "grad_norm": 0.5433940887451172, "learning_rate": 0.000422393605638496, "loss": 2.8695, "step": 22419 }, { "epoch": 1.1, "grad_norm": 0.5730898976325989, "learning_rate": 0.00042237954888707535, "loss": 3.1879, "step": 22420 }, { "epoch": 1.1, "grad_norm": 0.5441948771476746, "learning_rate": 0.0004223654918133305, "loss": 3.0039, "step": 22421 }, { "epoch": 1.1, "grad_norm": 0.5263099074363708, "learning_rate": 0.00042235143441729853, "loss": 3.0273, "step": 22422 }, { "epoch": 1.1, "grad_norm": 0.5462498068809509, "learning_rate": 0.0004223373766990164, "loss": 3.1222, "step": 22423 }, { "epoch": 1.1, "grad_norm": 0.5350505709648132, "learning_rate": 0.0004223233186585211, "loss": 3.0815, "step": 22424 }, { "epoch": 1.1, "grad_norm": 0.5752918720245361, "learning_rate": 0.0004223092602958498, "loss": 3.2427, "step": 22425 }, { "epoch": 1.1, "grad_norm": 0.5051820278167725, "learning_rate": 0.0004222952016110395, "loss": 3.1282, "step": 22426 }, { "epoch": 1.1, "grad_norm": 0.568838894367218, "learning_rate": 0.0004222811426041271, "loss": 3.133, "step": 22427 }, { "epoch": 1.1, "grad_norm": 0.5199463367462158, "learning_rate": 0.00042226708327514973, "loss": 3.1213, "step": 22428 }, { "epoch": 1.1, "grad_norm": 0.5748323202133179, "learning_rate": 0.00042225302362414435, "loss": 3.2312, "step": 22429 }, { "epoch": 1.1, "grad_norm": 0.5747201442718506, "learning_rate": 0.00042223896365114815, "loss": 2.9831, "step": 22430 }, { "epoch": 1.1, "grad_norm": 0.5636919140815735, "learning_rate": 0.00042222490335619797, "loss": 3.0873, "step": 22431 }, { "epoch": 1.1, "grad_norm": 0.5946205854415894, "learning_rate": 0.00042221084273933086, "loss": 2.8779, "step": 22432 }, { "epoch": 1.1, "grad_norm": 0.545413613319397, "learning_rate": 0.0004221967818005839, "loss": 3.1714, "step": 22433 }, { "epoch": 1.1, "grad_norm": 0.5125501751899719, "learning_rate": 0.00042218272053999427, "loss": 3.2075, "step": 22434 }, { "epoch": 1.1, "grad_norm": 0.5402535796165466, "learning_rate": 0.0004221686589575988, "loss": 3.0146, "step": 22435 }, { "epoch": 1.1, "grad_norm": 0.5473110675811768, "learning_rate": 0.0004221545970534346, "loss": 3.0171, "step": 22436 }, { "epoch": 1.1, "grad_norm": 0.5473494529724121, "learning_rate": 0.00042214053482753864, "loss": 2.9442, "step": 22437 }, { "epoch": 1.1, "grad_norm": 0.5252751708030701, "learning_rate": 0.00042212647227994805, "loss": 3.2078, "step": 22438 }, { "epoch": 1.1, "grad_norm": 0.5198621153831482, "learning_rate": 0.0004221124094106999, "loss": 2.9712, "step": 22439 }, { "epoch": 1.1, "grad_norm": 0.5305339694023132, "learning_rate": 0.00042209834621983105, "loss": 3.0418, "step": 22440 }, { "epoch": 1.1, "grad_norm": 0.5615828037261963, "learning_rate": 0.00042208428270737875, "loss": 3.0449, "step": 22441 }, { "epoch": 1.1, "grad_norm": 0.554909884929657, "learning_rate": 0.00042207021887337995, "loss": 3.2129, "step": 22442 }, { "epoch": 1.1, "grad_norm": 0.578995406627655, "learning_rate": 0.0004220561547178717, "loss": 3.0968, "step": 22443 }, { "epoch": 1.1, "grad_norm": 0.5198182463645935, "learning_rate": 0.0004220420902408908, "loss": 3.035, "step": 22444 }, { "epoch": 1.1, "grad_norm": 0.5391382575035095, "learning_rate": 0.0004220280254424748, "loss": 3.1567, "step": 22445 }, { "epoch": 1.1, "grad_norm": 0.5328619480133057, "learning_rate": 0.0004220139603226603, "loss": 3.2176, "step": 22446 }, { "epoch": 1.1, "grad_norm": 0.5532649159431458, "learning_rate": 0.0004219998948814846, "loss": 3.105, "step": 22447 }, { "epoch": 1.1, "grad_norm": 0.5565139055252075, "learning_rate": 0.00042198582911898467, "loss": 3.1877, "step": 22448 }, { "epoch": 1.1, "grad_norm": 0.522406280040741, "learning_rate": 0.00042197176303519746, "loss": 3.0828, "step": 22449 }, { "epoch": 1.1, "grad_norm": 0.5588423609733582, "learning_rate": 0.00042195769663016017, "loss": 3.1538, "step": 22450 }, { "epoch": 1.1, "grad_norm": 0.5381976962089539, "learning_rate": 0.0004219436299039097, "loss": 3.2286, "step": 22451 }, { "epoch": 1.1, "grad_norm": 0.5260021090507507, "learning_rate": 0.00042192956285648335, "loss": 3.2763, "step": 22452 }, { "epoch": 1.1, "grad_norm": 0.5152723789215088, "learning_rate": 0.00042191549548791787, "loss": 3.0711, "step": 22453 }, { "epoch": 1.1, "grad_norm": 0.5516050457954407, "learning_rate": 0.00042190142779825035, "loss": 2.9179, "step": 22454 }, { "epoch": 1.1, "grad_norm": 0.539625883102417, "learning_rate": 0.000421887359787518, "loss": 2.9954, "step": 22455 }, { "epoch": 1.1, "grad_norm": 0.5438560843467712, "learning_rate": 0.0004218732914557579, "loss": 3.1334, "step": 22456 }, { "epoch": 1.1, "grad_norm": 0.5455101132392883, "learning_rate": 0.00042185922280300696, "loss": 3.2678, "step": 22457 }, { "epoch": 1.1, "grad_norm": 0.5365552306175232, "learning_rate": 0.0004218451538293023, "loss": 3.0484, "step": 22458 }, { "epoch": 1.1, "grad_norm": 0.549579918384552, "learning_rate": 0.0004218310845346808, "loss": 3.3398, "step": 22459 }, { "epoch": 1.1, "grad_norm": 0.537279486656189, "learning_rate": 0.00042181701491917983, "loss": 3.0408, "step": 22460 }, { "epoch": 1.1, "grad_norm": 0.5590521693229675, "learning_rate": 0.0004218029449828362, "loss": 3.1195, "step": 22461 }, { "epoch": 1.1, "grad_norm": 0.5591493844985962, "learning_rate": 0.0004217888747256871, "loss": 3.0398, "step": 22462 }, { "epoch": 1.1, "grad_norm": 0.5219199061393738, "learning_rate": 0.00042177480414776956, "loss": 2.9367, "step": 22463 }, { "epoch": 1.1, "grad_norm": 0.5333996415138245, "learning_rate": 0.0004217607332491206, "loss": 3.2085, "step": 22464 }, { "epoch": 1.1, "grad_norm": 0.5313884615898132, "learning_rate": 0.00042174666202977727, "loss": 2.8805, "step": 22465 }, { "epoch": 1.1, "grad_norm": 0.5648304224014282, "learning_rate": 0.0004217325904897766, "loss": 3.2723, "step": 22466 }, { "epoch": 1.1, "grad_norm": 0.5362172722816467, "learning_rate": 0.0004217185186291559, "loss": 3.132, "step": 22467 }, { "epoch": 1.1, "grad_norm": 0.5308043956756592, "learning_rate": 0.00042170444644795197, "loss": 3.1991, "step": 22468 }, { "epoch": 1.1, "grad_norm": 0.5285980701446533, "learning_rate": 0.0004216903739462018, "loss": 2.998, "step": 22469 }, { "epoch": 1.1, "grad_norm": 0.529820442199707, "learning_rate": 0.00042167630112394284, "loss": 2.9558, "step": 22470 }, { "epoch": 1.1, "grad_norm": 0.5706838965415955, "learning_rate": 0.0004216622279812118, "loss": 3.1321, "step": 22471 }, { "epoch": 1.1, "grad_norm": 0.5605747103691101, "learning_rate": 0.0004216481545180459, "loss": 3.1693, "step": 22472 }, { "epoch": 1.1, "grad_norm": 0.52308189868927, "learning_rate": 0.00042163408073448214, "loss": 2.8613, "step": 22473 }, { "epoch": 1.1, "grad_norm": 0.5640896558761597, "learning_rate": 0.0004216200066305576, "loss": 3.1425, "step": 22474 }, { "epoch": 1.1, "grad_norm": 0.5801900029182434, "learning_rate": 0.0004216059322063094, "loss": 3.1555, "step": 22475 }, { "epoch": 1.1, "grad_norm": 0.5530162453651428, "learning_rate": 0.0004215918574617746, "loss": 3.1143, "step": 22476 }, { "epoch": 1.1, "grad_norm": 0.5521156191825867, "learning_rate": 0.0004215777823969902, "loss": 3.103, "step": 22477 }, { "epoch": 1.1, "grad_norm": 0.5479637980461121, "learning_rate": 0.0004215637070119934, "loss": 3.0354, "step": 22478 }, { "epoch": 1.1, "grad_norm": 0.551988422870636, "learning_rate": 0.0004215496313068212, "loss": 3.1629, "step": 22479 }, { "epoch": 1.1, "grad_norm": 0.5706598162651062, "learning_rate": 0.0004215355552815105, "loss": 3.1129, "step": 22480 }, { "epoch": 1.1, "grad_norm": 0.5351764559745789, "learning_rate": 0.0004215214789360987, "loss": 3.071, "step": 22481 }, { "epoch": 1.1, "grad_norm": 0.5388553738594055, "learning_rate": 0.00042150740227062263, "loss": 3.0048, "step": 22482 }, { "epoch": 1.1, "grad_norm": 0.5683891773223877, "learning_rate": 0.0004214933252851195, "loss": 3.0454, "step": 22483 }, { "epoch": 1.1, "grad_norm": 0.5642884969711304, "learning_rate": 0.00042147924797962625, "loss": 3.0447, "step": 22484 }, { "epoch": 1.1, "grad_norm": 0.5272451043128967, "learning_rate": 0.0004214651703541801, "loss": 2.9475, "step": 22485 }, { "epoch": 1.1, "grad_norm": 0.5619935989379883, "learning_rate": 0.00042145109240881805, "loss": 2.9711, "step": 22486 }, { "epoch": 1.1, "grad_norm": 0.6104692220687866, "learning_rate": 0.0004214370141435772, "loss": 3.2122, "step": 22487 }, { "epoch": 1.1, "grad_norm": 0.5447784066200256, "learning_rate": 0.0004214229355584946, "loss": 3.0263, "step": 22488 }, { "epoch": 1.1, "grad_norm": 0.5399470329284668, "learning_rate": 0.0004214088566536075, "loss": 3.0962, "step": 22489 }, { "epoch": 1.1, "grad_norm": 0.5518352389335632, "learning_rate": 0.0004213947774289526, "loss": 2.8093, "step": 22490 }, { "epoch": 1.1, "grad_norm": 0.5711850523948669, "learning_rate": 0.00042138069788456736, "loss": 2.9572, "step": 22491 }, { "epoch": 1.1, "grad_norm": 0.5700511932373047, "learning_rate": 0.00042136661802048874, "loss": 3.058, "step": 22492 }, { "epoch": 1.1, "grad_norm": 0.5496143102645874, "learning_rate": 0.0004213525378367538, "loss": 3.0367, "step": 22493 }, { "epoch": 1.1, "grad_norm": 0.5528961420059204, "learning_rate": 0.0004213384573333996, "loss": 3.0467, "step": 22494 }, { "epoch": 1.1, "grad_norm": 0.5584120750427246, "learning_rate": 0.00042132437651046315, "loss": 3.1026, "step": 22495 }, { "epoch": 1.1, "grad_norm": 0.5595943927764893, "learning_rate": 0.0004213102953679818, "loss": 2.9348, "step": 22496 }, { "epoch": 1.1, "grad_norm": 0.5819007158279419, "learning_rate": 0.0004212962139059924, "loss": 3.1339, "step": 22497 }, { "epoch": 1.1, "grad_norm": 0.5432302951812744, "learning_rate": 0.00042128213212453216, "loss": 3.1587, "step": 22498 }, { "epoch": 1.1, "grad_norm": 0.5066870450973511, "learning_rate": 0.00042126805002363815, "loss": 3.1825, "step": 22499 }, { "epoch": 1.1, "grad_norm": 0.531493604183197, "learning_rate": 0.0004212539676033474, "loss": 3.3026, "step": 22500 }, { "epoch": 1.1, "grad_norm": 0.5524630546569824, "learning_rate": 0.00042123988486369703, "loss": 3.0143, "step": 22501 }, { "epoch": 1.1, "grad_norm": 0.6150621771812439, "learning_rate": 0.00042122580180472413, "loss": 2.8606, "step": 22502 }, { "epoch": 1.1, "grad_norm": 0.55769282579422, "learning_rate": 0.00042121171842646584, "loss": 3.0798, "step": 22503 }, { "epoch": 1.1, "grad_norm": 0.5654763579368591, "learning_rate": 0.00042119763472895917, "loss": 3.1062, "step": 22504 }, { "epoch": 1.1, "grad_norm": 0.5415908098220825, "learning_rate": 0.00042118355071224116, "loss": 3.2484, "step": 22505 }, { "epoch": 1.1, "grad_norm": 0.5293490290641785, "learning_rate": 0.00042116946637634915, "loss": 3.3383, "step": 22506 }, { "epoch": 1.1, "grad_norm": 0.5430842041969299, "learning_rate": 0.00042115538172132007, "loss": 2.9673, "step": 22507 }, { "epoch": 1.1, "grad_norm": 0.9073577523231506, "learning_rate": 0.000421141296747191, "loss": 3.183, "step": 22508 }, { "epoch": 1.1, "grad_norm": 0.5296067595481873, "learning_rate": 0.00042112721145399904, "loss": 3.0694, "step": 22509 }, { "epoch": 1.1, "grad_norm": 0.5582892894744873, "learning_rate": 0.00042111312584178136, "loss": 2.915, "step": 22510 }, { "epoch": 1.1, "grad_norm": 0.5482213497161865, "learning_rate": 0.000421099039910575, "loss": 3.0819, "step": 22511 }, { "epoch": 1.1, "grad_norm": 0.534598708152771, "learning_rate": 0.00042108495366041703, "loss": 3.0956, "step": 22512 }, { "epoch": 1.1, "grad_norm": 0.5279784202575684, "learning_rate": 0.0004210708670913446, "loss": 3.136, "step": 22513 }, { "epoch": 1.1, "grad_norm": 0.5435617566108704, "learning_rate": 0.00042105678020339495, "loss": 3.3483, "step": 22514 }, { "epoch": 1.1, "grad_norm": 0.5815554857254028, "learning_rate": 0.00042104269299660487, "loss": 3.2077, "step": 22515 }, { "epoch": 1.1, "grad_norm": 0.5558618903160095, "learning_rate": 0.00042102860547101165, "loss": 2.9707, "step": 22516 }, { "epoch": 1.1, "grad_norm": 0.5012040734291077, "learning_rate": 0.00042101451762665247, "loss": 3.1028, "step": 22517 }, { "epoch": 1.1, "grad_norm": 0.5293975472450256, "learning_rate": 0.00042100042946356436, "loss": 2.9891, "step": 22518 }, { "epoch": 1.1, "grad_norm": 0.5717222690582275, "learning_rate": 0.0004209863409817843, "loss": 3.0784, "step": 22519 }, { "epoch": 1.1, "grad_norm": 0.5145463347434998, "learning_rate": 0.0004209722521813495, "loss": 2.9634, "step": 22520 }, { "epoch": 1.1, "grad_norm": 0.5181236267089844, "learning_rate": 0.0004209581630622971, "loss": 3.2429, "step": 22521 }, { "epoch": 1.1, "grad_norm": 0.5378340482711792, "learning_rate": 0.0004209440736246642, "loss": 3.152, "step": 22522 }, { "epoch": 1.1, "grad_norm": 0.7082079648971558, "learning_rate": 0.0004209299838684879, "loss": 2.9937, "step": 22523 }, { "epoch": 1.1, "grad_norm": 0.5510277152061462, "learning_rate": 0.00042091589379380525, "loss": 3.4617, "step": 22524 }, { "epoch": 1.1, "grad_norm": 0.5319778919219971, "learning_rate": 0.00042090180340065346, "loss": 3.0375, "step": 22525 }, { "epoch": 1.1, "grad_norm": 0.5088480710983276, "learning_rate": 0.0004208877126890695, "loss": 3.2293, "step": 22526 }, { "epoch": 1.1, "grad_norm": 0.5296842455863953, "learning_rate": 0.00042087362165909057, "loss": 3.0811, "step": 22527 }, { "epoch": 1.1, "grad_norm": 0.5537300109863281, "learning_rate": 0.0004208595303107539, "loss": 3.1295, "step": 22528 }, { "epoch": 1.1, "grad_norm": 0.5461785197257996, "learning_rate": 0.00042084543864409646, "loss": 3.2753, "step": 22529 }, { "epoch": 1.1, "grad_norm": 0.571995735168457, "learning_rate": 0.0004208313466591553, "loss": 3.087, "step": 22530 }, { "epoch": 1.1, "grad_norm": 0.5290436148643494, "learning_rate": 0.00042081725435596764, "loss": 3.0314, "step": 22531 }, { "epoch": 1.1, "grad_norm": 0.5442591905593872, "learning_rate": 0.00042080316173457064, "loss": 3.2783, "step": 22532 }, { "epoch": 1.1, "grad_norm": 0.5432357788085938, "learning_rate": 0.00042078906879500135, "loss": 3.2025, "step": 22533 }, { "epoch": 1.1, "grad_norm": 0.53834468126297, "learning_rate": 0.0004207749755372969, "loss": 3.2905, "step": 22534 }, { "epoch": 1.1, "grad_norm": 0.5716333389282227, "learning_rate": 0.0004207608819614944, "loss": 3.1693, "step": 22535 }, { "epoch": 1.1, "grad_norm": 0.5577285885810852, "learning_rate": 0.00042074678806763095, "loss": 3.0463, "step": 22536 }, { "epoch": 1.1, "grad_norm": 0.5382303595542908, "learning_rate": 0.00042073269385574374, "loss": 3.0666, "step": 22537 }, { "epoch": 1.1, "grad_norm": 0.5647816061973572, "learning_rate": 0.00042071859932586985, "loss": 3.0805, "step": 22538 }, { "epoch": 1.1, "grad_norm": 0.5214908123016357, "learning_rate": 0.0004207045044780464, "loss": 3.1665, "step": 22539 }, { "epoch": 1.1, "grad_norm": 0.5525656938552856, "learning_rate": 0.0004206904093123106, "loss": 3.0327, "step": 22540 }, { "epoch": 1.1, "grad_norm": 0.5515208840370178, "learning_rate": 0.0004206763138286993, "loss": 3.1713, "step": 22541 }, { "epoch": 1.1, "grad_norm": 0.5453616380691528, "learning_rate": 0.0004206622180272499, "loss": 3.0146, "step": 22542 }, { "epoch": 1.1, "grad_norm": 0.5157879590988159, "learning_rate": 0.00042064812190799946, "loss": 3.1077, "step": 22543 }, { "epoch": 1.1, "grad_norm": 0.5753068327903748, "learning_rate": 0.0004206340254709851, "loss": 2.9845, "step": 22544 }, { "epoch": 1.1, "grad_norm": 0.5232889652252197, "learning_rate": 0.0004206199287162439, "loss": 3.1151, "step": 22545 }, { "epoch": 1.1, "grad_norm": 0.527743399143219, "learning_rate": 0.000420605831643813, "loss": 3.25, "step": 22546 }, { "epoch": 1.1, "grad_norm": 0.5670763254165649, "learning_rate": 0.00042059173425372965, "loss": 3.3105, "step": 22547 }, { "epoch": 1.11, "grad_norm": 0.5618078708648682, "learning_rate": 0.0004205776365460307, "loss": 3.1793, "step": 22548 }, { "epoch": 1.11, "grad_norm": 0.5352552533149719, "learning_rate": 0.00042056353852075364, "loss": 3.1404, "step": 22549 }, { "epoch": 1.11, "grad_norm": 0.5104782581329346, "learning_rate": 0.0004205494401779354, "loss": 3.1323, "step": 22550 }, { "epoch": 1.11, "grad_norm": 0.5447371602058411, "learning_rate": 0.00042053534151761306, "loss": 3.3586, "step": 22551 }, { "epoch": 1.11, "grad_norm": 0.5591816902160645, "learning_rate": 0.0004205212425398238, "loss": 3.0942, "step": 22552 }, { "epoch": 1.11, "grad_norm": 0.5504888892173767, "learning_rate": 0.00042050714324460485, "loss": 3.0393, "step": 22553 }, { "epoch": 1.11, "grad_norm": 0.5453820824623108, "learning_rate": 0.00042049304363199334, "loss": 3.0541, "step": 22554 }, { "epoch": 1.11, "grad_norm": 0.5488752722740173, "learning_rate": 0.00042047894370202625, "loss": 3.0418, "step": 22555 }, { "epoch": 1.11, "grad_norm": 0.5417929887771606, "learning_rate": 0.00042046484345474084, "loss": 3.0206, "step": 22556 }, { "epoch": 1.11, "grad_norm": 0.5546737909317017, "learning_rate": 0.00042045074289017413, "loss": 3.0535, "step": 22557 }, { "epoch": 1.11, "grad_norm": 0.5946574807167053, "learning_rate": 0.00042043664200836346, "loss": 3.1673, "step": 22558 }, { "epoch": 1.11, "grad_norm": 0.5759459137916565, "learning_rate": 0.0004204225408093458, "loss": 3.0653, "step": 22559 }, { "epoch": 1.11, "grad_norm": 0.5452854037284851, "learning_rate": 0.0004204084392931583, "loss": 3.092, "step": 22560 }, { "epoch": 1.11, "grad_norm": 0.5289528369903564, "learning_rate": 0.0004203943374598382, "loss": 3.2014, "step": 22561 }, { "epoch": 1.11, "grad_norm": 0.5537521243095398, "learning_rate": 0.00042038023530942265, "loss": 3.1951, "step": 22562 }, { "epoch": 1.11, "grad_norm": 0.5194694399833679, "learning_rate": 0.0004203661328419486, "loss": 2.9367, "step": 22563 }, { "epoch": 1.11, "grad_norm": 0.5450275540351868, "learning_rate": 0.00042035203005745336, "loss": 2.9492, "step": 22564 }, { "epoch": 1.11, "grad_norm": 0.527119517326355, "learning_rate": 0.0004203379269559741, "loss": 3.1056, "step": 22565 }, { "epoch": 1.11, "grad_norm": 0.5397108197212219, "learning_rate": 0.0004203238235375479, "loss": 3.1839, "step": 22566 }, { "epoch": 1.11, "grad_norm": 0.5303124189376831, "learning_rate": 0.0004203097198022118, "loss": 3.2767, "step": 22567 }, { "epoch": 1.11, "grad_norm": 0.5147061347961426, "learning_rate": 0.0004202956157500031, "loss": 3.1816, "step": 22568 }, { "epoch": 1.11, "grad_norm": 0.5415562391281128, "learning_rate": 0.00042028151138095895, "loss": 3.3555, "step": 22569 }, { "epoch": 1.11, "grad_norm": 0.5352077484130859, "learning_rate": 0.0004202674066951165, "loss": 3.0523, "step": 22570 }, { "epoch": 1.11, "grad_norm": 0.5709248781204224, "learning_rate": 0.0004202533016925127, "loss": 3.0357, "step": 22571 }, { "epoch": 1.11, "grad_norm": 0.5660057663917542, "learning_rate": 0.0004202391963731849, "loss": 3.1059, "step": 22572 }, { "epoch": 1.11, "grad_norm": 0.5669631958007812, "learning_rate": 0.0004202250907371702, "loss": 3.0434, "step": 22573 }, { "epoch": 1.11, "grad_norm": 0.5338017344474792, "learning_rate": 0.0004202109847845057, "loss": 3.0701, "step": 22574 }, { "epoch": 1.11, "grad_norm": 0.5378772020339966, "learning_rate": 0.00042019687851522873, "loss": 3.3332, "step": 22575 }, { "epoch": 1.11, "grad_norm": 0.5645471215248108, "learning_rate": 0.0004201827719293762, "loss": 3.2192, "step": 22576 }, { "epoch": 1.11, "grad_norm": 0.5384810566902161, "learning_rate": 0.00042016866502698536, "loss": 2.9694, "step": 22577 }, { "epoch": 1.11, "grad_norm": 0.5898525714874268, "learning_rate": 0.00042015455780809345, "loss": 2.8896, "step": 22578 }, { "epoch": 1.11, "grad_norm": 0.5633717179298401, "learning_rate": 0.00042014045027273755, "loss": 2.9377, "step": 22579 }, { "epoch": 1.11, "grad_norm": 0.551085352897644, "learning_rate": 0.00042012634242095487, "loss": 3.0136, "step": 22580 }, { "epoch": 1.11, "grad_norm": 0.5334330201148987, "learning_rate": 0.00042011223425278253, "loss": 3.0485, "step": 22581 }, { "epoch": 1.11, "grad_norm": 0.5643008947372437, "learning_rate": 0.0004200981257682575, "loss": 3.142, "step": 22582 }, { "epoch": 1.11, "grad_norm": 0.5641648769378662, "learning_rate": 0.0004200840169674173, "loss": 3.2212, "step": 22583 }, { "epoch": 1.11, "grad_norm": 0.5251689553260803, "learning_rate": 0.00042006990785029886, "loss": 3.2124, "step": 22584 }, { "epoch": 1.11, "grad_norm": 0.5536843538284302, "learning_rate": 0.0004200557984169394, "loss": 3.3359, "step": 22585 }, { "epoch": 1.11, "grad_norm": 0.5234001874923706, "learning_rate": 0.00042004168866737607, "loss": 3.098, "step": 22586 }, { "epoch": 1.11, "grad_norm": 0.5152735114097595, "learning_rate": 0.000420027578601646, "loss": 3.1305, "step": 22587 }, { "epoch": 1.11, "grad_norm": 0.5633012056350708, "learning_rate": 0.0004200134682197864, "loss": 3.0323, "step": 22588 }, { "epoch": 1.11, "grad_norm": 0.5583577752113342, "learning_rate": 0.00041999935752183446, "loss": 3.1308, "step": 22589 }, { "epoch": 1.11, "grad_norm": 0.5771940350532532, "learning_rate": 0.0004199852465078273, "loss": 3.1042, "step": 22590 }, { "epoch": 1.11, "grad_norm": 0.5792399644851685, "learning_rate": 0.0004199711351778021, "loss": 2.8656, "step": 22591 }, { "epoch": 1.11, "grad_norm": 0.5672153234481812, "learning_rate": 0.0004199570235317959, "loss": 2.8594, "step": 22592 }, { "epoch": 1.11, "grad_norm": 0.5597004890441895, "learning_rate": 0.0004199429115698461, "loss": 3.1582, "step": 22593 }, { "epoch": 1.11, "grad_norm": 0.5668061971664429, "learning_rate": 0.00041992879929198977, "loss": 2.9705, "step": 22594 }, { "epoch": 1.11, "grad_norm": 0.5375760793685913, "learning_rate": 0.000419914686698264, "loss": 2.9946, "step": 22595 }, { "epoch": 1.11, "grad_norm": 0.5492685437202454, "learning_rate": 0.00041990057378870607, "loss": 2.9779, "step": 22596 }, { "epoch": 1.11, "grad_norm": 0.5831897854804993, "learning_rate": 0.00041988646056335304, "loss": 3.0992, "step": 22597 }, { "epoch": 1.11, "grad_norm": 0.5664513111114502, "learning_rate": 0.00041987234702224217, "loss": 3.0218, "step": 22598 }, { "epoch": 1.11, "grad_norm": 0.5811390280723572, "learning_rate": 0.00041985823316541065, "loss": 2.9748, "step": 22599 }, { "epoch": 1.11, "grad_norm": 0.5502529740333557, "learning_rate": 0.00041984411899289556, "loss": 3.1049, "step": 22600 }, { "epoch": 1.11, "grad_norm": 0.57771897315979, "learning_rate": 0.0004198300045047342, "loss": 3.1521, "step": 22601 }, { "epoch": 1.11, "grad_norm": 0.5387015342712402, "learning_rate": 0.00041981588970096365, "loss": 3.0113, "step": 22602 }, { "epoch": 1.11, "grad_norm": 0.551127016544342, "learning_rate": 0.000419801774581621, "loss": 3.27, "step": 22603 }, { "epoch": 1.11, "grad_norm": 0.5647044777870178, "learning_rate": 0.0004197876591467436, "loss": 3.3264, "step": 22604 }, { "epoch": 1.11, "grad_norm": 0.5634638071060181, "learning_rate": 0.0004197735433963686, "loss": 2.9904, "step": 22605 }, { "epoch": 1.11, "grad_norm": 0.5413936972618103, "learning_rate": 0.0004197594273305332, "loss": 3.1296, "step": 22606 }, { "epoch": 1.11, "grad_norm": 0.526145875453949, "learning_rate": 0.0004197453109492743, "loss": 3.0424, "step": 22607 }, { "epoch": 1.11, "grad_norm": 0.5350415706634521, "learning_rate": 0.0004197311942526294, "loss": 3.137, "step": 22608 }, { "epoch": 1.11, "grad_norm": 0.5814176797866821, "learning_rate": 0.0004197170772406357, "loss": 3.0361, "step": 22609 }, { "epoch": 1.11, "grad_norm": 0.5808823108673096, "learning_rate": 0.0004197029599133301, "loss": 3.1731, "step": 22610 }, { "epoch": 1.11, "grad_norm": 0.5619063973426819, "learning_rate": 0.00041968884227075, "loss": 2.9452, "step": 22611 }, { "epoch": 1.11, "grad_norm": 0.5297744870185852, "learning_rate": 0.0004196747243129325, "loss": 3.2803, "step": 22612 }, { "epoch": 1.11, "grad_norm": 0.527192234992981, "learning_rate": 0.00041966060603991484, "loss": 3.133, "step": 22613 }, { "epoch": 1.11, "grad_norm": 0.5389567017555237, "learning_rate": 0.00041964648745173417, "loss": 3.2495, "step": 22614 }, { "epoch": 1.11, "grad_norm": 0.5459379553794861, "learning_rate": 0.0004196323685484277, "loss": 3.1859, "step": 22615 }, { "epoch": 1.11, "grad_norm": 0.558315634727478, "learning_rate": 0.0004196182493300326, "loss": 3.0052, "step": 22616 }, { "epoch": 1.11, "grad_norm": 0.5612319707870483, "learning_rate": 0.00041960412979658604, "loss": 3.2109, "step": 22617 }, { "epoch": 1.11, "grad_norm": 0.5649484992027283, "learning_rate": 0.00041959000994812513, "loss": 3.0352, "step": 22618 }, { "epoch": 1.11, "grad_norm": 0.5512779951095581, "learning_rate": 0.00041957588978468717, "loss": 3.1786, "step": 22619 }, { "epoch": 1.11, "grad_norm": 0.5309266448020935, "learning_rate": 0.00041956176930630946, "loss": 3.0735, "step": 22620 }, { "epoch": 1.11, "grad_norm": 0.5767373442649841, "learning_rate": 0.0004195476485130289, "loss": 2.8406, "step": 22621 }, { "epoch": 1.11, "grad_norm": 0.5628970861434937, "learning_rate": 0.00041953352740488293, "loss": 3.0715, "step": 22622 }, { "epoch": 1.11, "grad_norm": 0.5341333150863647, "learning_rate": 0.00041951940598190866, "loss": 3.0542, "step": 22623 }, { "epoch": 1.11, "grad_norm": 0.5453346967697144, "learning_rate": 0.0004195052842441432, "loss": 3.0502, "step": 22624 }, { "epoch": 1.11, "grad_norm": 0.5791659951210022, "learning_rate": 0.00041949116219162393, "loss": 3.192, "step": 22625 }, { "epoch": 1.11, "grad_norm": 0.5513650178909302, "learning_rate": 0.0004194770398243879, "loss": 3.1476, "step": 22626 }, { "epoch": 1.11, "grad_norm": 0.5383404493331909, "learning_rate": 0.0004194629171424724, "loss": 2.8348, "step": 22627 }, { "epoch": 1.11, "grad_norm": 0.5772968530654907, "learning_rate": 0.00041944879414591434, "loss": 3.2311, "step": 22628 }, { "epoch": 1.11, "grad_norm": 0.5837339758872986, "learning_rate": 0.0004194346708347513, "loss": 2.9275, "step": 22629 }, { "epoch": 1.11, "grad_norm": 0.5292240381240845, "learning_rate": 0.0004194205472090204, "loss": 3.0504, "step": 22630 }, { "epoch": 1.11, "grad_norm": 0.5393617153167725, "learning_rate": 0.0004194064232687587, "loss": 2.9578, "step": 22631 }, { "epoch": 1.11, "grad_norm": 0.5465915203094482, "learning_rate": 0.0004193922990140034, "loss": 3.2505, "step": 22632 }, { "epoch": 1.11, "grad_norm": 0.5606839656829834, "learning_rate": 0.0004193781744447917, "loss": 2.9551, "step": 22633 }, { "epoch": 1.11, "grad_norm": 0.5263482928276062, "learning_rate": 0.0004193640495611611, "loss": 3.2808, "step": 22634 }, { "epoch": 1.11, "grad_norm": 0.5450267195701599, "learning_rate": 0.0004193499243631484, "loss": 3.1876, "step": 22635 }, { "epoch": 1.11, "grad_norm": 0.5732899904251099, "learning_rate": 0.000419335798850791, "loss": 2.9856, "step": 22636 }, { "epoch": 1.11, "grad_norm": 0.55254727602005, "learning_rate": 0.0004193216730241261, "loss": 3.1878, "step": 22637 }, { "epoch": 1.11, "grad_norm": 0.5654364824295044, "learning_rate": 0.00041930754688319086, "loss": 3.154, "step": 22638 }, { "epoch": 1.11, "grad_norm": 0.5142787098884583, "learning_rate": 0.0004192934204280224, "loss": 3.4391, "step": 22639 }, { "epoch": 1.11, "grad_norm": 0.5899552702903748, "learning_rate": 0.00041927929365865815, "loss": 3.126, "step": 22640 }, { "epoch": 1.11, "grad_norm": 0.5144187808036804, "learning_rate": 0.00041926516657513513, "loss": 3.1848, "step": 22641 }, { "epoch": 1.11, "grad_norm": 0.5327566266059875, "learning_rate": 0.0004192510391774907, "loss": 3.2201, "step": 22642 }, { "epoch": 1.11, "grad_norm": 0.5743151307106018, "learning_rate": 0.00041923691146576194, "loss": 3.0937, "step": 22643 }, { "epoch": 1.11, "grad_norm": 0.538710355758667, "learning_rate": 0.000419222783439986, "loss": 3.1129, "step": 22644 }, { "epoch": 1.11, "grad_norm": 0.5390024781227112, "learning_rate": 0.00041920865510020045, "loss": 3.0571, "step": 22645 }, { "epoch": 1.11, "grad_norm": 0.5429919958114624, "learning_rate": 0.000419194526446442, "loss": 3.0848, "step": 22646 }, { "epoch": 1.11, "grad_norm": 0.532172679901123, "learning_rate": 0.0004191803974787482, "loss": 2.8347, "step": 22647 }, { "epoch": 1.11, "grad_norm": 0.5515339970588684, "learning_rate": 0.00041916626819715617, "loss": 3.0664, "step": 22648 }, { "epoch": 1.11, "grad_norm": 0.5160419940948486, "learning_rate": 0.0004191521386017031, "loss": 3.019, "step": 22649 }, { "epoch": 1.11, "grad_norm": 0.5607364177703857, "learning_rate": 0.0004191380086924263, "loss": 2.895, "step": 22650 }, { "epoch": 1.11, "grad_norm": 0.5337927937507629, "learning_rate": 0.00041912387846936277, "loss": 3.1408, "step": 22651 }, { "epoch": 1.11, "grad_norm": 0.5739155411720276, "learning_rate": 0.00041910974793255, "loss": 3.2168, "step": 22652 }, { "epoch": 1.11, "grad_norm": 0.579247236251831, "learning_rate": 0.0004190956170820251, "loss": 2.9161, "step": 22653 }, { "epoch": 1.11, "grad_norm": 0.5730478167533875, "learning_rate": 0.0004190814859178251, "loss": 3.0244, "step": 22654 }, { "epoch": 1.11, "grad_norm": 0.5330377817153931, "learning_rate": 0.0004190673544399875, "loss": 2.9202, "step": 22655 }, { "epoch": 1.11, "grad_norm": 0.5503820180892944, "learning_rate": 0.00041905322264854946, "loss": 3.1697, "step": 22656 }, { "epoch": 1.11, "grad_norm": 0.5626627206802368, "learning_rate": 0.00041903909054354804, "loss": 3.1492, "step": 22657 }, { "epoch": 1.11, "grad_norm": 0.5176795721054077, "learning_rate": 0.0004190249581250206, "loss": 3.0295, "step": 22658 }, { "epoch": 1.11, "grad_norm": 0.5228406190872192, "learning_rate": 0.00041901082539300426, "loss": 3.0384, "step": 22659 }, { "epoch": 1.11, "grad_norm": 0.5561695694923401, "learning_rate": 0.0004189966923475364, "loss": 3.0576, "step": 22660 }, { "epoch": 1.11, "grad_norm": 0.5149094462394714, "learning_rate": 0.0004189825589886542, "loss": 3.0316, "step": 22661 }, { "epoch": 1.11, "grad_norm": 0.5652263164520264, "learning_rate": 0.00041896842531639476, "loss": 3.0514, "step": 22662 }, { "epoch": 1.11, "grad_norm": 0.5679181218147278, "learning_rate": 0.00041895429133079534, "loss": 3.0378, "step": 22663 }, { "epoch": 1.11, "grad_norm": 0.5677348375320435, "learning_rate": 0.00041894015703189326, "loss": 3.1728, "step": 22664 }, { "epoch": 1.11, "grad_norm": 0.5508793592453003, "learning_rate": 0.00041892602241972566, "loss": 3.1805, "step": 22665 }, { "epoch": 1.11, "grad_norm": 0.5380650162696838, "learning_rate": 0.0004189118874943298, "loss": 2.9287, "step": 22666 }, { "epoch": 1.11, "grad_norm": 0.5400630235671997, "learning_rate": 0.00041889775225574305, "loss": 3.0148, "step": 22667 }, { "epoch": 1.11, "grad_norm": 0.5459088683128357, "learning_rate": 0.0004188836167040024, "loss": 3.2834, "step": 22668 }, { "epoch": 1.11, "grad_norm": 0.5284379720687866, "learning_rate": 0.00041886948083914513, "loss": 3.0427, "step": 22669 }, { "epoch": 1.11, "grad_norm": 0.5413404107093811, "learning_rate": 0.0004188553446612086, "loss": 3.0797, "step": 22670 }, { "epoch": 1.11, "grad_norm": 0.6029809713363647, "learning_rate": 0.00041884120817023, "loss": 3.0449, "step": 22671 }, { "epoch": 1.11, "grad_norm": 0.5510432720184326, "learning_rate": 0.0004188270713662465, "loss": 3.0182, "step": 22672 }, { "epoch": 1.11, "grad_norm": 0.5862864255905151, "learning_rate": 0.0004188129342492953, "loss": 3.2882, "step": 22673 }, { "epoch": 1.11, "grad_norm": 0.5308249592781067, "learning_rate": 0.0004187987968194137, "loss": 3.0142, "step": 22674 }, { "epoch": 1.11, "grad_norm": 0.5954753756523132, "learning_rate": 0.000418784659076639, "loss": 3.2163, "step": 22675 }, { "epoch": 1.11, "grad_norm": 0.5317316651344299, "learning_rate": 0.00041877052102100835, "loss": 2.9223, "step": 22676 }, { "epoch": 1.11, "grad_norm": 0.5436576008796692, "learning_rate": 0.00041875638265255894, "loss": 3.0907, "step": 22677 }, { "epoch": 1.11, "grad_norm": 0.5222603678703308, "learning_rate": 0.0004187422439713282, "loss": 2.9988, "step": 22678 }, { "epoch": 1.11, "grad_norm": 0.5745731592178345, "learning_rate": 0.0004187281049773532, "loss": 2.8509, "step": 22679 }, { "epoch": 1.11, "grad_norm": 0.5338032841682434, "learning_rate": 0.0004187139656706711, "loss": 3.1961, "step": 22680 }, { "epoch": 1.11, "grad_norm": 0.5111261010169983, "learning_rate": 0.00041869982605131933, "loss": 3.2963, "step": 22681 }, { "epoch": 1.11, "grad_norm": 0.5470437407493591, "learning_rate": 0.0004186856861193352, "loss": 3.1833, "step": 22682 }, { "epoch": 1.11, "grad_norm": 0.5256366729736328, "learning_rate": 0.0004186715458747557, "loss": 3.2079, "step": 22683 }, { "epoch": 1.11, "grad_norm": 0.5694595575332642, "learning_rate": 0.00041865740531761813, "loss": 2.9047, "step": 22684 }, { "epoch": 1.11, "grad_norm": 0.5024998188018799, "learning_rate": 0.0004186432644479599, "loss": 3.2674, "step": 22685 }, { "epoch": 1.11, "grad_norm": 0.5744755268096924, "learning_rate": 0.000418629123265818, "loss": 3.1185, "step": 22686 }, { "epoch": 1.11, "grad_norm": 0.5288079380989075, "learning_rate": 0.00041861498177122995, "loss": 3.1339, "step": 22687 }, { "epoch": 1.11, "grad_norm": 0.5845724940299988, "learning_rate": 0.0004186008399642328, "loss": 2.8917, "step": 22688 }, { "epoch": 1.11, "grad_norm": 0.5915581583976746, "learning_rate": 0.00041858669784486396, "loss": 3.0969, "step": 22689 }, { "epoch": 1.11, "grad_norm": 0.5309663414955139, "learning_rate": 0.0004185725554131604, "loss": 3.0139, "step": 22690 }, { "epoch": 1.11, "grad_norm": 0.5378329753875732, "learning_rate": 0.0004185584126691597, "loss": 3.2019, "step": 22691 }, { "epoch": 1.11, "grad_norm": 0.5664974451065063, "learning_rate": 0.0004185442696128989, "loss": 3.0974, "step": 22692 }, { "epoch": 1.11, "grad_norm": 0.5150554776191711, "learning_rate": 0.00041853012624441544, "loss": 3.0179, "step": 22693 }, { "epoch": 1.11, "grad_norm": 0.5422317981719971, "learning_rate": 0.0004185159825637463, "loss": 3.0485, "step": 22694 }, { "epoch": 1.11, "grad_norm": 0.5424741506576538, "learning_rate": 0.0004185018385709288, "loss": 3.1692, "step": 22695 }, { "epoch": 1.11, "grad_norm": 0.5382916927337646, "learning_rate": 0.0004184876942660004, "loss": 3.027, "step": 22696 }, { "epoch": 1.11, "grad_norm": 0.5341999530792236, "learning_rate": 0.00041847354964899823, "loss": 3.0156, "step": 22697 }, { "epoch": 1.11, "grad_norm": 0.5673853754997253, "learning_rate": 0.00041845940471995946, "loss": 3.045, "step": 22698 }, { "epoch": 1.11, "grad_norm": 0.5665754079818726, "learning_rate": 0.0004184452594789214, "loss": 3.0651, "step": 22699 }, { "epoch": 1.11, "grad_norm": 0.5702019333839417, "learning_rate": 0.0004184311139259214, "loss": 2.9393, "step": 22700 }, { "epoch": 1.11, "grad_norm": 0.5696156620979309, "learning_rate": 0.00041841696806099656, "loss": 2.9479, "step": 22701 }, { "epoch": 1.11, "grad_norm": 0.577709436416626, "learning_rate": 0.00041840282188418426, "loss": 3.0455, "step": 22702 }, { "epoch": 1.11, "grad_norm": 0.5778018832206726, "learning_rate": 0.0004183886753955217, "loss": 3.0691, "step": 22703 }, { "epoch": 1.11, "grad_norm": 0.5543364882469177, "learning_rate": 0.0004183745285950462, "loss": 3.0442, "step": 22704 }, { "epoch": 1.11, "grad_norm": 0.5635522603988647, "learning_rate": 0.0004183603814827949, "loss": 3.0389, "step": 22705 }, { "epoch": 1.11, "grad_norm": 0.5350740551948547, "learning_rate": 0.0004183462340588051, "loss": 3.1186, "step": 22706 }, { "epoch": 1.11, "grad_norm": 0.5741170048713684, "learning_rate": 0.00041833208632311423, "loss": 3.017, "step": 22707 }, { "epoch": 1.11, "grad_norm": 0.5641077756881714, "learning_rate": 0.0004183179382757593, "loss": 3.2578, "step": 22708 }, { "epoch": 1.11, "grad_norm": 0.5855833888053894, "learning_rate": 0.0004183037899167778, "loss": 2.9378, "step": 22709 }, { "epoch": 1.11, "grad_norm": 0.5699617266654968, "learning_rate": 0.0004182896412462067, "loss": 3.2279, "step": 22710 }, { "epoch": 1.11, "grad_norm": 0.5196274518966675, "learning_rate": 0.0004182754922640837, "loss": 3.0284, "step": 22711 }, { "epoch": 1.11, "grad_norm": 0.5257676243782043, "learning_rate": 0.0004182613429704456, "loss": 3.1445, "step": 22712 }, { "epoch": 1.11, "grad_norm": 0.5339217782020569, "learning_rate": 0.00041824719336533005, "loss": 3.0042, "step": 22713 }, { "epoch": 1.11, "grad_norm": 0.5554669499397278, "learning_rate": 0.00041823304344877404, "loss": 3.0607, "step": 22714 }, { "epoch": 1.11, "grad_norm": 0.5328848958015442, "learning_rate": 0.000418218893220815, "loss": 3.1213, "step": 22715 }, { "epoch": 1.11, "grad_norm": 0.5345682501792908, "learning_rate": 0.0004182047426814901, "loss": 3.122, "step": 22716 }, { "epoch": 1.11, "grad_norm": 0.5637184381484985, "learning_rate": 0.00041819059183083665, "loss": 3.186, "step": 22717 }, { "epoch": 1.11, "grad_norm": 0.5496988892555237, "learning_rate": 0.00041817644066889203, "loss": 3.1549, "step": 22718 }, { "epoch": 1.11, "grad_norm": 0.5595161318778992, "learning_rate": 0.00041816228919569335, "loss": 3.1797, "step": 22719 }, { "epoch": 1.11, "grad_norm": 0.5612072944641113, "learning_rate": 0.00041814813741127796, "loss": 3.1245, "step": 22720 }, { "epoch": 1.11, "grad_norm": 0.5443363785743713, "learning_rate": 0.000418133985315683, "loss": 3.2956, "step": 22721 }, { "epoch": 1.11, "grad_norm": 0.5439444780349731, "learning_rate": 0.000418119832908946, "loss": 3.0141, "step": 22722 }, { "epoch": 1.11, "grad_norm": 0.5160937309265137, "learning_rate": 0.000418105680191104, "loss": 3.2615, "step": 22723 }, { "epoch": 1.11, "grad_norm": 0.5264231562614441, "learning_rate": 0.0004180915271621945, "loss": 3.0696, "step": 22724 }, { "epoch": 1.11, "grad_norm": 0.5493642687797546, "learning_rate": 0.0004180773738222545, "loss": 3.0094, "step": 22725 }, { "epoch": 1.11, "grad_norm": 0.535229504108429, "learning_rate": 0.0004180632201713215, "loss": 2.9877, "step": 22726 }, { "epoch": 1.11, "grad_norm": 0.5449292063713074, "learning_rate": 0.0004180490662094326, "loss": 3.1424, "step": 22727 }, { "epoch": 1.11, "grad_norm": 0.5288578271865845, "learning_rate": 0.00041803491193662524, "loss": 3.1605, "step": 22728 }, { "epoch": 1.11, "grad_norm": 0.5462258458137512, "learning_rate": 0.0004180207573529367, "loss": 3.204, "step": 22729 }, { "epoch": 1.11, "grad_norm": 0.507749080657959, "learning_rate": 0.00041800660245840414, "loss": 3.1402, "step": 22730 }, { "epoch": 1.11, "grad_norm": 0.554622232913971, "learning_rate": 0.00041799244725306483, "loss": 3.1245, "step": 22731 }, { "epoch": 1.11, "grad_norm": 0.5544594526290894, "learning_rate": 0.0004179782917369562, "loss": 3.0031, "step": 22732 }, { "epoch": 1.11, "grad_norm": 0.5762028098106384, "learning_rate": 0.00041796413591011557, "loss": 2.9274, "step": 22733 }, { "epoch": 1.11, "grad_norm": 0.5185121893882751, "learning_rate": 0.00041794997977257994, "loss": 2.9783, "step": 22734 }, { "epoch": 1.11, "grad_norm": 0.5906330943107605, "learning_rate": 0.0004179358233243868, "loss": 3.1314, "step": 22735 }, { "epoch": 1.11, "grad_norm": 0.5670438408851624, "learning_rate": 0.00041792166656557343, "loss": 3.1062, "step": 22736 }, { "epoch": 1.11, "grad_norm": 0.6015538573265076, "learning_rate": 0.00041790750949617704, "loss": 2.9635, "step": 22737 }, { "epoch": 1.11, "grad_norm": 0.5294987559318542, "learning_rate": 0.000417893352116235, "loss": 3.0218, "step": 22738 }, { "epoch": 1.11, "grad_norm": 0.5639286041259766, "learning_rate": 0.00041787919442578457, "loss": 3.1125, "step": 22739 }, { "epoch": 1.11, "grad_norm": 0.5568161010742188, "learning_rate": 0.0004178650364248631, "loss": 3.2293, "step": 22740 }, { "epoch": 1.11, "grad_norm": 0.5709725022315979, "learning_rate": 0.0004178508781135076, "loss": 3.1711, "step": 22741 }, { "epoch": 1.11, "grad_norm": 0.5765569806098938, "learning_rate": 0.00041783671949175566, "loss": 3.2865, "step": 22742 }, { "epoch": 1.11, "grad_norm": 0.5151780247688293, "learning_rate": 0.0004178225605596445, "loss": 3.2819, "step": 22743 }, { "epoch": 1.11, "grad_norm": 0.5453171730041504, "learning_rate": 0.00041780840131721143, "loss": 3.089, "step": 22744 }, { "epoch": 1.11, "grad_norm": 0.5520200729370117, "learning_rate": 0.0004177942417644937, "loss": 3.0535, "step": 22745 }, { "epoch": 1.11, "grad_norm": 0.5933725237846375, "learning_rate": 0.00041778008190152856, "loss": 3.0728, "step": 22746 }, { "epoch": 1.11, "grad_norm": 0.5700779557228088, "learning_rate": 0.0004177659217283533, "loss": 3.0971, "step": 22747 }, { "epoch": 1.11, "grad_norm": 0.5266256928443909, "learning_rate": 0.0004177517612450054, "loss": 3.1489, "step": 22748 }, { "epoch": 1.11, "grad_norm": 0.5342414975166321, "learning_rate": 0.00041773760045152195, "loss": 3.1828, "step": 22749 }, { "epoch": 1.11, "grad_norm": 0.5465877056121826, "learning_rate": 0.0004177234393479403, "loss": 3.0301, "step": 22750 }, { "epoch": 1.11, "grad_norm": 0.5596908330917358, "learning_rate": 0.0004177092779342978, "loss": 2.8841, "step": 22751 }, { "epoch": 1.12, "grad_norm": 0.5438765287399292, "learning_rate": 0.00041769511621063166, "loss": 3.1112, "step": 22752 }, { "epoch": 1.12, "grad_norm": 0.5041543245315552, "learning_rate": 0.00041768095417697937, "loss": 3.0129, "step": 22753 }, { "epoch": 1.12, "grad_norm": 0.527295708656311, "learning_rate": 0.00041766679183337797, "loss": 3.2012, "step": 22754 }, { "epoch": 1.12, "grad_norm": 0.5652879476547241, "learning_rate": 0.00041765262917986497, "loss": 3.2307, "step": 22755 }, { "epoch": 1.12, "grad_norm": 0.5673266053199768, "learning_rate": 0.00041763846621647755, "loss": 3.135, "step": 22756 }, { "epoch": 1.12, "grad_norm": 0.5326421856880188, "learning_rate": 0.00041762430294325296, "loss": 3.0118, "step": 22757 }, { "epoch": 1.12, "grad_norm": 0.5760022401809692, "learning_rate": 0.0004176101393602287, "loss": 3.2049, "step": 22758 }, { "epoch": 1.12, "grad_norm": 0.5360777974128723, "learning_rate": 0.00041759597546744194, "loss": 3.1222, "step": 22759 }, { "epoch": 1.12, "grad_norm": 0.5167760848999023, "learning_rate": 0.00041758181126493006, "loss": 2.9691, "step": 22760 }, { "epoch": 1.12, "grad_norm": 0.8578059077262878, "learning_rate": 0.00041756764675273024, "loss": 3.2545, "step": 22761 }, { "epoch": 1.12, "grad_norm": 0.6276934742927551, "learning_rate": 0.0004175534819308799, "loss": 3.0123, "step": 22762 }, { "epoch": 1.12, "grad_norm": 0.5215904712677002, "learning_rate": 0.0004175393167994163, "loss": 3.1768, "step": 22763 }, { "epoch": 1.12, "grad_norm": 0.51545250415802, "learning_rate": 0.00041752515135837676, "loss": 2.9006, "step": 22764 }, { "epoch": 1.12, "grad_norm": 0.535327672958374, "learning_rate": 0.00041751098560779856, "loss": 3.1213, "step": 22765 }, { "epoch": 1.12, "grad_norm": 0.54879230260849, "learning_rate": 0.00041749681954771913, "loss": 3.2119, "step": 22766 }, { "epoch": 1.12, "grad_norm": 0.5459108352661133, "learning_rate": 0.00041748265317817566, "loss": 3.0567, "step": 22767 }, { "epoch": 1.12, "grad_norm": 0.5210204720497131, "learning_rate": 0.0004174684864992054, "loss": 3.2307, "step": 22768 }, { "epoch": 1.12, "grad_norm": 0.5359011292457581, "learning_rate": 0.0004174543195108459, "loss": 2.9444, "step": 22769 }, { "epoch": 1.12, "grad_norm": 0.5488296747207642, "learning_rate": 0.00041744015221313423, "loss": 3.263, "step": 22770 }, { "epoch": 1.12, "grad_norm": 0.5671960115432739, "learning_rate": 0.0004174259846061078, "loss": 2.9674, "step": 22771 }, { "epoch": 1.12, "grad_norm": 0.5221547484397888, "learning_rate": 0.00041741181668980386, "loss": 2.937, "step": 22772 }, { "epoch": 1.12, "grad_norm": 0.5802128911018372, "learning_rate": 0.00041739764846425995, "loss": 3.0267, "step": 22773 }, { "epoch": 1.12, "grad_norm": 0.5907021760940552, "learning_rate": 0.0004173834799295132, "loss": 3.1479, "step": 22774 }, { "epoch": 1.12, "grad_norm": 0.5466511845588684, "learning_rate": 0.0004173693110856009, "loss": 2.9301, "step": 22775 }, { "epoch": 1.12, "grad_norm": 0.5275335311889648, "learning_rate": 0.00041735514193256044, "loss": 3.1554, "step": 22776 }, { "epoch": 1.12, "grad_norm": 0.5344946980476379, "learning_rate": 0.0004173409724704291, "loss": 3.2402, "step": 22777 }, { "epoch": 1.12, "grad_norm": 0.5361987948417664, "learning_rate": 0.0004173268026992442, "loss": 3.0482, "step": 22778 }, { "epoch": 1.12, "grad_norm": 0.5685098171234131, "learning_rate": 0.0004173126326190431, "loss": 3.1086, "step": 22779 }, { "epoch": 1.12, "grad_norm": 0.5359070897102356, "learning_rate": 0.0004172984622298632, "loss": 3.1898, "step": 22780 }, { "epoch": 1.12, "grad_norm": 0.5278506875038147, "learning_rate": 0.0004172842915317417, "loss": 3.0062, "step": 22781 }, { "epoch": 1.12, "grad_norm": 0.5743756294250488, "learning_rate": 0.00041727012052471577, "loss": 3.1249, "step": 22782 }, { "epoch": 1.12, "grad_norm": 0.5638933777809143, "learning_rate": 0.00041725594920882304, "loss": 2.8144, "step": 22783 }, { "epoch": 1.12, "grad_norm": 0.5870307683944702, "learning_rate": 0.00041724177758410073, "loss": 3.1692, "step": 22784 }, { "epoch": 1.12, "grad_norm": 0.530704140663147, "learning_rate": 0.0004172276056505861, "loss": 2.8009, "step": 22785 }, { "epoch": 1.12, "grad_norm": 0.5861652493476868, "learning_rate": 0.00041721343340831655, "loss": 3.2099, "step": 22786 }, { "epoch": 1.12, "grad_norm": 0.5355147123336792, "learning_rate": 0.0004171992608573293, "loss": 2.9651, "step": 22787 }, { "epoch": 1.12, "grad_norm": 0.5698812007904053, "learning_rate": 0.0004171850879976618, "loss": 3.0561, "step": 22788 }, { "epoch": 1.12, "grad_norm": 0.5511211156845093, "learning_rate": 0.0004171709148293514, "loss": 3.2349, "step": 22789 }, { "epoch": 1.12, "grad_norm": 0.5315529108047485, "learning_rate": 0.00041715674135243524, "loss": 2.962, "step": 22790 }, { "epoch": 1.12, "grad_norm": 0.5924056768417358, "learning_rate": 0.0004171425675669509, "loss": 3.0684, "step": 22791 }, { "epoch": 1.12, "grad_norm": 0.5384601950645447, "learning_rate": 0.00041712839347293556, "loss": 3.1687, "step": 22792 }, { "epoch": 1.12, "grad_norm": 0.5141586065292358, "learning_rate": 0.0004171142190704264, "loss": 3.0498, "step": 22793 }, { "epoch": 1.12, "grad_norm": 0.5134872794151306, "learning_rate": 0.00041710004435946104, "loss": 3.0604, "step": 22794 }, { "epoch": 1.12, "grad_norm": 0.5418192744255066, "learning_rate": 0.0004170858693400768, "loss": 3.0924, "step": 22795 }, { "epoch": 1.12, "grad_norm": 0.5468102097511292, "learning_rate": 0.0004170716940123108, "loss": 2.9778, "step": 22796 }, { "epoch": 1.12, "grad_norm": 0.519443929195404, "learning_rate": 0.00041705751837620055, "loss": 3.0669, "step": 22797 }, { "epoch": 1.12, "grad_norm": 0.534935712814331, "learning_rate": 0.0004170433424317832, "loss": 3.0799, "step": 22798 }, { "epoch": 1.12, "grad_norm": 0.5599324703216553, "learning_rate": 0.00041702916617909633, "loss": 3.2856, "step": 22799 }, { "epoch": 1.12, "grad_norm": 0.5286010503768921, "learning_rate": 0.0004170149896181771, "loss": 2.9179, "step": 22800 }, { "epoch": 1.12, "grad_norm": 0.5498127341270447, "learning_rate": 0.000417000812749063, "loss": 2.9982, "step": 22801 }, { "epoch": 1.12, "grad_norm": 0.5402305126190186, "learning_rate": 0.00041698663557179126, "loss": 2.922, "step": 22802 }, { "epoch": 1.12, "grad_norm": 0.5140150785446167, "learning_rate": 0.0004169724580863991, "loss": 3.1083, "step": 22803 }, { "epoch": 1.12, "grad_norm": 0.518584132194519, "learning_rate": 0.00041695828029292406, "loss": 3.1583, "step": 22804 }, { "epoch": 1.12, "grad_norm": 0.5516182780265808, "learning_rate": 0.00041694410219140344, "loss": 3.2764, "step": 22805 }, { "epoch": 1.12, "grad_norm": 0.5740772485733032, "learning_rate": 0.0004169299237818747, "loss": 3.0374, "step": 22806 }, { "epoch": 1.12, "grad_norm": 0.5997231602668762, "learning_rate": 0.00041691574506437485, "loss": 3.056, "step": 22807 }, { "epoch": 1.12, "grad_norm": 0.566772997379303, "learning_rate": 0.0004169015660389414, "loss": 3.1544, "step": 22808 }, { "epoch": 1.12, "grad_norm": 0.5957667827606201, "learning_rate": 0.0004168873867056119, "loss": 3.0864, "step": 22809 }, { "epoch": 1.12, "grad_norm": 0.5558214783668518, "learning_rate": 0.00041687320706442345, "loss": 3.0475, "step": 22810 }, { "epoch": 1.12, "grad_norm": 0.5514797568321228, "learning_rate": 0.0004168590271154134, "loss": 3.1521, "step": 22811 }, { "epoch": 1.12, "grad_norm": 0.5616558194160461, "learning_rate": 0.0004168448468586192, "loss": 2.9716, "step": 22812 }, { "epoch": 1.12, "grad_norm": 0.555022120475769, "learning_rate": 0.00041683066629407816, "loss": 2.985, "step": 22813 }, { "epoch": 1.12, "grad_norm": 0.5655428767204285, "learning_rate": 0.00041681648542182765, "loss": 3.1157, "step": 22814 }, { "epoch": 1.12, "grad_norm": 0.5333276391029358, "learning_rate": 0.00041680230424190497, "loss": 3.1813, "step": 22815 }, { "epoch": 1.12, "grad_norm": 0.5510712265968323, "learning_rate": 0.0004167881227543475, "loss": 2.9741, "step": 22816 }, { "epoch": 1.12, "grad_norm": 0.5697930455207825, "learning_rate": 0.0004167739409591927, "loss": 3.3855, "step": 22817 }, { "epoch": 1.12, "grad_norm": 0.5547995567321777, "learning_rate": 0.0004167597588564776, "loss": 3.1625, "step": 22818 }, { "epoch": 1.12, "grad_norm": 0.5366219878196716, "learning_rate": 0.00041674557644623986, "loss": 3.1218, "step": 22819 }, { "epoch": 1.12, "grad_norm": 0.5644329786300659, "learning_rate": 0.0004167313937285169, "loss": 3.2227, "step": 22820 }, { "epoch": 1.12, "grad_norm": 0.5153218507766724, "learning_rate": 0.0004167172107033457, "loss": 3.1479, "step": 22821 }, { "epoch": 1.12, "grad_norm": 0.5740698575973511, "learning_rate": 0.00041670302737076395, "loss": 3.0745, "step": 22822 }, { "epoch": 1.12, "grad_norm": 0.5266560316085815, "learning_rate": 0.00041668884373080876, "loss": 3.0566, "step": 22823 }, { "epoch": 1.12, "grad_norm": 0.5471863150596619, "learning_rate": 0.0004166746597835178, "loss": 3.0059, "step": 22824 }, { "epoch": 1.12, "grad_norm": 0.53224116563797, "learning_rate": 0.00041666047552892815, "loss": 3.1979, "step": 22825 }, { "epoch": 1.12, "grad_norm": 0.5300314426422119, "learning_rate": 0.00041664629096707716, "loss": 3.0605, "step": 22826 }, { "epoch": 1.12, "grad_norm": 0.536897599697113, "learning_rate": 0.00041663210609800246, "loss": 3.075, "step": 22827 }, { "epoch": 1.12, "grad_norm": 0.5496629476547241, "learning_rate": 0.0004166179209217411, "loss": 3.1034, "step": 22828 }, { "epoch": 1.12, "grad_norm": 0.5242640376091003, "learning_rate": 0.00041660373543833066, "loss": 3.0961, "step": 22829 }, { "epoch": 1.12, "grad_norm": 0.5705289840698242, "learning_rate": 0.00041658954964780837, "loss": 3.0197, "step": 22830 }, { "epoch": 1.12, "grad_norm": 0.5280457139015198, "learning_rate": 0.0004165753635502118, "loss": 3.0366, "step": 22831 }, { "epoch": 1.12, "grad_norm": 0.5403161644935608, "learning_rate": 0.000416561177145578, "loss": 3.0601, "step": 22832 }, { "epoch": 1.12, "grad_norm": 0.5531915426254272, "learning_rate": 0.0004165469904339445, "loss": 3.1521, "step": 22833 }, { "epoch": 1.12, "grad_norm": 0.5562331080436707, "learning_rate": 0.0004165328034153486, "loss": 3.1089, "step": 22834 }, { "epoch": 1.12, "grad_norm": 0.5130084156990051, "learning_rate": 0.00041651861608982786, "loss": 3.2384, "step": 22835 }, { "epoch": 1.12, "grad_norm": 0.5995485186576843, "learning_rate": 0.00041650442845741944, "loss": 2.9602, "step": 22836 }, { "epoch": 1.12, "grad_norm": 0.5520250201225281, "learning_rate": 0.00041649024051816077, "loss": 2.9841, "step": 22837 }, { "epoch": 1.12, "grad_norm": 0.5484051704406738, "learning_rate": 0.0004164760522720892, "loss": 3.0843, "step": 22838 }, { "epoch": 1.12, "grad_norm": 0.5702111124992371, "learning_rate": 0.0004164618637192422, "loss": 3.1287, "step": 22839 }, { "epoch": 1.12, "grad_norm": 0.5361772179603577, "learning_rate": 0.00041644767485965705, "loss": 2.8899, "step": 22840 }, { "epoch": 1.12, "grad_norm": 0.5582685470581055, "learning_rate": 0.0004164334856933711, "loss": 3.2705, "step": 22841 }, { "epoch": 1.12, "grad_norm": 0.5814189910888672, "learning_rate": 0.00041641929622042185, "loss": 2.9787, "step": 22842 }, { "epoch": 1.12, "grad_norm": 0.543645977973938, "learning_rate": 0.0004164051064408465, "loss": 2.9174, "step": 22843 }, { "epoch": 1.12, "grad_norm": 0.5395931601524353, "learning_rate": 0.0004163909163546824, "loss": 2.9608, "step": 22844 }, { "epoch": 1.12, "grad_norm": 0.5268449783325195, "learning_rate": 0.0004163767259619671, "loss": 3.1027, "step": 22845 }, { "epoch": 1.12, "grad_norm": 0.5362355709075928, "learning_rate": 0.0004163625352627381, "loss": 2.9239, "step": 22846 }, { "epoch": 1.12, "grad_norm": 0.5752016305923462, "learning_rate": 0.00041634834425703236, "loss": 3.0805, "step": 22847 }, { "epoch": 1.12, "grad_norm": 0.5360502004623413, "learning_rate": 0.0004163341529448875, "loss": 2.909, "step": 22848 }, { "epoch": 1.12, "grad_norm": 0.5390860438346863, "learning_rate": 0.00041631996132634086, "loss": 3.0375, "step": 22849 }, { "epoch": 1.12, "grad_norm": 0.5611980557441711, "learning_rate": 0.0004163057694014299, "loss": 3.1001, "step": 22850 }, { "epoch": 1.12, "grad_norm": 0.5395224094390869, "learning_rate": 0.00041629157717019185, "loss": 3.2519, "step": 22851 }, { "epoch": 1.12, "grad_norm": 0.5417009592056274, "learning_rate": 0.00041627738463266426, "loss": 2.8745, "step": 22852 }, { "epoch": 1.12, "grad_norm": 0.5183812379837036, "learning_rate": 0.0004162631917888844, "loss": 3.1916, "step": 22853 }, { "epoch": 1.12, "grad_norm": 0.5822927355766296, "learning_rate": 0.0004162489986388896, "loss": 3.0131, "step": 22854 }, { "epoch": 1.12, "grad_norm": 0.5340526103973389, "learning_rate": 0.0004162348051827173, "loss": 3.151, "step": 22855 }, { "epoch": 1.12, "grad_norm": 0.5964843034744263, "learning_rate": 0.00041622061142040494, "loss": 3.1859, "step": 22856 }, { "epoch": 1.12, "grad_norm": 0.5237846970558167, "learning_rate": 0.0004162064173519899, "loss": 2.9817, "step": 22857 }, { "epoch": 1.12, "grad_norm": 0.5963344573974609, "learning_rate": 0.0004161922229775094, "loss": 3.1043, "step": 22858 }, { "epoch": 1.12, "grad_norm": 0.5289567708969116, "learning_rate": 0.00041617802829700104, "loss": 3.0147, "step": 22859 }, { "epoch": 1.12, "grad_norm": 0.545599102973938, "learning_rate": 0.00041616383331050214, "loss": 3.104, "step": 22860 }, { "epoch": 1.12, "grad_norm": 0.5368244647979736, "learning_rate": 0.00041614963801804995, "loss": 2.9397, "step": 22861 }, { "epoch": 1.12, "grad_norm": 0.5587390661239624, "learning_rate": 0.00041613544241968204, "loss": 3.1038, "step": 22862 }, { "epoch": 1.12, "grad_norm": 0.5761477947235107, "learning_rate": 0.0004161212465154357, "loss": 3.1405, "step": 22863 }, { "epoch": 1.12, "grad_norm": 0.5684610605239868, "learning_rate": 0.00041610705030534835, "loss": 3.0149, "step": 22864 }, { "epoch": 1.12, "grad_norm": 0.5535710453987122, "learning_rate": 0.0004160928537894574, "loss": 3.1474, "step": 22865 }, { "epoch": 1.12, "grad_norm": 0.5354270339012146, "learning_rate": 0.00041607865696780016, "loss": 2.9943, "step": 22866 }, { "epoch": 1.12, "grad_norm": 0.5734537839889526, "learning_rate": 0.0004160644598404141, "loss": 3.0333, "step": 22867 }, { "epoch": 1.12, "grad_norm": 0.5363245010375977, "learning_rate": 0.0004160502624073367, "loss": 3.1273, "step": 22868 }, { "epoch": 1.12, "grad_norm": 0.5657275319099426, "learning_rate": 0.0004160360646686051, "loss": 3.1475, "step": 22869 }, { "epoch": 1.12, "grad_norm": 0.5877406001091003, "learning_rate": 0.0004160218666242569, "loss": 3.1525, "step": 22870 }, { "epoch": 1.12, "grad_norm": 0.5854254961013794, "learning_rate": 0.0004160076682743294, "loss": 3.343, "step": 22871 }, { "epoch": 1.12, "grad_norm": 0.4987305700778961, "learning_rate": 0.00041599346961886005, "loss": 3.1971, "step": 22872 }, { "epoch": 1.12, "grad_norm": 0.5421817302703857, "learning_rate": 0.0004159792706578862, "loss": 3.1557, "step": 22873 }, { "epoch": 1.12, "grad_norm": 0.5647212862968445, "learning_rate": 0.0004159650713914453, "loss": 3.059, "step": 22874 }, { "epoch": 1.12, "grad_norm": 0.5287631154060364, "learning_rate": 0.0004159508718195747, "loss": 3.1896, "step": 22875 }, { "epoch": 1.12, "grad_norm": 0.5936976671218872, "learning_rate": 0.0004159366719423118, "loss": 3.188, "step": 22876 }, { "epoch": 1.12, "grad_norm": 0.5288457870483398, "learning_rate": 0.00041592247175969413, "loss": 3.1293, "step": 22877 }, { "epoch": 1.12, "grad_norm": 0.5637390613555908, "learning_rate": 0.00041590827127175884, "loss": 3.1525, "step": 22878 }, { "epoch": 1.12, "grad_norm": 0.5691234469413757, "learning_rate": 0.00041589407047854356, "loss": 3.1165, "step": 22879 }, { "epoch": 1.12, "grad_norm": 0.5463374853134155, "learning_rate": 0.0004158798693800855, "loss": 3.185, "step": 22880 }, { "epoch": 1.12, "grad_norm": 0.5565144419670105, "learning_rate": 0.0004158656679764223, "loss": 3.179, "step": 22881 }, { "epoch": 1.12, "grad_norm": 0.5509166717529297, "learning_rate": 0.0004158514662675911, "loss": 3.1439, "step": 22882 }, { "epoch": 1.12, "grad_norm": 0.5458000302314758, "learning_rate": 0.00041583726425362957, "loss": 2.9293, "step": 22883 }, { "epoch": 1.12, "grad_norm": 0.536400318145752, "learning_rate": 0.0004158230619345749, "loss": 2.9197, "step": 22884 }, { "epoch": 1.12, "grad_norm": 0.5528376698493958, "learning_rate": 0.0004158088593104645, "loss": 3.1646, "step": 22885 }, { "epoch": 1.12, "grad_norm": 0.5160115361213684, "learning_rate": 0.000415794656381336, "loss": 3.0597, "step": 22886 }, { "epoch": 1.12, "grad_norm": 0.5468571782112122, "learning_rate": 0.0004157804531472266, "loss": 3.257, "step": 22887 }, { "epoch": 1.12, "grad_norm": 0.5390594601631165, "learning_rate": 0.0004157662496081738, "loss": 3.2257, "step": 22888 }, { "epoch": 1.12, "grad_norm": 0.5498918890953064, "learning_rate": 0.00041575204576421493, "loss": 3.1676, "step": 22889 }, { "epoch": 1.12, "grad_norm": 0.5376027822494507, "learning_rate": 0.0004157378416153874, "loss": 3.087, "step": 22890 }, { "epoch": 1.12, "grad_norm": 0.5271984934806824, "learning_rate": 0.00041572363716172876, "loss": 3.092, "step": 22891 }, { "epoch": 1.12, "grad_norm": 0.5507243275642395, "learning_rate": 0.00041570943240327627, "loss": 3.0022, "step": 22892 }, { "epoch": 1.12, "grad_norm": 0.5260735154151917, "learning_rate": 0.0004156952273400675, "loss": 3.0662, "step": 22893 }, { "epoch": 1.12, "grad_norm": 0.5336798429489136, "learning_rate": 0.0004156810219721397, "loss": 3.1374, "step": 22894 }, { "epoch": 1.12, "grad_norm": 0.5313898324966431, "learning_rate": 0.0004156668162995303, "loss": 3.1071, "step": 22895 }, { "epoch": 1.12, "grad_norm": 0.5449457764625549, "learning_rate": 0.0004156526103222768, "loss": 3.2074, "step": 22896 }, { "epoch": 1.12, "grad_norm": 0.5850553512573242, "learning_rate": 0.00041563840404041665, "loss": 2.977, "step": 22897 }, { "epoch": 1.12, "grad_norm": 0.5299965143203735, "learning_rate": 0.00041562419745398715, "loss": 3.154, "step": 22898 }, { "epoch": 1.12, "grad_norm": 0.5133044719696045, "learning_rate": 0.00041560999056302575, "loss": 3.0494, "step": 22899 }, { "epoch": 1.12, "grad_norm": 0.6028819680213928, "learning_rate": 0.0004155957833675699, "loss": 2.9978, "step": 22900 }, { "epoch": 1.12, "grad_norm": 0.5400010347366333, "learning_rate": 0.00041558157586765704, "loss": 3.2693, "step": 22901 }, { "epoch": 1.12, "grad_norm": 0.5579959750175476, "learning_rate": 0.0004155673680633245, "loss": 3.1617, "step": 22902 }, { "epoch": 1.12, "grad_norm": 0.5807639360427856, "learning_rate": 0.00041555315995460976, "loss": 3.0796, "step": 22903 }, { "epoch": 1.12, "grad_norm": 0.547631025314331, "learning_rate": 0.0004155389515415503, "loss": 3.0593, "step": 22904 }, { "epoch": 1.12, "grad_norm": 0.5586033463478088, "learning_rate": 0.00041552474282418336, "loss": 3.0915, "step": 22905 }, { "epoch": 1.12, "grad_norm": 0.5403106212615967, "learning_rate": 0.00041551053380254656, "loss": 3.0695, "step": 22906 }, { "epoch": 1.12, "grad_norm": 0.5964490175247192, "learning_rate": 0.0004154963244766772, "loss": 2.9246, "step": 22907 }, { "epoch": 1.12, "grad_norm": 0.6330263018608093, "learning_rate": 0.00041548211484661286, "loss": 3.1909, "step": 22908 }, { "epoch": 1.12, "grad_norm": 0.5445305109024048, "learning_rate": 0.0004154679049123908, "loss": 3.2143, "step": 22909 }, { "epoch": 1.12, "grad_norm": 0.49582967162132263, "learning_rate": 0.0004154536946740484, "loss": 3.0744, "step": 22910 }, { "epoch": 1.12, "grad_norm": 0.6021095514297485, "learning_rate": 0.0004154394841316233, "loss": 3.1623, "step": 22911 }, { "epoch": 1.12, "grad_norm": 0.5545721650123596, "learning_rate": 0.00041542527328515273, "loss": 3.0835, "step": 22912 }, { "epoch": 1.12, "grad_norm": 0.5633933544158936, "learning_rate": 0.0004154110621346743, "loss": 3.0356, "step": 22913 }, { "epoch": 1.12, "grad_norm": 0.5791369676589966, "learning_rate": 0.00041539685068022527, "loss": 3.1277, "step": 22914 }, { "epoch": 1.12, "grad_norm": 0.588784396648407, "learning_rate": 0.0004153826389218432, "loss": 3.1858, "step": 22915 }, { "epoch": 1.12, "grad_norm": 0.5584728121757507, "learning_rate": 0.0004153684268595654, "loss": 2.9936, "step": 22916 }, { "epoch": 1.12, "grad_norm": 0.6015306711196899, "learning_rate": 0.00041535421449342936, "loss": 3.0412, "step": 22917 }, { "epoch": 1.12, "grad_norm": 0.6056309342384338, "learning_rate": 0.00041534000182347257, "loss": 2.8666, "step": 22918 }, { "epoch": 1.12, "grad_norm": 0.5673311948776245, "learning_rate": 0.00041532578884973247, "loss": 3.0582, "step": 22919 }, { "epoch": 1.12, "grad_norm": 0.5334855318069458, "learning_rate": 0.0004153115755722464, "loss": 3.0588, "step": 22920 }, { "epoch": 1.12, "grad_norm": 0.589484453201294, "learning_rate": 0.0004152973619910517, "loss": 3.1469, "step": 22921 }, { "epoch": 1.12, "grad_norm": 0.5717774629592896, "learning_rate": 0.0004152831481061861, "loss": 2.9427, "step": 22922 }, { "epoch": 1.12, "grad_norm": 0.6408734321594238, "learning_rate": 0.00041526893391768683, "loss": 3.0572, "step": 22923 }, { "epoch": 1.12, "grad_norm": 0.5661062002182007, "learning_rate": 0.0004152547194255913, "loss": 2.8823, "step": 22924 }, { "epoch": 1.12, "grad_norm": 0.5968921780586243, "learning_rate": 0.0004152405046299371, "loss": 3.0135, "step": 22925 }, { "epoch": 1.12, "grad_norm": 0.5666815638542175, "learning_rate": 0.0004152262895307616, "loss": 3.0244, "step": 22926 }, { "epoch": 1.12, "grad_norm": 0.5514220595359802, "learning_rate": 0.00041521207412810213, "loss": 2.9396, "step": 22927 }, { "epoch": 1.12, "grad_norm": 0.5360330939292908, "learning_rate": 0.0004151978584219963, "loss": 3.1101, "step": 22928 }, { "epoch": 1.12, "grad_norm": 0.5596000552177429, "learning_rate": 0.0004151836424124815, "loss": 3.379, "step": 22929 }, { "epoch": 1.12, "grad_norm": 0.5417352318763733, "learning_rate": 0.0004151694260995952, "loss": 3.0873, "step": 22930 }, { "epoch": 1.12, "grad_norm": 0.4918670952320099, "learning_rate": 0.0004151552094833746, "loss": 3.1007, "step": 22931 }, { "epoch": 1.12, "grad_norm": 0.5756099224090576, "learning_rate": 0.00041514099256385753, "loss": 3.0216, "step": 22932 }, { "epoch": 1.12, "grad_norm": 0.5461629033088684, "learning_rate": 0.0004151267753410812, "loss": 3.107, "step": 22933 }, { "epoch": 1.12, "grad_norm": 0.5471493005752563, "learning_rate": 0.00041511255781508306, "loss": 3.1213, "step": 22934 }, { "epoch": 1.12, "grad_norm": 0.600498616695404, "learning_rate": 0.00041509833998590063, "loss": 3.1274, "step": 22935 }, { "epoch": 1.12, "grad_norm": 0.5208067297935486, "learning_rate": 0.00041508412185357126, "loss": 3.1673, "step": 22936 }, { "epoch": 1.12, "grad_norm": 0.5667810440063477, "learning_rate": 0.00041506990341813257, "loss": 3.0739, "step": 22937 }, { "epoch": 1.12, "grad_norm": 0.5090360045433044, "learning_rate": 0.00041505568467962187, "loss": 3.2307, "step": 22938 }, { "epoch": 1.12, "grad_norm": 0.5476797819137573, "learning_rate": 0.0004150414656380766, "loss": 3.0299, "step": 22939 }, { "epoch": 1.12, "grad_norm": 0.5608437657356262, "learning_rate": 0.00041502724629353423, "loss": 3.0124, "step": 22940 }, { "epoch": 1.12, "grad_norm": 0.5343563556671143, "learning_rate": 0.0004150130266460322, "loss": 3.0477, "step": 22941 }, { "epoch": 1.12, "grad_norm": 0.5381127595901489, "learning_rate": 0.0004149988066956081, "loss": 3.1718, "step": 22942 }, { "epoch": 1.12, "grad_norm": 0.5361109375953674, "learning_rate": 0.00041498458644229926, "loss": 3.0919, "step": 22943 }, { "epoch": 1.12, "grad_norm": 0.5653386116027832, "learning_rate": 0.0004149703658861431, "loss": 3.1964, "step": 22944 }, { "epoch": 1.12, "grad_norm": 0.5344790816307068, "learning_rate": 0.0004149561450271772, "loss": 2.9937, "step": 22945 }, { "epoch": 1.12, "grad_norm": 0.5467535257339478, "learning_rate": 0.00041494192386543885, "loss": 3.1078, "step": 22946 }, { "epoch": 1.12, "grad_norm": 0.6098953485488892, "learning_rate": 0.0004149277024009655, "loss": 3.178, "step": 22947 }, { "epoch": 1.12, "grad_norm": 0.5292100310325623, "learning_rate": 0.00041491348063379496, "loss": 3.0706, "step": 22948 }, { "epoch": 1.12, "grad_norm": 0.5830525159835815, "learning_rate": 0.0004148992585639643, "loss": 3.1763, "step": 22949 }, { "epoch": 1.12, "grad_norm": 0.5466551184654236, "learning_rate": 0.00041488503619151103, "loss": 2.918, "step": 22950 }, { "epoch": 1.12, "grad_norm": 0.5626237392425537, "learning_rate": 0.00041487081351647274, "loss": 2.8359, "step": 22951 }, { "epoch": 1.12, "grad_norm": 0.6049579977989197, "learning_rate": 0.00041485659053888696, "loss": 3.1318, "step": 22952 }, { "epoch": 1.12, "grad_norm": 0.5381505489349365, "learning_rate": 0.0004148423672587908, "loss": 3.1775, "step": 22953 }, { "epoch": 1.12, "grad_norm": 0.5498600006103516, "learning_rate": 0.0004148281436762221, "loss": 3.1253, "step": 22954 }, { "epoch": 1.12, "grad_norm": 0.5618240237236023, "learning_rate": 0.00041481391979121817, "loss": 2.8616, "step": 22955 }, { "epoch": 1.13, "grad_norm": 0.5557640194892883, "learning_rate": 0.00041479969560381645, "loss": 3.1876, "step": 22956 }, { "epoch": 1.13, "grad_norm": 0.5765591859817505, "learning_rate": 0.0004147854711140543, "loss": 3.2525, "step": 22957 }, { "epoch": 1.13, "grad_norm": 0.5476953983306885, "learning_rate": 0.0004147712463219694, "loss": 2.9851, "step": 22958 }, { "epoch": 1.13, "grad_norm": 0.5657317042350769, "learning_rate": 0.00041475702122759924, "loss": 2.9882, "step": 22959 }, { "epoch": 1.13, "grad_norm": 0.5439834594726562, "learning_rate": 0.00041474279583098104, "loss": 3.3056, "step": 22960 }, { "epoch": 1.13, "grad_norm": 0.5226112604141235, "learning_rate": 0.0004147285701321525, "loss": 3.0917, "step": 22961 }, { "epoch": 1.13, "grad_norm": 0.5537917613983154, "learning_rate": 0.0004147143441311509, "loss": 3.0329, "step": 22962 }, { "epoch": 1.13, "grad_norm": 0.5705544948577881, "learning_rate": 0.00041470011782801373, "loss": 3.3239, "step": 22963 }, { "epoch": 1.13, "grad_norm": 0.5459992289543152, "learning_rate": 0.0004146858912227786, "loss": 3.0937, "step": 22964 }, { "epoch": 1.13, "grad_norm": 0.6103513240814209, "learning_rate": 0.00041467166431548297, "loss": 3.2497, "step": 22965 }, { "epoch": 1.13, "grad_norm": 0.591571033000946, "learning_rate": 0.00041465743710616417, "loss": 3.1043, "step": 22966 }, { "epoch": 1.13, "grad_norm": 0.5303111672401428, "learning_rate": 0.00041464320959485977, "loss": 3.111, "step": 22967 }, { "epoch": 1.13, "grad_norm": 0.5746419429779053, "learning_rate": 0.00041462898178160713, "loss": 2.8603, "step": 22968 }, { "epoch": 1.13, "grad_norm": 0.5438621044158936, "learning_rate": 0.00041461475366644387, "loss": 3.2288, "step": 22969 }, { "epoch": 1.13, "grad_norm": 0.6318126320838928, "learning_rate": 0.00041460052524940755, "loss": 3.101, "step": 22970 }, { "epoch": 1.13, "grad_norm": 0.549228847026825, "learning_rate": 0.00041458629653053536, "loss": 3.118, "step": 22971 }, { "epoch": 1.13, "grad_norm": 0.5160179138183594, "learning_rate": 0.0004145720675098649, "loss": 2.8717, "step": 22972 }, { "epoch": 1.13, "grad_norm": 0.5238551497459412, "learning_rate": 0.0004145578381874337, "loss": 3.0346, "step": 22973 }, { "epoch": 1.13, "grad_norm": 0.5568090677261353, "learning_rate": 0.00041454360856327927, "loss": 3.0845, "step": 22974 }, { "epoch": 1.13, "grad_norm": 0.5128166079521179, "learning_rate": 0.0004145293786374389, "loss": 3.0882, "step": 22975 }, { "epoch": 1.13, "grad_norm": 0.5460500717163086, "learning_rate": 0.00041451514840995025, "loss": 3.252, "step": 22976 }, { "epoch": 1.13, "grad_norm": 0.5138880610466003, "learning_rate": 0.00041450091788085075, "loss": 3.0992, "step": 22977 }, { "epoch": 1.13, "grad_norm": 0.5311721563339233, "learning_rate": 0.0004144866870501778, "loss": 3.0771, "step": 22978 }, { "epoch": 1.13, "grad_norm": 0.5450507402420044, "learning_rate": 0.000414472455917969, "loss": 3.0126, "step": 22979 }, { "epoch": 1.13, "grad_norm": 0.5414109230041504, "learning_rate": 0.0004144582244842618, "loss": 3.1002, "step": 22980 }, { "epoch": 1.13, "grad_norm": 0.5404634475708008, "learning_rate": 0.0004144439927490937, "loss": 2.9594, "step": 22981 }, { "epoch": 1.13, "grad_norm": 0.564774751663208, "learning_rate": 0.000414429760712502, "loss": 3.2406, "step": 22982 }, { "epoch": 1.13, "grad_norm": 0.5353189706802368, "learning_rate": 0.0004144155283745244, "loss": 2.8896, "step": 22983 }, { "epoch": 1.13, "grad_norm": 0.5611631274223328, "learning_rate": 0.00041440129573519843, "loss": 2.9384, "step": 22984 }, { "epoch": 1.13, "grad_norm": 0.5437825918197632, "learning_rate": 0.00041438706279456133, "loss": 3.0938, "step": 22985 }, { "epoch": 1.13, "grad_norm": 0.5270655155181885, "learning_rate": 0.0004143728295526508, "loss": 3.125, "step": 22986 }, { "epoch": 1.13, "grad_norm": 0.5163417458534241, "learning_rate": 0.0004143585960095042, "loss": 3.1141, "step": 22987 }, { "epoch": 1.13, "grad_norm": 0.5559436678886414, "learning_rate": 0.0004143443621651591, "loss": 3.045, "step": 22988 }, { "epoch": 1.13, "grad_norm": 0.5473134517669678, "learning_rate": 0.00041433012801965296, "loss": 3.099, "step": 22989 }, { "epoch": 1.13, "grad_norm": 0.567561686038971, "learning_rate": 0.00041431589357302325, "loss": 3.1221, "step": 22990 }, { "epoch": 1.13, "grad_norm": 0.5573049783706665, "learning_rate": 0.0004143016588253074, "loss": 3.0608, "step": 22991 }, { "epoch": 1.13, "grad_norm": 0.5784956812858582, "learning_rate": 0.00041428742377654315, "loss": 3.14, "step": 22992 }, { "epoch": 1.13, "grad_norm": 0.5378555655479431, "learning_rate": 0.0004142731884267676, "loss": 3.107, "step": 22993 }, { "epoch": 1.13, "grad_norm": 0.5544320344924927, "learning_rate": 0.00041425895277601865, "loss": 2.9306, "step": 22994 }, { "epoch": 1.13, "grad_norm": 0.536886990070343, "learning_rate": 0.00041424471682433353, "loss": 3.1313, "step": 22995 }, { "epoch": 1.13, "grad_norm": 0.5307275056838989, "learning_rate": 0.00041423048057174984, "loss": 2.9803, "step": 22996 }, { "epoch": 1.13, "grad_norm": 0.5753107666969299, "learning_rate": 0.00041421624401830504, "loss": 3.0398, "step": 22997 }, { "epoch": 1.13, "grad_norm": 0.5424846410751343, "learning_rate": 0.00041420200716403663, "loss": 3.042, "step": 22998 }, { "epoch": 1.13, "grad_norm": 0.549351692199707, "learning_rate": 0.00041418777000898214, "loss": 3.035, "step": 22999 }, { "epoch": 1.13, "grad_norm": 0.5418702960014343, "learning_rate": 0.000414173532553179, "loss": 2.835, "step": 23000 }, { "epoch": 1.13, "grad_norm": 0.5061715841293335, "learning_rate": 0.0004141592947966647, "loss": 3.0573, "step": 23001 }, { "epoch": 1.13, "grad_norm": 0.5275315046310425, "learning_rate": 0.00041414505673947687, "loss": 3.2917, "step": 23002 }, { "epoch": 1.13, "grad_norm": 0.5400534272193909, "learning_rate": 0.00041413081838165294, "loss": 2.8802, "step": 23003 }, { "epoch": 1.13, "grad_norm": 0.5462265014648438, "learning_rate": 0.0004141165797232304, "loss": 3.0331, "step": 23004 }, { "epoch": 1.13, "grad_norm": 0.515167772769928, "learning_rate": 0.00041410234076424664, "loss": 3.0803, "step": 23005 }, { "epoch": 1.13, "grad_norm": 0.557414174079895, "learning_rate": 0.0004140881015047394, "loss": 3.1524, "step": 23006 }, { "epoch": 1.13, "grad_norm": 0.5607942342758179, "learning_rate": 0.000414073861944746, "loss": 3.1207, "step": 23007 }, { "epoch": 1.13, "grad_norm": 0.5383069515228271, "learning_rate": 0.000414059622084304, "loss": 3.0531, "step": 23008 }, { "epoch": 1.13, "grad_norm": 0.5618544816970825, "learning_rate": 0.0004140453819234509, "loss": 3.1155, "step": 23009 }, { "epoch": 1.13, "grad_norm": 0.5300397276878357, "learning_rate": 0.0004140311414622243, "loss": 3.2335, "step": 23010 }, { "epoch": 1.13, "grad_norm": 0.5628443360328674, "learning_rate": 0.0004140169007006615, "loss": 3.3326, "step": 23011 }, { "epoch": 1.13, "grad_norm": 0.5267999768257141, "learning_rate": 0.0004140026596388002, "loss": 3.1844, "step": 23012 }, { "epoch": 1.13, "grad_norm": 0.5517027378082275, "learning_rate": 0.0004139884182766778, "loss": 3.1886, "step": 23013 }, { "epoch": 1.13, "grad_norm": 0.5515194535255432, "learning_rate": 0.00041397417661433183, "loss": 3.0972, "step": 23014 }, { "epoch": 1.13, "grad_norm": 0.5435203313827515, "learning_rate": 0.00041395993465179984, "loss": 3.0578, "step": 23015 }, { "epoch": 1.13, "grad_norm": 0.5524570941925049, "learning_rate": 0.0004139456923891193, "loss": 3.0469, "step": 23016 }, { "epoch": 1.13, "grad_norm": 0.5539202094078064, "learning_rate": 0.0004139314498263278, "loss": 3.0198, "step": 23017 }, { "epoch": 1.13, "grad_norm": 0.5375218987464905, "learning_rate": 0.00041391720696346267, "loss": 3.1131, "step": 23018 }, { "epoch": 1.13, "grad_norm": 0.6372843384742737, "learning_rate": 0.00041390296380056156, "loss": 3.2209, "step": 23019 }, { "epoch": 1.13, "grad_norm": 0.5595908164978027, "learning_rate": 0.00041388872033766203, "loss": 3.2019, "step": 23020 }, { "epoch": 1.13, "grad_norm": 0.5718174576759338, "learning_rate": 0.00041387447657480154, "loss": 3.0502, "step": 23021 }, { "epoch": 1.13, "grad_norm": 0.5092378854751587, "learning_rate": 0.0004138602325120176, "loss": 3.2092, "step": 23022 }, { "epoch": 1.13, "grad_norm": 0.507887601852417, "learning_rate": 0.0004138459881493476, "loss": 3.2074, "step": 23023 }, { "epoch": 1.13, "grad_norm": 0.502585232257843, "learning_rate": 0.0004138317434868293, "loss": 3.1789, "step": 23024 }, { "epoch": 1.13, "grad_norm": 0.5779663920402527, "learning_rate": 0.0004138174985245001, "loss": 3.0216, "step": 23025 }, { "epoch": 1.13, "grad_norm": 0.580353856086731, "learning_rate": 0.0004138032532623974, "loss": 3.0584, "step": 23026 }, { "epoch": 1.13, "grad_norm": 0.5854467153549194, "learning_rate": 0.00041378900770055896, "loss": 3.0347, "step": 23027 }, { "epoch": 1.13, "grad_norm": 0.5568118095397949, "learning_rate": 0.0004137747618390221, "loss": 2.9267, "step": 23028 }, { "epoch": 1.13, "grad_norm": 0.5543918609619141, "learning_rate": 0.0004137605156778244, "loss": 3.0005, "step": 23029 }, { "epoch": 1.13, "grad_norm": 0.5414887070655823, "learning_rate": 0.00041374626921700345, "loss": 3.0285, "step": 23030 }, { "epoch": 1.13, "grad_norm": 0.5289766788482666, "learning_rate": 0.00041373202245659663, "loss": 2.9147, "step": 23031 }, { "epoch": 1.13, "grad_norm": 0.5012800097465515, "learning_rate": 0.0004137177753966417, "loss": 2.962, "step": 23032 }, { "epoch": 1.13, "grad_norm": 0.5366766452789307, "learning_rate": 0.0004137035280371759, "loss": 2.9534, "step": 23033 }, { "epoch": 1.13, "grad_norm": 0.524847149848938, "learning_rate": 0.0004136892803782369, "loss": 3.2415, "step": 23034 }, { "epoch": 1.13, "grad_norm": 0.5566405057907104, "learning_rate": 0.0004136750324198623, "loss": 3.0081, "step": 23035 }, { "epoch": 1.13, "grad_norm": 0.5404036045074463, "learning_rate": 0.0004136607841620895, "loss": 3.1826, "step": 23036 }, { "epoch": 1.13, "grad_norm": 0.5708770155906677, "learning_rate": 0.00041364653560495605, "loss": 2.9988, "step": 23037 }, { "epoch": 1.13, "grad_norm": 0.557307779788971, "learning_rate": 0.00041363228674849955, "loss": 2.9934, "step": 23038 }, { "epoch": 1.13, "grad_norm": 0.5527191758155823, "learning_rate": 0.0004136180375927574, "loss": 3.2225, "step": 23039 }, { "epoch": 1.13, "grad_norm": 0.5346764922142029, "learning_rate": 0.0004136037881377672, "loss": 3.1241, "step": 23040 }, { "epoch": 1.13, "grad_norm": 0.5354068875312805, "learning_rate": 0.0004135895383835666, "loss": 3.0392, "step": 23041 }, { "epoch": 1.13, "grad_norm": 0.5253552198410034, "learning_rate": 0.00041357528833019285, "loss": 3.235, "step": 23042 }, { "epoch": 1.13, "grad_norm": 0.5762072205543518, "learning_rate": 0.0004135610379776838, "loss": 3.0428, "step": 23043 }, { "epoch": 1.13, "grad_norm": 0.554805338382721, "learning_rate": 0.00041354678732607677, "loss": 3.0625, "step": 23044 }, { "epoch": 1.13, "grad_norm": 0.5384538173675537, "learning_rate": 0.00041353253637540935, "loss": 2.9583, "step": 23045 }, { "epoch": 1.13, "grad_norm": 0.5378395915031433, "learning_rate": 0.00041351828512571913, "loss": 3.2187, "step": 23046 }, { "epoch": 1.13, "grad_norm": 0.5567187070846558, "learning_rate": 0.0004135040335770436, "loss": 3.2139, "step": 23047 }, { "epoch": 1.13, "grad_norm": 0.504665732383728, "learning_rate": 0.0004134897817294202, "loss": 3.2191, "step": 23048 }, { "epoch": 1.13, "grad_norm": 0.5528210997581482, "learning_rate": 0.0004134755295828865, "loss": 3.1749, "step": 23049 }, { "epoch": 1.13, "grad_norm": 0.5319205522537231, "learning_rate": 0.00041346127713748023, "loss": 3.1229, "step": 23050 }, { "epoch": 1.13, "grad_norm": 0.5349425077438354, "learning_rate": 0.00041344702439323873, "loss": 2.9538, "step": 23051 }, { "epoch": 1.13, "grad_norm": 0.5903375148773193, "learning_rate": 0.0004134327713501997, "loss": 3.0624, "step": 23052 }, { "epoch": 1.13, "grad_norm": 0.5711473226547241, "learning_rate": 0.0004134185180084004, "loss": 3.2639, "step": 23053 }, { "epoch": 1.13, "grad_norm": 0.5508195161819458, "learning_rate": 0.0004134042643678787, "loss": 2.9987, "step": 23054 }, { "epoch": 1.13, "grad_norm": 0.5824445486068726, "learning_rate": 0.00041339001042867195, "loss": 3.1495, "step": 23055 }, { "epoch": 1.13, "grad_norm": 0.5956960320472717, "learning_rate": 0.0004133757561908177, "loss": 3.384, "step": 23056 }, { "epoch": 1.13, "grad_norm": 0.5386295318603516, "learning_rate": 0.0004133615016543536, "loss": 3.0116, "step": 23057 }, { "epoch": 1.13, "grad_norm": 0.5671989321708679, "learning_rate": 0.0004133472468193171, "loss": 2.8844, "step": 23058 }, { "epoch": 1.13, "grad_norm": 0.5901870727539062, "learning_rate": 0.00041333299168574565, "loss": 3.076, "step": 23059 }, { "epoch": 1.13, "grad_norm": 0.566315233707428, "learning_rate": 0.00041331873625367703, "loss": 3.1643, "step": 23060 }, { "epoch": 1.13, "grad_norm": 0.5477898716926575, "learning_rate": 0.0004133044805231487, "loss": 3.3274, "step": 23061 }, { "epoch": 1.13, "grad_norm": 0.54547119140625, "learning_rate": 0.0004132902244941981, "loss": 3.2076, "step": 23062 }, { "epoch": 1.13, "grad_norm": 0.5427873730659485, "learning_rate": 0.0004132759681668629, "loss": 3.1031, "step": 23063 }, { "epoch": 1.13, "grad_norm": 0.6867091655731201, "learning_rate": 0.0004132617115411805, "loss": 2.9959, "step": 23064 }, { "epoch": 1.13, "grad_norm": 0.6565173864364624, "learning_rate": 0.00041324745461718867, "loss": 3.1724, "step": 23065 }, { "epoch": 1.13, "grad_norm": 0.5303226709365845, "learning_rate": 0.0004132331973949248, "loss": 3.0475, "step": 23066 }, { "epoch": 1.13, "grad_norm": 0.537520170211792, "learning_rate": 0.00041321893987442647, "loss": 3.1931, "step": 23067 }, { "epoch": 1.13, "grad_norm": 0.5558544397354126, "learning_rate": 0.00041320468205573125, "loss": 2.9065, "step": 23068 }, { "epoch": 1.13, "grad_norm": 0.5704731941223145, "learning_rate": 0.00041319042393887675, "loss": 3.0102, "step": 23069 }, { "epoch": 1.13, "grad_norm": 0.5413227081298828, "learning_rate": 0.0004131761655239003, "loss": 2.8069, "step": 23070 }, { "epoch": 1.13, "grad_norm": 0.5648303627967834, "learning_rate": 0.00041316190681083963, "loss": 3.0567, "step": 23071 }, { "epoch": 1.13, "grad_norm": 0.5538491010665894, "learning_rate": 0.00041314764779973247, "loss": 3.1438, "step": 23072 }, { "epoch": 1.13, "grad_norm": 0.5491966605186462, "learning_rate": 0.0004131333884906161, "loss": 2.889, "step": 23073 }, { "epoch": 1.13, "grad_norm": 0.572340190410614, "learning_rate": 0.000413119128883528, "loss": 3.1123, "step": 23074 }, { "epoch": 1.13, "grad_norm": 0.5277241468429565, "learning_rate": 0.00041310486897850604, "loss": 3.0489, "step": 23075 }, { "epoch": 1.13, "grad_norm": 0.5485084652900696, "learning_rate": 0.0004130906087755876, "loss": 3.134, "step": 23076 }, { "epoch": 1.13, "grad_norm": 0.510722815990448, "learning_rate": 0.0004130763482748102, "loss": 3.1496, "step": 23077 }, { "epoch": 1.13, "grad_norm": 0.5820092558860779, "learning_rate": 0.0004130620874762115, "loss": 2.9881, "step": 23078 }, { "epoch": 1.13, "grad_norm": 0.5545257329940796, "learning_rate": 0.00041304782637982903, "loss": 3.1989, "step": 23079 }, { "epoch": 1.13, "grad_norm": 0.5428977012634277, "learning_rate": 0.00041303356498570036, "loss": 2.969, "step": 23080 }, { "epoch": 1.13, "grad_norm": 0.5307221412658691, "learning_rate": 0.000413019303293863, "loss": 3.1404, "step": 23081 }, { "epoch": 1.13, "grad_norm": 0.5607012510299683, "learning_rate": 0.0004130050413043545, "loss": 3.0869, "step": 23082 }, { "epoch": 1.13, "grad_norm": 0.5869443416595459, "learning_rate": 0.00041299077901721257, "loss": 3.0999, "step": 23083 }, { "epoch": 1.13, "grad_norm": 0.5176264047622681, "learning_rate": 0.00041297651643247465, "loss": 2.937, "step": 23084 }, { "epoch": 1.13, "grad_norm": 0.5371460318565369, "learning_rate": 0.00041296225355017826, "loss": 2.9182, "step": 23085 }, { "epoch": 1.13, "grad_norm": 0.5200726985931396, "learning_rate": 0.00041294799037036107, "loss": 3.0559, "step": 23086 }, { "epoch": 1.13, "grad_norm": 0.5590261220932007, "learning_rate": 0.0004129337268930607, "loss": 3.2351, "step": 23087 }, { "epoch": 1.13, "grad_norm": 0.5306074023246765, "learning_rate": 0.00041291946311831446, "loss": 3.2695, "step": 23088 }, { "epoch": 1.13, "grad_norm": 0.5653502345085144, "learning_rate": 0.0004129051990461602, "loss": 3.1316, "step": 23089 }, { "epoch": 1.13, "grad_norm": 0.5234277248382568, "learning_rate": 0.00041289093467663536, "loss": 3.009, "step": 23090 }, { "epoch": 1.13, "grad_norm": 0.5736457109451294, "learning_rate": 0.0004128766700097775, "loss": 3.1699, "step": 23091 }, { "epoch": 1.13, "grad_norm": 0.607955276966095, "learning_rate": 0.00041286240504562416, "loss": 2.9498, "step": 23092 }, { "epoch": 1.13, "grad_norm": 0.5673606395721436, "learning_rate": 0.00041284813978421307, "loss": 3.0839, "step": 23093 }, { "epoch": 1.13, "grad_norm": 0.5501077175140381, "learning_rate": 0.0004128338742255817, "loss": 3.0898, "step": 23094 }, { "epoch": 1.13, "grad_norm": 0.5964276790618896, "learning_rate": 0.0004128196083697676, "loss": 3.0892, "step": 23095 }, { "epoch": 1.13, "grad_norm": 0.5602797865867615, "learning_rate": 0.00041280534221680825, "loss": 3.0006, "step": 23096 }, { "epoch": 1.13, "grad_norm": 0.5748618841171265, "learning_rate": 0.00041279107576674154, "loss": 2.9523, "step": 23097 }, { "epoch": 1.13, "grad_norm": 0.5489750504493713, "learning_rate": 0.0004127768090196047, "loss": 3.1644, "step": 23098 }, { "epoch": 1.13, "grad_norm": 0.5585864186286926, "learning_rate": 0.00041276254197543545, "loss": 3.1808, "step": 23099 }, { "epoch": 1.13, "grad_norm": 0.6432881951332092, "learning_rate": 0.0004127482746342714, "loss": 2.9174, "step": 23100 }, { "epoch": 1.13, "grad_norm": 0.5519260764122009, "learning_rate": 0.0004127340069961501, "loss": 3.0454, "step": 23101 }, { "epoch": 1.13, "grad_norm": 0.5701799988746643, "learning_rate": 0.00041271973906110916, "loss": 3.0101, "step": 23102 }, { "epoch": 1.13, "grad_norm": 0.5463408827781677, "learning_rate": 0.000412705470829186, "loss": 3.3575, "step": 23103 }, { "epoch": 1.13, "grad_norm": 0.5591546297073364, "learning_rate": 0.0004126912023004184, "loss": 2.722, "step": 23104 }, { "epoch": 1.13, "grad_norm": 0.6106502413749695, "learning_rate": 0.00041267693347484403, "loss": 3.2153, "step": 23105 }, { "epoch": 1.13, "grad_norm": 0.5445353984832764, "learning_rate": 0.00041266266435250004, "loss": 3.1634, "step": 23106 }, { "epoch": 1.13, "grad_norm": 0.5435615181922913, "learning_rate": 0.00041264839493342434, "loss": 3.0936, "step": 23107 }, { "epoch": 1.13, "grad_norm": 0.5400312542915344, "learning_rate": 0.00041263412521765454, "loss": 3.1408, "step": 23108 }, { "epoch": 1.13, "grad_norm": 0.5756552219390869, "learning_rate": 0.00041261985520522806, "loss": 3.2401, "step": 23109 }, { "epoch": 1.13, "grad_norm": 0.573711633682251, "learning_rate": 0.00041260558489618256, "loss": 3.0456, "step": 23110 }, { "epoch": 1.13, "grad_norm": 0.5202263593673706, "learning_rate": 0.0004125913142905555, "loss": 2.958, "step": 23111 }, { "epoch": 1.13, "grad_norm": 0.5493197441101074, "learning_rate": 0.0004125770433883848, "loss": 3.1262, "step": 23112 }, { "epoch": 1.13, "grad_norm": 0.5797801613807678, "learning_rate": 0.00041256277218970774, "loss": 3.0246, "step": 23113 }, { "epoch": 1.13, "grad_norm": 0.5684946179389954, "learning_rate": 0.000412548500694562, "loss": 3.1813, "step": 23114 }, { "epoch": 1.13, "grad_norm": 0.54192054271698, "learning_rate": 0.00041253422890298515, "loss": 2.8924, "step": 23115 }, { "epoch": 1.13, "grad_norm": 0.5381801724433899, "learning_rate": 0.00041251995681501483, "loss": 3.0613, "step": 23116 }, { "epoch": 1.13, "grad_norm": 0.5283708572387695, "learning_rate": 0.0004125056844306886, "loss": 3.0999, "step": 23117 }, { "epoch": 1.13, "grad_norm": 0.5617417693138123, "learning_rate": 0.000412491411750044, "loss": 2.9397, "step": 23118 }, { "epoch": 1.13, "grad_norm": 0.6005975604057312, "learning_rate": 0.0004124771387731188, "loss": 3.3285, "step": 23119 }, { "epoch": 1.13, "grad_norm": 0.5726503729820251, "learning_rate": 0.00041246286549995035, "loss": 3.1522, "step": 23120 }, { "epoch": 1.13, "grad_norm": 0.5579819679260254, "learning_rate": 0.0004124485919305763, "loss": 3.278, "step": 23121 }, { "epoch": 1.13, "grad_norm": 0.5398461818695068, "learning_rate": 0.00041243431806503437, "loss": 2.8825, "step": 23122 }, { "epoch": 1.13, "grad_norm": 0.5483943819999695, "learning_rate": 0.00041242004390336216, "loss": 3.1911, "step": 23123 }, { "epoch": 1.13, "grad_norm": 0.5343546867370605, "learning_rate": 0.0004124057694455971, "loss": 3.0709, "step": 23124 }, { "epoch": 1.13, "grad_norm": 0.5518394708633423, "learning_rate": 0.0004123914946917769, "loss": 3.088, "step": 23125 }, { "epoch": 1.13, "grad_norm": 0.541649580001831, "learning_rate": 0.00041237721964193914, "loss": 2.9646, "step": 23126 }, { "epoch": 1.13, "grad_norm": 0.5012128949165344, "learning_rate": 0.00041236294429612143, "loss": 3.0232, "step": 23127 }, { "epoch": 1.13, "grad_norm": 0.5622835755348206, "learning_rate": 0.0004123486686543613, "loss": 3.1136, "step": 23128 }, { "epoch": 1.13, "grad_norm": 0.5492423176765442, "learning_rate": 0.00041233439271669644, "loss": 3.2057, "step": 23129 }, { "epoch": 1.13, "grad_norm": 0.5439629554748535, "learning_rate": 0.0004123201164831644, "loss": 2.9353, "step": 23130 }, { "epoch": 1.13, "grad_norm": 0.5032997727394104, "learning_rate": 0.0004123058399538027, "loss": 3.1584, "step": 23131 }, { "epoch": 1.13, "grad_norm": 0.5795646905899048, "learning_rate": 0.0004122915631286492, "loss": 3.1761, "step": 23132 }, { "epoch": 1.13, "grad_norm": 0.5473034381866455, "learning_rate": 0.0004122772860077412, "loss": 2.8715, "step": 23133 }, { "epoch": 1.13, "grad_norm": 0.5456127524375916, "learning_rate": 0.0004122630085911165, "loss": 2.9562, "step": 23134 }, { "epoch": 1.13, "grad_norm": 0.5090837478637695, "learning_rate": 0.00041224873087881265, "loss": 3.0014, "step": 23135 }, { "epoch": 1.13, "grad_norm": 0.5396968126296997, "learning_rate": 0.00041223445287086715, "loss": 3.173, "step": 23136 }, { "epoch": 1.13, "grad_norm": 0.5504181981086731, "learning_rate": 0.0004122201745673178, "loss": 3.0763, "step": 23137 }, { "epoch": 1.13, "grad_norm": 0.551391065120697, "learning_rate": 0.00041220589596820204, "loss": 3.0412, "step": 23138 }, { "epoch": 1.13, "grad_norm": 0.5569359064102173, "learning_rate": 0.0004121916170735577, "loss": 3.136, "step": 23139 }, { "epoch": 1.13, "grad_norm": 0.5515356063842773, "learning_rate": 0.00041217733788342204, "loss": 3.1659, "step": 23140 }, { "epoch": 1.13, "grad_norm": 0.5313219428062439, "learning_rate": 0.0004121630583978329, "loss": 2.951, "step": 23141 }, { "epoch": 1.13, "grad_norm": 0.5136998295783997, "learning_rate": 0.00041214877861682795, "loss": 2.9565, "step": 23142 }, { "epoch": 1.13, "grad_norm": 0.531518816947937, "learning_rate": 0.00041213449854044464, "loss": 3.2081, "step": 23143 }, { "epoch": 1.13, "grad_norm": 0.5740218758583069, "learning_rate": 0.0004121202181687206, "loss": 3.0908, "step": 23144 }, { "epoch": 1.13, "grad_norm": 0.5602694153785706, "learning_rate": 0.0004121059375016936, "loss": 3.0052, "step": 23145 }, { "epoch": 1.13, "grad_norm": 0.5783454775810242, "learning_rate": 0.0004120916565394011, "loss": 3.056, "step": 23146 }, { "epoch": 1.13, "grad_norm": 0.5084561705589294, "learning_rate": 0.00041207737528188056, "loss": 3.152, "step": 23147 }, { "epoch": 1.13, "grad_norm": 0.5585759878158569, "learning_rate": 0.00041206309372917, "loss": 3.1573, "step": 23148 }, { "epoch": 1.13, "grad_norm": 0.5565611720085144, "learning_rate": 0.00041204881188130674, "loss": 3.1032, "step": 23149 }, { "epoch": 1.13, "grad_norm": 0.5540475249290466, "learning_rate": 0.00041203452973832843, "loss": 2.8712, "step": 23150 }, { "epoch": 1.13, "grad_norm": 0.5279893279075623, "learning_rate": 0.0004120202473002728, "loss": 3.0523, "step": 23151 }, { "epoch": 1.13, "grad_norm": 0.5814106464385986, "learning_rate": 0.0004120059645671774, "loss": 3.2728, "step": 23152 }, { "epoch": 1.13, "grad_norm": 0.5316987633705139, "learning_rate": 0.0004119916815390798, "loss": 3.185, "step": 23153 }, { "epoch": 1.13, "grad_norm": 0.5352542996406555, "learning_rate": 0.00041197739821601767, "loss": 2.9784, "step": 23154 }, { "epoch": 1.13, "grad_norm": 0.587202250957489, "learning_rate": 0.00041196311459802866, "loss": 3.1149, "step": 23155 }, { "epoch": 1.13, "grad_norm": 0.5219794511795044, "learning_rate": 0.0004119488306851504, "loss": 3.1565, "step": 23156 }, { "epoch": 1.13, "grad_norm": 0.5088773965835571, "learning_rate": 0.0004119345464774203, "loss": 2.8584, "step": 23157 }, { "epoch": 1.13, "grad_norm": 0.5774076581001282, "learning_rate": 0.0004119202619748763, "loss": 3.02, "step": 23158 }, { "epoch": 1.13, "grad_norm": 0.5148767232894897, "learning_rate": 0.0004119059771775559, "loss": 3.0098, "step": 23159 }, { "epoch": 1.14, "grad_norm": 0.5569112300872803, "learning_rate": 0.00041189169208549655, "loss": 3.0286, "step": 23160 }, { "epoch": 1.14, "grad_norm": 0.5477631688117981, "learning_rate": 0.00041187740669873605, "loss": 3.2834, "step": 23161 }, { "epoch": 1.14, "grad_norm": 0.5244240164756775, "learning_rate": 0.00041186312101731195, "loss": 3.1129, "step": 23162 }, { "epoch": 1.14, "grad_norm": 0.6075512170791626, "learning_rate": 0.0004118488350412621, "loss": 3.1451, "step": 23163 }, { "epoch": 1.14, "grad_norm": 0.5244618058204651, "learning_rate": 0.0004118345487706238, "loss": 2.9186, "step": 23164 }, { "epoch": 1.14, "grad_norm": 0.5191154479980469, "learning_rate": 0.00041182026220543485, "loss": 3.2372, "step": 23165 }, { "epoch": 1.14, "grad_norm": 0.540847897529602, "learning_rate": 0.00041180597534573284, "loss": 2.9988, "step": 23166 }, { "epoch": 1.14, "grad_norm": 0.5709373950958252, "learning_rate": 0.0004117916881915554, "loss": 3.1161, "step": 23167 }, { "epoch": 1.14, "grad_norm": 0.6198864579200745, "learning_rate": 0.0004117774007429402, "loss": 3.0857, "step": 23168 }, { "epoch": 1.14, "grad_norm": 0.5441683530807495, "learning_rate": 0.00041176311299992484, "loss": 3.1707, "step": 23169 }, { "epoch": 1.14, "grad_norm": 0.5470789670944214, "learning_rate": 0.000411748824962547, "loss": 3.0281, "step": 23170 }, { "epoch": 1.14, "grad_norm": 0.5256059169769287, "learning_rate": 0.00041173453663084417, "loss": 3.274, "step": 23171 }, { "epoch": 1.14, "grad_norm": 0.5547487735748291, "learning_rate": 0.00041172024800485403, "loss": 3.1233, "step": 23172 }, { "epoch": 1.14, "grad_norm": 0.5724942684173584, "learning_rate": 0.00041170595908461436, "loss": 3.2013, "step": 23173 }, { "epoch": 1.14, "grad_norm": 0.5241184830665588, "learning_rate": 0.00041169166987016265, "loss": 3.3218, "step": 23174 }, { "epoch": 1.14, "grad_norm": 0.5703988671302795, "learning_rate": 0.00041167738036153664, "loss": 3.1718, "step": 23175 }, { "epoch": 1.14, "grad_norm": 0.5422783493995667, "learning_rate": 0.0004116630905587738, "loss": 3.011, "step": 23176 }, { "epoch": 1.14, "grad_norm": 0.581802248954773, "learning_rate": 0.00041164880046191195, "loss": 2.9262, "step": 23177 }, { "epoch": 1.14, "grad_norm": 0.5663591027259827, "learning_rate": 0.0004116345100709886, "loss": 3.3378, "step": 23178 }, { "epoch": 1.14, "grad_norm": 0.5546205639839172, "learning_rate": 0.00041162021938604147, "loss": 3.0222, "step": 23179 }, { "epoch": 1.14, "grad_norm": 0.583781361579895, "learning_rate": 0.0004116059284071081, "loss": 3.0932, "step": 23180 }, { "epoch": 1.14, "grad_norm": 0.5724231004714966, "learning_rate": 0.0004115916371342263, "loss": 3.3044, "step": 23181 }, { "epoch": 1.14, "grad_norm": 0.5571467876434326, "learning_rate": 0.0004115773455674336, "loss": 3.1352, "step": 23182 }, { "epoch": 1.14, "grad_norm": 0.5352392792701721, "learning_rate": 0.00041156305370676754, "loss": 3.0393, "step": 23183 }, { "epoch": 1.14, "grad_norm": 0.5406389236450195, "learning_rate": 0.0004115487615522658, "loss": 3.0062, "step": 23184 }, { "epoch": 1.14, "grad_norm": 0.6502759456634521, "learning_rate": 0.0004115344691039663, "loss": 2.9669, "step": 23185 }, { "epoch": 1.14, "grad_norm": 0.5307773351669312, "learning_rate": 0.00041152017636190643, "loss": 3.3796, "step": 23186 }, { "epoch": 1.14, "grad_norm": 0.5802614092826843, "learning_rate": 0.00041150588332612383, "loss": 3.2169, "step": 23187 }, { "epoch": 1.14, "grad_norm": 0.5349951386451721, "learning_rate": 0.0004114915899966561, "loss": 3.144, "step": 23188 }, { "epoch": 1.14, "grad_norm": 0.5569307804107666, "learning_rate": 0.0004114772963735411, "loss": 2.9595, "step": 23189 }, { "epoch": 1.14, "grad_norm": 0.5304500460624695, "learning_rate": 0.0004114630024568163, "loss": 3.0931, "step": 23190 }, { "epoch": 1.14, "grad_norm": 0.5246667861938477, "learning_rate": 0.00041144870824651945, "loss": 3.2074, "step": 23191 }, { "epoch": 1.14, "grad_norm": 0.524143397808075, "learning_rate": 0.00041143441374268824, "loss": 2.7597, "step": 23192 }, { "epoch": 1.14, "grad_norm": 0.5903804898262024, "learning_rate": 0.00041142011894536003, "loss": 3.0905, "step": 23193 }, { "epoch": 1.14, "grad_norm": 0.5838131308555603, "learning_rate": 0.0004114058238545728, "loss": 3.0597, "step": 23194 }, { "epoch": 1.14, "grad_norm": 0.5308125615119934, "learning_rate": 0.000411391528470364, "loss": 3.0877, "step": 23195 }, { "epoch": 1.14, "grad_norm": 0.5069236755371094, "learning_rate": 0.0004113772327927714, "loss": 3.1319, "step": 23196 }, { "epoch": 1.14, "grad_norm": 0.533857524394989, "learning_rate": 0.0004113629368218326, "loss": 2.9676, "step": 23197 }, { "epoch": 1.14, "grad_norm": 0.5685359835624695, "learning_rate": 0.00041134864055758513, "loss": 3.0839, "step": 23198 }, { "epoch": 1.14, "grad_norm": 0.6156092286109924, "learning_rate": 0.00041133434400006695, "loss": 3.2202, "step": 23199 }, { "epoch": 1.14, "grad_norm": 0.5481438636779785, "learning_rate": 0.0004113200471493155, "loss": 3.1862, "step": 23200 }, { "epoch": 1.14, "grad_norm": 0.5097211599349976, "learning_rate": 0.0004113057500053684, "loss": 3.0117, "step": 23201 }, { "epoch": 1.14, "grad_norm": 0.5516743063926697, "learning_rate": 0.00041129145256826345, "loss": 3.0283, "step": 23202 }, { "epoch": 1.14, "grad_norm": 0.5222983360290527, "learning_rate": 0.0004112771548380382, "loss": 2.9669, "step": 23203 }, { "epoch": 1.14, "grad_norm": 0.5839381217956543, "learning_rate": 0.0004112628568147303, "loss": 3.015, "step": 23204 }, { "epoch": 1.14, "grad_norm": 0.5553185939788818, "learning_rate": 0.0004112485584983775, "loss": 2.915, "step": 23205 }, { "epoch": 1.14, "grad_norm": 0.5800415873527527, "learning_rate": 0.0004112342598890174, "loss": 3.1186, "step": 23206 }, { "epoch": 1.14, "grad_norm": 0.569153904914856, "learning_rate": 0.0004112199609866877, "loss": 2.9612, "step": 23207 }, { "epoch": 1.14, "grad_norm": 0.5515949130058289, "learning_rate": 0.00041120566179142596, "loss": 3.1817, "step": 23208 }, { "epoch": 1.14, "grad_norm": 0.5314306020736694, "learning_rate": 0.00041119136230326984, "loss": 3.0635, "step": 23209 }, { "epoch": 1.14, "grad_norm": 0.5693814754486084, "learning_rate": 0.00041117706252225727, "loss": 3.0755, "step": 23210 }, { "epoch": 1.14, "grad_norm": 0.5441717505455017, "learning_rate": 0.0004111627624484256, "loss": 3.1078, "step": 23211 }, { "epoch": 1.14, "grad_norm": 0.5225312113761902, "learning_rate": 0.0004111484620818126, "loss": 2.992, "step": 23212 }, { "epoch": 1.14, "grad_norm": 0.5491369962692261, "learning_rate": 0.00041113416142245587, "loss": 2.9803, "step": 23213 }, { "epoch": 1.14, "grad_norm": 0.530925989151001, "learning_rate": 0.00041111986047039333, "loss": 3.0763, "step": 23214 }, { "epoch": 1.14, "grad_norm": 0.5920950174331665, "learning_rate": 0.00041110555922566235, "loss": 3.195, "step": 23215 }, { "epoch": 1.14, "grad_norm": 0.544621467590332, "learning_rate": 0.0004110912576883007, "loss": 3.1061, "step": 23216 }, { "epoch": 1.14, "grad_norm": 0.526262640953064, "learning_rate": 0.0004110769558583461, "loss": 3.167, "step": 23217 }, { "epoch": 1.14, "grad_norm": 0.5441073179244995, "learning_rate": 0.00041106265373583615, "loss": 3.0507, "step": 23218 }, { "epoch": 1.14, "grad_norm": 0.529232382774353, "learning_rate": 0.00041104835132080856, "loss": 3.1984, "step": 23219 }, { "epoch": 1.14, "grad_norm": 0.5716642141342163, "learning_rate": 0.00041103404861330095, "loss": 3.1493, "step": 23220 }, { "epoch": 1.14, "grad_norm": 0.5743255019187927, "learning_rate": 0.0004110197456133511, "loss": 3.0924, "step": 23221 }, { "epoch": 1.14, "grad_norm": 0.6008023023605347, "learning_rate": 0.0004110054423209966, "loss": 3.0093, "step": 23222 }, { "epoch": 1.14, "grad_norm": 0.5539536476135254, "learning_rate": 0.00041099113873627505, "loss": 3.0168, "step": 23223 }, { "epoch": 1.14, "grad_norm": 0.5438982248306274, "learning_rate": 0.0004109768348592242, "loss": 2.9509, "step": 23224 }, { "epoch": 1.14, "grad_norm": 0.5372403860092163, "learning_rate": 0.0004109625306898818, "loss": 3.1003, "step": 23225 }, { "epoch": 1.14, "grad_norm": 0.5633687376976013, "learning_rate": 0.0004109482262282854, "loss": 3.1442, "step": 23226 }, { "epoch": 1.14, "grad_norm": 0.6077851057052612, "learning_rate": 0.0004109339214744727, "loss": 3.112, "step": 23227 }, { "epoch": 1.14, "grad_norm": 0.5495371222496033, "learning_rate": 0.00041091961642848143, "loss": 3.1602, "step": 23228 }, { "epoch": 1.14, "grad_norm": 0.5324305295944214, "learning_rate": 0.0004109053110903493, "loss": 3.0703, "step": 23229 }, { "epoch": 1.14, "grad_norm": 0.5331787467002869, "learning_rate": 0.00041089100546011384, "loss": 3.1745, "step": 23230 }, { "epoch": 1.14, "grad_norm": 0.5928345918655396, "learning_rate": 0.00041087669953781284, "loss": 3.0949, "step": 23231 }, { "epoch": 1.14, "grad_norm": 0.5761058330535889, "learning_rate": 0.0004108623933234841, "loss": 3.163, "step": 23232 }, { "epoch": 1.14, "grad_norm": 0.5908313989639282, "learning_rate": 0.0004108480868171649, "loss": 3.2201, "step": 23233 }, { "epoch": 1.14, "grad_norm": 0.5895100235939026, "learning_rate": 0.00041083378001889327, "loss": 2.8646, "step": 23234 }, { "epoch": 1.14, "grad_norm": 0.5367791652679443, "learning_rate": 0.0004108194729287068, "loss": 3.2775, "step": 23235 }, { "epoch": 1.14, "grad_norm": 0.5391991138458252, "learning_rate": 0.00041080516554664327, "loss": 3.1389, "step": 23236 }, { "epoch": 1.14, "grad_norm": 0.5764256119728088, "learning_rate": 0.00041079085787274015, "loss": 2.9448, "step": 23237 }, { "epoch": 1.14, "grad_norm": 0.5493453145027161, "learning_rate": 0.0004107765499070352, "loss": 3.2057, "step": 23238 }, { "epoch": 1.14, "grad_norm": 0.5182337760925293, "learning_rate": 0.00041076224164956614, "loss": 3.0028, "step": 23239 }, { "epoch": 1.14, "grad_norm": 0.5463001132011414, "learning_rate": 0.0004107479331003707, "loss": 3.3298, "step": 23240 }, { "epoch": 1.14, "grad_norm": 0.5629000067710876, "learning_rate": 0.0004107336242594865, "loss": 3.131, "step": 23241 }, { "epoch": 1.14, "grad_norm": 0.5420428514480591, "learning_rate": 0.0004107193151269512, "loss": 3.1846, "step": 23242 }, { "epoch": 1.14, "grad_norm": 0.5327157974243164, "learning_rate": 0.0004107050057028027, "loss": 3.1262, "step": 23243 }, { "epoch": 1.14, "grad_norm": 0.5222551822662354, "learning_rate": 0.0004106906959870783, "loss": 3.1124, "step": 23244 }, { "epoch": 1.14, "grad_norm": 0.5623447895050049, "learning_rate": 0.00041067638597981604, "loss": 3.1196, "step": 23245 }, { "epoch": 1.14, "grad_norm": 0.5250970721244812, "learning_rate": 0.00041066207568105344, "loss": 3.1107, "step": 23246 }, { "epoch": 1.14, "grad_norm": 0.5632120966911316, "learning_rate": 0.0004106477650908283, "loss": 3.0244, "step": 23247 }, { "epoch": 1.14, "grad_norm": 0.5749978423118591, "learning_rate": 0.0004106334542091782, "loss": 3.0074, "step": 23248 }, { "epoch": 1.14, "grad_norm": 0.548491358757019, "learning_rate": 0.00041061914303614085, "loss": 2.8617, "step": 23249 }, { "epoch": 1.14, "grad_norm": 0.5380204319953918, "learning_rate": 0.00041060483157175396, "loss": 3.0328, "step": 23250 }, { "epoch": 1.14, "grad_norm": 0.5522347092628479, "learning_rate": 0.00041059051981605523, "loss": 3.1683, "step": 23251 }, { "epoch": 1.14, "grad_norm": 0.5788419842720032, "learning_rate": 0.00041057620776908244, "loss": 3.1031, "step": 23252 }, { "epoch": 1.14, "grad_norm": 0.5642889738082886, "learning_rate": 0.0004105618954308731, "loss": 2.9625, "step": 23253 }, { "epoch": 1.14, "grad_norm": 0.5192211866378784, "learning_rate": 0.00041054758280146506, "loss": 3.1176, "step": 23254 }, { "epoch": 1.14, "grad_norm": 0.5606915950775146, "learning_rate": 0.0004105332698808959, "loss": 2.9903, "step": 23255 }, { "epoch": 1.14, "grad_norm": 0.5417342782020569, "learning_rate": 0.0004105189566692034, "loss": 2.9431, "step": 23256 }, { "epoch": 1.14, "grad_norm": 0.5401815176010132, "learning_rate": 0.00041050464316642524, "loss": 2.9124, "step": 23257 }, { "epoch": 1.14, "grad_norm": 0.5581971406936646, "learning_rate": 0.0004104903293725992, "loss": 3.1402, "step": 23258 }, { "epoch": 1.14, "grad_norm": 0.5499171018600464, "learning_rate": 0.0004104760152877628, "loss": 3.1808, "step": 23259 }, { "epoch": 1.14, "grad_norm": 0.5453315377235413, "learning_rate": 0.0004104617009119539, "loss": 3.0836, "step": 23260 }, { "epoch": 1.14, "grad_norm": 0.5436408519744873, "learning_rate": 0.0004104473862452101, "loss": 3.2032, "step": 23261 }, { "epoch": 1.14, "grad_norm": 0.595960795879364, "learning_rate": 0.00041043307128756914, "loss": 3.0531, "step": 23262 }, { "epoch": 1.14, "grad_norm": 0.5662114024162292, "learning_rate": 0.0004104187560390687, "loss": 2.9397, "step": 23263 }, { "epoch": 1.14, "grad_norm": 0.548102080821991, "learning_rate": 0.00041040444049974655, "loss": 2.9091, "step": 23264 }, { "epoch": 1.14, "grad_norm": 0.5476301312446594, "learning_rate": 0.00041039012466964034, "loss": 2.8001, "step": 23265 }, { "epoch": 1.14, "grad_norm": 0.5653447508811951, "learning_rate": 0.00041037580854878775, "loss": 3.2257, "step": 23266 }, { "epoch": 1.14, "grad_norm": 0.5923088788986206, "learning_rate": 0.00041036149213722657, "loss": 3.1438, "step": 23267 }, { "epoch": 1.14, "grad_norm": 0.5683751702308655, "learning_rate": 0.0004103471754349945, "loss": 3.0826, "step": 23268 }, { "epoch": 1.14, "grad_norm": 0.5276745557785034, "learning_rate": 0.00041033285844212914, "loss": 3.1649, "step": 23269 }, { "epoch": 1.14, "grad_norm": 0.5372428894042969, "learning_rate": 0.0004103185411586682, "loss": 3.1504, "step": 23270 }, { "epoch": 1.14, "grad_norm": 0.5391978621482849, "learning_rate": 0.00041030422358464955, "loss": 3.1403, "step": 23271 }, { "epoch": 1.14, "grad_norm": 0.5556966066360474, "learning_rate": 0.0004102899057201108, "loss": 3.1582, "step": 23272 }, { "epoch": 1.14, "grad_norm": 0.5490073561668396, "learning_rate": 0.00041027558756508967, "loss": 3.1393, "step": 23273 }, { "epoch": 1.14, "grad_norm": 0.5323960185050964, "learning_rate": 0.00041026126911962386, "loss": 3.0192, "step": 23274 }, { "epoch": 1.14, "grad_norm": 0.5622329711914062, "learning_rate": 0.00041024695038375104, "loss": 3.0085, "step": 23275 }, { "epoch": 1.14, "grad_norm": 0.5242308378219604, "learning_rate": 0.00041023263135750904, "loss": 3.1784, "step": 23276 }, { "epoch": 1.14, "grad_norm": 0.5954573154449463, "learning_rate": 0.0004102183120409354, "loss": 3.0906, "step": 23277 }, { "epoch": 1.14, "grad_norm": 0.542190670967102, "learning_rate": 0.000410203992434068, "loss": 3.102, "step": 23278 }, { "epoch": 1.14, "grad_norm": 0.5081149339675903, "learning_rate": 0.0004101896725369445, "loss": 3.1372, "step": 23279 }, { "epoch": 1.14, "grad_norm": 0.5064743161201477, "learning_rate": 0.0004101753523496026, "loss": 3.1823, "step": 23280 }, { "epoch": 1.14, "grad_norm": 0.6618224382400513, "learning_rate": 0.0004101610318720801, "loss": 3.0695, "step": 23281 }, { "epoch": 1.14, "grad_norm": 0.5303663015365601, "learning_rate": 0.00041014671110441457, "loss": 3.1745, "step": 23282 }, { "epoch": 1.14, "grad_norm": 0.5263445973396301, "learning_rate": 0.0004101323900466438, "loss": 3.022, "step": 23283 }, { "epoch": 1.14, "grad_norm": 0.5309288501739502, "learning_rate": 0.00041011806869880555, "loss": 2.762, "step": 23284 }, { "epoch": 1.14, "grad_norm": 0.5699345469474792, "learning_rate": 0.00041010374706093735, "loss": 2.9958, "step": 23285 }, { "epoch": 1.14, "grad_norm": 0.5486957430839539, "learning_rate": 0.0004100894251330772, "loss": 3.0072, "step": 23286 }, { "epoch": 1.14, "grad_norm": 0.5336732864379883, "learning_rate": 0.00041007510291526277, "loss": 3.1703, "step": 23287 }, { "epoch": 1.14, "grad_norm": 0.5408821702003479, "learning_rate": 0.00041006078040753163, "loss": 3.0636, "step": 23288 }, { "epoch": 1.14, "grad_norm": 0.5387353897094727, "learning_rate": 0.0004100464576099215, "loss": 3.0886, "step": 23289 }, { "epoch": 1.14, "grad_norm": 0.5669566988945007, "learning_rate": 0.00041003213452247026, "loss": 3.0922, "step": 23290 }, { "epoch": 1.14, "grad_norm": 0.5251073241233826, "learning_rate": 0.0004100178111452155, "loss": 3.3268, "step": 23291 }, { "epoch": 1.14, "grad_norm": 0.536972165107727, "learning_rate": 0.000410003487478195, "loss": 2.9341, "step": 23292 }, { "epoch": 1.14, "grad_norm": 0.5253141522407532, "learning_rate": 0.0004099891635214465, "loss": 3.0662, "step": 23293 }, { "epoch": 1.14, "grad_norm": 0.6211135983467102, "learning_rate": 0.00040997483927500785, "loss": 3.1949, "step": 23294 }, { "epoch": 1.14, "grad_norm": 0.5622311234474182, "learning_rate": 0.0004099605147389164, "loss": 3.218, "step": 23295 }, { "epoch": 1.14, "grad_norm": 0.5925061702728271, "learning_rate": 0.0004099461899132102, "loss": 3.2212, "step": 23296 }, { "epoch": 1.14, "grad_norm": 0.5344102382659912, "learning_rate": 0.00040993186479792694, "loss": 2.9887, "step": 23297 }, { "epoch": 1.14, "grad_norm": 0.6124759316444397, "learning_rate": 0.0004099175393931044, "loss": 3.0576, "step": 23298 }, { "epoch": 1.14, "grad_norm": 0.5534654259681702, "learning_rate": 0.00040990321369878006, "loss": 3.1516, "step": 23299 }, { "epoch": 1.14, "grad_norm": 0.5637291073799133, "learning_rate": 0.0004098888877149918, "loss": 2.9373, "step": 23300 }, { "epoch": 1.14, "grad_norm": 0.5289487242698669, "learning_rate": 0.00040987456144177747, "loss": 3.0108, "step": 23301 }, { "epoch": 1.14, "grad_norm": 0.5784006118774414, "learning_rate": 0.0004098602348791746, "loss": 2.9401, "step": 23302 }, { "epoch": 1.14, "grad_norm": 0.5589538216590881, "learning_rate": 0.00040984590802722106, "loss": 2.9519, "step": 23303 }, { "epoch": 1.14, "grad_norm": 0.573984682559967, "learning_rate": 0.00040983158088595456, "loss": 2.9528, "step": 23304 }, { "epoch": 1.14, "grad_norm": 0.5732395648956299, "learning_rate": 0.0004098172534554128, "loss": 3.081, "step": 23305 }, { "epoch": 1.14, "grad_norm": 0.5892529487609863, "learning_rate": 0.0004098029257356334, "loss": 3.1453, "step": 23306 }, { "epoch": 1.14, "grad_norm": 0.5673011541366577, "learning_rate": 0.0004097885977266544, "loss": 3.0574, "step": 23307 }, { "epoch": 1.14, "grad_norm": 0.5459287166595459, "learning_rate": 0.00040977426942851326, "loss": 3.0539, "step": 23308 }, { "epoch": 1.14, "grad_norm": 0.545254647731781, "learning_rate": 0.0004097599408412479, "loss": 2.968, "step": 23309 }, { "epoch": 1.14, "grad_norm": 0.5447995066642761, "learning_rate": 0.0004097456119648959, "loss": 2.9872, "step": 23310 }, { "epoch": 1.14, "grad_norm": 0.5695222020149231, "learning_rate": 0.0004097312827994951, "loss": 3.0334, "step": 23311 }, { "epoch": 1.14, "grad_norm": 0.5799598693847656, "learning_rate": 0.00040971695334508323, "loss": 3.1335, "step": 23312 }, { "epoch": 1.14, "grad_norm": 0.5451822876930237, "learning_rate": 0.000409702623601698, "loss": 3.1662, "step": 23313 }, { "epoch": 1.14, "grad_norm": 0.5687471628189087, "learning_rate": 0.00040968829356937726, "loss": 3.1632, "step": 23314 }, { "epoch": 1.14, "grad_norm": 0.5137524604797363, "learning_rate": 0.00040967396324815853, "loss": 3.1517, "step": 23315 }, { "epoch": 1.14, "grad_norm": 0.535132110118866, "learning_rate": 0.00040965963263807977, "loss": 3.0008, "step": 23316 }, { "epoch": 1.14, "grad_norm": 0.5807247161865234, "learning_rate": 0.0004096453017391786, "loss": 3.0634, "step": 23317 }, { "epoch": 1.14, "grad_norm": 0.585669994354248, "learning_rate": 0.00040963097055149287, "loss": 3.0915, "step": 23318 }, { "epoch": 1.14, "grad_norm": 0.5763066411018372, "learning_rate": 0.00040961663907506025, "loss": 3.1631, "step": 23319 }, { "epoch": 1.14, "grad_norm": 0.563368558883667, "learning_rate": 0.0004096023073099185, "loss": 3.3913, "step": 23320 }, { "epoch": 1.14, "grad_norm": 0.5762029886245728, "learning_rate": 0.00040958797525610525, "loss": 3.0807, "step": 23321 }, { "epoch": 1.14, "grad_norm": 0.5520102977752686, "learning_rate": 0.0004095736429136585, "loss": 3.1066, "step": 23322 }, { "epoch": 1.14, "grad_norm": 0.537263035774231, "learning_rate": 0.0004095593102826159, "loss": 3.1889, "step": 23323 }, { "epoch": 1.14, "grad_norm": 0.5755485892295837, "learning_rate": 0.00040954497736301503, "loss": 3.0597, "step": 23324 }, { "epoch": 1.14, "grad_norm": 0.5270916819572449, "learning_rate": 0.00040953064415489383, "loss": 3.1009, "step": 23325 }, { "epoch": 1.14, "grad_norm": 0.5502188801765442, "learning_rate": 0.00040951631065828996, "loss": 2.9506, "step": 23326 }, { "epoch": 1.14, "grad_norm": 0.5408546328544617, "learning_rate": 0.00040950197687324126, "loss": 2.9593, "step": 23327 }, { "epoch": 1.14, "grad_norm": 0.5552592873573303, "learning_rate": 0.0004094876427997854, "loss": 2.8134, "step": 23328 }, { "epoch": 1.14, "grad_norm": 0.5457807779312134, "learning_rate": 0.00040947330843796016, "loss": 3.2202, "step": 23329 }, { "epoch": 1.14, "grad_norm": 0.6020430326461792, "learning_rate": 0.0004094589737878033, "loss": 3.1423, "step": 23330 }, { "epoch": 1.14, "grad_norm": 0.5497642159461975, "learning_rate": 0.00040944463884935256, "loss": 3.1488, "step": 23331 }, { "epoch": 1.14, "grad_norm": 0.5718896389007568, "learning_rate": 0.00040943030362264565, "loss": 3.162, "step": 23332 }, { "epoch": 1.14, "grad_norm": 0.5680598616600037, "learning_rate": 0.00040941596810772045, "loss": 3.1336, "step": 23333 }, { "epoch": 1.14, "grad_norm": 0.5549580454826355, "learning_rate": 0.0004094016323046147, "loss": 3.2387, "step": 23334 }, { "epoch": 1.14, "grad_norm": 0.586732029914856, "learning_rate": 0.00040938729621336605, "loss": 3.0624, "step": 23335 }, { "epoch": 1.14, "grad_norm": 0.5963259339332581, "learning_rate": 0.00040937295983401226, "loss": 2.9214, "step": 23336 }, { "epoch": 1.14, "grad_norm": 0.5552912354469299, "learning_rate": 0.00040935862316659116, "loss": 3.0501, "step": 23337 }, { "epoch": 1.14, "grad_norm": 0.509372889995575, "learning_rate": 0.0004093442862111406, "loss": 3.0564, "step": 23338 }, { "epoch": 1.14, "grad_norm": 0.5622632503509521, "learning_rate": 0.00040932994896769815, "loss": 3.0737, "step": 23339 }, { "epoch": 1.14, "grad_norm": 0.5708377361297607, "learning_rate": 0.0004093156114363016, "loss": 3.2455, "step": 23340 }, { "epoch": 1.14, "grad_norm": 0.5660071969032288, "learning_rate": 0.00040930127361698887, "loss": 3.1272, "step": 23341 }, { "epoch": 1.14, "grad_norm": 0.5480991005897522, "learning_rate": 0.00040928693550979753, "loss": 3.2418, "step": 23342 }, { "epoch": 1.14, "grad_norm": 0.560892641544342, "learning_rate": 0.0004092725971147655, "loss": 2.9972, "step": 23343 }, { "epoch": 1.14, "grad_norm": 0.5563073754310608, "learning_rate": 0.00040925825843193044, "loss": 3.082, "step": 23344 }, { "epoch": 1.14, "grad_norm": 0.5521158576011658, "learning_rate": 0.0004092439194613302, "loss": 3.0532, "step": 23345 }, { "epoch": 1.14, "grad_norm": 0.5784248113632202, "learning_rate": 0.0004092295802030025, "loss": 2.9597, "step": 23346 }, { "epoch": 1.14, "grad_norm": 0.551121175289154, "learning_rate": 0.00040921524065698505, "loss": 3.1059, "step": 23347 }, { "epoch": 1.14, "grad_norm": 0.5605080723762512, "learning_rate": 0.00040920090082331565, "loss": 3.1398, "step": 23348 }, { "epoch": 1.14, "grad_norm": 0.5794740319252014, "learning_rate": 0.00040918656070203224, "loss": 2.8233, "step": 23349 }, { "epoch": 1.14, "grad_norm": 0.5346746444702148, "learning_rate": 0.00040917222029317234, "loss": 3.1589, "step": 23350 }, { "epoch": 1.14, "grad_norm": 0.540032684803009, "learning_rate": 0.0004091578795967739, "loss": 3.0918, "step": 23351 }, { "epoch": 1.14, "grad_norm": 0.5193694233894348, "learning_rate": 0.00040914353861287446, "loss": 3.3181, "step": 23352 }, { "epoch": 1.14, "grad_norm": 0.5316674113273621, "learning_rate": 0.00040912919734151205, "loss": 3.1497, "step": 23353 }, { "epoch": 1.14, "grad_norm": 0.5241429805755615, "learning_rate": 0.00040911485578272433, "loss": 3.0704, "step": 23354 }, { "epoch": 1.14, "grad_norm": 0.5313575863838196, "learning_rate": 0.00040910051393654905, "loss": 3.2197, "step": 23355 }, { "epoch": 1.14, "grad_norm": 0.5644849538803101, "learning_rate": 0.00040908617180302403, "loss": 3.0536, "step": 23356 }, { "epoch": 1.14, "grad_norm": 0.5238354802131653, "learning_rate": 0.000409071829382187, "loss": 3.1429, "step": 23357 }, { "epoch": 1.14, "grad_norm": 0.5016031265258789, "learning_rate": 0.00040905748667407576, "loss": 3.2018, "step": 23358 }, { "epoch": 1.14, "grad_norm": 0.5598127245903015, "learning_rate": 0.00040904314367872814, "loss": 3.0267, "step": 23359 }, { "epoch": 1.14, "grad_norm": 0.557971715927124, "learning_rate": 0.0004090288003961819, "loss": 3.3368, "step": 23360 }, { "epoch": 1.14, "grad_norm": 0.5256807208061218, "learning_rate": 0.00040901445682647473, "loss": 3.044, "step": 23361 }, { "epoch": 1.14, "grad_norm": 0.5249261260032654, "learning_rate": 0.0004090001129696444, "loss": 3.1656, "step": 23362 }, { "epoch": 1.14, "grad_norm": 0.5444071888923645, "learning_rate": 0.0004089857688257289, "loss": 3.0243, "step": 23363 }, { "epoch": 1.15, "grad_norm": 0.5587050318717957, "learning_rate": 0.00040897142439476575, "loss": 3.3505, "step": 23364 }, { "epoch": 1.15, "grad_norm": 0.5856272578239441, "learning_rate": 0.00040895707967679283, "loss": 3.0021, "step": 23365 }, { "epoch": 1.15, "grad_norm": 0.5790854096412659, "learning_rate": 0.000408942734671848, "loss": 3.1403, "step": 23366 }, { "epoch": 1.15, "grad_norm": 0.53217613697052, "learning_rate": 0.00040892838937996894, "loss": 3.2079, "step": 23367 }, { "epoch": 1.15, "grad_norm": 0.5617000460624695, "learning_rate": 0.0004089140438011934, "loss": 2.9651, "step": 23368 }, { "epoch": 1.15, "grad_norm": 0.5272381901741028, "learning_rate": 0.0004088996979355593, "loss": 3.0486, "step": 23369 }, { "epoch": 1.15, "grad_norm": 0.543768048286438, "learning_rate": 0.0004088853517831044, "loss": 3.0786, "step": 23370 }, { "epoch": 1.15, "grad_norm": 0.5788832902908325, "learning_rate": 0.0004088710053438664, "loss": 3.0349, "step": 23371 }, { "epoch": 1.15, "grad_norm": 0.5837016105651855, "learning_rate": 0.000408856658617883, "loss": 3.0429, "step": 23372 }, { "epoch": 1.15, "grad_norm": 0.5341154336929321, "learning_rate": 0.00040884231160519225, "loss": 2.9318, "step": 23373 }, { "epoch": 1.15, "grad_norm": 0.5163646936416626, "learning_rate": 0.0004088279643058318, "loss": 3.2756, "step": 23374 }, { "epoch": 1.15, "grad_norm": 0.5284332036972046, "learning_rate": 0.00040881361671983937, "loss": 3.1314, "step": 23375 }, { "epoch": 1.15, "grad_norm": 0.5456615686416626, "learning_rate": 0.0004087992688472529, "loss": 2.9555, "step": 23376 }, { "epoch": 1.15, "grad_norm": 0.5189909338951111, "learning_rate": 0.00040878492068811004, "loss": 2.9664, "step": 23377 }, { "epoch": 1.15, "grad_norm": 0.5077756643295288, "learning_rate": 0.0004087705722424486, "loss": 3.0454, "step": 23378 }, { "epoch": 1.15, "grad_norm": 0.5315677523612976, "learning_rate": 0.0004087562235103065, "loss": 3.1714, "step": 23379 }, { "epoch": 1.15, "grad_norm": 0.5543997883796692, "learning_rate": 0.00040874187449172134, "loss": 3.0133, "step": 23380 }, { "epoch": 1.15, "grad_norm": 0.560238242149353, "learning_rate": 0.0004087275251867311, "loss": 3.0748, "step": 23381 }, { "epoch": 1.15, "grad_norm": 0.5396565198898315, "learning_rate": 0.0004087131755953734, "loss": 3.253, "step": 23382 }, { "epoch": 1.15, "grad_norm": 0.5551555156707764, "learning_rate": 0.0004086988257176861, "loss": 2.9754, "step": 23383 }, { "epoch": 1.15, "grad_norm": 0.5715532302856445, "learning_rate": 0.00040868447555370707, "loss": 3.0113, "step": 23384 }, { "epoch": 1.15, "grad_norm": 0.5877143144607544, "learning_rate": 0.0004086701251034741, "loss": 3.0628, "step": 23385 }, { "epoch": 1.15, "grad_norm": 0.547389566898346, "learning_rate": 0.00040865577436702483, "loss": 3.0458, "step": 23386 }, { "epoch": 1.15, "grad_norm": 0.5390537977218628, "learning_rate": 0.00040864142334439725, "loss": 3.17, "step": 23387 }, { "epoch": 1.15, "grad_norm": 0.5335311889648438, "learning_rate": 0.000408627072035629, "loss": 2.9859, "step": 23388 }, { "epoch": 1.15, "grad_norm": 0.5414642691612244, "learning_rate": 0.00040861272044075803, "loss": 3.0252, "step": 23389 }, { "epoch": 1.15, "grad_norm": 0.5587677359580994, "learning_rate": 0.00040859836855982196, "loss": 3.0681, "step": 23390 }, { "epoch": 1.15, "grad_norm": 0.6065161824226379, "learning_rate": 0.0004085840163928588, "loss": 2.8998, "step": 23391 }, { "epoch": 1.15, "grad_norm": 0.5559492111206055, "learning_rate": 0.0004085696639399061, "loss": 3.1246, "step": 23392 }, { "epoch": 1.15, "grad_norm": 0.5463036894798279, "learning_rate": 0.0004085553112010019, "loss": 3.0655, "step": 23393 }, { "epoch": 1.15, "grad_norm": 0.5573052167892456, "learning_rate": 0.00040854095817618384, "loss": 2.826, "step": 23394 }, { "epoch": 1.15, "grad_norm": 0.5249746441841125, "learning_rate": 0.0004085266048654899, "loss": 3.0882, "step": 23395 }, { "epoch": 1.15, "grad_norm": 0.5521349906921387, "learning_rate": 0.00040851225126895766, "loss": 2.9564, "step": 23396 }, { "epoch": 1.15, "grad_norm": 0.550536572933197, "learning_rate": 0.00040849789738662504, "loss": 2.9643, "step": 23397 }, { "epoch": 1.15, "grad_norm": 0.5547955632209778, "learning_rate": 0.00040848354321852985, "loss": 3.2019, "step": 23398 }, { "epoch": 1.15, "grad_norm": 0.5499030351638794, "learning_rate": 0.00040846918876470987, "loss": 3.1381, "step": 23399 }, { "epoch": 1.15, "grad_norm": 0.548309326171875, "learning_rate": 0.000408454834025203, "loss": 3.0595, "step": 23400 }, { "epoch": 1.15, "grad_norm": 0.5371249914169312, "learning_rate": 0.0004084404790000469, "loss": 2.9364, "step": 23401 }, { "epoch": 1.15, "grad_norm": 0.5594949722290039, "learning_rate": 0.00040842612368927945, "loss": 3.1443, "step": 23402 }, { "epoch": 1.15, "grad_norm": 0.5433090329170227, "learning_rate": 0.0004084117680929385, "loss": 2.9709, "step": 23403 }, { "epoch": 1.15, "grad_norm": 0.5581390261650085, "learning_rate": 0.0004083974122110618, "loss": 2.9891, "step": 23404 }, { "epoch": 1.15, "grad_norm": 0.540836751461029, "learning_rate": 0.0004083830560436871, "loss": 3.2713, "step": 23405 }, { "epoch": 1.15, "grad_norm": 0.5747362971305847, "learning_rate": 0.00040836869959085236, "loss": 3.0827, "step": 23406 }, { "epoch": 1.15, "grad_norm": 0.5611171126365662, "learning_rate": 0.0004083543428525954, "loss": 3.1278, "step": 23407 }, { "epoch": 1.15, "grad_norm": 0.5559061765670776, "learning_rate": 0.0004083399858289538, "loss": 3.0086, "step": 23408 }, { "epoch": 1.15, "grad_norm": 0.5641838908195496, "learning_rate": 0.0004083256285199656, "loss": 3.1977, "step": 23409 }, { "epoch": 1.15, "grad_norm": 0.5294587016105652, "learning_rate": 0.0004083112709256685, "loss": 3.0881, "step": 23410 }, { "epoch": 1.15, "grad_norm": 0.5804036259651184, "learning_rate": 0.0004082969130461005, "loss": 2.8935, "step": 23411 }, { "epoch": 1.15, "grad_norm": 0.5449517369270325, "learning_rate": 0.00040828255488129916, "loss": 3.2973, "step": 23412 }, { "epoch": 1.15, "grad_norm": 0.5502049326896667, "learning_rate": 0.00040826819643130236, "loss": 3.2441, "step": 23413 }, { "epoch": 1.15, "grad_norm": 0.5578358769416809, "learning_rate": 0.0004082538376961481, "loss": 3.2078, "step": 23414 }, { "epoch": 1.15, "grad_norm": 0.5473148822784424, "learning_rate": 0.00040823947867587397, "loss": 3.1273, "step": 23415 }, { "epoch": 1.15, "grad_norm": 0.6118950843811035, "learning_rate": 0.00040822511937051793, "loss": 3.0009, "step": 23416 }, { "epoch": 1.15, "grad_norm": 0.5265421867370605, "learning_rate": 0.0004082107597801177, "loss": 3.1703, "step": 23417 }, { "epoch": 1.15, "grad_norm": 0.5736425518989563, "learning_rate": 0.0004081963999047112, "loss": 3.0401, "step": 23418 }, { "epoch": 1.15, "grad_norm": 0.572830080986023, "learning_rate": 0.00040818203974433623, "loss": 3.2007, "step": 23419 }, { "epoch": 1.15, "grad_norm": 0.5175371170043945, "learning_rate": 0.0004081676792990305, "loss": 3.3056, "step": 23420 }, { "epoch": 1.15, "grad_norm": 0.5525279641151428, "learning_rate": 0.000408153318568832, "loss": 2.9136, "step": 23421 }, { "epoch": 1.15, "grad_norm": 0.5556487441062927, "learning_rate": 0.00040813895755377846, "loss": 3.2804, "step": 23422 }, { "epoch": 1.15, "grad_norm": 0.5376157164573669, "learning_rate": 0.00040812459625390774, "loss": 3.0081, "step": 23423 }, { "epoch": 1.15, "grad_norm": 0.5336998701095581, "learning_rate": 0.0004081102346692575, "loss": 3.0859, "step": 23424 }, { "epoch": 1.15, "grad_norm": 0.5490345358848572, "learning_rate": 0.0004080958727998659, "loss": 3.1922, "step": 23425 }, { "epoch": 1.15, "grad_norm": 0.5638200044631958, "learning_rate": 0.0004080815106457705, "loss": 3.2863, "step": 23426 }, { "epoch": 1.15, "grad_norm": 0.5599801540374756, "learning_rate": 0.0004080671482070092, "loss": 3.0831, "step": 23427 }, { "epoch": 1.15, "grad_norm": 0.5768230557441711, "learning_rate": 0.0004080527854836198, "loss": 3.0359, "step": 23428 }, { "epoch": 1.15, "grad_norm": 0.591766357421875, "learning_rate": 0.00040803842247564014, "loss": 2.9248, "step": 23429 }, { "epoch": 1.15, "grad_norm": 0.5740898847579956, "learning_rate": 0.0004080240591831081, "loss": 3.3461, "step": 23430 }, { "epoch": 1.15, "grad_norm": 0.6085920929908752, "learning_rate": 0.0004080096956060615, "loss": 2.907, "step": 23431 }, { "epoch": 1.15, "grad_norm": 0.5598239898681641, "learning_rate": 0.00040799533174453806, "loss": 3.2174, "step": 23432 }, { "epoch": 1.15, "grad_norm": 0.5733321905136108, "learning_rate": 0.00040798096759857587, "loss": 3.2491, "step": 23433 }, { "epoch": 1.15, "grad_norm": 0.5727006793022156, "learning_rate": 0.00040796660316821243, "loss": 3.1616, "step": 23434 }, { "epoch": 1.15, "grad_norm": 0.5333359241485596, "learning_rate": 0.00040795223845348574, "loss": 3.3976, "step": 23435 }, { "epoch": 1.15, "grad_norm": 0.57224440574646, "learning_rate": 0.00040793787345443376, "loss": 3.1264, "step": 23436 }, { "epoch": 1.15, "grad_norm": 0.5239753723144531, "learning_rate": 0.0004079235081710941, "loss": 2.9847, "step": 23437 }, { "epoch": 1.15, "grad_norm": 0.5496913194656372, "learning_rate": 0.0004079091426035047, "loss": 2.9158, "step": 23438 }, { "epoch": 1.15, "grad_norm": 0.5474773049354553, "learning_rate": 0.0004078947767517034, "loss": 2.9988, "step": 23439 }, { "epoch": 1.15, "grad_norm": 0.5493378639221191, "learning_rate": 0.000407880410615728, "loss": 3.0624, "step": 23440 }, { "epoch": 1.15, "grad_norm": 0.5190552473068237, "learning_rate": 0.0004078660441956164, "loss": 3.2048, "step": 23441 }, { "epoch": 1.15, "grad_norm": 0.5590404272079468, "learning_rate": 0.0004078516774914064, "loss": 3.277, "step": 23442 }, { "epoch": 1.15, "grad_norm": 0.5510579347610474, "learning_rate": 0.00040783731050313577, "loss": 3.0241, "step": 23443 }, { "epoch": 1.15, "grad_norm": 0.534829318523407, "learning_rate": 0.0004078229432308425, "loss": 3.1232, "step": 23444 }, { "epoch": 1.15, "grad_norm": 0.5541685223579407, "learning_rate": 0.0004078085756745643, "loss": 3.0511, "step": 23445 }, { "epoch": 1.15, "grad_norm": 0.5376542806625366, "learning_rate": 0.0004077942078343391, "loss": 3.2308, "step": 23446 }, { "epoch": 1.15, "grad_norm": 0.5467217564582825, "learning_rate": 0.00040777983971020473, "loss": 3.0171, "step": 23447 }, { "epoch": 1.15, "grad_norm": 0.5329676866531372, "learning_rate": 0.0004077654713021989, "loss": 3.0905, "step": 23448 }, { "epoch": 1.15, "grad_norm": 0.5383985638618469, "learning_rate": 0.0004077511026103596, "loss": 2.947, "step": 23449 }, { "epoch": 1.15, "grad_norm": 0.5557945966720581, "learning_rate": 0.00040773673363472465, "loss": 3.0836, "step": 23450 }, { "epoch": 1.15, "grad_norm": 0.5461722612380981, "learning_rate": 0.0004077223643753319, "loss": 3.061, "step": 23451 }, { "epoch": 1.15, "grad_norm": 0.5956500172615051, "learning_rate": 0.00040770799483221914, "loss": 3.1172, "step": 23452 }, { "epoch": 1.15, "grad_norm": 0.5400736331939697, "learning_rate": 0.00040769362500542425, "loss": 3.1167, "step": 23453 }, { "epoch": 1.15, "grad_norm": 0.5344555377960205, "learning_rate": 0.0004076792548949851, "loss": 3.1462, "step": 23454 }, { "epoch": 1.15, "grad_norm": 0.5609763860702515, "learning_rate": 0.00040766488450093947, "loss": 3.0945, "step": 23455 }, { "epoch": 1.15, "grad_norm": 0.5006883144378662, "learning_rate": 0.00040765051382332527, "loss": 3.1908, "step": 23456 }, { "epoch": 1.15, "grad_norm": 0.566444456577301, "learning_rate": 0.0004076361428621804, "loss": 3.2382, "step": 23457 }, { "epoch": 1.15, "grad_norm": 0.5385657548904419, "learning_rate": 0.00040762177161754264, "loss": 2.8872, "step": 23458 }, { "epoch": 1.15, "grad_norm": 0.5454058647155762, "learning_rate": 0.00040760740008944973, "loss": 2.94, "step": 23459 }, { "epoch": 1.15, "grad_norm": 0.5491828322410583, "learning_rate": 0.00040759302827793965, "loss": 3.0851, "step": 23460 }, { "epoch": 1.15, "grad_norm": 0.5193360447883606, "learning_rate": 0.0004075786561830503, "loss": 2.9652, "step": 23461 }, { "epoch": 1.15, "grad_norm": 0.534925103187561, "learning_rate": 0.0004075642838048195, "loss": 3.0946, "step": 23462 }, { "epoch": 1.15, "grad_norm": 0.5688135027885437, "learning_rate": 0.00040754991114328506, "loss": 2.9249, "step": 23463 }, { "epoch": 1.15, "grad_norm": 0.5361714959144592, "learning_rate": 0.00040753553819848485, "loss": 3.122, "step": 23464 }, { "epoch": 1.15, "grad_norm": 0.5302151441574097, "learning_rate": 0.0004075211649704568, "loss": 2.9271, "step": 23465 }, { "epoch": 1.15, "grad_norm": 0.5387076735496521, "learning_rate": 0.0004075067914592385, "loss": 3.0661, "step": 23466 }, { "epoch": 1.15, "grad_norm": 0.5546697974205017, "learning_rate": 0.0004074924176648681, "loss": 3.1147, "step": 23467 }, { "epoch": 1.15, "grad_norm": 0.5648727416992188, "learning_rate": 0.0004074780435873834, "loss": 2.842, "step": 23468 }, { "epoch": 1.15, "grad_norm": 0.5225715637207031, "learning_rate": 0.00040746366922682223, "loss": 3.1203, "step": 23469 }, { "epoch": 1.15, "grad_norm": 0.5498008728027344, "learning_rate": 0.00040744929458322234, "loss": 3.1043, "step": 23470 }, { "epoch": 1.15, "grad_norm": 0.5694062113761902, "learning_rate": 0.0004074349196566217, "loss": 3.1744, "step": 23471 }, { "epoch": 1.15, "grad_norm": 0.5407145023345947, "learning_rate": 0.0004074205444470582, "loss": 3.1199, "step": 23472 }, { "epoch": 1.15, "grad_norm": 0.5782432556152344, "learning_rate": 0.00040740616895456976, "loss": 3.1877, "step": 23473 }, { "epoch": 1.15, "grad_norm": 0.5868061184883118, "learning_rate": 0.00040739179317919404, "loss": 3.298, "step": 23474 }, { "epoch": 1.15, "grad_norm": 0.5635443329811096, "learning_rate": 0.00040737741712096895, "loss": 3.4142, "step": 23475 }, { "epoch": 1.15, "grad_norm": 0.5290628671646118, "learning_rate": 0.00040736304077993254, "loss": 3.034, "step": 23476 }, { "epoch": 1.15, "grad_norm": 0.5918185710906982, "learning_rate": 0.00040734866415612244, "loss": 3.0493, "step": 23477 }, { "epoch": 1.15, "grad_norm": 0.5599088668823242, "learning_rate": 0.00040733428724957664, "loss": 3.263, "step": 23478 }, { "epoch": 1.15, "grad_norm": 0.5094558596611023, "learning_rate": 0.000407319910060333, "loss": 3.1001, "step": 23479 }, { "epoch": 1.15, "grad_norm": 0.6165416836738586, "learning_rate": 0.0004073055325884293, "loss": 3.0571, "step": 23480 }, { "epoch": 1.15, "grad_norm": 0.5464115142822266, "learning_rate": 0.00040729115483390357, "loss": 3.1606, "step": 23481 }, { "epoch": 1.15, "grad_norm": 0.5529763698577881, "learning_rate": 0.00040727677679679354, "loss": 3.0281, "step": 23482 }, { "epoch": 1.15, "grad_norm": 0.5393357276916504, "learning_rate": 0.00040726239847713717, "loss": 3.0014, "step": 23483 }, { "epoch": 1.15, "grad_norm": 0.5649782419204712, "learning_rate": 0.00040724801987497236, "loss": 2.9159, "step": 23484 }, { "epoch": 1.15, "grad_norm": 0.5705831050872803, "learning_rate": 0.0004072336409903367, "loss": 3.0978, "step": 23485 }, { "epoch": 1.15, "grad_norm": 0.5222892165184021, "learning_rate": 0.0004072192618232683, "loss": 3.0826, "step": 23486 }, { "epoch": 1.15, "grad_norm": 0.5631487965583801, "learning_rate": 0.0004072048823738052, "loss": 3.0558, "step": 23487 }, { "epoch": 1.15, "grad_norm": 0.5665938258171082, "learning_rate": 0.00040719050264198493, "loss": 3.2, "step": 23488 }, { "epoch": 1.15, "grad_norm": 0.5198308825492859, "learning_rate": 0.0004071761226278455, "loss": 2.922, "step": 23489 }, { "epoch": 1.15, "grad_norm": 0.5553904175758362, "learning_rate": 0.00040716174233142475, "loss": 3.1187, "step": 23490 }, { "epoch": 1.15, "grad_norm": 0.5327962636947632, "learning_rate": 0.00040714736175276073, "loss": 3.0355, "step": 23491 }, { "epoch": 1.15, "grad_norm": 0.5585110187530518, "learning_rate": 0.0004071329808918911, "loss": 3.0713, "step": 23492 }, { "epoch": 1.15, "grad_norm": 0.5731109976768494, "learning_rate": 0.0004071185997488538, "loss": 3.1877, "step": 23493 }, { "epoch": 1.15, "grad_norm": 0.5556461215019226, "learning_rate": 0.0004071042183236867, "loss": 2.8029, "step": 23494 }, { "epoch": 1.15, "grad_norm": 0.5197128653526306, "learning_rate": 0.00040708983661642785, "loss": 3.1096, "step": 23495 }, { "epoch": 1.15, "grad_norm": 0.5375272035598755, "learning_rate": 0.00040707545462711483, "loss": 3.1094, "step": 23496 }, { "epoch": 1.15, "grad_norm": 0.5286567807197571, "learning_rate": 0.00040706107235578565, "loss": 2.863, "step": 23497 }, { "epoch": 1.15, "grad_norm": 0.5974603891372681, "learning_rate": 0.00040704668980247837, "loss": 2.7441, "step": 23498 }, { "epoch": 1.15, "grad_norm": 0.5531563758850098, "learning_rate": 0.0004070323069672306, "loss": 2.9755, "step": 23499 }, { "epoch": 1.15, "grad_norm": 0.518351137638092, "learning_rate": 0.00040701792385008034, "loss": 3.214, "step": 23500 }, { "epoch": 1.15, "grad_norm": 0.5526236891746521, "learning_rate": 0.00040700354045106543, "loss": 2.9767, "step": 23501 }, { "epoch": 1.15, "grad_norm": 0.5635164976119995, "learning_rate": 0.0004069891567702238, "loss": 3.2102, "step": 23502 }, { "epoch": 1.15, "grad_norm": 0.5762778520584106, "learning_rate": 0.00040697477280759336, "loss": 3.1312, "step": 23503 }, { "epoch": 1.15, "grad_norm": 0.5641893148422241, "learning_rate": 0.000406960388563212, "loss": 3.2119, "step": 23504 }, { "epoch": 1.15, "grad_norm": 0.5612379908561707, "learning_rate": 0.0004069460040371175, "loss": 3.1178, "step": 23505 }, { "epoch": 1.15, "grad_norm": 0.5747913718223572, "learning_rate": 0.0004069316192293478, "loss": 3.3066, "step": 23506 }, { "epoch": 1.15, "grad_norm": 0.5277937650680542, "learning_rate": 0.0004069172341399408, "loss": 3.2138, "step": 23507 }, { "epoch": 1.15, "grad_norm": 0.5456346869468689, "learning_rate": 0.0004069028487689344, "loss": 3.0695, "step": 23508 }, { "epoch": 1.15, "grad_norm": 0.5213927626609802, "learning_rate": 0.00040688846311636654, "loss": 3.2051, "step": 23509 }, { "epoch": 1.15, "grad_norm": 0.5669615268707275, "learning_rate": 0.00040687407718227494, "loss": 2.951, "step": 23510 }, { "epoch": 1.15, "grad_norm": 0.5207639336585999, "learning_rate": 0.0004068596909666975, "loss": 3.2213, "step": 23511 }, { "epoch": 1.15, "grad_norm": 0.5501463413238525, "learning_rate": 0.0004068453044696724, "loss": 3.1305, "step": 23512 }, { "epoch": 1.15, "grad_norm": 0.5365391969680786, "learning_rate": 0.00040683091769123724, "loss": 2.9962, "step": 23513 }, { "epoch": 1.15, "grad_norm": 0.514644980430603, "learning_rate": 0.00040681653063143, "loss": 3.0177, "step": 23514 }, { "epoch": 1.15, "grad_norm": 0.5442690849304199, "learning_rate": 0.0004068021432902886, "loss": 3.1254, "step": 23515 }, { "epoch": 1.15, "grad_norm": 0.508587121963501, "learning_rate": 0.00040678775566785086, "loss": 3.0467, "step": 23516 }, { "epoch": 1.15, "grad_norm": 0.5336624383926392, "learning_rate": 0.00040677336776415477, "loss": 3.0867, "step": 23517 }, { "epoch": 1.15, "grad_norm": 0.5563424229621887, "learning_rate": 0.00040675897957923816, "loss": 3.1045, "step": 23518 }, { "epoch": 1.15, "grad_norm": 0.5636367797851562, "learning_rate": 0.0004067445911131389, "loss": 3.4081, "step": 23519 }, { "epoch": 1.15, "grad_norm": 0.5157569050788879, "learning_rate": 0.00040673020236589504, "loss": 3.1305, "step": 23520 }, { "epoch": 1.15, "grad_norm": 0.5343283414840698, "learning_rate": 0.00040671581333754427, "loss": 3.1285, "step": 23521 }, { "epoch": 1.15, "grad_norm": 0.5524337291717529, "learning_rate": 0.00040670142402812455, "loss": 3.1464, "step": 23522 }, { "epoch": 1.15, "grad_norm": 0.638904333114624, "learning_rate": 0.0004066870344376739, "loss": 2.9966, "step": 23523 }, { "epoch": 1.15, "grad_norm": 0.5390263199806213, "learning_rate": 0.00040667264456623016, "loss": 3.2796, "step": 23524 }, { "epoch": 1.15, "grad_norm": 0.588818371295929, "learning_rate": 0.0004066582544138312, "loss": 3.1716, "step": 23525 }, { "epoch": 1.15, "grad_norm": 0.6020216345787048, "learning_rate": 0.00040664386398051483, "loss": 3.2594, "step": 23526 }, { "epoch": 1.15, "grad_norm": 0.5486242771148682, "learning_rate": 0.00040662947326631913, "loss": 3.0998, "step": 23527 }, { "epoch": 1.15, "grad_norm": 0.5452220439910889, "learning_rate": 0.0004066150822712819, "loss": 2.853, "step": 23528 }, { "epoch": 1.15, "grad_norm": 0.5223864912986755, "learning_rate": 0.000406600690995441, "loss": 3.1148, "step": 23529 }, { "epoch": 1.15, "grad_norm": 0.5381213426589966, "learning_rate": 0.00040658629943883447, "loss": 3.2326, "step": 23530 }, { "epoch": 1.15, "grad_norm": 0.5543666481971741, "learning_rate": 0.00040657190760150013, "loss": 3.2087, "step": 23531 }, { "epoch": 1.15, "grad_norm": 0.5769178867340088, "learning_rate": 0.00040655751548347583, "loss": 3.337, "step": 23532 }, { "epoch": 1.15, "grad_norm": 0.5365214347839355, "learning_rate": 0.0004065431230847996, "loss": 3.0142, "step": 23533 }, { "epoch": 1.15, "grad_norm": 0.5313071012496948, "learning_rate": 0.00040652873040550926, "loss": 3.0712, "step": 23534 }, { "epoch": 1.15, "grad_norm": 0.6116557121276855, "learning_rate": 0.0004065143374456429, "loss": 3.0255, "step": 23535 }, { "epoch": 1.15, "grad_norm": 0.5586936473846436, "learning_rate": 0.0004064999442052381, "loss": 2.9973, "step": 23536 }, { "epoch": 1.15, "grad_norm": 0.5303226709365845, "learning_rate": 0.00040648555068433293, "loss": 3.2365, "step": 23537 }, { "epoch": 1.15, "grad_norm": 0.5624071359634399, "learning_rate": 0.0004064711568829654, "loss": 3.0904, "step": 23538 }, { "epoch": 1.15, "grad_norm": 0.5486844778060913, "learning_rate": 0.00040645676280117327, "loss": 3.0888, "step": 23539 }, { "epoch": 1.15, "grad_norm": 0.5096597671508789, "learning_rate": 0.00040644236843899456, "loss": 3.0044, "step": 23540 }, { "epoch": 1.15, "grad_norm": 0.5384427905082703, "learning_rate": 0.00040642797379646713, "loss": 3.0459, "step": 23541 }, { "epoch": 1.15, "grad_norm": 0.551108181476593, "learning_rate": 0.0004064135788736289, "loss": 3.2747, "step": 23542 }, { "epoch": 1.15, "grad_norm": 0.5435267090797424, "learning_rate": 0.00040639918367051776, "loss": 3.2177, "step": 23543 }, { "epoch": 1.15, "grad_norm": 0.5716750621795654, "learning_rate": 0.00040638478818717165, "loss": 3.2905, "step": 23544 }, { "epoch": 1.15, "grad_norm": 0.5734542608261108, "learning_rate": 0.0004063703924236285, "loss": 3.4277, "step": 23545 }, { "epoch": 1.15, "grad_norm": 0.5500821471214294, "learning_rate": 0.0004063559963799262, "loss": 3.1241, "step": 23546 }, { "epoch": 1.15, "grad_norm": 0.6079295873641968, "learning_rate": 0.00040634160005610263, "loss": 2.9869, "step": 23547 }, { "epoch": 1.15, "grad_norm": 0.5442874431610107, "learning_rate": 0.00040632720345219585, "loss": 3.1278, "step": 23548 }, { "epoch": 1.15, "grad_norm": 0.5300388336181641, "learning_rate": 0.00040631280656824365, "loss": 3.0279, "step": 23549 }, { "epoch": 1.15, "grad_norm": 0.5295524597167969, "learning_rate": 0.000406298409404284, "loss": 3.2271, "step": 23550 }, { "epoch": 1.15, "grad_norm": 0.5851762890815735, "learning_rate": 0.0004062840119603547, "loss": 3.0275, "step": 23551 }, { "epoch": 1.15, "grad_norm": 0.5763968825340271, "learning_rate": 0.00040626961423649376, "loss": 3.0724, "step": 23552 }, { "epoch": 1.15, "grad_norm": 0.534521758556366, "learning_rate": 0.0004062552162327393, "loss": 3.0856, "step": 23553 }, { "epoch": 1.15, "grad_norm": 0.5817490220069885, "learning_rate": 0.0004062408179491288, "loss": 3.1984, "step": 23554 }, { "epoch": 1.15, "grad_norm": 0.548794686794281, "learning_rate": 0.0004062264193857006, "loss": 3.0361, "step": 23555 }, { "epoch": 1.15, "grad_norm": 0.5278934240341187, "learning_rate": 0.0004062120205424924, "loss": 2.9397, "step": 23556 }, { "epoch": 1.15, "grad_norm": 0.5557737350463867, "learning_rate": 0.0004061976214195422, "loss": 3.0761, "step": 23557 }, { "epoch": 1.15, "grad_norm": 0.6352978944778442, "learning_rate": 0.0004061832220168879, "loss": 3.2706, "step": 23558 }, { "epoch": 1.15, "grad_norm": 0.5525439381599426, "learning_rate": 0.00040616882233456735, "loss": 3.1981, "step": 23559 }, { "epoch": 1.15, "grad_norm": 0.5778921246528625, "learning_rate": 0.0004061544223726187, "loss": 3.0622, "step": 23560 }, { "epoch": 1.15, "grad_norm": 0.5746831297874451, "learning_rate": 0.0004061400221310796, "loss": 3.335, "step": 23561 }, { "epoch": 1.15, "grad_norm": 0.5181114077568054, "learning_rate": 0.00040612562160998817, "loss": 3.0855, "step": 23562 }, { "epoch": 1.15, "grad_norm": 0.5968653559684753, "learning_rate": 0.00040611122080938224, "loss": 3.1416, "step": 23563 }, { "epoch": 1.15, "grad_norm": 0.5655339956283569, "learning_rate": 0.0004060968197292999, "loss": 3.0138, "step": 23564 }, { "epoch": 1.15, "grad_norm": 0.5550764203071594, "learning_rate": 0.00040608241836977886, "loss": 3.0035, "step": 23565 }, { "epoch": 1.15, "grad_norm": 0.5673668384552002, "learning_rate": 0.0004060680167308571, "loss": 3.0248, "step": 23566 }, { "epoch": 1.15, "grad_norm": 0.5502573251724243, "learning_rate": 0.0004060536148125726, "loss": 3.3946, "step": 23567 }, { "epoch": 1.16, "grad_norm": 0.5763288140296936, "learning_rate": 0.00040603921261496336, "loss": 3.1626, "step": 23568 }, { "epoch": 1.16, "grad_norm": 0.5315199494361877, "learning_rate": 0.0004060248101380672, "loss": 2.9473, "step": 23569 }, { "epoch": 1.16, "grad_norm": 0.5365606546401978, "learning_rate": 0.00040601040738192214, "loss": 3.0127, "step": 23570 }, { "epoch": 1.16, "grad_norm": 0.5493980050086975, "learning_rate": 0.000405996004346566, "loss": 2.8699, "step": 23571 }, { "epoch": 1.16, "grad_norm": 0.5779330730438232, "learning_rate": 0.0004059816010320369, "loss": 3.0912, "step": 23572 }, { "epoch": 1.16, "grad_norm": 0.5333728790283203, "learning_rate": 0.00040596719743837253, "loss": 3.2728, "step": 23573 }, { "epoch": 1.16, "grad_norm": 0.5707440376281738, "learning_rate": 0.0004059527935656109, "loss": 3.0194, "step": 23574 }, { "epoch": 1.16, "grad_norm": 0.5592421293258667, "learning_rate": 0.0004059383894137902, "loss": 2.8304, "step": 23575 }, { "epoch": 1.16, "grad_norm": 0.5238040089607239, "learning_rate": 0.0004059239849829481, "loss": 2.9382, "step": 23576 }, { "epoch": 1.16, "grad_norm": 0.5463867783546448, "learning_rate": 0.00040590958027312255, "loss": 3.3477, "step": 23577 }, { "epoch": 1.16, "grad_norm": 0.5499131083488464, "learning_rate": 0.0004058951752843516, "loss": 3.274, "step": 23578 }, { "epoch": 1.16, "grad_norm": 0.5574745535850525, "learning_rate": 0.0004058807700166731, "loss": 2.9782, "step": 23579 }, { "epoch": 1.16, "grad_norm": 0.5117610692977905, "learning_rate": 0.0004058663644701251, "loss": 3.0166, "step": 23580 }, { "epoch": 1.16, "grad_norm": 0.5790355205535889, "learning_rate": 0.00040585195864474544, "loss": 3.2326, "step": 23581 }, { "epoch": 1.16, "grad_norm": 0.5722032785415649, "learning_rate": 0.00040583755254057204, "loss": 3.116, "step": 23582 }, { "epoch": 1.16, "grad_norm": 0.5745276212692261, "learning_rate": 0.00040582314615764293, "loss": 3.0388, "step": 23583 }, { "epoch": 1.16, "grad_norm": 0.5759443640708923, "learning_rate": 0.000405808739495996, "loss": 2.9188, "step": 23584 }, { "epoch": 1.16, "grad_norm": 0.5403763651847839, "learning_rate": 0.00040579433255566927, "loss": 3.0228, "step": 23585 }, { "epoch": 1.16, "grad_norm": 0.5597376227378845, "learning_rate": 0.00040577992533670065, "loss": 3.3179, "step": 23586 }, { "epoch": 1.16, "grad_norm": 0.565265417098999, "learning_rate": 0.000405765517839128, "loss": 3.2375, "step": 23587 }, { "epoch": 1.16, "grad_norm": 0.584064781665802, "learning_rate": 0.00040575111006298925, "loss": 2.8922, "step": 23588 }, { "epoch": 1.16, "grad_norm": 0.5477665066719055, "learning_rate": 0.00040573670200832253, "loss": 3.0098, "step": 23589 }, { "epoch": 1.16, "grad_norm": 0.5629181265830994, "learning_rate": 0.0004057222936751657, "loss": 3.0527, "step": 23590 }, { "epoch": 1.16, "grad_norm": 0.5321118235588074, "learning_rate": 0.0004057078850635567, "loss": 3.1828, "step": 23591 }, { "epoch": 1.16, "grad_norm": 0.5479146242141724, "learning_rate": 0.00040569347617353343, "loss": 3.1211, "step": 23592 }, { "epoch": 1.16, "grad_norm": 0.5773611068725586, "learning_rate": 0.0004056790670051339, "loss": 2.9645, "step": 23593 }, { "epoch": 1.16, "grad_norm": 0.5333780646324158, "learning_rate": 0.000405664657558396, "loss": 3.1218, "step": 23594 }, { "epoch": 1.16, "grad_norm": 0.5267741680145264, "learning_rate": 0.00040565024783335775, "loss": 3.0897, "step": 23595 }, { "epoch": 1.16, "grad_norm": 0.5395077466964722, "learning_rate": 0.00040563583783005707, "loss": 3.0424, "step": 23596 }, { "epoch": 1.16, "grad_norm": 0.562576949596405, "learning_rate": 0.00040562142754853206, "loss": 3.3082, "step": 23597 }, { "epoch": 1.16, "grad_norm": 0.5555576682090759, "learning_rate": 0.00040560701698882034, "loss": 3.0102, "step": 23598 }, { "epoch": 1.16, "grad_norm": 0.5478713512420654, "learning_rate": 0.0004055926061509602, "loss": 2.9769, "step": 23599 }, { "epoch": 1.16, "grad_norm": 0.5155161619186401, "learning_rate": 0.00040557819503498943, "loss": 3.0548, "step": 23600 }, { "epoch": 1.16, "grad_norm": 0.563854455947876, "learning_rate": 0.0004055637836409459, "loss": 2.9703, "step": 23601 }, { "epoch": 1.16, "grad_norm": 0.5164005756378174, "learning_rate": 0.0004055493719688678, "loss": 3.032, "step": 23602 }, { "epoch": 1.16, "grad_norm": 0.6590232849121094, "learning_rate": 0.00040553496001879296, "loss": 3.1402, "step": 23603 }, { "epoch": 1.16, "grad_norm": 0.5741829872131348, "learning_rate": 0.00040552054779075935, "loss": 2.7843, "step": 23604 }, { "epoch": 1.16, "grad_norm": 0.5617377758026123, "learning_rate": 0.00040550613528480493, "loss": 3.1812, "step": 23605 }, { "epoch": 1.16, "grad_norm": 0.52630615234375, "learning_rate": 0.0004054917225009676, "loss": 3.1588, "step": 23606 }, { "epoch": 1.16, "grad_norm": 0.5488269329071045, "learning_rate": 0.0004054773094392854, "loss": 3.2574, "step": 23607 }, { "epoch": 1.16, "grad_norm": 0.7178289294242859, "learning_rate": 0.00040546289609979624, "loss": 2.9536, "step": 23608 }, { "epoch": 1.16, "grad_norm": 0.5843186974525452, "learning_rate": 0.0004054484824825382, "loss": 3.1758, "step": 23609 }, { "epoch": 1.16, "grad_norm": 0.6291219592094421, "learning_rate": 0.00040543406858754903, "loss": 3.0335, "step": 23610 }, { "epoch": 1.16, "grad_norm": 0.5768383741378784, "learning_rate": 0.0004054196544148669, "loss": 2.8571, "step": 23611 }, { "epoch": 1.16, "grad_norm": 0.5430237054824829, "learning_rate": 0.0004054052399645297, "loss": 2.9117, "step": 23612 }, { "epoch": 1.16, "grad_norm": 0.5219388008117676, "learning_rate": 0.00040539082523657536, "loss": 3.1468, "step": 23613 }, { "epoch": 1.16, "grad_norm": 0.547167181968689, "learning_rate": 0.0004053764102310417, "loss": 3.237, "step": 23614 }, { "epoch": 1.16, "grad_norm": 0.5904198288917542, "learning_rate": 0.0004053619949479671, "loss": 3.1433, "step": 23615 }, { "epoch": 1.16, "grad_norm": 0.5035914778709412, "learning_rate": 0.0004053475793873892, "loss": 2.966, "step": 23616 }, { "epoch": 1.16, "grad_norm": 0.5650410652160645, "learning_rate": 0.00040533316354934607, "loss": 3.1302, "step": 23617 }, { "epoch": 1.16, "grad_norm": 0.5530869960784912, "learning_rate": 0.0004053187474338756, "loss": 3.0593, "step": 23618 }, { "epoch": 1.16, "grad_norm": 0.560907781124115, "learning_rate": 0.00040530433104101583, "loss": 3.1948, "step": 23619 }, { "epoch": 1.16, "grad_norm": 0.5566398501396179, "learning_rate": 0.00040528991437080474, "loss": 3.2553, "step": 23620 }, { "epoch": 1.16, "grad_norm": 0.5609525442123413, "learning_rate": 0.00040527549742328027, "loss": 2.9138, "step": 23621 }, { "epoch": 1.16, "grad_norm": 0.5385319590568542, "learning_rate": 0.0004052610801984805, "loss": 3.1255, "step": 23622 }, { "epoch": 1.16, "grad_norm": 0.5298342108726501, "learning_rate": 0.0004052466626964432, "loss": 3.3531, "step": 23623 }, { "epoch": 1.16, "grad_norm": 0.5590463280677795, "learning_rate": 0.00040523224491720636, "loss": 3.0695, "step": 23624 }, { "epoch": 1.16, "grad_norm": 0.5449240803718567, "learning_rate": 0.00040521782686080816, "loss": 3.0979, "step": 23625 }, { "epoch": 1.16, "grad_norm": 0.5203918218612671, "learning_rate": 0.00040520340852728647, "loss": 3.023, "step": 23626 }, { "epoch": 1.16, "grad_norm": 0.6454471945762634, "learning_rate": 0.0004051889899166792, "loss": 3.0371, "step": 23627 }, { "epoch": 1.16, "grad_norm": 0.546628475189209, "learning_rate": 0.0004051745710290244, "loss": 2.9801, "step": 23628 }, { "epoch": 1.16, "grad_norm": 0.5471029877662659, "learning_rate": 0.00040516015186436, "loss": 3.0225, "step": 23629 }, { "epoch": 1.16, "grad_norm": 0.57635498046875, "learning_rate": 0.00040514573242272396, "loss": 3.1095, "step": 23630 }, { "epoch": 1.16, "grad_norm": 0.5368325710296631, "learning_rate": 0.0004051313127041544, "loss": 3.0904, "step": 23631 }, { "epoch": 1.16, "grad_norm": 0.5363153219223022, "learning_rate": 0.00040511689270868905, "loss": 2.986, "step": 23632 }, { "epoch": 1.16, "grad_norm": 0.5831332802772522, "learning_rate": 0.00040510247243636614, "loss": 3.0365, "step": 23633 }, { "epoch": 1.16, "grad_norm": 0.5615931749343872, "learning_rate": 0.0004050880518872235, "loss": 3.142, "step": 23634 }, { "epoch": 1.16, "grad_norm": 0.5161574482917786, "learning_rate": 0.0004050736310612992, "loss": 3.2197, "step": 23635 }, { "epoch": 1.16, "grad_norm": 0.5491262078285217, "learning_rate": 0.00040505920995863114, "loss": 2.9822, "step": 23636 }, { "epoch": 1.16, "grad_norm": 0.5797311067581177, "learning_rate": 0.00040504478857925736, "loss": 3.1336, "step": 23637 }, { "epoch": 1.16, "grad_norm": 0.5718839168548584, "learning_rate": 0.00040503036692321584, "loss": 3.0789, "step": 23638 }, { "epoch": 1.16, "grad_norm": 0.529486358165741, "learning_rate": 0.0004050159449905445, "loss": 3.0697, "step": 23639 }, { "epoch": 1.16, "grad_norm": 0.5350091457366943, "learning_rate": 0.0004050015227812815, "loss": 3.1539, "step": 23640 }, { "epoch": 1.16, "grad_norm": 0.5986202359199524, "learning_rate": 0.0004049871002954645, "loss": 3.2035, "step": 23641 }, { "epoch": 1.16, "grad_norm": 0.5608084797859192, "learning_rate": 0.0004049726775331318, "loss": 3.0752, "step": 23642 }, { "epoch": 1.16, "grad_norm": 0.5415424108505249, "learning_rate": 0.00040495825449432125, "loss": 3.2252, "step": 23643 }, { "epoch": 1.16, "grad_norm": 0.5681462287902832, "learning_rate": 0.00040494383117907083, "loss": 3.228, "step": 23644 }, { "epoch": 1.16, "grad_norm": 0.5260352492332458, "learning_rate": 0.0004049294075874186, "loss": 3.1951, "step": 23645 }, { "epoch": 1.16, "grad_norm": 0.5569955706596375, "learning_rate": 0.0004049149837194024, "loss": 3.1433, "step": 23646 }, { "epoch": 1.16, "grad_norm": 0.5537160634994507, "learning_rate": 0.00040490055957506047, "loss": 2.7933, "step": 23647 }, { "epoch": 1.16, "grad_norm": 0.5751855373382568, "learning_rate": 0.00040488613515443064, "loss": 2.9887, "step": 23648 }, { "epoch": 1.16, "grad_norm": 0.6018997430801392, "learning_rate": 0.00040487171045755087, "loss": 2.9387, "step": 23649 }, { "epoch": 1.16, "grad_norm": 0.5773023366928101, "learning_rate": 0.0004048572854844591, "loss": 3.0559, "step": 23650 }, { "epoch": 1.16, "grad_norm": 0.5616927146911621, "learning_rate": 0.0004048428602351936, "loss": 3.2197, "step": 23651 }, { "epoch": 1.16, "grad_norm": 0.5932993292808533, "learning_rate": 0.00040482843470979207, "loss": 2.9324, "step": 23652 }, { "epoch": 1.16, "grad_norm": 0.5337039232254028, "learning_rate": 0.0004048140089082927, "loss": 3.0964, "step": 23653 }, { "epoch": 1.16, "grad_norm": 0.5320896506309509, "learning_rate": 0.00040479958283073334, "loss": 3.0553, "step": 23654 }, { "epoch": 1.16, "grad_norm": 0.5578566193580627, "learning_rate": 0.0004047851564771521, "loss": 2.9863, "step": 23655 }, { "epoch": 1.16, "grad_norm": 0.5269665718078613, "learning_rate": 0.00040477072984758687, "loss": 3.0494, "step": 23656 }, { "epoch": 1.16, "grad_norm": 0.6187220811843872, "learning_rate": 0.0004047563029420757, "loss": 3.0293, "step": 23657 }, { "epoch": 1.16, "grad_norm": 0.5536328554153442, "learning_rate": 0.00040474187576065656, "loss": 2.991, "step": 23658 }, { "epoch": 1.16, "grad_norm": 0.5319032669067383, "learning_rate": 0.0004047274483033676, "loss": 3.0669, "step": 23659 }, { "epoch": 1.16, "grad_norm": 0.5195010304450989, "learning_rate": 0.00040471302057024653, "loss": 2.8542, "step": 23660 }, { "epoch": 1.16, "grad_norm": 0.5610055327415466, "learning_rate": 0.0004046985925613316, "loss": 2.9366, "step": 23661 }, { "epoch": 1.16, "grad_norm": 0.5877775549888611, "learning_rate": 0.0004046841642766608, "loss": 3.2743, "step": 23662 }, { "epoch": 1.16, "grad_norm": 0.5556247234344482, "learning_rate": 0.000404669735716272, "loss": 3.2213, "step": 23663 }, { "epoch": 1.16, "grad_norm": 0.5412140488624573, "learning_rate": 0.00040465530688020324, "loss": 3.2489, "step": 23664 }, { "epoch": 1.16, "grad_norm": 0.5714347958564758, "learning_rate": 0.00040464087776849244, "loss": 3.0755, "step": 23665 }, { "epoch": 1.16, "grad_norm": 0.5732872486114502, "learning_rate": 0.00040462644838117783, "loss": 3.0111, "step": 23666 }, { "epoch": 1.16, "grad_norm": 0.5459180474281311, "learning_rate": 0.00040461201871829736, "loss": 2.8323, "step": 23667 }, { "epoch": 1.16, "grad_norm": 0.5625903606414795, "learning_rate": 0.00040459758877988886, "loss": 3.0188, "step": 23668 }, { "epoch": 1.16, "grad_norm": 0.5748194456100464, "learning_rate": 0.00040458315856599044, "loss": 2.6947, "step": 23669 }, { "epoch": 1.16, "grad_norm": 0.5342017412185669, "learning_rate": 0.00040456872807664016, "loss": 3.1381, "step": 23670 }, { "epoch": 1.16, "grad_norm": 0.5473812818527222, "learning_rate": 0.0004045542973118759, "loss": 3.1439, "step": 23671 }, { "epoch": 1.16, "grad_norm": 0.517485499382019, "learning_rate": 0.00040453986627173586, "loss": 3.0633, "step": 23672 }, { "epoch": 1.16, "grad_norm": 0.5450983047485352, "learning_rate": 0.0004045254349562579, "loss": 3.056, "step": 23673 }, { "epoch": 1.16, "grad_norm": 0.6088648438453674, "learning_rate": 0.00040451100336548, "loss": 3.1054, "step": 23674 }, { "epoch": 1.16, "grad_norm": 0.5295372009277344, "learning_rate": 0.0004044965714994402, "loss": 3.1122, "step": 23675 }, { "epoch": 1.16, "grad_norm": 0.5838629007339478, "learning_rate": 0.00040448213935817654, "loss": 3.1979, "step": 23676 }, { "epoch": 1.16, "grad_norm": 0.5640658736228943, "learning_rate": 0.0004044677069417272, "loss": 3.163, "step": 23677 }, { "epoch": 1.16, "grad_norm": 0.5375956296920776, "learning_rate": 0.00040445327425012986, "loss": 3.2325, "step": 23678 }, { "epoch": 1.16, "grad_norm": 0.5397927761077881, "learning_rate": 0.0004044388412834228, "loss": 3.1302, "step": 23679 }, { "epoch": 1.16, "grad_norm": 0.5797949433326721, "learning_rate": 0.00040442440804164384, "loss": 3.0378, "step": 23680 }, { "epoch": 1.16, "grad_norm": 0.5403143763542175, "learning_rate": 0.00040440997452483116, "loss": 3.1009, "step": 23681 }, { "epoch": 1.16, "grad_norm": 0.5413429141044617, "learning_rate": 0.00040439554073302264, "loss": 2.7718, "step": 23682 }, { "epoch": 1.16, "grad_norm": 0.5402602553367615, "learning_rate": 0.0004043811066662564, "loss": 3.1233, "step": 23683 }, { "epoch": 1.16, "grad_norm": 0.5142017006874084, "learning_rate": 0.00040436667232457044, "loss": 3.0242, "step": 23684 }, { "epoch": 1.16, "grad_norm": 0.5384440422058105, "learning_rate": 0.0004043522377080026, "loss": 3.1141, "step": 23685 }, { "epoch": 1.16, "grad_norm": 0.539061963558197, "learning_rate": 0.0004043378028165912, "loss": 2.9895, "step": 23686 }, { "epoch": 1.16, "grad_norm": 0.5121895670890808, "learning_rate": 0.0004043233676503741, "loss": 3.0622, "step": 23687 }, { "epoch": 1.16, "grad_norm": 0.5728363990783691, "learning_rate": 0.00040430893220938925, "loss": 3.0116, "step": 23688 }, { "epoch": 1.16, "grad_norm": 0.5647080540657043, "learning_rate": 0.00040429449649367487, "loss": 3.0383, "step": 23689 }, { "epoch": 1.16, "grad_norm": 0.5658575296401978, "learning_rate": 0.0004042800605032687, "loss": 3.0062, "step": 23690 }, { "epoch": 1.16, "grad_norm": 0.5728873014450073, "learning_rate": 0.00040426562423820904, "loss": 2.9859, "step": 23691 }, { "epoch": 1.16, "grad_norm": 0.5607736110687256, "learning_rate": 0.0004042511876985338, "loss": 3.1462, "step": 23692 }, { "epoch": 1.16, "grad_norm": 0.5391986966133118, "learning_rate": 0.00040423675088428095, "loss": 3.165, "step": 23693 }, { "epoch": 1.16, "grad_norm": 0.5368701815605164, "learning_rate": 0.0004042223137954885, "loss": 3.1621, "step": 23694 }, { "epoch": 1.16, "grad_norm": 0.578046441078186, "learning_rate": 0.0004042078764321945, "loss": 3.2745, "step": 23695 }, { "epoch": 1.16, "grad_norm": 0.5303065776824951, "learning_rate": 0.0004041934387944372, "loss": 3.2873, "step": 23696 }, { "epoch": 1.16, "grad_norm": 0.5496455430984497, "learning_rate": 0.00040417900088225435, "loss": 2.9276, "step": 23697 }, { "epoch": 1.16, "grad_norm": 0.6175884008407593, "learning_rate": 0.00040416456269568404, "loss": 2.8928, "step": 23698 }, { "epoch": 1.16, "grad_norm": 0.5311169028282166, "learning_rate": 0.0004041501242347644, "loss": 3.1252, "step": 23699 }, { "epoch": 1.16, "grad_norm": 0.5432295203208923, "learning_rate": 0.0004041356854995332, "loss": 3.2053, "step": 23700 }, { "epoch": 1.16, "grad_norm": 0.5909796357154846, "learning_rate": 0.00040412124649002876, "loss": 3.0875, "step": 23701 }, { "epoch": 1.16, "grad_norm": 0.5231812000274658, "learning_rate": 0.000404106807206289, "loss": 2.9765, "step": 23702 }, { "epoch": 1.16, "grad_norm": 0.5160446763038635, "learning_rate": 0.0004040923676483519, "loss": 3.0671, "step": 23703 }, { "epoch": 1.16, "grad_norm": 0.5727381706237793, "learning_rate": 0.00040407792781625555, "loss": 2.9777, "step": 23704 }, { "epoch": 1.16, "grad_norm": 0.5669409036636353, "learning_rate": 0.000404063487710038, "loss": 3.185, "step": 23705 }, { "epoch": 1.16, "grad_norm": 0.5885549783706665, "learning_rate": 0.0004040490473297372, "loss": 3.3162, "step": 23706 }, { "epoch": 1.16, "grad_norm": 0.5521764159202576, "learning_rate": 0.0004040346066753913, "loss": 3.0025, "step": 23707 }, { "epoch": 1.16, "grad_norm": 0.5233154892921448, "learning_rate": 0.0004040201657470382, "loss": 3.0222, "step": 23708 }, { "epoch": 1.16, "grad_norm": 0.6025112867355347, "learning_rate": 0.00040400572454471603, "loss": 3.0527, "step": 23709 }, { "epoch": 1.16, "grad_norm": 0.5110952258110046, "learning_rate": 0.00040399128306846285, "loss": 3.1567, "step": 23710 }, { "epoch": 1.16, "grad_norm": 0.5763827562332153, "learning_rate": 0.00040397684131831656, "loss": 3.1964, "step": 23711 }, { "epoch": 1.16, "grad_norm": 0.5659586191177368, "learning_rate": 0.00040396239929431534, "loss": 3.1401, "step": 23712 }, { "epoch": 1.16, "grad_norm": 0.5608625411987305, "learning_rate": 0.0004039479569964971, "loss": 3.0601, "step": 23713 }, { "epoch": 1.16, "grad_norm": 0.5368182063102722, "learning_rate": 0.00040393351442490007, "loss": 2.9722, "step": 23714 }, { "epoch": 1.16, "grad_norm": 0.5225261449813843, "learning_rate": 0.00040391907157956214, "loss": 3.0737, "step": 23715 }, { "epoch": 1.16, "grad_norm": 0.5581330060958862, "learning_rate": 0.0004039046284605212, "loss": 3.1152, "step": 23716 }, { "epoch": 1.16, "grad_norm": 0.5440344214439392, "learning_rate": 0.00040389018506781563, "loss": 3.2068, "step": 23717 }, { "epoch": 1.16, "grad_norm": 0.5621079802513123, "learning_rate": 0.0004038757414014833, "loss": 3.0555, "step": 23718 }, { "epoch": 1.16, "grad_norm": 0.566859781742096, "learning_rate": 0.00040386129746156215, "loss": 2.9883, "step": 23719 }, { "epoch": 1.16, "grad_norm": 0.5255882143974304, "learning_rate": 0.0004038468532480905, "loss": 2.8779, "step": 23720 }, { "epoch": 1.16, "grad_norm": 0.5612319111824036, "learning_rate": 0.0004038324087611061, "loss": 3.115, "step": 23721 }, { "epoch": 1.16, "grad_norm": 0.5677599906921387, "learning_rate": 0.00040381796400064716, "loss": 3.0859, "step": 23722 }, { "epoch": 1.16, "grad_norm": 0.5437223315238953, "learning_rate": 0.00040380351896675157, "loss": 3.0952, "step": 23723 }, { "epoch": 1.16, "grad_norm": 0.5329674482345581, "learning_rate": 0.0004037890736594577, "loss": 3.0249, "step": 23724 }, { "epoch": 1.16, "grad_norm": 0.5678834319114685, "learning_rate": 0.00040377462807880324, "loss": 3.0096, "step": 23725 }, { "epoch": 1.16, "grad_norm": 0.5391994118690491, "learning_rate": 0.0004037601822248264, "loss": 3.2036, "step": 23726 }, { "epoch": 1.16, "grad_norm": 0.5704066753387451, "learning_rate": 0.0004037457360975652, "loss": 3.2307, "step": 23727 }, { "epoch": 1.16, "grad_norm": 0.528113842010498, "learning_rate": 0.00040373128969705774, "loss": 3.3054, "step": 23728 }, { "epoch": 1.16, "grad_norm": 0.5105913877487183, "learning_rate": 0.00040371684302334203, "loss": 3.173, "step": 23729 }, { "epoch": 1.16, "grad_norm": 0.5552327036857605, "learning_rate": 0.0004037023960764561, "loss": 3.1766, "step": 23730 }, { "epoch": 1.16, "grad_norm": 0.5556386113166809, "learning_rate": 0.000403687948856438, "loss": 2.9731, "step": 23731 }, { "epoch": 1.16, "grad_norm": 0.5652289986610413, "learning_rate": 0.0004036735013633258, "loss": 3.2652, "step": 23732 }, { "epoch": 1.16, "grad_norm": 0.4932308495044708, "learning_rate": 0.0004036590535971576, "loss": 3.1269, "step": 23733 }, { "epoch": 1.16, "grad_norm": 0.5384199023246765, "learning_rate": 0.0004036446055579713, "loss": 3.1946, "step": 23734 }, { "epoch": 1.16, "grad_norm": 0.5091381669044495, "learning_rate": 0.00040363015724580517, "loss": 2.9825, "step": 23735 }, { "epoch": 1.16, "grad_norm": 0.5702914595603943, "learning_rate": 0.0004036157086606971, "loss": 3.1606, "step": 23736 }, { "epoch": 1.16, "grad_norm": 0.5650493502616882, "learning_rate": 0.00040360125980268513, "loss": 3.0586, "step": 23737 }, { "epoch": 1.16, "grad_norm": 0.5145954489707947, "learning_rate": 0.00040358681067180746, "loss": 3.2997, "step": 23738 }, { "epoch": 1.16, "grad_norm": 0.5607908368110657, "learning_rate": 0.00040357236126810207, "loss": 3.0453, "step": 23739 }, { "epoch": 1.16, "grad_norm": 0.5422621965408325, "learning_rate": 0.000403557911591607, "loss": 2.933, "step": 23740 }, { "epoch": 1.16, "grad_norm": 0.5554590225219727, "learning_rate": 0.0004035434616423603, "loss": 2.9494, "step": 23741 }, { "epoch": 1.16, "grad_norm": 0.6001891493797302, "learning_rate": 0.0004035290114204, "loss": 3.1366, "step": 23742 }, { "epoch": 1.16, "grad_norm": 0.5242499113082886, "learning_rate": 0.00040351456092576433, "loss": 3.277, "step": 23743 }, { "epoch": 1.16, "grad_norm": 0.5295645594596863, "learning_rate": 0.00040350011015849116, "loss": 3.2324, "step": 23744 }, { "epoch": 1.16, "grad_norm": 0.5429471135139465, "learning_rate": 0.00040348565911861866, "loss": 3.0578, "step": 23745 }, { "epoch": 1.16, "grad_norm": 0.5438092350959778, "learning_rate": 0.0004034712078061849, "loss": 3.0985, "step": 23746 }, { "epoch": 1.16, "grad_norm": 0.5752182006835938, "learning_rate": 0.0004034567562212277, "loss": 3.2343, "step": 23747 }, { "epoch": 1.16, "grad_norm": 0.5302133560180664, "learning_rate": 0.0004034423043637854, "loss": 3.0236, "step": 23748 }, { "epoch": 1.16, "grad_norm": 0.5672379732131958, "learning_rate": 0.000403427852233896, "loss": 3.2175, "step": 23749 }, { "epoch": 1.16, "grad_norm": 0.5564298033714294, "learning_rate": 0.0004034133998315976, "loss": 3.2707, "step": 23750 }, { "epoch": 1.16, "grad_norm": 0.5528357028961182, "learning_rate": 0.00040339894715692815, "loss": 3.1907, "step": 23751 }, { "epoch": 1.16, "grad_norm": 0.5306649208068848, "learning_rate": 0.0004033844942099257, "loss": 3.0427, "step": 23752 }, { "epoch": 1.16, "grad_norm": 0.5104833841323853, "learning_rate": 0.00040337004099062854, "loss": 3.1894, "step": 23753 }, { "epoch": 1.16, "grad_norm": 0.5718398094177246, "learning_rate": 0.00040335558749907455, "loss": 3.0523, "step": 23754 }, { "epoch": 1.16, "grad_norm": 0.568389892578125, "learning_rate": 0.00040334113373530174, "loss": 3.0214, "step": 23755 }, { "epoch": 1.16, "grad_norm": 0.567775547504425, "learning_rate": 0.00040332667969934837, "loss": 3.217, "step": 23756 }, { "epoch": 1.16, "grad_norm": 0.5069112777709961, "learning_rate": 0.00040331222539125234, "loss": 2.9524, "step": 23757 }, { "epoch": 1.16, "grad_norm": 0.5586796402931213, "learning_rate": 0.0004032977708110518, "loss": 3.1551, "step": 23758 }, { "epoch": 1.16, "grad_norm": 0.5643654465675354, "learning_rate": 0.0004032833159587848, "loss": 2.9142, "step": 23759 }, { "epoch": 1.16, "grad_norm": 0.5675867199897766, "learning_rate": 0.00040326886083448944, "loss": 2.9242, "step": 23760 }, { "epoch": 1.16, "grad_norm": 0.5585340261459351, "learning_rate": 0.00040325440543820387, "loss": 3.1171, "step": 23761 }, { "epoch": 1.16, "grad_norm": 0.5579453706741333, "learning_rate": 0.0004032399497699659, "loss": 3.082, "step": 23762 }, { "epoch": 1.16, "grad_norm": 0.5434767603874207, "learning_rate": 0.00040322549382981385, "loss": 2.9839, "step": 23763 }, { "epoch": 1.16, "grad_norm": 0.5664116740226746, "learning_rate": 0.00040321103761778584, "loss": 2.8925, "step": 23764 }, { "epoch": 1.16, "grad_norm": 0.5497961640357971, "learning_rate": 0.00040319658113391967, "loss": 3.0979, "step": 23765 }, { "epoch": 1.16, "grad_norm": 0.5843026041984558, "learning_rate": 0.00040318212437825355, "loss": 3.1466, "step": 23766 }, { "epoch": 1.16, "grad_norm": 0.5227053761482239, "learning_rate": 0.00040316766735082565, "loss": 2.9814, "step": 23767 }, { "epoch": 1.16, "grad_norm": 0.5491824150085449, "learning_rate": 0.0004031532100516739, "loss": 3.0808, "step": 23768 }, { "epoch": 1.16, "grad_norm": 0.5716967582702637, "learning_rate": 0.00040313875248083647, "loss": 3.2243, "step": 23769 }, { "epoch": 1.16, "grad_norm": 0.5618823170661926, "learning_rate": 0.00040312429463835146, "loss": 3.162, "step": 23770 }, { "epoch": 1.16, "grad_norm": 0.5274955630302429, "learning_rate": 0.00040310983652425695, "loss": 2.892, "step": 23771 }, { "epoch": 1.17, "grad_norm": 0.6322970390319824, "learning_rate": 0.0004030953781385909, "loss": 3.0052, "step": 23772 }, { "epoch": 1.17, "grad_norm": 0.5498408079147339, "learning_rate": 0.00040308091948139136, "loss": 2.9249, "step": 23773 }, { "epoch": 1.17, "grad_norm": 0.5550096035003662, "learning_rate": 0.0004030664605526966, "loss": 3.0928, "step": 23774 }, { "epoch": 1.17, "grad_norm": 0.5641299486160278, "learning_rate": 0.00040305200135254475, "loss": 3.1518, "step": 23775 }, { "epoch": 1.17, "grad_norm": 0.5485759973526001, "learning_rate": 0.0004030375418809736, "loss": 2.9378, "step": 23776 }, { "epoch": 1.17, "grad_norm": 0.5412816405296326, "learning_rate": 0.0004030230821380215, "loss": 2.9547, "step": 23777 }, { "epoch": 1.17, "grad_norm": 0.5825008749961853, "learning_rate": 0.0004030086221237263, "loss": 3.1217, "step": 23778 }, { "epoch": 1.17, "grad_norm": 0.533901035785675, "learning_rate": 0.00040299416183812634, "loss": 3.1133, "step": 23779 }, { "epoch": 1.17, "grad_norm": 0.5950905084609985, "learning_rate": 0.00040297970128125955, "loss": 3.0327, "step": 23780 }, { "epoch": 1.17, "grad_norm": 0.5376090407371521, "learning_rate": 0.00040296524045316403, "loss": 3.1263, "step": 23781 }, { "epoch": 1.17, "grad_norm": 0.552521288394928, "learning_rate": 0.00040295077935387783, "loss": 3.0581, "step": 23782 }, { "epoch": 1.17, "grad_norm": 0.5569019913673401, "learning_rate": 0.0004029363179834391, "loss": 3.0234, "step": 23783 }, { "epoch": 1.17, "grad_norm": 0.5935654044151306, "learning_rate": 0.000402921856341886, "loss": 3.1942, "step": 23784 }, { "epoch": 1.17, "grad_norm": 0.5477899312973022, "learning_rate": 0.00040290739442925644, "loss": 3.1383, "step": 23785 }, { "epoch": 1.17, "grad_norm": 0.5384113788604736, "learning_rate": 0.00040289293224558874, "loss": 3.1697, "step": 23786 }, { "epoch": 1.17, "grad_norm": 0.5377801656723022, "learning_rate": 0.00040287846979092075, "loss": 3.1197, "step": 23787 }, { "epoch": 1.17, "grad_norm": 0.560335636138916, "learning_rate": 0.0004028640070652907, "loss": 2.9627, "step": 23788 }, { "epoch": 1.17, "grad_norm": 0.5258608460426331, "learning_rate": 0.0004028495440687366, "loss": 3.1526, "step": 23789 }, { "epoch": 1.17, "grad_norm": 0.5460545420646667, "learning_rate": 0.00040283508080129674, "loss": 3.1596, "step": 23790 }, { "epoch": 1.17, "grad_norm": 0.5956621766090393, "learning_rate": 0.00040282061726300896, "loss": 3.0147, "step": 23791 }, { "epoch": 1.17, "grad_norm": 0.5216905474662781, "learning_rate": 0.00040280615345391144, "loss": 2.9739, "step": 23792 }, { "epoch": 1.17, "grad_norm": 0.508186399936676, "learning_rate": 0.00040279168937404233, "loss": 3.0885, "step": 23793 }, { "epoch": 1.17, "grad_norm": 0.5671628713607788, "learning_rate": 0.0004027772250234397, "loss": 3.0457, "step": 23794 }, { "epoch": 1.17, "grad_norm": 0.582146406173706, "learning_rate": 0.0004027627604021416, "loss": 2.9687, "step": 23795 }, { "epoch": 1.17, "grad_norm": 0.6142930388450623, "learning_rate": 0.0004027482955101863, "loss": 3.0002, "step": 23796 }, { "epoch": 1.17, "grad_norm": 0.5688886642456055, "learning_rate": 0.00040273383034761164, "loss": 3.0811, "step": 23797 }, { "epoch": 1.17, "grad_norm": 0.5424383878707886, "learning_rate": 0.0004027193649144558, "loss": 3.1528, "step": 23798 }, { "epoch": 1.17, "grad_norm": 0.6007323861122131, "learning_rate": 0.00040270489921075706, "loss": 2.7161, "step": 23799 }, { "epoch": 1.17, "grad_norm": 0.5330042839050293, "learning_rate": 0.0004026904332365533, "loss": 2.7069, "step": 23800 }, { "epoch": 1.17, "grad_norm": 0.5643306970596313, "learning_rate": 0.0004026759669918828, "loss": 2.8007, "step": 23801 }, { "epoch": 1.17, "grad_norm": 0.5669317245483398, "learning_rate": 0.00040266150047678346, "loss": 3.0615, "step": 23802 }, { "epoch": 1.17, "grad_norm": 0.6755366325378418, "learning_rate": 0.00040264703369129336, "loss": 3.0581, "step": 23803 }, { "epoch": 1.17, "grad_norm": 0.5715072751045227, "learning_rate": 0.000402632566635451, "loss": 3.1604, "step": 23804 }, { "epoch": 1.17, "grad_norm": 0.5770458579063416, "learning_rate": 0.0004026180993092941, "loss": 2.9243, "step": 23805 }, { "epoch": 1.17, "grad_norm": 0.5324617028236389, "learning_rate": 0.0004026036317128608, "loss": 3.3259, "step": 23806 }, { "epoch": 1.17, "grad_norm": 0.5772245526313782, "learning_rate": 0.00040258916384618935, "loss": 3.2526, "step": 23807 }, { "epoch": 1.17, "grad_norm": 0.5446373224258423, "learning_rate": 0.0004025746957093178, "loss": 3.2472, "step": 23808 }, { "epoch": 1.17, "grad_norm": 0.5985844135284424, "learning_rate": 0.0004025602273022842, "loss": 3.1847, "step": 23809 }, { "epoch": 1.17, "grad_norm": 0.5797475576400757, "learning_rate": 0.00040254575862512667, "loss": 2.9985, "step": 23810 }, { "epoch": 1.17, "grad_norm": 0.5373989939689636, "learning_rate": 0.00040253128967788343, "loss": 2.9025, "step": 23811 }, { "epoch": 1.17, "grad_norm": 0.5533153414726257, "learning_rate": 0.0004025168204605925, "loss": 3.2097, "step": 23812 }, { "epoch": 1.17, "grad_norm": 0.5629292726516724, "learning_rate": 0.000402502350973292, "loss": 3.1, "step": 23813 }, { "epoch": 1.17, "grad_norm": 0.5873868465423584, "learning_rate": 0.00040248788121601995, "loss": 3.1942, "step": 23814 }, { "epoch": 1.17, "grad_norm": 0.5775611996650696, "learning_rate": 0.00040247341118881464, "loss": 2.9935, "step": 23815 }, { "epoch": 1.17, "grad_norm": 0.5191925168037415, "learning_rate": 0.000402458940891714, "loss": 3.1487, "step": 23816 }, { "epoch": 1.17, "grad_norm": 0.5707910656929016, "learning_rate": 0.00040244447032475626, "loss": 3.0536, "step": 23817 }, { "epoch": 1.17, "grad_norm": 0.555317223072052, "learning_rate": 0.0004024299994879795, "loss": 2.9648, "step": 23818 }, { "epoch": 1.17, "grad_norm": 0.5581421852111816, "learning_rate": 0.0004024155283814219, "loss": 3.1438, "step": 23819 }, { "epoch": 1.17, "grad_norm": 0.607385516166687, "learning_rate": 0.0004024010570051214, "loss": 2.9115, "step": 23820 }, { "epoch": 1.17, "grad_norm": 0.5479592084884644, "learning_rate": 0.0004023865853591163, "loss": 2.9924, "step": 23821 }, { "epoch": 1.17, "grad_norm": 0.5671947598457336, "learning_rate": 0.0004023721134434446, "loss": 2.9375, "step": 23822 }, { "epoch": 1.17, "grad_norm": 0.5418531894683838, "learning_rate": 0.0004023576412581445, "loss": 3.1671, "step": 23823 }, { "epoch": 1.17, "grad_norm": 0.5851473212242126, "learning_rate": 0.00040234316880325403, "loss": 3.1394, "step": 23824 }, { "epoch": 1.17, "grad_norm": 0.5450239181518555, "learning_rate": 0.0004023286960788113, "loss": 3.149, "step": 23825 }, { "epoch": 1.17, "grad_norm": 0.5457543730735779, "learning_rate": 0.00040231422308485465, "loss": 2.8019, "step": 23826 }, { "epoch": 1.17, "grad_norm": 0.5215305685997009, "learning_rate": 0.0004022997498214219, "loss": 3.36, "step": 23827 }, { "epoch": 1.17, "grad_norm": 0.5583608746528625, "learning_rate": 0.0004022852762885513, "loss": 2.9352, "step": 23828 }, { "epoch": 1.17, "grad_norm": 0.5622576475143433, "learning_rate": 0.0004022708024862809, "loss": 3.029, "step": 23829 }, { "epoch": 1.17, "grad_norm": 0.5386072993278503, "learning_rate": 0.0004022563284146491, "loss": 3.2786, "step": 23830 }, { "epoch": 1.17, "grad_norm": 0.6399205327033997, "learning_rate": 0.0004022418540736936, "loss": 2.9981, "step": 23831 }, { "epoch": 1.17, "grad_norm": 0.5961595177650452, "learning_rate": 0.0004022273794634528, "loss": 3.1205, "step": 23832 }, { "epoch": 1.17, "grad_norm": 0.5393549203872681, "learning_rate": 0.0004022129045839648, "loss": 2.9914, "step": 23833 }, { "epoch": 1.17, "grad_norm": 0.5417840480804443, "learning_rate": 0.0004021984294352676, "loss": 3.0758, "step": 23834 }, { "epoch": 1.17, "grad_norm": 0.582919180393219, "learning_rate": 0.00040218395401739944, "loss": 3.1598, "step": 23835 }, { "epoch": 1.17, "grad_norm": 0.5843506455421448, "learning_rate": 0.0004021694783303985, "loss": 2.9717, "step": 23836 }, { "epoch": 1.17, "grad_norm": 0.5359125733375549, "learning_rate": 0.00040215500237430275, "loss": 3.0764, "step": 23837 }, { "epoch": 1.17, "grad_norm": 0.608666181564331, "learning_rate": 0.00040214052614915045, "loss": 2.9321, "step": 23838 }, { "epoch": 1.17, "grad_norm": 0.5720089673995972, "learning_rate": 0.0004021260496549795, "loss": 3.0585, "step": 23839 }, { "epoch": 1.17, "grad_norm": 0.528830885887146, "learning_rate": 0.00040211157289182824, "loss": 3.2521, "step": 23840 }, { "epoch": 1.17, "grad_norm": 0.57254958152771, "learning_rate": 0.00040209709585973483, "loss": 3.148, "step": 23841 }, { "epoch": 1.17, "grad_norm": 0.529454231262207, "learning_rate": 0.0004020826185587373, "loss": 3.0549, "step": 23842 }, { "epoch": 1.17, "grad_norm": 0.5671842694282532, "learning_rate": 0.00040206814098887373, "loss": 3.1676, "step": 23843 }, { "epoch": 1.17, "grad_norm": 0.5969575047492981, "learning_rate": 0.00040205366315018237, "loss": 3.3495, "step": 23844 }, { "epoch": 1.17, "grad_norm": 0.5233942270278931, "learning_rate": 0.00040203918504270135, "loss": 2.9989, "step": 23845 }, { "epoch": 1.17, "grad_norm": 0.5281428694725037, "learning_rate": 0.0004020247066664686, "loss": 3.1292, "step": 23846 }, { "epoch": 1.17, "grad_norm": 0.5774059891700745, "learning_rate": 0.00040201022802152257, "loss": 2.922, "step": 23847 }, { "epoch": 1.17, "grad_norm": 0.5433593392372131, "learning_rate": 0.0004019957491079012, "loss": 3.0951, "step": 23848 }, { "epoch": 1.17, "grad_norm": 0.5464013814926147, "learning_rate": 0.00040198126992564264, "loss": 3.2079, "step": 23849 }, { "epoch": 1.17, "grad_norm": 0.5496659874916077, "learning_rate": 0.00040196679047478493, "loss": 3.1016, "step": 23850 }, { "epoch": 1.17, "grad_norm": 0.542181134223938, "learning_rate": 0.00040195231075536644, "loss": 3.0924, "step": 23851 }, { "epoch": 1.17, "grad_norm": 0.596310019493103, "learning_rate": 0.00040193783076742523, "loss": 3.1904, "step": 23852 }, { "epoch": 1.17, "grad_norm": 0.5427301526069641, "learning_rate": 0.0004019233505109993, "loss": 3.1372, "step": 23853 }, { "epoch": 1.17, "grad_norm": 0.5772640705108643, "learning_rate": 0.00040190886998612695, "loss": 2.9875, "step": 23854 }, { "epoch": 1.17, "grad_norm": 0.5249664187431335, "learning_rate": 0.00040189438919284616, "loss": 3.1938, "step": 23855 }, { "epoch": 1.17, "grad_norm": 0.5812081694602966, "learning_rate": 0.00040187990813119525, "loss": 3.1606, "step": 23856 }, { "epoch": 1.17, "grad_norm": 0.5547779202461243, "learning_rate": 0.0004018654268012122, "loss": 3.2556, "step": 23857 }, { "epoch": 1.17, "grad_norm": 0.5426790714263916, "learning_rate": 0.0004018509452029352, "loss": 3.1825, "step": 23858 }, { "epoch": 1.17, "grad_norm": 0.5476883053779602, "learning_rate": 0.0004018364633364025, "loss": 3.0241, "step": 23859 }, { "epoch": 1.17, "grad_norm": 0.5173876285552979, "learning_rate": 0.00040182198120165206, "loss": 3.0905, "step": 23860 }, { "epoch": 1.17, "grad_norm": 0.5504484176635742, "learning_rate": 0.00040180749879872217, "loss": 3.212, "step": 23861 }, { "epoch": 1.17, "grad_norm": 0.5541347861289978, "learning_rate": 0.0004017930161276509, "loss": 2.9228, "step": 23862 }, { "epoch": 1.17, "grad_norm": 0.580261766910553, "learning_rate": 0.0004017785331884765, "loss": 2.9678, "step": 23863 }, { "epoch": 1.17, "grad_norm": 0.6364923715591431, "learning_rate": 0.000401764049981237, "loss": 3.1864, "step": 23864 }, { "epoch": 1.17, "grad_norm": 0.5156949758529663, "learning_rate": 0.0004017495665059704, "loss": 3.3035, "step": 23865 }, { "epoch": 1.17, "grad_norm": 0.5521565675735474, "learning_rate": 0.00040173508276271526, "loss": 2.9317, "step": 23866 }, { "epoch": 1.17, "grad_norm": 0.5523860454559326, "learning_rate": 0.0004017205987515094, "loss": 3.1082, "step": 23867 }, { "epoch": 1.17, "grad_norm": 0.5502112507820129, "learning_rate": 0.00040170611447239103, "loss": 3.0595, "step": 23868 }, { "epoch": 1.17, "grad_norm": 0.5495514273643494, "learning_rate": 0.00040169162992539833, "loss": 3.05, "step": 23869 }, { "epoch": 1.17, "grad_norm": 0.5663180947303772, "learning_rate": 0.00040167714511056947, "loss": 3.133, "step": 23870 }, { "epoch": 1.17, "grad_norm": 0.5522775650024414, "learning_rate": 0.0004016626600279426, "loss": 3.1148, "step": 23871 }, { "epoch": 1.17, "grad_norm": 0.5634680390357971, "learning_rate": 0.00040164817467755584, "loss": 3.1422, "step": 23872 }, { "epoch": 1.17, "grad_norm": 0.5490415096282959, "learning_rate": 0.0004016336890594473, "loss": 2.8798, "step": 23873 }, { "epoch": 1.17, "grad_norm": 0.5496166348457336, "learning_rate": 0.00040161920317365534, "loss": 2.8576, "step": 23874 }, { "epoch": 1.17, "grad_norm": 0.5630446076393127, "learning_rate": 0.00040160471702021773, "loss": 3.266, "step": 23875 }, { "epoch": 1.17, "grad_norm": 0.5701159238815308, "learning_rate": 0.0004015902305991729, "loss": 3.0536, "step": 23876 }, { "epoch": 1.17, "grad_norm": 0.5700026154518127, "learning_rate": 0.00040157574391055907, "loss": 2.979, "step": 23877 }, { "epoch": 1.17, "grad_norm": 0.5726680159568787, "learning_rate": 0.0004015612569544143, "loss": 3.0092, "step": 23878 }, { "epoch": 1.17, "grad_norm": 0.5549166202545166, "learning_rate": 0.0004015467697307766, "loss": 3.2318, "step": 23879 }, { "epoch": 1.17, "grad_norm": 0.5379598736763, "learning_rate": 0.0004015322822396842, "loss": 2.9661, "step": 23880 }, { "epoch": 1.17, "grad_norm": 0.5367391705513, "learning_rate": 0.00040151779448117545, "loss": 3.1219, "step": 23881 }, { "epoch": 1.17, "grad_norm": 0.5859577655792236, "learning_rate": 0.00040150330645528833, "loss": 3.0839, "step": 23882 }, { "epoch": 1.17, "grad_norm": 0.5608367323875427, "learning_rate": 0.0004014888181620611, "loss": 3.1321, "step": 23883 }, { "epoch": 1.17, "grad_norm": 0.5417865514755249, "learning_rate": 0.00040147432960153175, "loss": 2.9928, "step": 23884 }, { "epoch": 1.17, "grad_norm": 0.5564191341400146, "learning_rate": 0.0004014598407737386, "loss": 3.0766, "step": 23885 }, { "epoch": 1.17, "grad_norm": 0.5441717505455017, "learning_rate": 0.0004014453516787197, "loss": 3.1433, "step": 23886 }, { "epoch": 1.17, "grad_norm": 0.5383797287940979, "learning_rate": 0.0004014308623165133, "loss": 3.194, "step": 23887 }, { "epoch": 1.17, "grad_norm": 0.6364131569862366, "learning_rate": 0.00040141637268715754, "loss": 3.347, "step": 23888 }, { "epoch": 1.17, "grad_norm": 0.565334677696228, "learning_rate": 0.0004014018827906906, "loss": 3.252, "step": 23889 }, { "epoch": 1.17, "grad_norm": 0.5537137985229492, "learning_rate": 0.0004013873926271506, "loss": 3.0982, "step": 23890 }, { "epoch": 1.17, "grad_norm": 0.5591994524002075, "learning_rate": 0.0004013729021965756, "loss": 3.0882, "step": 23891 }, { "epoch": 1.17, "grad_norm": 0.5560016632080078, "learning_rate": 0.0004013584114990041, "loss": 3.1173, "step": 23892 }, { "epoch": 1.17, "grad_norm": 0.5662415623664856, "learning_rate": 0.00040134392053447387, "loss": 2.995, "step": 23893 }, { "epoch": 1.17, "grad_norm": 0.5805153846740723, "learning_rate": 0.0004013294293030233, "loss": 3.3028, "step": 23894 }, { "epoch": 1.17, "grad_norm": 0.5564658045768738, "learning_rate": 0.0004013149378046905, "loss": 2.9882, "step": 23895 }, { "epoch": 1.17, "grad_norm": 0.5353381633758545, "learning_rate": 0.0004013004460395137, "loss": 2.9248, "step": 23896 }, { "epoch": 1.17, "grad_norm": 0.5824044346809387, "learning_rate": 0.000401285954007531, "loss": 2.8985, "step": 23897 }, { "epoch": 1.17, "grad_norm": 0.5630552768707275, "learning_rate": 0.0004012714617087806, "loss": 3.1425, "step": 23898 }, { "epoch": 1.17, "grad_norm": 0.5532223582267761, "learning_rate": 0.0004012569691433008, "loss": 3.1631, "step": 23899 }, { "epoch": 1.17, "grad_norm": 0.5480971932411194, "learning_rate": 0.00040124247631112943, "loss": 2.8964, "step": 23900 }, { "epoch": 1.17, "grad_norm": 0.5849064588546753, "learning_rate": 0.00040122798321230485, "loss": 2.9088, "step": 23901 }, { "epoch": 1.17, "grad_norm": 0.5372636318206787, "learning_rate": 0.0004012134898468653, "loss": 2.7437, "step": 23902 }, { "epoch": 1.17, "grad_norm": 0.5332382321357727, "learning_rate": 0.000401198996214849, "loss": 3.2017, "step": 23903 }, { "epoch": 1.17, "grad_norm": 0.5774133801460266, "learning_rate": 0.00040118450231629393, "loss": 3.2153, "step": 23904 }, { "epoch": 1.17, "grad_norm": 0.5455067157745361, "learning_rate": 0.0004011700081512384, "loss": 3.1119, "step": 23905 }, { "epoch": 1.17, "grad_norm": 0.5268059968948364, "learning_rate": 0.00040115551371972045, "loss": 3.0404, "step": 23906 }, { "epoch": 1.17, "grad_norm": 0.5563852190971375, "learning_rate": 0.0004011410190217784, "loss": 2.8807, "step": 23907 }, { "epoch": 1.17, "grad_norm": 0.5379848480224609, "learning_rate": 0.0004011265240574503, "loss": 3.1135, "step": 23908 }, { "epoch": 1.17, "grad_norm": 0.5759731531143188, "learning_rate": 0.0004011120288267745, "loss": 3.054, "step": 23909 }, { "epoch": 1.17, "grad_norm": 0.571288526058197, "learning_rate": 0.0004010975333297891, "loss": 3.1158, "step": 23910 }, { "epoch": 1.17, "grad_norm": 0.5381522178649902, "learning_rate": 0.00040108303756653213, "loss": 3.1559, "step": 23911 }, { "epoch": 1.17, "grad_norm": 0.6180474162101746, "learning_rate": 0.00040106854153704196, "loss": 3.0476, "step": 23912 }, { "epoch": 1.17, "grad_norm": 0.5325801372528076, "learning_rate": 0.00040105404524135664, "loss": 3.0364, "step": 23913 }, { "epoch": 1.17, "grad_norm": 0.5483402013778687, "learning_rate": 0.00040103954867951454, "loss": 2.9758, "step": 23914 }, { "epoch": 1.17, "grad_norm": 0.5332612991333008, "learning_rate": 0.00040102505185155365, "loss": 3.0309, "step": 23915 }, { "epoch": 1.17, "grad_norm": 0.5399067997932434, "learning_rate": 0.00040101055475751216, "loss": 3.2472, "step": 23916 }, { "epoch": 1.17, "grad_norm": 0.5286165475845337, "learning_rate": 0.0004009960573974284, "loss": 3.3073, "step": 23917 }, { "epoch": 1.17, "grad_norm": 0.5932464003562927, "learning_rate": 0.0004009815597713404, "loss": 3.0836, "step": 23918 }, { "epoch": 1.17, "grad_norm": 0.5477786660194397, "learning_rate": 0.00040096706187928643, "loss": 3.1094, "step": 23919 }, { "epoch": 1.17, "grad_norm": 0.5101897716522217, "learning_rate": 0.0004009525637213046, "loss": 3.043, "step": 23920 }, { "epoch": 1.17, "grad_norm": 0.5300383567810059, "learning_rate": 0.0004009380652974332, "loss": 3.2401, "step": 23921 }, { "epoch": 1.17, "grad_norm": 0.5190649628639221, "learning_rate": 0.0004009235666077103, "loss": 3.224, "step": 23922 }, { "epoch": 1.17, "grad_norm": 0.5531300902366638, "learning_rate": 0.00040090906765217425, "loss": 2.9455, "step": 23923 }, { "epoch": 1.17, "grad_norm": 0.5615624189376831, "learning_rate": 0.00040089456843086304, "loss": 2.9886, "step": 23924 }, { "epoch": 1.17, "grad_norm": 0.562551736831665, "learning_rate": 0.0004008800689438151, "loss": 3.0263, "step": 23925 }, { "epoch": 1.17, "grad_norm": 0.541335940361023, "learning_rate": 0.00040086556919106833, "loss": 3.0767, "step": 23926 }, { "epoch": 1.17, "grad_norm": 0.5761546492576599, "learning_rate": 0.00040085106917266106, "loss": 2.9538, "step": 23927 }, { "epoch": 1.17, "grad_norm": 0.5472856163978577, "learning_rate": 0.00040083656888863163, "loss": 2.8641, "step": 23928 }, { "epoch": 1.17, "grad_norm": 0.5612488389015198, "learning_rate": 0.000400822068339018, "loss": 2.9537, "step": 23929 }, { "epoch": 1.17, "grad_norm": 0.5841686725616455, "learning_rate": 0.0004008075675238584, "loss": 3.1257, "step": 23930 }, { "epoch": 1.17, "grad_norm": 0.5874391794204712, "learning_rate": 0.0004007930664431911, "loss": 3.0393, "step": 23931 }, { "epoch": 1.17, "grad_norm": 0.5234120488166809, "learning_rate": 0.00040077856509705424, "loss": 3.1151, "step": 23932 }, { "epoch": 1.17, "grad_norm": 0.5505731105804443, "learning_rate": 0.00040076406348548603, "loss": 3.1803, "step": 23933 }, { "epoch": 1.17, "grad_norm": 0.5326248407363892, "learning_rate": 0.00040074956160852476, "loss": 3.2542, "step": 23934 }, { "epoch": 1.17, "grad_norm": 0.5707079768180847, "learning_rate": 0.00040073505946620837, "loss": 3.028, "step": 23935 }, { "epoch": 1.17, "grad_norm": 0.5336579084396362, "learning_rate": 0.0004007205570585754, "loss": 3.2687, "step": 23936 }, { "epoch": 1.17, "grad_norm": 0.5472836494445801, "learning_rate": 0.00040070605438566375, "loss": 2.8165, "step": 23937 }, { "epoch": 1.17, "grad_norm": 0.6249715089797974, "learning_rate": 0.00040069155144751173, "loss": 3.1156, "step": 23938 }, { "epoch": 1.17, "grad_norm": 0.533698320388794, "learning_rate": 0.00040067704824415764, "loss": 2.9911, "step": 23939 }, { "epoch": 1.17, "grad_norm": 0.5471363663673401, "learning_rate": 0.00040066254477563956, "loss": 2.9763, "step": 23940 }, { "epoch": 1.17, "grad_norm": 0.5757012963294983, "learning_rate": 0.0004006480410419957, "loss": 3.2284, "step": 23941 }, { "epoch": 1.17, "grad_norm": 0.5495263934135437, "learning_rate": 0.0004006335370432641, "loss": 2.9926, "step": 23942 }, { "epoch": 1.17, "grad_norm": 0.5950400233268738, "learning_rate": 0.00040061903277948335, "loss": 2.9854, "step": 23943 }, { "epoch": 1.17, "grad_norm": 0.5375027060508728, "learning_rate": 0.00040060452825069133, "loss": 3.3018, "step": 23944 }, { "epoch": 1.17, "grad_norm": 0.5809231400489807, "learning_rate": 0.00040059002345692644, "loss": 3.1755, "step": 23945 }, { "epoch": 1.17, "grad_norm": 0.5961464643478394, "learning_rate": 0.00040057551839822665, "loss": 3.0865, "step": 23946 }, { "epoch": 1.17, "grad_norm": 0.574310839176178, "learning_rate": 0.0004005610130746303, "loss": 3.0654, "step": 23947 }, { "epoch": 1.17, "grad_norm": 0.5345257520675659, "learning_rate": 0.00040054650748617575, "loss": 3.1405, "step": 23948 }, { "epoch": 1.17, "grad_norm": 0.565226674079895, "learning_rate": 0.0004005320016329009, "loss": 2.7882, "step": 23949 }, { "epoch": 1.17, "grad_norm": 0.5397844314575195, "learning_rate": 0.00040051749551484425, "loss": 2.9382, "step": 23950 }, { "epoch": 1.17, "grad_norm": 0.5760013461112976, "learning_rate": 0.00040050298913204375, "loss": 3.017, "step": 23951 }, { "epoch": 1.17, "grad_norm": 0.5508855581283569, "learning_rate": 0.0004004884824845377, "loss": 3.2093, "step": 23952 }, { "epoch": 1.17, "grad_norm": 0.5632848739624023, "learning_rate": 0.0004004739755723644, "loss": 3.0376, "step": 23953 }, { "epoch": 1.17, "grad_norm": 0.536578893661499, "learning_rate": 0.00040045946839556196, "loss": 3.0994, "step": 23954 }, { "epoch": 1.17, "grad_norm": 0.5494163632392883, "learning_rate": 0.00040044496095416863, "loss": 2.9971, "step": 23955 }, { "epoch": 1.17, "grad_norm": 0.5547284483909607, "learning_rate": 0.0004004304532482226, "loss": 3.268, "step": 23956 }, { "epoch": 1.17, "grad_norm": 0.551442563533783, "learning_rate": 0.00040041594527776204, "loss": 3.0469, "step": 23957 }, { "epoch": 1.17, "grad_norm": 0.5382516384124756, "learning_rate": 0.00040040143704282526, "loss": 3.4146, "step": 23958 }, { "epoch": 1.17, "grad_norm": 0.5559409856796265, "learning_rate": 0.0004003869285434504, "loss": 3.0229, "step": 23959 }, { "epoch": 1.17, "grad_norm": 0.5598931312561035, "learning_rate": 0.0004003724197796756, "loss": 3.0695, "step": 23960 }, { "epoch": 1.17, "grad_norm": 0.59652179479599, "learning_rate": 0.00040035791075153935, "loss": 3.1012, "step": 23961 }, { "epoch": 1.17, "grad_norm": 0.5521093606948853, "learning_rate": 0.0004003434014590796, "loss": 3.1512, "step": 23962 }, { "epoch": 1.17, "grad_norm": 0.5462397933006287, "learning_rate": 0.0004003288919023346, "loss": 3.1358, "step": 23963 }, { "epoch": 1.17, "grad_norm": 0.5487635731697083, "learning_rate": 0.0004003143820813425, "loss": 3.373, "step": 23964 }, { "epoch": 1.17, "grad_norm": 0.6183003783226013, "learning_rate": 0.00040029987199614186, "loss": 2.8504, "step": 23965 }, { "epoch": 1.17, "grad_norm": 0.5639381408691406, "learning_rate": 0.00040028536164677053, "loss": 2.9285, "step": 23966 }, { "epoch": 1.17, "grad_norm": 0.5435758829116821, "learning_rate": 0.00040027085103326685, "loss": 3.1571, "step": 23967 }, { "epoch": 1.17, "grad_norm": 0.5409099459648132, "learning_rate": 0.0004002563401556691, "loss": 3.0643, "step": 23968 }, { "epoch": 1.17, "grad_norm": 0.564572811126709, "learning_rate": 0.00040024182901401544, "loss": 3.0944, "step": 23969 }, { "epoch": 1.17, "grad_norm": 0.5847697257995605, "learning_rate": 0.00040022731760834405, "loss": 3.1069, "step": 23970 }, { "epoch": 1.17, "grad_norm": 0.5416810512542725, "learning_rate": 0.00040021280593869317, "loss": 3.1393, "step": 23971 }, { "epoch": 1.17, "grad_norm": 0.5735564231872559, "learning_rate": 0.00040019829400510113, "loss": 3.278, "step": 23972 }, { "epoch": 1.17, "grad_norm": 0.5522201657295227, "learning_rate": 0.000400183781807606, "loss": 3.1469, "step": 23973 }, { "epoch": 1.17, "grad_norm": 0.5339809060096741, "learning_rate": 0.00040016926934624607, "loss": 3.2685, "step": 23974 }, { "epoch": 1.17, "grad_norm": 0.5736157894134521, "learning_rate": 0.00040015475662105965, "loss": 3.1981, "step": 23975 }, { "epoch": 1.18, "grad_norm": 0.5715289115905762, "learning_rate": 0.00040014024363208483, "loss": 3.1167, "step": 23976 }, { "epoch": 1.18, "grad_norm": 0.5391354560852051, "learning_rate": 0.00040012573037935984, "loss": 3.2589, "step": 23977 }, { "epoch": 1.18, "grad_norm": 0.5813712477684021, "learning_rate": 0.0004001112168629229, "loss": 3.0653, "step": 23978 }, { "epoch": 1.18, "grad_norm": 0.6210728287696838, "learning_rate": 0.0004000967030828124, "loss": 3.1392, "step": 23979 }, { "epoch": 1.18, "grad_norm": 0.5406283736228943, "learning_rate": 0.0004000821890390664, "loss": 3.2409, "step": 23980 }, { "epoch": 1.18, "grad_norm": 0.5507621765136719, "learning_rate": 0.00040006767473172323, "loss": 3.1331, "step": 23981 }, { "epoch": 1.18, "grad_norm": 0.5478275418281555, "learning_rate": 0.00040005316016082095, "loss": 2.9733, "step": 23982 }, { "epoch": 1.18, "grad_norm": 0.6610849499702454, "learning_rate": 0.00040003864532639797, "loss": 2.9103, "step": 23983 }, { "epoch": 1.18, "grad_norm": 0.5360865592956543, "learning_rate": 0.00040002413022849245, "loss": 3.118, "step": 23984 }, { "epoch": 1.18, "grad_norm": 0.5579555034637451, "learning_rate": 0.0004000096148671426, "loss": 3.1183, "step": 23985 }, { "epoch": 1.18, "grad_norm": 0.5361294746398926, "learning_rate": 0.0003999950992423867, "loss": 3.1518, "step": 23986 }, { "epoch": 1.18, "grad_norm": 0.5184491872787476, "learning_rate": 0.000399980583354263, "loss": 2.8963, "step": 23987 }, { "epoch": 1.18, "grad_norm": 0.551647961139679, "learning_rate": 0.0003999660672028095, "loss": 3.1424, "step": 23988 }, { "epoch": 1.18, "grad_norm": 0.5427066087722778, "learning_rate": 0.0003999515507880648, "loss": 3.0262, "step": 23989 }, { "epoch": 1.18, "grad_norm": 0.5327326059341431, "learning_rate": 0.0003999370341100669, "loss": 2.976, "step": 23990 }, { "epoch": 1.18, "grad_norm": 0.5397793650627136, "learning_rate": 0.0003999225171688541, "loss": 2.9345, "step": 23991 }, { "epoch": 1.18, "grad_norm": 0.5645992159843445, "learning_rate": 0.00039990799996446466, "loss": 2.9815, "step": 23992 }, { "epoch": 1.18, "grad_norm": 0.5220346450805664, "learning_rate": 0.00039989348249693657, "loss": 3.2895, "step": 23993 }, { "epoch": 1.18, "grad_norm": 0.5440986156463623, "learning_rate": 0.00039987896476630845, "loss": 2.9243, "step": 23994 }, { "epoch": 1.18, "grad_norm": 0.6222359538078308, "learning_rate": 0.00039986444677261835, "loss": 3.1331, "step": 23995 }, { "epoch": 1.18, "grad_norm": 0.5543932914733887, "learning_rate": 0.0003998499285159045, "loss": 3.0349, "step": 23996 }, { "epoch": 1.18, "grad_norm": 0.5490472316741943, "learning_rate": 0.0003998354099962051, "loss": 3.0956, "step": 23997 }, { "epoch": 1.18, "grad_norm": 0.5755308866500854, "learning_rate": 0.0003998208912135585, "loss": 3.0513, "step": 23998 }, { "epoch": 1.18, "grad_norm": 0.5586222410202026, "learning_rate": 0.0003998063721680029, "loss": 3.3977, "step": 23999 }, { "epoch": 1.18, "grad_norm": 0.5579812526702881, "learning_rate": 0.00039979185285957645, "loss": 2.8466, "step": 24000 }, { "epoch": 1.18, "grad_norm": 0.5372399091720581, "learning_rate": 0.0003997773332883176, "loss": 3.0433, "step": 24001 }, { "epoch": 1.18, "grad_norm": 0.5638691186904907, "learning_rate": 0.0003997628134542644, "loss": 2.8779, "step": 24002 }, { "epoch": 1.18, "grad_norm": 0.5711449384689331, "learning_rate": 0.0003997482933574551, "loss": 3.1942, "step": 24003 }, { "epoch": 1.18, "grad_norm": 0.5793060660362244, "learning_rate": 0.0003997337729979279, "loss": 2.9375, "step": 24004 }, { "epoch": 1.18, "grad_norm": 0.5681477189064026, "learning_rate": 0.00039971925237572127, "loss": 3.0421, "step": 24005 }, { "epoch": 1.18, "grad_norm": 0.5376327037811279, "learning_rate": 0.0003997047314908733, "loss": 3.0035, "step": 24006 }, { "epoch": 1.18, "grad_norm": 0.5498843789100647, "learning_rate": 0.00039969021034342233, "loss": 3.1599, "step": 24007 }, { "epoch": 1.18, "grad_norm": 0.5775700807571411, "learning_rate": 0.0003996756889334064, "loss": 2.8919, "step": 24008 }, { "epoch": 1.18, "grad_norm": 0.5586049556732178, "learning_rate": 0.00039966116726086395, "loss": 3.3297, "step": 24009 }, { "epoch": 1.18, "grad_norm": 0.5521610379219055, "learning_rate": 0.0003996466453258331, "loss": 2.9728, "step": 24010 }, { "epoch": 1.18, "grad_norm": 0.5638512969017029, "learning_rate": 0.0003996321231283523, "loss": 3.262, "step": 24011 }, { "epoch": 1.18, "grad_norm": 0.7236627340316772, "learning_rate": 0.0003996176006684596, "loss": 3.0747, "step": 24012 }, { "epoch": 1.18, "grad_norm": 0.555939793586731, "learning_rate": 0.0003996030779461933, "loss": 3.1563, "step": 24013 }, { "epoch": 1.18, "grad_norm": 0.5715100169181824, "learning_rate": 0.0003995885549615916, "loss": 3.1999, "step": 24014 }, { "epoch": 1.18, "grad_norm": 0.5524293780326843, "learning_rate": 0.00039957403171469286, "loss": 2.9745, "step": 24015 }, { "epoch": 1.18, "grad_norm": 0.545418381690979, "learning_rate": 0.0003995595082055354, "loss": 2.8698, "step": 24016 }, { "epoch": 1.18, "grad_norm": 0.5677865743637085, "learning_rate": 0.00039954498443415723, "loss": 2.9115, "step": 24017 }, { "epoch": 1.18, "grad_norm": 0.5774267911911011, "learning_rate": 0.0003995304604005967, "loss": 3.2582, "step": 24018 }, { "epoch": 1.18, "grad_norm": 0.5392197966575623, "learning_rate": 0.00039951593610489214, "loss": 3.1144, "step": 24019 }, { "epoch": 1.18, "grad_norm": 0.6039441823959351, "learning_rate": 0.00039950141154708176, "loss": 2.9836, "step": 24020 }, { "epoch": 1.18, "grad_norm": 0.5584951639175415, "learning_rate": 0.0003994868867272038, "loss": 3.0626, "step": 24021 }, { "epoch": 1.18, "grad_norm": 0.5053369402885437, "learning_rate": 0.0003994723616452965, "loss": 2.9783, "step": 24022 }, { "epoch": 1.18, "grad_norm": 0.5872316956520081, "learning_rate": 0.0003994578363013982, "loss": 3.0212, "step": 24023 }, { "epoch": 1.18, "grad_norm": 0.6137145161628723, "learning_rate": 0.0003994433106955471, "loss": 3.0148, "step": 24024 }, { "epoch": 1.18, "grad_norm": 0.8941653370857239, "learning_rate": 0.00039942878482778144, "loss": 2.8822, "step": 24025 }, { "epoch": 1.18, "grad_norm": 0.5612619519233704, "learning_rate": 0.00039941425869813947, "loss": 3.1952, "step": 24026 }, { "epoch": 1.18, "grad_norm": 0.553356409072876, "learning_rate": 0.0003993997323066596, "loss": 3.2152, "step": 24027 }, { "epoch": 1.18, "grad_norm": 0.5677022337913513, "learning_rate": 0.00039938520565337984, "loss": 3.0083, "step": 24028 }, { "epoch": 1.18, "grad_norm": 0.5404044985771179, "learning_rate": 0.0003993706787383385, "loss": 3.0666, "step": 24029 }, { "epoch": 1.18, "grad_norm": 0.5190367698669434, "learning_rate": 0.00039935615156157407, "loss": 3.021, "step": 24030 }, { "epoch": 1.18, "grad_norm": 0.5348750948905945, "learning_rate": 0.0003993416241231246, "loss": 3.0933, "step": 24031 }, { "epoch": 1.18, "grad_norm": 0.5640134811401367, "learning_rate": 0.0003993270964230284, "loss": 3.0944, "step": 24032 }, { "epoch": 1.18, "grad_norm": 0.571087658405304, "learning_rate": 0.0003993125684613238, "loss": 2.8386, "step": 24033 }, { "epoch": 1.18, "grad_norm": 0.5981314778327942, "learning_rate": 0.000399298040238049, "loss": 3.3129, "step": 24034 }, { "epoch": 1.18, "grad_norm": 0.5638099908828735, "learning_rate": 0.00039928351175324216, "loss": 3.1856, "step": 24035 }, { "epoch": 1.18, "grad_norm": 0.5106322169303894, "learning_rate": 0.00039926898300694173, "loss": 2.9335, "step": 24036 }, { "epoch": 1.18, "grad_norm": 0.545299232006073, "learning_rate": 0.0003992544539991859, "loss": 3.3001, "step": 24037 }, { "epoch": 1.18, "grad_norm": 0.5997403264045715, "learning_rate": 0.00039923992473001307, "loss": 3.2128, "step": 24038 }, { "epoch": 1.18, "grad_norm": 0.5346583127975464, "learning_rate": 0.0003992253951994612, "loss": 2.8758, "step": 24039 }, { "epoch": 1.18, "grad_norm": 0.560383141040802, "learning_rate": 0.0003992108654075687, "loss": 2.9834, "step": 24040 }, { "epoch": 1.18, "grad_norm": 0.5270285606384277, "learning_rate": 0.0003991963353543741, "loss": 3.0306, "step": 24041 }, { "epoch": 1.18, "grad_norm": 0.5616056323051453, "learning_rate": 0.00039918180503991524, "loss": 3.1098, "step": 24042 }, { "epoch": 1.18, "grad_norm": 0.5329706072807312, "learning_rate": 0.0003991672744642307, "loss": 3.1355, "step": 24043 }, { "epoch": 1.18, "grad_norm": 0.7826784253120422, "learning_rate": 0.0003991527436273586, "loss": 3.0251, "step": 24044 }, { "epoch": 1.18, "grad_norm": 0.5575398206710815, "learning_rate": 0.0003991382125293373, "loss": 3.0598, "step": 24045 }, { "epoch": 1.18, "grad_norm": 0.5318834185600281, "learning_rate": 0.0003991236811702049, "loss": 3.1486, "step": 24046 }, { "epoch": 1.18, "grad_norm": 0.5418036580085754, "learning_rate": 0.0003991091495499999, "loss": 2.9798, "step": 24047 }, { "epoch": 1.18, "grad_norm": 0.5505343675613403, "learning_rate": 0.0003990946176687605, "loss": 3.2627, "step": 24048 }, { "epoch": 1.18, "grad_norm": 0.5388018488883972, "learning_rate": 0.000399080085526525, "loss": 3.0686, "step": 24049 }, { "epoch": 1.18, "grad_norm": 0.6034098267555237, "learning_rate": 0.0003990655531233314, "loss": 3.0145, "step": 24050 }, { "epoch": 1.18, "grad_norm": 0.5737993717193604, "learning_rate": 0.0003990510204592184, "loss": 3.0209, "step": 24051 }, { "epoch": 1.18, "grad_norm": 0.7313047051429749, "learning_rate": 0.000399036487534224, "loss": 3.1884, "step": 24052 }, { "epoch": 1.18, "grad_norm": 0.5465160012245178, "learning_rate": 0.00039902195434838656, "loss": 3.2552, "step": 24053 }, { "epoch": 1.18, "grad_norm": 0.5565720200538635, "learning_rate": 0.00039900742090174434, "loss": 3.2226, "step": 24054 }, { "epoch": 1.18, "grad_norm": 0.525080144405365, "learning_rate": 0.0003989928871943356, "loss": 3.1065, "step": 24055 }, { "epoch": 1.18, "grad_norm": 0.5119069814682007, "learning_rate": 0.00039897835322619875, "loss": 3.3655, "step": 24056 }, { "epoch": 1.18, "grad_norm": 0.5813723206520081, "learning_rate": 0.00039896381899737194, "loss": 2.8918, "step": 24057 }, { "epoch": 1.18, "grad_norm": 0.5397539138793945, "learning_rate": 0.0003989492845078934, "loss": 3.1229, "step": 24058 }, { "epoch": 1.18, "grad_norm": 0.571702778339386, "learning_rate": 0.0003989347497578015, "loss": 3.1404, "step": 24059 }, { "epoch": 1.18, "grad_norm": 0.5537548065185547, "learning_rate": 0.00039892021474713455, "loss": 3.0636, "step": 24060 }, { "epoch": 1.18, "grad_norm": 0.5488508343696594, "learning_rate": 0.0003989056794759307, "loss": 3.1616, "step": 24061 }, { "epoch": 1.18, "grad_norm": 0.5703139901161194, "learning_rate": 0.00039889114394422846, "loss": 3.2048, "step": 24062 }, { "epoch": 1.18, "grad_norm": 0.5412448644638062, "learning_rate": 0.000398876608152066, "loss": 3.161, "step": 24063 }, { "epoch": 1.18, "grad_norm": 0.5422495007514954, "learning_rate": 0.0003988620720994815, "loss": 3.114, "step": 24064 }, { "epoch": 1.18, "grad_norm": 0.5373824834823608, "learning_rate": 0.00039884753578651327, "loss": 3.222, "step": 24065 }, { "epoch": 1.18, "grad_norm": 0.5398744344711304, "learning_rate": 0.0003988329992131997, "loss": 2.9718, "step": 24066 }, { "epoch": 1.18, "grad_norm": 0.5511226058006287, "learning_rate": 0.00039881846237957907, "loss": 3.0787, "step": 24067 }, { "epoch": 1.18, "grad_norm": 0.548173725605011, "learning_rate": 0.00039880392528568963, "loss": 2.9258, "step": 24068 }, { "epoch": 1.18, "grad_norm": 0.5512843728065491, "learning_rate": 0.0003987893879315696, "loss": 2.9631, "step": 24069 }, { "epoch": 1.18, "grad_norm": 0.58067387342453, "learning_rate": 0.0003987748503172574, "loss": 3.2054, "step": 24070 }, { "epoch": 1.18, "grad_norm": 0.5880147814750671, "learning_rate": 0.00039876031244279127, "loss": 3.1545, "step": 24071 }, { "epoch": 1.18, "grad_norm": 0.5471587777137756, "learning_rate": 0.0003987457743082094, "loss": 3.0934, "step": 24072 }, { "epoch": 1.18, "grad_norm": 0.5884271264076233, "learning_rate": 0.00039873123591355024, "loss": 3.1801, "step": 24073 }, { "epoch": 1.18, "grad_norm": 0.5782662630081177, "learning_rate": 0.00039871669725885197, "loss": 2.9995, "step": 24074 }, { "epoch": 1.18, "grad_norm": 0.5610849261283875, "learning_rate": 0.0003987021583441529, "loss": 2.916, "step": 24075 }, { "epoch": 1.18, "grad_norm": 0.5222210884094238, "learning_rate": 0.00039868761916949134, "loss": 3.0684, "step": 24076 }, { "epoch": 1.18, "grad_norm": 0.5449653267860413, "learning_rate": 0.0003986730797349056, "loss": 3.2768, "step": 24077 }, { "epoch": 1.18, "grad_norm": 0.5474014282226562, "learning_rate": 0.00039865854004043406, "loss": 3.3026, "step": 24078 }, { "epoch": 1.18, "grad_norm": 0.564146876335144, "learning_rate": 0.0003986440000861148, "loss": 3.0649, "step": 24079 }, { "epoch": 1.18, "grad_norm": 0.5155508518218994, "learning_rate": 0.00039862945987198626, "loss": 2.9618, "step": 24080 }, { "epoch": 1.18, "grad_norm": 0.5813475251197815, "learning_rate": 0.0003986149193980867, "loss": 3.1608, "step": 24081 }, { "epoch": 1.18, "grad_norm": 0.5683935284614563, "learning_rate": 0.0003986003786644544, "loss": 3.1103, "step": 24082 }, { "epoch": 1.18, "grad_norm": 1.1067701578140259, "learning_rate": 0.0003985858376711277, "loss": 3.224, "step": 24083 }, { "epoch": 1.18, "grad_norm": 0.5283617377281189, "learning_rate": 0.0003985712964181448, "loss": 3.2039, "step": 24084 }, { "epoch": 1.18, "grad_norm": 0.5957078337669373, "learning_rate": 0.00039855675490554416, "loss": 3.2682, "step": 24085 }, { "epoch": 1.18, "grad_norm": 0.549714207649231, "learning_rate": 0.0003985422131333639, "loss": 3.0082, "step": 24086 }, { "epoch": 1.18, "grad_norm": 0.5549113154411316, "learning_rate": 0.00039852767110164255, "loss": 3.0617, "step": 24087 }, { "epoch": 1.18, "grad_norm": 0.5381019115447998, "learning_rate": 0.0003985131288104182, "loss": 3.1637, "step": 24088 }, { "epoch": 1.18, "grad_norm": 0.5498014092445374, "learning_rate": 0.0003984985862597293, "loss": 3.1576, "step": 24089 }, { "epoch": 1.18, "grad_norm": 0.5414360761642456, "learning_rate": 0.000398484043449614, "loss": 3.2084, "step": 24090 }, { "epoch": 1.18, "grad_norm": 0.5614427924156189, "learning_rate": 0.0003984695003801107, "loss": 3.0681, "step": 24091 }, { "epoch": 1.18, "grad_norm": 0.5734679698944092, "learning_rate": 0.00039845495705125776, "loss": 3.0153, "step": 24092 }, { "epoch": 1.18, "grad_norm": 0.5509762763977051, "learning_rate": 0.00039844041346309337, "loss": 2.9354, "step": 24093 }, { "epoch": 1.18, "grad_norm": 0.5663211941719055, "learning_rate": 0.00039842586961565585, "loss": 3.1139, "step": 24094 }, { "epoch": 1.18, "grad_norm": 0.5385051965713501, "learning_rate": 0.0003984113255089835, "loss": 3.0458, "step": 24095 }, { "epoch": 1.18, "grad_norm": 0.543887197971344, "learning_rate": 0.0003983967811431147, "loss": 3.1119, "step": 24096 }, { "epoch": 1.18, "grad_norm": 0.5807494521141052, "learning_rate": 0.0003983822365180877, "loss": 3.1381, "step": 24097 }, { "epoch": 1.18, "grad_norm": 0.5525929927825928, "learning_rate": 0.0003983676916339408, "loss": 3.1725, "step": 24098 }, { "epoch": 1.18, "grad_norm": 0.5865118503570557, "learning_rate": 0.00039835314649071235, "loss": 3.1903, "step": 24099 }, { "epoch": 1.18, "grad_norm": 0.5506489276885986, "learning_rate": 0.0003983386010884408, "loss": 2.9352, "step": 24100 }, { "epoch": 1.18, "grad_norm": 0.561968207359314, "learning_rate": 0.00039832405542716403, "loss": 2.9563, "step": 24101 }, { "epoch": 1.18, "grad_norm": 0.5727139115333557, "learning_rate": 0.00039830950950692077, "loss": 3.2105, "step": 24102 }, { "epoch": 1.18, "grad_norm": 0.6121776103973389, "learning_rate": 0.0003982949633277492, "loss": 3.0155, "step": 24103 }, { "epoch": 1.18, "grad_norm": 0.5447525382041931, "learning_rate": 0.0003982804168896876, "loss": 3.2316, "step": 24104 }, { "epoch": 1.18, "grad_norm": 0.540285587310791, "learning_rate": 0.0003982658701927742, "loss": 3.2454, "step": 24105 }, { "epoch": 1.18, "grad_norm": 0.5771687030792236, "learning_rate": 0.00039825132323704744, "loss": 3.3046, "step": 24106 }, { "epoch": 1.18, "grad_norm": 0.5980179905891418, "learning_rate": 0.00039823677602254576, "loss": 3.088, "step": 24107 }, { "epoch": 1.18, "grad_norm": 0.5670889019966125, "learning_rate": 0.0003982222285493072, "loss": 3.3893, "step": 24108 }, { "epoch": 1.18, "grad_norm": 0.5559228658676147, "learning_rate": 0.0003982076808173701, "loss": 3.106, "step": 24109 }, { "epoch": 1.18, "grad_norm": 0.5641289353370667, "learning_rate": 0.000398193132826773, "loss": 3.0903, "step": 24110 }, { "epoch": 1.18, "grad_norm": 0.5894806981086731, "learning_rate": 0.000398178584577554, "loss": 3.1442, "step": 24111 }, { "epoch": 1.18, "grad_norm": 0.557578980922699, "learning_rate": 0.0003981640360697516, "loss": 2.9691, "step": 24112 }, { "epoch": 1.18, "grad_norm": 0.5747301578521729, "learning_rate": 0.000398149487303404, "loss": 2.961, "step": 24113 }, { "epoch": 1.18, "grad_norm": 0.5464869737625122, "learning_rate": 0.00039813493827854955, "loss": 3.1954, "step": 24114 }, { "epoch": 1.18, "grad_norm": 0.5992234945297241, "learning_rate": 0.0003981203889952265, "loss": 3.0543, "step": 24115 }, { "epoch": 1.18, "grad_norm": 0.5721291899681091, "learning_rate": 0.00039810583945347326, "loss": 3.1144, "step": 24116 }, { "epoch": 1.18, "grad_norm": 0.6171442270278931, "learning_rate": 0.000398091289653328, "loss": 3.1549, "step": 24117 }, { "epoch": 1.18, "grad_norm": 0.5718544125556946, "learning_rate": 0.00039807673959482936, "loss": 3.1521, "step": 24118 }, { "epoch": 1.18, "grad_norm": 0.5414995551109314, "learning_rate": 0.0003980621892780154, "loss": 3.1951, "step": 24119 }, { "epoch": 1.18, "grad_norm": 0.5666779279708862, "learning_rate": 0.00039804763870292443, "loss": 3.0715, "step": 24120 }, { "epoch": 1.18, "grad_norm": 0.5580045580863953, "learning_rate": 0.0003980330878695949, "loss": 3.2015, "step": 24121 }, { "epoch": 1.18, "grad_norm": 0.5530674457550049, "learning_rate": 0.00039801853677806507, "loss": 3.3237, "step": 24122 }, { "epoch": 1.18, "grad_norm": 0.5504000186920166, "learning_rate": 0.0003980039854283733, "loss": 3.0533, "step": 24123 }, { "epoch": 1.18, "grad_norm": 0.5976004600524902, "learning_rate": 0.0003979894338205579, "loss": 3.1773, "step": 24124 }, { "epoch": 1.18, "grad_norm": 0.5514919757843018, "learning_rate": 0.00039797488195465725, "loss": 3.1461, "step": 24125 }, { "epoch": 1.18, "grad_norm": 0.572727620601654, "learning_rate": 0.0003979603298307095, "loss": 3.2894, "step": 24126 }, { "epoch": 1.18, "grad_norm": 0.5436162948608398, "learning_rate": 0.00039794577744875313, "loss": 3.0208, "step": 24127 }, { "epoch": 1.18, "grad_norm": 0.5952852368354797, "learning_rate": 0.0003979312248088264, "loss": 3.1083, "step": 24128 }, { "epoch": 1.18, "grad_norm": 0.5643694400787354, "learning_rate": 0.00039791667191096773, "loss": 3.1304, "step": 24129 }, { "epoch": 1.18, "grad_norm": 0.55324786901474, "learning_rate": 0.0003979021187552154, "loss": 2.9863, "step": 24130 }, { "epoch": 1.18, "grad_norm": 0.6007174253463745, "learning_rate": 0.0003978875653416077, "loss": 2.975, "step": 24131 }, { "epoch": 1.18, "grad_norm": 0.5391753315925598, "learning_rate": 0.00039787301167018297, "loss": 2.843, "step": 24132 }, { "epoch": 1.18, "grad_norm": 0.5486133098602295, "learning_rate": 0.00039785845774097957, "loss": 2.8428, "step": 24133 }, { "epoch": 1.18, "grad_norm": 0.5005714297294617, "learning_rate": 0.00039784390355403584, "loss": 3.3195, "step": 24134 }, { "epoch": 1.18, "grad_norm": 0.5840128064155579, "learning_rate": 0.0003978293491093901, "loss": 3.1903, "step": 24135 }, { "epoch": 1.18, "grad_norm": 0.5641036033630371, "learning_rate": 0.00039781479440708067, "loss": 3.0371, "step": 24136 }, { "epoch": 1.18, "grad_norm": 0.5386980772018433, "learning_rate": 0.0003978002394471459, "loss": 3.1271, "step": 24137 }, { "epoch": 1.18, "grad_norm": 0.5703660249710083, "learning_rate": 0.00039778568422962413, "loss": 3.1415, "step": 24138 }, { "epoch": 1.18, "grad_norm": 0.5552119016647339, "learning_rate": 0.0003977711287545537, "loss": 3.0826, "step": 24139 }, { "epoch": 1.18, "grad_norm": 0.5309668779373169, "learning_rate": 0.00039775657302197295, "loss": 3.1237, "step": 24140 }, { "epoch": 1.18, "grad_norm": 0.5294352173805237, "learning_rate": 0.0003977420170319201, "loss": 3.1383, "step": 24141 }, { "epoch": 1.18, "grad_norm": 0.522890031337738, "learning_rate": 0.0003977274607844337, "loss": 3.101, "step": 24142 }, { "epoch": 1.18, "grad_norm": 0.6287959218025208, "learning_rate": 0.000397712904279552, "loss": 3.0709, "step": 24143 }, { "epoch": 1.18, "grad_norm": 0.5275660157203674, "learning_rate": 0.0003976983475173132, "loss": 3.1218, "step": 24144 }, { "epoch": 1.18, "grad_norm": 0.5518277287483215, "learning_rate": 0.0003976837904977558, "loss": 3.0445, "step": 24145 }, { "epoch": 1.18, "grad_norm": 0.5437619090080261, "learning_rate": 0.0003976692332209181, "loss": 3.0587, "step": 24146 }, { "epoch": 1.18, "grad_norm": 0.6017915606498718, "learning_rate": 0.00039765467568683846, "loss": 3.0058, "step": 24147 }, { "epoch": 1.18, "grad_norm": 0.5371351838111877, "learning_rate": 0.00039764011789555513, "loss": 2.9967, "step": 24148 }, { "epoch": 1.18, "grad_norm": 0.5650933980941772, "learning_rate": 0.0003976255598471066, "loss": 2.981, "step": 24149 }, { "epoch": 1.18, "grad_norm": 0.5488359332084656, "learning_rate": 0.00039761100154153103, "loss": 3.1748, "step": 24150 }, { "epoch": 1.18, "grad_norm": 0.5488278269767761, "learning_rate": 0.00039759644297886703, "loss": 2.9815, "step": 24151 }, { "epoch": 1.18, "grad_norm": 0.562656819820404, "learning_rate": 0.0003975818841591526, "loss": 3.2104, "step": 24152 }, { "epoch": 1.18, "grad_norm": 0.530597448348999, "learning_rate": 0.00039756732508242635, "loss": 3.1563, "step": 24153 }, { "epoch": 1.18, "grad_norm": 0.520431399345398, "learning_rate": 0.00039755276574872665, "loss": 3.0995, "step": 24154 }, { "epoch": 1.18, "grad_norm": 0.5567206740379333, "learning_rate": 0.0003975382061580916, "loss": 3.1326, "step": 24155 }, { "epoch": 1.18, "grad_norm": 0.5744289755821228, "learning_rate": 0.0003975236463105597, "loss": 3.183, "step": 24156 }, { "epoch": 1.18, "grad_norm": 0.5486968755722046, "learning_rate": 0.00039750908620616936, "loss": 3.2061, "step": 24157 }, { "epoch": 1.18, "grad_norm": 0.5852683186531067, "learning_rate": 0.00039749452584495875, "loss": 3.0303, "step": 24158 }, { "epoch": 1.18, "grad_norm": 0.5728834271430969, "learning_rate": 0.0003974799652269664, "loss": 3.0113, "step": 24159 }, { "epoch": 1.18, "grad_norm": 0.5454309582710266, "learning_rate": 0.0003974654043522306, "loss": 3.1408, "step": 24160 }, { "epoch": 1.18, "grad_norm": 0.5599636435508728, "learning_rate": 0.00039745084322078956, "loss": 3.2156, "step": 24161 }, { "epoch": 1.18, "grad_norm": 0.540389358997345, "learning_rate": 0.0003974362818326819, "loss": 3.0243, "step": 24162 }, { "epoch": 1.18, "grad_norm": 0.5856047868728638, "learning_rate": 0.0003974217201879457, "loss": 2.8006, "step": 24163 }, { "epoch": 1.18, "grad_norm": 0.5460880398750305, "learning_rate": 0.0003974071582866195, "loss": 3.036, "step": 24164 }, { "epoch": 1.18, "grad_norm": 0.5401052832603455, "learning_rate": 0.00039739259612874163, "loss": 3.0225, "step": 24165 }, { "epoch": 1.18, "grad_norm": 0.6093460917472839, "learning_rate": 0.0003973780337143504, "loss": 3.0323, "step": 24166 }, { "epoch": 1.18, "grad_norm": 0.5864385366439819, "learning_rate": 0.0003973634710434841, "loss": 3.0327, "step": 24167 }, { "epoch": 1.18, "grad_norm": 0.5695944428443909, "learning_rate": 0.00039734890811618116, "loss": 3.1903, "step": 24168 }, { "epoch": 1.18, "grad_norm": 0.5272200107574463, "learning_rate": 0.00039733434493248, "loss": 3.1964, "step": 24169 }, { "epoch": 1.18, "grad_norm": 0.5584858655929565, "learning_rate": 0.00039731978149241883, "loss": 2.8314, "step": 24170 }, { "epoch": 1.18, "grad_norm": 0.542620837688446, "learning_rate": 0.0003973052177960361, "loss": 3.1823, "step": 24171 }, { "epoch": 1.18, "grad_norm": 0.5688046813011169, "learning_rate": 0.00039729065384337025, "loss": 2.9456, "step": 24172 }, { "epoch": 1.18, "grad_norm": 0.5506514310836792, "learning_rate": 0.0003972760896344594, "loss": 3.1059, "step": 24173 }, { "epoch": 1.18, "grad_norm": 0.5505015254020691, "learning_rate": 0.00039726152516934214, "loss": 3.2437, "step": 24174 }, { "epoch": 1.18, "grad_norm": 0.5867189764976501, "learning_rate": 0.0003972469604480567, "loss": 2.8984, "step": 24175 }, { "epoch": 1.18, "grad_norm": 0.5614295601844788, "learning_rate": 0.00039723239547064155, "loss": 3.1634, "step": 24176 }, { "epoch": 1.18, "grad_norm": 0.5461905002593994, "learning_rate": 0.00039721783023713496, "loss": 3.1337, "step": 24177 }, { "epoch": 1.18, "grad_norm": 0.5388670563697815, "learning_rate": 0.0003972032647475752, "loss": 3.1324, "step": 24178 }, { "epoch": 1.18, "grad_norm": 0.5529229640960693, "learning_rate": 0.00039718869900200085, "loss": 3.2731, "step": 24179 }, { "epoch": 1.19, "grad_norm": 0.5644211173057556, "learning_rate": 0.0003971741330004502, "loss": 3.0189, "step": 24180 }, { "epoch": 1.19, "grad_norm": 0.5382275581359863, "learning_rate": 0.00039715956674296154, "loss": 3.1831, "step": 24181 }, { "epoch": 1.19, "grad_norm": 0.5577836036682129, "learning_rate": 0.00039714500022957326, "loss": 3.1967, "step": 24182 }, { "epoch": 1.19, "grad_norm": 0.5363103151321411, "learning_rate": 0.0003971304334603238, "loss": 2.9104, "step": 24183 }, { "epoch": 1.19, "grad_norm": 0.538547694683075, "learning_rate": 0.0003971158664352514, "loss": 3.1109, "step": 24184 }, { "epoch": 1.19, "grad_norm": 0.5949356555938721, "learning_rate": 0.0003971012991543945, "loss": 3.0122, "step": 24185 }, { "epoch": 1.19, "grad_norm": 0.5695586204528809, "learning_rate": 0.0003970867316177915, "loss": 3.1775, "step": 24186 }, { "epoch": 1.19, "grad_norm": 0.6203005313873291, "learning_rate": 0.0003970721638254808, "loss": 3.1296, "step": 24187 }, { "epoch": 1.19, "grad_norm": 0.5122324228286743, "learning_rate": 0.0003970575957775006, "loss": 3.079, "step": 24188 }, { "epoch": 1.19, "grad_norm": 0.5441964864730835, "learning_rate": 0.0003970430274738894, "loss": 3.109, "step": 24189 }, { "epoch": 1.19, "grad_norm": 0.5421319007873535, "learning_rate": 0.00039702845891468556, "loss": 3.1183, "step": 24190 }, { "epoch": 1.19, "grad_norm": 0.5920052528381348, "learning_rate": 0.0003970138900999275, "loss": 3.0434, "step": 24191 }, { "epoch": 1.19, "grad_norm": 0.5872803926467896, "learning_rate": 0.0003969993210296535, "loss": 2.8057, "step": 24192 }, { "epoch": 1.19, "grad_norm": 0.557303786277771, "learning_rate": 0.0003969847517039018, "loss": 3.0588, "step": 24193 }, { "epoch": 1.19, "grad_norm": 0.5577976107597351, "learning_rate": 0.0003969701821227111, "loss": 3.2499, "step": 24194 }, { "epoch": 1.19, "grad_norm": 0.5627845525741577, "learning_rate": 0.00039695561228611953, "loss": 3.059, "step": 24195 }, { "epoch": 1.19, "grad_norm": 0.607268750667572, "learning_rate": 0.0003969410421941655, "loss": 3.0055, "step": 24196 }, { "epoch": 1.19, "grad_norm": 0.5258342027664185, "learning_rate": 0.0003969264718468875, "loss": 2.8618, "step": 24197 }, { "epoch": 1.19, "grad_norm": 0.5296414494514465, "learning_rate": 0.0003969119012443238, "loss": 3.2552, "step": 24198 }, { "epoch": 1.19, "grad_norm": 0.5506784915924072, "learning_rate": 0.00039689733038651275, "loss": 2.9617, "step": 24199 }, { "epoch": 1.19, "grad_norm": 0.5685448050498962, "learning_rate": 0.00039688275927349286, "loss": 3.12, "step": 24200 }, { "epoch": 1.19, "grad_norm": 0.5335084199905396, "learning_rate": 0.0003968681879053024, "loss": 3.0428, "step": 24201 }, { "epoch": 1.19, "grad_norm": 0.6116160750389099, "learning_rate": 0.0003968536162819798, "loss": 2.9557, "step": 24202 }, { "epoch": 1.19, "grad_norm": 0.5441951751708984, "learning_rate": 0.00039683904440356336, "loss": 3.272, "step": 24203 }, { "epoch": 1.19, "grad_norm": 0.5496557950973511, "learning_rate": 0.00039682447227009146, "loss": 3.1211, "step": 24204 }, { "epoch": 1.19, "grad_norm": 0.6027690768241882, "learning_rate": 0.00039680989988160275, "loss": 3.2041, "step": 24205 }, { "epoch": 1.19, "grad_norm": 0.7103535532951355, "learning_rate": 0.00039679532723813516, "loss": 3.384, "step": 24206 }, { "epoch": 1.19, "grad_norm": 0.570144772529602, "learning_rate": 0.00039678075433972746, "loss": 3.0117, "step": 24207 }, { "epoch": 1.19, "grad_norm": 0.5552254915237427, "learning_rate": 0.0003967661811864178, "loss": 3.0736, "step": 24208 }, { "epoch": 1.19, "grad_norm": 0.5385003685951233, "learning_rate": 0.00039675160777824465, "loss": 3.0785, "step": 24209 }, { "epoch": 1.19, "grad_norm": 0.5606467127799988, "learning_rate": 0.00039673703411524647, "loss": 3.0978, "step": 24210 }, { "epoch": 1.19, "grad_norm": 0.5405035614967346, "learning_rate": 0.0003967224601974615, "loss": 3.1803, "step": 24211 }, { "epoch": 1.19, "grad_norm": 0.5441907048225403, "learning_rate": 0.0003967078860249281, "loss": 3.0588, "step": 24212 }, { "epoch": 1.19, "grad_norm": 0.5538133382797241, "learning_rate": 0.0003966933115976849, "loss": 3.1926, "step": 24213 }, { "epoch": 1.19, "grad_norm": 0.528517484664917, "learning_rate": 0.00039667873691576997, "loss": 3.4385, "step": 24214 }, { "epoch": 1.19, "grad_norm": 0.5590735077857971, "learning_rate": 0.0003966641619792219, "loss": 3.067, "step": 24215 }, { "epoch": 1.19, "grad_norm": 0.591706395149231, "learning_rate": 0.0003966495867880791, "loss": 3.0503, "step": 24216 }, { "epoch": 1.19, "grad_norm": 0.5574644207954407, "learning_rate": 0.00039663501134237983, "loss": 3.0304, "step": 24217 }, { "epoch": 1.19, "grad_norm": 0.5569961667060852, "learning_rate": 0.00039662043564216257, "loss": 2.9025, "step": 24218 }, { "epoch": 1.19, "grad_norm": 0.5450246930122375, "learning_rate": 0.00039660585968746554, "loss": 3.1877, "step": 24219 }, { "epoch": 1.19, "grad_norm": 0.562933623790741, "learning_rate": 0.00039659128347832747, "loss": 3.0375, "step": 24220 }, { "epoch": 1.19, "grad_norm": 0.5538458824157715, "learning_rate": 0.0003965767070147865, "loss": 3.0816, "step": 24221 }, { "epoch": 1.19, "grad_norm": 0.60169917345047, "learning_rate": 0.000396562130296881, "loss": 3.1415, "step": 24222 }, { "epoch": 1.19, "grad_norm": 0.5521277785301208, "learning_rate": 0.00039654755332464947, "loss": 3.0604, "step": 24223 }, { "epoch": 1.19, "grad_norm": 0.5698322653770447, "learning_rate": 0.0003965329760981303, "loss": 3.283, "step": 24224 }, { "epoch": 1.19, "grad_norm": 0.5327075123786926, "learning_rate": 0.0003965183986173618, "loss": 3.0547, "step": 24225 }, { "epoch": 1.19, "grad_norm": 0.5273543000221252, "learning_rate": 0.0003965038208823824, "loss": 3.2407, "step": 24226 }, { "epoch": 1.19, "grad_norm": 0.7036231756210327, "learning_rate": 0.00039648924289323057, "loss": 3.1334, "step": 24227 }, { "epoch": 1.19, "grad_norm": 0.5902255773544312, "learning_rate": 0.00039647466464994463, "loss": 2.9559, "step": 24228 }, { "epoch": 1.19, "grad_norm": 0.5623027086257935, "learning_rate": 0.0003964600861525629, "loss": 2.899, "step": 24229 }, { "epoch": 1.19, "grad_norm": 0.5198655128479004, "learning_rate": 0.00039644550740112393, "loss": 3.029, "step": 24230 }, { "epoch": 1.19, "grad_norm": 0.5730830430984497, "learning_rate": 0.00039643092839566614, "loss": 2.9395, "step": 24231 }, { "epoch": 1.19, "grad_norm": 0.5397255420684814, "learning_rate": 0.00039641634913622776, "loss": 3.1824, "step": 24232 }, { "epoch": 1.19, "grad_norm": 0.5603347420692444, "learning_rate": 0.0003964017696228473, "loss": 3.0076, "step": 24233 }, { "epoch": 1.19, "grad_norm": 0.556710422039032, "learning_rate": 0.00039638718985556307, "loss": 3.1231, "step": 24234 }, { "epoch": 1.19, "grad_norm": 0.5215965509414673, "learning_rate": 0.0003963726098344136, "loss": 3.0744, "step": 24235 }, { "epoch": 1.19, "grad_norm": 0.5522467494010925, "learning_rate": 0.0003963580295594372, "loss": 3.2905, "step": 24236 }, { "epoch": 1.19, "grad_norm": 0.5642509460449219, "learning_rate": 0.0003963434490306722, "loss": 3.1552, "step": 24237 }, { "epoch": 1.19, "grad_norm": 0.5451124310493469, "learning_rate": 0.0003963288682481573, "loss": 3.0713, "step": 24238 }, { "epoch": 1.19, "grad_norm": 0.5440728664398193, "learning_rate": 0.00039631428721193055, "loss": 3.1808, "step": 24239 }, { "epoch": 1.19, "grad_norm": 0.5344854593276978, "learning_rate": 0.00039629970592203043, "loss": 3.118, "step": 24240 }, { "epoch": 1.19, "grad_norm": 0.5784305930137634, "learning_rate": 0.00039628512437849553, "loss": 3.2412, "step": 24241 }, { "epoch": 1.19, "grad_norm": 0.5519080758094788, "learning_rate": 0.00039627054258136417, "loss": 3.3491, "step": 24242 }, { "epoch": 1.19, "grad_norm": 0.5608418583869934, "learning_rate": 0.0003962559605306747, "loss": 3.0723, "step": 24243 }, { "epoch": 1.19, "grad_norm": 0.5935826897621155, "learning_rate": 0.0003962413782264656, "loss": 3.1175, "step": 24244 }, { "epoch": 1.19, "grad_norm": 0.5591660141944885, "learning_rate": 0.0003962267956687752, "loss": 3.0419, "step": 24245 }, { "epoch": 1.19, "grad_norm": 0.6037750244140625, "learning_rate": 0.0003962122128576419, "loss": 3.0272, "step": 24246 }, { "epoch": 1.19, "grad_norm": 0.5254233479499817, "learning_rate": 0.00039619762979310416, "loss": 3.1549, "step": 24247 }, { "epoch": 1.19, "grad_norm": 0.5338966846466064, "learning_rate": 0.0003961830464752004, "loss": 3.102, "step": 24248 }, { "epoch": 1.19, "grad_norm": 0.5736029744148254, "learning_rate": 0.000396168462903969, "loss": 3.2333, "step": 24249 }, { "epoch": 1.19, "grad_norm": 0.5698594450950623, "learning_rate": 0.00039615387907944834, "loss": 3.2735, "step": 24250 }, { "epoch": 1.19, "grad_norm": 0.5339264869689941, "learning_rate": 0.0003961392950016769, "loss": 2.9759, "step": 24251 }, { "epoch": 1.19, "grad_norm": 0.5492169857025146, "learning_rate": 0.000396124710670693, "loss": 3.0123, "step": 24252 }, { "epoch": 1.19, "grad_norm": 0.5674329996109009, "learning_rate": 0.00039611012608653523, "loss": 3.2794, "step": 24253 }, { "epoch": 1.19, "grad_norm": 0.5592032670974731, "learning_rate": 0.00039609554124924183, "loss": 3.1204, "step": 24254 }, { "epoch": 1.19, "grad_norm": 0.5712992548942566, "learning_rate": 0.0003960809561588512, "loss": 2.8761, "step": 24255 }, { "epoch": 1.19, "grad_norm": 0.5755642652511597, "learning_rate": 0.0003960663708154019, "loss": 2.9666, "step": 24256 }, { "epoch": 1.19, "grad_norm": 0.5444374084472656, "learning_rate": 0.0003960517852189323, "loss": 3.0622, "step": 24257 }, { "epoch": 1.19, "grad_norm": 0.5554080605506897, "learning_rate": 0.00039603719936948074, "loss": 3.2019, "step": 24258 }, { "epoch": 1.19, "grad_norm": 0.6017959713935852, "learning_rate": 0.0003960226132670857, "loss": 2.9427, "step": 24259 }, { "epoch": 1.19, "grad_norm": 0.5440652966499329, "learning_rate": 0.00039600802691178556, "loss": 3.1756, "step": 24260 }, { "epoch": 1.19, "grad_norm": 0.5530151128768921, "learning_rate": 0.0003959934403036187, "loss": 2.9358, "step": 24261 }, { "epoch": 1.19, "grad_norm": 0.5590417385101318, "learning_rate": 0.00039597885344262366, "loss": 3.0388, "step": 24262 }, { "epoch": 1.19, "grad_norm": 0.5701658725738525, "learning_rate": 0.0003959642663288387, "loss": 3.0352, "step": 24263 }, { "epoch": 1.19, "grad_norm": 0.5556230545043945, "learning_rate": 0.0003959496789623025, "loss": 3.1208, "step": 24264 }, { "epoch": 1.19, "grad_norm": 0.5601057410240173, "learning_rate": 0.00039593509134305317, "loss": 3.0421, "step": 24265 }, { "epoch": 1.19, "grad_norm": 0.5562484860420227, "learning_rate": 0.00039592050347112933, "loss": 3.1872, "step": 24266 }, { "epoch": 1.19, "grad_norm": 0.5837891101837158, "learning_rate": 0.00039590591534656937, "loss": 3.0225, "step": 24267 }, { "epoch": 1.19, "grad_norm": 0.5743361115455627, "learning_rate": 0.0003958913269694116, "loss": 3.1041, "step": 24268 }, { "epoch": 1.19, "grad_norm": 0.6154342889785767, "learning_rate": 0.00039587673833969464, "loss": 2.9143, "step": 24269 }, { "epoch": 1.19, "grad_norm": 0.5403562188148499, "learning_rate": 0.00039586214945745664, "loss": 2.9925, "step": 24270 }, { "epoch": 1.19, "grad_norm": 0.5781658291816711, "learning_rate": 0.00039584756032273635, "loss": 3.1171, "step": 24271 }, { "epoch": 1.19, "grad_norm": 0.5835286378860474, "learning_rate": 0.00039583297093557195, "loss": 3.1155, "step": 24272 }, { "epoch": 1.19, "grad_norm": 0.5412124395370483, "learning_rate": 0.00039581838129600197, "loss": 3.056, "step": 24273 }, { "epoch": 1.19, "grad_norm": 0.5261394381523132, "learning_rate": 0.00039580379140406475, "loss": 3.0973, "step": 24274 }, { "epoch": 1.19, "grad_norm": 0.5671351552009583, "learning_rate": 0.00039578920125979893, "loss": 3.0282, "step": 24275 }, { "epoch": 1.19, "grad_norm": 0.5514994263648987, "learning_rate": 0.0003957746108632427, "loss": 3.1963, "step": 24276 }, { "epoch": 1.19, "grad_norm": 0.576802134513855, "learning_rate": 0.00039576002021443456, "loss": 3.0313, "step": 24277 }, { "epoch": 1.19, "grad_norm": 0.5313069224357605, "learning_rate": 0.00039574542931341297, "loss": 3.2151, "step": 24278 }, { "epoch": 1.19, "grad_norm": 0.536454439163208, "learning_rate": 0.00039573083816021637, "loss": 3.1187, "step": 24279 }, { "epoch": 1.19, "grad_norm": 0.5508918762207031, "learning_rate": 0.00039571624675488313, "loss": 2.9256, "step": 24280 }, { "epoch": 1.19, "grad_norm": 0.5629120469093323, "learning_rate": 0.0003957016550974517, "loss": 3.0875, "step": 24281 }, { "epoch": 1.19, "grad_norm": 0.5517382025718689, "learning_rate": 0.0003956870631879606, "loss": 2.9645, "step": 24282 }, { "epoch": 1.19, "grad_norm": 0.5510847568511963, "learning_rate": 0.00039567247102644817, "loss": 3.0028, "step": 24283 }, { "epoch": 1.19, "grad_norm": 0.5683895945549011, "learning_rate": 0.0003956578786129528, "loss": 3.2715, "step": 24284 }, { "epoch": 1.19, "grad_norm": 0.5647633671760559, "learning_rate": 0.00039564328594751306, "loss": 2.9625, "step": 24285 }, { "epoch": 1.19, "grad_norm": 0.5621564388275146, "learning_rate": 0.00039562869303016724, "loss": 3.2011, "step": 24286 }, { "epoch": 1.19, "grad_norm": 0.5985468029975891, "learning_rate": 0.0003956140998609539, "loss": 3.3408, "step": 24287 }, { "epoch": 1.19, "grad_norm": 0.5399235486984253, "learning_rate": 0.00039559950643991144, "loss": 3.1029, "step": 24288 }, { "epoch": 1.19, "grad_norm": 0.5459802150726318, "learning_rate": 0.0003955849127670783, "loss": 2.9857, "step": 24289 }, { "epoch": 1.19, "grad_norm": 0.5875890254974365, "learning_rate": 0.00039557031884249286, "loss": 2.938, "step": 24290 }, { "epoch": 1.19, "grad_norm": 0.5480020642280579, "learning_rate": 0.0003955557246661935, "loss": 3.3471, "step": 24291 }, { "epoch": 1.19, "grad_norm": 0.5252436995506287, "learning_rate": 0.0003955411302382189, "loss": 3.2947, "step": 24292 }, { "epoch": 1.19, "grad_norm": 0.5304093956947327, "learning_rate": 0.0003955265355586073, "loss": 3.3092, "step": 24293 }, { "epoch": 1.19, "grad_norm": 0.5480414628982544, "learning_rate": 0.00039551194062739713, "loss": 3.0931, "step": 24294 }, { "epoch": 1.19, "grad_norm": 0.5469361543655396, "learning_rate": 0.0003954973454446269, "loss": 3.1805, "step": 24295 }, { "epoch": 1.19, "grad_norm": 0.5798367857933044, "learning_rate": 0.0003954827500103351, "loss": 3.0148, "step": 24296 }, { "epoch": 1.19, "grad_norm": 0.5936307907104492, "learning_rate": 0.0003954681543245601, "loss": 2.8564, "step": 24297 }, { "epoch": 1.19, "grad_norm": 0.5227268934249878, "learning_rate": 0.0003954535583873403, "loss": 3.0408, "step": 24298 }, { "epoch": 1.19, "grad_norm": 0.5441100001335144, "learning_rate": 0.0003954389621987143, "loss": 3.0124, "step": 24299 }, { "epoch": 1.19, "grad_norm": 0.5319622159004211, "learning_rate": 0.0003954243657587203, "loss": 2.8448, "step": 24300 }, { "epoch": 1.19, "grad_norm": 0.640273928642273, "learning_rate": 0.000395409769067397, "loss": 3.1077, "step": 24301 }, { "epoch": 1.19, "grad_norm": 0.5414076447486877, "learning_rate": 0.0003953951721247826, "loss": 3.1144, "step": 24302 }, { "epoch": 1.19, "grad_norm": 0.5242058038711548, "learning_rate": 0.00039538057493091584, "loss": 2.9945, "step": 24303 }, { "epoch": 1.19, "grad_norm": 0.5769972205162048, "learning_rate": 0.00039536597748583496, "loss": 2.9874, "step": 24304 }, { "epoch": 1.19, "grad_norm": 0.5243396759033203, "learning_rate": 0.0003953513797895784, "loss": 3.0878, "step": 24305 }, { "epoch": 1.19, "grad_norm": 0.5818905234336853, "learning_rate": 0.0003953367818421847, "loss": 3.1227, "step": 24306 }, { "epoch": 1.19, "grad_norm": 0.5517255067825317, "learning_rate": 0.00039532218364369225, "loss": 3.006, "step": 24307 }, { "epoch": 1.19, "grad_norm": 0.5664587020874023, "learning_rate": 0.0003953075851941395, "loss": 3.0768, "step": 24308 }, { "epoch": 1.19, "grad_norm": 0.547131359577179, "learning_rate": 0.0003952929864935649, "loss": 3.054, "step": 24309 }, { "epoch": 1.19, "grad_norm": 0.5453808307647705, "learning_rate": 0.00039527838754200694, "loss": 3.1298, "step": 24310 }, { "epoch": 1.19, "grad_norm": 0.549038827419281, "learning_rate": 0.00039526378833950406, "loss": 2.9323, "step": 24311 }, { "epoch": 1.19, "grad_norm": 0.5720995664596558, "learning_rate": 0.00039524918888609467, "loss": 3.0093, "step": 24312 }, { "epoch": 1.19, "grad_norm": 0.5207635164260864, "learning_rate": 0.0003952345891818172, "loss": 2.853, "step": 24313 }, { "epoch": 1.19, "grad_norm": 0.5525292754173279, "learning_rate": 0.0003952199892267102, "loss": 3.2314, "step": 24314 }, { "epoch": 1.19, "grad_norm": 0.5358814001083374, "learning_rate": 0.0003952053890208121, "loss": 2.8513, "step": 24315 }, { "epoch": 1.19, "grad_norm": 0.5728432536125183, "learning_rate": 0.00039519078856416136, "loss": 3.1612, "step": 24316 }, { "epoch": 1.19, "grad_norm": 0.5785890817642212, "learning_rate": 0.0003951761878567962, "loss": 3.1774, "step": 24317 }, { "epoch": 1.19, "grad_norm": 0.6545047760009766, "learning_rate": 0.00039516158689875555, "loss": 2.9229, "step": 24318 }, { "epoch": 1.19, "grad_norm": 0.5325422883033752, "learning_rate": 0.0003951469856900775, "loss": 2.9676, "step": 24319 }, { "epoch": 1.19, "grad_norm": 0.5706045627593994, "learning_rate": 0.0003951323842308005, "loss": 3.0287, "step": 24320 }, { "epoch": 1.19, "grad_norm": 0.5675446391105652, "learning_rate": 0.00039511778252096323, "loss": 3.1198, "step": 24321 }, { "epoch": 1.19, "grad_norm": 0.5522854924201965, "learning_rate": 0.00039510318056060396, "loss": 2.9583, "step": 24322 }, { "epoch": 1.19, "grad_norm": 0.5343487858772278, "learning_rate": 0.00039508857834976124, "loss": 3.3867, "step": 24323 }, { "epoch": 1.19, "grad_norm": 0.5691496133804321, "learning_rate": 0.00039507397588847346, "loss": 3.257, "step": 24324 }, { "epoch": 1.19, "grad_norm": 0.5750447511672974, "learning_rate": 0.00039505937317677915, "loss": 2.8305, "step": 24325 }, { "epoch": 1.19, "grad_norm": 0.6205912828445435, "learning_rate": 0.0003950447702147168, "loss": 2.6935, "step": 24326 }, { "epoch": 1.19, "grad_norm": 0.5219376087188721, "learning_rate": 0.0003950301670023247, "loss": 3.142, "step": 24327 }, { "epoch": 1.19, "grad_norm": 0.5368987917900085, "learning_rate": 0.00039501556353964146, "loss": 3.1002, "step": 24328 }, { "epoch": 1.19, "grad_norm": 0.5906136631965637, "learning_rate": 0.0003950009598267057, "loss": 2.9932, "step": 24329 }, { "epoch": 1.19, "grad_norm": 0.5572717785835266, "learning_rate": 0.0003949863558635555, "loss": 3.0393, "step": 24330 }, { "epoch": 1.19, "grad_norm": 0.5366165041923523, "learning_rate": 0.00039497175165022956, "loss": 2.838, "step": 24331 }, { "epoch": 1.19, "grad_norm": 0.5345458388328552, "learning_rate": 0.0003949571471867663, "loss": 2.8754, "step": 24332 }, { "epoch": 1.19, "grad_norm": 0.5713475942611694, "learning_rate": 0.00039494254247320423, "loss": 2.9894, "step": 24333 }, { "epoch": 1.19, "grad_norm": 0.5291685461997986, "learning_rate": 0.00039492793750958173, "loss": 3.1748, "step": 24334 }, { "epoch": 1.19, "grad_norm": 0.5592206120491028, "learning_rate": 0.0003949133322959373, "loss": 3.2341, "step": 24335 }, { "epoch": 1.19, "grad_norm": 0.5599371194839478, "learning_rate": 0.0003948987268323094, "loss": 3.2312, "step": 24336 }, { "epoch": 1.19, "grad_norm": 0.5585187077522278, "learning_rate": 0.0003948841211187366, "loss": 3.0695, "step": 24337 }, { "epoch": 1.19, "grad_norm": 0.5566004514694214, "learning_rate": 0.0003948695151552572, "loss": 3.202, "step": 24338 }, { "epoch": 1.19, "grad_norm": 0.5448996424674988, "learning_rate": 0.0003948549089419098, "loss": 3.0852, "step": 24339 }, { "epoch": 1.19, "grad_norm": 0.5520145893096924, "learning_rate": 0.0003948403024787329, "loss": 3.1257, "step": 24340 }, { "epoch": 1.19, "grad_norm": 0.5336360931396484, "learning_rate": 0.00039482569576576484, "loss": 3.1598, "step": 24341 }, { "epoch": 1.19, "grad_norm": 0.5652227997779846, "learning_rate": 0.00039481108880304403, "loss": 3.0888, "step": 24342 }, { "epoch": 1.19, "grad_norm": 0.5527975559234619, "learning_rate": 0.0003947964815906091, "loss": 3.1309, "step": 24343 }, { "epoch": 1.19, "grad_norm": 0.5577079057693481, "learning_rate": 0.0003947818741284987, "loss": 3.1479, "step": 24344 }, { "epoch": 1.19, "grad_norm": 0.5621340274810791, "learning_rate": 0.0003947672664167509, "loss": 3.0016, "step": 24345 }, { "epoch": 1.19, "grad_norm": 0.5487366914749146, "learning_rate": 0.0003947526584554044, "loss": 3.0614, "step": 24346 }, { "epoch": 1.19, "grad_norm": 0.5261529088020325, "learning_rate": 0.0003947380502444976, "loss": 3.2059, "step": 24347 }, { "epoch": 1.19, "grad_norm": 0.5683314204216003, "learning_rate": 0.000394723441784069, "loss": 3.2642, "step": 24348 }, { "epoch": 1.19, "grad_norm": 0.5974188446998596, "learning_rate": 0.0003947088330741571, "loss": 3.0305, "step": 24349 }, { "epoch": 1.19, "grad_norm": 0.5211678147315979, "learning_rate": 0.0003946942241148004, "loss": 3.1625, "step": 24350 }, { "epoch": 1.19, "grad_norm": 0.553632915019989, "learning_rate": 0.0003946796149060373, "loss": 3.1093, "step": 24351 }, { "epoch": 1.19, "grad_norm": 0.5715633034706116, "learning_rate": 0.00039466500544790633, "loss": 3.187, "step": 24352 }, { "epoch": 1.19, "grad_norm": 0.5198462009429932, "learning_rate": 0.00039465039574044597, "loss": 3.118, "step": 24353 }, { "epoch": 1.19, "grad_norm": 0.5220559239387512, "learning_rate": 0.00039463578578369463, "loss": 3.1978, "step": 24354 }, { "epoch": 1.19, "grad_norm": 0.5109367370605469, "learning_rate": 0.00039462117557769095, "loss": 3.1694, "step": 24355 }, { "epoch": 1.19, "grad_norm": 0.5752742886543274, "learning_rate": 0.0003946065651224732, "loss": 3.0348, "step": 24356 }, { "epoch": 1.19, "grad_norm": 0.5721966028213501, "learning_rate": 0.00039459195441807996, "loss": 3.1092, "step": 24357 }, { "epoch": 1.19, "grad_norm": 0.5311843156814575, "learning_rate": 0.0003945773434645498, "loss": 3.1743, "step": 24358 }, { "epoch": 1.19, "grad_norm": 0.5497158765792847, "learning_rate": 0.00039456273226192107, "loss": 2.7434, "step": 24359 }, { "epoch": 1.19, "grad_norm": 0.5891440510749817, "learning_rate": 0.0003945481208102323, "loss": 3.17, "step": 24360 }, { "epoch": 1.19, "grad_norm": 0.5738333463668823, "learning_rate": 0.0003945335091095219, "loss": 3.0605, "step": 24361 }, { "epoch": 1.19, "grad_norm": 0.5178869366645813, "learning_rate": 0.0003945188971598285, "loss": 3.113, "step": 24362 }, { "epoch": 1.19, "grad_norm": 0.5586382746696472, "learning_rate": 0.00039450428496119055, "loss": 3.1943, "step": 24363 }, { "epoch": 1.19, "grad_norm": 0.5722424983978271, "learning_rate": 0.0003944896725136464, "loss": 2.919, "step": 24364 }, { "epoch": 1.19, "grad_norm": 0.547747015953064, "learning_rate": 0.0003944750598172348, "loss": 3.0477, "step": 24365 }, { "epoch": 1.19, "grad_norm": 0.5872486233711243, "learning_rate": 0.000394460446871994, "loss": 3.2328, "step": 24366 }, { "epoch": 1.19, "grad_norm": 0.5272241234779358, "learning_rate": 0.0003944458336779626, "loss": 3.1448, "step": 24367 }, { "epoch": 1.19, "grad_norm": 0.5975600481033325, "learning_rate": 0.00039443122023517894, "loss": 3.0531, "step": 24368 }, { "epoch": 1.19, "grad_norm": 0.5917435884475708, "learning_rate": 0.0003944166065436817, "loss": 3.1291, "step": 24369 }, { "epoch": 1.19, "grad_norm": 0.5495269894599915, "learning_rate": 0.00039440199260350927, "loss": 3.056, "step": 24370 }, { "epoch": 1.19, "grad_norm": 0.5593695640563965, "learning_rate": 0.00039438737841470013, "loss": 2.9955, "step": 24371 }, { "epoch": 1.19, "grad_norm": 0.5663504600524902, "learning_rate": 0.0003943727639772928, "loss": 3.0223, "step": 24372 }, { "epoch": 1.19, "grad_norm": 0.5404443144798279, "learning_rate": 0.00039435814929132586, "loss": 3.2137, "step": 24373 }, { "epoch": 1.19, "grad_norm": 0.52837735414505, "learning_rate": 0.00039434353435683766, "loss": 3.2715, "step": 24374 }, { "epoch": 1.19, "grad_norm": 0.5688294768333435, "learning_rate": 0.0003943289191738667, "loss": 3.1562, "step": 24375 }, { "epoch": 1.19, "grad_norm": 0.5669829249382019, "learning_rate": 0.00039431430374245154, "loss": 3.2254, "step": 24376 }, { "epoch": 1.19, "grad_norm": 0.5809857249259949, "learning_rate": 0.00039429968806263077, "loss": 3.1399, "step": 24377 }, { "epoch": 1.19, "grad_norm": 0.5638022422790527, "learning_rate": 0.00039428507213444254, "loss": 3.0601, "step": 24378 }, { "epoch": 1.19, "grad_norm": 0.5520922541618347, "learning_rate": 0.0003942704559579257, "loss": 3.0555, "step": 24379 }, { "epoch": 1.19, "grad_norm": 0.5883176922798157, "learning_rate": 0.0003942558395331188, "loss": 3.0359, "step": 24380 }, { "epoch": 1.19, "grad_norm": 0.5609521269798279, "learning_rate": 0.0003942412228600599, "loss": 3.101, "step": 24381 }, { "epoch": 1.19, "grad_norm": 0.5642240047454834, "learning_rate": 0.0003942266059387879, "loss": 2.9432, "step": 24382 }, { "epoch": 1.19, "grad_norm": 0.5660053491592407, "learning_rate": 0.000394211988769341, "loss": 3.0687, "step": 24383 }, { "epoch": 1.2, "grad_norm": 0.5571756362915039, "learning_rate": 0.00039419737135175804, "loss": 2.9455, "step": 24384 }, { "epoch": 1.2, "grad_norm": 0.6327832937240601, "learning_rate": 0.0003941827536860773, "loss": 3.3466, "step": 24385 }, { "epoch": 1.2, "grad_norm": 0.5416423082351685, "learning_rate": 0.0003941681357723372, "loss": 3.1423, "step": 24386 }, { "epoch": 1.2, "grad_norm": 0.56650310754776, "learning_rate": 0.0003941535176105764, "loss": 3.2203, "step": 24387 }, { "epoch": 1.2, "grad_norm": 0.516886830329895, "learning_rate": 0.00039413889920083335, "loss": 2.9785, "step": 24388 }, { "epoch": 1.2, "grad_norm": 0.5625539422035217, "learning_rate": 0.0003941242805431466, "loss": 3.0304, "step": 24389 }, { "epoch": 1.2, "grad_norm": 0.5716773867607117, "learning_rate": 0.0003941096616375546, "loss": 3.2899, "step": 24390 }, { "epoch": 1.2, "grad_norm": 0.580208420753479, "learning_rate": 0.00039409504248409595, "loss": 3.1046, "step": 24391 }, { "epoch": 1.2, "grad_norm": 0.5867831707000732, "learning_rate": 0.0003940804230828089, "loss": 3.0618, "step": 24392 }, { "epoch": 1.2, "grad_norm": 0.5907902717590332, "learning_rate": 0.00039406580343373217, "loss": 3.3336, "step": 24393 }, { "epoch": 1.2, "grad_norm": 0.5453541278839111, "learning_rate": 0.0003940511835369041, "loss": 2.8692, "step": 24394 }, { "epoch": 1.2, "grad_norm": 0.5520537495613098, "learning_rate": 0.0003940365633923636, "loss": 3.0061, "step": 24395 }, { "epoch": 1.2, "grad_norm": 0.5857133865356445, "learning_rate": 0.0003940219430001487, "loss": 3.0555, "step": 24396 }, { "epoch": 1.2, "grad_norm": 0.5415214896202087, "learning_rate": 0.00039400732236029816, "loss": 3.0087, "step": 24397 }, { "epoch": 1.2, "grad_norm": 0.5039085745811462, "learning_rate": 0.0003939927014728504, "loss": 3.37, "step": 24398 }, { "epoch": 1.2, "grad_norm": 0.5987560153007507, "learning_rate": 0.000393978080337844, "loss": 3.3081, "step": 24399 }, { "epoch": 1.2, "grad_norm": 0.5762282013893127, "learning_rate": 0.0003939634589553173, "loss": 3.048, "step": 24400 }, { "epoch": 1.2, "grad_norm": 0.5720338225364685, "learning_rate": 0.000393948837325309, "loss": 3.0585, "step": 24401 }, { "epoch": 1.2, "grad_norm": 0.5712810754776001, "learning_rate": 0.00039393421544785766, "loss": 3.0319, "step": 24402 }, { "epoch": 1.2, "grad_norm": 0.5783328413963318, "learning_rate": 0.0003939195933230015, "loss": 3.0169, "step": 24403 }, { "epoch": 1.2, "grad_norm": 0.5550515055656433, "learning_rate": 0.00039390497095077924, "loss": 3.2278, "step": 24404 }, { "epoch": 1.2, "grad_norm": 0.5710436105728149, "learning_rate": 0.0003938903483312294, "loss": 3.0629, "step": 24405 }, { "epoch": 1.2, "grad_norm": 1.2239809036254883, "learning_rate": 0.00039387572546439046, "loss": 3.3184, "step": 24406 }, { "epoch": 1.2, "grad_norm": 0.5772742033004761, "learning_rate": 0.00039386110235030094, "loss": 3.2588, "step": 24407 }, { "epoch": 1.2, "grad_norm": 0.576033353805542, "learning_rate": 0.00039384647898899934, "loss": 2.8866, "step": 24408 }, { "epoch": 1.2, "grad_norm": 0.5748345851898193, "learning_rate": 0.0003938318553805241, "loss": 3.0798, "step": 24409 }, { "epoch": 1.2, "grad_norm": 0.5759305953979492, "learning_rate": 0.00039381723152491385, "loss": 3.0736, "step": 24410 }, { "epoch": 1.2, "grad_norm": 0.5629940032958984, "learning_rate": 0.0003938026074222071, "loss": 3.3334, "step": 24411 }, { "epoch": 1.2, "grad_norm": 0.5594444274902344, "learning_rate": 0.00039378798307244234, "loss": 2.9158, "step": 24412 }, { "epoch": 1.2, "grad_norm": 0.5800905823707581, "learning_rate": 0.000393773358475658, "loss": 2.9424, "step": 24413 }, { "epoch": 1.2, "grad_norm": 0.5819612741470337, "learning_rate": 0.0003937587336318927, "loss": 3.0931, "step": 24414 }, { "epoch": 1.2, "grad_norm": 0.5524073243141174, "learning_rate": 0.00039374410854118495, "loss": 3.1325, "step": 24415 }, { "epoch": 1.2, "grad_norm": 0.5730950832366943, "learning_rate": 0.0003937294832035733, "loss": 2.9164, "step": 24416 }, { "epoch": 1.2, "grad_norm": 0.5506508350372314, "learning_rate": 0.00039371485761909627, "loss": 3.0187, "step": 24417 }, { "epoch": 1.2, "grad_norm": 0.5478151440620422, "learning_rate": 0.00039370023178779233, "loss": 2.788, "step": 24418 }, { "epoch": 1.2, "grad_norm": 0.5452743768692017, "learning_rate": 0.00039368560570969985, "loss": 2.9242, "step": 24419 }, { "epoch": 1.2, "grad_norm": 0.49868834018707275, "learning_rate": 0.0003936709793848577, "loss": 2.9301, "step": 24420 }, { "epoch": 1.2, "grad_norm": 0.597121000289917, "learning_rate": 0.00039365635281330417, "loss": 2.7868, "step": 24421 }, { "epoch": 1.2, "grad_norm": 0.5634990334510803, "learning_rate": 0.00039364172599507777, "loss": 2.8906, "step": 24422 }, { "epoch": 1.2, "grad_norm": 0.5244052410125732, "learning_rate": 0.0003936270989302171, "loss": 3.1885, "step": 24423 }, { "epoch": 1.2, "grad_norm": 0.5397709012031555, "learning_rate": 0.0003936124716187607, "loss": 3.1282, "step": 24424 }, { "epoch": 1.2, "grad_norm": 0.5438342690467834, "learning_rate": 0.00039359784406074706, "loss": 3.1376, "step": 24425 }, { "epoch": 1.2, "grad_norm": 0.5476157665252686, "learning_rate": 0.00039358321625621473, "loss": 3.1603, "step": 24426 }, { "epoch": 1.2, "grad_norm": 0.5249218940734863, "learning_rate": 0.00039356858820520217, "loss": 3.1193, "step": 24427 }, { "epoch": 1.2, "grad_norm": 0.548941969871521, "learning_rate": 0.0003935539599077481, "loss": 3.2686, "step": 24428 }, { "epoch": 1.2, "grad_norm": 0.5758370757102966, "learning_rate": 0.0003935393313638908, "loss": 3.1205, "step": 24429 }, { "epoch": 1.2, "grad_norm": 0.5937607884407043, "learning_rate": 0.0003935247025736688, "loss": 2.9228, "step": 24430 }, { "epoch": 1.2, "grad_norm": 0.5786687731742859, "learning_rate": 0.00039351007353712086, "loss": 2.9846, "step": 24431 }, { "epoch": 1.2, "grad_norm": 0.5394309163093567, "learning_rate": 0.0003934954442542854, "loss": 3.2228, "step": 24432 }, { "epoch": 1.2, "grad_norm": 0.5585785508155823, "learning_rate": 0.00039348081472520086, "loss": 3.1961, "step": 24433 }, { "epoch": 1.2, "grad_norm": 0.5906318426132202, "learning_rate": 0.00039346618494990584, "loss": 3.2208, "step": 24434 }, { "epoch": 1.2, "grad_norm": 0.5899059176445007, "learning_rate": 0.00039345155492843893, "loss": 3.0491, "step": 24435 }, { "epoch": 1.2, "grad_norm": 0.5428707003593445, "learning_rate": 0.00039343692466083856, "loss": 2.9778, "step": 24436 }, { "epoch": 1.2, "grad_norm": 0.5404731631278992, "learning_rate": 0.0003934222941471433, "loss": 2.9239, "step": 24437 }, { "epoch": 1.2, "grad_norm": 0.5213099718093872, "learning_rate": 0.00039340766338739175, "loss": 3.2136, "step": 24438 }, { "epoch": 1.2, "grad_norm": 0.5822370052337646, "learning_rate": 0.00039339303238162247, "loss": 3.0366, "step": 24439 }, { "epoch": 1.2, "grad_norm": 0.581151008605957, "learning_rate": 0.00039337840112987373, "loss": 3.1906, "step": 24440 }, { "epoch": 1.2, "grad_norm": 0.5343998670578003, "learning_rate": 0.00039336376963218435, "loss": 3.202, "step": 24441 }, { "epoch": 1.2, "grad_norm": 0.5333302617073059, "learning_rate": 0.00039334913788859283, "loss": 3.2351, "step": 24442 }, { "epoch": 1.2, "grad_norm": 0.5405131578445435, "learning_rate": 0.00039333450589913754, "loss": 3.0949, "step": 24443 }, { "epoch": 1.2, "grad_norm": 0.5513492822647095, "learning_rate": 0.0003933198736638572, "loss": 3.023, "step": 24444 }, { "epoch": 1.2, "grad_norm": 0.587897539138794, "learning_rate": 0.00039330524118279015, "loss": 3.1852, "step": 24445 }, { "epoch": 1.2, "grad_norm": 0.5661784410476685, "learning_rate": 0.00039329060845597524, "loss": 2.9803, "step": 24446 }, { "epoch": 1.2, "grad_norm": 0.5629352927207947, "learning_rate": 0.0003932759754834507, "loss": 3.0325, "step": 24447 }, { "epoch": 1.2, "grad_norm": 0.5622941851615906, "learning_rate": 0.00039326134226525515, "loss": 2.8962, "step": 24448 }, { "epoch": 1.2, "grad_norm": 0.5289006233215332, "learning_rate": 0.00039324670880142726, "loss": 3.1545, "step": 24449 }, { "epoch": 1.2, "grad_norm": 0.5608207583427429, "learning_rate": 0.00039323207509200545, "loss": 3.1841, "step": 24450 }, { "epoch": 1.2, "grad_norm": 0.5729897618293762, "learning_rate": 0.0003932174411370283, "loss": 3.1255, "step": 24451 }, { "epoch": 1.2, "grad_norm": 0.5416507720947266, "learning_rate": 0.00039320280693653435, "loss": 3.0346, "step": 24452 }, { "epoch": 1.2, "grad_norm": 0.5522789359092712, "learning_rate": 0.00039318817249056224, "loss": 3.1717, "step": 24453 }, { "epoch": 1.2, "grad_norm": 0.6044777631759644, "learning_rate": 0.00039317353779915034, "loss": 3.0349, "step": 24454 }, { "epoch": 1.2, "grad_norm": 0.5705912709236145, "learning_rate": 0.0003931589028623371, "loss": 3.0584, "step": 24455 }, { "epoch": 1.2, "grad_norm": 0.5330730676651001, "learning_rate": 0.0003931442676801614, "loss": 3.2936, "step": 24456 }, { "epoch": 1.2, "grad_norm": 0.526887059211731, "learning_rate": 0.0003931296322526617, "loss": 2.9784, "step": 24457 }, { "epoch": 1.2, "grad_norm": 0.5354217886924744, "learning_rate": 0.0003931149965798764, "loss": 3.1643, "step": 24458 }, { "epoch": 1.2, "grad_norm": 0.5469911098480225, "learning_rate": 0.0003931003606618441, "loss": 3.0493, "step": 24459 }, { "epoch": 1.2, "grad_norm": 0.5541418790817261, "learning_rate": 0.00039308572449860336, "loss": 2.8854, "step": 24460 }, { "epoch": 1.2, "grad_norm": 0.568221390247345, "learning_rate": 0.0003930710880901928, "loss": 2.958, "step": 24461 }, { "epoch": 1.2, "grad_norm": 0.5707933902740479, "learning_rate": 0.0003930564514366509, "loss": 3.1631, "step": 24462 }, { "epoch": 1.2, "grad_norm": 0.5753080248832703, "learning_rate": 0.00039304181453801614, "loss": 3.1559, "step": 24463 }, { "epoch": 1.2, "grad_norm": 0.5365299582481384, "learning_rate": 0.0003930271773943272, "loss": 3.1203, "step": 24464 }, { "epoch": 1.2, "grad_norm": 0.5476408004760742, "learning_rate": 0.00039301254000562256, "loss": 3.0281, "step": 24465 }, { "epoch": 1.2, "grad_norm": 0.5769860744476318, "learning_rate": 0.0003929979023719408, "loss": 3.121, "step": 24466 }, { "epoch": 1.2, "grad_norm": 0.6829390525817871, "learning_rate": 0.00039298326449332044, "loss": 2.9079, "step": 24467 }, { "epoch": 1.2, "grad_norm": 0.5319774746894836, "learning_rate": 0.00039296862636980015, "loss": 2.9785, "step": 24468 }, { "epoch": 1.2, "grad_norm": 0.5541231632232666, "learning_rate": 0.0003929539880014183, "loss": 3.0751, "step": 24469 }, { "epoch": 1.2, "grad_norm": 0.5718669295310974, "learning_rate": 0.00039293934938821354, "loss": 2.9471, "step": 24470 }, { "epoch": 1.2, "grad_norm": 0.5447272658348083, "learning_rate": 0.0003929247105302244, "loss": 2.8957, "step": 24471 }, { "epoch": 1.2, "grad_norm": 0.5534474849700928, "learning_rate": 0.0003929100714274895, "loss": 3.0906, "step": 24472 }, { "epoch": 1.2, "grad_norm": 0.9372397661209106, "learning_rate": 0.00039289543208004734, "loss": 3.2436, "step": 24473 }, { "epoch": 1.2, "grad_norm": 0.5349345207214355, "learning_rate": 0.00039288079248793646, "loss": 3.0896, "step": 24474 }, { "epoch": 1.2, "grad_norm": 0.5702376365661621, "learning_rate": 0.0003928661526511955, "loss": 2.9394, "step": 24475 }, { "epoch": 1.2, "grad_norm": 0.5644256472587585, "learning_rate": 0.00039285151256986296, "loss": 3.2093, "step": 24476 }, { "epoch": 1.2, "grad_norm": 0.5325481295585632, "learning_rate": 0.00039283687224397744, "loss": 2.844, "step": 24477 }, { "epoch": 1.2, "grad_norm": 0.5378096103668213, "learning_rate": 0.0003928222316735773, "loss": 3.0989, "step": 24478 }, { "epoch": 1.2, "grad_norm": 0.5697956085205078, "learning_rate": 0.0003928075908587015, "loss": 3.1788, "step": 24479 }, { "epoch": 1.2, "grad_norm": 0.5447378754615784, "learning_rate": 0.0003927929497993882, "loss": 3.0596, "step": 24480 }, { "epoch": 1.2, "grad_norm": 0.6714164614677429, "learning_rate": 0.00039277830849567615, "loss": 2.8876, "step": 24481 }, { "epoch": 1.2, "grad_norm": 0.5856902003288269, "learning_rate": 0.000392763666947604, "loss": 2.8871, "step": 24482 }, { "epoch": 1.2, "grad_norm": 0.58744215965271, "learning_rate": 0.0003927490251552101, "loss": 3.1451, "step": 24483 }, { "epoch": 1.2, "grad_norm": 0.563101053237915, "learning_rate": 0.00039273438311853315, "loss": 3.0435, "step": 24484 }, { "epoch": 1.2, "grad_norm": 0.5481135845184326, "learning_rate": 0.00039271974083761167, "loss": 3.0859, "step": 24485 }, { "epoch": 1.2, "grad_norm": 0.5843150019645691, "learning_rate": 0.0003927050983124842, "loss": 3.0686, "step": 24486 }, { "epoch": 1.2, "grad_norm": 0.5351256728172302, "learning_rate": 0.0003926904555431894, "loss": 3.0086, "step": 24487 }, { "epoch": 1.2, "grad_norm": 0.6021181344985962, "learning_rate": 0.00039267581252976574, "loss": 2.9542, "step": 24488 }, { "epoch": 1.2, "grad_norm": 0.5292304754257202, "learning_rate": 0.00039266116927225186, "loss": 3.0605, "step": 24489 }, { "epoch": 1.2, "grad_norm": 0.5263028144836426, "learning_rate": 0.00039264652577068634, "loss": 2.9425, "step": 24490 }, { "epoch": 1.2, "grad_norm": 0.5306416749954224, "learning_rate": 0.0003926318820251076, "loss": 3.0063, "step": 24491 }, { "epoch": 1.2, "grad_norm": 0.5751937031745911, "learning_rate": 0.00039261723803555427, "loss": 3.029, "step": 24492 }, { "epoch": 1.2, "grad_norm": 0.5904483795166016, "learning_rate": 0.00039260259380206517, "loss": 3.3765, "step": 24493 }, { "epoch": 1.2, "grad_norm": 0.5914291739463806, "learning_rate": 0.00039258794932467845, "loss": 2.9694, "step": 24494 }, { "epoch": 1.2, "grad_norm": 0.5873374938964844, "learning_rate": 0.000392573304603433, "loss": 3.0701, "step": 24495 }, { "epoch": 1.2, "grad_norm": 0.6778043508529663, "learning_rate": 0.00039255865963836714, "loss": 3.0645, "step": 24496 }, { "epoch": 1.2, "grad_norm": 0.5494279265403748, "learning_rate": 0.0003925440144295198, "loss": 3.1383, "step": 24497 }, { "epoch": 1.2, "grad_norm": 0.5600928068161011, "learning_rate": 0.0003925293689769292, "loss": 3.2732, "step": 24498 }, { "epoch": 1.2, "grad_norm": 0.5523821711540222, "learning_rate": 0.0003925147232806341, "loss": 2.9552, "step": 24499 }, { "epoch": 1.2, "grad_norm": 0.5339621305465698, "learning_rate": 0.000392500077340673, "loss": 3.0629, "step": 24500 }, { "epoch": 1.2, "grad_norm": 0.5230332016944885, "learning_rate": 0.0003924854311570845, "loss": 3.3095, "step": 24501 }, { "epoch": 1.2, "grad_norm": 0.5600953698158264, "learning_rate": 0.00039247078472990716, "loss": 3.1273, "step": 24502 }, { "epoch": 1.2, "grad_norm": 0.5858113765716553, "learning_rate": 0.00039245613805917955, "loss": 2.9782, "step": 24503 }, { "epoch": 1.2, "grad_norm": 0.5632593631744385, "learning_rate": 0.00039244149114494036, "loss": 3.1121, "step": 24504 }, { "epoch": 1.2, "grad_norm": 0.5598071813583374, "learning_rate": 0.00039242684398722806, "loss": 3.083, "step": 24505 }, { "epoch": 1.2, "grad_norm": 0.598629891872406, "learning_rate": 0.00039241219658608115, "loss": 3.0239, "step": 24506 }, { "epoch": 1.2, "grad_norm": 0.6259543895721436, "learning_rate": 0.00039239754894153833, "loss": 3.2252, "step": 24507 }, { "epoch": 1.2, "grad_norm": 0.5557901263237, "learning_rate": 0.00039238290105363815, "loss": 3.1079, "step": 24508 }, { "epoch": 1.2, "grad_norm": 0.5629055500030518, "learning_rate": 0.00039236825292241926, "loss": 3.1228, "step": 24509 }, { "epoch": 1.2, "grad_norm": 0.578502357006073, "learning_rate": 0.00039235360454792015, "loss": 3.042, "step": 24510 }, { "epoch": 1.2, "grad_norm": 0.591256320476532, "learning_rate": 0.0003923389559301793, "loss": 3.0652, "step": 24511 }, { "epoch": 1.2, "grad_norm": 0.566704273223877, "learning_rate": 0.00039232430706923554, "loss": 3.0541, "step": 24512 }, { "epoch": 1.2, "grad_norm": 0.5784484148025513, "learning_rate": 0.00039230965796512723, "loss": 2.751, "step": 24513 }, { "epoch": 1.2, "grad_norm": 0.5382969975471497, "learning_rate": 0.0003922950086178931, "loss": 2.9646, "step": 24514 }, { "epoch": 1.2, "grad_norm": 0.5786687731742859, "learning_rate": 0.00039228035902757173, "loss": 3.1632, "step": 24515 }, { "epoch": 1.2, "grad_norm": 0.6045399308204651, "learning_rate": 0.0003922657091942016, "loss": 3.0142, "step": 24516 }, { "epoch": 1.2, "grad_norm": 0.5659675598144531, "learning_rate": 0.00039225105911782124, "loss": 3.183, "step": 24517 }, { "epoch": 1.2, "grad_norm": 0.5671629905700684, "learning_rate": 0.00039223640879846943, "loss": 3.1611, "step": 24518 }, { "epoch": 1.2, "grad_norm": 0.5662316083908081, "learning_rate": 0.0003922217582361848, "loss": 3.2395, "step": 24519 }, { "epoch": 1.2, "grad_norm": 0.5527083873748779, "learning_rate": 0.00039220710743100565, "loss": 3.312, "step": 24520 }, { "epoch": 1.2, "grad_norm": 0.553294837474823, "learning_rate": 0.0003921924563829708, "loss": 3.0211, "step": 24521 }, { "epoch": 1.2, "grad_norm": 0.658713161945343, "learning_rate": 0.0003921778050921187, "loss": 3.2227, "step": 24522 }, { "epoch": 1.2, "grad_norm": 0.6088311076164246, "learning_rate": 0.00039216315355848803, "loss": 3.2464, "step": 24523 }, { "epoch": 1.2, "grad_norm": 0.6018931269645691, "learning_rate": 0.0003921485017821173, "loss": 3.1967, "step": 24524 }, { "epoch": 1.2, "grad_norm": 0.5140994787216187, "learning_rate": 0.00039213384976304527, "loss": 3.0007, "step": 24525 }, { "epoch": 1.2, "grad_norm": 0.5533731579780579, "learning_rate": 0.0003921191975013103, "loss": 2.992, "step": 24526 }, { "epoch": 1.2, "grad_norm": 0.5808982849121094, "learning_rate": 0.00039210454499695116, "loss": 3.2767, "step": 24527 }, { "epoch": 1.2, "grad_norm": 0.5596871972084045, "learning_rate": 0.00039208989225000633, "loss": 3.1751, "step": 24528 }, { "epoch": 1.2, "grad_norm": 0.542251467704773, "learning_rate": 0.00039207523926051453, "loss": 3.0595, "step": 24529 }, { "epoch": 1.2, "grad_norm": 0.6116825342178345, "learning_rate": 0.0003920605860285142, "loss": 3.1456, "step": 24530 }, { "epoch": 1.2, "grad_norm": 0.5442792177200317, "learning_rate": 0.000392045932554044, "loss": 3.2253, "step": 24531 }, { "epoch": 1.2, "grad_norm": 0.5724294781684875, "learning_rate": 0.0003920312788371425, "loss": 3.3303, "step": 24532 }, { "epoch": 1.2, "grad_norm": 0.5326899290084839, "learning_rate": 0.00039201662487784844, "loss": 2.9463, "step": 24533 }, { "epoch": 1.2, "grad_norm": 0.5549022555351257, "learning_rate": 0.0003920019706762002, "loss": 3.0103, "step": 24534 }, { "epoch": 1.2, "grad_norm": 0.533281147480011, "learning_rate": 0.0003919873162322365, "loss": 3.0965, "step": 24535 }, { "epoch": 1.2, "grad_norm": 0.5557292699813843, "learning_rate": 0.0003919726615459959, "loss": 3.0286, "step": 24536 }, { "epoch": 1.2, "grad_norm": 0.558367133140564, "learning_rate": 0.000391958006617517, "loss": 3.2902, "step": 24537 }, { "epoch": 1.2, "grad_norm": 0.5451139211654663, "learning_rate": 0.00039194335144683844, "loss": 3.2517, "step": 24538 }, { "epoch": 1.2, "grad_norm": 0.5590549111366272, "learning_rate": 0.00039192869603399877, "loss": 2.977, "step": 24539 }, { "epoch": 1.2, "grad_norm": 0.5913295745849609, "learning_rate": 0.00039191404037903664, "loss": 2.984, "step": 24540 }, { "epoch": 1.2, "grad_norm": 0.5299440622329712, "learning_rate": 0.0003918993844819906, "loss": 3.129, "step": 24541 }, { "epoch": 1.2, "grad_norm": 0.5578473210334778, "learning_rate": 0.0003918847283428992, "loss": 2.9886, "step": 24542 }, { "epoch": 1.2, "grad_norm": 0.5618113875389099, "learning_rate": 0.0003918700719618012, "loss": 3.1438, "step": 24543 }, { "epoch": 1.2, "grad_norm": 0.5523414015769958, "learning_rate": 0.0003918554153387351, "loss": 3.1398, "step": 24544 }, { "epoch": 1.2, "grad_norm": 0.5433165431022644, "learning_rate": 0.0003918407584737395, "loss": 3.0228, "step": 24545 }, { "epoch": 1.2, "grad_norm": 0.5516573786735535, "learning_rate": 0.0003918261013668531, "loss": 3.2176, "step": 24546 }, { "epoch": 1.2, "grad_norm": 0.548460841178894, "learning_rate": 0.00039181144401811426, "loss": 3.0904, "step": 24547 }, { "epoch": 1.2, "grad_norm": 0.57688307762146, "learning_rate": 0.0003917967864275618, "loss": 2.8368, "step": 24548 }, { "epoch": 1.2, "grad_norm": 0.5394658446311951, "learning_rate": 0.0003917821285952343, "loss": 3.1838, "step": 24549 }, { "epoch": 1.2, "grad_norm": 0.5557076334953308, "learning_rate": 0.0003917674705211703, "loss": 3.0597, "step": 24550 }, { "epoch": 1.2, "grad_norm": 0.5342375636100769, "learning_rate": 0.00039175281220540844, "loss": 2.9482, "step": 24551 }, { "epoch": 1.2, "grad_norm": 0.5965703725814819, "learning_rate": 0.00039173815364798744, "loss": 2.9069, "step": 24552 }, { "epoch": 1.2, "grad_norm": 0.5493952631950378, "learning_rate": 0.0003917234948489457, "loss": 3.0136, "step": 24553 }, { "epoch": 1.2, "grad_norm": 0.563545286655426, "learning_rate": 0.0003917088358083219, "loss": 2.9065, "step": 24554 }, { "epoch": 1.2, "grad_norm": 0.575928807258606, "learning_rate": 0.0003916941765261548, "loss": 3.1358, "step": 24555 }, { "epoch": 1.2, "grad_norm": 0.5435425043106079, "learning_rate": 0.0003916795170024828, "loss": 2.9169, "step": 24556 }, { "epoch": 1.2, "grad_norm": 0.5062286853790283, "learning_rate": 0.0003916648572373446, "loss": 2.9257, "step": 24557 }, { "epoch": 1.2, "grad_norm": 0.5492591857910156, "learning_rate": 0.0003916501972307787, "loss": 3.0671, "step": 24558 }, { "epoch": 1.2, "grad_norm": 0.5551416873931885, "learning_rate": 0.000391635536982824, "loss": 3.117, "step": 24559 }, { "epoch": 1.2, "grad_norm": 0.5714171528816223, "learning_rate": 0.0003916208764935189, "loss": 3.0568, "step": 24560 }, { "epoch": 1.2, "grad_norm": 0.5315268635749817, "learning_rate": 0.000391606215762902, "loss": 3.2503, "step": 24561 }, { "epoch": 1.2, "grad_norm": 0.5530765056610107, "learning_rate": 0.00039159155479101196, "loss": 3.0153, "step": 24562 }, { "epoch": 1.2, "grad_norm": 0.5797913074493408, "learning_rate": 0.0003915768935778874, "loss": 3.2337, "step": 24563 }, { "epoch": 1.2, "grad_norm": 0.5785074830055237, "learning_rate": 0.0003915622321235669, "loss": 3.169, "step": 24564 }, { "epoch": 1.2, "grad_norm": 0.5695971846580505, "learning_rate": 0.0003915475704280891, "loss": 3.0674, "step": 24565 }, { "epoch": 1.2, "grad_norm": 0.5791469216346741, "learning_rate": 0.00039153290849149275, "loss": 3.1011, "step": 24566 }, { "epoch": 1.2, "grad_norm": 0.5918307304382324, "learning_rate": 0.0003915182463138161, "loss": 3.1235, "step": 24567 }, { "epoch": 1.2, "grad_norm": 0.5434072613716125, "learning_rate": 0.00039150358389509815, "loss": 2.9738, "step": 24568 }, { "epoch": 1.2, "grad_norm": 0.6053929328918457, "learning_rate": 0.0003914889212353773, "loss": 3.0853, "step": 24569 }, { "epoch": 1.2, "grad_norm": 0.5387932062149048, "learning_rate": 0.0003914742583346924, "loss": 3.0236, "step": 24570 }, { "epoch": 1.2, "grad_norm": 0.572083592414856, "learning_rate": 0.00039145959519308176, "loss": 2.891, "step": 24571 }, { "epoch": 1.2, "grad_norm": 0.5593575835227966, "learning_rate": 0.0003914449318105842, "loss": 3.3136, "step": 24572 }, { "epoch": 1.2, "grad_norm": 0.5914036631584167, "learning_rate": 0.0003914302681872382, "loss": 3.0979, "step": 24573 }, { "epoch": 1.2, "grad_norm": 0.5585025548934937, "learning_rate": 0.0003914156043230826, "loss": 3.1267, "step": 24574 }, { "epoch": 1.2, "grad_norm": 0.5593438148498535, "learning_rate": 0.0003914009402181558, "loss": 2.9214, "step": 24575 }, { "epoch": 1.2, "grad_norm": 0.5880452394485474, "learning_rate": 0.0003913862758724966, "loss": 3.0177, "step": 24576 }, { "epoch": 1.2, "grad_norm": 0.5511958003044128, "learning_rate": 0.00039137161128614345, "loss": 3.0525, "step": 24577 }, { "epoch": 1.2, "grad_norm": 0.5612953305244446, "learning_rate": 0.00039135694645913506, "loss": 3.189, "step": 24578 }, { "epoch": 1.2, "grad_norm": 0.5851930379867554, "learning_rate": 0.0003913422813915101, "loss": 2.9018, "step": 24579 }, { "epoch": 1.2, "grad_norm": 0.5985074043273926, "learning_rate": 0.00039132761608330716, "loss": 3.0104, "step": 24580 }, { "epoch": 1.2, "grad_norm": 0.545453667640686, "learning_rate": 0.0003913129505345649, "loss": 3.1573, "step": 24581 }, { "epoch": 1.2, "grad_norm": 0.5423109531402588, "learning_rate": 0.0003912982847453218, "loss": 2.9684, "step": 24582 }, { "epoch": 1.2, "grad_norm": 0.5626406073570251, "learning_rate": 0.0003912836187156166, "loss": 3.0915, "step": 24583 }, { "epoch": 1.2, "grad_norm": 0.5251243710517883, "learning_rate": 0.000391268952445488, "loss": 2.8087, "step": 24584 }, { "epoch": 1.2, "grad_norm": 0.5345422625541687, "learning_rate": 0.0003912542859349745, "loss": 3.0146, "step": 24585 }, { "epoch": 1.2, "grad_norm": 0.563607394695282, "learning_rate": 0.0003912396191841148, "loss": 3.1094, "step": 24586 }, { "epoch": 1.2, "grad_norm": 0.5774620175361633, "learning_rate": 0.00039122495219294734, "loss": 3.1227, "step": 24587 }, { "epoch": 1.2, "grad_norm": 0.6058965921401978, "learning_rate": 0.0003912102849615111, "loss": 3.0073, "step": 24588 }, { "epoch": 1.21, "grad_norm": 0.5583047866821289, "learning_rate": 0.00039119561748984446, "loss": 3.2368, "step": 24589 }, { "epoch": 1.21, "grad_norm": 0.5507198572158813, "learning_rate": 0.00039118094977798605, "loss": 3.062, "step": 24590 }, { "epoch": 1.21, "grad_norm": 0.5293483734130859, "learning_rate": 0.00039116628182597464, "loss": 3.1221, "step": 24591 }, { "epoch": 1.21, "grad_norm": 0.5906999111175537, "learning_rate": 0.0003911516136338489, "loss": 3.1421, "step": 24592 }, { "epoch": 1.21, "grad_norm": 0.5505334734916687, "learning_rate": 0.0003911369452016472, "loss": 3.2336, "step": 24593 }, { "epoch": 1.21, "grad_norm": 0.5691468119621277, "learning_rate": 0.00039112227652940825, "loss": 2.9671, "step": 24594 }, { "epoch": 1.21, "grad_norm": 0.6026682257652283, "learning_rate": 0.00039110760761717095, "loss": 3.0389, "step": 24595 }, { "epoch": 1.21, "grad_norm": 0.5420871376991272, "learning_rate": 0.00039109293846497365, "loss": 3.0556, "step": 24596 }, { "epoch": 1.21, "grad_norm": 0.5781927704811096, "learning_rate": 0.0003910782690728551, "loss": 3.016, "step": 24597 }, { "epoch": 1.21, "grad_norm": 0.5528817772865295, "learning_rate": 0.0003910635994408539, "loss": 3.1946, "step": 24598 }, { "epoch": 1.21, "grad_norm": 0.5814532041549683, "learning_rate": 0.0003910489295690087, "loss": 3.0579, "step": 24599 }, { "epoch": 1.21, "grad_norm": 0.6129553914070129, "learning_rate": 0.00039103425945735817, "loss": 3.1454, "step": 24600 }, { "epoch": 1.21, "grad_norm": 0.5644974708557129, "learning_rate": 0.0003910195891059409, "loss": 2.8995, "step": 24601 }, { "epoch": 1.21, "grad_norm": 0.5495239496231079, "learning_rate": 0.0003910049185147955, "loss": 3.2219, "step": 24602 }, { "epoch": 1.21, "grad_norm": 0.5390620231628418, "learning_rate": 0.0003909902476839608, "loss": 3.3411, "step": 24603 }, { "epoch": 1.21, "grad_norm": 0.5480315685272217, "learning_rate": 0.0003909755766134751, "loss": 3.2259, "step": 24604 }, { "epoch": 1.21, "grad_norm": 0.5507662892341614, "learning_rate": 0.0003909609053033774, "loss": 3.0374, "step": 24605 }, { "epoch": 1.21, "grad_norm": 0.5539723038673401, "learning_rate": 0.00039094623375370626, "loss": 3.1604, "step": 24606 }, { "epoch": 1.21, "grad_norm": 0.5907084345817566, "learning_rate": 0.00039093156196450007, "loss": 3.0563, "step": 24607 }, { "epoch": 1.21, "grad_norm": 0.569409191608429, "learning_rate": 0.0003909168899357977, "loss": 3.0649, "step": 24608 }, { "epoch": 1.21, "grad_norm": 0.529839277267456, "learning_rate": 0.00039090221766763765, "loss": 3.0733, "step": 24609 }, { "epoch": 1.21, "grad_norm": 0.5536385774612427, "learning_rate": 0.00039088754516005883, "loss": 3.1285, "step": 24610 }, { "epoch": 1.21, "grad_norm": 0.5590646266937256, "learning_rate": 0.0003908728724130996, "loss": 2.8481, "step": 24611 }, { "epoch": 1.21, "grad_norm": 0.5736700296401978, "learning_rate": 0.00039085819942679876, "loss": 3.0309, "step": 24612 }, { "epoch": 1.21, "grad_norm": 0.5361476540565491, "learning_rate": 0.00039084352620119485, "loss": 3.094, "step": 24613 }, { "epoch": 1.21, "grad_norm": 0.5522602796554565, "learning_rate": 0.0003908288527363266, "loss": 3.1455, "step": 24614 }, { "epoch": 1.21, "grad_norm": 0.5784001350402832, "learning_rate": 0.00039081417903223263, "loss": 3.0122, "step": 24615 }, { "epoch": 1.21, "grad_norm": 0.5663528442382812, "learning_rate": 0.00039079950508895156, "loss": 2.9354, "step": 24616 }, { "epoch": 1.21, "grad_norm": 0.5776238441467285, "learning_rate": 0.00039078483090652217, "loss": 3.0843, "step": 24617 }, { "epoch": 1.21, "grad_norm": 0.5722895860671997, "learning_rate": 0.00039077015648498293, "loss": 3.0982, "step": 24618 }, { "epoch": 1.21, "grad_norm": 0.5736603140830994, "learning_rate": 0.0003907554818243725, "loss": 2.9995, "step": 24619 }, { "epoch": 1.21, "grad_norm": 0.5491389632225037, "learning_rate": 0.00039074080692472966, "loss": 2.9911, "step": 24620 }, { "epoch": 1.21, "grad_norm": 0.5545860528945923, "learning_rate": 0.0003907261317860931, "loss": 3.074, "step": 24621 }, { "epoch": 1.21, "grad_norm": 0.6800806522369385, "learning_rate": 0.0003907114564085013, "loss": 3.2136, "step": 24622 }, { "epoch": 1.21, "grad_norm": 0.5676558017730713, "learning_rate": 0.0003906967807919929, "loss": 3.1701, "step": 24623 }, { "epoch": 1.21, "grad_norm": 0.540444552898407, "learning_rate": 0.0003906821049366067, "loss": 3.037, "step": 24624 }, { "epoch": 1.21, "grad_norm": 0.583018958568573, "learning_rate": 0.00039066742884238134, "loss": 3.1316, "step": 24625 }, { "epoch": 1.21, "grad_norm": 0.5733615159988403, "learning_rate": 0.00039065275250935535, "loss": 2.998, "step": 24626 }, { "epoch": 1.21, "grad_norm": 0.5599751472473145, "learning_rate": 0.00039063807593756744, "loss": 3.0973, "step": 24627 }, { "epoch": 1.21, "grad_norm": 0.5724146962165833, "learning_rate": 0.0003906233991270563, "loss": 2.935, "step": 24628 }, { "epoch": 1.21, "grad_norm": 0.5427190065383911, "learning_rate": 0.0003906087220778607, "loss": 3.0934, "step": 24629 }, { "epoch": 1.21, "grad_norm": 0.5561226010322571, "learning_rate": 0.000390594044790019, "loss": 3.2373, "step": 24630 }, { "epoch": 1.21, "grad_norm": 0.5783647894859314, "learning_rate": 0.0003905793672635701, "loss": 3.0405, "step": 24631 }, { "epoch": 1.21, "grad_norm": 0.5754805207252502, "learning_rate": 0.00039056468949855253, "loss": 3.2403, "step": 24632 }, { "epoch": 1.21, "grad_norm": 0.558912992477417, "learning_rate": 0.000390550011495005, "loss": 3.0081, "step": 24633 }, { "epoch": 1.21, "grad_norm": 0.5601596236228943, "learning_rate": 0.0003905353332529662, "loss": 3.0755, "step": 24634 }, { "epoch": 1.21, "grad_norm": 0.5683655142784119, "learning_rate": 0.0003905206547724748, "loss": 3.0046, "step": 24635 }, { "epoch": 1.21, "grad_norm": 0.556339681148529, "learning_rate": 0.00039050597605356927, "loss": 3.1934, "step": 24636 }, { "epoch": 1.21, "grad_norm": 0.5865704417228699, "learning_rate": 0.0003904912970962885, "loss": 2.9044, "step": 24637 }, { "epoch": 1.21, "grad_norm": 0.5483426451683044, "learning_rate": 0.00039047661790067107, "loss": 2.9773, "step": 24638 }, { "epoch": 1.21, "grad_norm": 0.5963693857192993, "learning_rate": 0.00039046193846675563, "loss": 3.1899, "step": 24639 }, { "epoch": 1.21, "grad_norm": 0.5514311790466309, "learning_rate": 0.00039044725879458086, "loss": 3.0625, "step": 24640 }, { "epoch": 1.21, "grad_norm": 0.5385047793388367, "learning_rate": 0.0003904325788841854, "loss": 3.1098, "step": 24641 }, { "epoch": 1.21, "grad_norm": 0.5517145991325378, "learning_rate": 0.00039041789873560796, "loss": 2.91, "step": 24642 }, { "epoch": 1.21, "grad_norm": 0.5656097531318665, "learning_rate": 0.00039040321834888725, "loss": 3.1927, "step": 24643 }, { "epoch": 1.21, "grad_norm": 0.5733511447906494, "learning_rate": 0.00039038853772406176, "loss": 3.0975, "step": 24644 }, { "epoch": 1.21, "grad_norm": 0.5602204203605652, "learning_rate": 0.00039037385686117024, "loss": 3.1281, "step": 24645 }, { "epoch": 1.21, "grad_norm": 0.551032304763794, "learning_rate": 0.0003903591757602514, "loss": 3.3527, "step": 24646 }, { "epoch": 1.21, "grad_norm": 0.5796739459037781, "learning_rate": 0.0003903444944213439, "loss": 2.9157, "step": 24647 }, { "epoch": 1.21, "grad_norm": 0.5496836304664612, "learning_rate": 0.0003903298128444864, "loss": 3.0158, "step": 24648 }, { "epoch": 1.21, "grad_norm": 0.5698907375335693, "learning_rate": 0.0003903151310297176, "loss": 3.2225, "step": 24649 }, { "epoch": 1.21, "grad_norm": 0.5394782423973083, "learning_rate": 0.000390300448977076, "loss": 3.0648, "step": 24650 }, { "epoch": 1.21, "grad_norm": 0.5592007040977478, "learning_rate": 0.00039028576668660047, "loss": 3.0059, "step": 24651 }, { "epoch": 1.21, "grad_norm": 0.5389769077301025, "learning_rate": 0.0003902710841583296, "loss": 3.0591, "step": 24652 }, { "epoch": 1.21, "grad_norm": 0.5288125872612, "learning_rate": 0.00039025640139230206, "loss": 3.061, "step": 24653 }, { "epoch": 1.21, "grad_norm": 0.5310102105140686, "learning_rate": 0.0003902417183885566, "loss": 3.0722, "step": 24654 }, { "epoch": 1.21, "grad_norm": 0.5603654980659485, "learning_rate": 0.00039022703514713167, "loss": 2.9501, "step": 24655 }, { "epoch": 1.21, "grad_norm": 0.6040814518928528, "learning_rate": 0.00039021235166806617, "loss": 2.9324, "step": 24656 }, { "epoch": 1.21, "grad_norm": 0.5468594431877136, "learning_rate": 0.00039019766795139873, "loss": 3.1048, "step": 24657 }, { "epoch": 1.21, "grad_norm": 0.5571081042289734, "learning_rate": 0.00039018298399716797, "loss": 3.1354, "step": 24658 }, { "epoch": 1.21, "grad_norm": 0.5484535694122314, "learning_rate": 0.0003901682998054126, "loss": 3.2044, "step": 24659 }, { "epoch": 1.21, "grad_norm": 0.5328761339187622, "learning_rate": 0.0003901536153761712, "loss": 2.9363, "step": 24660 }, { "epoch": 1.21, "grad_norm": 0.5752051472663879, "learning_rate": 0.0003901389307094827, "loss": 3.0491, "step": 24661 }, { "epoch": 1.21, "grad_norm": 0.7036685943603516, "learning_rate": 0.0003901242458053854, "loss": 2.9426, "step": 24662 }, { "epoch": 1.21, "grad_norm": 0.5637723803520203, "learning_rate": 0.0003901095606639183, "loss": 2.9939, "step": 24663 }, { "epoch": 1.21, "grad_norm": 0.5546841025352478, "learning_rate": 0.00039009487528512, "loss": 2.8504, "step": 24664 }, { "epoch": 1.21, "grad_norm": 0.5602949857711792, "learning_rate": 0.00039008018966902913, "loss": 3.3448, "step": 24665 }, { "epoch": 1.21, "grad_norm": 0.5492205023765564, "learning_rate": 0.00039006550381568425, "loss": 3.0376, "step": 24666 }, { "epoch": 1.21, "grad_norm": 0.6059777140617371, "learning_rate": 0.0003900508177251242, "loss": 3.0945, "step": 24667 }, { "epoch": 1.21, "grad_norm": 0.5843932628631592, "learning_rate": 0.0003900361313973877, "loss": 3.0452, "step": 24668 }, { "epoch": 1.21, "grad_norm": 0.59035325050354, "learning_rate": 0.0003900214448325133, "loss": 3.145, "step": 24669 }, { "epoch": 1.21, "grad_norm": 0.5399641394615173, "learning_rate": 0.00039000675803053985, "loss": 3.2487, "step": 24670 }, { "epoch": 1.21, "grad_norm": 0.6116369366645813, "learning_rate": 0.0003899920709915057, "loss": 3.2259, "step": 24671 }, { "epoch": 1.21, "grad_norm": 0.5800071954727173, "learning_rate": 0.00038997738371545, "loss": 2.9216, "step": 24672 }, { "epoch": 1.21, "grad_norm": 0.608817458152771, "learning_rate": 0.00038996269620241103, "loss": 3.0501, "step": 24673 }, { "epoch": 1.21, "grad_norm": 0.5628402829170227, "learning_rate": 0.00038994800845242766, "loss": 3.0112, "step": 24674 }, { "epoch": 1.21, "grad_norm": 0.5566816329956055, "learning_rate": 0.00038993332046553857, "loss": 3.2272, "step": 24675 }, { "epoch": 1.21, "grad_norm": 0.5373614430427551, "learning_rate": 0.0003899186322417824, "loss": 2.8742, "step": 24676 }, { "epoch": 1.21, "grad_norm": 0.5301047563552856, "learning_rate": 0.0003899039437811979, "loss": 3.0577, "step": 24677 }, { "epoch": 1.21, "grad_norm": 0.5556225776672363, "learning_rate": 0.00038988925508382367, "loss": 3.0731, "step": 24678 }, { "epoch": 1.21, "grad_norm": 0.5261009931564331, "learning_rate": 0.00038987456614969853, "loss": 2.9431, "step": 24679 }, { "epoch": 1.21, "grad_norm": 0.6140367388725281, "learning_rate": 0.000389859876978861, "loss": 3.1981, "step": 24680 }, { "epoch": 1.21, "grad_norm": 0.5956748127937317, "learning_rate": 0.00038984518757134983, "loss": 3.1423, "step": 24681 }, { "epoch": 1.21, "grad_norm": 0.5351552963256836, "learning_rate": 0.00038983049792720373, "loss": 3.0685, "step": 24682 }, { "epoch": 1.21, "grad_norm": 0.5489239692687988, "learning_rate": 0.00038981580804646146, "loss": 3.2112, "step": 24683 }, { "epoch": 1.21, "grad_norm": 0.5213435292243958, "learning_rate": 0.00038980111792916156, "loss": 3.0596, "step": 24684 }, { "epoch": 1.21, "grad_norm": 0.5946751236915588, "learning_rate": 0.00038978642757534285, "loss": 3.0055, "step": 24685 }, { "epoch": 1.21, "grad_norm": 0.5385335087776184, "learning_rate": 0.0003897717369850439, "loss": 3.146, "step": 24686 }, { "epoch": 1.21, "grad_norm": 0.5397436022758484, "learning_rate": 0.00038975704615830354, "loss": 3.0557, "step": 24687 }, { "epoch": 1.21, "grad_norm": 0.5667961239814758, "learning_rate": 0.00038974235509516036, "loss": 3.1658, "step": 24688 }, { "epoch": 1.21, "grad_norm": 0.5461593866348267, "learning_rate": 0.0003897276637956531, "loss": 3.0923, "step": 24689 }, { "epoch": 1.21, "grad_norm": 0.5522091388702393, "learning_rate": 0.0003897129722598204, "loss": 3.0271, "step": 24690 }, { "epoch": 1.21, "grad_norm": 0.5801872611045837, "learning_rate": 0.00038969828048770105, "loss": 3.3063, "step": 24691 }, { "epoch": 1.21, "grad_norm": 0.5313283801078796, "learning_rate": 0.0003896835884793337, "loss": 2.9629, "step": 24692 }, { "epoch": 1.21, "grad_norm": 0.5575485229492188, "learning_rate": 0.000389668896234757, "loss": 3.1138, "step": 24693 }, { "epoch": 1.21, "grad_norm": 0.5732911825180054, "learning_rate": 0.0003896542037540098, "loss": 3.1065, "step": 24694 }, { "epoch": 1.21, "grad_norm": 0.5623870491981506, "learning_rate": 0.0003896395110371306, "loss": 3.2062, "step": 24695 }, { "epoch": 1.21, "grad_norm": 0.5679678320884705, "learning_rate": 0.00038962481808415807, "loss": 3.0646, "step": 24696 }, { "epoch": 1.21, "grad_norm": 0.578198254108429, "learning_rate": 0.0003896101248951312, "loss": 3.3072, "step": 24697 }, { "epoch": 1.21, "grad_norm": 0.5689905881881714, "learning_rate": 0.0003895954314700884, "loss": 3.1302, "step": 24698 }, { "epoch": 1.21, "grad_norm": 0.5264217853546143, "learning_rate": 0.00038958073780906845, "loss": 3.0872, "step": 24699 }, { "epoch": 1.21, "grad_norm": 0.5618980526924133, "learning_rate": 0.00038956604391211016, "loss": 3.2415, "step": 24700 }, { "epoch": 1.21, "grad_norm": 0.5655535459518433, "learning_rate": 0.0003895513497792521, "loss": 3.0696, "step": 24701 }, { "epoch": 1.21, "grad_norm": 0.5163285136222839, "learning_rate": 0.000389536655410533, "loss": 3.1711, "step": 24702 }, { "epoch": 1.21, "grad_norm": 0.7728413343429565, "learning_rate": 0.0003895219608059916, "loss": 3.008, "step": 24703 }, { "epoch": 1.21, "grad_norm": 0.5822486877441406, "learning_rate": 0.0003895072659656666, "loss": 3.1006, "step": 24704 }, { "epoch": 1.21, "grad_norm": 0.5410815477371216, "learning_rate": 0.00038949257088959674, "loss": 2.9528, "step": 24705 }, { "epoch": 1.21, "grad_norm": 0.5558587312698364, "learning_rate": 0.0003894778755778206, "loss": 3.0161, "step": 24706 }, { "epoch": 1.21, "grad_norm": 0.5925754904747009, "learning_rate": 0.0003894631800303769, "loss": 3.1687, "step": 24707 }, { "epoch": 1.21, "grad_norm": 0.5678203701972961, "learning_rate": 0.00038944848424730456, "loss": 3.1855, "step": 24708 }, { "epoch": 1.21, "grad_norm": 0.5488570928573608, "learning_rate": 0.00038943378822864203, "loss": 3.2203, "step": 24709 }, { "epoch": 1.21, "grad_norm": 0.5649476051330566, "learning_rate": 0.00038941909197442813, "loss": 3.1933, "step": 24710 }, { "epoch": 1.21, "grad_norm": 0.599998950958252, "learning_rate": 0.0003894043954847015, "loss": 3.1783, "step": 24711 }, { "epoch": 1.21, "grad_norm": 0.5275146961212158, "learning_rate": 0.00038938969875950097, "loss": 3.058, "step": 24712 }, { "epoch": 1.21, "grad_norm": 0.5559340119361877, "learning_rate": 0.0003893750017988651, "loss": 3.2087, "step": 24713 }, { "epoch": 1.21, "grad_norm": 0.5753278732299805, "learning_rate": 0.00038936030460283276, "loss": 3.2299, "step": 24714 }, { "epoch": 1.21, "grad_norm": 0.5743871927261353, "learning_rate": 0.0003893456071714425, "loss": 3.1301, "step": 24715 }, { "epoch": 1.21, "grad_norm": 0.5973168611526489, "learning_rate": 0.0003893309095047332, "loss": 2.9648, "step": 24716 }, { "epoch": 1.21, "grad_norm": 0.5473564863204956, "learning_rate": 0.00038931621160274337, "loss": 3.1843, "step": 24717 }, { "epoch": 1.21, "grad_norm": 0.5533649325370789, "learning_rate": 0.00038930151346551185, "loss": 2.9377, "step": 24718 }, { "epoch": 1.21, "grad_norm": 0.551533043384552, "learning_rate": 0.0003892868150930775, "loss": 3.2214, "step": 24719 }, { "epoch": 1.21, "grad_norm": 0.5771843791007996, "learning_rate": 0.0003892721164854787, "loss": 3.0803, "step": 24720 }, { "epoch": 1.21, "grad_norm": 0.5571311712265015, "learning_rate": 0.0003892574176427543, "loss": 2.997, "step": 24721 }, { "epoch": 1.21, "grad_norm": 0.5600820183753967, "learning_rate": 0.00038924271856494305, "loss": 3.082, "step": 24722 }, { "epoch": 1.21, "grad_norm": 0.5592990517616272, "learning_rate": 0.0003892280192520837, "loss": 3.0899, "step": 24723 }, { "epoch": 1.21, "grad_norm": 0.5576978921890259, "learning_rate": 0.00038921331970421494, "loss": 3.3335, "step": 24724 }, { "epoch": 1.21, "grad_norm": 0.5841004848480225, "learning_rate": 0.0003891986199213754, "loss": 3.0562, "step": 24725 }, { "epoch": 1.21, "grad_norm": 0.5277174115180969, "learning_rate": 0.0003891839199036039, "loss": 2.9582, "step": 24726 }, { "epoch": 1.21, "grad_norm": 0.5508502125740051, "learning_rate": 0.0003891692196509391, "loss": 3.1022, "step": 24727 }, { "epoch": 1.21, "grad_norm": 0.5699372887611389, "learning_rate": 0.00038915451916341973, "loss": 3.0745, "step": 24728 }, { "epoch": 1.21, "grad_norm": 0.5580192804336548, "learning_rate": 0.0003891398184410846, "loss": 2.8848, "step": 24729 }, { "epoch": 1.21, "grad_norm": 0.6069357395172119, "learning_rate": 0.00038912511748397235, "loss": 2.7991, "step": 24730 }, { "epoch": 1.21, "grad_norm": 0.5295454859733582, "learning_rate": 0.00038911041629212156, "loss": 2.9931, "step": 24731 }, { "epoch": 1.21, "grad_norm": 0.5806009769439697, "learning_rate": 0.0003890957148655711, "loss": 3.0751, "step": 24732 }, { "epoch": 1.21, "grad_norm": 0.5367207527160645, "learning_rate": 0.0003890810132043597, "loss": 2.8636, "step": 24733 }, { "epoch": 1.21, "grad_norm": 0.5449971556663513, "learning_rate": 0.00038906631130852614, "loss": 2.8036, "step": 24734 }, { "epoch": 1.21, "grad_norm": 0.5441697835922241, "learning_rate": 0.00038905160917810896, "loss": 3.0157, "step": 24735 }, { "epoch": 1.21, "grad_norm": 0.5573702454566956, "learning_rate": 0.000389036906813147, "loss": 3.2127, "step": 24736 }, { "epoch": 1.21, "grad_norm": 0.5617653131484985, "learning_rate": 0.000389022204213679, "loss": 3.0465, "step": 24737 }, { "epoch": 1.21, "grad_norm": 0.5416889786720276, "learning_rate": 0.0003890075013797437, "loss": 3.1156, "step": 24738 }, { "epoch": 1.21, "grad_norm": 0.5474164485931396, "learning_rate": 0.0003889927983113796, "loss": 3.0115, "step": 24739 }, { "epoch": 1.21, "grad_norm": 0.5465503931045532, "learning_rate": 0.0003889780950086257, "loss": 3.2573, "step": 24740 }, { "epoch": 1.21, "grad_norm": 0.5438640117645264, "learning_rate": 0.00038896339147152066, "loss": 3.0766, "step": 24741 }, { "epoch": 1.21, "grad_norm": 0.5305720567703247, "learning_rate": 0.00038894868770010315, "loss": 2.8584, "step": 24742 }, { "epoch": 1.21, "grad_norm": 0.5442739129066467, "learning_rate": 0.0003889339836944118, "loss": 3.0769, "step": 24743 }, { "epoch": 1.21, "grad_norm": 0.6273128390312195, "learning_rate": 0.0003889192794544856, "loss": 2.9629, "step": 24744 }, { "epoch": 1.21, "grad_norm": 0.5607988834381104, "learning_rate": 0.0003889045749803631, "loss": 3.2894, "step": 24745 }, { "epoch": 1.21, "grad_norm": 0.5226300954818726, "learning_rate": 0.00038888987027208303, "loss": 3.0109, "step": 24746 }, { "epoch": 1.21, "grad_norm": 0.5920791625976562, "learning_rate": 0.0003888751653296841, "loss": 3.0861, "step": 24747 }, { "epoch": 1.21, "grad_norm": 0.552906334400177, "learning_rate": 0.0003888604601532052, "loss": 3.0653, "step": 24748 }, { "epoch": 1.21, "grad_norm": 0.565069317817688, "learning_rate": 0.0003888457547426848, "loss": 3.0598, "step": 24749 }, { "epoch": 1.21, "grad_norm": 0.5915144681930542, "learning_rate": 0.0003888310490981619, "loss": 3.0202, "step": 24750 }, { "epoch": 1.21, "grad_norm": 0.5616911053657532, "learning_rate": 0.0003888163432196751, "loss": 2.9658, "step": 24751 }, { "epoch": 1.21, "grad_norm": 0.5911539793014526, "learning_rate": 0.0003888016371072631, "loss": 2.8024, "step": 24752 }, { "epoch": 1.21, "grad_norm": 0.5526560544967651, "learning_rate": 0.0003887869307609647, "loss": 3.0992, "step": 24753 }, { "epoch": 1.21, "grad_norm": 0.5728417634963989, "learning_rate": 0.00038877222418081856, "loss": 3.0105, "step": 24754 }, { "epoch": 1.21, "grad_norm": 0.5776088833808899, "learning_rate": 0.00038875751736686353, "loss": 3.1478, "step": 24755 }, { "epoch": 1.21, "grad_norm": 0.5825873613357544, "learning_rate": 0.00038874281031913837, "loss": 3.2437, "step": 24756 }, { "epoch": 1.21, "grad_norm": 0.5393372774124146, "learning_rate": 0.0003887281030376816, "loss": 3.0675, "step": 24757 }, { "epoch": 1.21, "grad_norm": 0.5632210969924927, "learning_rate": 0.00038871339552253205, "loss": 2.968, "step": 24758 }, { "epoch": 1.21, "grad_norm": 0.553977370262146, "learning_rate": 0.00038869868777372863, "loss": 3.0479, "step": 24759 }, { "epoch": 1.21, "grad_norm": 0.588313639163971, "learning_rate": 0.00038868397979130984, "loss": 3.0204, "step": 24760 }, { "epoch": 1.21, "grad_norm": 0.5613222718238831, "learning_rate": 0.0003886692715753145, "loss": 2.8621, "step": 24761 }, { "epoch": 1.21, "grad_norm": 0.5414893627166748, "learning_rate": 0.0003886545631257814, "loss": 2.8851, "step": 24762 }, { "epoch": 1.21, "grad_norm": 0.5642477869987488, "learning_rate": 0.0003886398544427493, "loss": 2.9733, "step": 24763 }, { "epoch": 1.21, "grad_norm": 0.5491669178009033, "learning_rate": 0.00038862514552625676, "loss": 3.1974, "step": 24764 }, { "epoch": 1.21, "grad_norm": 0.5662064552307129, "learning_rate": 0.0003886104363763427, "loss": 2.9075, "step": 24765 }, { "epoch": 1.21, "grad_norm": 0.5613273978233337, "learning_rate": 0.0003885957269930458, "loss": 3.1875, "step": 24766 }, { "epoch": 1.21, "grad_norm": 0.5412261486053467, "learning_rate": 0.0003885810173764049, "loss": 3.193, "step": 24767 }, { "epoch": 1.21, "grad_norm": 0.5381949543952942, "learning_rate": 0.0003885663075264585, "loss": 3.0033, "step": 24768 }, { "epoch": 1.21, "grad_norm": 0.5455308556556702, "learning_rate": 0.0003885515974432455, "loss": 3.0217, "step": 24769 }, { "epoch": 1.21, "grad_norm": 0.5200797915458679, "learning_rate": 0.0003885368871268047, "loss": 2.9717, "step": 24770 }, { "epoch": 1.21, "grad_norm": 0.5431541800498962, "learning_rate": 0.00038852217657717484, "loss": 2.9984, "step": 24771 }, { "epoch": 1.21, "grad_norm": 0.5343523621559143, "learning_rate": 0.0003885074657943945, "loss": 3.1071, "step": 24772 }, { "epoch": 1.21, "grad_norm": 0.5573854446411133, "learning_rate": 0.00038849275477850255, "loss": 3.0734, "step": 24773 }, { "epoch": 1.21, "grad_norm": 0.5465708374977112, "learning_rate": 0.0003884780435295377, "loss": 3.0892, "step": 24774 }, { "epoch": 1.21, "grad_norm": 0.5360824465751648, "learning_rate": 0.00038846333204753874, "loss": 2.935, "step": 24775 }, { "epoch": 1.21, "grad_norm": 0.5567554831504822, "learning_rate": 0.0003884486203325444, "loss": 3.0637, "step": 24776 }, { "epoch": 1.21, "grad_norm": 0.5666375756263733, "learning_rate": 0.0003884339083845934, "loss": 2.9533, "step": 24777 }, { "epoch": 1.21, "grad_norm": 0.5433690547943115, "learning_rate": 0.0003884191962037245, "loss": 3.0508, "step": 24778 }, { "epoch": 1.21, "grad_norm": 0.5279322266578674, "learning_rate": 0.0003884044837899764, "loss": 3.0479, "step": 24779 }, { "epoch": 1.21, "grad_norm": 0.524019718170166, "learning_rate": 0.00038838977114338797, "loss": 3.0864, "step": 24780 }, { "epoch": 1.21, "grad_norm": 0.5386550426483154, "learning_rate": 0.00038837505826399795, "loss": 3.109, "step": 24781 }, { "epoch": 1.21, "grad_norm": 0.5483960509300232, "learning_rate": 0.0003883603451518449, "loss": 2.943, "step": 24782 }, { "epoch": 1.21, "grad_norm": 0.5979861617088318, "learning_rate": 0.0003883456318069678, "loss": 3.1003, "step": 24783 }, { "epoch": 1.21, "grad_norm": 0.5268682837486267, "learning_rate": 0.0003883309182294052, "loss": 3.2098, "step": 24784 }, { "epoch": 1.21, "grad_norm": 0.5503677129745483, "learning_rate": 0.0003883162044191962, "loss": 3.1854, "step": 24785 }, { "epoch": 1.21, "grad_norm": 0.5744519829750061, "learning_rate": 0.00038830149037637906, "loss": 3.1521, "step": 24786 }, { "epoch": 1.21, "grad_norm": 0.5381206274032593, "learning_rate": 0.00038828677610099294, "loss": 3.1749, "step": 24787 }, { "epoch": 1.21, "grad_norm": 0.569991946220398, "learning_rate": 0.00038827206159307634, "loss": 3.2375, "step": 24788 }, { "epoch": 1.21, "grad_norm": 0.5447730422019958, "learning_rate": 0.0003882573468526682, "loss": 3.1548, "step": 24789 }, { "epoch": 1.21, "grad_norm": 0.5430207252502441, "learning_rate": 0.0003882426318798071, "loss": 3.0477, "step": 24790 }, { "epoch": 1.21, "grad_norm": 0.5270898342132568, "learning_rate": 0.00038822791667453196, "loss": 3.1821, "step": 24791 }, { "epoch": 1.21, "grad_norm": 0.5762162804603577, "learning_rate": 0.0003882132012368815, "loss": 2.8535, "step": 24792 }, { "epoch": 1.22, "grad_norm": 0.5520655512809753, "learning_rate": 0.00038819848556689444, "loss": 3.1805, "step": 24793 }, { "epoch": 1.22, "grad_norm": 0.557744026184082, "learning_rate": 0.00038818376966460945, "loss": 3.0716, "step": 24794 }, { "epoch": 1.22, "grad_norm": 0.5885498523712158, "learning_rate": 0.0003881690535300654, "loss": 3.1626, "step": 24795 }, { "epoch": 1.22, "grad_norm": 0.559755802154541, "learning_rate": 0.00038815433716330117, "loss": 3.0249, "step": 24796 }, { "epoch": 1.22, "grad_norm": 0.5420626401901245, "learning_rate": 0.0003881396205643553, "loss": 3.1339, "step": 24797 }, { "epoch": 1.22, "grad_norm": 0.5564322471618652, "learning_rate": 0.00038812490373326655, "loss": 2.9808, "step": 24798 }, { "epoch": 1.22, "grad_norm": 0.5733435750007629, "learning_rate": 0.00038811018667007387, "loss": 2.9819, "step": 24799 }, { "epoch": 1.22, "grad_norm": 0.5582762956619263, "learning_rate": 0.00038809546937481585, "loss": 2.9805, "step": 24800 }, { "epoch": 1.22, "grad_norm": 0.577571451663971, "learning_rate": 0.00038808075184753135, "loss": 3.1034, "step": 24801 }, { "epoch": 1.22, "grad_norm": 0.5833460688591003, "learning_rate": 0.000388066034088259, "loss": 3.1709, "step": 24802 }, { "epoch": 1.22, "grad_norm": 0.5358278155326843, "learning_rate": 0.0003880513160970378, "loss": 2.8111, "step": 24803 }, { "epoch": 1.22, "grad_norm": 0.5528193116188049, "learning_rate": 0.00038803659787390633, "loss": 3.4981, "step": 24804 }, { "epoch": 1.22, "grad_norm": 0.5586181879043579, "learning_rate": 0.0003880218794189034, "loss": 2.8715, "step": 24805 }, { "epoch": 1.22, "grad_norm": 0.5519729256629944, "learning_rate": 0.00038800716073206785, "loss": 3.1437, "step": 24806 }, { "epoch": 1.22, "grad_norm": 0.5765265226364136, "learning_rate": 0.0003879924418134383, "loss": 3.204, "step": 24807 }, { "epoch": 1.22, "grad_norm": 0.5296533107757568, "learning_rate": 0.00038797772266305365, "loss": 3.1702, "step": 24808 }, { "epoch": 1.22, "grad_norm": 0.5647825598716736, "learning_rate": 0.00038796300328095245, "loss": 3.1688, "step": 24809 }, { "epoch": 1.22, "grad_norm": 0.5361009836196899, "learning_rate": 0.00038794828366717384, "loss": 3.139, "step": 24810 }, { "epoch": 1.22, "grad_norm": 0.5457804799079895, "learning_rate": 0.0003879335638217563, "loss": 3.1071, "step": 24811 }, { "epoch": 1.22, "grad_norm": 0.5646703839302063, "learning_rate": 0.0003879188437447386, "loss": 3.1663, "step": 24812 }, { "epoch": 1.22, "grad_norm": 0.5620137453079224, "learning_rate": 0.0003879041234361597, "loss": 3.2054, "step": 24813 }, { "epoch": 1.22, "grad_norm": 0.5515795350074768, "learning_rate": 0.0003878894028960582, "loss": 3.1787, "step": 24814 }, { "epoch": 1.22, "grad_norm": 0.5928753614425659, "learning_rate": 0.00038787468212447287, "loss": 3.2087, "step": 24815 }, { "epoch": 1.22, "grad_norm": 0.5143396854400635, "learning_rate": 0.00038785996112144263, "loss": 3.0081, "step": 24816 }, { "epoch": 1.22, "grad_norm": 0.5518531799316406, "learning_rate": 0.0003878452398870061, "loss": 3.0035, "step": 24817 }, { "epoch": 1.22, "grad_norm": 0.5611719489097595, "learning_rate": 0.00038783051842120227, "loss": 3.1042, "step": 24818 }, { "epoch": 1.22, "grad_norm": 0.5301064252853394, "learning_rate": 0.0003878157967240696, "loss": 3.0097, "step": 24819 }, { "epoch": 1.22, "grad_norm": 0.575943648815155, "learning_rate": 0.00038780107479564695, "loss": 2.8759, "step": 24820 }, { "epoch": 1.22, "grad_norm": 0.5593007802963257, "learning_rate": 0.00038778635263597336, "loss": 3.2868, "step": 24821 }, { "epoch": 1.22, "grad_norm": 0.5593023300170898, "learning_rate": 0.00038777163024508726, "loss": 3.0437, "step": 24822 }, { "epoch": 1.22, "grad_norm": 0.5502452254295349, "learning_rate": 0.0003877569076230277, "loss": 3.179, "step": 24823 }, { "epoch": 1.22, "grad_norm": 0.5816333889961243, "learning_rate": 0.0003877421847698333, "loss": 2.8658, "step": 24824 }, { "epoch": 1.22, "grad_norm": 0.5567662119865417, "learning_rate": 0.0003877274616855428, "loss": 2.9821, "step": 24825 }, { "epoch": 1.22, "grad_norm": 0.5488287210464478, "learning_rate": 0.0003877127383701951, "loss": 3.1091, "step": 24826 }, { "epoch": 1.22, "grad_norm": 0.534261167049408, "learning_rate": 0.0003876980148238289, "loss": 2.8979, "step": 24827 }, { "epoch": 1.22, "grad_norm": 0.5407263040542603, "learning_rate": 0.000387683291046483, "loss": 3.2039, "step": 24828 }, { "epoch": 1.22, "grad_norm": 0.5979218482971191, "learning_rate": 0.0003876685670381964, "loss": 3.146, "step": 24829 }, { "epoch": 1.22, "grad_norm": 0.5862070322036743, "learning_rate": 0.0003876538427990073, "loss": 3.0431, "step": 24830 }, { "epoch": 1.22, "grad_norm": 0.5690160393714905, "learning_rate": 0.0003876391183289551, "loss": 3.1891, "step": 24831 }, { "epoch": 1.22, "grad_norm": 0.5591949224472046, "learning_rate": 0.00038762439362807825, "loss": 3.1933, "step": 24832 }, { "epoch": 1.22, "grad_norm": 0.5995550751686096, "learning_rate": 0.0003876096686964157, "loss": 3.0018, "step": 24833 }, { "epoch": 1.22, "grad_norm": 0.5738639235496521, "learning_rate": 0.000387594943534006, "loss": 3.0424, "step": 24834 }, { "epoch": 1.22, "grad_norm": 0.5617707371711731, "learning_rate": 0.00038758021814088806, "loss": 2.9083, "step": 24835 }, { "epoch": 1.22, "grad_norm": 0.5648507475852966, "learning_rate": 0.00038756549251710087, "loss": 2.8961, "step": 24836 }, { "epoch": 1.22, "grad_norm": 0.6076490879058838, "learning_rate": 0.00038755076666268284, "loss": 3.1156, "step": 24837 }, { "epoch": 1.22, "grad_norm": 0.5720618963241577, "learning_rate": 0.00038753604057767307, "loss": 2.8891, "step": 24838 }, { "epoch": 1.22, "grad_norm": 0.5429185628890991, "learning_rate": 0.00038752131426211014, "loss": 3.1625, "step": 24839 }, { "epoch": 1.22, "grad_norm": 0.5565420389175415, "learning_rate": 0.00038750658771603293, "loss": 3.0722, "step": 24840 }, { "epoch": 1.22, "grad_norm": 0.5758388042449951, "learning_rate": 0.00038749186093948025, "loss": 3.2304, "step": 24841 }, { "epoch": 1.22, "grad_norm": 0.5630769729614258, "learning_rate": 0.00038747713393249075, "loss": 3.1224, "step": 24842 }, { "epoch": 1.22, "grad_norm": 0.5871720910072327, "learning_rate": 0.0003874624066951034, "loss": 2.9868, "step": 24843 }, { "epoch": 1.22, "grad_norm": 0.5743727684020996, "learning_rate": 0.00038744767922735694, "loss": 3.1502, "step": 24844 }, { "epoch": 1.22, "grad_norm": 0.5455135107040405, "learning_rate": 0.00038743295152929, "loss": 3.2048, "step": 24845 }, { "epoch": 1.22, "grad_norm": 0.5863217115402222, "learning_rate": 0.0003874182236009415, "loss": 3.1062, "step": 24846 }, { "epoch": 1.22, "grad_norm": 0.5657460689544678, "learning_rate": 0.00038740349544235037, "loss": 3.026, "step": 24847 }, { "epoch": 1.22, "grad_norm": 0.5640048980712891, "learning_rate": 0.0003873887670535551, "loss": 2.9006, "step": 24848 }, { "epoch": 1.22, "grad_norm": 0.5563377737998962, "learning_rate": 0.00038737403843459476, "loss": 3.159, "step": 24849 }, { "epoch": 1.22, "grad_norm": 0.5812902450561523, "learning_rate": 0.0003873593095855079, "loss": 3.2999, "step": 24850 }, { "epoch": 1.22, "grad_norm": 0.5502186417579651, "learning_rate": 0.0003873445805063335, "loss": 3.1485, "step": 24851 }, { "epoch": 1.22, "grad_norm": 0.6045628786087036, "learning_rate": 0.00038732985119711025, "loss": 3.1333, "step": 24852 }, { "epoch": 1.22, "grad_norm": 0.5175624489784241, "learning_rate": 0.000387315121657877, "loss": 3.1418, "step": 24853 }, { "epoch": 1.22, "grad_norm": 0.5742688179016113, "learning_rate": 0.00038730039188867254, "loss": 3.2371, "step": 24854 }, { "epoch": 1.22, "grad_norm": 0.5657257437705994, "learning_rate": 0.0003872856618895356, "loss": 3.2323, "step": 24855 }, { "epoch": 1.22, "grad_norm": 0.5566824674606323, "learning_rate": 0.00038727093166050503, "loss": 2.8398, "step": 24856 }, { "epoch": 1.22, "grad_norm": 0.5399627089500427, "learning_rate": 0.0003872562012016197, "loss": 2.7679, "step": 24857 }, { "epoch": 1.22, "grad_norm": 0.5554389357566833, "learning_rate": 0.0003872414705129183, "loss": 2.9823, "step": 24858 }, { "epoch": 1.22, "grad_norm": 0.5519767999649048, "learning_rate": 0.00038722673959443963, "loss": 3.1616, "step": 24859 }, { "epoch": 1.22, "grad_norm": 0.5414425134658813, "learning_rate": 0.00038721200844622257, "loss": 3.1378, "step": 24860 }, { "epoch": 1.22, "grad_norm": 0.5529886484146118, "learning_rate": 0.0003871972770683058, "loss": 3.2881, "step": 24861 }, { "epoch": 1.22, "grad_norm": 0.5492392778396606, "learning_rate": 0.00038718254546072815, "loss": 3.008, "step": 24862 }, { "epoch": 1.22, "grad_norm": 0.5687651634216309, "learning_rate": 0.00038716781362352847, "loss": 3.2633, "step": 24863 }, { "epoch": 1.22, "grad_norm": 0.5575495958328247, "learning_rate": 0.0003871530815567456, "loss": 2.9893, "step": 24864 }, { "epoch": 1.22, "grad_norm": 0.6896212100982666, "learning_rate": 0.00038713834926041825, "loss": 3.0161, "step": 24865 }, { "epoch": 1.22, "grad_norm": 0.5157473087310791, "learning_rate": 0.00038712361673458524, "loss": 3.2208, "step": 24866 }, { "epoch": 1.22, "grad_norm": 0.5381258130073547, "learning_rate": 0.00038710888397928544, "loss": 2.9892, "step": 24867 }, { "epoch": 1.22, "grad_norm": 0.577904999256134, "learning_rate": 0.00038709415099455756, "loss": 3.0439, "step": 24868 }, { "epoch": 1.22, "grad_norm": 0.5449947118759155, "learning_rate": 0.00038707941778044053, "loss": 2.9669, "step": 24869 }, { "epoch": 1.22, "grad_norm": 0.557723879814148, "learning_rate": 0.000387064684336973, "loss": 3.1256, "step": 24870 }, { "epoch": 1.22, "grad_norm": 0.557279109954834, "learning_rate": 0.0003870499506641938, "loss": 3.046, "step": 24871 }, { "epoch": 1.22, "grad_norm": 0.5531541705131531, "learning_rate": 0.0003870352167621419, "loss": 2.9408, "step": 24872 }, { "epoch": 1.22, "grad_norm": 0.5873280167579651, "learning_rate": 0.0003870204826308559, "loss": 3.2446, "step": 24873 }, { "epoch": 1.22, "grad_norm": 0.5433573722839355, "learning_rate": 0.0003870057482703748, "loss": 3.1398, "step": 24874 }, { "epoch": 1.22, "grad_norm": 0.574711263179779, "learning_rate": 0.0003869910136807372, "loss": 3.261, "step": 24875 }, { "epoch": 1.22, "grad_norm": 0.5614281296730042, "learning_rate": 0.00038697627886198204, "loss": 3.2698, "step": 24876 }, { "epoch": 1.22, "grad_norm": 0.5823600888252258, "learning_rate": 0.0003869615438141481, "loss": 2.9666, "step": 24877 }, { "epoch": 1.22, "grad_norm": 0.5654398798942566, "learning_rate": 0.00038694680853727423, "loss": 3.2506, "step": 24878 }, { "epoch": 1.22, "grad_norm": 0.6435055136680603, "learning_rate": 0.0003869320730313991, "loss": 3.0697, "step": 24879 }, { "epoch": 1.22, "grad_norm": 0.5971924662590027, "learning_rate": 0.0003869173372965618, "loss": 3.1637, "step": 24880 }, { "epoch": 1.22, "grad_norm": 0.5890087485313416, "learning_rate": 0.0003869026013328008, "loss": 3.1743, "step": 24881 }, { "epoch": 1.22, "grad_norm": 0.5819125771522522, "learning_rate": 0.0003868878651401551, "loss": 3.0368, "step": 24882 }, { "epoch": 1.22, "grad_norm": 0.5607448816299438, "learning_rate": 0.00038687312871866353, "loss": 3.2866, "step": 24883 }, { "epoch": 1.22, "grad_norm": 0.5538012981414795, "learning_rate": 0.0003868583920683648, "loss": 2.9722, "step": 24884 }, { "epoch": 1.22, "grad_norm": 0.5519068241119385, "learning_rate": 0.00038684365518929783, "loss": 3.1197, "step": 24885 }, { "epoch": 1.22, "grad_norm": 0.5657366514205933, "learning_rate": 0.00038682891808150126, "loss": 3.3442, "step": 24886 }, { "epoch": 1.22, "grad_norm": 0.5565481781959534, "learning_rate": 0.00038681418074501425, "loss": 3.1246, "step": 24887 }, { "epoch": 1.22, "grad_norm": 0.5680059194564819, "learning_rate": 0.00038679944317987524, "loss": 2.9612, "step": 24888 }, { "epoch": 1.22, "grad_norm": 0.5660066604614258, "learning_rate": 0.00038678470538612325, "loss": 3.0581, "step": 24889 }, { "epoch": 1.22, "grad_norm": 0.575242280960083, "learning_rate": 0.00038676996736379705, "loss": 3.0885, "step": 24890 }, { "epoch": 1.22, "grad_norm": 0.5943603515625, "learning_rate": 0.00038675522911293546, "loss": 3.0541, "step": 24891 }, { "epoch": 1.22, "grad_norm": 0.5383366942405701, "learning_rate": 0.0003867404906335773, "loss": 2.9924, "step": 24892 }, { "epoch": 1.22, "grad_norm": 0.5978488326072693, "learning_rate": 0.0003867257519257614, "loss": 3.0037, "step": 24893 }, { "epoch": 1.22, "grad_norm": 0.5685155987739563, "learning_rate": 0.00038671101298952653, "loss": 3.1197, "step": 24894 }, { "epoch": 1.22, "grad_norm": 0.5263768434524536, "learning_rate": 0.00038669627382491154, "loss": 3.3945, "step": 24895 }, { "epoch": 1.22, "grad_norm": 0.5680824518203735, "learning_rate": 0.0003866815344319552, "loss": 3.2136, "step": 24896 }, { "epoch": 1.22, "grad_norm": 0.5891380906105042, "learning_rate": 0.0003866667948106964, "loss": 3.1585, "step": 24897 }, { "epoch": 1.22, "grad_norm": 0.5633382797241211, "learning_rate": 0.000386652054961174, "loss": 3.1851, "step": 24898 }, { "epoch": 1.22, "grad_norm": 0.5629472732543945, "learning_rate": 0.00038663731488342674, "loss": 3.1504, "step": 24899 }, { "epoch": 1.22, "grad_norm": 0.5522317290306091, "learning_rate": 0.0003866225745774935, "loss": 3.1356, "step": 24900 }, { "epoch": 1.22, "grad_norm": 0.5409548282623291, "learning_rate": 0.0003866078340434129, "loss": 3.1164, "step": 24901 }, { "epoch": 1.22, "grad_norm": 0.5608346462249756, "learning_rate": 0.0003865930932812241, "loss": 2.968, "step": 24902 }, { "epoch": 1.22, "grad_norm": 0.5439483523368835, "learning_rate": 0.00038657835229096577, "loss": 3.13, "step": 24903 }, { "epoch": 1.22, "grad_norm": 0.5601171255111694, "learning_rate": 0.00038656361107267665, "loss": 3.0996, "step": 24904 }, { "epoch": 1.22, "grad_norm": 0.5870618224143982, "learning_rate": 0.00038654886962639565, "loss": 3.0531, "step": 24905 }, { "epoch": 1.22, "grad_norm": 0.6319909691810608, "learning_rate": 0.0003865341279521617, "loss": 3.1479, "step": 24906 }, { "epoch": 1.22, "grad_norm": 0.6008051037788391, "learning_rate": 0.00038651938605001324, "loss": 2.8563, "step": 24907 }, { "epoch": 1.22, "grad_norm": 0.549253523349762, "learning_rate": 0.0003865046439199895, "loss": 3.1851, "step": 24908 }, { "epoch": 1.22, "grad_norm": 0.5713079571723938, "learning_rate": 0.0003864899015621293, "loss": 3.2438, "step": 24909 }, { "epoch": 1.22, "grad_norm": 0.55234694480896, "learning_rate": 0.00038647515897647126, "loss": 3.1086, "step": 24910 }, { "epoch": 1.22, "grad_norm": 0.5803124308586121, "learning_rate": 0.00038646041616305425, "loss": 2.9661, "step": 24911 }, { "epoch": 1.22, "grad_norm": 0.5373254418373108, "learning_rate": 0.0003864456731219172, "loss": 3.077, "step": 24912 }, { "epoch": 1.22, "grad_norm": 0.5681374669075012, "learning_rate": 0.00038643092985309887, "loss": 3.1995, "step": 24913 }, { "epoch": 1.22, "grad_norm": 0.586463451385498, "learning_rate": 0.00038641618635663807, "loss": 2.939, "step": 24914 }, { "epoch": 1.22, "grad_norm": 0.5377286076545715, "learning_rate": 0.0003864014426325737, "loss": 3.2162, "step": 24915 }, { "epoch": 1.22, "grad_norm": 0.583235502243042, "learning_rate": 0.00038638669868094454, "loss": 3.0064, "step": 24916 }, { "epoch": 1.22, "grad_norm": 0.5170654654502869, "learning_rate": 0.00038637195450178945, "loss": 3.0976, "step": 24917 }, { "epoch": 1.22, "grad_norm": 0.5743283033370972, "learning_rate": 0.00038635721009514727, "loss": 3.1247, "step": 24918 }, { "epoch": 1.22, "grad_norm": 0.5659081935882568, "learning_rate": 0.00038634246546105676, "loss": 3.0991, "step": 24919 }, { "epoch": 1.22, "grad_norm": 0.5626477599143982, "learning_rate": 0.0003863277205995569, "loss": 2.9735, "step": 24920 }, { "epoch": 1.22, "grad_norm": 0.525495707988739, "learning_rate": 0.00038631297551068643, "loss": 2.8968, "step": 24921 }, { "epoch": 1.22, "grad_norm": 0.5388118624687195, "learning_rate": 0.00038629823019448406, "loss": 2.9915, "step": 24922 }, { "epoch": 1.22, "grad_norm": 0.5696624517440796, "learning_rate": 0.00038628348465098895, "loss": 3.2016, "step": 24923 }, { "epoch": 1.22, "grad_norm": 0.5766279697418213, "learning_rate": 0.00038626873888023965, "loss": 3.1201, "step": 24924 }, { "epoch": 1.22, "grad_norm": 0.5821237564086914, "learning_rate": 0.00038625399288227507, "loss": 2.9969, "step": 24925 }, { "epoch": 1.22, "grad_norm": 0.570102870464325, "learning_rate": 0.0003862392466571341, "loss": 2.9724, "step": 24926 }, { "epoch": 1.22, "grad_norm": 0.5588696599006653, "learning_rate": 0.00038622450020485557, "loss": 2.8742, "step": 24927 }, { "epoch": 1.22, "grad_norm": 0.5280874967575073, "learning_rate": 0.0003862097535254783, "loss": 3.2865, "step": 24928 }, { "epoch": 1.22, "grad_norm": 0.5691091418266296, "learning_rate": 0.0003861950066190411, "loss": 3.1516, "step": 24929 }, { "epoch": 1.22, "grad_norm": 0.5672391057014465, "learning_rate": 0.00038618025948558283, "loss": 3.0566, "step": 24930 }, { "epoch": 1.22, "grad_norm": 0.5959036350250244, "learning_rate": 0.0003861655121251425, "loss": 2.8303, "step": 24931 }, { "epoch": 1.22, "grad_norm": 0.55399090051651, "learning_rate": 0.00038615076453775855, "loss": 3.0734, "step": 24932 }, { "epoch": 1.22, "grad_norm": 0.5398240089416504, "learning_rate": 0.0003861360167234702, "loss": 2.8201, "step": 24933 }, { "epoch": 1.22, "grad_norm": 0.5640655159950256, "learning_rate": 0.00038612126868231615, "loss": 3.159, "step": 24934 }, { "epoch": 1.22, "grad_norm": 0.5160124897956848, "learning_rate": 0.00038610652041433526, "loss": 3.1671, "step": 24935 }, { "epoch": 1.22, "grad_norm": 0.5614923238754272, "learning_rate": 0.0003860917719195663, "loss": 3.0478, "step": 24936 }, { "epoch": 1.22, "grad_norm": 0.5773776173591614, "learning_rate": 0.00038607702319804827, "loss": 3.1768, "step": 24937 }, { "epoch": 1.22, "grad_norm": 0.5507655143737793, "learning_rate": 0.0003860622742498199, "loss": 3.0052, "step": 24938 }, { "epoch": 1.22, "grad_norm": 0.6294071078300476, "learning_rate": 0.00038604752507492, "loss": 2.8901, "step": 24939 }, { "epoch": 1.22, "grad_norm": 0.5447244644165039, "learning_rate": 0.0003860327756733876, "loss": 3.0518, "step": 24940 }, { "epoch": 1.22, "grad_norm": 0.5702586770057678, "learning_rate": 0.0003860180260452613, "loss": 2.9634, "step": 24941 }, { "epoch": 1.22, "grad_norm": 0.5664628148078918, "learning_rate": 0.00038600327619058017, "loss": 2.7702, "step": 24942 }, { "epoch": 1.22, "grad_norm": 0.6090885400772095, "learning_rate": 0.00038598852610938287, "loss": 2.9708, "step": 24943 }, { "epoch": 1.22, "grad_norm": 0.5620157718658447, "learning_rate": 0.00038597377580170837, "loss": 3.2652, "step": 24944 }, { "epoch": 1.22, "grad_norm": 0.5198007225990295, "learning_rate": 0.0003859590252675956, "loss": 3.0837, "step": 24945 }, { "epoch": 1.22, "grad_norm": 0.5496712327003479, "learning_rate": 0.0003859442745070832, "loss": 3.1429, "step": 24946 }, { "epoch": 1.22, "grad_norm": 0.5467877984046936, "learning_rate": 0.0003859295235202101, "loss": 3.1486, "step": 24947 }, { "epoch": 1.22, "grad_norm": 0.7828726172447205, "learning_rate": 0.0003859147723070151, "loss": 3.0038, "step": 24948 }, { "epoch": 1.22, "grad_norm": 0.5720941424369812, "learning_rate": 0.00038590002086753735, "loss": 3.0948, "step": 24949 }, { "epoch": 1.22, "grad_norm": 0.6058550477027893, "learning_rate": 0.0003858852692018154, "loss": 3.1602, "step": 24950 }, { "epoch": 1.22, "grad_norm": 0.5809235572814941, "learning_rate": 0.0003858705173098881, "loss": 3.0933, "step": 24951 }, { "epoch": 1.22, "grad_norm": 0.5285111665725708, "learning_rate": 0.0003858557651917944, "loss": 3.0971, "step": 24952 }, { "epoch": 1.22, "grad_norm": 0.5728558301925659, "learning_rate": 0.0003858410128475731, "loss": 3.2456, "step": 24953 }, { "epoch": 1.22, "grad_norm": 0.55547696352005, "learning_rate": 0.0003858262602772632, "loss": 3.1905, "step": 24954 }, { "epoch": 1.22, "grad_norm": 0.5570668578147888, "learning_rate": 0.0003858115074809034, "loss": 3.0247, "step": 24955 }, { "epoch": 1.22, "grad_norm": 0.5741897225379944, "learning_rate": 0.00038579675445853264, "loss": 3.1321, "step": 24956 }, { "epoch": 1.22, "grad_norm": 0.538557231426239, "learning_rate": 0.00038578200121018976, "loss": 3.133, "step": 24957 }, { "epoch": 1.22, "grad_norm": 0.576298713684082, "learning_rate": 0.0003857672477359134, "loss": 3.1654, "step": 24958 }, { "epoch": 1.22, "grad_norm": 0.5457455515861511, "learning_rate": 0.00038575249403574276, "loss": 3.1689, "step": 24959 }, { "epoch": 1.22, "grad_norm": 0.5845022201538086, "learning_rate": 0.00038573774010971666, "loss": 2.9617, "step": 24960 }, { "epoch": 1.22, "grad_norm": 0.5689767599105835, "learning_rate": 0.00038572298595787374, "loss": 2.9591, "step": 24961 }, { "epoch": 1.22, "grad_norm": 0.5592098832130432, "learning_rate": 0.0003857082315802529, "loss": 3.0949, "step": 24962 }, { "epoch": 1.22, "grad_norm": 0.5083266496658325, "learning_rate": 0.0003856934769768932, "loss": 2.9243, "step": 24963 }, { "epoch": 1.22, "grad_norm": 0.5481277108192444, "learning_rate": 0.0003856787221478333, "loss": 3.1659, "step": 24964 }, { "epoch": 1.22, "grad_norm": 0.5553989410400391, "learning_rate": 0.00038566396709311215, "loss": 3.1719, "step": 24965 }, { "epoch": 1.22, "grad_norm": 0.5781741738319397, "learning_rate": 0.00038564921181276867, "loss": 3.1653, "step": 24966 }, { "epoch": 1.22, "grad_norm": 0.5346400141716003, "learning_rate": 0.00038563445630684153, "loss": 2.9946, "step": 24967 }, { "epoch": 1.22, "grad_norm": 0.5499003529548645, "learning_rate": 0.00038561970057536977, "loss": 2.9757, "step": 24968 }, { "epoch": 1.22, "grad_norm": 0.5649663209915161, "learning_rate": 0.0003856049446183922, "loss": 3.0766, "step": 24969 }, { "epoch": 1.22, "grad_norm": 0.5463762283325195, "learning_rate": 0.0003855901884359476, "loss": 2.9893, "step": 24970 }, { "epoch": 1.22, "grad_norm": 0.5486137270927429, "learning_rate": 0.00038557543202807513, "loss": 3.1587, "step": 24971 }, { "epoch": 1.22, "grad_norm": 0.5812304019927979, "learning_rate": 0.0003855606753948133, "loss": 3.0608, "step": 24972 }, { "epoch": 1.22, "grad_norm": 0.6184636354446411, "learning_rate": 0.000385545918536201, "loss": 3.0295, "step": 24973 }, { "epoch": 1.22, "grad_norm": 0.5484544038772583, "learning_rate": 0.00038553116145227743, "loss": 3.0903, "step": 24974 }, { "epoch": 1.22, "grad_norm": 0.5901756882667542, "learning_rate": 0.0003855164041430811, "loss": 3.1048, "step": 24975 }, { "epoch": 1.22, "grad_norm": 0.5762054920196533, "learning_rate": 0.0003855016466086511, "loss": 3.1637, "step": 24976 }, { "epoch": 1.22, "grad_norm": 0.5577733516693115, "learning_rate": 0.00038548688884902616, "loss": 3.1275, "step": 24977 }, { "epoch": 1.22, "grad_norm": 0.5851019620895386, "learning_rate": 0.00038547213086424526, "loss": 3.3804, "step": 24978 }, { "epoch": 1.22, "grad_norm": 0.5264205932617188, "learning_rate": 0.0003854573726543472, "loss": 2.9034, "step": 24979 }, { "epoch": 1.22, "grad_norm": 0.529139518737793, "learning_rate": 0.0003854426142193708, "loss": 3.13, "step": 24980 }, { "epoch": 1.22, "grad_norm": 0.5476992726325989, "learning_rate": 0.000385427855559355, "loss": 3.1176, "step": 24981 }, { "epoch": 1.22, "grad_norm": 0.5486899614334106, "learning_rate": 0.0003854130966743388, "loss": 3.0411, "step": 24982 }, { "epoch": 1.22, "grad_norm": 0.5495012998580933, "learning_rate": 0.00038539833756436086, "loss": 3.0098, "step": 24983 }, { "epoch": 1.22, "grad_norm": 0.5303811430931091, "learning_rate": 0.0003853835782294601, "loss": 3.1033, "step": 24984 }, { "epoch": 1.22, "grad_norm": 0.5448635220527649, "learning_rate": 0.00038536881866967554, "loss": 2.9309, "step": 24985 }, { "epoch": 1.22, "grad_norm": 0.5653622150421143, "learning_rate": 0.0003853540588850459, "loss": 3.0957, "step": 24986 }, { "epoch": 1.22, "grad_norm": 0.6244167685508728, "learning_rate": 0.00038533929887561003, "loss": 3.134, "step": 24987 }, { "epoch": 1.22, "grad_norm": 0.5666537284851074, "learning_rate": 0.00038532453864140685, "loss": 3.1218, "step": 24988 }, { "epoch": 1.22, "grad_norm": 0.5365763306617737, "learning_rate": 0.00038530977818247535, "loss": 2.8336, "step": 24989 }, { "epoch": 1.22, "grad_norm": 0.5451518297195435, "learning_rate": 0.0003852950174988542, "loss": 2.8832, "step": 24990 }, { "epoch": 1.22, "grad_norm": 0.5833927989006042, "learning_rate": 0.00038528025659058255, "loss": 2.9466, "step": 24991 }, { "epoch": 1.22, "grad_norm": 0.5745109915733337, "learning_rate": 0.000385265495457699, "loss": 3.0929, "step": 24992 }, { "epoch": 1.22, "grad_norm": 0.5827248096466064, "learning_rate": 0.00038525073410024263, "loss": 3.0634, "step": 24993 }, { "epoch": 1.22, "grad_norm": 0.544745922088623, "learning_rate": 0.00038523597251825207, "loss": 3.1738, "step": 24994 }, { "epoch": 1.22, "grad_norm": 0.5738770961761475, "learning_rate": 0.0003852212107117665, "loss": 2.9558, "step": 24995 }, { "epoch": 1.22, "grad_norm": 0.631645143032074, "learning_rate": 0.0003852064486808247, "loss": 2.8622, "step": 24996 }, { "epoch": 1.23, "grad_norm": 0.5620419979095459, "learning_rate": 0.0003851916864254654, "loss": 2.6171, "step": 24997 }, { "epoch": 1.23, "grad_norm": 0.5576983094215393, "learning_rate": 0.0003851769239457277, "loss": 3.1946, "step": 24998 }, { "epoch": 1.23, "grad_norm": 0.6105443835258484, "learning_rate": 0.0003851621612416502, "loss": 3.0174, "step": 24999 }, { "epoch": 1.23, "grad_norm": 0.5353231430053711, "learning_rate": 0.00038514739831327214, "loss": 2.901, "step": 25000 }, { "epoch": 1.23, "grad_norm": 0.600609540939331, "learning_rate": 0.0003851326351606321, "loss": 3.0875, "step": 25001 }, { "epoch": 1.23, "grad_norm": 0.6092653274536133, "learning_rate": 0.0003851178717837691, "loss": 3.0929, "step": 25002 }, { "epoch": 1.23, "grad_norm": 0.5666999816894531, "learning_rate": 0.00038510310818272203, "loss": 2.9879, "step": 25003 }, { "epoch": 1.23, "grad_norm": 0.5675250291824341, "learning_rate": 0.0003850883443575298, "loss": 3.2092, "step": 25004 }, { "epoch": 1.23, "grad_norm": 0.5591527223587036, "learning_rate": 0.0003850735803082312, "loss": 3.0273, "step": 25005 }, { "epoch": 1.23, "grad_norm": 0.5454131364822388, "learning_rate": 0.0003850588160348652, "loss": 3.1456, "step": 25006 }, { "epoch": 1.23, "grad_norm": 0.5496153235435486, "learning_rate": 0.00038504405153747066, "loss": 2.9497, "step": 25007 }, { "epoch": 1.23, "grad_norm": 0.5893572568893433, "learning_rate": 0.0003850292868160864, "loss": 3.2013, "step": 25008 }, { "epoch": 1.23, "grad_norm": 0.6619960069656372, "learning_rate": 0.0003850145218707513, "loss": 3.0674, "step": 25009 }, { "epoch": 1.23, "grad_norm": 0.5621336102485657, "learning_rate": 0.00038499975670150437, "loss": 3.1975, "step": 25010 }, { "epoch": 1.23, "grad_norm": 0.5568299889564514, "learning_rate": 0.00038498499130838455, "loss": 3.166, "step": 25011 }, { "epoch": 1.23, "grad_norm": 0.5435270071029663, "learning_rate": 0.0003849702256914305, "loss": 3.067, "step": 25012 }, { "epoch": 1.23, "grad_norm": 0.5628631114959717, "learning_rate": 0.0003849554598506812, "loss": 3.017, "step": 25013 }, { "epoch": 1.23, "grad_norm": 0.5889310240745544, "learning_rate": 0.00038494069378617566, "loss": 3.1861, "step": 25014 }, { "epoch": 1.23, "grad_norm": 0.5779412984848022, "learning_rate": 0.0003849259274979526, "loss": 3.0177, "step": 25015 }, { "epoch": 1.23, "grad_norm": 0.5566220283508301, "learning_rate": 0.00038491116098605103, "loss": 3.0443, "step": 25016 }, { "epoch": 1.23, "grad_norm": 0.5883911848068237, "learning_rate": 0.0003848963942505098, "loss": 2.8931, "step": 25017 }, { "epoch": 1.23, "grad_norm": 0.5884630084037781, "learning_rate": 0.0003848816272913678, "loss": 2.978, "step": 25018 }, { "epoch": 1.23, "grad_norm": 0.5841085910797119, "learning_rate": 0.000384866860108664, "loss": 2.8837, "step": 25019 }, { "epoch": 1.23, "grad_norm": 0.5884820818901062, "learning_rate": 0.00038485209270243707, "loss": 3.0159, "step": 25020 }, { "epoch": 1.23, "grad_norm": 0.6222401857376099, "learning_rate": 0.0003848373250727262, "loss": 3.1475, "step": 25021 }, { "epoch": 1.23, "grad_norm": 0.6234578490257263, "learning_rate": 0.0003848225572195702, "loss": 3.2096, "step": 25022 }, { "epoch": 1.23, "grad_norm": 0.6008965373039246, "learning_rate": 0.00038480778914300775, "loss": 3.2338, "step": 25023 }, { "epoch": 1.23, "grad_norm": 0.5736437439918518, "learning_rate": 0.000384793020843078, "loss": 3.1751, "step": 25024 }, { "epoch": 1.23, "grad_norm": 0.5647448301315308, "learning_rate": 0.00038477825231981967, "loss": 3.1197, "step": 25025 }, { "epoch": 1.23, "grad_norm": 0.5257458686828613, "learning_rate": 0.00038476348357327175, "loss": 3.2591, "step": 25026 }, { "epoch": 1.23, "grad_norm": 0.553209662437439, "learning_rate": 0.00038474871460347315, "loss": 3.3708, "step": 25027 }, { "epoch": 1.23, "grad_norm": 0.6161184310913086, "learning_rate": 0.00038473394541046276, "loss": 2.9489, "step": 25028 }, { "epoch": 1.23, "grad_norm": 0.5613665580749512, "learning_rate": 0.0003847191759942794, "loss": 2.9919, "step": 25029 }, { "epoch": 1.23, "grad_norm": 0.5895677804946899, "learning_rate": 0.00038470440635496214, "loss": 2.8271, "step": 25030 }, { "epoch": 1.23, "grad_norm": 0.5605174899101257, "learning_rate": 0.00038468963649254976, "loss": 3.0974, "step": 25031 }, { "epoch": 1.23, "grad_norm": 0.5734592080116272, "learning_rate": 0.00038467486640708106, "loss": 2.9672, "step": 25032 }, { "epoch": 1.23, "grad_norm": 0.6092544794082642, "learning_rate": 0.0003846600960985953, "loss": 3.1621, "step": 25033 }, { "epoch": 1.23, "grad_norm": 0.5776329636573792, "learning_rate": 0.0003846453255671309, "loss": 3.0484, "step": 25034 }, { "epoch": 1.23, "grad_norm": 0.555668294429779, "learning_rate": 0.000384630554812727, "loss": 2.8431, "step": 25035 }, { "epoch": 1.23, "grad_norm": 0.5371507406234741, "learning_rate": 0.00038461578383542266, "loss": 3.2272, "step": 25036 }, { "epoch": 1.23, "grad_norm": 0.5646740198135376, "learning_rate": 0.00038460101263525654, "loss": 3.1364, "step": 25037 }, { "epoch": 1.23, "grad_norm": 0.5981430411338806, "learning_rate": 0.0003845862412122677, "loss": 3.0595, "step": 25038 }, { "epoch": 1.23, "grad_norm": 0.5709407329559326, "learning_rate": 0.00038457146956649496, "loss": 3.3635, "step": 25039 }, { "epoch": 1.23, "grad_norm": 0.5592368245124817, "learning_rate": 0.00038455669769797723, "loss": 3.0549, "step": 25040 }, { "epoch": 1.23, "grad_norm": 0.5950730443000793, "learning_rate": 0.00038454192560675337, "loss": 3.0904, "step": 25041 }, { "epoch": 1.23, "grad_norm": 0.5664187073707581, "learning_rate": 0.00038452715329286236, "loss": 3.2247, "step": 25042 }, { "epoch": 1.23, "grad_norm": 0.5902990102767944, "learning_rate": 0.00038451238075634317, "loss": 2.9901, "step": 25043 }, { "epoch": 1.23, "grad_norm": 0.5837813019752502, "learning_rate": 0.00038449760799723467, "loss": 3.281, "step": 25044 }, { "epoch": 1.23, "grad_norm": 0.5957124829292297, "learning_rate": 0.0003844828350155756, "loss": 3.1995, "step": 25045 }, { "epoch": 1.23, "grad_norm": 0.5741605758666992, "learning_rate": 0.000384468061811405, "loss": 3.0371, "step": 25046 }, { "epoch": 1.23, "grad_norm": 0.5534799098968506, "learning_rate": 0.00038445328838476197, "loss": 3.2169, "step": 25047 }, { "epoch": 1.23, "grad_norm": 0.5551855564117432, "learning_rate": 0.00038443851473568503, "loss": 2.9418, "step": 25048 }, { "epoch": 1.23, "grad_norm": 0.5379355549812317, "learning_rate": 0.0003844237408642134, "loss": 3.0535, "step": 25049 }, { "epoch": 1.23, "grad_norm": 0.5475854873657227, "learning_rate": 0.0003844089667703858, "loss": 2.9308, "step": 25050 }, { "epoch": 1.23, "grad_norm": 0.5689952969551086, "learning_rate": 0.0003843941924542413, "loss": 2.939, "step": 25051 }, { "epoch": 1.23, "grad_norm": 0.5918108224868774, "learning_rate": 0.0003843794179158187, "loss": 2.8464, "step": 25052 }, { "epoch": 1.23, "grad_norm": 0.5874655842781067, "learning_rate": 0.000384364643155157, "loss": 3.1436, "step": 25053 }, { "epoch": 1.23, "grad_norm": 0.535483181476593, "learning_rate": 0.00038434986817229504, "loss": 2.867, "step": 25054 }, { "epoch": 1.23, "grad_norm": 0.6092949509620667, "learning_rate": 0.0003843350929672719, "loss": 3.1436, "step": 25055 }, { "epoch": 1.23, "grad_norm": 0.5654955506324768, "learning_rate": 0.0003843203175401261, "loss": 3.0727, "step": 25056 }, { "epoch": 1.23, "grad_norm": 0.5241906642913818, "learning_rate": 0.0003843055418908969, "loss": 2.7895, "step": 25057 }, { "epoch": 1.23, "grad_norm": 0.577303409576416, "learning_rate": 0.0003842907660196232, "loss": 2.9957, "step": 25058 }, { "epoch": 1.23, "grad_norm": 0.5571273565292358, "learning_rate": 0.0003842759899263438, "loss": 3.1266, "step": 25059 }, { "epoch": 1.23, "grad_norm": 0.5683521628379822, "learning_rate": 0.0003842612136110976, "loss": 2.9504, "step": 25060 }, { "epoch": 1.23, "grad_norm": 0.5134350657463074, "learning_rate": 0.00038424643707392364, "loss": 3.295, "step": 25061 }, { "epoch": 1.23, "grad_norm": 0.6152227520942688, "learning_rate": 0.0003842316603148608, "loss": 2.764, "step": 25062 }, { "epoch": 1.23, "grad_norm": 0.5723081231117249, "learning_rate": 0.000384216883333948, "loss": 3.0838, "step": 25063 }, { "epoch": 1.23, "grad_norm": 0.5563600659370422, "learning_rate": 0.000384202106131224, "loss": 3.2504, "step": 25064 }, { "epoch": 1.23, "grad_norm": 0.5456048250198364, "learning_rate": 0.00038418732870672804, "loss": 2.9962, "step": 25065 }, { "epoch": 1.23, "grad_norm": 0.5585772395133972, "learning_rate": 0.0003841725510604987, "loss": 2.8871, "step": 25066 }, { "epoch": 1.23, "grad_norm": 0.5854873061180115, "learning_rate": 0.00038415777319257517, "loss": 2.9557, "step": 25067 }, { "epoch": 1.23, "grad_norm": 0.5794108510017395, "learning_rate": 0.0003841429951029962, "loss": 3.1969, "step": 25068 }, { "epoch": 1.23, "grad_norm": 0.688174307346344, "learning_rate": 0.0003841282167918008, "loss": 3.1212, "step": 25069 }, { "epoch": 1.23, "grad_norm": 0.5816777944564819, "learning_rate": 0.0003841134382590279, "loss": 3.0208, "step": 25070 }, { "epoch": 1.23, "grad_norm": 0.5570030808448792, "learning_rate": 0.00038409865950471634, "loss": 3.1805, "step": 25071 }, { "epoch": 1.23, "grad_norm": 0.5498480796813965, "learning_rate": 0.00038408388052890507, "loss": 3.0436, "step": 25072 }, { "epoch": 1.23, "grad_norm": 0.5754635334014893, "learning_rate": 0.0003840691013316331, "loss": 3.1856, "step": 25073 }, { "epoch": 1.23, "grad_norm": 0.5430830121040344, "learning_rate": 0.0003840543219129393, "loss": 2.9776, "step": 25074 }, { "epoch": 1.23, "grad_norm": 0.5669942498207092, "learning_rate": 0.0003840395422728626, "loss": 3.1792, "step": 25075 }, { "epoch": 1.23, "grad_norm": 0.5448963046073914, "learning_rate": 0.0003840247624114418, "loss": 3.0217, "step": 25076 }, { "epoch": 1.23, "grad_norm": 0.5780636072158813, "learning_rate": 0.0003840099823287161, "loss": 3.0021, "step": 25077 }, { "epoch": 1.23, "grad_norm": 0.5799130201339722, "learning_rate": 0.00038399520202472414, "loss": 2.9277, "step": 25078 }, { "epoch": 1.23, "grad_norm": 0.5543425679206848, "learning_rate": 0.0003839804214995051, "loss": 3.0757, "step": 25079 }, { "epoch": 1.23, "grad_norm": 0.5854793787002563, "learning_rate": 0.0003839656407530977, "loss": 3.0283, "step": 25080 }, { "epoch": 1.23, "grad_norm": 0.5901440978050232, "learning_rate": 0.00038395085978554103, "loss": 3.0345, "step": 25081 }, { "epoch": 1.23, "grad_norm": 0.6366936564445496, "learning_rate": 0.00038393607859687394, "loss": 3.1309, "step": 25082 }, { "epoch": 1.23, "grad_norm": 0.554716169834137, "learning_rate": 0.0003839212971871354, "loss": 3.1066, "step": 25083 }, { "epoch": 1.23, "grad_norm": 0.5704500675201416, "learning_rate": 0.0003839065155563643, "loss": 2.9534, "step": 25084 }, { "epoch": 1.23, "grad_norm": 0.5622331500053406, "learning_rate": 0.00038389173370459955, "loss": 3.0545, "step": 25085 }, { "epoch": 1.23, "grad_norm": 0.5637291073799133, "learning_rate": 0.0003838769516318801, "loss": 3.018, "step": 25086 }, { "epoch": 1.23, "grad_norm": 0.5698617696762085, "learning_rate": 0.000383862169338245, "loss": 3.1951, "step": 25087 }, { "epoch": 1.23, "grad_norm": 0.5883769392967224, "learning_rate": 0.00038384738682373297, "loss": 3.2073, "step": 25088 }, { "epoch": 1.23, "grad_norm": 0.54131680727005, "learning_rate": 0.0003838326040883831, "loss": 3.1775, "step": 25089 }, { "epoch": 1.23, "grad_norm": 0.5682424306869507, "learning_rate": 0.00038381782113223426, "loss": 3.1067, "step": 25090 }, { "epoch": 1.23, "grad_norm": 0.5473423004150391, "learning_rate": 0.0003838030379553255, "loss": 2.8472, "step": 25091 }, { "epoch": 1.23, "grad_norm": 0.5303641557693481, "learning_rate": 0.0003837882545576956, "loss": 3.1275, "step": 25092 }, { "epoch": 1.23, "grad_norm": 0.5696659684181213, "learning_rate": 0.00038377347093938353, "loss": 3.0899, "step": 25093 }, { "epoch": 1.23, "grad_norm": 0.5956259965896606, "learning_rate": 0.00038375868710042836, "loss": 3.0071, "step": 25094 }, { "epoch": 1.23, "grad_norm": 0.5872520208358765, "learning_rate": 0.00038374390304086884, "loss": 3.1197, "step": 25095 }, { "epoch": 1.23, "grad_norm": 0.5226383805274963, "learning_rate": 0.00038372911876074405, "loss": 3.1719, "step": 25096 }, { "epoch": 1.23, "grad_norm": 0.5661742687225342, "learning_rate": 0.00038371433426009276, "loss": 2.9626, "step": 25097 }, { "epoch": 1.23, "grad_norm": 0.5806180834770203, "learning_rate": 0.0003836995495389542, "loss": 3.1586, "step": 25098 }, { "epoch": 1.23, "grad_norm": 0.5616850256919861, "learning_rate": 0.00038368476459736705, "loss": 3.0818, "step": 25099 }, { "epoch": 1.23, "grad_norm": 0.5661494731903076, "learning_rate": 0.0003836699794353703, "loss": 3.0206, "step": 25100 }, { "epoch": 1.23, "grad_norm": 0.5832769274711609, "learning_rate": 0.000383655194053003, "loss": 2.8287, "step": 25101 }, { "epoch": 1.23, "grad_norm": 0.5530614256858826, "learning_rate": 0.000383640408450304, "loss": 3.0402, "step": 25102 }, { "epoch": 1.23, "grad_norm": 0.5663602352142334, "learning_rate": 0.00038362562262731224, "loss": 3.2647, "step": 25103 }, { "epoch": 1.23, "grad_norm": 0.5610358119010925, "learning_rate": 0.00038361083658406666, "loss": 3.0342, "step": 25104 }, { "epoch": 1.23, "grad_norm": 0.5962334275245667, "learning_rate": 0.0003835960503206063, "loss": 3.039, "step": 25105 }, { "epoch": 1.23, "grad_norm": 0.5851008296012878, "learning_rate": 0.00038358126383697, "loss": 2.7736, "step": 25106 }, { "epoch": 1.23, "grad_norm": 0.6189122796058655, "learning_rate": 0.0003835664771331967, "loss": 2.9924, "step": 25107 }, { "epoch": 1.23, "grad_norm": 0.5865572094917297, "learning_rate": 0.00038355169020932534, "loss": 3.1454, "step": 25108 }, { "epoch": 1.23, "grad_norm": 0.5793099403381348, "learning_rate": 0.00038353690306539506, "loss": 3.0783, "step": 25109 }, { "epoch": 1.23, "grad_norm": 0.577268660068512, "learning_rate": 0.00038352211570144454, "loss": 3.1842, "step": 25110 }, { "epoch": 1.23, "grad_norm": 0.524253249168396, "learning_rate": 0.0003835073281175129, "loss": 3.1753, "step": 25111 }, { "epoch": 1.23, "grad_norm": 0.553202748298645, "learning_rate": 0.0003834925403136389, "loss": 2.9092, "step": 25112 }, { "epoch": 1.23, "grad_norm": 0.5760665535926819, "learning_rate": 0.0003834777522898618, "loss": 3.1152, "step": 25113 }, { "epoch": 1.23, "grad_norm": 0.5376695990562439, "learning_rate": 0.00038346296404622024, "loss": 3.0326, "step": 25114 }, { "epoch": 1.23, "grad_norm": 0.5973801612854004, "learning_rate": 0.00038344817558275335, "loss": 3.1555, "step": 25115 }, { "epoch": 1.23, "grad_norm": 0.5529061555862427, "learning_rate": 0.00038343338689950004, "loss": 3.1566, "step": 25116 }, { "epoch": 1.23, "grad_norm": 0.6026521325111389, "learning_rate": 0.0003834185979964992, "loss": 3.1871, "step": 25117 }, { "epoch": 1.23, "grad_norm": 0.5608975887298584, "learning_rate": 0.0003834038088737898, "loss": 2.9394, "step": 25118 }, { "epoch": 1.23, "grad_norm": 0.530158519744873, "learning_rate": 0.00038338901953141087, "loss": 3.0237, "step": 25119 }, { "epoch": 1.23, "grad_norm": 0.5642078518867493, "learning_rate": 0.0003833742299694014, "loss": 3.2089, "step": 25120 }, { "epoch": 1.23, "grad_norm": 0.526268720626831, "learning_rate": 0.0003833594401878001, "loss": 3.1074, "step": 25121 }, { "epoch": 1.23, "grad_norm": 0.5271223187446594, "learning_rate": 0.00038334465018664615, "loss": 3.1286, "step": 25122 }, { "epoch": 1.23, "grad_norm": 0.5779740214347839, "learning_rate": 0.0003833298599659783, "loss": 3.0723, "step": 25123 }, { "epoch": 1.23, "grad_norm": 0.5797901749610901, "learning_rate": 0.00038331506952583584, "loss": 3.1721, "step": 25124 }, { "epoch": 1.23, "grad_norm": 0.5432953834533691, "learning_rate": 0.00038330027886625744, "loss": 2.9935, "step": 25125 }, { "epoch": 1.23, "grad_norm": 0.5544005632400513, "learning_rate": 0.0003832854879872821, "loss": 2.8895, "step": 25126 }, { "epoch": 1.23, "grad_norm": 0.5620940327644348, "learning_rate": 0.00038327069688894885, "loss": 3.053, "step": 25127 }, { "epoch": 1.23, "grad_norm": 0.5733068585395813, "learning_rate": 0.00038325590557129665, "loss": 2.9839, "step": 25128 }, { "epoch": 1.23, "grad_norm": 0.5551736354827881, "learning_rate": 0.00038324111403436435, "loss": 3.268, "step": 25129 }, { "epoch": 1.23, "grad_norm": 0.584686279296875, "learning_rate": 0.00038322632227819097, "loss": 3.2248, "step": 25130 }, { "epoch": 1.23, "grad_norm": 0.6471843123435974, "learning_rate": 0.00038321153030281555, "loss": 3.0796, "step": 25131 }, { "epoch": 1.23, "grad_norm": 0.5832377672195435, "learning_rate": 0.000383196738108277, "loss": 3.0214, "step": 25132 }, { "epoch": 1.23, "grad_norm": 0.5637081861495972, "learning_rate": 0.0003831819456946141, "loss": 3.1815, "step": 25133 }, { "epoch": 1.23, "grad_norm": 0.5314174890518188, "learning_rate": 0.00038316715306186604, "loss": 2.947, "step": 25134 }, { "epoch": 1.23, "grad_norm": 0.564139187335968, "learning_rate": 0.0003831523602100717, "loss": 3.0108, "step": 25135 }, { "epoch": 1.23, "grad_norm": 0.5853064656257629, "learning_rate": 0.00038313756713927004, "loss": 2.963, "step": 25136 }, { "epoch": 1.23, "grad_norm": 0.5824783444404602, "learning_rate": 0.0003831227738495001, "loss": 3.0401, "step": 25137 }, { "epoch": 1.23, "grad_norm": 0.590248703956604, "learning_rate": 0.0003831079803408007, "loss": 3.1766, "step": 25138 }, { "epoch": 1.23, "grad_norm": 0.5577486157417297, "learning_rate": 0.0003830931866132109, "loss": 2.9757, "step": 25139 }, { "epoch": 1.23, "grad_norm": 0.5475689768791199, "learning_rate": 0.0003830783926667696, "loss": 3.2508, "step": 25140 }, { "epoch": 1.23, "grad_norm": 0.5604707598686218, "learning_rate": 0.0003830635985015158, "loss": 2.9826, "step": 25141 }, { "epoch": 1.23, "grad_norm": 0.5635944604873657, "learning_rate": 0.0003830488041174885, "loss": 3.0229, "step": 25142 }, { "epoch": 1.23, "grad_norm": 0.5407227873802185, "learning_rate": 0.00038303400951472666, "loss": 2.8838, "step": 25143 }, { "epoch": 1.23, "grad_norm": 0.5590699315071106, "learning_rate": 0.0003830192146932692, "loss": 3.2534, "step": 25144 }, { "epoch": 1.23, "grad_norm": 0.5566086769104004, "learning_rate": 0.00038300441965315505, "loss": 2.9712, "step": 25145 }, { "epoch": 1.23, "grad_norm": 0.5474108457565308, "learning_rate": 0.0003829896243944234, "loss": 3.1384, "step": 25146 }, { "epoch": 1.23, "grad_norm": 0.5965452790260315, "learning_rate": 0.00038297482891711286, "loss": 3.0701, "step": 25147 }, { "epoch": 1.23, "grad_norm": 0.5965765714645386, "learning_rate": 0.0003829600332212626, "loss": 3.038, "step": 25148 }, { "epoch": 1.23, "grad_norm": 0.547390878200531, "learning_rate": 0.00038294523730691174, "loss": 2.8804, "step": 25149 }, { "epoch": 1.23, "grad_norm": 0.5904244184494019, "learning_rate": 0.00038293044117409896, "loss": 2.9532, "step": 25150 }, { "epoch": 1.23, "grad_norm": 0.5980238914489746, "learning_rate": 0.00038291564482286335, "loss": 3.1317, "step": 25151 }, { "epoch": 1.23, "grad_norm": 0.6068405508995056, "learning_rate": 0.00038290084825324394, "loss": 3.1838, "step": 25152 }, { "epoch": 1.23, "grad_norm": 0.5584993362426758, "learning_rate": 0.00038288605146527966, "loss": 3.1354, "step": 25153 }, { "epoch": 1.23, "grad_norm": 0.5200023651123047, "learning_rate": 0.00038287125445900943, "loss": 3.1367, "step": 25154 }, { "epoch": 1.23, "grad_norm": 0.5400229096412659, "learning_rate": 0.00038285645723447223, "loss": 3.1147, "step": 25155 }, { "epoch": 1.23, "grad_norm": 0.6204379200935364, "learning_rate": 0.0003828416597917071, "loss": 3.0594, "step": 25156 }, { "epoch": 1.23, "grad_norm": 0.5439574122428894, "learning_rate": 0.0003828268621307531, "loss": 3.294, "step": 25157 }, { "epoch": 1.23, "grad_norm": 0.5771875381469727, "learning_rate": 0.00038281206425164893, "loss": 3.033, "step": 25158 }, { "epoch": 1.23, "grad_norm": 0.5748353004455566, "learning_rate": 0.0003827972661544337, "loss": 3.3069, "step": 25159 }, { "epoch": 1.23, "grad_norm": 0.5352070331573486, "learning_rate": 0.00038278246783914657, "loss": 3.2755, "step": 25160 }, { "epoch": 1.23, "grad_norm": 0.533769965171814, "learning_rate": 0.00038276766930582626, "loss": 3.2693, "step": 25161 }, { "epoch": 1.23, "grad_norm": 0.5342288017272949, "learning_rate": 0.0003827528705545118, "loss": 3.1622, "step": 25162 }, { "epoch": 1.23, "grad_norm": 0.5443651676177979, "learning_rate": 0.00038273807158524216, "loss": 3.1329, "step": 25163 }, { "epoch": 1.23, "grad_norm": 0.5513403415679932, "learning_rate": 0.0003827232723980566, "loss": 3.0414, "step": 25164 }, { "epoch": 1.23, "grad_norm": 0.5552654266357422, "learning_rate": 0.00038270847299299367, "loss": 3.0813, "step": 25165 }, { "epoch": 1.23, "grad_norm": 0.5734013319015503, "learning_rate": 0.00038269367337009257, "loss": 3.0957, "step": 25166 }, { "epoch": 1.23, "grad_norm": 0.6037915349006653, "learning_rate": 0.0003826788735293923, "loss": 3.2297, "step": 25167 }, { "epoch": 1.23, "grad_norm": 0.5523087382316589, "learning_rate": 0.0003826640734709317, "loss": 2.9195, "step": 25168 }, { "epoch": 1.23, "grad_norm": 0.5460664629936218, "learning_rate": 0.00038264927319474985, "loss": 3.202, "step": 25169 }, { "epoch": 1.23, "grad_norm": 0.5905874967575073, "learning_rate": 0.0003826344727008858, "loss": 2.9142, "step": 25170 }, { "epoch": 1.23, "grad_norm": 0.5918222665786743, "learning_rate": 0.0003826196719893785, "loss": 3.0206, "step": 25171 }, { "epoch": 1.23, "grad_norm": 0.539842426776886, "learning_rate": 0.00038260487106026676, "loss": 3.134, "step": 25172 }, { "epoch": 1.23, "grad_norm": 0.5324265956878662, "learning_rate": 0.00038259006991358966, "loss": 3.1243, "step": 25173 }, { "epoch": 1.23, "grad_norm": 0.5232551097869873, "learning_rate": 0.00038257526854938625, "loss": 3.4041, "step": 25174 }, { "epoch": 1.23, "grad_norm": 0.5780461430549622, "learning_rate": 0.0003825604669676955, "loss": 3.1711, "step": 25175 }, { "epoch": 1.23, "grad_norm": 0.5784001350402832, "learning_rate": 0.00038254566516855633, "loss": 3.0586, "step": 25176 }, { "epoch": 1.23, "grad_norm": 0.5395734310150146, "learning_rate": 0.0003825308631520079, "loss": 3.1729, "step": 25177 }, { "epoch": 1.23, "grad_norm": 0.57415372133255, "learning_rate": 0.0003825160609180889, "loss": 2.9213, "step": 25178 }, { "epoch": 1.23, "grad_norm": 0.5726572275161743, "learning_rate": 0.0003825012584668385, "loss": 3.1291, "step": 25179 }, { "epoch": 1.23, "grad_norm": 0.571429431438446, "learning_rate": 0.00038248645579829567, "loss": 2.9315, "step": 25180 }, { "epoch": 1.23, "grad_norm": 0.5924422144889832, "learning_rate": 0.0003824716529124994, "loss": 3.3614, "step": 25181 }, { "epoch": 1.23, "grad_norm": 0.5415179133415222, "learning_rate": 0.0003824568498094887, "loss": 3.1108, "step": 25182 }, { "epoch": 1.23, "grad_norm": 0.5803284645080566, "learning_rate": 0.0003824420464893026, "loss": 3.2433, "step": 25183 }, { "epoch": 1.23, "grad_norm": 0.5539078712463379, "learning_rate": 0.00038242724295197984, "loss": 2.8731, "step": 25184 }, { "epoch": 1.23, "grad_norm": 0.549834668636322, "learning_rate": 0.00038241243919755967, "loss": 3.0925, "step": 25185 }, { "epoch": 1.23, "grad_norm": 0.5797627568244934, "learning_rate": 0.0003823976352260811, "loss": 3.2575, "step": 25186 }, { "epoch": 1.23, "grad_norm": 0.57608562707901, "learning_rate": 0.0003823828310375829, "loss": 2.9751, "step": 25187 }, { "epoch": 1.23, "grad_norm": 0.587173342704773, "learning_rate": 0.0003823680266321042, "loss": 3.1131, "step": 25188 }, { "epoch": 1.23, "grad_norm": 0.5533391833305359, "learning_rate": 0.00038235322200968396, "loss": 3.2133, "step": 25189 }, { "epoch": 1.23, "grad_norm": 0.5643752813339233, "learning_rate": 0.00038233841717036116, "loss": 3.2175, "step": 25190 }, { "epoch": 1.23, "grad_norm": 0.5208031535148621, "learning_rate": 0.00038232361211417484, "loss": 3.1639, "step": 25191 }, { "epoch": 1.23, "grad_norm": 0.5685301423072815, "learning_rate": 0.00038230880684116396, "loss": 3.0885, "step": 25192 }, { "epoch": 1.23, "grad_norm": 0.5917596817016602, "learning_rate": 0.0003822940013513675, "loss": 2.8533, "step": 25193 }, { "epoch": 1.23, "grad_norm": 0.5756223797798157, "learning_rate": 0.0003822791956448246, "loss": 3.0704, "step": 25194 }, { "epoch": 1.23, "grad_norm": 0.5527355074882507, "learning_rate": 0.0003822643897215739, "loss": 2.929, "step": 25195 }, { "epoch": 1.23, "grad_norm": 0.6033982634544373, "learning_rate": 0.0003822495835816548, "loss": 3.028, "step": 25196 }, { "epoch": 1.23, "grad_norm": 0.5483729243278503, "learning_rate": 0.00038223477722510623, "loss": 3.1289, "step": 25197 }, { "epoch": 1.23, "grad_norm": 0.5902836918830872, "learning_rate": 0.0003822199706519669, "loss": 3.0741, "step": 25198 }, { "epoch": 1.23, "grad_norm": 0.5841925740242004, "learning_rate": 0.000382205163862276, "loss": 3.0422, "step": 25199 }, { "epoch": 1.23, "grad_norm": 0.5686092972755432, "learning_rate": 0.0003821903568560726, "loss": 3.0314, "step": 25200 }, { "epoch": 1.24, "grad_norm": 0.5432037115097046, "learning_rate": 0.0003821755496333956, "loss": 3.2488, "step": 25201 }, { "epoch": 1.24, "grad_norm": 0.546054482460022, "learning_rate": 0.000382160742194284, "loss": 2.9305, "step": 25202 }, { "epoch": 1.24, "grad_norm": 0.5392909049987793, "learning_rate": 0.0003821459345387768, "loss": 3.0723, "step": 25203 }, { "epoch": 1.24, "grad_norm": 0.5378334522247314, "learning_rate": 0.00038213112666691303, "loss": 2.9682, "step": 25204 }, { "epoch": 1.24, "grad_norm": 0.5470960736274719, "learning_rate": 0.0003821163185787317, "loss": 3.1647, "step": 25205 }, { "epoch": 1.24, "grad_norm": 0.5794514417648315, "learning_rate": 0.00038210151027427176, "loss": 3.0332, "step": 25206 }, { "epoch": 1.24, "grad_norm": 0.611207127571106, "learning_rate": 0.00038208670175357226, "loss": 3.2109, "step": 25207 }, { "epoch": 1.24, "grad_norm": 0.5396631956100464, "learning_rate": 0.00038207189301667227, "loss": 3.1659, "step": 25208 }, { "epoch": 1.24, "grad_norm": 0.5904266238212585, "learning_rate": 0.00038205708406361056, "loss": 3.0525, "step": 25209 }, { "epoch": 1.24, "grad_norm": 0.5775328278541565, "learning_rate": 0.0003820422748944263, "loss": 3.0498, "step": 25210 }, { "epoch": 1.24, "grad_norm": 0.5564374327659607, "learning_rate": 0.00038202746550915857, "loss": 3.1151, "step": 25211 }, { "epoch": 1.24, "grad_norm": 0.5537264943122864, "learning_rate": 0.00038201265590784625, "loss": 3.2185, "step": 25212 }, { "epoch": 1.24, "grad_norm": 0.5900477170944214, "learning_rate": 0.00038199784609052834, "loss": 3.1797, "step": 25213 }, { "epoch": 1.24, "grad_norm": 0.5341221690177917, "learning_rate": 0.00038198303605724385, "loss": 3.1518, "step": 25214 }, { "epoch": 1.24, "grad_norm": 0.5363330245018005, "learning_rate": 0.00038196822580803194, "loss": 2.8538, "step": 25215 }, { "epoch": 1.24, "grad_norm": 0.5329729914665222, "learning_rate": 0.0003819534153429314, "loss": 3.1315, "step": 25216 }, { "epoch": 1.24, "grad_norm": 0.5687403082847595, "learning_rate": 0.0003819386046619814, "loss": 3.1472, "step": 25217 }, { "epoch": 1.24, "grad_norm": 0.5683347582817078, "learning_rate": 0.00038192379376522087, "loss": 3.2782, "step": 25218 }, { "epoch": 1.24, "grad_norm": 0.5241825580596924, "learning_rate": 0.0003819089826526889, "loss": 2.9968, "step": 25219 }, { "epoch": 1.24, "grad_norm": 0.5936827659606934, "learning_rate": 0.00038189417132442424, "loss": 2.9825, "step": 25220 }, { "epoch": 1.24, "grad_norm": 0.5642827749252319, "learning_rate": 0.0003818793597804662, "loss": 2.9358, "step": 25221 }, { "epoch": 1.24, "grad_norm": 0.5551975965499878, "learning_rate": 0.00038186454802085373, "loss": 3.0648, "step": 25222 }, { "epoch": 1.24, "grad_norm": 0.5875920057296753, "learning_rate": 0.00038184973604562573, "loss": 3.0493, "step": 25223 }, { "epoch": 1.24, "grad_norm": 0.5492988228797913, "learning_rate": 0.00038183492385482137, "loss": 3.1779, "step": 25224 }, { "epoch": 1.24, "grad_norm": 0.5400311350822449, "learning_rate": 0.0003818201114484794, "loss": 3.2273, "step": 25225 }, { "epoch": 1.24, "grad_norm": 0.5957015156745911, "learning_rate": 0.00038180529882663914, "loss": 3.1921, "step": 25226 }, { "epoch": 1.24, "grad_norm": 0.5321521162986755, "learning_rate": 0.00038179048598933944, "loss": 3.0101, "step": 25227 }, { "epoch": 1.24, "grad_norm": 0.5768882036209106, "learning_rate": 0.00038177567293661935, "loss": 2.9978, "step": 25228 }, { "epoch": 1.24, "grad_norm": 0.5608060359954834, "learning_rate": 0.0003817608596685179, "loss": 3.0694, "step": 25229 }, { "epoch": 1.24, "grad_norm": 0.5481438040733337, "learning_rate": 0.00038174604618507397, "loss": 3.0578, "step": 25230 }, { "epoch": 1.24, "grad_norm": 0.570463240146637, "learning_rate": 0.00038173123248632675, "loss": 3.1923, "step": 25231 }, { "epoch": 1.24, "grad_norm": 0.6016967296600342, "learning_rate": 0.0003817164185723152, "loss": 3.0681, "step": 25232 }, { "epoch": 1.24, "grad_norm": 0.5828872919082642, "learning_rate": 0.00038170160444307835, "loss": 3.0874, "step": 25233 }, { "epoch": 1.24, "grad_norm": 0.5865387916564941, "learning_rate": 0.00038168679009865523, "loss": 3.0659, "step": 25234 }, { "epoch": 1.24, "grad_norm": 0.5488494038581848, "learning_rate": 0.00038167197553908467, "loss": 2.983, "step": 25235 }, { "epoch": 1.24, "grad_norm": 0.5348222255706787, "learning_rate": 0.00038165716076440595, "loss": 3.0415, "step": 25236 }, { "epoch": 1.24, "grad_norm": 0.5851247906684875, "learning_rate": 0.0003816423457746581, "loss": 3.0879, "step": 25237 }, { "epoch": 1.24, "grad_norm": 0.6187082529067993, "learning_rate": 0.00038162753056987986, "loss": 3.1523, "step": 25238 }, { "epoch": 1.24, "grad_norm": 0.5365039110183716, "learning_rate": 0.00038161271515011044, "loss": 3.1376, "step": 25239 }, { "epoch": 1.24, "grad_norm": 0.5412645936012268, "learning_rate": 0.00038159789951538884, "loss": 2.9421, "step": 25240 }, { "epoch": 1.24, "grad_norm": 0.5671802759170532, "learning_rate": 0.00038158308366575413, "loss": 3.0663, "step": 25241 }, { "epoch": 1.24, "grad_norm": 0.5550203919410706, "learning_rate": 0.00038156826760124524, "loss": 2.914, "step": 25242 }, { "epoch": 1.24, "grad_norm": 0.5574257969856262, "learning_rate": 0.0003815534513219012, "loss": 2.9547, "step": 25243 }, { "epoch": 1.24, "grad_norm": 0.5445705652236938, "learning_rate": 0.0003815386348277611, "loss": 3.0675, "step": 25244 }, { "epoch": 1.24, "grad_norm": 0.5782180428504944, "learning_rate": 0.00038152381811886393, "loss": 3.0991, "step": 25245 }, { "epoch": 1.24, "grad_norm": 0.5452681183815002, "learning_rate": 0.0003815090011952487, "loss": 2.943, "step": 25246 }, { "epoch": 1.24, "grad_norm": 0.5488862991333008, "learning_rate": 0.00038149418405695443, "loss": 2.8228, "step": 25247 }, { "epoch": 1.24, "grad_norm": 0.5082352161407471, "learning_rate": 0.0003814793667040203, "loss": 3.0863, "step": 25248 }, { "epoch": 1.24, "grad_norm": 0.5326624512672424, "learning_rate": 0.000381464549136485, "loss": 3.1076, "step": 25249 }, { "epoch": 1.24, "grad_norm": 0.568250834941864, "learning_rate": 0.0003814497313543879, "loss": 3.1315, "step": 25250 }, { "epoch": 1.24, "grad_norm": 0.5677963495254517, "learning_rate": 0.00038143491335776783, "loss": 3.137, "step": 25251 }, { "epoch": 1.24, "grad_norm": 0.5616076588630676, "learning_rate": 0.0003814200951466638, "loss": 3.0442, "step": 25252 }, { "epoch": 1.24, "grad_norm": 0.5570437908172607, "learning_rate": 0.00038140527672111496, "loss": 3.0987, "step": 25253 }, { "epoch": 1.24, "grad_norm": 0.5529975891113281, "learning_rate": 0.00038139045808116036, "loss": 3.2037, "step": 25254 }, { "epoch": 1.24, "grad_norm": 0.6022285223007202, "learning_rate": 0.0003813756392268389, "loss": 3.0878, "step": 25255 }, { "epoch": 1.24, "grad_norm": 0.5654017925262451, "learning_rate": 0.00038136082015818965, "loss": 3.1599, "step": 25256 }, { "epoch": 1.24, "grad_norm": 0.5939410924911499, "learning_rate": 0.00038134600087525166, "loss": 3.2432, "step": 25257 }, { "epoch": 1.24, "grad_norm": 0.5679937601089478, "learning_rate": 0.000381331181378064, "loss": 2.9138, "step": 25258 }, { "epoch": 1.24, "grad_norm": 0.584583580493927, "learning_rate": 0.00038131636166666567, "loss": 3.2667, "step": 25259 }, { "epoch": 1.24, "grad_norm": 0.5996798872947693, "learning_rate": 0.0003813015417410957, "loss": 3.2236, "step": 25260 }, { "epoch": 1.24, "grad_norm": 0.5446107387542725, "learning_rate": 0.00038128672160139295, "loss": 3.1285, "step": 25261 }, { "epoch": 1.24, "grad_norm": 0.5301439762115479, "learning_rate": 0.0003812719012475968, "loss": 3.0362, "step": 25262 }, { "epoch": 1.24, "grad_norm": 0.5626558661460876, "learning_rate": 0.000381257080679746, "loss": 3.0798, "step": 25263 }, { "epoch": 1.24, "grad_norm": 0.5594674944877625, "learning_rate": 0.00038124225989787975, "loss": 2.9337, "step": 25264 }, { "epoch": 1.24, "grad_norm": 0.5297481417655945, "learning_rate": 0.00038122743890203704, "loss": 3.1576, "step": 25265 }, { "epoch": 1.24, "grad_norm": 0.5789366960525513, "learning_rate": 0.00038121261769225685, "loss": 3.0484, "step": 25266 }, { "epoch": 1.24, "grad_norm": 0.5611054301261902, "learning_rate": 0.0003811977962685782, "loss": 3.1415, "step": 25267 }, { "epoch": 1.24, "grad_norm": 0.5735259056091309, "learning_rate": 0.0003811829746310403, "loss": 3.061, "step": 25268 }, { "epoch": 1.24, "grad_norm": 0.5358028411865234, "learning_rate": 0.00038116815277968195, "loss": 3.0565, "step": 25269 }, { "epoch": 1.24, "grad_norm": 0.5914501547813416, "learning_rate": 0.00038115333071454246, "loss": 3.1501, "step": 25270 }, { "epoch": 1.24, "grad_norm": 0.5669057369232178, "learning_rate": 0.0003811385084356606, "loss": 3.2603, "step": 25271 }, { "epoch": 1.24, "grad_norm": 0.549128532409668, "learning_rate": 0.00038112368594307554, "loss": 3.0044, "step": 25272 }, { "epoch": 1.24, "grad_norm": 0.5490759015083313, "learning_rate": 0.0003811088632368263, "loss": 3.2027, "step": 25273 }, { "epoch": 1.24, "grad_norm": 0.5359374284744263, "learning_rate": 0.000381094040316952, "loss": 3.2197, "step": 25274 }, { "epoch": 1.24, "grad_norm": 0.5579591393470764, "learning_rate": 0.0003810792171834915, "loss": 3.1138, "step": 25275 }, { "epoch": 1.24, "grad_norm": 0.5682498216629028, "learning_rate": 0.00038106439383648396, "loss": 3.0713, "step": 25276 }, { "epoch": 1.24, "grad_norm": 0.5187976360321045, "learning_rate": 0.0003810495702759685, "loss": 2.9462, "step": 25277 }, { "epoch": 1.24, "grad_norm": 0.5464273691177368, "learning_rate": 0.00038103474650198396, "loss": 2.9093, "step": 25278 }, { "epoch": 1.24, "grad_norm": 0.5833659768104553, "learning_rate": 0.0003810199225145695, "loss": 3.0675, "step": 25279 }, { "epoch": 1.24, "grad_norm": 0.5375162959098816, "learning_rate": 0.0003810050983137642, "loss": 2.9478, "step": 25280 }, { "epoch": 1.24, "grad_norm": 0.5666972994804382, "learning_rate": 0.00038099027389960704, "loss": 2.9796, "step": 25281 }, { "epoch": 1.24, "grad_norm": 0.526598334312439, "learning_rate": 0.0003809754492721371, "loss": 3.0346, "step": 25282 }, { "epoch": 1.24, "grad_norm": 0.5676760077476501, "learning_rate": 0.0003809606244313934, "loss": 3.2404, "step": 25283 }, { "epoch": 1.24, "grad_norm": 0.5630086064338684, "learning_rate": 0.00038094579937741506, "loss": 2.936, "step": 25284 }, { "epoch": 1.24, "grad_norm": 0.5473533272743225, "learning_rate": 0.00038093097411024106, "loss": 3.0267, "step": 25285 }, { "epoch": 1.24, "grad_norm": 0.6100202202796936, "learning_rate": 0.0003809161486299103, "loss": 3.1214, "step": 25286 }, { "epoch": 1.24, "grad_norm": 0.5748114585876465, "learning_rate": 0.0003809013229364621, "loss": 3.1075, "step": 25287 }, { "epoch": 1.24, "grad_norm": 0.567639172077179, "learning_rate": 0.00038088649702993537, "loss": 3.0915, "step": 25288 }, { "epoch": 1.24, "grad_norm": 0.5674895644187927, "learning_rate": 0.00038087167091036916, "loss": 3.0159, "step": 25289 }, { "epoch": 1.24, "grad_norm": 0.5389357209205627, "learning_rate": 0.0003808568445778025, "loss": 3.1525, "step": 25290 }, { "epoch": 1.24, "grad_norm": 0.5887845754623413, "learning_rate": 0.00038084201803227456, "loss": 3.0113, "step": 25291 }, { "epoch": 1.24, "grad_norm": 0.5666884183883667, "learning_rate": 0.00038082719127382423, "loss": 3.0803, "step": 25292 }, { "epoch": 1.24, "grad_norm": 0.5322736501693726, "learning_rate": 0.00038081236430249064, "loss": 3.0178, "step": 25293 }, { "epoch": 1.24, "grad_norm": 0.519856333732605, "learning_rate": 0.0003807975371183129, "loss": 3.0541, "step": 25294 }, { "epoch": 1.24, "grad_norm": 0.5431399345397949, "learning_rate": 0.0003807827097213299, "loss": 3.0365, "step": 25295 }, { "epoch": 1.24, "grad_norm": 0.5920628905296326, "learning_rate": 0.0003807678821115809, "loss": 3.0214, "step": 25296 }, { "epoch": 1.24, "grad_norm": 0.5433076024055481, "learning_rate": 0.0003807530542891047, "loss": 2.8857, "step": 25297 }, { "epoch": 1.24, "grad_norm": 0.5601280331611633, "learning_rate": 0.0003807382262539406, "loss": 2.8714, "step": 25298 }, { "epoch": 1.24, "grad_norm": 0.5662012696266174, "learning_rate": 0.00038072339800612753, "loss": 2.8085, "step": 25299 }, { "epoch": 1.24, "grad_norm": 0.5706889629364014, "learning_rate": 0.0003807085695457046, "loss": 3.1877, "step": 25300 }, { "epoch": 1.24, "grad_norm": 0.5297551155090332, "learning_rate": 0.0003806937408727108, "loss": 3.0017, "step": 25301 }, { "epoch": 1.24, "grad_norm": 0.5498740673065186, "learning_rate": 0.0003806789119871852, "loss": 3.1668, "step": 25302 }, { "epoch": 1.24, "grad_norm": 0.6031630635261536, "learning_rate": 0.0003806640828891669, "loss": 3.1125, "step": 25303 }, { "epoch": 1.24, "grad_norm": 0.5248180627822876, "learning_rate": 0.00038064925357869493, "loss": 2.9471, "step": 25304 }, { "epoch": 1.24, "grad_norm": 0.5380234718322754, "learning_rate": 0.0003806344240558083, "loss": 3.0616, "step": 25305 }, { "epoch": 1.24, "grad_norm": 0.544638991355896, "learning_rate": 0.00038061959432054613, "loss": 3.1263, "step": 25306 }, { "epoch": 1.24, "grad_norm": 0.5599265694618225, "learning_rate": 0.00038060476437294747, "loss": 3.083, "step": 25307 }, { "epoch": 1.24, "grad_norm": 0.5477656126022339, "learning_rate": 0.0003805899342130514, "loss": 3.1702, "step": 25308 }, { "epoch": 1.24, "grad_norm": 0.5713855028152466, "learning_rate": 0.00038057510384089695, "loss": 3.1395, "step": 25309 }, { "epoch": 1.24, "grad_norm": 0.5825148820877075, "learning_rate": 0.00038056027325652315, "loss": 3.1251, "step": 25310 }, { "epoch": 1.24, "grad_norm": 0.5400784015655518, "learning_rate": 0.00038054544245996915, "loss": 3.0086, "step": 25311 }, { "epoch": 1.24, "grad_norm": 0.5549777150154114, "learning_rate": 0.00038053061145127385, "loss": 3.0571, "step": 25312 }, { "epoch": 1.24, "grad_norm": 0.5992767810821533, "learning_rate": 0.0003805157802304766, "loss": 3.0949, "step": 25313 }, { "epoch": 1.24, "grad_norm": 0.5606101155281067, "learning_rate": 0.0003805009487976161, "loss": 3.0668, "step": 25314 }, { "epoch": 1.24, "grad_norm": 0.6076589822769165, "learning_rate": 0.00038048611715273166, "loss": 3.0241, "step": 25315 }, { "epoch": 1.24, "grad_norm": 0.5783843994140625, "learning_rate": 0.00038047128529586225, "loss": 3.0586, "step": 25316 }, { "epoch": 1.24, "grad_norm": 0.5715208053588867, "learning_rate": 0.000380456453227047, "loss": 3.1446, "step": 25317 }, { "epoch": 1.24, "grad_norm": 0.5512316823005676, "learning_rate": 0.00038044162094632493, "loss": 3.0362, "step": 25318 }, { "epoch": 1.24, "grad_norm": 0.539901852607727, "learning_rate": 0.0003804267884537351, "loss": 3.0684, "step": 25319 }, { "epoch": 1.24, "grad_norm": 0.5319052338600159, "learning_rate": 0.0003804119557493166, "loss": 3.0007, "step": 25320 }, { "epoch": 1.24, "grad_norm": 0.5402718782424927, "learning_rate": 0.0003803971228331085, "loss": 2.9589, "step": 25321 }, { "epoch": 1.24, "grad_norm": 0.6263967156410217, "learning_rate": 0.0003803822897051498, "loss": 3.0552, "step": 25322 }, { "epoch": 1.24, "grad_norm": 0.5595197081565857, "learning_rate": 0.00038036745636547964, "loss": 2.8282, "step": 25323 }, { "epoch": 1.24, "grad_norm": 0.5648815035820007, "learning_rate": 0.0003803526228141371, "loss": 2.9941, "step": 25324 }, { "epoch": 1.24, "grad_norm": 0.6085686683654785, "learning_rate": 0.00038033778905116123, "loss": 3.0553, "step": 25325 }, { "epoch": 1.24, "grad_norm": 0.5713744163513184, "learning_rate": 0.000380322955076591, "loss": 2.9707, "step": 25326 }, { "epoch": 1.24, "grad_norm": 0.5762509107589722, "learning_rate": 0.0003803081208904656, "loss": 3.0925, "step": 25327 }, { "epoch": 1.24, "grad_norm": 0.5664671063423157, "learning_rate": 0.0003802932864928241, "loss": 2.9792, "step": 25328 }, { "epoch": 1.24, "grad_norm": 0.5856859087944031, "learning_rate": 0.00038027845188370554, "loss": 2.8393, "step": 25329 }, { "epoch": 1.24, "grad_norm": 0.5801443457603455, "learning_rate": 0.00038026361706314894, "loss": 3.1843, "step": 25330 }, { "epoch": 1.24, "grad_norm": 0.5778241157531738, "learning_rate": 0.00038024878203119346, "loss": 3.1812, "step": 25331 }, { "epoch": 1.24, "grad_norm": 0.5924252271652222, "learning_rate": 0.0003802339467878781, "loss": 3.0418, "step": 25332 }, { "epoch": 1.24, "grad_norm": 0.5770880579948425, "learning_rate": 0.00038021911133324194, "loss": 2.8706, "step": 25333 }, { "epoch": 1.24, "grad_norm": 0.547100841999054, "learning_rate": 0.0003802042756673241, "loss": 3.2091, "step": 25334 }, { "epoch": 1.24, "grad_norm": 0.6044512391090393, "learning_rate": 0.0003801894397901637, "loss": 3.153, "step": 25335 }, { "epoch": 1.24, "grad_norm": 0.5438991189002991, "learning_rate": 0.00038017460370179976, "loss": 3.1416, "step": 25336 }, { "epoch": 1.24, "grad_norm": 0.5397824645042419, "learning_rate": 0.00038015976740227126, "loss": 3.065, "step": 25337 }, { "epoch": 1.24, "grad_norm": 0.6003246903419495, "learning_rate": 0.0003801449308916173, "loss": 2.9869, "step": 25338 }, { "epoch": 1.24, "grad_norm": 0.5817223191261292, "learning_rate": 0.00038013009416987714, "loss": 3.0956, "step": 25339 }, { "epoch": 1.24, "grad_norm": 0.5436729192733765, "learning_rate": 0.00038011525723708974, "loss": 3.1657, "step": 25340 }, { "epoch": 1.24, "grad_norm": 0.5461451411247253, "learning_rate": 0.0003801004200932941, "loss": 2.9516, "step": 25341 }, { "epoch": 1.24, "grad_norm": 0.5592874884605408, "learning_rate": 0.0003800855827385294, "loss": 3.01, "step": 25342 }, { "epoch": 1.24, "grad_norm": 0.5277195572853088, "learning_rate": 0.00038007074517283466, "loss": 2.9353, "step": 25343 }, { "epoch": 1.24, "grad_norm": 0.5816713571548462, "learning_rate": 0.000380055907396249, "loss": 3.2137, "step": 25344 }, { "epoch": 1.24, "grad_norm": 0.5725088715553284, "learning_rate": 0.00038004106940881145, "loss": 2.9513, "step": 25345 }, { "epoch": 1.24, "grad_norm": 0.5226502418518066, "learning_rate": 0.0003800262312105612, "loss": 2.9282, "step": 25346 }, { "epoch": 1.24, "grad_norm": 0.5807681679725647, "learning_rate": 0.0003800113928015373, "loss": 3.0696, "step": 25347 }, { "epoch": 1.24, "grad_norm": 0.5598049163818359, "learning_rate": 0.00037999655418177866, "loss": 3.0249, "step": 25348 }, { "epoch": 1.24, "grad_norm": 0.5281652212142944, "learning_rate": 0.00037998171535132456, "loss": 3.0612, "step": 25349 }, { "epoch": 1.24, "grad_norm": 0.5705523490905762, "learning_rate": 0.00037996687631021404, "loss": 3.0364, "step": 25350 }, { "epoch": 1.24, "grad_norm": 0.599563717842102, "learning_rate": 0.00037995203705848613, "loss": 3.0559, "step": 25351 }, { "epoch": 1.24, "grad_norm": 0.5675721168518066, "learning_rate": 0.0003799371975961799, "loss": 2.8777, "step": 25352 }, { "epoch": 1.24, "grad_norm": 0.5328501462936401, "learning_rate": 0.00037992235792333457, "loss": 3.0203, "step": 25353 }, { "epoch": 1.24, "grad_norm": 0.554772138595581, "learning_rate": 0.000379907518039989, "loss": 3.1119, "step": 25354 }, { "epoch": 1.24, "grad_norm": 0.6034314632415771, "learning_rate": 0.0003798926779461825, "loss": 2.9382, "step": 25355 }, { "epoch": 1.24, "grad_norm": 0.582343578338623, "learning_rate": 0.000379877837641954, "loss": 3.0815, "step": 25356 }, { "epoch": 1.24, "grad_norm": 0.5476296544075012, "learning_rate": 0.00037986299712734273, "loss": 3.1573, "step": 25357 }, { "epoch": 1.24, "grad_norm": 0.5945506691932678, "learning_rate": 0.00037984815640238764, "loss": 3.1786, "step": 25358 }, { "epoch": 1.24, "grad_norm": 0.5869128704071045, "learning_rate": 0.0003798333154671279, "loss": 3.2192, "step": 25359 }, { "epoch": 1.24, "grad_norm": 0.585900604724884, "learning_rate": 0.00037981847432160253, "loss": 2.7552, "step": 25360 }, { "epoch": 1.24, "grad_norm": 0.6065345406532288, "learning_rate": 0.0003798036329658508, "loss": 3.0549, "step": 25361 }, { "epoch": 1.24, "grad_norm": 0.6202573180198669, "learning_rate": 0.0003797887913999115, "loss": 3.2176, "step": 25362 }, { "epoch": 1.24, "grad_norm": 0.5414901971817017, "learning_rate": 0.00037977394962382383, "loss": 3.2003, "step": 25363 }, { "epoch": 1.24, "grad_norm": 0.5944116115570068, "learning_rate": 0.00037975910763762713, "loss": 3.0485, "step": 25364 }, { "epoch": 1.24, "grad_norm": 0.5542477369308472, "learning_rate": 0.00037974426544136014, "loss": 3.0894, "step": 25365 }, { "epoch": 1.24, "grad_norm": 0.5856495499610901, "learning_rate": 0.00037972942303506215, "loss": 3.2326, "step": 25366 }, { "epoch": 1.24, "grad_norm": 0.5559060573577881, "learning_rate": 0.0003797145804187722, "loss": 3.1288, "step": 25367 }, { "epoch": 1.24, "grad_norm": 0.6178712248802185, "learning_rate": 0.0003796997375925294, "loss": 2.901, "step": 25368 }, { "epoch": 1.24, "grad_norm": 0.534890353679657, "learning_rate": 0.00037968489455637286, "loss": 3.0637, "step": 25369 }, { "epoch": 1.24, "grad_norm": 0.5386385917663574, "learning_rate": 0.0003796700513103416, "loss": 3.1557, "step": 25370 }, { "epoch": 1.24, "grad_norm": 0.574946403503418, "learning_rate": 0.00037965520785447474, "loss": 3.0145, "step": 25371 }, { "epoch": 1.24, "grad_norm": 0.5754439830780029, "learning_rate": 0.0003796403641888114, "loss": 3.0277, "step": 25372 }, { "epoch": 1.24, "grad_norm": 0.6202484369277954, "learning_rate": 0.0003796255203133907, "loss": 3.1161, "step": 25373 }, { "epoch": 1.24, "grad_norm": 0.5231525897979736, "learning_rate": 0.0003796106762282517, "loss": 3.1765, "step": 25374 }, { "epoch": 1.24, "grad_norm": 0.5547069311141968, "learning_rate": 0.0003795958319334334, "loss": 3.0762, "step": 25375 }, { "epoch": 1.24, "grad_norm": 0.5507140159606934, "learning_rate": 0.0003795809874289751, "loss": 2.9072, "step": 25376 }, { "epoch": 1.24, "grad_norm": 0.5551223754882812, "learning_rate": 0.0003795661427149158, "loss": 2.9539, "step": 25377 }, { "epoch": 1.24, "grad_norm": 0.5440731048583984, "learning_rate": 0.0003795512977912946, "loss": 3.1145, "step": 25378 }, { "epoch": 1.24, "grad_norm": 0.5543565154075623, "learning_rate": 0.0003795364526581505, "loss": 3.1151, "step": 25379 }, { "epoch": 1.24, "grad_norm": 0.5475043654441833, "learning_rate": 0.0003795216073155228, "loss": 3.1024, "step": 25380 }, { "epoch": 1.24, "grad_norm": 0.5892181396484375, "learning_rate": 0.00037950676176345044, "loss": 2.9669, "step": 25381 }, { "epoch": 1.24, "grad_norm": 0.5114701390266418, "learning_rate": 0.00037949191600197254, "loss": 2.8436, "step": 25382 }, { "epoch": 1.24, "grad_norm": 0.5685053467750549, "learning_rate": 0.00037947707003112833, "loss": 3.0272, "step": 25383 }, { "epoch": 1.24, "grad_norm": 0.579219400882721, "learning_rate": 0.0003794622238509567, "loss": 3.0499, "step": 25384 }, { "epoch": 1.24, "grad_norm": 0.5205180644989014, "learning_rate": 0.00037944737746149694, "loss": 3.0484, "step": 25385 }, { "epoch": 1.24, "grad_norm": 0.561149001121521, "learning_rate": 0.0003794325308627881, "loss": 3.0581, "step": 25386 }, { "epoch": 1.24, "grad_norm": 0.5951744318008423, "learning_rate": 0.0003794176840548691, "loss": 3.1024, "step": 25387 }, { "epoch": 1.24, "grad_norm": 0.6370974779129028, "learning_rate": 0.0003794028370377794, "loss": 3.1705, "step": 25388 }, { "epoch": 1.24, "grad_norm": 0.5966722965240479, "learning_rate": 0.0003793879898115577, "loss": 2.9605, "step": 25389 }, { "epoch": 1.24, "grad_norm": 0.5954158306121826, "learning_rate": 0.00037937314237624357, "loss": 3.0356, "step": 25390 }, { "epoch": 1.24, "grad_norm": 0.5606188774108887, "learning_rate": 0.00037935829473187573, "loss": 2.9174, "step": 25391 }, { "epoch": 1.24, "grad_norm": 0.5629304051399231, "learning_rate": 0.0003793434468784934, "loss": 2.9253, "step": 25392 }, { "epoch": 1.24, "grad_norm": 0.5472045540809631, "learning_rate": 0.0003793285988161357, "loss": 3.3048, "step": 25393 }, { "epoch": 1.24, "grad_norm": 0.5473329424858093, "learning_rate": 0.0003793137505448417, "loss": 3.1923, "step": 25394 }, { "epoch": 1.24, "grad_norm": 0.5582367777824402, "learning_rate": 0.0003792989020646506, "loss": 3.0302, "step": 25395 }, { "epoch": 1.24, "grad_norm": 1.2111254930496216, "learning_rate": 0.00037928405337560143, "loss": 2.9217, "step": 25396 }, { "epoch": 1.24, "grad_norm": 0.5781081318855286, "learning_rate": 0.0003792692044777334, "loss": 3.1969, "step": 25397 }, { "epoch": 1.24, "grad_norm": 0.5536330938339233, "learning_rate": 0.00037925435537108546, "loss": 3.2146, "step": 25398 }, { "epoch": 1.24, "grad_norm": 0.5391330122947693, "learning_rate": 0.00037923950605569677, "loss": 2.8489, "step": 25399 }, { "epoch": 1.24, "grad_norm": 0.5480332374572754, "learning_rate": 0.00037922465653160653, "loss": 2.9888, "step": 25400 }, { "epoch": 1.24, "grad_norm": 0.5818240642547607, "learning_rate": 0.00037920980679885383, "loss": 2.974, "step": 25401 }, { "epoch": 1.24, "grad_norm": 0.5594255924224854, "learning_rate": 0.0003791949568574777, "loss": 3.0717, "step": 25402 }, { "epoch": 1.24, "grad_norm": 0.5563430786132812, "learning_rate": 0.00037918010670751735, "loss": 3.3801, "step": 25403 }, { "epoch": 1.24, "grad_norm": 0.5488848090171814, "learning_rate": 0.0003791652563490118, "loss": 2.8365, "step": 25404 }, { "epoch": 1.25, "grad_norm": 0.5589650869369507, "learning_rate": 0.0003791504057820001, "loss": 2.8259, "step": 25405 }, { "epoch": 1.25, "grad_norm": 0.5604705214500427, "learning_rate": 0.00037913555500652154, "loss": 3.1208, "step": 25406 }, { "epoch": 1.25, "grad_norm": 0.5571016073226929, "learning_rate": 0.0003791207040226152, "loss": 3.1, "step": 25407 }, { "epoch": 1.25, "grad_norm": 0.559099555015564, "learning_rate": 0.00037910585283032006, "loss": 3.0902, "step": 25408 }, { "epoch": 1.25, "grad_norm": 0.5342800617218018, "learning_rate": 0.0003790910014296754, "loss": 3.0075, "step": 25409 }, { "epoch": 1.25, "grad_norm": 0.555751621723175, "learning_rate": 0.00037907614982072023, "loss": 3.2053, "step": 25410 }, { "epoch": 1.25, "grad_norm": 0.6236014366149902, "learning_rate": 0.0003790612980034937, "loss": 3.0104, "step": 25411 }, { "epoch": 1.25, "grad_norm": 0.5512464642524719, "learning_rate": 0.000379046445978035, "loss": 3.1207, "step": 25412 }, { "epoch": 1.25, "grad_norm": 0.5613242387771606, "learning_rate": 0.00037903159374438314, "loss": 3.1059, "step": 25413 }, { "epoch": 1.25, "grad_norm": 0.5795272588729858, "learning_rate": 0.00037901674130257724, "loss": 3.1214, "step": 25414 }, { "epoch": 1.25, "grad_norm": 0.5552756190299988, "learning_rate": 0.00037900188865265646, "loss": 3.2082, "step": 25415 }, { "epoch": 1.25, "grad_norm": 0.5399115681648254, "learning_rate": 0.0003789870357946599, "loss": 3.035, "step": 25416 }, { "epoch": 1.25, "grad_norm": 0.5368036031723022, "learning_rate": 0.0003789721827286267, "loss": 2.8561, "step": 25417 }, { "epoch": 1.25, "grad_norm": 0.5200400352478027, "learning_rate": 0.000378957329454596, "loss": 2.9214, "step": 25418 }, { "epoch": 1.25, "grad_norm": 0.569953978061676, "learning_rate": 0.0003789424759726068, "loss": 3.2006, "step": 25419 }, { "epoch": 1.25, "grad_norm": 0.5677704215049744, "learning_rate": 0.0003789276222826984, "loss": 2.8954, "step": 25420 }, { "epoch": 1.25, "grad_norm": 0.5435131788253784, "learning_rate": 0.00037891276838490976, "loss": 3.0026, "step": 25421 }, { "epoch": 1.25, "grad_norm": 0.5513231754302979, "learning_rate": 0.00037889791427928016, "loss": 3.173, "step": 25422 }, { "epoch": 1.25, "grad_norm": 0.5995567440986633, "learning_rate": 0.00037888305996584864, "loss": 2.996, "step": 25423 }, { "epoch": 1.25, "grad_norm": 0.5517016649246216, "learning_rate": 0.00037886820544465427, "loss": 2.964, "step": 25424 }, { "epoch": 1.25, "grad_norm": 0.572759747505188, "learning_rate": 0.00037885335071573614, "loss": 2.8945, "step": 25425 }, { "epoch": 1.25, "grad_norm": 0.6202085018157959, "learning_rate": 0.0003788384957791336, "loss": 3.1124, "step": 25426 }, { "epoch": 1.25, "grad_norm": 0.5376129746437073, "learning_rate": 0.00037882364063488566, "loss": 3.0313, "step": 25427 }, { "epoch": 1.25, "grad_norm": 0.5526015758514404, "learning_rate": 0.00037880878528303126, "loss": 3.0498, "step": 25428 }, { "epoch": 1.25, "grad_norm": 0.553801417350769, "learning_rate": 0.0003787939297236098, "loss": 3.0979, "step": 25429 }, { "epoch": 1.25, "grad_norm": 0.5524164438247681, "learning_rate": 0.0003787790739566603, "loss": 3.1177, "step": 25430 }, { "epoch": 1.25, "grad_norm": 0.5112387537956238, "learning_rate": 0.00037876421798222176, "loss": 3.2164, "step": 25431 }, { "epoch": 1.25, "grad_norm": 0.5780650973320007, "learning_rate": 0.00037874936180033356, "loss": 3.0476, "step": 25432 }, { "epoch": 1.25, "grad_norm": 0.5552143454551697, "learning_rate": 0.0003787345054110347, "loss": 3.1861, "step": 25433 }, { "epoch": 1.25, "grad_norm": 0.5469130277633667, "learning_rate": 0.00037871964881436423, "loss": 2.8781, "step": 25434 }, { "epoch": 1.25, "grad_norm": 0.5303400158882141, "learning_rate": 0.00037870479201036137, "loss": 2.987, "step": 25435 }, { "epoch": 1.25, "grad_norm": 0.5632544755935669, "learning_rate": 0.00037868993499906524, "loss": 3.0197, "step": 25436 }, { "epoch": 1.25, "grad_norm": 0.5879452228546143, "learning_rate": 0.000378675077780515, "loss": 3.1763, "step": 25437 }, { "epoch": 1.25, "grad_norm": 0.6507343649864197, "learning_rate": 0.00037866022035474976, "loss": 2.9622, "step": 25438 }, { "epoch": 1.25, "grad_norm": 0.5414554476737976, "learning_rate": 0.0003786453627218086, "loss": 3.0865, "step": 25439 }, { "epoch": 1.25, "grad_norm": 0.564536988735199, "learning_rate": 0.0003786305048817306, "loss": 3.1138, "step": 25440 }, { "epoch": 1.25, "grad_norm": 0.5658540725708008, "learning_rate": 0.0003786156468345551, "loss": 2.9164, "step": 25441 }, { "epoch": 1.25, "grad_norm": 0.5722022652626038, "learning_rate": 0.0003786007885803211, "loss": 3.1499, "step": 25442 }, { "epoch": 1.25, "grad_norm": 0.5444322228431702, "learning_rate": 0.0003785859301190677, "loss": 3.0966, "step": 25443 }, { "epoch": 1.25, "grad_norm": 0.5430464148521423, "learning_rate": 0.00037857107145083415, "loss": 2.9632, "step": 25444 }, { "epoch": 1.25, "grad_norm": 0.5783938765525818, "learning_rate": 0.00037855621257565955, "loss": 2.8965, "step": 25445 }, { "epoch": 1.25, "grad_norm": 0.5244552493095398, "learning_rate": 0.00037854135349358285, "loss": 3.1105, "step": 25446 }, { "epoch": 1.25, "grad_norm": 0.5750003457069397, "learning_rate": 0.0003785264942046434, "loss": 3.1679, "step": 25447 }, { "epoch": 1.25, "grad_norm": 0.5536020398139954, "learning_rate": 0.00037851163470888045, "loss": 3.4111, "step": 25448 }, { "epoch": 1.25, "grad_norm": 0.7663441896438599, "learning_rate": 0.0003784967750063328, "loss": 2.8687, "step": 25449 }, { "epoch": 1.25, "grad_norm": 0.5692669153213501, "learning_rate": 0.0003784819150970397, "loss": 2.9943, "step": 25450 }, { "epoch": 1.25, "grad_norm": 0.5337068438529968, "learning_rate": 0.0003784670549810404, "loss": 3.1486, "step": 25451 }, { "epoch": 1.25, "grad_norm": 0.5633428692817688, "learning_rate": 0.0003784521946583741, "loss": 3.135, "step": 25452 }, { "epoch": 1.25, "grad_norm": 0.5698487758636475, "learning_rate": 0.0003784373341290797, "loss": 3.0194, "step": 25453 }, { "epoch": 1.25, "grad_norm": 0.5451970100402832, "learning_rate": 0.00037842247339319645, "loss": 3.0151, "step": 25454 }, { "epoch": 1.25, "grad_norm": 0.5770736336708069, "learning_rate": 0.0003784076124507635, "loss": 3.2033, "step": 25455 }, { "epoch": 1.25, "grad_norm": 0.5762639045715332, "learning_rate": 0.00037839275130182004, "loss": 2.977, "step": 25456 }, { "epoch": 1.25, "grad_norm": 0.5343278646469116, "learning_rate": 0.0003783778899464051, "loss": 3.0204, "step": 25457 }, { "epoch": 1.25, "grad_norm": 0.5425517559051514, "learning_rate": 0.00037836302838455794, "loss": 3.0833, "step": 25458 }, { "epoch": 1.25, "grad_norm": 0.5324981212615967, "learning_rate": 0.0003783481666163176, "loss": 3.2069, "step": 25459 }, { "epoch": 1.25, "grad_norm": 0.5692557096481323, "learning_rate": 0.0003783333046417233, "loss": 3.0091, "step": 25460 }, { "epoch": 1.25, "grad_norm": 0.5556652545928955, "learning_rate": 0.00037831844246081413, "loss": 2.867, "step": 25461 }, { "epoch": 1.25, "grad_norm": 0.5014756321907043, "learning_rate": 0.00037830358007362924, "loss": 3.1228, "step": 25462 }, { "epoch": 1.25, "grad_norm": 0.5904489159584045, "learning_rate": 0.00037828871748020785, "loss": 3.0725, "step": 25463 }, { "epoch": 1.25, "grad_norm": 0.5578276515007019, "learning_rate": 0.000378273854680589, "loss": 3.1744, "step": 25464 }, { "epoch": 1.25, "grad_norm": 0.5439172387123108, "learning_rate": 0.00037825899167481184, "loss": 3.1216, "step": 25465 }, { "epoch": 1.25, "grad_norm": 0.5510421395301819, "learning_rate": 0.0003782441284629156, "loss": 3.1522, "step": 25466 }, { "epoch": 1.25, "grad_norm": 0.581859827041626, "learning_rate": 0.0003782292650449394, "loss": 3.0195, "step": 25467 }, { "epoch": 1.25, "grad_norm": 0.5343931913375854, "learning_rate": 0.00037821440142092236, "loss": 3.1396, "step": 25468 }, { "epoch": 1.25, "grad_norm": 0.5974150896072388, "learning_rate": 0.0003781995375909036, "loss": 3.2128, "step": 25469 }, { "epoch": 1.25, "grad_norm": 0.5799618363380432, "learning_rate": 0.00037818467355492234, "loss": 3.0855, "step": 25470 }, { "epoch": 1.25, "grad_norm": 0.5386278629302979, "learning_rate": 0.00037816980931301773, "loss": 3.1758, "step": 25471 }, { "epoch": 1.25, "grad_norm": 0.6230940818786621, "learning_rate": 0.0003781549448652288, "loss": 3.2251, "step": 25472 }, { "epoch": 1.25, "grad_norm": 0.5379871129989624, "learning_rate": 0.00037814008021159486, "loss": 3.0945, "step": 25473 }, { "epoch": 1.25, "grad_norm": 0.5654126405715942, "learning_rate": 0.00037812521535215504, "loss": 3.1067, "step": 25474 }, { "epoch": 1.25, "grad_norm": 0.5493576526641846, "learning_rate": 0.00037811035028694834, "loss": 3.1932, "step": 25475 }, { "epoch": 1.25, "grad_norm": 0.5735651850700378, "learning_rate": 0.000378095485016014, "loss": 3.2, "step": 25476 }, { "epoch": 1.25, "grad_norm": 0.5522319674491882, "learning_rate": 0.0003780806195393913, "loss": 3.0353, "step": 25477 }, { "epoch": 1.25, "grad_norm": 0.575622022151947, "learning_rate": 0.00037806575385711916, "loss": 2.8894, "step": 25478 }, { "epoch": 1.25, "grad_norm": 0.6093813180923462, "learning_rate": 0.0003780508879692369, "loss": 2.8872, "step": 25479 }, { "epoch": 1.25, "grad_norm": 0.5739896297454834, "learning_rate": 0.00037803602187578357, "loss": 2.9564, "step": 25480 }, { "epoch": 1.25, "grad_norm": 0.560309648513794, "learning_rate": 0.00037802115557679844, "loss": 3.3268, "step": 25481 }, { "epoch": 1.25, "grad_norm": 0.5892646908760071, "learning_rate": 0.00037800628907232056, "loss": 3.1495, "step": 25482 }, { "epoch": 1.25, "grad_norm": 0.5857096910476685, "learning_rate": 0.0003779914223623892, "loss": 2.9523, "step": 25483 }, { "epoch": 1.25, "grad_norm": 0.5739219784736633, "learning_rate": 0.0003779765554470433, "loss": 2.9484, "step": 25484 }, { "epoch": 1.25, "grad_norm": 0.5851458311080933, "learning_rate": 0.0003779616883263223, "loss": 3.0733, "step": 25485 }, { "epoch": 1.25, "grad_norm": 0.5476492047309875, "learning_rate": 0.0003779468210002652, "loss": 3.0483, "step": 25486 }, { "epoch": 1.25, "grad_norm": 0.5560165047645569, "learning_rate": 0.000377931953468911, "loss": 3.0811, "step": 25487 }, { "epoch": 1.25, "grad_norm": 0.5552588105201721, "learning_rate": 0.00037791708573229926, "loss": 3.3325, "step": 25488 }, { "epoch": 1.25, "grad_norm": 0.5610097646713257, "learning_rate": 0.0003779022177904689, "loss": 3.1599, "step": 25489 }, { "epoch": 1.25, "grad_norm": 0.6263189911842346, "learning_rate": 0.00037788734964345897, "loss": 3.2049, "step": 25490 }, { "epoch": 1.25, "grad_norm": 0.5741820931434631, "learning_rate": 0.0003778724812913088, "loss": 3.0006, "step": 25491 }, { "epoch": 1.25, "grad_norm": 0.579188346862793, "learning_rate": 0.00037785761273405745, "loss": 3.0864, "step": 25492 }, { "epoch": 1.25, "grad_norm": 0.5346486568450928, "learning_rate": 0.00037784274397174416, "loss": 2.8912, "step": 25493 }, { "epoch": 1.25, "grad_norm": 0.5457817912101746, "learning_rate": 0.0003778278750044081, "loss": 2.8513, "step": 25494 }, { "epoch": 1.25, "grad_norm": 0.5900013446807861, "learning_rate": 0.00037781300583208835, "loss": 3.0625, "step": 25495 }, { "epoch": 1.25, "grad_norm": 0.5467310547828674, "learning_rate": 0.0003777981364548242, "loss": 3.1045, "step": 25496 }, { "epoch": 1.25, "grad_norm": 0.5489881038665771, "learning_rate": 0.0003777832668726546, "loss": 2.9645, "step": 25497 }, { "epoch": 1.25, "grad_norm": 0.5877793431282043, "learning_rate": 0.0003777683970856189, "loss": 3.1978, "step": 25498 }, { "epoch": 1.25, "grad_norm": 0.5987591743469238, "learning_rate": 0.0003777535270937563, "loss": 2.9118, "step": 25499 }, { "epoch": 1.25, "grad_norm": 0.5459617972373962, "learning_rate": 0.00037773865689710585, "loss": 3.2444, "step": 25500 }, { "epoch": 1.25, "grad_norm": 0.5510004162788391, "learning_rate": 0.00037772378649570667, "loss": 3.0586, "step": 25501 }, { "epoch": 1.25, "grad_norm": 0.5648934245109558, "learning_rate": 0.000377708915889598, "loss": 3.0798, "step": 25502 }, { "epoch": 1.25, "grad_norm": 0.5991185307502747, "learning_rate": 0.00037769404507881897, "loss": 3.095, "step": 25503 }, { "epoch": 1.25, "grad_norm": 0.5326690673828125, "learning_rate": 0.00037767917406340883, "loss": 3.0806, "step": 25504 }, { "epoch": 1.25, "grad_norm": 0.5778882503509521, "learning_rate": 0.0003776643028434067, "loss": 3.0523, "step": 25505 }, { "epoch": 1.25, "grad_norm": 0.5468420386314392, "learning_rate": 0.00037764943141885174, "loss": 3.038, "step": 25506 }, { "epoch": 1.25, "grad_norm": 0.617231547832489, "learning_rate": 0.0003776345597897831, "loss": 2.9654, "step": 25507 }, { "epoch": 1.25, "grad_norm": 0.5568265914916992, "learning_rate": 0.00037761968795624, "loss": 2.8395, "step": 25508 }, { "epoch": 1.25, "grad_norm": 0.5809617638587952, "learning_rate": 0.00037760481591826153, "loss": 3.1054, "step": 25509 }, { "epoch": 1.25, "grad_norm": 0.5677468180656433, "learning_rate": 0.00037758994367588695, "loss": 2.9851, "step": 25510 }, { "epoch": 1.25, "grad_norm": 0.5191762447357178, "learning_rate": 0.00037757507122915544, "loss": 3.2637, "step": 25511 }, { "epoch": 1.25, "grad_norm": 0.5888081192970276, "learning_rate": 0.00037756019857810604, "loss": 3.06, "step": 25512 }, { "epoch": 1.25, "grad_norm": 0.5667383074760437, "learning_rate": 0.000377545325722778, "loss": 2.9374, "step": 25513 }, { "epoch": 1.25, "grad_norm": 0.5688818693161011, "learning_rate": 0.0003775304526632106, "loss": 3.2734, "step": 25514 }, { "epoch": 1.25, "grad_norm": 0.5567358136177063, "learning_rate": 0.0003775155793994428, "loss": 3.0426, "step": 25515 }, { "epoch": 1.25, "grad_norm": 0.5471105575561523, "learning_rate": 0.00037750070593151394, "loss": 3.25, "step": 25516 }, { "epoch": 1.25, "grad_norm": 0.5713381767272949, "learning_rate": 0.00037748583225946306, "loss": 3.0582, "step": 25517 }, { "epoch": 1.25, "grad_norm": 0.5663639903068542, "learning_rate": 0.00037747095838332954, "loss": 3.2855, "step": 25518 }, { "epoch": 1.25, "grad_norm": 0.5420747399330139, "learning_rate": 0.00037745608430315227, "loss": 3.3252, "step": 25519 }, { "epoch": 1.25, "grad_norm": 0.5536601543426514, "learning_rate": 0.0003774412100189707, "loss": 3.0771, "step": 25520 }, { "epoch": 1.25, "grad_norm": 0.5411950945854187, "learning_rate": 0.0003774263355308238, "loss": 3.0431, "step": 25521 }, { "epoch": 1.25, "grad_norm": 0.571185827255249, "learning_rate": 0.0003774114608387509, "loss": 3.2206, "step": 25522 }, { "epoch": 1.25, "grad_norm": 0.5684292316436768, "learning_rate": 0.0003773965859427911, "loss": 3.0063, "step": 25523 }, { "epoch": 1.25, "grad_norm": 0.5304699540138245, "learning_rate": 0.0003773817108429836, "loss": 3.0202, "step": 25524 }, { "epoch": 1.25, "grad_norm": 0.5694625973701477, "learning_rate": 0.0003773668355393676, "loss": 3.0962, "step": 25525 }, { "epoch": 1.25, "grad_norm": 0.545238733291626, "learning_rate": 0.00037735196003198213, "loss": 2.9854, "step": 25526 }, { "epoch": 1.25, "grad_norm": 0.578179657459259, "learning_rate": 0.0003773370843208666, "loss": 3.029, "step": 25527 }, { "epoch": 1.25, "grad_norm": 0.5803759694099426, "learning_rate": 0.00037732220840606, "loss": 3.2408, "step": 25528 }, { "epoch": 1.25, "grad_norm": 0.5503095388412476, "learning_rate": 0.0003773073322876016, "loss": 3.4273, "step": 25529 }, { "epoch": 1.25, "grad_norm": 0.532310426235199, "learning_rate": 0.0003772924559655305, "loss": 3.0394, "step": 25530 }, { "epoch": 1.25, "grad_norm": 0.5836457014083862, "learning_rate": 0.000377277579439886, "loss": 2.9752, "step": 25531 }, { "epoch": 1.25, "grad_norm": 0.5723191499710083, "learning_rate": 0.0003772627027107073, "loss": 3.0069, "step": 25532 }, { "epoch": 1.25, "grad_norm": 0.5557374358177185, "learning_rate": 0.00037724782577803345, "loss": 3.2241, "step": 25533 }, { "epoch": 1.25, "grad_norm": 0.5663606524467468, "learning_rate": 0.0003772329486419036, "loss": 3.2456, "step": 25534 }, { "epoch": 1.25, "grad_norm": 0.5517666339874268, "learning_rate": 0.0003772180713023572, "loss": 2.9475, "step": 25535 }, { "epoch": 1.25, "grad_norm": 0.5494801998138428, "learning_rate": 0.0003772031937594332, "loss": 3.1934, "step": 25536 }, { "epoch": 1.25, "grad_norm": 0.5216752886772156, "learning_rate": 0.0003771883160131708, "loss": 3.0643, "step": 25537 }, { "epoch": 1.25, "grad_norm": 0.5742051005363464, "learning_rate": 0.0003771734380636092, "loss": 2.9488, "step": 25538 }, { "epoch": 1.25, "grad_norm": 0.53158038854599, "learning_rate": 0.00037715855991078776, "loss": 2.9859, "step": 25539 }, { "epoch": 1.25, "grad_norm": 0.5515921711921692, "learning_rate": 0.00037714368155474545, "loss": 3.0378, "step": 25540 }, { "epoch": 1.25, "grad_norm": 0.5819881558418274, "learning_rate": 0.0003771288029955215, "loss": 3.0099, "step": 25541 }, { "epoch": 1.25, "grad_norm": 0.5356078743934631, "learning_rate": 0.00037711392423315517, "loss": 3.1849, "step": 25542 }, { "epoch": 1.25, "grad_norm": 0.5517314076423645, "learning_rate": 0.00037709904526768553, "loss": 3.141, "step": 25543 }, { "epoch": 1.25, "grad_norm": 0.5887091159820557, "learning_rate": 0.0003770841660991519, "loss": 3.2121, "step": 25544 }, { "epoch": 1.25, "grad_norm": 0.6048860549926758, "learning_rate": 0.0003770692867275934, "loss": 3.0597, "step": 25545 }, { "epoch": 1.25, "grad_norm": 0.5582966804504395, "learning_rate": 0.0003770544071530492, "loss": 2.9708, "step": 25546 }, { "epoch": 1.25, "grad_norm": 0.588004469871521, "learning_rate": 0.0003770395273755586, "loss": 3.0733, "step": 25547 }, { "epoch": 1.25, "grad_norm": 0.6418083906173706, "learning_rate": 0.00037702464739516065, "loss": 2.9607, "step": 25548 }, { "epoch": 1.25, "grad_norm": 0.5486046075820923, "learning_rate": 0.00037700976721189464, "loss": 3.0641, "step": 25549 }, { "epoch": 1.25, "grad_norm": 0.5865910649299622, "learning_rate": 0.0003769948868257997, "loss": 3.2268, "step": 25550 }, { "epoch": 1.25, "grad_norm": 0.571828305721283, "learning_rate": 0.0003769800062369151, "loss": 3.1411, "step": 25551 }, { "epoch": 1.25, "grad_norm": 0.6386631727218628, "learning_rate": 0.00037696512544527986, "loss": 3.2847, "step": 25552 }, { "epoch": 1.25, "grad_norm": 0.5370537042617798, "learning_rate": 0.0003769502444509333, "loss": 2.9321, "step": 25553 }, { "epoch": 1.25, "grad_norm": 0.5921683311462402, "learning_rate": 0.00037693536325391475, "loss": 3.0641, "step": 25554 }, { "epoch": 1.25, "grad_norm": 0.533803403377533, "learning_rate": 0.0003769204818542632, "loss": 2.8298, "step": 25555 }, { "epoch": 1.25, "grad_norm": 0.5238431096076965, "learning_rate": 0.0003769056002520178, "loss": 3.0571, "step": 25556 }, { "epoch": 1.25, "grad_norm": 0.6707302331924438, "learning_rate": 0.00037689071844721796, "loss": 3.0363, "step": 25557 }, { "epoch": 1.25, "grad_norm": 0.5460362434387207, "learning_rate": 0.00037687583643990273, "loss": 3.3561, "step": 25558 }, { "epoch": 1.25, "grad_norm": 0.5882094502449036, "learning_rate": 0.00037686095423011134, "loss": 3.268, "step": 25559 }, { "epoch": 1.25, "grad_norm": 0.5774903297424316, "learning_rate": 0.000376846071817883, "loss": 3.0655, "step": 25560 }, { "epoch": 1.25, "grad_norm": 0.5424748659133911, "learning_rate": 0.0003768311892032569, "loss": 3.0053, "step": 25561 }, { "epoch": 1.25, "grad_norm": 0.5767691731452942, "learning_rate": 0.00037681630638627223, "loss": 2.964, "step": 25562 }, { "epoch": 1.25, "grad_norm": 0.5361145734786987, "learning_rate": 0.0003768014233669682, "loss": 3.1929, "step": 25563 }, { "epoch": 1.25, "grad_norm": 0.582098126411438, "learning_rate": 0.000376786540145384, "loss": 2.9265, "step": 25564 }, { "epoch": 1.25, "grad_norm": 0.5639384984970093, "learning_rate": 0.00037677165672155884, "loss": 2.9726, "step": 25565 }, { "epoch": 1.25, "grad_norm": 0.608085036277771, "learning_rate": 0.0003767567730955319, "loss": 3.1035, "step": 25566 }, { "epoch": 1.25, "grad_norm": 0.5574251413345337, "learning_rate": 0.00037674188926734237, "loss": 2.9313, "step": 25567 }, { "epoch": 1.25, "grad_norm": 0.5575839281082153, "learning_rate": 0.00037672700523702944, "loss": 3.0273, "step": 25568 }, { "epoch": 1.25, "grad_norm": 0.6037725806236267, "learning_rate": 0.0003767121210046324, "loss": 3.2235, "step": 25569 }, { "epoch": 1.25, "grad_norm": 0.5843100547790527, "learning_rate": 0.0003766972365701904, "loss": 3.0442, "step": 25570 }, { "epoch": 1.25, "grad_norm": 0.5654159188270569, "learning_rate": 0.0003766823519337426, "loss": 3.1427, "step": 25571 }, { "epoch": 1.25, "grad_norm": 0.5915213227272034, "learning_rate": 0.00037666746709532825, "loss": 3.533, "step": 25572 }, { "epoch": 1.25, "grad_norm": 0.5641651153564453, "learning_rate": 0.0003766525820549866, "loss": 3.0753, "step": 25573 }, { "epoch": 1.25, "grad_norm": 0.5694716572761536, "learning_rate": 0.0003766376968127567, "loss": 3.1201, "step": 25574 }, { "epoch": 1.25, "grad_norm": 0.5624406933784485, "learning_rate": 0.00037662281136867793, "loss": 3.0598, "step": 25575 }, { "epoch": 1.25, "grad_norm": 0.560565710067749, "learning_rate": 0.00037660792572278943, "loss": 3.111, "step": 25576 }, { "epoch": 1.25, "grad_norm": 0.5786372423171997, "learning_rate": 0.0003765930398751304, "loss": 3.0341, "step": 25577 }, { "epoch": 1.25, "grad_norm": 0.535211980342865, "learning_rate": 0.00037657815382574004, "loss": 2.8081, "step": 25578 }, { "epoch": 1.25, "grad_norm": 0.5386492609977722, "learning_rate": 0.0003765632675746576, "loss": 3.2728, "step": 25579 }, { "epoch": 1.25, "grad_norm": 0.5663537979125977, "learning_rate": 0.00037654838112192215, "loss": 3.2322, "step": 25580 }, { "epoch": 1.25, "grad_norm": 0.6106393337249756, "learning_rate": 0.00037653349446757303, "loss": 2.9779, "step": 25581 }, { "epoch": 1.25, "grad_norm": 0.5462380051612854, "learning_rate": 0.0003765186076116495, "loss": 3.2093, "step": 25582 }, { "epoch": 1.25, "grad_norm": 0.5549441576004028, "learning_rate": 0.0003765037205541905, "loss": 3.0289, "step": 25583 }, { "epoch": 1.25, "grad_norm": 0.5625503063201904, "learning_rate": 0.0003764888332952355, "loss": 3.1362, "step": 25584 }, { "epoch": 1.25, "grad_norm": 0.582071840763092, "learning_rate": 0.0003764739458348237, "loss": 3.035, "step": 25585 }, { "epoch": 1.25, "grad_norm": 0.5360458493232727, "learning_rate": 0.0003764590581729942, "loss": 2.8962, "step": 25586 }, { "epoch": 1.25, "grad_norm": 0.5536467432975769, "learning_rate": 0.0003764441703097864, "loss": 3.0696, "step": 25587 }, { "epoch": 1.25, "grad_norm": 0.5753385424613953, "learning_rate": 0.0003764292822452392, "loss": 3.1238, "step": 25588 }, { "epoch": 1.25, "grad_norm": 0.5996004343032837, "learning_rate": 0.00037641439397939196, "loss": 3.0104, "step": 25589 }, { "epoch": 1.25, "grad_norm": 0.5788471698760986, "learning_rate": 0.00037639950551228397, "loss": 3.1478, "step": 25590 }, { "epoch": 1.25, "grad_norm": 0.5919115543365479, "learning_rate": 0.00037638461684395445, "loss": 3.2666, "step": 25591 }, { "epoch": 1.25, "grad_norm": 0.5502909421920776, "learning_rate": 0.0003763697279744424, "loss": 3.3325, "step": 25592 }, { "epoch": 1.25, "grad_norm": 0.5541229844093323, "learning_rate": 0.0003763548389037873, "loss": 3.0699, "step": 25593 }, { "epoch": 1.25, "grad_norm": 0.5316629409790039, "learning_rate": 0.0003763399496320282, "loss": 3.2613, "step": 25594 }, { "epoch": 1.25, "grad_norm": 0.5380682349205017, "learning_rate": 0.00037632506015920433, "loss": 3.2379, "step": 25595 }, { "epoch": 1.25, "grad_norm": 0.6165295243263245, "learning_rate": 0.00037631017048535503, "loss": 3.3058, "step": 25596 }, { "epoch": 1.25, "grad_norm": 0.5723467469215393, "learning_rate": 0.0003762952806105193, "loss": 3.1968, "step": 25597 }, { "epoch": 1.25, "grad_norm": 0.5385229587554932, "learning_rate": 0.0003762803905347367, "loss": 3.1022, "step": 25598 }, { "epoch": 1.25, "grad_norm": 0.5821444392204285, "learning_rate": 0.0003762655002580461, "loss": 3.036, "step": 25599 }, { "epoch": 1.25, "grad_norm": 0.5753803253173828, "learning_rate": 0.0003762506097804868, "loss": 3.0051, "step": 25600 }, { "epoch": 1.25, "grad_norm": 0.5555531978607178, "learning_rate": 0.00037623571910209817, "loss": 3.2474, "step": 25601 }, { "epoch": 1.25, "grad_norm": 0.5928922295570374, "learning_rate": 0.00037622082822291926, "loss": 3.2314, "step": 25602 }, { "epoch": 1.25, "grad_norm": 0.5480112433433533, "learning_rate": 0.00037620593714298936, "loss": 3.2276, "step": 25603 }, { "epoch": 1.25, "grad_norm": 0.5796836018562317, "learning_rate": 0.00037619104586234767, "loss": 2.9367, "step": 25604 }, { "epoch": 1.25, "grad_norm": 0.535346508026123, "learning_rate": 0.0003761761543810335, "loss": 3.2006, "step": 25605 }, { "epoch": 1.25, "grad_norm": 0.6117957830429077, "learning_rate": 0.0003761612626990859, "loss": 3.0288, "step": 25606 }, { "epoch": 1.25, "grad_norm": 0.5398089289665222, "learning_rate": 0.0003761463708165443, "loss": 3.1357, "step": 25607 }, { "epoch": 1.25, "grad_norm": 0.5904059410095215, "learning_rate": 0.0003761314787334477, "loss": 3.1091, "step": 25608 }, { "epoch": 1.26, "grad_norm": 0.5292810797691345, "learning_rate": 0.00037611658644983565, "loss": 2.9714, "step": 25609 }, { "epoch": 1.26, "grad_norm": 0.5337926149368286, "learning_rate": 0.0003761016939657469, "loss": 3.1214, "step": 25610 }, { "epoch": 1.26, "grad_norm": 0.5683236122131348, "learning_rate": 0.00037608680128122104, "loss": 2.8445, "step": 25611 }, { "epoch": 1.26, "grad_norm": 0.5837772488594055, "learning_rate": 0.0003760719083962973, "loss": 3.0071, "step": 25612 }, { "epoch": 1.26, "grad_norm": 0.5842117071151733, "learning_rate": 0.00037605701531101467, "loss": 2.9642, "step": 25613 }, { "epoch": 1.26, "grad_norm": 0.5550433993339539, "learning_rate": 0.0003760421220254125, "loss": 3.0106, "step": 25614 }, { "epoch": 1.26, "grad_norm": 0.5435743927955627, "learning_rate": 0.00037602722853953, "loss": 3.0388, "step": 25615 }, { "epoch": 1.26, "grad_norm": 0.6093657612800598, "learning_rate": 0.00037601233485340645, "loss": 3.0343, "step": 25616 }, { "epoch": 1.26, "grad_norm": 0.5637142062187195, "learning_rate": 0.0003759974409670811, "loss": 3.1616, "step": 25617 }, { "epoch": 1.26, "grad_norm": 0.5223868489265442, "learning_rate": 0.000375982546880593, "loss": 3.189, "step": 25618 }, { "epoch": 1.26, "grad_norm": 0.578644871711731, "learning_rate": 0.00037596765259398157, "loss": 2.9092, "step": 25619 }, { "epoch": 1.26, "grad_norm": 0.5757935643196106, "learning_rate": 0.00037595275810728597, "loss": 3.1162, "step": 25620 }, { "epoch": 1.26, "grad_norm": 0.6163303852081299, "learning_rate": 0.00037593786342054536, "loss": 3.0835, "step": 25621 }, { "epoch": 1.26, "grad_norm": 0.5282571315765381, "learning_rate": 0.00037592296853379906, "loss": 3.0402, "step": 25622 }, { "epoch": 1.26, "grad_norm": 0.5777333378791809, "learning_rate": 0.00037590807344708624, "loss": 3.1, "step": 25623 }, { "epoch": 1.26, "grad_norm": 0.5467130541801453, "learning_rate": 0.0003758931781604463, "loss": 3.2504, "step": 25624 }, { "epoch": 1.26, "grad_norm": 0.5582188367843628, "learning_rate": 0.00037587828267391814, "loss": 3.0181, "step": 25625 }, { "epoch": 1.26, "grad_norm": 0.5501718521118164, "learning_rate": 0.0003758633869875413, "loss": 2.9322, "step": 25626 }, { "epoch": 1.26, "grad_norm": 0.5445879101753235, "learning_rate": 0.0003758484911013549, "loss": 3.2152, "step": 25627 }, { "epoch": 1.26, "grad_norm": 0.5596219301223755, "learning_rate": 0.0003758335950153982, "loss": 3.0867, "step": 25628 }, { "epoch": 1.26, "grad_norm": 0.5973536968231201, "learning_rate": 0.00037581869872971036, "loss": 2.9504, "step": 25629 }, { "epoch": 1.26, "grad_norm": 0.5466140508651733, "learning_rate": 0.0003758038022443306, "loss": 2.9407, "step": 25630 }, { "epoch": 1.26, "grad_norm": 0.6324753165245056, "learning_rate": 0.0003757889055592983, "loss": 3.0794, "step": 25631 }, { "epoch": 1.26, "grad_norm": 0.6060929298400879, "learning_rate": 0.0003757740086746526, "loss": 3.026, "step": 25632 }, { "epoch": 1.26, "grad_norm": 0.5800380110740662, "learning_rate": 0.00037575911159043273, "loss": 3.027, "step": 25633 }, { "epoch": 1.26, "grad_norm": 0.5752260684967041, "learning_rate": 0.00037574421430667794, "loss": 3.1041, "step": 25634 }, { "epoch": 1.26, "grad_norm": 0.5729960799217224, "learning_rate": 0.00037572931682342735, "loss": 3.0167, "step": 25635 }, { "epoch": 1.26, "grad_norm": 0.5333654284477234, "learning_rate": 0.0003757144191407204, "loss": 3.3717, "step": 25636 }, { "epoch": 1.26, "grad_norm": 0.5531308650970459, "learning_rate": 0.0003756995212585963, "loss": 3.0053, "step": 25637 }, { "epoch": 1.26, "grad_norm": 0.563683271408081, "learning_rate": 0.00037568462317709423, "loss": 3.0348, "step": 25638 }, { "epoch": 1.26, "grad_norm": 0.5568251609802246, "learning_rate": 0.00037566972489625345, "loss": 2.9534, "step": 25639 }, { "epoch": 1.26, "grad_norm": 0.5349379181861877, "learning_rate": 0.0003756548264161131, "loss": 3.0158, "step": 25640 }, { "epoch": 1.26, "grad_norm": 0.5677779316902161, "learning_rate": 0.0003756399277367125, "loss": 3.0896, "step": 25641 }, { "epoch": 1.26, "grad_norm": 0.5582015514373779, "learning_rate": 0.0003756250288580909, "loss": 3.0478, "step": 25642 }, { "epoch": 1.26, "grad_norm": 0.539047360420227, "learning_rate": 0.00037561012978028755, "loss": 2.9591, "step": 25643 }, { "epoch": 1.26, "grad_norm": 0.5575973391532898, "learning_rate": 0.0003755952305033416, "loss": 3.1518, "step": 25644 }, { "epoch": 1.26, "grad_norm": 0.5632666349411011, "learning_rate": 0.00037558033102729247, "loss": 3.1166, "step": 25645 }, { "epoch": 1.26, "grad_norm": 0.5250673890113831, "learning_rate": 0.00037556543135217925, "loss": 3.0891, "step": 25646 }, { "epoch": 1.26, "grad_norm": 0.571751594543457, "learning_rate": 0.00037555053147804115, "loss": 2.9595, "step": 25647 }, { "epoch": 1.26, "grad_norm": 0.5709319114685059, "learning_rate": 0.0003755356314049176, "loss": 2.9572, "step": 25648 }, { "epoch": 1.26, "grad_norm": 0.6075392961502075, "learning_rate": 0.0003755207311328478, "loss": 3.1889, "step": 25649 }, { "epoch": 1.26, "grad_norm": 0.5540075898170471, "learning_rate": 0.00037550583066187077, "loss": 3.046, "step": 25650 }, { "epoch": 1.26, "grad_norm": 0.5717571973800659, "learning_rate": 0.0003754909299920259, "loss": 3.1389, "step": 25651 }, { "epoch": 1.26, "grad_norm": 0.5485336780548096, "learning_rate": 0.00037547602912335257, "loss": 2.9172, "step": 25652 }, { "epoch": 1.26, "grad_norm": 0.6030722260475159, "learning_rate": 0.00037546112805588984, "loss": 2.8894, "step": 25653 }, { "epoch": 1.26, "grad_norm": 0.5882648825645447, "learning_rate": 0.000375446226789677, "loss": 2.9703, "step": 25654 }, { "epoch": 1.26, "grad_norm": 0.5773414969444275, "learning_rate": 0.0003754313253247533, "loss": 3.2545, "step": 25655 }, { "epoch": 1.26, "grad_norm": 0.5464721322059631, "learning_rate": 0.00037541642366115813, "loss": 3.2103, "step": 25656 }, { "epoch": 1.26, "grad_norm": 0.59247225522995, "learning_rate": 0.00037540152179893054, "loss": 3.0535, "step": 25657 }, { "epoch": 1.26, "grad_norm": 0.5408132076263428, "learning_rate": 0.0003753866197381098, "loss": 2.9782, "step": 25658 }, { "epoch": 1.26, "grad_norm": 0.5488524436950684, "learning_rate": 0.0003753717174787352, "loss": 2.9827, "step": 25659 }, { "epoch": 1.26, "grad_norm": 0.5907993316650391, "learning_rate": 0.0003753568150208462, "loss": 3.2103, "step": 25660 }, { "epoch": 1.26, "grad_norm": 0.568075954914093, "learning_rate": 0.0003753419123644816, "loss": 2.89, "step": 25661 }, { "epoch": 1.26, "grad_norm": 0.6084296703338623, "learning_rate": 0.000375327009509681, "loss": 3.1351, "step": 25662 }, { "epoch": 1.26, "grad_norm": 0.5559454560279846, "learning_rate": 0.00037531210645648364, "loss": 3.2423, "step": 25663 }, { "epoch": 1.26, "grad_norm": 0.5514500141143799, "learning_rate": 0.00037529720320492865, "loss": 3.065, "step": 25664 }, { "epoch": 1.26, "grad_norm": 0.5497595071792603, "learning_rate": 0.00037528229975505525, "loss": 3.0918, "step": 25665 }, { "epoch": 1.26, "grad_norm": 0.5739080905914307, "learning_rate": 0.00037526739610690274, "loss": 3.0684, "step": 25666 }, { "epoch": 1.26, "grad_norm": 0.5830264687538147, "learning_rate": 0.00037525249226051053, "loss": 3.192, "step": 25667 }, { "epoch": 1.26, "grad_norm": 0.5546814203262329, "learning_rate": 0.00037523758821591765, "loss": 3.0591, "step": 25668 }, { "epoch": 1.26, "grad_norm": 0.5485287308692932, "learning_rate": 0.00037522268397316346, "loss": 3.1005, "step": 25669 }, { "epoch": 1.26, "grad_norm": 0.5739904642105103, "learning_rate": 0.0003752077795322872, "loss": 3.0651, "step": 25670 }, { "epoch": 1.26, "grad_norm": 0.6011346578598022, "learning_rate": 0.0003751928748933281, "loss": 3.1442, "step": 25671 }, { "epoch": 1.26, "grad_norm": 0.5489895939826965, "learning_rate": 0.0003751779700563254, "loss": 2.8407, "step": 25672 }, { "epoch": 1.26, "grad_norm": 0.5272802710533142, "learning_rate": 0.00037516306502131843, "loss": 3.075, "step": 25673 }, { "epoch": 1.26, "grad_norm": 0.6106321811676025, "learning_rate": 0.0003751481597883464, "loss": 3.0871, "step": 25674 }, { "epoch": 1.26, "grad_norm": 0.5586559772491455, "learning_rate": 0.0003751332543574487, "loss": 2.8499, "step": 25675 }, { "epoch": 1.26, "grad_norm": 0.5668689012527466, "learning_rate": 0.0003751183487286644, "loss": 3.2104, "step": 25676 }, { "epoch": 1.26, "grad_norm": 0.5656947493553162, "learning_rate": 0.0003751034429020327, "loss": 3.0308, "step": 25677 }, { "epoch": 1.26, "grad_norm": 0.5509669184684753, "learning_rate": 0.0003750885368775932, "loss": 3.2247, "step": 25678 }, { "epoch": 1.26, "grad_norm": 0.581092894077301, "learning_rate": 0.00037507363065538476, "loss": 3.0334, "step": 25679 }, { "epoch": 1.26, "grad_norm": 0.5440135598182678, "learning_rate": 0.0003750587242354469, "loss": 3.162, "step": 25680 }, { "epoch": 1.26, "grad_norm": 0.5443293452262878, "learning_rate": 0.0003750438176178188, "loss": 3.1321, "step": 25681 }, { "epoch": 1.26, "grad_norm": 0.5595262050628662, "learning_rate": 0.00037502891080253974, "loss": 3.1387, "step": 25682 }, { "epoch": 1.26, "grad_norm": 0.5496951937675476, "learning_rate": 0.00037501400378964894, "loss": 2.9888, "step": 25683 }, { "epoch": 1.26, "grad_norm": 0.5384321212768555, "learning_rate": 0.00037499909657918567, "loss": 3.1049, "step": 25684 }, { "epoch": 1.26, "grad_norm": 0.6079148650169373, "learning_rate": 0.0003749841891711893, "loss": 3.1243, "step": 25685 }, { "epoch": 1.26, "grad_norm": 0.5618904829025269, "learning_rate": 0.000374969281565699, "loss": 2.9805, "step": 25686 }, { "epoch": 1.26, "grad_norm": 0.6069911122322083, "learning_rate": 0.00037495437376275397, "loss": 3.343, "step": 25687 }, { "epoch": 1.26, "grad_norm": 0.7043254971504211, "learning_rate": 0.0003749394657623936, "loss": 3.1197, "step": 25688 }, { "epoch": 1.26, "grad_norm": 0.5782732367515564, "learning_rate": 0.00037492455756465705, "loss": 2.9482, "step": 25689 }, { "epoch": 1.26, "grad_norm": 0.5924282670021057, "learning_rate": 0.00037490964916958363, "loss": 3.2733, "step": 25690 }, { "epoch": 1.26, "grad_norm": 0.5641574859619141, "learning_rate": 0.0003748947405772127, "loss": 3.0016, "step": 25691 }, { "epoch": 1.26, "grad_norm": 0.596458375453949, "learning_rate": 0.0003748798317875833, "loss": 3.0319, "step": 25692 }, { "epoch": 1.26, "grad_norm": 0.6097240447998047, "learning_rate": 0.00037486492280073495, "loss": 3.3717, "step": 25693 }, { "epoch": 1.26, "grad_norm": 0.5604461431503296, "learning_rate": 0.00037485001361670675, "loss": 2.8643, "step": 25694 }, { "epoch": 1.26, "grad_norm": 0.5882611274719238, "learning_rate": 0.000374835104235538, "loss": 2.9761, "step": 25695 }, { "epoch": 1.26, "grad_norm": 0.5667465925216675, "learning_rate": 0.000374820194657268, "loss": 3.1456, "step": 25696 }, { "epoch": 1.26, "grad_norm": 0.5910031199455261, "learning_rate": 0.00037480528488193605, "loss": 2.9162, "step": 25697 }, { "epoch": 1.26, "grad_norm": 0.5716198682785034, "learning_rate": 0.00037479037490958136, "loss": 3.0564, "step": 25698 }, { "epoch": 1.26, "grad_norm": 0.5668255090713501, "learning_rate": 0.0003747754647402432, "loss": 3.2102, "step": 25699 }, { "epoch": 1.26, "grad_norm": 0.5733356475830078, "learning_rate": 0.00037476055437396093, "loss": 3.3008, "step": 25700 }, { "epoch": 1.26, "grad_norm": 0.5971511006355286, "learning_rate": 0.00037474564381077363, "loss": 3.0688, "step": 25701 }, { "epoch": 1.26, "grad_norm": 0.5802672505378723, "learning_rate": 0.0003747307330507207, "loss": 2.9328, "step": 25702 }, { "epoch": 1.26, "grad_norm": 0.6406810879707336, "learning_rate": 0.0003747158220938415, "loss": 3.0136, "step": 25703 }, { "epoch": 1.26, "grad_norm": 0.5840638279914856, "learning_rate": 0.0003747009109401752, "loss": 3.0113, "step": 25704 }, { "epoch": 1.26, "grad_norm": 0.55064457654953, "learning_rate": 0.000374685999589761, "loss": 3.0928, "step": 25705 }, { "epoch": 1.26, "grad_norm": 0.5626662373542786, "learning_rate": 0.00037467108804263826, "loss": 2.9046, "step": 25706 }, { "epoch": 1.26, "grad_norm": 0.6290438175201416, "learning_rate": 0.00037465617629884625, "loss": 3.0341, "step": 25707 }, { "epoch": 1.26, "grad_norm": 0.5759810209274292, "learning_rate": 0.00037464126435842425, "loss": 3.1417, "step": 25708 }, { "epoch": 1.26, "grad_norm": 0.6134321689605713, "learning_rate": 0.00037462635222141146, "loss": 3.2309, "step": 25709 }, { "epoch": 1.26, "grad_norm": 0.578365683555603, "learning_rate": 0.0003746114398878473, "loss": 3.1481, "step": 25710 }, { "epoch": 1.26, "grad_norm": 0.5462852716445923, "learning_rate": 0.00037459652735777105, "loss": 3.1731, "step": 25711 }, { "epoch": 1.26, "grad_norm": 0.636318027973175, "learning_rate": 0.00037458161463122165, "loss": 2.9947, "step": 25712 }, { "epoch": 1.26, "grad_norm": 0.5679839849472046, "learning_rate": 0.00037456670170823883, "loss": 3.122, "step": 25713 }, { "epoch": 1.26, "grad_norm": 0.5561462640762329, "learning_rate": 0.0003745517885888617, "loss": 3.2713, "step": 25714 }, { "epoch": 1.26, "grad_norm": 0.6204407811164856, "learning_rate": 0.00037453687527312944, "loss": 3.0994, "step": 25715 }, { "epoch": 1.26, "grad_norm": 0.52779620885849, "learning_rate": 0.0003745219617610813, "loss": 3.1557, "step": 25716 }, { "epoch": 1.26, "grad_norm": 0.5464897155761719, "learning_rate": 0.00037450704805275676, "loss": 3.1137, "step": 25717 }, { "epoch": 1.26, "grad_norm": 0.6134060621261597, "learning_rate": 0.00037449213414819497, "loss": 2.9496, "step": 25718 }, { "epoch": 1.26, "grad_norm": 0.5954126119613647, "learning_rate": 0.00037447722004743526, "loss": 3.1065, "step": 25719 }, { "epoch": 1.26, "grad_norm": 0.5473684668540955, "learning_rate": 0.00037446230575051686, "loss": 3.0049, "step": 25720 }, { "epoch": 1.26, "grad_norm": 0.5591050386428833, "learning_rate": 0.0003744473912574791, "loss": 3.114, "step": 25721 }, { "epoch": 1.26, "grad_norm": 0.5836315751075745, "learning_rate": 0.0003744324765683613, "loss": 3.046, "step": 25722 }, { "epoch": 1.26, "grad_norm": 0.6122407913208008, "learning_rate": 0.0003744175616832025, "loss": 3.0378, "step": 25723 }, { "epoch": 1.26, "grad_norm": 0.570861279964447, "learning_rate": 0.0003744026466020422, "loss": 3.1949, "step": 25724 }, { "epoch": 1.26, "grad_norm": 0.534397542476654, "learning_rate": 0.00037438773132491984, "loss": 3.0312, "step": 25725 }, { "epoch": 1.26, "grad_norm": 0.536069929599762, "learning_rate": 0.0003743728158518744, "loss": 3.0838, "step": 25726 }, { "epoch": 1.26, "grad_norm": 0.5722454190254211, "learning_rate": 0.0003743579001829452, "loss": 3.2043, "step": 25727 }, { "epoch": 1.26, "grad_norm": 0.5977911949157715, "learning_rate": 0.0003743429843181716, "loss": 3.0963, "step": 25728 }, { "epoch": 1.26, "grad_norm": 0.566826343536377, "learning_rate": 0.000374328068257593, "loss": 2.9931, "step": 25729 }, { "epoch": 1.26, "grad_norm": 0.5475797057151794, "learning_rate": 0.00037431315200124853, "loss": 3.1475, "step": 25730 }, { "epoch": 1.26, "grad_norm": 0.5495274662971497, "learning_rate": 0.0003742982355491775, "loss": 3.1989, "step": 25731 }, { "epoch": 1.26, "grad_norm": 0.559955358505249, "learning_rate": 0.00037428331890141926, "loss": 3.015, "step": 25732 }, { "epoch": 1.26, "grad_norm": 0.6134682893753052, "learning_rate": 0.000374268402058013, "loss": 2.9699, "step": 25733 }, { "epoch": 1.26, "grad_norm": 0.5740057826042175, "learning_rate": 0.0003742534850189981, "loss": 3.2211, "step": 25734 }, { "epoch": 1.26, "grad_norm": 0.610024094581604, "learning_rate": 0.00037423856778441377, "loss": 2.9174, "step": 25735 }, { "epoch": 1.26, "grad_norm": 0.6208667755126953, "learning_rate": 0.00037422365035429936, "loss": 3.155, "step": 25736 }, { "epoch": 1.26, "grad_norm": 0.5483558773994446, "learning_rate": 0.00037420873272869424, "loss": 3.1108, "step": 25737 }, { "epoch": 1.26, "grad_norm": 0.5884615182876587, "learning_rate": 0.00037419381490763745, "loss": 3.1444, "step": 25738 }, { "epoch": 1.26, "grad_norm": 0.5598413348197937, "learning_rate": 0.00037417889689116845, "loss": 3.1624, "step": 25739 }, { "epoch": 1.26, "grad_norm": 0.5523557066917419, "learning_rate": 0.0003741639786793266, "loss": 3.1961, "step": 25740 }, { "epoch": 1.26, "grad_norm": 0.6840699911117554, "learning_rate": 0.00037414906027215104, "loss": 3.0235, "step": 25741 }, { "epoch": 1.26, "grad_norm": 0.5679307579994202, "learning_rate": 0.0003741341416696811, "loss": 3.0551, "step": 25742 }, { "epoch": 1.26, "grad_norm": 0.5635999441146851, "learning_rate": 0.00037411922287195615, "loss": 3.016, "step": 25743 }, { "epoch": 1.26, "grad_norm": 0.561026394367218, "learning_rate": 0.0003741043038790154, "loss": 2.9945, "step": 25744 }, { "epoch": 1.26, "grad_norm": 0.5478890538215637, "learning_rate": 0.0003740893846908982, "loss": 3.12, "step": 25745 }, { "epoch": 1.26, "grad_norm": 0.5661895871162415, "learning_rate": 0.0003740744653076438, "loss": 3.1501, "step": 25746 }, { "epoch": 1.26, "grad_norm": 0.5741684436798096, "learning_rate": 0.00037405954572929155, "loss": 2.9514, "step": 25747 }, { "epoch": 1.26, "grad_norm": 0.5624836087226868, "learning_rate": 0.0003740446259558807, "loss": 3.0239, "step": 25748 }, { "epoch": 1.26, "grad_norm": 0.5481595993041992, "learning_rate": 0.0003740297059874505, "loss": 3.1957, "step": 25749 }, { "epoch": 1.26, "grad_norm": 0.5887608528137207, "learning_rate": 0.0003740147858240403, "loss": 3.2924, "step": 25750 }, { "epoch": 1.26, "grad_norm": 0.5847222805023193, "learning_rate": 0.0003739998654656896, "loss": 3.0791, "step": 25751 }, { "epoch": 1.26, "grad_norm": 0.6101052761077881, "learning_rate": 0.00037398494491243736, "loss": 2.9752, "step": 25752 }, { "epoch": 1.26, "grad_norm": 0.5880925059318542, "learning_rate": 0.00037397002416432283, "loss": 2.9945, "step": 25753 }, { "epoch": 1.26, "grad_norm": 0.5594073534011841, "learning_rate": 0.00037395510322138574, "loss": 3.0858, "step": 25754 }, { "epoch": 1.26, "grad_norm": 0.5806262493133545, "learning_rate": 0.00037394018208366505, "loss": 3.1636, "step": 25755 }, { "epoch": 1.26, "grad_norm": 0.6017138361930847, "learning_rate": 0.00037392526075120014, "loss": 3.1412, "step": 25756 }, { "epoch": 1.26, "grad_norm": 0.5765290260314941, "learning_rate": 0.00037391033922403035, "loss": 3.1801, "step": 25757 }, { "epoch": 1.26, "grad_norm": 0.6031667590141296, "learning_rate": 0.00037389541750219494, "loss": 3.0756, "step": 25758 }, { "epoch": 1.26, "grad_norm": 0.5250658988952637, "learning_rate": 0.0003738804955857332, "loss": 3.0219, "step": 25759 }, { "epoch": 1.26, "grad_norm": 0.5491588115692139, "learning_rate": 0.0003738655734746845, "loss": 2.9849, "step": 25760 }, { "epoch": 1.26, "grad_norm": 0.5519552826881409, "learning_rate": 0.000373850651169088, "loss": 3.1823, "step": 25761 }, { "epoch": 1.26, "grad_norm": 0.5483062267303467, "learning_rate": 0.0003738357286689832, "loss": 3.0518, "step": 25762 }, { "epoch": 1.26, "grad_norm": 0.5620872378349304, "learning_rate": 0.0003738208059744093, "loss": 3.1545, "step": 25763 }, { "epoch": 1.26, "grad_norm": 0.5591562986373901, "learning_rate": 0.00037380588308540553, "loss": 3.0748, "step": 25764 }, { "epoch": 1.26, "grad_norm": 0.6279521584510803, "learning_rate": 0.0003737909600020113, "loss": 3.0854, "step": 25765 }, { "epoch": 1.26, "grad_norm": 0.6239844560623169, "learning_rate": 0.0003737760367242659, "loss": 3.1755, "step": 25766 }, { "epoch": 1.26, "grad_norm": 0.536332905292511, "learning_rate": 0.0003737611132522087, "loss": 3.1022, "step": 25767 }, { "epoch": 1.26, "grad_norm": 0.5568017363548279, "learning_rate": 0.00037374618958587873, "loss": 3.2276, "step": 25768 }, { "epoch": 1.26, "grad_norm": 0.546877920627594, "learning_rate": 0.0003737312657253156, "loss": 2.9514, "step": 25769 }, { "epoch": 1.26, "grad_norm": 0.6245750784873962, "learning_rate": 0.0003737163416705585, "loss": 3.0925, "step": 25770 }, { "epoch": 1.26, "grad_norm": 0.609423816204071, "learning_rate": 0.0003737014174216467, "loss": 3.0713, "step": 25771 }, { "epoch": 1.26, "grad_norm": 0.5926220417022705, "learning_rate": 0.0003736864929786196, "loss": 3.1992, "step": 25772 }, { "epoch": 1.26, "grad_norm": 0.5451133251190186, "learning_rate": 0.0003736715683415165, "loss": 3.0744, "step": 25773 }, { "epoch": 1.26, "grad_norm": 0.5710421204566956, "learning_rate": 0.00037365664351037654, "loss": 3.0953, "step": 25774 }, { "epoch": 1.26, "grad_norm": 0.5600199699401855, "learning_rate": 0.0003736417184852393, "loss": 3.1269, "step": 25775 }, { "epoch": 1.26, "grad_norm": 0.5701009035110474, "learning_rate": 0.00037362679326614386, "loss": 3.1154, "step": 25776 }, { "epoch": 1.26, "grad_norm": 0.6089168787002563, "learning_rate": 0.0003736118678531297, "loss": 3.0837, "step": 25777 }, { "epoch": 1.26, "grad_norm": 0.5491774082183838, "learning_rate": 0.0003735969422462359, "loss": 3.3128, "step": 25778 }, { "epoch": 1.26, "grad_norm": 0.5762932300567627, "learning_rate": 0.000373582016445502, "loss": 3.0711, "step": 25779 }, { "epoch": 1.26, "grad_norm": 0.5476071834564209, "learning_rate": 0.0003735670904509673, "loss": 2.887, "step": 25780 }, { "epoch": 1.26, "grad_norm": 0.6149681806564331, "learning_rate": 0.00037355216426267093, "loss": 3.0442, "step": 25781 }, { "epoch": 1.26, "grad_norm": 0.5968160033226013, "learning_rate": 0.00037353723788065236, "loss": 2.8897, "step": 25782 }, { "epoch": 1.26, "grad_norm": 0.5533415675163269, "learning_rate": 0.00037352231130495083, "loss": 3.1161, "step": 25783 }, { "epoch": 1.26, "grad_norm": 0.5641621351242065, "learning_rate": 0.00037350738453560575, "loss": 3.1067, "step": 25784 }, { "epoch": 1.26, "grad_norm": 0.5823823809623718, "learning_rate": 0.00037349245757265626, "loss": 3.1385, "step": 25785 }, { "epoch": 1.26, "grad_norm": 0.6093672513961792, "learning_rate": 0.0003734775304161419, "loss": 3.1447, "step": 25786 }, { "epoch": 1.26, "grad_norm": 0.5846396684646606, "learning_rate": 0.00037346260306610175, "loss": 2.9545, "step": 25787 }, { "epoch": 1.26, "grad_norm": 0.5794227719306946, "learning_rate": 0.0003734476755225754, "loss": 2.8661, "step": 25788 }, { "epoch": 1.26, "grad_norm": 0.566815972328186, "learning_rate": 0.0003734327477856018, "loss": 3.0093, "step": 25789 }, { "epoch": 1.26, "grad_norm": 0.5866063833236694, "learning_rate": 0.00037341781985522057, "loss": 2.9509, "step": 25790 }, { "epoch": 1.26, "grad_norm": 0.552588939666748, "learning_rate": 0.000373402891731471, "loss": 3.1778, "step": 25791 }, { "epoch": 1.26, "grad_norm": 0.5920195579528809, "learning_rate": 0.0003733879634143923, "loss": 3.0382, "step": 25792 }, { "epoch": 1.26, "grad_norm": 0.5480687618255615, "learning_rate": 0.0003733730349040238, "loss": 3.0309, "step": 25793 }, { "epoch": 1.26, "grad_norm": 0.5250555276870728, "learning_rate": 0.0003733581062004049, "loss": 3.2171, "step": 25794 }, { "epoch": 1.26, "grad_norm": 0.6039732694625854, "learning_rate": 0.0003733431773035748, "loss": 3.1888, "step": 25795 }, { "epoch": 1.26, "grad_norm": 0.5604811310768127, "learning_rate": 0.0003733282482135729, "loss": 3.1069, "step": 25796 }, { "epoch": 1.26, "grad_norm": 0.597496509552002, "learning_rate": 0.0003733133189304385, "loss": 3.1567, "step": 25797 }, { "epoch": 1.26, "grad_norm": 0.5957289338111877, "learning_rate": 0.00037329838945421095, "loss": 3.0694, "step": 25798 }, { "epoch": 1.26, "grad_norm": 0.5216500163078308, "learning_rate": 0.00037328345978492966, "loss": 3.0539, "step": 25799 }, { "epoch": 1.26, "grad_norm": 0.5505014061927795, "learning_rate": 0.0003732685299226336, "loss": 3.1195, "step": 25800 }, { "epoch": 1.26, "grad_norm": 0.5500777363777161, "learning_rate": 0.0003732535998673624, "loss": 3.007, "step": 25801 }, { "epoch": 1.26, "grad_norm": 0.6095644235610962, "learning_rate": 0.00037323866961915545, "loss": 2.8892, "step": 25802 }, { "epoch": 1.26, "grad_norm": 0.5533205270767212, "learning_rate": 0.0003732237391780519, "loss": 3.0392, "step": 25803 }, { "epoch": 1.26, "grad_norm": 0.576029360294342, "learning_rate": 0.0003732088085440911, "loss": 2.8924, "step": 25804 }, { "epoch": 1.26, "grad_norm": 0.5615506172180176, "learning_rate": 0.00037319387771731237, "loss": 3.0801, "step": 25805 }, { "epoch": 1.26, "grad_norm": 0.5568882822990417, "learning_rate": 0.000373178946697755, "loss": 3.1054, "step": 25806 }, { "epoch": 1.26, "grad_norm": 0.554295003414154, "learning_rate": 0.00037316401548545845, "loss": 3.1786, "step": 25807 }, { "epoch": 1.26, "grad_norm": 0.5566840767860413, "learning_rate": 0.0003731490840804619, "loss": 3.0353, "step": 25808 }, { "epoch": 1.26, "grad_norm": 0.5562843084335327, "learning_rate": 0.00037313415248280477, "loss": 3.3003, "step": 25809 }, { "epoch": 1.26, "grad_norm": 0.569489061832428, "learning_rate": 0.00037311922069252636, "loss": 3.155, "step": 25810 }, { "epoch": 1.26, "grad_norm": 0.537324070930481, "learning_rate": 0.00037310428870966595, "loss": 3.0376, "step": 25811 }, { "epoch": 1.26, "grad_norm": 0.5831682085990906, "learning_rate": 0.00037308935653426295, "loss": 3.1946, "step": 25812 }, { "epoch": 1.27, "grad_norm": 0.5841866135597229, "learning_rate": 0.0003730744241663567, "loss": 3.2204, "step": 25813 }, { "epoch": 1.27, "grad_norm": 0.5594460368156433, "learning_rate": 0.0003730594916059864, "loss": 3.1202, "step": 25814 }, { "epoch": 1.27, "grad_norm": 0.635952353477478, "learning_rate": 0.0003730445588531915, "loss": 3.0469, "step": 25815 }, { "epoch": 1.27, "grad_norm": 0.5496728420257568, "learning_rate": 0.00037302962590801133, "loss": 3.0797, "step": 25816 }, { "epoch": 1.27, "grad_norm": 0.5622497797012329, "learning_rate": 0.00037301469277048515, "loss": 3.0121, "step": 25817 }, { "epoch": 1.27, "grad_norm": 0.5610560774803162, "learning_rate": 0.0003729997594406523, "loss": 2.9417, "step": 25818 }, { "epoch": 1.27, "grad_norm": 0.5938122272491455, "learning_rate": 0.0003729848259185521, "loss": 2.8371, "step": 25819 }, { "epoch": 1.27, "grad_norm": 0.5797512531280518, "learning_rate": 0.0003729698922042239, "loss": 2.8959, "step": 25820 }, { "epoch": 1.27, "grad_norm": 0.5663214921951294, "learning_rate": 0.00037295495829770713, "loss": 3.0954, "step": 25821 }, { "epoch": 1.27, "grad_norm": 0.5849252343177795, "learning_rate": 0.00037294002419904104, "loss": 2.9451, "step": 25822 }, { "epoch": 1.27, "grad_norm": 0.5354498028755188, "learning_rate": 0.00037292508990826494, "loss": 3.0579, "step": 25823 }, { "epoch": 1.27, "grad_norm": 0.5267759561538696, "learning_rate": 0.0003729101554254182, "loss": 3.1529, "step": 25824 }, { "epoch": 1.27, "grad_norm": 0.5743757486343384, "learning_rate": 0.00037289522075054007, "loss": 2.9971, "step": 25825 }, { "epoch": 1.27, "grad_norm": 0.5666912198066711, "learning_rate": 0.00037288028588367, "loss": 3.1979, "step": 25826 }, { "epoch": 1.27, "grad_norm": 0.5688309669494629, "learning_rate": 0.0003728653508248474, "loss": 3.0861, "step": 25827 }, { "epoch": 1.27, "grad_norm": 0.5575060248374939, "learning_rate": 0.00037285041557411135, "loss": 3.1098, "step": 25828 }, { "epoch": 1.27, "grad_norm": 0.5437538623809814, "learning_rate": 0.0003728354801315014, "loss": 3.1594, "step": 25829 }, { "epoch": 1.27, "grad_norm": 0.6106091141700745, "learning_rate": 0.00037282054449705665, "loss": 3.0998, "step": 25830 }, { "epoch": 1.27, "grad_norm": 0.6044281125068665, "learning_rate": 0.0003728056086708168, "loss": 3.0176, "step": 25831 }, { "epoch": 1.27, "grad_norm": 0.5630094408988953, "learning_rate": 0.00037279067265282094, "loss": 3.1904, "step": 25832 }, { "epoch": 1.27, "grad_norm": 0.5635753273963928, "learning_rate": 0.0003727757364431084, "loss": 3.1233, "step": 25833 }, { "epoch": 1.27, "grad_norm": 0.5579873919487, "learning_rate": 0.0003727608000417186, "loss": 2.8207, "step": 25834 }, { "epoch": 1.27, "grad_norm": 0.5992753505706787, "learning_rate": 0.0003727458634486909, "loss": 3.0802, "step": 25835 }, { "epoch": 1.27, "grad_norm": 0.5542998313903809, "learning_rate": 0.0003727309266640645, "loss": 3.1656, "step": 25836 }, { "epoch": 1.27, "grad_norm": 0.5979121327400208, "learning_rate": 0.0003727159896878789, "loss": 3.2221, "step": 25837 }, { "epoch": 1.27, "grad_norm": 0.5464156270027161, "learning_rate": 0.00037270105252017335, "loss": 3.0004, "step": 25838 }, { "epoch": 1.27, "grad_norm": 0.5646561980247498, "learning_rate": 0.00037268611516098725, "loss": 3.3144, "step": 25839 }, { "epoch": 1.27, "grad_norm": 0.60554438829422, "learning_rate": 0.00037267117761036, "loss": 3.0871, "step": 25840 }, { "epoch": 1.27, "grad_norm": 0.5654262900352478, "learning_rate": 0.0003726562398683307, "loss": 2.938, "step": 25841 }, { "epoch": 1.27, "grad_norm": 0.5625606179237366, "learning_rate": 0.0003726413019349389, "loss": 3.161, "step": 25842 }, { "epoch": 1.27, "grad_norm": 0.5684981346130371, "learning_rate": 0.0003726263638102239, "loss": 3.0923, "step": 25843 }, { "epoch": 1.27, "grad_norm": 0.5840631723403931, "learning_rate": 0.00037261142549422505, "loss": 3.1465, "step": 25844 }, { "epoch": 1.27, "grad_norm": 0.5702567100524902, "learning_rate": 0.0003725964869869816, "loss": 3.1471, "step": 25845 }, { "epoch": 1.27, "grad_norm": 0.61046302318573, "learning_rate": 0.0003725815482885331, "loss": 3.1633, "step": 25846 }, { "epoch": 1.27, "grad_norm": 0.557167649269104, "learning_rate": 0.00037256660939891865, "loss": 3.0677, "step": 25847 }, { "epoch": 1.27, "grad_norm": 0.5500303506851196, "learning_rate": 0.00037255167031817775, "loss": 3.3199, "step": 25848 }, { "epoch": 1.27, "grad_norm": 0.5514801144599915, "learning_rate": 0.00037253673104634973, "loss": 3.1673, "step": 25849 }, { "epoch": 1.27, "grad_norm": 0.5589605569839478, "learning_rate": 0.000372521791583474, "loss": 3.0712, "step": 25850 }, { "epoch": 1.27, "grad_norm": 0.5684011578559875, "learning_rate": 0.0003725068519295897, "loss": 3.0508, "step": 25851 }, { "epoch": 1.27, "grad_norm": 0.559701144695282, "learning_rate": 0.0003724919120847363, "loss": 3.0181, "step": 25852 }, { "epoch": 1.27, "grad_norm": 0.5717242360115051, "learning_rate": 0.00037247697204895327, "loss": 2.9951, "step": 25853 }, { "epoch": 1.27, "grad_norm": 0.5555175542831421, "learning_rate": 0.0003724620318222798, "loss": 2.9823, "step": 25854 }, { "epoch": 1.27, "grad_norm": 0.5793154239654541, "learning_rate": 0.00037244709140475527, "loss": 3.1666, "step": 25855 }, { "epoch": 1.27, "grad_norm": 0.552769660949707, "learning_rate": 0.0003724321507964191, "loss": 3.2035, "step": 25856 }, { "epoch": 1.27, "grad_norm": 0.5471254587173462, "learning_rate": 0.0003724172099973105, "loss": 3.1123, "step": 25857 }, { "epoch": 1.27, "grad_norm": 0.5685672760009766, "learning_rate": 0.0003724022690074689, "loss": 3.0484, "step": 25858 }, { "epoch": 1.27, "grad_norm": 0.5822762250900269, "learning_rate": 0.0003723873278269337, "loss": 3.0311, "step": 25859 }, { "epoch": 1.27, "grad_norm": 0.5946375131607056, "learning_rate": 0.0003723723864557442, "loss": 3.2406, "step": 25860 }, { "epoch": 1.27, "grad_norm": 0.5624514818191528, "learning_rate": 0.0003723574448939398, "loss": 3.0958, "step": 25861 }, { "epoch": 1.27, "grad_norm": 0.6144577264785767, "learning_rate": 0.0003723425031415598, "loss": 3.117, "step": 25862 }, { "epoch": 1.27, "grad_norm": 0.5530250668525696, "learning_rate": 0.0003723275611986435, "loss": 3.0762, "step": 25863 }, { "epoch": 1.27, "grad_norm": 0.5647308230400085, "learning_rate": 0.0003723126190652304, "loss": 3.2921, "step": 25864 }, { "epoch": 1.27, "grad_norm": 0.5888532996177673, "learning_rate": 0.0003722976767413598, "loss": 3.1137, "step": 25865 }, { "epoch": 1.27, "grad_norm": 0.5185219645500183, "learning_rate": 0.0003722827342270709, "loss": 3.0245, "step": 25866 }, { "epoch": 1.27, "grad_norm": 0.5749824643135071, "learning_rate": 0.00037226779152240336, "loss": 3.186, "step": 25867 }, { "epoch": 1.27, "grad_norm": 0.5528282523155212, "learning_rate": 0.0003722528486273962, "loss": 3.329, "step": 25868 }, { "epoch": 1.27, "grad_norm": 0.5749999284744263, "learning_rate": 0.00037223790554208905, "loss": 3.1513, "step": 25869 }, { "epoch": 1.27, "grad_norm": 0.5693772435188293, "learning_rate": 0.00037222296226652115, "loss": 3.1707, "step": 25870 }, { "epoch": 1.27, "grad_norm": 0.5503202080726624, "learning_rate": 0.0003722080188007318, "loss": 3.2248, "step": 25871 }, { "epoch": 1.27, "grad_norm": 0.639018714427948, "learning_rate": 0.0003721930751447605, "loss": 3.225, "step": 25872 }, { "epoch": 1.27, "grad_norm": 0.5662673711776733, "learning_rate": 0.00037217813129864646, "loss": 3.1343, "step": 25873 }, { "epoch": 1.27, "grad_norm": 0.6173918843269348, "learning_rate": 0.0003721631872624292, "loss": 3.1776, "step": 25874 }, { "epoch": 1.27, "grad_norm": 0.5484563112258911, "learning_rate": 0.000372148243036148, "loss": 3.0044, "step": 25875 }, { "epoch": 1.27, "grad_norm": 0.55157870054245, "learning_rate": 0.00037213329861984215, "loss": 3.0107, "step": 25876 }, { "epoch": 1.27, "grad_norm": 0.5684902667999268, "learning_rate": 0.000372118354013551, "loss": 2.8688, "step": 25877 }, { "epoch": 1.27, "grad_norm": 0.5629575252532959, "learning_rate": 0.00037210340921731415, "loss": 3.3597, "step": 25878 }, { "epoch": 1.27, "grad_norm": 0.5630050897598267, "learning_rate": 0.0003720884642311707, "loss": 2.9911, "step": 25879 }, { "epoch": 1.27, "grad_norm": 0.5761358141899109, "learning_rate": 0.0003720735190551602, "loss": 3.1811, "step": 25880 }, { "epoch": 1.27, "grad_norm": 0.5436105132102966, "learning_rate": 0.0003720585736893218, "loss": 2.9532, "step": 25881 }, { "epoch": 1.27, "grad_norm": 0.6045496463775635, "learning_rate": 0.00037204362813369503, "loss": 2.8204, "step": 25882 }, { "epoch": 1.27, "grad_norm": 0.5377562642097473, "learning_rate": 0.0003720286823883192, "loss": 3.1401, "step": 25883 }, { "epoch": 1.27, "grad_norm": 0.5733555555343628, "learning_rate": 0.0003720137364532337, "loss": 3.1384, "step": 25884 }, { "epoch": 1.27, "grad_norm": 0.5425565242767334, "learning_rate": 0.00037199879032847786, "loss": 2.9473, "step": 25885 }, { "epoch": 1.27, "grad_norm": 0.5795601606369019, "learning_rate": 0.00037198384401409114, "loss": 2.8055, "step": 25886 }, { "epoch": 1.27, "grad_norm": 0.5669113993644714, "learning_rate": 0.0003719688975101127, "loss": 3.1459, "step": 25887 }, { "epoch": 1.27, "grad_norm": 0.5572779178619385, "learning_rate": 0.0003719539508165821, "loss": 3.1748, "step": 25888 }, { "epoch": 1.27, "grad_norm": 0.5794333219528198, "learning_rate": 0.0003719390039335387, "loss": 3.1461, "step": 25889 }, { "epoch": 1.27, "grad_norm": 0.5872516632080078, "learning_rate": 0.00037192405686102174, "loss": 2.8606, "step": 25890 }, { "epoch": 1.27, "grad_norm": 0.5817949175834656, "learning_rate": 0.0003719091095990707, "loss": 3.0884, "step": 25891 }, { "epoch": 1.27, "grad_norm": 0.590277910232544, "learning_rate": 0.00037189416214772477, "loss": 3.0184, "step": 25892 }, { "epoch": 1.27, "grad_norm": 0.5216969847679138, "learning_rate": 0.00037187921450702367, "loss": 3.1245, "step": 25893 }, { "epoch": 1.27, "grad_norm": 0.545471727848053, "learning_rate": 0.0003718642666770064, "loss": 3.1712, "step": 25894 }, { "epoch": 1.27, "grad_norm": 0.5600536465644836, "learning_rate": 0.0003718493186577125, "loss": 2.9244, "step": 25895 }, { "epoch": 1.27, "grad_norm": 0.5645468831062317, "learning_rate": 0.00037183437044918134, "loss": 3.2514, "step": 25896 }, { "epoch": 1.27, "grad_norm": 0.5973244309425354, "learning_rate": 0.0003718194220514523, "loss": 2.9894, "step": 25897 }, { "epoch": 1.27, "grad_norm": 0.5746738314628601, "learning_rate": 0.0003718044734645647, "loss": 2.8643, "step": 25898 }, { "epoch": 1.27, "grad_norm": 0.5265095829963684, "learning_rate": 0.00037178952468855793, "loss": 3.2349, "step": 25899 }, { "epoch": 1.27, "grad_norm": 0.5884544849395752, "learning_rate": 0.0003717745757234714, "loss": 3.1012, "step": 25900 }, { "epoch": 1.27, "grad_norm": 0.5789063572883606, "learning_rate": 0.00037175962656934447, "loss": 3.0908, "step": 25901 }, { "epoch": 1.27, "grad_norm": 0.568000316619873, "learning_rate": 0.0003717446772262163, "loss": 2.9355, "step": 25902 }, { "epoch": 1.27, "grad_norm": 0.5491638779640198, "learning_rate": 0.0003717297276941267, "loss": 3.2674, "step": 25903 }, { "epoch": 1.27, "grad_norm": 0.5411843061447144, "learning_rate": 0.00037171477797311473, "loss": 3.0928, "step": 25904 }, { "epoch": 1.27, "grad_norm": 0.6108878254890442, "learning_rate": 0.0003716998280632198, "loss": 3.2653, "step": 25905 }, { "epoch": 1.27, "grad_norm": 0.5373321771621704, "learning_rate": 0.00037168487796448134, "loss": 3.0299, "step": 25906 }, { "epoch": 1.27, "grad_norm": 0.5621278882026672, "learning_rate": 0.0003716699276769387, "loss": 3.019, "step": 25907 }, { "epoch": 1.27, "grad_norm": 0.5679013133049011, "learning_rate": 0.00037165497720063125, "loss": 3.3508, "step": 25908 }, { "epoch": 1.27, "grad_norm": 0.5926734209060669, "learning_rate": 0.0003716400265355984, "loss": 3.0921, "step": 25909 }, { "epoch": 1.27, "grad_norm": 0.5435253381729126, "learning_rate": 0.00037162507568187954, "loss": 2.8687, "step": 25910 }, { "epoch": 1.27, "grad_norm": 0.5271333456039429, "learning_rate": 0.0003716101246395139, "loss": 2.9884, "step": 25911 }, { "epoch": 1.27, "grad_norm": 0.5629345774650574, "learning_rate": 0.00037159517340854117, "loss": 2.8821, "step": 25912 }, { "epoch": 1.27, "grad_norm": 0.5234237313270569, "learning_rate": 0.0003715802219890003, "loss": 3.2772, "step": 25913 }, { "epoch": 1.27, "grad_norm": 0.5610054135322571, "learning_rate": 0.00037156527038093105, "loss": 3.1372, "step": 25914 }, { "epoch": 1.27, "grad_norm": 0.5486628413200378, "learning_rate": 0.0003715503185843727, "loss": 3.1335, "step": 25915 }, { "epoch": 1.27, "grad_norm": 0.5305410027503967, "learning_rate": 0.00037153536659936446, "loss": 3.053, "step": 25916 }, { "epoch": 1.27, "grad_norm": 0.6576041579246521, "learning_rate": 0.00037152041442594584, "loss": 3.1786, "step": 25917 }, { "epoch": 1.27, "grad_norm": 0.5794118046760559, "learning_rate": 0.00037150546206415625, "loss": 3.1182, "step": 25918 }, { "epoch": 1.27, "grad_norm": 0.5535377264022827, "learning_rate": 0.00037149050951403506, "loss": 2.9718, "step": 25919 }, { "epoch": 1.27, "grad_norm": 0.5769936442375183, "learning_rate": 0.0003714755567756216, "loss": 2.987, "step": 25920 }, { "epoch": 1.27, "grad_norm": 0.59618079662323, "learning_rate": 0.00037146060384895527, "loss": 3.2961, "step": 25921 }, { "epoch": 1.27, "grad_norm": 0.5642500519752502, "learning_rate": 0.00037144565073407544, "loss": 2.9942, "step": 25922 }, { "epoch": 1.27, "grad_norm": 0.5690189003944397, "learning_rate": 0.0003714306974310215, "loss": 3.0254, "step": 25923 }, { "epoch": 1.27, "grad_norm": 0.5622614026069641, "learning_rate": 0.0003714157439398329, "loss": 3.1844, "step": 25924 }, { "epoch": 1.27, "grad_norm": 0.5633580684661865, "learning_rate": 0.000371400790260549, "loss": 3.1334, "step": 25925 }, { "epoch": 1.27, "grad_norm": 0.5582607984542847, "learning_rate": 0.00037138583639320915, "loss": 3.026, "step": 25926 }, { "epoch": 1.27, "grad_norm": 0.5949597954750061, "learning_rate": 0.00037137088233785273, "loss": 3.076, "step": 25927 }, { "epoch": 1.27, "grad_norm": 0.5441601872444153, "learning_rate": 0.000371355928094519, "loss": 3.0003, "step": 25928 }, { "epoch": 1.27, "grad_norm": 0.606289803981781, "learning_rate": 0.00037134097366324774, "loss": 3.1652, "step": 25929 }, { "epoch": 1.27, "grad_norm": 0.551216185092926, "learning_rate": 0.000371326019044078, "loss": 3.121, "step": 25930 }, { "epoch": 1.27, "grad_norm": 0.5833144783973694, "learning_rate": 0.0003713110642370492, "loss": 3.4873, "step": 25931 }, { "epoch": 1.27, "grad_norm": 0.5816163420677185, "learning_rate": 0.00037129610924220075, "loss": 3.1136, "step": 25932 }, { "epoch": 1.27, "grad_norm": 0.5476531386375427, "learning_rate": 0.00037128115405957217, "loss": 3.0812, "step": 25933 }, { "epoch": 1.27, "grad_norm": 0.5457040071487427, "learning_rate": 0.0003712661986892027, "loss": 3.187, "step": 25934 }, { "epoch": 1.27, "grad_norm": 0.5884016156196594, "learning_rate": 0.0003712512431311317, "loss": 3.158, "step": 25935 }, { "epoch": 1.27, "grad_norm": 0.5727925300598145, "learning_rate": 0.0003712362873853988, "loss": 3.2241, "step": 25936 }, { "epoch": 1.27, "grad_norm": 0.5885441303253174, "learning_rate": 0.00037122133145204314, "loss": 3.1813, "step": 25937 }, { "epoch": 1.27, "grad_norm": 0.6078060865402222, "learning_rate": 0.00037120637533110415, "loss": 3.0894, "step": 25938 }, { "epoch": 1.27, "grad_norm": 0.51329106092453, "learning_rate": 0.00037119141902262133, "loss": 3.0433, "step": 25939 }, { "epoch": 1.27, "grad_norm": 0.6121600866317749, "learning_rate": 0.0003711764625266341, "loss": 2.934, "step": 25940 }, { "epoch": 1.27, "grad_norm": 0.5866429805755615, "learning_rate": 0.0003711615058431816, "loss": 2.975, "step": 25941 }, { "epoch": 1.27, "grad_norm": 0.5531361103057861, "learning_rate": 0.00037114654897230353, "loss": 3.0505, "step": 25942 }, { "epoch": 1.27, "grad_norm": 0.7733150124549866, "learning_rate": 0.00037113159191403896, "loss": 3.2246, "step": 25943 }, { "epoch": 1.27, "grad_norm": 0.5927426218986511, "learning_rate": 0.0003711166346684276, "loss": 2.9347, "step": 25944 }, { "epoch": 1.27, "grad_norm": 0.5862705707550049, "learning_rate": 0.00037110167723550876, "loss": 2.817, "step": 25945 }, { "epoch": 1.27, "grad_norm": 0.6004965901374817, "learning_rate": 0.0003710867196153217, "loss": 3.1906, "step": 25946 }, { "epoch": 1.27, "grad_norm": 0.563130259513855, "learning_rate": 0.0003710717618079059, "loss": 3.124, "step": 25947 }, { "epoch": 1.27, "grad_norm": 0.5575785636901855, "learning_rate": 0.0003710568038133008, "loss": 3.0767, "step": 25948 }, { "epoch": 1.27, "grad_norm": 0.5379173159599304, "learning_rate": 0.0003710418456315457, "loss": 3.207, "step": 25949 }, { "epoch": 1.27, "grad_norm": 0.6190086603164673, "learning_rate": 0.0003710268872626801, "loss": 2.9019, "step": 25950 }, { "epoch": 1.27, "grad_norm": 0.5316742062568665, "learning_rate": 0.0003710119287067433, "loss": 2.996, "step": 25951 }, { "epoch": 1.27, "grad_norm": 0.5887004137039185, "learning_rate": 0.0003709969699637749, "loss": 2.9726, "step": 25952 }, { "epoch": 1.27, "grad_norm": 0.560197651386261, "learning_rate": 0.00037098201103381395, "loss": 3.1608, "step": 25953 }, { "epoch": 1.27, "grad_norm": 0.5498967170715332, "learning_rate": 0.0003709670519169, "loss": 3.1144, "step": 25954 }, { "epoch": 1.27, "grad_norm": 0.571072518825531, "learning_rate": 0.0003709520926130727, "loss": 3.1361, "step": 25955 }, { "epoch": 1.27, "grad_norm": 0.5782363414764404, "learning_rate": 0.0003709371331223712, "loss": 2.9036, "step": 25956 }, { "epoch": 1.27, "grad_norm": 0.5756257772445679, "learning_rate": 0.00037092217344483487, "loss": 2.8669, "step": 25957 }, { "epoch": 1.27, "grad_norm": 0.5726673603057861, "learning_rate": 0.00037090721358050324, "loss": 2.84, "step": 25958 }, { "epoch": 1.27, "grad_norm": 0.5259421467781067, "learning_rate": 0.00037089225352941556, "loss": 3.0836, "step": 25959 }, { "epoch": 1.27, "grad_norm": 0.5834009051322937, "learning_rate": 0.0003708772932916114, "loss": 2.9831, "step": 25960 }, { "epoch": 1.27, "grad_norm": 0.5248730778694153, "learning_rate": 0.0003708623328671301, "loss": 3.1426, "step": 25961 }, { "epoch": 1.27, "grad_norm": 0.5834124088287354, "learning_rate": 0.000370847372256011, "loss": 3.0896, "step": 25962 }, { "epoch": 1.27, "grad_norm": 0.5581058859825134, "learning_rate": 0.0003708324114582937, "loss": 2.8448, "step": 25963 }, { "epoch": 1.27, "grad_norm": 0.6077955961227417, "learning_rate": 0.0003708174504740173, "loss": 2.7767, "step": 25964 }, { "epoch": 1.27, "grad_norm": 0.5812661647796631, "learning_rate": 0.00037080248930322136, "loss": 3.1861, "step": 25965 }, { "epoch": 1.27, "grad_norm": 0.5405663847923279, "learning_rate": 0.00037078752794594545, "loss": 3.198, "step": 25966 }, { "epoch": 1.27, "grad_norm": 0.5473437905311584, "learning_rate": 0.0003707725664022287, "loss": 2.858, "step": 25967 }, { "epoch": 1.27, "grad_norm": 0.539084255695343, "learning_rate": 0.0003707576046721106, "loss": 3.262, "step": 25968 }, { "epoch": 1.27, "grad_norm": 0.5603315830230713, "learning_rate": 0.00037074264275563064, "loss": 3.0577, "step": 25969 }, { "epoch": 1.27, "grad_norm": 0.5609519481658936, "learning_rate": 0.0003707276806528282, "loss": 3.1609, "step": 25970 }, { "epoch": 1.27, "grad_norm": 0.5762900710105896, "learning_rate": 0.0003707127183637426, "loss": 3.103, "step": 25971 }, { "epoch": 1.27, "grad_norm": 0.5924959778785706, "learning_rate": 0.0003706977558884133, "loss": 2.9045, "step": 25972 }, { "epoch": 1.27, "grad_norm": 0.5308851003646851, "learning_rate": 0.0003706827932268798, "loss": 2.995, "step": 25973 }, { "epoch": 1.27, "grad_norm": 0.605690062046051, "learning_rate": 0.0003706678303791813, "loss": 3.0956, "step": 25974 }, { "epoch": 1.27, "grad_norm": 0.5422868728637695, "learning_rate": 0.0003706528673453574, "loss": 2.8659, "step": 25975 }, { "epoch": 1.27, "grad_norm": 0.5357705950737, "learning_rate": 0.00037063790412544747, "loss": 2.9898, "step": 25976 }, { "epoch": 1.27, "grad_norm": 0.5521494150161743, "learning_rate": 0.00037062294071949094, "loss": 2.9851, "step": 25977 }, { "epoch": 1.27, "grad_norm": 0.5768835544586182, "learning_rate": 0.0003706079771275271, "loss": 3.1727, "step": 25978 }, { "epoch": 1.27, "grad_norm": 0.5737690329551697, "learning_rate": 0.00037059301334959536, "loss": 2.8939, "step": 25979 }, { "epoch": 1.27, "grad_norm": 0.5862882733345032, "learning_rate": 0.0003705780493857354, "loss": 2.9907, "step": 25980 }, { "epoch": 1.27, "grad_norm": 0.5231944918632507, "learning_rate": 0.00037056308523598637, "loss": 2.9625, "step": 25981 }, { "epoch": 1.27, "grad_norm": 0.5753133893013, "learning_rate": 0.0003705481209003877, "loss": 2.9762, "step": 25982 }, { "epoch": 1.27, "grad_norm": 0.5374119281768799, "learning_rate": 0.00037053315637897887, "loss": 3.159, "step": 25983 }, { "epoch": 1.27, "grad_norm": 0.5538880228996277, "learning_rate": 0.0003705181916717993, "loss": 2.9437, "step": 25984 }, { "epoch": 1.27, "grad_norm": 0.5667445063591003, "learning_rate": 0.00037050322677888837, "loss": 3.2595, "step": 25985 }, { "epoch": 1.27, "grad_norm": 0.5335412621498108, "learning_rate": 0.0003704882617002855, "loss": 3.1344, "step": 25986 }, { "epoch": 1.27, "grad_norm": 0.5830770134925842, "learning_rate": 0.00037047329643603014, "loss": 3.1744, "step": 25987 }, { "epoch": 1.27, "grad_norm": 0.549442708492279, "learning_rate": 0.00037045833098616176, "loss": 2.9406, "step": 25988 }, { "epoch": 1.27, "grad_norm": 0.5874332785606384, "learning_rate": 0.00037044336535071954, "loss": 3.1014, "step": 25989 }, { "epoch": 1.27, "grad_norm": 0.5202533006668091, "learning_rate": 0.00037042839952974307, "loss": 3.0192, "step": 25990 }, { "epoch": 1.27, "grad_norm": 0.5560673475265503, "learning_rate": 0.00037041343352327185, "loss": 3.1054, "step": 25991 }, { "epoch": 1.27, "grad_norm": 0.5724155306816101, "learning_rate": 0.00037039846733134514, "loss": 3.2477, "step": 25992 }, { "epoch": 1.27, "grad_norm": 0.528751790523529, "learning_rate": 0.00037038350095400246, "loss": 3.2029, "step": 25993 }, { "epoch": 1.27, "grad_norm": 0.55881667137146, "learning_rate": 0.0003703685343912831, "loss": 3.1877, "step": 25994 }, { "epoch": 1.27, "grad_norm": 0.5411615967750549, "learning_rate": 0.0003703535676432266, "loss": 3.1792, "step": 25995 }, { "epoch": 1.27, "grad_norm": 0.5560112595558167, "learning_rate": 0.00037033860070987237, "loss": 3.1054, "step": 25996 }, { "epoch": 1.27, "grad_norm": 0.5627215504646301, "learning_rate": 0.00037032363359125974, "loss": 3.0732, "step": 25997 }, { "epoch": 1.27, "grad_norm": 0.600377082824707, "learning_rate": 0.0003703086662874283, "loss": 3.0793, "step": 25998 }, { "epoch": 1.27, "grad_norm": 0.5853224992752075, "learning_rate": 0.00037029369879841735, "loss": 2.9752, "step": 25999 }, { "epoch": 1.27, "grad_norm": 0.5395614504814148, "learning_rate": 0.0003702787311242662, "loss": 3.0602, "step": 26000 }, { "epoch": 1.27, "grad_norm": 0.555023729801178, "learning_rate": 0.0003702637632650145, "loss": 3.2359, "step": 26001 }, { "epoch": 1.27, "grad_norm": 0.5492096543312073, "learning_rate": 0.00037024879522070157, "loss": 2.8831, "step": 26002 }, { "epoch": 1.27, "grad_norm": 0.5381262302398682, "learning_rate": 0.00037023382699136686, "loss": 3.0899, "step": 26003 }, { "epoch": 1.27, "grad_norm": 0.5479117631912231, "learning_rate": 0.0003702188585770497, "loss": 3.2155, "step": 26004 }, { "epoch": 1.27, "grad_norm": 0.5779570937156677, "learning_rate": 0.00037020388997778954, "loss": 3.0202, "step": 26005 }, { "epoch": 1.27, "grad_norm": 0.5237216949462891, "learning_rate": 0.0003701889211936259, "loss": 3.0036, "step": 26006 }, { "epoch": 1.27, "grad_norm": 0.5618526339530945, "learning_rate": 0.00037017395222459817, "loss": 3.1664, "step": 26007 }, { "epoch": 1.27, "grad_norm": 0.5620414614677429, "learning_rate": 0.0003701589830707457, "loss": 3.166, "step": 26008 }, { "epoch": 1.27, "grad_norm": 0.5533866286277771, "learning_rate": 0.00037014401373210805, "loss": 3.2417, "step": 26009 }, { "epoch": 1.27, "grad_norm": 0.6008995771408081, "learning_rate": 0.00037012904420872454, "loss": 2.9938, "step": 26010 }, { "epoch": 1.27, "grad_norm": 0.5710824728012085, "learning_rate": 0.0003701140745006345, "loss": 3.0668, "step": 26011 }, { "epoch": 1.27, "grad_norm": 0.5625746846199036, "learning_rate": 0.0003700991046078777, "loss": 3.1091, "step": 26012 }, { "epoch": 1.27, "grad_norm": 0.5556397438049316, "learning_rate": 0.00037008413453049315, "loss": 3.1491, "step": 26013 }, { "epoch": 1.27, "grad_norm": 0.5745502710342407, "learning_rate": 0.0003700691642685207, "loss": 3.0732, "step": 26014 }, { "epoch": 1.27, "grad_norm": 0.5311285257339478, "learning_rate": 0.00037005419382199933, "loss": 2.9755, "step": 26015 }, { "epoch": 1.27, "grad_norm": 0.573462963104248, "learning_rate": 0.00037003922319096876, "loss": 3.2097, "step": 26016 }, { "epoch": 1.28, "grad_norm": 0.6004371047019958, "learning_rate": 0.00037002425237546844, "loss": 2.9479, "step": 26017 }, { "epoch": 1.28, "grad_norm": 0.5266828536987305, "learning_rate": 0.00037000928137553764, "loss": 3.1431, "step": 26018 }, { "epoch": 1.28, "grad_norm": 0.5740906000137329, "learning_rate": 0.00036999431019121593, "loss": 2.9836, "step": 26019 }, { "epoch": 1.28, "grad_norm": 0.5576544404029846, "learning_rate": 0.0003699793388225426, "loss": 2.9814, "step": 26020 }, { "epoch": 1.28, "grad_norm": 0.568863570690155, "learning_rate": 0.0003699643672695572, "loss": 3.1698, "step": 26021 }, { "epoch": 1.28, "grad_norm": 0.5414212942123413, "learning_rate": 0.0003699493955322991, "loss": 2.9529, "step": 26022 }, { "epoch": 1.28, "grad_norm": 0.5581563711166382, "learning_rate": 0.00036993442361080777, "loss": 3.2253, "step": 26023 }, { "epoch": 1.28, "grad_norm": 0.578456461429596, "learning_rate": 0.0003699194515051226, "loss": 3.0586, "step": 26024 }, { "epoch": 1.28, "grad_norm": 0.5558168888092041, "learning_rate": 0.00036990447921528307, "loss": 2.9423, "step": 26025 }, { "epoch": 1.28, "grad_norm": 0.5423651337623596, "learning_rate": 0.0003698895067413286, "loss": 3.0314, "step": 26026 }, { "epoch": 1.28, "grad_norm": 0.6390382647514343, "learning_rate": 0.0003698745340832986, "loss": 3.0495, "step": 26027 }, { "epoch": 1.28, "grad_norm": 0.5630806684494019, "learning_rate": 0.0003698595612412327, "loss": 3.0398, "step": 26028 }, { "epoch": 1.28, "grad_norm": 0.5756039023399353, "learning_rate": 0.0003698445882151699, "loss": 3.2904, "step": 26029 }, { "epoch": 1.28, "grad_norm": 0.5660070180892944, "learning_rate": 0.00036982961500515006, "loss": 3.0634, "step": 26030 }, { "epoch": 1.28, "grad_norm": 0.6267983913421631, "learning_rate": 0.0003698146416112124, "loss": 2.9549, "step": 26031 }, { "epoch": 1.28, "grad_norm": 0.5327684283256531, "learning_rate": 0.0003697996680333964, "loss": 2.9134, "step": 26032 }, { "epoch": 1.28, "grad_norm": 0.543506383895874, "learning_rate": 0.00036978469427174145, "loss": 3.131, "step": 26033 }, { "epoch": 1.28, "grad_norm": 0.5573419332504272, "learning_rate": 0.00036976972032628716, "loss": 3.2857, "step": 26034 }, { "epoch": 1.28, "grad_norm": 0.5342174768447876, "learning_rate": 0.0003697547461970728, "loss": 3.2678, "step": 26035 }, { "epoch": 1.28, "grad_norm": 0.5931220650672913, "learning_rate": 0.0003697397718841379, "loss": 3.0568, "step": 26036 }, { "epoch": 1.28, "grad_norm": 0.5429893136024475, "learning_rate": 0.0003697247973875218, "loss": 3.1089, "step": 26037 }, { "epoch": 1.28, "grad_norm": 0.5769212245941162, "learning_rate": 0.000369709822707264, "loss": 3.0458, "step": 26038 }, { "epoch": 1.28, "grad_norm": 0.549676239490509, "learning_rate": 0.00036969484784340405, "loss": 3.0389, "step": 26039 }, { "epoch": 1.28, "grad_norm": 0.6053200364112854, "learning_rate": 0.00036967987279598125, "loss": 3.218, "step": 26040 }, { "epoch": 1.28, "grad_norm": 0.6391343474388123, "learning_rate": 0.00036966489756503495, "loss": 3.1639, "step": 26041 }, { "epoch": 1.28, "grad_norm": 0.632569432258606, "learning_rate": 0.00036964992215060484, "loss": 3.2523, "step": 26042 }, { "epoch": 1.28, "grad_norm": 0.5485760569572449, "learning_rate": 0.00036963494655273016, "loss": 3.1035, "step": 26043 }, { "epoch": 1.28, "grad_norm": 0.6080588698387146, "learning_rate": 0.0003696199707714505, "loss": 3.2853, "step": 26044 }, { "epoch": 1.28, "grad_norm": 0.5590080618858337, "learning_rate": 0.0003696049948068052, "loss": 2.9434, "step": 26045 }, { "epoch": 1.28, "grad_norm": 0.5305129885673523, "learning_rate": 0.0003695900186588338, "loss": 3.0217, "step": 26046 }, { "epoch": 1.28, "grad_norm": 0.5904659032821655, "learning_rate": 0.00036957504232757555, "loss": 3.0828, "step": 26047 }, { "epoch": 1.28, "grad_norm": 0.5809376239776611, "learning_rate": 0.00036956006581307017, "loss": 3.1483, "step": 26048 }, { "epoch": 1.28, "grad_norm": 0.5855849385261536, "learning_rate": 0.0003695450891153569, "loss": 3.1708, "step": 26049 }, { "epoch": 1.28, "grad_norm": 0.55824214220047, "learning_rate": 0.00036953011223447526, "loss": 3.0215, "step": 26050 }, { "epoch": 1.28, "grad_norm": 0.5497469305992126, "learning_rate": 0.0003695151351704646, "loss": 3.1189, "step": 26051 }, { "epoch": 1.28, "grad_norm": 0.558712899684906, "learning_rate": 0.0003695001579233646, "loss": 3.0199, "step": 26052 }, { "epoch": 1.28, "grad_norm": 0.5559713840484619, "learning_rate": 0.0003694851804932145, "loss": 3.1461, "step": 26053 }, { "epoch": 1.28, "grad_norm": 0.5822705030441284, "learning_rate": 0.0003694702028800538, "loss": 3.0182, "step": 26054 }, { "epoch": 1.28, "grad_norm": 0.579784631729126, "learning_rate": 0.00036945522508392197, "loss": 3.0663, "step": 26055 }, { "epoch": 1.28, "grad_norm": 0.5558045506477356, "learning_rate": 0.00036944024710485834, "loss": 3.012, "step": 26056 }, { "epoch": 1.28, "grad_norm": 0.5622739195823669, "learning_rate": 0.0003694252689429026, "loss": 3.1491, "step": 26057 }, { "epoch": 1.28, "grad_norm": 0.5217506289482117, "learning_rate": 0.00036941029059809407, "loss": 3.0809, "step": 26058 }, { "epoch": 1.28, "grad_norm": 0.5513173341751099, "learning_rate": 0.00036939531207047207, "loss": 2.9512, "step": 26059 }, { "epoch": 1.28, "grad_norm": 0.5600497722625732, "learning_rate": 0.00036938033336007624, "loss": 3.2733, "step": 26060 }, { "epoch": 1.28, "grad_norm": 0.5448271036148071, "learning_rate": 0.00036936535446694595, "loss": 2.8906, "step": 26061 }, { "epoch": 1.28, "grad_norm": 0.564911961555481, "learning_rate": 0.0003693503753911207, "loss": 2.9206, "step": 26062 }, { "epoch": 1.28, "grad_norm": 0.5715450644493103, "learning_rate": 0.0003693353961326399, "loss": 2.9661, "step": 26063 }, { "epoch": 1.28, "grad_norm": 0.5593858361244202, "learning_rate": 0.000369320416691543, "loss": 3.2893, "step": 26064 }, { "epoch": 1.28, "grad_norm": 0.5523359179496765, "learning_rate": 0.00036930543706786953, "loss": 3.0601, "step": 26065 }, { "epoch": 1.28, "grad_norm": 0.5331974029541016, "learning_rate": 0.0003692904572616588, "loss": 3.0984, "step": 26066 }, { "epoch": 1.28, "grad_norm": 0.5936278700828552, "learning_rate": 0.00036927547727295024, "loss": 2.9378, "step": 26067 }, { "epoch": 1.28, "grad_norm": 0.5834710001945496, "learning_rate": 0.0003692604971017836, "loss": 3.0566, "step": 26068 }, { "epoch": 1.28, "grad_norm": 0.6169381737709045, "learning_rate": 0.0003692455167481981, "loss": 3.1942, "step": 26069 }, { "epoch": 1.28, "grad_norm": 0.5709614753723145, "learning_rate": 0.00036923053621223316, "loss": 3.2825, "step": 26070 }, { "epoch": 1.28, "grad_norm": 0.5453691482543945, "learning_rate": 0.0003692155554939283, "loss": 3.0132, "step": 26071 }, { "epoch": 1.28, "grad_norm": 0.5543729662895203, "learning_rate": 0.00036920057459332304, "loss": 3.0603, "step": 26072 }, { "epoch": 1.28, "grad_norm": 0.572906494140625, "learning_rate": 0.0003691855935104568, "loss": 3.2762, "step": 26073 }, { "epoch": 1.28, "grad_norm": 0.5718940496444702, "learning_rate": 0.0003691706122453689, "loss": 3.1996, "step": 26074 }, { "epoch": 1.28, "grad_norm": 0.534610390663147, "learning_rate": 0.00036915563079809906, "loss": 3.0727, "step": 26075 }, { "epoch": 1.28, "grad_norm": 0.6224848628044128, "learning_rate": 0.00036914064916868664, "loss": 2.8755, "step": 26076 }, { "epoch": 1.28, "grad_norm": 0.5702244639396667, "learning_rate": 0.0003691256673571709, "loss": 3.2086, "step": 26077 }, { "epoch": 1.28, "grad_norm": 0.567895770072937, "learning_rate": 0.0003691106853635915, "loss": 3.1703, "step": 26078 }, { "epoch": 1.28, "grad_norm": 0.5513347387313843, "learning_rate": 0.00036909570318798793, "loss": 2.9534, "step": 26079 }, { "epoch": 1.28, "grad_norm": 0.5436404943466187, "learning_rate": 0.00036908072083039953, "loss": 2.9507, "step": 26080 }, { "epoch": 1.28, "grad_norm": 0.5403690338134766, "learning_rate": 0.00036906573829086585, "loss": 3.1926, "step": 26081 }, { "epoch": 1.28, "grad_norm": 0.5600476264953613, "learning_rate": 0.00036905075556942625, "loss": 3.1376, "step": 26082 }, { "epoch": 1.28, "grad_norm": 0.6057079434394836, "learning_rate": 0.0003690357726661202, "loss": 3.2067, "step": 26083 }, { "epoch": 1.28, "grad_norm": 0.575212299823761, "learning_rate": 0.0003690207895809873, "loss": 3.1779, "step": 26084 }, { "epoch": 1.28, "grad_norm": 0.5557694435119629, "learning_rate": 0.00036900580631406683, "loss": 2.9421, "step": 26085 }, { "epoch": 1.28, "grad_norm": 0.5446481108665466, "learning_rate": 0.00036899082286539846, "loss": 3.1631, "step": 26086 }, { "epoch": 1.28, "grad_norm": 0.5733548998832703, "learning_rate": 0.0003689758392350215, "loss": 3.1699, "step": 26087 }, { "epoch": 1.28, "grad_norm": 0.5587981343269348, "learning_rate": 0.00036896085542297545, "loss": 3.245, "step": 26088 }, { "epoch": 1.28, "grad_norm": 0.5554731488227844, "learning_rate": 0.00036894587142929973, "loss": 3.1064, "step": 26089 }, { "epoch": 1.28, "grad_norm": 0.583678662776947, "learning_rate": 0.0003689308872540339, "loss": 3.456, "step": 26090 }, { "epoch": 1.28, "grad_norm": 0.5885918140411377, "learning_rate": 0.0003689159028972175, "loss": 3.3753, "step": 26091 }, { "epoch": 1.28, "grad_norm": 0.5499343872070312, "learning_rate": 0.00036890091835888966, "loss": 3.1146, "step": 26092 }, { "epoch": 1.28, "grad_norm": 0.5878946781158447, "learning_rate": 0.0003688859336390902, "loss": 2.9873, "step": 26093 }, { "epoch": 1.28, "grad_norm": 0.5701496601104736, "learning_rate": 0.0003688709487378584, "loss": 3.1183, "step": 26094 }, { "epoch": 1.28, "grad_norm": 0.5646578669548035, "learning_rate": 0.00036885596365523375, "loss": 3.3045, "step": 26095 }, { "epoch": 1.28, "grad_norm": 0.5535494089126587, "learning_rate": 0.0003688409783912557, "loss": 3.0198, "step": 26096 }, { "epoch": 1.28, "grad_norm": 0.5509173274040222, "learning_rate": 0.0003688259929459639, "loss": 3.1636, "step": 26097 }, { "epoch": 1.28, "grad_norm": 0.5773724317550659, "learning_rate": 0.00036881100731939756, "loss": 3.0848, "step": 26098 }, { "epoch": 1.28, "grad_norm": 0.5771932601928711, "learning_rate": 0.00036879602151159633, "loss": 2.8706, "step": 26099 }, { "epoch": 1.28, "grad_norm": 0.5937799215316772, "learning_rate": 0.0003687810355225996, "loss": 3.1804, "step": 26100 }, { "epoch": 1.28, "grad_norm": 0.5623434782028198, "learning_rate": 0.00036876604935244696, "loss": 3.0081, "step": 26101 }, { "epoch": 1.28, "grad_norm": 0.5697646737098694, "learning_rate": 0.0003687510630011776, "loss": 3.1045, "step": 26102 }, { "epoch": 1.28, "grad_norm": 0.562432050704956, "learning_rate": 0.00036873607646883125, "loss": 3.0006, "step": 26103 }, { "epoch": 1.28, "grad_norm": 0.5757910013198853, "learning_rate": 0.0003687210897554474, "loss": 2.8835, "step": 26104 }, { "epoch": 1.28, "grad_norm": 0.5502630472183228, "learning_rate": 0.0003687061028610653, "loss": 3.1017, "step": 26105 }, { "epoch": 1.28, "grad_norm": 0.5945281386375427, "learning_rate": 0.00036869111578572463, "loss": 2.9427, "step": 26106 }, { "epoch": 1.28, "grad_norm": 0.562147855758667, "learning_rate": 0.0003686761285294647, "loss": 2.8959, "step": 26107 }, { "epoch": 1.28, "grad_norm": 0.5568060874938965, "learning_rate": 0.0003686611410923251, "loss": 3.1515, "step": 26108 }, { "epoch": 1.28, "grad_norm": 0.5851083993911743, "learning_rate": 0.0003686461534743453, "loss": 2.9902, "step": 26109 }, { "epoch": 1.28, "grad_norm": 0.5441685318946838, "learning_rate": 0.0003686311656755647, "loss": 3.1402, "step": 26110 }, { "epoch": 1.28, "grad_norm": 0.5639269351959229, "learning_rate": 0.0003686161776960228, "loss": 2.9541, "step": 26111 }, { "epoch": 1.28, "grad_norm": 0.5413896441459656, "learning_rate": 0.0003686011895357592, "loss": 3.0014, "step": 26112 }, { "epoch": 1.28, "grad_norm": 0.5627785921096802, "learning_rate": 0.0003685862011948131, "loss": 3.0785, "step": 26113 }, { "epoch": 1.28, "grad_norm": 0.6175768375396729, "learning_rate": 0.00036857121267322424, "loss": 3.2395, "step": 26114 }, { "epoch": 1.28, "grad_norm": 0.5553895235061646, "learning_rate": 0.000368556223971032, "loss": 3.1911, "step": 26115 }, { "epoch": 1.28, "grad_norm": 0.5802327394485474, "learning_rate": 0.00036854123508827595, "loss": 3.1287, "step": 26116 }, { "epoch": 1.28, "grad_norm": 0.561752200126648, "learning_rate": 0.00036852624602499534, "loss": 3.2061, "step": 26117 }, { "epoch": 1.28, "grad_norm": 0.6642087697982788, "learning_rate": 0.0003685112567812298, "loss": 3.0804, "step": 26118 }, { "epoch": 1.28, "grad_norm": 0.5541870594024658, "learning_rate": 0.0003684962673570189, "loss": 3.0745, "step": 26119 }, { "epoch": 1.28, "grad_norm": 0.5653018355369568, "learning_rate": 0.0003684812777524019, "loss": 3.1752, "step": 26120 }, { "epoch": 1.28, "grad_norm": 0.5748471021652222, "learning_rate": 0.0003684662879674185, "loss": 3.1672, "step": 26121 }, { "epoch": 1.28, "grad_norm": 0.6177536845207214, "learning_rate": 0.00036845129800210796, "loss": 3.0762, "step": 26122 }, { "epoch": 1.28, "grad_norm": 0.5813118815422058, "learning_rate": 0.00036843630785650996, "loss": 3.0871, "step": 26123 }, { "epoch": 1.28, "grad_norm": 0.5994959473609924, "learning_rate": 0.0003684213175306639, "loss": 2.9619, "step": 26124 }, { "epoch": 1.28, "grad_norm": 0.6316370368003845, "learning_rate": 0.0003684063270246092, "loss": 3.1457, "step": 26125 }, { "epoch": 1.28, "grad_norm": 0.5947257876396179, "learning_rate": 0.0003683913363383854, "loss": 3.196, "step": 26126 }, { "epoch": 1.28, "grad_norm": 0.5939607620239258, "learning_rate": 0.00036837634547203207, "loss": 3.2313, "step": 26127 }, { "epoch": 1.28, "grad_norm": 0.5786470770835876, "learning_rate": 0.0003683613544255884, "loss": 3.1698, "step": 26128 }, { "epoch": 1.28, "grad_norm": 0.5446995496749878, "learning_rate": 0.00036834636319909425, "loss": 3.4354, "step": 26129 }, { "epoch": 1.28, "grad_norm": 0.5662088394165039, "learning_rate": 0.00036833137179258897, "loss": 3.104, "step": 26130 }, { "epoch": 1.28, "grad_norm": 0.5539671182632446, "learning_rate": 0.0003683163802061119, "loss": 3.2375, "step": 26131 }, { "epoch": 1.28, "grad_norm": 0.576744019985199, "learning_rate": 0.00036830138843970274, "loss": 3.222, "step": 26132 }, { "epoch": 1.28, "grad_norm": 0.5656651258468628, "learning_rate": 0.0003682863964934007, "loss": 3.09, "step": 26133 }, { "epoch": 1.28, "grad_norm": 0.5607897043228149, "learning_rate": 0.0003682714043672456, "loss": 3.0891, "step": 26134 }, { "epoch": 1.28, "grad_norm": 0.5817605257034302, "learning_rate": 0.00036825641206127667, "loss": 3.1378, "step": 26135 }, { "epoch": 1.28, "grad_norm": 0.5782478451728821, "learning_rate": 0.0003682414195755335, "loss": 3.0419, "step": 26136 }, { "epoch": 1.28, "grad_norm": 0.575994610786438, "learning_rate": 0.0003682264269100556, "loss": 3.3339, "step": 26137 }, { "epoch": 1.28, "grad_norm": 0.6501098871231079, "learning_rate": 0.00036821143406488236, "loss": 3.114, "step": 26138 }, { "epoch": 1.28, "grad_norm": 0.5928285121917725, "learning_rate": 0.0003681964410400534, "loss": 3.1105, "step": 26139 }, { "epoch": 1.28, "grad_norm": 0.5721243619918823, "learning_rate": 0.00036818144783560804, "loss": 2.8842, "step": 26140 }, { "epoch": 1.28, "grad_norm": 0.5766890645027161, "learning_rate": 0.0003681664544515861, "loss": 2.9824, "step": 26141 }, { "epoch": 1.28, "grad_norm": 0.5364489555358887, "learning_rate": 0.00036815146088802663, "loss": 3.034, "step": 26142 }, { "epoch": 1.28, "grad_norm": 0.5811851620674133, "learning_rate": 0.0003681364671449693, "loss": 3.1483, "step": 26143 }, { "epoch": 1.28, "grad_norm": 0.6257297992706299, "learning_rate": 0.0003681214732224537, "loss": 3.1882, "step": 26144 }, { "epoch": 1.28, "grad_norm": 0.5299636721611023, "learning_rate": 0.00036810647912051924, "loss": 3.008, "step": 26145 }, { "epoch": 1.28, "grad_norm": 0.5441427826881409, "learning_rate": 0.00036809148483920543, "loss": 3.0218, "step": 26146 }, { "epoch": 1.28, "grad_norm": 0.573083758354187, "learning_rate": 0.00036807649037855173, "loss": 3.064, "step": 26147 }, { "epoch": 1.28, "grad_norm": 0.5455029010772705, "learning_rate": 0.0003680614957385977, "loss": 3.1727, "step": 26148 }, { "epoch": 1.28, "grad_norm": 0.5636841654777527, "learning_rate": 0.00036804650091938276, "loss": 3.0886, "step": 26149 }, { "epoch": 1.28, "grad_norm": 0.5653840899467468, "learning_rate": 0.0003680315059209464, "loss": 3.1209, "step": 26150 }, { "epoch": 1.28, "grad_norm": 0.5494506359100342, "learning_rate": 0.0003680165107433282, "loss": 2.749, "step": 26151 }, { "epoch": 1.28, "grad_norm": 0.6142790913581848, "learning_rate": 0.00036800151538656764, "loss": 3.0748, "step": 26152 }, { "epoch": 1.28, "grad_norm": 0.5461570620536804, "learning_rate": 0.00036798651985070415, "loss": 3.0659, "step": 26153 }, { "epoch": 1.28, "grad_norm": 0.5468064546585083, "learning_rate": 0.00036797152413577713, "loss": 2.7883, "step": 26154 }, { "epoch": 1.28, "grad_norm": 0.5630825757980347, "learning_rate": 0.0003679565282418263, "loss": 3.0646, "step": 26155 }, { "epoch": 1.28, "grad_norm": 0.5849050879478455, "learning_rate": 0.0003679415321688911, "loss": 3.1669, "step": 26156 }, { "epoch": 1.28, "grad_norm": 0.5444782972335815, "learning_rate": 0.0003679265359170109, "loss": 3.1053, "step": 26157 }, { "epoch": 1.28, "grad_norm": 0.5830182433128357, "learning_rate": 0.00036791153948622534, "loss": 2.9533, "step": 26158 }, { "epoch": 1.28, "grad_norm": 0.5592882633209229, "learning_rate": 0.00036789654287657376, "loss": 3.0239, "step": 26159 }, { "epoch": 1.28, "grad_norm": 0.5635514855384827, "learning_rate": 0.00036788154608809583, "loss": 3.0102, "step": 26160 }, { "epoch": 1.28, "grad_norm": 0.5419289469718933, "learning_rate": 0.00036786654912083095, "loss": 3.061, "step": 26161 }, { "epoch": 1.28, "grad_norm": 0.5628889799118042, "learning_rate": 0.0003678515519748186, "loss": 2.9982, "step": 26162 }, { "epoch": 1.28, "grad_norm": 0.5445288419723511, "learning_rate": 0.00036783655465009844, "loss": 3.154, "step": 26163 }, { "epoch": 1.28, "grad_norm": 0.5867927074432373, "learning_rate": 0.00036782155714670974, "loss": 2.873, "step": 26164 }, { "epoch": 1.28, "grad_norm": 0.5903654098510742, "learning_rate": 0.0003678065594646921, "loss": 3.2046, "step": 26165 }, { "epoch": 1.28, "grad_norm": 0.5572836995124817, "learning_rate": 0.00036779156160408514, "loss": 3.1028, "step": 26166 }, { "epoch": 1.28, "grad_norm": 0.5365197658538818, "learning_rate": 0.0003677765635649282, "loss": 2.9957, "step": 26167 }, { "epoch": 1.28, "grad_norm": 0.567305862903595, "learning_rate": 0.0003677615653472608, "loss": 3.0076, "step": 26168 }, { "epoch": 1.28, "grad_norm": 0.5919384956359863, "learning_rate": 0.00036774656695112246, "loss": 2.7972, "step": 26169 }, { "epoch": 1.28, "grad_norm": 0.5833246111869812, "learning_rate": 0.0003677315683765528, "loss": 2.9785, "step": 26170 }, { "epoch": 1.28, "grad_norm": 0.5576356053352356, "learning_rate": 0.00036771656962359116, "loss": 3.0201, "step": 26171 }, { "epoch": 1.28, "grad_norm": 0.5311302542686462, "learning_rate": 0.0003677015706922771, "loss": 3.1483, "step": 26172 }, { "epoch": 1.28, "grad_norm": 0.6967976689338684, "learning_rate": 0.0003676865715826502, "loss": 3.0607, "step": 26173 }, { "epoch": 1.28, "grad_norm": 0.5725787281990051, "learning_rate": 0.00036767157229474975, "loss": 3.0772, "step": 26174 }, { "epoch": 1.28, "grad_norm": 0.5482276082038879, "learning_rate": 0.0003676565728286155, "loss": 3.0737, "step": 26175 }, { "epoch": 1.28, "grad_norm": 0.5845776796340942, "learning_rate": 0.0003676415731842868, "loss": 3.0155, "step": 26176 }, { "epoch": 1.28, "grad_norm": 0.56215900182724, "learning_rate": 0.0003676265733618033, "loss": 3.1768, "step": 26177 }, { "epoch": 1.28, "grad_norm": 0.638214111328125, "learning_rate": 0.00036761157336120444, "loss": 3.0996, "step": 26178 }, { "epoch": 1.28, "grad_norm": 0.5645318627357483, "learning_rate": 0.0003675965731825296, "loss": 3.1095, "step": 26179 }, { "epoch": 1.28, "grad_norm": 0.5821544528007507, "learning_rate": 0.0003675815728258184, "loss": 3.1556, "step": 26180 }, { "epoch": 1.28, "grad_norm": 0.564754843711853, "learning_rate": 0.00036756657229111055, "loss": 3.1484, "step": 26181 }, { "epoch": 1.28, "grad_norm": 0.559941291809082, "learning_rate": 0.0003675515715784452, "loss": 3.2373, "step": 26182 }, { "epoch": 1.28, "grad_norm": 0.5572832226753235, "learning_rate": 0.00036753657068786194, "loss": 3.0946, "step": 26183 }, { "epoch": 1.28, "grad_norm": 0.5751796364784241, "learning_rate": 0.00036752156961940047, "loss": 3.1057, "step": 26184 }, { "epoch": 1.28, "grad_norm": 0.7486311793327332, "learning_rate": 0.0003675065683731001, "loss": 2.961, "step": 26185 }, { "epoch": 1.28, "grad_norm": 0.593322217464447, "learning_rate": 0.0003674915669490004, "loss": 3.2956, "step": 26186 }, { "epoch": 1.28, "grad_norm": 0.5414199233055115, "learning_rate": 0.00036747656534714094, "loss": 2.9403, "step": 26187 }, { "epoch": 1.28, "grad_norm": 0.613577127456665, "learning_rate": 0.00036746156356756124, "loss": 3.0939, "step": 26188 }, { "epoch": 1.28, "grad_norm": 0.6109552979469299, "learning_rate": 0.00036744656161030075, "loss": 3.0483, "step": 26189 }, { "epoch": 1.28, "grad_norm": 0.6177679300308228, "learning_rate": 0.00036743155947539894, "loss": 3.0387, "step": 26190 }, { "epoch": 1.28, "grad_norm": 0.557323157787323, "learning_rate": 0.0003674165571628954, "loss": 3.1379, "step": 26191 }, { "epoch": 1.28, "grad_norm": 0.6148905754089355, "learning_rate": 0.0003674015546728297, "loss": 3.0182, "step": 26192 }, { "epoch": 1.28, "grad_norm": 0.5857757329940796, "learning_rate": 0.00036738655200524125, "loss": 3.0732, "step": 26193 }, { "epoch": 1.28, "grad_norm": 0.5747817754745483, "learning_rate": 0.00036737154916016956, "loss": 2.8798, "step": 26194 }, { "epoch": 1.28, "grad_norm": 0.5815746188163757, "learning_rate": 0.00036735654613765414, "loss": 3.2067, "step": 26195 }, { "epoch": 1.28, "grad_norm": 0.5818893313407898, "learning_rate": 0.0003673415429377345, "loss": 3.1208, "step": 26196 }, { "epoch": 1.28, "grad_norm": 0.5754139423370361, "learning_rate": 0.0003673265395604503, "loss": 3.2264, "step": 26197 }, { "epoch": 1.28, "grad_norm": 0.5612207055091858, "learning_rate": 0.00036731153600584094, "loss": 3.0593, "step": 26198 }, { "epoch": 1.28, "grad_norm": 0.5592952966690063, "learning_rate": 0.0003672965322739459, "loss": 3.3381, "step": 26199 }, { "epoch": 1.28, "grad_norm": 0.5539454817771912, "learning_rate": 0.0003672815283648048, "loss": 3.2051, "step": 26200 }, { "epoch": 1.28, "grad_norm": 0.5505897998809814, "learning_rate": 0.00036726652427845706, "loss": 3.0047, "step": 26201 }, { "epoch": 1.28, "grad_norm": 0.6153740286827087, "learning_rate": 0.00036725152001494226, "loss": 3.0584, "step": 26202 }, { "epoch": 1.28, "grad_norm": 0.5606221556663513, "learning_rate": 0.0003672365155742999, "loss": 3.0653, "step": 26203 }, { "epoch": 1.28, "grad_norm": 0.5304719805717468, "learning_rate": 0.0003672215109565696, "loss": 3.1395, "step": 26204 }, { "epoch": 1.28, "grad_norm": 0.5453681349754333, "learning_rate": 0.00036720650616179056, "loss": 3.0008, "step": 26205 }, { "epoch": 1.28, "grad_norm": 0.5311553478240967, "learning_rate": 0.00036719150119000264, "loss": 3.022, "step": 26206 }, { "epoch": 1.28, "grad_norm": 0.5572003126144409, "learning_rate": 0.0003671764960412453, "loss": 3.121, "step": 26207 }, { "epoch": 1.28, "grad_norm": 0.6088648438453674, "learning_rate": 0.0003671614907155579, "loss": 3.1107, "step": 26208 }, { "epoch": 1.28, "grad_norm": 0.6267878413200378, "learning_rate": 0.0003671464852129801, "loss": 3.201, "step": 26209 }, { "epoch": 1.28, "grad_norm": 0.5654227137565613, "learning_rate": 0.0003671314795335513, "loss": 3.1772, "step": 26210 }, { "epoch": 1.28, "grad_norm": 0.5888288617134094, "learning_rate": 0.0003671164736773112, "loss": 2.9714, "step": 26211 }, { "epoch": 1.28, "grad_norm": 0.542107343673706, "learning_rate": 0.00036710146764429915, "loss": 3.125, "step": 26212 }, { "epoch": 1.28, "grad_norm": 0.5171507596969604, "learning_rate": 0.00036708646143455477, "loss": 3.0741, "step": 26213 }, { "epoch": 1.28, "grad_norm": 0.5658581852912903, "learning_rate": 0.00036707145504811763, "loss": 3.1359, "step": 26214 }, { "epoch": 1.28, "grad_norm": 0.583036482334137, "learning_rate": 0.00036705644848502716, "loss": 3.0781, "step": 26215 }, { "epoch": 1.28, "grad_norm": 0.553354024887085, "learning_rate": 0.00036704144174532286, "loss": 3.1118, "step": 26216 }, { "epoch": 1.28, "grad_norm": 0.5677482485771179, "learning_rate": 0.0003670264348290443, "loss": 3.0024, "step": 26217 }, { "epoch": 1.28, "grad_norm": 0.6651977896690369, "learning_rate": 0.0003670114277362311, "loss": 2.9855, "step": 26218 }, { "epoch": 1.28, "grad_norm": 0.6104764342308044, "learning_rate": 0.00036699642046692264, "loss": 2.775, "step": 26219 }, { "epoch": 1.28, "grad_norm": 0.5855001211166382, "learning_rate": 0.0003669814130211584, "loss": 2.8599, "step": 26220 }, { "epoch": 1.29, "grad_norm": 0.5708563923835754, "learning_rate": 0.0003669664053989782, "loss": 3.1819, "step": 26221 }, { "epoch": 1.29, "grad_norm": 0.5730376243591309, "learning_rate": 0.00036695139760042125, "loss": 3.2359, "step": 26222 }, { "epoch": 1.29, "grad_norm": 0.5522173643112183, "learning_rate": 0.0003669363896255272, "loss": 2.9391, "step": 26223 }, { "epoch": 1.29, "grad_norm": 0.6628093123435974, "learning_rate": 0.00036692138147433566, "loss": 2.9406, "step": 26224 }, { "epoch": 1.29, "grad_norm": 0.5647956132888794, "learning_rate": 0.00036690637314688607, "loss": 3.0561, "step": 26225 }, { "epoch": 1.29, "grad_norm": 0.541031002998352, "learning_rate": 0.0003668913646432179, "loss": 3.0519, "step": 26226 }, { "epoch": 1.29, "grad_norm": 0.5448402166366577, "learning_rate": 0.00036687635596337084, "loss": 3.3572, "step": 26227 }, { "epoch": 1.29, "grad_norm": 0.5486478209495544, "learning_rate": 0.00036686134710738424, "loss": 2.9945, "step": 26228 }, { "epoch": 1.29, "grad_norm": 0.5686172842979431, "learning_rate": 0.0003668463380752978, "loss": 2.9606, "step": 26229 }, { "epoch": 1.29, "grad_norm": 0.54524165391922, "learning_rate": 0.00036683132886715093, "loss": 2.9665, "step": 26230 }, { "epoch": 1.29, "grad_norm": 0.5635949969291687, "learning_rate": 0.0003668163194829831, "loss": 2.8544, "step": 26231 }, { "epoch": 1.29, "grad_norm": 0.6484821438789368, "learning_rate": 0.00036680130992283413, "loss": 3.0592, "step": 26232 }, { "epoch": 1.29, "grad_norm": 0.5802894830703735, "learning_rate": 0.00036678630018674327, "loss": 3.1788, "step": 26233 }, { "epoch": 1.29, "grad_norm": 0.5594797730445862, "learning_rate": 0.0003667712902747501, "loss": 3.0831, "step": 26234 }, { "epoch": 1.29, "grad_norm": 0.5902174115180969, "learning_rate": 0.0003667562801868943, "loss": 3.0271, "step": 26235 }, { "epoch": 1.29, "grad_norm": 0.5711631178855896, "learning_rate": 0.00036674126992321525, "loss": 2.6302, "step": 26236 }, { "epoch": 1.29, "grad_norm": 0.527863621711731, "learning_rate": 0.0003667262594837525, "loss": 3.146, "step": 26237 }, { "epoch": 1.29, "grad_norm": 0.5474887490272522, "learning_rate": 0.0003667112488685457, "loss": 3.1675, "step": 26238 }, { "epoch": 1.29, "grad_norm": 0.570087730884552, "learning_rate": 0.00036669623807763434, "loss": 3.0728, "step": 26239 }, { "epoch": 1.29, "grad_norm": 0.582175612449646, "learning_rate": 0.00036668122711105793, "loss": 3.0308, "step": 26240 }, { "epoch": 1.29, "grad_norm": 0.5625713467597961, "learning_rate": 0.00036666621596885584, "loss": 3.1616, "step": 26241 }, { "epoch": 1.29, "grad_norm": 0.5727894306182861, "learning_rate": 0.00036665120465106786, "loss": 3.1067, "step": 26242 }, { "epoch": 1.29, "grad_norm": 0.5584968328475952, "learning_rate": 0.00036663619315773356, "loss": 2.8448, "step": 26243 }, { "epoch": 1.29, "grad_norm": 0.5649468898773193, "learning_rate": 0.0003666211814888922, "loss": 3.0906, "step": 26244 }, { "epoch": 1.29, "grad_norm": 0.5201927423477173, "learning_rate": 0.0003666061696445835, "loss": 3.1189, "step": 26245 }, { "epoch": 1.29, "grad_norm": 0.5626629590988159, "learning_rate": 0.0003665911576248469, "loss": 3.1336, "step": 26246 }, { "epoch": 1.29, "grad_norm": 0.580909013748169, "learning_rate": 0.00036657614542972216, "loss": 2.9194, "step": 26247 }, { "epoch": 1.29, "grad_norm": 0.5470664501190186, "learning_rate": 0.00036656113305924857, "loss": 2.9906, "step": 26248 }, { "epoch": 1.29, "grad_norm": 0.5362159013748169, "learning_rate": 0.0003665461205134657, "loss": 3.0446, "step": 26249 }, { "epoch": 1.29, "grad_norm": 0.5313224792480469, "learning_rate": 0.0003665311077924133, "loss": 3.1602, "step": 26250 }, { "epoch": 1.29, "grad_norm": 0.5694699287414551, "learning_rate": 0.0003665160948961307, "loss": 2.9436, "step": 26251 }, { "epoch": 1.29, "grad_norm": 0.5497816205024719, "learning_rate": 0.0003665010818246575, "loss": 3.0351, "step": 26252 }, { "epoch": 1.29, "grad_norm": 0.5596659779548645, "learning_rate": 0.00036648606857803324, "loss": 3.0668, "step": 26253 }, { "epoch": 1.29, "grad_norm": 0.6068937182426453, "learning_rate": 0.0003664710551562976, "loss": 3.0934, "step": 26254 }, { "epoch": 1.29, "grad_norm": 0.5531221032142639, "learning_rate": 0.0003664560415594899, "loss": 2.95, "step": 26255 }, { "epoch": 1.29, "grad_norm": 0.5915762782096863, "learning_rate": 0.00036644102778764963, "loss": 2.8952, "step": 26256 }, { "epoch": 1.29, "grad_norm": 0.5725966095924377, "learning_rate": 0.0003664260138408167, "loss": 3.1125, "step": 26257 }, { "epoch": 1.29, "grad_norm": 0.5639798045158386, "learning_rate": 0.00036641099971903033, "loss": 3.226, "step": 26258 }, { "epoch": 1.29, "grad_norm": 0.5750287771224976, "learning_rate": 0.0003663959854223302, "loss": 3.0015, "step": 26259 }, { "epoch": 1.29, "grad_norm": 0.5510179400444031, "learning_rate": 0.0003663809709507558, "loss": 2.9119, "step": 26260 }, { "epoch": 1.29, "grad_norm": 0.6045522093772888, "learning_rate": 0.00036636595630434664, "loss": 3.2426, "step": 26261 }, { "epoch": 1.29, "grad_norm": 0.5941025018692017, "learning_rate": 0.0003663509414831424, "loss": 3.1572, "step": 26262 }, { "epoch": 1.29, "grad_norm": 0.5708783268928528, "learning_rate": 0.0003663359264871825, "loss": 3.0711, "step": 26263 }, { "epoch": 1.29, "grad_norm": 0.5933129191398621, "learning_rate": 0.00036632091131650664, "loss": 2.9989, "step": 26264 }, { "epoch": 1.29, "grad_norm": 0.5684006214141846, "learning_rate": 0.00036630589597115424, "loss": 3.0895, "step": 26265 }, { "epoch": 1.29, "grad_norm": 0.555566132068634, "learning_rate": 0.00036629088045116485, "loss": 3.0108, "step": 26266 }, { "epoch": 1.29, "grad_norm": 0.5590725541114807, "learning_rate": 0.0003662758647565779, "loss": 3.1664, "step": 26267 }, { "epoch": 1.29, "grad_norm": 0.5853378176689148, "learning_rate": 0.00036626084888743325, "loss": 3.2277, "step": 26268 }, { "epoch": 1.29, "grad_norm": 0.5699875354766846, "learning_rate": 0.00036624583284377025, "loss": 2.9595, "step": 26269 }, { "epoch": 1.29, "grad_norm": 0.5524995923042297, "learning_rate": 0.0003662308166256284, "loss": 3.1834, "step": 26270 }, { "epoch": 1.29, "grad_norm": 0.5816988945007324, "learning_rate": 0.0003662158002330474, "loss": 3.0219, "step": 26271 }, { "epoch": 1.29, "grad_norm": 0.5661150813102722, "learning_rate": 0.0003662007836660666, "loss": 2.9393, "step": 26272 }, { "epoch": 1.29, "grad_norm": 0.5578607320785522, "learning_rate": 0.0003661857669247258, "loss": 2.9993, "step": 26273 }, { "epoch": 1.29, "grad_norm": 0.5460517406463623, "learning_rate": 0.0003661707500090644, "loss": 2.9927, "step": 26274 }, { "epoch": 1.29, "grad_norm": 0.5846410393714905, "learning_rate": 0.000366155732919122, "loss": 3.0969, "step": 26275 }, { "epoch": 1.29, "grad_norm": 0.5628123879432678, "learning_rate": 0.0003661407156549382, "loss": 3.1571, "step": 26276 }, { "epoch": 1.29, "grad_norm": 0.5363184809684753, "learning_rate": 0.0003661256982165523, "loss": 2.9979, "step": 26277 }, { "epoch": 1.29, "grad_norm": 0.5130451321601868, "learning_rate": 0.0003661106806040041, "loss": 3.1275, "step": 26278 }, { "epoch": 1.29, "grad_norm": 0.5658857226371765, "learning_rate": 0.0003660956628173331, "loss": 3.0147, "step": 26279 }, { "epoch": 1.29, "grad_norm": 0.56007319688797, "learning_rate": 0.00036608064485657896, "loss": 3.1505, "step": 26280 }, { "epoch": 1.29, "grad_norm": 0.6172868013381958, "learning_rate": 0.00036606562672178097, "loss": 3.1135, "step": 26281 }, { "epoch": 1.29, "grad_norm": 0.5785728693008423, "learning_rate": 0.0003660506084129789, "loss": 3.201, "step": 26282 }, { "epoch": 1.29, "grad_norm": 0.60005784034729, "learning_rate": 0.0003660355899302123, "loss": 3.0202, "step": 26283 }, { "epoch": 1.29, "grad_norm": 0.5991455912590027, "learning_rate": 0.0003660205712735205, "loss": 3.0963, "step": 26284 }, { "epoch": 1.29, "grad_norm": 0.6216191649436951, "learning_rate": 0.0003660055524429433, "loss": 3.1045, "step": 26285 }, { "epoch": 1.29, "grad_norm": 0.5496450662612915, "learning_rate": 0.00036599053343852026, "loss": 2.9522, "step": 26286 }, { "epoch": 1.29, "grad_norm": 0.5841752886772156, "learning_rate": 0.00036597551426029076, "loss": 3.2331, "step": 26287 }, { "epoch": 1.29, "grad_norm": 0.5961534380912781, "learning_rate": 0.00036596049490829444, "loss": 3.0853, "step": 26288 }, { "epoch": 1.29, "grad_norm": 0.5837684273719788, "learning_rate": 0.0003659454753825709, "loss": 2.9664, "step": 26289 }, { "epoch": 1.29, "grad_norm": 0.5820373892784119, "learning_rate": 0.0003659304556831597, "loss": 2.9441, "step": 26290 }, { "epoch": 1.29, "grad_norm": 0.5966834425926208, "learning_rate": 0.00036591543581010043, "loss": 2.9539, "step": 26291 }, { "epoch": 1.29, "grad_norm": 0.5738745331764221, "learning_rate": 0.00036590041576343237, "loss": 3.2159, "step": 26292 }, { "epoch": 1.29, "grad_norm": 0.5458084344863892, "learning_rate": 0.00036588539554319546, "loss": 3.111, "step": 26293 }, { "epoch": 1.29, "grad_norm": 0.5425732731819153, "learning_rate": 0.00036587037514942917, "loss": 3.3392, "step": 26294 }, { "epoch": 1.29, "grad_norm": 0.6192220449447632, "learning_rate": 0.0003658553545821728, "loss": 3.0393, "step": 26295 }, { "epoch": 1.29, "grad_norm": 0.5837422013282776, "learning_rate": 0.0003658403338414662, "loss": 3.2242, "step": 26296 }, { "epoch": 1.29, "grad_norm": 0.5665894746780396, "learning_rate": 0.00036582531292734887, "loss": 3.3003, "step": 26297 }, { "epoch": 1.29, "grad_norm": 0.5463784337043762, "learning_rate": 0.00036581029183986027, "loss": 3.0505, "step": 26298 }, { "epoch": 1.29, "grad_norm": 0.5693387389183044, "learning_rate": 0.00036579527057904005, "loss": 3.14, "step": 26299 }, { "epoch": 1.29, "grad_norm": 0.5675256848335266, "learning_rate": 0.00036578024914492774, "loss": 3.2636, "step": 26300 }, { "epoch": 1.29, "grad_norm": 0.5448147654533386, "learning_rate": 0.0003657652275375629, "loss": 3.1507, "step": 26301 }, { "epoch": 1.29, "grad_norm": 0.5830021500587463, "learning_rate": 0.00036575020575698515, "loss": 3.1082, "step": 26302 }, { "epoch": 1.29, "grad_norm": 0.5744504332542419, "learning_rate": 0.0003657351838032339, "loss": 3.057, "step": 26303 }, { "epoch": 1.29, "grad_norm": 0.5693845152854919, "learning_rate": 0.00036572016167634895, "loss": 2.9967, "step": 26304 }, { "epoch": 1.29, "grad_norm": 0.5699017643928528, "learning_rate": 0.00036570513937636974, "loss": 3.131, "step": 26305 }, { "epoch": 1.29, "grad_norm": 0.6183264851570129, "learning_rate": 0.0003656901169033358, "loss": 3.0087, "step": 26306 }, { "epoch": 1.29, "grad_norm": 0.5810492634773254, "learning_rate": 0.00036567509425728675, "loss": 3.1105, "step": 26307 }, { "epoch": 1.29, "grad_norm": 0.5565503239631653, "learning_rate": 0.0003656600714382621, "loss": 2.939, "step": 26308 }, { "epoch": 1.29, "grad_norm": 0.5506647825241089, "learning_rate": 0.00036564504844630155, "loss": 3.0549, "step": 26309 }, { "epoch": 1.29, "grad_norm": 0.53928142786026, "learning_rate": 0.00036563002528144445, "loss": 3.0072, "step": 26310 }, { "epoch": 1.29, "grad_norm": 0.5917065143585205, "learning_rate": 0.00036561500194373057, "loss": 3.0097, "step": 26311 }, { "epoch": 1.29, "grad_norm": 0.584202766418457, "learning_rate": 0.0003655999784331994, "loss": 3.2125, "step": 26312 }, { "epoch": 1.29, "grad_norm": 0.5942773222923279, "learning_rate": 0.00036558495474989044, "loss": 2.9767, "step": 26313 }, { "epoch": 1.29, "grad_norm": 0.5491356253623962, "learning_rate": 0.0003655699308938434, "loss": 3.1626, "step": 26314 }, { "epoch": 1.29, "grad_norm": 0.5383910536766052, "learning_rate": 0.0003655549068650978, "loss": 3.2015, "step": 26315 }, { "epoch": 1.29, "grad_norm": 0.5347186326980591, "learning_rate": 0.00036553988266369323, "loss": 3.1515, "step": 26316 }, { "epoch": 1.29, "grad_norm": 0.5822970867156982, "learning_rate": 0.0003655248582896692, "loss": 3.1044, "step": 26317 }, { "epoch": 1.29, "grad_norm": 0.5540775060653687, "learning_rate": 0.0003655098337430651, "loss": 3.1069, "step": 26318 }, { "epoch": 1.29, "grad_norm": 0.5978838205337524, "learning_rate": 0.00036549480902392094, "loss": 2.8344, "step": 26319 }, { "epoch": 1.29, "grad_norm": 0.546452522277832, "learning_rate": 0.00036547978413227597, "loss": 2.8552, "step": 26320 }, { "epoch": 1.29, "grad_norm": 0.5403779149055481, "learning_rate": 0.0003654647590681699, "loss": 3.1634, "step": 26321 }, { "epoch": 1.29, "grad_norm": 0.5638641119003296, "learning_rate": 0.0003654497338316422, "loss": 3.0284, "step": 26322 }, { "epoch": 1.29, "grad_norm": 0.5681201219558716, "learning_rate": 0.0003654347084227325, "loss": 3.1705, "step": 26323 }, { "epoch": 1.29, "grad_norm": 0.5366926193237305, "learning_rate": 0.0003654196828414804, "loss": 3.0462, "step": 26324 }, { "epoch": 1.29, "grad_norm": 0.5784863829612732, "learning_rate": 0.0003654046570879254, "loss": 3.0464, "step": 26325 }, { "epoch": 1.29, "grad_norm": 0.5681374073028564, "learning_rate": 0.0003653896311621072, "loss": 2.7868, "step": 26326 }, { "epoch": 1.29, "grad_norm": 0.5848317742347717, "learning_rate": 0.00036537460506406533, "loss": 3.2213, "step": 26327 }, { "epoch": 1.29, "grad_norm": 0.5783429741859436, "learning_rate": 0.00036535957879383913, "loss": 2.9188, "step": 26328 }, { "epoch": 1.29, "grad_norm": 0.5719509124755859, "learning_rate": 0.00036534455235146847, "loss": 3.0323, "step": 26329 }, { "epoch": 1.29, "grad_norm": 0.5757635235786438, "learning_rate": 0.00036532952573699296, "loss": 3.2186, "step": 26330 }, { "epoch": 1.29, "grad_norm": 0.56982421875, "learning_rate": 0.00036531449895045197, "loss": 2.9804, "step": 26331 }, { "epoch": 1.29, "grad_norm": 0.6023058295249939, "learning_rate": 0.00036529947199188516, "loss": 2.9169, "step": 26332 }, { "epoch": 1.29, "grad_norm": 0.5599899888038635, "learning_rate": 0.00036528444486133197, "loss": 3.1561, "step": 26333 }, { "epoch": 1.29, "grad_norm": 0.5620229840278625, "learning_rate": 0.00036526941755883233, "loss": 2.9379, "step": 26334 }, { "epoch": 1.29, "grad_norm": 0.6028508543968201, "learning_rate": 0.0003652543900844255, "loss": 3.1183, "step": 26335 }, { "epoch": 1.29, "grad_norm": 0.5630659461021423, "learning_rate": 0.0003652393624381512, "loss": 3.0555, "step": 26336 }, { "epoch": 1.29, "grad_norm": 0.5554866790771484, "learning_rate": 0.00036522433462004894, "loss": 3.0993, "step": 26337 }, { "epoch": 1.29, "grad_norm": 0.595605194568634, "learning_rate": 0.0003652093066301583, "loss": 2.8999, "step": 26338 }, { "epoch": 1.29, "grad_norm": 0.5355490446090698, "learning_rate": 0.00036519427846851897, "loss": 2.9579, "step": 26339 }, { "epoch": 1.29, "grad_norm": 0.5914220213890076, "learning_rate": 0.0003651792501351704, "loss": 2.9879, "step": 26340 }, { "epoch": 1.29, "grad_norm": 0.5520111322402954, "learning_rate": 0.0003651642216301523, "loss": 2.8358, "step": 26341 }, { "epoch": 1.29, "grad_norm": 0.5735923051834106, "learning_rate": 0.00036514919295350414, "loss": 3.1905, "step": 26342 }, { "epoch": 1.29, "grad_norm": 0.583451509475708, "learning_rate": 0.0003651341641052656, "loss": 3.2077, "step": 26343 }, { "epoch": 1.29, "grad_norm": 0.5587623715400696, "learning_rate": 0.0003651191350854761, "loss": 3.1237, "step": 26344 }, { "epoch": 1.29, "grad_norm": 0.5999316573143005, "learning_rate": 0.0003651041058941755, "loss": 2.9136, "step": 26345 }, { "epoch": 1.29, "grad_norm": 0.5469268560409546, "learning_rate": 0.000365089076531403, "loss": 3.1107, "step": 26346 }, { "epoch": 1.29, "grad_norm": 0.5756465792655945, "learning_rate": 0.0003650740469971985, "loss": 3.0659, "step": 26347 }, { "epoch": 1.29, "grad_norm": 0.5584861040115356, "learning_rate": 0.00036505901729160146, "loss": 3.095, "step": 26348 }, { "epoch": 1.29, "grad_norm": 0.5867658853530884, "learning_rate": 0.0003650439874146515, "loss": 2.9141, "step": 26349 }, { "epoch": 1.29, "grad_norm": 0.5676002502441406, "learning_rate": 0.0003650289573663882, "loss": 3.2514, "step": 26350 }, { "epoch": 1.29, "grad_norm": 0.5888804793357849, "learning_rate": 0.0003650139271468512, "loss": 3.1704, "step": 26351 }, { "epoch": 1.29, "grad_norm": 0.5709172487258911, "learning_rate": 0.00036499889675608, "loss": 3.2181, "step": 26352 }, { "epoch": 1.29, "grad_norm": 0.560498058795929, "learning_rate": 0.0003649838661941142, "loss": 3.1361, "step": 26353 }, { "epoch": 1.29, "grad_norm": 0.5983741283416748, "learning_rate": 0.0003649688354609934, "loss": 2.8439, "step": 26354 }, { "epoch": 1.29, "grad_norm": 0.5880367159843445, "learning_rate": 0.0003649538045567571, "loss": 2.862, "step": 26355 }, { "epoch": 1.29, "grad_norm": 0.5435540080070496, "learning_rate": 0.00036493877348144516, "loss": 3.0035, "step": 26356 }, { "epoch": 1.29, "grad_norm": 0.5692511796951294, "learning_rate": 0.00036492374223509686, "loss": 3.3313, "step": 26357 }, { "epoch": 1.29, "grad_norm": 0.5610879063606262, "learning_rate": 0.000364908710817752, "loss": 3.1846, "step": 26358 }, { "epoch": 1.29, "grad_norm": 0.5607772469520569, "learning_rate": 0.00036489367922945, "loss": 3.1864, "step": 26359 }, { "epoch": 1.29, "grad_norm": 0.5416234731674194, "learning_rate": 0.00036487864747023055, "loss": 3.1087, "step": 26360 }, { "epoch": 1.29, "grad_norm": 0.5746535062789917, "learning_rate": 0.0003648636155401333, "loss": 2.9298, "step": 26361 }, { "epoch": 1.29, "grad_norm": 0.5644704699516296, "learning_rate": 0.0003648485834391977, "loss": 3.1022, "step": 26362 }, { "epoch": 1.29, "grad_norm": 0.5640776753425598, "learning_rate": 0.00036483355116746346, "loss": 3.1836, "step": 26363 }, { "epoch": 1.29, "grad_norm": 0.6316478848457336, "learning_rate": 0.00036481851872497003, "loss": 2.929, "step": 26364 }, { "epoch": 1.29, "grad_norm": 0.5855584740638733, "learning_rate": 0.0003648034861117572, "loss": 2.9929, "step": 26365 }, { "epoch": 1.29, "grad_norm": 0.5345319509506226, "learning_rate": 0.00036478845332786446, "loss": 3.098, "step": 26366 }, { "epoch": 1.29, "grad_norm": 0.6064936518669128, "learning_rate": 0.0003647734203733314, "loss": 3.1827, "step": 26367 }, { "epoch": 1.29, "grad_norm": 0.5909245610237122, "learning_rate": 0.0003647583872481976, "loss": 2.9642, "step": 26368 }, { "epoch": 1.29, "grad_norm": 0.5904161334037781, "learning_rate": 0.00036474335395250256, "loss": 2.8956, "step": 26369 }, { "epoch": 1.29, "grad_norm": 0.6184599995613098, "learning_rate": 0.00036472832048628615, "loss": 3.0058, "step": 26370 }, { "epoch": 1.29, "grad_norm": 0.5821686387062073, "learning_rate": 0.0003647132868495877, "loss": 3.1638, "step": 26371 }, { "epoch": 1.29, "grad_norm": 0.5509762167930603, "learning_rate": 0.00036469825304244696, "loss": 3.0594, "step": 26372 }, { "epoch": 1.29, "grad_norm": 0.5704058408737183, "learning_rate": 0.00036468321906490346, "loss": 3.077, "step": 26373 }, { "epoch": 1.29, "grad_norm": 0.5785650014877319, "learning_rate": 0.0003646681849169968, "loss": 3.288, "step": 26374 }, { "epoch": 1.29, "grad_norm": 0.5538581609725952, "learning_rate": 0.0003646531505987666, "loss": 3.2065, "step": 26375 }, { "epoch": 1.29, "grad_norm": 0.5718187093734741, "learning_rate": 0.00036463811611025246, "loss": 3.0738, "step": 26376 }, { "epoch": 1.29, "grad_norm": 0.5524046421051025, "learning_rate": 0.0003646230814514939, "loss": 2.9332, "step": 26377 }, { "epoch": 1.29, "grad_norm": 0.528167188167572, "learning_rate": 0.0003646080466225307, "loss": 3.0779, "step": 26378 }, { "epoch": 1.29, "grad_norm": 0.5965219736099243, "learning_rate": 0.00036459301162340224, "loss": 3.0625, "step": 26379 }, { "epoch": 1.29, "grad_norm": 0.5643555521965027, "learning_rate": 0.00036457797645414816, "loss": 3.1285, "step": 26380 }, { "epoch": 1.29, "grad_norm": 0.5772526264190674, "learning_rate": 0.00036456294111480823, "loss": 2.9713, "step": 26381 }, { "epoch": 1.29, "grad_norm": 0.5830170512199402, "learning_rate": 0.0003645479056054219, "loss": 3.3178, "step": 26382 }, { "epoch": 1.29, "grad_norm": 0.5554019808769226, "learning_rate": 0.0003645328699260288, "loss": 3.0084, "step": 26383 }, { "epoch": 1.29, "grad_norm": 0.6931495070457458, "learning_rate": 0.0003645178340766686, "loss": 3.0225, "step": 26384 }, { "epoch": 1.29, "grad_norm": 0.5578175187110901, "learning_rate": 0.0003645027980573807, "loss": 2.7523, "step": 26385 }, { "epoch": 1.29, "grad_norm": 0.5534636378288269, "learning_rate": 0.00036448776186820495, "loss": 3.299, "step": 26386 }, { "epoch": 1.29, "grad_norm": 0.5296128392219543, "learning_rate": 0.00036447272550918085, "loss": 3.2003, "step": 26387 }, { "epoch": 1.29, "grad_norm": 0.550498902797699, "learning_rate": 0.00036445768898034803, "loss": 2.945, "step": 26388 }, { "epoch": 1.29, "grad_norm": 0.5512216687202454, "learning_rate": 0.000364442652281746, "loss": 3.1243, "step": 26389 }, { "epoch": 1.29, "grad_norm": 0.552977979183197, "learning_rate": 0.0003644276154134144, "loss": 3.0263, "step": 26390 }, { "epoch": 1.29, "grad_norm": 0.5711373090744019, "learning_rate": 0.00036441257837539293, "loss": 3.0337, "step": 26391 }, { "epoch": 1.29, "grad_norm": 0.6151379942893982, "learning_rate": 0.00036439754116772107, "loss": 3.0109, "step": 26392 }, { "epoch": 1.29, "grad_norm": 0.5529595017433167, "learning_rate": 0.0003643825037904385, "loss": 3.1506, "step": 26393 }, { "epoch": 1.29, "grad_norm": 0.5650982856750488, "learning_rate": 0.00036436746624358485, "loss": 3.193, "step": 26394 }, { "epoch": 1.29, "grad_norm": 0.5503296852111816, "learning_rate": 0.0003643524285271996, "loss": 3.1614, "step": 26395 }, { "epoch": 1.29, "grad_norm": 0.5685902237892151, "learning_rate": 0.00036433739064132257, "loss": 3.0464, "step": 26396 }, { "epoch": 1.29, "grad_norm": 0.5520438551902771, "learning_rate": 0.0003643223525859931, "loss": 3.0988, "step": 26397 }, { "epoch": 1.29, "grad_norm": 0.5292802453041077, "learning_rate": 0.00036430731436125097, "loss": 3.1851, "step": 26398 }, { "epoch": 1.29, "grad_norm": 0.5346623659133911, "learning_rate": 0.00036429227596713575, "loss": 3.0767, "step": 26399 }, { "epoch": 1.29, "grad_norm": 0.5210029482841492, "learning_rate": 0.000364277237403687, "loss": 3.0894, "step": 26400 }, { "epoch": 1.29, "grad_norm": 0.5822991728782654, "learning_rate": 0.00036426219867094446, "loss": 3.1194, "step": 26401 }, { "epoch": 1.29, "grad_norm": 0.5511487126350403, "learning_rate": 0.0003642471597689477, "loss": 2.9108, "step": 26402 }, { "epoch": 1.29, "grad_norm": 0.575802743434906, "learning_rate": 0.0003642321206977362, "loss": 3.0977, "step": 26403 }, { "epoch": 1.29, "grad_norm": 0.5361688137054443, "learning_rate": 0.00036421708145734967, "loss": 3.2185, "step": 26404 }, { "epoch": 1.29, "grad_norm": 0.5705873966217041, "learning_rate": 0.00036420204204782767, "loss": 3.0483, "step": 26405 }, { "epoch": 1.29, "grad_norm": 0.5684397220611572, "learning_rate": 0.00036418700246920987, "loss": 2.9966, "step": 26406 }, { "epoch": 1.29, "grad_norm": 0.6203626394271851, "learning_rate": 0.00036417196272153595, "loss": 3.0679, "step": 26407 }, { "epoch": 1.29, "grad_norm": 0.55103999376297, "learning_rate": 0.0003641569228048453, "loss": 3.128, "step": 26408 }, { "epoch": 1.29, "grad_norm": 0.5689854621887207, "learning_rate": 0.0003641418827191777, "loss": 2.9817, "step": 26409 }, { "epoch": 1.29, "grad_norm": 0.5948728322982788, "learning_rate": 0.00036412684246457276, "loss": 3.2106, "step": 26410 }, { "epoch": 1.29, "grad_norm": 0.5416500568389893, "learning_rate": 0.0003641118020410701, "loss": 2.9918, "step": 26411 }, { "epoch": 1.29, "grad_norm": 0.5819432139396667, "learning_rate": 0.00036409676144870916, "loss": 2.9693, "step": 26412 }, { "epoch": 1.29, "grad_norm": 0.5323217511177063, "learning_rate": 0.00036408172068752977, "loss": 3.0868, "step": 26413 }, { "epoch": 1.29, "grad_norm": 0.5703701972961426, "learning_rate": 0.00036406667975757143, "loss": 3.0828, "step": 26414 }, { "epoch": 1.29, "grad_norm": 0.5979413390159607, "learning_rate": 0.00036405163865887383, "loss": 2.9349, "step": 26415 }, { "epoch": 1.29, "grad_norm": 0.6079196929931641, "learning_rate": 0.0003640365973914765, "loss": 3.2151, "step": 26416 }, { "epoch": 1.29, "grad_norm": 0.5648472905158997, "learning_rate": 0.0003640215559554191, "loss": 3.2673, "step": 26417 }, { "epoch": 1.29, "grad_norm": 0.576112687587738, "learning_rate": 0.00036400651435074136, "loss": 3.2403, "step": 26418 }, { "epoch": 1.29, "grad_norm": 0.6245038509368896, "learning_rate": 0.0003639914725774826, "loss": 3.04, "step": 26419 }, { "epoch": 1.29, "grad_norm": 0.5649415254592896, "learning_rate": 0.0003639764306356827, "loss": 2.7593, "step": 26420 }, { "epoch": 1.29, "grad_norm": 0.6045398116111755, "learning_rate": 0.0003639613885253812, "loss": 2.9559, "step": 26421 }, { "epoch": 1.29, "grad_norm": 0.5652473568916321, "learning_rate": 0.00036394634624661767, "loss": 2.9299, "step": 26422 }, { "epoch": 1.29, "grad_norm": 0.5572347640991211, "learning_rate": 0.0003639313037994318, "loss": 3.1491, "step": 26423 }, { "epoch": 1.29, "grad_norm": 0.7249118685722351, "learning_rate": 0.00036391626118386316, "loss": 3.1411, "step": 26424 }, { "epoch": 1.3, "grad_norm": 0.581577718257904, "learning_rate": 0.00036390121839995147, "loss": 3.0565, "step": 26425 }, { "epoch": 1.3, "grad_norm": 0.5638243556022644, "learning_rate": 0.00036388617544773615, "loss": 3.0026, "step": 26426 }, { "epoch": 1.3, "grad_norm": 0.585462749004364, "learning_rate": 0.000363871132327257, "loss": 3.1258, "step": 26427 }, { "epoch": 1.3, "grad_norm": 0.6039126515388489, "learning_rate": 0.00036385608903855357, "loss": 3.2572, "step": 26428 }, { "epoch": 1.3, "grad_norm": 0.5659081339836121, "learning_rate": 0.0003638410455816655, "loss": 3.0501, "step": 26429 }, { "epoch": 1.3, "grad_norm": 0.610337495803833, "learning_rate": 0.00036382600195663243, "loss": 2.9333, "step": 26430 }, { "epoch": 1.3, "grad_norm": 0.5635059475898743, "learning_rate": 0.00036381095816349386, "loss": 3.2617, "step": 26431 }, { "epoch": 1.3, "grad_norm": 0.5821727514266968, "learning_rate": 0.00036379591420228964, "loss": 3.0316, "step": 26432 }, { "epoch": 1.3, "grad_norm": 0.6468302607536316, "learning_rate": 0.0003637808700730592, "loss": 3.1508, "step": 26433 }, { "epoch": 1.3, "grad_norm": 0.6181358098983765, "learning_rate": 0.0003637658257758421, "loss": 3.0418, "step": 26434 }, { "epoch": 1.3, "grad_norm": 0.5608850717544556, "learning_rate": 0.0003637507813106783, "loss": 3.0204, "step": 26435 }, { "epoch": 1.3, "grad_norm": 0.584475576877594, "learning_rate": 0.0003637357366776071, "loss": 3.158, "step": 26436 }, { "epoch": 1.3, "grad_norm": 0.5593780875205994, "learning_rate": 0.00036372069187666826, "loss": 3.0006, "step": 26437 }, { "epoch": 1.3, "grad_norm": 0.5664603114128113, "learning_rate": 0.0003637056469079013, "loss": 3.2513, "step": 26438 }, { "epoch": 1.3, "grad_norm": 0.5719146132469177, "learning_rate": 0.00036369060177134603, "loss": 3.076, "step": 26439 }, { "epoch": 1.3, "grad_norm": 0.5447546243667603, "learning_rate": 0.000363675556467042, "loss": 2.7886, "step": 26440 }, { "epoch": 1.3, "grad_norm": 0.607576310634613, "learning_rate": 0.00036366051099502865, "loss": 3.0216, "step": 26441 }, { "epoch": 1.3, "grad_norm": 0.5661342740058899, "learning_rate": 0.00036364546535534584, "loss": 3.0811, "step": 26442 }, { "epoch": 1.3, "grad_norm": 0.5804358124732971, "learning_rate": 0.0003636304195480332, "loss": 3.0749, "step": 26443 }, { "epoch": 1.3, "grad_norm": 0.5507537126541138, "learning_rate": 0.00036361537357313024, "loss": 3.028, "step": 26444 }, { "epoch": 1.3, "grad_norm": 0.6021802425384521, "learning_rate": 0.0003636003274306766, "loss": 3.0707, "step": 26445 }, { "epoch": 1.3, "grad_norm": 0.6064923405647278, "learning_rate": 0.0003635852811207119, "loss": 3.1915, "step": 26446 }, { "epoch": 1.3, "grad_norm": 0.5652244687080383, "learning_rate": 0.0003635702346432759, "loss": 2.9867, "step": 26447 }, { "epoch": 1.3, "grad_norm": 0.5297040939331055, "learning_rate": 0.0003635551879984081, "loss": 3.2154, "step": 26448 }, { "epoch": 1.3, "grad_norm": 0.6315213441848755, "learning_rate": 0.00036354014118614815, "loss": 2.971, "step": 26449 }, { "epoch": 1.3, "grad_norm": 0.5433605313301086, "learning_rate": 0.0003635250942065357, "loss": 3.0269, "step": 26450 }, { "epoch": 1.3, "grad_norm": 0.5568627119064331, "learning_rate": 0.0003635100470596104, "loss": 3.047, "step": 26451 }, { "epoch": 1.3, "grad_norm": 0.563654363155365, "learning_rate": 0.0003634949997454118, "loss": 2.9285, "step": 26452 }, { "epoch": 1.3, "grad_norm": 0.5413941740989685, "learning_rate": 0.00036347995226397963, "loss": 3.1223, "step": 26453 }, { "epoch": 1.3, "grad_norm": 0.5784571766853333, "learning_rate": 0.00036346490461535347, "loss": 3.0467, "step": 26454 }, { "epoch": 1.3, "grad_norm": 0.5411424040794373, "learning_rate": 0.00036344985679957303, "loss": 3.1296, "step": 26455 }, { "epoch": 1.3, "grad_norm": 0.5692468285560608, "learning_rate": 0.00036343480881667785, "loss": 2.8744, "step": 26456 }, { "epoch": 1.3, "grad_norm": 0.6368603706359863, "learning_rate": 0.00036341976066670754, "loss": 3.0441, "step": 26457 }, { "epoch": 1.3, "grad_norm": 0.546215832233429, "learning_rate": 0.00036340471234970186, "loss": 3.0811, "step": 26458 }, { "epoch": 1.3, "grad_norm": 0.582275390625, "learning_rate": 0.0003633896638657003, "loss": 3.0124, "step": 26459 }, { "epoch": 1.3, "grad_norm": 0.5756465792655945, "learning_rate": 0.00036337461521474256, "loss": 2.997, "step": 26460 }, { "epoch": 1.3, "grad_norm": 0.6010496020317078, "learning_rate": 0.0003633595663968683, "loss": 3.0752, "step": 26461 }, { "epoch": 1.3, "grad_norm": 0.5619287490844727, "learning_rate": 0.00036334451741211715, "loss": 2.9749, "step": 26462 }, { "epoch": 1.3, "grad_norm": 0.5364715456962585, "learning_rate": 0.00036332946826052873, "loss": 3.3402, "step": 26463 }, { "epoch": 1.3, "grad_norm": 0.5843341946601868, "learning_rate": 0.00036331441894214275, "loss": 3.1867, "step": 26464 }, { "epoch": 1.3, "grad_norm": 0.5982407331466675, "learning_rate": 0.00036329936945699866, "loss": 3.0285, "step": 26465 }, { "epoch": 1.3, "grad_norm": 0.5931727290153503, "learning_rate": 0.0003632843198051363, "loss": 2.9798, "step": 26466 }, { "epoch": 1.3, "grad_norm": 0.5525808930397034, "learning_rate": 0.00036326926998659514, "loss": 3.1638, "step": 26467 }, { "epoch": 1.3, "grad_norm": 0.5755184888839722, "learning_rate": 0.0003632542200014149, "loss": 3.1532, "step": 26468 }, { "epoch": 1.3, "grad_norm": 0.5768822431564331, "learning_rate": 0.00036323916984963534, "loss": 3.1317, "step": 26469 }, { "epoch": 1.3, "grad_norm": 0.5717555284500122, "learning_rate": 0.0003632241195312959, "loss": 3.1596, "step": 26470 }, { "epoch": 1.3, "grad_norm": 0.5730190873146057, "learning_rate": 0.00036320906904643624, "loss": 2.9991, "step": 26471 }, { "epoch": 1.3, "grad_norm": 0.5641260147094727, "learning_rate": 0.00036319401839509616, "loss": 3.223, "step": 26472 }, { "epoch": 1.3, "grad_norm": 0.5625524520874023, "learning_rate": 0.0003631789675773151, "loss": 3.0021, "step": 26473 }, { "epoch": 1.3, "grad_norm": 0.5481633543968201, "learning_rate": 0.00036316391659313283, "loss": 3.018, "step": 26474 }, { "epoch": 1.3, "grad_norm": 0.6047376990318298, "learning_rate": 0.000363148865442589, "loss": 2.9506, "step": 26475 }, { "epoch": 1.3, "grad_norm": 0.5548542737960815, "learning_rate": 0.00036313381412572313, "loss": 3.0886, "step": 26476 }, { "epoch": 1.3, "grad_norm": 0.5693079829216003, "learning_rate": 0.000363118762642575, "loss": 3.0023, "step": 26477 }, { "epoch": 1.3, "grad_norm": 0.5523504614830017, "learning_rate": 0.00036310371099318423, "loss": 3.0461, "step": 26478 }, { "epoch": 1.3, "grad_norm": 0.5627286434173584, "learning_rate": 0.0003630886591775904, "loss": 3.0872, "step": 26479 }, { "epoch": 1.3, "grad_norm": 0.5450271368026733, "learning_rate": 0.00036307360719583316, "loss": 2.9879, "step": 26480 }, { "epoch": 1.3, "grad_norm": 0.5695874094963074, "learning_rate": 0.0003630585550479522, "loss": 2.8817, "step": 26481 }, { "epoch": 1.3, "grad_norm": 0.5733633041381836, "learning_rate": 0.0003630435027339871, "loss": 3.1564, "step": 26482 }, { "epoch": 1.3, "grad_norm": 0.6155542731285095, "learning_rate": 0.00036302845025397765, "loss": 3.1808, "step": 26483 }, { "epoch": 1.3, "grad_norm": 0.5391807556152344, "learning_rate": 0.00036301339760796335, "loss": 3.1328, "step": 26484 }, { "epoch": 1.3, "grad_norm": 0.5551477670669556, "learning_rate": 0.0003629983447959838, "loss": 3.0805, "step": 26485 }, { "epoch": 1.3, "grad_norm": 0.5700846910476685, "learning_rate": 0.0003629832918180788, "loss": 3.0817, "step": 26486 }, { "epoch": 1.3, "grad_norm": 0.5457755327224731, "learning_rate": 0.00036296823867428793, "loss": 3.0501, "step": 26487 }, { "epoch": 1.3, "grad_norm": 0.5421336889266968, "learning_rate": 0.0003629531853646508, "loss": 3.0883, "step": 26488 }, { "epoch": 1.3, "grad_norm": 0.5504266023635864, "learning_rate": 0.00036293813188920717, "loss": 3.1587, "step": 26489 }, { "epoch": 1.3, "grad_norm": 0.5366515517234802, "learning_rate": 0.00036292307824799654, "loss": 3.1722, "step": 26490 }, { "epoch": 1.3, "grad_norm": 0.5298593044281006, "learning_rate": 0.0003629080244410587, "loss": 3.1069, "step": 26491 }, { "epoch": 1.3, "grad_norm": 0.5564733147621155, "learning_rate": 0.0003628929704684332, "loss": 3.0798, "step": 26492 }, { "epoch": 1.3, "grad_norm": 0.5588745474815369, "learning_rate": 0.0003628779163301597, "loss": 3.2701, "step": 26493 }, { "epoch": 1.3, "grad_norm": 0.6034541130065918, "learning_rate": 0.0003628628620262779, "loss": 2.9638, "step": 26494 }, { "epoch": 1.3, "grad_norm": 0.54830402135849, "learning_rate": 0.0003628478075568274, "loss": 3.1212, "step": 26495 }, { "epoch": 1.3, "grad_norm": 0.6392161250114441, "learning_rate": 0.0003628327529218479, "loss": 3.3524, "step": 26496 }, { "epoch": 1.3, "grad_norm": 0.5647044777870178, "learning_rate": 0.000362817698121379, "loss": 3.0019, "step": 26497 }, { "epoch": 1.3, "grad_norm": 0.5975145697593689, "learning_rate": 0.0003628026431554603, "loss": 3.0822, "step": 26498 }, { "epoch": 1.3, "grad_norm": 0.5746291279792786, "learning_rate": 0.00036278758802413166, "loss": 3.2055, "step": 26499 }, { "epoch": 1.3, "grad_norm": 0.6121372580528259, "learning_rate": 0.0003627725327274325, "loss": 3.0877, "step": 26500 }, { "epoch": 1.3, "grad_norm": 0.550165593624115, "learning_rate": 0.0003627574772654026, "loss": 3.1267, "step": 26501 }, { "epoch": 1.3, "grad_norm": 0.6445377469062805, "learning_rate": 0.00036274242163808164, "loss": 3.2958, "step": 26502 }, { "epoch": 1.3, "grad_norm": 0.5646840333938599, "learning_rate": 0.00036272736584550913, "loss": 2.9063, "step": 26503 }, { "epoch": 1.3, "grad_norm": 0.6248049736022949, "learning_rate": 0.0003627123098877248, "loss": 3.142, "step": 26504 }, { "epoch": 1.3, "grad_norm": 0.5855923295021057, "learning_rate": 0.00036269725376476835, "loss": 2.976, "step": 26505 }, { "epoch": 1.3, "grad_norm": 0.5521533489227295, "learning_rate": 0.0003626821974766795, "loss": 3.2744, "step": 26506 }, { "epoch": 1.3, "grad_norm": 0.6442424058914185, "learning_rate": 0.00036266714102349773, "loss": 3.0419, "step": 26507 }, { "epoch": 1.3, "grad_norm": 0.5635976791381836, "learning_rate": 0.00036265208440526266, "loss": 3.0997, "step": 26508 }, { "epoch": 1.3, "grad_norm": 0.671798825263977, "learning_rate": 0.0003626370276220142, "loss": 3.037, "step": 26509 }, { "epoch": 1.3, "grad_norm": 0.5717962384223938, "learning_rate": 0.0003626219706737918, "loss": 3.3547, "step": 26510 }, { "epoch": 1.3, "grad_norm": 0.5673326253890991, "learning_rate": 0.0003626069135606352, "loss": 3.074, "step": 26511 }, { "epoch": 1.3, "grad_norm": 0.5579215884208679, "learning_rate": 0.00036259185628258406, "loss": 3.1001, "step": 26512 }, { "epoch": 1.3, "grad_norm": 0.6117144227027893, "learning_rate": 0.000362576798839678, "loss": 2.9077, "step": 26513 }, { "epoch": 1.3, "grad_norm": 0.5998569130897522, "learning_rate": 0.00036256174123195663, "loss": 3.0331, "step": 26514 }, { "epoch": 1.3, "grad_norm": 0.618823230266571, "learning_rate": 0.00036254668345945976, "loss": 3.2402, "step": 26515 }, { "epoch": 1.3, "grad_norm": 0.6104410290718079, "learning_rate": 0.0003625316255222269, "loss": 3.0506, "step": 26516 }, { "epoch": 1.3, "grad_norm": 0.6050528883934021, "learning_rate": 0.0003625165674202979, "loss": 3.0046, "step": 26517 }, { "epoch": 1.3, "grad_norm": 0.5506466627120972, "learning_rate": 0.0003625015091537121, "loss": 2.9101, "step": 26518 }, { "epoch": 1.3, "grad_norm": 0.5498501658439636, "learning_rate": 0.0003624864507225094, "loss": 3.0459, "step": 26519 }, { "epoch": 1.3, "grad_norm": 0.5689669251441956, "learning_rate": 0.0003624713921267296, "loss": 3.1916, "step": 26520 }, { "epoch": 1.3, "grad_norm": 0.5426681637763977, "learning_rate": 0.000362456333366412, "loss": 2.9685, "step": 26521 }, { "epoch": 1.3, "grad_norm": 0.604785680770874, "learning_rate": 0.0003624412744415965, "loss": 3.0663, "step": 26522 }, { "epoch": 1.3, "grad_norm": 0.5710480213165283, "learning_rate": 0.0003624262153523226, "loss": 2.9204, "step": 26523 }, { "epoch": 1.3, "grad_norm": 0.6017956733703613, "learning_rate": 0.0003624111560986302, "loss": 3.011, "step": 26524 }, { "epoch": 1.3, "grad_norm": 0.5829985737800598, "learning_rate": 0.00036239609668055876, "loss": 2.9643, "step": 26525 }, { "epoch": 1.3, "grad_norm": 0.6299350261688232, "learning_rate": 0.000362381037098148, "loss": 2.9682, "step": 26526 }, { "epoch": 1.3, "grad_norm": 0.613353967666626, "learning_rate": 0.0003623659773514376, "loss": 3.0985, "step": 26527 }, { "epoch": 1.3, "grad_norm": 0.5568817257881165, "learning_rate": 0.00036235091744046724, "loss": 2.8491, "step": 26528 }, { "epoch": 1.3, "grad_norm": 0.5802782773971558, "learning_rate": 0.0003623358573652765, "loss": 3.1953, "step": 26529 }, { "epoch": 1.3, "grad_norm": 0.5671431422233582, "learning_rate": 0.0003623207971259052, "loss": 2.9758, "step": 26530 }, { "epoch": 1.3, "grad_norm": 0.5735278725624084, "learning_rate": 0.0003623057367223929, "loss": 3.119, "step": 26531 }, { "epoch": 1.3, "grad_norm": 0.6418932676315308, "learning_rate": 0.0003622906761547793, "loss": 2.8465, "step": 26532 }, { "epoch": 1.3, "grad_norm": 0.6021503210067749, "learning_rate": 0.0003622756154231039, "loss": 2.9617, "step": 26533 }, { "epoch": 1.3, "grad_norm": 0.5770435929298401, "learning_rate": 0.0003622605545274067, "loss": 3.1281, "step": 26534 }, { "epoch": 1.3, "grad_norm": 0.6095218062400818, "learning_rate": 0.0003622454934677271, "loss": 2.9292, "step": 26535 }, { "epoch": 1.3, "grad_norm": 0.6107041239738464, "learning_rate": 0.00036223043224410483, "loss": 3.124, "step": 26536 }, { "epoch": 1.3, "grad_norm": 0.5658743977546692, "learning_rate": 0.0003622153708565796, "loss": 3.0487, "step": 26537 }, { "epoch": 1.3, "grad_norm": 0.5829092264175415, "learning_rate": 0.000362200309305191, "loss": 3.0127, "step": 26538 }, { "epoch": 1.3, "grad_norm": 0.5712616443634033, "learning_rate": 0.00036218524758997885, "loss": 3.0086, "step": 26539 }, { "epoch": 1.3, "grad_norm": 0.5344133377075195, "learning_rate": 0.0003621701857109827, "loss": 3.1907, "step": 26540 }, { "epoch": 1.3, "grad_norm": 0.5803573727607727, "learning_rate": 0.0003621551236682422, "loss": 3.1347, "step": 26541 }, { "epoch": 1.3, "grad_norm": 0.566353976726532, "learning_rate": 0.0003621400614617972, "loss": 3.1542, "step": 26542 }, { "epoch": 1.3, "grad_norm": 0.5749666094779968, "learning_rate": 0.00036212499909168706, "loss": 3.1458, "step": 26543 }, { "epoch": 1.3, "grad_norm": 0.6262149214744568, "learning_rate": 0.00036210993655795163, "loss": 3.1817, "step": 26544 }, { "epoch": 1.3, "grad_norm": 0.629041314125061, "learning_rate": 0.00036209487386063075, "loss": 3.0455, "step": 26545 }, { "epoch": 1.3, "grad_norm": 0.5451223850250244, "learning_rate": 0.0003620798109997638, "loss": 3.1538, "step": 26546 }, { "epoch": 1.3, "grad_norm": 0.5455161929130554, "learning_rate": 0.00036206474797539066, "loss": 3.1579, "step": 26547 }, { "epoch": 1.3, "grad_norm": 0.5440385341644287, "learning_rate": 0.0003620496847875508, "loss": 3.1674, "step": 26548 }, { "epoch": 1.3, "grad_norm": 0.5767881274223328, "learning_rate": 0.00036203462143628406, "loss": 3.1339, "step": 26549 }, { "epoch": 1.3, "grad_norm": 0.5350778698921204, "learning_rate": 0.0003620195579216301, "loss": 3.0594, "step": 26550 }, { "epoch": 1.3, "grad_norm": 0.5580967664718628, "learning_rate": 0.0003620044942436285, "loss": 3.04, "step": 26551 }, { "epoch": 1.3, "grad_norm": 0.5478173494338989, "learning_rate": 0.00036198943040231904, "loss": 3.1905, "step": 26552 }, { "epoch": 1.3, "grad_norm": 0.6238259077072144, "learning_rate": 0.00036197436639774145, "loss": 3.0206, "step": 26553 }, { "epoch": 1.3, "grad_norm": 0.6044278144836426, "learning_rate": 0.00036195930222993506, "loss": 3.0768, "step": 26554 }, { "epoch": 1.3, "grad_norm": 0.5655761957168579, "learning_rate": 0.00036194423789893997, "loss": 3.1671, "step": 26555 }, { "epoch": 1.3, "grad_norm": 0.5538684725761414, "learning_rate": 0.00036192917340479566, "loss": 3.0836, "step": 26556 }, { "epoch": 1.3, "grad_norm": 0.5838094353675842, "learning_rate": 0.0003619141087475418, "loss": 3.0374, "step": 26557 }, { "epoch": 1.3, "grad_norm": 0.5795071721076965, "learning_rate": 0.00036189904392721814, "loss": 2.9839, "step": 26558 }, { "epoch": 1.3, "grad_norm": 0.5634998083114624, "learning_rate": 0.0003618839789438642, "loss": 3.0211, "step": 26559 }, { "epoch": 1.3, "grad_norm": 0.5954675078392029, "learning_rate": 0.0003618689137975199, "loss": 3.102, "step": 26560 }, { "epoch": 1.3, "grad_norm": 0.5686203241348267, "learning_rate": 0.0003618538484882247, "loss": 3.0318, "step": 26561 }, { "epoch": 1.3, "grad_norm": 0.5589979887008667, "learning_rate": 0.0003618387830160184, "loss": 2.9281, "step": 26562 }, { "epoch": 1.3, "grad_norm": 0.5522468686103821, "learning_rate": 0.0003618237173809406, "loss": 2.823, "step": 26563 }, { "epoch": 1.3, "grad_norm": 0.5559152364730835, "learning_rate": 0.00036180865158303116, "loss": 3.1592, "step": 26564 }, { "epoch": 1.3, "grad_norm": 0.5371105074882507, "learning_rate": 0.00036179358562232945, "loss": 3.0656, "step": 26565 }, { "epoch": 1.3, "grad_norm": 0.5746232271194458, "learning_rate": 0.00036177851949887544, "loss": 3.042, "step": 26566 }, { "epoch": 1.3, "grad_norm": 0.5857375264167786, "learning_rate": 0.00036176345321270865, "loss": 3.1788, "step": 26567 }, { "epoch": 1.3, "grad_norm": 0.5602015852928162, "learning_rate": 0.00036174838676386893, "loss": 2.8983, "step": 26568 }, { "epoch": 1.3, "grad_norm": 0.5846152305603027, "learning_rate": 0.00036173332015239563, "loss": 2.7467, "step": 26569 }, { "epoch": 1.3, "grad_norm": 0.541037917137146, "learning_rate": 0.0003617182533783288, "loss": 2.9718, "step": 26570 }, { "epoch": 1.3, "grad_norm": 0.5495973825454712, "learning_rate": 0.000361703186441708, "loss": 3.0222, "step": 26571 }, { "epoch": 1.3, "grad_norm": 0.5735442042350769, "learning_rate": 0.00036168811934257275, "loss": 3.2686, "step": 26572 }, { "epoch": 1.3, "grad_norm": 0.5584540367126465, "learning_rate": 0.00036167305208096293, "loss": 2.8147, "step": 26573 }, { "epoch": 1.3, "grad_norm": 0.5874664187431335, "learning_rate": 0.00036165798465691816, "loss": 3.1831, "step": 26574 }, { "epoch": 1.3, "grad_norm": 0.5908390879631042, "learning_rate": 0.00036164291707047817, "loss": 2.9889, "step": 26575 }, { "epoch": 1.3, "grad_norm": 0.5895627737045288, "learning_rate": 0.00036162784932168254, "loss": 2.9451, "step": 26576 }, { "epoch": 1.3, "grad_norm": 0.5433921813964844, "learning_rate": 0.000361612781410571, "loss": 3.2065, "step": 26577 }, { "epoch": 1.3, "grad_norm": 0.5549830794334412, "learning_rate": 0.0003615977133371833, "loss": 3.1892, "step": 26578 }, { "epoch": 1.3, "grad_norm": 0.6039674282073975, "learning_rate": 0.0003615826451015591, "loss": 2.8504, "step": 26579 }, { "epoch": 1.3, "grad_norm": 0.526862382888794, "learning_rate": 0.0003615675767037379, "loss": 3.1544, "step": 26580 }, { "epoch": 1.3, "grad_norm": 0.5542131662368774, "learning_rate": 0.0003615525081437597, "loss": 3.1678, "step": 26581 }, { "epoch": 1.3, "grad_norm": 0.5980327129364014, "learning_rate": 0.0003615374394216641, "loss": 3.1879, "step": 26582 }, { "epoch": 1.3, "grad_norm": 0.5924147367477417, "learning_rate": 0.00036152237053749064, "loss": 3.0792, "step": 26583 }, { "epoch": 1.3, "grad_norm": 0.5593456029891968, "learning_rate": 0.00036150730149127905, "loss": 2.8554, "step": 26584 }, { "epoch": 1.3, "grad_norm": 0.5941464304924011, "learning_rate": 0.0003614922322830691, "loss": 3.0402, "step": 26585 }, { "epoch": 1.3, "grad_norm": 0.5819092988967896, "learning_rate": 0.0003614771629129005, "loss": 3.1216, "step": 26586 }, { "epoch": 1.3, "grad_norm": 0.5705400705337524, "learning_rate": 0.0003614620933808128, "loss": 3.1876, "step": 26587 }, { "epoch": 1.3, "grad_norm": 0.5921162366867065, "learning_rate": 0.00036144702368684587, "loss": 2.9728, "step": 26588 }, { "epoch": 1.3, "grad_norm": 0.5853647589683533, "learning_rate": 0.0003614319538310392, "loss": 3.2357, "step": 26589 }, { "epoch": 1.3, "grad_norm": 0.5218216180801392, "learning_rate": 0.0003614168838134327, "loss": 3.1811, "step": 26590 }, { "epoch": 1.3, "grad_norm": 0.5547449588775635, "learning_rate": 0.00036140181363406585, "loss": 3.2543, "step": 26591 }, { "epoch": 1.3, "grad_norm": 0.5389057993888855, "learning_rate": 0.0003613867432929785, "loss": 3.062, "step": 26592 }, { "epoch": 1.3, "grad_norm": 0.5760816931724548, "learning_rate": 0.0003613716727902103, "loss": 3.0541, "step": 26593 }, { "epoch": 1.3, "grad_norm": 0.5420430302619934, "learning_rate": 0.00036135660212580084, "loss": 3.1731, "step": 26594 }, { "epoch": 1.3, "grad_norm": 0.5819205045700073, "learning_rate": 0.0003613415312997899, "loss": 3.0585, "step": 26595 }, { "epoch": 1.3, "grad_norm": 0.5555254817008972, "learning_rate": 0.00036132646031221725, "loss": 3.1077, "step": 26596 }, { "epoch": 1.3, "grad_norm": 0.580630362033844, "learning_rate": 0.0003613113891631225, "loss": 2.9911, "step": 26597 }, { "epoch": 1.3, "grad_norm": 0.5484400391578674, "learning_rate": 0.0003612963178525453, "loss": 3.0661, "step": 26598 }, { "epoch": 1.3, "grad_norm": 0.5431928634643555, "learning_rate": 0.0003612812463805254, "loss": 3.32, "step": 26599 }, { "epoch": 1.3, "grad_norm": 0.5618100166320801, "learning_rate": 0.00036126617474710254, "loss": 3.0773, "step": 26600 }, { "epoch": 1.3, "grad_norm": 0.5543720722198486, "learning_rate": 0.0003612511029523163, "loss": 2.8697, "step": 26601 }, { "epoch": 1.3, "grad_norm": 0.6032953858375549, "learning_rate": 0.00036123603099620644, "loss": 3.1716, "step": 26602 }, { "epoch": 1.3, "grad_norm": 0.5478854179382324, "learning_rate": 0.0003612209588788127, "loss": 3.1367, "step": 26603 }, { "epoch": 1.3, "grad_norm": 0.5998310446739197, "learning_rate": 0.0003612058866001748, "loss": 3.1668, "step": 26604 }, { "epoch": 1.3, "grad_norm": 0.5461510419845581, "learning_rate": 0.00036119081416033225, "loss": 3.213, "step": 26605 }, { "epoch": 1.3, "grad_norm": 0.5615707635879517, "learning_rate": 0.0003611757415593249, "loss": 3.0274, "step": 26606 }, { "epoch": 1.3, "grad_norm": 0.5446403622627258, "learning_rate": 0.0003611606687971925, "loss": 3.1591, "step": 26607 }, { "epoch": 1.3, "grad_norm": 0.5432937145233154, "learning_rate": 0.0003611455958739746, "loss": 2.9783, "step": 26608 }, { "epoch": 1.3, "grad_norm": 0.5421168208122253, "learning_rate": 0.000361130522789711, "loss": 3.0148, "step": 26609 }, { "epoch": 1.3, "grad_norm": 0.5991039872169495, "learning_rate": 0.0003611154495444413, "loss": 2.9486, "step": 26610 }, { "epoch": 1.3, "grad_norm": 0.5641329884529114, "learning_rate": 0.0003611003761382054, "loss": 2.8076, "step": 26611 }, { "epoch": 1.3, "grad_norm": 0.5584830045700073, "learning_rate": 0.00036108530257104274, "loss": 2.9863, "step": 26612 }, { "epoch": 1.3, "grad_norm": 0.591867983341217, "learning_rate": 0.0003610702288429932, "loss": 2.9895, "step": 26613 }, { "epoch": 1.3, "grad_norm": 0.5379605293273926, "learning_rate": 0.0003610551549540964, "loss": 3.1134, "step": 26614 }, { "epoch": 1.3, "grad_norm": 0.570203959941864, "learning_rate": 0.00036104008090439215, "loss": 3.0738, "step": 26615 }, { "epoch": 1.3, "grad_norm": 0.531862735748291, "learning_rate": 0.00036102500669392, "loss": 2.9935, "step": 26616 }, { "epoch": 1.3, "grad_norm": 0.5866312980651855, "learning_rate": 0.0003610099323227197, "loss": 3.0712, "step": 26617 }, { "epoch": 1.3, "grad_norm": 0.6026278734207153, "learning_rate": 0.00036099485779083105, "loss": 3.1248, "step": 26618 }, { "epoch": 1.3, "grad_norm": 0.5626994967460632, "learning_rate": 0.0003609797830982937, "loss": 3.2064, "step": 26619 }, { "epoch": 1.3, "grad_norm": 0.5667855143547058, "learning_rate": 0.00036096470824514734, "loss": 3.1069, "step": 26620 }, { "epoch": 1.3, "grad_norm": 0.5892822742462158, "learning_rate": 0.00036094963323143154, "loss": 3.106, "step": 26621 }, { "epoch": 1.3, "grad_norm": 0.5754657983779907, "learning_rate": 0.00036093455805718633, "loss": 2.9988, "step": 26622 }, { "epoch": 1.3, "grad_norm": 0.5949243903160095, "learning_rate": 0.0003609194827224511, "loss": 2.8595, "step": 26623 }, { "epoch": 1.3, "grad_norm": 0.5688416957855225, "learning_rate": 0.00036090440722726574, "loss": 3.1287, "step": 26624 }, { "epoch": 1.3, "grad_norm": 0.563495934009552, "learning_rate": 0.0003608893315716698, "loss": 3.0331, "step": 26625 }, { "epoch": 1.3, "grad_norm": 0.5768406987190247, "learning_rate": 0.0003608742557557031, "loss": 3.1165, "step": 26626 }, { "epoch": 1.3, "grad_norm": 0.6202779412269592, "learning_rate": 0.00036085917977940533, "loss": 3.0652, "step": 26627 }, { "epoch": 1.3, "grad_norm": 0.556969940662384, "learning_rate": 0.0003608441036428163, "loss": 3.163, "step": 26628 }, { "epoch": 1.31, "grad_norm": 0.5989406704902649, "learning_rate": 0.0003608290273459755, "loss": 2.933, "step": 26629 }, { "epoch": 1.31, "grad_norm": 0.5388544201850891, "learning_rate": 0.0003608139508889228, "loss": 3.2661, "step": 26630 }, { "epoch": 1.31, "grad_norm": 0.5508213639259338, "learning_rate": 0.00036079887427169775, "loss": 3.278, "step": 26631 }, { "epoch": 1.31, "grad_norm": 0.5620518326759338, "learning_rate": 0.00036078379749434025, "loss": 2.8503, "step": 26632 }, { "epoch": 1.31, "grad_norm": 0.5819762349128723, "learning_rate": 0.00036076872055689, "loss": 3.1962, "step": 26633 }, { "epoch": 1.31, "grad_norm": 0.5653870701789856, "learning_rate": 0.0003607536434593866, "loss": 3.0897, "step": 26634 }, { "epoch": 1.31, "grad_norm": 0.6262655258178711, "learning_rate": 0.0003607385662018697, "loss": 3.0569, "step": 26635 }, { "epoch": 1.31, "grad_norm": 0.5709400773048401, "learning_rate": 0.0003607234887843792, "loss": 3.0048, "step": 26636 }, { "epoch": 1.31, "grad_norm": 0.5970860719680786, "learning_rate": 0.00036070841120695467, "loss": 3.1965, "step": 26637 }, { "epoch": 1.31, "grad_norm": 0.5762056112289429, "learning_rate": 0.0003606933334696359, "loss": 3.0631, "step": 26638 }, { "epoch": 1.31, "grad_norm": 0.5508270263671875, "learning_rate": 0.0003606782555724625, "loss": 3.2962, "step": 26639 }, { "epoch": 1.31, "grad_norm": 0.5481507778167725, "learning_rate": 0.00036066317751547427, "loss": 3.0597, "step": 26640 }, { "epoch": 1.31, "grad_norm": 0.5892797708511353, "learning_rate": 0.000360648099298711, "loss": 3.0084, "step": 26641 }, { "epoch": 1.31, "grad_norm": 0.5414859056472778, "learning_rate": 0.0003606330209222122, "loss": 3.1328, "step": 26642 }, { "epoch": 1.31, "grad_norm": 0.5723474621772766, "learning_rate": 0.0003606179423860178, "loss": 3.0681, "step": 26643 }, { "epoch": 1.31, "grad_norm": 0.5581324100494385, "learning_rate": 0.00036060286369016736, "loss": 3.0845, "step": 26644 }, { "epoch": 1.31, "grad_norm": 0.5598316788673401, "learning_rate": 0.0003605877848347006, "loss": 3.1011, "step": 26645 }, { "epoch": 1.31, "grad_norm": 0.55232173204422, "learning_rate": 0.0003605727058196572, "loss": 3.3967, "step": 26646 }, { "epoch": 1.31, "grad_norm": 0.6095204949378967, "learning_rate": 0.0003605576266450771, "loss": 3.0187, "step": 26647 }, { "epoch": 1.31, "grad_norm": 0.6039015650749207, "learning_rate": 0.00036054254731099984, "loss": 2.9391, "step": 26648 }, { "epoch": 1.31, "grad_norm": 0.5620260238647461, "learning_rate": 0.00036052746781746513, "loss": 3.067, "step": 26649 }, { "epoch": 1.31, "grad_norm": 0.5486672520637512, "learning_rate": 0.0003605123881645127, "loss": 3.137, "step": 26650 }, { "epoch": 1.31, "grad_norm": 0.5771086812019348, "learning_rate": 0.0003604973083521823, "loss": 3.0855, "step": 26651 }, { "epoch": 1.31, "grad_norm": 0.5530979037284851, "learning_rate": 0.0003604822283805137, "loss": 3.3067, "step": 26652 }, { "epoch": 1.31, "grad_norm": 0.5996308922767639, "learning_rate": 0.00036046714824954643, "loss": 2.8549, "step": 26653 }, { "epoch": 1.31, "grad_norm": 0.5549845099449158, "learning_rate": 0.00036045206795932045, "loss": 2.9425, "step": 26654 }, { "epoch": 1.31, "grad_norm": 0.5953495502471924, "learning_rate": 0.00036043698750987536, "loss": 3.1898, "step": 26655 }, { "epoch": 1.31, "grad_norm": 0.5502009987831116, "learning_rate": 0.00036042190690125075, "loss": 3.0692, "step": 26656 }, { "epoch": 1.31, "grad_norm": 0.5558758974075317, "learning_rate": 0.0003604068261334865, "loss": 3.0813, "step": 26657 }, { "epoch": 1.31, "grad_norm": 0.5885339975357056, "learning_rate": 0.0003603917452066224, "loss": 3.0579, "step": 26658 }, { "epoch": 1.31, "grad_norm": 0.5545154809951782, "learning_rate": 0.000360376664120698, "loss": 3.0188, "step": 26659 }, { "epoch": 1.31, "grad_norm": 0.6164775490760803, "learning_rate": 0.00036036158287575305, "loss": 3.0655, "step": 26660 }, { "epoch": 1.31, "grad_norm": 0.6128799915313721, "learning_rate": 0.0003603465014718273, "loss": 3.0116, "step": 26661 }, { "epoch": 1.31, "grad_norm": 0.5506448149681091, "learning_rate": 0.00036033141990896055, "loss": 3.1085, "step": 26662 }, { "epoch": 1.31, "grad_norm": 0.5668236613273621, "learning_rate": 0.0003603163381871924, "loss": 3.0768, "step": 26663 }, { "epoch": 1.31, "grad_norm": 0.5560147166252136, "learning_rate": 0.00036030125630656266, "loss": 3.0992, "step": 26664 }, { "epoch": 1.31, "grad_norm": 0.5709168910980225, "learning_rate": 0.00036028617426711097, "loss": 3.2251, "step": 26665 }, { "epoch": 1.31, "grad_norm": 0.5365732908248901, "learning_rate": 0.00036027109206887716, "loss": 2.8935, "step": 26666 }, { "epoch": 1.31, "grad_norm": 0.559626042842865, "learning_rate": 0.00036025600971190076, "loss": 3.0365, "step": 26667 }, { "epoch": 1.31, "grad_norm": 0.6036146879196167, "learning_rate": 0.00036024092719622177, "loss": 2.8695, "step": 26668 }, { "epoch": 1.31, "grad_norm": 0.5651452541351318, "learning_rate": 0.0003602258445218797, "loss": 3.1308, "step": 26669 }, { "epoch": 1.31, "grad_norm": 0.5566524267196655, "learning_rate": 0.0003602107616889145, "loss": 2.8533, "step": 26670 }, { "epoch": 1.31, "grad_norm": 0.5793358683586121, "learning_rate": 0.00036019567869736556, "loss": 3.0315, "step": 26671 }, { "epoch": 1.31, "grad_norm": 0.5406820774078369, "learning_rate": 0.0003601805955472728, "loss": 3.1241, "step": 26672 }, { "epoch": 1.31, "grad_norm": 0.590872585773468, "learning_rate": 0.000360165512238676, "loss": 3.0732, "step": 26673 }, { "epoch": 1.31, "grad_norm": 0.6023873090744019, "learning_rate": 0.00036015042877161484, "loss": 3.323, "step": 26674 }, { "epoch": 1.31, "grad_norm": 0.575550377368927, "learning_rate": 0.00036013534514612894, "loss": 3.0127, "step": 26675 }, { "epoch": 1.31, "grad_norm": 0.583389163017273, "learning_rate": 0.00036012026136225813, "loss": 3.1352, "step": 26676 }, { "epoch": 1.31, "grad_norm": 0.5580568909645081, "learning_rate": 0.0003601051774200422, "loss": 3.1552, "step": 26677 }, { "epoch": 1.31, "grad_norm": 0.5758119225502014, "learning_rate": 0.0003600900933195207, "loss": 2.7847, "step": 26678 }, { "epoch": 1.31, "grad_norm": 0.5357880592346191, "learning_rate": 0.00036007500906073346, "loss": 3.0774, "step": 26679 }, { "epoch": 1.31, "grad_norm": 0.5659300684928894, "learning_rate": 0.0003600599246437203, "loss": 3.1311, "step": 26680 }, { "epoch": 1.31, "grad_norm": 0.5540302395820618, "learning_rate": 0.00036004484006852083, "loss": 3.2338, "step": 26681 }, { "epoch": 1.31, "grad_norm": 0.5465559363365173, "learning_rate": 0.00036002975533517477, "loss": 3.0121, "step": 26682 }, { "epoch": 1.31, "grad_norm": 0.6347934007644653, "learning_rate": 0.00036001467044372183, "loss": 3.2469, "step": 26683 }, { "epoch": 1.31, "grad_norm": 0.6012324690818787, "learning_rate": 0.000359999585394202, "loss": 2.9777, "step": 26684 }, { "epoch": 1.31, "grad_norm": 0.5452451705932617, "learning_rate": 0.00035998450018665467, "loss": 3.0931, "step": 26685 }, { "epoch": 1.31, "grad_norm": 0.6085728406906128, "learning_rate": 0.0003599694148211197, "loss": 3.0797, "step": 26686 }, { "epoch": 1.31, "grad_norm": 0.5629120469093323, "learning_rate": 0.0003599543292976369, "loss": 3.1281, "step": 26687 }, { "epoch": 1.31, "grad_norm": 0.5537635087966919, "learning_rate": 0.00035993924361624587, "loss": 3.176, "step": 26688 }, { "epoch": 1.31, "grad_norm": 0.5594296455383301, "learning_rate": 0.0003599241577769864, "loss": 3.1268, "step": 26689 }, { "epoch": 1.31, "grad_norm": 0.5878404974937439, "learning_rate": 0.00035990907177989827, "loss": 2.9992, "step": 26690 }, { "epoch": 1.31, "grad_norm": 0.5586697459220886, "learning_rate": 0.0003598939856250212, "loss": 3.011, "step": 26691 }, { "epoch": 1.31, "grad_norm": 0.5551723837852478, "learning_rate": 0.00035987889931239484, "loss": 2.9541, "step": 26692 }, { "epoch": 1.31, "grad_norm": 0.5686328411102295, "learning_rate": 0.000359863812842059, "loss": 3.3019, "step": 26693 }, { "epoch": 1.31, "grad_norm": 0.5753780603408813, "learning_rate": 0.00035984872621405337, "loss": 3.2523, "step": 26694 }, { "epoch": 1.31, "grad_norm": 0.5661664009094238, "learning_rate": 0.0003598336394284178, "loss": 3.0049, "step": 26695 }, { "epoch": 1.31, "grad_norm": 0.5784562826156616, "learning_rate": 0.0003598185524851919, "loss": 2.8481, "step": 26696 }, { "epoch": 1.31, "grad_norm": 0.6021720767021179, "learning_rate": 0.0003598034653844154, "loss": 3.2881, "step": 26697 }, { "epoch": 1.31, "grad_norm": 0.5414631366729736, "learning_rate": 0.0003597883781261281, "loss": 3.1237, "step": 26698 }, { "epoch": 1.31, "grad_norm": 0.5559752583503723, "learning_rate": 0.00035977329071036976, "loss": 3.177, "step": 26699 }, { "epoch": 1.31, "grad_norm": 0.6183595061302185, "learning_rate": 0.00035975820313718004, "loss": 2.9357, "step": 26700 }, { "epoch": 1.31, "grad_norm": 0.5673224925994873, "learning_rate": 0.00035974311540659876, "loss": 3.0607, "step": 26701 }, { "epoch": 1.31, "grad_norm": 0.6092291474342346, "learning_rate": 0.00035972802751866554, "loss": 2.9099, "step": 26702 }, { "epoch": 1.31, "grad_norm": 0.541416347026825, "learning_rate": 0.0003597129394734202, "loss": 3.0918, "step": 26703 }, { "epoch": 1.31, "grad_norm": 0.6167910695075989, "learning_rate": 0.0003596978512709025, "loss": 2.8992, "step": 26704 }, { "epoch": 1.31, "grad_norm": 0.6436028480529785, "learning_rate": 0.0003596827629111521, "loss": 2.8388, "step": 26705 }, { "epoch": 1.31, "grad_norm": 0.5907847285270691, "learning_rate": 0.0003596676743942089, "loss": 3.1507, "step": 26706 }, { "epoch": 1.31, "grad_norm": 0.5741157531738281, "learning_rate": 0.00035965258572011246, "loss": 3.16, "step": 26707 }, { "epoch": 1.31, "grad_norm": 0.545328676700592, "learning_rate": 0.00035963749688890253, "loss": 3.1713, "step": 26708 }, { "epoch": 1.31, "grad_norm": 0.5556525588035583, "learning_rate": 0.000359622407900619, "loss": 3.1528, "step": 26709 }, { "epoch": 1.31, "grad_norm": 0.5546360015869141, "learning_rate": 0.0003596073187553015, "loss": 2.8478, "step": 26710 }, { "epoch": 1.31, "grad_norm": 0.6153631806373596, "learning_rate": 0.0003595922294529898, "loss": 3.0284, "step": 26711 }, { "epoch": 1.31, "grad_norm": 0.5715863704681396, "learning_rate": 0.0003595771399937236, "loss": 3.2872, "step": 26712 }, { "epoch": 1.31, "grad_norm": 0.5800357460975647, "learning_rate": 0.00035956205037754275, "loss": 3.2561, "step": 26713 }, { "epoch": 1.31, "grad_norm": 0.557467520236969, "learning_rate": 0.0003595469606044869, "loss": 3.136, "step": 26714 }, { "epoch": 1.31, "grad_norm": 0.5892956852912903, "learning_rate": 0.00035953187067459575, "loss": 3.1928, "step": 26715 }, { "epoch": 1.31, "grad_norm": 0.5511610507965088, "learning_rate": 0.00035951678058790913, "loss": 2.9516, "step": 26716 }, { "epoch": 1.31, "grad_norm": 0.5597865581512451, "learning_rate": 0.0003595016903444669, "loss": 3.0769, "step": 26717 }, { "epoch": 1.31, "grad_norm": 0.5834679007530212, "learning_rate": 0.00035948659994430843, "loss": 3.0175, "step": 26718 }, { "epoch": 1.31, "grad_norm": 0.5704622268676758, "learning_rate": 0.0003594715093874738, "loss": 3.0997, "step": 26719 }, { "epoch": 1.31, "grad_norm": 0.5578005909919739, "learning_rate": 0.0003594564186740027, "loss": 3.0305, "step": 26720 }, { "epoch": 1.31, "grad_norm": 0.6064382195472717, "learning_rate": 0.0003594413278039349, "loss": 3.0124, "step": 26721 }, { "epoch": 1.31, "grad_norm": 0.5383819341659546, "learning_rate": 0.00035942623677731004, "loss": 3.1193, "step": 26722 }, { "epoch": 1.31, "grad_norm": 0.5984358191490173, "learning_rate": 0.00035941114559416776, "loss": 3.0482, "step": 26723 }, { "epoch": 1.31, "grad_norm": 0.5805661082267761, "learning_rate": 0.0003593960542545481, "loss": 2.9751, "step": 26724 }, { "epoch": 1.31, "grad_norm": 0.5724626779556274, "learning_rate": 0.0003593809627584906, "loss": 3.0252, "step": 26725 }, { "epoch": 1.31, "grad_norm": 0.545838475227356, "learning_rate": 0.0003593658711060352, "loss": 3.1077, "step": 26726 }, { "epoch": 1.31, "grad_norm": 0.5773608088493347, "learning_rate": 0.0003593507792972214, "loss": 3.4112, "step": 26727 }, { "epoch": 1.31, "grad_norm": 0.5865770578384399, "learning_rate": 0.0003593356873320891, "loss": 3.1387, "step": 26728 }, { "epoch": 1.31, "grad_norm": 0.5342745184898376, "learning_rate": 0.000359320595210678, "loss": 2.9596, "step": 26729 }, { "epoch": 1.31, "grad_norm": 0.567023754119873, "learning_rate": 0.0003593055029330279, "loss": 3.2155, "step": 26730 }, { "epoch": 1.31, "grad_norm": 0.5480973124504089, "learning_rate": 0.0003592904104991785, "loss": 3.1711, "step": 26731 }, { "epoch": 1.31, "grad_norm": 0.5474660396575928, "learning_rate": 0.0003592753179091696, "loss": 2.9138, "step": 26732 }, { "epoch": 1.31, "grad_norm": 0.5918500423431396, "learning_rate": 0.00035926022516304085, "loss": 3.0106, "step": 26733 }, { "epoch": 1.31, "grad_norm": 0.5381353497505188, "learning_rate": 0.00035924513226083207, "loss": 2.9064, "step": 26734 }, { "epoch": 1.31, "grad_norm": 0.5770012140274048, "learning_rate": 0.00035923003920258306, "loss": 2.8773, "step": 26735 }, { "epoch": 1.31, "grad_norm": 0.6201854348182678, "learning_rate": 0.0003592149459883336, "loss": 3.1339, "step": 26736 }, { "epoch": 1.31, "grad_norm": 0.559162437915802, "learning_rate": 0.0003591998526181233, "loss": 3.0548, "step": 26737 }, { "epoch": 1.31, "grad_norm": 0.5843203067779541, "learning_rate": 0.0003591847590919919, "loss": 3.2486, "step": 26738 }, { "epoch": 1.31, "grad_norm": 0.6074045896530151, "learning_rate": 0.00035916966540997934, "loss": 3.2939, "step": 26739 }, { "epoch": 1.31, "grad_norm": 0.605433464050293, "learning_rate": 0.0003591545715721252, "loss": 3.2019, "step": 26740 }, { "epoch": 1.31, "grad_norm": 0.6092764735221863, "learning_rate": 0.00035913947757846936, "loss": 3.0387, "step": 26741 }, { "epoch": 1.31, "grad_norm": 0.5730652213096619, "learning_rate": 0.00035912438342905153, "loss": 3.214, "step": 26742 }, { "epoch": 1.31, "grad_norm": 0.5732026696205139, "learning_rate": 0.0003591092891239115, "loss": 2.8984, "step": 26743 }, { "epoch": 1.31, "grad_norm": 0.5797938704490662, "learning_rate": 0.0003590941946630888, "loss": 2.9541, "step": 26744 }, { "epoch": 1.31, "grad_norm": 0.555505096912384, "learning_rate": 0.00035907910004662343, "loss": 2.9514, "step": 26745 }, { "epoch": 1.31, "grad_norm": 0.6078484058380127, "learning_rate": 0.00035906400527455524, "loss": 3.2675, "step": 26746 }, { "epoch": 1.31, "grad_norm": 0.5532565116882324, "learning_rate": 0.0003590489103469236, "loss": 3.0698, "step": 26747 }, { "epoch": 1.31, "grad_norm": 0.5619004964828491, "learning_rate": 0.00035903381526376863, "loss": 2.9572, "step": 26748 }, { "epoch": 1.31, "grad_norm": 0.5572454929351807, "learning_rate": 0.0003590187200251299, "loss": 3.2266, "step": 26749 }, { "epoch": 1.31, "grad_norm": 0.5924436450004578, "learning_rate": 0.00035900362463104724, "loss": 2.8406, "step": 26750 }, { "epoch": 1.31, "grad_norm": 0.6133246421813965, "learning_rate": 0.00035898852908156044, "loss": 2.9873, "step": 26751 }, { "epoch": 1.31, "grad_norm": 0.5816695094108582, "learning_rate": 0.00035897343337670915, "loss": 2.9221, "step": 26752 }, { "epoch": 1.31, "grad_norm": 0.5877933502197266, "learning_rate": 0.0003589583375165331, "loss": 3.0776, "step": 26753 }, { "epoch": 1.31, "grad_norm": 0.5489791631698608, "learning_rate": 0.00035894324150107225, "loss": 2.8286, "step": 26754 }, { "epoch": 1.31, "grad_norm": 0.5626378059387207, "learning_rate": 0.0003589281453303662, "loss": 3.1682, "step": 26755 }, { "epoch": 1.31, "grad_norm": 0.5878295302391052, "learning_rate": 0.0003589130490044548, "loss": 3.0382, "step": 26756 }, { "epoch": 1.31, "grad_norm": 0.5876728892326355, "learning_rate": 0.00035889795252337783, "loss": 2.8998, "step": 26757 }, { "epoch": 1.31, "grad_norm": 0.5746318101882935, "learning_rate": 0.00035888285588717486, "loss": 2.9987, "step": 26758 }, { "epoch": 1.31, "grad_norm": 0.5840819478034973, "learning_rate": 0.0003588677590958857, "loss": 3.0352, "step": 26759 }, { "epoch": 1.31, "grad_norm": 0.5611565709114075, "learning_rate": 0.0003588526621495504, "loss": 3.0672, "step": 26760 }, { "epoch": 1.31, "grad_norm": 0.595608115196228, "learning_rate": 0.0003588375650482085, "loss": 2.868, "step": 26761 }, { "epoch": 1.31, "grad_norm": 0.5854583978652954, "learning_rate": 0.00035882246779189963, "loss": 3.467, "step": 26762 }, { "epoch": 1.31, "grad_norm": 0.5326132774353027, "learning_rate": 0.0003588073703806638, "loss": 3.0428, "step": 26763 }, { "epoch": 1.31, "grad_norm": 0.5872798562049866, "learning_rate": 0.0003587922728145407, "loss": 2.8781, "step": 26764 }, { "epoch": 1.31, "grad_norm": 0.5464463829994202, "learning_rate": 0.00035877717509356997, "loss": 3.3402, "step": 26765 }, { "epoch": 1.31, "grad_norm": 0.6281166672706604, "learning_rate": 0.0003587620772177915, "loss": 3.1979, "step": 26766 }, { "epoch": 1.31, "grad_norm": 0.5617860555648804, "learning_rate": 0.000358746979187245, "loss": 3.0359, "step": 26767 }, { "epoch": 1.31, "grad_norm": 0.6079573035240173, "learning_rate": 0.0003587318810019704, "loss": 3.0718, "step": 26768 }, { "epoch": 1.31, "grad_norm": 0.5587426424026489, "learning_rate": 0.0003587167826620073, "loss": 2.9656, "step": 26769 }, { "epoch": 1.31, "grad_norm": 0.5827581882476807, "learning_rate": 0.0003587016841673953, "loss": 3.1422, "step": 26770 }, { "epoch": 1.31, "grad_norm": 0.6292572021484375, "learning_rate": 0.00035868658551817457, "loss": 3.1551, "step": 26771 }, { "epoch": 1.31, "grad_norm": 0.6260858774185181, "learning_rate": 0.0003586714867143846, "loss": 3.1601, "step": 26772 }, { "epoch": 1.31, "grad_norm": 0.5667866468429565, "learning_rate": 0.0003586563877560652, "loss": 3.1435, "step": 26773 }, { "epoch": 1.31, "grad_norm": 0.6008673310279846, "learning_rate": 0.0003586412886432562, "loss": 2.9808, "step": 26774 }, { "epoch": 1.31, "grad_norm": 0.5359012484550476, "learning_rate": 0.0003586261893759973, "loss": 3.068, "step": 26775 }, { "epoch": 1.31, "grad_norm": 0.5466083288192749, "learning_rate": 0.00035861108995432833, "loss": 3.0155, "step": 26776 }, { "epoch": 1.31, "grad_norm": 0.5517134666442871, "learning_rate": 0.000358595990378289, "loss": 2.9651, "step": 26777 }, { "epoch": 1.31, "grad_norm": 0.55260169506073, "learning_rate": 0.00035858089064791913, "loss": 2.9182, "step": 26778 }, { "epoch": 1.31, "grad_norm": 0.5760247111320496, "learning_rate": 0.0003585657907632585, "loss": 2.8463, "step": 26779 }, { "epoch": 1.31, "grad_norm": 0.596767246723175, "learning_rate": 0.0003585506907243467, "loss": 3.3759, "step": 26780 }, { "epoch": 1.31, "grad_norm": 0.5780153274536133, "learning_rate": 0.0003585355905312238, "loss": 3.0084, "step": 26781 }, { "epoch": 1.31, "grad_norm": 0.5638839602470398, "learning_rate": 0.00035852049018392934, "loss": 3.1286, "step": 26782 }, { "epoch": 1.31, "grad_norm": 0.5510829091072083, "learning_rate": 0.0003585053896825033, "loss": 2.9903, "step": 26783 }, { "epoch": 1.31, "grad_norm": 0.6048457026481628, "learning_rate": 0.0003584902890269852, "loss": 3.044, "step": 26784 }, { "epoch": 1.31, "grad_norm": 0.5909347534179688, "learning_rate": 0.0003584751882174149, "loss": 3.0262, "step": 26785 }, { "epoch": 1.31, "grad_norm": 0.5883007645606995, "learning_rate": 0.00035846008725383237, "loss": 3.2037, "step": 26786 }, { "epoch": 1.31, "grad_norm": 0.6115594506263733, "learning_rate": 0.0003584449861362771, "loss": 3.1592, "step": 26787 }, { "epoch": 1.31, "grad_norm": 0.5681421756744385, "learning_rate": 0.00035842988486478903, "loss": 2.844, "step": 26788 }, { "epoch": 1.31, "grad_norm": 0.6011251211166382, "learning_rate": 0.00035841478343940784, "loss": 3.0141, "step": 26789 }, { "epoch": 1.31, "grad_norm": 0.5426762700080872, "learning_rate": 0.0003583996818601734, "loss": 3.2084, "step": 26790 }, { "epoch": 1.31, "grad_norm": 0.6285281181335449, "learning_rate": 0.0003583845801271254, "loss": 3.0548, "step": 26791 }, { "epoch": 1.31, "grad_norm": 0.6543646454811096, "learning_rate": 0.0003583694782403036, "loss": 2.9277, "step": 26792 }, { "epoch": 1.31, "grad_norm": 0.5835058093070984, "learning_rate": 0.0003583543761997479, "loss": 2.9471, "step": 26793 }, { "epoch": 1.31, "grad_norm": 0.5639581084251404, "learning_rate": 0.0003583392740054981, "loss": 2.9748, "step": 26794 }, { "epoch": 1.31, "grad_norm": 0.5835383534431458, "learning_rate": 0.0003583241716575937, "loss": 3.1496, "step": 26795 }, { "epoch": 1.31, "grad_norm": 0.5585667490959167, "learning_rate": 0.00035830906915607476, "loss": 2.8306, "step": 26796 }, { "epoch": 1.31, "grad_norm": 0.6079902052879333, "learning_rate": 0.000358293966500981, "loss": 3.0118, "step": 26797 }, { "epoch": 1.31, "grad_norm": 0.6015511155128479, "learning_rate": 0.00035827886369235203, "loss": 3.0294, "step": 26798 }, { "epoch": 1.31, "grad_norm": 0.5599113702774048, "learning_rate": 0.00035826376073022783, "loss": 3.0174, "step": 26799 }, { "epoch": 1.31, "grad_norm": 0.5897526741027832, "learning_rate": 0.0003582486576146481, "loss": 3.1255, "step": 26800 }, { "epoch": 1.31, "grad_norm": 0.570527195930481, "learning_rate": 0.00035823355434565256, "loss": 2.9353, "step": 26801 }, { "epoch": 1.31, "grad_norm": 0.5881776809692383, "learning_rate": 0.0003582184509232811, "loss": 3.1936, "step": 26802 }, { "epoch": 1.31, "grad_norm": 0.5746738314628601, "learning_rate": 0.00035820334734757336, "loss": 2.9701, "step": 26803 }, { "epoch": 1.31, "grad_norm": 0.5680601000785828, "learning_rate": 0.0003581882436185693, "loss": 3.3507, "step": 26804 }, { "epoch": 1.31, "grad_norm": 0.5403523445129395, "learning_rate": 0.0003581731397363085, "loss": 3.082, "step": 26805 }, { "epoch": 1.31, "grad_norm": 0.5722833275794983, "learning_rate": 0.0003581580357008309, "loss": 3.041, "step": 26806 }, { "epoch": 1.31, "grad_norm": 0.5738394260406494, "learning_rate": 0.00035814293151217625, "loss": 2.9287, "step": 26807 }, { "epoch": 1.31, "grad_norm": 0.6539166569709778, "learning_rate": 0.00035812782717038435, "loss": 3.1007, "step": 26808 }, { "epoch": 1.31, "grad_norm": 0.5289335250854492, "learning_rate": 0.0003581127226754949, "loss": 2.8963, "step": 26809 }, { "epoch": 1.31, "grad_norm": 0.5261499285697937, "learning_rate": 0.0003580976180275477, "loss": 3.2443, "step": 26810 }, { "epoch": 1.31, "grad_norm": 0.5962910652160645, "learning_rate": 0.00035808251322658256, "loss": 2.9977, "step": 26811 }, { "epoch": 1.31, "grad_norm": 0.6628442406654358, "learning_rate": 0.0003580674082726392, "loss": 3.1296, "step": 26812 }, { "epoch": 1.31, "grad_norm": 0.5591052770614624, "learning_rate": 0.00035805230316575756, "loss": 3.0394, "step": 26813 }, { "epoch": 1.31, "grad_norm": 0.5713579058647156, "learning_rate": 0.00035803719790597727, "loss": 3.0677, "step": 26814 }, { "epoch": 1.31, "grad_norm": 0.5537461042404175, "learning_rate": 0.0003580220924933382, "loss": 2.9197, "step": 26815 }, { "epoch": 1.31, "grad_norm": 0.5648530721664429, "learning_rate": 0.0003580069869278801, "loss": 3.0549, "step": 26816 }, { "epoch": 1.31, "grad_norm": 0.595908522605896, "learning_rate": 0.0003579918812096428, "loss": 2.9516, "step": 26817 }, { "epoch": 1.31, "grad_norm": 0.5620924830436707, "learning_rate": 0.00035797677533866597, "loss": 2.9171, "step": 26818 }, { "epoch": 1.31, "grad_norm": 0.5861213803291321, "learning_rate": 0.00035796166931498956, "loss": 2.938, "step": 26819 }, { "epoch": 1.31, "grad_norm": 0.6200598478317261, "learning_rate": 0.0003579465631386532, "loss": 3.1466, "step": 26820 }, { "epoch": 1.31, "grad_norm": 0.5952265858650208, "learning_rate": 0.00035793145680969665, "loss": 3.194, "step": 26821 }, { "epoch": 1.31, "grad_norm": 0.5975573062896729, "learning_rate": 0.00035791635032816, "loss": 3.1285, "step": 26822 }, { "epoch": 1.31, "grad_norm": 0.5594826340675354, "learning_rate": 0.0003579012436940827, "loss": 3.0251, "step": 26823 }, { "epoch": 1.31, "grad_norm": 0.5513445734977722, "learning_rate": 0.00035788613690750467, "loss": 3.175, "step": 26824 }, { "epoch": 1.31, "grad_norm": 0.5082058310508728, "learning_rate": 0.0003578710299684657, "loss": 3.244, "step": 26825 }, { "epoch": 1.31, "grad_norm": 0.560768723487854, "learning_rate": 0.00035785592287700563, "loss": 3.1634, "step": 26826 }, { "epoch": 1.31, "grad_norm": 0.6477553844451904, "learning_rate": 0.00035784081563316415, "loss": 2.7827, "step": 26827 }, { "epoch": 1.31, "grad_norm": 0.5783835053443909, "learning_rate": 0.0003578257082369811, "loss": 3.0997, "step": 26828 }, { "epoch": 1.31, "grad_norm": 0.6416663527488708, "learning_rate": 0.0003578106006884962, "loss": 3.2326, "step": 26829 }, { "epoch": 1.31, "grad_norm": 0.5797775983810425, "learning_rate": 0.00035779549298774947, "loss": 3.1772, "step": 26830 }, { "epoch": 1.31, "grad_norm": 0.5553704500198364, "learning_rate": 0.00035778038513478033, "loss": 2.9769, "step": 26831 }, { "epoch": 1.31, "grad_norm": 0.5943940281867981, "learning_rate": 0.0003577652771296289, "loss": 3.226, "step": 26832 }, { "epoch": 1.32, "grad_norm": 0.5958367586135864, "learning_rate": 0.00035775016897233487, "loss": 3.1038, "step": 26833 }, { "epoch": 1.32, "grad_norm": 0.5723438858985901, "learning_rate": 0.000357735060662938, "loss": 3.147, "step": 26834 }, { "epoch": 1.32, "grad_norm": 0.5667865872383118, "learning_rate": 0.0003577199522014781, "loss": 3.0796, "step": 26835 }, { "epoch": 1.32, "grad_norm": 0.585544228553772, "learning_rate": 0.00035770484358799474, "loss": 3.0049, "step": 26836 }, { "epoch": 1.32, "grad_norm": 0.586850106716156, "learning_rate": 0.00035768973482252814, "loss": 3.0851, "step": 26837 }, { "epoch": 1.32, "grad_norm": 0.6466086506843567, "learning_rate": 0.00035767462590511793, "loss": 2.9241, "step": 26838 }, { "epoch": 1.32, "grad_norm": 0.5506917834281921, "learning_rate": 0.00035765951683580367, "loss": 3.1222, "step": 26839 }, { "epoch": 1.32, "grad_norm": 0.545690655708313, "learning_rate": 0.0003576444076146255, "loss": 3.0191, "step": 26840 }, { "epoch": 1.32, "grad_norm": 0.6054341793060303, "learning_rate": 0.00035762929824162295, "loss": 2.9853, "step": 26841 }, { "epoch": 1.32, "grad_norm": 0.5546567440032959, "learning_rate": 0.00035761418871683596, "loss": 2.9322, "step": 26842 }, { "epoch": 1.32, "grad_norm": 0.5472365617752075, "learning_rate": 0.00035759907904030425, "loss": 3.009, "step": 26843 }, { "epoch": 1.32, "grad_norm": 0.5797613263130188, "learning_rate": 0.0003575839692120677, "loss": 3.1111, "step": 26844 }, { "epoch": 1.32, "grad_norm": 0.5437989234924316, "learning_rate": 0.0003575688592321661, "loss": 3.1502, "step": 26845 }, { "epoch": 1.32, "grad_norm": 0.568385124206543, "learning_rate": 0.00035755374910063914, "loss": 2.8418, "step": 26846 }, { "epoch": 1.32, "grad_norm": 0.5577401518821716, "learning_rate": 0.0003575386388175266, "loss": 2.9497, "step": 26847 }, { "epoch": 1.32, "grad_norm": 0.5725609064102173, "learning_rate": 0.0003575235283828685, "loss": 3.0207, "step": 26848 }, { "epoch": 1.32, "grad_norm": 0.6471617817878723, "learning_rate": 0.00035750841779670444, "loss": 2.9642, "step": 26849 }, { "epoch": 1.32, "grad_norm": 0.5361818075180054, "learning_rate": 0.0003574933070590743, "loss": 2.933, "step": 26850 }, { "epoch": 1.32, "grad_norm": 0.5480607748031616, "learning_rate": 0.00035747819617001783, "loss": 3.1263, "step": 26851 }, { "epoch": 1.32, "grad_norm": 0.5243863463401794, "learning_rate": 0.00035746308512957486, "loss": 3.0354, "step": 26852 }, { "epoch": 1.32, "grad_norm": 0.5858286023139954, "learning_rate": 0.0003574479739377852, "loss": 3.1605, "step": 26853 }, { "epoch": 1.32, "grad_norm": 0.5662160515785217, "learning_rate": 0.0003574328625946885, "loss": 3.2539, "step": 26854 }, { "epoch": 1.32, "grad_norm": 0.5930647253990173, "learning_rate": 0.0003574177511003248, "loss": 2.7858, "step": 26855 }, { "epoch": 1.32, "grad_norm": 0.5709060430526733, "learning_rate": 0.0003574026394547339, "loss": 3.0939, "step": 26856 }, { "epoch": 1.32, "grad_norm": 0.6525852680206299, "learning_rate": 0.00035738752765795533, "loss": 3.0615, "step": 26857 }, { "epoch": 1.32, "grad_norm": 0.5705323815345764, "learning_rate": 0.0003573724157100291, "loss": 2.8539, "step": 26858 }, { "epoch": 1.32, "grad_norm": 0.5645226836204529, "learning_rate": 0.00035735730361099506, "loss": 3.1802, "step": 26859 }, { "epoch": 1.32, "grad_norm": 0.5563384294509888, "learning_rate": 0.00035734219136089287, "loss": 3.1764, "step": 26860 }, { "epoch": 1.32, "grad_norm": 0.5454398989677429, "learning_rate": 0.0003573270789597623, "loss": 3.2813, "step": 26861 }, { "epoch": 1.32, "grad_norm": 0.5893054008483887, "learning_rate": 0.0003573119664076433, "loss": 2.9479, "step": 26862 }, { "epoch": 1.32, "grad_norm": 0.6017407178878784, "learning_rate": 0.00035729685370457557, "loss": 2.981, "step": 26863 }, { "epoch": 1.32, "grad_norm": 0.5848926901817322, "learning_rate": 0.00035728174085059906, "loss": 2.9967, "step": 26864 }, { "epoch": 1.32, "grad_norm": 0.5659344792366028, "learning_rate": 0.0003572666278457533, "loss": 3.0336, "step": 26865 }, { "epoch": 1.32, "grad_norm": 0.5689499378204346, "learning_rate": 0.0003572515146900784, "loss": 3.1079, "step": 26866 }, { "epoch": 1.32, "grad_norm": 0.5527841448783875, "learning_rate": 0.00035723640138361404, "loss": 2.9584, "step": 26867 }, { "epoch": 1.32, "grad_norm": 0.5790805220603943, "learning_rate": 0.0003572212879263999, "loss": 3.1476, "step": 26868 }, { "epoch": 1.32, "grad_norm": 0.5489739775657654, "learning_rate": 0.00035720617431847604, "loss": 3.0897, "step": 26869 }, { "epoch": 1.32, "grad_norm": 0.5729896426200867, "learning_rate": 0.00035719106055988206, "loss": 3.0103, "step": 26870 }, { "epoch": 1.32, "grad_norm": 0.6073732376098633, "learning_rate": 0.0003571759466506578, "loss": 2.9835, "step": 26871 }, { "epoch": 1.32, "grad_norm": 0.63726407289505, "learning_rate": 0.0003571608325908431, "loss": 2.8864, "step": 26872 }, { "epoch": 1.32, "grad_norm": 0.5688294172286987, "learning_rate": 0.0003571457183804779, "loss": 2.7645, "step": 26873 }, { "epoch": 1.32, "grad_norm": 0.5426622033119202, "learning_rate": 0.00035713060401960174, "loss": 3.0762, "step": 26874 }, { "epoch": 1.32, "grad_norm": 0.57330322265625, "learning_rate": 0.0003571154895082546, "loss": 3.0234, "step": 26875 }, { "epoch": 1.32, "grad_norm": 0.5708470940589905, "learning_rate": 0.0003571003748464762, "loss": 2.8983, "step": 26876 }, { "epoch": 1.32, "grad_norm": 0.5946699976921082, "learning_rate": 0.00035708526003430647, "loss": 3.0277, "step": 26877 }, { "epoch": 1.32, "grad_norm": 0.5799338817596436, "learning_rate": 0.0003570701450717851, "loss": 3.0412, "step": 26878 }, { "epoch": 1.32, "grad_norm": 0.558404803276062, "learning_rate": 0.000357055029958952, "loss": 3.0555, "step": 26879 }, { "epoch": 1.32, "grad_norm": 0.7721039056777954, "learning_rate": 0.00035703991469584686, "loss": 3.0843, "step": 26880 }, { "epoch": 1.32, "grad_norm": 0.5947189927101135, "learning_rate": 0.0003570247992825097, "loss": 3.2052, "step": 26881 }, { "epoch": 1.32, "grad_norm": 0.5546451807022095, "learning_rate": 0.00035700968371898007, "loss": 3.1527, "step": 26882 }, { "epoch": 1.32, "grad_norm": 0.5192341208457947, "learning_rate": 0.0003569945680052979, "loss": 3.0922, "step": 26883 }, { "epoch": 1.32, "grad_norm": 0.570080041885376, "learning_rate": 0.00035697945214150306, "loss": 3.1406, "step": 26884 }, { "epoch": 1.32, "grad_norm": 0.5520020723342896, "learning_rate": 0.00035696433612763536, "loss": 3.2305, "step": 26885 }, { "epoch": 1.32, "grad_norm": 0.5862802267074585, "learning_rate": 0.0003569492199637345, "loss": 3.0221, "step": 26886 }, { "epoch": 1.32, "grad_norm": 0.5665404200553894, "learning_rate": 0.0003569341036498404, "loss": 3.0495, "step": 26887 }, { "epoch": 1.32, "grad_norm": 0.5680643320083618, "learning_rate": 0.00035691898718599274, "loss": 3.2575, "step": 26888 }, { "epoch": 1.32, "grad_norm": 0.5598427057266235, "learning_rate": 0.0003569038705722315, "loss": 2.9607, "step": 26889 }, { "epoch": 1.32, "grad_norm": 0.5373253226280212, "learning_rate": 0.00035688875380859636, "loss": 2.9572, "step": 26890 }, { "epoch": 1.32, "grad_norm": 0.5592238306999207, "learning_rate": 0.00035687363689512714, "loss": 2.8467, "step": 26891 }, { "epoch": 1.32, "grad_norm": 0.7021776437759399, "learning_rate": 0.0003568585198318639, "loss": 2.9761, "step": 26892 }, { "epoch": 1.32, "grad_norm": 0.6009718775749207, "learning_rate": 0.00035684340261884606, "loss": 3.3808, "step": 26893 }, { "epoch": 1.32, "grad_norm": 0.6137123107910156, "learning_rate": 0.0003568282852561137, "loss": 3.1077, "step": 26894 }, { "epoch": 1.32, "grad_norm": 0.5503768920898438, "learning_rate": 0.0003568131677437066, "loss": 3.0737, "step": 26895 }, { "epoch": 1.32, "grad_norm": 0.6017237901687622, "learning_rate": 0.0003567980500816646, "loss": 3.045, "step": 26896 }, { "epoch": 1.32, "grad_norm": 0.5975255966186523, "learning_rate": 0.00035678293227002744, "loss": 3.138, "step": 26897 }, { "epoch": 1.32, "grad_norm": 0.5789278745651245, "learning_rate": 0.0003567678143088348, "loss": 2.9976, "step": 26898 }, { "epoch": 1.32, "grad_norm": 0.6184183359146118, "learning_rate": 0.0003567526961981269, "loss": 2.8672, "step": 26899 }, { "epoch": 1.32, "grad_norm": 0.5539001822471619, "learning_rate": 0.0003567375779379432, "loss": 3.2786, "step": 26900 }, { "epoch": 1.32, "grad_norm": 0.5661340951919556, "learning_rate": 0.00035672245952832367, "loss": 3.013, "step": 26901 }, { "epoch": 1.32, "grad_norm": 0.5806612968444824, "learning_rate": 0.000356707340969308, "loss": 2.8103, "step": 26902 }, { "epoch": 1.32, "grad_norm": 0.6162651181221008, "learning_rate": 0.00035669222226093625, "loss": 3.3411, "step": 26903 }, { "epoch": 1.32, "grad_norm": 0.615796685218811, "learning_rate": 0.000356677103403248, "loss": 2.9888, "step": 26904 }, { "epoch": 1.32, "grad_norm": 0.5535275340080261, "learning_rate": 0.0003566619843962832, "loss": 3.151, "step": 26905 }, { "epoch": 1.32, "grad_norm": 0.5614963173866272, "learning_rate": 0.0003566468652400817, "loss": 3.1728, "step": 26906 }, { "epoch": 1.32, "grad_norm": 0.5885602235794067, "learning_rate": 0.0003566317459346832, "loss": 2.9835, "step": 26907 }, { "epoch": 1.32, "grad_norm": 0.5614680647850037, "learning_rate": 0.00035661662648012757, "loss": 3.2124, "step": 26908 }, { "epoch": 1.32, "grad_norm": 0.6076555848121643, "learning_rate": 0.0003566015068764546, "loss": 2.9445, "step": 26909 }, { "epoch": 1.32, "grad_norm": 0.5833714604377747, "learning_rate": 0.0003565863871237043, "loss": 3.1111, "step": 26910 }, { "epoch": 1.32, "grad_norm": 0.5717360973358154, "learning_rate": 0.0003565712672219162, "loss": 2.9425, "step": 26911 }, { "epoch": 1.32, "grad_norm": 0.5546928644180298, "learning_rate": 0.0003565561471711303, "loss": 2.9382, "step": 26912 }, { "epoch": 1.32, "grad_norm": 0.6168055534362793, "learning_rate": 0.0003565410269713865, "loss": 2.9419, "step": 26913 }, { "epoch": 1.32, "grad_norm": 0.6003530025482178, "learning_rate": 0.0003565259066227244, "loss": 3.2024, "step": 26914 }, { "epoch": 1.32, "grad_norm": 0.5346034169197083, "learning_rate": 0.000356510786125184, "loss": 3.1215, "step": 26915 }, { "epoch": 1.32, "grad_norm": 0.5668526291847229, "learning_rate": 0.00035649566547880495, "loss": 3.1087, "step": 26916 }, { "epoch": 1.32, "grad_norm": 0.7049530148506165, "learning_rate": 0.0003564805446836273, "loss": 3.0214, "step": 26917 }, { "epoch": 1.32, "grad_norm": 0.5999130010604858, "learning_rate": 0.0003564654237396907, "loss": 2.9735, "step": 26918 }, { "epoch": 1.32, "grad_norm": 0.5857048034667969, "learning_rate": 0.0003564503026470351, "loss": 2.8567, "step": 26919 }, { "epoch": 1.32, "grad_norm": 0.5745254158973694, "learning_rate": 0.0003564351814057003, "loss": 3.0944, "step": 26920 }, { "epoch": 1.32, "grad_norm": 0.5591810941696167, "learning_rate": 0.0003564200600157261, "loss": 3.124, "step": 26921 }, { "epoch": 1.32, "grad_norm": 0.5647045373916626, "learning_rate": 0.0003564049384771522, "loss": 2.9827, "step": 26922 }, { "epoch": 1.32, "grad_norm": 0.5440300107002258, "learning_rate": 0.00035638981679001863, "loss": 3.0062, "step": 26923 }, { "epoch": 1.32, "grad_norm": 0.5504894852638245, "learning_rate": 0.0003563746949543651, "loss": 3.1177, "step": 26924 }, { "epoch": 1.32, "grad_norm": 0.5864576697349548, "learning_rate": 0.00035635957297023153, "loss": 3.037, "step": 26925 }, { "epoch": 1.32, "grad_norm": 0.5730857849121094, "learning_rate": 0.00035634445083765765, "loss": 2.8243, "step": 26926 }, { "epoch": 1.32, "grad_norm": 0.5582221746444702, "learning_rate": 0.0003563293285566833, "loss": 3.1537, "step": 26927 }, { "epoch": 1.32, "grad_norm": 0.5400102138519287, "learning_rate": 0.00035631420612734836, "loss": 2.955, "step": 26928 }, { "epoch": 1.32, "grad_norm": 0.5986092686653137, "learning_rate": 0.00035629908354969263, "loss": 3.1307, "step": 26929 }, { "epoch": 1.32, "grad_norm": 0.5586465001106262, "learning_rate": 0.000356283960823756, "loss": 3.022, "step": 26930 }, { "epoch": 1.32, "grad_norm": 0.6682041883468628, "learning_rate": 0.0003562688379495782, "loss": 2.9636, "step": 26931 }, { "epoch": 1.32, "grad_norm": 0.5432518124580383, "learning_rate": 0.0003562537149271992, "loss": 3.2366, "step": 26932 }, { "epoch": 1.32, "grad_norm": 0.6639944314956665, "learning_rate": 0.0003562385917566587, "loss": 3.053, "step": 26933 }, { "epoch": 1.32, "grad_norm": 0.5308532118797302, "learning_rate": 0.0003562234684379965, "loss": 3.0926, "step": 26934 }, { "epoch": 1.32, "grad_norm": 0.5658736824989319, "learning_rate": 0.0003562083449712525, "loss": 3.069, "step": 26935 }, { "epoch": 1.32, "grad_norm": 0.5990812182426453, "learning_rate": 0.0003561932213564666, "loss": 3.0671, "step": 26936 }, { "epoch": 1.32, "grad_norm": 0.5363149046897888, "learning_rate": 0.00035617809759367854, "loss": 2.8551, "step": 26937 }, { "epoch": 1.32, "grad_norm": 0.5616311430931091, "learning_rate": 0.0003561629736829282, "loss": 3.2784, "step": 26938 }, { "epoch": 1.32, "grad_norm": 0.5642018914222717, "learning_rate": 0.00035614784962425546, "loss": 3.0111, "step": 26939 }, { "epoch": 1.32, "grad_norm": 0.558562695980072, "learning_rate": 0.0003561327254177, "loss": 3.0165, "step": 26940 }, { "epoch": 1.32, "grad_norm": 0.5792576670646667, "learning_rate": 0.00035611760106330176, "loss": 3.0326, "step": 26941 }, { "epoch": 1.32, "grad_norm": 0.5646781921386719, "learning_rate": 0.0003561024765611005, "loss": 3.0448, "step": 26942 }, { "epoch": 1.32, "grad_norm": 0.5456598997116089, "learning_rate": 0.00035608735191113624, "loss": 2.9734, "step": 26943 }, { "epoch": 1.32, "grad_norm": 0.601516842842102, "learning_rate": 0.0003560722271134486, "loss": 3.1814, "step": 26944 }, { "epoch": 1.32, "grad_norm": 0.5825067758560181, "learning_rate": 0.0003560571021680775, "loss": 2.9143, "step": 26945 }, { "epoch": 1.32, "grad_norm": 0.5741273164749146, "learning_rate": 0.00035604197707506287, "loss": 2.8824, "step": 26946 }, { "epoch": 1.32, "grad_norm": 0.6200869083404541, "learning_rate": 0.0003560268518344444, "loss": 3.2152, "step": 26947 }, { "epoch": 1.32, "grad_norm": 0.6041608452796936, "learning_rate": 0.000356011726446262, "loss": 3.0224, "step": 26948 }, { "epoch": 1.32, "grad_norm": 0.5746281147003174, "learning_rate": 0.00035599660091055544, "loss": 3.0115, "step": 26949 }, { "epoch": 1.32, "grad_norm": 0.5583726763725281, "learning_rate": 0.0003559814752273647, "loss": 3.2274, "step": 26950 }, { "epoch": 1.32, "grad_norm": 0.5739026665687561, "learning_rate": 0.00035596634939672947, "loss": 3.0633, "step": 26951 }, { "epoch": 1.32, "grad_norm": 0.551236629486084, "learning_rate": 0.0003559512234186896, "loss": 3.0881, "step": 26952 }, { "epoch": 1.32, "grad_norm": 0.5410869717597961, "learning_rate": 0.0003559360972932851, "loss": 3.0438, "step": 26953 }, { "epoch": 1.32, "grad_norm": 0.6108107566833496, "learning_rate": 0.00035592097102055554, "loss": 3.1604, "step": 26954 }, { "epoch": 1.32, "grad_norm": 0.5781041979789734, "learning_rate": 0.00035590584460054104, "loss": 3.1562, "step": 26955 }, { "epoch": 1.32, "grad_norm": 0.5774297118186951, "learning_rate": 0.00035589071803328125, "loss": 2.885, "step": 26956 }, { "epoch": 1.32, "grad_norm": 0.5566204786300659, "learning_rate": 0.00035587559131881603, "loss": 3.1957, "step": 26957 }, { "epoch": 1.32, "grad_norm": 0.5503354668617249, "learning_rate": 0.00035586046445718533, "loss": 3.0237, "step": 26958 }, { "epoch": 1.32, "grad_norm": 0.5675086975097656, "learning_rate": 0.00035584533744842877, "loss": 3.0587, "step": 26959 }, { "epoch": 1.32, "grad_norm": 0.5769832730293274, "learning_rate": 0.00035583021029258647, "loss": 3.1677, "step": 26960 }, { "epoch": 1.32, "grad_norm": 0.614290714263916, "learning_rate": 0.00035581508298969817, "loss": 2.9065, "step": 26961 }, { "epoch": 1.32, "grad_norm": 0.5537709593772888, "learning_rate": 0.00035579995553980365, "loss": 3.1548, "step": 26962 }, { "epoch": 1.32, "grad_norm": 0.6154881119728088, "learning_rate": 0.0003557848279429427, "loss": 3.3156, "step": 26963 }, { "epoch": 1.32, "grad_norm": 0.5546302795410156, "learning_rate": 0.00035576970019915536, "loss": 2.939, "step": 26964 }, { "epoch": 1.32, "grad_norm": 0.6049430966377258, "learning_rate": 0.00035575457230848127, "loss": 2.7861, "step": 26965 }, { "epoch": 1.32, "grad_norm": 0.5868650078773499, "learning_rate": 0.00035573944427096045, "loss": 3.0257, "step": 26966 }, { "epoch": 1.32, "grad_norm": 0.5958408713340759, "learning_rate": 0.0003557243160866326, "loss": 2.7296, "step": 26967 }, { "epoch": 1.32, "grad_norm": 0.5856796503067017, "learning_rate": 0.00035570918775553763, "loss": 3.0672, "step": 26968 }, { "epoch": 1.32, "grad_norm": 0.5468848347663879, "learning_rate": 0.0003556940592777155, "loss": 2.9349, "step": 26969 }, { "epoch": 1.32, "grad_norm": 0.5882099270820618, "learning_rate": 0.0003556789306532057, "loss": 3.1575, "step": 26970 }, { "epoch": 1.32, "grad_norm": 0.5558639168739319, "learning_rate": 0.0003556638018820485, "loss": 2.948, "step": 26971 }, { "epoch": 1.32, "grad_norm": 0.582410454750061, "learning_rate": 0.0003556486729642835, "loss": 3.0795, "step": 26972 }, { "epoch": 1.32, "grad_norm": 0.5897334218025208, "learning_rate": 0.0003556335438999506, "loss": 2.867, "step": 26973 }, { "epoch": 1.32, "grad_norm": 0.5801978707313538, "learning_rate": 0.0003556184146890897, "loss": 3.112, "step": 26974 }, { "epoch": 1.32, "grad_norm": 0.5620837807655334, "learning_rate": 0.00035560328533174057, "loss": 3.1498, "step": 26975 }, { "epoch": 1.32, "grad_norm": 0.5551138520240784, "learning_rate": 0.00035558815582794305, "loss": 3.1267, "step": 26976 }, { "epoch": 1.32, "grad_norm": 0.5612112283706665, "learning_rate": 0.00035557302617773704, "loss": 3.1203, "step": 26977 }, { "epoch": 1.32, "grad_norm": 0.6193846464157104, "learning_rate": 0.0003555578963811624, "loss": 3.0384, "step": 26978 }, { "epoch": 1.32, "grad_norm": 0.6214466691017151, "learning_rate": 0.0003555427664382589, "loss": 3.0276, "step": 26979 }, { "epoch": 1.32, "grad_norm": 0.5702659487724304, "learning_rate": 0.00035552763634906655, "loss": 3.2472, "step": 26980 }, { "epoch": 1.32, "grad_norm": 0.5576087832450867, "learning_rate": 0.000355512506113625, "loss": 2.9014, "step": 26981 }, { "epoch": 1.32, "grad_norm": 0.578071653842926, "learning_rate": 0.00035549737573197415, "loss": 3.1932, "step": 26982 }, { "epoch": 1.32, "grad_norm": 0.5625930428504944, "learning_rate": 0.00035548224520415403, "loss": 3.0508, "step": 26983 }, { "epoch": 1.32, "grad_norm": 0.6400315761566162, "learning_rate": 0.0003554671145302044, "loss": 3.0855, "step": 26984 }, { "epoch": 1.32, "grad_norm": 0.5485404133796692, "learning_rate": 0.00035545198371016477, "loss": 3.1335, "step": 26985 }, { "epoch": 1.32, "grad_norm": 0.5850667357444763, "learning_rate": 0.0003554368527440756, "loss": 2.8681, "step": 26986 }, { "epoch": 1.32, "grad_norm": 0.5740143656730652, "learning_rate": 0.00035542172163197626, "loss": 3.0152, "step": 26987 }, { "epoch": 1.32, "grad_norm": 0.5631483793258667, "learning_rate": 0.0003554065903739068, "loss": 3.1364, "step": 26988 }, { "epoch": 1.32, "grad_norm": 0.604009211063385, "learning_rate": 0.0003553914589699071, "loss": 3.162, "step": 26989 }, { "epoch": 1.32, "grad_norm": 0.5691906213760376, "learning_rate": 0.00035537632742001685, "loss": 3.1606, "step": 26990 }, { "epoch": 1.32, "grad_norm": 0.629859983921051, "learning_rate": 0.00035536119572427603, "loss": 2.9259, "step": 26991 }, { "epoch": 1.32, "grad_norm": 0.5835464000701904, "learning_rate": 0.00035534606388272456, "loss": 3.2523, "step": 26992 }, { "epoch": 1.32, "grad_norm": 0.5969851613044739, "learning_rate": 0.00035533093189540216, "loss": 3.1687, "step": 26993 }, { "epoch": 1.32, "grad_norm": 0.541480302810669, "learning_rate": 0.0003553157997623488, "loss": 2.9884, "step": 26994 }, { "epoch": 1.32, "grad_norm": 0.5837870240211487, "learning_rate": 0.0003553006674836041, "loss": 2.7295, "step": 26995 }, { "epoch": 1.32, "grad_norm": 0.609303891658783, "learning_rate": 0.00035528553505920825, "loss": 3.0732, "step": 26996 }, { "epoch": 1.32, "grad_norm": 0.5507553815841675, "learning_rate": 0.00035527040248920085, "loss": 3.0481, "step": 26997 }, { "epoch": 1.32, "grad_norm": 0.5484434366226196, "learning_rate": 0.00035525526977362197, "loss": 3.0371, "step": 26998 }, { "epoch": 1.32, "grad_norm": 0.5981265902519226, "learning_rate": 0.0003552401369125112, "loss": 3.0613, "step": 26999 }, { "epoch": 1.32, "grad_norm": 0.5397427678108215, "learning_rate": 0.00035522500390590856, "loss": 3.1179, "step": 27000 }, { "epoch": 1.32, "grad_norm": 0.5383358001708984, "learning_rate": 0.00035520987075385394, "loss": 3.0821, "step": 27001 }, { "epoch": 1.32, "grad_norm": 0.564812183380127, "learning_rate": 0.0003551947374563871, "loss": 2.9963, "step": 27002 }, { "epoch": 1.32, "grad_norm": 0.5476768016815186, "learning_rate": 0.00035517960401354806, "loss": 3.0847, "step": 27003 }, { "epoch": 1.32, "grad_norm": 0.5901896357536316, "learning_rate": 0.00035516447042537643, "loss": 3.2513, "step": 27004 }, { "epoch": 1.32, "grad_norm": 0.5683808326721191, "learning_rate": 0.0003551493366919123, "loss": 3.2693, "step": 27005 }, { "epoch": 1.32, "grad_norm": 0.563849151134491, "learning_rate": 0.00035513420281319533, "loss": 3.1826, "step": 27006 }, { "epoch": 1.32, "grad_norm": 0.5702254772186279, "learning_rate": 0.0003551190687892655, "loss": 3.1978, "step": 27007 }, { "epoch": 1.32, "grad_norm": 0.5525578856468201, "learning_rate": 0.00035510393462016273, "loss": 3.1693, "step": 27008 }, { "epoch": 1.32, "grad_norm": 0.5483326315879822, "learning_rate": 0.0003550888003059268, "loss": 3.0161, "step": 27009 }, { "epoch": 1.32, "grad_norm": 0.6483874320983887, "learning_rate": 0.0003550736658465975, "loss": 3.1485, "step": 27010 }, { "epoch": 1.32, "grad_norm": 0.5594171285629272, "learning_rate": 0.0003550585312422147, "loss": 2.9653, "step": 27011 }, { "epoch": 1.32, "grad_norm": 0.5625665187835693, "learning_rate": 0.0003550433964928185, "loss": 3.1067, "step": 27012 }, { "epoch": 1.32, "grad_norm": 0.5742067694664001, "learning_rate": 0.0003550282615984485, "loss": 3.0806, "step": 27013 }, { "epoch": 1.32, "grad_norm": 0.6136051416397095, "learning_rate": 0.0003550131265591446, "loss": 3.2865, "step": 27014 }, { "epoch": 1.32, "grad_norm": 0.7125194072723389, "learning_rate": 0.0003549979913749468, "loss": 3.0679, "step": 27015 }, { "epoch": 1.32, "grad_norm": 0.5937182307243347, "learning_rate": 0.0003549828560458948, "loss": 2.9668, "step": 27016 }, { "epoch": 1.32, "grad_norm": 0.5569949150085449, "learning_rate": 0.0003549677205720286, "loss": 3.0069, "step": 27017 }, { "epoch": 1.32, "grad_norm": 0.5426252484321594, "learning_rate": 0.000354952584953388, "loss": 3.1013, "step": 27018 }, { "epoch": 1.32, "grad_norm": 0.5773874521255493, "learning_rate": 0.0003549374491900128, "loss": 2.9514, "step": 27019 }, { "epoch": 1.32, "grad_norm": 0.5283383131027222, "learning_rate": 0.00035492231328194306, "loss": 2.845, "step": 27020 }, { "epoch": 1.32, "grad_norm": 0.5627959966659546, "learning_rate": 0.00035490717722921834, "loss": 3.0731, "step": 27021 }, { "epoch": 1.32, "grad_norm": 0.584900975227356, "learning_rate": 0.0003548920410318788, "loss": 2.9836, "step": 27022 }, { "epoch": 1.32, "grad_norm": 0.5437769889831543, "learning_rate": 0.0003548769046899642, "loss": 3.0244, "step": 27023 }, { "epoch": 1.32, "grad_norm": 0.5971786975860596, "learning_rate": 0.00035486176820351435, "loss": 3.2019, "step": 27024 }, { "epoch": 1.32, "grad_norm": 0.5955939888954163, "learning_rate": 0.0003548466315725691, "loss": 2.9108, "step": 27025 }, { "epoch": 1.32, "grad_norm": 0.5769616961479187, "learning_rate": 0.0003548314947971685, "loss": 2.9552, "step": 27026 }, { "epoch": 1.32, "grad_norm": 0.5936218500137329, "learning_rate": 0.0003548163578773522, "loss": 3.0934, "step": 27027 }, { "epoch": 1.32, "grad_norm": 0.5732731819152832, "learning_rate": 0.0003548012208131602, "loss": 3.1205, "step": 27028 }, { "epoch": 1.32, "grad_norm": 0.5782310962677002, "learning_rate": 0.00035478608360463233, "loss": 2.8778, "step": 27029 }, { "epoch": 1.32, "grad_norm": 0.5631444454193115, "learning_rate": 0.0003547709462518085, "loss": 2.9783, "step": 27030 }, { "epoch": 1.32, "grad_norm": 0.5475609302520752, "learning_rate": 0.0003547558087547285, "loss": 2.9648, "step": 27031 }, { "epoch": 1.32, "grad_norm": 0.5806057453155518, "learning_rate": 0.0003547406711134322, "loss": 2.8895, "step": 27032 }, { "epoch": 1.32, "grad_norm": 0.5406964421272278, "learning_rate": 0.00035472553332795956, "loss": 3.2604, "step": 27033 }, { "epoch": 1.32, "grad_norm": 0.5376538634300232, "learning_rate": 0.00035471039539835045, "loss": 3.0621, "step": 27034 }, { "epoch": 1.32, "grad_norm": 0.5387920141220093, "learning_rate": 0.00035469525732464464, "loss": 3.0724, "step": 27035 }, { "epoch": 1.32, "grad_norm": 0.6255925297737122, "learning_rate": 0.0003546801191068819, "loss": 3.0135, "step": 27036 }, { "epoch": 1.33, "grad_norm": 0.5752521753311157, "learning_rate": 0.00035466498074510246, "loss": 3.1237, "step": 27037 }, { "epoch": 1.33, "grad_norm": 0.5635229349136353, "learning_rate": 0.0003546498422393459, "loss": 3.0906, "step": 27038 }, { "epoch": 1.33, "grad_norm": 0.5960707664489746, "learning_rate": 0.0003546347035896522, "loss": 2.9693, "step": 27039 }, { "epoch": 1.33, "grad_norm": 0.5936806201934814, "learning_rate": 0.00035461956479606113, "loss": 3.1782, "step": 27040 }, { "epoch": 1.33, "grad_norm": 0.5770202875137329, "learning_rate": 0.0003546044258586127, "loss": 3.1503, "step": 27041 }, { "epoch": 1.33, "grad_norm": 0.607388973236084, "learning_rate": 0.0003545892867773467, "loss": 3.1449, "step": 27042 }, { "epoch": 1.33, "grad_norm": 0.5307580232620239, "learning_rate": 0.00035457414755230303, "loss": 3.0214, "step": 27043 }, { "epoch": 1.33, "grad_norm": 0.5784158110618591, "learning_rate": 0.0003545590081835215, "loss": 3.0766, "step": 27044 }, { "epoch": 1.33, "grad_norm": 0.5791460275650024, "learning_rate": 0.0003545438686710422, "loss": 2.999, "step": 27045 }, { "epoch": 1.33, "grad_norm": 0.5575026869773865, "learning_rate": 0.00035452872901490476, "loss": 3.0514, "step": 27046 }, { "epoch": 1.33, "grad_norm": 0.54404217004776, "learning_rate": 0.0003545135892151491, "loss": 2.9649, "step": 27047 }, { "epoch": 1.33, "grad_norm": 0.5947194695472717, "learning_rate": 0.00035449844927181525, "loss": 3.0767, "step": 27048 }, { "epoch": 1.33, "grad_norm": 0.5407034158706665, "learning_rate": 0.00035448330918494284, "loss": 2.9749, "step": 27049 }, { "epoch": 1.33, "grad_norm": 0.5737541317939758, "learning_rate": 0.00035446816895457194, "loss": 3.2021, "step": 27050 }, { "epoch": 1.33, "grad_norm": 0.5305442214012146, "learning_rate": 0.00035445302858074234, "loss": 3.2367, "step": 27051 }, { "epoch": 1.33, "grad_norm": 0.5993185639381409, "learning_rate": 0.000354437888063494, "loss": 3.2942, "step": 27052 }, { "epoch": 1.33, "grad_norm": 0.5892103314399719, "learning_rate": 0.00035442274740286666, "loss": 3.1939, "step": 27053 }, { "epoch": 1.33, "grad_norm": 0.6093845963478088, "learning_rate": 0.00035440760659890034, "loss": 2.9334, "step": 27054 }, { "epoch": 1.33, "grad_norm": 0.5915141701698303, "learning_rate": 0.00035439246565163485, "loss": 3.1818, "step": 27055 }, { "epoch": 1.33, "grad_norm": 0.5777159333229065, "learning_rate": 0.0003543773245611101, "loss": 3.1014, "step": 27056 }, { "epoch": 1.33, "grad_norm": 0.6174264550209045, "learning_rate": 0.0003543621833273658, "loss": 3.036, "step": 27057 }, { "epoch": 1.33, "grad_norm": 0.584246814250946, "learning_rate": 0.00035434704195044206, "loss": 3.0767, "step": 27058 }, { "epoch": 1.33, "grad_norm": 0.5757298469543457, "learning_rate": 0.0003543319004303787, "loss": 3.1766, "step": 27059 }, { "epoch": 1.33, "grad_norm": 0.5471251606941223, "learning_rate": 0.0003543167587672156, "loss": 3.087, "step": 27060 }, { "epoch": 1.33, "grad_norm": 0.6035965085029602, "learning_rate": 0.00035430161696099256, "loss": 3.0348, "step": 27061 }, { "epoch": 1.33, "grad_norm": 0.5603316426277161, "learning_rate": 0.00035428647501174943, "loss": 3.1404, "step": 27062 }, { "epoch": 1.33, "grad_norm": 0.6384663581848145, "learning_rate": 0.00035427133291952633, "loss": 2.9896, "step": 27063 }, { "epoch": 1.33, "grad_norm": 0.5779756903648376, "learning_rate": 0.00035425619068436294, "loss": 3.0965, "step": 27064 }, { "epoch": 1.33, "grad_norm": 0.5718583464622498, "learning_rate": 0.00035424104830629916, "loss": 3.1002, "step": 27065 }, { "epoch": 1.33, "grad_norm": 0.5677472352981567, "learning_rate": 0.0003542259057853749, "loss": 3.1221, "step": 27066 }, { "epoch": 1.33, "grad_norm": 0.575522243976593, "learning_rate": 0.00035421076312163, "loss": 3.0776, "step": 27067 }, { "epoch": 1.33, "grad_norm": 0.5618709325790405, "learning_rate": 0.0003541956203151044, "loss": 2.8956, "step": 27068 }, { "epoch": 1.33, "grad_norm": 0.5451009273529053, "learning_rate": 0.000354180477365838, "loss": 3.0641, "step": 27069 }, { "epoch": 1.33, "grad_norm": 0.618395984172821, "learning_rate": 0.0003541653342738706, "loss": 2.8972, "step": 27070 }, { "epoch": 1.33, "grad_norm": 0.5467285513877869, "learning_rate": 0.0003541501910392422, "loss": 3.0502, "step": 27071 }, { "epoch": 1.33, "grad_norm": 0.6058321595191956, "learning_rate": 0.00035413504766199257, "loss": 2.9549, "step": 27072 }, { "epoch": 1.33, "grad_norm": 0.5389471054077148, "learning_rate": 0.00035411990414216165, "loss": 3.2793, "step": 27073 }, { "epoch": 1.33, "grad_norm": 0.6167011857032776, "learning_rate": 0.0003541047604797894, "loss": 3.0513, "step": 27074 }, { "epoch": 1.33, "grad_norm": 0.6449909210205078, "learning_rate": 0.00035408961667491554, "loss": 2.965, "step": 27075 }, { "epoch": 1.33, "grad_norm": 0.5643405914306641, "learning_rate": 0.0003540744727275801, "loss": 2.8066, "step": 27076 }, { "epoch": 1.33, "grad_norm": 0.5791833400726318, "learning_rate": 0.0003540593286378228, "loss": 3.0858, "step": 27077 }, { "epoch": 1.33, "grad_norm": 0.5517277717590332, "learning_rate": 0.0003540441844056837, "loss": 3.2508, "step": 27078 }, { "epoch": 1.33, "grad_norm": 0.5787827372550964, "learning_rate": 0.00035402904003120254, "loss": 3.0538, "step": 27079 }, { "epoch": 1.33, "grad_norm": 0.5583292841911316, "learning_rate": 0.0003540138955144194, "loss": 3.1336, "step": 27080 }, { "epoch": 1.33, "grad_norm": 0.5447515249252319, "learning_rate": 0.00035399875085537403, "loss": 2.9675, "step": 27081 }, { "epoch": 1.33, "grad_norm": 0.5473519563674927, "learning_rate": 0.00035398360605410636, "loss": 2.6685, "step": 27082 }, { "epoch": 1.33, "grad_norm": 0.6127763986587524, "learning_rate": 0.00035396846111065614, "loss": 2.9615, "step": 27083 }, { "epoch": 1.33, "grad_norm": 0.5656737685203552, "learning_rate": 0.00035395331602506344, "loss": 3.0083, "step": 27084 }, { "epoch": 1.33, "grad_norm": 0.6185324192047119, "learning_rate": 0.0003539381707973682, "loss": 3.1667, "step": 27085 }, { "epoch": 1.33, "grad_norm": 0.5483812689781189, "learning_rate": 0.00035392302542761007, "loss": 3.0815, "step": 27086 }, { "epoch": 1.33, "grad_norm": 0.5831971764564514, "learning_rate": 0.0003539078799158291, "loss": 3.103, "step": 27087 }, { "epoch": 1.33, "grad_norm": 0.5833414196968079, "learning_rate": 0.0003538927342620651, "loss": 2.9695, "step": 27088 }, { "epoch": 1.33, "grad_norm": 0.543148934841156, "learning_rate": 0.0003538775884663581, "loss": 3.0304, "step": 27089 }, { "epoch": 1.33, "grad_norm": 0.6388452649116516, "learning_rate": 0.0003538624425287479, "loss": 3.0587, "step": 27090 }, { "epoch": 1.33, "grad_norm": 0.601694643497467, "learning_rate": 0.0003538472964492743, "loss": 2.9898, "step": 27091 }, { "epoch": 1.33, "grad_norm": 0.6530476212501526, "learning_rate": 0.00035383215022797735, "loss": 3.0919, "step": 27092 }, { "epoch": 1.33, "grad_norm": 0.5525394678115845, "learning_rate": 0.0003538170038648969, "loss": 3.1727, "step": 27093 }, { "epoch": 1.33, "grad_norm": 0.562701404094696, "learning_rate": 0.0003538018573600728, "loss": 3.1354, "step": 27094 }, { "epoch": 1.33, "grad_norm": 0.6188597679138184, "learning_rate": 0.0003537867107135449, "loss": 3.1935, "step": 27095 }, { "epoch": 1.33, "grad_norm": 0.5348824858665466, "learning_rate": 0.0003537715639253533, "loss": 2.9653, "step": 27096 }, { "epoch": 1.33, "grad_norm": 0.5821238160133362, "learning_rate": 0.00035375641699553765, "loss": 2.973, "step": 27097 }, { "epoch": 1.33, "grad_norm": 0.5549263954162598, "learning_rate": 0.0003537412699241379, "loss": 3.0837, "step": 27098 }, { "epoch": 1.33, "grad_norm": 0.5607396960258484, "learning_rate": 0.0003537261227111941, "loss": 3.1077, "step": 27099 }, { "epoch": 1.33, "grad_norm": 0.5585238337516785, "learning_rate": 0.00035371097535674597, "loss": 2.7311, "step": 27100 }, { "epoch": 1.33, "grad_norm": 0.5680395364761353, "learning_rate": 0.00035369582786083344, "loss": 2.9498, "step": 27101 }, { "epoch": 1.33, "grad_norm": 0.582237184047699, "learning_rate": 0.00035368068022349644, "loss": 2.9487, "step": 27102 }, { "epoch": 1.33, "grad_norm": 0.6273513436317444, "learning_rate": 0.00035366553244477487, "loss": 3.2227, "step": 27103 }, { "epoch": 1.33, "grad_norm": 0.5687019228935242, "learning_rate": 0.0003536503845247087, "loss": 3.0682, "step": 27104 }, { "epoch": 1.33, "grad_norm": 0.5843102931976318, "learning_rate": 0.0003536352364633376, "loss": 3.2752, "step": 27105 }, { "epoch": 1.33, "grad_norm": 0.5721175670623779, "learning_rate": 0.00035362008826070166, "loss": 3.2501, "step": 27106 }, { "epoch": 1.33, "grad_norm": 0.5818900465965271, "learning_rate": 0.00035360493991684085, "loss": 3.0691, "step": 27107 }, { "epoch": 1.33, "grad_norm": 0.5466283559799194, "learning_rate": 0.00035358979143179474, "loss": 2.9073, "step": 27108 }, { "epoch": 1.33, "grad_norm": 0.5692821741104126, "learning_rate": 0.00035357464280560356, "loss": 2.9878, "step": 27109 }, { "epoch": 1.33, "grad_norm": 0.6057048439979553, "learning_rate": 0.000353559494038307, "loss": 3.1523, "step": 27110 }, { "epoch": 1.33, "grad_norm": 0.7662555575370789, "learning_rate": 0.0003535443451299452, "loss": 3.0899, "step": 27111 }, { "epoch": 1.33, "grad_norm": 0.6070552468299866, "learning_rate": 0.00035352919608055777, "loss": 3.0291, "step": 27112 }, { "epoch": 1.33, "grad_norm": 0.5470950603485107, "learning_rate": 0.0003535140468901847, "loss": 2.9255, "step": 27113 }, { "epoch": 1.33, "grad_norm": 0.5878978967666626, "learning_rate": 0.00035349889755886604, "loss": 2.828, "step": 27114 }, { "epoch": 1.33, "grad_norm": 0.5929617881774902, "learning_rate": 0.00035348374808664155, "loss": 2.9915, "step": 27115 }, { "epoch": 1.33, "grad_norm": 0.5788769125938416, "learning_rate": 0.0003534685984735511, "loss": 3.2226, "step": 27116 }, { "epoch": 1.33, "grad_norm": 0.5542086362838745, "learning_rate": 0.0003534534487196347, "loss": 3.269, "step": 27117 }, { "epoch": 1.33, "grad_norm": 0.5548458099365234, "learning_rate": 0.0003534382988249323, "loss": 3.044, "step": 27118 }, { "epoch": 1.33, "grad_norm": 0.5794603228569031, "learning_rate": 0.0003534231487894835, "loss": 3.151, "step": 27119 }, { "epoch": 1.33, "grad_norm": 0.6125290989875793, "learning_rate": 0.0003534079986133285, "loss": 3.0768, "step": 27120 }, { "epoch": 1.33, "grad_norm": 0.5400205254554749, "learning_rate": 0.00035339284829650716, "loss": 3.0074, "step": 27121 }, { "epoch": 1.33, "grad_norm": 0.5447748303413391, "learning_rate": 0.0003533776978390593, "loss": 3.031, "step": 27122 }, { "epoch": 1.33, "grad_norm": 0.5757708549499512, "learning_rate": 0.00035336254724102487, "loss": 3.0801, "step": 27123 }, { "epoch": 1.33, "grad_norm": 0.5604739189147949, "learning_rate": 0.0003533473965024437, "loss": 3.0351, "step": 27124 }, { "epoch": 1.33, "grad_norm": 0.5703689455986023, "learning_rate": 0.00035333224562335584, "loss": 3.2484, "step": 27125 }, { "epoch": 1.33, "grad_norm": 0.5888387560844421, "learning_rate": 0.00035331709460380105, "loss": 2.9558, "step": 27126 }, { "epoch": 1.33, "grad_norm": 0.5701423287391663, "learning_rate": 0.0003533019434438193, "loss": 3.1372, "step": 27127 }, { "epoch": 1.33, "grad_norm": 0.5726101398468018, "learning_rate": 0.0003532867921434505, "loss": 2.931, "step": 27128 }, { "epoch": 1.33, "grad_norm": 0.5894249677658081, "learning_rate": 0.00035327164070273457, "loss": 3.1105, "step": 27129 }, { "epoch": 1.33, "grad_norm": 0.5488255620002747, "learning_rate": 0.0003532564891217113, "loss": 3.0723, "step": 27130 }, { "epoch": 1.33, "grad_norm": 0.5957520604133606, "learning_rate": 0.00035324133740042077, "loss": 3.1297, "step": 27131 }, { "epoch": 1.33, "grad_norm": 0.5768669843673706, "learning_rate": 0.00035322618553890276, "loss": 2.9614, "step": 27132 }, { "epoch": 1.33, "grad_norm": 0.5305699110031128, "learning_rate": 0.00035321103353719736, "loss": 2.8638, "step": 27133 }, { "epoch": 1.33, "grad_norm": 0.5721878409385681, "learning_rate": 0.00035319588139534413, "loss": 3.203, "step": 27134 }, { "epoch": 1.33, "grad_norm": 0.5690099596977234, "learning_rate": 0.0003531807291133833, "loss": 3.0033, "step": 27135 }, { "epoch": 1.33, "grad_norm": 0.6488524079322815, "learning_rate": 0.0003531655766913547, "loss": 3.1853, "step": 27136 }, { "epoch": 1.33, "grad_norm": 0.6288285255432129, "learning_rate": 0.00035315042412929813, "loss": 3.1391, "step": 27137 }, { "epoch": 1.33, "grad_norm": 0.586223840713501, "learning_rate": 0.00035313527142725364, "loss": 2.9698, "step": 27138 }, { "epoch": 1.33, "grad_norm": 0.5799707174301147, "learning_rate": 0.000353120118585261, "loss": 3.1249, "step": 27139 }, { "epoch": 1.33, "grad_norm": 0.555907666683197, "learning_rate": 0.0003531049656033602, "loss": 3.1875, "step": 27140 }, { "epoch": 1.33, "grad_norm": 0.5725418329238892, "learning_rate": 0.00035308981248159113, "loss": 3.1464, "step": 27141 }, { "epoch": 1.33, "grad_norm": 0.542593240737915, "learning_rate": 0.00035307465921999374, "loss": 3.1674, "step": 27142 }, { "epoch": 1.33, "grad_norm": 0.5610252022743225, "learning_rate": 0.0003530595058186079, "loss": 3.0766, "step": 27143 }, { "epoch": 1.33, "grad_norm": 0.6028214693069458, "learning_rate": 0.0003530443522774735, "loss": 3.3676, "step": 27144 }, { "epoch": 1.33, "grad_norm": 0.5492804646492004, "learning_rate": 0.0003530291985966305, "loss": 3.0786, "step": 27145 }, { "epoch": 1.33, "grad_norm": 0.5824100375175476, "learning_rate": 0.0003530140447761188, "loss": 3.1429, "step": 27146 }, { "epoch": 1.33, "grad_norm": 0.5860824584960938, "learning_rate": 0.0003529988908159784, "loss": 3.1449, "step": 27147 }, { "epoch": 1.33, "grad_norm": 0.6098974347114563, "learning_rate": 0.0003529837367162491, "loss": 3.0493, "step": 27148 }, { "epoch": 1.33, "grad_norm": 0.5788512825965881, "learning_rate": 0.0003529685824769707, "loss": 2.9447, "step": 27149 }, { "epoch": 1.33, "grad_norm": 0.5958915948867798, "learning_rate": 0.0003529534280981834, "loss": 3.1485, "step": 27150 }, { "epoch": 1.33, "grad_norm": 0.552696704864502, "learning_rate": 0.0003529382735799269, "loss": 3.0101, "step": 27151 }, { "epoch": 1.33, "grad_norm": 0.571831226348877, "learning_rate": 0.00035292311892224114, "loss": 2.9421, "step": 27152 }, { "epoch": 1.33, "grad_norm": 0.5760900378227234, "learning_rate": 0.0003529079641251661, "loss": 3.0002, "step": 27153 }, { "epoch": 1.33, "grad_norm": 0.6153770089149475, "learning_rate": 0.0003528928091887417, "loss": 2.9689, "step": 27154 }, { "epoch": 1.33, "grad_norm": 0.5628465414047241, "learning_rate": 0.00035287765411300777, "loss": 2.8312, "step": 27155 }, { "epoch": 1.33, "grad_norm": 0.5893851518630981, "learning_rate": 0.00035286249889800425, "loss": 2.9367, "step": 27156 }, { "epoch": 1.33, "grad_norm": 0.5659940242767334, "learning_rate": 0.00035284734354377117, "loss": 3.078, "step": 27157 }, { "epoch": 1.33, "grad_norm": 0.5788155198097229, "learning_rate": 0.0003528321880503483, "loss": 2.9413, "step": 27158 }, { "epoch": 1.33, "grad_norm": 0.6268181204795837, "learning_rate": 0.00035281703241777566, "loss": 3.0774, "step": 27159 }, { "epoch": 1.33, "grad_norm": 0.5367437601089478, "learning_rate": 0.000352801876646093, "loss": 3.0378, "step": 27160 }, { "epoch": 1.33, "grad_norm": 0.5380226969718933, "learning_rate": 0.00035278672073534045, "loss": 2.9433, "step": 27161 }, { "epoch": 1.33, "grad_norm": 0.5570831894874573, "learning_rate": 0.00035277156468555785, "loss": 3.0755, "step": 27162 }, { "epoch": 1.33, "grad_norm": 0.5446562767028809, "learning_rate": 0.00035275640849678514, "loss": 2.9051, "step": 27163 }, { "epoch": 1.33, "grad_norm": 0.5816878080368042, "learning_rate": 0.0003527412521690621, "loss": 3.0586, "step": 27164 }, { "epoch": 1.33, "grad_norm": 0.6048847436904907, "learning_rate": 0.0003527260957024288, "loss": 3.2571, "step": 27165 }, { "epoch": 1.33, "grad_norm": 0.5430023670196533, "learning_rate": 0.0003527109390969251, "loss": 3.1818, "step": 27166 }, { "epoch": 1.33, "grad_norm": 0.5993807911872864, "learning_rate": 0.00035269578235259095, "loss": 3.0313, "step": 27167 }, { "epoch": 1.33, "grad_norm": 0.638937771320343, "learning_rate": 0.0003526806254694662, "loss": 3.304, "step": 27168 }, { "epoch": 1.33, "grad_norm": 0.5941781997680664, "learning_rate": 0.000352665468447591, "loss": 2.8478, "step": 27169 }, { "epoch": 1.33, "grad_norm": 0.5695825815200806, "learning_rate": 0.0003526503112870049, "loss": 3.1401, "step": 27170 }, { "epoch": 1.33, "grad_norm": 0.563214898109436, "learning_rate": 0.000352635153987748, "loss": 3.2235, "step": 27171 }, { "epoch": 1.33, "grad_norm": 0.5385993719100952, "learning_rate": 0.00035261999654986035, "loss": 3.0607, "step": 27172 }, { "epoch": 1.33, "grad_norm": 0.612762987613678, "learning_rate": 0.0003526048389733818, "loss": 3.0266, "step": 27173 }, { "epoch": 1.33, "grad_norm": 0.5819412469863892, "learning_rate": 0.0003525896812583521, "loss": 2.93, "step": 27174 }, { "epoch": 1.33, "grad_norm": 0.5829300880432129, "learning_rate": 0.0003525745234048112, "loss": 2.9441, "step": 27175 }, { "epoch": 1.33, "grad_norm": 0.5720100998878479, "learning_rate": 0.00035255936541279937, "loss": 3.02, "step": 27176 }, { "epoch": 1.33, "grad_norm": 0.5454930663108826, "learning_rate": 0.0003525442072823562, "loss": 2.9635, "step": 27177 }, { "epoch": 1.33, "grad_norm": 0.5697073936462402, "learning_rate": 0.0003525290490135217, "loss": 3.1289, "step": 27178 }, { "epoch": 1.33, "grad_norm": 0.5639901161193848, "learning_rate": 0.00035251389060633574, "loss": 3.077, "step": 27179 }, { "epoch": 1.33, "grad_norm": 0.5947508811950684, "learning_rate": 0.0003524987320608383, "loss": 3.0138, "step": 27180 }, { "epoch": 1.33, "grad_norm": 0.555055558681488, "learning_rate": 0.00035248357337706935, "loss": 3.1569, "step": 27181 }, { "epoch": 1.33, "grad_norm": 0.5873993039131165, "learning_rate": 0.00035246841455506875, "loss": 3.0096, "step": 27182 }, { "epoch": 1.33, "grad_norm": 0.5661149621009827, "learning_rate": 0.00035245325559487645, "loss": 2.9492, "step": 27183 }, { "epoch": 1.33, "grad_norm": 0.55522221326828, "learning_rate": 0.0003524380964965324, "loss": 3.1046, "step": 27184 }, { "epoch": 1.33, "grad_norm": 0.5756954550743103, "learning_rate": 0.00035242293726007643, "loss": 3.1467, "step": 27185 }, { "epoch": 1.33, "grad_norm": 0.5577954649925232, "learning_rate": 0.00035240777788554857, "loss": 3.1245, "step": 27186 }, { "epoch": 1.33, "grad_norm": 0.5636711120605469, "learning_rate": 0.00035239261837298876, "loss": 3.1186, "step": 27187 }, { "epoch": 1.33, "grad_norm": 0.5825818181037903, "learning_rate": 0.0003523774587224368, "loss": 3.1649, "step": 27188 }, { "epoch": 1.33, "grad_norm": 0.570296585559845, "learning_rate": 0.0003523622989339327, "loss": 2.9089, "step": 27189 }, { "epoch": 1.33, "grad_norm": 0.5361173152923584, "learning_rate": 0.0003523471390075164, "loss": 2.944, "step": 27190 }, { "epoch": 1.33, "grad_norm": 0.5616334080696106, "learning_rate": 0.0003523319789432278, "loss": 3.2198, "step": 27191 }, { "epoch": 1.33, "grad_norm": 0.6106418371200562, "learning_rate": 0.00035231681874110685, "loss": 3.0541, "step": 27192 }, { "epoch": 1.33, "grad_norm": 0.6105589270591736, "learning_rate": 0.00035230165840119344, "loss": 3.1066, "step": 27193 }, { "epoch": 1.33, "grad_norm": 0.5929547548294067, "learning_rate": 0.0003522864979235276, "loss": 2.9546, "step": 27194 }, { "epoch": 1.33, "grad_norm": 0.5807821154594421, "learning_rate": 0.0003522713373081491, "loss": 3.1256, "step": 27195 }, { "epoch": 1.33, "grad_norm": 0.5757489204406738, "learning_rate": 0.00035225617655509797, "loss": 3.0397, "step": 27196 }, { "epoch": 1.33, "grad_norm": 0.5699480772018433, "learning_rate": 0.0003522410156644142, "loss": 3.3366, "step": 27197 }, { "epoch": 1.33, "grad_norm": 0.6023076176643372, "learning_rate": 0.00035222585463613764, "loss": 3.103, "step": 27198 }, { "epoch": 1.33, "grad_norm": 0.5710323452949524, "learning_rate": 0.0003522106934703082, "loss": 3.1119, "step": 27199 }, { "epoch": 1.33, "grad_norm": 0.5762554407119751, "learning_rate": 0.0003521955321669658, "loss": 3.0356, "step": 27200 }, { "epoch": 1.33, "grad_norm": 0.5505509972572327, "learning_rate": 0.00035218037072615047, "loss": 3.0826, "step": 27201 }, { "epoch": 1.33, "grad_norm": 0.5784319639205933, "learning_rate": 0.00035216520914790205, "loss": 3.0583, "step": 27202 }, { "epoch": 1.33, "grad_norm": 0.5438815951347351, "learning_rate": 0.00035215004743226056, "loss": 2.8998, "step": 27203 }, { "epoch": 1.33, "grad_norm": 0.5637693405151367, "learning_rate": 0.0003521348855792658, "loss": 3.0585, "step": 27204 }, { "epoch": 1.33, "grad_norm": 0.5552592277526855, "learning_rate": 0.00035211972358895776, "loss": 2.8598, "step": 27205 }, { "epoch": 1.33, "grad_norm": 0.6333317756652832, "learning_rate": 0.0003521045614613765, "loss": 3.0529, "step": 27206 }, { "epoch": 1.33, "grad_norm": 0.6301619410514832, "learning_rate": 0.0003520893991965618, "loss": 2.963, "step": 27207 }, { "epoch": 1.33, "grad_norm": 0.5590476989746094, "learning_rate": 0.0003520742367945537, "loss": 2.9396, "step": 27208 }, { "epoch": 1.33, "grad_norm": 0.5794091820716858, "learning_rate": 0.00035205907425539214, "loss": 3.0028, "step": 27209 }, { "epoch": 1.33, "grad_norm": 0.5639303922653198, "learning_rate": 0.00035204391157911684, "loss": 3.0661, "step": 27210 }, { "epoch": 1.33, "grad_norm": 0.5517282485961914, "learning_rate": 0.0003520287487657679, "loss": 2.9745, "step": 27211 }, { "epoch": 1.33, "grad_norm": 0.6066323518753052, "learning_rate": 0.0003520135858153854, "loss": 3.1238, "step": 27212 }, { "epoch": 1.33, "grad_norm": 0.5425803661346436, "learning_rate": 0.000351998422728009, "loss": 3.1991, "step": 27213 }, { "epoch": 1.33, "grad_norm": 0.5908896923065186, "learning_rate": 0.0003519832595036788, "loss": 3.1234, "step": 27214 }, { "epoch": 1.33, "grad_norm": 0.5824942588806152, "learning_rate": 0.0003519680961424347, "loss": 3.0476, "step": 27215 }, { "epoch": 1.33, "grad_norm": 0.5757591724395752, "learning_rate": 0.00035195293264431664, "loss": 3.0747, "step": 27216 }, { "epoch": 1.33, "grad_norm": 0.5482543110847473, "learning_rate": 0.00035193776900936457, "loss": 3.2661, "step": 27217 }, { "epoch": 1.33, "grad_norm": 0.6184133887290955, "learning_rate": 0.00035192260523761833, "loss": 3.3704, "step": 27218 }, { "epoch": 1.33, "grad_norm": 0.5994088053703308, "learning_rate": 0.000351907441329118, "loss": 2.8227, "step": 27219 }, { "epoch": 1.33, "grad_norm": 0.5714132785797119, "learning_rate": 0.0003518922772839035, "loss": 3.102, "step": 27220 }, { "epoch": 1.33, "grad_norm": 0.5737739205360413, "learning_rate": 0.00035187711310201463, "loss": 3.0617, "step": 27221 }, { "epoch": 1.33, "grad_norm": 0.5780137777328491, "learning_rate": 0.0003518619487834915, "loss": 3.0814, "step": 27222 }, { "epoch": 1.33, "grad_norm": 0.5979353785514832, "learning_rate": 0.00035184678432837396, "loss": 3.1507, "step": 27223 }, { "epoch": 1.33, "grad_norm": 0.5943918824195862, "learning_rate": 0.000351831619736702, "loss": 2.9943, "step": 27224 }, { "epoch": 1.33, "grad_norm": 0.5988668203353882, "learning_rate": 0.0003518164550085155, "loss": 3.1677, "step": 27225 }, { "epoch": 1.33, "grad_norm": 0.5408164262771606, "learning_rate": 0.00035180129014385437, "loss": 3.1091, "step": 27226 }, { "epoch": 1.33, "grad_norm": 0.5693389177322388, "learning_rate": 0.0003517861251427587, "loss": 3.2419, "step": 27227 }, { "epoch": 1.33, "grad_norm": 0.5514744520187378, "learning_rate": 0.00035177096000526837, "loss": 3.0412, "step": 27228 }, { "epoch": 1.33, "grad_norm": 0.5758994221687317, "learning_rate": 0.0003517557947314232, "loss": 3.0927, "step": 27229 }, { "epoch": 1.33, "grad_norm": 0.5742641687393188, "learning_rate": 0.00035174062932126325, "loss": 3.0443, "step": 27230 }, { "epoch": 1.33, "grad_norm": 0.587285041809082, "learning_rate": 0.00035172546377482843, "loss": 3.2015, "step": 27231 }, { "epoch": 1.33, "grad_norm": 0.5484117865562439, "learning_rate": 0.0003517102980921587, "loss": 3.2877, "step": 27232 }, { "epoch": 1.33, "grad_norm": 0.5856320261955261, "learning_rate": 0.000351695132273294, "loss": 3.1225, "step": 27233 }, { "epoch": 1.33, "grad_norm": 0.5759357810020447, "learning_rate": 0.00035167996631827435, "loss": 3.2482, "step": 27234 }, { "epoch": 1.33, "grad_norm": 0.556387722492218, "learning_rate": 0.0003516648002271396, "loss": 2.9464, "step": 27235 }, { "epoch": 1.33, "grad_norm": 0.5765631198883057, "learning_rate": 0.00035164963399992964, "loss": 3.0678, "step": 27236 }, { "epoch": 1.33, "grad_norm": 0.5664901733398438, "learning_rate": 0.0003516344676366844, "loss": 3.1126, "step": 27237 }, { "epoch": 1.33, "grad_norm": 0.5461036562919617, "learning_rate": 0.00035161930113744404, "loss": 3.123, "step": 27238 }, { "epoch": 1.33, "grad_norm": 0.5736780762672424, "learning_rate": 0.00035160413450224836, "loss": 3.1065, "step": 27239 }, { "epoch": 1.33, "grad_norm": 0.5623251795768738, "learning_rate": 0.00035158896773113737, "loss": 3.1294, "step": 27240 }, { "epoch": 1.34, "grad_norm": 0.5517589449882507, "learning_rate": 0.00035157380082415085, "loss": 3.1211, "step": 27241 }, { "epoch": 1.34, "grad_norm": 0.6113983988761902, "learning_rate": 0.000351558633781329, "loss": 2.9765, "step": 27242 }, { "epoch": 1.34, "grad_norm": 0.5473376512527466, "learning_rate": 0.00035154346660271144, "loss": 3.0213, "step": 27243 }, { "epoch": 1.34, "grad_norm": 0.5686004161834717, "learning_rate": 0.00035152829928833846, "loss": 3.1543, "step": 27244 }, { "epoch": 1.34, "grad_norm": 0.5545783042907715, "learning_rate": 0.0003515131318382498, "loss": 3.1051, "step": 27245 }, { "epoch": 1.34, "grad_norm": 0.5631744861602783, "learning_rate": 0.0003514979642524855, "loss": 3.134, "step": 27246 }, { "epoch": 1.34, "grad_norm": 0.5703157186508179, "learning_rate": 0.0003514827965310854, "loss": 3.1174, "step": 27247 }, { "epoch": 1.34, "grad_norm": 0.5550886392593384, "learning_rate": 0.0003514676286740896, "loss": 3.1043, "step": 27248 }, { "epoch": 1.34, "grad_norm": 0.5608128309249878, "learning_rate": 0.00035145246068153804, "loss": 3.0714, "step": 27249 }, { "epoch": 1.34, "grad_norm": 0.6270610690116882, "learning_rate": 0.00035143729255347045, "loss": 3.0144, "step": 27250 }, { "epoch": 1.34, "grad_norm": 0.6169301867485046, "learning_rate": 0.000351422124289927, "loss": 2.9064, "step": 27251 }, { "epoch": 1.34, "grad_norm": 0.6073354482650757, "learning_rate": 0.0003514069558909475, "loss": 3.1812, "step": 27252 }, { "epoch": 1.34, "grad_norm": 0.5473766326904297, "learning_rate": 0.00035139178735657214, "loss": 3.0509, "step": 27253 }, { "epoch": 1.34, "grad_norm": 0.5443681478500366, "learning_rate": 0.00035137661868684056, "loss": 3.1905, "step": 27254 }, { "epoch": 1.34, "grad_norm": 0.5778075456619263, "learning_rate": 0.00035136144988179286, "loss": 2.8105, "step": 27255 }, { "epoch": 1.34, "grad_norm": 0.581585168838501, "learning_rate": 0.00035134628094146903, "loss": 3.0035, "step": 27256 }, { "epoch": 1.34, "grad_norm": 0.6521691679954529, "learning_rate": 0.0003513311118659089, "loss": 2.892, "step": 27257 }, { "epoch": 1.34, "grad_norm": 0.6202303171157837, "learning_rate": 0.0003513159426551526, "loss": 2.8636, "step": 27258 }, { "epoch": 1.34, "grad_norm": 0.573544979095459, "learning_rate": 0.00035130077330923997, "loss": 3.2428, "step": 27259 }, { "epoch": 1.34, "grad_norm": 0.6818689107894897, "learning_rate": 0.00035128560382821097, "loss": 2.8362, "step": 27260 }, { "epoch": 1.34, "grad_norm": 0.5987098813056946, "learning_rate": 0.0003512704342121055, "loss": 2.9345, "step": 27261 }, { "epoch": 1.34, "grad_norm": 0.6563383936882019, "learning_rate": 0.0003512552644609636, "loss": 2.9929, "step": 27262 }, { "epoch": 1.34, "grad_norm": 0.5845065712928772, "learning_rate": 0.00035124009457482524, "loss": 3.0538, "step": 27263 }, { "epoch": 1.34, "grad_norm": 0.5674929618835449, "learning_rate": 0.0003512249245537303, "loss": 3.1348, "step": 27264 }, { "epoch": 1.34, "grad_norm": 0.5974243879318237, "learning_rate": 0.00035120975439771883, "loss": 3.188, "step": 27265 }, { "epoch": 1.34, "grad_norm": 0.5698199272155762, "learning_rate": 0.0003511945841068306, "loss": 2.9841, "step": 27266 }, { "epoch": 1.34, "grad_norm": 0.5573409795761108, "learning_rate": 0.0003511794136811058, "loss": 2.9835, "step": 27267 }, { "epoch": 1.34, "grad_norm": 0.5393723249435425, "learning_rate": 0.00035116424312058416, "loss": 3.1279, "step": 27268 }, { "epoch": 1.34, "grad_norm": 0.5727754235267639, "learning_rate": 0.0003511490724253058, "loss": 3.0733, "step": 27269 }, { "epoch": 1.34, "grad_norm": 0.5624639391899109, "learning_rate": 0.00035113390159531067, "loss": 3.0563, "step": 27270 }, { "epoch": 1.34, "grad_norm": 0.5651065707206726, "learning_rate": 0.0003511187306306387, "loss": 2.9617, "step": 27271 }, { "epoch": 1.34, "grad_norm": 0.5551247000694275, "learning_rate": 0.00035110355953132976, "loss": 3.096, "step": 27272 }, { "epoch": 1.34, "grad_norm": 0.5362761616706848, "learning_rate": 0.0003510883882974239, "loss": 3.133, "step": 27273 }, { "epoch": 1.34, "grad_norm": 0.6459551453590393, "learning_rate": 0.00035107321692896105, "loss": 2.9986, "step": 27274 }, { "epoch": 1.34, "grad_norm": 0.5925512313842773, "learning_rate": 0.00035105804542598124, "loss": 3.1903, "step": 27275 }, { "epoch": 1.34, "grad_norm": 0.5428948998451233, "learning_rate": 0.0003510428737885243, "loss": 2.8865, "step": 27276 }, { "epoch": 1.34, "grad_norm": 0.5987960696220398, "learning_rate": 0.0003510277020166303, "loss": 3.1261, "step": 27277 }, { "epoch": 1.34, "grad_norm": 0.5686396956443787, "learning_rate": 0.00035101253011033914, "loss": 2.9944, "step": 27278 }, { "epoch": 1.34, "grad_norm": 0.5981069207191467, "learning_rate": 0.00035099735806969073, "loss": 3.1263, "step": 27279 }, { "epoch": 1.34, "grad_norm": 0.5610902309417725, "learning_rate": 0.0003509821858947252, "loss": 2.8438, "step": 27280 }, { "epoch": 1.34, "grad_norm": 0.5656661987304688, "learning_rate": 0.0003509670135854823, "loss": 3.1531, "step": 27281 }, { "epoch": 1.34, "grad_norm": 0.5572855472564697, "learning_rate": 0.0003509518411420022, "loss": 3.1005, "step": 27282 }, { "epoch": 1.34, "grad_norm": 0.5681395530700684, "learning_rate": 0.0003509366685643246, "loss": 3.1331, "step": 27283 }, { "epoch": 1.34, "grad_norm": 0.603127121925354, "learning_rate": 0.0003509214958524897, "loss": 3.265, "step": 27284 }, { "epoch": 1.34, "grad_norm": 0.5976519584655762, "learning_rate": 0.0003509063230065374, "loss": 2.9344, "step": 27285 }, { "epoch": 1.34, "grad_norm": 0.5389209985733032, "learning_rate": 0.0003508911500265077, "loss": 3.0831, "step": 27286 }, { "epoch": 1.34, "grad_norm": 0.5420101881027222, "learning_rate": 0.0003508759769124405, "loss": 3.0395, "step": 27287 }, { "epoch": 1.34, "grad_norm": 0.5472182035446167, "learning_rate": 0.00035086080366437554, "loss": 2.9825, "step": 27288 }, { "epoch": 1.34, "grad_norm": 0.5901409983634949, "learning_rate": 0.0003508456302823533, "loss": 3.0206, "step": 27289 }, { "epoch": 1.34, "grad_norm": 0.5679577589035034, "learning_rate": 0.0003508304567664133, "loss": 3.061, "step": 27290 }, { "epoch": 1.34, "grad_norm": 0.5943844318389893, "learning_rate": 0.0003508152831165957, "loss": 2.9039, "step": 27291 }, { "epoch": 1.34, "grad_norm": 0.619647741317749, "learning_rate": 0.0003508001093329404, "loss": 3.2114, "step": 27292 }, { "epoch": 1.34, "grad_norm": 0.5737244486808777, "learning_rate": 0.0003507849354154874, "loss": 2.9268, "step": 27293 }, { "epoch": 1.34, "grad_norm": 0.574532687664032, "learning_rate": 0.00035076976136427665, "loss": 3.3231, "step": 27294 }, { "epoch": 1.34, "grad_norm": 0.6363462805747986, "learning_rate": 0.00035075458717934816, "loss": 3.227, "step": 27295 }, { "epoch": 1.34, "grad_norm": 0.5762098431587219, "learning_rate": 0.00035073941286074183, "loss": 3.2721, "step": 27296 }, { "epoch": 1.34, "grad_norm": 0.6113587021827698, "learning_rate": 0.0003507242384084977, "loss": 2.9124, "step": 27297 }, { "epoch": 1.34, "grad_norm": 0.5779880881309509, "learning_rate": 0.00035070906382265554, "loss": 3.2056, "step": 27298 }, { "epoch": 1.34, "grad_norm": 0.5307174921035767, "learning_rate": 0.00035069388910325555, "loss": 2.8949, "step": 27299 }, { "epoch": 1.34, "grad_norm": 0.5478222966194153, "learning_rate": 0.0003506787142503377, "loss": 3.1332, "step": 27300 }, { "epoch": 1.34, "grad_norm": 0.5192190408706665, "learning_rate": 0.0003506635392639418, "loss": 3.0535, "step": 27301 }, { "epoch": 1.34, "grad_norm": 0.5842856168746948, "learning_rate": 0.00035064836414410783, "loss": 2.8508, "step": 27302 }, { "epoch": 1.34, "grad_norm": 0.5742753148078918, "learning_rate": 0.00035063318889087586, "loss": 3.1244, "step": 27303 }, { "epoch": 1.34, "grad_norm": 0.576598048210144, "learning_rate": 0.0003506180135042859, "loss": 3.0379, "step": 27304 }, { "epoch": 1.34, "grad_norm": 0.5685365796089172, "learning_rate": 0.0003506028379843777, "loss": 3.1901, "step": 27305 }, { "epoch": 1.34, "grad_norm": 0.6361297965049744, "learning_rate": 0.0003505876623311914, "loss": 3.3425, "step": 27306 }, { "epoch": 1.34, "grad_norm": 0.6099052429199219, "learning_rate": 0.00035057248654476694, "loss": 3.1613, "step": 27307 }, { "epoch": 1.34, "grad_norm": 0.5758441686630249, "learning_rate": 0.0003505573106251443, "loss": 3.2276, "step": 27308 }, { "epoch": 1.34, "grad_norm": 0.5512416958808899, "learning_rate": 0.00035054213457236347, "loss": 3.0823, "step": 27309 }, { "epoch": 1.34, "grad_norm": 0.5952394008636475, "learning_rate": 0.0003505269583864643, "loss": 3.3958, "step": 27310 }, { "epoch": 1.34, "grad_norm": 0.569972813129425, "learning_rate": 0.00035051178206748696, "loss": 3.049, "step": 27311 }, { "epoch": 1.34, "grad_norm": 0.5571011304855347, "learning_rate": 0.00035049660561547124, "loss": 3.2249, "step": 27312 }, { "epoch": 1.34, "grad_norm": 0.6160045862197876, "learning_rate": 0.0003504814290304572, "loss": 3.3971, "step": 27313 }, { "epoch": 1.34, "grad_norm": 0.6157644987106323, "learning_rate": 0.0003504662523124848, "loss": 2.9801, "step": 27314 }, { "epoch": 1.34, "grad_norm": 0.5774762034416199, "learning_rate": 0.00035045107546159395, "loss": 3.0074, "step": 27315 }, { "epoch": 1.34, "grad_norm": 0.6000568270683289, "learning_rate": 0.00035043589847782465, "loss": 2.9885, "step": 27316 }, { "epoch": 1.34, "grad_norm": 0.5656048655509949, "learning_rate": 0.00035042072136121696, "loss": 3.1387, "step": 27317 }, { "epoch": 1.34, "grad_norm": 0.613006055355072, "learning_rate": 0.0003504055441118108, "loss": 3.0847, "step": 27318 }, { "epoch": 1.34, "grad_norm": 0.5949397683143616, "learning_rate": 0.00035039036672964607, "loss": 3.1484, "step": 27319 }, { "epoch": 1.34, "grad_norm": 0.5421934127807617, "learning_rate": 0.00035037518921476283, "loss": 3.214, "step": 27320 }, { "epoch": 1.34, "grad_norm": 0.574578046798706, "learning_rate": 0.0003503600115672011, "loss": 3.0972, "step": 27321 }, { "epoch": 1.34, "grad_norm": 0.5909102559089661, "learning_rate": 0.0003503448337870008, "loss": 3.0916, "step": 27322 }, { "epoch": 1.34, "grad_norm": 0.5599302649497986, "learning_rate": 0.00035032965587420187, "loss": 2.9846, "step": 27323 }, { "epoch": 1.34, "grad_norm": 0.558342695236206, "learning_rate": 0.0003503144778288442, "loss": 3.1657, "step": 27324 }, { "epoch": 1.34, "grad_norm": 0.6576507091522217, "learning_rate": 0.000350299299650968, "loss": 2.9349, "step": 27325 }, { "epoch": 1.34, "grad_norm": 0.5906968712806702, "learning_rate": 0.00035028412134061314, "loss": 2.8856, "step": 27326 }, { "epoch": 1.34, "grad_norm": 0.53647381067276, "learning_rate": 0.0003502689428978196, "loss": 3.2723, "step": 27327 }, { "epoch": 1.34, "grad_norm": 0.8482022285461426, "learning_rate": 0.00035025376432262724, "loss": 3.0158, "step": 27328 }, { "epoch": 1.34, "grad_norm": 0.603201687335968, "learning_rate": 0.0003502385856150762, "loss": 3.1107, "step": 27329 }, { "epoch": 1.34, "grad_norm": 0.551382303237915, "learning_rate": 0.0003502234067752064, "loss": 3.1786, "step": 27330 }, { "epoch": 1.34, "grad_norm": 0.5987643003463745, "learning_rate": 0.0003502082278030577, "loss": 3.0718, "step": 27331 }, { "epoch": 1.34, "grad_norm": 0.5653188228607178, "learning_rate": 0.0003501930486986703, "loss": 2.962, "step": 27332 }, { "epoch": 1.34, "grad_norm": 0.5514883399009705, "learning_rate": 0.0003501778694620841, "loss": 3.1602, "step": 27333 }, { "epoch": 1.34, "grad_norm": 0.571922242641449, "learning_rate": 0.0003501626900933389, "loss": 3.0968, "step": 27334 }, { "epoch": 1.34, "grad_norm": 0.5911169648170471, "learning_rate": 0.0003501475105924749, "loss": 3.297, "step": 27335 }, { "epoch": 1.34, "grad_norm": 0.5552850365638733, "learning_rate": 0.0003501323309595321, "loss": 3.193, "step": 27336 }, { "epoch": 1.34, "grad_norm": 0.582349419593811, "learning_rate": 0.00035011715119455036, "loss": 2.9565, "step": 27337 }, { "epoch": 1.34, "grad_norm": 0.5713590383529663, "learning_rate": 0.0003501019712975696, "loss": 3.1794, "step": 27338 }, { "epoch": 1.34, "grad_norm": 0.5552675127983093, "learning_rate": 0.00035008679126862993, "loss": 2.889, "step": 27339 }, { "epoch": 1.34, "grad_norm": 0.548956573009491, "learning_rate": 0.00035007161110777135, "loss": 3.2242, "step": 27340 }, { "epoch": 1.34, "grad_norm": 0.5811518430709839, "learning_rate": 0.0003500564308150337, "loss": 3.1934, "step": 27341 }, { "epoch": 1.34, "grad_norm": 0.5775765776634216, "learning_rate": 0.0003500412503904571, "loss": 3.0469, "step": 27342 }, { "epoch": 1.34, "grad_norm": 0.5719749927520752, "learning_rate": 0.00035002606983408147, "loss": 2.9931, "step": 27343 }, { "epoch": 1.34, "grad_norm": 0.5796716809272766, "learning_rate": 0.00035001088914594675, "loss": 3.1023, "step": 27344 }, { "epoch": 1.34, "grad_norm": 0.5906006693840027, "learning_rate": 0.000349995708326093, "loss": 3.2135, "step": 27345 }, { "epoch": 1.34, "grad_norm": 0.58286052942276, "learning_rate": 0.0003499805273745602, "loss": 3.2402, "step": 27346 }, { "epoch": 1.34, "grad_norm": 0.5934612154960632, "learning_rate": 0.00034996534629138824, "loss": 3.0281, "step": 27347 }, { "epoch": 1.34, "grad_norm": 0.5473476648330688, "learning_rate": 0.00034995016507661733, "loss": 3.0496, "step": 27348 }, { "epoch": 1.34, "grad_norm": 0.5469577312469482, "learning_rate": 0.0003499349837302871, "loss": 3.2524, "step": 27349 }, { "epoch": 1.34, "grad_norm": 0.6002890467643738, "learning_rate": 0.0003499198022524379, "loss": 3.1881, "step": 27350 }, { "epoch": 1.34, "grad_norm": 0.564316987991333, "learning_rate": 0.00034990462064310945, "loss": 3.2093, "step": 27351 }, { "epoch": 1.34, "grad_norm": 0.5971184968948364, "learning_rate": 0.00034988943890234186, "loss": 2.937, "step": 27352 }, { "epoch": 1.34, "grad_norm": 0.5803214311599731, "learning_rate": 0.0003498742570301751, "loss": 2.9276, "step": 27353 }, { "epoch": 1.34, "grad_norm": 0.5375666618347168, "learning_rate": 0.00034985907502664917, "loss": 3.1482, "step": 27354 }, { "epoch": 1.34, "grad_norm": 0.60368412733078, "learning_rate": 0.00034984389289180397, "loss": 3.1235, "step": 27355 }, { "epoch": 1.34, "grad_norm": 0.6041290760040283, "learning_rate": 0.00034982871062567956, "loss": 3.1815, "step": 27356 }, { "epoch": 1.34, "grad_norm": 0.5556178092956543, "learning_rate": 0.00034981352822831594, "loss": 3.0318, "step": 27357 }, { "epoch": 1.34, "grad_norm": 0.5745914578437805, "learning_rate": 0.000349798345699753, "loss": 2.7708, "step": 27358 }, { "epoch": 1.34, "grad_norm": 0.5874989628791809, "learning_rate": 0.00034978316304003096, "loss": 2.9932, "step": 27359 }, { "epoch": 1.34, "grad_norm": 0.5912497043609619, "learning_rate": 0.0003497679802491895, "loss": 2.8349, "step": 27360 }, { "epoch": 1.34, "grad_norm": 0.5921602845191956, "learning_rate": 0.0003497527973272688, "loss": 2.8376, "step": 27361 }, { "epoch": 1.34, "grad_norm": 0.5861170887947083, "learning_rate": 0.0003497376142743089, "loss": 2.9823, "step": 27362 }, { "epoch": 1.34, "grad_norm": 0.6155903339385986, "learning_rate": 0.00034972243109034957, "loss": 3.0171, "step": 27363 }, { "epoch": 1.34, "grad_norm": 0.5605053901672363, "learning_rate": 0.00034970724777543097, "loss": 3.1852, "step": 27364 }, { "epoch": 1.34, "grad_norm": 0.5607673525810242, "learning_rate": 0.0003496920643295931, "loss": 3.17, "step": 27365 }, { "epoch": 1.34, "grad_norm": 0.5722825527191162, "learning_rate": 0.00034967688075287584, "loss": 3.1404, "step": 27366 }, { "epoch": 1.34, "grad_norm": 0.5873673558235168, "learning_rate": 0.0003496616970453192, "loss": 3.0477, "step": 27367 }, { "epoch": 1.34, "grad_norm": 0.6069886088371277, "learning_rate": 0.00034964651320696326, "loss": 2.9818, "step": 27368 }, { "epoch": 1.34, "grad_norm": 0.5635610222816467, "learning_rate": 0.00034963132923784796, "loss": 2.8535, "step": 27369 }, { "epoch": 1.34, "grad_norm": 0.5699170231819153, "learning_rate": 0.0003496161451380133, "loss": 3.075, "step": 27370 }, { "epoch": 1.34, "grad_norm": 0.5741016268730164, "learning_rate": 0.00034960096090749923, "loss": 3.0363, "step": 27371 }, { "epoch": 1.34, "grad_norm": 0.5453298687934875, "learning_rate": 0.00034958577654634575, "loss": 2.9578, "step": 27372 }, { "epoch": 1.34, "grad_norm": 0.577882707118988, "learning_rate": 0.000349570592054593, "loss": 2.9079, "step": 27373 }, { "epoch": 1.34, "grad_norm": 0.5363011360168457, "learning_rate": 0.0003495554074322807, "loss": 3.054, "step": 27374 }, { "epoch": 1.34, "grad_norm": 0.5371752381324768, "learning_rate": 0.000349540222679449, "loss": 3.0363, "step": 27375 }, { "epoch": 1.34, "grad_norm": 0.5992576479911804, "learning_rate": 0.000349525037796138, "loss": 3.2143, "step": 27376 }, { "epoch": 1.34, "grad_norm": 0.5904133319854736, "learning_rate": 0.00034950985278238753, "loss": 3.2084, "step": 27377 }, { "epoch": 1.34, "grad_norm": 0.6310132145881653, "learning_rate": 0.00034949466763823766, "loss": 2.872, "step": 27378 }, { "epoch": 1.34, "grad_norm": 0.5347175002098083, "learning_rate": 0.0003494794823637283, "loss": 2.8966, "step": 27379 }, { "epoch": 1.34, "grad_norm": 0.5774542689323425, "learning_rate": 0.00034946429695889954, "loss": 3.0197, "step": 27380 }, { "epoch": 1.34, "grad_norm": 0.6394713521003723, "learning_rate": 0.00034944911142379136, "loss": 3.0879, "step": 27381 }, { "epoch": 1.34, "grad_norm": 0.5754245519638062, "learning_rate": 0.0003494339257584437, "loss": 3.056, "step": 27382 }, { "epoch": 1.34, "grad_norm": 0.5918018817901611, "learning_rate": 0.0003494187399628966, "loss": 3.0525, "step": 27383 }, { "epoch": 1.34, "grad_norm": 0.5479419231414795, "learning_rate": 0.0003494035540371901, "loss": 3.0767, "step": 27384 }, { "epoch": 1.34, "grad_norm": 0.5651912093162537, "learning_rate": 0.0003493883679813641, "loss": 3.0674, "step": 27385 }, { "epoch": 1.34, "grad_norm": 0.610517680644989, "learning_rate": 0.0003493731817954586, "loss": 2.9939, "step": 27386 }, { "epoch": 1.34, "grad_norm": 0.5542714595794678, "learning_rate": 0.0003493579954795137, "loss": 2.9815, "step": 27387 }, { "epoch": 1.34, "grad_norm": 0.5359601974487305, "learning_rate": 0.0003493428090335694, "loss": 3.1892, "step": 27388 }, { "epoch": 1.34, "grad_norm": 0.5919166207313538, "learning_rate": 0.00034932762245766557, "loss": 3.1598, "step": 27389 }, { "epoch": 1.34, "grad_norm": 0.5871334075927734, "learning_rate": 0.0003493124357518422, "loss": 3.1153, "step": 27390 }, { "epoch": 1.34, "grad_norm": 0.5805013179779053, "learning_rate": 0.0003492972489161395, "loss": 3.0343, "step": 27391 }, { "epoch": 1.34, "grad_norm": 0.6670655012130737, "learning_rate": 0.0003492820619505973, "loss": 3.1204, "step": 27392 }, { "epoch": 1.34, "grad_norm": 0.5655217170715332, "learning_rate": 0.00034926687485525555, "loss": 2.9918, "step": 27393 }, { "epoch": 1.34, "grad_norm": 0.5714253187179565, "learning_rate": 0.00034925168763015444, "loss": 3.0133, "step": 27394 }, { "epoch": 1.34, "grad_norm": 0.6281166076660156, "learning_rate": 0.0003492365002753338, "loss": 3.1404, "step": 27395 }, { "epoch": 1.34, "grad_norm": 0.6088977456092834, "learning_rate": 0.00034922131279083364, "loss": 2.8393, "step": 27396 }, { "epoch": 1.34, "grad_norm": 0.6364918351173401, "learning_rate": 0.0003492061251766941, "loss": 3.0798, "step": 27397 }, { "epoch": 1.34, "grad_norm": 0.5799409747123718, "learning_rate": 0.0003491909374329551, "loss": 2.9797, "step": 27398 }, { "epoch": 1.34, "grad_norm": 0.5414788126945496, "learning_rate": 0.00034917574955965666, "loss": 3.2742, "step": 27399 }, { "epoch": 1.34, "grad_norm": 0.5648382902145386, "learning_rate": 0.00034916056155683873, "loss": 3.2028, "step": 27400 }, { "epoch": 1.34, "grad_norm": 0.5464253425598145, "learning_rate": 0.00034914537342454127, "loss": 3.0764, "step": 27401 }, { "epoch": 1.34, "grad_norm": 0.5706402063369751, "learning_rate": 0.00034913018516280443, "loss": 3.0302, "step": 27402 }, { "epoch": 1.34, "grad_norm": 0.5839251279830933, "learning_rate": 0.0003491149967716681, "loss": 3.1927, "step": 27403 }, { "epoch": 1.34, "grad_norm": 0.5891869068145752, "learning_rate": 0.0003490998082511724, "loss": 3.1018, "step": 27404 }, { "epoch": 1.34, "grad_norm": 0.6145084500312805, "learning_rate": 0.00034908461960135714, "loss": 3.0066, "step": 27405 }, { "epoch": 1.34, "grad_norm": 0.6099284887313843, "learning_rate": 0.0003490694308222625, "loss": 2.9118, "step": 27406 }, { "epoch": 1.34, "grad_norm": 0.5804110765457153, "learning_rate": 0.0003490542419139284, "loss": 3.0253, "step": 27407 }, { "epoch": 1.34, "grad_norm": 0.6241898536682129, "learning_rate": 0.0003490390528763949, "loss": 3.0083, "step": 27408 }, { "epoch": 1.34, "grad_norm": 0.5708284974098206, "learning_rate": 0.00034902386370970197, "loss": 3.1177, "step": 27409 }, { "epoch": 1.34, "grad_norm": 0.5617969036102295, "learning_rate": 0.00034900867441388963, "loss": 3.1507, "step": 27410 }, { "epoch": 1.34, "grad_norm": 0.5512824058532715, "learning_rate": 0.0003489934849889978, "loss": 3.2521, "step": 27411 }, { "epoch": 1.34, "grad_norm": 0.5705128908157349, "learning_rate": 0.00034897829543506666, "loss": 2.9359, "step": 27412 }, { "epoch": 1.34, "grad_norm": 0.5484633445739746, "learning_rate": 0.00034896310575213604, "loss": 3.2642, "step": 27413 }, { "epoch": 1.34, "grad_norm": 0.6320682764053345, "learning_rate": 0.00034894791594024607, "loss": 3.0843, "step": 27414 }, { "epoch": 1.34, "grad_norm": 0.5714908242225647, "learning_rate": 0.0003489327259994367, "loss": 3.2425, "step": 27415 }, { "epoch": 1.34, "grad_norm": 0.5834794640541077, "learning_rate": 0.0003489175359297479, "loss": 3.0362, "step": 27416 }, { "epoch": 1.34, "grad_norm": 0.580422580242157, "learning_rate": 0.0003489023457312198, "loss": 3.0221, "step": 27417 }, { "epoch": 1.34, "grad_norm": 0.5791642665863037, "learning_rate": 0.00034888715540389226, "loss": 3.1245, "step": 27418 }, { "epoch": 1.34, "grad_norm": 0.5940735936164856, "learning_rate": 0.00034887196494780536, "loss": 3.1225, "step": 27419 }, { "epoch": 1.34, "grad_norm": 0.5686174035072327, "learning_rate": 0.00034885677436299916, "loss": 2.943, "step": 27420 }, { "epoch": 1.34, "grad_norm": 0.5724006295204163, "learning_rate": 0.00034884158364951356, "loss": 3.0081, "step": 27421 }, { "epoch": 1.34, "grad_norm": 0.646140456199646, "learning_rate": 0.0003488263928073887, "loss": 3.0739, "step": 27422 }, { "epoch": 1.34, "grad_norm": 0.6348581314086914, "learning_rate": 0.0003488112018366645, "loss": 3.2155, "step": 27423 }, { "epoch": 1.34, "grad_norm": 0.5576120018959045, "learning_rate": 0.000348796010737381, "loss": 3.326, "step": 27424 }, { "epoch": 1.34, "grad_norm": 0.5346035361289978, "learning_rate": 0.0003487808195095782, "loss": 2.9394, "step": 27425 }, { "epoch": 1.34, "grad_norm": 0.6104670763015747, "learning_rate": 0.000348765628153296, "loss": 3.2695, "step": 27426 }, { "epoch": 1.34, "grad_norm": 0.5855081081390381, "learning_rate": 0.0003487504366685747, "loss": 3.1842, "step": 27427 }, { "epoch": 1.34, "grad_norm": 0.604956865310669, "learning_rate": 0.00034873524505545404, "loss": 2.8405, "step": 27428 }, { "epoch": 1.34, "grad_norm": 0.5948700904846191, "learning_rate": 0.0003487200533139741, "loss": 3.2639, "step": 27429 }, { "epoch": 1.34, "grad_norm": 0.5722827911376953, "learning_rate": 0.00034870486144417486, "loss": 3.0473, "step": 27430 }, { "epoch": 1.34, "grad_norm": 0.5703803300857544, "learning_rate": 0.0003486896694460965, "loss": 3.1484, "step": 27431 }, { "epoch": 1.34, "grad_norm": 0.5681774616241455, "learning_rate": 0.00034867447731977885, "loss": 2.9344, "step": 27432 }, { "epoch": 1.34, "grad_norm": 0.5943686962127686, "learning_rate": 0.00034865928506526206, "loss": 3.2241, "step": 27433 }, { "epoch": 1.34, "grad_norm": 0.5889038443565369, "learning_rate": 0.00034864409268258606, "loss": 3.0752, "step": 27434 }, { "epoch": 1.34, "grad_norm": 0.551078736782074, "learning_rate": 0.0003486289001717909, "loss": 3.0876, "step": 27435 }, { "epoch": 1.34, "grad_norm": 0.5845509767532349, "learning_rate": 0.00034861370753291654, "loss": 3.1042, "step": 27436 }, { "epoch": 1.34, "grad_norm": 0.5808750987052917, "learning_rate": 0.000348598514766003, "loss": 3.0015, "step": 27437 }, { "epoch": 1.34, "grad_norm": 0.5626875758171082, "learning_rate": 0.00034858332187109035, "loss": 3.1874, "step": 27438 }, { "epoch": 1.34, "grad_norm": 0.5855140089988708, "learning_rate": 0.0003485681288482186, "loss": 3.0787, "step": 27439 }, { "epoch": 1.34, "grad_norm": 0.5767703056335449, "learning_rate": 0.00034855293569742776, "loss": 3.1796, "step": 27440 }, { "epoch": 1.34, "grad_norm": 0.5492141246795654, "learning_rate": 0.0003485377424187578, "loss": 2.7963, "step": 27441 }, { "epoch": 1.34, "grad_norm": 0.5425765514373779, "learning_rate": 0.0003485225490122488, "loss": 3.1762, "step": 27442 }, { "epoch": 1.34, "grad_norm": 0.5987265706062317, "learning_rate": 0.00034850735547794066, "loss": 3.1511, "step": 27443 }, { "epoch": 1.34, "grad_norm": 0.5857033729553223, "learning_rate": 0.00034849216181587363, "loss": 3.1447, "step": 27444 }, { "epoch": 1.35, "grad_norm": 0.6146132946014404, "learning_rate": 0.0003484769680260874, "loss": 3.1278, "step": 27445 }, { "epoch": 1.35, "grad_norm": 0.5451048016548157, "learning_rate": 0.00034846177410862236, "loss": 3.1896, "step": 27446 }, { "epoch": 1.35, "grad_norm": 0.6035172343254089, "learning_rate": 0.00034844658006351814, "loss": 3.1108, "step": 27447 }, { "epoch": 1.35, "grad_norm": 0.5181143283843994, "learning_rate": 0.0003484313858908151, "loss": 3.073, "step": 27448 }, { "epoch": 1.35, "grad_norm": 0.5882478356361389, "learning_rate": 0.000348416191590553, "loss": 3.1133, "step": 27449 }, { "epoch": 1.35, "grad_norm": 0.5757107138633728, "learning_rate": 0.00034840099716277207, "loss": 3.1586, "step": 27450 }, { "epoch": 1.35, "grad_norm": 0.5847183465957642, "learning_rate": 0.0003483858026075122, "loss": 3.0846, "step": 27451 }, { "epoch": 1.35, "grad_norm": 0.6361482739448547, "learning_rate": 0.00034837060792481333, "loss": 3.0394, "step": 27452 }, { "epoch": 1.35, "grad_norm": 0.5674738883972168, "learning_rate": 0.00034835541311471573, "loss": 2.8753, "step": 27453 }, { "epoch": 1.35, "grad_norm": 0.5853537321090698, "learning_rate": 0.00034834021817725923, "loss": 2.982, "step": 27454 }, { "epoch": 1.35, "grad_norm": 0.6073134541511536, "learning_rate": 0.00034832502311248385, "loss": 3.1412, "step": 27455 }, { "epoch": 1.35, "grad_norm": 0.5832349061965942, "learning_rate": 0.00034830982792042974, "loss": 3.0898, "step": 27456 }, { "epoch": 1.35, "grad_norm": 0.5568851828575134, "learning_rate": 0.00034829463260113675, "loss": 2.9267, "step": 27457 }, { "epoch": 1.35, "grad_norm": 0.5909299850463867, "learning_rate": 0.00034827943715464506, "loss": 2.9748, "step": 27458 }, { "epoch": 1.35, "grad_norm": 0.5588631629943848, "learning_rate": 0.0003482642415809946, "loss": 2.9251, "step": 27459 }, { "epoch": 1.35, "grad_norm": 0.5427162051200867, "learning_rate": 0.0003482490458802254, "loss": 3.0714, "step": 27460 }, { "epoch": 1.35, "grad_norm": 0.5868051052093506, "learning_rate": 0.00034823385005237765, "loss": 3.2121, "step": 27461 }, { "epoch": 1.35, "grad_norm": 0.5954635143280029, "learning_rate": 0.00034821865409749103, "loss": 3.2533, "step": 27462 }, { "epoch": 1.35, "grad_norm": 0.5590815544128418, "learning_rate": 0.0003482034580156058, "loss": 3.1354, "step": 27463 }, { "epoch": 1.35, "grad_norm": 0.5484957695007324, "learning_rate": 0.00034818826180676196, "loss": 2.9512, "step": 27464 }, { "epoch": 1.35, "grad_norm": 0.6143427491188049, "learning_rate": 0.0003481730654709995, "loss": 2.9583, "step": 27465 }, { "epoch": 1.35, "grad_norm": 0.602432131767273, "learning_rate": 0.0003481578690083585, "loss": 3.1544, "step": 27466 }, { "epoch": 1.35, "grad_norm": 0.5481232404708862, "learning_rate": 0.0003481426724188789, "loss": 3.1557, "step": 27467 }, { "epoch": 1.35, "grad_norm": 0.5532791018486023, "learning_rate": 0.00034812747570260073, "loss": 3.0697, "step": 27468 }, { "epoch": 1.35, "grad_norm": 0.5800402164459229, "learning_rate": 0.0003481122788595641, "loss": 2.9307, "step": 27469 }, { "epoch": 1.35, "grad_norm": 0.6530680656433105, "learning_rate": 0.00034809708188980896, "loss": 3.2462, "step": 27470 }, { "epoch": 1.35, "grad_norm": 0.5887762308120728, "learning_rate": 0.0003480818847933754, "loss": 3.0021, "step": 27471 }, { "epoch": 1.35, "grad_norm": 0.5353226661682129, "learning_rate": 0.0003480666875703035, "loss": 3.0513, "step": 27472 }, { "epoch": 1.35, "grad_norm": 0.5689337253570557, "learning_rate": 0.00034805149022063296, "loss": 3.1072, "step": 27473 }, { "epoch": 1.35, "grad_norm": 0.5485574007034302, "learning_rate": 0.0003480362927444042, "loss": 3.1626, "step": 27474 }, { "epoch": 1.35, "grad_norm": 0.6580893993377686, "learning_rate": 0.0003480210951416571, "loss": 3.0229, "step": 27475 }, { "epoch": 1.35, "grad_norm": 0.6442924737930298, "learning_rate": 0.00034800589741243167, "loss": 3.077, "step": 27476 }, { "epoch": 1.35, "grad_norm": 0.5990310311317444, "learning_rate": 0.00034799069955676794, "loss": 3.1217, "step": 27477 }, { "epoch": 1.35, "grad_norm": 0.5739229321479797, "learning_rate": 0.0003479755015747059, "loss": 3.1821, "step": 27478 }, { "epoch": 1.35, "grad_norm": 0.6192485690116882, "learning_rate": 0.0003479603034662856, "loss": 3.071, "step": 27479 }, { "epoch": 1.35, "grad_norm": 0.5898162722587585, "learning_rate": 0.0003479451052315471, "loss": 3.1133, "step": 27480 }, { "epoch": 1.35, "grad_norm": 0.5484324097633362, "learning_rate": 0.00034792990687053046, "loss": 2.8319, "step": 27481 }, { "epoch": 1.35, "grad_norm": 0.5545966625213623, "learning_rate": 0.0003479147083832757, "loss": 3.0854, "step": 27482 }, { "epoch": 1.35, "grad_norm": 0.5734389424324036, "learning_rate": 0.00034789950976982275, "loss": 3.2265, "step": 27483 }, { "epoch": 1.35, "grad_norm": 0.6044603586196899, "learning_rate": 0.00034788431103021175, "loss": 2.8291, "step": 27484 }, { "epoch": 1.35, "grad_norm": 0.5482417345046997, "learning_rate": 0.00034786911216448267, "loss": 3.1032, "step": 27485 }, { "epoch": 1.35, "grad_norm": 0.5807044506072998, "learning_rate": 0.0003478539131726757, "loss": 3.2418, "step": 27486 }, { "epoch": 1.35, "grad_norm": 0.5764628648757935, "learning_rate": 0.00034783871405483056, "loss": 3.1398, "step": 27487 }, { "epoch": 1.35, "grad_norm": 0.5609407424926758, "learning_rate": 0.00034782351481098744, "loss": 3.0961, "step": 27488 }, { "epoch": 1.35, "grad_norm": 0.5483243465423584, "learning_rate": 0.0003478083154411865, "loss": 3.3225, "step": 27489 }, { "epoch": 1.35, "grad_norm": 0.5866134166717529, "learning_rate": 0.0003477931159454675, "loss": 3.1778, "step": 27490 }, { "epoch": 1.35, "grad_norm": 0.5812581181526184, "learning_rate": 0.0003477779163238708, "loss": 2.9979, "step": 27491 }, { "epoch": 1.35, "grad_norm": 0.5664998292922974, "learning_rate": 0.00034776271657643615, "loss": 3.0774, "step": 27492 }, { "epoch": 1.35, "grad_norm": 0.6252369284629822, "learning_rate": 0.0003477475167032038, "loss": 2.8475, "step": 27493 }, { "epoch": 1.35, "grad_norm": 0.647500216960907, "learning_rate": 0.0003477323167042136, "loss": 2.954, "step": 27494 }, { "epoch": 1.35, "grad_norm": 0.5603214502334595, "learning_rate": 0.0003477171165795057, "loss": 2.9279, "step": 27495 }, { "epoch": 1.35, "grad_norm": 0.5812236070632935, "learning_rate": 0.00034770191632912, "loss": 3.0352, "step": 27496 }, { "epoch": 1.35, "grad_norm": 0.5692586898803711, "learning_rate": 0.0003476867159530968, "loss": 3.1431, "step": 27497 }, { "epoch": 1.35, "grad_norm": 0.5523695945739746, "learning_rate": 0.0003476715154514758, "loss": 3.2377, "step": 27498 }, { "epoch": 1.35, "grad_norm": 0.5964672565460205, "learning_rate": 0.00034765631482429733, "loss": 3.2893, "step": 27499 }, { "epoch": 1.35, "grad_norm": 0.601300835609436, "learning_rate": 0.0003476411140716012, "loss": 2.9938, "step": 27500 }, { "epoch": 1.35, "grad_norm": 0.5985939502716064, "learning_rate": 0.0003476259131934277, "loss": 3.0204, "step": 27501 }, { "epoch": 1.35, "grad_norm": 0.5791720151901245, "learning_rate": 0.0003476107121898166, "loss": 3.0052, "step": 27502 }, { "epoch": 1.35, "grad_norm": 0.6213023662567139, "learning_rate": 0.00034759551106080795, "loss": 3.0617, "step": 27503 }, { "epoch": 1.35, "grad_norm": 0.5528647899627686, "learning_rate": 0.0003475803098064421, "loss": 3.024, "step": 27504 }, { "epoch": 1.35, "grad_norm": 0.5452709197998047, "learning_rate": 0.0003475651084267587, "loss": 3.1488, "step": 27505 }, { "epoch": 1.35, "grad_norm": 0.5762589573860168, "learning_rate": 0.000347549906921798, "loss": 3.0249, "step": 27506 }, { "epoch": 1.35, "grad_norm": 0.6029790043830872, "learning_rate": 0.00034753470529160006, "loss": 3.1336, "step": 27507 }, { "epoch": 1.35, "grad_norm": 0.5660719871520996, "learning_rate": 0.00034751950353620494, "loss": 3.1616, "step": 27508 }, { "epoch": 1.35, "grad_norm": 0.5759086012840271, "learning_rate": 0.00034750430165565233, "loss": 3.1031, "step": 27509 }, { "epoch": 1.35, "grad_norm": 0.5439847111701965, "learning_rate": 0.00034748909964998264, "loss": 3.1131, "step": 27510 }, { "epoch": 1.35, "grad_norm": 0.6624140739440918, "learning_rate": 0.0003474738975192359, "loss": 3.0788, "step": 27511 }, { "epoch": 1.35, "grad_norm": 0.5809428095817566, "learning_rate": 0.000347458695263452, "loss": 3.0249, "step": 27512 }, { "epoch": 1.35, "grad_norm": 0.5749208927154541, "learning_rate": 0.00034744349288267105, "loss": 2.9156, "step": 27513 }, { "epoch": 1.35, "grad_norm": 0.5683353543281555, "learning_rate": 0.0003474282903769329, "loss": 3.1445, "step": 27514 }, { "epoch": 1.35, "grad_norm": 0.5920277833938599, "learning_rate": 0.00034741308774627794, "loss": 3.0885, "step": 27515 }, { "epoch": 1.35, "grad_norm": 0.561812698841095, "learning_rate": 0.000347397884990746, "loss": 3.0101, "step": 27516 }, { "epoch": 1.35, "grad_norm": 0.5506225228309631, "learning_rate": 0.00034738268211037716, "loss": 3.156, "step": 27517 }, { "epoch": 1.35, "grad_norm": 0.5584924817085266, "learning_rate": 0.00034736747910521135, "loss": 2.9141, "step": 27518 }, { "epoch": 1.35, "grad_norm": 0.6194260716438293, "learning_rate": 0.00034735227597528884, "loss": 3.0649, "step": 27519 }, { "epoch": 1.35, "grad_norm": 0.5642139315605164, "learning_rate": 0.0003473370727206495, "loss": 2.6767, "step": 27520 }, { "epoch": 1.35, "grad_norm": 0.6075909733772278, "learning_rate": 0.0003473218693413334, "loss": 2.9534, "step": 27521 }, { "epoch": 1.35, "grad_norm": 0.5577836632728577, "learning_rate": 0.00034730666583738064, "loss": 2.9497, "step": 27522 }, { "epoch": 1.35, "grad_norm": 0.565424382686615, "learning_rate": 0.0003472914622088312, "loss": 3.0355, "step": 27523 }, { "epoch": 1.35, "grad_norm": 0.5719602704048157, "learning_rate": 0.0003472762584557252, "loss": 3.1099, "step": 27524 }, { "epoch": 1.35, "grad_norm": 0.5585300922393799, "learning_rate": 0.00034726105457810253, "loss": 3.027, "step": 27525 }, { "epoch": 1.35, "grad_norm": 0.5087852478027344, "learning_rate": 0.0003472458505760035, "loss": 3.1373, "step": 27526 }, { "epoch": 1.35, "grad_norm": 0.5587421655654907, "learning_rate": 0.00034723064644946787, "loss": 3.1473, "step": 27527 }, { "epoch": 1.35, "grad_norm": 0.5711315274238586, "learning_rate": 0.0003472154421985359, "loss": 3.0316, "step": 27528 }, { "epoch": 1.35, "grad_norm": 0.5235508680343628, "learning_rate": 0.0003472002378232474, "loss": 3.0433, "step": 27529 }, { "epoch": 1.35, "grad_norm": 0.5291568040847778, "learning_rate": 0.00034718503332364264, "loss": 3.2164, "step": 27530 }, { "epoch": 1.35, "grad_norm": 0.5746374130249023, "learning_rate": 0.00034716982869976157, "loss": 3.1122, "step": 27531 }, { "epoch": 1.35, "grad_norm": 0.5441637635231018, "learning_rate": 0.0003471546239516443, "loss": 2.9419, "step": 27532 }, { "epoch": 1.35, "grad_norm": 0.5394623279571533, "learning_rate": 0.00034713941907933075, "loss": 3.0706, "step": 27533 }, { "epoch": 1.35, "grad_norm": 0.5609070062637329, "learning_rate": 0.00034712421408286106, "loss": 2.9203, "step": 27534 }, { "epoch": 1.35, "grad_norm": 0.5641400218009949, "learning_rate": 0.0003471090089622752, "loss": 3.1299, "step": 27535 }, { "epoch": 1.35, "grad_norm": 0.5568336248397827, "learning_rate": 0.0003470938037176134, "loss": 3.0579, "step": 27536 }, { "epoch": 1.35, "grad_norm": 0.5555543899536133, "learning_rate": 0.00034707859834891557, "loss": 2.9768, "step": 27537 }, { "epoch": 1.35, "grad_norm": 0.5764153599739075, "learning_rate": 0.0003470633928562218, "loss": 2.9253, "step": 27538 }, { "epoch": 1.35, "grad_norm": 0.5446388125419617, "learning_rate": 0.00034704818723957197, "loss": 3.1424, "step": 27539 }, { "epoch": 1.35, "grad_norm": 0.5743789076805115, "learning_rate": 0.0003470329814990064, "loss": 3.0394, "step": 27540 }, { "epoch": 1.35, "grad_norm": 0.5995753407478333, "learning_rate": 0.00034701777563456496, "loss": 3.1474, "step": 27541 }, { "epoch": 1.35, "grad_norm": 0.5293822288513184, "learning_rate": 0.00034700256964628767, "loss": 3.0878, "step": 27542 }, { "epoch": 1.35, "grad_norm": 0.5851097702980042, "learning_rate": 0.00034698736353421477, "loss": 3.1295, "step": 27543 }, { "epoch": 1.35, "grad_norm": 0.5822820067405701, "learning_rate": 0.00034697215729838615, "loss": 3.1411, "step": 27544 }, { "epoch": 1.35, "grad_norm": 0.5606449842453003, "learning_rate": 0.00034695695093884193, "loss": 3.1297, "step": 27545 }, { "epoch": 1.35, "grad_norm": 0.5498665571212769, "learning_rate": 0.00034694174445562206, "loss": 3.0238, "step": 27546 }, { "epoch": 1.35, "grad_norm": 0.5421035289764404, "learning_rate": 0.0003469265378487668, "loss": 3.0742, "step": 27547 }, { "epoch": 1.35, "grad_norm": 0.5203042030334473, "learning_rate": 0.000346911331118316, "loss": 3.0389, "step": 27548 }, { "epoch": 1.35, "grad_norm": 0.5701861381530762, "learning_rate": 0.0003468961242643098, "loss": 3.2245, "step": 27549 }, { "epoch": 1.35, "grad_norm": 0.5597485303878784, "learning_rate": 0.0003468809172867881, "loss": 3.0925, "step": 27550 }, { "epoch": 1.35, "grad_norm": 0.5598206520080566, "learning_rate": 0.00034686571018579127, "loss": 2.9806, "step": 27551 }, { "epoch": 1.35, "grad_norm": 0.5926447510719299, "learning_rate": 0.00034685050296135914, "loss": 2.9184, "step": 27552 }, { "epoch": 1.35, "grad_norm": 0.5669886469841003, "learning_rate": 0.00034683529561353174, "loss": 3.074, "step": 27553 }, { "epoch": 1.35, "grad_norm": 0.5824142098426819, "learning_rate": 0.0003468200881423493, "loss": 3.116, "step": 27554 }, { "epoch": 1.35, "grad_norm": 0.5819292068481445, "learning_rate": 0.00034680488054785163, "loss": 3.0986, "step": 27555 }, { "epoch": 1.35, "grad_norm": 0.5521724820137024, "learning_rate": 0.000346789672830079, "loss": 2.9404, "step": 27556 }, { "epoch": 1.35, "grad_norm": 0.5370104908943176, "learning_rate": 0.0003467744649890713, "loss": 3.0389, "step": 27557 }, { "epoch": 1.35, "grad_norm": 0.5862271189689636, "learning_rate": 0.0003467592570248687, "loss": 3.1758, "step": 27558 }, { "epoch": 1.35, "grad_norm": 0.5811551809310913, "learning_rate": 0.0003467440489375113, "loss": 2.9348, "step": 27559 }, { "epoch": 1.35, "grad_norm": 0.523688554763794, "learning_rate": 0.00034672884072703885, "loss": 2.9629, "step": 27560 }, { "epoch": 1.35, "grad_norm": 0.5698621273040771, "learning_rate": 0.0003467136323934918, "loss": 3.0013, "step": 27561 }, { "epoch": 1.35, "grad_norm": 0.5459543466567993, "learning_rate": 0.00034669842393691, "loss": 3.0861, "step": 27562 }, { "epoch": 1.35, "grad_norm": 0.6095524430274963, "learning_rate": 0.0003466832153573336, "loss": 3.0588, "step": 27563 }, { "epoch": 1.35, "grad_norm": 0.5736280679702759, "learning_rate": 0.00034666800665480253, "loss": 3.1733, "step": 27564 }, { "epoch": 1.35, "grad_norm": 0.579998254776001, "learning_rate": 0.0003466527978293568, "loss": 3.0893, "step": 27565 }, { "epoch": 1.35, "grad_norm": 0.5939657688140869, "learning_rate": 0.00034663758888103677, "loss": 3.0966, "step": 27566 }, { "epoch": 1.35, "grad_norm": 0.5807148814201355, "learning_rate": 0.00034662237980988226, "loss": 3.03, "step": 27567 }, { "epoch": 1.35, "grad_norm": 0.57145756483078, "learning_rate": 0.00034660717061593335, "loss": 3.0573, "step": 27568 }, { "epoch": 1.35, "grad_norm": 0.5279229283332825, "learning_rate": 0.00034659196129923004, "loss": 2.9326, "step": 27569 }, { "epoch": 1.35, "grad_norm": 0.5835081338882446, "learning_rate": 0.00034657675185981255, "loss": 2.9735, "step": 27570 }, { "epoch": 1.35, "grad_norm": 0.5817593932151794, "learning_rate": 0.00034656154229772084, "loss": 3.1158, "step": 27571 }, { "epoch": 1.35, "grad_norm": 0.5388606786727905, "learning_rate": 0.000346546332612995, "loss": 2.8509, "step": 27572 }, { "epoch": 1.35, "grad_norm": 0.622467041015625, "learning_rate": 0.0003465311228056751, "loss": 3.0875, "step": 27573 }, { "epoch": 1.35, "grad_norm": 0.5687299966812134, "learning_rate": 0.00034651591287580125, "loss": 2.9959, "step": 27574 }, { "epoch": 1.35, "grad_norm": 0.5633476972579956, "learning_rate": 0.00034650070282341326, "loss": 2.9332, "step": 27575 }, { "epoch": 1.35, "grad_norm": 0.5782656073570251, "learning_rate": 0.00034648549264855146, "loss": 3.0858, "step": 27576 }, { "epoch": 1.35, "grad_norm": 0.577627956867218, "learning_rate": 0.0003464702823512558, "loss": 3.012, "step": 27577 }, { "epoch": 1.35, "grad_norm": 0.559639573097229, "learning_rate": 0.00034645507193156646, "loss": 3.1584, "step": 27578 }, { "epoch": 1.35, "grad_norm": 0.5748494863510132, "learning_rate": 0.0003464398613895233, "loss": 3.0548, "step": 27579 }, { "epoch": 1.35, "grad_norm": 0.5871093273162842, "learning_rate": 0.0003464246507251664, "loss": 3.1131, "step": 27580 }, { "epoch": 1.35, "grad_norm": 0.5574132204055786, "learning_rate": 0.00034640943993853606, "loss": 3.1498, "step": 27581 }, { "epoch": 1.35, "grad_norm": 0.5610949397087097, "learning_rate": 0.0003463942290296721, "loss": 3.0497, "step": 27582 }, { "epoch": 1.35, "grad_norm": 0.5792922377586365, "learning_rate": 0.0003463790179986147, "loss": 3.3694, "step": 27583 }, { "epoch": 1.35, "grad_norm": 0.5634068250656128, "learning_rate": 0.0003463638068454039, "loss": 3.0192, "step": 27584 }, { "epoch": 1.35, "grad_norm": 0.5394881367683411, "learning_rate": 0.00034634859557007976, "loss": 3.1326, "step": 27585 }, { "epoch": 1.35, "grad_norm": 0.6030181050300598, "learning_rate": 0.00034633338417268227, "loss": 3.0469, "step": 27586 }, { "epoch": 1.35, "grad_norm": 0.6113521456718445, "learning_rate": 0.0003463181726532516, "loss": 3.0221, "step": 27587 }, { "epoch": 1.35, "grad_norm": 0.5368742346763611, "learning_rate": 0.00034630296101182794, "loss": 3.1202, "step": 27588 }, { "epoch": 1.35, "grad_norm": 0.588915228843689, "learning_rate": 0.000346287749248451, "loss": 3.0284, "step": 27589 }, { "epoch": 1.35, "grad_norm": 0.5892957448959351, "learning_rate": 0.00034627253736316103, "loss": 3.1462, "step": 27590 }, { "epoch": 1.35, "grad_norm": 0.5666577816009521, "learning_rate": 0.0003462573253559982, "loss": 3.0818, "step": 27591 }, { "epoch": 1.35, "grad_norm": 0.5563762187957764, "learning_rate": 0.0003462421132270024, "loss": 3.2716, "step": 27592 }, { "epoch": 1.35, "grad_norm": 0.615742564201355, "learning_rate": 0.0003462269009762138, "loss": 3.0507, "step": 27593 }, { "epoch": 1.35, "grad_norm": 0.5579602122306824, "learning_rate": 0.0003462116886036725, "loss": 3.0305, "step": 27594 }, { "epoch": 1.35, "grad_norm": 0.5372714996337891, "learning_rate": 0.0003461964761094184, "loss": 2.8777, "step": 27595 }, { "epoch": 1.35, "grad_norm": 0.5734666585922241, "learning_rate": 0.0003461812634934917, "loss": 2.9552, "step": 27596 }, { "epoch": 1.35, "grad_norm": 0.5839183330535889, "learning_rate": 0.0003461660507559325, "loss": 3.0588, "step": 27597 }, { "epoch": 1.35, "grad_norm": 0.6317874193191528, "learning_rate": 0.00034615083789678075, "loss": 2.8828, "step": 27598 }, { "epoch": 1.35, "grad_norm": 0.572235643863678, "learning_rate": 0.0003461356249160767, "loss": 3.0354, "step": 27599 }, { "epoch": 1.35, "grad_norm": 0.5442335605621338, "learning_rate": 0.00034612041181386014, "loss": 3.099, "step": 27600 }, { "epoch": 1.35, "grad_norm": 0.5586719512939453, "learning_rate": 0.0003461051985901713, "loss": 3.1294, "step": 27601 }, { "epoch": 1.35, "grad_norm": 0.5442153811454773, "learning_rate": 0.0003460899852450502, "loss": 2.9258, "step": 27602 }, { "epoch": 1.35, "grad_norm": 0.5317257642745972, "learning_rate": 0.0003460747717785371, "loss": 3.2071, "step": 27603 }, { "epoch": 1.35, "grad_norm": 0.5560556054115295, "learning_rate": 0.0003460595581906719, "loss": 3.2065, "step": 27604 }, { "epoch": 1.35, "grad_norm": 0.6010359525680542, "learning_rate": 0.0003460443444814946, "loss": 2.8649, "step": 27605 }, { "epoch": 1.35, "grad_norm": 0.5659478306770325, "learning_rate": 0.0003460291306510454, "loss": 3.0582, "step": 27606 }, { "epoch": 1.35, "grad_norm": 0.5736438035964966, "learning_rate": 0.00034601391669936436, "loss": 3.0973, "step": 27607 }, { "epoch": 1.35, "grad_norm": 0.5653098821640015, "learning_rate": 0.0003459987026264914, "loss": 3.3044, "step": 27608 }, { "epoch": 1.35, "grad_norm": 0.5595373511314392, "learning_rate": 0.0003459834884324668, "loss": 3.1815, "step": 27609 }, { "epoch": 1.35, "grad_norm": 0.5338704586029053, "learning_rate": 0.0003459682741173306, "loss": 3.2657, "step": 27610 }, { "epoch": 1.35, "grad_norm": 0.595348060131073, "learning_rate": 0.0003459530596811227, "loss": 3.0412, "step": 27611 }, { "epoch": 1.35, "grad_norm": 0.5767225623130798, "learning_rate": 0.0003459378451238833, "loss": 3.1244, "step": 27612 }, { "epoch": 1.35, "grad_norm": 0.5606244206428528, "learning_rate": 0.00034592263044565247, "loss": 2.9589, "step": 27613 }, { "epoch": 1.35, "grad_norm": 0.581843376159668, "learning_rate": 0.0003459074156464704, "loss": 3.1054, "step": 27614 }, { "epoch": 1.35, "grad_norm": 0.5597598552703857, "learning_rate": 0.0003458922007263769, "loss": 3.1613, "step": 27615 }, { "epoch": 1.35, "grad_norm": 0.6114240884780884, "learning_rate": 0.0003458769856854121, "loss": 2.9326, "step": 27616 }, { "epoch": 1.35, "grad_norm": 0.6092174649238586, "learning_rate": 0.0003458617705236163, "loss": 3.1666, "step": 27617 }, { "epoch": 1.35, "grad_norm": 0.5693092346191406, "learning_rate": 0.0003458465552410294, "loss": 3.1785, "step": 27618 }, { "epoch": 1.35, "grad_norm": 0.5575876235961914, "learning_rate": 0.00034583133983769146, "loss": 3.0092, "step": 27619 }, { "epoch": 1.35, "grad_norm": 0.5795199871063232, "learning_rate": 0.00034581612431364253, "loss": 3.2021, "step": 27620 }, { "epoch": 1.35, "grad_norm": 0.5466296076774597, "learning_rate": 0.0003458009086689228, "loss": 3.271, "step": 27621 }, { "epoch": 1.35, "grad_norm": 0.5569012761116028, "learning_rate": 0.00034578569290357234, "loss": 3.0389, "step": 27622 }, { "epoch": 1.35, "grad_norm": 0.554438591003418, "learning_rate": 0.00034577047701763114, "loss": 3.284, "step": 27623 }, { "epoch": 1.35, "grad_norm": 0.540482759475708, "learning_rate": 0.00034575526101113924, "loss": 3.0049, "step": 27624 }, { "epoch": 1.35, "grad_norm": 0.5931518077850342, "learning_rate": 0.0003457400448841369, "loss": 3.0533, "step": 27625 }, { "epoch": 1.35, "grad_norm": 0.5511003136634827, "learning_rate": 0.0003457248286366641, "loss": 2.9812, "step": 27626 }, { "epoch": 1.35, "grad_norm": 0.5712304711341858, "learning_rate": 0.00034570961226876074, "loss": 3.1466, "step": 27627 }, { "epoch": 1.35, "grad_norm": 0.5965542197227478, "learning_rate": 0.0003456943957804672, "loss": 3.0022, "step": 27628 }, { "epoch": 1.35, "grad_norm": 0.5600318312644958, "learning_rate": 0.0003456791791718234, "loss": 3.0214, "step": 27629 }, { "epoch": 1.35, "grad_norm": 0.5907925367355347, "learning_rate": 0.00034566396244286945, "loss": 3.0262, "step": 27630 }, { "epoch": 1.35, "grad_norm": 0.6217458844184875, "learning_rate": 0.00034564874559364536, "loss": 3.1207, "step": 27631 }, { "epoch": 1.35, "grad_norm": 0.5708328485488892, "learning_rate": 0.00034563352862419127, "loss": 3.0498, "step": 27632 }, { "epoch": 1.35, "grad_norm": 0.5754064321517944, "learning_rate": 0.00034561831153454725, "loss": 3.1127, "step": 27633 }, { "epoch": 1.35, "grad_norm": 0.5828617215156555, "learning_rate": 0.00034560309432475335, "loss": 3.1008, "step": 27634 }, { "epoch": 1.35, "grad_norm": 0.553674042224884, "learning_rate": 0.0003455878769948497, "loss": 3.0445, "step": 27635 }, { "epoch": 1.35, "grad_norm": 0.5497589707374573, "learning_rate": 0.0003455726595448764, "loss": 3.0791, "step": 27636 }, { "epoch": 1.35, "grad_norm": 0.5885263681411743, "learning_rate": 0.00034555744197487334, "loss": 2.9445, "step": 27637 }, { "epoch": 1.35, "grad_norm": 0.5797986388206482, "learning_rate": 0.0003455422242848809, "loss": 3.3291, "step": 27638 }, { "epoch": 1.35, "grad_norm": 0.5659964680671692, "learning_rate": 0.000345527006474939, "loss": 2.9028, "step": 27639 }, { "epoch": 1.35, "grad_norm": 0.5410609841346741, "learning_rate": 0.00034551178854508763, "loss": 3.1878, "step": 27640 }, { "epoch": 1.35, "grad_norm": 0.6087222099304199, "learning_rate": 0.0003454965704953671, "loss": 3.0399, "step": 27641 }, { "epoch": 1.35, "grad_norm": 0.5863704085350037, "learning_rate": 0.0003454813523258172, "loss": 2.9568, "step": 27642 }, { "epoch": 1.35, "grad_norm": 0.5730543732643127, "learning_rate": 0.00034546613403647826, "loss": 2.7912, "step": 27643 }, { "epoch": 1.35, "grad_norm": 0.5473147630691528, "learning_rate": 0.0003454509156273903, "loss": 3.1532, "step": 27644 }, { "epoch": 1.35, "grad_norm": 0.6057653427124023, "learning_rate": 0.0003454356970985933, "loss": 3.098, "step": 27645 }, { "epoch": 1.35, "grad_norm": 0.5998139381408691, "learning_rate": 0.0003454204784501275, "loss": 3.0748, "step": 27646 }, { "epoch": 1.35, "grad_norm": 0.6057741641998291, "learning_rate": 0.0003454052596820328, "loss": 3.0626, "step": 27647 }, { "epoch": 1.35, "grad_norm": 0.5644229650497437, "learning_rate": 0.00034539004079434945, "loss": 3.1077, "step": 27648 }, { "epoch": 1.36, "grad_norm": 0.5536103248596191, "learning_rate": 0.00034537482178711743, "loss": 2.9196, "step": 27649 }, { "epoch": 1.36, "grad_norm": 0.5765447616577148, "learning_rate": 0.00034535960266037695, "loss": 3.158, "step": 27650 }, { "epoch": 1.36, "grad_norm": 0.551717221736908, "learning_rate": 0.00034534438341416796, "loss": 3.047, "step": 27651 }, { "epoch": 1.36, "grad_norm": 0.55844646692276, "learning_rate": 0.0003453291640485305, "loss": 3.0137, "step": 27652 }, { "epoch": 1.36, "grad_norm": 0.5359190702438354, "learning_rate": 0.00034531394456350486, "loss": 3.0382, "step": 27653 }, { "epoch": 1.36, "grad_norm": 0.5825155377388, "learning_rate": 0.00034529872495913104, "loss": 3.2114, "step": 27654 }, { "epoch": 1.36, "grad_norm": 0.5790165662765503, "learning_rate": 0.000345283505235449, "loss": 2.943, "step": 27655 }, { "epoch": 1.36, "grad_norm": 0.6118806004524231, "learning_rate": 0.00034526828539249894, "loss": 2.9642, "step": 27656 }, { "epoch": 1.36, "grad_norm": 0.5818917751312256, "learning_rate": 0.00034525306543032095, "loss": 2.8687, "step": 27657 }, { "epoch": 1.36, "grad_norm": 0.5818273425102234, "learning_rate": 0.0003452378453489551, "loss": 2.9843, "step": 27658 }, { "epoch": 1.36, "grad_norm": 0.5957201719284058, "learning_rate": 0.00034522262514844143, "loss": 2.9917, "step": 27659 }, { "epoch": 1.36, "grad_norm": 0.5800155997276306, "learning_rate": 0.0003452074048288201, "loss": 2.8889, "step": 27660 }, { "epoch": 1.36, "grad_norm": 0.5779448747634888, "learning_rate": 0.0003451921843901312, "loss": 3.0047, "step": 27661 }, { "epoch": 1.36, "grad_norm": 0.6519986391067505, "learning_rate": 0.0003451769638324147, "loss": 3.1293, "step": 27662 }, { "epoch": 1.36, "grad_norm": 0.577875554561615, "learning_rate": 0.00034516174315571077, "loss": 3.0927, "step": 27663 }, { "epoch": 1.36, "grad_norm": 0.5626934766769409, "learning_rate": 0.00034514652236005956, "loss": 2.9672, "step": 27664 }, { "epoch": 1.36, "grad_norm": 0.5806666612625122, "learning_rate": 0.00034513130144550114, "loss": 2.8056, "step": 27665 }, { "epoch": 1.36, "grad_norm": 0.5470517873764038, "learning_rate": 0.0003451160804120755, "loss": 2.9702, "step": 27666 }, { "epoch": 1.36, "grad_norm": 0.560828685760498, "learning_rate": 0.00034510085925982284, "loss": 3.1832, "step": 27667 }, { "epoch": 1.36, "grad_norm": 0.5910745859146118, "learning_rate": 0.0003450856379887831, "loss": 2.9672, "step": 27668 }, { "epoch": 1.36, "grad_norm": 0.5761743187904358, "learning_rate": 0.0003450704165989965, "loss": 2.8815, "step": 27669 }, { "epoch": 1.36, "grad_norm": 0.5565659403800964, "learning_rate": 0.00034505519509050314, "loss": 3.0549, "step": 27670 }, { "epoch": 1.36, "grad_norm": 0.5785242319107056, "learning_rate": 0.000345039973463343, "loss": 2.9993, "step": 27671 }, { "epoch": 1.36, "grad_norm": 0.5992151498794556, "learning_rate": 0.00034502475171755635, "loss": 2.9655, "step": 27672 }, { "epoch": 1.36, "grad_norm": 0.5523272752761841, "learning_rate": 0.00034500952985318305, "loss": 3.1217, "step": 27673 }, { "epoch": 1.36, "grad_norm": 0.543624222278595, "learning_rate": 0.0003449943078702633, "loss": 3.0863, "step": 27674 }, { "epoch": 1.36, "grad_norm": 0.5931307673454285, "learning_rate": 0.0003449790857688373, "loss": 3.2511, "step": 27675 }, { "epoch": 1.36, "grad_norm": 0.5931395888328552, "learning_rate": 0.0003449638635489451, "loss": 2.8998, "step": 27676 }, { "epoch": 1.36, "grad_norm": 0.5682350993156433, "learning_rate": 0.0003449486412106266, "loss": 3.0556, "step": 27677 }, { "epoch": 1.36, "grad_norm": 0.590588390827179, "learning_rate": 0.00034493341875392196, "loss": 3.0171, "step": 27678 }, { "epoch": 1.36, "grad_norm": 0.6109996438026428, "learning_rate": 0.00034491819617887154, "loss": 3.0134, "step": 27679 }, { "epoch": 1.36, "grad_norm": 0.6124204397201538, "learning_rate": 0.0003449029734855152, "loss": 3.1012, "step": 27680 }, { "epoch": 1.36, "grad_norm": 0.5548977255821228, "learning_rate": 0.00034488775067389294, "loss": 3.282, "step": 27681 }, { "epoch": 1.36, "grad_norm": 0.6532643437385559, "learning_rate": 0.00034487252774404505, "loss": 2.9508, "step": 27682 }, { "epoch": 1.36, "grad_norm": 0.6053386330604553, "learning_rate": 0.00034485730469601154, "loss": 3.2382, "step": 27683 }, { "epoch": 1.36, "grad_norm": 0.567348837852478, "learning_rate": 0.00034484208152983257, "loss": 2.7893, "step": 27684 }, { "epoch": 1.36, "grad_norm": 0.5835475325584412, "learning_rate": 0.00034482685824554815, "loss": 3.0216, "step": 27685 }, { "epoch": 1.36, "grad_norm": 0.5753740668296814, "learning_rate": 0.00034481163484319845, "loss": 3.3452, "step": 27686 }, { "epoch": 1.36, "grad_norm": 0.6069841980934143, "learning_rate": 0.0003447964113228236, "loss": 3.1379, "step": 27687 }, { "epoch": 1.36, "grad_norm": 0.5838615894317627, "learning_rate": 0.0003447811876844634, "loss": 2.9669, "step": 27688 }, { "epoch": 1.36, "grad_norm": 0.547076940536499, "learning_rate": 0.00034476596392815835, "loss": 3.0967, "step": 27689 }, { "epoch": 1.36, "grad_norm": 0.6205630898475647, "learning_rate": 0.00034475074005394836, "loss": 3.1115, "step": 27690 }, { "epoch": 1.36, "grad_norm": 0.5432915091514587, "learning_rate": 0.0003447355160618735, "loss": 3.1954, "step": 27691 }, { "epoch": 1.36, "grad_norm": 0.5629213452339172, "learning_rate": 0.0003447202919519739, "loss": 3.134, "step": 27692 }, { "epoch": 1.36, "grad_norm": 0.6095461249351501, "learning_rate": 0.00034470506772428966, "loss": 2.9366, "step": 27693 }, { "epoch": 1.36, "grad_norm": 0.5309693813323975, "learning_rate": 0.00034468984337886085, "loss": 3.2188, "step": 27694 }, { "epoch": 1.36, "grad_norm": 0.5798507928848267, "learning_rate": 0.0003446746189157276, "loss": 2.9499, "step": 27695 }, { "epoch": 1.36, "grad_norm": 0.5637386441230774, "learning_rate": 0.00034465939433493003, "loss": 2.9753, "step": 27696 }, { "epoch": 1.36, "grad_norm": 0.5978463888168335, "learning_rate": 0.0003446441696365082, "loss": 2.9246, "step": 27697 }, { "epoch": 1.36, "grad_norm": 0.5650119781494141, "learning_rate": 0.00034462894482050214, "loss": 3.1587, "step": 27698 }, { "epoch": 1.36, "grad_norm": 0.6245388984680176, "learning_rate": 0.00034461371988695215, "loss": 3.1073, "step": 27699 }, { "epoch": 1.36, "grad_norm": 0.6453614234924316, "learning_rate": 0.0003445984948358981, "loss": 3.1702, "step": 27700 }, { "epoch": 1.36, "grad_norm": 0.5954031944274902, "learning_rate": 0.0003445832696673803, "loss": 3.2765, "step": 27701 }, { "epoch": 1.36, "grad_norm": 0.5462033152580261, "learning_rate": 0.0003445680443814387, "loss": 2.9853, "step": 27702 }, { "epoch": 1.36, "grad_norm": 0.5822536945343018, "learning_rate": 0.00034455281897811344, "loss": 3.147, "step": 27703 }, { "epoch": 1.36, "grad_norm": 0.5529893040657043, "learning_rate": 0.0003445375934574446, "loss": 3.1181, "step": 27704 }, { "epoch": 1.36, "grad_norm": 0.5892257690429688, "learning_rate": 0.0003445223678194724, "loss": 3.0272, "step": 27705 }, { "epoch": 1.36, "grad_norm": 0.5971611738204956, "learning_rate": 0.0003445071420642368, "loss": 3.0081, "step": 27706 }, { "epoch": 1.36, "grad_norm": 0.5674407482147217, "learning_rate": 0.000344491916191778, "loss": 3.1847, "step": 27707 }, { "epoch": 1.36, "grad_norm": 0.6164575219154358, "learning_rate": 0.000344476690202136, "loss": 3.0107, "step": 27708 }, { "epoch": 1.36, "grad_norm": 0.5708376169204712, "learning_rate": 0.0003444614640953509, "loss": 3.1381, "step": 27709 }, { "epoch": 1.36, "grad_norm": 0.5596605539321899, "learning_rate": 0.00034444623787146297, "loss": 3.0325, "step": 27710 }, { "epoch": 1.36, "grad_norm": 0.5818103551864624, "learning_rate": 0.0003444310115305121, "loss": 3.1746, "step": 27711 }, { "epoch": 1.36, "grad_norm": 0.5505349040031433, "learning_rate": 0.0003444157850725386, "loss": 3.128, "step": 27712 }, { "epoch": 1.36, "grad_norm": 0.5452769994735718, "learning_rate": 0.00034440055849758246, "loss": 3.2109, "step": 27713 }, { "epoch": 1.36, "grad_norm": 0.5605431199073792, "learning_rate": 0.0003443853318056837, "loss": 2.9834, "step": 27714 }, { "epoch": 1.36, "grad_norm": 0.5733485221862793, "learning_rate": 0.0003443701049968826, "loss": 3.1925, "step": 27715 }, { "epoch": 1.36, "grad_norm": 0.5892324447631836, "learning_rate": 0.0003443548780712192, "loss": 2.9853, "step": 27716 }, { "epoch": 1.36, "grad_norm": 0.6256640553474426, "learning_rate": 0.0003443396510287335, "loss": 3.0881, "step": 27717 }, { "epoch": 1.36, "grad_norm": 0.6237658858299255, "learning_rate": 0.00034432442386946575, "loss": 3.0615, "step": 27718 }, { "epoch": 1.36, "grad_norm": 0.6183185577392578, "learning_rate": 0.000344309196593456, "loss": 2.9544, "step": 27719 }, { "epoch": 1.36, "grad_norm": 0.5464622974395752, "learning_rate": 0.00034429396920074436, "loss": 2.9632, "step": 27720 }, { "epoch": 1.36, "grad_norm": 0.5681126117706299, "learning_rate": 0.0003442787416913709, "loss": 3.0697, "step": 27721 }, { "epoch": 1.36, "grad_norm": 0.5842775106430054, "learning_rate": 0.0003442635140653758, "loss": 3.0525, "step": 27722 }, { "epoch": 1.36, "grad_norm": 0.5486373901367188, "learning_rate": 0.00034424828632279914, "loss": 3.0561, "step": 27723 }, { "epoch": 1.36, "grad_norm": 0.5385405421257019, "learning_rate": 0.0003442330584636809, "loss": 3.1888, "step": 27724 }, { "epoch": 1.36, "grad_norm": 0.5733704566955566, "learning_rate": 0.0003442178304880613, "loss": 2.9091, "step": 27725 }, { "epoch": 1.36, "grad_norm": 0.5765992999076843, "learning_rate": 0.00034420260239598053, "loss": 3.0318, "step": 27726 }, { "epoch": 1.36, "grad_norm": 0.5930641889572144, "learning_rate": 0.0003441873741874786, "loss": 3.0233, "step": 27727 }, { "epoch": 1.36, "grad_norm": 0.5822330117225647, "learning_rate": 0.00034417214586259567, "loss": 3.1026, "step": 27728 }, { "epoch": 1.36, "grad_norm": 0.6270058751106262, "learning_rate": 0.0003441569174213717, "loss": 3.1726, "step": 27729 }, { "epoch": 1.36, "grad_norm": 0.5660275220870972, "learning_rate": 0.000344141688863847, "loss": 2.8885, "step": 27730 }, { "epoch": 1.36, "grad_norm": 0.6260101199150085, "learning_rate": 0.00034412646019006156, "loss": 3.075, "step": 27731 }, { "epoch": 1.36, "grad_norm": 0.577351450920105, "learning_rate": 0.0003441112314000555, "loss": 3.1064, "step": 27732 }, { "epoch": 1.36, "grad_norm": 0.5536080598831177, "learning_rate": 0.00034409600249386894, "loss": 3.1454, "step": 27733 }, { "epoch": 1.36, "grad_norm": 0.5863784551620483, "learning_rate": 0.000344080773471542, "loss": 3.0272, "step": 27734 }, { "epoch": 1.36, "grad_norm": 0.6073148846626282, "learning_rate": 0.0003440655443331148, "loss": 3.2349, "step": 27735 }, { "epoch": 1.36, "grad_norm": 0.5578989386558533, "learning_rate": 0.00034405031507862746, "loss": 3.078, "step": 27736 }, { "epoch": 1.36, "grad_norm": 0.5783877372741699, "learning_rate": 0.00034403508570811993, "loss": 3.208, "step": 27737 }, { "epoch": 1.36, "grad_norm": 0.5361840128898621, "learning_rate": 0.0003440198562216327, "loss": 3.0135, "step": 27738 }, { "epoch": 1.36, "grad_norm": 0.5612678527832031, "learning_rate": 0.0003440046266192054, "loss": 2.8991, "step": 27739 }, { "epoch": 1.36, "grad_norm": 0.575139045715332, "learning_rate": 0.0003439893969008785, "loss": 2.9018, "step": 27740 }, { "epoch": 1.36, "grad_norm": 0.5466126799583435, "learning_rate": 0.000343974167066692, "loss": 2.9975, "step": 27741 }, { "epoch": 1.36, "grad_norm": 0.5556744337081909, "learning_rate": 0.000343958937116686, "loss": 2.9647, "step": 27742 }, { "epoch": 1.36, "grad_norm": 0.6033949255943298, "learning_rate": 0.00034394370705090067, "loss": 2.8281, "step": 27743 }, { "epoch": 1.36, "grad_norm": 0.5831649303436279, "learning_rate": 0.000343928476869376, "loss": 3.053, "step": 27744 }, { "epoch": 1.36, "grad_norm": 0.5626701712608337, "learning_rate": 0.0003439132465721522, "loss": 3.0697, "step": 27745 }, { "epoch": 1.36, "grad_norm": 0.5939829349517822, "learning_rate": 0.0003438980161592693, "loss": 3.2721, "step": 27746 }, { "epoch": 1.36, "grad_norm": 0.564018964767456, "learning_rate": 0.0003438827856307675, "loss": 2.8827, "step": 27747 }, { "epoch": 1.36, "grad_norm": 0.5679821968078613, "learning_rate": 0.0003438675549866869, "loss": 3.1603, "step": 27748 }, { "epoch": 1.36, "grad_norm": 0.56532222032547, "learning_rate": 0.0003438523242270677, "loss": 3.0086, "step": 27749 }, { "epoch": 1.36, "grad_norm": 0.5731778144836426, "learning_rate": 0.00034383709335194975, "loss": 3.0396, "step": 27750 }, { "epoch": 1.36, "grad_norm": 0.5794793963432312, "learning_rate": 0.00034382186236137346, "loss": 2.9388, "step": 27751 }, { "epoch": 1.36, "grad_norm": 0.5715826153755188, "learning_rate": 0.0003438066312553788, "loss": 3.0948, "step": 27752 }, { "epoch": 1.36, "grad_norm": 0.6393271088600159, "learning_rate": 0.0003437914000340059, "loss": 2.9898, "step": 27753 }, { "epoch": 1.36, "grad_norm": 0.5703928470611572, "learning_rate": 0.00034377616869729486, "loss": 3.2764, "step": 27754 }, { "epoch": 1.36, "grad_norm": 0.5397366285324097, "learning_rate": 0.0003437609372452858, "loss": 3.1067, "step": 27755 }, { "epoch": 1.36, "grad_norm": 0.5772479772567749, "learning_rate": 0.0003437457056780188, "loss": 3.0379, "step": 27756 }, { "epoch": 1.36, "grad_norm": 0.5816461443901062, "learning_rate": 0.0003437304739955341, "loss": 3.0917, "step": 27757 }, { "epoch": 1.36, "grad_norm": 0.5699825882911682, "learning_rate": 0.00034371524219787176, "loss": 2.9859, "step": 27758 }, { "epoch": 1.36, "grad_norm": 0.6363911032676697, "learning_rate": 0.0003437000102850719, "loss": 2.9362, "step": 27759 }, { "epoch": 1.36, "grad_norm": 0.6590486168861389, "learning_rate": 0.00034368477825717455, "loss": 3.0905, "step": 27760 }, { "epoch": 1.36, "grad_norm": 0.5966184139251709, "learning_rate": 0.00034366954611421993, "loss": 2.9768, "step": 27761 }, { "epoch": 1.36, "grad_norm": 0.6021097302436829, "learning_rate": 0.00034365431385624816, "loss": 3.0687, "step": 27762 }, { "epoch": 1.36, "grad_norm": 0.5527408719062805, "learning_rate": 0.0003436390814832994, "loss": 3.1802, "step": 27763 }, { "epoch": 1.36, "grad_norm": 0.5687882900238037, "learning_rate": 0.00034362384899541356, "loss": 3.1737, "step": 27764 }, { "epoch": 1.36, "grad_norm": 0.5569482445716858, "learning_rate": 0.00034360861639263086, "loss": 3.1958, "step": 27765 }, { "epoch": 1.36, "grad_norm": 0.5489016771316528, "learning_rate": 0.0003435933836749916, "loss": 2.8973, "step": 27766 }, { "epoch": 1.36, "grad_norm": 0.5932565927505493, "learning_rate": 0.00034357815084253576, "loss": 2.9053, "step": 27767 }, { "epoch": 1.36, "grad_norm": 0.5763736963272095, "learning_rate": 0.00034356291789530333, "loss": 3.1687, "step": 27768 }, { "epoch": 1.36, "grad_norm": 0.6087867021560669, "learning_rate": 0.00034354768483333466, "loss": 2.9277, "step": 27769 }, { "epoch": 1.36, "grad_norm": 0.5887653231620789, "learning_rate": 0.0003435324516566697, "loss": 2.8634, "step": 27770 }, { "epoch": 1.36, "grad_norm": 0.5681719183921814, "learning_rate": 0.0003435172183653487, "loss": 2.9306, "step": 27771 }, { "epoch": 1.36, "grad_norm": 0.6251999735832214, "learning_rate": 0.0003435019849594117, "loss": 2.9546, "step": 27772 }, { "epoch": 1.36, "grad_norm": 0.5517759919166565, "learning_rate": 0.00034348675143889884, "loss": 3.1553, "step": 27773 }, { "epoch": 1.36, "grad_norm": 0.5633260607719421, "learning_rate": 0.00034347151780385034, "loss": 3.0941, "step": 27774 }, { "epoch": 1.36, "grad_norm": 0.5699298977851868, "learning_rate": 0.000343456284054306, "loss": 3.0928, "step": 27775 }, { "epoch": 1.36, "grad_norm": 0.5600057244300842, "learning_rate": 0.0003434410501903063, "loss": 3.3317, "step": 27776 }, { "epoch": 1.36, "grad_norm": 0.5598301291465759, "learning_rate": 0.0003434258162118913, "loss": 3.1272, "step": 27777 }, { "epoch": 1.36, "grad_norm": 0.6225312352180481, "learning_rate": 0.000343410582119101, "loss": 2.9665, "step": 27778 }, { "epoch": 1.36, "grad_norm": 0.5822789072990417, "learning_rate": 0.00034339534791197557, "loss": 2.927, "step": 27779 }, { "epoch": 1.36, "grad_norm": 0.6133136749267578, "learning_rate": 0.00034338011359055516, "loss": 3.1081, "step": 27780 }, { "epoch": 1.36, "grad_norm": 0.597339391708374, "learning_rate": 0.0003433648791548799, "loss": 3.1425, "step": 27781 }, { "epoch": 1.36, "grad_norm": 0.562778115272522, "learning_rate": 0.00034334964460498984, "loss": 3.2396, "step": 27782 }, { "epoch": 1.36, "grad_norm": 0.5441209673881531, "learning_rate": 0.0003433344099409252, "loss": 3.2741, "step": 27783 }, { "epoch": 1.36, "grad_norm": 0.5804208517074585, "learning_rate": 0.00034331917516272604, "loss": 3.0572, "step": 27784 }, { "epoch": 1.36, "grad_norm": 0.6229684352874756, "learning_rate": 0.0003433039402704326, "loss": 2.9544, "step": 27785 }, { "epoch": 1.36, "grad_norm": 0.5673180818557739, "learning_rate": 0.0003432887052640848, "loss": 2.9424, "step": 27786 }, { "epoch": 1.36, "grad_norm": 0.5669776797294617, "learning_rate": 0.0003432734701437229, "loss": 3.0883, "step": 27787 }, { "epoch": 1.36, "grad_norm": 0.5755397081375122, "learning_rate": 0.000343258234909387, "loss": 3.2678, "step": 27788 }, { "epoch": 1.36, "grad_norm": 0.8632124662399292, "learning_rate": 0.00034324299956111737, "loss": 3.0894, "step": 27789 }, { "epoch": 1.36, "grad_norm": 0.5921272039413452, "learning_rate": 0.00034322776409895385, "loss": 2.7846, "step": 27790 }, { "epoch": 1.36, "grad_norm": 0.5782287120819092, "learning_rate": 0.0003432125285229367, "loss": 2.951, "step": 27791 }, { "epoch": 1.36, "grad_norm": 0.5763784646987915, "learning_rate": 0.0003431972928331061, "loss": 2.9963, "step": 27792 }, { "epoch": 1.36, "grad_norm": 0.6159095764160156, "learning_rate": 0.0003431820570295022, "loss": 2.9511, "step": 27793 }, { "epoch": 1.36, "grad_norm": 0.5671179890632629, "learning_rate": 0.00034316682111216513, "loss": 2.9702, "step": 27794 }, { "epoch": 1.36, "grad_norm": 0.6086155772209167, "learning_rate": 0.00034315158508113483, "loss": 2.9524, "step": 27795 }, { "epoch": 1.36, "grad_norm": 0.6278063654899597, "learning_rate": 0.0003431363489364516, "loss": 2.9647, "step": 27796 }, { "epoch": 1.36, "grad_norm": 0.5405550003051758, "learning_rate": 0.0003431211126781555, "loss": 3.1255, "step": 27797 }, { "epoch": 1.36, "grad_norm": 0.5947132110595703, "learning_rate": 0.0003431058763062867, "loss": 3.0813, "step": 27798 }, { "epoch": 1.36, "grad_norm": 0.6197540760040283, "learning_rate": 0.00034309063982088534, "loss": 3.1518, "step": 27799 }, { "epoch": 1.36, "grad_norm": 0.5950043201446533, "learning_rate": 0.00034307540322199163, "loss": 3.0943, "step": 27800 }, { "epoch": 1.36, "grad_norm": 0.5821655988693237, "learning_rate": 0.00034306016650964544, "loss": 3.0351, "step": 27801 }, { "epoch": 1.36, "grad_norm": 0.6075488328933716, "learning_rate": 0.00034304492968388703, "loss": 3.1314, "step": 27802 }, { "epoch": 1.36, "grad_norm": 0.5653616786003113, "learning_rate": 0.00034302969274475675, "loss": 2.9401, "step": 27803 }, { "epoch": 1.36, "grad_norm": 0.556991696357727, "learning_rate": 0.00034301445569229444, "loss": 2.8252, "step": 27804 }, { "epoch": 1.36, "grad_norm": 0.5913997888565063, "learning_rate": 0.00034299921852654037, "loss": 3.0624, "step": 27805 }, { "epoch": 1.36, "grad_norm": 1.0726978778839111, "learning_rate": 0.00034298398124753455, "loss": 3.1772, "step": 27806 }, { "epoch": 1.36, "grad_norm": 0.5975397825241089, "learning_rate": 0.0003429687438553172, "loss": 2.8712, "step": 27807 }, { "epoch": 1.36, "grad_norm": 0.5815263390541077, "learning_rate": 0.0003429535063499285, "loss": 3.0278, "step": 27808 }, { "epoch": 1.36, "grad_norm": 0.563348114490509, "learning_rate": 0.0003429382687314085, "loss": 2.9911, "step": 27809 }, { "epoch": 1.36, "grad_norm": 0.6096580028533936, "learning_rate": 0.00034292303099979737, "loss": 2.9992, "step": 27810 }, { "epoch": 1.36, "grad_norm": 0.5842347145080566, "learning_rate": 0.00034290779315513525, "loss": 2.8416, "step": 27811 }, { "epoch": 1.36, "grad_norm": 0.558156430721283, "learning_rate": 0.00034289255519746225, "loss": 2.9634, "step": 27812 }, { "epoch": 1.36, "grad_norm": 0.6108639240264893, "learning_rate": 0.0003428773171268185, "loss": 3.1804, "step": 27813 }, { "epoch": 1.36, "grad_norm": 0.6039309501647949, "learning_rate": 0.0003428620789432443, "loss": 3.0395, "step": 27814 }, { "epoch": 1.36, "grad_norm": 0.5984408855438232, "learning_rate": 0.0003428468406467794, "loss": 3.1099, "step": 27815 }, { "epoch": 1.36, "grad_norm": 0.5448001027107239, "learning_rate": 0.0003428316022374642, "loss": 3.2093, "step": 27816 }, { "epoch": 1.36, "grad_norm": 0.577616274356842, "learning_rate": 0.00034281636371533895, "loss": 3.1489, "step": 27817 }, { "epoch": 1.36, "grad_norm": 0.5480098128318787, "learning_rate": 0.0003428011250804436, "loss": 3.0566, "step": 27818 }, { "epoch": 1.36, "grad_norm": 0.5774540305137634, "learning_rate": 0.00034278588633281825, "loss": 3.0605, "step": 27819 }, { "epoch": 1.36, "grad_norm": 0.5654169917106628, "learning_rate": 0.00034277064747250313, "loss": 2.8954, "step": 27820 }, { "epoch": 1.36, "grad_norm": 0.6514195799827576, "learning_rate": 0.0003427554084995384, "loss": 3.0725, "step": 27821 }, { "epoch": 1.36, "grad_norm": 0.5787948369979858, "learning_rate": 0.0003427401694139641, "loss": 3.034, "step": 27822 }, { "epoch": 1.36, "grad_norm": 0.59248948097229, "learning_rate": 0.0003427249302158205, "loss": 2.8765, "step": 27823 }, { "epoch": 1.36, "grad_norm": 0.5247685313224792, "learning_rate": 0.0003427096909051475, "loss": 3.0719, "step": 27824 }, { "epoch": 1.36, "grad_norm": 0.5795184969902039, "learning_rate": 0.00034269445148198553, "loss": 3.2857, "step": 27825 }, { "epoch": 1.36, "grad_norm": 0.5434640049934387, "learning_rate": 0.0003426792119463746, "loss": 3.0622, "step": 27826 }, { "epoch": 1.36, "grad_norm": 0.5672294497489929, "learning_rate": 0.0003426639722983547, "loss": 2.9796, "step": 27827 }, { "epoch": 1.36, "grad_norm": 0.6290318369865417, "learning_rate": 0.0003426487325379662, "loss": 2.8113, "step": 27828 }, { "epoch": 1.36, "grad_norm": 0.5443248152732849, "learning_rate": 0.0003426334926652492, "loss": 3.0352, "step": 27829 }, { "epoch": 1.36, "grad_norm": 0.5471267700195312, "learning_rate": 0.00034261825268024374, "loss": 3.1233, "step": 27830 }, { "epoch": 1.36, "grad_norm": 0.5816265940666199, "learning_rate": 0.00034260301258299, "loss": 3.1235, "step": 27831 }, { "epoch": 1.36, "grad_norm": 0.5702730417251587, "learning_rate": 0.00034258777237352814, "loss": 3.0953, "step": 27832 }, { "epoch": 1.36, "grad_norm": 0.5764862895011902, "learning_rate": 0.0003425725320518982, "loss": 3.0093, "step": 27833 }, { "epoch": 1.36, "grad_norm": 0.5817887187004089, "learning_rate": 0.0003425572916181405, "loss": 3.0378, "step": 27834 }, { "epoch": 1.36, "grad_norm": 0.5505737066268921, "learning_rate": 0.00034254205107229504, "loss": 3.0721, "step": 27835 }, { "epoch": 1.36, "grad_norm": 0.6135760545730591, "learning_rate": 0.0003425268104144021, "loss": 2.9754, "step": 27836 }, { "epoch": 1.36, "grad_norm": 0.5726092457771301, "learning_rate": 0.00034251156964450153, "loss": 3.2546, "step": 27837 }, { "epoch": 1.36, "grad_norm": 0.5802931785583496, "learning_rate": 0.0003424963287626338, "loss": 2.9391, "step": 27838 }, { "epoch": 1.36, "grad_norm": 0.5456535816192627, "learning_rate": 0.0003424810877688389, "loss": 3.1814, "step": 27839 }, { "epoch": 1.36, "grad_norm": 0.5868333578109741, "learning_rate": 0.00034246584666315703, "loss": 3.0369, "step": 27840 }, { "epoch": 1.36, "grad_norm": 0.6372731924057007, "learning_rate": 0.00034245060544562826, "loss": 2.9961, "step": 27841 }, { "epoch": 1.36, "grad_norm": 0.5847151279449463, "learning_rate": 0.00034243536411629266, "loss": 3.073, "step": 27842 }, { "epoch": 1.36, "grad_norm": 0.5791596174240112, "learning_rate": 0.0003424201226751906, "loss": 3.0307, "step": 27843 }, { "epoch": 1.36, "grad_norm": 0.5760388970375061, "learning_rate": 0.0003424048811223621, "loss": 2.9906, "step": 27844 }, { "epoch": 1.36, "grad_norm": 0.5581043362617493, "learning_rate": 0.0003423896394578473, "loss": 3.1661, "step": 27845 }, { "epoch": 1.36, "grad_norm": 0.6014453172683716, "learning_rate": 0.0003423743976816863, "loss": 3.0195, "step": 27846 }, { "epoch": 1.36, "grad_norm": 0.6490236520767212, "learning_rate": 0.0003423591557939193, "loss": 2.8944, "step": 27847 }, { "epoch": 1.36, "grad_norm": 0.5952842831611633, "learning_rate": 0.00034234391379458655, "loss": 3.0426, "step": 27848 }, { "epoch": 1.36, "grad_norm": 0.5552687644958496, "learning_rate": 0.00034232867168372793, "loss": 2.9578, "step": 27849 }, { "epoch": 1.36, "grad_norm": 0.5637187361717224, "learning_rate": 0.0003423134294613838, "loss": 3.1498, "step": 27850 }, { "epoch": 1.36, "grad_norm": 0.5724777579307556, "learning_rate": 0.00034229818712759423, "loss": 3.0703, "step": 27851 }, { "epoch": 1.36, "grad_norm": 0.6271558403968811, "learning_rate": 0.0003422829446823994, "loss": 3.046, "step": 27852 }, { "epoch": 1.37, "grad_norm": 0.5826039910316467, "learning_rate": 0.00034226770212583934, "loss": 2.9899, "step": 27853 }, { "epoch": 1.37, "grad_norm": 0.5479075312614441, "learning_rate": 0.0003422524594579544, "loss": 2.9437, "step": 27854 }, { "epoch": 1.37, "grad_norm": 0.572819709777832, "learning_rate": 0.0003422372166787846, "loss": 3.0236, "step": 27855 }, { "epoch": 1.37, "grad_norm": 0.5536037683486938, "learning_rate": 0.00034222197378837006, "loss": 3.1475, "step": 27856 }, { "epoch": 1.37, "grad_norm": 0.6179131865501404, "learning_rate": 0.00034220673078675095, "loss": 2.9968, "step": 27857 }, { "epoch": 1.37, "grad_norm": 0.562525749206543, "learning_rate": 0.00034219148767396744, "loss": 3.0093, "step": 27858 }, { "epoch": 1.37, "grad_norm": 0.5559420585632324, "learning_rate": 0.00034217624445005967, "loss": 3.0252, "step": 27859 }, { "epoch": 1.37, "grad_norm": 0.5643912553787231, "learning_rate": 0.00034216100111506784, "loss": 3.1213, "step": 27860 }, { "epoch": 1.37, "grad_norm": 0.5565373301506042, "learning_rate": 0.00034214575766903203, "loss": 3.0101, "step": 27861 }, { "epoch": 1.37, "grad_norm": 0.5774194002151489, "learning_rate": 0.00034213051411199246, "loss": 2.9123, "step": 27862 }, { "epoch": 1.37, "grad_norm": 0.5667964220046997, "learning_rate": 0.000342115270443989, "loss": 3.2832, "step": 27863 }, { "epoch": 1.37, "grad_norm": 0.5750254988670349, "learning_rate": 0.0003421000266650622, "loss": 3.1505, "step": 27864 }, { "epoch": 1.37, "grad_norm": 0.6452105641365051, "learning_rate": 0.0003420847827752521, "loss": 3.0935, "step": 27865 }, { "epoch": 1.37, "grad_norm": 0.5923091173171997, "learning_rate": 0.0003420695387745987, "loss": 2.9655, "step": 27866 }, { "epoch": 1.37, "grad_norm": 0.5900555849075317, "learning_rate": 0.0003420542946631422, "loss": 2.9454, "step": 27867 }, { "epoch": 1.37, "grad_norm": 0.5830941200256348, "learning_rate": 0.00034203905044092274, "loss": 3.0925, "step": 27868 }, { "epoch": 1.37, "grad_norm": 0.5814031362533569, "learning_rate": 0.00034202380610798066, "loss": 3.0646, "step": 27869 }, { "epoch": 1.37, "grad_norm": 0.5986719131469727, "learning_rate": 0.0003420085616643558, "loss": 2.8448, "step": 27870 }, { "epoch": 1.37, "grad_norm": 0.5420025587081909, "learning_rate": 0.00034199331711008864, "loss": 2.9547, "step": 27871 }, { "epoch": 1.37, "grad_norm": 0.5797125101089478, "learning_rate": 0.00034197807244521904, "loss": 3.1777, "step": 27872 }, { "epoch": 1.37, "grad_norm": 0.5667112469673157, "learning_rate": 0.00034196282766978727, "loss": 3.1301, "step": 27873 }, { "epoch": 1.37, "grad_norm": 0.5843361616134644, "learning_rate": 0.0003419475827838335, "loss": 2.8819, "step": 27874 }, { "epoch": 1.37, "grad_norm": 0.5419958829879761, "learning_rate": 0.00034193233778739797, "loss": 2.8901, "step": 27875 }, { "epoch": 1.37, "grad_norm": 0.5321311354637146, "learning_rate": 0.0003419170926805207, "loss": 2.8699, "step": 27876 }, { "epoch": 1.37, "grad_norm": 0.6508222818374634, "learning_rate": 0.0003419018474632418, "loss": 3.2209, "step": 27877 }, { "epoch": 1.37, "grad_norm": 0.6090697646141052, "learning_rate": 0.0003418866021356015, "loss": 3.0021, "step": 27878 }, { "epoch": 1.37, "grad_norm": 0.5967793464660645, "learning_rate": 0.00034187135669764, "loss": 2.9383, "step": 27879 }, { "epoch": 1.37, "grad_norm": 0.5427808165550232, "learning_rate": 0.00034185611114939744, "loss": 3.1093, "step": 27880 }, { "epoch": 1.37, "grad_norm": 0.601786732673645, "learning_rate": 0.0003418408654909139, "loss": 3.0333, "step": 27881 }, { "epoch": 1.37, "grad_norm": 0.6144266724586487, "learning_rate": 0.0003418256197222295, "loss": 3.0911, "step": 27882 }, { "epoch": 1.37, "grad_norm": 0.5672413110733032, "learning_rate": 0.00034181037384338453, "loss": 3.0872, "step": 27883 }, { "epoch": 1.37, "grad_norm": 0.6033337712287903, "learning_rate": 0.0003417951278544191, "loss": 3.2807, "step": 27884 }, { "epoch": 1.37, "grad_norm": 0.5832936763763428, "learning_rate": 0.0003417798817553733, "loss": 2.9746, "step": 27885 }, { "epoch": 1.37, "grad_norm": 0.5509626269340515, "learning_rate": 0.0003417646355462874, "loss": 3.0896, "step": 27886 }, { "epoch": 1.37, "grad_norm": 0.6345809698104858, "learning_rate": 0.0003417493892272015, "loss": 2.9941, "step": 27887 }, { "epoch": 1.37, "grad_norm": 0.5827943682670593, "learning_rate": 0.00034173414279815563, "loss": 3.0285, "step": 27888 }, { "epoch": 1.37, "grad_norm": 0.5544496774673462, "learning_rate": 0.0003417188962591901, "loss": 3.0064, "step": 27889 }, { "epoch": 1.37, "grad_norm": 0.5938286185264587, "learning_rate": 0.00034170364961034507, "loss": 2.9717, "step": 27890 }, { "epoch": 1.37, "grad_norm": 0.5810260772705078, "learning_rate": 0.00034168840285166065, "loss": 2.9629, "step": 27891 }, { "epoch": 1.37, "grad_norm": 0.6095471978187561, "learning_rate": 0.00034167315598317697, "loss": 3.1627, "step": 27892 }, { "epoch": 1.37, "grad_norm": 0.5932841897010803, "learning_rate": 0.00034165790900493413, "loss": 2.9353, "step": 27893 }, { "epoch": 1.37, "grad_norm": 0.6483215093612671, "learning_rate": 0.00034164266191697253, "loss": 2.9442, "step": 27894 }, { "epoch": 1.37, "grad_norm": 0.5468027591705322, "learning_rate": 0.00034162741471933217, "loss": 3.1029, "step": 27895 }, { "epoch": 1.37, "grad_norm": 0.5780773758888245, "learning_rate": 0.00034161216741205317, "loss": 3.1393, "step": 27896 }, { "epoch": 1.37, "grad_norm": 0.6147868037223816, "learning_rate": 0.0003415969199951757, "loss": 2.9525, "step": 27897 }, { "epoch": 1.37, "grad_norm": 0.6018847227096558, "learning_rate": 0.00034158167246874, "loss": 3.2364, "step": 27898 }, { "epoch": 1.37, "grad_norm": 0.6293785572052002, "learning_rate": 0.0003415664248327861, "loss": 3.0502, "step": 27899 }, { "epoch": 1.37, "grad_norm": 0.5687551498413086, "learning_rate": 0.0003415511770873542, "loss": 2.9601, "step": 27900 }, { "epoch": 1.37, "grad_norm": 0.5848996639251709, "learning_rate": 0.00034153592923248465, "loss": 2.9939, "step": 27901 }, { "epoch": 1.37, "grad_norm": 0.5531403422355652, "learning_rate": 0.00034152068126821745, "loss": 2.9455, "step": 27902 }, { "epoch": 1.37, "grad_norm": 0.6372696757316589, "learning_rate": 0.0003415054331945927, "loss": 3.0152, "step": 27903 }, { "epoch": 1.37, "grad_norm": 0.6175010204315186, "learning_rate": 0.00034149018501165054, "loss": 3.0507, "step": 27904 }, { "epoch": 1.37, "grad_norm": 0.5934361815452576, "learning_rate": 0.00034147493671943133, "loss": 3.14, "step": 27905 }, { "epoch": 1.37, "grad_norm": 0.5523131489753723, "learning_rate": 0.00034145968831797506, "loss": 3.2342, "step": 27906 }, { "epoch": 1.37, "grad_norm": 0.595689594745636, "learning_rate": 0.000341444439807322, "loss": 2.9376, "step": 27907 }, { "epoch": 1.37, "grad_norm": 0.5672516822814941, "learning_rate": 0.0003414291911875122, "loss": 3.2794, "step": 27908 }, { "epoch": 1.37, "grad_norm": 0.6107340455055237, "learning_rate": 0.000341413942458586, "loss": 3.1038, "step": 27909 }, { "epoch": 1.37, "grad_norm": 0.6264364719390869, "learning_rate": 0.0003413986936205833, "loss": 2.9496, "step": 27910 }, { "epoch": 1.37, "grad_norm": 0.6131812334060669, "learning_rate": 0.00034138344467354447, "loss": 2.895, "step": 27911 }, { "epoch": 1.37, "grad_norm": 0.5950072407722473, "learning_rate": 0.00034136819561750965, "loss": 3.083, "step": 27912 }, { "epoch": 1.37, "grad_norm": 0.5509093403816223, "learning_rate": 0.00034135294645251896, "loss": 3.0271, "step": 27913 }, { "epoch": 1.37, "grad_norm": 0.5661293864250183, "learning_rate": 0.0003413376971786125, "loss": 3.1161, "step": 27914 }, { "epoch": 1.37, "grad_norm": 0.5739542841911316, "learning_rate": 0.0003413224477958306, "loss": 3.0398, "step": 27915 }, { "epoch": 1.37, "grad_norm": 0.5414571166038513, "learning_rate": 0.00034130719830421333, "loss": 3.0356, "step": 27916 }, { "epoch": 1.37, "grad_norm": 0.5896730422973633, "learning_rate": 0.00034129194870380076, "loss": 3.0442, "step": 27917 }, { "epoch": 1.37, "grad_norm": 0.606563925743103, "learning_rate": 0.0003412766989946332, "loss": 3.0527, "step": 27918 }, { "epoch": 1.37, "grad_norm": 0.5917063355445862, "learning_rate": 0.0003412614491767507, "loss": 3.0547, "step": 27919 }, { "epoch": 1.37, "grad_norm": 0.5469411015510559, "learning_rate": 0.00034124619925019346, "loss": 3.4306, "step": 27920 }, { "epoch": 1.37, "grad_norm": 0.5653911828994751, "learning_rate": 0.0003412309492150018, "loss": 3.1284, "step": 27921 }, { "epoch": 1.37, "grad_norm": 0.5671815872192383, "learning_rate": 0.00034121569907121565, "loss": 3.1207, "step": 27922 }, { "epoch": 1.37, "grad_norm": 0.6383528709411621, "learning_rate": 0.0003412004488188753, "loss": 2.8917, "step": 27923 }, { "epoch": 1.37, "grad_norm": 0.5371748805046082, "learning_rate": 0.0003411851984580209, "loss": 3.0595, "step": 27924 }, { "epoch": 1.37, "grad_norm": 0.5818402767181396, "learning_rate": 0.0003411699479886926, "loss": 3.001, "step": 27925 }, { "epoch": 1.37, "grad_norm": 0.6108590960502625, "learning_rate": 0.0003411546974109306, "loss": 3.0168, "step": 27926 }, { "epoch": 1.37, "grad_norm": 0.5887522101402283, "learning_rate": 0.0003411394467247751, "loss": 3.0405, "step": 27927 }, { "epoch": 1.37, "grad_norm": 0.5705615878105164, "learning_rate": 0.00034112419593026615, "loss": 3.0193, "step": 27928 }, { "epoch": 1.37, "grad_norm": 0.5551228523254395, "learning_rate": 0.0003411089450274439, "loss": 3.1167, "step": 27929 }, { "epoch": 1.37, "grad_norm": 0.567327082157135, "learning_rate": 0.0003410936940163487, "loss": 2.9874, "step": 27930 }, { "epoch": 1.37, "grad_norm": 0.5707133412361145, "learning_rate": 0.0003410784428970207, "loss": 3.0144, "step": 27931 }, { "epoch": 1.37, "grad_norm": 0.5892231464385986, "learning_rate": 0.0003410631916694999, "loss": 3.1033, "step": 27932 }, { "epoch": 1.37, "grad_norm": 0.6045179963111877, "learning_rate": 0.00034104794033382657, "loss": 2.9812, "step": 27933 }, { "epoch": 1.37, "grad_norm": 0.5541436076164246, "learning_rate": 0.0003410326888900408, "loss": 3.1712, "step": 27934 }, { "epoch": 1.37, "grad_norm": 0.5599698424339294, "learning_rate": 0.0003410174373381829, "loss": 3.0917, "step": 27935 }, { "epoch": 1.37, "grad_norm": 0.5694020986557007, "learning_rate": 0.0003410021856782929, "loss": 3.1889, "step": 27936 }, { "epoch": 1.37, "grad_norm": 0.5983900427818298, "learning_rate": 0.00034098693391041107, "loss": 2.9114, "step": 27937 }, { "epoch": 1.37, "grad_norm": 0.6155137419700623, "learning_rate": 0.0003409716820345776, "loss": 3.0607, "step": 27938 }, { "epoch": 1.37, "grad_norm": 0.6034281849861145, "learning_rate": 0.00034095643005083257, "loss": 2.961, "step": 27939 }, { "epoch": 1.37, "grad_norm": 0.5892189145088196, "learning_rate": 0.0003409411779592161, "loss": 3.244, "step": 27940 }, { "epoch": 1.37, "grad_norm": 0.5782514214515686, "learning_rate": 0.00034092592575976855, "loss": 3.2606, "step": 27941 }, { "epoch": 1.37, "grad_norm": 0.5579574108123779, "learning_rate": 0.00034091067345253, "loss": 2.9648, "step": 27942 }, { "epoch": 1.37, "grad_norm": 0.5963599681854248, "learning_rate": 0.0003408954210375405, "loss": 3.0709, "step": 27943 }, { "epoch": 1.37, "grad_norm": 0.6034666299819946, "learning_rate": 0.00034088016851484043, "loss": 2.8616, "step": 27944 }, { "epoch": 1.37, "grad_norm": 0.6475614309310913, "learning_rate": 0.00034086491588446985, "loss": 3.0576, "step": 27945 }, { "epoch": 1.37, "grad_norm": 0.5820233821868896, "learning_rate": 0.0003408496631464689, "loss": 2.9848, "step": 27946 }, { "epoch": 1.37, "grad_norm": 0.5455297827720642, "learning_rate": 0.00034083441030087784, "loss": 3.0637, "step": 27947 }, { "epoch": 1.37, "grad_norm": 0.568981945514679, "learning_rate": 0.00034081915734773675, "loss": 2.9338, "step": 27948 }, { "epoch": 1.37, "grad_norm": 0.5847499370574951, "learning_rate": 0.00034080390428708595, "loss": 2.8865, "step": 27949 }, { "epoch": 1.37, "grad_norm": 0.6259151697158813, "learning_rate": 0.0003407886511189654, "loss": 3.0548, "step": 27950 }, { "epoch": 1.37, "grad_norm": 0.5772634744644165, "learning_rate": 0.00034077339784341544, "loss": 3.0336, "step": 27951 }, { "epoch": 1.37, "grad_norm": 0.5600981712341309, "learning_rate": 0.00034075814446047617, "loss": 2.9458, "step": 27952 }, { "epoch": 1.37, "grad_norm": 0.5539212226867676, "learning_rate": 0.0003407428909701879, "loss": 3.0916, "step": 27953 }, { "epoch": 1.37, "grad_norm": 0.598659336566925, "learning_rate": 0.0003407276373725906, "loss": 3.18, "step": 27954 }, { "epoch": 1.37, "grad_norm": 0.5764479637145996, "learning_rate": 0.00034071238366772456, "loss": 3.0754, "step": 27955 }, { "epoch": 1.37, "grad_norm": 0.5627712607383728, "learning_rate": 0.00034069712985563, "loss": 2.9324, "step": 27956 }, { "epoch": 1.37, "grad_norm": 0.6134984493255615, "learning_rate": 0.00034068187593634696, "loss": 2.9614, "step": 27957 }, { "epoch": 1.37, "grad_norm": 0.581798255443573, "learning_rate": 0.0003406666219099157, "loss": 2.8332, "step": 27958 }, { "epoch": 1.37, "grad_norm": 0.5963696837425232, "learning_rate": 0.0003406513677763764, "loss": 2.9447, "step": 27959 }, { "epoch": 1.37, "grad_norm": 0.5944687128067017, "learning_rate": 0.0003406361135357692, "loss": 2.9688, "step": 27960 }, { "epoch": 1.37, "grad_norm": 0.6353680491447449, "learning_rate": 0.00034062085918813435, "loss": 2.8498, "step": 27961 }, { "epoch": 1.37, "grad_norm": 0.5559459328651428, "learning_rate": 0.00034060560473351187, "loss": 2.8492, "step": 27962 }, { "epoch": 1.37, "grad_norm": 0.5717218518257141, "learning_rate": 0.00034059035017194216, "loss": 2.782, "step": 27963 }, { "epoch": 1.37, "grad_norm": 0.5911750197410583, "learning_rate": 0.00034057509550346524, "loss": 3.1497, "step": 27964 }, { "epoch": 1.37, "grad_norm": 0.5874834060668945, "learning_rate": 0.0003405598407281212, "loss": 2.923, "step": 27965 }, { "epoch": 1.37, "grad_norm": 0.5998968482017517, "learning_rate": 0.0003405445858459505, "loss": 2.8489, "step": 27966 }, { "epoch": 1.37, "grad_norm": 0.576008677482605, "learning_rate": 0.0003405293308569932, "loss": 2.9175, "step": 27967 }, { "epoch": 1.37, "grad_norm": 0.5811209082603455, "learning_rate": 0.0003405140757612893, "loss": 3.0114, "step": 27968 }, { "epoch": 1.37, "grad_norm": 0.6185311675071716, "learning_rate": 0.0003404988205588792, "loss": 3.1087, "step": 27969 }, { "epoch": 1.37, "grad_norm": 0.6029673218727112, "learning_rate": 0.000340483565249803, "loss": 3.0329, "step": 27970 }, { "epoch": 1.37, "grad_norm": 0.6005308628082275, "learning_rate": 0.0003404683098341009, "loss": 3.1651, "step": 27971 }, { "epoch": 1.37, "grad_norm": 0.5758666396141052, "learning_rate": 0.00034045305431181297, "loss": 3.0992, "step": 27972 }, { "epoch": 1.37, "grad_norm": 0.546139657497406, "learning_rate": 0.00034043779868297953, "loss": 2.9213, "step": 27973 }, { "epoch": 1.37, "grad_norm": 0.5436400771141052, "learning_rate": 0.00034042254294764073, "loss": 3.0611, "step": 27974 }, { "epoch": 1.37, "grad_norm": 0.5803444385528564, "learning_rate": 0.00034040728710583665, "loss": 3.0152, "step": 27975 }, { "epoch": 1.37, "grad_norm": 0.5593194961547852, "learning_rate": 0.0003403920311576077, "loss": 3.0609, "step": 27976 }, { "epoch": 1.37, "grad_norm": 0.5657615661621094, "learning_rate": 0.0003403767751029938, "loss": 3.1276, "step": 27977 }, { "epoch": 1.37, "grad_norm": 0.5875985622406006, "learning_rate": 0.00034036151894203536, "loss": 3.0625, "step": 27978 }, { "epoch": 1.37, "grad_norm": 0.55743807554245, "learning_rate": 0.00034034626267477234, "loss": 3.1621, "step": 27979 }, { "epoch": 1.37, "grad_norm": 0.6265092492103577, "learning_rate": 0.00034033100630124504, "loss": 2.9605, "step": 27980 }, { "epoch": 1.37, "grad_norm": 0.5701452493667603, "learning_rate": 0.00034031574982149363, "loss": 3.0356, "step": 27981 }, { "epoch": 1.37, "grad_norm": 0.5367501974105835, "learning_rate": 0.00034030049323555833, "loss": 3.0456, "step": 27982 }, { "epoch": 1.37, "grad_norm": 0.5916096568107605, "learning_rate": 0.00034028523654347926, "loss": 3.2683, "step": 27983 }, { "epoch": 1.37, "grad_norm": 0.5602060556411743, "learning_rate": 0.00034026997974529664, "loss": 3.2333, "step": 27984 }, { "epoch": 1.37, "grad_norm": 0.542546272277832, "learning_rate": 0.00034025472284105065, "loss": 2.9991, "step": 27985 }, { "epoch": 1.37, "grad_norm": 0.5879301428794861, "learning_rate": 0.0003402394658307814, "loss": 2.8912, "step": 27986 }, { "epoch": 1.37, "grad_norm": 0.5856759548187256, "learning_rate": 0.00034022420871452925, "loss": 2.9014, "step": 27987 }, { "epoch": 1.37, "grad_norm": 0.5833997130393982, "learning_rate": 0.0003402089514923342, "loss": 2.9192, "step": 27988 }, { "epoch": 1.37, "grad_norm": 0.5771424770355225, "learning_rate": 0.0003401936941642366, "loss": 3.1536, "step": 27989 }, { "epoch": 1.37, "grad_norm": 0.5892386436462402, "learning_rate": 0.00034017843673027646, "loss": 2.9945, "step": 27990 }, { "epoch": 1.37, "grad_norm": 0.6291250586509705, "learning_rate": 0.00034016317919049403, "loss": 2.9909, "step": 27991 }, { "epoch": 1.37, "grad_norm": 0.5596926212310791, "learning_rate": 0.00034014792154492957, "loss": 3.1103, "step": 27992 }, { "epoch": 1.37, "grad_norm": 0.5595067739486694, "learning_rate": 0.00034013266379362325, "loss": 3.0245, "step": 27993 }, { "epoch": 1.37, "grad_norm": 0.5994234085083008, "learning_rate": 0.00034011740593661513, "loss": 3.1417, "step": 27994 }, { "epoch": 1.37, "grad_norm": 0.578930675983429, "learning_rate": 0.00034010214797394555, "loss": 3.1635, "step": 27995 }, { "epoch": 1.37, "grad_norm": 0.6194568872451782, "learning_rate": 0.00034008688990565456, "loss": 2.962, "step": 27996 }, { "epoch": 1.37, "grad_norm": 0.5864658951759338, "learning_rate": 0.00034007163173178245, "loss": 3.2179, "step": 27997 }, { "epoch": 1.37, "grad_norm": 0.5644214153289795, "learning_rate": 0.0003400563734523694, "loss": 2.9034, "step": 27998 }, { "epoch": 1.37, "grad_norm": 0.5568000674247742, "learning_rate": 0.00034004111506745553, "loss": 3.0852, "step": 27999 }, { "epoch": 1.37, "grad_norm": 0.5540627241134644, "learning_rate": 0.0003400258565770811, "loss": 3.0086, "step": 28000 }, { "epoch": 1.37, "grad_norm": 0.5743499994277954, "learning_rate": 0.0003400105979812862, "loss": 2.6773, "step": 28001 }, { "epoch": 1.37, "grad_norm": 0.5981676578521729, "learning_rate": 0.0003399953392801111, "loss": 3.0462, "step": 28002 }, { "epoch": 1.37, "grad_norm": 0.5800593495368958, "learning_rate": 0.00033998008047359603, "loss": 3.116, "step": 28003 }, { "epoch": 1.37, "grad_norm": 0.590241014957428, "learning_rate": 0.0003399648215617812, "loss": 3.228, "step": 28004 }, { "epoch": 1.37, "grad_norm": 0.5530853271484375, "learning_rate": 0.0003399495625447066, "loss": 3.315, "step": 28005 }, { "epoch": 1.37, "grad_norm": 0.5522137880325317, "learning_rate": 0.00033993430342241254, "loss": 3.0778, "step": 28006 }, { "epoch": 1.37, "grad_norm": 0.5803789496421814, "learning_rate": 0.00033991904419493926, "loss": 3.1161, "step": 28007 }, { "epoch": 1.37, "grad_norm": 0.5703111886978149, "learning_rate": 0.00033990378486232684, "loss": 3.0318, "step": 28008 }, { "epoch": 1.37, "grad_norm": 0.5972906351089478, "learning_rate": 0.0003398885254246156, "loss": 3.0836, "step": 28009 }, { "epoch": 1.37, "grad_norm": 0.6131359338760376, "learning_rate": 0.00033987326588184565, "loss": 3.1311, "step": 28010 }, { "epoch": 1.37, "grad_norm": 0.5489694476127625, "learning_rate": 0.00033985800623405717, "loss": 3.1116, "step": 28011 }, { "epoch": 1.37, "grad_norm": 0.552985668182373, "learning_rate": 0.0003398427464812904, "loss": 3.0374, "step": 28012 }, { "epoch": 1.37, "grad_norm": 0.581065833568573, "learning_rate": 0.00033982748662358546, "loss": 2.8298, "step": 28013 }, { "epoch": 1.37, "grad_norm": 0.5935637354850769, "learning_rate": 0.00033981222666098256, "loss": 3.1071, "step": 28014 }, { "epoch": 1.37, "grad_norm": 0.5556374788284302, "learning_rate": 0.00033979696659352203, "loss": 3.204, "step": 28015 }, { "epoch": 1.37, "grad_norm": 0.5717475414276123, "learning_rate": 0.00033978170642124387, "loss": 2.7786, "step": 28016 }, { "epoch": 1.37, "grad_norm": 0.5912850499153137, "learning_rate": 0.0003397664461441883, "loss": 3.1273, "step": 28017 }, { "epoch": 1.37, "grad_norm": 0.5778431296348572, "learning_rate": 0.00033975118576239565, "loss": 2.9896, "step": 28018 }, { "epoch": 1.37, "grad_norm": 0.5879780054092407, "learning_rate": 0.000339735925275906, "loss": 3.0575, "step": 28019 }, { "epoch": 1.37, "grad_norm": 0.5398730635643005, "learning_rate": 0.00033972066468475955, "loss": 3.0526, "step": 28020 }, { "epoch": 1.37, "grad_norm": 0.6177681088447571, "learning_rate": 0.00033970540398899654, "loss": 3.0964, "step": 28021 }, { "epoch": 1.37, "grad_norm": 0.5534880757331848, "learning_rate": 0.00033969014318865713, "loss": 3.0763, "step": 28022 }, { "epoch": 1.37, "grad_norm": 0.5374436378479004, "learning_rate": 0.0003396748822837815, "loss": 3.2786, "step": 28023 }, { "epoch": 1.37, "grad_norm": 0.5656604170799255, "learning_rate": 0.0003396596212744099, "loss": 3.1471, "step": 28024 }, { "epoch": 1.37, "grad_norm": 0.5550830960273743, "learning_rate": 0.0003396443601605825, "loss": 3.0216, "step": 28025 }, { "epoch": 1.37, "grad_norm": 0.5797797441482544, "learning_rate": 0.0003396290989423395, "loss": 2.9703, "step": 28026 }, { "epoch": 1.37, "grad_norm": 0.5749794244766235, "learning_rate": 0.000339613837619721, "loss": 3.1812, "step": 28027 }, { "epoch": 1.37, "grad_norm": 0.6089553833007812, "learning_rate": 0.0003395985761927673, "loss": 3.0538, "step": 28028 }, { "epoch": 1.37, "grad_norm": 0.5771617889404297, "learning_rate": 0.00033958331466151865, "loss": 3.1415, "step": 28029 }, { "epoch": 1.37, "grad_norm": 0.602817952632904, "learning_rate": 0.0003395680530260151, "loss": 3.1378, "step": 28030 }, { "epoch": 1.37, "grad_norm": 0.597654402256012, "learning_rate": 0.000339552791286297, "loss": 3.1961, "step": 28031 }, { "epoch": 1.37, "grad_norm": 0.6101335287094116, "learning_rate": 0.0003395375294424043, "loss": 2.981, "step": 28032 }, { "epoch": 1.37, "grad_norm": 0.5998036861419678, "learning_rate": 0.0003395222674943775, "loss": 3.0004, "step": 28033 }, { "epoch": 1.37, "grad_norm": 0.9481212496757507, "learning_rate": 0.0003395070054422566, "loss": 3.0037, "step": 28034 }, { "epoch": 1.37, "grad_norm": 0.5774849653244019, "learning_rate": 0.00033949174328608175, "loss": 3.4101, "step": 28035 }, { "epoch": 1.37, "grad_norm": 0.608545184135437, "learning_rate": 0.0003394764810258934, "loss": 3.1483, "step": 28036 }, { "epoch": 1.37, "grad_norm": 0.5682400465011597, "learning_rate": 0.00033946121866173146, "loss": 3.0725, "step": 28037 }, { "epoch": 1.37, "grad_norm": 0.634087085723877, "learning_rate": 0.0003394459561936364, "loss": 3.0956, "step": 28038 }, { "epoch": 1.37, "grad_norm": 0.5982432961463928, "learning_rate": 0.00033943069362164823, "loss": 3.0107, "step": 28039 }, { "epoch": 1.37, "grad_norm": 0.5551794171333313, "learning_rate": 0.0003394154309458072, "loss": 2.9166, "step": 28040 }, { "epoch": 1.37, "grad_norm": 0.594632625579834, "learning_rate": 0.00033940016816615354, "loss": 2.9562, "step": 28041 }, { "epoch": 1.37, "grad_norm": 0.5509760975837708, "learning_rate": 0.0003393849052827273, "loss": 3.1103, "step": 28042 }, { "epoch": 1.37, "grad_norm": 0.5657804012298584, "learning_rate": 0.0003393696422955689, "loss": 3.1675, "step": 28043 }, { "epoch": 1.37, "grad_norm": 0.5348253846168518, "learning_rate": 0.0003393543792047185, "loss": 3.0305, "step": 28044 }, { "epoch": 1.37, "grad_norm": 0.572263240814209, "learning_rate": 0.0003393391160102161, "loss": 3.1031, "step": 28045 }, { "epoch": 1.37, "grad_norm": 0.581788182258606, "learning_rate": 0.00033932385271210214, "loss": 2.9799, "step": 28046 }, { "epoch": 1.37, "grad_norm": 0.5556021928787231, "learning_rate": 0.0003393085893104167, "loss": 3.1716, "step": 28047 }, { "epoch": 1.37, "grad_norm": 0.5477098226547241, "learning_rate": 0.00033929332580519994, "loss": 3.0578, "step": 28048 }, { "epoch": 1.37, "grad_norm": 0.6444635987281799, "learning_rate": 0.0003392780621964922, "loss": 3.1512, "step": 28049 }, { "epoch": 1.37, "grad_norm": 0.5834984183311462, "learning_rate": 0.0003392627984843335, "loss": 3.3345, "step": 28050 }, { "epoch": 1.37, "grad_norm": 0.5475103259086609, "learning_rate": 0.00033924753466876427, "loss": 3.094, "step": 28051 }, { "epoch": 1.37, "grad_norm": 0.5777970552444458, "learning_rate": 0.0003392322707498245, "loss": 3.1841, "step": 28052 }, { "epoch": 1.37, "grad_norm": 0.5672211647033691, "learning_rate": 0.00033921700672755455, "loss": 3.0541, "step": 28053 }, { "epoch": 1.37, "grad_norm": 0.7378207445144653, "learning_rate": 0.0003392017426019945, "loss": 3.0522, "step": 28054 }, { "epoch": 1.37, "grad_norm": 0.5990445017814636, "learning_rate": 0.0003391864783731846, "loss": 3.0336, "step": 28055 }, { "epoch": 1.37, "grad_norm": 0.5967203974723816, "learning_rate": 0.00033917121404116515, "loss": 2.9228, "step": 28056 }, { "epoch": 1.38, "grad_norm": 0.5567073225975037, "learning_rate": 0.00033915594960597616, "loss": 3.0333, "step": 28057 }, { "epoch": 1.38, "grad_norm": 0.5595899820327759, "learning_rate": 0.0003391406850676579, "loss": 3.093, "step": 28058 }, { "epoch": 1.38, "grad_norm": 0.5615085363388062, "learning_rate": 0.0003391254204262507, "loss": 2.9224, "step": 28059 }, { "epoch": 1.38, "grad_norm": 0.5954691767692566, "learning_rate": 0.00033911015568179466, "loss": 2.9878, "step": 28060 }, { "epoch": 1.38, "grad_norm": 0.557086169719696, "learning_rate": 0.00033909489083433, "loss": 3.1065, "step": 28061 }, { "epoch": 1.38, "grad_norm": 0.5938393473625183, "learning_rate": 0.0003390796258838969, "loss": 2.859, "step": 28062 }, { "epoch": 1.38, "grad_norm": 0.5960376262664795, "learning_rate": 0.00033906436083053557, "loss": 3.1894, "step": 28063 }, { "epoch": 1.38, "grad_norm": 0.5832287669181824, "learning_rate": 0.0003390490956742862, "loss": 2.9972, "step": 28064 }, { "epoch": 1.38, "grad_norm": 0.8595984578132629, "learning_rate": 0.0003390338304151891, "loss": 3.2039, "step": 28065 }, { "epoch": 1.38, "grad_norm": 0.5543466806411743, "learning_rate": 0.0003390185650532844, "loss": 3.0077, "step": 28066 }, { "epoch": 1.38, "grad_norm": 0.6111711263656616, "learning_rate": 0.00033900329958861227, "loss": 2.9772, "step": 28067 }, { "epoch": 1.38, "grad_norm": 0.5975467562675476, "learning_rate": 0.0003389880340212129, "loss": 3.1056, "step": 28068 }, { "epoch": 1.38, "grad_norm": 0.5642098784446716, "learning_rate": 0.00033897276835112675, "loss": 2.9942, "step": 28069 }, { "epoch": 1.38, "grad_norm": 0.5549335479736328, "learning_rate": 0.0003389575025783937, "loss": 3.0474, "step": 28070 }, { "epoch": 1.38, "grad_norm": 0.6323215961456299, "learning_rate": 0.00033894223670305403, "loss": 2.9807, "step": 28071 }, { "epoch": 1.38, "grad_norm": 0.5654608607292175, "learning_rate": 0.000338926970725148, "loss": 3.0737, "step": 28072 }, { "epoch": 1.38, "grad_norm": 0.574693500995636, "learning_rate": 0.00033891170464471596, "loss": 2.8669, "step": 28073 }, { "epoch": 1.38, "grad_norm": 0.5713275671005249, "learning_rate": 0.00033889643846179784, "loss": 3.2703, "step": 28074 }, { "epoch": 1.38, "grad_norm": 0.6364814639091492, "learning_rate": 0.00033888117217643405, "loss": 3.1299, "step": 28075 }, { "epoch": 1.38, "grad_norm": 0.5909932255744934, "learning_rate": 0.0003388659057886648, "loss": 3.0662, "step": 28076 }, { "epoch": 1.38, "grad_norm": 0.5821861624717712, "learning_rate": 0.0003388506392985302, "loss": 3.0567, "step": 28077 }, { "epoch": 1.38, "grad_norm": 0.5835837721824646, "learning_rate": 0.0003388353727060704, "loss": 3.1389, "step": 28078 }, { "epoch": 1.38, "grad_norm": 0.5665825605392456, "learning_rate": 0.00033882010601132573, "loss": 3.3765, "step": 28079 }, { "epoch": 1.38, "grad_norm": 0.5594505071640015, "learning_rate": 0.0003388048392143365, "loss": 3.0203, "step": 28080 }, { "epoch": 1.38, "grad_norm": 0.5456429123878479, "learning_rate": 0.0003387895723151427, "loss": 3.107, "step": 28081 }, { "epoch": 1.38, "grad_norm": 0.571052610874176, "learning_rate": 0.0003387743053137846, "loss": 3.1842, "step": 28082 }, { "epoch": 1.38, "grad_norm": 0.6176466941833496, "learning_rate": 0.0003387590382103025, "loss": 2.9045, "step": 28083 }, { "epoch": 1.38, "grad_norm": 0.5826286673545837, "learning_rate": 0.00033874377100473653, "loss": 2.9922, "step": 28084 }, { "epoch": 1.38, "grad_norm": 0.5976090431213379, "learning_rate": 0.0003387285036971268, "loss": 2.9568, "step": 28085 }, { "epoch": 1.38, "grad_norm": 0.5784058570861816, "learning_rate": 0.0003387132362875138, "loss": 2.9194, "step": 28086 }, { "epoch": 1.38, "grad_norm": 0.5768011212348938, "learning_rate": 0.00033869796877593756, "loss": 3.0978, "step": 28087 }, { "epoch": 1.38, "grad_norm": 0.563029944896698, "learning_rate": 0.0003386827011624383, "loss": 3.0959, "step": 28088 }, { "epoch": 1.38, "grad_norm": 0.5373599529266357, "learning_rate": 0.00033866743344705626, "loss": 3.0315, "step": 28089 }, { "epoch": 1.38, "grad_norm": 0.5780729055404663, "learning_rate": 0.0003386521656298316, "loss": 3.013, "step": 28090 }, { "epoch": 1.38, "grad_norm": 0.5696938037872314, "learning_rate": 0.0003386368977108047, "loss": 2.8036, "step": 28091 }, { "epoch": 1.38, "grad_norm": 0.55682373046875, "learning_rate": 0.0003386216296900155, "loss": 3.0036, "step": 28092 }, { "epoch": 1.38, "grad_norm": 0.6155198812484741, "learning_rate": 0.0003386063615675045, "loss": 2.979, "step": 28093 }, { "epoch": 1.38, "grad_norm": 0.5788460373878479, "learning_rate": 0.00033859109334331166, "loss": 3.1754, "step": 28094 }, { "epoch": 1.38, "grad_norm": 0.6345138549804688, "learning_rate": 0.0003385758250174774, "loss": 3.0151, "step": 28095 }, { "epoch": 1.38, "grad_norm": 0.5743170380592346, "learning_rate": 0.0003385605565900417, "loss": 3.0853, "step": 28096 }, { "epoch": 1.38, "grad_norm": 0.6293991208076477, "learning_rate": 0.00033854528806104495, "loss": 2.999, "step": 28097 }, { "epoch": 1.38, "grad_norm": 0.6683945655822754, "learning_rate": 0.00033853001943052736, "loss": 3.2133, "step": 28098 }, { "epoch": 1.38, "grad_norm": 0.5892733335494995, "learning_rate": 0.00033851475069852916, "loss": 2.9365, "step": 28099 }, { "epoch": 1.38, "grad_norm": 0.5764167904853821, "learning_rate": 0.0003384994818650905, "loss": 3.0822, "step": 28100 }, { "epoch": 1.38, "grad_norm": 0.5932957530021667, "learning_rate": 0.00033848421293025156, "loss": 3.2123, "step": 28101 }, { "epoch": 1.38, "grad_norm": 0.5980968475341797, "learning_rate": 0.0003384689438940528, "loss": 2.9073, "step": 28102 }, { "epoch": 1.38, "grad_norm": 0.5964227318763733, "learning_rate": 0.0003384536747565341, "loss": 2.7695, "step": 28103 }, { "epoch": 1.38, "grad_norm": 0.5735576152801514, "learning_rate": 0.00033843840551773564, "loss": 3.0268, "step": 28104 }, { "epoch": 1.38, "grad_norm": 0.6148794889450073, "learning_rate": 0.000338423136177698, "loss": 2.8976, "step": 28105 }, { "epoch": 1.38, "grad_norm": 0.6786152124404907, "learning_rate": 0.00033840786673646134, "loss": 3.1084, "step": 28106 }, { "epoch": 1.38, "grad_norm": 0.748644232749939, "learning_rate": 0.00033839259719406554, "loss": 3.0318, "step": 28107 }, { "epoch": 1.38, "grad_norm": 0.5767213106155396, "learning_rate": 0.00033837732755055114, "loss": 2.9741, "step": 28108 }, { "epoch": 1.38, "grad_norm": 0.6026296019554138, "learning_rate": 0.0003383620578059582, "loss": 3.0951, "step": 28109 }, { "epoch": 1.38, "grad_norm": 0.5640915036201477, "learning_rate": 0.000338346787960327, "loss": 3.0876, "step": 28110 }, { "epoch": 1.38, "grad_norm": 0.5978099703788757, "learning_rate": 0.00033833151801369773, "loss": 3.0631, "step": 28111 }, { "epoch": 1.38, "grad_norm": 0.5457133054733276, "learning_rate": 0.0003383162479661106, "loss": 3.207, "step": 28112 }, { "epoch": 1.38, "grad_norm": 0.5326697826385498, "learning_rate": 0.00033830097781760595, "loss": 3.086, "step": 28113 }, { "epoch": 1.38, "grad_norm": 0.5941699147224426, "learning_rate": 0.0003382857075682237, "loss": 3.1537, "step": 28114 }, { "epoch": 1.38, "grad_norm": 0.5777954459190369, "learning_rate": 0.0003382704372180044, "loss": 3.0681, "step": 28115 }, { "epoch": 1.38, "grad_norm": 0.5638835430145264, "learning_rate": 0.00033825516676698816, "loss": 3.0887, "step": 28116 }, { "epoch": 1.38, "grad_norm": 0.5932229161262512, "learning_rate": 0.0003382398962152152, "loss": 2.9348, "step": 28117 }, { "epoch": 1.38, "grad_norm": 0.566215991973877, "learning_rate": 0.0003382246255627256, "loss": 3.0488, "step": 28118 }, { "epoch": 1.38, "grad_norm": 0.5923061370849609, "learning_rate": 0.0003382093548095597, "loss": 3.053, "step": 28119 }, { "epoch": 1.38, "grad_norm": 0.5887033939361572, "learning_rate": 0.0003381940839557578, "loss": 3.1721, "step": 28120 }, { "epoch": 1.38, "grad_norm": 0.6126934289932251, "learning_rate": 0.00033817881300136, "loss": 3.0277, "step": 28121 }, { "epoch": 1.38, "grad_norm": 0.5494149923324585, "learning_rate": 0.0003381635419464065, "loss": 3.209, "step": 28122 }, { "epoch": 1.38, "grad_norm": 0.5944859385490417, "learning_rate": 0.00033814827079093755, "loss": 3.0693, "step": 28123 }, { "epoch": 1.38, "grad_norm": 0.5820398926734924, "learning_rate": 0.0003381329995349935, "loss": 3.2692, "step": 28124 }, { "epoch": 1.38, "grad_norm": 0.580773651599884, "learning_rate": 0.00033811772817861444, "loss": 3.0431, "step": 28125 }, { "epoch": 1.38, "grad_norm": 0.5761692523956299, "learning_rate": 0.00033810245672184053, "loss": 3.0927, "step": 28126 }, { "epoch": 1.38, "grad_norm": 0.5915241837501526, "learning_rate": 0.00033808718516471217, "loss": 3.1483, "step": 28127 }, { "epoch": 1.38, "grad_norm": 0.5700193047523499, "learning_rate": 0.00033807191350726957, "loss": 3.0007, "step": 28128 }, { "epoch": 1.38, "grad_norm": 0.596190333366394, "learning_rate": 0.00033805664174955274, "loss": 2.9628, "step": 28129 }, { "epoch": 1.38, "grad_norm": 0.5701046586036682, "learning_rate": 0.0003380413698916021, "loss": 3.1288, "step": 28130 }, { "epoch": 1.38, "grad_norm": 0.5389936566352844, "learning_rate": 0.00033802609793345784, "loss": 3.0459, "step": 28131 }, { "epoch": 1.38, "grad_norm": 0.5747068524360657, "learning_rate": 0.0003380108258751601, "loss": 2.9633, "step": 28132 }, { "epoch": 1.38, "grad_norm": 0.606648862361908, "learning_rate": 0.00033799555371674916, "loss": 2.9476, "step": 28133 }, { "epoch": 1.38, "grad_norm": 0.6043571829795837, "learning_rate": 0.00033798028145826526, "loss": 3.1849, "step": 28134 }, { "epoch": 1.38, "grad_norm": 0.5670210719108582, "learning_rate": 0.0003379650090997486, "loss": 2.9245, "step": 28135 }, { "epoch": 1.38, "grad_norm": 0.5730489492416382, "learning_rate": 0.00033794973664123937, "loss": 3.2632, "step": 28136 }, { "epoch": 1.38, "grad_norm": 0.6139273643493652, "learning_rate": 0.000337934464082778, "loss": 2.8662, "step": 28137 }, { "epoch": 1.38, "grad_norm": 0.5544430613517761, "learning_rate": 0.0003379191914244044, "loss": 2.9654, "step": 28138 }, { "epoch": 1.38, "grad_norm": 0.5451558232307434, "learning_rate": 0.000337903918666159, "loss": 3.077, "step": 28139 }, { "epoch": 1.38, "grad_norm": 0.5961140990257263, "learning_rate": 0.0003378886458080819, "loss": 2.9309, "step": 28140 }, { "epoch": 1.38, "grad_norm": 0.5541216135025024, "learning_rate": 0.00033787337285021347, "loss": 3.2097, "step": 28141 }, { "epoch": 1.38, "grad_norm": 0.6020674705505371, "learning_rate": 0.0003378580997925939, "loss": 3.0919, "step": 28142 }, { "epoch": 1.38, "grad_norm": 0.5713543891906738, "learning_rate": 0.0003378428266352633, "loss": 2.98, "step": 28143 }, { "epoch": 1.38, "grad_norm": 0.5623284578323364, "learning_rate": 0.00033782755337826207, "loss": 3.1572, "step": 28144 }, { "epoch": 1.38, "grad_norm": 0.616407036781311, "learning_rate": 0.00033781228002163023, "loss": 3.0148, "step": 28145 }, { "epoch": 1.38, "grad_norm": 0.6024094820022583, "learning_rate": 0.0003377970065654082, "loss": 3.2478, "step": 28146 }, { "epoch": 1.38, "grad_norm": 0.5678855180740356, "learning_rate": 0.0003377817330096361, "loss": 3.0946, "step": 28147 }, { "epoch": 1.38, "grad_norm": 0.5765829682350159, "learning_rate": 0.00033776645935435423, "loss": 3.0297, "step": 28148 }, { "epoch": 1.38, "grad_norm": 0.5960372090339661, "learning_rate": 0.0003377511855996027, "loss": 3.2084, "step": 28149 }, { "epoch": 1.38, "grad_norm": 0.5560441017150879, "learning_rate": 0.00033773591174542187, "loss": 3.2332, "step": 28150 }, { "epoch": 1.38, "grad_norm": 0.5740705132484436, "learning_rate": 0.0003377206377918519, "loss": 3.1825, "step": 28151 }, { "epoch": 1.38, "grad_norm": 0.5770044922828674, "learning_rate": 0.00033770536373893296, "loss": 2.9729, "step": 28152 }, { "epoch": 1.38, "grad_norm": 0.5786614418029785, "learning_rate": 0.0003376900895867055, "loss": 2.9429, "step": 28153 }, { "epoch": 1.38, "grad_norm": 0.5744767189025879, "learning_rate": 0.00033767481533520955, "loss": 3.047, "step": 28154 }, { "epoch": 1.38, "grad_norm": 0.5745210647583008, "learning_rate": 0.0003376595409844852, "loss": 3.0093, "step": 28155 }, { "epoch": 1.38, "grad_norm": 0.5670796632766724, "learning_rate": 0.000337644266534573, "loss": 3.3227, "step": 28156 }, { "epoch": 1.38, "grad_norm": 0.5535241365432739, "learning_rate": 0.00033762899198551313, "loss": 2.8861, "step": 28157 }, { "epoch": 1.38, "grad_norm": 0.5871549248695374, "learning_rate": 0.00033761371733734573, "loss": 3.0057, "step": 28158 }, { "epoch": 1.38, "grad_norm": 0.5588001012802124, "learning_rate": 0.0003375984425901109, "loss": 3.0635, "step": 28159 }, { "epoch": 1.38, "grad_norm": 0.5769440531730652, "learning_rate": 0.00033758316774384905, "loss": 3.0683, "step": 28160 }, { "epoch": 1.38, "grad_norm": 0.5671083331108093, "learning_rate": 0.00033756789279860045, "loss": 3.2814, "step": 28161 }, { "epoch": 1.38, "grad_norm": 0.5821115970611572, "learning_rate": 0.00033755261775440516, "loss": 3.0085, "step": 28162 }, { "epoch": 1.38, "grad_norm": 0.5854321122169495, "learning_rate": 0.00033753734261130354, "loss": 3.0935, "step": 28163 }, { "epoch": 1.38, "grad_norm": 0.6202100515365601, "learning_rate": 0.0003375220673693359, "loss": 2.9791, "step": 28164 }, { "epoch": 1.38, "grad_norm": 0.5914233326911926, "learning_rate": 0.00033750679202854215, "loss": 3.1086, "step": 28165 }, { "epoch": 1.38, "grad_norm": 0.6070291996002197, "learning_rate": 0.0003374915165889628, "loss": 2.9375, "step": 28166 }, { "epoch": 1.38, "grad_norm": 0.5866225361824036, "learning_rate": 0.000337476241050638, "loss": 2.9277, "step": 28167 }, { "epoch": 1.38, "grad_norm": 0.5692453980445862, "learning_rate": 0.0003374609654136081, "loss": 3.2167, "step": 28168 }, { "epoch": 1.38, "grad_norm": 0.5685532689094543, "learning_rate": 0.0003374456896779132, "loss": 3.0796, "step": 28169 }, { "epoch": 1.38, "grad_norm": 0.5635455846786499, "learning_rate": 0.00033743041384359335, "loss": 2.8701, "step": 28170 }, { "epoch": 1.38, "grad_norm": 0.6098818182945251, "learning_rate": 0.0003374151379106893, "loss": 2.901, "step": 28171 }, { "epoch": 1.38, "grad_norm": 0.579856812953949, "learning_rate": 0.00033739986187924083, "loss": 3.2155, "step": 28172 }, { "epoch": 1.38, "grad_norm": 0.596610963344574, "learning_rate": 0.0003373845857492883, "loss": 2.9086, "step": 28173 }, { "epoch": 1.38, "grad_norm": 0.5602899789810181, "learning_rate": 0.000337369309520872, "loss": 3.1105, "step": 28174 }, { "epoch": 1.38, "grad_norm": 0.5983844995498657, "learning_rate": 0.00033735403319403215, "loss": 3.0502, "step": 28175 }, { "epoch": 1.38, "grad_norm": 0.5782126784324646, "learning_rate": 0.0003373387567688089, "loss": 3.148, "step": 28176 }, { "epoch": 1.38, "grad_norm": 0.5897262692451477, "learning_rate": 0.0003373234802452425, "loss": 2.9129, "step": 28177 }, { "epoch": 1.38, "grad_norm": 0.5535157918930054, "learning_rate": 0.00033730820362337335, "loss": 3.263, "step": 28178 }, { "epoch": 1.38, "grad_norm": 0.5690072178840637, "learning_rate": 0.00033729292690324166, "loss": 3.0606, "step": 28179 }, { "epoch": 1.38, "grad_norm": 0.5716756582260132, "learning_rate": 0.00033727765008488743, "loss": 3.0934, "step": 28180 }, { "epoch": 1.38, "grad_norm": 0.6298738718032837, "learning_rate": 0.00033726237316835094, "loss": 2.8377, "step": 28181 }, { "epoch": 1.38, "grad_norm": 0.5696219205856323, "learning_rate": 0.0003372470961536728, "loss": 3.2661, "step": 28182 }, { "epoch": 1.38, "grad_norm": 0.5914949178695679, "learning_rate": 0.00033723181904089284, "loss": 3.0636, "step": 28183 }, { "epoch": 1.38, "grad_norm": 0.5469275116920471, "learning_rate": 0.0003372165418300514, "loss": 3.2017, "step": 28184 }, { "epoch": 1.38, "grad_norm": 0.543024480342865, "learning_rate": 0.00033720126452118883, "loss": 2.9891, "step": 28185 }, { "epoch": 1.38, "grad_norm": 0.5579934120178223, "learning_rate": 0.0003371859871143452, "loss": 2.9713, "step": 28186 }, { "epoch": 1.38, "grad_norm": 0.5843299627304077, "learning_rate": 0.0003371707096095609, "loss": 2.8568, "step": 28187 }, { "epoch": 1.38, "grad_norm": 0.5556427240371704, "learning_rate": 0.000337155432006876, "loss": 3.2779, "step": 28188 }, { "epoch": 1.38, "grad_norm": 0.5713661313056946, "learning_rate": 0.000337140154306331, "loss": 2.8248, "step": 28189 }, { "epoch": 1.38, "grad_norm": 0.5614469647407532, "learning_rate": 0.000337124876507966, "loss": 3.1321, "step": 28190 }, { "epoch": 1.38, "grad_norm": 0.5511723160743713, "learning_rate": 0.00033710959861182107, "loss": 3.0381, "step": 28191 }, { "epoch": 1.38, "grad_norm": 0.5804082751274109, "learning_rate": 0.00033709432061793663, "loss": 3.0038, "step": 28192 }, { "epoch": 1.38, "grad_norm": 0.5728508234024048, "learning_rate": 0.000337079042526353, "loss": 3.1805, "step": 28193 }, { "epoch": 1.38, "grad_norm": 0.5317544341087341, "learning_rate": 0.0003370637643371102, "loss": 3.1494, "step": 28194 }, { "epoch": 1.38, "grad_norm": 0.5826964378356934, "learning_rate": 0.0003370484860502486, "loss": 3.038, "step": 28195 }, { "epoch": 1.38, "grad_norm": 0.5762989521026611, "learning_rate": 0.0003370332076658084, "loss": 3.0305, "step": 28196 }, { "epoch": 1.38, "grad_norm": 0.55995774269104, "learning_rate": 0.00033701792918382987, "loss": 3.0058, "step": 28197 }, { "epoch": 1.38, "grad_norm": 0.6025938987731934, "learning_rate": 0.0003370026506043533, "loss": 3.2507, "step": 28198 }, { "epoch": 1.38, "grad_norm": 0.5528343915939331, "learning_rate": 0.0003369873719274188, "loss": 3.0487, "step": 28199 }, { "epoch": 1.38, "grad_norm": 0.5329409241676331, "learning_rate": 0.0003369720931530667, "loss": 2.9969, "step": 28200 }, { "epoch": 1.38, "grad_norm": 0.5734084248542786, "learning_rate": 0.00033695681428133725, "loss": 3.2161, "step": 28201 }, { "epoch": 1.38, "grad_norm": 0.562954306602478, "learning_rate": 0.00033694153531227053, "loss": 3.1746, "step": 28202 }, { "epoch": 1.38, "grad_norm": 0.5255847573280334, "learning_rate": 0.0003369262562459071, "loss": 2.9532, "step": 28203 }, { "epoch": 1.38, "grad_norm": 0.5966204404830933, "learning_rate": 0.000336910977082287, "loss": 3.195, "step": 28204 }, { "epoch": 1.38, "grad_norm": 0.549161434173584, "learning_rate": 0.00033689569782145045, "loss": 3.1496, "step": 28205 }, { "epoch": 1.38, "grad_norm": 0.5623190999031067, "learning_rate": 0.0003368804184634376, "loss": 3.1611, "step": 28206 }, { "epoch": 1.38, "grad_norm": 0.5726973414421082, "learning_rate": 0.000336865139008289, "loss": 3.0249, "step": 28207 }, { "epoch": 1.38, "grad_norm": 0.5972573161125183, "learning_rate": 0.00033684985945604465, "loss": 3.0751, "step": 28208 }, { "epoch": 1.38, "grad_norm": 0.5603774785995483, "learning_rate": 0.0003368345798067449, "loss": 3.1158, "step": 28209 }, { "epoch": 1.38, "grad_norm": 0.5560487508773804, "learning_rate": 0.0003368193000604299, "loss": 3.1079, "step": 28210 }, { "epoch": 1.38, "grad_norm": 0.5797421336174011, "learning_rate": 0.00033680402021714, "loss": 3.0904, "step": 28211 }, { "epoch": 1.38, "grad_norm": 0.5546010136604309, "learning_rate": 0.0003367887402769153, "loss": 3.1467, "step": 28212 }, { "epoch": 1.38, "grad_norm": 0.6668386459350586, "learning_rate": 0.0003367734602397962, "loss": 3.0496, "step": 28213 }, { "epoch": 1.38, "grad_norm": 0.5610592365264893, "learning_rate": 0.0003367581801058229, "loss": 3.0401, "step": 28214 }, { "epoch": 1.38, "grad_norm": 0.5730311870574951, "learning_rate": 0.00033674289987503566, "loss": 3.0605, "step": 28215 }, { "epoch": 1.38, "grad_norm": 0.5653447508811951, "learning_rate": 0.0003367276195474747, "loss": 3.0281, "step": 28216 }, { "epoch": 1.38, "grad_norm": 0.5662187933921814, "learning_rate": 0.0003367123391231801, "loss": 2.9896, "step": 28217 }, { "epoch": 1.38, "grad_norm": 0.5825369954109192, "learning_rate": 0.00033669705860219233, "loss": 3.0219, "step": 28218 }, { "epoch": 1.38, "grad_norm": 0.5653578042984009, "learning_rate": 0.0003366817779845517, "loss": 3.1838, "step": 28219 }, { "epoch": 1.38, "grad_norm": 0.5515113472938538, "learning_rate": 0.00033666649727029824, "loss": 2.9519, "step": 28220 }, { "epoch": 1.38, "grad_norm": 0.5668362975120544, "learning_rate": 0.0003366512164594723, "loss": 3.0131, "step": 28221 }, { "epoch": 1.38, "grad_norm": 0.5645515322685242, "learning_rate": 0.0003366359355521141, "loss": 3.1945, "step": 28222 }, { "epoch": 1.38, "grad_norm": 0.5728771686553955, "learning_rate": 0.0003366206545482639, "loss": 3.2453, "step": 28223 }, { "epoch": 1.38, "grad_norm": 0.5764223337173462, "learning_rate": 0.00033660537344796187, "loss": 3.0845, "step": 28224 }, { "epoch": 1.38, "grad_norm": 0.5650098323822021, "learning_rate": 0.00033659009225124836, "loss": 2.9356, "step": 28225 }, { "epoch": 1.38, "grad_norm": 0.5907217860221863, "learning_rate": 0.0003365748109581637, "loss": 3.1849, "step": 28226 }, { "epoch": 1.38, "grad_norm": 0.5731244683265686, "learning_rate": 0.0003365595295687479, "loss": 3.1806, "step": 28227 }, { "epoch": 1.38, "grad_norm": 0.5921067595481873, "learning_rate": 0.0003365442480830414, "loss": 3.1239, "step": 28228 }, { "epoch": 1.38, "grad_norm": 0.5963445901870728, "learning_rate": 0.00033652896650108435, "loss": 2.944, "step": 28229 }, { "epoch": 1.38, "grad_norm": 0.565613329410553, "learning_rate": 0.00033651368482291705, "loss": 2.9348, "step": 28230 }, { "epoch": 1.38, "grad_norm": 0.6107422709465027, "learning_rate": 0.00033649840304857977, "loss": 2.9047, "step": 28231 }, { "epoch": 1.38, "grad_norm": 0.5712419748306274, "learning_rate": 0.0003364831211781126, "loss": 3.086, "step": 28232 }, { "epoch": 1.38, "grad_norm": 0.5632501840591431, "learning_rate": 0.0003364678392115561, "loss": 3.0059, "step": 28233 }, { "epoch": 1.38, "grad_norm": 0.5802181363105774, "learning_rate": 0.0003364525571489502, "loss": 3.0808, "step": 28234 }, { "epoch": 1.38, "grad_norm": 0.5678035616874695, "learning_rate": 0.00033643727499033533, "loss": 3.0125, "step": 28235 }, { "epoch": 1.38, "grad_norm": 0.5598823428153992, "learning_rate": 0.0003364219927357516, "loss": 3.0799, "step": 28236 }, { "epoch": 1.38, "grad_norm": 0.5781440138816833, "learning_rate": 0.00033640671038523943, "loss": 3.0678, "step": 28237 }, { "epoch": 1.38, "grad_norm": 0.5706907510757446, "learning_rate": 0.000336391427938839, "loss": 3.1238, "step": 28238 }, { "epoch": 1.38, "grad_norm": 0.5403823852539062, "learning_rate": 0.0003363761453965905, "loss": 3.0808, "step": 28239 }, { "epoch": 1.38, "grad_norm": 0.5869365334510803, "learning_rate": 0.0003363608627585342, "loss": 3.202, "step": 28240 }, { "epoch": 1.38, "grad_norm": 0.590049684047699, "learning_rate": 0.00033634558002471055, "loss": 2.983, "step": 28241 }, { "epoch": 1.38, "grad_norm": 0.6182698607444763, "learning_rate": 0.00033633029719515946, "loss": 2.9491, "step": 28242 }, { "epoch": 1.38, "grad_norm": 0.5630959868431091, "learning_rate": 0.00033631501426992146, "loss": 3.1776, "step": 28243 }, { "epoch": 1.38, "grad_norm": 0.5790128111839294, "learning_rate": 0.00033629973124903666, "loss": 3.2561, "step": 28244 }, { "epoch": 1.38, "grad_norm": 0.612781286239624, "learning_rate": 0.0003362844481325454, "loss": 3.0595, "step": 28245 }, { "epoch": 1.38, "grad_norm": 0.5653966069221497, "learning_rate": 0.00033626916492048784, "loss": 3.0165, "step": 28246 }, { "epoch": 1.38, "grad_norm": 0.5676288604736328, "learning_rate": 0.0003362538816129043, "loss": 3.1517, "step": 28247 }, { "epoch": 1.38, "grad_norm": 0.5745164752006531, "learning_rate": 0.000336238598209835, "loss": 3.2458, "step": 28248 }, { "epoch": 1.38, "grad_norm": 0.624270498752594, "learning_rate": 0.0003362233147113202, "loss": 2.9842, "step": 28249 }, { "epoch": 1.38, "grad_norm": 0.5752471089363098, "learning_rate": 0.0003362080311174002, "loss": 3.08, "step": 28250 }, { "epoch": 1.38, "grad_norm": 0.6258599758148193, "learning_rate": 0.0003361927474281152, "loss": 2.9069, "step": 28251 }, { "epoch": 1.38, "grad_norm": 0.5874921083450317, "learning_rate": 0.0003361774636435055, "loss": 3.077, "step": 28252 }, { "epoch": 1.38, "grad_norm": 0.5625190734863281, "learning_rate": 0.0003361621797636112, "loss": 2.7603, "step": 28253 }, { "epoch": 1.38, "grad_norm": 0.5910776257514954, "learning_rate": 0.00033614689578847275, "loss": 2.8325, "step": 28254 }, { "epoch": 1.38, "grad_norm": 0.5543468594551086, "learning_rate": 0.0003361316117181304, "loss": 3.0568, "step": 28255 }, { "epoch": 1.38, "grad_norm": 0.6161579489707947, "learning_rate": 0.0003361163275526243, "loss": 3.1239, "step": 28256 }, { "epoch": 1.38, "grad_norm": 0.5997530817985535, "learning_rate": 0.0003361010432919947, "loss": 3.1178, "step": 28257 }, { "epoch": 1.38, "grad_norm": 0.6007872223854065, "learning_rate": 0.0003360857589362819, "loss": 2.8373, "step": 28258 }, { "epoch": 1.38, "grad_norm": 0.5524115562438965, "learning_rate": 0.00033607047448552617, "loss": 3.1908, "step": 28259 }, { "epoch": 1.38, "grad_norm": 0.5868061184883118, "learning_rate": 0.00033605518993976783, "loss": 3.091, "step": 28260 }, { "epoch": 1.39, "grad_norm": 0.5432960391044617, "learning_rate": 0.00033603990529904694, "loss": 3.1476, "step": 28261 }, { "epoch": 1.39, "grad_norm": 0.5888095498085022, "learning_rate": 0.00033602462056340387, "loss": 3.0807, "step": 28262 }, { "epoch": 1.39, "grad_norm": 0.5865523219108582, "learning_rate": 0.00033600933573287896, "loss": 2.9347, "step": 28263 }, { "epoch": 1.39, "grad_norm": 0.5949804186820984, "learning_rate": 0.0003359940508075124, "loss": 3.151, "step": 28264 }, { "epoch": 1.39, "grad_norm": 0.5891909599304199, "learning_rate": 0.0003359787657873444, "loss": 2.8826, "step": 28265 }, { "epoch": 1.39, "grad_norm": 0.5554693937301636, "learning_rate": 0.0003359634806724153, "loss": 3.0168, "step": 28266 }, { "epoch": 1.39, "grad_norm": 0.583011269569397, "learning_rate": 0.0003359481954627653, "loss": 2.8308, "step": 28267 }, { "epoch": 1.39, "grad_norm": 0.5493490695953369, "learning_rate": 0.00033593291015843456, "loss": 2.6961, "step": 28268 }, { "epoch": 1.39, "grad_norm": 0.5663343667984009, "learning_rate": 0.0003359176247594635, "loss": 3.1074, "step": 28269 }, { "epoch": 1.39, "grad_norm": 0.6228228807449341, "learning_rate": 0.00033590233926589246, "loss": 3.2481, "step": 28270 }, { "epoch": 1.39, "grad_norm": 0.6101515293121338, "learning_rate": 0.00033588705367776145, "loss": 2.8834, "step": 28271 }, { "epoch": 1.39, "grad_norm": 0.5485780239105225, "learning_rate": 0.0003358717679951109, "loss": 3.0186, "step": 28272 }, { "epoch": 1.39, "grad_norm": 0.5793207287788391, "learning_rate": 0.0003358564822179809, "loss": 3.0909, "step": 28273 }, { "epoch": 1.39, "grad_norm": 0.6276209354400635, "learning_rate": 0.000335841196346412, "loss": 3.0662, "step": 28274 }, { "epoch": 1.39, "grad_norm": 0.5886393785476685, "learning_rate": 0.00033582591038044414, "loss": 3.1548, "step": 28275 }, { "epoch": 1.39, "grad_norm": 0.5739596486091614, "learning_rate": 0.0003358106243201178, "loss": 2.8666, "step": 28276 }, { "epoch": 1.39, "grad_norm": 0.583791196346283, "learning_rate": 0.0003357953381654732, "loss": 3.0231, "step": 28277 }, { "epoch": 1.39, "grad_norm": 0.584082305431366, "learning_rate": 0.00033578005191655047, "loss": 3.1848, "step": 28278 }, { "epoch": 1.39, "grad_norm": 0.5657137632369995, "learning_rate": 0.00033576476557339, "loss": 2.9686, "step": 28279 }, { "epoch": 1.39, "grad_norm": 0.5658915042877197, "learning_rate": 0.00033574947913603205, "loss": 3.1709, "step": 28280 }, { "epoch": 1.39, "grad_norm": 0.5720155835151672, "learning_rate": 0.00033573419260451694, "loss": 3.1229, "step": 28281 }, { "epoch": 1.39, "grad_norm": 0.5654585361480713, "learning_rate": 0.00033571890597888473, "loss": 2.9455, "step": 28282 }, { "epoch": 1.39, "grad_norm": 0.5792554616928101, "learning_rate": 0.00033570361925917575, "loss": 3.1386, "step": 28283 }, { "epoch": 1.39, "grad_norm": 0.5770618915557861, "learning_rate": 0.0003356883324454304, "loss": 3.1544, "step": 28284 }, { "epoch": 1.39, "grad_norm": 0.5782315731048584, "learning_rate": 0.00033567304553768884, "loss": 3.1645, "step": 28285 }, { "epoch": 1.39, "grad_norm": 0.570188581943512, "learning_rate": 0.0003356577585359913, "loss": 3.1487, "step": 28286 }, { "epoch": 1.39, "grad_norm": 0.5855571031570435, "learning_rate": 0.00033564247144037815, "loss": 2.8481, "step": 28287 }, { "epoch": 1.39, "grad_norm": 0.5529902577400208, "learning_rate": 0.0003356271842508896, "loss": 2.9648, "step": 28288 }, { "epoch": 1.39, "grad_norm": 0.5426622629165649, "learning_rate": 0.00033561189696756574, "loss": 3.077, "step": 28289 }, { "epoch": 1.39, "grad_norm": 0.5838942527770996, "learning_rate": 0.00033559660959044715, "loss": 3.0563, "step": 28290 }, { "epoch": 1.39, "grad_norm": 0.6251130700111389, "learning_rate": 0.00033558132211957385, "loss": 2.962, "step": 28291 }, { "epoch": 1.39, "grad_norm": 0.5622515678405762, "learning_rate": 0.0003355660345549863, "loss": 3.0919, "step": 28292 }, { "epoch": 1.39, "grad_norm": 0.5667559504508972, "learning_rate": 0.00033555074689672464, "loss": 2.9387, "step": 28293 }, { "epoch": 1.39, "grad_norm": 0.5650799870491028, "learning_rate": 0.00033553545914482907, "loss": 3.1512, "step": 28294 }, { "epoch": 1.39, "grad_norm": 0.5878683924674988, "learning_rate": 0.00033552017129934, "loss": 2.8555, "step": 28295 }, { "epoch": 1.39, "grad_norm": 0.61265629529953, "learning_rate": 0.00033550488336029765, "loss": 3.0915, "step": 28296 }, { "epoch": 1.39, "grad_norm": 0.5722835659980774, "learning_rate": 0.00033548959532774225, "loss": 2.8942, "step": 28297 }, { "epoch": 1.39, "grad_norm": 0.5835713744163513, "learning_rate": 0.0003354743072017141, "loss": 2.9738, "step": 28298 }, { "epoch": 1.39, "grad_norm": 0.6345725655555725, "learning_rate": 0.0003354590189822534, "loss": 3.3849, "step": 28299 }, { "epoch": 1.39, "grad_norm": 0.5987057685852051, "learning_rate": 0.0003354437306694005, "loss": 2.9575, "step": 28300 }, { "epoch": 1.39, "grad_norm": 0.5893240571022034, "learning_rate": 0.00033542844226319566, "loss": 3.0071, "step": 28301 }, { "epoch": 1.39, "grad_norm": 0.595427930355072, "learning_rate": 0.0003354131537636791, "loss": 3.1305, "step": 28302 }, { "epoch": 1.39, "grad_norm": 0.6372551321983337, "learning_rate": 0.0003353978651708911, "loss": 2.9446, "step": 28303 }, { "epoch": 1.39, "grad_norm": 0.6060798168182373, "learning_rate": 0.00033538257648487195, "loss": 3.0027, "step": 28304 }, { "epoch": 1.39, "grad_norm": 0.5530107021331787, "learning_rate": 0.00033536728770566186, "loss": 3.0878, "step": 28305 }, { "epoch": 1.39, "grad_norm": 0.6247791051864624, "learning_rate": 0.00033535199883330123, "loss": 3.2343, "step": 28306 }, { "epoch": 1.39, "grad_norm": 0.5921459794044495, "learning_rate": 0.00033533670986783014, "loss": 3.1694, "step": 28307 }, { "epoch": 1.39, "grad_norm": 0.5848678946495056, "learning_rate": 0.000335321420809289, "loss": 3.0928, "step": 28308 }, { "epoch": 1.39, "grad_norm": 0.6024354100227356, "learning_rate": 0.00033530613165771804, "loss": 2.881, "step": 28309 }, { "epoch": 1.39, "grad_norm": 0.5735805034637451, "learning_rate": 0.0003352908424131576, "loss": 2.9333, "step": 28310 }, { "epoch": 1.39, "grad_norm": 0.5353273153305054, "learning_rate": 0.00033527555307564773, "loss": 3.0901, "step": 28311 }, { "epoch": 1.39, "grad_norm": 0.5976811647415161, "learning_rate": 0.00033526026364522895, "loss": 3.2398, "step": 28312 }, { "epoch": 1.39, "grad_norm": 0.5682657361030579, "learning_rate": 0.00033524497412194137, "loss": 3.137, "step": 28313 }, { "epoch": 1.39, "grad_norm": 0.5598923563957214, "learning_rate": 0.0003352296845058253, "loss": 2.9647, "step": 28314 }, { "epoch": 1.39, "grad_norm": 0.5730248093605042, "learning_rate": 0.000335214394796921, "loss": 3.0873, "step": 28315 }, { "epoch": 1.39, "grad_norm": 0.5653427839279175, "learning_rate": 0.00033519910499526883, "loss": 2.9421, "step": 28316 }, { "epoch": 1.39, "grad_norm": 0.5537395477294922, "learning_rate": 0.000335183815100909, "loss": 2.9381, "step": 28317 }, { "epoch": 1.39, "grad_norm": 0.5599444508552551, "learning_rate": 0.00033516852511388174, "loss": 2.903, "step": 28318 }, { "epoch": 1.39, "grad_norm": 0.552375853061676, "learning_rate": 0.00033515323503422725, "loss": 3.0835, "step": 28319 }, { "epoch": 1.39, "grad_norm": 0.6051978468894958, "learning_rate": 0.00033513794486198607, "loss": 3.013, "step": 28320 }, { "epoch": 1.39, "grad_norm": 0.5906317234039307, "learning_rate": 0.0003351226545971983, "loss": 3.0689, "step": 28321 }, { "epoch": 1.39, "grad_norm": 0.5584150552749634, "learning_rate": 0.0003351073642399041, "loss": 3.0572, "step": 28322 }, { "epoch": 1.39, "grad_norm": 0.5732271075248718, "learning_rate": 0.0003350920737901439, "loss": 3.0508, "step": 28323 }, { "epoch": 1.39, "grad_norm": 0.6018269658088684, "learning_rate": 0.000335076783247958, "loss": 2.9892, "step": 28324 }, { "epoch": 1.39, "grad_norm": 0.5475950837135315, "learning_rate": 0.00033506149261338655, "loss": 3.0967, "step": 28325 }, { "epoch": 1.39, "grad_norm": 0.5918049812316895, "learning_rate": 0.0003350462018864698, "loss": 2.9518, "step": 28326 }, { "epoch": 1.39, "grad_norm": 0.6317940950393677, "learning_rate": 0.0003350309110672482, "loss": 3.0599, "step": 28327 }, { "epoch": 1.39, "grad_norm": 0.5397853255271912, "learning_rate": 0.00033501562015576195, "loss": 3.1353, "step": 28328 }, { "epoch": 1.39, "grad_norm": 0.5789517760276794, "learning_rate": 0.0003350003291520513, "loss": 3.1184, "step": 28329 }, { "epoch": 1.39, "grad_norm": 0.6313763856887817, "learning_rate": 0.00033498503805615636, "loss": 2.9678, "step": 28330 }, { "epoch": 1.39, "grad_norm": 0.5650973916053772, "learning_rate": 0.00033496974686811766, "loss": 3.1104, "step": 28331 }, { "epoch": 1.39, "grad_norm": 0.5666373372077942, "learning_rate": 0.00033495445558797543, "loss": 2.8425, "step": 28332 }, { "epoch": 1.39, "grad_norm": 0.5651002526283264, "learning_rate": 0.0003349391642157698, "loss": 2.909, "step": 28333 }, { "epoch": 1.39, "grad_norm": 0.5783306360244751, "learning_rate": 0.0003349238727515412, "loss": 3.1748, "step": 28334 }, { "epoch": 1.39, "grad_norm": 0.5995091795921326, "learning_rate": 0.00033490858119532985, "loss": 2.9953, "step": 28335 }, { "epoch": 1.39, "grad_norm": 0.5513354539871216, "learning_rate": 0.00033489328954717596, "loss": 2.9552, "step": 28336 }, { "epoch": 1.39, "grad_norm": 0.56820148229599, "learning_rate": 0.0003348779978071199, "loss": 3.0048, "step": 28337 }, { "epoch": 1.39, "grad_norm": 0.5699313282966614, "learning_rate": 0.0003348627059752019, "loss": 3.1077, "step": 28338 }, { "epoch": 1.39, "grad_norm": 0.5874160528182983, "learning_rate": 0.00033484741405146227, "loss": 3.1624, "step": 28339 }, { "epoch": 1.39, "grad_norm": 0.5903382301330566, "learning_rate": 0.00033483212203594116, "loss": 3.124, "step": 28340 }, { "epoch": 1.39, "grad_norm": 0.5418932437896729, "learning_rate": 0.00033481682992867904, "loss": 3.1153, "step": 28341 }, { "epoch": 1.39, "grad_norm": 0.6493607759475708, "learning_rate": 0.00033480153772971603, "loss": 2.9999, "step": 28342 }, { "epoch": 1.39, "grad_norm": 0.5959764122962952, "learning_rate": 0.00033478624543909256, "loss": 3.0586, "step": 28343 }, { "epoch": 1.39, "grad_norm": 0.5970855355262756, "learning_rate": 0.0003347709530568487, "loss": 3.1342, "step": 28344 }, { "epoch": 1.39, "grad_norm": 0.5649682879447937, "learning_rate": 0.00033475566058302486, "loss": 3.1413, "step": 28345 }, { "epoch": 1.39, "grad_norm": 0.6004948616027832, "learning_rate": 0.00033474036801766136, "loss": 3.0267, "step": 28346 }, { "epoch": 1.39, "grad_norm": 0.6019605398178101, "learning_rate": 0.0003347250753607984, "loss": 2.954, "step": 28347 }, { "epoch": 1.39, "grad_norm": 0.5807358026504517, "learning_rate": 0.0003347097826124762, "loss": 3.1699, "step": 28348 }, { "epoch": 1.39, "grad_norm": 0.5428721308708191, "learning_rate": 0.0003346944897727352, "loss": 3.1219, "step": 28349 }, { "epoch": 1.39, "grad_norm": 0.6022670269012451, "learning_rate": 0.00033467919684161554, "loss": 3.1146, "step": 28350 }, { "epoch": 1.39, "grad_norm": 0.5750691294670105, "learning_rate": 0.0003346639038191576, "loss": 3.06, "step": 28351 }, { "epoch": 1.39, "grad_norm": 0.5875911712646484, "learning_rate": 0.0003346486107054016, "loss": 3.0479, "step": 28352 }, { "epoch": 1.39, "grad_norm": 0.588360071182251, "learning_rate": 0.0003346333175003878, "loss": 3.0508, "step": 28353 }, { "epoch": 1.39, "grad_norm": 0.5815553665161133, "learning_rate": 0.00033461802420415654, "loss": 3.2218, "step": 28354 }, { "epoch": 1.39, "grad_norm": 0.5743089914321899, "learning_rate": 0.00033460273081674797, "loss": 3.039, "step": 28355 }, { "epoch": 1.39, "grad_norm": 0.5957072377204895, "learning_rate": 0.0003345874373382025, "loss": 3.0255, "step": 28356 }, { "epoch": 1.39, "grad_norm": 0.5829094648361206, "learning_rate": 0.0003345721437685605, "loss": 3.2637, "step": 28357 }, { "epoch": 1.39, "grad_norm": 0.5787622928619385, "learning_rate": 0.00033455685010786205, "loss": 3.0796, "step": 28358 }, { "epoch": 1.39, "grad_norm": 0.6022943258285522, "learning_rate": 0.0003345415563561475, "loss": 3.2131, "step": 28359 }, { "epoch": 1.39, "grad_norm": 0.59725421667099, "learning_rate": 0.00033452626251345713, "loss": 3.1263, "step": 28360 }, { "epoch": 1.39, "grad_norm": 0.5658643245697021, "learning_rate": 0.00033451096857983124, "loss": 2.9285, "step": 28361 }, { "epoch": 1.39, "grad_norm": 0.5705099105834961, "learning_rate": 0.0003344956745553101, "loss": 2.9658, "step": 28362 }, { "epoch": 1.39, "grad_norm": 0.5385516881942749, "learning_rate": 0.000334480380439934, "loss": 3.1083, "step": 28363 }, { "epoch": 1.39, "grad_norm": 0.5449607372283936, "learning_rate": 0.00033446508623374317, "loss": 3.1891, "step": 28364 }, { "epoch": 1.39, "grad_norm": 0.6035528779029846, "learning_rate": 0.000334449791936778, "loss": 3.2337, "step": 28365 }, { "epoch": 1.39, "grad_norm": 0.5781618356704712, "learning_rate": 0.0003344344975490786, "loss": 3.1241, "step": 28366 }, { "epoch": 1.39, "grad_norm": 0.561470091342926, "learning_rate": 0.0003344192030706855, "loss": 3.0613, "step": 28367 }, { "epoch": 1.39, "grad_norm": 0.5787182450294495, "learning_rate": 0.00033440390850163876, "loss": 3.063, "step": 28368 }, { "epoch": 1.39, "grad_norm": 0.5583317875862122, "learning_rate": 0.0003343886138419788, "loss": 3.0326, "step": 28369 }, { "epoch": 1.39, "grad_norm": 0.5830174088478088, "learning_rate": 0.0003343733190917458, "loss": 3.1601, "step": 28370 }, { "epoch": 1.39, "grad_norm": 0.5853716731071472, "learning_rate": 0.00033435802425098006, "loss": 2.985, "step": 28371 }, { "epoch": 1.39, "grad_norm": 0.5587119460105896, "learning_rate": 0.000334342729319722, "loss": 3.1248, "step": 28372 }, { "epoch": 1.39, "grad_norm": 0.5844123363494873, "learning_rate": 0.00033432743429801176, "loss": 3.1114, "step": 28373 }, { "epoch": 1.39, "grad_norm": 0.620872974395752, "learning_rate": 0.0003343121391858896, "loss": 3.1804, "step": 28374 }, { "epoch": 1.39, "grad_norm": 0.5749686360359192, "learning_rate": 0.00033429684398339596, "loss": 2.8822, "step": 28375 }, { "epoch": 1.39, "grad_norm": 0.5744134783744812, "learning_rate": 0.00033428154869057097, "loss": 3.0772, "step": 28376 }, { "epoch": 1.39, "grad_norm": 0.6181778311729431, "learning_rate": 0.000334266253307455, "loss": 3.1124, "step": 28377 }, { "epoch": 1.39, "grad_norm": 0.5899921655654907, "learning_rate": 0.0003342509578340883, "loss": 3.2172, "step": 28378 }, { "epoch": 1.39, "grad_norm": 0.5781901478767395, "learning_rate": 0.00033423566227051127, "loss": 3.12, "step": 28379 }, { "epoch": 1.39, "grad_norm": 0.5640218257904053, "learning_rate": 0.000334220366616764, "loss": 2.9047, "step": 28380 }, { "epoch": 1.39, "grad_norm": 0.6122222542762756, "learning_rate": 0.0003342050708728868, "loss": 3.0602, "step": 28381 }, { "epoch": 1.39, "grad_norm": 0.5799720883369446, "learning_rate": 0.0003341897750389201, "loss": 3.148, "step": 28382 }, { "epoch": 1.39, "grad_norm": 0.57460618019104, "learning_rate": 0.0003341744791149042, "loss": 3.3129, "step": 28383 }, { "epoch": 1.39, "grad_norm": 0.5734221339225769, "learning_rate": 0.0003341591831008792, "loss": 3.0301, "step": 28384 }, { "epoch": 1.39, "grad_norm": 0.5949209928512573, "learning_rate": 0.0003341438869968855, "loss": 3.1525, "step": 28385 }, { "epoch": 1.39, "grad_norm": 0.5924066305160522, "learning_rate": 0.00033412859080296335, "loss": 3.2143, "step": 28386 }, { "epoch": 1.39, "grad_norm": 0.5933117270469666, "learning_rate": 0.0003341132945191531, "loss": 3.2277, "step": 28387 }, { "epoch": 1.39, "grad_norm": 0.5546301007270813, "learning_rate": 0.00033409799814549496, "loss": 2.9102, "step": 28388 }, { "epoch": 1.39, "grad_norm": 0.5834781527519226, "learning_rate": 0.0003340827016820293, "loss": 2.8856, "step": 28389 }, { "epoch": 1.39, "grad_norm": 0.5664399266242981, "learning_rate": 0.00033406740512879635, "loss": 3.0256, "step": 28390 }, { "epoch": 1.39, "grad_norm": 0.5528917908668518, "learning_rate": 0.00033405210848583636, "loss": 2.9815, "step": 28391 }, { "epoch": 1.39, "grad_norm": 0.5898980498313904, "learning_rate": 0.00033403681175318974, "loss": 2.9924, "step": 28392 }, { "epoch": 1.39, "grad_norm": 0.6130119562149048, "learning_rate": 0.00033402151493089663, "loss": 2.812, "step": 28393 }, { "epoch": 1.39, "grad_norm": 0.5696910619735718, "learning_rate": 0.0003340062180189975, "loss": 3.2702, "step": 28394 }, { "epoch": 1.39, "grad_norm": 0.5849608778953552, "learning_rate": 0.0003339909210175325, "loss": 3.0146, "step": 28395 }, { "epoch": 1.39, "grad_norm": 0.596095621585846, "learning_rate": 0.0003339756239265419, "loss": 2.9181, "step": 28396 }, { "epoch": 1.39, "grad_norm": 0.5665485262870789, "learning_rate": 0.0003339603267460661, "loss": 3.069, "step": 28397 }, { "epoch": 1.39, "grad_norm": 0.5528839826583862, "learning_rate": 0.0003339450294761453, "loss": 3.0766, "step": 28398 }, { "epoch": 1.39, "grad_norm": 0.5885125398635864, "learning_rate": 0.00033392973211681987, "loss": 2.9717, "step": 28399 }, { "epoch": 1.39, "grad_norm": 0.6001590490341187, "learning_rate": 0.00033391443466813004, "loss": 2.8122, "step": 28400 }, { "epoch": 1.39, "grad_norm": 0.5653418898582458, "learning_rate": 0.0003338991371301161, "loss": 3.1272, "step": 28401 }, { "epoch": 1.39, "grad_norm": 0.577953577041626, "learning_rate": 0.0003338838395028183, "loss": 3.0803, "step": 28402 }, { "epoch": 1.39, "grad_norm": 0.5523266196250916, "learning_rate": 0.00033386854178627705, "loss": 3.0247, "step": 28403 }, { "epoch": 1.39, "grad_norm": 0.5539414882659912, "learning_rate": 0.0003338532439805326, "loss": 3.1748, "step": 28404 }, { "epoch": 1.39, "grad_norm": 0.5848209857940674, "learning_rate": 0.0003338379460856253, "loss": 2.9997, "step": 28405 }, { "epoch": 1.39, "grad_norm": 0.573849618434906, "learning_rate": 0.00033382264810159523, "loss": 3.0206, "step": 28406 }, { "epoch": 1.39, "grad_norm": 0.575800895690918, "learning_rate": 0.00033380735002848274, "loss": 3.0534, "step": 28407 }, { "epoch": 1.39, "grad_norm": 0.5938015580177307, "learning_rate": 0.0003337920518663284, "loss": 3.1933, "step": 28408 }, { "epoch": 1.39, "grad_norm": 0.5658040642738342, "learning_rate": 0.0003337767536151722, "loss": 2.8285, "step": 28409 }, { "epoch": 1.39, "grad_norm": 0.5854364633560181, "learning_rate": 0.0003337614552750545, "loss": 2.8454, "step": 28410 }, { "epoch": 1.39, "grad_norm": 0.5785530209541321, "learning_rate": 0.00033374615684601567, "loss": 3.0105, "step": 28411 }, { "epoch": 1.39, "grad_norm": 0.5590497851371765, "learning_rate": 0.000333730858328096, "loss": 2.8793, "step": 28412 }, { "epoch": 1.39, "grad_norm": 0.5437723994255066, "learning_rate": 0.00033371555972133563, "loss": 2.9129, "step": 28413 }, { "epoch": 1.39, "grad_norm": 0.6012238264083862, "learning_rate": 0.00033370026102577503, "loss": 3.1785, "step": 28414 }, { "epoch": 1.39, "grad_norm": 0.5660067200660706, "learning_rate": 0.0003336849622414544, "loss": 3.0921, "step": 28415 }, { "epoch": 1.39, "grad_norm": 0.5316462516784668, "learning_rate": 0.0003336696633684141, "loss": 3.1858, "step": 28416 }, { "epoch": 1.39, "grad_norm": 0.6181156039237976, "learning_rate": 0.0003336543644066944, "loss": 3.0513, "step": 28417 }, { "epoch": 1.39, "grad_norm": 0.5790272951126099, "learning_rate": 0.00033363906535633546, "loss": 3.0921, "step": 28418 }, { "epoch": 1.39, "grad_norm": 0.553527295589447, "learning_rate": 0.0003336237662173779, "loss": 2.9811, "step": 28419 }, { "epoch": 1.39, "grad_norm": 0.5918067097663879, "learning_rate": 0.00033360846698986165, "loss": 2.9584, "step": 28420 }, { "epoch": 1.39, "grad_norm": 0.5768551826477051, "learning_rate": 0.0003335931676738272, "loss": 3.1019, "step": 28421 }, { "epoch": 1.39, "grad_norm": 0.573397696018219, "learning_rate": 0.00033357786826931484, "loss": 3.0213, "step": 28422 }, { "epoch": 1.39, "grad_norm": 0.5723202228546143, "learning_rate": 0.00033356256877636485, "loss": 2.847, "step": 28423 }, { "epoch": 1.39, "grad_norm": 0.5666738748550415, "learning_rate": 0.00033354726919501747, "loss": 2.8922, "step": 28424 }, { "epoch": 1.39, "grad_norm": 0.5918300747871399, "learning_rate": 0.00033353196952531304, "loss": 3.0617, "step": 28425 }, { "epoch": 1.39, "grad_norm": 0.5779464840888977, "learning_rate": 0.0003335166697672919, "loss": 3.0998, "step": 28426 }, { "epoch": 1.39, "grad_norm": 0.5665059685707092, "learning_rate": 0.00033350136992099416, "loss": 3.207, "step": 28427 }, { "epoch": 1.39, "grad_norm": 0.5882079601287842, "learning_rate": 0.0003334860699864604, "loss": 3.0049, "step": 28428 }, { "epoch": 1.39, "grad_norm": 0.5963062047958374, "learning_rate": 0.00033347076996373073, "loss": 3.2008, "step": 28429 }, { "epoch": 1.39, "grad_norm": 0.5421083569526672, "learning_rate": 0.0003334554698528456, "loss": 2.9144, "step": 28430 }, { "epoch": 1.39, "grad_norm": 0.5771951675415039, "learning_rate": 0.0003334401696538451, "loss": 3.1604, "step": 28431 }, { "epoch": 1.39, "grad_norm": 0.6018733978271484, "learning_rate": 0.00033342486936676957, "loss": 3.2034, "step": 28432 }, { "epoch": 1.39, "grad_norm": 0.5767068862915039, "learning_rate": 0.0003334095689916594, "loss": 3.2174, "step": 28433 }, { "epoch": 1.39, "grad_norm": 0.5713096857070923, "learning_rate": 0.00033339426852855496, "loss": 3.0759, "step": 28434 }, { "epoch": 1.39, "grad_norm": 0.5891026258468628, "learning_rate": 0.0003333789679774964, "loss": 3.3295, "step": 28435 }, { "epoch": 1.39, "grad_norm": 0.5787071585655212, "learning_rate": 0.000333363667338524, "loss": 3.164, "step": 28436 }, { "epoch": 1.39, "grad_norm": 0.6013773679733276, "learning_rate": 0.00033334836661167816, "loss": 2.8607, "step": 28437 }, { "epoch": 1.39, "grad_norm": 0.5678498148918152, "learning_rate": 0.00033333306579699914, "loss": 3.0511, "step": 28438 }, { "epoch": 1.39, "grad_norm": 0.6372431516647339, "learning_rate": 0.00033331776489452724, "loss": 2.7539, "step": 28439 }, { "epoch": 1.39, "grad_norm": 0.5957581400871277, "learning_rate": 0.0003333024639043028, "loss": 3.117, "step": 28440 }, { "epoch": 1.39, "grad_norm": 0.5882934927940369, "learning_rate": 0.0003332871628263661, "loss": 3.0676, "step": 28441 }, { "epoch": 1.39, "grad_norm": 0.5675431489944458, "learning_rate": 0.00033327186166075723, "loss": 3.2111, "step": 28442 }, { "epoch": 1.39, "grad_norm": 0.5941580533981323, "learning_rate": 0.0003332565604075168, "loss": 3.0244, "step": 28443 }, { "epoch": 1.39, "grad_norm": 0.6287396550178528, "learning_rate": 0.000333241259066685, "loss": 3.1406, "step": 28444 }, { "epoch": 1.39, "grad_norm": 0.5600050091743469, "learning_rate": 0.0003332259576383022, "loss": 2.9896, "step": 28445 }, { "epoch": 1.39, "grad_norm": 0.5782938599586487, "learning_rate": 0.00033321065612240854, "loss": 3.0307, "step": 28446 }, { "epoch": 1.39, "grad_norm": 0.5861037969589233, "learning_rate": 0.0003331953545190444, "loss": 2.9698, "step": 28447 }, { "epoch": 1.39, "grad_norm": 0.5864157676696777, "learning_rate": 0.00033318005282825, "loss": 2.9486, "step": 28448 }, { "epoch": 1.39, "grad_norm": 0.5786110162734985, "learning_rate": 0.0003331647510500659, "loss": 2.9526, "step": 28449 }, { "epoch": 1.39, "grad_norm": 0.5792027711868286, "learning_rate": 0.0003331494491845321, "loss": 3.2205, "step": 28450 }, { "epoch": 1.39, "grad_norm": 0.546311616897583, "learning_rate": 0.00033313414723168904, "loss": 2.8147, "step": 28451 }, { "epoch": 1.39, "grad_norm": 0.5609930753707886, "learning_rate": 0.0003331188451915771, "loss": 3.0984, "step": 28452 }, { "epoch": 1.39, "grad_norm": 0.6086905598640442, "learning_rate": 0.0003331035430642364, "loss": 3.138, "step": 28453 }, { "epoch": 1.39, "grad_norm": 0.5497440099716187, "learning_rate": 0.00033308824084970736, "loss": 3.0029, "step": 28454 }, { "epoch": 1.39, "grad_norm": 0.5903970003128052, "learning_rate": 0.0003330729385480303, "loss": 2.9264, "step": 28455 }, { "epoch": 1.39, "grad_norm": 0.6043407320976257, "learning_rate": 0.0003330576361592455, "loss": 2.8884, "step": 28456 }, { "epoch": 1.39, "grad_norm": 0.559085488319397, "learning_rate": 0.0003330423336833933, "loss": 3.0239, "step": 28457 }, { "epoch": 1.39, "grad_norm": 0.5664832592010498, "learning_rate": 0.0003330270311205137, "loss": 3.116, "step": 28458 }, { "epoch": 1.39, "grad_norm": 0.5917896032333374, "learning_rate": 0.0003330117284706475, "loss": 3.139, "step": 28459 }, { "epoch": 1.39, "grad_norm": 0.588679850101471, "learning_rate": 0.0003329964257338347, "loss": 3.0135, "step": 28460 }, { "epoch": 1.39, "grad_norm": 0.5861005187034607, "learning_rate": 0.0003329811229101156, "loss": 2.8976, "step": 28461 }, { "epoch": 1.39, "grad_norm": 0.5760902166366577, "learning_rate": 0.00033296581999953063, "loss": 3.094, "step": 28462 }, { "epoch": 1.39, "grad_norm": 0.5762536525726318, "learning_rate": 0.00033295051700212003, "loss": 3.2222, "step": 28463 }, { "epoch": 1.39, "grad_norm": 0.5697655081748962, "learning_rate": 0.0003329352139179241, "loss": 3.0622, "step": 28464 }, { "epoch": 1.4, "grad_norm": 0.6054670810699463, "learning_rate": 0.0003329199107469832, "loss": 2.8424, "step": 28465 }, { "epoch": 1.4, "grad_norm": 0.5558887720108032, "learning_rate": 0.00033290460748933745, "loss": 3.2195, "step": 28466 }, { "epoch": 1.4, "grad_norm": 0.6065505743026733, "learning_rate": 0.0003328893041450275, "loss": 3.1301, "step": 28467 }, { "epoch": 1.4, "grad_norm": 0.6185204982757568, "learning_rate": 0.00033287400071409327, "loss": 3.1541, "step": 28468 }, { "epoch": 1.4, "grad_norm": 0.5224096179008484, "learning_rate": 0.0003328586971965753, "loss": 2.9278, "step": 28469 }, { "epoch": 1.4, "grad_norm": 0.5885241627693176, "learning_rate": 0.0003328433935925139, "loss": 3.2153, "step": 28470 }, { "epoch": 1.4, "grad_norm": 0.5575811266899109, "learning_rate": 0.00033282808990194935, "loss": 3.1026, "step": 28471 }, { "epoch": 1.4, "grad_norm": 0.6058753132820129, "learning_rate": 0.00033281278612492187, "loss": 2.8013, "step": 28472 }, { "epoch": 1.4, "grad_norm": 0.585891842842102, "learning_rate": 0.0003327974822614718, "loss": 3.0083, "step": 28473 }, { "epoch": 1.4, "grad_norm": 0.5971677303314209, "learning_rate": 0.0003327821783116395, "loss": 3.1401, "step": 28474 }, { "epoch": 1.4, "grad_norm": 0.5844119191169739, "learning_rate": 0.0003327668742754653, "loss": 2.9653, "step": 28475 }, { "epoch": 1.4, "grad_norm": 0.6075958609580994, "learning_rate": 0.0003327515701529894, "loss": 3.0223, "step": 28476 }, { "epoch": 1.4, "grad_norm": 0.5761697888374329, "learning_rate": 0.00033273626594425217, "loss": 3.2987, "step": 28477 }, { "epoch": 1.4, "grad_norm": 0.7378432750701904, "learning_rate": 0.0003327209616492939, "loss": 3.1888, "step": 28478 }, { "epoch": 1.4, "grad_norm": 0.5896404981613159, "learning_rate": 0.0003327056572681549, "loss": 3.1417, "step": 28479 }, { "epoch": 1.4, "grad_norm": 0.581295371055603, "learning_rate": 0.00033269035280087555, "loss": 2.915, "step": 28480 }, { "epoch": 1.4, "grad_norm": 0.5805588960647583, "learning_rate": 0.0003326750482474961, "loss": 3.0804, "step": 28481 }, { "epoch": 1.4, "grad_norm": 0.631949782371521, "learning_rate": 0.00033265974360805687, "loss": 2.8173, "step": 28482 }, { "epoch": 1.4, "grad_norm": 0.5704339742660522, "learning_rate": 0.0003326444388825981, "loss": 2.9781, "step": 28483 }, { "epoch": 1.4, "grad_norm": 0.5693856477737427, "learning_rate": 0.0003326291340711602, "loss": 2.923, "step": 28484 }, { "epoch": 1.4, "grad_norm": 0.5859050750732422, "learning_rate": 0.0003326138291737835, "loss": 2.9329, "step": 28485 }, { "epoch": 1.4, "grad_norm": 0.5935141444206238, "learning_rate": 0.00033259852419050814, "loss": 3.1226, "step": 28486 }, { "epoch": 1.4, "grad_norm": 0.6061211228370667, "learning_rate": 0.00033258321912137465, "loss": 3.1309, "step": 28487 }, { "epoch": 1.4, "grad_norm": 0.6038804650306702, "learning_rate": 0.0003325679139664231, "loss": 2.8745, "step": 28488 }, { "epoch": 1.4, "grad_norm": 0.6384372711181641, "learning_rate": 0.00033255260872569405, "loss": 2.9845, "step": 28489 }, { "epoch": 1.4, "grad_norm": 0.6087737679481506, "learning_rate": 0.00033253730339922766, "loss": 3.0332, "step": 28490 }, { "epoch": 1.4, "grad_norm": 0.544606626033783, "learning_rate": 0.00033252199798706426, "loss": 3.0271, "step": 28491 }, { "epoch": 1.4, "grad_norm": 0.6075654625892639, "learning_rate": 0.00033250669248924416, "loss": 3.1083, "step": 28492 }, { "epoch": 1.4, "grad_norm": 0.5745288729667664, "learning_rate": 0.0003324913869058077, "loss": 3.118, "step": 28493 }, { "epoch": 1.4, "grad_norm": 0.585982620716095, "learning_rate": 0.0003324760812367952, "loss": 2.8791, "step": 28494 }, { "epoch": 1.4, "grad_norm": 0.6062518358230591, "learning_rate": 0.00033246077548224686, "loss": 3.0527, "step": 28495 }, { "epoch": 1.4, "grad_norm": 0.5591403841972351, "learning_rate": 0.0003324454696422033, "loss": 3.087, "step": 28496 }, { "epoch": 1.4, "grad_norm": 0.6129733324050903, "learning_rate": 0.00033243016371670446, "loss": 3.1021, "step": 28497 }, { "epoch": 1.4, "grad_norm": 0.5932202935218811, "learning_rate": 0.00033241485770579084, "loss": 2.89, "step": 28498 }, { "epoch": 1.4, "grad_norm": 0.5794448256492615, "learning_rate": 0.0003323995516095027, "loss": 3.1646, "step": 28499 }, { "epoch": 1.4, "grad_norm": 0.6294394135475159, "learning_rate": 0.0003323842454278804, "loss": 3.0625, "step": 28500 }, { "epoch": 1.4, "grad_norm": 0.5836816430091858, "learning_rate": 0.0003323689391609642, "loss": 2.9569, "step": 28501 }, { "epoch": 1.4, "grad_norm": 0.5736550688743591, "learning_rate": 0.0003323536328087945, "loss": 3.0808, "step": 28502 }, { "epoch": 1.4, "grad_norm": 0.6020394563674927, "learning_rate": 0.0003323383263714115, "loss": 3.1046, "step": 28503 }, { "epoch": 1.4, "grad_norm": 0.566116213798523, "learning_rate": 0.0003323230198488555, "loss": 2.8113, "step": 28504 }, { "epoch": 1.4, "grad_norm": 0.5596758723258972, "learning_rate": 0.000332307713241167, "loss": 3.1021, "step": 28505 }, { "epoch": 1.4, "grad_norm": 0.5862348675727844, "learning_rate": 0.0003322924065483862, "loss": 2.9094, "step": 28506 }, { "epoch": 1.4, "grad_norm": 0.5914575457572937, "learning_rate": 0.00033227709977055344, "loss": 3.1915, "step": 28507 }, { "epoch": 1.4, "grad_norm": 0.6266211867332458, "learning_rate": 0.000332261792907709, "loss": 3.0326, "step": 28508 }, { "epoch": 1.4, "grad_norm": 0.6111003756523132, "learning_rate": 0.00033224648595989306, "loss": 3.3007, "step": 28509 }, { "epoch": 1.4, "grad_norm": 0.611010730266571, "learning_rate": 0.00033223117892714626, "loss": 2.8517, "step": 28510 }, { "epoch": 1.4, "grad_norm": 0.6048029065132141, "learning_rate": 0.0003322158718095087, "loss": 3.065, "step": 28511 }, { "epoch": 1.4, "grad_norm": 0.6111939549446106, "learning_rate": 0.0003322005646070207, "loss": 2.9375, "step": 28512 }, { "epoch": 1.4, "grad_norm": 0.5618122816085815, "learning_rate": 0.0003321852573197226, "loss": 3.0391, "step": 28513 }, { "epoch": 1.4, "grad_norm": 0.6132274866104126, "learning_rate": 0.00033216994994765477, "loss": 3.0492, "step": 28514 }, { "epoch": 1.4, "grad_norm": 0.6032343506813049, "learning_rate": 0.0003321546424908574, "loss": 2.9005, "step": 28515 }, { "epoch": 1.4, "grad_norm": 0.6772813200950623, "learning_rate": 0.00033213933494937093, "loss": 3.089, "step": 28516 }, { "epoch": 1.4, "grad_norm": 0.544093668460846, "learning_rate": 0.0003321240273232357, "loss": 3.0564, "step": 28517 }, { "epoch": 1.4, "grad_norm": 0.6309990286827087, "learning_rate": 0.00033210871961249195, "loss": 2.9296, "step": 28518 }, { "epoch": 1.4, "grad_norm": 0.5908814668655396, "learning_rate": 0.00033209341181717996, "loss": 3.1371, "step": 28519 }, { "epoch": 1.4, "grad_norm": 0.55782151222229, "learning_rate": 0.0003320781039373401, "loss": 2.9583, "step": 28520 }, { "epoch": 1.4, "grad_norm": 0.5843961834907532, "learning_rate": 0.00033206279597301276, "loss": 3.2184, "step": 28521 }, { "epoch": 1.4, "grad_norm": 0.5808646082878113, "learning_rate": 0.0003320474879242381, "loss": 2.8148, "step": 28522 }, { "epoch": 1.4, "grad_norm": 0.6347132325172424, "learning_rate": 0.00033203217979105656, "loss": 3.2595, "step": 28523 }, { "epoch": 1.4, "grad_norm": 0.5245764851570129, "learning_rate": 0.00033201687157350843, "loss": 3.0635, "step": 28524 }, { "epoch": 1.4, "grad_norm": 0.6027230024337769, "learning_rate": 0.000332001563271634, "loss": 3.0783, "step": 28525 }, { "epoch": 1.4, "grad_norm": 0.5538972616195679, "learning_rate": 0.0003319862548854736, "loss": 2.8988, "step": 28526 }, { "epoch": 1.4, "grad_norm": 0.6136893630027771, "learning_rate": 0.00033197094641506763, "loss": 2.9789, "step": 28527 }, { "epoch": 1.4, "grad_norm": 0.5786117911338806, "learning_rate": 0.0003319556378604563, "loss": 2.8044, "step": 28528 }, { "epoch": 1.4, "grad_norm": 0.6115828156471252, "learning_rate": 0.00033194032922167994, "loss": 3.114, "step": 28529 }, { "epoch": 1.4, "grad_norm": 0.5768436193466187, "learning_rate": 0.0003319250204987789, "loss": 2.6763, "step": 28530 }, { "epoch": 1.4, "grad_norm": 0.5673786401748657, "learning_rate": 0.0003319097116917935, "loss": 2.8227, "step": 28531 }, { "epoch": 1.4, "grad_norm": 0.6050379276275635, "learning_rate": 0.00033189440280076416, "loss": 2.9611, "step": 28532 }, { "epoch": 1.4, "grad_norm": 0.5654118061065674, "learning_rate": 0.000331879093825731, "loss": 3.0181, "step": 28533 }, { "epoch": 1.4, "grad_norm": 0.5509759783744812, "learning_rate": 0.00033186378476673446, "loss": 3.2072, "step": 28534 }, { "epoch": 1.4, "grad_norm": 0.5696454644203186, "learning_rate": 0.0003318484756238149, "loss": 3.1229, "step": 28535 }, { "epoch": 1.4, "grad_norm": 0.641002357006073, "learning_rate": 0.0003318331663970125, "loss": 3.2484, "step": 28536 }, { "epoch": 1.4, "grad_norm": 0.6124339699745178, "learning_rate": 0.00033181785708636774, "loss": 3.158, "step": 28537 }, { "epoch": 1.4, "grad_norm": 0.5635184049606323, "learning_rate": 0.0003318025476919208, "loss": 2.9605, "step": 28538 }, { "epoch": 1.4, "grad_norm": 0.6408677697181702, "learning_rate": 0.00033178723821371216, "loss": 2.8396, "step": 28539 }, { "epoch": 1.4, "grad_norm": 0.5962166786193848, "learning_rate": 0.00033177192865178196, "loss": 3.1288, "step": 28540 }, { "epoch": 1.4, "grad_norm": 0.5760708451271057, "learning_rate": 0.00033175661900617065, "loss": 3.0704, "step": 28541 }, { "epoch": 1.4, "grad_norm": 0.667230486869812, "learning_rate": 0.0003317413092769185, "loss": 2.9527, "step": 28542 }, { "epoch": 1.4, "grad_norm": 0.5681466460227966, "learning_rate": 0.00033172599946406587, "loss": 3.1464, "step": 28543 }, { "epoch": 1.4, "grad_norm": 0.6880142092704773, "learning_rate": 0.0003317106895676531, "loss": 3.2, "step": 28544 }, { "epoch": 1.4, "grad_norm": 0.6255538463592529, "learning_rate": 0.0003316953795877204, "loss": 3.1399, "step": 28545 }, { "epoch": 1.4, "grad_norm": 0.5599697232246399, "learning_rate": 0.0003316800695243082, "loss": 3.1353, "step": 28546 }, { "epoch": 1.4, "grad_norm": 0.5526019930839539, "learning_rate": 0.0003316647593774568, "loss": 2.8375, "step": 28547 }, { "epoch": 1.4, "grad_norm": 0.6557570695877075, "learning_rate": 0.0003316494491472065, "loss": 2.9793, "step": 28548 }, { "epoch": 1.4, "grad_norm": 0.5730993747711182, "learning_rate": 0.00033163413883359764, "loss": 3.0206, "step": 28549 }, { "epoch": 1.4, "grad_norm": 1.1626458168029785, "learning_rate": 0.0003316188284366706, "loss": 3.0879, "step": 28550 }, { "epoch": 1.4, "grad_norm": 0.5795100927352905, "learning_rate": 0.0003316035179564656, "loss": 2.9078, "step": 28551 }, { "epoch": 1.4, "grad_norm": 0.5477898120880127, "learning_rate": 0.000331588207393023, "loss": 2.9622, "step": 28552 }, { "epoch": 1.4, "grad_norm": 0.5724733471870422, "learning_rate": 0.0003315728967463832, "loss": 3.2156, "step": 28553 }, { "epoch": 1.4, "grad_norm": 0.5982617139816284, "learning_rate": 0.0003315575860165865, "loss": 2.8647, "step": 28554 }, { "epoch": 1.4, "grad_norm": 0.6391007900238037, "learning_rate": 0.0003315422752036731, "loss": 3.0067, "step": 28555 }, { "epoch": 1.4, "grad_norm": 0.6046311855316162, "learning_rate": 0.00033152696430768347, "loss": 2.9357, "step": 28556 }, { "epoch": 1.4, "grad_norm": 0.5996942520141602, "learning_rate": 0.0003315116533286578, "loss": 3.0517, "step": 28557 }, { "epoch": 1.4, "grad_norm": 0.5585609078407288, "learning_rate": 0.00033149634226663666, "loss": 3.0389, "step": 28558 }, { "epoch": 1.4, "grad_norm": 0.6330249309539795, "learning_rate": 0.0003314810311216601, "loss": 3.0515, "step": 28559 }, { "epoch": 1.4, "grad_norm": 0.574908971786499, "learning_rate": 0.0003314657198937685, "loss": 3.255, "step": 28560 }, { "epoch": 1.4, "grad_norm": 0.597728431224823, "learning_rate": 0.0003314504085830024, "loss": 3.0646, "step": 28561 }, { "epoch": 1.4, "grad_norm": 0.6056450009346008, "learning_rate": 0.00033143509718940186, "loss": 3.1887, "step": 28562 }, { "epoch": 1.4, "grad_norm": 0.5824889540672302, "learning_rate": 0.0003314197857130074, "loss": 3.1014, "step": 28563 }, { "epoch": 1.4, "grad_norm": 0.5638570189476013, "learning_rate": 0.00033140447415385923, "loss": 3.0617, "step": 28564 }, { "epoch": 1.4, "grad_norm": 0.5699633359909058, "learning_rate": 0.00033138916251199775, "loss": 3.1922, "step": 28565 }, { "epoch": 1.4, "grad_norm": 0.5394684672355652, "learning_rate": 0.0003313738507874632, "loss": 3.1004, "step": 28566 }, { "epoch": 1.4, "grad_norm": 0.5617507100105286, "learning_rate": 0.000331358538980296, "loss": 3.1166, "step": 28567 }, { "epoch": 1.4, "grad_norm": 0.5685690641403198, "learning_rate": 0.00033134322709053643, "loss": 3.0646, "step": 28568 }, { "epoch": 1.4, "grad_norm": 0.5826153755187988, "learning_rate": 0.000331327915118225, "loss": 3.073, "step": 28569 }, { "epoch": 1.4, "grad_norm": 0.5799400210380554, "learning_rate": 0.0003313126030634016, "loss": 3.1073, "step": 28570 }, { "epoch": 1.4, "grad_norm": 0.6147586703300476, "learning_rate": 0.0003312972909261069, "loss": 3.0393, "step": 28571 }, { "epoch": 1.4, "grad_norm": 0.5650718212127686, "learning_rate": 0.0003312819787063813, "loss": 2.536, "step": 28572 }, { "epoch": 1.4, "grad_norm": 0.6030925512313843, "learning_rate": 0.00033126666640426487, "loss": 2.9619, "step": 28573 }, { "epoch": 1.4, "grad_norm": 0.573800802230835, "learning_rate": 0.0003312513540197981, "loss": 3.1022, "step": 28574 }, { "epoch": 1.4, "grad_norm": 0.5658773183822632, "learning_rate": 0.0003312360415530213, "loss": 3.0579, "step": 28575 }, { "epoch": 1.4, "grad_norm": 0.5464009642601013, "learning_rate": 0.00033122072900397466, "loss": 3.1756, "step": 28576 }, { "epoch": 1.4, "grad_norm": 0.5734522342681885, "learning_rate": 0.00033120541637269874, "loss": 3.1855, "step": 28577 }, { "epoch": 1.4, "grad_norm": 0.6184408068656921, "learning_rate": 0.00033119010365923374, "loss": 2.8647, "step": 28578 }, { "epoch": 1.4, "grad_norm": 0.6169735789299011, "learning_rate": 0.00033117479086362, "loss": 3.0088, "step": 28579 }, { "epoch": 1.4, "grad_norm": 0.5645252466201782, "learning_rate": 0.00033115947798589786, "loss": 3.0772, "step": 28580 }, { "epoch": 1.4, "grad_norm": 0.587335467338562, "learning_rate": 0.0003311441650261076, "loss": 3.1952, "step": 28581 }, { "epoch": 1.4, "grad_norm": 0.6208239197731018, "learning_rate": 0.00033112885198428963, "loss": 3.0525, "step": 28582 }, { "epoch": 1.4, "grad_norm": 0.5548790097236633, "learning_rate": 0.00033111353886048435, "loss": 3.0921, "step": 28583 }, { "epoch": 1.4, "grad_norm": 0.5708760619163513, "learning_rate": 0.00033109822565473187, "loss": 3.0512, "step": 28584 }, { "epoch": 1.4, "grad_norm": 0.5541423559188843, "learning_rate": 0.0003310829123670728, "loss": 3.001, "step": 28585 }, { "epoch": 1.4, "grad_norm": 0.6086994409561157, "learning_rate": 0.00033106759899754714, "loss": 2.9911, "step": 28586 }, { "epoch": 1.4, "grad_norm": 0.5725777745246887, "learning_rate": 0.0003310522855461955, "loss": 3.1316, "step": 28587 }, { "epoch": 1.4, "grad_norm": 0.5796346664428711, "learning_rate": 0.0003310369720130581, "loss": 3.2368, "step": 28588 }, { "epoch": 1.4, "grad_norm": 0.5890423655509949, "learning_rate": 0.00033102165839817525, "loss": 3.0038, "step": 28589 }, { "epoch": 1.4, "grad_norm": 0.5724145770072937, "learning_rate": 0.0003310063447015874, "loss": 3.2116, "step": 28590 }, { "epoch": 1.4, "grad_norm": 0.560804009437561, "learning_rate": 0.0003309910309233348, "loss": 2.8441, "step": 28591 }, { "epoch": 1.4, "grad_norm": 0.5579610466957092, "learning_rate": 0.0003309757170634577, "loss": 3.0832, "step": 28592 }, { "epoch": 1.4, "grad_norm": 0.5677865147590637, "learning_rate": 0.0003309604031219966, "loss": 3.1131, "step": 28593 }, { "epoch": 1.4, "grad_norm": 0.5703995227813721, "learning_rate": 0.00033094508909899177, "loss": 3.0792, "step": 28594 }, { "epoch": 1.4, "grad_norm": 0.5485500693321228, "learning_rate": 0.00033092977499448356, "loss": 3.1622, "step": 28595 }, { "epoch": 1.4, "grad_norm": 0.5834635496139526, "learning_rate": 0.0003309144608085121, "loss": 3.1295, "step": 28596 }, { "epoch": 1.4, "grad_norm": 0.5779139995574951, "learning_rate": 0.000330899146541118, "loss": 3.1709, "step": 28597 }, { "epoch": 1.4, "grad_norm": 0.6009939908981323, "learning_rate": 0.00033088383219234154, "loss": 2.9788, "step": 28598 }, { "epoch": 1.4, "grad_norm": 0.6160380840301514, "learning_rate": 0.00033086851776222297, "loss": 2.8536, "step": 28599 }, { "epoch": 1.4, "grad_norm": 0.5951359868049622, "learning_rate": 0.00033085320325080273, "loss": 3.1106, "step": 28600 }, { "epoch": 1.4, "grad_norm": 0.6209423542022705, "learning_rate": 0.000330837888658121, "loss": 3.0604, "step": 28601 }, { "epoch": 1.4, "grad_norm": 0.5782553553581238, "learning_rate": 0.0003308225739842182, "loss": 3.2548, "step": 28602 }, { "epoch": 1.4, "grad_norm": 0.5685989260673523, "learning_rate": 0.00033080725922913476, "loss": 3.0094, "step": 28603 }, { "epoch": 1.4, "grad_norm": 0.5853109955787659, "learning_rate": 0.00033079194439291086, "loss": 3.1369, "step": 28604 }, { "epoch": 1.4, "grad_norm": 0.5716216564178467, "learning_rate": 0.000330776629475587, "loss": 3.156, "step": 28605 }, { "epoch": 1.4, "grad_norm": 0.6147785186767578, "learning_rate": 0.00033076131447720334, "loss": 2.942, "step": 28606 }, { "epoch": 1.4, "grad_norm": 0.5691528916358948, "learning_rate": 0.0003307459993978002, "loss": 3.1663, "step": 28607 }, { "epoch": 1.4, "grad_norm": 0.6025173664093018, "learning_rate": 0.00033073068423741814, "loss": 3.1461, "step": 28608 }, { "epoch": 1.4, "grad_norm": 0.59565669298172, "learning_rate": 0.00033071536899609744, "loss": 2.7866, "step": 28609 }, { "epoch": 1.4, "grad_norm": 0.543670654296875, "learning_rate": 0.00033070005367387824, "loss": 3.0595, "step": 28610 }, { "epoch": 1.4, "grad_norm": 0.6227362751960754, "learning_rate": 0.000330684738270801, "loss": 3.0906, "step": 28611 }, { "epoch": 1.4, "grad_norm": 0.5895864367485046, "learning_rate": 0.00033066942278690614, "loss": 3.08, "step": 28612 }, { "epoch": 1.4, "grad_norm": 0.5541119575500488, "learning_rate": 0.0003306541072222339, "loss": 3.106, "step": 28613 }, { "epoch": 1.4, "grad_norm": 0.5648583769798279, "learning_rate": 0.0003306387915768246, "loss": 3.2744, "step": 28614 }, { "epoch": 1.4, "grad_norm": 0.6211113929748535, "learning_rate": 0.00033062347585071863, "loss": 2.9499, "step": 28615 }, { "epoch": 1.4, "grad_norm": 0.6163092255592346, "learning_rate": 0.00033060816004395647, "loss": 3.1507, "step": 28616 }, { "epoch": 1.4, "grad_norm": 0.6097185015678406, "learning_rate": 0.0003305928441565781, "loss": 3.0956, "step": 28617 }, { "epoch": 1.4, "grad_norm": 0.6374078989028931, "learning_rate": 0.0003305775281886241, "loss": 3.0742, "step": 28618 }, { "epoch": 1.4, "grad_norm": 0.6147667169570923, "learning_rate": 0.0003305622121401348, "loss": 3.029, "step": 28619 }, { "epoch": 1.4, "grad_norm": 0.5697017312049866, "learning_rate": 0.00033054689601115067, "loss": 3.0943, "step": 28620 }, { "epoch": 1.4, "grad_norm": 0.563630998134613, "learning_rate": 0.0003305315798017117, "loss": 2.928, "step": 28621 }, { "epoch": 1.4, "grad_norm": 0.5571799278259277, "learning_rate": 0.00033051626351185843, "loss": 3.0234, "step": 28622 }, { "epoch": 1.4, "grad_norm": 0.5704235434532166, "learning_rate": 0.00033050094714163136, "loss": 3.1792, "step": 28623 }, { "epoch": 1.4, "grad_norm": 0.5932474732398987, "learning_rate": 0.0003304856306910705, "loss": 3.0762, "step": 28624 }, { "epoch": 1.4, "grad_norm": 0.6049175262451172, "learning_rate": 0.00033047031416021637, "loss": 3.0379, "step": 28625 }, { "epoch": 1.4, "grad_norm": 0.5922101736068726, "learning_rate": 0.00033045499754910933, "loss": 3.1122, "step": 28626 }, { "epoch": 1.4, "grad_norm": 0.5743208527565002, "learning_rate": 0.00033043968085778977, "loss": 3.0287, "step": 28627 }, { "epoch": 1.4, "grad_norm": 0.6035799384117126, "learning_rate": 0.00033042436408629785, "loss": 2.9865, "step": 28628 }, { "epoch": 1.4, "grad_norm": 0.5719898343086243, "learning_rate": 0.000330409047234674, "loss": 3.0642, "step": 28629 }, { "epoch": 1.4, "grad_norm": 0.6188897490501404, "learning_rate": 0.00033039373030295863, "loss": 3.123, "step": 28630 }, { "epoch": 1.4, "grad_norm": 0.5590984225273132, "learning_rate": 0.00033037841329119205, "loss": 3.3236, "step": 28631 }, { "epoch": 1.4, "grad_norm": 0.5841843485832214, "learning_rate": 0.0003303630961994145, "loss": 3.2134, "step": 28632 }, { "epoch": 1.4, "grad_norm": 0.5968827605247498, "learning_rate": 0.00033034777902766636, "loss": 2.9945, "step": 28633 }, { "epoch": 1.4, "grad_norm": 0.5639210939407349, "learning_rate": 0.00033033246177598813, "loss": 3.0465, "step": 28634 }, { "epoch": 1.4, "grad_norm": 0.5694261193275452, "learning_rate": 0.00033031714444441997, "loss": 3.1198, "step": 28635 }, { "epoch": 1.4, "grad_norm": 0.5945818424224854, "learning_rate": 0.0003303018270330023, "loss": 3.1175, "step": 28636 }, { "epoch": 1.4, "grad_norm": 0.6064791679382324, "learning_rate": 0.0003302865095417754, "loss": 3.0226, "step": 28637 }, { "epoch": 1.4, "grad_norm": 0.5732207298278809, "learning_rate": 0.0003302711919707798, "loss": 3.1993, "step": 28638 }, { "epoch": 1.4, "grad_norm": 0.604217529296875, "learning_rate": 0.00033025587432005554, "loss": 2.9412, "step": 28639 }, { "epoch": 1.4, "grad_norm": 0.589924156665802, "learning_rate": 0.0003302405565896432, "loss": 3.0485, "step": 28640 }, { "epoch": 1.4, "grad_norm": 0.5883309841156006, "learning_rate": 0.00033022523877958304, "loss": 2.848, "step": 28641 }, { "epoch": 1.4, "grad_norm": 0.641121506690979, "learning_rate": 0.0003302099208899156, "loss": 3.0409, "step": 28642 }, { "epoch": 1.4, "grad_norm": 0.5524410605430603, "learning_rate": 0.00033019460292068074, "loss": 3.0698, "step": 28643 }, { "epoch": 1.4, "grad_norm": 0.5567286610603333, "learning_rate": 0.00033017928487191924, "loss": 3.1788, "step": 28644 }, { "epoch": 1.4, "grad_norm": 0.5724384784698486, "learning_rate": 0.0003301639667436714, "loss": 3.3344, "step": 28645 }, { "epoch": 1.4, "grad_norm": 0.6331698298454285, "learning_rate": 0.00033014864853597736, "loss": 3.0563, "step": 28646 }, { "epoch": 1.4, "grad_norm": 0.6174417734146118, "learning_rate": 0.0003301333302488776, "loss": 3.0488, "step": 28647 }, { "epoch": 1.4, "grad_norm": 0.5746307373046875, "learning_rate": 0.00033011801188241244, "loss": 3.0419, "step": 28648 }, { "epoch": 1.4, "grad_norm": 0.6337699294090271, "learning_rate": 0.0003301026934366223, "loss": 3.0054, "step": 28649 }, { "epoch": 1.4, "grad_norm": 0.5989909768104553, "learning_rate": 0.00033008737491154735, "loss": 3.1279, "step": 28650 }, { "epoch": 1.4, "grad_norm": 0.6146669387817383, "learning_rate": 0.00033007205630722813, "loss": 2.9037, "step": 28651 }, { "epoch": 1.4, "grad_norm": 0.5555586218833923, "learning_rate": 0.00033005673762370486, "loss": 2.9902, "step": 28652 }, { "epoch": 1.4, "grad_norm": 0.5725548267364502, "learning_rate": 0.0003300414188610179, "loss": 3.2456, "step": 28653 }, { "epoch": 1.4, "grad_norm": 0.5860716700553894, "learning_rate": 0.00033002610001920773, "loss": 3.106, "step": 28654 }, { "epoch": 1.4, "grad_norm": 0.5727003812789917, "learning_rate": 0.0003300107810983145, "loss": 3.0971, "step": 28655 }, { "epoch": 1.4, "grad_norm": 0.5849184989929199, "learning_rate": 0.0003299954620983788, "loss": 2.8865, "step": 28656 }, { "epoch": 1.4, "grad_norm": 0.5811796188354492, "learning_rate": 0.0003299801430194407, "loss": 2.9816, "step": 28657 }, { "epoch": 1.4, "grad_norm": 0.5917805433273315, "learning_rate": 0.00032996482386154054, "loss": 3.1353, "step": 28658 }, { "epoch": 1.4, "grad_norm": 0.5500307083129883, "learning_rate": 0.0003299495046247189, "loss": 3.0833, "step": 28659 }, { "epoch": 1.4, "grad_norm": 0.5789609551429749, "learning_rate": 0.0003299341853090162, "loss": 2.9942, "step": 28660 }, { "epoch": 1.4, "grad_norm": 0.539592981338501, "learning_rate": 0.0003299188659144725, "loss": 3.0309, "step": 28661 }, { "epoch": 1.4, "grad_norm": 0.601234495639801, "learning_rate": 0.00032990354644112823, "loss": 2.9512, "step": 28662 }, { "epoch": 1.4, "grad_norm": 0.5849595665931702, "learning_rate": 0.00032988822688902376, "loss": 3.0492, "step": 28663 }, { "epoch": 1.4, "grad_norm": 0.609544038772583, "learning_rate": 0.00032987290725819955, "loss": 3.0828, "step": 28664 }, { "epoch": 1.4, "grad_norm": 0.5832148194313049, "learning_rate": 0.00032985758754869575, "loss": 2.9258, "step": 28665 }, { "epoch": 1.4, "grad_norm": 0.5789518356323242, "learning_rate": 0.00032984226776055283, "loss": 3.1485, "step": 28666 }, { "epoch": 1.4, "grad_norm": 0.5975507497787476, "learning_rate": 0.0003298269478938112, "loss": 3.1122, "step": 28667 }, { "epoch": 1.4, "grad_norm": 0.5649939179420471, "learning_rate": 0.000329811627948511, "loss": 3.0364, "step": 28668 }, { "epoch": 1.4, "grad_norm": 0.6013187170028687, "learning_rate": 0.0003297963079246928, "loss": 3.1851, "step": 28669 }, { "epoch": 1.41, "grad_norm": 0.6145163774490356, "learning_rate": 0.00032978098782239686, "loss": 3.0284, "step": 28670 }, { "epoch": 1.41, "grad_norm": 0.5722653865814209, "learning_rate": 0.00032976566764166356, "loss": 3.088, "step": 28671 }, { "epoch": 1.41, "grad_norm": 0.597767174243927, "learning_rate": 0.0003297503473825331, "loss": 3.2594, "step": 28672 }, { "epoch": 1.41, "grad_norm": 0.5960759520530701, "learning_rate": 0.000329735027045046, "loss": 3.149, "step": 28673 }, { "epoch": 1.41, "grad_norm": 0.629086434841156, "learning_rate": 0.0003297197066292426, "loss": 3.1178, "step": 28674 }, { "epoch": 1.41, "grad_norm": 0.5683742165565491, "learning_rate": 0.00032970438613516323, "loss": 3.1392, "step": 28675 }, { "epoch": 1.41, "grad_norm": 0.5865838527679443, "learning_rate": 0.00032968906556284815, "loss": 3.1683, "step": 28676 }, { "epoch": 1.41, "grad_norm": 0.5845195651054382, "learning_rate": 0.0003296737449123378, "loss": 2.9377, "step": 28677 }, { "epoch": 1.41, "grad_norm": 0.5908820033073425, "learning_rate": 0.0003296584241836726, "loss": 3.0637, "step": 28678 }, { "epoch": 1.41, "grad_norm": 0.5620695352554321, "learning_rate": 0.00032964310337689273, "loss": 3.1156, "step": 28679 }, { "epoch": 1.41, "grad_norm": 0.5610857009887695, "learning_rate": 0.00032962778249203865, "loss": 3.1601, "step": 28680 }, { "epoch": 1.41, "grad_norm": 0.5518953800201416, "learning_rate": 0.0003296124615291507, "loss": 3.0757, "step": 28681 }, { "epoch": 1.41, "grad_norm": 0.5811406373977661, "learning_rate": 0.00032959714048826923, "loss": 3.0635, "step": 28682 }, { "epoch": 1.41, "grad_norm": 0.5600650310516357, "learning_rate": 0.0003295818193694346, "loss": 2.9543, "step": 28683 }, { "epoch": 1.41, "grad_norm": 0.6001834869384766, "learning_rate": 0.00032956649817268707, "loss": 3.0855, "step": 28684 }, { "epoch": 1.41, "grad_norm": 0.5461838841438293, "learning_rate": 0.0003295511768980672, "loss": 3.0432, "step": 28685 }, { "epoch": 1.41, "grad_norm": 0.5799773335456848, "learning_rate": 0.00032953585554561514, "loss": 2.9155, "step": 28686 }, { "epoch": 1.41, "grad_norm": 0.589601993560791, "learning_rate": 0.0003295205341153713, "loss": 3.1836, "step": 28687 }, { "epoch": 1.41, "grad_norm": 0.5646551251411438, "learning_rate": 0.0003295052126073761, "loss": 3.2984, "step": 28688 }, { "epoch": 1.41, "grad_norm": 0.5855870246887207, "learning_rate": 0.00032948989102166983, "loss": 3.1553, "step": 28689 }, { "epoch": 1.41, "grad_norm": 0.5871852040290833, "learning_rate": 0.00032947456935829284, "loss": 3.1598, "step": 28690 }, { "epoch": 1.41, "grad_norm": 0.6054069995880127, "learning_rate": 0.0003294592476172856, "loss": 2.9861, "step": 28691 }, { "epoch": 1.41, "grad_norm": 0.600292980670929, "learning_rate": 0.00032944392579868826, "loss": 3.0694, "step": 28692 }, { "epoch": 1.41, "grad_norm": 0.570586621761322, "learning_rate": 0.0003294286039025414, "loss": 2.9997, "step": 28693 }, { "epoch": 1.41, "grad_norm": 0.5904337167739868, "learning_rate": 0.0003294132819288851, "loss": 2.9074, "step": 28694 }, { "epoch": 1.41, "grad_norm": 0.5638096332550049, "learning_rate": 0.00032939795987776, "loss": 2.9564, "step": 28695 }, { "epoch": 1.41, "grad_norm": 0.5465283393859863, "learning_rate": 0.0003293826377492064, "loss": 3.1155, "step": 28696 }, { "epoch": 1.41, "grad_norm": 0.5867735147476196, "learning_rate": 0.00032936731554326443, "loss": 3.1527, "step": 28697 }, { "epoch": 1.41, "grad_norm": 0.5899142026901245, "learning_rate": 0.0003293519932599747, "loss": 3.0795, "step": 28698 }, { "epoch": 1.41, "grad_norm": 0.5677284002304077, "learning_rate": 0.00032933667089937745, "loss": 2.8966, "step": 28699 }, { "epoch": 1.41, "grad_norm": 0.5423473715782166, "learning_rate": 0.000329321348461513, "loss": 3.2039, "step": 28700 }, { "epoch": 1.41, "grad_norm": 0.6081904768943787, "learning_rate": 0.0003293060259464218, "loss": 3.1396, "step": 28701 }, { "epoch": 1.41, "grad_norm": 0.5386385321617126, "learning_rate": 0.0003292907033541441, "loss": 3.0957, "step": 28702 }, { "epoch": 1.41, "grad_norm": 0.5593324303627014, "learning_rate": 0.0003292753806847205, "loss": 3.0017, "step": 28703 }, { "epoch": 1.41, "grad_norm": 0.5604330897331238, "learning_rate": 0.000329260057938191, "loss": 2.9814, "step": 28704 }, { "epoch": 1.41, "grad_norm": 0.6002910733222961, "learning_rate": 0.0003292447351145962, "loss": 2.9692, "step": 28705 }, { "epoch": 1.41, "grad_norm": 0.5873032808303833, "learning_rate": 0.00032922941221397644, "loss": 3.2579, "step": 28706 }, { "epoch": 1.41, "grad_norm": 0.5412537455558777, "learning_rate": 0.000329214089236372, "loss": 2.7752, "step": 28707 }, { "epoch": 1.41, "grad_norm": 0.58359295129776, "learning_rate": 0.0003291987661818233, "loss": 3.2543, "step": 28708 }, { "epoch": 1.41, "grad_norm": 0.5666883587837219, "learning_rate": 0.00032918344305037057, "loss": 3.2266, "step": 28709 }, { "epoch": 1.41, "grad_norm": 0.5886451005935669, "learning_rate": 0.00032916811984205427, "loss": 3.0358, "step": 28710 }, { "epoch": 1.41, "grad_norm": 0.5730712413787842, "learning_rate": 0.0003291527965569149, "loss": 3.3266, "step": 28711 }, { "epoch": 1.41, "grad_norm": 0.6448611617088318, "learning_rate": 0.00032913747319499265, "loss": 2.9833, "step": 28712 }, { "epoch": 1.41, "grad_norm": 0.5740808844566345, "learning_rate": 0.00032912214975632783, "loss": 3.1631, "step": 28713 }, { "epoch": 1.41, "grad_norm": 0.5787137746810913, "learning_rate": 0.00032910682624096087, "loss": 3.0614, "step": 28714 }, { "epoch": 1.41, "grad_norm": 0.6143020391464233, "learning_rate": 0.0003290915026489322, "loss": 2.9691, "step": 28715 }, { "epoch": 1.41, "grad_norm": 0.5791074633598328, "learning_rate": 0.000329076178980282, "loss": 2.8866, "step": 28716 }, { "epoch": 1.41, "grad_norm": 0.5702418088912964, "learning_rate": 0.0003290608552350508, "loss": 2.997, "step": 28717 }, { "epoch": 1.41, "grad_norm": 0.5896664261817932, "learning_rate": 0.000329045531413279, "loss": 2.9138, "step": 28718 }, { "epoch": 1.41, "grad_norm": 0.6688372492790222, "learning_rate": 0.00032903020751500676, "loss": 3.1648, "step": 28719 }, { "epoch": 1.41, "grad_norm": 0.5435354113578796, "learning_rate": 0.00032901488354027445, "loss": 3.0354, "step": 28720 }, { "epoch": 1.41, "grad_norm": 0.5935372710227966, "learning_rate": 0.0003289995594891227, "loss": 3.0646, "step": 28721 }, { "epoch": 1.41, "grad_norm": 0.5761470198631287, "learning_rate": 0.00032898423536159165, "loss": 3.2373, "step": 28722 }, { "epoch": 1.41, "grad_norm": 0.6226154565811157, "learning_rate": 0.00032896891115772166, "loss": 2.8922, "step": 28723 }, { "epoch": 1.41, "grad_norm": 0.5801342129707336, "learning_rate": 0.0003289535868775531, "loss": 3.0508, "step": 28724 }, { "epoch": 1.41, "grad_norm": 0.5750948190689087, "learning_rate": 0.0003289382625211264, "loss": 3.152, "step": 28725 }, { "epoch": 1.41, "grad_norm": 0.612395167350769, "learning_rate": 0.00032892293808848183, "loss": 2.9434, "step": 28726 }, { "epoch": 1.41, "grad_norm": 0.607111930847168, "learning_rate": 0.0003289076135796599, "loss": 3.1274, "step": 28727 }, { "epoch": 1.41, "grad_norm": 0.5647713541984558, "learning_rate": 0.00032889228899470085, "loss": 3.1467, "step": 28728 }, { "epoch": 1.41, "grad_norm": 0.5546459555625916, "learning_rate": 0.0003288769643336451, "loss": 3.2392, "step": 28729 }, { "epoch": 1.41, "grad_norm": 0.6006819605827332, "learning_rate": 0.00032886163959653286, "loss": 3.1325, "step": 28730 }, { "epoch": 1.41, "grad_norm": 0.593948245048523, "learning_rate": 0.0003288463147834047, "loss": 3.1686, "step": 28731 }, { "epoch": 1.41, "grad_norm": 0.6291419267654419, "learning_rate": 0.0003288309898943009, "loss": 3.0538, "step": 28732 }, { "epoch": 1.41, "grad_norm": 0.5162724256515503, "learning_rate": 0.00032881566492926187, "loss": 3.0556, "step": 28733 }, { "epoch": 1.41, "grad_norm": 0.6125902533531189, "learning_rate": 0.0003288003398883279, "loss": 3.1117, "step": 28734 }, { "epoch": 1.41, "grad_norm": 0.5817069411277771, "learning_rate": 0.0003287850147715393, "loss": 3.0981, "step": 28735 }, { "epoch": 1.41, "grad_norm": 0.5820333361625671, "learning_rate": 0.0003287696895789366, "loss": 3.18, "step": 28736 }, { "epoch": 1.41, "grad_norm": 0.6199052929878235, "learning_rate": 0.00032875436431056, "loss": 2.9355, "step": 28737 }, { "epoch": 1.41, "grad_norm": 0.6050741672515869, "learning_rate": 0.00032873903896645, "loss": 3.0017, "step": 28738 }, { "epoch": 1.41, "grad_norm": 0.5911166667938232, "learning_rate": 0.00032872371354664685, "loss": 2.894, "step": 28739 }, { "epoch": 1.41, "grad_norm": 0.5618557929992676, "learning_rate": 0.000328708388051191, "loss": 2.8401, "step": 28740 }, { "epoch": 1.41, "grad_norm": 0.5915781259536743, "learning_rate": 0.00032869306248012277, "loss": 3.02, "step": 28741 }, { "epoch": 1.41, "grad_norm": 0.5654686689376831, "learning_rate": 0.00032867773683348256, "loss": 2.9968, "step": 28742 }, { "epoch": 1.41, "grad_norm": 0.584214448928833, "learning_rate": 0.0003286624111113107, "loss": 3.1967, "step": 28743 }, { "epoch": 1.41, "grad_norm": 0.5594285726547241, "learning_rate": 0.0003286470853136476, "loss": 3.0001, "step": 28744 }, { "epoch": 1.41, "grad_norm": 0.5778558254241943, "learning_rate": 0.0003286317594405335, "loss": 3.0812, "step": 28745 }, { "epoch": 1.41, "grad_norm": 0.6112017631530762, "learning_rate": 0.0003286164334920089, "loss": 2.981, "step": 28746 }, { "epoch": 1.41, "grad_norm": 0.5561625361442566, "learning_rate": 0.00032860110746811414, "loss": 2.9896, "step": 28747 }, { "epoch": 1.41, "grad_norm": 0.57763671875, "learning_rate": 0.00032858578136888957, "loss": 2.7896, "step": 28748 }, { "epoch": 1.41, "grad_norm": 0.5785109996795654, "learning_rate": 0.00032857045519437555, "loss": 2.9054, "step": 28749 }, { "epoch": 1.41, "grad_norm": 0.5656970143318176, "learning_rate": 0.0003285551289446124, "loss": 3.0454, "step": 28750 }, { "epoch": 1.41, "grad_norm": 0.5791580080986023, "learning_rate": 0.00032853980261964054, "loss": 2.8888, "step": 28751 }, { "epoch": 1.41, "grad_norm": 0.586801290512085, "learning_rate": 0.0003285244762195004, "loss": 2.8627, "step": 28752 }, { "epoch": 1.41, "grad_norm": 0.5646578073501587, "learning_rate": 0.00032850914974423225, "loss": 3.1138, "step": 28753 }, { "epoch": 1.41, "grad_norm": 0.5531555414199829, "learning_rate": 0.00032849382319387647, "loss": 3.193, "step": 28754 }, { "epoch": 1.41, "grad_norm": 0.5732769966125488, "learning_rate": 0.0003284784965684734, "loss": 3.1193, "step": 28755 }, { "epoch": 1.41, "grad_norm": 0.5827401876449585, "learning_rate": 0.00032846316986806355, "loss": 3.1374, "step": 28756 }, { "epoch": 1.41, "grad_norm": 0.5628218650817871, "learning_rate": 0.00032844784309268713, "loss": 3.2275, "step": 28757 }, { "epoch": 1.41, "grad_norm": 0.5630583167076111, "learning_rate": 0.00032843251624238464, "loss": 3.212, "step": 28758 }, { "epoch": 1.41, "grad_norm": 0.5518205165863037, "learning_rate": 0.00032841718931719626, "loss": 2.8222, "step": 28759 }, { "epoch": 1.41, "grad_norm": 0.576407253742218, "learning_rate": 0.00032840186231716253, "loss": 3.1868, "step": 28760 }, { "epoch": 1.41, "grad_norm": 0.5976382493972778, "learning_rate": 0.00032838653524232367, "loss": 3.1501, "step": 28761 }, { "epoch": 1.41, "grad_norm": 0.5755053758621216, "learning_rate": 0.0003283712080927203, "loss": 2.9968, "step": 28762 }, { "epoch": 1.41, "grad_norm": 0.5521357655525208, "learning_rate": 0.00032835588086839256, "loss": 3.211, "step": 28763 }, { "epoch": 1.41, "grad_norm": 0.5395427346229553, "learning_rate": 0.0003283405535693809, "loss": 2.959, "step": 28764 }, { "epoch": 1.41, "grad_norm": 0.5700335502624512, "learning_rate": 0.00032832522619572564, "loss": 3.0619, "step": 28765 }, { "epoch": 1.41, "grad_norm": 0.5535926222801208, "learning_rate": 0.0003283098987474672, "loss": 3.1751, "step": 28766 }, { "epoch": 1.41, "grad_norm": 0.6124323606491089, "learning_rate": 0.00032829457122464595, "loss": 3.1244, "step": 28767 }, { "epoch": 1.41, "grad_norm": 0.6224178671836853, "learning_rate": 0.0003282792436273022, "loss": 2.879, "step": 28768 }, { "epoch": 1.41, "grad_norm": 0.5655115842819214, "learning_rate": 0.0003282639159554765, "loss": 3.1099, "step": 28769 }, { "epoch": 1.41, "grad_norm": 0.6045935153961182, "learning_rate": 0.00032824858820920894, "loss": 2.9087, "step": 28770 }, { "epoch": 1.41, "grad_norm": 0.5513472557067871, "learning_rate": 0.00032823326038854003, "loss": 2.8912, "step": 28771 }, { "epoch": 1.41, "grad_norm": 0.6038599610328674, "learning_rate": 0.0003282179324935102, "loss": 2.9113, "step": 28772 }, { "epoch": 1.41, "grad_norm": 0.5618096590042114, "learning_rate": 0.00032820260452415983, "loss": 3.2247, "step": 28773 }, { "epoch": 1.41, "grad_norm": 0.6248725652694702, "learning_rate": 0.00032818727648052914, "loss": 3.141, "step": 28774 }, { "epoch": 1.41, "grad_norm": 0.5756538510322571, "learning_rate": 0.0003281719483626585, "loss": 3.1785, "step": 28775 }, { "epoch": 1.41, "grad_norm": 0.5904281735420227, "learning_rate": 0.0003281566201705885, "loss": 2.9667, "step": 28776 }, { "epoch": 1.41, "grad_norm": 0.5620636940002441, "learning_rate": 0.00032814129190435936, "loss": 3.1892, "step": 28777 }, { "epoch": 1.41, "grad_norm": 0.5535551905632019, "learning_rate": 0.0003281259635640115, "loss": 3.2308, "step": 28778 }, { "epoch": 1.41, "grad_norm": 0.602520227432251, "learning_rate": 0.0003281106351495852, "loss": 2.8856, "step": 28779 }, { "epoch": 1.41, "grad_norm": 0.5640451908111572, "learning_rate": 0.000328095306661121, "loss": 3.0938, "step": 28780 }, { "epoch": 1.41, "grad_norm": 0.6214268207550049, "learning_rate": 0.000328079978098659, "loss": 2.7807, "step": 28781 }, { "epoch": 1.41, "grad_norm": 0.5931678414344788, "learning_rate": 0.0003280646494622399, "loss": 3.0696, "step": 28782 }, { "epoch": 1.41, "grad_norm": 0.5738914012908936, "learning_rate": 0.00032804932075190384, "loss": 3.1592, "step": 28783 }, { "epoch": 1.41, "grad_norm": 0.5861164331436157, "learning_rate": 0.00032803399196769135, "loss": 3.0072, "step": 28784 }, { "epoch": 1.41, "grad_norm": 0.5898017883300781, "learning_rate": 0.0003280186631096426, "loss": 3.1016, "step": 28785 }, { "epoch": 1.41, "grad_norm": 0.5697299242019653, "learning_rate": 0.00032800333417779806, "loss": 3.2083, "step": 28786 }, { "epoch": 1.41, "grad_norm": 0.5507596135139465, "learning_rate": 0.00032798800517219826, "loss": 2.8548, "step": 28787 }, { "epoch": 1.41, "grad_norm": 0.5449158549308777, "learning_rate": 0.00032797267609288344, "loss": 3.2197, "step": 28788 }, { "epoch": 1.41, "grad_norm": 0.5956053733825684, "learning_rate": 0.0003279573469398939, "loss": 3.011, "step": 28789 }, { "epoch": 1.41, "grad_norm": 0.6707125902175903, "learning_rate": 0.0003279420177132701, "loss": 3.1171, "step": 28790 }, { "epoch": 1.41, "grad_norm": 0.5548729300498962, "learning_rate": 0.00032792668841305245, "loss": 2.96, "step": 28791 }, { "epoch": 1.41, "grad_norm": 0.5525285601615906, "learning_rate": 0.00032791135903928123, "loss": 3.0747, "step": 28792 }, { "epoch": 1.41, "grad_norm": 0.5493025183677673, "learning_rate": 0.0003278960295919968, "loss": 3.1805, "step": 28793 }, { "epoch": 1.41, "grad_norm": 0.5797526836395264, "learning_rate": 0.00032788070007123965, "loss": 2.8174, "step": 28794 }, { "epoch": 1.41, "grad_norm": 0.5741456151008606, "learning_rate": 0.00032786537047705023, "loss": 3.1977, "step": 28795 }, { "epoch": 1.41, "grad_norm": 0.5887008905410767, "learning_rate": 0.0003278500408094686, "loss": 2.9331, "step": 28796 }, { "epoch": 1.41, "grad_norm": 0.6311918497085571, "learning_rate": 0.00032783471106853537, "loss": 2.8529, "step": 28797 }, { "epoch": 1.41, "grad_norm": 0.5639904141426086, "learning_rate": 0.00032781938125429097, "loss": 2.9302, "step": 28798 }, { "epoch": 1.41, "grad_norm": 0.5747509002685547, "learning_rate": 0.0003278040513667756, "loss": 3.0155, "step": 28799 }, { "epoch": 1.41, "grad_norm": 0.5683767795562744, "learning_rate": 0.00032778872140602967, "loss": 3.1515, "step": 28800 }, { "epoch": 1.41, "grad_norm": 0.5533831119537354, "learning_rate": 0.00032777339137209365, "loss": 2.9977, "step": 28801 }, { "epoch": 1.41, "grad_norm": 0.5720163583755493, "learning_rate": 0.0003277580612650078, "loss": 3.1106, "step": 28802 }, { "epoch": 1.41, "grad_norm": 0.605758011341095, "learning_rate": 0.0003277427310848127, "loss": 3.0415, "step": 28803 }, { "epoch": 1.41, "grad_norm": 0.5644210577011108, "learning_rate": 0.00032772740083154844, "loss": 2.9584, "step": 28804 }, { "epoch": 1.41, "grad_norm": 0.603422999382019, "learning_rate": 0.0003277120705052556, "loss": 2.9925, "step": 28805 }, { "epoch": 1.41, "grad_norm": 0.5727534890174866, "learning_rate": 0.00032769674010597454, "loss": 3.1375, "step": 28806 }, { "epoch": 1.41, "grad_norm": 0.5674635171890259, "learning_rate": 0.0003276814096337455, "loss": 3.0833, "step": 28807 }, { "epoch": 1.41, "grad_norm": 0.5521325469017029, "learning_rate": 0.00032766607908860904, "loss": 2.9992, "step": 28808 }, { "epoch": 1.41, "grad_norm": 0.5518462657928467, "learning_rate": 0.0003276507484706054, "loss": 3.1441, "step": 28809 }, { "epoch": 1.41, "grad_norm": 0.5592528581619263, "learning_rate": 0.0003276354177797751, "loss": 3.0381, "step": 28810 }, { "epoch": 1.41, "grad_norm": 0.5564046502113342, "learning_rate": 0.00032762008701615826, "loss": 3.1174, "step": 28811 }, { "epoch": 1.41, "grad_norm": 0.5931382775306702, "learning_rate": 0.0003276047561797955, "loss": 3.051, "step": 28812 }, { "epoch": 1.41, "grad_norm": 0.5513250231742859, "learning_rate": 0.0003275894252707272, "loss": 2.845, "step": 28813 }, { "epoch": 1.41, "grad_norm": 0.5776057839393616, "learning_rate": 0.0003275740942889936, "loss": 3.111, "step": 28814 }, { "epoch": 1.41, "grad_norm": 0.5935764908790588, "learning_rate": 0.00032755876323463514, "loss": 3.1766, "step": 28815 }, { "epoch": 1.41, "grad_norm": 0.6411653757095337, "learning_rate": 0.00032754343210769226, "loss": 3.037, "step": 28816 }, { "epoch": 1.41, "grad_norm": 0.5873118042945862, "learning_rate": 0.0003275281009082052, "loss": 3.1031, "step": 28817 }, { "epoch": 1.41, "grad_norm": 0.57716965675354, "learning_rate": 0.0003275127696362144, "loss": 3.1625, "step": 28818 }, { "epoch": 1.41, "grad_norm": 0.5541166067123413, "learning_rate": 0.00032749743829176036, "loss": 2.9424, "step": 28819 }, { "epoch": 1.41, "grad_norm": 0.5837197303771973, "learning_rate": 0.0003274821068748834, "loss": 3.1427, "step": 28820 }, { "epoch": 1.41, "grad_norm": 0.6180403828620911, "learning_rate": 0.00032746677538562376, "loss": 3.0543, "step": 28821 }, { "epoch": 1.41, "grad_norm": 0.5609276294708252, "learning_rate": 0.00032745144382402184, "loss": 3.2852, "step": 28822 }, { "epoch": 1.41, "grad_norm": 0.5260721445083618, "learning_rate": 0.00032743611219011816, "loss": 3.1377, "step": 28823 }, { "epoch": 1.41, "grad_norm": 0.568089485168457, "learning_rate": 0.00032742078048395316, "loss": 3.2754, "step": 28824 }, { "epoch": 1.41, "grad_norm": 0.5611562728881836, "learning_rate": 0.000327405448705567, "loss": 2.9644, "step": 28825 }, { "epoch": 1.41, "grad_norm": 0.5755119919776917, "learning_rate": 0.00032739011685500017, "loss": 3.0372, "step": 28826 }, { "epoch": 1.41, "grad_norm": 0.579472541809082, "learning_rate": 0.00032737478493229306, "loss": 3.2508, "step": 28827 }, { "epoch": 1.41, "grad_norm": 0.5705549716949463, "learning_rate": 0.00032735945293748607, "loss": 3.0324, "step": 28828 }, { "epoch": 1.41, "grad_norm": 0.582402229309082, "learning_rate": 0.0003273441208706194, "loss": 2.9384, "step": 28829 }, { "epoch": 1.41, "grad_norm": 0.5975726842880249, "learning_rate": 0.00032732878873173373, "loss": 2.7791, "step": 28830 }, { "epoch": 1.41, "grad_norm": 0.546567440032959, "learning_rate": 0.0003273134565208693, "loss": 3.1615, "step": 28831 }, { "epoch": 1.41, "grad_norm": 0.5662059783935547, "learning_rate": 0.0003272981242380664, "loss": 3.037, "step": 28832 }, { "epoch": 1.41, "grad_norm": 0.567099392414093, "learning_rate": 0.00032728279188336544, "loss": 3.0003, "step": 28833 }, { "epoch": 1.41, "grad_norm": 0.584934651851654, "learning_rate": 0.0003272674594568069, "loss": 2.908, "step": 28834 }, { "epoch": 1.41, "grad_norm": 0.5936591029167175, "learning_rate": 0.0003272521269584313, "loss": 3.0655, "step": 28835 }, { "epoch": 1.41, "grad_norm": 0.5734385251998901, "learning_rate": 0.0003272367943882787, "loss": 2.9705, "step": 28836 }, { "epoch": 1.41, "grad_norm": 0.5487362742424011, "learning_rate": 0.0003272214617463896, "loss": 3.0344, "step": 28837 }, { "epoch": 1.41, "grad_norm": 0.5989526510238647, "learning_rate": 0.0003272061290328044, "loss": 3.11, "step": 28838 }, { "epoch": 1.41, "grad_norm": 0.5572174191474915, "learning_rate": 0.00032719079624756353, "loss": 2.9015, "step": 28839 }, { "epoch": 1.41, "grad_norm": 0.5623894929885864, "learning_rate": 0.00032717546339070734, "loss": 2.982, "step": 28840 }, { "epoch": 1.41, "grad_norm": 0.6148263216018677, "learning_rate": 0.0003271601304622762, "loss": 2.9311, "step": 28841 }, { "epoch": 1.41, "grad_norm": 0.5677447319030762, "learning_rate": 0.0003271447974623105, "loss": 3.0023, "step": 28842 }, { "epoch": 1.41, "grad_norm": 0.5840166807174683, "learning_rate": 0.0003271294643908506, "loss": 3.0108, "step": 28843 }, { "epoch": 1.41, "grad_norm": 0.5517377853393555, "learning_rate": 0.00032711413124793693, "loss": 3.1285, "step": 28844 }, { "epoch": 1.41, "grad_norm": 0.5746965408325195, "learning_rate": 0.0003270987980336099, "loss": 3.0579, "step": 28845 }, { "epoch": 1.41, "grad_norm": 0.5635104775428772, "learning_rate": 0.00032708346474790985, "loss": 2.8986, "step": 28846 }, { "epoch": 1.41, "grad_norm": 0.5739995241165161, "learning_rate": 0.00032706813139087715, "loss": 3.0193, "step": 28847 }, { "epoch": 1.41, "grad_norm": 0.5986478328704834, "learning_rate": 0.0003270527979625521, "loss": 3.2475, "step": 28848 }, { "epoch": 1.41, "grad_norm": 0.5273615121841431, "learning_rate": 0.0003270374644629753, "loss": 3.2175, "step": 28849 }, { "epoch": 1.41, "grad_norm": 0.5814329981803894, "learning_rate": 0.000327022130892187, "loss": 2.9465, "step": 28850 }, { "epoch": 1.41, "grad_norm": 0.5327739715576172, "learning_rate": 0.0003270067972502276, "loss": 3.0869, "step": 28851 }, { "epoch": 1.41, "grad_norm": 0.5583226680755615, "learning_rate": 0.0003269914635371375, "loss": 3.0564, "step": 28852 }, { "epoch": 1.41, "grad_norm": 0.5671470761299133, "learning_rate": 0.00032697612975295706, "loss": 3.3398, "step": 28853 }, { "epoch": 1.41, "grad_norm": 0.6003071665763855, "learning_rate": 0.00032696079589772674, "loss": 2.9829, "step": 28854 }, { "epoch": 1.41, "grad_norm": 0.5671707391738892, "learning_rate": 0.00032694546197148676, "loss": 3.1985, "step": 28855 }, { "epoch": 1.41, "grad_norm": 0.5648956298828125, "learning_rate": 0.00032693012797427773, "loss": 3.1373, "step": 28856 }, { "epoch": 1.41, "grad_norm": 0.5903165936470032, "learning_rate": 0.0003269147939061399, "loss": 2.9278, "step": 28857 }, { "epoch": 1.41, "grad_norm": 0.5882886648178101, "learning_rate": 0.00032689945976711366, "loss": 3.102, "step": 28858 }, { "epoch": 1.41, "grad_norm": 0.6090853810310364, "learning_rate": 0.00032688412555723946, "loss": 3.0155, "step": 28859 }, { "epoch": 1.41, "grad_norm": 0.5981293320655823, "learning_rate": 0.0003268687912765576, "loss": 2.9891, "step": 28860 }, { "epoch": 1.41, "grad_norm": 0.6331175565719604, "learning_rate": 0.0003268534569251086, "loss": 2.9693, "step": 28861 }, { "epoch": 1.41, "grad_norm": 0.5615060925483704, "learning_rate": 0.0003268381225029327, "loss": 3.0143, "step": 28862 }, { "epoch": 1.41, "grad_norm": 0.5723454356193542, "learning_rate": 0.0003268227880100703, "loss": 3.0968, "step": 28863 }, { "epoch": 1.41, "grad_norm": 0.5850404500961304, "learning_rate": 0.0003268074534465619, "loss": 2.9922, "step": 28864 }, { "epoch": 1.41, "grad_norm": 0.5858272910118103, "learning_rate": 0.0003267921188124478, "loss": 3.0211, "step": 28865 }, { "epoch": 1.41, "grad_norm": 0.5558182597160339, "learning_rate": 0.0003267767841077684, "loss": 2.8762, "step": 28866 }, { "epoch": 1.41, "grad_norm": 0.570468544960022, "learning_rate": 0.0003267614493325641, "loss": 3.2, "step": 28867 }, { "epoch": 1.41, "grad_norm": 0.5738118886947632, "learning_rate": 0.0003267461144868753, "loss": 2.9635, "step": 28868 }, { "epoch": 1.41, "grad_norm": 0.8501331806182861, "learning_rate": 0.00032673077957074244, "loss": 3.1468, "step": 28869 }, { "epoch": 1.41, "grad_norm": 0.582685112953186, "learning_rate": 0.00032671544458420587, "loss": 2.9663, "step": 28870 }, { "epoch": 1.41, "grad_norm": 0.5798792839050293, "learning_rate": 0.0003267001095273059, "loss": 3.1031, "step": 28871 }, { "epoch": 1.41, "grad_norm": 0.9409517645835876, "learning_rate": 0.000326684774400083, "loss": 3.17, "step": 28872 }, { "epoch": 1.41, "grad_norm": 0.5941800475120544, "learning_rate": 0.00032666943920257754, "loss": 3.1101, "step": 28873 }, { "epoch": 1.42, "grad_norm": 0.5913896560668945, "learning_rate": 0.0003266541039348298, "loss": 3.1029, "step": 28874 }, { "epoch": 1.42, "grad_norm": 0.5694225430488586, "learning_rate": 0.00032663876859688045, "loss": 3.1596, "step": 28875 }, { "epoch": 1.42, "grad_norm": 0.600014865398407, "learning_rate": 0.00032662343318876964, "loss": 3.0626, "step": 28876 }, { "epoch": 1.42, "grad_norm": 0.5729812383651733, "learning_rate": 0.00032660809771053784, "loss": 2.9556, "step": 28877 }, { "epoch": 1.42, "grad_norm": 0.5758359432220459, "learning_rate": 0.0003265927621622254, "loss": 3.1092, "step": 28878 }, { "epoch": 1.42, "grad_norm": 0.5663775205612183, "learning_rate": 0.0003265774265438727, "loss": 3.0048, "step": 28879 }, { "epoch": 1.42, "grad_norm": 0.5517311692237854, "learning_rate": 0.0003265620908555203, "loss": 2.9829, "step": 28880 }, { "epoch": 1.42, "grad_norm": 0.550212025642395, "learning_rate": 0.0003265467550972084, "loss": 3.1944, "step": 28881 }, { "epoch": 1.42, "grad_norm": 0.5680566430091858, "learning_rate": 0.00032653141926897747, "loss": 3.0897, "step": 28882 }, { "epoch": 1.42, "grad_norm": 0.5950158834457397, "learning_rate": 0.0003265160833708679, "loss": 3.0992, "step": 28883 }, { "epoch": 1.42, "grad_norm": 1.2692245244979858, "learning_rate": 0.00032650074740291996, "loss": 3.2001, "step": 28884 }, { "epoch": 1.42, "grad_norm": 0.560718297958374, "learning_rate": 0.0003264854113651742, "loss": 3.074, "step": 28885 }, { "epoch": 1.42, "grad_norm": 0.6030946373939514, "learning_rate": 0.00032647007525767106, "loss": 3.1256, "step": 28886 }, { "epoch": 1.42, "grad_norm": 0.580146312713623, "learning_rate": 0.0003264547390804508, "loss": 3.1419, "step": 28887 }, { "epoch": 1.42, "grad_norm": 0.6022080779075623, "learning_rate": 0.0003264394028335538, "loss": 3.111, "step": 28888 }, { "epoch": 1.42, "grad_norm": 0.6084839105606079, "learning_rate": 0.00032642406651702053, "loss": 3.088, "step": 28889 }, { "epoch": 1.42, "grad_norm": 0.767863392829895, "learning_rate": 0.00032640873013089133, "loss": 3.033, "step": 28890 }, { "epoch": 1.42, "grad_norm": 0.5766077041625977, "learning_rate": 0.00032639339367520665, "loss": 2.9984, "step": 28891 }, { "epoch": 1.42, "grad_norm": 0.5633019804954529, "learning_rate": 0.00032637805715000684, "loss": 3.1288, "step": 28892 }, { "epoch": 1.42, "grad_norm": 0.5685615539550781, "learning_rate": 0.0003263627205553323, "loss": 3.0734, "step": 28893 }, { "epoch": 1.42, "grad_norm": 0.6013651490211487, "learning_rate": 0.0003263473838912234, "loss": 2.937, "step": 28894 }, { "epoch": 1.42, "grad_norm": 0.6018322706222534, "learning_rate": 0.0003263320471577205, "loss": 3.0378, "step": 28895 }, { "epoch": 1.42, "grad_norm": 0.6196945905685425, "learning_rate": 0.00032631671035486423, "loss": 3.0361, "step": 28896 }, { "epoch": 1.42, "grad_norm": 0.5838954448699951, "learning_rate": 0.0003263013734826948, "loss": 3.1028, "step": 28897 }, { "epoch": 1.42, "grad_norm": 0.5709912776947021, "learning_rate": 0.00032628603654125246, "loss": 3.1711, "step": 28898 }, { "epoch": 1.42, "grad_norm": 0.5789138674736023, "learning_rate": 0.0003262706995305778, "loss": 3.2224, "step": 28899 }, { "epoch": 1.42, "grad_norm": 0.6325254440307617, "learning_rate": 0.0003262553624507113, "loss": 3.0815, "step": 28900 }, { "epoch": 1.42, "grad_norm": 0.5830736756324768, "learning_rate": 0.0003262400253016931, "loss": 3.0138, "step": 28901 }, { "epoch": 1.42, "grad_norm": 0.6032917499542236, "learning_rate": 0.00032622468808356377, "loss": 3.0851, "step": 28902 }, { "epoch": 1.42, "grad_norm": 0.6263208985328674, "learning_rate": 0.00032620935079636363, "loss": 3.0902, "step": 28903 }, { "epoch": 1.42, "grad_norm": 0.6103036403656006, "learning_rate": 0.0003261940134401331, "loss": 2.7757, "step": 28904 }, { "epoch": 1.42, "grad_norm": 0.5601415038108826, "learning_rate": 0.00032617867601491266, "loss": 3.0745, "step": 28905 }, { "epoch": 1.42, "grad_norm": 0.5642362833023071, "learning_rate": 0.00032616333852074255, "loss": 3.1547, "step": 28906 }, { "epoch": 1.42, "grad_norm": 0.6236977577209473, "learning_rate": 0.0003261480009576632, "loss": 2.9319, "step": 28907 }, { "epoch": 1.42, "grad_norm": 0.6301894187927246, "learning_rate": 0.0003261326633257152, "loss": 3.04, "step": 28908 }, { "epoch": 1.42, "grad_norm": 0.5724765658378601, "learning_rate": 0.00032611732562493864, "loss": 3.1385, "step": 28909 }, { "epoch": 1.42, "grad_norm": 0.5792955756187439, "learning_rate": 0.00032610198785537414, "loss": 3.0763, "step": 28910 }, { "epoch": 1.42, "grad_norm": 0.5876222848892212, "learning_rate": 0.0003260866500170621, "loss": 2.9057, "step": 28911 }, { "epoch": 1.42, "grad_norm": 0.5676042437553406, "learning_rate": 0.0003260713121100427, "loss": 3.0592, "step": 28912 }, { "epoch": 1.42, "grad_norm": 0.6063807010650635, "learning_rate": 0.0003260559741343566, "loss": 3.0202, "step": 28913 }, { "epoch": 1.42, "grad_norm": 0.5811011791229248, "learning_rate": 0.000326040636090044, "loss": 3.1869, "step": 28914 }, { "epoch": 1.42, "grad_norm": 0.5812034606933594, "learning_rate": 0.0003260252979771454, "loss": 2.9437, "step": 28915 }, { "epoch": 1.42, "grad_norm": 0.57767254114151, "learning_rate": 0.00032600995979570113, "loss": 3.1279, "step": 28916 }, { "epoch": 1.42, "grad_norm": 0.5982522368431091, "learning_rate": 0.00032599462154575174, "loss": 3.0572, "step": 28917 }, { "epoch": 1.42, "grad_norm": 0.6943778395652771, "learning_rate": 0.0003259792832273374, "loss": 3.1348, "step": 28918 }, { "epoch": 1.42, "grad_norm": 0.5726862549781799, "learning_rate": 0.0003259639448404988, "loss": 3.3887, "step": 28919 }, { "epoch": 1.42, "grad_norm": 0.5609963536262512, "learning_rate": 0.000325948606385276, "loss": 3.0386, "step": 28920 }, { "epoch": 1.42, "grad_norm": 0.573071300983429, "learning_rate": 0.0003259332678617095, "loss": 3.2499, "step": 28921 }, { "epoch": 1.42, "grad_norm": 0.5977573990821838, "learning_rate": 0.00032591792926983995, "loss": 3.0938, "step": 28922 }, { "epoch": 1.42, "grad_norm": 0.5285585522651672, "learning_rate": 0.00032590259060970755, "loss": 3.3141, "step": 28923 }, { "epoch": 1.42, "grad_norm": 0.6297756433486938, "learning_rate": 0.0003258872518813526, "loss": 3.0633, "step": 28924 }, { "epoch": 1.42, "grad_norm": 0.5830036401748657, "learning_rate": 0.00032587191308481564, "loss": 3.0276, "step": 28925 }, { "epoch": 1.42, "grad_norm": 0.6116616725921631, "learning_rate": 0.0003258565742201371, "loss": 2.7825, "step": 28926 }, { "epoch": 1.42, "grad_norm": 0.5653083920478821, "learning_rate": 0.00032584123528735725, "loss": 3.0379, "step": 28927 }, { "epoch": 1.42, "grad_norm": 0.6250095963478088, "learning_rate": 0.00032582589628651665, "loss": 3.0878, "step": 28928 }, { "epoch": 1.42, "grad_norm": 0.5555509924888611, "learning_rate": 0.00032581055721765553, "loss": 2.9807, "step": 28929 }, { "epoch": 1.42, "grad_norm": 0.5599590539932251, "learning_rate": 0.00032579521808081435, "loss": 2.8723, "step": 28930 }, { "epoch": 1.42, "grad_norm": 0.589675784111023, "learning_rate": 0.0003257798788760336, "loss": 3.0035, "step": 28931 }, { "epoch": 1.42, "grad_norm": 0.5757635831832886, "learning_rate": 0.00032576453960335357, "loss": 3.2037, "step": 28932 }, { "epoch": 1.42, "grad_norm": 0.5922831892967224, "learning_rate": 0.0003257492002628148, "loss": 3.1709, "step": 28933 }, { "epoch": 1.42, "grad_norm": 0.5696724653244019, "learning_rate": 0.00032573386085445747, "loss": 3.0498, "step": 28934 }, { "epoch": 1.42, "grad_norm": 0.6184026002883911, "learning_rate": 0.0003257185213783221, "loss": 2.9888, "step": 28935 }, { "epoch": 1.42, "grad_norm": 0.552787184715271, "learning_rate": 0.00032570318183444913, "loss": 3.1177, "step": 28936 }, { "epoch": 1.42, "grad_norm": 0.6209615468978882, "learning_rate": 0.00032568784222287896, "loss": 3.0414, "step": 28937 }, { "epoch": 1.42, "grad_norm": 0.5979040265083313, "learning_rate": 0.0003256725025436519, "loss": 2.9614, "step": 28938 }, { "epoch": 1.42, "grad_norm": 0.6105141043663025, "learning_rate": 0.0003256571627968084, "loss": 2.9355, "step": 28939 }, { "epoch": 1.42, "grad_norm": 0.564461350440979, "learning_rate": 0.000325641822982389, "loss": 2.9066, "step": 28940 }, { "epoch": 1.42, "grad_norm": 0.5573233962059021, "learning_rate": 0.0003256264831004338, "loss": 2.9489, "step": 28941 }, { "epoch": 1.42, "grad_norm": 0.5542727112770081, "learning_rate": 0.0003256111431509835, "loss": 2.9582, "step": 28942 }, { "epoch": 1.42, "grad_norm": 0.5618546605110168, "learning_rate": 0.0003255958031340783, "loss": 3.084, "step": 28943 }, { "epoch": 1.42, "grad_norm": 0.5816484093666077, "learning_rate": 0.00032558046304975876, "loss": 2.9806, "step": 28944 }, { "epoch": 1.42, "grad_norm": 0.6100296974182129, "learning_rate": 0.0003255651228980651, "loss": 3.202, "step": 28945 }, { "epoch": 1.42, "grad_norm": 0.5640974640846252, "learning_rate": 0.00032554978267903786, "loss": 3.1282, "step": 28946 }, { "epoch": 1.42, "grad_norm": 0.5738323330879211, "learning_rate": 0.00032553444239271745, "loss": 3.0857, "step": 28947 }, { "epoch": 1.42, "grad_norm": 0.6203998923301697, "learning_rate": 0.0003255191020391443, "loss": 3.0114, "step": 28948 }, { "epoch": 1.42, "grad_norm": 0.570510983467102, "learning_rate": 0.00032550376161835865, "loss": 3.0182, "step": 28949 }, { "epoch": 1.42, "grad_norm": 0.5366271138191223, "learning_rate": 0.0003254884211304009, "loss": 2.9167, "step": 28950 }, { "epoch": 1.42, "grad_norm": 0.5703023672103882, "learning_rate": 0.00032547308057531173, "loss": 3.073, "step": 28951 }, { "epoch": 1.42, "grad_norm": 0.57989501953125, "learning_rate": 0.0003254577399531313, "loss": 2.995, "step": 28952 }, { "epoch": 1.42, "grad_norm": 0.5939070582389832, "learning_rate": 0.0003254423992639001, "loss": 3.1934, "step": 28953 }, { "epoch": 1.42, "grad_norm": 0.5694959759712219, "learning_rate": 0.00032542705850765845, "loss": 3.1665, "step": 28954 }, { "epoch": 1.42, "grad_norm": 0.5695184469223022, "learning_rate": 0.000325411717684447, "loss": 2.9085, "step": 28955 }, { "epoch": 1.42, "grad_norm": 0.6038377285003662, "learning_rate": 0.00032539637679430576, "loss": 2.978, "step": 28956 }, { "epoch": 1.42, "grad_norm": 0.5637387037277222, "learning_rate": 0.00032538103583727544, "loss": 3.1269, "step": 28957 }, { "epoch": 1.42, "grad_norm": 0.6748733520507812, "learning_rate": 0.00032536569481339637, "loss": 3.1292, "step": 28958 }, { "epoch": 1.42, "grad_norm": 0.5864824652671814, "learning_rate": 0.00032535035372270897, "loss": 3.1735, "step": 28959 }, { "epoch": 1.42, "grad_norm": 0.6003410220146179, "learning_rate": 0.0003253350125652536, "loss": 3.1991, "step": 28960 }, { "epoch": 1.42, "grad_norm": 0.5306596159934998, "learning_rate": 0.0003253196713410706, "loss": 2.9204, "step": 28961 }, { "epoch": 1.42, "grad_norm": 0.5750020742416382, "learning_rate": 0.0003253043300502006, "loss": 3.019, "step": 28962 }, { "epoch": 1.42, "grad_norm": 0.5885328054428101, "learning_rate": 0.0003252889886926838, "loss": 2.971, "step": 28963 }, { "epoch": 1.42, "grad_norm": 0.6015452146530151, "learning_rate": 0.0003252736472685606, "loss": 3.061, "step": 28964 }, { "epoch": 1.42, "grad_norm": 0.5836628675460815, "learning_rate": 0.00032525830577787154, "loss": 3.0505, "step": 28965 }, { "epoch": 1.42, "grad_norm": 0.5610659718513489, "learning_rate": 0.00032524296422065693, "loss": 3.1, "step": 28966 }, { "epoch": 1.42, "grad_norm": 0.5679178237915039, "learning_rate": 0.0003252276225969572, "loss": 3.0292, "step": 28967 }, { "epoch": 1.42, "grad_norm": 0.541135311126709, "learning_rate": 0.0003252122809068128, "loss": 2.9467, "step": 28968 }, { "epoch": 1.42, "grad_norm": 0.6564010977745056, "learning_rate": 0.0003251969391502641, "loss": 2.9938, "step": 28969 }, { "epoch": 1.42, "grad_norm": 0.5690862536430359, "learning_rate": 0.0003251815973273516, "loss": 3.131, "step": 28970 }, { "epoch": 1.42, "grad_norm": 0.56672203540802, "learning_rate": 0.0003251662554381154, "loss": 3.0208, "step": 28971 }, { "epoch": 1.42, "grad_norm": 0.6128435730934143, "learning_rate": 0.0003251509134825963, "loss": 2.9169, "step": 28972 }, { "epoch": 1.42, "grad_norm": 0.5664635896682739, "learning_rate": 0.00032513557146083454, "loss": 3.0566, "step": 28973 }, { "epoch": 1.42, "grad_norm": 0.5436082482337952, "learning_rate": 0.0003251202293728705, "loss": 3.0333, "step": 28974 }, { "epoch": 1.42, "grad_norm": 0.6189156770706177, "learning_rate": 0.0003251048872187446, "loss": 2.9735, "step": 28975 }, { "epoch": 1.42, "grad_norm": 0.5798361897468567, "learning_rate": 0.0003250895449984972, "loss": 3.1293, "step": 28976 }, { "epoch": 1.42, "grad_norm": 0.584924042224884, "learning_rate": 0.00032507420271216884, "loss": 2.9706, "step": 28977 }, { "epoch": 1.42, "grad_norm": 0.597545862197876, "learning_rate": 0.0003250588603597998, "loss": 3.2138, "step": 28978 }, { "epoch": 1.42, "grad_norm": 0.5903199315071106, "learning_rate": 0.0003250435179414306, "loss": 3.1368, "step": 28979 }, { "epoch": 1.42, "grad_norm": 0.6203091144561768, "learning_rate": 0.0003250281754571016, "loss": 3.1918, "step": 28980 }, { "epoch": 1.42, "grad_norm": 0.6048563718795776, "learning_rate": 0.0003250128329068531, "loss": 3.1939, "step": 28981 }, { "epoch": 1.42, "grad_norm": 0.5847277641296387, "learning_rate": 0.0003249974902907257, "loss": 3.2304, "step": 28982 }, { "epoch": 1.42, "grad_norm": 0.6675156354904175, "learning_rate": 0.0003249821476087597, "loss": 2.9924, "step": 28983 }, { "epoch": 1.42, "grad_norm": 0.5942085385322571, "learning_rate": 0.00032496680486099566, "loss": 2.9132, "step": 28984 }, { "epoch": 1.42, "grad_norm": 0.6033164858818054, "learning_rate": 0.0003249514620474737, "loss": 3.0188, "step": 28985 }, { "epoch": 1.42, "grad_norm": 0.5678269863128662, "learning_rate": 0.00032493611916823437, "loss": 3.0577, "step": 28986 }, { "epoch": 1.42, "grad_norm": 0.5534139275550842, "learning_rate": 0.0003249207762233181, "loss": 2.9875, "step": 28987 }, { "epoch": 1.42, "grad_norm": 0.5515173673629761, "learning_rate": 0.00032490543321276547, "loss": 3.0418, "step": 28988 }, { "epoch": 1.42, "grad_norm": 0.585387110710144, "learning_rate": 0.0003248900901366166, "loss": 3.2985, "step": 28989 }, { "epoch": 1.42, "grad_norm": 0.5718480944633484, "learning_rate": 0.00032487474699491206, "loss": 2.9427, "step": 28990 }, { "epoch": 1.42, "grad_norm": 0.5717833638191223, "learning_rate": 0.0003248594037876922, "loss": 2.9248, "step": 28991 }, { "epoch": 1.42, "grad_norm": 0.5640171766281128, "learning_rate": 0.0003248440605149974, "loss": 3.0837, "step": 28992 }, { "epoch": 1.42, "grad_norm": 0.5515601634979248, "learning_rate": 0.00032482871717686817, "loss": 3.1191, "step": 28993 }, { "epoch": 1.42, "grad_norm": 0.5653744339942932, "learning_rate": 0.00032481337377334487, "loss": 3.1291, "step": 28994 }, { "epoch": 1.42, "grad_norm": 0.5796131491661072, "learning_rate": 0.00032479803030446803, "loss": 3.0333, "step": 28995 }, { "epoch": 1.42, "grad_norm": 0.5645726919174194, "learning_rate": 0.0003247826867702778, "loss": 3.1165, "step": 28996 }, { "epoch": 1.42, "grad_norm": 0.5933886170387268, "learning_rate": 0.0003247673431708148, "loss": 3.0208, "step": 28997 }, { "epoch": 1.42, "grad_norm": 0.5697206258773804, "learning_rate": 0.00032475199950611933, "loss": 2.9842, "step": 28998 }, { "epoch": 1.42, "grad_norm": 0.5712945461273193, "learning_rate": 0.000324736655776232, "loss": 3.1261, "step": 28999 }, { "epoch": 1.42, "grad_norm": 0.689601480960846, "learning_rate": 0.000324721311981193, "loss": 3.2871, "step": 29000 }, { "epoch": 1.42, "grad_norm": 0.5841153264045715, "learning_rate": 0.00032470596812104275, "loss": 3.1395, "step": 29001 }, { "epoch": 1.42, "grad_norm": 0.6104511618614197, "learning_rate": 0.0003246906241958218, "loss": 3.1197, "step": 29002 }, { "epoch": 1.42, "grad_norm": 0.575042724609375, "learning_rate": 0.0003246752802055705, "loss": 2.9056, "step": 29003 }, { "epoch": 1.42, "grad_norm": 0.6196618676185608, "learning_rate": 0.0003246599361503292, "loss": 2.9237, "step": 29004 }, { "epoch": 1.42, "grad_norm": 0.607953667640686, "learning_rate": 0.00032464459203013847, "loss": 2.9469, "step": 29005 }, { "epoch": 1.42, "grad_norm": 0.574377179145813, "learning_rate": 0.00032462924784503865, "loss": 2.9129, "step": 29006 }, { "epoch": 1.42, "grad_norm": 0.5855792760848999, "learning_rate": 0.00032461390359507, "loss": 3.2642, "step": 29007 }, { "epoch": 1.42, "grad_norm": 0.5718584656715393, "learning_rate": 0.00032459855928027313, "loss": 2.9375, "step": 29008 }, { "epoch": 1.42, "grad_norm": 0.6099596619606018, "learning_rate": 0.00032458321490068837, "loss": 3.0389, "step": 29009 }, { "epoch": 1.42, "grad_norm": 0.5883126854896545, "learning_rate": 0.00032456787045635624, "loss": 2.9312, "step": 29010 }, { "epoch": 1.42, "grad_norm": 0.5781348943710327, "learning_rate": 0.00032455252594731704, "loss": 2.8841, "step": 29011 }, { "epoch": 1.42, "grad_norm": 0.5610764026641846, "learning_rate": 0.00032453718137361113, "loss": 2.9505, "step": 29012 }, { "epoch": 1.42, "grad_norm": 0.5973708033561707, "learning_rate": 0.0003245218367352791, "loss": 2.8758, "step": 29013 }, { "epoch": 1.42, "grad_norm": 0.6375737190246582, "learning_rate": 0.00032450649203236116, "loss": 2.9811, "step": 29014 }, { "epoch": 1.42, "grad_norm": 0.6129302382469177, "learning_rate": 0.00032449114726489786, "loss": 3.2167, "step": 29015 }, { "epoch": 1.42, "grad_norm": 0.5988703966140747, "learning_rate": 0.0003244758024329297, "loss": 3.0494, "step": 29016 }, { "epoch": 1.42, "grad_norm": 0.6101521253585815, "learning_rate": 0.0003244604575364969, "loss": 3.0984, "step": 29017 }, { "epoch": 1.42, "grad_norm": 0.5767313838005066, "learning_rate": 0.00032444511257564, "loss": 3.0744, "step": 29018 }, { "epoch": 1.42, "grad_norm": 0.6167458891868591, "learning_rate": 0.0003244297675503993, "loss": 3.1639, "step": 29019 }, { "epoch": 1.42, "grad_norm": 0.5684388875961304, "learning_rate": 0.0003244144224608154, "loss": 2.9517, "step": 29020 }, { "epoch": 1.42, "grad_norm": 0.5900117754936218, "learning_rate": 0.0003243990773069286, "loss": 3.0036, "step": 29021 }, { "epoch": 1.42, "grad_norm": 0.72846919298172, "learning_rate": 0.0003243837320887792, "loss": 3.1484, "step": 29022 }, { "epoch": 1.42, "grad_norm": 0.5999704003334045, "learning_rate": 0.0003243683868064078, "loss": 3.0344, "step": 29023 }, { "epoch": 1.42, "grad_norm": 0.5632615089416504, "learning_rate": 0.0003243530414598549, "loss": 3.179, "step": 29024 }, { "epoch": 1.42, "grad_norm": 0.645645022392273, "learning_rate": 0.00032433769604916065, "loss": 3.1495, "step": 29025 }, { "epoch": 1.42, "grad_norm": 0.6163223385810852, "learning_rate": 0.0003243223505743656, "loss": 2.983, "step": 29026 }, { "epoch": 1.42, "grad_norm": 0.5711262226104736, "learning_rate": 0.0003243070050355102, "loss": 3.2788, "step": 29027 }, { "epoch": 1.42, "grad_norm": 0.5460343956947327, "learning_rate": 0.00032429165943263473, "loss": 2.9364, "step": 29028 }, { "epoch": 1.42, "grad_norm": 0.5538520216941833, "learning_rate": 0.00032427631376577977, "loss": 2.8617, "step": 29029 }, { "epoch": 1.42, "grad_norm": 0.5613263845443726, "learning_rate": 0.00032426096803498566, "loss": 3.1502, "step": 29030 }, { "epoch": 1.42, "grad_norm": 0.5788880586624146, "learning_rate": 0.00032424562224029284, "loss": 3.0286, "step": 29031 }, { "epoch": 1.42, "grad_norm": 0.6063810586929321, "learning_rate": 0.00032423027638174177, "loss": 2.8803, "step": 29032 }, { "epoch": 1.42, "grad_norm": 0.5913493633270264, "learning_rate": 0.00032421493045937266, "loss": 2.9329, "step": 29033 }, { "epoch": 1.42, "grad_norm": 0.6406772136688232, "learning_rate": 0.0003241995844732262, "loss": 3.2342, "step": 29034 }, { "epoch": 1.42, "grad_norm": 0.5965743064880371, "learning_rate": 0.00032418423842334274, "loss": 2.9745, "step": 29035 }, { "epoch": 1.42, "grad_norm": 0.5714675188064575, "learning_rate": 0.00032416889230976265, "loss": 2.8127, "step": 29036 }, { "epoch": 1.42, "grad_norm": 0.5749967694282532, "learning_rate": 0.0003241535461325263, "loss": 3.0176, "step": 29037 }, { "epoch": 1.42, "grad_norm": 0.5987011194229126, "learning_rate": 0.000324138199891674, "loss": 3.1697, "step": 29038 }, { "epoch": 1.42, "grad_norm": 0.5846742391586304, "learning_rate": 0.00032412285358724655, "loss": 3.1089, "step": 29039 }, { "epoch": 1.42, "grad_norm": 0.6523012518882751, "learning_rate": 0.00032410750721928406, "loss": 2.933, "step": 29040 }, { "epoch": 1.42, "grad_norm": 0.5596588850021362, "learning_rate": 0.00032409216078782706, "loss": 3.1106, "step": 29041 }, { "epoch": 1.42, "grad_norm": 0.5636228322982788, "learning_rate": 0.00032407681429291594, "loss": 3.2038, "step": 29042 }, { "epoch": 1.42, "grad_norm": 0.5773536562919617, "learning_rate": 0.0003240614677345911, "loss": 3.2443, "step": 29043 }, { "epoch": 1.42, "grad_norm": 0.6011802554130554, "learning_rate": 0.0003240461211128929, "loss": 3.0798, "step": 29044 }, { "epoch": 1.42, "grad_norm": 0.5544044971466064, "learning_rate": 0.00032403077442786195, "loss": 3.0196, "step": 29045 }, { "epoch": 1.42, "grad_norm": 0.6246320605278015, "learning_rate": 0.0003240154276795387, "loss": 2.8156, "step": 29046 }, { "epoch": 1.42, "grad_norm": 0.5959727764129639, "learning_rate": 0.0003240000808679633, "loss": 3.1569, "step": 29047 }, { "epoch": 1.42, "grad_norm": 0.579071581363678, "learning_rate": 0.00032398473399317617, "loss": 3.1073, "step": 29048 }, { "epoch": 1.42, "grad_norm": 0.5742684006690979, "learning_rate": 0.000323969387055218, "loss": 3.3636, "step": 29049 }, { "epoch": 1.42, "grad_norm": 0.5994040966033936, "learning_rate": 0.0003239540400541291, "loss": 3.1828, "step": 29050 }, { "epoch": 1.42, "grad_norm": 0.5804879069328308, "learning_rate": 0.0003239386929899499, "loss": 2.8866, "step": 29051 }, { "epoch": 1.42, "grad_norm": 0.5392642021179199, "learning_rate": 0.0003239233458627207, "loss": 3.1973, "step": 29052 }, { "epoch": 1.42, "grad_norm": 0.5686118602752686, "learning_rate": 0.000323907998672482, "loss": 2.977, "step": 29053 }, { "epoch": 1.42, "grad_norm": 0.5497790575027466, "learning_rate": 0.00032389265141927426, "loss": 2.9656, "step": 29054 }, { "epoch": 1.42, "grad_norm": 0.5894394516944885, "learning_rate": 0.00032387730410313794, "loss": 2.9002, "step": 29055 }, { "epoch": 1.42, "grad_norm": 0.5542726516723633, "learning_rate": 0.0003238619567241133, "loss": 3.0072, "step": 29056 }, { "epoch": 1.42, "grad_norm": 0.5575839877128601, "learning_rate": 0.00032384660928224095, "loss": 3.0584, "step": 29057 }, { "epoch": 1.42, "grad_norm": 0.5948037505149841, "learning_rate": 0.0003238312617775611, "loss": 3.1864, "step": 29058 }, { "epoch": 1.42, "grad_norm": 0.5639663934707642, "learning_rate": 0.0003238159142101144, "loss": 3.0449, "step": 29059 }, { "epoch": 1.42, "grad_norm": 0.5659995079040527, "learning_rate": 0.0003238005665799411, "loss": 3.1618, "step": 29060 }, { "epoch": 1.42, "grad_norm": 0.5502591133117676, "learning_rate": 0.0003237852188870818, "loss": 3.0585, "step": 29061 }, { "epoch": 1.42, "grad_norm": 0.6027945280075073, "learning_rate": 0.00032376987113157666, "loss": 3.142, "step": 29062 }, { "epoch": 1.42, "grad_norm": 0.5965318083763123, "learning_rate": 0.00032375452331346634, "loss": 3.0064, "step": 29063 }, { "epoch": 1.42, "grad_norm": 0.5846608877182007, "learning_rate": 0.00032373917543279115, "loss": 3.1892, "step": 29064 }, { "epoch": 1.42, "grad_norm": 0.6004025340080261, "learning_rate": 0.00032372382748959165, "loss": 3.1346, "step": 29065 }, { "epoch": 1.42, "grad_norm": 0.581133246421814, "learning_rate": 0.000323708479483908, "loss": 2.9457, "step": 29066 }, { "epoch": 1.42, "grad_norm": 0.5575944781303406, "learning_rate": 0.0003236931314157808, "loss": 3.0139, "step": 29067 }, { "epoch": 1.42, "grad_norm": 0.589728832244873, "learning_rate": 0.0003236777832852506, "loss": 3.127, "step": 29068 }, { "epoch": 1.42, "grad_norm": 0.5565273761749268, "learning_rate": 0.00032366243509235744, "loss": 3.1569, "step": 29069 }, { "epoch": 1.42, "grad_norm": 0.5574138164520264, "learning_rate": 0.00032364708683714206, "loss": 3.0355, "step": 29070 }, { "epoch": 1.42, "grad_norm": 0.5988021492958069, "learning_rate": 0.00032363173851964486, "loss": 2.9168, "step": 29071 }, { "epoch": 1.42, "grad_norm": 0.5684751272201538, "learning_rate": 0.0003236163901399063, "loss": 3.1833, "step": 29072 }, { "epoch": 1.42, "grad_norm": 0.619386613368988, "learning_rate": 0.0003236010416979666, "loss": 2.9159, "step": 29073 }, { "epoch": 1.42, "grad_norm": 0.5681483149528503, "learning_rate": 0.0003235856931938662, "loss": 2.9759, "step": 29074 }, { "epoch": 1.42, "grad_norm": 0.5597151517868042, "learning_rate": 0.0003235703446276459, "loss": 3.2504, "step": 29075 }, { "epoch": 1.42, "grad_norm": 0.5716343522071838, "learning_rate": 0.0003235549959993456, "loss": 3.0314, "step": 29076 }, { "epoch": 1.42, "grad_norm": 0.6016743779182434, "learning_rate": 0.00032353964730900606, "loss": 3.0251, "step": 29077 }, { "epoch": 1.43, "grad_norm": 0.6275364756584167, "learning_rate": 0.0003235242985566676, "loss": 2.8835, "step": 29078 }, { "epoch": 1.43, "grad_norm": 0.5814191699028015, "learning_rate": 0.00032350894974237065, "loss": 3.0817, "step": 29079 }, { "epoch": 1.43, "grad_norm": 0.5964180827140808, "learning_rate": 0.0003234936008661557, "loss": 2.8892, "step": 29080 }, { "epoch": 1.43, "grad_norm": 0.5740237236022949, "learning_rate": 0.00032347825192806314, "loss": 2.9775, "step": 29081 }, { "epoch": 1.43, "grad_norm": 0.5742438435554504, "learning_rate": 0.00032346290292813325, "loss": 2.7709, "step": 29082 }, { "epoch": 1.43, "grad_norm": 0.5780929327011108, "learning_rate": 0.00032344755386640677, "loss": 3.0928, "step": 29083 }, { "epoch": 1.43, "grad_norm": 0.5740398168563843, "learning_rate": 0.0003234322047429238, "loss": 3.1639, "step": 29084 }, { "epoch": 1.43, "grad_norm": 0.6118278503417969, "learning_rate": 0.00032341685555772496, "loss": 3.0158, "step": 29085 }, { "epoch": 1.43, "grad_norm": 0.5554642677307129, "learning_rate": 0.00032340150631085074, "loss": 3.0543, "step": 29086 }, { "epoch": 1.43, "grad_norm": 0.5830543041229248, "learning_rate": 0.00032338615700234135, "loss": 3.0372, "step": 29087 }, { "epoch": 1.43, "grad_norm": 0.5947121381759644, "learning_rate": 0.00032337080763223736, "loss": 2.7584, "step": 29088 }, { "epoch": 1.43, "grad_norm": 0.5922091007232666, "learning_rate": 0.00032335545820057904, "loss": 3.1258, "step": 29089 }, { "epoch": 1.43, "grad_norm": 0.5475114583969116, "learning_rate": 0.0003233401087074071, "loss": 2.8411, "step": 29090 }, { "epoch": 1.43, "grad_norm": 0.5651057362556458, "learning_rate": 0.0003233247591527617, "loss": 3.0353, "step": 29091 }, { "epoch": 1.43, "grad_norm": 0.5793414115905762, "learning_rate": 0.0003233094095366834, "loss": 2.8918, "step": 29092 }, { "epoch": 1.43, "grad_norm": 0.5808545351028442, "learning_rate": 0.0003232940598592126, "loss": 3.0612, "step": 29093 }, { "epoch": 1.43, "grad_norm": 0.5640668272972107, "learning_rate": 0.00032327871012038977, "loss": 3.0701, "step": 29094 }, { "epoch": 1.43, "grad_norm": 0.5635700225830078, "learning_rate": 0.00032326336032025525, "loss": 3.1003, "step": 29095 }, { "epoch": 1.43, "grad_norm": 0.5963321328163147, "learning_rate": 0.0003232480104588495, "loss": 2.8877, "step": 29096 }, { "epoch": 1.43, "grad_norm": 0.5794896483421326, "learning_rate": 0.0003232326605362131, "loss": 2.9719, "step": 29097 }, { "epoch": 1.43, "grad_norm": 0.5805581212043762, "learning_rate": 0.0003232173105523862, "loss": 3.0281, "step": 29098 }, { "epoch": 1.43, "grad_norm": 0.600281834602356, "learning_rate": 0.00032320196050740935, "loss": 3.3348, "step": 29099 }, { "epoch": 1.43, "grad_norm": 0.5603992938995361, "learning_rate": 0.00032318661040132307, "loss": 3.0012, "step": 29100 }, { "epoch": 1.43, "grad_norm": 0.6025208830833435, "learning_rate": 0.0003231712602341678, "loss": 2.9332, "step": 29101 }, { "epoch": 1.43, "grad_norm": 0.5629807710647583, "learning_rate": 0.00032315591000598375, "loss": 3.1528, "step": 29102 }, { "epoch": 1.43, "grad_norm": 0.5991537570953369, "learning_rate": 0.0003231405597168116, "loss": 3.1373, "step": 29103 }, { "epoch": 1.43, "grad_norm": 0.5493240356445312, "learning_rate": 0.0003231252093666916, "loss": 2.9525, "step": 29104 }, { "epoch": 1.43, "grad_norm": 0.5737966299057007, "learning_rate": 0.0003231098589556642, "loss": 2.9983, "step": 29105 }, { "epoch": 1.43, "grad_norm": 0.6087252497673035, "learning_rate": 0.00032309450848377, "loss": 3.0698, "step": 29106 }, { "epoch": 1.43, "grad_norm": 0.6365086436271667, "learning_rate": 0.00032307915795104925, "loss": 2.9886, "step": 29107 }, { "epoch": 1.43, "grad_norm": 0.5757536292076111, "learning_rate": 0.0003230638073575425, "loss": 2.9974, "step": 29108 }, { "epoch": 1.43, "grad_norm": 0.5831839442253113, "learning_rate": 0.00032304845670329003, "loss": 3.0675, "step": 29109 }, { "epoch": 1.43, "grad_norm": 0.5890743136405945, "learning_rate": 0.0003230331059883323, "loss": 2.9943, "step": 29110 }, { "epoch": 1.43, "grad_norm": 0.6159026622772217, "learning_rate": 0.0003230177552127099, "loss": 3.2641, "step": 29111 }, { "epoch": 1.43, "grad_norm": 0.6064269542694092, "learning_rate": 0.0003230024043764632, "loss": 3.031, "step": 29112 }, { "epoch": 1.43, "grad_norm": 0.6121566891670227, "learning_rate": 0.0003229870534796326, "loss": 3.0392, "step": 29113 }, { "epoch": 1.43, "grad_norm": 0.6181187629699707, "learning_rate": 0.0003229717025222585, "loss": 3.1578, "step": 29114 }, { "epoch": 1.43, "grad_norm": 0.6169598698616028, "learning_rate": 0.00032295635150438127, "loss": 3.1092, "step": 29115 }, { "epoch": 1.43, "grad_norm": 0.5787153840065002, "learning_rate": 0.0003229410004260415, "loss": 3.0131, "step": 29116 }, { "epoch": 1.43, "grad_norm": 0.5475699305534363, "learning_rate": 0.00032292564928727954, "loss": 3.3638, "step": 29117 }, { "epoch": 1.43, "grad_norm": 0.5485643744468689, "learning_rate": 0.0003229102980881358, "loss": 2.7888, "step": 29118 }, { "epoch": 1.43, "grad_norm": 0.5797244310379028, "learning_rate": 0.0003228949468286509, "loss": 3.1413, "step": 29119 }, { "epoch": 1.43, "grad_norm": 0.5789276957511902, "learning_rate": 0.00032287959550886486, "loss": 2.7676, "step": 29120 }, { "epoch": 1.43, "grad_norm": 0.5958558917045593, "learning_rate": 0.0003228642441288185, "loss": 3.1023, "step": 29121 }, { "epoch": 1.43, "grad_norm": 0.5825849175453186, "learning_rate": 0.00032284889268855213, "loss": 3.2999, "step": 29122 }, { "epoch": 1.43, "grad_norm": 0.5534700751304626, "learning_rate": 0.0003228335411881063, "loss": 3.1667, "step": 29123 }, { "epoch": 1.43, "grad_norm": 0.581171989440918, "learning_rate": 0.00032281818962752115, "loss": 2.8703, "step": 29124 }, { "epoch": 1.43, "grad_norm": 0.5967993140220642, "learning_rate": 0.00032280283800683717, "loss": 3.2153, "step": 29125 }, { "epoch": 1.43, "grad_norm": 0.5790849924087524, "learning_rate": 0.00032278748632609517, "loss": 2.9798, "step": 29126 }, { "epoch": 1.43, "grad_norm": 0.5897535681724548, "learning_rate": 0.00032277213458533515, "loss": 3.0861, "step": 29127 }, { "epoch": 1.43, "grad_norm": 0.5886898636817932, "learning_rate": 0.0003227567827845978, "loss": 2.9372, "step": 29128 }, { "epoch": 1.43, "grad_norm": 0.5656288862228394, "learning_rate": 0.00032274143092392337, "loss": 3.1163, "step": 29129 }, { "epoch": 1.43, "grad_norm": 0.5811963081359863, "learning_rate": 0.00032272607900335243, "loss": 3.0505, "step": 29130 }, { "epoch": 1.43, "grad_norm": 0.5721598267555237, "learning_rate": 0.0003227107270229254, "loss": 3.1477, "step": 29131 }, { "epoch": 1.43, "grad_norm": 0.5497804880142212, "learning_rate": 0.0003226953749826826, "loss": 3.1137, "step": 29132 }, { "epoch": 1.43, "grad_norm": 0.6076148748397827, "learning_rate": 0.0003226800228826646, "loss": 3.0502, "step": 29133 }, { "epoch": 1.43, "grad_norm": 0.5574772357940674, "learning_rate": 0.0003226646707229119, "loss": 2.9401, "step": 29134 }, { "epoch": 1.43, "grad_norm": 0.5898085236549377, "learning_rate": 0.00032264931850346464, "loss": 3.1227, "step": 29135 }, { "epoch": 1.43, "grad_norm": 0.5748165845870972, "learning_rate": 0.0003226339662243635, "loss": 3.0951, "step": 29136 }, { "epoch": 1.43, "grad_norm": 0.6368452310562134, "learning_rate": 0.0003226186138856489, "loss": 3.0673, "step": 29137 }, { "epoch": 1.43, "grad_norm": 0.5354744791984558, "learning_rate": 0.0003226032614873612, "loss": 3.025, "step": 29138 }, { "epoch": 1.43, "grad_norm": 0.5715710520744324, "learning_rate": 0.0003225879090295408, "loss": 3.2214, "step": 29139 }, { "epoch": 1.43, "grad_norm": 0.6316717863082886, "learning_rate": 0.00032257255651222826, "loss": 3.273, "step": 29140 }, { "epoch": 1.43, "grad_norm": 0.5766654014587402, "learning_rate": 0.0003225572039354639, "loss": 3.1074, "step": 29141 }, { "epoch": 1.43, "grad_norm": 0.6002153754234314, "learning_rate": 0.0003225418512992883, "loss": 3.0406, "step": 29142 }, { "epoch": 1.43, "grad_norm": 0.6038157343864441, "learning_rate": 0.0003225264986037417, "loss": 3.014, "step": 29143 }, { "epoch": 1.43, "grad_norm": 0.5729610323905945, "learning_rate": 0.00032251114584886466, "loss": 2.8022, "step": 29144 }, { "epoch": 1.43, "grad_norm": 0.5932610034942627, "learning_rate": 0.00032249579303469765, "loss": 3.0532, "step": 29145 }, { "epoch": 1.43, "grad_norm": 0.5910505056381226, "learning_rate": 0.000322480440161281, "loss": 2.8465, "step": 29146 }, { "epoch": 1.43, "grad_norm": 0.5964004397392273, "learning_rate": 0.00032246508722865515, "loss": 3.0927, "step": 29147 }, { "epoch": 1.43, "grad_norm": 0.5830546617507935, "learning_rate": 0.00032244973423686075, "loss": 3.0487, "step": 29148 }, { "epoch": 1.43, "grad_norm": 0.5955746173858643, "learning_rate": 0.0003224343811859379, "loss": 3.1017, "step": 29149 }, { "epoch": 1.43, "grad_norm": 0.6120107173919678, "learning_rate": 0.0003224190280759273, "loss": 3.0038, "step": 29150 }, { "epoch": 1.43, "grad_norm": 0.6444991827011108, "learning_rate": 0.0003224036749068692, "loss": 3.089, "step": 29151 }, { "epoch": 1.43, "grad_norm": 0.5521337389945984, "learning_rate": 0.0003223883216788042, "loss": 3.0886, "step": 29152 }, { "epoch": 1.43, "grad_norm": 0.6167043447494507, "learning_rate": 0.00032237296839177266, "loss": 2.9639, "step": 29153 }, { "epoch": 1.43, "grad_norm": 0.6153653264045715, "learning_rate": 0.000322357615045815, "loss": 3.2767, "step": 29154 }, { "epoch": 1.43, "grad_norm": 0.6091169118881226, "learning_rate": 0.00032234226164097173, "loss": 3.0682, "step": 29155 }, { "epoch": 1.43, "grad_norm": 0.577284574508667, "learning_rate": 0.00032232690817728327, "loss": 3.0631, "step": 29156 }, { "epoch": 1.43, "grad_norm": 0.6353074908256531, "learning_rate": 0.00032231155465478996, "loss": 3.0545, "step": 29157 }, { "epoch": 1.43, "grad_norm": 0.6882631778717041, "learning_rate": 0.00032229620107353236, "loss": 3.0973, "step": 29158 }, { "epoch": 1.43, "grad_norm": 0.6139711141586304, "learning_rate": 0.0003222808474335509, "loss": 2.9405, "step": 29159 }, { "epoch": 1.43, "grad_norm": 0.6546551585197449, "learning_rate": 0.00032226549373488584, "loss": 3.1871, "step": 29160 }, { "epoch": 1.43, "grad_norm": 0.5913887619972229, "learning_rate": 0.00032225013997757776, "loss": 3.0151, "step": 29161 }, { "epoch": 1.43, "grad_norm": 0.5905306935310364, "learning_rate": 0.0003222347861616672, "loss": 2.8942, "step": 29162 }, { "epoch": 1.43, "grad_norm": 0.5535931587219238, "learning_rate": 0.0003222194322871945, "loss": 3.1094, "step": 29163 }, { "epoch": 1.43, "grad_norm": 0.6039665341377258, "learning_rate": 0.00032220407835420006, "loss": 2.9505, "step": 29164 }, { "epoch": 1.43, "grad_norm": 0.5726522207260132, "learning_rate": 0.0003221887243627243, "loss": 2.9469, "step": 29165 }, { "epoch": 1.43, "grad_norm": 0.6165740489959717, "learning_rate": 0.00032217337031280774, "loss": 2.9994, "step": 29166 }, { "epoch": 1.43, "grad_norm": 0.6031109690666199, "learning_rate": 0.0003221580162044908, "loss": 3.1434, "step": 29167 }, { "epoch": 1.43, "grad_norm": 0.6150469779968262, "learning_rate": 0.0003221426620378139, "loss": 2.9249, "step": 29168 }, { "epoch": 1.43, "grad_norm": 0.5885416865348816, "learning_rate": 0.00032212730781281746, "loss": 3.2877, "step": 29169 }, { "epoch": 1.43, "grad_norm": 0.5557782649993896, "learning_rate": 0.0003221119535295421, "loss": 3.0221, "step": 29170 }, { "epoch": 1.43, "grad_norm": 0.6185104846954346, "learning_rate": 0.00032209659918802793, "loss": 3.0152, "step": 29171 }, { "epoch": 1.43, "grad_norm": 0.5505481958389282, "learning_rate": 0.00032208124478831564, "loss": 3.0942, "step": 29172 }, { "epoch": 1.43, "grad_norm": 0.5830556154251099, "learning_rate": 0.00032206589033044556, "loss": 3.1301, "step": 29173 }, { "epoch": 1.43, "grad_norm": 0.5747976303100586, "learning_rate": 0.0003220505358144583, "loss": 3.0054, "step": 29174 }, { "epoch": 1.43, "grad_norm": 0.6036010980606079, "learning_rate": 0.000322035181240394, "loss": 2.9831, "step": 29175 }, { "epoch": 1.43, "grad_norm": 0.6000482439994812, "learning_rate": 0.0003220198266082933, "loss": 2.9121, "step": 29176 }, { "epoch": 1.43, "grad_norm": 0.5834548473358154, "learning_rate": 0.00032200447191819677, "loss": 3.0241, "step": 29177 }, { "epoch": 1.43, "grad_norm": 0.5785706639289856, "learning_rate": 0.00032198911717014453, "loss": 3.1835, "step": 29178 }, { "epoch": 1.43, "grad_norm": 0.5782999992370605, "learning_rate": 0.0003219737623641773, "loss": 3.0318, "step": 29179 }, { "epoch": 1.43, "grad_norm": 0.5533021688461304, "learning_rate": 0.00032195840750033535, "loss": 3.1158, "step": 29180 }, { "epoch": 1.43, "grad_norm": 0.5466267466545105, "learning_rate": 0.00032194305257865914, "loss": 3.1926, "step": 29181 }, { "epoch": 1.43, "grad_norm": 0.624919056892395, "learning_rate": 0.0003219276975991892, "loss": 2.9148, "step": 29182 }, { "epoch": 1.43, "grad_norm": 0.5868450403213501, "learning_rate": 0.00032191234256196587, "loss": 2.9879, "step": 29183 }, { "epoch": 1.43, "grad_norm": 0.5854912400245667, "learning_rate": 0.00032189698746702976, "loss": 2.9871, "step": 29184 }, { "epoch": 1.43, "grad_norm": 0.5936477780342102, "learning_rate": 0.0003218816323144212, "loss": 3.0536, "step": 29185 }, { "epoch": 1.43, "grad_norm": 0.5781201720237732, "learning_rate": 0.00032186627710418056, "loss": 3.1815, "step": 29186 }, { "epoch": 1.43, "grad_norm": 0.5902300477027893, "learning_rate": 0.0003218509218363483, "loss": 3.0949, "step": 29187 }, { "epoch": 1.43, "grad_norm": 0.5788503289222717, "learning_rate": 0.00032183556651096506, "loss": 2.8732, "step": 29188 }, { "epoch": 1.43, "grad_norm": 0.5969659090042114, "learning_rate": 0.0003218202111280711, "loss": 2.9742, "step": 29189 }, { "epoch": 1.43, "grad_norm": 0.603798508644104, "learning_rate": 0.0003218048556877068, "loss": 3.2926, "step": 29190 }, { "epoch": 1.43, "grad_norm": 0.5414276123046875, "learning_rate": 0.00032178950018991277, "loss": 3.0918, "step": 29191 }, { "epoch": 1.43, "grad_norm": 0.5900503993034363, "learning_rate": 0.0003217741446347294, "loss": 3.2083, "step": 29192 }, { "epoch": 1.43, "grad_norm": 0.6011453866958618, "learning_rate": 0.00032175878902219714, "loss": 3.215, "step": 29193 }, { "epoch": 1.43, "grad_norm": 0.6004211902618408, "learning_rate": 0.00032174343335235637, "loss": 3.0323, "step": 29194 }, { "epoch": 1.43, "grad_norm": 0.5935797095298767, "learning_rate": 0.0003217280776252476, "loss": 2.909, "step": 29195 }, { "epoch": 1.43, "grad_norm": 0.6498414278030396, "learning_rate": 0.0003217127218409113, "loss": 3.0649, "step": 29196 }, { "epoch": 1.43, "grad_norm": 0.5944969058036804, "learning_rate": 0.00032169736599938777, "loss": 3.234, "step": 29197 }, { "epoch": 1.43, "grad_norm": 0.6110669374465942, "learning_rate": 0.0003216820101007177, "loss": 2.709, "step": 29198 }, { "epoch": 1.43, "grad_norm": 0.5599201321601868, "learning_rate": 0.0003216666541449413, "loss": 3.0135, "step": 29199 }, { "epoch": 1.43, "grad_norm": 0.5829009413719177, "learning_rate": 0.0003216512981320991, "loss": 2.8439, "step": 29200 }, { "epoch": 1.43, "grad_norm": 0.6302081942558289, "learning_rate": 0.0003216359420622315, "loss": 2.9406, "step": 29201 }, { "epoch": 1.43, "grad_norm": 0.6054470539093018, "learning_rate": 0.000321620585935379, "loss": 3.1103, "step": 29202 }, { "epoch": 1.43, "grad_norm": 0.6377239227294922, "learning_rate": 0.00032160522975158216, "loss": 2.9263, "step": 29203 }, { "epoch": 1.43, "grad_norm": 0.5627120733261108, "learning_rate": 0.0003215898735108812, "loss": 2.8536, "step": 29204 }, { "epoch": 1.43, "grad_norm": 0.5820385217666626, "learning_rate": 0.00032157451721331675, "loss": 2.9532, "step": 29205 }, { "epoch": 1.43, "grad_norm": 0.593466579914093, "learning_rate": 0.0003215591608589291, "loss": 3.0497, "step": 29206 }, { "epoch": 1.43, "grad_norm": 0.5768133401870728, "learning_rate": 0.00032154380444775877, "loss": 3.1535, "step": 29207 }, { "epoch": 1.43, "grad_norm": 0.6087096929550171, "learning_rate": 0.0003215284479798462, "loss": 3.0309, "step": 29208 }, { "epoch": 1.43, "grad_norm": 0.6114547848701477, "learning_rate": 0.0003215130914552319, "loss": 3.0905, "step": 29209 }, { "epoch": 1.43, "grad_norm": 0.6147521734237671, "learning_rate": 0.00032149773487395624, "loss": 2.9946, "step": 29210 }, { "epoch": 1.43, "grad_norm": 0.5623575448989868, "learning_rate": 0.0003214823782360597, "loss": 2.9643, "step": 29211 }, { "epoch": 1.43, "grad_norm": 0.5678356289863586, "learning_rate": 0.00032146702154158257, "loss": 3.0765, "step": 29212 }, { "epoch": 1.43, "grad_norm": 0.5572715401649475, "learning_rate": 0.00032145166479056553, "loss": 2.6901, "step": 29213 }, { "epoch": 1.43, "grad_norm": 0.6046434640884399, "learning_rate": 0.00032143630798304903, "loss": 2.9514, "step": 29214 }, { "epoch": 1.43, "grad_norm": 0.6713277101516724, "learning_rate": 0.0003214209511190733, "loss": 2.9896, "step": 29215 }, { "epoch": 1.43, "grad_norm": 0.6149976253509521, "learning_rate": 0.00032140559419867895, "loss": 3.0438, "step": 29216 }, { "epoch": 1.43, "grad_norm": 0.603386640548706, "learning_rate": 0.0003213902372219064, "loss": 3.2325, "step": 29217 }, { "epoch": 1.43, "grad_norm": 0.5698647499084473, "learning_rate": 0.000321374880188796, "loss": 2.9808, "step": 29218 }, { "epoch": 1.43, "grad_norm": 0.588376522064209, "learning_rate": 0.00032135952309938836, "loss": 3.0994, "step": 29219 }, { "epoch": 1.43, "grad_norm": 0.5656581521034241, "learning_rate": 0.0003213441659537238, "loss": 3.2486, "step": 29220 }, { "epoch": 1.43, "grad_norm": 0.56963711977005, "learning_rate": 0.0003213288087518429, "loss": 3.1403, "step": 29221 }, { "epoch": 1.43, "grad_norm": 0.5951929092407227, "learning_rate": 0.0003213134514937859, "loss": 2.8392, "step": 29222 }, { "epoch": 1.43, "grad_norm": 0.5948726534843445, "learning_rate": 0.00032129809417959346, "loss": 3.0278, "step": 29223 }, { "epoch": 1.43, "grad_norm": 0.632287323474884, "learning_rate": 0.0003212827368093059, "loss": 3.0306, "step": 29224 }, { "epoch": 1.43, "grad_norm": 0.5774150490760803, "learning_rate": 0.00032126737938296376, "loss": 3.0625, "step": 29225 }, { "epoch": 1.43, "grad_norm": 0.687145471572876, "learning_rate": 0.0003212520219006074, "loss": 3.0098, "step": 29226 }, { "epoch": 1.43, "grad_norm": 0.615565299987793, "learning_rate": 0.00032123666436227735, "loss": 3.0459, "step": 29227 }, { "epoch": 1.43, "grad_norm": 0.6112501621246338, "learning_rate": 0.0003212213067680139, "loss": 3.0546, "step": 29228 }, { "epoch": 1.43, "grad_norm": 0.5874766707420349, "learning_rate": 0.0003212059491178577, "loss": 2.7821, "step": 29229 }, { "epoch": 1.43, "grad_norm": 0.64829021692276, "learning_rate": 0.0003211905914118491, "loss": 3.0968, "step": 29230 }, { "epoch": 1.43, "grad_norm": 0.6121979355812073, "learning_rate": 0.0003211752336500285, "loss": 3.0179, "step": 29231 }, { "epoch": 1.43, "grad_norm": 0.6101468801498413, "learning_rate": 0.00032115987583243653, "loss": 2.9941, "step": 29232 }, { "epoch": 1.43, "grad_norm": 0.5940099358558655, "learning_rate": 0.0003211445179591134, "loss": 3.0411, "step": 29233 }, { "epoch": 1.43, "grad_norm": 0.5766475796699524, "learning_rate": 0.00032112916003009965, "loss": 2.939, "step": 29234 }, { "epoch": 1.43, "grad_norm": 0.5749085545539856, "learning_rate": 0.00032111380204543586, "loss": 3.0331, "step": 29235 }, { "epoch": 1.43, "grad_norm": 0.5809697508811951, "learning_rate": 0.0003210984440051624, "loss": 3.1916, "step": 29236 }, { "epoch": 1.43, "grad_norm": 0.5791935920715332, "learning_rate": 0.00032108308590931965, "loss": 2.8306, "step": 29237 }, { "epoch": 1.43, "grad_norm": 0.6096273064613342, "learning_rate": 0.00032106772775794806, "loss": 3.0382, "step": 29238 }, { "epoch": 1.43, "grad_norm": 0.5717841386795044, "learning_rate": 0.00032105236955108825, "loss": 2.9114, "step": 29239 }, { "epoch": 1.43, "grad_norm": 0.589238166809082, "learning_rate": 0.0003210370112887805, "loss": 3.0616, "step": 29240 }, { "epoch": 1.43, "grad_norm": 0.6342265009880066, "learning_rate": 0.0003210216529710653, "loss": 3.1, "step": 29241 }, { "epoch": 1.43, "grad_norm": 0.609419047832489, "learning_rate": 0.00032100629459798303, "loss": 3.0961, "step": 29242 }, { "epoch": 1.43, "grad_norm": 0.5756333470344543, "learning_rate": 0.0003209909361695743, "loss": 2.8789, "step": 29243 }, { "epoch": 1.43, "grad_norm": 0.5944945812225342, "learning_rate": 0.0003209755776858795, "loss": 2.9332, "step": 29244 }, { "epoch": 1.43, "grad_norm": 0.612972617149353, "learning_rate": 0.00032096021914693906, "loss": 3.2249, "step": 29245 }, { "epoch": 1.43, "grad_norm": 0.6202994585037231, "learning_rate": 0.0003209448605527934, "loss": 2.9364, "step": 29246 }, { "epoch": 1.43, "grad_norm": 0.6110982298851013, "learning_rate": 0.0003209295019034831, "loss": 3.1885, "step": 29247 }, { "epoch": 1.43, "grad_norm": 0.5498843193054199, "learning_rate": 0.0003209141431990484, "loss": 3.1529, "step": 29248 }, { "epoch": 1.43, "grad_norm": 0.5546060800552368, "learning_rate": 0.00032089878443952984, "loss": 3.0771, "step": 29249 }, { "epoch": 1.43, "grad_norm": 0.5500269532203674, "learning_rate": 0.0003208834256249681, "loss": 3.0521, "step": 29250 }, { "epoch": 1.43, "grad_norm": 0.5937256217002869, "learning_rate": 0.0003208680667554033, "loss": 3.1337, "step": 29251 }, { "epoch": 1.43, "grad_norm": 0.5819010734558105, "learning_rate": 0.00032085270783087605, "loss": 3.4204, "step": 29252 }, { "epoch": 1.43, "grad_norm": 0.5727535486221313, "learning_rate": 0.00032083734885142673, "loss": 2.879, "step": 29253 }, { "epoch": 1.43, "grad_norm": 0.6039817333221436, "learning_rate": 0.0003208219898170959, "loss": 2.9596, "step": 29254 }, { "epoch": 1.43, "grad_norm": 0.5760222673416138, "learning_rate": 0.000320806630727924, "loss": 3.2594, "step": 29255 }, { "epoch": 1.43, "grad_norm": 0.5537810921669006, "learning_rate": 0.00032079127158395136, "loss": 3.0045, "step": 29256 }, { "epoch": 1.43, "grad_norm": 0.5716868042945862, "learning_rate": 0.00032077591238521857, "loss": 2.921, "step": 29257 }, { "epoch": 1.43, "grad_norm": 0.5600032210350037, "learning_rate": 0.00032076055313176593, "loss": 3.0156, "step": 29258 }, { "epoch": 1.43, "grad_norm": 0.5719262361526489, "learning_rate": 0.00032074519382363406, "loss": 3.1698, "step": 29259 }, { "epoch": 1.43, "grad_norm": 0.5441270470619202, "learning_rate": 0.0003207298344608634, "loss": 3.0605, "step": 29260 }, { "epoch": 1.43, "grad_norm": 0.5567929744720459, "learning_rate": 0.00032071447504349433, "loss": 3.0992, "step": 29261 }, { "epoch": 1.43, "grad_norm": 0.5503496527671814, "learning_rate": 0.0003206991155715672, "loss": 2.9937, "step": 29262 }, { "epoch": 1.43, "grad_norm": 0.576248049736023, "learning_rate": 0.0003206837560451227, "loss": 2.9549, "step": 29263 }, { "epoch": 1.43, "grad_norm": 0.5835015773773193, "learning_rate": 0.000320668396464201, "loss": 3.1543, "step": 29264 }, { "epoch": 1.43, "grad_norm": 0.5682410597801208, "learning_rate": 0.00032065303682884293, "loss": 3.2322, "step": 29265 }, { "epoch": 1.43, "grad_norm": 0.5933088660240173, "learning_rate": 0.0003206376771390887, "loss": 3.2144, "step": 29266 }, { "epoch": 1.43, "grad_norm": 0.5696515440940857, "learning_rate": 0.0003206223173949787, "loss": 2.8856, "step": 29267 }, { "epoch": 1.43, "grad_norm": 0.6060404181480408, "learning_rate": 0.0003206069575965535, "loss": 3.1012, "step": 29268 }, { "epoch": 1.43, "grad_norm": 0.5765473246574402, "learning_rate": 0.0003205915977438536, "loss": 3.0763, "step": 29269 }, { "epoch": 1.43, "grad_norm": 0.5708712339401245, "learning_rate": 0.00032057623783691936, "loss": 3.1127, "step": 29270 }, { "epoch": 1.43, "grad_norm": 0.5930168032646179, "learning_rate": 0.0003205608778757912, "loss": 2.9539, "step": 29271 }, { "epoch": 1.43, "grad_norm": 0.5647690892219543, "learning_rate": 0.0003205455178605099, "loss": 2.9147, "step": 29272 }, { "epoch": 1.43, "grad_norm": 0.6091307401657104, "learning_rate": 0.00032053015779111543, "loss": 3.0897, "step": 29273 }, { "epoch": 1.43, "grad_norm": 0.5425414443016052, "learning_rate": 0.00032051479766764845, "loss": 2.9467, "step": 29274 }, { "epoch": 1.43, "grad_norm": 0.589598536491394, "learning_rate": 0.00032049943749014954, "loss": 3.1705, "step": 29275 }, { "epoch": 1.43, "grad_norm": 0.6278284192085266, "learning_rate": 0.00032048407725865905, "loss": 2.9084, "step": 29276 }, { "epoch": 1.43, "grad_norm": 0.5885589718818665, "learning_rate": 0.0003204687169732174, "loss": 3.0185, "step": 29277 }, { "epoch": 1.43, "grad_norm": 0.589991569519043, "learning_rate": 0.0003204533566338652, "loss": 3.0392, "step": 29278 }, { "epoch": 1.43, "grad_norm": 0.5731289982795715, "learning_rate": 0.0003204379962406427, "loss": 2.9205, "step": 29279 }, { "epoch": 1.43, "grad_norm": 0.5898691415786743, "learning_rate": 0.00032042263579359047, "loss": 3.116, "step": 29280 }, { "epoch": 1.43, "grad_norm": 0.5861981511116028, "learning_rate": 0.0003204072752927489, "loss": 3.2275, "step": 29281 }, { "epoch": 1.44, "grad_norm": 0.5974080562591553, "learning_rate": 0.00032039191473815854, "loss": 2.8365, "step": 29282 }, { "epoch": 1.44, "grad_norm": 0.569999635219574, "learning_rate": 0.0003203765541298598, "loss": 2.9994, "step": 29283 }, { "epoch": 1.44, "grad_norm": 0.5541989207267761, "learning_rate": 0.0003203611934678931, "loss": 2.6546, "step": 29284 }, { "epoch": 1.44, "grad_norm": 0.5642061233520508, "learning_rate": 0.00032034583275229896, "loss": 3.0176, "step": 29285 }, { "epoch": 1.44, "grad_norm": 0.5687254667282104, "learning_rate": 0.00032033047198311786, "loss": 3.0303, "step": 29286 }, { "epoch": 1.44, "grad_norm": 0.5605220198631287, "learning_rate": 0.0003203151111603902, "loss": 3.0205, "step": 29287 }, { "epoch": 1.44, "grad_norm": 0.5649875998497009, "learning_rate": 0.0003202997502841564, "loss": 2.9837, "step": 29288 }, { "epoch": 1.44, "grad_norm": 0.5587196350097656, "learning_rate": 0.00032028438935445693, "loss": 3.0814, "step": 29289 }, { "epoch": 1.44, "grad_norm": 0.5571485161781311, "learning_rate": 0.0003202690283713324, "loss": 3.0345, "step": 29290 }, { "epoch": 1.44, "grad_norm": 0.6157510876655579, "learning_rate": 0.0003202536673348231, "loss": 3.1435, "step": 29291 }, { "epoch": 1.44, "grad_norm": 0.5682488083839417, "learning_rate": 0.0003202383062449695, "loss": 3.1119, "step": 29292 }, { "epoch": 1.44, "grad_norm": 0.5798583626747131, "learning_rate": 0.0003202229451018121, "loss": 2.833, "step": 29293 }, { "epoch": 1.44, "grad_norm": 0.5703584551811218, "learning_rate": 0.00032020758390539136, "loss": 3.1149, "step": 29294 }, { "epoch": 1.44, "grad_norm": 0.5614786148071289, "learning_rate": 0.00032019222265574775, "loss": 3.0893, "step": 29295 }, { "epoch": 1.44, "grad_norm": 0.541699230670929, "learning_rate": 0.0003201768613529217, "loss": 3.0179, "step": 29296 }, { "epoch": 1.44, "grad_norm": 0.615832507610321, "learning_rate": 0.00032016149999695374, "loss": 3.193, "step": 29297 }, { "epoch": 1.44, "grad_norm": 0.5494009852409363, "learning_rate": 0.0003201461385878842, "loss": 3.1504, "step": 29298 }, { "epoch": 1.44, "grad_norm": 0.5631309747695923, "learning_rate": 0.00032013077712575357, "loss": 3.1214, "step": 29299 }, { "epoch": 1.44, "grad_norm": 0.5645419359207153, "learning_rate": 0.00032011541561060246, "loss": 3.1237, "step": 29300 }, { "epoch": 1.44, "grad_norm": 0.577457070350647, "learning_rate": 0.0003201000540424712, "loss": 2.8725, "step": 29301 }, { "epoch": 1.44, "grad_norm": 0.6075976490974426, "learning_rate": 0.0003200846924214002, "loss": 3.0414, "step": 29302 }, { "epoch": 1.44, "grad_norm": 0.5680819749832153, "learning_rate": 0.00032006933074743, "loss": 3.1522, "step": 29303 }, { "epoch": 1.44, "grad_norm": 0.573302149772644, "learning_rate": 0.00032005396902060106, "loss": 3.2003, "step": 29304 }, { "epoch": 1.44, "grad_norm": 0.572464108467102, "learning_rate": 0.0003200386072409538, "loss": 2.9017, "step": 29305 }, { "epoch": 1.44, "grad_norm": 0.562468945980072, "learning_rate": 0.00032002324540852874, "loss": 3.0256, "step": 29306 }, { "epoch": 1.44, "grad_norm": 0.615037202835083, "learning_rate": 0.00032000788352336626, "loss": 2.9762, "step": 29307 }, { "epoch": 1.44, "grad_norm": 0.5618579387664795, "learning_rate": 0.0003199925215855069, "loss": 3.0673, "step": 29308 }, { "epoch": 1.44, "grad_norm": 0.5961542725563049, "learning_rate": 0.0003199771595949912, "loss": 3.2632, "step": 29309 }, { "epoch": 1.44, "grad_norm": 0.6055513620376587, "learning_rate": 0.00031996179755185925, "loss": 2.8112, "step": 29310 }, { "epoch": 1.44, "grad_norm": 0.5619701147079468, "learning_rate": 0.000319946435456152, "loss": 2.9445, "step": 29311 }, { "epoch": 1.44, "grad_norm": 0.5580838322639465, "learning_rate": 0.00031993107330790957, "loss": 2.8617, "step": 29312 }, { "epoch": 1.44, "grad_norm": 0.5237293839454651, "learning_rate": 0.00031991571110717254, "loss": 2.8157, "step": 29313 }, { "epoch": 1.44, "grad_norm": 0.545191764831543, "learning_rate": 0.00031990034885398137, "loss": 3.2033, "step": 29314 }, { "epoch": 1.44, "grad_norm": 0.5724726319313049, "learning_rate": 0.00031988498654837645, "loss": 3.0922, "step": 29315 }, { "epoch": 1.44, "grad_norm": 0.6132506728172302, "learning_rate": 0.0003198696241903984, "loss": 2.8161, "step": 29316 }, { "epoch": 1.44, "grad_norm": 0.5981416702270508, "learning_rate": 0.0003198542617800876, "loss": 3.0212, "step": 29317 }, { "epoch": 1.44, "grad_norm": 0.6016727685928345, "learning_rate": 0.0003198388993174844, "loss": 3.171, "step": 29318 }, { "epoch": 1.44, "grad_norm": 0.5826240181922913, "learning_rate": 0.00031982353680262937, "loss": 2.9993, "step": 29319 }, { "epoch": 1.44, "grad_norm": 0.5482321381568909, "learning_rate": 0.000319808174235563, "loss": 3.193, "step": 29320 }, { "epoch": 1.44, "grad_norm": 0.5854382514953613, "learning_rate": 0.0003197928116163257, "loss": 3.4126, "step": 29321 }, { "epoch": 1.44, "grad_norm": 0.5981870293617249, "learning_rate": 0.0003197774489449579, "loss": 3.1969, "step": 29322 }, { "epoch": 1.44, "grad_norm": 0.6282458305358887, "learning_rate": 0.00031976208622150023, "loss": 2.8896, "step": 29323 }, { "epoch": 1.44, "grad_norm": 0.5802420377731323, "learning_rate": 0.0003197467234459929, "loss": 3.0195, "step": 29324 }, { "epoch": 1.44, "grad_norm": 0.6020224690437317, "learning_rate": 0.0003197313606184765, "loss": 3.0872, "step": 29325 }, { "epoch": 1.44, "grad_norm": 0.542820394039154, "learning_rate": 0.0003197159977389915, "loss": 3.0749, "step": 29326 }, { "epoch": 1.44, "grad_norm": 0.5725154280662537, "learning_rate": 0.00031970063480757843, "loss": 3.0649, "step": 29327 }, { "epoch": 1.44, "grad_norm": 0.602067232131958, "learning_rate": 0.00031968527182427766, "loss": 2.877, "step": 29328 }, { "epoch": 1.44, "grad_norm": 0.6186937689781189, "learning_rate": 0.0003196699087891297, "loss": 3.1819, "step": 29329 }, { "epoch": 1.44, "grad_norm": 0.6243546009063721, "learning_rate": 0.00031965454570217485, "loss": 3.1825, "step": 29330 }, { "epoch": 1.44, "grad_norm": 0.586065948009491, "learning_rate": 0.0003196391825634538, "loss": 2.9946, "step": 29331 }, { "epoch": 1.44, "grad_norm": 0.6126989722251892, "learning_rate": 0.00031962381937300694, "loss": 3.0819, "step": 29332 }, { "epoch": 1.44, "grad_norm": 0.5854355692863464, "learning_rate": 0.00031960845613087464, "loss": 3.2089, "step": 29333 }, { "epoch": 1.44, "grad_norm": 0.5818975567817688, "learning_rate": 0.0003195930928370976, "loss": 3.1475, "step": 29334 }, { "epoch": 1.44, "grad_norm": 0.5570355653762817, "learning_rate": 0.000319577729491716, "loss": 3.0968, "step": 29335 }, { "epoch": 1.44, "grad_norm": 0.5599480867385864, "learning_rate": 0.0003195623660947704, "loss": 2.9759, "step": 29336 }, { "epoch": 1.44, "grad_norm": 0.5848993062973022, "learning_rate": 0.00031954700264630136, "loss": 3.2071, "step": 29337 }, { "epoch": 1.44, "grad_norm": 0.5863507390022278, "learning_rate": 0.00031953163914634936, "loss": 3.3054, "step": 29338 }, { "epoch": 1.44, "grad_norm": 0.5722285509109497, "learning_rate": 0.0003195162755949547, "loss": 3.1209, "step": 29339 }, { "epoch": 1.44, "grad_norm": 0.5752810835838318, "learning_rate": 0.00031950091199215775, "loss": 3.1011, "step": 29340 }, { "epoch": 1.44, "grad_norm": 0.6314206123352051, "learning_rate": 0.0003194855483379994, "loss": 3.1192, "step": 29341 }, { "epoch": 1.44, "grad_norm": 0.5731692910194397, "learning_rate": 0.00031947018463251975, "loss": 2.9817, "step": 29342 }, { "epoch": 1.44, "grad_norm": 0.5550987124443054, "learning_rate": 0.0003194548208757594, "loss": 3.079, "step": 29343 }, { "epoch": 1.44, "grad_norm": 0.5599222183227539, "learning_rate": 0.0003194394570677588, "loss": 3.0314, "step": 29344 }, { "epoch": 1.44, "grad_norm": 0.5602944493293762, "learning_rate": 0.00031942409320855845, "loss": 3.015, "step": 29345 }, { "epoch": 1.44, "grad_norm": 0.5581718683242798, "learning_rate": 0.00031940872929819874, "loss": 3.0226, "step": 29346 }, { "epoch": 1.44, "grad_norm": 0.6010013818740845, "learning_rate": 0.00031939336533672014, "loss": 3.1017, "step": 29347 }, { "epoch": 1.44, "grad_norm": 0.5561316013336182, "learning_rate": 0.00031937800132416315, "loss": 3.0053, "step": 29348 }, { "epoch": 1.44, "grad_norm": 0.620624840259552, "learning_rate": 0.0003193626372605683, "loss": 3.0979, "step": 29349 }, { "epoch": 1.44, "grad_norm": 0.5798450708389282, "learning_rate": 0.000319347273145976, "loss": 3.1279, "step": 29350 }, { "epoch": 1.44, "grad_norm": 0.6187215447425842, "learning_rate": 0.0003193319089804266, "loss": 3.1367, "step": 29351 }, { "epoch": 1.44, "grad_norm": 0.6134929656982422, "learning_rate": 0.00031931654476396077, "loss": 3.0664, "step": 29352 }, { "epoch": 1.44, "grad_norm": 0.5657615661621094, "learning_rate": 0.0003193011804966188, "loss": 3.0826, "step": 29353 }, { "epoch": 1.44, "grad_norm": 0.5925348401069641, "learning_rate": 0.00031928581617844134, "loss": 2.892, "step": 29354 }, { "epoch": 1.44, "grad_norm": 0.5548837780952454, "learning_rate": 0.0003192704518094686, "loss": 2.9854, "step": 29355 }, { "epoch": 1.44, "grad_norm": 0.5412815809249878, "learning_rate": 0.00031925508738974134, "loss": 2.9617, "step": 29356 }, { "epoch": 1.44, "grad_norm": 0.5807236433029175, "learning_rate": 0.0003192397229192998, "loss": 3.1411, "step": 29357 }, { "epoch": 1.44, "grad_norm": 0.5956936478614807, "learning_rate": 0.00031922435839818456, "loss": 3.1444, "step": 29358 }, { "epoch": 1.44, "grad_norm": 0.5684614181518555, "learning_rate": 0.00031920899382643605, "loss": 2.9935, "step": 29359 }, { "epoch": 1.44, "grad_norm": 0.6149534583091736, "learning_rate": 0.0003191936292040948, "loss": 2.8515, "step": 29360 }, { "epoch": 1.44, "grad_norm": 0.5479702353477478, "learning_rate": 0.00031917826453120113, "loss": 3.1102, "step": 29361 }, { "epoch": 1.44, "grad_norm": 0.5614345073699951, "learning_rate": 0.00031916289980779566, "loss": 3.0526, "step": 29362 }, { "epoch": 1.44, "grad_norm": 0.6308231353759766, "learning_rate": 0.0003191475350339189, "loss": 3.0099, "step": 29363 }, { "epoch": 1.44, "grad_norm": 0.5764047503471375, "learning_rate": 0.0003191321702096111, "loss": 3.1026, "step": 29364 }, { "epoch": 1.44, "grad_norm": 0.5685030221939087, "learning_rate": 0.00031911680533491284, "loss": 2.9923, "step": 29365 }, { "epoch": 1.44, "grad_norm": 0.5890762805938721, "learning_rate": 0.0003191014404098645, "loss": 2.9788, "step": 29366 }, { "epoch": 1.44, "grad_norm": 0.5421032309532166, "learning_rate": 0.00031908607543450684, "loss": 3.1369, "step": 29367 }, { "epoch": 1.44, "grad_norm": 0.6088888049125671, "learning_rate": 0.00031907071040888007, "loss": 3.1285, "step": 29368 }, { "epoch": 1.44, "grad_norm": 0.585292637348175, "learning_rate": 0.00031905534533302465, "loss": 3.0663, "step": 29369 }, { "epoch": 1.44, "grad_norm": 0.5948856472969055, "learning_rate": 0.0003190399802069812, "loss": 3.0343, "step": 29370 }, { "epoch": 1.44, "grad_norm": 0.6100438833236694, "learning_rate": 0.0003190246150307901, "loss": 2.9583, "step": 29371 }, { "epoch": 1.44, "grad_norm": 0.5696083307266235, "learning_rate": 0.00031900924980449174, "loss": 3.1466, "step": 29372 }, { "epoch": 1.44, "grad_norm": 0.5895338654518127, "learning_rate": 0.0003189938845281267, "loss": 3.001, "step": 29373 }, { "epoch": 1.44, "grad_norm": 0.5648195147514343, "learning_rate": 0.0003189785192017355, "loss": 3.0352, "step": 29374 }, { "epoch": 1.44, "grad_norm": 0.5830442905426025, "learning_rate": 0.0003189631538253585, "loss": 3.0942, "step": 29375 }, { "epoch": 1.44, "grad_norm": 0.5957970023155212, "learning_rate": 0.00031894778839903614, "loss": 2.8905, "step": 29376 }, { "epoch": 1.44, "grad_norm": 0.6936584115028381, "learning_rate": 0.000318932422922809, "loss": 3.0502, "step": 29377 }, { "epoch": 1.44, "grad_norm": 0.6041005849838257, "learning_rate": 0.0003189170573967176, "loss": 3.1924, "step": 29378 }, { "epoch": 1.44, "grad_norm": 0.5614645481109619, "learning_rate": 0.00031890169182080216, "loss": 3.0978, "step": 29379 }, { "epoch": 1.44, "grad_norm": 0.7858589291572571, "learning_rate": 0.0003188863261951033, "loss": 3.061, "step": 29380 }, { "epoch": 1.44, "grad_norm": 0.6027712225914001, "learning_rate": 0.00031887096051966154, "loss": 3.1522, "step": 29381 }, { "epoch": 1.44, "grad_norm": 0.6058451533317566, "learning_rate": 0.00031885559479451735, "loss": 3.0498, "step": 29382 }, { "epoch": 1.44, "grad_norm": 0.6025016903877258, "learning_rate": 0.0003188402290197111, "loss": 3.1248, "step": 29383 }, { "epoch": 1.44, "grad_norm": 0.6525845527648926, "learning_rate": 0.0003188248631952833, "loss": 2.9143, "step": 29384 }, { "epoch": 1.44, "grad_norm": 0.5875680446624756, "learning_rate": 0.0003188094973212745, "loss": 2.9911, "step": 29385 }, { "epoch": 1.44, "grad_norm": 0.5825726985931396, "learning_rate": 0.00031879413139772507, "loss": 3.0715, "step": 29386 }, { "epoch": 1.44, "grad_norm": 0.5770814418792725, "learning_rate": 0.00031877876542467544, "loss": 2.9507, "step": 29387 }, { "epoch": 1.44, "grad_norm": 0.5879018306732178, "learning_rate": 0.00031876339940216624, "loss": 2.9064, "step": 29388 }, { "epoch": 1.44, "grad_norm": 0.5837126970291138, "learning_rate": 0.0003187480333302379, "loss": 2.9365, "step": 29389 }, { "epoch": 1.44, "grad_norm": 0.5690444111824036, "learning_rate": 0.00031873266720893075, "loss": 3.1901, "step": 29390 }, { "epoch": 1.44, "grad_norm": 0.5976502299308777, "learning_rate": 0.00031871730103828544, "loss": 3.0219, "step": 29391 }, { "epoch": 1.44, "grad_norm": 0.6792353987693787, "learning_rate": 0.0003187019348183423, "loss": 2.9063, "step": 29392 }, { "epoch": 1.44, "grad_norm": 0.5715599656105042, "learning_rate": 0.00031868656854914184, "loss": 3.1014, "step": 29393 }, { "epoch": 1.44, "grad_norm": 0.5927602648735046, "learning_rate": 0.0003186712022307246, "loss": 3.0256, "step": 29394 }, { "epoch": 1.44, "grad_norm": 0.583453893661499, "learning_rate": 0.00031865583586313103, "loss": 3.0197, "step": 29395 }, { "epoch": 1.44, "grad_norm": 0.5917752385139465, "learning_rate": 0.0003186404694464016, "loss": 3.064, "step": 29396 }, { "epoch": 1.44, "grad_norm": 0.5617307424545288, "learning_rate": 0.0003186251029805767, "loss": 3.1082, "step": 29397 }, { "epoch": 1.44, "grad_norm": 0.5545293688774109, "learning_rate": 0.00031860973646569683, "loss": 2.9116, "step": 29398 }, { "epoch": 1.44, "grad_norm": 0.5984163880348206, "learning_rate": 0.00031859436990180255, "loss": 3.0933, "step": 29399 }, { "epoch": 1.44, "grad_norm": 0.6275175213813782, "learning_rate": 0.0003185790032889343, "loss": 3.1102, "step": 29400 }, { "epoch": 1.44, "grad_norm": 0.5929582715034485, "learning_rate": 0.0003185636366271325, "loss": 3.0094, "step": 29401 }, { "epoch": 1.44, "grad_norm": 0.5918399095535278, "learning_rate": 0.0003185482699164376, "loss": 2.9954, "step": 29402 }, { "epoch": 1.44, "grad_norm": 0.6211867928504944, "learning_rate": 0.0003185329031568902, "loss": 3.0263, "step": 29403 }, { "epoch": 1.44, "grad_norm": 0.615907609462738, "learning_rate": 0.0003185175363485307, "loss": 3.3152, "step": 29404 }, { "epoch": 1.44, "grad_norm": 0.594133734703064, "learning_rate": 0.0003185021694913995, "loss": 3.0418, "step": 29405 }, { "epoch": 1.44, "grad_norm": 0.5710883140563965, "learning_rate": 0.0003184868025855373, "loss": 3.1399, "step": 29406 }, { "epoch": 1.44, "grad_norm": 0.544840395450592, "learning_rate": 0.00031847143563098427, "loss": 2.8117, "step": 29407 }, { "epoch": 1.44, "grad_norm": 0.6463403701782227, "learning_rate": 0.0003184560686277811, "loss": 3.0027, "step": 29408 }, { "epoch": 1.44, "grad_norm": 0.6022986173629761, "learning_rate": 0.00031844070157596813, "loss": 2.9204, "step": 29409 }, { "epoch": 1.44, "grad_norm": 0.568325400352478, "learning_rate": 0.00031842533447558596, "loss": 3.0302, "step": 29410 }, { "epoch": 1.44, "grad_norm": 0.573235809803009, "learning_rate": 0.00031840996732667505, "loss": 3.1247, "step": 29411 }, { "epoch": 1.44, "grad_norm": 0.5684030055999756, "learning_rate": 0.0003183946001292757, "loss": 3.1886, "step": 29412 }, { "epoch": 1.44, "grad_norm": 0.5798808336257935, "learning_rate": 0.0003183792328834286, "loss": 3.2167, "step": 29413 }, { "epoch": 1.44, "grad_norm": 0.5618143677711487, "learning_rate": 0.0003183638655891742, "loss": 3.0885, "step": 29414 }, { "epoch": 1.44, "grad_norm": 0.5687463879585266, "learning_rate": 0.00031834849824655283, "loss": 2.8543, "step": 29415 }, { "epoch": 1.44, "grad_norm": 0.5497147440910339, "learning_rate": 0.00031833313085560507, "loss": 3.104, "step": 29416 }, { "epoch": 1.44, "grad_norm": 0.5957204103469849, "learning_rate": 0.0003183177634163713, "loss": 3.0252, "step": 29417 }, { "epoch": 1.44, "grad_norm": 0.6223195195198059, "learning_rate": 0.0003183023959288922, "loss": 2.9171, "step": 29418 }, { "epoch": 1.44, "grad_norm": 0.5928622484207153, "learning_rate": 0.000318287028393208, "loss": 3.1098, "step": 29419 }, { "epoch": 1.44, "grad_norm": 0.567246675491333, "learning_rate": 0.0003182716608093593, "loss": 3.0079, "step": 29420 }, { "epoch": 1.44, "grad_norm": 0.6125072836875916, "learning_rate": 0.00031825629317738655, "loss": 2.9814, "step": 29421 }, { "epoch": 1.44, "grad_norm": 0.5972563624382019, "learning_rate": 0.0003182409254973303, "loss": 3.1398, "step": 29422 }, { "epoch": 1.44, "grad_norm": 0.5955824255943298, "learning_rate": 0.0003182255577692309, "loss": 2.8948, "step": 29423 }, { "epoch": 1.44, "grad_norm": 0.5571625828742981, "learning_rate": 0.00031821018999312895, "loss": 3.1142, "step": 29424 }, { "epoch": 1.44, "grad_norm": 0.6010100841522217, "learning_rate": 0.00031819482216906487, "loss": 3.1236, "step": 29425 }, { "epoch": 1.44, "grad_norm": 0.5668064951896667, "learning_rate": 0.0003181794542970791, "loss": 2.9231, "step": 29426 }, { "epoch": 1.44, "grad_norm": 0.5944365859031677, "learning_rate": 0.0003181640863772121, "loss": 3.2458, "step": 29427 }, { "epoch": 1.44, "grad_norm": 0.5827503800392151, "learning_rate": 0.0003181487184095044, "loss": 3.2371, "step": 29428 }, { "epoch": 1.44, "grad_norm": 0.5670848488807678, "learning_rate": 0.00031813335039399654, "loss": 3.0366, "step": 29429 }, { "epoch": 1.44, "grad_norm": 0.5577699542045593, "learning_rate": 0.0003181179823307289, "loss": 3.1032, "step": 29430 }, { "epoch": 1.44, "grad_norm": 0.5776100754737854, "learning_rate": 0.000318102614219742, "loss": 3.0566, "step": 29431 }, { "epoch": 1.44, "grad_norm": 0.6043997406959534, "learning_rate": 0.0003180872460610762, "loss": 2.892, "step": 29432 }, { "epoch": 1.44, "grad_norm": 0.584821343421936, "learning_rate": 0.0003180718778547722, "loss": 3.0156, "step": 29433 }, { "epoch": 1.44, "grad_norm": 0.591511070728302, "learning_rate": 0.0003180565096008703, "loss": 3.0376, "step": 29434 }, { "epoch": 1.44, "grad_norm": 0.5555319786071777, "learning_rate": 0.00031804114129941095, "loss": 3.0716, "step": 29435 }, { "epoch": 1.44, "grad_norm": 0.6005762815475464, "learning_rate": 0.0003180257729504349, "loss": 3.0269, "step": 29436 }, { "epoch": 1.44, "grad_norm": 0.5544703602790833, "learning_rate": 0.0003180104045539823, "loss": 3.006, "step": 29437 }, { "epoch": 1.44, "grad_norm": 0.5543263554573059, "learning_rate": 0.00031799503611009366, "loss": 2.9448, "step": 29438 }, { "epoch": 1.44, "grad_norm": 0.5978908538818359, "learning_rate": 0.0003179796676188097, "loss": 3.1896, "step": 29439 }, { "epoch": 1.44, "grad_norm": 0.5939019322395325, "learning_rate": 0.0003179642990801708, "loss": 3.0236, "step": 29440 }, { "epoch": 1.44, "grad_norm": 0.5759717226028442, "learning_rate": 0.00031794893049421724, "loss": 3.0154, "step": 29441 }, { "epoch": 1.44, "grad_norm": 0.5859683156013489, "learning_rate": 0.00031793356186098976, "loss": 3.1299, "step": 29442 }, { "epoch": 1.44, "grad_norm": 0.6028755307197571, "learning_rate": 0.0003179181931805287, "loss": 3.0498, "step": 29443 }, { "epoch": 1.44, "grad_norm": 0.5671459436416626, "learning_rate": 0.0003179028244528746, "loss": 3.1373, "step": 29444 }, { "epoch": 1.44, "grad_norm": 0.5760989785194397, "learning_rate": 0.00031788745567806786, "loss": 3.0959, "step": 29445 }, { "epoch": 1.44, "grad_norm": 0.5798001885414124, "learning_rate": 0.00031787208685614903, "loss": 3.223, "step": 29446 }, { "epoch": 1.44, "grad_norm": 0.5691527724266052, "learning_rate": 0.0003178567179871586, "loss": 2.9604, "step": 29447 }, { "epoch": 1.44, "grad_norm": 0.5820776224136353, "learning_rate": 0.00031784134907113687, "loss": 2.9364, "step": 29448 }, { "epoch": 1.44, "grad_norm": 0.576815664768219, "learning_rate": 0.0003178259801081246, "loss": 2.951, "step": 29449 }, { "epoch": 1.44, "grad_norm": 0.5452558994293213, "learning_rate": 0.000317810611098162, "loss": 3.0218, "step": 29450 }, { "epoch": 1.44, "grad_norm": 0.6160311102867126, "learning_rate": 0.00031779524204128985, "loss": 3.0444, "step": 29451 }, { "epoch": 1.44, "grad_norm": 0.6155081391334534, "learning_rate": 0.0003177798729375484, "loss": 3.1733, "step": 29452 }, { "epoch": 1.44, "grad_norm": 0.5370228886604309, "learning_rate": 0.00031776450378697807, "loss": 2.9489, "step": 29453 }, { "epoch": 1.44, "grad_norm": 0.5774576663970947, "learning_rate": 0.0003177491345896196, "loss": 3.1492, "step": 29454 }, { "epoch": 1.44, "grad_norm": 0.580348789691925, "learning_rate": 0.0003177337653455132, "loss": 3.0479, "step": 29455 }, { "epoch": 1.44, "grad_norm": 0.554613471031189, "learning_rate": 0.0003177183960546996, "loss": 2.9147, "step": 29456 }, { "epoch": 1.44, "grad_norm": 0.5800740122795105, "learning_rate": 0.00031770302671721906, "loss": 2.9172, "step": 29457 }, { "epoch": 1.44, "grad_norm": 0.5789035558700562, "learning_rate": 0.0003176876573331123, "loss": 2.9797, "step": 29458 }, { "epoch": 1.44, "grad_norm": 0.6241493821144104, "learning_rate": 0.0003176722879024194, "loss": 2.9661, "step": 29459 }, { "epoch": 1.44, "grad_norm": 0.6004448533058167, "learning_rate": 0.00031765691842518127, "loss": 3.0091, "step": 29460 }, { "epoch": 1.44, "grad_norm": 0.6146655678749084, "learning_rate": 0.00031764154890143815, "loss": 2.7731, "step": 29461 }, { "epoch": 1.44, "grad_norm": 0.5921909213066101, "learning_rate": 0.0003176261793312307, "loss": 3.0267, "step": 29462 }, { "epoch": 1.44, "grad_norm": 0.6653693914413452, "learning_rate": 0.0003176108097145992, "loss": 2.9743, "step": 29463 }, { "epoch": 1.44, "grad_norm": 0.575783371925354, "learning_rate": 0.00031759544005158415, "loss": 3.0894, "step": 29464 }, { "epoch": 1.44, "grad_norm": 0.5989192128181458, "learning_rate": 0.0003175800703422262, "loss": 2.8907, "step": 29465 }, { "epoch": 1.44, "grad_norm": 0.5665627121925354, "learning_rate": 0.0003175647005865657, "loss": 3.0642, "step": 29466 }, { "epoch": 1.44, "grad_norm": 0.595417320728302, "learning_rate": 0.0003175493307846432, "loss": 3.1442, "step": 29467 }, { "epoch": 1.44, "grad_norm": 0.5861067771911621, "learning_rate": 0.00031753396093649904, "loss": 2.803, "step": 29468 }, { "epoch": 1.44, "grad_norm": 0.5747168064117432, "learning_rate": 0.0003175185910421739, "loss": 2.9859, "step": 29469 }, { "epoch": 1.44, "grad_norm": 0.5734466314315796, "learning_rate": 0.0003175032211017081, "loss": 3.217, "step": 29470 }, { "epoch": 1.44, "grad_norm": 0.5525097846984863, "learning_rate": 0.0003174878511151422, "loss": 3.0217, "step": 29471 }, { "epoch": 1.44, "grad_norm": 0.5869901776313782, "learning_rate": 0.0003174724810825166, "loss": 2.9827, "step": 29472 }, { "epoch": 1.44, "grad_norm": 0.5891143679618835, "learning_rate": 0.00031745711100387205, "loss": 3.0641, "step": 29473 }, { "epoch": 1.44, "grad_norm": 0.5963634252548218, "learning_rate": 0.0003174417408792486, "loss": 2.9668, "step": 29474 }, { "epoch": 1.44, "grad_norm": 0.5769605040550232, "learning_rate": 0.00031742637070868705, "loss": 3.2163, "step": 29475 }, { "epoch": 1.44, "grad_norm": 0.5539005398750305, "learning_rate": 0.00031741100049222787, "loss": 3.0295, "step": 29476 }, { "epoch": 1.44, "grad_norm": 0.5648486018180847, "learning_rate": 0.00031739563022991144, "loss": 3.2033, "step": 29477 }, { "epoch": 1.44, "grad_norm": 0.6375812292098999, "learning_rate": 0.00031738025992177814, "loss": 3.0786, "step": 29478 }, { "epoch": 1.44, "grad_norm": 0.5986570119857788, "learning_rate": 0.00031736488956786863, "loss": 3.2335, "step": 29479 }, { "epoch": 1.44, "grad_norm": 0.5965058207511902, "learning_rate": 0.00031734951916822347, "loss": 3.0896, "step": 29480 }, { "epoch": 1.44, "grad_norm": 0.5741937756538391, "learning_rate": 0.0003173341487228829, "loss": 3.1851, "step": 29481 }, { "epoch": 1.44, "grad_norm": 0.5639917254447937, "learning_rate": 0.00031731877823188754, "loss": 3.0339, "step": 29482 }, { "epoch": 1.44, "grad_norm": 0.5802062153816223, "learning_rate": 0.00031730340769527783, "loss": 3.1775, "step": 29483 }, { "epoch": 1.44, "grad_norm": 0.5636395812034607, "learning_rate": 0.0003172880371130943, "loss": 3.0593, "step": 29484 }, { "epoch": 1.44, "grad_norm": 0.57658851146698, "learning_rate": 0.00031727266648537737, "loss": 3.1828, "step": 29485 }, { "epoch": 1.45, "grad_norm": 0.5845613479614258, "learning_rate": 0.0003172572958121676, "loss": 2.8095, "step": 29486 }, { "epoch": 1.45, "grad_norm": 0.5840170979499817, "learning_rate": 0.0003172419250935055, "loss": 2.8715, "step": 29487 }, { "epoch": 1.45, "grad_norm": 0.5798373222351074, "learning_rate": 0.0003172265543294314, "loss": 2.9696, "step": 29488 }, { "epoch": 1.45, "grad_norm": 0.5842916965484619, "learning_rate": 0.0003172111835199858, "loss": 3.2531, "step": 29489 }, { "epoch": 1.45, "grad_norm": 0.57811439037323, "learning_rate": 0.00031719581266520937, "loss": 3.1352, "step": 29490 }, { "epoch": 1.45, "grad_norm": 0.6172042489051819, "learning_rate": 0.0003171804417651425, "loss": 3.3323, "step": 29491 }, { "epoch": 1.45, "grad_norm": 0.6265311241149902, "learning_rate": 0.0003171650708198256, "loss": 2.9154, "step": 29492 }, { "epoch": 1.45, "grad_norm": 0.5730522274971008, "learning_rate": 0.0003171496998292992, "loss": 3.0178, "step": 29493 }, { "epoch": 1.45, "grad_norm": 0.5934553146362305, "learning_rate": 0.0003171343287936038, "loss": 3.076, "step": 29494 }, { "epoch": 1.45, "grad_norm": 0.6215176582336426, "learning_rate": 0.00031711895771277987, "loss": 3.1317, "step": 29495 }, { "epoch": 1.45, "grad_norm": 0.5459911227226257, "learning_rate": 0.0003171035865868679, "loss": 3.2385, "step": 29496 }, { "epoch": 1.45, "grad_norm": 0.5924990177154541, "learning_rate": 0.0003170882154159084, "loss": 2.8411, "step": 29497 }, { "epoch": 1.45, "grad_norm": 0.5878446102142334, "learning_rate": 0.00031707284419994183, "loss": 2.9754, "step": 29498 }, { "epoch": 1.45, "grad_norm": 0.6080270409584045, "learning_rate": 0.0003170574729390087, "loss": 3.1571, "step": 29499 }, { "epoch": 1.45, "grad_norm": 0.5498298406600952, "learning_rate": 0.00031704210163314934, "loss": 3.1642, "step": 29500 }, { "epoch": 1.45, "grad_norm": 0.5822784900665283, "learning_rate": 0.0003170267302824044, "loss": 3.0001, "step": 29501 }, { "epoch": 1.45, "grad_norm": 0.5680847764015198, "learning_rate": 0.0003170113588868145, "loss": 2.9522, "step": 29502 }, { "epoch": 1.45, "grad_norm": 0.589328944683075, "learning_rate": 0.0003169959874464198, "loss": 3.1955, "step": 29503 }, { "epoch": 1.45, "grad_norm": 0.6632125973701477, "learning_rate": 0.00031698061596126096, "loss": 3.0567, "step": 29504 }, { "epoch": 1.45, "grad_norm": 0.5721989274024963, "learning_rate": 0.00031696524443137846, "loss": 2.9241, "step": 29505 }, { "epoch": 1.45, "grad_norm": 0.6211953163146973, "learning_rate": 0.0003169498728568128, "loss": 3.0061, "step": 29506 }, { "epoch": 1.45, "grad_norm": 0.5938788056373596, "learning_rate": 0.00031693450123760437, "loss": 3.0157, "step": 29507 }, { "epoch": 1.45, "grad_norm": 0.5666404366493225, "learning_rate": 0.0003169191295737938, "loss": 3.0017, "step": 29508 }, { "epoch": 1.45, "grad_norm": 0.5512176752090454, "learning_rate": 0.00031690375786542146, "loss": 3.1269, "step": 29509 }, { "epoch": 1.45, "grad_norm": 0.5751276612281799, "learning_rate": 0.00031688838611252783, "loss": 2.9614, "step": 29510 }, { "epoch": 1.45, "grad_norm": 0.6172812581062317, "learning_rate": 0.00031687301431515346, "loss": 2.8939, "step": 29511 }, { "epoch": 1.45, "grad_norm": 0.5911383628845215, "learning_rate": 0.0003168576424733389, "loss": 3.2453, "step": 29512 }, { "epoch": 1.45, "grad_norm": 0.5967884659767151, "learning_rate": 0.0003168422705871245, "loss": 3.016, "step": 29513 }, { "epoch": 1.45, "grad_norm": 0.5799013376235962, "learning_rate": 0.0003168268986565508, "loss": 3.1217, "step": 29514 }, { "epoch": 1.45, "grad_norm": 0.6159305572509766, "learning_rate": 0.0003168115266816582, "loss": 2.9896, "step": 29515 }, { "epoch": 1.45, "grad_norm": 0.5831592679023743, "learning_rate": 0.00031679615466248746, "loss": 3.1185, "step": 29516 }, { "epoch": 1.45, "grad_norm": 0.5765199065208435, "learning_rate": 0.0003167807825990787, "loss": 3.116, "step": 29517 }, { "epoch": 1.45, "grad_norm": 0.6035337448120117, "learning_rate": 0.00031676541049147273, "loss": 3.001, "step": 29518 }, { "epoch": 1.45, "grad_norm": 0.6357006430625916, "learning_rate": 0.0003167500383397098, "loss": 2.9424, "step": 29519 }, { "epoch": 1.45, "grad_norm": 0.595038115978241, "learning_rate": 0.0003167346661438305, "loss": 3.0421, "step": 29520 }, { "epoch": 1.45, "grad_norm": 0.6188342571258545, "learning_rate": 0.0003167192939038754, "loss": 3.2593, "step": 29521 }, { "epoch": 1.45, "grad_norm": 0.6148853302001953, "learning_rate": 0.00031670392161988484, "loss": 2.9146, "step": 29522 }, { "epoch": 1.45, "grad_norm": 0.6067038774490356, "learning_rate": 0.00031668854929189927, "loss": 2.9484, "step": 29523 }, { "epoch": 1.45, "grad_norm": 0.5747334361076355, "learning_rate": 0.00031667317691995943, "loss": 3.1063, "step": 29524 }, { "epoch": 1.45, "grad_norm": 0.6633566617965698, "learning_rate": 0.0003166578045041055, "loss": 3.0578, "step": 29525 }, { "epoch": 1.45, "grad_norm": 0.6091784238815308, "learning_rate": 0.0003166424320443782, "loss": 2.8992, "step": 29526 }, { "epoch": 1.45, "grad_norm": 0.5521370768547058, "learning_rate": 0.00031662705954081807, "loss": 3.3263, "step": 29527 }, { "epoch": 1.45, "grad_norm": 0.5992710590362549, "learning_rate": 0.0003166116869934653, "loss": 2.9598, "step": 29528 }, { "epoch": 1.45, "grad_norm": 0.5992154479026794, "learning_rate": 0.0003165963144023606, "loss": 3.0288, "step": 29529 }, { "epoch": 1.45, "grad_norm": 0.563355028629303, "learning_rate": 0.0003165809417675443, "loss": 3.1072, "step": 29530 }, { "epoch": 1.45, "grad_norm": 0.575053334236145, "learning_rate": 0.0003165655690890571, "loss": 3.067, "step": 29531 }, { "epoch": 1.45, "grad_norm": 0.5506818890571594, "learning_rate": 0.00031655019636693933, "loss": 3.2032, "step": 29532 }, { "epoch": 1.45, "grad_norm": 0.5816770195960999, "learning_rate": 0.0003165348236012316, "loss": 3.2077, "step": 29533 }, { "epoch": 1.45, "grad_norm": 0.5859194993972778, "learning_rate": 0.00031651945079197425, "loss": 3.084, "step": 29534 }, { "epoch": 1.45, "grad_norm": 0.6002169847488403, "learning_rate": 0.0003165040779392079, "loss": 3.0688, "step": 29535 }, { "epoch": 1.45, "grad_norm": 0.5905258655548096, "learning_rate": 0.00031648870504297287, "loss": 2.9794, "step": 29536 }, { "epoch": 1.45, "grad_norm": 0.5949984788894653, "learning_rate": 0.0003164733321033099, "loss": 2.9201, "step": 29537 }, { "epoch": 1.45, "grad_norm": 0.6448434591293335, "learning_rate": 0.00031645795912025937, "loss": 2.964, "step": 29538 }, { "epoch": 1.45, "grad_norm": 0.5883697867393494, "learning_rate": 0.0003164425860938616, "loss": 3.0997, "step": 29539 }, { "epoch": 1.45, "grad_norm": 0.5910248160362244, "learning_rate": 0.0003164272130241573, "loss": 3.3129, "step": 29540 }, { "epoch": 1.45, "grad_norm": 0.5907135009765625, "learning_rate": 0.0003164118399111868, "loss": 2.9919, "step": 29541 }, { "epoch": 1.45, "grad_norm": 0.5622631907463074, "learning_rate": 0.0003163964667549908, "loss": 2.835, "step": 29542 }, { "epoch": 1.45, "grad_norm": 0.609990656375885, "learning_rate": 0.00031638109355560957, "loss": 3.2596, "step": 29543 }, { "epoch": 1.45, "grad_norm": 0.5881011486053467, "learning_rate": 0.00031636572031308374, "loss": 3.0279, "step": 29544 }, { "epoch": 1.45, "grad_norm": 0.6357592940330505, "learning_rate": 0.0003163503470274537, "loss": 3.2961, "step": 29545 }, { "epoch": 1.45, "grad_norm": 0.5936046242713928, "learning_rate": 0.00031633497369876006, "loss": 2.9672, "step": 29546 }, { "epoch": 1.45, "grad_norm": 0.6165637969970703, "learning_rate": 0.00031631960032704325, "loss": 2.9565, "step": 29547 }, { "epoch": 1.45, "grad_norm": 0.6325369477272034, "learning_rate": 0.0003163042269123436, "loss": 2.9206, "step": 29548 }, { "epoch": 1.45, "grad_norm": 0.6465588212013245, "learning_rate": 0.000316288853454702, "loss": 2.9257, "step": 29549 }, { "epoch": 1.45, "grad_norm": 0.585541307926178, "learning_rate": 0.00031627347995415856, "loss": 2.8962, "step": 29550 }, { "epoch": 1.45, "grad_norm": 0.5976788997650146, "learning_rate": 0.00031625810641075383, "loss": 2.9734, "step": 29551 }, { "epoch": 1.45, "grad_norm": 0.5807003378868103, "learning_rate": 0.00031624273282452844, "loss": 3.1595, "step": 29552 }, { "epoch": 1.45, "grad_norm": 0.586675763130188, "learning_rate": 0.00031622735919552297, "loss": 2.9426, "step": 29553 }, { "epoch": 1.45, "grad_norm": 0.583739161491394, "learning_rate": 0.0003162119855237776, "loss": 2.9561, "step": 29554 }, { "epoch": 1.45, "grad_norm": 0.5614110231399536, "learning_rate": 0.00031619661180933294, "loss": 2.9349, "step": 29555 }, { "epoch": 1.45, "grad_norm": 0.5753343105316162, "learning_rate": 0.00031618123805222964, "loss": 3.2066, "step": 29556 }, { "epoch": 1.45, "grad_norm": 0.6021926403045654, "learning_rate": 0.00031616586425250797, "loss": 3.0151, "step": 29557 }, { "epoch": 1.45, "grad_norm": 0.6058591604232788, "learning_rate": 0.00031615049041020855, "loss": 3.2783, "step": 29558 }, { "epoch": 1.45, "grad_norm": 0.608304500579834, "learning_rate": 0.0003161351165253719, "loss": 2.7891, "step": 29559 }, { "epoch": 1.45, "grad_norm": 0.5610315203666687, "learning_rate": 0.0003161197425980385, "loss": 3.0305, "step": 29560 }, { "epoch": 1.45, "grad_norm": 0.5982028245925903, "learning_rate": 0.0003161043686282487, "loss": 2.8197, "step": 29561 }, { "epoch": 1.45, "grad_norm": 0.5895063877105713, "learning_rate": 0.00031608899461604313, "loss": 3.0352, "step": 29562 }, { "epoch": 1.45, "grad_norm": 0.6164365410804749, "learning_rate": 0.0003160736205614622, "loss": 3.0017, "step": 29563 }, { "epoch": 1.45, "grad_norm": 0.5825421214103699, "learning_rate": 0.00031605824646454655, "loss": 2.9548, "step": 29564 }, { "epoch": 1.45, "grad_norm": 0.58476722240448, "learning_rate": 0.00031604287232533647, "loss": 3.157, "step": 29565 }, { "epoch": 1.45, "grad_norm": 0.5678737163543701, "learning_rate": 0.00031602749814387253, "loss": 2.9115, "step": 29566 }, { "epoch": 1.45, "grad_norm": 0.5935869812965393, "learning_rate": 0.00031601212392019535, "loss": 2.9762, "step": 29567 }, { "epoch": 1.45, "grad_norm": 0.5967699885368347, "learning_rate": 0.00031599674965434525, "loss": 3.1632, "step": 29568 }, { "epoch": 1.45, "grad_norm": 0.6144185066223145, "learning_rate": 0.00031598137534636283, "loss": 2.9605, "step": 29569 }, { "epoch": 1.45, "grad_norm": 0.5704265236854553, "learning_rate": 0.0003159660009962885, "loss": 3.0765, "step": 29570 }, { "epoch": 1.45, "grad_norm": 0.5627557635307312, "learning_rate": 0.0003159506266041628, "loss": 3.1848, "step": 29571 }, { "epoch": 1.45, "grad_norm": 0.5492954254150391, "learning_rate": 0.0003159352521700262, "loss": 2.9258, "step": 29572 }, { "epoch": 1.45, "grad_norm": 0.5822631120681763, "learning_rate": 0.0003159198776939193, "loss": 2.9043, "step": 29573 }, { "epoch": 1.45, "grad_norm": 0.6288532614707947, "learning_rate": 0.0003159045031758824, "loss": 2.9592, "step": 29574 }, { "epoch": 1.45, "grad_norm": 0.5812193751335144, "learning_rate": 0.00031588912861595623, "loss": 3.0854, "step": 29575 }, { "epoch": 1.45, "grad_norm": 0.6251932382583618, "learning_rate": 0.00031587375401418105, "loss": 2.8665, "step": 29576 }, { "epoch": 1.45, "grad_norm": 0.5608442425727844, "learning_rate": 0.00031585837937059746, "loss": 3.2142, "step": 29577 }, { "epoch": 1.45, "grad_norm": 0.6160196661949158, "learning_rate": 0.000315843004685246, "loss": 3.0009, "step": 29578 }, { "epoch": 1.45, "grad_norm": 0.5881204009056091, "learning_rate": 0.0003158276299581671, "loss": 2.8357, "step": 29579 }, { "epoch": 1.45, "grad_norm": 0.5906282067298889, "learning_rate": 0.00031581225518940124, "loss": 3.1925, "step": 29580 }, { "epoch": 1.45, "grad_norm": 0.5734764337539673, "learning_rate": 0.00031579688037898897, "loss": 3.1714, "step": 29581 }, { "epoch": 1.45, "grad_norm": 0.5693907141685486, "learning_rate": 0.00031578150552697077, "loss": 3.1624, "step": 29582 }, { "epoch": 1.45, "grad_norm": 0.6162329316139221, "learning_rate": 0.00031576613063338707, "loss": 2.9136, "step": 29583 }, { "epoch": 1.45, "grad_norm": 0.6050886511802673, "learning_rate": 0.0003157507556982785, "loss": 2.9093, "step": 29584 }, { "epoch": 1.45, "grad_norm": 0.5759333968162537, "learning_rate": 0.0003157353807216854, "loss": 3.1633, "step": 29585 }, { "epoch": 1.45, "grad_norm": 0.6068954467773438, "learning_rate": 0.0003157200057036484, "loss": 3.0777, "step": 29586 }, { "epoch": 1.45, "grad_norm": 0.5611181259155273, "learning_rate": 0.0003157046306442078, "loss": 3.1995, "step": 29587 }, { "epoch": 1.45, "grad_norm": 0.5561584830284119, "learning_rate": 0.0003156892555434043, "loss": 3.1371, "step": 29588 }, { "epoch": 1.45, "grad_norm": 0.5815036296844482, "learning_rate": 0.0003156738804012784, "loss": 3.0672, "step": 29589 }, { "epoch": 1.45, "grad_norm": 0.5883771777153015, "learning_rate": 0.00031565850521787043, "loss": 3.0313, "step": 29590 }, { "epoch": 1.45, "grad_norm": 0.5724140405654907, "learning_rate": 0.000315643129993221, "loss": 3.099, "step": 29591 }, { "epoch": 1.45, "grad_norm": 0.5555368065834045, "learning_rate": 0.0003156277547273704, "loss": 3.1058, "step": 29592 }, { "epoch": 1.45, "grad_norm": 0.5770111083984375, "learning_rate": 0.0003156123794203596, "loss": 3.0728, "step": 29593 }, { "epoch": 1.45, "grad_norm": 0.6504952311515808, "learning_rate": 0.00031559700407222865, "loss": 3.0687, "step": 29594 }, { "epoch": 1.45, "grad_norm": 0.6245805025100708, "learning_rate": 0.0003155816286830182, "loss": 3.0529, "step": 29595 }, { "epoch": 1.45, "grad_norm": 0.6823046207427979, "learning_rate": 0.0003155662532527687, "loss": 3.0419, "step": 29596 }, { "epoch": 1.45, "grad_norm": 0.5609052181243896, "learning_rate": 0.00031555087778152075, "loss": 2.8952, "step": 29597 }, { "epoch": 1.45, "grad_norm": 0.6359115839004517, "learning_rate": 0.00031553550226931477, "loss": 2.9609, "step": 29598 }, { "epoch": 1.45, "grad_norm": 0.5841073989868164, "learning_rate": 0.0003155201267161912, "loss": 2.9918, "step": 29599 }, { "epoch": 1.45, "grad_norm": 0.5604556202888489, "learning_rate": 0.00031550475112219074, "loss": 2.8732, "step": 29600 }, { "epoch": 1.45, "grad_norm": 0.6146168112754822, "learning_rate": 0.0003154893754873537, "loss": 2.8485, "step": 29601 }, { "epoch": 1.45, "grad_norm": 0.5645619034767151, "learning_rate": 0.0003154739998117205, "loss": 3.0853, "step": 29602 }, { "epoch": 1.45, "grad_norm": 0.6000223159790039, "learning_rate": 0.0003154586240953318, "loss": 3.0441, "step": 29603 }, { "epoch": 1.45, "grad_norm": 0.5900564193725586, "learning_rate": 0.00031544324833822823, "loss": 3.1783, "step": 29604 }, { "epoch": 1.45, "grad_norm": 0.6112537384033203, "learning_rate": 0.00031542787254044995, "loss": 3.0663, "step": 29605 }, { "epoch": 1.45, "grad_norm": 0.6015599966049194, "learning_rate": 0.00031541249670203773, "loss": 2.9529, "step": 29606 }, { "epoch": 1.45, "grad_norm": 0.562324583530426, "learning_rate": 0.00031539712082303184, "loss": 3.2351, "step": 29607 }, { "epoch": 1.45, "grad_norm": 0.6208454370498657, "learning_rate": 0.000315381744903473, "loss": 3.4744, "step": 29608 }, { "epoch": 1.45, "grad_norm": 0.5760998725891113, "learning_rate": 0.0003153663689434016, "loss": 3.0427, "step": 29609 }, { "epoch": 1.45, "grad_norm": 0.5720778107643127, "learning_rate": 0.00031535099294285803, "loss": 2.9921, "step": 29610 }, { "epoch": 1.45, "grad_norm": 0.5598829388618469, "learning_rate": 0.000315335616901883, "loss": 3.1476, "step": 29611 }, { "epoch": 1.45, "grad_norm": 0.5947191119194031, "learning_rate": 0.00031532024082051686, "loss": 3.2268, "step": 29612 }, { "epoch": 1.45, "grad_norm": 0.5800713300704956, "learning_rate": 0.00031530486469880017, "loss": 2.9083, "step": 29613 }, { "epoch": 1.45, "grad_norm": 0.5774427652359009, "learning_rate": 0.0003152894885367734, "loss": 2.8321, "step": 29614 }, { "epoch": 1.45, "grad_norm": 0.5743990540504456, "learning_rate": 0.0003152741123344771, "loss": 3.0104, "step": 29615 }, { "epoch": 1.45, "grad_norm": 0.557272732257843, "learning_rate": 0.0003152587360919517, "loss": 3.0485, "step": 29616 }, { "epoch": 1.45, "grad_norm": 0.5970534682273865, "learning_rate": 0.0003152433598092378, "loss": 3.011, "step": 29617 }, { "epoch": 1.45, "grad_norm": 0.5622311234474182, "learning_rate": 0.0003152279834863757, "loss": 3.1436, "step": 29618 }, { "epoch": 1.45, "grad_norm": 0.5816734433174133, "learning_rate": 0.00031521260712340606, "loss": 3.0806, "step": 29619 }, { "epoch": 1.45, "grad_norm": 0.5890375375747681, "learning_rate": 0.0003151972307203694, "loss": 3.2086, "step": 29620 }, { "epoch": 1.45, "grad_norm": 0.6203916072845459, "learning_rate": 0.00031518185427730606, "loss": 3.126, "step": 29621 }, { "epoch": 1.45, "grad_norm": 0.5634053945541382, "learning_rate": 0.00031516647779425677, "loss": 3.3819, "step": 29622 }, { "epoch": 1.45, "grad_norm": 0.5932179689407349, "learning_rate": 0.0003151511012712617, "loss": 3.0579, "step": 29623 }, { "epoch": 1.45, "grad_norm": 0.5462847948074341, "learning_rate": 0.0003151357247083617, "loss": 3.0788, "step": 29624 }, { "epoch": 1.45, "grad_norm": 0.6403987407684326, "learning_rate": 0.00031512034810559703, "loss": 3.0505, "step": 29625 }, { "epoch": 1.45, "grad_norm": 0.6161810755729675, "learning_rate": 0.0003151049714630084, "loss": 3.0068, "step": 29626 }, { "epoch": 1.45, "grad_norm": 0.5708884596824646, "learning_rate": 0.0003150895947806361, "loss": 2.9304, "step": 29627 }, { "epoch": 1.45, "grad_norm": 0.5830209851264954, "learning_rate": 0.0003150742180585207, "loss": 3.0144, "step": 29628 }, { "epoch": 1.45, "grad_norm": 0.6132103204727173, "learning_rate": 0.0003150588412967028, "loss": 3.1479, "step": 29629 }, { "epoch": 1.45, "grad_norm": 0.5505216717720032, "learning_rate": 0.0003150434644952227, "loss": 3.3665, "step": 29630 }, { "epoch": 1.45, "grad_norm": 0.5873746275901794, "learning_rate": 0.00031502808765412107, "loss": 3.2011, "step": 29631 }, { "epoch": 1.45, "grad_norm": 0.6037224531173706, "learning_rate": 0.0003150127107734383, "loss": 3.2015, "step": 29632 }, { "epoch": 1.45, "grad_norm": 0.6088622212409973, "learning_rate": 0.00031499733385321495, "loss": 3.0153, "step": 29633 }, { "epoch": 1.45, "grad_norm": 0.6723757386207581, "learning_rate": 0.0003149819568934915, "loss": 3.1598, "step": 29634 }, { "epoch": 1.45, "grad_norm": 0.5801018476486206, "learning_rate": 0.0003149665798943085, "loss": 2.9169, "step": 29635 }, { "epoch": 1.45, "grad_norm": 0.5815666913986206, "learning_rate": 0.00031495120285570635, "loss": 3.0191, "step": 29636 }, { "epoch": 1.45, "grad_norm": 0.5963479280471802, "learning_rate": 0.00031493582577772573, "loss": 3.0861, "step": 29637 }, { "epoch": 1.45, "grad_norm": 0.5587741732597351, "learning_rate": 0.0003149204486604069, "loss": 3.1379, "step": 29638 }, { "epoch": 1.45, "grad_norm": 0.574137806892395, "learning_rate": 0.0003149050715037905, "loss": 2.9059, "step": 29639 }, { "epoch": 1.45, "grad_norm": 0.6238990426063538, "learning_rate": 0.0003148896943079171, "loss": 3.1322, "step": 29640 }, { "epoch": 1.45, "grad_norm": 0.6018304824829102, "learning_rate": 0.000314874317072827, "loss": 3.0981, "step": 29641 }, { "epoch": 1.45, "grad_norm": 0.5812610983848572, "learning_rate": 0.0003148589397985609, "loss": 2.9599, "step": 29642 }, { "epoch": 1.45, "grad_norm": 0.6216371655464172, "learning_rate": 0.00031484356248515907, "loss": 3.1933, "step": 29643 }, { "epoch": 1.45, "grad_norm": 0.621842086315155, "learning_rate": 0.0003148281851326623, "loss": 2.7123, "step": 29644 }, { "epoch": 1.45, "grad_norm": 0.5874238014221191, "learning_rate": 0.0003148128077411109, "loss": 3.2495, "step": 29645 }, { "epoch": 1.45, "grad_norm": 0.63470059633255, "learning_rate": 0.0003147974303105454, "loss": 2.9006, "step": 29646 }, { "epoch": 1.45, "grad_norm": 0.5815867781639099, "learning_rate": 0.0003147820528410063, "loss": 3.078, "step": 29647 }, { "epoch": 1.45, "grad_norm": 0.5960510969161987, "learning_rate": 0.00031476667533253416, "loss": 3.0143, "step": 29648 }, { "epoch": 1.45, "grad_norm": 0.5853951573371887, "learning_rate": 0.0003147512977851694, "loss": 3.179, "step": 29649 }, { "epoch": 1.45, "grad_norm": 0.5538525581359863, "learning_rate": 0.00031473592019895253, "loss": 2.9866, "step": 29650 }, { "epoch": 1.45, "grad_norm": 0.6282745599746704, "learning_rate": 0.0003147205425739242, "loss": 3.2866, "step": 29651 }, { "epoch": 1.45, "grad_norm": 0.6330800652503967, "learning_rate": 0.00031470516491012476, "loss": 2.8979, "step": 29652 }, { "epoch": 1.45, "grad_norm": 0.6331319808959961, "learning_rate": 0.00031468978720759473, "loss": 2.9076, "step": 29653 }, { "epoch": 1.45, "grad_norm": 1.587941288948059, "learning_rate": 0.0003146744094663745, "loss": 3.2152, "step": 29654 }, { "epoch": 1.45, "grad_norm": 0.5891446471214294, "learning_rate": 0.0003146590316865049, "loss": 3.0108, "step": 29655 }, { "epoch": 1.45, "grad_norm": 0.5783140063285828, "learning_rate": 0.0003146436538680261, "loss": 2.781, "step": 29656 }, { "epoch": 1.45, "grad_norm": 0.5963090658187866, "learning_rate": 0.00031462827601097873, "loss": 3.212, "step": 29657 }, { "epoch": 1.45, "grad_norm": 0.6018965840339661, "learning_rate": 0.0003146128981154034, "loss": 3.201, "step": 29658 }, { "epoch": 1.45, "grad_norm": 0.5825430750846863, "learning_rate": 0.0003145975201813404, "loss": 3.1323, "step": 29659 }, { "epoch": 1.45, "grad_norm": 0.58023601770401, "learning_rate": 0.0003145821422088304, "loss": 2.8852, "step": 29660 }, { "epoch": 1.45, "grad_norm": 0.6018133759498596, "learning_rate": 0.00031456676419791384, "loss": 3.2422, "step": 29661 }, { "epoch": 1.45, "grad_norm": 0.6052143573760986, "learning_rate": 0.0003145513861486313, "loss": 3.0118, "step": 29662 }, { "epoch": 1.45, "grad_norm": 0.5762540102005005, "learning_rate": 0.0003145360080610231, "loss": 3.1484, "step": 29663 }, { "epoch": 1.45, "grad_norm": 0.5601885914802551, "learning_rate": 0.0003145206299351298, "loss": 3.2171, "step": 29664 }, { "epoch": 1.45, "grad_norm": 0.6563633680343628, "learning_rate": 0.00031450525177099205, "loss": 2.9215, "step": 29665 }, { "epoch": 1.45, "grad_norm": 0.5905854105949402, "learning_rate": 0.0003144898735686503, "loss": 3.0631, "step": 29666 }, { "epoch": 1.45, "grad_norm": 0.5945887565612793, "learning_rate": 0.00031447449532814495, "loss": 2.9145, "step": 29667 }, { "epoch": 1.45, "grad_norm": 0.6269522309303284, "learning_rate": 0.00031445911704951657, "loss": 3.1563, "step": 29668 }, { "epoch": 1.45, "grad_norm": 0.577925980091095, "learning_rate": 0.00031444373873280564, "loss": 2.9717, "step": 29669 }, { "epoch": 1.45, "grad_norm": 0.572482705116272, "learning_rate": 0.00031442836037805276, "loss": 3.1004, "step": 29670 }, { "epoch": 1.45, "grad_norm": 0.6007272005081177, "learning_rate": 0.0003144129819852983, "loss": 3.157, "step": 29671 }, { "epoch": 1.45, "grad_norm": 0.5722142457962036, "learning_rate": 0.00031439760355458284, "loss": 2.966, "step": 29672 }, { "epoch": 1.45, "grad_norm": 0.5537738800048828, "learning_rate": 0.00031438222508594683, "loss": 3.095, "step": 29673 }, { "epoch": 1.45, "grad_norm": 0.5694095492362976, "learning_rate": 0.0003143668465794308, "loss": 3.0124, "step": 29674 }, { "epoch": 1.45, "grad_norm": 0.6016697287559509, "learning_rate": 0.0003143514680350752, "loss": 3.0182, "step": 29675 }, { "epoch": 1.45, "grad_norm": 0.5762494206428528, "learning_rate": 0.0003143360894529207, "loss": 2.9215, "step": 29676 }, { "epoch": 1.45, "grad_norm": 0.5735913515090942, "learning_rate": 0.00031432071083300773, "loss": 3.148, "step": 29677 }, { "epoch": 1.45, "grad_norm": 0.5936062932014465, "learning_rate": 0.0003143053321753767, "loss": 2.9584, "step": 29678 }, { "epoch": 1.45, "grad_norm": 0.5691812634468079, "learning_rate": 0.00031428995348006816, "loss": 3.23, "step": 29679 }, { "epoch": 1.45, "grad_norm": 0.5691925883293152, "learning_rate": 0.00031427457474712274, "loss": 3.1767, "step": 29680 }, { "epoch": 1.45, "grad_norm": 0.6052525043487549, "learning_rate": 0.00031425919597658067, "loss": 2.9384, "step": 29681 }, { "epoch": 1.45, "grad_norm": 0.6288476586341858, "learning_rate": 0.00031424381716848275, "loss": 2.9858, "step": 29682 }, { "epoch": 1.45, "grad_norm": 0.5947161316871643, "learning_rate": 0.0003142284383228693, "loss": 3.2128, "step": 29683 }, { "epoch": 1.45, "grad_norm": 0.7295497059822083, "learning_rate": 0.00031421305943978093, "loss": 3.237, "step": 29684 }, { "epoch": 1.45, "grad_norm": 0.5625701546669006, "learning_rate": 0.000314197680519258, "loss": 2.8254, "step": 29685 }, { "epoch": 1.45, "grad_norm": 0.6221656203269958, "learning_rate": 0.0003141823015613412, "loss": 2.8866, "step": 29686 }, { "epoch": 1.45, "grad_norm": 0.6753302812576294, "learning_rate": 0.0003141669225660709, "loss": 3.0037, "step": 29687 }, { "epoch": 1.45, "grad_norm": 0.579978883266449, "learning_rate": 0.0003141515435334878, "loss": 2.7757, "step": 29688 }, { "epoch": 1.45, "grad_norm": 0.5823621153831482, "learning_rate": 0.000314136164463632, "loss": 3.2779, "step": 29689 }, { "epoch": 1.46, "grad_norm": 0.6142686605453491, "learning_rate": 0.0003141207853565444, "loss": 2.8307, "step": 29690 }, { "epoch": 1.46, "grad_norm": 0.5641099810600281, "learning_rate": 0.0003141054062122654, "loss": 2.9068, "step": 29691 }, { "epoch": 1.46, "grad_norm": 0.6020845770835876, "learning_rate": 0.00031409002703083547, "loss": 3.0636, "step": 29692 }, { "epoch": 1.46, "grad_norm": 0.5882956981658936, "learning_rate": 0.00031407464781229503, "loss": 3.0913, "step": 29693 }, { "epoch": 1.46, "grad_norm": 0.593789279460907, "learning_rate": 0.00031405926855668475, "loss": 3.1935, "step": 29694 }, { "epoch": 1.46, "grad_norm": 0.5638978481292725, "learning_rate": 0.00031404388926404507, "loss": 3.1643, "step": 29695 }, { "epoch": 1.46, "grad_norm": 0.595602810382843, "learning_rate": 0.00031402850993441647, "loss": 3.1038, "step": 29696 }, { "epoch": 1.46, "grad_norm": 0.6187957525253296, "learning_rate": 0.00031401313056783945, "loss": 2.9744, "step": 29697 }, { "epoch": 1.46, "grad_norm": 0.5919390916824341, "learning_rate": 0.0003139977511643546, "loss": 3.2703, "step": 29698 }, { "epoch": 1.46, "grad_norm": 0.566708505153656, "learning_rate": 0.00031398237172400236, "loss": 3.0879, "step": 29699 }, { "epoch": 1.46, "grad_norm": 0.5841004848480225, "learning_rate": 0.0003139669922468231, "loss": 3.036, "step": 29700 }, { "epoch": 1.46, "grad_norm": 0.5895936489105225, "learning_rate": 0.00031395161273285765, "loss": 2.9205, "step": 29701 }, { "epoch": 1.46, "grad_norm": 0.5551169514656067, "learning_rate": 0.00031393623318214635, "loss": 2.9754, "step": 29702 }, { "epoch": 1.46, "grad_norm": 0.5898941159248352, "learning_rate": 0.00031392085359472954, "loss": 2.9792, "step": 29703 }, { "epoch": 1.46, "grad_norm": 0.6206042170524597, "learning_rate": 0.000313905473970648, "loss": 3.1062, "step": 29704 }, { "epoch": 1.46, "grad_norm": 0.6914759278297424, "learning_rate": 0.00031389009430994195, "loss": 3.0206, "step": 29705 }, { "epoch": 1.46, "grad_norm": 0.619304358959198, "learning_rate": 0.00031387471461265227, "loss": 2.9323, "step": 29706 }, { "epoch": 1.46, "grad_norm": 0.573853075504303, "learning_rate": 0.00031385933487881916, "loss": 3.3458, "step": 29707 }, { "epoch": 1.46, "grad_norm": 0.5841895341873169, "learning_rate": 0.0003138439551084833, "loss": 2.78, "step": 29708 }, { "epoch": 1.46, "grad_norm": 0.5773389339447021, "learning_rate": 0.0003138285753016851, "loss": 3.123, "step": 29709 }, { "epoch": 1.46, "grad_norm": 0.5712064504623413, "learning_rate": 0.00031381319545846503, "loss": 3.2139, "step": 29710 }, { "epoch": 1.46, "grad_norm": 0.6082553267478943, "learning_rate": 0.00031379781557886366, "loss": 3.19, "step": 29711 }, { "epoch": 1.46, "grad_norm": 0.6170501112937927, "learning_rate": 0.00031378243566292156, "loss": 3.0305, "step": 29712 }, { "epoch": 1.46, "grad_norm": 0.5564154982566833, "learning_rate": 0.00031376705571067924, "loss": 3.0066, "step": 29713 }, { "epoch": 1.46, "grad_norm": 0.6043330430984497, "learning_rate": 0.000313751675722177, "loss": 3.0141, "step": 29714 }, { "epoch": 1.46, "grad_norm": 0.633105456829071, "learning_rate": 0.00031373629569745546, "loss": 2.8554, "step": 29715 }, { "epoch": 1.46, "grad_norm": 0.5764904022216797, "learning_rate": 0.0003137209156365553, "loss": 2.9793, "step": 29716 }, { "epoch": 1.46, "grad_norm": 0.6007531881332397, "learning_rate": 0.00031370553553951687, "loss": 3.0194, "step": 29717 }, { "epoch": 1.46, "grad_norm": 0.597282350063324, "learning_rate": 0.0003136901554063807, "loss": 3.086, "step": 29718 }, { "epoch": 1.46, "grad_norm": 0.6131802797317505, "learning_rate": 0.0003136747752371873, "loss": 3.1779, "step": 29719 }, { "epoch": 1.46, "grad_norm": 0.5542619824409485, "learning_rate": 0.0003136593950319771, "loss": 3.0502, "step": 29720 }, { "epoch": 1.46, "grad_norm": 0.662255048751831, "learning_rate": 0.00031364401479079074, "loss": 3.0517, "step": 29721 }, { "epoch": 1.46, "grad_norm": 0.5674471259117126, "learning_rate": 0.00031362863451366864, "loss": 2.9217, "step": 29722 }, { "epoch": 1.46, "grad_norm": 0.5355050563812256, "learning_rate": 0.0003136132542006514, "loss": 3.1893, "step": 29723 }, { "epoch": 1.46, "grad_norm": 0.5745908617973328, "learning_rate": 0.00031359787385177944, "loss": 3.2364, "step": 29724 }, { "epoch": 1.46, "grad_norm": 0.5746610760688782, "learning_rate": 0.0003135824934670932, "loss": 3.1486, "step": 29725 }, { "epoch": 1.46, "grad_norm": 0.5605559945106506, "learning_rate": 0.00031356711304663336, "loss": 3.0998, "step": 29726 }, { "epoch": 1.46, "grad_norm": 0.5893903970718384, "learning_rate": 0.0003135517325904403, "loss": 3.0817, "step": 29727 }, { "epoch": 1.46, "grad_norm": 0.583816647529602, "learning_rate": 0.00031353635209855474, "loss": 3.0743, "step": 29728 }, { "epoch": 1.46, "grad_norm": 0.616067111492157, "learning_rate": 0.00031352097157101696, "loss": 3.1714, "step": 29729 }, { "epoch": 1.46, "grad_norm": 0.6139724850654602, "learning_rate": 0.00031350559100786743, "loss": 3.0468, "step": 29730 }, { "epoch": 1.46, "grad_norm": 0.5720040202140808, "learning_rate": 0.0003134902104091469, "loss": 3.2753, "step": 29731 }, { "epoch": 1.46, "grad_norm": 0.6097339391708374, "learning_rate": 0.00031347482977489577, "loss": 3.0575, "step": 29732 }, { "epoch": 1.46, "grad_norm": 0.6279599666595459, "learning_rate": 0.00031345944910515447, "loss": 3.0261, "step": 29733 }, { "epoch": 1.46, "grad_norm": 0.5827269554138184, "learning_rate": 0.00031344406839996355, "loss": 3.2295, "step": 29734 }, { "epoch": 1.46, "grad_norm": 0.5751627683639526, "learning_rate": 0.0003134286876593637, "loss": 2.9174, "step": 29735 }, { "epoch": 1.46, "grad_norm": 0.5884239077568054, "learning_rate": 0.000313413306883395, "loss": 3.0503, "step": 29736 }, { "epoch": 1.46, "grad_norm": 0.5882185697555542, "learning_rate": 0.00031339792607209844, "loss": 2.9154, "step": 29737 }, { "epoch": 1.46, "grad_norm": 0.5342128276824951, "learning_rate": 0.00031338254522551417, "loss": 3.0905, "step": 29738 }, { "epoch": 1.46, "grad_norm": 0.566861093044281, "learning_rate": 0.00031336716434368303, "loss": 3.0281, "step": 29739 }, { "epoch": 1.46, "grad_norm": 0.5697751045227051, "learning_rate": 0.00031335178342664524, "loss": 2.994, "step": 29740 }, { "epoch": 1.46, "grad_norm": 0.6492906212806702, "learning_rate": 0.00031333640247444136, "loss": 3.1775, "step": 29741 }, { "epoch": 1.46, "grad_norm": 0.6085817813873291, "learning_rate": 0.00031332102148711214, "loss": 3.1993, "step": 29742 }, { "epoch": 1.46, "grad_norm": 0.5619418621063232, "learning_rate": 0.0003133056404646978, "loss": 2.982, "step": 29743 }, { "epoch": 1.46, "grad_norm": 0.5778747200965881, "learning_rate": 0.000313290259407239, "loss": 2.8579, "step": 29744 }, { "epoch": 1.46, "grad_norm": 0.597019374370575, "learning_rate": 0.00031327487831477616, "loss": 2.9197, "step": 29745 }, { "epoch": 1.46, "grad_norm": 0.5481294989585876, "learning_rate": 0.00031325949718734986, "loss": 3.0998, "step": 29746 }, { "epoch": 1.46, "grad_norm": 0.5638428926467896, "learning_rate": 0.0003132441160250006, "loss": 3.1415, "step": 29747 }, { "epoch": 1.46, "grad_norm": 0.5789369940757751, "learning_rate": 0.0003132287348277689, "loss": 3.1912, "step": 29748 }, { "epoch": 1.46, "grad_norm": 0.5999630689620972, "learning_rate": 0.00031321335359569527, "loss": 3.0397, "step": 29749 }, { "epoch": 1.46, "grad_norm": 0.5871257185935974, "learning_rate": 0.0003131979723288203, "loss": 2.9909, "step": 29750 }, { "epoch": 1.46, "grad_norm": 0.6134441494941711, "learning_rate": 0.0003131825910271842, "loss": 3.1867, "step": 29751 }, { "epoch": 1.46, "grad_norm": 0.622126579284668, "learning_rate": 0.0003131672096908278, "loss": 3.1849, "step": 29752 }, { "epoch": 1.46, "grad_norm": 0.5536166429519653, "learning_rate": 0.00031315182831979157, "loss": 3.1054, "step": 29753 }, { "epoch": 1.46, "grad_norm": 0.5597190856933594, "learning_rate": 0.0003131364469141159, "loss": 3.1826, "step": 29754 }, { "epoch": 1.46, "grad_norm": 0.5869278311729431, "learning_rate": 0.00031312106547384133, "loss": 3.029, "step": 29755 }, { "epoch": 1.46, "grad_norm": 0.6084804534912109, "learning_rate": 0.0003131056839990084, "loss": 2.6037, "step": 29756 }, { "epoch": 1.46, "grad_norm": 0.5875492691993713, "learning_rate": 0.00031309030248965764, "loss": 3.1481, "step": 29757 }, { "epoch": 1.46, "grad_norm": 0.6422194242477417, "learning_rate": 0.00031307492094582957, "loss": 3.0253, "step": 29758 }, { "epoch": 1.46, "grad_norm": 0.5797473788261414, "learning_rate": 0.0003130595393675646, "loss": 3.0883, "step": 29759 }, { "epoch": 1.46, "grad_norm": 0.6029950976371765, "learning_rate": 0.0003130441577549034, "loss": 2.8537, "step": 29760 }, { "epoch": 1.46, "grad_norm": 0.5672838687896729, "learning_rate": 0.00031302877610788637, "loss": 2.8068, "step": 29761 }, { "epoch": 1.46, "grad_norm": 0.5959682464599609, "learning_rate": 0.00031301339442655403, "loss": 3.1368, "step": 29762 }, { "epoch": 1.46, "grad_norm": 0.6413449645042419, "learning_rate": 0.00031299801271094687, "loss": 2.7994, "step": 29763 }, { "epoch": 1.46, "grad_norm": 0.5850305557250977, "learning_rate": 0.0003129826309611056, "loss": 3.1345, "step": 29764 }, { "epoch": 1.46, "grad_norm": 0.6164899468421936, "learning_rate": 0.0003129672491770705, "loss": 3.048, "step": 29765 }, { "epoch": 1.46, "grad_norm": 0.6072206497192383, "learning_rate": 0.00031295186735888204, "loss": 3.0531, "step": 29766 }, { "epoch": 1.46, "grad_norm": 0.5849358439445496, "learning_rate": 0.000312936485506581, "loss": 3.0533, "step": 29767 }, { "epoch": 1.46, "grad_norm": 0.5805505514144897, "learning_rate": 0.0003129211036202077, "loss": 2.9414, "step": 29768 }, { "epoch": 1.46, "grad_norm": 0.5903421640396118, "learning_rate": 0.0003129057216998027, "loss": 3.0021, "step": 29769 }, { "epoch": 1.46, "grad_norm": 0.5872484445571899, "learning_rate": 0.0003128903397454066, "loss": 2.9317, "step": 29770 }, { "epoch": 1.46, "grad_norm": 0.6200918555259705, "learning_rate": 0.00031287495775705963, "loss": 3.0879, "step": 29771 }, { "epoch": 1.46, "grad_norm": 0.5617815256118774, "learning_rate": 0.00031285957573480263, "loss": 3.0599, "step": 29772 }, { "epoch": 1.46, "grad_norm": 0.5852945446968079, "learning_rate": 0.00031284419367867596, "loss": 2.9581, "step": 29773 }, { "epoch": 1.46, "grad_norm": 0.6024762392044067, "learning_rate": 0.0003128288115887202, "loss": 2.8992, "step": 29774 }, { "epoch": 1.46, "grad_norm": 0.6069262623786926, "learning_rate": 0.00031281342946497584, "loss": 3.1949, "step": 29775 }, { "epoch": 1.46, "grad_norm": 0.6076410412788391, "learning_rate": 0.00031279804730748327, "loss": 2.93, "step": 29776 }, { "epoch": 1.46, "grad_norm": 0.599779486656189, "learning_rate": 0.00031278266511628307, "loss": 3.0456, "step": 29777 }, { "epoch": 1.46, "grad_norm": 0.6234508156776428, "learning_rate": 0.0003127672828914159, "loss": 3.0092, "step": 29778 }, { "epoch": 1.46, "grad_norm": 0.5925348997116089, "learning_rate": 0.00031275190063292214, "loss": 3.2223, "step": 29779 }, { "epoch": 1.46, "grad_norm": 0.6192617416381836, "learning_rate": 0.00031273651834084234, "loss": 2.9969, "step": 29780 }, { "epoch": 1.46, "grad_norm": 0.6267848014831543, "learning_rate": 0.000312721136015217, "loss": 2.8548, "step": 29781 }, { "epoch": 1.46, "grad_norm": 0.5980367064476013, "learning_rate": 0.00031270575365608663, "loss": 2.9697, "step": 29782 }, { "epoch": 1.46, "grad_norm": 0.5981336236000061, "learning_rate": 0.0003126903712634917, "loss": 3.051, "step": 29783 }, { "epoch": 1.46, "grad_norm": 0.6255171298980713, "learning_rate": 0.00031267498883747277, "loss": 3.1955, "step": 29784 }, { "epoch": 1.46, "grad_norm": 0.5721108913421631, "learning_rate": 0.00031265960637807044, "loss": 3.077, "step": 29785 }, { "epoch": 1.46, "grad_norm": 0.5838683843612671, "learning_rate": 0.00031264422388532515, "loss": 3.2894, "step": 29786 }, { "epoch": 1.46, "grad_norm": 0.5979468822479248, "learning_rate": 0.0003126288413592773, "loss": 2.9987, "step": 29787 }, { "epoch": 1.46, "grad_norm": 0.601974368095398, "learning_rate": 0.0003126134587999676, "loss": 2.9889, "step": 29788 }, { "epoch": 1.46, "grad_norm": 0.5563658475875854, "learning_rate": 0.0003125980762074364, "loss": 2.9098, "step": 29789 }, { "epoch": 1.46, "grad_norm": 0.628157913684845, "learning_rate": 0.0003125826935817244, "loss": 3.2493, "step": 29790 }, { "epoch": 1.46, "grad_norm": 0.5461869239807129, "learning_rate": 0.00031256731092287195, "loss": 3.1618, "step": 29791 }, { "epoch": 1.46, "grad_norm": 0.6051981449127197, "learning_rate": 0.0003125519282309196, "loss": 3.0727, "step": 29792 }, { "epoch": 1.46, "grad_norm": 0.5399933457374573, "learning_rate": 0.00031253654550590795, "loss": 2.8477, "step": 29793 }, { "epoch": 1.46, "grad_norm": 0.5897714495658875, "learning_rate": 0.00031252116274787736, "loss": 3.0971, "step": 29794 }, { "epoch": 1.46, "grad_norm": 0.5866346955299377, "learning_rate": 0.00031250577995686854, "loss": 3.0104, "step": 29795 }, { "epoch": 1.46, "grad_norm": 0.6055521368980408, "learning_rate": 0.00031249039713292183, "loss": 3.2093, "step": 29796 }, { "epoch": 1.46, "grad_norm": 0.581646203994751, "learning_rate": 0.0003124750142760778, "loss": 3.2742, "step": 29797 }, { "epoch": 1.46, "grad_norm": 0.5622509717941284, "learning_rate": 0.00031245963138637706, "loss": 3.1615, "step": 29798 }, { "epoch": 1.46, "grad_norm": 0.5802165269851685, "learning_rate": 0.00031244424846386, "loss": 2.7669, "step": 29799 }, { "epoch": 1.46, "grad_norm": 0.578447699546814, "learning_rate": 0.0003124288655085671, "loss": 3.0275, "step": 29800 }, { "epoch": 1.46, "grad_norm": 0.6063286066055298, "learning_rate": 0.00031241348252053914, "loss": 3.052, "step": 29801 }, { "epoch": 1.46, "grad_norm": 0.5866938829421997, "learning_rate": 0.0003123980994998163, "loss": 2.9231, "step": 29802 }, { "epoch": 1.46, "grad_norm": 0.5843689441680908, "learning_rate": 0.00031238271644643933, "loss": 3.0812, "step": 29803 }, { "epoch": 1.46, "grad_norm": 0.6158820986747742, "learning_rate": 0.00031236733336044876, "loss": 2.9661, "step": 29804 }, { "epoch": 1.46, "grad_norm": 0.6195018887519836, "learning_rate": 0.00031235195024188484, "loss": 2.955, "step": 29805 }, { "epoch": 1.46, "grad_norm": 0.5677433013916016, "learning_rate": 0.00031233656709078836, "loss": 3.0007, "step": 29806 }, { "epoch": 1.46, "grad_norm": 0.5664675831794739, "learning_rate": 0.0003123211839071997, "loss": 2.8784, "step": 29807 }, { "epoch": 1.46, "grad_norm": 0.5745792984962463, "learning_rate": 0.00031230580069115945, "loss": 2.9201, "step": 29808 }, { "epoch": 1.46, "grad_norm": 0.5681601762771606, "learning_rate": 0.000312290417442708, "loss": 2.9938, "step": 29809 }, { "epoch": 1.46, "grad_norm": 0.580299437046051, "learning_rate": 0.00031227503416188604, "loss": 2.8414, "step": 29810 }, { "epoch": 1.46, "grad_norm": 0.5805426239967346, "learning_rate": 0.00031225965084873396, "loss": 3.0765, "step": 29811 }, { "epoch": 1.46, "grad_norm": 0.5639891624450684, "learning_rate": 0.00031224426750329245, "loss": 3.0892, "step": 29812 }, { "epoch": 1.46, "grad_norm": 0.5976626873016357, "learning_rate": 0.0003122288841256017, "loss": 3.1597, "step": 29813 }, { "epoch": 1.46, "grad_norm": 0.6398038864135742, "learning_rate": 0.0003122135007157025, "loss": 2.881, "step": 29814 }, { "epoch": 1.46, "grad_norm": 0.5923072695732117, "learning_rate": 0.0003121981172736354, "loss": 3.1807, "step": 29815 }, { "epoch": 1.46, "grad_norm": 0.5970144271850586, "learning_rate": 0.00031218273379944063, "loss": 3.1799, "step": 29816 }, { "epoch": 1.46, "grad_norm": 0.6289509534835815, "learning_rate": 0.00031216735029315903, "loss": 2.7998, "step": 29817 }, { "epoch": 1.46, "grad_norm": 0.609671413898468, "learning_rate": 0.00031215196675483074, "loss": 2.9472, "step": 29818 }, { "epoch": 1.46, "grad_norm": 0.5885813236236572, "learning_rate": 0.00031213658318449676, "loss": 3.249, "step": 29819 }, { "epoch": 1.46, "grad_norm": 0.5870927572250366, "learning_rate": 0.0003121211995821973, "loss": 2.9601, "step": 29820 }, { "epoch": 1.46, "grad_norm": 0.5752508044242859, "learning_rate": 0.00031210581594797284, "loss": 3.1265, "step": 29821 }, { "epoch": 1.46, "grad_norm": 0.5618848204612732, "learning_rate": 0.00031209043228186406, "loss": 2.8466, "step": 29822 }, { "epoch": 1.46, "grad_norm": 0.5932725667953491, "learning_rate": 0.00031207504858391145, "loss": 3.1074, "step": 29823 }, { "epoch": 1.46, "grad_norm": 0.567245602607727, "learning_rate": 0.00031205966485415535, "loss": 2.9608, "step": 29824 }, { "epoch": 1.46, "grad_norm": 0.5531851649284363, "learning_rate": 0.0003120442810926365, "loss": 3.1427, "step": 29825 }, { "epoch": 1.46, "grad_norm": 0.5592467188835144, "learning_rate": 0.00031202889729939543, "loss": 3.1028, "step": 29826 }, { "epoch": 1.46, "grad_norm": 0.6010973453521729, "learning_rate": 0.0003120135134744724, "loss": 3.1718, "step": 29827 }, { "epoch": 1.46, "grad_norm": 0.587733805179596, "learning_rate": 0.0003119981296179081, "loss": 2.8077, "step": 29828 }, { "epoch": 1.46, "grad_norm": 0.5497106313705444, "learning_rate": 0.0003119827457297431, "loss": 3.0282, "step": 29829 }, { "epoch": 1.46, "grad_norm": 0.5502404570579529, "learning_rate": 0.00031196736181001784, "loss": 2.9867, "step": 29830 }, { "epoch": 1.46, "grad_norm": 0.5896203517913818, "learning_rate": 0.0003119519778587729, "loss": 2.9452, "step": 29831 }, { "epoch": 1.46, "grad_norm": 0.5907154679298401, "learning_rate": 0.0003119365938760486, "loss": 2.919, "step": 29832 }, { "epoch": 1.46, "grad_norm": 0.5724136829376221, "learning_rate": 0.00031192120986188573, "loss": 3.052, "step": 29833 }, { "epoch": 1.46, "grad_norm": 0.5927634835243225, "learning_rate": 0.0003119058258163247, "loss": 3.0744, "step": 29834 }, { "epoch": 1.46, "grad_norm": 0.5511701107025146, "learning_rate": 0.00031189044173940596, "loss": 2.9463, "step": 29835 }, { "epoch": 1.46, "grad_norm": 0.6173757314682007, "learning_rate": 0.00031187505763117016, "loss": 2.9852, "step": 29836 }, { "epoch": 1.46, "grad_norm": 0.6480523347854614, "learning_rate": 0.0003118596734916577, "loss": 3.0302, "step": 29837 }, { "epoch": 1.46, "grad_norm": 0.6319320201873779, "learning_rate": 0.00031184428932090906, "loss": 3.1108, "step": 29838 }, { "epoch": 1.46, "grad_norm": 0.5854665040969849, "learning_rate": 0.0003118289051189649, "loss": 3.1069, "step": 29839 }, { "epoch": 1.46, "grad_norm": 0.5656838417053223, "learning_rate": 0.00031181352088586567, "loss": 3.0943, "step": 29840 }, { "epoch": 1.46, "grad_norm": 0.5562570691108704, "learning_rate": 0.000311798136621652, "loss": 2.9549, "step": 29841 }, { "epoch": 1.46, "grad_norm": 0.635686457157135, "learning_rate": 0.0003117827523263642, "loss": 3.1737, "step": 29842 }, { "epoch": 1.46, "grad_norm": 0.5831470489501953, "learning_rate": 0.0003117673680000429, "loss": 2.9611, "step": 29843 }, { "epoch": 1.46, "grad_norm": 0.5976853966712952, "learning_rate": 0.00031175198364272866, "loss": 2.9907, "step": 29844 }, { "epoch": 1.46, "grad_norm": 0.6116719841957092, "learning_rate": 0.000311736599254462, "loss": 2.9301, "step": 29845 }, { "epoch": 1.46, "grad_norm": 0.6009196639060974, "learning_rate": 0.0003117212148352833, "loss": 3.0093, "step": 29846 }, { "epoch": 1.46, "grad_norm": 0.5840451717376709, "learning_rate": 0.00031170583038523324, "loss": 3.1293, "step": 29847 }, { "epoch": 1.46, "grad_norm": 0.5879883766174316, "learning_rate": 0.0003116904459043523, "loss": 2.8642, "step": 29848 }, { "epoch": 1.46, "grad_norm": 0.5799393057823181, "learning_rate": 0.00031167506139268084, "loss": 3.0245, "step": 29849 }, { "epoch": 1.46, "grad_norm": 0.5922977328300476, "learning_rate": 0.0003116596768502596, "loss": 3.0836, "step": 29850 }, { "epoch": 1.46, "grad_norm": 0.5858464241027832, "learning_rate": 0.000311644292277129, "loss": 3.0924, "step": 29851 }, { "epoch": 1.46, "grad_norm": 0.5689262747764587, "learning_rate": 0.00031162890767332966, "loss": 2.9704, "step": 29852 }, { "epoch": 1.46, "grad_norm": 0.6013664603233337, "learning_rate": 0.0003116135230389019, "loss": 2.994, "step": 29853 }, { "epoch": 1.46, "grad_norm": 0.5736306309700012, "learning_rate": 0.00031159813837388636, "loss": 2.8971, "step": 29854 }, { "epoch": 1.46, "grad_norm": 0.6058791279792786, "learning_rate": 0.00031158275367832366, "loss": 3.0407, "step": 29855 }, { "epoch": 1.46, "grad_norm": 0.563359797000885, "learning_rate": 0.00031156736895225417, "loss": 3.3032, "step": 29856 }, { "epoch": 1.46, "grad_norm": 0.6421247720718384, "learning_rate": 0.00031155198419571844, "loss": 3.0148, "step": 29857 }, { "epoch": 1.46, "grad_norm": 0.5899890065193176, "learning_rate": 0.00031153659940875696, "loss": 2.8782, "step": 29858 }, { "epoch": 1.46, "grad_norm": 0.5498698353767395, "learning_rate": 0.00031152121459141033, "loss": 3.1737, "step": 29859 }, { "epoch": 1.46, "grad_norm": 0.564635694026947, "learning_rate": 0.00031150582974371905, "loss": 3.1709, "step": 29860 }, { "epoch": 1.46, "grad_norm": 0.5672764778137207, "learning_rate": 0.0003114904448657236, "loss": 3.0535, "step": 29861 }, { "epoch": 1.46, "grad_norm": 0.6894760727882385, "learning_rate": 0.0003114750599574646, "loss": 3.0999, "step": 29862 }, { "epoch": 1.46, "grad_norm": 0.6003525257110596, "learning_rate": 0.0003114596750189825, "loss": 3.137, "step": 29863 }, { "epoch": 1.46, "grad_norm": 0.5983224511146545, "learning_rate": 0.0003114442900503177, "loss": 3.0206, "step": 29864 }, { "epoch": 1.46, "grad_norm": 0.5564965009689331, "learning_rate": 0.000311428905051511, "loss": 2.9906, "step": 29865 }, { "epoch": 1.46, "grad_norm": 0.5878624320030212, "learning_rate": 0.0003114135200226027, "loss": 2.8516, "step": 29866 }, { "epoch": 1.46, "grad_norm": 0.5519507527351379, "learning_rate": 0.00031139813496363337, "loss": 3.2566, "step": 29867 }, { "epoch": 1.46, "grad_norm": 0.5717609524726868, "learning_rate": 0.0003113827498746435, "loss": 3.0263, "step": 29868 }, { "epoch": 1.46, "grad_norm": 0.5674951076507568, "learning_rate": 0.00031136736475567364, "loss": 3.1298, "step": 29869 }, { "epoch": 1.46, "grad_norm": 0.6507361531257629, "learning_rate": 0.0003113519796067645, "loss": 3.1109, "step": 29870 }, { "epoch": 1.46, "grad_norm": 0.6233397722244263, "learning_rate": 0.0003113365944279563, "loss": 2.8556, "step": 29871 }, { "epoch": 1.46, "grad_norm": 0.5794210433959961, "learning_rate": 0.0003113212092192897, "loss": 2.9724, "step": 29872 }, { "epoch": 1.46, "grad_norm": 0.5545445084571838, "learning_rate": 0.0003113058239808052, "loss": 2.9606, "step": 29873 }, { "epoch": 1.46, "grad_norm": 0.5938491821289062, "learning_rate": 0.00031129043871254344, "loss": 3.1588, "step": 29874 }, { "epoch": 1.46, "grad_norm": 0.5351107716560364, "learning_rate": 0.0003112750534145448, "loss": 3.0698, "step": 29875 }, { "epoch": 1.46, "grad_norm": 0.5751978754997253, "learning_rate": 0.0003112596680868497, "loss": 3.298, "step": 29876 }, { "epoch": 1.46, "grad_norm": 0.6225106120109558, "learning_rate": 0.00031124428272949896, "loss": 3.1264, "step": 29877 }, { "epoch": 1.46, "grad_norm": 0.5818681716918945, "learning_rate": 0.00031122889734253294, "loss": 3.1766, "step": 29878 }, { "epoch": 1.46, "grad_norm": 0.5759578943252563, "learning_rate": 0.000311213511925992, "loss": 3.0989, "step": 29879 }, { "epoch": 1.46, "grad_norm": 0.5782442688941956, "learning_rate": 0.0003111981264799169, "loss": 3.1062, "step": 29880 }, { "epoch": 1.46, "grad_norm": 0.606756865978241, "learning_rate": 0.00031118274100434823, "loss": 2.9649, "step": 29881 }, { "epoch": 1.46, "grad_norm": 0.5722266435623169, "learning_rate": 0.0003111673554993263, "loss": 2.9258, "step": 29882 }, { "epoch": 1.46, "grad_norm": 0.5965888500213623, "learning_rate": 0.0003111519699648916, "loss": 2.7842, "step": 29883 }, { "epoch": 1.46, "grad_norm": 0.5483811497688293, "learning_rate": 0.0003111365844010849, "loss": 2.9596, "step": 29884 }, { "epoch": 1.46, "grad_norm": 0.5529528856277466, "learning_rate": 0.00031112119880794654, "loss": 3.1932, "step": 29885 }, { "epoch": 1.46, "grad_norm": 0.624657392501831, "learning_rate": 0.000311105813185517, "loss": 2.9514, "step": 29886 }, { "epoch": 1.46, "grad_norm": 0.5750808119773865, "learning_rate": 0.0003110904275338369, "loss": 3.1854, "step": 29887 }, { "epoch": 1.46, "grad_norm": 0.5993642807006836, "learning_rate": 0.0003110750418529469, "loss": 2.9088, "step": 29888 }, { "epoch": 1.46, "grad_norm": 0.6091515421867371, "learning_rate": 0.00031105965614288723, "loss": 3.1339, "step": 29889 }, { "epoch": 1.46, "grad_norm": 0.6110482811927795, "learning_rate": 0.0003110442704036985, "loss": 3.286, "step": 29890 }, { "epoch": 1.46, "grad_norm": 0.6325213313102722, "learning_rate": 0.00031102888463542137, "loss": 3.0859, "step": 29891 }, { "epoch": 1.46, "grad_norm": 0.586706280708313, "learning_rate": 0.0003110134988380964, "loss": 2.8581, "step": 29892 }, { "epoch": 1.46, "grad_norm": 0.5791521668434143, "learning_rate": 0.0003109981130117639, "loss": 3.0277, "step": 29893 }, { "epoch": 1.47, "grad_norm": 0.6084620952606201, "learning_rate": 0.0003109827271564644, "loss": 3.1767, "step": 29894 }, { "epoch": 1.47, "grad_norm": 0.6085389852523804, "learning_rate": 0.0003109673412722386, "loss": 3.0384, "step": 29895 }, { "epoch": 1.47, "grad_norm": 0.6390563249588013, "learning_rate": 0.0003109519553591269, "loss": 2.8968, "step": 29896 }, { "epoch": 1.47, "grad_norm": 0.5879514813423157, "learning_rate": 0.0003109365694171699, "loss": 3.0246, "step": 29897 }, { "epoch": 1.47, "grad_norm": 0.6184220314025879, "learning_rate": 0.000310921183446408, "loss": 2.9875, "step": 29898 }, { "epoch": 1.47, "grad_norm": 0.629254162311554, "learning_rate": 0.00031090579744688196, "loss": 3.1362, "step": 29899 }, { "epoch": 1.47, "grad_norm": 0.6382981538772583, "learning_rate": 0.00031089041141863194, "loss": 3.0567, "step": 29900 }, { "epoch": 1.47, "grad_norm": 0.5889188647270203, "learning_rate": 0.00031087502536169875, "loss": 3.0674, "step": 29901 }, { "epoch": 1.47, "grad_norm": 0.6274064183235168, "learning_rate": 0.0003108596392761229, "loss": 3.0911, "step": 29902 }, { "epoch": 1.47, "grad_norm": 0.6702287197113037, "learning_rate": 0.0003108442531619448, "loss": 2.8349, "step": 29903 }, { "epoch": 1.47, "grad_norm": 0.5988737940788269, "learning_rate": 0.00031082886701920505, "loss": 3.0651, "step": 29904 }, { "epoch": 1.47, "grad_norm": 0.6140710115432739, "learning_rate": 0.00031081348084794404, "loss": 2.8459, "step": 29905 }, { "epoch": 1.47, "grad_norm": 0.5804564356803894, "learning_rate": 0.00031079809464820257, "loss": 3.0062, "step": 29906 }, { "epoch": 1.47, "grad_norm": 0.5837451219558716, "learning_rate": 0.0003107827084200209, "loss": 3.1248, "step": 29907 }, { "epoch": 1.47, "grad_norm": 0.6358888149261475, "learning_rate": 0.0003107673221634397, "loss": 3.0773, "step": 29908 }, { "epoch": 1.47, "grad_norm": 0.6011119484901428, "learning_rate": 0.00031075193587849943, "loss": 3.0292, "step": 29909 }, { "epoch": 1.47, "grad_norm": 0.6402463316917419, "learning_rate": 0.00031073654956524063, "loss": 2.7842, "step": 29910 }, { "epoch": 1.47, "grad_norm": 0.5692670345306396, "learning_rate": 0.0003107211632237037, "loss": 3.114, "step": 29911 }, { "epoch": 1.47, "grad_norm": 0.5846226215362549, "learning_rate": 0.0003107057768539295, "loss": 3.1666, "step": 29912 }, { "epoch": 1.47, "grad_norm": 0.5675181150436401, "learning_rate": 0.0003106903904559582, "loss": 2.7647, "step": 29913 }, { "epoch": 1.47, "grad_norm": 0.6056368350982666, "learning_rate": 0.00031067500402983056, "loss": 3.0578, "step": 29914 }, { "epoch": 1.47, "grad_norm": 0.5673414468765259, "learning_rate": 0.00031065961757558694, "loss": 2.9745, "step": 29915 }, { "epoch": 1.47, "grad_norm": 0.6073873043060303, "learning_rate": 0.00031064423109326795, "loss": 3.1665, "step": 29916 }, { "epoch": 1.47, "grad_norm": 0.5638627409934998, "learning_rate": 0.0003106288445829142, "loss": 3.1064, "step": 29917 }, { "epoch": 1.47, "grad_norm": 0.5839649438858032, "learning_rate": 0.00031061345804456604, "loss": 2.7305, "step": 29918 }, { "epoch": 1.47, "grad_norm": 0.5768024325370789, "learning_rate": 0.00031059807147826405, "loss": 2.9832, "step": 29919 }, { "epoch": 1.47, "grad_norm": 0.6130768656730652, "learning_rate": 0.0003105826848840488, "loss": 2.9809, "step": 29920 }, { "epoch": 1.47, "grad_norm": 0.5641641616821289, "learning_rate": 0.0003105672982619608, "loss": 2.9487, "step": 29921 }, { "epoch": 1.47, "grad_norm": 0.5932754278182983, "learning_rate": 0.0003105519116120405, "loss": 3.1855, "step": 29922 }, { "epoch": 1.47, "grad_norm": 0.6482731699943542, "learning_rate": 0.00031053652493432857, "loss": 3.0039, "step": 29923 }, { "epoch": 1.47, "grad_norm": 0.586216926574707, "learning_rate": 0.0003105211382288655, "loss": 3.0655, "step": 29924 }, { "epoch": 1.47, "grad_norm": 0.5854665637016296, "learning_rate": 0.0003105057514956917, "loss": 3.1271, "step": 29925 }, { "epoch": 1.47, "grad_norm": 0.592591404914856, "learning_rate": 0.0003104903647348478, "loss": 2.8566, "step": 29926 }, { "epoch": 1.47, "grad_norm": 0.5938833951950073, "learning_rate": 0.0003104749779463743, "loss": 2.9205, "step": 29927 }, { "epoch": 1.47, "grad_norm": 0.5724154114723206, "learning_rate": 0.00031045959113031187, "loss": 2.8484, "step": 29928 }, { "epoch": 1.47, "grad_norm": 0.5788401961326599, "learning_rate": 0.00031044420428670067, "loss": 3.0497, "step": 29929 }, { "epoch": 1.47, "grad_norm": 0.5472137928009033, "learning_rate": 0.0003104288174155816, "loss": 3.1811, "step": 29930 }, { "epoch": 1.47, "grad_norm": 0.6054922938346863, "learning_rate": 0.0003104134305169948, "loss": 3.059, "step": 29931 }, { "epoch": 1.47, "grad_norm": 0.6112335920333862, "learning_rate": 0.0003103980435909812, "loss": 2.9871, "step": 29932 }, { "epoch": 1.47, "grad_norm": 0.5911417007446289, "learning_rate": 0.0003103826566375812, "loss": 3.0777, "step": 29933 }, { "epoch": 1.47, "grad_norm": 0.724625825881958, "learning_rate": 0.0003103672696568352, "loss": 3.1604, "step": 29934 }, { "epoch": 1.47, "grad_norm": 0.5843529105186462, "learning_rate": 0.0003103518826487838, "loss": 2.8306, "step": 29935 }, { "epoch": 1.47, "grad_norm": 0.5531800389289856, "learning_rate": 0.0003103364956134676, "loss": 2.9553, "step": 29936 }, { "epoch": 1.47, "grad_norm": 0.6143542528152466, "learning_rate": 0.00031032110855092705, "loss": 2.8877, "step": 29937 }, { "epoch": 1.47, "grad_norm": 0.5682981014251709, "learning_rate": 0.0003103057214612026, "loss": 3.1894, "step": 29938 }, { "epoch": 1.47, "grad_norm": 0.6502469182014465, "learning_rate": 0.000310290334344335, "loss": 3.2924, "step": 29939 }, { "epoch": 1.47, "grad_norm": 0.5962908267974854, "learning_rate": 0.00031027494720036456, "loss": 2.8696, "step": 29940 }, { "epoch": 1.47, "grad_norm": 0.6846643090248108, "learning_rate": 0.0003102595600293318, "loss": 2.8676, "step": 29941 }, { "epoch": 1.47, "grad_norm": 0.5837406516075134, "learning_rate": 0.00031024417283127744, "loss": 3.0501, "step": 29942 }, { "epoch": 1.47, "grad_norm": 0.6407919526100159, "learning_rate": 0.0003102287856062419, "loss": 3.2333, "step": 29943 }, { "epoch": 1.47, "grad_norm": 0.615726888179779, "learning_rate": 0.0003102133983542657, "loss": 3.1496, "step": 29944 }, { "epoch": 1.47, "grad_norm": 0.5835103392601013, "learning_rate": 0.00031019801107538933, "loss": 3.0538, "step": 29945 }, { "epoch": 1.47, "grad_norm": 0.5882119536399841, "learning_rate": 0.0003101826237696534, "loss": 2.8205, "step": 29946 }, { "epoch": 1.47, "grad_norm": 0.5969010591506958, "learning_rate": 0.0003101672364370984, "loss": 2.8979, "step": 29947 }, { "epoch": 1.47, "grad_norm": 0.561356782913208, "learning_rate": 0.00031015184907776485, "loss": 2.9959, "step": 29948 }, { "epoch": 1.47, "grad_norm": 0.5852020978927612, "learning_rate": 0.00031013646169169326, "loss": 3.0055, "step": 29949 }, { "epoch": 1.47, "grad_norm": 0.613553524017334, "learning_rate": 0.0003101210742789242, "loss": 2.9737, "step": 29950 }, { "epoch": 1.47, "grad_norm": 0.5886765122413635, "learning_rate": 0.00031010568683949817, "loss": 2.8426, "step": 29951 }, { "epoch": 1.47, "grad_norm": 0.6295921206474304, "learning_rate": 0.00031009029937345567, "loss": 3.2008, "step": 29952 }, { "epoch": 1.47, "grad_norm": 0.5718304514884949, "learning_rate": 0.0003100749118808373, "loss": 3.2872, "step": 29953 }, { "epoch": 1.47, "grad_norm": 0.5818148851394653, "learning_rate": 0.0003100595243616836, "loss": 3.315, "step": 29954 }, { "epoch": 1.47, "grad_norm": 0.5743296146392822, "learning_rate": 0.00031004413681603505, "loss": 3.1359, "step": 29955 }, { "epoch": 1.47, "grad_norm": 0.6252809166908264, "learning_rate": 0.00031002874924393204, "loss": 2.96, "step": 29956 }, { "epoch": 1.47, "grad_norm": 0.5608223676681519, "learning_rate": 0.00031001336164541537, "loss": 3.1832, "step": 29957 }, { "epoch": 1.47, "grad_norm": 0.5842495560646057, "learning_rate": 0.00030999797402052543, "loss": 3.2502, "step": 29958 }, { "epoch": 1.47, "grad_norm": 0.569668710231781, "learning_rate": 0.00030998258636930266, "loss": 3.2578, "step": 29959 }, { "epoch": 1.47, "grad_norm": 0.5936859250068665, "learning_rate": 0.00030996719869178776, "loss": 3.2201, "step": 29960 }, { "epoch": 1.47, "grad_norm": 0.5749362111091614, "learning_rate": 0.00030995181098802114, "loss": 3.1107, "step": 29961 }, { "epoch": 1.47, "grad_norm": 0.6007623076438904, "learning_rate": 0.00030993642325804333, "loss": 3.0569, "step": 29962 }, { "epoch": 1.47, "grad_norm": 0.6313218474388123, "learning_rate": 0.000309921035501895, "loss": 3.0859, "step": 29963 }, { "epoch": 1.47, "grad_norm": 0.6418139934539795, "learning_rate": 0.00030990564771961646, "loss": 3.1744, "step": 29964 }, { "epoch": 1.47, "grad_norm": 0.5875469446182251, "learning_rate": 0.00030989025991124845, "loss": 2.8325, "step": 29965 }, { "epoch": 1.47, "grad_norm": 0.5943783521652222, "learning_rate": 0.00030987487207683134, "loss": 3.1794, "step": 29966 }, { "epoch": 1.47, "grad_norm": 0.5599361658096313, "learning_rate": 0.00030985948421640564, "loss": 3.1811, "step": 29967 }, { "epoch": 1.47, "grad_norm": 0.605446994304657, "learning_rate": 0.00030984409633001215, "loss": 3.1107, "step": 29968 }, { "epoch": 1.47, "grad_norm": 0.609178900718689, "learning_rate": 0.00030982870841769105, "loss": 2.9717, "step": 29969 }, { "epoch": 1.47, "grad_norm": 0.5545887351036072, "learning_rate": 0.00030981332047948305, "loss": 2.9905, "step": 29970 }, { "epoch": 1.47, "grad_norm": 0.5839653611183167, "learning_rate": 0.0003097979325154287, "loss": 2.9023, "step": 29971 }, { "epoch": 1.47, "grad_norm": 0.5799376964569092, "learning_rate": 0.0003097825445255684, "loss": 3.0182, "step": 29972 }, { "epoch": 1.47, "grad_norm": 0.560981273651123, "learning_rate": 0.00030976715650994283, "loss": 3.1762, "step": 29973 }, { "epoch": 1.47, "grad_norm": 0.6042661666870117, "learning_rate": 0.00030975176846859245, "loss": 3.1572, "step": 29974 }, { "epoch": 1.47, "grad_norm": 0.6044111847877502, "learning_rate": 0.0003097363804015577, "loss": 3.0491, "step": 29975 }, { "epoch": 1.47, "grad_norm": 0.5796766877174377, "learning_rate": 0.00030972099230887936, "loss": 3.0295, "step": 29976 }, { "epoch": 1.47, "grad_norm": 0.7335373163223267, "learning_rate": 0.0003097056041905977, "loss": 2.8727, "step": 29977 }, { "epoch": 1.47, "grad_norm": 0.6482856273651123, "learning_rate": 0.00030969021604675335, "loss": 3.0558, "step": 29978 }, { "epoch": 1.47, "grad_norm": 0.5912559628486633, "learning_rate": 0.00030967482787738683, "loss": 2.9217, "step": 29979 }, { "epoch": 1.47, "grad_norm": 0.577346920967102, "learning_rate": 0.0003096594396825387, "loss": 3.1919, "step": 29980 }, { "epoch": 1.47, "grad_norm": 0.586104154586792, "learning_rate": 0.0003096440514622495, "loss": 3.0131, "step": 29981 }, { "epoch": 1.47, "grad_norm": 0.5916348695755005, "learning_rate": 0.0003096286632165596, "loss": 3.0654, "step": 29982 }, { "epoch": 1.47, "grad_norm": 0.6343048810958862, "learning_rate": 0.00030961327494550975, "loss": 2.8386, "step": 29983 }, { "epoch": 1.47, "grad_norm": 0.60175621509552, "learning_rate": 0.00030959788664914034, "loss": 2.8354, "step": 29984 }, { "epoch": 1.47, "grad_norm": 0.5782569050788879, "learning_rate": 0.00030958249832749193, "loss": 3.1649, "step": 29985 }, { "epoch": 1.47, "grad_norm": 0.5811314582824707, "learning_rate": 0.00030956710998060515, "loss": 2.8642, "step": 29986 }, { "epoch": 1.47, "grad_norm": 0.6273580193519592, "learning_rate": 0.0003095517216085203, "loss": 3.082, "step": 29987 }, { "epoch": 1.47, "grad_norm": 0.595939576625824, "learning_rate": 0.0003095363332112782, "loss": 3.0947, "step": 29988 }, { "epoch": 1.47, "grad_norm": 0.6219014525413513, "learning_rate": 0.00030952094478891917, "loss": 2.9882, "step": 29989 }, { "epoch": 1.47, "grad_norm": 0.6124245524406433, "learning_rate": 0.0003095055563414839, "loss": 2.9679, "step": 29990 }, { "epoch": 1.47, "grad_norm": 0.5397445559501648, "learning_rate": 0.0003094901678690127, "loss": 3.1045, "step": 29991 }, { "epoch": 1.47, "grad_norm": 0.6168830990791321, "learning_rate": 0.0003094747793715462, "loss": 3.0952, "step": 29992 }, { "epoch": 1.47, "grad_norm": 0.6066709756851196, "learning_rate": 0.000309459390849125, "loss": 2.8169, "step": 29993 }, { "epoch": 1.47, "grad_norm": 0.6061710119247437, "learning_rate": 0.00030944400230178965, "loss": 3.1075, "step": 29994 }, { "epoch": 1.47, "grad_norm": 0.5859954953193665, "learning_rate": 0.00030942861372958055, "loss": 3.0391, "step": 29995 }, { "epoch": 1.47, "grad_norm": 0.6048535704612732, "learning_rate": 0.0003094132251325383, "loss": 3.171, "step": 29996 }, { "epoch": 1.47, "grad_norm": 0.6408707499504089, "learning_rate": 0.00030939783651070347, "loss": 3.0741, "step": 29997 }, { "epoch": 1.47, "grad_norm": 0.5734691023826599, "learning_rate": 0.0003093824478641165, "loss": 2.9984, "step": 29998 }, { "epoch": 1.47, "grad_norm": 0.5938532948493958, "learning_rate": 0.00030936705919281797, "loss": 2.9143, "step": 29999 }, { "epoch": 1.47, "grad_norm": 0.5862475037574768, "learning_rate": 0.00030935167049684845, "loss": 2.9718, "step": 30000 }, { "epoch": 1.47, "grad_norm": 0.5853474140167236, "learning_rate": 0.00030933628177624845, "loss": 2.987, "step": 30001 }, { "epoch": 1.47, "grad_norm": 0.5849444270133972, "learning_rate": 0.00030932089303105836, "loss": 3.0514, "step": 30002 }, { "epoch": 1.47, "grad_norm": 0.5930509567260742, "learning_rate": 0.0003093055042613189, "loss": 3.1043, "step": 30003 }, { "epoch": 1.47, "grad_norm": 0.5608189702033997, "learning_rate": 0.0003092901154670705, "loss": 3.0665, "step": 30004 }, { "epoch": 1.47, "grad_norm": 0.5968251824378967, "learning_rate": 0.00030927472664835385, "loss": 2.9385, "step": 30005 }, { "epoch": 1.47, "grad_norm": 0.6607335805892944, "learning_rate": 0.0003092593378052093, "loss": 2.8233, "step": 30006 }, { "epoch": 1.47, "grad_norm": 0.5770511031150818, "learning_rate": 0.00030924394893767736, "loss": 3.1056, "step": 30007 }, { "epoch": 1.47, "grad_norm": 0.671971321105957, "learning_rate": 0.0003092285600457987, "loss": 2.833, "step": 30008 }, { "epoch": 1.47, "grad_norm": 0.5936998724937439, "learning_rate": 0.0003092131711296137, "loss": 3.1278, "step": 30009 }, { "epoch": 1.47, "grad_norm": 0.6061517596244812, "learning_rate": 0.00030919778218916304, "loss": 3.2403, "step": 30010 }, { "epoch": 1.47, "grad_norm": 0.6386778354644775, "learning_rate": 0.00030918239322448723, "loss": 3.0888, "step": 30011 }, { "epoch": 1.47, "grad_norm": 0.5995616316795349, "learning_rate": 0.00030916700423562677, "loss": 2.953, "step": 30012 }, { "epoch": 1.47, "grad_norm": 0.6046136021614075, "learning_rate": 0.0003091516152226221, "loss": 3.1343, "step": 30013 }, { "epoch": 1.47, "grad_norm": 0.6061043739318848, "learning_rate": 0.00030913622618551384, "loss": 3.0659, "step": 30014 }, { "epoch": 1.47, "grad_norm": 0.6259770393371582, "learning_rate": 0.00030912083712434263, "loss": 3.0636, "step": 30015 }, { "epoch": 1.47, "grad_norm": 0.5781980156898499, "learning_rate": 0.00030910544803914886, "loss": 3.0077, "step": 30016 }, { "epoch": 1.47, "grad_norm": 0.6106329560279846, "learning_rate": 0.0003090900589299731, "loss": 2.8722, "step": 30017 }, { "epoch": 1.47, "grad_norm": 0.6144434213638306, "learning_rate": 0.00030907466979685573, "loss": 2.9317, "step": 30018 }, { "epoch": 1.47, "grad_norm": 0.5561577081680298, "learning_rate": 0.0003090592806398376, "loss": 3.058, "step": 30019 }, { "epoch": 1.47, "grad_norm": 0.5754068493843079, "learning_rate": 0.00030904389145895895, "loss": 2.7952, "step": 30020 }, { "epoch": 1.47, "grad_norm": 0.566835343837738, "learning_rate": 0.00030902850225426046, "loss": 3.3087, "step": 30021 }, { "epoch": 1.47, "grad_norm": 0.6028580069541931, "learning_rate": 0.00030901311302578265, "loss": 2.8337, "step": 30022 }, { "epoch": 1.47, "grad_norm": 0.6136353611946106, "learning_rate": 0.000308997723773566, "loss": 2.9233, "step": 30023 }, { "epoch": 1.47, "grad_norm": 0.5877104997634888, "learning_rate": 0.00030898233449765117, "loss": 3.2272, "step": 30024 }, { "epoch": 1.47, "grad_norm": 0.5864350199699402, "learning_rate": 0.0003089669451980785, "loss": 3.0262, "step": 30025 }, { "epoch": 1.47, "grad_norm": 0.5969464182853699, "learning_rate": 0.00030895155587488867, "loss": 3.0745, "step": 30026 }, { "epoch": 1.47, "grad_norm": 0.6056321859359741, "learning_rate": 0.00030893616652812224, "loss": 3.0233, "step": 30027 }, { "epoch": 1.47, "grad_norm": 0.5771879553794861, "learning_rate": 0.0003089207771578195, "loss": 3.1142, "step": 30028 }, { "epoch": 1.47, "grad_norm": 0.5768468976020813, "learning_rate": 0.00030890538776402125, "loss": 3.2566, "step": 30029 }, { "epoch": 1.47, "grad_norm": 0.6096814870834351, "learning_rate": 0.0003088899983467679, "loss": 3.0353, "step": 30030 }, { "epoch": 1.47, "grad_norm": 0.5729770660400391, "learning_rate": 0.0003088746089061, "loss": 3.0693, "step": 30031 }, { "epoch": 1.47, "grad_norm": 0.6186831593513489, "learning_rate": 0.0003088592194420581, "loss": 3.2713, "step": 30032 }, { "epoch": 1.47, "grad_norm": 0.5830119848251343, "learning_rate": 0.00030884382995468267, "loss": 3.2221, "step": 30033 }, { "epoch": 1.47, "grad_norm": 0.5844249129295349, "learning_rate": 0.00030882844044401446, "loss": 2.9112, "step": 30034 }, { "epoch": 1.47, "grad_norm": 0.6245279312133789, "learning_rate": 0.0003088130509100937, "loss": 3.1312, "step": 30035 }, { "epoch": 1.47, "grad_norm": 0.6162157654762268, "learning_rate": 0.00030879766135296103, "loss": 3.0903, "step": 30036 }, { "epoch": 1.47, "grad_norm": 0.6323968172073364, "learning_rate": 0.00030878227177265707, "loss": 3.0378, "step": 30037 }, { "epoch": 1.47, "grad_norm": 0.5921278595924377, "learning_rate": 0.0003087668821692223, "loss": 3.1487, "step": 30038 }, { "epoch": 1.47, "grad_norm": 0.5776242613792419, "learning_rate": 0.00030875149254269723, "loss": 2.8284, "step": 30039 }, { "epoch": 1.47, "grad_norm": 0.5871045589447021, "learning_rate": 0.0003087361028931224, "loss": 3.0864, "step": 30040 }, { "epoch": 1.47, "grad_norm": 0.6193739175796509, "learning_rate": 0.0003087207132205384, "loss": 3.0912, "step": 30041 }, { "epoch": 1.47, "grad_norm": 0.5853241086006165, "learning_rate": 0.00030870532352498577, "loss": 3.095, "step": 30042 }, { "epoch": 1.47, "grad_norm": 0.5713692903518677, "learning_rate": 0.00030868993380650495, "loss": 2.9933, "step": 30043 }, { "epoch": 1.47, "grad_norm": 0.589024543762207, "learning_rate": 0.00030867454406513636, "loss": 3.1075, "step": 30044 }, { "epoch": 1.47, "grad_norm": 0.638248085975647, "learning_rate": 0.0003086591543009209, "loss": 3.1879, "step": 30045 }, { "epoch": 1.47, "grad_norm": 0.5836499333381653, "learning_rate": 0.00030864376451389885, "loss": 3.2182, "step": 30046 }, { "epoch": 1.47, "grad_norm": 0.6031222343444824, "learning_rate": 0.0003086283747041107, "loss": 3.1655, "step": 30047 }, { "epoch": 1.47, "grad_norm": 0.5775892734527588, "learning_rate": 0.00030861298487159714, "loss": 3.1287, "step": 30048 }, { "epoch": 1.47, "grad_norm": 0.5872842669487, "learning_rate": 0.00030859759501639865, "loss": 2.8683, "step": 30049 }, { "epoch": 1.47, "grad_norm": 0.6183463931083679, "learning_rate": 0.00030858220513855567, "loss": 3.0174, "step": 30050 }, { "epoch": 1.47, "grad_norm": 0.56663578748703, "learning_rate": 0.0003085668152381088, "loss": 3.0062, "step": 30051 }, { "epoch": 1.47, "grad_norm": 0.610784649848938, "learning_rate": 0.0003085514253150988, "loss": 3.0837, "step": 30052 }, { "epoch": 1.47, "grad_norm": 0.5991683602333069, "learning_rate": 0.0003085360353695658, "loss": 3.1117, "step": 30053 }, { "epoch": 1.47, "grad_norm": 0.5896287560462952, "learning_rate": 0.0003085206454015505, "loss": 3.2123, "step": 30054 }, { "epoch": 1.47, "grad_norm": 0.5315856337547302, "learning_rate": 0.00030850525541109347, "loss": 3.1274, "step": 30055 }, { "epoch": 1.47, "grad_norm": 0.600836992263794, "learning_rate": 0.0003084898653982354, "loss": 3.0838, "step": 30056 }, { "epoch": 1.47, "grad_norm": 0.6084719300270081, "learning_rate": 0.00030847447536301657, "loss": 2.7858, "step": 30057 }, { "epoch": 1.47, "grad_norm": 0.597867488861084, "learning_rate": 0.0003084590853054775, "loss": 3.0085, "step": 30058 }, { "epoch": 1.47, "grad_norm": 0.6317042112350464, "learning_rate": 0.000308443695225659, "loss": 2.9331, "step": 30059 }, { "epoch": 1.47, "grad_norm": 0.6137262582778931, "learning_rate": 0.00030842830512360127, "loss": 3.237, "step": 30060 }, { "epoch": 1.47, "grad_norm": 0.6111647486686707, "learning_rate": 0.0003084129149993451, "loss": 3.0709, "step": 30061 }, { "epoch": 1.47, "grad_norm": 0.6254605650901794, "learning_rate": 0.0003083975248529309, "loss": 3.1905, "step": 30062 }, { "epoch": 1.47, "grad_norm": 0.5825523138046265, "learning_rate": 0.00030838213468439927, "loss": 3.2801, "step": 30063 }, { "epoch": 1.47, "grad_norm": 0.6181331276893616, "learning_rate": 0.00030836674449379064, "loss": 2.8734, "step": 30064 }, { "epoch": 1.47, "grad_norm": 0.6186519861221313, "learning_rate": 0.0003083513542811456, "loss": 3.1002, "step": 30065 }, { "epoch": 1.47, "grad_norm": 0.5745614767074585, "learning_rate": 0.00030833596404650476, "loss": 3.0784, "step": 30066 }, { "epoch": 1.47, "grad_norm": 0.5756679773330688, "learning_rate": 0.0003083205737899087, "loss": 2.988, "step": 30067 }, { "epoch": 1.47, "grad_norm": 0.6421597003936768, "learning_rate": 0.0003083051835113977, "loss": 3.2194, "step": 30068 }, { "epoch": 1.47, "grad_norm": 0.610306978225708, "learning_rate": 0.00030828979321101245, "loss": 3.0658, "step": 30069 }, { "epoch": 1.47, "grad_norm": 0.5943095088005066, "learning_rate": 0.0003082744028887936, "loss": 3.0469, "step": 30070 }, { "epoch": 1.47, "grad_norm": 0.5513453483581543, "learning_rate": 0.0003082590125447815, "loss": 3.1213, "step": 30071 }, { "epoch": 1.47, "grad_norm": 0.577889084815979, "learning_rate": 0.0003082436221790167, "loss": 2.8904, "step": 30072 }, { "epoch": 1.47, "grad_norm": 0.5584579706192017, "learning_rate": 0.0003082282317915398, "loss": 2.9921, "step": 30073 }, { "epoch": 1.47, "grad_norm": 0.5914309620857239, "learning_rate": 0.00030821284138239137, "loss": 3.0308, "step": 30074 }, { "epoch": 1.47, "grad_norm": 0.5481913089752197, "learning_rate": 0.00030819745095161184, "loss": 3.0113, "step": 30075 }, { "epoch": 1.47, "grad_norm": 0.6211130619049072, "learning_rate": 0.00030818206049924184, "loss": 3.1022, "step": 30076 }, { "epoch": 1.47, "grad_norm": 0.5855414271354675, "learning_rate": 0.0003081666700253218, "loss": 2.867, "step": 30077 }, { "epoch": 1.47, "grad_norm": 0.626336395740509, "learning_rate": 0.00030815127952989245, "loss": 3.2556, "step": 30078 }, { "epoch": 1.47, "grad_norm": 0.5970550179481506, "learning_rate": 0.00030813588901299404, "loss": 3.2407, "step": 30079 }, { "epoch": 1.47, "grad_norm": 0.6191651821136475, "learning_rate": 0.00030812049847466736, "loss": 3.1078, "step": 30080 }, { "epoch": 1.47, "grad_norm": 0.6230210661888123, "learning_rate": 0.00030810510791495294, "loss": 3.3527, "step": 30081 }, { "epoch": 1.47, "grad_norm": 0.6455768942832947, "learning_rate": 0.00030808971733389107, "loss": 2.8692, "step": 30082 }, { "epoch": 1.47, "grad_norm": 0.6259360909461975, "learning_rate": 0.00030807432673152257, "loss": 3.0794, "step": 30083 }, { "epoch": 1.47, "grad_norm": 0.5726274251937866, "learning_rate": 0.00030805893610788776, "loss": 3.0452, "step": 30084 }, { "epoch": 1.47, "grad_norm": 0.5579006671905518, "learning_rate": 0.00030804354546302726, "loss": 2.9137, "step": 30085 }, { "epoch": 1.47, "grad_norm": 0.582209050655365, "learning_rate": 0.0003080281547969816, "loss": 2.8087, "step": 30086 }, { "epoch": 1.47, "grad_norm": 0.5763527750968933, "learning_rate": 0.00030801276410979137, "loss": 3.0132, "step": 30087 }, { "epoch": 1.47, "grad_norm": 0.606497585773468, "learning_rate": 0.00030799737340149707, "loss": 2.8572, "step": 30088 }, { "epoch": 1.47, "grad_norm": 0.6698089241981506, "learning_rate": 0.0003079819826721392, "loss": 2.9752, "step": 30089 }, { "epoch": 1.47, "grad_norm": 0.6636598110198975, "learning_rate": 0.00030796659192175826, "loss": 3.3192, "step": 30090 }, { "epoch": 1.47, "grad_norm": 0.5712068676948547, "learning_rate": 0.0003079512011503949, "loss": 2.9899, "step": 30091 }, { "epoch": 1.47, "grad_norm": 0.5981823205947876, "learning_rate": 0.00030793581035808967, "loss": 3.0397, "step": 30092 }, { "epoch": 1.47, "grad_norm": 0.5917591452598572, "learning_rate": 0.0003079204195448831, "loss": 3.2434, "step": 30093 }, { "epoch": 1.47, "grad_norm": 0.616977870464325, "learning_rate": 0.00030790502871081555, "loss": 3.1531, "step": 30094 }, { "epoch": 1.47, "grad_norm": 0.5743931531906128, "learning_rate": 0.0003078896378559276, "loss": 2.8923, "step": 30095 }, { "epoch": 1.47, "grad_norm": 0.6095641851425171, "learning_rate": 0.00030787424698026, "loss": 3.196, "step": 30096 }, { "epoch": 1.47, "grad_norm": 0.5719312429428101, "learning_rate": 0.0003078588560838531, "loss": 2.9232, "step": 30097 }, { "epoch": 1.48, "grad_norm": 0.5918893218040466, "learning_rate": 0.00030784346516674743, "loss": 3.1492, "step": 30098 }, { "epoch": 1.48, "grad_norm": 0.6037421822547913, "learning_rate": 0.00030782807422898364, "loss": 2.899, "step": 30099 }, { "epoch": 1.48, "grad_norm": 0.5701082944869995, "learning_rate": 0.00030781268327060223, "loss": 2.8979, "step": 30100 }, { "epoch": 1.48, "grad_norm": 0.5683816075325012, "learning_rate": 0.0003077972922916437, "loss": 2.8652, "step": 30101 }, { "epoch": 1.48, "grad_norm": 0.5692213177680969, "learning_rate": 0.0003077819012921486, "loss": 3.1262, "step": 30102 }, { "epoch": 1.48, "grad_norm": 0.5966072082519531, "learning_rate": 0.00030776651027215754, "loss": 3.1198, "step": 30103 }, { "epoch": 1.48, "grad_norm": 0.59067302942276, "learning_rate": 0.0003077511192317109, "loss": 3.0482, "step": 30104 }, { "epoch": 1.48, "grad_norm": 0.6178147196769714, "learning_rate": 0.00030773572817084925, "loss": 3.0714, "step": 30105 }, { "epoch": 1.48, "grad_norm": 0.5838093161582947, "learning_rate": 0.0003077203370896132, "loss": 3.0316, "step": 30106 }, { "epoch": 1.48, "grad_norm": 0.5537781715393066, "learning_rate": 0.00030770494598804344, "loss": 3.1259, "step": 30107 }, { "epoch": 1.48, "grad_norm": 0.5612012147903442, "learning_rate": 0.00030768955486618017, "loss": 3.0059, "step": 30108 }, { "epoch": 1.48, "grad_norm": 0.5880160927772522, "learning_rate": 0.0003076741637240641, "loss": 2.9501, "step": 30109 }, { "epoch": 1.48, "grad_norm": 0.5850368142127991, "learning_rate": 0.0003076587725617358, "loss": 2.9588, "step": 30110 }, { "epoch": 1.48, "grad_norm": 0.5933541655540466, "learning_rate": 0.00030764338137923583, "loss": 3.0509, "step": 30111 }, { "epoch": 1.48, "grad_norm": 0.6294256448745728, "learning_rate": 0.00030762799017660454, "loss": 3.0801, "step": 30112 }, { "epoch": 1.48, "grad_norm": 0.5729942917823792, "learning_rate": 0.00030761259895388266, "loss": 3.0384, "step": 30113 }, { "epoch": 1.48, "grad_norm": 0.575340986251831, "learning_rate": 0.0003075972077111107, "loss": 3.067, "step": 30114 }, { "epoch": 1.48, "grad_norm": 0.5830142498016357, "learning_rate": 0.000307581816448329, "loss": 3.1674, "step": 30115 }, { "epoch": 1.48, "grad_norm": 0.5919670462608337, "learning_rate": 0.00030756642516557836, "loss": 3.2883, "step": 30116 }, { "epoch": 1.48, "grad_norm": 0.5591514110565186, "learning_rate": 0.00030755103386289923, "loss": 2.8813, "step": 30117 }, { "epoch": 1.48, "grad_norm": 0.6111984252929688, "learning_rate": 0.0003075356425403322, "loss": 3.2448, "step": 30118 }, { "epoch": 1.48, "grad_norm": 0.5747086405754089, "learning_rate": 0.0003075202511979176, "loss": 3.2162, "step": 30119 }, { "epoch": 1.48, "grad_norm": 0.6217588782310486, "learning_rate": 0.0003075048598356961, "loss": 3.0186, "step": 30120 }, { "epoch": 1.48, "grad_norm": 0.5869501829147339, "learning_rate": 0.00030748946845370837, "loss": 3.1229, "step": 30121 }, { "epoch": 1.48, "grad_norm": 0.6082846522331238, "learning_rate": 0.00030747407705199474, "loss": 3.0859, "step": 30122 }, { "epoch": 1.48, "grad_norm": 0.6041231751441956, "learning_rate": 0.00030745868563059585, "loss": 3.1107, "step": 30123 }, { "epoch": 1.48, "grad_norm": 0.5768772959709167, "learning_rate": 0.0003074432941895522, "loss": 3.1982, "step": 30124 }, { "epoch": 1.48, "grad_norm": 0.5537108182907104, "learning_rate": 0.00030742790272890436, "loss": 2.9797, "step": 30125 }, { "epoch": 1.48, "grad_norm": 0.582661509513855, "learning_rate": 0.0003074125112486928, "loss": 3.0316, "step": 30126 }, { "epoch": 1.48, "grad_norm": 0.5950396656990051, "learning_rate": 0.00030739711974895814, "loss": 2.7202, "step": 30127 }, { "epoch": 1.48, "grad_norm": 0.5830130577087402, "learning_rate": 0.00030738172822974096, "loss": 2.9846, "step": 30128 }, { "epoch": 1.48, "grad_norm": 0.5635205507278442, "learning_rate": 0.0003073663366910817, "loss": 2.9789, "step": 30129 }, { "epoch": 1.48, "grad_norm": 0.577469527721405, "learning_rate": 0.00030735094513302085, "loss": 2.9118, "step": 30130 }, { "epoch": 1.48, "grad_norm": 0.603352963924408, "learning_rate": 0.00030733555355559903, "loss": 2.9981, "step": 30131 }, { "epoch": 1.48, "grad_norm": 0.5949218273162842, "learning_rate": 0.0003073201619588569, "loss": 2.8709, "step": 30132 }, { "epoch": 1.48, "grad_norm": 0.5829793214797974, "learning_rate": 0.00030730477034283477, "loss": 2.947, "step": 30133 }, { "epoch": 1.48, "grad_norm": 0.5699021220207214, "learning_rate": 0.0003072893787075733, "loss": 3.1005, "step": 30134 }, { "epoch": 1.48, "grad_norm": 0.5696962475776672, "learning_rate": 0.000307273987053113, "loss": 3.1134, "step": 30135 }, { "epoch": 1.48, "grad_norm": 0.5930047631263733, "learning_rate": 0.0003072585953794945, "loss": 2.9946, "step": 30136 }, { "epoch": 1.48, "grad_norm": 0.6077128052711487, "learning_rate": 0.0003072432036867581, "loss": 3.1896, "step": 30137 }, { "epoch": 1.48, "grad_norm": 0.6007574796676636, "learning_rate": 0.0003072278119749446, "loss": 3.0898, "step": 30138 }, { "epoch": 1.48, "grad_norm": 0.613781750202179, "learning_rate": 0.00030721242024409447, "loss": 2.9471, "step": 30139 }, { "epoch": 1.48, "grad_norm": 0.5998162031173706, "learning_rate": 0.0003071970284942482, "loss": 3.1317, "step": 30140 }, { "epoch": 1.48, "grad_norm": 0.5879024863243103, "learning_rate": 0.0003071816367254462, "loss": 3.2085, "step": 30141 }, { "epoch": 1.48, "grad_norm": 0.5877510905265808, "learning_rate": 0.00030716624493772927, "loss": 3.1028, "step": 30142 }, { "epoch": 1.48, "grad_norm": 0.6133305430412292, "learning_rate": 0.00030715085313113785, "loss": 2.9439, "step": 30143 }, { "epoch": 1.48, "grad_norm": 0.5921503901481628, "learning_rate": 0.0003071354613057124, "loss": 2.9244, "step": 30144 }, { "epoch": 1.48, "grad_norm": 0.5787282586097717, "learning_rate": 0.0003071200694614936, "loss": 2.927, "step": 30145 }, { "epoch": 1.48, "grad_norm": 0.5823509097099304, "learning_rate": 0.00030710467759852177, "loss": 3.1512, "step": 30146 }, { "epoch": 1.48, "grad_norm": 0.6131330132484436, "learning_rate": 0.0003070892857168377, "loss": 3.0768, "step": 30147 }, { "epoch": 1.48, "grad_norm": 0.6605939865112305, "learning_rate": 0.00030707389381648176, "loss": 3.1089, "step": 30148 }, { "epoch": 1.48, "grad_norm": 0.5737425088882446, "learning_rate": 0.0003070585018974946, "loss": 3.2414, "step": 30149 }, { "epoch": 1.48, "grad_norm": 0.5738173127174377, "learning_rate": 0.00030704310995991667, "loss": 3.024, "step": 30150 }, { "epoch": 1.48, "grad_norm": 0.6123209595680237, "learning_rate": 0.00030702771800378856, "loss": 3.0543, "step": 30151 }, { "epoch": 1.48, "grad_norm": 0.5468517541885376, "learning_rate": 0.00030701232602915073, "loss": 3.029, "step": 30152 }, { "epoch": 1.48, "grad_norm": 0.5879467725753784, "learning_rate": 0.0003069969340360438, "loss": 3.1501, "step": 30153 }, { "epoch": 1.48, "grad_norm": 0.609234631061554, "learning_rate": 0.0003069815420245084, "loss": 3.1284, "step": 30154 }, { "epoch": 1.48, "grad_norm": 0.5802608132362366, "learning_rate": 0.00030696614999458494, "loss": 3.3857, "step": 30155 }, { "epoch": 1.48, "grad_norm": 0.5865260362625122, "learning_rate": 0.00030695075794631385, "loss": 2.9436, "step": 30156 }, { "epoch": 1.48, "grad_norm": 0.5951496362686157, "learning_rate": 0.00030693536587973584, "loss": 3.2445, "step": 30157 }, { "epoch": 1.48, "grad_norm": 0.59162837266922, "learning_rate": 0.0003069199737948915, "loss": 3.0387, "step": 30158 }, { "epoch": 1.48, "grad_norm": 0.574717104434967, "learning_rate": 0.0003069045816918212, "loss": 2.8136, "step": 30159 }, { "epoch": 1.48, "grad_norm": 0.6385117769241333, "learning_rate": 0.00030688918957056565, "loss": 2.971, "step": 30160 }, { "epoch": 1.48, "grad_norm": 0.6385549902915955, "learning_rate": 0.0003068737974311652, "loss": 3.1969, "step": 30161 }, { "epoch": 1.48, "grad_norm": 0.5969220399856567, "learning_rate": 0.0003068584052736606, "loss": 3.2271, "step": 30162 }, { "epoch": 1.48, "grad_norm": 0.5814893841743469, "learning_rate": 0.0003068430130980921, "loss": 2.9764, "step": 30163 }, { "epoch": 1.48, "grad_norm": 0.5654752850532532, "learning_rate": 0.0003068276209045005, "loss": 3.1884, "step": 30164 }, { "epoch": 1.48, "grad_norm": 0.5874380469322205, "learning_rate": 0.0003068122286929264, "loss": 3.2886, "step": 30165 }, { "epoch": 1.48, "grad_norm": 0.5981268286705017, "learning_rate": 0.0003067968364634101, "loss": 2.7295, "step": 30166 }, { "epoch": 1.48, "grad_norm": 0.6147415041923523, "learning_rate": 0.00030678144421599217, "loss": 3.2141, "step": 30167 }, { "epoch": 1.48, "grad_norm": 0.5960661768913269, "learning_rate": 0.00030676605195071325, "loss": 3.0085, "step": 30168 }, { "epoch": 1.48, "grad_norm": 0.5469895601272583, "learning_rate": 0.00030675065966761394, "loss": 3.0473, "step": 30169 }, { "epoch": 1.48, "grad_norm": 0.6334496736526489, "learning_rate": 0.0003067352673667347, "loss": 2.9559, "step": 30170 }, { "epoch": 1.48, "grad_norm": 0.6375976800918579, "learning_rate": 0.000306719875048116, "loss": 3.0748, "step": 30171 }, { "epoch": 1.48, "grad_norm": 0.5987905859947205, "learning_rate": 0.0003067044827117984, "loss": 2.7641, "step": 30172 }, { "epoch": 1.48, "grad_norm": 0.6431529521942139, "learning_rate": 0.00030668909035782253, "loss": 2.9902, "step": 30173 }, { "epoch": 1.48, "grad_norm": 0.5719122290611267, "learning_rate": 0.00030667369798622886, "loss": 3.1214, "step": 30174 }, { "epoch": 1.48, "grad_norm": 0.5882480144500732, "learning_rate": 0.00030665830559705804, "loss": 3.3428, "step": 30175 }, { "epoch": 1.48, "grad_norm": 0.5951686501502991, "learning_rate": 0.00030664291319035047, "loss": 3.058, "step": 30176 }, { "epoch": 1.48, "grad_norm": 0.594463050365448, "learning_rate": 0.00030662752076614664, "loss": 2.9867, "step": 30177 }, { "epoch": 1.48, "grad_norm": 0.6419702768325806, "learning_rate": 0.0003066121283244873, "loss": 3.0913, "step": 30178 }, { "epoch": 1.48, "grad_norm": 0.6004655361175537, "learning_rate": 0.0003065967358654129, "loss": 3.0112, "step": 30179 }, { "epoch": 1.48, "grad_norm": 0.6011263728141785, "learning_rate": 0.00030658134338896396, "loss": 2.9962, "step": 30180 }, { "epoch": 1.48, "grad_norm": 0.5436926484107971, "learning_rate": 0.000306565950895181, "loss": 3.1637, "step": 30181 }, { "epoch": 1.48, "grad_norm": 0.6261427998542786, "learning_rate": 0.0003065505583841045, "loss": 2.9735, "step": 30182 }, { "epoch": 1.48, "grad_norm": 0.6169971227645874, "learning_rate": 0.00030653516585577527, "loss": 2.9467, "step": 30183 }, { "epoch": 1.48, "grad_norm": 0.6584175825119019, "learning_rate": 0.0003065197733102336, "loss": 3.1038, "step": 30184 }, { "epoch": 1.48, "grad_norm": 0.5873148441314697, "learning_rate": 0.00030650438074752013, "loss": 2.959, "step": 30185 }, { "epoch": 1.48, "grad_norm": 0.5871749520301819, "learning_rate": 0.0003064889881676753, "loss": 2.9619, "step": 30186 }, { "epoch": 1.48, "grad_norm": 0.6506713628768921, "learning_rate": 0.00030647359557073973, "loss": 3.1184, "step": 30187 }, { "epoch": 1.48, "grad_norm": 0.5763834118843079, "learning_rate": 0.000306458202956754, "loss": 3.2246, "step": 30188 }, { "epoch": 1.48, "grad_norm": 0.5880481600761414, "learning_rate": 0.00030644281032575857, "loss": 2.8984, "step": 30189 }, { "epoch": 1.48, "grad_norm": 0.5734893679618835, "learning_rate": 0.00030642741767779406, "loss": 3.1565, "step": 30190 }, { "epoch": 1.48, "grad_norm": 0.6054985523223877, "learning_rate": 0.000306412025012901, "loss": 2.9848, "step": 30191 }, { "epoch": 1.48, "grad_norm": 0.5837637186050415, "learning_rate": 0.00030639663233111977, "loss": 3.0006, "step": 30192 }, { "epoch": 1.48, "grad_norm": 0.5740242004394531, "learning_rate": 0.00030638123963249114, "loss": 2.8531, "step": 30193 }, { "epoch": 1.48, "grad_norm": 0.5803605318069458, "learning_rate": 0.0003063658469170556, "loss": 2.9359, "step": 30194 }, { "epoch": 1.48, "grad_norm": 0.6074859499931335, "learning_rate": 0.0003063504541848535, "loss": 3.085, "step": 30195 }, { "epoch": 1.48, "grad_norm": 0.5865538120269775, "learning_rate": 0.00030633506143592566, "loss": 3.0822, "step": 30196 }, { "epoch": 1.48, "grad_norm": 0.589298665523529, "learning_rate": 0.0003063196686703123, "loss": 3.2545, "step": 30197 }, { "epoch": 1.48, "grad_norm": 0.5684748291969299, "learning_rate": 0.0003063042758880544, "loss": 2.933, "step": 30198 }, { "epoch": 1.48, "grad_norm": 0.5615243911743164, "learning_rate": 0.0003062888830891921, "loss": 3.0079, "step": 30199 }, { "epoch": 1.48, "grad_norm": 0.6056434512138367, "learning_rate": 0.0003062734902737661, "loss": 2.8702, "step": 30200 }, { "epoch": 1.48, "grad_norm": 0.61896151304245, "learning_rate": 0.00030625809744181694, "loss": 3.1829, "step": 30201 }, { "epoch": 1.48, "grad_norm": 0.5565640330314636, "learning_rate": 0.0003062427045933853, "loss": 3.0956, "step": 30202 }, { "epoch": 1.48, "grad_norm": 0.634736180305481, "learning_rate": 0.00030622731172851134, "loss": 3.0891, "step": 30203 }, { "epoch": 1.48, "grad_norm": 0.5769646763801575, "learning_rate": 0.0003062119188472359, "loss": 3.0662, "step": 30204 }, { "epoch": 1.48, "grad_norm": 0.5608484148979187, "learning_rate": 0.0003061965259495996, "loss": 3.0664, "step": 30205 }, { "epoch": 1.48, "grad_norm": 0.5996660590171814, "learning_rate": 0.0003061811330356427, "loss": 3.0626, "step": 30206 }, { "epoch": 1.48, "grad_norm": 0.606673538684845, "learning_rate": 0.00030616574010540595, "loss": 3.0254, "step": 30207 }, { "epoch": 1.48, "grad_norm": 0.6172423958778381, "learning_rate": 0.0003061503471589298, "loss": 2.9156, "step": 30208 }, { "epoch": 1.48, "grad_norm": 0.5741183757781982, "learning_rate": 0.00030613495419625484, "loss": 3.0138, "step": 30209 }, { "epoch": 1.48, "grad_norm": 0.5955791473388672, "learning_rate": 0.00030611956121742164, "loss": 3.0918, "step": 30210 }, { "epoch": 1.48, "grad_norm": 0.5800381898880005, "learning_rate": 0.0003061041682224706, "loss": 3.1426, "step": 30211 }, { "epoch": 1.48, "grad_norm": 0.6998795866966248, "learning_rate": 0.0003060887752114424, "loss": 3.0431, "step": 30212 }, { "epoch": 1.48, "grad_norm": 0.5997923016548157, "learning_rate": 0.00030607338218437754, "loss": 2.8662, "step": 30213 }, { "epoch": 1.48, "grad_norm": 0.5872188210487366, "learning_rate": 0.00030605798914131653, "loss": 2.9839, "step": 30214 }, { "epoch": 1.48, "grad_norm": 0.6395425200462341, "learning_rate": 0.00030604259608229997, "loss": 2.9624, "step": 30215 }, { "epoch": 1.48, "grad_norm": 0.5830799341201782, "learning_rate": 0.0003060272030073684, "loss": 3.0724, "step": 30216 }, { "epoch": 1.48, "grad_norm": 0.5782269835472107, "learning_rate": 0.00030601180991656233, "loss": 3.0847, "step": 30217 }, { "epoch": 1.48, "grad_norm": 0.5734887719154358, "learning_rate": 0.0003059964168099222, "loss": 3.0501, "step": 30218 }, { "epoch": 1.48, "grad_norm": 0.6264581680297852, "learning_rate": 0.00030598102368748875, "loss": 3.0401, "step": 30219 }, { "epoch": 1.48, "grad_norm": 0.6159687042236328, "learning_rate": 0.00030596563054930244, "loss": 3.3187, "step": 30220 }, { "epoch": 1.48, "grad_norm": 0.601514458656311, "learning_rate": 0.00030595023739540383, "loss": 2.9992, "step": 30221 }, { "epoch": 1.48, "grad_norm": 0.6012780070304871, "learning_rate": 0.0003059348442258334, "loss": 2.8195, "step": 30222 }, { "epoch": 1.48, "grad_norm": 0.6135539412498474, "learning_rate": 0.00030591945104063175, "loss": 3.0922, "step": 30223 }, { "epoch": 1.48, "grad_norm": 0.6214767098426819, "learning_rate": 0.0003059040578398394, "loss": 3.2023, "step": 30224 }, { "epoch": 1.48, "grad_norm": 0.5887531042098999, "learning_rate": 0.0003058886646234969, "loss": 2.8366, "step": 30225 }, { "epoch": 1.48, "grad_norm": 0.5788924694061279, "learning_rate": 0.00030587327139164474, "loss": 3.0441, "step": 30226 }, { "epoch": 1.48, "grad_norm": 0.6139992475509644, "learning_rate": 0.0003058578781443236, "loss": 3.1177, "step": 30227 }, { "epoch": 1.48, "grad_norm": 0.5603842735290527, "learning_rate": 0.00030584248488157393, "loss": 2.965, "step": 30228 }, { "epoch": 1.48, "grad_norm": 0.5821728110313416, "learning_rate": 0.00030582709160343615, "loss": 2.9993, "step": 30229 }, { "epoch": 1.48, "grad_norm": 0.5948326587677002, "learning_rate": 0.00030581169830995106, "loss": 2.8071, "step": 30230 }, { "epoch": 1.48, "grad_norm": 0.6164664030075073, "learning_rate": 0.0003057963050011591, "loss": 3.2478, "step": 30231 }, { "epoch": 1.48, "grad_norm": 0.6196146607398987, "learning_rate": 0.00030578091167710075, "loss": 2.7503, "step": 30232 }, { "epoch": 1.48, "grad_norm": 0.6732343435287476, "learning_rate": 0.00030576551833781646, "loss": 3.0339, "step": 30233 }, { "epoch": 1.48, "grad_norm": 0.5696006417274475, "learning_rate": 0.00030575012498334705, "loss": 2.8955, "step": 30234 }, { "epoch": 1.48, "grad_norm": 0.5879353284835815, "learning_rate": 0.0003057347316137329, "loss": 3.0795, "step": 30235 }, { "epoch": 1.48, "grad_norm": 0.5767134428024292, "learning_rate": 0.0003057193382290146, "loss": 2.9579, "step": 30236 }, { "epoch": 1.48, "grad_norm": 0.5511177182197571, "learning_rate": 0.00030570394482923254, "loss": 3.0081, "step": 30237 }, { "epoch": 1.48, "grad_norm": 0.5799663662910461, "learning_rate": 0.0003056885514144276, "loss": 3.1636, "step": 30238 }, { "epoch": 1.48, "grad_norm": 0.582383394241333, "learning_rate": 0.00030567315798463987, "loss": 2.8857, "step": 30239 }, { "epoch": 1.48, "grad_norm": 0.554225742816925, "learning_rate": 0.00030565776453991025, "loss": 2.9747, "step": 30240 }, { "epoch": 1.48, "grad_norm": 0.6220203638076782, "learning_rate": 0.00030564237108027915, "loss": 2.9467, "step": 30241 }, { "epoch": 1.48, "grad_norm": 0.5915405750274658, "learning_rate": 0.0003056269776057872, "loss": 2.7895, "step": 30242 }, { "epoch": 1.48, "grad_norm": 0.5988735556602478, "learning_rate": 0.0003056115841164748, "loss": 2.9361, "step": 30243 }, { "epoch": 1.48, "grad_norm": 0.6032905578613281, "learning_rate": 0.0003055961906123825, "loss": 3.1007, "step": 30244 }, { "epoch": 1.48, "grad_norm": 0.6156014204025269, "learning_rate": 0.00030558079709355106, "loss": 2.9049, "step": 30245 }, { "epoch": 1.48, "grad_norm": 0.6102397441864014, "learning_rate": 0.0003055654035600208, "loss": 2.9683, "step": 30246 }, { "epoch": 1.48, "grad_norm": 0.6204525232315063, "learning_rate": 0.0003055500100118323, "loss": 3.0618, "step": 30247 }, { "epoch": 1.48, "grad_norm": 0.6160491108894348, "learning_rate": 0.0003055346164490262, "loss": 3.1209, "step": 30248 }, { "epoch": 1.48, "grad_norm": 0.5589972734451294, "learning_rate": 0.000305519222871643, "loss": 3.1358, "step": 30249 }, { "epoch": 1.48, "grad_norm": 0.5976723432540894, "learning_rate": 0.00030550382927972317, "loss": 2.9208, "step": 30250 }, { "epoch": 1.48, "grad_norm": 0.5512217283248901, "learning_rate": 0.0003054884356733073, "loss": 3.0062, "step": 30251 }, { "epoch": 1.48, "grad_norm": 0.5562359690666199, "learning_rate": 0.000305473042052436, "loss": 3.0616, "step": 30252 }, { "epoch": 1.48, "grad_norm": 0.6103971600532532, "learning_rate": 0.0003054576484171498, "loss": 2.8764, "step": 30253 }, { "epoch": 1.48, "grad_norm": 0.5834851861000061, "learning_rate": 0.00030544225476748907, "loss": 3.1122, "step": 30254 }, { "epoch": 1.48, "grad_norm": 0.5874277949333191, "learning_rate": 0.0003054268611034946, "loss": 3.0748, "step": 30255 }, { "epoch": 1.48, "grad_norm": 0.5750007629394531, "learning_rate": 0.00030541146742520684, "loss": 3.0289, "step": 30256 }, { "epoch": 1.48, "grad_norm": 0.5605846047401428, "learning_rate": 0.0003053960737326663, "loss": 2.9117, "step": 30257 }, { "epoch": 1.48, "grad_norm": 0.577288031578064, "learning_rate": 0.0003053806800259135, "loss": 2.9049, "step": 30258 }, { "epoch": 1.48, "grad_norm": 0.5720159411430359, "learning_rate": 0.00030536528630498893, "loss": 3.2772, "step": 30259 }, { "epoch": 1.48, "grad_norm": 0.5890551805496216, "learning_rate": 0.0003053498925699334, "loss": 2.8408, "step": 30260 }, { "epoch": 1.48, "grad_norm": 0.6078507900238037, "learning_rate": 0.00030533449882078723, "loss": 3.1933, "step": 30261 }, { "epoch": 1.48, "grad_norm": 0.5361073017120361, "learning_rate": 0.000305319105057591, "loss": 3.0317, "step": 30262 }, { "epoch": 1.48, "grad_norm": 0.6214322447776794, "learning_rate": 0.00030530371128038527, "loss": 2.9402, "step": 30263 }, { "epoch": 1.48, "grad_norm": 0.5904223322868347, "learning_rate": 0.00030528831748921056, "loss": 3.1021, "step": 30264 }, { "epoch": 1.48, "grad_norm": 0.6872094869613647, "learning_rate": 0.0003052729236841075, "loss": 3.0022, "step": 30265 }, { "epoch": 1.48, "grad_norm": 0.6223731637001038, "learning_rate": 0.0003052575298651165, "loss": 2.7194, "step": 30266 }, { "epoch": 1.48, "grad_norm": 0.592032253742218, "learning_rate": 0.00030524213603227827, "loss": 3.336, "step": 30267 }, { "epoch": 1.48, "grad_norm": 0.5875898599624634, "learning_rate": 0.0003052267421856332, "loss": 3.031, "step": 30268 }, { "epoch": 1.48, "grad_norm": 0.6270338892936707, "learning_rate": 0.00030521134832522186, "loss": 2.9393, "step": 30269 }, { "epoch": 1.48, "grad_norm": 0.5827121734619141, "learning_rate": 0.00030519595445108487, "loss": 3.0373, "step": 30270 }, { "epoch": 1.48, "grad_norm": 0.5908574461936951, "learning_rate": 0.0003051805605632628, "loss": 2.9094, "step": 30271 }, { "epoch": 1.48, "grad_norm": 0.6578699946403503, "learning_rate": 0.00030516516666179605, "loss": 3.124, "step": 30272 }, { "epoch": 1.48, "grad_norm": 0.6305190324783325, "learning_rate": 0.0003051497727467253, "loss": 3.0748, "step": 30273 }, { "epoch": 1.48, "grad_norm": 0.5935174226760864, "learning_rate": 0.000305134378818091, "loss": 2.9313, "step": 30274 }, { "epoch": 1.48, "grad_norm": 0.5942251682281494, "learning_rate": 0.00030511898487593374, "loss": 2.8511, "step": 30275 }, { "epoch": 1.48, "grad_norm": 0.5874195694923401, "learning_rate": 0.00030510359092029407, "loss": 3.1529, "step": 30276 }, { "epoch": 1.48, "grad_norm": 0.5620760917663574, "learning_rate": 0.00030508819695121246, "loss": 3.1332, "step": 30277 }, { "epoch": 1.48, "grad_norm": 0.573728084564209, "learning_rate": 0.0003050728029687297, "loss": 3.0645, "step": 30278 }, { "epoch": 1.48, "grad_norm": 0.5932513475418091, "learning_rate": 0.000305057408972886, "loss": 3.1181, "step": 30279 }, { "epoch": 1.48, "grad_norm": 0.6048024296760559, "learning_rate": 0.00030504201496372205, "loss": 3.0854, "step": 30280 }, { "epoch": 1.48, "grad_norm": 0.5609991550445557, "learning_rate": 0.0003050266209412784, "loss": 3.0611, "step": 30281 }, { "epoch": 1.48, "grad_norm": 0.598209023475647, "learning_rate": 0.00030501122690559576, "loss": 3.0283, "step": 30282 }, { "epoch": 1.48, "grad_norm": 0.5940065979957581, "learning_rate": 0.00030499583285671433, "loss": 3.0705, "step": 30283 }, { "epoch": 1.48, "grad_norm": 0.6010187268257141, "learning_rate": 0.0003049804387946749, "loss": 3.1678, "step": 30284 }, { "epoch": 1.48, "grad_norm": 0.6073846220970154, "learning_rate": 0.00030496504471951794, "loss": 3.0441, "step": 30285 }, { "epoch": 1.48, "grad_norm": 0.6591005325317383, "learning_rate": 0.000304949650631284, "loss": 2.9177, "step": 30286 }, { "epoch": 1.48, "grad_norm": 0.6189901828765869, "learning_rate": 0.0003049342565300137, "loss": 3.0777, "step": 30287 }, { "epoch": 1.48, "grad_norm": 0.6542283892631531, "learning_rate": 0.00030491886241574743, "loss": 2.9349, "step": 30288 }, { "epoch": 1.48, "grad_norm": 0.5866391062736511, "learning_rate": 0.00030490346828852594, "loss": 3.0771, "step": 30289 }, { "epoch": 1.48, "grad_norm": 0.6572050452232361, "learning_rate": 0.00030488807414838953, "loss": 3.0787, "step": 30290 }, { "epoch": 1.48, "grad_norm": 0.6037701368331909, "learning_rate": 0.00030487267999537894, "loss": 3.1791, "step": 30291 }, { "epoch": 1.48, "grad_norm": 0.6054988503456116, "learning_rate": 0.0003048572858295346, "loss": 2.9688, "step": 30292 }, { "epoch": 1.48, "grad_norm": 0.6137593984603882, "learning_rate": 0.00030484189165089714, "loss": 3.1096, "step": 30293 }, { "epoch": 1.48, "grad_norm": 0.5879693031311035, "learning_rate": 0.00030482649745950714, "loss": 3.1275, "step": 30294 }, { "epoch": 1.48, "grad_norm": 0.588402271270752, "learning_rate": 0.00030481110325540487, "loss": 3.0677, "step": 30295 }, { "epoch": 1.48, "grad_norm": 0.5765928030014038, "learning_rate": 0.00030479570903863126, "loss": 3.2167, "step": 30296 }, { "epoch": 1.48, "grad_norm": 0.6110025644302368, "learning_rate": 0.0003047803148092267, "loss": 3.1337, "step": 30297 }, { "epoch": 1.48, "grad_norm": 0.6048384308815002, "learning_rate": 0.00030476492056723156, "loss": 2.8711, "step": 30298 }, { "epoch": 1.48, "grad_norm": 0.6859794855117798, "learning_rate": 0.0003047495263126867, "loss": 3.0686, "step": 30299 }, { "epoch": 1.48, "grad_norm": 0.6263118982315063, "learning_rate": 0.0003047341320456324, "loss": 3.052, "step": 30300 }, { "epoch": 1.48, "grad_norm": 0.5739185214042664, "learning_rate": 0.00030471873776610926, "loss": 3.1724, "step": 30301 }, { "epoch": 1.49, "grad_norm": 0.6050714254379272, "learning_rate": 0.00030470334347415794, "loss": 2.7828, "step": 30302 }, { "epoch": 1.49, "grad_norm": 0.5985298156738281, "learning_rate": 0.0003046879491698189, "loss": 3.2557, "step": 30303 }, { "epoch": 1.49, "grad_norm": 0.6600600481033325, "learning_rate": 0.00030467255485313276, "loss": 3.1668, "step": 30304 }, { "epoch": 1.49, "grad_norm": 0.6146441698074341, "learning_rate": 0.00030465716052413994, "loss": 2.9902, "step": 30305 }, { "epoch": 1.49, "grad_norm": 0.5583391785621643, "learning_rate": 0.00030464176618288106, "loss": 3.1554, "step": 30306 }, { "epoch": 1.49, "grad_norm": 0.5961558222770691, "learning_rate": 0.00030462637182939676, "loss": 3.1391, "step": 30307 }, { "epoch": 1.49, "grad_norm": 0.5696814656257629, "learning_rate": 0.00030461097746372744, "loss": 2.9831, "step": 30308 }, { "epoch": 1.49, "grad_norm": 0.5873986482620239, "learning_rate": 0.00030459558308591365, "loss": 3.1334, "step": 30309 }, { "epoch": 1.49, "grad_norm": 0.5988091230392456, "learning_rate": 0.0003045801886959959, "loss": 2.9959, "step": 30310 }, { "epoch": 1.49, "grad_norm": 0.5809959173202515, "learning_rate": 0.000304564794294015, "loss": 3.0092, "step": 30311 }, { "epoch": 1.49, "grad_norm": 0.647441565990448, "learning_rate": 0.00030454939988001114, "loss": 3.0226, "step": 30312 }, { "epoch": 1.49, "grad_norm": 0.5705923438072205, "learning_rate": 0.0003045340054540251, "loss": 3.0666, "step": 30313 }, { "epoch": 1.49, "grad_norm": 0.5672298669815063, "learning_rate": 0.00030451861101609746, "loss": 2.9993, "step": 30314 }, { "epoch": 1.49, "grad_norm": 0.6036275029182434, "learning_rate": 0.0003045032165662686, "loss": 3.2952, "step": 30315 }, { "epoch": 1.49, "grad_norm": 0.5786696672439575, "learning_rate": 0.00030448782210457906, "loss": 3.0459, "step": 30316 }, { "epoch": 1.49, "grad_norm": 0.5882992148399353, "learning_rate": 0.0003044724276310695, "loss": 3.1442, "step": 30317 }, { "epoch": 1.49, "grad_norm": 0.5856705904006958, "learning_rate": 0.0003044570331457805, "loss": 3.0808, "step": 30318 }, { "epoch": 1.49, "grad_norm": 0.5906365513801575, "learning_rate": 0.0003044416386487525, "loss": 3.0573, "step": 30319 }, { "epoch": 1.49, "grad_norm": 0.587424635887146, "learning_rate": 0.00030442624414002607, "loss": 2.9874, "step": 30320 }, { "epoch": 1.49, "grad_norm": 0.5522072315216064, "learning_rate": 0.00030441084961964164, "loss": 3.0988, "step": 30321 }, { "epoch": 1.49, "grad_norm": 0.617243230342865, "learning_rate": 0.0003043954550876401, "loss": 2.9937, "step": 30322 }, { "epoch": 1.49, "grad_norm": 0.5649192929267883, "learning_rate": 0.00030438006054406166, "loss": 3.0672, "step": 30323 }, { "epoch": 1.49, "grad_norm": 0.5680512189865112, "learning_rate": 0.000304364665988947, "loss": 2.9709, "step": 30324 }, { "epoch": 1.49, "grad_norm": 0.560331404209137, "learning_rate": 0.00030434927142233663, "loss": 2.9678, "step": 30325 }, { "epoch": 1.49, "grad_norm": 0.652197539806366, "learning_rate": 0.00030433387684427113, "loss": 2.8437, "step": 30326 }, { "epoch": 1.49, "grad_norm": 0.5792146921157837, "learning_rate": 0.000304318482254791, "loss": 3.1141, "step": 30327 }, { "epoch": 1.49, "grad_norm": 0.589543342590332, "learning_rate": 0.0003043030876539368, "loss": 3.0839, "step": 30328 }, { "epoch": 1.49, "grad_norm": 0.613621175289154, "learning_rate": 0.0003042876930417493, "loss": 3.0055, "step": 30329 }, { "epoch": 1.49, "grad_norm": 0.5933493971824646, "learning_rate": 0.00030427229841826863, "loss": 2.9089, "step": 30330 }, { "epoch": 1.49, "grad_norm": 0.5833031535148621, "learning_rate": 0.00030425690378353557, "loss": 3.0106, "step": 30331 }, { "epoch": 1.49, "grad_norm": 0.6349642872810364, "learning_rate": 0.00030424150913759067, "loss": 3.0396, "step": 30332 }, { "epoch": 1.49, "grad_norm": 0.6486304998397827, "learning_rate": 0.0003042261144804745, "loss": 3.0428, "step": 30333 }, { "epoch": 1.49, "grad_norm": 0.6241731643676758, "learning_rate": 0.0003042107198122275, "loss": 3.0911, "step": 30334 }, { "epoch": 1.49, "grad_norm": 0.6061176657676697, "learning_rate": 0.0003041953251328903, "loss": 2.8672, "step": 30335 }, { "epoch": 1.49, "grad_norm": 0.5616305470466614, "learning_rate": 0.00030417993044250344, "loss": 3.0808, "step": 30336 }, { "epoch": 1.49, "grad_norm": 0.5880496501922607, "learning_rate": 0.00030416453574110734, "loss": 3.1441, "step": 30337 }, { "epoch": 1.49, "grad_norm": 0.5873252749443054, "learning_rate": 0.0003041491410287428, "loss": 3.2033, "step": 30338 }, { "epoch": 1.49, "grad_norm": 0.5877861380577087, "learning_rate": 0.0003041337463054501, "loss": 3.1364, "step": 30339 }, { "epoch": 1.49, "grad_norm": 0.6224821209907532, "learning_rate": 0.00030411835157127007, "loss": 3.01, "step": 30340 }, { "epoch": 1.49, "grad_norm": 0.5856956839561462, "learning_rate": 0.0003041029568262429, "loss": 3.1922, "step": 30341 }, { "epoch": 1.49, "grad_norm": 0.6419745683670044, "learning_rate": 0.0003040875620704094, "loss": 3.0602, "step": 30342 }, { "epoch": 1.49, "grad_norm": 0.7789932489395142, "learning_rate": 0.0003040721673038101, "loss": 2.9055, "step": 30343 }, { "epoch": 1.49, "grad_norm": 0.5722334384918213, "learning_rate": 0.00030405677252648547, "loss": 3.3164, "step": 30344 }, { "epoch": 1.49, "grad_norm": 0.6217427849769592, "learning_rate": 0.0003040413777384761, "loss": 3.1159, "step": 30345 }, { "epoch": 1.49, "grad_norm": 0.6086733937263489, "learning_rate": 0.00030402598293982245, "loss": 2.9625, "step": 30346 }, { "epoch": 1.49, "grad_norm": 0.5802361965179443, "learning_rate": 0.00030401058813056526, "loss": 3.132, "step": 30347 }, { "epoch": 1.49, "grad_norm": 0.617330014705658, "learning_rate": 0.00030399519331074485, "loss": 2.8512, "step": 30348 }, { "epoch": 1.49, "grad_norm": 0.5500723123550415, "learning_rate": 0.00030397979848040187, "loss": 3.1054, "step": 30349 }, { "epoch": 1.49, "grad_norm": 0.5912415981292725, "learning_rate": 0.00030396440363957683, "loss": 2.6677, "step": 30350 }, { "epoch": 1.49, "grad_norm": 0.5975785255432129, "learning_rate": 0.0003039490087883104, "loss": 3.1374, "step": 30351 }, { "epoch": 1.49, "grad_norm": 0.6210824251174927, "learning_rate": 0.000303933613926643, "loss": 3.0865, "step": 30352 }, { "epoch": 1.49, "grad_norm": 0.5919585824012756, "learning_rate": 0.00030391821905461524, "loss": 2.9867, "step": 30353 }, { "epoch": 1.49, "grad_norm": 0.6664869785308838, "learning_rate": 0.00030390282417226764, "loss": 3.0154, "step": 30354 }, { "epoch": 1.49, "grad_norm": 0.6210564970970154, "learning_rate": 0.00030388742927964083, "loss": 3.0155, "step": 30355 }, { "epoch": 1.49, "grad_norm": 0.6251997947692871, "learning_rate": 0.0003038720343767752, "loss": 3.2516, "step": 30356 }, { "epoch": 1.49, "grad_norm": 0.6180994510650635, "learning_rate": 0.0003038566394637113, "loss": 2.9035, "step": 30357 }, { "epoch": 1.49, "grad_norm": 0.590447187423706, "learning_rate": 0.00030384124454048986, "loss": 3.1161, "step": 30358 }, { "epoch": 1.49, "grad_norm": 0.5581066608428955, "learning_rate": 0.00030382584960715137, "loss": 2.945, "step": 30359 }, { "epoch": 1.49, "grad_norm": 0.6092578768730164, "learning_rate": 0.0003038104546637362, "loss": 3.1729, "step": 30360 }, { "epoch": 1.49, "grad_norm": 0.5907177329063416, "learning_rate": 0.00030379505971028504, "loss": 2.8266, "step": 30361 }, { "epoch": 1.49, "grad_norm": 0.5585633516311646, "learning_rate": 0.0003037796647468385, "loss": 3.0163, "step": 30362 }, { "epoch": 1.49, "grad_norm": 0.5564131736755371, "learning_rate": 0.000303764269773437, "loss": 3.1012, "step": 30363 }, { "epoch": 1.49, "grad_norm": 0.578472375869751, "learning_rate": 0.00030374887479012115, "loss": 2.98, "step": 30364 }, { "epoch": 1.49, "grad_norm": 0.5936715602874756, "learning_rate": 0.00030373347979693145, "loss": 3.014, "step": 30365 }, { "epoch": 1.49, "grad_norm": 0.6830310821533203, "learning_rate": 0.00030371808479390857, "loss": 3.0181, "step": 30366 }, { "epoch": 1.49, "grad_norm": 0.5550583600997925, "learning_rate": 0.0003037026897810928, "loss": 3.1111, "step": 30367 }, { "epoch": 1.49, "grad_norm": 0.5920956134796143, "learning_rate": 0.000303687294758525, "loss": 3.0784, "step": 30368 }, { "epoch": 1.49, "grad_norm": 0.6229501366615295, "learning_rate": 0.00030367189972624564, "loss": 3.1064, "step": 30369 }, { "epoch": 1.49, "grad_norm": 0.6036462783813477, "learning_rate": 0.0003036565046842951, "loss": 2.8816, "step": 30370 }, { "epoch": 1.49, "grad_norm": 0.5995955467224121, "learning_rate": 0.00030364110963271404, "loss": 3.0448, "step": 30371 }, { "epoch": 1.49, "grad_norm": 0.5678886771202087, "learning_rate": 0.00030362571457154296, "loss": 3.1554, "step": 30372 }, { "epoch": 1.49, "grad_norm": 0.5802719593048096, "learning_rate": 0.0003036103195008225, "loss": 3.1493, "step": 30373 }, { "epoch": 1.49, "grad_norm": 0.5747050642967224, "learning_rate": 0.00030359492442059315, "loss": 3.1413, "step": 30374 }, { "epoch": 1.49, "grad_norm": 0.6112601161003113, "learning_rate": 0.0003035795293308954, "loss": 3.0674, "step": 30375 }, { "epoch": 1.49, "grad_norm": 0.5865578055381775, "learning_rate": 0.0003035641342317699, "loss": 2.955, "step": 30376 }, { "epoch": 1.49, "grad_norm": 0.7048220038414001, "learning_rate": 0.0003035487391232572, "loss": 3.0602, "step": 30377 }, { "epoch": 1.49, "grad_norm": 0.585225522518158, "learning_rate": 0.00030353334400539777, "loss": 2.9674, "step": 30378 }, { "epoch": 1.49, "grad_norm": 0.6342913508415222, "learning_rate": 0.0003035179488782322, "loss": 3.0666, "step": 30379 }, { "epoch": 1.49, "grad_norm": 0.5397252440452576, "learning_rate": 0.0003035025537418011, "loss": 3.041, "step": 30380 }, { "epoch": 1.49, "grad_norm": 0.5967416167259216, "learning_rate": 0.0003034871585961448, "loss": 3.2197, "step": 30381 }, { "epoch": 1.49, "grad_norm": 0.5769542455673218, "learning_rate": 0.000303471763441304, "loss": 3.0792, "step": 30382 }, { "epoch": 1.49, "grad_norm": 0.5737739205360413, "learning_rate": 0.00030345636827731936, "loss": 3.0399, "step": 30383 }, { "epoch": 1.49, "grad_norm": 0.5969375967979431, "learning_rate": 0.00030344097310423124, "loss": 3.2229, "step": 30384 }, { "epoch": 1.49, "grad_norm": 0.5473321676254272, "learning_rate": 0.00030342557792208027, "loss": 3.1632, "step": 30385 }, { "epoch": 1.49, "grad_norm": 0.5815901756286621, "learning_rate": 0.000303410182730907, "loss": 3.0973, "step": 30386 }, { "epoch": 1.49, "grad_norm": 0.5692906975746155, "learning_rate": 0.00030339478753075194, "loss": 2.9992, "step": 30387 }, { "epoch": 1.49, "grad_norm": 0.5809762477874756, "learning_rate": 0.0003033793923216557, "loss": 3.1701, "step": 30388 }, { "epoch": 1.49, "grad_norm": 0.5709482431411743, "learning_rate": 0.00030336399710365877, "loss": 3.102, "step": 30389 }, { "epoch": 1.49, "grad_norm": 0.578090250492096, "learning_rate": 0.0003033486018768017, "loss": 3.0336, "step": 30390 }, { "epoch": 1.49, "grad_norm": 0.6145226359367371, "learning_rate": 0.0003033332066411252, "loss": 2.9575, "step": 30391 }, { "epoch": 1.49, "grad_norm": 0.5879982113838196, "learning_rate": 0.00030331781139666947, "loss": 2.979, "step": 30392 }, { "epoch": 1.49, "grad_norm": 0.5679128766059875, "learning_rate": 0.0003033024161434753, "loss": 3.4305, "step": 30393 }, { "epoch": 1.49, "grad_norm": 0.6054519414901733, "learning_rate": 0.00030328702088158326, "loss": 3.1972, "step": 30394 }, { "epoch": 1.49, "grad_norm": 0.5590806603431702, "learning_rate": 0.0003032716256110339, "loss": 3.0287, "step": 30395 }, { "epoch": 1.49, "grad_norm": 0.660683274269104, "learning_rate": 0.00030325623033186763, "loss": 3.0268, "step": 30396 }, { "epoch": 1.49, "grad_norm": 0.5682603716850281, "learning_rate": 0.0003032408350441251, "loss": 3.1007, "step": 30397 }, { "epoch": 1.49, "grad_norm": 0.5996838808059692, "learning_rate": 0.00030322543974784676, "loss": 2.8001, "step": 30398 }, { "epoch": 1.49, "grad_norm": 0.5741966962814331, "learning_rate": 0.0003032100444430733, "loss": 3.041, "step": 30399 }, { "epoch": 1.49, "grad_norm": 0.604943573474884, "learning_rate": 0.0003031946491298452, "loss": 3.1721, "step": 30400 }, { "epoch": 1.49, "grad_norm": 0.5835703015327454, "learning_rate": 0.000303179253808203, "loss": 2.8629, "step": 30401 }, { "epoch": 1.49, "grad_norm": 0.5605635046958923, "learning_rate": 0.0003031638584781873, "loss": 2.9483, "step": 30402 }, { "epoch": 1.49, "grad_norm": 0.5778949856758118, "learning_rate": 0.0003031484631398385, "loss": 2.8903, "step": 30403 }, { "epoch": 1.49, "grad_norm": 0.5777247548103333, "learning_rate": 0.0003031330677931973, "loss": 3.0132, "step": 30404 }, { "epoch": 1.49, "grad_norm": 0.6003771424293518, "learning_rate": 0.0003031176724383042, "loss": 2.9525, "step": 30405 }, { "epoch": 1.49, "grad_norm": 0.6186116337776184, "learning_rate": 0.00030310227707519983, "loss": 2.8372, "step": 30406 }, { "epoch": 1.49, "grad_norm": 0.7651210427284241, "learning_rate": 0.0003030868817039246, "loss": 3.118, "step": 30407 }, { "epoch": 1.49, "grad_norm": 0.5799486637115479, "learning_rate": 0.00030307148632451904, "loss": 2.9309, "step": 30408 }, { "epoch": 1.49, "grad_norm": 0.5795913934707642, "learning_rate": 0.0003030560909370239, "loss": 2.9225, "step": 30409 }, { "epoch": 1.49, "grad_norm": 0.6140474081039429, "learning_rate": 0.0003030406955414796, "loss": 2.9041, "step": 30410 }, { "epoch": 1.49, "grad_norm": 0.5724575519561768, "learning_rate": 0.00030302530013792656, "loss": 3.0642, "step": 30411 }, { "epoch": 1.49, "grad_norm": 0.5742631554603577, "learning_rate": 0.0003030099047264056, "loss": 2.8497, "step": 30412 }, { "epoch": 1.49, "grad_norm": 0.6531141996383667, "learning_rate": 0.0003029945093069571, "loss": 3.0945, "step": 30413 }, { "epoch": 1.49, "grad_norm": 0.5915018916130066, "learning_rate": 0.0003029791138796216, "loss": 3.1048, "step": 30414 }, { "epoch": 1.49, "grad_norm": 0.5942657589912415, "learning_rate": 0.0003029637184444397, "loss": 2.9497, "step": 30415 }, { "epoch": 1.49, "grad_norm": 0.5846244096755981, "learning_rate": 0.00030294832300145196, "loss": 2.9391, "step": 30416 }, { "epoch": 1.49, "grad_norm": 0.5968058109283447, "learning_rate": 0.0003029329275506989, "loss": 2.9962, "step": 30417 }, { "epoch": 1.49, "grad_norm": 0.5921707153320312, "learning_rate": 0.00030291753209222097, "loss": 3.0783, "step": 30418 }, { "epoch": 1.49, "grad_norm": 0.6028823852539062, "learning_rate": 0.00030290213662605896, "loss": 3.063, "step": 30419 }, { "epoch": 1.49, "grad_norm": 0.577496349811554, "learning_rate": 0.00030288674115225327, "loss": 3.1577, "step": 30420 }, { "epoch": 1.49, "grad_norm": 0.5886926054954529, "learning_rate": 0.0003028713456708444, "loss": 3.0918, "step": 30421 }, { "epoch": 1.49, "grad_norm": 0.6270083785057068, "learning_rate": 0.000302855950181873, "loss": 3.2039, "step": 30422 }, { "epoch": 1.49, "grad_norm": 0.5588559508323669, "learning_rate": 0.00030284055468537946, "loss": 2.9458, "step": 30423 }, { "epoch": 1.49, "grad_norm": 0.5802051424980164, "learning_rate": 0.0003028251591814046, "loss": 3.1478, "step": 30424 }, { "epoch": 1.49, "grad_norm": 0.5683912038803101, "learning_rate": 0.00030280976366998876, "loss": 2.8673, "step": 30425 }, { "epoch": 1.49, "grad_norm": 0.5958796143531799, "learning_rate": 0.0003027943681511726, "loss": 3.0611, "step": 30426 }, { "epoch": 1.49, "grad_norm": 0.5795177817344666, "learning_rate": 0.0003027789726249965, "loss": 2.9482, "step": 30427 }, { "epoch": 1.49, "grad_norm": 0.6204610466957092, "learning_rate": 0.0003027635770915012, "loss": 3.1761, "step": 30428 }, { "epoch": 1.49, "grad_norm": 0.5817151069641113, "learning_rate": 0.0003027481815507271, "loss": 3.0828, "step": 30429 }, { "epoch": 1.49, "grad_norm": 0.5747313499450684, "learning_rate": 0.00030273278600271485, "loss": 2.9711, "step": 30430 }, { "epoch": 1.49, "grad_norm": 0.6377949714660645, "learning_rate": 0.00030271739044750504, "loss": 2.9626, "step": 30431 }, { "epoch": 1.49, "grad_norm": 0.599992036819458, "learning_rate": 0.0003027019948851381, "loss": 3.3038, "step": 30432 }, { "epoch": 1.49, "grad_norm": 0.5743573307991028, "learning_rate": 0.00030268659931565465, "loss": 3.0086, "step": 30433 }, { "epoch": 1.49, "grad_norm": 0.5959254503250122, "learning_rate": 0.00030267120373909506, "loss": 3.1205, "step": 30434 }, { "epoch": 1.49, "grad_norm": 0.5806088447570801, "learning_rate": 0.00030265580815550024, "loss": 2.9793, "step": 30435 }, { "epoch": 1.49, "grad_norm": 0.6778531074523926, "learning_rate": 0.0003026404125649105, "loss": 2.9428, "step": 30436 }, { "epoch": 1.49, "grad_norm": 0.5825092792510986, "learning_rate": 0.0003026250169673663, "loss": 2.9743, "step": 30437 }, { "epoch": 1.49, "grad_norm": 0.5714017748832703, "learning_rate": 0.00030260962136290835, "loss": 2.9662, "step": 30438 }, { "epoch": 1.49, "grad_norm": 0.5609602928161621, "learning_rate": 0.0003025942257515772, "loss": 2.913, "step": 30439 }, { "epoch": 1.49, "grad_norm": 0.5940423011779785, "learning_rate": 0.00030257883013341336, "loss": 3.082, "step": 30440 }, { "epoch": 1.49, "grad_norm": 0.5933805704116821, "learning_rate": 0.0003025634345084573, "loss": 3.3069, "step": 30441 }, { "epoch": 1.49, "grad_norm": 0.6016649603843689, "learning_rate": 0.0003025480388767498, "loss": 3.1425, "step": 30442 }, { "epoch": 1.49, "grad_norm": 0.5964870452880859, "learning_rate": 0.0003025326432383312, "loss": 3.0939, "step": 30443 }, { "epoch": 1.49, "grad_norm": 0.6471694111824036, "learning_rate": 0.0003025172475932419, "loss": 3.2347, "step": 30444 }, { "epoch": 1.49, "grad_norm": 0.5884523987770081, "learning_rate": 0.00030250185194152286, "loss": 2.8428, "step": 30445 }, { "epoch": 1.49, "grad_norm": 0.6636972427368164, "learning_rate": 0.0003024864562832144, "loss": 3.0002, "step": 30446 }, { "epoch": 1.49, "grad_norm": 0.6032358407974243, "learning_rate": 0.00030247106061835713, "loss": 3.0426, "step": 30447 }, { "epoch": 1.49, "grad_norm": 0.6321635842323303, "learning_rate": 0.0003024556649469915, "loss": 2.8345, "step": 30448 }, { "epoch": 1.49, "grad_norm": 0.5757449865341187, "learning_rate": 0.00030244026926915814, "loss": 2.9545, "step": 30449 }, { "epoch": 1.49, "grad_norm": 0.5811001658439636, "learning_rate": 0.00030242487358489754, "loss": 3.0403, "step": 30450 }, { "epoch": 1.49, "grad_norm": 0.582600474357605, "learning_rate": 0.00030240947789425033, "loss": 2.9595, "step": 30451 }, { "epoch": 1.49, "grad_norm": 0.6360583305358887, "learning_rate": 0.000302394082197257, "loss": 2.8013, "step": 30452 }, { "epoch": 1.49, "grad_norm": 0.6164677739143372, "learning_rate": 0.00030237868649395815, "loss": 3.1337, "step": 30453 }, { "epoch": 1.49, "grad_norm": 0.5845510363578796, "learning_rate": 0.0003023632907843942, "loss": 3.1204, "step": 30454 }, { "epoch": 1.49, "grad_norm": 0.5880382657051086, "learning_rate": 0.00030234789506860594, "loss": 3.1311, "step": 30455 }, { "epoch": 1.49, "grad_norm": 0.6087186932563782, "learning_rate": 0.0003023324993466337, "loss": 3.1319, "step": 30456 }, { "epoch": 1.49, "grad_norm": 0.6022242307662964, "learning_rate": 0.00030231710361851814, "loss": 3.0978, "step": 30457 }, { "epoch": 1.49, "grad_norm": 0.5847160816192627, "learning_rate": 0.0003023017078842998, "loss": 2.8099, "step": 30458 }, { "epoch": 1.49, "grad_norm": 0.5767192840576172, "learning_rate": 0.00030228631214401905, "loss": 3.2496, "step": 30459 }, { "epoch": 1.49, "grad_norm": 0.5725535154342651, "learning_rate": 0.00030227091639771676, "loss": 3.0633, "step": 30460 }, { "epoch": 1.49, "grad_norm": 0.6081965565681458, "learning_rate": 0.00030225552064543326, "loss": 3.1437, "step": 30461 }, { "epoch": 1.49, "grad_norm": 0.6389308571815491, "learning_rate": 0.0003022401248872091, "loss": 3.1606, "step": 30462 }, { "epoch": 1.49, "grad_norm": 0.5722255110740662, "learning_rate": 0.0003022247291230849, "loss": 3.1019, "step": 30463 }, { "epoch": 1.49, "grad_norm": 0.6214210987091064, "learning_rate": 0.00030220933335310126, "loss": 3.0384, "step": 30464 }, { "epoch": 1.49, "grad_norm": 0.5633954405784607, "learning_rate": 0.0003021939375772986, "loss": 2.9655, "step": 30465 }, { "epoch": 1.49, "grad_norm": 0.583755373954773, "learning_rate": 0.00030217854179571756, "loss": 3.0302, "step": 30466 }, { "epoch": 1.49, "grad_norm": 0.5567439794540405, "learning_rate": 0.00030216314600839866, "loss": 3.1762, "step": 30467 }, { "epoch": 1.49, "grad_norm": 0.6298270225524902, "learning_rate": 0.00030214775021538244, "loss": 2.9571, "step": 30468 }, { "epoch": 1.49, "grad_norm": 0.5982662439346313, "learning_rate": 0.00030213235441670945, "loss": 2.9339, "step": 30469 }, { "epoch": 1.49, "grad_norm": 0.5737664103507996, "learning_rate": 0.00030211695861242024, "loss": 3.0476, "step": 30470 }, { "epoch": 1.49, "grad_norm": 0.5396885275840759, "learning_rate": 0.00030210156280255547, "loss": 3.04, "step": 30471 }, { "epoch": 1.49, "grad_norm": 0.6123176217079163, "learning_rate": 0.00030208616698715546, "loss": 2.9829, "step": 30472 }, { "epoch": 1.49, "grad_norm": 0.6010289788246155, "learning_rate": 0.00030207077116626093, "loss": 3.0169, "step": 30473 }, { "epoch": 1.49, "grad_norm": 0.5837356448173523, "learning_rate": 0.0003020553753399124, "loss": 3.0827, "step": 30474 }, { "epoch": 1.49, "grad_norm": 0.6117029190063477, "learning_rate": 0.0003020399795081504, "loss": 2.9524, "step": 30475 }, { "epoch": 1.49, "grad_norm": 0.595624566078186, "learning_rate": 0.0003020245836710155, "loss": 3.2267, "step": 30476 }, { "epoch": 1.49, "grad_norm": 0.5812867879867554, "learning_rate": 0.0003020091878285482, "loss": 3.0827, "step": 30477 }, { "epoch": 1.49, "grad_norm": 0.6493427157402039, "learning_rate": 0.0003019937919807891, "loss": 3.1577, "step": 30478 }, { "epoch": 1.49, "grad_norm": 0.5776815414428711, "learning_rate": 0.0003019783961277788, "loss": 2.8263, "step": 30479 }, { "epoch": 1.49, "grad_norm": 0.5874110460281372, "learning_rate": 0.00030196300026955763, "loss": 3.2444, "step": 30480 }, { "epoch": 1.49, "grad_norm": 0.5695233345031738, "learning_rate": 0.0003019476044061664, "loss": 2.9263, "step": 30481 }, { "epoch": 1.49, "grad_norm": 0.5816549062728882, "learning_rate": 0.0003019322085376456, "loss": 3.0367, "step": 30482 }, { "epoch": 1.49, "grad_norm": 0.5536554455757141, "learning_rate": 0.0003019168126640357, "loss": 2.8394, "step": 30483 }, { "epoch": 1.49, "grad_norm": 0.5806564688682556, "learning_rate": 0.00030190141678537724, "loss": 3.1847, "step": 30484 }, { "epoch": 1.49, "grad_norm": 0.5865193605422974, "learning_rate": 0.00030188602090171077, "loss": 3.1498, "step": 30485 }, { "epoch": 1.49, "grad_norm": 0.5693684220314026, "learning_rate": 0.000301870625013077, "loss": 3.0875, "step": 30486 }, { "epoch": 1.49, "grad_norm": 0.6434977650642395, "learning_rate": 0.00030185522911951634, "loss": 2.9599, "step": 30487 }, { "epoch": 1.49, "grad_norm": 0.6276722550392151, "learning_rate": 0.0003018398332210693, "loss": 3.3054, "step": 30488 }, { "epoch": 1.49, "grad_norm": 0.6278494000434875, "learning_rate": 0.0003018244373177765, "loss": 3.1279, "step": 30489 }, { "epoch": 1.49, "grad_norm": 0.639779806137085, "learning_rate": 0.0003018090414096786, "loss": 2.9098, "step": 30490 }, { "epoch": 1.49, "grad_norm": 0.58705735206604, "learning_rate": 0.00030179364549681595, "loss": 3.1877, "step": 30491 }, { "epoch": 1.49, "grad_norm": 0.6035036444664001, "learning_rate": 0.00030177824957922913, "loss": 3.1528, "step": 30492 }, { "epoch": 1.49, "grad_norm": 0.6042265295982361, "learning_rate": 0.00030176285365695883, "loss": 2.8595, "step": 30493 }, { "epoch": 1.49, "grad_norm": 0.5860368013381958, "learning_rate": 0.0003017474577300455, "loss": 3.2413, "step": 30494 }, { "epoch": 1.49, "grad_norm": 0.580884575843811, "learning_rate": 0.00030173206179852965, "loss": 2.9413, "step": 30495 }, { "epoch": 1.49, "grad_norm": 0.6143849492073059, "learning_rate": 0.00030171666586245187, "loss": 2.9255, "step": 30496 }, { "epoch": 1.49, "grad_norm": 0.5543144941329956, "learning_rate": 0.00030170126992185286, "loss": 3.1471, "step": 30497 }, { "epoch": 1.49, "grad_norm": 0.5888075232505798, "learning_rate": 0.0003016858739767729, "loss": 2.8926, "step": 30498 }, { "epoch": 1.49, "grad_norm": 0.6028613448143005, "learning_rate": 0.0003016704780272527, "loss": 3.1541, "step": 30499 }, { "epoch": 1.49, "grad_norm": 0.5923327207565308, "learning_rate": 0.00030165508207333284, "loss": 2.9867, "step": 30500 }, { "epoch": 1.49, "grad_norm": 0.5775189399719238, "learning_rate": 0.00030163968611505375, "loss": 3.1233, "step": 30501 }, { "epoch": 1.49, "grad_norm": 0.6112174391746521, "learning_rate": 0.000301624290152456, "loss": 3.0625, "step": 30502 }, { "epoch": 1.49, "grad_norm": 0.562967836856842, "learning_rate": 0.0003016088941855803, "loss": 2.9785, "step": 30503 }, { "epoch": 1.49, "grad_norm": 0.6123626828193665, "learning_rate": 0.00030159349821446703, "loss": 2.9996, "step": 30504 }, { "epoch": 1.49, "grad_norm": 0.578737199306488, "learning_rate": 0.00030157810223915675, "loss": 2.9256, "step": 30505 }, { "epoch": 1.5, "grad_norm": 0.6503991484642029, "learning_rate": 0.00030156270625969006, "loss": 3.0334, "step": 30506 }, { "epoch": 1.5, "grad_norm": 0.5757296085357666, "learning_rate": 0.00030154731027610753, "loss": 2.9777, "step": 30507 }, { "epoch": 1.5, "grad_norm": 0.5990521907806396, "learning_rate": 0.00030153191428844976, "loss": 3.0282, "step": 30508 }, { "epoch": 1.5, "grad_norm": 0.5953813791275024, "learning_rate": 0.0003015165182967572, "loss": 3.1037, "step": 30509 }, { "epoch": 1.5, "grad_norm": 0.5794540047645569, "learning_rate": 0.00030150112230107023, "loss": 3.0526, "step": 30510 }, { "epoch": 1.5, "grad_norm": 0.564243495464325, "learning_rate": 0.0003014857263014298, "loss": 2.8519, "step": 30511 }, { "epoch": 1.5, "grad_norm": 0.5954510569572449, "learning_rate": 0.0003014703302978762, "loss": 3.1894, "step": 30512 }, { "epoch": 1.5, "grad_norm": 0.6306339502334595, "learning_rate": 0.00030145493429044995, "loss": 3.0282, "step": 30513 }, { "epoch": 1.5, "grad_norm": 0.5806563496589661, "learning_rate": 0.00030143953827919175, "loss": 2.8584, "step": 30514 }, { "epoch": 1.5, "grad_norm": 0.6290732026100159, "learning_rate": 0.00030142414226414215, "loss": 3.2384, "step": 30515 }, { "epoch": 1.5, "grad_norm": 0.5588606595993042, "learning_rate": 0.00030140874624534153, "loss": 2.962, "step": 30516 }, { "epoch": 1.5, "grad_norm": 0.5796079635620117, "learning_rate": 0.00030139335022283056, "loss": 2.8573, "step": 30517 }, { "epoch": 1.5, "grad_norm": 0.5561217069625854, "learning_rate": 0.0003013779541966498, "loss": 2.8819, "step": 30518 }, { "epoch": 1.5, "grad_norm": 0.5866872072219849, "learning_rate": 0.00030136255816683985, "loss": 3.0356, "step": 30519 }, { "epoch": 1.5, "grad_norm": 0.5967084765434265, "learning_rate": 0.0003013471621334411, "loss": 2.724, "step": 30520 }, { "epoch": 1.5, "grad_norm": 0.5654155611991882, "learning_rate": 0.0003013317660964941, "loss": 3.045, "step": 30521 }, { "epoch": 1.5, "grad_norm": 0.5480029582977295, "learning_rate": 0.0003013163700560396, "loss": 3.0384, "step": 30522 }, { "epoch": 1.5, "grad_norm": 0.5529598593711853, "learning_rate": 0.000301300974012118, "loss": 2.8905, "step": 30523 }, { "epoch": 1.5, "grad_norm": 0.6232104301452637, "learning_rate": 0.0003012855779647699, "loss": 3.1526, "step": 30524 }, { "epoch": 1.5, "grad_norm": 0.5751103758811951, "learning_rate": 0.0003012701819140358, "loss": 3.1856, "step": 30525 }, { "epoch": 1.5, "grad_norm": 0.5563510060310364, "learning_rate": 0.0003012547858599563, "loss": 2.8839, "step": 30526 }, { "epoch": 1.5, "grad_norm": 0.597868800163269, "learning_rate": 0.00030123938980257197, "loss": 2.8558, "step": 30527 }, { "epoch": 1.5, "grad_norm": 0.5711178779602051, "learning_rate": 0.00030122399374192326, "loss": 3.1168, "step": 30528 }, { "epoch": 1.5, "grad_norm": 0.5784346461296082, "learning_rate": 0.0003012085976780509, "loss": 3.1221, "step": 30529 }, { "epoch": 1.5, "grad_norm": 0.5460003614425659, "learning_rate": 0.0003011932016109953, "loss": 2.9339, "step": 30530 }, { "epoch": 1.5, "grad_norm": 0.5536607503890991, "learning_rate": 0.0003011778055407969, "loss": 2.7398, "step": 30531 }, { "epoch": 1.5, "grad_norm": 0.5944807529449463, "learning_rate": 0.0003011624094674964, "loss": 2.9107, "step": 30532 }, { "epoch": 1.5, "grad_norm": 0.5808088183403015, "learning_rate": 0.00030114701339113454, "loss": 3.1568, "step": 30533 }, { "epoch": 1.5, "grad_norm": 0.6210165619850159, "learning_rate": 0.0003011316173117515, "loss": 3.0295, "step": 30534 }, { "epoch": 1.5, "grad_norm": 0.5955290794372559, "learning_rate": 0.00030111622122938803, "loss": 3.1209, "step": 30535 }, { "epoch": 1.5, "grad_norm": 0.608069121837616, "learning_rate": 0.0003011008251440846, "loss": 3.093, "step": 30536 }, { "epoch": 1.5, "grad_norm": 0.5967391133308411, "learning_rate": 0.00030108542905588195, "loss": 3.195, "step": 30537 }, { "epoch": 1.5, "grad_norm": 0.5866439938545227, "learning_rate": 0.0003010700329648204, "loss": 3.0273, "step": 30538 }, { "epoch": 1.5, "grad_norm": 0.6006977558135986, "learning_rate": 0.0003010546368709406, "loss": 3.0791, "step": 30539 }, { "epoch": 1.5, "grad_norm": 0.5997205972671509, "learning_rate": 0.000301039240774283, "loss": 2.8495, "step": 30540 }, { "epoch": 1.5, "grad_norm": 0.5799188613891602, "learning_rate": 0.0003010238446748884, "loss": 3.0779, "step": 30541 }, { "epoch": 1.5, "grad_norm": 0.6030257940292358, "learning_rate": 0.0003010084485727971, "loss": 2.7769, "step": 30542 }, { "epoch": 1.5, "grad_norm": 0.6359684467315674, "learning_rate": 0.0003009930524680497, "loss": 3.0085, "step": 30543 }, { "epoch": 1.5, "grad_norm": 0.5771874189376831, "learning_rate": 0.0003009776563606869, "loss": 2.8918, "step": 30544 }, { "epoch": 1.5, "grad_norm": 0.5872456431388855, "learning_rate": 0.0003009622602507491, "loss": 3.0065, "step": 30545 }, { "epoch": 1.5, "grad_norm": 0.6064155697822571, "learning_rate": 0.00030094686413827685, "loss": 3.1301, "step": 30546 }, { "epoch": 1.5, "grad_norm": 0.6292130351066589, "learning_rate": 0.0003009314680233107, "loss": 3.0003, "step": 30547 }, { "epoch": 1.5, "grad_norm": 0.6809849739074707, "learning_rate": 0.00030091607190589143, "loss": 3.2596, "step": 30548 }, { "epoch": 1.5, "grad_norm": 0.6155319213867188, "learning_rate": 0.00030090067578605927, "loss": 2.9799, "step": 30549 }, { "epoch": 1.5, "grad_norm": 0.5968411564826965, "learning_rate": 0.00030088527966385493, "loss": 3.0207, "step": 30550 }, { "epoch": 1.5, "grad_norm": 0.5945718288421631, "learning_rate": 0.0003008698835393189, "loss": 3.0553, "step": 30551 }, { "epoch": 1.5, "grad_norm": 0.654512882232666, "learning_rate": 0.0003008544874124918, "loss": 3.2307, "step": 30552 }, { "epoch": 1.5, "grad_norm": 0.5625406503677368, "learning_rate": 0.0003008390912834141, "loss": 3.1419, "step": 30553 }, { "epoch": 1.5, "grad_norm": 0.6100373864173889, "learning_rate": 0.00030082369515212643, "loss": 3.0421, "step": 30554 }, { "epoch": 1.5, "grad_norm": 0.6178447008132935, "learning_rate": 0.0003008082990186694, "loss": 3.1594, "step": 30555 }, { "epoch": 1.5, "grad_norm": 0.5651780962944031, "learning_rate": 0.0003007929028830833, "loss": 3.0438, "step": 30556 }, { "epoch": 1.5, "grad_norm": 0.5665957927703857, "learning_rate": 0.00030077750674540883, "loss": 2.9222, "step": 30557 }, { "epoch": 1.5, "grad_norm": 0.5908525586128235, "learning_rate": 0.00030076211060568666, "loss": 2.9867, "step": 30558 }, { "epoch": 1.5, "grad_norm": 0.5936393141746521, "learning_rate": 0.00030074671446395725, "loss": 2.962, "step": 30559 }, { "epoch": 1.5, "grad_norm": 0.6061809659004211, "learning_rate": 0.0003007313183202611, "loss": 3.1466, "step": 30560 }, { "epoch": 1.5, "grad_norm": 0.5717028975486755, "learning_rate": 0.00030071592217463885, "loss": 2.985, "step": 30561 }, { "epoch": 1.5, "grad_norm": 0.596230149269104, "learning_rate": 0.00030070052602713097, "loss": 3.0271, "step": 30562 }, { "epoch": 1.5, "grad_norm": 0.6109982132911682, "learning_rate": 0.000300685129877778, "loss": 3.0328, "step": 30563 }, { "epoch": 1.5, "grad_norm": 0.5894909501075745, "learning_rate": 0.0003006697337266206, "loss": 2.791, "step": 30564 }, { "epoch": 1.5, "grad_norm": 0.5837761163711548, "learning_rate": 0.0003006543375736991, "loss": 2.8944, "step": 30565 }, { "epoch": 1.5, "grad_norm": 0.5935973525047302, "learning_rate": 0.0003006389414190544, "loss": 2.9046, "step": 30566 }, { "epoch": 1.5, "grad_norm": 0.6078287959098816, "learning_rate": 0.0003006235452627267, "loss": 3.2071, "step": 30567 }, { "epoch": 1.5, "grad_norm": 0.6492429971694946, "learning_rate": 0.0003006081491047567, "loss": 3.0934, "step": 30568 }, { "epoch": 1.5, "grad_norm": 0.5882315635681152, "learning_rate": 0.00030059275294518507, "loss": 2.9978, "step": 30569 }, { "epoch": 1.5, "grad_norm": 0.6093411445617676, "learning_rate": 0.0003005773567840522, "loss": 3.0642, "step": 30570 }, { "epoch": 1.5, "grad_norm": 0.5847349762916565, "learning_rate": 0.0003005619606213987, "loss": 2.9372, "step": 30571 }, { "epoch": 1.5, "grad_norm": 0.5897664427757263, "learning_rate": 0.00030054656445726495, "loss": 3.059, "step": 30572 }, { "epoch": 1.5, "grad_norm": 0.5819639563560486, "learning_rate": 0.0003005311682916918, "loss": 2.9334, "step": 30573 }, { "epoch": 1.5, "grad_norm": 0.5720083713531494, "learning_rate": 0.00030051577212471964, "loss": 2.8956, "step": 30574 }, { "epoch": 1.5, "grad_norm": 0.6046839356422424, "learning_rate": 0.00030050037595638905, "loss": 3.0251, "step": 30575 }, { "epoch": 1.5, "grad_norm": 0.618550717830658, "learning_rate": 0.00030048497978674057, "loss": 2.8597, "step": 30576 }, { "epoch": 1.5, "grad_norm": 0.5971773266792297, "learning_rate": 0.0003004695836158147, "loss": 2.9743, "step": 30577 }, { "epoch": 1.5, "grad_norm": 0.6202574968338013, "learning_rate": 0.00030045418744365204, "loss": 3.063, "step": 30578 }, { "epoch": 1.5, "grad_norm": 0.6273716688156128, "learning_rate": 0.00030043879127029314, "loss": 3.1348, "step": 30579 }, { "epoch": 1.5, "grad_norm": 0.6152206063270569, "learning_rate": 0.0003004233950957785, "loss": 3.1494, "step": 30580 }, { "epoch": 1.5, "grad_norm": 0.5668372511863708, "learning_rate": 0.0003004079989201489, "loss": 3.1972, "step": 30581 }, { "epoch": 1.5, "grad_norm": 0.5945584177970886, "learning_rate": 0.0003003926027434445, "loss": 2.9756, "step": 30582 }, { "epoch": 1.5, "grad_norm": 0.68867427110672, "learning_rate": 0.0003003772065657061, "loss": 2.9829, "step": 30583 }, { "epoch": 1.5, "grad_norm": 0.6505774259567261, "learning_rate": 0.00030036181038697434, "loss": 3.1015, "step": 30584 }, { "epoch": 1.5, "grad_norm": 0.5589194297790527, "learning_rate": 0.00030034641420728956, "loss": 2.9296, "step": 30585 }, { "epoch": 1.5, "grad_norm": 0.5537344217300415, "learning_rate": 0.0003003310180266924, "loss": 2.9285, "step": 30586 }, { "epoch": 1.5, "grad_norm": 0.6337488889694214, "learning_rate": 0.00030031562184522326, "loss": 3.0206, "step": 30587 }, { "epoch": 1.5, "grad_norm": 0.6181750297546387, "learning_rate": 0.00030030022566292306, "loss": 2.8851, "step": 30588 }, { "epoch": 1.5, "grad_norm": 0.5827994346618652, "learning_rate": 0.00030028482947983204, "loss": 3.1003, "step": 30589 }, { "epoch": 1.5, "grad_norm": 0.6145060658454895, "learning_rate": 0.0003002694332959908, "loss": 3.0419, "step": 30590 }, { "epoch": 1.5, "grad_norm": 0.6019697189331055, "learning_rate": 0.00030025403711143997, "loss": 2.9749, "step": 30591 }, { "epoch": 1.5, "grad_norm": 0.625700056552887, "learning_rate": 0.0003002386409262201, "loss": 3.0995, "step": 30592 }, { "epoch": 1.5, "grad_norm": 0.5388084650039673, "learning_rate": 0.00030022324474037154, "loss": 2.9934, "step": 30593 }, { "epoch": 1.5, "grad_norm": 0.626770555973053, "learning_rate": 0.0003002078485539351, "loss": 3.071, "step": 30594 }, { "epoch": 1.5, "grad_norm": 0.5812084078788757, "learning_rate": 0.00030019245236695126, "loss": 2.9377, "step": 30595 }, { "epoch": 1.5, "grad_norm": 0.6054795384407043, "learning_rate": 0.0003001770561794605, "loss": 2.7962, "step": 30596 }, { "epoch": 1.5, "grad_norm": 0.5243318676948547, "learning_rate": 0.00030016165999150336, "loss": 3.0612, "step": 30597 }, { "epoch": 1.5, "grad_norm": 0.562089741230011, "learning_rate": 0.0003001462638031204, "loss": 3.0272, "step": 30598 }, { "epoch": 1.5, "grad_norm": 0.5881873369216919, "learning_rate": 0.0003001308676143524, "loss": 3.2251, "step": 30599 }, { "epoch": 1.5, "grad_norm": 0.5781797170639038, "learning_rate": 0.00030011547142523955, "loss": 2.8792, "step": 30600 }, { "epoch": 1.5, "grad_norm": 0.6735518574714661, "learning_rate": 0.0003001000752358226, "loss": 3.1419, "step": 30601 }, { "epoch": 1.5, "grad_norm": 0.5663983821868896, "learning_rate": 0.0003000846790461421, "loss": 2.9512, "step": 30602 }, { "epoch": 1.5, "grad_norm": 0.6189948320388794, "learning_rate": 0.00030006928285623857, "loss": 2.9013, "step": 30603 }, { "epoch": 1.5, "grad_norm": 0.6442484855651855, "learning_rate": 0.00030005388666615255, "loss": 3.1847, "step": 30604 }, { "epoch": 1.5, "grad_norm": 0.629422128200531, "learning_rate": 0.0003000384904759246, "loss": 3.0761, "step": 30605 }, { "epoch": 1.5, "grad_norm": 0.5894017219543457, "learning_rate": 0.0003000230942855954, "loss": 3.1667, "step": 30606 }, { "epoch": 1.5, "grad_norm": 0.5657310485839844, "learning_rate": 0.00030000769809520526, "loss": 3.1474, "step": 30607 }, { "epoch": 1.5, "grad_norm": 0.664206862449646, "learning_rate": 0.0002999923019047948, "loss": 2.9338, "step": 30608 }, { "epoch": 1.5, "grad_norm": 0.5959284901618958, "learning_rate": 0.0002999769057144046, "loss": 3.0428, "step": 30609 }, { "epoch": 1.5, "grad_norm": 0.5982975959777832, "learning_rate": 0.00029996150952407534, "loss": 2.9871, "step": 30610 }, { "epoch": 1.5, "grad_norm": 0.6166161894798279, "learning_rate": 0.00029994611333384745, "loss": 3.0232, "step": 30611 }, { "epoch": 1.5, "grad_norm": 0.5803634524345398, "learning_rate": 0.0002999307171437613, "loss": 3.2022, "step": 30612 }, { "epoch": 1.5, "grad_norm": 0.5776461958885193, "learning_rate": 0.0002999153209538579, "loss": 2.9408, "step": 30613 }, { "epoch": 1.5, "grad_norm": 0.5784315466880798, "learning_rate": 0.00029989992476417735, "loss": 3.2184, "step": 30614 }, { "epoch": 1.5, "grad_norm": 0.635517418384552, "learning_rate": 0.00029988452857476045, "loss": 3.1963, "step": 30615 }, { "epoch": 1.5, "grad_norm": 0.6005640625953674, "learning_rate": 0.0002998691323856476, "loss": 3.0256, "step": 30616 }, { "epoch": 1.5, "grad_norm": 0.5672207474708557, "learning_rate": 0.0002998537361968795, "loss": 3.0099, "step": 30617 }, { "epoch": 1.5, "grad_norm": 0.6029596328735352, "learning_rate": 0.00029983834000849664, "loss": 3.125, "step": 30618 }, { "epoch": 1.5, "grad_norm": 0.5798458456993103, "learning_rate": 0.0002998229438205395, "loss": 3.0502, "step": 30619 }, { "epoch": 1.5, "grad_norm": 0.5979757308959961, "learning_rate": 0.0002998075476330488, "loss": 3.0172, "step": 30620 }, { "epoch": 1.5, "grad_norm": 0.6162039041519165, "learning_rate": 0.0002997921514460649, "loss": 3.0594, "step": 30621 }, { "epoch": 1.5, "grad_norm": 0.5752620100975037, "learning_rate": 0.00029977675525962835, "loss": 3.0725, "step": 30622 }, { "epoch": 1.5, "grad_norm": 0.6150903701782227, "learning_rate": 0.00029976135907377994, "loss": 2.9637, "step": 30623 }, { "epoch": 1.5, "grad_norm": 0.607782781124115, "learning_rate": 0.00029974596288856003, "loss": 2.8398, "step": 30624 }, { "epoch": 1.5, "grad_norm": 0.5732407569885254, "learning_rate": 0.00029973056670400925, "loss": 2.9802, "step": 30625 }, { "epoch": 1.5, "grad_norm": 0.5856612324714661, "learning_rate": 0.0002997151705201679, "loss": 2.9968, "step": 30626 }, { "epoch": 1.5, "grad_norm": 0.5715699791908264, "learning_rate": 0.00029969977433707683, "loss": 3.0805, "step": 30627 }, { "epoch": 1.5, "grad_norm": 0.6128286123275757, "learning_rate": 0.0002996843781547767, "loss": 2.8269, "step": 30628 }, { "epoch": 1.5, "grad_norm": 0.5702658891677856, "learning_rate": 0.0002996689819733076, "loss": 2.8871, "step": 30629 }, { "epoch": 1.5, "grad_norm": 0.5865205526351929, "learning_rate": 0.0002996535857927105, "loss": 2.8866, "step": 30630 }, { "epoch": 1.5, "grad_norm": 0.5985194444656372, "learning_rate": 0.0002996381896130256, "loss": 3.1501, "step": 30631 }, { "epoch": 1.5, "grad_norm": 0.6110131144523621, "learning_rate": 0.0002996227934342938, "loss": 2.9409, "step": 30632 }, { "epoch": 1.5, "grad_norm": 0.5706114768981934, "learning_rate": 0.00029960739725655547, "loss": 3.0904, "step": 30633 }, { "epoch": 1.5, "grad_norm": 0.6214529275894165, "learning_rate": 0.0002995920010798511, "loss": 3.0926, "step": 30634 }, { "epoch": 1.5, "grad_norm": 0.6152591109275818, "learning_rate": 0.00029957660490422146, "loss": 3.1173, "step": 30635 }, { "epoch": 1.5, "grad_norm": 0.6281127333641052, "learning_rate": 0.00029956120872970686, "loss": 3.0511, "step": 30636 }, { "epoch": 1.5, "grad_norm": 0.6203930377960205, "learning_rate": 0.0002995458125563479, "loss": 3.079, "step": 30637 }, { "epoch": 1.5, "grad_norm": 0.603507399559021, "learning_rate": 0.0002995304163841853, "loss": 3.2659, "step": 30638 }, { "epoch": 1.5, "grad_norm": 0.5846010446548462, "learning_rate": 0.0002995150202132595, "loss": 3.1515, "step": 30639 }, { "epoch": 1.5, "grad_norm": 0.6017332673072815, "learning_rate": 0.000299499624043611, "loss": 2.8583, "step": 30640 }, { "epoch": 1.5, "grad_norm": 0.6173145771026611, "learning_rate": 0.0002994842278752803, "loss": 2.9607, "step": 30641 }, { "epoch": 1.5, "grad_norm": 0.6321989297866821, "learning_rate": 0.0002994688317083081, "loss": 3.1546, "step": 30642 }, { "epoch": 1.5, "grad_norm": 0.6010352373123169, "learning_rate": 0.00029945343554273505, "loss": 3.1106, "step": 30643 }, { "epoch": 1.5, "grad_norm": 0.6526492238044739, "learning_rate": 0.00029943803937860136, "loss": 3.2372, "step": 30644 }, { "epoch": 1.5, "grad_norm": 0.5786515474319458, "learning_rate": 0.0002994226432159479, "loss": 2.9052, "step": 30645 }, { "epoch": 1.5, "grad_norm": 0.5871065258979797, "learning_rate": 0.00029940724705481493, "loss": 3.2204, "step": 30646 }, { "epoch": 1.5, "grad_norm": 0.5986695289611816, "learning_rate": 0.0002993918508952432, "loss": 3.0846, "step": 30647 }, { "epoch": 1.5, "grad_norm": 0.5756219625473022, "learning_rate": 0.0002993764547372733, "loss": 2.9358, "step": 30648 }, { "epoch": 1.5, "grad_norm": 0.5979821681976318, "learning_rate": 0.0002993610585809456, "loss": 3.1195, "step": 30649 }, { "epoch": 1.5, "grad_norm": 0.6420942544937134, "learning_rate": 0.00029934566242630084, "loss": 3.1625, "step": 30650 }, { "epoch": 1.5, "grad_norm": 0.7682424783706665, "learning_rate": 0.0002993302662733795, "loss": 3.1441, "step": 30651 }, { "epoch": 1.5, "grad_norm": 0.5857537984848022, "learning_rate": 0.0002993148701222219, "loss": 2.9752, "step": 30652 }, { "epoch": 1.5, "grad_norm": 0.6025992035865784, "learning_rate": 0.00029929947397286903, "loss": 3.0435, "step": 30653 }, { "epoch": 1.5, "grad_norm": 0.6120580434799194, "learning_rate": 0.00029928407782536115, "loss": 3.14, "step": 30654 }, { "epoch": 1.5, "grad_norm": 0.5636025667190552, "learning_rate": 0.0002992686816797389, "loss": 3.1197, "step": 30655 }, { "epoch": 1.5, "grad_norm": 0.5800879001617432, "learning_rate": 0.0002992532855360427, "loss": 2.947, "step": 30656 }, { "epoch": 1.5, "grad_norm": 0.6197709441184998, "learning_rate": 0.00029923788939431324, "loss": 3.0448, "step": 30657 }, { "epoch": 1.5, "grad_norm": 0.6334044337272644, "learning_rate": 0.0002992224932545911, "loss": 2.8652, "step": 30658 }, { "epoch": 1.5, "grad_norm": 0.6068458557128906, "learning_rate": 0.0002992070971169166, "loss": 2.781, "step": 30659 }, { "epoch": 1.5, "grad_norm": 0.6140934824943542, "learning_rate": 0.0002991917009813307, "loss": 2.8921, "step": 30660 }, { "epoch": 1.5, "grad_norm": 0.593880295753479, "learning_rate": 0.00029917630484787357, "loss": 3.1028, "step": 30661 }, { "epoch": 1.5, "grad_norm": 0.5681495666503906, "learning_rate": 0.0002991609087165858, "loss": 3.0147, "step": 30662 }, { "epoch": 1.5, "grad_norm": 0.6396278738975525, "learning_rate": 0.0002991455125875082, "loss": 3.0758, "step": 30663 }, { "epoch": 1.5, "grad_norm": 0.6014585494995117, "learning_rate": 0.00029913011646068104, "loss": 3.1103, "step": 30664 }, { "epoch": 1.5, "grad_norm": 0.6441971659660339, "learning_rate": 0.0002991147203361451, "loss": 2.9616, "step": 30665 }, { "epoch": 1.5, "grad_norm": 0.5746193528175354, "learning_rate": 0.00029909932421394073, "loss": 2.9842, "step": 30666 }, { "epoch": 1.5, "grad_norm": 0.6001632213592529, "learning_rate": 0.0002990839280941086, "loss": 3.1733, "step": 30667 }, { "epoch": 1.5, "grad_norm": 0.5855227708816528, "learning_rate": 0.00029906853197668925, "loss": 3.0555, "step": 30668 }, { "epoch": 1.5, "grad_norm": 0.6073086857795715, "learning_rate": 0.0002990531358617231, "loss": 2.9114, "step": 30669 }, { "epoch": 1.5, "grad_norm": 0.6142853498458862, "learning_rate": 0.00029903773974925094, "loss": 3.2542, "step": 30670 }, { "epoch": 1.5, "grad_norm": 0.6134935617446899, "learning_rate": 0.00029902234363931305, "loss": 3.0456, "step": 30671 }, { "epoch": 1.5, "grad_norm": 0.6078312993049622, "learning_rate": 0.0002990069475319502, "loss": 3.1121, "step": 30672 }, { "epoch": 1.5, "grad_norm": 0.5812007784843445, "learning_rate": 0.0002989915514272029, "loss": 2.9169, "step": 30673 }, { "epoch": 1.5, "grad_norm": 0.5966639518737793, "learning_rate": 0.00029897615532511156, "loss": 3.0635, "step": 30674 }, { "epoch": 1.5, "grad_norm": 0.6440772414207458, "learning_rate": 0.00029896075922571693, "loss": 3.085, "step": 30675 }, { "epoch": 1.5, "grad_norm": 0.6082709431648254, "learning_rate": 0.00029894536312905943, "loss": 3.0807, "step": 30676 }, { "epoch": 1.5, "grad_norm": 0.585754930973053, "learning_rate": 0.0002989299670351795, "loss": 3.2426, "step": 30677 }, { "epoch": 1.5, "grad_norm": 0.5902261734008789, "learning_rate": 0.00029891457094411805, "loss": 2.7597, "step": 30678 }, { "epoch": 1.5, "grad_norm": 0.6010871529579163, "learning_rate": 0.0002988991748559153, "loss": 2.9683, "step": 30679 }, { "epoch": 1.5, "grad_norm": 0.6037760972976685, "learning_rate": 0.00029888377877061197, "loss": 3.1568, "step": 30680 }, { "epoch": 1.5, "grad_norm": 0.6149284839630127, "learning_rate": 0.0002988683826882484, "loss": 3.2423, "step": 30681 }, { "epoch": 1.5, "grad_norm": 0.897918164730072, "learning_rate": 0.00029885298660886546, "loss": 2.9964, "step": 30682 }, { "epoch": 1.5, "grad_norm": 0.5681318044662476, "learning_rate": 0.0002988375905325035, "loss": 3.1145, "step": 30683 }, { "epoch": 1.5, "grad_norm": 0.6103178262710571, "learning_rate": 0.00029882219445920305, "loss": 2.9291, "step": 30684 }, { "epoch": 1.5, "grad_norm": 0.5969374179840088, "learning_rate": 0.0002988067983890048, "loss": 3.1072, "step": 30685 }, { "epoch": 1.5, "grad_norm": 0.5936666131019592, "learning_rate": 0.00029879140232194916, "loss": 3.129, "step": 30686 }, { "epoch": 1.5, "grad_norm": 0.6052805185317993, "learning_rate": 0.0002987760062580768, "loss": 2.9546, "step": 30687 }, { "epoch": 1.5, "grad_norm": 0.5850530862808228, "learning_rate": 0.00029876061019742803, "loss": 2.9499, "step": 30688 }, { "epoch": 1.5, "grad_norm": 0.5777554512023926, "learning_rate": 0.0002987452141400436, "loss": 2.9389, "step": 30689 }, { "epoch": 1.5, "grad_norm": 0.5716623067855835, "learning_rate": 0.0002987298180859642, "loss": 2.9837, "step": 30690 }, { "epoch": 1.5, "grad_norm": 0.5977976322174072, "learning_rate": 0.0002987144220352301, "loss": 3.116, "step": 30691 }, { "epoch": 1.5, "grad_norm": 0.6488404273986816, "learning_rate": 0.000298699025987882, "loss": 3.0038, "step": 30692 }, { "epoch": 1.5, "grad_norm": 0.6428252458572388, "learning_rate": 0.0002986836299439604, "loss": 3.0782, "step": 30693 }, { "epoch": 1.5, "grad_norm": 0.6638103127479553, "learning_rate": 0.0002986682339035058, "loss": 3.1157, "step": 30694 }, { "epoch": 1.5, "grad_norm": 0.5968785285949707, "learning_rate": 0.00029865283786655896, "loss": 3.2548, "step": 30695 }, { "epoch": 1.5, "grad_norm": 0.6197184324264526, "learning_rate": 0.00029863744183316015, "loss": 3.1877, "step": 30696 }, { "epoch": 1.5, "grad_norm": 0.6367472410202026, "learning_rate": 0.00029862204580335017, "loss": 3.0486, "step": 30697 }, { "epoch": 1.5, "grad_norm": 0.644905149936676, "learning_rate": 0.00029860664977716944, "loss": 2.8002, "step": 30698 }, { "epoch": 1.5, "grad_norm": 0.6608884334564209, "learning_rate": 0.0002985912537546584, "loss": 3.0254, "step": 30699 }, { "epoch": 1.5, "grad_norm": 0.6148144006729126, "learning_rate": 0.00029857585773585785, "loss": 3.0313, "step": 30700 }, { "epoch": 1.5, "grad_norm": 0.6347461342811584, "learning_rate": 0.0002985604617208082, "loss": 2.7207, "step": 30701 }, { "epoch": 1.5, "grad_norm": 0.6112677454948425, "learning_rate": 0.00029854506570955005, "loss": 3.0828, "step": 30702 }, { "epoch": 1.5, "grad_norm": 0.5936033129692078, "learning_rate": 0.0002985296697021238, "loss": 3.1319, "step": 30703 }, { "epoch": 1.5, "grad_norm": 0.6078819632530212, "learning_rate": 0.0002985142736985702, "loss": 3.0869, "step": 30704 }, { "epoch": 1.5, "grad_norm": 0.6138762831687927, "learning_rate": 0.00029849887769892977, "loss": 2.7921, "step": 30705 }, { "epoch": 1.5, "grad_norm": 0.6070495247840881, "learning_rate": 0.0002984834817032429, "loss": 3.1173, "step": 30706 }, { "epoch": 1.5, "grad_norm": 0.6134839653968811, "learning_rate": 0.0002984680857115503, "loss": 2.9879, "step": 30707 }, { "epoch": 1.5, "grad_norm": 0.5913065671920776, "learning_rate": 0.00029845268972389247, "loss": 3.2981, "step": 30708 }, { "epoch": 1.5, "grad_norm": 0.5801486372947693, "learning_rate": 0.00029843729374030983, "loss": 3.3145, "step": 30709 }, { "epoch": 1.51, "grad_norm": 0.6102262139320374, "learning_rate": 0.00029842189776084325, "loss": 3.0449, "step": 30710 }, { "epoch": 1.51, "grad_norm": 0.5686670541763306, "learning_rate": 0.0002984065017855329, "loss": 2.991, "step": 30711 }, { "epoch": 1.51, "grad_norm": 0.6398444175720215, "learning_rate": 0.00029839110581441974, "loss": 3.1699, "step": 30712 }, { "epoch": 1.51, "grad_norm": 0.5702043175697327, "learning_rate": 0.00029837570984754395, "loss": 2.7375, "step": 30713 }, { "epoch": 1.51, "grad_norm": 0.5870870351791382, "learning_rate": 0.0002983603138849462, "loss": 3.0903, "step": 30714 }, { "epoch": 1.51, "grad_norm": 0.5614882707595825, "learning_rate": 0.00029834491792666716, "loss": 3.1141, "step": 30715 }, { "epoch": 1.51, "grad_norm": 0.56441730260849, "learning_rate": 0.00029832952197274726, "loss": 3.1426, "step": 30716 }, { "epoch": 1.51, "grad_norm": 0.5936001539230347, "learning_rate": 0.00029831412602322716, "loss": 3.0108, "step": 30717 }, { "epoch": 1.51, "grad_norm": 0.5969215035438538, "learning_rate": 0.00029829873007814714, "loss": 3.0834, "step": 30718 }, { "epoch": 1.51, "grad_norm": 0.6016257405281067, "learning_rate": 0.000298283334137548, "loss": 2.9115, "step": 30719 }, { "epoch": 1.51, "grad_norm": 0.6029444336891174, "learning_rate": 0.00029826793820147035, "loss": 3.0821, "step": 30720 }, { "epoch": 1.51, "grad_norm": 0.5698739886283875, "learning_rate": 0.00029825254226995446, "loss": 3.0591, "step": 30721 }, { "epoch": 1.51, "grad_norm": 0.595145046710968, "learning_rate": 0.00029823714634304117, "loss": 3.0994, "step": 30722 }, { "epoch": 1.51, "grad_norm": 0.6198235750198364, "learning_rate": 0.0002982217504207708, "loss": 2.9106, "step": 30723 }, { "epoch": 1.51, "grad_norm": 0.5928401350975037, "learning_rate": 0.000298206354503184, "loss": 2.9303, "step": 30724 }, { "epoch": 1.51, "grad_norm": 0.5822376608848572, "learning_rate": 0.0002981909585903214, "loss": 3.0786, "step": 30725 }, { "epoch": 1.51, "grad_norm": 0.5875365138053894, "learning_rate": 0.00029817556268222343, "loss": 2.81, "step": 30726 }, { "epoch": 1.51, "grad_norm": 0.6183528900146484, "learning_rate": 0.00029816016677893073, "loss": 3.1806, "step": 30727 }, { "epoch": 1.51, "grad_norm": 0.5537392497062683, "learning_rate": 0.00029814477088048366, "loss": 3.1093, "step": 30728 }, { "epoch": 1.51, "grad_norm": 0.5720838904380798, "learning_rate": 0.000298129374986923, "loss": 3.1148, "step": 30729 }, { "epoch": 1.51, "grad_norm": 0.6122817397117615, "learning_rate": 0.00029811397909828923, "loss": 3.0031, "step": 30730 }, { "epoch": 1.51, "grad_norm": 0.5819237232208252, "learning_rate": 0.00029809858321462276, "loss": 3.1225, "step": 30731 }, { "epoch": 1.51, "grad_norm": 0.6177088618278503, "learning_rate": 0.0002980831873359644, "loss": 2.9787, "step": 30732 }, { "epoch": 1.51, "grad_norm": 0.6123387813568115, "learning_rate": 0.00029806779146235437, "loss": 3.1079, "step": 30733 }, { "epoch": 1.51, "grad_norm": 0.6111491918563843, "learning_rate": 0.0002980523955938335, "loss": 2.9765, "step": 30734 }, { "epoch": 1.51, "grad_norm": 0.566396176815033, "learning_rate": 0.0002980369997304423, "loss": 2.815, "step": 30735 }, { "epoch": 1.51, "grad_norm": 0.588200032711029, "learning_rate": 0.0002980216038722212, "loss": 3.112, "step": 30736 }, { "epoch": 1.51, "grad_norm": 0.5717114806175232, "learning_rate": 0.00029800620801921087, "loss": 3.0413, "step": 30737 }, { "epoch": 1.51, "grad_norm": 0.6023069620132446, "learning_rate": 0.00029799081217145177, "loss": 3.1242, "step": 30738 }, { "epoch": 1.51, "grad_norm": 0.5799679756164551, "learning_rate": 0.00029797541632898444, "loss": 2.8477, "step": 30739 }, { "epoch": 1.51, "grad_norm": 0.5831611752510071, "learning_rate": 0.0002979600204918496, "loss": 3.0047, "step": 30740 }, { "epoch": 1.51, "grad_norm": 0.6182281374931335, "learning_rate": 0.00029794462466008756, "loss": 3.0421, "step": 30741 }, { "epoch": 1.51, "grad_norm": 0.5878432989120483, "learning_rate": 0.00029792922883373907, "loss": 2.879, "step": 30742 }, { "epoch": 1.51, "grad_norm": 0.5995323061943054, "learning_rate": 0.0002979138330128445, "loss": 2.9612, "step": 30743 }, { "epoch": 1.51, "grad_norm": 0.5848367810249329, "learning_rate": 0.00029789843719744453, "loss": 3.1774, "step": 30744 }, { "epoch": 1.51, "grad_norm": 0.5988871455192566, "learning_rate": 0.00029788304138757976, "loss": 3.0458, "step": 30745 }, { "epoch": 1.51, "grad_norm": 0.6023797988891602, "learning_rate": 0.0002978676455832905, "loss": 3.1648, "step": 30746 }, { "epoch": 1.51, "grad_norm": 0.6229612827301025, "learning_rate": 0.00029785224978461756, "loss": 3.0046, "step": 30747 }, { "epoch": 1.51, "grad_norm": 0.6516324877738953, "learning_rate": 0.00029783685399160134, "loss": 3.0324, "step": 30748 }, { "epoch": 1.51, "grad_norm": 0.6502434611320496, "learning_rate": 0.0002978214582042824, "loss": 2.9143, "step": 30749 }, { "epoch": 1.51, "grad_norm": 0.5838092565536499, "learning_rate": 0.00029780606242270135, "loss": 2.9889, "step": 30750 }, { "epoch": 1.51, "grad_norm": 0.5811021327972412, "learning_rate": 0.0002977906666468987, "loss": 3.0722, "step": 30751 }, { "epoch": 1.51, "grad_norm": 0.5942781567573547, "learning_rate": 0.0002977752708769151, "loss": 2.9056, "step": 30752 }, { "epoch": 1.51, "grad_norm": 0.5923861861228943, "learning_rate": 0.00029775987511279086, "loss": 3.1602, "step": 30753 }, { "epoch": 1.51, "grad_norm": 0.6492545008659363, "learning_rate": 0.00029774447935456663, "loss": 3.1609, "step": 30754 }, { "epoch": 1.51, "grad_norm": 0.6185020804405212, "learning_rate": 0.00029772908360228324, "loss": 2.8854, "step": 30755 }, { "epoch": 1.51, "grad_norm": 0.6205618381500244, "learning_rate": 0.00029771368785598085, "loss": 2.8574, "step": 30756 }, { "epoch": 1.51, "grad_norm": 0.590815544128418, "learning_rate": 0.0002976982921157003, "loss": 2.9544, "step": 30757 }, { "epoch": 1.51, "grad_norm": 0.6202199459075928, "learning_rate": 0.0002976828963814818, "loss": 3.0796, "step": 30758 }, { "epoch": 1.51, "grad_norm": 0.6064924597740173, "learning_rate": 0.00029766750065336625, "loss": 2.9535, "step": 30759 }, { "epoch": 1.51, "grad_norm": 0.6110441088676453, "learning_rate": 0.00029765210493139406, "loss": 3.1612, "step": 30760 }, { "epoch": 1.51, "grad_norm": 0.5866239666938782, "learning_rate": 0.00029763670921560567, "loss": 2.9289, "step": 30761 }, { "epoch": 1.51, "grad_norm": 0.5617367625236511, "learning_rate": 0.00029762131350604185, "loss": 3.1119, "step": 30762 }, { "epoch": 1.51, "grad_norm": 0.551780104637146, "learning_rate": 0.000297605917802743, "loss": 3.0988, "step": 30763 }, { "epoch": 1.51, "grad_norm": 0.5915526747703552, "learning_rate": 0.0002975905221057497, "loss": 3.0443, "step": 30764 }, { "epoch": 1.51, "grad_norm": 0.5596851110458374, "learning_rate": 0.0002975751264151024, "loss": 2.8931, "step": 30765 }, { "epoch": 1.51, "grad_norm": 0.6421846151351929, "learning_rate": 0.00029755973073084186, "loss": 2.9407, "step": 30766 }, { "epoch": 1.51, "grad_norm": 0.6003358364105225, "learning_rate": 0.00029754433505300857, "loss": 3.0634, "step": 30767 }, { "epoch": 1.51, "grad_norm": 0.5622581839561462, "learning_rate": 0.00029752893938164287, "loss": 3.1398, "step": 30768 }, { "epoch": 1.51, "grad_norm": 0.6357739567756653, "learning_rate": 0.0002975135437167856, "loss": 3.0894, "step": 30769 }, { "epoch": 1.51, "grad_norm": 0.6039325594902039, "learning_rate": 0.0002974981480584771, "loss": 3.1304, "step": 30770 }, { "epoch": 1.51, "grad_norm": 0.5737800002098083, "learning_rate": 0.00029748275240675795, "loss": 2.8875, "step": 30771 }, { "epoch": 1.51, "grad_norm": 0.6089869141578674, "learning_rate": 0.0002974673567616689, "loss": 2.9146, "step": 30772 }, { "epoch": 1.51, "grad_norm": 0.6089136600494385, "learning_rate": 0.0002974519611232502, "loss": 3.2106, "step": 30773 }, { "epoch": 1.51, "grad_norm": 0.5950151681900024, "learning_rate": 0.00029743656549154263, "loss": 2.8026, "step": 30774 }, { "epoch": 1.51, "grad_norm": 0.6248009204864502, "learning_rate": 0.00029742116986658664, "loss": 3.1683, "step": 30775 }, { "epoch": 1.51, "grad_norm": 0.6114397048950195, "learning_rate": 0.00029740577424842274, "loss": 2.9792, "step": 30776 }, { "epoch": 1.51, "grad_norm": 0.6554459929466248, "learning_rate": 0.00029739037863709165, "loss": 3.2027, "step": 30777 }, { "epoch": 1.51, "grad_norm": 0.6299651265144348, "learning_rate": 0.00029737498303263374, "loss": 3.015, "step": 30778 }, { "epoch": 1.51, "grad_norm": 0.6014512777328491, "learning_rate": 0.00029735958743508964, "loss": 3.058, "step": 30779 }, { "epoch": 1.51, "grad_norm": 0.6071848273277283, "learning_rate": 0.00029734419184449976, "loss": 3.0521, "step": 30780 }, { "epoch": 1.51, "grad_norm": 0.6110485792160034, "learning_rate": 0.00029732879626090484, "loss": 2.9316, "step": 30781 }, { "epoch": 1.51, "grad_norm": 0.5651352405548096, "learning_rate": 0.0002973134006843454, "loss": 3.0355, "step": 30782 }, { "epoch": 1.51, "grad_norm": 0.6445112824440002, "learning_rate": 0.00029729800511486186, "loss": 3.1326, "step": 30783 }, { "epoch": 1.51, "grad_norm": 0.582943320274353, "learning_rate": 0.00029728260955249496, "loss": 3.223, "step": 30784 }, { "epoch": 1.51, "grad_norm": 0.611379861831665, "learning_rate": 0.0002972672139972851, "loss": 2.8036, "step": 30785 }, { "epoch": 1.51, "grad_norm": 0.6126112937927246, "learning_rate": 0.0002972518184492728, "loss": 2.797, "step": 30786 }, { "epoch": 1.51, "grad_norm": 0.5877750515937805, "learning_rate": 0.0002972364229084988, "loss": 2.9347, "step": 30787 }, { "epoch": 1.51, "grad_norm": 0.6406726837158203, "learning_rate": 0.0002972210273750035, "loss": 3.1474, "step": 30788 }, { "epoch": 1.51, "grad_norm": 0.6040596961975098, "learning_rate": 0.0002972056318488275, "loss": 3.1666, "step": 30789 }, { "epoch": 1.51, "grad_norm": 0.6338227987289429, "learning_rate": 0.0002971902363300112, "loss": 3.1337, "step": 30790 }, { "epoch": 1.51, "grad_norm": 0.6211509704589844, "learning_rate": 0.00029717484081859534, "loss": 2.8265, "step": 30791 }, { "epoch": 1.51, "grad_norm": 0.5889843106269836, "learning_rate": 0.0002971594453146205, "loss": 3.1715, "step": 30792 }, { "epoch": 1.51, "grad_norm": 0.5887772440910339, "learning_rate": 0.00029714404981812695, "loss": 2.9016, "step": 30793 }, { "epoch": 1.51, "grad_norm": 0.5816041231155396, "learning_rate": 0.00029712865432915566, "loss": 2.9766, "step": 30794 }, { "epoch": 1.51, "grad_norm": 0.5636284947395325, "learning_rate": 0.00029711325884774673, "loss": 3.1463, "step": 30795 }, { "epoch": 1.51, "grad_norm": 0.5685577392578125, "learning_rate": 0.000297097863373941, "loss": 2.8435, "step": 30796 }, { "epoch": 1.51, "grad_norm": 0.6058424115180969, "learning_rate": 0.000297082467907779, "loss": 3.2246, "step": 30797 }, { "epoch": 1.51, "grad_norm": 0.6104329824447632, "learning_rate": 0.0002970670724493011, "loss": 3.0745, "step": 30798 }, { "epoch": 1.51, "grad_norm": 0.5837059020996094, "learning_rate": 0.00029705167699854804, "loss": 2.9483, "step": 30799 }, { "epoch": 1.51, "grad_norm": 0.6254569888114929, "learning_rate": 0.0002970362815555603, "loss": 2.8121, "step": 30800 }, { "epoch": 1.51, "grad_norm": 0.5854609608650208, "learning_rate": 0.00029702088612037835, "loss": 3.0961, "step": 30801 }, { "epoch": 1.51, "grad_norm": 0.5687035322189331, "learning_rate": 0.0002970054906930429, "loss": 2.8367, "step": 30802 }, { "epoch": 1.51, "grad_norm": 0.5669450759887695, "learning_rate": 0.00029699009527359437, "loss": 3.0268, "step": 30803 }, { "epoch": 1.51, "grad_norm": 0.5802821516990662, "learning_rate": 0.00029697469986207344, "loss": 3.1936, "step": 30804 }, { "epoch": 1.51, "grad_norm": 0.6279524564743042, "learning_rate": 0.0002969593044585204, "loss": 3.0683, "step": 30805 }, { "epoch": 1.51, "grad_norm": 0.5692258477210999, "learning_rate": 0.0002969439090629761, "loss": 3.2225, "step": 30806 }, { "epoch": 1.51, "grad_norm": 0.5868411064147949, "learning_rate": 0.00029692851367548096, "loss": 2.9523, "step": 30807 }, { "epoch": 1.51, "grad_norm": 0.5972650647163391, "learning_rate": 0.0002969131182960754, "loss": 2.8724, "step": 30808 }, { "epoch": 1.51, "grad_norm": 0.6030793786048889, "learning_rate": 0.0002968977229248002, "loss": 3.2454, "step": 30809 }, { "epoch": 1.51, "grad_norm": 0.6051518321037292, "learning_rate": 0.00029688232756169575, "loss": 3.1048, "step": 30810 }, { "epoch": 1.51, "grad_norm": 0.5919243097305298, "learning_rate": 0.0002968669322068026, "loss": 3.0596, "step": 30811 }, { "epoch": 1.51, "grad_norm": 0.6317167282104492, "learning_rate": 0.00029685153686016146, "loss": 2.9332, "step": 30812 }, { "epoch": 1.51, "grad_norm": 0.6132222414016724, "learning_rate": 0.0002968361415218127, "loss": 3.0974, "step": 30813 }, { "epoch": 1.51, "grad_norm": 0.621911346912384, "learning_rate": 0.000296820746191797, "loss": 3.1756, "step": 30814 }, { "epoch": 1.51, "grad_norm": 0.6402205228805542, "learning_rate": 0.0002968053508701548, "loss": 3.0177, "step": 30815 }, { "epoch": 1.51, "grad_norm": 0.5846377015113831, "learning_rate": 0.0002967899555569266, "loss": 3.0623, "step": 30816 }, { "epoch": 1.51, "grad_norm": 0.6023883819580078, "learning_rate": 0.00029677456025215324, "loss": 2.9456, "step": 30817 }, { "epoch": 1.51, "grad_norm": 0.6518478393554688, "learning_rate": 0.0002967591649558749, "loss": 3.2577, "step": 30818 }, { "epoch": 1.51, "grad_norm": 0.5860528945922852, "learning_rate": 0.0002967437696681324, "loss": 2.9268, "step": 30819 }, { "epoch": 1.51, "grad_norm": 0.5669897198677063, "learning_rate": 0.0002967283743889661, "loss": 2.9486, "step": 30820 }, { "epoch": 1.51, "grad_norm": 0.5880569815635681, "learning_rate": 0.00029671297911841663, "loss": 3.3732, "step": 30821 }, { "epoch": 1.51, "grad_norm": 0.588849663734436, "learning_rate": 0.00029669758385652466, "loss": 3.0296, "step": 30822 }, { "epoch": 1.51, "grad_norm": 0.5880824327468872, "learning_rate": 0.0002966821886033305, "loss": 3.1179, "step": 30823 }, { "epoch": 1.51, "grad_norm": 0.5957514047622681, "learning_rate": 0.00029666679335887486, "loss": 3.0971, "step": 30824 }, { "epoch": 1.51, "grad_norm": 0.646440327167511, "learning_rate": 0.00029665139812319824, "loss": 3.0703, "step": 30825 }, { "epoch": 1.51, "grad_norm": 0.6188963055610657, "learning_rate": 0.0002966360028963412, "loss": 2.8634, "step": 30826 }, { "epoch": 1.51, "grad_norm": 0.5998916625976562, "learning_rate": 0.00029662060767834427, "loss": 3.0819, "step": 30827 }, { "epoch": 1.51, "grad_norm": 0.5960819721221924, "learning_rate": 0.000296605212469248, "loss": 3.1007, "step": 30828 }, { "epoch": 1.51, "grad_norm": 0.6161120533943176, "learning_rate": 0.00029658981726909305, "loss": 2.9573, "step": 30829 }, { "epoch": 1.51, "grad_norm": 0.6249703168869019, "learning_rate": 0.0002965744220779197, "loss": 3.0573, "step": 30830 }, { "epoch": 1.51, "grad_norm": 0.6068581342697144, "learning_rate": 0.0002965590268957687, "loss": 3.1307, "step": 30831 }, { "epoch": 1.51, "grad_norm": 0.5972431302070618, "learning_rate": 0.0002965436317226807, "loss": 3.2302, "step": 30832 }, { "epoch": 1.51, "grad_norm": 0.5628926753997803, "learning_rate": 0.0002965282365586959, "loss": 3.204, "step": 30833 }, { "epoch": 1.51, "grad_norm": 0.6214185357093811, "learning_rate": 0.00029651284140385525, "loss": 3.0294, "step": 30834 }, { "epoch": 1.51, "grad_norm": 0.6001722812652588, "learning_rate": 0.0002964974462581989, "loss": 2.8884, "step": 30835 }, { "epoch": 1.51, "grad_norm": 0.6169463396072388, "learning_rate": 0.0002964820511217678, "loss": 2.9654, "step": 30836 }, { "epoch": 1.51, "grad_norm": 0.5828376412391663, "learning_rate": 0.00029646665599460223, "loss": 3.0695, "step": 30837 }, { "epoch": 1.51, "grad_norm": 0.6448942422866821, "learning_rate": 0.0002964512608767427, "loss": 3.1534, "step": 30838 }, { "epoch": 1.51, "grad_norm": 0.5935970544815063, "learning_rate": 0.00029643586576823006, "loss": 2.8783, "step": 30839 }, { "epoch": 1.51, "grad_norm": 0.6004209518432617, "learning_rate": 0.0002964204706691046, "loss": 2.9735, "step": 30840 }, { "epoch": 1.51, "grad_norm": 0.5771257877349854, "learning_rate": 0.0002964050755794068, "loss": 2.7251, "step": 30841 }, { "epoch": 1.51, "grad_norm": 0.5918455123901367, "learning_rate": 0.0002963896804991775, "loss": 2.9045, "step": 30842 }, { "epoch": 1.51, "grad_norm": 0.5754095911979675, "learning_rate": 0.000296374285428457, "loss": 3.2429, "step": 30843 }, { "epoch": 1.51, "grad_norm": 0.5826681852340698, "learning_rate": 0.000296358890367286, "loss": 3.0659, "step": 30844 }, { "epoch": 1.51, "grad_norm": 0.610785722732544, "learning_rate": 0.00029634349531570487, "loss": 3.0146, "step": 30845 }, { "epoch": 1.51, "grad_norm": 0.5650796890258789, "learning_rate": 0.0002963281002737544, "loss": 2.9735, "step": 30846 }, { "epoch": 1.51, "grad_norm": 0.6261953711509705, "learning_rate": 0.000296312705241475, "loss": 2.9293, "step": 30847 }, { "epoch": 1.51, "grad_norm": 0.5803135633468628, "learning_rate": 0.00029629731021890704, "loss": 3.1329, "step": 30848 }, { "epoch": 1.51, "grad_norm": 0.611264169216156, "learning_rate": 0.0002962819152060915, "loss": 3.0638, "step": 30849 }, { "epoch": 1.51, "grad_norm": 0.587598979473114, "learning_rate": 0.00029626652020306855, "loss": 3.1472, "step": 30850 }, { "epoch": 1.51, "grad_norm": 0.6119747161865234, "learning_rate": 0.0002962511252098789, "loss": 3.0915, "step": 30851 }, { "epoch": 1.51, "grad_norm": 0.6030398011207581, "learning_rate": 0.000296235730226563, "loss": 2.7529, "step": 30852 }, { "epoch": 1.51, "grad_norm": 0.6091843843460083, "learning_rate": 0.0002962203352531615, "loss": 3.1295, "step": 30853 }, { "epoch": 1.51, "grad_norm": 0.5616978406906128, "learning_rate": 0.00029620494028971496, "loss": 3.268, "step": 30854 }, { "epoch": 1.51, "grad_norm": 0.6114065647125244, "learning_rate": 0.0002961895453362638, "loss": 3.12, "step": 30855 }, { "epoch": 1.51, "grad_norm": 0.6025994420051575, "learning_rate": 0.00029617415039284874, "loss": 2.9599, "step": 30856 }, { "epoch": 1.51, "grad_norm": 0.5990439057350159, "learning_rate": 0.0002961587554595101, "loss": 3.1287, "step": 30857 }, { "epoch": 1.51, "grad_norm": 0.5876929759979248, "learning_rate": 0.0002961433605362886, "loss": 3.0938, "step": 30858 }, { "epoch": 1.51, "grad_norm": 0.6120889186859131, "learning_rate": 0.00029612796562322486, "loss": 3.1546, "step": 30859 }, { "epoch": 1.51, "grad_norm": 0.5933310985565186, "learning_rate": 0.00029611257072035917, "loss": 3.0761, "step": 30860 }, { "epoch": 1.51, "grad_norm": 0.646134614944458, "learning_rate": 0.0002960971758277323, "loss": 3.1372, "step": 30861 }, { "epoch": 1.51, "grad_norm": 0.5914332866668701, "learning_rate": 0.0002960817809453847, "loss": 2.8682, "step": 30862 }, { "epoch": 1.51, "grad_norm": 0.6064649820327759, "learning_rate": 0.00029606638607335687, "loss": 3.0682, "step": 30863 }, { "epoch": 1.51, "grad_norm": 0.5667405128479004, "learning_rate": 0.00029605099121168956, "loss": 3.1389, "step": 30864 }, { "epoch": 1.51, "grad_norm": 0.5688351988792419, "learning_rate": 0.0002960355963604231, "loss": 3.0559, "step": 30865 }, { "epoch": 1.51, "grad_norm": 0.5366978645324707, "learning_rate": 0.0002960202015195982, "loss": 3.0342, "step": 30866 }, { "epoch": 1.51, "grad_norm": 0.6322748064994812, "learning_rate": 0.00029600480668925516, "loss": 3.4253, "step": 30867 }, { "epoch": 1.51, "grad_norm": 0.6188519597053528, "learning_rate": 0.00029598941186943475, "loss": 3.2374, "step": 30868 }, { "epoch": 1.51, "grad_norm": 0.567204475402832, "learning_rate": 0.00029597401706017755, "loss": 3.1257, "step": 30869 }, { "epoch": 1.51, "grad_norm": 0.6143216490745544, "learning_rate": 0.00029595862226152386, "loss": 2.9782, "step": 30870 }, { "epoch": 1.51, "grad_norm": 0.6275702118873596, "learning_rate": 0.00029594322747351453, "loss": 3.0836, "step": 30871 }, { "epoch": 1.51, "grad_norm": 0.7560949325561523, "learning_rate": 0.0002959278326961899, "loss": 3.0159, "step": 30872 }, { "epoch": 1.51, "grad_norm": 0.6060622334480286, "learning_rate": 0.0002959124379295905, "loss": 3.2309, "step": 30873 }, { "epoch": 1.51, "grad_norm": 0.5544068813323975, "learning_rate": 0.0002958970431737571, "loss": 2.9914, "step": 30874 }, { "epoch": 1.51, "grad_norm": 0.5971145629882812, "learning_rate": 0.00029588164842872994, "loss": 3.0419, "step": 30875 }, { "epoch": 1.51, "grad_norm": 0.6468105912208557, "learning_rate": 0.0002958662536945499, "loss": 2.7897, "step": 30876 }, { "epoch": 1.51, "grad_norm": 0.6096239686012268, "learning_rate": 0.0002958508589712572, "loss": 3.1458, "step": 30877 }, { "epoch": 1.51, "grad_norm": 0.6860781908035278, "learning_rate": 0.00029583546425889255, "loss": 2.8722, "step": 30878 }, { "epoch": 1.51, "grad_norm": 0.5959381461143494, "learning_rate": 0.0002958200695574966, "loss": 3.0762, "step": 30879 }, { "epoch": 1.51, "grad_norm": 0.587128221988678, "learning_rate": 0.00029580467486710967, "loss": 3.1168, "step": 30880 }, { "epoch": 1.51, "grad_norm": 0.6013617515563965, "learning_rate": 0.00029578928018777253, "loss": 3.0795, "step": 30881 }, { "epoch": 1.51, "grad_norm": 0.6354458928108215, "learning_rate": 0.0002957738855195255, "loss": 3.2363, "step": 30882 }, { "epoch": 1.51, "grad_norm": 0.6061522960662842, "learning_rate": 0.0002957584908624093, "loss": 3.1117, "step": 30883 }, { "epoch": 1.51, "grad_norm": 0.6070138216018677, "learning_rate": 0.00029574309621646443, "loss": 2.9919, "step": 30884 }, { "epoch": 1.51, "grad_norm": 0.6107861399650574, "learning_rate": 0.0002957277015817314, "loss": 3.265, "step": 30885 }, { "epoch": 1.51, "grad_norm": 0.5738034844398499, "learning_rate": 0.0002957123069582508, "loss": 3.1544, "step": 30886 }, { "epoch": 1.51, "grad_norm": 0.6216245889663696, "learning_rate": 0.00029569691234606314, "loss": 2.9596, "step": 30887 }, { "epoch": 1.51, "grad_norm": 0.6213082671165466, "learning_rate": 0.0002956815177452089, "loss": 2.9898, "step": 30888 }, { "epoch": 1.51, "grad_norm": 0.6034766435623169, "learning_rate": 0.0002956661231557289, "loss": 3.1961, "step": 30889 }, { "epoch": 1.51, "grad_norm": 0.5714133977890015, "learning_rate": 0.0002956507285776634, "loss": 3.2041, "step": 30890 }, { "epoch": 1.51, "grad_norm": 0.5667616724967957, "learning_rate": 0.0002956353340110531, "loss": 3.1601, "step": 30891 }, { "epoch": 1.51, "grad_norm": 0.5944485068321228, "learning_rate": 0.00029561993945593834, "loss": 3.0033, "step": 30892 }, { "epoch": 1.51, "grad_norm": 0.5806108117103577, "learning_rate": 0.0002956045449123599, "loss": 2.8649, "step": 30893 }, { "epoch": 1.51, "grad_norm": 0.621354877948761, "learning_rate": 0.0002955891503803583, "loss": 3.0581, "step": 30894 }, { "epoch": 1.51, "grad_norm": 0.6162626147270203, "learning_rate": 0.00029557375585997393, "loss": 2.9909, "step": 30895 }, { "epoch": 1.51, "grad_norm": 0.5693962574005127, "learning_rate": 0.00029555836135124755, "loss": 3.117, "step": 30896 }, { "epoch": 1.51, "grad_norm": 0.7049458622932434, "learning_rate": 0.00029554296685421945, "loss": 3.0019, "step": 30897 }, { "epoch": 1.51, "grad_norm": 0.5936644077301025, "learning_rate": 0.0002955275723689304, "loss": 3.3479, "step": 30898 }, { "epoch": 1.51, "grad_norm": 0.5873288512229919, "learning_rate": 0.00029551217789542094, "loss": 2.8661, "step": 30899 }, { "epoch": 1.51, "grad_norm": 0.5874931812286377, "learning_rate": 0.00029549678343373137, "loss": 3.2489, "step": 30900 }, { "epoch": 1.51, "grad_norm": 0.6320956945419312, "learning_rate": 0.00029548138898390254, "loss": 2.8875, "step": 30901 }, { "epoch": 1.51, "grad_norm": 0.5752156972885132, "learning_rate": 0.00029546599454597484, "loss": 2.9791, "step": 30902 }, { "epoch": 1.51, "grad_norm": 0.5847448110580444, "learning_rate": 0.00029545060011998875, "loss": 2.9069, "step": 30903 }, { "epoch": 1.51, "grad_norm": 0.5820603966712952, "learning_rate": 0.00029543520570598505, "loss": 3.1561, "step": 30904 }, { "epoch": 1.51, "grad_norm": 0.6131413578987122, "learning_rate": 0.000295419811304004, "loss": 3.1054, "step": 30905 }, { "epoch": 1.51, "grad_norm": 0.6036339402198792, "learning_rate": 0.0002954044169140864, "loss": 2.9931, "step": 30906 }, { "epoch": 1.51, "grad_norm": 0.6479763984680176, "learning_rate": 0.0002953890225362726, "loss": 3.1477, "step": 30907 }, { "epoch": 1.51, "grad_norm": 0.5721249580383301, "learning_rate": 0.00029537362817060324, "loss": 3.0113, "step": 30908 }, { "epoch": 1.51, "grad_norm": 0.6328310370445251, "learning_rate": 0.0002953582338171189, "loss": 2.9072, "step": 30909 }, { "epoch": 1.51, "grad_norm": 0.6006953716278076, "learning_rate": 0.00029534283947586, "loss": 2.95, "step": 30910 }, { "epoch": 1.51, "grad_norm": 0.6052888631820679, "learning_rate": 0.00029532744514686724, "loss": 2.8012, "step": 30911 }, { "epoch": 1.51, "grad_norm": 0.6413729190826416, "learning_rate": 0.00029531205083018104, "loss": 3.1825, "step": 30912 }, { "epoch": 1.51, "grad_norm": 0.5984901785850525, "learning_rate": 0.00029529665652584195, "loss": 2.9248, "step": 30913 }, { "epoch": 1.52, "grad_norm": 0.5955290198326111, "learning_rate": 0.0002952812622338907, "loss": 2.9168, "step": 30914 }, { "epoch": 1.52, "grad_norm": 0.6276412606239319, "learning_rate": 0.00029526586795436757, "loss": 3.1651, "step": 30915 }, { "epoch": 1.52, "grad_norm": 0.5960054397583008, "learning_rate": 0.0002952504736873134, "loss": 3.1858, "step": 30916 }, { "epoch": 1.52, "grad_norm": 0.5850929021835327, "learning_rate": 0.0002952350794327684, "loss": 3.048, "step": 30917 }, { "epoch": 1.52, "grad_norm": 0.616568386554718, "learning_rate": 0.00029521968519077325, "loss": 2.8989, "step": 30918 }, { "epoch": 1.52, "grad_norm": 0.623950183391571, "learning_rate": 0.0002952042909613687, "loss": 3.1168, "step": 30919 }, { "epoch": 1.52, "grad_norm": 0.5852279663085938, "learning_rate": 0.000295188896744595, "loss": 3.0668, "step": 30920 }, { "epoch": 1.52, "grad_norm": 0.5998043417930603, "learning_rate": 0.00029517350254049297, "loss": 3.0312, "step": 30921 }, { "epoch": 1.52, "grad_norm": 0.6115825772285461, "learning_rate": 0.0002951581083491028, "loss": 2.8682, "step": 30922 }, { "epoch": 1.52, "grad_norm": 0.6442579627037048, "learning_rate": 0.0002951427141704654, "loss": 3.1506, "step": 30923 }, { "epoch": 1.52, "grad_norm": 0.6034064292907715, "learning_rate": 0.0002951273200046211, "loss": 3.1786, "step": 30924 }, { "epoch": 1.52, "grad_norm": 0.5963617563247681, "learning_rate": 0.0002951119258516104, "loss": 2.975, "step": 30925 }, { "epoch": 1.52, "grad_norm": 0.6269310712814331, "learning_rate": 0.0002950965317114741, "loss": 3.1549, "step": 30926 }, { "epoch": 1.52, "grad_norm": 0.5554681420326233, "learning_rate": 0.0002950811375842526, "loss": 3.099, "step": 30927 }, { "epoch": 1.52, "grad_norm": 0.6449235677719116, "learning_rate": 0.0002950657434699864, "loss": 3.0467, "step": 30928 }, { "epoch": 1.52, "grad_norm": 0.633747398853302, "learning_rate": 0.00029505034936871597, "loss": 2.9924, "step": 30929 }, { "epoch": 1.52, "grad_norm": 0.5843096971511841, "learning_rate": 0.00029503495528048206, "loss": 3.2848, "step": 30930 }, { "epoch": 1.52, "grad_norm": 0.6081466674804688, "learning_rate": 0.00029501956120532515, "loss": 3.054, "step": 30931 }, { "epoch": 1.52, "grad_norm": 0.5923753976821899, "learning_rate": 0.0002950041671432856, "loss": 2.893, "step": 30932 }, { "epoch": 1.52, "grad_norm": 0.638497531414032, "learning_rate": 0.00029498877309440435, "loss": 3.1434, "step": 30933 }, { "epoch": 1.52, "grad_norm": 0.5726613998413086, "learning_rate": 0.00029497337905872155, "loss": 3.0945, "step": 30934 }, { "epoch": 1.52, "grad_norm": 0.6029412746429443, "learning_rate": 0.0002949579850362779, "loss": 3.0213, "step": 30935 }, { "epoch": 1.52, "grad_norm": 0.6141619086265564, "learning_rate": 0.00029494259102711405, "loss": 2.8401, "step": 30936 }, { "epoch": 1.52, "grad_norm": 0.5874308347702026, "learning_rate": 0.0002949271970312703, "loss": 3.1897, "step": 30937 }, { "epoch": 1.52, "grad_norm": 0.5788091421127319, "learning_rate": 0.0002949118030487875, "loss": 3.0942, "step": 30938 }, { "epoch": 1.52, "grad_norm": 0.6286618709564209, "learning_rate": 0.00029489640907970593, "loss": 2.9367, "step": 30939 }, { "epoch": 1.52, "grad_norm": 0.588944137096405, "learning_rate": 0.00029488101512406615, "loss": 3.0126, "step": 30940 }, { "epoch": 1.52, "grad_norm": 0.5936257243156433, "learning_rate": 0.000294865621181909, "loss": 2.9992, "step": 30941 }, { "epoch": 1.52, "grad_norm": 0.5838150382041931, "learning_rate": 0.0002948502272532747, "loss": 3.0763, "step": 30942 }, { "epoch": 1.52, "grad_norm": 0.5851708650588989, "learning_rate": 0.000294834833338204, "loss": 3.0234, "step": 30943 }, { "epoch": 1.52, "grad_norm": 0.6120582818984985, "learning_rate": 0.00029481943943673715, "loss": 2.7278, "step": 30944 }, { "epoch": 1.52, "grad_norm": 0.6074814796447754, "learning_rate": 0.00029480404554891503, "loss": 3.1052, "step": 30945 }, { "epoch": 1.52, "grad_norm": 0.5912714004516602, "learning_rate": 0.00029478865167477814, "loss": 2.8922, "step": 30946 }, { "epoch": 1.52, "grad_norm": 0.5891293287277222, "learning_rate": 0.00029477325781436677, "loss": 3.0284, "step": 30947 }, { "epoch": 1.52, "grad_norm": 0.714016854763031, "learning_rate": 0.0002947578639677218, "loss": 2.9723, "step": 30948 }, { "epoch": 1.52, "grad_norm": 0.5967224836349487, "learning_rate": 0.0002947424701348835, "loss": 3.0212, "step": 30949 }, { "epoch": 1.52, "grad_norm": 0.5975646376609802, "learning_rate": 0.00029472707631589246, "loss": 3.0859, "step": 30950 }, { "epoch": 1.52, "grad_norm": 0.6335617899894714, "learning_rate": 0.00029471168251078944, "loss": 2.9693, "step": 30951 }, { "epoch": 1.52, "grad_norm": 0.6031022071838379, "learning_rate": 0.00029469628871961473, "loss": 2.8384, "step": 30952 }, { "epoch": 1.52, "grad_norm": 0.5797258019447327, "learning_rate": 0.0002946808949424091, "loss": 2.9484, "step": 30953 }, { "epoch": 1.52, "grad_norm": 0.5635722875595093, "learning_rate": 0.00029466550117921277, "loss": 3.0065, "step": 30954 }, { "epoch": 1.52, "grad_norm": 0.6008172631263733, "learning_rate": 0.00029465010743006656, "loss": 3.1646, "step": 30955 }, { "epoch": 1.52, "grad_norm": 0.5772174596786499, "learning_rate": 0.0002946347136950111, "loss": 2.8605, "step": 30956 }, { "epoch": 1.52, "grad_norm": 0.6074007749557495, "learning_rate": 0.00029461931997408654, "loss": 3.1638, "step": 30957 }, { "epoch": 1.52, "grad_norm": 0.5578022003173828, "learning_rate": 0.0002946039262673338, "loss": 2.9493, "step": 30958 }, { "epoch": 1.52, "grad_norm": 0.6266377568244934, "learning_rate": 0.0002945885325747931, "loss": 2.9573, "step": 30959 }, { "epoch": 1.52, "grad_norm": 0.6051025986671448, "learning_rate": 0.0002945731388965053, "loss": 2.8036, "step": 30960 }, { "epoch": 1.52, "grad_norm": 0.6597091555595398, "learning_rate": 0.0002945577452325109, "loss": 3.0133, "step": 30961 }, { "epoch": 1.52, "grad_norm": 0.5791193842887878, "learning_rate": 0.00029454235158285014, "loss": 3.1355, "step": 30962 }, { "epoch": 1.52, "grad_norm": 0.6521401405334473, "learning_rate": 0.00029452695794756397, "loss": 3.2595, "step": 30963 }, { "epoch": 1.52, "grad_norm": 0.614771842956543, "learning_rate": 0.00029451156432669265, "loss": 3.0447, "step": 30964 }, { "epoch": 1.52, "grad_norm": 0.6045995354652405, "learning_rate": 0.0002944961707202768, "loss": 3.0624, "step": 30965 }, { "epoch": 1.52, "grad_norm": 0.5819627642631531, "learning_rate": 0.000294480777128357, "loss": 3.2695, "step": 30966 }, { "epoch": 1.52, "grad_norm": 0.5987125039100647, "learning_rate": 0.0002944653835509738, "loss": 3.1373, "step": 30967 }, { "epoch": 1.52, "grad_norm": 0.5703858137130737, "learning_rate": 0.00029444998998816774, "loss": 3.2609, "step": 30968 }, { "epoch": 1.52, "grad_norm": 0.6514813303947449, "learning_rate": 0.0002944345964399792, "loss": 3.0562, "step": 30969 }, { "epoch": 1.52, "grad_norm": 0.6097198724746704, "learning_rate": 0.00029441920290644894, "loss": 3.1765, "step": 30970 }, { "epoch": 1.52, "grad_norm": 0.5842579007148743, "learning_rate": 0.00029440380938761747, "loss": 3.0327, "step": 30971 }, { "epoch": 1.52, "grad_norm": 0.5636705756187439, "learning_rate": 0.0002943884158835252, "loss": 2.9826, "step": 30972 }, { "epoch": 1.52, "grad_norm": 0.5914285778999329, "learning_rate": 0.00029437302239421285, "loss": 2.9421, "step": 30973 }, { "epoch": 1.52, "grad_norm": 0.6153814792633057, "learning_rate": 0.00029435762891972085, "loss": 3.1329, "step": 30974 }, { "epoch": 1.52, "grad_norm": 0.5827009081840515, "learning_rate": 0.0002943422354600897, "loss": 3.1252, "step": 30975 }, { "epoch": 1.52, "grad_norm": 0.5662691593170166, "learning_rate": 0.00029432684201536013, "loss": 3.2731, "step": 30976 }, { "epoch": 1.52, "grad_norm": 0.6179322600364685, "learning_rate": 0.00029431144858557245, "loss": 3.048, "step": 30977 }, { "epoch": 1.52, "grad_norm": 0.6341006755828857, "learning_rate": 0.0002942960551707674, "loss": 3.1041, "step": 30978 }, { "epoch": 1.52, "grad_norm": 0.626679539680481, "learning_rate": 0.0002942806617709854, "loss": 3.1037, "step": 30979 }, { "epoch": 1.52, "grad_norm": 0.5652845501899719, "learning_rate": 0.000294265268386267, "loss": 2.7815, "step": 30980 }, { "epoch": 1.52, "grad_norm": 0.6046680212020874, "learning_rate": 0.0002942498750166529, "loss": 2.9942, "step": 30981 }, { "epoch": 1.52, "grad_norm": 0.5820021629333496, "learning_rate": 0.00029423448166218343, "loss": 3.126, "step": 30982 }, { "epoch": 1.52, "grad_norm": 0.6065924167633057, "learning_rate": 0.0002942190883228993, "loss": 2.8712, "step": 30983 }, { "epoch": 1.52, "grad_norm": 0.6059271693229675, "learning_rate": 0.00029420369499884086, "loss": 3.0145, "step": 30984 }, { "epoch": 1.52, "grad_norm": 0.6043378710746765, "learning_rate": 0.0002941883016900489, "loss": 3.1985, "step": 30985 }, { "epoch": 1.52, "grad_norm": 0.5847151875495911, "learning_rate": 0.0002941729083965638, "loss": 3.1297, "step": 30986 }, { "epoch": 1.52, "grad_norm": 0.6242455244064331, "learning_rate": 0.000294157515118426, "loss": 3.034, "step": 30987 }, { "epoch": 1.52, "grad_norm": 0.6258992552757263, "learning_rate": 0.00029414212185567643, "loss": 2.9636, "step": 30988 }, { "epoch": 1.52, "grad_norm": 0.6174604892730713, "learning_rate": 0.0002941267286083552, "loss": 3.1873, "step": 30989 }, { "epoch": 1.52, "grad_norm": 0.6053414940834045, "learning_rate": 0.00029411133537650305, "loss": 3.0621, "step": 30990 }, { "epoch": 1.52, "grad_norm": 0.6238487362861633, "learning_rate": 0.00029409594216016057, "loss": 3.3516, "step": 30991 }, { "epoch": 1.52, "grad_norm": 0.6055889129638672, "learning_rate": 0.0002940805489593682, "loss": 2.9447, "step": 30992 }, { "epoch": 1.52, "grad_norm": 0.6507366895675659, "learning_rate": 0.0002940651557741666, "loss": 3.007, "step": 30993 }, { "epoch": 1.52, "grad_norm": 0.5801891684532166, "learning_rate": 0.0002940497626045962, "loss": 3.0388, "step": 30994 }, { "epoch": 1.52, "grad_norm": 0.5716410875320435, "learning_rate": 0.0002940343694506975, "loss": 2.8096, "step": 30995 }, { "epoch": 1.52, "grad_norm": 0.5872518420219421, "learning_rate": 0.00029401897631251125, "loss": 3.0487, "step": 30996 }, { "epoch": 1.52, "grad_norm": 0.6373841762542725, "learning_rate": 0.0002940035831900777, "loss": 3.104, "step": 30997 }, { "epoch": 1.52, "grad_norm": 0.6203256845474243, "learning_rate": 0.0002939881900834377, "loss": 2.8221, "step": 30998 }, { "epoch": 1.52, "grad_norm": 0.5925654768943787, "learning_rate": 0.0002939727969926316, "loss": 3.0561, "step": 30999 }, { "epoch": 1.52, "grad_norm": 0.58648282289505, "learning_rate": 0.00029395740391770003, "loss": 3.1059, "step": 31000 }, { "epoch": 1.52, "grad_norm": 0.5882959365844727, "learning_rate": 0.00029394201085868347, "loss": 3.2673, "step": 31001 }, { "epoch": 1.52, "grad_norm": 0.6323480606079102, "learning_rate": 0.0002939266178156224, "loss": 3.2371, "step": 31002 }, { "epoch": 1.52, "grad_norm": 0.5646398067474365, "learning_rate": 0.0002939112247885576, "loss": 3.3075, "step": 31003 }, { "epoch": 1.52, "grad_norm": 0.9151919484138489, "learning_rate": 0.0002938958317775294, "loss": 3.2684, "step": 31004 }, { "epoch": 1.52, "grad_norm": 0.5926310420036316, "learning_rate": 0.00029388043878257847, "loss": 3.0728, "step": 31005 }, { "epoch": 1.52, "grad_norm": 0.580113410949707, "learning_rate": 0.0002938650458037451, "loss": 3.203, "step": 31006 }, { "epoch": 1.52, "grad_norm": 0.5675933361053467, "learning_rate": 0.00029384965284107016, "loss": 3.0887, "step": 31007 }, { "epoch": 1.52, "grad_norm": 0.6494491696357727, "learning_rate": 0.00029383425989459405, "loss": 3.0097, "step": 31008 }, { "epoch": 1.52, "grad_norm": 0.5970069169998169, "learning_rate": 0.00029381886696435724, "loss": 2.7491, "step": 31009 }, { "epoch": 1.52, "grad_norm": 0.5872333645820618, "learning_rate": 0.0002938034740504004, "loss": 3.293, "step": 31010 }, { "epoch": 1.52, "grad_norm": 0.5623251795768738, "learning_rate": 0.00029378808115276405, "loss": 3.0746, "step": 31011 }, { "epoch": 1.52, "grad_norm": 0.5938485860824585, "learning_rate": 0.00029377268827148855, "loss": 3.1148, "step": 31012 }, { "epoch": 1.52, "grad_norm": 0.5834391713142395, "learning_rate": 0.0002937572954066148, "loss": 2.8762, "step": 31013 }, { "epoch": 1.52, "grad_norm": 0.5958360433578491, "learning_rate": 0.000293741902558183, "loss": 2.9429, "step": 31014 }, { "epoch": 1.52, "grad_norm": 0.5586824417114258, "learning_rate": 0.00029372650972623395, "loss": 3.0833, "step": 31015 }, { "epoch": 1.52, "grad_norm": 0.6077871322631836, "learning_rate": 0.00029371111691080785, "loss": 2.9211, "step": 31016 }, { "epoch": 1.52, "grad_norm": 0.566399097442627, "learning_rate": 0.00029369572411194555, "loss": 3.0479, "step": 31017 }, { "epoch": 1.52, "grad_norm": 0.5618554949760437, "learning_rate": 0.00029368033132968764, "loss": 2.9492, "step": 31018 }, { "epoch": 1.52, "grad_norm": 0.6119033694267273, "learning_rate": 0.00029366493856407435, "loss": 3.0998, "step": 31019 }, { "epoch": 1.52, "grad_norm": 0.5961689352989197, "learning_rate": 0.0002936495458151465, "loss": 2.8835, "step": 31020 }, { "epoch": 1.52, "grad_norm": 0.563315212726593, "learning_rate": 0.0002936341530829444, "loss": 3.0595, "step": 31021 }, { "epoch": 1.52, "grad_norm": 0.5843584537506104, "learning_rate": 0.0002936187603675088, "loss": 3.117, "step": 31022 }, { "epoch": 1.52, "grad_norm": 0.6035363674163818, "learning_rate": 0.00029360336766888023, "loss": 2.9555, "step": 31023 }, { "epoch": 1.52, "grad_norm": 0.6223370432853699, "learning_rate": 0.00029358797498709897, "loss": 3.3564, "step": 31024 }, { "epoch": 1.52, "grad_norm": 0.5917815566062927, "learning_rate": 0.00029357258232220595, "loss": 3.1168, "step": 31025 }, { "epoch": 1.52, "grad_norm": 0.567468523979187, "learning_rate": 0.00029355718967424144, "loss": 3.0815, "step": 31026 }, { "epoch": 1.52, "grad_norm": 0.6053411364555359, "learning_rate": 0.00029354179704324594, "loss": 2.9032, "step": 31027 }, { "epoch": 1.52, "grad_norm": 0.6136942505836487, "learning_rate": 0.0002935264044292602, "loss": 2.9913, "step": 31028 }, { "epoch": 1.52, "grad_norm": 0.5973330736160278, "learning_rate": 0.00029351101183232466, "loss": 3.091, "step": 31029 }, { "epoch": 1.52, "grad_norm": 0.6054880619049072, "learning_rate": 0.00029349561925248, "loss": 2.8339, "step": 31030 }, { "epoch": 1.52, "grad_norm": 0.5846839547157288, "learning_rate": 0.0002934802266897664, "loss": 2.973, "step": 31031 }, { "epoch": 1.52, "grad_norm": 0.5589364767074585, "learning_rate": 0.0002934648341442247, "loss": 3.0979, "step": 31032 }, { "epoch": 1.52, "grad_norm": 0.5708386898040771, "learning_rate": 0.0002934494416158954, "loss": 3.19, "step": 31033 }, { "epoch": 1.52, "grad_norm": 0.5712014436721802, "learning_rate": 0.000293434049104819, "loss": 3.1109, "step": 31034 }, { "epoch": 1.52, "grad_norm": 0.6233195662498474, "learning_rate": 0.00029341865661103604, "loss": 3.1072, "step": 31035 }, { "epoch": 1.52, "grad_norm": 0.5828282833099365, "learning_rate": 0.0002934032641345871, "loss": 3.1385, "step": 31036 }, { "epoch": 1.52, "grad_norm": 0.6146054863929749, "learning_rate": 0.00029338787167551263, "loss": 2.9546, "step": 31037 }, { "epoch": 1.52, "grad_norm": 0.5709480047225952, "learning_rate": 0.0002933724792338533, "loss": 2.9423, "step": 31038 }, { "epoch": 1.52, "grad_norm": 0.5619285702705383, "learning_rate": 0.00029335708680964953, "loss": 2.9497, "step": 31039 }, { "epoch": 1.52, "grad_norm": 0.6475139856338501, "learning_rate": 0.000293341694402942, "loss": 2.9842, "step": 31040 }, { "epoch": 1.52, "grad_norm": 0.5895599722862244, "learning_rate": 0.0002933263020137711, "loss": 2.8115, "step": 31041 }, { "epoch": 1.52, "grad_norm": 0.5933480858802795, "learning_rate": 0.0002933109096421774, "loss": 3.0456, "step": 31042 }, { "epoch": 1.52, "grad_norm": 0.6113942861557007, "learning_rate": 0.00029329551728820153, "loss": 3.0089, "step": 31043 }, { "epoch": 1.52, "grad_norm": 0.5870497226715088, "learning_rate": 0.000293280124951884, "loss": 3.2366, "step": 31044 }, { "epoch": 1.52, "grad_norm": 0.6341589689254761, "learning_rate": 0.00029326473263326536, "loss": 3.2206, "step": 31045 }, { "epoch": 1.52, "grad_norm": 0.6039684414863586, "learning_rate": 0.000293249340332386, "loss": 2.9294, "step": 31046 }, { "epoch": 1.52, "grad_norm": 0.609302282333374, "learning_rate": 0.0002932339480492866, "loss": 3.1502, "step": 31047 }, { "epoch": 1.52, "grad_norm": 0.5908909440040588, "learning_rate": 0.0002932185557840078, "loss": 3.0183, "step": 31048 }, { "epoch": 1.52, "grad_norm": 0.6392471790313721, "learning_rate": 0.0002932031635365899, "loss": 2.9455, "step": 31049 }, { "epoch": 1.52, "grad_norm": 0.6417034864425659, "learning_rate": 0.00029318777130707364, "loss": 2.9563, "step": 31050 }, { "epoch": 1.52, "grad_norm": 0.637570858001709, "learning_rate": 0.0002931723790954994, "loss": 2.9481, "step": 31051 }, { "epoch": 1.52, "grad_norm": 0.5919541716575623, "learning_rate": 0.0002931569869019078, "loss": 3.021, "step": 31052 }, { "epoch": 1.52, "grad_norm": 0.6471995115280151, "learning_rate": 0.00029314159472633943, "loss": 2.9314, "step": 31053 }, { "epoch": 1.52, "grad_norm": 0.5962557196617126, "learning_rate": 0.00029312620256883475, "loss": 2.9913, "step": 31054 }, { "epoch": 1.52, "grad_norm": 0.57488614320755, "learning_rate": 0.0002931108104294344, "loss": 3.0118, "step": 31055 }, { "epoch": 1.52, "grad_norm": 0.6746479272842407, "learning_rate": 0.00029309541830817873, "loss": 2.932, "step": 31056 }, { "epoch": 1.52, "grad_norm": 0.6285524964332581, "learning_rate": 0.0002930800262051085, "loss": 3.079, "step": 31057 }, { "epoch": 1.52, "grad_norm": 0.6768153309822083, "learning_rate": 0.0002930646341202641, "loss": 3.0896, "step": 31058 }, { "epoch": 1.52, "grad_norm": 0.6034467220306396, "learning_rate": 0.0002930492420536861, "loss": 3.1119, "step": 31059 }, { "epoch": 1.52, "grad_norm": 0.5936095714569092, "learning_rate": 0.00029303385000541517, "loss": 3.1263, "step": 31060 }, { "epoch": 1.52, "grad_norm": 0.6120097041130066, "learning_rate": 0.00029301845797549155, "loss": 2.9978, "step": 31061 }, { "epoch": 1.52, "grad_norm": 0.6264371871948242, "learning_rate": 0.00029300306596395604, "loss": 3.0816, "step": 31062 }, { "epoch": 1.52, "grad_norm": 0.6259018182754517, "learning_rate": 0.00029298767397084927, "loss": 2.8979, "step": 31063 }, { "epoch": 1.52, "grad_norm": 0.6216327548027039, "learning_rate": 0.0002929722819962114, "loss": 3.02, "step": 31064 }, { "epoch": 1.52, "grad_norm": 0.6106691956520081, "learning_rate": 0.00029295689004008333, "loss": 3.1225, "step": 31065 }, { "epoch": 1.52, "grad_norm": 0.6300420761108398, "learning_rate": 0.0002929414981025054, "loss": 3.0543, "step": 31066 }, { "epoch": 1.52, "grad_norm": 0.6276101469993591, "learning_rate": 0.00029292610618351814, "loss": 3.0879, "step": 31067 }, { "epoch": 1.52, "grad_norm": 0.5739606618881226, "learning_rate": 0.00029291071428316225, "loss": 2.9895, "step": 31068 }, { "epoch": 1.52, "grad_norm": 0.6423936486244202, "learning_rate": 0.0002928953224014782, "loss": 2.9886, "step": 31069 }, { "epoch": 1.52, "grad_norm": 0.563739001750946, "learning_rate": 0.00029287993053850645, "loss": 2.9262, "step": 31070 }, { "epoch": 1.52, "grad_norm": 0.6203590631484985, "learning_rate": 0.0002928645386942875, "loss": 3.1504, "step": 31071 }, { "epoch": 1.52, "grad_norm": 0.6195458173751831, "learning_rate": 0.0002928491468688621, "loss": 3.0775, "step": 31072 }, { "epoch": 1.52, "grad_norm": 0.6083816289901733, "learning_rate": 0.00029283375506227073, "loss": 3.0681, "step": 31073 }, { "epoch": 1.52, "grad_norm": 0.567173182964325, "learning_rate": 0.0002928183632745537, "loss": 2.9375, "step": 31074 }, { "epoch": 1.52, "grad_norm": 0.5811203122138977, "learning_rate": 0.00029280297150575187, "loss": 2.9456, "step": 31075 }, { "epoch": 1.52, "grad_norm": 0.5663540363311768, "learning_rate": 0.0002927875797559056, "loss": 2.8892, "step": 31076 }, { "epoch": 1.52, "grad_norm": 0.5827478766441345, "learning_rate": 0.00029277218802505544, "loss": 2.8407, "step": 31077 }, { "epoch": 1.52, "grad_norm": 0.6202985048294067, "learning_rate": 0.0002927567963132419, "loss": 3.1736, "step": 31078 }, { "epoch": 1.52, "grad_norm": 0.601956307888031, "learning_rate": 0.00029274140462050546, "loss": 3.0033, "step": 31079 }, { "epoch": 1.52, "grad_norm": 0.6013195514678955, "learning_rate": 0.000292726012946887, "loss": 2.8182, "step": 31080 }, { "epoch": 1.52, "grad_norm": 0.5847362279891968, "learning_rate": 0.00029271062129242664, "loss": 3.0162, "step": 31081 }, { "epoch": 1.52, "grad_norm": 0.5564871430397034, "learning_rate": 0.0002926952296571653, "loss": 3.1941, "step": 31082 }, { "epoch": 1.52, "grad_norm": 0.5841847062110901, "learning_rate": 0.0002926798380411431, "loss": 3.0432, "step": 31083 }, { "epoch": 1.52, "grad_norm": 0.6471392512321472, "learning_rate": 0.00029266444644440086, "loss": 2.9476, "step": 31084 }, { "epoch": 1.52, "grad_norm": 0.5753255486488342, "learning_rate": 0.00029264905486697915, "loss": 2.876, "step": 31085 }, { "epoch": 1.52, "grad_norm": 0.6308822631835938, "learning_rate": 0.0002926336633089183, "loss": 2.907, "step": 31086 }, { "epoch": 1.52, "grad_norm": 0.6044859886169434, "learning_rate": 0.0002926182717702591, "loss": 3.1926, "step": 31087 }, { "epoch": 1.52, "grad_norm": 0.610822319984436, "learning_rate": 0.0002926028802510418, "loss": 2.9054, "step": 31088 }, { "epoch": 1.52, "grad_norm": 0.6061519384384155, "learning_rate": 0.00029258748875130714, "loss": 3.0566, "step": 31089 }, { "epoch": 1.52, "grad_norm": 0.6459476351737976, "learning_rate": 0.00029257209727109564, "loss": 2.9014, "step": 31090 }, { "epoch": 1.52, "grad_norm": 0.6128569841384888, "learning_rate": 0.0002925567058104478, "loss": 2.6935, "step": 31091 }, { "epoch": 1.52, "grad_norm": 0.6115437746047974, "learning_rate": 0.0002925413143694042, "loss": 3.3244, "step": 31092 }, { "epoch": 1.52, "grad_norm": 0.6120768785476685, "learning_rate": 0.00029252592294800526, "loss": 2.8478, "step": 31093 }, { "epoch": 1.52, "grad_norm": 0.5760208368301392, "learning_rate": 0.00029251053154629163, "loss": 2.9368, "step": 31094 }, { "epoch": 1.52, "grad_norm": 0.5910771489143372, "learning_rate": 0.0002924951401643039, "loss": 2.9577, "step": 31095 }, { "epoch": 1.52, "grad_norm": 0.6013192534446716, "learning_rate": 0.00029247974880208235, "loss": 3.07, "step": 31096 }, { "epoch": 1.52, "grad_norm": 0.6419551372528076, "learning_rate": 0.00029246435745966786, "loss": 2.997, "step": 31097 }, { "epoch": 1.52, "grad_norm": 0.6099100708961487, "learning_rate": 0.00029244896613710077, "loss": 3.0675, "step": 31098 }, { "epoch": 1.52, "grad_norm": 0.5890765190124512, "learning_rate": 0.00029243357483442153, "loss": 2.9062, "step": 31099 }, { "epoch": 1.52, "grad_norm": 0.6350886821746826, "learning_rate": 0.00029241818355167095, "loss": 3.1908, "step": 31100 }, { "epoch": 1.52, "grad_norm": 0.6403512954711914, "learning_rate": 0.00029240279228888926, "loss": 3.0824, "step": 31101 }, { "epoch": 1.52, "grad_norm": 0.5865958333015442, "learning_rate": 0.00029238740104611734, "loss": 3.1828, "step": 31102 }, { "epoch": 1.52, "grad_norm": 0.6323754191398621, "learning_rate": 0.0002923720098233954, "loss": 3.1062, "step": 31103 }, { "epoch": 1.52, "grad_norm": 0.6078318953514099, "learning_rate": 0.0002923566186207641, "loss": 3.2757, "step": 31104 }, { "epoch": 1.52, "grad_norm": 0.5803014636039734, "learning_rate": 0.00029234122743826413, "loss": 3.2241, "step": 31105 }, { "epoch": 1.52, "grad_norm": 0.5942589044570923, "learning_rate": 0.00029232583627593583, "loss": 3.378, "step": 31106 }, { "epoch": 1.52, "grad_norm": 0.6468483209609985, "learning_rate": 0.0002923104451338199, "loss": 3.0655, "step": 31107 }, { "epoch": 1.52, "grad_norm": 0.6610134243965149, "learning_rate": 0.00029229505401195656, "loss": 2.9371, "step": 31108 }, { "epoch": 1.52, "grad_norm": 0.5979439616203308, "learning_rate": 0.0002922796629103867, "loss": 3.087, "step": 31109 }, { "epoch": 1.52, "grad_norm": 0.5915126204490662, "learning_rate": 0.00029226427182915075, "loss": 3.043, "step": 31110 }, { "epoch": 1.52, "grad_norm": 0.5519029498100281, "learning_rate": 0.0002922488807682891, "loss": 3.1751, "step": 31111 }, { "epoch": 1.52, "grad_norm": 0.5895189642906189, "learning_rate": 0.0002922334897278425, "loss": 3.1257, "step": 31112 }, { "epoch": 1.52, "grad_norm": 0.627914547920227, "learning_rate": 0.00029221809870785137, "loss": 3.2162, "step": 31113 }, { "epoch": 1.52, "grad_norm": 0.5837568044662476, "learning_rate": 0.0002922027077083562, "loss": 3.0215, "step": 31114 }, { "epoch": 1.52, "grad_norm": 0.5984426736831665, "learning_rate": 0.0002921873167293977, "loss": 2.9769, "step": 31115 }, { "epoch": 1.52, "grad_norm": 0.600309431552887, "learning_rate": 0.0002921719257710163, "loss": 3.3203, "step": 31116 }, { "epoch": 1.52, "grad_norm": 0.5865994095802307, "learning_rate": 0.00029215653483325257, "loss": 3.0811, "step": 31117 }, { "epoch": 1.53, "grad_norm": 0.5973957777023315, "learning_rate": 0.0002921411439161469, "loss": 3.1921, "step": 31118 }, { "epoch": 1.53, "grad_norm": 0.6090832352638245, "learning_rate": 0.00029212575301974, "loss": 3.0701, "step": 31119 }, { "epoch": 1.53, "grad_norm": 0.5993704795837402, "learning_rate": 0.00029211036214407243, "loss": 2.9587, "step": 31120 }, { "epoch": 1.53, "grad_norm": 0.5656693577766418, "learning_rate": 0.0002920949712891845, "loss": 2.8517, "step": 31121 }, { "epoch": 1.53, "grad_norm": 0.6290675401687622, "learning_rate": 0.000292079580455117, "loss": 2.9327, "step": 31122 }, { "epoch": 1.53, "grad_norm": 0.6300106048583984, "learning_rate": 0.0002920641896419103, "loss": 3.0199, "step": 31123 }, { "epoch": 1.53, "grad_norm": 0.5924240946769714, "learning_rate": 0.00029204879884960495, "loss": 3.042, "step": 31124 }, { "epoch": 1.53, "grad_norm": 0.6742143630981445, "learning_rate": 0.0002920334080782417, "loss": 3.1415, "step": 31125 }, { "epoch": 1.53, "grad_norm": 0.6075363159179688, "learning_rate": 0.0002920180173278608, "loss": 3.1435, "step": 31126 }, { "epoch": 1.53, "grad_norm": 0.6138061881065369, "learning_rate": 0.00029200262659850293, "loss": 2.8322, "step": 31127 }, { "epoch": 1.53, "grad_norm": 0.6019613742828369, "learning_rate": 0.0002919872358902086, "loss": 2.9297, "step": 31128 }, { "epoch": 1.53, "grad_norm": 0.584140956401825, "learning_rate": 0.00029197184520301833, "loss": 3.0311, "step": 31129 }, { "epoch": 1.53, "grad_norm": 0.6052551865577698, "learning_rate": 0.00029195645453697274, "loss": 3.276, "step": 31130 }, { "epoch": 1.53, "grad_norm": 0.5705941915512085, "learning_rate": 0.00029194106389211224, "loss": 3.1659, "step": 31131 }, { "epoch": 1.53, "grad_norm": 0.6407089233398438, "learning_rate": 0.0002919256732684775, "loss": 2.7949, "step": 31132 }, { "epoch": 1.53, "grad_norm": 0.6144587397575378, "learning_rate": 0.0002919102826661089, "loss": 3.1167, "step": 31133 }, { "epoch": 1.53, "grad_norm": 0.5976653099060059, "learning_rate": 0.00029189489208504706, "loss": 2.9931, "step": 31134 }, { "epoch": 1.53, "grad_norm": 0.6328117251396179, "learning_rate": 0.0002918795015253326, "loss": 3.169, "step": 31135 }, { "epoch": 1.53, "grad_norm": 0.5923229455947876, "learning_rate": 0.00029186411098700585, "loss": 3.1009, "step": 31136 }, { "epoch": 1.53, "grad_norm": 0.5932513475418091, "learning_rate": 0.00029184872047010755, "loss": 3.17, "step": 31137 }, { "epoch": 1.53, "grad_norm": 0.6198069453239441, "learning_rate": 0.00029183332997467814, "loss": 2.9827, "step": 31138 }, { "epoch": 1.53, "grad_norm": 0.6290854215621948, "learning_rate": 0.0002918179395007581, "loss": 2.9822, "step": 31139 }, { "epoch": 1.53, "grad_norm": 0.58577960729599, "learning_rate": 0.00029180254904838816, "loss": 3.062, "step": 31140 }, { "epoch": 1.53, "grad_norm": 0.5880772471427917, "learning_rate": 0.0002917871586176086, "loss": 3.0266, "step": 31141 }, { "epoch": 1.53, "grad_norm": 0.6108989715576172, "learning_rate": 0.0002917717682084602, "loss": 3.0569, "step": 31142 }, { "epoch": 1.53, "grad_norm": 0.5859941840171814, "learning_rate": 0.00029175637782098327, "loss": 3.1125, "step": 31143 }, { "epoch": 1.53, "grad_norm": 0.5857529044151306, "learning_rate": 0.0002917409874552184, "loss": 3.0528, "step": 31144 }, { "epoch": 1.53, "grad_norm": 0.6427134871482849, "learning_rate": 0.0002917255971112064, "loss": 3.147, "step": 31145 }, { "epoch": 1.53, "grad_norm": 0.5859655737876892, "learning_rate": 0.00029171020678898745, "loss": 3.154, "step": 31146 }, { "epoch": 1.53, "grad_norm": 0.5824823379516602, "learning_rate": 0.0002916948164886023, "loss": 3.174, "step": 31147 }, { "epoch": 1.53, "grad_norm": 0.6048821210861206, "learning_rate": 0.0002916794262100913, "loss": 3.0062, "step": 31148 }, { "epoch": 1.53, "grad_norm": 0.5758355855941772, "learning_rate": 0.00029166403595349514, "loss": 2.8761, "step": 31149 }, { "epoch": 1.53, "grad_norm": 0.5995056629180908, "learning_rate": 0.00029164864571885435, "loss": 2.9244, "step": 31150 }, { "epoch": 1.53, "grad_norm": 0.6515876650810242, "learning_rate": 0.00029163325550620925, "loss": 2.7845, "step": 31151 }, { "epoch": 1.53, "grad_norm": 0.5933794975280762, "learning_rate": 0.00029161786531560073, "loss": 3.0369, "step": 31152 }, { "epoch": 1.53, "grad_norm": 0.6130641102790833, "learning_rate": 0.0002916024751470691, "loss": 2.8529, "step": 31153 }, { "epoch": 1.53, "grad_norm": 0.6391645669937134, "learning_rate": 0.00029158708500065495, "loss": 3.2121, "step": 31154 }, { "epoch": 1.53, "grad_norm": 0.6190594434738159, "learning_rate": 0.0002915716948763987, "loss": 2.8578, "step": 31155 }, { "epoch": 1.53, "grad_norm": 0.6114823818206787, "learning_rate": 0.000291556304774341, "loss": 3.2051, "step": 31156 }, { "epoch": 1.53, "grad_norm": 0.5992326736450195, "learning_rate": 0.0002915409146945225, "loss": 2.9415, "step": 31157 }, { "epoch": 1.53, "grad_norm": 0.6097759008407593, "learning_rate": 0.0002915255246369835, "loss": 2.8587, "step": 31158 }, { "epoch": 1.53, "grad_norm": 0.5706651210784912, "learning_rate": 0.00029151013460176466, "loss": 3.1365, "step": 31159 }, { "epoch": 1.53, "grad_norm": 0.5817975997924805, "learning_rate": 0.0002914947445889065, "loss": 2.8617, "step": 31160 }, { "epoch": 1.53, "grad_norm": 0.5653377771377563, "learning_rate": 0.00029147935459844943, "loss": 2.9423, "step": 31161 }, { "epoch": 1.53, "grad_norm": 0.6266032457351685, "learning_rate": 0.0002914639646304343, "loss": 2.8249, "step": 31162 }, { "epoch": 1.53, "grad_norm": 0.6154088973999023, "learning_rate": 0.00029144857468490126, "loss": 3.3106, "step": 31163 }, { "epoch": 1.53, "grad_norm": 0.5928178429603577, "learning_rate": 0.00029143318476189113, "loss": 3.1093, "step": 31164 }, { "epoch": 1.53, "grad_norm": 0.6203148365020752, "learning_rate": 0.00029141779486144433, "loss": 3.0661, "step": 31165 }, { "epoch": 1.53, "grad_norm": 0.5982689261436462, "learning_rate": 0.0002914024049836013, "loss": 2.9455, "step": 31166 }, { "epoch": 1.53, "grad_norm": 0.5757526755332947, "learning_rate": 0.00029138701512840286, "loss": 3.1113, "step": 31167 }, { "epoch": 1.53, "grad_norm": 0.5914579033851624, "learning_rate": 0.00029137162529588924, "loss": 2.9128, "step": 31168 }, { "epoch": 1.53, "grad_norm": 0.6134317517280579, "learning_rate": 0.00029135623548610126, "loss": 2.9946, "step": 31169 }, { "epoch": 1.53, "grad_norm": 0.5648379921913147, "learning_rate": 0.0002913408456990791, "loss": 3.4179, "step": 31170 }, { "epoch": 1.53, "grad_norm": 0.6595600247383118, "learning_rate": 0.00029132545593486353, "loss": 3.0651, "step": 31171 }, { "epoch": 1.53, "grad_norm": 0.5977067351341248, "learning_rate": 0.00029131006619349516, "loss": 2.9647, "step": 31172 }, { "epoch": 1.53, "grad_norm": 0.5756570100784302, "learning_rate": 0.00029129467647501424, "loss": 3.0136, "step": 31173 }, { "epoch": 1.53, "grad_norm": 0.5817214250564575, "learning_rate": 0.0002912792867794616, "loss": 3.013, "step": 31174 }, { "epoch": 1.53, "grad_norm": 0.628970742225647, "learning_rate": 0.00029126389710687755, "loss": 2.8144, "step": 31175 }, { "epoch": 1.53, "grad_norm": 0.5823667049407959, "learning_rate": 0.00029124850745730266, "loss": 3.2412, "step": 31176 }, { "epoch": 1.53, "grad_norm": 0.6125685572624207, "learning_rate": 0.0002912331178307777, "loss": 3.0372, "step": 31177 }, { "epoch": 1.53, "grad_norm": 0.6078442335128784, "learning_rate": 0.0002912177282273429, "loss": 3.1155, "step": 31178 }, { "epoch": 1.53, "grad_norm": 0.5867834091186523, "learning_rate": 0.000291202338647039, "loss": 2.7659, "step": 31179 }, { "epoch": 1.53, "grad_norm": 0.605891227722168, "learning_rate": 0.0002911869490899063, "loss": 3.1373, "step": 31180 }, { "epoch": 1.53, "grad_norm": 0.6134265065193176, "learning_rate": 0.0002911715595559855, "loss": 3.0259, "step": 31181 }, { "epoch": 1.53, "grad_norm": 0.6155909299850464, "learning_rate": 0.0002911561700453173, "loss": 3.0505, "step": 31182 }, { "epoch": 1.53, "grad_norm": 0.6217151284217834, "learning_rate": 0.00029114078055794186, "loss": 3.1017, "step": 31183 }, { "epoch": 1.53, "grad_norm": 0.6171595454216003, "learning_rate": 0.00029112539109390004, "loss": 3.1822, "step": 31184 }, { "epoch": 1.53, "grad_norm": 0.5772479176521301, "learning_rate": 0.00029111000165323205, "loss": 2.9001, "step": 31185 }, { "epoch": 1.53, "grad_norm": 0.5680612921714783, "learning_rate": 0.0002910946122359787, "loss": 3.0623, "step": 31186 }, { "epoch": 1.53, "grad_norm": 0.5654654502868652, "learning_rate": 0.0002910792228421805, "loss": 3.1616, "step": 31187 }, { "epoch": 1.53, "grad_norm": 0.5805103182792664, "learning_rate": 0.00029106383347187777, "loss": 3.1398, "step": 31188 }, { "epoch": 1.53, "grad_norm": 0.5948848724365234, "learning_rate": 0.0002910484441251113, "loss": 2.8847, "step": 31189 }, { "epoch": 1.53, "grad_norm": 0.6186020970344543, "learning_rate": 0.00029103305480192147, "loss": 3.1687, "step": 31190 }, { "epoch": 1.53, "grad_norm": 0.5708625316619873, "learning_rate": 0.0002910176655023488, "loss": 3.048, "step": 31191 }, { "epoch": 1.53, "grad_norm": 0.5748517513275146, "learning_rate": 0.00029100227622643393, "loss": 3.2699, "step": 31192 }, { "epoch": 1.53, "grad_norm": 0.6108618378639221, "learning_rate": 0.0002909868869742173, "loss": 3.1266, "step": 31193 }, { "epoch": 1.53, "grad_norm": 0.5868430733680725, "learning_rate": 0.0002909714977457396, "loss": 2.9538, "step": 31194 }, { "epoch": 1.53, "grad_norm": 0.5879116058349609, "learning_rate": 0.00029095610854104105, "loss": 3.023, "step": 31195 }, { "epoch": 1.53, "grad_norm": 0.6066574454307556, "learning_rate": 0.0002909407193601624, "loss": 3.202, "step": 31196 }, { "epoch": 1.53, "grad_norm": 0.5830428004264832, "learning_rate": 0.00029092533020314427, "loss": 3.1226, "step": 31197 }, { "epoch": 1.53, "grad_norm": 0.6408023238182068, "learning_rate": 0.0002909099410700269, "loss": 3.0443, "step": 31198 }, { "epoch": 1.53, "grad_norm": 0.6012759804725647, "learning_rate": 0.0002908945519608512, "loss": 3.0593, "step": 31199 }, { "epoch": 1.53, "grad_norm": 0.6043107509613037, "learning_rate": 0.00029087916287565737, "loss": 3.059, "step": 31200 }, { "epoch": 1.53, "grad_norm": 0.590762197971344, "learning_rate": 0.00029086377381448605, "loss": 3.1015, "step": 31201 }, { "epoch": 1.53, "grad_norm": 0.5998045206069946, "learning_rate": 0.00029084838477737784, "loss": 2.8935, "step": 31202 }, { "epoch": 1.53, "grad_norm": 0.6675084233283997, "learning_rate": 0.0002908329957643732, "loss": 3.0359, "step": 31203 }, { "epoch": 1.53, "grad_norm": 0.5763409733772278, "learning_rate": 0.0002908176067755127, "loss": 2.9924, "step": 31204 }, { "epoch": 1.53, "grad_norm": 0.5802990794181824, "learning_rate": 0.0002908022178108369, "loss": 3.0601, "step": 31205 }, { "epoch": 1.53, "grad_norm": 0.592325747013092, "learning_rate": 0.0002907868288703862, "loss": 3.0388, "step": 31206 }, { "epoch": 1.53, "grad_norm": 0.5971692800521851, "learning_rate": 0.0002907714399542013, "loss": 2.8794, "step": 31207 }, { "epoch": 1.53, "grad_norm": 0.6179129481315613, "learning_rate": 0.00029075605106232264, "loss": 2.9758, "step": 31208 }, { "epoch": 1.53, "grad_norm": 0.648285984992981, "learning_rate": 0.0002907406621947908, "loss": 2.772, "step": 31209 }, { "epoch": 1.53, "grad_norm": 0.6502874493598938, "learning_rate": 0.00029072527335164615, "loss": 2.8967, "step": 31210 }, { "epoch": 1.53, "grad_norm": 0.6211252212524414, "learning_rate": 0.0002907098845329294, "loss": 2.8207, "step": 31211 }, { "epoch": 1.53, "grad_norm": 0.6525001525878906, "learning_rate": 0.0002906944957386811, "loss": 2.9625, "step": 31212 }, { "epoch": 1.53, "grad_norm": 0.6617982983589172, "learning_rate": 0.0002906791069689416, "loss": 3.0828, "step": 31213 }, { "epoch": 1.53, "grad_norm": 0.6012643575668335, "learning_rate": 0.0002906637182237516, "loss": 2.9603, "step": 31214 }, { "epoch": 1.53, "grad_norm": 0.5715975165367126, "learning_rate": 0.00029064832950315156, "loss": 3.043, "step": 31215 }, { "epoch": 1.53, "grad_norm": 0.6091911196708679, "learning_rate": 0.0002906329408071819, "loss": 2.9976, "step": 31216 }, { "epoch": 1.53, "grad_norm": 0.6170886158943176, "learning_rate": 0.00029061755213588347, "loss": 2.9807, "step": 31217 }, { "epoch": 1.53, "grad_norm": 0.5898032188415527, "learning_rate": 0.00029060216348929653, "loss": 3.1002, "step": 31218 }, { "epoch": 1.53, "grad_norm": 0.6269931197166443, "learning_rate": 0.0002905867748674617, "loss": 3.2008, "step": 31219 }, { "epoch": 1.53, "grad_norm": 0.6010162830352783, "learning_rate": 0.0002905713862704194, "loss": 2.7986, "step": 31220 }, { "epoch": 1.53, "grad_norm": 0.664634644985199, "learning_rate": 0.00029055599769821036, "loss": 3.1664, "step": 31221 }, { "epoch": 1.53, "grad_norm": 0.5801214575767517, "learning_rate": 0.00029054060915087497, "loss": 2.9423, "step": 31222 }, { "epoch": 1.53, "grad_norm": 0.5644407272338867, "learning_rate": 0.00029052522062845374, "loss": 3.0543, "step": 31223 }, { "epoch": 1.53, "grad_norm": 0.5935441255569458, "learning_rate": 0.0002905098321309873, "loss": 3.0877, "step": 31224 }, { "epoch": 1.53, "grad_norm": 0.6010169982910156, "learning_rate": 0.0002904944436585161, "loss": 3.1078, "step": 31225 }, { "epoch": 1.53, "grad_norm": 0.6892803311347961, "learning_rate": 0.0002904790552110808, "loss": 3.0326, "step": 31226 }, { "epoch": 1.53, "grad_norm": 0.5705881118774414, "learning_rate": 0.00029046366678872176, "loss": 2.9728, "step": 31227 }, { "epoch": 1.53, "grad_norm": 0.6575391292572021, "learning_rate": 0.00029044827839147953, "loss": 3.0386, "step": 31228 }, { "epoch": 1.53, "grad_norm": 0.5752348303794861, "learning_rate": 0.00029043289001939485, "loss": 3.0842, "step": 31229 }, { "epoch": 1.53, "grad_norm": 0.5966557860374451, "learning_rate": 0.000290417501672508, "loss": 3.0958, "step": 31230 }, { "epoch": 1.53, "grad_norm": 0.6041060090065002, "learning_rate": 0.00029040211335085956, "loss": 2.974, "step": 31231 }, { "epoch": 1.53, "grad_norm": 0.590144693851471, "learning_rate": 0.00029038672505449025, "loss": 3.0159, "step": 31232 }, { "epoch": 1.53, "grad_norm": 0.6653158068656921, "learning_rate": 0.00029037133678344037, "loss": 2.6909, "step": 31233 }, { "epoch": 1.53, "grad_norm": 0.6356053948402405, "learning_rate": 0.00029035594853775063, "loss": 2.886, "step": 31234 }, { "epoch": 1.53, "grad_norm": 0.616893470287323, "learning_rate": 0.00029034056031746125, "loss": 3.085, "step": 31235 }, { "epoch": 1.53, "grad_norm": 0.5892876386642456, "learning_rate": 0.0002903251721226132, "loss": 3.0894, "step": 31236 }, { "epoch": 1.53, "grad_norm": 0.5598612427711487, "learning_rate": 0.0002903097839532467, "loss": 3.0063, "step": 31237 }, { "epoch": 1.53, "grad_norm": 0.6560896039009094, "learning_rate": 0.00029029439580940226, "loss": 3.0115, "step": 31238 }, { "epoch": 1.53, "grad_norm": 0.5958468317985535, "learning_rate": 0.00029027900769112064, "loss": 3.0396, "step": 31239 }, { "epoch": 1.53, "grad_norm": 0.5762074589729309, "learning_rate": 0.00029026361959844224, "loss": 3.3083, "step": 31240 }, { "epoch": 1.53, "grad_norm": 0.5689447522163391, "learning_rate": 0.0002902482315314076, "loss": 3.2403, "step": 31241 }, { "epoch": 1.53, "grad_norm": 0.5754348039627075, "learning_rate": 0.0002902328434900571, "loss": 3.07, "step": 31242 }, { "epoch": 1.53, "grad_norm": 0.6313903331756592, "learning_rate": 0.00029021745547443145, "loss": 3.122, "step": 31243 }, { "epoch": 1.53, "grad_norm": 0.6057738065719604, "learning_rate": 0.00029020206748457135, "loss": 3.025, "step": 31244 }, { "epoch": 1.53, "grad_norm": 0.6703782677650452, "learning_rate": 0.0002901866795205169, "loss": 3.1737, "step": 31245 }, { "epoch": 1.53, "grad_norm": 0.5860815048217773, "learning_rate": 0.000290171291582309, "loss": 3.0211, "step": 31246 }, { "epoch": 1.53, "grad_norm": 0.5635547041893005, "learning_rate": 0.00029015590366998785, "loss": 3.2277, "step": 31247 }, { "epoch": 1.53, "grad_norm": 0.598173201084137, "learning_rate": 0.00029014051578359426, "loss": 2.9847, "step": 31248 }, { "epoch": 1.53, "grad_norm": 0.6276450157165527, "learning_rate": 0.0002901251279231687, "loss": 3.1291, "step": 31249 }, { "epoch": 1.53, "grad_norm": 0.5984366536140442, "learning_rate": 0.00029010974008875155, "loss": 3.23, "step": 31250 }, { "epoch": 1.53, "grad_norm": 0.5806760191917419, "learning_rate": 0.00029009435228038354, "loss": 2.9694, "step": 31251 }, { "epoch": 1.53, "grad_norm": 0.6110997796058655, "learning_rate": 0.00029007896449810506, "loss": 3.054, "step": 31252 }, { "epoch": 1.53, "grad_norm": 0.5997822880744934, "learning_rate": 0.00029006357674195657, "loss": 3.1742, "step": 31253 }, { "epoch": 1.53, "grad_norm": 0.6100072860717773, "learning_rate": 0.00029004818901197886, "loss": 2.9635, "step": 31254 }, { "epoch": 1.53, "grad_norm": 0.5825597047805786, "learning_rate": 0.00029003280130821224, "loss": 3.119, "step": 31255 }, { "epoch": 1.53, "grad_norm": 0.636887788772583, "learning_rate": 0.00029001741363069735, "loss": 3.1825, "step": 31256 }, { "epoch": 1.53, "grad_norm": 0.5912345051765442, "learning_rate": 0.00029000202597947457, "loss": 3.078, "step": 31257 }, { "epoch": 1.53, "grad_norm": 0.5856207609176636, "learning_rate": 0.0002899866383545846, "loss": 3.2083, "step": 31258 }, { "epoch": 1.53, "grad_norm": 0.6235297322273254, "learning_rate": 0.0002899712507560679, "loss": 3.106, "step": 31259 }, { "epoch": 1.53, "grad_norm": 0.6140345335006714, "learning_rate": 0.00028995586318396495, "loss": 3.2502, "step": 31260 }, { "epoch": 1.53, "grad_norm": 0.5801708698272705, "learning_rate": 0.0002899404756383164, "loss": 3.1254, "step": 31261 }, { "epoch": 1.53, "grad_norm": 0.5877318382263184, "learning_rate": 0.0002899250881191627, "loss": 2.9534, "step": 31262 }, { "epoch": 1.53, "grad_norm": 0.5814765095710754, "learning_rate": 0.0002899097006265442, "loss": 2.9297, "step": 31263 }, { "epoch": 1.53, "grad_norm": 0.6114090085029602, "learning_rate": 0.00028989431316050183, "loss": 2.9979, "step": 31264 }, { "epoch": 1.53, "grad_norm": 0.601153552532196, "learning_rate": 0.00028987892572107573, "loss": 2.9141, "step": 31265 }, { "epoch": 1.53, "grad_norm": 0.6009148955345154, "learning_rate": 0.00028986353830830674, "loss": 2.9535, "step": 31266 }, { "epoch": 1.53, "grad_norm": 0.5960407257080078, "learning_rate": 0.0002898481509222352, "loss": 3.208, "step": 31267 }, { "epoch": 1.53, "grad_norm": 0.6321334838867188, "learning_rate": 0.00028983276356290156, "loss": 2.9113, "step": 31268 }, { "epoch": 1.53, "grad_norm": 0.6519079804420471, "learning_rate": 0.00028981737623034657, "loss": 2.9775, "step": 31269 }, { "epoch": 1.53, "grad_norm": 0.652167797088623, "learning_rate": 0.0002898019889246106, "loss": 3.239, "step": 31270 }, { "epoch": 1.53, "grad_norm": 0.5852517485618591, "learning_rate": 0.00028978660164573437, "loss": 2.943, "step": 31271 }, { "epoch": 1.53, "grad_norm": 0.5924550294876099, "learning_rate": 0.0002897712143937581, "loss": 3.0424, "step": 31272 }, { "epoch": 1.53, "grad_norm": 0.5765801072120667, "learning_rate": 0.0002897558271687225, "loss": 3.1681, "step": 31273 }, { "epoch": 1.53, "grad_norm": 0.6032983660697937, "learning_rate": 0.0002897404399706682, "loss": 3.0438, "step": 31274 }, { "epoch": 1.53, "grad_norm": 0.6318213939666748, "learning_rate": 0.0002897250527996355, "loss": 3.053, "step": 31275 }, { "epoch": 1.53, "grad_norm": 0.6157848238945007, "learning_rate": 0.0002897096656556651, "loss": 2.8284, "step": 31276 }, { "epoch": 1.53, "grad_norm": 0.6649987697601318, "learning_rate": 0.0002896942785387974, "loss": 3.1921, "step": 31277 }, { "epoch": 1.53, "grad_norm": 0.5770165920257568, "learning_rate": 0.0002896788914490729, "loss": 3.0402, "step": 31278 }, { "epoch": 1.53, "grad_norm": 0.5739706158638, "learning_rate": 0.0002896635043865324, "loss": 3.0012, "step": 31279 }, { "epoch": 1.53, "grad_norm": 0.6319332122802734, "learning_rate": 0.00028964811735121614, "loss": 3.2183, "step": 31280 }, { "epoch": 1.53, "grad_norm": 0.6746277809143066, "learning_rate": 0.00028963273034316486, "loss": 3.0346, "step": 31281 }, { "epoch": 1.53, "grad_norm": 0.5536617636680603, "learning_rate": 0.0002896173433624188, "loss": 3.1045, "step": 31282 }, { "epoch": 1.53, "grad_norm": 0.6022813320159912, "learning_rate": 0.0002896019564090187, "loss": 2.8612, "step": 31283 }, { "epoch": 1.53, "grad_norm": 0.6644525527954102, "learning_rate": 0.0002895865694830052, "loss": 2.758, "step": 31284 }, { "epoch": 1.53, "grad_norm": 0.6138014197349548, "learning_rate": 0.0002895711825844185, "loss": 3.1694, "step": 31285 }, { "epoch": 1.53, "grad_norm": 0.5913397669792175, "learning_rate": 0.0002895557957132994, "loss": 2.8191, "step": 31286 }, { "epoch": 1.53, "grad_norm": 0.5768195390701294, "learning_rate": 0.0002895404088696882, "loss": 2.8873, "step": 31287 }, { "epoch": 1.53, "grad_norm": 0.6131641268730164, "learning_rate": 0.0002895250220536256, "loss": 3.1928, "step": 31288 }, { "epoch": 1.53, "grad_norm": 0.5838510394096375, "learning_rate": 0.00028950963526515214, "loss": 3.0279, "step": 31289 }, { "epoch": 1.53, "grad_norm": 0.6200519800186157, "learning_rate": 0.00028949424850430817, "loss": 3.0139, "step": 31290 }, { "epoch": 1.53, "grad_norm": 0.60508131980896, "learning_rate": 0.00028947886177113446, "loss": 3.1849, "step": 31291 }, { "epoch": 1.53, "grad_norm": 0.6125930547714233, "learning_rate": 0.0002894634750656714, "loss": 3.0983, "step": 31292 }, { "epoch": 1.53, "grad_norm": 0.5791587829589844, "learning_rate": 0.0002894480883879593, "loss": 3.1379, "step": 31293 }, { "epoch": 1.53, "grad_norm": 0.6140015721321106, "learning_rate": 0.0002894327017380392, "loss": 3.2646, "step": 31294 }, { "epoch": 1.53, "grad_norm": 0.7361003756523132, "learning_rate": 0.00028941731511595117, "loss": 3.1597, "step": 31295 }, { "epoch": 1.53, "grad_norm": 0.5974676012992859, "learning_rate": 0.00028940192852173596, "loss": 3.1636, "step": 31296 }, { "epoch": 1.53, "grad_norm": 0.6096514463424683, "learning_rate": 0.00028938654195543396, "loss": 2.9766, "step": 31297 }, { "epoch": 1.53, "grad_norm": 0.6190111637115479, "learning_rate": 0.0002893711554170858, "loss": 2.8579, "step": 31298 }, { "epoch": 1.53, "grad_norm": 0.6046838164329529, "learning_rate": 0.00028935576890673205, "loss": 3.1151, "step": 31299 }, { "epoch": 1.53, "grad_norm": 0.6145317554473877, "learning_rate": 0.000289340382424413, "loss": 3.1992, "step": 31300 }, { "epoch": 1.53, "grad_norm": 0.6103838086128235, "learning_rate": 0.00028932499597016944, "loss": 3.0945, "step": 31301 }, { "epoch": 1.53, "grad_norm": 0.600480854511261, "learning_rate": 0.0002893096095440418, "loss": 2.9785, "step": 31302 }, { "epoch": 1.53, "grad_norm": 0.5839743614196777, "learning_rate": 0.0002892942231460704, "loss": 3.1328, "step": 31303 }, { "epoch": 1.53, "grad_norm": 0.6166554689407349, "learning_rate": 0.0002892788367762962, "loss": 3.0722, "step": 31304 }, { "epoch": 1.53, "grad_norm": 0.6772105693817139, "learning_rate": 0.0002892634504347593, "loss": 2.8744, "step": 31305 }, { "epoch": 1.53, "grad_norm": 0.6089929938316345, "learning_rate": 0.00028924806412150057, "loss": 3.1004, "step": 31306 }, { "epoch": 1.53, "grad_norm": 0.5962395668029785, "learning_rate": 0.00028923267783656026, "loss": 2.9843, "step": 31307 }, { "epoch": 1.53, "grad_norm": 0.6941888928413391, "learning_rate": 0.00028921729157997894, "loss": 3.0849, "step": 31308 }, { "epoch": 1.53, "grad_norm": 0.607742965221405, "learning_rate": 0.0002892019053517974, "loss": 3.2729, "step": 31309 }, { "epoch": 1.53, "grad_norm": 0.5975240468978882, "learning_rate": 0.00028918651915205585, "loss": 3.1408, "step": 31310 }, { "epoch": 1.53, "grad_norm": 0.5710057616233826, "learning_rate": 0.000289171132980795, "loss": 3.3382, "step": 31311 }, { "epoch": 1.53, "grad_norm": 0.6463126540184021, "learning_rate": 0.00028915574683805514, "loss": 2.967, "step": 31312 }, { "epoch": 1.53, "grad_norm": 0.7494412064552307, "learning_rate": 0.00028914036072387716, "loss": 2.9294, "step": 31313 }, { "epoch": 1.53, "grad_norm": 0.5747896432876587, "learning_rate": 0.00028912497463830125, "loss": 3.2201, "step": 31314 }, { "epoch": 1.53, "grad_norm": 0.5977765917778015, "learning_rate": 0.00028910958858136796, "loss": 3.0647, "step": 31315 }, { "epoch": 1.53, "grad_norm": 0.6861127614974976, "learning_rate": 0.00028909420255311815, "loss": 3.0568, "step": 31316 }, { "epoch": 1.53, "grad_norm": 0.6028975248336792, "learning_rate": 0.00028907881655359195, "loss": 3.0533, "step": 31317 }, { "epoch": 1.53, "grad_norm": 0.5990371108055115, "learning_rate": 0.0002890634305828302, "loss": 3.0781, "step": 31318 }, { "epoch": 1.53, "grad_norm": 0.6500790119171143, "learning_rate": 0.0002890480446408731, "loss": 2.9856, "step": 31319 }, { "epoch": 1.53, "grad_norm": 0.6449432969093323, "learning_rate": 0.0002890326587277614, "loss": 2.9427, "step": 31320 }, { "epoch": 1.53, "grad_norm": 0.6211156845092773, "learning_rate": 0.0002890172728435356, "loss": 2.9741, "step": 31321 }, { "epoch": 1.54, "grad_norm": 0.6242359280586243, "learning_rate": 0.0002890018869882361, "loss": 3.1796, "step": 31322 }, { "epoch": 1.54, "grad_norm": 0.6206004619598389, "learning_rate": 0.00028898650116190367, "loss": 3.1543, "step": 31323 }, { "epoch": 1.54, "grad_norm": 0.5952155590057373, "learning_rate": 0.0002889711153645786, "loss": 2.88, "step": 31324 }, { "epoch": 1.54, "grad_norm": 0.6566175222396851, "learning_rate": 0.00028895572959630137, "loss": 3.3069, "step": 31325 }, { "epoch": 1.54, "grad_norm": 0.5943531394004822, "learning_rate": 0.00028894034385711277, "loss": 3.0009, "step": 31326 }, { "epoch": 1.54, "grad_norm": 0.6611933708190918, "learning_rate": 0.0002889249581470531, "loss": 3.1812, "step": 31327 }, { "epoch": 1.54, "grad_norm": 0.6006651520729065, "learning_rate": 0.000288909572466163, "loss": 3.2205, "step": 31328 }, { "epoch": 1.54, "grad_norm": 0.6034921407699585, "learning_rate": 0.000288894186814483, "loss": 3.079, "step": 31329 }, { "epoch": 1.54, "grad_norm": 0.5508182644844055, "learning_rate": 0.0002888788011920534, "loss": 2.9034, "step": 31330 }, { "epoch": 1.54, "grad_norm": 0.596584141254425, "learning_rate": 0.0002888634155989151, "loss": 3.1913, "step": 31331 }, { "epoch": 1.54, "grad_norm": 0.5692295432090759, "learning_rate": 0.0002888480300351083, "loss": 3.1871, "step": 31332 }, { "epoch": 1.54, "grad_norm": 0.6114552617073059, "learning_rate": 0.0002888326445006738, "loss": 2.9601, "step": 31333 }, { "epoch": 1.54, "grad_norm": 0.6093324422836304, "learning_rate": 0.00028881725899565177, "loss": 3.0075, "step": 31334 }, { "epoch": 1.54, "grad_norm": 0.608741044998169, "learning_rate": 0.00028880187352008297, "loss": 3.0645, "step": 31335 }, { "epoch": 1.54, "grad_norm": 0.598630428314209, "learning_rate": 0.00028878648807400794, "loss": 3.1638, "step": 31336 }, { "epoch": 1.54, "grad_norm": 0.6114171743392944, "learning_rate": 0.00028877110265746706, "loss": 2.9811, "step": 31337 }, { "epoch": 1.54, "grad_norm": 0.6134542226791382, "learning_rate": 0.0002887557172705011, "loss": 3.0926, "step": 31338 }, { "epoch": 1.54, "grad_norm": 0.61723792552948, "learning_rate": 0.00028874033191315023, "loss": 3.0028, "step": 31339 }, { "epoch": 1.54, "grad_norm": 0.5689911842346191, "learning_rate": 0.00028872494658545517, "loss": 3.1265, "step": 31340 }, { "epoch": 1.54, "grad_norm": 0.6355405449867249, "learning_rate": 0.00028870956128745657, "loss": 3.0033, "step": 31341 }, { "epoch": 1.54, "grad_norm": 0.6212851405143738, "learning_rate": 0.00028869417601919475, "loss": 3.1319, "step": 31342 }, { "epoch": 1.54, "grad_norm": 0.600463330745697, "learning_rate": 0.00028867879078071034, "loss": 3.0882, "step": 31343 }, { "epoch": 1.54, "grad_norm": 0.6733449101448059, "learning_rate": 0.0002886634055720437, "loss": 2.8782, "step": 31344 }, { "epoch": 1.54, "grad_norm": 0.6149190068244934, "learning_rate": 0.00028864802039323546, "loss": 3.1114, "step": 31345 }, { "epoch": 1.54, "grad_norm": 0.6464491486549377, "learning_rate": 0.0002886326352443263, "loss": 2.9551, "step": 31346 }, { "epoch": 1.54, "grad_norm": 0.5949641466140747, "learning_rate": 0.0002886172501253564, "loss": 3.2199, "step": 31347 }, { "epoch": 1.54, "grad_norm": 0.6497898697853088, "learning_rate": 0.0002886018650363667, "loss": 2.9258, "step": 31348 }, { "epoch": 1.54, "grad_norm": 0.6072144508361816, "learning_rate": 0.0002885864799773973, "loss": 2.9873, "step": 31349 }, { "epoch": 1.54, "grad_norm": 0.5734956860542297, "learning_rate": 0.00028857109494848896, "loss": 2.9834, "step": 31350 }, { "epoch": 1.54, "grad_norm": 0.6253546476364136, "learning_rate": 0.00028855570994968223, "loss": 2.9745, "step": 31351 }, { "epoch": 1.54, "grad_norm": 0.5898392796516418, "learning_rate": 0.00028854032498101746, "loss": 2.8968, "step": 31352 }, { "epoch": 1.54, "grad_norm": 0.6153894662857056, "learning_rate": 0.00028852494004253537, "loss": 2.9332, "step": 31353 }, { "epoch": 1.54, "grad_norm": 0.6599181890487671, "learning_rate": 0.00028850955513427633, "loss": 3.1384, "step": 31354 }, { "epoch": 1.54, "grad_norm": 0.5734416842460632, "learning_rate": 0.00028849417025628084, "loss": 3.0424, "step": 31355 }, { "epoch": 1.54, "grad_norm": 0.5493830442428589, "learning_rate": 0.0002884787854085896, "loss": 3.0597, "step": 31356 }, { "epoch": 1.54, "grad_norm": 0.6523565649986267, "learning_rate": 0.000288463400591243, "loss": 3.3818, "step": 31357 }, { "epoch": 1.54, "grad_norm": 0.6114045977592468, "learning_rate": 0.00028844801580428167, "loss": 2.8229, "step": 31358 }, { "epoch": 1.54, "grad_norm": 0.5856912136077881, "learning_rate": 0.00028843263104774583, "loss": 3.0191, "step": 31359 }, { "epoch": 1.54, "grad_norm": 0.6086015105247498, "learning_rate": 0.00028841724632167634, "loss": 3.0111, "step": 31360 }, { "epoch": 1.54, "grad_norm": 0.5889554619789124, "learning_rate": 0.00028840186162611364, "loss": 3.1478, "step": 31361 }, { "epoch": 1.54, "grad_norm": 0.6440091133117676, "learning_rate": 0.00028838647696109807, "loss": 3.1964, "step": 31362 }, { "epoch": 1.54, "grad_norm": 0.6442698240280151, "learning_rate": 0.0002883710923266704, "loss": 3.0283, "step": 31363 }, { "epoch": 1.54, "grad_norm": 0.6014328002929688, "learning_rate": 0.000288355707722871, "loss": 3.1843, "step": 31364 }, { "epoch": 1.54, "grad_norm": 0.5934261679649353, "learning_rate": 0.0002883403231497403, "loss": 2.9702, "step": 31365 }, { "epoch": 1.54, "grad_norm": 0.6464361548423767, "learning_rate": 0.00028832493860731916, "loss": 3.0837, "step": 31366 }, { "epoch": 1.54, "grad_norm": 0.5907967686653137, "learning_rate": 0.0002883095540956477, "loss": 2.9704, "step": 31367 }, { "epoch": 1.54, "grad_norm": 0.5807782411575317, "learning_rate": 0.0002882941696147668, "loss": 3.0911, "step": 31368 }, { "epoch": 1.54, "grad_norm": 0.5743398666381836, "learning_rate": 0.0002882787851647167, "loss": 2.9713, "step": 31369 }, { "epoch": 1.54, "grad_norm": 0.5994200110435486, "learning_rate": 0.00028826340074553797, "loss": 2.9487, "step": 31370 }, { "epoch": 1.54, "grad_norm": 0.6094851493835449, "learning_rate": 0.0002882480163572713, "loss": 2.9808, "step": 31371 }, { "epoch": 1.54, "grad_norm": 0.6090477108955383, "learning_rate": 0.00028823263199995706, "loss": 3.0441, "step": 31372 }, { "epoch": 1.54, "grad_norm": 0.5785759091377258, "learning_rate": 0.00028821724767363584, "loss": 3.0751, "step": 31373 }, { "epoch": 1.54, "grad_norm": 0.5603683590888977, "learning_rate": 0.000288201863378348, "loss": 3.0937, "step": 31374 }, { "epoch": 1.54, "grad_norm": 0.5782989263534546, "learning_rate": 0.0002881864791141342, "loss": 2.9813, "step": 31375 }, { "epoch": 1.54, "grad_norm": 0.6115820407867432, "learning_rate": 0.0002881710948810351, "loss": 2.9611, "step": 31376 }, { "epoch": 1.54, "grad_norm": 0.6622043251991272, "learning_rate": 0.00028815571067909083, "loss": 3.142, "step": 31377 }, { "epoch": 1.54, "grad_norm": 0.6727275252342224, "learning_rate": 0.0002881403265083423, "loss": 3.1263, "step": 31378 }, { "epoch": 1.54, "grad_norm": 0.6339889764785767, "learning_rate": 0.0002881249423688299, "loss": 3.0685, "step": 31379 }, { "epoch": 1.54, "grad_norm": 0.6273970007896423, "learning_rate": 0.000288109558260594, "loss": 3.2247, "step": 31380 }, { "epoch": 1.54, "grad_norm": 0.6286791563034058, "learning_rate": 0.0002880941741836753, "loss": 3.0735, "step": 31381 }, { "epoch": 1.54, "grad_norm": 0.5971516370773315, "learning_rate": 0.0002880787901381142, "loss": 3.1196, "step": 31382 }, { "epoch": 1.54, "grad_norm": 0.5588302612304688, "learning_rate": 0.0002880634061239514, "loss": 2.7368, "step": 31383 }, { "epoch": 1.54, "grad_norm": 0.6372582912445068, "learning_rate": 0.0002880480221412271, "loss": 3.0035, "step": 31384 }, { "epoch": 1.54, "grad_norm": 0.6087130904197693, "learning_rate": 0.0002880326381899821, "loss": 3.1687, "step": 31385 }, { "epoch": 1.54, "grad_norm": 0.6057910919189453, "learning_rate": 0.00028801725427025687, "loss": 2.9205, "step": 31386 }, { "epoch": 1.54, "grad_norm": 0.5742743015289307, "learning_rate": 0.0002880018703820918, "loss": 3.0471, "step": 31387 }, { "epoch": 1.54, "grad_norm": 0.5914288759231567, "learning_rate": 0.0002879864865255276, "loss": 3.14, "step": 31388 }, { "epoch": 1.54, "grad_norm": 0.5800380706787109, "learning_rate": 0.00028797110270060457, "loss": 2.8114, "step": 31389 }, { "epoch": 1.54, "grad_norm": 0.5977968573570251, "learning_rate": 0.0002879557189073635, "loss": 3.0108, "step": 31390 }, { "epoch": 1.54, "grad_norm": 0.6388533711433411, "learning_rate": 0.0002879403351458446, "loss": 2.8719, "step": 31391 }, { "epoch": 1.54, "grad_norm": 0.5507797002792358, "learning_rate": 0.0002879249514160885, "loss": 3.0939, "step": 31392 }, { "epoch": 1.54, "grad_norm": 0.5805448293685913, "learning_rate": 0.0002879095677181359, "loss": 2.9328, "step": 31393 }, { "epoch": 1.54, "grad_norm": 0.5536087155342102, "learning_rate": 0.0002878941840520271, "loss": 3.1754, "step": 31394 }, { "epoch": 1.54, "grad_norm": 0.5905557870864868, "learning_rate": 0.00028787880041780277, "loss": 3.0814, "step": 31395 }, { "epoch": 1.54, "grad_norm": 0.6131752729415894, "learning_rate": 0.0002878634168155032, "loss": 2.9661, "step": 31396 }, { "epoch": 1.54, "grad_norm": 0.6142122149467468, "learning_rate": 0.0002878480332451691, "loss": 3.0028, "step": 31397 }, { "epoch": 1.54, "grad_norm": 0.6468976140022278, "learning_rate": 0.0002878326497068411, "loss": 2.9483, "step": 31398 }, { "epoch": 1.54, "grad_norm": 0.5881075859069824, "learning_rate": 0.0002878172662005593, "loss": 2.9752, "step": 31399 }, { "epoch": 1.54, "grad_norm": 0.5991531610488892, "learning_rate": 0.0002878018827263647, "loss": 2.8782, "step": 31400 }, { "epoch": 1.54, "grad_norm": 0.589621365070343, "learning_rate": 0.00028778649928429744, "loss": 3.1235, "step": 31401 }, { "epoch": 1.54, "grad_norm": 0.5604541301727295, "learning_rate": 0.0002877711158743982, "loss": 3.1901, "step": 31402 }, { "epoch": 1.54, "grad_norm": 0.6096484661102295, "learning_rate": 0.0002877557324967076, "loss": 3.0457, "step": 31403 }, { "epoch": 1.54, "grad_norm": 0.645102858543396, "learning_rate": 0.000287740349151266, "loss": 2.9014, "step": 31404 }, { "epoch": 1.54, "grad_norm": 0.6197732090950012, "learning_rate": 0.00028772496583811397, "loss": 2.9379, "step": 31405 }, { "epoch": 1.54, "grad_norm": 0.5692452788352966, "learning_rate": 0.00028770958255729194, "loss": 3.0261, "step": 31406 }, { "epoch": 1.54, "grad_norm": 0.6242355704307556, "learning_rate": 0.00028769419930884044, "loss": 2.9257, "step": 31407 }, { "epoch": 1.54, "grad_norm": 0.578525722026825, "learning_rate": 0.0002876788160928003, "loss": 3.1085, "step": 31408 }, { "epoch": 1.54, "grad_norm": 0.6010854840278625, "learning_rate": 0.0002876634329092116, "loss": 3.0628, "step": 31409 }, { "epoch": 1.54, "grad_norm": 0.5785937309265137, "learning_rate": 0.00028764804975811516, "loss": 2.8512, "step": 31410 }, { "epoch": 1.54, "grad_norm": 0.6024236083030701, "learning_rate": 0.00028763266663955125, "loss": 2.9083, "step": 31411 }, { "epoch": 1.54, "grad_norm": 0.5748933553695679, "learning_rate": 0.00028761728355356056, "loss": 2.9888, "step": 31412 }, { "epoch": 1.54, "grad_norm": 0.5996086597442627, "learning_rate": 0.00028760190050018366, "loss": 3.0, "step": 31413 }, { "epoch": 1.54, "grad_norm": 0.6583248376846313, "learning_rate": 0.0002875865174794608, "loss": 2.8246, "step": 31414 }, { "epoch": 1.54, "grad_norm": 0.64971524477005, "learning_rate": 0.00028757113449143283, "loss": 2.9814, "step": 31415 }, { "epoch": 1.54, "grad_norm": 0.6212257146835327, "learning_rate": 0.00028755575153614, "loss": 2.9499, "step": 31416 }, { "epoch": 1.54, "grad_norm": 0.6192092895507812, "learning_rate": 0.0002875403686136229, "loss": 3.067, "step": 31417 }, { "epoch": 1.54, "grad_norm": 0.588834285736084, "learning_rate": 0.0002875249857239222, "loss": 3.134, "step": 31418 }, { "epoch": 1.54, "grad_norm": 0.5852153897285461, "learning_rate": 0.0002875096028670782, "loss": 3.1103, "step": 31419 }, { "epoch": 1.54, "grad_norm": 0.6158280372619629, "learning_rate": 0.00028749422004313157, "loss": 2.9628, "step": 31420 }, { "epoch": 1.54, "grad_norm": 0.6489527225494385, "learning_rate": 0.0002874788372521226, "loss": 3.1299, "step": 31421 }, { "epoch": 1.54, "grad_norm": 0.5668579339981079, "learning_rate": 0.00028746345449409206, "loss": 2.8412, "step": 31422 }, { "epoch": 1.54, "grad_norm": 0.6310492753982544, "learning_rate": 0.0002874480717690804, "loss": 3.1859, "step": 31423 }, { "epoch": 1.54, "grad_norm": 0.5989445447921753, "learning_rate": 0.00028743268907712805, "loss": 3.0767, "step": 31424 }, { "epoch": 1.54, "grad_norm": 0.5920668244361877, "learning_rate": 0.0002874173064182756, "loss": 2.8711, "step": 31425 }, { "epoch": 1.54, "grad_norm": 0.5631740093231201, "learning_rate": 0.00028740192379256356, "loss": 3.1315, "step": 31426 }, { "epoch": 1.54, "grad_norm": 0.613795816898346, "learning_rate": 0.00028738654120003237, "loss": 2.8111, "step": 31427 }, { "epoch": 1.54, "grad_norm": 0.6500447988510132, "learning_rate": 0.0002873711586407227, "loss": 3.0035, "step": 31428 }, { "epoch": 1.54, "grad_norm": 0.595923662185669, "learning_rate": 0.00028735577611467485, "loss": 3.0004, "step": 31429 }, { "epoch": 1.54, "grad_norm": 0.5796763896942139, "learning_rate": 0.00028734039362192956, "loss": 3.0754, "step": 31430 }, { "epoch": 1.54, "grad_norm": 0.6116151213645935, "learning_rate": 0.0002873250111625272, "loss": 3.1244, "step": 31431 }, { "epoch": 1.54, "grad_norm": 0.6706094741821289, "learning_rate": 0.00028730962873650825, "loss": 3.3772, "step": 31432 }, { "epoch": 1.54, "grad_norm": 0.5975269079208374, "learning_rate": 0.0002872942463439134, "loss": 3.1309, "step": 31433 }, { "epoch": 1.54, "grad_norm": 0.5869545340538025, "learning_rate": 0.000287278863984783, "loss": 3.2301, "step": 31434 }, { "epoch": 1.54, "grad_norm": 0.5768601298332214, "learning_rate": 0.0002872634816591577, "loss": 2.9476, "step": 31435 }, { "epoch": 1.54, "grad_norm": 0.614723265171051, "learning_rate": 0.0002872480993670778, "loss": 3.1834, "step": 31436 }, { "epoch": 1.54, "grad_norm": 0.5859989523887634, "learning_rate": 0.00028723271710858405, "loss": 3.0171, "step": 31437 }, { "epoch": 1.54, "grad_norm": 0.6272194981575012, "learning_rate": 0.0002872173348837169, "loss": 3.0156, "step": 31438 }, { "epoch": 1.54, "grad_norm": 0.6279314756393433, "learning_rate": 0.0002872019526925167, "loss": 2.7363, "step": 31439 }, { "epoch": 1.54, "grad_norm": 0.5947081446647644, "learning_rate": 0.00028718657053502427, "loss": 3.1848, "step": 31440 }, { "epoch": 1.54, "grad_norm": 0.619102954864502, "learning_rate": 0.0002871711884112798, "loss": 2.929, "step": 31441 }, { "epoch": 1.54, "grad_norm": 0.6086522936820984, "learning_rate": 0.00028715580632132393, "loss": 3.2284, "step": 31442 }, { "epoch": 1.54, "grad_norm": 0.6687970757484436, "learning_rate": 0.0002871404242651973, "loss": 2.9145, "step": 31443 }, { "epoch": 1.54, "grad_norm": 0.5713397860527039, "learning_rate": 0.0002871250422429403, "loss": 2.8666, "step": 31444 }, { "epoch": 1.54, "grad_norm": 0.5800654292106628, "learning_rate": 0.00028710966025459353, "loss": 3.0257, "step": 31445 }, { "epoch": 1.54, "grad_norm": 0.6090438365936279, "learning_rate": 0.00028709427830019723, "loss": 2.9861, "step": 31446 }, { "epoch": 1.54, "grad_norm": 0.5880951285362244, "learning_rate": 0.00028707889637979224, "loss": 3.0089, "step": 31447 }, { "epoch": 1.54, "grad_norm": 0.6175966858863831, "learning_rate": 0.000287063514493419, "loss": 3.0327, "step": 31448 }, { "epoch": 1.54, "grad_norm": 0.602347195148468, "learning_rate": 0.0002870481326411179, "loss": 3.0266, "step": 31449 }, { "epoch": 1.54, "grad_norm": 0.6162763833999634, "learning_rate": 0.0002870327508229296, "loss": 3.1335, "step": 31450 }, { "epoch": 1.54, "grad_norm": 0.6215873956680298, "learning_rate": 0.0002870173690388944, "loss": 3.0356, "step": 31451 }, { "epoch": 1.54, "grad_norm": 0.6079941391944885, "learning_rate": 0.000287001987289053, "loss": 3.0005, "step": 31452 }, { "epoch": 1.54, "grad_norm": 0.5799838304519653, "learning_rate": 0.00028698660557344597, "loss": 3.1835, "step": 31453 }, { "epoch": 1.54, "grad_norm": 0.5891263484954834, "learning_rate": 0.0002869712238921136, "loss": 2.8755, "step": 31454 }, { "epoch": 1.54, "grad_norm": 0.5852018594741821, "learning_rate": 0.00028695584224509656, "loss": 3.0743, "step": 31455 }, { "epoch": 1.54, "grad_norm": 0.6360331177711487, "learning_rate": 0.00028694046063243537, "loss": 3.1777, "step": 31456 }, { "epoch": 1.54, "grad_norm": 0.5894647240638733, "learning_rate": 0.0002869250790541704, "loss": 3.2482, "step": 31457 }, { "epoch": 1.54, "grad_norm": 0.5866122841835022, "learning_rate": 0.0002869096975103423, "loss": 2.9541, "step": 31458 }, { "epoch": 1.54, "grad_norm": 0.5750760436058044, "learning_rate": 0.00028689431600099154, "loss": 3.1921, "step": 31459 }, { "epoch": 1.54, "grad_norm": 0.6584492325782776, "learning_rate": 0.0002868789345261587, "loss": 2.9085, "step": 31460 }, { "epoch": 1.54, "grad_norm": 0.6034347414970398, "learning_rate": 0.00028686355308588405, "loss": 3.332, "step": 31461 }, { "epoch": 1.54, "grad_norm": 0.581174910068512, "learning_rate": 0.0002868481716802084, "loss": 2.9919, "step": 31462 }, { "epoch": 1.54, "grad_norm": 0.616607129573822, "learning_rate": 0.00028683279030917216, "loss": 2.9166, "step": 31463 }, { "epoch": 1.54, "grad_norm": 0.6416230201721191, "learning_rate": 0.0002868174089728157, "loss": 2.944, "step": 31464 }, { "epoch": 1.54, "grad_norm": 0.616322934627533, "learning_rate": 0.0002868020276711798, "loss": 2.9355, "step": 31465 }, { "epoch": 1.54, "grad_norm": 0.5935969948768616, "learning_rate": 0.0002867866464043047, "loss": 3.1532, "step": 31466 }, { "epoch": 1.54, "grad_norm": 0.6162576079368591, "learning_rate": 0.00028677126517223113, "loss": 2.867, "step": 31467 }, { "epoch": 1.54, "grad_norm": 0.6141955852508545, "learning_rate": 0.00028675588397499935, "loss": 2.8846, "step": 31468 }, { "epoch": 1.54, "grad_norm": 0.5820909142494202, "learning_rate": 0.00028674050281265003, "loss": 3.1472, "step": 31469 }, { "epoch": 1.54, "grad_norm": 0.6148683428764343, "learning_rate": 0.00028672512168522384, "loss": 2.9993, "step": 31470 }, { "epoch": 1.54, "grad_norm": 0.5928391218185425, "learning_rate": 0.000286709740592761, "loss": 2.9203, "step": 31471 }, { "epoch": 1.54, "grad_norm": 0.5849189758300781, "learning_rate": 0.0002866943595353023, "loss": 2.995, "step": 31472 }, { "epoch": 1.54, "grad_norm": 0.6054073572158813, "learning_rate": 0.00028667897851288786, "loss": 3.1151, "step": 31473 }, { "epoch": 1.54, "grad_norm": 0.5830089449882507, "learning_rate": 0.00028666359752555854, "loss": 3.001, "step": 31474 }, { "epoch": 1.54, "grad_norm": 0.565097451210022, "learning_rate": 0.00028664821657335476, "loss": 3.1232, "step": 31475 }, { "epoch": 1.54, "grad_norm": 0.7242405414581299, "learning_rate": 0.0002866328356563169, "loss": 2.8098, "step": 31476 }, { "epoch": 1.54, "grad_norm": 0.5993044376373291, "learning_rate": 0.0002866174547744858, "loss": 3.0741, "step": 31477 }, { "epoch": 1.54, "grad_norm": 0.5751321911811829, "learning_rate": 0.0002866020739279016, "loss": 2.8981, "step": 31478 }, { "epoch": 1.54, "grad_norm": 0.6080617308616638, "learning_rate": 0.0002865866931166049, "loss": 3.0646, "step": 31479 }, { "epoch": 1.54, "grad_norm": 0.6069605350494385, "learning_rate": 0.0002865713123406364, "loss": 2.967, "step": 31480 }, { "epoch": 1.54, "grad_norm": 0.6173204183578491, "learning_rate": 0.0002865559316000364, "loss": 2.9911, "step": 31481 }, { "epoch": 1.54, "grad_norm": 0.5786520838737488, "learning_rate": 0.0002865405508948456, "loss": 2.7597, "step": 31482 }, { "epoch": 1.54, "grad_norm": 0.6026302576065063, "learning_rate": 0.0002865251702251042, "loss": 3.1053, "step": 31483 }, { "epoch": 1.54, "grad_norm": 0.5935983061790466, "learning_rate": 0.00028650978959085305, "loss": 3.092, "step": 31484 }, { "epoch": 1.54, "grad_norm": 0.6370457410812378, "learning_rate": 0.0002864944089921325, "loss": 3.0275, "step": 31485 }, { "epoch": 1.54, "grad_norm": 0.6145215630531311, "learning_rate": 0.00028647902842898304, "loss": 3.1223, "step": 31486 }, { "epoch": 1.54, "grad_norm": 0.6142358779907227, "learning_rate": 0.00028646364790144526, "loss": 3.0543, "step": 31487 }, { "epoch": 1.54, "grad_norm": 0.5921164155006409, "learning_rate": 0.00028644826740955963, "loss": 2.9785, "step": 31488 }, { "epoch": 1.54, "grad_norm": 0.6127771139144897, "learning_rate": 0.0002864328869533666, "loss": 3.1838, "step": 31489 }, { "epoch": 1.54, "grad_norm": 0.6095576882362366, "learning_rate": 0.0002864175065329068, "loss": 2.9457, "step": 31490 }, { "epoch": 1.54, "grad_norm": 0.5726305842399597, "learning_rate": 0.00028640212614822056, "loss": 2.9453, "step": 31491 }, { "epoch": 1.54, "grad_norm": 0.5958991050720215, "learning_rate": 0.0002863867457993486, "loss": 2.8155, "step": 31492 }, { "epoch": 1.54, "grad_norm": 0.5724688768386841, "learning_rate": 0.00028637136548633136, "loss": 2.9964, "step": 31493 }, { "epoch": 1.54, "grad_norm": 0.6108876466751099, "learning_rate": 0.0002863559852092092, "loss": 3.0757, "step": 31494 }, { "epoch": 1.54, "grad_norm": 0.5869536399841309, "learning_rate": 0.00028634060496802283, "loss": 2.9439, "step": 31495 }, { "epoch": 1.54, "grad_norm": 0.6103752255439758, "learning_rate": 0.0002863252247628127, "loss": 3.0208, "step": 31496 }, { "epoch": 1.54, "grad_norm": 0.6240681409835815, "learning_rate": 0.0002863098445936194, "loss": 2.8288, "step": 31497 }, { "epoch": 1.54, "grad_norm": 0.5823042988777161, "learning_rate": 0.0002862944644604831, "loss": 2.9939, "step": 31498 }, { "epoch": 1.54, "grad_norm": 0.5619244575500488, "learning_rate": 0.00028627908436344463, "loss": 3.0009, "step": 31499 }, { "epoch": 1.54, "grad_norm": 0.652056097984314, "learning_rate": 0.0002862637043025445, "loss": 3.3113, "step": 31500 }, { "epoch": 1.54, "grad_norm": 0.5911815166473389, "learning_rate": 0.0002862483242778229, "loss": 3.1164, "step": 31501 }, { "epoch": 1.54, "grad_norm": 0.6071313619613647, "learning_rate": 0.0002862329442893208, "loss": 3.1199, "step": 31502 }, { "epoch": 1.54, "grad_norm": 0.6353355646133423, "learning_rate": 0.00028621756433707844, "loss": 2.8222, "step": 31503 }, { "epoch": 1.54, "grad_norm": 0.6127616167068481, "learning_rate": 0.00028620218442113623, "loss": 3.1173, "step": 31504 }, { "epoch": 1.54, "grad_norm": 0.6205554008483887, "learning_rate": 0.00028618680454153497, "loss": 3.0187, "step": 31505 }, { "epoch": 1.54, "grad_norm": 0.5830715298652649, "learning_rate": 0.000286171424698315, "loss": 2.8732, "step": 31506 }, { "epoch": 1.54, "grad_norm": 0.6005604267120361, "learning_rate": 0.0002861560448915168, "loss": 3.1406, "step": 31507 }, { "epoch": 1.54, "grad_norm": 0.5887455344200134, "learning_rate": 0.0002861406651211808, "loss": 3.1733, "step": 31508 }, { "epoch": 1.54, "grad_norm": 0.5852056741714478, "learning_rate": 0.0002861252853873476, "loss": 3.0398, "step": 31509 }, { "epoch": 1.54, "grad_norm": 0.6041200160980225, "learning_rate": 0.000286109905690058, "loss": 2.9279, "step": 31510 }, { "epoch": 1.54, "grad_norm": 0.590787410736084, "learning_rate": 0.000286094526029352, "loss": 2.9335, "step": 31511 }, { "epoch": 1.54, "grad_norm": 0.5768125653266907, "learning_rate": 0.00028607914640527046, "loss": 3.1014, "step": 31512 }, { "epoch": 1.54, "grad_norm": 0.6092056035995483, "learning_rate": 0.00028606376681785366, "loss": 3.0408, "step": 31513 }, { "epoch": 1.54, "grad_norm": 0.6362078189849854, "learning_rate": 0.0002860483872671423, "loss": 2.8638, "step": 31514 }, { "epoch": 1.54, "grad_norm": 0.5419045686721802, "learning_rate": 0.00028603300775317683, "loss": 3.0463, "step": 31515 }, { "epoch": 1.54, "grad_norm": 0.5694605112075806, "learning_rate": 0.00028601762827599764, "loss": 2.8361, "step": 31516 }, { "epoch": 1.54, "grad_norm": 0.6117928624153137, "learning_rate": 0.00028600224883564544, "loss": 3.0969, "step": 31517 }, { "epoch": 1.54, "grad_norm": 0.5907307863235474, "learning_rate": 0.00028598686943216055, "loss": 3.1432, "step": 31518 }, { "epoch": 1.54, "grad_norm": 0.5980222821235657, "learning_rate": 0.00028597149006558343, "loss": 2.9284, "step": 31519 }, { "epoch": 1.54, "grad_norm": 0.6280714869499207, "learning_rate": 0.0002859561107359549, "loss": 2.811, "step": 31520 }, { "epoch": 1.54, "grad_norm": 0.5964347720146179, "learning_rate": 0.0002859407314433152, "loss": 2.8092, "step": 31521 }, { "epoch": 1.54, "grad_norm": 0.5739207863807678, "learning_rate": 0.00028592535218770497, "loss": 3.0099, "step": 31522 }, { "epoch": 1.54, "grad_norm": 0.5974461436271667, "learning_rate": 0.00028590997296916454, "loss": 2.9818, "step": 31523 }, { "epoch": 1.54, "grad_norm": 0.6324328184127808, "learning_rate": 0.00028589459378773456, "loss": 3.3268, "step": 31524 }, { "epoch": 1.54, "grad_norm": 0.5549293756484985, "learning_rate": 0.0002858792146434556, "loss": 3.1289, "step": 31525 }, { "epoch": 1.55, "grad_norm": 0.5910172462463379, "learning_rate": 0.00028586383553636793, "loss": 3.0805, "step": 31526 }, { "epoch": 1.55, "grad_norm": 0.5912405848503113, "learning_rate": 0.0002858484564665123, "loss": 3.2192, "step": 31527 }, { "epoch": 1.55, "grad_norm": 0.6898330450057983, "learning_rate": 0.0002858330774339291, "loss": 2.9639, "step": 31528 }, { "epoch": 1.55, "grad_norm": 0.6082789897918701, "learning_rate": 0.00028581769843865875, "loss": 2.9877, "step": 31529 }, { "epoch": 1.55, "grad_norm": 0.6206566691398621, "learning_rate": 0.000285802319480742, "loss": 3.1703, "step": 31530 }, { "epoch": 1.55, "grad_norm": 0.5829499363899231, "learning_rate": 0.000285786940560219, "loss": 3.1095, "step": 31531 }, { "epoch": 1.55, "grad_norm": 0.5791610479354858, "learning_rate": 0.0002857715616771307, "loss": 2.9349, "step": 31532 }, { "epoch": 1.55, "grad_norm": 0.6252768635749817, "learning_rate": 0.00028575618283151725, "loss": 3.0773, "step": 31533 }, { "epoch": 1.55, "grad_norm": 0.5908172130584717, "learning_rate": 0.0002857408040234192, "loss": 2.9155, "step": 31534 }, { "epoch": 1.55, "grad_norm": 0.5993767976760864, "learning_rate": 0.0002857254252528773, "loss": 3.1915, "step": 31535 }, { "epoch": 1.55, "grad_norm": 0.615296483039856, "learning_rate": 0.0002857100465199318, "loss": 2.9795, "step": 31536 }, { "epoch": 1.55, "grad_norm": 0.57916659116745, "learning_rate": 0.00028569466782462337, "loss": 3.1722, "step": 31537 }, { "epoch": 1.55, "grad_norm": 0.597743570804596, "learning_rate": 0.0002856792891669922, "loss": 2.8156, "step": 31538 }, { "epoch": 1.55, "grad_norm": 0.5725197792053223, "learning_rate": 0.0002856639105470792, "loss": 2.9045, "step": 31539 }, { "epoch": 1.55, "grad_norm": 0.5700836777687073, "learning_rate": 0.00028564853196492475, "loss": 3.203, "step": 31540 }, { "epoch": 1.55, "grad_norm": 0.6204779148101807, "learning_rate": 0.00028563315342056914, "loss": 3.0341, "step": 31541 }, { "epoch": 1.55, "grad_norm": 0.6638866066932678, "learning_rate": 0.00028561777491405317, "loss": 2.8039, "step": 31542 }, { "epoch": 1.55, "grad_norm": 0.6386014819145203, "learning_rate": 0.0002856023964454172, "loss": 3.016, "step": 31543 }, { "epoch": 1.55, "grad_norm": 0.5822014212608337, "learning_rate": 0.0002855870180147018, "loss": 2.9548, "step": 31544 }, { "epoch": 1.55, "grad_norm": 0.586982786655426, "learning_rate": 0.00028557163962194724, "loss": 2.9768, "step": 31545 }, { "epoch": 1.55, "grad_norm": 0.5915098786354065, "learning_rate": 0.0002855562612671943, "loss": 3.1178, "step": 31546 }, { "epoch": 1.55, "grad_norm": 0.5712721943855286, "learning_rate": 0.0002855408829504835, "loss": 2.9357, "step": 31547 }, { "epoch": 1.55, "grad_norm": 0.643669605255127, "learning_rate": 0.000285525504671855, "loss": 2.9061, "step": 31548 }, { "epoch": 1.55, "grad_norm": 0.5977221727371216, "learning_rate": 0.0002855101264313497, "loss": 2.989, "step": 31549 }, { "epoch": 1.55, "grad_norm": 0.6182084679603577, "learning_rate": 0.0002854947482290079, "loss": 3.1102, "step": 31550 }, { "epoch": 1.55, "grad_norm": 0.6026540994644165, "learning_rate": 0.0002854793700648701, "loss": 3.0948, "step": 31551 }, { "epoch": 1.55, "grad_norm": 0.6409699320793152, "learning_rate": 0.00028546399193897694, "loss": 2.8929, "step": 31552 }, { "epoch": 1.55, "grad_norm": 0.5884620547294617, "learning_rate": 0.0002854486138513687, "loss": 3.3798, "step": 31553 }, { "epoch": 1.55, "grad_norm": 0.5860359072685242, "learning_rate": 0.0002854332358020861, "loss": 3.1201, "step": 31554 }, { "epoch": 1.55, "grad_norm": 0.6279225945472717, "learning_rate": 0.0002854178577911696, "loss": 3.0676, "step": 31555 }, { "epoch": 1.55, "grad_norm": 0.6590830087661743, "learning_rate": 0.00028540247981865944, "loss": 3.0733, "step": 31556 }, { "epoch": 1.55, "grad_norm": 0.6351860165596008, "learning_rate": 0.0002853871018845966, "loss": 2.8937, "step": 31557 }, { "epoch": 1.55, "grad_norm": 0.6308092474937439, "learning_rate": 0.0002853717239890212, "loss": 2.96, "step": 31558 }, { "epoch": 1.55, "grad_norm": 0.6065652370452881, "learning_rate": 0.00028535634613197395, "loss": 3.285, "step": 31559 }, { "epoch": 1.55, "grad_norm": 0.5910200476646423, "learning_rate": 0.0002853409683134951, "loss": 2.7821, "step": 31560 }, { "epoch": 1.55, "grad_norm": 0.624097466468811, "learning_rate": 0.0002853255905336254, "loss": 3.0966, "step": 31561 }, { "epoch": 1.55, "grad_norm": 0.6241322159767151, "learning_rate": 0.0002853102127924054, "loss": 3.0899, "step": 31562 }, { "epoch": 1.55, "grad_norm": 0.6398972272872925, "learning_rate": 0.00028529483508987524, "loss": 3.0438, "step": 31563 }, { "epoch": 1.55, "grad_norm": 0.601036548614502, "learning_rate": 0.0002852794574260758, "loss": 3.2424, "step": 31564 }, { "epoch": 1.55, "grad_norm": 0.610298216342926, "learning_rate": 0.0002852640798010474, "loss": 3.0497, "step": 31565 }, { "epoch": 1.55, "grad_norm": 0.5706086754798889, "learning_rate": 0.00028524870221483054, "loss": 2.9957, "step": 31566 }, { "epoch": 1.55, "grad_norm": 0.5881780385971069, "learning_rate": 0.00028523332466746584, "loss": 3.1404, "step": 31567 }, { "epoch": 1.55, "grad_norm": 0.558418333530426, "learning_rate": 0.0002852179471589937, "loss": 2.9538, "step": 31568 }, { "epoch": 1.55, "grad_norm": 0.5936317443847656, "learning_rate": 0.00028520256968945465, "loss": 2.7039, "step": 31569 }, { "epoch": 1.55, "grad_norm": 0.6009182929992676, "learning_rate": 0.0002851871922588891, "loss": 3.1115, "step": 31570 }, { "epoch": 1.55, "grad_norm": 0.6169144511222839, "learning_rate": 0.00028517181486733764, "loss": 3.127, "step": 31571 }, { "epoch": 1.55, "grad_norm": 0.6200720071792603, "learning_rate": 0.00028515643751484093, "loss": 3.0668, "step": 31572 }, { "epoch": 1.55, "grad_norm": 0.6260064244270325, "learning_rate": 0.00028514106020143914, "loss": 3.2649, "step": 31573 }, { "epoch": 1.55, "grad_norm": 0.5613835453987122, "learning_rate": 0.00028512568292717304, "loss": 3.1029, "step": 31574 }, { "epoch": 1.55, "grad_norm": 0.6062586903572083, "learning_rate": 0.0002851103056920829, "loss": 2.9356, "step": 31575 }, { "epoch": 1.55, "grad_norm": 0.6101064085960388, "learning_rate": 0.00028509492849620945, "loss": 2.9583, "step": 31576 }, { "epoch": 1.55, "grad_norm": 0.5925574898719788, "learning_rate": 0.0002850795513395931, "loss": 3.092, "step": 31577 }, { "epoch": 1.55, "grad_norm": 0.5718997120857239, "learning_rate": 0.00028506417422227427, "loss": 3.0322, "step": 31578 }, { "epoch": 1.55, "grad_norm": 0.6322290897369385, "learning_rate": 0.0002850487971442936, "loss": 2.9443, "step": 31579 }, { "epoch": 1.55, "grad_norm": 0.585167646408081, "learning_rate": 0.00028503342010569147, "loss": 2.9556, "step": 31580 }, { "epoch": 1.55, "grad_norm": 0.5464421510696411, "learning_rate": 0.00028501804310650844, "loss": 2.9251, "step": 31581 }, { "epoch": 1.55, "grad_norm": 0.5760757327079773, "learning_rate": 0.00028500266614678505, "loss": 3.1459, "step": 31582 }, { "epoch": 1.55, "grad_norm": 0.5659793615341187, "learning_rate": 0.0002849872892265617, "loss": 3.1723, "step": 31583 }, { "epoch": 1.55, "grad_norm": 0.64796382188797, "learning_rate": 0.00028497191234587904, "loss": 2.8646, "step": 31584 }, { "epoch": 1.55, "grad_norm": 0.7371762990951538, "learning_rate": 0.0002849565355047773, "loss": 3.0312, "step": 31585 }, { "epoch": 1.55, "grad_norm": 0.5944815278053284, "learning_rate": 0.00028494115870329723, "loss": 2.9846, "step": 31586 }, { "epoch": 1.55, "grad_norm": 0.5763345956802368, "learning_rate": 0.0002849257819414793, "loss": 2.9265, "step": 31587 }, { "epoch": 1.55, "grad_norm": 0.5970070958137512, "learning_rate": 0.00028491040521936385, "loss": 3.0161, "step": 31588 }, { "epoch": 1.55, "grad_norm": 0.594948410987854, "learning_rate": 0.00028489502853699163, "loss": 3.1838, "step": 31589 }, { "epoch": 1.55, "grad_norm": 0.6008180975914001, "learning_rate": 0.0002848796518944029, "loss": 3.0902, "step": 31590 }, { "epoch": 1.55, "grad_norm": 0.5951430201530457, "learning_rate": 0.00028486427529163824, "loss": 3.0587, "step": 31591 }, { "epoch": 1.55, "grad_norm": 0.607059121131897, "learning_rate": 0.0002848488987287382, "loss": 3.0581, "step": 31592 }, { "epoch": 1.55, "grad_norm": 0.600946307182312, "learning_rate": 0.00028483352220574324, "loss": 3.1581, "step": 31593 }, { "epoch": 1.55, "grad_norm": 0.5530339479446411, "learning_rate": 0.0002848181457226939, "loss": 3.1023, "step": 31594 }, { "epoch": 1.55, "grad_norm": 0.6456699371337891, "learning_rate": 0.0002848027692796306, "loss": 3.1343, "step": 31595 }, { "epoch": 1.55, "grad_norm": 0.5949581265449524, "learning_rate": 0.00028478739287659383, "loss": 3.0981, "step": 31596 }, { "epoch": 1.55, "grad_norm": 0.6152155995368958, "learning_rate": 0.00028477201651362427, "loss": 2.957, "step": 31597 }, { "epoch": 1.55, "grad_norm": 0.6344307065010071, "learning_rate": 0.0002847566401907622, "loss": 2.9609, "step": 31598 }, { "epoch": 1.55, "grad_norm": 0.5822964906692505, "learning_rate": 0.00028474126390804833, "loss": 3.0524, "step": 31599 }, { "epoch": 1.55, "grad_norm": 0.5847934484481812, "learning_rate": 0.00028472588766552284, "loss": 2.9667, "step": 31600 }, { "epoch": 1.55, "grad_norm": 0.5926794409751892, "learning_rate": 0.0002847105114632265, "loss": 3.0349, "step": 31601 }, { "epoch": 1.55, "grad_norm": 0.5681343078613281, "learning_rate": 0.00028469513530119983, "loss": 3.2188, "step": 31602 }, { "epoch": 1.55, "grad_norm": 0.5752440690994263, "learning_rate": 0.0002846797591794831, "loss": 3.0376, "step": 31603 }, { "epoch": 1.55, "grad_norm": 0.6142379641532898, "learning_rate": 0.00028466438309811704, "loss": 3.1508, "step": 31604 }, { "epoch": 1.55, "grad_norm": 0.6223300099372864, "learning_rate": 0.00028464900705714197, "loss": 2.9159, "step": 31605 }, { "epoch": 1.55, "grad_norm": 0.6309249401092529, "learning_rate": 0.00028463363105659837, "loss": 2.8823, "step": 31606 }, { "epoch": 1.55, "grad_norm": 0.5628587603569031, "learning_rate": 0.000284618255096527, "loss": 3.1089, "step": 31607 }, { "epoch": 1.55, "grad_norm": 0.6215999126434326, "learning_rate": 0.0002846028791769681, "loss": 3.1475, "step": 31608 }, { "epoch": 1.55, "grad_norm": 0.5789870619773865, "learning_rate": 0.0002845875032979624, "loss": 3.0857, "step": 31609 }, { "epoch": 1.55, "grad_norm": 0.5938981771469116, "learning_rate": 0.00028457212745955, "loss": 2.958, "step": 31610 }, { "epoch": 1.55, "grad_norm": 0.5996585488319397, "learning_rate": 0.00028455675166177177, "loss": 2.9466, "step": 31611 }, { "epoch": 1.55, "grad_norm": 0.5623268485069275, "learning_rate": 0.00028454137590466815, "loss": 2.8726, "step": 31612 }, { "epoch": 1.55, "grad_norm": 0.5604333281517029, "learning_rate": 0.00028452600018827947, "loss": 3.3238, "step": 31613 }, { "epoch": 1.55, "grad_norm": 0.6165540218353271, "learning_rate": 0.0002845106245126464, "loss": 2.9483, "step": 31614 }, { "epoch": 1.55, "grad_norm": 0.5510979890823364, "learning_rate": 0.00028449524887780926, "loss": 2.9379, "step": 31615 }, { "epoch": 1.55, "grad_norm": 0.5766331553459167, "learning_rate": 0.0002844798732838087, "loss": 3.0955, "step": 31616 }, { "epoch": 1.55, "grad_norm": 0.6156991124153137, "learning_rate": 0.00028446449773068523, "loss": 2.9406, "step": 31617 }, { "epoch": 1.55, "grad_norm": 0.6004645824432373, "learning_rate": 0.0002844491222184792, "loss": 3.0694, "step": 31618 }, { "epoch": 1.55, "grad_norm": 0.6063745021820068, "learning_rate": 0.0002844337467472313, "loss": 2.8684, "step": 31619 }, { "epoch": 1.55, "grad_norm": 0.5919575691223145, "learning_rate": 0.0002844183713169818, "loss": 3.1631, "step": 31620 }, { "epoch": 1.55, "grad_norm": 0.5818456411361694, "learning_rate": 0.0002844029959277713, "loss": 2.9513, "step": 31621 }, { "epoch": 1.55, "grad_norm": 0.5850279927253723, "learning_rate": 0.0002843876205796404, "loss": 3.0365, "step": 31622 }, { "epoch": 1.55, "grad_norm": 0.590082049369812, "learning_rate": 0.00028437224527262943, "loss": 2.9559, "step": 31623 }, { "epoch": 1.55, "grad_norm": 0.8791807889938354, "learning_rate": 0.0002843568700067791, "loss": 2.9216, "step": 31624 }, { "epoch": 1.55, "grad_norm": 0.593088686466217, "learning_rate": 0.00028434149478212957, "loss": 3.1052, "step": 31625 }, { "epoch": 1.55, "grad_norm": 0.6907350420951843, "learning_rate": 0.00028432611959872166, "loss": 3.1177, "step": 31626 }, { "epoch": 1.55, "grad_norm": 0.5803176164627075, "learning_rate": 0.00028431074445659573, "loss": 3.0874, "step": 31627 }, { "epoch": 1.55, "grad_norm": 0.627086877822876, "learning_rate": 0.0002842953693557921, "loss": 3.0477, "step": 31628 }, { "epoch": 1.55, "grad_norm": 0.6301358938217163, "learning_rate": 0.0002842799942963517, "loss": 3.1221, "step": 31629 }, { "epoch": 1.55, "grad_norm": 0.5863131284713745, "learning_rate": 0.0002842646192783146, "loss": 3.176, "step": 31630 }, { "epoch": 1.55, "grad_norm": 0.6010187864303589, "learning_rate": 0.0002842492443017216, "loss": 3.2, "step": 31631 }, { "epoch": 1.55, "grad_norm": 0.5959247946739197, "learning_rate": 0.0002842338693666129, "loss": 2.8586, "step": 31632 }, { "epoch": 1.55, "grad_norm": 0.5971164107322693, "learning_rate": 0.0002842184944730292, "loss": 2.8364, "step": 31633 }, { "epoch": 1.55, "grad_norm": 0.6169106960296631, "learning_rate": 0.000284203119621011, "loss": 2.9412, "step": 31634 }, { "epoch": 1.55, "grad_norm": 0.6330366730690002, "learning_rate": 0.0002841877448105987, "loss": 3.2115, "step": 31635 }, { "epoch": 1.55, "grad_norm": 0.6117235422134399, "learning_rate": 0.00028417237004183297, "loss": 2.8728, "step": 31636 }, { "epoch": 1.55, "grad_norm": 0.5777655839920044, "learning_rate": 0.00028415699531475394, "loss": 2.9479, "step": 31637 }, { "epoch": 1.55, "grad_norm": 0.5683594346046448, "learning_rate": 0.0002841416206294025, "loss": 3.1955, "step": 31638 }, { "epoch": 1.55, "grad_norm": 0.6049903631210327, "learning_rate": 0.00028412624598581895, "loss": 3.1881, "step": 31639 }, { "epoch": 1.55, "grad_norm": 0.5954413414001465, "learning_rate": 0.0002841108713840437, "loss": 3.0535, "step": 31640 }, { "epoch": 1.55, "grad_norm": 0.6370765566825867, "learning_rate": 0.00028409549682411754, "loss": 3.0142, "step": 31641 }, { "epoch": 1.55, "grad_norm": 0.5675783753395081, "learning_rate": 0.0002840801223060807, "loss": 3.1618, "step": 31642 }, { "epoch": 1.55, "grad_norm": 0.6547769904136658, "learning_rate": 0.00028406474782997367, "loss": 3.2298, "step": 31643 }, { "epoch": 1.55, "grad_norm": 0.6375356316566467, "learning_rate": 0.0002840493733958372, "loss": 3.0107, "step": 31644 }, { "epoch": 1.55, "grad_norm": 0.6045966744422913, "learning_rate": 0.00028403399900371146, "loss": 3.0618, "step": 31645 }, { "epoch": 1.55, "grad_norm": 0.5673590302467346, "learning_rate": 0.0002840186246536372, "loss": 2.9589, "step": 31646 }, { "epoch": 1.55, "grad_norm": 0.6037384867668152, "learning_rate": 0.0002840032503456547, "loss": 3.1438, "step": 31647 }, { "epoch": 1.55, "grad_norm": 0.6107524633407593, "learning_rate": 0.0002839878760798046, "loss": 3.062, "step": 31648 }, { "epoch": 1.55, "grad_norm": 0.5911772847175598, "learning_rate": 0.00028397250185612747, "loss": 2.9119, "step": 31649 }, { "epoch": 1.55, "grad_norm": 0.6252596974372864, "learning_rate": 0.0002839571276746635, "loss": 2.9791, "step": 31650 }, { "epoch": 1.55, "grad_norm": 0.5904194116592407, "learning_rate": 0.0002839417535354535, "loss": 2.805, "step": 31651 }, { "epoch": 1.55, "grad_norm": 0.5900318026542664, "learning_rate": 0.0002839263794385378, "loss": 3.1029, "step": 31652 }, { "epoch": 1.55, "grad_norm": 0.6207619309425354, "learning_rate": 0.0002839110053839568, "loss": 2.9533, "step": 31653 }, { "epoch": 1.55, "grad_norm": 0.7129519581794739, "learning_rate": 0.0002838956313717513, "loss": 2.9724, "step": 31654 }, { "epoch": 1.55, "grad_norm": 0.5762568712234497, "learning_rate": 0.0002838802574019615, "loss": 3.0598, "step": 31655 }, { "epoch": 1.55, "grad_norm": 0.6000192761421204, "learning_rate": 0.0002838648834746281, "loss": 3.0808, "step": 31656 }, { "epoch": 1.55, "grad_norm": 0.6309391260147095, "learning_rate": 0.0002838495095897914, "loss": 3.1001, "step": 31657 }, { "epoch": 1.55, "grad_norm": 0.5648094415664673, "learning_rate": 0.0002838341357474919, "loss": 2.8382, "step": 31658 }, { "epoch": 1.55, "grad_norm": 0.601072371006012, "learning_rate": 0.00028381876194777037, "loss": 2.9228, "step": 31659 }, { "epoch": 1.55, "grad_norm": 0.6006704568862915, "learning_rate": 0.000283803388190667, "loss": 3.1873, "step": 31660 }, { "epoch": 1.55, "grad_norm": 0.5812244415283203, "learning_rate": 0.00028378801447622245, "loss": 2.9829, "step": 31661 }, { "epoch": 1.55, "grad_norm": 0.5904305577278137, "learning_rate": 0.00028377264080447703, "loss": 2.9529, "step": 31662 }, { "epoch": 1.55, "grad_norm": 0.5870997905731201, "learning_rate": 0.0002837572671754714, "loss": 2.9428, "step": 31663 }, { "epoch": 1.55, "grad_norm": 0.6143147945404053, "learning_rate": 0.0002837418935892461, "loss": 3.1222, "step": 31664 }, { "epoch": 1.55, "grad_norm": 0.8274781107902527, "learning_rate": 0.0002837265200458414, "loss": 3.1928, "step": 31665 }, { "epoch": 1.55, "grad_norm": 0.5731198787689209, "learning_rate": 0.00028371114654529805, "loss": 3.1566, "step": 31666 }, { "epoch": 1.55, "grad_norm": 0.582653820514679, "learning_rate": 0.0002836957730876563, "loss": 3.0699, "step": 31667 }, { "epoch": 1.55, "grad_norm": 0.6127033829689026, "learning_rate": 0.0002836803996729567, "loss": 3.074, "step": 31668 }, { "epoch": 1.55, "grad_norm": 0.578920304775238, "learning_rate": 0.00028366502630123994, "loss": 3.1217, "step": 31669 }, { "epoch": 1.55, "grad_norm": 0.574611246585846, "learning_rate": 0.0002836496529725462, "loss": 3.0978, "step": 31670 }, { "epoch": 1.55, "grad_norm": 0.6003169417381287, "learning_rate": 0.0002836342796869163, "loss": 2.872, "step": 31671 }, { "epoch": 1.55, "grad_norm": 0.6231915950775146, "learning_rate": 0.0002836189064443904, "loss": 2.9799, "step": 31672 }, { "epoch": 1.55, "grad_norm": 0.6026855111122131, "learning_rate": 0.00028360353324500913, "loss": 2.9663, "step": 31673 }, { "epoch": 1.55, "grad_norm": 0.639599621295929, "learning_rate": 0.0002835881600888132, "loss": 3.0197, "step": 31674 }, { "epoch": 1.55, "grad_norm": 0.6096246242523193, "learning_rate": 0.0002835727869758427, "loss": 3.1729, "step": 31675 }, { "epoch": 1.55, "grad_norm": 0.585129976272583, "learning_rate": 0.00028355741390613846, "loss": 2.8763, "step": 31676 }, { "epoch": 1.55, "grad_norm": 0.5901762843132019, "learning_rate": 0.00028354204087974064, "loss": 3.2103, "step": 31677 }, { "epoch": 1.55, "grad_norm": 0.6148892641067505, "learning_rate": 0.00028352666789669003, "loss": 2.9013, "step": 31678 }, { "epoch": 1.55, "grad_norm": 0.5568849444389343, "learning_rate": 0.0002835112949570271, "loss": 2.9905, "step": 31679 }, { "epoch": 1.55, "grad_norm": 0.5767827033996582, "learning_rate": 0.00028349592206079207, "loss": 3.0622, "step": 31680 }, { "epoch": 1.55, "grad_norm": 0.6218153834342957, "learning_rate": 0.00028348054920802575, "loss": 2.9914, "step": 31681 }, { "epoch": 1.55, "grad_norm": 0.6101388335227966, "learning_rate": 0.0002834651763987684, "loss": 3.0335, "step": 31682 }, { "epoch": 1.55, "grad_norm": 0.5884225368499756, "learning_rate": 0.00028344980363306056, "loss": 2.9399, "step": 31683 }, { "epoch": 1.55, "grad_norm": 0.6422539353370667, "learning_rate": 0.0002834344309109429, "loss": 2.9614, "step": 31684 }, { "epoch": 1.55, "grad_norm": 0.5843330025672913, "learning_rate": 0.0002834190582324557, "loss": 3.2347, "step": 31685 }, { "epoch": 1.55, "grad_norm": 0.5898882746696472, "learning_rate": 0.0002834036855976395, "loss": 3.0284, "step": 31686 }, { "epoch": 1.55, "grad_norm": 0.6142812967300415, "learning_rate": 0.0002833883130065347, "loss": 3.0773, "step": 31687 }, { "epoch": 1.55, "grad_norm": 0.591521143913269, "learning_rate": 0.00028337294045918194, "loss": 3.2177, "step": 31688 }, { "epoch": 1.55, "grad_norm": 0.6057526469230652, "learning_rate": 0.00028335756795562175, "loss": 2.7478, "step": 31689 }, { "epoch": 1.55, "grad_norm": 0.5793746113777161, "learning_rate": 0.0002833421954958944, "loss": 3.3766, "step": 31690 }, { "epoch": 1.55, "grad_norm": 0.6439757943153381, "learning_rate": 0.00028332682308004057, "loss": 3.1779, "step": 31691 }, { "epoch": 1.55, "grad_norm": 0.6085169911384583, "learning_rate": 0.0002833114507081007, "loss": 3.1525, "step": 31692 }, { "epoch": 1.55, "grad_norm": 0.6145389080047607, "learning_rate": 0.0002832960783801151, "loss": 3.0357, "step": 31693 }, { "epoch": 1.55, "grad_norm": 0.5741784572601318, "learning_rate": 0.0002832807060961246, "loss": 3.3173, "step": 31694 }, { "epoch": 1.55, "grad_norm": 0.6480720639228821, "learning_rate": 0.0002832653338561694, "loss": 2.9654, "step": 31695 }, { "epoch": 1.55, "grad_norm": 0.600227415561676, "learning_rate": 0.00028324996166029015, "loss": 2.8275, "step": 31696 }, { "epoch": 1.55, "grad_norm": 0.6081393957138062, "learning_rate": 0.00028323458950852727, "loss": 3.164, "step": 31697 }, { "epoch": 1.55, "grad_norm": 0.5734052658081055, "learning_rate": 0.00028321921740092113, "loss": 3.0086, "step": 31698 }, { "epoch": 1.55, "grad_norm": 0.6059851050376892, "learning_rate": 0.00028320384533751255, "loss": 2.9567, "step": 31699 }, { "epoch": 1.55, "grad_norm": 0.5766007304191589, "learning_rate": 0.0002831884733183417, "loss": 3.1765, "step": 31700 }, { "epoch": 1.55, "grad_norm": 0.5918262600898743, "learning_rate": 0.00028317310134344927, "loss": 3.1488, "step": 31701 }, { "epoch": 1.55, "grad_norm": 0.5856273770332336, "learning_rate": 0.0002831577294128755, "loss": 3.2332, "step": 31702 }, { "epoch": 1.55, "grad_norm": 0.6248118877410889, "learning_rate": 0.0002831423575266611, "loss": 2.9965, "step": 31703 }, { "epoch": 1.55, "grad_norm": 0.6196810603141785, "learning_rate": 0.0002831269856848465, "loss": 3.1695, "step": 31704 }, { "epoch": 1.55, "grad_norm": 0.693333089351654, "learning_rate": 0.0002831116138874721, "loss": 3.2062, "step": 31705 }, { "epoch": 1.55, "grad_norm": 0.6046402454376221, "learning_rate": 0.00028309624213457854, "loss": 3.0393, "step": 31706 }, { "epoch": 1.55, "grad_norm": 0.6249449253082275, "learning_rate": 0.0002830808704262062, "loss": 2.9533, "step": 31707 }, { "epoch": 1.55, "grad_norm": 0.5609068870544434, "learning_rate": 0.0002830654987623957, "loss": 2.9595, "step": 31708 }, { "epoch": 1.55, "grad_norm": 0.6460155844688416, "learning_rate": 0.00028305012714318717, "loss": 2.9995, "step": 31709 }, { "epoch": 1.55, "grad_norm": 0.6091744303703308, "learning_rate": 0.0002830347555686215, "loss": 3.2087, "step": 31710 }, { "epoch": 1.55, "grad_norm": 0.5952534079551697, "learning_rate": 0.00028301938403873904, "loss": 2.9537, "step": 31711 }, { "epoch": 1.55, "grad_norm": 0.5830929279327393, "learning_rate": 0.0002830040125535802, "loss": 2.989, "step": 31712 }, { "epoch": 1.55, "grad_norm": 0.598037838935852, "learning_rate": 0.00028298864111318557, "loss": 3.0506, "step": 31713 }, { "epoch": 1.55, "grad_norm": 0.5895772576332092, "learning_rate": 0.00028297326971759554, "loss": 3.0416, "step": 31714 }, { "epoch": 1.55, "grad_norm": 0.6120174527168274, "learning_rate": 0.00028295789836685055, "loss": 3.1145, "step": 31715 }, { "epoch": 1.55, "grad_norm": 0.5766130089759827, "learning_rate": 0.00028294252706099137, "loss": 3.0686, "step": 31716 }, { "epoch": 1.55, "grad_norm": 0.6399678587913513, "learning_rate": 0.0002829271558000581, "loss": 2.9891, "step": 31717 }, { "epoch": 1.55, "grad_norm": 0.6096529364585876, "learning_rate": 0.0002829117845840916, "loss": 2.9512, "step": 31718 }, { "epoch": 1.55, "grad_norm": 0.632032036781311, "learning_rate": 0.00028289641341313205, "loss": 3.1313, "step": 31719 }, { "epoch": 1.55, "grad_norm": 0.6017841696739197, "learning_rate": 0.00028288104228722, "loss": 3.0703, "step": 31720 }, { "epoch": 1.55, "grad_norm": 0.6441850066184998, "learning_rate": 0.00028286567120639616, "loss": 2.9992, "step": 31721 }, { "epoch": 1.55, "grad_norm": 0.5980612635612488, "learning_rate": 0.00028285030017070076, "loss": 3.1623, "step": 31722 }, { "epoch": 1.55, "grad_norm": 0.5885056257247925, "learning_rate": 0.0002828349291801744, "loss": 2.9903, "step": 31723 }, { "epoch": 1.55, "grad_norm": 0.5749340057373047, "learning_rate": 0.00028281955823485745, "loss": 2.9712, "step": 31724 }, { "epoch": 1.55, "grad_norm": 0.5880852341651917, "learning_rate": 0.00028280418733479053, "loss": 3.0399, "step": 31725 }, { "epoch": 1.55, "grad_norm": 0.6071673631668091, "learning_rate": 0.00028278881648001415, "loss": 2.9693, "step": 31726 }, { "epoch": 1.55, "grad_norm": 0.6033803820610046, "learning_rate": 0.0002827734456705686, "loss": 3.0939, "step": 31727 }, { "epoch": 1.55, "grad_norm": 0.5925212502479553, "learning_rate": 0.00028275807490649456, "loss": 3.0581, "step": 31728 }, { "epoch": 1.55, "grad_norm": 0.6496951580047607, "learning_rate": 0.0002827427041878324, "loss": 2.8309, "step": 31729 }, { "epoch": 1.56, "grad_norm": 0.568051815032959, "learning_rate": 0.00028272733351462253, "loss": 3.1127, "step": 31730 }, { "epoch": 1.56, "grad_norm": 0.5681890249252319, "learning_rate": 0.0002827119628869057, "loss": 2.8544, "step": 31731 }, { "epoch": 1.56, "grad_norm": 0.5864232182502747, "learning_rate": 0.0002826965923047222, "loss": 2.8054, "step": 31732 }, { "epoch": 1.56, "grad_norm": 0.6019498705863953, "learning_rate": 0.0002826812217681125, "loss": 2.9317, "step": 31733 }, { "epoch": 1.56, "grad_norm": 0.637272298336029, "learning_rate": 0.00028266585127711713, "loss": 3.1232, "step": 31734 }, { "epoch": 1.56, "grad_norm": 0.6020846366882324, "learning_rate": 0.0002826504808317765, "loss": 3.0031, "step": 31735 }, { "epoch": 1.56, "grad_norm": 0.6123052835464478, "learning_rate": 0.00028263511043213137, "loss": 3.0354, "step": 31736 }, { "epoch": 1.56, "grad_norm": 0.5840319395065308, "learning_rate": 0.0002826197400782218, "loss": 2.7779, "step": 31737 }, { "epoch": 1.56, "grad_norm": 0.6461364030838013, "learning_rate": 0.0002826043697700887, "loss": 3.3123, "step": 31738 }, { "epoch": 1.56, "grad_norm": 0.6259379386901855, "learning_rate": 0.00028258899950777213, "loss": 3.0375, "step": 31739 }, { "epoch": 1.56, "grad_norm": 0.5921148657798767, "learning_rate": 0.0002825736292913129, "loss": 2.9987, "step": 31740 }, { "epoch": 1.56, "grad_norm": 0.5672594904899597, "learning_rate": 0.00028255825912075136, "loss": 2.9653, "step": 31741 }, { "epoch": 1.56, "grad_norm": 0.5888809561729431, "learning_rate": 0.00028254288899612795, "loss": 3.078, "step": 31742 }, { "epoch": 1.56, "grad_norm": 0.5958986282348633, "learning_rate": 0.00028252751891748334, "loss": 3.1048, "step": 31743 }, { "epoch": 1.56, "grad_norm": 0.6289757490158081, "learning_rate": 0.0002825121488848578, "loss": 3.0964, "step": 31744 }, { "epoch": 1.56, "grad_norm": 0.5901963710784912, "learning_rate": 0.00028249677889829187, "loss": 2.9512, "step": 31745 }, { "epoch": 1.56, "grad_norm": 0.6648480296134949, "learning_rate": 0.0002824814089578261, "loss": 3.0944, "step": 31746 }, { "epoch": 1.56, "grad_norm": 0.6112911105155945, "learning_rate": 0.0002824660390635009, "loss": 2.8058, "step": 31747 }, { "epoch": 1.56, "grad_norm": 0.6173609495162964, "learning_rate": 0.00028245066921535686, "loss": 3.1509, "step": 31748 }, { "epoch": 1.56, "grad_norm": 0.6812626123428345, "learning_rate": 0.00028243529941343425, "loss": 2.8451, "step": 31749 }, { "epoch": 1.56, "grad_norm": 0.5810101628303528, "learning_rate": 0.0002824199296577738, "loss": 2.7511, "step": 31750 }, { "epoch": 1.56, "grad_norm": 0.5572994947433472, "learning_rate": 0.00028240455994841586, "loss": 3.047, "step": 31751 }, { "epoch": 1.56, "grad_norm": 0.6092203259468079, "learning_rate": 0.0002823891902854008, "loss": 3.0467, "step": 31752 }, { "epoch": 1.56, "grad_norm": 0.6097322106361389, "learning_rate": 0.00028237382066876937, "loss": 3.1722, "step": 31753 }, { "epoch": 1.56, "grad_norm": 0.5815500020980835, "learning_rate": 0.0002823584510985618, "loss": 3.1401, "step": 31754 }, { "epoch": 1.56, "grad_norm": 0.6111709475517273, "learning_rate": 0.0002823430815748187, "loss": 3.1151, "step": 31755 }, { "epoch": 1.56, "grad_norm": 0.6202290058135986, "learning_rate": 0.0002823277120975806, "loss": 2.981, "step": 31756 }, { "epoch": 1.56, "grad_norm": 0.6156234741210938, "learning_rate": 0.00028231234266688776, "loss": 3.0407, "step": 31757 }, { "epoch": 1.56, "grad_norm": 0.5927169322967529, "learning_rate": 0.00028229697328278094, "loss": 2.9775, "step": 31758 }, { "epoch": 1.56, "grad_norm": 0.5997295379638672, "learning_rate": 0.0002822816039453004, "loss": 3.031, "step": 31759 }, { "epoch": 1.56, "grad_norm": 0.5959268808364868, "learning_rate": 0.0002822662346544867, "loss": 3.1206, "step": 31760 }, { "epoch": 1.56, "grad_norm": 0.6445311307907104, "learning_rate": 0.0002822508654103804, "loss": 2.9471, "step": 31761 }, { "epoch": 1.56, "grad_norm": 0.6009385585784912, "learning_rate": 0.0002822354962130219, "loss": 2.9307, "step": 31762 }, { "epoch": 1.56, "grad_norm": 0.5991648435592651, "learning_rate": 0.00028222012706245167, "loss": 3.112, "step": 31763 }, { "epoch": 1.56, "grad_norm": 0.5902231335639954, "learning_rate": 0.0002822047579587101, "loss": 2.8237, "step": 31764 }, { "epoch": 1.56, "grad_norm": 0.8094449639320374, "learning_rate": 0.0002821893889018379, "loss": 3.1565, "step": 31765 }, { "epoch": 1.56, "grad_norm": 0.6150186657905579, "learning_rate": 0.00028217401989187547, "loss": 2.9342, "step": 31766 }, { "epoch": 1.56, "grad_norm": 0.5921657681465149, "learning_rate": 0.000282158650928863, "loss": 2.9887, "step": 31767 }, { "epoch": 1.56, "grad_norm": 0.5835986137390137, "learning_rate": 0.00028214328201284146, "loss": 3.1866, "step": 31768 }, { "epoch": 1.56, "grad_norm": 0.5715051889419556, "learning_rate": 0.00028212791314385097, "loss": 2.8587, "step": 31769 }, { "epoch": 1.56, "grad_norm": 0.6232491135597229, "learning_rate": 0.0002821125443219321, "loss": 3.0507, "step": 31770 }, { "epoch": 1.56, "grad_norm": 0.5849910974502563, "learning_rate": 0.0002820971755471254, "loss": 2.9724, "step": 31771 }, { "epoch": 1.56, "grad_norm": 0.6116089820861816, "learning_rate": 0.0002820818068194713, "loss": 3.2148, "step": 31772 }, { "epoch": 1.56, "grad_norm": 0.6128947138786316, "learning_rate": 0.00028206643813901024, "loss": 3.0349, "step": 31773 }, { "epoch": 1.56, "grad_norm": 0.591354250907898, "learning_rate": 0.0002820510695057827, "loss": 3.119, "step": 31774 }, { "epoch": 1.56, "grad_norm": 0.6188297867774963, "learning_rate": 0.0002820357009198292, "loss": 3.2393, "step": 31775 }, { "epoch": 1.56, "grad_norm": 0.6261090636253357, "learning_rate": 0.0002820203323811903, "loss": 2.9624, "step": 31776 }, { "epoch": 1.56, "grad_norm": 0.56405109167099, "learning_rate": 0.00028200496388990624, "loss": 3.1006, "step": 31777 }, { "epoch": 1.56, "grad_norm": 0.6306495666503906, "learning_rate": 0.00028198959544601776, "loss": 3.1529, "step": 31778 }, { "epoch": 1.56, "grad_norm": 0.6113765239715576, "learning_rate": 0.0002819742270495651, "loss": 2.888, "step": 31779 }, { "epoch": 1.56, "grad_norm": 0.5723241567611694, "learning_rate": 0.000281958858700589, "loss": 2.8904, "step": 31780 }, { "epoch": 1.56, "grad_norm": 0.5704621076583862, "learning_rate": 0.0002819434903991297, "loss": 2.879, "step": 31781 }, { "epoch": 1.56, "grad_norm": 0.5865147113800049, "learning_rate": 0.00028192812214522777, "loss": 3.0125, "step": 31782 }, { "epoch": 1.56, "grad_norm": 0.6170030832290649, "learning_rate": 0.00028191275393892376, "loss": 2.9818, "step": 31783 }, { "epoch": 1.56, "grad_norm": 0.6150692701339722, "learning_rate": 0.00028189738578025803, "loss": 3.019, "step": 31784 }, { "epoch": 1.56, "grad_norm": 0.6316766142845154, "learning_rate": 0.0002818820176692711, "loss": 2.8793, "step": 31785 }, { "epoch": 1.56, "grad_norm": 0.5430260300636292, "learning_rate": 0.00028186664960600346, "loss": 2.9567, "step": 31786 }, { "epoch": 1.56, "grad_norm": 0.5936475992202759, "learning_rate": 0.0002818512815904955, "loss": 3.1525, "step": 31787 }, { "epoch": 1.56, "grad_norm": 0.6307163834571838, "learning_rate": 0.0002818359136227879, "loss": 3.0512, "step": 31788 }, { "epoch": 1.56, "grad_norm": 0.6203303933143616, "learning_rate": 0.0002818205457029209, "loss": 3.0299, "step": 31789 }, { "epoch": 1.56, "grad_norm": 0.5848477482795715, "learning_rate": 0.0002818051778309352, "loss": 3.0923, "step": 31790 }, { "epoch": 1.56, "grad_norm": 0.611884355545044, "learning_rate": 0.00028178981000687105, "loss": 3.0312, "step": 31791 }, { "epoch": 1.56, "grad_norm": 0.585080623626709, "learning_rate": 0.00028177444223076903, "loss": 3.14, "step": 31792 }, { "epoch": 1.56, "grad_norm": 0.5883607268333435, "learning_rate": 0.00028175907450266974, "loss": 3.2011, "step": 31793 }, { "epoch": 1.56, "grad_norm": 0.6267507672309875, "learning_rate": 0.0002817437068226134, "loss": 3.17, "step": 31794 }, { "epoch": 1.56, "grad_norm": 0.6080348491668701, "learning_rate": 0.00028172833919064077, "loss": 2.996, "step": 31795 }, { "epoch": 1.56, "grad_norm": 0.5978765487670898, "learning_rate": 0.000281712971606792, "loss": 2.9237, "step": 31796 }, { "epoch": 1.56, "grad_norm": 0.6047960519790649, "learning_rate": 0.0002816976040711078, "loss": 2.8924, "step": 31797 }, { "epoch": 1.56, "grad_norm": 0.6580313444137573, "learning_rate": 0.0002816822365836287, "loss": 2.9914, "step": 31798 }, { "epoch": 1.56, "grad_norm": 0.620141863822937, "learning_rate": 0.00028166686914439493, "loss": 3.1461, "step": 31799 }, { "epoch": 1.56, "grad_norm": 0.604900062084198, "learning_rate": 0.0002816515017534472, "loss": 3.0193, "step": 31800 }, { "epoch": 1.56, "grad_norm": 0.6572511196136475, "learning_rate": 0.0002816361344108258, "loss": 2.9982, "step": 31801 }, { "epoch": 1.56, "grad_norm": 0.6069661974906921, "learning_rate": 0.00028162076711657133, "loss": 3.0559, "step": 31802 }, { "epoch": 1.56, "grad_norm": 0.6206227540969849, "learning_rate": 0.00028160539987072425, "loss": 3.0348, "step": 31803 }, { "epoch": 1.56, "grad_norm": 0.5691961050033569, "learning_rate": 0.0002815900326733249, "loss": 2.9718, "step": 31804 }, { "epoch": 1.56, "grad_norm": 0.5769830942153931, "learning_rate": 0.000281574665524414, "loss": 2.8864, "step": 31805 }, { "epoch": 1.56, "grad_norm": 0.6015021800994873, "learning_rate": 0.0002815592984240318, "loss": 3.0994, "step": 31806 }, { "epoch": 1.56, "grad_norm": 0.6178338527679443, "learning_rate": 0.00028154393137221886, "loss": 3.1458, "step": 31807 }, { "epoch": 1.56, "grad_norm": 0.5842515826225281, "learning_rate": 0.00028152856436901573, "loss": 3.1004, "step": 31808 }, { "epoch": 1.56, "grad_norm": 0.5896745920181274, "learning_rate": 0.00028151319741446276, "loss": 3.0969, "step": 31809 }, { "epoch": 1.56, "grad_norm": 0.6141567230224609, "learning_rate": 0.0002814978305086005, "loss": 2.9886, "step": 31810 }, { "epoch": 1.56, "grad_norm": 0.6013665795326233, "learning_rate": 0.00028148246365146927, "loss": 3.0342, "step": 31811 }, { "epoch": 1.56, "grad_norm": 0.6272392868995667, "learning_rate": 0.00028146709684310974, "loss": 3.2857, "step": 31812 }, { "epoch": 1.56, "grad_norm": 0.5829760432243347, "learning_rate": 0.0002814517300835624, "loss": 3.1099, "step": 31813 }, { "epoch": 1.56, "grad_norm": 0.629869282245636, "learning_rate": 0.0002814363633728675, "loss": 2.8432, "step": 31814 }, { "epoch": 1.56, "grad_norm": 0.6304134726524353, "learning_rate": 0.00028142099671106573, "loss": 3.305, "step": 31815 }, { "epoch": 1.56, "grad_norm": 0.5929440855979919, "learning_rate": 0.00028140563009819745, "loss": 3.1774, "step": 31816 }, { "epoch": 1.56, "grad_norm": 0.6079155206680298, "learning_rate": 0.0002813902635343031, "loss": 3.167, "step": 31817 }, { "epoch": 1.56, "grad_norm": 0.6250888705253601, "learning_rate": 0.00028137489701942335, "loss": 2.9714, "step": 31818 }, { "epoch": 1.56, "grad_norm": 0.6257724165916443, "learning_rate": 0.00028135953055359836, "loss": 2.7605, "step": 31819 }, { "epoch": 1.56, "grad_norm": 0.6094701290130615, "learning_rate": 0.00028134416413686897, "loss": 3.1054, "step": 31820 }, { "epoch": 1.56, "grad_norm": 0.6528363823890686, "learning_rate": 0.00028132879776927535, "loss": 2.792, "step": 31821 }, { "epoch": 1.56, "grad_norm": 0.647331953048706, "learning_rate": 0.00028131343145085805, "loss": 2.9551, "step": 31822 }, { "epoch": 1.56, "grad_norm": 0.5757825374603271, "learning_rate": 0.0002812980651816577, "loss": 3.011, "step": 31823 }, { "epoch": 1.56, "grad_norm": 0.5802479982376099, "learning_rate": 0.00028128269896171457, "loss": 2.9968, "step": 31824 }, { "epoch": 1.56, "grad_norm": 0.583878755569458, "learning_rate": 0.00028126733279106925, "loss": 3.0743, "step": 31825 }, { "epoch": 1.56, "grad_norm": 0.5796141624450684, "learning_rate": 0.0002812519666697621, "loss": 3.1808, "step": 31826 }, { "epoch": 1.56, "grad_norm": 0.6166408658027649, "learning_rate": 0.0002812366005978337, "loss": 3.1588, "step": 31827 }, { "epoch": 1.56, "grad_norm": 0.5788041353225708, "learning_rate": 0.0002812212345753245, "loss": 2.872, "step": 31828 }, { "epoch": 1.56, "grad_norm": 0.5892295837402344, "learning_rate": 0.0002812058686022749, "loss": 3.0678, "step": 31829 }, { "epoch": 1.56, "grad_norm": 0.6605625748634338, "learning_rate": 0.0002811905026787255, "loss": 2.7785, "step": 31830 }, { "epoch": 1.56, "grad_norm": 0.6186345219612122, "learning_rate": 0.00028117513680471663, "loss": 3.0413, "step": 31831 }, { "epoch": 1.56, "grad_norm": 0.561852216720581, "learning_rate": 0.0002811597709802888, "loss": 2.7898, "step": 31832 }, { "epoch": 1.56, "grad_norm": 0.5842719674110413, "learning_rate": 0.0002811444052054826, "loss": 3.0802, "step": 31833 }, { "epoch": 1.56, "grad_norm": 0.6352782249450684, "learning_rate": 0.0002811290394803384, "loss": 2.9058, "step": 31834 }, { "epoch": 1.56, "grad_norm": 0.6168009638786316, "learning_rate": 0.0002811136738048967, "loss": 2.9086, "step": 31835 }, { "epoch": 1.56, "grad_norm": 0.6582854986190796, "learning_rate": 0.0002810983081791978, "loss": 3.0012, "step": 31836 }, { "epoch": 1.56, "grad_norm": 0.5972253084182739, "learning_rate": 0.0002810829426032824, "loss": 3.2943, "step": 31837 }, { "epoch": 1.56, "grad_norm": 0.6005479693412781, "learning_rate": 0.000281067577077191, "loss": 2.9417, "step": 31838 }, { "epoch": 1.56, "grad_norm": 0.5901066660881042, "learning_rate": 0.0002810522116009638, "loss": 2.9913, "step": 31839 }, { "epoch": 1.56, "grad_norm": 0.6512740850448608, "learning_rate": 0.0002810368461746415, "loss": 3.1729, "step": 31840 }, { "epoch": 1.56, "grad_norm": 0.6124058961868286, "learning_rate": 0.00028102148079826443, "loss": 2.9708, "step": 31841 }, { "epoch": 1.56, "grad_norm": 0.5982481241226196, "learning_rate": 0.0002810061154718732, "loss": 3.1866, "step": 31842 }, { "epoch": 1.56, "grad_norm": 0.5993276238441467, "learning_rate": 0.00028099075019550826, "loss": 2.8918, "step": 31843 }, { "epoch": 1.56, "grad_norm": 0.5819422602653503, "learning_rate": 0.00028097538496920987, "loss": 3.1729, "step": 31844 }, { "epoch": 1.56, "grad_norm": 0.6041077375411987, "learning_rate": 0.0002809600197930188, "loss": 2.9457, "step": 31845 }, { "epoch": 1.56, "grad_norm": 0.6543545126914978, "learning_rate": 0.0002809446546669753, "loss": 3.07, "step": 31846 }, { "epoch": 1.56, "grad_norm": 0.5843205451965332, "learning_rate": 0.0002809292895911199, "loss": 3.1067, "step": 31847 }, { "epoch": 1.56, "grad_norm": 0.5955379605293274, "learning_rate": 0.0002809139245654931, "loss": 3.0711, "step": 31848 }, { "epoch": 1.56, "grad_norm": 0.6512821912765503, "learning_rate": 0.00028089855959013537, "loss": 2.8107, "step": 31849 }, { "epoch": 1.56, "grad_norm": 0.6019611954689026, "learning_rate": 0.0002808831946650872, "loss": 3.0046, "step": 31850 }, { "epoch": 1.56, "grad_norm": 0.5904426574707031, "learning_rate": 0.0002808678297903889, "loss": 2.968, "step": 31851 }, { "epoch": 1.56, "grad_norm": 0.6119365096092224, "learning_rate": 0.00028085246496608115, "loss": 3.2099, "step": 31852 }, { "epoch": 1.56, "grad_norm": 0.6144260168075562, "learning_rate": 0.00028083710019220434, "loss": 3.1469, "step": 31853 }, { "epoch": 1.56, "grad_norm": 0.5753419399261475, "learning_rate": 0.00028082173546879876, "loss": 3.1069, "step": 31854 }, { "epoch": 1.56, "grad_norm": 0.5890007019042969, "learning_rate": 0.0002808063707959052, "loss": 2.9953, "step": 31855 }, { "epoch": 1.56, "grad_norm": 0.6050313711166382, "learning_rate": 0.0002807910061735639, "loss": 3.1714, "step": 31856 }, { "epoch": 1.56, "grad_norm": 0.6816890835762024, "learning_rate": 0.0002807756416018155, "loss": 3.093, "step": 31857 }, { "epoch": 1.56, "grad_norm": 0.6396083831787109, "learning_rate": 0.0002807602770807002, "loss": 3.1671, "step": 31858 }, { "epoch": 1.56, "grad_norm": 0.5927896499633789, "learning_rate": 0.0002807449126102586, "loss": 2.9865, "step": 31859 }, { "epoch": 1.56, "grad_norm": 0.6155912280082703, "learning_rate": 0.00028072954819053134, "loss": 2.914, "step": 31860 }, { "epoch": 1.56, "grad_norm": 0.5896044373512268, "learning_rate": 0.0002807141838215587, "loss": 3.0633, "step": 31861 }, { "epoch": 1.56, "grad_norm": 0.6200414896011353, "learning_rate": 0.00028069881950338124, "loss": 2.9665, "step": 31862 }, { "epoch": 1.56, "grad_norm": 0.5940514206886292, "learning_rate": 0.00028068345523603924, "loss": 3.2252, "step": 31863 }, { "epoch": 1.56, "grad_norm": 0.6118853092193604, "learning_rate": 0.00028066809101957337, "loss": 2.9935, "step": 31864 }, { "epoch": 1.56, "grad_norm": 0.628420889377594, "learning_rate": 0.00028065272685402407, "loss": 2.6308, "step": 31865 }, { "epoch": 1.56, "grad_norm": 0.6051673293113708, "learning_rate": 0.0002806373627394316, "loss": 3.186, "step": 31866 }, { "epoch": 1.56, "grad_norm": 0.6414685845375061, "learning_rate": 0.0002806219986758368, "loss": 2.9328, "step": 31867 }, { "epoch": 1.56, "grad_norm": 0.6532812118530273, "learning_rate": 0.00028060663466327986, "loss": 3.215, "step": 31868 }, { "epoch": 1.56, "grad_norm": 0.6058521270751953, "learning_rate": 0.0002805912707018012, "loss": 3.1998, "step": 31869 }, { "epoch": 1.56, "grad_norm": 0.5776923298835754, "learning_rate": 0.0002805759067914416, "loss": 2.9845, "step": 31870 }, { "epoch": 1.56, "grad_norm": 0.6667451858520508, "learning_rate": 0.00028056054293224117, "loss": 3.1901, "step": 31871 }, { "epoch": 1.56, "grad_norm": 0.6361920833587646, "learning_rate": 0.00028054517912424066, "loss": 3.0641, "step": 31872 }, { "epoch": 1.56, "grad_norm": 0.587245762348175, "learning_rate": 0.0002805298153674802, "loss": 3.1569, "step": 31873 }, { "epoch": 1.56, "grad_norm": 0.6225107908248901, "learning_rate": 0.0002805144516620006, "loss": 3.0355, "step": 31874 }, { "epoch": 1.56, "grad_norm": 0.6272902488708496, "learning_rate": 0.0002804990880078422, "loss": 3.0836, "step": 31875 }, { "epoch": 1.56, "grad_norm": 0.6180784106254578, "learning_rate": 0.0002804837244050454, "loss": 3.0903, "step": 31876 }, { "epoch": 1.56, "grad_norm": 0.6579588651657104, "learning_rate": 0.00028046836085365075, "loss": 3.1705, "step": 31877 }, { "epoch": 1.56, "grad_norm": 0.6163791418075562, "learning_rate": 0.00028045299735369864, "loss": 3.1334, "step": 31878 }, { "epoch": 1.56, "grad_norm": 0.5973280668258667, "learning_rate": 0.0002804376339052295, "loss": 2.9392, "step": 31879 }, { "epoch": 1.56, "grad_norm": 0.5973444581031799, "learning_rate": 0.000280422270508284, "loss": 3.068, "step": 31880 }, { "epoch": 1.56, "grad_norm": 0.5980919599533081, "learning_rate": 0.0002804069071629024, "loss": 3.0488, "step": 31881 }, { "epoch": 1.56, "grad_norm": 0.5866699814796448, "learning_rate": 0.0002803915438691253, "loss": 2.9993, "step": 31882 }, { "epoch": 1.56, "grad_norm": 0.6229212880134583, "learning_rate": 0.00028037618062699306, "loss": 3.168, "step": 31883 }, { "epoch": 1.56, "grad_norm": 0.629422664642334, "learning_rate": 0.0002803608174365461, "loss": 2.9593, "step": 31884 }, { "epoch": 1.56, "grad_norm": 0.5796070694923401, "learning_rate": 0.0002803454542978251, "loss": 3.0037, "step": 31885 }, { "epoch": 1.56, "grad_norm": 0.6071597933769226, "learning_rate": 0.0002803300912108703, "loss": 2.9927, "step": 31886 }, { "epoch": 1.56, "grad_norm": 0.6303873658180237, "learning_rate": 0.0002803147281757224, "loss": 3.144, "step": 31887 }, { "epoch": 1.56, "grad_norm": 0.5881187915802002, "learning_rate": 0.0002802993651924215, "loss": 3.1171, "step": 31888 }, { "epoch": 1.56, "grad_norm": 0.5947704315185547, "learning_rate": 0.00028028400226100844, "loss": 3.233, "step": 31889 }, { "epoch": 1.56, "grad_norm": 0.6180113554000854, "learning_rate": 0.00028026863938152355, "loss": 3.0989, "step": 31890 }, { "epoch": 1.56, "grad_norm": 0.6293803453445435, "learning_rate": 0.00028025327655400707, "loss": 3.1379, "step": 31891 }, { "epoch": 1.56, "grad_norm": 0.6309683322906494, "learning_rate": 0.0002802379137784999, "loss": 3.0034, "step": 31892 }, { "epoch": 1.56, "grad_norm": 0.5890076160430908, "learning_rate": 0.0002802225510550421, "loss": 3.1913, "step": 31893 }, { "epoch": 1.56, "grad_norm": 0.5986996293067932, "learning_rate": 0.00028020718838367426, "loss": 3.1027, "step": 31894 }, { "epoch": 1.56, "grad_norm": 0.6149293184280396, "learning_rate": 0.000280191825764437, "loss": 2.8922, "step": 31895 }, { "epoch": 1.56, "grad_norm": 0.5837023258209229, "learning_rate": 0.00028017646319737063, "loss": 2.8853, "step": 31896 }, { "epoch": 1.56, "grad_norm": 0.5797613859176636, "learning_rate": 0.00028016110068251567, "loss": 3.1853, "step": 31897 }, { "epoch": 1.56, "grad_norm": 0.6410925984382629, "learning_rate": 0.00028014573821991243, "loss": 3.1645, "step": 31898 }, { "epoch": 1.56, "grad_norm": 0.5977853536605835, "learning_rate": 0.0002801303758096015, "loss": 3.1972, "step": 31899 }, { "epoch": 1.56, "grad_norm": 0.6081350445747375, "learning_rate": 0.00028011501345162355, "loss": 3.0046, "step": 31900 }, { "epoch": 1.56, "grad_norm": 0.598984956741333, "learning_rate": 0.0002800996511460186, "loss": 2.9773, "step": 31901 }, { "epoch": 1.56, "grad_norm": 0.5773735046386719, "learning_rate": 0.0002800842888928275, "loss": 2.9064, "step": 31902 }, { "epoch": 1.56, "grad_norm": 0.5690639615058899, "learning_rate": 0.0002800689266920904, "loss": 2.9076, "step": 31903 }, { "epoch": 1.56, "grad_norm": 0.6134076118469238, "learning_rate": 0.00028005356454384797, "loss": 2.9833, "step": 31904 }, { "epoch": 1.56, "grad_norm": 0.5963901281356812, "learning_rate": 0.0002800382024481407, "loss": 2.8316, "step": 31905 }, { "epoch": 1.56, "grad_norm": 0.5689248442649841, "learning_rate": 0.0002800228404050088, "loss": 3.0716, "step": 31906 }, { "epoch": 1.56, "grad_norm": 0.6227253079414368, "learning_rate": 0.00028000747841449306, "loss": 2.9693, "step": 31907 }, { "epoch": 1.56, "grad_norm": 0.6223208904266357, "learning_rate": 0.0002799921164766337, "loss": 3.0232, "step": 31908 }, { "epoch": 1.56, "grad_norm": 0.5878385305404663, "learning_rate": 0.00027997675459147115, "loss": 2.9983, "step": 31909 }, { "epoch": 1.56, "grad_norm": 0.6404999494552612, "learning_rate": 0.00027996139275904616, "loss": 3.1131, "step": 31910 }, { "epoch": 1.56, "grad_norm": 0.6232870817184448, "learning_rate": 0.0002799460309793989, "loss": 3.0633, "step": 31911 }, { "epoch": 1.56, "grad_norm": 0.6137735843658447, "learning_rate": 0.00027993066925257004, "loss": 2.8579, "step": 31912 }, { "epoch": 1.56, "grad_norm": 0.5834466218948364, "learning_rate": 0.0002799153075785998, "loss": 3.0556, "step": 31913 }, { "epoch": 1.56, "grad_norm": 0.6220276951789856, "learning_rate": 0.0002798999459575288, "loss": 3.0881, "step": 31914 }, { "epoch": 1.56, "grad_norm": 0.5749549269676208, "learning_rate": 0.00027988458438939754, "loss": 3.0783, "step": 31915 }, { "epoch": 1.56, "grad_norm": 0.5834282636642456, "learning_rate": 0.0002798692228742463, "loss": 3.078, "step": 31916 }, { "epoch": 1.56, "grad_norm": 0.59247225522995, "learning_rate": 0.00027985386141211584, "loss": 2.9249, "step": 31917 }, { "epoch": 1.56, "grad_norm": 0.5814173221588135, "learning_rate": 0.00027983850000304626, "loss": 2.897, "step": 31918 }, { "epoch": 1.56, "grad_norm": 0.6004323959350586, "learning_rate": 0.0002798231386470782, "loss": 3.0616, "step": 31919 }, { "epoch": 1.56, "grad_norm": 0.5897133350372314, "learning_rate": 0.0002798077773442522, "loss": 3.1485, "step": 31920 }, { "epoch": 1.56, "grad_norm": 0.6207877397537231, "learning_rate": 0.00027979241609460854, "loss": 2.8907, "step": 31921 }, { "epoch": 1.56, "grad_norm": 0.5839530229568481, "learning_rate": 0.00027977705489818787, "loss": 3.0955, "step": 31922 }, { "epoch": 1.56, "grad_norm": 0.6286572813987732, "learning_rate": 0.0002797616937550305, "loss": 2.9629, "step": 31923 }, { "epoch": 1.56, "grad_norm": 0.5850244760513306, "learning_rate": 0.00027974633266517686, "loss": 3.3621, "step": 31924 }, { "epoch": 1.56, "grad_norm": 0.5959276556968689, "learning_rate": 0.0002797309716286676, "loss": 3.0628, "step": 31925 }, { "epoch": 1.56, "grad_norm": 0.6070095896720886, "learning_rate": 0.00027971561064554296, "loss": 2.8452, "step": 31926 }, { "epoch": 1.56, "grad_norm": 0.6149983406066895, "learning_rate": 0.0002797002497158436, "loss": 2.8937, "step": 31927 }, { "epoch": 1.56, "grad_norm": 0.594487726688385, "learning_rate": 0.0002796848888396098, "loss": 2.9891, "step": 31928 }, { "epoch": 1.56, "grad_norm": 0.623718798160553, "learning_rate": 0.0002796695280168821, "loss": 3.0839, "step": 31929 }, { "epoch": 1.56, "grad_norm": 0.6588249206542969, "learning_rate": 0.000279654167247701, "loss": 2.8436, "step": 31930 }, { "epoch": 1.56, "grad_norm": 0.6797537803649902, "learning_rate": 0.0002796388065321068, "loss": 3.0175, "step": 31931 }, { "epoch": 1.56, "grad_norm": 0.6062451601028442, "learning_rate": 0.00027962344587014024, "loss": 3.0375, "step": 31932 }, { "epoch": 1.56, "grad_norm": 0.5665577054023743, "learning_rate": 0.00027960808526184146, "loss": 3.332, "step": 31933 }, { "epoch": 1.57, "grad_norm": 0.5775216817855835, "learning_rate": 0.00027959272470725113, "loss": 3.1504, "step": 31934 }, { "epoch": 1.57, "grad_norm": 0.6084179282188416, "learning_rate": 0.0002795773642064096, "loss": 3.3357, "step": 31935 }, { "epoch": 1.57, "grad_norm": 0.6588486433029175, "learning_rate": 0.0002795620037593573, "loss": 2.8731, "step": 31936 }, { "epoch": 1.57, "grad_norm": 0.6479736566543579, "learning_rate": 0.0002795466433661349, "loss": 3.0273, "step": 31937 }, { "epoch": 1.57, "grad_norm": 0.5970213413238525, "learning_rate": 0.00027953128302678253, "loss": 2.9507, "step": 31938 }, { "epoch": 1.57, "grad_norm": 0.5876474380493164, "learning_rate": 0.00027951592274134095, "loss": 2.9572, "step": 31939 }, { "epoch": 1.57, "grad_norm": 0.5981870293617249, "learning_rate": 0.00027950056250985046, "loss": 3.128, "step": 31940 }, { "epoch": 1.57, "grad_norm": 0.6277065277099609, "learning_rate": 0.0002794852023323515, "loss": 2.9634, "step": 31941 }, { "epoch": 1.57, "grad_norm": 0.6169352531433105, "learning_rate": 0.0002794698422088846, "loss": 2.9605, "step": 31942 }, { "epoch": 1.57, "grad_norm": 0.6255897283554077, "learning_rate": 0.0002794544821394901, "loss": 3.017, "step": 31943 }, { "epoch": 1.57, "grad_norm": 0.584186851978302, "learning_rate": 0.00027943912212420873, "loss": 3.0064, "step": 31944 }, { "epoch": 1.57, "grad_norm": 0.5788871049880981, "learning_rate": 0.00027942376216308064, "loss": 2.9854, "step": 31945 }, { "epoch": 1.57, "grad_norm": 0.601641833782196, "learning_rate": 0.00027940840225614634, "loss": 2.8917, "step": 31946 }, { "epoch": 1.57, "grad_norm": 0.5883685946464539, "learning_rate": 0.00027939304240344644, "loss": 3.2907, "step": 31947 }, { "epoch": 1.57, "grad_norm": 0.5894083380699158, "learning_rate": 0.0002793776826050213, "loss": 3.0337, "step": 31948 }, { "epoch": 1.57, "grad_norm": 0.5871928930282593, "learning_rate": 0.0002793623228609114, "loss": 3.1671, "step": 31949 }, { "epoch": 1.57, "grad_norm": 0.599830150604248, "learning_rate": 0.00027934696317115707, "loss": 3.0923, "step": 31950 }, { "epoch": 1.57, "grad_norm": 0.5850623846054077, "learning_rate": 0.0002793316035357989, "loss": 2.9835, "step": 31951 }, { "epoch": 1.57, "grad_norm": 0.6004115343093872, "learning_rate": 0.00027931624395487734, "loss": 3.0782, "step": 31952 }, { "epoch": 1.57, "grad_norm": 0.6150354743003845, "learning_rate": 0.00027930088442843274, "loss": 3.1377, "step": 31953 }, { "epoch": 1.57, "grad_norm": 0.5694727897644043, "learning_rate": 0.0002792855249565058, "loss": 2.9826, "step": 31954 }, { "epoch": 1.57, "grad_norm": 0.6149030327796936, "learning_rate": 0.0002792701655391366, "loss": 3.1122, "step": 31955 }, { "epoch": 1.57, "grad_norm": 0.5870358347892761, "learning_rate": 0.00027925480617636584, "loss": 2.9392, "step": 31956 }, { "epoch": 1.57, "grad_norm": 0.576116681098938, "learning_rate": 0.000279239446868234, "loss": 2.9264, "step": 31957 }, { "epoch": 1.57, "grad_norm": 0.586956262588501, "learning_rate": 0.00027922408761478143, "loss": 2.9759, "step": 31958 }, { "epoch": 1.57, "grad_norm": 0.5969283580780029, "learning_rate": 0.0002792087284160487, "loss": 3.1103, "step": 31959 }, { "epoch": 1.57, "grad_norm": 0.5975672602653503, "learning_rate": 0.000279193369272076, "loss": 2.7399, "step": 31960 }, { "epoch": 1.57, "grad_norm": 0.5579419732093811, "learning_rate": 0.000279178010182904, "loss": 3.1493, "step": 31961 }, { "epoch": 1.57, "grad_norm": 0.5934404134750366, "learning_rate": 0.0002791626511485733, "loss": 3.0314, "step": 31962 }, { "epoch": 1.57, "grad_norm": 0.6547814607620239, "learning_rate": 0.00027914729216912395, "loss": 3.1367, "step": 31963 }, { "epoch": 1.57, "grad_norm": 0.6256775856018066, "learning_rate": 0.00027913193324459674, "loss": 3.0097, "step": 31964 }, { "epoch": 1.57, "grad_norm": 0.6281748414039612, "learning_rate": 0.0002791165743750319, "loss": 3.119, "step": 31965 }, { "epoch": 1.57, "grad_norm": 0.584899365901947, "learning_rate": 0.00027910121556047005, "loss": 3.0945, "step": 31966 }, { "epoch": 1.57, "grad_norm": 0.5488184690475464, "learning_rate": 0.00027908585680095163, "loss": 2.9363, "step": 31967 }, { "epoch": 1.57, "grad_norm": 0.6100363731384277, "learning_rate": 0.0002790704980965169, "loss": 3.102, "step": 31968 }, { "epoch": 1.57, "grad_norm": 0.5827188491821289, "learning_rate": 0.0002790551394472066, "loss": 2.9794, "step": 31969 }, { "epoch": 1.57, "grad_norm": 0.592393696308136, "learning_rate": 0.00027903978085306095, "loss": 3.0996, "step": 31970 }, { "epoch": 1.57, "grad_norm": 0.5915204882621765, "learning_rate": 0.0002790244223141204, "loss": 3.125, "step": 31971 }, { "epoch": 1.57, "grad_norm": 0.6166325211524963, "learning_rate": 0.0002790090638304257, "loss": 3.1128, "step": 31972 }, { "epoch": 1.57, "grad_norm": 0.6143838167190552, "learning_rate": 0.00027899370540201697, "loss": 2.8731, "step": 31973 }, { "epoch": 1.57, "grad_norm": 0.5882230997085571, "learning_rate": 0.0002789783470289348, "loss": 3.0723, "step": 31974 }, { "epoch": 1.57, "grad_norm": 0.5999899506568909, "learning_rate": 0.0002789629887112195, "loss": 3.0853, "step": 31975 }, { "epoch": 1.57, "grad_norm": 0.6208217740058899, "learning_rate": 0.00027894763044891176, "loss": 2.8768, "step": 31976 }, { "epoch": 1.57, "grad_norm": 0.6412845253944397, "learning_rate": 0.0002789322722420519, "loss": 3.1244, "step": 31977 }, { "epoch": 1.57, "grad_norm": 0.5920975208282471, "learning_rate": 0.0002789169140906803, "loss": 3.2523, "step": 31978 }, { "epoch": 1.57, "grad_norm": 0.576759934425354, "learning_rate": 0.0002789015559948376, "loss": 3.0485, "step": 31979 }, { "epoch": 1.57, "grad_norm": 0.6126590967178345, "learning_rate": 0.0002788861979545641, "loss": 2.9728, "step": 31980 }, { "epoch": 1.57, "grad_norm": 0.6673457026481628, "learning_rate": 0.0002788708399699002, "loss": 2.889, "step": 31981 }, { "epoch": 1.57, "grad_norm": 0.6051563620567322, "learning_rate": 0.0002788554820408866, "loss": 3.1627, "step": 31982 }, { "epoch": 1.57, "grad_norm": 0.5865967273712158, "learning_rate": 0.00027884012416756347, "loss": 3.1364, "step": 31983 }, { "epoch": 1.57, "grad_norm": 0.6102021336555481, "learning_rate": 0.0002788247663499715, "loss": 3.0828, "step": 31984 }, { "epoch": 1.57, "grad_norm": 0.5698949694633484, "learning_rate": 0.0002788094085881509, "loss": 3.0331, "step": 31985 }, { "epoch": 1.57, "grad_norm": 0.5987421274185181, "learning_rate": 0.00027879405088214226, "loss": 3.156, "step": 31986 }, { "epoch": 1.57, "grad_norm": 0.6104761362075806, "learning_rate": 0.00027877869323198606, "loss": 3.0174, "step": 31987 }, { "epoch": 1.57, "grad_norm": 0.5552363395690918, "learning_rate": 0.0002787633356377227, "loss": 3.0127, "step": 31988 }, { "epoch": 1.57, "grad_norm": 0.5822331309318542, "learning_rate": 0.00027874797809939264, "loss": 3.1789, "step": 31989 }, { "epoch": 1.57, "grad_norm": 0.5854426622390747, "learning_rate": 0.0002787326206170362, "loss": 3.1524, "step": 31990 }, { "epoch": 1.57, "grad_norm": 0.6006827354431152, "learning_rate": 0.000278717263190694, "loss": 2.9734, "step": 31991 }, { "epoch": 1.57, "grad_norm": 0.5953511595726013, "learning_rate": 0.0002787019058204065, "loss": 2.9979, "step": 31992 }, { "epoch": 1.57, "grad_norm": 0.5749074220657349, "learning_rate": 0.00027868654850621397, "loss": 2.9802, "step": 31993 }, { "epoch": 1.57, "grad_norm": 0.638322114944458, "learning_rate": 0.0002786711912481571, "loss": 3.0781, "step": 31994 }, { "epoch": 1.57, "grad_norm": 0.5966150760650635, "learning_rate": 0.00027865583404627616, "loss": 2.9011, "step": 31995 }, { "epoch": 1.57, "grad_norm": 0.5970824956893921, "learning_rate": 0.00027864047690061153, "loss": 3.1345, "step": 31996 }, { "epoch": 1.57, "grad_norm": 0.6031424403190613, "learning_rate": 0.00027862511981120394, "loss": 3.2617, "step": 31997 }, { "epoch": 1.57, "grad_norm": 0.5995854735374451, "learning_rate": 0.0002786097627780936, "loss": 2.9957, "step": 31998 }, { "epoch": 1.57, "grad_norm": 0.6159862279891968, "learning_rate": 0.00027859440580132105, "loss": 2.9234, "step": 31999 }, { "epoch": 1.57, "grad_norm": 0.6073964238166809, "learning_rate": 0.0002785790488809267, "loss": 2.867, "step": 32000 }, { "epoch": 1.57, "grad_norm": 1.2414467334747314, "learning_rate": 0.00027856369201695097, "loss": 3.1922, "step": 32001 }, { "epoch": 1.57, "grad_norm": 0.616241455078125, "learning_rate": 0.0002785483352094344, "loss": 2.9149, "step": 32002 }, { "epoch": 1.57, "grad_norm": 0.6222730875015259, "learning_rate": 0.0002785329784584173, "loss": 3.1566, "step": 32003 }, { "epoch": 1.57, "grad_norm": 0.5811864137649536, "learning_rate": 0.0002785176217639404, "loss": 3.0071, "step": 32004 }, { "epoch": 1.57, "grad_norm": 0.7227095365524292, "learning_rate": 0.00027850226512604376, "loss": 2.9084, "step": 32005 }, { "epoch": 1.57, "grad_norm": 0.5832785964012146, "learning_rate": 0.00027848690854476805, "loss": 3.0637, "step": 32006 }, { "epoch": 1.57, "grad_norm": 0.6182896494865417, "learning_rate": 0.0002784715520201538, "loss": 3.1451, "step": 32007 }, { "epoch": 1.57, "grad_norm": 0.6133847832679749, "learning_rate": 0.0002784561955522412, "loss": 2.8646, "step": 32008 }, { "epoch": 1.57, "grad_norm": 0.6060160994529724, "learning_rate": 0.0002784408391410709, "loss": 3.0987, "step": 32009 }, { "epoch": 1.57, "grad_norm": 0.6133733987808228, "learning_rate": 0.0002784254827866833, "loss": 2.9392, "step": 32010 }, { "epoch": 1.57, "grad_norm": 0.5962675213813782, "learning_rate": 0.0002784101264891187, "loss": 3.1465, "step": 32011 }, { "epoch": 1.57, "grad_norm": 0.5825144648551941, "learning_rate": 0.00027839477024841784, "loss": 3.1008, "step": 32012 }, { "epoch": 1.57, "grad_norm": 0.5747712254524231, "learning_rate": 0.0002783794140646209, "loss": 3.088, "step": 32013 }, { "epoch": 1.57, "grad_norm": 0.624232292175293, "learning_rate": 0.0002783640579377685, "loss": 3.1401, "step": 32014 }, { "epoch": 1.57, "grad_norm": 0.6441041231155396, "learning_rate": 0.00027834870186790086, "loss": 2.9997, "step": 32015 }, { "epoch": 1.57, "grad_norm": 0.6096225380897522, "learning_rate": 0.00027833334585505875, "loss": 2.6779, "step": 32016 }, { "epoch": 1.57, "grad_norm": 0.6041115522384644, "learning_rate": 0.0002783179898992824, "loss": 3.0651, "step": 32017 }, { "epoch": 1.57, "grad_norm": 0.6201046705245972, "learning_rate": 0.0002783026340006121, "loss": 2.9325, "step": 32018 }, { "epoch": 1.57, "grad_norm": 0.5794186592102051, "learning_rate": 0.0002782872781590887, "loss": 3.0974, "step": 32019 }, { "epoch": 1.57, "grad_norm": 0.5904180407524109, "learning_rate": 0.0002782719223747524, "loss": 3.1501, "step": 32020 }, { "epoch": 1.57, "grad_norm": 0.5724608898162842, "learning_rate": 0.0002782565666476437, "loss": 3.0647, "step": 32021 }, { "epoch": 1.57, "grad_norm": 0.6130523681640625, "learning_rate": 0.00027824121097780286, "loss": 2.9517, "step": 32022 }, { "epoch": 1.57, "grad_norm": 0.5855075716972351, "learning_rate": 0.00027822585536527053, "loss": 2.8407, "step": 32023 }, { "epoch": 1.57, "grad_norm": 0.5709105730056763, "learning_rate": 0.0002782104998100872, "loss": 3.1079, "step": 32024 }, { "epoch": 1.57, "grad_norm": 0.6038665175437927, "learning_rate": 0.00027819514431229314, "loss": 2.9033, "step": 32025 }, { "epoch": 1.57, "grad_norm": 0.6230751872062683, "learning_rate": 0.00027817978887192897, "loss": 2.9427, "step": 32026 }, { "epoch": 1.57, "grad_norm": 0.5995590090751648, "learning_rate": 0.00027816443348903494, "loss": 3.0794, "step": 32027 }, { "epoch": 1.57, "grad_norm": 0.5960817337036133, "learning_rate": 0.0002781490781636516, "loss": 3.079, "step": 32028 }, { "epoch": 1.57, "grad_norm": 0.5772078633308411, "learning_rate": 0.0002781337228958195, "loss": 2.9819, "step": 32029 }, { "epoch": 1.57, "grad_norm": 0.5906760692596436, "learning_rate": 0.0002781183676855788, "loss": 3.1876, "step": 32030 }, { "epoch": 1.57, "grad_norm": 0.6208398938179016, "learning_rate": 0.00027810301253297024, "loss": 2.9969, "step": 32031 }, { "epoch": 1.57, "grad_norm": 0.6172988414764404, "learning_rate": 0.0002780876574380341, "loss": 3.1743, "step": 32032 }, { "epoch": 1.57, "grad_norm": 0.6380127668380737, "learning_rate": 0.0002780723024008107, "loss": 3.0405, "step": 32033 }, { "epoch": 1.57, "grad_norm": 0.639022946357727, "learning_rate": 0.0002780569474213408, "loss": 2.8838, "step": 32034 }, { "epoch": 1.57, "grad_norm": 0.6695070862770081, "learning_rate": 0.00027804159249966465, "loss": 2.9628, "step": 32035 }, { "epoch": 1.57, "grad_norm": 0.6148467063903809, "learning_rate": 0.0002780262376358228, "loss": 2.9927, "step": 32036 }, { "epoch": 1.57, "grad_norm": 0.6534361839294434, "learning_rate": 0.0002780108828298554, "loss": 3.1393, "step": 32037 }, { "epoch": 1.57, "grad_norm": 0.5975500345230103, "learning_rate": 0.00027799552808180323, "loss": 3.2256, "step": 32038 }, { "epoch": 1.57, "grad_norm": 0.6091464161872864, "learning_rate": 0.00027798017339170664, "loss": 2.9941, "step": 32039 }, { "epoch": 1.57, "grad_norm": 0.5968720316886902, "learning_rate": 0.00027796481875960597, "loss": 3.15, "step": 32040 }, { "epoch": 1.57, "grad_norm": 0.5805733799934387, "learning_rate": 0.00027794946418554183, "loss": 3.0558, "step": 32041 }, { "epoch": 1.57, "grad_norm": 0.638577401638031, "learning_rate": 0.00027793410966955444, "loss": 3.0992, "step": 32042 }, { "epoch": 1.57, "grad_norm": 0.5825785398483276, "learning_rate": 0.0002779187552116843, "loss": 3.1721, "step": 32043 }, { "epoch": 1.57, "grad_norm": 0.6435587406158447, "learning_rate": 0.00027790340081197207, "loss": 3.0042, "step": 32044 }, { "epoch": 1.57, "grad_norm": 0.5820013880729675, "learning_rate": 0.0002778880464704579, "loss": 2.998, "step": 32045 }, { "epoch": 1.57, "grad_norm": 0.5931047797203064, "learning_rate": 0.0002778726921871825, "loss": 3.1897, "step": 32046 }, { "epoch": 1.57, "grad_norm": 0.6294622421264648, "learning_rate": 0.0002778573379621861, "loss": 2.9959, "step": 32047 }, { "epoch": 1.57, "grad_norm": 0.5972573161125183, "learning_rate": 0.0002778419837955091, "loss": 3.0585, "step": 32048 }, { "epoch": 1.57, "grad_norm": 0.6380653381347656, "learning_rate": 0.0002778266296871922, "loss": 3.0712, "step": 32049 }, { "epoch": 1.57, "grad_norm": 0.6002389192581177, "learning_rate": 0.0002778112756372757, "loss": 3.1067, "step": 32050 }, { "epoch": 1.57, "grad_norm": 0.6016014218330383, "learning_rate": 0.0002777959216458, "loss": 3.0874, "step": 32051 }, { "epoch": 1.57, "grad_norm": 0.5852669477462769, "learning_rate": 0.00027778056771280546, "loss": 3.1815, "step": 32052 }, { "epoch": 1.57, "grad_norm": 0.6085726022720337, "learning_rate": 0.0002777652138383327, "loss": 3.1229, "step": 32053 }, { "epoch": 1.57, "grad_norm": 0.6045544147491455, "learning_rate": 0.0002777498600224222, "loss": 2.7204, "step": 32054 }, { "epoch": 1.57, "grad_norm": 0.5995080471038818, "learning_rate": 0.0002777345062651141, "loss": 3.1933, "step": 32055 }, { "epoch": 1.57, "grad_norm": 0.6304450035095215, "learning_rate": 0.00027771915256644917, "loss": 2.8745, "step": 32056 }, { "epoch": 1.57, "grad_norm": 0.5954857468605042, "learning_rate": 0.00027770379892646765, "loss": 2.9642, "step": 32057 }, { "epoch": 1.57, "grad_norm": 0.6180627346038818, "learning_rate": 0.00027768844534520993, "loss": 2.9956, "step": 32058 }, { "epoch": 1.57, "grad_norm": 0.5869137644767761, "learning_rate": 0.00027767309182271673, "loss": 2.8287, "step": 32059 }, { "epoch": 1.57, "grad_norm": 0.5612866282463074, "learning_rate": 0.0002776577383590282, "loss": 2.8453, "step": 32060 }, { "epoch": 1.57, "grad_norm": 0.5565618872642517, "learning_rate": 0.000277642384954185, "loss": 3.1548, "step": 32061 }, { "epoch": 1.57, "grad_norm": 0.6152032017707825, "learning_rate": 0.0002776270316082273, "loss": 2.8864, "step": 32062 }, { "epoch": 1.57, "grad_norm": 0.5819271206855774, "learning_rate": 0.0002776116783211957, "loss": 3.0542, "step": 32063 }, { "epoch": 1.57, "grad_norm": 0.5854024887084961, "learning_rate": 0.0002775963250931308, "loss": 2.9896, "step": 32064 }, { "epoch": 1.57, "grad_norm": 0.5745155215263367, "learning_rate": 0.0002775809719240727, "loss": 2.971, "step": 32065 }, { "epoch": 1.57, "grad_norm": 0.5686827898025513, "learning_rate": 0.00027756561881406213, "loss": 3.1338, "step": 32066 }, { "epoch": 1.57, "grad_norm": 0.5938876867294312, "learning_rate": 0.0002775502657631393, "loss": 3.1614, "step": 32067 }, { "epoch": 1.57, "grad_norm": 0.5606462359428406, "learning_rate": 0.00027753491277134474, "loss": 3.1713, "step": 32068 }, { "epoch": 1.57, "grad_norm": 0.6324983239173889, "learning_rate": 0.00027751955983871905, "loss": 3.2225, "step": 32069 }, { "epoch": 1.57, "grad_norm": 0.5873457193374634, "learning_rate": 0.0002775042069653023, "loss": 2.8439, "step": 32070 }, { "epoch": 1.57, "grad_norm": 0.5858466625213623, "learning_rate": 0.0002774888541511353, "loss": 3.1174, "step": 32071 }, { "epoch": 1.57, "grad_norm": 0.6003888249397278, "learning_rate": 0.0002774735013962583, "loss": 3.1971, "step": 32072 }, { "epoch": 1.57, "grad_norm": 0.574512243270874, "learning_rate": 0.00027745814870071164, "loss": 2.901, "step": 32073 }, { "epoch": 1.57, "grad_norm": 0.5840027928352356, "learning_rate": 0.00027744279606453605, "loss": 2.9237, "step": 32074 }, { "epoch": 1.57, "grad_norm": 0.5972057580947876, "learning_rate": 0.0002774274434877717, "loss": 3.0789, "step": 32075 }, { "epoch": 1.57, "grad_norm": 0.6341174840927124, "learning_rate": 0.0002774120909704592, "loss": 3.0405, "step": 32076 }, { "epoch": 1.57, "grad_norm": 0.6080915331840515, "learning_rate": 0.0002773967385126388, "loss": 3.1676, "step": 32077 }, { "epoch": 1.57, "grad_norm": 0.5747969150543213, "learning_rate": 0.00027738138611435104, "loss": 3.1673, "step": 32078 }, { "epoch": 1.57, "grad_norm": 0.6224098205566406, "learning_rate": 0.00027736603377563645, "loss": 3.1983, "step": 32079 }, { "epoch": 1.57, "grad_norm": 0.6277689337730408, "learning_rate": 0.0002773506814965353, "loss": 2.974, "step": 32080 }, { "epoch": 1.57, "grad_norm": 0.6136027574539185, "learning_rate": 0.00027733532927708815, "loss": 3.1981, "step": 32081 }, { "epoch": 1.57, "grad_norm": 0.6523263454437256, "learning_rate": 0.00027731997711733533, "loss": 3.1751, "step": 32082 }, { "epoch": 1.57, "grad_norm": 0.6148524880409241, "learning_rate": 0.0002773046250173173, "loss": 2.9798, "step": 32083 }, { "epoch": 1.57, "grad_norm": 0.5856233835220337, "learning_rate": 0.0002772892729770746, "loss": 2.8006, "step": 32084 }, { "epoch": 1.57, "grad_norm": 0.5549863576889038, "learning_rate": 0.00027727392099664746, "loss": 3.0683, "step": 32085 }, { "epoch": 1.57, "grad_norm": 0.6235625743865967, "learning_rate": 0.0002772585690760766, "loss": 3.0228, "step": 32086 }, { "epoch": 1.57, "grad_norm": 0.6109411716461182, "learning_rate": 0.00027724321721540223, "loss": 3.031, "step": 32087 }, { "epoch": 1.57, "grad_norm": 0.641261637210846, "learning_rate": 0.00027722786541466474, "loss": 3.1146, "step": 32088 }, { "epoch": 1.57, "grad_norm": 0.6297993063926697, "learning_rate": 0.00027721251367390483, "loss": 2.9968, "step": 32089 }, { "epoch": 1.57, "grad_norm": 0.6300150156021118, "learning_rate": 0.0002771971619931627, "loss": 3.0939, "step": 32090 }, { "epoch": 1.57, "grad_norm": 0.6364719271659851, "learning_rate": 0.00027718181037247896, "loss": 3.026, "step": 32091 }, { "epoch": 1.57, "grad_norm": 0.613513171672821, "learning_rate": 0.0002771664588118937, "loss": 2.8987, "step": 32092 }, { "epoch": 1.57, "grad_norm": 0.6249509453773499, "learning_rate": 0.0002771511073114478, "loss": 2.9277, "step": 32093 }, { "epoch": 1.57, "grad_norm": 0.605344295501709, "learning_rate": 0.0002771357558711815, "loss": 2.9627, "step": 32094 }, { "epoch": 1.57, "grad_norm": 0.7609037160873413, "learning_rate": 0.00027712040449113503, "loss": 2.957, "step": 32095 }, { "epoch": 1.57, "grad_norm": 0.5966310501098633, "learning_rate": 0.0002771050531713492, "loss": 3.1181, "step": 32096 }, { "epoch": 1.57, "grad_norm": 0.6008804440498352, "learning_rate": 0.00027708970191186414, "loss": 2.9581, "step": 32097 }, { "epoch": 1.57, "grad_norm": 0.5897909998893738, "learning_rate": 0.0002770743507127205, "loss": 3.3047, "step": 32098 }, { "epoch": 1.57, "grad_norm": 0.590558648109436, "learning_rate": 0.0002770589995739585, "loss": 3.0585, "step": 32099 }, { "epoch": 1.57, "grad_norm": 0.5807520747184753, "learning_rate": 0.0002770436484956187, "loss": 2.9507, "step": 32100 }, { "epoch": 1.57, "grad_norm": 0.6075061559677124, "learning_rate": 0.0002770282974777416, "loss": 2.8376, "step": 32101 }, { "epoch": 1.57, "grad_norm": 0.6187633275985718, "learning_rate": 0.0002770129465203674, "loss": 3.172, "step": 32102 }, { "epoch": 1.57, "grad_norm": 0.5896881222724915, "learning_rate": 0.00027699759562353684, "loss": 3.0375, "step": 32103 }, { "epoch": 1.57, "grad_norm": 0.5933699011802673, "learning_rate": 0.00027698224478729005, "loss": 3.0564, "step": 32104 }, { "epoch": 1.57, "grad_norm": 0.5940541625022888, "learning_rate": 0.0002769668940116676, "loss": 2.9197, "step": 32105 }, { "epoch": 1.57, "grad_norm": 0.6250037550926208, "learning_rate": 0.00027695154329671003, "loss": 3.0014, "step": 32106 }, { "epoch": 1.57, "grad_norm": 0.6533704400062561, "learning_rate": 0.0002769361926424575, "loss": 2.8502, "step": 32107 }, { "epoch": 1.57, "grad_norm": 0.5797675848007202, "learning_rate": 0.00027692084204895075, "loss": 3.0491, "step": 32108 }, { "epoch": 1.57, "grad_norm": 0.5860679745674133, "learning_rate": 0.00027690549151623, "loss": 3.176, "step": 32109 }, { "epoch": 1.57, "grad_norm": 0.6044549345970154, "learning_rate": 0.0002768901410443357, "loss": 2.8871, "step": 32110 }, { "epoch": 1.57, "grad_norm": 0.6011185646057129, "learning_rate": 0.0002768747906333084, "loss": 3.0566, "step": 32111 }, { "epoch": 1.57, "grad_norm": 0.5532136559486389, "learning_rate": 0.00027685944028318845, "loss": 2.9295, "step": 32112 }, { "epoch": 1.57, "grad_norm": 0.5941985249519348, "learning_rate": 0.00027684408999401625, "loss": 2.9165, "step": 32113 }, { "epoch": 1.57, "grad_norm": 0.599427342414856, "learning_rate": 0.0002768287397658322, "loss": 3.1695, "step": 32114 }, { "epoch": 1.57, "grad_norm": 0.6106255650520325, "learning_rate": 0.00027681338959867683, "loss": 3.0471, "step": 32115 }, { "epoch": 1.57, "grad_norm": 0.6107386946678162, "learning_rate": 0.0002767980394925906, "loss": 3.2058, "step": 32116 }, { "epoch": 1.57, "grad_norm": 0.6064965128898621, "learning_rate": 0.00027678268944761374, "loss": 2.885, "step": 32117 }, { "epoch": 1.57, "grad_norm": 0.6419435143470764, "learning_rate": 0.00027676733946378697, "loss": 3.086, "step": 32118 }, { "epoch": 1.57, "grad_norm": 0.6065117716789246, "learning_rate": 0.00027675198954115046, "loss": 3.0474, "step": 32119 }, { "epoch": 1.57, "grad_norm": 0.7930900454521179, "learning_rate": 0.0002767366396797447, "loss": 2.9981, "step": 32120 }, { "epoch": 1.57, "grad_norm": 0.6305885910987854, "learning_rate": 0.00027672128987961023, "loss": 3.0314, "step": 32121 }, { "epoch": 1.57, "grad_norm": 0.599500834941864, "learning_rate": 0.00027670594014078735, "loss": 3.3042, "step": 32122 }, { "epoch": 1.57, "grad_norm": 0.6495477557182312, "learning_rate": 0.00027669059046331665, "loss": 3.0577, "step": 32123 }, { "epoch": 1.57, "grad_norm": 0.5801239013671875, "learning_rate": 0.00027667524084723825, "loss": 2.8206, "step": 32124 }, { "epoch": 1.57, "grad_norm": 0.6565015316009521, "learning_rate": 0.00027665989129259285, "loss": 2.8886, "step": 32125 }, { "epoch": 1.57, "grad_norm": 0.612785279750824, "learning_rate": 0.00027664454179942096, "loss": 3.0586, "step": 32126 }, { "epoch": 1.57, "grad_norm": 0.6405092477798462, "learning_rate": 0.00027662919236776264, "loss": 3.2097, "step": 32127 }, { "epoch": 1.57, "grad_norm": 0.6418353319168091, "learning_rate": 0.0002766138429976587, "loss": 2.8991, "step": 32128 }, { "epoch": 1.57, "grad_norm": 0.598755419254303, "learning_rate": 0.00027659849368914926, "loss": 3.0289, "step": 32129 }, { "epoch": 1.57, "grad_norm": 0.6165720820426941, "learning_rate": 0.00027658314444227493, "loss": 3.2701, "step": 32130 }, { "epoch": 1.57, "grad_norm": 0.6199347972869873, "learning_rate": 0.00027656779525707614, "loss": 2.834, "step": 32131 }, { "epoch": 1.57, "grad_norm": 0.6760289669036865, "learning_rate": 0.0002765524461335932, "loss": 3.128, "step": 32132 }, { "epoch": 1.57, "grad_norm": 0.6090681552886963, "learning_rate": 0.0002765370970718667, "loss": 2.799, "step": 32133 }, { "epoch": 1.57, "grad_norm": 0.6263375282287598, "learning_rate": 0.0002765217480719369, "loss": 2.939, "step": 32134 }, { "epoch": 1.57, "grad_norm": 0.5948789715766907, "learning_rate": 0.00027650639913384423, "loss": 2.971, "step": 32135 }, { "epoch": 1.57, "grad_norm": 0.6212787628173828, "learning_rate": 0.0002764910502576293, "loss": 3.0933, "step": 32136 }, { "epoch": 1.57, "grad_norm": 0.6157195568084717, "learning_rate": 0.0002764757014433324, "loss": 2.8863, "step": 32137 }, { "epoch": 1.58, "grad_norm": 0.6237986087799072, "learning_rate": 0.000276460352690994, "loss": 3.1061, "step": 32138 }, { "epoch": 1.58, "grad_norm": 0.6217524409294128, "learning_rate": 0.00027644500400065435, "loss": 3.0781, "step": 32139 }, { "epoch": 1.58, "grad_norm": 0.6218914985656738, "learning_rate": 0.00027642965537235417, "loss": 2.946, "step": 32140 }, { "epoch": 1.58, "grad_norm": 0.6007027626037598, "learning_rate": 0.0002764143068061337, "loss": 3.0428, "step": 32141 }, { "epoch": 1.58, "grad_norm": 0.5837012529373169, "learning_rate": 0.0002763989583020334, "loss": 2.8888, "step": 32142 }, { "epoch": 1.58, "grad_norm": 0.5839048624038696, "learning_rate": 0.00027638360986009374, "loss": 3.089, "step": 32143 }, { "epoch": 1.58, "grad_norm": 0.6786932945251465, "learning_rate": 0.0002763682614803551, "loss": 2.9294, "step": 32144 }, { "epoch": 1.58, "grad_norm": 0.6156800985336304, "learning_rate": 0.0002763529131628578, "loss": 3.2307, "step": 32145 }, { "epoch": 1.58, "grad_norm": 0.6580122709274292, "learning_rate": 0.0002763375649076425, "loss": 2.9384, "step": 32146 }, { "epoch": 1.58, "grad_norm": 0.6277601718902588, "learning_rate": 0.0002763222167147494, "loss": 2.9617, "step": 32147 }, { "epoch": 1.58, "grad_norm": 0.6300899386405945, "learning_rate": 0.0002763068685842191, "loss": 3.0255, "step": 32148 }, { "epoch": 1.58, "grad_norm": 0.5806252956390381, "learning_rate": 0.000276291520516092, "loss": 2.7774, "step": 32149 }, { "epoch": 1.58, "grad_norm": 0.5649163126945496, "learning_rate": 0.0002762761725104083, "loss": 3.0464, "step": 32150 }, { "epoch": 1.58, "grad_norm": 0.6552037000656128, "learning_rate": 0.0002762608245672088, "loss": 2.938, "step": 32151 }, { "epoch": 1.58, "grad_norm": 0.5685431957244873, "learning_rate": 0.0002762454766865336, "loss": 3.0988, "step": 32152 }, { "epoch": 1.58, "grad_norm": 0.5887003540992737, "learning_rate": 0.00027623012886842334, "loss": 2.9508, "step": 32153 }, { "epoch": 1.58, "grad_norm": 0.6270775198936462, "learning_rate": 0.0002762147811129182, "loss": 3.3599, "step": 32154 }, { "epoch": 1.58, "grad_norm": 0.5925376415252686, "learning_rate": 0.0002761994334200588, "loss": 2.8112, "step": 32155 }, { "epoch": 1.58, "grad_norm": 0.6008676290512085, "learning_rate": 0.00027618408578988557, "loss": 3.0296, "step": 32156 }, { "epoch": 1.58, "grad_norm": 0.6249087452888489, "learning_rate": 0.0002761687382224388, "loss": 3.0094, "step": 32157 }, { "epoch": 1.58, "grad_norm": 0.6997944116592407, "learning_rate": 0.00027615339071775905, "loss": 2.9996, "step": 32158 }, { "epoch": 1.58, "grad_norm": 0.6397016644477844, "learning_rate": 0.00027613804327588666, "loss": 3.184, "step": 32159 }, { "epoch": 1.58, "grad_norm": 0.6241958141326904, "learning_rate": 0.000276122695896862, "loss": 3.1826, "step": 32160 }, { "epoch": 1.58, "grad_norm": 0.5984312295913696, "learning_rate": 0.0002761073485807257, "loss": 2.841, "step": 32161 }, { "epoch": 1.58, "grad_norm": 0.6074608564376831, "learning_rate": 0.00027609200132751793, "loss": 2.9895, "step": 32162 }, { "epoch": 1.58, "grad_norm": 0.6068947315216064, "learning_rate": 0.0002760766541372794, "loss": 2.7853, "step": 32163 }, { "epoch": 1.58, "grad_norm": 0.6099889278411865, "learning_rate": 0.0002760613070100501, "loss": 3.0638, "step": 32164 }, { "epoch": 1.58, "grad_norm": 0.640886127948761, "learning_rate": 0.0002760459599458709, "loss": 3.0396, "step": 32165 }, { "epoch": 1.58, "grad_norm": 0.6190171241760254, "learning_rate": 0.00027603061294478203, "loss": 3.0839, "step": 32166 }, { "epoch": 1.58, "grad_norm": 0.5623420476913452, "learning_rate": 0.0002760152660068238, "loss": 2.9209, "step": 32167 }, { "epoch": 1.58, "grad_norm": 0.588102400302887, "learning_rate": 0.0002759999191320368, "loss": 3.0652, "step": 32168 }, { "epoch": 1.58, "grad_norm": 0.5879389047622681, "learning_rate": 0.0002759845723204613, "loss": 2.8914, "step": 32169 }, { "epoch": 1.58, "grad_norm": 0.6012025475502014, "learning_rate": 0.000275969225572138, "loss": 3.1915, "step": 32170 }, { "epoch": 1.58, "grad_norm": 0.6043398380279541, "learning_rate": 0.00027595387888710703, "loss": 3.0019, "step": 32171 }, { "epoch": 1.58, "grad_norm": 0.6456342935562134, "learning_rate": 0.00027593853226540887, "loss": 2.8778, "step": 32172 }, { "epoch": 1.58, "grad_norm": 0.5889996886253357, "learning_rate": 0.0002759231857070841, "loss": 2.9956, "step": 32173 }, { "epoch": 1.58, "grad_norm": 0.697205126285553, "learning_rate": 0.00027590783921217294, "loss": 3.0563, "step": 32174 }, { "epoch": 1.58, "grad_norm": 0.5691419243812561, "learning_rate": 0.000275892492780716, "loss": 3.1269, "step": 32175 }, { "epoch": 1.58, "grad_norm": 0.6309320330619812, "learning_rate": 0.00027587714641275345, "loss": 2.7833, "step": 32176 }, { "epoch": 1.58, "grad_norm": 0.5982531905174255, "learning_rate": 0.0002758618001083259, "loss": 2.9703, "step": 32177 }, { "epoch": 1.58, "grad_norm": 0.5961381793022156, "learning_rate": 0.0002758464538674738, "loss": 3.017, "step": 32178 }, { "epoch": 1.58, "grad_norm": 0.6061530113220215, "learning_rate": 0.00027583110769023735, "loss": 3.2634, "step": 32179 }, { "epoch": 1.58, "grad_norm": 0.5718122124671936, "learning_rate": 0.00027581576157665726, "loss": 3.1522, "step": 32180 }, { "epoch": 1.58, "grad_norm": 0.632178544998169, "learning_rate": 0.0002758004155267738, "loss": 2.8639, "step": 32181 }, { "epoch": 1.58, "grad_norm": 0.6122092008590698, "learning_rate": 0.0002757850695406272, "loss": 2.8233, "step": 32182 }, { "epoch": 1.58, "grad_norm": 0.6195982694625854, "learning_rate": 0.00027576972361825823, "loss": 3.092, "step": 32183 }, { "epoch": 1.58, "grad_norm": 0.6117836833000183, "learning_rate": 0.0002757543777597071, "loss": 3.2046, "step": 32184 }, { "epoch": 1.58, "grad_norm": 0.6399086117744446, "learning_rate": 0.0002757390319650144, "loss": 3.0408, "step": 32185 }, { "epoch": 1.58, "grad_norm": 0.6001808047294617, "learning_rate": 0.0002757236862342202, "loss": 2.9358, "step": 32186 }, { "epoch": 1.58, "grad_norm": 0.5729873776435852, "learning_rate": 0.00027570834056736516, "loss": 3.1652, "step": 32187 }, { "epoch": 1.58, "grad_norm": 0.5765482187271118, "learning_rate": 0.00027569299496448983, "loss": 3.0379, "step": 32188 }, { "epoch": 1.58, "grad_norm": 0.5675467848777771, "learning_rate": 0.00027567764942563436, "loss": 3.0612, "step": 32189 }, { "epoch": 1.58, "grad_norm": 0.5885112285614014, "learning_rate": 0.0002756623039508394, "loss": 2.9927, "step": 32190 }, { "epoch": 1.58, "grad_norm": 0.6263838410377502, "learning_rate": 0.0002756469585401451, "loss": 3.1661, "step": 32191 }, { "epoch": 1.58, "grad_norm": 0.580174446105957, "learning_rate": 0.00027563161319359207, "loss": 3.057, "step": 32192 }, { "epoch": 1.58, "grad_norm": 0.5984945297241211, "learning_rate": 0.0002756162679112208, "loss": 3.1215, "step": 32193 }, { "epoch": 1.58, "grad_norm": 0.588066041469574, "learning_rate": 0.0002756009226930714, "loss": 3.1108, "step": 32194 }, { "epoch": 1.58, "grad_norm": 0.6735916137695312, "learning_rate": 0.0002755855775391846, "loss": 2.7924, "step": 32195 }, { "epoch": 1.58, "grad_norm": 0.6410754919052124, "learning_rate": 0.0002755702324496007, "loss": 2.906, "step": 32196 }, { "epoch": 1.58, "grad_norm": 0.5920937061309814, "learning_rate": 0.00027555488742436, "loss": 3.1199, "step": 32197 }, { "epoch": 1.58, "grad_norm": 0.5957187414169312, "learning_rate": 0.0002755395424635031, "loss": 2.8848, "step": 32198 }, { "epoch": 1.58, "grad_norm": 0.5822932124137878, "learning_rate": 0.0002755241975670703, "loss": 2.7998, "step": 32199 }, { "epoch": 1.58, "grad_norm": 0.6525944471359253, "learning_rate": 0.00027550885273510214, "loss": 3.3601, "step": 32200 }, { "epoch": 1.58, "grad_norm": 0.6136645674705505, "learning_rate": 0.0002754935079676388, "loss": 2.8207, "step": 32201 }, { "epoch": 1.58, "grad_norm": 0.5625642538070679, "learning_rate": 0.0002754781632647209, "loss": 2.9618, "step": 32202 }, { "epoch": 1.58, "grad_norm": 0.6007310748100281, "learning_rate": 0.0002754628186263889, "loss": 3.0686, "step": 32203 }, { "epoch": 1.58, "grad_norm": 0.5860627889633179, "learning_rate": 0.00027544747405268297, "loss": 2.9696, "step": 32204 }, { "epoch": 1.58, "grad_norm": 0.6014703512191772, "learning_rate": 0.00027543212954364376, "loss": 3.1983, "step": 32205 }, { "epoch": 1.58, "grad_norm": 0.6181617975234985, "learning_rate": 0.0002754167850993116, "loss": 3.234, "step": 32206 }, { "epoch": 1.58, "grad_norm": 0.6260382533073425, "learning_rate": 0.00027540144071972676, "loss": 2.9967, "step": 32207 }, { "epoch": 1.58, "grad_norm": 0.5667868256568909, "learning_rate": 0.00027538609640492996, "loss": 2.902, "step": 32208 }, { "epoch": 1.58, "grad_norm": 0.5965689420700073, "learning_rate": 0.0002753707521549613, "loss": 3.1618, "step": 32209 }, { "epoch": 1.58, "grad_norm": 0.6163270473480225, "learning_rate": 0.0002753554079698615, "loss": 2.993, "step": 32210 }, { "epoch": 1.58, "grad_norm": 0.6091551780700684, "learning_rate": 0.0002753400638496707, "loss": 3.1026, "step": 32211 }, { "epoch": 1.58, "grad_norm": 0.615360677242279, "learning_rate": 0.00027532471979442943, "loss": 3.3256, "step": 32212 }, { "epoch": 1.58, "grad_norm": 0.58626389503479, "learning_rate": 0.00027530937580417814, "loss": 3.1256, "step": 32213 }, { "epoch": 1.58, "grad_norm": 0.6162660717964172, "learning_rate": 0.0002752940318789572, "loss": 2.9119, "step": 32214 }, { "epoch": 1.58, "grad_norm": 0.630847692489624, "learning_rate": 0.0002752786880188071, "loss": 3.0364, "step": 32215 }, { "epoch": 1.58, "grad_norm": 0.5829058289527893, "learning_rate": 0.000275263344223768, "loss": 3.141, "step": 32216 }, { "epoch": 1.58, "grad_norm": 0.6056093573570251, "learning_rate": 0.00027524800049388056, "loss": 3.0728, "step": 32217 }, { "epoch": 1.58, "grad_norm": 0.6000507473945618, "learning_rate": 0.0002752326568291852, "loss": 2.8014, "step": 32218 }, { "epoch": 1.58, "grad_norm": 0.5783975124359131, "learning_rate": 0.0002752173132297221, "loss": 2.9118, "step": 32219 }, { "epoch": 1.58, "grad_norm": 0.5872189402580261, "learning_rate": 0.0002752019696955321, "loss": 2.8498, "step": 32220 }, { "epoch": 1.58, "grad_norm": 0.6652278900146484, "learning_rate": 0.0002751866262266551, "loss": 3.0035, "step": 32221 }, { "epoch": 1.58, "grad_norm": 0.5831838846206665, "learning_rate": 0.0002751712828231317, "loss": 3.2491, "step": 32222 }, { "epoch": 1.58, "grad_norm": 0.6078551411628723, "learning_rate": 0.00027515593948500256, "loss": 3.0388, "step": 32223 }, { "epoch": 1.58, "grad_norm": 0.5786187648773193, "learning_rate": 0.0002751405962123078, "loss": 2.9939, "step": 32224 }, { "epoch": 1.58, "grad_norm": 0.5971993207931519, "learning_rate": 0.00027512525300508805, "loss": 3.2239, "step": 32225 }, { "epoch": 1.58, "grad_norm": 0.5976489782333374, "learning_rate": 0.0002751099098633834, "loss": 3.0328, "step": 32226 }, { "epoch": 1.58, "grad_norm": 0.5616191029548645, "learning_rate": 0.0002750945667872345, "loss": 2.9874, "step": 32227 }, { "epoch": 1.58, "grad_norm": 0.6311202049255371, "learning_rate": 0.0002750792237766819, "loss": 2.9954, "step": 32228 }, { "epoch": 1.58, "grad_norm": 0.5888649821281433, "learning_rate": 0.0002750638808317656, "loss": 3.1133, "step": 32229 }, { "epoch": 1.58, "grad_norm": 0.6184988617897034, "learning_rate": 0.0002750485379525264, "loss": 3.1047, "step": 32230 }, { "epoch": 1.58, "grad_norm": 0.6103339791297913, "learning_rate": 0.0002750331951390044, "loss": 3.2664, "step": 32231 }, { "epoch": 1.58, "grad_norm": 0.5738555788993835, "learning_rate": 0.0002750178523912402, "loss": 3.1224, "step": 32232 }, { "epoch": 1.58, "grad_norm": 0.6099984049797058, "learning_rate": 0.00027500250970927424, "loss": 3.1296, "step": 32233 }, { "epoch": 1.58, "grad_norm": 0.6913991570472717, "learning_rate": 0.0002749871670931468, "loss": 2.9424, "step": 32234 }, { "epoch": 1.58, "grad_norm": 0.5921345353126526, "learning_rate": 0.0002749718245428984, "loss": 3.1274, "step": 32235 }, { "epoch": 1.58, "grad_norm": 0.5641476511955261, "learning_rate": 0.00027495648205856936, "loss": 3.1257, "step": 32236 }, { "epoch": 1.58, "grad_norm": 0.6268711090087891, "learning_rate": 0.0002749411396402001, "loss": 3.0799, "step": 32237 }, { "epoch": 1.58, "grad_norm": 0.6031218767166138, "learning_rate": 0.0002749257972878311, "loss": 2.9939, "step": 32238 }, { "epoch": 1.58, "grad_norm": 0.5958530902862549, "learning_rate": 0.00027491045500150274, "loss": 3.1693, "step": 32239 }, { "epoch": 1.58, "grad_norm": 0.5875588059425354, "learning_rate": 0.00027489511278125546, "loss": 3.2846, "step": 32240 }, { "epoch": 1.58, "grad_norm": 0.5789769887924194, "learning_rate": 0.0002748797706271295, "loss": 2.9361, "step": 32241 }, { "epoch": 1.58, "grad_norm": 0.5652769207954407, "learning_rate": 0.0002748644285391654, "loss": 3.0612, "step": 32242 }, { "epoch": 1.58, "grad_norm": 0.57442307472229, "learning_rate": 0.00027484908651740367, "loss": 3.177, "step": 32243 }, { "epoch": 1.58, "grad_norm": 0.6496897339820862, "learning_rate": 0.00027483374456188447, "loss": 2.985, "step": 32244 }, { "epoch": 1.58, "grad_norm": 0.5900055170059204, "learning_rate": 0.00027481840267264845, "loss": 2.9613, "step": 32245 }, { "epoch": 1.58, "grad_norm": 0.6131547093391418, "learning_rate": 0.0002748030608497359, "loss": 3.0876, "step": 32246 }, { "epoch": 1.58, "grad_norm": 0.6449847221374512, "learning_rate": 0.00027478771909318725, "loss": 2.7354, "step": 32247 }, { "epoch": 1.58, "grad_norm": 0.5974999666213989, "learning_rate": 0.0002747723774030428, "loss": 3.2546, "step": 32248 }, { "epoch": 1.58, "grad_norm": 0.6547728776931763, "learning_rate": 0.000274757035779343, "loss": 3.1246, "step": 32249 }, { "epoch": 1.58, "grad_norm": 0.6210682988166809, "learning_rate": 0.00027474169422212846, "loss": 3.1677, "step": 32250 }, { "epoch": 1.58, "grad_norm": 0.924889326095581, "learning_rate": 0.00027472635273143937, "loss": 3.046, "step": 32251 }, { "epoch": 1.58, "grad_norm": 0.5950158834457397, "learning_rate": 0.0002747110113073163, "loss": 3.2527, "step": 32252 }, { "epoch": 1.58, "grad_norm": 0.6571494340896606, "learning_rate": 0.00027469566994979945, "loss": 3.0866, "step": 32253 }, { "epoch": 1.58, "grad_norm": 0.6341148614883423, "learning_rate": 0.00027468032865892936, "loss": 2.8808, "step": 32254 }, { "epoch": 1.58, "grad_norm": 0.5878071188926697, "learning_rate": 0.0002746649874347465, "loss": 2.9232, "step": 32255 }, { "epoch": 1.58, "grad_norm": 0.6338520646095276, "learning_rate": 0.00027464964627729103, "loss": 3.1164, "step": 32256 }, { "epoch": 1.58, "grad_norm": 0.5606010556221008, "learning_rate": 0.00027463430518660364, "loss": 3.1759, "step": 32257 }, { "epoch": 1.58, "grad_norm": 0.6088926196098328, "learning_rate": 0.00027461896416272456, "loss": 3.0883, "step": 32258 }, { "epoch": 1.58, "grad_norm": 0.580753743648529, "learning_rate": 0.00027460362320569414, "loss": 2.9957, "step": 32259 }, { "epoch": 1.58, "grad_norm": 0.5840525031089783, "learning_rate": 0.0002745882823155531, "loss": 3.1201, "step": 32260 }, { "epoch": 1.58, "grad_norm": 0.5990431904792786, "learning_rate": 0.0002745729414923415, "loss": 3.1314, "step": 32261 }, { "epoch": 1.58, "grad_norm": 0.6257085204124451, "learning_rate": 0.00027455760073609995, "loss": 3.0842, "step": 32262 }, { "epoch": 1.58, "grad_norm": 0.5750504732131958, "learning_rate": 0.00027454226004686866, "loss": 3.148, "step": 32263 }, { "epoch": 1.58, "grad_norm": 0.6597176790237427, "learning_rate": 0.0002745269194246882, "loss": 3.263, "step": 32264 }, { "epoch": 1.58, "grad_norm": 0.5871362686157227, "learning_rate": 0.000274511578869599, "loss": 3.0359, "step": 32265 }, { "epoch": 1.58, "grad_norm": 0.5987416505813599, "learning_rate": 0.00027449623838164135, "loss": 2.9311, "step": 32266 }, { "epoch": 1.58, "grad_norm": 0.6555812358856201, "learning_rate": 0.00027448089796085575, "loss": 3.0914, "step": 32267 }, { "epoch": 1.58, "grad_norm": 0.5952374339103699, "learning_rate": 0.00027446555760728255, "loss": 2.9107, "step": 32268 }, { "epoch": 1.58, "grad_norm": 0.5956671237945557, "learning_rate": 0.00027445021732096204, "loss": 3.183, "step": 32269 }, { "epoch": 1.58, "grad_norm": 0.5921088457107544, "learning_rate": 0.00027443487710193486, "loss": 3.1328, "step": 32270 }, { "epoch": 1.58, "grad_norm": 0.5908499360084534, "learning_rate": 0.0002744195369502412, "loss": 3.1227, "step": 32271 }, { "epoch": 1.58, "grad_norm": 0.6202715635299683, "learning_rate": 0.0002744041968659217, "loss": 2.9904, "step": 32272 }, { "epoch": 1.58, "grad_norm": 0.7672480344772339, "learning_rate": 0.00027438885684901647, "loss": 2.8866, "step": 32273 }, { "epoch": 1.58, "grad_norm": 0.6302787065505981, "learning_rate": 0.00027437351689956613, "loss": 3.0427, "step": 32274 }, { "epoch": 1.58, "grad_norm": 0.5923793911933899, "learning_rate": 0.000274358177017611, "loss": 2.9583, "step": 32275 }, { "epoch": 1.58, "grad_norm": 0.584662139415741, "learning_rate": 0.00027434283720319155, "loss": 2.9949, "step": 32276 }, { "epoch": 1.58, "grad_norm": 0.5973477959632874, "learning_rate": 0.00027432749745634813, "loss": 3.0155, "step": 32277 }, { "epoch": 1.58, "grad_norm": 0.5955763459205627, "learning_rate": 0.00027431215777712104, "loss": 3.1771, "step": 32278 }, { "epoch": 1.58, "grad_norm": 0.5979658365249634, "learning_rate": 0.0002742968181655508, "loss": 3.0354, "step": 32279 }, { "epoch": 1.58, "grad_norm": 0.6078369617462158, "learning_rate": 0.0002742814786216779, "loss": 3.127, "step": 32280 }, { "epoch": 1.58, "grad_norm": 0.6542069315910339, "learning_rate": 0.00027426613914554253, "loss": 2.921, "step": 32281 }, { "epoch": 1.58, "grad_norm": 0.6545595526695251, "learning_rate": 0.0002742507997371853, "loss": 3.0068, "step": 32282 }, { "epoch": 1.58, "grad_norm": 0.5855773091316223, "learning_rate": 0.00027423546039664643, "loss": 3.1546, "step": 32283 }, { "epoch": 1.58, "grad_norm": 0.6614902019500732, "learning_rate": 0.0002742201211239663, "loss": 3.1364, "step": 32284 }, { "epoch": 1.58, "grad_norm": 0.6412147879600525, "learning_rate": 0.0002742047819191856, "loss": 2.7693, "step": 32285 }, { "epoch": 1.58, "grad_norm": 0.6186434030532837, "learning_rate": 0.00027418944278234447, "loss": 2.9929, "step": 32286 }, { "epoch": 1.58, "grad_norm": 0.6143002510070801, "learning_rate": 0.0002741741037134834, "loss": 2.8651, "step": 32287 }, { "epoch": 1.58, "grad_norm": 0.6143453121185303, "learning_rate": 0.0002741587647126427, "loss": 3.2461, "step": 32288 }, { "epoch": 1.58, "grad_norm": 0.6308348774909973, "learning_rate": 0.00027414342577986283, "loss": 2.9028, "step": 32289 }, { "epoch": 1.58, "grad_norm": 0.588534414768219, "learning_rate": 0.00027412808691518436, "loss": 3.1194, "step": 32290 }, { "epoch": 1.58, "grad_norm": 0.6186642646789551, "learning_rate": 0.00027411274811864735, "loss": 3.0384, "step": 32291 }, { "epoch": 1.58, "grad_norm": 0.5939307808876038, "learning_rate": 0.0002740974093902925, "loss": 3.1793, "step": 32292 }, { "epoch": 1.58, "grad_norm": 0.6168650984764099, "learning_rate": 0.00027408207073016, "loss": 3.1562, "step": 32293 }, { "epoch": 1.58, "grad_norm": 0.6111552119255066, "learning_rate": 0.00027406673213829037, "loss": 3.2366, "step": 32294 }, { "epoch": 1.58, "grad_norm": 0.612614631652832, "learning_rate": 0.000274051393614724, "loss": 2.9828, "step": 32295 }, { "epoch": 1.58, "grad_norm": 0.5782878398895264, "learning_rate": 0.0002740360551595012, "loss": 3.1741, "step": 32296 }, { "epoch": 1.58, "grad_norm": 0.6033691167831421, "learning_rate": 0.0002740207167726626, "loss": 2.99, "step": 32297 }, { "epoch": 1.58, "grad_norm": 0.5962322950363159, "learning_rate": 0.00027400537845424826, "loss": 3.293, "step": 32298 }, { "epoch": 1.58, "grad_norm": 0.6039497256278992, "learning_rate": 0.0002739900402042987, "loss": 3.2375, "step": 32299 }, { "epoch": 1.58, "grad_norm": 0.6100921630859375, "learning_rate": 0.0002739747020228546, "loss": 3.096, "step": 32300 }, { "epoch": 1.58, "grad_norm": 0.5819284319877625, "learning_rate": 0.000273959363909956, "loss": 3.0614, "step": 32301 }, { "epoch": 1.58, "grad_norm": 0.6027130484580994, "learning_rate": 0.0002739440258656435, "loss": 2.9864, "step": 32302 }, { "epoch": 1.58, "grad_norm": 0.5925067663192749, "learning_rate": 0.00027392868788995723, "loss": 3.2436, "step": 32303 }, { "epoch": 1.58, "grad_norm": 0.6477892398834229, "learning_rate": 0.0002739133499829379, "loss": 3.0252, "step": 32304 }, { "epoch": 1.58, "grad_norm": 0.6437166929244995, "learning_rate": 0.00027389801214462587, "loss": 3.0984, "step": 32305 }, { "epoch": 1.58, "grad_norm": 0.60708087682724, "learning_rate": 0.00027388267437506125, "loss": 2.945, "step": 32306 }, { "epoch": 1.58, "grad_norm": 0.6095942258834839, "learning_rate": 0.0002738673366742848, "loss": 3.1028, "step": 32307 }, { "epoch": 1.58, "grad_norm": 0.5710780024528503, "learning_rate": 0.00027385199904233674, "loss": 2.9135, "step": 32308 }, { "epoch": 1.58, "grad_norm": 0.5993046164512634, "learning_rate": 0.0002738366614792574, "loss": 2.9611, "step": 32309 }, { "epoch": 1.58, "grad_norm": 0.6444818377494812, "learning_rate": 0.00027382132398508734, "loss": 3.0658, "step": 32310 }, { "epoch": 1.58, "grad_norm": 0.6341570615768433, "learning_rate": 0.0002738059865598668, "loss": 3.1248, "step": 32311 }, { "epoch": 1.58, "grad_norm": 0.6394447684288025, "learning_rate": 0.00027379064920363637, "loss": 2.9208, "step": 32312 }, { "epoch": 1.58, "grad_norm": 0.613353431224823, "learning_rate": 0.00027377531191643623, "loss": 3.16, "step": 32313 }, { "epoch": 1.58, "grad_norm": 0.5999470949172974, "learning_rate": 0.0002737599746983068, "loss": 2.9717, "step": 32314 }, { "epoch": 1.58, "grad_norm": 0.6254415512084961, "learning_rate": 0.00027374463754928867, "loss": 3.171, "step": 32315 }, { "epoch": 1.58, "grad_norm": 0.6086492538452148, "learning_rate": 0.0002737293004694221, "loss": 3.0757, "step": 32316 }, { "epoch": 1.58, "grad_norm": 0.6170065402984619, "learning_rate": 0.00027371396345874754, "loss": 2.8206, "step": 32317 }, { "epoch": 1.58, "grad_norm": 0.6064412593841553, "learning_rate": 0.0002736986265173052, "loss": 3.0026, "step": 32318 }, { "epoch": 1.58, "grad_norm": 0.6228976249694824, "learning_rate": 0.0002736832896451357, "loss": 3.1017, "step": 32319 }, { "epoch": 1.58, "grad_norm": 0.6091843247413635, "learning_rate": 0.0002736679528422794, "loss": 2.9123, "step": 32320 }, { "epoch": 1.58, "grad_norm": 0.5790774822235107, "learning_rate": 0.0002736526161087765, "loss": 2.8931, "step": 32321 }, { "epoch": 1.58, "grad_norm": 0.6052638292312622, "learning_rate": 0.00027363727944466773, "loss": 3.2133, "step": 32322 }, { "epoch": 1.58, "grad_norm": 0.6797242164611816, "learning_rate": 0.00027362194284999316, "loss": 3.1776, "step": 32323 }, { "epoch": 1.58, "grad_norm": 0.5846360921859741, "learning_rate": 0.00027360660632479346, "loss": 2.9818, "step": 32324 }, { "epoch": 1.58, "grad_norm": 0.5824291706085205, "learning_rate": 0.00027359126986910867, "loss": 2.9715, "step": 32325 }, { "epoch": 1.58, "grad_norm": 0.5969865322113037, "learning_rate": 0.00027357593348297947, "loss": 3.0331, "step": 32326 }, { "epoch": 1.58, "grad_norm": 0.6006813049316406, "learning_rate": 0.00027356059716644623, "loss": 3.1214, "step": 32327 }, { "epoch": 1.58, "grad_norm": 0.6340086460113525, "learning_rate": 0.0002735452609195492, "loss": 3.1599, "step": 32328 }, { "epoch": 1.58, "grad_norm": 0.610417902469635, "learning_rate": 0.00027352992474232894, "loss": 3.0844, "step": 32329 }, { "epoch": 1.58, "grad_norm": 0.6211175322532654, "learning_rate": 0.00027351458863482576, "loss": 2.9915, "step": 32330 }, { "epoch": 1.58, "grad_norm": 0.6344559192657471, "learning_rate": 0.00027349925259708, "loss": 2.8198, "step": 32331 }, { "epoch": 1.58, "grad_norm": 0.5910095572471619, "learning_rate": 0.00027348391662913217, "loss": 3.0135, "step": 32332 }, { "epoch": 1.58, "grad_norm": 0.6326538920402527, "learning_rate": 0.0002734685807310225, "loss": 3.0308, "step": 32333 }, { "epoch": 1.58, "grad_norm": 0.6006748676300049, "learning_rate": 0.0002734532449027916, "loss": 2.9874, "step": 32334 }, { "epoch": 1.58, "grad_norm": 0.6194648742675781, "learning_rate": 0.0002734379091444797, "loss": 3.0995, "step": 32335 }, { "epoch": 1.58, "grad_norm": 0.6255232095718384, "learning_rate": 0.00027342257345612715, "loss": 2.9889, "step": 32336 }, { "epoch": 1.58, "grad_norm": 0.6315654516220093, "learning_rate": 0.0002734072378377746, "loss": 3.1515, "step": 32337 }, { "epoch": 1.58, "grad_norm": 0.6415723562240601, "learning_rate": 0.0002733919022894622, "loss": 2.9541, "step": 32338 }, { "epoch": 1.58, "grad_norm": 0.6148661375045776, "learning_rate": 0.0002733765668112304, "loss": 3.0006, "step": 32339 }, { "epoch": 1.58, "grad_norm": 0.7170533537864685, "learning_rate": 0.00027336123140311955, "loss": 2.9005, "step": 32340 }, { "epoch": 1.58, "grad_norm": 0.5991891622543335, "learning_rate": 0.0002733458960651701, "loss": 2.9749, "step": 32341 }, { "epoch": 1.59, "grad_norm": 0.5969253182411194, "learning_rate": 0.00027333056079742257, "loss": 2.8458, "step": 32342 }, { "epoch": 1.59, "grad_norm": 0.6092698574066162, "learning_rate": 0.000273315225599917, "loss": 3.0701, "step": 32343 }, { "epoch": 1.59, "grad_norm": 0.6403960585594177, "learning_rate": 0.00027329989047269413, "loss": 2.9416, "step": 32344 }, { "epoch": 1.59, "grad_norm": 0.5866044759750366, "learning_rate": 0.0002732845554157942, "loss": 3.0471, "step": 32345 }, { "epoch": 1.59, "grad_norm": 0.6154597997665405, "learning_rate": 0.00027326922042925745, "loss": 3.141, "step": 32346 }, { "epoch": 1.59, "grad_norm": 0.6291300058364868, "learning_rate": 0.0002732538855131246, "loss": 2.9708, "step": 32347 }, { "epoch": 1.59, "grad_norm": 0.615016520023346, "learning_rate": 0.0002732385506674359, "loss": 2.903, "step": 32348 }, { "epoch": 1.59, "grad_norm": 0.6233264803886414, "learning_rate": 0.0002732232158922316, "loss": 3.0574, "step": 32349 }, { "epoch": 1.59, "grad_norm": 0.5890025496482849, "learning_rate": 0.0002732078811875522, "loss": 3.0299, "step": 32350 }, { "epoch": 1.59, "grad_norm": 0.5658774971961975, "learning_rate": 0.00027319254655343804, "loss": 2.972, "step": 32351 }, { "epoch": 1.59, "grad_norm": 0.6112178564071655, "learning_rate": 0.00027317721198992967, "loss": 2.9405, "step": 32352 }, { "epoch": 1.59, "grad_norm": 0.6809776425361633, "learning_rate": 0.00027316187749706737, "loss": 3.1299, "step": 32353 }, { "epoch": 1.59, "grad_norm": 0.6142784953117371, "learning_rate": 0.0002731465430748915, "loss": 3.2225, "step": 32354 }, { "epoch": 1.59, "grad_norm": 0.5717936158180237, "learning_rate": 0.00027313120872344236, "loss": 3.1395, "step": 32355 }, { "epoch": 1.59, "grad_norm": 0.6221408843994141, "learning_rate": 0.0002731158744427605, "loss": 3.1424, "step": 32356 }, { "epoch": 1.59, "grad_norm": 0.6313767433166504, "learning_rate": 0.00027310054023288634, "loss": 3.0057, "step": 32357 }, { "epoch": 1.59, "grad_norm": 0.577363908290863, "learning_rate": 0.00027308520609386003, "loss": 3.0129, "step": 32358 }, { "epoch": 1.59, "grad_norm": 0.6358572840690613, "learning_rate": 0.00027306987202572227, "loss": 3.0684, "step": 32359 }, { "epoch": 1.59, "grad_norm": 0.6030645370483398, "learning_rate": 0.0002730545380285132, "loss": 3.105, "step": 32360 }, { "epoch": 1.59, "grad_norm": 0.574252188205719, "learning_rate": 0.0002730392041022732, "loss": 3.2618, "step": 32361 }, { "epoch": 1.59, "grad_norm": 0.5889049172401428, "learning_rate": 0.0002730238702470429, "loss": 3.1371, "step": 32362 }, { "epoch": 1.59, "grad_norm": 0.5928773283958435, "learning_rate": 0.00027300853646286245, "loss": 2.8395, "step": 32363 }, { "epoch": 1.59, "grad_norm": 0.6077459454536438, "learning_rate": 0.00027299320274977245, "loss": 2.9993, "step": 32364 }, { "epoch": 1.59, "grad_norm": 0.6264618635177612, "learning_rate": 0.000272977869107813, "loss": 3.1145, "step": 32365 }, { "epoch": 1.59, "grad_norm": 0.6416333913803101, "learning_rate": 0.0002729625355370247, "loss": 3.0723, "step": 32366 }, { "epoch": 1.59, "grad_norm": 0.6093899607658386, "learning_rate": 0.0002729472020374479, "loss": 2.8618, "step": 32367 }, { "epoch": 1.59, "grad_norm": 0.6002346277236938, "learning_rate": 0.00027293186860912285, "loss": 3.0874, "step": 32368 }, { "epoch": 1.59, "grad_norm": 0.5929070115089417, "learning_rate": 0.0002729165352520902, "loss": 3.2543, "step": 32369 }, { "epoch": 1.59, "grad_norm": 0.6260755062103271, "learning_rate": 0.0002729012019663901, "loss": 3.0492, "step": 32370 }, { "epoch": 1.59, "grad_norm": 0.6383898258209229, "learning_rate": 0.00027288586875206296, "loss": 2.941, "step": 32371 }, { "epoch": 1.59, "grad_norm": 0.6028842926025391, "learning_rate": 0.0002728705356091494, "loss": 3.1908, "step": 32372 }, { "epoch": 1.59, "grad_norm": 0.6282774209976196, "learning_rate": 0.00027285520253768944, "loss": 3.0125, "step": 32373 }, { "epoch": 1.59, "grad_norm": 0.5710699558258057, "learning_rate": 0.00027283986953772376, "loss": 3.0275, "step": 32374 }, { "epoch": 1.59, "grad_norm": 0.6519010066986084, "learning_rate": 0.00027282453660929266, "loss": 2.6907, "step": 32375 }, { "epoch": 1.59, "grad_norm": 0.5964446067810059, "learning_rate": 0.0002728092037524364, "loss": 3.0842, "step": 32376 }, { "epoch": 1.59, "grad_norm": 0.564836323261261, "learning_rate": 0.00027279387096719557, "loss": 3.0647, "step": 32377 }, { "epoch": 1.59, "grad_norm": 0.5880719423294067, "learning_rate": 0.00027277853825361035, "loss": 3.0947, "step": 32378 }, { "epoch": 1.59, "grad_norm": 0.6163837909698486, "learning_rate": 0.00027276320561172136, "loss": 3.095, "step": 32379 }, { "epoch": 1.59, "grad_norm": 0.6298701763153076, "learning_rate": 0.0002727478730415687, "loss": 2.9519, "step": 32380 }, { "epoch": 1.59, "grad_norm": 0.6387132406234741, "learning_rate": 0.000272732540543193, "loss": 3.0831, "step": 32381 }, { "epoch": 1.59, "grad_norm": 0.5862566232681274, "learning_rate": 0.0002727172081166345, "loss": 3.0494, "step": 32382 }, { "epoch": 1.59, "grad_norm": 0.5777621269226074, "learning_rate": 0.00027270187576193354, "loss": 3.0085, "step": 32383 }, { "epoch": 1.59, "grad_norm": 0.5978364944458008, "learning_rate": 0.00027268654347913074, "loss": 2.8669, "step": 32384 }, { "epoch": 1.59, "grad_norm": 0.6363843679428101, "learning_rate": 0.0002726712112682662, "loss": 2.9042, "step": 32385 }, { "epoch": 1.59, "grad_norm": 0.5844437479972839, "learning_rate": 0.00027265587912938047, "loss": 3.1127, "step": 32386 }, { "epoch": 1.59, "grad_norm": 0.5706456899642944, "learning_rate": 0.00027264054706251393, "loss": 2.8488, "step": 32387 }, { "epoch": 1.59, "grad_norm": 0.5758098363876343, "learning_rate": 0.0002726252150677069, "loss": 3.4247, "step": 32388 }, { "epoch": 1.59, "grad_norm": 0.647951602935791, "learning_rate": 0.00027260988314499983, "loss": 3.069, "step": 32389 }, { "epoch": 1.59, "grad_norm": 0.6163773536682129, "learning_rate": 0.000272594551294433, "loss": 3.0409, "step": 32390 }, { "epoch": 1.59, "grad_norm": 0.5810932517051697, "learning_rate": 0.00027257921951604673, "loss": 3.0328, "step": 32391 }, { "epoch": 1.59, "grad_norm": 0.6113194823265076, "learning_rate": 0.0002725638878098818, "loss": 3.0184, "step": 32392 }, { "epoch": 1.59, "grad_norm": 0.6727303266525269, "learning_rate": 0.00027254855617597806, "loss": 3.1082, "step": 32393 }, { "epoch": 1.59, "grad_norm": 0.5940728783607483, "learning_rate": 0.00027253322461437635, "loss": 3.158, "step": 32394 }, { "epoch": 1.59, "grad_norm": 0.6131221652030945, "learning_rate": 0.0002725178931251166, "loss": 3.0268, "step": 32395 }, { "epoch": 1.59, "grad_norm": 0.591697633266449, "learning_rate": 0.0002725025617082396, "loss": 3.1224, "step": 32396 }, { "epoch": 1.59, "grad_norm": 0.5704348683357239, "learning_rate": 0.0002724872303637855, "loss": 2.9679, "step": 32397 }, { "epoch": 1.59, "grad_norm": 0.6032789349555969, "learning_rate": 0.0002724718990917947, "loss": 2.959, "step": 32398 }, { "epoch": 1.59, "grad_norm": 0.5963385701179504, "learning_rate": 0.00027245656789230775, "loss": 3.1237, "step": 32399 }, { "epoch": 1.59, "grad_norm": 0.625329315662384, "learning_rate": 0.00027244123676536486, "loss": 2.9193, "step": 32400 }, { "epoch": 1.59, "grad_norm": 0.5919100046157837, "learning_rate": 0.0002724259057110063, "loss": 3.141, "step": 32401 }, { "epoch": 1.59, "grad_norm": 0.6092190146446228, "learning_rate": 0.0002724105747292728, "loss": 2.9224, "step": 32402 }, { "epoch": 1.59, "grad_norm": 0.5872280597686768, "learning_rate": 0.0002723952438202044, "loss": 3.3013, "step": 32403 }, { "epoch": 1.59, "grad_norm": 0.5837467908859253, "learning_rate": 0.00027237991298384174, "loss": 2.8526, "step": 32404 }, { "epoch": 1.59, "grad_norm": 0.6181141138076782, "learning_rate": 0.0002723645822202249, "loss": 2.9058, "step": 32405 }, { "epoch": 1.59, "grad_norm": 0.6013671159744263, "learning_rate": 0.0002723492515293946, "loss": 3.0388, "step": 32406 }, { "epoch": 1.59, "grad_norm": 0.5945082306861877, "learning_rate": 0.00027233392091139096, "loss": 2.9561, "step": 32407 }, { "epoch": 1.59, "grad_norm": 0.565525233745575, "learning_rate": 0.00027231859036625444, "loss": 3.0511, "step": 32408 }, { "epoch": 1.59, "grad_norm": 0.6196669936180115, "learning_rate": 0.0002723032598940255, "loss": 3.0336, "step": 32409 }, { "epoch": 1.59, "grad_norm": 0.6390196681022644, "learning_rate": 0.0002722879294947444, "loss": 2.7013, "step": 32410 }, { "epoch": 1.59, "grad_norm": 0.6164066791534424, "learning_rate": 0.0002722725991684516, "loss": 3.1329, "step": 32411 }, { "epoch": 1.59, "grad_norm": 0.570892870426178, "learning_rate": 0.00027225726891518735, "loss": 3.1695, "step": 32412 }, { "epoch": 1.59, "grad_norm": 0.6367611289024353, "learning_rate": 0.000272241938734992, "loss": 3.1406, "step": 32413 }, { "epoch": 1.59, "grad_norm": 0.6313506960868835, "learning_rate": 0.0002722266086279063, "loss": 3.0672, "step": 32414 }, { "epoch": 1.59, "grad_norm": 0.5836746096611023, "learning_rate": 0.0002722112785939703, "loss": 3.2579, "step": 32415 }, { "epoch": 1.59, "grad_norm": 0.5823087692260742, "learning_rate": 0.00027219594863322447, "loss": 2.9865, "step": 32416 }, { "epoch": 1.59, "grad_norm": 0.6018955111503601, "learning_rate": 0.00027218061874570903, "loss": 2.9428, "step": 32417 }, { "epoch": 1.59, "grad_norm": 0.6570754647254944, "learning_rate": 0.0002721652889314645, "loss": 3.081, "step": 32418 }, { "epoch": 1.59, "grad_norm": 0.6093233227729797, "learning_rate": 0.00027214995919053143, "loss": 2.591, "step": 32419 }, { "epoch": 1.59, "grad_norm": 0.5643780827522278, "learning_rate": 0.00027213462952294977, "loss": 3.0753, "step": 32420 }, { "epoch": 1.59, "grad_norm": 0.5750892162322998, "learning_rate": 0.0002721192999287603, "loss": 2.9966, "step": 32421 }, { "epoch": 1.59, "grad_norm": 0.6011072993278503, "learning_rate": 0.00027210397040800313, "loss": 3.0969, "step": 32422 }, { "epoch": 1.59, "grad_norm": 0.5999905467033386, "learning_rate": 0.0002720886409607187, "loss": 3.0539, "step": 32423 }, { "epoch": 1.59, "grad_norm": 0.6333760023117065, "learning_rate": 0.0002720733115869476, "loss": 3.1335, "step": 32424 }, { "epoch": 1.59, "grad_norm": 0.6337881684303284, "learning_rate": 0.0002720579822867299, "loss": 2.828, "step": 32425 }, { "epoch": 1.59, "grad_norm": 0.5925475358963013, "learning_rate": 0.00027204265306010616, "loss": 2.9094, "step": 32426 }, { "epoch": 1.59, "grad_norm": 0.6454971432685852, "learning_rate": 0.00027202732390711656, "loss": 3.0657, "step": 32427 }, { "epoch": 1.59, "grad_norm": 0.6801785826683044, "learning_rate": 0.0002720119948278017, "loss": 2.9696, "step": 32428 }, { "epoch": 1.59, "grad_norm": 0.6122978329658508, "learning_rate": 0.0002719966658222019, "loss": 2.901, "step": 32429 }, { "epoch": 1.59, "grad_norm": 0.6025977730751038, "learning_rate": 0.0002719813368903573, "loss": 3.0501, "step": 32430 }, { "epoch": 1.59, "grad_norm": 0.5820662975311279, "learning_rate": 0.0002719660080323087, "loss": 3.1117, "step": 32431 }, { "epoch": 1.59, "grad_norm": 0.6449814438819885, "learning_rate": 0.00027195067924809616, "loss": 3.0745, "step": 32432 }, { "epoch": 1.59, "grad_norm": 0.6674616932868958, "learning_rate": 0.00027193535053776006, "loss": 2.9594, "step": 32433 }, { "epoch": 1.59, "grad_norm": 0.6257567405700684, "learning_rate": 0.00027192002190134097, "loss": 3.1974, "step": 32434 }, { "epoch": 1.59, "grad_norm": 0.6641944050788879, "learning_rate": 0.000271904693338879, "loss": 3.2397, "step": 32435 }, { "epoch": 1.59, "grad_norm": 0.6264591813087463, "learning_rate": 0.0002718893648504148, "loss": 3.1784, "step": 32436 }, { "epoch": 1.59, "grad_norm": 0.6466692686080933, "learning_rate": 0.00027187403643598847, "loss": 2.7668, "step": 32437 }, { "epoch": 1.59, "grad_norm": 0.6122263073921204, "learning_rate": 0.00027185870809564054, "loss": 2.9252, "step": 32438 }, { "epoch": 1.59, "grad_norm": 0.5788053274154663, "learning_rate": 0.00027184337982941143, "loss": 2.9313, "step": 32439 }, { "epoch": 1.59, "grad_norm": 0.6403984427452087, "learning_rate": 0.00027182805163734143, "loss": 3.004, "step": 32440 }, { "epoch": 1.59, "grad_norm": 0.6190902590751648, "learning_rate": 0.00027181272351947097, "loss": 3.2076, "step": 32441 }, { "epoch": 1.59, "grad_norm": 0.6131787300109863, "learning_rate": 0.0002717973954758402, "loss": 3.0188, "step": 32442 }, { "epoch": 1.59, "grad_norm": 0.574184000492096, "learning_rate": 0.00027178206750648975, "loss": 3.0804, "step": 32443 }, { "epoch": 1.59, "grad_norm": 0.6264970302581787, "learning_rate": 0.00027176673961145997, "loss": 2.8913, "step": 32444 }, { "epoch": 1.59, "grad_norm": 0.613540768623352, "learning_rate": 0.000271751411790791, "loss": 3.0355, "step": 32445 }, { "epoch": 1.59, "grad_norm": 0.5924250483512878, "learning_rate": 0.0002717360840445236, "loss": 2.7862, "step": 32446 }, { "epoch": 1.59, "grad_norm": 0.5855326652526855, "learning_rate": 0.00027172075637269774, "loss": 2.8646, "step": 32447 }, { "epoch": 1.59, "grad_norm": 0.6014474034309387, "learning_rate": 0.000271705428775354, "loss": 3.0086, "step": 32448 }, { "epoch": 1.59, "grad_norm": 0.6462697386741638, "learning_rate": 0.0002716901012525328, "loss": 3.0801, "step": 32449 }, { "epoch": 1.59, "grad_norm": 0.582455039024353, "learning_rate": 0.00027167477380427436, "loss": 3.0355, "step": 32450 }, { "epoch": 1.59, "grad_norm": 0.5739898085594177, "learning_rate": 0.00027165944643061914, "loss": 3.1304, "step": 32451 }, { "epoch": 1.59, "grad_norm": 0.6203562617301941, "learning_rate": 0.0002716441191316074, "loss": 3.2051, "step": 32452 }, { "epoch": 1.59, "grad_norm": 0.6219992637634277, "learning_rate": 0.0002716287919072796, "loss": 2.9347, "step": 32453 }, { "epoch": 1.59, "grad_norm": 0.6303226351737976, "learning_rate": 0.0002716134647576763, "loss": 3.0828, "step": 32454 }, { "epoch": 1.59, "grad_norm": 0.6271916627883911, "learning_rate": 0.0002715981376828374, "loss": 3.0765, "step": 32455 }, { "epoch": 1.59, "grad_norm": 0.6190890073776245, "learning_rate": 0.00027158281068280374, "loss": 2.999, "step": 32456 }, { "epoch": 1.59, "grad_norm": 0.6210778951644897, "learning_rate": 0.00027156748375761536, "loss": 3.0662, "step": 32457 }, { "epoch": 1.59, "grad_norm": 0.5740706324577332, "learning_rate": 0.0002715521569073128, "loss": 2.9903, "step": 32458 }, { "epoch": 1.59, "grad_norm": 0.5983421802520752, "learning_rate": 0.00027153683013193645, "loss": 2.9476, "step": 32459 }, { "epoch": 1.59, "grad_norm": 0.5819885730743408, "learning_rate": 0.0002715215034315265, "loss": 3.1873, "step": 32460 }, { "epoch": 1.59, "grad_norm": 0.6093148589134216, "learning_rate": 0.00027150617680612353, "loss": 2.9995, "step": 32461 }, { "epoch": 1.59, "grad_norm": 0.607557475566864, "learning_rate": 0.00027149085025576775, "loss": 3.0563, "step": 32462 }, { "epoch": 1.59, "grad_norm": 0.6053733825683594, "learning_rate": 0.0002714755237804995, "loss": 3.2042, "step": 32463 }, { "epoch": 1.59, "grad_norm": 0.6200517416000366, "learning_rate": 0.0002714601973803594, "loss": 3.1293, "step": 32464 }, { "epoch": 1.59, "grad_norm": 0.6223552823066711, "learning_rate": 0.00027144487105538757, "loss": 2.9668, "step": 32465 }, { "epoch": 1.59, "grad_norm": 0.6040474772453308, "learning_rate": 0.00027142954480562456, "loss": 2.9209, "step": 32466 }, { "epoch": 1.59, "grad_norm": 0.5464975237846375, "learning_rate": 0.00027141421863111044, "loss": 3.1782, "step": 32467 }, { "epoch": 1.59, "grad_norm": 0.6203432679176331, "learning_rate": 0.00027139889253188586, "loss": 2.926, "step": 32468 }, { "epoch": 1.59, "grad_norm": 0.5865224599838257, "learning_rate": 0.0002713835665079911, "loss": 3.2632, "step": 32469 }, { "epoch": 1.59, "grad_norm": 0.577903687953949, "learning_rate": 0.00027136824055946647, "loss": 3.0893, "step": 32470 }, { "epoch": 1.59, "grad_norm": 0.6584243178367615, "learning_rate": 0.00027135291468635247, "loss": 2.8712, "step": 32471 }, { "epoch": 1.59, "grad_norm": 1.597705364227295, "learning_rate": 0.00027133758888868935, "loss": 3.0972, "step": 32472 }, { "epoch": 1.59, "grad_norm": 0.5774487853050232, "learning_rate": 0.0002713222631665174, "loss": 2.9983, "step": 32473 }, { "epoch": 1.59, "grad_norm": 0.6193967461585999, "learning_rate": 0.00027130693751987724, "loss": 3.0678, "step": 32474 }, { "epoch": 1.59, "grad_norm": 0.692612886428833, "learning_rate": 0.0002712916119488089, "loss": 3.0259, "step": 32475 }, { "epoch": 1.59, "grad_norm": 0.611100971698761, "learning_rate": 0.0002712762864533531, "loss": 2.943, "step": 32476 }, { "epoch": 1.59, "grad_norm": 0.6383076310157776, "learning_rate": 0.00027126096103355, "loss": 3.1925, "step": 32477 }, { "epoch": 1.59, "grad_norm": 0.5976969003677368, "learning_rate": 0.0002712456356894399, "loss": 2.983, "step": 32478 }, { "epoch": 1.59, "grad_norm": 0.6111800670623779, "learning_rate": 0.0002712303104210634, "loss": 3.1147, "step": 32479 }, { "epoch": 1.59, "grad_norm": 0.5945995450019836, "learning_rate": 0.00027121498522846065, "loss": 3.1164, "step": 32480 }, { "epoch": 1.59, "grad_norm": 0.5979011654853821, "learning_rate": 0.00027119966011167215, "loss": 3.0233, "step": 32481 }, { "epoch": 1.59, "grad_norm": 0.6658178567886353, "learning_rate": 0.0002711843350707381, "loss": 3.0965, "step": 32482 }, { "epoch": 1.59, "grad_norm": 0.5945828557014465, "learning_rate": 0.0002711690101056991, "loss": 3.0912, "step": 32483 }, { "epoch": 1.59, "grad_norm": 0.6201061606407166, "learning_rate": 0.00027115368521659527, "loss": 2.9176, "step": 32484 }, { "epoch": 1.59, "grad_norm": 0.6284053325653076, "learning_rate": 0.00027113836040346703, "loss": 2.9548, "step": 32485 }, { "epoch": 1.59, "grad_norm": 0.5955159068107605, "learning_rate": 0.000271123035666355, "loss": 3.2301, "step": 32486 }, { "epoch": 1.59, "grad_norm": 0.6049878597259521, "learning_rate": 0.00027110771100529916, "loss": 2.9669, "step": 32487 }, { "epoch": 1.59, "grad_norm": 0.5916286706924438, "learning_rate": 0.0002710923864203402, "loss": 3.0258, "step": 32488 }, { "epoch": 1.59, "grad_norm": 0.5768219232559204, "learning_rate": 0.0002710770619115181, "loss": 3.0384, "step": 32489 }, { "epoch": 1.59, "grad_norm": 0.6287925839424133, "learning_rate": 0.0002710617374788736, "loss": 3.0563, "step": 32490 }, { "epoch": 1.59, "grad_norm": 0.659156858921051, "learning_rate": 0.00027104641312244694, "loss": 3.0919, "step": 32491 }, { "epoch": 1.59, "grad_norm": 0.5746557712554932, "learning_rate": 0.0002710310888422784, "loss": 3.063, "step": 32492 }, { "epoch": 1.59, "grad_norm": 0.613263726234436, "learning_rate": 0.00027101576463840846, "loss": 2.9219, "step": 32493 }, { "epoch": 1.59, "grad_norm": 0.610640287399292, "learning_rate": 0.00027100044051087737, "loss": 3.182, "step": 32494 }, { "epoch": 1.59, "grad_norm": 0.6279901266098022, "learning_rate": 0.00027098511645972544, "loss": 2.7512, "step": 32495 }, { "epoch": 1.59, "grad_norm": 0.6687607169151306, "learning_rate": 0.0002709697924849933, "loss": 3.1327, "step": 32496 }, { "epoch": 1.59, "grad_norm": 0.6255040764808655, "learning_rate": 0.000270954468586721, "loss": 2.8758, "step": 32497 }, { "epoch": 1.59, "grad_norm": 0.5970877408981323, "learning_rate": 0.00027093914476494915, "loss": 3.0503, "step": 32498 }, { "epoch": 1.59, "grad_norm": 0.5644693970680237, "learning_rate": 0.00027092382101971793, "loss": 2.8474, "step": 32499 }, { "epoch": 1.59, "grad_norm": 0.591464638710022, "learning_rate": 0.00027090849735106776, "loss": 2.9457, "step": 32500 }, { "epoch": 1.59, "grad_norm": 0.6773257255554199, "learning_rate": 0.0002708931737590391, "loss": 3.0178, "step": 32501 }, { "epoch": 1.59, "grad_norm": 0.59250807762146, "learning_rate": 0.00027087785024367217, "loss": 3.1202, "step": 32502 }, { "epoch": 1.59, "grad_norm": 0.6139160990715027, "learning_rate": 0.0002708625268050074, "loss": 2.8928, "step": 32503 }, { "epoch": 1.59, "grad_norm": 0.6274259686470032, "learning_rate": 0.00027084720344308503, "loss": 3.168, "step": 32504 }, { "epoch": 1.59, "grad_norm": 0.5896390080451965, "learning_rate": 0.0002708318801579456, "loss": 2.9981, "step": 32505 }, { "epoch": 1.59, "grad_norm": 0.6155339479446411, "learning_rate": 0.00027081655694962943, "loss": 2.9269, "step": 32506 }, { "epoch": 1.59, "grad_norm": 0.5823910236358643, "learning_rate": 0.00027080123381817666, "loss": 2.9131, "step": 32507 }, { "epoch": 1.59, "grad_norm": 0.6796182990074158, "learning_rate": 0.00027078591076362804, "loss": 3.0235, "step": 32508 }, { "epoch": 1.59, "grad_norm": 0.5854375958442688, "learning_rate": 0.00027077058778602356, "loss": 2.883, "step": 32509 }, { "epoch": 1.59, "grad_norm": 0.5948371291160583, "learning_rate": 0.0002707552648854037, "loss": 2.9561, "step": 32510 }, { "epoch": 1.59, "grad_norm": 0.6129918694496155, "learning_rate": 0.00027073994206180897, "loss": 3.2477, "step": 32511 }, { "epoch": 1.59, "grad_norm": 0.6146805882453918, "learning_rate": 0.0002707246193152796, "loss": 3.0606, "step": 32512 }, { "epoch": 1.59, "grad_norm": 0.5845103859901428, "learning_rate": 0.00027070929664585594, "loss": 3.1949, "step": 32513 }, { "epoch": 1.59, "grad_norm": 0.606810450553894, "learning_rate": 0.0002706939740535782, "loss": 2.9231, "step": 32514 }, { "epoch": 1.59, "grad_norm": 0.5757008194923401, "learning_rate": 0.0002706786515384869, "loss": 2.9504, "step": 32515 }, { "epoch": 1.59, "grad_norm": 0.620858371257782, "learning_rate": 0.0002706633291006226, "loss": 2.9856, "step": 32516 }, { "epoch": 1.59, "grad_norm": 0.6243740320205688, "learning_rate": 0.00027064800674002533, "loss": 3.1278, "step": 32517 }, { "epoch": 1.59, "grad_norm": 0.5853642225265503, "learning_rate": 0.00027063268445673557, "loss": 3.1047, "step": 32518 }, { "epoch": 1.59, "grad_norm": 0.6242586374282837, "learning_rate": 0.0002706173622507936, "loss": 3.009, "step": 32519 }, { "epoch": 1.59, "grad_norm": 0.636432409286499, "learning_rate": 0.0002706020401222399, "loss": 2.9736, "step": 32520 }, { "epoch": 1.59, "grad_norm": 0.6194183826446533, "learning_rate": 0.00027058671807111484, "loss": 3.1121, "step": 32521 }, { "epoch": 1.59, "grad_norm": 0.6781464219093323, "learning_rate": 0.00027057139609745854, "loss": 2.7746, "step": 32522 }, { "epoch": 1.59, "grad_norm": 0.5913337469100952, "learning_rate": 0.0002705560742013117, "loss": 3.0744, "step": 32523 }, { "epoch": 1.59, "grad_norm": 0.5661170482635498, "learning_rate": 0.0002705407523827144, "loss": 3.1307, "step": 32524 }, { "epoch": 1.59, "grad_norm": 0.6512724161148071, "learning_rate": 0.000270525430641707, "loss": 3.1466, "step": 32525 }, { "epoch": 1.59, "grad_norm": 0.631696879863739, "learning_rate": 0.0002705101089783301, "loss": 3.0165, "step": 32526 }, { "epoch": 1.59, "grad_norm": 0.6301171183586121, "learning_rate": 0.00027049478739262384, "loss": 2.9184, "step": 32527 }, { "epoch": 1.59, "grad_norm": 0.6100221276283264, "learning_rate": 0.00027047946588462874, "loss": 3.1662, "step": 32528 }, { "epoch": 1.59, "grad_norm": 0.6037270426750183, "learning_rate": 0.00027046414445438486, "loss": 2.9379, "step": 32529 }, { "epoch": 1.59, "grad_norm": 0.6437107920646667, "learning_rate": 0.0002704488231019328, "loss": 3.0219, "step": 32530 }, { "epoch": 1.59, "grad_norm": 0.5775290131568909, "learning_rate": 0.00027043350182731293, "loss": 3.1123, "step": 32531 }, { "epoch": 1.59, "grad_norm": 0.5918175578117371, "learning_rate": 0.0002704181806305654, "loss": 3.0319, "step": 32532 }, { "epoch": 1.59, "grad_norm": 0.567966103553772, "learning_rate": 0.0002704028595117308, "loss": 3.1738, "step": 32533 }, { "epoch": 1.59, "grad_norm": 0.6273672580718994, "learning_rate": 0.0002703875384708493, "loss": 3.2175, "step": 32534 }, { "epoch": 1.59, "grad_norm": 0.6246945858001709, "learning_rate": 0.0002703722175079613, "loss": 2.9896, "step": 32535 }, { "epoch": 1.59, "grad_norm": 0.5851449370384216, "learning_rate": 0.0002703568966231073, "loss": 3.0777, "step": 32536 }, { "epoch": 1.59, "grad_norm": 0.6178532838821411, "learning_rate": 0.00027034157581632735, "loss": 3.3369, "step": 32537 }, { "epoch": 1.59, "grad_norm": 0.626719057559967, "learning_rate": 0.0002703262550876622, "loss": 3.2271, "step": 32538 }, { "epoch": 1.59, "grad_norm": 0.5996720194816589, "learning_rate": 0.00027031093443715185, "loss": 3.2784, "step": 32539 }, { "epoch": 1.59, "grad_norm": 0.5899909138679504, "learning_rate": 0.0002702956138648367, "loss": 3.2182, "step": 32540 }, { "epoch": 1.59, "grad_norm": 0.5896705985069275, "learning_rate": 0.0002702802933707574, "loss": 2.862, "step": 32541 }, { "epoch": 1.59, "grad_norm": 0.6266672015190125, "learning_rate": 0.00027026497295495395, "loss": 3.039, "step": 32542 }, { "epoch": 1.59, "grad_norm": 0.615408718585968, "learning_rate": 0.0002702496526174669, "loss": 3.0288, "step": 32543 }, { "epoch": 1.59, "grad_norm": 0.6363191604614258, "learning_rate": 0.0002702343323583365, "loss": 3.1345, "step": 32544 }, { "epoch": 1.59, "grad_norm": 0.6553589701652527, "learning_rate": 0.0002702190121776031, "loss": 3.1664, "step": 32545 }, { "epoch": 1.6, "grad_norm": 0.6160081624984741, "learning_rate": 0.0002702036920753072, "loss": 2.9866, "step": 32546 }, { "epoch": 1.6, "grad_norm": 0.6299121975898743, "learning_rate": 0.0002701883720514889, "loss": 3.0313, "step": 32547 }, { "epoch": 1.6, "grad_norm": 0.5947949290275574, "learning_rate": 0.00027017305210618885, "loss": 3.2526, "step": 32548 }, { "epoch": 1.6, "grad_norm": 0.5934600830078125, "learning_rate": 0.00027015773223944717, "loss": 2.9821, "step": 32549 }, { "epoch": 1.6, "grad_norm": 0.6083028316497803, "learning_rate": 0.00027014241245130414, "loss": 2.9853, "step": 32550 }, { "epoch": 1.6, "grad_norm": 0.5787101984024048, "learning_rate": 0.0002701270927418005, "loss": 2.9115, "step": 32551 }, { "epoch": 1.6, "grad_norm": 0.580090343952179, "learning_rate": 0.0002701117731109762, "loss": 3.02, "step": 32552 }, { "epoch": 1.6, "grad_norm": 0.6392739415168762, "learning_rate": 0.0002700964535588718, "loss": 3.2771, "step": 32553 }, { "epoch": 1.6, "grad_norm": 0.607865035533905, "learning_rate": 0.0002700811340855275, "loss": 2.8881, "step": 32554 }, { "epoch": 1.6, "grad_norm": 0.6404851078987122, "learning_rate": 0.0002700658146909837, "loss": 3.1971, "step": 32555 }, { "epoch": 1.6, "grad_norm": 0.6085619330406189, "learning_rate": 0.00027005049537528103, "loss": 3.0046, "step": 32556 }, { "epoch": 1.6, "grad_norm": 0.5817078351974487, "learning_rate": 0.00027003517613845935, "loss": 2.956, "step": 32557 }, { "epoch": 1.6, "grad_norm": 0.6575085520744324, "learning_rate": 0.0002700198569805594, "loss": 3.2202, "step": 32558 }, { "epoch": 1.6, "grad_norm": 0.6530008912086487, "learning_rate": 0.0002700045379016212, "loss": 2.9829, "step": 32559 }, { "epoch": 1.6, "grad_norm": 0.5922717452049255, "learning_rate": 0.00026998921890168545, "loss": 3.005, "step": 32560 }, { "epoch": 1.6, "grad_norm": 0.5945848822593689, "learning_rate": 0.00026997389998079227, "loss": 2.9596, "step": 32561 }, { "epoch": 1.6, "grad_norm": 0.5837536454200745, "learning_rate": 0.0002699585811389819, "loss": 2.9112, "step": 32562 }, { "epoch": 1.6, "grad_norm": 0.6117374897003174, "learning_rate": 0.0002699432623762951, "loss": 2.9097, "step": 32563 }, { "epoch": 1.6, "grad_norm": 0.5952165126800537, "learning_rate": 0.00026992794369277187, "loss": 3.0588, "step": 32564 }, { "epoch": 1.6, "grad_norm": 0.6353278756141663, "learning_rate": 0.00026991262508845265, "loss": 3.1253, "step": 32565 }, { "epoch": 1.6, "grad_norm": 0.6072468161582947, "learning_rate": 0.0002698973065633777, "loss": 2.8889, "step": 32566 }, { "epoch": 1.6, "grad_norm": 0.6333851218223572, "learning_rate": 0.0002698819881175875, "loss": 3.0873, "step": 32567 }, { "epoch": 1.6, "grad_norm": 0.5800299644470215, "learning_rate": 0.0002698666697511224, "loss": 3.1187, "step": 32568 }, { "epoch": 1.6, "grad_norm": 0.6245841979980469, "learning_rate": 0.00026985135146402264, "loss": 3.1307, "step": 32569 }, { "epoch": 1.6, "grad_norm": 0.6200968623161316, "learning_rate": 0.0002698360332563287, "loss": 3.0176, "step": 32570 }, { "epoch": 1.6, "grad_norm": 0.5942978858947754, "learning_rate": 0.00026982071512808076, "loss": 3.2657, "step": 32571 }, { "epoch": 1.6, "grad_norm": 0.6148001551628113, "learning_rate": 0.0002698053970793192, "loss": 2.9059, "step": 32572 }, { "epoch": 1.6, "grad_norm": 0.6103355288505554, "learning_rate": 0.00026979007911008454, "loss": 3.1997, "step": 32573 }, { "epoch": 1.6, "grad_norm": 0.5524942278862, "learning_rate": 0.0002697747612204169, "loss": 3.0229, "step": 32574 }, { "epoch": 1.6, "grad_norm": 0.590096652507782, "learning_rate": 0.00026975944341035683, "loss": 3.2664, "step": 32575 }, { "epoch": 1.6, "grad_norm": 0.6268036961555481, "learning_rate": 0.0002697441256799444, "loss": 2.976, "step": 32576 }, { "epoch": 1.6, "grad_norm": 0.6254855394363403, "learning_rate": 0.00026972880802922014, "loss": 3.2555, "step": 32577 }, { "epoch": 1.6, "grad_norm": 0.6256861090660095, "learning_rate": 0.00026971349045822453, "loss": 3.0236, "step": 32578 }, { "epoch": 1.6, "grad_norm": 0.6017988324165344, "learning_rate": 0.0002696981729669977, "loss": 3.025, "step": 32579 }, { "epoch": 1.6, "grad_norm": 0.6067514419555664, "learning_rate": 0.00026968285555558003, "loss": 3.0145, "step": 32580 }, { "epoch": 1.6, "grad_norm": 0.6188127398490906, "learning_rate": 0.0002696675382240118, "loss": 3.2419, "step": 32581 }, { "epoch": 1.6, "grad_norm": 0.7204647064208984, "learning_rate": 0.00026965222097233353, "loss": 3.1838, "step": 32582 }, { "epoch": 1.6, "grad_norm": 0.6136758923530579, "learning_rate": 0.00026963690380058555, "loss": 2.9887, "step": 32583 }, { "epoch": 1.6, "grad_norm": 0.6154049634933472, "learning_rate": 0.00026962158670880795, "loss": 3.1223, "step": 32584 }, { "epoch": 1.6, "grad_norm": 0.6027714014053345, "learning_rate": 0.00026960626969704137, "loss": 3.0375, "step": 32585 }, { "epoch": 1.6, "grad_norm": 0.5715473890304565, "learning_rate": 0.00026959095276532594, "loss": 2.9966, "step": 32586 }, { "epoch": 1.6, "grad_norm": 0.6251134872436523, "learning_rate": 0.0002695756359137021, "loss": 2.9965, "step": 32587 }, { "epoch": 1.6, "grad_norm": 0.6239544153213501, "learning_rate": 0.00026956031914221023, "loss": 3.2285, "step": 32588 }, { "epoch": 1.6, "grad_norm": 0.6197227239608765, "learning_rate": 0.0002695450024508906, "loss": 2.9335, "step": 32589 }, { "epoch": 1.6, "grad_norm": 0.6238695979118347, "learning_rate": 0.00026952968583978363, "loss": 3.1362, "step": 32590 }, { "epoch": 1.6, "grad_norm": 0.6636531949043274, "learning_rate": 0.00026951436930892946, "loss": 2.9115, "step": 32591 }, { "epoch": 1.6, "grad_norm": 0.5675901770591736, "learning_rate": 0.0002694990528583687, "loss": 2.8736, "step": 32592 }, { "epoch": 1.6, "grad_norm": 0.6764063239097595, "learning_rate": 0.00026948373648814157, "loss": 3.0807, "step": 32593 }, { "epoch": 1.6, "grad_norm": 0.6123232841491699, "learning_rate": 0.00026946842019828825, "loss": 2.9812, "step": 32594 }, { "epoch": 1.6, "grad_norm": 0.7344564199447632, "learning_rate": 0.00026945310398884944, "loss": 2.9918, "step": 32595 }, { "epoch": 1.6, "grad_norm": 0.6294760704040527, "learning_rate": 0.00026943778785986516, "loss": 3.0817, "step": 32596 }, { "epoch": 1.6, "grad_norm": 0.6110149621963501, "learning_rate": 0.0002694224718113758, "loss": 3.1679, "step": 32597 }, { "epoch": 1.6, "grad_norm": 0.6438841819763184, "learning_rate": 0.00026940715584342187, "loss": 3.1391, "step": 32598 }, { "epoch": 1.6, "grad_norm": 0.6176292896270752, "learning_rate": 0.00026939183995604353, "loss": 2.913, "step": 32599 }, { "epoch": 1.6, "grad_norm": 0.5883298516273499, "learning_rate": 0.0002693765241492813, "loss": 3.054, "step": 32600 }, { "epoch": 1.6, "grad_norm": 0.6791853308677673, "learning_rate": 0.0002693612084231754, "loss": 3.0085, "step": 32601 }, { "epoch": 1.6, "grad_norm": 0.6615046262741089, "learning_rate": 0.00026934589277776604, "loss": 3.0181, "step": 32602 }, { "epoch": 1.6, "grad_norm": 0.6171573996543884, "learning_rate": 0.00026933057721309386, "loss": 2.9615, "step": 32603 }, { "epoch": 1.6, "grad_norm": 0.6239962577819824, "learning_rate": 0.0002693152617291989, "loss": 2.9545, "step": 32604 }, { "epoch": 1.6, "grad_norm": 0.61008620262146, "learning_rate": 0.0002692999463261218, "loss": 3.1183, "step": 32605 }, { "epoch": 1.6, "grad_norm": 0.6085340976715088, "learning_rate": 0.00026928463100390256, "loss": 3.1372, "step": 32606 }, { "epoch": 1.6, "grad_norm": 0.6348270177841187, "learning_rate": 0.00026926931576258176, "loss": 2.9547, "step": 32607 }, { "epoch": 1.6, "grad_norm": 0.5983766913414001, "learning_rate": 0.00026925400060219973, "loss": 3.3116, "step": 32608 }, { "epoch": 1.6, "grad_norm": 0.6149789690971375, "learning_rate": 0.00026923868552279666, "loss": 3.0122, "step": 32609 }, { "epoch": 1.6, "grad_norm": 0.5611794590950012, "learning_rate": 0.0002692233705244131, "loss": 3.1514, "step": 32610 }, { "epoch": 1.6, "grad_norm": 0.6104076504707336, "learning_rate": 0.00026920805560708914, "loss": 3.0857, "step": 32611 }, { "epoch": 1.6, "grad_norm": 0.6021207571029663, "learning_rate": 0.0002691927407708652, "loss": 2.9064, "step": 32612 }, { "epoch": 1.6, "grad_norm": 0.58101487159729, "learning_rate": 0.00026917742601578173, "loss": 3.0131, "step": 32613 }, { "epoch": 1.6, "grad_norm": 0.5953050851821899, "learning_rate": 0.00026916211134187895, "loss": 2.9169, "step": 32614 }, { "epoch": 1.6, "grad_norm": 0.6245440244674683, "learning_rate": 0.0002691467967491973, "loss": 3.0768, "step": 32615 }, { "epoch": 1.6, "grad_norm": 0.5973414182662964, "learning_rate": 0.000269131482237777, "loss": 2.9519, "step": 32616 }, { "epoch": 1.6, "grad_norm": 0.583087146282196, "learning_rate": 0.0002691161678076583, "loss": 3.0333, "step": 32617 }, { "epoch": 1.6, "grad_norm": 0.6332690715789795, "learning_rate": 0.000269100853458882, "loss": 3.0362, "step": 32618 }, { "epoch": 1.6, "grad_norm": 0.6058811545372009, "learning_rate": 0.0002690855391914878, "loss": 2.9249, "step": 32619 }, { "epoch": 1.6, "grad_norm": 0.6081041693687439, "learning_rate": 0.00026907022500551655, "loss": 3.0316, "step": 32620 }, { "epoch": 1.6, "grad_norm": 0.6076980829238892, "learning_rate": 0.0002690549109010082, "loss": 2.9194, "step": 32621 }, { "epoch": 1.6, "grad_norm": 0.6190997362136841, "learning_rate": 0.0002690395968780033, "loss": 3.1308, "step": 32622 }, { "epoch": 1.6, "grad_norm": 0.6132203936576843, "learning_rate": 0.0002690242829365423, "loss": 3.0724, "step": 32623 }, { "epoch": 1.6, "grad_norm": 0.5711997151374817, "learning_rate": 0.00026900896907666516, "loss": 2.984, "step": 32624 }, { "epoch": 1.6, "grad_norm": 0.6215646862983704, "learning_rate": 0.00026899365529841257, "loss": 3.1405, "step": 32625 }, { "epoch": 1.6, "grad_norm": 0.5729935765266418, "learning_rate": 0.0002689783416018247, "loss": 3.1276, "step": 32626 }, { "epoch": 1.6, "grad_norm": 0.5910037755966187, "learning_rate": 0.0002689630279869418, "loss": 2.9777, "step": 32627 }, { "epoch": 1.6, "grad_norm": 0.6306336522102356, "learning_rate": 0.00026894771445380445, "loss": 2.9572, "step": 32628 }, { "epoch": 1.6, "grad_norm": 0.6902565360069275, "learning_rate": 0.0002689324010024528, "loss": 2.8442, "step": 32629 }, { "epoch": 1.6, "grad_norm": 0.5936403870582581, "learning_rate": 0.0002689170876329273, "loss": 3.0951, "step": 32630 }, { "epoch": 1.6, "grad_norm": 0.6323724389076233, "learning_rate": 0.0002689017743452681, "loss": 2.9881, "step": 32631 }, { "epoch": 1.6, "grad_norm": 0.5704877972602844, "learning_rate": 0.0002688864611395156, "loss": 2.9077, "step": 32632 }, { "epoch": 1.6, "grad_norm": 0.643515944480896, "learning_rate": 0.0002688711480157103, "loss": 3.0715, "step": 32633 }, { "epoch": 1.6, "grad_norm": 0.6297488212585449, "learning_rate": 0.00026885583497389235, "loss": 3.0259, "step": 32634 }, { "epoch": 1.6, "grad_norm": 0.5987085103988647, "learning_rate": 0.00026884052201410215, "loss": 3.0065, "step": 32635 }, { "epoch": 1.6, "grad_norm": 0.5853610634803772, "learning_rate": 0.00026882520913638, "loss": 2.9778, "step": 32636 }, { "epoch": 1.6, "grad_norm": 0.5946998000144958, "learning_rate": 0.0002688098963407664, "loss": 2.9844, "step": 32637 }, { "epoch": 1.6, "grad_norm": 0.6260942220687866, "learning_rate": 0.00026879458362730126, "loss": 2.9159, "step": 32638 }, { "epoch": 1.6, "grad_norm": 0.7542493343353271, "learning_rate": 0.00026877927099602523, "loss": 3.1824, "step": 32639 }, { "epoch": 1.6, "grad_norm": 0.5645418763160706, "learning_rate": 0.0002687639584469787, "loss": 2.9704, "step": 32640 }, { "epoch": 1.6, "grad_norm": 0.6517764329910278, "learning_rate": 0.0002687486459802019, "loss": 3.099, "step": 32641 }, { "epoch": 1.6, "grad_norm": 0.6776633262634277, "learning_rate": 0.0002687333335957352, "loss": 3.0951, "step": 32642 }, { "epoch": 1.6, "grad_norm": 0.6206831336021423, "learning_rate": 0.00026871802129361865, "loss": 3.1086, "step": 32643 }, { "epoch": 1.6, "grad_norm": 0.633886992931366, "learning_rate": 0.00026870270907389303, "loss": 3.0899, "step": 32644 }, { "epoch": 1.6, "grad_norm": 0.6100537776947021, "learning_rate": 0.0002686873969365984, "loss": 2.9998, "step": 32645 }, { "epoch": 1.6, "grad_norm": 0.588862419128418, "learning_rate": 0.0002686720848817751, "loss": 3.1437, "step": 32646 }, { "epoch": 1.6, "grad_norm": 0.6336972713470459, "learning_rate": 0.0002686567729094635, "loss": 3.0714, "step": 32647 }, { "epoch": 1.6, "grad_norm": 0.6299551129341125, "learning_rate": 0.000268641461019704, "loss": 3.0617, "step": 32648 }, { "epoch": 1.6, "grad_norm": 0.5796019434928894, "learning_rate": 0.0002686261492125367, "loss": 2.8452, "step": 32649 }, { "epoch": 1.6, "grad_norm": 0.6125264763832092, "learning_rate": 0.00026861083748800225, "loss": 3.037, "step": 32650 }, { "epoch": 1.6, "grad_norm": 0.6127404570579529, "learning_rate": 0.00026859552584614077, "loss": 3.182, "step": 32651 }, { "epoch": 1.6, "grad_norm": 0.6474936604499817, "learning_rate": 0.00026858021428699264, "loss": 3.2452, "step": 32652 }, { "epoch": 1.6, "grad_norm": 0.5996273159980774, "learning_rate": 0.0002685649028105981, "loss": 3.1117, "step": 32653 }, { "epoch": 1.6, "grad_norm": 0.6106273531913757, "learning_rate": 0.0002685495914169976, "loss": 3.2262, "step": 32654 }, { "epoch": 1.6, "grad_norm": 0.6102750897407532, "learning_rate": 0.00026853428010623146, "loss": 3.1025, "step": 32655 }, { "epoch": 1.6, "grad_norm": 0.6115225553512573, "learning_rate": 0.0002685189688783399, "loss": 3.0833, "step": 32656 }, { "epoch": 1.6, "grad_norm": 0.6346925497055054, "learning_rate": 0.0002685036577333634, "loss": 3.0, "step": 32657 }, { "epoch": 1.6, "grad_norm": 0.6187523603439331, "learning_rate": 0.0002684883466713422, "loss": 3.0215, "step": 32658 }, { "epoch": 1.6, "grad_norm": 0.7155755758285522, "learning_rate": 0.0002684730356923165, "loss": 2.8853, "step": 32659 }, { "epoch": 1.6, "grad_norm": 0.6200131177902222, "learning_rate": 0.0002684577247963269, "loss": 3.0541, "step": 32660 }, { "epoch": 1.6, "grad_norm": 0.6308820843696594, "learning_rate": 0.0002684424139834135, "loss": 3.0811, "step": 32661 }, { "epoch": 1.6, "grad_norm": 0.5879965424537659, "learning_rate": 0.00026842710325361677, "loss": 2.9076, "step": 32662 }, { "epoch": 1.6, "grad_norm": 0.629178524017334, "learning_rate": 0.00026841179260697693, "loss": 2.9654, "step": 32663 }, { "epoch": 1.6, "grad_norm": 0.621136486530304, "learning_rate": 0.0002683964820435343, "loss": 3.0303, "step": 32664 }, { "epoch": 1.6, "grad_norm": 0.6378344297409058, "learning_rate": 0.0002683811715633294, "loss": 2.8025, "step": 32665 }, { "epoch": 1.6, "grad_norm": 0.642932116985321, "learning_rate": 0.0002683658611664023, "loss": 2.7823, "step": 32666 }, { "epoch": 1.6, "grad_norm": 0.6393907070159912, "learning_rate": 0.0002683505508527935, "loss": 3.1127, "step": 32667 }, { "epoch": 1.6, "grad_norm": 0.6391983032226562, "learning_rate": 0.0002683352406225432, "loss": 2.9901, "step": 32668 }, { "epoch": 1.6, "grad_norm": 0.6028316020965576, "learning_rate": 0.00026831993047569177, "loss": 3.0726, "step": 32669 }, { "epoch": 1.6, "grad_norm": 0.655157208442688, "learning_rate": 0.0002683046204122796, "loss": 2.786, "step": 32670 }, { "epoch": 1.6, "grad_norm": 0.61063551902771, "learning_rate": 0.0002682893104323469, "loss": 3.0121, "step": 32671 }, { "epoch": 1.6, "grad_norm": 0.6077100038528442, "learning_rate": 0.00026827400053593413, "loss": 3.0462, "step": 32672 }, { "epoch": 1.6, "grad_norm": 0.6022846698760986, "learning_rate": 0.0002682586907230815, "loss": 3.0828, "step": 32673 }, { "epoch": 1.6, "grad_norm": 0.6264395117759705, "learning_rate": 0.0002682433809938293, "loss": 3.1968, "step": 32674 }, { "epoch": 1.6, "grad_norm": 0.6148096323013306, "learning_rate": 0.00026822807134821804, "loss": 3.2081, "step": 32675 }, { "epoch": 1.6, "grad_norm": 0.6186606287956238, "learning_rate": 0.0002682127617862879, "loss": 2.9582, "step": 32676 }, { "epoch": 1.6, "grad_norm": 0.6134831309318542, "learning_rate": 0.00026819745230807925, "loss": 3.0577, "step": 32677 }, { "epoch": 1.6, "grad_norm": 0.5858891010284424, "learning_rate": 0.00026818214291363226, "loss": 2.9476, "step": 32678 }, { "epoch": 1.6, "grad_norm": 0.6068850755691528, "learning_rate": 0.0002681668336029874, "loss": 3.0758, "step": 32679 }, { "epoch": 1.6, "grad_norm": 0.5995755195617676, "learning_rate": 0.0002681515243761851, "loss": 2.9556, "step": 32680 }, { "epoch": 1.6, "grad_norm": 0.5753273367881775, "learning_rate": 0.0002681362152332655, "loss": 2.7898, "step": 32681 }, { "epoch": 1.6, "grad_norm": 0.6415500044822693, "learning_rate": 0.00026812090617426903, "loss": 3.2757, "step": 32682 }, { "epoch": 1.6, "grad_norm": 0.5945836305618286, "learning_rate": 0.0002681055971992358, "loss": 3.1363, "step": 32683 }, { "epoch": 1.6, "grad_norm": 0.6280573010444641, "learning_rate": 0.0002680902883082064, "loss": 2.8068, "step": 32684 }, { "epoch": 1.6, "grad_norm": 0.6142799258232117, "learning_rate": 0.00026807497950122105, "loss": 3.0977, "step": 32685 }, { "epoch": 1.6, "grad_norm": 0.6052511930465698, "learning_rate": 0.00026805967077832, "loss": 2.9415, "step": 32686 }, { "epoch": 1.6, "grad_norm": 0.5932520031929016, "learning_rate": 0.0002680443621395437, "loss": 2.9637, "step": 32687 }, { "epoch": 1.6, "grad_norm": 0.5896964073181152, "learning_rate": 0.00026802905358493237, "loss": 2.875, "step": 32688 }, { "epoch": 1.6, "grad_norm": 0.6037048697471619, "learning_rate": 0.0002680137451145263, "loss": 3.1561, "step": 32689 }, { "epoch": 1.6, "grad_norm": 0.6128067374229431, "learning_rate": 0.00026799843672836596, "loss": 2.8895, "step": 32690 }, { "epoch": 1.6, "grad_norm": 0.5946832299232483, "learning_rate": 0.00026798312842649157, "loss": 2.9309, "step": 32691 }, { "epoch": 1.6, "grad_norm": 0.6187651753425598, "learning_rate": 0.0002679678202089435, "loss": 2.8649, "step": 32692 }, { "epoch": 1.6, "grad_norm": 0.5798802971839905, "learning_rate": 0.00026795251207576185, "loss": 3.0354, "step": 32693 }, { "epoch": 1.6, "grad_norm": 0.5939930081367493, "learning_rate": 0.00026793720402698725, "loss": 3.1686, "step": 32694 }, { "epoch": 1.6, "grad_norm": 0.5916917324066162, "learning_rate": 0.0002679218960626599, "loss": 3.1373, "step": 32695 }, { "epoch": 1.6, "grad_norm": 0.6304068565368652, "learning_rate": 0.00026790658818282, "loss": 3.0483, "step": 32696 }, { "epoch": 1.6, "grad_norm": 0.5936771035194397, "learning_rate": 0.0002678912803875081, "loss": 2.9578, "step": 32697 }, { "epoch": 1.6, "grad_norm": 0.5950177907943726, "learning_rate": 0.0002678759726767643, "loss": 3.0496, "step": 32698 }, { "epoch": 1.6, "grad_norm": 0.6186109781265259, "learning_rate": 0.00026786066505062896, "loss": 3.0393, "step": 32699 }, { "epoch": 1.6, "grad_norm": 0.6025801301002502, "learning_rate": 0.0002678453575091426, "loss": 3.2257, "step": 32700 }, { "epoch": 1.6, "grad_norm": 0.5880355834960938, "learning_rate": 0.0002678300500523452, "loss": 2.7253, "step": 32701 }, { "epoch": 1.6, "grad_norm": 0.6294435858726501, "learning_rate": 0.0002678147426802774, "loss": 3.0772, "step": 32702 }, { "epoch": 1.6, "grad_norm": 0.6276559233665466, "learning_rate": 0.0002677994353929793, "loss": 2.8216, "step": 32703 }, { "epoch": 1.6, "grad_norm": 0.6946702003479004, "learning_rate": 0.00026778412819049123, "loss": 3.0334, "step": 32704 }, { "epoch": 1.6, "grad_norm": 0.5705452561378479, "learning_rate": 0.00026776882107285374, "loss": 2.9975, "step": 32705 }, { "epoch": 1.6, "grad_norm": 0.5945636034011841, "learning_rate": 0.00026775351404010684, "loss": 2.9102, "step": 32706 }, { "epoch": 1.6, "grad_norm": 0.5809672474861145, "learning_rate": 0.0002677382070922911, "loss": 2.9264, "step": 32707 }, { "epoch": 1.6, "grad_norm": 0.5987460613250732, "learning_rate": 0.0002677229002294465, "loss": 3.0214, "step": 32708 }, { "epoch": 1.6, "grad_norm": 0.6070874929428101, "learning_rate": 0.00026770759345161375, "loss": 3.1131, "step": 32709 }, { "epoch": 1.6, "grad_norm": 0.632290780544281, "learning_rate": 0.00026769228675883297, "loss": 2.8364, "step": 32710 }, { "epoch": 1.6, "grad_norm": 0.6187470555305481, "learning_rate": 0.0002676769801511444, "loss": 2.9534, "step": 32711 }, { "epoch": 1.6, "grad_norm": 0.5995469689369202, "learning_rate": 0.00026766167362858855, "loss": 2.9997, "step": 32712 }, { "epoch": 1.6, "grad_norm": 0.6334401965141296, "learning_rate": 0.0002676463671912056, "loss": 3.0416, "step": 32713 }, { "epoch": 1.6, "grad_norm": 0.6241161227226257, "learning_rate": 0.0002676310608390359, "loss": 2.9658, "step": 32714 }, { "epoch": 1.6, "grad_norm": 0.6149295568466187, "learning_rate": 0.00026761575457211965, "loss": 3.071, "step": 32715 }, { "epoch": 1.6, "grad_norm": 0.6074439287185669, "learning_rate": 0.0002676004483904973, "loss": 3.0599, "step": 32716 }, { "epoch": 1.6, "grad_norm": 0.5981096625328064, "learning_rate": 0.0002675851422942093, "loss": 3.1352, "step": 32717 }, { "epoch": 1.6, "grad_norm": 0.5963551998138428, "learning_rate": 0.00026756983628329554, "loss": 2.8888, "step": 32718 }, { "epoch": 1.6, "grad_norm": 0.6065682768821716, "learning_rate": 0.0002675545303577968, "loss": 3.0682, "step": 32719 }, { "epoch": 1.6, "grad_norm": 0.6360631585121155, "learning_rate": 0.0002675392245177531, "loss": 3.0089, "step": 32720 }, { "epoch": 1.6, "grad_norm": 0.5898069143295288, "learning_rate": 0.00026752391876320476, "loss": 2.876, "step": 32721 }, { "epoch": 1.6, "grad_norm": 0.6404922604560852, "learning_rate": 0.0002675086130941923, "loss": 3.1414, "step": 32722 }, { "epoch": 1.6, "grad_norm": 0.6144474744796753, "learning_rate": 0.0002674933075107558, "loss": 2.9401, "step": 32723 }, { "epoch": 1.6, "grad_norm": 0.6159098744392395, "learning_rate": 0.00026747800201293575, "loss": 3.0381, "step": 32724 }, { "epoch": 1.6, "grad_norm": 0.6114851236343384, "learning_rate": 0.0002674626966007724, "loss": 3.0802, "step": 32725 }, { "epoch": 1.6, "grad_norm": 0.6240290999412537, "learning_rate": 0.0002674473912743059, "loss": 3.0702, "step": 32726 }, { "epoch": 1.6, "grad_norm": 0.598355770111084, "learning_rate": 0.00026743208603357687, "loss": 2.8799, "step": 32727 }, { "epoch": 1.6, "grad_norm": 0.6011687517166138, "learning_rate": 0.00026741678087862535, "loss": 3.171, "step": 32728 }, { "epoch": 1.6, "grad_norm": 0.6154776811599731, "learning_rate": 0.00026740147580949186, "loss": 3.1596, "step": 32729 }, { "epoch": 1.6, "grad_norm": 0.5840276479721069, "learning_rate": 0.0002673861708262165, "loss": 3.1403, "step": 32730 }, { "epoch": 1.6, "grad_norm": 0.6225008368492126, "learning_rate": 0.00026737086592883976, "loss": 2.9208, "step": 32731 }, { "epoch": 1.6, "grad_norm": 0.5753346681594849, "learning_rate": 0.0002673555611174019, "loss": 3.0474, "step": 32732 }, { "epoch": 1.6, "grad_norm": 0.6378356218338013, "learning_rate": 0.0002673402563919431, "loss": 3.0054, "step": 32733 }, { "epoch": 1.6, "grad_norm": 0.5706433653831482, "learning_rate": 0.0002673249517525039, "loss": 3.058, "step": 32734 }, { "epoch": 1.6, "grad_norm": 0.6072514057159424, "learning_rate": 0.00026730964719912445, "loss": 2.9545, "step": 32735 }, { "epoch": 1.6, "grad_norm": 0.6374166011810303, "learning_rate": 0.00026729434273184497, "loss": 3.1019, "step": 32736 }, { "epoch": 1.6, "grad_norm": 0.6123475432395935, "learning_rate": 0.00026727903835070606, "loss": 3.0819, "step": 32737 }, { "epoch": 1.6, "grad_norm": 0.6655570268630981, "learning_rate": 0.00026726373405574783, "loss": 3.1967, "step": 32738 }, { "epoch": 1.6, "grad_norm": 0.5814079642295837, "learning_rate": 0.0002672484298470107, "loss": 3.0927, "step": 32739 }, { "epoch": 1.6, "grad_norm": 0.7113948464393616, "learning_rate": 0.0002672331257245347, "loss": 3.0828, "step": 32740 }, { "epoch": 1.6, "grad_norm": 0.6144413352012634, "learning_rate": 0.0002672178216883604, "loss": 2.8819, "step": 32741 }, { "epoch": 1.6, "grad_norm": 0.5857863426208496, "learning_rate": 0.0002672025177385282, "loss": 3.2243, "step": 32742 }, { "epoch": 1.6, "grad_norm": 0.5955747365951538, "learning_rate": 0.00026718721387507813, "loss": 3.1915, "step": 32743 }, { "epoch": 1.6, "grad_norm": 0.6116932034492493, "learning_rate": 0.0002671719100980507, "loss": 2.8832, "step": 32744 }, { "epoch": 1.6, "grad_norm": 0.5967369079589844, "learning_rate": 0.000267156606407486, "loss": 2.9476, "step": 32745 }, { "epoch": 1.6, "grad_norm": 0.6008356213569641, "learning_rate": 0.0002671413028034246, "loss": 3.111, "step": 32746 }, { "epoch": 1.6, "grad_norm": 0.5891445279121399, "learning_rate": 0.00026712599928590674, "loss": 3.1365, "step": 32747 }, { "epoch": 1.6, "grad_norm": 0.5794476270675659, "learning_rate": 0.0002671106958549725, "loss": 3.1771, "step": 32748 }, { "epoch": 1.6, "grad_norm": 0.6033952236175537, "learning_rate": 0.0002670953925106625, "loss": 3.1056, "step": 32749 }, { "epoch": 1.6, "grad_norm": 0.6501690745353699, "learning_rate": 0.00026708008925301687, "loss": 2.7864, "step": 32750 }, { "epoch": 1.61, "grad_norm": 0.6086935997009277, "learning_rate": 0.00026706478608207586, "loss": 3.0754, "step": 32751 }, { "epoch": 1.61, "grad_norm": 0.6224468350410461, "learning_rate": 0.00026704948299787997, "loss": 3.0004, "step": 32752 }, { "epoch": 1.61, "grad_norm": 0.5442426800727844, "learning_rate": 0.0002670341800004693, "loss": 3.0873, "step": 32753 }, { "epoch": 1.61, "grad_norm": 0.5749726891517639, "learning_rate": 0.00026701887708988444, "loss": 2.8662, "step": 32754 }, { "epoch": 1.61, "grad_norm": 0.6050938367843628, "learning_rate": 0.0002670035742661653, "loss": 2.9914, "step": 32755 }, { "epoch": 1.61, "grad_norm": 0.5932197570800781, "learning_rate": 0.00026698827152935245, "loss": 3.0756, "step": 32756 }, { "epoch": 1.61, "grad_norm": 0.592960000038147, "learning_rate": 0.0002669729688794863, "loss": 3.055, "step": 32757 }, { "epoch": 1.61, "grad_norm": 0.5769248008728027, "learning_rate": 0.0002669576663166068, "loss": 3.3109, "step": 32758 }, { "epoch": 1.61, "grad_norm": 0.6258298754692078, "learning_rate": 0.0002669423638407545, "loss": 3.139, "step": 32759 }, { "epoch": 1.61, "grad_norm": 0.5944938659667969, "learning_rate": 0.0002669270614519697, "loss": 3.1027, "step": 32760 }, { "epoch": 1.61, "grad_norm": 0.6634683012962341, "learning_rate": 0.00026691175915029253, "loss": 3.0298, "step": 32761 }, { "epoch": 1.61, "grad_norm": 0.6014466881752014, "learning_rate": 0.0002668964569357636, "loss": 2.9555, "step": 32762 }, { "epoch": 1.61, "grad_norm": 0.620914876461029, "learning_rate": 0.00026688115480842287, "loss": 3.0418, "step": 32763 }, { "epoch": 1.61, "grad_norm": 0.5882852673530579, "learning_rate": 0.0002668658527683109, "loss": 2.9286, "step": 32764 }, { "epoch": 1.61, "grad_norm": 0.610764741897583, "learning_rate": 0.0002668505508154679, "loss": 3.0676, "step": 32765 }, { "epoch": 1.61, "grad_norm": 0.6545993685722351, "learning_rate": 0.00026683524894993406, "loss": 2.8785, "step": 32766 }, { "epoch": 1.61, "grad_norm": 0.6135849356651306, "learning_rate": 0.00026681994717174995, "loss": 3.0813, "step": 32767 }, { "epoch": 1.61, "grad_norm": 0.596288800239563, "learning_rate": 0.0002668046454809556, "loss": 3.1225, "step": 32768 }, { "epoch": 1.61, "grad_norm": 0.6006448864936829, "learning_rate": 0.0002667893438775915, "loss": 2.8525, "step": 32769 }, { "epoch": 1.61, "grad_norm": 0.5774670243263245, "learning_rate": 0.0002667740423616978, "loss": 3.2201, "step": 32770 }, { "epoch": 1.61, "grad_norm": 0.6070205569267273, "learning_rate": 0.00026675874093331494, "loss": 2.946, "step": 32771 }, { "epoch": 1.61, "grad_norm": 0.5863068103790283, "learning_rate": 0.0002667434395924832, "loss": 3.2808, "step": 32772 }, { "epoch": 1.61, "grad_norm": 0.6191710233688354, "learning_rate": 0.0002667281383392427, "loss": 2.894, "step": 32773 }, { "epoch": 1.61, "grad_norm": 0.621767520904541, "learning_rate": 0.000266712837173634, "loss": 3.1097, "step": 32774 }, { "epoch": 1.61, "grad_norm": 0.5857933163642883, "learning_rate": 0.0002666975360956972, "loss": 2.9612, "step": 32775 }, { "epoch": 1.61, "grad_norm": 0.6357319355010986, "learning_rate": 0.0002666822351054727, "loss": 3.0118, "step": 32776 }, { "epoch": 1.61, "grad_norm": 0.5903388857841492, "learning_rate": 0.00026666693420300086, "loss": 2.7496, "step": 32777 }, { "epoch": 1.61, "grad_norm": 0.6077666282653809, "learning_rate": 0.0002666516333883218, "loss": 3.0988, "step": 32778 }, { "epoch": 1.61, "grad_norm": 0.6337267756462097, "learning_rate": 0.000266636332661476, "loss": 3.1458, "step": 32779 }, { "epoch": 1.61, "grad_norm": 0.567110538482666, "learning_rate": 0.00026662103202250355, "loss": 2.8474, "step": 32780 }, { "epoch": 1.61, "grad_norm": 0.6637780666351318, "learning_rate": 0.00026660573147144494, "loss": 3.2734, "step": 32781 }, { "epoch": 1.61, "grad_norm": 0.6085167527198792, "learning_rate": 0.00026659043100834054, "loss": 3.1563, "step": 32782 }, { "epoch": 1.61, "grad_norm": 0.6335734128952026, "learning_rate": 0.0002665751306332304, "loss": 3.1045, "step": 32783 }, { "epoch": 1.61, "grad_norm": 0.6378978490829468, "learning_rate": 0.00026655983034615493, "loss": 3.1083, "step": 32784 }, { "epoch": 1.61, "grad_norm": 0.5712326765060425, "learning_rate": 0.0002665445301471544, "loss": 3.1253, "step": 32785 }, { "epoch": 1.61, "grad_norm": 0.5776566863059998, "learning_rate": 0.00026652923003626916, "loss": 3.0857, "step": 32786 }, { "epoch": 1.61, "grad_norm": 0.5935328602790833, "learning_rate": 0.00026651393001353956, "loss": 2.9172, "step": 32787 }, { "epoch": 1.61, "grad_norm": 0.5658502578735352, "learning_rate": 0.0002664986300790057, "loss": 2.8991, "step": 32788 }, { "epoch": 1.61, "grad_norm": 0.6034933924674988, "learning_rate": 0.0002664833302327081, "loss": 3.1756, "step": 32789 }, { "epoch": 1.61, "grad_norm": 0.5854072570800781, "learning_rate": 0.00026646803047468697, "loss": 2.8792, "step": 32790 }, { "epoch": 1.61, "grad_norm": 0.5877964496612549, "learning_rate": 0.00026645273080498264, "loss": 2.9641, "step": 32791 }, { "epoch": 1.61, "grad_norm": 0.6241223216056824, "learning_rate": 0.00026643743122363515, "loss": 2.9318, "step": 32792 }, { "epoch": 1.61, "grad_norm": 0.5755746364593506, "learning_rate": 0.00026642213173068516, "loss": 2.9953, "step": 32793 }, { "epoch": 1.61, "grad_norm": 0.6470869779586792, "learning_rate": 0.00026640683232617284, "loss": 2.8269, "step": 32794 }, { "epoch": 1.61, "grad_norm": 0.6277267932891846, "learning_rate": 0.0002663915330101383, "loss": 3.0639, "step": 32795 }, { "epoch": 1.61, "grad_norm": 0.6223757266998291, "learning_rate": 0.0002663762337826222, "loss": 2.9395, "step": 32796 }, { "epoch": 1.61, "grad_norm": 0.5803819894790649, "learning_rate": 0.0002663609346436645, "loss": 2.8632, "step": 32797 }, { "epoch": 1.61, "grad_norm": 0.659708559513092, "learning_rate": 0.00026634563559330555, "loss": 2.9963, "step": 32798 }, { "epoch": 1.61, "grad_norm": 0.6480075716972351, "learning_rate": 0.0002663303366315859, "loss": 3.0471, "step": 32799 }, { "epoch": 1.61, "grad_norm": 0.5975663661956787, "learning_rate": 0.00026631503775854557, "loss": 2.7081, "step": 32800 }, { "epoch": 1.61, "grad_norm": 0.5907651782035828, "learning_rate": 0.000266299738974225, "loss": 3.1879, "step": 32801 }, { "epoch": 1.61, "grad_norm": 0.6050690412521362, "learning_rate": 0.00026628444027866437, "loss": 3.0749, "step": 32802 }, { "epoch": 1.61, "grad_norm": 0.638195276260376, "learning_rate": 0.00026626914167190394, "loss": 3.0082, "step": 32803 }, { "epoch": 1.61, "grad_norm": 0.5860411524772644, "learning_rate": 0.0002662538431539843, "loss": 2.9251, "step": 32804 }, { "epoch": 1.61, "grad_norm": 0.5866068005561829, "learning_rate": 0.00026623854472494546, "loss": 2.9912, "step": 32805 }, { "epoch": 1.61, "grad_norm": 0.6038371324539185, "learning_rate": 0.00026622324638482785, "loss": 2.9756, "step": 32806 }, { "epoch": 1.61, "grad_norm": 0.5844926834106445, "learning_rate": 0.0002662079481336716, "loss": 3.011, "step": 32807 }, { "epoch": 1.61, "grad_norm": 0.5914148092269897, "learning_rate": 0.00026619264997151715, "loss": 3.2044, "step": 32808 }, { "epoch": 1.61, "grad_norm": 0.5837469100952148, "learning_rate": 0.0002661773518984048, "loss": 2.9881, "step": 32809 }, { "epoch": 1.61, "grad_norm": 0.60676109790802, "learning_rate": 0.0002661620539143747, "loss": 3.1325, "step": 32810 }, { "epoch": 1.61, "grad_norm": 0.6246742010116577, "learning_rate": 0.0002661467560194674, "loss": 2.9884, "step": 32811 }, { "epoch": 1.61, "grad_norm": 0.5762494206428528, "learning_rate": 0.0002661314582137229, "loss": 3.31, "step": 32812 }, { "epoch": 1.61, "grad_norm": 0.5754257440567017, "learning_rate": 0.00026611616049718157, "loss": 3.0474, "step": 32813 }, { "epoch": 1.61, "grad_norm": 0.6173507571220398, "learning_rate": 0.0002661008628698839, "loss": 3.0469, "step": 32814 }, { "epoch": 1.61, "grad_norm": 0.5958468914031982, "learning_rate": 0.00026608556533186997, "loss": 2.8676, "step": 32815 }, { "epoch": 1.61, "grad_norm": 0.6056309342384338, "learning_rate": 0.0002660702678831802, "loss": 3.2593, "step": 32816 }, { "epoch": 1.61, "grad_norm": 0.6290296316146851, "learning_rate": 0.00026605497052385464, "loss": 3.0761, "step": 32817 }, { "epoch": 1.61, "grad_norm": 0.6019764542579651, "learning_rate": 0.00026603967325393386, "loss": 2.9699, "step": 32818 }, { "epoch": 1.61, "grad_norm": 0.6389729976654053, "learning_rate": 0.00026602437607345814, "loss": 3.1099, "step": 32819 }, { "epoch": 1.61, "grad_norm": 0.6280241012573242, "learning_rate": 0.0002660090789824675, "loss": 2.9096, "step": 32820 }, { "epoch": 1.61, "grad_norm": 0.6099143028259277, "learning_rate": 0.00026599378198100253, "loss": 3.0422, "step": 32821 }, { "epoch": 1.61, "grad_norm": 0.6680817604064941, "learning_rate": 0.0002659784850691033, "loss": 3.2611, "step": 32822 }, { "epoch": 1.61, "grad_norm": 0.7214831113815308, "learning_rate": 0.0002659631882468102, "loss": 3.0891, "step": 32823 }, { "epoch": 1.61, "grad_norm": 0.6249724626541138, "learning_rate": 0.0002659478915141636, "loss": 2.9744, "step": 32824 }, { "epoch": 1.61, "grad_norm": 0.6301910877227783, "learning_rate": 0.0002659325948712036, "loss": 2.9372, "step": 32825 }, { "epoch": 1.61, "grad_norm": 0.6053838133811951, "learning_rate": 0.0002659172983179707, "loss": 3.2256, "step": 32826 }, { "epoch": 1.61, "grad_norm": 0.6509674191474915, "learning_rate": 0.00026590200185450504, "loss": 3.0197, "step": 32827 }, { "epoch": 1.61, "grad_norm": 0.6163961887359619, "learning_rate": 0.0002658867054808468, "loss": 3.0188, "step": 32828 }, { "epoch": 1.61, "grad_norm": 0.5931441783905029, "learning_rate": 0.00026587140919703665, "loss": 3.0105, "step": 32829 }, { "epoch": 1.61, "grad_norm": 0.6105912923812866, "learning_rate": 0.0002658561130031145, "loss": 2.8814, "step": 32830 }, { "epoch": 1.61, "grad_norm": 0.6719006896018982, "learning_rate": 0.0002658408168991209, "loss": 2.9579, "step": 32831 }, { "epoch": 1.61, "grad_norm": 0.5960902571678162, "learning_rate": 0.0002658255208850958, "loss": 3.1361, "step": 32832 }, { "epoch": 1.61, "grad_norm": 0.5977107882499695, "learning_rate": 0.0002658102249610798, "loss": 3.2039, "step": 32833 }, { "epoch": 1.61, "grad_norm": 0.6679555177688599, "learning_rate": 0.00026579492912711316, "loss": 3.3237, "step": 32834 }, { "epoch": 1.61, "grad_norm": 0.6147051453590393, "learning_rate": 0.000265779633383236, "loss": 3.0583, "step": 32835 }, { "epoch": 1.61, "grad_norm": 0.6416993141174316, "learning_rate": 0.0002657643377294888, "loss": 3.0023, "step": 32836 }, { "epoch": 1.61, "grad_norm": 0.5568658113479614, "learning_rate": 0.0002657490421659117, "loss": 2.8273, "step": 32837 }, { "epoch": 1.61, "grad_norm": 0.6182296872138977, "learning_rate": 0.0002657337466925449, "loss": 3.1808, "step": 32838 }, { "epoch": 1.61, "grad_norm": 0.61872398853302, "learning_rate": 0.00026571845130942903, "loss": 2.9039, "step": 32839 }, { "epoch": 1.61, "grad_norm": 0.5836578011512756, "learning_rate": 0.000265703156016604, "loss": 2.8411, "step": 32840 }, { "epoch": 1.61, "grad_norm": 0.6261762976646423, "learning_rate": 0.0002656878608141104, "loss": 3.0058, "step": 32841 }, { "epoch": 1.61, "grad_norm": 0.7778844237327576, "learning_rate": 0.00026567256570198824, "loss": 2.8564, "step": 32842 }, { "epoch": 1.61, "grad_norm": 0.6407623887062073, "learning_rate": 0.0002656572706802779, "loss": 2.9384, "step": 32843 }, { "epoch": 1.61, "grad_norm": 0.5836843252182007, "learning_rate": 0.00026564197574901994, "loss": 3.0409, "step": 32844 }, { "epoch": 1.61, "grad_norm": 0.6171208024024963, "learning_rate": 0.00026562668090825413, "loss": 2.9327, "step": 32845 }, { "epoch": 1.61, "grad_norm": 0.6322222352027893, "learning_rate": 0.0002656113861580212, "loss": 3.12, "step": 32846 }, { "epoch": 1.61, "grad_norm": 2.0868051052093506, "learning_rate": 0.0002655960914983612, "loss": 3.0806, "step": 32847 }, { "epoch": 1.61, "grad_norm": 0.6040574908256531, "learning_rate": 0.00026558079692931444, "loss": 3.019, "step": 32848 }, { "epoch": 1.61, "grad_norm": 0.693903386592865, "learning_rate": 0.00026556550245092136, "loss": 2.8479, "step": 32849 }, { "epoch": 1.61, "grad_norm": 0.6436097025871277, "learning_rate": 0.00026555020806322196, "loss": 3.0285, "step": 32850 }, { "epoch": 1.61, "grad_norm": 0.6326521039009094, "learning_rate": 0.0002655349137662568, "loss": 3.076, "step": 32851 }, { "epoch": 1.61, "grad_norm": 0.6029640436172485, "learning_rate": 0.00026551961956006604, "loss": 2.957, "step": 32852 }, { "epoch": 1.61, "grad_norm": 0.5923823118209839, "learning_rate": 0.0002655043254446898, "loss": 2.9685, "step": 32853 }, { "epoch": 1.61, "grad_norm": 0.6047025322914124, "learning_rate": 0.00026548903142016876, "loss": 3.0945, "step": 32854 }, { "epoch": 1.61, "grad_norm": 0.6205102205276489, "learning_rate": 0.0002654737374865428, "loss": 2.7298, "step": 32855 }, { "epoch": 1.61, "grad_norm": 0.636955976486206, "learning_rate": 0.00026545844364385255, "loss": 3.0996, "step": 32856 }, { "epoch": 1.61, "grad_norm": 0.6210900545120239, "learning_rate": 0.00026544314989213795, "loss": 2.9344, "step": 32857 }, { "epoch": 1.61, "grad_norm": 0.5878751873970032, "learning_rate": 0.0002654278562314395, "loss": 2.929, "step": 32858 }, { "epoch": 1.61, "grad_norm": 0.6176540851593018, "learning_rate": 0.00026541256266179744, "loss": 3.2169, "step": 32859 }, { "epoch": 1.61, "grad_norm": 0.7335445284843445, "learning_rate": 0.0002653972691832519, "loss": 3.117, "step": 32860 }, { "epoch": 1.61, "grad_norm": 0.6168895363807678, "learning_rate": 0.0002653819757958435, "loss": 3.0956, "step": 32861 }, { "epoch": 1.61, "grad_norm": 0.6440188884735107, "learning_rate": 0.0002653666824996122, "loss": 3.0447, "step": 32862 }, { "epoch": 1.61, "grad_norm": 0.5899813771247864, "learning_rate": 0.00026535138929459834, "loss": 2.9709, "step": 32863 }, { "epoch": 1.61, "grad_norm": 0.6249847412109375, "learning_rate": 0.0002653360961808424, "loss": 3.1298, "step": 32864 }, { "epoch": 1.61, "grad_norm": 0.5660920143127441, "learning_rate": 0.00026532080315838435, "loss": 3.0273, "step": 32865 }, { "epoch": 1.61, "grad_norm": 0.6059000492095947, "learning_rate": 0.00026530551022726476, "loss": 3.014, "step": 32866 }, { "epoch": 1.61, "grad_norm": 0.6058610081672668, "learning_rate": 0.00026529021738752373, "loss": 3.0652, "step": 32867 }, { "epoch": 1.61, "grad_norm": 0.6035959124565125, "learning_rate": 0.00026527492463920154, "loss": 3.1049, "step": 32868 }, { "epoch": 1.61, "grad_norm": 0.5923382639884949, "learning_rate": 0.00026525963198233864, "loss": 3.1006, "step": 32869 }, { "epoch": 1.61, "grad_norm": 0.6309588551521301, "learning_rate": 0.0002652443394169751, "loss": 3.1369, "step": 32870 }, { "epoch": 1.61, "grad_norm": 0.5777470469474792, "learning_rate": 0.0002652290469431513, "loss": 2.9681, "step": 32871 }, { "epoch": 1.61, "grad_norm": 0.604954183101654, "learning_rate": 0.00026521375456090745, "loss": 3.0512, "step": 32872 }, { "epoch": 1.61, "grad_norm": 0.618948757648468, "learning_rate": 0.000265198462270284, "loss": 3.152, "step": 32873 }, { "epoch": 1.61, "grad_norm": 0.5669094920158386, "learning_rate": 0.000265183170071321, "loss": 3.0005, "step": 32874 }, { "epoch": 1.61, "grad_norm": 0.6252792477607727, "learning_rate": 0.0002651678779640588, "loss": 3.0027, "step": 32875 }, { "epoch": 1.61, "grad_norm": 0.6201028823852539, "learning_rate": 0.0002651525859485378, "loss": 2.932, "step": 32876 }, { "epoch": 1.61, "grad_norm": 0.5937361717224121, "learning_rate": 0.0002651372940247981, "loss": 3.0645, "step": 32877 }, { "epoch": 1.61, "grad_norm": 0.6492998003959656, "learning_rate": 0.00026512200219288017, "loss": 3.1435, "step": 32878 }, { "epoch": 1.61, "grad_norm": 0.625295877456665, "learning_rate": 0.000265106710452824, "loss": 2.965, "step": 32879 }, { "epoch": 1.61, "grad_norm": 0.5870344042778015, "learning_rate": 0.00026509141880467016, "loss": 2.9514, "step": 32880 }, { "epoch": 1.61, "grad_norm": 0.5917270183563232, "learning_rate": 0.00026507612724845883, "loss": 2.9134, "step": 32881 }, { "epoch": 1.61, "grad_norm": 0.6095778346061707, "learning_rate": 0.00026506083578423014, "loss": 3.1039, "step": 32882 }, { "epoch": 1.61, "grad_norm": 0.66627436876297, "learning_rate": 0.00026504554441202457, "loss": 2.996, "step": 32883 }, { "epoch": 1.61, "grad_norm": 0.6477625370025635, "learning_rate": 0.00026503025313188235, "loss": 2.9929, "step": 32884 }, { "epoch": 1.61, "grad_norm": 0.5901625156402588, "learning_rate": 0.0002650149619438436, "loss": 3.2471, "step": 32885 }, { "epoch": 1.61, "grad_norm": 0.6187769174575806, "learning_rate": 0.0002649996708479488, "loss": 3.0292, "step": 32886 }, { "epoch": 1.61, "grad_norm": 0.6475508809089661, "learning_rate": 0.000264984379844238, "loss": 3.0723, "step": 32887 }, { "epoch": 1.61, "grad_norm": 0.5832803845405579, "learning_rate": 0.0002649690889327518, "loss": 2.8318, "step": 32888 }, { "epoch": 1.61, "grad_norm": 0.6353322863578796, "learning_rate": 0.00026495379811353014, "loss": 3.0975, "step": 32889 }, { "epoch": 1.61, "grad_norm": 0.5817992091178894, "learning_rate": 0.0002649385073866134, "loss": 3.0773, "step": 32890 }, { "epoch": 1.61, "grad_norm": 0.596846342086792, "learning_rate": 0.000264923216752042, "loss": 3.1376, "step": 32891 }, { "epoch": 1.61, "grad_norm": 0.6275048851966858, "learning_rate": 0.00026490792620985603, "loss": 3.2158, "step": 32892 }, { "epoch": 1.61, "grad_norm": 0.5930907130241394, "learning_rate": 0.0002648926357600959, "loss": 3.2069, "step": 32893 }, { "epoch": 1.61, "grad_norm": 0.597496747970581, "learning_rate": 0.00026487734540280174, "loss": 2.8155, "step": 32894 }, { "epoch": 1.61, "grad_norm": 0.6274101734161377, "learning_rate": 0.0002648620551380139, "loss": 3.1146, "step": 32895 }, { "epoch": 1.61, "grad_norm": 0.6199111342430115, "learning_rate": 0.0002648467649657727, "loss": 3.0811, "step": 32896 }, { "epoch": 1.61, "grad_norm": 0.6213255524635315, "learning_rate": 0.0002648314748861182, "loss": 3.0629, "step": 32897 }, { "epoch": 1.61, "grad_norm": 0.6072750091552734, "learning_rate": 0.000264816184899091, "loss": 3.0835, "step": 32898 }, { "epoch": 1.61, "grad_norm": 0.7357712388038635, "learning_rate": 0.0002648008950047312, "loss": 2.8921, "step": 32899 }, { "epoch": 1.61, "grad_norm": 0.637231707572937, "learning_rate": 0.00026478560520307886, "loss": 3.1188, "step": 32900 }, { "epoch": 1.61, "grad_norm": 0.5956443548202515, "learning_rate": 0.00026477031549417473, "loss": 3.0897, "step": 32901 }, { "epoch": 1.61, "grad_norm": 0.5935696959495544, "learning_rate": 0.0002647550258780587, "loss": 2.942, "step": 32902 }, { "epoch": 1.61, "grad_norm": 0.659156322479248, "learning_rate": 0.00026473973635477116, "loss": 3.1397, "step": 32903 }, { "epoch": 1.61, "grad_norm": 0.6243013739585876, "learning_rate": 0.00026472444692435227, "loss": 2.8611, "step": 32904 }, { "epoch": 1.61, "grad_norm": 0.5944880843162537, "learning_rate": 0.00026470915758684235, "loss": 2.9702, "step": 32905 }, { "epoch": 1.61, "grad_norm": 0.6213318705558777, "learning_rate": 0.0002646938683422819, "loss": 3.0618, "step": 32906 }, { "epoch": 1.61, "grad_norm": 0.6070886254310608, "learning_rate": 0.0002646785791907109, "loss": 3.1214, "step": 32907 }, { "epoch": 1.61, "grad_norm": 0.6019971966743469, "learning_rate": 0.00026466329013216986, "loss": 3.0693, "step": 32908 }, { "epoch": 1.61, "grad_norm": 0.5632584691047668, "learning_rate": 0.00026464800116669877, "loss": 2.8446, "step": 32909 }, { "epoch": 1.61, "grad_norm": 0.6107988953590393, "learning_rate": 0.0002646327122943381, "loss": 2.9715, "step": 32910 }, { "epoch": 1.61, "grad_norm": 0.6390516757965088, "learning_rate": 0.0002646174235151281, "loss": 3.0538, "step": 32911 }, { "epoch": 1.61, "grad_norm": 0.5974604487419128, "learning_rate": 0.00026460213482910883, "loss": 2.9292, "step": 32912 }, { "epoch": 1.61, "grad_norm": 0.603056788444519, "learning_rate": 0.0002645868462363209, "loss": 3.1131, "step": 32913 }, { "epoch": 1.61, "grad_norm": 0.6022782921791077, "learning_rate": 0.00026457155773680434, "loss": 3.1211, "step": 32914 }, { "epoch": 1.61, "grad_norm": 0.6246730089187622, "learning_rate": 0.00026455626933059943, "loss": 3.1058, "step": 32915 }, { "epoch": 1.61, "grad_norm": 0.6355071067810059, "learning_rate": 0.0002645409810177466, "loss": 3.1005, "step": 32916 }, { "epoch": 1.61, "grad_norm": 0.5840960741043091, "learning_rate": 0.0002645256927982859, "loss": 3.2979, "step": 32917 }, { "epoch": 1.61, "grad_norm": 0.614286482334137, "learning_rate": 0.0002645104046722578, "loss": 2.919, "step": 32918 }, { "epoch": 1.61, "grad_norm": 0.6425104141235352, "learning_rate": 0.00026449511663970235, "loss": 3.1202, "step": 32919 }, { "epoch": 1.61, "grad_norm": 0.617282509803772, "learning_rate": 0.00026447982870065993, "loss": 3.072, "step": 32920 }, { "epoch": 1.61, "grad_norm": 0.5912017822265625, "learning_rate": 0.00026446454085517093, "loss": 2.8702, "step": 32921 }, { "epoch": 1.61, "grad_norm": 0.6111149787902832, "learning_rate": 0.00026444925310327536, "loss": 2.9778, "step": 32922 }, { "epoch": 1.61, "grad_norm": 0.6221730709075928, "learning_rate": 0.0002644339654450137, "loss": 3.289, "step": 32923 }, { "epoch": 1.61, "grad_norm": 0.6156650185585022, "learning_rate": 0.0002644186778804261, "loss": 3.1326, "step": 32924 }, { "epoch": 1.61, "grad_norm": 0.6163793802261353, "learning_rate": 0.0002644033904095528, "loss": 3.0494, "step": 32925 }, { "epoch": 1.61, "grad_norm": 0.6287680268287659, "learning_rate": 0.00026438810303243426, "loss": 3.0893, "step": 32926 }, { "epoch": 1.61, "grad_norm": 0.6106224060058594, "learning_rate": 0.00026437281574911043, "loss": 2.9948, "step": 32927 }, { "epoch": 1.61, "grad_norm": 0.6386653184890747, "learning_rate": 0.0002643575285596219, "loss": 3.023, "step": 32928 }, { "epoch": 1.61, "grad_norm": 0.6105353236198425, "learning_rate": 0.00026434224146400866, "loss": 3.0687, "step": 32929 }, { "epoch": 1.61, "grad_norm": 0.5927025079727173, "learning_rate": 0.0002643269544623111, "loss": 2.9087, "step": 32930 }, { "epoch": 1.61, "grad_norm": 0.6284357309341431, "learning_rate": 0.00026431166755456955, "loss": 2.934, "step": 32931 }, { "epoch": 1.61, "grad_norm": 0.6147943139076233, "learning_rate": 0.0002642963807408242, "loss": 2.9622, "step": 32932 }, { "epoch": 1.61, "grad_norm": 0.6074808239936829, "learning_rate": 0.00026428109402111533, "loss": 2.9748, "step": 32933 }, { "epoch": 1.61, "grad_norm": 0.6237158179283142, "learning_rate": 0.00026426580739548306, "loss": 3.109, "step": 32934 }, { "epoch": 1.61, "grad_norm": 0.594880223274231, "learning_rate": 0.0002642505208639679, "loss": 3.1491, "step": 32935 }, { "epoch": 1.61, "grad_norm": 0.5945537686347961, "learning_rate": 0.00026423523442661, "loss": 2.9643, "step": 32936 }, { "epoch": 1.61, "grad_norm": 0.6391527652740479, "learning_rate": 0.0002642199480834495, "loss": 2.9662, "step": 32937 }, { "epoch": 1.61, "grad_norm": 0.6021575927734375, "learning_rate": 0.00026420466183452683, "loss": 3.1065, "step": 32938 }, { "epoch": 1.61, "grad_norm": 0.6353442072868347, "learning_rate": 0.0002641893756798822, "loss": 2.9833, "step": 32939 }, { "epoch": 1.61, "grad_norm": 0.6768826246261597, "learning_rate": 0.00026417408961955575, "loss": 2.8308, "step": 32940 }, { "epoch": 1.61, "grad_norm": 0.5737194418907166, "learning_rate": 0.00026415880365358803, "loss": 3.0519, "step": 32941 }, { "epoch": 1.61, "grad_norm": 0.5978490710258484, "learning_rate": 0.000264143517782019, "loss": 2.8875, "step": 32942 }, { "epoch": 1.61, "grad_norm": 0.6324152946472168, "learning_rate": 0.0002641282320048892, "loss": 3.2518, "step": 32943 }, { "epoch": 1.61, "grad_norm": 0.6049727201461792, "learning_rate": 0.0002641129463222385, "loss": 2.9631, "step": 32944 }, { "epoch": 1.61, "grad_norm": 0.584157407283783, "learning_rate": 0.0002640976607341075, "loss": 3.0125, "step": 32945 }, { "epoch": 1.61, "grad_norm": 0.603410005569458, "learning_rate": 0.00026408237524053646, "loss": 2.7752, "step": 32946 }, { "epoch": 1.61, "grad_norm": 0.6187100410461426, "learning_rate": 0.0002640670898415654, "loss": 3.0289, "step": 32947 }, { "epoch": 1.61, "grad_norm": 0.5807082653045654, "learning_rate": 0.0002640518045372348, "loss": 3.1831, "step": 32948 }, { "epoch": 1.61, "grad_norm": 0.6443330645561218, "learning_rate": 0.00026403651932758467, "loss": 3.1691, "step": 32949 }, { "epoch": 1.61, "grad_norm": 0.6212769746780396, "learning_rate": 0.0002640212342126556, "loss": 2.9774, "step": 32950 }, { "epoch": 1.61, "grad_norm": 0.5869172811508179, "learning_rate": 0.00026400594919248757, "loss": 2.8276, "step": 32951 }, { "epoch": 1.61, "grad_norm": 0.6382247805595398, "learning_rate": 0.00026399066426712094, "loss": 3.0276, "step": 32952 }, { "epoch": 1.61, "grad_norm": 0.6578730344772339, "learning_rate": 0.0002639753794365961, "loss": 3.0336, "step": 32953 }, { "epoch": 1.61, "grad_norm": 0.6229750514030457, "learning_rate": 0.000263960094700953, "loss": 3.0683, "step": 32954 }, { "epoch": 1.62, "grad_norm": 0.6478546261787415, "learning_rate": 0.0002639448100602323, "loss": 2.9494, "step": 32955 }, { "epoch": 1.62, "grad_norm": 0.607498288154602, "learning_rate": 0.0002639295255144738, "loss": 3.0408, "step": 32956 }, { "epoch": 1.62, "grad_norm": 0.6057003736495972, "learning_rate": 0.00026391424106371805, "loss": 3.2572, "step": 32957 }, { "epoch": 1.62, "grad_norm": 0.5550916194915771, "learning_rate": 0.00026389895670800534, "loss": 2.9359, "step": 32958 }, { "epoch": 1.62, "grad_norm": 0.6296790242195129, "learning_rate": 0.0002638836724473757, "loss": 3.0151, "step": 32959 }, { "epoch": 1.62, "grad_norm": 0.5951831936836243, "learning_rate": 0.00026386838828186966, "loss": 2.8441, "step": 32960 }, { "epoch": 1.62, "grad_norm": 0.6196625828742981, "learning_rate": 0.00026385310421152725, "loss": 2.8942, "step": 32961 }, { "epoch": 1.62, "grad_norm": 0.6182433366775513, "learning_rate": 0.00026383782023638874, "loss": 2.9938, "step": 32962 }, { "epoch": 1.62, "grad_norm": 0.6165900230407715, "learning_rate": 0.0002638225363564946, "loss": 2.8868, "step": 32963 }, { "epoch": 1.62, "grad_norm": 0.6178959012031555, "learning_rate": 0.0002638072525718848, "loss": 2.8964, "step": 32964 }, { "epoch": 1.62, "grad_norm": 0.6362462639808655, "learning_rate": 0.00026379196888259984, "loss": 2.8124, "step": 32965 }, { "epoch": 1.62, "grad_norm": 0.6361078023910522, "learning_rate": 0.00026377668528867977, "loss": 3.0088, "step": 32966 }, { "epoch": 1.62, "grad_norm": 0.7130830883979797, "learning_rate": 0.00026376140179016494, "loss": 3.1082, "step": 32967 }, { "epoch": 1.62, "grad_norm": 0.6127350926399231, "learning_rate": 0.0002637461183870957, "loss": 3.1208, "step": 32968 }, { "epoch": 1.62, "grad_norm": 0.5895925164222717, "learning_rate": 0.0002637308350795121, "loss": 2.8528, "step": 32969 }, { "epoch": 1.62, "grad_norm": 0.6005771160125732, "learning_rate": 0.00026371555186745465, "loss": 3.0934, "step": 32970 }, { "epoch": 1.62, "grad_norm": 0.6108828783035278, "learning_rate": 0.0002637002687509633, "loss": 2.9517, "step": 32971 }, { "epoch": 1.62, "grad_norm": 0.5996699333190918, "learning_rate": 0.0002636849857300785, "loss": 2.9753, "step": 32972 }, { "epoch": 1.62, "grad_norm": 0.5930362939834595, "learning_rate": 0.0002636697028048405, "loss": 2.9364, "step": 32973 }, { "epoch": 1.62, "grad_norm": 0.6767230033874512, "learning_rate": 0.0002636544199752894, "loss": 3.1138, "step": 32974 }, { "epoch": 1.62, "grad_norm": 0.6063745021820068, "learning_rate": 0.0002636391372414657, "loss": 3.1365, "step": 32975 }, { "epoch": 1.62, "grad_norm": 0.6611247658729553, "learning_rate": 0.00026362385460340946, "loss": 3.0819, "step": 32976 }, { "epoch": 1.62, "grad_norm": 0.6121769547462463, "learning_rate": 0.00026360857206116095, "loss": 2.8333, "step": 32977 }, { "epoch": 1.62, "grad_norm": 0.6047820448875427, "learning_rate": 0.00026359328961476057, "loss": 3.1222, "step": 32978 }, { "epoch": 1.62, "grad_norm": 0.6275504231452942, "learning_rate": 0.00026357800726424833, "loss": 3.3318, "step": 32979 }, { "epoch": 1.62, "grad_norm": 0.6080473065376282, "learning_rate": 0.0002635627250096647, "loss": 2.9658, "step": 32980 }, { "epoch": 1.62, "grad_norm": 0.6527457237243652, "learning_rate": 0.00026354744285104977, "loss": 3.0866, "step": 32981 }, { "epoch": 1.62, "grad_norm": 0.6376957297325134, "learning_rate": 0.0002635321607884439, "loss": 3.1455, "step": 32982 }, { "epoch": 1.62, "grad_norm": 0.6095510125160217, "learning_rate": 0.0002635168788218874, "loss": 2.9285, "step": 32983 }, { "epoch": 1.62, "grad_norm": 0.6063658595085144, "learning_rate": 0.0002635015969514202, "loss": 3.2578, "step": 32984 }, { "epoch": 1.62, "grad_norm": 0.5850301384925842, "learning_rate": 0.00026348631517708295, "loss": 3.193, "step": 32985 }, { "epoch": 1.62, "grad_norm": 0.6624311804771423, "learning_rate": 0.00026347103349891565, "loss": 3.1238, "step": 32986 }, { "epoch": 1.62, "grad_norm": 0.5807081460952759, "learning_rate": 0.00026345575191695855, "loss": 3.0628, "step": 32987 }, { "epoch": 1.62, "grad_norm": 0.5957835912704468, "learning_rate": 0.0002634404704312521, "loss": 2.947, "step": 32988 }, { "epoch": 1.62, "grad_norm": 0.6118351817131042, "learning_rate": 0.00026342518904183624, "loss": 3.0652, "step": 32989 }, { "epoch": 1.62, "grad_norm": 0.5782414674758911, "learning_rate": 0.0002634099077487516, "loss": 2.9368, "step": 32990 }, { "epoch": 1.62, "grad_norm": 0.6376646161079407, "learning_rate": 0.0002633946265520381, "loss": 2.9095, "step": 32991 }, { "epoch": 1.62, "grad_norm": 0.7035831809043884, "learning_rate": 0.00026337934545173606, "loss": 2.9032, "step": 32992 }, { "epoch": 1.62, "grad_norm": 0.6374669075012207, "learning_rate": 0.0002633640644478859, "loss": 3.1269, "step": 32993 }, { "epoch": 1.62, "grad_norm": 0.6482071280479431, "learning_rate": 0.0002633487835405277, "loss": 2.9167, "step": 32994 }, { "epoch": 1.62, "grad_norm": 0.5882799029350281, "learning_rate": 0.0002633335027297018, "loss": 2.8802, "step": 32995 }, { "epoch": 1.62, "grad_norm": 0.6047265529632568, "learning_rate": 0.00026331822201544825, "loss": 3.0355, "step": 32996 }, { "epoch": 1.62, "grad_norm": 0.5921814441680908, "learning_rate": 0.00026330294139780756, "loss": 2.9725, "step": 32997 }, { "epoch": 1.62, "grad_norm": 0.6499700546264648, "learning_rate": 0.00026328766087681986, "loss": 2.9638, "step": 32998 }, { "epoch": 1.62, "grad_norm": 0.6229745149612427, "learning_rate": 0.00026327238045252533, "loss": 3.066, "step": 32999 }, { "epoch": 1.62, "grad_norm": 0.6632835268974304, "learning_rate": 0.00026325710012496434, "loss": 2.9409, "step": 33000 }, { "epoch": 1.62, "grad_norm": 0.5706971287727356, "learning_rate": 0.00026324181989417707, "loss": 2.9932, "step": 33001 }, { "epoch": 1.62, "grad_norm": 0.6048433184623718, "learning_rate": 0.0002632265397602037, "loss": 2.9595, "step": 33002 }, { "epoch": 1.62, "grad_norm": 0.6280288100242615, "learning_rate": 0.0002632112597230847, "loss": 3.1984, "step": 33003 }, { "epoch": 1.62, "grad_norm": 0.6192353963851929, "learning_rate": 0.00026319597978286, "loss": 2.9718, "step": 33004 }, { "epoch": 1.62, "grad_norm": 0.6082794070243835, "learning_rate": 0.0002631806999395702, "loss": 3.0911, "step": 33005 }, { "epoch": 1.62, "grad_norm": 0.6182049512863159, "learning_rate": 0.00026316542019325513, "loss": 3.064, "step": 33006 }, { "epoch": 1.62, "grad_norm": 0.6134260296821594, "learning_rate": 0.0002631501405439553, "loss": 3.0449, "step": 33007 }, { "epoch": 1.62, "grad_norm": 0.6304604411125183, "learning_rate": 0.0002631348609917111, "loss": 2.9606, "step": 33008 }, { "epoch": 1.62, "grad_norm": 0.5884554386138916, "learning_rate": 0.00026311958153656237, "loss": 3.0864, "step": 33009 }, { "epoch": 1.62, "grad_norm": 0.5836405158042908, "learning_rate": 0.00026310430217854966, "loss": 3.4003, "step": 33010 }, { "epoch": 1.62, "grad_norm": 0.6103641986846924, "learning_rate": 0.000263089022917713, "loss": 3.1367, "step": 33011 }, { "epoch": 1.62, "grad_norm": 0.6257080435752869, "learning_rate": 0.0002630737437540928, "loss": 3.2796, "step": 33012 }, { "epoch": 1.62, "grad_norm": 0.6084847450256348, "learning_rate": 0.0002630584646877294, "loss": 3.099, "step": 33013 }, { "epoch": 1.62, "grad_norm": 0.6533360481262207, "learning_rate": 0.0002630431857186627, "loss": 3.201, "step": 33014 }, { "epoch": 1.62, "grad_norm": 0.65522700548172, "learning_rate": 0.0002630279068469333, "loss": 2.9477, "step": 33015 }, { "epoch": 1.62, "grad_norm": 0.6127893328666687, "learning_rate": 0.0002630126280725812, "loss": 2.9227, "step": 33016 }, { "epoch": 1.62, "grad_norm": 0.6066474318504333, "learning_rate": 0.00026299734939564664, "loss": 2.9635, "step": 33017 }, { "epoch": 1.62, "grad_norm": 0.6110500693321228, "learning_rate": 0.0002629820708161701, "loss": 3.1665, "step": 33018 }, { "epoch": 1.62, "grad_norm": 0.6653189063072205, "learning_rate": 0.00026296679233419153, "loss": 3.1245, "step": 33019 }, { "epoch": 1.62, "grad_norm": 0.5800127983093262, "learning_rate": 0.00026295151394975145, "loss": 2.8873, "step": 33020 }, { "epoch": 1.62, "grad_norm": 0.6234315037727356, "learning_rate": 0.0002629362356628898, "loss": 2.9345, "step": 33021 }, { "epoch": 1.62, "grad_norm": 0.6198316216468811, "learning_rate": 0.000262920957473647, "loss": 2.8847, "step": 33022 }, { "epoch": 1.62, "grad_norm": 0.6274871230125427, "learning_rate": 0.0002629056793820634, "loss": 3.0915, "step": 33023 }, { "epoch": 1.62, "grad_norm": 0.6605402231216431, "learning_rate": 0.0002628904013881789, "loss": 2.9215, "step": 33024 }, { "epoch": 1.62, "grad_norm": 0.634557843208313, "learning_rate": 0.00026287512349203407, "loss": 2.955, "step": 33025 }, { "epoch": 1.62, "grad_norm": 0.6039037108421326, "learning_rate": 0.000262859845693669, "loss": 3.1472, "step": 33026 }, { "epoch": 1.62, "grad_norm": 0.6004440784454346, "learning_rate": 0.000262844567993124, "loss": 3.1694, "step": 33027 }, { "epoch": 1.62, "grad_norm": 0.5967251658439636, "learning_rate": 0.0002628292903904391, "loss": 2.922, "step": 33028 }, { "epoch": 1.62, "grad_norm": 0.5802043676376343, "learning_rate": 0.0002628140128856547, "loss": 3.0168, "step": 33029 }, { "epoch": 1.62, "grad_norm": 0.5677957534790039, "learning_rate": 0.00026279873547881117, "loss": 2.8979, "step": 33030 }, { "epoch": 1.62, "grad_norm": 0.6296228170394897, "learning_rate": 0.0002627834581699486, "loss": 2.9727, "step": 33031 }, { "epoch": 1.62, "grad_norm": 0.6011860966682434, "learning_rate": 0.0002627681809591072, "loss": 3.1614, "step": 33032 }, { "epoch": 1.62, "grad_norm": 0.6390479803085327, "learning_rate": 0.0002627529038463272, "loss": 3.0619, "step": 33033 }, { "epoch": 1.62, "grad_norm": 0.7037544846534729, "learning_rate": 0.0002627376268316489, "loss": 3.0301, "step": 33034 }, { "epoch": 1.62, "grad_norm": 0.6357463598251343, "learning_rate": 0.00026272234991511263, "loss": 3.0546, "step": 33035 }, { "epoch": 1.62, "grad_norm": 0.5906470417976379, "learning_rate": 0.0002627070730967584, "loss": 3.1207, "step": 33036 }, { "epoch": 1.62, "grad_norm": 0.6273382306098938, "learning_rate": 0.0002626917963766266, "loss": 3.0675, "step": 33037 }, { "epoch": 1.62, "grad_norm": 0.5891216993331909, "learning_rate": 0.00026267651975475745, "loss": 3.158, "step": 33038 }, { "epoch": 1.62, "grad_norm": 0.6417419910430908, "learning_rate": 0.00026266124323119107, "loss": 3.1118, "step": 33039 }, { "epoch": 1.62, "grad_norm": 0.5798816680908203, "learning_rate": 0.0002626459668059679, "loss": 3.1448, "step": 33040 }, { "epoch": 1.62, "grad_norm": 0.6085364818572998, "learning_rate": 0.000262630690479128, "loss": 3.1527, "step": 33041 }, { "epoch": 1.62, "grad_norm": 0.6986152529716492, "learning_rate": 0.00026261541425071176, "loss": 2.9744, "step": 33042 }, { "epoch": 1.62, "grad_norm": 0.5539711713790894, "learning_rate": 0.0002626001381207592, "loss": 3.1058, "step": 33043 }, { "epoch": 1.62, "grad_norm": 0.6884754300117493, "learning_rate": 0.00026258486208931075, "loss": 3.0357, "step": 33044 }, { "epoch": 1.62, "grad_norm": 0.6211069226264954, "learning_rate": 0.0002625695861564066, "loss": 3.2542, "step": 33045 }, { "epoch": 1.62, "grad_norm": 0.6055322885513306, "learning_rate": 0.00026255431032208684, "loss": 3.275, "step": 33046 }, { "epoch": 1.62, "grad_norm": 0.581346333026886, "learning_rate": 0.00026253903458639197, "loss": 2.9475, "step": 33047 }, { "epoch": 1.62, "grad_norm": 0.5834121704101562, "learning_rate": 0.00026252375894936193, "loss": 3.3297, "step": 33048 }, { "epoch": 1.62, "grad_norm": 0.610347330570221, "learning_rate": 0.0002625084834110371, "loss": 2.9988, "step": 33049 }, { "epoch": 1.62, "grad_norm": 0.6127068400382996, "learning_rate": 0.00026249320797145785, "loss": 2.843, "step": 33050 }, { "epoch": 1.62, "grad_norm": 0.5858662128448486, "learning_rate": 0.0002624779326306641, "loss": 3.0937, "step": 33051 }, { "epoch": 1.62, "grad_norm": 0.5876694917678833, "learning_rate": 0.00026246265738869647, "loss": 2.9783, "step": 33052 }, { "epoch": 1.62, "grad_norm": 0.622711718082428, "learning_rate": 0.0002624473822455948, "loss": 2.9802, "step": 33053 }, { "epoch": 1.62, "grad_norm": 0.5976312160491943, "learning_rate": 0.0002624321072013995, "loss": 3.0595, "step": 33054 }, { "epoch": 1.62, "grad_norm": 0.5904166102409363, "learning_rate": 0.0002624168322561509, "loss": 2.9977, "step": 33055 }, { "epoch": 1.62, "grad_norm": 0.7579708099365234, "learning_rate": 0.0002624015574098891, "loss": 2.9153, "step": 33056 }, { "epoch": 1.62, "grad_norm": 0.6306504011154175, "learning_rate": 0.0002623862826626544, "loss": 3.1642, "step": 33057 }, { "epoch": 1.62, "grad_norm": 0.6161608695983887, "learning_rate": 0.0002623710080144868, "loss": 3.1045, "step": 33058 }, { "epoch": 1.62, "grad_norm": 0.6324542760848999, "learning_rate": 0.0002623557334654269, "loss": 3.1015, "step": 33059 }, { "epoch": 1.62, "grad_norm": 0.6091515421867371, "learning_rate": 0.00026234045901551474, "loss": 3.0746, "step": 33060 }, { "epoch": 1.62, "grad_norm": 0.6115736961364746, "learning_rate": 0.0002623251846647905, "loss": 3.0863, "step": 33061 }, { "epoch": 1.62, "grad_norm": 0.6223326325416565, "learning_rate": 0.00026230991041329457, "loss": 2.998, "step": 33062 }, { "epoch": 1.62, "grad_norm": 0.5899375677108765, "learning_rate": 0.000262294636261067, "loss": 3.0513, "step": 33063 }, { "epoch": 1.62, "grad_norm": 0.577978789806366, "learning_rate": 0.00026227936220814805, "loss": 3.0171, "step": 33064 }, { "epoch": 1.62, "grad_norm": 0.5958923697471619, "learning_rate": 0.00026226408825457813, "loss": 2.9265, "step": 33065 }, { "epoch": 1.62, "grad_norm": 0.6276746988296509, "learning_rate": 0.0002622488144003973, "loss": 2.8662, "step": 33066 }, { "epoch": 1.62, "grad_norm": 0.5911863446235657, "learning_rate": 0.0002622335406456459, "loss": 3.0632, "step": 33067 }, { "epoch": 1.62, "grad_norm": 0.6043195724487305, "learning_rate": 0.0002622182669903639, "loss": 2.8693, "step": 33068 }, { "epoch": 1.62, "grad_norm": 0.6209897994995117, "learning_rate": 0.0002622029934345917, "loss": 2.8144, "step": 33069 }, { "epoch": 1.62, "grad_norm": 0.5737490653991699, "learning_rate": 0.0002621877199783697, "loss": 3.1074, "step": 33070 }, { "epoch": 1.62, "grad_norm": 0.5962395668029785, "learning_rate": 0.00026217244662173793, "loss": 2.9208, "step": 33071 }, { "epoch": 1.62, "grad_norm": 0.6508395671844482, "learning_rate": 0.00026215717336473674, "loss": 2.7527, "step": 33072 }, { "epoch": 1.62, "grad_norm": 0.6254932284355164, "learning_rate": 0.0002621419002074061, "loss": 2.7828, "step": 33073 }, { "epoch": 1.62, "grad_norm": 0.6272798180580139, "learning_rate": 0.0002621266271497865, "loss": 3.0308, "step": 33074 }, { "epoch": 1.62, "grad_norm": 0.5974328517913818, "learning_rate": 0.0002621113541919181, "loss": 2.8549, "step": 33075 }, { "epoch": 1.62, "grad_norm": 0.6224279403686523, "learning_rate": 0.000262096081333841, "loss": 2.9043, "step": 33076 }, { "epoch": 1.62, "grad_norm": 0.6068066358566284, "learning_rate": 0.0002620808085755956, "loss": 3.1066, "step": 33077 }, { "epoch": 1.62, "grad_norm": 0.6328063011169434, "learning_rate": 0.0002620655359172221, "loss": 3.0899, "step": 33078 }, { "epoch": 1.62, "grad_norm": 0.6107156276702881, "learning_rate": 0.00026205026335876047, "loss": 3.0569, "step": 33079 }, { "epoch": 1.62, "grad_norm": 0.6129263639450073, "learning_rate": 0.00026203499090025136, "loss": 3.0112, "step": 33080 }, { "epoch": 1.62, "grad_norm": 0.6082353591918945, "learning_rate": 0.00026201971854173474, "loss": 3.0659, "step": 33081 }, { "epoch": 1.62, "grad_norm": 0.6048126816749573, "learning_rate": 0.0002620044462832509, "loss": 2.9859, "step": 33082 }, { "epoch": 1.62, "grad_norm": 0.5699526071548462, "learning_rate": 0.0002619891741248399, "loss": 3.1299, "step": 33083 }, { "epoch": 1.62, "grad_norm": 0.5666524767875671, "learning_rate": 0.0002619739020665422, "loss": 3.215, "step": 33084 }, { "epoch": 1.62, "grad_norm": 0.5848353505134583, "learning_rate": 0.00026195863010839793, "loss": 3.0975, "step": 33085 }, { "epoch": 1.62, "grad_norm": 0.5911352634429932, "learning_rate": 0.0002619433582504472, "loss": 3.1146, "step": 33086 }, { "epoch": 1.62, "grad_norm": 0.6226699352264404, "learning_rate": 0.0002619280864927305, "loss": 2.8967, "step": 33087 }, { "epoch": 1.62, "grad_norm": 0.5983354449272156, "learning_rate": 0.0002619128148352878, "loss": 3.0173, "step": 33088 }, { "epoch": 1.62, "grad_norm": 0.643765389919281, "learning_rate": 0.00026189754327815936, "loss": 3.1155, "step": 33089 }, { "epoch": 1.62, "grad_norm": 0.5957856774330139, "learning_rate": 0.0002618822718213856, "loss": 2.9854, "step": 33090 }, { "epoch": 1.62, "grad_norm": 0.565482497215271, "learning_rate": 0.00026186700046500644, "loss": 2.9022, "step": 33091 }, { "epoch": 1.62, "grad_norm": 0.5913927555084229, "learning_rate": 0.0002618517292090624, "loss": 3.1603, "step": 33092 }, { "epoch": 1.62, "grad_norm": 0.6006325483322144, "learning_rate": 0.00026183645805359353, "loss": 3.2807, "step": 33093 }, { "epoch": 1.62, "grad_norm": 0.6765154004096985, "learning_rate": 0.00026182118699863996, "loss": 3.0531, "step": 33094 }, { "epoch": 1.62, "grad_norm": 0.6251957416534424, "learning_rate": 0.0002618059160442422, "loss": 2.9957, "step": 33095 }, { "epoch": 1.62, "grad_norm": 0.6182376742362976, "learning_rate": 0.00026179064519044027, "loss": 3.0766, "step": 33096 }, { "epoch": 1.62, "grad_norm": 0.5697817206382751, "learning_rate": 0.00026177537443727443, "loss": 2.8416, "step": 33097 }, { "epoch": 1.62, "grad_norm": 0.6249179244041443, "learning_rate": 0.00026176010378478484, "loss": 3.007, "step": 33098 }, { "epoch": 1.62, "grad_norm": 0.6290808320045471, "learning_rate": 0.0002617448332330118, "loss": 3.235, "step": 33099 }, { "epoch": 1.62, "grad_norm": 0.6982139348983765, "learning_rate": 0.0002617295627819956, "loss": 3.0333, "step": 33100 }, { "epoch": 1.62, "grad_norm": 0.6556278467178345, "learning_rate": 0.00026171429243177614, "loss": 3.0093, "step": 33101 }, { "epoch": 1.62, "grad_norm": 0.6192794442176819, "learning_rate": 0.0002616990221823941, "loss": 3.0252, "step": 33102 }, { "epoch": 1.62, "grad_norm": 0.6167252659797668, "learning_rate": 0.0002616837520338894, "loss": 2.9509, "step": 33103 }, { "epoch": 1.62, "grad_norm": 0.651763379573822, "learning_rate": 0.0002616684819863023, "loss": 3.0517, "step": 33104 }, { "epoch": 1.62, "grad_norm": 0.6661128997802734, "learning_rate": 0.00026165321203967297, "loss": 2.9474, "step": 33105 }, { "epoch": 1.62, "grad_norm": 0.6066205501556396, "learning_rate": 0.00026163794219404176, "loss": 3.1624, "step": 33106 }, { "epoch": 1.62, "grad_norm": 0.6290202736854553, "learning_rate": 0.0002616226724494489, "loss": 2.8767, "step": 33107 }, { "epoch": 1.62, "grad_norm": 0.6024856567382812, "learning_rate": 0.0002616074028059344, "loss": 2.9091, "step": 33108 }, { "epoch": 1.62, "grad_norm": 0.5915390253067017, "learning_rate": 0.00026159213326353877, "loss": 2.9278, "step": 33109 }, { "epoch": 1.62, "grad_norm": 0.6625373363494873, "learning_rate": 0.00026157686382230195, "loss": 3.3122, "step": 33110 }, { "epoch": 1.62, "grad_norm": 0.7008400559425354, "learning_rate": 0.00026156159448226425, "loss": 3.002, "step": 33111 }, { "epoch": 1.62, "grad_norm": 0.7132464051246643, "learning_rate": 0.00026154632524346604, "loss": 3.1318, "step": 33112 }, { "epoch": 1.62, "grad_norm": 0.6390926837921143, "learning_rate": 0.00026153105610594727, "loss": 2.9645, "step": 33113 }, { "epoch": 1.62, "grad_norm": 0.6167672872543335, "learning_rate": 0.0002615157870697484, "loss": 2.9288, "step": 33114 }, { "epoch": 1.62, "grad_norm": 0.5881397724151611, "learning_rate": 0.0002615005181349095, "loss": 2.8726, "step": 33115 }, { "epoch": 1.62, "grad_norm": 0.6101930737495422, "learning_rate": 0.00026148524930147074, "loss": 3.1134, "step": 33116 }, { "epoch": 1.62, "grad_norm": 0.602695643901825, "learning_rate": 0.0002614699805694726, "loss": 2.9318, "step": 33117 }, { "epoch": 1.62, "grad_norm": 0.6428423523902893, "learning_rate": 0.000261454711938955, "loss": 3.018, "step": 33118 }, { "epoch": 1.62, "grad_norm": 0.62602299451828, "learning_rate": 0.00026143944340995836, "loss": 2.8108, "step": 33119 }, { "epoch": 1.62, "grad_norm": 0.6125110387802124, "learning_rate": 0.0002614241749825227, "loss": 3.1053, "step": 33120 }, { "epoch": 1.62, "grad_norm": 0.5930248498916626, "learning_rate": 0.00026140890665668834, "loss": 2.7055, "step": 33121 }, { "epoch": 1.62, "grad_norm": 0.6099435091018677, "learning_rate": 0.0002613936384324956, "loss": 3.0313, "step": 33122 }, { "epoch": 1.62, "grad_norm": 0.599867045879364, "learning_rate": 0.00026137837030998444, "loss": 3.0244, "step": 33123 }, { "epoch": 1.62, "grad_norm": 0.6972360014915466, "learning_rate": 0.00026136310228919537, "loss": 3.1866, "step": 33124 }, { "epoch": 1.62, "grad_norm": 0.6721414923667908, "learning_rate": 0.0002613478343701684, "loss": 3.0925, "step": 33125 }, { "epoch": 1.62, "grad_norm": 0.6666560769081116, "learning_rate": 0.0002613325665529437, "loss": 3.2204, "step": 33126 }, { "epoch": 1.62, "grad_norm": 0.577688992023468, "learning_rate": 0.0002613172988375617, "loss": 3.1363, "step": 33127 }, { "epoch": 1.62, "grad_norm": 0.60993492603302, "learning_rate": 0.00026130203122406244, "loss": 3.2141, "step": 33128 }, { "epoch": 1.62, "grad_norm": 0.601895809173584, "learning_rate": 0.0002612867637124862, "loss": 2.8998, "step": 33129 }, { "epoch": 1.62, "grad_norm": 0.5851340889930725, "learning_rate": 0.00026127149630287313, "loss": 3.0916, "step": 33130 }, { "epoch": 1.62, "grad_norm": 0.5805398225784302, "learning_rate": 0.0002612562289952634, "loss": 2.9505, "step": 33131 }, { "epoch": 1.62, "grad_norm": 0.6383261680603027, "learning_rate": 0.0002612409617896975, "loss": 2.8642, "step": 33132 }, { "epoch": 1.62, "grad_norm": 0.6343017816543579, "learning_rate": 0.00026122569468621537, "loss": 3.29, "step": 33133 }, { "epoch": 1.62, "grad_norm": 0.6100292801856995, "learning_rate": 0.00026121042768485737, "loss": 2.9963, "step": 33134 }, { "epoch": 1.62, "grad_norm": 0.5947556495666504, "learning_rate": 0.0002611951607856635, "loss": 2.851, "step": 33135 }, { "epoch": 1.62, "grad_norm": 0.6661785244941711, "learning_rate": 0.00026117989398867416, "loss": 3.2415, "step": 33136 }, { "epoch": 1.62, "grad_norm": 0.6562435030937195, "learning_rate": 0.00026116462729392956, "loss": 3.1879, "step": 33137 }, { "epoch": 1.62, "grad_norm": 0.6270297765731812, "learning_rate": 0.0002611493607014698, "loss": 3.1056, "step": 33138 }, { "epoch": 1.62, "grad_norm": 0.6291563510894775, "learning_rate": 0.00026113409421133524, "loss": 3.0792, "step": 33139 }, { "epoch": 1.62, "grad_norm": 0.6328475475311279, "learning_rate": 0.0002611188278235659, "loss": 2.9278, "step": 33140 }, { "epoch": 1.62, "grad_norm": 0.5976963639259338, "learning_rate": 0.000261103561538202, "loss": 3.1471, "step": 33141 }, { "epoch": 1.62, "grad_norm": 0.5877504348754883, "learning_rate": 0.000261088295355284, "loss": 2.8909, "step": 33142 }, { "epoch": 1.62, "grad_norm": 0.6378092169761658, "learning_rate": 0.0002610730292748519, "loss": 2.7738, "step": 33143 }, { "epoch": 1.62, "grad_norm": 0.686267614364624, "learning_rate": 0.00026105776329694597, "loss": 3.0721, "step": 33144 }, { "epoch": 1.62, "grad_norm": 0.6485110521316528, "learning_rate": 0.0002610424974216063, "loss": 3.1614, "step": 33145 }, { "epoch": 1.62, "grad_norm": 0.6028688549995422, "learning_rate": 0.00026102723164887325, "loss": 3.1251, "step": 33146 }, { "epoch": 1.62, "grad_norm": 0.5872344374656677, "learning_rate": 0.00026101196597878704, "loss": 3.2345, "step": 33147 }, { "epoch": 1.62, "grad_norm": 0.661635160446167, "learning_rate": 0.0002609967004113877, "loss": 3.0462, "step": 33148 }, { "epoch": 1.62, "grad_norm": 0.6713125705718994, "learning_rate": 0.0002609814349467156, "loss": 2.9479, "step": 33149 }, { "epoch": 1.62, "grad_norm": 0.5891671776771545, "learning_rate": 0.00026096616958481094, "loss": 3.0252, "step": 33150 }, { "epoch": 1.62, "grad_norm": 0.6562174558639526, "learning_rate": 0.0002609509043257137, "loss": 3.0676, "step": 33151 }, { "epoch": 1.62, "grad_norm": 0.6034083366394043, "learning_rate": 0.00026093563916946443, "loss": 3.2758, "step": 33152 }, { "epoch": 1.62, "grad_norm": 0.5874406099319458, "learning_rate": 0.00026092037411610305, "loss": 2.7679, "step": 33153 }, { "epoch": 1.62, "grad_norm": 0.583278238773346, "learning_rate": 0.00026090510916567, "loss": 3.2504, "step": 33154 }, { "epoch": 1.62, "grad_norm": 0.6118584871292114, "learning_rate": 0.00026088984431820535, "loss": 3.032, "step": 33155 }, { "epoch": 1.62, "grad_norm": 0.6118886470794678, "learning_rate": 0.0002608745795737492, "loss": 2.9379, "step": 33156 }, { "epoch": 1.62, "grad_norm": 0.5924785137176514, "learning_rate": 0.00026085931493234204, "loss": 3.0218, "step": 33157 }, { "epoch": 1.62, "grad_norm": 0.6517102718353271, "learning_rate": 0.00026084405039402385, "loss": 2.8742, "step": 33158 }, { "epoch": 1.63, "grad_norm": 0.6144333481788635, "learning_rate": 0.0002608287859588349, "loss": 2.952, "step": 33159 }, { "epoch": 1.63, "grad_norm": 0.5906076431274414, "learning_rate": 0.00026081352162681535, "loss": 3.0309, "step": 33160 }, { "epoch": 1.63, "grad_norm": 0.6224218606948853, "learning_rate": 0.00026079825739800544, "loss": 3.0958, "step": 33161 }, { "epoch": 1.63, "grad_norm": 0.6227795481681824, "learning_rate": 0.00026078299327244545, "loss": 2.8246, "step": 33162 }, { "epoch": 1.63, "grad_norm": 0.5719353556632996, "learning_rate": 0.0002607677292501754, "loss": 2.9469, "step": 33163 }, { "epoch": 1.63, "grad_norm": 0.6005598902702332, "learning_rate": 0.00026075246533123573, "loss": 3.0891, "step": 33164 }, { "epoch": 1.63, "grad_norm": 0.6221149563789368, "learning_rate": 0.00026073720151566644, "loss": 2.8739, "step": 33165 }, { "epoch": 1.63, "grad_norm": 0.6196457147598267, "learning_rate": 0.00026072193780350776, "loss": 2.9828, "step": 33166 }, { "epoch": 1.63, "grad_norm": 0.6265254616737366, "learning_rate": 0.0002607066741948, "loss": 2.8732, "step": 33167 }, { "epoch": 1.63, "grad_norm": 0.653234601020813, "learning_rate": 0.0002606914106895833, "loss": 2.9117, "step": 33168 }, { "epoch": 1.63, "grad_norm": 0.6232619285583496, "learning_rate": 0.0002606761472878979, "loss": 3.2086, "step": 33169 }, { "epoch": 1.63, "grad_norm": 0.5829308032989502, "learning_rate": 0.00026066088398978384, "loss": 3.2108, "step": 33170 }, { "epoch": 1.63, "grad_norm": 0.5806586742401123, "learning_rate": 0.00026064562079528144, "loss": 3.133, "step": 33171 }, { "epoch": 1.63, "grad_norm": 0.5950442552566528, "learning_rate": 0.0002606303577044311, "loss": 3.0696, "step": 33172 }, { "epoch": 1.63, "grad_norm": 0.6098839640617371, "learning_rate": 0.0002606150947172726, "loss": 3.2242, "step": 33173 }, { "epoch": 1.63, "grad_norm": 0.6669129729270935, "learning_rate": 0.0002605998318338465, "loss": 2.9275, "step": 33174 }, { "epoch": 1.63, "grad_norm": 0.5990313291549683, "learning_rate": 0.0002605845690541928, "loss": 3.1519, "step": 33175 }, { "epoch": 1.63, "grad_norm": 0.614076554775238, "learning_rate": 0.0002605693063783517, "loss": 2.8938, "step": 33176 }, { "epoch": 1.63, "grad_norm": 0.6242024302482605, "learning_rate": 0.0002605540438063636, "loss": 3.0608, "step": 33177 }, { "epoch": 1.63, "grad_norm": 0.5861707329750061, "learning_rate": 0.00026053878133826844, "loss": 3.0707, "step": 33178 }, { "epoch": 1.63, "grad_norm": 0.6213533282279968, "learning_rate": 0.00026052351897410666, "loss": 2.9031, "step": 33179 }, { "epoch": 1.63, "grad_norm": 0.560062825679779, "learning_rate": 0.0002605082567139182, "loss": 3.173, "step": 33180 }, { "epoch": 1.63, "grad_norm": 0.6374415159225464, "learning_rate": 0.00026049299455774353, "loss": 2.9135, "step": 33181 }, { "epoch": 1.63, "grad_norm": 0.6181164979934692, "learning_rate": 0.0002604777325056225, "loss": 3.0568, "step": 33182 }, { "epoch": 1.63, "grad_norm": 0.6000651121139526, "learning_rate": 0.0002604624705575957, "loss": 3.0938, "step": 33183 }, { "epoch": 1.63, "grad_norm": 0.6053292155265808, "learning_rate": 0.0002604472087137031, "loss": 3.0198, "step": 33184 }, { "epoch": 1.63, "grad_norm": 0.6192270517349243, "learning_rate": 0.0002604319469739849, "loss": 2.9611, "step": 33185 }, { "epoch": 1.63, "grad_norm": 0.599314272403717, "learning_rate": 0.00026041668533848135, "loss": 3.0997, "step": 33186 }, { "epoch": 1.63, "grad_norm": 0.613339364528656, "learning_rate": 0.00026040142380723264, "loss": 3.1241, "step": 33187 }, { "epoch": 1.63, "grad_norm": 0.6284672021865845, "learning_rate": 0.0002603861623802789, "loss": 2.9876, "step": 33188 }, { "epoch": 1.63, "grad_norm": 0.6124971508979797, "learning_rate": 0.0002603709010576605, "loss": 2.9233, "step": 33189 }, { "epoch": 1.63, "grad_norm": 0.6334354281425476, "learning_rate": 0.0002603556398394175, "loss": 3.187, "step": 33190 }, { "epoch": 1.63, "grad_norm": 0.5786687135696411, "learning_rate": 0.0002603403787255901, "loss": 2.8804, "step": 33191 }, { "epoch": 1.63, "grad_norm": 0.6184731721878052, "learning_rate": 0.00026032511771621844, "loss": 2.9722, "step": 33192 }, { "epoch": 1.63, "grad_norm": 0.591331422328949, "learning_rate": 0.00026030985681134276, "loss": 2.9679, "step": 33193 }, { "epoch": 1.63, "grad_norm": 0.6321749091148376, "learning_rate": 0.0002602945960110034, "loss": 3.0021, "step": 33194 }, { "epoch": 1.63, "grad_norm": 0.6103703379631042, "learning_rate": 0.0002602793353152404, "loss": 3.1078, "step": 33195 }, { "epoch": 1.63, "grad_norm": 0.5984880924224854, "learning_rate": 0.000260264074724094, "loss": 3.0928, "step": 33196 }, { "epoch": 1.63, "grad_norm": 0.5951915979385376, "learning_rate": 0.0002602488142376043, "loss": 2.8636, "step": 33197 }, { "epoch": 1.63, "grad_norm": 0.656234622001648, "learning_rate": 0.00026023355385581165, "loss": 3.2596, "step": 33198 }, { "epoch": 1.63, "grad_norm": 0.6170872449874878, "learning_rate": 0.0002602182935787562, "loss": 3.0438, "step": 33199 }, { "epoch": 1.63, "grad_norm": 0.6340166926383972, "learning_rate": 0.00026020303340647797, "loss": 3.1691, "step": 33200 }, { "epoch": 1.63, "grad_norm": 0.6261439323425293, "learning_rate": 0.00026018777333901744, "loss": 2.905, "step": 33201 }, { "epoch": 1.63, "grad_norm": 0.5833534598350525, "learning_rate": 0.00026017251337641454, "loss": 3.0735, "step": 33202 }, { "epoch": 1.63, "grad_norm": 0.6300815343856812, "learning_rate": 0.00026015725351870956, "loss": 3.054, "step": 33203 }, { "epoch": 1.63, "grad_norm": 0.6257278919219971, "learning_rate": 0.00026014199376594283, "loss": 3.032, "step": 33204 }, { "epoch": 1.63, "grad_norm": 0.607824444770813, "learning_rate": 0.00026012673411815435, "loss": 2.8231, "step": 33205 }, { "epoch": 1.63, "grad_norm": 0.6163040399551392, "learning_rate": 0.00026011147457538446, "loss": 3.0407, "step": 33206 }, { "epoch": 1.63, "grad_norm": 0.6370983719825745, "learning_rate": 0.0002600962151376731, "loss": 2.9238, "step": 33207 }, { "epoch": 1.63, "grad_norm": 0.5754092335700989, "learning_rate": 0.0002600809558050607, "loss": 2.9017, "step": 33208 }, { "epoch": 1.63, "grad_norm": 0.6198747754096985, "learning_rate": 0.00026006569657758747, "loss": 3.0832, "step": 33209 }, { "epoch": 1.63, "grad_norm": 0.6094009876251221, "learning_rate": 0.00026005043745529335, "loss": 3.0943, "step": 33210 }, { "epoch": 1.63, "grad_norm": 0.6221636533737183, "learning_rate": 0.00026003517843821883, "loss": 2.969, "step": 33211 }, { "epoch": 1.63, "grad_norm": 0.6165490746498108, "learning_rate": 0.0002600199195264039, "loss": 3.0619, "step": 33212 }, { "epoch": 1.63, "grad_norm": 0.6335442066192627, "learning_rate": 0.0002600046607198888, "loss": 3.0032, "step": 33213 }, { "epoch": 1.63, "grad_norm": 0.608069658279419, "learning_rate": 0.00025998940201871375, "loss": 2.9305, "step": 33214 }, { "epoch": 1.63, "grad_norm": 0.5837422609329224, "learning_rate": 0.00025997414342291883, "loss": 3.1584, "step": 33215 }, { "epoch": 1.63, "grad_norm": 0.6601833701133728, "learning_rate": 0.00025995888493254447, "loss": 3.0164, "step": 33216 }, { "epoch": 1.63, "grad_norm": 0.6254077553749084, "learning_rate": 0.0002599436265476306, "loss": 2.9673, "step": 33217 }, { "epoch": 1.63, "grad_norm": 0.6094150543212891, "learning_rate": 0.0002599283682682175, "loss": 3.217, "step": 33218 }, { "epoch": 1.63, "grad_norm": 0.6002028584480286, "learning_rate": 0.00025991311009434544, "loss": 2.9381, "step": 33219 }, { "epoch": 1.63, "grad_norm": 0.6100465655326843, "learning_rate": 0.0002598978520260545, "loss": 3.05, "step": 33220 }, { "epoch": 1.63, "grad_norm": 0.6161542534828186, "learning_rate": 0.0002598825940633849, "loss": 3.0822, "step": 33221 }, { "epoch": 1.63, "grad_norm": 0.6322788000106812, "learning_rate": 0.0002598673362063768, "loss": 3.0368, "step": 33222 }, { "epoch": 1.63, "grad_norm": 0.617686927318573, "learning_rate": 0.0002598520784550704, "loss": 3.1602, "step": 33223 }, { "epoch": 1.63, "grad_norm": 0.6013792753219604, "learning_rate": 0.00025983682080950597, "loss": 3.0759, "step": 33224 }, { "epoch": 1.63, "grad_norm": 0.5862300992012024, "learning_rate": 0.0002598215632697235, "loss": 3.2734, "step": 33225 }, { "epoch": 1.63, "grad_norm": 0.5786439776420593, "learning_rate": 0.00025980630583576347, "loss": 2.8391, "step": 33226 }, { "epoch": 1.63, "grad_norm": 0.6323323249816895, "learning_rate": 0.0002597910485076658, "loss": 3.1474, "step": 33227 }, { "epoch": 1.63, "grad_norm": 0.5810719728469849, "learning_rate": 0.0002597757912854707, "loss": 3.1847, "step": 33228 }, { "epoch": 1.63, "grad_norm": 0.6033740639686584, "learning_rate": 0.00025976053416921856, "loss": 3.0101, "step": 33229 }, { "epoch": 1.63, "grad_norm": 0.6337068676948547, "learning_rate": 0.00025974527715894936, "loss": 2.9007, "step": 33230 }, { "epoch": 1.63, "grad_norm": 0.6159489750862122, "learning_rate": 0.0002597300202547034, "loss": 3.0461, "step": 33231 }, { "epoch": 1.63, "grad_norm": 0.5909284949302673, "learning_rate": 0.0002597147634565207, "loss": 3.0588, "step": 33232 }, { "epoch": 1.63, "grad_norm": 0.5779708623886108, "learning_rate": 0.0002596995067644416, "loss": 3.0699, "step": 33233 }, { "epoch": 1.63, "grad_norm": 0.618440568447113, "learning_rate": 0.0002596842501785063, "loss": 3.1235, "step": 33234 }, { "epoch": 1.63, "grad_norm": 0.6726293563842773, "learning_rate": 0.0002596689936987549, "loss": 2.8195, "step": 33235 }, { "epoch": 1.63, "grad_norm": 0.6066452264785767, "learning_rate": 0.0002596537373252277, "loss": 3.019, "step": 33236 }, { "epoch": 1.63, "grad_norm": 0.6022939085960388, "learning_rate": 0.00025963848105796465, "loss": 2.9119, "step": 33237 }, { "epoch": 1.63, "grad_norm": 0.590732991695404, "learning_rate": 0.00025962322489700613, "loss": 2.8292, "step": 33238 }, { "epoch": 1.63, "grad_norm": 0.5825474262237549, "learning_rate": 0.0002596079688423923, "loss": 2.9209, "step": 33239 }, { "epoch": 1.63, "grad_norm": 0.6330576539039612, "learning_rate": 0.0002595927128941632, "loss": 2.9113, "step": 33240 }, { "epoch": 1.63, "grad_norm": 0.5879871845245361, "learning_rate": 0.00025957745705235927, "loss": 2.9726, "step": 33241 }, { "epoch": 1.63, "grad_norm": 0.6738741993904114, "learning_rate": 0.0002595622013170204, "loss": 2.8329, "step": 33242 }, { "epoch": 1.63, "grad_norm": 0.6530206203460693, "learning_rate": 0.0002595469456881869, "loss": 3.0572, "step": 33243 }, { "epoch": 1.63, "grad_norm": 0.6073220372200012, "learning_rate": 0.00025953169016589917, "loss": 2.9772, "step": 33244 }, { "epoch": 1.63, "grad_norm": 0.6424898505210876, "learning_rate": 0.000259516434750197, "loss": 3.0354, "step": 33245 }, { "epoch": 1.63, "grad_norm": 0.6124472618103027, "learning_rate": 0.00025950117944112085, "loss": 3.0896, "step": 33246 }, { "epoch": 1.63, "grad_norm": 0.6103495955467224, "learning_rate": 0.00025948592423871063, "loss": 3.0801, "step": 33247 }, { "epoch": 1.63, "grad_norm": 0.6181695461273193, "learning_rate": 0.0002594706691430068, "loss": 3.0102, "step": 33248 }, { "epoch": 1.63, "grad_norm": 0.6263618469238281, "learning_rate": 0.0002594554141540495, "loss": 2.9957, "step": 33249 }, { "epoch": 1.63, "grad_norm": 0.6513804793357849, "learning_rate": 0.00025944015927187867, "loss": 3.1224, "step": 33250 }, { "epoch": 1.63, "grad_norm": 0.6265279054641724, "learning_rate": 0.0002594249044965348, "loss": 2.7665, "step": 33251 }, { "epoch": 1.63, "grad_norm": 0.651552140712738, "learning_rate": 0.0002594096498280579, "loss": 2.906, "step": 33252 }, { "epoch": 1.63, "grad_norm": 0.6316579580307007, "learning_rate": 0.00025939439526648803, "loss": 2.9716, "step": 33253 }, { "epoch": 1.63, "grad_norm": 0.5931977033615112, "learning_rate": 0.0002593791408118657, "loss": 3.1162, "step": 33254 }, { "epoch": 1.63, "grad_norm": 0.6142486333847046, "learning_rate": 0.0002593638864642307, "loss": 3.0778, "step": 33255 }, { "epoch": 1.63, "grad_norm": 0.5705798268318176, "learning_rate": 0.0002593486322236236, "loss": 3.136, "step": 33256 }, { "epoch": 1.63, "grad_norm": 0.6078622341156006, "learning_rate": 0.0002593333780900843, "loss": 3.0166, "step": 33257 }, { "epoch": 1.63, "grad_norm": 0.5794697999954224, "learning_rate": 0.00025931812406365293, "loss": 2.999, "step": 33258 }, { "epoch": 1.63, "grad_norm": 0.6560611724853516, "learning_rate": 0.00025930287014437, "loss": 2.8884, "step": 33259 }, { "epoch": 1.63, "grad_norm": 0.6472424268722534, "learning_rate": 0.00025928761633227534, "loss": 3.0264, "step": 33260 }, { "epoch": 1.63, "grad_norm": 0.6192428469657898, "learning_rate": 0.0002592723626274094, "loss": 2.9969, "step": 33261 }, { "epoch": 1.63, "grad_norm": 0.6398415565490723, "learning_rate": 0.0002592571090298121, "loss": 3.0716, "step": 33262 }, { "epoch": 1.63, "grad_norm": 0.5866214036941528, "learning_rate": 0.0002592418555395238, "loss": 3.1818, "step": 33263 }, { "epoch": 1.63, "grad_norm": 0.6058687567710876, "learning_rate": 0.00025922660215658457, "loss": 3.0925, "step": 33264 }, { "epoch": 1.63, "grad_norm": 0.5742605924606323, "learning_rate": 0.00025921134888103454, "loss": 2.9712, "step": 33265 }, { "epoch": 1.63, "grad_norm": 0.6119689345359802, "learning_rate": 0.0002591960957129141, "loss": 2.8314, "step": 33266 }, { "epoch": 1.63, "grad_norm": 0.645915150642395, "learning_rate": 0.00025918084265226326, "loss": 2.8669, "step": 33267 }, { "epoch": 1.63, "grad_norm": 0.630989134311676, "learning_rate": 0.0002591655896991222, "loss": 3.1193, "step": 33268 }, { "epoch": 1.63, "grad_norm": 0.5954846739768982, "learning_rate": 0.00025915033685353106, "loss": 2.9961, "step": 33269 }, { "epoch": 1.63, "grad_norm": 0.5985564589500427, "learning_rate": 0.00025913508411553015, "loss": 3.0826, "step": 33270 }, { "epoch": 1.63, "grad_norm": 0.623395562171936, "learning_rate": 0.0002591198314851596, "loss": 3.1322, "step": 33271 }, { "epoch": 1.63, "grad_norm": 0.5947928428649902, "learning_rate": 0.00025910457896245943, "loss": 3.0461, "step": 33272 }, { "epoch": 1.63, "grad_norm": 0.594084620475769, "learning_rate": 0.00025908932654747005, "loss": 2.9222, "step": 33273 }, { "epoch": 1.63, "grad_norm": 0.6043190956115723, "learning_rate": 0.00025907407424023146, "loss": 3.0105, "step": 33274 }, { "epoch": 1.63, "grad_norm": 0.6263400912284851, "learning_rate": 0.0002590588220407838, "loss": 3.1322, "step": 33275 }, { "epoch": 1.63, "grad_norm": 0.5728640556335449, "learning_rate": 0.0002590435699491675, "loss": 2.9339, "step": 33276 }, { "epoch": 1.63, "grad_norm": 0.6283183097839355, "learning_rate": 0.00025902831796542234, "loss": 3.0203, "step": 33277 }, { "epoch": 1.63, "grad_norm": 0.6112415790557861, "learning_rate": 0.0002590130660895889, "loss": 2.7254, "step": 33278 }, { "epoch": 1.63, "grad_norm": 0.5928776860237122, "learning_rate": 0.0002589978143217071, "loss": 3.0446, "step": 33279 }, { "epoch": 1.63, "grad_norm": 0.6818798780441284, "learning_rate": 0.00025898256266181704, "loss": 2.9687, "step": 33280 }, { "epoch": 1.63, "grad_norm": 0.5973649621009827, "learning_rate": 0.00025896731110995917, "loss": 3.2809, "step": 33281 }, { "epoch": 1.63, "grad_norm": 0.6226446628570557, "learning_rate": 0.00025895205966617343, "loss": 3.058, "step": 33282 }, { "epoch": 1.63, "grad_norm": 0.6349427103996277, "learning_rate": 0.00025893680833050017, "loss": 3.167, "step": 33283 }, { "epoch": 1.63, "grad_norm": 0.6562497019767761, "learning_rate": 0.0002589215571029793, "loss": 3.0447, "step": 33284 }, { "epoch": 1.63, "grad_norm": 0.5976384878158569, "learning_rate": 0.0002589063059836512, "loss": 3.0494, "step": 33285 }, { "epoch": 1.63, "grad_norm": 0.62012779712677, "learning_rate": 0.00025889105497255605, "loss": 3.164, "step": 33286 }, { "epoch": 1.63, "grad_norm": 0.5701225996017456, "learning_rate": 0.00025887580406973385, "loss": 3.0588, "step": 33287 }, { "epoch": 1.63, "grad_norm": 0.6170254945755005, "learning_rate": 0.0002588605532752249, "loss": 2.9552, "step": 33288 }, { "epoch": 1.63, "grad_norm": 0.6369534730911255, "learning_rate": 0.0002588453025890694, "loss": 3.1661, "step": 33289 }, { "epoch": 1.63, "grad_norm": 0.6616371273994446, "learning_rate": 0.00025883005201130736, "loss": 3.0857, "step": 33290 }, { "epoch": 1.63, "grad_norm": 0.6203917264938354, "learning_rate": 0.0002588148015419791, "loss": 3.0843, "step": 33291 }, { "epoch": 1.63, "grad_norm": 0.6321138739585876, "learning_rate": 0.0002587995511811247, "loss": 2.8944, "step": 33292 }, { "epoch": 1.63, "grad_norm": 0.6112622618675232, "learning_rate": 0.00025878430092878446, "loss": 3.1176, "step": 33293 }, { "epoch": 1.63, "grad_norm": 0.6042934060096741, "learning_rate": 0.0002587690507849983, "loss": 2.9723, "step": 33294 }, { "epoch": 1.63, "grad_norm": 0.6640942692756653, "learning_rate": 0.00025875380074980644, "loss": 2.8202, "step": 33295 }, { "epoch": 1.63, "grad_norm": 0.5991891622543335, "learning_rate": 0.0002587385508232493, "loss": 2.9682, "step": 33296 }, { "epoch": 1.63, "grad_norm": 0.6276793479919434, "learning_rate": 0.00025872330100536683, "loss": 2.91, "step": 33297 }, { "epoch": 1.63, "grad_norm": 0.5799841284751892, "learning_rate": 0.0002587080512961993, "loss": 2.9584, "step": 33298 }, { "epoch": 1.63, "grad_norm": 0.6178253293037415, "learning_rate": 0.0002586928016957867, "loss": 3.0259, "step": 33299 }, { "epoch": 1.63, "grad_norm": 0.6273776292800903, "learning_rate": 0.00025867755220416934, "loss": 3.0089, "step": 33300 }, { "epoch": 1.63, "grad_norm": 0.6046527624130249, "learning_rate": 0.0002586623028213875, "loss": 3.0268, "step": 33301 }, { "epoch": 1.63, "grad_norm": 0.6330516934394836, "learning_rate": 0.000258647053547481, "loss": 3.0706, "step": 33302 }, { "epoch": 1.63, "grad_norm": 0.6148566603660583, "learning_rate": 0.0002586318043824903, "loss": 3.0584, "step": 33303 }, { "epoch": 1.63, "grad_norm": 0.6204240918159485, "learning_rate": 0.0002586165553264555, "loss": 3.0304, "step": 33304 }, { "epoch": 1.63, "grad_norm": 0.6241191029548645, "learning_rate": 0.0002586013063794166, "loss": 2.7981, "step": 33305 }, { "epoch": 1.63, "grad_norm": 0.6470986604690552, "learning_rate": 0.000258586057541414, "loss": 3.0272, "step": 33306 }, { "epoch": 1.63, "grad_norm": 0.6138694286346436, "learning_rate": 0.00025857080881248776, "loss": 2.8922, "step": 33307 }, { "epoch": 1.63, "grad_norm": 0.5973373055458069, "learning_rate": 0.00025855556019267805, "loss": 2.907, "step": 33308 }, { "epoch": 1.63, "grad_norm": 0.599695086479187, "learning_rate": 0.0002585403116820249, "loss": 3.1989, "step": 33309 }, { "epoch": 1.63, "grad_norm": 0.6345245838165283, "learning_rate": 0.0002585250632805686, "loss": 3.1087, "step": 33310 }, { "epoch": 1.63, "grad_norm": 0.6359992623329163, "learning_rate": 0.00025850981498834946, "loss": 2.968, "step": 33311 }, { "epoch": 1.63, "grad_norm": 0.5912989377975464, "learning_rate": 0.0002584945668054073, "loss": 2.9463, "step": 33312 }, { "epoch": 1.63, "grad_norm": 0.6323924660682678, "learning_rate": 0.00025847931873178266, "loss": 3.3634, "step": 33313 }, { "epoch": 1.63, "grad_norm": 0.6180119514465332, "learning_rate": 0.00025846407076751535, "loss": 3.0597, "step": 33314 }, { "epoch": 1.63, "grad_norm": 0.5933579802513123, "learning_rate": 0.0002584488229126457, "loss": 3.1696, "step": 33315 }, { "epoch": 1.63, "grad_norm": 0.6219573020935059, "learning_rate": 0.0002584335751672139, "loss": 2.9026, "step": 33316 }, { "epoch": 1.63, "grad_norm": 0.6562425494194031, "learning_rate": 0.00025841832753126, "loss": 3.0916, "step": 33317 }, { "epoch": 1.63, "grad_norm": 0.6309749484062195, "learning_rate": 0.00025840308000482426, "loss": 3.0988, "step": 33318 }, { "epoch": 1.63, "grad_norm": 0.6028389930725098, "learning_rate": 0.00025838783258794683, "loss": 2.8867, "step": 33319 }, { "epoch": 1.63, "grad_norm": 0.5963541269302368, "learning_rate": 0.0002583725852806678, "loss": 2.9588, "step": 33320 }, { "epoch": 1.63, "grad_norm": 0.6047212481498718, "learning_rate": 0.0002583573380830274, "loss": 2.9557, "step": 33321 }, { "epoch": 1.63, "grad_norm": 0.5715580582618713, "learning_rate": 0.00025834209099506576, "loss": 3.0006, "step": 33322 }, { "epoch": 1.63, "grad_norm": 0.6066077351570129, "learning_rate": 0.00025832684401682303, "loss": 3.1405, "step": 33323 }, { "epoch": 1.63, "grad_norm": 0.6018175482749939, "learning_rate": 0.00025831159714833935, "loss": 2.902, "step": 33324 }, { "epoch": 1.63, "grad_norm": 0.6172803044319153, "learning_rate": 0.0002582963503896549, "loss": 2.888, "step": 33325 }, { "epoch": 1.63, "grad_norm": 0.6167096495628357, "learning_rate": 0.0002582811037408099, "loss": 2.9556, "step": 33326 }, { "epoch": 1.63, "grad_norm": 0.6224731802940369, "learning_rate": 0.0002582658572018443, "loss": 2.9202, "step": 33327 }, { "epoch": 1.63, "grad_norm": 0.6047587990760803, "learning_rate": 0.00025825061077279856, "loss": 2.9544, "step": 33328 }, { "epoch": 1.63, "grad_norm": 0.6331458687782288, "learning_rate": 0.00025823536445371265, "loss": 2.9418, "step": 33329 }, { "epoch": 1.63, "grad_norm": 0.6286932826042175, "learning_rate": 0.00025822011824462663, "loss": 3.2196, "step": 33330 }, { "epoch": 1.63, "grad_norm": 0.6198918223381042, "learning_rate": 0.0002582048721455809, "loss": 2.9849, "step": 33331 }, { "epoch": 1.63, "grad_norm": 0.6311217546463013, "learning_rate": 0.0002581896261566154, "loss": 2.9469, "step": 33332 }, { "epoch": 1.63, "grad_norm": 0.6329818964004517, "learning_rate": 0.0002581743802777705, "loss": 2.7676, "step": 33333 }, { "epoch": 1.63, "grad_norm": 0.6387625932693481, "learning_rate": 0.0002581591345090861, "loss": 2.994, "step": 33334 }, { "epoch": 1.63, "grad_norm": 0.6207517385482788, "learning_rate": 0.0002581438888506025, "loss": 3.1672, "step": 33335 }, { "epoch": 1.63, "grad_norm": 0.5961693525314331, "learning_rate": 0.00025812864330236005, "loss": 2.8354, "step": 33336 }, { "epoch": 1.63, "grad_norm": 0.6982114911079407, "learning_rate": 0.00025811339786439846, "loss": 3.005, "step": 33337 }, { "epoch": 1.63, "grad_norm": 0.6450947523117065, "learning_rate": 0.0002580981525367582, "loss": 2.9924, "step": 33338 }, { "epoch": 1.63, "grad_norm": 0.6073309779167175, "learning_rate": 0.0002580829073194793, "loss": 2.9336, "step": 33339 }, { "epoch": 1.63, "grad_norm": 0.6175057291984558, "learning_rate": 0.00025806766221260204, "loss": 2.8445, "step": 33340 }, { "epoch": 1.63, "grad_norm": 0.6069908738136292, "learning_rate": 0.00025805241721616644, "loss": 3.0606, "step": 33341 }, { "epoch": 1.63, "grad_norm": 0.6620157957077026, "learning_rate": 0.0002580371723302126, "loss": 3.2177, "step": 33342 }, { "epoch": 1.63, "grad_norm": 0.6427522897720337, "learning_rate": 0.00025802192755478096, "loss": 3.0998, "step": 33343 }, { "epoch": 1.63, "grad_norm": 0.6400549411773682, "learning_rate": 0.0002580066828899114, "loss": 2.9967, "step": 33344 }, { "epoch": 1.63, "grad_norm": 0.5848243832588196, "learning_rate": 0.0002579914383356442, "loss": 3.0598, "step": 33345 }, { "epoch": 1.63, "grad_norm": 0.5872244238853455, "learning_rate": 0.00025797619389201934, "loss": 3.002, "step": 33346 }, { "epoch": 1.63, "grad_norm": 0.5892220735549927, "learning_rate": 0.0002579609495590772, "loss": 3.0065, "step": 33347 }, { "epoch": 1.63, "grad_norm": 0.5997381210327148, "learning_rate": 0.0002579457053368578, "loss": 2.9662, "step": 33348 }, { "epoch": 1.63, "grad_norm": 0.645177960395813, "learning_rate": 0.00025793046122540126, "loss": 3.0608, "step": 33349 }, { "epoch": 1.63, "grad_norm": 0.6021146178245544, "learning_rate": 0.00025791521722474795, "loss": 2.8478, "step": 33350 }, { "epoch": 1.63, "grad_norm": 0.6015944480895996, "learning_rate": 0.0002578999733349378, "loss": 3.0736, "step": 33351 }, { "epoch": 1.63, "grad_norm": 0.6046991944313049, "learning_rate": 0.00025788472955601087, "loss": 3.1848, "step": 33352 }, { "epoch": 1.63, "grad_norm": 0.6653998494148254, "learning_rate": 0.00025786948588800765, "loss": 3.1648, "step": 33353 }, { "epoch": 1.63, "grad_norm": 0.599899172782898, "learning_rate": 0.00025785424233096797, "loss": 3.2521, "step": 33354 }, { "epoch": 1.63, "grad_norm": 0.6322702169418335, "learning_rate": 0.0002578389988849322, "loss": 2.8027, "step": 33355 }, { "epoch": 1.63, "grad_norm": 0.5862153768539429, "learning_rate": 0.0002578237555499403, "loss": 2.9012, "step": 33356 }, { "epoch": 1.63, "grad_norm": 0.6028646230697632, "learning_rate": 0.0002578085123260325, "loss": 3.1227, "step": 33357 }, { "epoch": 1.63, "grad_norm": 0.6330054402351379, "learning_rate": 0.00025779326921324905, "loss": 2.8262, "step": 33358 }, { "epoch": 1.63, "grad_norm": 0.609877347946167, "learning_rate": 0.00025777802621162994, "loss": 2.9177, "step": 33359 }, { "epoch": 1.63, "grad_norm": 0.5922818183898926, "learning_rate": 0.0002577627833212155, "loss": 2.9613, "step": 33360 }, { "epoch": 1.63, "grad_norm": 0.5810437798500061, "learning_rate": 0.0002577475405420456, "loss": 3.0444, "step": 33361 }, { "epoch": 1.63, "grad_norm": 0.6439691781997681, "learning_rate": 0.0002577322978741606, "loss": 2.9434, "step": 33362 }, { "epoch": 1.64, "grad_norm": 0.6181412935256958, "learning_rate": 0.0002577170553176007, "loss": 3.0512, "step": 33363 }, { "epoch": 1.64, "grad_norm": 0.5877893567085266, "learning_rate": 0.0002577018128724057, "loss": 3.1475, "step": 33364 }, { "epoch": 1.64, "grad_norm": 0.6160455942153931, "learning_rate": 0.0002576865705386162, "loss": 2.9531, "step": 33365 }, { "epoch": 1.64, "grad_norm": 0.5825440883636475, "learning_rate": 0.00025767132831627207, "loss": 3.0221, "step": 33366 }, { "epoch": 1.64, "grad_norm": 0.5844451189041138, "learning_rate": 0.0002576560862054134, "loss": 3.0393, "step": 33367 }, { "epoch": 1.64, "grad_norm": 0.6060708165168762, "learning_rate": 0.00025764084420608064, "loss": 3.1585, "step": 33368 }, { "epoch": 1.64, "grad_norm": 0.641826868057251, "learning_rate": 0.00025762560231831365, "loss": 3.0958, "step": 33369 }, { "epoch": 1.64, "grad_norm": 0.6380789279937744, "learning_rate": 0.00025761036054215274, "loss": 2.9916, "step": 33370 }, { "epoch": 1.64, "grad_norm": 0.624642014503479, "learning_rate": 0.0002575951188776378, "loss": 2.8842, "step": 33371 }, { "epoch": 1.64, "grad_norm": 0.5973908305168152, "learning_rate": 0.00025757987732480934, "loss": 2.6799, "step": 33372 }, { "epoch": 1.64, "grad_norm": 0.6174383163452148, "learning_rate": 0.0002575646358837073, "loss": 3.2208, "step": 33373 }, { "epoch": 1.64, "grad_norm": 0.5949251055717468, "learning_rate": 0.00025754939455437174, "loss": 2.959, "step": 33374 }, { "epoch": 1.64, "grad_norm": 0.6241964101791382, "learning_rate": 0.000257534153336843, "loss": 3.0481, "step": 33375 }, { "epoch": 1.64, "grad_norm": 0.6426224112510681, "learning_rate": 0.0002575189122311611, "loss": 2.8685, "step": 33376 }, { "epoch": 1.64, "grad_norm": 0.6326531171798706, "learning_rate": 0.0002575036712373661, "loss": 2.9726, "step": 33377 }, { "epoch": 1.64, "grad_norm": 0.6101643443107605, "learning_rate": 0.0002574884303554984, "loss": 2.9489, "step": 33378 }, { "epoch": 1.64, "grad_norm": 0.6922885179519653, "learning_rate": 0.0002574731895855979, "loss": 2.8549, "step": 33379 }, { "epoch": 1.64, "grad_norm": 0.5953141450881958, "learning_rate": 0.0002574579489277049, "loss": 3.1971, "step": 33380 }, { "epoch": 1.64, "grad_norm": 0.6089001893997192, "learning_rate": 0.0002574427083818595, "loss": 3.1966, "step": 33381 }, { "epoch": 1.64, "grad_norm": 0.6315808296203613, "learning_rate": 0.00025742746794810167, "loss": 2.9393, "step": 33382 }, { "epoch": 1.64, "grad_norm": 0.6140810251235962, "learning_rate": 0.00025741222762647187, "loss": 3.0597, "step": 33383 }, { "epoch": 1.64, "grad_norm": 0.6030791997909546, "learning_rate": 0.00025739698741701, "loss": 2.9677, "step": 33384 }, { "epoch": 1.64, "grad_norm": 0.6273488998413086, "learning_rate": 0.0002573817473197563, "loss": 3.0209, "step": 33385 }, { "epoch": 1.64, "grad_norm": 0.5986915230751038, "learning_rate": 0.00025736650733475074, "loss": 3.0809, "step": 33386 }, { "epoch": 1.64, "grad_norm": 0.6623584032058716, "learning_rate": 0.0002573512674620337, "loss": 3.1532, "step": 33387 }, { "epoch": 1.64, "grad_norm": 0.5963658690452576, "learning_rate": 0.0002573360277016453, "loss": 3.0506, "step": 33388 }, { "epoch": 1.64, "grad_norm": 0.6195214986801147, "learning_rate": 0.0002573207880536254, "loss": 3.316, "step": 33389 }, { "epoch": 1.64, "grad_norm": 0.6641665697097778, "learning_rate": 0.0002573055485180145, "loss": 3.044, "step": 33390 }, { "epoch": 1.64, "grad_norm": 0.6132287383079529, "learning_rate": 0.00025729030909485244, "loss": 2.805, "step": 33391 }, { "epoch": 1.64, "grad_norm": 0.6133543252944946, "learning_rate": 0.00025727506978417947, "loss": 2.8478, "step": 33392 }, { "epoch": 1.64, "grad_norm": 0.652361273765564, "learning_rate": 0.0002572598305860359, "loss": 3.0461, "step": 33393 }, { "epoch": 1.64, "grad_norm": 0.5797291398048401, "learning_rate": 0.00025724459150046157, "loss": 3.3286, "step": 33394 }, { "epoch": 1.64, "grad_norm": 0.6221191883087158, "learning_rate": 0.0002572293525274969, "loss": 2.8018, "step": 33395 }, { "epoch": 1.64, "grad_norm": 0.6194936633110046, "learning_rate": 0.0002572141136671817, "loss": 2.847, "step": 33396 }, { "epoch": 1.64, "grad_norm": 0.6495451331138611, "learning_rate": 0.00025719887491955633, "loss": 3.1063, "step": 33397 }, { "epoch": 1.64, "grad_norm": 0.6331799030303955, "learning_rate": 0.000257183636284661, "loss": 3.1819, "step": 33398 }, { "epoch": 1.64, "grad_norm": 0.5986855626106262, "learning_rate": 0.0002571683977625357, "loss": 3.0035, "step": 33399 }, { "epoch": 1.64, "grad_norm": 0.6081706881523132, "learning_rate": 0.0002571531593532206, "loss": 3.0621, "step": 33400 }, { "epoch": 1.64, "grad_norm": 0.638114333152771, "learning_rate": 0.00025713792105675577, "loss": 2.9936, "step": 33401 }, { "epoch": 1.64, "grad_norm": 0.6606442928314209, "learning_rate": 0.00025712268287318143, "loss": 3.0516, "step": 33402 }, { "epoch": 1.64, "grad_norm": 0.642713189125061, "learning_rate": 0.00025710744480253776, "loss": 2.943, "step": 33403 }, { "epoch": 1.64, "grad_norm": 0.6082857847213745, "learning_rate": 0.0002570922068448647, "loss": 3.0018, "step": 33404 }, { "epoch": 1.64, "grad_norm": 0.5871419310569763, "learning_rate": 0.00025707696900020263, "loss": 3.0449, "step": 33405 }, { "epoch": 1.64, "grad_norm": 0.6455428600311279, "learning_rate": 0.0002570617312685915, "loss": 2.959, "step": 33406 }, { "epoch": 1.64, "grad_norm": 0.5815387964248657, "learning_rate": 0.00025704649365007143, "loss": 2.7549, "step": 33407 }, { "epoch": 1.64, "grad_norm": 0.6647642254829407, "learning_rate": 0.0002570312561446828, "loss": 3.2245, "step": 33408 }, { "epoch": 1.64, "grad_norm": 0.6292085647583008, "learning_rate": 0.00025701601875246545, "loss": 2.8913, "step": 33409 }, { "epoch": 1.64, "grad_norm": 0.6244023442268372, "learning_rate": 0.00025700078147345974, "loss": 2.9362, "step": 33410 }, { "epoch": 1.64, "grad_norm": 0.620573878288269, "learning_rate": 0.0002569855443077055, "loss": 2.8535, "step": 33411 }, { "epoch": 1.64, "grad_norm": 0.612984299659729, "learning_rate": 0.00025697030725524325, "loss": 2.9519, "step": 33412 }, { "epoch": 1.64, "grad_norm": 0.6438848376274109, "learning_rate": 0.0002569550703161129, "loss": 2.8849, "step": 33413 }, { "epoch": 1.64, "grad_norm": 0.5936183929443359, "learning_rate": 0.0002569398334903545, "loss": 2.7897, "step": 33414 }, { "epoch": 1.64, "grad_norm": 0.614172101020813, "learning_rate": 0.0002569245967780084, "loss": 3.0311, "step": 33415 }, { "epoch": 1.64, "grad_norm": 0.6163328289985657, "learning_rate": 0.0002569093601791146, "loss": 3.0904, "step": 33416 }, { "epoch": 1.64, "grad_norm": 0.6245632767677307, "learning_rate": 0.0002568941236937133, "loss": 2.9677, "step": 33417 }, { "epoch": 1.64, "grad_norm": 0.6340919733047485, "learning_rate": 0.00025687888732184446, "loss": 3.2047, "step": 33418 }, { "epoch": 1.64, "grad_norm": 0.58950275182724, "learning_rate": 0.0002568636510635483, "loss": 2.9382, "step": 33419 }, { "epoch": 1.64, "grad_norm": 0.5915112495422363, "learning_rate": 0.00025684841491886517, "loss": 2.7972, "step": 33420 }, { "epoch": 1.64, "grad_norm": 0.6251772046089172, "learning_rate": 0.0002568331788878349, "loss": 2.9719, "step": 33421 }, { "epoch": 1.64, "grad_norm": 0.5975916385650635, "learning_rate": 0.0002568179429704978, "loss": 2.9929, "step": 33422 }, { "epoch": 1.64, "grad_norm": 0.6500588655471802, "learning_rate": 0.0002568027071668938, "loss": 2.9747, "step": 33423 }, { "epoch": 1.64, "grad_norm": 0.6431196331977844, "learning_rate": 0.00025678747147706325, "loss": 2.9508, "step": 33424 }, { "epoch": 1.64, "grad_norm": 0.6361487507820129, "learning_rate": 0.0002567722359010462, "loss": 2.9058, "step": 33425 }, { "epoch": 1.64, "grad_norm": 0.6039143204689026, "learning_rate": 0.00025675700043888264, "loss": 2.9206, "step": 33426 }, { "epoch": 1.64, "grad_norm": 0.6220351457595825, "learning_rate": 0.000256741765090613, "loss": 3.2211, "step": 33427 }, { "epoch": 1.64, "grad_norm": 0.6292129158973694, "learning_rate": 0.0002567265298562771, "loss": 3.0292, "step": 33428 }, { "epoch": 1.64, "grad_norm": 0.6125423312187195, "learning_rate": 0.00025671129473591516, "loss": 2.9991, "step": 33429 }, { "epoch": 1.64, "grad_norm": 0.6337186098098755, "learning_rate": 0.0002566960597295674, "loss": 3.1525, "step": 33430 }, { "epoch": 1.64, "grad_norm": 0.6597618460655212, "learning_rate": 0.0002566808248372739, "loss": 2.9801, "step": 33431 }, { "epoch": 1.64, "grad_norm": 0.6206303238868713, "learning_rate": 0.00025666559005907484, "loss": 3.1365, "step": 33432 }, { "epoch": 1.64, "grad_norm": 0.5949422717094421, "learning_rate": 0.0002566503553950101, "loss": 2.8709, "step": 33433 }, { "epoch": 1.64, "grad_norm": 0.5526446104049683, "learning_rate": 0.0002566351208451201, "loss": 2.9796, "step": 33434 }, { "epoch": 1.64, "grad_norm": 0.6217344999313354, "learning_rate": 0.00025661988640944484, "loss": 2.7793, "step": 33435 }, { "epoch": 1.64, "grad_norm": 0.5836462378501892, "learning_rate": 0.0002566046520880244, "loss": 2.9644, "step": 33436 }, { "epoch": 1.64, "grad_norm": 0.6020750999450684, "learning_rate": 0.000256589417880899, "loss": 3.1975, "step": 33437 }, { "epoch": 1.64, "grad_norm": 0.6435813903808594, "learning_rate": 0.00025657418378810873, "loss": 2.9718, "step": 33438 }, { "epoch": 1.64, "grad_norm": 0.6334481835365295, "learning_rate": 0.00025655894980969356, "loss": 2.8116, "step": 33439 }, { "epoch": 1.64, "grad_norm": 0.6242516040802002, "learning_rate": 0.0002565437159456939, "loss": 2.8935, "step": 33440 }, { "epoch": 1.64, "grad_norm": 0.6350675821304321, "learning_rate": 0.0002565284821961497, "loss": 2.9426, "step": 33441 }, { "epoch": 1.64, "grad_norm": 0.6530657410621643, "learning_rate": 0.00025651324856110116, "loss": 3.0214, "step": 33442 }, { "epoch": 1.64, "grad_norm": 0.6366276144981384, "learning_rate": 0.00025649801504058826, "loss": 2.9178, "step": 33443 }, { "epoch": 1.64, "grad_norm": 0.629357635974884, "learning_rate": 0.0002564827816346512, "loss": 3.052, "step": 33444 }, { "epoch": 1.64, "grad_norm": 0.5792384743690491, "learning_rate": 0.00025646754834333024, "loss": 2.9482, "step": 33445 }, { "epoch": 1.64, "grad_norm": 0.615355908870697, "learning_rate": 0.00025645231516666534, "loss": 3.1283, "step": 33446 }, { "epoch": 1.64, "grad_norm": 0.6263376474380493, "learning_rate": 0.00025643708210469667, "loss": 3.1832, "step": 33447 }, { "epoch": 1.64, "grad_norm": 0.6375722289085388, "learning_rate": 0.0002564218491574643, "loss": 2.9832, "step": 33448 }, { "epoch": 1.64, "grad_norm": 0.6010499596595764, "learning_rate": 0.0002564066163250084, "loss": 3.1208, "step": 33449 }, { "epoch": 1.64, "grad_norm": 0.6063414216041565, "learning_rate": 0.0002563913836073691, "loss": 3.1275, "step": 33450 }, { "epoch": 1.64, "grad_norm": 0.6149500012397766, "learning_rate": 0.0002563761510045864, "loss": 3.0182, "step": 33451 }, { "epoch": 1.64, "grad_norm": 0.6116110682487488, "learning_rate": 0.0002563609185167007, "loss": 3.1028, "step": 33452 }, { "epoch": 1.64, "grad_norm": 0.6712843775749207, "learning_rate": 0.00025634568614375184, "loss": 3.0418, "step": 33453 }, { "epoch": 1.64, "grad_norm": 0.597224235534668, "learning_rate": 0.00025633045388577996, "loss": 3.0967, "step": 33454 }, { "epoch": 1.64, "grad_norm": 0.609315037727356, "learning_rate": 0.00025631522174282545, "loss": 3.0672, "step": 33455 }, { "epoch": 1.64, "grad_norm": 0.6235337853431702, "learning_rate": 0.00025629998971492816, "loss": 2.9888, "step": 33456 }, { "epoch": 1.64, "grad_norm": 0.5854429602622986, "learning_rate": 0.00025628475780212825, "loss": 3.0874, "step": 33457 }, { "epoch": 1.64, "grad_norm": 0.6504743695259094, "learning_rate": 0.0002562695260044659, "loss": 3.1094, "step": 33458 }, { "epoch": 1.64, "grad_norm": 0.6945144534111023, "learning_rate": 0.0002562542943219811, "loss": 2.8553, "step": 33459 }, { "epoch": 1.64, "grad_norm": 0.5794476866722107, "learning_rate": 0.0002562390627547142, "loss": 2.9821, "step": 33460 }, { "epoch": 1.64, "grad_norm": 0.5813378691673279, "learning_rate": 0.00025622383130270514, "loss": 2.9139, "step": 33461 }, { "epoch": 1.64, "grad_norm": 0.6076094508171082, "learning_rate": 0.00025620859996599416, "loss": 2.9726, "step": 33462 }, { "epoch": 1.64, "grad_norm": 0.6358339190483093, "learning_rate": 0.0002561933687446212, "loss": 2.9508, "step": 33463 }, { "epoch": 1.64, "grad_norm": 0.6296307444572449, "learning_rate": 0.0002561781376386265, "loss": 3.04, "step": 33464 }, { "epoch": 1.64, "grad_norm": 0.6401169896125793, "learning_rate": 0.00025616290664805025, "loss": 3.2167, "step": 33465 }, { "epoch": 1.64, "grad_norm": 0.6119227409362793, "learning_rate": 0.00025614767577293227, "loss": 3.2228, "step": 33466 }, { "epoch": 1.64, "grad_norm": 0.6023973226547241, "learning_rate": 0.00025613244501331304, "loss": 3.0254, "step": 33467 }, { "epoch": 1.64, "grad_norm": 0.5672397613525391, "learning_rate": 0.0002561172143692325, "loss": 3.1091, "step": 33468 }, { "epoch": 1.64, "grad_norm": 0.6069930195808411, "learning_rate": 0.0002561019838407306, "loss": 2.9391, "step": 33469 }, { "epoch": 1.64, "grad_norm": 0.6205309629440308, "learning_rate": 0.0002560867534278478, "loss": 2.9794, "step": 33470 }, { "epoch": 1.64, "grad_norm": 0.6706638336181641, "learning_rate": 0.000256071523130624, "loss": 2.9495, "step": 33471 }, { "epoch": 1.64, "grad_norm": 0.6044138073921204, "learning_rate": 0.00025605629294909944, "loss": 3.0051, "step": 33472 }, { "epoch": 1.64, "grad_norm": 0.6162787079811096, "learning_rate": 0.00025604106288331394, "loss": 2.7831, "step": 33473 }, { "epoch": 1.64, "grad_norm": 0.6222827434539795, "learning_rate": 0.00025602583293330794, "loss": 2.9045, "step": 33474 }, { "epoch": 1.64, "grad_norm": 0.6912936568260193, "learning_rate": 0.0002560106030991215, "loss": 3.111, "step": 33475 }, { "epoch": 1.64, "grad_norm": 0.5932526588439941, "learning_rate": 0.0002559953733807945, "loss": 2.973, "step": 33476 }, { "epoch": 1.64, "grad_norm": 0.6283918619155884, "learning_rate": 0.00025598014377836736, "loss": 2.8764, "step": 33477 }, { "epoch": 1.64, "grad_norm": 0.6525346636772156, "learning_rate": 0.00025596491429188, "loss": 2.9557, "step": 33478 }, { "epoch": 1.64, "grad_norm": 0.595056414604187, "learning_rate": 0.0002559496849213725, "loss": 3.0048, "step": 33479 }, { "epoch": 1.64, "grad_norm": 0.6286141276359558, "learning_rate": 0.0002559344556668852, "loss": 2.9229, "step": 33480 }, { "epoch": 1.64, "grad_norm": 0.6390392184257507, "learning_rate": 0.0002559192265284579, "loss": 3.024, "step": 33481 }, { "epoch": 1.64, "grad_norm": 0.6058946251869202, "learning_rate": 0.00025590399750613106, "loss": 2.8562, "step": 33482 }, { "epoch": 1.64, "grad_norm": 0.7172413468360901, "learning_rate": 0.0002558887685999445, "loss": 2.993, "step": 33483 }, { "epoch": 1.64, "grad_norm": 0.6925371289253235, "learning_rate": 0.0002558735398099384, "loss": 2.83, "step": 33484 }, { "epoch": 1.64, "grad_norm": 0.6435196995735168, "learning_rate": 0.000255858311136153, "loss": 3.1041, "step": 33485 }, { "epoch": 1.64, "grad_norm": 0.6237314939498901, "learning_rate": 0.00025584308257862824, "loss": 2.8744, "step": 33486 }, { "epoch": 1.64, "grad_norm": 0.5538667440414429, "learning_rate": 0.0002558278541374044, "loss": 2.9134, "step": 33487 }, { "epoch": 1.64, "grad_norm": 0.6051622629165649, "learning_rate": 0.00025581262581252133, "loss": 2.8513, "step": 33488 }, { "epoch": 1.64, "grad_norm": 0.61176997423172, "learning_rate": 0.0002557973976040194, "loss": 3.0499, "step": 33489 }, { "epoch": 1.64, "grad_norm": 0.6022197604179382, "learning_rate": 0.00025578216951193864, "loss": 3.1049, "step": 33490 }, { "epoch": 1.64, "grad_norm": 0.6228072047233582, "learning_rate": 0.00025576694153631906, "loss": 3.0617, "step": 33491 }, { "epoch": 1.64, "grad_norm": 0.6194964051246643, "learning_rate": 0.0002557517136772009, "loss": 2.8614, "step": 33492 }, { "epoch": 1.64, "grad_norm": 0.6054251790046692, "learning_rate": 0.00025573648593462425, "loss": 3.0247, "step": 33493 }, { "epoch": 1.64, "grad_norm": 0.6623701453208923, "learning_rate": 0.00025572125830862914, "loss": 3.0102, "step": 33494 }, { "epoch": 1.64, "grad_norm": 0.6244812607765198, "learning_rate": 0.00025570603079925564, "loss": 2.8992, "step": 33495 }, { "epoch": 1.64, "grad_norm": 0.6170223951339722, "learning_rate": 0.00025569080340654397, "loss": 3.1205, "step": 33496 }, { "epoch": 1.64, "grad_norm": 0.5932803750038147, "learning_rate": 0.00025567557613053425, "loss": 3.1165, "step": 33497 }, { "epoch": 1.64, "grad_norm": 0.6121468544006348, "learning_rate": 0.00025566034897126644, "loss": 3.0744, "step": 33498 }, { "epoch": 1.64, "grad_norm": 0.6298878788948059, "learning_rate": 0.00025564512192878086, "loss": 2.8936, "step": 33499 }, { "epoch": 1.64, "grad_norm": 0.6140074729919434, "learning_rate": 0.0002556298950031174, "loss": 2.9988, "step": 33500 }, { "epoch": 1.64, "grad_norm": 0.5965002179145813, "learning_rate": 0.00025561466819431627, "loss": 3.168, "step": 33501 }, { "epoch": 1.64, "grad_norm": 0.637391209602356, "learning_rate": 0.0002555994415024176, "loss": 3.2728, "step": 33502 }, { "epoch": 1.64, "grad_norm": 0.6739944219589233, "learning_rate": 0.00025558421492746135, "loss": 3.0144, "step": 33503 }, { "epoch": 1.64, "grad_norm": 0.6153771877288818, "learning_rate": 0.00025556898846948786, "loss": 3.0694, "step": 33504 }, { "epoch": 1.64, "grad_norm": 0.5968106985092163, "learning_rate": 0.00025555376212853704, "loss": 2.9543, "step": 33505 }, { "epoch": 1.64, "grad_norm": 0.610713541507721, "learning_rate": 0.000255538535904649, "loss": 3.0342, "step": 33506 }, { "epoch": 1.64, "grad_norm": 0.6176581382751465, "learning_rate": 0.000255523309797864, "loss": 3.1373, "step": 33507 }, { "epoch": 1.64, "grad_norm": 0.6341189742088318, "learning_rate": 0.000255508083808222, "loss": 2.9842, "step": 33508 }, { "epoch": 1.64, "grad_norm": 0.5881686806678772, "learning_rate": 0.00025549285793576324, "loss": 3.0403, "step": 33509 }, { "epoch": 1.64, "grad_norm": 0.6181926727294922, "learning_rate": 0.0002554776321805276, "loss": 3.124, "step": 33510 }, { "epoch": 1.64, "grad_norm": 0.6077942848205566, "learning_rate": 0.00025546240654255535, "loss": 2.9548, "step": 33511 }, { "epoch": 1.64, "grad_norm": 0.6073379516601562, "learning_rate": 0.00025544718102188656, "loss": 2.8528, "step": 33512 }, { "epoch": 1.64, "grad_norm": 0.6286836266517639, "learning_rate": 0.00025543195561856125, "loss": 2.9042, "step": 33513 }, { "epoch": 1.64, "grad_norm": 0.6575766801834106, "learning_rate": 0.0002554167303326197, "loss": 3.1573, "step": 33514 }, { "epoch": 1.64, "grad_norm": 0.5965009331703186, "learning_rate": 0.0002554015051641019, "loss": 3.0997, "step": 33515 }, { "epoch": 1.64, "grad_norm": 0.6113033890724182, "learning_rate": 0.0002553862801130478, "loss": 2.9108, "step": 33516 }, { "epoch": 1.64, "grad_norm": 0.6287041306495667, "learning_rate": 0.00025537105517949786, "loss": 2.875, "step": 33517 }, { "epoch": 1.64, "grad_norm": 0.6469335556030273, "learning_rate": 0.00025535583036349186, "loss": 3.1125, "step": 33518 }, { "epoch": 1.64, "grad_norm": 0.6197258830070496, "learning_rate": 0.0002553406056650701, "loss": 3.0977, "step": 33519 }, { "epoch": 1.64, "grad_norm": 0.6109700202941895, "learning_rate": 0.0002553253810842724, "loss": 2.9279, "step": 33520 }, { "epoch": 1.64, "grad_norm": 0.6416023373603821, "learning_rate": 0.0002553101566211391, "loss": 2.9842, "step": 33521 }, { "epoch": 1.64, "grad_norm": 0.5842431783676147, "learning_rate": 0.00025529493227571034, "loss": 3.105, "step": 33522 }, { "epoch": 1.64, "grad_norm": 0.6129705905914307, "learning_rate": 0.0002552797080480261, "loss": 2.9826, "step": 33523 }, { "epoch": 1.64, "grad_norm": 0.5900617837905884, "learning_rate": 0.00025526448393812655, "loss": 3.2003, "step": 33524 }, { "epoch": 1.64, "grad_norm": 0.5936287641525269, "learning_rate": 0.00025524925994605164, "loss": 3.2126, "step": 33525 }, { "epoch": 1.64, "grad_norm": 0.5882431864738464, "learning_rate": 0.0002552340360718416, "loss": 2.9639, "step": 33526 }, { "epoch": 1.64, "grad_norm": 0.5971298217773438, "learning_rate": 0.00025521881231553654, "loss": 3.096, "step": 33527 }, { "epoch": 1.64, "grad_norm": 0.6381330490112305, "learning_rate": 0.0002552035886771764, "loss": 2.9462, "step": 33528 }, { "epoch": 1.64, "grad_norm": 0.5903002023696899, "learning_rate": 0.00025518836515680155, "loss": 2.8188, "step": 33529 }, { "epoch": 1.64, "grad_norm": 0.5994799733161926, "learning_rate": 0.0002551731417544518, "loss": 2.9601, "step": 33530 }, { "epoch": 1.64, "grad_norm": 0.5971114039421082, "learning_rate": 0.0002551579184701673, "loss": 2.9885, "step": 33531 }, { "epoch": 1.64, "grad_norm": 0.6384589076042175, "learning_rate": 0.0002551426953039884, "loss": 3.1067, "step": 33532 }, { "epoch": 1.64, "grad_norm": 0.591693639755249, "learning_rate": 0.0002551274722559549, "loss": 3.0607, "step": 33533 }, { "epoch": 1.64, "grad_norm": 0.6068736910820007, "learning_rate": 0.0002551122493261071, "loss": 3.1471, "step": 33534 }, { "epoch": 1.64, "grad_norm": 0.6241335868835449, "learning_rate": 0.00025509702651448486, "loss": 3.0201, "step": 33535 }, { "epoch": 1.64, "grad_norm": 0.6447053551673889, "learning_rate": 0.00025508180382112847, "loss": 2.9881, "step": 33536 }, { "epoch": 1.64, "grad_norm": 0.600275456905365, "learning_rate": 0.000255066581246078, "loss": 3.0984, "step": 33537 }, { "epoch": 1.64, "grad_norm": 0.5895094275474548, "learning_rate": 0.00025505135878937344, "loss": 3.1595, "step": 33538 }, { "epoch": 1.64, "grad_norm": 0.6206028461456299, "learning_rate": 0.000255036136451055, "loss": 3.1758, "step": 33539 }, { "epoch": 1.64, "grad_norm": 0.6393868923187256, "learning_rate": 0.00025502091423116273, "loss": 2.9527, "step": 33540 }, { "epoch": 1.64, "grad_norm": 0.672612190246582, "learning_rate": 0.0002550056921297366, "loss": 2.9975, "step": 33541 }, { "epoch": 1.64, "grad_norm": 0.6368157863616943, "learning_rate": 0.00025499047014681695, "loss": 3.0863, "step": 33542 }, { "epoch": 1.64, "grad_norm": 0.621232271194458, "learning_rate": 0.00025497524828244365, "loss": 3.052, "step": 33543 }, { "epoch": 1.64, "grad_norm": 0.6463088989257812, "learning_rate": 0.00025496002653665697, "loss": 2.6657, "step": 33544 }, { "epoch": 1.64, "grad_norm": 0.6010912656784058, "learning_rate": 0.00025494480490949686, "loss": 2.9976, "step": 33545 }, { "epoch": 1.64, "grad_norm": 0.6122451424598694, "learning_rate": 0.00025492958340100344, "loss": 2.9951, "step": 33546 }, { "epoch": 1.64, "grad_norm": 0.6520957350730896, "learning_rate": 0.00025491436201121687, "loss": 2.9447, "step": 33547 }, { "epoch": 1.64, "grad_norm": 0.6786126494407654, "learning_rate": 0.00025489914074017716, "loss": 3.0369, "step": 33548 }, { "epoch": 1.64, "grad_norm": 0.5904258489608765, "learning_rate": 0.0002548839195879245, "loss": 2.9875, "step": 33549 }, { "epoch": 1.64, "grad_norm": 0.6132463216781616, "learning_rate": 0.0002548686985544988, "loss": 2.8802, "step": 33550 }, { "epoch": 1.64, "grad_norm": 0.6229579448699951, "learning_rate": 0.00025485347763994033, "loss": 3.1264, "step": 33551 }, { "epoch": 1.64, "grad_norm": 0.5817505717277527, "learning_rate": 0.0002548382568442892, "loss": 2.837, "step": 33552 }, { "epoch": 1.64, "grad_norm": 0.6309376955032349, "learning_rate": 0.00025482303616758525, "loss": 2.9213, "step": 33553 }, { "epoch": 1.64, "grad_norm": 0.6208295822143555, "learning_rate": 0.00025480781560986883, "loss": 2.9553, "step": 33554 }, { "epoch": 1.64, "grad_norm": 0.6129673719406128, "learning_rate": 0.0002547925951711799, "loss": 2.7496, "step": 33555 }, { "epoch": 1.64, "grad_norm": 0.6032195091247559, "learning_rate": 0.0002547773748515585, "loss": 2.9992, "step": 33556 }, { "epoch": 1.64, "grad_norm": 0.6073378920555115, "learning_rate": 0.0002547621546510449, "loss": 2.8163, "step": 33557 }, { "epoch": 1.64, "grad_norm": 0.6146911382675171, "learning_rate": 0.00025474693456967905, "loss": 2.9754, "step": 33558 }, { "epoch": 1.64, "grad_norm": 0.6286951899528503, "learning_rate": 0.0002547317146075011, "loss": 2.9883, "step": 33559 }, { "epoch": 1.64, "grad_norm": 0.6064934134483337, "learning_rate": 0.000254716494764551, "loss": 3.0692, "step": 33560 }, { "epoch": 1.64, "grad_norm": 0.6015488505363464, "learning_rate": 0.0002547012750408689, "loss": 3.0608, "step": 33561 }, { "epoch": 1.64, "grad_norm": 0.6412321925163269, "learning_rate": 0.0002546860554364951, "loss": 3.3523, "step": 33562 }, { "epoch": 1.64, "grad_norm": 0.6053003072738647, "learning_rate": 0.0002546708359514694, "loss": 2.8756, "step": 33563 }, { "epoch": 1.64, "grad_norm": 0.6287083029747009, "learning_rate": 0.00025465561658583204, "loss": 3.047, "step": 33564 }, { "epoch": 1.64, "grad_norm": 0.6067085266113281, "learning_rate": 0.000254640397339623, "loss": 3.1418, "step": 33565 }, { "epoch": 1.64, "grad_norm": 0.6111128330230713, "learning_rate": 0.00025462517821288246, "loss": 3.0786, "step": 33566 }, { "epoch": 1.65, "grad_norm": 0.6282280087471008, "learning_rate": 0.00025460995920565055, "loss": 3.0406, "step": 33567 }, { "epoch": 1.65, "grad_norm": 0.6765866875648499, "learning_rate": 0.0002545947403179671, "loss": 3.1285, "step": 33568 }, { "epoch": 1.65, "grad_norm": 0.6518777012825012, "learning_rate": 0.0002545795215498725, "loss": 3.0133, "step": 33569 }, { "epoch": 1.65, "grad_norm": 0.6171415448188782, "learning_rate": 0.0002545643029014067, "loss": 3.1624, "step": 33570 }, { "epoch": 1.65, "grad_norm": 0.6353912949562073, "learning_rate": 0.0002545490843726098, "loss": 2.9545, "step": 33571 }, { "epoch": 1.65, "grad_norm": 0.6414846181869507, "learning_rate": 0.0002545338659635217, "loss": 3.1739, "step": 33572 }, { "epoch": 1.65, "grad_norm": 0.6102538704872131, "learning_rate": 0.00025451864767418275, "loss": 3.1158, "step": 33573 }, { "epoch": 1.65, "grad_norm": 0.6371604204177856, "learning_rate": 0.00025450342950463303, "loss": 3.0714, "step": 33574 }, { "epoch": 1.65, "grad_norm": 0.5931726694107056, "learning_rate": 0.0002544882114549123, "loss": 3.1613, "step": 33575 }, { "epoch": 1.65, "grad_norm": 0.614202618598938, "learning_rate": 0.00025447299352506105, "loss": 2.8824, "step": 33576 }, { "epoch": 1.65, "grad_norm": 0.6412155628204346, "learning_rate": 0.0002544577757151191, "loss": 2.6015, "step": 33577 }, { "epoch": 1.65, "grad_norm": 0.6213472485542297, "learning_rate": 0.0002544425580251265, "loss": 3.0039, "step": 33578 }, { "epoch": 1.65, "grad_norm": 0.6479688286781311, "learning_rate": 0.0002544273404551236, "loss": 2.9294, "step": 33579 }, { "epoch": 1.65, "grad_norm": 0.6218825578689575, "learning_rate": 0.00025441212300515026, "loss": 3.1985, "step": 33580 }, { "epoch": 1.65, "grad_norm": 0.6288421750068665, "learning_rate": 0.0002543969056752467, "loss": 2.974, "step": 33581 }, { "epoch": 1.65, "grad_norm": 0.6092920303344727, "learning_rate": 0.00025438168846545275, "loss": 3.1613, "step": 33582 }, { "epoch": 1.65, "grad_norm": 0.6967260241508484, "learning_rate": 0.0002543664713758087, "loss": 3.1719, "step": 33583 }, { "epoch": 1.65, "grad_norm": 0.6147881150245667, "learning_rate": 0.00025435125440635464, "loss": 2.9597, "step": 33584 }, { "epoch": 1.65, "grad_norm": 0.6140986680984497, "learning_rate": 0.00025433603755713055, "loss": 2.8277, "step": 33585 }, { "epoch": 1.65, "grad_norm": 0.6095908880233765, "learning_rate": 0.00025432082082817663, "loss": 2.9332, "step": 33586 }, { "epoch": 1.65, "grad_norm": 0.6159641146659851, "learning_rate": 0.0002543056042195327, "loss": 2.96, "step": 33587 }, { "epoch": 1.65, "grad_norm": 0.5917350053787231, "learning_rate": 0.00025429038773123916, "loss": 2.9545, "step": 33588 }, { "epoch": 1.65, "grad_norm": 0.6371214985847473, "learning_rate": 0.000254275171363336, "loss": 2.9997, "step": 33589 }, { "epoch": 1.65, "grad_norm": 0.609025239944458, "learning_rate": 0.00025425995511586303, "loss": 3.2217, "step": 33590 }, { "epoch": 1.65, "grad_norm": 0.6314291954040527, "learning_rate": 0.0002542447389888607, "loss": 3.135, "step": 33591 }, { "epoch": 1.65, "grad_norm": 0.6252784729003906, "learning_rate": 0.00025422952298236886, "loss": 3.072, "step": 33592 }, { "epoch": 1.65, "grad_norm": 0.5745939612388611, "learning_rate": 0.0002542143070964276, "loss": 2.9007, "step": 33593 }, { "epoch": 1.65, "grad_norm": 0.6394028663635254, "learning_rate": 0.0002541990913310772, "loss": 3.1977, "step": 33594 }, { "epoch": 1.65, "grad_norm": 0.5794762969017029, "learning_rate": 0.00025418387568635747, "loss": 3.063, "step": 33595 }, { "epoch": 1.65, "grad_norm": 0.6263272762298584, "learning_rate": 0.00025416866016230865, "loss": 2.9177, "step": 33596 }, { "epoch": 1.65, "grad_norm": 0.6222317218780518, "learning_rate": 0.0002541534447589706, "loss": 2.9814, "step": 33597 }, { "epoch": 1.65, "grad_norm": 0.6260561347007751, "learning_rate": 0.0002541382294763837, "loss": 3.0318, "step": 33598 }, { "epoch": 1.65, "grad_norm": 0.6080615520477295, "learning_rate": 0.00025412301431458785, "loss": 3.0894, "step": 33599 }, { "epoch": 1.65, "grad_norm": 0.617087721824646, "learning_rate": 0.0002541077992736231, "loss": 2.9801, "step": 33600 }, { "epoch": 1.65, "grad_norm": 0.6065132021903992, "learning_rate": 0.0002540925843535297, "loss": 3.0377, "step": 33601 }, { "epoch": 1.65, "grad_norm": 0.5966345071792603, "learning_rate": 0.0002540773695543475, "loss": 2.8676, "step": 33602 }, { "epoch": 1.65, "grad_norm": 0.5771703720092773, "learning_rate": 0.0002540621548761166, "loss": 2.9713, "step": 33603 }, { "epoch": 1.65, "grad_norm": 0.6102816462516785, "learning_rate": 0.0002540469403188773, "loss": 3.0297, "step": 33604 }, { "epoch": 1.65, "grad_norm": 0.5968355536460876, "learning_rate": 0.00025403172588266934, "loss": 2.9866, "step": 33605 }, { "epoch": 1.65, "grad_norm": 0.6273283958435059, "learning_rate": 0.00025401651156753316, "loss": 3.0427, "step": 33606 }, { "epoch": 1.65, "grad_norm": 0.6076398491859436, "learning_rate": 0.00025400129737350853, "loss": 2.7761, "step": 33607 }, { "epoch": 1.65, "grad_norm": 0.6098259091377258, "learning_rate": 0.0002539860833006356, "loss": 3.0196, "step": 33608 }, { "epoch": 1.65, "grad_norm": 0.5994309186935425, "learning_rate": 0.0002539708693489546, "loss": 3.0615, "step": 33609 }, { "epoch": 1.65, "grad_norm": 0.6185238361358643, "learning_rate": 0.00025395565551850535, "loss": 2.9607, "step": 33610 }, { "epoch": 1.65, "grad_norm": 0.6246052384376526, "learning_rate": 0.0002539404418093282, "loss": 2.9347, "step": 33611 }, { "epoch": 1.65, "grad_norm": 0.616183876991272, "learning_rate": 0.00025392522822146284, "loss": 2.8803, "step": 33612 }, { "epoch": 1.65, "grad_norm": 0.5878469944000244, "learning_rate": 0.00025391001475494964, "loss": 3.1297, "step": 33613 }, { "epoch": 1.65, "grad_norm": 0.6638484597206116, "learning_rate": 0.0002538948014098287, "loss": 2.9255, "step": 33614 }, { "epoch": 1.65, "grad_norm": 0.5929738879203796, "learning_rate": 0.00025387958818613986, "loss": 3.0306, "step": 33615 }, { "epoch": 1.65, "grad_norm": 0.6322368383407593, "learning_rate": 0.00025386437508392336, "loss": 3.0476, "step": 33616 }, { "epoch": 1.65, "grad_norm": 0.6140685081481934, "learning_rate": 0.00025384916210321925, "loss": 3.4163, "step": 33617 }, { "epoch": 1.65, "grad_norm": 0.685403048992157, "learning_rate": 0.00025383394924406743, "loss": 2.907, "step": 33618 }, { "epoch": 1.65, "grad_norm": 0.6040143966674805, "learning_rate": 0.00025381873650650823, "loss": 3.2015, "step": 33619 }, { "epoch": 1.65, "grad_norm": 0.6561526656150818, "learning_rate": 0.00025380352389058156, "loss": 2.9761, "step": 33620 }, { "epoch": 1.65, "grad_norm": 0.5854026675224304, "learning_rate": 0.0002537883113963276, "loss": 3.077, "step": 33621 }, { "epoch": 1.65, "grad_norm": 0.5917167067527771, "learning_rate": 0.0002537730990237862, "loss": 2.972, "step": 33622 }, { "epoch": 1.65, "grad_norm": 0.6419681906700134, "learning_rate": 0.0002537578867729975, "loss": 3.0175, "step": 33623 }, { "epoch": 1.65, "grad_norm": 0.6021273732185364, "learning_rate": 0.0002537426746440018, "loss": 3.1386, "step": 33624 }, { "epoch": 1.65, "grad_norm": 0.6152304410934448, "learning_rate": 0.0002537274626368389, "loss": 2.7386, "step": 33625 }, { "epoch": 1.65, "grad_norm": 0.6640774011611938, "learning_rate": 0.00025371225075154904, "loss": 2.9014, "step": 33626 }, { "epoch": 1.65, "grad_norm": 0.6067616939544678, "learning_rate": 0.00025369703898817206, "loss": 2.9653, "step": 33627 }, { "epoch": 1.65, "grad_norm": 0.5870032906532288, "learning_rate": 0.00025368182734674825, "loss": 3.0218, "step": 33628 }, { "epoch": 1.65, "grad_norm": 0.5886638164520264, "learning_rate": 0.0002536666158273177, "loss": 3.0415, "step": 33629 }, { "epoch": 1.65, "grad_norm": 0.640203595161438, "learning_rate": 0.0002536514044299202, "loss": 3.0191, "step": 33630 }, { "epoch": 1.65, "grad_norm": 0.6461527943611145, "learning_rate": 0.00025363619315459606, "loss": 2.9528, "step": 33631 }, { "epoch": 1.65, "grad_norm": 0.6292715668678284, "learning_rate": 0.00025362098200138524, "loss": 2.9719, "step": 33632 }, { "epoch": 1.65, "grad_norm": 0.6264772415161133, "learning_rate": 0.0002536057709703278, "loss": 3.0541, "step": 33633 }, { "epoch": 1.65, "grad_norm": 0.6099303960800171, "learning_rate": 0.00025359056006146394, "loss": 2.9152, "step": 33634 }, { "epoch": 1.65, "grad_norm": 0.6216869950294495, "learning_rate": 0.0002535753492748335, "loss": 3.1676, "step": 33635 }, { "epoch": 1.65, "grad_norm": 0.6083691716194153, "learning_rate": 0.0002535601386104768, "loss": 3.0394, "step": 33636 }, { "epoch": 1.65, "grad_norm": 0.5741828083992004, "learning_rate": 0.00025354492806843354, "loss": 3.0786, "step": 33637 }, { "epoch": 1.65, "grad_norm": 0.634376049041748, "learning_rate": 0.00025352971764874415, "loss": 3.148, "step": 33638 }, { "epoch": 1.65, "grad_norm": 0.6192229390144348, "learning_rate": 0.00025351450735144854, "loss": 3.0148, "step": 33639 }, { "epoch": 1.65, "grad_norm": 0.6180740594863892, "learning_rate": 0.0002534992971765867, "loss": 3.1087, "step": 33640 }, { "epoch": 1.65, "grad_norm": 0.5931870937347412, "learning_rate": 0.0002534840871241988, "loss": 3.1503, "step": 33641 }, { "epoch": 1.65, "grad_norm": 0.5776154398918152, "learning_rate": 0.00025346887719432485, "loss": 3.0434, "step": 33642 }, { "epoch": 1.65, "grad_norm": 0.61803138256073, "learning_rate": 0.0002534536673870049, "loss": 3.0197, "step": 33643 }, { "epoch": 1.65, "grad_norm": 0.6241032481193542, "learning_rate": 0.0002534384577022791, "loss": 3.0501, "step": 33644 }, { "epoch": 1.65, "grad_norm": 0.6268068552017212, "learning_rate": 0.00025342324814018734, "loss": 2.8237, "step": 33645 }, { "epoch": 1.65, "grad_norm": 0.5747461318969727, "learning_rate": 0.0002534080387007699, "loss": 3.3158, "step": 33646 }, { "epoch": 1.65, "grad_norm": 0.6452773213386536, "learning_rate": 0.0002533928293840667, "loss": 3.1601, "step": 33647 }, { "epoch": 1.65, "grad_norm": 0.6670741438865662, "learning_rate": 0.0002533776201901177, "loss": 3.0166, "step": 33648 }, { "epoch": 1.65, "grad_norm": 0.585372269153595, "learning_rate": 0.0002533624111189632, "loss": 3.0088, "step": 33649 }, { "epoch": 1.65, "grad_norm": 0.5669026970863342, "learning_rate": 0.0002533472021706431, "loss": 2.8939, "step": 33650 }, { "epoch": 1.65, "grad_norm": 0.6074293851852417, "learning_rate": 0.0002533319933451975, "loss": 3.2963, "step": 33651 }, { "epoch": 1.65, "grad_norm": 0.5969696640968323, "learning_rate": 0.0002533167846426664, "loss": 2.9618, "step": 33652 }, { "epoch": 1.65, "grad_norm": 0.6585623621940613, "learning_rate": 0.00025330157606309, "loss": 2.9734, "step": 33653 }, { "epoch": 1.65, "grad_norm": 0.5954491496086121, "learning_rate": 0.0002532863676065082, "loss": 2.9167, "step": 33654 }, { "epoch": 1.65, "grad_norm": 0.6330291628837585, "learning_rate": 0.00025327115927296104, "loss": 3.005, "step": 33655 }, { "epoch": 1.65, "grad_norm": 0.6088626980781555, "learning_rate": 0.0002532559510624888, "loss": 3.2036, "step": 33656 }, { "epoch": 1.65, "grad_norm": 0.6002334356307983, "learning_rate": 0.0002532407429751313, "loss": 3.0143, "step": 33657 }, { "epoch": 1.65, "grad_norm": 0.6352154612541199, "learning_rate": 0.0002532255350109287, "loss": 3.0687, "step": 33658 }, { "epoch": 1.65, "grad_norm": 0.5860254764556885, "learning_rate": 0.000253210327169921, "loss": 3.0877, "step": 33659 }, { "epoch": 1.65, "grad_norm": 0.6036262512207031, "learning_rate": 0.0002531951194521483, "loss": 2.8906, "step": 33660 }, { "epoch": 1.65, "grad_norm": 0.5720845460891724, "learning_rate": 0.00025317991185765077, "loss": 2.8707, "step": 33661 }, { "epoch": 1.65, "grad_norm": 0.5837738513946533, "learning_rate": 0.0002531647043864682, "loss": 3.0003, "step": 33662 }, { "epoch": 1.65, "grad_norm": 0.5987973213195801, "learning_rate": 0.00025314949703864086, "loss": 3.1969, "step": 33663 }, { "epoch": 1.65, "grad_norm": 0.6149620413780212, "learning_rate": 0.00025313428981420873, "loss": 2.9412, "step": 33664 }, { "epoch": 1.65, "grad_norm": 0.6203813552856445, "learning_rate": 0.00025311908271321177, "loss": 3.0063, "step": 33665 }, { "epoch": 1.65, "grad_norm": 0.6189368367195129, "learning_rate": 0.00025310387573569026, "loss": 3.0654, "step": 33666 }, { "epoch": 1.65, "grad_norm": 0.6118205189704895, "learning_rate": 0.000253088668881684, "loss": 2.9288, "step": 33667 }, { "epoch": 1.65, "grad_norm": 0.6222481727600098, "learning_rate": 0.0002530734621512332, "loss": 3.0051, "step": 33668 }, { "epoch": 1.65, "grad_norm": 0.631308913230896, "learning_rate": 0.0002530582555443779, "loss": 3.1259, "step": 33669 }, { "epoch": 1.65, "grad_norm": 0.6014788150787354, "learning_rate": 0.000253043049061158, "loss": 2.9009, "step": 33670 }, { "epoch": 1.65, "grad_norm": 0.5971441268920898, "learning_rate": 0.0002530278427016138, "loss": 3.2945, "step": 33671 }, { "epoch": 1.65, "grad_norm": 0.606205940246582, "learning_rate": 0.00025301263646578524, "loss": 2.8732, "step": 33672 }, { "epoch": 1.65, "grad_norm": 0.5801162123680115, "learning_rate": 0.00025299743035371233, "loss": 2.9203, "step": 33673 }, { "epoch": 1.65, "grad_norm": 0.6619035005569458, "learning_rate": 0.0002529822243654351, "loss": 2.8384, "step": 33674 }, { "epoch": 1.65, "grad_norm": 0.614162027835846, "learning_rate": 0.0002529670185009936, "loss": 2.9881, "step": 33675 }, { "epoch": 1.65, "grad_norm": 0.6145156621932983, "learning_rate": 0.0002529518127604281, "loss": 3.2938, "step": 33676 }, { "epoch": 1.65, "grad_norm": 0.6185023784637451, "learning_rate": 0.0002529366071437782, "loss": 2.9915, "step": 33677 }, { "epoch": 1.65, "grad_norm": 0.640781581401825, "learning_rate": 0.0002529214016510845, "loss": 2.879, "step": 33678 }, { "epoch": 1.65, "grad_norm": 0.616585910320282, "learning_rate": 0.00025290619628238656, "loss": 2.9472, "step": 33679 }, { "epoch": 1.65, "grad_norm": 0.6268201470375061, "learning_rate": 0.0002528909910377247, "loss": 3.0141, "step": 33680 }, { "epoch": 1.65, "grad_norm": 0.6437908411026001, "learning_rate": 0.00025287578591713894, "loss": 2.9349, "step": 33681 }, { "epoch": 1.65, "grad_norm": 0.6435546875, "learning_rate": 0.00025286058092066925, "loss": 2.9744, "step": 33682 }, { "epoch": 1.65, "grad_norm": 0.6060557961463928, "learning_rate": 0.0002528453760483558, "loss": 3.0914, "step": 33683 }, { "epoch": 1.65, "grad_norm": 0.5863337516784668, "learning_rate": 0.00025283017130023843, "loss": 3.1313, "step": 33684 }, { "epoch": 1.65, "grad_norm": 0.6209391951560974, "learning_rate": 0.0002528149666763573, "loss": 3.1179, "step": 33685 }, { "epoch": 1.65, "grad_norm": 0.6263371706008911, "learning_rate": 0.0002527997621767526, "loss": 3.0119, "step": 33686 }, { "epoch": 1.65, "grad_norm": 0.6627563238143921, "learning_rate": 0.0002527845578014642, "loss": 2.9974, "step": 33687 }, { "epoch": 1.65, "grad_norm": 0.7270352244377136, "learning_rate": 0.0002527693535505322, "loss": 3.0584, "step": 33688 }, { "epoch": 1.65, "grad_norm": 0.5933760404586792, "learning_rate": 0.0002527541494239965, "loss": 2.9056, "step": 33689 }, { "epoch": 1.65, "grad_norm": 0.5804953575134277, "learning_rate": 0.00025273894542189736, "loss": 3.1185, "step": 33690 }, { "epoch": 1.65, "grad_norm": 0.6337752938270569, "learning_rate": 0.0002527237415442748, "loss": 2.8029, "step": 33691 }, { "epoch": 1.65, "grad_norm": 0.5905575156211853, "learning_rate": 0.00025270853779116873, "loss": 2.9804, "step": 33692 }, { "epoch": 1.65, "grad_norm": 0.8519971370697021, "learning_rate": 0.00025269333416261936, "loss": 3.0931, "step": 33693 }, { "epoch": 1.65, "grad_norm": 0.5886369347572327, "learning_rate": 0.00025267813065866655, "loss": 3.0487, "step": 33694 }, { "epoch": 1.65, "grad_norm": 0.5819816589355469, "learning_rate": 0.00025266292727935043, "loss": 2.9745, "step": 33695 }, { "epoch": 1.65, "grad_norm": 0.5780503749847412, "learning_rate": 0.00025264772402471117, "loss": 3.2257, "step": 33696 }, { "epoch": 1.65, "grad_norm": 0.6502557396888733, "learning_rate": 0.0002526325208947886, "loss": 3.067, "step": 33697 }, { "epoch": 1.65, "grad_norm": 0.6687465310096741, "learning_rate": 0.00025261731788962295, "loss": 3.1206, "step": 33698 }, { "epoch": 1.65, "grad_norm": 0.6016365885734558, "learning_rate": 0.000252602115009254, "loss": 3.0287, "step": 33699 }, { "epoch": 1.65, "grad_norm": 0.6240675449371338, "learning_rate": 0.000252586912253722, "loss": 2.9064, "step": 33700 }, { "epoch": 1.65, "grad_norm": 0.582499086856842, "learning_rate": 0.00025257170962306706, "loss": 3.2111, "step": 33701 }, { "epoch": 1.65, "grad_norm": 0.6195961236953735, "learning_rate": 0.00025255650711732895, "loss": 3.008, "step": 33702 }, { "epoch": 1.65, "grad_norm": 0.5937759280204773, "learning_rate": 0.00025254130473654807, "loss": 2.8966, "step": 33703 }, { "epoch": 1.65, "grad_norm": 0.6716198921203613, "learning_rate": 0.00025252610248076415, "loss": 2.8721, "step": 33704 }, { "epoch": 1.65, "grad_norm": 0.6009177565574646, "learning_rate": 0.0002525109003500172, "loss": 3.0408, "step": 33705 }, { "epoch": 1.65, "grad_norm": 0.6476081013679504, "learning_rate": 0.0002524956983443476, "loss": 2.9311, "step": 33706 }, { "epoch": 1.65, "grad_norm": 0.6230241060256958, "learning_rate": 0.00025248049646379506, "loss": 2.6999, "step": 33707 }, { "epoch": 1.65, "grad_norm": 0.6477628946304321, "learning_rate": 0.0002524652947083999, "loss": 3.2673, "step": 33708 }, { "epoch": 1.65, "grad_norm": 0.6434798240661621, "learning_rate": 0.0002524500930782019, "loss": 2.8493, "step": 33709 }, { "epoch": 1.65, "grad_norm": 0.6150158047676086, "learning_rate": 0.00025243489157324113, "loss": 2.9382, "step": 33710 }, { "epoch": 1.65, "grad_norm": 0.6013590693473816, "learning_rate": 0.0002524196901935579, "loss": 2.9773, "step": 33711 }, { "epoch": 1.65, "grad_norm": 0.6192983388900757, "learning_rate": 0.00025240448893919194, "loss": 3.2487, "step": 33712 }, { "epoch": 1.65, "grad_norm": 0.6279726624488831, "learning_rate": 0.00025238928781018345, "loss": 2.9022, "step": 33713 }, { "epoch": 1.65, "grad_norm": 0.6031069755554199, "learning_rate": 0.00025237408680657227, "loss": 3.0851, "step": 33714 }, { "epoch": 1.65, "grad_norm": 0.6207053065299988, "learning_rate": 0.0002523588859283987, "loss": 3.0118, "step": 33715 }, { "epoch": 1.65, "grad_norm": 0.5942187309265137, "learning_rate": 0.00025234368517570267, "loss": 3.1582, "step": 33716 }, { "epoch": 1.65, "grad_norm": 0.6146659851074219, "learning_rate": 0.0002523284845485241, "loss": 3.0741, "step": 33717 }, { "epoch": 1.65, "grad_norm": 0.6310451030731201, "learning_rate": 0.00025231328404690323, "loss": 3.027, "step": 33718 }, { "epoch": 1.65, "grad_norm": 0.6505426168441772, "learning_rate": 0.00025229808367087993, "loss": 2.864, "step": 33719 }, { "epoch": 1.65, "grad_norm": 0.6088349223136902, "learning_rate": 0.00025228288342049425, "loss": 2.9772, "step": 33720 }, { "epoch": 1.65, "grad_norm": 0.6298375725746155, "learning_rate": 0.0002522676832957864, "loss": 3.1535, "step": 33721 }, { "epoch": 1.65, "grad_norm": 0.6122135519981384, "learning_rate": 0.00025225248329679626, "loss": 3.1195, "step": 33722 }, { "epoch": 1.65, "grad_norm": 0.5897640585899353, "learning_rate": 0.00025223728342356385, "loss": 3.0419, "step": 33723 }, { "epoch": 1.65, "grad_norm": 0.6169995069503784, "learning_rate": 0.0002522220836761292, "loss": 3.0293, "step": 33724 }, { "epoch": 1.65, "grad_norm": 0.6491032838821411, "learning_rate": 0.00025220688405453237, "loss": 2.9347, "step": 33725 }, { "epoch": 1.65, "grad_norm": 0.5842862725257874, "learning_rate": 0.0002521916845588135, "loss": 3.1327, "step": 33726 }, { "epoch": 1.65, "grad_norm": 0.6430948376655579, "learning_rate": 0.0002521764851890125, "loss": 2.9445, "step": 33727 }, { "epoch": 1.65, "grad_norm": 0.6189959645271301, "learning_rate": 0.00025216128594516955, "loss": 3.0916, "step": 33728 }, { "epoch": 1.65, "grad_norm": 0.608040452003479, "learning_rate": 0.0002521460868273244, "loss": 3.0271, "step": 33729 }, { "epoch": 1.65, "grad_norm": 0.6393671035766602, "learning_rate": 0.00025213088783551733, "loss": 2.8933, "step": 33730 }, { "epoch": 1.65, "grad_norm": 0.6282805800437927, "learning_rate": 0.00025211568896978825, "loss": 2.9764, "step": 33731 }, { "epoch": 1.65, "grad_norm": 0.5948448181152344, "learning_rate": 0.0002521004902301772, "loss": 2.9167, "step": 33732 }, { "epoch": 1.65, "grad_norm": 0.6147938370704651, "learning_rate": 0.0002520852916167243, "loss": 3.0297, "step": 33733 }, { "epoch": 1.65, "grad_norm": 0.6409828662872314, "learning_rate": 0.0002520700931294695, "loss": 2.8486, "step": 33734 }, { "epoch": 1.65, "grad_norm": 0.718755841255188, "learning_rate": 0.0002520548947684529, "loss": 2.9996, "step": 33735 }, { "epoch": 1.65, "grad_norm": 0.6078330874443054, "learning_rate": 0.00025203969653371435, "loss": 2.9222, "step": 33736 }, { "epoch": 1.65, "grad_norm": 0.6095114350318909, "learning_rate": 0.0002520244984252941, "loss": 2.8218, "step": 33737 }, { "epoch": 1.65, "grad_norm": 0.670408308506012, "learning_rate": 0.0002520093004432322, "loss": 2.9352, "step": 33738 }, { "epoch": 1.65, "grad_norm": 0.6077500581741333, "learning_rate": 0.0002519941025875683, "loss": 3.225, "step": 33739 }, { "epoch": 1.65, "grad_norm": 0.6481585502624512, "learning_rate": 0.0002519789048583429, "loss": 2.9603, "step": 33740 }, { "epoch": 1.65, "grad_norm": 0.5873180627822876, "learning_rate": 0.0002519637072555958, "loss": 3.0317, "step": 33741 }, { "epoch": 1.65, "grad_norm": 0.6104382872581482, "learning_rate": 0.00025194850977936693, "loss": 3.0169, "step": 33742 }, { "epoch": 1.65, "grad_norm": 0.5977993607521057, "learning_rate": 0.00025193331242969656, "loss": 3.1041, "step": 33743 }, { "epoch": 1.65, "grad_norm": 0.6217960119247437, "learning_rate": 0.0002519181152066246, "loss": 2.8854, "step": 33744 }, { "epoch": 1.65, "grad_norm": 0.6322939991950989, "learning_rate": 0.00025190291811019104, "loss": 2.9709, "step": 33745 }, { "epoch": 1.65, "grad_norm": 0.5825021862983704, "learning_rate": 0.00025188772114043584, "loss": 3.1036, "step": 33746 }, { "epoch": 1.65, "grad_norm": 0.6149255633354187, "learning_rate": 0.00025187252429739916, "loss": 3.0629, "step": 33747 }, { "epoch": 1.65, "grad_norm": 0.5964076519012451, "learning_rate": 0.0002518573275811211, "loss": 2.949, "step": 33748 }, { "epoch": 1.65, "grad_norm": 0.5930870175361633, "learning_rate": 0.0002518421309916415, "loss": 3.2019, "step": 33749 }, { "epoch": 1.65, "grad_norm": 0.6285166144371033, "learning_rate": 0.0002518269345290005, "loss": 3.1833, "step": 33750 }, { "epoch": 1.65, "grad_norm": 0.5890181064605713, "learning_rate": 0.000251811738193238, "loss": 3.284, "step": 33751 }, { "epoch": 1.65, "grad_norm": 0.5849533677101135, "learning_rate": 0.00025179654198439415, "loss": 3.0216, "step": 33752 }, { "epoch": 1.65, "grad_norm": 0.6618516445159912, "learning_rate": 0.000251781345902509, "loss": 3.1006, "step": 33753 }, { "epoch": 1.65, "grad_norm": 0.6029493808746338, "learning_rate": 0.0002517661499476224, "loss": 2.8438, "step": 33754 }, { "epoch": 1.65, "grad_norm": 0.628272294998169, "learning_rate": 0.00025175095411977453, "loss": 3.0567, "step": 33755 }, { "epoch": 1.65, "grad_norm": 0.6236722469329834, "learning_rate": 0.0002517357584190054, "loss": 3.056, "step": 33756 }, { "epoch": 1.65, "grad_norm": 0.5944138169288635, "learning_rate": 0.00025172056284535483, "loss": 3.1059, "step": 33757 }, { "epoch": 1.65, "grad_norm": 0.5836665034294128, "learning_rate": 0.0002517053673988632, "loss": 3.1147, "step": 33758 }, { "epoch": 1.65, "grad_norm": 0.6636527180671692, "learning_rate": 0.00025169017207957026, "loss": 2.7778, "step": 33759 }, { "epoch": 1.65, "grad_norm": 0.6323641538619995, "learning_rate": 0.00025167497688751615, "loss": 3.0488, "step": 33760 }, { "epoch": 1.65, "grad_norm": 0.6302624940872192, "learning_rate": 0.00025165978182274077, "loss": 3.1198, "step": 33761 }, { "epoch": 1.65, "grad_norm": 0.6282402873039246, "learning_rate": 0.0002516445868852842, "loss": 3.0372, "step": 33762 }, { "epoch": 1.65, "grad_norm": 0.5997104048728943, "learning_rate": 0.0002516293920751867, "loss": 2.7741, "step": 33763 }, { "epoch": 1.65, "grad_norm": 0.6254650950431824, "learning_rate": 0.0002516141973924878, "loss": 3.0331, "step": 33764 }, { "epoch": 1.65, "grad_norm": 0.6326666474342346, "learning_rate": 0.000251599002837228, "loss": 2.9852, "step": 33765 }, { "epoch": 1.65, "grad_norm": 0.5990265011787415, "learning_rate": 0.00025158380840944696, "loss": 2.9702, "step": 33766 }, { "epoch": 1.65, "grad_norm": 0.6204058527946472, "learning_rate": 0.00025156861410918487, "loss": 3.0526, "step": 33767 }, { "epoch": 1.65, "grad_norm": 0.747573971748352, "learning_rate": 0.0002515534199364818, "loss": 2.9046, "step": 33768 }, { "epoch": 1.65, "grad_norm": 0.6106170415878296, "learning_rate": 0.00025153822589137764, "loss": 3.1094, "step": 33769 }, { "epoch": 1.65, "grad_norm": 0.6426050066947937, "learning_rate": 0.0002515230319739126, "loss": 2.9108, "step": 33770 }, { "epoch": 1.66, "grad_norm": 0.6021739840507507, "learning_rate": 0.0002515078381841264, "loss": 3.1094, "step": 33771 }, { "epoch": 1.66, "grad_norm": 0.5746902823448181, "learning_rate": 0.00025149264452205923, "loss": 3.0454, "step": 33772 }, { "epoch": 1.66, "grad_norm": 0.5948019027709961, "learning_rate": 0.00025147745098775116, "loss": 2.9203, "step": 33773 }, { "epoch": 1.66, "grad_norm": 0.6490957736968994, "learning_rate": 0.0002514622575812422, "loss": 2.8963, "step": 33774 }, { "epoch": 1.66, "grad_norm": 0.6122204065322876, "learning_rate": 0.0002514470643025723, "loss": 2.8563, "step": 33775 }, { "epoch": 1.66, "grad_norm": 0.5842893123626709, "learning_rate": 0.00025143187115178134, "loss": 2.8726, "step": 33776 }, { "epoch": 1.66, "grad_norm": 0.62315434217453, "learning_rate": 0.0002514166781289096, "loss": 2.9784, "step": 33777 }, { "epoch": 1.66, "grad_norm": 0.6309679746627808, "learning_rate": 0.00025140148523399697, "loss": 3.1414, "step": 33778 }, { "epoch": 1.66, "grad_norm": 0.6170636415481567, "learning_rate": 0.0002513862924670834, "loss": 3.0576, "step": 33779 }, { "epoch": 1.66, "grad_norm": 0.6519901156425476, "learning_rate": 0.00025137109982820914, "loss": 3.0581, "step": 33780 }, { "epoch": 1.66, "grad_norm": 0.6126569509506226, "learning_rate": 0.00025135590731741394, "loss": 3.0446, "step": 33781 }, { "epoch": 1.66, "grad_norm": 0.5987186431884766, "learning_rate": 0.00025134071493473784, "loss": 2.9068, "step": 33782 }, { "epoch": 1.66, "grad_norm": 0.6122428178787231, "learning_rate": 0.0002513255226802211, "loss": 2.9485, "step": 33783 }, { "epoch": 1.66, "grad_norm": 0.6258891224861145, "learning_rate": 0.00025131033055390345, "loss": 3.0306, "step": 33784 }, { "epoch": 1.66, "grad_norm": 0.6125542521476746, "learning_rate": 0.00025129513855582514, "loss": 3.1461, "step": 33785 }, { "epoch": 1.66, "grad_norm": 0.610899806022644, "learning_rate": 0.00025127994668602593, "loss": 2.9816, "step": 33786 }, { "epoch": 1.66, "grad_norm": 0.6190662384033203, "learning_rate": 0.0002512647549445459, "loss": 2.9831, "step": 33787 }, { "epoch": 1.66, "grad_norm": 0.6101117134094238, "learning_rate": 0.00025124956333142534, "loss": 3.1565, "step": 33788 }, { "epoch": 1.66, "grad_norm": 0.6318680047988892, "learning_rate": 0.0002512343718467039, "loss": 3.1456, "step": 33789 }, { "epoch": 1.66, "grad_norm": 0.6346719264984131, "learning_rate": 0.0002512191804904219, "loss": 3.0291, "step": 33790 }, { "epoch": 1.66, "grad_norm": 0.6252193450927734, "learning_rate": 0.00025120398926261895, "loss": 2.9251, "step": 33791 }, { "epoch": 1.66, "grad_norm": 0.6464138627052307, "learning_rate": 0.00025118879816333545, "loss": 3.0129, "step": 33792 }, { "epoch": 1.66, "grad_norm": 0.6083313226699829, "learning_rate": 0.0002511736071926113, "loss": 2.9965, "step": 33793 }, { "epoch": 1.66, "grad_norm": 0.5951755046844482, "learning_rate": 0.0002511584163504863, "loss": 2.9157, "step": 33794 }, { "epoch": 1.66, "grad_norm": 0.604433000087738, "learning_rate": 0.00025114322563700084, "loss": 3.1928, "step": 33795 }, { "epoch": 1.66, "grad_norm": 0.6072720289230347, "learning_rate": 0.0002511280350521946, "loss": 2.8213, "step": 33796 }, { "epoch": 1.66, "grad_norm": 0.611731231212616, "learning_rate": 0.0002511128445961077, "loss": 3.159, "step": 33797 }, { "epoch": 1.66, "grad_norm": 0.5745161175727844, "learning_rate": 0.00025109765426878025, "loss": 2.951, "step": 33798 }, { "epoch": 1.66, "grad_norm": 0.5982853770256042, "learning_rate": 0.0002510824640702521, "loss": 3.1052, "step": 33799 }, { "epoch": 1.66, "grad_norm": 0.6100951433181763, "learning_rate": 0.00025106727400056334, "loss": 3.0418, "step": 33800 }, { "epoch": 1.66, "grad_norm": 0.6522631645202637, "learning_rate": 0.00025105208405975393, "loss": 2.8211, "step": 33801 }, { "epoch": 1.66, "grad_norm": 0.6171745657920837, "learning_rate": 0.0002510368942478639, "loss": 2.8845, "step": 33802 }, { "epoch": 1.66, "grad_norm": 0.6560838222503662, "learning_rate": 0.0002510217045649334, "loss": 3.0927, "step": 33803 }, { "epoch": 1.66, "grad_norm": 0.6423225998878479, "learning_rate": 0.0002510065150110021, "loss": 2.9911, "step": 33804 }, { "epoch": 1.66, "grad_norm": 0.6459085941314697, "learning_rate": 0.00025099132558611037, "loss": 2.8735, "step": 33805 }, { "epoch": 1.66, "grad_norm": 0.6427082419395447, "learning_rate": 0.000250976136290298, "loss": 3.0119, "step": 33806 }, { "epoch": 1.66, "grad_norm": 0.6143639087677002, "learning_rate": 0.0002509609471236051, "loss": 3.0391, "step": 33807 }, { "epoch": 1.66, "grad_norm": 0.6215494871139526, "learning_rate": 0.00025094575808607154, "loss": 2.9713, "step": 33808 }, { "epoch": 1.66, "grad_norm": 0.7413198351860046, "learning_rate": 0.0002509305691777374, "loss": 2.8757, "step": 33809 }, { "epoch": 1.66, "grad_norm": 0.5840345621109009, "learning_rate": 0.0002509153803986428, "loss": 2.9908, "step": 33810 }, { "epoch": 1.66, "grad_norm": 0.5827749967575073, "learning_rate": 0.0002509001917488276, "loss": 3.0006, "step": 33811 }, { "epoch": 1.66, "grad_norm": 0.6079306602478027, "learning_rate": 0.0002508850032283319, "loss": 3.0942, "step": 33812 }, { "epoch": 1.66, "grad_norm": 0.5841607451438904, "learning_rate": 0.0002508698148371955, "loss": 2.917, "step": 33813 }, { "epoch": 1.66, "grad_norm": 0.6027677059173584, "learning_rate": 0.0002508546265754587, "loss": 3.0351, "step": 33814 }, { "epoch": 1.66, "grad_norm": 0.5869815349578857, "learning_rate": 0.0002508394384431613, "loss": 2.9288, "step": 33815 }, { "epoch": 1.66, "grad_norm": 0.6005563139915466, "learning_rate": 0.00025082425044034335, "loss": 3.2591, "step": 33816 }, { "epoch": 1.66, "grad_norm": 0.6811476945877075, "learning_rate": 0.00025080906256704486, "loss": 2.9065, "step": 33817 }, { "epoch": 1.66, "grad_norm": 0.5969555377960205, "learning_rate": 0.0002507938748233059, "loss": 3.1142, "step": 33818 }, { "epoch": 1.66, "grad_norm": 0.6238395571708679, "learning_rate": 0.0002507786872091662, "loss": 2.9812, "step": 33819 }, { "epoch": 1.66, "grad_norm": 0.5949825644493103, "learning_rate": 0.0002507634997246662, "loss": 2.9406, "step": 33820 }, { "epoch": 1.66, "grad_norm": 0.5956006646156311, "learning_rate": 0.00025074831236984556, "loss": 3.0653, "step": 33821 }, { "epoch": 1.66, "grad_norm": 0.6946335434913635, "learning_rate": 0.00025073312514474445, "loss": 2.8979, "step": 33822 }, { "epoch": 1.66, "grad_norm": 0.6324100494384766, "learning_rate": 0.0002507179380494027, "loss": 3.1288, "step": 33823 }, { "epoch": 1.66, "grad_norm": 0.6476609706878662, "learning_rate": 0.00025070275108386047, "loss": 3.1207, "step": 33824 }, { "epoch": 1.66, "grad_norm": 0.6044153571128845, "learning_rate": 0.0002506875642481578, "loss": 3.0552, "step": 33825 }, { "epoch": 1.66, "grad_norm": 0.6101298332214355, "learning_rate": 0.00025067237754233443, "loss": 3.069, "step": 33826 }, { "epoch": 1.66, "grad_norm": 0.5707805752754211, "learning_rate": 0.00025065719096643065, "loss": 3.0449, "step": 33827 }, { "epoch": 1.66, "grad_norm": 0.5993731617927551, "learning_rate": 0.0002506420045204863, "loss": 2.9649, "step": 33828 }, { "epoch": 1.66, "grad_norm": 0.6095276474952698, "learning_rate": 0.00025062681820454133, "loss": 3.2008, "step": 33829 }, { "epoch": 1.66, "grad_norm": 0.6098636984825134, "learning_rate": 0.0002506116320186359, "loss": 3.1267, "step": 33830 }, { "epoch": 1.66, "grad_norm": 0.5973874926567078, "learning_rate": 0.0002505964459628098, "loss": 2.8534, "step": 33831 }, { "epoch": 1.66, "grad_norm": 0.657274067401886, "learning_rate": 0.0002505812600371034, "loss": 2.9407, "step": 33832 }, { "epoch": 1.66, "grad_norm": 0.6024130582809448, "learning_rate": 0.0002505660742415563, "loss": 3.1447, "step": 33833 }, { "epoch": 1.66, "grad_norm": 0.6005028486251831, "learning_rate": 0.0002505508885762086, "loss": 3.1241, "step": 33834 }, { "epoch": 1.66, "grad_norm": 0.6737073063850403, "learning_rate": 0.0002505357030411004, "loss": 3.0029, "step": 33835 }, { "epoch": 1.66, "grad_norm": 0.603809118270874, "learning_rate": 0.0002505205176362717, "loss": 3.0558, "step": 33836 }, { "epoch": 1.66, "grad_norm": 0.593603789806366, "learning_rate": 0.0002505053323617624, "loss": 3.203, "step": 33837 }, { "epoch": 1.66, "grad_norm": 0.639643132686615, "learning_rate": 0.0002504901472176124, "loss": 3.0018, "step": 33838 }, { "epoch": 1.66, "grad_norm": 0.6236515045166016, "learning_rate": 0.00025047496220386193, "loss": 3.0459, "step": 33839 }, { "epoch": 1.66, "grad_norm": 0.6432270407676697, "learning_rate": 0.00025045977732055094, "loss": 2.9573, "step": 33840 }, { "epoch": 1.66, "grad_norm": 0.6288147568702698, "learning_rate": 0.00025044459256771923, "loss": 2.9896, "step": 33841 }, { "epoch": 1.66, "grad_norm": 0.6194936037063599, "learning_rate": 0.0002504294079454071, "loss": 3.1635, "step": 33842 }, { "epoch": 1.66, "grad_norm": 0.6322032809257507, "learning_rate": 0.00025041422345365425, "loss": 3.0478, "step": 33843 }, { "epoch": 1.66, "grad_norm": 0.6737121939659119, "learning_rate": 0.0002503990390925007, "loss": 3.3406, "step": 33844 }, { "epoch": 1.66, "grad_norm": 0.6102167963981628, "learning_rate": 0.0002503838548619867, "loss": 3.1013, "step": 33845 }, { "epoch": 1.66, "grad_norm": 0.6064083576202393, "learning_rate": 0.00025036867076215204, "loss": 3.0313, "step": 33846 }, { "epoch": 1.66, "grad_norm": 0.6235889196395874, "learning_rate": 0.0002503534867930368, "loss": 3.0489, "step": 33847 }, { "epoch": 1.66, "grad_norm": 0.6064871549606323, "learning_rate": 0.0002503383029546808, "loss": 2.9426, "step": 33848 }, { "epoch": 1.66, "grad_norm": 0.6244485378265381, "learning_rate": 0.0002503231192471241, "loss": 2.994, "step": 33849 }, { "epoch": 1.66, "grad_norm": 0.6445832252502441, "learning_rate": 0.0002503079356704069, "loss": 3.1693, "step": 33850 }, { "epoch": 1.66, "grad_norm": 0.6180044412612915, "learning_rate": 0.000250292752224569, "loss": 3.1618, "step": 33851 }, { "epoch": 1.66, "grad_norm": 0.6148596405982971, "learning_rate": 0.00025027756890965043, "loss": 2.8679, "step": 33852 }, { "epoch": 1.66, "grad_norm": 0.6558221578598022, "learning_rate": 0.00025026238572569104, "loss": 3.1456, "step": 33853 }, { "epoch": 1.66, "grad_norm": 0.5984779596328735, "learning_rate": 0.00025024720267273113, "loss": 3.0931, "step": 33854 }, { "epoch": 1.66, "grad_norm": 0.5910894274711609, "learning_rate": 0.00025023201975081044, "loss": 2.8309, "step": 33855 }, { "epoch": 1.66, "grad_norm": 0.6671299338340759, "learning_rate": 0.000250216836959969, "loss": 2.9913, "step": 33856 }, { "epoch": 1.66, "grad_norm": 0.617927610874176, "learning_rate": 0.00025020165430024693, "loss": 3.0802, "step": 33857 }, { "epoch": 1.66, "grad_norm": 0.6495857238769531, "learning_rate": 0.000250186471771684, "loss": 2.8715, "step": 33858 }, { "epoch": 1.66, "grad_norm": 0.6269500851631165, "learning_rate": 0.00025017128937432034, "loss": 2.7541, "step": 33859 }, { "epoch": 1.66, "grad_norm": 0.6183517575263977, "learning_rate": 0.00025015610710819603, "loss": 3.0808, "step": 33860 }, { "epoch": 1.66, "grad_norm": 0.6176064014434814, "learning_rate": 0.00025014092497335083, "loss": 3.178, "step": 33861 }, { "epoch": 1.66, "grad_norm": 0.609695315361023, "learning_rate": 0.00025012574296982495, "loss": 3.0251, "step": 33862 }, { "epoch": 1.66, "grad_norm": 0.6329081654548645, "learning_rate": 0.0002501105610976581, "loss": 3.1642, "step": 33863 }, { "epoch": 1.66, "grad_norm": 0.6028696894645691, "learning_rate": 0.0002500953793568905, "loss": 2.8275, "step": 33864 }, { "epoch": 1.66, "grad_norm": 0.6294543147087097, "learning_rate": 0.00025008019774756216, "loss": 3.264, "step": 33865 }, { "epoch": 1.66, "grad_norm": 0.5953629016876221, "learning_rate": 0.0002500650162697128, "loss": 3.0756, "step": 33866 }, { "epoch": 1.66, "grad_norm": 0.6343550682067871, "learning_rate": 0.00025004983492338273, "loss": 3.1477, "step": 33867 }, { "epoch": 1.66, "grad_norm": 0.581670343875885, "learning_rate": 0.0002500346537086117, "loss": 3.1717, "step": 33868 }, { "epoch": 1.66, "grad_norm": 0.6213530898094177, "learning_rate": 0.00025001947262543974, "loss": 2.9365, "step": 33869 }, { "epoch": 1.66, "grad_norm": 0.6029473543167114, "learning_rate": 0.000250004291673907, "loss": 3.0754, "step": 33870 }, { "epoch": 1.66, "grad_norm": 0.6134632229804993, "learning_rate": 0.00024998911085405315, "loss": 2.8977, "step": 33871 }, { "epoch": 1.66, "grad_norm": 0.6177032589912415, "learning_rate": 0.00024997393016591853, "loss": 3.1839, "step": 33872 }, { "epoch": 1.66, "grad_norm": 0.6571273803710938, "learning_rate": 0.0002499587496095429, "loss": 3.0477, "step": 33873 }, { "epoch": 1.66, "grad_norm": 0.6338697075843811, "learning_rate": 0.0002499435691849662, "loss": 2.9806, "step": 33874 }, { "epoch": 1.66, "grad_norm": 0.6008161902427673, "learning_rate": 0.00024992838889222865, "loss": 2.9013, "step": 33875 }, { "epoch": 1.66, "grad_norm": 0.6442448496818542, "learning_rate": 0.00024991320873137, "loss": 2.9811, "step": 33876 }, { "epoch": 1.66, "grad_norm": 0.6157088875770569, "learning_rate": 0.0002498980287024304, "loss": 3.0265, "step": 33877 }, { "epoch": 1.66, "grad_norm": 0.5938071012496948, "learning_rate": 0.0002498828488054496, "loss": 2.9337, "step": 33878 }, { "epoch": 1.66, "grad_norm": 0.6022875905036926, "learning_rate": 0.00024986766904046785, "loss": 3.1691, "step": 33879 }, { "epoch": 1.66, "grad_norm": 0.6283571720123291, "learning_rate": 0.00024985248940752504, "loss": 3.0208, "step": 33880 }, { "epoch": 1.66, "grad_norm": 0.6032127141952515, "learning_rate": 0.000249837309906661, "loss": 2.9129, "step": 33881 }, { "epoch": 1.66, "grad_norm": 0.6097046136856079, "learning_rate": 0.00024982213053791595, "loss": 3.0705, "step": 33882 }, { "epoch": 1.66, "grad_norm": 0.5926503539085388, "learning_rate": 0.00024980695130132963, "loss": 3.0113, "step": 33883 }, { "epoch": 1.66, "grad_norm": 0.6158053874969482, "learning_rate": 0.0002497917721969423, "loss": 3.1628, "step": 33884 }, { "epoch": 1.66, "grad_norm": 0.6430631279945374, "learning_rate": 0.0002497765932247936, "loss": 3.0387, "step": 33885 }, { "epoch": 1.66, "grad_norm": 0.5978126525878906, "learning_rate": 0.0002497614143849238, "loss": 2.8056, "step": 33886 }, { "epoch": 1.66, "grad_norm": 0.5920930504798889, "learning_rate": 0.0002497462356773728, "loss": 2.8876, "step": 33887 }, { "epoch": 1.66, "grad_norm": 0.5528014302253723, "learning_rate": 0.0002497310571021804, "loss": 2.97, "step": 33888 }, { "epoch": 1.66, "grad_norm": 0.5907645225524902, "learning_rate": 0.00024971587865938686, "loss": 3.0249, "step": 33889 }, { "epoch": 1.66, "grad_norm": 0.5735505223274231, "learning_rate": 0.00024970070034903197, "loss": 2.9847, "step": 33890 }, { "epoch": 1.66, "grad_norm": 0.6211549639701843, "learning_rate": 0.0002496855221711557, "loss": 3.1012, "step": 33891 }, { "epoch": 1.66, "grad_norm": 0.6353756785392761, "learning_rate": 0.0002496703441257982, "loss": 3.062, "step": 33892 }, { "epoch": 1.66, "grad_norm": 0.5956559181213379, "learning_rate": 0.00024965516621299914, "loss": 2.9391, "step": 33893 }, { "epoch": 1.66, "grad_norm": 0.598713755607605, "learning_rate": 0.00024963998843279884, "loss": 2.964, "step": 33894 }, { "epoch": 1.66, "grad_norm": 0.6306696534156799, "learning_rate": 0.0002496248107852371, "loss": 2.8183, "step": 33895 }, { "epoch": 1.66, "grad_norm": 0.6647626161575317, "learning_rate": 0.00024960963327035383, "loss": 2.9461, "step": 33896 }, { "epoch": 1.66, "grad_norm": 0.6233382821083069, "learning_rate": 0.00024959445588818924, "loss": 3.1564, "step": 33897 }, { "epoch": 1.66, "grad_norm": 0.6287595629692078, "learning_rate": 0.00024957927863878304, "loss": 2.9366, "step": 33898 }, { "epoch": 1.66, "grad_norm": 0.6304962635040283, "learning_rate": 0.0002495641015221754, "loss": 3.1073, "step": 33899 }, { "epoch": 1.66, "grad_norm": 0.6396434307098389, "learning_rate": 0.00024954892453840606, "loss": 2.9138, "step": 33900 }, { "epoch": 1.66, "grad_norm": 0.5777924656867981, "learning_rate": 0.0002495337476875152, "loss": 2.9275, "step": 33901 }, { "epoch": 1.66, "grad_norm": 0.6460705995559692, "learning_rate": 0.00024951857096954286, "loss": 2.9709, "step": 33902 }, { "epoch": 1.66, "grad_norm": 0.607254683971405, "learning_rate": 0.0002495033943845287, "loss": 3.1602, "step": 33903 }, { "epoch": 1.66, "grad_norm": 0.5625812411308289, "learning_rate": 0.0002494882179325131, "loss": 3.0044, "step": 33904 }, { "epoch": 1.66, "grad_norm": 0.597536563873291, "learning_rate": 0.00024947304161353564, "loss": 3.281, "step": 33905 }, { "epoch": 1.66, "grad_norm": 0.7313137650489807, "learning_rate": 0.0002494578654276365, "loss": 3.086, "step": 33906 }, { "epoch": 1.66, "grad_norm": 0.6183013319969177, "learning_rate": 0.00024944268937485565, "loss": 2.8716, "step": 33907 }, { "epoch": 1.66, "grad_norm": 0.6373917460441589, "learning_rate": 0.000249427513455233, "loss": 3.0631, "step": 33908 }, { "epoch": 1.66, "grad_norm": 0.6329418420791626, "learning_rate": 0.0002494123376688086, "loss": 3.0995, "step": 33909 }, { "epoch": 1.66, "grad_norm": 0.6428967118263245, "learning_rate": 0.00024939716201562227, "loss": 3.1429, "step": 33910 }, { "epoch": 1.66, "grad_norm": 0.5830432772636414, "learning_rate": 0.00024938198649571407, "loss": 2.8414, "step": 33911 }, { "epoch": 1.66, "grad_norm": 0.6218639612197876, "learning_rate": 0.0002493668111091241, "loss": 3.0054, "step": 33912 }, { "epoch": 1.66, "grad_norm": 0.6176791787147522, "learning_rate": 0.0002493516358558921, "loss": 2.9076, "step": 33913 }, { "epoch": 1.66, "grad_norm": 0.6287698745727539, "learning_rate": 0.00024933646073605826, "loss": 3.1468, "step": 33914 }, { "epoch": 1.66, "grad_norm": 0.6077466011047363, "learning_rate": 0.00024932128574966226, "loss": 3.0103, "step": 33915 }, { "epoch": 1.66, "grad_norm": 0.6049577593803406, "learning_rate": 0.00024930611089674434, "loss": 2.6801, "step": 33916 }, { "epoch": 1.66, "grad_norm": 0.5947195887565613, "learning_rate": 0.0002492909361773444, "loss": 3.1092, "step": 33917 }, { "epoch": 1.66, "grad_norm": 0.6179590225219727, "learning_rate": 0.0002492757615915023, "loss": 3.0234, "step": 33918 }, { "epoch": 1.66, "grad_norm": 0.6137421727180481, "learning_rate": 0.0002492605871392582, "loss": 3.0543, "step": 33919 }, { "epoch": 1.66, "grad_norm": 0.6110509037971497, "learning_rate": 0.00024924541282065184, "loss": 3.0235, "step": 33920 }, { "epoch": 1.66, "grad_norm": 0.604406476020813, "learning_rate": 0.00024923023863572324, "loss": 2.8201, "step": 33921 }, { "epoch": 1.66, "grad_norm": 0.5563648343086243, "learning_rate": 0.00024921506458451255, "loss": 3.0609, "step": 33922 }, { "epoch": 1.66, "grad_norm": 0.6176638007164001, "learning_rate": 0.00024919989066705955, "loss": 2.9723, "step": 33923 }, { "epoch": 1.66, "grad_norm": 0.5700177550315857, "learning_rate": 0.00024918471688340437, "loss": 3.0213, "step": 33924 }, { "epoch": 1.66, "grad_norm": 0.6631049513816833, "learning_rate": 0.0002491695432335867, "loss": 2.908, "step": 33925 }, { "epoch": 1.66, "grad_norm": 0.6572071313858032, "learning_rate": 0.0002491543697176467, "loss": 3.0808, "step": 33926 }, { "epoch": 1.66, "grad_norm": 0.5589262843132019, "learning_rate": 0.0002491391963356244, "loss": 2.9899, "step": 33927 }, { "epoch": 1.66, "grad_norm": 0.6265024542808533, "learning_rate": 0.00024912402308755957, "loss": 2.9103, "step": 33928 }, { "epoch": 1.66, "grad_norm": 0.5696996450424194, "learning_rate": 0.00024910884997349236, "loss": 3.0077, "step": 33929 }, { "epoch": 1.66, "grad_norm": 0.6142954230308533, "learning_rate": 0.0002490936769934626, "loss": 3.0136, "step": 33930 }, { "epoch": 1.66, "grad_norm": 0.5850833654403687, "learning_rate": 0.00024907850414751016, "loss": 3.0001, "step": 33931 }, { "epoch": 1.66, "grad_norm": 0.6385449171066284, "learning_rate": 0.0002490633314356754, "loss": 3.005, "step": 33932 }, { "epoch": 1.66, "grad_norm": 0.6238567233085632, "learning_rate": 0.0002490481588579978, "loss": 3.1686, "step": 33933 }, { "epoch": 1.66, "grad_norm": 0.6315261721611023, "learning_rate": 0.0002490329864145177, "loss": 3.2475, "step": 33934 }, { "epoch": 1.66, "grad_norm": 0.6120766997337341, "learning_rate": 0.0002490178141052748, "loss": 2.8656, "step": 33935 }, { "epoch": 1.66, "grad_norm": 0.6094756126403809, "learning_rate": 0.0002490026419303092, "loss": 2.949, "step": 33936 }, { "epoch": 1.66, "grad_norm": 0.6236438751220703, "learning_rate": 0.0002489874698896609, "loss": 3.0454, "step": 33937 }, { "epoch": 1.66, "grad_norm": 0.7023098468780518, "learning_rate": 0.00024897229798336967, "loss": 2.8311, "step": 33938 }, { "epoch": 1.66, "grad_norm": 0.609567403793335, "learning_rate": 0.00024895712621147574, "loss": 3.0063, "step": 33939 }, { "epoch": 1.66, "grad_norm": 0.6077160835266113, "learning_rate": 0.00024894195457401876, "loss": 3.0588, "step": 33940 }, { "epoch": 1.66, "grad_norm": 0.6205511093139648, "learning_rate": 0.00024892678307103885, "loss": 3.0441, "step": 33941 }, { "epoch": 1.66, "grad_norm": 0.626202404499054, "learning_rate": 0.00024891161170257606, "loss": 3.137, "step": 33942 }, { "epoch": 1.66, "grad_norm": 0.5801575779914856, "learning_rate": 0.0002488964404686702, "loss": 2.9521, "step": 33943 }, { "epoch": 1.66, "grad_norm": 0.6224533319473267, "learning_rate": 0.00024888126936936135, "loss": 3.0051, "step": 33944 }, { "epoch": 1.66, "grad_norm": 0.6122703552246094, "learning_rate": 0.00024886609840468933, "loss": 2.7792, "step": 33945 }, { "epoch": 1.66, "grad_norm": 0.6122666001319885, "learning_rate": 0.0002488509275746941, "loss": 2.8985, "step": 33946 }, { "epoch": 1.66, "grad_norm": 0.5584424734115601, "learning_rate": 0.0002488357568794158, "loss": 2.9583, "step": 33947 }, { "epoch": 1.66, "grad_norm": 0.601530909538269, "learning_rate": 0.0002488205863188942, "loss": 2.8557, "step": 33948 }, { "epoch": 1.66, "grad_norm": 0.6443337798118591, "learning_rate": 0.0002488054158931694, "loss": 2.9457, "step": 33949 }, { "epoch": 1.66, "grad_norm": 0.5796291828155518, "learning_rate": 0.0002487902456022812, "loss": 2.9709, "step": 33950 }, { "epoch": 1.66, "grad_norm": 0.5982805490493774, "learning_rate": 0.0002487750754462696, "loss": 3.193, "step": 33951 }, { "epoch": 1.66, "grad_norm": 0.5929266810417175, "learning_rate": 0.0002487599054251747, "loss": 3.1062, "step": 33952 }, { "epoch": 1.66, "grad_norm": 0.6260059475898743, "learning_rate": 0.0002487447355390363, "loss": 2.9633, "step": 33953 }, { "epoch": 1.66, "grad_norm": 0.6307040452957153, "learning_rate": 0.0002487295657878945, "loss": 3.0025, "step": 33954 }, { "epoch": 1.66, "grad_norm": 0.6263796091079712, "learning_rate": 0.000248714396171789, "loss": 2.8704, "step": 33955 }, { "epoch": 1.66, "grad_norm": 0.5983632206916809, "learning_rate": 0.00024869922669076, "loss": 3.1199, "step": 33956 }, { "epoch": 1.66, "grad_norm": 0.6320955157279968, "learning_rate": 0.0002486840573448474, "loss": 3.0689, "step": 33957 }, { "epoch": 1.66, "grad_norm": 0.6292237639427185, "learning_rate": 0.00024866888813409097, "loss": 2.9389, "step": 33958 }, { "epoch": 1.66, "grad_norm": 0.6104917526245117, "learning_rate": 0.00024865371905853097, "loss": 3.0677, "step": 33959 }, { "epoch": 1.66, "grad_norm": 0.6262634992599487, "learning_rate": 0.0002486385501182071, "loss": 2.9518, "step": 33960 }, { "epoch": 1.66, "grad_norm": 0.6107706427574158, "learning_rate": 0.0002486233813131595, "loss": 2.9798, "step": 33961 }, { "epoch": 1.66, "grad_norm": 0.6529039144515991, "learning_rate": 0.0002486082126434279, "loss": 3.1314, "step": 33962 }, { "epoch": 1.66, "grad_norm": 0.6255234479904175, "learning_rate": 0.0002485930441090524, "loss": 3.112, "step": 33963 }, { "epoch": 1.66, "grad_norm": 0.5849068760871887, "learning_rate": 0.0002485778757100731, "loss": 3.0832, "step": 33964 }, { "epoch": 1.66, "grad_norm": 0.6300506591796875, "learning_rate": 0.0002485627074465295, "loss": 2.8947, "step": 33965 }, { "epoch": 1.66, "grad_norm": 0.6072397828102112, "learning_rate": 0.00024854753931846207, "loss": 2.9836, "step": 33966 }, { "epoch": 1.66, "grad_norm": 0.6096535325050354, "learning_rate": 0.0002485323713259104, "loss": 2.876, "step": 33967 }, { "epoch": 1.66, "grad_norm": 0.6128242611885071, "learning_rate": 0.0002485172034689145, "loss": 3.1772, "step": 33968 }, { "epoch": 1.66, "grad_norm": 0.6279151439666748, "learning_rate": 0.00024850203574751454, "loss": 2.9902, "step": 33969 }, { "epoch": 1.66, "grad_norm": 0.7040567398071289, "learning_rate": 0.0002484868681617502, "loss": 3.0256, "step": 33970 }, { "epoch": 1.66, "grad_norm": 0.6096146702766418, "learning_rate": 0.0002484717007116616, "loss": 2.8562, "step": 33971 }, { "epoch": 1.66, "grad_norm": 0.5939738750457764, "learning_rate": 0.0002484565333972885, "loss": 3.1068, "step": 33972 }, { "epoch": 1.66, "grad_norm": 0.6173325181007385, "learning_rate": 0.00024844136621867096, "loss": 3.0958, "step": 33973 }, { "epoch": 1.66, "grad_norm": 0.6269416213035583, "learning_rate": 0.0002484261991758491, "loss": 3.0309, "step": 33974 }, { "epoch": 1.67, "grad_norm": 0.6126830577850342, "learning_rate": 0.00024841103226886263, "loss": 3.0065, "step": 33975 }, { "epoch": 1.67, "grad_norm": 0.6426466107368469, "learning_rate": 0.00024839586549775164, "loss": 3.0531, "step": 33976 }, { "epoch": 1.67, "grad_norm": 0.6027688384056091, "learning_rate": 0.0002483806988625559, "loss": 3.0285, "step": 33977 }, { "epoch": 1.67, "grad_norm": 0.6397801637649536, "learning_rate": 0.0002483655323633155, "loss": 3.0949, "step": 33978 }, { "epoch": 1.67, "grad_norm": 0.591986894607544, "learning_rate": 0.0002483503660000704, "loss": 3.1487, "step": 33979 }, { "epoch": 1.67, "grad_norm": 0.6391310095787048, "learning_rate": 0.00024833519977286044, "loss": 3.0401, "step": 33980 }, { "epoch": 1.67, "grad_norm": 0.6004000306129456, "learning_rate": 0.00024832003368172566, "loss": 3.0843, "step": 33981 }, { "epoch": 1.67, "grad_norm": 0.5886471271514893, "learning_rate": 0.00024830486772670595, "loss": 3.0734, "step": 33982 }, { "epoch": 1.67, "grad_norm": 0.616348385810852, "learning_rate": 0.0002482897019078412, "loss": 2.848, "step": 33983 }, { "epoch": 1.67, "grad_norm": 0.6533381342887878, "learning_rate": 0.0002482745362251715, "loss": 3.0357, "step": 33984 }, { "epoch": 1.67, "grad_norm": 0.6193398237228394, "learning_rate": 0.00024825937067873675, "loss": 3.0845, "step": 33985 }, { "epoch": 1.67, "grad_norm": 0.604232668876648, "learning_rate": 0.0002482442052685769, "loss": 3.1351, "step": 33986 }, { "epoch": 1.67, "grad_norm": 0.6174710392951965, "learning_rate": 0.0002482290399947317, "loss": 3.1375, "step": 33987 }, { "epoch": 1.67, "grad_norm": 0.5886451005935669, "learning_rate": 0.0002482138748572413, "loss": 3.0255, "step": 33988 }, { "epoch": 1.67, "grad_norm": 0.6079821586608887, "learning_rate": 0.00024819870985614564, "loss": 2.9344, "step": 33989 }, { "epoch": 1.67, "grad_norm": 0.669161856174469, "learning_rate": 0.00024818354499148447, "loss": 3.0288, "step": 33990 }, { "epoch": 1.67, "grad_norm": 0.6441835761070251, "learning_rate": 0.00024816838026329806, "loss": 2.9344, "step": 33991 }, { "epoch": 1.67, "grad_norm": 0.6017826795578003, "learning_rate": 0.00024815321567162605, "loss": 2.8813, "step": 33992 }, { "epoch": 1.67, "grad_norm": 0.6216018199920654, "learning_rate": 0.0002481380512165084, "loss": 2.8692, "step": 33993 }, { "epoch": 1.67, "grad_norm": 0.6077483892440796, "learning_rate": 0.00024812288689798537, "loss": 3.0153, "step": 33994 }, { "epoch": 1.67, "grad_norm": 0.8035667538642883, "learning_rate": 0.00024810772271609646, "loss": 3.3342, "step": 33995 }, { "epoch": 1.67, "grad_norm": 0.6364055275917053, "learning_rate": 0.000248092558670882, "loss": 2.9934, "step": 33996 }, { "epoch": 1.67, "grad_norm": 0.5845009684562683, "learning_rate": 0.00024807739476238167, "loss": 3.0476, "step": 33997 }, { "epoch": 1.67, "grad_norm": 0.6098141670227051, "learning_rate": 0.0002480622309906354, "loss": 2.9357, "step": 33998 }, { "epoch": 1.67, "grad_norm": 0.6033996343612671, "learning_rate": 0.00024804706735568336, "loss": 3.0799, "step": 33999 }, { "epoch": 1.67, "grad_norm": 0.6313714385032654, "learning_rate": 0.0002480319038575653, "loss": 3.0056, "step": 34000 }, { "epoch": 1.67, "grad_norm": 0.6305382251739502, "learning_rate": 0.00024801674049632126, "loss": 2.9569, "step": 34001 }, { "epoch": 1.67, "grad_norm": 0.6073910593986511, "learning_rate": 0.000248001577271991, "loss": 2.9318, "step": 34002 }, { "epoch": 1.67, "grad_norm": 0.594529926776886, "learning_rate": 0.0002479864141846146, "loss": 2.9888, "step": 34003 }, { "epoch": 1.67, "grad_norm": 0.6212973594665527, "learning_rate": 0.0002479712512342321, "loss": 2.9173, "step": 34004 }, { "epoch": 1.67, "grad_norm": 0.6293397545814514, "learning_rate": 0.0002479560884208831, "loss": 2.8407, "step": 34005 }, { "epoch": 1.67, "grad_norm": 0.6491313576698303, "learning_rate": 0.00024794092574460797, "loss": 2.8996, "step": 34006 }, { "epoch": 1.67, "grad_norm": 0.583442747592926, "learning_rate": 0.0002479257632054463, "loss": 2.928, "step": 34007 }, { "epoch": 1.67, "grad_norm": 0.6074749827384949, "learning_rate": 0.0002479106008034381, "loss": 2.8657, "step": 34008 }, { "epoch": 1.67, "grad_norm": 0.5930288434028625, "learning_rate": 0.0002478954385386235, "loss": 3.0198, "step": 34009 }, { "epoch": 1.67, "grad_norm": 0.6107326745986938, "learning_rate": 0.0002478802764110422, "loss": 2.9917, "step": 34010 }, { "epoch": 1.67, "grad_norm": 0.5948980450630188, "learning_rate": 0.0002478651144207342, "loss": 3.0944, "step": 34011 }, { "epoch": 1.67, "grad_norm": 0.6396520137786865, "learning_rate": 0.0002478499525677395, "loss": 2.9329, "step": 34012 }, { "epoch": 1.67, "grad_norm": 0.6055231094360352, "learning_rate": 0.0002478347908520979, "loss": 3.2016, "step": 34013 }, { "epoch": 1.67, "grad_norm": 0.6053661108016968, "learning_rate": 0.00024781962927384953, "loss": 2.8238, "step": 34014 }, { "epoch": 1.67, "grad_norm": 0.6582406163215637, "learning_rate": 0.00024780446783303415, "loss": 2.8245, "step": 34015 }, { "epoch": 1.67, "grad_norm": 0.6152645349502563, "learning_rate": 0.00024778930652969187, "loss": 3.0896, "step": 34016 }, { "epoch": 1.67, "grad_norm": 0.6121913194656372, "learning_rate": 0.00024777414536386236, "loss": 3.1472, "step": 34017 }, { "epoch": 1.67, "grad_norm": 0.6189271807670593, "learning_rate": 0.00024775898433558575, "loss": 3.2943, "step": 34018 }, { "epoch": 1.67, "grad_norm": 0.6674231886863708, "learning_rate": 0.000247743823444902, "loss": 2.8963, "step": 34019 }, { "epoch": 1.67, "grad_norm": 0.6085709929466248, "learning_rate": 0.00024772866269185084, "loss": 2.9968, "step": 34020 }, { "epoch": 1.67, "grad_norm": 0.6044761538505554, "learning_rate": 0.0002477135020764724, "loss": 2.9718, "step": 34021 }, { "epoch": 1.67, "grad_norm": 0.6364777088165283, "learning_rate": 0.00024769834159880656, "loss": 3.092, "step": 34022 }, { "epoch": 1.67, "grad_norm": 0.6292357444763184, "learning_rate": 0.00024768318125889305, "loss": 2.865, "step": 34023 }, { "epoch": 1.67, "grad_norm": 0.6410455703735352, "learning_rate": 0.0002476680210567722, "loss": 2.8619, "step": 34024 }, { "epoch": 1.67, "grad_norm": 0.6323256492614746, "learning_rate": 0.0002476528609924836, "loss": 2.9672, "step": 34025 }, { "epoch": 1.67, "grad_norm": 0.6153978705406189, "learning_rate": 0.00024763770106606733, "loss": 3.0129, "step": 34026 }, { "epoch": 1.67, "grad_norm": 0.6172310709953308, "learning_rate": 0.0002476225412775632, "loss": 3.1617, "step": 34027 }, { "epoch": 1.67, "grad_norm": 0.6111814379692078, "learning_rate": 0.00024760738162701125, "loss": 2.8775, "step": 34028 }, { "epoch": 1.67, "grad_norm": 0.6040524840354919, "learning_rate": 0.00024759222211445144, "loss": 3.0851, "step": 34029 }, { "epoch": 1.67, "grad_norm": 0.6083324551582336, "learning_rate": 0.0002475770627399235, "loss": 3.2039, "step": 34030 }, { "epoch": 1.67, "grad_norm": 0.6631869673728943, "learning_rate": 0.0002475619035034676, "loss": 3.0856, "step": 34031 }, { "epoch": 1.67, "grad_norm": 0.605511486530304, "learning_rate": 0.00024754674440512355, "loss": 3.0132, "step": 34032 }, { "epoch": 1.67, "grad_norm": 0.6278396248817444, "learning_rate": 0.0002475315854449312, "loss": 2.935, "step": 34033 }, { "epoch": 1.67, "grad_norm": 0.6231174468994141, "learning_rate": 0.00024751642662293065, "loss": 2.9998, "step": 34034 }, { "epoch": 1.67, "grad_norm": 0.6460698246955872, "learning_rate": 0.0002475012679391616, "loss": 3.1249, "step": 34035 }, { "epoch": 1.67, "grad_norm": 0.6108884811401367, "learning_rate": 0.00024748610939366426, "loss": 3.1124, "step": 34036 }, { "epoch": 1.67, "grad_norm": 0.67753005027771, "learning_rate": 0.00024747095098647833, "loss": 3.0618, "step": 34037 }, { "epoch": 1.67, "grad_norm": 0.6046687364578247, "learning_rate": 0.00024745579271764375, "loss": 3.1446, "step": 34038 }, { "epoch": 1.67, "grad_norm": 0.701672375202179, "learning_rate": 0.0002474406345872006, "loss": 2.9236, "step": 34039 }, { "epoch": 1.67, "grad_norm": 0.60172438621521, "learning_rate": 0.00024742547659518867, "loss": 3.0169, "step": 34040 }, { "epoch": 1.67, "grad_norm": 0.5958285927772522, "learning_rate": 0.000247410318741648, "loss": 3.0876, "step": 34041 }, { "epoch": 1.67, "grad_norm": 0.6142698526382446, "learning_rate": 0.0002473951610266182, "loss": 3.2667, "step": 34042 }, { "epoch": 1.67, "grad_norm": 0.5789704322814941, "learning_rate": 0.00024738000345013965, "loss": 2.9274, "step": 34043 }, { "epoch": 1.67, "grad_norm": 0.6565642356872559, "learning_rate": 0.0002473648460122519, "loss": 3.2293, "step": 34044 }, { "epoch": 1.67, "grad_norm": 0.6168193221092224, "learning_rate": 0.00024734968871299506, "loss": 3.166, "step": 34045 }, { "epoch": 1.67, "grad_norm": 0.6158468127250671, "learning_rate": 0.00024733453155240906, "loss": 3.1144, "step": 34046 }, { "epoch": 1.67, "grad_norm": 0.6314502954483032, "learning_rate": 0.0002473193745305337, "loss": 2.9168, "step": 34047 }, { "epoch": 1.67, "grad_norm": 0.6529750823974609, "learning_rate": 0.00024730421764740905, "loss": 3.116, "step": 34048 }, { "epoch": 1.67, "grad_norm": 0.6014294028282166, "learning_rate": 0.00024728906090307483, "loss": 3.053, "step": 34049 }, { "epoch": 1.67, "grad_norm": 0.6091155409812927, "learning_rate": 0.0002472739042975712, "loss": 2.8216, "step": 34050 }, { "epoch": 1.67, "grad_norm": 0.641144871711731, "learning_rate": 0.00024725874783093795, "loss": 3.0557, "step": 34051 }, { "epoch": 1.67, "grad_norm": 0.6270335912704468, "learning_rate": 0.00024724359150321486, "loss": 3.1368, "step": 34052 }, { "epoch": 1.67, "grad_norm": 0.5929427146911621, "learning_rate": 0.00024722843531444215, "loss": 3.031, "step": 34053 }, { "epoch": 1.67, "grad_norm": 0.6024312376976013, "learning_rate": 0.00024721327926465955, "loss": 2.8996, "step": 34054 }, { "epoch": 1.67, "grad_norm": 0.6244577765464783, "learning_rate": 0.0002471981233539069, "loss": 2.9659, "step": 34055 }, { "epoch": 1.67, "grad_norm": 0.6653500199317932, "learning_rate": 0.0002471829675822244, "loss": 2.9293, "step": 34056 }, { "epoch": 1.67, "grad_norm": 0.583678662776947, "learning_rate": 0.0002471678119496516, "loss": 3.0174, "step": 34057 }, { "epoch": 1.67, "grad_norm": 0.6141014099121094, "learning_rate": 0.00024715265645622883, "loss": 3.1254, "step": 34058 }, { "epoch": 1.67, "grad_norm": 0.6307711601257324, "learning_rate": 0.0002471375011019957, "loss": 2.8053, "step": 34059 }, { "epoch": 1.67, "grad_norm": 0.5954844951629639, "learning_rate": 0.0002471223458869922, "loss": 2.8284, "step": 34060 }, { "epoch": 1.67, "grad_norm": 0.6285943388938904, "learning_rate": 0.00024710719081125833, "loss": 2.9632, "step": 34061 }, { "epoch": 1.67, "grad_norm": 0.6757904887199402, "learning_rate": 0.00024709203587483384, "loss": 2.9003, "step": 34062 }, { "epoch": 1.67, "grad_norm": 0.5749251842498779, "learning_rate": 0.0002470768810777589, "loss": 3.0031, "step": 34063 }, { "epoch": 1.67, "grad_norm": 0.6063864231109619, "learning_rate": 0.0002470617264200731, "loss": 2.9874, "step": 34064 }, { "epoch": 1.67, "grad_norm": 0.6300905346870422, "learning_rate": 0.0002470465719018166, "loss": 2.8773, "step": 34065 }, { "epoch": 1.67, "grad_norm": 0.6623172760009766, "learning_rate": 0.00024703141752302927, "loss": 2.9534, "step": 34066 }, { "epoch": 1.67, "grad_norm": 0.6001306772232056, "learning_rate": 0.0002470162632837509, "loss": 2.909, "step": 34067 }, { "epoch": 1.67, "grad_norm": 0.6355345249176025, "learning_rate": 0.0002470011091840216, "loss": 3.036, "step": 34068 }, { "epoch": 1.67, "grad_norm": 0.6132904887199402, "learning_rate": 0.00024698595522388114, "loss": 3.0769, "step": 34069 }, { "epoch": 1.67, "grad_norm": 0.6177525520324707, "learning_rate": 0.0002469708014033694, "loss": 2.792, "step": 34070 }, { "epoch": 1.67, "grad_norm": 0.6004658937454224, "learning_rate": 0.0002469556477225265, "loss": 3.1412, "step": 34071 }, { "epoch": 1.67, "grad_norm": 0.6170461177825928, "learning_rate": 0.0002469404941813921, "loss": 3.1152, "step": 34072 }, { "epoch": 1.67, "grad_norm": 0.5980595350265503, "learning_rate": 0.0002469253407800063, "loss": 3.123, "step": 34073 }, { "epoch": 1.67, "grad_norm": 0.6601492762565613, "learning_rate": 0.0002469101875184089, "loss": 2.977, "step": 34074 }, { "epoch": 1.67, "grad_norm": 0.6366380453109741, "learning_rate": 0.00024689503439663974, "loss": 2.9476, "step": 34075 }, { "epoch": 1.67, "grad_norm": 0.656537652015686, "learning_rate": 0.000246879881414739, "loss": 3.1397, "step": 34076 }, { "epoch": 1.67, "grad_norm": 0.629987359046936, "learning_rate": 0.0002468647285727464, "loss": 2.9363, "step": 34077 }, { "epoch": 1.67, "grad_norm": 0.6459150314331055, "learning_rate": 0.0002468495758707019, "loss": 3.0619, "step": 34078 }, { "epoch": 1.67, "grad_norm": 0.6273544430732727, "learning_rate": 0.0002468344233086453, "loss": 2.9486, "step": 34079 }, { "epoch": 1.67, "grad_norm": 0.6106494665145874, "learning_rate": 0.00024681927088661664, "loss": 3.2629, "step": 34080 }, { "epoch": 1.67, "grad_norm": 0.6219441890716553, "learning_rate": 0.0002468041186046558, "loss": 3.2484, "step": 34081 }, { "epoch": 1.67, "grad_norm": 0.6537150144577026, "learning_rate": 0.0002467889664628026, "loss": 2.9886, "step": 34082 }, { "epoch": 1.67, "grad_norm": 0.6156889200210571, "learning_rate": 0.0002467738144610972, "loss": 3.1849, "step": 34083 }, { "epoch": 1.67, "grad_norm": 0.6050704717636108, "learning_rate": 0.0002467586625995792, "loss": 2.9982, "step": 34084 }, { "epoch": 1.67, "grad_norm": 0.6336327195167542, "learning_rate": 0.00024674351087828857, "loss": 3.1285, "step": 34085 }, { "epoch": 1.67, "grad_norm": 0.6327105164527893, "learning_rate": 0.00024672835929726543, "loss": 2.9639, "step": 34086 }, { "epoch": 1.67, "grad_norm": 0.6417514681816101, "learning_rate": 0.0002467132078565495, "loss": 3.111, "step": 34087 }, { "epoch": 1.67, "grad_norm": 0.6045467257499695, "learning_rate": 0.0002466980565561807, "loss": 3.2154, "step": 34088 }, { "epoch": 1.67, "grad_norm": 0.9114331007003784, "learning_rate": 0.0002466829053961989, "loss": 3.1789, "step": 34089 }, { "epoch": 1.67, "grad_norm": 0.5843703746795654, "learning_rate": 0.0002466677543766441, "loss": 3.1093, "step": 34090 }, { "epoch": 1.67, "grad_norm": 0.6175855398178101, "learning_rate": 0.0002466526034975563, "loss": 3.1485, "step": 34091 }, { "epoch": 1.67, "grad_norm": 0.6193850636482239, "learning_rate": 0.0002466374527589751, "loss": 3.0645, "step": 34092 }, { "epoch": 1.67, "grad_norm": 0.6025353670120239, "learning_rate": 0.0002466223021609407, "loss": 2.9797, "step": 34093 }, { "epoch": 1.67, "grad_norm": 0.6327268481254578, "learning_rate": 0.00024660715170349285, "loss": 3.2005, "step": 34094 }, { "epoch": 1.67, "grad_norm": 0.6247721910476685, "learning_rate": 0.0002465920013866714, "loss": 3.0138, "step": 34095 }, { "epoch": 1.67, "grad_norm": 0.6387418508529663, "learning_rate": 0.0002465768512105165, "loss": 3.1151, "step": 34096 }, { "epoch": 1.67, "grad_norm": 0.6263685822486877, "learning_rate": 0.00024656170117506773, "loss": 2.9747, "step": 34097 }, { "epoch": 1.67, "grad_norm": 0.6204245686531067, "learning_rate": 0.00024654655128036526, "loss": 2.9732, "step": 34098 }, { "epoch": 1.67, "grad_norm": 0.5845150947570801, "learning_rate": 0.0002465314015264489, "loss": 3.0868, "step": 34099 }, { "epoch": 1.67, "grad_norm": 0.6110867857933044, "learning_rate": 0.0002465162519133584, "loss": 2.8015, "step": 34100 }, { "epoch": 1.67, "grad_norm": 0.6433117389678955, "learning_rate": 0.0002465011024411339, "loss": 2.9421, "step": 34101 }, { "epoch": 1.67, "grad_norm": 0.6174062490463257, "learning_rate": 0.0002464859531098152, "loss": 2.854, "step": 34102 }, { "epoch": 1.67, "grad_norm": 0.5987910032272339, "learning_rate": 0.0002464708039194423, "loss": 3.1167, "step": 34103 }, { "epoch": 1.67, "grad_norm": 0.6083313226699829, "learning_rate": 0.0002464556548700548, "loss": 3.0495, "step": 34104 }, { "epoch": 1.67, "grad_norm": 0.633455753326416, "learning_rate": 0.0002464405059616929, "loss": 2.8616, "step": 34105 }, { "epoch": 1.67, "grad_norm": 0.6219099760055542, "learning_rate": 0.00024642535719439644, "loss": 3.1337, "step": 34106 }, { "epoch": 1.67, "grad_norm": 0.6289411783218384, "learning_rate": 0.0002464102085682052, "loss": 3.1779, "step": 34107 }, { "epoch": 1.67, "grad_norm": 0.6306152939796448, "learning_rate": 0.00024639506008315926, "loss": 3.1277, "step": 34108 }, { "epoch": 1.67, "grad_norm": 0.6876551508903503, "learning_rate": 0.0002463799117392983, "loss": 3.0214, "step": 34109 }, { "epoch": 1.67, "grad_norm": 0.598209023475647, "learning_rate": 0.00024636476353666234, "loss": 3.0505, "step": 34110 }, { "epoch": 1.67, "grad_norm": 0.645162045955658, "learning_rate": 0.0002463496154752914, "loss": 3.0546, "step": 34111 }, { "epoch": 1.67, "grad_norm": 0.6210871934890747, "learning_rate": 0.0002463344675552251, "loss": 3.1667, "step": 34112 }, { "epoch": 1.67, "grad_norm": 0.5721740126609802, "learning_rate": 0.0002463193197765036, "loss": 2.9053, "step": 34113 }, { "epoch": 1.67, "grad_norm": 0.5755153298377991, "learning_rate": 0.00024630417213916656, "loss": 2.8611, "step": 34114 }, { "epoch": 1.67, "grad_norm": 0.615121066570282, "learning_rate": 0.000246289024643254, "loss": 2.9583, "step": 34115 }, { "epoch": 1.67, "grad_norm": 0.641639232635498, "learning_rate": 0.0002462738772888059, "loss": 3.1712, "step": 34116 }, { "epoch": 1.67, "grad_norm": 0.5973453521728516, "learning_rate": 0.00024625873007586203, "loss": 3.1632, "step": 34117 }, { "epoch": 1.67, "grad_norm": 0.6583412289619446, "learning_rate": 0.0002462435830044624, "loss": 2.9733, "step": 34118 }, { "epoch": 1.67, "grad_norm": 0.6712580323219299, "learning_rate": 0.0002462284360746467, "loss": 3.1107, "step": 34119 }, { "epoch": 1.67, "grad_norm": 0.6605498194694519, "learning_rate": 0.00024621328928645504, "loss": 3.0804, "step": 34120 }, { "epoch": 1.67, "grad_norm": 0.6001577377319336, "learning_rate": 0.0002461981426399272, "loss": 2.8523, "step": 34121 }, { "epoch": 1.67, "grad_norm": 0.6218976974487305, "learning_rate": 0.000246182996135103, "loss": 3.1601, "step": 34122 }, { "epoch": 1.67, "grad_norm": 0.6491854190826416, "learning_rate": 0.0002461678497720226, "loss": 2.9465, "step": 34123 }, { "epoch": 1.67, "grad_norm": 0.6104328036308289, "learning_rate": 0.0002461527035507256, "loss": 3.0921, "step": 34124 }, { "epoch": 1.67, "grad_norm": 0.622700035572052, "learning_rate": 0.00024613755747125216, "loss": 2.9134, "step": 34125 }, { "epoch": 1.67, "grad_norm": 0.6061636805534363, "learning_rate": 0.00024612241153364184, "loss": 3.047, "step": 34126 }, { "epoch": 1.67, "grad_norm": 0.6331983804702759, "learning_rate": 0.0002461072657379348, "loss": 2.7637, "step": 34127 }, { "epoch": 1.67, "grad_norm": 0.6341940760612488, "learning_rate": 0.00024609212008417096, "loss": 3.0107, "step": 34128 }, { "epoch": 1.67, "grad_norm": 0.6093514561653137, "learning_rate": 0.00024607697457238993, "loss": 2.9378, "step": 34129 }, { "epoch": 1.67, "grad_norm": 0.6078907251358032, "learning_rate": 0.0002460618292026319, "loss": 2.928, "step": 34130 }, { "epoch": 1.67, "grad_norm": 0.6402571201324463, "learning_rate": 0.00024604668397493657, "loss": 3.0684, "step": 34131 }, { "epoch": 1.67, "grad_norm": 0.6203433871269226, "learning_rate": 0.0002460315388893438, "loss": 2.8733, "step": 34132 }, { "epoch": 1.67, "grad_norm": 0.6072234511375427, "learning_rate": 0.0002460163939458937, "loss": 3.0711, "step": 34133 }, { "epoch": 1.67, "grad_norm": 0.6317029595375061, "learning_rate": 0.000246001249144626, "loss": 2.9225, "step": 34134 }, { "epoch": 1.67, "grad_norm": 0.6035718321800232, "learning_rate": 0.0002459861044855807, "loss": 3.0206, "step": 34135 }, { "epoch": 1.67, "grad_norm": 0.6024402976036072, "learning_rate": 0.0002459709599687974, "loss": 2.8322, "step": 34136 }, { "epoch": 1.67, "grad_norm": 0.7386209964752197, "learning_rate": 0.00024595581559431627, "loss": 3.1102, "step": 34137 }, { "epoch": 1.67, "grad_norm": 0.646673321723938, "learning_rate": 0.00024594067136217716, "loss": 3.1028, "step": 34138 }, { "epoch": 1.67, "grad_norm": 0.6153397560119629, "learning_rate": 0.00024592552727242, "loss": 3.1711, "step": 34139 }, { "epoch": 1.67, "grad_norm": 0.6744325160980225, "learning_rate": 0.0002459103833250845, "loss": 2.8508, "step": 34140 }, { "epoch": 1.67, "grad_norm": 0.5932391881942749, "learning_rate": 0.0002458952395202106, "loss": 2.9682, "step": 34141 }, { "epoch": 1.67, "grad_norm": 0.570640504360199, "learning_rate": 0.0002458800958578383, "loss": 3.0059, "step": 34142 }, { "epoch": 1.67, "grad_norm": 0.6285528540611267, "learning_rate": 0.00024586495233800743, "loss": 3.0278, "step": 34143 }, { "epoch": 1.67, "grad_norm": 0.6127492189407349, "learning_rate": 0.0002458498089607577, "loss": 3.117, "step": 34144 }, { "epoch": 1.67, "grad_norm": 0.6140125393867493, "learning_rate": 0.00024583466572612934, "loss": 3.2226, "step": 34145 }, { "epoch": 1.67, "grad_norm": 0.6389678716659546, "learning_rate": 0.000245819522634162, "loss": 3.1822, "step": 34146 }, { "epoch": 1.67, "grad_norm": 0.6139385104179382, "learning_rate": 0.0002458043796848955, "loss": 2.8201, "step": 34147 }, { "epoch": 1.67, "grad_norm": 0.6733065843582153, "learning_rate": 0.00024578923687837, "loss": 3.0222, "step": 34148 }, { "epoch": 1.67, "grad_norm": 0.6101844906806946, "learning_rate": 0.0002457740942146251, "loss": 2.8743, "step": 34149 }, { "epoch": 1.67, "grad_norm": 0.5869734883308411, "learning_rate": 0.0002457589516937009, "loss": 3.0927, "step": 34150 }, { "epoch": 1.67, "grad_norm": 0.6247618198394775, "learning_rate": 0.00024574380931563706, "loss": 3.2443, "step": 34151 }, { "epoch": 1.67, "grad_norm": 0.6441221833229065, "learning_rate": 0.00024572866708047367, "loss": 2.9533, "step": 34152 }, { "epoch": 1.67, "grad_norm": 0.6270751357078552, "learning_rate": 0.0002457135249882505, "loss": 3.0383, "step": 34153 }, { "epoch": 1.67, "grad_norm": 0.6131313443183899, "learning_rate": 0.0002456983830390074, "loss": 3.2256, "step": 34154 }, { "epoch": 1.67, "grad_norm": 0.6170026063919067, "learning_rate": 0.0002456832412327844, "loss": 2.9657, "step": 34155 }, { "epoch": 1.67, "grad_norm": 0.6400575637817383, "learning_rate": 0.00024566809956962126, "loss": 3.0925, "step": 34156 }, { "epoch": 1.67, "grad_norm": 0.618373453617096, "learning_rate": 0.00024565295804955783, "loss": 2.9742, "step": 34157 }, { "epoch": 1.67, "grad_norm": 0.67595374584198, "learning_rate": 0.0002456378166726342, "loss": 3.2046, "step": 34158 }, { "epoch": 1.67, "grad_norm": 0.6886033415794373, "learning_rate": 0.0002456226754388899, "loss": 2.941, "step": 34159 }, { "epoch": 1.67, "grad_norm": 0.62932288646698, "learning_rate": 0.00024560753434836515, "loss": 2.9249, "step": 34160 }, { "epoch": 1.67, "grad_norm": 0.6120386123657227, "learning_rate": 0.00024559239340109966, "loss": 3.2161, "step": 34161 }, { "epoch": 1.67, "grad_norm": 0.6127865314483643, "learning_rate": 0.00024557725259713323, "loss": 2.8264, "step": 34162 }, { "epoch": 1.67, "grad_norm": 0.6054245829582214, "learning_rate": 0.00024556211193650597, "loss": 3.1627, "step": 34163 }, { "epoch": 1.67, "grad_norm": 0.6147023439407349, "learning_rate": 0.0002455469714192576, "loss": 3.0502, "step": 34164 }, { "epoch": 1.67, "grad_norm": 0.6629253029823303, "learning_rate": 0.0002455318310454281, "loss": 2.9995, "step": 34165 }, { "epoch": 1.67, "grad_norm": 0.6122319102287292, "learning_rate": 0.0002455166908150571, "loss": 3.0473, "step": 34166 }, { "epoch": 1.67, "grad_norm": 0.6370877027511597, "learning_rate": 0.00024550155072818476, "loss": 2.8365, "step": 34167 }, { "epoch": 1.67, "grad_norm": 0.6131877899169922, "learning_rate": 0.0002454864107848509, "loss": 2.9348, "step": 34168 }, { "epoch": 1.67, "grad_norm": 0.6444746255874634, "learning_rate": 0.0002454712709850952, "loss": 3.127, "step": 34169 }, { "epoch": 1.67, "grad_norm": 0.6460678577423096, "learning_rate": 0.0002454561313289578, "loss": 2.8952, "step": 34170 }, { "epoch": 1.67, "grad_norm": 0.6373785734176636, "learning_rate": 0.00024544099181647843, "loss": 3.0078, "step": 34171 }, { "epoch": 1.67, "grad_norm": 0.6953406929969788, "learning_rate": 0.00024542585244769687, "loss": 3.0514, "step": 34172 }, { "epoch": 1.67, "grad_norm": 0.6235325336456299, "learning_rate": 0.00024541071322265324, "loss": 3.0022, "step": 34173 }, { "epoch": 1.67, "grad_norm": 0.6241675019264221, "learning_rate": 0.00024539557414138724, "loss": 3.1864, "step": 34174 }, { "epoch": 1.67, "grad_norm": 0.6402691602706909, "learning_rate": 0.00024538043520393887, "loss": 2.88, "step": 34175 }, { "epoch": 1.67, "grad_norm": 0.6314650774002075, "learning_rate": 0.0002453652964103478, "loss": 3.0372, "step": 34176 }, { "epoch": 1.67, "grad_norm": 0.6837327480316162, "learning_rate": 0.000245350157760654, "loss": 2.8986, "step": 34177 }, { "epoch": 1.67, "grad_norm": 0.6558158993721008, "learning_rate": 0.0002453350192548975, "loss": 3.008, "step": 34178 }, { "epoch": 1.68, "grad_norm": 0.6023430228233337, "learning_rate": 0.00024531988089311797, "loss": 2.9874, "step": 34179 }, { "epoch": 1.68, "grad_norm": 0.6416671276092529, "learning_rate": 0.00024530474267535547, "loss": 2.7857, "step": 34180 }, { "epoch": 1.68, "grad_norm": 0.6319637298583984, "learning_rate": 0.00024528960460164955, "loss": 2.914, "step": 34181 }, { "epoch": 1.68, "grad_norm": 0.6269749999046326, "learning_rate": 0.0002452744666720404, "loss": 3.0919, "step": 34182 }, { "epoch": 1.68, "grad_norm": 0.6353631615638733, "learning_rate": 0.0002452593288865678, "loss": 2.9123, "step": 34183 }, { "epoch": 1.68, "grad_norm": 0.6360670924186707, "learning_rate": 0.00024524419124527145, "loss": 2.8882, "step": 34184 }, { "epoch": 1.68, "grad_norm": 0.6149998307228088, "learning_rate": 0.0002452290537481915, "loss": 3.2487, "step": 34185 }, { "epoch": 1.68, "grad_norm": 0.5942299365997314, "learning_rate": 0.0002452139163953676, "loss": 3.0568, "step": 34186 }, { "epoch": 1.68, "grad_norm": 0.5962042808532715, "learning_rate": 0.00024519877918683974, "loss": 3.0002, "step": 34187 }, { "epoch": 1.68, "grad_norm": 0.6600490212440491, "learning_rate": 0.00024518364212264775, "loss": 3.2226, "step": 34188 }, { "epoch": 1.68, "grad_norm": 0.6743106842041016, "learning_rate": 0.00024516850520283154, "loss": 3.1296, "step": 34189 }, { "epoch": 1.68, "grad_norm": 0.6211302876472473, "learning_rate": 0.0002451533684274309, "loss": 3.1217, "step": 34190 }, { "epoch": 1.68, "grad_norm": 0.7367480397224426, "learning_rate": 0.00024513823179648565, "loss": 2.9123, "step": 34191 }, { "epoch": 1.68, "grad_norm": 0.6232231855392456, "learning_rate": 0.00024512309531003583, "loss": 3.0276, "step": 34192 }, { "epoch": 1.68, "grad_norm": 0.6443117260932922, "learning_rate": 0.0002451079589681212, "loss": 3.2087, "step": 34193 }, { "epoch": 1.68, "grad_norm": 0.6081936955451965, "learning_rate": 0.0002450928227707816, "loss": 2.993, "step": 34194 }, { "epoch": 1.68, "grad_norm": 0.600371241569519, "learning_rate": 0.00024507768671805705, "loss": 3.051, "step": 34195 }, { "epoch": 1.68, "grad_norm": 0.6255161762237549, "learning_rate": 0.0002450625508099872, "loss": 3.0649, "step": 34196 }, { "epoch": 1.68, "grad_norm": 0.632082462310791, "learning_rate": 0.00024504741504661207, "loss": 2.9378, "step": 34197 }, { "epoch": 1.68, "grad_norm": 0.5944221615791321, "learning_rate": 0.00024503227942797144, "loss": 3.1465, "step": 34198 }, { "epoch": 1.68, "grad_norm": 0.5801829695701599, "learning_rate": 0.00024501714395410514, "loss": 2.8567, "step": 34199 }, { "epoch": 1.68, "grad_norm": 0.5647958517074585, "learning_rate": 0.00024500200862505317, "loss": 3.0342, "step": 34200 }, { "epoch": 1.68, "grad_norm": 0.6428757309913635, "learning_rate": 0.0002449868734408554, "loss": 3.0151, "step": 34201 }, { "epoch": 1.68, "grad_norm": 0.5983733534812927, "learning_rate": 0.00024497173840155157, "loss": 2.9189, "step": 34202 }, { "epoch": 1.68, "grad_norm": 0.6872225999832153, "learning_rate": 0.0002449566035071815, "loss": 2.9386, "step": 34203 }, { "epoch": 1.68, "grad_norm": 0.6514347195625305, "learning_rate": 0.00024494146875778523, "loss": 2.9179, "step": 34204 }, { "epoch": 1.68, "grad_norm": 0.6127699613571167, "learning_rate": 0.00024492633415340256, "loss": 2.8151, "step": 34205 }, { "epoch": 1.68, "grad_norm": 0.6513962149620056, "learning_rate": 0.0002449111996940732, "loss": 2.9692, "step": 34206 }, { "epoch": 1.68, "grad_norm": 0.6389971971511841, "learning_rate": 0.00024489606537983727, "loss": 2.9922, "step": 34207 }, { "epoch": 1.68, "grad_norm": 0.6056563854217529, "learning_rate": 0.00024488093121073445, "loss": 2.9475, "step": 34208 }, { "epoch": 1.68, "grad_norm": 0.6097168326377869, "learning_rate": 0.0002448657971868046, "loss": 2.9191, "step": 34209 }, { "epoch": 1.68, "grad_norm": 0.640527069568634, "learning_rate": 0.0002448506633080877, "loss": 3.0529, "step": 34210 }, { "epoch": 1.68, "grad_norm": 0.6365799307823181, "learning_rate": 0.00024483552957462357, "loss": 2.9739, "step": 34211 }, { "epoch": 1.68, "grad_norm": 0.6156507730484009, "learning_rate": 0.00024482039598645205, "loss": 2.9809, "step": 34212 }, { "epoch": 1.68, "grad_norm": 0.6057137846946716, "learning_rate": 0.00024480526254361284, "loss": 3.0669, "step": 34213 }, { "epoch": 1.68, "grad_norm": 0.6164252758026123, "learning_rate": 0.000244790129246146, "loss": 3.0025, "step": 34214 }, { "epoch": 1.68, "grad_norm": 0.6119879484176636, "learning_rate": 0.00024477499609409144, "loss": 3.0289, "step": 34215 }, { "epoch": 1.68, "grad_norm": 0.6054874062538147, "learning_rate": 0.00024475986308748873, "loss": 3.1761, "step": 34216 }, { "epoch": 1.68, "grad_norm": 0.6626514196395874, "learning_rate": 0.0002447447302263781, "loss": 3.0249, "step": 34217 }, { "epoch": 1.68, "grad_norm": 0.6329224109649658, "learning_rate": 0.0002447295975107991, "loss": 2.9464, "step": 34218 }, { "epoch": 1.68, "grad_norm": 0.6414870619773865, "learning_rate": 0.0002447144649407917, "loss": 3.1747, "step": 34219 }, { "epoch": 1.68, "grad_norm": 0.6235871315002441, "learning_rate": 0.00024469933251639584, "loss": 3.0888, "step": 34220 }, { "epoch": 1.68, "grad_norm": 0.6273425221443176, "learning_rate": 0.00024468420023765117, "loss": 3.0888, "step": 34221 }, { "epoch": 1.68, "grad_norm": 0.6260068416595459, "learning_rate": 0.00024466906810459785, "loss": 3.174, "step": 34222 }, { "epoch": 1.68, "grad_norm": 0.721221923828125, "learning_rate": 0.00024465393611727544, "loss": 2.8587, "step": 34223 }, { "epoch": 1.68, "grad_norm": 0.5994982719421387, "learning_rate": 0.0002446388042757238, "loss": 2.8307, "step": 34224 }, { "epoch": 1.68, "grad_norm": 0.5987682342529297, "learning_rate": 0.0002446236725799831, "loss": 3.0374, "step": 34225 }, { "epoch": 1.68, "grad_norm": 0.6004258990287781, "learning_rate": 0.0002446085410300929, "loss": 2.8079, "step": 34226 }, { "epoch": 1.68, "grad_norm": 0.5990030169487, "learning_rate": 0.0002445934096260932, "loss": 3.0825, "step": 34227 }, { "epoch": 1.68, "grad_norm": 0.7027409076690674, "learning_rate": 0.0002445782783680237, "loss": 2.9353, "step": 34228 }, { "epoch": 1.68, "grad_norm": 0.6076624989509583, "learning_rate": 0.0002445631472559244, "loss": 3.0427, "step": 34229 }, { "epoch": 1.68, "grad_norm": 0.6400550603866577, "learning_rate": 0.0002445480162898352, "loss": 3.0594, "step": 34230 }, { "epoch": 1.68, "grad_norm": 0.5895517468452454, "learning_rate": 0.0002445328854697957, "loss": 3.1673, "step": 34231 }, { "epoch": 1.68, "grad_norm": 0.6176828742027283, "learning_rate": 0.000244517754795846, "loss": 3.1844, "step": 34232 }, { "epoch": 1.68, "grad_norm": 0.6322025060653687, "learning_rate": 0.0002445026242680258, "loss": 3.1415, "step": 34233 }, { "epoch": 1.68, "grad_norm": 0.6357572078704834, "learning_rate": 0.00024448749388637493, "loss": 2.9114, "step": 34234 }, { "epoch": 1.68, "grad_norm": 0.6500570178031921, "learning_rate": 0.0002444723636509335, "loss": 2.9372, "step": 34235 }, { "epoch": 1.68, "grad_norm": 0.6248914003372192, "learning_rate": 0.0002444572335617411, "loss": 2.9285, "step": 34236 }, { "epoch": 1.68, "grad_norm": 0.5908624529838562, "learning_rate": 0.0002444421036188377, "loss": 3.1703, "step": 34237 }, { "epoch": 1.68, "grad_norm": 0.6131687760353088, "learning_rate": 0.00024442697382226296, "loss": 2.8722, "step": 34238 }, { "epoch": 1.68, "grad_norm": 0.6797118186950684, "learning_rate": 0.00024441184417205684, "loss": 3.0294, "step": 34239 }, { "epoch": 1.68, "grad_norm": 0.6466017365455627, "learning_rate": 0.00024439671466825944, "loss": 2.9447, "step": 34240 }, { "epoch": 1.68, "grad_norm": 0.6415280699729919, "learning_rate": 0.0002443815853109103, "loss": 3.1305, "step": 34241 }, { "epoch": 1.68, "grad_norm": 0.613725483417511, "learning_rate": 0.0002443664561000494, "loss": 2.773, "step": 34242 }, { "epoch": 1.68, "grad_norm": 0.6166124939918518, "learning_rate": 0.00024435132703571645, "loss": 2.8056, "step": 34243 }, { "epoch": 1.68, "grad_norm": 0.6828124523162842, "learning_rate": 0.00024433619811795146, "loss": 2.9771, "step": 34244 }, { "epoch": 1.68, "grad_norm": 0.6250672936439514, "learning_rate": 0.00024432106934679424, "loss": 3.0102, "step": 34245 }, { "epoch": 1.68, "grad_norm": 0.6510005593299866, "learning_rate": 0.0002443059407222845, "loss": 2.931, "step": 34246 }, { "epoch": 1.68, "grad_norm": 0.6244876980781555, "learning_rate": 0.0002442908122444623, "loss": 2.9292, "step": 34247 }, { "epoch": 1.68, "grad_norm": 0.631175696849823, "learning_rate": 0.0002442756839133674, "loss": 3.1205, "step": 34248 }, { "epoch": 1.68, "grad_norm": 0.6444908380508423, "learning_rate": 0.0002442605557290395, "loss": 3.013, "step": 34249 }, { "epoch": 1.68, "grad_norm": 0.6660930514335632, "learning_rate": 0.0002442454276915187, "loss": 3.1253, "step": 34250 }, { "epoch": 1.68, "grad_norm": 0.6830661296844482, "learning_rate": 0.00024423029980084464, "loss": 3.1829, "step": 34251 }, { "epoch": 1.68, "grad_norm": 0.6448426246643066, "learning_rate": 0.00024421517205705734, "loss": 3.0845, "step": 34252 }, { "epoch": 1.68, "grad_norm": 0.6062400341033936, "learning_rate": 0.00024420004446019635, "loss": 3.12, "step": 34253 }, { "epoch": 1.68, "grad_norm": 0.5658940672874451, "learning_rate": 0.00024418491701030183, "loss": 2.9393, "step": 34254 }, { "epoch": 1.68, "grad_norm": 0.6128112077713013, "learning_rate": 0.00024416978970741353, "loss": 2.9671, "step": 34255 }, { "epoch": 1.68, "grad_norm": 0.5911213159561157, "learning_rate": 0.0002441546625515711, "loss": 3.0101, "step": 34256 }, { "epoch": 1.68, "grad_norm": 0.6012565493583679, "learning_rate": 0.00024413953554281473, "loss": 2.9874, "step": 34257 }, { "epoch": 1.68, "grad_norm": 0.6286384463310242, "learning_rate": 0.00024412440868118397, "loss": 2.9793, "step": 34258 }, { "epoch": 1.68, "grad_norm": 0.6148403286933899, "learning_rate": 0.0002441092819667187, "loss": 2.9738, "step": 34259 }, { "epoch": 1.68, "grad_norm": 0.616051435470581, "learning_rate": 0.00024409415539945896, "loss": 3.0531, "step": 34260 }, { "epoch": 1.68, "grad_norm": 0.5852324962615967, "learning_rate": 0.00024407902897944435, "loss": 3.0542, "step": 34261 }, { "epoch": 1.68, "grad_norm": 0.6138484477996826, "learning_rate": 0.00024406390270671492, "loss": 3.1578, "step": 34262 }, { "epoch": 1.68, "grad_norm": 0.6208502054214478, "learning_rate": 0.00024404877658131032, "loss": 3.067, "step": 34263 }, { "epoch": 1.68, "grad_norm": 0.6173650026321411, "learning_rate": 0.00024403365060327045, "loss": 2.8049, "step": 34264 }, { "epoch": 1.68, "grad_norm": 0.6221855878829956, "learning_rate": 0.0002440185247726353, "loss": 3.171, "step": 34265 }, { "epoch": 1.68, "grad_norm": 0.6358213424682617, "learning_rate": 0.0002440033990894445, "loss": 3.2996, "step": 34266 }, { "epoch": 1.68, "grad_norm": 0.5805011987686157, "learning_rate": 0.00024398827355373803, "loss": 3.1994, "step": 34267 }, { "epoch": 1.68, "grad_norm": 0.6460946798324585, "learning_rate": 0.00024397314816555557, "loss": 2.9848, "step": 34268 }, { "epoch": 1.68, "grad_norm": 0.6628650426864624, "learning_rate": 0.00024395802292493708, "loss": 2.9467, "step": 34269 }, { "epoch": 1.68, "grad_norm": 0.6098015308380127, "learning_rate": 0.00024394289783192245, "loss": 3.0061, "step": 34270 }, { "epoch": 1.68, "grad_norm": 0.6058465242385864, "learning_rate": 0.00024392777288655132, "loss": 2.85, "step": 34271 }, { "epoch": 1.68, "grad_norm": 0.6222949624061584, "learning_rate": 0.0002439126480888638, "loss": 3.2935, "step": 34272 }, { "epoch": 1.68, "grad_norm": 0.6029030084609985, "learning_rate": 0.00024389752343889943, "loss": 3.1337, "step": 34273 }, { "epoch": 1.68, "grad_norm": 0.5769012570381165, "learning_rate": 0.00024388239893669833, "loss": 2.9523, "step": 34274 }, { "epoch": 1.68, "grad_norm": 0.6386813521385193, "learning_rate": 0.0002438672745823, "loss": 3.0349, "step": 34275 }, { "epoch": 1.68, "grad_norm": 0.6211322546005249, "learning_rate": 0.00024385215037574457, "loss": 3.046, "step": 34276 }, { "epoch": 1.68, "grad_norm": 0.6359053254127502, "learning_rate": 0.00024383702631707182, "loss": 2.9339, "step": 34277 }, { "epoch": 1.68, "grad_norm": 0.6204125285148621, "learning_rate": 0.0002438219024063214, "loss": 3.1282, "step": 34278 }, { "epoch": 1.68, "grad_norm": 0.6863940358161926, "learning_rate": 0.00024380677864353344, "loss": 3.1766, "step": 34279 }, { "epoch": 1.68, "grad_norm": 0.6086069345474243, "learning_rate": 0.00024379165502874743, "loss": 2.8825, "step": 34280 }, { "epoch": 1.68, "grad_norm": 0.6797551512718201, "learning_rate": 0.00024377653156200347, "loss": 2.948, "step": 34281 }, { "epoch": 1.68, "grad_norm": 0.6163539290428162, "learning_rate": 0.0002437614082433414, "loss": 3.045, "step": 34282 }, { "epoch": 1.68, "grad_norm": 0.6408080458641052, "learning_rate": 0.0002437462850728008, "loss": 3.0271, "step": 34283 }, { "epoch": 1.68, "grad_norm": 0.6323572993278503, "learning_rate": 0.00024373116205042176, "loss": 2.9313, "step": 34284 }, { "epoch": 1.68, "grad_norm": 0.611010730266571, "learning_rate": 0.00024371603917624398, "loss": 3.0656, "step": 34285 }, { "epoch": 1.68, "grad_norm": 0.615911602973938, "learning_rate": 0.00024370091645030726, "loss": 3.2463, "step": 34286 }, { "epoch": 1.68, "grad_norm": 0.6529790163040161, "learning_rate": 0.0002436857938726516, "loss": 2.9683, "step": 34287 }, { "epoch": 1.68, "grad_norm": 0.64628005027771, "learning_rate": 0.00024367067144331667, "loss": 2.9319, "step": 34288 }, { "epoch": 1.68, "grad_norm": 0.5937123894691467, "learning_rate": 0.00024365554916234243, "loss": 3.0921, "step": 34289 }, { "epoch": 1.68, "grad_norm": 0.6053999066352844, "learning_rate": 0.00024364042702976847, "loss": 2.9979, "step": 34290 }, { "epoch": 1.68, "grad_norm": 0.635191023349762, "learning_rate": 0.00024362530504563486, "loss": 2.9769, "step": 34291 }, { "epoch": 1.68, "grad_norm": 0.6321050524711609, "learning_rate": 0.00024361018320998137, "loss": 2.9325, "step": 34292 }, { "epoch": 1.68, "grad_norm": 0.6223572492599487, "learning_rate": 0.00024359506152284776, "loss": 3.0409, "step": 34293 }, { "epoch": 1.68, "grad_norm": 0.6281905770301819, "learning_rate": 0.00024357993998427396, "loss": 3.1466, "step": 34294 }, { "epoch": 1.68, "grad_norm": 0.5953301191329956, "learning_rate": 0.0002435648185942997, "loss": 3.0765, "step": 34295 }, { "epoch": 1.68, "grad_norm": 0.6165432333946228, "learning_rate": 0.0002435496973529648, "loss": 3.0896, "step": 34296 }, { "epoch": 1.68, "grad_norm": 0.5897827744483948, "learning_rate": 0.00024353457626030922, "loss": 2.9357, "step": 34297 }, { "epoch": 1.68, "grad_norm": 0.6467866897583008, "learning_rate": 0.00024351945531637267, "loss": 2.8904, "step": 34298 }, { "epoch": 1.68, "grad_norm": 0.6166879534721375, "learning_rate": 0.00024350433452119505, "loss": 3.0529, "step": 34299 }, { "epoch": 1.68, "grad_norm": 0.6443753242492676, "learning_rate": 0.00024348921387481602, "loss": 2.7317, "step": 34300 }, { "epoch": 1.68, "grad_norm": 0.755827009677887, "learning_rate": 0.00024347409337727554, "loss": 3.1424, "step": 34301 }, { "epoch": 1.68, "grad_norm": 0.5986933708190918, "learning_rate": 0.00024345897302861355, "loss": 3.1745, "step": 34302 }, { "epoch": 1.68, "grad_norm": 0.6184709668159485, "learning_rate": 0.00024344385282886962, "loss": 3.0542, "step": 34303 }, { "epoch": 1.68, "grad_norm": 0.9942894577980042, "learning_rate": 0.0002434287327780838, "loss": 3.0583, "step": 34304 }, { "epoch": 1.68, "grad_norm": 0.6022435426712036, "learning_rate": 0.00024341361287629572, "loss": 3.154, "step": 34305 }, { "epoch": 1.68, "grad_norm": 0.6629198789596558, "learning_rate": 0.0002433984931235453, "loss": 3.0552, "step": 34306 }, { "epoch": 1.68, "grad_norm": 0.5723194479942322, "learning_rate": 0.00024338337351987246, "loss": 3.1065, "step": 34307 }, { "epoch": 1.68, "grad_norm": 0.6211422085762024, "learning_rate": 0.00024336825406531676, "loss": 3.1316, "step": 34308 }, { "epoch": 1.68, "grad_norm": 0.6164812445640564, "learning_rate": 0.00024335313475991834, "loss": 2.9869, "step": 34309 }, { "epoch": 1.68, "grad_norm": 1.0711008310317993, "learning_rate": 0.00024333801560371676, "loss": 2.9982, "step": 34310 }, { "epoch": 1.68, "grad_norm": 0.6362112760543823, "learning_rate": 0.00024332289659675192, "loss": 3.0657, "step": 34311 }, { "epoch": 1.68, "grad_norm": 0.5985422134399414, "learning_rate": 0.00024330777773906375, "loss": 3.1279, "step": 34312 }, { "epoch": 1.68, "grad_norm": 0.6208683848381042, "learning_rate": 0.00024329265903069193, "loss": 2.8673, "step": 34313 }, { "epoch": 1.68, "grad_norm": 0.6138306856155396, "learning_rate": 0.00024327754047167641, "loss": 3.024, "step": 34314 }, { "epoch": 1.68, "grad_norm": 0.5941352844238281, "learning_rate": 0.00024326242206205678, "loss": 2.8803, "step": 34315 }, { "epoch": 1.68, "grad_norm": 0.6221591830253601, "learning_rate": 0.0002432473038018731, "loss": 3.1186, "step": 34316 }, { "epoch": 1.68, "grad_norm": 0.6871387958526611, "learning_rate": 0.00024323218569116515, "loss": 3.0352, "step": 34317 }, { "epoch": 1.68, "grad_norm": 0.5821581482887268, "learning_rate": 0.0002432170677299726, "loss": 3.2781, "step": 34318 }, { "epoch": 1.68, "grad_norm": 0.6137875914573669, "learning_rate": 0.00024320194991833544, "loss": 2.8125, "step": 34319 }, { "epoch": 1.68, "grad_norm": 0.6038451194763184, "learning_rate": 0.0002431868322562934, "loss": 3.1545, "step": 34320 }, { "epoch": 1.68, "grad_norm": 0.6101359128952026, "learning_rate": 0.00024317171474388618, "loss": 3.026, "step": 34321 }, { "epoch": 1.68, "grad_norm": 0.7233001589775085, "learning_rate": 0.00024315659738115392, "loss": 2.9277, "step": 34322 }, { "epoch": 1.68, "grad_norm": 0.6210648417472839, "learning_rate": 0.0002431414801681361, "loss": 3.0142, "step": 34323 }, { "epoch": 1.68, "grad_norm": 0.598692774772644, "learning_rate": 0.00024312636310487278, "loss": 2.9345, "step": 34324 }, { "epoch": 1.68, "grad_norm": 0.5950759053230286, "learning_rate": 0.00024311124619140365, "loss": 3.062, "step": 34325 }, { "epoch": 1.68, "grad_norm": 0.5895894765853882, "learning_rate": 0.00024309612942776846, "loss": 3.2376, "step": 34326 }, { "epoch": 1.68, "grad_norm": 0.638976514339447, "learning_rate": 0.00024308101281400726, "loss": 2.917, "step": 34327 }, { "epoch": 1.68, "grad_norm": 0.6100997924804688, "learning_rate": 0.00024306589635015961, "loss": 3.0652, "step": 34328 }, { "epoch": 1.68, "grad_norm": 0.6079884767532349, "learning_rate": 0.00024305078003626553, "loss": 3.0837, "step": 34329 }, { "epoch": 1.68, "grad_norm": 0.5833554267883301, "learning_rate": 0.0002430356638723646, "loss": 3.0955, "step": 34330 }, { "epoch": 1.68, "grad_norm": 0.5608810782432556, "learning_rate": 0.00024302054785849684, "loss": 3.0168, "step": 34331 }, { "epoch": 1.68, "grad_norm": 0.6720162630081177, "learning_rate": 0.00024300543199470208, "loss": 3.0969, "step": 34332 }, { "epoch": 1.68, "grad_norm": 0.6119675636291504, "learning_rate": 0.00024299031628101988, "loss": 3.1501, "step": 34333 }, { "epoch": 1.68, "grad_norm": 0.6166000366210938, "learning_rate": 0.00024297520071749033, "loss": 3.1214, "step": 34334 }, { "epoch": 1.68, "grad_norm": 0.7879331707954407, "learning_rate": 0.00024296008530415308, "loss": 3.0078, "step": 34335 }, { "epoch": 1.68, "grad_norm": 0.630114734172821, "learning_rate": 0.0002429449700410479, "loss": 3.1951, "step": 34336 }, { "epoch": 1.68, "grad_norm": 0.6173840761184692, "learning_rate": 0.00024292985492821488, "loss": 3.1601, "step": 34337 }, { "epoch": 1.68, "grad_norm": 0.6316830515861511, "learning_rate": 0.0002429147399656935, "loss": 3.086, "step": 34338 }, { "epoch": 1.68, "grad_norm": 0.6645943522453308, "learning_rate": 0.00024289962515352384, "loss": 2.9076, "step": 34339 }, { "epoch": 1.68, "grad_norm": 0.6618261933326721, "learning_rate": 0.0002428845104917454, "loss": 3.1904, "step": 34340 }, { "epoch": 1.68, "grad_norm": 0.6228432655334473, "learning_rate": 0.0002428693959803982, "loss": 3.0376, "step": 34341 }, { "epoch": 1.68, "grad_norm": 0.6474012732505798, "learning_rate": 0.00024285428161952214, "loss": 3.1362, "step": 34342 }, { "epoch": 1.68, "grad_norm": 0.5851452350616455, "learning_rate": 0.0002428391674091568, "loss": 3.0867, "step": 34343 }, { "epoch": 1.68, "grad_norm": 0.6338858604431152, "learning_rate": 0.00024282405334934221, "loss": 3.013, "step": 34344 }, { "epoch": 1.68, "grad_norm": 0.6032165288925171, "learning_rate": 0.0002428089394401179, "loss": 3.1117, "step": 34345 }, { "epoch": 1.68, "grad_norm": 0.6296061873435974, "learning_rate": 0.00024279382568152393, "loss": 2.954, "step": 34346 }, { "epoch": 1.68, "grad_norm": 0.5732358694076538, "learning_rate": 0.00024277871207360003, "loss": 2.9257, "step": 34347 }, { "epoch": 1.68, "grad_norm": 0.6399105787277222, "learning_rate": 0.0002427635986163859, "loss": 2.9714, "step": 34348 }, { "epoch": 1.68, "grad_norm": 0.6738459467887878, "learning_rate": 0.00024274848530992155, "loss": 2.9373, "step": 34349 }, { "epoch": 1.68, "grad_norm": 0.6233891844749451, "learning_rate": 0.0002427333721542466, "loss": 3.1315, "step": 34350 }, { "epoch": 1.68, "grad_norm": 0.6309840083122253, "learning_rate": 0.00024271825914940102, "loss": 3.0287, "step": 34351 }, { "epoch": 1.68, "grad_norm": 0.6268077492713928, "learning_rate": 0.00024270314629542435, "loss": 3.0762, "step": 34352 }, { "epoch": 1.68, "grad_norm": 0.5958818793296814, "learning_rate": 0.00024268803359235667, "loss": 2.9662, "step": 34353 }, { "epoch": 1.68, "grad_norm": 0.6950878500938416, "learning_rate": 0.0002426729210402377, "loss": 2.8479, "step": 34354 }, { "epoch": 1.68, "grad_norm": 0.6276915073394775, "learning_rate": 0.00024265780863910716, "loss": 3.1364, "step": 34355 }, { "epoch": 1.68, "grad_norm": 0.647860050201416, "learning_rate": 0.00024264269638900497, "loss": 2.5724, "step": 34356 }, { "epoch": 1.68, "grad_norm": 0.599606454372406, "learning_rate": 0.00024262758428997088, "loss": 2.9408, "step": 34357 }, { "epoch": 1.68, "grad_norm": 0.6251691579818726, "learning_rate": 0.0002426124723420446, "loss": 3.0815, "step": 34358 }, { "epoch": 1.68, "grad_norm": 0.597493052482605, "learning_rate": 0.00024259736054526614, "loss": 2.9994, "step": 34359 }, { "epoch": 1.68, "grad_norm": 0.6401041150093079, "learning_rate": 0.00024258224889967512, "loss": 3.0447, "step": 34360 }, { "epoch": 1.68, "grad_norm": 0.6328083276748657, "learning_rate": 0.00024256713740531148, "loss": 2.9255, "step": 34361 }, { "epoch": 1.68, "grad_norm": 0.6329466700553894, "learning_rate": 0.00024255202606221485, "loss": 2.923, "step": 34362 }, { "epoch": 1.68, "grad_norm": 0.6171144247055054, "learning_rate": 0.00024253691487042511, "loss": 2.8842, "step": 34363 }, { "epoch": 1.68, "grad_norm": 0.6171637773513794, "learning_rate": 0.00024252180382998217, "loss": 3.1929, "step": 34364 }, { "epoch": 1.68, "grad_norm": 0.6588241457939148, "learning_rate": 0.0002425066929409257, "loss": 2.9894, "step": 34365 }, { "epoch": 1.68, "grad_norm": 0.6128795742988586, "learning_rate": 0.0002424915822032956, "loss": 2.8934, "step": 34366 }, { "epoch": 1.68, "grad_norm": 0.584743082523346, "learning_rate": 0.00024247647161713148, "loss": 3.0518, "step": 34367 }, { "epoch": 1.68, "grad_norm": 0.6354223489761353, "learning_rate": 0.00024246136118247333, "loss": 2.8951, "step": 34368 }, { "epoch": 1.68, "grad_norm": 0.6299620866775513, "learning_rate": 0.00024244625089936091, "loss": 2.9908, "step": 34369 }, { "epoch": 1.68, "grad_norm": 0.6883636713027954, "learning_rate": 0.0002424311407678339, "loss": 2.9329, "step": 34370 }, { "epoch": 1.68, "grad_norm": 0.5922103524208069, "learning_rate": 0.0002424160307879323, "loss": 3.091, "step": 34371 }, { "epoch": 1.68, "grad_norm": 0.6945319175720215, "learning_rate": 0.0002424009209596957, "loss": 3.1215, "step": 34372 }, { "epoch": 1.68, "grad_norm": 0.6298499703407288, "learning_rate": 0.00024238581128316396, "loss": 3.0817, "step": 34373 }, { "epoch": 1.68, "grad_norm": 0.632078230381012, "learning_rate": 0.00024237070175837705, "loss": 3.1141, "step": 34374 }, { "epoch": 1.68, "grad_norm": 0.633488118648529, "learning_rate": 0.0002423555923853745, "loss": 2.97, "step": 34375 }, { "epoch": 1.68, "grad_norm": 0.6549461483955383, "learning_rate": 0.00024234048316419633, "loss": 3.0173, "step": 34376 }, { "epoch": 1.68, "grad_norm": 0.6033638715744019, "learning_rate": 0.0002423253740948821, "loss": 2.9334, "step": 34377 }, { "epoch": 1.68, "grad_norm": 0.6423242688179016, "learning_rate": 0.00024231026517747178, "loss": 3.1711, "step": 34378 }, { "epoch": 1.68, "grad_norm": 0.586613118648529, "learning_rate": 0.0002422951564120052, "loss": 2.9276, "step": 34379 }, { "epoch": 1.68, "grad_norm": 0.6340028643608093, "learning_rate": 0.00024228004779852194, "loss": 3.0458, "step": 34380 }, { "epoch": 1.68, "grad_norm": 0.6134786009788513, "learning_rate": 0.00024226493933706208, "loss": 3.0783, "step": 34381 }, { "epoch": 1.68, "grad_norm": 0.641250491142273, "learning_rate": 0.00024224983102766516, "loss": 3.0496, "step": 34382 }, { "epoch": 1.69, "grad_norm": 0.6067681908607483, "learning_rate": 0.000242234722870371, "loss": 2.8943, "step": 34383 }, { "epoch": 1.69, "grad_norm": 0.5972951650619507, "learning_rate": 0.00024221961486521962, "loss": 3.0228, "step": 34384 }, { "epoch": 1.69, "grad_norm": 0.6391837000846863, "learning_rate": 0.0002422045070122505, "loss": 2.9397, "step": 34385 }, { "epoch": 1.69, "grad_norm": 0.5825820565223694, "learning_rate": 0.00024218939931150375, "loss": 3.1374, "step": 34386 }, { "epoch": 1.69, "grad_norm": 0.6349732875823975, "learning_rate": 0.00024217429176301889, "loss": 2.9957, "step": 34387 }, { "epoch": 1.69, "grad_norm": 0.6446109414100647, "learning_rate": 0.00024215918436683577, "loss": 3.1967, "step": 34388 }, { "epoch": 1.69, "grad_norm": 0.636913001537323, "learning_rate": 0.00024214407712299437, "loss": 3.1058, "step": 34389 }, { "epoch": 1.69, "grad_norm": 0.5909577012062073, "learning_rate": 0.00024212897003153425, "loss": 3.0518, "step": 34390 }, { "epoch": 1.69, "grad_norm": 0.6335484981536865, "learning_rate": 0.00024211386309249539, "loss": 3.0988, "step": 34391 }, { "epoch": 1.69, "grad_norm": 0.6210086941719055, "learning_rate": 0.0002420987563059173, "loss": 2.9179, "step": 34392 }, { "epoch": 1.69, "grad_norm": 0.6080520153045654, "learning_rate": 0.00024208364967184, "loss": 3.227, "step": 34393 }, { "epoch": 1.69, "grad_norm": 0.6113629341125488, "learning_rate": 0.00024206854319030333, "loss": 2.9451, "step": 34394 }, { "epoch": 1.69, "grad_norm": 0.6378176808357239, "learning_rate": 0.00024205343686134682, "loss": 3.1103, "step": 34395 }, { "epoch": 1.69, "grad_norm": 0.6006256341934204, "learning_rate": 0.00024203833068501052, "loss": 2.9884, "step": 34396 }, { "epoch": 1.69, "grad_norm": 0.6118775010108948, "learning_rate": 0.00024202322466133406, "loss": 2.7407, "step": 34397 }, { "epoch": 1.69, "grad_norm": 0.6467645168304443, "learning_rate": 0.00024200811879035718, "loss": 2.7859, "step": 34398 }, { "epoch": 1.69, "grad_norm": 0.6246896386146545, "learning_rate": 0.00024199301307211988, "loss": 3.0706, "step": 34399 }, { "epoch": 1.69, "grad_norm": 0.6289390325546265, "learning_rate": 0.00024197790750666177, "loss": 3.0411, "step": 34400 }, { "epoch": 1.69, "grad_norm": 0.6309362649917603, "learning_rate": 0.00024196280209402276, "loss": 2.9929, "step": 34401 }, { "epoch": 1.69, "grad_norm": 0.6463305950164795, "learning_rate": 0.0002419476968342424, "loss": 3.0134, "step": 34402 }, { "epoch": 1.69, "grad_norm": 0.6127621531486511, "learning_rate": 0.00024193259172736067, "loss": 3.0831, "step": 34403 }, { "epoch": 1.69, "grad_norm": 0.6621184945106506, "learning_rate": 0.00024191748677341744, "loss": 2.9493, "step": 34404 }, { "epoch": 1.69, "grad_norm": 0.6097039580345154, "learning_rate": 0.0002419023819724523, "loss": 3.0993, "step": 34405 }, { "epoch": 1.69, "grad_norm": 0.660682201385498, "learning_rate": 0.00024188727732450517, "loss": 3.2129, "step": 34406 }, { "epoch": 1.69, "grad_norm": 0.6187965869903564, "learning_rate": 0.00024187217282961563, "loss": 2.8815, "step": 34407 }, { "epoch": 1.69, "grad_norm": 0.5884891152381897, "learning_rate": 0.00024185706848782365, "loss": 3.0827, "step": 34408 }, { "epoch": 1.69, "grad_norm": 0.5681439638137817, "learning_rate": 0.00024184196429916904, "loss": 3.1114, "step": 34409 }, { "epoch": 1.69, "grad_norm": 0.6478064060211182, "learning_rate": 0.00024182686026369142, "loss": 3.0638, "step": 34410 }, { "epoch": 1.69, "grad_norm": 0.6609353423118591, "learning_rate": 0.00024181175638143072, "loss": 3.0618, "step": 34411 }, { "epoch": 1.69, "grad_norm": 0.6224457025527954, "learning_rate": 0.0002417966526524266, "loss": 2.9672, "step": 34412 }, { "epoch": 1.69, "grad_norm": 0.6122366189956665, "learning_rate": 0.00024178154907671887, "loss": 2.8498, "step": 34413 }, { "epoch": 1.69, "grad_norm": 0.6315528154373169, "learning_rate": 0.00024176644565434744, "loss": 2.8217, "step": 34414 }, { "epoch": 1.69, "grad_norm": 0.714515209197998, "learning_rate": 0.0002417513423853519, "loss": 2.774, "step": 34415 }, { "epoch": 1.69, "grad_norm": 0.6689561009407043, "learning_rate": 0.00024173623926977223, "loss": 2.9304, "step": 34416 }, { "epoch": 1.69, "grad_norm": 0.6164539456367493, "learning_rate": 0.00024172113630764792, "loss": 3.1242, "step": 34417 }, { "epoch": 1.69, "grad_norm": 0.6706748604774475, "learning_rate": 0.000241706033499019, "loss": 3.3955, "step": 34418 }, { "epoch": 1.69, "grad_norm": 0.5987676382064819, "learning_rate": 0.00024169093084392524, "loss": 3.03, "step": 34419 }, { "epoch": 1.69, "grad_norm": 0.6364895105361938, "learning_rate": 0.0002416758283424062, "loss": 3.2174, "step": 34420 }, { "epoch": 1.69, "grad_norm": 0.755660891532898, "learning_rate": 0.00024166072599450192, "loss": 2.9115, "step": 34421 }, { "epoch": 1.69, "grad_norm": 0.6371123194694519, "learning_rate": 0.00024164562380025203, "loss": 3.0867, "step": 34422 }, { "epoch": 1.69, "grad_norm": 0.6260061264038086, "learning_rate": 0.00024163052175969627, "loss": 2.9373, "step": 34423 }, { "epoch": 1.69, "grad_norm": 0.601723849773407, "learning_rate": 0.00024161541987287457, "loss": 3.0002, "step": 34424 }, { "epoch": 1.69, "grad_norm": 0.6575174331665039, "learning_rate": 0.00024160031813982657, "loss": 3.0804, "step": 34425 }, { "epoch": 1.69, "grad_norm": 0.6266413927078247, "learning_rate": 0.00024158521656059213, "loss": 3.0032, "step": 34426 }, { "epoch": 1.69, "grad_norm": 0.6671347618103027, "learning_rate": 0.000241570115135211, "loss": 2.9415, "step": 34427 }, { "epoch": 1.69, "grad_norm": 0.6078870296478271, "learning_rate": 0.00024155501386372282, "loss": 2.9793, "step": 34428 }, { "epoch": 1.69, "grad_norm": 0.6798886060714722, "learning_rate": 0.00024153991274616763, "loss": 2.7276, "step": 34429 }, { "epoch": 1.69, "grad_norm": 0.6024205088615417, "learning_rate": 0.00024152481178258501, "loss": 3.1548, "step": 34430 }, { "epoch": 1.69, "grad_norm": 0.6181690692901611, "learning_rate": 0.00024150971097301486, "loss": 2.8255, "step": 34431 }, { "epoch": 1.69, "grad_norm": 0.6173415780067444, "learning_rate": 0.00024149461031749668, "loss": 3.0516, "step": 34432 }, { "epoch": 1.69, "grad_norm": 0.6175852417945862, "learning_rate": 0.00024147950981607064, "loss": 2.9044, "step": 34433 }, { "epoch": 1.69, "grad_norm": 0.6466128826141357, "learning_rate": 0.0002414644094687762, "loss": 2.893, "step": 34434 }, { "epoch": 1.69, "grad_norm": 0.626565158367157, "learning_rate": 0.00024144930927565318, "loss": 3.1384, "step": 34435 }, { "epoch": 1.69, "grad_norm": 0.6032750606536865, "learning_rate": 0.00024143420923674157, "loss": 2.773, "step": 34436 }, { "epoch": 1.69, "grad_norm": 0.6452335715293884, "learning_rate": 0.0002414191093520809, "loss": 2.8955, "step": 34437 }, { "epoch": 1.69, "grad_norm": 0.6146382093429565, "learning_rate": 0.00024140400962171106, "loss": 2.935, "step": 34438 }, { "epoch": 1.69, "grad_norm": 0.5858275294303894, "learning_rate": 0.00024138891004567167, "loss": 3.0345, "step": 34439 }, { "epoch": 1.69, "grad_norm": 0.6086834669113159, "learning_rate": 0.00024137381062400268, "loss": 2.9822, "step": 34440 }, { "epoch": 1.69, "grad_norm": 0.6375246047973633, "learning_rate": 0.00024135871135674388, "loss": 3.1297, "step": 34441 }, { "epoch": 1.69, "grad_norm": 0.6939441561698914, "learning_rate": 0.00024134361224393479, "loss": 3.0535, "step": 34442 }, { "epoch": 1.69, "grad_norm": 0.6073477864265442, "learning_rate": 0.00024132851328561544, "loss": 2.944, "step": 34443 }, { "epoch": 1.69, "grad_norm": 0.6158416271209717, "learning_rate": 0.0002413134144818254, "loss": 3.151, "step": 34444 }, { "epoch": 1.69, "grad_norm": 0.5689690709114075, "learning_rate": 0.0002412983158326046, "loss": 2.9117, "step": 34445 }, { "epoch": 1.69, "grad_norm": 0.6501169800758362, "learning_rate": 0.0002412832173379928, "loss": 3.1277, "step": 34446 }, { "epoch": 1.69, "grad_norm": 0.6253321766853333, "learning_rate": 0.00024126811899802958, "loss": 3.1249, "step": 34447 }, { "epoch": 1.69, "grad_norm": 0.6375982761383057, "learning_rate": 0.00024125302081275492, "loss": 3.1011, "step": 34448 }, { "epoch": 1.69, "grad_norm": 0.6295775175094604, "learning_rate": 0.00024123792278220847, "loss": 3.1153, "step": 34449 }, { "epoch": 1.69, "grad_norm": 0.6375122666358948, "learning_rate": 0.00024122282490642998, "loss": 2.9863, "step": 34450 }, { "epoch": 1.69, "grad_norm": 0.6656721234321594, "learning_rate": 0.00024120772718545935, "loss": 3.0008, "step": 34451 }, { "epoch": 1.69, "grad_norm": 0.584492564201355, "learning_rate": 0.00024119262961933618, "loss": 2.9974, "step": 34452 }, { "epoch": 1.69, "grad_norm": 0.5878311395645142, "learning_rate": 0.0002411775322081004, "loss": 3.0679, "step": 34453 }, { "epoch": 1.69, "grad_norm": 0.6173829436302185, "learning_rate": 0.00024116243495179152, "loss": 2.82, "step": 34454 }, { "epoch": 1.69, "grad_norm": 0.6128963828086853, "learning_rate": 0.00024114733785044953, "loss": 2.8603, "step": 34455 }, { "epoch": 1.69, "grad_norm": 0.6082571744918823, "learning_rate": 0.0002411322409041142, "loss": 2.9488, "step": 34456 }, { "epoch": 1.69, "grad_norm": 0.6463648080825806, "learning_rate": 0.0002411171441128251, "loss": 3.0709, "step": 34457 }, { "epoch": 1.69, "grad_norm": 0.6087504625320435, "learning_rate": 0.00024110204747662225, "loss": 3.1574, "step": 34458 }, { "epoch": 1.69, "grad_norm": 0.6248615980148315, "learning_rate": 0.0002410869509955452, "loss": 3.0775, "step": 34459 }, { "epoch": 1.69, "grad_norm": 0.6408868432044983, "learning_rate": 0.00024107185466963369, "loss": 3.1373, "step": 34460 }, { "epoch": 1.69, "grad_norm": 0.6176990866661072, "learning_rate": 0.00024105675849892772, "loss": 3.0042, "step": 34461 }, { "epoch": 1.69, "grad_norm": 0.6030988693237305, "learning_rate": 0.00024104166248346685, "loss": 2.9704, "step": 34462 }, { "epoch": 1.69, "grad_norm": 0.6126129031181335, "learning_rate": 0.00024102656662329094, "loss": 2.9439, "step": 34463 }, { "epoch": 1.69, "grad_norm": 0.620477020740509, "learning_rate": 0.0002410114709184396, "loss": 3.2136, "step": 34464 }, { "epoch": 1.69, "grad_norm": 0.6435030102729797, "learning_rate": 0.00024099637536895263, "loss": 2.8834, "step": 34465 }, { "epoch": 1.69, "grad_norm": 0.6259873509407043, "learning_rate": 0.00024098127997487006, "loss": 3.2122, "step": 34466 }, { "epoch": 1.69, "grad_norm": 0.630929708480835, "learning_rate": 0.00024096618473623134, "loss": 3.1022, "step": 34467 }, { "epoch": 1.69, "grad_norm": 0.6018049120903015, "learning_rate": 0.0002409510896530764, "loss": 2.6932, "step": 34468 }, { "epoch": 1.69, "grad_norm": 0.6094369292259216, "learning_rate": 0.0002409359947254448, "loss": 2.9726, "step": 34469 }, { "epoch": 1.69, "grad_norm": 0.6517297625541687, "learning_rate": 0.0002409208999533765, "loss": 3.0395, "step": 34470 }, { "epoch": 1.69, "grad_norm": 0.6332142949104309, "learning_rate": 0.0002409058053369112, "loss": 2.9317, "step": 34471 }, { "epoch": 1.69, "grad_norm": 0.6489332914352417, "learning_rate": 0.00024089071087608852, "loss": 2.8955, "step": 34472 }, { "epoch": 1.69, "grad_norm": 0.6442323327064514, "learning_rate": 0.0002408756165709485, "loss": 3.0071, "step": 34473 }, { "epoch": 1.69, "grad_norm": 0.6622292995452881, "learning_rate": 0.0002408605224215306, "loss": 2.5943, "step": 34474 }, { "epoch": 1.69, "grad_norm": 0.6429809927940369, "learning_rate": 0.00024084542842787467, "loss": 2.9143, "step": 34475 }, { "epoch": 1.69, "grad_norm": 0.658168375492096, "learning_rate": 0.00024083033459002066, "loss": 3.0773, "step": 34476 }, { "epoch": 1.69, "grad_norm": 0.6099553108215332, "learning_rate": 0.00024081524090800805, "loss": 3.1176, "step": 34477 }, { "epoch": 1.69, "grad_norm": 0.6159991025924683, "learning_rate": 0.00024080014738187677, "loss": 2.9891, "step": 34478 }, { "epoch": 1.69, "grad_norm": 0.589513897895813, "learning_rate": 0.0002407850540116664, "loss": 2.9638, "step": 34479 }, { "epoch": 1.69, "grad_norm": 0.6264899969100952, "learning_rate": 0.00024076996079741686, "loss": 2.898, "step": 34480 }, { "epoch": 1.69, "grad_norm": 0.7650187611579895, "learning_rate": 0.00024075486773916793, "loss": 3.0772, "step": 34481 }, { "epoch": 1.69, "grad_norm": 0.6351287364959717, "learning_rate": 0.0002407397748369591, "loss": 3.1892, "step": 34482 }, { "epoch": 1.69, "grad_norm": 0.6214819550514221, "learning_rate": 0.00024072468209083045, "loss": 2.9347, "step": 34483 }, { "epoch": 1.69, "grad_norm": 0.6406108736991882, "learning_rate": 0.00024070958950082153, "loss": 2.9502, "step": 34484 }, { "epoch": 1.69, "grad_norm": 0.6591122150421143, "learning_rate": 0.00024069449706697207, "loss": 3.3276, "step": 34485 }, { "epoch": 1.69, "grad_norm": 0.6212307214736938, "learning_rate": 0.000240679404789322, "loss": 3.2392, "step": 34486 }, { "epoch": 1.69, "grad_norm": 0.7033385634422302, "learning_rate": 0.00024066431266791085, "loss": 2.9312, "step": 34487 }, { "epoch": 1.69, "grad_norm": 0.5959001779556274, "learning_rate": 0.0002406492207027786, "loss": 2.7623, "step": 34488 }, { "epoch": 1.69, "grad_norm": 0.5990002155303955, "learning_rate": 0.00024063412889396485, "loss": 2.971, "step": 34489 }, { "epoch": 1.69, "grad_norm": 0.6115400195121765, "learning_rate": 0.00024061903724150927, "loss": 2.8386, "step": 34490 }, { "epoch": 1.69, "grad_norm": 0.6301840543746948, "learning_rate": 0.00024060394574545186, "loss": 2.8966, "step": 34491 }, { "epoch": 1.69, "grad_norm": 0.6668326258659363, "learning_rate": 0.00024058885440583216, "loss": 2.8776, "step": 34492 }, { "epoch": 1.69, "grad_norm": 0.6199162602424622, "learning_rate": 0.00024057376322269007, "loss": 3.1312, "step": 34493 }, { "epoch": 1.69, "grad_norm": 0.6100919842720032, "learning_rate": 0.0002405586721960651, "loss": 3.1269, "step": 34494 }, { "epoch": 1.69, "grad_norm": 0.6069178581237793, "learning_rate": 0.00024054358132599723, "loss": 2.9928, "step": 34495 }, { "epoch": 1.69, "grad_norm": 0.6124541163444519, "learning_rate": 0.00024052849061252616, "loss": 3.018, "step": 34496 }, { "epoch": 1.69, "grad_norm": 0.5998200178146362, "learning_rate": 0.0002405134000556915, "loss": 2.8799, "step": 34497 }, { "epoch": 1.69, "grad_norm": 0.6153830885887146, "learning_rate": 0.00024049830965553322, "loss": 3.1137, "step": 34498 }, { "epoch": 1.69, "grad_norm": 0.6186378002166748, "learning_rate": 0.00024048321941209087, "loss": 3.0032, "step": 34499 }, { "epoch": 1.69, "grad_norm": 0.625507652759552, "learning_rate": 0.00024046812932540417, "loss": 3.2128, "step": 34500 }, { "epoch": 1.69, "grad_norm": 0.628273069858551, "learning_rate": 0.00024045303939551315, "loss": 3.0745, "step": 34501 }, { "epoch": 1.69, "grad_norm": 0.6160310506820679, "learning_rate": 0.00024043794962245725, "loss": 3.0487, "step": 34502 }, { "epoch": 1.69, "grad_norm": 0.6147609353065491, "learning_rate": 0.0002404228600062764, "loss": 3.0751, "step": 34503 }, { "epoch": 1.69, "grad_norm": 0.6561556458473206, "learning_rate": 0.0002404077705470102, "loss": 3.1311, "step": 34504 }, { "epoch": 1.69, "grad_norm": 0.6268799901008606, "learning_rate": 0.00024039268124469837, "loss": 2.981, "step": 34505 }, { "epoch": 1.69, "grad_norm": 0.6238470077514648, "learning_rate": 0.00024037759209938092, "loss": 2.9657, "step": 34506 }, { "epoch": 1.69, "grad_norm": 0.5943543314933777, "learning_rate": 0.0002403625031110974, "loss": 2.8509, "step": 34507 }, { "epoch": 1.69, "grad_norm": 0.6060153841972351, "learning_rate": 0.00024034741427988757, "loss": 3.0004, "step": 34508 }, { "epoch": 1.69, "grad_norm": 0.6190601587295532, "learning_rate": 0.00024033232560579106, "loss": 3.0331, "step": 34509 }, { "epoch": 1.69, "grad_norm": 0.6295119524002075, "learning_rate": 0.00024031723708884786, "loss": 2.895, "step": 34510 }, { "epoch": 1.69, "grad_norm": 0.6272006034851074, "learning_rate": 0.00024030214872909747, "loss": 3.0444, "step": 34511 }, { "epoch": 1.69, "grad_norm": 0.6380195021629333, "learning_rate": 0.00024028706052657973, "loss": 3.0712, "step": 34512 }, { "epoch": 1.69, "grad_norm": 0.6345843076705933, "learning_rate": 0.00024027197248133446, "loss": 3.0436, "step": 34513 }, { "epoch": 1.69, "grad_norm": 0.586628258228302, "learning_rate": 0.00024025688459340127, "loss": 3.2935, "step": 34514 }, { "epoch": 1.69, "grad_norm": 0.6063928008079529, "learning_rate": 0.00024024179686282, "loss": 2.8827, "step": 34515 }, { "epoch": 1.69, "grad_norm": 0.5976722836494446, "learning_rate": 0.00024022670928963024, "loss": 3.0413, "step": 34516 }, { "epoch": 1.69, "grad_norm": 0.6519937515258789, "learning_rate": 0.00024021162187387184, "loss": 2.9673, "step": 34517 }, { "epoch": 1.69, "grad_norm": 0.6437779068946838, "learning_rate": 0.0002401965346155846, "loss": 3.0316, "step": 34518 }, { "epoch": 1.69, "grad_norm": 0.5971271395683289, "learning_rate": 0.0002401814475148081, "loss": 3.1481, "step": 34519 }, { "epoch": 1.69, "grad_norm": 0.6342564225196838, "learning_rate": 0.00024016636057158224, "loss": 3.0502, "step": 34520 }, { "epoch": 1.69, "grad_norm": 0.5975090265274048, "learning_rate": 0.00024015127378594663, "loss": 2.9394, "step": 34521 }, { "epoch": 1.69, "grad_norm": 0.6094161868095398, "learning_rate": 0.00024013618715794095, "loss": 2.9791, "step": 34522 }, { "epoch": 1.69, "grad_norm": 0.6090801954269409, "learning_rate": 0.00024012110068760519, "loss": 3.0233, "step": 34523 }, { "epoch": 1.69, "grad_norm": 0.6072431802749634, "learning_rate": 0.00024010601437497882, "loss": 2.9527, "step": 34524 }, { "epoch": 1.69, "grad_norm": 0.7141648530960083, "learning_rate": 0.0002400909282201018, "loss": 2.8589, "step": 34525 }, { "epoch": 1.69, "grad_norm": 0.6420031785964966, "learning_rate": 0.00024007584222301357, "loss": 3.0201, "step": 34526 }, { "epoch": 1.69, "grad_norm": 0.6151692867279053, "learning_rate": 0.00024006075638375408, "loss": 3.039, "step": 34527 }, { "epoch": 1.69, "grad_norm": 0.6226093173027039, "learning_rate": 0.00024004567070236312, "loss": 3.0654, "step": 34528 }, { "epoch": 1.69, "grad_norm": 0.6651445031166077, "learning_rate": 0.00024003058517888028, "loss": 2.8437, "step": 34529 }, { "epoch": 1.69, "grad_norm": 0.5924610495567322, "learning_rate": 0.00024001549981334541, "loss": 2.7907, "step": 34530 }, { "epoch": 1.69, "grad_norm": 0.6144448518753052, "learning_rate": 0.00024000041460579802, "loss": 3.0333, "step": 34531 }, { "epoch": 1.69, "grad_norm": 0.6835182905197144, "learning_rate": 0.00023998532955627803, "loss": 2.8505, "step": 34532 }, { "epoch": 1.69, "grad_norm": 0.6623280048370361, "learning_rate": 0.00023997024466482523, "loss": 3.0159, "step": 34533 }, { "epoch": 1.69, "grad_norm": 0.6474083065986633, "learning_rate": 0.00023995515993147911, "loss": 2.9979, "step": 34534 }, { "epoch": 1.69, "grad_norm": 0.6150049567222595, "learning_rate": 0.00023994007535627968, "loss": 3.1431, "step": 34535 }, { "epoch": 1.69, "grad_norm": 0.6174440383911133, "learning_rate": 0.00023992499093926646, "loss": 2.996, "step": 34536 }, { "epoch": 1.69, "grad_norm": 0.6341604590415955, "learning_rate": 0.00023990990668047921, "loss": 3.0105, "step": 34537 }, { "epoch": 1.69, "grad_norm": 0.624151349067688, "learning_rate": 0.00023989482257995783, "loss": 3.1166, "step": 34538 }, { "epoch": 1.69, "grad_norm": 0.5957610011100769, "learning_rate": 0.00023987973863774184, "loss": 2.9692, "step": 34539 }, { "epoch": 1.69, "grad_norm": 0.655441403388977, "learning_rate": 0.00023986465485387114, "loss": 2.9397, "step": 34540 }, { "epoch": 1.69, "grad_norm": 0.6130520701408386, "learning_rate": 0.0002398495712283852, "loss": 3.0796, "step": 34541 }, { "epoch": 1.69, "grad_norm": 0.6145035624504089, "learning_rate": 0.00023983448776132397, "loss": 3.0219, "step": 34542 }, { "epoch": 1.69, "grad_norm": 0.6132924556732178, "learning_rate": 0.00023981940445272723, "loss": 2.9801, "step": 34543 }, { "epoch": 1.69, "grad_norm": 0.6345343589782715, "learning_rate": 0.00023980432130263444, "loss": 2.9226, "step": 34544 }, { "epoch": 1.69, "grad_norm": 0.6521178483963013, "learning_rate": 0.00023978923831108562, "loss": 3.0685, "step": 34545 }, { "epoch": 1.69, "grad_norm": 0.6504637598991394, "learning_rate": 0.00023977415547812029, "loss": 2.8594, "step": 34546 }, { "epoch": 1.69, "grad_norm": 0.6152077913284302, "learning_rate": 0.00023975907280377815, "loss": 3.1033, "step": 34547 }, { "epoch": 1.69, "grad_norm": 0.6426700949668884, "learning_rate": 0.00023974399028809916, "loss": 2.9754, "step": 34548 }, { "epoch": 1.69, "grad_norm": 0.6243044137954712, "learning_rate": 0.00023972890793112281, "loss": 2.958, "step": 34549 }, { "epoch": 1.69, "grad_norm": 0.6287975311279297, "learning_rate": 0.00023971382573288903, "loss": 3.0594, "step": 34550 }, { "epoch": 1.69, "grad_norm": 0.6270124912261963, "learning_rate": 0.00023969874369343734, "loss": 2.976, "step": 34551 }, { "epoch": 1.69, "grad_norm": 0.6438772678375244, "learning_rate": 0.00023968366181280755, "loss": 3.0046, "step": 34552 }, { "epoch": 1.69, "grad_norm": 0.6129069328308105, "learning_rate": 0.00023966858009103945, "loss": 2.9007, "step": 34553 }, { "epoch": 1.69, "grad_norm": 0.6193562150001526, "learning_rate": 0.00023965349852817267, "loss": 2.9481, "step": 34554 }, { "epoch": 1.69, "grad_norm": 0.6159753203392029, "learning_rate": 0.00023963841712424703, "loss": 2.9394, "step": 34555 }, { "epoch": 1.69, "grad_norm": 0.5913207530975342, "learning_rate": 0.00023962333587930203, "loss": 2.9432, "step": 34556 }, { "epoch": 1.69, "grad_norm": 0.6216997504234314, "learning_rate": 0.00023960825479337763, "loss": 3.0619, "step": 34557 }, { "epoch": 1.69, "grad_norm": 0.6246969699859619, "learning_rate": 0.00023959317386651352, "loss": 2.8386, "step": 34558 }, { "epoch": 1.69, "grad_norm": 0.6128319501876831, "learning_rate": 0.0002395780930987492, "loss": 3.0849, "step": 34559 }, { "epoch": 1.69, "grad_norm": 0.6397287249565125, "learning_rate": 0.00023956301249012472, "loss": 3.0055, "step": 34560 }, { "epoch": 1.69, "grad_norm": 0.5970715880393982, "learning_rate": 0.00023954793204067956, "loss": 2.909, "step": 34561 }, { "epoch": 1.69, "grad_norm": 0.6784390807151794, "learning_rate": 0.00023953285175045346, "loss": 3.0605, "step": 34562 }, { "epoch": 1.69, "grad_norm": 0.611601710319519, "learning_rate": 0.00023951777161948634, "loss": 3.0419, "step": 34563 }, { "epoch": 1.69, "grad_norm": 0.6383347511291504, "learning_rate": 0.00023950269164781766, "loss": 2.9873, "step": 34564 }, { "epoch": 1.69, "grad_norm": 0.5747080445289612, "learning_rate": 0.00023948761183548734, "loss": 2.8731, "step": 34565 }, { "epoch": 1.69, "grad_norm": 0.611871600151062, "learning_rate": 0.00023947253218253484, "loss": 3.0842, "step": 34566 }, { "epoch": 1.69, "grad_norm": 0.6260141730308533, "learning_rate": 0.00023945745268900008, "loss": 3.1116, "step": 34567 }, { "epoch": 1.69, "grad_norm": 0.5983566641807556, "learning_rate": 0.00023944237335492285, "loss": 3.0337, "step": 34568 }, { "epoch": 1.69, "grad_norm": 0.599165678024292, "learning_rate": 0.0002394272941803427, "loss": 2.9508, "step": 34569 }, { "epoch": 1.69, "grad_norm": 0.6618895530700684, "learning_rate": 0.00023941221516529944, "loss": 3.1141, "step": 34570 }, { "epoch": 1.69, "grad_norm": 0.5907045006752014, "learning_rate": 0.00023939713630983267, "loss": 2.8549, "step": 34571 }, { "epoch": 1.69, "grad_norm": 0.6474612951278687, "learning_rate": 0.0002393820576139822, "loss": 2.9113, "step": 34572 }, { "epoch": 1.69, "grad_norm": 0.5951142311096191, "learning_rate": 0.00023936697907778778, "loss": 2.9788, "step": 34573 }, { "epoch": 1.69, "grad_norm": 0.6441212296485901, "learning_rate": 0.00023935190070128895, "loss": 2.8805, "step": 34574 }, { "epoch": 1.69, "grad_norm": 0.6195458173751831, "learning_rate": 0.0002393368224845257, "loss": 3.1319, "step": 34575 }, { "epoch": 1.69, "grad_norm": 0.602984607219696, "learning_rate": 0.00023932174442753748, "loss": 3.1744, "step": 34576 }, { "epoch": 1.69, "grad_norm": 0.616973340511322, "learning_rate": 0.00023930666653036408, "loss": 3.1541, "step": 34577 }, { "epoch": 1.69, "grad_norm": 0.6357001066207886, "learning_rate": 0.00023929158879304534, "loss": 3.112, "step": 34578 }, { "epoch": 1.69, "grad_norm": 0.6247024536132812, "learning_rate": 0.00023927651121562082, "loss": 3.2426, "step": 34579 }, { "epoch": 1.69, "grad_norm": 0.6100210547447205, "learning_rate": 0.00023926143379813034, "loss": 3.0513, "step": 34580 }, { "epoch": 1.69, "grad_norm": 0.5985271334648132, "learning_rate": 0.00023924635654061342, "loss": 2.9466, "step": 34581 }, { "epoch": 1.69, "grad_norm": 0.6380882859230042, "learning_rate": 0.00023923127944310997, "loss": 3.0692, "step": 34582 }, { "epoch": 1.69, "grad_norm": 0.6195142269134521, "learning_rate": 0.0002392162025056597, "loss": 3.1293, "step": 34583 }, { "epoch": 1.69, "grad_norm": 0.6390209197998047, "learning_rate": 0.00023920112572830212, "loss": 2.8975, "step": 34584 }, { "epoch": 1.69, "grad_norm": 0.6426082849502563, "learning_rate": 0.00023918604911107722, "loss": 2.9478, "step": 34585 }, { "epoch": 1.69, "grad_norm": 0.6332377195358276, "learning_rate": 0.00023917097265402452, "loss": 2.9731, "step": 34586 }, { "epoch": 1.7, "grad_norm": 0.6898009181022644, "learning_rate": 0.0002391558963571838, "loss": 3.0346, "step": 34587 }, { "epoch": 1.7, "grad_norm": 0.6284445524215698, "learning_rate": 0.0002391408202205946, "loss": 3.1084, "step": 34588 }, { "epoch": 1.7, "grad_norm": 0.5925804376602173, "learning_rate": 0.00023912574424429678, "loss": 3.0499, "step": 34589 }, { "epoch": 1.7, "grad_norm": 0.6358410716056824, "learning_rate": 0.0002391106684283302, "loss": 2.8512, "step": 34590 }, { "epoch": 1.7, "grad_norm": 0.6737104654312134, "learning_rate": 0.0002390955927727343, "loss": 2.946, "step": 34591 }, { "epoch": 1.7, "grad_norm": 0.604070782661438, "learning_rate": 0.00023908051727754897, "loss": 3.0541, "step": 34592 }, { "epoch": 1.7, "grad_norm": 0.5812761187553406, "learning_rate": 0.00023906544194281367, "loss": 3.1825, "step": 34593 }, { "epoch": 1.7, "grad_norm": 0.5896308422088623, "learning_rate": 0.00023905036676856835, "loss": 3.1379, "step": 34594 }, { "epoch": 1.7, "grad_norm": 0.6765037775039673, "learning_rate": 0.00023903529175485272, "loss": 2.9756, "step": 34595 }, { "epoch": 1.7, "grad_norm": 0.6317283511161804, "learning_rate": 0.00023902021690170623, "loss": 3.07, "step": 34596 }, { "epoch": 1.7, "grad_norm": 0.6281628012657166, "learning_rate": 0.0002390051422091689, "loss": 3.049, "step": 34597 }, { "epoch": 1.7, "grad_norm": 0.6032910943031311, "learning_rate": 0.00023899006767728024, "loss": 2.9015, "step": 34598 }, { "epoch": 1.7, "grad_norm": 0.6200457811355591, "learning_rate": 0.00023897499330607992, "loss": 3.0544, "step": 34599 }, { "epoch": 1.7, "grad_norm": 0.5972832441329956, "learning_rate": 0.00023895991909560785, "loss": 3.0304, "step": 34600 }, { "epoch": 1.7, "grad_norm": 0.6621426343917847, "learning_rate": 0.00023894484504590355, "loss": 3.1561, "step": 34601 }, { "epoch": 1.7, "grad_norm": 0.6243399977684021, "learning_rate": 0.00023892977115700685, "loss": 2.9101, "step": 34602 }, { "epoch": 1.7, "grad_norm": 0.6167004108428955, "learning_rate": 0.00023891469742895723, "loss": 2.9986, "step": 34603 }, { "epoch": 1.7, "grad_norm": 0.7089727520942688, "learning_rate": 0.0002388996238617946, "loss": 3.242, "step": 34604 }, { "epoch": 1.7, "grad_norm": 0.5827861428260803, "learning_rate": 0.00023888455045555868, "loss": 2.9402, "step": 34605 }, { "epoch": 1.7, "grad_norm": 0.6445075273513794, "learning_rate": 0.000238869477210289, "loss": 3.0979, "step": 34606 }, { "epoch": 1.7, "grad_norm": 0.6177965402603149, "learning_rate": 0.0002388544041260254, "loss": 2.9209, "step": 34607 }, { "epoch": 1.7, "grad_norm": 0.6588315963745117, "learning_rate": 0.00023883933120280747, "loss": 2.9099, "step": 34608 }, { "epoch": 1.7, "grad_norm": 0.6694788336753845, "learning_rate": 0.000238824258440675, "loss": 2.9249, "step": 34609 }, { "epoch": 1.7, "grad_norm": 0.6350728869438171, "learning_rate": 0.00023880918583966777, "loss": 3.1211, "step": 34610 }, { "epoch": 1.7, "grad_norm": 0.6018306016921997, "learning_rate": 0.0002387941133998252, "loss": 3.068, "step": 34611 }, { "epoch": 1.7, "grad_norm": 0.6151270866394043, "learning_rate": 0.00023877904112118726, "loss": 2.7378, "step": 34612 }, { "epoch": 1.7, "grad_norm": 0.6024587154388428, "learning_rate": 0.0002387639690037935, "loss": 3.111, "step": 34613 }, { "epoch": 1.7, "grad_norm": 0.6354588866233826, "learning_rate": 0.00023874889704768366, "loss": 3.0932, "step": 34614 }, { "epoch": 1.7, "grad_norm": 0.6052812337875366, "learning_rate": 0.0002387338252528975, "loss": 3.0522, "step": 34615 }, { "epoch": 1.7, "grad_norm": 0.6550926566123962, "learning_rate": 0.00023871875361947462, "loss": 3.0193, "step": 34616 }, { "epoch": 1.7, "grad_norm": 0.6020631790161133, "learning_rate": 0.00023870368214745478, "loss": 2.9907, "step": 34617 }, { "epoch": 1.7, "grad_norm": 0.6404764652252197, "learning_rate": 0.00023868861083687753, "loss": 3.0878, "step": 34618 }, { "epoch": 1.7, "grad_norm": 0.6281757950782776, "learning_rate": 0.00023867353968778273, "loss": 2.9839, "step": 34619 }, { "epoch": 1.7, "grad_norm": 0.6446701884269714, "learning_rate": 0.0002386584687002101, "loss": 3.0934, "step": 34620 }, { "epoch": 1.7, "grad_norm": 0.6888076663017273, "learning_rate": 0.00023864339787419916, "loss": 3.2198, "step": 34621 }, { "epoch": 1.7, "grad_norm": 0.7113660573959351, "learning_rate": 0.0002386283272097898, "loss": 3.125, "step": 34622 }, { "epoch": 1.7, "grad_norm": 0.6033316254615784, "learning_rate": 0.00023861325670702154, "loss": 2.9347, "step": 34623 }, { "epoch": 1.7, "grad_norm": 0.6461472511291504, "learning_rate": 0.00023859818636593407, "loss": 2.9211, "step": 34624 }, { "epoch": 1.7, "grad_norm": 0.6217671632766724, "learning_rate": 0.00023858311618656734, "loss": 2.9407, "step": 34625 }, { "epoch": 1.7, "grad_norm": 0.7843379378318787, "learning_rate": 0.00023856804616896075, "loss": 3.0633, "step": 34626 }, { "epoch": 1.7, "grad_norm": 0.6176000237464905, "learning_rate": 0.0002385529763131542, "loss": 3.0751, "step": 34627 }, { "epoch": 1.7, "grad_norm": 0.6316340565681458, "learning_rate": 0.00023853790661918715, "loss": 2.9783, "step": 34628 }, { "epoch": 1.7, "grad_norm": 0.63956218957901, "learning_rate": 0.00023852283708709944, "loss": 3.0346, "step": 34629 }, { "epoch": 1.7, "grad_norm": 0.6014463901519775, "learning_rate": 0.00023850776771693082, "loss": 3.088, "step": 34630 }, { "epoch": 1.7, "grad_norm": 0.6172289252281189, "learning_rate": 0.0002384926985087209, "loss": 2.9347, "step": 34631 }, { "epoch": 1.7, "grad_norm": 0.6280187964439392, "learning_rate": 0.00023847762946250942, "loss": 2.9345, "step": 34632 }, { "epoch": 1.7, "grad_norm": 0.5740066170692444, "learning_rate": 0.0002384625605783359, "loss": 2.9651, "step": 34633 }, { "epoch": 1.7, "grad_norm": 0.6673811078071594, "learning_rate": 0.0002384474918562402, "loss": 3.2161, "step": 34634 }, { "epoch": 1.7, "grad_norm": 0.6165456175804138, "learning_rate": 0.000238432423296262, "loss": 3.0175, "step": 34635 }, { "epoch": 1.7, "grad_norm": 0.6148802042007446, "learning_rate": 0.0002384173548984409, "loss": 2.9729, "step": 34636 }, { "epoch": 1.7, "grad_norm": 0.6252344846725464, "learning_rate": 0.0002384022866628167, "loss": 3.0979, "step": 34637 }, { "epoch": 1.7, "grad_norm": 0.6333229541778564, "learning_rate": 0.00023838721858942898, "loss": 2.8437, "step": 34638 }, { "epoch": 1.7, "grad_norm": 0.6194198131561279, "learning_rate": 0.0002383721506783174, "loss": 2.8774, "step": 34639 }, { "epoch": 1.7, "grad_norm": 0.6095027327537537, "learning_rate": 0.00023835708292952183, "loss": 2.9816, "step": 34640 }, { "epoch": 1.7, "grad_norm": 0.613484799861908, "learning_rate": 0.0002383420153430818, "loss": 3.0787, "step": 34641 }, { "epoch": 1.7, "grad_norm": 0.5842860341072083, "learning_rate": 0.00023832694791903707, "loss": 3.0523, "step": 34642 }, { "epoch": 1.7, "grad_norm": 0.6341204047203064, "learning_rate": 0.0002383118806574272, "loss": 3.1984, "step": 34643 }, { "epoch": 1.7, "grad_norm": 0.6389890313148499, "learning_rate": 0.00023829681355829203, "loss": 2.8034, "step": 34644 }, { "epoch": 1.7, "grad_norm": 0.5947014689445496, "learning_rate": 0.00023828174662167122, "loss": 3.1272, "step": 34645 }, { "epoch": 1.7, "grad_norm": 0.6640393733978271, "learning_rate": 0.00023826667984760426, "loss": 2.9868, "step": 34646 }, { "epoch": 1.7, "grad_norm": 0.6023179292678833, "learning_rate": 0.00023825161323613113, "loss": 3.1548, "step": 34647 }, { "epoch": 1.7, "grad_norm": 0.6089648604393005, "learning_rate": 0.0002382365467872913, "loss": 2.9102, "step": 34648 }, { "epoch": 1.7, "grad_norm": 0.6076211333274841, "learning_rate": 0.00023822148050112448, "loss": 3.0588, "step": 34649 }, { "epoch": 1.7, "grad_norm": 0.7229902744293213, "learning_rate": 0.0002382064143776705, "loss": 2.927, "step": 34650 }, { "epoch": 1.7, "grad_norm": 0.5934354662895203, "learning_rate": 0.00023819134841696882, "loss": 2.9372, "step": 34651 }, { "epoch": 1.7, "grad_norm": 0.6203776597976685, "learning_rate": 0.00023817628261905933, "loss": 3.0984, "step": 34652 }, { "epoch": 1.7, "grad_norm": 0.6217381358146667, "learning_rate": 0.0002381612169839816, "loss": 3.1926, "step": 34653 }, { "epoch": 1.7, "grad_norm": 0.5950883030891418, "learning_rate": 0.0002381461515117752, "loss": 2.8172, "step": 34654 }, { "epoch": 1.7, "grad_norm": 0.622291624546051, "learning_rate": 0.0002381310862024801, "loss": 2.9426, "step": 34655 }, { "epoch": 1.7, "grad_norm": 0.6245651841163635, "learning_rate": 0.00023811602105613575, "loss": 3.0114, "step": 34656 }, { "epoch": 1.7, "grad_norm": 0.6118585467338562, "learning_rate": 0.0002381009560727819, "loss": 3.003, "step": 34657 }, { "epoch": 1.7, "grad_norm": 0.7004932761192322, "learning_rate": 0.00023808589125245814, "loss": 2.9431, "step": 34658 }, { "epoch": 1.7, "grad_norm": 0.6721332669258118, "learning_rate": 0.0002380708265952043, "loss": 2.9977, "step": 34659 }, { "epoch": 1.7, "grad_norm": 0.6560796499252319, "learning_rate": 0.00023805576210106003, "loss": 2.9513, "step": 34660 }, { "epoch": 1.7, "grad_norm": 0.5827532410621643, "learning_rate": 0.0002380406977700648, "loss": 2.8129, "step": 34661 }, { "epoch": 1.7, "grad_norm": 0.6369756460189819, "learning_rate": 0.00023802563360225863, "loss": 3.1372, "step": 34662 }, { "epoch": 1.7, "grad_norm": 0.6369935870170593, "learning_rate": 0.00023801056959768093, "loss": 2.876, "step": 34663 }, { "epoch": 1.7, "grad_norm": 0.6207792162895203, "learning_rate": 0.00023799550575637153, "loss": 3.0598, "step": 34664 }, { "epoch": 1.7, "grad_norm": 0.5954633355140686, "learning_rate": 0.0002379804420783699, "loss": 2.9627, "step": 34665 }, { "epoch": 1.7, "grad_norm": 0.6295991539955139, "learning_rate": 0.00023796537856371589, "loss": 3.0739, "step": 34666 }, { "epoch": 1.7, "grad_norm": 0.6417800188064575, "learning_rate": 0.00023795031521244922, "loss": 3.0968, "step": 34667 }, { "epoch": 1.7, "grad_norm": 0.6085421442985535, "learning_rate": 0.00023793525202460937, "loss": 2.824, "step": 34668 }, { "epoch": 1.7, "grad_norm": 0.6392890214920044, "learning_rate": 0.00023792018900023622, "loss": 2.9634, "step": 34669 }, { "epoch": 1.7, "grad_norm": 0.6218932271003723, "learning_rate": 0.00023790512613936925, "loss": 3.0132, "step": 34670 }, { "epoch": 1.7, "grad_norm": 0.642927885055542, "learning_rate": 0.00023789006344204823, "loss": 2.981, "step": 34671 }, { "epoch": 1.7, "grad_norm": 0.6467803120613098, "learning_rate": 0.00023787500090831294, "loss": 3.0479, "step": 34672 }, { "epoch": 1.7, "grad_norm": 0.6575911045074463, "learning_rate": 0.00023785993853820282, "loss": 2.999, "step": 34673 }, { "epoch": 1.7, "grad_norm": 0.6105745434761047, "learning_rate": 0.00023784487633175778, "loss": 3.017, "step": 34674 }, { "epoch": 1.7, "grad_norm": 0.6278857588768005, "learning_rate": 0.0002378298142890173, "loss": 3.0142, "step": 34675 }, { "epoch": 1.7, "grad_norm": 0.7186394929885864, "learning_rate": 0.00023781475241002107, "loss": 3.0599, "step": 34676 }, { "epoch": 1.7, "grad_norm": 0.6153616309165955, "learning_rate": 0.00023779969069480896, "loss": 3.1681, "step": 34677 }, { "epoch": 1.7, "grad_norm": 0.6550021171569824, "learning_rate": 0.00023778462914342038, "loss": 3.1913, "step": 34678 }, { "epoch": 1.7, "grad_norm": 0.6431772112846375, "learning_rate": 0.0002377695677558952, "loss": 3.0687, "step": 34679 }, { "epoch": 1.7, "grad_norm": 0.5983546376228333, "learning_rate": 0.0002377545065322729, "loss": 2.9564, "step": 34680 }, { "epoch": 1.7, "grad_norm": 0.6056777238845825, "learning_rate": 0.00023773944547259327, "loss": 3.1077, "step": 34681 }, { "epoch": 1.7, "grad_norm": 0.6222465634346008, "learning_rate": 0.00023772438457689607, "loss": 3.2957, "step": 34682 }, { "epoch": 1.7, "grad_norm": 0.5904668569564819, "learning_rate": 0.00023770932384522072, "loss": 3.1094, "step": 34683 }, { "epoch": 1.7, "grad_norm": 0.6390523910522461, "learning_rate": 0.00023769426327760712, "loss": 3.1585, "step": 34684 }, { "epoch": 1.7, "grad_norm": 0.6436138153076172, "learning_rate": 0.0002376792028740948, "loss": 2.9761, "step": 34685 }, { "epoch": 1.7, "grad_norm": 0.6362774968147278, "learning_rate": 0.00023766414263472338, "loss": 3.1662, "step": 34686 }, { "epoch": 1.7, "grad_norm": 0.6165968775749207, "learning_rate": 0.00023764908255953277, "loss": 2.9115, "step": 34687 }, { "epoch": 1.7, "grad_norm": 0.6462124586105347, "learning_rate": 0.00023763402264856238, "loss": 3.1004, "step": 34688 }, { "epoch": 1.7, "grad_norm": 0.6038845777511597, "learning_rate": 0.00023761896290185207, "loss": 3.167, "step": 34689 }, { "epoch": 1.7, "grad_norm": 0.6276283860206604, "learning_rate": 0.00023760390331944127, "loss": 3.0901, "step": 34690 }, { "epoch": 1.7, "grad_norm": 0.6404023766517639, "learning_rate": 0.00023758884390136974, "loss": 3.031, "step": 34691 }, { "epoch": 1.7, "grad_norm": 0.5949774384498596, "learning_rate": 0.00023757378464767735, "loss": 3.1607, "step": 34692 }, { "epoch": 1.7, "grad_norm": 0.5838576555252075, "learning_rate": 0.00023755872555840355, "loss": 2.8934, "step": 34693 }, { "epoch": 1.7, "grad_norm": 0.6016432046890259, "learning_rate": 0.00023754366663358804, "loss": 2.9678, "step": 34694 }, { "epoch": 1.7, "grad_norm": 0.6535770297050476, "learning_rate": 0.00023752860787327044, "loss": 3.088, "step": 34695 }, { "epoch": 1.7, "grad_norm": 0.5867074131965637, "learning_rate": 0.0002375135492774905, "loss": 3.1053, "step": 34696 }, { "epoch": 1.7, "grad_norm": 0.6075217127799988, "learning_rate": 0.0002374984908462879, "loss": 2.8846, "step": 34697 }, { "epoch": 1.7, "grad_norm": 0.5963104367256165, "learning_rate": 0.00023748343257970215, "loss": 3.1219, "step": 34698 }, { "epoch": 1.7, "grad_norm": 0.5905624032020569, "learning_rate": 0.00023746837447777313, "loss": 3.1523, "step": 34699 }, { "epoch": 1.7, "grad_norm": 0.6578564643859863, "learning_rate": 0.00023745331654054027, "loss": 2.8213, "step": 34700 }, { "epoch": 1.7, "grad_norm": 0.609445333480835, "learning_rate": 0.0002374382587680433, "loss": 3.1463, "step": 34701 }, { "epoch": 1.7, "grad_norm": 0.6245207190513611, "learning_rate": 0.00023742320116032202, "loss": 3.0789, "step": 34702 }, { "epoch": 1.7, "grad_norm": 0.6319349408149719, "learning_rate": 0.00023740814371741597, "loss": 2.9953, "step": 34703 }, { "epoch": 1.7, "grad_norm": 0.6266147494316101, "learning_rate": 0.00023739308643936486, "loss": 2.9907, "step": 34704 }, { "epoch": 1.7, "grad_norm": 0.6235140562057495, "learning_rate": 0.00023737802932620819, "loss": 3.0847, "step": 34705 }, { "epoch": 1.7, "grad_norm": 0.6102908253669739, "learning_rate": 0.0002373629723779858, "loss": 3.0529, "step": 34706 }, { "epoch": 1.7, "grad_norm": 0.5892670750617981, "learning_rate": 0.00023734791559473732, "loss": 2.8094, "step": 34707 }, { "epoch": 1.7, "grad_norm": 0.658932089805603, "learning_rate": 0.0002373328589765023, "loss": 3.0447, "step": 34708 }, { "epoch": 1.7, "grad_norm": 0.6005896925926208, "learning_rate": 0.00023731780252332058, "loss": 3.0984, "step": 34709 }, { "epoch": 1.7, "grad_norm": 0.6177623867988586, "learning_rate": 0.00023730274623523162, "loss": 3.033, "step": 34710 }, { "epoch": 1.7, "grad_norm": 0.6345233917236328, "learning_rate": 0.00023728769011227509, "loss": 3.0754, "step": 34711 }, { "epoch": 1.7, "grad_norm": 0.7004926800727844, "learning_rate": 0.00023727263415449087, "loss": 3.1004, "step": 34712 }, { "epoch": 1.7, "grad_norm": 0.6280829906463623, "learning_rate": 0.0002372575783619183, "loss": 2.9972, "step": 34713 }, { "epoch": 1.7, "grad_norm": 0.661109983921051, "learning_rate": 0.00023724252273459736, "loss": 2.9467, "step": 34714 }, { "epoch": 1.7, "grad_norm": 0.6094497442245483, "learning_rate": 0.00023722746727256744, "loss": 2.869, "step": 34715 }, { "epoch": 1.7, "grad_norm": 0.6951985359191895, "learning_rate": 0.00023721241197586826, "loss": 3.0284, "step": 34716 }, { "epoch": 1.7, "grad_norm": 0.6294052004814148, "learning_rate": 0.0002371973568445396, "loss": 2.898, "step": 34717 }, { "epoch": 1.7, "grad_norm": 0.6243933439254761, "learning_rate": 0.00023718230187862098, "loss": 2.8577, "step": 34718 }, { "epoch": 1.7, "grad_norm": 0.6630285978317261, "learning_rate": 0.00023716724707815217, "loss": 2.9685, "step": 34719 }, { "epoch": 1.7, "grad_norm": 0.6620771288871765, "learning_rate": 0.00023715219244317256, "loss": 3.0159, "step": 34720 }, { "epoch": 1.7, "grad_norm": 0.6197815537452698, "learning_rate": 0.00023713713797372202, "loss": 2.9542, "step": 34721 }, { "epoch": 1.7, "grad_norm": 0.6559625864028931, "learning_rate": 0.0002371220836698403, "loss": 2.9549, "step": 34722 }, { "epoch": 1.7, "grad_norm": 0.6281477212905884, "learning_rate": 0.00023710702953156675, "loss": 2.9301, "step": 34723 }, { "epoch": 1.7, "grad_norm": 0.6305189728736877, "learning_rate": 0.00023709197555894128, "loss": 2.9736, "step": 34724 }, { "epoch": 1.7, "grad_norm": 0.6746921539306641, "learning_rate": 0.0002370769217520034, "loss": 2.9676, "step": 34725 }, { "epoch": 1.7, "grad_norm": 0.6075299382209778, "learning_rate": 0.00023706186811079278, "loss": 2.7907, "step": 34726 }, { "epoch": 1.7, "grad_norm": 0.61752849817276, "learning_rate": 0.00023704681463534915, "loss": 3.1077, "step": 34727 }, { "epoch": 1.7, "grad_norm": 0.5963026285171509, "learning_rate": 0.00023703176132571207, "loss": 2.9804, "step": 34728 }, { "epoch": 1.7, "grad_norm": 0.6269551515579224, "learning_rate": 0.00023701670818192122, "loss": 2.8909, "step": 34729 }, { "epoch": 1.7, "grad_norm": 0.6345133781433105, "learning_rate": 0.00023700165520401618, "loss": 3.0245, "step": 34730 }, { "epoch": 1.7, "grad_norm": 0.6552383899688721, "learning_rate": 0.00023698660239203663, "loss": 2.9909, "step": 34731 }, { "epoch": 1.7, "grad_norm": 0.6076620817184448, "learning_rate": 0.00023697154974602235, "loss": 3.0518, "step": 34732 }, { "epoch": 1.7, "grad_norm": 0.6243478655815125, "learning_rate": 0.00023695649726601284, "loss": 2.9917, "step": 34733 }, { "epoch": 1.7, "grad_norm": 0.6108534336090088, "learning_rate": 0.00023694144495204782, "loss": 2.8576, "step": 34734 }, { "epoch": 1.7, "grad_norm": 0.658390998840332, "learning_rate": 0.00023692639280416679, "loss": 3.1793, "step": 34735 }, { "epoch": 1.7, "grad_norm": 0.649669349193573, "learning_rate": 0.00023691134082240957, "loss": 3.0141, "step": 34736 }, { "epoch": 1.7, "grad_norm": 0.6026470065116882, "learning_rate": 0.0002368962890068158, "loss": 2.8894, "step": 34737 }, { "epoch": 1.7, "grad_norm": 0.5986788868904114, "learning_rate": 0.00023688123735742493, "loss": 2.9405, "step": 34738 }, { "epoch": 1.7, "grad_norm": 0.6287503838539124, "learning_rate": 0.00023686618587427685, "loss": 2.9873, "step": 34739 }, { "epoch": 1.7, "grad_norm": 0.617247998714447, "learning_rate": 0.00023685113455741103, "loss": 2.9822, "step": 34740 }, { "epoch": 1.7, "grad_norm": 0.6538761258125305, "learning_rate": 0.00023683608340686722, "loss": 3.2603, "step": 34741 }, { "epoch": 1.7, "grad_norm": 0.5831221342086792, "learning_rate": 0.00023682103242268489, "loss": 3.0979, "step": 34742 }, { "epoch": 1.7, "grad_norm": 0.6267368197441101, "learning_rate": 0.00023680598160490387, "loss": 2.7693, "step": 34743 }, { "epoch": 1.7, "grad_norm": 0.6173529624938965, "learning_rate": 0.0002367909309535638, "loss": 2.883, "step": 34744 }, { "epoch": 1.7, "grad_norm": 0.6151677370071411, "learning_rate": 0.00023677588046870413, "loss": 3.2401, "step": 34745 }, { "epoch": 1.7, "grad_norm": 0.6226587891578674, "learning_rate": 0.00023676083015036474, "loss": 2.8953, "step": 34746 }, { "epoch": 1.7, "grad_norm": 0.5849825739860535, "learning_rate": 0.00023674577999858507, "loss": 2.8515, "step": 34747 }, { "epoch": 1.7, "grad_norm": 0.5755380988121033, "learning_rate": 0.0002367307300134048, "loss": 3.012, "step": 34748 }, { "epoch": 1.7, "grad_norm": 0.6167582273483276, "learning_rate": 0.00023671568019486376, "loss": 2.9798, "step": 34749 }, { "epoch": 1.7, "grad_norm": 0.5916376709938049, "learning_rate": 0.00023670063054300134, "loss": 3.0067, "step": 34750 }, { "epoch": 1.7, "grad_norm": 0.639107882976532, "learning_rate": 0.00023668558105785736, "loss": 3.067, "step": 34751 }, { "epoch": 1.7, "grad_norm": 0.610746443271637, "learning_rate": 0.00023667053173947122, "loss": 2.9683, "step": 34752 }, { "epoch": 1.7, "grad_norm": 0.6915435194969177, "learning_rate": 0.00023665548258788274, "loss": 3.0443, "step": 34753 }, { "epoch": 1.7, "grad_norm": 0.6022907495498657, "learning_rate": 0.00023664043360313166, "loss": 3.0091, "step": 34754 }, { "epoch": 1.7, "grad_norm": 0.6308808326721191, "learning_rate": 0.0002366253847852574, "loss": 2.9871, "step": 34755 }, { "epoch": 1.7, "grad_norm": 0.6034700870513916, "learning_rate": 0.00023661033613429976, "loss": 3.1442, "step": 34756 }, { "epoch": 1.7, "grad_norm": 0.5891356468200684, "learning_rate": 0.00023659528765029815, "loss": 3.0425, "step": 34757 }, { "epoch": 1.7, "grad_norm": 0.6369116902351379, "learning_rate": 0.00023658023933329243, "loss": 3.1113, "step": 34758 }, { "epoch": 1.7, "grad_norm": 0.605215311050415, "learning_rate": 0.0002365651911833222, "loss": 3.1997, "step": 34759 }, { "epoch": 1.7, "grad_norm": 0.6010726094245911, "learning_rate": 0.00023655014320042692, "loss": 2.907, "step": 34760 }, { "epoch": 1.7, "grad_norm": 0.615598201751709, "learning_rate": 0.00023653509538464648, "loss": 2.9991, "step": 34761 }, { "epoch": 1.7, "grad_norm": 0.6162781119346619, "learning_rate": 0.00023652004773602034, "loss": 3.1683, "step": 34762 }, { "epoch": 1.7, "grad_norm": 0.6095041036605835, "learning_rate": 0.0002365050002545881, "loss": 2.9159, "step": 34763 }, { "epoch": 1.7, "grad_norm": 0.5748782753944397, "learning_rate": 0.0002364899529403896, "loss": 2.7686, "step": 34764 }, { "epoch": 1.7, "grad_norm": 0.6566309332847595, "learning_rate": 0.00023647490579346432, "loss": 2.8777, "step": 34765 }, { "epoch": 1.7, "grad_norm": 0.5985787510871887, "learning_rate": 0.0002364598588138519, "loss": 2.9995, "step": 34766 }, { "epoch": 1.7, "grad_norm": 0.6187407374382019, "learning_rate": 0.0002364448120015919, "loss": 2.8916, "step": 34767 }, { "epoch": 1.7, "grad_norm": 0.6135745644569397, "learning_rate": 0.00023642976535672407, "loss": 3.0603, "step": 34768 }, { "epoch": 1.7, "grad_norm": 0.6574584245681763, "learning_rate": 0.0002364147188792881, "loss": 2.9804, "step": 34769 }, { "epoch": 1.7, "grad_norm": 0.6308838725090027, "learning_rate": 0.00023639967256932338, "loss": 2.9685, "step": 34770 }, { "epoch": 1.7, "grad_norm": 0.6589849591255188, "learning_rate": 0.00023638462642686984, "loss": 3.1744, "step": 34771 }, { "epoch": 1.7, "grad_norm": 0.6437431573867798, "learning_rate": 0.00023636958045196676, "loss": 2.9364, "step": 34772 }, { "epoch": 1.7, "grad_norm": 0.6591246128082275, "learning_rate": 0.00023635453464465408, "loss": 3.0042, "step": 34773 }, { "epoch": 1.7, "grad_norm": 0.6104686260223389, "learning_rate": 0.00023633948900497135, "loss": 2.9775, "step": 34774 }, { "epoch": 1.7, "grad_norm": 0.6379290819168091, "learning_rate": 0.00023632444353295802, "loss": 2.9928, "step": 34775 }, { "epoch": 1.7, "grad_norm": 0.6709439158439636, "learning_rate": 0.00023630939822865397, "loss": 2.8491, "step": 34776 }, { "epoch": 1.7, "grad_norm": 0.6231077909469604, "learning_rate": 0.00023629435309209865, "loss": 2.9117, "step": 34777 }, { "epoch": 1.7, "grad_norm": 0.6265411376953125, "learning_rate": 0.00023627930812333172, "loss": 3.1632, "step": 34778 }, { "epoch": 1.7, "grad_norm": 0.6154355406761169, "learning_rate": 0.00023626426332239292, "loss": 3.1184, "step": 34779 }, { "epoch": 1.7, "grad_norm": 0.6437824964523315, "learning_rate": 0.0002362492186893217, "loss": 2.844, "step": 34780 }, { "epoch": 1.7, "grad_norm": 0.6491339206695557, "learning_rate": 0.00023623417422415788, "loss": 2.8048, "step": 34781 }, { "epoch": 1.7, "grad_norm": 0.6449874639511108, "learning_rate": 0.0002362191299269408, "loss": 3.0313, "step": 34782 }, { "epoch": 1.7, "grad_norm": 0.6093539595603943, "learning_rate": 0.00023620408579771038, "loss": 3.1829, "step": 34783 }, { "epoch": 1.7, "grad_norm": 0.6419656872749329, "learning_rate": 0.00023618904183650614, "loss": 3.3079, "step": 34784 }, { "epoch": 1.7, "grad_norm": 0.6378693580627441, "learning_rate": 0.00023617399804336755, "loss": 2.9831, "step": 34785 }, { "epoch": 1.7, "grad_norm": 0.6107136607170105, "learning_rate": 0.0002361589544183345, "loss": 2.9476, "step": 34786 }, { "epoch": 1.7, "grad_norm": 0.6500281691551208, "learning_rate": 0.0002361439109614464, "loss": 3.0999, "step": 34787 }, { "epoch": 1.7, "grad_norm": 0.5922091007232666, "learning_rate": 0.00023612886767274293, "loss": 3.0714, "step": 34788 }, { "epoch": 1.7, "grad_norm": 0.6356111764907837, "learning_rate": 0.00023611382455226385, "loss": 2.8917, "step": 34789 }, { "epoch": 1.7, "grad_norm": 0.6688171029090881, "learning_rate": 0.00023609878160004856, "loss": 3.0792, "step": 34790 }, { "epoch": 1.71, "grad_norm": 0.6286024451255798, "learning_rate": 0.00023608373881613685, "loss": 3.1178, "step": 34791 }, { "epoch": 1.71, "grad_norm": 0.6304290294647217, "learning_rate": 0.00023606869620056817, "loss": 2.9926, "step": 34792 }, { "epoch": 1.71, "grad_norm": 0.6616683006286621, "learning_rate": 0.0002360536537533822, "loss": 2.761, "step": 34793 }, { "epoch": 1.71, "grad_norm": 0.5931280851364136, "learning_rate": 0.0002360386114746188, "loss": 2.9022, "step": 34794 }, { "epoch": 1.71, "grad_norm": 0.6022833585739136, "learning_rate": 0.00023602356936431728, "loss": 3.0123, "step": 34795 }, { "epoch": 1.71, "grad_norm": 0.6116443276405334, "learning_rate": 0.00023600852742251742, "loss": 2.9503, "step": 34796 }, { "epoch": 1.71, "grad_norm": 0.651864230632782, "learning_rate": 0.00023599348564925867, "loss": 2.966, "step": 34797 }, { "epoch": 1.71, "grad_norm": 0.6232309341430664, "learning_rate": 0.0002359784440445808, "loss": 2.9551, "step": 34798 }, { "epoch": 1.71, "grad_norm": 0.5941501259803772, "learning_rate": 0.00023596340260852348, "loss": 3.1523, "step": 34799 }, { "epoch": 1.71, "grad_norm": 0.6085199117660522, "learning_rate": 0.0002359483613411261, "loss": 2.8691, "step": 34800 }, { "epoch": 1.71, "grad_norm": 0.6391234397888184, "learning_rate": 0.00023593332024242854, "loss": 2.9873, "step": 34801 }, { "epoch": 1.71, "grad_norm": 0.6489654183387756, "learning_rate": 0.0002359182793124702, "loss": 2.9056, "step": 34802 }, { "epoch": 1.71, "grad_norm": 0.6384288668632507, "learning_rate": 0.0002359032385512907, "loss": 3.1661, "step": 34803 }, { "epoch": 1.71, "grad_norm": 0.660001814365387, "learning_rate": 0.00023588819795892994, "loss": 3.021, "step": 34804 }, { "epoch": 1.71, "grad_norm": 0.6211850047111511, "learning_rate": 0.0002358731575354272, "loss": 3.0002, "step": 34805 }, { "epoch": 1.71, "grad_norm": 0.5897373557090759, "learning_rate": 0.0002358581172808223, "loss": 3.1594, "step": 34806 }, { "epoch": 1.71, "grad_norm": 0.5892007350921631, "learning_rate": 0.00023584307719515469, "loss": 2.7936, "step": 34807 }, { "epoch": 1.71, "grad_norm": 0.5857629776000977, "learning_rate": 0.00023582803727846405, "loss": 2.9798, "step": 34808 }, { "epoch": 1.71, "grad_norm": 0.6502203941345215, "learning_rate": 0.0002358129975307901, "loss": 3.3007, "step": 34809 }, { "epoch": 1.71, "grad_norm": 0.6255640983581543, "learning_rate": 0.00023579795795217225, "loss": 2.9249, "step": 34810 }, { "epoch": 1.71, "grad_norm": 0.6112305521965027, "learning_rate": 0.00023578291854265033, "loss": 3.198, "step": 34811 }, { "epoch": 1.71, "grad_norm": 0.6151379346847534, "learning_rate": 0.00023576787930226385, "loss": 2.7124, "step": 34812 }, { "epoch": 1.71, "grad_norm": 0.6541122794151306, "learning_rate": 0.00023575284023105229, "loss": 2.8483, "step": 34813 }, { "epoch": 1.71, "grad_norm": 0.6102315187454224, "learning_rate": 0.0002357378013290555, "loss": 2.942, "step": 34814 }, { "epoch": 1.71, "grad_norm": 0.6251721382141113, "learning_rate": 0.00023572276259631287, "loss": 3.1909, "step": 34815 }, { "epoch": 1.71, "grad_norm": 0.5804009437561035, "learning_rate": 0.00023570772403286425, "loss": 3.049, "step": 34816 }, { "epoch": 1.71, "grad_norm": 0.6197915077209473, "learning_rate": 0.00023569268563874903, "loss": 3.0851, "step": 34817 }, { "epoch": 1.71, "grad_norm": 0.605315089225769, "learning_rate": 0.00023567764741400682, "loss": 3.1499, "step": 34818 }, { "epoch": 1.71, "grad_norm": 0.6538598537445068, "learning_rate": 0.00023566260935867746, "loss": 3.0767, "step": 34819 }, { "epoch": 1.71, "grad_norm": 0.6209130883216858, "learning_rate": 0.00023564757147280033, "loss": 3.1515, "step": 34820 }, { "epoch": 1.71, "grad_norm": 0.6308000087738037, "learning_rate": 0.00023563253375641517, "loss": 3.0422, "step": 34821 }, { "epoch": 1.71, "grad_norm": 0.5922633409500122, "learning_rate": 0.00023561749620956143, "loss": 3.0058, "step": 34822 }, { "epoch": 1.71, "grad_norm": 0.6279627084732056, "learning_rate": 0.00023560245883227893, "loss": 2.9634, "step": 34823 }, { "epoch": 1.71, "grad_norm": 0.611582338809967, "learning_rate": 0.00023558742162460707, "loss": 2.9571, "step": 34824 }, { "epoch": 1.71, "grad_norm": 0.5956346392631531, "learning_rate": 0.00023557238458658553, "loss": 3.0411, "step": 34825 }, { "epoch": 1.71, "grad_norm": 0.6095432639122009, "learning_rate": 0.00023555734771825402, "loss": 3.1169, "step": 34826 }, { "epoch": 1.71, "grad_norm": 0.5974093079566956, "learning_rate": 0.000235542311019652, "loss": 2.9938, "step": 34827 }, { "epoch": 1.71, "grad_norm": 0.696919322013855, "learning_rate": 0.0002355272744908192, "loss": 3.144, "step": 34828 }, { "epoch": 1.71, "grad_norm": 0.6090503931045532, "learning_rate": 0.00023551223813179502, "loss": 3.1853, "step": 34829 }, { "epoch": 1.71, "grad_norm": 0.6434307098388672, "learning_rate": 0.00023549720194261924, "loss": 3.0533, "step": 34830 }, { "epoch": 1.71, "grad_norm": 0.6265668272972107, "learning_rate": 0.00023548216592333152, "loss": 2.8724, "step": 34831 }, { "epoch": 1.71, "grad_norm": 0.6203542947769165, "learning_rate": 0.00023546713007397115, "loss": 3.1396, "step": 34832 }, { "epoch": 1.71, "grad_norm": 0.6344203948974609, "learning_rate": 0.00023545209439457813, "loss": 3.0088, "step": 34833 }, { "epoch": 1.71, "grad_norm": 0.6461747288703918, "learning_rate": 0.00023543705888519174, "loss": 2.848, "step": 34834 }, { "epoch": 1.71, "grad_norm": 0.6251636743545532, "learning_rate": 0.0002354220235458518, "loss": 2.8499, "step": 34835 }, { "epoch": 1.71, "grad_norm": 0.6171122789382935, "learning_rate": 0.00023540698837659784, "loss": 2.9701, "step": 34836 }, { "epoch": 1.71, "grad_norm": 0.6390722990036011, "learning_rate": 0.0002353919533774693, "loss": 3.0639, "step": 34837 }, { "epoch": 1.71, "grad_norm": 0.6113898158073425, "learning_rate": 0.0002353769185485061, "loss": 3.0786, "step": 34838 }, { "epoch": 1.71, "grad_norm": 0.640236496925354, "learning_rate": 0.00023536188388974757, "loss": 2.9315, "step": 34839 }, { "epoch": 1.71, "grad_norm": 0.6200087666511536, "learning_rate": 0.00023534684940123332, "loss": 3.1699, "step": 34840 }, { "epoch": 1.71, "grad_norm": 0.6289457678794861, "learning_rate": 0.00023533181508300318, "loss": 3.1179, "step": 34841 }, { "epoch": 1.71, "grad_norm": 0.6203035116195679, "learning_rate": 0.00023531678093509655, "loss": 3.1601, "step": 34842 }, { "epoch": 1.71, "grad_norm": 0.6237162351608276, "learning_rate": 0.00023530174695755306, "loss": 3.0658, "step": 34843 }, { "epoch": 1.71, "grad_norm": 0.6613100171089172, "learning_rate": 0.00023528671315041226, "loss": 2.8084, "step": 34844 }, { "epoch": 1.71, "grad_norm": 0.6259462833404541, "learning_rate": 0.00023527167951371383, "loss": 3.0736, "step": 34845 }, { "epoch": 1.71, "grad_norm": 0.6277626156806946, "learning_rate": 0.0002352566460474974, "loss": 3.0055, "step": 34846 }, { "epoch": 1.71, "grad_norm": 0.6393147110939026, "learning_rate": 0.0002352416127518024, "loss": 3.144, "step": 34847 }, { "epoch": 1.71, "grad_norm": 0.6422019600868225, "learning_rate": 0.00023522657962666866, "loss": 3.0748, "step": 34848 }, { "epoch": 1.71, "grad_norm": 0.6111181378364563, "learning_rate": 0.00023521154667213555, "loss": 3.024, "step": 34849 }, { "epoch": 1.71, "grad_norm": 0.6628186702728271, "learning_rate": 0.0002351965138882427, "loss": 3.0101, "step": 34850 }, { "epoch": 1.71, "grad_norm": 0.6172040700912476, "learning_rate": 0.0002351814812750299, "loss": 3.1652, "step": 34851 }, { "epoch": 1.71, "grad_norm": 0.6279569268226624, "learning_rate": 0.00023516644883253654, "loss": 3.0395, "step": 34852 }, { "epoch": 1.71, "grad_norm": 0.6078111529350281, "learning_rate": 0.00023515141656080236, "loss": 2.9895, "step": 34853 }, { "epoch": 1.71, "grad_norm": 0.6228066682815552, "learning_rate": 0.0002351363844598667, "loss": 2.9287, "step": 34854 }, { "epoch": 1.71, "grad_norm": 0.7036370038986206, "learning_rate": 0.00023512135252976935, "loss": 3.1124, "step": 34855 }, { "epoch": 1.71, "grad_norm": 0.5945392847061157, "learning_rate": 0.00023510632077054997, "loss": 2.9471, "step": 34856 }, { "epoch": 1.71, "grad_norm": 0.6402318477630615, "learning_rate": 0.00023509128918224802, "loss": 3.1473, "step": 34857 }, { "epoch": 1.71, "grad_norm": 0.6327133774757385, "learning_rate": 0.00023507625776490317, "loss": 2.855, "step": 34858 }, { "epoch": 1.71, "grad_norm": 0.6053986549377441, "learning_rate": 0.0002350612265185548, "loss": 3.0087, "step": 34859 }, { "epoch": 1.71, "grad_norm": 0.5792701244354248, "learning_rate": 0.00023504619544324277, "loss": 2.8177, "step": 34860 }, { "epoch": 1.71, "grad_norm": 0.6614009141921997, "learning_rate": 0.00023503116453900663, "loss": 3.218, "step": 34861 }, { "epoch": 1.71, "grad_norm": 0.6756806969642639, "learning_rate": 0.00023501613380588575, "loss": 3.1788, "step": 34862 }, { "epoch": 1.71, "grad_norm": 0.6036829352378845, "learning_rate": 0.00023500110324392004, "loss": 3.0215, "step": 34863 }, { "epoch": 1.71, "grad_norm": 0.6031563878059387, "learning_rate": 0.0002349860728531488, "loss": 3.146, "step": 34864 }, { "epoch": 1.71, "grad_norm": 0.6657361388206482, "learning_rate": 0.00023497104263361167, "loss": 3.0158, "step": 34865 }, { "epoch": 1.71, "grad_norm": 0.6230410933494568, "learning_rate": 0.00023495601258534843, "loss": 2.758, "step": 34866 }, { "epoch": 1.71, "grad_norm": 0.598953127861023, "learning_rate": 0.00023494098270839849, "loss": 3.1044, "step": 34867 }, { "epoch": 1.71, "grad_norm": 0.6193811893463135, "learning_rate": 0.00023492595300280152, "loss": 2.843, "step": 34868 }, { "epoch": 1.71, "grad_norm": 0.6279089450836182, "learning_rate": 0.00023491092346859693, "loss": 3.1108, "step": 34869 }, { "epoch": 1.71, "grad_norm": 0.6103789806365967, "learning_rate": 0.00023489589410582455, "loss": 3.1877, "step": 34870 }, { "epoch": 1.71, "grad_norm": 0.6290521621704102, "learning_rate": 0.0002348808649145239, "loss": 3.0293, "step": 34871 }, { "epoch": 1.71, "grad_norm": 0.5811523795127869, "learning_rate": 0.00023486583589473438, "loss": 3.0623, "step": 34872 }, { "epoch": 1.71, "grad_norm": 0.6363834738731384, "learning_rate": 0.00023485080704649586, "loss": 2.9667, "step": 34873 }, { "epoch": 1.71, "grad_norm": 0.6560509204864502, "learning_rate": 0.0002348357783698477, "loss": 2.975, "step": 34874 }, { "epoch": 1.71, "grad_norm": 0.6376746892929077, "learning_rate": 0.0002348207498648295, "loss": 3.156, "step": 34875 }, { "epoch": 1.71, "grad_norm": 0.6634820699691772, "learning_rate": 0.00023480572153148103, "loss": 2.9336, "step": 34876 }, { "epoch": 1.71, "grad_norm": 0.6263517141342163, "learning_rate": 0.00023479069336984157, "loss": 3.0352, "step": 34877 }, { "epoch": 1.71, "grad_norm": 0.6027346253395081, "learning_rate": 0.00023477566537995106, "loss": 2.884, "step": 34878 }, { "epoch": 1.71, "grad_norm": 0.6649355292320251, "learning_rate": 0.0002347606375618488, "loss": 2.9949, "step": 34879 }, { "epoch": 1.71, "grad_norm": 0.6449239253997803, "learning_rate": 0.00023474560991557442, "loss": 3.0944, "step": 34880 }, { "epoch": 1.71, "grad_norm": 0.6204206943511963, "learning_rate": 0.00023473058244116767, "loss": 2.8473, "step": 34881 }, { "epoch": 1.71, "grad_norm": 0.6074206829071045, "learning_rate": 0.00023471555513866795, "loss": 2.8826, "step": 34882 }, { "epoch": 1.71, "grad_norm": 0.607448935508728, "learning_rate": 0.00023470052800811495, "loss": 2.9217, "step": 34883 }, { "epoch": 1.71, "grad_norm": 0.6031710505485535, "learning_rate": 0.00023468550104954804, "loss": 2.9217, "step": 34884 }, { "epoch": 1.71, "grad_norm": 0.5931732654571533, "learning_rate": 0.00023467047426300704, "loss": 2.8402, "step": 34885 }, { "epoch": 1.71, "grad_norm": 0.6108601689338684, "learning_rate": 0.00023465544764853148, "loss": 3.0791, "step": 34886 }, { "epoch": 1.71, "grad_norm": 0.6241343021392822, "learning_rate": 0.0002346404212061608, "loss": 3.0006, "step": 34887 }, { "epoch": 1.71, "grad_norm": 0.6183168292045593, "learning_rate": 0.00023462539493593478, "loss": 3.1661, "step": 34888 }, { "epoch": 1.71, "grad_norm": 0.6017194986343384, "learning_rate": 0.00023461036883789282, "loss": 3.0034, "step": 34889 }, { "epoch": 1.71, "grad_norm": 0.659395694732666, "learning_rate": 0.00023459534291207448, "loss": 3.0346, "step": 34890 }, { "epoch": 1.71, "grad_norm": 0.6340118646621704, "learning_rate": 0.00023458031715851957, "loss": 3.1279, "step": 34891 }, { "epoch": 1.71, "grad_norm": 0.6464155912399292, "learning_rate": 0.00023456529157726746, "loss": 2.861, "step": 34892 }, { "epoch": 1.71, "grad_norm": 0.6099604368209839, "learning_rate": 0.00023455026616835784, "loss": 3.066, "step": 34893 }, { "epoch": 1.71, "grad_norm": 0.6244305968284607, "learning_rate": 0.00023453524093183009, "loss": 2.9749, "step": 34894 }, { "epoch": 1.71, "grad_norm": 0.5913376212120056, "learning_rate": 0.00023452021586772392, "loss": 3.099, "step": 34895 }, { "epoch": 1.71, "grad_norm": 0.6622759699821472, "learning_rate": 0.000234505190976079, "loss": 3.0457, "step": 34896 }, { "epoch": 1.71, "grad_norm": 0.6322427988052368, "learning_rate": 0.00023449016625693477, "loss": 3.0076, "step": 34897 }, { "epoch": 1.71, "grad_norm": 0.6200683116912842, "learning_rate": 0.0002344751417103309, "loss": 2.9269, "step": 34898 }, { "epoch": 1.71, "grad_norm": 0.6351677775382996, "learning_rate": 0.00023446011733630674, "loss": 2.9341, "step": 34899 }, { "epoch": 1.71, "grad_norm": 0.6051552891731262, "learning_rate": 0.0002344450931349022, "loss": 3.1266, "step": 34900 }, { "epoch": 1.71, "grad_norm": 0.657870352268219, "learning_rate": 0.00023443006910615652, "loss": 3.2018, "step": 34901 }, { "epoch": 1.71, "grad_norm": 0.6787803173065186, "learning_rate": 0.00023441504525010943, "loss": 3.0304, "step": 34902 }, { "epoch": 1.71, "grad_norm": 0.6219592690467834, "learning_rate": 0.0002344000215668006, "loss": 3.2, "step": 34903 }, { "epoch": 1.71, "grad_norm": 0.6439167857170105, "learning_rate": 0.0002343849980562694, "loss": 3.0774, "step": 34904 }, { "epoch": 1.71, "grad_norm": 0.6104590892791748, "learning_rate": 0.00023436997471855558, "loss": 3.0582, "step": 34905 }, { "epoch": 1.71, "grad_norm": 0.6476345062255859, "learning_rate": 0.00023435495155369848, "loss": 3.014, "step": 34906 }, { "epoch": 1.71, "grad_norm": 0.6276230812072754, "learning_rate": 0.00023433992856173786, "loss": 3.0314, "step": 34907 }, { "epoch": 1.71, "grad_norm": 0.6397703886032104, "learning_rate": 0.00023432490574271328, "loss": 2.9595, "step": 34908 }, { "epoch": 1.71, "grad_norm": 0.6316772699356079, "learning_rate": 0.00023430988309666414, "loss": 2.9005, "step": 34909 }, { "epoch": 1.71, "grad_norm": 0.6077426671981812, "learning_rate": 0.00023429486062363027, "loss": 2.9963, "step": 34910 }, { "epoch": 1.71, "grad_norm": 0.6244196891784668, "learning_rate": 0.00023427983832365106, "loss": 3.0356, "step": 34911 }, { "epoch": 1.71, "grad_norm": 0.6547126770019531, "learning_rate": 0.00023426481619676597, "loss": 2.9158, "step": 34912 }, { "epoch": 1.71, "grad_norm": 0.5975217223167419, "learning_rate": 0.00023424979424301488, "loss": 3.005, "step": 34913 }, { "epoch": 1.71, "grad_norm": 0.6610994935035706, "learning_rate": 0.00023423477246243708, "loss": 2.8519, "step": 34914 }, { "epoch": 1.71, "grad_norm": 0.6371542811393738, "learning_rate": 0.00023421975085507232, "loss": 2.9664, "step": 34915 }, { "epoch": 1.71, "grad_norm": 0.6088597178459167, "learning_rate": 0.00023420472942095995, "loss": 2.9404, "step": 34916 }, { "epoch": 1.71, "grad_norm": 0.654114305973053, "learning_rate": 0.00023418970816013965, "loss": 2.9306, "step": 34917 }, { "epoch": 1.71, "grad_norm": 0.637015700340271, "learning_rate": 0.0002341746870726511, "loss": 2.8428, "step": 34918 }, { "epoch": 1.71, "grad_norm": 0.6335122585296631, "learning_rate": 0.00023415966615853372, "loss": 2.9795, "step": 34919 }, { "epoch": 1.71, "grad_norm": 0.6221902966499329, "learning_rate": 0.0002341446454178272, "loss": 3.0404, "step": 34920 }, { "epoch": 1.71, "grad_norm": 0.6287366151809692, "learning_rate": 0.00023412962485057086, "loss": 3.1862, "step": 34921 }, { "epoch": 1.71, "grad_norm": 0.6433752775192261, "learning_rate": 0.00023411460445680444, "loss": 3.119, "step": 34922 }, { "epoch": 1.71, "grad_norm": 0.6408416032791138, "learning_rate": 0.00023409958423656755, "loss": 3.0431, "step": 34923 }, { "epoch": 1.71, "grad_norm": 0.6352258920669556, "learning_rate": 0.00023408456418989957, "loss": 2.9922, "step": 34924 }, { "epoch": 1.71, "grad_norm": 0.6453670263290405, "learning_rate": 0.00023406954431684027, "loss": 3.1621, "step": 34925 }, { "epoch": 1.71, "grad_norm": 0.5970926284790039, "learning_rate": 0.00023405452461742904, "loss": 2.9972, "step": 34926 }, { "epoch": 1.71, "grad_norm": 0.6512788534164429, "learning_rate": 0.00023403950509170543, "loss": 3.1177, "step": 34927 }, { "epoch": 1.71, "grad_norm": 0.6225276589393616, "learning_rate": 0.00023402448573970925, "loss": 2.9748, "step": 34928 }, { "epoch": 1.71, "grad_norm": 0.6059454083442688, "learning_rate": 0.00023400946656147977, "loss": 3.0446, "step": 34929 }, { "epoch": 1.71, "grad_norm": 0.6157578825950623, "learning_rate": 0.0002339944475570567, "loss": 3.1555, "step": 34930 }, { "epoch": 1.71, "grad_norm": 0.6617079973220825, "learning_rate": 0.00023397942872647944, "loss": 2.9416, "step": 34931 }, { "epoch": 1.71, "grad_norm": 0.6348146200180054, "learning_rate": 0.00023396441006978775, "loss": 3.1802, "step": 34932 }, { "epoch": 1.71, "grad_norm": 0.6032415628433228, "learning_rate": 0.00023394939158702112, "loss": 2.9522, "step": 34933 }, { "epoch": 1.71, "grad_norm": 0.6263419985771179, "learning_rate": 0.00023393437327821895, "loss": 2.8259, "step": 34934 }, { "epoch": 1.71, "grad_norm": 0.6232619285583496, "learning_rate": 0.0002339193551434211, "loss": 2.8977, "step": 34935 }, { "epoch": 1.71, "grad_norm": 0.6191720962524414, "learning_rate": 0.00023390433718266685, "loss": 2.9033, "step": 34936 }, { "epoch": 1.71, "grad_norm": 0.7007677555084229, "learning_rate": 0.0002338893193959958, "loss": 3.1077, "step": 34937 }, { "epoch": 1.71, "grad_norm": 0.6107507944107056, "learning_rate": 0.0002338743017834477, "loss": 3.1321, "step": 34938 }, { "epoch": 1.71, "grad_norm": 0.6792271137237549, "learning_rate": 0.00023385928434506182, "loss": 2.9655, "step": 34939 }, { "epoch": 1.71, "grad_norm": 0.645459771156311, "learning_rate": 0.00023384426708087798, "loss": 3.0854, "step": 34940 }, { "epoch": 1.71, "grad_norm": 0.6554004549980164, "learning_rate": 0.00023382924999093555, "loss": 2.7691, "step": 34941 }, { "epoch": 1.71, "grad_norm": 0.6682091355323792, "learning_rate": 0.00023381423307527408, "loss": 2.9499, "step": 34942 }, { "epoch": 1.71, "grad_norm": 0.653188169002533, "learning_rate": 0.0002337992163339333, "loss": 3.0563, "step": 34943 }, { "epoch": 1.71, "grad_norm": 0.6153813600540161, "learning_rate": 0.0002337841997669526, "loss": 2.972, "step": 34944 }, { "epoch": 1.71, "grad_norm": 0.6357757449150085, "learning_rate": 0.00023376918337437164, "loss": 2.9628, "step": 34945 }, { "epoch": 1.71, "grad_norm": 0.6298278570175171, "learning_rate": 0.00023375416715622977, "loss": 2.7598, "step": 34946 }, { "epoch": 1.71, "grad_norm": 0.581403911113739, "learning_rate": 0.00023373915111256675, "loss": 2.8313, "step": 34947 }, { "epoch": 1.71, "grad_norm": 0.6330929398536682, "learning_rate": 0.00023372413524342208, "loss": 2.9305, "step": 34948 }, { "epoch": 1.71, "grad_norm": 0.6617899537086487, "learning_rate": 0.00023370911954883518, "loss": 2.8998, "step": 34949 }, { "epoch": 1.71, "grad_norm": 0.6223428249359131, "learning_rate": 0.00023369410402884582, "loss": 3.0054, "step": 34950 }, { "epoch": 1.71, "grad_norm": 0.6279831528663635, "learning_rate": 0.00023367908868349337, "loss": 3.0671, "step": 34951 }, { "epoch": 1.71, "grad_norm": 0.5640294551849365, "learning_rate": 0.00023366407351281735, "loss": 3.0893, "step": 34952 }, { "epoch": 1.71, "grad_norm": 0.5992966294288635, "learning_rate": 0.0002336490585168576, "loss": 3.0811, "step": 34953 }, { "epoch": 1.71, "grad_norm": 0.6405746340751648, "learning_rate": 0.00023363404369565328, "loss": 3.0204, "step": 34954 }, { "epoch": 1.71, "grad_norm": 0.651674747467041, "learning_rate": 0.00023361902904924428, "loss": 2.7941, "step": 34955 }, { "epoch": 1.71, "grad_norm": 0.6315301656723022, "learning_rate": 0.0002336040145776698, "loss": 2.9018, "step": 34956 }, { "epoch": 1.71, "grad_norm": 0.6295512914657593, "learning_rate": 0.00023358900028096959, "loss": 3.013, "step": 34957 }, { "epoch": 1.71, "grad_norm": 0.5874469876289368, "learning_rate": 0.0002335739861591833, "loss": 3.203, "step": 34958 }, { "epoch": 1.71, "grad_norm": 0.6032001376152039, "learning_rate": 0.00023355897221235026, "loss": 3.0261, "step": 34959 }, { "epoch": 1.71, "grad_norm": 0.6361725330352783, "learning_rate": 0.00023354395844051018, "loss": 3.0568, "step": 34960 }, { "epoch": 1.71, "grad_norm": 0.6287243962287903, "learning_rate": 0.0002335289448437024, "loss": 3.1561, "step": 34961 }, { "epoch": 1.71, "grad_norm": 0.6232472062110901, "learning_rate": 0.00023351393142196665, "loss": 3.0336, "step": 34962 }, { "epoch": 1.71, "grad_norm": 0.6193660497665405, "learning_rate": 0.00023349891817534246, "loss": 2.7832, "step": 34963 }, { "epoch": 1.71, "grad_norm": 0.6325574517250061, "learning_rate": 0.00023348390510386917, "loss": 2.9592, "step": 34964 }, { "epoch": 1.71, "grad_norm": 0.6106416583061218, "learning_rate": 0.00023346889220758667, "loss": 3.0528, "step": 34965 }, { "epoch": 1.71, "grad_norm": 0.6092174053192139, "learning_rate": 0.0002334538794865342, "loss": 2.9849, "step": 34966 }, { "epoch": 1.71, "grad_norm": 0.6205219030380249, "learning_rate": 0.00023343886694075135, "loss": 2.9281, "step": 34967 }, { "epoch": 1.71, "grad_norm": 0.6163828372955322, "learning_rate": 0.00023342385457027781, "loss": 2.7753, "step": 34968 }, { "epoch": 1.71, "grad_norm": 0.6253390312194824, "learning_rate": 0.000233408842375153, "loss": 2.8628, "step": 34969 }, { "epoch": 1.71, "grad_norm": 0.650109589099884, "learning_rate": 0.00023339383035541654, "loss": 2.9486, "step": 34970 }, { "epoch": 1.71, "grad_norm": 0.6107782125473022, "learning_rate": 0.00023337881851110776, "loss": 2.8765, "step": 34971 }, { "epoch": 1.71, "grad_norm": 0.6146960854530334, "learning_rate": 0.00023336380684226644, "loss": 3.0767, "step": 34972 }, { "epoch": 1.71, "grad_norm": 0.6434600949287415, "learning_rate": 0.0002333487953489321, "loss": 3.0933, "step": 34973 }, { "epoch": 1.71, "grad_norm": 0.5741561055183411, "learning_rate": 0.00023333378403114405, "loss": 3.2363, "step": 34974 }, { "epoch": 1.71, "grad_norm": 0.6086795330047607, "learning_rate": 0.00023331877288894213, "loss": 2.7989, "step": 34975 }, { "epoch": 1.71, "grad_norm": 0.5960971713066101, "learning_rate": 0.00023330376192236564, "loss": 3.1249, "step": 34976 }, { "epoch": 1.71, "grad_norm": 0.6542201638221741, "learning_rate": 0.00023328875113145432, "loss": 2.8669, "step": 34977 }, { "epoch": 1.71, "grad_norm": 0.6497222185134888, "learning_rate": 0.0002332737405162474, "loss": 3.1174, "step": 34978 }, { "epoch": 1.71, "grad_norm": 0.6587827205657959, "learning_rate": 0.00023325873007678464, "loss": 2.9742, "step": 34979 }, { "epoch": 1.71, "grad_norm": 0.6213930249214172, "learning_rate": 0.0002332437198131057, "loss": 3.1319, "step": 34980 }, { "epoch": 1.71, "grad_norm": 0.6171470880508423, "learning_rate": 0.00023322870972524983, "loss": 2.9525, "step": 34981 }, { "epoch": 1.71, "grad_norm": 0.6292840242385864, "learning_rate": 0.00023321369981325676, "loss": 3.0539, "step": 34982 }, { "epoch": 1.71, "grad_norm": 0.6087939143180847, "learning_rate": 0.00023319869007716587, "loss": 3.0538, "step": 34983 }, { "epoch": 1.71, "grad_norm": 0.6223199367523193, "learning_rate": 0.0002331836805170168, "loss": 2.8537, "step": 34984 }, { "epoch": 1.71, "grad_norm": 0.6605082154273987, "learning_rate": 0.00023316867113284913, "loss": 3.0738, "step": 34985 }, { "epoch": 1.71, "grad_norm": 0.6185541749000549, "learning_rate": 0.0002331536619247022, "loss": 2.9984, "step": 34986 }, { "epoch": 1.71, "grad_norm": 0.631881594657898, "learning_rate": 0.00023313865289261576, "loss": 2.9922, "step": 34987 }, { "epoch": 1.71, "grad_norm": 0.618061363697052, "learning_rate": 0.0002331236440366292, "loss": 2.996, "step": 34988 }, { "epoch": 1.71, "grad_norm": 0.609032928943634, "learning_rate": 0.00023310863535678202, "loss": 2.6868, "step": 34989 }, { "epoch": 1.71, "grad_norm": 0.6071302890777588, "learning_rate": 0.00023309362685311396, "loss": 3.0175, "step": 34990 }, { "epoch": 1.71, "grad_norm": 0.6098959445953369, "learning_rate": 0.00023307861852566434, "loss": 2.9696, "step": 34991 }, { "epoch": 1.71, "grad_norm": 0.638105034828186, "learning_rate": 0.00023306361037447282, "loss": 2.9288, "step": 34992 }, { "epoch": 1.71, "grad_norm": 0.6155096292495728, "learning_rate": 0.00023304860239957873, "loss": 3.1519, "step": 34993 }, { "epoch": 1.71, "grad_norm": 0.628525972366333, "learning_rate": 0.0002330335946010218, "loss": 2.9343, "step": 34994 }, { "epoch": 1.72, "grad_norm": 0.7091739177703857, "learning_rate": 0.00023301858697884157, "loss": 3.1016, "step": 34995 }, { "epoch": 1.72, "grad_norm": 0.62142014503479, "learning_rate": 0.0002330035795330774, "loss": 2.8768, "step": 34996 }, { "epoch": 1.72, "grad_norm": 0.6097868084907532, "learning_rate": 0.00023298857226376895, "loss": 3.0771, "step": 34997 }, { "epoch": 1.72, "grad_norm": 0.6286457777023315, "learning_rate": 0.00023297356517095562, "loss": 3.0466, "step": 34998 }, { "epoch": 1.72, "grad_norm": 0.6783491373062134, "learning_rate": 0.00023295855825467712, "loss": 3.2109, "step": 34999 }, { "epoch": 1.72, "grad_norm": 0.7100011110305786, "learning_rate": 0.00023294355151497287, "loss": 2.953, "step": 35000 }, { "epoch": 1.72, "grad_norm": 0.5850964188575745, "learning_rate": 0.00023292854495188232, "loss": 2.9883, "step": 35001 }, { "epoch": 1.72, "grad_norm": 0.6173315048217773, "learning_rate": 0.0002329135385654452, "loss": 3.0854, "step": 35002 }, { "epoch": 1.72, "grad_norm": 0.6593319773674011, "learning_rate": 0.00023289853235570083, "loss": 2.7224, "step": 35003 }, { "epoch": 1.72, "grad_norm": 0.6051262617111206, "learning_rate": 0.00023288352632268877, "loss": 3.0075, "step": 35004 }, { "epoch": 1.72, "grad_norm": 0.6009809970855713, "learning_rate": 0.00023286852046644867, "loss": 2.7894, "step": 35005 }, { "epoch": 1.72, "grad_norm": 0.5898076295852661, "learning_rate": 0.00023285351478701994, "loss": 3.0432, "step": 35006 }, { "epoch": 1.72, "grad_norm": 0.6174237728118896, "learning_rate": 0.0002328385092844422, "loss": 2.9945, "step": 35007 }, { "epoch": 1.72, "grad_norm": 0.6520997881889343, "learning_rate": 0.00023282350395875472, "loss": 2.9187, "step": 35008 }, { "epoch": 1.72, "grad_norm": 0.6250781416893005, "learning_rate": 0.0002328084988099973, "loss": 3.1153, "step": 35009 }, { "epoch": 1.72, "grad_norm": 0.6104256510734558, "learning_rate": 0.00023279349383820944, "loss": 3.1432, "step": 35010 }, { "epoch": 1.72, "grad_norm": 0.6256521344184875, "learning_rate": 0.00023277848904343045, "loss": 2.8728, "step": 35011 }, { "epoch": 1.72, "grad_norm": 0.5990914702415466, "learning_rate": 0.0002327634844257001, "loss": 3.0632, "step": 35012 }, { "epoch": 1.72, "grad_norm": 0.6210713982582092, "learning_rate": 0.00023274847998505774, "loss": 2.9762, "step": 35013 }, { "epoch": 1.72, "grad_norm": 0.6181674003601074, "learning_rate": 0.00023273347572154288, "loss": 3.0125, "step": 35014 }, { "epoch": 1.72, "grad_norm": 0.8895490169525146, "learning_rate": 0.0002327184716351952, "loss": 2.9884, "step": 35015 }, { "epoch": 1.72, "grad_norm": 0.6275051236152649, "learning_rate": 0.00023270346772605408, "loss": 2.9187, "step": 35016 }, { "epoch": 1.72, "grad_norm": 0.5979534387588501, "learning_rate": 0.0002326884639941591, "loss": 2.9628, "step": 35017 }, { "epoch": 1.72, "grad_norm": 0.6114262938499451, "learning_rate": 0.0002326734604395497, "loss": 2.9154, "step": 35018 }, { "epoch": 1.72, "grad_norm": 0.6187745332717896, "learning_rate": 0.00023265845706226535, "loss": 2.9844, "step": 35019 }, { "epoch": 1.72, "grad_norm": 0.5852953791618347, "learning_rate": 0.00023264345386234586, "loss": 3.0157, "step": 35020 }, { "epoch": 1.72, "grad_norm": 0.5987752676010132, "learning_rate": 0.00023262845083983047, "loss": 3.0215, "step": 35021 }, { "epoch": 1.72, "grad_norm": 0.6052468419075012, "learning_rate": 0.00023261344799475886, "loss": 2.8387, "step": 35022 }, { "epoch": 1.72, "grad_norm": 0.653300940990448, "learning_rate": 0.00023259844532717028, "loss": 3.0387, "step": 35023 }, { "epoch": 1.72, "grad_norm": 0.645679235458374, "learning_rate": 0.00023258344283710452, "loss": 2.861, "step": 35024 }, { "epoch": 1.72, "grad_norm": 0.6186267137527466, "learning_rate": 0.00023256844052460106, "loss": 2.9967, "step": 35025 }, { "epoch": 1.72, "grad_norm": 0.6053884029388428, "learning_rate": 0.0002325534383896992, "loss": 3.0525, "step": 35026 }, { "epoch": 1.72, "grad_norm": 0.6017190217971802, "learning_rate": 0.00023253843643243876, "loss": 2.8209, "step": 35027 }, { "epoch": 1.72, "grad_norm": 0.6449417471885681, "learning_rate": 0.000232523434652859, "loss": 3.0937, "step": 35028 }, { "epoch": 1.72, "grad_norm": 0.6535047888755798, "learning_rate": 0.0002325084330509995, "loss": 3.1532, "step": 35029 }, { "epoch": 1.72, "grad_norm": 0.6600343585014343, "learning_rate": 0.0002324934316268999, "loss": 3.0011, "step": 35030 }, { "epoch": 1.72, "grad_norm": 0.6052358150482178, "learning_rate": 0.00023247843038059956, "loss": 3.2281, "step": 35031 }, { "epoch": 1.72, "grad_norm": 0.6027965545654297, "learning_rate": 0.00023246342931213809, "loss": 2.9464, "step": 35032 }, { "epoch": 1.72, "grad_norm": 0.6339533925056458, "learning_rate": 0.00023244842842155483, "loss": 3.0856, "step": 35033 }, { "epoch": 1.72, "grad_norm": 0.6081569194793701, "learning_rate": 0.00023243342770888948, "loss": 2.8734, "step": 35034 }, { "epoch": 1.72, "grad_norm": 0.6404123902320862, "learning_rate": 0.00023241842717418152, "loss": 3.113, "step": 35035 }, { "epoch": 1.72, "grad_norm": 0.6517124176025391, "learning_rate": 0.00023240342681747031, "loss": 3.0599, "step": 35036 }, { "epoch": 1.72, "grad_norm": 0.6136491894721985, "learning_rate": 0.00023238842663879557, "loss": 2.9569, "step": 35037 }, { "epoch": 1.72, "grad_norm": 0.585811972618103, "learning_rate": 0.00023237342663819666, "loss": 2.9896, "step": 35038 }, { "epoch": 1.72, "grad_norm": 0.6353388428688049, "learning_rate": 0.00023235842681571306, "loss": 2.8659, "step": 35039 }, { "epoch": 1.72, "grad_norm": 0.625332236289978, "learning_rate": 0.00023234342717138448, "loss": 3.2114, "step": 35040 }, { "epoch": 1.72, "grad_norm": 0.6419073939323425, "learning_rate": 0.00023232842770525014, "loss": 2.8107, "step": 35041 }, { "epoch": 1.72, "grad_norm": 0.6179407835006714, "learning_rate": 0.00023231342841734985, "loss": 2.9328, "step": 35042 }, { "epoch": 1.72, "grad_norm": 0.6118654608726501, "learning_rate": 0.0002322984293077229, "loss": 2.9489, "step": 35043 }, { "epoch": 1.72, "grad_norm": 0.6000199913978577, "learning_rate": 0.00023228343037640879, "loss": 3.0466, "step": 35044 }, { "epoch": 1.72, "grad_norm": 0.6850727796554565, "learning_rate": 0.0002322684316234472, "loss": 3.1185, "step": 35045 }, { "epoch": 1.72, "grad_norm": 0.6070858240127563, "learning_rate": 0.0002322534330488775, "loss": 3.0039, "step": 35046 }, { "epoch": 1.72, "grad_norm": 0.7080966830253601, "learning_rate": 0.00023223843465273924, "loss": 3.0643, "step": 35047 }, { "epoch": 1.72, "grad_norm": 0.6055739521980286, "learning_rate": 0.00023222343643507177, "loss": 2.9921, "step": 35048 }, { "epoch": 1.72, "grad_norm": 0.6497558355331421, "learning_rate": 0.00023220843839591484, "loss": 2.9583, "step": 35049 }, { "epoch": 1.72, "grad_norm": 0.6094196438789368, "learning_rate": 0.00023219344053530786, "loss": 2.7776, "step": 35050 }, { "epoch": 1.72, "grad_norm": 0.6469390988349915, "learning_rate": 0.0002321784428532902, "loss": 3.0497, "step": 35051 }, { "epoch": 1.72, "grad_norm": 0.6989441514015198, "learning_rate": 0.0002321634453499016, "loss": 3.1574, "step": 35052 }, { "epoch": 1.72, "grad_norm": 0.6314229965209961, "learning_rate": 0.00023214844802518135, "loss": 2.9443, "step": 35053 }, { "epoch": 1.72, "grad_norm": 0.6381677389144897, "learning_rate": 0.0002321334508791691, "loss": 2.9694, "step": 35054 }, { "epoch": 1.72, "grad_norm": 0.6836425065994263, "learning_rate": 0.00023211845391190415, "loss": 2.8559, "step": 35055 }, { "epoch": 1.72, "grad_norm": 0.6240857243537903, "learning_rate": 0.00023210345712342619, "loss": 2.7767, "step": 35056 }, { "epoch": 1.72, "grad_norm": 0.6270095705986023, "learning_rate": 0.00023208846051377474, "loss": 2.999, "step": 35057 }, { "epoch": 1.72, "grad_norm": 0.6575677990913391, "learning_rate": 0.00023207346408298906, "loss": 2.943, "step": 35058 }, { "epoch": 1.72, "grad_norm": 0.6204233765602112, "learning_rate": 0.00023205846783110894, "loss": 3.0342, "step": 35059 }, { "epoch": 1.72, "grad_norm": 0.6046788692474365, "learning_rate": 0.00023204347175817362, "loss": 2.9394, "step": 35060 }, { "epoch": 1.72, "grad_norm": 0.6025890707969666, "learning_rate": 0.0002320284758642228, "loss": 2.9541, "step": 35061 }, { "epoch": 1.72, "grad_norm": 0.5898043513298035, "learning_rate": 0.0002320134801492959, "loss": 3.1216, "step": 35062 }, { "epoch": 1.72, "grad_norm": 0.6248551607131958, "learning_rate": 0.0002319984846134323, "loss": 2.9211, "step": 35063 }, { "epoch": 1.72, "grad_norm": 0.677790641784668, "learning_rate": 0.00023198348925667177, "loss": 2.8475, "step": 35064 }, { "epoch": 1.72, "grad_norm": 0.6180427074432373, "learning_rate": 0.00023196849407905353, "loss": 3.1241, "step": 35065 }, { "epoch": 1.72, "grad_norm": 0.6461980938911438, "learning_rate": 0.00023195349908061716, "loss": 3.1005, "step": 35066 }, { "epoch": 1.72, "grad_norm": 0.6379979848861694, "learning_rate": 0.0002319385042614023, "loss": 2.9287, "step": 35067 }, { "epoch": 1.72, "grad_norm": 0.631192684173584, "learning_rate": 0.0002319235096214482, "loss": 3.0211, "step": 35068 }, { "epoch": 1.72, "grad_norm": 0.6260473728179932, "learning_rate": 0.00023190851516079463, "loss": 2.9484, "step": 35069 }, { "epoch": 1.72, "grad_norm": 0.6406779885292053, "learning_rate": 0.0002318935208794807, "loss": 2.9191, "step": 35070 }, { "epoch": 1.72, "grad_norm": 0.6329416036605835, "learning_rate": 0.00023187852677754626, "loss": 2.8363, "step": 35071 }, { "epoch": 1.72, "grad_norm": 0.6531370878219604, "learning_rate": 0.00023186353285503076, "loss": 2.9056, "step": 35072 }, { "epoch": 1.72, "grad_norm": 0.6264121532440186, "learning_rate": 0.0002318485391119734, "loss": 3.0358, "step": 35073 }, { "epoch": 1.72, "grad_norm": 0.6566066145896912, "learning_rate": 0.00023183354554841403, "loss": 2.8215, "step": 35074 }, { "epoch": 1.72, "grad_norm": 0.652664840221405, "learning_rate": 0.00023181855216439194, "loss": 3.1829, "step": 35075 }, { "epoch": 1.72, "grad_norm": 0.6445363163948059, "learning_rate": 0.00023180355895994654, "loss": 3.0111, "step": 35076 }, { "epoch": 1.72, "grad_norm": 0.5922958254814148, "learning_rate": 0.00023178856593511761, "loss": 3.0851, "step": 35077 }, { "epoch": 1.72, "grad_norm": 0.6356951594352722, "learning_rate": 0.00023177357308994443, "loss": 3.0073, "step": 35078 }, { "epoch": 1.72, "grad_norm": 0.6576932072639465, "learning_rate": 0.00023175858042446653, "loss": 3.1066, "step": 35079 }, { "epoch": 1.72, "grad_norm": 0.6770666837692261, "learning_rate": 0.0002317435879387233, "loss": 2.8589, "step": 35080 }, { "epoch": 1.72, "grad_norm": 0.6689457297325134, "learning_rate": 0.00023172859563275436, "loss": 2.9456, "step": 35081 }, { "epoch": 1.72, "grad_norm": 0.6123393774032593, "learning_rate": 0.00023171360350659922, "loss": 3.3388, "step": 35082 }, { "epoch": 1.72, "grad_norm": 0.6080477833747864, "learning_rate": 0.0002316986115602973, "loss": 2.8955, "step": 35083 }, { "epoch": 1.72, "grad_norm": 0.5717896819114685, "learning_rate": 0.0002316836197938881, "loss": 2.9882, "step": 35084 }, { "epoch": 1.72, "grad_norm": 0.5806421637535095, "learning_rate": 0.000231668628207411, "loss": 3.0781, "step": 35085 }, { "epoch": 1.72, "grad_norm": 0.6210070252418518, "learning_rate": 0.00023165363680090567, "loss": 3.1186, "step": 35086 }, { "epoch": 1.72, "grad_norm": 0.6110129356384277, "learning_rate": 0.00023163864557441152, "loss": 3.156, "step": 35087 }, { "epoch": 1.72, "grad_norm": 0.6130040884017944, "learning_rate": 0.00023162365452796793, "loss": 2.936, "step": 35088 }, { "epoch": 1.72, "grad_norm": 0.6246054768562317, "learning_rate": 0.0002316086636616146, "loss": 2.9497, "step": 35089 }, { "epoch": 1.72, "grad_norm": 0.6145926117897034, "learning_rate": 0.00023159367297539082, "loss": 3.0549, "step": 35090 }, { "epoch": 1.72, "grad_norm": 0.5754942893981934, "learning_rate": 0.0002315786824693361, "loss": 3.034, "step": 35091 }, { "epoch": 1.72, "grad_norm": 0.639498770236969, "learning_rate": 0.00023156369214349004, "loss": 3.0611, "step": 35092 }, { "epoch": 1.72, "grad_norm": 0.6060043573379517, "learning_rate": 0.00023154870199789199, "loss": 3.1878, "step": 35093 }, { "epoch": 1.72, "grad_norm": 0.6273714900016785, "learning_rate": 0.00023153371203258157, "loss": 3.0544, "step": 35094 }, { "epoch": 1.72, "grad_norm": 0.6444366574287415, "learning_rate": 0.00023151872224759805, "loss": 3.2422, "step": 35095 }, { "epoch": 1.72, "grad_norm": 0.6600435376167297, "learning_rate": 0.0002315037326429811, "loss": 3.1418, "step": 35096 }, { "epoch": 1.72, "grad_norm": 0.605183482170105, "learning_rate": 0.00023148874321877018, "loss": 2.9719, "step": 35097 }, { "epoch": 1.72, "grad_norm": 0.6069551110267639, "learning_rate": 0.0002314737539750046, "loss": 2.6923, "step": 35098 }, { "epoch": 1.72, "grad_norm": 0.5764786005020142, "learning_rate": 0.0002314587649117241, "loss": 2.9678, "step": 35099 }, { "epoch": 1.72, "grad_norm": 0.6361105442047119, "learning_rate": 0.000231443776028968, "loss": 3.2081, "step": 35100 }, { "epoch": 1.72, "grad_norm": 0.5919163227081299, "learning_rate": 0.00023142878732677566, "loss": 3.0231, "step": 35101 }, { "epoch": 1.72, "grad_norm": 0.6047539114952087, "learning_rate": 0.00023141379880518685, "loss": 3.0358, "step": 35102 }, { "epoch": 1.72, "grad_norm": 0.6241350173950195, "learning_rate": 0.00023139881046424079, "loss": 2.9605, "step": 35103 }, { "epoch": 1.72, "grad_norm": 0.6277053356170654, "learning_rate": 0.00023138382230397718, "loss": 3.0339, "step": 35104 }, { "epoch": 1.72, "grad_norm": 0.6247433423995972, "learning_rate": 0.0002313688343244353, "loss": 2.979, "step": 35105 }, { "epoch": 1.72, "grad_norm": 0.5854073762893677, "learning_rate": 0.00023135384652565465, "loss": 2.9685, "step": 35106 }, { "epoch": 1.72, "grad_norm": 0.6047874093055725, "learning_rate": 0.0002313388589076749, "loss": 3.2763, "step": 35107 }, { "epoch": 1.72, "grad_norm": 0.5953376293182373, "learning_rate": 0.00023132387147053527, "loss": 2.8974, "step": 35108 }, { "epoch": 1.72, "grad_norm": 0.5756540894508362, "learning_rate": 0.00023130888421427545, "loss": 3.0288, "step": 35109 }, { "epoch": 1.72, "grad_norm": 0.6189519166946411, "learning_rate": 0.00023129389713893466, "loss": 2.7638, "step": 35110 }, { "epoch": 1.72, "grad_norm": 0.5780520439147949, "learning_rate": 0.0002312789102445526, "loss": 3.0979, "step": 35111 }, { "epoch": 1.72, "grad_norm": 0.6746885180473328, "learning_rate": 0.00023126392353116872, "loss": 3.0076, "step": 35112 }, { "epoch": 1.72, "grad_norm": 0.6331716179847717, "learning_rate": 0.00023124893699882231, "loss": 3.2743, "step": 35113 }, { "epoch": 1.72, "grad_norm": 0.6570529937744141, "learning_rate": 0.00023123395064755312, "loss": 3.0482, "step": 35114 }, { "epoch": 1.72, "grad_norm": 0.6318064332008362, "learning_rate": 0.00023121896447740036, "loss": 3.0843, "step": 35115 }, { "epoch": 1.72, "grad_norm": 0.6018270254135132, "learning_rate": 0.00023120397848840359, "loss": 2.9797, "step": 35116 }, { "epoch": 1.72, "grad_norm": 0.5887584090232849, "learning_rate": 0.00023118899268060241, "loss": 3.1461, "step": 35117 }, { "epoch": 1.72, "grad_norm": 0.6415103077888489, "learning_rate": 0.00023117400705403613, "loss": 3.0801, "step": 35118 }, { "epoch": 1.72, "grad_norm": 0.6064682006835938, "learning_rate": 0.0002311590216087443, "loss": 2.779, "step": 35119 }, { "epoch": 1.72, "grad_norm": 0.6046452522277832, "learning_rate": 0.00023114403634476625, "loss": 2.99, "step": 35120 }, { "epoch": 1.72, "grad_norm": 0.6241592764854431, "learning_rate": 0.00023112905126214156, "loss": 2.903, "step": 35121 }, { "epoch": 1.72, "grad_norm": 0.6440034508705139, "learning_rate": 0.00023111406636090983, "loss": 2.928, "step": 35122 }, { "epoch": 1.72, "grad_norm": 0.6371393799781799, "learning_rate": 0.0002310990816411103, "loss": 3.198, "step": 35123 }, { "epoch": 1.72, "grad_norm": 0.635123610496521, "learning_rate": 0.00023108409710278263, "loss": 2.8926, "step": 35124 }, { "epoch": 1.72, "grad_norm": 0.6079972386360168, "learning_rate": 0.000231069112745966, "loss": 2.9344, "step": 35125 }, { "epoch": 1.72, "grad_norm": 0.5995076298713684, "learning_rate": 0.0002310541285707002, "loss": 2.9177, "step": 35126 }, { "epoch": 1.72, "grad_norm": 0.6506484746932983, "learning_rate": 0.00023103914457702455, "loss": 3.03, "step": 35127 }, { "epoch": 1.72, "grad_norm": 0.6203939914703369, "learning_rate": 0.00023102416076497843, "loss": 3.0106, "step": 35128 }, { "epoch": 1.72, "grad_norm": 0.6446170210838318, "learning_rate": 0.00023100917713460154, "loss": 3.1954, "step": 35129 }, { "epoch": 1.72, "grad_norm": 0.676665723323822, "learning_rate": 0.00023099419368593311, "loss": 2.9198, "step": 35130 }, { "epoch": 1.72, "grad_norm": 0.6401715278625488, "learning_rate": 0.00023097921041901275, "loss": 3.1917, "step": 35131 }, { "epoch": 1.72, "grad_norm": 0.6062941551208496, "learning_rate": 0.00023096422733387978, "loss": 2.7867, "step": 35132 }, { "epoch": 1.72, "grad_norm": 0.574828565120697, "learning_rate": 0.00023094924443057378, "loss": 2.9809, "step": 35133 }, { "epoch": 1.72, "grad_norm": 0.6718885898590088, "learning_rate": 0.00023093426170913426, "loss": 3.095, "step": 35134 }, { "epoch": 1.72, "grad_norm": 0.595146656036377, "learning_rate": 0.00023091927916960042, "loss": 2.8781, "step": 35135 }, { "epoch": 1.72, "grad_norm": 0.6414282321929932, "learning_rate": 0.0002309042968120121, "loss": 2.8138, "step": 35136 }, { "epoch": 1.72, "grad_norm": 0.6145898699760437, "learning_rate": 0.00023088931463640847, "loss": 2.856, "step": 35137 }, { "epoch": 1.72, "grad_norm": 0.6160130500793457, "learning_rate": 0.00023087433264282905, "loss": 2.7365, "step": 35138 }, { "epoch": 1.72, "grad_norm": 0.6244746446609497, "learning_rate": 0.00023085935083131342, "loss": 2.838, "step": 35139 }, { "epoch": 1.72, "grad_norm": 0.6224129796028137, "learning_rate": 0.00023084436920190092, "loss": 2.8034, "step": 35140 }, { "epoch": 1.72, "grad_norm": 0.6752472519874573, "learning_rate": 0.0002308293877546311, "loss": 2.9116, "step": 35141 }, { "epoch": 1.72, "grad_norm": 0.6341972947120667, "learning_rate": 0.00023081440648954323, "loss": 2.8963, "step": 35142 }, { "epoch": 1.72, "grad_norm": 0.6429397463798523, "learning_rate": 0.00023079942540667688, "loss": 3.1747, "step": 35143 }, { "epoch": 1.72, "grad_norm": 0.6000608205795288, "learning_rate": 0.00023078444450607164, "loss": 2.8118, "step": 35144 }, { "epoch": 1.72, "grad_norm": 0.6079022884368896, "learning_rate": 0.00023076946378776687, "loss": 2.9218, "step": 35145 }, { "epoch": 1.72, "grad_norm": 0.6264728307723999, "learning_rate": 0.00023075448325180203, "loss": 2.8652, "step": 35146 }, { "epoch": 1.72, "grad_norm": 0.6049911379814148, "learning_rate": 0.00023073950289821638, "loss": 3.0077, "step": 35147 }, { "epoch": 1.72, "grad_norm": 0.6240792870521545, "learning_rate": 0.00023072452272704966, "loss": 3.0171, "step": 35148 }, { "epoch": 1.72, "grad_norm": 0.6277632117271423, "learning_rate": 0.00023070954273834128, "loss": 2.9082, "step": 35149 }, { "epoch": 1.72, "grad_norm": 0.5884895324707031, "learning_rate": 0.0002306945629321305, "loss": 3.139, "step": 35150 }, { "epoch": 1.72, "grad_norm": 0.6733899712562561, "learning_rate": 0.00023067958330845703, "loss": 3.0002, "step": 35151 }, { "epoch": 1.72, "grad_norm": 0.6554712057113647, "learning_rate": 0.0002306646038673601, "loss": 3.1105, "step": 35152 }, { "epoch": 1.72, "grad_norm": 0.6073846817016602, "learning_rate": 0.00023064962460887925, "loss": 2.9152, "step": 35153 }, { "epoch": 1.72, "grad_norm": 0.5951540470123291, "learning_rate": 0.000230634645533054, "loss": 3.0983, "step": 35154 }, { "epoch": 1.72, "grad_norm": 0.6558448672294617, "learning_rate": 0.00023061966663992376, "loss": 2.9839, "step": 35155 }, { "epoch": 1.72, "grad_norm": 0.632959246635437, "learning_rate": 0.00023060468792952796, "loss": 3.2383, "step": 35156 }, { "epoch": 1.72, "grad_norm": 0.6814397573471069, "learning_rate": 0.00023058970940190596, "loss": 2.7762, "step": 35157 }, { "epoch": 1.72, "grad_norm": 0.62791907787323, "learning_rate": 0.0002305747310570974, "loss": 3.0783, "step": 35158 }, { "epoch": 1.72, "grad_norm": 0.6006518602371216, "learning_rate": 0.00023055975289514166, "loss": 3.2219, "step": 35159 }, { "epoch": 1.72, "grad_norm": 0.6083630919456482, "learning_rate": 0.00023054477491607804, "loss": 3.1753, "step": 35160 }, { "epoch": 1.72, "grad_norm": 0.5891106128692627, "learning_rate": 0.00023052979711994624, "loss": 2.9886, "step": 35161 }, { "epoch": 1.72, "grad_norm": 0.6096766591072083, "learning_rate": 0.00023051481950678545, "loss": 3.0412, "step": 35162 }, { "epoch": 1.72, "grad_norm": 0.64244544506073, "learning_rate": 0.00023049984207663537, "loss": 2.8953, "step": 35163 }, { "epoch": 1.72, "grad_norm": 0.6248708963394165, "learning_rate": 0.00023048486482953536, "loss": 3.0307, "step": 35164 }, { "epoch": 1.72, "grad_norm": 0.6246823072433472, "learning_rate": 0.00023046988776552469, "loss": 3.14, "step": 35165 }, { "epoch": 1.72, "grad_norm": 0.6114916205406189, "learning_rate": 0.00023045491088464313, "loss": 3.003, "step": 35166 }, { "epoch": 1.72, "grad_norm": 0.6664708256721497, "learning_rate": 0.00023043993418692986, "loss": 2.9394, "step": 35167 }, { "epoch": 1.72, "grad_norm": 0.6526961326599121, "learning_rate": 0.00023042495767242434, "loss": 3.0523, "step": 35168 }, { "epoch": 1.72, "grad_norm": 0.637554943561554, "learning_rate": 0.00023040998134116623, "loss": 3.157, "step": 35169 }, { "epoch": 1.72, "grad_norm": 0.6036433577537537, "learning_rate": 0.00023039500519319477, "loss": 3.2725, "step": 35170 }, { "epoch": 1.72, "grad_norm": 0.6289345622062683, "learning_rate": 0.00023038002922854957, "loss": 3.088, "step": 35171 }, { "epoch": 1.72, "grad_norm": 0.6497759222984314, "learning_rate": 0.0002303650534472698, "loss": 3.1527, "step": 35172 }, { "epoch": 1.72, "grad_norm": 0.6652274131774902, "learning_rate": 0.00023035007784939516, "loss": 2.8838, "step": 35173 }, { "epoch": 1.72, "grad_norm": 0.6045557260513306, "learning_rate": 0.00023033510243496507, "loss": 2.9615, "step": 35174 }, { "epoch": 1.72, "grad_norm": 0.6381337642669678, "learning_rate": 0.00023032012720401878, "loss": 3.0682, "step": 35175 }, { "epoch": 1.72, "grad_norm": 0.6169887781143188, "learning_rate": 0.000230305152156596, "loss": 3.2183, "step": 35176 }, { "epoch": 1.72, "grad_norm": 0.6736177206039429, "learning_rate": 0.00023029017729273596, "loss": 2.9374, "step": 35177 }, { "epoch": 1.72, "grad_norm": 0.6414783596992493, "learning_rate": 0.00023027520261247812, "loss": 3.0381, "step": 35178 }, { "epoch": 1.72, "grad_norm": 0.6434562802314758, "learning_rate": 0.00023026022811586215, "loss": 2.8393, "step": 35179 }, { "epoch": 1.72, "grad_norm": 0.6093327403068542, "learning_rate": 0.00023024525380292718, "loss": 3.0741, "step": 35180 }, { "epoch": 1.72, "grad_norm": 0.5809537172317505, "learning_rate": 0.0002302302796737129, "loss": 3.0564, "step": 35181 }, { "epoch": 1.72, "grad_norm": 0.635099470615387, "learning_rate": 0.00023021530572825847, "loss": 3.1763, "step": 35182 }, { "epoch": 1.72, "grad_norm": 0.6042500734329224, "learning_rate": 0.00023020033196660355, "loss": 3.0027, "step": 35183 }, { "epoch": 1.72, "grad_norm": 0.6080394387245178, "learning_rate": 0.00023018535838878758, "loss": 3.0439, "step": 35184 }, { "epoch": 1.72, "grad_norm": 0.666522204875946, "learning_rate": 0.00023017038499484994, "loss": 3.0731, "step": 35185 }, { "epoch": 1.72, "grad_norm": 0.6572619080543518, "learning_rate": 0.0002301554117848301, "loss": 3.0514, "step": 35186 }, { "epoch": 1.72, "grad_norm": 0.6002278327941895, "learning_rate": 0.00023014043875876734, "loss": 2.9164, "step": 35187 }, { "epoch": 1.72, "grad_norm": 0.6185051202774048, "learning_rate": 0.00023012546591670127, "loss": 2.9961, "step": 35188 }, { "epoch": 1.72, "grad_norm": 0.61963951587677, "learning_rate": 0.00023011049325867135, "loss": 2.9879, "step": 35189 }, { "epoch": 1.72, "grad_norm": 0.5980280041694641, "learning_rate": 0.0002300955207847168, "loss": 3.2725, "step": 35190 }, { "epoch": 1.72, "grad_norm": 0.5959213972091675, "learning_rate": 0.00023008054849487733, "loss": 3.1077, "step": 35191 }, { "epoch": 1.72, "grad_norm": 0.5841280221939087, "learning_rate": 0.00023006557638919223, "loss": 3.0087, "step": 35192 }, { "epoch": 1.72, "grad_norm": 0.618198573589325, "learning_rate": 0.0002300506044677008, "loss": 2.9676, "step": 35193 }, { "epoch": 1.72, "grad_norm": 0.6227092742919922, "learning_rate": 0.00023003563273044276, "loss": 3.1052, "step": 35194 }, { "epoch": 1.72, "grad_norm": 0.6252568364143372, "learning_rate": 0.00023002066117745737, "loss": 3.0911, "step": 35195 }, { "epoch": 1.72, "grad_norm": 0.623429536819458, "learning_rate": 0.00023000568980878415, "loss": 2.9064, "step": 35196 }, { "epoch": 1.72, "grad_norm": 0.6618231534957886, "learning_rate": 0.0002299907186244623, "loss": 3.1137, "step": 35197 }, { "epoch": 1.72, "grad_norm": 0.6143259406089783, "learning_rate": 0.00022997574762453156, "loss": 3.0053, "step": 35198 }, { "epoch": 1.73, "grad_norm": 0.6106548309326172, "learning_rate": 0.0002299607768090312, "loss": 3.0564, "step": 35199 }, { "epoch": 1.73, "grad_norm": 0.595967710018158, "learning_rate": 0.0002299458061780006, "loss": 2.9528, "step": 35200 }, { "epoch": 1.73, "grad_norm": 0.6516256928443909, "learning_rate": 0.0002299308357314794, "loss": 3.0523, "step": 35201 }, { "epoch": 1.73, "grad_norm": 0.6277573704719543, "learning_rate": 0.0002299158654695068, "loss": 3.1288, "step": 35202 }, { "epoch": 1.73, "grad_norm": 0.6570903062820435, "learning_rate": 0.0002299008953921223, "loss": 2.9566, "step": 35203 }, { "epoch": 1.73, "grad_norm": 0.638201892375946, "learning_rate": 0.00022988592549936542, "loss": 2.756, "step": 35204 }, { "epoch": 1.73, "grad_norm": 0.6191340088844299, "learning_rate": 0.00022987095579127544, "loss": 3.0523, "step": 35205 }, { "epoch": 1.73, "grad_norm": 0.6047921776771545, "learning_rate": 0.00022985598626789196, "loss": 3.0704, "step": 35206 }, { "epoch": 1.73, "grad_norm": 0.6013028025627136, "learning_rate": 0.00022984101692925425, "loss": 3.1367, "step": 35207 }, { "epoch": 1.73, "grad_norm": 0.681874692440033, "learning_rate": 0.00022982604777540175, "loss": 3.125, "step": 35208 }, { "epoch": 1.73, "grad_norm": 0.6835533976554871, "learning_rate": 0.0002298110788063741, "loss": 2.946, "step": 35209 }, { "epoch": 1.73, "grad_norm": 0.6232330799102783, "learning_rate": 0.00022979611002221044, "loss": 2.9882, "step": 35210 }, { "epoch": 1.73, "grad_norm": 0.6557362675666809, "learning_rate": 0.00022978114142295036, "loss": 2.8999, "step": 35211 }, { "epoch": 1.73, "grad_norm": 0.6331151127815247, "learning_rate": 0.00022976617300863317, "loss": 3.0894, "step": 35212 }, { "epoch": 1.73, "grad_norm": 0.6033350825309753, "learning_rate": 0.00022975120477929846, "loss": 2.892, "step": 35213 }, { "epoch": 1.73, "grad_norm": 0.64558345079422, "learning_rate": 0.0002297362367349855, "loss": 2.8062, "step": 35214 }, { "epoch": 1.73, "grad_norm": 0.6405138373374939, "learning_rate": 0.00022972126887573372, "loss": 2.9401, "step": 35215 }, { "epoch": 1.73, "grad_norm": 0.6379008293151855, "learning_rate": 0.0002297063012015827, "loss": 3.1785, "step": 35216 }, { "epoch": 1.73, "grad_norm": 0.6051523089408875, "learning_rate": 0.0002296913337125717, "loss": 3.099, "step": 35217 }, { "epoch": 1.73, "grad_norm": 0.6115164160728455, "learning_rate": 0.00022967636640874026, "loss": 2.9392, "step": 35218 }, { "epoch": 1.73, "grad_norm": 0.6115735769271851, "learning_rate": 0.0002296613992901276, "loss": 3.0127, "step": 35219 }, { "epoch": 1.73, "grad_norm": 0.6742574572563171, "learning_rate": 0.00022964643235677337, "loss": 3.2632, "step": 35220 }, { "epoch": 1.73, "grad_norm": 0.6841800212860107, "learning_rate": 0.00022963146560871692, "loss": 2.976, "step": 35221 }, { "epoch": 1.73, "grad_norm": 0.6270367503166199, "learning_rate": 0.00022961649904599754, "loss": 3.1518, "step": 35222 }, { "epoch": 1.73, "grad_norm": 0.6001424193382263, "learning_rate": 0.00022960153266865488, "loss": 3.0636, "step": 35223 }, { "epoch": 1.73, "grad_norm": 0.5948289632797241, "learning_rate": 0.0002295865664767281, "loss": 3.0079, "step": 35224 }, { "epoch": 1.73, "grad_norm": 0.6431679725646973, "learning_rate": 0.00022957160047025683, "loss": 2.9789, "step": 35225 }, { "epoch": 1.73, "grad_norm": 0.6434341669082642, "learning_rate": 0.00022955663464928048, "loss": 3.2493, "step": 35226 }, { "epoch": 1.73, "grad_norm": 0.6969561576843262, "learning_rate": 0.00022954166901383827, "loss": 2.903, "step": 35227 }, { "epoch": 1.73, "grad_norm": 0.7470771074295044, "learning_rate": 0.00022952670356396986, "loss": 3.0102, "step": 35228 }, { "epoch": 1.73, "grad_norm": 0.6371145844459534, "learning_rate": 0.00022951173829971447, "loss": 2.9863, "step": 35229 }, { "epoch": 1.73, "grad_norm": 0.6292941570281982, "learning_rate": 0.00022949677322111155, "loss": 3.1331, "step": 35230 }, { "epoch": 1.73, "grad_norm": 0.6096308827400208, "learning_rate": 0.0002294818083282007, "loss": 3.084, "step": 35231 }, { "epoch": 1.73, "grad_norm": 0.6575873494148254, "learning_rate": 0.0002294668436210211, "loss": 3.0223, "step": 35232 }, { "epoch": 1.73, "grad_norm": 0.6215528845787048, "learning_rate": 0.00022945187909961237, "loss": 2.9783, "step": 35233 }, { "epoch": 1.73, "grad_norm": 0.6451046466827393, "learning_rate": 0.00022943691476401363, "loss": 2.8736, "step": 35234 }, { "epoch": 1.73, "grad_norm": 0.5992505550384521, "learning_rate": 0.0002294219506142646, "loss": 3.1373, "step": 35235 }, { "epoch": 1.73, "grad_norm": 0.6010803580284119, "learning_rate": 0.00022940698665040459, "loss": 2.8851, "step": 35236 }, { "epoch": 1.73, "grad_norm": 0.6390721201896667, "learning_rate": 0.00022939202287247285, "loss": 3.0608, "step": 35237 }, { "epoch": 1.73, "grad_norm": 0.6065561771392822, "learning_rate": 0.00022937705928050911, "loss": 2.9655, "step": 35238 }, { "epoch": 1.73, "grad_norm": 0.6406224370002747, "learning_rate": 0.0002293620958745525, "loss": 3.0668, "step": 35239 }, { "epoch": 1.73, "grad_norm": 0.6230671405792236, "learning_rate": 0.00022934713265464248, "loss": 2.97, "step": 35240 }, { "epoch": 1.73, "grad_norm": 0.6472780704498291, "learning_rate": 0.00022933216962081867, "loss": 3.097, "step": 35241 }, { "epoch": 1.73, "grad_norm": 0.6151770949363708, "learning_rate": 0.00022931720677312023, "loss": 2.9176, "step": 35242 }, { "epoch": 1.73, "grad_norm": 0.6394203901290894, "learning_rate": 0.00022930224411158673, "loss": 2.904, "step": 35243 }, { "epoch": 1.73, "grad_norm": 0.5917284488677979, "learning_rate": 0.0002292872816362574, "loss": 3.172, "step": 35244 }, { "epoch": 1.73, "grad_norm": 0.6539353728294373, "learning_rate": 0.00022927231934717176, "loss": 3.0997, "step": 35245 }, { "epoch": 1.73, "grad_norm": 0.592643678188324, "learning_rate": 0.00022925735724436933, "loss": 3.1776, "step": 35246 }, { "epoch": 1.73, "grad_norm": 0.6221964955329895, "learning_rate": 0.00022924239532788935, "loss": 3.0839, "step": 35247 }, { "epoch": 1.73, "grad_norm": 0.6389697194099426, "learning_rate": 0.00022922743359777138, "loss": 3.1729, "step": 35248 }, { "epoch": 1.73, "grad_norm": 0.6145962476730347, "learning_rate": 0.00022921247205405458, "loss": 3.2908, "step": 35249 }, { "epoch": 1.73, "grad_norm": 0.6351203322410583, "learning_rate": 0.00022919751069677856, "loss": 3.214, "step": 35250 }, { "epoch": 1.73, "grad_norm": 0.6602262258529663, "learning_rate": 0.00022918254952598272, "loss": 2.904, "step": 35251 }, { "epoch": 1.73, "grad_norm": 0.6813586354255676, "learning_rate": 0.0002291675885417063, "loss": 2.9873, "step": 35252 }, { "epoch": 1.73, "grad_norm": 0.6100876331329346, "learning_rate": 0.00022915262774398897, "loss": 3.0853, "step": 35253 }, { "epoch": 1.73, "grad_norm": 0.614741861820221, "learning_rate": 0.0002291376671328699, "loss": 3.0462, "step": 35254 }, { "epoch": 1.73, "grad_norm": 0.629533588886261, "learning_rate": 0.0002291227067083885, "loss": 2.9814, "step": 35255 }, { "epoch": 1.73, "grad_norm": 0.6625435948371887, "learning_rate": 0.0002291077464705844, "loss": 3.1346, "step": 35256 }, { "epoch": 1.73, "grad_norm": 0.6223627924919128, "learning_rate": 0.0002290927864194968, "loss": 2.9611, "step": 35257 }, { "epoch": 1.73, "grad_norm": 0.5756930112838745, "learning_rate": 0.0002290778265551652, "loss": 2.9672, "step": 35258 }, { "epoch": 1.73, "grad_norm": 0.6262645125389099, "learning_rate": 0.0002290628668776288, "loss": 2.8283, "step": 35259 }, { "epoch": 1.73, "grad_norm": 0.6052729487419128, "learning_rate": 0.00022904790738692725, "loss": 2.8735, "step": 35260 }, { "epoch": 1.73, "grad_norm": 0.6487675309181213, "learning_rate": 0.0002290329480830999, "loss": 2.8281, "step": 35261 }, { "epoch": 1.73, "grad_norm": 0.6117686629295349, "learning_rate": 0.00022901798896618602, "loss": 3.0395, "step": 35262 }, { "epoch": 1.73, "grad_norm": 0.602601170539856, "learning_rate": 0.0002290030300362252, "loss": 2.974, "step": 35263 }, { "epoch": 1.73, "grad_norm": 0.6140736937522888, "learning_rate": 0.00022898807129325665, "loss": 2.8676, "step": 35264 }, { "epoch": 1.73, "grad_norm": 0.6174287796020508, "learning_rate": 0.00022897311273731985, "loss": 3.0077, "step": 35265 }, { "epoch": 1.73, "grad_norm": 0.6516817212104797, "learning_rate": 0.00022895815436845427, "loss": 3.1008, "step": 35266 }, { "epoch": 1.73, "grad_norm": 0.6091328859329224, "learning_rate": 0.0002289431961866991, "loss": 2.8788, "step": 35267 }, { "epoch": 1.73, "grad_norm": 0.6192858815193176, "learning_rate": 0.00022892823819209407, "loss": 2.9493, "step": 35268 }, { "epoch": 1.73, "grad_norm": 0.641791820526123, "learning_rate": 0.0002289132803846783, "loss": 3.0263, "step": 35269 }, { "epoch": 1.73, "grad_norm": 0.6411734223365784, "learning_rate": 0.00022889832276449116, "loss": 3.1047, "step": 35270 }, { "epoch": 1.73, "grad_norm": 0.6926196217536926, "learning_rate": 0.00022888336533157232, "loss": 3.1334, "step": 35271 }, { "epoch": 1.73, "grad_norm": 0.622738778591156, "learning_rate": 0.00022886840808596096, "loss": 3.1372, "step": 35272 }, { "epoch": 1.73, "grad_norm": 0.5840880870819092, "learning_rate": 0.00022885345102769655, "loss": 3.0984, "step": 35273 }, { "epoch": 1.73, "grad_norm": 0.60886549949646, "learning_rate": 0.00022883849415681834, "loss": 2.9715, "step": 35274 }, { "epoch": 1.73, "grad_norm": 0.6631279587745667, "learning_rate": 0.00022882353747336593, "loss": 2.959, "step": 35275 }, { "epoch": 1.73, "grad_norm": 0.6146132946014404, "learning_rate": 0.00022880858097737864, "loss": 2.9739, "step": 35276 }, { "epoch": 1.73, "grad_norm": 0.6303625702857971, "learning_rate": 0.00022879362466889577, "loss": 3.0501, "step": 35277 }, { "epoch": 1.73, "grad_norm": 0.6738470792770386, "learning_rate": 0.0002287786685479569, "loss": 2.9648, "step": 35278 }, { "epoch": 1.73, "grad_norm": 0.6016108989715576, "learning_rate": 0.00022876371261460122, "loss": 3.1268, "step": 35279 }, { "epoch": 1.73, "grad_norm": 0.6048367023468018, "learning_rate": 0.00022874875686886817, "loss": 3.1259, "step": 35280 }, { "epoch": 1.73, "grad_norm": 0.6452575922012329, "learning_rate": 0.0002287338013107973, "loss": 3.1736, "step": 35281 }, { "epoch": 1.73, "grad_norm": 0.6293025612831116, "learning_rate": 0.00022871884594042783, "loss": 2.9434, "step": 35282 }, { "epoch": 1.73, "grad_norm": 0.6301412582397461, "learning_rate": 0.00022870389075779925, "loss": 2.8099, "step": 35283 }, { "epoch": 1.73, "grad_norm": 0.6559537053108215, "learning_rate": 0.0002286889357629508, "loss": 2.8507, "step": 35284 }, { "epoch": 1.73, "grad_norm": 0.6444092392921448, "learning_rate": 0.00022867398095592194, "loss": 2.8865, "step": 35285 }, { "epoch": 1.73, "grad_norm": 0.5924097895622253, "learning_rate": 0.0002286590263367522, "loss": 3.3732, "step": 35286 }, { "epoch": 1.73, "grad_norm": 0.6571818590164185, "learning_rate": 0.00022864407190548085, "loss": 3.0194, "step": 35287 }, { "epoch": 1.73, "grad_norm": 0.6414764523506165, "learning_rate": 0.00022862911766214733, "loss": 2.895, "step": 35288 }, { "epoch": 1.73, "grad_norm": 0.5858622789382935, "learning_rate": 0.00022861416360679082, "loss": 2.973, "step": 35289 }, { "epoch": 1.73, "grad_norm": 0.6514439582824707, "learning_rate": 0.00022859920973945097, "loss": 2.8787, "step": 35290 }, { "epoch": 1.73, "grad_norm": 0.6287581324577332, "learning_rate": 0.0002285842560601671, "loss": 3.1014, "step": 35291 }, { "epoch": 1.73, "grad_norm": 0.6182932257652283, "learning_rate": 0.0002285693025689784, "loss": 3.1868, "step": 35292 }, { "epoch": 1.73, "grad_norm": 0.739643394947052, "learning_rate": 0.00022855434926592454, "loss": 2.7995, "step": 35293 }, { "epoch": 1.73, "grad_norm": 0.6869654655456543, "learning_rate": 0.00022853939615104476, "loss": 2.9529, "step": 35294 }, { "epoch": 1.73, "grad_norm": 0.6125555038452148, "learning_rate": 0.00022852444322437847, "loss": 2.8912, "step": 35295 }, { "epoch": 1.73, "grad_norm": 0.5996557474136353, "learning_rate": 0.00022850949048596492, "loss": 3.0872, "step": 35296 }, { "epoch": 1.73, "grad_norm": 0.6288072466850281, "learning_rate": 0.00022849453793584372, "loss": 2.9651, "step": 35297 }, { "epoch": 1.73, "grad_norm": 0.6631354093551636, "learning_rate": 0.00022847958557405416, "loss": 2.9076, "step": 35298 }, { "epoch": 1.73, "grad_norm": 0.6144710779190063, "learning_rate": 0.0002284646334006355, "loss": 2.9343, "step": 35299 }, { "epoch": 1.73, "grad_norm": 0.6347219347953796, "learning_rate": 0.00022844968141562738, "loss": 3.1248, "step": 35300 }, { "epoch": 1.73, "grad_norm": 0.6205817461013794, "learning_rate": 0.00022843472961906893, "loss": 2.8253, "step": 35301 }, { "epoch": 1.73, "grad_norm": 0.6230130791664124, "learning_rate": 0.0002284197780109996, "loss": 2.9309, "step": 35302 }, { "epoch": 1.73, "grad_norm": 0.673992931842804, "learning_rate": 0.0002284048265914589, "loss": 3.2319, "step": 35303 }, { "epoch": 1.73, "grad_norm": 0.7464253902435303, "learning_rate": 0.00022838987536048604, "loss": 3.1167, "step": 35304 }, { "epoch": 1.73, "grad_norm": 0.6007848381996155, "learning_rate": 0.00022837492431812054, "loss": 2.8541, "step": 35305 }, { "epoch": 1.73, "grad_norm": 0.6575406789779663, "learning_rate": 0.0002283599734644016, "loss": 3.1411, "step": 35306 }, { "epoch": 1.73, "grad_norm": 0.5982678532600403, "learning_rate": 0.00022834502279936867, "loss": 2.9991, "step": 35307 }, { "epoch": 1.73, "grad_norm": 0.6275074481964111, "learning_rate": 0.00022833007232306128, "loss": 2.88, "step": 35308 }, { "epoch": 1.73, "grad_norm": 0.6013560891151428, "learning_rate": 0.00022831512203551864, "loss": 2.8328, "step": 35309 }, { "epoch": 1.73, "grad_norm": 0.6640160083770752, "learning_rate": 0.00022830017193678024, "loss": 2.9587, "step": 35310 }, { "epoch": 1.73, "grad_norm": 0.6699552536010742, "learning_rate": 0.00022828522202688527, "loss": 3.1212, "step": 35311 }, { "epoch": 1.73, "grad_norm": 0.6032391786575317, "learning_rate": 0.00022827027230587328, "loss": 2.9259, "step": 35312 }, { "epoch": 1.73, "grad_norm": 0.6313595175743103, "learning_rate": 0.00022825532277378363, "loss": 3.02, "step": 35313 }, { "epoch": 1.73, "grad_norm": 0.6690878868103027, "learning_rate": 0.00022824037343065556, "loss": 3.0762, "step": 35314 }, { "epoch": 1.73, "grad_norm": 0.5943594574928284, "learning_rate": 0.00022822542427652863, "loss": 2.8547, "step": 35315 }, { "epoch": 1.73, "grad_norm": 0.6819391846656799, "learning_rate": 0.00022821047531144207, "loss": 3.2038, "step": 35316 }, { "epoch": 1.73, "grad_norm": 0.6711174845695496, "learning_rate": 0.00022819552653543525, "loss": 2.8025, "step": 35317 }, { "epoch": 1.73, "grad_norm": 0.667477011680603, "learning_rate": 0.0002281805779485477, "loss": 3.1893, "step": 35318 }, { "epoch": 1.73, "grad_norm": 0.6368096470832825, "learning_rate": 0.0002281656295508186, "loss": 3.1114, "step": 35319 }, { "epoch": 1.73, "grad_norm": 0.6909788846969604, "learning_rate": 0.0002281506813422875, "loss": 2.9798, "step": 35320 }, { "epoch": 1.73, "grad_norm": 0.6262080073356628, "learning_rate": 0.0002281357333229936, "loss": 3.0459, "step": 35321 }, { "epoch": 1.73, "grad_norm": 0.6632434725761414, "learning_rate": 0.00022812078549297633, "loss": 3.0869, "step": 35322 }, { "epoch": 1.73, "grad_norm": 0.6851413249969482, "learning_rate": 0.0002281058378522752, "loss": 2.9552, "step": 35323 }, { "epoch": 1.73, "grad_norm": 0.6063692569732666, "learning_rate": 0.0002280908904009293, "loss": 2.8897, "step": 35324 }, { "epoch": 1.73, "grad_norm": 0.6380870342254639, "learning_rate": 0.00022807594313897827, "loss": 3.108, "step": 35325 }, { "epoch": 1.73, "grad_norm": 0.6056703925132751, "learning_rate": 0.00022806099606646125, "loss": 3.0071, "step": 35326 }, { "epoch": 1.73, "grad_norm": 0.6386041045188904, "learning_rate": 0.0002280460491834178, "loss": 3.0436, "step": 35327 }, { "epoch": 1.73, "grad_norm": 0.6316136717796326, "learning_rate": 0.00022803110248988725, "loss": 2.9004, "step": 35328 }, { "epoch": 1.73, "grad_norm": 0.624875545501709, "learning_rate": 0.0002280161559859088, "loss": 3.0124, "step": 35329 }, { "epoch": 1.73, "grad_norm": 0.6380939483642578, "learning_rate": 0.0002280012096715221, "loss": 3.0695, "step": 35330 }, { "epoch": 1.73, "grad_norm": 0.6007676720619202, "learning_rate": 0.00022798626354676626, "loss": 3.1734, "step": 35331 }, { "epoch": 1.73, "grad_norm": 0.6163882613182068, "learning_rate": 0.00022797131761168068, "loss": 2.8966, "step": 35332 }, { "epoch": 1.73, "grad_norm": 0.6753647327423096, "learning_rate": 0.00022795637186630494, "loss": 2.9465, "step": 35333 }, { "epoch": 1.73, "grad_norm": 0.5936952233314514, "learning_rate": 0.00022794142631067814, "loss": 2.8909, "step": 35334 }, { "epoch": 1.73, "grad_norm": 0.6279860734939575, "learning_rate": 0.00022792648094483987, "loss": 3.0901, "step": 35335 }, { "epoch": 1.73, "grad_norm": 0.6184460520744324, "learning_rate": 0.00022791153576882922, "loss": 2.9562, "step": 35336 }, { "epoch": 1.73, "grad_norm": 0.6539486646652222, "learning_rate": 0.0002278965907826858, "loss": 2.9344, "step": 35337 }, { "epoch": 1.73, "grad_norm": 0.6066672205924988, "learning_rate": 0.00022788164598644894, "loss": 3.0856, "step": 35338 }, { "epoch": 1.73, "grad_norm": 0.613047182559967, "learning_rate": 0.00022786670138015783, "loss": 3.1087, "step": 35339 }, { "epoch": 1.73, "grad_norm": 0.6032065749168396, "learning_rate": 0.00022785175696385203, "loss": 3.0789, "step": 35340 }, { "epoch": 1.73, "grad_norm": 0.6134180426597595, "learning_rate": 0.00022783681273757077, "loss": 3.1098, "step": 35341 }, { "epoch": 1.73, "grad_norm": 0.6650903820991516, "learning_rate": 0.00022782186870135343, "loss": 2.9518, "step": 35342 }, { "epoch": 1.73, "grad_norm": 0.6557191610336304, "learning_rate": 0.0002278069248552395, "loss": 2.8869, "step": 35343 }, { "epoch": 1.73, "grad_norm": 0.6179054975509644, "learning_rate": 0.00022779198119926819, "loss": 3.0837, "step": 35344 }, { "epoch": 1.73, "grad_norm": 0.6232442855834961, "learning_rate": 0.00022777703773347894, "loss": 3.1992, "step": 35345 }, { "epoch": 1.73, "grad_norm": 0.6513820290565491, "learning_rate": 0.00022776209445791093, "loss": 3.0261, "step": 35346 }, { "epoch": 1.73, "grad_norm": 0.6319857239723206, "learning_rate": 0.00022774715137260367, "loss": 2.9248, "step": 35347 }, { "epoch": 1.73, "grad_norm": 0.6437803506851196, "learning_rate": 0.00022773220847759667, "loss": 3.0184, "step": 35348 }, { "epoch": 1.73, "grad_norm": 0.645805299282074, "learning_rate": 0.00022771726577292903, "loss": 2.9881, "step": 35349 }, { "epoch": 1.73, "grad_norm": 0.6178550124168396, "learning_rate": 0.0002277023232586403, "loss": 3.097, "step": 35350 }, { "epoch": 1.73, "grad_norm": 0.6673634052276611, "learning_rate": 0.00022768738093476956, "loss": 3.2377, "step": 35351 }, { "epoch": 1.73, "grad_norm": 0.601243257522583, "learning_rate": 0.00022767243880135641, "loss": 2.6505, "step": 35352 }, { "epoch": 1.73, "grad_norm": 0.6067811846733093, "learning_rate": 0.00022765749685844025, "loss": 2.8767, "step": 35353 }, { "epoch": 1.73, "grad_norm": 0.5918362736701965, "learning_rate": 0.00022764255510606014, "loss": 2.9484, "step": 35354 }, { "epoch": 1.73, "grad_norm": 0.6136022806167603, "learning_rate": 0.00022762761354425576, "loss": 3.0941, "step": 35355 }, { "epoch": 1.73, "grad_norm": 0.6263452768325806, "learning_rate": 0.0002276126721730663, "loss": 2.9776, "step": 35356 }, { "epoch": 1.73, "grad_norm": 0.6397753953933716, "learning_rate": 0.000227597730992531, "loss": 2.8435, "step": 35357 }, { "epoch": 1.73, "grad_norm": 0.6426587700843811, "learning_rate": 0.0002275827900026895, "loss": 2.9557, "step": 35358 }, { "epoch": 1.73, "grad_norm": 0.6584495902061462, "learning_rate": 0.00022756784920358093, "loss": 2.8092, "step": 35359 }, { "epoch": 1.73, "grad_norm": 0.6029062271118164, "learning_rate": 0.00022755290859524476, "loss": 3.1722, "step": 35360 }, { "epoch": 1.73, "grad_norm": 0.5749930739402771, "learning_rate": 0.00022753796817772014, "loss": 2.9569, "step": 35361 }, { "epoch": 1.73, "grad_norm": 0.7005466222763062, "learning_rate": 0.00022752302795104665, "loss": 3.0455, "step": 35362 }, { "epoch": 1.73, "grad_norm": 0.6490772366523743, "learning_rate": 0.00022750808791526363, "loss": 2.8489, "step": 35363 }, { "epoch": 1.73, "grad_norm": 0.6193538904190063, "learning_rate": 0.00022749314807041023, "loss": 2.97, "step": 35364 }, { "epoch": 1.73, "grad_norm": 0.6467298865318298, "learning_rate": 0.00022747820841652607, "loss": 3.0202, "step": 35365 }, { "epoch": 1.73, "grad_norm": 0.5905973315238953, "learning_rate": 0.00022746326895365025, "loss": 3.1219, "step": 35366 }, { "epoch": 1.73, "grad_norm": 0.6227057576179504, "learning_rate": 0.00022744832968182228, "loss": 2.9582, "step": 35367 }, { "epoch": 1.73, "grad_norm": 0.6363484263420105, "learning_rate": 0.00022743339060108135, "loss": 3.2402, "step": 35368 }, { "epoch": 1.73, "grad_norm": 0.6553448438644409, "learning_rate": 0.00022741845171146685, "loss": 3.0608, "step": 35369 }, { "epoch": 1.73, "grad_norm": 0.6466527581214905, "learning_rate": 0.00022740351301301837, "loss": 3.0484, "step": 35370 }, { "epoch": 1.73, "grad_norm": 0.5950348377227783, "learning_rate": 0.00022738857450577496, "loss": 3.2043, "step": 35371 }, { "epoch": 1.73, "grad_norm": 0.6238282918930054, "learning_rate": 0.00022737363618977613, "loss": 2.9925, "step": 35372 }, { "epoch": 1.73, "grad_norm": 0.6248958110809326, "learning_rate": 0.00022735869806506106, "loss": 3.1858, "step": 35373 }, { "epoch": 1.73, "grad_norm": 0.7205170392990112, "learning_rate": 0.00022734376013166927, "loss": 2.9612, "step": 35374 }, { "epoch": 1.73, "grad_norm": 0.6162315607070923, "learning_rate": 0.0002273288223896401, "loss": 2.8959, "step": 35375 }, { "epoch": 1.73, "grad_norm": 0.6196463704109192, "learning_rate": 0.00022731388483901267, "loss": 3.0026, "step": 35376 }, { "epoch": 1.73, "grad_norm": 0.6567784547805786, "learning_rate": 0.00022729894747982663, "loss": 2.9925, "step": 35377 }, { "epoch": 1.73, "grad_norm": 0.6151658892631531, "learning_rate": 0.00022728401031212107, "loss": 3.1042, "step": 35378 }, { "epoch": 1.73, "grad_norm": 0.5919417142868042, "learning_rate": 0.0002272690733359354, "loss": 3.0079, "step": 35379 }, { "epoch": 1.73, "grad_norm": 0.6030619740486145, "learning_rate": 0.00022725413655130916, "loss": 2.9471, "step": 35380 }, { "epoch": 1.73, "grad_norm": 0.6577292084693909, "learning_rate": 0.0002272391999582814, "loss": 2.8635, "step": 35381 }, { "epoch": 1.73, "grad_norm": 0.6286402940750122, "learning_rate": 0.00022722426355689166, "loss": 2.8672, "step": 35382 }, { "epoch": 1.73, "grad_norm": 0.6577459573745728, "learning_rate": 0.0002272093273471791, "loss": 3.0737, "step": 35383 }, { "epoch": 1.73, "grad_norm": 0.692613959312439, "learning_rate": 0.00022719439132918318, "loss": 3.2409, "step": 35384 }, { "epoch": 1.73, "grad_norm": 0.628135085105896, "learning_rate": 0.0002271794555029433, "loss": 2.9021, "step": 35385 }, { "epoch": 1.73, "grad_norm": 0.629897952079773, "learning_rate": 0.00022716451986849861, "loss": 2.9762, "step": 35386 }, { "epoch": 1.73, "grad_norm": 0.6259167790412903, "learning_rate": 0.00022714958442588868, "loss": 2.949, "step": 35387 }, { "epoch": 1.73, "grad_norm": 0.6222914457321167, "learning_rate": 0.0002271346491751526, "loss": 2.9188, "step": 35388 }, { "epoch": 1.73, "grad_norm": 0.6155588030815125, "learning_rate": 0.00022711971411632992, "loss": 3.0117, "step": 35389 }, { "epoch": 1.73, "grad_norm": 0.6510941982269287, "learning_rate": 0.0002271047792494599, "loss": 3.1024, "step": 35390 }, { "epoch": 1.73, "grad_norm": 0.6198724508285522, "learning_rate": 0.00022708984457458175, "loss": 2.7966, "step": 35391 }, { "epoch": 1.73, "grad_norm": 0.6015543341636658, "learning_rate": 0.00022707491009173506, "loss": 3.0348, "step": 35392 }, { "epoch": 1.73, "grad_norm": 0.6300435662269592, "learning_rate": 0.00022705997580095894, "loss": 3.0702, "step": 35393 }, { "epoch": 1.73, "grad_norm": 0.6055490970611572, "learning_rate": 0.00022704504170229276, "loss": 3.1463, "step": 35394 }, { "epoch": 1.73, "grad_norm": 0.5714443922042847, "learning_rate": 0.00022703010779577602, "loss": 3.1352, "step": 35395 }, { "epoch": 1.73, "grad_norm": 0.6332013607025146, "learning_rate": 0.00022701517408144789, "loss": 3.0562, "step": 35396 }, { "epoch": 1.73, "grad_norm": 0.5947373509407043, "learning_rate": 0.00022700024055934777, "loss": 3.167, "step": 35397 }, { "epoch": 1.73, "grad_norm": 0.6268903613090515, "learning_rate": 0.00022698530722951486, "loss": 2.9227, "step": 35398 }, { "epoch": 1.73, "grad_norm": 0.6999599933624268, "learning_rate": 0.00022697037409198865, "loss": 2.8985, "step": 35399 }, { "epoch": 1.73, "grad_norm": 0.6626995205879211, "learning_rate": 0.00022695544114680852, "loss": 3.0245, "step": 35400 }, { "epoch": 1.73, "grad_norm": 0.6301445960998535, "learning_rate": 0.0002269405083940135, "loss": 3.0983, "step": 35401 }, { "epoch": 1.73, "grad_norm": 0.6395618915557861, "learning_rate": 0.00022692557583364333, "loss": 2.9343, "step": 35402 }, { "epoch": 1.74, "grad_norm": 0.5879999995231628, "learning_rate": 0.00022691064346573703, "loss": 2.7305, "step": 35403 }, { "epoch": 1.74, "grad_norm": 0.6151583194732666, "learning_rate": 0.00022689571129033397, "loss": 3.0668, "step": 35404 }, { "epoch": 1.74, "grad_norm": 0.6331624984741211, "learning_rate": 0.00022688077930747367, "loss": 3.0581, "step": 35405 }, { "epoch": 1.74, "grad_norm": 0.6111859083175659, "learning_rate": 0.00022686584751719524, "loss": 3.1264, "step": 35406 }, { "epoch": 1.74, "grad_norm": 0.6400036811828613, "learning_rate": 0.00022685091591953817, "loss": 3.1299, "step": 35407 }, { "epoch": 1.74, "grad_norm": 0.6397266983985901, "learning_rate": 0.00022683598451454158, "loss": 3.2165, "step": 35408 }, { "epoch": 1.74, "grad_norm": 0.6920884847640991, "learning_rate": 0.0002268210533022449, "loss": 3.1075, "step": 35409 }, { "epoch": 1.74, "grad_norm": 0.6338069438934326, "learning_rate": 0.00022680612228268766, "loss": 3.0068, "step": 35410 }, { "epoch": 1.74, "grad_norm": 0.6007820963859558, "learning_rate": 0.0002267911914559089, "loss": 3.0907, "step": 35411 }, { "epoch": 1.74, "grad_norm": 0.6300089955329895, "learning_rate": 0.00022677626082194812, "loss": 3.1833, "step": 35412 }, { "epoch": 1.74, "grad_norm": 0.6192349791526794, "learning_rate": 0.00022676133038084447, "loss": 3.0577, "step": 35413 }, { "epoch": 1.74, "grad_norm": 0.6377847790718079, "learning_rate": 0.00022674640013263745, "loss": 3.0741, "step": 35414 }, { "epoch": 1.74, "grad_norm": 0.6192604303359985, "learning_rate": 0.00022673147007736636, "loss": 3.0948, "step": 35415 }, { "epoch": 1.74, "grad_norm": 0.605172336101532, "learning_rate": 0.00022671654021507037, "loss": 3.0666, "step": 35416 }, { "epoch": 1.74, "grad_norm": 0.6593872904777527, "learning_rate": 0.00022670161054578903, "loss": 3.0207, "step": 35417 }, { "epoch": 1.74, "grad_norm": 0.6469155550003052, "learning_rate": 0.00022668668106956148, "loss": 2.9885, "step": 35418 }, { "epoch": 1.74, "grad_norm": 0.6012063026428223, "learning_rate": 0.00022667175178642701, "loss": 3.0121, "step": 35419 }, { "epoch": 1.74, "grad_norm": 0.6011958718299866, "learning_rate": 0.0002266568226964252, "loss": 2.9255, "step": 35420 }, { "epoch": 1.74, "grad_norm": 0.6600900888442993, "learning_rate": 0.0002266418937995951, "loss": 3.0529, "step": 35421 }, { "epoch": 1.74, "grad_norm": 0.6183411478996277, "learning_rate": 0.00022662696509597623, "loss": 2.9691, "step": 35422 }, { "epoch": 1.74, "grad_norm": 0.6118205189704895, "learning_rate": 0.0002266120365856077, "loss": 3.101, "step": 35423 }, { "epoch": 1.74, "grad_norm": 0.6012549996376038, "learning_rate": 0.00022659710826852895, "loss": 3.0369, "step": 35424 }, { "epoch": 1.74, "grad_norm": 0.7324210405349731, "learning_rate": 0.00022658218014477938, "loss": 3.0671, "step": 35425 }, { "epoch": 1.74, "grad_norm": 0.616727888584137, "learning_rate": 0.00022656725221439808, "loss": 2.9368, "step": 35426 }, { "epoch": 1.74, "grad_norm": 0.5840345621109009, "learning_rate": 0.00022655232447742467, "loss": 3.1202, "step": 35427 }, { "epoch": 1.74, "grad_norm": 0.6287633180618286, "learning_rate": 0.0002265373969338982, "loss": 2.9494, "step": 35428 }, { "epoch": 1.74, "grad_norm": 0.6012067198753357, "learning_rate": 0.00022652246958385807, "loss": 3.0535, "step": 35429 }, { "epoch": 1.74, "grad_norm": 0.6259651184082031, "learning_rate": 0.00022650754242734371, "loss": 3.2078, "step": 35430 }, { "epoch": 1.74, "grad_norm": 0.6013410687446594, "learning_rate": 0.00022649261546439422, "loss": 3.0495, "step": 35431 }, { "epoch": 1.74, "grad_norm": 0.6384536623954773, "learning_rate": 0.00022647768869504912, "loss": 2.9705, "step": 35432 }, { "epoch": 1.74, "grad_norm": 0.6175307631492615, "learning_rate": 0.00022646276211934764, "loss": 3.1426, "step": 35433 }, { "epoch": 1.74, "grad_norm": 0.630964457988739, "learning_rate": 0.000226447835737329, "loss": 3.0491, "step": 35434 }, { "epoch": 1.74, "grad_norm": 0.6299197673797607, "learning_rate": 0.00022643290954903273, "loss": 2.9989, "step": 35435 }, { "epoch": 1.74, "grad_norm": 0.6441041231155396, "learning_rate": 0.00022641798355449796, "loss": 3.2179, "step": 35436 }, { "epoch": 1.74, "grad_norm": 0.6124367117881775, "learning_rate": 0.00022640305775376409, "loss": 2.9684, "step": 35437 }, { "epoch": 1.74, "grad_norm": 0.5946165323257446, "learning_rate": 0.00022638813214687033, "loss": 2.781, "step": 35438 }, { "epoch": 1.74, "grad_norm": 0.6153435707092285, "learning_rate": 0.0002263732067338561, "loss": 3.2042, "step": 35439 }, { "epoch": 1.74, "grad_norm": 0.614129364490509, "learning_rate": 0.0002263582815147607, "loss": 3.0863, "step": 35440 }, { "epoch": 1.74, "grad_norm": 0.6298364400863647, "learning_rate": 0.00022634335648962333, "loss": 2.8166, "step": 35441 }, { "epoch": 1.74, "grad_norm": 0.6218454241752625, "learning_rate": 0.0002263284316584835, "loss": 3.0104, "step": 35442 }, { "epoch": 1.74, "grad_norm": 0.63499915599823, "learning_rate": 0.00022631350702138038, "loss": 3.0578, "step": 35443 }, { "epoch": 1.74, "grad_norm": 0.5916718244552612, "learning_rate": 0.0002262985825783533, "loss": 3.2383, "step": 35444 }, { "epoch": 1.74, "grad_norm": 0.6168555617332458, "learning_rate": 0.0002262836583294415, "loss": 3.2438, "step": 35445 }, { "epoch": 1.74, "grad_norm": 0.6397154927253723, "learning_rate": 0.00022626873427468439, "loss": 2.8624, "step": 35446 }, { "epoch": 1.74, "grad_norm": 0.594692587852478, "learning_rate": 0.00022625381041412127, "loss": 3.0968, "step": 35447 }, { "epoch": 1.74, "grad_norm": 0.6055973172187805, "learning_rate": 0.00022623888674779135, "loss": 2.9747, "step": 35448 }, { "epoch": 1.74, "grad_norm": 0.6591005921363831, "learning_rate": 0.0002262239632757341, "loss": 2.9765, "step": 35449 }, { "epoch": 1.74, "grad_norm": 0.6846657991409302, "learning_rate": 0.00022620903999798862, "loss": 2.827, "step": 35450 }, { "epoch": 1.74, "grad_norm": 0.6533744931221008, "learning_rate": 0.00022619411691459442, "loss": 3.1813, "step": 35451 }, { "epoch": 1.74, "grad_norm": 0.6433231830596924, "learning_rate": 0.00022617919402559075, "loss": 2.9903, "step": 35452 }, { "epoch": 1.74, "grad_norm": 0.6263206005096436, "learning_rate": 0.00022616427133101673, "loss": 2.9961, "step": 35453 }, { "epoch": 1.74, "grad_norm": 0.6297306418418884, "learning_rate": 0.00022614934883091197, "loss": 3.0246, "step": 35454 }, { "epoch": 1.74, "grad_norm": 0.6420456171035767, "learning_rate": 0.00022613442652531551, "loss": 3.0221, "step": 35455 }, { "epoch": 1.74, "grad_norm": 0.6900204420089722, "learning_rate": 0.00022611950441426675, "loss": 3.232, "step": 35456 }, { "epoch": 1.74, "grad_norm": 0.6068527698516846, "learning_rate": 0.00022610458249780507, "loss": 2.8889, "step": 35457 }, { "epoch": 1.74, "grad_norm": 0.6444588303565979, "learning_rate": 0.00022608966077596965, "loss": 2.9091, "step": 35458 }, { "epoch": 1.74, "grad_norm": 0.6167253255844116, "learning_rate": 0.0002260747392487999, "loss": 2.8771, "step": 35459 }, { "epoch": 1.74, "grad_norm": 0.6610934138298035, "learning_rate": 0.00022605981791633492, "loss": 2.8356, "step": 35460 }, { "epoch": 1.74, "grad_norm": 0.6797847151756287, "learning_rate": 0.00022604489677861426, "loss": 3.1294, "step": 35461 }, { "epoch": 1.74, "grad_norm": 0.6337746381759644, "learning_rate": 0.00022602997583567712, "loss": 3.0702, "step": 35462 }, { "epoch": 1.74, "grad_norm": 0.619875967502594, "learning_rate": 0.00022601505508756267, "loss": 2.879, "step": 35463 }, { "epoch": 1.74, "grad_norm": 0.6586485505104065, "learning_rate": 0.0002260001345343105, "loss": 3.1996, "step": 35464 }, { "epoch": 1.74, "grad_norm": 0.6685471534729004, "learning_rate": 0.0002259852141759596, "loss": 3.0143, "step": 35465 }, { "epoch": 1.74, "grad_norm": 0.6211610436439514, "learning_rate": 0.00022597029401254937, "loss": 3.2222, "step": 35466 }, { "epoch": 1.74, "grad_norm": 0.6223245859146118, "learning_rate": 0.0002259553740441193, "loss": 2.8696, "step": 35467 }, { "epoch": 1.74, "grad_norm": 0.603645384311676, "learning_rate": 0.00022594045427070842, "loss": 2.9405, "step": 35468 }, { "epoch": 1.74, "grad_norm": 0.6659462451934814, "learning_rate": 0.0002259255346923562, "loss": 3.0429, "step": 35469 }, { "epoch": 1.74, "grad_norm": 0.6123594045639038, "learning_rate": 0.00022591061530910175, "loss": 2.9071, "step": 35470 }, { "epoch": 1.74, "grad_norm": 0.6161990165710449, "learning_rate": 0.00022589569612098447, "loss": 3.1323, "step": 35471 }, { "epoch": 1.74, "grad_norm": 0.6867458820343018, "learning_rate": 0.0002258807771280438, "loss": 2.9695, "step": 35472 }, { "epoch": 1.74, "grad_norm": 0.5981655716896057, "learning_rate": 0.00022586585833031883, "loss": 2.9782, "step": 35473 }, { "epoch": 1.74, "grad_norm": 0.6146122217178345, "learning_rate": 0.000225850939727849, "loss": 3.0785, "step": 35474 }, { "epoch": 1.74, "grad_norm": 0.6437488794326782, "learning_rate": 0.00022583602132067335, "loss": 2.8745, "step": 35475 }, { "epoch": 1.74, "grad_norm": 0.6147376894950867, "learning_rate": 0.00022582110310883147, "loss": 2.9838, "step": 35476 }, { "epoch": 1.74, "grad_norm": 0.5768126845359802, "learning_rate": 0.00022580618509236258, "loss": 3.0706, "step": 35477 }, { "epoch": 1.74, "grad_norm": 0.6257051825523376, "learning_rate": 0.00022579126727130576, "loss": 2.857, "step": 35478 }, { "epoch": 1.74, "grad_norm": 0.6463055610656738, "learning_rate": 0.0002257763496457006, "loss": 3.0198, "step": 35479 }, { "epoch": 1.74, "grad_norm": 0.6310402154922485, "learning_rate": 0.00022576143221558618, "loss": 2.7418, "step": 35480 }, { "epoch": 1.74, "grad_norm": 0.6200835704803467, "learning_rate": 0.00022574651498100186, "loss": 3.0547, "step": 35481 }, { "epoch": 1.74, "grad_norm": 0.6173551082611084, "learning_rate": 0.00022573159794198695, "loss": 2.8957, "step": 35482 }, { "epoch": 1.74, "grad_norm": 0.6188896894454956, "learning_rate": 0.00022571668109858074, "loss": 2.9646, "step": 35483 }, { "epoch": 1.74, "grad_norm": 0.6149911880493164, "learning_rate": 0.00022570176445082252, "loss": 2.963, "step": 35484 }, { "epoch": 1.74, "grad_norm": 0.568781316280365, "learning_rate": 0.00022568684799875144, "loss": 3.0014, "step": 35485 }, { "epoch": 1.74, "grad_norm": 0.7318620681762695, "learning_rate": 0.00022567193174240693, "loss": 3.0284, "step": 35486 }, { "epoch": 1.74, "grad_norm": 0.6288902759552002, "learning_rate": 0.00022565701568182835, "loss": 2.8994, "step": 35487 }, { "epoch": 1.74, "grad_norm": 0.6177136301994324, "learning_rate": 0.00022564209981705473, "loss": 3.065, "step": 35488 }, { "epoch": 1.74, "grad_norm": 0.6629291772842407, "learning_rate": 0.00022562718414812568, "loss": 3.1049, "step": 35489 }, { "epoch": 1.74, "grad_norm": 0.6288484334945679, "learning_rate": 0.00022561226867508018, "loss": 2.9863, "step": 35490 }, { "epoch": 1.74, "grad_norm": 0.611689031124115, "learning_rate": 0.00022559735339795767, "loss": 2.8179, "step": 35491 }, { "epoch": 1.74, "grad_norm": 0.6365360617637634, "learning_rate": 0.00022558243831679749, "loss": 3.0323, "step": 35492 }, { "epoch": 1.74, "grad_norm": 0.586676299571991, "learning_rate": 0.00022556752343163872, "loss": 2.9651, "step": 35493 }, { "epoch": 1.74, "grad_norm": 0.6502875685691833, "learning_rate": 0.00022555260874252092, "loss": 3.2247, "step": 35494 }, { "epoch": 1.74, "grad_norm": 0.6236476302146912, "learning_rate": 0.00022553769424948314, "loss": 3.2573, "step": 35495 }, { "epoch": 1.74, "grad_norm": 0.622161865234375, "learning_rate": 0.00022552277995256466, "loss": 3.0476, "step": 35496 }, { "epoch": 1.74, "grad_norm": 0.5968731045722961, "learning_rate": 0.000225507865851805, "loss": 3.1644, "step": 35497 }, { "epoch": 1.74, "grad_norm": 0.5831536054611206, "learning_rate": 0.00022549295194724322, "loss": 2.9254, "step": 35498 }, { "epoch": 1.74, "grad_norm": 0.6409755945205688, "learning_rate": 0.0002254780382389187, "loss": 3.0804, "step": 35499 }, { "epoch": 1.74, "grad_norm": 0.6048734188079834, "learning_rate": 0.0002254631247268706, "loss": 2.9089, "step": 35500 }, { "epoch": 1.74, "grad_norm": 0.6509040594100952, "learning_rate": 0.00022544821141113832, "loss": 2.6318, "step": 35501 }, { "epoch": 1.74, "grad_norm": 0.637199342250824, "learning_rate": 0.00022543329829176115, "loss": 3.145, "step": 35502 }, { "epoch": 1.74, "grad_norm": 0.6597538590431213, "learning_rate": 0.00022541838536877822, "loss": 2.7563, "step": 35503 }, { "epoch": 1.74, "grad_norm": 0.6191924214363098, "learning_rate": 0.00022540347264222904, "loss": 3.2077, "step": 35504 }, { "epoch": 1.74, "grad_norm": 0.6519606113433838, "learning_rate": 0.00022538856011215265, "loss": 2.8337, "step": 35505 }, { "epoch": 1.74, "grad_norm": 0.6172239184379578, "learning_rate": 0.00022537364777858843, "loss": 3.2083, "step": 35506 }, { "epoch": 1.74, "grad_norm": 0.6710629463195801, "learning_rate": 0.00022535873564157575, "loss": 2.7566, "step": 35507 }, { "epoch": 1.74, "grad_norm": 0.6321614980697632, "learning_rate": 0.00022534382370115375, "loss": 2.8906, "step": 35508 }, { "epoch": 1.74, "grad_norm": 0.6277992129325867, "learning_rate": 0.00022532891195736182, "loss": 3.0721, "step": 35509 }, { "epoch": 1.74, "grad_norm": 0.6334667205810547, "learning_rate": 0.000225314000410239, "loss": 2.9217, "step": 35510 }, { "epoch": 1.74, "grad_norm": 0.6273242831230164, "learning_rate": 0.00022529908905982477, "loss": 2.844, "step": 35511 }, { "epoch": 1.74, "grad_norm": 0.6677801012992859, "learning_rate": 0.0002252841779061585, "loss": 3.0954, "step": 35512 }, { "epoch": 1.74, "grad_norm": 0.6611440181732178, "learning_rate": 0.00022526926694927922, "loss": 3.0319, "step": 35513 }, { "epoch": 1.74, "grad_norm": 0.6573300957679749, "learning_rate": 0.00022525435618922637, "loss": 3.0643, "step": 35514 }, { "epoch": 1.74, "grad_norm": 0.6609066724777222, "learning_rate": 0.00022523944562603904, "loss": 3.2395, "step": 35515 }, { "epoch": 1.74, "grad_norm": 0.6033406853675842, "learning_rate": 0.0002252245352597567, "loss": 3.2297, "step": 35516 }, { "epoch": 1.74, "grad_norm": 0.6859046220779419, "learning_rate": 0.00022520962509041864, "loss": 2.9935, "step": 35517 }, { "epoch": 1.74, "grad_norm": 0.6412832140922546, "learning_rate": 0.00022519471511806384, "loss": 2.8779, "step": 35518 }, { "epoch": 1.74, "grad_norm": 0.6439999341964722, "learning_rate": 0.00022517980534273192, "loss": 2.958, "step": 35519 }, { "epoch": 1.74, "grad_norm": 0.6545829772949219, "learning_rate": 0.00022516489576446196, "loss": 3.0343, "step": 35520 }, { "epoch": 1.74, "grad_norm": 0.6581568121910095, "learning_rate": 0.0002251499863832933, "loss": 3.0944, "step": 35521 }, { "epoch": 1.74, "grad_norm": 0.5977330207824707, "learning_rate": 0.00022513507719926505, "loss": 3.0737, "step": 35522 }, { "epoch": 1.74, "grad_norm": 0.611193835735321, "learning_rate": 0.00022512016821241664, "loss": 3.0715, "step": 35523 }, { "epoch": 1.74, "grad_norm": 0.6156790256500244, "learning_rate": 0.0002251052594227874, "loss": 2.9858, "step": 35524 }, { "epoch": 1.74, "grad_norm": 0.6435781717300415, "learning_rate": 0.00022509035083041631, "loss": 3.1172, "step": 35525 }, { "epoch": 1.74, "grad_norm": 0.6316915154457092, "learning_rate": 0.00022507544243534296, "loss": 3.0121, "step": 35526 }, { "epoch": 1.74, "grad_norm": 0.6427628397941589, "learning_rate": 0.00022506053423760645, "loss": 3.1036, "step": 35527 }, { "epoch": 1.74, "grad_norm": 0.7132937908172607, "learning_rate": 0.000225045626237246, "loss": 3.1381, "step": 35528 }, { "epoch": 1.74, "grad_norm": 0.6076539754867554, "learning_rate": 0.00022503071843430102, "loss": 2.9185, "step": 35529 }, { "epoch": 1.74, "grad_norm": 0.59321129322052, "learning_rate": 0.0002250158108288107, "loss": 3.2645, "step": 35530 }, { "epoch": 1.74, "grad_norm": 0.6133169531822205, "learning_rate": 0.00022500090342081433, "loss": 2.9951, "step": 35531 }, { "epoch": 1.74, "grad_norm": 0.5968568325042725, "learning_rate": 0.00022498599621035103, "loss": 2.9241, "step": 35532 }, { "epoch": 1.74, "grad_norm": 0.6295343637466431, "learning_rate": 0.00022497108919746018, "loss": 2.9182, "step": 35533 }, { "epoch": 1.74, "grad_norm": 0.6084388494491577, "learning_rate": 0.0002249561823821812, "loss": 3.0067, "step": 35534 }, { "epoch": 1.74, "grad_norm": 0.6127825379371643, "learning_rate": 0.00022494127576455305, "loss": 2.998, "step": 35535 }, { "epoch": 1.74, "grad_norm": 0.6642194986343384, "learning_rate": 0.00022492636934461524, "loss": 2.9918, "step": 35536 }, { "epoch": 1.74, "grad_norm": 0.5972631573677063, "learning_rate": 0.00022491146312240683, "loss": 2.8956, "step": 35537 }, { "epoch": 1.74, "grad_norm": 0.6442238092422485, "learning_rate": 0.0002248965570979672, "loss": 2.8439, "step": 35538 }, { "epoch": 1.74, "grad_norm": 0.6403670907020569, "learning_rate": 0.00022488165127133565, "loss": 2.867, "step": 35539 }, { "epoch": 1.74, "grad_norm": 0.6831574440002441, "learning_rate": 0.00022486674564255128, "loss": 3.0751, "step": 35540 }, { "epoch": 1.74, "grad_norm": 0.6002147793769836, "learning_rate": 0.0002248518402116535, "loss": 3.0445, "step": 35541 }, { "epoch": 1.74, "grad_norm": 0.6270040273666382, "learning_rate": 0.0002248369349786815, "loss": 3.0538, "step": 35542 }, { "epoch": 1.74, "grad_norm": 0.6381674408912659, "learning_rate": 0.00022482202994367448, "loss": 3.0805, "step": 35543 }, { "epoch": 1.74, "grad_norm": 0.6307418942451477, "learning_rate": 0.00022480712510667195, "loss": 3.0251, "step": 35544 }, { "epoch": 1.74, "grad_norm": 0.6338752508163452, "learning_rate": 0.00022479222046771282, "loss": 3.2437, "step": 35545 }, { "epoch": 1.74, "grad_norm": 0.5932478904724121, "learning_rate": 0.00022477731602683662, "loss": 2.7564, "step": 35546 }, { "epoch": 1.74, "grad_norm": 0.6139539480209351, "learning_rate": 0.00022476241178408232, "loss": 3.0832, "step": 35547 }, { "epoch": 1.74, "grad_norm": 0.6189956068992615, "learning_rate": 0.00022474750773948944, "loss": 3.0366, "step": 35548 }, { "epoch": 1.74, "grad_norm": 0.6055632829666138, "learning_rate": 0.0002247326038930972, "loss": 3.0145, "step": 35549 }, { "epoch": 1.74, "grad_norm": 0.6140079498291016, "learning_rate": 0.0002247177002449447, "loss": 3.0551, "step": 35550 }, { "epoch": 1.74, "grad_norm": 0.656378984451294, "learning_rate": 0.00022470279679507143, "loss": 3.0311, "step": 35551 }, { "epoch": 1.74, "grad_norm": 0.668050229549408, "learning_rate": 0.0002246878935435163, "loss": 3.0796, "step": 35552 }, { "epoch": 1.74, "grad_norm": 0.6154969930648804, "learning_rate": 0.00022467299049031887, "loss": 3.0686, "step": 35553 }, { "epoch": 1.74, "grad_norm": 0.6256099343299866, "learning_rate": 0.0002246580876355184, "loss": 2.9475, "step": 35554 }, { "epoch": 1.74, "grad_norm": 0.6148830056190491, "learning_rate": 0.0002246431849791538, "loss": 2.8056, "step": 35555 }, { "epoch": 1.74, "grad_norm": 0.6625372171401978, "learning_rate": 0.00022462828252126473, "loss": 2.8291, "step": 35556 }, { "epoch": 1.74, "grad_norm": 0.6332937479019165, "learning_rate": 0.0002246133802618902, "loss": 3.0003, "step": 35557 }, { "epoch": 1.74, "grad_norm": 0.5910854339599609, "learning_rate": 0.0002245984782010694, "loss": 3.0787, "step": 35558 }, { "epoch": 1.74, "grad_norm": 0.6351834535598755, "learning_rate": 0.0002245835763388419, "loss": 3.2896, "step": 35559 }, { "epoch": 1.74, "grad_norm": 0.6503569483757019, "learning_rate": 0.00022456867467524663, "loss": 3.0756, "step": 35560 }, { "epoch": 1.74, "grad_norm": 0.686381995677948, "learning_rate": 0.00022455377321032306, "loss": 2.7848, "step": 35561 }, { "epoch": 1.74, "grad_norm": 0.6213240027427673, "learning_rate": 0.00022453887194411016, "loss": 3.0433, "step": 35562 }, { "epoch": 1.74, "grad_norm": 0.6782155632972717, "learning_rate": 0.00022452397087664743, "loss": 3.0367, "step": 35563 }, { "epoch": 1.74, "grad_norm": 0.8070650100708008, "learning_rate": 0.00022450907000797412, "loss": 3.0191, "step": 35564 }, { "epoch": 1.74, "grad_norm": 0.6163588166236877, "learning_rate": 0.00022449416933812924, "loss": 2.9667, "step": 35565 }, { "epoch": 1.74, "grad_norm": 0.6353696584701538, "learning_rate": 0.00022447926886715231, "loss": 3.0485, "step": 35566 }, { "epoch": 1.74, "grad_norm": 0.6798037886619568, "learning_rate": 0.0002244643685950824, "loss": 2.9272, "step": 35567 }, { "epoch": 1.74, "grad_norm": 0.5956858992576599, "learning_rate": 0.00022444946852195874, "loss": 3.0098, "step": 35568 }, { "epoch": 1.74, "grad_norm": 0.6191090941429138, "learning_rate": 0.00022443456864782078, "loss": 2.8499, "step": 35569 }, { "epoch": 1.74, "grad_norm": 0.6584221720695496, "learning_rate": 0.00022441966897270753, "loss": 3.2436, "step": 35570 }, { "epoch": 1.74, "grad_norm": 0.6226842403411865, "learning_rate": 0.0002244047694966584, "loss": 3.0886, "step": 35571 }, { "epoch": 1.74, "grad_norm": 0.6515241265296936, "learning_rate": 0.00022438987021971242, "loss": 3.0962, "step": 35572 }, { "epoch": 1.74, "grad_norm": 0.6351691484451294, "learning_rate": 0.000224374971141909, "loss": 2.9709, "step": 35573 }, { "epoch": 1.74, "grad_norm": 0.6209082007408142, "learning_rate": 0.00022436007226328748, "loss": 2.9444, "step": 35574 }, { "epoch": 1.74, "grad_norm": 0.5917410850524902, "learning_rate": 0.00022434517358388688, "loss": 3.1332, "step": 35575 }, { "epoch": 1.74, "grad_norm": 0.6047651171684265, "learning_rate": 0.0002243302751037466, "loss": 2.9769, "step": 35576 }, { "epoch": 1.74, "grad_norm": 0.5979304909706116, "learning_rate": 0.0002243153768229057, "loss": 3.0874, "step": 35577 }, { "epoch": 1.74, "grad_norm": 0.6439438462257385, "learning_rate": 0.00022430047874140357, "loss": 2.9494, "step": 35578 }, { "epoch": 1.74, "grad_norm": 0.590167760848999, "learning_rate": 0.0002242855808592795, "loss": 2.9652, "step": 35579 }, { "epoch": 1.74, "grad_norm": 0.6090808510780334, "learning_rate": 0.00022427068317657252, "loss": 2.9947, "step": 35580 }, { "epoch": 1.74, "grad_norm": 0.607502281665802, "learning_rate": 0.0002242557856933221, "loss": 2.9917, "step": 35581 }, { "epoch": 1.74, "grad_norm": 0.6465458273887634, "learning_rate": 0.0002242408884095673, "loss": 2.9978, "step": 35582 }, { "epoch": 1.74, "grad_norm": 0.6103089451789856, "learning_rate": 0.00022422599132534737, "loss": 2.8718, "step": 35583 }, { "epoch": 1.74, "grad_norm": 0.6132879853248596, "learning_rate": 0.0002242110944407017, "loss": 2.7641, "step": 35584 }, { "epoch": 1.74, "grad_norm": 0.6607589721679688, "learning_rate": 0.00022419619775566934, "loss": 3.0388, "step": 35585 }, { "epoch": 1.74, "grad_norm": 0.6323291063308716, "learning_rate": 0.00022418130127028972, "loss": 3.0716, "step": 35586 }, { "epoch": 1.74, "grad_norm": 0.6502161622047424, "learning_rate": 0.0002241664049846018, "loss": 3.0507, "step": 35587 }, { "epoch": 1.74, "grad_norm": 0.6421080231666565, "learning_rate": 0.00022415150889864503, "loss": 3.0403, "step": 35588 }, { "epoch": 1.74, "grad_norm": 0.6806257367134094, "learning_rate": 0.00022413661301245872, "loss": 2.9999, "step": 35589 }, { "epoch": 1.74, "grad_norm": 0.6115500330924988, "learning_rate": 0.00022412171732608176, "loss": 2.9546, "step": 35590 }, { "epoch": 1.74, "grad_norm": 0.6313839554786682, "learning_rate": 0.00022410682183955375, "loss": 3.0004, "step": 35591 }, { "epoch": 1.74, "grad_norm": 0.6472813487052917, "learning_rate": 0.0002240919265529137, "loss": 3.1098, "step": 35592 }, { "epoch": 1.74, "grad_norm": 0.6413887143135071, "learning_rate": 0.00022407703146620086, "loss": 3.1327, "step": 35593 }, { "epoch": 1.74, "grad_norm": 0.5937476754188538, "learning_rate": 0.00022406213657945464, "loss": 2.833, "step": 35594 }, { "epoch": 1.74, "grad_norm": 0.6520037055015564, "learning_rate": 0.00022404724189271398, "loss": 3.0171, "step": 35595 }, { "epoch": 1.74, "grad_norm": 0.6196516156196594, "learning_rate": 0.0002240323474060184, "loss": 2.9223, "step": 35596 }, { "epoch": 1.74, "grad_norm": 0.6525871753692627, "learning_rate": 0.00022401745311940695, "loss": 3.1616, "step": 35597 }, { "epoch": 1.74, "grad_norm": 0.617184042930603, "learning_rate": 0.00022400255903291886, "loss": 3.071, "step": 35598 }, { "epoch": 1.74, "grad_norm": 0.6290184259414673, "learning_rate": 0.00022398766514659347, "loss": 3.1684, "step": 35599 }, { "epoch": 1.74, "grad_norm": 0.6642170548439026, "learning_rate": 0.00022397277146046996, "loss": 2.8508, "step": 35600 }, { "epoch": 1.74, "grad_norm": 0.6329320073127747, "learning_rate": 0.0002239578779745875, "loss": 2.8697, "step": 35601 }, { "epoch": 1.74, "grad_norm": 0.6439346671104431, "learning_rate": 0.00022394298468898533, "loss": 3.0158, "step": 35602 }, { "epoch": 1.74, "grad_norm": 0.6102849245071411, "learning_rate": 0.0002239280916037028, "loss": 3.0351, "step": 35603 }, { "epoch": 1.74, "grad_norm": 0.6272664070129395, "learning_rate": 0.00022391319871877888, "loss": 2.9033, "step": 35604 }, { "epoch": 1.74, "grad_norm": 0.6411396265029907, "learning_rate": 0.000223898306034253, "loss": 2.9436, "step": 35605 }, { "epoch": 1.74, "grad_norm": 0.6268124580383301, "learning_rate": 0.00022388341355016443, "loss": 2.9714, "step": 35606 }, { "epoch": 1.75, "grad_norm": 0.608563244342804, "learning_rate": 0.00022386852126655223, "loss": 3.1994, "step": 35607 }, { "epoch": 1.75, "grad_norm": 0.6222233176231384, "learning_rate": 0.00022385362918345573, "loss": 2.9368, "step": 35608 }, { "epoch": 1.75, "grad_norm": 0.6342154145240784, "learning_rate": 0.00022383873730091403, "loss": 2.9475, "step": 35609 }, { "epoch": 1.75, "grad_norm": 0.6370996832847595, "learning_rate": 0.00022382384561896648, "loss": 2.7946, "step": 35610 }, { "epoch": 1.75, "grad_norm": 0.6122251749038696, "learning_rate": 0.0002238089541376523, "loss": 2.809, "step": 35611 }, { "epoch": 1.75, "grad_norm": 0.6048957109451294, "learning_rate": 0.0002237940628570106, "loss": 2.9704, "step": 35612 }, { "epoch": 1.75, "grad_norm": 0.619551420211792, "learning_rate": 0.0002237791717770808, "loss": 2.8918, "step": 35613 }, { "epoch": 1.75, "grad_norm": 0.6651395559310913, "learning_rate": 0.0002237642808979018, "loss": 3.0585, "step": 35614 }, { "epoch": 1.75, "grad_norm": 0.6424996852874756, "learning_rate": 0.00022374939021951314, "loss": 2.9588, "step": 35615 }, { "epoch": 1.75, "grad_norm": 0.678436815738678, "learning_rate": 0.00022373449974195396, "loss": 2.9305, "step": 35616 }, { "epoch": 1.75, "grad_norm": 0.6074570417404175, "learning_rate": 0.0002237196094652633, "loss": 2.7319, "step": 35617 }, { "epoch": 1.75, "grad_norm": 0.6088811755180359, "learning_rate": 0.0002237047193894806, "loss": 2.9016, "step": 35618 }, { "epoch": 1.75, "grad_norm": 0.6111059188842773, "learning_rate": 0.00022368982951464497, "loss": 3.071, "step": 35619 }, { "epoch": 1.75, "grad_norm": 0.594634473323822, "learning_rate": 0.00022367493984079553, "loss": 3.0388, "step": 35620 }, { "epoch": 1.75, "grad_norm": 0.5961778163909912, "learning_rate": 0.0002236600503679718, "loss": 3.0913, "step": 35621 }, { "epoch": 1.75, "grad_norm": 0.6522762179374695, "learning_rate": 0.0002236451610962127, "loss": 2.9231, "step": 35622 }, { "epoch": 1.75, "grad_norm": 0.6687374711036682, "learning_rate": 0.00022363027202555762, "loss": 3.0651, "step": 35623 }, { "epoch": 1.75, "grad_norm": 0.6435642838478088, "learning_rate": 0.00022361538315604558, "loss": 2.9751, "step": 35624 }, { "epoch": 1.75, "grad_norm": 0.667826235294342, "learning_rate": 0.00022360049448771598, "loss": 3.1817, "step": 35625 }, { "epoch": 1.75, "grad_norm": 0.6378101706504822, "learning_rate": 0.00022358560602060807, "loss": 2.9707, "step": 35626 }, { "epoch": 1.75, "grad_norm": 0.620212197303772, "learning_rate": 0.0002235707177547608, "loss": 3.1727, "step": 35627 }, { "epoch": 1.75, "grad_norm": 0.6127507090568542, "learning_rate": 0.0002235558296902137, "loss": 3.027, "step": 35628 }, { "epoch": 1.75, "grad_norm": 0.6060869693756104, "learning_rate": 0.00022354094182700575, "loss": 3.2523, "step": 35629 }, { "epoch": 1.75, "grad_norm": 0.6330663561820984, "learning_rate": 0.00022352605416517618, "loss": 2.9645, "step": 35630 }, { "epoch": 1.75, "grad_norm": 0.6215522289276123, "learning_rate": 0.0002235111667047644, "loss": 2.9589, "step": 35631 }, { "epoch": 1.75, "grad_norm": 0.6048828363418579, "learning_rate": 0.00022349627944580946, "loss": 2.9766, "step": 35632 }, { "epoch": 1.75, "grad_norm": 0.607565701007843, "learning_rate": 0.00022348139238835064, "loss": 3.177, "step": 35633 }, { "epoch": 1.75, "grad_norm": 0.6367427110671997, "learning_rate": 0.00022346650553242695, "loss": 3.0012, "step": 35634 }, { "epoch": 1.75, "grad_norm": 0.6365235447883606, "learning_rate": 0.00022345161887807774, "loss": 3.0022, "step": 35635 }, { "epoch": 1.75, "grad_norm": 0.6165895462036133, "learning_rate": 0.00022343673242534244, "loss": 2.8973, "step": 35636 }, { "epoch": 1.75, "grad_norm": 0.6315513849258423, "learning_rate": 0.00022342184617425993, "loss": 2.887, "step": 35637 }, { "epoch": 1.75, "grad_norm": 0.6446321606636047, "learning_rate": 0.0002234069601248696, "loss": 3.0301, "step": 35638 }, { "epoch": 1.75, "grad_norm": 0.6023744940757751, "learning_rate": 0.00022339207427721049, "loss": 2.8683, "step": 35639 }, { "epoch": 1.75, "grad_norm": 0.6446340680122375, "learning_rate": 0.00022337718863132196, "loss": 3.1663, "step": 35640 }, { "epoch": 1.75, "grad_norm": 0.6004554629325867, "learning_rate": 0.00022336230318724325, "loss": 3.0679, "step": 35641 }, { "epoch": 1.75, "grad_norm": 0.6370178461074829, "learning_rate": 0.00022334741794501333, "loss": 2.9213, "step": 35642 }, { "epoch": 1.75, "grad_norm": 0.6428967118263245, "learning_rate": 0.0002233325329046717, "loss": 2.9016, "step": 35643 }, { "epoch": 1.75, "grad_norm": 0.685459554195404, "learning_rate": 0.0002233176480662574, "loss": 2.7608, "step": 35644 }, { "epoch": 1.75, "grad_norm": 0.6399477124214172, "learning_rate": 0.00022330276342980956, "loss": 2.9918, "step": 35645 }, { "epoch": 1.75, "grad_norm": 0.64689040184021, "learning_rate": 0.0002232878789953676, "loss": 3.0101, "step": 35646 }, { "epoch": 1.75, "grad_norm": 0.6764118671417236, "learning_rate": 0.00022327299476297054, "loss": 3.169, "step": 35647 }, { "epoch": 1.75, "grad_norm": 0.6111961603164673, "learning_rate": 0.00022325811073265774, "loss": 2.777, "step": 35648 }, { "epoch": 1.75, "grad_norm": 0.6330588459968567, "learning_rate": 0.00022324322690446815, "loss": 2.8875, "step": 35649 }, { "epoch": 1.75, "grad_norm": 0.6471860408782959, "learning_rate": 0.00022322834327844117, "loss": 3.055, "step": 35650 }, { "epoch": 1.75, "grad_norm": 0.6193934679031372, "learning_rate": 0.00022321345985461605, "loss": 2.9988, "step": 35651 }, { "epoch": 1.75, "grad_norm": 0.627863883972168, "learning_rate": 0.00022319857663303177, "loss": 3.1429, "step": 35652 }, { "epoch": 1.75, "grad_norm": 0.6523374915122986, "learning_rate": 0.0002231836936137278, "loss": 3.1287, "step": 35653 }, { "epoch": 1.75, "grad_norm": 0.6731512546539307, "learning_rate": 0.00022316881079674305, "loss": 2.9187, "step": 35654 }, { "epoch": 1.75, "grad_norm": 0.6700525879859924, "learning_rate": 0.00022315392818211694, "loss": 2.8942, "step": 35655 }, { "epoch": 1.75, "grad_norm": 0.6407337784767151, "learning_rate": 0.00022313904576988866, "loss": 3.2872, "step": 35656 }, { "epoch": 1.75, "grad_norm": 0.6617950797080994, "learning_rate": 0.00022312416356009722, "loss": 3.0562, "step": 35657 }, { "epoch": 1.75, "grad_norm": 0.6472514867782593, "learning_rate": 0.000223109281552782, "loss": 3.138, "step": 35658 }, { "epoch": 1.75, "grad_norm": 0.669858992099762, "learning_rate": 0.00022309439974798213, "loss": 2.8543, "step": 35659 }, { "epoch": 1.75, "grad_norm": 0.5877886414527893, "learning_rate": 0.00022307951814573676, "loss": 3.2045, "step": 35660 }, { "epoch": 1.75, "grad_norm": 0.5913372039794922, "learning_rate": 0.00022306463674608525, "loss": 3.2414, "step": 35661 }, { "epoch": 1.75, "grad_norm": 0.6737897992134094, "learning_rate": 0.0002230497555490666, "loss": 3.0833, "step": 35662 }, { "epoch": 1.75, "grad_norm": 0.6321114897727966, "learning_rate": 0.00022303487455472017, "loss": 2.9484, "step": 35663 }, { "epoch": 1.75, "grad_norm": 0.6263748407363892, "learning_rate": 0.00022301999376308492, "loss": 3.1179, "step": 35664 }, { "epoch": 1.75, "grad_norm": 0.7027498483657837, "learning_rate": 0.00022300511317420023, "loss": 2.8271, "step": 35665 }, { "epoch": 1.75, "grad_norm": 0.6153057813644409, "learning_rate": 0.0002229902327881054, "loss": 3.2247, "step": 35666 }, { "epoch": 1.75, "grad_norm": 0.6329478621482849, "learning_rate": 0.0002229753526048393, "loss": 2.8801, "step": 35667 }, { "epoch": 1.75, "grad_norm": 0.6193848848342896, "learning_rate": 0.0002229604726244414, "loss": 2.9691, "step": 35668 }, { "epoch": 1.75, "grad_norm": 0.6393882036209106, "learning_rate": 0.00022294559284695076, "loss": 2.9422, "step": 35669 }, { "epoch": 1.75, "grad_norm": 0.6423365473747253, "learning_rate": 0.00022293071327240656, "loss": 3.0597, "step": 35670 }, { "epoch": 1.75, "grad_norm": 0.6023625135421753, "learning_rate": 0.00022291583390084808, "loss": 2.9063, "step": 35671 }, { "epoch": 1.75, "grad_norm": 0.6385714411735535, "learning_rate": 0.00022290095473231444, "loss": 2.9283, "step": 35672 }, { "epoch": 1.75, "grad_norm": 0.6258054971694946, "learning_rate": 0.0002228860757668449, "loss": 3.0131, "step": 35673 }, { "epoch": 1.75, "grad_norm": 0.6036873459815979, "learning_rate": 0.0002228711970044785, "loss": 3.2195, "step": 35674 }, { "epoch": 1.75, "grad_norm": 0.6359200477600098, "learning_rate": 0.0002228563184452545, "loss": 3.214, "step": 35675 }, { "epoch": 1.75, "grad_norm": 0.6024333834648132, "learning_rate": 0.00022284144008921224, "loss": 2.9977, "step": 35676 }, { "epoch": 1.75, "grad_norm": 0.6681496500968933, "learning_rate": 0.00022282656193639067, "loss": 3.1888, "step": 35677 }, { "epoch": 1.75, "grad_norm": 0.6251303553581238, "learning_rate": 0.0002228116839868292, "loss": 2.8912, "step": 35678 }, { "epoch": 1.75, "grad_norm": 0.6392276287078857, "learning_rate": 0.00022279680624056678, "loss": 3.1644, "step": 35679 }, { "epoch": 1.75, "grad_norm": 0.6303515434265137, "learning_rate": 0.00022278192869764282, "loss": 3.0278, "step": 35680 }, { "epoch": 1.75, "grad_norm": 0.6725468635559082, "learning_rate": 0.00022276705135809631, "loss": 3.1027, "step": 35681 }, { "epoch": 1.75, "grad_norm": 0.6221615672111511, "learning_rate": 0.0002227521742219665, "loss": 3.0331, "step": 35682 }, { "epoch": 1.75, "grad_norm": 0.6534598469734192, "learning_rate": 0.00022273729728929272, "loss": 3.0542, "step": 35683 }, { "epoch": 1.75, "grad_norm": 0.6483638286590576, "learning_rate": 0.00022272242056011396, "loss": 2.9716, "step": 35684 }, { "epoch": 1.75, "grad_norm": 0.636474072933197, "learning_rate": 0.0002227075440344695, "loss": 3.0775, "step": 35685 }, { "epoch": 1.75, "grad_norm": 0.6173744201660156, "learning_rate": 0.00022269266771239842, "loss": 2.879, "step": 35686 }, { "epoch": 1.75, "grad_norm": 0.6293776631355286, "learning_rate": 0.00022267779159394, "loss": 3.0981, "step": 35687 }, { "epoch": 1.75, "grad_norm": 0.624398946762085, "learning_rate": 0.0002226629156791335, "loss": 2.7682, "step": 35688 }, { "epoch": 1.75, "grad_norm": 0.647941529750824, "learning_rate": 0.00022264803996801787, "loss": 3.103, "step": 35689 }, { "epoch": 1.75, "grad_norm": 0.6048708558082581, "learning_rate": 0.00022263316446063247, "loss": 2.9964, "step": 35690 }, { "epoch": 1.75, "grad_norm": 0.6444445848464966, "learning_rate": 0.00022261828915701642, "loss": 3.1067, "step": 35691 }, { "epoch": 1.75, "grad_norm": 0.6575992703437805, "learning_rate": 0.00022260341405720886, "loss": 3.024, "step": 35692 }, { "epoch": 1.75, "grad_norm": 0.6296408772468567, "learning_rate": 0.00022258853916124908, "loss": 3.1385, "step": 35693 }, { "epoch": 1.75, "grad_norm": 0.6155877709388733, "learning_rate": 0.00022257366446917616, "loss": 3.0299, "step": 35694 }, { "epoch": 1.75, "grad_norm": 0.6629244089126587, "learning_rate": 0.00022255878998102936, "loss": 2.886, "step": 35695 }, { "epoch": 1.75, "grad_norm": 0.6616221070289612, "learning_rate": 0.00022254391569684768, "loss": 3.066, "step": 35696 }, { "epoch": 1.75, "grad_norm": 0.630438506603241, "learning_rate": 0.00022252904161667043, "loss": 3.0545, "step": 35697 }, { "epoch": 1.75, "grad_norm": 0.6666058301925659, "learning_rate": 0.00022251416774053686, "loss": 2.9689, "step": 35698 }, { "epoch": 1.75, "grad_norm": 0.6253411173820496, "learning_rate": 0.00022249929406848609, "loss": 2.8768, "step": 35699 }, { "epoch": 1.75, "grad_norm": 0.6282232403755188, "learning_rate": 0.00022248442060055724, "loss": 2.9593, "step": 35700 }, { "epoch": 1.75, "grad_norm": 0.5989751219749451, "learning_rate": 0.0002224695473367894, "loss": 2.8903, "step": 35701 }, { "epoch": 1.75, "grad_norm": 0.6522663831710815, "learning_rate": 0.00022245467427722194, "loss": 2.9982, "step": 35702 }, { "epoch": 1.75, "grad_norm": 0.6247538328170776, "learning_rate": 0.00022243980142189397, "loss": 2.8845, "step": 35703 }, { "epoch": 1.75, "grad_norm": 0.6098034977912903, "learning_rate": 0.00022242492877084453, "loss": 2.941, "step": 35704 }, { "epoch": 1.75, "grad_norm": 0.6762757301330566, "learning_rate": 0.00022241005632411305, "loss": 3.0948, "step": 35705 }, { "epoch": 1.75, "grad_norm": 0.6396958827972412, "learning_rate": 0.00022239518408173842, "loss": 3.0555, "step": 35706 }, { "epoch": 1.75, "grad_norm": 0.6262756586074829, "learning_rate": 0.00022238031204375994, "loss": 3.0633, "step": 35707 }, { "epoch": 1.75, "grad_norm": 0.6355347037315369, "learning_rate": 0.0002223654402102169, "loss": 3.2732, "step": 35708 }, { "epoch": 1.75, "grad_norm": 0.6595379114151001, "learning_rate": 0.00022235056858114824, "loss": 2.9131, "step": 35709 }, { "epoch": 1.75, "grad_norm": 0.5768058896064758, "learning_rate": 0.00022233569715659335, "loss": 3.0761, "step": 35710 }, { "epoch": 1.75, "grad_norm": 0.6388674974441528, "learning_rate": 0.00022232082593659112, "loss": 3.2021, "step": 35711 }, { "epoch": 1.75, "grad_norm": 0.6107366681098938, "learning_rate": 0.00022230595492118098, "loss": 2.9706, "step": 35712 }, { "epoch": 1.75, "grad_norm": 0.6297752857208252, "learning_rate": 0.00022229108411040205, "loss": 3.1894, "step": 35713 }, { "epoch": 1.75, "grad_norm": 0.6354250311851501, "learning_rate": 0.00022227621350429333, "loss": 3.2159, "step": 35714 }, { "epoch": 1.75, "grad_norm": 0.6811100244522095, "learning_rate": 0.00022226134310289426, "loss": 3.0646, "step": 35715 }, { "epoch": 1.75, "grad_norm": 0.6282296776771545, "learning_rate": 0.00022224647290624365, "loss": 3.1562, "step": 35716 }, { "epoch": 1.75, "grad_norm": 0.6105203628540039, "learning_rate": 0.00022223160291438097, "loss": 3.07, "step": 35717 }, { "epoch": 1.75, "grad_norm": 0.5900524258613586, "learning_rate": 0.00022221673312734536, "loss": 3.1058, "step": 35718 }, { "epoch": 1.75, "grad_norm": 0.6298518180847168, "learning_rate": 0.00022220186354517576, "loss": 3.0704, "step": 35719 }, { "epoch": 1.75, "grad_norm": 0.6291689872741699, "learning_rate": 0.00022218699416791163, "loss": 2.9188, "step": 35720 }, { "epoch": 1.75, "grad_norm": 0.5981619358062744, "learning_rate": 0.00022217212499559192, "loss": 3.0848, "step": 35721 }, { "epoch": 1.75, "grad_norm": 0.6223424673080444, "learning_rate": 0.00022215725602825573, "loss": 2.9289, "step": 35722 }, { "epoch": 1.75, "grad_norm": 0.6437315344810486, "learning_rate": 0.00022214238726594253, "loss": 3.2998, "step": 35723 }, { "epoch": 1.75, "grad_norm": 0.6619381904602051, "learning_rate": 0.0002221275187086912, "loss": 2.8725, "step": 35724 }, { "epoch": 1.75, "grad_norm": 0.624908983707428, "learning_rate": 0.0002221126503565411, "loss": 3.1948, "step": 35725 }, { "epoch": 1.75, "grad_norm": 0.6466329097747803, "learning_rate": 0.00022209778220953115, "loss": 2.9206, "step": 35726 }, { "epoch": 1.75, "grad_norm": 0.6283143162727356, "learning_rate": 0.00022208291426770068, "loss": 3.0594, "step": 35727 }, { "epoch": 1.75, "grad_norm": 0.6625270843505859, "learning_rate": 0.00022206804653108895, "loss": 2.9291, "step": 35728 }, { "epoch": 1.75, "grad_norm": 0.6456392407417297, "learning_rate": 0.0002220531789997348, "loss": 2.9168, "step": 35729 }, { "epoch": 1.75, "grad_norm": 2.799959659576416, "learning_rate": 0.00022203831167367773, "loss": 3.0285, "step": 35730 }, { "epoch": 1.75, "grad_norm": 0.6306584477424622, "learning_rate": 0.00022202344455295663, "loss": 2.7562, "step": 35731 }, { "epoch": 1.75, "grad_norm": 0.7043753266334534, "learning_rate": 0.00022200857763761078, "loss": 2.8602, "step": 35732 }, { "epoch": 1.75, "grad_norm": 0.656856119632721, "learning_rate": 0.00022199371092767944, "loss": 2.9566, "step": 35733 }, { "epoch": 1.75, "grad_norm": 0.638849139213562, "learning_rate": 0.00022197884442320154, "loss": 2.9435, "step": 35734 }, { "epoch": 1.75, "grad_norm": 0.6164271831512451, "learning_rate": 0.00022196397812421646, "loss": 2.8935, "step": 35735 }, { "epoch": 1.75, "grad_norm": 0.6180526614189148, "learning_rate": 0.0002219491120307631, "loss": 3.05, "step": 35736 }, { "epoch": 1.75, "grad_norm": 0.6781109571456909, "learning_rate": 0.00022193424614288076, "loss": 3.0915, "step": 35737 }, { "epoch": 1.75, "grad_norm": 0.6297280788421631, "learning_rate": 0.0002219193804606087, "loss": 2.8888, "step": 35738 }, { "epoch": 1.75, "grad_norm": 0.6124048829078674, "learning_rate": 0.00022190451498398594, "loss": 3.0255, "step": 35739 }, { "epoch": 1.75, "grad_norm": 0.6476908922195435, "learning_rate": 0.00022188964971305169, "loss": 3.111, "step": 35740 }, { "epoch": 1.75, "grad_norm": 0.602753221988678, "learning_rate": 0.0002218747846478449, "loss": 3.2071, "step": 35741 }, { "epoch": 1.75, "grad_norm": 0.6379673480987549, "learning_rate": 0.00022185991978840503, "loss": 2.9443, "step": 35742 }, { "epoch": 1.75, "grad_norm": 0.675123393535614, "learning_rate": 0.00022184505513477115, "loss": 2.9927, "step": 35743 }, { "epoch": 1.75, "grad_norm": 0.615736186504364, "learning_rate": 0.0002218301906869822, "loss": 2.928, "step": 35744 }, { "epoch": 1.75, "grad_norm": 0.6254475116729736, "learning_rate": 0.0002218153264450776, "loss": 3.0137, "step": 35745 }, { "epoch": 1.75, "grad_norm": 0.5977519750595093, "learning_rate": 0.00022180046240909634, "loss": 2.7181, "step": 35746 }, { "epoch": 1.75, "grad_norm": 0.659858763217926, "learning_rate": 0.00022178559857907759, "loss": 3.1556, "step": 35747 }, { "epoch": 1.75, "grad_norm": 0.634911060333252, "learning_rate": 0.00022177073495506058, "loss": 3.1182, "step": 35748 }, { "epoch": 1.75, "grad_norm": 0.6552413105964661, "learning_rate": 0.00022175587153708438, "loss": 3.0402, "step": 35749 }, { "epoch": 1.75, "grad_norm": 0.5962386131286621, "learning_rate": 0.00022174100832518819, "loss": 3.092, "step": 35750 }, { "epoch": 1.75, "grad_norm": 0.6224254965782166, "learning_rate": 0.00022172614531941102, "loss": 2.9382, "step": 35751 }, { "epoch": 1.75, "grad_norm": 0.6392555236816406, "learning_rate": 0.00022171128251979218, "loss": 3.0492, "step": 35752 }, { "epoch": 1.75, "grad_norm": 0.6384632587432861, "learning_rate": 0.0002216964199263708, "loss": 3.043, "step": 35753 }, { "epoch": 1.75, "grad_norm": 0.6350359320640564, "learning_rate": 0.00022168155753918585, "loss": 3.0483, "step": 35754 }, { "epoch": 1.75, "grad_norm": 0.6259121298789978, "learning_rate": 0.00022166669535827674, "loss": 3.1861, "step": 35755 }, { "epoch": 1.75, "grad_norm": 0.6385367512702942, "learning_rate": 0.0002216518333836824, "loss": 2.9332, "step": 35756 }, { "epoch": 1.75, "grad_norm": 0.7472400069236755, "learning_rate": 0.00022163697161544217, "loss": 2.9411, "step": 35757 }, { "epoch": 1.75, "grad_norm": 0.6142936944961548, "learning_rate": 0.00022162211005359486, "loss": 3.0595, "step": 35758 }, { "epoch": 1.75, "grad_norm": 0.6409468054771423, "learning_rate": 0.0002216072486981799, "loss": 3.0586, "step": 35759 }, { "epoch": 1.75, "grad_norm": 0.6137709617614746, "learning_rate": 0.00022159238754923647, "loss": 2.9876, "step": 35760 }, { "epoch": 1.75, "grad_norm": 0.6863341331481934, "learning_rate": 0.00022157752660680353, "loss": 3.1946, "step": 35761 }, { "epoch": 1.75, "grad_norm": 0.615785539150238, "learning_rate": 0.00022156266587092038, "loss": 2.8858, "step": 35762 }, { "epoch": 1.75, "grad_norm": 0.6471198201179504, "learning_rate": 0.00022154780534162593, "loss": 3.07, "step": 35763 }, { "epoch": 1.75, "grad_norm": 0.6607215404510498, "learning_rate": 0.0002215329450189595, "loss": 3.1074, "step": 35764 }, { "epoch": 1.75, "grad_norm": 0.6380542516708374, "learning_rate": 0.0002215180849029603, "loss": 3.0175, "step": 35765 }, { "epoch": 1.75, "grad_norm": 0.5926660895347595, "learning_rate": 0.00022150322499366718, "loss": 3.1769, "step": 35766 }, { "epoch": 1.75, "grad_norm": 0.6166107058525085, "learning_rate": 0.00022148836529111964, "loss": 2.812, "step": 35767 }, { "epoch": 1.75, "grad_norm": 0.6104620695114136, "learning_rate": 0.00022147350579535652, "loss": 3.1454, "step": 35768 }, { "epoch": 1.75, "grad_norm": 0.6072012186050415, "learning_rate": 0.00022145864650641704, "loss": 3.0505, "step": 35769 }, { "epoch": 1.75, "grad_norm": 0.6567019820213318, "learning_rate": 0.00022144378742434054, "loss": 2.9977, "step": 35770 }, { "epoch": 1.75, "grad_norm": 0.6445237398147583, "learning_rate": 0.00022142892854916586, "loss": 2.9995, "step": 35771 }, { "epoch": 1.75, "grad_norm": 0.6039271354675293, "learning_rate": 0.0002214140698809323, "loss": 3.1029, "step": 35772 }, { "epoch": 1.75, "grad_norm": 0.6350820660591125, "learning_rate": 0.00022139921141967885, "loss": 2.942, "step": 35773 }, { "epoch": 1.75, "grad_norm": 0.6840111017227173, "learning_rate": 0.00022138435316544487, "loss": 2.9071, "step": 35774 }, { "epoch": 1.75, "grad_norm": 0.7613235712051392, "learning_rate": 0.00022136949511826942, "loss": 2.7632, "step": 35775 }, { "epoch": 1.75, "grad_norm": 0.651785671710968, "learning_rate": 0.00022135463727819142, "loss": 2.9729, "step": 35776 }, { "epoch": 1.75, "grad_norm": 0.62009596824646, "learning_rate": 0.00022133977964525033, "loss": 3.1663, "step": 35777 }, { "epoch": 1.75, "grad_norm": 0.6254829168319702, "learning_rate": 0.00022132492221948498, "loss": 2.9613, "step": 35778 }, { "epoch": 1.75, "grad_norm": 0.6415801048278809, "learning_rate": 0.00022131006500093468, "loss": 3.0607, "step": 35779 }, { "epoch": 1.75, "grad_norm": 0.6973337531089783, "learning_rate": 0.00022129520798963863, "loss": 2.6692, "step": 35780 }, { "epoch": 1.75, "grad_norm": 0.614383339881897, "learning_rate": 0.0002212803511856357, "loss": 2.8457, "step": 35781 }, { "epoch": 1.75, "grad_norm": 0.6090778708457947, "learning_rate": 0.00022126549458896535, "loss": 3.0376, "step": 35782 }, { "epoch": 1.75, "grad_norm": 0.6137256622314453, "learning_rate": 0.00022125063819966644, "loss": 2.8509, "step": 35783 }, { "epoch": 1.75, "grad_norm": 0.6132293939590454, "learning_rate": 0.0002212357820177781, "loss": 3.2059, "step": 35784 }, { "epoch": 1.75, "grad_norm": 0.6477642059326172, "learning_rate": 0.00022122092604333975, "loss": 3.0761, "step": 35785 }, { "epoch": 1.75, "grad_norm": 0.6460610032081604, "learning_rate": 0.00022120607027639017, "loss": 3.0912, "step": 35786 }, { "epoch": 1.75, "grad_norm": 0.628158450126648, "learning_rate": 0.00022119121471696874, "loss": 3.2221, "step": 35787 }, { "epoch": 1.75, "grad_norm": 0.6395934820175171, "learning_rate": 0.00022117635936511437, "loss": 2.687, "step": 35788 }, { "epoch": 1.75, "grad_norm": 0.6341500282287598, "learning_rate": 0.00022116150422086633, "loss": 2.9476, "step": 35789 }, { "epoch": 1.75, "grad_norm": 0.5902708172798157, "learning_rate": 0.0002211466492842638, "loss": 3.0437, "step": 35790 }, { "epoch": 1.75, "grad_norm": 0.6335721611976624, "learning_rate": 0.00022113179455534573, "loss": 2.9241, "step": 35791 }, { "epoch": 1.75, "grad_norm": 0.6263195872306824, "learning_rate": 0.00022111694003415145, "loss": 2.9259, "step": 35792 }, { "epoch": 1.75, "grad_norm": 0.6031926870346069, "learning_rate": 0.00022110208572071984, "loss": 2.9516, "step": 35793 }, { "epoch": 1.75, "grad_norm": 0.6146538257598877, "learning_rate": 0.00022108723161509014, "loss": 2.9278, "step": 35794 }, { "epoch": 1.75, "grad_norm": 0.6239001154899597, "learning_rate": 0.0002210723777173016, "loss": 2.8726, "step": 35795 }, { "epoch": 1.75, "grad_norm": 0.6321032047271729, "learning_rate": 0.00022105752402739313, "loss": 3.0842, "step": 35796 }, { "epoch": 1.75, "grad_norm": 0.6525657773017883, "learning_rate": 0.0002210426705454041, "loss": 2.8471, "step": 35797 }, { "epoch": 1.75, "grad_norm": 0.6774393320083618, "learning_rate": 0.00022102781727137327, "loss": 3.2696, "step": 35798 }, { "epoch": 1.75, "grad_norm": 0.7013652324676514, "learning_rate": 0.00022101296420534, "loss": 2.9296, "step": 35799 }, { "epoch": 1.75, "grad_norm": 0.6332024931907654, "learning_rate": 0.0002209981113473435, "loss": 2.9423, "step": 35800 }, { "epoch": 1.75, "grad_norm": 0.64900803565979, "learning_rate": 0.00022098325869742273, "loss": 2.9368, "step": 35801 }, { "epoch": 1.75, "grad_norm": 0.6335112452507019, "learning_rate": 0.0002209684062556169, "loss": 3.258, "step": 35802 }, { "epoch": 1.75, "grad_norm": 0.5994589328765869, "learning_rate": 0.00022095355402196494, "loss": 3.1889, "step": 35803 }, { "epoch": 1.75, "grad_norm": 0.6108216643333435, "learning_rate": 0.0002209387019965062, "loss": 3.0253, "step": 35804 }, { "epoch": 1.75, "grad_norm": 0.6158538460731506, "learning_rate": 0.00022092385017927977, "loss": 2.9247, "step": 35805 }, { "epoch": 1.75, "grad_norm": 0.6153451204299927, "learning_rate": 0.0002209089985703245, "loss": 3.0619, "step": 35806 }, { "epoch": 1.75, "grad_norm": 0.6863804459571838, "learning_rate": 0.00022089414716967992, "loss": 2.8898, "step": 35807 }, { "epoch": 1.75, "grad_norm": 0.6681492924690247, "learning_rate": 0.00022087929597738482, "loss": 2.9785, "step": 35808 }, { "epoch": 1.75, "grad_norm": 0.6478404998779297, "learning_rate": 0.00022086444499347835, "loss": 3.0399, "step": 35809 }, { "epoch": 1.75, "grad_norm": 0.6117250919342041, "learning_rate": 0.00022084959421799988, "loss": 3.1075, "step": 35810 }, { "epoch": 1.76, "grad_norm": 0.6180821657180786, "learning_rate": 0.00022083474365098825, "loss": 3.2999, "step": 35811 }, { "epoch": 1.76, "grad_norm": 0.6459303498268127, "learning_rate": 0.00022081989329248276, "loss": 2.9466, "step": 35812 }, { "epoch": 1.76, "grad_norm": 0.6436235904693604, "learning_rate": 0.00022080504314252227, "loss": 3.0516, "step": 35813 }, { "epoch": 1.76, "grad_norm": 0.627779483795166, "learning_rate": 0.0002207901932011461, "loss": 3.0361, "step": 35814 }, { "epoch": 1.76, "grad_norm": 0.5869961977005005, "learning_rate": 0.00022077534346839344, "loss": 2.9429, "step": 35815 }, { "epoch": 1.76, "grad_norm": 0.6305521726608276, "learning_rate": 0.0002207604939443031, "loss": 3.2122, "step": 35816 }, { "epoch": 1.76, "grad_norm": 0.621493399143219, "learning_rate": 0.00022074564462891454, "loss": 2.9271, "step": 35817 }, { "epoch": 1.76, "grad_norm": 0.6501767039299011, "learning_rate": 0.0002207307955222666, "loss": 2.8088, "step": 35818 }, { "epoch": 1.76, "grad_norm": 0.6253821849822998, "learning_rate": 0.00022071594662439846, "loss": 3.0041, "step": 35819 }, { "epoch": 1.76, "grad_norm": 0.6286084651947021, "learning_rate": 0.00022070109793534935, "loss": 2.8641, "step": 35820 }, { "epoch": 1.76, "grad_norm": 0.8458594083786011, "learning_rate": 0.00022068624945515818, "loss": 3.1558, "step": 35821 }, { "epoch": 1.76, "grad_norm": 0.6667223572731018, "learning_rate": 0.00022067140118386432, "loss": 2.9657, "step": 35822 }, { "epoch": 1.76, "grad_norm": 0.6399785280227661, "learning_rate": 0.00022065655312150664, "loss": 2.9304, "step": 35823 }, { "epoch": 1.76, "grad_norm": 0.6804106831550598, "learning_rate": 0.00022064170526812425, "loss": 3.1718, "step": 35824 }, { "epoch": 1.76, "grad_norm": 0.6263512969017029, "learning_rate": 0.00022062685762375646, "loss": 2.7909, "step": 35825 }, { "epoch": 1.76, "grad_norm": 0.6424779891967773, "learning_rate": 0.00022061201018844218, "loss": 3.1985, "step": 35826 }, { "epoch": 1.76, "grad_norm": 0.7617523074150085, "learning_rate": 0.00022059716296222065, "loss": 2.9046, "step": 35827 }, { "epoch": 1.76, "grad_norm": 0.6302613019943237, "learning_rate": 0.0002205823159451308, "loss": 3.043, "step": 35828 }, { "epoch": 1.76, "grad_norm": 0.6046407222747803, "learning_rate": 0.00022056746913721192, "loss": 3.1451, "step": 35829 }, { "epoch": 1.76, "grad_norm": 0.634103536605835, "learning_rate": 0.0002205526225385031, "loss": 3.1131, "step": 35830 }, { "epoch": 1.76, "grad_norm": 0.6643496155738831, "learning_rate": 0.0002205377761490432, "loss": 3.097, "step": 35831 }, { "epoch": 1.76, "grad_norm": 0.6304755806922913, "learning_rate": 0.00022052292996887173, "loss": 3.1338, "step": 35832 }, { "epoch": 1.76, "grad_norm": 0.6238082051277161, "learning_rate": 0.00022050808399802743, "loss": 2.871, "step": 35833 }, { "epoch": 1.76, "grad_norm": 0.6616991758346558, "learning_rate": 0.00022049323823654962, "loss": 3.0218, "step": 35834 }, { "epoch": 1.76, "grad_norm": 0.6222161650657654, "learning_rate": 0.0002204783926844772, "loss": 3.1102, "step": 35835 }, { "epoch": 1.76, "grad_norm": 0.6417911648750305, "learning_rate": 0.00022046354734184945, "loss": 2.897, "step": 35836 }, { "epoch": 1.76, "grad_norm": 0.5990554094314575, "learning_rate": 0.00022044870220870545, "loss": 2.9511, "step": 35837 }, { "epoch": 1.76, "grad_norm": 0.5991533994674683, "learning_rate": 0.00022043385728508417, "loss": 2.89, "step": 35838 }, { "epoch": 1.76, "grad_norm": 0.6272563934326172, "learning_rate": 0.0002204190125710249, "loss": 3.0124, "step": 35839 }, { "epoch": 1.76, "grad_norm": 0.6233096122741699, "learning_rate": 0.00022040416806656652, "loss": 3.1823, "step": 35840 }, { "epoch": 1.76, "grad_norm": 0.6353172659873962, "learning_rate": 0.0002203893237717483, "loss": 2.8572, "step": 35841 }, { "epoch": 1.76, "grad_norm": 0.6314014792442322, "learning_rate": 0.00022037447968660935, "loss": 2.7837, "step": 35842 }, { "epoch": 1.76, "grad_norm": 0.6511485576629639, "learning_rate": 0.00022035963581118855, "loss": 3.0148, "step": 35843 }, { "epoch": 1.76, "grad_norm": 0.6228102445602417, "learning_rate": 0.00022034479214552528, "loss": 2.9736, "step": 35844 }, { "epoch": 1.76, "grad_norm": 0.5977052450180054, "learning_rate": 0.00022032994868965844, "loss": 3.0123, "step": 35845 }, { "epoch": 1.76, "grad_norm": 0.6566368341445923, "learning_rate": 0.0002203151054436271, "loss": 3.0192, "step": 35846 }, { "epoch": 1.76, "grad_norm": 0.620186984539032, "learning_rate": 0.00022030026240747058, "loss": 3.0604, "step": 35847 }, { "epoch": 1.76, "grad_norm": 0.6430531144142151, "learning_rate": 0.00022028541958122775, "loss": 3.0086, "step": 35848 }, { "epoch": 1.76, "grad_norm": 0.6357125639915466, "learning_rate": 0.00022027057696493785, "loss": 2.8469, "step": 35849 }, { "epoch": 1.76, "grad_norm": 0.6521148085594177, "learning_rate": 0.0002202557345586398, "loss": 2.7497, "step": 35850 }, { "epoch": 1.76, "grad_norm": 0.6499366164207458, "learning_rate": 0.00022024089236237287, "loss": 3.2067, "step": 35851 }, { "epoch": 1.76, "grad_norm": 0.6402422785758972, "learning_rate": 0.00022022605037617612, "loss": 3.1193, "step": 35852 }, { "epoch": 1.76, "grad_norm": 0.6213492155075073, "learning_rate": 0.00022021120860008848, "loss": 3.0745, "step": 35853 }, { "epoch": 1.76, "grad_norm": 0.6546719074249268, "learning_rate": 0.00022019636703414928, "loss": 3.1135, "step": 35854 }, { "epoch": 1.76, "grad_norm": 0.627903163433075, "learning_rate": 0.00022018152567839747, "loss": 3.1131, "step": 35855 }, { "epoch": 1.76, "grad_norm": 0.628490686416626, "learning_rate": 0.00022016668453287203, "loss": 2.8469, "step": 35856 }, { "epoch": 1.76, "grad_norm": 0.6247190833091736, "learning_rate": 0.00022015184359761237, "loss": 3.0752, "step": 35857 }, { "epoch": 1.76, "grad_norm": 0.6584323048591614, "learning_rate": 0.00022013700287265727, "loss": 2.7668, "step": 35858 }, { "epoch": 1.76, "grad_norm": 0.6163456439971924, "learning_rate": 0.000220122162358046, "loss": 3.1157, "step": 35859 }, { "epoch": 1.76, "grad_norm": 0.6366605162620544, "learning_rate": 0.0002201073220538175, "loss": 2.9302, "step": 35860 }, { "epoch": 1.76, "grad_norm": 0.7113758325576782, "learning_rate": 0.0002200924819600109, "loss": 3.1147, "step": 35861 }, { "epoch": 1.76, "grad_norm": 0.5954180955886841, "learning_rate": 0.00022007764207666546, "loss": 2.9149, "step": 35862 }, { "epoch": 1.76, "grad_norm": 0.6021776795387268, "learning_rate": 0.00022006280240382007, "loss": 3.0365, "step": 35863 }, { "epoch": 1.76, "grad_norm": 0.617344856262207, "learning_rate": 0.00022004796294151392, "loss": 3.2413, "step": 35864 }, { "epoch": 1.76, "grad_norm": 0.6328892707824707, "learning_rate": 0.00022003312368978597, "loss": 3.1775, "step": 35865 }, { "epoch": 1.76, "grad_norm": 0.6340292096138, "learning_rate": 0.0002200182846486754, "loss": 2.7447, "step": 35866 }, { "epoch": 1.76, "grad_norm": 0.6463967561721802, "learning_rate": 0.00022000344581822137, "loss": 2.9128, "step": 35867 }, { "epoch": 1.76, "grad_norm": 0.6408669352531433, "learning_rate": 0.0002199886071984627, "loss": 3.0877, "step": 35868 }, { "epoch": 1.76, "grad_norm": 0.6117002367973328, "learning_rate": 0.0002199737687894388, "loss": 3.2649, "step": 35869 }, { "epoch": 1.76, "grad_norm": 0.6672056913375854, "learning_rate": 0.0002199589305911885, "loss": 2.74, "step": 35870 }, { "epoch": 1.76, "grad_norm": 0.6419618129730225, "learning_rate": 0.0002199440926037509, "loss": 2.9445, "step": 35871 }, { "epoch": 1.76, "grad_norm": 1.165690302848816, "learning_rate": 0.00021992925482716534, "loss": 3.0519, "step": 35872 }, { "epoch": 1.76, "grad_norm": 0.6113609671592712, "learning_rate": 0.0002199144172614706, "loss": 3.0843, "step": 35873 }, { "epoch": 1.76, "grad_norm": 0.6271687746047974, "learning_rate": 0.00021989957990670595, "loss": 2.9394, "step": 35874 }, { "epoch": 1.76, "grad_norm": 0.6276607513427734, "learning_rate": 0.00021988474276291028, "loss": 3.0057, "step": 35875 }, { "epoch": 1.76, "grad_norm": 0.6973236203193665, "learning_rate": 0.00021986990583012283, "loss": 2.699, "step": 35876 }, { "epoch": 1.76, "grad_norm": 0.6413176655769348, "learning_rate": 0.00021985506910838267, "loss": 3.0356, "step": 35877 }, { "epoch": 1.76, "grad_norm": 0.6662982106208801, "learning_rate": 0.00021984023259772874, "loss": 2.9587, "step": 35878 }, { "epoch": 1.76, "grad_norm": 0.6634508967399597, "learning_rate": 0.0002198253962982003, "loss": 2.69, "step": 35879 }, { "epoch": 1.76, "grad_norm": 0.6144524812698364, "learning_rate": 0.00021981056020983623, "loss": 2.9196, "step": 35880 }, { "epoch": 1.76, "grad_norm": 0.6477610468864441, "learning_rate": 0.0002197957243326758, "loss": 3.0727, "step": 35881 }, { "epoch": 1.76, "grad_norm": 0.6386900544166565, "learning_rate": 0.00021978088866675803, "loss": 2.9197, "step": 35882 }, { "epoch": 1.76, "grad_norm": 0.6349610686302185, "learning_rate": 0.00021976605321212184, "loss": 3.2623, "step": 35883 }, { "epoch": 1.76, "grad_norm": 0.6554539799690247, "learning_rate": 0.00021975121796880657, "loss": 3.2208, "step": 35884 }, { "epoch": 1.76, "grad_norm": 0.6648715138435364, "learning_rate": 0.00021973638293685104, "loss": 2.9701, "step": 35885 }, { "epoch": 1.76, "grad_norm": 0.6611798405647278, "learning_rate": 0.00021972154811629438, "loss": 3.0644, "step": 35886 }, { "epoch": 1.76, "grad_norm": 0.6232627034187317, "learning_rate": 0.00021970671350717588, "loss": 3.0797, "step": 35887 }, { "epoch": 1.76, "grad_norm": 0.6150153875350952, "learning_rate": 0.00021969187910953435, "loss": 2.9063, "step": 35888 }, { "epoch": 1.76, "grad_norm": 0.6366429328918457, "learning_rate": 0.000219677044923409, "loss": 2.9047, "step": 35889 }, { "epoch": 1.76, "grad_norm": 0.6754364967346191, "learning_rate": 0.00021966221094883877, "loss": 3.1375, "step": 35890 }, { "epoch": 1.76, "grad_norm": 0.5876303315162659, "learning_rate": 0.00021964737718586287, "loss": 2.973, "step": 35891 }, { "epoch": 1.76, "grad_norm": 0.7163461446762085, "learning_rate": 0.00021963254363452036, "loss": 2.9597, "step": 35892 }, { "epoch": 1.76, "grad_norm": 0.6201485991477966, "learning_rate": 0.0002196177102948501, "loss": 2.992, "step": 35893 }, { "epoch": 1.76, "grad_norm": 0.6001695990562439, "learning_rate": 0.0002196028771668915, "loss": 3.0862, "step": 35894 }, { "epoch": 1.76, "grad_norm": 0.6142009496688843, "learning_rate": 0.0002195880442506834, "loss": 2.9972, "step": 35895 }, { "epoch": 1.76, "grad_norm": 0.6313881278038025, "learning_rate": 0.00021957321154626483, "loss": 3.204, "step": 35896 }, { "epoch": 1.76, "grad_norm": 0.6144121885299683, "learning_rate": 0.00021955837905367507, "loss": 2.8235, "step": 35897 }, { "epoch": 1.76, "grad_norm": 0.6347367763519287, "learning_rate": 0.00021954354677295297, "loss": 2.9528, "step": 35898 }, { "epoch": 1.76, "grad_norm": 0.655368447303772, "learning_rate": 0.00021952871470413778, "loss": 2.9214, "step": 35899 }, { "epoch": 1.76, "grad_norm": 0.6603178977966309, "learning_rate": 0.00021951388284726834, "loss": 2.9844, "step": 35900 }, { "epoch": 1.76, "grad_norm": 0.6305208802223206, "learning_rate": 0.00021949905120238384, "loss": 2.933, "step": 35901 }, { "epoch": 1.76, "grad_norm": 0.6413267850875854, "learning_rate": 0.00021948421976952345, "loss": 2.8795, "step": 35902 }, { "epoch": 1.76, "grad_norm": 0.6816225647926331, "learning_rate": 0.00021946938854872607, "loss": 3.0087, "step": 35903 }, { "epoch": 1.76, "grad_norm": 0.6837611794471741, "learning_rate": 0.00021945455754003088, "loss": 2.9229, "step": 35904 }, { "epoch": 1.76, "grad_norm": 0.7396782040596008, "learning_rate": 0.0002194397267434768, "loss": 3.0543, "step": 35905 }, { "epoch": 1.76, "grad_norm": 0.6411307454109192, "learning_rate": 0.00021942489615910303, "loss": 2.8873, "step": 35906 }, { "epoch": 1.76, "grad_norm": 0.6512781381607056, "learning_rate": 0.00021941006578694863, "loss": 3.1282, "step": 35907 }, { "epoch": 1.76, "grad_norm": 0.6359968185424805, "learning_rate": 0.00021939523562705242, "loss": 3.0821, "step": 35908 }, { "epoch": 1.76, "grad_norm": 0.6434915661811829, "learning_rate": 0.00021938040567945387, "loss": 3.2044, "step": 35909 }, { "epoch": 1.76, "grad_norm": 0.6505936980247498, "learning_rate": 0.0002193655759441917, "loss": 2.8933, "step": 35910 }, { "epoch": 1.76, "grad_norm": 0.6163533926010132, "learning_rate": 0.00021935074642130515, "loss": 2.9099, "step": 35911 }, { "epoch": 1.76, "grad_norm": 0.6336808800697327, "learning_rate": 0.0002193359171108331, "loss": 2.9613, "step": 35912 }, { "epoch": 1.76, "grad_norm": 0.6180527806282043, "learning_rate": 0.00021932108801281478, "loss": 3.2311, "step": 35913 }, { "epoch": 1.76, "grad_norm": 0.6565324068069458, "learning_rate": 0.00021930625912728926, "loss": 3.0751, "step": 35914 }, { "epoch": 1.76, "grad_norm": 0.6686374545097351, "learning_rate": 0.00021929143045429535, "loss": 3.0744, "step": 35915 }, { "epoch": 1.76, "grad_norm": 0.6567208170890808, "learning_rate": 0.00021927660199387247, "loss": 3.1457, "step": 35916 }, { "epoch": 1.76, "grad_norm": 0.6428233981132507, "learning_rate": 0.00021926177374605938, "loss": 3.1215, "step": 35917 }, { "epoch": 1.76, "grad_norm": 0.6695310473442078, "learning_rate": 0.00021924694571089518, "loss": 3.1795, "step": 35918 }, { "epoch": 1.76, "grad_norm": 0.6748586297035217, "learning_rate": 0.0002192321178884191, "loss": 3.2222, "step": 35919 }, { "epoch": 1.76, "grad_norm": 0.6294280290603638, "learning_rate": 0.0002192172902786701, "loss": 2.8973, "step": 35920 }, { "epoch": 1.76, "grad_norm": 0.6525007486343384, "learning_rate": 0.00021920246288168718, "loss": 2.9294, "step": 35921 }, { "epoch": 1.76, "grad_norm": 0.6621073484420776, "learning_rate": 0.00021918763569750934, "loss": 3.0736, "step": 35922 }, { "epoch": 1.76, "grad_norm": 0.620073139667511, "learning_rate": 0.00021917280872617566, "loss": 3.2611, "step": 35923 }, { "epoch": 1.76, "grad_norm": 0.641943633556366, "learning_rate": 0.00021915798196772544, "loss": 2.9386, "step": 35924 }, { "epoch": 1.76, "grad_norm": 0.6125879287719727, "learning_rate": 0.00021914315542219742, "loss": 2.8696, "step": 35925 }, { "epoch": 1.76, "grad_norm": 0.6328999996185303, "learning_rate": 0.0002191283290896309, "loss": 2.9842, "step": 35926 }, { "epoch": 1.76, "grad_norm": 0.582988977432251, "learning_rate": 0.00021911350297006458, "loss": 2.8296, "step": 35927 }, { "epoch": 1.76, "grad_norm": 0.6015204787254333, "learning_rate": 0.00021909867706353787, "loss": 3.0296, "step": 35928 }, { "epoch": 1.76, "grad_norm": 0.6688986420631409, "learning_rate": 0.00021908385137008968, "loss": 2.8411, "step": 35929 }, { "epoch": 1.76, "grad_norm": 0.6483864188194275, "learning_rate": 0.00021906902588975896, "loss": 3.0242, "step": 35930 }, { "epoch": 1.76, "grad_norm": 0.6931516528129578, "learning_rate": 0.00021905420062258497, "loss": 2.9332, "step": 35931 }, { "epoch": 1.76, "grad_norm": 0.6430986523628235, "learning_rate": 0.00021903937556860657, "loss": 2.9444, "step": 35932 }, { "epoch": 1.76, "grad_norm": 0.6340141296386719, "learning_rate": 0.00021902455072786282, "loss": 2.7925, "step": 35933 }, { "epoch": 1.76, "grad_norm": 0.608032763004303, "learning_rate": 0.0002190097261003929, "loss": 3.0971, "step": 35934 }, { "epoch": 1.76, "grad_norm": 0.6480880379676819, "learning_rate": 0.0002189949016862358, "loss": 2.8061, "step": 35935 }, { "epoch": 1.76, "grad_norm": 0.6553376317024231, "learning_rate": 0.00021898007748543053, "loss": 3.0343, "step": 35936 }, { "epoch": 1.76, "grad_norm": 0.6663204431533813, "learning_rate": 0.00021896525349801606, "loss": 3.0759, "step": 35937 }, { "epoch": 1.76, "grad_norm": 0.6248182654380798, "learning_rate": 0.00021895042972403154, "loss": 2.7778, "step": 35938 }, { "epoch": 1.76, "grad_norm": 0.6360815167427063, "learning_rate": 0.00021893560616351607, "loss": 2.9929, "step": 35939 }, { "epoch": 1.76, "grad_norm": 0.6211484670639038, "learning_rate": 0.00021892078281650847, "loss": 3.0523, "step": 35940 }, { "epoch": 1.76, "grad_norm": 0.6059033274650574, "learning_rate": 0.00021890595968304807, "loss": 2.8255, "step": 35941 }, { "epoch": 1.76, "grad_norm": 0.6170439124107361, "learning_rate": 0.00021889113676317365, "loss": 3.0655, "step": 35942 }, { "epoch": 1.76, "grad_norm": 0.6702554821968079, "learning_rate": 0.00021887631405692444, "loss": 3.0742, "step": 35943 }, { "epoch": 1.76, "grad_norm": 0.6257311105728149, "learning_rate": 0.00021886149156433944, "loss": 2.9236, "step": 35944 }, { "epoch": 1.76, "grad_norm": 0.6383312344551086, "learning_rate": 0.0002188466692854575, "loss": 3.0114, "step": 35945 }, { "epoch": 1.76, "grad_norm": 0.6520895957946777, "learning_rate": 0.000218831847220318, "loss": 3.0147, "step": 35946 }, { "epoch": 1.76, "grad_norm": 0.6120529174804688, "learning_rate": 0.0002188170253689597, "loss": 2.9981, "step": 35947 }, { "epoch": 1.76, "grad_norm": 0.6751158237457275, "learning_rate": 0.0002188022037314217, "loss": 3.0975, "step": 35948 }, { "epoch": 1.76, "grad_norm": 0.6635515689849854, "learning_rate": 0.00021878738230774315, "loss": 2.8741, "step": 35949 }, { "epoch": 1.76, "grad_norm": 0.6444944143295288, "learning_rate": 0.00021877256109796299, "loss": 2.788, "step": 35950 }, { "epoch": 1.76, "grad_norm": 0.6126725673675537, "learning_rate": 0.00021875774010212027, "loss": 2.8899, "step": 35951 }, { "epoch": 1.76, "grad_norm": 0.6406420469284058, "learning_rate": 0.00021874291932025394, "loss": 3.0294, "step": 35952 }, { "epoch": 1.76, "grad_norm": 0.6252112984657288, "learning_rate": 0.00021872809875240318, "loss": 3.0093, "step": 35953 }, { "epoch": 1.76, "grad_norm": 0.6004651188850403, "learning_rate": 0.00021871327839860703, "loss": 3.0745, "step": 35954 }, { "epoch": 1.76, "grad_norm": 0.6479066014289856, "learning_rate": 0.00021869845825890435, "loss": 3.2259, "step": 35955 }, { "epoch": 1.76, "grad_norm": 0.6312503218650818, "learning_rate": 0.00021868363833333439, "loss": 3.0985, "step": 35956 }, { "epoch": 1.76, "grad_norm": 0.6492436528205872, "learning_rate": 0.00021866881862193604, "loss": 3.0955, "step": 35957 }, { "epoch": 1.76, "grad_norm": 0.6168732643127441, "learning_rate": 0.00021865399912474824, "loss": 2.8809, "step": 35958 }, { "epoch": 1.76, "grad_norm": 0.7132405638694763, "learning_rate": 0.00021863917984181035, "loss": 3.2223, "step": 35959 }, { "epoch": 1.76, "grad_norm": 0.6588032245635986, "learning_rate": 0.00021862436077316113, "loss": 3.2283, "step": 35960 }, { "epoch": 1.76, "grad_norm": 0.8370882272720337, "learning_rate": 0.00021860954191883973, "loss": 2.8941, "step": 35961 }, { "epoch": 1.76, "grad_norm": 0.634719729423523, "learning_rate": 0.00021859472327888496, "loss": 3.1808, "step": 35962 }, { "epoch": 1.76, "grad_norm": 0.6163418292999268, "learning_rate": 0.00021857990485333608, "loss": 2.9578, "step": 35963 }, { "epoch": 1.76, "grad_norm": 0.6694480180740356, "learning_rate": 0.00021856508664223217, "loss": 2.8498, "step": 35964 }, { "epoch": 1.76, "grad_norm": 0.6347652673721313, "learning_rate": 0.0002185502686456121, "loss": 2.8425, "step": 35965 }, { "epoch": 1.76, "grad_norm": 0.6317447423934937, "learning_rate": 0.000218535450863515, "loss": 3.0694, "step": 35966 }, { "epoch": 1.76, "grad_norm": 0.6160470843315125, "learning_rate": 0.00021852063329597972, "loss": 2.9192, "step": 35967 }, { "epoch": 1.76, "grad_norm": 0.5880303978919983, "learning_rate": 0.00021850581594304547, "loss": 2.6224, "step": 35968 }, { "epoch": 1.76, "grad_norm": 0.7462405562400818, "learning_rate": 0.00021849099880475126, "loss": 3.0498, "step": 35969 }, { "epoch": 1.76, "grad_norm": 0.6336656212806702, "learning_rate": 0.000218476181881136, "loss": 3.2386, "step": 35970 }, { "epoch": 1.76, "grad_norm": 0.638992965221405, "learning_rate": 0.0002184613651722389, "loss": 2.9897, "step": 35971 }, { "epoch": 1.76, "grad_norm": 0.6258759498596191, "learning_rate": 0.00021844654867809877, "loss": 3.0165, "step": 35972 }, { "epoch": 1.76, "grad_norm": 0.6453997492790222, "learning_rate": 0.00021843173239875468, "loss": 2.7978, "step": 35973 }, { "epoch": 1.76, "grad_norm": 0.6440539360046387, "learning_rate": 0.00021841691633424587, "loss": 2.8896, "step": 35974 }, { "epoch": 1.76, "grad_norm": 0.6205512285232544, "learning_rate": 0.0002184021004846111, "loss": 3.1415, "step": 35975 }, { "epoch": 1.76, "grad_norm": 0.6318713426589966, "learning_rate": 0.00021838728484988956, "loss": 2.9567, "step": 35976 }, { "epoch": 1.76, "grad_norm": 0.6605975031852722, "learning_rate": 0.0002183724694301201, "loss": 2.9058, "step": 35977 }, { "epoch": 1.76, "grad_norm": 0.6602401733398438, "learning_rate": 0.00021835765422534192, "loss": 2.8578, "step": 35978 }, { "epoch": 1.76, "grad_norm": 0.6470891237258911, "learning_rate": 0.000218342839235594, "loss": 2.8326, "step": 35979 }, { "epoch": 1.76, "grad_norm": 0.6551663279533386, "learning_rate": 0.00021832802446091523, "loss": 3.1398, "step": 35980 }, { "epoch": 1.76, "grad_norm": 0.6770197153091431, "learning_rate": 0.00021831320990134485, "loss": 2.9125, "step": 35981 }, { "epoch": 1.76, "grad_norm": 0.6041737794876099, "learning_rate": 0.00021829839555692165, "loss": 3.1245, "step": 35982 }, { "epoch": 1.76, "grad_norm": 0.6864397525787354, "learning_rate": 0.0002182835814276847, "loss": 2.9672, "step": 35983 }, { "epoch": 1.76, "grad_norm": 0.628705620765686, "learning_rate": 0.00021826876751367323, "loss": 2.9742, "step": 35984 }, { "epoch": 1.76, "grad_norm": 0.6601821184158325, "learning_rate": 0.00021825395381492593, "loss": 2.941, "step": 35985 }, { "epoch": 1.76, "grad_norm": 0.679912269115448, "learning_rate": 0.00021823914033148215, "loss": 3.0776, "step": 35986 }, { "epoch": 1.76, "grad_norm": 0.7171043753623962, "learning_rate": 0.00021822432706338065, "loss": 3.0341, "step": 35987 }, { "epoch": 1.76, "grad_norm": 0.6408360600471497, "learning_rate": 0.0002182095140106605, "loss": 2.934, "step": 35988 }, { "epoch": 1.76, "grad_norm": 0.6534038186073303, "learning_rate": 0.00021819470117336083, "loss": 2.9033, "step": 35989 }, { "epoch": 1.76, "grad_norm": 0.6343661546707153, "learning_rate": 0.00021817988855152051, "loss": 3.1754, "step": 35990 }, { "epoch": 1.76, "grad_norm": 0.6617305278778076, "learning_rate": 0.00021816507614517874, "loss": 3.0125, "step": 35991 }, { "epoch": 1.76, "grad_norm": 0.636042594909668, "learning_rate": 0.00021815026395437422, "loss": 3.2112, "step": 35992 }, { "epoch": 1.76, "grad_norm": 0.6248968243598938, "learning_rate": 0.0002181354519791463, "loss": 2.7923, "step": 35993 }, { "epoch": 1.76, "grad_norm": 0.6503058671951294, "learning_rate": 0.0002181206402195338, "loss": 2.963, "step": 35994 }, { "epoch": 1.76, "grad_norm": 0.6492918133735657, "learning_rate": 0.0002181058286755757, "loss": 2.9537, "step": 35995 }, { "epoch": 1.76, "grad_norm": 0.6048479676246643, "learning_rate": 0.0002180910173473112, "loss": 2.8097, "step": 35996 }, { "epoch": 1.76, "grad_norm": 0.6944062113761902, "learning_rate": 0.00021807620623477916, "loss": 2.8929, "step": 35997 }, { "epoch": 1.76, "grad_norm": 0.6303960084915161, "learning_rate": 0.00021806139533801867, "loss": 3.0977, "step": 35998 }, { "epoch": 1.76, "grad_norm": 0.6644660234451294, "learning_rate": 0.00021804658465706853, "loss": 2.9469, "step": 35999 }, { "epoch": 1.76, "grad_norm": 0.5891596078872681, "learning_rate": 0.00021803177419196806, "loss": 3.255, "step": 36000 }, { "epoch": 1.76, "grad_norm": 0.6277598142623901, "learning_rate": 0.00021801696394275615, "loss": 3.1176, "step": 36001 }, { "epoch": 1.76, "grad_norm": 0.6298516392707825, "learning_rate": 0.00021800215390947164, "loss": 2.9568, "step": 36002 }, { "epoch": 1.76, "grad_norm": 0.6497096419334412, "learning_rate": 0.0002179873440921538, "loss": 2.9517, "step": 36003 }, { "epoch": 1.76, "grad_norm": 0.6442708969116211, "learning_rate": 0.00021797253449084143, "loss": 3.0019, "step": 36004 }, { "epoch": 1.76, "grad_norm": 0.6389861702919006, "learning_rate": 0.00021795772510557363, "loss": 2.8269, "step": 36005 }, { "epoch": 1.76, "grad_norm": 0.6288984417915344, "learning_rate": 0.00021794291593638947, "loss": 2.7268, "step": 36006 }, { "epoch": 1.76, "grad_norm": 0.6967528462409973, "learning_rate": 0.00021792810698332773, "loss": 2.8301, "step": 36007 }, { "epoch": 1.76, "grad_norm": 0.7129323482513428, "learning_rate": 0.00021791329824642774, "loss": 3.0183, "step": 36008 }, { "epoch": 1.76, "grad_norm": 1.0993932485580444, "learning_rate": 0.0002178984897257282, "loss": 2.9254, "step": 36009 }, { "epoch": 1.76, "grad_norm": 0.6599279642105103, "learning_rate": 0.00021788368142126822, "loss": 2.9239, "step": 36010 }, { "epoch": 1.76, "grad_norm": 0.6514578461647034, "learning_rate": 0.00021786887333308694, "loss": 2.9384, "step": 36011 }, { "epoch": 1.76, "grad_norm": 0.6423161029815674, "learning_rate": 0.00021785406546122316, "loss": 3.0236, "step": 36012 }, { "epoch": 1.76, "grad_norm": 0.6230955719947815, "learning_rate": 0.00021783925780571604, "loss": 3.0301, "step": 36013 }, { "epoch": 1.76, "grad_norm": 0.6230641007423401, "learning_rate": 0.00021782445036660437, "loss": 2.9346, "step": 36014 }, { "epoch": 1.77, "grad_norm": 0.6333297491073608, "learning_rate": 0.00021780964314392734, "loss": 2.7931, "step": 36015 }, { "epoch": 1.77, "grad_norm": 0.6389443874359131, "learning_rate": 0.000217794836137724, "loss": 2.7033, "step": 36016 }, { "epoch": 1.77, "grad_norm": 0.6404136419296265, "learning_rate": 0.00021778002934803306, "loss": 2.8099, "step": 36017 }, { "epoch": 1.77, "grad_norm": 0.6511685848236084, "learning_rate": 0.00021776522277489386, "loss": 3.1501, "step": 36018 }, { "epoch": 1.77, "grad_norm": 0.6114490628242493, "learning_rate": 0.00021775041641834513, "loss": 2.9718, "step": 36019 }, { "epoch": 1.77, "grad_norm": 0.6275057792663574, "learning_rate": 0.00021773561027842595, "loss": 2.9344, "step": 36020 }, { "epoch": 1.77, "grad_norm": 0.6325558423995972, "learning_rate": 0.00021772080435517545, "loss": 3.1454, "step": 36021 }, { "epoch": 1.77, "grad_norm": 0.6032198667526245, "learning_rate": 0.00021770599864863246, "loss": 2.9158, "step": 36022 }, { "epoch": 1.77, "grad_norm": 0.5971514582633972, "learning_rate": 0.0002176911931588361, "loss": 2.8928, "step": 36023 }, { "epoch": 1.77, "grad_norm": 0.6245431900024414, "learning_rate": 0.00021767638788582517, "loss": 3.1731, "step": 36024 }, { "epoch": 1.77, "grad_norm": 0.6920629143714905, "learning_rate": 0.00021766158282963876, "loss": 3.0704, "step": 36025 }, { "epoch": 1.77, "grad_norm": 0.6170417666435242, "learning_rate": 0.00021764677799031607, "loss": 2.942, "step": 36026 }, { "epoch": 1.77, "grad_norm": 0.6285954713821411, "learning_rate": 0.0002176319733678958, "loss": 2.9836, "step": 36027 }, { "epoch": 1.77, "grad_norm": 0.6193482875823975, "learning_rate": 0.00021761716896241716, "loss": 2.8479, "step": 36028 }, { "epoch": 1.77, "grad_norm": 0.6073896884918213, "learning_rate": 0.00021760236477391887, "loss": 3.2373, "step": 36029 }, { "epoch": 1.77, "grad_norm": 0.6277605295181274, "learning_rate": 0.00021758756080244023, "loss": 3.0198, "step": 36030 }, { "epoch": 1.77, "grad_norm": 0.6205739974975586, "learning_rate": 0.0002175727570480201, "loss": 3.0834, "step": 36031 }, { "epoch": 1.77, "grad_norm": 0.9096717238426208, "learning_rate": 0.00021755795351069738, "loss": 3.0458, "step": 36032 }, { "epoch": 1.77, "grad_norm": 0.6602234840393066, "learning_rate": 0.00021754315019051126, "loss": 3.2214, "step": 36033 }, { "epoch": 1.77, "grad_norm": 0.650767982006073, "learning_rate": 0.00021752834708750054, "loss": 3.0056, "step": 36034 }, { "epoch": 1.77, "grad_norm": 0.5995840430259705, "learning_rate": 0.0002175135442017042, "loss": 3.1619, "step": 36035 }, { "epoch": 1.77, "grad_norm": 0.6354784369468689, "learning_rate": 0.00021749874153316148, "loss": 3.2268, "step": 36036 }, { "epoch": 1.77, "grad_norm": 0.6659834980964661, "learning_rate": 0.0002174839390819111, "loss": 3.1833, "step": 36037 }, { "epoch": 1.77, "grad_norm": 0.649095892906189, "learning_rate": 0.00021746913684799222, "loss": 2.8661, "step": 36038 }, { "epoch": 1.77, "grad_norm": 0.7001377940177917, "learning_rate": 0.0002174543348314436, "loss": 2.85, "step": 36039 }, { "epoch": 1.77, "grad_norm": 0.6997196674346924, "learning_rate": 0.00021743953303230448, "loss": 3.1971, "step": 36040 }, { "epoch": 1.77, "grad_norm": 0.6453948020935059, "learning_rate": 0.00021742473145061378, "loss": 3.01, "step": 36041 }, { "epoch": 1.77, "grad_norm": 0.639747679233551, "learning_rate": 0.0002174099300864103, "loss": 2.9441, "step": 36042 }, { "epoch": 1.77, "grad_norm": 0.619999885559082, "learning_rate": 0.00021739512893973335, "loss": 2.8838, "step": 36043 }, { "epoch": 1.77, "grad_norm": 0.6154755353927612, "learning_rate": 0.00021738032801062155, "loss": 3.1061, "step": 36044 }, { "epoch": 1.77, "grad_norm": 0.6470183730125427, "learning_rate": 0.00021736552729911417, "loss": 3.0322, "step": 36045 }, { "epoch": 1.77, "grad_norm": 0.6488195657730103, "learning_rate": 0.00021735072680525015, "loss": 2.9479, "step": 36046 }, { "epoch": 1.77, "grad_norm": 0.6353014707565308, "learning_rate": 0.00021733592652906826, "loss": 3.135, "step": 36047 }, { "epoch": 1.77, "grad_norm": 0.6322101354598999, "learning_rate": 0.00021732112647060775, "loss": 3.1161, "step": 36048 }, { "epoch": 1.77, "grad_norm": 0.5940348505973816, "learning_rate": 0.00021730632662990743, "loss": 2.9735, "step": 36049 }, { "epoch": 1.77, "grad_norm": 0.5832706689834595, "learning_rate": 0.00021729152700700628, "loss": 2.9688, "step": 36050 }, { "epoch": 1.77, "grad_norm": 0.6051971316337585, "learning_rate": 0.00021727672760194343, "loss": 2.9654, "step": 36051 }, { "epoch": 1.77, "grad_norm": 0.6165816783905029, "learning_rate": 0.0002172619284147577, "loss": 2.8744, "step": 36052 }, { "epoch": 1.77, "grad_norm": 0.6479892134666443, "learning_rate": 0.0002172471294454882, "loss": 2.8019, "step": 36053 }, { "epoch": 1.77, "grad_norm": 0.6285097002983093, "learning_rate": 0.00021723233069417374, "loss": 2.8948, "step": 36054 }, { "epoch": 1.77, "grad_norm": 0.6039531826972961, "learning_rate": 0.0002172175321608534, "loss": 2.8057, "step": 36055 }, { "epoch": 1.77, "grad_norm": 0.6159113049507141, "learning_rate": 0.00021720273384556624, "loss": 2.9787, "step": 36056 }, { "epoch": 1.77, "grad_norm": 0.642290472984314, "learning_rate": 0.00021718793574835102, "loss": 3.0758, "step": 36057 }, { "epoch": 1.77, "grad_norm": 0.6488417387008667, "learning_rate": 0.00021717313786924695, "loss": 2.9597, "step": 36058 }, { "epoch": 1.77, "grad_norm": 0.6642580628395081, "learning_rate": 0.00021715834020829284, "loss": 3.1399, "step": 36059 }, { "epoch": 1.77, "grad_norm": 0.686511218547821, "learning_rate": 0.00021714354276552766, "loss": 2.9219, "step": 36060 }, { "epoch": 1.77, "grad_norm": 0.7143248319625854, "learning_rate": 0.00021712874554099057, "loss": 2.8625, "step": 36061 }, { "epoch": 1.77, "grad_norm": 0.6069398522377014, "learning_rate": 0.00021711394853472037, "loss": 2.6463, "step": 36062 }, { "epoch": 1.77, "grad_norm": 0.5863265991210938, "learning_rate": 0.00021709915174675614, "loss": 3.0262, "step": 36063 }, { "epoch": 1.77, "grad_norm": 0.6850087642669678, "learning_rate": 0.00021708435517713663, "loss": 3.182, "step": 36064 }, { "epoch": 1.77, "grad_norm": 0.6968920230865479, "learning_rate": 0.00021706955882590096, "loss": 2.9166, "step": 36065 }, { "epoch": 1.77, "grad_norm": 0.662936270236969, "learning_rate": 0.0002170547626930883, "loss": 2.9049, "step": 36066 }, { "epoch": 1.77, "grad_norm": 0.6453676223754883, "learning_rate": 0.0002170399667787373, "loss": 3.0578, "step": 36067 }, { "epoch": 1.77, "grad_norm": 0.6160590052604675, "learning_rate": 0.00021702517108288715, "loss": 3.0075, "step": 36068 }, { "epoch": 1.77, "grad_norm": 0.6253478527069092, "learning_rate": 0.00021701037560557663, "loss": 3.0597, "step": 36069 }, { "epoch": 1.77, "grad_norm": 0.6356991529464722, "learning_rate": 0.0002169955803468449, "loss": 2.8705, "step": 36070 }, { "epoch": 1.77, "grad_norm": 0.6288435459136963, "learning_rate": 0.0002169807853067308, "loss": 3.2712, "step": 36071 }, { "epoch": 1.77, "grad_norm": 0.6195838451385498, "learning_rate": 0.00021696599048527326, "loss": 2.9045, "step": 36072 }, { "epoch": 1.77, "grad_norm": 0.6254141330718994, "learning_rate": 0.00021695119588251144, "loss": 3.2025, "step": 36073 }, { "epoch": 1.77, "grad_norm": 0.6363393664360046, "learning_rate": 0.00021693640149848416, "loss": 3.0731, "step": 36074 }, { "epoch": 1.77, "grad_norm": 0.5991094708442688, "learning_rate": 0.00021692160733323044, "loss": 3.021, "step": 36075 }, { "epoch": 1.77, "grad_norm": 0.6361525654792786, "learning_rate": 0.0002169068133867891, "loss": 2.9463, "step": 36076 }, { "epoch": 1.77, "grad_norm": 0.653708279132843, "learning_rate": 0.00021689201965919929, "loss": 2.9209, "step": 36077 }, { "epoch": 1.77, "grad_norm": 0.6295130848884583, "learning_rate": 0.00021687722615049994, "loss": 2.9804, "step": 36078 }, { "epoch": 1.77, "grad_norm": 0.6194791197776794, "learning_rate": 0.00021686243286072988, "loss": 2.979, "step": 36079 }, { "epoch": 1.77, "grad_norm": 0.6836393475532532, "learning_rate": 0.0002168476397899283, "loss": 2.8899, "step": 36080 }, { "epoch": 1.77, "grad_norm": 0.5709426403045654, "learning_rate": 0.00021683284693813396, "loss": 3.0018, "step": 36081 }, { "epoch": 1.77, "grad_norm": 0.637849748134613, "learning_rate": 0.00021681805430538587, "loss": 3.072, "step": 36082 }, { "epoch": 1.77, "grad_norm": 0.5979206562042236, "learning_rate": 0.00021680326189172307, "loss": 2.9527, "step": 36083 }, { "epoch": 1.77, "grad_norm": 0.6736457347869873, "learning_rate": 0.00021678846969718445, "loss": 3.1289, "step": 36084 }, { "epoch": 1.77, "grad_norm": 0.6516305208206177, "learning_rate": 0.00021677367772180906, "loss": 3.0119, "step": 36085 }, { "epoch": 1.77, "grad_norm": 0.6282474994659424, "learning_rate": 0.0002167588859656356, "loss": 2.9219, "step": 36086 }, { "epoch": 1.77, "grad_norm": 0.6169061660766602, "learning_rate": 0.00021674409442870327, "loss": 3.0883, "step": 36087 }, { "epoch": 1.77, "grad_norm": 0.7025720477104187, "learning_rate": 0.00021672930311105107, "loss": 3.1129, "step": 36088 }, { "epoch": 1.77, "grad_norm": 0.6823790073394775, "learning_rate": 0.0002167145120127178, "loss": 2.9719, "step": 36089 }, { "epoch": 1.77, "grad_norm": 0.6688551902770996, "learning_rate": 0.00021669972113374258, "loss": 2.8693, "step": 36090 }, { "epoch": 1.77, "grad_norm": 0.6762685775756836, "learning_rate": 0.0002166849304741641, "loss": 2.9417, "step": 36091 }, { "epoch": 1.77, "grad_norm": 0.6573099493980408, "learning_rate": 0.00021667014003402154, "loss": 2.9705, "step": 36092 }, { "epoch": 1.77, "grad_norm": 0.6102715730667114, "learning_rate": 0.00021665534981335388, "loss": 3.0654, "step": 36093 }, { "epoch": 1.77, "grad_norm": 0.6486304998397827, "learning_rate": 0.00021664055981219984, "loss": 2.9239, "step": 36094 }, { "epoch": 1.77, "grad_norm": 0.5909685492515564, "learning_rate": 0.00021662577003059869, "loss": 3.0793, "step": 36095 }, { "epoch": 1.77, "grad_norm": 0.6453856229782104, "learning_rate": 0.00021661098046858908, "loss": 2.9635, "step": 36096 }, { "epoch": 1.77, "grad_norm": 0.6692531108856201, "learning_rate": 0.0002165961911262101, "loss": 3.0343, "step": 36097 }, { "epoch": 1.77, "grad_norm": 0.6578263640403748, "learning_rate": 0.0002165814020035008, "loss": 3.1648, "step": 36098 }, { "epoch": 1.77, "grad_norm": 0.6265367865562439, "learning_rate": 0.0002165666131005, "loss": 3.0647, "step": 36099 }, { "epoch": 1.77, "grad_norm": 0.615085780620575, "learning_rate": 0.00021655182441724673, "loss": 3.1935, "step": 36100 }, { "epoch": 1.77, "grad_norm": 0.6076129078865051, "learning_rate": 0.0002165370359537797, "loss": 3.0154, "step": 36101 }, { "epoch": 1.77, "grad_norm": 0.6174434423446655, "learning_rate": 0.0002165222477101382, "loss": 3.0799, "step": 36102 }, { "epoch": 1.77, "grad_norm": 0.6437174081802368, "learning_rate": 0.00021650745968636108, "loss": 2.9149, "step": 36103 }, { "epoch": 1.77, "grad_norm": 0.614182710647583, "learning_rate": 0.0002164926718824871, "loss": 3.0644, "step": 36104 }, { "epoch": 1.77, "grad_norm": 0.6699419617652893, "learning_rate": 0.0002164778842985555, "loss": 2.8719, "step": 36105 }, { "epoch": 1.77, "grad_norm": 0.636222243309021, "learning_rate": 0.00021646309693460495, "loss": 3.0052, "step": 36106 }, { "epoch": 1.77, "grad_norm": 0.6297125816345215, "learning_rate": 0.00021644830979067456, "loss": 3.184, "step": 36107 }, { "epoch": 1.77, "grad_norm": 0.6243493556976318, "learning_rate": 0.00021643352286680333, "loss": 2.8433, "step": 36108 }, { "epoch": 1.77, "grad_norm": 0.6312358975410461, "learning_rate": 0.00021641873616303, "loss": 3.1941, "step": 36109 }, { "epoch": 1.77, "grad_norm": 0.6219147443771362, "learning_rate": 0.00021640394967939374, "loss": 3.0337, "step": 36110 }, { "epoch": 1.77, "grad_norm": 0.6636743545532227, "learning_rate": 0.0002163891634159333, "loss": 3.2584, "step": 36111 }, { "epoch": 1.77, "grad_norm": 0.6117985844612122, "learning_rate": 0.00021637437737268773, "loss": 3.284, "step": 36112 }, { "epoch": 1.77, "grad_norm": 0.6436432003974915, "learning_rate": 0.00021635959154969603, "loss": 2.99, "step": 36113 }, { "epoch": 1.77, "grad_norm": 0.6268394589424133, "learning_rate": 0.00021634480594699698, "loss": 3.0048, "step": 36114 }, { "epoch": 1.77, "grad_norm": 0.6493527889251709, "learning_rate": 0.0002163300205646297, "loss": 3.037, "step": 36115 }, { "epoch": 1.77, "grad_norm": 0.6208270192146301, "learning_rate": 0.00021631523540263292, "loss": 2.973, "step": 36116 }, { "epoch": 1.77, "grad_norm": 0.6634708642959595, "learning_rate": 0.00021630045046104577, "loss": 2.9894, "step": 36117 }, { "epoch": 1.77, "grad_norm": 0.6609952449798584, "learning_rate": 0.00021628566573990717, "loss": 3.1561, "step": 36118 }, { "epoch": 1.77, "grad_norm": 0.6642773747444153, "learning_rate": 0.0002162708812392559, "loss": 2.9295, "step": 36119 }, { "epoch": 1.77, "grad_norm": 0.6038638353347778, "learning_rate": 0.00021625609695913116, "loss": 2.869, "step": 36120 }, { "epoch": 1.77, "grad_norm": 0.6279966831207275, "learning_rate": 0.00021624131289957167, "loss": 2.9704, "step": 36121 }, { "epoch": 1.77, "grad_norm": 0.6348757147789001, "learning_rate": 0.00021622652906061636, "loss": 3.0251, "step": 36122 }, { "epoch": 1.77, "grad_norm": 0.6414353847503662, "learning_rate": 0.0002162117454423044, "loss": 2.9168, "step": 36123 }, { "epoch": 1.77, "grad_norm": 0.6199612617492676, "learning_rate": 0.00021619696204467452, "loss": 2.956, "step": 36124 }, { "epoch": 1.77, "grad_norm": 0.6189448833465576, "learning_rate": 0.00021618217886776577, "loss": 3.0051, "step": 36125 }, { "epoch": 1.77, "grad_norm": 0.6277577877044678, "learning_rate": 0.0002161673959116169, "loss": 3.0413, "step": 36126 }, { "epoch": 1.77, "grad_norm": 0.6269301772117615, "learning_rate": 0.00021615261317626692, "loss": 3.1164, "step": 36127 }, { "epoch": 1.77, "grad_norm": 0.6273691058158875, "learning_rate": 0.00021613783066175505, "loss": 2.7998, "step": 36128 }, { "epoch": 1.77, "grad_norm": 0.6344377398490906, "learning_rate": 0.00021612304836811984, "loss": 3.1015, "step": 36129 }, { "epoch": 1.77, "grad_norm": 0.6333027482032776, "learning_rate": 0.00021610826629540048, "loss": 2.893, "step": 36130 }, { "epoch": 1.77, "grad_norm": 0.6379823088645935, "learning_rate": 0.0002160934844436357, "loss": 3.0585, "step": 36131 }, { "epoch": 1.77, "grad_norm": 0.6152437925338745, "learning_rate": 0.00021607870281286455, "loss": 3.0948, "step": 36132 }, { "epoch": 1.77, "grad_norm": 0.8633241057395935, "learning_rate": 0.00021606392140312606, "loss": 2.9737, "step": 36133 }, { "epoch": 1.77, "grad_norm": 0.6464073061943054, "learning_rate": 0.00021604914021445886, "loss": 2.9933, "step": 36134 }, { "epoch": 1.77, "grad_norm": 0.6235373616218567, "learning_rate": 0.00021603435924690223, "loss": 3.0425, "step": 36135 }, { "epoch": 1.77, "grad_norm": 0.7127751708030701, "learning_rate": 0.00021601957850049487, "loss": 2.9248, "step": 36136 }, { "epoch": 1.77, "grad_norm": 0.6603729724884033, "learning_rate": 0.00021600479797527573, "loss": 3.1996, "step": 36137 }, { "epoch": 1.77, "grad_norm": 0.6313992738723755, "learning_rate": 0.00021599001767128387, "loss": 3.0105, "step": 36138 }, { "epoch": 1.77, "grad_norm": 0.6753054857254028, "learning_rate": 0.00021597523758855813, "loss": 2.942, "step": 36139 }, { "epoch": 1.77, "grad_norm": 0.6346279978752136, "learning_rate": 0.0002159604577271375, "loss": 2.9444, "step": 36140 }, { "epoch": 1.77, "grad_norm": 0.6592559218406677, "learning_rate": 0.00021594567808706067, "loss": 3.264, "step": 36141 }, { "epoch": 1.77, "grad_norm": 0.6272072196006775, "learning_rate": 0.00021593089866836683, "loss": 3.0916, "step": 36142 }, { "epoch": 1.77, "grad_norm": 0.6413544416427612, "learning_rate": 0.0002159161194710949, "loss": 3.0556, "step": 36143 }, { "epoch": 1.77, "grad_norm": 0.6144356727600098, "learning_rate": 0.00021590134049528364, "loss": 3.0897, "step": 36144 }, { "epoch": 1.77, "grad_norm": 0.6145825982093811, "learning_rate": 0.00021588656174097214, "loss": 3.1299, "step": 36145 }, { "epoch": 1.77, "grad_norm": 0.6415427327156067, "learning_rate": 0.0002158717832081992, "loss": 3.0902, "step": 36146 }, { "epoch": 1.77, "grad_norm": 0.6182569265365601, "learning_rate": 0.00021585700489700383, "loss": 2.9496, "step": 36147 }, { "epoch": 1.77, "grad_norm": 0.6118202209472656, "learning_rate": 0.00021584222680742484, "loss": 2.9401, "step": 36148 }, { "epoch": 1.77, "grad_norm": 0.630115807056427, "learning_rate": 0.00021582744893950122, "loss": 2.9302, "step": 36149 }, { "epoch": 1.77, "grad_norm": 0.6311905384063721, "learning_rate": 0.000215812671293272, "loss": 3.1874, "step": 36150 }, { "epoch": 1.77, "grad_norm": 0.632701575756073, "learning_rate": 0.0002157978938687759, "loss": 2.9891, "step": 36151 }, { "epoch": 1.77, "grad_norm": 0.6322634220123291, "learning_rate": 0.00021578311666605208, "loss": 2.8718, "step": 36152 }, { "epoch": 1.77, "grad_norm": 0.6122119426727295, "learning_rate": 0.00021576833968513917, "loss": 3.0455, "step": 36153 }, { "epoch": 1.77, "grad_norm": 0.6663072109222412, "learning_rate": 0.0002157535629260763, "loss": 2.8699, "step": 36154 }, { "epoch": 1.77, "grad_norm": 0.6152451038360596, "learning_rate": 0.0002157387863889024, "loss": 3.0347, "step": 36155 }, { "epoch": 1.77, "grad_norm": 0.614471435546875, "learning_rate": 0.0002157240100736562, "loss": 3.1776, "step": 36156 }, { "epoch": 1.77, "grad_norm": 1.1464407444000244, "learning_rate": 0.00021570923398037684, "loss": 3.0467, "step": 36157 }, { "epoch": 1.77, "grad_norm": 0.626950204372406, "learning_rate": 0.0002156944581091031, "loss": 2.7787, "step": 36158 }, { "epoch": 1.77, "grad_norm": 0.6673448085784912, "learning_rate": 0.00021567968245987386, "loss": 3.0078, "step": 36159 }, { "epoch": 1.77, "grad_norm": 0.6380613446235657, "learning_rate": 0.00021566490703272824, "loss": 2.8045, "step": 36160 }, { "epoch": 1.77, "grad_norm": 0.6106299161911011, "learning_rate": 0.00021565013182770493, "loss": 3.2147, "step": 36161 }, { "epoch": 1.77, "grad_norm": 0.6322900652885437, "learning_rate": 0.00021563535684484305, "loss": 3.0431, "step": 36162 }, { "epoch": 1.77, "grad_norm": 0.603399932384491, "learning_rate": 0.00021562058208418123, "loss": 2.9781, "step": 36163 }, { "epoch": 1.77, "grad_norm": 0.6214710474014282, "learning_rate": 0.00021560580754575865, "loss": 3.0257, "step": 36164 }, { "epoch": 1.77, "grad_norm": 0.6370988488197327, "learning_rate": 0.00021559103322961419, "loss": 3.0746, "step": 36165 }, { "epoch": 1.77, "grad_norm": 0.6562209129333496, "learning_rate": 0.00021557625913578654, "loss": 2.9225, "step": 36166 }, { "epoch": 1.77, "grad_norm": 0.6525112986564636, "learning_rate": 0.00021556148526431497, "loss": 2.9325, "step": 36167 }, { "epoch": 1.77, "grad_norm": 0.6724076867103577, "learning_rate": 0.00021554671161523806, "loss": 2.9713, "step": 36168 }, { "epoch": 1.77, "grad_norm": 0.6039425134658813, "learning_rate": 0.00021553193818859488, "loss": 3.0612, "step": 36169 }, { "epoch": 1.77, "grad_norm": 0.6516081690788269, "learning_rate": 0.0002155171649844244, "loss": 2.8402, "step": 36170 }, { "epoch": 1.77, "grad_norm": 0.6352439522743225, "learning_rate": 0.00021550239200276536, "loss": 3.0824, "step": 36171 }, { "epoch": 1.77, "grad_norm": 0.6601967215538025, "learning_rate": 0.00021548761924365683, "loss": 2.9832, "step": 36172 }, { "epoch": 1.77, "grad_norm": 0.6137501001358032, "learning_rate": 0.0002154728467071376, "loss": 3.0011, "step": 36173 }, { "epoch": 1.77, "grad_norm": 0.6737114787101746, "learning_rate": 0.00021545807439324655, "loss": 2.8073, "step": 36174 }, { "epoch": 1.77, "grad_norm": 0.6427306532859802, "learning_rate": 0.0002154433023020228, "loss": 2.913, "step": 36175 }, { "epoch": 1.77, "grad_norm": 0.6364057660102844, "learning_rate": 0.00021542853043350507, "loss": 3.0798, "step": 36176 }, { "epoch": 1.77, "grad_norm": 0.6504160165786743, "learning_rate": 0.00021541375878773237, "loss": 2.9817, "step": 36177 }, { "epoch": 1.77, "grad_norm": 0.669624924659729, "learning_rate": 0.00021539898736474338, "loss": 2.8296, "step": 36178 }, { "epoch": 1.77, "grad_norm": 0.6107624769210815, "learning_rate": 0.0002153842161645773, "loss": 3.046, "step": 36179 }, { "epoch": 1.77, "grad_norm": 0.6231289505958557, "learning_rate": 0.00021536944518727296, "loss": 3.1231, "step": 36180 }, { "epoch": 1.77, "grad_norm": 0.6516662836074829, "learning_rate": 0.0002153546744328691, "loss": 3.0611, "step": 36181 }, { "epoch": 1.77, "grad_norm": 0.6371469497680664, "learning_rate": 0.00021533990390140484, "loss": 3.0647, "step": 36182 }, { "epoch": 1.77, "grad_norm": 0.6329872608184814, "learning_rate": 0.00021532513359291886, "loss": 2.9429, "step": 36183 }, { "epoch": 1.77, "grad_norm": 0.6683485507965088, "learning_rate": 0.00021531036350745018, "loss": 2.8126, "step": 36184 }, { "epoch": 1.77, "grad_norm": 0.6202999949455261, "learning_rate": 0.00021529559364503784, "loss": 3.0846, "step": 36185 }, { "epoch": 1.77, "grad_norm": 0.6209010481834412, "learning_rate": 0.00021528082400572055, "loss": 2.9513, "step": 36186 }, { "epoch": 1.77, "grad_norm": 0.6557936072349548, "learning_rate": 0.00021526605458953733, "loss": 3.028, "step": 36187 }, { "epoch": 1.77, "grad_norm": 0.6131925582885742, "learning_rate": 0.0002152512853965268, "loss": 2.867, "step": 36188 }, { "epoch": 1.77, "grad_norm": 0.6493362188339233, "learning_rate": 0.00021523651642672815, "loss": 2.935, "step": 36189 }, { "epoch": 1.77, "grad_norm": 0.8060845136642456, "learning_rate": 0.00021522174768018033, "loss": 3.1432, "step": 36190 }, { "epoch": 1.77, "grad_norm": 0.5938668251037598, "learning_rate": 0.00021520697915692203, "loss": 2.9079, "step": 36191 }, { "epoch": 1.77, "grad_norm": 0.6498206257820129, "learning_rate": 0.0002151922108569923, "loss": 2.9356, "step": 36192 }, { "epoch": 1.77, "grad_norm": 0.6662101745605469, "learning_rate": 0.00021517744278042982, "loss": 2.9902, "step": 36193 }, { "epoch": 1.77, "grad_norm": 0.686980128288269, "learning_rate": 0.00021516267492727373, "loss": 2.9666, "step": 36194 }, { "epoch": 1.77, "grad_norm": 0.6905176639556885, "learning_rate": 0.00021514790729756285, "loss": 2.9056, "step": 36195 }, { "epoch": 1.77, "grad_norm": 0.6709184050559998, "learning_rate": 0.00021513313989133596, "loss": 3.0483, "step": 36196 }, { "epoch": 1.77, "grad_norm": 0.6174628734588623, "learning_rate": 0.00021511837270863214, "loss": 3.0596, "step": 36197 }, { "epoch": 1.77, "grad_norm": 0.6258738040924072, "learning_rate": 0.0002151036057494902, "loss": 3.0001, "step": 36198 }, { "epoch": 1.77, "grad_norm": 0.6265406012535095, "learning_rate": 0.00021508883901394886, "loss": 3.172, "step": 36199 }, { "epoch": 1.77, "grad_norm": 0.6480788588523865, "learning_rate": 0.00021507407250204736, "loss": 2.9221, "step": 36200 }, { "epoch": 1.77, "grad_norm": 0.6287563443183899, "learning_rate": 0.00021505930621382437, "loss": 3.0638, "step": 36201 }, { "epoch": 1.77, "grad_norm": 0.6304842829704285, "learning_rate": 0.00021504454014931884, "loss": 2.9461, "step": 36202 }, { "epoch": 1.77, "grad_norm": 0.6325621604919434, "learning_rate": 0.0002150297743085695, "loss": 2.892, "step": 36203 }, { "epoch": 1.77, "grad_norm": 0.6825348734855652, "learning_rate": 0.00021501500869161548, "loss": 2.8529, "step": 36204 }, { "epoch": 1.77, "grad_norm": 0.6398288607597351, "learning_rate": 0.00021500024329849563, "loss": 2.9031, "step": 36205 }, { "epoch": 1.77, "grad_norm": 0.5954134464263916, "learning_rate": 0.00021498547812924865, "loss": 2.8321, "step": 36206 }, { "epoch": 1.77, "grad_norm": 0.6618627309799194, "learning_rate": 0.00021497071318391368, "loss": 3.1237, "step": 36207 }, { "epoch": 1.77, "grad_norm": 0.6097262501716614, "learning_rate": 0.00021495594846252937, "loss": 2.97, "step": 36208 }, { "epoch": 1.77, "grad_norm": 0.6133797764778137, "learning_rate": 0.00021494118396513475, "loss": 3.1634, "step": 36209 }, { "epoch": 1.77, "grad_norm": 0.7019491791725159, "learning_rate": 0.0002149264196917688, "loss": 2.9923, "step": 36210 }, { "epoch": 1.77, "grad_norm": 0.6512349843978882, "learning_rate": 0.00021491165564247011, "loss": 3.1578, "step": 36211 }, { "epoch": 1.77, "grad_norm": 0.6159108281135559, "learning_rate": 0.0002148968918172779, "loss": 2.9716, "step": 36212 }, { "epoch": 1.77, "grad_norm": 0.6668187975883484, "learning_rate": 0.00021488212821623085, "loss": 2.9477, "step": 36213 }, { "epoch": 1.77, "grad_norm": 0.6413983106613159, "learning_rate": 0.0002148673648393678, "loss": 3.215, "step": 36214 }, { "epoch": 1.77, "grad_norm": 0.6101641058921814, "learning_rate": 0.0002148526016867279, "loss": 2.7737, "step": 36215 }, { "epoch": 1.77, "grad_norm": 0.622040867805481, "learning_rate": 0.00021483783875834972, "loss": 2.9313, "step": 36216 }, { "epoch": 1.77, "grad_norm": 0.6527264714241028, "learning_rate": 0.0002148230760542724, "loss": 3.2522, "step": 36217 }, { "epoch": 1.77, "grad_norm": 0.6784375905990601, "learning_rate": 0.00021480831357453456, "loss": 2.8937, "step": 36218 }, { "epoch": 1.78, "grad_norm": 0.6425294876098633, "learning_rate": 0.0002147935513191753, "loss": 2.905, "step": 36219 }, { "epoch": 1.78, "grad_norm": 0.6469739675521851, "learning_rate": 0.0002147787892882335, "loss": 2.9366, "step": 36220 }, { "epoch": 1.78, "grad_norm": 0.6778955459594727, "learning_rate": 0.00021476402748174783, "loss": 3.0863, "step": 36221 }, { "epoch": 1.78, "grad_norm": 0.5806019306182861, "learning_rate": 0.00021474926589975745, "loss": 2.7963, "step": 36222 }, { "epoch": 1.78, "grad_norm": 0.6367674469947815, "learning_rate": 0.000214734504542301, "loss": 2.9159, "step": 36223 }, { "epoch": 1.78, "grad_norm": 0.6361095905303955, "learning_rate": 0.00021471974340941754, "loss": 3.1667, "step": 36224 }, { "epoch": 1.78, "grad_norm": 0.6673516631126404, "learning_rate": 0.0002147049825011457, "loss": 3.1121, "step": 36225 }, { "epoch": 1.78, "grad_norm": 0.6467750072479248, "learning_rate": 0.00021469022181752465, "loss": 3.1073, "step": 36226 }, { "epoch": 1.78, "grad_norm": 0.6565433740615845, "learning_rate": 0.00021467546135859315, "loss": 2.8489, "step": 36227 }, { "epoch": 1.78, "grad_norm": 0.6047545075416565, "learning_rate": 0.00021466070112438997, "loss": 3.0128, "step": 36228 }, { "epoch": 1.78, "grad_norm": 0.6131613254547119, "learning_rate": 0.00021464594111495419, "loss": 2.8185, "step": 36229 }, { "epoch": 1.78, "grad_norm": 0.6076128482818604, "learning_rate": 0.0002146311813303244, "loss": 3.0758, "step": 36230 }, { "epoch": 1.78, "grad_norm": 0.6186553835868835, "learning_rate": 0.0002146164217705398, "loss": 2.9951, "step": 36231 }, { "epoch": 1.78, "grad_norm": 0.6514021754264832, "learning_rate": 0.00021460166243563914, "loss": 3.0689, "step": 36232 }, { "epoch": 1.78, "grad_norm": 0.6270773410797119, "learning_rate": 0.00021458690332566113, "loss": 3.036, "step": 36233 }, { "epoch": 1.78, "grad_norm": 0.6818325519561768, "learning_rate": 0.00021457214444064492, "loss": 2.9342, "step": 36234 }, { "epoch": 1.78, "grad_norm": 0.6430490016937256, "learning_rate": 0.00021455738578062916, "loss": 2.9211, "step": 36235 }, { "epoch": 1.78, "grad_norm": 0.6284935474395752, "learning_rate": 0.00021454262734565277, "loss": 3.1729, "step": 36236 }, { "epoch": 1.78, "grad_norm": 0.646930456161499, "learning_rate": 0.00021452786913575474, "loss": 2.983, "step": 36237 }, { "epoch": 1.78, "grad_norm": 0.6334282159805298, "learning_rate": 0.00021451311115097378, "loss": 3.1183, "step": 36238 }, { "epoch": 1.78, "grad_norm": 0.6594672203063965, "learning_rate": 0.00021449835339134893, "loss": 3.0003, "step": 36239 }, { "epoch": 1.78, "grad_norm": 0.6455363035202026, "learning_rate": 0.00021448359585691883, "loss": 3.0601, "step": 36240 }, { "epoch": 1.78, "grad_norm": 0.6710865497589111, "learning_rate": 0.00021446883854772255, "loss": 2.9493, "step": 36241 }, { "epoch": 1.78, "grad_norm": 0.6526823043823242, "learning_rate": 0.00021445408146379894, "loss": 3.1155, "step": 36242 }, { "epoch": 1.78, "grad_norm": 0.6468623876571655, "learning_rate": 0.0002144393246051867, "loss": 2.7825, "step": 36243 }, { "epoch": 1.78, "grad_norm": 0.6826371550559998, "learning_rate": 0.00021442456797192492, "loss": 3.0516, "step": 36244 }, { "epoch": 1.78, "grad_norm": 0.6300487518310547, "learning_rate": 0.0002144098115640523, "loss": 3.0863, "step": 36245 }, { "epoch": 1.78, "grad_norm": 0.6709060072898865, "learning_rate": 0.00021439505538160776, "loss": 3.0067, "step": 36246 }, { "epoch": 1.78, "grad_norm": 0.6281489729881287, "learning_rate": 0.0002143802994246302, "loss": 3.1286, "step": 36247 }, { "epoch": 1.78, "grad_norm": 0.6108605861663818, "learning_rate": 0.00021436554369315845, "loss": 3.0594, "step": 36248 }, { "epoch": 1.78, "grad_norm": 0.692886233329773, "learning_rate": 0.00021435078818723144, "loss": 2.9815, "step": 36249 }, { "epoch": 1.78, "grad_norm": 0.6155291199684143, "learning_rate": 0.0002143360329068878, "loss": 2.9099, "step": 36250 }, { "epoch": 1.78, "grad_norm": 0.5946487784385681, "learning_rate": 0.0002143212778521666, "loss": 3.0502, "step": 36251 }, { "epoch": 1.78, "grad_norm": 0.6302567720413208, "learning_rate": 0.0002143065230231068, "loss": 3.019, "step": 36252 }, { "epoch": 1.78, "grad_norm": 0.610625684261322, "learning_rate": 0.00021429176841974702, "loss": 2.9311, "step": 36253 }, { "epoch": 1.78, "grad_norm": 0.6491797566413879, "learning_rate": 0.00021427701404212634, "loss": 2.978, "step": 36254 }, { "epoch": 1.78, "grad_norm": 0.6238184571266174, "learning_rate": 0.00021426225989028334, "loss": 3.1842, "step": 36255 }, { "epoch": 1.78, "grad_norm": 0.6471647024154663, "learning_rate": 0.00021424750596425713, "loss": 3.1372, "step": 36256 }, { "epoch": 1.78, "grad_norm": 0.6920592188835144, "learning_rate": 0.00021423275226408654, "loss": 3.1878, "step": 36257 }, { "epoch": 1.78, "grad_norm": 0.6681246757507324, "learning_rate": 0.00021421799878981027, "loss": 2.9845, "step": 36258 }, { "epoch": 1.78, "grad_norm": 0.6273609399795532, "learning_rate": 0.0002142032455414674, "loss": 3.0139, "step": 36259 }, { "epoch": 1.78, "grad_norm": 0.6397222876548767, "learning_rate": 0.0002141884925190966, "loss": 2.9536, "step": 36260 }, { "epoch": 1.78, "grad_norm": 0.6393402814865112, "learning_rate": 0.00021417373972273672, "loss": 2.983, "step": 36261 }, { "epoch": 1.78, "grad_norm": 0.619844913482666, "learning_rate": 0.00021415898715242682, "loss": 3.053, "step": 36262 }, { "epoch": 1.78, "grad_norm": 0.6226319074630737, "learning_rate": 0.00021414423480820558, "loss": 3.3036, "step": 36263 }, { "epoch": 1.78, "grad_norm": 0.7460436820983887, "learning_rate": 0.000214129482690112, "loss": 2.7747, "step": 36264 }, { "epoch": 1.78, "grad_norm": 0.6607783436775208, "learning_rate": 0.00021411473079818464, "loss": 3.0174, "step": 36265 }, { "epoch": 1.78, "grad_norm": 0.6538329124450684, "learning_rate": 0.00021409997913246262, "loss": 3.0033, "step": 36266 }, { "epoch": 1.78, "grad_norm": 0.704361081123352, "learning_rate": 0.0002140852276929848, "loss": 3.0254, "step": 36267 }, { "epoch": 1.78, "grad_norm": 0.7130153775215149, "learning_rate": 0.00021407047647978988, "loss": 3.0591, "step": 36268 }, { "epoch": 1.78, "grad_norm": 0.598651111125946, "learning_rate": 0.00021405572549291685, "loss": 3.1185, "step": 36269 }, { "epoch": 1.78, "grad_norm": 0.6102203726768494, "learning_rate": 0.0002140409747324044, "loss": 3.2775, "step": 36270 }, { "epoch": 1.78, "grad_norm": 0.6122444868087769, "learning_rate": 0.00021402622419829155, "loss": 3.1641, "step": 36271 }, { "epoch": 1.78, "grad_norm": 0.6745235919952393, "learning_rate": 0.00021401147389061716, "loss": 2.8793, "step": 36272 }, { "epoch": 1.78, "grad_norm": 0.6349541544914246, "learning_rate": 0.0002139967238094198, "loss": 3.0279, "step": 36273 }, { "epoch": 1.78, "grad_norm": 0.631254255771637, "learning_rate": 0.0002139819739547387, "loss": 2.9795, "step": 36274 }, { "epoch": 1.78, "grad_norm": 0.6068877577781677, "learning_rate": 0.00021396722432661245, "loss": 2.9905, "step": 36275 }, { "epoch": 1.78, "grad_norm": 0.677623987197876, "learning_rate": 0.0002139524749250799, "loss": 2.8977, "step": 36276 }, { "epoch": 1.78, "grad_norm": 0.6311962604522705, "learning_rate": 0.00021393772575018012, "loss": 3.075, "step": 36277 }, { "epoch": 1.78, "grad_norm": 0.6224701404571533, "learning_rate": 0.00021392297680195173, "loss": 3.0223, "step": 36278 }, { "epoch": 1.78, "grad_norm": 0.6306285858154297, "learning_rate": 0.00021390822808043373, "loss": 3.163, "step": 36279 }, { "epoch": 1.78, "grad_norm": 0.6172642707824707, "learning_rate": 0.00021389347958566474, "loss": 3.0831, "step": 36280 }, { "epoch": 1.78, "grad_norm": 0.6490658521652222, "learning_rate": 0.0002138787313176838, "loss": 2.973, "step": 36281 }, { "epoch": 1.78, "grad_norm": 0.6317246556282043, "learning_rate": 0.00021386398327652981, "loss": 3.3257, "step": 36282 }, { "epoch": 1.78, "grad_norm": 0.6703366041183472, "learning_rate": 0.00021384923546224137, "loss": 3.067, "step": 36283 }, { "epoch": 1.78, "grad_norm": 0.6874723434448242, "learning_rate": 0.0002138344878748576, "loss": 2.9391, "step": 36284 }, { "epoch": 1.78, "grad_norm": 0.6609402894973755, "learning_rate": 0.0002138197405144171, "loss": 2.9994, "step": 36285 }, { "epoch": 1.78, "grad_norm": 0.6028761267662048, "learning_rate": 0.0002138049933809588, "loss": 3.0421, "step": 36286 }, { "epoch": 1.78, "grad_norm": 0.6413368582725525, "learning_rate": 0.00021379024647452167, "loss": 3.0925, "step": 36287 }, { "epoch": 1.78, "grad_norm": 0.660110354423523, "learning_rate": 0.00021377549979514438, "loss": 2.8916, "step": 36288 }, { "epoch": 1.78, "grad_norm": 0.6747713088989258, "learning_rate": 0.0002137607533428659, "loss": 2.8905, "step": 36289 }, { "epoch": 1.78, "grad_norm": 0.600744366645813, "learning_rate": 0.00021374600711772488, "loss": 3.1411, "step": 36290 }, { "epoch": 1.78, "grad_norm": 0.6018193960189819, "learning_rate": 0.00021373126111976027, "loss": 3.0956, "step": 36291 }, { "epoch": 1.78, "grad_norm": 0.611671507358551, "learning_rate": 0.00021371651534901102, "loss": 3.064, "step": 36292 }, { "epoch": 1.78, "grad_norm": 0.6379413604736328, "learning_rate": 0.00021370176980551583, "loss": 2.9962, "step": 36293 }, { "epoch": 1.78, "grad_norm": 0.644549548625946, "learning_rate": 0.00021368702448931363, "loss": 2.9289, "step": 36294 }, { "epoch": 1.78, "grad_norm": 0.6469461917877197, "learning_rate": 0.00021367227940044303, "loss": 3.3018, "step": 36295 }, { "epoch": 1.78, "grad_norm": 0.6457450985908508, "learning_rate": 0.00021365753453894313, "loss": 3.0339, "step": 36296 }, { "epoch": 1.78, "grad_norm": 0.6415975689888, "learning_rate": 0.00021364278990485273, "loss": 3.1975, "step": 36297 }, { "epoch": 1.78, "grad_norm": 0.6394268870353699, "learning_rate": 0.0002136280454982105, "loss": 3.05, "step": 36298 }, { "epoch": 1.78, "grad_norm": 0.6522897481918335, "learning_rate": 0.00021361330131905543, "loss": 3.0616, "step": 36299 }, { "epoch": 1.78, "grad_norm": 0.6101564764976501, "learning_rate": 0.0002135985573674263, "loss": 3.218, "step": 36300 }, { "epoch": 1.78, "grad_norm": 0.6102758049964905, "learning_rate": 0.000213583813643362, "loss": 2.7773, "step": 36301 }, { "epoch": 1.78, "grad_norm": 0.5993099212646484, "learning_rate": 0.00021356907014690113, "loss": 2.6897, "step": 36302 }, { "epoch": 1.78, "grad_norm": 0.629689633846283, "learning_rate": 0.0002135543268780828, "loss": 3.0496, "step": 36303 }, { "epoch": 1.78, "grad_norm": 0.5861761569976807, "learning_rate": 0.00021353958383694575, "loss": 3.0468, "step": 36304 }, { "epoch": 1.78, "grad_norm": 0.6414809226989746, "learning_rate": 0.00021352484102352872, "loss": 2.7947, "step": 36305 }, { "epoch": 1.78, "grad_norm": 0.6364729404449463, "learning_rate": 0.00021351009843787074, "loss": 3.0792, "step": 36306 }, { "epoch": 1.78, "grad_norm": 0.6140216588973999, "learning_rate": 0.00021349535608001044, "loss": 2.8048, "step": 36307 }, { "epoch": 1.78, "grad_norm": 0.6230908036231995, "learning_rate": 0.00021348061394998663, "loss": 2.9799, "step": 36308 }, { "epoch": 1.78, "grad_norm": 0.6611496210098267, "learning_rate": 0.00021346587204783843, "loss": 2.7688, "step": 36309 }, { "epoch": 1.78, "grad_norm": 0.638316810131073, "learning_rate": 0.00021345113037360438, "loss": 3.2603, "step": 36310 }, { "epoch": 1.78, "grad_norm": 0.6499404311180115, "learning_rate": 0.00021343638892732343, "loss": 2.8078, "step": 36311 }, { "epoch": 1.78, "grad_norm": 0.6417861580848694, "learning_rate": 0.00021342164770903426, "loss": 2.8695, "step": 36312 }, { "epoch": 1.78, "grad_norm": 0.6680542230606079, "learning_rate": 0.0002134069067187758, "loss": 3.202, "step": 36313 }, { "epoch": 1.78, "grad_norm": 0.6242054104804993, "learning_rate": 0.000213392165956587, "loss": 2.8746, "step": 36314 }, { "epoch": 1.78, "grad_norm": 0.6265760064125061, "learning_rate": 0.00021337742542250652, "loss": 3.0401, "step": 36315 }, { "epoch": 1.78, "grad_norm": 0.6692229509353638, "learning_rate": 0.00021336268511657332, "loss": 2.6561, "step": 36316 }, { "epoch": 1.78, "grad_norm": 0.7591632008552551, "learning_rate": 0.00021334794503882596, "loss": 3.0317, "step": 36317 }, { "epoch": 1.78, "grad_norm": 0.6061434745788574, "learning_rate": 0.00021333320518930355, "loss": 3.0425, "step": 36318 }, { "epoch": 1.78, "grad_norm": 0.6159639954566956, "learning_rate": 0.00021331846556804483, "loss": 2.9656, "step": 36319 }, { "epoch": 1.78, "grad_norm": 0.5951846241950989, "learning_rate": 0.00021330372617508846, "loss": 2.867, "step": 36320 }, { "epoch": 1.78, "grad_norm": 0.5990233421325684, "learning_rate": 0.00021328898701047352, "loss": 2.9664, "step": 36321 }, { "epoch": 1.78, "grad_norm": 0.6403229236602783, "learning_rate": 0.00021327424807423865, "loss": 3.045, "step": 36322 }, { "epoch": 1.78, "grad_norm": 0.6548666954040527, "learning_rate": 0.00021325950936642264, "loss": 3.1726, "step": 36323 }, { "epoch": 1.78, "grad_norm": 0.6505340933799744, "learning_rate": 0.00021324477088706452, "loss": 3.0317, "step": 36324 }, { "epoch": 1.78, "grad_norm": 0.6289445757865906, "learning_rate": 0.0002132300326362029, "loss": 3.0337, "step": 36325 }, { "epoch": 1.78, "grad_norm": 0.6409799456596375, "learning_rate": 0.00021321529461387675, "loss": 3.0774, "step": 36326 }, { "epoch": 1.78, "grad_norm": 0.6213870048522949, "learning_rate": 0.00021320055682012468, "loss": 3.1011, "step": 36327 }, { "epoch": 1.78, "grad_norm": 0.5979524254798889, "learning_rate": 0.00021318581925498572, "loss": 2.9269, "step": 36328 }, { "epoch": 1.78, "grad_norm": 0.6520103812217712, "learning_rate": 0.0002131710819184987, "loss": 2.9932, "step": 36329 }, { "epoch": 1.78, "grad_norm": 0.6027911305427551, "learning_rate": 0.00021315634481070214, "loss": 3.1689, "step": 36330 }, { "epoch": 1.78, "grad_norm": 0.6382830142974854, "learning_rate": 0.0002131416079316352, "loss": 3.0135, "step": 36331 }, { "epoch": 1.78, "grad_norm": 0.6174009442329407, "learning_rate": 0.00021312687128133645, "loss": 2.9106, "step": 36332 }, { "epoch": 1.78, "grad_norm": 0.640430212020874, "learning_rate": 0.00021311213485984486, "loss": 3.1547, "step": 36333 }, { "epoch": 1.78, "grad_norm": 0.6170399785041809, "learning_rate": 0.00021309739866719922, "loss": 3.18, "step": 36334 }, { "epoch": 1.78, "grad_norm": 0.6654266715049744, "learning_rate": 0.0002130826627034382, "loss": 3.0434, "step": 36335 }, { "epoch": 1.78, "grad_norm": 0.6619197130203247, "learning_rate": 0.00021306792696860086, "loss": 2.9076, "step": 36336 }, { "epoch": 1.78, "grad_norm": 0.6682602167129517, "learning_rate": 0.0002130531914627258, "loss": 2.9377, "step": 36337 }, { "epoch": 1.78, "grad_norm": 0.8006364107131958, "learning_rate": 0.00021303845618585182, "loss": 3.014, "step": 36338 }, { "epoch": 1.78, "grad_norm": 0.6219844818115234, "learning_rate": 0.00021302372113801796, "loss": 2.8757, "step": 36339 }, { "epoch": 1.78, "grad_norm": 0.6059569716453552, "learning_rate": 0.00021300898631926275, "loss": 2.9424, "step": 36340 }, { "epoch": 1.78, "grad_norm": 0.6212844848632812, "learning_rate": 0.00021299425172962528, "loss": 2.9297, "step": 36341 }, { "epoch": 1.78, "grad_norm": 0.6634481549263, "learning_rate": 0.00021297951736914402, "loss": 3.2482, "step": 36342 }, { "epoch": 1.78, "grad_norm": 0.6362781524658203, "learning_rate": 0.00021296478323785804, "loss": 3.0874, "step": 36343 }, { "epoch": 1.78, "grad_norm": 0.721335768699646, "learning_rate": 0.00021295004933580614, "loss": 2.8786, "step": 36344 }, { "epoch": 1.78, "grad_norm": 0.6178750395774841, "learning_rate": 0.00021293531566302692, "loss": 2.8094, "step": 36345 }, { "epoch": 1.78, "grad_norm": 0.6878019571304321, "learning_rate": 0.00021292058221955947, "loss": 2.9951, "step": 36346 }, { "epoch": 1.78, "grad_norm": 0.6131126284599304, "learning_rate": 0.0002129058490054424, "loss": 2.993, "step": 36347 }, { "epoch": 1.78, "grad_norm": 0.6379331946372986, "learning_rate": 0.00021289111602071448, "loss": 2.8931, "step": 36348 }, { "epoch": 1.78, "grad_norm": 0.6440445780754089, "learning_rate": 0.00021287638326541468, "loss": 3.142, "step": 36349 }, { "epoch": 1.78, "grad_norm": 0.6755915880203247, "learning_rate": 0.00021286165073958175, "loss": 3.0775, "step": 36350 }, { "epoch": 1.78, "grad_norm": 0.6246413588523865, "learning_rate": 0.0002128469184432545, "loss": 2.8214, "step": 36351 }, { "epoch": 1.78, "grad_norm": 0.6133143305778503, "learning_rate": 0.0002128321863764715, "loss": 2.7959, "step": 36352 }, { "epoch": 1.78, "grad_norm": 0.6247037649154663, "learning_rate": 0.0002128174545392718, "loss": 2.9561, "step": 36353 }, { "epoch": 1.78, "grad_norm": 0.6576348543167114, "learning_rate": 0.00021280272293169424, "loss": 3.043, "step": 36354 }, { "epoch": 1.78, "grad_norm": 0.633882462978363, "learning_rate": 0.00021278799155377746, "loss": 3.1632, "step": 36355 }, { "epoch": 1.78, "grad_norm": 0.6401931643486023, "learning_rate": 0.0002127732604055604, "loss": 3.1631, "step": 36356 }, { "epoch": 1.78, "grad_norm": 0.6456989645957947, "learning_rate": 0.0002127585294870817, "loss": 2.7861, "step": 36357 }, { "epoch": 1.78, "grad_norm": 0.5906917452812195, "learning_rate": 0.00021274379879838027, "loss": 3.2061, "step": 36358 }, { "epoch": 1.78, "grad_norm": 0.6478468179702759, "learning_rate": 0.0002127290683394949, "loss": 3.1233, "step": 36359 }, { "epoch": 1.78, "grad_norm": 0.643356204032898, "learning_rate": 0.00021271433811046432, "loss": 2.7415, "step": 36360 }, { "epoch": 1.78, "grad_norm": 0.651016116142273, "learning_rate": 0.00021269960811132747, "loss": 2.8069, "step": 36361 }, { "epoch": 1.78, "grad_norm": 0.6640231609344482, "learning_rate": 0.000212684878342123, "loss": 2.8503, "step": 36362 }, { "epoch": 1.78, "grad_norm": 0.6326684951782227, "learning_rate": 0.0002126701488028897, "loss": 2.9992, "step": 36363 }, { "epoch": 1.78, "grad_norm": 0.6268022656440735, "learning_rate": 0.00021265541949366652, "loss": 3.0261, "step": 36364 }, { "epoch": 1.78, "grad_norm": 0.6220657229423523, "learning_rate": 0.00021264069041449207, "loss": 2.7213, "step": 36365 }, { "epoch": 1.78, "grad_norm": 0.6234197616577148, "learning_rate": 0.00021262596156540535, "loss": 3.0385, "step": 36366 }, { "epoch": 1.78, "grad_norm": 0.6607543230056763, "learning_rate": 0.00021261123294644485, "loss": 3.0352, "step": 36367 }, { "epoch": 1.78, "grad_norm": 0.6178925037384033, "learning_rate": 0.00021259650455764966, "loss": 3.0404, "step": 36368 }, { "epoch": 1.78, "grad_norm": 0.6788184642791748, "learning_rate": 0.00021258177639905848, "loss": 2.9027, "step": 36369 }, { "epoch": 1.78, "grad_norm": 0.6631069779396057, "learning_rate": 0.00021256704847071, "loss": 3.1795, "step": 36370 }, { "epoch": 1.78, "grad_norm": 0.6293043494224548, "learning_rate": 0.00021255232077264314, "loss": 2.953, "step": 36371 }, { "epoch": 1.78, "grad_norm": 0.6410183906555176, "learning_rate": 0.00021253759330489656, "loss": 3.0083, "step": 36372 }, { "epoch": 1.78, "grad_norm": 0.6360601782798767, "learning_rate": 0.00021252286606750914, "loss": 3.0385, "step": 36373 }, { "epoch": 1.78, "grad_norm": 0.6407144069671631, "learning_rate": 0.00021250813906051978, "loss": 3.0849, "step": 36374 }, { "epoch": 1.78, "grad_norm": 0.5937209129333496, "learning_rate": 0.00021249341228396702, "loss": 2.8081, "step": 36375 }, { "epoch": 1.78, "grad_norm": 0.612413763999939, "learning_rate": 0.00021247868573788986, "loss": 3.133, "step": 36376 }, { "epoch": 1.78, "grad_norm": 0.6570436358451843, "learning_rate": 0.00021246395942232693, "loss": 3.0547, "step": 36377 }, { "epoch": 1.78, "grad_norm": 0.6165720820426941, "learning_rate": 0.00021244923333731702, "loss": 3.0068, "step": 36378 }, { "epoch": 1.78, "grad_norm": 0.6688787341117859, "learning_rate": 0.00021243450748289916, "loss": 2.9723, "step": 36379 }, { "epoch": 1.78, "grad_norm": 0.6865296363830566, "learning_rate": 0.00021241978185911184, "loss": 2.8357, "step": 36380 }, { "epoch": 1.78, "grad_norm": 0.6525271534919739, "learning_rate": 0.00021240505646599405, "loss": 3.1706, "step": 36381 }, { "epoch": 1.78, "grad_norm": 0.6239628195762634, "learning_rate": 0.00021239033130358433, "loss": 3.0773, "step": 36382 }, { "epoch": 1.78, "grad_norm": 0.6260185241699219, "learning_rate": 0.00021237560637192178, "loss": 2.9808, "step": 36383 }, { "epoch": 1.78, "grad_norm": 0.6808270812034607, "learning_rate": 0.00021236088167104494, "loss": 2.7452, "step": 36384 }, { "epoch": 1.78, "grad_norm": 0.6365841031074524, "learning_rate": 0.00021234615720099257, "loss": 3.1358, "step": 36385 }, { "epoch": 1.78, "grad_norm": 0.6161235570907593, "learning_rate": 0.00021233143296180374, "loss": 3.1963, "step": 36386 }, { "epoch": 1.78, "grad_norm": 0.6361461281776428, "learning_rate": 0.00021231670895351695, "loss": 3.1814, "step": 36387 }, { "epoch": 1.78, "grad_norm": 0.6474431157112122, "learning_rate": 0.00021230198517617114, "loss": 2.9853, "step": 36388 }, { "epoch": 1.78, "grad_norm": 0.679755687713623, "learning_rate": 0.0002122872616298049, "loss": 3.062, "step": 36389 }, { "epoch": 1.78, "grad_norm": 0.6086769700050354, "learning_rate": 0.00021227253831445717, "loss": 3.1973, "step": 36390 }, { "epoch": 1.78, "grad_norm": 0.6710407733917236, "learning_rate": 0.0002122578152301668, "loss": 2.8719, "step": 36391 }, { "epoch": 1.78, "grad_norm": 0.664715051651001, "learning_rate": 0.0002122430923769723, "loss": 2.9214, "step": 36392 }, { "epoch": 1.78, "grad_norm": 0.625869631767273, "learning_rate": 0.00021222836975491274, "loss": 3.0165, "step": 36393 }, { "epoch": 1.78, "grad_norm": 0.6978575587272644, "learning_rate": 0.00021221364736402664, "loss": 3.0272, "step": 36394 }, { "epoch": 1.78, "grad_norm": 0.6914448142051697, "learning_rate": 0.00021219892520435294, "loss": 2.8926, "step": 36395 }, { "epoch": 1.78, "grad_norm": 0.6650540828704834, "learning_rate": 0.00021218420327593043, "loss": 3.0207, "step": 36396 }, { "epoch": 1.78, "grad_norm": 0.6832113862037659, "learning_rate": 0.00021216948157879776, "loss": 2.9073, "step": 36397 }, { "epoch": 1.78, "grad_norm": 0.6525067687034607, "learning_rate": 0.00021215476011299383, "loss": 2.9059, "step": 36398 }, { "epoch": 1.78, "grad_norm": 0.6345611214637756, "learning_rate": 0.00021214003887855737, "loss": 2.8414, "step": 36399 }, { "epoch": 1.78, "grad_norm": 0.6637008786201477, "learning_rate": 0.000212125317875527, "loss": 2.9859, "step": 36400 }, { "epoch": 1.78, "grad_norm": 0.6102834343910217, "learning_rate": 0.00021211059710394178, "loss": 3.1545, "step": 36401 }, { "epoch": 1.78, "grad_norm": 0.6296905875205994, "learning_rate": 0.0002120958765638403, "loss": 2.6042, "step": 36402 }, { "epoch": 1.78, "grad_norm": 0.6705476641654968, "learning_rate": 0.00021208115625526142, "loss": 2.9978, "step": 36403 }, { "epoch": 1.78, "grad_norm": 0.6274638772010803, "learning_rate": 0.0002120664361782437, "loss": 3.0183, "step": 36404 }, { "epoch": 1.78, "grad_norm": 0.6929411888122559, "learning_rate": 0.00021205171633282614, "loss": 3.3224, "step": 36405 }, { "epoch": 1.78, "grad_norm": 0.6245270371437073, "learning_rate": 0.0002120369967190475, "loss": 3.1516, "step": 36406 }, { "epoch": 1.78, "grad_norm": 0.6495602130889893, "learning_rate": 0.00021202227733694635, "loss": 2.9227, "step": 36407 }, { "epoch": 1.78, "grad_norm": 0.6302369832992554, "learning_rate": 0.0002120075581865617, "loss": 3.09, "step": 36408 }, { "epoch": 1.78, "grad_norm": 0.6170496344566345, "learning_rate": 0.00021199283926793218, "loss": 2.8749, "step": 36409 }, { "epoch": 1.78, "grad_norm": 0.6352107524871826, "learning_rate": 0.00021197812058109652, "loss": 2.9232, "step": 36410 }, { "epoch": 1.78, "grad_norm": 0.6431768536567688, "learning_rate": 0.00021196340212609362, "loss": 3.116, "step": 36411 }, { "epoch": 1.78, "grad_norm": 0.6413148045539856, "learning_rate": 0.00021194868390296217, "loss": 3.0991, "step": 36412 }, { "epoch": 1.78, "grad_norm": 0.607494592666626, "learning_rate": 0.00021193396591174098, "loss": 3.0078, "step": 36413 }, { "epoch": 1.78, "grad_norm": 0.6016876101493835, "learning_rate": 0.00021191924815246865, "loss": 2.9614, "step": 36414 }, { "epoch": 1.78, "grad_norm": 0.6435602903366089, "learning_rate": 0.00021190453062518407, "loss": 2.8346, "step": 36415 }, { "epoch": 1.78, "grad_norm": 0.6488229036331177, "learning_rate": 0.00021188981332992613, "loss": 2.989, "step": 36416 }, { "epoch": 1.78, "grad_norm": 0.623367428779602, "learning_rate": 0.00021187509626673342, "loss": 3.1408, "step": 36417 }, { "epoch": 1.78, "grad_norm": 0.6525915265083313, "learning_rate": 0.0002118603794356448, "loss": 3.0817, "step": 36418 }, { "epoch": 1.78, "grad_norm": 0.6700499653816223, "learning_rate": 0.00021184566283669883, "loss": 3.0911, "step": 36419 }, { "epoch": 1.78, "grad_norm": 0.6590023636817932, "learning_rate": 0.0002118309464699345, "loss": 2.8538, "step": 36420 }, { "epoch": 1.78, "grad_norm": 0.6719303131103516, "learning_rate": 0.00021181623033539052, "loss": 3.0935, "step": 36421 }, { "epoch": 1.78, "grad_norm": 0.6459015607833862, "learning_rate": 0.00021180151443310556, "loss": 2.809, "step": 36422 }, { "epoch": 1.79, "grad_norm": 0.6407613158226013, "learning_rate": 0.0002117867987631185, "loss": 2.7572, "step": 36423 }, { "epoch": 1.79, "grad_norm": 0.6750385165214539, "learning_rate": 0.000211772083325468, "loss": 2.9058, "step": 36424 }, { "epoch": 1.79, "grad_norm": 0.6313875317573547, "learning_rate": 0.00021175736812019275, "loss": 3.0272, "step": 36425 }, { "epoch": 1.79, "grad_norm": 0.6446053385734558, "learning_rate": 0.0002117426531473318, "loss": 2.9416, "step": 36426 }, { "epoch": 1.79, "grad_norm": 0.6613894104957581, "learning_rate": 0.00021172793840692358, "loss": 2.9213, "step": 36427 }, { "epoch": 1.79, "grad_norm": 0.6796953082084656, "learning_rate": 0.00021171322389900715, "loss": 2.9999, "step": 36428 }, { "epoch": 1.79, "grad_norm": 0.6764907836914062, "learning_rate": 0.00021169850962362088, "loss": 2.9982, "step": 36429 }, { "epoch": 1.79, "grad_norm": 0.6619737148284912, "learning_rate": 0.00021168379558080385, "loss": 2.8365, "step": 36430 }, { "epoch": 1.79, "grad_norm": 0.6781612038612366, "learning_rate": 0.00021166908177059474, "loss": 2.7274, "step": 36431 }, { "epoch": 1.79, "grad_norm": 0.6520367860794067, "learning_rate": 0.00021165436819303214, "loss": 3.0871, "step": 36432 }, { "epoch": 1.79, "grad_norm": 0.6807015538215637, "learning_rate": 0.00021163965484815507, "loss": 3.1435, "step": 36433 }, { "epoch": 1.79, "grad_norm": 0.688068687915802, "learning_rate": 0.00021162494173600202, "loss": 3.1147, "step": 36434 }, { "epoch": 1.79, "grad_norm": 0.6110426187515259, "learning_rate": 0.00021161022885661195, "loss": 2.9672, "step": 36435 }, { "epoch": 1.79, "grad_norm": 0.6746833920478821, "learning_rate": 0.00021159551621002356, "loss": 2.9923, "step": 36436 }, { "epoch": 1.79, "grad_norm": 0.6675704717636108, "learning_rate": 0.00021158080379627547, "loss": 3.146, "step": 36437 }, { "epoch": 1.79, "grad_norm": 0.6473252773284912, "learning_rate": 0.00021156609161540664, "loss": 2.9734, "step": 36438 }, { "epoch": 1.79, "grad_norm": 0.6055066585540771, "learning_rate": 0.0002115513796674556, "loss": 2.9343, "step": 36439 }, { "epoch": 1.79, "grad_norm": 0.6825117468833923, "learning_rate": 0.00021153666795246118, "loss": 2.9982, "step": 36440 }, { "epoch": 1.79, "grad_norm": 0.7194039821624756, "learning_rate": 0.00021152195647046226, "loss": 3.1512, "step": 36441 }, { "epoch": 1.79, "grad_norm": 0.6608765125274658, "learning_rate": 0.00021150724522149742, "loss": 2.9463, "step": 36442 }, { "epoch": 1.79, "grad_norm": 0.6718124747276306, "learning_rate": 0.00021149253420560556, "loss": 2.7968, "step": 36443 }, { "epoch": 1.79, "grad_norm": 0.6314850449562073, "learning_rate": 0.00021147782342282519, "loss": 2.8741, "step": 36444 }, { "epoch": 1.79, "grad_norm": 0.6046271324157715, "learning_rate": 0.00021146311287319522, "loss": 3.0566, "step": 36445 }, { "epoch": 1.79, "grad_norm": 0.6407644152641296, "learning_rate": 0.00021144840255675446, "loss": 2.9809, "step": 36446 }, { "epoch": 1.79, "grad_norm": 0.632928192615509, "learning_rate": 0.00021143369247354147, "loss": 3.0315, "step": 36447 }, { "epoch": 1.79, "grad_norm": 0.6315308213233948, "learning_rate": 0.00021141898262359515, "loss": 3.1262, "step": 36448 }, { "epoch": 1.79, "grad_norm": 0.6301126480102539, "learning_rate": 0.00021140427300695417, "loss": 2.8378, "step": 36449 }, { "epoch": 1.79, "grad_norm": 0.6344707012176514, "learning_rate": 0.00021138956362365722, "loss": 3.2058, "step": 36450 }, { "epoch": 1.79, "grad_norm": 0.6632879972457886, "learning_rate": 0.00021137485447374324, "loss": 3.1878, "step": 36451 }, { "epoch": 1.79, "grad_norm": 0.5994957089424133, "learning_rate": 0.00021136014555725075, "loss": 3.1941, "step": 36452 }, { "epoch": 1.79, "grad_norm": 0.5996524691581726, "learning_rate": 0.00021134543687421865, "loss": 3.1972, "step": 36453 }, { "epoch": 1.79, "grad_norm": 0.6582194566726685, "learning_rate": 0.00021133072842468545, "loss": 2.8146, "step": 36454 }, { "epoch": 1.79, "grad_norm": 0.6702811121940613, "learning_rate": 0.00021131602020869013, "loss": 3.0313, "step": 36455 }, { "epoch": 1.79, "grad_norm": 0.6455747485160828, "learning_rate": 0.0002113013122262714, "loss": 3.1189, "step": 36456 }, { "epoch": 1.79, "grad_norm": 0.6200703382492065, "learning_rate": 0.00021128660447746787, "loss": 2.7576, "step": 36457 }, { "epoch": 1.79, "grad_norm": 0.6644160747528076, "learning_rate": 0.00021127189696231846, "loss": 3.0674, "step": 36458 }, { "epoch": 1.79, "grad_norm": 0.6624540090560913, "learning_rate": 0.00021125718968086163, "loss": 2.9791, "step": 36459 }, { "epoch": 1.79, "grad_norm": 0.6029409766197205, "learning_rate": 0.00021124248263313644, "loss": 3.0949, "step": 36460 }, { "epoch": 1.79, "grad_norm": 0.6144557595252991, "learning_rate": 0.0002112277758191814, "loss": 3.1555, "step": 36461 }, { "epoch": 1.79, "grad_norm": 0.6617509722709656, "learning_rate": 0.00021121306923903524, "loss": 2.9906, "step": 36462 }, { "epoch": 1.79, "grad_norm": 0.6040384769439697, "learning_rate": 0.0002111983628927369, "loss": 2.941, "step": 36463 }, { "epoch": 1.79, "grad_norm": 0.6424322724342346, "learning_rate": 0.0002111836567803249, "loss": 3.1002, "step": 36464 }, { "epoch": 1.79, "grad_norm": 0.6235278844833374, "learning_rate": 0.00021116895090183817, "loss": 3.0474, "step": 36465 }, { "epoch": 1.79, "grad_norm": 0.6009097099304199, "learning_rate": 0.00021115424525731515, "loss": 3.0715, "step": 36466 }, { "epoch": 1.79, "grad_norm": 0.6081644892692566, "learning_rate": 0.00021113953984679484, "loss": 3.1374, "step": 36467 }, { "epoch": 1.79, "grad_norm": 0.6132400631904602, "learning_rate": 0.00021112483467031594, "loss": 2.8668, "step": 36468 }, { "epoch": 1.79, "grad_norm": 0.6370624303817749, "learning_rate": 0.000211110129727917, "loss": 2.8138, "step": 36469 }, { "epoch": 1.79, "grad_norm": 0.6113362908363342, "learning_rate": 0.00021109542501963695, "loss": 3.1049, "step": 36470 }, { "epoch": 1.79, "grad_norm": 0.6134870052337646, "learning_rate": 0.00021108072054551444, "loss": 3.051, "step": 36471 }, { "epoch": 1.79, "grad_norm": 0.6292585730552673, "learning_rate": 0.0002110660163055881, "loss": 2.9592, "step": 36472 }, { "epoch": 1.79, "grad_norm": 0.6173241138458252, "learning_rate": 0.0002110513122998969, "loss": 3.2329, "step": 36473 }, { "epoch": 1.79, "grad_norm": 0.6382222771644592, "learning_rate": 0.00021103660852847937, "loss": 2.9698, "step": 36474 }, { "epoch": 1.79, "grad_norm": 0.6229817867279053, "learning_rate": 0.00021102190499137433, "loss": 2.9552, "step": 36475 }, { "epoch": 1.79, "grad_norm": 0.6382546424865723, "learning_rate": 0.00021100720168862038, "loss": 3.0138, "step": 36476 }, { "epoch": 1.79, "grad_norm": 0.63431316614151, "learning_rate": 0.0002109924986202563, "loss": 2.7953, "step": 36477 }, { "epoch": 1.79, "grad_norm": 0.629733681678772, "learning_rate": 0.00021097779578632095, "loss": 3.2056, "step": 36478 }, { "epoch": 1.79, "grad_norm": 0.6950914859771729, "learning_rate": 0.00021096309318685292, "loss": 3.1653, "step": 36479 }, { "epoch": 1.79, "grad_norm": 0.6350083351135254, "learning_rate": 0.00021094839082189104, "loss": 2.9533, "step": 36480 }, { "epoch": 1.79, "grad_norm": 0.6433498859405518, "learning_rate": 0.00021093368869147384, "loss": 2.9834, "step": 36481 }, { "epoch": 1.79, "grad_norm": 0.6216697692871094, "learning_rate": 0.0002109189867956402, "loss": 3.0459, "step": 36482 }, { "epoch": 1.79, "grad_norm": 0.6351829767227173, "learning_rate": 0.00021090428513442888, "loss": 3.0169, "step": 36483 }, { "epoch": 1.79, "grad_norm": 0.6634454727172852, "learning_rate": 0.00021088958370787838, "loss": 2.9469, "step": 36484 }, { "epoch": 1.79, "grad_norm": 0.6411239504814148, "learning_rate": 0.00021087488251602776, "loss": 3.1319, "step": 36485 }, { "epoch": 1.79, "grad_norm": 0.6567073464393616, "learning_rate": 0.00021086018155891541, "loss": 2.8596, "step": 36486 }, { "epoch": 1.79, "grad_norm": 0.6086127758026123, "learning_rate": 0.00021084548083658017, "loss": 3.0581, "step": 36487 }, { "epoch": 1.79, "grad_norm": 0.6434993743896484, "learning_rate": 0.00021083078034906085, "loss": 3.1005, "step": 36488 }, { "epoch": 1.79, "grad_norm": 0.685982882976532, "learning_rate": 0.0002108160800963961, "loss": 3.0861, "step": 36489 }, { "epoch": 1.79, "grad_norm": 0.6597147583961487, "learning_rate": 0.00021080138007862465, "loss": 3.0276, "step": 36490 }, { "epoch": 1.79, "grad_norm": 0.6205618977546692, "learning_rate": 0.00021078668029578506, "loss": 3.1633, "step": 36491 }, { "epoch": 1.79, "grad_norm": 0.6635233759880066, "learning_rate": 0.00021077198074791626, "loss": 2.885, "step": 36492 }, { "epoch": 1.79, "grad_norm": 0.6489311456680298, "learning_rate": 0.00021075728143505698, "loss": 2.8726, "step": 36493 }, { "epoch": 1.79, "grad_norm": 0.6681040525436401, "learning_rate": 0.0002107425823572457, "loss": 2.8963, "step": 36494 }, { "epoch": 1.79, "grad_norm": 0.6183825731277466, "learning_rate": 0.0002107278835145214, "loss": 3.2596, "step": 36495 }, { "epoch": 1.79, "grad_norm": 0.9229050278663635, "learning_rate": 0.00021071318490692255, "loss": 3.1778, "step": 36496 }, { "epoch": 1.79, "grad_norm": 0.6755209565162659, "learning_rate": 0.00021069848653448804, "loss": 2.8989, "step": 36497 }, { "epoch": 1.79, "grad_norm": 0.6122171878814697, "learning_rate": 0.00021068378839725663, "loss": 3.1738, "step": 36498 }, { "epoch": 1.79, "grad_norm": 0.6368758082389832, "learning_rate": 0.00021066909049526677, "loss": 2.9932, "step": 36499 }, { "epoch": 1.79, "grad_norm": 0.6233503222465515, "learning_rate": 0.00021065439282855748, "loss": 3.0565, "step": 36500 }, { "epoch": 1.79, "grad_norm": 0.6276006698608398, "learning_rate": 0.00021063969539716727, "loss": 2.7239, "step": 36501 }, { "epoch": 1.79, "grad_norm": 0.6331776976585388, "learning_rate": 0.0002106249982011348, "loss": 2.9001, "step": 36502 }, { "epoch": 1.79, "grad_norm": 0.6143416166305542, "learning_rate": 0.00021061030124049903, "loss": 3.2166, "step": 36503 }, { "epoch": 1.79, "grad_norm": 0.6579669713973999, "learning_rate": 0.0002105956045152985, "loss": 2.8755, "step": 36504 }, { "epoch": 1.79, "grad_norm": 0.6055128574371338, "learning_rate": 0.00021058090802557198, "loss": 2.7474, "step": 36505 }, { "epoch": 1.79, "grad_norm": 0.6531729102134705, "learning_rate": 0.000210566211771358, "loss": 3.0321, "step": 36506 }, { "epoch": 1.79, "grad_norm": 0.6186026334762573, "learning_rate": 0.00021055151575269547, "loss": 3.0292, "step": 36507 }, { "epoch": 1.79, "grad_norm": 0.6538664102554321, "learning_rate": 0.0002105368199696231, "loss": 3.0134, "step": 36508 }, { "epoch": 1.79, "grad_norm": 0.5929544568061829, "learning_rate": 0.0002105221244221794, "loss": 3.0028, "step": 36509 }, { "epoch": 1.79, "grad_norm": 0.6697933673858643, "learning_rate": 0.0002105074291104033, "loss": 2.9312, "step": 36510 }, { "epoch": 1.79, "grad_norm": 0.6528447270393372, "learning_rate": 0.00021049273403433337, "loss": 2.8436, "step": 36511 }, { "epoch": 1.79, "grad_norm": 0.6155452728271484, "learning_rate": 0.0002104780391940083, "loss": 3.1464, "step": 36512 }, { "epoch": 1.79, "grad_norm": 0.6547124981880188, "learning_rate": 0.00021046334458946697, "loss": 2.8919, "step": 36513 }, { "epoch": 1.79, "grad_norm": 0.6116988658905029, "learning_rate": 0.0002104486502207479, "loss": 3.0086, "step": 36514 }, { "epoch": 1.79, "grad_norm": 0.6030555367469788, "learning_rate": 0.00021043395608788993, "loss": 3.0789, "step": 36515 }, { "epoch": 1.79, "grad_norm": 0.6490576267242432, "learning_rate": 0.0002104192621909315, "loss": 2.8825, "step": 36516 }, { "epoch": 1.79, "grad_norm": 0.643524169921875, "learning_rate": 0.00021040456852991157, "loss": 2.931, "step": 36517 }, { "epoch": 1.79, "grad_norm": 0.6385647654533386, "learning_rate": 0.0002103898751048688, "loss": 3.002, "step": 36518 }, { "epoch": 1.79, "grad_norm": 0.6039292812347412, "learning_rate": 0.00021037518191584188, "loss": 3.1068, "step": 36519 }, { "epoch": 1.79, "grad_norm": 0.6315106153488159, "learning_rate": 0.0002103604889628695, "loss": 3.1007, "step": 36520 }, { "epoch": 1.79, "grad_norm": 0.5782997012138367, "learning_rate": 0.0002103457962459902, "loss": 3.0973, "step": 36521 }, { "epoch": 1.79, "grad_norm": 0.6795260310173035, "learning_rate": 0.00021033110376524291, "loss": 3.1494, "step": 36522 }, { "epoch": 1.79, "grad_norm": 0.6649677157402039, "learning_rate": 0.00021031641152066628, "loss": 3.0697, "step": 36523 }, { "epoch": 1.79, "grad_norm": 0.6292036771774292, "learning_rate": 0.0002103017195122988, "loss": 3.1645, "step": 36524 }, { "epoch": 1.79, "grad_norm": 0.6512131690979004, "learning_rate": 0.00021028702774017954, "loss": 2.9064, "step": 36525 }, { "epoch": 1.79, "grad_norm": 0.6254218816757202, "learning_rate": 0.00021027233620434687, "loss": 3.1291, "step": 36526 }, { "epoch": 1.79, "grad_norm": 0.6537070274353027, "learning_rate": 0.00021025764490483956, "loss": 3.0194, "step": 36527 }, { "epoch": 1.79, "grad_norm": 0.6867677569389343, "learning_rate": 0.00021024295384169646, "loss": 2.7768, "step": 36528 }, { "epoch": 1.79, "grad_norm": 0.6541476249694824, "learning_rate": 0.00021022826301495605, "loss": 2.8653, "step": 36529 }, { "epoch": 1.79, "grad_norm": 0.6634353995323181, "learning_rate": 0.0002102135724246572, "loss": 3.1185, "step": 36530 }, { "epoch": 1.79, "grad_norm": 0.6761667728424072, "learning_rate": 0.0002101988820708384, "loss": 2.9844, "step": 36531 }, { "epoch": 1.79, "grad_norm": 0.6391075849533081, "learning_rate": 0.00021018419195353852, "loss": 2.9843, "step": 36532 }, { "epoch": 1.79, "grad_norm": 0.6631091833114624, "learning_rate": 0.00021016950207279627, "loss": 2.9588, "step": 36533 }, { "epoch": 1.79, "grad_norm": 0.6815654039382935, "learning_rate": 0.00021015481242865012, "loss": 2.9905, "step": 36534 }, { "epoch": 1.79, "grad_norm": 0.6456697583198547, "learning_rate": 0.00021014012302113903, "loss": 3.0181, "step": 36535 }, { "epoch": 1.79, "grad_norm": 0.6634407639503479, "learning_rate": 0.00021012543385030147, "loss": 3.1435, "step": 36536 }, { "epoch": 1.79, "grad_norm": 0.678929328918457, "learning_rate": 0.00021011074491617636, "loss": 2.8749, "step": 36537 }, { "epoch": 1.79, "grad_norm": 0.6539340019226074, "learning_rate": 0.00021009605621880208, "loss": 2.8567, "step": 36538 }, { "epoch": 1.79, "grad_norm": 0.6301677227020264, "learning_rate": 0.00021008136775821752, "loss": 3.1227, "step": 36539 }, { "epoch": 1.79, "grad_norm": 0.6353392601013184, "learning_rate": 0.0002100666795344614, "loss": 3.1078, "step": 36540 }, { "epoch": 1.79, "grad_norm": 0.6301056742668152, "learning_rate": 0.0002100519915475723, "loss": 3.1015, "step": 36541 }, { "epoch": 1.79, "grad_norm": 0.6307017803192139, "learning_rate": 0.000210037303797589, "loss": 2.948, "step": 36542 }, { "epoch": 1.79, "grad_norm": 0.5895671248435974, "learning_rate": 0.00021002261628455, "loss": 3.1825, "step": 36543 }, { "epoch": 1.79, "grad_norm": 0.6554360389709473, "learning_rate": 0.00021000792900849422, "loss": 3.0251, "step": 36544 }, { "epoch": 1.79, "grad_norm": 0.6623850464820862, "learning_rate": 0.00020999324196946026, "loss": 2.9006, "step": 36545 }, { "epoch": 1.79, "grad_norm": 0.6320748329162598, "learning_rate": 0.00020997855516748665, "loss": 2.932, "step": 36546 }, { "epoch": 1.79, "grad_norm": 0.655175507068634, "learning_rate": 0.0002099638686026123, "loss": 2.8594, "step": 36547 }, { "epoch": 1.79, "grad_norm": 0.719323456287384, "learning_rate": 0.00020994918227487577, "loss": 3.0602, "step": 36548 }, { "epoch": 1.79, "grad_norm": 0.6324592232704163, "learning_rate": 0.0002099344961843157, "loss": 2.763, "step": 36549 }, { "epoch": 1.79, "grad_norm": 0.5989832282066345, "learning_rate": 0.00020991981033097095, "loss": 2.6408, "step": 36550 }, { "epoch": 1.79, "grad_norm": 0.6369208693504333, "learning_rate": 0.00020990512471488003, "loss": 3.09, "step": 36551 }, { "epoch": 1.79, "grad_norm": 0.6124125123023987, "learning_rate": 0.00020989043933608169, "loss": 3.0194, "step": 36552 }, { "epoch": 1.79, "grad_norm": 0.6564167141914368, "learning_rate": 0.00020987575419461452, "loss": 3.2123, "step": 36553 }, { "epoch": 1.79, "grad_norm": 0.6500516533851624, "learning_rate": 0.00020986106929051732, "loss": 3.0145, "step": 36554 }, { "epoch": 1.79, "grad_norm": 0.6239467859268188, "learning_rate": 0.00020984638462382877, "loss": 3.0401, "step": 36555 }, { "epoch": 1.79, "grad_norm": 0.640312910079956, "learning_rate": 0.00020983170019458737, "loss": 3.0706, "step": 36556 }, { "epoch": 1.79, "grad_norm": 0.6156535744667053, "learning_rate": 0.00020981701600283204, "loss": 2.9437, "step": 36557 }, { "epoch": 1.79, "grad_norm": 0.6147194504737854, "learning_rate": 0.00020980233204860122, "loss": 3.0339, "step": 36558 }, { "epoch": 1.79, "grad_norm": 0.64543616771698, "learning_rate": 0.00020978764833193376, "loss": 2.9397, "step": 36559 }, { "epoch": 1.79, "grad_norm": 0.627521276473999, "learning_rate": 0.0002097729648528683, "loss": 2.9861, "step": 36560 }, { "epoch": 1.79, "grad_norm": 0.6274319887161255, "learning_rate": 0.0002097582816114434, "loss": 2.9149, "step": 36561 }, { "epoch": 1.79, "grad_norm": 0.6580536961555481, "learning_rate": 0.00020974359860769792, "loss": 2.9476, "step": 36562 }, { "epoch": 1.79, "grad_norm": 0.6078628301620483, "learning_rate": 0.0002097289158416704, "loss": 2.911, "step": 36563 }, { "epoch": 1.79, "grad_norm": 0.6547374725341797, "learning_rate": 0.00020971423331339945, "loss": 3.0069, "step": 36564 }, { "epoch": 1.79, "grad_norm": 0.6162861585617065, "learning_rate": 0.00020969955102292394, "loss": 2.9177, "step": 36565 }, { "epoch": 1.79, "grad_norm": 0.6419432163238525, "learning_rate": 0.00020968486897028242, "loss": 3.0997, "step": 36566 }, { "epoch": 1.79, "grad_norm": 0.6692230105400085, "learning_rate": 0.00020967018715551362, "loss": 3.0527, "step": 36567 }, { "epoch": 1.79, "grad_norm": 0.6301555633544922, "learning_rate": 0.00020965550557865606, "loss": 3.1243, "step": 36568 }, { "epoch": 1.79, "grad_norm": 0.6723204851150513, "learning_rate": 0.00020964082423974853, "loss": 3.1511, "step": 36569 }, { "epoch": 1.79, "grad_norm": 0.6410068273544312, "learning_rate": 0.00020962614313882978, "loss": 3.0315, "step": 36570 }, { "epoch": 1.79, "grad_norm": 0.6543024182319641, "learning_rate": 0.00020961146227593821, "loss": 2.8805, "step": 36571 }, { "epoch": 1.79, "grad_norm": 0.6214228868484497, "learning_rate": 0.00020959678165111283, "loss": 2.9974, "step": 36572 }, { "epoch": 1.79, "grad_norm": 0.5998808145523071, "learning_rate": 0.00020958210126439204, "loss": 3.0079, "step": 36573 }, { "epoch": 1.79, "grad_norm": 0.6029256582260132, "learning_rate": 0.00020956742111581447, "loss": 2.9974, "step": 36574 }, { "epoch": 1.79, "grad_norm": 0.6017154455184937, "learning_rate": 0.00020955274120541915, "loss": 3.0842, "step": 36575 }, { "epoch": 1.79, "grad_norm": 0.6353750228881836, "learning_rate": 0.00020953806153324437, "loss": 2.8448, "step": 36576 }, { "epoch": 1.79, "grad_norm": 0.6240317821502686, "learning_rate": 0.000209523382099329, "loss": 3.0912, "step": 36577 }, { "epoch": 1.79, "grad_norm": 0.6466373205184937, "learning_rate": 0.0002095087029037115, "loss": 2.9799, "step": 36578 }, { "epoch": 1.79, "grad_norm": 0.7123924493789673, "learning_rate": 0.00020949402394643062, "loss": 3.0003, "step": 36579 }, { "epoch": 1.79, "grad_norm": 0.6275160312652588, "learning_rate": 0.00020947934522752524, "loss": 2.9326, "step": 36580 }, { "epoch": 1.79, "grad_norm": 0.5951224565505981, "learning_rate": 0.00020946466674703378, "loss": 2.9995, "step": 36581 }, { "epoch": 1.79, "grad_norm": 0.6290155053138733, "learning_rate": 0.00020944998850499501, "loss": 2.9477, "step": 36582 }, { "epoch": 1.79, "grad_norm": 0.645408570766449, "learning_rate": 0.00020943531050144745, "loss": 3.0292, "step": 36583 }, { "epoch": 1.79, "grad_norm": 0.6126695871353149, "learning_rate": 0.00020942063273642988, "loss": 2.7644, "step": 36584 }, { "epoch": 1.79, "grad_norm": 0.6352578401565552, "learning_rate": 0.000209405955209981, "loss": 2.9679, "step": 36585 }, { "epoch": 1.79, "grad_norm": 0.6484764218330383, "learning_rate": 0.00020939127792213928, "loss": 3.058, "step": 36586 }, { "epoch": 1.79, "grad_norm": 0.6158818602561951, "learning_rate": 0.00020937660087294366, "loss": 3.0141, "step": 36587 }, { "epoch": 1.79, "grad_norm": 0.6046254634857178, "learning_rate": 0.0002093619240624325, "loss": 3.0229, "step": 36588 }, { "epoch": 1.79, "grad_norm": 0.6500994563102722, "learning_rate": 0.00020934724749064457, "loss": 2.7405, "step": 36589 }, { "epoch": 1.79, "grad_norm": 0.6411953568458557, "learning_rate": 0.00020933257115761866, "loss": 2.9773, "step": 36590 }, { "epoch": 1.79, "grad_norm": 0.6058011651039124, "learning_rate": 0.00020931789506339324, "loss": 3.0519, "step": 36591 }, { "epoch": 1.79, "grad_norm": 0.6400535702705383, "learning_rate": 0.0002093032192080071, "loss": 2.8411, "step": 36592 }, { "epoch": 1.79, "grad_norm": 0.6149111986160278, "learning_rate": 0.0002092885435914987, "loss": 3.0371, "step": 36593 }, { "epoch": 1.79, "grad_norm": 0.64310222864151, "learning_rate": 0.00020927386821390688, "loss": 2.8115, "step": 36594 }, { "epoch": 1.79, "grad_norm": 0.620502769947052, "learning_rate": 0.0002092591930752703, "loss": 2.8578, "step": 36595 }, { "epoch": 1.79, "grad_norm": 0.6260932683944702, "learning_rate": 0.00020924451817562738, "loss": 3.0976, "step": 36596 }, { "epoch": 1.79, "grad_norm": 0.6364988088607788, "learning_rate": 0.00020922984351501707, "loss": 3.0012, "step": 36597 }, { "epoch": 1.79, "grad_norm": 0.6061363220214844, "learning_rate": 0.00020921516909347778, "loss": 3.0253, "step": 36598 }, { "epoch": 1.79, "grad_norm": 0.6426119804382324, "learning_rate": 0.00020920049491104833, "loss": 3.0864, "step": 36599 }, { "epoch": 1.79, "grad_norm": 0.6786094903945923, "learning_rate": 0.00020918582096776737, "loss": 2.8078, "step": 36600 }, { "epoch": 1.79, "grad_norm": 0.60618656873703, "learning_rate": 0.0002091711472636733, "loss": 2.9329, "step": 36601 }, { "epoch": 1.79, "grad_norm": 0.6546763777732849, "learning_rate": 0.00020915647379880515, "loss": 3.0702, "step": 36602 }, { "epoch": 1.79, "grad_norm": 0.6468024849891663, "learning_rate": 0.00020914180057320127, "loss": 3.0346, "step": 36603 }, { "epoch": 1.79, "grad_norm": 0.6212300062179565, "learning_rate": 0.00020912712758690033, "loss": 3.0142, "step": 36604 }, { "epoch": 1.79, "grad_norm": 0.6541531682014465, "learning_rate": 0.00020911245483994117, "loss": 2.9084, "step": 36605 }, { "epoch": 1.79, "grad_norm": 0.680400550365448, "learning_rate": 0.00020909778233236224, "loss": 2.9649, "step": 36606 }, { "epoch": 1.79, "grad_norm": 0.6672208309173584, "learning_rate": 0.00020908311006420234, "loss": 2.812, "step": 36607 }, { "epoch": 1.79, "grad_norm": 0.6206700205802917, "learning_rate": 0.0002090684380354999, "loss": 3.1004, "step": 36608 }, { "epoch": 1.79, "grad_norm": 0.6560788154602051, "learning_rate": 0.00020905376624629377, "loss": 2.8389, "step": 36609 }, { "epoch": 1.79, "grad_norm": 0.61861652135849, "learning_rate": 0.00020903909469662253, "loss": 3.2776, "step": 36610 }, { "epoch": 1.79, "grad_norm": 0.6285805702209473, "learning_rate": 0.00020902442338652474, "loss": 3.0981, "step": 36611 }, { "epoch": 1.79, "grad_norm": 0.6283038258552551, "learning_rate": 0.0002090097523160392, "loss": 2.9732, "step": 36612 }, { "epoch": 1.79, "grad_norm": 0.6750781536102295, "learning_rate": 0.00020899508148520442, "loss": 2.8903, "step": 36613 }, { "epoch": 1.79, "grad_norm": 0.6676583886146545, "learning_rate": 0.00020898041089405912, "loss": 3.013, "step": 36614 }, { "epoch": 1.79, "grad_norm": 0.6667542457580566, "learning_rate": 0.00020896574054264183, "loss": 2.9005, "step": 36615 }, { "epoch": 1.79, "grad_norm": 0.6553710699081421, "learning_rate": 0.00020895107043099125, "loss": 2.7584, "step": 36616 }, { "epoch": 1.79, "grad_norm": 0.6519506573677063, "learning_rate": 0.00020893640055914613, "loss": 3.0171, "step": 36617 }, { "epoch": 1.79, "grad_norm": 0.6207144260406494, "learning_rate": 0.00020892173092714491, "loss": 2.9434, "step": 36618 }, { "epoch": 1.79, "grad_norm": 0.6540098190307617, "learning_rate": 0.0002089070615350264, "loss": 3.1374, "step": 36619 }, { "epoch": 1.79, "grad_norm": 0.6426064372062683, "learning_rate": 0.00020889239238282902, "loss": 2.8674, "step": 36620 }, { "epoch": 1.79, "grad_norm": 0.6309428811073303, "learning_rate": 0.00020887772347059165, "loss": 2.9389, "step": 36621 }, { "epoch": 1.79, "grad_norm": 0.6225348711013794, "learning_rate": 0.00020886305479835286, "loss": 2.9193, "step": 36622 }, { "epoch": 1.79, "grad_norm": 0.626919686794281, "learning_rate": 0.00020884838636615113, "loss": 3.2561, "step": 36623 }, { "epoch": 1.79, "grad_norm": 0.637225329875946, "learning_rate": 0.00020883371817402533, "loss": 2.9154, "step": 36624 }, { "epoch": 1.79, "grad_norm": 0.7885748744010925, "learning_rate": 0.00020881905022201392, "loss": 2.9749, "step": 36625 }, { "epoch": 1.79, "grad_norm": 0.6135236620903015, "learning_rate": 0.00020880438251015548, "loss": 3.3823, "step": 36626 }, { "epoch": 1.8, "grad_norm": 0.634709358215332, "learning_rate": 0.00020878971503848892, "loss": 2.9316, "step": 36627 }, { "epoch": 1.8, "grad_norm": 0.6408893465995789, "learning_rate": 0.00020877504780705258, "loss": 3.0149, "step": 36628 }, { "epoch": 1.8, "grad_norm": 0.8755736351013184, "learning_rate": 0.00020876038081588534, "loss": 2.8576, "step": 36629 }, { "epoch": 1.8, "grad_norm": 0.6238410472869873, "learning_rate": 0.00020874571406502551, "loss": 2.9549, "step": 36630 }, { "epoch": 1.8, "grad_norm": 0.633625864982605, "learning_rate": 0.00020873104755451198, "loss": 2.7371, "step": 36631 }, { "epoch": 1.8, "grad_norm": 0.6014534831047058, "learning_rate": 0.00020871638128438342, "loss": 3.0362, "step": 36632 }, { "epoch": 1.8, "grad_norm": 0.6362054944038391, "learning_rate": 0.00020870171525467812, "loss": 2.9729, "step": 36633 }, { "epoch": 1.8, "grad_norm": 0.6586917638778687, "learning_rate": 0.00020868704946543518, "loss": 3.0641, "step": 36634 }, { "epoch": 1.8, "grad_norm": 0.6921262145042419, "learning_rate": 0.00020867238391669284, "loss": 3.0557, "step": 36635 }, { "epoch": 1.8, "grad_norm": 0.6216939687728882, "learning_rate": 0.0002086577186084898, "loss": 3.0349, "step": 36636 }, { "epoch": 1.8, "grad_norm": 0.6409044861793518, "learning_rate": 0.00020864305354086489, "loss": 3.1367, "step": 36637 }, { "epoch": 1.8, "grad_norm": 0.6375213265419006, "learning_rate": 0.00020862838871385655, "loss": 3.0367, "step": 36638 }, { "epoch": 1.8, "grad_norm": 0.6405977010726929, "learning_rate": 0.00020861372412750347, "loss": 3.1688, "step": 36639 }, { "epoch": 1.8, "grad_norm": 0.6716753244400024, "learning_rate": 0.00020859905978184415, "loss": 3.0811, "step": 36640 }, { "epoch": 1.8, "grad_norm": 0.7119506597518921, "learning_rate": 0.0002085843956769173, "loss": 3.0607, "step": 36641 }, { "epoch": 1.8, "grad_norm": 0.6559655070304871, "learning_rate": 0.0002085697318127617, "loss": 3.1062, "step": 36642 }, { "epoch": 1.8, "grad_norm": 0.6267669796943665, "learning_rate": 0.0002085550681894158, "loss": 3.0712, "step": 36643 }, { "epoch": 1.8, "grad_norm": 0.6666412949562073, "learning_rate": 0.00020854040480691824, "loss": 3.0376, "step": 36644 }, { "epoch": 1.8, "grad_norm": 0.6373438835144043, "learning_rate": 0.0002085257416653076, "loss": 3.1018, "step": 36645 }, { "epoch": 1.8, "grad_norm": 0.6477558612823486, "learning_rate": 0.00020851107876462257, "loss": 3.0902, "step": 36646 }, { "epoch": 1.8, "grad_norm": 0.6476315259933472, "learning_rate": 0.00020849641610490185, "loss": 3.0337, "step": 36647 }, { "epoch": 1.8, "grad_norm": 0.6021722555160522, "learning_rate": 0.0002084817536861838, "loss": 3.0909, "step": 36648 }, { "epoch": 1.8, "grad_norm": 0.6318761110305786, "learning_rate": 0.00020846709150850736, "loss": 2.9111, "step": 36649 }, { "epoch": 1.8, "grad_norm": 0.6342579126358032, "learning_rate": 0.00020845242957191087, "loss": 3.0186, "step": 36650 }, { "epoch": 1.8, "grad_norm": 0.6395136713981628, "learning_rate": 0.00020843776787643302, "loss": 3.0277, "step": 36651 }, { "epoch": 1.8, "grad_norm": 0.613551914691925, "learning_rate": 0.0002084231064221126, "loss": 3.0372, "step": 36652 }, { "epoch": 1.8, "grad_norm": 0.6066928505897522, "learning_rate": 0.00020840844520898804, "loss": 3.0808, "step": 36653 }, { "epoch": 1.8, "grad_norm": 0.6370232701301575, "learning_rate": 0.00020839378423709805, "loss": 3.1052, "step": 36654 }, { "epoch": 1.8, "grad_norm": 0.6237879991531372, "learning_rate": 0.0002083791235064811, "loss": 2.753, "step": 36655 }, { "epoch": 1.8, "grad_norm": 0.6108696460723877, "learning_rate": 0.00020836446301717598, "loss": 2.9691, "step": 36656 }, { "epoch": 1.8, "grad_norm": 0.6503059267997742, "learning_rate": 0.00020834980276922125, "loss": 3.0589, "step": 36657 }, { "epoch": 1.8, "grad_norm": 0.6078692078590393, "learning_rate": 0.0002083351427626554, "loss": 3.2604, "step": 36658 }, { "epoch": 1.8, "grad_norm": 0.6196121573448181, "learning_rate": 0.00020832048299751726, "loss": 3.1257, "step": 36659 }, { "epoch": 1.8, "grad_norm": 0.651357889175415, "learning_rate": 0.0002083058234738452, "loss": 2.9815, "step": 36660 }, { "epoch": 1.8, "grad_norm": 0.5992416143417358, "learning_rate": 0.000208291164191678, "loss": 2.7149, "step": 36661 }, { "epoch": 1.8, "grad_norm": 0.6600878238677979, "learning_rate": 0.0002082765051510543, "loss": 3.1287, "step": 36662 }, { "epoch": 1.8, "grad_norm": 0.6135623455047607, "learning_rate": 0.00020826184635201254, "loss": 2.8863, "step": 36663 }, { "epoch": 1.8, "grad_norm": 0.614033579826355, "learning_rate": 0.0002082471877945915, "loss": 3.1524, "step": 36664 }, { "epoch": 1.8, "grad_norm": 0.6445769667625427, "learning_rate": 0.00020823252947882969, "loss": 2.993, "step": 36665 }, { "epoch": 1.8, "grad_norm": 0.6738548874855042, "learning_rate": 0.00020821787140476564, "loss": 3.0463, "step": 36666 }, { "epoch": 1.8, "grad_norm": 0.678357720375061, "learning_rate": 0.00020820321357243818, "loss": 2.7473, "step": 36667 }, { "epoch": 1.8, "grad_norm": 0.6646535396575928, "learning_rate": 0.00020818855598188574, "loss": 2.9655, "step": 36668 }, { "epoch": 1.8, "grad_norm": 0.6294272541999817, "learning_rate": 0.000208173898633147, "loss": 3.0103, "step": 36669 }, { "epoch": 1.8, "grad_norm": 0.6239528059959412, "learning_rate": 0.0002081592415262605, "loss": 3.0741, "step": 36670 }, { "epoch": 1.8, "grad_norm": 0.630536675453186, "learning_rate": 0.00020814458466126485, "loss": 3.0073, "step": 36671 }, { "epoch": 1.8, "grad_norm": 0.6464529633522034, "learning_rate": 0.0002081299280381988, "loss": 3.1235, "step": 36672 }, { "epoch": 1.8, "grad_norm": 0.6047073602676392, "learning_rate": 0.00020811527165710069, "loss": 2.9439, "step": 36673 }, { "epoch": 1.8, "grad_norm": 0.6120465397834778, "learning_rate": 0.0002081006155180094, "loss": 2.8782, "step": 36674 }, { "epoch": 1.8, "grad_norm": 0.631659984588623, "learning_rate": 0.00020808595962096336, "loss": 3.1335, "step": 36675 }, { "epoch": 1.8, "grad_norm": 0.6841307282447815, "learning_rate": 0.00020807130396600115, "loss": 2.9434, "step": 36676 }, { "epoch": 1.8, "grad_norm": 0.626285970211029, "learning_rate": 0.00020805664855316153, "loss": 2.8426, "step": 36677 }, { "epoch": 1.8, "grad_norm": 0.6159030795097351, "learning_rate": 0.00020804199338248292, "loss": 3.0015, "step": 36678 }, { "epoch": 1.8, "grad_norm": 0.593059778213501, "learning_rate": 0.00020802733845400411, "loss": 2.8796, "step": 36679 }, { "epoch": 1.8, "grad_norm": 0.7196869254112244, "learning_rate": 0.00020801268376776348, "loss": 2.7227, "step": 36680 }, { "epoch": 1.8, "grad_norm": 0.6337694525718689, "learning_rate": 0.0002079980293237997, "loss": 3.0181, "step": 36681 }, { "epoch": 1.8, "grad_norm": 0.612427830696106, "learning_rate": 0.00020798337512215154, "loss": 2.9788, "step": 36682 }, { "epoch": 1.8, "grad_norm": 0.629952073097229, "learning_rate": 0.0002079687211628574, "loss": 2.9669, "step": 36683 }, { "epoch": 1.8, "grad_norm": 0.657768964767456, "learning_rate": 0.00020795406744595604, "loss": 2.9131, "step": 36684 }, { "epoch": 1.8, "grad_norm": 0.6409008502960205, "learning_rate": 0.00020793941397148576, "loss": 3.0031, "step": 36685 }, { "epoch": 1.8, "grad_norm": 0.6390206217765808, "learning_rate": 0.00020792476073948544, "loss": 3.0613, "step": 36686 }, { "epoch": 1.8, "grad_norm": 0.663670539855957, "learning_rate": 0.00020791010774999367, "loss": 3.0369, "step": 36687 }, { "epoch": 1.8, "grad_norm": 0.6246560215950012, "learning_rate": 0.00020789545500304873, "loss": 3.0468, "step": 36688 }, { "epoch": 1.8, "grad_norm": 0.6665436625480652, "learning_rate": 0.00020788080249868965, "loss": 2.7913, "step": 36689 }, { "epoch": 1.8, "grad_norm": 0.6187042593955994, "learning_rate": 0.00020786615023695473, "loss": 2.8444, "step": 36690 }, { "epoch": 1.8, "grad_norm": 0.659570038318634, "learning_rate": 0.0002078514982178827, "loss": 2.9988, "step": 36691 }, { "epoch": 1.8, "grad_norm": 0.6804064512252808, "learning_rate": 0.00020783684644151194, "loss": 3.032, "step": 36692 }, { "epoch": 1.8, "grad_norm": 0.6458125710487366, "learning_rate": 0.00020782219490788126, "loss": 2.9141, "step": 36693 }, { "epoch": 1.8, "grad_norm": 0.6113386154174805, "learning_rate": 0.00020780754361702925, "loss": 2.9778, "step": 36694 }, { "epoch": 1.8, "grad_norm": 0.6785792112350464, "learning_rate": 0.00020779289256899435, "loss": 3.1989, "step": 36695 }, { "epoch": 1.8, "grad_norm": 0.5986670851707458, "learning_rate": 0.0002077782417638153, "loss": 2.8518, "step": 36696 }, { "epoch": 1.8, "grad_norm": 0.6187350749969482, "learning_rate": 0.00020776359120153054, "loss": 2.9814, "step": 36697 }, { "epoch": 1.8, "grad_norm": 0.6327255368232727, "learning_rate": 0.00020774894088217868, "loss": 3.1217, "step": 36698 }, { "epoch": 1.8, "grad_norm": 0.6451022028923035, "learning_rate": 0.0002077342908057985, "loss": 3.1749, "step": 36699 }, { "epoch": 1.8, "grad_norm": 0.6500363945960999, "learning_rate": 0.00020771964097242827, "loss": 2.8666, "step": 36700 }, { "epoch": 1.8, "grad_norm": 0.659353494644165, "learning_rate": 0.00020770499138210696, "loss": 3.2217, "step": 36701 }, { "epoch": 1.8, "grad_norm": 0.6432385444641113, "learning_rate": 0.00020769034203487274, "loss": 2.7555, "step": 36702 }, { "epoch": 1.8, "grad_norm": 0.7030990123748779, "learning_rate": 0.00020767569293076444, "loss": 2.9348, "step": 36703 }, { "epoch": 1.8, "grad_norm": 0.6431054472923279, "learning_rate": 0.00020766104406982067, "loss": 2.866, "step": 36704 }, { "epoch": 1.8, "grad_norm": 0.6525076627731323, "learning_rate": 0.00020764639545207988, "loss": 3.0982, "step": 36705 }, { "epoch": 1.8, "grad_norm": 0.6153354644775391, "learning_rate": 0.0002076317470775808, "loss": 2.7865, "step": 36706 }, { "epoch": 1.8, "grad_norm": 0.6521280407905579, "learning_rate": 0.0002076170989463618, "loss": 2.9171, "step": 36707 }, { "epoch": 1.8, "grad_norm": 0.677487313747406, "learning_rate": 0.00020760245105846164, "loss": 3.1123, "step": 36708 }, { "epoch": 1.8, "grad_norm": 0.6668803095817566, "learning_rate": 0.00020758780341391888, "loss": 2.9094, "step": 36709 }, { "epoch": 1.8, "grad_norm": 0.6921082139015198, "learning_rate": 0.00020757315601277197, "loss": 3.0438, "step": 36710 }, { "epoch": 1.8, "grad_norm": 0.6118077039718628, "learning_rate": 0.00020755850885505965, "loss": 3.111, "step": 36711 }, { "epoch": 1.8, "grad_norm": 0.6800283789634705, "learning_rate": 0.00020754386194082037, "loss": 2.9849, "step": 36712 }, { "epoch": 1.8, "grad_norm": 0.6486184597015381, "learning_rate": 0.00020752921527009277, "loss": 2.8923, "step": 36713 }, { "epoch": 1.8, "grad_norm": 0.631157636642456, "learning_rate": 0.0002075145688429155, "loss": 3.034, "step": 36714 }, { "epoch": 1.8, "grad_norm": 0.610759973526001, "learning_rate": 0.00020749992265932697, "loss": 3.0504, "step": 36715 }, { "epoch": 1.8, "grad_norm": 0.6135387420654297, "learning_rate": 0.00020748527671936596, "loss": 2.8504, "step": 36716 }, { "epoch": 1.8, "grad_norm": 0.6542940139770508, "learning_rate": 0.00020747063102307078, "loss": 3.0165, "step": 36717 }, { "epoch": 1.8, "grad_norm": 0.7032894492149353, "learning_rate": 0.00020745598557048018, "loss": 2.9044, "step": 36718 }, { "epoch": 1.8, "grad_norm": 0.6352038383483887, "learning_rate": 0.0002074413403616328, "loss": 2.8658, "step": 36719 }, { "epoch": 1.8, "grad_norm": 0.6255574822425842, "learning_rate": 0.000207426695396567, "loss": 3.1381, "step": 36720 }, { "epoch": 1.8, "grad_norm": 0.6670256853103638, "learning_rate": 0.00020741205067532158, "loss": 2.993, "step": 36721 }, { "epoch": 1.8, "grad_norm": 0.6089436411857605, "learning_rate": 0.00020739740619793486, "loss": 3.0545, "step": 36722 }, { "epoch": 1.8, "grad_norm": 0.6588670015335083, "learning_rate": 0.0002073827619644456, "loss": 3.0358, "step": 36723 }, { "epoch": 1.8, "grad_norm": 0.6333926916122437, "learning_rate": 0.00020736811797489243, "loss": 2.8797, "step": 36724 }, { "epoch": 1.8, "grad_norm": 0.6069063544273376, "learning_rate": 0.00020735347422931366, "loss": 3.113, "step": 36725 }, { "epoch": 1.8, "grad_norm": 0.6227714419364929, "learning_rate": 0.00020733883072774815, "loss": 3.0379, "step": 36726 }, { "epoch": 1.8, "grad_norm": 0.635350227355957, "learning_rate": 0.00020732418747023424, "loss": 3.0705, "step": 36727 }, { "epoch": 1.8, "grad_norm": 0.6420906186103821, "learning_rate": 0.00020730954445681053, "loss": 2.8377, "step": 36728 }, { "epoch": 1.8, "grad_norm": 0.6361296772956848, "learning_rate": 0.0002072949016875158, "loss": 3.1277, "step": 36729 }, { "epoch": 1.8, "grad_norm": 0.6772409677505493, "learning_rate": 0.00020728025916238836, "loss": 2.8932, "step": 36730 }, { "epoch": 1.8, "grad_norm": 0.6195856928825378, "learning_rate": 0.00020726561688146696, "loss": 3.0179, "step": 36731 }, { "epoch": 1.8, "grad_norm": 0.6762132048606873, "learning_rate": 0.0002072509748447899, "loss": 3.0329, "step": 36732 }, { "epoch": 1.8, "grad_norm": 0.6098534464836121, "learning_rate": 0.000207236333052396, "loss": 2.9847, "step": 36733 }, { "epoch": 1.8, "grad_norm": 0.6518588066101074, "learning_rate": 0.00020722169150432383, "loss": 3.0305, "step": 36734 }, { "epoch": 1.8, "grad_norm": 0.6830816268920898, "learning_rate": 0.00020720705020061173, "loss": 3.1915, "step": 36735 }, { "epoch": 1.8, "grad_norm": 0.6383629441261292, "learning_rate": 0.00020719240914129854, "loss": 3.1503, "step": 36736 }, { "epoch": 1.8, "grad_norm": 0.5932306051254272, "learning_rate": 0.0002071777683264226, "loss": 3.0398, "step": 36737 }, { "epoch": 1.8, "grad_norm": 0.6360545754432678, "learning_rate": 0.00020716312775602254, "loss": 3.0491, "step": 36738 }, { "epoch": 1.8, "grad_norm": 0.5986540913581848, "learning_rate": 0.000207148487430137, "loss": 2.8491, "step": 36739 }, { "epoch": 1.8, "grad_norm": 0.6128685474395752, "learning_rate": 0.00020713384734880445, "loss": 2.9966, "step": 36740 }, { "epoch": 1.8, "grad_norm": 0.5811918377876282, "learning_rate": 0.00020711920751206354, "loss": 2.828, "step": 36741 }, { "epoch": 1.8, "grad_norm": 0.6315313577651978, "learning_rate": 0.00020710456791995266, "loss": 3.0263, "step": 36742 }, { "epoch": 1.8, "grad_norm": 0.5910419821739197, "learning_rate": 0.0002070899285725104, "loss": 2.8827, "step": 36743 }, { "epoch": 1.8, "grad_norm": 0.6899007558822632, "learning_rate": 0.00020707528946977555, "loss": 2.9803, "step": 36744 }, { "epoch": 1.8, "grad_norm": 0.6172998547554016, "learning_rate": 0.00020706065061178644, "loss": 3.0134, "step": 36745 }, { "epoch": 1.8, "grad_norm": 0.6163078546524048, "learning_rate": 0.00020704601199858172, "loss": 3.1796, "step": 36746 }, { "epoch": 1.8, "grad_norm": 0.6540799140930176, "learning_rate": 0.00020703137363019985, "loss": 2.8401, "step": 36747 }, { "epoch": 1.8, "grad_norm": 0.6538397073745728, "learning_rate": 0.0002070167355066795, "loss": 2.8745, "step": 36748 }, { "epoch": 1.8, "grad_norm": 0.7262605428695679, "learning_rate": 0.00020700209762805922, "loss": 3.2279, "step": 36749 }, { "epoch": 1.8, "grad_norm": 0.6116203665733337, "learning_rate": 0.00020698745999437736, "loss": 3.0517, "step": 36750 }, { "epoch": 1.8, "grad_norm": 0.6457718014717102, "learning_rate": 0.0002069728226056728, "loss": 2.8412, "step": 36751 }, { "epoch": 1.8, "grad_norm": 0.6367230415344238, "learning_rate": 0.00020695818546198383, "loss": 3.0288, "step": 36752 }, { "epoch": 1.8, "grad_norm": 0.6102105379104614, "learning_rate": 0.00020694354856334908, "loss": 2.8687, "step": 36753 }, { "epoch": 1.8, "grad_norm": 0.679972767829895, "learning_rate": 0.00020692891190980716, "loss": 2.9442, "step": 36754 }, { "epoch": 1.8, "grad_norm": 0.6194684505462646, "learning_rate": 0.00020691427550139656, "loss": 2.9587, "step": 36755 }, { "epoch": 1.8, "grad_norm": 0.7451155781745911, "learning_rate": 0.00020689963933815593, "loss": 3.0858, "step": 36756 }, { "epoch": 1.8, "grad_norm": 0.615552544593811, "learning_rate": 0.00020688500342012355, "loss": 2.8748, "step": 36757 }, { "epoch": 1.8, "grad_norm": 0.6059805750846863, "learning_rate": 0.00020687036774733823, "loss": 2.7704, "step": 36758 }, { "epoch": 1.8, "grad_norm": 0.6106909513473511, "learning_rate": 0.00020685573231983852, "loss": 2.8743, "step": 36759 }, { "epoch": 1.8, "grad_norm": 0.6602317094802856, "learning_rate": 0.00020684109713766278, "loss": 3.0426, "step": 36760 }, { "epoch": 1.8, "grad_norm": 0.6297293305397034, "learning_rate": 0.00020682646220084974, "loss": 3.0277, "step": 36761 }, { "epoch": 1.8, "grad_norm": 0.6300654411315918, "learning_rate": 0.00020681182750943776, "loss": 3.0978, "step": 36762 }, { "epoch": 1.8, "grad_norm": 0.6634638905525208, "learning_rate": 0.00020679719306346557, "loss": 3.0136, "step": 36763 }, { "epoch": 1.8, "grad_norm": 0.6517794728279114, "learning_rate": 0.00020678255886297171, "loss": 2.971, "step": 36764 }, { "epoch": 1.8, "grad_norm": 0.6523165106773376, "learning_rate": 0.00020676792490799447, "loss": 3.0286, "step": 36765 }, { "epoch": 1.8, "grad_norm": 0.6734626889228821, "learning_rate": 0.00020675329119857275, "loss": 3.1295, "step": 36766 }, { "epoch": 1.8, "grad_norm": 0.632370114326477, "learning_rate": 0.00020673865773474477, "loss": 3.0338, "step": 36767 }, { "epoch": 1.8, "grad_norm": 0.6716340780258179, "learning_rate": 0.0002067240245165494, "loss": 3.0109, "step": 36768 }, { "epoch": 1.8, "grad_norm": 0.641351044178009, "learning_rate": 0.00020670939154402478, "loss": 2.9696, "step": 36769 }, { "epoch": 1.8, "grad_norm": 0.610369086265564, "learning_rate": 0.00020669475881720975, "loss": 3.0894, "step": 36770 }, { "epoch": 1.8, "grad_norm": 0.6596361398696899, "learning_rate": 0.00020668012633614287, "loss": 3.0013, "step": 36771 }, { "epoch": 1.8, "grad_norm": 0.6531994938850403, "learning_rate": 0.00020666549410086244, "loss": 3.2047, "step": 36772 }, { "epoch": 1.8, "grad_norm": 0.6469523906707764, "learning_rate": 0.00020665086211140723, "loss": 2.853, "step": 36773 }, { "epoch": 1.8, "grad_norm": 0.6518253684043884, "learning_rate": 0.00020663623036781565, "loss": 2.9245, "step": 36774 }, { "epoch": 1.8, "grad_norm": 0.617925763130188, "learning_rate": 0.00020662159887012616, "loss": 2.7722, "step": 36775 }, { "epoch": 1.8, "grad_norm": 0.6405405402183533, "learning_rate": 0.00020660696761837761, "loss": 3.2794, "step": 36776 }, { "epoch": 1.8, "grad_norm": 0.6911517977714539, "learning_rate": 0.00020659233661260822, "loss": 2.9798, "step": 36777 }, { "epoch": 1.8, "grad_norm": 0.627225935459137, "learning_rate": 0.0002065777058528567, "loss": 3.0352, "step": 36778 }, { "epoch": 1.8, "grad_norm": 0.6320593953132629, "learning_rate": 0.00020656307533916142, "loss": 2.9623, "step": 36779 }, { "epoch": 1.8, "grad_norm": 0.6159602403640747, "learning_rate": 0.00020654844507156107, "loss": 3.0543, "step": 36780 }, { "epoch": 1.8, "grad_norm": 0.6653395891189575, "learning_rate": 0.00020653381505009418, "loss": 2.9946, "step": 36781 }, { "epoch": 1.8, "grad_norm": 0.6371135711669922, "learning_rate": 0.00020651918527479914, "loss": 3.1782, "step": 36782 }, { "epoch": 1.8, "grad_norm": 0.6792336106300354, "learning_rate": 0.0002065045557457147, "loss": 3.0116, "step": 36783 }, { "epoch": 1.8, "grad_norm": 0.6385068297386169, "learning_rate": 0.0002064899264628791, "loss": 2.9848, "step": 36784 }, { "epoch": 1.8, "grad_norm": 0.6543515920639038, "learning_rate": 0.00020647529742633117, "loss": 2.9478, "step": 36785 }, { "epoch": 1.8, "grad_norm": 0.6753814816474915, "learning_rate": 0.00020646066863610931, "loss": 3.0857, "step": 36786 }, { "epoch": 1.8, "grad_norm": 0.6674555540084839, "learning_rate": 0.00020644604009225191, "loss": 3.0606, "step": 36787 }, { "epoch": 1.8, "grad_norm": 0.6608149409294128, "learning_rate": 0.00020643141179479775, "loss": 3.1004, "step": 36788 }, { "epoch": 1.8, "grad_norm": 0.6545112729072571, "learning_rate": 0.00020641678374378527, "loss": 2.8895, "step": 36789 }, { "epoch": 1.8, "grad_norm": 0.6464564800262451, "learning_rate": 0.0002064021559392528, "loss": 3.1448, "step": 36790 }, { "epoch": 1.8, "grad_norm": 0.6789308190345764, "learning_rate": 0.00020638752838123925, "loss": 3.0127, "step": 36791 }, { "epoch": 1.8, "grad_norm": 0.6076009273529053, "learning_rate": 0.00020637290106978283, "loss": 3.0271, "step": 36792 }, { "epoch": 1.8, "grad_norm": 0.6446407437324524, "learning_rate": 0.00020635827400492223, "loss": 2.9744, "step": 36793 }, { "epoch": 1.8, "grad_norm": 0.6316608786582947, "learning_rate": 0.00020634364718669584, "loss": 3.008, "step": 36794 }, { "epoch": 1.8, "grad_norm": 0.6050397157669067, "learning_rate": 0.0002063290206151423, "loss": 2.8838, "step": 36795 }, { "epoch": 1.8, "grad_norm": 0.6316719651222229, "learning_rate": 0.00020631439429030012, "loss": 2.9403, "step": 36796 }, { "epoch": 1.8, "grad_norm": 0.6683484315872192, "learning_rate": 0.00020629976821220767, "loss": 3.0497, "step": 36797 }, { "epoch": 1.8, "grad_norm": 0.663461446762085, "learning_rate": 0.00020628514238090373, "loss": 3.0129, "step": 36798 }, { "epoch": 1.8, "grad_norm": 0.6077750325202942, "learning_rate": 0.00020627051679642666, "loss": 2.9524, "step": 36799 }, { "epoch": 1.8, "grad_norm": 0.632233738899231, "learning_rate": 0.0002062558914588149, "loss": 2.8329, "step": 36800 }, { "epoch": 1.8, "grad_norm": 0.6921725273132324, "learning_rate": 0.00020624126636810725, "loss": 3.0573, "step": 36801 }, { "epoch": 1.8, "grad_norm": 0.6208415031433105, "learning_rate": 0.000206226641524342, "loss": 2.8597, "step": 36802 }, { "epoch": 1.8, "grad_norm": 0.6545069217681885, "learning_rate": 0.00020621201692755777, "loss": 3.0219, "step": 36803 }, { "epoch": 1.8, "grad_norm": 0.649098813533783, "learning_rate": 0.0002061973925777929, "loss": 3.1196, "step": 36804 }, { "epoch": 1.8, "grad_norm": 0.6079701781272888, "learning_rate": 0.00020618276847508604, "loss": 2.788, "step": 36805 }, { "epoch": 1.8, "grad_norm": 0.6139565110206604, "learning_rate": 0.00020616814461947588, "loss": 2.9477, "step": 36806 }, { "epoch": 1.8, "grad_norm": 0.6319758296012878, "learning_rate": 0.0002061535210110007, "loss": 3.0009, "step": 36807 }, { "epoch": 1.8, "grad_norm": 0.6935994029045105, "learning_rate": 0.0002061388976496991, "loss": 2.9628, "step": 36808 }, { "epoch": 1.8, "grad_norm": 0.6240067481994629, "learning_rate": 0.00020612427453560946, "loss": 2.795, "step": 36809 }, { "epoch": 1.8, "grad_norm": 0.6220824122428894, "learning_rate": 0.00020610965166877054, "loss": 3.0931, "step": 36810 }, { "epoch": 1.8, "grad_norm": 0.6552855968475342, "learning_rate": 0.00020609502904922074, "loss": 2.8704, "step": 36811 }, { "epoch": 1.8, "grad_norm": 0.6498665809631348, "learning_rate": 0.00020608040667699845, "loss": 3.0281, "step": 36812 }, { "epoch": 1.8, "grad_norm": 0.6534984707832336, "learning_rate": 0.00020606578455214242, "loss": 3.0395, "step": 36813 }, { "epoch": 1.8, "grad_norm": 0.7032010555267334, "learning_rate": 0.00020605116267469097, "loss": 3.0231, "step": 36814 }, { "epoch": 1.8, "grad_norm": 0.6376596093177795, "learning_rate": 0.0002060365410446826, "loss": 3.1822, "step": 36815 }, { "epoch": 1.8, "grad_norm": 0.6410520672798157, "learning_rate": 0.00020602191966215605, "loss": 3.0589, "step": 36816 }, { "epoch": 1.8, "grad_norm": 0.6917757391929626, "learning_rate": 0.00020600729852714958, "loss": 2.9896, "step": 36817 }, { "epoch": 1.8, "grad_norm": 0.6153084635734558, "learning_rate": 0.0002059926776397019, "loss": 3.0361, "step": 36818 }, { "epoch": 1.8, "grad_norm": 0.720618724822998, "learning_rate": 0.00020597805699985124, "loss": 3.0778, "step": 36819 }, { "epoch": 1.8, "grad_norm": 0.6662018895149231, "learning_rate": 0.00020596343660763642, "loss": 2.7993, "step": 36820 }, { "epoch": 1.8, "grad_norm": 0.6655521392822266, "learning_rate": 0.0002059488164630958, "loss": 2.942, "step": 36821 }, { "epoch": 1.8, "grad_norm": 0.6253843307495117, "learning_rate": 0.00020593419656626778, "loss": 3.0623, "step": 36822 }, { "epoch": 1.8, "grad_norm": 0.6495913863182068, "learning_rate": 0.00020591957691719112, "loss": 2.8426, "step": 36823 }, { "epoch": 1.8, "grad_norm": 0.6686586737632751, "learning_rate": 0.00020590495751590408, "loss": 2.9081, "step": 36824 }, { "epoch": 1.8, "grad_norm": 0.6453542113304138, "learning_rate": 0.00020589033836244534, "loss": 3.1822, "step": 36825 }, { "epoch": 1.8, "grad_norm": 0.6410621404647827, "learning_rate": 0.0002058757194568534, "loss": 2.809, "step": 36826 }, { "epoch": 1.8, "grad_norm": 0.6789267659187317, "learning_rate": 0.00020586110079916654, "loss": 2.9868, "step": 36827 }, { "epoch": 1.8, "grad_norm": 0.6374423503875732, "learning_rate": 0.00020584648238942358, "loss": 3.154, "step": 36828 }, { "epoch": 1.8, "grad_norm": 0.6617890000343323, "learning_rate": 0.0002058318642276628, "loss": 3.0522, "step": 36829 }, { "epoch": 1.8, "grad_norm": 0.6505021452903748, "learning_rate": 0.00020581724631392267, "loss": 3.2386, "step": 36830 }, { "epoch": 1.8, "grad_norm": 0.6423779726028442, "learning_rate": 0.00020580262864824193, "loss": 3.1164, "step": 36831 }, { "epoch": 1.81, "grad_norm": 0.6466403603553772, "learning_rate": 0.0002057880112306589, "loss": 3.0535, "step": 36832 }, { "epoch": 1.81, "grad_norm": 0.6527153849601746, "learning_rate": 0.00020577339406121215, "loss": 2.8668, "step": 36833 }, { "epoch": 1.81, "grad_norm": 0.6217302083969116, "learning_rate": 0.00020575877713994002, "loss": 3.0263, "step": 36834 }, { "epoch": 1.81, "grad_norm": 0.6141082048416138, "learning_rate": 0.00020574416046688124, "loss": 3.1425, "step": 36835 }, { "epoch": 1.81, "grad_norm": 0.6378345489501953, "learning_rate": 0.00020572954404207423, "loss": 3.1281, "step": 36836 }, { "epoch": 1.81, "grad_norm": 0.6423177719116211, "learning_rate": 0.00020571492786555733, "loss": 3.0076, "step": 36837 }, { "epoch": 1.81, "grad_norm": 0.6553202867507935, "learning_rate": 0.0002057003119373693, "loss": 2.9455, "step": 36838 }, { "epoch": 1.81, "grad_norm": 0.627715528011322, "learning_rate": 0.00020568569625754844, "loss": 2.9104, "step": 36839 }, { "epoch": 1.81, "grad_norm": 0.6472757458686829, "learning_rate": 0.00020567108082613324, "loss": 2.9738, "step": 36840 }, { "epoch": 1.81, "grad_norm": 0.6219276785850525, "learning_rate": 0.00020565646564316237, "loss": 3.0335, "step": 36841 }, { "epoch": 1.81, "grad_norm": 0.6370683312416077, "learning_rate": 0.00020564185070867416, "loss": 3.1745, "step": 36842 }, { "epoch": 1.81, "grad_norm": 0.6401722431182861, "learning_rate": 0.0002056272360227072, "loss": 3.118, "step": 36843 }, { "epoch": 1.81, "grad_norm": 0.7073693871498108, "learning_rate": 0.00020561262158529984, "loss": 2.9147, "step": 36844 }, { "epoch": 1.81, "grad_norm": 0.637370228767395, "learning_rate": 0.00020559800739649065, "loss": 3.0401, "step": 36845 }, { "epoch": 1.81, "grad_norm": 0.658949077129364, "learning_rate": 0.00020558339345631827, "loss": 2.925, "step": 36846 }, { "epoch": 1.81, "grad_norm": 0.6802607774734497, "learning_rate": 0.000205568779764821, "loss": 2.9548, "step": 36847 }, { "epoch": 1.81, "grad_norm": 0.6683867573738098, "learning_rate": 0.00020555416632203749, "loss": 3.0257, "step": 36848 }, { "epoch": 1.81, "grad_norm": 0.6157516241073608, "learning_rate": 0.00020553955312800596, "loss": 3.0218, "step": 36849 }, { "epoch": 1.81, "grad_norm": 0.6470605134963989, "learning_rate": 0.0002055249401827652, "loss": 2.8591, "step": 36850 }, { "epoch": 1.81, "grad_norm": 0.7139477729797363, "learning_rate": 0.0002055103274863535, "loss": 2.8813, "step": 36851 }, { "epoch": 1.81, "grad_norm": 0.5850152373313904, "learning_rate": 0.00020549571503880937, "loss": 2.9898, "step": 36852 }, { "epoch": 1.81, "grad_norm": 0.6298661828041077, "learning_rate": 0.00020548110284017144, "loss": 3.1898, "step": 36853 }, { "epoch": 1.81, "grad_norm": 0.6115210056304932, "learning_rate": 0.00020546649089047805, "loss": 2.9295, "step": 36854 }, { "epoch": 1.81, "grad_norm": 0.6343759894371033, "learning_rate": 0.0002054518791897678, "loss": 3.2084, "step": 36855 }, { "epoch": 1.81, "grad_norm": 0.6652531623840332, "learning_rate": 0.00020543726773807896, "loss": 2.8345, "step": 36856 }, { "epoch": 1.81, "grad_norm": 0.6212843060493469, "learning_rate": 0.00020542265653545022, "loss": 2.9031, "step": 36857 }, { "epoch": 1.81, "grad_norm": 0.6636337637901306, "learning_rate": 0.00020540804558192007, "loss": 3.254, "step": 36858 }, { "epoch": 1.81, "grad_norm": 0.6373041868209839, "learning_rate": 0.0002053934348775268, "loss": 2.9458, "step": 36859 }, { "epoch": 1.81, "grad_norm": 0.6539564728736877, "learning_rate": 0.00020537882442230916, "loss": 2.9724, "step": 36860 }, { "epoch": 1.81, "grad_norm": 0.6322240829467773, "learning_rate": 0.00020536421421630537, "loss": 2.9376, "step": 36861 }, { "epoch": 1.81, "grad_norm": 0.6140871047973633, "learning_rate": 0.000205349604259554, "loss": 3.0766, "step": 36862 }, { "epoch": 1.81, "grad_norm": 0.6811372637748718, "learning_rate": 0.00020533499455209364, "loss": 2.9396, "step": 36863 }, { "epoch": 1.81, "grad_norm": 0.6444950699806213, "learning_rate": 0.0002053203850939626, "loss": 2.9081, "step": 36864 }, { "epoch": 1.81, "grad_norm": 0.6304511427879333, "learning_rate": 0.00020530577588519968, "loss": 3.3028, "step": 36865 }, { "epoch": 1.81, "grad_norm": 0.6575809121131897, "learning_rate": 0.00020529116692584286, "loss": 2.9909, "step": 36866 }, { "epoch": 1.81, "grad_norm": 0.6433678865432739, "learning_rate": 0.00020527655821593092, "loss": 3.0047, "step": 36867 }, { "epoch": 1.81, "grad_norm": 0.6921300888061523, "learning_rate": 0.00020526194975550242, "loss": 3.0075, "step": 36868 }, { "epoch": 1.81, "grad_norm": 0.6458017230033875, "learning_rate": 0.00020524734154459562, "loss": 3.1427, "step": 36869 }, { "epoch": 1.81, "grad_norm": 0.6515001058578491, "learning_rate": 0.00020523273358324912, "loss": 2.9976, "step": 36870 }, { "epoch": 1.81, "grad_norm": 0.6600772142410278, "learning_rate": 0.0002052181258715013, "loss": 2.9814, "step": 36871 }, { "epoch": 1.81, "grad_norm": 0.6418211460113525, "learning_rate": 0.00020520351840939074, "loss": 3.1198, "step": 36872 }, { "epoch": 1.81, "grad_norm": 0.6537681221961975, "learning_rate": 0.00020518891119695592, "loss": 3.0484, "step": 36873 }, { "epoch": 1.81, "grad_norm": 0.669597327709198, "learning_rate": 0.00020517430423423513, "loss": 2.9601, "step": 36874 }, { "epoch": 1.81, "grad_norm": 0.6508209109306335, "learning_rate": 0.0002051596975212671, "loss": 2.887, "step": 36875 }, { "epoch": 1.81, "grad_norm": 0.580568790435791, "learning_rate": 0.00020514509105809013, "loss": 3.0521, "step": 36876 }, { "epoch": 1.81, "grad_norm": 0.6386202573776245, "learning_rate": 0.00020513048484474265, "loss": 2.9794, "step": 36877 }, { "epoch": 1.81, "grad_norm": 0.6095972657203674, "learning_rate": 0.00020511587888126338, "loss": 3.0316, "step": 36878 }, { "epoch": 1.81, "grad_norm": 0.6701319813728333, "learning_rate": 0.00020510127316769055, "loss": 2.9153, "step": 36879 }, { "epoch": 1.81, "grad_norm": 0.6238023042678833, "learning_rate": 0.00020508666770406277, "loss": 3.0534, "step": 36880 }, { "epoch": 1.81, "grad_norm": 0.6750576496124268, "learning_rate": 0.0002050720624904183, "loss": 2.9337, "step": 36881 }, { "epoch": 1.81, "grad_norm": 0.67641282081604, "learning_rate": 0.0002050574575267958, "loss": 3.0228, "step": 36882 }, { "epoch": 1.81, "grad_norm": 0.6155452132225037, "learning_rate": 0.00020504285281323375, "loss": 3.0172, "step": 36883 }, { "epoch": 1.81, "grad_norm": 0.6545352339744568, "learning_rate": 0.0002050282483497704, "loss": 2.9602, "step": 36884 }, { "epoch": 1.81, "grad_norm": 0.630028247833252, "learning_rate": 0.00020501364413644455, "loss": 3.0454, "step": 36885 }, { "epoch": 1.81, "grad_norm": 0.6380795836448669, "learning_rate": 0.00020499904017329433, "loss": 3.0451, "step": 36886 }, { "epoch": 1.81, "grad_norm": 0.6383315324783325, "learning_rate": 0.0002049844364603584, "loss": 3.048, "step": 36887 }, { "epoch": 1.81, "grad_norm": 0.6407351493835449, "learning_rate": 0.00020496983299767528, "loss": 3.145, "step": 36888 }, { "epoch": 1.81, "grad_norm": 0.6724693775177002, "learning_rate": 0.00020495522978528316, "loss": 3.084, "step": 36889 }, { "epoch": 1.81, "grad_norm": 0.625685453414917, "learning_rate": 0.00020494062682322085, "loss": 3.1366, "step": 36890 }, { "epoch": 1.81, "grad_norm": 0.6715691089630127, "learning_rate": 0.00020492602411152655, "loss": 3.0932, "step": 36891 }, { "epoch": 1.81, "grad_norm": 0.635994017124176, "learning_rate": 0.00020491142165023874, "loss": 2.9926, "step": 36892 }, { "epoch": 1.81, "grad_norm": 0.6208715438842773, "learning_rate": 0.00020489681943939604, "loss": 3.1724, "step": 36893 }, { "epoch": 1.81, "grad_norm": 0.6574182510375977, "learning_rate": 0.0002048822174790368, "loss": 3.1069, "step": 36894 }, { "epoch": 1.81, "grad_norm": 0.6716117858886719, "learning_rate": 0.0002048676157691995, "loss": 2.7032, "step": 36895 }, { "epoch": 1.81, "grad_norm": 0.6087467670440674, "learning_rate": 0.00020485301430992254, "loss": 3.0921, "step": 36896 }, { "epoch": 1.81, "grad_norm": 0.6368101835250854, "learning_rate": 0.00020483841310124445, "loss": 3.028, "step": 36897 }, { "epoch": 1.81, "grad_norm": 0.634164035320282, "learning_rate": 0.0002048238121432037, "loss": 3.1461, "step": 36898 }, { "epoch": 1.81, "grad_norm": 0.6230823993682861, "learning_rate": 0.00020480921143583864, "loss": 2.879, "step": 36899 }, { "epoch": 1.81, "grad_norm": 0.656328022480011, "learning_rate": 0.00020479461097918787, "loss": 3.0447, "step": 36900 }, { "epoch": 1.81, "grad_norm": 0.6206152439117432, "learning_rate": 0.00020478001077328973, "loss": 2.7903, "step": 36901 }, { "epoch": 1.81, "grad_norm": 0.6653019785881042, "learning_rate": 0.00020476541081818268, "loss": 3.1942, "step": 36902 }, { "epoch": 1.81, "grad_norm": 0.6336932182312012, "learning_rate": 0.00020475081111390533, "loss": 2.9204, "step": 36903 }, { "epoch": 1.81, "grad_norm": 0.66854327917099, "learning_rate": 0.00020473621166049592, "loss": 2.9795, "step": 36904 }, { "epoch": 1.81, "grad_norm": 0.6467189788818359, "learning_rate": 0.0002047216124579931, "loss": 2.8518, "step": 36905 }, { "epoch": 1.81, "grad_norm": 0.6561312079429626, "learning_rate": 0.00020470701350643503, "loss": 2.9816, "step": 36906 }, { "epoch": 1.81, "grad_norm": 0.6752562522888184, "learning_rate": 0.0002046924148058604, "loss": 3.0072, "step": 36907 }, { "epoch": 1.81, "grad_norm": 0.6663380861282349, "learning_rate": 0.00020467781635630773, "loss": 3.0714, "step": 36908 }, { "epoch": 1.81, "grad_norm": 0.6871305108070374, "learning_rate": 0.00020466321815781525, "loss": 2.7063, "step": 36909 }, { "epoch": 1.81, "grad_norm": 0.6497218608856201, "learning_rate": 0.0002046486202104216, "loss": 2.948, "step": 36910 }, { "epoch": 1.81, "grad_norm": 0.633631706237793, "learning_rate": 0.000204634022514165, "loss": 2.9519, "step": 36911 }, { "epoch": 1.81, "grad_norm": 0.6532074809074402, "learning_rate": 0.00020461942506908408, "loss": 2.8991, "step": 36912 }, { "epoch": 1.81, "grad_norm": 0.6703870892524719, "learning_rate": 0.0002046048278752173, "loss": 3.0202, "step": 36913 }, { "epoch": 1.81, "grad_norm": 0.6608051657676697, "learning_rate": 0.00020459023093260295, "loss": 3.1402, "step": 36914 }, { "epoch": 1.81, "grad_norm": 0.6233830451965332, "learning_rate": 0.00020457563424127967, "loss": 3.1028, "step": 36915 }, { "epoch": 1.81, "grad_norm": 0.6100077629089355, "learning_rate": 0.00020456103780128575, "loss": 2.8412, "step": 36916 }, { "epoch": 1.81, "grad_norm": 0.60553377866745, "learning_rate": 0.00020454644161265958, "loss": 2.9459, "step": 36917 }, { "epoch": 1.81, "grad_norm": 0.6492863893508911, "learning_rate": 0.00020453184567543992, "loss": 3.0801, "step": 36918 }, { "epoch": 1.81, "grad_norm": 0.6354946494102478, "learning_rate": 0.00020451724998966488, "loss": 3.0188, "step": 36919 }, { "epoch": 1.81, "grad_norm": 0.617892861366272, "learning_rate": 0.00020450265455537308, "loss": 2.9104, "step": 36920 }, { "epoch": 1.81, "grad_norm": 0.6572944521903992, "learning_rate": 0.00020448805937260287, "loss": 2.9129, "step": 36921 }, { "epoch": 1.81, "grad_norm": 0.7081995010375977, "learning_rate": 0.00020447346444139275, "loss": 2.9972, "step": 36922 }, { "epoch": 1.81, "grad_norm": 0.6335873007774353, "learning_rate": 0.00020445886976178115, "loss": 3.0526, "step": 36923 }, { "epoch": 1.81, "grad_norm": 0.6316371560096741, "learning_rate": 0.00020444427533380642, "loss": 2.906, "step": 36924 }, { "epoch": 1.81, "grad_norm": 0.6577939391136169, "learning_rate": 0.00020442968115750722, "loss": 2.9412, "step": 36925 }, { "epoch": 1.81, "grad_norm": 0.6616895794868469, "learning_rate": 0.0002044150872329217, "loss": 2.8354, "step": 36926 }, { "epoch": 1.81, "grad_norm": 0.6275897026062012, "learning_rate": 0.0002044004935600886, "loss": 3.1783, "step": 36927 }, { "epoch": 1.81, "grad_norm": 0.6467520594596863, "learning_rate": 0.00020438590013904605, "loss": 3.1131, "step": 36928 }, { "epoch": 1.81, "grad_norm": 0.6437161564826965, "learning_rate": 0.00020437130696983265, "loss": 3.0071, "step": 36929 }, { "epoch": 1.81, "grad_norm": 0.6312966346740723, "learning_rate": 0.00020435671405248694, "loss": 3.2288, "step": 36930 }, { "epoch": 1.81, "grad_norm": 0.6609817743301392, "learning_rate": 0.00020434212138704716, "loss": 3.136, "step": 36931 }, { "epoch": 1.81, "grad_norm": 0.6926374435424805, "learning_rate": 0.0002043275289735519, "loss": 2.8855, "step": 36932 }, { "epoch": 1.81, "grad_norm": 0.6676579713821411, "learning_rate": 0.00020431293681203938, "loss": 3.1017, "step": 36933 }, { "epoch": 1.81, "grad_norm": 0.676917314529419, "learning_rate": 0.00020429834490254823, "loss": 3.0384, "step": 36934 }, { "epoch": 1.81, "grad_norm": 0.6208324432373047, "learning_rate": 0.0002042837532451169, "loss": 3.0039, "step": 36935 }, { "epoch": 1.81, "grad_norm": 0.6595355868339539, "learning_rate": 0.00020426916183978363, "loss": 2.996, "step": 36936 }, { "epoch": 1.81, "grad_norm": 0.6129118800163269, "learning_rate": 0.00020425457068658706, "loss": 3.043, "step": 36937 }, { "epoch": 1.81, "grad_norm": 0.5993137955665588, "learning_rate": 0.00020423997978556547, "loss": 3.0566, "step": 36938 }, { "epoch": 1.81, "grad_norm": 0.620607316493988, "learning_rate": 0.00020422538913675726, "loss": 2.8563, "step": 36939 }, { "epoch": 1.81, "grad_norm": 0.6551635265350342, "learning_rate": 0.00020421079874020112, "loss": 3.1258, "step": 36940 }, { "epoch": 1.81, "grad_norm": 0.6164003610610962, "learning_rate": 0.00020419620859593517, "loss": 3.1104, "step": 36941 }, { "epoch": 1.81, "grad_norm": 0.6258066296577454, "learning_rate": 0.00020418161870399808, "loss": 3.1211, "step": 36942 }, { "epoch": 1.81, "grad_norm": 0.6458246111869812, "learning_rate": 0.00020416702906442805, "loss": 3.0477, "step": 36943 }, { "epoch": 1.81, "grad_norm": 0.6024858951568604, "learning_rate": 0.00020415243967726362, "loss": 2.9224, "step": 36944 }, { "epoch": 1.81, "grad_norm": 0.6260032653808594, "learning_rate": 0.00020413785054254337, "loss": 2.8545, "step": 36945 }, { "epoch": 1.81, "grad_norm": 0.6995924711227417, "learning_rate": 0.00020412326166030539, "loss": 2.8598, "step": 36946 }, { "epoch": 1.81, "grad_norm": 0.634792149066925, "learning_rate": 0.0002041086730305884, "loss": 2.72, "step": 36947 }, { "epoch": 1.81, "grad_norm": 0.657988965511322, "learning_rate": 0.00020409408465343066, "loss": 2.8743, "step": 36948 }, { "epoch": 1.81, "grad_norm": 0.6405299305915833, "learning_rate": 0.00020407949652887064, "loss": 2.9527, "step": 36949 }, { "epoch": 1.81, "grad_norm": 0.6475312113761902, "learning_rate": 0.0002040649086569468, "loss": 3.0618, "step": 36950 }, { "epoch": 1.81, "grad_norm": 0.6201655864715576, "learning_rate": 0.00020405032103769746, "loss": 3.1179, "step": 36951 }, { "epoch": 1.81, "grad_norm": 0.6073014140129089, "learning_rate": 0.0002040357336711612, "loss": 2.9965, "step": 36952 }, { "epoch": 1.81, "grad_norm": 0.6534898281097412, "learning_rate": 0.00020402114655737635, "loss": 2.9554, "step": 36953 }, { "epoch": 1.81, "grad_norm": 0.630856990814209, "learning_rate": 0.0002040065596963812, "loss": 2.8513, "step": 36954 }, { "epoch": 1.81, "grad_norm": 0.6377066969871521, "learning_rate": 0.00020399197308821444, "loss": 3.083, "step": 36955 }, { "epoch": 1.81, "grad_norm": 0.6434876322746277, "learning_rate": 0.00020397738673291432, "loss": 2.7419, "step": 36956 }, { "epoch": 1.81, "grad_norm": 0.6340557932853699, "learning_rate": 0.0002039628006305193, "loss": 2.9313, "step": 36957 }, { "epoch": 1.81, "grad_norm": 0.6318641304969788, "learning_rate": 0.00020394821478106768, "loss": 2.8285, "step": 36958 }, { "epoch": 1.81, "grad_norm": 0.6800630688667297, "learning_rate": 0.000203933629184598, "loss": 3.031, "step": 36959 }, { "epoch": 1.81, "grad_norm": 0.6454338431358337, "learning_rate": 0.00020391904384114875, "loss": 2.8901, "step": 36960 }, { "epoch": 1.81, "grad_norm": 0.7657660245895386, "learning_rate": 0.00020390445875075812, "loss": 3.1257, "step": 36961 }, { "epoch": 1.81, "grad_norm": 0.6502024531364441, "learning_rate": 0.00020388987391346477, "loss": 3.2133, "step": 36962 }, { "epoch": 1.81, "grad_norm": 0.6643862128257751, "learning_rate": 0.00020387528932930691, "loss": 3.0175, "step": 36963 }, { "epoch": 1.81, "grad_norm": 0.6402032375335693, "learning_rate": 0.00020386070499832303, "loss": 2.9454, "step": 36964 }, { "epoch": 1.81, "grad_norm": 0.6753803491592407, "learning_rate": 0.00020384612092055166, "loss": 3.1392, "step": 36965 }, { "epoch": 1.81, "grad_norm": 0.6361395716667175, "learning_rate": 0.000203831537096031, "loss": 2.8411, "step": 36966 }, { "epoch": 1.81, "grad_norm": 0.6042212843894958, "learning_rate": 0.00020381695352479967, "loss": 2.9097, "step": 36967 }, { "epoch": 1.81, "grad_norm": 0.724984347820282, "learning_rate": 0.00020380237020689582, "loss": 3.0332, "step": 36968 }, { "epoch": 1.81, "grad_norm": 0.6241597533226013, "learning_rate": 0.00020378778714235803, "loss": 2.9793, "step": 36969 }, { "epoch": 1.81, "grad_norm": 0.6064009070396423, "learning_rate": 0.00020377320433122482, "loss": 3.2852, "step": 36970 }, { "epoch": 1.81, "grad_norm": 0.6465131044387817, "learning_rate": 0.00020375862177353436, "loss": 3.1043, "step": 36971 }, { "epoch": 1.81, "grad_norm": 0.6775936484336853, "learning_rate": 0.0002037440394693253, "loss": 3.1991, "step": 36972 }, { "epoch": 1.81, "grad_norm": 0.6569826602935791, "learning_rate": 0.00020372945741863575, "loss": 2.8564, "step": 36973 }, { "epoch": 1.81, "grad_norm": 0.6317790746688843, "learning_rate": 0.00020371487562150436, "loss": 3.0964, "step": 36974 }, { "epoch": 1.81, "grad_norm": 0.642630398273468, "learning_rate": 0.00020370029407796951, "loss": 3.1208, "step": 36975 }, { "epoch": 1.81, "grad_norm": 0.6569013595581055, "learning_rate": 0.00020368571278806943, "loss": 3.1136, "step": 36976 }, { "epoch": 1.81, "grad_norm": 0.6564520597457886, "learning_rate": 0.00020367113175184278, "loss": 2.9458, "step": 36977 }, { "epoch": 1.81, "grad_norm": 0.6364352703094482, "learning_rate": 0.00020365655096932773, "loss": 3.0549, "step": 36978 }, { "epoch": 1.81, "grad_norm": 0.6397393345832825, "learning_rate": 0.00020364197044056277, "loss": 2.7911, "step": 36979 }, { "epoch": 1.81, "grad_norm": 0.6152938008308411, "learning_rate": 0.00020362739016558642, "loss": 2.8501, "step": 36980 }, { "epoch": 1.81, "grad_norm": 0.6240938305854797, "learning_rate": 0.00020361281014443693, "loss": 2.9638, "step": 36981 }, { "epoch": 1.81, "grad_norm": 0.6312567591667175, "learning_rate": 0.0002035982303771528, "loss": 2.9071, "step": 36982 }, { "epoch": 1.81, "grad_norm": 0.6281710267066956, "learning_rate": 0.00020358365086377224, "loss": 2.9096, "step": 36983 }, { "epoch": 1.81, "grad_norm": 0.6307974457740784, "learning_rate": 0.00020356907160433386, "loss": 2.7694, "step": 36984 }, { "epoch": 1.81, "grad_norm": 0.6692641377449036, "learning_rate": 0.00020355449259887607, "loss": 3.0278, "step": 36985 }, { "epoch": 1.81, "grad_norm": 0.6274914145469666, "learning_rate": 0.000203539913847437, "loss": 3.0309, "step": 36986 }, { "epoch": 1.81, "grad_norm": 0.6962708234786987, "learning_rate": 0.00020352533535005545, "loss": 2.827, "step": 36987 }, { "epoch": 1.81, "grad_norm": 0.6521985530853271, "learning_rate": 0.0002035107571067694, "loss": 3.0369, "step": 36988 }, { "epoch": 1.81, "grad_norm": 0.6316908597946167, "learning_rate": 0.00020349617911761753, "loss": 2.8481, "step": 36989 }, { "epoch": 1.81, "grad_norm": 0.6510263085365295, "learning_rate": 0.00020348160138263817, "loss": 3.0658, "step": 36990 }, { "epoch": 1.81, "grad_norm": 0.6218271255493164, "learning_rate": 0.00020346702390186964, "loss": 2.7787, "step": 36991 }, { "epoch": 1.81, "grad_norm": 0.6902745366096497, "learning_rate": 0.0002034524466753505, "loss": 2.9119, "step": 36992 }, { "epoch": 1.81, "grad_norm": 0.6275454163551331, "learning_rate": 0.00020343786970311895, "loss": 2.893, "step": 36993 }, { "epoch": 1.81, "grad_norm": 0.6849902868270874, "learning_rate": 0.00020342329298521345, "loss": 2.9944, "step": 36994 }, { "epoch": 1.81, "grad_norm": 0.7208778262138367, "learning_rate": 0.00020340871652167248, "loss": 2.9227, "step": 36995 }, { "epoch": 1.81, "grad_norm": 0.6108225584030151, "learning_rate": 0.00020339414031253432, "loss": 3.1913, "step": 36996 }, { "epoch": 1.81, "grad_norm": 0.6657624840736389, "learning_rate": 0.00020337956435783749, "loss": 2.6087, "step": 36997 }, { "epoch": 1.81, "grad_norm": 0.6316897869110107, "learning_rate": 0.00020336498865762014, "loss": 3.0783, "step": 36998 }, { "epoch": 1.81, "grad_norm": 0.6183792352676392, "learning_rate": 0.00020335041321192088, "loss": 2.9349, "step": 36999 }, { "epoch": 1.81, "grad_norm": 0.6724928617477417, "learning_rate": 0.00020333583802077811, "loss": 2.9243, "step": 37000 }, { "epoch": 1.81, "grad_norm": 0.5862394571304321, "learning_rate": 0.00020332126308422998, "loss": 3.1631, "step": 37001 }, { "epoch": 1.81, "grad_norm": 0.6738343834877014, "learning_rate": 0.00020330668840231515, "loss": 2.9031, "step": 37002 }, { "epoch": 1.81, "grad_norm": 0.637536346912384, "learning_rate": 0.0002032921139750719, "loss": 2.9616, "step": 37003 }, { "epoch": 1.81, "grad_norm": 0.6663991212844849, "learning_rate": 0.00020327753980253863, "loss": 3.0612, "step": 37004 }, { "epoch": 1.81, "grad_norm": 0.6609890460968018, "learning_rate": 0.00020326296588475356, "loss": 3.0707, "step": 37005 }, { "epoch": 1.81, "grad_norm": 0.645164966583252, "learning_rate": 0.0002032483922217553, "loss": 2.9354, "step": 37006 }, { "epoch": 1.81, "grad_norm": 0.675213098526001, "learning_rate": 0.00020323381881358224, "loss": 3.0725, "step": 37007 }, { "epoch": 1.81, "grad_norm": 0.6274236440658569, "learning_rate": 0.00020321924566027254, "loss": 3.0605, "step": 37008 }, { "epoch": 1.81, "grad_norm": 0.6301320791244507, "learning_rate": 0.00020320467276186484, "loss": 3.1105, "step": 37009 }, { "epoch": 1.81, "grad_norm": 0.6352813839912415, "learning_rate": 0.00020319010011839728, "loss": 3.2368, "step": 37010 }, { "epoch": 1.81, "grad_norm": 0.6481574773788452, "learning_rate": 0.00020317552772990843, "loss": 2.9269, "step": 37011 }, { "epoch": 1.81, "grad_norm": 0.6353967189788818, "learning_rate": 0.00020316095559643667, "loss": 3.0593, "step": 37012 }, { "epoch": 1.81, "grad_norm": 0.6281538009643555, "learning_rate": 0.0002031463837180202, "loss": 2.9999, "step": 37013 }, { "epoch": 1.81, "grad_norm": 0.6500021815299988, "learning_rate": 0.00020313181209469762, "loss": 3.1799, "step": 37014 }, { "epoch": 1.81, "grad_norm": 0.611717164516449, "learning_rate": 0.00020311724072650717, "loss": 2.9209, "step": 37015 }, { "epoch": 1.81, "grad_norm": 0.657289981842041, "learning_rate": 0.00020310266961348714, "loss": 2.8855, "step": 37016 }, { "epoch": 1.81, "grad_norm": 0.6263495683670044, "learning_rate": 0.00020308809875567622, "loss": 2.8046, "step": 37017 }, { "epoch": 1.81, "grad_norm": 0.6198089122772217, "learning_rate": 0.00020307352815311248, "loss": 2.8548, "step": 37018 }, { "epoch": 1.81, "grad_norm": 0.6605949401855469, "learning_rate": 0.00020305895780583453, "loss": 3.0922, "step": 37019 }, { "epoch": 1.81, "grad_norm": 0.6008433103561401, "learning_rate": 0.0002030443877138805, "loss": 2.9261, "step": 37020 }, { "epoch": 1.81, "grad_norm": 0.6339818835258484, "learning_rate": 0.0002030298178772889, "loss": 2.9345, "step": 37021 }, { "epoch": 1.81, "grad_norm": 0.6539232730865479, "learning_rate": 0.0002030152482960982, "loss": 2.9125, "step": 37022 }, { "epoch": 1.81, "grad_norm": 0.5978472232818604, "learning_rate": 0.00020300067897034655, "loss": 3.0355, "step": 37023 }, { "epoch": 1.81, "grad_norm": 0.6346756219863892, "learning_rate": 0.00020298610990007255, "loss": 2.9446, "step": 37024 }, { "epoch": 1.81, "grad_norm": 0.6300790905952454, "learning_rate": 0.00020297154108531442, "loss": 2.8722, "step": 37025 }, { "epoch": 1.81, "grad_norm": 0.6479358077049255, "learning_rate": 0.0002029569725261105, "loss": 3.0601, "step": 37026 }, { "epoch": 1.81, "grad_norm": 0.6204195618629456, "learning_rate": 0.00020294240422249937, "loss": 2.9397, "step": 37027 }, { "epoch": 1.81, "grad_norm": 0.7228114008903503, "learning_rate": 0.0002029278361745192, "loss": 2.7625, "step": 37028 }, { "epoch": 1.81, "grad_norm": 0.6030262112617493, "learning_rate": 0.0002029132683822085, "loss": 2.9429, "step": 37029 }, { "epoch": 1.81, "grad_norm": 0.623123049736023, "learning_rate": 0.00020289870084560542, "loss": 2.9507, "step": 37030 }, { "epoch": 1.81, "grad_norm": 0.6438615322113037, "learning_rate": 0.0002028841335647485, "loss": 3.0401, "step": 37031 }, { "epoch": 1.81, "grad_norm": 0.7645979523658752, "learning_rate": 0.0002028695665396762, "loss": 3.062, "step": 37032 }, { "epoch": 1.81, "grad_norm": 0.6688880920410156, "learning_rate": 0.0002028549997704267, "loss": 2.9529, "step": 37033 }, { "epoch": 1.81, "grad_norm": 0.6559062004089355, "learning_rate": 0.0002028404332570385, "loss": 3.097, "step": 37034 }, { "epoch": 1.81, "grad_norm": 0.6532920002937317, "learning_rate": 0.00020282586699954977, "loss": 2.7324, "step": 37035 }, { "epoch": 1.82, "grad_norm": 0.6466086506843567, "learning_rate": 0.0002028113009979991, "loss": 3.0552, "step": 37036 }, { "epoch": 1.82, "grad_norm": 0.6443438529968262, "learning_rate": 0.00020279673525242477, "loss": 2.9941, "step": 37037 }, { "epoch": 1.82, "grad_norm": 0.6671386957168579, "learning_rate": 0.00020278216976286504, "loss": 2.95, "step": 37038 }, { "epoch": 1.82, "grad_norm": 0.6276841163635254, "learning_rate": 0.00020276760452935848, "loss": 3.0235, "step": 37039 }, { "epoch": 1.82, "grad_norm": 0.650061845779419, "learning_rate": 0.0002027530395519433, "loss": 3.0023, "step": 37040 }, { "epoch": 1.82, "grad_norm": 0.6341570019721985, "learning_rate": 0.0002027384748306578, "loss": 2.9555, "step": 37041 }, { "epoch": 1.82, "grad_norm": 0.6999353766441345, "learning_rate": 0.00020272391036554058, "loss": 3.009, "step": 37042 }, { "epoch": 1.82, "grad_norm": 0.630683183670044, "learning_rate": 0.00020270934615662978, "loss": 3.1733, "step": 37043 }, { "epoch": 1.82, "grad_norm": 0.6502330303192139, "learning_rate": 0.00020269478220396393, "loss": 2.8276, "step": 37044 }, { "epoch": 1.82, "grad_norm": 0.599058210849762, "learning_rate": 0.00020268021850758115, "loss": 2.8763, "step": 37045 }, { "epoch": 1.82, "grad_norm": 0.5828586220741272, "learning_rate": 0.00020266565506752002, "loss": 2.9274, "step": 37046 }, { "epoch": 1.82, "grad_norm": 0.6535758376121521, "learning_rate": 0.00020265109188381887, "loss": 3.153, "step": 37047 }, { "epoch": 1.82, "grad_norm": 0.624528169631958, "learning_rate": 0.00020263652895651587, "loss": 2.9203, "step": 37048 }, { "epoch": 1.82, "grad_norm": 0.6232485175132751, "learning_rate": 0.0002026219662856497, "loss": 2.9709, "step": 37049 }, { "epoch": 1.82, "grad_norm": 0.6133432388305664, "learning_rate": 0.00020260740387125834, "loss": 2.9712, "step": 37050 }, { "epoch": 1.82, "grad_norm": 0.6627100706100464, "learning_rate": 0.00020259284171338042, "loss": 2.9813, "step": 37051 }, { "epoch": 1.82, "grad_norm": 0.6731305122375488, "learning_rate": 0.0002025782798120543, "loss": 3.0706, "step": 37052 }, { "epoch": 1.82, "grad_norm": 0.6375182867050171, "learning_rate": 0.0002025637181673181, "loss": 3.0896, "step": 37053 }, { "epoch": 1.82, "grad_norm": 0.684897243976593, "learning_rate": 0.0002025491567792104, "loss": 2.8677, "step": 37054 }, { "epoch": 1.82, "grad_norm": 0.6456209421157837, "learning_rate": 0.00020253459564776945, "loss": 3.1135, "step": 37055 }, { "epoch": 1.82, "grad_norm": 0.6744033098220825, "learning_rate": 0.00020252003477303353, "loss": 3.0566, "step": 37056 }, { "epoch": 1.82, "grad_norm": 0.6282669305801392, "learning_rate": 0.0002025054741550412, "loss": 3.0302, "step": 37057 }, { "epoch": 1.82, "grad_norm": 0.6827451586723328, "learning_rate": 0.00020249091379383066, "loss": 2.8749, "step": 37058 }, { "epoch": 1.82, "grad_norm": 0.631802499294281, "learning_rate": 0.0002024763536894403, "loss": 3.0074, "step": 37059 }, { "epoch": 1.82, "grad_norm": 0.6587291359901428, "learning_rate": 0.00020246179384190836, "loss": 2.7468, "step": 37060 }, { "epoch": 1.82, "grad_norm": 0.655880868434906, "learning_rate": 0.00020244723425127335, "loss": 3.1387, "step": 37061 }, { "epoch": 1.82, "grad_norm": 0.6162218451499939, "learning_rate": 0.0002024326749175736, "loss": 3.1554, "step": 37062 }, { "epoch": 1.82, "grad_norm": 0.6140300631523132, "learning_rate": 0.0002024181158408473, "loss": 3.0222, "step": 37063 }, { "epoch": 1.82, "grad_norm": 0.6147719025611877, "learning_rate": 0.000202403557021133, "loss": 2.9423, "step": 37064 }, { "epoch": 1.82, "grad_norm": 0.6162436604499817, "learning_rate": 0.00020238899845846889, "loss": 2.9565, "step": 37065 }, { "epoch": 1.82, "grad_norm": 0.6531295776367188, "learning_rate": 0.00020237444015289336, "loss": 3.1544, "step": 37066 }, { "epoch": 1.82, "grad_norm": 0.6456297039985657, "learning_rate": 0.00020235988210444482, "loss": 2.9994, "step": 37067 }, { "epoch": 1.82, "grad_norm": 0.6418159008026123, "learning_rate": 0.00020234532431316157, "loss": 2.8051, "step": 37068 }, { "epoch": 1.82, "grad_norm": 0.6710839867591858, "learning_rate": 0.00020233076677908196, "loss": 2.9623, "step": 37069 }, { "epoch": 1.82, "grad_norm": 0.6115990877151489, "learning_rate": 0.00020231620950224419, "loss": 2.956, "step": 37070 }, { "epoch": 1.82, "grad_norm": 0.601249098777771, "learning_rate": 0.0002023016524826867, "loss": 3.0491, "step": 37071 }, { "epoch": 1.82, "grad_norm": 0.6476225256919861, "learning_rate": 0.00020228709572044803, "loss": 3.0574, "step": 37072 }, { "epoch": 1.82, "grad_norm": 0.6286438703536987, "learning_rate": 0.00020227253921556624, "loss": 2.8102, "step": 37073 }, { "epoch": 1.82, "grad_norm": 0.6054357886314392, "learning_rate": 0.00020225798296807986, "loss": 3.1466, "step": 37074 }, { "epoch": 1.82, "grad_norm": 0.681904673576355, "learning_rate": 0.00020224342697802705, "loss": 3.0506, "step": 37075 }, { "epoch": 1.82, "grad_norm": 0.6787092089653015, "learning_rate": 0.00020222887124544625, "loss": 3.0671, "step": 37076 }, { "epoch": 1.82, "grad_norm": 0.6227284669876099, "learning_rate": 0.00020221431577037584, "loss": 3.0675, "step": 37077 }, { "epoch": 1.82, "grad_norm": 0.6418759226799011, "learning_rate": 0.000202199760552854, "loss": 3.0101, "step": 37078 }, { "epoch": 1.82, "grad_norm": 0.6091601252555847, "learning_rate": 0.00020218520559291928, "loss": 2.8531, "step": 37079 }, { "epoch": 1.82, "grad_norm": 0.6450102925300598, "learning_rate": 0.0002021706508906099, "loss": 3.0782, "step": 37080 }, { "epoch": 1.82, "grad_norm": 0.6219157576560974, "learning_rate": 0.00020215609644596422, "loss": 3.0429, "step": 37081 }, { "epoch": 1.82, "grad_norm": 0.6502217650413513, "learning_rate": 0.0002021415422590204, "loss": 2.9689, "step": 37082 }, { "epoch": 1.82, "grad_norm": 0.6541635990142822, "learning_rate": 0.000202126988329817, "loss": 2.9687, "step": 37083 }, { "epoch": 1.82, "grad_norm": 0.6724274754524231, "learning_rate": 0.00020211243465839237, "loss": 2.9509, "step": 37084 }, { "epoch": 1.82, "grad_norm": 0.6167418360710144, "learning_rate": 0.0002020978812447846, "loss": 3.0759, "step": 37085 }, { "epoch": 1.82, "grad_norm": 0.6464261412620544, "learning_rate": 0.0002020833280890323, "loss": 2.7714, "step": 37086 }, { "epoch": 1.82, "grad_norm": 0.6340137720108032, "learning_rate": 0.0002020687751911736, "loss": 3.0417, "step": 37087 }, { "epoch": 1.82, "grad_norm": 0.631371259689331, "learning_rate": 0.00020205422255124684, "loss": 2.8924, "step": 37088 }, { "epoch": 1.82, "grad_norm": 0.6182030439376831, "learning_rate": 0.00020203967016929053, "loss": 3.1116, "step": 37089 }, { "epoch": 1.82, "grad_norm": 0.6417081356048584, "learning_rate": 0.00020202511804534278, "loss": 3.0076, "step": 37090 }, { "epoch": 1.82, "grad_norm": 0.6194267868995667, "learning_rate": 0.00020201056617944217, "loss": 3.0782, "step": 37091 }, { "epoch": 1.82, "grad_norm": 0.6341774463653564, "learning_rate": 0.00020199601457162667, "loss": 2.9582, "step": 37092 }, { "epoch": 1.82, "grad_norm": 0.6601331233978271, "learning_rate": 0.00020198146322193483, "loss": 3.0365, "step": 37093 }, { "epoch": 1.82, "grad_norm": 0.6085912585258484, "learning_rate": 0.00020196691213040507, "loss": 3.1794, "step": 37094 }, { "epoch": 1.82, "grad_norm": 0.621552586555481, "learning_rate": 0.00020195236129707552, "loss": 3.1996, "step": 37095 }, { "epoch": 1.82, "grad_norm": 0.6433894634246826, "learning_rate": 0.00020193781072198468, "loss": 2.7632, "step": 37096 }, { "epoch": 1.82, "grad_norm": 0.633199155330658, "learning_rate": 0.00020192326040517064, "loss": 3.1733, "step": 37097 }, { "epoch": 1.82, "grad_norm": 0.6254742741584778, "learning_rate": 0.0002019087103466719, "loss": 3.0559, "step": 37098 }, { "epoch": 1.82, "grad_norm": 0.6049128770828247, "learning_rate": 0.0002018941605465268, "loss": 3.0833, "step": 37099 }, { "epoch": 1.82, "grad_norm": 0.6894818544387817, "learning_rate": 0.0002018796110047735, "loss": 2.7616, "step": 37100 }, { "epoch": 1.82, "grad_norm": 0.596031665802002, "learning_rate": 0.0002018650617214505, "loss": 2.9947, "step": 37101 }, { "epoch": 1.82, "grad_norm": 0.6057289838790894, "learning_rate": 0.000201850512696596, "loss": 3.1525, "step": 37102 }, { "epoch": 1.82, "grad_norm": 0.6352101564407349, "learning_rate": 0.00020183596393024832, "loss": 3.0793, "step": 37103 }, { "epoch": 1.82, "grad_norm": 0.6023186445236206, "learning_rate": 0.00020182141542244593, "loss": 3.0214, "step": 37104 }, { "epoch": 1.82, "grad_norm": 0.6341410875320435, "learning_rate": 0.000201806867173227, "loss": 2.868, "step": 37105 }, { "epoch": 1.82, "grad_norm": 0.6453239917755127, "learning_rate": 0.00020179231918262988, "loss": 3.0226, "step": 37106 }, { "epoch": 1.82, "grad_norm": 0.6963949203491211, "learning_rate": 0.00020177777145069284, "loss": 3.0891, "step": 37107 }, { "epoch": 1.82, "grad_norm": 0.6552883386611938, "learning_rate": 0.00020176322397745427, "loss": 3.1864, "step": 37108 }, { "epoch": 1.82, "grad_norm": 0.6849330067634583, "learning_rate": 0.00020174867676295256, "loss": 3.0045, "step": 37109 }, { "epoch": 1.82, "grad_norm": 0.7277083992958069, "learning_rate": 0.00020173412980722575, "loss": 3.0083, "step": 37110 }, { "epoch": 1.82, "grad_norm": 0.6667925119400024, "learning_rate": 0.00020171958311031245, "loss": 2.9899, "step": 37111 }, { "epoch": 1.82, "grad_norm": 0.6309471726417542, "learning_rate": 0.00020170503667225076, "loss": 3.0762, "step": 37112 }, { "epoch": 1.82, "grad_norm": 0.6317311525344849, "learning_rate": 0.00020169049049307918, "loss": 2.9453, "step": 37113 }, { "epoch": 1.82, "grad_norm": 0.6759694814682007, "learning_rate": 0.00020167594457283595, "loss": 3.2062, "step": 37114 }, { "epoch": 1.82, "grad_norm": 0.6160515546798706, "learning_rate": 0.0002016613989115592, "loss": 3.099, "step": 37115 }, { "epoch": 1.82, "grad_norm": 0.6787965297698975, "learning_rate": 0.00020164685350928757, "loss": 2.8703, "step": 37116 }, { "epoch": 1.82, "grad_norm": 0.6101091504096985, "learning_rate": 0.00020163230836605912, "loss": 3.2368, "step": 37117 }, { "epoch": 1.82, "grad_norm": 0.6226139068603516, "learning_rate": 0.00020161776348191223, "loss": 3.125, "step": 37118 }, { "epoch": 1.82, "grad_norm": 0.7263720035552979, "learning_rate": 0.00020160321885688527, "loss": 3.0666, "step": 37119 }, { "epoch": 1.82, "grad_norm": 0.6440698504447937, "learning_rate": 0.00020158867449101645, "loss": 2.9552, "step": 37120 }, { "epoch": 1.82, "grad_norm": 0.6304377317428589, "learning_rate": 0.0002015741303843442, "loss": 3.2824, "step": 37121 }, { "epoch": 1.82, "grad_norm": 0.6487731337547302, "learning_rate": 0.00020155958653690664, "loss": 2.8647, "step": 37122 }, { "epoch": 1.82, "grad_norm": 0.6729735732078552, "learning_rate": 0.00020154504294874224, "loss": 2.879, "step": 37123 }, { "epoch": 1.82, "grad_norm": 0.6333503127098083, "learning_rate": 0.0002015304996198893, "loss": 3.0657, "step": 37124 }, { "epoch": 1.82, "grad_norm": 0.6147133708000183, "learning_rate": 0.0002015159565503859, "loss": 3.0688, "step": 37125 }, { "epoch": 1.82, "grad_norm": 0.6360808610916138, "learning_rate": 0.0002015014137402707, "loss": 3.21, "step": 37126 }, { "epoch": 1.82, "grad_norm": 0.6403216123580933, "learning_rate": 0.00020148687118958175, "loss": 3.0954, "step": 37127 }, { "epoch": 1.82, "grad_norm": 0.6334720849990845, "learning_rate": 0.00020147232889835735, "loss": 3.0747, "step": 37128 }, { "epoch": 1.82, "grad_norm": 0.688264787197113, "learning_rate": 0.000201457786866636, "loss": 2.8259, "step": 37129 }, { "epoch": 1.82, "grad_norm": 0.6472107172012329, "learning_rate": 0.00020144324509445584, "loss": 3.025, "step": 37130 }, { "epoch": 1.82, "grad_norm": 0.6436012983322144, "learning_rate": 0.0002014287035818552, "loss": 2.9706, "step": 37131 }, { "epoch": 1.82, "grad_norm": 0.6599947810173035, "learning_rate": 0.00020141416232887232, "loss": 2.8894, "step": 37132 }, { "epoch": 1.82, "grad_norm": 0.65586918592453, "learning_rate": 0.00020139962133554552, "loss": 3.006, "step": 37133 }, { "epoch": 1.82, "grad_norm": 0.7479538321495056, "learning_rate": 0.0002013850806019133, "loss": 2.8773, "step": 37134 }, { "epoch": 1.82, "grad_norm": 0.6416367888450623, "learning_rate": 0.00020137054012801377, "loss": 3.0427, "step": 37135 }, { "epoch": 1.82, "grad_norm": 0.5910057425498962, "learning_rate": 0.00020135599991388524, "loss": 2.9003, "step": 37136 }, { "epoch": 1.82, "grad_norm": 0.604770839214325, "learning_rate": 0.00020134145995956597, "loss": 2.9453, "step": 37137 }, { "epoch": 1.82, "grad_norm": 0.6588855385780334, "learning_rate": 0.0002013269202650943, "loss": 2.9322, "step": 37138 }, { "epoch": 1.82, "grad_norm": 0.6265450716018677, "learning_rate": 0.00020131238083050863, "loss": 2.8692, "step": 37139 }, { "epoch": 1.82, "grad_norm": 0.607767641544342, "learning_rate": 0.00020129784165584703, "loss": 3.0773, "step": 37140 }, { "epoch": 1.82, "grad_norm": 0.660254716873169, "learning_rate": 0.00020128330274114803, "loss": 2.9246, "step": 37141 }, { "epoch": 1.82, "grad_norm": 0.6739830374717712, "learning_rate": 0.00020126876408644976, "loss": 2.7952, "step": 37142 }, { "epoch": 1.82, "grad_norm": 0.6582867503166199, "learning_rate": 0.00020125422569179053, "loss": 2.9389, "step": 37143 }, { "epoch": 1.82, "grad_norm": 0.6650797724723816, "learning_rate": 0.00020123968755720876, "loss": 2.9109, "step": 37144 }, { "epoch": 1.82, "grad_norm": 0.7328686118125916, "learning_rate": 0.00020122514968274257, "loss": 3.0024, "step": 37145 }, { "epoch": 1.82, "grad_norm": 0.6152870059013367, "learning_rate": 0.00020121061206843042, "loss": 2.9541, "step": 37146 }, { "epoch": 1.82, "grad_norm": 0.603813648223877, "learning_rate": 0.00020119607471431034, "loss": 2.8763, "step": 37147 }, { "epoch": 1.82, "grad_norm": 0.646477222442627, "learning_rate": 0.00020118153762042088, "loss": 3.0344, "step": 37148 }, { "epoch": 1.82, "grad_norm": 0.7055385112762451, "learning_rate": 0.0002011670007868003, "loss": 3.0673, "step": 37149 }, { "epoch": 1.82, "grad_norm": 0.6229759454727173, "learning_rate": 0.00020115246421348668, "loss": 3.1857, "step": 37150 }, { "epoch": 1.82, "grad_norm": 0.6314061284065247, "learning_rate": 0.00020113792790051854, "loss": 3.0096, "step": 37151 }, { "epoch": 1.82, "grad_norm": 0.6126551032066345, "learning_rate": 0.000201123391847934, "loss": 2.9149, "step": 37152 }, { "epoch": 1.82, "grad_norm": 0.6453806161880493, "learning_rate": 0.00020110885605577146, "loss": 3.0979, "step": 37153 }, { "epoch": 1.82, "grad_norm": 0.6280014514923096, "learning_rate": 0.0002010943205240692, "loss": 2.8085, "step": 37154 }, { "epoch": 1.82, "grad_norm": 0.6255245208740234, "learning_rate": 0.00020107978525286537, "loss": 3.2936, "step": 37155 }, { "epoch": 1.82, "grad_norm": 0.6780776977539062, "learning_rate": 0.0002010652502421985, "loss": 2.9829, "step": 37156 }, { "epoch": 1.82, "grad_norm": 0.7052608132362366, "learning_rate": 0.0002010507154921066, "loss": 2.8949, "step": 37157 }, { "epoch": 1.82, "grad_norm": 0.6485824584960938, "learning_rate": 0.00020103618100262815, "loss": 2.9753, "step": 37158 }, { "epoch": 1.82, "grad_norm": 0.6526051163673401, "learning_rate": 0.00020102164677380122, "loss": 3.1261, "step": 37159 }, { "epoch": 1.82, "grad_norm": 0.6495037078857422, "learning_rate": 0.00020100711280566431, "loss": 2.9435, "step": 37160 }, { "epoch": 1.82, "grad_norm": 0.5939731597900391, "learning_rate": 0.00020099257909825567, "loss": 3.1176, "step": 37161 }, { "epoch": 1.82, "grad_norm": 0.6784581542015076, "learning_rate": 0.00020097804565161342, "loss": 3.1203, "step": 37162 }, { "epoch": 1.82, "grad_norm": 0.6253423690795898, "learning_rate": 0.000200963512465776, "loss": 3.0622, "step": 37163 }, { "epoch": 1.82, "grad_norm": 0.6293935179710388, "learning_rate": 0.00020094897954078163, "loss": 3.1353, "step": 37164 }, { "epoch": 1.82, "grad_norm": 0.7159479856491089, "learning_rate": 0.0002009344468766685, "loss": 3.2139, "step": 37165 }, { "epoch": 1.82, "grad_norm": 0.6202086806297302, "learning_rate": 0.0002009199144734751, "loss": 3.0405, "step": 37166 }, { "epoch": 1.82, "grad_norm": 0.6366106867790222, "learning_rate": 0.00020090538233123948, "loss": 3.0614, "step": 37167 }, { "epoch": 1.82, "grad_norm": 0.6750251650810242, "learning_rate": 0.00020089085045000012, "loss": 3.0452, "step": 37168 }, { "epoch": 1.82, "grad_norm": 0.6211447715759277, "learning_rate": 0.00020087631882979504, "loss": 3.0842, "step": 37169 }, { "epoch": 1.82, "grad_norm": 0.6459939479827881, "learning_rate": 0.00020086178747066272, "loss": 2.9701, "step": 37170 }, { "epoch": 1.82, "grad_norm": 0.6417645812034607, "learning_rate": 0.00020084725637264147, "loss": 2.9645, "step": 37171 }, { "epoch": 1.82, "grad_norm": 0.610141396522522, "learning_rate": 0.00020083272553576929, "loss": 2.9528, "step": 37172 }, { "epoch": 1.82, "grad_norm": 0.6073371171951294, "learning_rate": 0.00020081819496008477, "loss": 3.0393, "step": 37173 }, { "epoch": 1.82, "grad_norm": 0.6643429398536682, "learning_rate": 0.00020080366464562594, "loss": 2.9228, "step": 37174 }, { "epoch": 1.82, "grad_norm": 0.6577807664871216, "learning_rate": 0.00020078913459243119, "loss": 3.1136, "step": 37175 }, { "epoch": 1.82, "grad_norm": 0.5988003015518188, "learning_rate": 0.00020077460480053883, "loss": 2.9344, "step": 37176 }, { "epoch": 1.82, "grad_norm": 0.6243207454681396, "learning_rate": 0.00020076007526998696, "loss": 2.9445, "step": 37177 }, { "epoch": 1.82, "grad_norm": 0.6623547077178955, "learning_rate": 0.00020074554600081406, "loss": 2.9545, "step": 37178 }, { "epoch": 1.82, "grad_norm": 0.6492032408714294, "learning_rate": 0.00020073101699305822, "loss": 2.8503, "step": 37179 }, { "epoch": 1.82, "grad_norm": 0.6891651153564453, "learning_rate": 0.00020071648824675773, "loss": 3.1972, "step": 37180 }, { "epoch": 1.82, "grad_norm": 0.6541674733161926, "learning_rate": 0.00020070195976195105, "loss": 2.9552, "step": 37181 }, { "epoch": 1.82, "grad_norm": 0.6345112323760986, "learning_rate": 0.0002006874315386762, "loss": 2.9296, "step": 37182 }, { "epoch": 1.82, "grad_norm": 0.6238939762115479, "learning_rate": 0.00020067290357697162, "loss": 2.9192, "step": 37183 }, { "epoch": 1.82, "grad_norm": 0.6258445978164673, "learning_rate": 0.00020065837587687535, "loss": 2.798, "step": 37184 }, { "epoch": 1.82, "grad_norm": 0.6790409088134766, "learning_rate": 0.00020064384843842588, "loss": 3.1312, "step": 37185 }, { "epoch": 1.82, "grad_norm": 0.6279799342155457, "learning_rate": 0.00020062932126166147, "loss": 2.8466, "step": 37186 }, { "epoch": 1.82, "grad_norm": 0.6571778059005737, "learning_rate": 0.00020061479434662017, "loss": 3.1089, "step": 37187 }, { "epoch": 1.82, "grad_norm": 0.6268858909606934, "learning_rate": 0.0002006002676933405, "loss": 2.922, "step": 37188 }, { "epoch": 1.82, "grad_norm": 0.6086148023605347, "learning_rate": 0.0002005857413018605, "loss": 2.881, "step": 37189 }, { "epoch": 1.82, "grad_norm": 0.6448004245758057, "learning_rate": 0.0002005712151722185, "loss": 2.9411, "step": 37190 }, { "epoch": 1.82, "grad_norm": 0.6258628964424133, "learning_rate": 0.0002005566893044529, "loss": 3.1739, "step": 37191 }, { "epoch": 1.82, "grad_norm": 0.6352370381355286, "learning_rate": 0.00020054216369860177, "loss": 2.9686, "step": 37192 }, { "epoch": 1.82, "grad_norm": 0.6704267859458923, "learning_rate": 0.0002005276383547035, "loss": 3.0863, "step": 37193 }, { "epoch": 1.82, "grad_norm": 0.6442691087722778, "learning_rate": 0.00020051311327279618, "loss": 2.9182, "step": 37194 }, { "epoch": 1.82, "grad_norm": 0.6933001279830933, "learning_rate": 0.00020049858845291814, "loss": 3.0166, "step": 37195 }, { "epoch": 1.82, "grad_norm": 0.6700044870376587, "learning_rate": 0.00020048406389510786, "loss": 2.9624, "step": 37196 }, { "epoch": 1.82, "grad_norm": 0.6275744438171387, "learning_rate": 0.00020046953959940324, "loss": 2.9598, "step": 37197 }, { "epoch": 1.82, "grad_norm": 0.6295133829116821, "learning_rate": 0.00020045501556584285, "loss": 2.9946, "step": 37198 }, { "epoch": 1.82, "grad_norm": 0.6550989747047424, "learning_rate": 0.00020044049179446463, "loss": 2.8582, "step": 37199 }, { "epoch": 1.82, "grad_norm": 0.6450958251953125, "learning_rate": 0.00020042596828530706, "loss": 2.9827, "step": 37200 }, { "epoch": 1.82, "grad_norm": 0.6483104228973389, "learning_rate": 0.00020041144503840836, "loss": 2.8909, "step": 37201 }, { "epoch": 1.82, "grad_norm": 0.6364044547080994, "learning_rate": 0.00020039692205380666, "loss": 2.9006, "step": 37202 }, { "epoch": 1.82, "grad_norm": 0.6128686666488647, "learning_rate": 0.00020038239933154042, "loss": 2.91, "step": 37203 }, { "epoch": 1.82, "grad_norm": 0.6625557541847229, "learning_rate": 0.0002003678768716477, "loss": 2.8524, "step": 37204 }, { "epoch": 1.82, "grad_norm": 0.6292389631271362, "learning_rate": 0.00020035335467416676, "loss": 3.1534, "step": 37205 }, { "epoch": 1.82, "grad_norm": 0.6541522741317749, "learning_rate": 0.000200338832739136, "loss": 2.8124, "step": 37206 }, { "epoch": 1.82, "grad_norm": 0.6432307958602905, "learning_rate": 0.00020032431106659353, "loss": 2.9206, "step": 37207 }, { "epoch": 1.82, "grad_norm": 0.6428229212760925, "learning_rate": 0.00020030978965657775, "loss": 2.7471, "step": 37208 }, { "epoch": 1.82, "grad_norm": 0.6233975887298584, "learning_rate": 0.00020029526850912663, "loss": 2.965, "step": 37209 }, { "epoch": 1.82, "grad_norm": 0.6788715720176697, "learning_rate": 0.00020028074762427865, "loss": 3.0143, "step": 37210 }, { "epoch": 1.82, "grad_norm": 0.6513542532920837, "learning_rate": 0.00020026622700207206, "loss": 3.0382, "step": 37211 }, { "epoch": 1.82, "grad_norm": 0.6843482255935669, "learning_rate": 0.00020025170664254488, "loss": 2.7828, "step": 37212 }, { "epoch": 1.82, "grad_norm": 0.5919236540794373, "learning_rate": 0.00020023718654573567, "loss": 2.951, "step": 37213 }, { "epoch": 1.82, "grad_norm": 0.6735742092132568, "learning_rate": 0.0002002226667116824, "loss": 3.0408, "step": 37214 }, { "epoch": 1.82, "grad_norm": 0.6410362720489502, "learning_rate": 0.0002002081471404234, "loss": 3.0412, "step": 37215 }, { "epoch": 1.82, "grad_norm": 0.623496949672699, "learning_rate": 0.0002001936278319971, "loss": 3.1536, "step": 37216 }, { "epoch": 1.82, "grad_norm": 0.5960062742233276, "learning_rate": 0.0002001791087864414, "loss": 3.2316, "step": 37217 }, { "epoch": 1.82, "grad_norm": 0.6214715242385864, "learning_rate": 0.00020016459000379483, "loss": 2.8934, "step": 37218 }, { "epoch": 1.82, "grad_norm": 0.6391525268554688, "learning_rate": 0.00020015007148409548, "loss": 2.8583, "step": 37219 }, { "epoch": 1.82, "grad_norm": 0.616982102394104, "learning_rate": 0.00020013555322738157, "loss": 3.1318, "step": 37220 }, { "epoch": 1.82, "grad_norm": 0.665523111820221, "learning_rate": 0.0002001210352336915, "loss": 2.9528, "step": 37221 }, { "epoch": 1.82, "grad_norm": 0.6505708694458008, "learning_rate": 0.00020010651750306332, "loss": 2.9226, "step": 37222 }, { "epoch": 1.82, "grad_norm": 0.6481497883796692, "learning_rate": 0.00020009200003553545, "loss": 3.2122, "step": 37223 }, { "epoch": 1.82, "grad_norm": 0.6302831768989563, "learning_rate": 0.00020007748283114586, "loss": 3.0263, "step": 37224 }, { "epoch": 1.82, "grad_norm": 0.6752156019210815, "learning_rate": 0.00020006296588993304, "loss": 3.0486, "step": 37225 }, { "epoch": 1.82, "grad_norm": 0.6285132169723511, "learning_rate": 0.0002000484492119352, "loss": 2.983, "step": 37226 }, { "epoch": 1.82, "grad_norm": 0.651867151260376, "learning_rate": 0.0002000339327971904, "loss": 3.0873, "step": 37227 }, { "epoch": 1.82, "grad_norm": 0.6278545260429382, "learning_rate": 0.00020001941664573705, "loss": 3.0538, "step": 37228 }, { "epoch": 1.82, "grad_norm": 0.6236552000045776, "learning_rate": 0.0002000049007576133, "loss": 2.9181, "step": 37229 }, { "epoch": 1.82, "grad_norm": 0.647704005241394, "learning_rate": 0.00019999038513285732, "loss": 2.9183, "step": 37230 }, { "epoch": 1.82, "grad_norm": 0.6166619658470154, "learning_rate": 0.00019997586977150752, "loss": 3.0254, "step": 37231 }, { "epoch": 1.82, "grad_norm": 0.636547863483429, "learning_rate": 0.000199961354673602, "loss": 3.1082, "step": 37232 }, { "epoch": 1.82, "grad_norm": 0.6511338353157043, "learning_rate": 0.00019994683983917905, "loss": 2.9792, "step": 37233 }, { "epoch": 1.82, "grad_norm": 0.6390637755393982, "learning_rate": 0.0001999323252682768, "loss": 3.0991, "step": 37234 }, { "epoch": 1.82, "grad_norm": 0.6388706564903259, "learning_rate": 0.0001999178109609335, "loss": 3.0653, "step": 37235 }, { "epoch": 1.82, "grad_norm": 0.629075825214386, "learning_rate": 0.00019990329691718753, "loss": 2.9239, "step": 37236 }, { "epoch": 1.82, "grad_norm": 0.6319878101348877, "learning_rate": 0.00019988878313707698, "loss": 3.2043, "step": 37237 }, { "epoch": 1.82, "grad_norm": 0.6719925403594971, "learning_rate": 0.0001998742696206402, "loss": 3.0834, "step": 37238 }, { "epoch": 1.82, "grad_norm": 0.6301445960998535, "learning_rate": 0.00019985975636791517, "loss": 3.2402, "step": 37239 }, { "epoch": 1.83, "grad_norm": 0.6451971530914307, "learning_rate": 0.00019984524337894038, "loss": 3.2114, "step": 37240 }, { "epoch": 1.83, "grad_norm": 0.6485806107521057, "learning_rate": 0.00019983073065375393, "loss": 2.8606, "step": 37241 }, { "epoch": 1.83, "grad_norm": 0.6495173573493958, "learning_rate": 0.00019981621819239396, "loss": 2.9877, "step": 37242 }, { "epoch": 1.83, "grad_norm": 0.6307535171508789, "learning_rate": 0.0001998017059948989, "loss": 2.8788, "step": 37243 }, { "epoch": 1.83, "grad_norm": 0.6974294781684875, "learning_rate": 0.0001997871940613068, "loss": 3.1262, "step": 37244 }, { "epoch": 1.83, "grad_norm": 0.6444449424743652, "learning_rate": 0.00019977268239165604, "loss": 3.0831, "step": 37245 }, { "epoch": 1.83, "grad_norm": 0.6444473266601562, "learning_rate": 0.0001997581709859846, "loss": 2.886, "step": 37246 }, { "epoch": 1.83, "grad_norm": 0.6505341529846191, "learning_rate": 0.0001997436598443309, "loss": 2.8883, "step": 37247 }, { "epoch": 1.83, "grad_norm": 0.6732316017150879, "learning_rate": 0.00019972914896673318, "loss": 3.0509, "step": 37248 }, { "epoch": 1.83, "grad_norm": 0.6295874714851379, "learning_rate": 0.00019971463835322947, "loss": 3.2458, "step": 37249 }, { "epoch": 1.83, "grad_norm": 0.6318504214286804, "learning_rate": 0.00019970012800385822, "loss": 2.9675, "step": 37250 }, { "epoch": 1.83, "grad_norm": 0.6528605818748474, "learning_rate": 0.00019968561791865744, "loss": 2.8613, "step": 37251 }, { "epoch": 1.83, "grad_norm": 0.649989902973175, "learning_rate": 0.0001996711080976654, "loss": 3.1373, "step": 37252 }, { "epoch": 1.83, "grad_norm": 0.6412109136581421, "learning_rate": 0.00019965659854092048, "loss": 2.7814, "step": 37253 }, { "epoch": 1.83, "grad_norm": 0.6689494848251343, "learning_rate": 0.0001996420892484606, "loss": 2.877, "step": 37254 }, { "epoch": 1.83, "grad_norm": 0.6729632616043091, "learning_rate": 0.00019962758022032441, "loss": 3.1106, "step": 37255 }, { "epoch": 1.83, "grad_norm": 0.6817084550857544, "learning_rate": 0.00019961307145654962, "loss": 3.0034, "step": 37256 }, { "epoch": 1.83, "grad_norm": 0.6519462466239929, "learning_rate": 0.0001995985629571747, "loss": 3.2566, "step": 37257 }, { "epoch": 1.83, "grad_norm": 0.6344906687736511, "learning_rate": 0.00019958405472223796, "loss": 3.128, "step": 37258 }, { "epoch": 1.83, "grad_norm": 0.6581130027770996, "learning_rate": 0.0001995695467517774, "loss": 3.1058, "step": 37259 }, { "epoch": 1.83, "grad_norm": 0.6368814706802368, "learning_rate": 0.00019955503904583142, "loss": 2.973, "step": 37260 }, { "epoch": 1.83, "grad_norm": 0.6296173334121704, "learning_rate": 0.000199540531604438, "loss": 2.9619, "step": 37261 }, { "epoch": 1.83, "grad_norm": 0.6157219409942627, "learning_rate": 0.00019952602442763553, "loss": 3.0496, "step": 37262 }, { "epoch": 1.83, "grad_norm": 0.645746111869812, "learning_rate": 0.0001995115175154623, "loss": 2.9254, "step": 37263 }, { "epoch": 1.83, "grad_norm": 0.6452582478523254, "learning_rate": 0.00019949701086795625, "loss": 2.8589, "step": 37264 }, { "epoch": 1.83, "grad_norm": 0.6056995987892151, "learning_rate": 0.00019948250448515578, "loss": 2.9054, "step": 37265 }, { "epoch": 1.83, "grad_norm": 0.649617612361908, "learning_rate": 0.00019946799836709908, "loss": 2.7798, "step": 37266 }, { "epoch": 1.83, "grad_norm": 0.6778903007507324, "learning_rate": 0.0001994534925138242, "loss": 2.7532, "step": 37267 }, { "epoch": 1.83, "grad_norm": 0.6305443644523621, "learning_rate": 0.0001994389869253696, "loss": 3.0986, "step": 37268 }, { "epoch": 1.83, "grad_norm": 0.6616302132606506, "learning_rate": 0.00019942448160177335, "loss": 3.2042, "step": 37269 }, { "epoch": 1.83, "grad_norm": 0.6154059767723083, "learning_rate": 0.00019940997654307367, "loss": 2.8201, "step": 37270 }, { "epoch": 1.83, "grad_norm": 0.6408655643463135, "learning_rate": 0.00019939547174930867, "loss": 3.0539, "step": 37271 }, { "epoch": 1.83, "grad_norm": 0.6339004635810852, "learning_rate": 0.00019938096722051666, "loss": 2.887, "step": 37272 }, { "epoch": 1.83, "grad_norm": 0.6301832795143127, "learning_rate": 0.00019936646295673586, "loss": 3.0111, "step": 37273 }, { "epoch": 1.83, "grad_norm": 0.6578487157821655, "learning_rate": 0.00019935195895800435, "loss": 3.0007, "step": 37274 }, { "epoch": 1.83, "grad_norm": 0.6201621294021606, "learning_rate": 0.00019933745522436052, "loss": 3.0404, "step": 37275 }, { "epoch": 1.83, "grad_norm": 0.63294517993927, "learning_rate": 0.00019932295175584234, "loss": 2.8971, "step": 37276 }, { "epoch": 1.83, "grad_norm": 0.6506541967391968, "learning_rate": 0.00019930844855248816, "loss": 3.0214, "step": 37277 }, { "epoch": 1.83, "grad_norm": 0.6443052887916565, "learning_rate": 0.00019929394561433628, "loss": 2.9546, "step": 37278 }, { "epoch": 1.83, "grad_norm": 0.641716480255127, "learning_rate": 0.00019927944294142455, "loss": 3.1024, "step": 37279 }, { "epoch": 1.83, "grad_norm": 0.6462839245796204, "learning_rate": 0.00019926494053379155, "loss": 3.033, "step": 37280 }, { "epoch": 1.83, "grad_norm": 0.6586825251579285, "learning_rate": 0.00019925043839147527, "loss": 2.9807, "step": 37281 }, { "epoch": 1.83, "grad_norm": 0.6649317145347595, "learning_rate": 0.00019923593651451386, "loss": 3.1034, "step": 37282 }, { "epoch": 1.83, "grad_norm": 0.6174647212028503, "learning_rate": 0.00019922143490294576, "loss": 2.9981, "step": 37283 }, { "epoch": 1.83, "grad_norm": 0.7386031746864319, "learning_rate": 0.00019920693355680893, "loss": 2.9414, "step": 37284 }, { "epoch": 1.83, "grad_norm": 0.7577504515647888, "learning_rate": 0.00019919243247614168, "loss": 3.0452, "step": 37285 }, { "epoch": 1.83, "grad_norm": 0.624578058719635, "learning_rate": 0.00019917793166098204, "loss": 3.1585, "step": 37286 }, { "epoch": 1.83, "grad_norm": 0.6715158224105835, "learning_rate": 0.0001991634311113684, "loss": 3.2085, "step": 37287 }, { "epoch": 1.83, "grad_norm": 0.672528862953186, "learning_rate": 0.00019914893082733888, "loss": 3.1, "step": 37288 }, { "epoch": 1.83, "grad_norm": 0.6003857254981995, "learning_rate": 0.0001991344308089316, "loss": 3.0433, "step": 37289 }, { "epoch": 1.83, "grad_norm": 0.6297593116760254, "learning_rate": 0.00019911993105618494, "loss": 2.9949, "step": 37290 }, { "epoch": 1.83, "grad_norm": 0.6269044280052185, "learning_rate": 0.00019910543156913688, "loss": 3.0166, "step": 37291 }, { "epoch": 1.83, "grad_norm": 0.6183241009712219, "learning_rate": 0.00019909093234782565, "loss": 3.0963, "step": 37292 }, { "epoch": 1.83, "grad_norm": 0.6317001581192017, "learning_rate": 0.0001990764333922896, "loss": 3.1374, "step": 37293 }, { "epoch": 1.83, "grad_norm": 0.6698792576789856, "learning_rate": 0.00019906193470256678, "loss": 2.7781, "step": 37294 }, { "epoch": 1.83, "grad_norm": 0.6696431040763855, "learning_rate": 0.00019904743627869543, "loss": 3.0622, "step": 37295 }, { "epoch": 1.83, "grad_norm": 0.6266303062438965, "learning_rate": 0.00019903293812071357, "loss": 3.1569, "step": 37296 }, { "epoch": 1.83, "grad_norm": 0.6246880292892456, "learning_rate": 0.0001990184402286595, "loss": 3.3158, "step": 37297 }, { "epoch": 1.83, "grad_norm": 0.7014569044113159, "learning_rate": 0.0001990039426025716, "loss": 2.9063, "step": 37298 }, { "epoch": 1.83, "grad_norm": 0.5825400352478027, "learning_rate": 0.0001989894452424878, "loss": 2.9569, "step": 37299 }, { "epoch": 1.83, "grad_norm": 0.6470881104469299, "learning_rate": 0.00019897494814844643, "loss": 2.9374, "step": 37300 }, { "epoch": 1.83, "grad_norm": 0.6545881628990173, "learning_rate": 0.00019896045132048544, "loss": 3.0919, "step": 37301 }, { "epoch": 1.83, "grad_norm": 0.701209306716919, "learning_rate": 0.00019894595475864325, "loss": 3.0953, "step": 37302 }, { "epoch": 1.83, "grad_norm": 0.7519078254699707, "learning_rate": 0.00019893145846295806, "loss": 2.9928, "step": 37303 }, { "epoch": 1.83, "grad_norm": 0.6551284193992615, "learning_rate": 0.00019891696243346782, "loss": 3.2031, "step": 37304 }, { "epoch": 1.83, "grad_norm": 0.6280120611190796, "learning_rate": 0.00019890246667021095, "loss": 3.0596, "step": 37305 }, { "epoch": 1.83, "grad_norm": 0.62892746925354, "learning_rate": 0.0001988879711732255, "loss": 2.8681, "step": 37306 }, { "epoch": 1.83, "grad_norm": 0.6775756478309631, "learning_rate": 0.00019887347594254958, "loss": 2.9004, "step": 37307 }, { "epoch": 1.83, "grad_norm": 0.6382337808609009, "learning_rate": 0.0001988589809782216, "loss": 2.8997, "step": 37308 }, { "epoch": 1.83, "grad_norm": 0.639171838760376, "learning_rate": 0.00019884448628027955, "loss": 3.1746, "step": 37309 }, { "epoch": 1.83, "grad_norm": 0.643237829208374, "learning_rate": 0.0001988299918487617, "loss": 3.0516, "step": 37310 }, { "epoch": 1.83, "grad_norm": 0.6277839541435242, "learning_rate": 0.00019881549768370604, "loss": 2.9276, "step": 37311 }, { "epoch": 1.83, "grad_norm": 0.6903034448623657, "learning_rate": 0.000198801003785151, "loss": 2.7113, "step": 37312 }, { "epoch": 1.83, "grad_norm": 0.6506561636924744, "learning_rate": 0.00019878651015313464, "loss": 3.142, "step": 37313 }, { "epoch": 1.83, "grad_norm": 0.6318259835243225, "learning_rate": 0.00019877201678769504, "loss": 3.018, "step": 37314 }, { "epoch": 1.83, "grad_norm": 0.660740852355957, "learning_rate": 0.0001987575236888706, "loss": 3.2023, "step": 37315 }, { "epoch": 1.83, "grad_norm": 0.6094775199890137, "learning_rate": 0.00019874303085669923, "loss": 2.8146, "step": 37316 }, { "epoch": 1.83, "grad_norm": 0.64765465259552, "learning_rate": 0.0001987285382912194, "loss": 3.1413, "step": 37317 }, { "epoch": 1.83, "grad_norm": 0.6887152791023254, "learning_rate": 0.00019871404599246893, "loss": 3.0623, "step": 37318 }, { "epoch": 1.83, "grad_norm": 0.6461672782897949, "learning_rate": 0.0001986995539604862, "loss": 3.0541, "step": 37319 }, { "epoch": 1.83, "grad_norm": 0.6719304323196411, "learning_rate": 0.0001986850621953094, "loss": 3.1318, "step": 37320 }, { "epoch": 1.83, "grad_norm": 0.6449258327484131, "learning_rate": 0.00019867057069697665, "loss": 2.7289, "step": 37321 }, { "epoch": 1.83, "grad_norm": 0.6388571858406067, "learning_rate": 0.00019865607946552616, "loss": 2.9219, "step": 37322 }, { "epoch": 1.83, "grad_norm": 0.6894384622573853, "learning_rate": 0.00019864158850099595, "loss": 3.1242, "step": 37323 }, { "epoch": 1.83, "grad_norm": 0.6467133164405823, "learning_rate": 0.00019862709780342432, "loss": 3.1348, "step": 37324 }, { "epoch": 1.83, "grad_norm": 0.6446911096572876, "learning_rate": 0.0001986126073728495, "loss": 2.8621, "step": 37325 }, { "epoch": 1.83, "grad_norm": 0.6543609499931335, "learning_rate": 0.0001985981172093094, "loss": 2.8432, "step": 37326 }, { "epoch": 1.83, "grad_norm": 0.6412947773933411, "learning_rate": 0.00019858362731284247, "loss": 2.9529, "step": 37327 }, { "epoch": 1.83, "grad_norm": 0.6755062341690063, "learning_rate": 0.0001985691376834867, "loss": 3.0239, "step": 37328 }, { "epoch": 1.83, "grad_norm": 0.6204738616943359, "learning_rate": 0.00019855464832128024, "loss": 3.0975, "step": 37329 }, { "epoch": 1.83, "grad_norm": 0.6035327315330505, "learning_rate": 0.00019854015922626141, "loss": 3.0631, "step": 37330 }, { "epoch": 1.83, "grad_norm": 0.6611145734786987, "learning_rate": 0.00019852567039846825, "loss": 2.9024, "step": 37331 }, { "epoch": 1.83, "grad_norm": 0.6760570406913757, "learning_rate": 0.00019851118183793898, "loss": 2.8843, "step": 37332 }, { "epoch": 1.83, "grad_norm": 0.6354165077209473, "learning_rate": 0.00019849669354471162, "loss": 3.0704, "step": 37333 }, { "epoch": 1.83, "grad_norm": 0.6146211624145508, "learning_rate": 0.00019848220551882447, "loss": 3.0125, "step": 37334 }, { "epoch": 1.83, "grad_norm": 0.726223349571228, "learning_rate": 0.00019846771776031572, "loss": 2.838, "step": 37335 }, { "epoch": 1.83, "grad_norm": 0.6218348145484924, "learning_rate": 0.00019845323026922339, "loss": 3.0697, "step": 37336 }, { "epoch": 1.83, "grad_norm": 0.6220173835754395, "learning_rate": 0.00019843874304558578, "loss": 3.224, "step": 37337 }, { "epoch": 1.83, "grad_norm": 0.6685629487037659, "learning_rate": 0.00019842425608944085, "loss": 3.0376, "step": 37338 }, { "epoch": 1.83, "grad_norm": 0.6236324906349182, "learning_rate": 0.00019840976940082696, "loss": 3.0332, "step": 37339 }, { "epoch": 1.83, "grad_norm": 0.6823529601097107, "learning_rate": 0.00019839528297978227, "loss": 2.8418, "step": 37340 }, { "epoch": 1.83, "grad_norm": 0.6390722393989563, "learning_rate": 0.0001983807968263447, "loss": 3.0181, "step": 37341 }, { "epoch": 1.83, "grad_norm": 0.6559518575668335, "learning_rate": 0.00019836631094055266, "loss": 2.774, "step": 37342 }, { "epoch": 1.83, "grad_norm": 0.6333982348442078, "learning_rate": 0.0001983518253224442, "loss": 2.8799, "step": 37343 }, { "epoch": 1.83, "grad_norm": 0.6427645683288574, "learning_rate": 0.00019833733997205735, "loss": 2.924, "step": 37344 }, { "epoch": 1.83, "grad_norm": 0.6432594656944275, "learning_rate": 0.0001983228548894305, "loss": 2.9778, "step": 37345 }, { "epoch": 1.83, "grad_norm": 0.6509485244750977, "learning_rate": 0.00019830837007460162, "loss": 3.065, "step": 37346 }, { "epoch": 1.83, "grad_norm": 0.6645312905311584, "learning_rate": 0.000198293885527609, "loss": 3.0997, "step": 37347 }, { "epoch": 1.83, "grad_norm": 0.6213983297348022, "learning_rate": 0.00019827940124849061, "loss": 3.0474, "step": 37348 }, { "epoch": 1.83, "grad_norm": 0.7174972295761108, "learning_rate": 0.00019826491723728474, "loss": 3.0375, "step": 37349 }, { "epoch": 1.83, "grad_norm": 0.6308889389038086, "learning_rate": 0.00019825043349402954, "loss": 2.937, "step": 37350 }, { "epoch": 1.83, "grad_norm": 0.6400142312049866, "learning_rate": 0.00019823595001876301, "loss": 3.1328, "step": 37351 }, { "epoch": 1.83, "grad_norm": 0.6188294887542725, "learning_rate": 0.00019822146681152354, "loss": 2.9726, "step": 37352 }, { "epoch": 1.83, "grad_norm": 0.6347734928131104, "learning_rate": 0.0001982069838723491, "loss": 2.8248, "step": 37353 }, { "epoch": 1.83, "grad_norm": 0.6330137252807617, "learning_rate": 0.00019819250120127778, "loss": 3.0705, "step": 37354 }, { "epoch": 1.83, "grad_norm": 0.6539512872695923, "learning_rate": 0.00019817801879834794, "loss": 2.9606, "step": 37355 }, { "epoch": 1.83, "grad_norm": 0.6931697726249695, "learning_rate": 0.00019816353666359752, "loss": 2.8545, "step": 37356 }, { "epoch": 1.83, "grad_norm": 0.6446145176887512, "learning_rate": 0.0001981490547970648, "loss": 3.0497, "step": 37357 }, { "epoch": 1.83, "grad_norm": 0.6381685137748718, "learning_rate": 0.00019813457319878782, "loss": 3.0407, "step": 37358 }, { "epoch": 1.83, "grad_norm": 0.7098675966262817, "learning_rate": 0.0001981200918688047, "loss": 3.0888, "step": 37359 }, { "epoch": 1.83, "grad_norm": 0.623608410358429, "learning_rate": 0.00019810561080715384, "loss": 2.9005, "step": 37360 }, { "epoch": 1.83, "grad_norm": 0.6890565156936646, "learning_rate": 0.00019809113001387305, "loss": 2.7902, "step": 37361 }, { "epoch": 1.83, "grad_norm": 0.6344024538993835, "learning_rate": 0.00019807664948900071, "loss": 2.9944, "step": 37362 }, { "epoch": 1.83, "grad_norm": 0.6248427033424377, "learning_rate": 0.00019806216923257475, "loss": 3.179, "step": 37363 }, { "epoch": 1.83, "grad_norm": 0.6155762672424316, "learning_rate": 0.00019804768924463348, "loss": 2.9478, "step": 37364 }, { "epoch": 1.83, "grad_norm": 0.6568313837051392, "learning_rate": 0.00019803320952521501, "loss": 3.0381, "step": 37365 }, { "epoch": 1.83, "grad_norm": 0.6843270063400269, "learning_rate": 0.0001980187300743573, "loss": 2.9444, "step": 37366 }, { "epoch": 1.83, "grad_norm": 0.6298990845680237, "learning_rate": 0.00019800425089209881, "loss": 3.0637, "step": 37367 }, { "epoch": 1.83, "grad_norm": 0.6255677938461304, "learning_rate": 0.00019798977197847743, "loss": 3.1191, "step": 37368 }, { "epoch": 1.83, "grad_norm": 0.6226163506507874, "learning_rate": 0.00019797529333353127, "loss": 3.1296, "step": 37369 }, { "epoch": 1.83, "grad_norm": 0.6100264191627502, "learning_rate": 0.00019796081495729868, "loss": 3.1796, "step": 37370 }, { "epoch": 1.83, "grad_norm": 0.644378662109375, "learning_rate": 0.00019794633684981758, "loss": 2.7442, "step": 37371 }, { "epoch": 1.83, "grad_norm": 0.6284385919570923, "learning_rate": 0.00019793185901112627, "loss": 3.1884, "step": 37372 }, { "epoch": 1.83, "grad_norm": 0.659661054611206, "learning_rate": 0.0001979173814412627, "loss": 3.0421, "step": 37373 }, { "epoch": 1.83, "grad_norm": 0.6167201399803162, "learning_rate": 0.00019790290414026514, "loss": 3.1204, "step": 37374 }, { "epoch": 1.83, "grad_norm": 0.6492601037025452, "learning_rate": 0.00019788842710817173, "loss": 3.0217, "step": 37375 }, { "epoch": 1.83, "grad_norm": 0.8590417504310608, "learning_rate": 0.00019787395034502047, "loss": 2.93, "step": 37376 }, { "epoch": 1.83, "grad_norm": 0.6162007451057434, "learning_rate": 0.00019785947385084966, "loss": 2.8552, "step": 37377 }, { "epoch": 1.83, "grad_norm": 0.6711397767066956, "learning_rate": 0.0001978449976256972, "loss": 3.0362, "step": 37378 }, { "epoch": 1.83, "grad_norm": 0.6198904514312744, "learning_rate": 0.00019783052166960147, "loss": 2.8681, "step": 37379 }, { "epoch": 1.83, "grad_norm": 0.6615649461746216, "learning_rate": 0.00019781604598260048, "loss": 3.008, "step": 37380 }, { "epoch": 1.83, "grad_norm": 0.6402595639228821, "learning_rate": 0.0001978015705647323, "loss": 2.8823, "step": 37381 }, { "epoch": 1.83, "grad_norm": 0.6121137738227844, "learning_rate": 0.0001977870954160352, "loss": 3.1466, "step": 37382 }, { "epoch": 1.83, "grad_norm": 0.63599693775177, "learning_rate": 0.00019777262053654714, "loss": 2.923, "step": 37383 }, { "epoch": 1.83, "grad_norm": 0.6528685092926025, "learning_rate": 0.0001977581459263063, "loss": 2.9125, "step": 37384 }, { "epoch": 1.83, "grad_norm": 0.6184086799621582, "learning_rate": 0.00019774367158535095, "loss": 3.0302, "step": 37385 }, { "epoch": 1.83, "grad_norm": 0.6583097577095032, "learning_rate": 0.00019772919751371903, "loss": 3.0069, "step": 37386 }, { "epoch": 1.83, "grad_norm": 0.6171321868896484, "learning_rate": 0.00019771472371144874, "loss": 2.911, "step": 37387 }, { "epoch": 1.83, "grad_norm": 0.6588572263717651, "learning_rate": 0.0001977002501785781, "loss": 3.247, "step": 37388 }, { "epoch": 1.83, "grad_norm": 0.6455833911895752, "learning_rate": 0.00019768577691514538, "loss": 2.9604, "step": 37389 }, { "epoch": 1.83, "grad_norm": 0.6067509651184082, "learning_rate": 0.00019767130392118868, "loss": 2.9472, "step": 37390 }, { "epoch": 1.83, "grad_norm": 0.6183087229728699, "learning_rate": 0.00019765683119674595, "loss": 3.1244, "step": 37391 }, { "epoch": 1.83, "grad_norm": 0.7128816843032837, "learning_rate": 0.0001976423587418555, "loss": 3.1313, "step": 37392 }, { "epoch": 1.83, "grad_norm": 0.6646550893783569, "learning_rate": 0.0001976278865565554, "loss": 2.901, "step": 37393 }, { "epoch": 1.83, "grad_norm": 0.6627399325370789, "learning_rate": 0.0001976134146408838, "loss": 3.0871, "step": 37394 }, { "epoch": 1.83, "grad_norm": 0.6553120017051697, "learning_rate": 0.00019759894299487858, "loss": 3.2268, "step": 37395 }, { "epoch": 1.83, "grad_norm": 0.6263793110847473, "learning_rate": 0.00019758447161857812, "loss": 3.1037, "step": 37396 }, { "epoch": 1.83, "grad_norm": 0.647152304649353, "learning_rate": 0.0001975700005120205, "loss": 2.8602, "step": 37397 }, { "epoch": 1.83, "grad_norm": 0.6689240336418152, "learning_rate": 0.00019755552967524372, "loss": 3.1095, "step": 37398 }, { "epoch": 1.83, "grad_norm": 0.6836102604866028, "learning_rate": 0.00019754105910828603, "loss": 2.922, "step": 37399 }, { "epoch": 1.83, "grad_norm": 0.6442393064498901, "learning_rate": 0.00019752658881118536, "loss": 2.9583, "step": 37400 }, { "epoch": 1.83, "grad_norm": 0.6044512391090393, "learning_rate": 0.00019751211878398002, "loss": 3.21, "step": 37401 }, { "epoch": 1.83, "grad_norm": 0.627376139163971, "learning_rate": 0.00019749764902670804, "loss": 3.017, "step": 37402 }, { "epoch": 1.83, "grad_norm": 0.6755691766738892, "learning_rate": 0.00019748317953940744, "loss": 3.0251, "step": 37403 }, { "epoch": 1.83, "grad_norm": 0.621547520160675, "learning_rate": 0.00019746871032211657, "loss": 3.0488, "step": 37404 }, { "epoch": 1.83, "grad_norm": 0.6141914129257202, "learning_rate": 0.00019745424137487328, "loss": 3.1355, "step": 37405 }, { "epoch": 1.83, "grad_norm": 0.6534335017204285, "learning_rate": 0.00019743977269771573, "loss": 3.0441, "step": 37406 }, { "epoch": 1.83, "grad_norm": 0.6744629144668579, "learning_rate": 0.0001974253042906822, "loss": 3.0055, "step": 37407 }, { "epoch": 1.83, "grad_norm": 0.6473421454429626, "learning_rate": 0.0001974108361538106, "loss": 2.8622, "step": 37408 }, { "epoch": 1.83, "grad_norm": 0.6605812311172485, "learning_rate": 0.00019739636828713922, "loss": 3.0587, "step": 37409 }, { "epoch": 1.83, "grad_norm": 0.6951032876968384, "learning_rate": 0.0001973819006907059, "loss": 3.0455, "step": 37410 }, { "epoch": 1.83, "grad_norm": 0.6221358180046082, "learning_rate": 0.000197367433364549, "loss": 3.1964, "step": 37411 }, { "epoch": 1.83, "grad_norm": 0.6235369443893433, "learning_rate": 0.00019735296630870656, "loss": 2.9421, "step": 37412 }, { "epoch": 1.83, "grad_norm": 0.6678821444511414, "learning_rate": 0.00019733849952321655, "loss": 2.8644, "step": 37413 }, { "epoch": 1.83, "grad_norm": 0.6573663949966431, "learning_rate": 0.00019732403300811729, "loss": 3.0983, "step": 37414 }, { "epoch": 1.83, "grad_norm": 0.6279765367507935, "learning_rate": 0.00019730956676344667, "loss": 2.7258, "step": 37415 }, { "epoch": 1.83, "grad_norm": 0.6428644061088562, "learning_rate": 0.0001972951007892429, "loss": 2.9142, "step": 37416 }, { "epoch": 1.83, "grad_norm": 0.6456839442253113, "learning_rate": 0.0001972806350855441, "loss": 2.9727, "step": 37417 }, { "epoch": 1.83, "grad_norm": 0.6260696649551392, "learning_rate": 0.00019726616965238828, "loss": 3.1471, "step": 37418 }, { "epoch": 1.83, "grad_norm": 0.6509988307952881, "learning_rate": 0.00019725170448981377, "loss": 2.923, "step": 37419 }, { "epoch": 1.83, "grad_norm": 0.6517656445503235, "learning_rate": 0.0001972372395978583, "loss": 2.9567, "step": 37420 }, { "epoch": 1.83, "grad_norm": 0.6638441681861877, "learning_rate": 0.00019722277497656018, "loss": 3.1524, "step": 37421 }, { "epoch": 1.83, "grad_norm": 0.6186351776123047, "learning_rate": 0.00019720831062595759, "loss": 3.002, "step": 37422 }, { "epoch": 1.83, "grad_norm": 0.6337801814079285, "learning_rate": 0.00019719384654608849, "loss": 2.9353, "step": 37423 }, { "epoch": 1.83, "grad_norm": 0.6668829917907715, "learning_rate": 0.00019717938273699107, "loss": 3.0473, "step": 37424 }, { "epoch": 1.83, "grad_norm": 0.6589478850364685, "learning_rate": 0.00019716491919870324, "loss": 3.1595, "step": 37425 }, { "epoch": 1.83, "grad_norm": 0.6218236684799194, "learning_rate": 0.00019715045593126325, "loss": 2.9327, "step": 37426 }, { "epoch": 1.83, "grad_norm": 0.6272194981575012, "learning_rate": 0.00019713599293470928, "loss": 2.9345, "step": 37427 }, { "epoch": 1.83, "grad_norm": 0.655690610408783, "learning_rate": 0.00019712153020907917, "loss": 3.021, "step": 37428 }, { "epoch": 1.83, "grad_norm": 0.6610934734344482, "learning_rate": 0.0001971070677544113, "loss": 3.0346, "step": 37429 }, { "epoch": 1.83, "grad_norm": 0.6667243242263794, "learning_rate": 0.00019709260557074348, "loss": 3.0287, "step": 37430 }, { "epoch": 1.83, "grad_norm": 0.6456306576728821, "learning_rate": 0.00019707814365811391, "loss": 2.8273, "step": 37431 }, { "epoch": 1.83, "grad_norm": 0.6371907591819763, "learning_rate": 0.00019706368201656086, "loss": 3.0458, "step": 37432 }, { "epoch": 1.83, "grad_norm": 0.6370196342468262, "learning_rate": 0.00019704922064612217, "loss": 3.024, "step": 37433 }, { "epoch": 1.83, "grad_norm": 0.6374519467353821, "learning_rate": 0.00019703475954683606, "loss": 2.9035, "step": 37434 }, { "epoch": 1.83, "grad_norm": 0.7033529877662659, "learning_rate": 0.00019702029871874048, "loss": 3.0132, "step": 37435 }, { "epoch": 1.83, "grad_norm": 0.6438736915588379, "learning_rate": 0.00019700583816187363, "loss": 3.004, "step": 37436 }, { "epoch": 1.83, "grad_norm": 0.8195931911468506, "learning_rate": 0.00019699137787627369, "loss": 2.8433, "step": 37437 }, { "epoch": 1.83, "grad_norm": 0.6257565021514893, "learning_rate": 0.0001969769178619785, "loss": 3.02, "step": 37438 }, { "epoch": 1.83, "grad_norm": 0.6688423752784729, "learning_rate": 0.0001969624581190264, "loss": 3.1861, "step": 37439 }, { "epoch": 1.83, "grad_norm": 0.6222291588783264, "learning_rate": 0.00019694799864745525, "loss": 2.9031, "step": 37440 }, { "epoch": 1.83, "grad_norm": 0.6126012206077576, "learning_rate": 0.00019693353944730325, "loss": 2.9896, "step": 37441 }, { "epoch": 1.83, "grad_norm": 0.6390916705131531, "learning_rate": 0.0001969190805186086, "loss": 2.7175, "step": 37442 }, { "epoch": 1.83, "grad_norm": 0.6593005061149597, "learning_rate": 0.00019690462186140907, "loss": 2.9528, "step": 37443 }, { "epoch": 1.84, "grad_norm": 0.6446021199226379, "learning_rate": 0.00019689016347574308, "loss": 3.0649, "step": 37444 }, { "epoch": 1.84, "grad_norm": 0.6597580909729004, "learning_rate": 0.0001968757053616485, "loss": 2.949, "step": 37445 }, { "epoch": 1.84, "grad_norm": 0.6188734173774719, "learning_rate": 0.0001968612475191634, "loss": 3.0953, "step": 37446 }, { "epoch": 1.84, "grad_norm": 0.6276201009750366, "learning_rate": 0.00019684678994832605, "loss": 2.9462, "step": 37447 }, { "epoch": 1.84, "grad_norm": 0.6137672662734985, "learning_rate": 0.00019683233264917435, "loss": 2.7369, "step": 37448 }, { "epoch": 1.84, "grad_norm": 0.6281610727310181, "learning_rate": 0.00019681787562174645, "loss": 2.8975, "step": 37449 }, { "epoch": 1.84, "grad_norm": 0.675428032875061, "learning_rate": 0.00019680341886608034, "loss": 3.0545, "step": 37450 }, { "epoch": 1.84, "grad_norm": 0.6224192976951599, "learning_rate": 0.0001967889623822142, "loss": 2.9262, "step": 37451 }, { "epoch": 1.84, "grad_norm": 0.6284218430519104, "learning_rate": 0.00019677450617018613, "loss": 2.9402, "step": 37452 }, { "epoch": 1.84, "grad_norm": 0.6586654186248779, "learning_rate": 0.000196760050230034, "loss": 2.9344, "step": 37453 }, { "epoch": 1.84, "grad_norm": 0.6120323538780212, "learning_rate": 0.0001967455945617962, "loss": 2.8945, "step": 37454 }, { "epoch": 1.84, "grad_norm": 0.6563277840614319, "learning_rate": 0.00019673113916551053, "loss": 3.0017, "step": 37455 }, { "epoch": 1.84, "grad_norm": 0.6687843799591064, "learning_rate": 0.0001967166840412151, "loss": 3.0225, "step": 37456 }, { "epoch": 1.84, "grad_norm": 0.6557430028915405, "learning_rate": 0.0001967022291889482, "loss": 3.1305, "step": 37457 }, { "epoch": 1.84, "grad_norm": 0.6802811026573181, "learning_rate": 0.00019668777460874766, "loss": 3.053, "step": 37458 }, { "epoch": 1.84, "grad_norm": 0.6781749725341797, "learning_rate": 0.00019667332030065172, "loss": 2.8287, "step": 37459 }, { "epoch": 1.84, "grad_norm": 0.6726826429367065, "learning_rate": 0.00019665886626469824, "loss": 3.1735, "step": 37460 }, { "epoch": 1.84, "grad_norm": 0.6283700466156006, "learning_rate": 0.00019664441250092542, "loss": 2.9045, "step": 37461 }, { "epoch": 1.84, "grad_norm": 0.6707321405410767, "learning_rate": 0.00019662995900937143, "loss": 3.1, "step": 37462 }, { "epoch": 1.84, "grad_norm": 0.6527551412582397, "learning_rate": 0.0001966155057900742, "loss": 2.8893, "step": 37463 }, { "epoch": 1.84, "grad_norm": 0.6310652494430542, "learning_rate": 0.0001966010528430719, "loss": 3.1475, "step": 37464 }, { "epoch": 1.84, "grad_norm": 0.626528799533844, "learning_rate": 0.00019658660016840237, "loss": 3.2232, "step": 37465 }, { "epoch": 1.84, "grad_norm": 0.6604952812194824, "learning_rate": 0.00019657214776610388, "loss": 3.2123, "step": 37466 }, { "epoch": 1.84, "grad_norm": 0.6406633257865906, "learning_rate": 0.00019655769563621457, "loss": 2.8845, "step": 37467 }, { "epoch": 1.84, "grad_norm": 0.6594672799110413, "learning_rate": 0.00019654324377877224, "loss": 3.0813, "step": 37468 }, { "epoch": 1.84, "grad_norm": 0.6008974313735962, "learning_rate": 0.0001965287921938152, "loss": 3.1406, "step": 37469 }, { "epoch": 1.84, "grad_norm": 0.6440598964691162, "learning_rate": 0.00019651434088138135, "loss": 3.0571, "step": 37470 }, { "epoch": 1.84, "grad_norm": 0.6732915043830872, "learning_rate": 0.00019649988984150887, "loss": 2.9316, "step": 37471 }, { "epoch": 1.84, "grad_norm": 0.6244654059410095, "learning_rate": 0.00019648543907423567, "loss": 3.0407, "step": 37472 }, { "epoch": 1.84, "grad_norm": 0.6534292697906494, "learning_rate": 0.0001964709885795999, "loss": 2.9746, "step": 37473 }, { "epoch": 1.84, "grad_norm": 0.626899778842926, "learning_rate": 0.0001964565383576397, "loss": 2.8462, "step": 37474 }, { "epoch": 1.84, "grad_norm": 0.6582539677619934, "learning_rate": 0.00019644208840839295, "loss": 3.0684, "step": 37475 }, { "epoch": 1.84, "grad_norm": 0.6599977016448975, "learning_rate": 0.00019642763873189796, "loss": 2.8524, "step": 37476 }, { "epoch": 1.84, "grad_norm": 0.6857508420944214, "learning_rate": 0.00019641318932819255, "loss": 2.9823, "step": 37477 }, { "epoch": 1.84, "grad_norm": 0.6616058349609375, "learning_rate": 0.00019639874019731476, "loss": 3.2488, "step": 37478 }, { "epoch": 1.84, "grad_norm": 0.6988261938095093, "learning_rate": 0.0001963842913393029, "loss": 2.8922, "step": 37479 }, { "epoch": 1.84, "grad_norm": 0.6614097952842712, "learning_rate": 0.0001963698427541948, "loss": 2.8559, "step": 37480 }, { "epoch": 1.84, "grad_norm": 0.6063705086708069, "learning_rate": 0.00019635539444202874, "loss": 2.972, "step": 37481 }, { "epoch": 1.84, "grad_norm": 0.6515814065933228, "learning_rate": 0.00019634094640284244, "loss": 2.9072, "step": 37482 }, { "epoch": 1.84, "grad_norm": 0.6901261210441589, "learning_rate": 0.00019632649863667414, "loss": 3.0161, "step": 37483 }, { "epoch": 1.84, "grad_norm": 0.628663957118988, "learning_rate": 0.000196312051143562, "loss": 3.0743, "step": 37484 }, { "epoch": 1.84, "grad_norm": 0.6244681477546692, "learning_rate": 0.0001962976039235439, "loss": 3.1726, "step": 37485 }, { "epoch": 1.84, "grad_norm": 0.6625844836235046, "learning_rate": 0.000196283156976658, "loss": 2.9569, "step": 37486 }, { "epoch": 1.84, "grad_norm": 0.6282349228858948, "learning_rate": 0.0001962687103029422, "loss": 3.1225, "step": 37487 }, { "epoch": 1.84, "grad_norm": 0.6397706866264343, "learning_rate": 0.00019625426390243472, "loss": 3.0557, "step": 37488 }, { "epoch": 1.84, "grad_norm": 0.6789543032646179, "learning_rate": 0.0001962398177751736, "loss": 2.9712, "step": 37489 }, { "epoch": 1.84, "grad_norm": 0.6492639780044556, "learning_rate": 0.0001962253719211967, "loss": 2.9644, "step": 37490 }, { "epoch": 1.84, "grad_norm": 0.6266045570373535, "learning_rate": 0.00019621092634054235, "loss": 3.053, "step": 37491 }, { "epoch": 1.84, "grad_norm": 0.633960485458374, "learning_rate": 0.00019619648103324835, "loss": 3.073, "step": 37492 }, { "epoch": 1.84, "grad_norm": 0.680493175983429, "learning_rate": 0.00019618203599935282, "loss": 3.0203, "step": 37493 }, { "epoch": 1.84, "grad_norm": 0.6308938264846802, "learning_rate": 0.0001961675912388939, "loss": 3.03, "step": 37494 }, { "epoch": 1.84, "grad_norm": 0.6352134943008423, "learning_rate": 0.00019615314675190955, "loss": 3.1219, "step": 37495 }, { "epoch": 1.84, "grad_norm": 0.608165442943573, "learning_rate": 0.00019613870253843785, "loss": 3.0078, "step": 37496 }, { "epoch": 1.84, "grad_norm": 0.6532719731330872, "learning_rate": 0.00019612425859851673, "loss": 2.8869, "step": 37497 }, { "epoch": 1.84, "grad_norm": 0.6677577495574951, "learning_rate": 0.00019610981493218437, "loss": 2.8604, "step": 37498 }, { "epoch": 1.84, "grad_norm": 0.5935999751091003, "learning_rate": 0.0001960953715394788, "loss": 3.175, "step": 37499 }, { "epoch": 1.84, "grad_norm": 0.6511732935905457, "learning_rate": 0.0001960809284204379, "loss": 2.9567, "step": 37500 }, { "epoch": 1.84, "grad_norm": 0.6400048136711121, "learning_rate": 0.00019606648557509996, "loss": 2.9456, "step": 37501 }, { "epoch": 1.84, "grad_norm": 0.6501739025115967, "learning_rate": 0.0001960520430035028, "loss": 2.9489, "step": 37502 }, { "epoch": 1.84, "grad_norm": 0.6374745965003967, "learning_rate": 0.0001960376007056846, "loss": 3.0406, "step": 37503 }, { "epoch": 1.84, "grad_norm": 0.6723983883857727, "learning_rate": 0.0001960231586816834, "loss": 2.8661, "step": 37504 }, { "epoch": 1.84, "grad_norm": 0.6227818727493286, "learning_rate": 0.0001960087169315371, "loss": 2.7228, "step": 37505 }, { "epoch": 1.84, "grad_norm": 0.6718969345092773, "learning_rate": 0.00019599427545528392, "loss": 3.0161, "step": 37506 }, { "epoch": 1.84, "grad_norm": 0.6838817596435547, "learning_rate": 0.00019597983425296176, "loss": 2.8973, "step": 37507 }, { "epoch": 1.84, "grad_norm": 0.710553765296936, "learning_rate": 0.00019596539332460863, "loss": 2.8391, "step": 37508 }, { "epoch": 1.84, "grad_norm": 0.6641603708267212, "learning_rate": 0.00019595095267026277, "loss": 3.1281, "step": 37509 }, { "epoch": 1.84, "grad_norm": 0.6101526618003845, "learning_rate": 0.00019593651228996201, "loss": 3.0391, "step": 37510 }, { "epoch": 1.84, "grad_norm": 0.6275225281715393, "learning_rate": 0.00019592207218374448, "loss": 2.9472, "step": 37511 }, { "epoch": 1.84, "grad_norm": 0.6456922888755798, "learning_rate": 0.0001959076323516481, "loss": 3.2232, "step": 37512 }, { "epoch": 1.84, "grad_norm": 0.6447487473487854, "learning_rate": 0.000195893192793711, "loss": 2.7937, "step": 37513 }, { "epoch": 1.84, "grad_norm": 0.6500653624534607, "learning_rate": 0.00019587875350997127, "loss": 3.1145, "step": 37514 }, { "epoch": 1.84, "grad_norm": 0.5911208391189575, "learning_rate": 0.00019586431450046676, "loss": 3.0935, "step": 37515 }, { "epoch": 1.84, "grad_norm": 0.6784209609031677, "learning_rate": 0.00019584987576523572, "loss": 2.9249, "step": 37516 }, { "epoch": 1.84, "grad_norm": 0.6476365923881531, "learning_rate": 0.000195835437304316, "loss": 2.9984, "step": 37517 }, { "epoch": 1.84, "grad_norm": 0.6495740413665771, "learning_rate": 0.0001958209991177456, "loss": 2.7983, "step": 37518 }, { "epoch": 1.84, "grad_norm": 0.6220631003379822, "learning_rate": 0.0001958065612055628, "loss": 3.1553, "step": 37519 }, { "epoch": 1.84, "grad_norm": 0.6869008541107178, "learning_rate": 0.0001957921235678054, "loss": 2.9559, "step": 37520 }, { "epoch": 1.84, "grad_norm": 0.6284224390983582, "learning_rate": 0.00019577768620451154, "loss": 2.9683, "step": 37521 }, { "epoch": 1.84, "grad_norm": 0.6778497695922852, "learning_rate": 0.00019576324911571908, "loss": 3.1064, "step": 37522 }, { "epoch": 1.84, "grad_norm": 0.6434932947158813, "learning_rate": 0.00019574881230146615, "loss": 3.1636, "step": 37523 }, { "epoch": 1.84, "grad_norm": 0.7298178672790527, "learning_rate": 0.0001957343757617909, "loss": 3.0143, "step": 37524 }, { "epoch": 1.84, "grad_norm": 0.6708294749259949, "learning_rate": 0.0001957199394967312, "loss": 2.9184, "step": 37525 }, { "epoch": 1.84, "grad_norm": 0.6386827826499939, "learning_rate": 0.0001957055035063252, "loss": 3.0899, "step": 37526 }, { "epoch": 1.84, "grad_norm": 0.7549542784690857, "learning_rate": 0.00019569106779061067, "loss": 2.9044, "step": 37527 }, { "epoch": 1.84, "grad_norm": 0.7159115076065063, "learning_rate": 0.00019567663234962586, "loss": 3.0351, "step": 37528 }, { "epoch": 1.84, "grad_norm": 0.643603503704071, "learning_rate": 0.00019566219718340877, "loss": 3.097, "step": 37529 }, { "epoch": 1.84, "grad_norm": 0.6065301895141602, "learning_rate": 0.00019564776229199726, "loss": 2.8429, "step": 37530 }, { "epoch": 1.84, "grad_norm": 0.6380607485771179, "learning_rate": 0.00019563332767542964, "loss": 3.0454, "step": 37531 }, { "epoch": 1.84, "grad_norm": 0.6211923956871033, "learning_rate": 0.0001956188933337436, "loss": 2.7386, "step": 37532 }, { "epoch": 1.84, "grad_norm": 0.6757135987281799, "learning_rate": 0.0001956044592669773, "loss": 2.9283, "step": 37533 }, { "epoch": 1.84, "grad_norm": 0.5980146527290344, "learning_rate": 0.00019559002547516887, "loss": 3.097, "step": 37534 }, { "epoch": 1.84, "grad_norm": 0.6270644664764404, "learning_rate": 0.00019557559195835614, "loss": 2.8905, "step": 37535 }, { "epoch": 1.84, "grad_norm": 0.6243385076522827, "learning_rate": 0.00019556115871657728, "loss": 3.0983, "step": 37536 }, { "epoch": 1.84, "grad_norm": 0.6248341798782349, "learning_rate": 0.00019554672574987011, "loss": 3.1228, "step": 37537 }, { "epoch": 1.84, "grad_norm": 0.6501051187515259, "learning_rate": 0.00019553229305827282, "loss": 3.0654, "step": 37538 }, { "epoch": 1.84, "grad_norm": 0.7006635069847107, "learning_rate": 0.0001955178606418234, "loss": 2.8608, "step": 37539 }, { "epoch": 1.84, "grad_norm": 0.6526589393615723, "learning_rate": 0.00019550342850055974, "loss": 3.0982, "step": 37540 }, { "epoch": 1.84, "grad_norm": 0.6131229996681213, "learning_rate": 0.00019548899663452007, "loss": 2.8902, "step": 37541 }, { "epoch": 1.84, "grad_norm": 0.6289817094802856, "learning_rate": 0.00019547456504374213, "loss": 3.0206, "step": 37542 }, { "epoch": 1.84, "grad_norm": 0.6378176212310791, "learning_rate": 0.00019546013372826411, "loss": 3.0923, "step": 37543 }, { "epoch": 1.84, "grad_norm": 0.6519317030906677, "learning_rate": 0.00019544570268812406, "loss": 3.0326, "step": 37544 }, { "epoch": 1.84, "grad_norm": 0.7056100368499756, "learning_rate": 0.00019543127192335979, "loss": 2.8943, "step": 37545 }, { "epoch": 1.84, "grad_norm": 0.6589441895484924, "learning_rate": 0.00019541684143400953, "loss": 3.1757, "step": 37546 }, { "epoch": 1.84, "grad_norm": 0.6761717200279236, "learning_rate": 0.00019540241122011111, "loss": 2.8114, "step": 37547 }, { "epoch": 1.84, "grad_norm": 0.6543384790420532, "learning_rate": 0.00019538798128170272, "loss": 3.2523, "step": 37548 }, { "epoch": 1.84, "grad_norm": 0.633036196231842, "learning_rate": 0.0001953735516188221, "loss": 2.858, "step": 37549 }, { "epoch": 1.84, "grad_norm": 0.6491231322288513, "learning_rate": 0.00019535912223150745, "loss": 3.1472, "step": 37550 }, { "epoch": 1.84, "grad_norm": 0.6392214894294739, "learning_rate": 0.00019534469311979685, "loss": 2.9719, "step": 37551 }, { "epoch": 1.84, "grad_norm": 0.6699644923210144, "learning_rate": 0.00019533026428372803, "loss": 2.9833, "step": 37552 }, { "epoch": 1.84, "grad_norm": 0.695570707321167, "learning_rate": 0.00019531583572333924, "loss": 2.985, "step": 37553 }, { "epoch": 1.84, "grad_norm": 0.7695941925048828, "learning_rate": 0.0001953014074386684, "loss": 2.8777, "step": 37554 }, { "epoch": 1.84, "grad_norm": 0.6346907615661621, "learning_rate": 0.0001952869794297534, "loss": 3.1383, "step": 37555 }, { "epoch": 1.84, "grad_norm": 0.6936262249946594, "learning_rate": 0.00019527255169663244, "loss": 3.0131, "step": 37556 }, { "epoch": 1.84, "grad_norm": 0.6635444760322571, "learning_rate": 0.0001952581242393434, "loss": 3.0441, "step": 37557 }, { "epoch": 1.84, "grad_norm": 0.6767592430114746, "learning_rate": 0.0001952436970579244, "loss": 2.9566, "step": 37558 }, { "epoch": 1.84, "grad_norm": 0.6856628656387329, "learning_rate": 0.00019522927015241316, "loss": 3.2645, "step": 37559 }, { "epoch": 1.84, "grad_norm": 0.6861822009086609, "learning_rate": 0.00019521484352284793, "loss": 3.02, "step": 37560 }, { "epoch": 1.84, "grad_norm": 0.6693269610404968, "learning_rate": 0.00019520041716926674, "loss": 3.0023, "step": 37561 }, { "epoch": 1.84, "grad_norm": 0.6560589075088501, "learning_rate": 0.00019518599109170727, "loss": 2.8327, "step": 37562 }, { "epoch": 1.84, "grad_norm": 0.6303350925445557, "learning_rate": 0.00019517156529020793, "loss": 3.0216, "step": 37563 }, { "epoch": 1.84, "grad_norm": 0.6482394337654114, "learning_rate": 0.0001951571397648064, "loss": 3.0575, "step": 37564 }, { "epoch": 1.84, "grad_norm": 0.6522819399833679, "learning_rate": 0.00019514271451554076, "loss": 3.136, "step": 37565 }, { "epoch": 1.84, "grad_norm": 0.6398262977600098, "learning_rate": 0.00019512828954244913, "loss": 2.9499, "step": 37566 }, { "epoch": 1.84, "grad_norm": 0.6437414288520813, "learning_rate": 0.00019511386484556933, "loss": 2.9961, "step": 37567 }, { "epoch": 1.84, "grad_norm": 0.6147285103797913, "learning_rate": 0.00019509944042493953, "loss": 3.1381, "step": 37568 }, { "epoch": 1.84, "grad_norm": 0.6700540781021118, "learning_rate": 0.0001950850162805975, "loss": 2.8628, "step": 37569 }, { "epoch": 1.84, "grad_norm": 0.6323624849319458, "learning_rate": 0.00019507059241258133, "loss": 2.9542, "step": 37570 }, { "epoch": 1.84, "grad_norm": 0.6200016140937805, "learning_rate": 0.00019505616882092917, "loss": 3.0798, "step": 37571 }, { "epoch": 1.84, "grad_norm": 0.6565930247306824, "learning_rate": 0.00019504174550567875, "loss": 2.9682, "step": 37572 }, { "epoch": 1.84, "grad_norm": 0.6288124918937683, "learning_rate": 0.00019502732246686825, "loss": 3.1476, "step": 37573 }, { "epoch": 1.84, "grad_norm": 0.7063016891479492, "learning_rate": 0.00019501289970453545, "loss": 3.0224, "step": 37574 }, { "epoch": 1.84, "grad_norm": 0.626835286617279, "learning_rate": 0.00019499847721871855, "loss": 2.9865, "step": 37575 }, { "epoch": 1.84, "grad_norm": 0.6863059401512146, "learning_rate": 0.0001949840550094555, "loss": 3.0436, "step": 37576 }, { "epoch": 1.84, "grad_norm": 0.6676422357559204, "learning_rate": 0.0001949696330767841, "loss": 3.0392, "step": 37577 }, { "epoch": 1.84, "grad_norm": 0.649277925491333, "learning_rate": 0.00019495521142074264, "loss": 3.0748, "step": 37578 }, { "epoch": 1.84, "grad_norm": 0.6728671789169312, "learning_rate": 0.00019494079004136886, "loss": 3.0207, "step": 37579 }, { "epoch": 1.84, "grad_norm": 0.6115674376487732, "learning_rate": 0.00019492636893870074, "loss": 3.0674, "step": 37580 }, { "epoch": 1.84, "grad_norm": 0.6326895356178284, "learning_rate": 0.00019491194811277647, "loss": 3.042, "step": 37581 }, { "epoch": 1.84, "grad_norm": 0.6360620260238647, "learning_rate": 0.00019489752756363378, "loss": 2.882, "step": 37582 }, { "epoch": 1.84, "grad_norm": 0.6419123411178589, "learning_rate": 0.00019488310729131095, "loss": 3.1109, "step": 37583 }, { "epoch": 1.84, "grad_norm": 0.6531715989112854, "learning_rate": 0.00019486868729584565, "loss": 2.713, "step": 37584 }, { "epoch": 1.84, "grad_norm": 0.6210694909095764, "learning_rate": 0.00019485426757727593, "loss": 3.0199, "step": 37585 }, { "epoch": 1.84, "grad_norm": 0.6679146885871887, "learning_rate": 0.00019483984813564002, "loss": 2.7999, "step": 37586 }, { "epoch": 1.84, "grad_norm": 0.6969517469406128, "learning_rate": 0.0001948254289709756, "loss": 2.928, "step": 37587 }, { "epoch": 1.84, "grad_norm": 0.6617041826248169, "learning_rate": 0.00019481101008332082, "loss": 3.1227, "step": 37588 }, { "epoch": 1.84, "grad_norm": 0.6362742185592651, "learning_rate": 0.00019479659147271354, "loss": 2.732, "step": 37589 }, { "epoch": 1.84, "grad_norm": 0.6810065507888794, "learning_rate": 0.00019478217313919176, "loss": 3.0502, "step": 37590 }, { "epoch": 1.84, "grad_norm": 0.643323540687561, "learning_rate": 0.00019476775508279361, "loss": 2.8579, "step": 37591 }, { "epoch": 1.84, "grad_norm": 0.6467273235321045, "learning_rate": 0.00019475333730355681, "loss": 3.1401, "step": 37592 }, { "epoch": 1.84, "grad_norm": 0.6345007419586182, "learning_rate": 0.00019473891980151958, "loss": 2.7846, "step": 37593 }, { "epoch": 1.84, "grad_norm": 0.6979330778121948, "learning_rate": 0.00019472450257671973, "loss": 2.8787, "step": 37594 }, { "epoch": 1.84, "grad_norm": 0.8724361062049866, "learning_rate": 0.00019471008562919518, "loss": 3.1034, "step": 37595 }, { "epoch": 1.84, "grad_norm": 0.6298291087150574, "learning_rate": 0.00019469566895898412, "loss": 3.1297, "step": 37596 }, { "epoch": 1.84, "grad_norm": 0.6355240941047668, "learning_rate": 0.0001946812525661244, "loss": 3.1283, "step": 37597 }, { "epoch": 1.84, "grad_norm": 0.6391889452934265, "learning_rate": 0.00019466683645065402, "loss": 3.0097, "step": 37598 }, { "epoch": 1.84, "grad_norm": 0.6222915649414062, "learning_rate": 0.0001946524206126108, "loss": 3.0043, "step": 37599 }, { "epoch": 1.84, "grad_norm": 0.6683493256568909, "learning_rate": 0.00019463800505203284, "loss": 3.053, "step": 37600 }, { "epoch": 1.84, "grad_norm": 0.6785693168640137, "learning_rate": 0.00019462358976895822, "loss": 3.0694, "step": 37601 }, { "epoch": 1.84, "grad_norm": 0.6351543068885803, "learning_rate": 0.00019460917476342464, "loss": 2.9983, "step": 37602 }, { "epoch": 1.84, "grad_norm": 0.6268134713172913, "learning_rate": 0.00019459476003547035, "loss": 2.9023, "step": 37603 }, { "epoch": 1.84, "grad_norm": 0.6076909303665161, "learning_rate": 0.00019458034558513304, "loss": 3.0121, "step": 37604 }, { "epoch": 1.84, "grad_norm": 0.6232816576957703, "learning_rate": 0.00019456593141245086, "loss": 2.9033, "step": 37605 }, { "epoch": 1.84, "grad_norm": 0.6748607754707336, "learning_rate": 0.00019455151751746183, "loss": 3.0505, "step": 37606 }, { "epoch": 1.84, "grad_norm": 0.6353899836540222, "learning_rate": 0.00019453710390020365, "loss": 3.1189, "step": 37607 }, { "epoch": 1.84, "grad_norm": 0.6453273892402649, "learning_rate": 0.00019452269056071457, "loss": 3.3284, "step": 37608 }, { "epoch": 1.84, "grad_norm": 0.6326226592063904, "learning_rate": 0.00019450827749903238, "loss": 2.9296, "step": 37609 }, { "epoch": 1.84, "grad_norm": 0.6521055102348328, "learning_rate": 0.00019449386471519502, "loss": 3.0117, "step": 37610 }, { "epoch": 1.84, "grad_norm": 0.663453221321106, "learning_rate": 0.00019447945220924063, "loss": 2.8768, "step": 37611 }, { "epoch": 1.84, "grad_norm": 0.6682723760604858, "learning_rate": 0.000194465039981207, "loss": 2.9598, "step": 37612 }, { "epoch": 1.84, "grad_norm": 0.6204665899276733, "learning_rate": 0.00019445062803113218, "loss": 3.1624, "step": 37613 }, { "epoch": 1.84, "grad_norm": 0.6165514588356018, "learning_rate": 0.000194436216359054, "loss": 3.3258, "step": 37614 }, { "epoch": 1.84, "grad_norm": 0.6465550065040588, "learning_rate": 0.00019442180496501054, "loss": 2.9518, "step": 37615 }, { "epoch": 1.84, "grad_norm": 0.6707178354263306, "learning_rate": 0.00019440739384903982, "loss": 2.9864, "step": 37616 }, { "epoch": 1.84, "grad_norm": 0.6141831278800964, "learning_rate": 0.00019439298301117955, "loss": 2.9937, "step": 37617 }, { "epoch": 1.84, "grad_norm": 0.6654483675956726, "learning_rate": 0.00019437857245146797, "loss": 2.8547, "step": 37618 }, { "epoch": 1.84, "grad_norm": 0.6087260842323303, "learning_rate": 0.00019436416216994285, "loss": 2.8802, "step": 37619 }, { "epoch": 1.84, "grad_norm": 0.7105410099029541, "learning_rate": 0.00019434975216664217, "loss": 3.0672, "step": 37620 }, { "epoch": 1.84, "grad_norm": 0.6254940629005432, "learning_rate": 0.000194335342441604, "loss": 2.9234, "step": 37621 }, { "epoch": 1.84, "grad_norm": 0.6136469841003418, "learning_rate": 0.00019432093299486612, "loss": 3.0558, "step": 37622 }, { "epoch": 1.84, "grad_norm": 0.6719083786010742, "learning_rate": 0.00019430652382646665, "loss": 2.9483, "step": 37623 }, { "epoch": 1.84, "grad_norm": 0.6281088590621948, "learning_rate": 0.00019429211493644335, "loss": 2.958, "step": 37624 }, { "epoch": 1.84, "grad_norm": 0.642126739025116, "learning_rate": 0.00019427770632483423, "loss": 3.0231, "step": 37625 }, { "epoch": 1.84, "grad_norm": 0.6834142208099365, "learning_rate": 0.00019426329799167742, "loss": 2.9678, "step": 37626 }, { "epoch": 1.84, "grad_norm": 0.6461591124534607, "learning_rate": 0.00019424888993701072, "loss": 2.944, "step": 37627 }, { "epoch": 1.84, "grad_norm": 0.6434966325759888, "learning_rate": 0.0001942344821608721, "loss": 2.8844, "step": 37628 }, { "epoch": 1.84, "grad_norm": 0.62734055519104, "learning_rate": 0.00019422007466329938, "loss": 3.0886, "step": 37629 }, { "epoch": 1.84, "grad_norm": 0.6289473176002502, "learning_rate": 0.00019420566744433074, "loss": 2.8589, "step": 37630 }, { "epoch": 1.84, "grad_norm": 0.6288855671882629, "learning_rate": 0.000194191260504004, "loss": 2.986, "step": 37631 }, { "epoch": 1.84, "grad_norm": 0.6580570936203003, "learning_rate": 0.000194176853842357, "loss": 3.0736, "step": 37632 }, { "epoch": 1.84, "grad_norm": 0.6353893280029297, "learning_rate": 0.00019416244745942797, "loss": 3.0207, "step": 37633 }, { "epoch": 1.84, "grad_norm": 0.6546821594238281, "learning_rate": 0.0001941480413552546, "loss": 2.9067, "step": 37634 }, { "epoch": 1.84, "grad_norm": 0.6692838668823242, "learning_rate": 0.00019413363552987496, "loss": 3.0202, "step": 37635 }, { "epoch": 1.84, "grad_norm": 0.6453374028205872, "learning_rate": 0.00019411922998332682, "loss": 3.0015, "step": 37636 }, { "epoch": 1.84, "grad_norm": 0.6587013006210327, "learning_rate": 0.00019410482471564838, "loss": 2.9682, "step": 37637 }, { "epoch": 1.84, "grad_norm": 0.6185652613639832, "learning_rate": 0.00019409041972687745, "loss": 2.9989, "step": 37638 }, { "epoch": 1.84, "grad_norm": 0.6610234379768372, "learning_rate": 0.0001940760150170519, "loss": 3.0296, "step": 37639 }, { "epoch": 1.84, "grad_norm": 0.610658586025238, "learning_rate": 0.00019406161058620983, "loss": 3.0412, "step": 37640 }, { "epoch": 1.84, "grad_norm": 0.6481900215148926, "learning_rate": 0.000194047206434389, "loss": 2.9011, "step": 37641 }, { "epoch": 1.84, "grad_norm": 0.6394829750061035, "learning_rate": 0.00019403280256162744, "loss": 3.0083, "step": 37642 }, { "epoch": 1.84, "grad_norm": 0.6193023920059204, "learning_rate": 0.0001940183989679632, "loss": 3.0251, "step": 37643 }, { "epoch": 1.84, "grad_norm": 0.6388663053512573, "learning_rate": 0.00019400399565343393, "loss": 3.0921, "step": 37644 }, { "epoch": 1.84, "grad_norm": 0.6341210603713989, "learning_rate": 0.00019398959261807794, "loss": 3.1424, "step": 37645 }, { "epoch": 1.84, "grad_norm": 0.662173330783844, "learning_rate": 0.00019397518986193277, "loss": 2.9784, "step": 37646 }, { "epoch": 1.84, "grad_norm": 0.6525291800498962, "learning_rate": 0.00019396078738503659, "loss": 2.8043, "step": 37647 }, { "epoch": 1.85, "grad_norm": 0.6806787252426147, "learning_rate": 0.00019394638518742733, "loss": 2.8409, "step": 37648 }, { "epoch": 1.85, "grad_norm": 0.6746661067008972, "learning_rate": 0.0001939319832691429, "loss": 3.0212, "step": 37649 }, { "epoch": 1.85, "grad_norm": 0.6305448412895203, "learning_rate": 0.00019391758163022125, "loss": 3.1187, "step": 37650 }, { "epoch": 1.85, "grad_norm": 0.6361280679702759, "learning_rate": 0.00019390318027070013, "loss": 3.0798, "step": 37651 }, { "epoch": 1.85, "grad_norm": 0.6559592485427856, "learning_rate": 0.0001938887791906177, "loss": 3.1859, "step": 37652 }, { "epoch": 1.85, "grad_norm": 0.6498610973358154, "learning_rate": 0.00019387437839001184, "loss": 3.1285, "step": 37653 }, { "epoch": 1.85, "grad_norm": 0.6713485717773438, "learning_rate": 0.0001938599778689203, "loss": 2.8758, "step": 37654 }, { "epoch": 1.85, "grad_norm": 0.6473253965377808, "learning_rate": 0.00019384557762738134, "loss": 2.882, "step": 37655 }, { "epoch": 1.85, "grad_norm": 0.6119524240493774, "learning_rate": 0.0001938311776654326, "loss": 2.7755, "step": 37656 }, { "epoch": 1.85, "grad_norm": 0.6408953070640564, "learning_rate": 0.00019381677798311207, "loss": 3.2346, "step": 37657 }, { "epoch": 1.85, "grad_norm": 0.6806536912918091, "learning_rate": 0.0001938023785804578, "loss": 3.0154, "step": 37658 }, { "epoch": 1.85, "grad_norm": 0.6582728028297424, "learning_rate": 0.00019378797945750756, "loss": 2.8902, "step": 37659 }, { "epoch": 1.85, "grad_norm": 0.6802835464477539, "learning_rate": 0.00019377358061429943, "loss": 3.0137, "step": 37660 }, { "epoch": 1.85, "grad_norm": 0.6308269500732422, "learning_rate": 0.00019375918205087112, "loss": 3.1318, "step": 37661 }, { "epoch": 1.85, "grad_norm": 0.7181369662284851, "learning_rate": 0.00019374478376726074, "loss": 3.1762, "step": 37662 }, { "epoch": 1.85, "grad_norm": 0.6746264100074768, "learning_rate": 0.0001937303857635062, "loss": 2.9109, "step": 37663 }, { "epoch": 1.85, "grad_norm": 0.653678834438324, "learning_rate": 0.00019371598803964523, "loss": 2.9796, "step": 37664 }, { "epoch": 1.85, "grad_norm": 0.6139053106307983, "learning_rate": 0.00019370159059571608, "loss": 2.995, "step": 37665 }, { "epoch": 1.85, "grad_norm": 0.6195376515388489, "learning_rate": 0.0001936871934317563, "loss": 2.9449, "step": 37666 }, { "epoch": 1.85, "grad_norm": 0.6081912517547607, "learning_rate": 0.0001936727965478041, "loss": 3.0824, "step": 37667 }, { "epoch": 1.85, "grad_norm": 0.7261514067649841, "learning_rate": 0.0001936583999438973, "loss": 3.1328, "step": 37668 }, { "epoch": 1.85, "grad_norm": 0.6595963835716248, "learning_rate": 0.0001936440036200737, "loss": 3.1274, "step": 37669 }, { "epoch": 1.85, "grad_norm": 0.6450942158699036, "learning_rate": 0.00019362960757637147, "loss": 3.0672, "step": 37670 }, { "epoch": 1.85, "grad_norm": 0.6324230432510376, "learning_rate": 0.00019361521181282833, "loss": 3.0313, "step": 37671 }, { "epoch": 1.85, "grad_norm": 0.6502272486686707, "learning_rate": 0.00019360081632948216, "loss": 3.1039, "step": 37672 }, { "epoch": 1.85, "grad_norm": 0.6629947423934937, "learning_rate": 0.0001935864211263711, "loss": 2.9381, "step": 37673 }, { "epoch": 1.85, "grad_norm": 0.6200689077377319, "learning_rate": 0.00019357202620353284, "loss": 3.0525, "step": 37674 }, { "epoch": 1.85, "grad_norm": 0.6390634775161743, "learning_rate": 0.00019355763156100544, "loss": 3.0149, "step": 37675 }, { "epoch": 1.85, "grad_norm": 0.6882568597793579, "learning_rate": 0.00019354323719882667, "loss": 2.9673, "step": 37676 }, { "epoch": 1.85, "grad_norm": 0.6751754283905029, "learning_rate": 0.00019352884311703457, "loss": 3.0688, "step": 37677 }, { "epoch": 1.85, "grad_norm": 0.6495875716209412, "learning_rate": 0.00019351444931566707, "loss": 3.0199, "step": 37678 }, { "epoch": 1.85, "grad_norm": 0.6434128880500793, "learning_rate": 0.00019350005579476186, "loss": 2.9602, "step": 37679 }, { "epoch": 1.85, "grad_norm": 0.6609219312667847, "learning_rate": 0.0001934856625543572, "loss": 3.1063, "step": 37680 }, { "epoch": 1.85, "grad_norm": 0.6295854449272156, "learning_rate": 0.00019347126959449068, "loss": 2.9994, "step": 37681 }, { "epoch": 1.85, "grad_norm": 0.6681686639785767, "learning_rate": 0.00019345687691520028, "loss": 3.0875, "step": 37682 }, { "epoch": 1.85, "grad_norm": 0.7291771173477173, "learning_rate": 0.00019344248451652412, "loss": 3.2816, "step": 37683 }, { "epoch": 1.85, "grad_norm": 0.6248905062675476, "learning_rate": 0.00019342809239849987, "loss": 2.9861, "step": 37684 }, { "epoch": 1.85, "grad_norm": 0.6470739841461182, "learning_rate": 0.00019341370056116556, "loss": 2.7754, "step": 37685 }, { "epoch": 1.85, "grad_norm": 0.6730750203132629, "learning_rate": 0.00019339930900455894, "loss": 2.8785, "step": 37686 }, { "epoch": 1.85, "grad_norm": 0.6697092652320862, "learning_rate": 0.00019338491772871806, "loss": 3.0509, "step": 37687 }, { "epoch": 1.85, "grad_norm": 0.6595104336738586, "learning_rate": 0.00019337052673368087, "loss": 3.1409, "step": 37688 }, { "epoch": 1.85, "grad_norm": 0.6694124937057495, "learning_rate": 0.00019335613601948511, "loss": 3.0137, "step": 37689 }, { "epoch": 1.85, "grad_norm": 0.649567186832428, "learning_rate": 0.00019334174558616887, "loss": 3.1952, "step": 37690 }, { "epoch": 1.85, "grad_norm": 0.6771290302276611, "learning_rate": 0.0001933273554337698, "loss": 2.8044, "step": 37691 }, { "epoch": 1.85, "grad_norm": 0.6197347044944763, "learning_rate": 0.00019331296556232598, "loss": 3.1106, "step": 37692 }, { "epoch": 1.85, "grad_norm": 0.6491581797599792, "learning_rate": 0.0001932985759718754, "loss": 3.0107, "step": 37693 }, { "epoch": 1.85, "grad_norm": 0.6176472902297974, "learning_rate": 0.00019328418666245568, "loss": 2.936, "step": 37694 }, { "epoch": 1.85, "grad_norm": 0.6549054980278015, "learning_rate": 0.00019326979763410501, "loss": 2.8333, "step": 37695 }, { "epoch": 1.85, "grad_norm": 0.6261773109436035, "learning_rate": 0.0001932554088868611, "loss": 2.8714, "step": 37696 }, { "epoch": 1.85, "grad_norm": 0.6560527086257935, "learning_rate": 0.00019324102042076176, "loss": 2.9073, "step": 37697 }, { "epoch": 1.85, "grad_norm": 0.6137068867683411, "learning_rate": 0.00019322663223584523, "loss": 2.9635, "step": 37698 }, { "epoch": 1.85, "grad_norm": 0.71515953540802, "learning_rate": 0.0001932122443321491, "loss": 2.8384, "step": 37699 }, { "epoch": 1.85, "grad_norm": 0.6288661360740662, "learning_rate": 0.00019319785670971143, "loss": 2.9895, "step": 37700 }, { "epoch": 1.85, "grad_norm": 0.6185125112533569, "learning_rate": 0.00019318346936856998, "loss": 3.028, "step": 37701 }, { "epoch": 1.85, "grad_norm": 0.6613661646842957, "learning_rate": 0.00019316908230876276, "loss": 3.1521, "step": 37702 }, { "epoch": 1.85, "grad_norm": 0.6700349450111389, "learning_rate": 0.00019315469553032766, "loss": 2.9103, "step": 37703 }, { "epoch": 1.85, "grad_norm": 0.6402429938316345, "learning_rate": 0.00019314030903330238, "loss": 3.1274, "step": 37704 }, { "epoch": 1.85, "grad_norm": 0.6753808856010437, "learning_rate": 0.00019312592281772509, "loss": 2.8903, "step": 37705 }, { "epoch": 1.85, "grad_norm": 0.6379236578941345, "learning_rate": 0.00019311153688363346, "loss": 3.0307, "step": 37706 }, { "epoch": 1.85, "grad_norm": 0.6957564353942871, "learning_rate": 0.00019309715123106558, "loss": 2.9885, "step": 37707 }, { "epoch": 1.85, "grad_norm": 0.694107711315155, "learning_rate": 0.00019308276586005915, "loss": 3.096, "step": 37708 }, { "epoch": 1.85, "grad_norm": 0.7162430882453918, "learning_rate": 0.00019306838077065208, "loss": 2.9719, "step": 37709 }, { "epoch": 1.85, "grad_norm": 0.6701442003250122, "learning_rate": 0.0001930539959628825, "loss": 3.0133, "step": 37710 }, { "epoch": 1.85, "grad_norm": 0.6491703391075134, "learning_rate": 0.00019303961143678797, "loss": 2.9792, "step": 37711 }, { "epoch": 1.85, "grad_norm": 0.6918108463287354, "learning_rate": 0.00019302522719240664, "loss": 2.9754, "step": 37712 }, { "epoch": 1.85, "grad_norm": 0.6854971647262573, "learning_rate": 0.00019301084322977612, "loss": 3.0784, "step": 37713 }, { "epoch": 1.85, "grad_norm": 0.6159098148345947, "learning_rate": 0.00019299645954893452, "loss": 2.9788, "step": 37714 }, { "epoch": 1.85, "grad_norm": 0.6346889138221741, "learning_rate": 0.0001929820761499197, "loss": 2.8104, "step": 37715 }, { "epoch": 1.85, "grad_norm": 0.6619777679443359, "learning_rate": 0.0001929676930327694, "loss": 3.0457, "step": 37716 }, { "epoch": 1.85, "grad_norm": 0.6497694253921509, "learning_rate": 0.00019295331019752166, "loss": 3.2556, "step": 37717 }, { "epoch": 1.85, "grad_norm": 0.6476145386695862, "learning_rate": 0.0001929389276442143, "loss": 2.798, "step": 37718 }, { "epoch": 1.85, "grad_norm": 0.6143190264701843, "learning_rate": 0.00019292454537288514, "loss": 3.0311, "step": 37719 }, { "epoch": 1.85, "grad_norm": 0.635507345199585, "learning_rate": 0.00019291016338357218, "loss": 2.9349, "step": 37720 }, { "epoch": 1.85, "grad_norm": 0.6005894541740417, "learning_rate": 0.00019289578167631322, "loss": 3.1017, "step": 37721 }, { "epoch": 1.85, "grad_norm": 0.6846247315406799, "learning_rate": 0.00019288140025114622, "loss": 3.0524, "step": 37722 }, { "epoch": 1.85, "grad_norm": 0.6498640179634094, "learning_rate": 0.0001928670191081089, "loss": 3.1925, "step": 37723 }, { "epoch": 1.85, "grad_norm": 0.6696533560752869, "learning_rate": 0.00019285263824723927, "loss": 2.9829, "step": 37724 }, { "epoch": 1.85, "grad_norm": 0.6601403951644897, "learning_rate": 0.00019283825766857522, "loss": 3.0455, "step": 37725 }, { "epoch": 1.85, "grad_norm": 0.650351345539093, "learning_rate": 0.00019282387737215448, "loss": 3.0277, "step": 37726 }, { "epoch": 1.85, "grad_norm": 0.5891628265380859, "learning_rate": 0.0001928094973580151, "loss": 2.8122, "step": 37727 }, { "epoch": 1.85, "grad_norm": 0.7312580943107605, "learning_rate": 0.00019279511762619478, "loss": 2.9223, "step": 37728 }, { "epoch": 1.85, "grad_norm": 0.6708784103393555, "learning_rate": 0.00019278073817673155, "loss": 3.0713, "step": 37729 }, { "epoch": 1.85, "grad_norm": 0.6447456479072571, "learning_rate": 0.0001927663590096633, "loss": 2.892, "step": 37730 }, { "epoch": 1.85, "grad_norm": 0.6298274993896484, "learning_rate": 0.00019275198012502767, "loss": 3.187, "step": 37731 }, { "epoch": 1.85, "grad_norm": 0.683275043964386, "learning_rate": 0.0001927376015228628, "loss": 3.1043, "step": 37732 }, { "epoch": 1.85, "grad_norm": 0.6236386299133301, "learning_rate": 0.0001927232232032064, "loss": 3.0443, "step": 37733 }, { "epoch": 1.85, "grad_norm": 0.6350182294845581, "learning_rate": 0.00019270884516609633, "loss": 2.9401, "step": 37734 }, { "epoch": 1.85, "grad_norm": 0.6639793515205383, "learning_rate": 0.0001926944674115706, "loss": 3.0851, "step": 37735 }, { "epoch": 1.85, "grad_norm": 0.611555814743042, "learning_rate": 0.000192680089939667, "loss": 3.0449, "step": 37736 }, { "epoch": 1.85, "grad_norm": 0.7056550979614258, "learning_rate": 0.0001926657127504234, "loss": 3.1318, "step": 37737 }, { "epoch": 1.85, "grad_norm": 0.6342179775238037, "learning_rate": 0.00019265133584387754, "loss": 2.7571, "step": 37738 }, { "epoch": 1.85, "grad_norm": 0.6653878688812256, "learning_rate": 0.00019263695922006746, "loss": 3.1323, "step": 37739 }, { "epoch": 1.85, "grad_norm": 0.6867647171020508, "learning_rate": 0.00019262258287903105, "loss": 3.0003, "step": 37740 }, { "epoch": 1.85, "grad_norm": 0.6258794069290161, "learning_rate": 0.00019260820682080596, "loss": 3.0193, "step": 37741 }, { "epoch": 1.85, "grad_norm": 0.6566473841667175, "learning_rate": 0.00019259383104543027, "loss": 3.1575, "step": 37742 }, { "epoch": 1.85, "grad_norm": 0.6476341485977173, "learning_rate": 0.0001925794555529417, "loss": 3.0576, "step": 37743 }, { "epoch": 1.85, "grad_norm": 0.6693728566169739, "learning_rate": 0.00019256508034337817, "loss": 2.9551, "step": 37744 }, { "epoch": 1.85, "grad_norm": 0.623833954334259, "learning_rate": 0.0001925507054167776, "loss": 3.0357, "step": 37745 }, { "epoch": 1.85, "grad_norm": 0.6552188992500305, "learning_rate": 0.00019253633077317772, "loss": 2.9078, "step": 37746 }, { "epoch": 1.85, "grad_norm": 0.6924782991409302, "learning_rate": 0.00019252195641261663, "loss": 2.8484, "step": 37747 }, { "epoch": 1.85, "grad_norm": 0.6190445423126221, "learning_rate": 0.00019250758233513185, "loss": 3.0351, "step": 37748 }, { "epoch": 1.85, "grad_norm": 0.7065290212631226, "learning_rate": 0.0001924932085407614, "loss": 3.1009, "step": 37749 }, { "epoch": 1.85, "grad_norm": 0.6103439331054688, "learning_rate": 0.00019247883502954326, "loss": 3.1459, "step": 37750 }, { "epoch": 1.85, "grad_norm": 0.6249575018882751, "learning_rate": 0.00019246446180151515, "loss": 3.0093, "step": 37751 }, { "epoch": 1.85, "grad_norm": 0.6286122798919678, "learning_rate": 0.00019245008885671494, "loss": 3.0115, "step": 37752 }, { "epoch": 1.85, "grad_norm": 0.6574020385742188, "learning_rate": 0.00019243571619518044, "loss": 2.8001, "step": 37753 }, { "epoch": 1.85, "grad_norm": 0.6263217329978943, "learning_rate": 0.00019242134381694962, "loss": 3.0588, "step": 37754 }, { "epoch": 1.85, "grad_norm": 0.6395589709281921, "learning_rate": 0.0001924069717220603, "loss": 2.9058, "step": 37755 }, { "epoch": 1.85, "grad_norm": 0.6206561326980591, "learning_rate": 0.00019239259991055022, "loss": 3.1611, "step": 37756 }, { "epoch": 1.85, "grad_norm": 0.6132239699363708, "learning_rate": 0.00019237822838245744, "loss": 2.9598, "step": 37757 }, { "epoch": 1.85, "grad_norm": 0.6488093137741089, "learning_rate": 0.00019236385713781965, "loss": 3.0767, "step": 37758 }, { "epoch": 1.85, "grad_norm": 0.6497656106948853, "learning_rate": 0.00019234948617667463, "loss": 3.0172, "step": 37759 }, { "epoch": 1.85, "grad_norm": 0.6418945789337158, "learning_rate": 0.00019233511549906053, "loss": 2.9671, "step": 37760 }, { "epoch": 1.85, "grad_norm": 0.6646747589111328, "learning_rate": 0.0001923207451050149, "loss": 3.0554, "step": 37761 }, { "epoch": 1.85, "grad_norm": 0.5927530527114868, "learning_rate": 0.0001923063749945758, "loss": 2.8786, "step": 37762 }, { "epoch": 1.85, "grad_norm": 0.6495875716209412, "learning_rate": 0.00019229200516778083, "loss": 3.0241, "step": 37763 }, { "epoch": 1.85, "grad_norm": 0.6332212686538696, "learning_rate": 0.00019227763562466808, "loss": 3.0432, "step": 37764 }, { "epoch": 1.85, "grad_norm": 0.6797601580619812, "learning_rate": 0.00019226326636527538, "loss": 2.9573, "step": 37765 }, { "epoch": 1.85, "grad_norm": 0.6078984141349792, "learning_rate": 0.00019224889738964033, "loss": 2.9746, "step": 37766 }, { "epoch": 1.85, "grad_norm": 0.6482206583023071, "learning_rate": 0.0001922345286978011, "loss": 3.1526, "step": 37767 }, { "epoch": 1.85, "grad_norm": 0.6135627627372742, "learning_rate": 0.00019222016028979527, "loss": 2.9846, "step": 37768 }, { "epoch": 1.85, "grad_norm": 0.6265912055969238, "learning_rate": 0.00019220579216566082, "loss": 3.0213, "step": 37769 }, { "epoch": 1.85, "grad_norm": 0.648912787437439, "learning_rate": 0.00019219142432543566, "loss": 3.17, "step": 37770 }, { "epoch": 1.85, "grad_norm": 0.6402798295021057, "learning_rate": 0.00019217705676915744, "loss": 3.302, "step": 37771 }, { "epoch": 1.85, "grad_norm": 0.6315393447875977, "learning_rate": 0.00019216268949686415, "loss": 3.0794, "step": 37772 }, { "epoch": 1.85, "grad_norm": 0.6344591975212097, "learning_rate": 0.00019214832250859357, "loss": 3.0817, "step": 37773 }, { "epoch": 1.85, "grad_norm": 0.6381920576095581, "learning_rate": 0.0001921339558043835, "loss": 3.1171, "step": 37774 }, { "epoch": 1.85, "grad_norm": 0.6676338315010071, "learning_rate": 0.00019211958938427192, "loss": 3.1187, "step": 37775 }, { "epoch": 1.85, "grad_norm": 0.6421663761138916, "learning_rate": 0.00019210522324829656, "loss": 3.0546, "step": 37776 }, { "epoch": 1.85, "grad_norm": 0.6194716691970825, "learning_rate": 0.00019209085739649529, "loss": 3.1053, "step": 37777 }, { "epoch": 1.85, "grad_norm": 0.6466617584228516, "learning_rate": 0.00019207649182890584, "loss": 3.0838, "step": 37778 }, { "epoch": 1.85, "grad_norm": 0.6185702085494995, "learning_rate": 0.00019206212654556619, "loss": 3.0664, "step": 37779 }, { "epoch": 1.85, "grad_norm": 0.6287671327590942, "learning_rate": 0.0001920477615465142, "loss": 3.0368, "step": 37780 }, { "epoch": 1.85, "grad_norm": 0.6562327146530151, "learning_rate": 0.00019203339683178755, "loss": 2.8597, "step": 37781 }, { "epoch": 1.85, "grad_norm": 0.609699010848999, "learning_rate": 0.0001920190324014242, "loss": 3.0641, "step": 37782 }, { "epoch": 1.85, "grad_norm": 0.631375789642334, "learning_rate": 0.00019200466825546189, "loss": 2.9766, "step": 37783 }, { "epoch": 1.85, "grad_norm": 0.6531733870506287, "learning_rate": 0.0001919903043939386, "loss": 2.9142, "step": 37784 }, { "epoch": 1.85, "grad_norm": 0.6809682846069336, "learning_rate": 0.0001919759408168919, "loss": 2.7791, "step": 37785 }, { "epoch": 1.85, "grad_norm": 0.6292451024055481, "learning_rate": 0.00019196157752435986, "loss": 3.1373, "step": 37786 }, { "epoch": 1.85, "grad_norm": 0.6322467923164368, "learning_rate": 0.0001919472145163803, "loss": 2.9073, "step": 37787 }, { "epoch": 1.85, "grad_norm": 0.6325113773345947, "learning_rate": 0.00019193285179299084, "loss": 3.0091, "step": 37788 }, { "epoch": 1.85, "grad_norm": 0.6427119374275208, "learning_rate": 0.0001919184893542296, "loss": 2.873, "step": 37789 }, { "epoch": 1.85, "grad_norm": 0.6506245732307434, "learning_rate": 0.0001919041272001341, "loss": 2.9152, "step": 37790 }, { "epoch": 1.85, "grad_norm": 0.7006027698516846, "learning_rate": 0.0001918897653307424, "loss": 3.2097, "step": 37791 }, { "epoch": 1.85, "grad_norm": 0.6487759947776794, "learning_rate": 0.00019187540374609235, "loss": 3.1275, "step": 37792 }, { "epoch": 1.85, "grad_norm": 0.6356369256973267, "learning_rate": 0.00019186104244622152, "loss": 2.8266, "step": 37793 }, { "epoch": 1.85, "grad_norm": 0.6854275465011597, "learning_rate": 0.000191846681431168, "loss": 2.9221, "step": 37794 }, { "epoch": 1.85, "grad_norm": 0.6778642535209656, "learning_rate": 0.0001918323207009695, "loss": 3.0824, "step": 37795 }, { "epoch": 1.85, "grad_norm": 0.6329032182693481, "learning_rate": 0.00019181796025566375, "loss": 2.8184, "step": 37796 }, { "epoch": 1.85, "grad_norm": 0.6632543206214905, "learning_rate": 0.0001918036000952888, "loss": 2.7941, "step": 37797 }, { "epoch": 1.85, "grad_norm": 0.6179139614105225, "learning_rate": 0.00019178924021988225, "loss": 2.9767, "step": 37798 }, { "epoch": 1.85, "grad_norm": 0.6899917721748352, "learning_rate": 0.00019177488062948215, "loss": 3.0879, "step": 37799 }, { "epoch": 1.85, "grad_norm": 0.6436970829963684, "learning_rate": 0.00019176052132412598, "loss": 3.2205, "step": 37800 }, { "epoch": 1.85, "grad_norm": 0.6844620108604431, "learning_rate": 0.00019174616230385188, "loss": 3.1832, "step": 37801 }, { "epoch": 1.85, "grad_norm": 0.6446673274040222, "learning_rate": 0.0001917318035686976, "loss": 3.1017, "step": 37802 }, { "epoch": 1.85, "grad_norm": 0.676476001739502, "learning_rate": 0.0001917174451187008, "loss": 3.0669, "step": 37803 }, { "epoch": 1.85, "grad_norm": 0.6377735733985901, "learning_rate": 0.00019170308695389956, "loss": 3.1079, "step": 37804 }, { "epoch": 1.85, "grad_norm": 0.6439332962036133, "learning_rate": 0.00019168872907433142, "loss": 2.9107, "step": 37805 }, { "epoch": 1.85, "grad_norm": 0.6758326888084412, "learning_rate": 0.00019167437148003428, "loss": 2.9954, "step": 37806 }, { "epoch": 1.85, "grad_norm": 0.6786984801292419, "learning_rate": 0.00019166001417104616, "loss": 3.1813, "step": 37807 }, { "epoch": 1.85, "grad_norm": 0.6583569645881653, "learning_rate": 0.00019164565714740458, "loss": 3.01, "step": 37808 }, { "epoch": 1.85, "grad_norm": 0.6564452648162842, "learning_rate": 0.0001916313004091476, "loss": 2.9767, "step": 37809 }, { "epoch": 1.85, "grad_norm": 0.6123052835464478, "learning_rate": 0.00019161694395631283, "loss": 2.8546, "step": 37810 }, { "epoch": 1.85, "grad_norm": 0.6195050477981567, "learning_rate": 0.00019160258778893813, "loss": 2.8823, "step": 37811 }, { "epoch": 1.85, "grad_norm": 0.6337482333183289, "learning_rate": 0.00019158823190706148, "loss": 2.9034, "step": 37812 }, { "epoch": 1.85, "grad_norm": 0.6698657274246216, "learning_rate": 0.0001915738763107205, "loss": 3.1051, "step": 37813 }, { "epoch": 1.85, "grad_norm": 0.6584785580635071, "learning_rate": 0.00019155952099995311, "loss": 3.0516, "step": 37814 }, { "epoch": 1.85, "grad_norm": 0.715290367603302, "learning_rate": 0.00019154516597479697, "loss": 3.1058, "step": 37815 }, { "epoch": 1.85, "grad_norm": 0.6434201598167419, "learning_rate": 0.00019153081123529005, "loss": 2.9279, "step": 37816 }, { "epoch": 1.85, "grad_norm": 0.6424633264541626, "learning_rate": 0.00019151645678147015, "loss": 3.0143, "step": 37817 }, { "epoch": 1.85, "grad_norm": 0.6202142238616943, "learning_rate": 0.00019150210261337488, "loss": 3.1368, "step": 37818 }, { "epoch": 1.85, "grad_norm": 0.6413096189498901, "learning_rate": 0.00019148774873104237, "loss": 3.1654, "step": 37819 }, { "epoch": 1.85, "grad_norm": 0.6295573711395264, "learning_rate": 0.00019147339513451016, "loss": 3.0959, "step": 37820 }, { "epoch": 1.85, "grad_norm": 0.6364253163337708, "learning_rate": 0.00019145904182381606, "loss": 3.0315, "step": 37821 }, { "epoch": 1.85, "grad_norm": 0.6383337378501892, "learning_rate": 0.00019144468879899807, "loss": 2.9825, "step": 37822 }, { "epoch": 1.85, "grad_norm": 0.6247068643569946, "learning_rate": 0.00019143033606009384, "loss": 2.9444, "step": 37823 }, { "epoch": 1.85, "grad_norm": 0.6323005557060242, "learning_rate": 0.00019141598360714128, "loss": 3.0731, "step": 37824 }, { "epoch": 1.85, "grad_norm": 0.6761791706085205, "learning_rate": 0.00019140163144017798, "loss": 2.9698, "step": 37825 }, { "epoch": 1.85, "grad_norm": 0.710123598575592, "learning_rate": 0.00019138727955924197, "loss": 2.9033, "step": 37826 }, { "epoch": 1.85, "grad_norm": 0.6326896548271179, "learning_rate": 0.000191372927964371, "loss": 3.2679, "step": 37827 }, { "epoch": 1.85, "grad_norm": 0.6370804309844971, "learning_rate": 0.00019135857665560272, "loss": 3.0771, "step": 37828 }, { "epoch": 1.85, "grad_norm": 0.8834513425827026, "learning_rate": 0.00019134422563297514, "loss": 2.8565, "step": 37829 }, { "epoch": 1.85, "grad_norm": 0.6568150520324707, "learning_rate": 0.00019132987489652584, "loss": 3.1293, "step": 37830 }, { "epoch": 1.85, "grad_norm": 0.6432253122329712, "learning_rate": 0.00019131552444629282, "loss": 2.9941, "step": 37831 }, { "epoch": 1.85, "grad_norm": 0.6338484883308411, "learning_rate": 0.00019130117428231384, "loss": 2.9888, "step": 37832 }, { "epoch": 1.85, "grad_norm": 0.6891887187957764, "learning_rate": 0.00019128682440462653, "loss": 3.0169, "step": 37833 }, { "epoch": 1.85, "grad_norm": 0.6474312543869019, "learning_rate": 0.00019127247481326895, "loss": 2.8492, "step": 37834 }, { "epoch": 1.85, "grad_norm": 0.6678059101104736, "learning_rate": 0.00019125812550827866, "loss": 3.1827, "step": 37835 }, { "epoch": 1.85, "grad_norm": 0.663590133190155, "learning_rate": 0.00019124377648969346, "loss": 3.037, "step": 37836 }, { "epoch": 1.85, "grad_norm": 0.7006127238273621, "learning_rate": 0.00019122942775755134, "loss": 2.9233, "step": 37837 }, { "epoch": 1.85, "grad_norm": 0.6066510677337646, "learning_rate": 0.00019121507931188993, "loss": 3.0044, "step": 37838 }, { "epoch": 1.85, "grad_norm": 0.6588680744171143, "learning_rate": 0.00019120073115274716, "loss": 2.8884, "step": 37839 }, { "epoch": 1.85, "grad_norm": 0.6584458351135254, "learning_rate": 0.00019118638328016055, "loss": 3.0469, "step": 37840 }, { "epoch": 1.85, "grad_norm": 0.6320998668670654, "learning_rate": 0.00019117203569416814, "loss": 3.0939, "step": 37841 }, { "epoch": 1.85, "grad_norm": 0.699286162853241, "learning_rate": 0.00019115768839480772, "loss": 2.8838, "step": 37842 }, { "epoch": 1.85, "grad_norm": 0.6417528986930847, "learning_rate": 0.00019114334138211692, "loss": 2.9171, "step": 37843 }, { "epoch": 1.85, "grad_norm": 0.614177942276001, "learning_rate": 0.00019112899465613365, "loss": 2.8298, "step": 37844 }, { "epoch": 1.85, "grad_norm": 0.6882364153862, "learning_rate": 0.00019111464821689565, "loss": 2.9626, "step": 37845 }, { "epoch": 1.85, "grad_norm": 0.6353859305381775, "learning_rate": 0.0001911003020644406, "loss": 2.9776, "step": 37846 }, { "epoch": 1.85, "grad_norm": 0.6176462769508362, "learning_rate": 0.00019108595619880657, "loss": 3.1251, "step": 37847 }, { "epoch": 1.85, "grad_norm": 0.6521666049957275, "learning_rate": 0.00019107161062003106, "loss": 2.8021, "step": 37848 }, { "epoch": 1.85, "grad_norm": 0.6447446346282959, "learning_rate": 0.0001910572653281521, "loss": 3.2144, "step": 37849 }, { "epoch": 1.85, "grad_norm": 0.6715746521949768, "learning_rate": 0.00019104292032320712, "loss": 3.0234, "step": 37850 }, { "epoch": 1.85, "grad_norm": 0.6217811107635498, "learning_rate": 0.0001910285756052342, "loss": 2.7758, "step": 37851 }, { "epoch": 1.86, "grad_norm": 0.6977801322937012, "learning_rate": 0.0001910142311742711, "loss": 2.9053, "step": 37852 }, { "epoch": 1.86, "grad_norm": 0.6272152066230774, "learning_rate": 0.0001909998870303555, "loss": 3.087, "step": 37853 }, { "epoch": 1.86, "grad_norm": 0.6529848575592041, "learning_rate": 0.0001909855431735253, "loss": 3.0135, "step": 37854 }, { "epoch": 1.86, "grad_norm": 0.6610116362571716, "learning_rate": 0.00019097119960381805, "loss": 2.8011, "step": 37855 }, { "epoch": 1.86, "grad_norm": 0.6474286913871765, "learning_rate": 0.00019095685632127172, "loss": 3.1778, "step": 37856 }, { "epoch": 1.86, "grad_norm": 0.7384703755378723, "learning_rate": 0.00019094251332592418, "loss": 3.0711, "step": 37857 }, { "epoch": 1.86, "grad_norm": 0.668545126914978, "learning_rate": 0.0001909281706178129, "loss": 2.834, "step": 37858 }, { "epoch": 1.86, "grad_norm": 0.640242874622345, "learning_rate": 0.00019091382819697597, "loss": 3.1228, "step": 37859 }, { "epoch": 1.86, "grad_norm": 0.6718035936355591, "learning_rate": 0.00019089948606345095, "loss": 2.866, "step": 37860 }, { "epoch": 1.86, "grad_norm": 0.6487429141998291, "learning_rate": 0.00019088514421727572, "loss": 2.8989, "step": 37861 }, { "epoch": 1.86, "grad_norm": 0.6288765668869019, "learning_rate": 0.00019087080265848795, "loss": 2.8211, "step": 37862 }, { "epoch": 1.86, "grad_norm": 0.644413411617279, "learning_rate": 0.0001908564613871255, "loss": 3.0838, "step": 37863 }, { "epoch": 1.86, "grad_norm": 0.6302248239517212, "learning_rate": 0.00019084212040322622, "loss": 2.9669, "step": 37864 }, { "epoch": 1.86, "grad_norm": 0.6651326417922974, "learning_rate": 0.00019082777970682763, "loss": 2.8824, "step": 37865 }, { "epoch": 1.86, "grad_norm": 0.631876528263092, "learning_rate": 0.0001908134392979678, "loss": 3.0457, "step": 37866 }, { "epoch": 1.86, "grad_norm": 0.7018389105796814, "learning_rate": 0.00019079909917668432, "loss": 3.0692, "step": 37867 }, { "epoch": 1.86, "grad_norm": 0.6403344869613647, "learning_rate": 0.00019078475934301492, "loss": 2.8391, "step": 37868 }, { "epoch": 1.86, "grad_norm": 0.6221745014190674, "learning_rate": 0.00019077041979699753, "loss": 3.002, "step": 37869 }, { "epoch": 1.86, "grad_norm": 0.6578125357627869, "learning_rate": 0.00019075608053866974, "loss": 3.0516, "step": 37870 }, { "epoch": 1.86, "grad_norm": 0.6671814918518066, "learning_rate": 0.00019074174156806956, "loss": 2.9886, "step": 37871 }, { "epoch": 1.86, "grad_norm": 0.6524838805198669, "learning_rate": 0.00019072740288523452, "loss": 2.9684, "step": 37872 }, { "epoch": 1.86, "grad_norm": 0.6703388690948486, "learning_rate": 0.00019071306449020236, "loss": 2.9369, "step": 37873 }, { "epoch": 1.86, "grad_norm": 0.6536246538162231, "learning_rate": 0.00019069872638301116, "loss": 3.1842, "step": 37874 }, { "epoch": 1.86, "grad_norm": 0.6407559514045715, "learning_rate": 0.00019068438856369838, "loss": 3.1175, "step": 37875 }, { "epoch": 1.86, "grad_norm": 0.6341151595115662, "learning_rate": 0.00019067005103230196, "loss": 3.2322, "step": 37876 }, { "epoch": 1.86, "grad_norm": 0.6281828880310059, "learning_rate": 0.00019065571378885942, "loss": 2.9174, "step": 37877 }, { "epoch": 1.86, "grad_norm": 0.6292392611503601, "learning_rate": 0.0001906413768334088, "loss": 2.999, "step": 37878 }, { "epoch": 1.86, "grad_norm": 0.7081571221351624, "learning_rate": 0.00019062704016598777, "loss": 3.2075, "step": 37879 }, { "epoch": 1.86, "grad_norm": 0.6541411876678467, "learning_rate": 0.00019061270378663395, "loss": 3.3458, "step": 37880 }, { "epoch": 1.86, "grad_norm": 0.6368046402931213, "learning_rate": 0.00019059836769538534, "loss": 2.9417, "step": 37881 }, { "epoch": 1.86, "grad_norm": 0.6417937874794006, "learning_rate": 0.00019058403189227952, "loss": 3.0076, "step": 37882 }, { "epoch": 1.86, "grad_norm": 0.6951587200164795, "learning_rate": 0.00019056969637735422, "loss": 2.9381, "step": 37883 }, { "epoch": 1.86, "grad_norm": 0.6338977813720703, "learning_rate": 0.00019055536115064747, "loss": 3.0194, "step": 37884 }, { "epoch": 1.86, "grad_norm": 0.6625250577926636, "learning_rate": 0.0001905410262121967, "loss": 3.0054, "step": 37885 }, { "epoch": 1.86, "grad_norm": 0.6405186653137207, "learning_rate": 0.0001905266915620399, "loss": 3.0492, "step": 37886 }, { "epoch": 1.86, "grad_norm": 0.644585132598877, "learning_rate": 0.0001905123572002146, "loss": 2.828, "step": 37887 }, { "epoch": 1.86, "grad_norm": 0.6336441040039062, "learning_rate": 0.00019049802312675872, "loss": 2.9381, "step": 37888 }, { "epoch": 1.86, "grad_norm": 0.6756220459938049, "learning_rate": 0.00019048368934171004, "loss": 2.8513, "step": 37889 }, { "epoch": 1.86, "grad_norm": 0.6149240136146545, "learning_rate": 0.00019046935584510614, "loss": 2.9183, "step": 37890 }, { "epoch": 1.86, "grad_norm": 0.6757703423500061, "learning_rate": 0.00019045502263698497, "loss": 2.9603, "step": 37891 }, { "epoch": 1.86, "grad_norm": 0.6618746519088745, "learning_rate": 0.0001904406897173841, "loss": 2.9956, "step": 37892 }, { "epoch": 1.86, "grad_norm": 0.6471590995788574, "learning_rate": 0.0001904263570863414, "loss": 2.9915, "step": 37893 }, { "epoch": 1.86, "grad_norm": 0.6565477252006531, "learning_rate": 0.00019041202474389467, "loss": 2.9342, "step": 37894 }, { "epoch": 1.86, "grad_norm": 0.6662431955337524, "learning_rate": 0.00019039769269008148, "loss": 2.9803, "step": 37895 }, { "epoch": 1.86, "grad_norm": 0.6561885476112366, "learning_rate": 0.00019038336092493975, "loss": 3.0549, "step": 37896 }, { "epoch": 1.86, "grad_norm": 0.6919810771942139, "learning_rate": 0.0001903690294485071, "loss": 3.1325, "step": 37897 }, { "epoch": 1.86, "grad_norm": 0.6764747500419617, "learning_rate": 0.00019035469826082126, "loss": 2.9849, "step": 37898 }, { "epoch": 1.86, "grad_norm": 0.6264052987098694, "learning_rate": 0.00019034036736192018, "loss": 2.9931, "step": 37899 }, { "epoch": 1.86, "grad_norm": 0.652153491973877, "learning_rate": 0.00019032603675184142, "loss": 2.9467, "step": 37900 }, { "epoch": 1.86, "grad_norm": 0.6279745101928711, "learning_rate": 0.00019031170643062282, "loss": 3.1106, "step": 37901 }, { "epoch": 1.86, "grad_norm": 0.6377081871032715, "learning_rate": 0.00019029737639830193, "loss": 2.8446, "step": 37902 }, { "epoch": 1.86, "grad_norm": 0.6817088723182678, "learning_rate": 0.0001902830466549167, "loss": 2.9539, "step": 37903 }, { "epoch": 1.86, "grad_norm": 0.6729699969291687, "learning_rate": 0.0001902687172005049, "loss": 3.0809, "step": 37904 }, { "epoch": 1.86, "grad_norm": 0.680040180683136, "learning_rate": 0.00019025438803510405, "loss": 2.9381, "step": 37905 }, { "epoch": 1.86, "grad_norm": 0.6663704514503479, "learning_rate": 0.00019024005915875214, "loss": 3.1179, "step": 37906 }, { "epoch": 1.86, "grad_norm": 0.6278336048126221, "learning_rate": 0.00019022573057148675, "loss": 2.857, "step": 37907 }, { "epoch": 1.86, "grad_norm": 0.6568112373352051, "learning_rate": 0.0001902114022733456, "loss": 3.1337, "step": 37908 }, { "epoch": 1.86, "grad_norm": 0.6545429229736328, "learning_rate": 0.00019019707426436654, "loss": 3.0168, "step": 37909 }, { "epoch": 1.86, "grad_norm": 0.6433941125869751, "learning_rate": 0.0001901827465445872, "loss": 3.0279, "step": 37910 }, { "epoch": 1.86, "grad_norm": 0.6414997577667236, "learning_rate": 0.00019016841911404555, "loss": 3.0132, "step": 37911 }, { "epoch": 1.86, "grad_norm": 0.6626715660095215, "learning_rate": 0.00019015409197277892, "loss": 2.9186, "step": 37912 }, { "epoch": 1.86, "grad_norm": 0.6758489608764648, "learning_rate": 0.00019013976512082532, "loss": 2.8539, "step": 37913 }, { "epoch": 1.86, "grad_norm": 0.661674439907074, "learning_rate": 0.00019012543855822253, "loss": 3.1409, "step": 37914 }, { "epoch": 1.86, "grad_norm": 0.6630668640136719, "learning_rate": 0.00019011111228500814, "loss": 3.066, "step": 37915 }, { "epoch": 1.86, "grad_norm": 0.6342833638191223, "learning_rate": 0.00019009678630121997, "loss": 2.8859, "step": 37916 }, { "epoch": 1.86, "grad_norm": 0.6333215832710266, "learning_rate": 0.00019008246060689562, "loss": 3.1166, "step": 37917 }, { "epoch": 1.86, "grad_norm": 0.6316998600959778, "learning_rate": 0.00019006813520207295, "loss": 2.9157, "step": 37918 }, { "epoch": 1.86, "grad_norm": 0.6606517434120178, "learning_rate": 0.00019005381008678973, "loss": 2.9766, "step": 37919 }, { "epoch": 1.86, "grad_norm": 0.6711786985397339, "learning_rate": 0.0001900394852610835, "loss": 3.1096, "step": 37920 }, { "epoch": 1.86, "grad_norm": 0.7660440802574158, "learning_rate": 0.00019002516072499223, "loss": 2.9022, "step": 37921 }, { "epoch": 1.86, "grad_norm": 0.7094885110855103, "learning_rate": 0.00019001083647855347, "loss": 2.9963, "step": 37922 }, { "epoch": 1.86, "grad_norm": 0.6385478377342224, "learning_rate": 0.00018999651252180486, "loss": 3.0875, "step": 37923 }, { "epoch": 1.86, "grad_norm": 0.632442831993103, "learning_rate": 0.00018998218885478446, "loss": 3.0835, "step": 37924 }, { "epoch": 1.86, "grad_norm": 0.6296939849853516, "learning_rate": 0.00018996786547752974, "loss": 3.0832, "step": 37925 }, { "epoch": 1.86, "grad_norm": 0.6628495454788208, "learning_rate": 0.00018995354239007852, "loss": 3.208, "step": 37926 }, { "epoch": 1.86, "grad_norm": 0.6310528516769409, "learning_rate": 0.0001899392195924684, "loss": 3.0514, "step": 37927 }, { "epoch": 1.86, "grad_norm": 0.6705329418182373, "learning_rate": 0.00018992489708473723, "loss": 2.9223, "step": 37928 }, { "epoch": 1.86, "grad_norm": 0.6532049179077148, "learning_rate": 0.00018991057486692276, "loss": 2.8727, "step": 37929 }, { "epoch": 1.86, "grad_norm": 0.6473034024238586, "learning_rate": 0.00018989625293906252, "loss": 2.9542, "step": 37930 }, { "epoch": 1.86, "grad_norm": 0.7011779546737671, "learning_rate": 0.00018988193130119448, "loss": 3.082, "step": 37931 }, { "epoch": 1.86, "grad_norm": 0.6515766382217407, "learning_rate": 0.00018986760995335614, "loss": 2.863, "step": 37932 }, { "epoch": 1.86, "grad_norm": 0.6576693654060364, "learning_rate": 0.0001898532888955854, "loss": 2.8862, "step": 37933 }, { "epoch": 1.86, "grad_norm": 0.6526912450790405, "learning_rate": 0.0001898389681279199, "loss": 2.8443, "step": 37934 }, { "epoch": 1.86, "grad_norm": 0.6956529021263123, "learning_rate": 0.00018982464765039726, "loss": 3.0819, "step": 37935 }, { "epoch": 1.86, "grad_norm": 0.6562744975090027, "learning_rate": 0.00018981032746305542, "loss": 2.8812, "step": 37936 }, { "epoch": 1.86, "grad_norm": 0.6449558138847351, "learning_rate": 0.00018979600756593193, "loss": 2.9919, "step": 37937 }, { "epoch": 1.86, "grad_norm": 0.6720443367958069, "learning_rate": 0.00018978168795906458, "loss": 3.1166, "step": 37938 }, { "epoch": 1.86, "grad_norm": 0.6303071975708008, "learning_rate": 0.00018976736864249094, "loss": 3.0804, "step": 37939 }, { "epoch": 1.86, "grad_norm": 0.6402961015701294, "learning_rate": 0.00018975304961624893, "loss": 2.8876, "step": 37940 }, { "epoch": 1.86, "grad_norm": 0.654398500919342, "learning_rate": 0.00018973873088037616, "loss": 2.8619, "step": 37941 }, { "epoch": 1.86, "grad_norm": 0.6452508568763733, "learning_rate": 0.00018972441243491028, "loss": 2.9692, "step": 37942 }, { "epoch": 1.86, "grad_norm": 0.6764039397239685, "learning_rate": 0.00018971009427988918, "loss": 3.1798, "step": 37943 }, { "epoch": 1.86, "grad_norm": 0.6741904616355896, "learning_rate": 0.00018969577641535042, "loss": 3.0523, "step": 37944 }, { "epoch": 1.86, "grad_norm": 0.6469334959983826, "learning_rate": 0.00018968145884133169, "loss": 3.0406, "step": 37945 }, { "epoch": 1.86, "grad_norm": 0.678054690361023, "learning_rate": 0.0001896671415578709, "loss": 3.0538, "step": 37946 }, { "epoch": 1.86, "grad_norm": 0.678111732006073, "learning_rate": 0.00018965282456500553, "loss": 3.0945, "step": 37947 }, { "epoch": 1.86, "grad_norm": 0.6882033348083496, "learning_rate": 0.00018963850786277346, "loss": 3.1515, "step": 37948 }, { "epoch": 1.86, "grad_norm": 0.6521106958389282, "learning_rate": 0.0001896241914512122, "loss": 2.9909, "step": 37949 }, { "epoch": 1.86, "grad_norm": 0.6649858355522156, "learning_rate": 0.00018960987533035963, "loss": 2.954, "step": 37950 }, { "epoch": 1.86, "grad_norm": 0.6313856244087219, "learning_rate": 0.0001895955595002535, "loss": 3.0676, "step": 37951 }, { "epoch": 1.86, "grad_norm": 0.6400474309921265, "learning_rate": 0.0001895812439609313, "loss": 3.0938, "step": 37952 }, { "epoch": 1.86, "grad_norm": 0.6551691293716431, "learning_rate": 0.0001895669287124309, "loss": 2.9153, "step": 37953 }, { "epoch": 1.86, "grad_norm": 0.6542170643806458, "learning_rate": 0.0001895526137547899, "loss": 3.0044, "step": 37954 }, { "epoch": 1.86, "grad_norm": 0.6334107518196106, "learning_rate": 0.00018953829908804608, "loss": 2.9579, "step": 37955 }, { "epoch": 1.86, "grad_norm": 0.6488938927650452, "learning_rate": 0.0001895239847122372, "loss": 3.2192, "step": 37956 }, { "epoch": 1.86, "grad_norm": 0.7389078736305237, "learning_rate": 0.00018950967062740076, "loss": 2.7784, "step": 37957 }, { "epoch": 1.86, "grad_norm": 0.6441724300384521, "learning_rate": 0.0001894953568335747, "loss": 3.2414, "step": 37958 }, { "epoch": 1.86, "grad_norm": 0.6733617186546326, "learning_rate": 0.00018948104333079658, "loss": 3.0559, "step": 37959 }, { "epoch": 1.86, "grad_norm": 0.6783096790313721, "learning_rate": 0.00018946673011910404, "loss": 2.9956, "step": 37960 }, { "epoch": 1.86, "grad_norm": 0.6289350390434265, "learning_rate": 0.00018945241719853497, "loss": 3.144, "step": 37961 }, { "epoch": 1.86, "grad_norm": 0.7053776383399963, "learning_rate": 0.0001894381045691269, "loss": 3.1361, "step": 37962 }, { "epoch": 1.86, "grad_norm": 0.6618072986602783, "learning_rate": 0.00018942379223091765, "loss": 2.9112, "step": 37963 }, { "epoch": 1.86, "grad_norm": 0.6476094722747803, "learning_rate": 0.00018940948018394472, "loss": 3.1336, "step": 37964 }, { "epoch": 1.86, "grad_norm": 0.6161331534385681, "learning_rate": 0.00018939516842824599, "loss": 3.2245, "step": 37965 }, { "epoch": 1.86, "grad_norm": 0.6259023547172546, "learning_rate": 0.00018938085696385917, "loss": 2.9133, "step": 37966 }, { "epoch": 1.86, "grad_norm": 0.6382219195365906, "learning_rate": 0.00018936654579082175, "loss": 2.9625, "step": 37967 }, { "epoch": 1.86, "grad_norm": 0.6519684791564941, "learning_rate": 0.0001893522349091717, "loss": 3.0695, "step": 37968 }, { "epoch": 1.86, "grad_norm": 0.7257705926895142, "learning_rate": 0.0001893379243189465, "loss": 3.1289, "step": 37969 }, { "epoch": 1.86, "grad_norm": 0.7212842702865601, "learning_rate": 0.00018932361402018388, "loss": 3.1242, "step": 37970 }, { "epoch": 1.86, "grad_norm": 0.6503854990005493, "learning_rate": 0.00018930930401292163, "loss": 2.8469, "step": 37971 }, { "epoch": 1.86, "grad_norm": 0.6265800595283508, "learning_rate": 0.00018929499429719726, "loss": 2.973, "step": 37972 }, { "epoch": 1.86, "grad_norm": 0.6678257584571838, "learning_rate": 0.0001892806848730488, "loss": 3.1497, "step": 37973 }, { "epoch": 1.86, "grad_norm": 0.6247530579566956, "learning_rate": 0.00018926637574051346, "loss": 2.8594, "step": 37974 }, { "epoch": 1.86, "grad_norm": 0.7319062352180481, "learning_rate": 0.0001892520668996292, "loss": 3.0092, "step": 37975 }, { "epoch": 1.86, "grad_norm": 0.6562508940696716, "learning_rate": 0.00018923775835043383, "loss": 2.9743, "step": 37976 }, { "epoch": 1.86, "grad_norm": 0.6403142213821411, "learning_rate": 0.00018922345009296477, "loss": 2.8766, "step": 37977 }, { "epoch": 1.86, "grad_norm": 0.6423630714416504, "learning_rate": 0.00018920914212725994, "loss": 2.9736, "step": 37978 }, { "epoch": 1.86, "grad_norm": 0.6349278688430786, "learning_rate": 0.00018919483445335676, "loss": 3.0937, "step": 37979 }, { "epoch": 1.86, "grad_norm": 0.6362356543540955, "learning_rate": 0.00018918052707129313, "loss": 2.9766, "step": 37980 }, { "epoch": 1.86, "grad_norm": 0.642997145652771, "learning_rate": 0.0001891662199811067, "loss": 2.864, "step": 37981 }, { "epoch": 1.86, "grad_norm": 0.6604081988334656, "learning_rate": 0.00018915191318283502, "loss": 2.9531, "step": 37982 }, { "epoch": 1.86, "grad_norm": 0.6715200543403625, "learning_rate": 0.00018913760667651602, "loss": 3.0857, "step": 37983 }, { "epoch": 1.86, "grad_norm": 0.6149404644966125, "learning_rate": 0.0001891233004621871, "loss": 3.145, "step": 37984 }, { "epoch": 1.86, "grad_norm": 0.6347184181213379, "learning_rate": 0.00018910899453988603, "loss": 2.9672, "step": 37985 }, { "epoch": 1.86, "grad_norm": 0.6498271822929382, "learning_rate": 0.00018909468890965068, "loss": 3.1004, "step": 37986 }, { "epoch": 1.86, "grad_norm": 0.6263914108276367, "learning_rate": 0.00018908038357151849, "loss": 2.935, "step": 37987 }, { "epoch": 1.86, "grad_norm": 0.6811741590499878, "learning_rate": 0.00018906607852552728, "loss": 2.8293, "step": 37988 }, { "epoch": 1.86, "grad_norm": 0.6744228601455688, "learning_rate": 0.00018905177377171457, "loss": 3.1593, "step": 37989 }, { "epoch": 1.86, "grad_norm": 0.6605095267295837, "learning_rate": 0.00018903746931011816, "loss": 3.1381, "step": 37990 }, { "epoch": 1.86, "grad_norm": 0.625415563583374, "learning_rate": 0.00018902316514077578, "loss": 3.067, "step": 37991 }, { "epoch": 1.86, "grad_norm": 0.6394377946853638, "learning_rate": 0.00018900886126372492, "loss": 3.0368, "step": 37992 }, { "epoch": 1.86, "grad_norm": 0.6430702209472656, "learning_rate": 0.00018899455767900345, "loss": 3.0768, "step": 37993 }, { "epoch": 1.86, "grad_norm": 0.7407238483428955, "learning_rate": 0.00018898025438664884, "loss": 2.9264, "step": 37994 }, { "epoch": 1.86, "grad_norm": 0.6624389290809631, "learning_rate": 0.00018896595138669895, "loss": 2.9439, "step": 37995 }, { "epoch": 1.86, "grad_norm": 0.649860143661499, "learning_rate": 0.00018895164867919142, "loss": 3.0471, "step": 37996 }, { "epoch": 1.86, "grad_norm": 0.652580201625824, "learning_rate": 0.00018893734626416374, "loss": 3.1846, "step": 37997 }, { "epoch": 1.86, "grad_norm": 0.6489927172660828, "learning_rate": 0.00018892304414165387, "loss": 3.0071, "step": 37998 }, { "epoch": 1.86, "grad_norm": 0.6793820858001709, "learning_rate": 0.00018890874231169925, "loss": 2.9431, "step": 37999 }, { "epoch": 1.86, "grad_norm": 0.6238475441932678, "learning_rate": 0.00018889444077433757, "loss": 3.0595, "step": 38000 }, { "epoch": 1.86, "grad_norm": 0.6435956954956055, "learning_rate": 0.00018888013952960667, "loss": 2.8716, "step": 38001 }, { "epoch": 1.86, "grad_norm": 0.6251559257507324, "learning_rate": 0.00018886583857754405, "loss": 3.0129, "step": 38002 }, { "epoch": 1.86, "grad_norm": 0.6116903424263, "learning_rate": 0.00018885153791818742, "loss": 2.9931, "step": 38003 }, { "epoch": 1.86, "grad_norm": 0.6142878532409668, "learning_rate": 0.0001888372375515744, "loss": 2.9865, "step": 38004 }, { "epoch": 1.86, "grad_norm": 0.6752248406410217, "learning_rate": 0.00018882293747774276, "loss": 2.8574, "step": 38005 }, { "epoch": 1.86, "grad_norm": 0.6765214204788208, "learning_rate": 0.0001888086376967301, "loss": 3.0034, "step": 38006 }, { "epoch": 1.86, "grad_norm": 0.7171432375907898, "learning_rate": 0.00018879433820857402, "loss": 2.8449, "step": 38007 }, { "epoch": 1.86, "grad_norm": 0.6618970632553101, "learning_rate": 0.00018878003901331235, "loss": 3.0472, "step": 38008 }, { "epoch": 1.86, "grad_norm": 0.6702781915664673, "learning_rate": 0.00018876574011098262, "loss": 3.0298, "step": 38009 }, { "epoch": 1.86, "grad_norm": 0.6527828574180603, "learning_rate": 0.00018875144150162245, "loss": 2.8084, "step": 38010 }, { "epoch": 1.86, "grad_norm": 0.6515904068946838, "learning_rate": 0.00018873714318526968, "loss": 3.1088, "step": 38011 }, { "epoch": 1.86, "grad_norm": 0.6282628774642944, "learning_rate": 0.00018872284516196182, "loss": 3.0204, "step": 38012 }, { "epoch": 1.86, "grad_norm": 0.6256953477859497, "learning_rate": 0.0001887085474317366, "loss": 3.097, "step": 38013 }, { "epoch": 1.86, "grad_norm": 0.7299720048904419, "learning_rate": 0.00018869424999463154, "loss": 2.9651, "step": 38014 }, { "epoch": 1.86, "grad_norm": 0.6656356453895569, "learning_rate": 0.00018867995285068442, "loss": 3.061, "step": 38015 }, { "epoch": 1.86, "grad_norm": 0.7072047591209412, "learning_rate": 0.000188665655999933, "loss": 2.9438, "step": 38016 }, { "epoch": 1.86, "grad_norm": 0.6499008536338806, "learning_rate": 0.00018865135944241476, "loss": 3.1369, "step": 38017 }, { "epoch": 1.86, "grad_norm": 0.6523368954658508, "learning_rate": 0.00018863706317816747, "loss": 3.0557, "step": 38018 }, { "epoch": 1.86, "grad_norm": 0.6852509379386902, "learning_rate": 0.0001886227672072286, "loss": 2.9922, "step": 38019 }, { "epoch": 1.86, "grad_norm": 0.695910632610321, "learning_rate": 0.00018860847152963602, "loss": 2.8368, "step": 38020 }, { "epoch": 1.86, "grad_norm": 0.687065839767456, "learning_rate": 0.00018859417614542725, "loss": 2.9775, "step": 38021 }, { "epoch": 1.86, "grad_norm": 0.6046010851860046, "learning_rate": 0.00018857988105463991, "loss": 2.9377, "step": 38022 }, { "epoch": 1.86, "grad_norm": 0.6309628486633301, "learning_rate": 0.00018856558625731187, "loss": 3.0841, "step": 38023 }, { "epoch": 1.86, "grad_norm": 0.6558736562728882, "learning_rate": 0.00018855129175348052, "loss": 2.9364, "step": 38024 }, { "epoch": 1.86, "grad_norm": 0.6964730620384216, "learning_rate": 0.0001885369975431837, "loss": 3.0563, "step": 38025 }, { "epoch": 1.86, "grad_norm": 0.722326934337616, "learning_rate": 0.00018852270362645885, "loss": 3.048, "step": 38026 }, { "epoch": 1.86, "grad_norm": 0.626826286315918, "learning_rate": 0.00018850841000334382, "loss": 2.9608, "step": 38027 }, { "epoch": 1.86, "grad_norm": 0.656711995601654, "learning_rate": 0.00018849411667387625, "loss": 3.0312, "step": 38028 }, { "epoch": 1.86, "grad_norm": 0.6362355351448059, "learning_rate": 0.0001884798236380936, "loss": 2.9385, "step": 38029 }, { "epoch": 1.86, "grad_norm": 0.7519611716270447, "learning_rate": 0.00018846553089603372, "loss": 2.8248, "step": 38030 }, { "epoch": 1.86, "grad_norm": 0.6189745664596558, "learning_rate": 0.00018845123844773412, "loss": 2.846, "step": 38031 }, { "epoch": 1.86, "grad_norm": 0.6334044337272644, "learning_rate": 0.00018843694629323244, "loss": 2.9264, "step": 38032 }, { "epoch": 1.86, "grad_norm": 0.6451401114463806, "learning_rate": 0.0001884226544325665, "loss": 2.9789, "step": 38033 }, { "epoch": 1.86, "grad_norm": 0.6401827931404114, "learning_rate": 0.00018840836286577366, "loss": 3.155, "step": 38034 }, { "epoch": 1.86, "grad_norm": 0.6569207906723022, "learning_rate": 0.00018839407159289192, "loss": 3.1407, "step": 38035 }, { "epoch": 1.86, "grad_norm": 0.6262386441230774, "learning_rate": 0.00018837978061395856, "loss": 2.9666, "step": 38036 }, { "epoch": 1.86, "grad_norm": 0.6198394894599915, "learning_rate": 0.0001883654899290113, "loss": 3.0311, "step": 38037 }, { "epoch": 1.86, "grad_norm": 0.6523306369781494, "learning_rate": 0.00018835119953808805, "loss": 2.9531, "step": 38038 }, { "epoch": 1.86, "grad_norm": 0.6170300245285034, "learning_rate": 0.00018833690944122615, "loss": 3.0921, "step": 38039 }, { "epoch": 1.86, "grad_norm": 0.6404187679290771, "learning_rate": 0.00018832261963846344, "loss": 3.2716, "step": 38040 }, { "epoch": 1.86, "grad_norm": 0.6466928124427795, "learning_rate": 0.00018830833012983727, "loss": 2.9561, "step": 38041 }, { "epoch": 1.86, "grad_norm": 0.6404629349708557, "learning_rate": 0.0001882940409153856, "loss": 3.045, "step": 38042 }, { "epoch": 1.86, "grad_norm": 0.6768361926078796, "learning_rate": 0.00018827975199514597, "loss": 3.0527, "step": 38043 }, { "epoch": 1.86, "grad_norm": 0.6463939547538757, "learning_rate": 0.0001882654633691558, "loss": 3.1942, "step": 38044 }, { "epoch": 1.86, "grad_norm": 0.6092776656150818, "learning_rate": 0.00018825117503745308, "loss": 3.0076, "step": 38045 }, { "epoch": 1.86, "grad_norm": 0.6425269246101379, "learning_rate": 0.00018823688700007516, "loss": 2.8472, "step": 38046 }, { "epoch": 1.86, "grad_norm": 0.6583645939826965, "learning_rate": 0.00018822259925705973, "loss": 2.9726, "step": 38047 }, { "epoch": 1.86, "grad_norm": 0.6546190977096558, "learning_rate": 0.00018820831180844455, "loss": 3.0767, "step": 38048 }, { "epoch": 1.86, "grad_norm": 0.6298822164535522, "learning_rate": 0.00018819402465426716, "loss": 2.9643, "step": 38049 }, { "epoch": 1.86, "grad_norm": 0.6522963047027588, "learning_rate": 0.0001881797377945652, "loss": 3.0874, "step": 38050 }, { "epoch": 1.86, "grad_norm": 0.6804884672164917, "learning_rate": 0.0001881654512293762, "loss": 3.01, "step": 38051 }, { "epoch": 1.86, "grad_norm": 0.6772292852401733, "learning_rate": 0.00018815116495873792, "loss": 3.0163, "step": 38052 }, { "epoch": 1.86, "grad_norm": 0.6438421607017517, "learning_rate": 0.00018813687898268798, "loss": 3.2455, "step": 38053 }, { "epoch": 1.86, "grad_norm": 0.6451230645179749, "learning_rate": 0.00018812259330126392, "loss": 2.9777, "step": 38054 }, { "epoch": 1.86, "grad_norm": 0.6543946862220764, "learning_rate": 0.0001881083079145035, "loss": 2.9877, "step": 38055 }, { "epoch": 1.87, "grad_norm": 0.6809148192405701, "learning_rate": 0.00018809402282244414, "loss": 3.0758, "step": 38056 }, { "epoch": 1.87, "grad_norm": 0.644823431968689, "learning_rate": 0.00018807973802512367, "loss": 2.9312, "step": 38057 }, { "epoch": 1.87, "grad_norm": 0.6681671142578125, "learning_rate": 0.00018806545352257963, "loss": 3.0616, "step": 38058 }, { "epoch": 1.87, "grad_norm": 0.6262026429176331, "learning_rate": 0.0001880511693148496, "loss": 2.9225, "step": 38059 }, { "epoch": 1.87, "grad_norm": 0.6308271884918213, "learning_rate": 0.00018803688540197132, "loss": 3.0833, "step": 38060 }, { "epoch": 1.87, "grad_norm": 0.6894946098327637, "learning_rate": 0.00018802260178398228, "loss": 2.9456, "step": 38061 }, { "epoch": 1.87, "grad_norm": 0.6232718825340271, "learning_rate": 0.0001880083184609201, "loss": 2.8134, "step": 38062 }, { "epoch": 1.87, "grad_norm": 0.6857800483703613, "learning_rate": 0.00018799403543282259, "loss": 2.9333, "step": 38063 }, { "epoch": 1.87, "grad_norm": 0.6674224734306335, "learning_rate": 0.00018797975269972718, "loss": 3.0611, "step": 38064 }, { "epoch": 1.87, "grad_norm": 0.6705251336097717, "learning_rate": 0.00018796547026167157, "loss": 3.1249, "step": 38065 }, { "epoch": 1.87, "grad_norm": 0.6390047073364258, "learning_rate": 0.00018795118811869323, "loss": 3.0813, "step": 38066 }, { "epoch": 1.87, "grad_norm": 0.6156495809555054, "learning_rate": 0.00018793690627082999, "loss": 2.861, "step": 38067 }, { "epoch": 1.87, "grad_norm": 0.6561599373817444, "learning_rate": 0.00018792262471811942, "loss": 2.9476, "step": 38068 }, { "epoch": 1.87, "grad_norm": 0.6761384606361389, "learning_rate": 0.00018790834346059894, "loss": 3.1729, "step": 38069 }, { "epoch": 1.87, "grad_norm": 0.6904240846633911, "learning_rate": 0.00018789406249830647, "loss": 2.988, "step": 38070 }, { "epoch": 1.87, "grad_norm": 0.6512880325317383, "learning_rate": 0.00018787978183127939, "loss": 3.1239, "step": 38071 }, { "epoch": 1.87, "grad_norm": 0.6401209831237793, "learning_rate": 0.00018786550145955534, "loss": 3.0033, "step": 38072 }, { "epoch": 1.87, "grad_norm": 0.6807664632797241, "learning_rate": 0.00018785122138317208, "loss": 2.7812, "step": 38073 }, { "epoch": 1.87, "grad_norm": 0.6754370927810669, "learning_rate": 0.00018783694160216701, "loss": 2.8129, "step": 38074 }, { "epoch": 1.87, "grad_norm": 0.6359378695487976, "learning_rate": 0.000187822662116578, "loss": 3.2968, "step": 38075 }, { "epoch": 1.87, "grad_norm": 0.6342616081237793, "learning_rate": 0.00018780838292644235, "loss": 2.9636, "step": 38076 }, { "epoch": 1.87, "grad_norm": 0.6342082023620605, "learning_rate": 0.00018779410403179782, "loss": 3.1626, "step": 38077 }, { "epoch": 1.87, "grad_norm": 0.6407181024551392, "learning_rate": 0.00018777982543268215, "loss": 3.0483, "step": 38078 }, { "epoch": 1.87, "grad_norm": 0.6541846394538879, "learning_rate": 0.0001877655471291328, "loss": 3.0294, "step": 38079 }, { "epoch": 1.87, "grad_norm": 0.6525475978851318, "learning_rate": 0.0001877512691211874, "loss": 2.8933, "step": 38080 }, { "epoch": 1.87, "grad_norm": 0.6654950976371765, "learning_rate": 0.00018773699140888346, "loss": 3.043, "step": 38081 }, { "epoch": 1.87, "grad_norm": 0.684727668762207, "learning_rate": 0.00018772271399225877, "loss": 3.1511, "step": 38082 }, { "epoch": 1.87, "grad_norm": 0.6460987329483032, "learning_rate": 0.00018770843687135083, "loss": 3.2699, "step": 38083 }, { "epoch": 1.87, "grad_norm": 0.6055113673210144, "learning_rate": 0.00018769416004619718, "loss": 2.9939, "step": 38084 }, { "epoch": 1.87, "grad_norm": 0.6769329309463501, "learning_rate": 0.00018767988351683562, "loss": 2.9545, "step": 38085 }, { "epoch": 1.87, "grad_norm": 0.6439892053604126, "learning_rate": 0.00018766560728330359, "loss": 3.0184, "step": 38086 }, { "epoch": 1.87, "grad_norm": 0.6827326416969299, "learning_rate": 0.00018765133134563864, "loss": 3.0605, "step": 38087 }, { "epoch": 1.87, "grad_norm": 0.6329249739646912, "learning_rate": 0.0001876370557038786, "loss": 3.1776, "step": 38088 }, { "epoch": 1.87, "grad_norm": 0.6486325263977051, "learning_rate": 0.00018762278035806083, "loss": 3.0524, "step": 38089 }, { "epoch": 1.87, "grad_norm": 0.6577807664871216, "learning_rate": 0.00018760850530822313, "loss": 2.9419, "step": 38090 }, { "epoch": 1.87, "grad_norm": 0.6746330857276917, "learning_rate": 0.00018759423055440285, "loss": 2.9862, "step": 38091 }, { "epoch": 1.87, "grad_norm": 0.6533883213996887, "learning_rate": 0.00018757995609663782, "loss": 3.0289, "step": 38092 }, { "epoch": 1.87, "grad_norm": 0.6736960411071777, "learning_rate": 0.00018756568193496558, "loss": 3.0924, "step": 38093 }, { "epoch": 1.87, "grad_norm": 0.6582047343254089, "learning_rate": 0.0001875514080694236, "loss": 2.9104, "step": 38094 }, { "epoch": 1.87, "grad_norm": 0.6109486818313599, "learning_rate": 0.00018753713450004968, "loss": 2.941, "step": 38095 }, { "epoch": 1.87, "grad_norm": 0.6732144355773926, "learning_rate": 0.0001875228612268812, "loss": 3.1589, "step": 38096 }, { "epoch": 1.87, "grad_norm": 0.7052947282791138, "learning_rate": 0.00018750858824995598, "loss": 2.7537, "step": 38097 }, { "epoch": 1.87, "grad_norm": 0.6800371408462524, "learning_rate": 0.00018749431556931142, "loss": 3.0416, "step": 38098 }, { "epoch": 1.87, "grad_norm": 0.675338864326477, "learning_rate": 0.00018748004318498512, "loss": 2.8279, "step": 38099 }, { "epoch": 1.87, "grad_norm": 0.6601358652114868, "learning_rate": 0.00018746577109701482, "loss": 3.0966, "step": 38100 }, { "epoch": 1.87, "grad_norm": 0.6854960322380066, "learning_rate": 0.000187451499305438, "loss": 3.2466, "step": 38101 }, { "epoch": 1.87, "grad_norm": 0.6765010356903076, "learning_rate": 0.00018743722781029235, "loss": 3.161, "step": 38102 }, { "epoch": 1.87, "grad_norm": 0.6960597634315491, "learning_rate": 0.0001874229566116152, "loss": 3.2019, "step": 38103 }, { "epoch": 1.87, "grad_norm": 0.6276587247848511, "learning_rate": 0.0001874086857094444, "loss": 3.1574, "step": 38104 }, { "epoch": 1.87, "grad_norm": 0.6926313042640686, "learning_rate": 0.00018739441510381752, "loss": 2.9851, "step": 38105 }, { "epoch": 1.87, "grad_norm": 0.6597981452941895, "learning_rate": 0.00018738014479477197, "loss": 2.8751, "step": 38106 }, { "epoch": 1.87, "grad_norm": 0.6823752522468567, "learning_rate": 0.00018736587478234554, "loss": 3.0197, "step": 38107 }, { "epoch": 1.87, "grad_norm": 0.6578922271728516, "learning_rate": 0.00018735160506657566, "loss": 2.9142, "step": 38108 }, { "epoch": 1.87, "grad_norm": 0.7423891425132751, "learning_rate": 0.0001873373356474999, "loss": 2.882, "step": 38109 }, { "epoch": 1.87, "grad_norm": 0.6776301264762878, "learning_rate": 0.00018732306652515608, "loss": 2.981, "step": 38110 }, { "epoch": 1.87, "grad_norm": 0.6616842150688171, "learning_rate": 0.0001873087976995815, "loss": 2.7471, "step": 38111 }, { "epoch": 1.87, "grad_norm": 0.706602156162262, "learning_rate": 0.000187294529170814, "loss": 3.0262, "step": 38112 }, { "epoch": 1.87, "grad_norm": 0.6833293437957764, "learning_rate": 0.00018728026093889084, "loss": 3.0769, "step": 38113 }, { "epoch": 1.87, "grad_norm": 0.6360612511634827, "learning_rate": 0.00018726599300384985, "loss": 2.8711, "step": 38114 }, { "epoch": 1.87, "grad_norm": 0.6546128392219543, "learning_rate": 0.0001872517253657286, "loss": 3.115, "step": 38115 }, { "epoch": 1.87, "grad_norm": 0.614295482635498, "learning_rate": 0.00018723745802456447, "loss": 2.9583, "step": 38116 }, { "epoch": 1.87, "grad_norm": 0.6379879713058472, "learning_rate": 0.00018722319098039532, "loss": 3.0077, "step": 38117 }, { "epoch": 1.87, "grad_norm": 0.6464399099349976, "learning_rate": 0.00018720892423325846, "loss": 2.9822, "step": 38118 }, { "epoch": 1.87, "grad_norm": 0.6336324214935303, "learning_rate": 0.00018719465778319161, "loss": 3.0851, "step": 38119 }, { "epoch": 1.87, "grad_norm": 0.6779120564460754, "learning_rate": 0.00018718039163023246, "loss": 2.8303, "step": 38120 }, { "epoch": 1.87, "grad_norm": 0.6924359202384949, "learning_rate": 0.00018716612577441828, "loss": 3.1107, "step": 38121 }, { "epoch": 1.87, "grad_norm": 0.643219530582428, "learning_rate": 0.0001871518602157869, "loss": 3.1419, "step": 38122 }, { "epoch": 1.87, "grad_norm": 0.6407486200332642, "learning_rate": 0.00018713759495437576, "loss": 2.9626, "step": 38123 }, { "epoch": 1.87, "grad_norm": 0.6377759575843811, "learning_rate": 0.00018712332999022246, "loss": 3.0036, "step": 38124 }, { "epoch": 1.87, "grad_norm": 0.6537527441978455, "learning_rate": 0.00018710906532336466, "loss": 3.0181, "step": 38125 }, { "epoch": 1.87, "grad_norm": 0.6885370016098022, "learning_rate": 0.00018709480095383977, "loss": 2.8599, "step": 38126 }, { "epoch": 1.87, "grad_norm": 0.6301379203796387, "learning_rate": 0.00018708053688168554, "loss": 2.8084, "step": 38127 }, { "epoch": 1.87, "grad_norm": 0.6435237526893616, "learning_rate": 0.00018706627310693933, "loss": 3.1439, "step": 38128 }, { "epoch": 1.87, "grad_norm": 0.6685776114463806, "learning_rate": 0.00018705200962963885, "loss": 2.9715, "step": 38129 }, { "epoch": 1.87, "grad_norm": 0.6370143294334412, "learning_rate": 0.00018703774644982177, "loss": 3.0221, "step": 38130 }, { "epoch": 1.87, "grad_norm": 0.6487584710121155, "learning_rate": 0.00018702348356752535, "loss": 2.9997, "step": 38131 }, { "epoch": 1.87, "grad_norm": 0.6534705758094788, "learning_rate": 0.00018700922098278746, "loss": 2.8816, "step": 38132 }, { "epoch": 1.87, "grad_norm": 0.6294593214988708, "learning_rate": 0.00018699495869564547, "loss": 2.9037, "step": 38133 }, { "epoch": 1.87, "grad_norm": 0.8882706761360168, "learning_rate": 0.00018698069670613696, "loss": 2.9112, "step": 38134 }, { "epoch": 1.87, "grad_norm": 0.6950933933258057, "learning_rate": 0.00018696643501429967, "loss": 2.9052, "step": 38135 }, { "epoch": 1.87, "grad_norm": 0.6648854613304138, "learning_rate": 0.0001869521736201709, "loss": 2.9956, "step": 38136 }, { "epoch": 1.87, "grad_norm": 0.712222695350647, "learning_rate": 0.00018693791252378855, "loss": 3.2136, "step": 38137 }, { "epoch": 1.87, "grad_norm": 0.6462759375572205, "learning_rate": 0.0001869236517251898, "loss": 3.0022, "step": 38138 }, { "epoch": 1.87, "grad_norm": 0.6655752658843994, "learning_rate": 0.00018690939122441237, "loss": 2.9743, "step": 38139 }, { "epoch": 1.87, "grad_norm": 0.6417601108551025, "learning_rate": 0.00018689513102149394, "loss": 3.051, "step": 38140 }, { "epoch": 1.87, "grad_norm": 0.6326993107795715, "learning_rate": 0.00018688087111647195, "loss": 3.0341, "step": 38141 }, { "epoch": 1.87, "grad_norm": 0.651273787021637, "learning_rate": 0.00018686661150938403, "loss": 3.1457, "step": 38142 }, { "epoch": 1.87, "grad_norm": 0.668491005897522, "learning_rate": 0.00018685235220026756, "loss": 2.8735, "step": 38143 }, { "epoch": 1.87, "grad_norm": 0.6334226727485657, "learning_rate": 0.00018683809318916023, "loss": 2.9159, "step": 38144 }, { "epoch": 1.87, "grad_norm": 0.6658082008361816, "learning_rate": 0.0001868238344760997, "loss": 2.978, "step": 38145 }, { "epoch": 1.87, "grad_norm": 0.6330041289329529, "learning_rate": 0.00018680957606112325, "loss": 2.9432, "step": 38146 }, { "epoch": 1.87, "grad_norm": 0.6996323466300964, "learning_rate": 0.00018679531794426875, "loss": 3.1075, "step": 38147 }, { "epoch": 1.87, "grad_norm": 0.6674297451972961, "learning_rate": 0.00018678106012557353, "loss": 3.0515, "step": 38148 }, { "epoch": 1.87, "grad_norm": 0.632754921913147, "learning_rate": 0.00018676680260507511, "loss": 2.9696, "step": 38149 }, { "epoch": 1.87, "grad_norm": 0.6195089221000671, "learning_rate": 0.0001867525453828113, "loss": 3.0214, "step": 38150 }, { "epoch": 1.87, "grad_norm": 0.6678367853164673, "learning_rate": 0.00018673828845881943, "loss": 2.8939, "step": 38151 }, { "epoch": 1.87, "grad_norm": 0.6400881409645081, "learning_rate": 0.00018672403183313716, "loss": 3.1865, "step": 38152 }, { "epoch": 1.87, "grad_norm": 0.6747406125068665, "learning_rate": 0.00018670977550580185, "loss": 3.1998, "step": 38153 }, { "epoch": 1.87, "grad_norm": 0.6130285263061523, "learning_rate": 0.00018669551947685128, "loss": 3.1601, "step": 38154 }, { "epoch": 1.87, "grad_norm": 0.6921128034591675, "learning_rate": 0.00018668126374632294, "loss": 2.9525, "step": 38155 }, { "epoch": 1.87, "grad_norm": 0.6503788232803345, "learning_rate": 0.00018666700831425424, "loss": 3.0248, "step": 38156 }, { "epoch": 1.87, "grad_norm": 0.624090850353241, "learning_rate": 0.00018665275318068296, "loss": 3.0273, "step": 38157 }, { "epoch": 1.87, "grad_norm": 0.6640616059303284, "learning_rate": 0.00018663849834564634, "loss": 3.284, "step": 38158 }, { "epoch": 1.87, "grad_norm": 0.7166267037391663, "learning_rate": 0.00018662424380918221, "loss": 2.9603, "step": 38159 }, { "epoch": 1.87, "grad_norm": 0.6211531758308411, "learning_rate": 0.00018660998957132805, "loss": 3.0816, "step": 38160 }, { "epoch": 1.87, "grad_norm": 0.6357093453407288, "learning_rate": 0.00018659573563212123, "loss": 3.1217, "step": 38161 }, { "epoch": 1.87, "grad_norm": 0.6868173480033875, "learning_rate": 0.00018658148199159953, "loss": 2.9255, "step": 38162 }, { "epoch": 1.87, "grad_norm": 0.6988732218742371, "learning_rate": 0.00018656722864980035, "loss": 2.914, "step": 38163 }, { "epoch": 1.87, "grad_norm": 0.620403528213501, "learning_rate": 0.0001865529756067612, "loss": 3.0708, "step": 38164 }, { "epoch": 1.87, "grad_norm": 0.7112835049629211, "learning_rate": 0.00018653872286251974, "loss": 3.1756, "step": 38165 }, { "epoch": 1.87, "grad_norm": 0.6396851539611816, "learning_rate": 0.00018652447041711344, "loss": 2.86, "step": 38166 }, { "epoch": 1.87, "grad_norm": 0.6468586325645447, "learning_rate": 0.00018651021827057992, "loss": 2.9201, "step": 38167 }, { "epoch": 1.87, "grad_norm": 0.6509426832199097, "learning_rate": 0.00018649596642295645, "loss": 2.9801, "step": 38168 }, { "epoch": 1.87, "grad_norm": 0.684259295463562, "learning_rate": 0.00018648171487428087, "loss": 3.1006, "step": 38169 }, { "epoch": 1.87, "grad_norm": 0.6615915298461914, "learning_rate": 0.00018646746362459065, "loss": 3.0024, "step": 38170 }, { "epoch": 1.87, "grad_norm": 0.6847988963127136, "learning_rate": 0.0001864532126739232, "loss": 2.9606, "step": 38171 }, { "epoch": 1.87, "grad_norm": 0.6112357974052429, "learning_rate": 0.00018643896202231623, "loss": 3.182, "step": 38172 }, { "epoch": 1.87, "grad_norm": 0.6447160243988037, "learning_rate": 0.00018642471166980707, "loss": 3.0568, "step": 38173 }, { "epoch": 1.87, "grad_norm": 0.6257251501083374, "learning_rate": 0.00018641046161643353, "loss": 3.1314, "step": 38174 }, { "epoch": 1.87, "grad_norm": 0.6292673349380493, "learning_rate": 0.00018639621186223278, "loss": 2.9662, "step": 38175 }, { "epoch": 1.87, "grad_norm": 0.6028404235839844, "learning_rate": 0.0001863819624072426, "loss": 3.1069, "step": 38176 }, { "epoch": 1.87, "grad_norm": 0.6941775679588318, "learning_rate": 0.00018636771325150056, "loss": 2.98, "step": 38177 }, { "epoch": 1.87, "grad_norm": 0.6567956805229187, "learning_rate": 0.00018635346439504395, "loss": 2.9761, "step": 38178 }, { "epoch": 1.87, "grad_norm": 0.6520336866378784, "learning_rate": 0.00018633921583791055, "loss": 3.1373, "step": 38179 }, { "epoch": 1.87, "grad_norm": 0.6364136338233948, "learning_rate": 0.00018632496758013768, "loss": 3.017, "step": 38180 }, { "epoch": 1.87, "grad_norm": 0.6324988603591919, "learning_rate": 0.00018631071962176303, "loss": 2.9674, "step": 38181 }, { "epoch": 1.87, "grad_norm": 0.6260587573051453, "learning_rate": 0.00018629647196282412, "loss": 2.8975, "step": 38182 }, { "epoch": 1.87, "grad_norm": 0.6402279138565063, "learning_rate": 0.0001862822246033583, "loss": 3.0804, "step": 38183 }, { "epoch": 1.87, "grad_norm": 0.6180815100669861, "learning_rate": 0.00018626797754340332, "loss": 2.917, "step": 38184 }, { "epoch": 1.87, "grad_norm": 0.6797022819519043, "learning_rate": 0.00018625373078299658, "loss": 2.9086, "step": 38185 }, { "epoch": 1.87, "grad_norm": 0.6699610352516174, "learning_rate": 0.00018623948432217554, "loss": 3.0638, "step": 38186 }, { "epoch": 1.87, "grad_norm": 0.6808142066001892, "learning_rate": 0.0001862252381609779, "loss": 2.9938, "step": 38187 }, { "epoch": 1.87, "grad_norm": 0.6589908599853516, "learning_rate": 0.00018621099229944107, "loss": 3.1829, "step": 38188 }, { "epoch": 1.87, "grad_norm": 0.6513395309448242, "learning_rate": 0.00018619674673760262, "loss": 2.9098, "step": 38189 }, { "epoch": 1.87, "grad_norm": 0.6903216242790222, "learning_rate": 0.00018618250147549995, "loss": 2.9158, "step": 38190 }, { "epoch": 1.87, "grad_norm": 0.71368807554245, "learning_rate": 0.0001861682565131707, "loss": 3.026, "step": 38191 }, { "epoch": 1.87, "grad_norm": 0.659199595451355, "learning_rate": 0.00018615401185065238, "loss": 2.8882, "step": 38192 }, { "epoch": 1.87, "grad_norm": 0.6380458474159241, "learning_rate": 0.0001861397674879824, "loss": 3.1365, "step": 38193 }, { "epoch": 1.87, "grad_norm": 0.6615623235702515, "learning_rate": 0.00018612552342519849, "loss": 2.7819, "step": 38194 }, { "epoch": 1.87, "grad_norm": 0.6508620977401733, "learning_rate": 0.00018611127966233797, "loss": 2.9649, "step": 38195 }, { "epoch": 1.87, "grad_norm": 0.6376355290412903, "learning_rate": 0.00018609703619943833, "loss": 2.9156, "step": 38196 }, { "epoch": 1.87, "grad_norm": 0.6251353621482849, "learning_rate": 0.00018608279303653733, "loss": 3.0193, "step": 38197 }, { "epoch": 1.87, "grad_norm": 0.6305531859397888, "learning_rate": 0.00018606855017367223, "loss": 2.8546, "step": 38198 }, { "epoch": 1.87, "grad_norm": 0.6258016228675842, "learning_rate": 0.00018605430761088073, "loss": 2.9225, "step": 38199 }, { "epoch": 1.87, "grad_norm": 0.6111253499984741, "learning_rate": 0.00018604006534820016, "loss": 2.9206, "step": 38200 }, { "epoch": 1.87, "grad_norm": 0.6775639057159424, "learning_rate": 0.0001860258233856681, "loss": 2.9885, "step": 38201 }, { "epoch": 1.87, "grad_norm": 0.6494293212890625, "learning_rate": 0.0001860115817233222, "loss": 3.0052, "step": 38202 }, { "epoch": 1.87, "grad_norm": 0.6443254947662354, "learning_rate": 0.0001859973403611998, "loss": 3.0249, "step": 38203 }, { "epoch": 1.87, "grad_norm": 0.5837622880935669, "learning_rate": 0.00018598309929933853, "loss": 2.9753, "step": 38204 }, { "epoch": 1.87, "grad_norm": 0.6752540469169617, "learning_rate": 0.00018596885853777574, "loss": 2.9707, "step": 38205 }, { "epoch": 1.87, "grad_norm": 0.6625667810440063, "learning_rate": 0.00018595461807654906, "loss": 3.0735, "step": 38206 }, { "epoch": 1.87, "grad_norm": 0.6508365273475647, "learning_rate": 0.00018594037791569602, "loss": 3.2523, "step": 38207 }, { "epoch": 1.87, "grad_norm": 0.6375356316566467, "learning_rate": 0.00018592613805525396, "loss": 3.043, "step": 38208 }, { "epoch": 1.87, "grad_norm": 0.6329519152641296, "learning_rate": 0.00018591189849526062, "loss": 2.8927, "step": 38209 }, { "epoch": 1.87, "grad_norm": 0.6348059177398682, "learning_rate": 0.00018589765923575333, "loss": 3.1013, "step": 38210 }, { "epoch": 1.87, "grad_norm": 0.6728472709655762, "learning_rate": 0.00018588342027676957, "loss": 3.0327, "step": 38211 }, { "epoch": 1.87, "grad_norm": 0.6233043074607849, "learning_rate": 0.00018586918161834706, "loss": 2.726, "step": 38212 }, { "epoch": 1.87, "grad_norm": 0.7845079302787781, "learning_rate": 0.00018585494326052307, "loss": 2.8162, "step": 38213 }, { "epoch": 1.87, "grad_norm": 0.662473738193512, "learning_rate": 0.00018584070520333528, "loss": 2.9308, "step": 38214 }, { "epoch": 1.87, "grad_norm": 0.6366502642631531, "learning_rate": 0.000185826467446821, "loss": 2.8987, "step": 38215 }, { "epoch": 1.87, "grad_norm": 0.6556762456893921, "learning_rate": 0.00018581222999101784, "loss": 2.9589, "step": 38216 }, { "epoch": 1.87, "grad_norm": 0.626169741153717, "learning_rate": 0.0001857979928359634, "loss": 3.0476, "step": 38217 }, { "epoch": 1.87, "grad_norm": 0.6835764050483704, "learning_rate": 0.0001857837559816949, "loss": 3.0164, "step": 38218 }, { "epoch": 1.87, "grad_norm": 0.6800299286842346, "learning_rate": 0.00018576951942825018, "loss": 2.9386, "step": 38219 }, { "epoch": 1.87, "grad_norm": 0.6377986073493958, "learning_rate": 0.0001857552831756664, "loss": 2.98, "step": 38220 }, { "epoch": 1.87, "grad_norm": 0.6307637095451355, "learning_rate": 0.0001857410472239813, "loss": 2.9888, "step": 38221 }, { "epoch": 1.87, "grad_norm": 0.6545106768608093, "learning_rate": 0.00018572681157323233, "loss": 3.1622, "step": 38222 }, { "epoch": 1.87, "grad_norm": 0.6275971531867981, "learning_rate": 0.00018571257622345685, "loss": 3.1065, "step": 38223 }, { "epoch": 1.87, "grad_norm": 0.6176348328590393, "learning_rate": 0.00018569834117469254, "loss": 2.9758, "step": 38224 }, { "epoch": 1.87, "grad_norm": 0.6735263466835022, "learning_rate": 0.00018568410642697678, "loss": 2.9344, "step": 38225 }, { "epoch": 1.87, "grad_norm": 0.6405964493751526, "learning_rate": 0.00018566987198034696, "loss": 3.0465, "step": 38226 }, { "epoch": 1.87, "grad_norm": 0.6843787431716919, "learning_rate": 0.0001856556378348409, "loss": 2.8972, "step": 38227 }, { "epoch": 1.87, "grad_norm": 0.687495231628418, "learning_rate": 0.00018564140399049574, "loss": 3.133, "step": 38228 }, { "epoch": 1.87, "grad_norm": 0.6218973994255066, "learning_rate": 0.00018562717044734924, "loss": 3.1439, "step": 38229 }, { "epoch": 1.87, "grad_norm": 0.6834261417388916, "learning_rate": 0.0001856129372054386, "loss": 2.9368, "step": 38230 }, { "epoch": 1.87, "grad_norm": 0.6899203658103943, "learning_rate": 0.0001855987042648015, "loss": 3.1648, "step": 38231 }, { "epoch": 1.87, "grad_norm": 0.6193641424179077, "learning_rate": 0.00018558447162547557, "loss": 3.1351, "step": 38232 }, { "epoch": 1.87, "grad_norm": 0.6698924899101257, "learning_rate": 0.0001855702392874979, "loss": 3.0177, "step": 38233 }, { "epoch": 1.87, "grad_norm": 0.6577365398406982, "learning_rate": 0.0001855560072509064, "loss": 2.8135, "step": 38234 }, { "epoch": 1.87, "grad_norm": 0.705444872379303, "learning_rate": 0.0001855417755157382, "loss": 2.8679, "step": 38235 }, { "epoch": 1.87, "grad_norm": 0.6559796929359436, "learning_rate": 0.00018552754408203092, "loss": 3.0659, "step": 38236 }, { "epoch": 1.87, "grad_norm": 0.6978949904441833, "learning_rate": 0.00018551331294982214, "loss": 2.8357, "step": 38237 }, { "epoch": 1.87, "grad_norm": 0.6330851316452026, "learning_rate": 0.00018549908211914928, "loss": 2.945, "step": 38238 }, { "epoch": 1.87, "grad_norm": 0.6387531161308289, "learning_rate": 0.00018548485159004978, "loss": 3.0267, "step": 38239 }, { "epoch": 1.87, "grad_norm": 0.6540210843086243, "learning_rate": 0.00018547062136256107, "loss": 3.0548, "step": 38240 }, { "epoch": 1.87, "grad_norm": 0.6874194741249084, "learning_rate": 0.00018545639143672068, "loss": 2.8508, "step": 38241 }, { "epoch": 1.87, "grad_norm": 0.6516175270080566, "learning_rate": 0.00018544216181256624, "loss": 3.0576, "step": 38242 }, { "epoch": 1.87, "grad_norm": 0.6130465865135193, "learning_rate": 0.00018542793249013505, "loss": 2.9599, "step": 38243 }, { "epoch": 1.87, "grad_norm": 0.6488755941390991, "learning_rate": 0.00018541370346946467, "loss": 3.0966, "step": 38244 }, { "epoch": 1.87, "grad_norm": 0.6888542771339417, "learning_rate": 0.00018539947475059245, "loss": 2.9866, "step": 38245 }, { "epoch": 1.87, "grad_norm": 0.7215191721916199, "learning_rate": 0.000185385246333556, "loss": 3.2059, "step": 38246 }, { "epoch": 1.87, "grad_norm": 0.6577127575874329, "learning_rate": 0.00018537101821839282, "loss": 2.8346, "step": 38247 }, { "epoch": 1.87, "grad_norm": 0.6458978056907654, "learning_rate": 0.0001853567904051402, "loss": 3.181, "step": 38248 }, { "epoch": 1.87, "grad_norm": 0.6692994236946106, "learning_rate": 0.00018534256289383583, "loss": 2.9622, "step": 38249 }, { "epoch": 1.87, "grad_norm": 0.6456918716430664, "learning_rate": 0.00018532833568451706, "loss": 3.1947, "step": 38250 }, { "epoch": 1.87, "grad_norm": 0.6313451528549194, "learning_rate": 0.00018531410877722142, "loss": 3.005, "step": 38251 }, { "epoch": 1.87, "grad_norm": 1.0270297527313232, "learning_rate": 0.00018529988217198622, "loss": 2.9805, "step": 38252 }, { "epoch": 1.87, "grad_norm": 0.6870240569114685, "learning_rate": 0.00018528565586884915, "loss": 3.0719, "step": 38253 }, { "epoch": 1.87, "grad_norm": 0.631271481513977, "learning_rate": 0.00018527142986784763, "loss": 2.9909, "step": 38254 }, { "epoch": 1.87, "grad_norm": 0.6817666292190552, "learning_rate": 0.00018525720416901894, "loss": 3.0439, "step": 38255 }, { "epoch": 1.87, "grad_norm": 0.6389595866203308, "learning_rate": 0.00018524297877240084, "loss": 3.2042, "step": 38256 }, { "epoch": 1.87, "grad_norm": 0.6673776507377625, "learning_rate": 0.00018522875367803053, "loss": 2.914, "step": 38257 }, { "epoch": 1.87, "grad_norm": 0.6235078573226929, "learning_rate": 0.0001852145288859456, "loss": 2.8799, "step": 38258 }, { "epoch": 1.87, "grad_norm": 0.6437073945999146, "learning_rate": 0.00018520030439618364, "loss": 3.0012, "step": 38259 }, { "epoch": 1.88, "grad_norm": 0.634657621383667, "learning_rate": 0.00018518608020878184, "loss": 2.9875, "step": 38260 }, { "epoch": 1.88, "grad_norm": 0.6310805678367615, "learning_rate": 0.00018517185632377787, "loss": 3.0513, "step": 38261 }, { "epoch": 1.88, "grad_norm": 0.6541125178337097, "learning_rate": 0.00018515763274120912, "loss": 3.0226, "step": 38262 }, { "epoch": 1.88, "grad_norm": 0.6451280117034912, "learning_rate": 0.00018514340946111302, "loss": 2.9589, "step": 38263 }, { "epoch": 1.88, "grad_norm": 0.6415655016899109, "learning_rate": 0.00018512918648352718, "loss": 3.0097, "step": 38264 }, { "epoch": 1.88, "grad_norm": 0.6348758339881897, "learning_rate": 0.0001851149638084889, "loss": 2.8844, "step": 38265 }, { "epoch": 1.88, "grad_norm": 0.6602434515953064, "learning_rate": 0.00018510074143603575, "loss": 2.8191, "step": 38266 }, { "epoch": 1.88, "grad_norm": 0.6239683032035828, "learning_rate": 0.00018508651936620504, "loss": 2.9728, "step": 38267 }, { "epoch": 1.88, "grad_norm": 0.7023791074752808, "learning_rate": 0.00018507229759903436, "loss": 2.9814, "step": 38268 }, { "epoch": 1.88, "grad_norm": 0.7200928330421448, "learning_rate": 0.00018505807613456116, "loss": 2.8517, "step": 38269 }, { "epoch": 1.88, "grad_norm": 0.6196789741516113, "learning_rate": 0.00018504385497282281, "loss": 3.0157, "step": 38270 }, { "epoch": 1.88, "grad_norm": 0.6774440407752991, "learning_rate": 0.00018502963411385688, "loss": 2.9213, "step": 38271 }, { "epoch": 1.88, "grad_norm": 0.6709611415863037, "learning_rate": 0.00018501541355770074, "loss": 3.0191, "step": 38272 }, { "epoch": 1.88, "grad_norm": 0.6436893343925476, "learning_rate": 0.00018500119330439182, "loss": 3.0043, "step": 38273 }, { "epoch": 1.88, "grad_norm": 0.6333373785018921, "learning_rate": 0.00018498697335396773, "loss": 2.9307, "step": 38274 }, { "epoch": 1.88, "grad_norm": 0.6157318949699402, "learning_rate": 0.00018497275370646575, "loss": 3.2422, "step": 38275 }, { "epoch": 1.88, "grad_norm": 0.6362408995628357, "learning_rate": 0.00018495853436192348, "loss": 2.9209, "step": 38276 }, { "epoch": 1.88, "grad_norm": 0.6697286367416382, "learning_rate": 0.00018494431532037816, "loss": 2.9718, "step": 38277 }, { "epoch": 1.88, "grad_norm": 0.6749752759933472, "learning_rate": 0.00018493009658186743, "loss": 2.9866, "step": 38278 }, { "epoch": 1.88, "grad_norm": 0.6495433449745178, "learning_rate": 0.00018491587814642874, "loss": 3.0554, "step": 38279 }, { "epoch": 1.88, "grad_norm": 0.6848012208938599, "learning_rate": 0.00018490166001409935, "loss": 3.0164, "step": 38280 }, { "epoch": 1.88, "grad_norm": 0.7173035740852356, "learning_rate": 0.00018488744218491694, "loss": 3.077, "step": 38281 }, { "epoch": 1.88, "grad_norm": 0.6794202327728271, "learning_rate": 0.00018487322465891875, "loss": 3.0307, "step": 38282 }, { "epoch": 1.88, "grad_norm": 0.640831470489502, "learning_rate": 0.00018485900743614244, "loss": 2.995, "step": 38283 }, { "epoch": 1.88, "grad_norm": 0.6711544394493103, "learning_rate": 0.00018484479051662534, "loss": 2.9166, "step": 38284 }, { "epoch": 1.88, "grad_norm": 0.654302179813385, "learning_rate": 0.00018483057390040482, "loss": 3.2323, "step": 38285 }, { "epoch": 1.88, "grad_norm": 0.6835788488388062, "learning_rate": 0.00018481635758751852, "loss": 3.0623, "step": 38286 }, { "epoch": 1.88, "grad_norm": 0.6237308382987976, "learning_rate": 0.00018480214157800368, "loss": 3.0551, "step": 38287 }, { "epoch": 1.88, "grad_norm": 0.652237594127655, "learning_rate": 0.00018478792587189774, "loss": 2.9883, "step": 38288 }, { "epoch": 1.88, "grad_norm": 0.6164937019348145, "learning_rate": 0.0001847737104692384, "loss": 3.1875, "step": 38289 }, { "epoch": 1.88, "grad_norm": 0.6360146403312683, "learning_rate": 0.0001847594953700629, "loss": 3.3418, "step": 38290 }, { "epoch": 1.88, "grad_norm": 0.639178991317749, "learning_rate": 0.0001847452805744087, "loss": 2.9557, "step": 38291 }, { "epoch": 1.88, "grad_norm": 0.6623160243034363, "learning_rate": 0.0001847310660823132, "loss": 2.8778, "step": 38292 }, { "epoch": 1.88, "grad_norm": 0.7035412788391113, "learning_rate": 0.0001847168518938139, "loss": 3.1368, "step": 38293 }, { "epoch": 1.88, "grad_norm": 0.6533422470092773, "learning_rate": 0.00018470263800894827, "loss": 3.0838, "step": 38294 }, { "epoch": 1.88, "grad_norm": 0.7027701139450073, "learning_rate": 0.00018468842442775362, "loss": 2.8786, "step": 38295 }, { "epoch": 1.88, "grad_norm": 0.6815171241760254, "learning_rate": 0.0001846742111502676, "loss": 2.8839, "step": 38296 }, { "epoch": 1.88, "grad_norm": 0.6483194828033447, "learning_rate": 0.0001846599981765274, "loss": 2.8239, "step": 38297 }, { "epoch": 1.88, "grad_norm": 0.6512202024459839, "learning_rate": 0.00018464578550657053, "loss": 2.9617, "step": 38298 }, { "epoch": 1.88, "grad_norm": 0.6246964335441589, "learning_rate": 0.00018463157314043462, "loss": 2.7436, "step": 38299 }, { "epoch": 1.88, "grad_norm": 0.6432050466537476, "learning_rate": 0.00018461736107815675, "loss": 3.1272, "step": 38300 }, { "epoch": 1.88, "grad_norm": 0.7117769718170166, "learning_rate": 0.00018460314931977481, "loss": 2.9599, "step": 38301 }, { "epoch": 1.88, "grad_norm": 0.6452391743659973, "learning_rate": 0.0001845889378653257, "loss": 3.0305, "step": 38302 }, { "epoch": 1.88, "grad_norm": 0.6579294800758362, "learning_rate": 0.00018457472671484717, "loss": 3.053, "step": 38303 }, { "epoch": 1.88, "grad_norm": 0.5954685211181641, "learning_rate": 0.00018456051586837672, "loss": 2.9531, "step": 38304 }, { "epoch": 1.88, "grad_norm": 0.673682451248169, "learning_rate": 0.00018454630532595154, "loss": 2.987, "step": 38305 }, { "epoch": 1.88, "grad_norm": 0.6650246381759644, "learning_rate": 0.0001845320950876093, "loss": 2.9635, "step": 38306 }, { "epoch": 1.88, "grad_norm": 0.6644670367240906, "learning_rate": 0.00018451788515338715, "loss": 3.0036, "step": 38307 }, { "epoch": 1.88, "grad_norm": 0.6150769591331482, "learning_rate": 0.00018450367552332272, "loss": 3.0552, "step": 38308 }, { "epoch": 1.88, "grad_norm": 0.6673305630683899, "learning_rate": 0.00018448946619745342, "loss": 2.8861, "step": 38309 }, { "epoch": 1.88, "grad_norm": 0.6792820692062378, "learning_rate": 0.00018447525717581656, "loss": 3.0457, "step": 38310 }, { "epoch": 1.88, "grad_norm": 0.6672223210334778, "learning_rate": 0.00018446104845844976, "loss": 2.9729, "step": 38311 }, { "epoch": 1.88, "grad_norm": 0.6425515413284302, "learning_rate": 0.0001844468400453902, "loss": 2.9787, "step": 38312 }, { "epoch": 1.88, "grad_norm": 0.6276784539222717, "learning_rate": 0.00018443263193667543, "loss": 2.833, "step": 38313 }, { "epoch": 1.88, "grad_norm": 0.6771143674850464, "learning_rate": 0.00018441842413234296, "loss": 2.79, "step": 38314 }, { "epoch": 1.88, "grad_norm": 0.6400710940361023, "learning_rate": 0.00018440421663243009, "loss": 3.1405, "step": 38315 }, { "epoch": 1.88, "grad_norm": 0.6542527079582214, "learning_rate": 0.00018439000943697427, "loss": 2.8716, "step": 38316 }, { "epoch": 1.88, "grad_norm": 0.648661196231842, "learning_rate": 0.0001843758025460128, "loss": 2.9464, "step": 38317 }, { "epoch": 1.88, "grad_norm": 0.6391183733940125, "learning_rate": 0.00018436159595958332, "loss": 3.1304, "step": 38318 }, { "epoch": 1.88, "grad_norm": 0.6920697093009949, "learning_rate": 0.0001843473896777232, "loss": 2.9166, "step": 38319 }, { "epoch": 1.88, "grad_norm": 0.6402799487113953, "learning_rate": 0.00018433318370046964, "loss": 2.9813, "step": 38320 }, { "epoch": 1.88, "grad_norm": 0.6674555540084839, "learning_rate": 0.00018431897802786035, "loss": 3.056, "step": 38321 }, { "epoch": 1.88, "grad_norm": 0.6335968971252441, "learning_rate": 0.00018430477265993247, "loss": 3.0924, "step": 38322 }, { "epoch": 1.88, "grad_norm": 0.6576682329177856, "learning_rate": 0.00018429056759672362, "loss": 2.7713, "step": 38323 }, { "epoch": 1.88, "grad_norm": 0.6932410001754761, "learning_rate": 0.00018427636283827124, "loss": 3.0169, "step": 38324 }, { "epoch": 1.88, "grad_norm": 0.6399856209754944, "learning_rate": 0.0001842621583846125, "loss": 3.008, "step": 38325 }, { "epoch": 1.88, "grad_norm": 0.65628582239151, "learning_rate": 0.0001842479542357851, "loss": 3.279, "step": 38326 }, { "epoch": 1.88, "grad_norm": 0.6356684565544128, "learning_rate": 0.00018423375039182624, "loss": 2.8128, "step": 38327 }, { "epoch": 1.88, "grad_norm": 0.6769824028015137, "learning_rate": 0.00018421954685277347, "loss": 2.8816, "step": 38328 }, { "epoch": 1.88, "grad_norm": 0.6468422412872314, "learning_rate": 0.00018420534361866396, "loss": 2.7391, "step": 38329 }, { "epoch": 1.88, "grad_norm": 0.653346598148346, "learning_rate": 0.0001841911406895354, "loss": 3.1066, "step": 38330 }, { "epoch": 1.88, "grad_norm": 0.671597957611084, "learning_rate": 0.00018417693806542512, "loss": 2.9741, "step": 38331 }, { "epoch": 1.88, "grad_norm": 0.6430724859237671, "learning_rate": 0.00018416273574637043, "loss": 3.0607, "step": 38332 }, { "epoch": 1.88, "grad_norm": 0.6749132871627808, "learning_rate": 0.00018414853373240888, "loss": 3.0446, "step": 38333 }, { "epoch": 1.88, "grad_norm": 0.6767664551734924, "learning_rate": 0.00018413433202357774, "loss": 2.8677, "step": 38334 }, { "epoch": 1.88, "grad_norm": 0.633886992931366, "learning_rate": 0.0001841201306199144, "loss": 2.7959, "step": 38335 }, { "epoch": 1.88, "grad_norm": 0.6434829831123352, "learning_rate": 0.00018410592952145647, "loss": 2.8927, "step": 38336 }, { "epoch": 1.88, "grad_norm": 0.6419342160224915, "learning_rate": 0.00018409172872824116, "loss": 3.1004, "step": 38337 }, { "epoch": 1.88, "grad_norm": 0.6574919819831848, "learning_rate": 0.00018407752824030598, "loss": 3.0997, "step": 38338 }, { "epoch": 1.88, "grad_norm": 0.6425691246986389, "learning_rate": 0.00018406332805768813, "loss": 2.7971, "step": 38339 }, { "epoch": 1.88, "grad_norm": 0.677888810634613, "learning_rate": 0.0001840491281804253, "loss": 2.7808, "step": 38340 }, { "epoch": 1.88, "grad_norm": 0.6419331431388855, "learning_rate": 0.00018403492860855474, "loss": 2.9517, "step": 38341 }, { "epoch": 1.88, "grad_norm": 0.6205721497535706, "learning_rate": 0.00018402072934211377, "loss": 3.0624, "step": 38342 }, { "epoch": 1.88, "grad_norm": 0.6749025583267212, "learning_rate": 0.00018400653038114, "loss": 2.9666, "step": 38343 }, { "epoch": 1.88, "grad_norm": 0.6735125184059143, "learning_rate": 0.00018399233172567057, "loss": 2.8711, "step": 38344 }, { "epoch": 1.88, "grad_norm": 0.640764594078064, "learning_rate": 0.00018397813337574307, "loss": 3.1523, "step": 38345 }, { "epoch": 1.88, "grad_norm": 0.6429493427276611, "learning_rate": 0.00018396393533139492, "loss": 2.983, "step": 38346 }, { "epoch": 1.88, "grad_norm": 0.6589473485946655, "learning_rate": 0.0001839497375926633, "loss": 2.9291, "step": 38347 }, { "epoch": 1.88, "grad_norm": 0.6674602031707764, "learning_rate": 0.00018393554015958588, "loss": 3.0551, "step": 38348 }, { "epoch": 1.88, "grad_norm": 0.6474887132644653, "learning_rate": 0.00018392134303219984, "loss": 2.9849, "step": 38349 }, { "epoch": 1.88, "grad_norm": 0.6512155532836914, "learning_rate": 0.00018390714621054253, "loss": 3.0877, "step": 38350 }, { "epoch": 1.88, "grad_norm": 0.6206179261207581, "learning_rate": 0.00018389294969465165, "loss": 3.1422, "step": 38351 }, { "epoch": 1.88, "grad_norm": 0.6520265340805054, "learning_rate": 0.00018387875348456427, "loss": 2.8446, "step": 38352 }, { "epoch": 1.88, "grad_norm": 0.6797086596488953, "learning_rate": 0.000183864557580318, "loss": 3.1531, "step": 38353 }, { "epoch": 1.88, "grad_norm": 0.6597195863723755, "learning_rate": 0.00018385036198195, "loss": 2.9861, "step": 38354 }, { "epoch": 1.88, "grad_norm": 0.6018645167350769, "learning_rate": 0.0001838361666894979, "loss": 3.0342, "step": 38355 }, { "epoch": 1.88, "grad_norm": 0.6335331797599792, "learning_rate": 0.000183821971702999, "loss": 2.9257, "step": 38356 }, { "epoch": 1.88, "grad_norm": 0.6828946471214294, "learning_rate": 0.0001838077770224905, "loss": 2.9478, "step": 38357 }, { "epoch": 1.88, "grad_norm": 0.6436010599136353, "learning_rate": 0.00018379358264801018, "loss": 3.0097, "step": 38358 }, { "epoch": 1.88, "grad_norm": 0.6501964926719666, "learning_rate": 0.00018377938857959509, "loss": 2.8804, "step": 38359 }, { "epoch": 1.88, "grad_norm": 0.6490870118141174, "learning_rate": 0.00018376519481728263, "loss": 2.8949, "step": 38360 }, { "epoch": 1.88, "grad_norm": 0.6395012140274048, "learning_rate": 0.00018375100136111043, "loss": 3.0246, "step": 38361 }, { "epoch": 1.88, "grad_norm": 0.6163665056228638, "learning_rate": 0.00018373680821111562, "loss": 3.0793, "step": 38362 }, { "epoch": 1.88, "grad_norm": 0.635441243648529, "learning_rate": 0.00018372261536733582, "loss": 3.1323, "step": 38363 }, { "epoch": 1.88, "grad_norm": 0.6642354726791382, "learning_rate": 0.0001837084228298081, "loss": 2.957, "step": 38364 }, { "epoch": 1.88, "grad_norm": 0.666587769985199, "learning_rate": 0.00018369423059857, "loss": 3.0932, "step": 38365 }, { "epoch": 1.88, "grad_norm": 0.6363824605941772, "learning_rate": 0.00018368003867365909, "loss": 2.7929, "step": 38366 }, { "epoch": 1.88, "grad_norm": 0.6824597716331482, "learning_rate": 0.0001836658470551125, "loss": 3.0304, "step": 38367 }, { "epoch": 1.88, "grad_norm": 0.6351178288459778, "learning_rate": 0.00018365165574296766, "loss": 3.0697, "step": 38368 }, { "epoch": 1.88, "grad_norm": 0.6279469728469849, "learning_rate": 0.00018363746473726194, "loss": 2.9615, "step": 38369 }, { "epoch": 1.88, "grad_norm": 0.6463478803634644, "learning_rate": 0.00018362327403803277, "loss": 3.1272, "step": 38370 }, { "epoch": 1.88, "grad_norm": 0.6736152768135071, "learning_rate": 0.00018360908364531756, "loss": 3.0822, "step": 38371 }, { "epoch": 1.88, "grad_norm": 0.627798318862915, "learning_rate": 0.0001835948935591535, "loss": 3.2404, "step": 38372 }, { "epoch": 1.88, "grad_norm": 0.6646186709403992, "learning_rate": 0.00018358070377957824, "loss": 2.8242, "step": 38373 }, { "epoch": 1.88, "grad_norm": 0.6356680393218994, "learning_rate": 0.00018356651430662888, "loss": 2.7888, "step": 38374 }, { "epoch": 1.88, "grad_norm": 0.6446818709373474, "learning_rate": 0.0001835523251403429, "loss": 2.9223, "step": 38375 }, { "epoch": 1.88, "grad_norm": 0.7403597235679626, "learning_rate": 0.00018353813628075775, "loss": 3.102, "step": 38376 }, { "epoch": 1.88, "grad_norm": 0.6128467321395874, "learning_rate": 0.00018352394772791071, "loss": 2.79, "step": 38377 }, { "epoch": 1.88, "grad_norm": 0.6641044020652771, "learning_rate": 0.00018350975948183926, "loss": 2.8962, "step": 38378 }, { "epoch": 1.88, "grad_norm": 0.6116800308227539, "learning_rate": 0.00018349557154258056, "loss": 3.0184, "step": 38379 }, { "epoch": 1.88, "grad_norm": 0.6737449765205383, "learning_rate": 0.00018348138391017214, "loss": 3.0201, "step": 38380 }, { "epoch": 1.88, "grad_norm": 0.6416964530944824, "learning_rate": 0.0001834671965846514, "loss": 3.0682, "step": 38381 }, { "epoch": 1.88, "grad_norm": 0.6830700635910034, "learning_rate": 0.00018345300956605553, "loss": 2.9621, "step": 38382 }, { "epoch": 1.88, "grad_norm": 0.6257073283195496, "learning_rate": 0.0001834388228544221, "loss": 3.1164, "step": 38383 }, { "epoch": 1.88, "grad_norm": 0.6321331858634949, "learning_rate": 0.00018342463644978825, "loss": 3.0862, "step": 38384 }, { "epoch": 1.88, "grad_norm": 0.6541038155555725, "learning_rate": 0.00018341045035219153, "loss": 3.0973, "step": 38385 }, { "epoch": 1.88, "grad_norm": 0.691582202911377, "learning_rate": 0.00018339626456166934, "loss": 3.0136, "step": 38386 }, { "epoch": 1.88, "grad_norm": 0.6166252493858337, "learning_rate": 0.00018338207907825878, "loss": 3.0924, "step": 38387 }, { "epoch": 1.88, "grad_norm": 0.6885400414466858, "learning_rate": 0.00018336789390199757, "loss": 3.0155, "step": 38388 }, { "epoch": 1.88, "grad_norm": 0.6976376175880432, "learning_rate": 0.00018335370903292277, "loss": 3.1868, "step": 38389 }, { "epoch": 1.88, "grad_norm": 0.6312760710716248, "learning_rate": 0.0001833395244710718, "loss": 3.015, "step": 38390 }, { "epoch": 1.88, "grad_norm": 0.6591302752494812, "learning_rate": 0.0001833253402164822, "loss": 2.9672, "step": 38391 }, { "epoch": 1.88, "grad_norm": 0.647793710231781, "learning_rate": 0.00018331115626919113, "loss": 2.8549, "step": 38392 }, { "epoch": 1.88, "grad_norm": 0.6505529284477234, "learning_rate": 0.0001832969726292361, "loss": 2.9445, "step": 38393 }, { "epoch": 1.88, "grad_norm": 0.7024715542793274, "learning_rate": 0.00018328278929665424, "loss": 3.0583, "step": 38394 }, { "epoch": 1.88, "grad_norm": 0.6506431698799133, "learning_rate": 0.00018326860627148312, "loss": 2.8853, "step": 38395 }, { "epoch": 1.88, "grad_norm": 0.6874439120292664, "learning_rate": 0.0001832544235537601, "loss": 3.0919, "step": 38396 }, { "epoch": 1.88, "grad_norm": 0.6847971081733704, "learning_rate": 0.00018324024114352227, "loss": 3.0551, "step": 38397 }, { "epoch": 1.88, "grad_norm": 0.6514188647270203, "learning_rate": 0.00018322605904080738, "loss": 3.0029, "step": 38398 }, { "epoch": 1.88, "grad_norm": 0.6254274249076843, "learning_rate": 0.00018321187724565244, "loss": 2.9289, "step": 38399 }, { "epoch": 1.88, "grad_norm": 0.7166518568992615, "learning_rate": 0.00018319769575809498, "loss": 3.158, "step": 38400 }, { "epoch": 1.88, "grad_norm": 0.6519727110862732, "learning_rate": 0.00018318351457817235, "loss": 3.1565, "step": 38401 }, { "epoch": 1.88, "grad_norm": 0.6201913356781006, "learning_rate": 0.00018316933370592182, "loss": 2.9268, "step": 38402 }, { "epoch": 1.88, "grad_norm": 0.6431904435157776, "learning_rate": 0.00018315515314138085, "loss": 2.8744, "step": 38403 }, { "epoch": 1.88, "grad_norm": 0.6314598917961121, "learning_rate": 0.0001831409728845866, "loss": 3.0051, "step": 38404 }, { "epoch": 1.88, "grad_norm": 0.6558628082275391, "learning_rate": 0.0001831267929355765, "loss": 2.7542, "step": 38405 }, { "epoch": 1.88, "grad_norm": 0.6240569353103638, "learning_rate": 0.0001831126132943881, "loss": 3.0997, "step": 38406 }, { "epoch": 1.88, "grad_norm": 0.6257813572883606, "learning_rate": 0.00018309843396105852, "loss": 2.8727, "step": 38407 }, { "epoch": 1.88, "grad_norm": 0.6663781404495239, "learning_rate": 0.0001830842549356252, "loss": 2.9911, "step": 38408 }, { "epoch": 1.88, "grad_norm": 0.634472131729126, "learning_rate": 0.00018307007621812534, "loss": 3.0498, "step": 38409 }, { "epoch": 1.88, "grad_norm": 0.6562722325325012, "learning_rate": 0.0001830558978085965, "loss": 2.6464, "step": 38410 }, { "epoch": 1.88, "grad_norm": 0.6011884212493896, "learning_rate": 0.0001830417197070759, "loss": 2.8752, "step": 38411 }, { "epoch": 1.88, "grad_norm": 0.6544702053070068, "learning_rate": 0.00018302754191360082, "loss": 2.94, "step": 38412 }, { "epoch": 1.88, "grad_norm": 0.6202430129051208, "learning_rate": 0.00018301336442820882, "loss": 3.1568, "step": 38413 }, { "epoch": 1.88, "grad_norm": 0.6908934712409973, "learning_rate": 0.00018299918725093702, "loss": 3.1212, "step": 38414 }, { "epoch": 1.88, "grad_norm": 0.6435255408287048, "learning_rate": 0.0001829850103818229, "loss": 2.9659, "step": 38415 }, { "epoch": 1.88, "grad_norm": 0.6239068508148193, "learning_rate": 0.00018297083382090364, "loss": 2.8303, "step": 38416 }, { "epoch": 1.88, "grad_norm": 0.6315820813179016, "learning_rate": 0.00018295665756821674, "loss": 3.0459, "step": 38417 }, { "epoch": 1.88, "grad_norm": 0.6654682755470276, "learning_rate": 0.00018294248162379953, "loss": 3.1875, "step": 38418 }, { "epoch": 1.88, "grad_norm": 0.6455845832824707, "learning_rate": 0.00018292830598768916, "loss": 3.2678, "step": 38419 }, { "epoch": 1.88, "grad_norm": 0.6427332758903503, "learning_rate": 0.00018291413065992326, "loss": 2.9188, "step": 38420 }, { "epoch": 1.88, "grad_norm": 0.6541956067085266, "learning_rate": 0.0001828999556405389, "loss": 3.0159, "step": 38421 }, { "epoch": 1.88, "grad_norm": 0.6762649416923523, "learning_rate": 0.0001828857809295735, "loss": 3.1659, "step": 38422 }, { "epoch": 1.88, "grad_norm": 0.6119546890258789, "learning_rate": 0.00018287160652706455, "loss": 2.9543, "step": 38423 }, { "epoch": 1.88, "grad_norm": 0.6221927404403687, "learning_rate": 0.00018285743243304903, "loss": 2.8137, "step": 38424 }, { "epoch": 1.88, "grad_norm": 0.6315422654151917, "learning_rate": 0.00018284325864756468, "loss": 3.1488, "step": 38425 }, { "epoch": 1.88, "grad_norm": 0.6791592240333557, "learning_rate": 0.0001828290851706486, "loss": 3.1033, "step": 38426 }, { "epoch": 1.88, "grad_norm": 0.6015139222145081, "learning_rate": 0.00018281491200233807, "loss": 2.8296, "step": 38427 }, { "epoch": 1.88, "grad_norm": 0.6524919867515564, "learning_rate": 0.00018280073914267065, "loss": 3.0343, "step": 38428 }, { "epoch": 1.88, "grad_norm": 0.663172721862793, "learning_rate": 0.00018278656659168345, "loss": 3.046, "step": 38429 }, { "epoch": 1.88, "grad_norm": 0.634985625743866, "learning_rate": 0.0001827723943494139, "loss": 3.1256, "step": 38430 }, { "epoch": 1.88, "grad_norm": 0.6688196063041687, "learning_rate": 0.00018275822241589927, "loss": 2.8842, "step": 38431 }, { "epoch": 1.88, "grad_norm": 0.6805686354637146, "learning_rate": 0.0001827440507911769, "loss": 2.8134, "step": 38432 }, { "epoch": 1.88, "grad_norm": 0.6817598342895508, "learning_rate": 0.00018272987947528423, "loss": 3.2789, "step": 38433 }, { "epoch": 1.88, "grad_norm": 0.6709921956062317, "learning_rate": 0.00018271570846825833, "loss": 3.0719, "step": 38434 }, { "epoch": 1.88, "grad_norm": 0.6306078433990479, "learning_rate": 0.00018270153777013684, "loss": 2.9929, "step": 38435 }, { "epoch": 1.88, "grad_norm": 0.6629069447517395, "learning_rate": 0.00018268736738095688, "loss": 3.0485, "step": 38436 }, { "epoch": 1.88, "grad_norm": 0.6367194652557373, "learning_rate": 0.00018267319730075573, "loss": 2.9225, "step": 38437 }, { "epoch": 1.88, "grad_norm": 0.6755772829055786, "learning_rate": 0.0001826590275295709, "loss": 2.9001, "step": 38438 }, { "epoch": 1.88, "grad_norm": 0.6794211268424988, "learning_rate": 0.0001826448580674396, "loss": 3.0339, "step": 38439 }, { "epoch": 1.88, "grad_norm": 0.7521235942840576, "learning_rate": 0.0001826306889143992, "loss": 2.8449, "step": 38440 }, { "epoch": 1.88, "grad_norm": 0.6859071254730225, "learning_rate": 0.0001826165200704868, "loss": 3.0413, "step": 38441 }, { "epoch": 1.88, "grad_norm": 0.6678243279457092, "learning_rate": 0.00018260235153574, "loss": 2.9641, "step": 38442 }, { "epoch": 1.88, "grad_norm": 0.6535446643829346, "learning_rate": 0.00018258818331019606, "loss": 3.1897, "step": 38443 }, { "epoch": 1.88, "grad_norm": 0.6140435338020325, "learning_rate": 0.00018257401539389218, "loss": 2.9456, "step": 38444 }, { "epoch": 1.88, "grad_norm": 0.6568311452865601, "learning_rate": 0.00018255984778686583, "loss": 3.0298, "step": 38445 }, { "epoch": 1.88, "grad_norm": 0.6603125333786011, "learning_rate": 0.0001825456804891541, "loss": 2.9342, "step": 38446 }, { "epoch": 1.88, "grad_norm": 0.623427152633667, "learning_rate": 0.0001825315135007945, "loss": 3.0201, "step": 38447 }, { "epoch": 1.88, "grad_norm": 0.6774618029594421, "learning_rate": 0.00018251734682182437, "loss": 3.215, "step": 38448 }, { "epoch": 1.88, "grad_norm": 0.6762425899505615, "learning_rate": 0.00018250318045228081, "loss": 2.904, "step": 38449 }, { "epoch": 1.88, "grad_norm": 0.6597325205802917, "learning_rate": 0.0001824890143922014, "loss": 3.0122, "step": 38450 }, { "epoch": 1.88, "grad_norm": 0.6491790413856506, "learning_rate": 0.00018247484864162322, "loss": 3.1313, "step": 38451 }, { "epoch": 1.88, "grad_norm": 0.6365981101989746, "learning_rate": 0.00018246068320058366, "loss": 3.0639, "step": 38452 }, { "epoch": 1.88, "grad_norm": 0.6332027912139893, "learning_rate": 0.0001824465180691201, "loss": 2.9355, "step": 38453 }, { "epoch": 1.88, "grad_norm": 0.6683153510093689, "learning_rate": 0.00018243235324726974, "loss": 3.1984, "step": 38454 }, { "epoch": 1.88, "grad_norm": 0.6562347412109375, "learning_rate": 0.00018241818873507003, "loss": 2.9301, "step": 38455 }, { "epoch": 1.88, "grad_norm": 0.6307790279388428, "learning_rate": 0.00018240402453255803, "loss": 2.8323, "step": 38456 }, { "epoch": 1.88, "grad_norm": 0.626919686794281, "learning_rate": 0.00018238986063977126, "loss": 3.0347, "step": 38457 }, { "epoch": 1.88, "grad_norm": 0.6405884027481079, "learning_rate": 0.00018237569705674704, "loss": 3.0111, "step": 38458 }, { "epoch": 1.88, "grad_norm": 0.6130412817001343, "learning_rate": 0.00018236153378352248, "loss": 3.0065, "step": 38459 }, { "epoch": 1.88, "grad_norm": 0.6970798969268799, "learning_rate": 0.00018234737082013506, "loss": 2.88, "step": 38460 }, { "epoch": 1.88, "grad_norm": 0.6523925065994263, "learning_rate": 0.00018233320816662206, "loss": 2.9462, "step": 38461 }, { "epoch": 1.88, "grad_norm": 0.6778101921081543, "learning_rate": 0.0001823190458230206, "loss": 3.0931, "step": 38462 }, { "epoch": 1.88, "grad_norm": 0.6423189640045166, "learning_rate": 0.00018230488378936826, "loss": 3.1616, "step": 38463 }, { "epoch": 1.89, "grad_norm": 0.6352372765541077, "learning_rate": 0.00018229072206570212, "loss": 2.9447, "step": 38464 }, { "epoch": 1.89, "grad_norm": 0.627549946308136, "learning_rate": 0.0001822765606520597, "loss": 3.098, "step": 38465 }, { "epoch": 1.89, "grad_norm": 0.6376739740371704, "learning_rate": 0.00018226239954847805, "loss": 3.143, "step": 38466 }, { "epoch": 1.89, "grad_norm": 0.7025358080863953, "learning_rate": 0.00018224823875499451, "loss": 3.028, "step": 38467 }, { "epoch": 1.89, "grad_norm": 0.6670701503753662, "learning_rate": 0.00018223407827164663, "loss": 2.9701, "step": 38468 }, { "epoch": 1.89, "grad_norm": 0.6625725626945496, "learning_rate": 0.00018221991809847142, "loss": 2.9864, "step": 38469 }, { "epoch": 1.89, "grad_norm": 0.6725914478302002, "learning_rate": 0.00018220575823550635, "loss": 3.1172, "step": 38470 }, { "epoch": 1.89, "grad_norm": 0.6515083909034729, "learning_rate": 0.00018219159868278852, "loss": 3.0334, "step": 38471 }, { "epoch": 1.89, "grad_norm": 0.6303300261497498, "learning_rate": 0.0001821774394403554, "loss": 2.7711, "step": 38472 }, { "epoch": 1.89, "grad_norm": 0.6697096228599548, "learning_rate": 0.0001821632805082443, "loss": 2.9665, "step": 38473 }, { "epoch": 1.89, "grad_norm": 0.657671332359314, "learning_rate": 0.00018214912188649232, "loss": 2.8763, "step": 38474 }, { "epoch": 1.89, "grad_norm": 0.6618888974189758, "learning_rate": 0.00018213496357513703, "loss": 2.7808, "step": 38475 }, { "epoch": 1.89, "grad_norm": 0.652420163154602, "learning_rate": 0.00018212080557421543, "loss": 3.0118, "step": 38476 }, { "epoch": 1.89, "grad_norm": 0.6736677289009094, "learning_rate": 0.00018210664788376491, "loss": 3.188, "step": 38477 }, { "epoch": 1.89, "grad_norm": 0.6545586585998535, "learning_rate": 0.0001820924905038229, "loss": 3.0641, "step": 38478 }, { "epoch": 1.89, "grad_norm": 0.6806058883666992, "learning_rate": 0.00018207833343442657, "loss": 2.9763, "step": 38479 }, { "epoch": 1.89, "grad_norm": 0.6442785263061523, "learning_rate": 0.00018206417667561322, "loss": 3.1018, "step": 38480 }, { "epoch": 1.89, "grad_norm": 0.6341136693954468, "learning_rate": 0.00018205002022742006, "loss": 2.9789, "step": 38481 }, { "epoch": 1.89, "grad_norm": 0.6478147506713867, "learning_rate": 0.00018203586408988446, "loss": 3.0614, "step": 38482 }, { "epoch": 1.89, "grad_norm": 0.6580110788345337, "learning_rate": 0.00018202170826304374, "loss": 2.8771, "step": 38483 }, { "epoch": 1.89, "grad_norm": 0.6745438575744629, "learning_rate": 0.00018200755274693506, "loss": 3.0367, "step": 38484 }, { "epoch": 1.89, "grad_norm": 0.6208056211471558, "learning_rate": 0.0001819933975415959, "loss": 3.0012, "step": 38485 }, { "epoch": 1.89, "grad_norm": 0.6185520887374878, "learning_rate": 0.00018197924264706325, "loss": 2.8297, "step": 38486 }, { "epoch": 1.89, "grad_norm": 0.6431528925895691, "learning_rate": 0.00018196508806337476, "loss": 3.266, "step": 38487 }, { "epoch": 1.89, "grad_norm": 0.660499095916748, "learning_rate": 0.0001819509337905674, "loss": 2.9249, "step": 38488 }, { "epoch": 1.89, "grad_norm": 0.636012077331543, "learning_rate": 0.00018193677982867847, "loss": 3.2491, "step": 38489 }, { "epoch": 1.89, "grad_norm": 0.6246479153633118, "learning_rate": 0.00018192262617774553, "loss": 2.9983, "step": 38490 }, { "epoch": 1.89, "grad_norm": 0.6834371089935303, "learning_rate": 0.00018190847283780555, "loss": 2.9373, "step": 38491 }, { "epoch": 1.89, "grad_norm": 0.6637904047966003, "learning_rate": 0.000181894319808896, "loss": 2.8806, "step": 38492 }, { "epoch": 1.89, "grad_norm": 0.6326305270195007, "learning_rate": 0.000181880167091054, "loss": 3.0709, "step": 38493 }, { "epoch": 1.89, "grad_norm": 0.6314154267311096, "learning_rate": 0.00018186601468431695, "loss": 2.9915, "step": 38494 }, { "epoch": 1.89, "grad_norm": 0.6132782697677612, "learning_rate": 0.00018185186258872212, "loss": 3.0361, "step": 38495 }, { "epoch": 1.89, "grad_norm": 0.6412111520767212, "learning_rate": 0.0001818377108043066, "loss": 2.7906, "step": 38496 }, { "epoch": 1.89, "grad_norm": 0.6674846410751343, "learning_rate": 0.000181823559331108, "loss": 2.857, "step": 38497 }, { "epoch": 1.89, "grad_norm": 0.8719302415847778, "learning_rate": 0.00018180940816916327, "loss": 2.9331, "step": 38498 }, { "epoch": 1.89, "grad_norm": 0.6447135210037231, "learning_rate": 0.00018179525731850982, "loss": 3.0817, "step": 38499 }, { "epoch": 1.89, "grad_norm": 0.6724530458450317, "learning_rate": 0.000181781106779185, "loss": 2.9817, "step": 38500 }, { "epoch": 1.89, "grad_norm": 0.6975722908973694, "learning_rate": 0.00018176695655122596, "loss": 3.1141, "step": 38501 }, { "epoch": 1.89, "grad_norm": 0.6536849141120911, "learning_rate": 0.00018175280663467003, "loss": 2.867, "step": 38502 }, { "epoch": 1.89, "grad_norm": 0.6566682457923889, "learning_rate": 0.0001817386570295543, "loss": 2.8168, "step": 38503 }, { "epoch": 1.89, "grad_norm": 0.7100996971130371, "learning_rate": 0.0001817245077359163, "loss": 2.9156, "step": 38504 }, { "epoch": 1.89, "grad_norm": 0.6142645478248596, "learning_rate": 0.00018171035875379324, "loss": 3.0425, "step": 38505 }, { "epoch": 1.89, "grad_norm": 0.6454697251319885, "learning_rate": 0.0001816962100832222, "loss": 3.0476, "step": 38506 }, { "epoch": 1.89, "grad_norm": 0.6202601790428162, "learning_rate": 0.0001816820617242407, "loss": 2.9302, "step": 38507 }, { "epoch": 1.89, "grad_norm": 0.6279861927032471, "learning_rate": 0.00018166791367688574, "loss": 2.9165, "step": 38508 }, { "epoch": 1.89, "grad_norm": 0.6671816110610962, "learning_rate": 0.0001816537659411948, "loss": 2.832, "step": 38509 }, { "epoch": 1.89, "grad_norm": 0.6781206130981445, "learning_rate": 0.00018163961851720513, "loss": 2.9789, "step": 38510 }, { "epoch": 1.89, "grad_norm": 0.6522396802902222, "learning_rate": 0.0001816254714049538, "loss": 2.8528, "step": 38511 }, { "epoch": 1.89, "grad_norm": 0.643043577671051, "learning_rate": 0.0001816113246044783, "loss": 2.8771, "step": 38512 }, { "epoch": 1.89, "grad_norm": 0.6434921026229858, "learning_rate": 0.00018159717811581575, "loss": 2.9918, "step": 38513 }, { "epoch": 1.89, "grad_norm": 0.6609666347503662, "learning_rate": 0.00018158303193900336, "loss": 3.013, "step": 38514 }, { "epoch": 1.89, "grad_norm": 0.6703172326087952, "learning_rate": 0.0001815688860740786, "loss": 3.0498, "step": 38515 }, { "epoch": 1.89, "grad_norm": 0.6868824362754822, "learning_rate": 0.00018155474052107857, "loss": 3.0622, "step": 38516 }, { "epoch": 1.89, "grad_norm": 0.632976770401001, "learning_rate": 0.00018154059528004057, "loss": 3.2264, "step": 38517 }, { "epoch": 1.89, "grad_norm": 0.7155187129974365, "learning_rate": 0.00018152645035100177, "loss": 2.9952, "step": 38518 }, { "epoch": 1.89, "grad_norm": 0.6054431796073914, "learning_rate": 0.00018151230573399955, "loss": 2.9829, "step": 38519 }, { "epoch": 1.89, "grad_norm": 0.6408286690711975, "learning_rate": 0.00018149816142907114, "loss": 2.8755, "step": 38520 }, { "epoch": 1.89, "grad_norm": 0.6466242074966431, "learning_rate": 0.00018148401743625368, "loss": 3.2028, "step": 38521 }, { "epoch": 1.89, "grad_norm": 0.6467182636260986, "learning_rate": 0.00018146987375558462, "loss": 2.9694, "step": 38522 }, { "epoch": 1.89, "grad_norm": 0.6525686979293823, "learning_rate": 0.00018145573038710105, "loss": 2.9897, "step": 38523 }, { "epoch": 1.89, "grad_norm": 0.6323734521865845, "learning_rate": 0.00018144158733084023, "loss": 2.8444, "step": 38524 }, { "epoch": 1.89, "grad_norm": 0.6349821090698242, "learning_rate": 0.00018142744458683952, "loss": 3.0805, "step": 38525 }, { "epoch": 1.89, "grad_norm": 0.6364793181419373, "learning_rate": 0.000181413302155136, "loss": 2.8423, "step": 38526 }, { "epoch": 1.89, "grad_norm": 0.6155493259429932, "learning_rate": 0.00018139916003576722, "loss": 3.0647, "step": 38527 }, { "epoch": 1.89, "grad_norm": 0.62147456407547, "learning_rate": 0.00018138501822877003, "loss": 2.977, "step": 38528 }, { "epoch": 1.89, "grad_norm": 0.6351068615913391, "learning_rate": 0.00018137087673418184, "loss": 3.0379, "step": 38529 }, { "epoch": 1.89, "grad_norm": 0.6419429779052734, "learning_rate": 0.00018135673555204012, "loss": 2.8855, "step": 38530 }, { "epoch": 1.89, "grad_norm": 0.6373555660247803, "learning_rate": 0.00018134259468238182, "loss": 2.8341, "step": 38531 }, { "epoch": 1.89, "grad_norm": 0.687985360622406, "learning_rate": 0.0001813284541252444, "loss": 2.7618, "step": 38532 }, { "epoch": 1.89, "grad_norm": 0.6934620141983032, "learning_rate": 0.0001813143138806648, "loss": 2.9882, "step": 38533 }, { "epoch": 1.89, "grad_norm": 0.6637023687362671, "learning_rate": 0.00018130017394868056, "loss": 3.0607, "step": 38534 }, { "epoch": 1.89, "grad_norm": 0.6849845051765442, "learning_rate": 0.0001812860343293289, "loss": 2.9292, "step": 38535 }, { "epoch": 1.89, "grad_norm": 0.6725578308105469, "learning_rate": 0.0001812718950226468, "loss": 3.3626, "step": 38536 }, { "epoch": 1.89, "grad_norm": 0.6605555415153503, "learning_rate": 0.0001812577560286718, "loss": 2.9055, "step": 38537 }, { "epoch": 1.89, "grad_norm": 0.6455115079879761, "learning_rate": 0.000181243617347441, "loss": 2.9905, "step": 38538 }, { "epoch": 1.89, "grad_norm": 0.6488058567047119, "learning_rate": 0.0001812294789789916, "loss": 2.8396, "step": 38539 }, { "epoch": 1.89, "grad_norm": 0.6056329011917114, "learning_rate": 0.00018121534092336097, "loss": 2.8001, "step": 38540 }, { "epoch": 1.89, "grad_norm": 0.6151305437088013, "learning_rate": 0.00018120120318058624, "loss": 3.2172, "step": 38541 }, { "epoch": 1.89, "grad_norm": 0.6283738613128662, "learning_rate": 0.00018118706575070474, "loss": 2.85, "step": 38542 }, { "epoch": 1.89, "grad_norm": 0.6253724098205566, "learning_rate": 0.00018117292863375353, "loss": 2.6845, "step": 38543 }, { "epoch": 1.89, "grad_norm": 0.6234411597251892, "learning_rate": 0.00018115879182977, "loss": 3.0064, "step": 38544 }, { "epoch": 1.89, "grad_norm": 0.6679108142852783, "learning_rate": 0.00018114465533879142, "loss": 2.8951, "step": 38545 }, { "epoch": 1.89, "grad_norm": 0.6431871056556702, "learning_rate": 0.0001811305191608548, "loss": 2.9186, "step": 38546 }, { "epoch": 1.89, "grad_norm": 0.6422107219696045, "learning_rate": 0.00018111638329599763, "loss": 2.9075, "step": 38547 }, { "epoch": 1.89, "grad_norm": 0.6774196028709412, "learning_rate": 0.00018110224774425696, "loss": 2.915, "step": 38548 }, { "epoch": 1.89, "grad_norm": 0.6358468532562256, "learning_rate": 0.00018108811250567005, "loss": 2.9972, "step": 38549 }, { "epoch": 1.89, "grad_norm": 0.6383891701698303, "learning_rate": 0.00018107397758027432, "loss": 2.936, "step": 38550 }, { "epoch": 1.89, "grad_norm": 0.6550706028938293, "learning_rate": 0.00018105984296810668, "loss": 3.0692, "step": 38551 }, { "epoch": 1.89, "grad_norm": 0.6626957654953003, "learning_rate": 0.00018104570866920466, "loss": 3.0563, "step": 38552 }, { "epoch": 1.89, "grad_norm": 0.6499350666999817, "learning_rate": 0.00018103157468360526, "loss": 3.0258, "step": 38553 }, { "epoch": 1.89, "grad_norm": 0.6834374666213989, "learning_rate": 0.00018101744101134577, "loss": 2.8288, "step": 38554 }, { "epoch": 1.89, "grad_norm": 0.5994073152542114, "learning_rate": 0.00018100330765246356, "loss": 3.1779, "step": 38555 }, { "epoch": 1.89, "grad_norm": 0.675519585609436, "learning_rate": 0.0001809891746069957, "loss": 2.8748, "step": 38556 }, { "epoch": 1.89, "grad_norm": 0.624893069267273, "learning_rate": 0.00018097504187497944, "loss": 3.2098, "step": 38557 }, { "epoch": 1.89, "grad_norm": 0.637237548828125, "learning_rate": 0.0001809609094564519, "loss": 2.9335, "step": 38558 }, { "epoch": 1.89, "grad_norm": 0.6514628529548645, "learning_rate": 0.00018094677735145057, "loss": 3.2457, "step": 38559 }, { "epoch": 1.89, "grad_norm": 0.6226351857185364, "learning_rate": 0.0001809326455600125, "loss": 2.851, "step": 38560 }, { "epoch": 1.89, "grad_norm": 0.6673992276191711, "learning_rate": 0.0001809185140821748, "loss": 3.0208, "step": 38561 }, { "epoch": 1.89, "grad_norm": 0.6154362559318542, "learning_rate": 0.00018090438291797496, "loss": 2.9796, "step": 38562 }, { "epoch": 1.89, "grad_norm": 0.6860130429267883, "learning_rate": 0.00018089025206744999, "loss": 2.9236, "step": 38563 }, { "epoch": 1.89, "grad_norm": 0.7032354474067688, "learning_rate": 0.00018087612153063723, "loss": 2.9412, "step": 38564 }, { "epoch": 1.89, "grad_norm": 0.6549293994903564, "learning_rate": 0.00018086199130757375, "loss": 3.201, "step": 38565 }, { "epoch": 1.89, "grad_norm": 0.6441744565963745, "learning_rate": 0.00018084786139829686, "loss": 2.9833, "step": 38566 }, { "epoch": 1.89, "grad_norm": 0.6373230218887329, "learning_rate": 0.00018083373180284386, "loss": 2.9114, "step": 38567 }, { "epoch": 1.89, "grad_norm": 0.7155922651290894, "learning_rate": 0.00018081960252125178, "loss": 2.8217, "step": 38568 }, { "epoch": 1.89, "grad_norm": 0.6718721985816956, "learning_rate": 0.00018080547355355802, "loss": 2.8134, "step": 38569 }, { "epoch": 1.89, "grad_norm": 0.7212534546852112, "learning_rate": 0.00018079134489979958, "loss": 2.8709, "step": 38570 }, { "epoch": 1.89, "grad_norm": 0.6554144024848938, "learning_rate": 0.0001807772165600139, "loss": 2.8055, "step": 38571 }, { "epoch": 1.89, "grad_norm": 0.6695471405982971, "learning_rate": 0.0001807630885342381, "loss": 2.8279, "step": 38572 }, { "epoch": 1.89, "grad_norm": 0.696575403213501, "learning_rate": 0.00018074896082250928, "loss": 3.1577, "step": 38573 }, { "epoch": 1.89, "grad_norm": 0.6690512895584106, "learning_rate": 0.00018073483342486481, "loss": 3.0392, "step": 38574 }, { "epoch": 1.89, "grad_norm": 0.6713618040084839, "learning_rate": 0.00018072070634134185, "loss": 3.108, "step": 38575 }, { "epoch": 1.89, "grad_norm": 0.6604920625686646, "learning_rate": 0.00018070657957197753, "loss": 2.8892, "step": 38576 }, { "epoch": 1.89, "grad_norm": 0.6354912519454956, "learning_rate": 0.0001806924531168092, "loss": 3.2366, "step": 38577 }, { "epoch": 1.89, "grad_norm": 0.6330511569976807, "learning_rate": 0.00018067832697587393, "loss": 2.9807, "step": 38578 }, { "epoch": 1.89, "grad_norm": 0.6298176646232605, "learning_rate": 0.00018066420114920906, "loss": 3.1927, "step": 38579 }, { "epoch": 1.89, "grad_norm": 0.6028285622596741, "learning_rate": 0.0001806500756368516, "loss": 3.075, "step": 38580 }, { "epoch": 1.89, "grad_norm": 0.6283663511276245, "learning_rate": 0.00018063595043883894, "loss": 2.7543, "step": 38581 }, { "epoch": 1.89, "grad_norm": 0.6311411261558533, "learning_rate": 0.00018062182555520824, "loss": 3.0354, "step": 38582 }, { "epoch": 1.89, "grad_norm": 0.708346426486969, "learning_rate": 0.00018060770098599657, "loss": 3.1341, "step": 38583 }, { "epoch": 1.89, "grad_norm": 0.6508063673973083, "learning_rate": 0.0001805935767312414, "loss": 2.8863, "step": 38584 }, { "epoch": 1.89, "grad_norm": 0.6476802825927734, "learning_rate": 0.00018057945279097964, "loss": 2.9496, "step": 38585 }, { "epoch": 1.89, "grad_norm": 0.6408866047859192, "learning_rate": 0.0001805653291652486, "loss": 3.0421, "step": 38586 }, { "epoch": 1.89, "grad_norm": 0.6279966831207275, "learning_rate": 0.0001805512058540856, "loss": 3.1745, "step": 38587 }, { "epoch": 1.89, "grad_norm": 0.6622072458267212, "learning_rate": 0.00018053708285752763, "loss": 2.9178, "step": 38588 }, { "epoch": 1.89, "grad_norm": 0.6587921977043152, "learning_rate": 0.00018052296017561213, "loss": 3.1572, "step": 38589 }, { "epoch": 1.89, "grad_norm": 0.6754596829414368, "learning_rate": 0.00018050883780837607, "loss": 2.918, "step": 38590 }, { "epoch": 1.89, "grad_norm": 0.6614739298820496, "learning_rate": 0.0001804947157558567, "loss": 2.8633, "step": 38591 }, { "epoch": 1.89, "grad_norm": 0.6809832453727722, "learning_rate": 0.00018048059401809134, "loss": 2.962, "step": 38592 }, { "epoch": 1.89, "grad_norm": 0.6957449316978455, "learning_rate": 0.00018046647259511704, "loss": 2.9203, "step": 38593 }, { "epoch": 1.89, "grad_norm": 0.6387645602226257, "learning_rate": 0.0001804523514869711, "loss": 3.0615, "step": 38594 }, { "epoch": 1.89, "grad_norm": 0.6960155963897705, "learning_rate": 0.00018043823069369057, "loss": 2.8501, "step": 38595 }, { "epoch": 1.89, "grad_norm": 0.6919668316841125, "learning_rate": 0.00018042411021531273, "loss": 3.0659, "step": 38596 }, { "epoch": 1.89, "grad_norm": 0.6586015820503235, "learning_rate": 0.0001804099900518749, "loss": 3.0833, "step": 38597 }, { "epoch": 1.89, "grad_norm": 0.6569781303405762, "learning_rate": 0.000180395870203414, "loss": 3.1045, "step": 38598 }, { "epoch": 1.89, "grad_norm": 0.6461341977119446, "learning_rate": 0.00018038175066996745, "loss": 3.1601, "step": 38599 }, { "epoch": 1.89, "grad_norm": 0.6257455945014954, "learning_rate": 0.0001803676314515723, "loss": 2.9019, "step": 38600 }, { "epoch": 1.89, "grad_norm": 0.6397156119346619, "learning_rate": 0.00018035351254826575, "loss": 2.9765, "step": 38601 }, { "epoch": 1.89, "grad_norm": 0.6896104216575623, "learning_rate": 0.00018033939396008513, "loss": 3.2906, "step": 38602 }, { "epoch": 1.89, "grad_norm": 0.6564487814903259, "learning_rate": 0.00018032527568706745, "loss": 2.951, "step": 38603 }, { "epoch": 1.89, "grad_norm": 0.6752212047576904, "learning_rate": 0.00018031115772925005, "loss": 2.8648, "step": 38604 }, { "epoch": 1.89, "grad_norm": 0.6492764353752136, "learning_rate": 0.00018029704008666988, "loss": 3.0707, "step": 38605 }, { "epoch": 1.89, "grad_norm": 0.668976366519928, "learning_rate": 0.00018028292275936431, "loss": 3.0693, "step": 38606 }, { "epoch": 1.89, "grad_norm": 0.6379303932189941, "learning_rate": 0.00018026880574737055, "loss": 3.0406, "step": 38607 }, { "epoch": 1.89, "grad_norm": 0.8051410913467407, "learning_rate": 0.00018025468905072563, "loss": 3.0841, "step": 38608 }, { "epoch": 1.89, "grad_norm": 0.6822032928466797, "learning_rate": 0.0001802405726694669, "loss": 2.7943, "step": 38609 }, { "epoch": 1.89, "grad_norm": 0.6568723917007446, "learning_rate": 0.00018022645660363133, "loss": 2.8804, "step": 38610 }, { "epoch": 1.89, "grad_norm": 0.6965839266777039, "learning_rate": 0.00018021234085325628, "loss": 3.0882, "step": 38611 }, { "epoch": 1.89, "grad_norm": 0.6454933881759644, "learning_rate": 0.00018019822541837896, "loss": 3.1348, "step": 38612 }, { "epoch": 1.89, "grad_norm": 0.6876438856124878, "learning_rate": 0.00018018411029903632, "loss": 2.9058, "step": 38613 }, { "epoch": 1.89, "grad_norm": 0.6394537687301636, "learning_rate": 0.0001801699954952658, "loss": 2.9671, "step": 38614 }, { "epoch": 1.89, "grad_norm": 0.6840302348136902, "learning_rate": 0.00018015588100710439, "loss": 2.8984, "step": 38615 }, { "epoch": 1.89, "grad_norm": 0.6187319159507751, "learning_rate": 0.00018014176683458927, "loss": 3.1231, "step": 38616 }, { "epoch": 1.89, "grad_norm": 0.605553150177002, "learning_rate": 0.00018012765297775778, "loss": 2.8378, "step": 38617 }, { "epoch": 1.89, "grad_norm": 0.6925824880599976, "learning_rate": 0.0001801135394366469, "loss": 2.8799, "step": 38618 }, { "epoch": 1.89, "grad_norm": 0.651581883430481, "learning_rate": 0.00018009942621129398, "loss": 3.0663, "step": 38619 }, { "epoch": 1.89, "grad_norm": 0.6642777323722839, "learning_rate": 0.00018008531330173598, "loss": 2.7444, "step": 38620 }, { "epoch": 1.89, "grad_norm": 0.6268136501312256, "learning_rate": 0.00018007120070801023, "loss": 3.073, "step": 38621 }, { "epoch": 1.89, "grad_norm": 0.6606512069702148, "learning_rate": 0.00018005708843015392, "loss": 3.14, "step": 38622 }, { "epoch": 1.89, "grad_norm": 0.7048143744468689, "learning_rate": 0.000180042976468204, "loss": 2.6478, "step": 38623 }, { "epoch": 1.89, "grad_norm": 0.6552999019622803, "learning_rate": 0.00018002886482219798, "loss": 2.8626, "step": 38624 }, { "epoch": 1.89, "grad_norm": 0.6531506180763245, "learning_rate": 0.00018001475349217275, "loss": 2.9233, "step": 38625 }, { "epoch": 1.89, "grad_norm": 0.682345986366272, "learning_rate": 0.0001800006424781655, "loss": 2.9008, "step": 38626 }, { "epoch": 1.89, "grad_norm": 0.6473051905632019, "learning_rate": 0.0001799865317802136, "loss": 3.1767, "step": 38627 }, { "epoch": 1.89, "grad_norm": 0.6639640927314758, "learning_rate": 0.0001799724213983539, "loss": 2.9966, "step": 38628 }, { "epoch": 1.89, "grad_norm": 0.6246669888496399, "learning_rate": 0.00017995831133262402, "loss": 3.0385, "step": 38629 }, { "epoch": 1.89, "grad_norm": 0.6045641899108887, "learning_rate": 0.0001799442015830606, "loss": 3.017, "step": 38630 }, { "epoch": 1.89, "grad_norm": 0.7342897057533264, "learning_rate": 0.00017993009214970106, "loss": 3.1561, "step": 38631 }, { "epoch": 1.89, "grad_norm": 0.7383832335472107, "learning_rate": 0.00017991598303258266, "loss": 2.8414, "step": 38632 }, { "epoch": 1.89, "grad_norm": 0.7174133062362671, "learning_rate": 0.00017990187423174242, "loss": 2.9566, "step": 38633 }, { "epoch": 1.89, "grad_norm": 0.6822835206985474, "learning_rate": 0.00017988776574721758, "loss": 3.0601, "step": 38634 }, { "epoch": 1.89, "grad_norm": 0.6779569387435913, "learning_rate": 0.0001798736575790451, "loss": 2.7663, "step": 38635 }, { "epoch": 1.89, "grad_norm": 0.6900989413261414, "learning_rate": 0.00017985954972726237, "loss": 2.9155, "step": 38636 }, { "epoch": 1.89, "grad_norm": 0.6946017146110535, "learning_rate": 0.00017984544219190653, "loss": 3.2055, "step": 38637 }, { "epoch": 1.89, "grad_norm": 0.6267918944358826, "learning_rate": 0.00017983133497301453, "loss": 2.9432, "step": 38638 }, { "epoch": 1.89, "grad_norm": 0.7059924006462097, "learning_rate": 0.0001798172280706238, "loss": 3.08, "step": 38639 }, { "epoch": 1.89, "grad_norm": 0.6676067113876343, "learning_rate": 0.0001798031214847713, "loss": 2.9552, "step": 38640 }, { "epoch": 1.89, "grad_norm": 0.6539041996002197, "learning_rate": 0.00017978901521549428, "loss": 3.0205, "step": 38641 }, { "epoch": 1.89, "grad_norm": 0.6838967204093933, "learning_rate": 0.0001797749092628298, "loss": 2.9309, "step": 38642 }, { "epoch": 1.89, "grad_norm": 0.6737627983093262, "learning_rate": 0.0001797608036268151, "loss": 3.1568, "step": 38643 }, { "epoch": 1.89, "grad_norm": 0.6425014734268188, "learning_rate": 0.00017974669830748735, "loss": 3.0889, "step": 38644 }, { "epoch": 1.89, "grad_norm": 0.6968850493431091, "learning_rate": 0.00017973259330488354, "loss": 2.8293, "step": 38645 }, { "epoch": 1.89, "grad_norm": 0.6469689011573792, "learning_rate": 0.00017971848861904105, "loss": 2.9373, "step": 38646 }, { "epoch": 1.89, "grad_norm": 0.6483293175697327, "learning_rate": 0.00017970438424999688, "loss": 3.098, "step": 38647 }, { "epoch": 1.89, "grad_norm": 0.6595166325569153, "learning_rate": 0.0001796902801977881, "loss": 3.055, "step": 38648 }, { "epoch": 1.89, "grad_norm": 0.6388629078865051, "learning_rate": 0.00017967617646245213, "loss": 2.9905, "step": 38649 }, { "epoch": 1.89, "grad_norm": 0.6459506750106812, "learning_rate": 0.00017966207304402583, "loss": 2.9324, "step": 38650 }, { "epoch": 1.89, "grad_norm": 0.7083884477615356, "learning_rate": 0.00017964796994254656, "loss": 2.7932, "step": 38651 }, { "epoch": 1.89, "grad_norm": 0.5959693193435669, "learning_rate": 0.00017963386715805137, "loss": 2.8373, "step": 38652 }, { "epoch": 1.89, "grad_norm": 0.6729698181152344, "learning_rate": 0.0001796197646905773, "loss": 3.1081, "step": 38653 }, { "epoch": 1.89, "grad_norm": 0.6342985033988953, "learning_rate": 0.00017960566254016176, "loss": 3.0374, "step": 38654 }, { "epoch": 1.89, "grad_norm": 0.7330729365348816, "learning_rate": 0.00017959156070684163, "loss": 3.2231, "step": 38655 }, { "epoch": 1.89, "grad_norm": 0.6425516605377197, "learning_rate": 0.00017957745919065427, "loss": 2.9848, "step": 38656 }, { "epoch": 1.89, "grad_norm": 0.6559476256370544, "learning_rate": 0.00017956335799163654, "loss": 3.0129, "step": 38657 }, { "epoch": 1.89, "grad_norm": 0.7110540866851807, "learning_rate": 0.00017954925710982582, "loss": 3.008, "step": 38658 }, { "epoch": 1.89, "grad_norm": 0.6435421109199524, "learning_rate": 0.00017953515654525924, "loss": 2.9261, "step": 38659 }, { "epoch": 1.89, "grad_norm": 0.6702364683151245, "learning_rate": 0.00017952105629797375, "loss": 2.9763, "step": 38660 }, { "epoch": 1.89, "grad_norm": 0.645945131778717, "learning_rate": 0.00017950695636800672, "loss": 3.0355, "step": 38661 }, { "epoch": 1.89, "grad_norm": 0.6295376420021057, "learning_rate": 0.00017949285675539515, "loss": 3.0032, "step": 38662 }, { "epoch": 1.89, "grad_norm": 0.640484631061554, "learning_rate": 0.0001794787574601761, "loss": 3.0606, "step": 38663 }, { "epoch": 1.89, "grad_norm": 0.6546970009803772, "learning_rate": 0.00017946465848238697, "loss": 2.9448, "step": 38664 }, { "epoch": 1.89, "grad_norm": 0.6797558665275574, "learning_rate": 0.00017945055982206465, "loss": 2.8821, "step": 38665 }, { "epoch": 1.89, "grad_norm": 0.6290567517280579, "learning_rate": 0.00017943646147924639, "loss": 2.9998, "step": 38666 }, { "epoch": 1.89, "grad_norm": 0.6903430819511414, "learning_rate": 0.0001794223634539692, "loss": 3.0059, "step": 38667 }, { "epoch": 1.9, "grad_norm": 0.7165091037750244, "learning_rate": 0.00017940826574627038, "loss": 2.9054, "step": 38668 }, { "epoch": 1.9, "grad_norm": 0.6272432208061218, "learning_rate": 0.00017939416835618702, "loss": 3.0671, "step": 38669 }, { "epoch": 1.9, "grad_norm": 0.6418277621269226, "learning_rate": 0.00017938007128375609, "loss": 2.8885, "step": 38670 }, { "epoch": 1.9, "grad_norm": 0.671662449836731, "learning_rate": 0.00017936597452901493, "loss": 2.9865, "step": 38671 }, { "epoch": 1.9, "grad_norm": 0.6581867337226868, "learning_rate": 0.00017935187809200046, "loss": 2.9492, "step": 38672 }, { "epoch": 1.9, "grad_norm": 0.6353850364685059, "learning_rate": 0.00017933778197275002, "loss": 2.9451, "step": 38673 }, { "epoch": 1.9, "grad_norm": 0.6606138348579407, "learning_rate": 0.0001793236861713007, "loss": 2.9165, "step": 38674 }, { "epoch": 1.9, "grad_norm": 0.7428664565086365, "learning_rate": 0.00017930959068768944, "loss": 2.9639, "step": 38675 }, { "epoch": 1.9, "grad_norm": 0.6777299046516418, "learning_rate": 0.0001792954955219536, "loss": 3.3009, "step": 38676 }, { "epoch": 1.9, "grad_norm": 0.639334499835968, "learning_rate": 0.0001792814006741301, "loss": 2.9518, "step": 38677 }, { "epoch": 1.9, "grad_norm": 0.6822625994682312, "learning_rate": 0.00017926730614425615, "loss": 3.0377, "step": 38678 }, { "epoch": 1.9, "grad_norm": 0.6702042818069458, "learning_rate": 0.00017925321193236897, "loss": 2.9242, "step": 38679 }, { "epoch": 1.9, "grad_norm": 0.6273990273475647, "learning_rate": 0.0001792391180385056, "loss": 2.8685, "step": 38680 }, { "epoch": 1.9, "grad_norm": 0.6678882837295532, "learning_rate": 0.00017922502446270314, "loss": 2.9388, "step": 38681 }, { "epoch": 1.9, "grad_norm": 0.6841011047363281, "learning_rate": 0.0001792109312049986, "loss": 3.025, "step": 38682 }, { "epoch": 1.9, "grad_norm": 0.6396910548210144, "learning_rate": 0.0001791968382654293, "loss": 3.0088, "step": 38683 }, { "epoch": 1.9, "grad_norm": 0.63197261095047, "learning_rate": 0.00017918274564403236, "loss": 3.0014, "step": 38684 }, { "epoch": 1.9, "grad_norm": 0.6431912779808044, "learning_rate": 0.00017916865334084464, "loss": 3.0681, "step": 38685 }, { "epoch": 1.9, "grad_norm": 0.6578310132026672, "learning_rate": 0.00017915456135590363, "loss": 2.9865, "step": 38686 }, { "epoch": 1.9, "grad_norm": 0.6344066262245178, "learning_rate": 0.00017914046968924612, "loss": 2.8682, "step": 38687 }, { "epoch": 1.9, "grad_norm": 0.6525806784629822, "learning_rate": 0.00017912637834090932, "loss": 2.86, "step": 38688 }, { "epoch": 1.9, "grad_norm": 0.6194393038749695, "learning_rate": 0.00017911228731093052, "loss": 2.8638, "step": 38689 }, { "epoch": 1.9, "grad_norm": 0.6648958325386047, "learning_rate": 0.0001790981965993465, "loss": 2.9185, "step": 38690 }, { "epoch": 1.9, "grad_norm": 0.6630010008811951, "learning_rate": 0.0001790841062061948, "loss": 2.8877, "step": 38691 }, { "epoch": 1.9, "grad_norm": 0.643262505531311, "learning_rate": 0.00017907001613151212, "loss": 2.8769, "step": 38692 }, { "epoch": 1.9, "grad_norm": 0.6801116466522217, "learning_rate": 0.00017905592637533572, "loss": 2.9138, "step": 38693 }, { "epoch": 1.9, "grad_norm": 0.6177447438240051, "learning_rate": 0.00017904183693770285, "loss": 3.2762, "step": 38694 }, { "epoch": 1.9, "grad_norm": 0.6487329602241516, "learning_rate": 0.00017902774781865043, "loss": 2.8668, "step": 38695 }, { "epoch": 1.9, "grad_norm": 0.6444410085678101, "learning_rate": 0.00017901365901821573, "loss": 3.0458, "step": 38696 }, { "epoch": 1.9, "grad_norm": 0.6466326713562012, "learning_rate": 0.00017899957053643567, "loss": 3.0129, "step": 38697 }, { "epoch": 1.9, "grad_norm": 0.6659789085388184, "learning_rate": 0.00017898548237334748, "loss": 3.0764, "step": 38698 }, { "epoch": 1.9, "grad_norm": 0.6836056709289551, "learning_rate": 0.0001789713945289883, "loss": 2.9231, "step": 38699 }, { "epoch": 1.9, "grad_norm": 0.6317378282546997, "learning_rate": 0.00017895730700339508, "loss": 2.9773, "step": 38700 }, { "epoch": 1.9, "grad_norm": 0.7079015374183655, "learning_rate": 0.0001789432197966051, "loss": 3.0086, "step": 38701 }, { "epoch": 1.9, "grad_norm": 0.6322833895683289, "learning_rate": 0.00017892913290865533, "loss": 2.9109, "step": 38702 }, { "epoch": 1.9, "grad_norm": 0.620879590511322, "learning_rate": 0.00017891504633958286, "loss": 3.1562, "step": 38703 }, { "epoch": 1.9, "grad_norm": 0.6418386697769165, "learning_rate": 0.00017890096008942502, "loss": 3.0976, "step": 38704 }, { "epoch": 1.9, "grad_norm": 0.6648834943771362, "learning_rate": 0.00017888687415821864, "loss": 3.162, "step": 38705 }, { "epoch": 1.9, "grad_norm": 0.6503156423568726, "learning_rate": 0.000178872788546001, "loss": 2.9803, "step": 38706 }, { "epoch": 1.9, "grad_norm": 0.6655746698379517, "learning_rate": 0.000178858703252809, "loss": 2.8608, "step": 38707 }, { "epoch": 1.9, "grad_norm": 0.7488763928413391, "learning_rate": 0.0001788446182786799, "loss": 3.0323, "step": 38708 }, { "epoch": 1.9, "grad_norm": 0.6233779788017273, "learning_rate": 0.00017883053362365088, "loss": 3.0267, "step": 38709 }, { "epoch": 1.9, "grad_norm": 0.6482000946998596, "learning_rate": 0.0001788164492877587, "loss": 2.9888, "step": 38710 }, { "epoch": 1.9, "grad_norm": 0.6482628583908081, "learning_rate": 0.00017880236527104086, "loss": 2.9677, "step": 38711 }, { "epoch": 1.9, "grad_norm": 0.6418460011482239, "learning_rate": 0.00017878828157353417, "loss": 2.7318, "step": 38712 }, { "epoch": 1.9, "grad_norm": 0.6458458304405212, "learning_rate": 0.00017877419819527582, "loss": 3.0026, "step": 38713 }, { "epoch": 1.9, "grad_norm": 0.6432719826698303, "learning_rate": 0.00017876011513630297, "loss": 3.0038, "step": 38714 }, { "epoch": 1.9, "grad_norm": 0.6705681085586548, "learning_rate": 0.00017874603239665255, "loss": 2.8765, "step": 38715 }, { "epoch": 1.9, "grad_norm": 0.6631069779396057, "learning_rate": 0.00017873194997636182, "loss": 3.0594, "step": 38716 }, { "epoch": 1.9, "grad_norm": 0.6457616090774536, "learning_rate": 0.00017871786787546776, "loss": 2.9703, "step": 38717 }, { "epoch": 1.9, "grad_norm": 0.6328929662704468, "learning_rate": 0.0001787037860940076, "loss": 2.7767, "step": 38718 }, { "epoch": 1.9, "grad_norm": 0.6492588520050049, "learning_rate": 0.00017868970463201815, "loss": 3.0519, "step": 38719 }, { "epoch": 1.9, "grad_norm": 0.6573117971420288, "learning_rate": 0.0001786756234895367, "loss": 3.0632, "step": 38720 }, { "epoch": 1.9, "grad_norm": 0.6802584528923035, "learning_rate": 0.00017866154266660045, "loss": 3.218, "step": 38721 }, { "epoch": 1.9, "grad_norm": 0.6282143592834473, "learning_rate": 0.0001786474621632462, "loss": 2.9557, "step": 38722 }, { "epoch": 1.9, "grad_norm": 0.6655977964401245, "learning_rate": 0.00017863338197951126, "loss": 2.8931, "step": 38723 }, { "epoch": 1.9, "grad_norm": 0.6891822218894958, "learning_rate": 0.00017861930211543264, "loss": 2.9934, "step": 38724 }, { "epoch": 1.9, "grad_norm": 0.6471545100212097, "learning_rate": 0.0001786052225710473, "loss": 3.1537, "step": 38725 }, { "epoch": 1.9, "grad_norm": 0.6515899896621704, "learning_rate": 0.0001785911433463926, "loss": 3.0171, "step": 38726 }, { "epoch": 1.9, "grad_norm": 0.6359424591064453, "learning_rate": 0.00017857706444150535, "loss": 3.0864, "step": 38727 }, { "epoch": 1.9, "grad_norm": 0.6311108469963074, "learning_rate": 0.00017856298585642284, "loss": 3.1186, "step": 38728 }, { "epoch": 1.9, "grad_norm": 0.6485701203346252, "learning_rate": 0.00017854890759118192, "loss": 3.1168, "step": 38729 }, { "epoch": 1.9, "grad_norm": 0.6353026628494263, "learning_rate": 0.00017853482964581992, "loss": 3.0755, "step": 38730 }, { "epoch": 1.9, "grad_norm": 0.6661299467086792, "learning_rate": 0.00017852075202037378, "loss": 2.908, "step": 38731 }, { "epoch": 1.9, "grad_norm": 0.6596245765686035, "learning_rate": 0.00017850667471488053, "loss": 2.9477, "step": 38732 }, { "epoch": 1.9, "grad_norm": 0.67014080286026, "learning_rate": 0.00017849259772937743, "loss": 3.2171, "step": 38733 }, { "epoch": 1.9, "grad_norm": 0.6512858867645264, "learning_rate": 0.00017847852106390132, "loss": 2.8545, "step": 38734 }, { "epoch": 1.9, "grad_norm": 0.6994531154632568, "learning_rate": 0.00017846444471848944, "loss": 2.9238, "step": 38735 }, { "epoch": 1.9, "grad_norm": 0.6024457216262817, "learning_rate": 0.0001784503686931789, "loss": 2.9689, "step": 38736 }, { "epoch": 1.9, "grad_norm": 0.6673498153686523, "learning_rate": 0.00017843629298800655, "loss": 2.8969, "step": 38737 }, { "epoch": 1.9, "grad_norm": 0.6520382761955261, "learning_rate": 0.00017842221760300977, "loss": 3.0239, "step": 38738 }, { "epoch": 1.9, "grad_norm": 0.6899427175521851, "learning_rate": 0.0001784081425382254, "loss": 3.0428, "step": 38739 }, { "epoch": 1.9, "grad_norm": 0.6339791417121887, "learning_rate": 0.00017839406779369052, "loss": 2.8405, "step": 38740 }, { "epoch": 1.9, "grad_norm": 1.6099035739898682, "learning_rate": 0.00017837999336944236, "loss": 2.9079, "step": 38741 }, { "epoch": 1.9, "grad_norm": 0.6363784074783325, "learning_rate": 0.00017836591926551786, "loss": 3.0636, "step": 38742 }, { "epoch": 1.9, "grad_norm": 0.6817944049835205, "learning_rate": 0.00017835184548195418, "loss": 2.865, "step": 38743 }, { "epoch": 1.9, "grad_norm": 0.6493276953697205, "learning_rate": 0.0001783377720187882, "loss": 2.8713, "step": 38744 }, { "epoch": 1.9, "grad_norm": 0.6394343376159668, "learning_rate": 0.00017832369887605718, "loss": 3.0309, "step": 38745 }, { "epoch": 1.9, "grad_norm": 0.6780338883399963, "learning_rate": 0.00017830962605379813, "loss": 3.0202, "step": 38746 }, { "epoch": 1.9, "grad_norm": 0.6320281028747559, "learning_rate": 0.00017829555355204803, "loss": 2.8572, "step": 38747 }, { "epoch": 1.9, "grad_norm": 0.6657953858375549, "learning_rate": 0.00017828148137084411, "loss": 3.0652, "step": 38748 }, { "epoch": 1.9, "grad_norm": 0.6262355446815491, "learning_rate": 0.00017826740951022336, "loss": 3.1262, "step": 38749 }, { "epoch": 1.9, "grad_norm": 0.7236253023147583, "learning_rate": 0.0001782533379702227, "loss": 2.9621, "step": 38750 }, { "epoch": 1.9, "grad_norm": 0.6585520505905151, "learning_rate": 0.00017823926675087945, "loss": 3.0541, "step": 38751 }, { "epoch": 1.9, "grad_norm": 0.6381440162658691, "learning_rate": 0.0001782251958522304, "loss": 2.9744, "step": 38752 }, { "epoch": 1.9, "grad_norm": 0.6845026612281799, "learning_rate": 0.00017821112527431287, "loss": 3.0564, "step": 38753 }, { "epoch": 1.9, "grad_norm": 0.6539150476455688, "learning_rate": 0.0001781970550171638, "loss": 2.9587, "step": 38754 }, { "epoch": 1.9, "grad_norm": 0.7004772424697876, "learning_rate": 0.00017818298508082015, "loss": 3.0137, "step": 38755 }, { "epoch": 1.9, "grad_norm": 0.6276587247848511, "learning_rate": 0.00017816891546531914, "loss": 2.9447, "step": 38756 }, { "epoch": 1.9, "grad_norm": 0.6287436485290527, "learning_rate": 0.00017815484617069775, "loss": 2.9367, "step": 38757 }, { "epoch": 1.9, "grad_norm": 0.7259926795959473, "learning_rate": 0.00017814077719699312, "loss": 3.1074, "step": 38758 }, { "epoch": 1.9, "grad_norm": 0.6566614508628845, "learning_rate": 0.00017812670854424207, "loss": 3.2498, "step": 38759 }, { "epoch": 1.9, "grad_norm": 0.6689494848251343, "learning_rate": 0.0001781126402124819, "loss": 3.0541, "step": 38760 }, { "epoch": 1.9, "grad_norm": 0.6920581459999084, "learning_rate": 0.00017809857220174962, "loss": 2.9865, "step": 38761 }, { "epoch": 1.9, "grad_norm": 0.627387285232544, "learning_rate": 0.00017808450451208213, "loss": 3.109, "step": 38762 }, { "epoch": 1.9, "grad_norm": 0.6338323354721069, "learning_rate": 0.0001780704371435167, "loss": 2.9502, "step": 38763 }, { "epoch": 1.9, "grad_norm": 0.6786047220230103, "learning_rate": 0.00017805637009609025, "loss": 3.134, "step": 38764 }, { "epoch": 1.9, "grad_norm": 0.6783745884895325, "learning_rate": 0.00017804230336983975, "loss": 3.161, "step": 38765 }, { "epoch": 1.9, "grad_norm": 0.6686196327209473, "learning_rate": 0.0001780282369648025, "loss": 3.1341, "step": 38766 }, { "epoch": 1.9, "grad_norm": 0.6732924580574036, "learning_rate": 0.00017801417088101533, "loss": 2.8498, "step": 38767 }, { "epoch": 1.9, "grad_norm": 0.6651960015296936, "learning_rate": 0.00017800010511851542, "loss": 3.1484, "step": 38768 }, { "epoch": 1.9, "grad_norm": 0.6445776224136353, "learning_rate": 0.00017798603967733963, "loss": 3.0089, "step": 38769 }, { "epoch": 1.9, "grad_norm": 0.7008956074714661, "learning_rate": 0.0001779719745575252, "loss": 3.0585, "step": 38770 }, { "epoch": 1.9, "grad_norm": 0.7105851173400879, "learning_rate": 0.00017795790975910913, "loss": 2.9633, "step": 38771 }, { "epoch": 1.9, "grad_norm": 0.6775414347648621, "learning_rate": 0.00017794384528212835, "loss": 2.9435, "step": 38772 }, { "epoch": 1.9, "grad_norm": 0.6525750160217285, "learning_rate": 0.00017792978112662013, "loss": 2.8013, "step": 38773 }, { "epoch": 1.9, "grad_norm": 0.6795325875282288, "learning_rate": 0.0001779157172926212, "loss": 3.1657, "step": 38774 }, { "epoch": 1.9, "grad_norm": 0.6364821791648865, "learning_rate": 0.00017790165378016884, "loss": 2.9929, "step": 38775 }, { "epoch": 1.9, "grad_norm": 0.6568708419799805, "learning_rate": 0.00017788759058930015, "loss": 3.088, "step": 38776 }, { "epoch": 1.9, "grad_norm": 0.6670739650726318, "learning_rate": 0.00017787352772005184, "loss": 2.9832, "step": 38777 }, { "epoch": 1.9, "grad_norm": 0.6435451507568359, "learning_rate": 0.00017785946517246136, "loss": 2.9281, "step": 38778 }, { "epoch": 1.9, "grad_norm": 0.6362979412078857, "learning_rate": 0.00017784540294656543, "loss": 2.9789, "step": 38779 }, { "epoch": 1.9, "grad_norm": 0.6384310722351074, "learning_rate": 0.00017783134104240117, "loss": 2.8507, "step": 38780 }, { "epoch": 1.9, "grad_norm": 0.6559550166130066, "learning_rate": 0.00017781727946000574, "loss": 2.9265, "step": 38781 }, { "epoch": 1.9, "grad_norm": 0.650518000125885, "learning_rate": 0.000177803218199416, "loss": 2.9284, "step": 38782 }, { "epoch": 1.9, "grad_norm": 0.6557312607765198, "learning_rate": 0.00017778915726066917, "loss": 3.059, "step": 38783 }, { "epoch": 1.9, "grad_norm": 0.613975465297699, "learning_rate": 0.00017777509664380206, "loss": 2.9277, "step": 38784 }, { "epoch": 1.9, "grad_norm": 0.6250554323196411, "learning_rate": 0.00017776103634885185, "loss": 2.9664, "step": 38785 }, { "epoch": 1.9, "grad_norm": 0.6322828531265259, "learning_rate": 0.00017774697637585563, "loss": 2.8287, "step": 38786 }, { "epoch": 1.9, "grad_norm": 0.6861053109169006, "learning_rate": 0.00017773291672485022, "loss": 3.1791, "step": 38787 }, { "epoch": 1.9, "grad_norm": 0.630342960357666, "learning_rate": 0.00017771885739587288, "loss": 2.999, "step": 38788 }, { "epoch": 1.9, "grad_norm": 0.6999707818031311, "learning_rate": 0.0001777047983889605, "loss": 3.0539, "step": 38789 }, { "epoch": 1.9, "grad_norm": 0.6790862679481506, "learning_rate": 0.00017769073970415005, "loss": 3.0786, "step": 38790 }, { "epoch": 1.9, "grad_norm": 0.6557303667068481, "learning_rate": 0.0001776766813414788, "loss": 2.7987, "step": 38791 }, { "epoch": 1.9, "grad_norm": 0.6694257855415344, "learning_rate": 0.0001776626233009835, "loss": 2.9747, "step": 38792 }, { "epoch": 1.9, "grad_norm": 0.7797983288764954, "learning_rate": 0.00017764856558270153, "loss": 2.932, "step": 38793 }, { "epoch": 1.9, "grad_norm": 0.6885984539985657, "learning_rate": 0.00017763450818666945, "loss": 2.8699, "step": 38794 }, { "epoch": 1.9, "grad_norm": 0.6026552319526672, "learning_rate": 0.00017762045111292455, "loss": 2.9986, "step": 38795 }, { "epoch": 1.9, "grad_norm": 0.7198888063430786, "learning_rate": 0.00017760639436150398, "loss": 2.9756, "step": 38796 }, { "epoch": 1.9, "grad_norm": 0.646704375743866, "learning_rate": 0.00017759233793244452, "loss": 2.9899, "step": 38797 }, { "epoch": 1.9, "grad_norm": 0.6183916926383972, "learning_rate": 0.00017757828182578332, "loss": 2.8782, "step": 38798 }, { "epoch": 1.9, "grad_norm": 0.629848062992096, "learning_rate": 0.00017756422604155728, "loss": 2.9032, "step": 38799 }, { "epoch": 1.9, "grad_norm": 0.6715112924575806, "learning_rate": 0.00017755017057980362, "loss": 2.9572, "step": 38800 }, { "epoch": 1.9, "grad_norm": 0.6943894028663635, "learning_rate": 0.00017753611544055916, "loss": 3.0577, "step": 38801 }, { "epoch": 1.9, "grad_norm": 0.6915984153747559, "learning_rate": 0.00017752206062386096, "loss": 2.9434, "step": 38802 }, { "epoch": 1.9, "grad_norm": 0.6393081545829773, "learning_rate": 0.00017750800612974626, "loss": 2.9935, "step": 38803 }, { "epoch": 1.9, "grad_norm": 0.6286363005638123, "learning_rate": 0.00017749395195825173, "loss": 2.7904, "step": 38804 }, { "epoch": 1.9, "grad_norm": 0.6637555956840515, "learning_rate": 0.0001774798981094147, "loss": 2.8944, "step": 38805 }, { "epoch": 1.9, "grad_norm": 0.6585133075714111, "learning_rate": 0.00017746584458327189, "loss": 2.8192, "step": 38806 }, { "epoch": 1.9, "grad_norm": 0.6345171332359314, "learning_rate": 0.0001774517913798605, "loss": 3.0814, "step": 38807 }, { "epoch": 1.9, "grad_norm": 0.6817945241928101, "learning_rate": 0.00017743773849921758, "loss": 3.1959, "step": 38808 }, { "epoch": 1.9, "grad_norm": 0.673495888710022, "learning_rate": 0.00017742368594137993, "loss": 2.8187, "step": 38809 }, { "epoch": 1.9, "grad_norm": 0.6089171171188354, "learning_rate": 0.00017740963370638485, "loss": 3.0861, "step": 38810 }, { "epoch": 1.9, "grad_norm": 0.6373415589332581, "learning_rate": 0.00017739558179426915, "loss": 2.9421, "step": 38811 }, { "epoch": 1.9, "grad_norm": 0.7025952935218811, "learning_rate": 0.00017738153020506982, "loss": 3.0454, "step": 38812 }, { "epoch": 1.9, "grad_norm": 0.66726154088974, "learning_rate": 0.0001773674789388241, "loss": 3.1199, "step": 38813 }, { "epoch": 1.9, "grad_norm": 0.630937397480011, "learning_rate": 0.00017735342799556864, "loss": 2.9137, "step": 38814 }, { "epoch": 1.9, "grad_norm": 0.6461975574493408, "learning_rate": 0.00017733937737534082, "loss": 2.9248, "step": 38815 }, { "epoch": 1.9, "grad_norm": 0.6686995029449463, "learning_rate": 0.0001773253270781774, "loss": 2.8179, "step": 38816 }, { "epoch": 1.9, "grad_norm": 0.6307097673416138, "learning_rate": 0.0001773112771041154, "loss": 2.9402, "step": 38817 }, { "epoch": 1.9, "grad_norm": 0.6663997769355774, "learning_rate": 0.000177297227453192, "loss": 2.7335, "step": 38818 }, { "epoch": 1.9, "grad_norm": 0.660294771194458, "learning_rate": 0.00017728317812544406, "loss": 2.9602, "step": 38819 }, { "epoch": 1.9, "grad_norm": 0.7105454206466675, "learning_rate": 0.0001772691291209086, "loss": 2.9683, "step": 38820 }, { "epoch": 1.9, "grad_norm": 0.6138967275619507, "learning_rate": 0.00017725508043962257, "loss": 2.9629, "step": 38821 }, { "epoch": 1.9, "grad_norm": 0.7000978589057922, "learning_rate": 0.00017724103208162308, "loss": 3.1553, "step": 38822 }, { "epoch": 1.9, "grad_norm": 0.6332494020462036, "learning_rate": 0.00017722698404694715, "loss": 3.03, "step": 38823 }, { "epoch": 1.9, "grad_norm": 0.5949512720108032, "learning_rate": 0.00017721293633563164, "loss": 3.0115, "step": 38824 }, { "epoch": 1.9, "grad_norm": 0.6465287208557129, "learning_rate": 0.0001771988889477137, "loss": 2.9231, "step": 38825 }, { "epoch": 1.9, "grad_norm": 0.6742639541625977, "learning_rate": 0.00017718484188323014, "loss": 3.1172, "step": 38826 }, { "epoch": 1.9, "grad_norm": 0.6116287708282471, "learning_rate": 0.00017717079514221806, "loss": 3.1104, "step": 38827 }, { "epoch": 1.9, "grad_norm": 0.7612192034721375, "learning_rate": 0.0001771567487247146, "loss": 2.9702, "step": 38828 }, { "epoch": 1.9, "grad_norm": 0.6515193581581116, "learning_rate": 0.0001771427026307566, "loss": 3.0939, "step": 38829 }, { "epoch": 1.9, "grad_norm": 0.6422175765037537, "learning_rate": 0.00017712865686038106, "loss": 2.9786, "step": 38830 }, { "epoch": 1.9, "grad_norm": 0.6645070314407349, "learning_rate": 0.0001771146114136249, "loss": 2.7252, "step": 38831 }, { "epoch": 1.9, "grad_norm": 0.6393051743507385, "learning_rate": 0.00017710056629052526, "loss": 2.7584, "step": 38832 }, { "epoch": 1.9, "grad_norm": 0.6298916935920715, "learning_rate": 0.00017708652149111913, "loss": 3.3263, "step": 38833 }, { "epoch": 1.9, "grad_norm": 0.6236022710800171, "learning_rate": 0.00017707247701544335, "loss": 2.9878, "step": 38834 }, { "epoch": 1.9, "grad_norm": 0.6486828923225403, "learning_rate": 0.0001770584328635351, "loss": 3.0399, "step": 38835 }, { "epoch": 1.9, "grad_norm": 0.6714288592338562, "learning_rate": 0.00017704438903543116, "loss": 2.9953, "step": 38836 }, { "epoch": 1.9, "grad_norm": 0.6709398627281189, "learning_rate": 0.0001770303455311687, "loss": 2.9666, "step": 38837 }, { "epoch": 1.9, "grad_norm": 0.6724570393562317, "learning_rate": 0.0001770163023507847, "loss": 3.0689, "step": 38838 }, { "epoch": 1.9, "grad_norm": 0.6929112076759338, "learning_rate": 0.00017700225949431598, "loss": 3.0246, "step": 38839 }, { "epoch": 1.9, "grad_norm": 0.6533306837081909, "learning_rate": 0.00017698821696179978, "loss": 2.8858, "step": 38840 }, { "epoch": 1.9, "grad_norm": 0.6718860268592834, "learning_rate": 0.00017697417475327288, "loss": 2.9085, "step": 38841 }, { "epoch": 1.9, "grad_norm": 0.650233805179596, "learning_rate": 0.00017696013286877226, "loss": 3.1286, "step": 38842 }, { "epoch": 1.9, "grad_norm": 0.7158471941947937, "learning_rate": 0.0001769460913083351, "loss": 2.9035, "step": 38843 }, { "epoch": 1.9, "grad_norm": 0.6609078049659729, "learning_rate": 0.00017693205007199812, "loss": 3.0562, "step": 38844 }, { "epoch": 1.9, "grad_norm": 0.672481894493103, "learning_rate": 0.00017691800915979855, "loss": 2.8383, "step": 38845 }, { "epoch": 1.9, "grad_norm": 0.6177345514297485, "learning_rate": 0.00017690396857177317, "loss": 2.8635, "step": 38846 }, { "epoch": 1.9, "grad_norm": 0.6650989651679993, "learning_rate": 0.00017688992830795905, "loss": 3.1194, "step": 38847 }, { "epoch": 1.9, "grad_norm": 0.6523521542549133, "learning_rate": 0.00017687588836839323, "loss": 2.92, "step": 38848 }, { "epoch": 1.9, "grad_norm": 0.6387304067611694, "learning_rate": 0.00017686184875311255, "loss": 3.0017, "step": 38849 }, { "epoch": 1.9, "grad_norm": 0.650626540184021, "learning_rate": 0.00017684780946215412, "loss": 3.224, "step": 38850 }, { "epoch": 1.9, "grad_norm": 0.7072687745094299, "learning_rate": 0.00017683377049555483, "loss": 3.068, "step": 38851 }, { "epoch": 1.9, "grad_norm": 0.6670072674751282, "learning_rate": 0.00017681973185335165, "loss": 2.7412, "step": 38852 }, { "epoch": 1.9, "grad_norm": 0.6184037327766418, "learning_rate": 0.0001768056935355817, "loss": 3.0716, "step": 38853 }, { "epoch": 1.9, "grad_norm": 0.6479713916778564, "learning_rate": 0.0001767916555422817, "loss": 3.0369, "step": 38854 }, { "epoch": 1.9, "grad_norm": 0.6216630339622498, "learning_rate": 0.00017677761787348892, "loss": 3.1023, "step": 38855 }, { "epoch": 1.9, "grad_norm": 0.667361319065094, "learning_rate": 0.00017676358052924008, "loss": 2.9674, "step": 38856 }, { "epoch": 1.9, "grad_norm": 0.6166158318519592, "learning_rate": 0.00017674954350957217, "loss": 2.8018, "step": 38857 }, { "epoch": 1.9, "grad_norm": 0.6484586596488953, "learning_rate": 0.00017673550681452242, "loss": 2.9074, "step": 38858 }, { "epoch": 1.9, "grad_norm": 0.6432170271873474, "learning_rate": 0.00017672147044412757, "loss": 2.7897, "step": 38859 }, { "epoch": 1.9, "grad_norm": 0.645042359828949, "learning_rate": 0.00017670743439842466, "loss": 3.1734, "step": 38860 }, { "epoch": 1.9, "grad_norm": 0.6494126915931702, "learning_rate": 0.00017669339867745052, "loss": 3.0803, "step": 38861 }, { "epoch": 1.9, "grad_norm": 0.6544557809829712, "learning_rate": 0.00017667936328124233, "loss": 2.9205, "step": 38862 }, { "epoch": 1.9, "grad_norm": 0.666323184967041, "learning_rate": 0.000176665328209837, "loss": 2.8539, "step": 38863 }, { "epoch": 1.9, "grad_norm": 0.6175612211227417, "learning_rate": 0.00017665129346327134, "loss": 3.1707, "step": 38864 }, { "epoch": 1.9, "grad_norm": 0.7000352740287781, "learning_rate": 0.00017663725904158253, "loss": 3.1623, "step": 38865 }, { "epoch": 1.9, "grad_norm": 0.7238625884056091, "learning_rate": 0.00017662322494480743, "loss": 2.7223, "step": 38866 }, { "epoch": 1.9, "grad_norm": 0.6424652338027954, "learning_rate": 0.00017660919117298292, "loss": 2.8492, "step": 38867 }, { "epoch": 1.9, "grad_norm": 0.6737831234931946, "learning_rate": 0.00017659515772614618, "loss": 3.1558, "step": 38868 }, { "epoch": 1.9, "grad_norm": 0.6455912590026855, "learning_rate": 0.00017658112460433397, "loss": 2.8725, "step": 38869 }, { "epoch": 1.9, "grad_norm": 0.6594234704971313, "learning_rate": 0.00017656709180758343, "loss": 3.1363, "step": 38870 }, { "epoch": 1.9, "grad_norm": 0.6191946864128113, "learning_rate": 0.00017655305933593124, "loss": 2.8576, "step": 38871 }, { "epoch": 1.91, "grad_norm": 0.6468594074249268, "learning_rate": 0.00017653902718941464, "loss": 3.2429, "step": 38872 }, { "epoch": 1.91, "grad_norm": 0.653252124786377, "learning_rate": 0.00017652499536807052, "loss": 2.8514, "step": 38873 }, { "epoch": 1.91, "grad_norm": 0.6690567135810852, "learning_rate": 0.00017651096387193567, "loss": 2.8927, "step": 38874 }, { "epoch": 1.91, "grad_norm": 0.6499602794647217, "learning_rate": 0.0001764969327010473, "loss": 2.9986, "step": 38875 }, { "epoch": 1.91, "grad_norm": 0.6645433902740479, "learning_rate": 0.00017648290185544213, "loss": 3.0087, "step": 38876 }, { "epoch": 1.91, "grad_norm": 0.6643459796905518, "learning_rate": 0.00017646887133515734, "loss": 3.1536, "step": 38877 }, { "epoch": 1.91, "grad_norm": 0.6513760685920715, "learning_rate": 0.00017645484114022972, "loss": 3.0292, "step": 38878 }, { "epoch": 1.91, "grad_norm": 0.646967887878418, "learning_rate": 0.00017644081127069622, "loss": 3.1265, "step": 38879 }, { "epoch": 1.91, "grad_norm": 0.7216665744781494, "learning_rate": 0.00017642678172659395, "loss": 3.0569, "step": 38880 }, { "epoch": 1.91, "grad_norm": 0.6408677101135254, "learning_rate": 0.00017641275250795968, "loss": 2.9765, "step": 38881 }, { "epoch": 1.91, "grad_norm": 0.6516188979148865, "learning_rate": 0.00017639872361483052, "loss": 3.033, "step": 38882 }, { "epoch": 1.91, "grad_norm": 0.6692624688148499, "learning_rate": 0.0001763846950472432, "loss": 2.9756, "step": 38883 }, { "epoch": 1.91, "grad_norm": 0.6817802786827087, "learning_rate": 0.00017637066680523483, "loss": 2.9432, "step": 38884 }, { "epoch": 1.91, "grad_norm": 0.7394009828567505, "learning_rate": 0.00017635663888884243, "loss": 3.1279, "step": 38885 }, { "epoch": 1.91, "grad_norm": 0.6396210789680481, "learning_rate": 0.00017634261129810273, "loss": 3.2632, "step": 38886 }, { "epoch": 1.91, "grad_norm": 0.6615631580352783, "learning_rate": 0.0001763285840330529, "loss": 3.0701, "step": 38887 }, { "epoch": 1.91, "grad_norm": 0.6404348611831665, "learning_rate": 0.00017631455709372971, "loss": 2.9918, "step": 38888 }, { "epoch": 1.91, "grad_norm": 0.667752742767334, "learning_rate": 0.00017630053048017016, "loss": 3.0932, "step": 38889 }, { "epoch": 1.91, "grad_norm": 0.6676562428474426, "learning_rate": 0.0001762865041924113, "loss": 2.9182, "step": 38890 }, { "epoch": 1.91, "grad_norm": 0.6496162414550781, "learning_rate": 0.00017627247823048987, "loss": 3.0129, "step": 38891 }, { "epoch": 1.91, "grad_norm": 0.650679886341095, "learning_rate": 0.00017625845259444304, "loss": 2.9894, "step": 38892 }, { "epoch": 1.91, "grad_norm": 0.6663594245910645, "learning_rate": 0.00017624442728430748, "loss": 3.0855, "step": 38893 }, { "epoch": 1.91, "grad_norm": 0.6415731310844421, "learning_rate": 0.00017623040230012037, "loss": 3.0113, "step": 38894 }, { "epoch": 1.91, "grad_norm": 0.6369303464889526, "learning_rate": 0.0001762163776419186, "loss": 2.9257, "step": 38895 }, { "epoch": 1.91, "grad_norm": 0.6577563285827637, "learning_rate": 0.00017620235330973897, "loss": 3.0859, "step": 38896 }, { "epoch": 1.91, "grad_norm": 0.6328701972961426, "learning_rate": 0.00017618832930361858, "loss": 3.0281, "step": 38897 }, { "epoch": 1.91, "grad_norm": 0.6667453050613403, "learning_rate": 0.00017617430562359422, "loss": 3.0712, "step": 38898 }, { "epoch": 1.91, "grad_norm": 0.6224763989448547, "learning_rate": 0.00017616028226970297, "loss": 2.9339, "step": 38899 }, { "epoch": 1.91, "grad_norm": 0.6647245287895203, "learning_rate": 0.0001761462592419818, "loss": 3.0157, "step": 38900 }, { "epoch": 1.91, "grad_norm": 0.6523852944374084, "learning_rate": 0.0001761322365404674, "loss": 2.9646, "step": 38901 }, { "epoch": 1.91, "grad_norm": 0.6578008532524109, "learning_rate": 0.00017611821416519693, "loss": 2.9051, "step": 38902 }, { "epoch": 1.91, "grad_norm": 0.6416773200035095, "learning_rate": 0.0001761041921162072, "loss": 3.0441, "step": 38903 }, { "epoch": 1.91, "grad_norm": 0.6548252701759338, "learning_rate": 0.00017609017039353517, "loss": 2.8382, "step": 38904 }, { "epoch": 1.91, "grad_norm": 0.641315758228302, "learning_rate": 0.00017607614899721786, "loss": 2.9971, "step": 38905 }, { "epoch": 1.91, "grad_norm": 0.6452323794364929, "learning_rate": 0.00017606212792729207, "loss": 3.0848, "step": 38906 }, { "epoch": 1.91, "grad_norm": 0.6650744080543518, "learning_rate": 0.00017604810718379487, "loss": 3.0076, "step": 38907 }, { "epoch": 1.91, "grad_norm": 0.678641140460968, "learning_rate": 0.00017603408676676295, "loss": 3.0404, "step": 38908 }, { "epoch": 1.91, "grad_norm": 0.6717094779014587, "learning_rate": 0.00017602006667623345, "loss": 3.0307, "step": 38909 }, { "epoch": 1.91, "grad_norm": 0.6648962497711182, "learning_rate": 0.0001760060469122433, "loss": 2.8707, "step": 38910 }, { "epoch": 1.91, "grad_norm": 0.6424950361251831, "learning_rate": 0.00017599202747482925, "loss": 2.8513, "step": 38911 }, { "epoch": 1.91, "grad_norm": 0.6827686429023743, "learning_rate": 0.00017597800836402844, "loss": 2.8907, "step": 38912 }, { "epoch": 1.91, "grad_norm": 0.6990754008293152, "learning_rate": 0.0001759639895798776, "loss": 3.1935, "step": 38913 }, { "epoch": 1.91, "grad_norm": 0.6267073154449463, "learning_rate": 0.00017594997112241375, "loss": 2.9517, "step": 38914 }, { "epoch": 1.91, "grad_norm": 0.6684610843658447, "learning_rate": 0.00017593595299167386, "loss": 2.6714, "step": 38915 }, { "epoch": 1.91, "grad_norm": 0.65615314245224, "learning_rate": 0.0001759219351876947, "loss": 3.2431, "step": 38916 }, { "epoch": 1.91, "grad_norm": 0.6717365980148315, "learning_rate": 0.00017590791771051343, "loss": 3.0229, "step": 38917 }, { "epoch": 1.91, "grad_norm": 0.6720296740531921, "learning_rate": 0.0001758939005601667, "loss": 2.9914, "step": 38918 }, { "epoch": 1.91, "grad_norm": 0.6723449230194092, "learning_rate": 0.00017587988373669156, "loss": 3.0294, "step": 38919 }, { "epoch": 1.91, "grad_norm": 0.6474607586860657, "learning_rate": 0.000175865867240125, "loss": 2.9674, "step": 38920 }, { "epoch": 1.91, "grad_norm": 0.6441729068756104, "learning_rate": 0.00017585185107050374, "loss": 3.0136, "step": 38921 }, { "epoch": 1.91, "grad_norm": 0.6469467878341675, "learning_rate": 0.00017583783522786496, "loss": 2.8793, "step": 38922 }, { "epoch": 1.91, "grad_norm": 0.6387615203857422, "learning_rate": 0.00017582381971224527, "loss": 3.1108, "step": 38923 }, { "epoch": 1.91, "grad_norm": 0.6527659296989441, "learning_rate": 0.0001758098045236818, "loss": 2.997, "step": 38924 }, { "epoch": 1.91, "grad_norm": 0.6545872092247009, "learning_rate": 0.00017579578966221145, "loss": 3.3368, "step": 38925 }, { "epoch": 1.91, "grad_norm": 0.6568055152893066, "learning_rate": 0.00017578177512787103, "loss": 3.0761, "step": 38926 }, { "epoch": 1.91, "grad_norm": 0.6849158406257629, "learning_rate": 0.00017576776092069759, "loss": 2.8588, "step": 38927 }, { "epoch": 1.91, "grad_norm": 0.6461156606674194, "learning_rate": 0.00017575374704072787, "loss": 2.8396, "step": 38928 }, { "epoch": 1.91, "grad_norm": 0.6694230437278748, "learning_rate": 0.00017573973348799883, "loss": 3.0169, "step": 38929 }, { "epoch": 1.91, "grad_norm": 0.6535854935646057, "learning_rate": 0.00017572572026254755, "loss": 2.9841, "step": 38930 }, { "epoch": 1.91, "grad_norm": 0.6744319200515747, "learning_rate": 0.00017571170736441072, "loss": 3.0615, "step": 38931 }, { "epoch": 1.91, "grad_norm": 0.6252499222755432, "learning_rate": 0.00017569769479362543, "loss": 3.0025, "step": 38932 }, { "epoch": 1.91, "grad_norm": 0.6410253047943115, "learning_rate": 0.00017568368255022835, "loss": 3.1207, "step": 38933 }, { "epoch": 1.91, "grad_norm": 0.6542544960975647, "learning_rate": 0.00017566967063425658, "loss": 2.8341, "step": 38934 }, { "epoch": 1.91, "grad_norm": 0.6271516680717468, "learning_rate": 0.00017565565904574702, "loss": 3.0428, "step": 38935 }, { "epoch": 1.91, "grad_norm": 0.6435152888298035, "learning_rate": 0.00017564164778473642, "loss": 3.0798, "step": 38936 }, { "epoch": 1.91, "grad_norm": 0.6573494672775269, "learning_rate": 0.00017562763685126194, "loss": 3.2102, "step": 38937 }, { "epoch": 1.91, "grad_norm": 0.6944133639335632, "learning_rate": 0.00017561362624536015, "loss": 2.9844, "step": 38938 }, { "epoch": 1.91, "grad_norm": 0.7348315119743347, "learning_rate": 0.0001755996159670682, "loss": 3.2201, "step": 38939 }, { "epoch": 1.91, "grad_norm": 0.6667076945304871, "learning_rate": 0.00017558560601642302, "loss": 3.1203, "step": 38940 }, { "epoch": 1.91, "grad_norm": 0.9448343515396118, "learning_rate": 0.00017557159639346124, "loss": 2.9531, "step": 38941 }, { "epoch": 1.91, "grad_norm": 0.6497033834457397, "learning_rate": 0.00017555758709822004, "loss": 2.9215, "step": 38942 }, { "epoch": 1.91, "grad_norm": 1.2949033975601196, "learning_rate": 0.00017554357813073622, "loss": 3.0897, "step": 38943 }, { "epoch": 1.91, "grad_norm": 0.6681045293807983, "learning_rate": 0.00017552956949104656, "loss": 3.1463, "step": 38944 }, { "epoch": 1.91, "grad_norm": 0.6465238332748413, "learning_rate": 0.0001755155611791882, "loss": 2.9325, "step": 38945 }, { "epoch": 1.91, "grad_norm": 0.6648427844047546, "learning_rate": 0.00017550155319519777, "loss": 2.8387, "step": 38946 }, { "epoch": 1.91, "grad_norm": 0.8117325305938721, "learning_rate": 0.00017548754553911241, "loss": 2.6738, "step": 38947 }, { "epoch": 1.91, "grad_norm": 0.629001796245575, "learning_rate": 0.00017547353821096873, "loss": 3.0694, "step": 38948 }, { "epoch": 1.91, "grad_norm": 0.6560392379760742, "learning_rate": 0.00017545953121080388, "loss": 2.8574, "step": 38949 }, { "epoch": 1.91, "grad_norm": 0.636073887348175, "learning_rate": 0.00017544552453865474, "loss": 3.1828, "step": 38950 }, { "epoch": 1.91, "grad_norm": 0.6364843845367432, "learning_rate": 0.0001754315181945579, "loss": 2.8929, "step": 38951 }, { "epoch": 1.91, "grad_norm": 0.7276064157485962, "learning_rate": 0.00017541751217855066, "loss": 2.9348, "step": 38952 }, { "epoch": 1.91, "grad_norm": 0.67525315284729, "learning_rate": 0.00017540350649066966, "loss": 2.9005, "step": 38953 }, { "epoch": 1.91, "grad_norm": 0.6328374743461609, "learning_rate": 0.00017538950113095192, "loss": 2.9466, "step": 38954 }, { "epoch": 1.91, "grad_norm": 0.6840808987617493, "learning_rate": 0.0001753754960994341, "loss": 2.9267, "step": 38955 }, { "epoch": 1.91, "grad_norm": 0.635502278804779, "learning_rate": 0.00017536149139615322, "loss": 3.1029, "step": 38956 }, { "epoch": 1.91, "grad_norm": 0.6700057983398438, "learning_rate": 0.00017534748702114638, "loss": 2.954, "step": 38957 }, { "epoch": 1.91, "grad_norm": 0.6268248558044434, "learning_rate": 0.0001753334829744501, "loss": 2.9197, "step": 38958 }, { "epoch": 1.91, "grad_norm": 0.6551121473312378, "learning_rate": 0.00017531947925610155, "loss": 3.0124, "step": 38959 }, { "epoch": 1.91, "grad_norm": 0.7056483030319214, "learning_rate": 0.00017530547586613738, "loss": 3.1388, "step": 38960 }, { "epoch": 1.91, "grad_norm": 0.6477718949317932, "learning_rate": 0.00017529147280459466, "loss": 3.0884, "step": 38961 }, { "epoch": 1.91, "grad_norm": 0.6173779964447021, "learning_rate": 0.00017527747007151022, "loss": 3.0833, "step": 38962 }, { "epoch": 1.91, "grad_norm": 0.6646998524665833, "learning_rate": 0.00017526346766692083, "loss": 3.027, "step": 38963 }, { "epoch": 1.91, "grad_norm": 0.7051180005073547, "learning_rate": 0.0001752494655908636, "loss": 2.7579, "step": 38964 }, { "epoch": 1.91, "grad_norm": 0.6498520374298096, "learning_rate": 0.00017523546384337515, "loss": 2.9913, "step": 38965 }, { "epoch": 1.91, "grad_norm": 0.6855219602584839, "learning_rate": 0.00017522146242449245, "loss": 2.9673, "step": 38966 }, { "epoch": 1.91, "grad_norm": 0.6545772552490234, "learning_rate": 0.00017520746133425252, "loss": 3.0786, "step": 38967 }, { "epoch": 1.91, "grad_norm": 0.7005255222320557, "learning_rate": 0.00017519346057269207, "loss": 2.9772, "step": 38968 }, { "epoch": 1.91, "grad_norm": 0.652550220489502, "learning_rate": 0.0001751794601398481, "loss": 3.0452, "step": 38969 }, { "epoch": 1.91, "grad_norm": 0.626100480556488, "learning_rate": 0.00017516546003575726, "loss": 3.0872, "step": 38970 }, { "epoch": 1.91, "grad_norm": 0.6244769096374512, "learning_rate": 0.00017515146026045664, "loss": 2.9593, "step": 38971 }, { "epoch": 1.91, "grad_norm": 0.6298187375068665, "learning_rate": 0.0001751374608139831, "loss": 3.0651, "step": 38972 }, { "epoch": 1.91, "grad_norm": 0.611535906791687, "learning_rate": 0.00017512346169637336, "loss": 3.2176, "step": 38973 }, { "epoch": 1.91, "grad_norm": 0.6949870586395264, "learning_rate": 0.00017510946290766446, "loss": 3.2544, "step": 38974 }, { "epoch": 1.91, "grad_norm": 0.6494085192680359, "learning_rate": 0.00017509546444789315, "loss": 2.9234, "step": 38975 }, { "epoch": 1.91, "grad_norm": 0.656135618686676, "learning_rate": 0.00017508146631709643, "loss": 3.0353, "step": 38976 }, { "epoch": 1.91, "grad_norm": 0.6190617084503174, "learning_rate": 0.00017506746851531107, "loss": 2.99, "step": 38977 }, { "epoch": 1.91, "grad_norm": 0.6905009746551514, "learning_rate": 0.0001750534710425739, "loss": 2.7008, "step": 38978 }, { "epoch": 1.91, "grad_norm": 0.6204220056533813, "learning_rate": 0.0001750394738989219, "loss": 3.1247, "step": 38979 }, { "epoch": 1.91, "grad_norm": 0.6508306264877319, "learning_rate": 0.00017502547708439178, "loss": 3.0195, "step": 38980 }, { "epoch": 1.91, "grad_norm": 0.684338390827179, "learning_rate": 0.00017501148059902047, "loss": 2.9336, "step": 38981 }, { "epoch": 1.91, "grad_norm": 0.6646551489830017, "learning_rate": 0.000174997484442845, "loss": 2.8238, "step": 38982 }, { "epoch": 1.91, "grad_norm": 0.7016298770904541, "learning_rate": 0.000174983488615902, "loss": 3.1572, "step": 38983 }, { "epoch": 1.91, "grad_norm": 0.7458086013793945, "learning_rate": 0.00017496949311822854, "loss": 2.8436, "step": 38984 }, { "epoch": 1.91, "grad_norm": 0.6394898891448975, "learning_rate": 0.00017495549794986138, "loss": 2.989, "step": 38985 }, { "epoch": 1.91, "grad_norm": 0.6666737198829651, "learning_rate": 0.00017494150311083724, "loss": 2.8933, "step": 38986 }, { "epoch": 1.91, "grad_norm": 0.6851913332939148, "learning_rate": 0.00017492750860119324, "loss": 2.8082, "step": 38987 }, { "epoch": 1.91, "grad_norm": 0.674731433391571, "learning_rate": 0.00017491351442096599, "loss": 3.1715, "step": 38988 }, { "epoch": 1.91, "grad_norm": 0.6982241868972778, "learning_rate": 0.0001748995205701926, "loss": 2.9834, "step": 38989 }, { "epoch": 1.91, "grad_norm": 0.7044297456741333, "learning_rate": 0.00017488552704890965, "loss": 2.8811, "step": 38990 }, { "epoch": 1.91, "grad_norm": 0.6905117630958557, "learning_rate": 0.00017487153385715426, "loss": 2.897, "step": 38991 }, { "epoch": 1.91, "grad_norm": 0.6385732889175415, "learning_rate": 0.00017485754099496318, "loss": 2.971, "step": 38992 }, { "epoch": 1.91, "grad_norm": 0.6887711882591248, "learning_rate": 0.0001748435484623732, "loss": 2.9601, "step": 38993 }, { "epoch": 1.91, "grad_norm": 0.6429010033607483, "learning_rate": 0.00017482955625942125, "loss": 3.0313, "step": 38994 }, { "epoch": 1.91, "grad_norm": 0.6536113023757935, "learning_rate": 0.00017481556438614411, "loss": 2.9513, "step": 38995 }, { "epoch": 1.91, "grad_norm": 0.6555978655815125, "learning_rate": 0.00017480157284257864, "loss": 2.8433, "step": 38996 }, { "epoch": 1.91, "grad_norm": 0.6542037725448608, "learning_rate": 0.00017478758162876184, "loss": 2.9936, "step": 38997 }, { "epoch": 1.91, "grad_norm": 0.6809813976287842, "learning_rate": 0.00017477359074473034, "loss": 2.9191, "step": 38998 }, { "epoch": 1.91, "grad_norm": 0.6719657182693481, "learning_rate": 0.00017475960019052122, "loss": 2.9595, "step": 38999 }, { "epoch": 1.91, "grad_norm": 0.6527001857757568, "learning_rate": 0.00017474560996617126, "loss": 3.1502, "step": 39000 }, { "epoch": 1.91, "grad_norm": 0.6487961411476135, "learning_rate": 0.0001747316200717171, "loss": 2.6611, "step": 39001 }, { "epoch": 1.91, "grad_norm": 0.689109742641449, "learning_rate": 0.00017471763050719584, "loss": 2.9463, "step": 39002 }, { "epoch": 1.91, "grad_norm": 0.6380165815353394, "learning_rate": 0.00017470364127264413, "loss": 3.0582, "step": 39003 }, { "epoch": 1.91, "grad_norm": 0.7141815423965454, "learning_rate": 0.00017468965236809906, "loss": 3.0516, "step": 39004 }, { "epoch": 1.91, "grad_norm": 0.6605910658836365, "learning_rate": 0.0001746756637935972, "loss": 3.1249, "step": 39005 }, { "epoch": 1.91, "grad_norm": 0.6353509426116943, "learning_rate": 0.0001746616755491756, "loss": 3.0085, "step": 39006 }, { "epoch": 1.91, "grad_norm": 0.6613842248916626, "learning_rate": 0.0001746476876348711, "loss": 2.9168, "step": 39007 }, { "epoch": 1.91, "grad_norm": 0.6803551316261292, "learning_rate": 0.0001746337000507203, "loss": 2.9971, "step": 39008 }, { "epoch": 1.91, "grad_norm": 0.6482704877853394, "learning_rate": 0.00017461971279676027, "loss": 3.0769, "step": 39009 }, { "epoch": 1.91, "grad_norm": 0.6631468534469604, "learning_rate": 0.00017460572587302777, "loss": 3.2677, "step": 39010 }, { "epoch": 1.91, "grad_norm": 0.740536630153656, "learning_rate": 0.0001745917392795596, "loss": 2.9764, "step": 39011 }, { "epoch": 1.91, "grad_norm": 0.6036262512207031, "learning_rate": 0.00017457775301639276, "loss": 3.0704, "step": 39012 }, { "epoch": 1.91, "grad_norm": 0.6554529666900635, "learning_rate": 0.00017456376708356404, "loss": 2.9901, "step": 39013 }, { "epoch": 1.91, "grad_norm": 0.6793158650398254, "learning_rate": 0.00017454978148111016, "loss": 3.0852, "step": 39014 }, { "epoch": 1.91, "grad_norm": 0.6710419654846191, "learning_rate": 0.00017453579620906794, "loss": 3.0756, "step": 39015 }, { "epoch": 1.91, "grad_norm": 0.65449458360672, "learning_rate": 0.00017452181126747425, "loss": 3.0163, "step": 39016 }, { "epoch": 1.91, "grad_norm": 0.6442614793777466, "learning_rate": 0.0001745078266563661, "loss": 2.8513, "step": 39017 }, { "epoch": 1.91, "grad_norm": 0.6554681658744812, "learning_rate": 0.00017449384237578004, "loss": 3.0563, "step": 39018 }, { "epoch": 1.91, "grad_norm": 0.6386138796806335, "learning_rate": 0.00017447985842575315, "loss": 3.106, "step": 39019 }, { "epoch": 1.91, "grad_norm": 0.6400095820426941, "learning_rate": 0.0001744658748063221, "loss": 3.0464, "step": 39020 }, { "epoch": 1.91, "grad_norm": 0.6746090650558472, "learning_rate": 0.00017445189151752387, "loss": 3.0485, "step": 39021 }, { "epoch": 1.91, "grad_norm": 0.6967078447341919, "learning_rate": 0.0001744379085593952, "loss": 2.9496, "step": 39022 }, { "epoch": 1.91, "grad_norm": 0.6654866337776184, "learning_rate": 0.00017442392593197274, "loss": 3.1855, "step": 39023 }, { "epoch": 1.91, "grad_norm": 0.6376835107803345, "learning_rate": 0.00017440994363529365, "loss": 2.8765, "step": 39024 }, { "epoch": 1.91, "grad_norm": 0.6771338582038879, "learning_rate": 0.00017439596166939452, "loss": 2.9306, "step": 39025 }, { "epoch": 1.91, "grad_norm": 0.6553162932395935, "learning_rate": 0.0001743819800343122, "loss": 2.962, "step": 39026 }, { "epoch": 1.91, "grad_norm": 0.6516581773757935, "learning_rate": 0.00017436799873008372, "loss": 2.978, "step": 39027 }, { "epoch": 1.91, "grad_norm": 0.6631935834884644, "learning_rate": 0.00017435401775674574, "loss": 3.1225, "step": 39028 }, { "epoch": 1.91, "grad_norm": 0.6429182291030884, "learning_rate": 0.00017434003711433508, "loss": 2.9737, "step": 39029 }, { "epoch": 1.91, "grad_norm": 0.634573757648468, "learning_rate": 0.00017432605680288847, "loss": 3.0406, "step": 39030 }, { "epoch": 1.91, "grad_norm": 0.613707423210144, "learning_rate": 0.00017431207682244295, "loss": 3.0347, "step": 39031 }, { "epoch": 1.91, "grad_norm": 0.681470513343811, "learning_rate": 0.0001742980971730351, "loss": 3.0185, "step": 39032 }, { "epoch": 1.91, "grad_norm": 0.6601608395576477, "learning_rate": 0.00017428411785470194, "loss": 2.944, "step": 39033 }, { "epoch": 1.91, "grad_norm": 0.6461837291717529, "learning_rate": 0.00017427013886748028, "loss": 3.1707, "step": 39034 }, { "epoch": 1.91, "grad_norm": 0.6573340892791748, "learning_rate": 0.0001742561602114069, "loss": 3.0512, "step": 39035 }, { "epoch": 1.91, "grad_norm": 0.6389563083648682, "learning_rate": 0.00017424218188651859, "loss": 2.9478, "step": 39036 }, { "epoch": 1.91, "grad_norm": 0.6320285797119141, "learning_rate": 0.00017422820389285202, "loss": 3.2002, "step": 39037 }, { "epoch": 1.91, "grad_norm": 0.6769739985466003, "learning_rate": 0.0001742142262304442, "loss": 3.0265, "step": 39038 }, { "epoch": 1.91, "grad_norm": 0.6444475054740906, "learning_rate": 0.000174200248899332, "loss": 2.9721, "step": 39039 }, { "epoch": 1.91, "grad_norm": 0.6511723399162292, "learning_rate": 0.00017418627189955198, "loss": 3.2104, "step": 39040 }, { "epoch": 1.91, "grad_norm": 0.8678764700889587, "learning_rate": 0.00017417229523114126, "loss": 3.0062, "step": 39041 }, { "epoch": 1.91, "grad_norm": 0.6624598503112793, "learning_rate": 0.0001741583188941364, "loss": 3.0018, "step": 39042 }, { "epoch": 1.91, "grad_norm": 0.6774799823760986, "learning_rate": 0.00017414434288857438, "loss": 2.7333, "step": 39043 }, { "epoch": 1.91, "grad_norm": 0.6489653587341309, "learning_rate": 0.00017413036721449198, "loss": 3.112, "step": 39044 }, { "epoch": 1.91, "grad_norm": 0.6598437428474426, "learning_rate": 0.00017411639187192582, "loss": 2.9259, "step": 39045 }, { "epoch": 1.91, "grad_norm": 0.6542210578918457, "learning_rate": 0.00017410241686091298, "loss": 2.8695, "step": 39046 }, { "epoch": 1.91, "grad_norm": 0.6994691491127014, "learning_rate": 0.00017408844218149008, "loss": 2.8518, "step": 39047 }, { "epoch": 1.91, "grad_norm": 0.6199400424957275, "learning_rate": 0.00017407446783369393, "loss": 2.9526, "step": 39048 }, { "epoch": 1.91, "grad_norm": 0.6711297631263733, "learning_rate": 0.00017406049381756152, "loss": 2.8745, "step": 39049 }, { "epoch": 1.91, "grad_norm": 0.6305173635482788, "learning_rate": 0.00017404652013312954, "loss": 3.0848, "step": 39050 }, { "epoch": 1.91, "grad_norm": 0.6580938100814819, "learning_rate": 0.0001740325467804348, "loss": 3.0753, "step": 39051 }, { "epoch": 1.91, "grad_norm": 0.6921206116676331, "learning_rate": 0.00017401857375951395, "loss": 3.0142, "step": 39052 }, { "epoch": 1.91, "grad_norm": 1.0677298307418823, "learning_rate": 0.00017400460107040394, "loss": 3.0512, "step": 39053 }, { "epoch": 1.91, "grad_norm": 0.6595743298530579, "learning_rate": 0.0001739906287131417, "loss": 3.0075, "step": 39054 }, { "epoch": 1.91, "grad_norm": 0.6712070107460022, "learning_rate": 0.00017397665668776374, "loss": 3.1811, "step": 39055 }, { "epoch": 1.91, "grad_norm": 0.6308397650718689, "learning_rate": 0.00017396268499430718, "loss": 3.1166, "step": 39056 }, { "epoch": 1.91, "grad_norm": 0.6080089807510376, "learning_rate": 0.0001739487136328086, "loss": 3.0898, "step": 39057 }, { "epoch": 1.91, "grad_norm": 0.6288923025131226, "learning_rate": 0.00017393474260330477, "loss": 3.0053, "step": 39058 }, { "epoch": 1.91, "grad_norm": 0.6430457830429077, "learning_rate": 0.00017392077190583267, "loss": 3.2299, "step": 39059 }, { "epoch": 1.91, "grad_norm": 0.6252223253250122, "learning_rate": 0.00017390680154042888, "loss": 3.121, "step": 39060 }, { "epoch": 1.91, "grad_norm": 0.6593741774559021, "learning_rate": 0.00017389283150713038, "loss": 3.0993, "step": 39061 }, { "epoch": 1.91, "grad_norm": 0.6887229681015015, "learning_rate": 0.0001738788618059738, "loss": 3.1088, "step": 39062 }, { "epoch": 1.91, "grad_norm": 0.6626426577568054, "learning_rate": 0.00017386489243699608, "loss": 3.0802, "step": 39063 }, { "epoch": 1.91, "grad_norm": 0.663589358329773, "learning_rate": 0.00017385092340023404, "loss": 2.8885, "step": 39064 }, { "epoch": 1.91, "grad_norm": 0.6585755944252014, "learning_rate": 0.00017383695469572435, "loss": 2.9546, "step": 39065 }, { "epoch": 1.91, "grad_norm": 0.6396164894104004, "learning_rate": 0.00017382298632350387, "loss": 3.1609, "step": 39066 }, { "epoch": 1.91, "grad_norm": 0.647235095500946, "learning_rate": 0.00017380901828360925, "loss": 2.9741, "step": 39067 }, { "epoch": 1.91, "grad_norm": 0.6416546702384949, "learning_rate": 0.0001737950505760774, "loss": 3.097, "step": 39068 }, { "epoch": 1.91, "grad_norm": 0.697792649269104, "learning_rate": 0.00017378108320094518, "loss": 2.6795, "step": 39069 }, { "epoch": 1.91, "grad_norm": 0.716894805431366, "learning_rate": 0.0001737671161582492, "loss": 2.7536, "step": 39070 }, { "epoch": 1.91, "grad_norm": 0.6366348266601562, "learning_rate": 0.00017375314944802648, "loss": 2.8962, "step": 39071 }, { "epoch": 1.91, "grad_norm": 0.6648399829864502, "learning_rate": 0.00017373918307031364, "loss": 3.1113, "step": 39072 }, { "epoch": 1.91, "grad_norm": 0.6838628649711609, "learning_rate": 0.0001737252170251474, "loss": 3.1721, "step": 39073 }, { "epoch": 1.91, "grad_norm": 0.6086000204086304, "learning_rate": 0.0001737112513125647, "loss": 3.0741, "step": 39074 }, { "epoch": 1.91, "grad_norm": 0.6315998435020447, "learning_rate": 0.0001736972859326022, "loss": 3.0657, "step": 39075 }, { "epoch": 1.92, "grad_norm": 0.6525732278823853, "learning_rate": 0.0001736833208852968, "loss": 2.9191, "step": 39076 }, { "epoch": 1.92, "grad_norm": 0.6616672873497009, "learning_rate": 0.00017366935617068517, "loss": 2.8436, "step": 39077 }, { "epoch": 1.92, "grad_norm": 0.6813806295394897, "learning_rate": 0.00017365539178880422, "loss": 3.0425, "step": 39078 }, { "epoch": 1.92, "grad_norm": 1.1355561017990112, "learning_rate": 0.00017364142773969066, "loss": 2.9847, "step": 39079 }, { "epoch": 1.92, "grad_norm": 0.6255960464477539, "learning_rate": 0.00017362746402338114, "loss": 2.9503, "step": 39080 }, { "epoch": 1.92, "grad_norm": 0.6622676253318787, "learning_rate": 0.0001736135006399127, "loss": 3.4366, "step": 39081 }, { "epoch": 1.92, "grad_norm": 0.6947815418243408, "learning_rate": 0.00017359953758932183, "loss": 2.9106, "step": 39082 }, { "epoch": 1.92, "grad_norm": 0.6500281095504761, "learning_rate": 0.00017358557487164548, "loss": 2.6224, "step": 39083 }, { "epoch": 1.92, "grad_norm": 0.6997030973434448, "learning_rate": 0.0001735716124869205, "loss": 2.9147, "step": 39084 }, { "epoch": 1.92, "grad_norm": 0.6823716759681702, "learning_rate": 0.0001735576504351834, "loss": 2.9929, "step": 39085 }, { "epoch": 1.92, "grad_norm": 0.6616767644882202, "learning_rate": 0.0001735436887164713, "loss": 2.9272, "step": 39086 }, { "epoch": 1.92, "grad_norm": 0.6855137348175049, "learning_rate": 0.00017352972733082075, "loss": 2.9099, "step": 39087 }, { "epoch": 1.92, "grad_norm": 0.6969751715660095, "learning_rate": 0.00017351576627826847, "loss": 2.7265, "step": 39088 }, { "epoch": 1.92, "grad_norm": 0.6577869057655334, "learning_rate": 0.00017350180555885143, "loss": 3.2084, "step": 39089 }, { "epoch": 1.92, "grad_norm": 0.6708926558494568, "learning_rate": 0.00017348784517260614, "loss": 3.1078, "step": 39090 }, { "epoch": 1.92, "grad_norm": 0.6796287894248962, "learning_rate": 0.00017347388511956965, "loss": 2.8497, "step": 39091 }, { "epoch": 1.92, "grad_norm": 0.6603464484214783, "learning_rate": 0.00017345992539977847, "loss": 2.8234, "step": 39092 }, { "epoch": 1.92, "grad_norm": 0.635272741317749, "learning_rate": 0.00017344596601326964, "loss": 3.1138, "step": 39093 }, { "epoch": 1.92, "grad_norm": 0.6617568731307983, "learning_rate": 0.00017343200696007979, "loss": 3.1461, "step": 39094 }, { "epoch": 1.92, "grad_norm": 0.6393948793411255, "learning_rate": 0.0001734180482402455, "loss": 2.8271, "step": 39095 }, { "epoch": 1.92, "grad_norm": 0.6940094232559204, "learning_rate": 0.0001734040898538039, "loss": 3.0738, "step": 39096 }, { "epoch": 1.92, "grad_norm": 0.6491406559944153, "learning_rate": 0.00017339013180079136, "loss": 3.0714, "step": 39097 }, { "epoch": 1.92, "grad_norm": 0.6679297089576721, "learning_rate": 0.0001733761740812449, "loss": 2.9234, "step": 39098 }, { "epoch": 1.92, "grad_norm": 0.6698212027549744, "learning_rate": 0.00017336221669520135, "loss": 2.956, "step": 39099 }, { "epoch": 1.92, "grad_norm": 0.6277477145195007, "learning_rate": 0.00017334825964269722, "loss": 2.9522, "step": 39100 }, { "epoch": 1.92, "grad_norm": 0.63001549243927, "learning_rate": 0.00017333430292376965, "loss": 3.1925, "step": 39101 }, { "epoch": 1.92, "grad_norm": 0.6419739127159119, "learning_rate": 0.0001733203465384549, "loss": 2.9653, "step": 39102 }, { "epoch": 1.92, "grad_norm": 0.6369063258171082, "learning_rate": 0.00017330639048678995, "loss": 2.8996, "step": 39103 }, { "epoch": 1.92, "grad_norm": 0.7089871764183044, "learning_rate": 0.0001732924347688117, "loss": 2.9241, "step": 39104 }, { "epoch": 1.92, "grad_norm": 0.639457106590271, "learning_rate": 0.00017327847938455673, "loss": 2.9672, "step": 39105 }, { "epoch": 1.92, "grad_norm": 0.641934335231781, "learning_rate": 0.00017326452433406194, "loss": 2.8944, "step": 39106 }, { "epoch": 1.92, "grad_norm": 0.6629417538642883, "learning_rate": 0.0001732505696173639, "loss": 2.8033, "step": 39107 }, { "epoch": 1.92, "grad_norm": 0.6630560159683228, "learning_rate": 0.00017323661523449958, "loss": 2.9384, "step": 39108 }, { "epoch": 1.92, "grad_norm": 0.6467246413230896, "learning_rate": 0.00017322266118550557, "loss": 2.9888, "step": 39109 }, { "epoch": 1.92, "grad_norm": 0.6524175405502319, "learning_rate": 0.00017320870747041862, "loss": 3.2388, "step": 39110 }, { "epoch": 1.92, "grad_norm": 0.6555929183959961, "learning_rate": 0.00017319475408927564, "loss": 3.0857, "step": 39111 }, { "epoch": 1.92, "grad_norm": 0.6451212167739868, "learning_rate": 0.00017318080104211314, "loss": 2.8654, "step": 39112 }, { "epoch": 1.92, "grad_norm": 0.6508936285972595, "learning_rate": 0.00017316684832896815, "loss": 2.9725, "step": 39113 }, { "epoch": 1.92, "grad_norm": 0.6641062498092651, "learning_rate": 0.0001731528959498771, "loss": 2.8809, "step": 39114 }, { "epoch": 1.92, "grad_norm": 0.6545625329017639, "learning_rate": 0.0001731389439048771, "loss": 2.9962, "step": 39115 }, { "epoch": 1.92, "grad_norm": 0.652595579624176, "learning_rate": 0.00017312499219400461, "loss": 2.9298, "step": 39116 }, { "epoch": 1.92, "grad_norm": 0.6414437890052795, "learning_rate": 0.00017311104081729642, "loss": 2.9284, "step": 39117 }, { "epoch": 1.92, "grad_norm": 0.7035130262374878, "learning_rate": 0.00017309708977478942, "loss": 3.058, "step": 39118 }, { "epoch": 1.92, "grad_norm": 0.6662630438804626, "learning_rate": 0.00017308313906652016, "loss": 3.0701, "step": 39119 }, { "epoch": 1.92, "grad_norm": 0.645362377166748, "learning_rate": 0.0001730691886925255, "loss": 3.1298, "step": 39120 }, { "epoch": 1.92, "grad_norm": 0.6399428844451904, "learning_rate": 0.0001730552386528423, "loss": 3.0839, "step": 39121 }, { "epoch": 1.92, "grad_norm": 0.6200828552246094, "learning_rate": 0.000173041288947507, "loss": 2.9407, "step": 39122 }, { "epoch": 1.92, "grad_norm": 0.6302321553230286, "learning_rate": 0.00017302733957655662, "loss": 2.9237, "step": 39123 }, { "epoch": 1.92, "grad_norm": 0.6495705246925354, "learning_rate": 0.00017301339054002785, "loss": 2.8885, "step": 39124 }, { "epoch": 1.92, "grad_norm": 0.634657621383667, "learning_rate": 0.00017299944183795724, "loss": 3.1106, "step": 39125 }, { "epoch": 1.92, "grad_norm": 0.6378446817398071, "learning_rate": 0.00017298549347038178, "loss": 2.7874, "step": 39126 }, { "epoch": 1.92, "grad_norm": 0.6395211815834045, "learning_rate": 0.00017297154543733796, "loss": 3.0792, "step": 39127 }, { "epoch": 1.92, "grad_norm": 0.6783758997917175, "learning_rate": 0.0001729575977388627, "loss": 2.8279, "step": 39128 }, { "epoch": 1.92, "grad_norm": 0.6913485527038574, "learning_rate": 0.00017294365037499263, "loss": 3.101, "step": 39129 }, { "epoch": 1.92, "grad_norm": 0.659443199634552, "learning_rate": 0.00017292970334576466, "loss": 3.1545, "step": 39130 }, { "epoch": 1.92, "grad_norm": 0.6611900329589844, "learning_rate": 0.00017291575665121534, "loss": 2.865, "step": 39131 }, { "epoch": 1.92, "grad_norm": 0.6406236290931702, "learning_rate": 0.0001729018102913814, "loss": 3.0973, "step": 39132 }, { "epoch": 1.92, "grad_norm": 0.6783467531204224, "learning_rate": 0.0001728878642662997, "loss": 3.074, "step": 39133 }, { "epoch": 1.92, "grad_norm": 0.6730901002883911, "learning_rate": 0.0001728739185760068, "loss": 2.8599, "step": 39134 }, { "epoch": 1.92, "grad_norm": 0.6598025560379028, "learning_rate": 0.00017285997322053954, "loss": 3.1268, "step": 39135 }, { "epoch": 1.92, "grad_norm": 0.6816617846488953, "learning_rate": 0.00017284602819993476, "loss": 2.944, "step": 39136 }, { "epoch": 1.92, "grad_norm": 0.6511579155921936, "learning_rate": 0.00017283208351422908, "loss": 3.1893, "step": 39137 }, { "epoch": 1.92, "grad_norm": 0.693321943283081, "learning_rate": 0.00017281813916345917, "loss": 3.0119, "step": 39138 }, { "epoch": 1.92, "grad_norm": 0.6485716700553894, "learning_rate": 0.00017280419514766174, "loss": 2.7844, "step": 39139 }, { "epoch": 1.92, "grad_norm": 0.6522268056869507, "learning_rate": 0.00017279025146687353, "loss": 2.9277, "step": 39140 }, { "epoch": 1.92, "grad_norm": 0.6479056477546692, "learning_rate": 0.0001727763081211315, "loss": 2.686, "step": 39141 }, { "epoch": 1.92, "grad_norm": 0.6812885999679565, "learning_rate": 0.00017276236511047202, "loss": 2.8859, "step": 39142 }, { "epoch": 1.92, "grad_norm": 0.6709456443786621, "learning_rate": 0.0001727484224349321, "loss": 2.9349, "step": 39143 }, { "epoch": 1.92, "grad_norm": 0.6877416372299194, "learning_rate": 0.00017273448009454825, "loss": 2.9836, "step": 39144 }, { "epoch": 1.92, "grad_norm": 0.6465352177619934, "learning_rate": 0.00017272053808935743, "loss": 3.0815, "step": 39145 }, { "epoch": 1.92, "grad_norm": 0.6705036163330078, "learning_rate": 0.0001727065964193962, "loss": 3.1177, "step": 39146 }, { "epoch": 1.92, "grad_norm": 0.6689993739128113, "learning_rate": 0.00017269265508470112, "loss": 3.0918, "step": 39147 }, { "epoch": 1.92, "grad_norm": 0.6450504660606384, "learning_rate": 0.00017267871408530928, "loss": 3.0432, "step": 39148 }, { "epoch": 1.92, "grad_norm": 0.6703768968582153, "learning_rate": 0.00017266477342125702, "loss": 3.0998, "step": 39149 }, { "epoch": 1.92, "grad_norm": 0.6024968028068542, "learning_rate": 0.00017265083309258127, "loss": 2.9523, "step": 39150 }, { "epoch": 1.92, "grad_norm": 0.6327059268951416, "learning_rate": 0.00017263689309931886, "loss": 3.127, "step": 39151 }, { "epoch": 1.92, "grad_norm": 0.709504246711731, "learning_rate": 0.00017262295344150634, "loss": 3.1062, "step": 39152 }, { "epoch": 1.92, "grad_norm": 0.6775631308555603, "learning_rate": 0.00017260901411918043, "loss": 3.0224, "step": 39153 }, { "epoch": 1.92, "grad_norm": 0.6679512858390808, "learning_rate": 0.00017259507513237777, "loss": 2.8959, "step": 39154 }, { "epoch": 1.92, "grad_norm": 0.7322613596916199, "learning_rate": 0.00017258113648113517, "loss": 2.9774, "step": 39155 }, { "epoch": 1.92, "grad_norm": 0.6525643467903137, "learning_rate": 0.00017256719816548943, "loss": 3.0671, "step": 39156 }, { "epoch": 1.92, "grad_norm": 0.662835419178009, "learning_rate": 0.00017255326018547707, "loss": 2.9745, "step": 39157 }, { "epoch": 1.92, "grad_norm": 0.7078630328178406, "learning_rate": 0.000172539322541135, "loss": 3.0263, "step": 39158 }, { "epoch": 1.92, "grad_norm": 0.6778189539909363, "learning_rate": 0.0001725253852324998, "loss": 3.1786, "step": 39159 }, { "epoch": 1.92, "grad_norm": 0.7059077024459839, "learning_rate": 0.00017251144825960812, "loss": 2.7726, "step": 39160 }, { "epoch": 1.92, "grad_norm": 0.6346893310546875, "learning_rate": 0.00017249751162249686, "loss": 2.9413, "step": 39161 }, { "epoch": 1.92, "grad_norm": 0.6974074840545654, "learning_rate": 0.00017248357532120248, "loss": 3.048, "step": 39162 }, { "epoch": 1.92, "grad_norm": 0.6645182967185974, "learning_rate": 0.00017246963935576196, "loss": 2.7881, "step": 39163 }, { "epoch": 1.92, "grad_norm": 0.6341907978057861, "learning_rate": 0.0001724557037262117, "loss": 3.1263, "step": 39164 }, { "epoch": 1.92, "grad_norm": 0.625954806804657, "learning_rate": 0.00017244176843258862, "loss": 2.7248, "step": 39165 }, { "epoch": 1.92, "grad_norm": 0.6558056473731995, "learning_rate": 0.0001724278334749295, "loss": 3.1032, "step": 39166 }, { "epoch": 1.92, "grad_norm": 0.6719792485237122, "learning_rate": 0.00017241389885327087, "loss": 2.9548, "step": 39167 }, { "epoch": 1.92, "grad_norm": 0.682371199131012, "learning_rate": 0.00017239996456764952, "loss": 3.1351, "step": 39168 }, { "epoch": 1.92, "grad_norm": 0.6656419634819031, "learning_rate": 0.00017238603061810195, "loss": 2.8981, "step": 39169 }, { "epoch": 1.92, "grad_norm": 0.6694563031196594, "learning_rate": 0.00017237209700466502, "loss": 3.2168, "step": 39170 }, { "epoch": 1.92, "grad_norm": 0.7101410627365112, "learning_rate": 0.00017235816372737558, "loss": 2.9831, "step": 39171 }, { "epoch": 1.92, "grad_norm": 0.6762426495552063, "learning_rate": 0.00017234423078627003, "loss": 2.8293, "step": 39172 }, { "epoch": 1.92, "grad_norm": 0.617826521396637, "learning_rate": 0.00017233029818138532, "loss": 2.8919, "step": 39173 }, { "epoch": 1.92, "grad_norm": 0.7168938517570496, "learning_rate": 0.00017231636591275807, "loss": 2.9602, "step": 39174 }, { "epoch": 1.92, "grad_norm": 0.6657389998435974, "learning_rate": 0.00017230243398042475, "loss": 2.9613, "step": 39175 }, { "epoch": 1.92, "grad_norm": 0.6456219553947449, "learning_rate": 0.0001722885023844224, "loss": 2.8773, "step": 39176 }, { "epoch": 1.92, "grad_norm": 0.6605064868927002, "learning_rate": 0.00017227457112478745, "loss": 3.088, "step": 39177 }, { "epoch": 1.92, "grad_norm": 0.6815346479415894, "learning_rate": 0.0001722606402015568, "loss": 3.0484, "step": 39178 }, { "epoch": 1.92, "grad_norm": 0.6612982153892517, "learning_rate": 0.0001722467096147669, "loss": 3.0271, "step": 39179 }, { "epoch": 1.92, "grad_norm": 0.6247538328170776, "learning_rate": 0.00017223277936445475, "loss": 2.8698, "step": 39180 }, { "epoch": 1.92, "grad_norm": 0.6820695996284485, "learning_rate": 0.00017221884945065687, "loss": 3.0734, "step": 39181 }, { "epoch": 1.92, "grad_norm": 0.6383649110794067, "learning_rate": 0.0001722049198734098, "loss": 3.1281, "step": 39182 }, { "epoch": 1.92, "grad_norm": 0.6495553255081177, "learning_rate": 0.00017219099063275052, "loss": 3.0078, "step": 39183 }, { "epoch": 1.92, "grad_norm": 0.6677046418190002, "learning_rate": 0.0001721770617287154, "loss": 3.0541, "step": 39184 }, { "epoch": 1.92, "grad_norm": 0.6571451425552368, "learning_rate": 0.00017216313316134135, "loss": 2.964, "step": 39185 }, { "epoch": 1.92, "grad_norm": 0.6236145496368408, "learning_rate": 0.0001721492049306651, "loss": 2.8809, "step": 39186 }, { "epoch": 1.92, "grad_norm": 0.6534735560417175, "learning_rate": 0.00017213527703672314, "loss": 2.8744, "step": 39187 }, { "epoch": 1.92, "grad_norm": 0.6529717445373535, "learning_rate": 0.00017212134947955235, "loss": 3.0686, "step": 39188 }, { "epoch": 1.92, "grad_norm": 0.6746391654014587, "learning_rate": 0.00017210742225918935, "loss": 3.1402, "step": 39189 }, { "epoch": 1.92, "grad_norm": 0.6508035659790039, "learning_rate": 0.00017209349537567072, "loss": 2.9857, "step": 39190 }, { "epoch": 1.92, "grad_norm": 0.6782659888267517, "learning_rate": 0.00017207956882903314, "loss": 3.0049, "step": 39191 }, { "epoch": 1.92, "grad_norm": 0.6732063293457031, "learning_rate": 0.00017206564261931334, "loss": 2.904, "step": 39192 }, { "epoch": 1.92, "grad_norm": 0.719319760799408, "learning_rate": 0.00017205171674654815, "loss": 3.0394, "step": 39193 }, { "epoch": 1.92, "grad_norm": 0.6435275673866272, "learning_rate": 0.00017203779121077397, "loss": 2.911, "step": 39194 }, { "epoch": 1.92, "grad_norm": 0.6958181858062744, "learning_rate": 0.00017202386601202776, "loss": 3.1103, "step": 39195 }, { "epoch": 1.92, "grad_norm": 0.6404264569282532, "learning_rate": 0.00017200994115034606, "loss": 2.9457, "step": 39196 }, { "epoch": 1.92, "grad_norm": 0.6635984778404236, "learning_rate": 0.00017199601662576543, "loss": 2.8922, "step": 39197 }, { "epoch": 1.92, "grad_norm": 0.6762363910675049, "learning_rate": 0.00017198209243832278, "loss": 3.0213, "step": 39198 }, { "epoch": 1.92, "grad_norm": 0.6661695241928101, "learning_rate": 0.0001719681685880545, "loss": 2.809, "step": 39199 }, { "epoch": 1.92, "grad_norm": 0.6384485363960266, "learning_rate": 0.00017195424507499755, "loss": 3.1867, "step": 39200 }, { "epoch": 1.92, "grad_norm": 0.6842610836029053, "learning_rate": 0.0001719403218991884, "loss": 2.9495, "step": 39201 }, { "epoch": 1.92, "grad_norm": 0.6691260933876038, "learning_rate": 0.00017192639906066386, "loss": 3.0114, "step": 39202 }, { "epoch": 1.92, "grad_norm": 0.6943384408950806, "learning_rate": 0.00017191247655946057, "loss": 3.1103, "step": 39203 }, { "epoch": 1.92, "grad_norm": 0.6369804739952087, "learning_rate": 0.00017189855439561503, "loss": 3.121, "step": 39204 }, { "epoch": 1.92, "grad_norm": 0.6395381689071655, "learning_rate": 0.00017188463256916417, "loss": 2.9585, "step": 39205 }, { "epoch": 1.92, "grad_norm": 0.621781051158905, "learning_rate": 0.00017187071108014447, "loss": 2.9129, "step": 39206 }, { "epoch": 1.92, "grad_norm": 0.6474804282188416, "learning_rate": 0.00017185678992859258, "loss": 2.9731, "step": 39207 }, { "epoch": 1.92, "grad_norm": 0.6578860282897949, "learning_rate": 0.0001718428691145454, "loss": 2.8956, "step": 39208 }, { "epoch": 1.92, "grad_norm": 0.7093460559844971, "learning_rate": 0.00017182894863803928, "loss": 2.9726, "step": 39209 }, { "epoch": 1.92, "grad_norm": 0.6357724070549011, "learning_rate": 0.00017181502849911121, "loss": 3.2573, "step": 39210 }, { "epoch": 1.92, "grad_norm": 0.6507198214530945, "learning_rate": 0.0001718011086977977, "loss": 2.9724, "step": 39211 }, { "epoch": 1.92, "grad_norm": 0.6744953393936157, "learning_rate": 0.00017178718923413524, "loss": 3.127, "step": 39212 }, { "epoch": 1.92, "grad_norm": 0.6369718313217163, "learning_rate": 0.00017177327010816076, "loss": 2.9405, "step": 39213 }, { "epoch": 1.92, "grad_norm": 0.6532459259033203, "learning_rate": 0.00017175935131991067, "loss": 2.9442, "step": 39214 }, { "epoch": 1.92, "grad_norm": 0.6640715003013611, "learning_rate": 0.00017174543286942192, "loss": 2.7625, "step": 39215 }, { "epoch": 1.92, "grad_norm": 0.7218378782272339, "learning_rate": 0.0001717315147567309, "loss": 3.116, "step": 39216 }, { "epoch": 1.92, "grad_norm": 0.7001231908798218, "learning_rate": 0.0001717175969818745, "loss": 2.9236, "step": 39217 }, { "epoch": 1.92, "grad_norm": 0.6841039657592773, "learning_rate": 0.00017170367954488926, "loss": 2.7602, "step": 39218 }, { "epoch": 1.92, "grad_norm": 0.67069011926651, "learning_rate": 0.00017168976244581174, "loss": 3.0251, "step": 39219 }, { "epoch": 1.92, "grad_norm": 0.6532872319221497, "learning_rate": 0.0001716758456846788, "loss": 3.0348, "step": 39220 }, { "epoch": 1.92, "grad_norm": 0.6450393795967102, "learning_rate": 0.00017166192926152684, "loss": 3.1042, "step": 39221 }, { "epoch": 1.92, "grad_norm": 0.6509761214256287, "learning_rate": 0.0001716480131763927, "loss": 2.8918, "step": 39222 }, { "epoch": 1.92, "grad_norm": 0.6896138191223145, "learning_rate": 0.00017163409742931303, "loss": 3.0114, "step": 39223 }, { "epoch": 1.92, "grad_norm": 0.6828722953796387, "learning_rate": 0.00017162018202032442, "loss": 3.0185, "step": 39224 }, { "epoch": 1.92, "grad_norm": 0.6410359144210815, "learning_rate": 0.0001716062669494636, "loss": 3.0876, "step": 39225 }, { "epoch": 1.92, "grad_norm": 0.6558801531791687, "learning_rate": 0.0001715923522167672, "loss": 2.8841, "step": 39226 }, { "epoch": 1.92, "grad_norm": 0.6510255336761475, "learning_rate": 0.00017157843782227169, "loss": 2.9658, "step": 39227 }, { "epoch": 1.92, "grad_norm": 0.6774110794067383, "learning_rate": 0.00017156452376601398, "loss": 3.0839, "step": 39228 }, { "epoch": 1.92, "grad_norm": 0.6347213983535767, "learning_rate": 0.0001715506100480305, "loss": 3.1037, "step": 39229 }, { "epoch": 1.92, "grad_norm": 0.6718534827232361, "learning_rate": 0.00017153669666835809, "loss": 3.0421, "step": 39230 }, { "epoch": 1.92, "grad_norm": 0.674842894077301, "learning_rate": 0.00017152278362703318, "loss": 3.0798, "step": 39231 }, { "epoch": 1.92, "grad_norm": 0.6837745308876038, "learning_rate": 0.00017150887092409265, "loss": 2.7996, "step": 39232 }, { "epoch": 1.92, "grad_norm": 0.6717110276222229, "learning_rate": 0.00017149495855957303, "loss": 2.842, "step": 39233 }, { "epoch": 1.92, "grad_norm": 0.6553992629051208, "learning_rate": 0.00017148104653351087, "loss": 2.9463, "step": 39234 }, { "epoch": 1.92, "grad_norm": 0.702616810798645, "learning_rate": 0.00017146713484594297, "loss": 2.8664, "step": 39235 }, { "epoch": 1.92, "grad_norm": 0.6626030206680298, "learning_rate": 0.0001714532234969058, "loss": 3.052, "step": 39236 }, { "epoch": 1.92, "grad_norm": 0.6390082240104675, "learning_rate": 0.00017143931248643607, "loss": 3.0915, "step": 39237 }, { "epoch": 1.92, "grad_norm": 0.6931836009025574, "learning_rate": 0.0001714254018145706, "loss": 3.2053, "step": 39238 }, { "epoch": 1.92, "grad_norm": 0.6288201212882996, "learning_rate": 0.0001714114914813459, "loss": 3.1003, "step": 39239 }, { "epoch": 1.92, "grad_norm": 0.6410542130470276, "learning_rate": 0.00017139758148679854, "loss": 3.0281, "step": 39240 }, { "epoch": 1.92, "grad_norm": 0.664252758026123, "learning_rate": 0.0001713836718309651, "loss": 3.0345, "step": 39241 }, { "epoch": 1.92, "grad_norm": 0.6826432943344116, "learning_rate": 0.00017136976251388232, "loss": 3.0711, "step": 39242 }, { "epoch": 1.92, "grad_norm": 0.6907158493995667, "learning_rate": 0.00017135585353558695, "loss": 2.7451, "step": 39243 }, { "epoch": 1.92, "grad_norm": 0.6534987092018127, "learning_rate": 0.00017134194489611538, "loss": 3.0869, "step": 39244 }, { "epoch": 1.92, "grad_norm": 0.6228570342063904, "learning_rate": 0.0001713280365955045, "loss": 3.0009, "step": 39245 }, { "epoch": 1.92, "grad_norm": 0.697776198387146, "learning_rate": 0.00017131412863379068, "loss": 3.175, "step": 39246 }, { "epoch": 1.92, "grad_norm": 0.6710581183433533, "learning_rate": 0.0001713002210110108, "loss": 2.8232, "step": 39247 }, { "epoch": 1.92, "grad_norm": 0.7085484862327576, "learning_rate": 0.00017128631372720138, "loss": 2.9369, "step": 39248 }, { "epoch": 1.92, "grad_norm": 0.666691780090332, "learning_rate": 0.00017127240678239884, "loss": 2.9581, "step": 39249 }, { "epoch": 1.92, "grad_norm": 0.686436653137207, "learning_rate": 0.0001712585001766402, "loss": 2.9945, "step": 39250 }, { "epoch": 1.92, "grad_norm": 0.6662260890007019, "learning_rate": 0.00017124459390996174, "loss": 2.924, "step": 39251 }, { "epoch": 1.92, "grad_norm": 0.6701951026916504, "learning_rate": 0.0001712306879824003, "loss": 3.0694, "step": 39252 }, { "epoch": 1.92, "grad_norm": 0.6563501358032227, "learning_rate": 0.00017121678239399252, "loss": 3.0597, "step": 39253 }, { "epoch": 1.92, "grad_norm": 0.6329666972160339, "learning_rate": 0.00017120287714477494, "loss": 2.7156, "step": 39254 }, { "epoch": 1.92, "grad_norm": 0.6433852314949036, "learning_rate": 0.0001711889722347842, "loss": 2.9309, "step": 39255 }, { "epoch": 1.92, "grad_norm": 0.7456541657447815, "learning_rate": 0.00017117506766405684, "loss": 2.9767, "step": 39256 }, { "epoch": 1.92, "grad_norm": 0.6476452350616455, "learning_rate": 0.0001711611634326295, "loss": 2.9284, "step": 39257 }, { "epoch": 1.92, "grad_norm": 0.6435794830322266, "learning_rate": 0.00017114725954053904, "loss": 3.1301, "step": 39258 }, { "epoch": 1.92, "grad_norm": 0.6286764144897461, "learning_rate": 0.0001711333559878217, "loss": 3.2672, "step": 39259 }, { "epoch": 1.92, "grad_norm": 0.6440817713737488, "learning_rate": 0.00017111945277451445, "loss": 3.0443, "step": 39260 }, { "epoch": 1.92, "grad_norm": 0.6571938991546631, "learning_rate": 0.0001711055499006538, "loss": 2.9511, "step": 39261 }, { "epoch": 1.92, "grad_norm": 0.7274107933044434, "learning_rate": 0.00017109164736627618, "loss": 2.9802, "step": 39262 }, { "epoch": 1.92, "grad_norm": 0.6378130912780762, "learning_rate": 0.00017107774517141847, "loss": 3.0211, "step": 39263 }, { "epoch": 1.92, "grad_norm": 0.6885344386100769, "learning_rate": 0.00017106384331611705, "loss": 2.9844, "step": 39264 }, { "epoch": 1.92, "grad_norm": 0.6548222303390503, "learning_rate": 0.0001710499418004088, "loss": 3.0318, "step": 39265 }, { "epoch": 1.92, "grad_norm": 0.681775689125061, "learning_rate": 0.00017103604062433, "loss": 2.7903, "step": 39266 }, { "epoch": 1.92, "grad_norm": 0.674473226070404, "learning_rate": 0.00017102213978791763, "loss": 3.0038, "step": 39267 }, { "epoch": 1.92, "grad_norm": 0.6563646793365479, "learning_rate": 0.00017100823929120797, "loss": 2.8295, "step": 39268 }, { "epoch": 1.92, "grad_norm": 0.6491327881813049, "learning_rate": 0.00017099433913423791, "loss": 3.0033, "step": 39269 }, { "epoch": 1.92, "grad_norm": 0.6606160402297974, "learning_rate": 0.00017098043931704394, "loss": 3.0782, "step": 39270 }, { "epoch": 1.92, "grad_norm": 0.6784059405326843, "learning_rate": 0.00017096653983966252, "loss": 3.1224, "step": 39271 }, { "epoch": 1.92, "grad_norm": 0.6741899251937866, "learning_rate": 0.00017095264070213055, "loss": 2.8379, "step": 39272 }, { "epoch": 1.92, "grad_norm": 0.6609475016593933, "learning_rate": 0.00017093874190448434, "loss": 3.0115, "step": 39273 }, { "epoch": 1.92, "grad_norm": 0.7126678228378296, "learning_rate": 0.00017092484344676068, "loss": 2.9805, "step": 39274 }, { "epoch": 1.92, "grad_norm": 0.6484171152114868, "learning_rate": 0.00017091094532899624, "loss": 2.8915, "step": 39275 }, { "epoch": 1.92, "grad_norm": 0.6500241160392761, "learning_rate": 0.0001708970475512275, "loss": 2.8879, "step": 39276 }, { "epoch": 1.92, "grad_norm": 0.6842443943023682, "learning_rate": 0.00017088315011349115, "loss": 3.0103, "step": 39277 }, { "epoch": 1.92, "grad_norm": 0.6493595838546753, "learning_rate": 0.00017086925301582358, "loss": 2.9825, "step": 39278 }, { "epoch": 1.92, "grad_norm": 0.6276535391807556, "learning_rate": 0.00017085535625826156, "loss": 3.0221, "step": 39279 }, { "epoch": 1.93, "grad_norm": 0.693079948425293, "learning_rate": 0.00017084145984084175, "loss": 3.1203, "step": 39280 }, { "epoch": 1.93, "grad_norm": 0.6312422156333923, "learning_rate": 0.0001708275637636006, "loss": 3.001, "step": 39281 }, { "epoch": 1.93, "grad_norm": 0.6259576082229614, "learning_rate": 0.0001708136680265749, "loss": 2.9753, "step": 39282 }, { "epoch": 1.93, "grad_norm": 0.6311910152435303, "learning_rate": 0.00017079977262980116, "loss": 3.1335, "step": 39283 }, { "epoch": 1.93, "grad_norm": 0.6822161674499512, "learning_rate": 0.0001707858775733158, "loss": 2.8774, "step": 39284 }, { "epoch": 1.93, "grad_norm": 0.7114952802658081, "learning_rate": 0.0001707719828571557, "loss": 3.1555, "step": 39285 }, { "epoch": 1.93, "grad_norm": 0.6649879813194275, "learning_rate": 0.00017075808848135722, "loss": 3.0921, "step": 39286 }, { "epoch": 1.93, "grad_norm": 0.6248159408569336, "learning_rate": 0.00017074419444595716, "loss": 2.9811, "step": 39287 }, { "epoch": 1.93, "grad_norm": 0.6573299169540405, "learning_rate": 0.00017073030075099191, "loss": 2.8717, "step": 39288 }, { "epoch": 1.93, "grad_norm": 0.7020032405853271, "learning_rate": 0.00017071640739649814, "loss": 2.8755, "step": 39289 }, { "epoch": 1.93, "grad_norm": 0.6383627653121948, "learning_rate": 0.00017070251438251264, "loss": 2.9567, "step": 39290 }, { "epoch": 1.93, "grad_norm": 0.7252497673034668, "learning_rate": 0.00017068862170907181, "loss": 2.9716, "step": 39291 }, { "epoch": 1.93, "grad_norm": 0.666100025177002, "learning_rate": 0.00017067472937621229, "loss": 3.1238, "step": 39292 }, { "epoch": 1.93, "grad_norm": 0.6566705107688904, "learning_rate": 0.00017066083738397048, "loss": 3.0706, "step": 39293 }, { "epoch": 1.93, "grad_norm": 0.657951295375824, "learning_rate": 0.00017064694573238316, "loss": 3.0127, "step": 39294 }, { "epoch": 1.93, "grad_norm": 0.6560387015342712, "learning_rate": 0.000170633054421487, "loss": 2.9246, "step": 39295 }, { "epoch": 1.93, "grad_norm": 0.6837169528007507, "learning_rate": 0.00017061916345131834, "loss": 3.0309, "step": 39296 }, { "epoch": 1.93, "grad_norm": 0.7252517938613892, "learning_rate": 0.00017060527282191406, "loss": 2.6591, "step": 39297 }, { "epoch": 1.93, "grad_norm": 0.6759359240531921, "learning_rate": 0.00017059138253331055, "loss": 3.1497, "step": 39298 }, { "epoch": 1.93, "grad_norm": 0.631263256072998, "learning_rate": 0.00017057749258554434, "loss": 3.1716, "step": 39299 }, { "epoch": 1.93, "grad_norm": 0.6345106959342957, "learning_rate": 0.0001705636029786522, "loss": 2.8964, "step": 39300 }, { "epoch": 1.93, "grad_norm": 0.6319267153739929, "learning_rate": 0.00017054971371267054, "loss": 3.0213, "step": 39301 }, { "epoch": 1.93, "grad_norm": 0.6083059906959534, "learning_rate": 0.00017053582478763614, "loss": 2.8726, "step": 39302 }, { "epoch": 1.93, "grad_norm": 0.6584345698356628, "learning_rate": 0.0001705219362035853, "loss": 2.9741, "step": 39303 }, { "epoch": 1.93, "grad_norm": 0.720427930355072, "learning_rate": 0.0001705080479605549, "loss": 2.8017, "step": 39304 }, { "epoch": 1.93, "grad_norm": 0.6475998759269714, "learning_rate": 0.00017049416005858142, "loss": 3.1804, "step": 39305 }, { "epoch": 1.93, "grad_norm": 0.6432875394821167, "learning_rate": 0.00017048027249770122, "loss": 2.8589, "step": 39306 }, { "epoch": 1.93, "grad_norm": 0.645972490310669, "learning_rate": 0.0001704663852779512, "loss": 2.9266, "step": 39307 }, { "epoch": 1.93, "grad_norm": 0.6561703681945801, "learning_rate": 0.00017045249839936768, "loss": 3.0237, "step": 39308 }, { "epoch": 1.93, "grad_norm": 0.6930022239685059, "learning_rate": 0.00017043861186198732, "loss": 2.9589, "step": 39309 }, { "epoch": 1.93, "grad_norm": 0.6722618937492371, "learning_rate": 0.00017042472566584688, "loss": 3.1136, "step": 39310 }, { "epoch": 1.93, "grad_norm": 0.6967858672142029, "learning_rate": 0.00017041083981098263, "loss": 2.8958, "step": 39311 }, { "epoch": 1.93, "grad_norm": 0.649520218372345, "learning_rate": 0.00017039695429743143, "loss": 3.0567, "step": 39312 }, { "epoch": 1.93, "grad_norm": 0.6798179745674133, "learning_rate": 0.0001703830691252297, "loss": 3.0234, "step": 39313 }, { "epoch": 1.93, "grad_norm": 0.6915568709373474, "learning_rate": 0.00017036918429441387, "loss": 2.9978, "step": 39314 }, { "epoch": 1.93, "grad_norm": 0.6442030072212219, "learning_rate": 0.0001703552998050208, "loss": 3.2237, "step": 39315 }, { "epoch": 1.93, "grad_norm": 0.6549793481826782, "learning_rate": 0.00017034141565708683, "loss": 3.2327, "step": 39316 }, { "epoch": 1.93, "grad_norm": 0.6851274371147156, "learning_rate": 0.0001703275318506487, "loss": 2.8293, "step": 39317 }, { "epoch": 1.93, "grad_norm": 0.6663837432861328, "learning_rate": 0.0001703136483857428, "loss": 2.8694, "step": 39318 }, { "epoch": 1.93, "grad_norm": 0.6201865077018738, "learning_rate": 0.00017029976526240588, "loss": 2.9923, "step": 39319 }, { "epoch": 1.93, "grad_norm": 0.6380475759506226, "learning_rate": 0.00017028588248067445, "loss": 2.9472, "step": 39320 }, { "epoch": 1.93, "grad_norm": 0.6493070721626282, "learning_rate": 0.0001702720000405849, "loss": 3.1009, "step": 39321 }, { "epoch": 1.93, "grad_norm": 0.6758432388305664, "learning_rate": 0.00017025811794217408, "loss": 2.8572, "step": 39322 }, { "epoch": 1.93, "grad_norm": 0.6773945093154907, "learning_rate": 0.00017024423618547824, "loss": 3.0127, "step": 39323 }, { "epoch": 1.93, "grad_norm": 0.6368323564529419, "learning_rate": 0.00017023035477053416, "loss": 2.8353, "step": 39324 }, { "epoch": 1.93, "grad_norm": 0.6623914241790771, "learning_rate": 0.00017021647369737844, "loss": 3.0751, "step": 39325 }, { "epoch": 1.93, "grad_norm": 0.6415942311286926, "learning_rate": 0.00017020259296604743, "loss": 2.9105, "step": 39326 }, { "epoch": 1.93, "grad_norm": 0.6625025868415833, "learning_rate": 0.00017018871257657808, "loss": 2.912, "step": 39327 }, { "epoch": 1.93, "grad_norm": 0.6630984544754028, "learning_rate": 0.00017017483252900641, "loss": 2.9823, "step": 39328 }, { "epoch": 1.93, "grad_norm": 0.637005090713501, "learning_rate": 0.00017016095282336926, "loss": 3.0265, "step": 39329 }, { "epoch": 1.93, "grad_norm": 0.6559372544288635, "learning_rate": 0.0001701470734597033, "loss": 3.1208, "step": 39330 }, { "epoch": 1.93, "grad_norm": 0.683228075504303, "learning_rate": 0.0001701331944380448, "loss": 3.2746, "step": 39331 }, { "epoch": 1.93, "grad_norm": 0.6639457941055298, "learning_rate": 0.00017011931575843062, "loss": 3.0369, "step": 39332 }, { "epoch": 1.93, "grad_norm": 0.691205620765686, "learning_rate": 0.00017010543742089707, "loss": 2.921, "step": 39333 }, { "epoch": 1.93, "grad_norm": 0.6341986656188965, "learning_rate": 0.00017009155942548088, "loss": 3.1622, "step": 39334 }, { "epoch": 1.93, "grad_norm": 0.6385431289672852, "learning_rate": 0.00017007768177221857, "loss": 2.8519, "step": 39335 }, { "epoch": 1.93, "grad_norm": 0.6719568967819214, "learning_rate": 0.00017006380446114646, "loss": 2.9594, "step": 39336 }, { "epoch": 1.93, "grad_norm": 0.6813459396362305, "learning_rate": 0.00017004992749230144, "loss": 2.9986, "step": 39337 }, { "epoch": 1.93, "grad_norm": 0.7117205262184143, "learning_rate": 0.0001700360508657198, "loss": 3.0113, "step": 39338 }, { "epoch": 1.93, "grad_norm": 0.6474804878234863, "learning_rate": 0.00017002217458143813, "loss": 2.8304, "step": 39339 }, { "epoch": 1.93, "grad_norm": 0.6792690753936768, "learning_rate": 0.00017000829863949316, "loss": 2.9919, "step": 39340 }, { "epoch": 1.93, "grad_norm": 0.6472276449203491, "learning_rate": 0.00016999442303992137, "loss": 3.0074, "step": 39341 }, { "epoch": 1.93, "grad_norm": 0.6135537028312683, "learning_rate": 0.0001699805477827592, "loss": 2.929, "step": 39342 }, { "epoch": 1.93, "grad_norm": 0.6506457924842834, "learning_rate": 0.00016996667286804318, "loss": 2.8383, "step": 39343 }, { "epoch": 1.93, "grad_norm": 0.6891106963157654, "learning_rate": 0.00016995279829580998, "loss": 2.7531, "step": 39344 }, { "epoch": 1.93, "grad_norm": 0.6155516505241394, "learning_rate": 0.000169938924066096, "loss": 2.9584, "step": 39345 }, { "epoch": 1.93, "grad_norm": 0.6519812941551208, "learning_rate": 0.00016992505017893782, "loss": 3.0768, "step": 39346 }, { "epoch": 1.93, "grad_norm": 0.6716668605804443, "learning_rate": 0.0001699111766343722, "loss": 2.9624, "step": 39347 }, { "epoch": 1.93, "grad_norm": 0.6471678614616394, "learning_rate": 0.00016989730343243536, "loss": 3.1556, "step": 39348 }, { "epoch": 1.93, "grad_norm": 0.6281684637069702, "learning_rate": 0.0001698834305731641, "loss": 3.1132, "step": 39349 }, { "epoch": 1.93, "grad_norm": 0.621414840221405, "learning_rate": 0.00016986955805659487, "loss": 2.9695, "step": 39350 }, { "epoch": 1.93, "grad_norm": 0.6490262746810913, "learning_rate": 0.000169855685882764, "loss": 2.8509, "step": 39351 }, { "epoch": 1.93, "grad_norm": 0.687820553779602, "learning_rate": 0.00016984181405170837, "loss": 3.0534, "step": 39352 }, { "epoch": 1.93, "grad_norm": 0.6453794240951538, "learning_rate": 0.00016982794256346424, "loss": 3.0, "step": 39353 }, { "epoch": 1.93, "grad_norm": 0.7020411491394043, "learning_rate": 0.00016981407141806836, "loss": 3.0087, "step": 39354 }, { "epoch": 1.93, "grad_norm": 0.6691149473190308, "learning_rate": 0.00016980020061555702, "loss": 2.934, "step": 39355 }, { "epoch": 1.93, "grad_norm": 0.6700564622879028, "learning_rate": 0.00016978633015596708, "loss": 2.7317, "step": 39356 }, { "epoch": 1.93, "grad_norm": 0.6754282712936401, "learning_rate": 0.0001697724600393348, "loss": 3.1237, "step": 39357 }, { "epoch": 1.93, "grad_norm": 0.6140692830085754, "learning_rate": 0.00016975859026569672, "loss": 3.0405, "step": 39358 }, { "epoch": 1.93, "grad_norm": 0.6934306025505066, "learning_rate": 0.00016974472083508958, "loss": 3.0548, "step": 39359 }, { "epoch": 1.93, "grad_norm": 0.7280018925666809, "learning_rate": 0.00016973085174754962, "loss": 2.8646, "step": 39360 }, { "epoch": 1.93, "grad_norm": 0.664756178855896, "learning_rate": 0.00016971698300311357, "loss": 3.0039, "step": 39361 }, { "epoch": 1.93, "grad_norm": 0.6578002572059631, "learning_rate": 0.000169703114601818, "loss": 2.6559, "step": 39362 }, { "epoch": 1.93, "grad_norm": 0.661143958568573, "learning_rate": 0.0001696892465436994, "loss": 2.5943, "step": 39363 }, { "epoch": 1.93, "grad_norm": 0.6856162548065186, "learning_rate": 0.00016967537882879417, "loss": 3.0467, "step": 39364 }, { "epoch": 1.93, "grad_norm": 0.661816418170929, "learning_rate": 0.00016966151145713886, "loss": 3.0191, "step": 39365 }, { "epoch": 1.93, "grad_norm": 0.665028989315033, "learning_rate": 0.00016964764442877001, "loss": 2.9423, "step": 39366 }, { "epoch": 1.93, "grad_norm": 0.702292799949646, "learning_rate": 0.00016963377774372433, "loss": 3.2149, "step": 39367 }, { "epoch": 1.93, "grad_norm": 0.6974702477455139, "learning_rate": 0.00016961991140203802, "loss": 2.791, "step": 39368 }, { "epoch": 1.93, "grad_norm": 0.6458693146705627, "learning_rate": 0.0001696060454037479, "loss": 3.1692, "step": 39369 }, { "epoch": 1.93, "grad_norm": 0.6239611506462097, "learning_rate": 0.0001695921797488903, "loss": 3.1647, "step": 39370 }, { "epoch": 1.93, "grad_norm": 0.6828367710113525, "learning_rate": 0.00016957831443750184, "loss": 2.7982, "step": 39371 }, { "epoch": 1.93, "grad_norm": 0.6554055213928223, "learning_rate": 0.00016956444946961905, "loss": 2.841, "step": 39372 }, { "epoch": 1.93, "grad_norm": 0.6682385206222534, "learning_rate": 0.00016955058484527825, "loss": 3.153, "step": 39373 }, { "epoch": 1.93, "grad_norm": 0.6697953939437866, "learning_rate": 0.00016953672056451626, "loss": 3.0903, "step": 39374 }, { "epoch": 1.93, "grad_norm": 0.693271279335022, "learning_rate": 0.0001695228566273693, "loss": 2.9609, "step": 39375 }, { "epoch": 1.93, "grad_norm": 0.6608006358146667, "learning_rate": 0.000169508993033874, "loss": 3.1086, "step": 39376 }, { "epoch": 1.93, "grad_norm": 0.6983053088188171, "learning_rate": 0.00016949512978406704, "loss": 2.9174, "step": 39377 }, { "epoch": 1.93, "grad_norm": 0.6728823184967041, "learning_rate": 0.00016948126687798483, "loss": 3.0248, "step": 39378 }, { "epoch": 1.93, "grad_norm": 0.6789221167564392, "learning_rate": 0.00016946740431566376, "loss": 3.0388, "step": 39379 }, { "epoch": 1.93, "grad_norm": 0.619369387626648, "learning_rate": 0.00016945354209714036, "loss": 3.1008, "step": 39380 }, { "epoch": 1.93, "grad_norm": 0.6840798258781433, "learning_rate": 0.0001694396802224512, "loss": 3.2137, "step": 39381 }, { "epoch": 1.93, "grad_norm": 0.6583610773086548, "learning_rate": 0.00016942581869163292, "loss": 3.0252, "step": 39382 }, { "epoch": 1.93, "grad_norm": 0.6744231581687927, "learning_rate": 0.00016941195750472176, "loss": 2.9969, "step": 39383 }, { "epoch": 1.93, "grad_norm": 0.7209470868110657, "learning_rate": 0.00016939809666175452, "loss": 3.0379, "step": 39384 }, { "epoch": 1.93, "grad_norm": 0.6821282505989075, "learning_rate": 0.00016938423616276755, "loss": 2.6843, "step": 39385 }, { "epoch": 1.93, "grad_norm": 0.6183244585990906, "learning_rate": 0.00016937037600779729, "loss": 2.9519, "step": 39386 }, { "epoch": 1.93, "grad_norm": 0.6722292304039001, "learning_rate": 0.00016935651619688036, "loss": 2.8854, "step": 39387 }, { "epoch": 1.93, "grad_norm": 0.6663751006126404, "learning_rate": 0.00016934265673005313, "loss": 2.9347, "step": 39388 }, { "epoch": 1.93, "grad_norm": 0.694520890712738, "learning_rate": 0.00016932879760735235, "loss": 2.9421, "step": 39389 }, { "epoch": 1.93, "grad_norm": 0.6110134124755859, "learning_rate": 0.0001693149388288142, "loss": 3.0589, "step": 39390 }, { "epoch": 1.93, "grad_norm": 0.6420994400978088, "learning_rate": 0.00016930108039447538, "loss": 3.1698, "step": 39391 }, { "epoch": 1.93, "grad_norm": 0.6498850584030151, "learning_rate": 0.00016928722230437248, "loss": 3.1342, "step": 39392 }, { "epoch": 1.93, "grad_norm": 0.7132246494293213, "learning_rate": 0.00016927336455854186, "loss": 2.945, "step": 39393 }, { "epoch": 1.93, "grad_norm": 0.6706508994102478, "learning_rate": 0.00016925950715702005, "loss": 2.9076, "step": 39394 }, { "epoch": 1.93, "grad_norm": 0.7032055854797363, "learning_rate": 0.00016924565009984342, "loss": 2.9207, "step": 39395 }, { "epoch": 1.93, "grad_norm": 0.6941987872123718, "learning_rate": 0.0001692317933870486, "loss": 3.0327, "step": 39396 }, { "epoch": 1.93, "grad_norm": 0.6361653208732605, "learning_rate": 0.00016921793701867217, "loss": 3.0947, "step": 39397 }, { "epoch": 1.93, "grad_norm": 0.6204395890235901, "learning_rate": 0.0001692040809947504, "loss": 2.8356, "step": 39398 }, { "epoch": 1.93, "grad_norm": 0.6345608830451965, "learning_rate": 0.00016919022531532, "loss": 3.0054, "step": 39399 }, { "epoch": 1.93, "grad_norm": 0.9609873294830322, "learning_rate": 0.00016917636998041742, "loss": 2.9836, "step": 39400 }, { "epoch": 1.93, "grad_norm": 0.6671732664108276, "learning_rate": 0.000169162514990079, "loss": 2.8382, "step": 39401 }, { "epoch": 1.93, "grad_norm": 0.6544909477233887, "learning_rate": 0.0001691486603443414, "loss": 2.9315, "step": 39402 }, { "epoch": 1.93, "grad_norm": 0.6705605387687683, "learning_rate": 0.00016913480604324095, "loss": 2.9288, "step": 39403 }, { "epoch": 1.93, "grad_norm": 0.6991368532180786, "learning_rate": 0.00016912095208681437, "loss": 2.7712, "step": 39404 }, { "epoch": 1.93, "grad_norm": 0.7298339009284973, "learning_rate": 0.00016910709847509786, "loss": 3.005, "step": 39405 }, { "epoch": 1.93, "grad_norm": 0.6603250503540039, "learning_rate": 0.00016909324520812823, "loss": 2.8994, "step": 39406 }, { "epoch": 1.93, "grad_norm": 0.6446770429611206, "learning_rate": 0.00016907939228594175, "loss": 2.9513, "step": 39407 }, { "epoch": 1.93, "grad_norm": 0.6649962067604065, "learning_rate": 0.00016906553970857488, "loss": 2.9068, "step": 39408 }, { "epoch": 1.93, "grad_norm": 0.6251447796821594, "learning_rate": 0.00016905168747606428, "loss": 2.8336, "step": 39409 }, { "epoch": 1.93, "grad_norm": 0.6825475692749023, "learning_rate": 0.00016903783558844617, "loss": 2.9488, "step": 39410 }, { "epoch": 1.93, "grad_norm": 0.6691312789916992, "learning_rate": 0.00016902398404575726, "loss": 3.0109, "step": 39411 }, { "epoch": 1.93, "grad_norm": 0.6389507055282593, "learning_rate": 0.00016901013284803407, "loss": 3.1017, "step": 39412 }, { "epoch": 1.93, "grad_norm": 0.6700393557548523, "learning_rate": 0.00016899628199531282, "loss": 2.999, "step": 39413 }, { "epoch": 1.93, "grad_norm": 0.6409572958946228, "learning_rate": 0.00016898243148763032, "loss": 2.8796, "step": 39414 }, { "epoch": 1.93, "grad_norm": 0.6346601843833923, "learning_rate": 0.00016896858132502287, "loss": 3.0772, "step": 39415 }, { "epoch": 1.93, "grad_norm": 0.6301149129867554, "learning_rate": 0.00016895473150752683, "loss": 3.0398, "step": 39416 }, { "epoch": 1.93, "grad_norm": 0.6977820992469788, "learning_rate": 0.00016894088203517894, "loss": 2.8781, "step": 39417 }, { "epoch": 1.93, "grad_norm": 0.9036363363265991, "learning_rate": 0.00016892703290801543, "loss": 2.9435, "step": 39418 }, { "epoch": 1.93, "grad_norm": 0.7055711150169373, "learning_rate": 0.00016891318412607298, "loss": 2.9065, "step": 39419 }, { "epoch": 1.93, "grad_norm": 0.6767593026161194, "learning_rate": 0.00016889933568938788, "loss": 2.8684, "step": 39420 }, { "epoch": 1.93, "grad_norm": 0.6799156665802002, "learning_rate": 0.0001688854875979968, "loss": 3.161, "step": 39421 }, { "epoch": 1.93, "grad_norm": 0.615311324596405, "learning_rate": 0.00016887163985193615, "loss": 3.001, "step": 39422 }, { "epoch": 1.93, "grad_norm": 0.6541116833686829, "learning_rate": 0.00016885779245124223, "loss": 3.0344, "step": 39423 }, { "epoch": 1.93, "grad_norm": 0.6408389806747437, "learning_rate": 0.00016884394539595172, "loss": 2.9967, "step": 39424 }, { "epoch": 1.93, "grad_norm": 0.7208709716796875, "learning_rate": 0.00016883009868610093, "loss": 2.9828, "step": 39425 }, { "epoch": 1.93, "grad_norm": 0.6646652221679688, "learning_rate": 0.00016881625232172652, "loss": 2.9533, "step": 39426 }, { "epoch": 1.93, "grad_norm": 0.637306809425354, "learning_rate": 0.00016880240630286477, "loss": 3.0291, "step": 39427 }, { "epoch": 1.93, "grad_norm": 0.6594432592391968, "learning_rate": 0.0001687885606295522, "loss": 3.0415, "step": 39428 }, { "epoch": 1.93, "grad_norm": 0.6402209401130676, "learning_rate": 0.0001687747153018256, "loss": 2.9242, "step": 39429 }, { "epoch": 1.93, "grad_norm": 0.6318802237510681, "learning_rate": 0.00016876087031972084, "loss": 2.9576, "step": 39430 }, { "epoch": 1.93, "grad_norm": 0.6425923109054565, "learning_rate": 0.00016874702568327489, "loss": 2.804, "step": 39431 }, { "epoch": 1.93, "grad_norm": 0.6498662829399109, "learning_rate": 0.00016873318139252382, "loss": 2.924, "step": 39432 }, { "epoch": 1.93, "grad_norm": 0.6536316871643066, "learning_rate": 0.0001687193374475043, "loss": 2.885, "step": 39433 }, { "epoch": 1.93, "grad_norm": 0.6774493455886841, "learning_rate": 0.0001687054938482529, "loss": 2.8245, "step": 39434 }, { "epoch": 1.93, "grad_norm": 0.6602688431739807, "learning_rate": 0.00016869165059480584, "loss": 3.3949, "step": 39435 }, { "epoch": 1.93, "grad_norm": 0.6157000660896301, "learning_rate": 0.00016867780768719983, "loss": 2.9687, "step": 39436 }, { "epoch": 1.93, "grad_norm": 0.6612973213195801, "learning_rate": 0.00016866396512547124, "loss": 2.9654, "step": 39437 }, { "epoch": 1.93, "grad_norm": 0.6561480760574341, "learning_rate": 0.0001686501229096563, "loss": 2.9176, "step": 39438 }, { "epoch": 1.93, "grad_norm": 0.680400550365448, "learning_rate": 0.0001686362810397918, "loss": 3.068, "step": 39439 }, { "epoch": 1.93, "grad_norm": 0.6502717733383179, "learning_rate": 0.00016862243951591395, "loss": 3.0433, "step": 39440 }, { "epoch": 1.93, "grad_norm": 0.641892671585083, "learning_rate": 0.00016860859833805943, "loss": 2.891, "step": 39441 }, { "epoch": 1.93, "grad_norm": 0.6740272641181946, "learning_rate": 0.00016859475750626444, "loss": 3.0464, "step": 39442 }, { "epoch": 1.93, "grad_norm": 0.6247941851615906, "learning_rate": 0.0001685809170205657, "loss": 2.8856, "step": 39443 }, { "epoch": 1.93, "grad_norm": 0.6555472016334534, "learning_rate": 0.00016856707688099956, "loss": 3.1368, "step": 39444 }, { "epoch": 1.93, "grad_norm": 0.6383205056190491, "learning_rate": 0.0001685532370876023, "loss": 2.6563, "step": 39445 }, { "epoch": 1.93, "grad_norm": 0.6372672319412231, "learning_rate": 0.0001685393976404106, "loss": 3.1764, "step": 39446 }, { "epoch": 1.93, "grad_norm": 0.6690751910209656, "learning_rate": 0.00016852555853946074, "loss": 2.9395, "step": 39447 }, { "epoch": 1.93, "grad_norm": 0.6416040658950806, "learning_rate": 0.00016851171978478927, "loss": 2.9809, "step": 39448 }, { "epoch": 1.93, "grad_norm": 0.6781452894210815, "learning_rate": 0.0001684978813764327, "loss": 3.1393, "step": 39449 }, { "epoch": 1.93, "grad_norm": 0.6450480818748474, "learning_rate": 0.00016848404331442732, "loss": 3.0979, "step": 39450 }, { "epoch": 1.93, "grad_norm": 0.6464423537254333, "learning_rate": 0.0001684702055988098, "loss": 3.0349, "step": 39451 }, { "epoch": 1.93, "grad_norm": 0.6569921374320984, "learning_rate": 0.00016845636822961638, "loss": 2.9834, "step": 39452 }, { "epoch": 1.93, "grad_norm": 0.6588374972343445, "learning_rate": 0.0001684425312068835, "loss": 3.0806, "step": 39453 }, { "epoch": 1.93, "grad_norm": 0.6816110014915466, "learning_rate": 0.00016842869453064776, "loss": 2.9424, "step": 39454 }, { "epoch": 1.93, "grad_norm": 0.6436490416526794, "learning_rate": 0.00016841485820094543, "loss": 2.9879, "step": 39455 }, { "epoch": 1.93, "grad_norm": 0.6433058381080627, "learning_rate": 0.00016840102221781316, "loss": 2.932, "step": 39456 }, { "epoch": 1.93, "grad_norm": 0.6444736123085022, "learning_rate": 0.00016838718658128714, "loss": 2.9911, "step": 39457 }, { "epoch": 1.93, "grad_norm": 0.6469634175300598, "learning_rate": 0.00016837335129140404, "loss": 2.8933, "step": 39458 }, { "epoch": 1.93, "grad_norm": 0.6529709100723267, "learning_rate": 0.00016835951634820023, "loss": 3.0116, "step": 39459 }, { "epoch": 1.93, "grad_norm": 0.6344585418701172, "learning_rate": 0.00016834568175171197, "loss": 2.9716, "step": 39460 }, { "epoch": 1.93, "grad_norm": 0.6618320941925049, "learning_rate": 0.00016833184750197596, "loss": 3.0455, "step": 39461 }, { "epoch": 1.93, "grad_norm": 0.6651561260223389, "learning_rate": 0.00016831801359902846, "loss": 3.1178, "step": 39462 }, { "epoch": 1.93, "grad_norm": 0.6415659785270691, "learning_rate": 0.00016830418004290594, "loss": 2.7613, "step": 39463 }, { "epoch": 1.93, "grad_norm": 0.7086910605430603, "learning_rate": 0.00016829034683364498, "loss": 2.8569, "step": 39464 }, { "epoch": 1.93, "grad_norm": 0.619654655456543, "learning_rate": 0.00016827651397128188, "loss": 3.1485, "step": 39465 }, { "epoch": 1.93, "grad_norm": 0.6941916942596436, "learning_rate": 0.00016826268145585314, "loss": 2.7932, "step": 39466 }, { "epoch": 1.93, "grad_norm": 0.7001242637634277, "learning_rate": 0.00016824884928739496, "loss": 2.9649, "step": 39467 }, { "epoch": 1.93, "grad_norm": 0.6473196148872375, "learning_rate": 0.00016823501746594403, "loss": 3.0718, "step": 39468 }, { "epoch": 1.93, "grad_norm": 0.6944223642349243, "learning_rate": 0.00016822118599153676, "loss": 3.0691, "step": 39469 }, { "epoch": 1.93, "grad_norm": 0.6287515759468079, "learning_rate": 0.00016820735486420944, "loss": 2.9734, "step": 39470 }, { "epoch": 1.93, "grad_norm": 0.699953556060791, "learning_rate": 0.00016819352408399869, "loss": 2.9848, "step": 39471 }, { "epoch": 1.93, "grad_norm": 0.6306780576705933, "learning_rate": 0.00016817969365094074, "loss": 3.1997, "step": 39472 }, { "epoch": 1.93, "grad_norm": 0.6510525345802307, "learning_rate": 0.0001681658635650722, "loss": 2.9396, "step": 39473 }, { "epoch": 1.93, "grad_norm": 0.6423694491386414, "learning_rate": 0.0001681520338264294, "loss": 3.058, "step": 39474 }, { "epoch": 1.93, "grad_norm": 0.662390410900116, "learning_rate": 0.00016813820443504866, "loss": 3.0601, "step": 39475 }, { "epoch": 1.93, "grad_norm": 0.653954267501831, "learning_rate": 0.0001681243753909666, "loss": 3.0233, "step": 39476 }, { "epoch": 1.93, "grad_norm": 0.6505160331726074, "learning_rate": 0.00016811054669421948, "loss": 3.0404, "step": 39477 }, { "epoch": 1.93, "grad_norm": 0.6412315368652344, "learning_rate": 0.00016809671834484377, "loss": 2.9671, "step": 39478 }, { "epoch": 1.93, "grad_norm": 0.646365225315094, "learning_rate": 0.00016808289034287604, "loss": 2.9477, "step": 39479 }, { "epoch": 1.93, "grad_norm": 0.6499340534210205, "learning_rate": 0.00016806906268835263, "loss": 3.2125, "step": 39480 }, { "epoch": 1.93, "grad_norm": 0.6421015858650208, "learning_rate": 0.00016805523538130986, "loss": 3.1327, "step": 39481 }, { "epoch": 1.93, "grad_norm": 0.6436095833778381, "learning_rate": 0.00016804140842178412, "loss": 2.9494, "step": 39482 }, { "epoch": 1.93, "grad_norm": 0.658585786819458, "learning_rate": 0.0001680275818098119, "loss": 3.0283, "step": 39483 }, { "epoch": 1.94, "grad_norm": 0.6562331318855286, "learning_rate": 0.00016801375554542977, "loss": 3.0036, "step": 39484 }, { "epoch": 1.94, "grad_norm": 0.7266846299171448, "learning_rate": 0.00016799992962867385, "loss": 2.9681, "step": 39485 }, { "epoch": 1.94, "grad_norm": 0.6242222785949707, "learning_rate": 0.00016798610405958087, "loss": 2.9437, "step": 39486 }, { "epoch": 1.94, "grad_norm": 0.6533987522125244, "learning_rate": 0.00016797227883818706, "loss": 2.7359, "step": 39487 }, { "epoch": 1.94, "grad_norm": 0.6742711067199707, "learning_rate": 0.00016795845396452876, "loss": 2.8229, "step": 39488 }, { "epoch": 1.94, "grad_norm": 0.6334719657897949, "learning_rate": 0.00016794462943864257, "loss": 2.9316, "step": 39489 }, { "epoch": 1.94, "grad_norm": 0.651542067527771, "learning_rate": 0.00016793080526056472, "loss": 3.0136, "step": 39490 }, { "epoch": 1.94, "grad_norm": 0.6780474185943604, "learning_rate": 0.00016791698143033183, "loss": 2.9013, "step": 39491 }, { "epoch": 1.94, "grad_norm": 0.674872100353241, "learning_rate": 0.00016790315794798006, "loss": 2.8134, "step": 39492 }, { "epoch": 1.94, "grad_norm": 0.6489875912666321, "learning_rate": 0.00016788933481354595, "loss": 2.7779, "step": 39493 }, { "epoch": 1.94, "grad_norm": 0.6219632625579834, "learning_rate": 0.00016787551202706604, "loss": 2.8627, "step": 39494 }, { "epoch": 1.94, "grad_norm": 0.640094518661499, "learning_rate": 0.0001678616895885766, "loss": 2.8384, "step": 39495 }, { "epoch": 1.94, "grad_norm": 0.7231557965278625, "learning_rate": 0.000167847867498114, "loss": 2.9999, "step": 39496 }, { "epoch": 1.94, "grad_norm": 0.6574937701225281, "learning_rate": 0.0001678340457557146, "loss": 2.9708, "step": 39497 }, { "epoch": 1.94, "grad_norm": 0.6397494077682495, "learning_rate": 0.000167820224361415, "loss": 2.8948, "step": 39498 }, { "epoch": 1.94, "grad_norm": 0.6591821908950806, "learning_rate": 0.0001678064033152514, "loss": 3.1144, "step": 39499 }, { "epoch": 1.94, "grad_norm": 0.684015691280365, "learning_rate": 0.00016779258261726029, "loss": 3.0855, "step": 39500 }, { "epoch": 1.94, "grad_norm": 0.6491175293922424, "learning_rate": 0.00016777876226747816, "loss": 2.8436, "step": 39501 }, { "epoch": 1.94, "grad_norm": 0.6679362058639526, "learning_rate": 0.0001677649422659413, "loss": 2.9078, "step": 39502 }, { "epoch": 1.94, "grad_norm": 0.6936571598052979, "learning_rate": 0.00016775112261268617, "loss": 2.9691, "step": 39503 }, { "epoch": 1.94, "grad_norm": 0.673439621925354, "learning_rate": 0.000167737303307749, "loss": 3.0569, "step": 39504 }, { "epoch": 1.94, "grad_norm": 0.6793358325958252, "learning_rate": 0.00016772348435116637, "loss": 3.1337, "step": 39505 }, { "epoch": 1.94, "grad_norm": 0.6524600982666016, "learning_rate": 0.00016770966574297465, "loss": 2.9261, "step": 39506 }, { "epoch": 1.94, "grad_norm": 0.6856192946434021, "learning_rate": 0.00016769584748321014, "loss": 3.0568, "step": 39507 }, { "epoch": 1.94, "grad_norm": 0.6691726446151733, "learning_rate": 0.00016768202957190943, "loss": 2.7727, "step": 39508 }, { "epoch": 1.94, "grad_norm": 0.7057440876960754, "learning_rate": 0.00016766821200910883, "loss": 2.9153, "step": 39509 }, { "epoch": 1.94, "grad_norm": 0.6541669368743896, "learning_rate": 0.00016765439479484448, "loss": 3.16, "step": 39510 }, { "epoch": 1.94, "grad_norm": 0.7234159708023071, "learning_rate": 0.00016764057792915317, "loss": 3.1711, "step": 39511 }, { "epoch": 1.94, "grad_norm": 0.6421957015991211, "learning_rate": 0.00016762676141207096, "loss": 2.8255, "step": 39512 }, { "epoch": 1.94, "grad_norm": 0.6890547871589661, "learning_rate": 0.0001676129452436345, "loss": 2.9754, "step": 39513 }, { "epoch": 1.94, "grad_norm": 0.6445096731185913, "learning_rate": 0.00016759912942388, "loss": 2.8582, "step": 39514 }, { "epoch": 1.94, "grad_norm": 0.6753875017166138, "learning_rate": 0.00016758531395284383, "loss": 3.0514, "step": 39515 }, { "epoch": 1.94, "grad_norm": 0.6762891411781311, "learning_rate": 0.00016757149883056264, "loss": 2.9444, "step": 39516 }, { "epoch": 1.94, "grad_norm": 0.6587990522384644, "learning_rate": 0.0001675576840570726, "loss": 2.8884, "step": 39517 }, { "epoch": 1.94, "grad_norm": 0.6049191951751709, "learning_rate": 0.00016754386963241014, "loss": 2.928, "step": 39518 }, { "epoch": 1.94, "grad_norm": 0.6751249432563782, "learning_rate": 0.00016753005555661152, "loss": 3.0188, "step": 39519 }, { "epoch": 1.94, "grad_norm": 0.6588065028190613, "learning_rate": 0.00016751624182971325, "loss": 3.018, "step": 39520 }, { "epoch": 1.94, "grad_norm": 0.6722527742385864, "learning_rate": 0.00016750242845175182, "loss": 2.9835, "step": 39521 }, { "epoch": 1.94, "grad_norm": 0.7164931893348694, "learning_rate": 0.00016748861542276332, "loss": 2.8443, "step": 39522 }, { "epoch": 1.94, "grad_norm": 0.6754207015037537, "learning_rate": 0.0001674748027427845, "loss": 3.1071, "step": 39523 }, { "epoch": 1.94, "grad_norm": 0.6811302304267883, "learning_rate": 0.00016746099041185152, "loss": 2.8322, "step": 39524 }, { "epoch": 1.94, "grad_norm": 0.6523797512054443, "learning_rate": 0.00016744717843000066, "loss": 3.0216, "step": 39525 }, { "epoch": 1.94, "grad_norm": 0.6112352013587952, "learning_rate": 0.0001674333667972685, "loss": 2.9525, "step": 39526 }, { "epoch": 1.94, "grad_norm": 0.6537315249443054, "learning_rate": 0.0001674195555136913, "loss": 3.0354, "step": 39527 }, { "epoch": 1.94, "grad_norm": 0.6480567455291748, "learning_rate": 0.00016740574457930552, "loss": 3.0474, "step": 39528 }, { "epoch": 1.94, "grad_norm": 0.6240382194519043, "learning_rate": 0.00016739193399414743, "loss": 2.7532, "step": 39529 }, { "epoch": 1.94, "grad_norm": 0.6805428266525269, "learning_rate": 0.00016737812375825357, "loss": 2.878, "step": 39530 }, { "epoch": 1.94, "grad_norm": 0.6385943293571472, "learning_rate": 0.00016736431387166017, "loss": 3.1757, "step": 39531 }, { "epoch": 1.94, "grad_norm": 0.6469088196754456, "learning_rate": 0.00016735050433440353, "loss": 2.8245, "step": 39532 }, { "epoch": 1.94, "grad_norm": 0.6487658619880676, "learning_rate": 0.0001673366951465203, "loss": 2.864, "step": 39533 }, { "epoch": 1.94, "grad_norm": 0.6648675799369812, "learning_rate": 0.0001673228863080465, "loss": 3.1449, "step": 39534 }, { "epoch": 1.94, "grad_norm": 0.6737173199653625, "learning_rate": 0.00016730907781901873, "loss": 2.8128, "step": 39535 }, { "epoch": 1.94, "grad_norm": 0.6540577411651611, "learning_rate": 0.00016729526967947344, "loss": 3.0712, "step": 39536 }, { "epoch": 1.94, "grad_norm": 0.6505504250526428, "learning_rate": 0.00016728146188944672, "loss": 3.0605, "step": 39537 }, { "epoch": 1.94, "grad_norm": 0.6246081590652466, "learning_rate": 0.0001672676544489752, "loss": 2.902, "step": 39538 }, { "epoch": 1.94, "grad_norm": 0.6178304553031921, "learning_rate": 0.00016725384735809517, "loss": 2.738, "step": 39539 }, { "epoch": 1.94, "grad_norm": 0.711046040058136, "learning_rate": 0.00016724004061684282, "loss": 3.0829, "step": 39540 }, { "epoch": 1.94, "grad_norm": 0.6916102766990662, "learning_rate": 0.00016722623422525478, "loss": 3.0951, "step": 39541 }, { "epoch": 1.94, "grad_norm": 0.6540066599845886, "learning_rate": 0.00016721242818336712, "loss": 3.0883, "step": 39542 }, { "epoch": 1.94, "grad_norm": 0.6461518406867981, "learning_rate": 0.00016719862249121655, "loss": 3.0277, "step": 39543 }, { "epoch": 1.94, "grad_norm": 0.6972729563713074, "learning_rate": 0.00016718481714883912, "loss": 3.252, "step": 39544 }, { "epoch": 1.94, "grad_norm": 0.6396310329437256, "learning_rate": 0.00016717101215627143, "loss": 2.5932, "step": 39545 }, { "epoch": 1.94, "grad_norm": 0.7706670165061951, "learning_rate": 0.0001671572075135497, "loss": 3.0133, "step": 39546 }, { "epoch": 1.94, "grad_norm": 0.6633245944976807, "learning_rate": 0.00016714340322071023, "loss": 3.0178, "step": 39547 }, { "epoch": 1.94, "grad_norm": 0.7238604426383972, "learning_rate": 0.00016712959927778957, "loss": 2.921, "step": 39548 }, { "epoch": 1.94, "grad_norm": 0.6844163537025452, "learning_rate": 0.0001671157956848239, "loss": 3.0155, "step": 39549 }, { "epoch": 1.94, "grad_norm": 0.6485677361488342, "learning_rate": 0.0001671019924418496, "loss": 3.1059, "step": 39550 }, { "epoch": 1.94, "grad_norm": 0.6759480237960815, "learning_rate": 0.0001670881895489032, "loss": 3.2116, "step": 39551 }, { "epoch": 1.94, "grad_norm": 0.641191303730011, "learning_rate": 0.00016707438700602084, "loss": 2.8688, "step": 39552 }, { "epoch": 1.94, "grad_norm": 0.6632412672042847, "learning_rate": 0.00016706058481323908, "loss": 3.114, "step": 39553 }, { "epoch": 1.94, "grad_norm": 0.6805461049079895, "learning_rate": 0.0001670467829705941, "loss": 2.9444, "step": 39554 }, { "epoch": 1.94, "grad_norm": 0.6704729199409485, "learning_rate": 0.00016703298147812223, "loss": 2.851, "step": 39555 }, { "epoch": 1.94, "grad_norm": 0.625964879989624, "learning_rate": 0.00016701918033586, "loss": 3.2429, "step": 39556 }, { "epoch": 1.94, "grad_norm": 0.6714993119239807, "learning_rate": 0.00016700537954384356, "loss": 2.9443, "step": 39557 }, { "epoch": 1.94, "grad_norm": 0.6405186057090759, "learning_rate": 0.0001669915791021095, "loss": 3.0493, "step": 39558 }, { "epoch": 1.94, "grad_norm": 0.6585462093353271, "learning_rate": 0.00016697777901069385, "loss": 3.0023, "step": 39559 }, { "epoch": 1.94, "grad_norm": 0.7218492031097412, "learning_rate": 0.00016696397926963325, "loss": 3.137, "step": 39560 }, { "epoch": 1.94, "grad_norm": 0.6385161280632019, "learning_rate": 0.00016695017987896398, "loss": 3.008, "step": 39561 }, { "epoch": 1.94, "grad_norm": 0.6418231725692749, "learning_rate": 0.00016693638083872217, "loss": 3.023, "step": 39562 }, { "epoch": 1.94, "grad_norm": 0.6520888805389404, "learning_rate": 0.00016692258214894446, "loss": 3.0538, "step": 39563 }, { "epoch": 1.94, "grad_norm": 0.6636407375335693, "learning_rate": 0.0001669087838096669, "loss": 2.8663, "step": 39564 }, { "epoch": 1.94, "grad_norm": 0.6697338223457336, "learning_rate": 0.000166894985820926, "loss": 2.9433, "step": 39565 }, { "epoch": 1.94, "grad_norm": 0.6582183241844177, "learning_rate": 0.00016688118818275824, "loss": 3.0544, "step": 39566 }, { "epoch": 1.94, "grad_norm": 0.6985874176025391, "learning_rate": 0.0001668673908951998, "loss": 3.1551, "step": 39567 }, { "epoch": 1.94, "grad_norm": 0.6799558997154236, "learning_rate": 0.00016685359395828702, "loss": 2.9644, "step": 39568 }, { "epoch": 1.94, "grad_norm": 0.6577656865119934, "learning_rate": 0.00016683979737205614, "loss": 2.9913, "step": 39569 }, { "epoch": 1.94, "grad_norm": 0.6054019331932068, "learning_rate": 0.0001668260011365436, "loss": 2.8949, "step": 39570 }, { "epoch": 1.94, "grad_norm": 0.7262248992919922, "learning_rate": 0.00016681220525178585, "loss": 3.1803, "step": 39571 }, { "epoch": 1.94, "grad_norm": 0.6668470501899719, "learning_rate": 0.00016679840971781904, "loss": 3.1008, "step": 39572 }, { "epoch": 1.94, "grad_norm": 0.6440438032150269, "learning_rate": 0.00016678461453467963, "loss": 2.9656, "step": 39573 }, { "epoch": 1.94, "grad_norm": 0.6357288956642151, "learning_rate": 0.00016677081970240386, "loss": 2.9421, "step": 39574 }, { "epoch": 1.94, "grad_norm": 0.6600980162620544, "learning_rate": 0.00016675702522102822, "loss": 2.661, "step": 39575 }, { "epoch": 1.94, "grad_norm": 0.6919565796852112, "learning_rate": 0.0001667432310905889, "loss": 2.8373, "step": 39576 }, { "epoch": 1.94, "grad_norm": 0.6861438751220703, "learning_rate": 0.0001667294373111222, "loss": 2.9764, "step": 39577 }, { "epoch": 1.94, "grad_norm": 0.7029724717140198, "learning_rate": 0.00016671564388266456, "loss": 2.9436, "step": 39578 }, { "epoch": 1.94, "grad_norm": 0.6634342670440674, "learning_rate": 0.0001667018508052522, "loss": 2.8775, "step": 39579 }, { "epoch": 1.94, "grad_norm": 0.6349776387214661, "learning_rate": 0.0001666880580789216, "loss": 2.9675, "step": 39580 }, { "epoch": 1.94, "grad_norm": 0.6573511362075806, "learning_rate": 0.00016667426570370888, "loss": 2.9222, "step": 39581 }, { "epoch": 1.94, "grad_norm": 0.6723518967628479, "learning_rate": 0.00016666047367965058, "loss": 2.9447, "step": 39582 }, { "epoch": 1.94, "grad_norm": 0.6813273429870605, "learning_rate": 0.00016664668200678297, "loss": 3.0502, "step": 39583 }, { "epoch": 1.94, "grad_norm": 0.6408233046531677, "learning_rate": 0.0001666328906851422, "loss": 2.913, "step": 39584 }, { "epoch": 1.94, "grad_norm": 0.6882518529891968, "learning_rate": 0.00016661909971476489, "loss": 2.9673, "step": 39585 }, { "epoch": 1.94, "grad_norm": 0.7611137628555298, "learning_rate": 0.00016660530909568704, "loss": 3.0111, "step": 39586 }, { "epoch": 1.94, "grad_norm": 0.6440632343292236, "learning_rate": 0.00016659151882794516, "loss": 3.0178, "step": 39587 }, { "epoch": 1.94, "grad_norm": 0.6395631432533264, "learning_rate": 0.00016657772891157563, "loss": 2.963, "step": 39588 }, { "epoch": 1.94, "grad_norm": 0.6646182537078857, "learning_rate": 0.0001665639393466147, "loss": 2.8769, "step": 39589 }, { "epoch": 1.94, "grad_norm": 0.6622651815414429, "learning_rate": 0.00016655015013309867, "loss": 3.1089, "step": 39590 }, { "epoch": 1.94, "grad_norm": 0.6579053997993469, "learning_rate": 0.00016653636127106374, "loss": 2.8252, "step": 39591 }, { "epoch": 1.94, "grad_norm": 0.635835587978363, "learning_rate": 0.00016652257276054638, "loss": 3.1497, "step": 39592 }, { "epoch": 1.94, "grad_norm": 0.645119309425354, "learning_rate": 0.00016650878460158295, "loss": 3.0745, "step": 39593 }, { "epoch": 1.94, "grad_norm": 0.707358717918396, "learning_rate": 0.0001664949967942096, "loss": 3.0034, "step": 39594 }, { "epoch": 1.94, "grad_norm": 0.6341613531112671, "learning_rate": 0.00016648120933846289, "loss": 2.9241, "step": 39595 }, { "epoch": 1.94, "grad_norm": 0.6709751486778259, "learning_rate": 0.00016646742223437879, "loss": 3.0487, "step": 39596 }, { "epoch": 1.94, "grad_norm": 0.6443012356758118, "learning_rate": 0.00016645363548199396, "loss": 2.947, "step": 39597 }, { "epoch": 1.94, "grad_norm": 0.6468132734298706, "learning_rate": 0.0001664398490813445, "loss": 3.1464, "step": 39598 }, { "epoch": 1.94, "grad_norm": 0.6579627394676208, "learning_rate": 0.0001664260630324667, "loss": 2.9348, "step": 39599 }, { "epoch": 1.94, "grad_norm": 0.6067564487457275, "learning_rate": 0.00016641227733539703, "loss": 2.9836, "step": 39600 }, { "epoch": 1.94, "grad_norm": 0.6814645528793335, "learning_rate": 0.00016639849199017164, "loss": 2.9497, "step": 39601 }, { "epoch": 1.94, "grad_norm": 0.7532250285148621, "learning_rate": 0.00016638470699682688, "loss": 2.8528, "step": 39602 }, { "epoch": 1.94, "grad_norm": 0.6621189713478088, "learning_rate": 0.00016637092235539925, "loss": 3.103, "step": 39603 }, { "epoch": 1.94, "grad_norm": 0.6254897713661194, "learning_rate": 0.00016635713806592483, "loss": 2.6625, "step": 39604 }, { "epoch": 1.94, "grad_norm": 0.6650421023368835, "learning_rate": 0.00016634335412844, "loss": 3.0827, "step": 39605 }, { "epoch": 1.94, "grad_norm": 0.6702773571014404, "learning_rate": 0.00016632957054298097, "loss": 3.0373, "step": 39606 }, { "epoch": 1.94, "grad_norm": 0.629437267780304, "learning_rate": 0.0001663157873095841, "loss": 2.8015, "step": 39607 }, { "epoch": 1.94, "grad_norm": 0.6859908103942871, "learning_rate": 0.00016630200442828582, "loss": 2.8849, "step": 39608 }, { "epoch": 1.94, "grad_norm": 0.6508818864822388, "learning_rate": 0.00016628822189912226, "loss": 2.7454, "step": 39609 }, { "epoch": 1.94, "grad_norm": 0.6694782972335815, "learning_rate": 0.00016627443972212986, "loss": 2.9539, "step": 39610 }, { "epoch": 1.94, "grad_norm": 0.6411961317062378, "learning_rate": 0.00016626065789734486, "loss": 2.9037, "step": 39611 }, { "epoch": 1.94, "grad_norm": 0.6573426127433777, "learning_rate": 0.00016624687642480344, "loss": 2.8916, "step": 39612 }, { "epoch": 1.94, "grad_norm": 0.7193799614906311, "learning_rate": 0.00016623309530454215, "loss": 2.843, "step": 39613 }, { "epoch": 1.94, "grad_norm": 0.6922988891601562, "learning_rate": 0.00016621931453659697, "loss": 3.0807, "step": 39614 }, { "epoch": 1.94, "grad_norm": 0.6599312424659729, "learning_rate": 0.00016620553412100453, "loss": 2.8812, "step": 39615 }, { "epoch": 1.94, "grad_norm": 0.6752668619155884, "learning_rate": 0.00016619175405780081, "loss": 2.9444, "step": 39616 }, { "epoch": 1.94, "grad_norm": 0.6617905497550964, "learning_rate": 0.00016617797434702233, "loss": 3.1259, "step": 39617 }, { "epoch": 1.94, "grad_norm": 0.6991008520126343, "learning_rate": 0.00016616419498870533, "loss": 3.0066, "step": 39618 }, { "epoch": 1.94, "grad_norm": 0.6415068507194519, "learning_rate": 0.00016615041598288613, "loss": 3.1282, "step": 39619 }, { "epoch": 1.94, "grad_norm": 0.6914513111114502, "learning_rate": 0.000166136637329601, "loss": 3.2745, "step": 39620 }, { "epoch": 1.94, "grad_norm": 0.6138527989387512, "learning_rate": 0.00016612285902888605, "loss": 2.8569, "step": 39621 }, { "epoch": 1.94, "grad_norm": 0.6552730798721313, "learning_rate": 0.00016610908108077776, "loss": 3.088, "step": 39622 }, { "epoch": 1.94, "grad_norm": 0.6678380370140076, "learning_rate": 0.00016609530348531246, "loss": 3.0937, "step": 39623 }, { "epoch": 1.94, "grad_norm": 0.7249351143836975, "learning_rate": 0.00016608152624252628, "loss": 3.1583, "step": 39624 }, { "epoch": 1.94, "grad_norm": 0.6721065044403076, "learning_rate": 0.00016606774935245568, "loss": 3.0651, "step": 39625 }, { "epoch": 1.94, "grad_norm": 0.6632776260375977, "learning_rate": 0.00016605397281513684, "loss": 2.9947, "step": 39626 }, { "epoch": 1.94, "grad_norm": 0.6905990839004517, "learning_rate": 0.00016604019663060595, "loss": 2.9802, "step": 39627 }, { "epoch": 1.94, "grad_norm": 0.7240660190582275, "learning_rate": 0.00016602642079889955, "loss": 2.9532, "step": 39628 }, { "epoch": 1.94, "grad_norm": 0.653408944606781, "learning_rate": 0.00016601264532005363, "loss": 3.0149, "step": 39629 }, { "epoch": 1.94, "grad_norm": 0.665780246257782, "learning_rate": 0.00016599887019410473, "loss": 2.9089, "step": 39630 }, { "epoch": 1.94, "grad_norm": 0.6445152163505554, "learning_rate": 0.0001659850954210889, "loss": 2.9931, "step": 39631 }, { "epoch": 1.94, "grad_norm": 0.6995826363563538, "learning_rate": 0.00016597132100104264, "loss": 3.13, "step": 39632 }, { "epoch": 1.94, "grad_norm": 0.6662886738777161, "learning_rate": 0.00016595754693400215, "loss": 3.0491, "step": 39633 }, { "epoch": 1.94, "grad_norm": 0.6708129048347473, "learning_rate": 0.00016594377322000356, "loss": 2.9691, "step": 39634 }, { "epoch": 1.94, "grad_norm": 0.6616761088371277, "learning_rate": 0.00016592999985908343, "loss": 3.0653, "step": 39635 }, { "epoch": 1.94, "grad_norm": 0.6500592827796936, "learning_rate": 0.0001659162268512777, "loss": 3.0368, "step": 39636 }, { "epoch": 1.94, "grad_norm": 0.6823159456253052, "learning_rate": 0.00016590245419662283, "loss": 2.9157, "step": 39637 }, { "epoch": 1.94, "grad_norm": 0.6613301634788513, "learning_rate": 0.00016588868189515522, "loss": 2.8774, "step": 39638 }, { "epoch": 1.94, "grad_norm": 0.647834062576294, "learning_rate": 0.00016587490994691092, "loss": 2.8103, "step": 39639 }, { "epoch": 1.94, "grad_norm": 0.6654465198516846, "learning_rate": 0.00016586113835192635, "loss": 2.9277, "step": 39640 }, { "epoch": 1.94, "grad_norm": 0.6587596535682678, "learning_rate": 0.0001658473671102378, "loss": 2.9219, "step": 39641 }, { "epoch": 1.94, "grad_norm": 0.6656188368797302, "learning_rate": 0.00016583359622188132, "loss": 3.0531, "step": 39642 }, { "epoch": 1.94, "grad_norm": 0.6441434025764465, "learning_rate": 0.00016581982568689342, "loss": 2.9493, "step": 39643 }, { "epoch": 1.94, "grad_norm": 0.6490876078605652, "learning_rate": 0.00016580605550531018, "loss": 2.9725, "step": 39644 }, { "epoch": 1.94, "grad_norm": 0.7419745326042175, "learning_rate": 0.00016579228567716807, "loss": 2.88, "step": 39645 }, { "epoch": 1.94, "grad_norm": 0.6507675051689148, "learning_rate": 0.00016577851620250313, "loss": 2.8218, "step": 39646 }, { "epoch": 1.94, "grad_norm": 0.6721502542495728, "learning_rate": 0.00016576474708135188, "loss": 3.1349, "step": 39647 }, { "epoch": 1.94, "grad_norm": 0.6784391403198242, "learning_rate": 0.00016575097831375045, "loss": 2.9116, "step": 39648 }, { "epoch": 1.94, "grad_norm": 0.6822816133499146, "learning_rate": 0.000165737209899735, "loss": 2.8589, "step": 39649 }, { "epoch": 1.94, "grad_norm": 0.6557987332344055, "learning_rate": 0.000165723441839342, "loss": 2.8979, "step": 39650 }, { "epoch": 1.94, "grad_norm": 0.6441841125488281, "learning_rate": 0.00016570967413260748, "loss": 2.9015, "step": 39651 }, { "epoch": 1.94, "grad_norm": 0.7314832210540771, "learning_rate": 0.00016569590677956785, "loss": 3.1157, "step": 39652 }, { "epoch": 1.94, "grad_norm": 0.6173034310340881, "learning_rate": 0.00016568213978025947, "loss": 3.1966, "step": 39653 }, { "epoch": 1.94, "grad_norm": 0.6204674243927002, "learning_rate": 0.00016566837313471838, "loss": 3.0183, "step": 39654 }, { "epoch": 1.94, "grad_norm": 0.6612196564674377, "learning_rate": 0.00016565460684298116, "loss": 2.8712, "step": 39655 }, { "epoch": 1.94, "grad_norm": 0.6848688721656799, "learning_rate": 0.00016564084090508362, "loss": 3.0089, "step": 39656 }, { "epoch": 1.94, "grad_norm": 0.6412416696548462, "learning_rate": 0.00016562707532106235, "loss": 2.9572, "step": 39657 }, { "epoch": 1.94, "grad_norm": 0.667161226272583, "learning_rate": 0.0001656133100909534, "loss": 2.8977, "step": 39658 }, { "epoch": 1.94, "grad_norm": 0.639665961265564, "learning_rate": 0.00016559954521479315, "loss": 3.0027, "step": 39659 }, { "epoch": 1.94, "grad_norm": 0.6585216522216797, "learning_rate": 0.00016558578069261791, "loss": 3.0025, "step": 39660 }, { "epoch": 1.94, "grad_norm": 0.637984573841095, "learning_rate": 0.00016557201652446374, "loss": 2.8542, "step": 39661 }, { "epoch": 1.94, "grad_norm": 0.6432386040687561, "learning_rate": 0.00016555825271036715, "loss": 2.9786, "step": 39662 }, { "epoch": 1.94, "grad_norm": 0.6286998391151428, "learning_rate": 0.00016554448925036426, "loss": 3.0735, "step": 39663 }, { "epoch": 1.94, "grad_norm": 0.6470546722412109, "learning_rate": 0.00016553072614449118, "loss": 2.8628, "step": 39664 }, { "epoch": 1.94, "grad_norm": 0.7382664084434509, "learning_rate": 0.0001655169633927844, "loss": 3.0028, "step": 39665 }, { "epoch": 1.94, "grad_norm": 0.6897470355033875, "learning_rate": 0.0001655032009952799, "loss": 2.8576, "step": 39666 }, { "epoch": 1.94, "grad_norm": 0.6223475933074951, "learning_rate": 0.00016548943895201428, "loss": 2.8861, "step": 39667 }, { "epoch": 1.94, "grad_norm": 0.721520721912384, "learning_rate": 0.0001654756772630234, "loss": 3.0925, "step": 39668 }, { "epoch": 1.94, "grad_norm": 0.6690992116928101, "learning_rate": 0.00016546191592834385, "loss": 2.8354, "step": 39669 }, { "epoch": 1.94, "grad_norm": 0.6852384209632874, "learning_rate": 0.00016544815494801173, "loss": 3.0092, "step": 39670 }, { "epoch": 1.94, "grad_norm": 0.6654887795448303, "learning_rate": 0.00016543439432206312, "loss": 3.1952, "step": 39671 }, { "epoch": 1.94, "grad_norm": 0.6750345826148987, "learning_rate": 0.00016542063405053457, "loss": 3.0411, "step": 39672 }, { "epoch": 1.94, "grad_norm": 0.695560872554779, "learning_rate": 0.00016540687413346203, "loss": 3.0145, "step": 39673 }, { "epoch": 1.94, "grad_norm": 0.7053586840629578, "learning_rate": 0.0001653931145708819, "loss": 3.1015, "step": 39674 }, { "epoch": 1.94, "grad_norm": 0.6435714960098267, "learning_rate": 0.0001653793553628305, "loss": 3.0385, "step": 39675 }, { "epoch": 1.94, "grad_norm": 0.6890304684638977, "learning_rate": 0.00016536559650934385, "loss": 2.8383, "step": 39676 }, { "epoch": 1.94, "grad_norm": 0.7017620205879211, "learning_rate": 0.00016535183801045847, "loss": 2.8856, "step": 39677 }, { "epoch": 1.94, "grad_norm": 0.638668417930603, "learning_rate": 0.0001653380798662104, "loss": 2.9055, "step": 39678 }, { "epoch": 1.94, "grad_norm": 0.7142661809921265, "learning_rate": 0.0001653243220766358, "loss": 2.8482, "step": 39679 }, { "epoch": 1.94, "grad_norm": 0.6465545892715454, "learning_rate": 0.0001653105646417711, "loss": 2.9118, "step": 39680 }, { "epoch": 1.94, "grad_norm": 0.640198826789856, "learning_rate": 0.00016529680756165237, "loss": 3.0155, "step": 39681 }, { "epoch": 1.94, "grad_norm": 0.6553422212600708, "learning_rate": 0.00016528305083631604, "loss": 3.0283, "step": 39682 }, { "epoch": 1.94, "grad_norm": 0.842480480670929, "learning_rate": 0.0001652692944657981, "loss": 2.7033, "step": 39683 }, { "epoch": 1.94, "grad_norm": 0.6969196200370789, "learning_rate": 0.000165255538450135, "loss": 3.1925, "step": 39684 }, { "epoch": 1.94, "grad_norm": 0.6522579193115234, "learning_rate": 0.00016524178278936295, "loss": 2.8948, "step": 39685 }, { "epoch": 1.94, "grad_norm": 0.659083902835846, "learning_rate": 0.00016522802748351797, "loss": 2.8768, "step": 39686 }, { "epoch": 1.94, "grad_norm": 0.6438480615615845, "learning_rate": 0.00016521427253263652, "loss": 2.9011, "step": 39687 }, { "epoch": 1.95, "grad_norm": 0.6511509418487549, "learning_rate": 0.0001652005179367546, "loss": 2.9839, "step": 39688 }, { "epoch": 1.95, "grad_norm": 0.6866702437400818, "learning_rate": 0.0001651867636959086, "loss": 2.9139, "step": 39689 }, { "epoch": 1.95, "grad_norm": 0.6492428183555603, "learning_rate": 0.00016517300981013484, "loss": 3.0238, "step": 39690 }, { "epoch": 1.95, "grad_norm": 0.629295825958252, "learning_rate": 0.00016515925627946942, "loss": 3.1701, "step": 39691 }, { "epoch": 1.95, "grad_norm": 0.7377083897590637, "learning_rate": 0.0001651455031039486, "loss": 2.8515, "step": 39692 }, { "epoch": 1.95, "grad_norm": 0.6295761466026306, "learning_rate": 0.00016513175028360844, "loss": 2.862, "step": 39693 }, { "epoch": 1.95, "grad_norm": 0.6240883469581604, "learning_rate": 0.00016511799781848528, "loss": 2.7688, "step": 39694 }, { "epoch": 1.95, "grad_norm": 0.7190060615539551, "learning_rate": 0.00016510424570861544, "loss": 3.069, "step": 39695 }, { "epoch": 1.95, "grad_norm": 0.6325370073318481, "learning_rate": 0.00016509049395403496, "loss": 2.8299, "step": 39696 }, { "epoch": 1.95, "grad_norm": 0.6780902743339539, "learning_rate": 0.00016507674255478028, "loss": 2.9697, "step": 39697 }, { "epoch": 1.95, "grad_norm": 0.6456710696220398, "learning_rate": 0.0001650629915108874, "loss": 2.8723, "step": 39698 }, { "epoch": 1.95, "grad_norm": 0.6725703477859497, "learning_rate": 0.00016504924082239272, "loss": 3.1925, "step": 39699 }, { "epoch": 1.95, "grad_norm": 0.6326077580451965, "learning_rate": 0.00016503549048933237, "loss": 2.9816, "step": 39700 }, { "epoch": 1.95, "grad_norm": 0.6344588994979858, "learning_rate": 0.00016502174051174245, "loss": 3.0964, "step": 39701 }, { "epoch": 1.95, "grad_norm": 0.6674083471298218, "learning_rate": 0.00016500799088965942, "loss": 2.851, "step": 39702 }, { "epoch": 1.95, "grad_norm": 0.6438904404640198, "learning_rate": 0.0001649942416231192, "loss": 3.1094, "step": 39703 }, { "epoch": 1.95, "grad_norm": 0.6310093402862549, "learning_rate": 0.00016498049271215822, "loss": 3.0114, "step": 39704 }, { "epoch": 1.95, "grad_norm": 0.6457886099815369, "learning_rate": 0.0001649667441568127, "loss": 3.0389, "step": 39705 }, { "epoch": 1.95, "grad_norm": 0.6540348529815674, "learning_rate": 0.00016495299595711886, "loss": 3.1004, "step": 39706 }, { "epoch": 1.95, "grad_norm": 0.6192497611045837, "learning_rate": 0.00016493924811311278, "loss": 2.7377, "step": 39707 }, { "epoch": 1.95, "grad_norm": 0.6560617685317993, "learning_rate": 0.00016492550062483064, "loss": 2.9241, "step": 39708 }, { "epoch": 1.95, "grad_norm": 0.6480932831764221, "learning_rate": 0.0001649117534923087, "loss": 3.131, "step": 39709 }, { "epoch": 1.95, "grad_norm": 0.6678999662399292, "learning_rate": 0.00016489800671558335, "loss": 3.0523, "step": 39710 }, { "epoch": 1.95, "grad_norm": 0.7164120674133301, "learning_rate": 0.0001648842602946905, "loss": 2.8345, "step": 39711 }, { "epoch": 1.95, "grad_norm": 0.6553846001625061, "learning_rate": 0.00016487051422966667, "loss": 2.9484, "step": 39712 }, { "epoch": 1.95, "grad_norm": 0.6383706331253052, "learning_rate": 0.00016485676852054785, "loss": 2.9157, "step": 39713 }, { "epoch": 1.95, "grad_norm": 0.6358631253242493, "learning_rate": 0.00016484302316737022, "loss": 3.0168, "step": 39714 }, { "epoch": 1.95, "grad_norm": 0.7204211354255676, "learning_rate": 0.00016482927817017018, "loss": 3.0729, "step": 39715 }, { "epoch": 1.95, "grad_norm": 0.7235838770866394, "learning_rate": 0.00016481553352898365, "loss": 3.097, "step": 39716 }, { "epoch": 1.95, "grad_norm": 0.6387816667556763, "learning_rate": 0.0001648017892438471, "loss": 3.0294, "step": 39717 }, { "epoch": 1.95, "grad_norm": 0.71495121717453, "learning_rate": 0.00016478804531479653, "loss": 3.0884, "step": 39718 }, { "epoch": 1.95, "grad_norm": 0.6762590408325195, "learning_rate": 0.00016477430174186822, "loss": 3.1445, "step": 39719 }, { "epoch": 1.95, "grad_norm": 0.6747094392776489, "learning_rate": 0.0001647605585250985, "loss": 2.7726, "step": 39720 }, { "epoch": 1.95, "grad_norm": 0.6876216530799866, "learning_rate": 0.00016474681566452343, "loss": 2.9254, "step": 39721 }, { "epoch": 1.95, "grad_norm": 0.662525475025177, "learning_rate": 0.00016473307316017922, "loss": 3.0107, "step": 39722 }, { "epoch": 1.95, "grad_norm": 0.6243906617164612, "learning_rate": 0.00016471933101210194, "loss": 2.8323, "step": 39723 }, { "epoch": 1.95, "grad_norm": 0.6680139303207397, "learning_rate": 0.00016470558922032793, "loss": 2.8206, "step": 39724 }, { "epoch": 1.95, "grad_norm": 0.6394197940826416, "learning_rate": 0.00016469184778489346, "loss": 3.0246, "step": 39725 }, { "epoch": 1.95, "grad_norm": 0.7584770917892456, "learning_rate": 0.00016467810670583446, "loss": 2.7716, "step": 39726 }, { "epoch": 1.95, "grad_norm": 0.7045025825500488, "learning_rate": 0.0001646643659831875, "loss": 2.8362, "step": 39727 }, { "epoch": 1.95, "grad_norm": 0.676650881767273, "learning_rate": 0.00016465062561698846, "loss": 3.054, "step": 39728 }, { "epoch": 1.95, "grad_norm": 0.686996340751648, "learning_rate": 0.00016463688560727354, "loss": 2.9678, "step": 39729 }, { "epoch": 1.95, "grad_norm": 0.6655346155166626, "learning_rate": 0.00016462314595407914, "loss": 3.0299, "step": 39730 }, { "epoch": 1.95, "grad_norm": 0.6884318590164185, "learning_rate": 0.0001646094066574412, "loss": 2.9813, "step": 39731 }, { "epoch": 1.95, "grad_norm": 0.6549413204193115, "learning_rate": 0.00016459566771739612, "loss": 2.9558, "step": 39732 }, { "epoch": 1.95, "grad_norm": 0.6976496577262878, "learning_rate": 0.0001645819291339799, "loss": 2.9014, "step": 39733 }, { "epoch": 1.95, "grad_norm": 0.6469190716743469, "learning_rate": 0.00016456819090722894, "loss": 2.9572, "step": 39734 }, { "epoch": 1.95, "grad_norm": 0.6241649985313416, "learning_rate": 0.0001645544530371793, "loss": 3.1763, "step": 39735 }, { "epoch": 1.95, "grad_norm": 0.6479203701019287, "learning_rate": 0.000164540715523867, "loss": 2.9041, "step": 39736 }, { "epoch": 1.95, "grad_norm": 0.7182705402374268, "learning_rate": 0.00016452697836732858, "loss": 3.0069, "step": 39737 }, { "epoch": 1.95, "grad_norm": 0.6855570077896118, "learning_rate": 0.00016451324156759986, "loss": 3.1234, "step": 39738 }, { "epoch": 1.95, "grad_norm": 0.7441180944442749, "learning_rate": 0.0001644995051247173, "loss": 3.099, "step": 39739 }, { "epoch": 1.95, "grad_norm": 0.7113296985626221, "learning_rate": 0.00016448576903871687, "loss": 3.0428, "step": 39740 }, { "epoch": 1.95, "grad_norm": 0.6470918655395508, "learning_rate": 0.00016447203330963482, "loss": 2.9745, "step": 39741 }, { "epoch": 1.95, "grad_norm": 0.6484912037849426, "learning_rate": 0.0001644582979375075, "loss": 3.0841, "step": 39742 }, { "epoch": 1.95, "grad_norm": 0.7016591429710388, "learning_rate": 0.00016444456292237093, "loss": 3.1074, "step": 39743 }, { "epoch": 1.95, "grad_norm": 0.7112533450126648, "learning_rate": 0.00016443082826426126, "loss": 2.7582, "step": 39744 }, { "epoch": 1.95, "grad_norm": 0.6694428324699402, "learning_rate": 0.00016441709396321463, "loss": 2.7622, "step": 39745 }, { "epoch": 1.95, "grad_norm": 0.6349228024482727, "learning_rate": 0.00016440336001926722, "loss": 2.8594, "step": 39746 }, { "epoch": 1.95, "grad_norm": 0.6667150855064392, "learning_rate": 0.00016438962643245544, "loss": 3.1511, "step": 39747 }, { "epoch": 1.95, "grad_norm": 0.6422046422958374, "learning_rate": 0.00016437589320281515, "loss": 3.1107, "step": 39748 }, { "epoch": 1.95, "grad_norm": 0.644203782081604, "learning_rate": 0.00016436216033038274, "loss": 2.8401, "step": 39749 }, { "epoch": 1.95, "grad_norm": 0.6971120834350586, "learning_rate": 0.00016434842781519433, "loss": 2.9795, "step": 39750 }, { "epoch": 1.95, "grad_norm": 0.6946253180503845, "learning_rate": 0.00016433469565728593, "loss": 2.9588, "step": 39751 }, { "epoch": 1.95, "grad_norm": 0.6759042143821716, "learning_rate": 0.00016432096385669397, "loss": 3.0513, "step": 39752 }, { "epoch": 1.95, "grad_norm": 0.8178485035896301, "learning_rate": 0.00016430723241345432, "loss": 2.9891, "step": 39753 }, { "epoch": 1.95, "grad_norm": 0.6548961997032166, "learning_rate": 0.00016429350132760346, "loss": 3.034, "step": 39754 }, { "epoch": 1.95, "grad_norm": 0.6240988969802856, "learning_rate": 0.00016427977059917727, "loss": 2.8678, "step": 39755 }, { "epoch": 1.95, "grad_norm": 0.6791014671325684, "learning_rate": 0.00016426604022821205, "loss": 2.9517, "step": 39756 }, { "epoch": 1.95, "grad_norm": 0.6903157234191895, "learning_rate": 0.00016425231021474418, "loss": 3.2275, "step": 39757 }, { "epoch": 1.95, "grad_norm": 0.6479774117469788, "learning_rate": 0.00016423858055880937, "loss": 2.8358, "step": 39758 }, { "epoch": 1.95, "grad_norm": 0.6902576088905334, "learning_rate": 0.0001642248512604441, "loss": 3.2466, "step": 39759 }, { "epoch": 1.95, "grad_norm": 0.6335325837135315, "learning_rate": 0.00016421112231968437, "loss": 2.871, "step": 39760 }, { "epoch": 1.95, "grad_norm": 0.7053777575492859, "learning_rate": 0.00016419739373656642, "loss": 2.9095, "step": 39761 }, { "epoch": 1.95, "grad_norm": 0.6629794239997864, "learning_rate": 0.00016418366551112644, "loss": 3.159, "step": 39762 }, { "epoch": 1.95, "grad_norm": 0.6280484199523926, "learning_rate": 0.0001641699376434005, "loss": 3.0098, "step": 39763 }, { "epoch": 1.95, "grad_norm": 0.6917252540588379, "learning_rate": 0.00016415621013342492, "loss": 2.9122, "step": 39764 }, { "epoch": 1.95, "grad_norm": 0.6806678771972656, "learning_rate": 0.00016414248298123572, "loss": 3.1373, "step": 39765 }, { "epoch": 1.95, "grad_norm": 0.6609867215156555, "learning_rate": 0.00016412875618686894, "loss": 2.8685, "step": 39766 }, { "epoch": 1.95, "grad_norm": 0.6640313267707825, "learning_rate": 0.00016411502975036097, "loss": 2.9706, "step": 39767 }, { "epoch": 1.95, "grad_norm": 0.6441121697425842, "learning_rate": 0.0001641013036717478, "loss": 2.926, "step": 39768 }, { "epoch": 1.95, "grad_norm": 0.6373971104621887, "learning_rate": 0.00016408757795106574, "loss": 3.1379, "step": 39769 }, { "epoch": 1.95, "grad_norm": 0.6475521326065063, "learning_rate": 0.0001640738525883507, "loss": 3.017, "step": 39770 }, { "epoch": 1.95, "grad_norm": 0.6653847694396973, "learning_rate": 0.0001640601275836391, "loss": 2.8866, "step": 39771 }, { "epoch": 1.95, "grad_norm": 0.6433156132698059, "learning_rate": 0.000164046402936967, "loss": 2.9023, "step": 39772 }, { "epoch": 1.95, "grad_norm": 0.6815069913864136, "learning_rate": 0.00016403267864837035, "loss": 3.0265, "step": 39773 }, { "epoch": 1.95, "grad_norm": 0.6718235015869141, "learning_rate": 0.00016401895471788557, "loss": 3.1506, "step": 39774 }, { "epoch": 1.95, "grad_norm": 0.7037975788116455, "learning_rate": 0.00016400523114554858, "loss": 3.1061, "step": 39775 }, { "epoch": 1.95, "grad_norm": 0.6948851346969604, "learning_rate": 0.00016399150793139567, "loss": 2.9012, "step": 39776 }, { "epoch": 1.95, "grad_norm": 0.6527612209320068, "learning_rate": 0.00016397778507546307, "loss": 2.7146, "step": 39777 }, { "epoch": 1.95, "grad_norm": 0.6541974544525146, "learning_rate": 0.00016396406257778663, "loss": 2.8992, "step": 39778 }, { "epoch": 1.95, "grad_norm": 0.6501400470733643, "learning_rate": 0.00016395034043840284, "loss": 3.1708, "step": 39779 }, { "epoch": 1.95, "grad_norm": 0.6790065169334412, "learning_rate": 0.00016393661865734765, "loss": 3.1035, "step": 39780 }, { "epoch": 1.95, "grad_norm": 0.7167571187019348, "learning_rate": 0.0001639228972346571, "loss": 2.9389, "step": 39781 }, { "epoch": 1.95, "grad_norm": 0.6675640344619751, "learning_rate": 0.00016390917617036758, "loss": 2.938, "step": 39782 }, { "epoch": 1.95, "grad_norm": 0.6446592211723328, "learning_rate": 0.00016389545546451498, "loss": 3.2109, "step": 39783 }, { "epoch": 1.95, "grad_norm": 0.637033998966217, "learning_rate": 0.00016388173511713565, "loss": 2.8193, "step": 39784 }, { "epoch": 1.95, "grad_norm": 0.6496434807777405, "learning_rate": 0.00016386801512826554, "loss": 3.1458, "step": 39785 }, { "epoch": 1.95, "grad_norm": 0.6477523446083069, "learning_rate": 0.000163854295497941, "loss": 3.1229, "step": 39786 }, { "epoch": 1.95, "grad_norm": 0.6604776382446289, "learning_rate": 0.00016384057622619803, "loss": 2.9036, "step": 39787 }, { "epoch": 1.95, "grad_norm": 0.6297851800918579, "learning_rate": 0.00016382685731307267, "loss": 2.9685, "step": 39788 }, { "epoch": 1.95, "grad_norm": 0.713854968547821, "learning_rate": 0.0001638131387586013, "loss": 3.1751, "step": 39789 }, { "epoch": 1.95, "grad_norm": 0.6805860996246338, "learning_rate": 0.00016379942056281977, "loss": 2.9272, "step": 39790 }, { "epoch": 1.95, "grad_norm": 0.6449320912361145, "learning_rate": 0.00016378570272576436, "loss": 2.8766, "step": 39791 }, { "epoch": 1.95, "grad_norm": 0.6588577628135681, "learning_rate": 0.00016377198524747128, "loss": 2.8883, "step": 39792 }, { "epoch": 1.95, "grad_norm": 0.6518809795379639, "learning_rate": 0.00016375826812797662, "loss": 2.8129, "step": 39793 }, { "epoch": 1.95, "grad_norm": 0.6627069711685181, "learning_rate": 0.00016374455136731642, "loss": 3.0616, "step": 39794 }, { "epoch": 1.95, "grad_norm": 0.6738892197608948, "learning_rate": 0.0001637308349655267, "loss": 3.2886, "step": 39795 }, { "epoch": 1.95, "grad_norm": 0.6399411559104919, "learning_rate": 0.00016371711892264386, "loss": 3.0394, "step": 39796 }, { "epoch": 1.95, "grad_norm": 0.6393530964851379, "learning_rate": 0.00016370340323870392, "loss": 2.8555, "step": 39797 }, { "epoch": 1.95, "grad_norm": 0.6676591038703918, "learning_rate": 0.0001636896879137429, "loss": 3.174, "step": 39798 }, { "epoch": 1.95, "grad_norm": 0.6630427241325378, "learning_rate": 0.0001636759729477971, "loss": 2.9689, "step": 39799 }, { "epoch": 1.95, "grad_norm": 0.6797800064086914, "learning_rate": 0.00016366225834090246, "loss": 2.9354, "step": 39800 }, { "epoch": 1.95, "grad_norm": 0.6970747709274292, "learning_rate": 0.0001636485440930953, "loss": 3.0008, "step": 39801 }, { "epoch": 1.95, "grad_norm": 0.7164403796195984, "learning_rate": 0.00016363483020441166, "loss": 3.0449, "step": 39802 }, { "epoch": 1.95, "grad_norm": 0.6394767165184021, "learning_rate": 0.00016362111667488747, "loss": 2.8067, "step": 39803 }, { "epoch": 1.95, "grad_norm": 0.6600726842880249, "learning_rate": 0.00016360740350455916, "loss": 3.053, "step": 39804 }, { "epoch": 1.95, "grad_norm": 0.6320900321006775, "learning_rate": 0.00016359369069346257, "loss": 3.0765, "step": 39805 }, { "epoch": 1.95, "grad_norm": 0.7160202264785767, "learning_rate": 0.00016357997824163395, "loss": 2.9636, "step": 39806 }, { "epoch": 1.95, "grad_norm": 0.6760608553886414, "learning_rate": 0.00016356626614910956, "loss": 3.1093, "step": 39807 }, { "epoch": 1.95, "grad_norm": 0.6356624960899353, "learning_rate": 0.0001635525544159253, "loss": 3.0642, "step": 39808 }, { "epoch": 1.95, "grad_norm": 0.6428079009056091, "learning_rate": 0.0001635388430421174, "loss": 3.1223, "step": 39809 }, { "epoch": 1.95, "grad_norm": 0.7246979475021362, "learning_rate": 0.00016352513202772182, "loss": 2.7984, "step": 39810 }, { "epoch": 1.95, "grad_norm": 0.6196163892745972, "learning_rate": 0.00016351142137277483, "loss": 3.017, "step": 39811 }, { "epoch": 1.95, "grad_norm": 0.7229598164558411, "learning_rate": 0.00016349771107731245, "loss": 2.9938, "step": 39812 }, { "epoch": 1.95, "grad_norm": 0.6564931869506836, "learning_rate": 0.00016348400114137078, "loss": 3.0232, "step": 39813 }, { "epoch": 1.95, "grad_norm": 0.7086764574050903, "learning_rate": 0.00016347029156498608, "loss": 2.7794, "step": 39814 }, { "epoch": 1.95, "grad_norm": 0.6638086438179016, "learning_rate": 0.0001634565823481944, "loss": 3.0231, "step": 39815 }, { "epoch": 1.95, "grad_norm": 0.6503745317459106, "learning_rate": 0.00016344287349103178, "loss": 2.9724, "step": 39816 }, { "epoch": 1.95, "grad_norm": 0.6424645781517029, "learning_rate": 0.00016342916499353428, "loss": 3.037, "step": 39817 }, { "epoch": 1.95, "grad_norm": 0.6524776816368103, "learning_rate": 0.00016341545685573798, "loss": 2.8168, "step": 39818 }, { "epoch": 1.95, "grad_norm": 0.6896033883094788, "learning_rate": 0.00016340174907767926, "loss": 2.7819, "step": 39819 }, { "epoch": 1.95, "grad_norm": 0.6743185520172119, "learning_rate": 0.00016338804165939394, "loss": 2.8043, "step": 39820 }, { "epoch": 1.95, "grad_norm": 0.7410269379615784, "learning_rate": 0.0001633743346009183, "loss": 2.8985, "step": 39821 }, { "epoch": 1.95, "grad_norm": 0.6511000990867615, "learning_rate": 0.00016336062790228828, "loss": 2.9387, "step": 39822 }, { "epoch": 1.95, "grad_norm": 0.6686780452728271, "learning_rate": 0.0001633469215635402, "loss": 3.1086, "step": 39823 }, { "epoch": 1.95, "grad_norm": 0.6792110800743103, "learning_rate": 0.00016333321558471, "loss": 2.9515, "step": 39824 }, { "epoch": 1.95, "grad_norm": 0.6629397869110107, "learning_rate": 0.00016331950996583373, "loss": 2.9703, "step": 39825 }, { "epoch": 1.95, "grad_norm": 0.6876334547996521, "learning_rate": 0.00016330580470694762, "loss": 3.1131, "step": 39826 }, { "epoch": 1.95, "grad_norm": 0.6542200446128845, "learning_rate": 0.00016329209980808765, "loss": 3.0926, "step": 39827 }, { "epoch": 1.95, "grad_norm": 0.7039780616760254, "learning_rate": 0.00016327839526928995, "loss": 2.9963, "step": 39828 }, { "epoch": 1.95, "grad_norm": 0.6578562259674072, "learning_rate": 0.00016326469109059077, "loss": 3.1772, "step": 39829 }, { "epoch": 1.95, "grad_norm": 0.6808101534843445, "learning_rate": 0.00016325098727202608, "loss": 3.1833, "step": 39830 }, { "epoch": 1.95, "grad_norm": 0.650033175945282, "learning_rate": 0.00016323728381363196, "loss": 2.9292, "step": 39831 }, { "epoch": 1.95, "grad_norm": 0.7056677341461182, "learning_rate": 0.00016322358071544442, "loss": 2.8928, "step": 39832 }, { "epoch": 1.95, "grad_norm": 0.6719111204147339, "learning_rate": 0.00016320987797749966, "loss": 2.8892, "step": 39833 }, { "epoch": 1.95, "grad_norm": 0.6775871515274048, "learning_rate": 0.00016319617559983386, "loss": 2.929, "step": 39834 }, { "epoch": 1.95, "grad_norm": 0.6379905343055725, "learning_rate": 0.00016318247358248285, "loss": 3.0039, "step": 39835 }, { "epoch": 1.95, "grad_norm": 0.636997640132904, "learning_rate": 0.00016316877192548303, "loss": 3.059, "step": 39836 }, { "epoch": 1.95, "grad_norm": 0.6647437810897827, "learning_rate": 0.00016315507062887033, "loss": 2.9804, "step": 39837 }, { "epoch": 1.95, "grad_norm": 0.6169809103012085, "learning_rate": 0.00016314136969268069, "loss": 3.0341, "step": 39838 }, { "epoch": 1.95, "grad_norm": 0.661453366279602, "learning_rate": 0.00016312766911695046, "loss": 3.0617, "step": 39839 }, { "epoch": 1.95, "grad_norm": 0.6447640657424927, "learning_rate": 0.00016311396890171553, "loss": 3.1025, "step": 39840 }, { "epoch": 1.95, "grad_norm": 0.6600822806358337, "learning_rate": 0.00016310026904701213, "loss": 3.0792, "step": 39841 }, { "epoch": 1.95, "grad_norm": 0.6973288655281067, "learning_rate": 0.00016308656955287617, "loss": 2.8341, "step": 39842 }, { "epoch": 1.95, "grad_norm": 0.656122088432312, "learning_rate": 0.0001630728704193439, "loss": 3.031, "step": 39843 }, { "epoch": 1.95, "grad_norm": 0.6374558210372925, "learning_rate": 0.0001630591716464514, "loss": 3.0751, "step": 39844 }, { "epoch": 1.95, "grad_norm": 0.6601612567901611, "learning_rate": 0.00016304547323423469, "loss": 3.0094, "step": 39845 }, { "epoch": 1.95, "grad_norm": 0.7332333326339722, "learning_rate": 0.00016303177518272983, "loss": 2.8679, "step": 39846 }, { "epoch": 1.95, "grad_norm": 0.6448028683662415, "learning_rate": 0.00016301807749197284, "loss": 3.2066, "step": 39847 }, { "epoch": 1.95, "grad_norm": 0.660399317741394, "learning_rate": 0.00016300438016199982, "loss": 3.1289, "step": 39848 }, { "epoch": 1.95, "grad_norm": 0.5987576246261597, "learning_rate": 0.00016299068319284706, "loss": 3.0243, "step": 39849 }, { "epoch": 1.95, "grad_norm": 0.6372739672660828, "learning_rate": 0.0001629769865845503, "loss": 2.5937, "step": 39850 }, { "epoch": 1.95, "grad_norm": 0.7394205927848816, "learning_rate": 0.00016296329033714597, "loss": 3.0907, "step": 39851 }, { "epoch": 1.95, "grad_norm": 0.6508176922798157, "learning_rate": 0.0001629495944506699, "loss": 3.2754, "step": 39852 }, { "epoch": 1.95, "grad_norm": 0.6771491765975952, "learning_rate": 0.00016293589892515817, "loss": 3.012, "step": 39853 }, { "epoch": 1.95, "grad_norm": 0.6909971833229065, "learning_rate": 0.000162922203760647, "loss": 3.0311, "step": 39854 }, { "epoch": 1.95, "grad_norm": 0.7127150297164917, "learning_rate": 0.00016290850895717222, "loss": 3.0109, "step": 39855 }, { "epoch": 1.95, "grad_norm": 0.6183626055717468, "learning_rate": 0.00016289481451477015, "loss": 2.9736, "step": 39856 }, { "epoch": 1.95, "grad_norm": 0.6582019329071045, "learning_rate": 0.0001628811204334767, "loss": 2.9166, "step": 39857 }, { "epoch": 1.95, "grad_norm": 0.5948113203048706, "learning_rate": 0.00016286742671332803, "loss": 3.0286, "step": 39858 }, { "epoch": 1.95, "grad_norm": 0.6852336525917053, "learning_rate": 0.00016285373335436022, "loss": 2.9953, "step": 39859 }, { "epoch": 1.95, "grad_norm": 0.7192272543907166, "learning_rate": 0.00016284004035660915, "loss": 2.9652, "step": 39860 }, { "epoch": 1.95, "grad_norm": 0.705686092376709, "learning_rate": 0.00016282634772011112, "loss": 2.9451, "step": 39861 }, { "epoch": 1.95, "grad_norm": 0.6376373767852783, "learning_rate": 0.00016281265544490202, "loss": 3.0674, "step": 39862 }, { "epoch": 1.95, "grad_norm": 0.680194079875946, "learning_rate": 0.00016279896353101793, "loss": 3.0108, "step": 39863 }, { "epoch": 1.95, "grad_norm": 0.6736658811569214, "learning_rate": 0.00016278527197849507, "loss": 2.8899, "step": 39864 }, { "epoch": 1.95, "grad_norm": 0.688944935798645, "learning_rate": 0.00016277158078736932, "loss": 2.7236, "step": 39865 }, { "epoch": 1.95, "grad_norm": 0.6319748163223267, "learning_rate": 0.00016275788995767692, "loss": 2.7577, "step": 39866 }, { "epoch": 1.95, "grad_norm": 0.683836817741394, "learning_rate": 0.00016274419948945383, "loss": 2.9514, "step": 39867 }, { "epoch": 1.95, "grad_norm": 0.6992816925048828, "learning_rate": 0.00016273050938273598, "loss": 3.1241, "step": 39868 }, { "epoch": 1.95, "grad_norm": 0.6931037306785583, "learning_rate": 0.00016271681963755968, "loss": 3.0657, "step": 39869 }, { "epoch": 1.95, "grad_norm": 0.7147294282913208, "learning_rate": 0.0001627031302539607, "loss": 2.9349, "step": 39870 }, { "epoch": 1.95, "grad_norm": 0.6773857474327087, "learning_rate": 0.0001626894412319754, "loss": 2.9131, "step": 39871 }, { "epoch": 1.95, "grad_norm": 0.6557199954986572, "learning_rate": 0.0001626757525716396, "loss": 2.7906, "step": 39872 }, { "epoch": 1.95, "grad_norm": 0.6949388980865479, "learning_rate": 0.00016266206427298954, "loss": 3.2067, "step": 39873 }, { "epoch": 1.95, "grad_norm": 0.6508885025978088, "learning_rate": 0.00016264837633606113, "loss": 2.8906, "step": 39874 }, { "epoch": 1.95, "grad_norm": 0.6729174852371216, "learning_rate": 0.00016263468876089038, "loss": 3.0934, "step": 39875 }, { "epoch": 1.95, "grad_norm": 0.6540240049362183, "learning_rate": 0.00016262100154751353, "loss": 2.9799, "step": 39876 }, { "epoch": 1.95, "grad_norm": 0.7319003939628601, "learning_rate": 0.00016260731469596639, "loss": 3.0345, "step": 39877 }, { "epoch": 1.95, "grad_norm": 0.6470528841018677, "learning_rate": 0.00016259362820628513, "loss": 2.9739, "step": 39878 }, { "epoch": 1.95, "grad_norm": 0.6552753448486328, "learning_rate": 0.000162579942078506, "loss": 3.0781, "step": 39879 }, { "epoch": 1.95, "grad_norm": 0.6793324947357178, "learning_rate": 0.00016256625631266467, "loss": 3.0156, "step": 39880 }, { "epoch": 1.95, "grad_norm": 0.636117160320282, "learning_rate": 0.00016255257090879753, "loss": 3.1659, "step": 39881 }, { "epoch": 1.95, "grad_norm": 0.6674087643623352, "learning_rate": 0.00016253888586694045, "loss": 3.0635, "step": 39882 }, { "epoch": 1.95, "grad_norm": 0.7054731249809265, "learning_rate": 0.00016252520118712935, "loss": 3.1278, "step": 39883 }, { "epoch": 1.95, "grad_norm": 0.6451898813247681, "learning_rate": 0.00016251151686940055, "loss": 2.7129, "step": 39884 }, { "epoch": 1.95, "grad_norm": 0.6893364787101746, "learning_rate": 0.00016249783291378986, "loss": 3.0023, "step": 39885 }, { "epoch": 1.95, "grad_norm": 0.6900562047958374, "learning_rate": 0.0001624841493203335, "loss": 3.0193, "step": 39886 }, { "epoch": 1.95, "grad_norm": 0.6447057723999023, "learning_rate": 0.00016247046608906728, "loss": 3.0807, "step": 39887 }, { "epoch": 1.95, "grad_norm": 0.7077726125717163, "learning_rate": 0.00016245678322002755, "loss": 2.8504, "step": 39888 }, { "epoch": 1.95, "grad_norm": 0.6641325354576111, "learning_rate": 0.00016244310071325017, "loss": 3.2085, "step": 39889 }, { "epoch": 1.95, "grad_norm": 0.701665461063385, "learning_rate": 0.00016242941856877107, "loss": 3.1182, "step": 39890 }, { "epoch": 1.95, "grad_norm": 0.6562961339950562, "learning_rate": 0.00016241573678662656, "loss": 3.1035, "step": 39891 }, { "epoch": 1.96, "grad_norm": 0.6483310461044312, "learning_rate": 0.00016240205536685235, "loss": 2.9854, "step": 39892 }, { "epoch": 1.96, "grad_norm": 0.6495265364646912, "learning_rate": 0.0001623883743094848, "loss": 3.0443, "step": 39893 }, { "epoch": 1.96, "grad_norm": 0.7036017179489136, "learning_rate": 0.00016237469361455963, "loss": 3.0209, "step": 39894 }, { "epoch": 1.96, "grad_norm": 0.674313485622406, "learning_rate": 0.00016236101328211317, "loss": 2.8208, "step": 39895 }, { "epoch": 1.96, "grad_norm": 0.7025102972984314, "learning_rate": 0.00016234733331218132, "loss": 2.8201, "step": 39896 }, { "epoch": 1.96, "grad_norm": 0.66330885887146, "learning_rate": 0.00016233365370479997, "loss": 3.1881, "step": 39897 }, { "epoch": 1.96, "grad_norm": 0.6553804874420166, "learning_rate": 0.0001623199744600054, "loss": 2.9553, "step": 39898 }, { "epoch": 1.96, "grad_norm": 0.6467947959899902, "learning_rate": 0.00016230629557783342, "loss": 2.9607, "step": 39899 }, { "epoch": 1.96, "grad_norm": 0.6393236517906189, "learning_rate": 0.00016229261705832015, "loss": 3.1014, "step": 39900 }, { "epoch": 1.96, "grad_norm": 0.675190269947052, "learning_rate": 0.00016227893890150173, "loss": 2.8303, "step": 39901 }, { "epoch": 1.96, "grad_norm": 0.6858446002006531, "learning_rate": 0.00016226526110741396, "loss": 2.9271, "step": 39902 }, { "epoch": 1.96, "grad_norm": 0.6574386954307556, "learning_rate": 0.00016225158367609314, "loss": 2.8188, "step": 39903 }, { "epoch": 1.96, "grad_norm": 0.6161906123161316, "learning_rate": 0.0001622379066075751, "loss": 2.8077, "step": 39904 }, { "epoch": 1.96, "grad_norm": 0.6860578060150146, "learning_rate": 0.0001622242299018958, "loss": 2.8483, "step": 39905 }, { "epoch": 1.96, "grad_norm": 0.7148755788803101, "learning_rate": 0.00016221055355909148, "loss": 3.0416, "step": 39906 }, { "epoch": 1.96, "grad_norm": 0.7170261740684509, "learning_rate": 0.00016219687757919793, "loss": 2.8434, "step": 39907 }, { "epoch": 1.96, "grad_norm": 0.6350681185722351, "learning_rate": 0.00016218320196225138, "loss": 3.034, "step": 39908 }, { "epoch": 1.96, "grad_norm": 0.6371099948883057, "learning_rate": 0.00016216952670828762, "loss": 3.0398, "step": 39909 }, { "epoch": 1.96, "grad_norm": 0.6577742695808411, "learning_rate": 0.00016215585181734296, "loss": 2.7016, "step": 39910 }, { "epoch": 1.96, "grad_norm": 0.6610209345817566, "learning_rate": 0.00016214217728945324, "loss": 3.0579, "step": 39911 }, { "epoch": 1.96, "grad_norm": 0.685881495475769, "learning_rate": 0.00016212850312465437, "loss": 2.9577, "step": 39912 }, { "epoch": 1.96, "grad_norm": 0.6734350919723511, "learning_rate": 0.00016211482932298264, "loss": 3.0301, "step": 39913 }, { "epoch": 1.96, "grad_norm": 0.6929466128349304, "learning_rate": 0.00016210115588447378, "loss": 3.0674, "step": 39914 }, { "epoch": 1.96, "grad_norm": 0.6628326177597046, "learning_rate": 0.00016208748280916393, "loss": 3.0554, "step": 39915 }, { "epoch": 1.96, "grad_norm": 0.6858869791030884, "learning_rate": 0.00016207381009708923, "loss": 2.9002, "step": 39916 }, { "epoch": 1.96, "grad_norm": 0.6722764372825623, "learning_rate": 0.00016206013774828557, "loss": 2.8665, "step": 39917 }, { "epoch": 1.96, "grad_norm": 0.6647232174873352, "learning_rate": 0.00016204646576278893, "loss": 3.0932, "step": 39918 }, { "epoch": 1.96, "grad_norm": 0.7160334587097168, "learning_rate": 0.00016203279414063525, "loss": 2.9176, "step": 39919 }, { "epoch": 1.96, "grad_norm": 0.6342455148696899, "learning_rate": 0.00016201912288186067, "loss": 3.0347, "step": 39920 }, { "epoch": 1.96, "grad_norm": 0.711457371711731, "learning_rate": 0.00016200545198650126, "loss": 3.0407, "step": 39921 }, { "epoch": 1.96, "grad_norm": 0.6820720434188843, "learning_rate": 0.00016199178145459277, "loss": 3.1249, "step": 39922 }, { "epoch": 1.96, "grad_norm": 0.7002881765365601, "learning_rate": 0.00016197811128617153, "loss": 2.8607, "step": 39923 }, { "epoch": 1.96, "grad_norm": 0.7007981538772583, "learning_rate": 0.0001619644414812733, "loss": 2.8763, "step": 39924 }, { "epoch": 1.96, "grad_norm": 0.6570713520050049, "learning_rate": 0.0001619507720399342, "loss": 2.885, "step": 39925 }, { "epoch": 1.96, "grad_norm": 0.6478700637817383, "learning_rate": 0.00016193710296219025, "loss": 3.041, "step": 39926 }, { "epoch": 1.96, "grad_norm": 0.7194265127182007, "learning_rate": 0.0001619234342480773, "loss": 2.9688, "step": 39927 }, { "epoch": 1.96, "grad_norm": 0.6922836303710938, "learning_rate": 0.00016190976589763151, "loss": 2.9779, "step": 39928 }, { "epoch": 1.96, "grad_norm": 0.6822351217269897, "learning_rate": 0.00016189609791088878, "loss": 2.9213, "step": 39929 }, { "epoch": 1.96, "grad_norm": 0.6374616622924805, "learning_rate": 0.00016188243028788507, "loss": 2.8969, "step": 39930 }, { "epoch": 1.96, "grad_norm": 0.6212733387947083, "learning_rate": 0.00016186876302865663, "loss": 2.8603, "step": 39931 }, { "epoch": 1.96, "grad_norm": 0.6491883397102356, "learning_rate": 0.00016185509613323928, "loss": 3.0147, "step": 39932 }, { "epoch": 1.96, "grad_norm": 0.6715843677520752, "learning_rate": 0.000161841429601669, "loss": 2.8747, "step": 39933 }, { "epoch": 1.96, "grad_norm": 0.658258318901062, "learning_rate": 0.0001618277634339817, "loss": 2.9536, "step": 39934 }, { "epoch": 1.96, "grad_norm": 0.7133314609527588, "learning_rate": 0.00016181409763021346, "loss": 3.0629, "step": 39935 }, { "epoch": 1.96, "grad_norm": 0.6304369568824768, "learning_rate": 0.00016180043219040041, "loss": 2.8939, "step": 39936 }, { "epoch": 1.96, "grad_norm": 0.6943387985229492, "learning_rate": 0.00016178676711457832, "loss": 3.0713, "step": 39937 }, { "epoch": 1.96, "grad_norm": 0.6555782556533813, "learning_rate": 0.00016177310240278342, "loss": 2.8656, "step": 39938 }, { "epoch": 1.96, "grad_norm": 0.634103000164032, "learning_rate": 0.00016175943805505152, "loss": 2.9851, "step": 39939 }, { "epoch": 1.96, "grad_norm": 0.6928660869598389, "learning_rate": 0.00016174577407141857, "loss": 2.9063, "step": 39940 }, { "epoch": 1.96, "grad_norm": 0.6522265076637268, "learning_rate": 0.0001617321104519208, "loss": 2.973, "step": 39941 }, { "epoch": 1.96, "grad_norm": 0.7050772905349731, "learning_rate": 0.00016171844719659384, "loss": 2.8929, "step": 39942 }, { "epoch": 1.96, "grad_norm": 0.6689801216125488, "learning_rate": 0.000161704784305474, "loss": 3.0081, "step": 39943 }, { "epoch": 1.96, "grad_norm": 0.7276343107223511, "learning_rate": 0.0001616911217785971, "loss": 2.9564, "step": 39944 }, { "epoch": 1.96, "grad_norm": 0.6313931345939636, "learning_rate": 0.00016167745961599914, "loss": 3.0023, "step": 39945 }, { "epoch": 1.96, "grad_norm": 0.6859260201454163, "learning_rate": 0.0001616637978177162, "loss": 2.778, "step": 39946 }, { "epoch": 1.96, "grad_norm": 0.6386987566947937, "learning_rate": 0.00016165013638378423, "loss": 2.9645, "step": 39947 }, { "epoch": 1.96, "grad_norm": 0.6548264026641846, "learning_rate": 0.0001616364753142392, "loss": 2.9473, "step": 39948 }, { "epoch": 1.96, "grad_norm": 0.6422974467277527, "learning_rate": 0.00016162281460911694, "loss": 2.829, "step": 39949 }, { "epoch": 1.96, "grad_norm": 0.6441436409950256, "learning_rate": 0.0001616091542684535, "loss": 3.045, "step": 39950 }, { "epoch": 1.96, "grad_norm": 0.6431066989898682, "learning_rate": 0.0001615954942922851, "loss": 3.079, "step": 39951 }, { "epoch": 1.96, "grad_norm": 0.6600309610366821, "learning_rate": 0.00016158183468064738, "loss": 3.1532, "step": 39952 }, { "epoch": 1.96, "grad_norm": 0.7169448733329773, "learning_rate": 0.00016156817543357656, "loss": 2.9717, "step": 39953 }, { "epoch": 1.96, "grad_norm": 0.6597325801849365, "learning_rate": 0.00016155451655110856, "loss": 3.0926, "step": 39954 }, { "epoch": 1.96, "grad_norm": 0.6279834508895874, "learning_rate": 0.0001615408580332792, "loss": 2.9351, "step": 39955 }, { "epoch": 1.96, "grad_norm": 0.6826247572898865, "learning_rate": 0.00016152719988012467, "loss": 2.9439, "step": 39956 }, { "epoch": 1.96, "grad_norm": 0.6647608876228333, "learning_rate": 0.00016151354209168074, "loss": 2.9722, "step": 39957 }, { "epoch": 1.96, "grad_norm": 0.6797239780426025, "learning_rate": 0.00016149988466798365, "loss": 3.0933, "step": 39958 }, { "epoch": 1.96, "grad_norm": 0.7346055507659912, "learning_rate": 0.000161486227609069, "loss": 2.8231, "step": 39959 }, { "epoch": 1.96, "grad_norm": 0.6545577049255371, "learning_rate": 0.00016147257091497316, "loss": 3.0375, "step": 39960 }, { "epoch": 1.96, "grad_norm": 0.6826615929603577, "learning_rate": 0.0001614589145857319, "loss": 2.7554, "step": 39961 }, { "epoch": 1.96, "grad_norm": 0.6645894646644592, "learning_rate": 0.00016144525862138106, "loss": 2.9856, "step": 39962 }, { "epoch": 1.96, "grad_norm": 0.6471953392028809, "learning_rate": 0.00016143160302195682, "loss": 3.0498, "step": 39963 }, { "epoch": 1.96, "grad_norm": 0.6736137866973877, "learning_rate": 0.00016141794778749502, "loss": 3.0682, "step": 39964 }, { "epoch": 1.96, "grad_norm": 0.65671706199646, "learning_rate": 0.00016140429291803166, "loss": 2.9904, "step": 39965 }, { "epoch": 1.96, "grad_norm": 0.6748507618904114, "learning_rate": 0.00016139063841360285, "loss": 3.1008, "step": 39966 }, { "epoch": 1.96, "grad_norm": 0.6879149079322815, "learning_rate": 0.00016137698427424427, "loss": 2.9373, "step": 39967 }, { "epoch": 1.96, "grad_norm": 0.706725001335144, "learning_rate": 0.00016136333049999215, "loss": 3.0366, "step": 39968 }, { "epoch": 1.96, "grad_norm": 0.6533642411231995, "learning_rate": 0.00016134967709088236, "loss": 2.8464, "step": 39969 }, { "epoch": 1.96, "grad_norm": 0.6878400444984436, "learning_rate": 0.0001613360240469508, "loss": 3.0078, "step": 39970 }, { "epoch": 1.96, "grad_norm": 0.6259313225746155, "learning_rate": 0.00016132237136823338, "loss": 2.9485, "step": 39971 }, { "epoch": 1.96, "grad_norm": 0.6699492335319519, "learning_rate": 0.00016130871905476615, "loss": 3.141, "step": 39972 }, { "epoch": 1.96, "grad_norm": 0.653860867023468, "learning_rate": 0.0001612950671065852, "loss": 3.0009, "step": 39973 }, { "epoch": 1.96, "grad_norm": 0.6694608330726624, "learning_rate": 0.00016128141552372618, "loss": 3.0055, "step": 39974 }, { "epoch": 1.96, "grad_norm": 0.6927410960197449, "learning_rate": 0.00016126776430622536, "loss": 3.1213, "step": 39975 }, { "epoch": 1.96, "grad_norm": 0.690608561038971, "learning_rate": 0.00016125411345411855, "loss": 2.9176, "step": 39976 }, { "epoch": 1.96, "grad_norm": 0.6918655633926392, "learning_rate": 0.00016124046296744159, "loss": 2.9613, "step": 39977 }, { "epoch": 1.96, "grad_norm": 0.6636469960212708, "learning_rate": 0.00016122681284623064, "loss": 2.9775, "step": 39978 }, { "epoch": 1.96, "grad_norm": 0.6595686078071594, "learning_rate": 0.00016121316309052145, "loss": 2.8676, "step": 39979 }, { "epoch": 1.96, "grad_norm": 0.6494026184082031, "learning_rate": 0.00016119951370035021, "loss": 2.8945, "step": 39980 }, { "epoch": 1.96, "grad_norm": 0.7089259624481201, "learning_rate": 0.0001611858646757526, "loss": 2.8564, "step": 39981 }, { "epoch": 1.96, "grad_norm": 0.6717854142189026, "learning_rate": 0.0001611722160167647, "loss": 2.9878, "step": 39982 }, { "epoch": 1.96, "grad_norm": 0.6851207613945007, "learning_rate": 0.00016115856772342275, "loss": 3.0027, "step": 39983 }, { "epoch": 1.96, "grad_norm": 0.6713508367538452, "learning_rate": 0.00016114491979576215, "loss": 3.0956, "step": 39984 }, { "epoch": 1.96, "grad_norm": 0.6420844793319702, "learning_rate": 0.00016113127223381919, "loss": 3.0019, "step": 39985 }, { "epoch": 1.96, "grad_norm": 0.63764488697052, "learning_rate": 0.00016111762503762965, "loss": 3.0084, "step": 39986 }, { "epoch": 1.96, "grad_norm": 0.6774821877479553, "learning_rate": 0.00016110397820722953, "loss": 2.9804, "step": 39987 }, { "epoch": 1.96, "grad_norm": 0.6736724376678467, "learning_rate": 0.00016109033174265498, "loss": 3.0381, "step": 39988 }, { "epoch": 1.96, "grad_norm": 0.6819289922714233, "learning_rate": 0.00016107668564394157, "loss": 3.1965, "step": 39989 }, { "epoch": 1.96, "grad_norm": 0.7004780769348145, "learning_rate": 0.0001610630399111256, "loss": 2.8297, "step": 39990 }, { "epoch": 1.96, "grad_norm": 0.6639523506164551, "learning_rate": 0.00016104939454424284, "loss": 3.05, "step": 39991 }, { "epoch": 1.96, "grad_norm": 0.7015445828437805, "learning_rate": 0.00016103574954332906, "loss": 2.9532, "step": 39992 }, { "epoch": 1.96, "grad_norm": 0.6690757274627686, "learning_rate": 0.00016102210490842058, "loss": 2.9515, "step": 39993 }, { "epoch": 1.96, "grad_norm": 0.6284964680671692, "learning_rate": 0.00016100846063955295, "loss": 3.1455, "step": 39994 }, { "epoch": 1.96, "grad_norm": 0.7172853350639343, "learning_rate": 0.00016099481673676242, "loss": 2.9555, "step": 39995 }, { "epoch": 1.96, "grad_norm": 0.6635323166847229, "learning_rate": 0.00016098117320008465, "loss": 3.0923, "step": 39996 }, { "epoch": 1.96, "grad_norm": 0.6090884208679199, "learning_rate": 0.00016096753002955587, "loss": 3.099, "step": 39997 }, { "epoch": 1.96, "grad_norm": 0.6727035045623779, "learning_rate": 0.00016095388722521184, "loss": 3.0681, "step": 39998 }, { "epoch": 1.96, "grad_norm": 0.6885789036750793, "learning_rate": 0.0001609402447870884, "loss": 3.3607, "step": 39999 }, { "epoch": 1.96, "grad_norm": 0.6628068685531616, "learning_rate": 0.0001609266027152217, "loss": 2.766, "step": 40000 }, { "epoch": 1.96, "grad_norm": 0.7326607704162598, "learning_rate": 0.00016091296100964746, "loss": 2.8427, "step": 40001 }, { "epoch": 1.96, "grad_norm": 0.637006938457489, "learning_rate": 0.00016089931967040171, "loss": 2.9066, "step": 40002 }, { "epoch": 1.96, "grad_norm": 0.6497381329536438, "learning_rate": 0.0001608856786975205, "loss": 2.8787, "step": 40003 }, { "epoch": 1.96, "grad_norm": 0.6747108101844788, "learning_rate": 0.00016087203809103952, "loss": 3.0503, "step": 40004 }, { "epoch": 1.96, "grad_norm": 0.6333727240562439, "learning_rate": 0.00016085839785099493, "loss": 2.9565, "step": 40005 }, { "epoch": 1.96, "grad_norm": 0.6521469950675964, "learning_rate": 0.00016084475797742252, "loss": 3.0972, "step": 40006 }, { "epoch": 1.96, "grad_norm": 0.7146298885345459, "learning_rate": 0.00016083111847035815, "loss": 2.9459, "step": 40007 }, { "epoch": 1.96, "grad_norm": 0.6354973912239075, "learning_rate": 0.00016081747932983792, "loss": 3.2163, "step": 40008 }, { "epoch": 1.96, "grad_norm": 0.6594139337539673, "learning_rate": 0.00016080384055589756, "loss": 2.9049, "step": 40009 }, { "epoch": 1.96, "grad_norm": 0.693379282951355, "learning_rate": 0.00016079020214857325, "loss": 2.8049, "step": 40010 }, { "epoch": 1.96, "grad_norm": 0.6748722791671753, "learning_rate": 0.0001607765641079006, "loss": 2.7994, "step": 40011 }, { "epoch": 1.96, "grad_norm": 0.6617879867553711, "learning_rate": 0.0001607629264339158, "loss": 2.9751, "step": 40012 }, { "epoch": 1.96, "grad_norm": 0.6831246018409729, "learning_rate": 0.00016074928912665468, "loss": 2.9856, "step": 40013 }, { "epoch": 1.96, "grad_norm": 0.6496304869651794, "learning_rate": 0.000160735652186153, "loss": 2.932, "step": 40014 }, { "epoch": 1.96, "grad_norm": 0.6654090881347656, "learning_rate": 0.0001607220156124469, "loss": 3.2564, "step": 40015 }, { "epoch": 1.96, "grad_norm": 0.690848171710968, "learning_rate": 0.00016070837940557214, "loss": 3.0275, "step": 40016 }, { "epoch": 1.96, "grad_norm": 0.6424733400344849, "learning_rate": 0.0001606947435655647, "loss": 3.1051, "step": 40017 }, { "epoch": 1.96, "grad_norm": 0.6729825735092163, "learning_rate": 0.00016068110809246058, "loss": 3.0315, "step": 40018 }, { "epoch": 1.96, "grad_norm": 0.6387492418289185, "learning_rate": 0.00016066747298629558, "loss": 3.0497, "step": 40019 }, { "epoch": 1.96, "grad_norm": 0.6648398637771606, "learning_rate": 0.0001606538382471057, "loss": 2.9517, "step": 40020 }, { "epoch": 1.96, "grad_norm": 0.6861499547958374, "learning_rate": 0.00016064020387492666, "loss": 2.9324, "step": 40021 }, { "epoch": 1.96, "grad_norm": 0.6802994608879089, "learning_rate": 0.00016062656986979448, "loss": 2.9477, "step": 40022 }, { "epoch": 1.96, "grad_norm": 0.6521588563919067, "learning_rate": 0.00016061293623174523, "loss": 2.8447, "step": 40023 }, { "epoch": 1.96, "grad_norm": 0.7014820575714111, "learning_rate": 0.00016059930296081457, "loss": 2.9435, "step": 40024 }, { "epoch": 1.96, "grad_norm": 0.634397029876709, "learning_rate": 0.00016058567005703864, "loss": 3.1462, "step": 40025 }, { "epoch": 1.96, "grad_norm": 0.6457260847091675, "learning_rate": 0.00016057203752045307, "loss": 2.9698, "step": 40026 }, { "epoch": 1.96, "grad_norm": 0.7065685391426086, "learning_rate": 0.00016055840535109407, "loss": 3.1251, "step": 40027 }, { "epoch": 1.96, "grad_norm": 0.6713890433311462, "learning_rate": 0.00016054477354899739, "loss": 2.9537, "step": 40028 }, { "epoch": 1.96, "grad_norm": 0.674758791923523, "learning_rate": 0.0001605311421141988, "loss": 3.1935, "step": 40029 }, { "epoch": 1.96, "grad_norm": 0.6733911633491516, "learning_rate": 0.00016051751104673447, "loss": 3.0881, "step": 40030 }, { "epoch": 1.96, "grad_norm": 0.6857780814170837, "learning_rate": 0.00016050388034664006, "loss": 2.8133, "step": 40031 }, { "epoch": 1.96, "grad_norm": 0.6858083605766296, "learning_rate": 0.00016049025001395158, "loss": 2.9045, "step": 40032 }, { "epoch": 1.96, "grad_norm": 0.6884641647338867, "learning_rate": 0.00016047662004870506, "loss": 3.1941, "step": 40033 }, { "epoch": 1.96, "grad_norm": 0.6528140902519226, "learning_rate": 0.00016046299045093624, "loss": 2.9926, "step": 40034 }, { "epoch": 1.96, "grad_norm": 0.644972026348114, "learning_rate": 0.00016044936122068106, "loss": 2.9634, "step": 40035 }, { "epoch": 1.96, "grad_norm": 0.6678783297538757, "learning_rate": 0.00016043573235797532, "loss": 3.021, "step": 40036 }, { "epoch": 1.96, "grad_norm": 0.6511356830596924, "learning_rate": 0.00016042210386285497, "loss": 2.9231, "step": 40037 }, { "epoch": 1.96, "grad_norm": 0.6782459020614624, "learning_rate": 0.00016040847573535607, "loss": 2.848, "step": 40038 }, { "epoch": 1.96, "grad_norm": 0.7153217792510986, "learning_rate": 0.00016039484797551425, "loss": 3.042, "step": 40039 }, { "epoch": 1.96, "grad_norm": 0.6678678393363953, "learning_rate": 0.00016038122058336567, "loss": 3.0906, "step": 40040 }, { "epoch": 1.96, "grad_norm": 0.6697278618812561, "learning_rate": 0.00016036759355894604, "loss": 3.0417, "step": 40041 }, { "epoch": 1.96, "grad_norm": 0.6855822801589966, "learning_rate": 0.00016035396690229124, "loss": 3.2378, "step": 40042 }, { "epoch": 1.96, "grad_norm": 0.6451258659362793, "learning_rate": 0.00016034034061343728, "loss": 3.1821, "step": 40043 }, { "epoch": 1.96, "grad_norm": 0.8559314012527466, "learning_rate": 0.00016032671469241988, "loss": 2.936, "step": 40044 }, { "epoch": 1.96, "grad_norm": 0.6715162396430969, "learning_rate": 0.00016031308913927515, "loss": 3.0956, "step": 40045 }, { "epoch": 1.96, "grad_norm": 0.6763110160827637, "learning_rate": 0.00016029946395403877, "loss": 3.0766, "step": 40046 }, { "epoch": 1.96, "grad_norm": 0.6643348336219788, "learning_rate": 0.0001602858391367468, "loss": 2.9967, "step": 40047 }, { "epoch": 1.96, "grad_norm": 0.6329568028450012, "learning_rate": 0.00016027221468743497, "loss": 3.013, "step": 40048 }, { "epoch": 1.96, "grad_norm": 0.6494516134262085, "learning_rate": 0.0001602585906061393, "loss": 2.789, "step": 40049 }, { "epoch": 1.96, "grad_norm": 0.6577889919281006, "learning_rate": 0.00016024496689289564, "loss": 3.0301, "step": 40050 }, { "epoch": 1.96, "grad_norm": 0.7097561359405518, "learning_rate": 0.0001602313435477397, "loss": 2.8324, "step": 40051 }, { "epoch": 1.96, "grad_norm": 0.652508556842804, "learning_rate": 0.0001602177205707076, "loss": 2.8232, "step": 40052 }, { "epoch": 1.96, "grad_norm": 0.6659618020057678, "learning_rate": 0.00016020409796183507, "loss": 2.7219, "step": 40053 }, { "epoch": 1.96, "grad_norm": 0.6810920834541321, "learning_rate": 0.000160190475721158, "loss": 2.9999, "step": 40054 }, { "epoch": 1.96, "grad_norm": 0.7163436412811279, "learning_rate": 0.00016017685384871243, "loss": 3.0742, "step": 40055 }, { "epoch": 1.96, "grad_norm": 0.6159772276878357, "learning_rate": 0.00016016323234453413, "loss": 2.757, "step": 40056 }, { "epoch": 1.96, "grad_norm": 0.642678439617157, "learning_rate": 0.00016014961120865894, "loss": 2.9309, "step": 40057 }, { "epoch": 1.96, "grad_norm": 0.7379891276359558, "learning_rate": 0.0001601359904411227, "loss": 2.9141, "step": 40058 }, { "epoch": 1.96, "grad_norm": 0.6562783122062683, "learning_rate": 0.0001601223700419613, "loss": 3.2553, "step": 40059 }, { "epoch": 1.96, "grad_norm": 0.6276502013206482, "learning_rate": 0.00016010875001121075, "loss": 3.117, "step": 40060 }, { "epoch": 1.96, "grad_norm": 0.681545615196228, "learning_rate": 0.00016009513034890674, "loss": 3.1442, "step": 40061 }, { "epoch": 1.96, "grad_norm": 0.6715508699417114, "learning_rate": 0.00016008151105508536, "loss": 2.8528, "step": 40062 }, { "epoch": 1.96, "grad_norm": 0.6921969652175903, "learning_rate": 0.00016006789212978233, "loss": 3.1951, "step": 40063 }, { "epoch": 1.96, "grad_norm": 0.635017454624176, "learning_rate": 0.00016005427357303347, "loss": 3.0769, "step": 40064 }, { "epoch": 1.96, "grad_norm": 0.6426366567611694, "learning_rate": 0.00016004065538487478, "loss": 3.112, "step": 40065 }, { "epoch": 1.96, "grad_norm": 0.6538386940956116, "learning_rate": 0.00016002703756534195, "loss": 3.0287, "step": 40066 }, { "epoch": 1.96, "grad_norm": 0.6401711702346802, "learning_rate": 0.00016001342011447113, "loss": 2.9856, "step": 40067 }, { "epoch": 1.96, "grad_norm": 0.6818058490753174, "learning_rate": 0.00015999980303229788, "loss": 2.9975, "step": 40068 }, { "epoch": 1.96, "grad_norm": 0.6866037845611572, "learning_rate": 0.0001599861863188582, "loss": 2.8041, "step": 40069 }, { "epoch": 1.96, "grad_norm": 0.6289970874786377, "learning_rate": 0.00015997256997418808, "loss": 2.9064, "step": 40070 }, { "epoch": 1.96, "grad_norm": 0.6188867688179016, "learning_rate": 0.0001599589539983233, "loss": 2.975, "step": 40071 }, { "epoch": 1.96, "grad_norm": 0.6899058818817139, "learning_rate": 0.00015994533839129961, "loss": 3.1588, "step": 40072 }, { "epoch": 1.96, "grad_norm": 0.6458503603935242, "learning_rate": 0.00015993172315315285, "loss": 2.9297, "step": 40073 }, { "epoch": 1.96, "grad_norm": 0.6466934084892273, "learning_rate": 0.000159918108283919, "loss": 2.9095, "step": 40074 }, { "epoch": 1.96, "grad_norm": 0.6431629657745361, "learning_rate": 0.000159904493783634, "loss": 3.019, "step": 40075 }, { "epoch": 1.96, "grad_norm": 0.6387059688568115, "learning_rate": 0.00015989087965233347, "loss": 2.7465, "step": 40076 }, { "epoch": 1.96, "grad_norm": 0.6597829461097717, "learning_rate": 0.00015987726589005352, "loss": 3.1271, "step": 40077 }, { "epoch": 1.96, "grad_norm": 0.673991858959198, "learning_rate": 0.00015986365249682992, "loss": 3.0424, "step": 40078 }, { "epoch": 1.96, "grad_norm": 0.6489351391792297, "learning_rate": 0.00015985003947269834, "loss": 2.8951, "step": 40079 }, { "epoch": 1.96, "grad_norm": 0.7270581722259521, "learning_rate": 0.00015983642681769494, "loss": 2.8268, "step": 40080 }, { "epoch": 1.96, "grad_norm": 0.7210774421691895, "learning_rate": 0.00015982281453185522, "loss": 3.0683, "step": 40081 }, { "epoch": 1.96, "grad_norm": 0.6519688963890076, "learning_rate": 0.0001598092026152154, "loss": 2.9316, "step": 40082 }, { "epoch": 1.96, "grad_norm": 0.6710745692253113, "learning_rate": 0.00015979559106781103, "loss": 3.1201, "step": 40083 }, { "epoch": 1.96, "grad_norm": 0.6843429803848267, "learning_rate": 0.00015978197988967811, "loss": 2.9192, "step": 40084 }, { "epoch": 1.96, "grad_norm": 0.6874869465827942, "learning_rate": 0.00015976836908085268, "loss": 2.9876, "step": 40085 }, { "epoch": 1.96, "grad_norm": 0.6737939715385437, "learning_rate": 0.00015975475864137017, "loss": 2.9837, "step": 40086 }, { "epoch": 1.96, "grad_norm": 0.69536292552948, "learning_rate": 0.00015974114857126675, "loss": 3.025, "step": 40087 }, { "epoch": 1.96, "grad_norm": 0.6652324795722961, "learning_rate": 0.00015972753887057799, "loss": 2.9949, "step": 40088 }, { "epoch": 1.96, "grad_norm": 0.6829466223716736, "learning_rate": 0.0001597139295393399, "loss": 3.1127, "step": 40089 }, { "epoch": 1.96, "grad_norm": 0.7316030859947205, "learning_rate": 0.0001597003205775885, "loss": 2.9742, "step": 40090 }, { "epoch": 1.96, "grad_norm": 0.6916874051094055, "learning_rate": 0.0001596867119853593, "loss": 3.2263, "step": 40091 }, { "epoch": 1.96, "grad_norm": 0.647091805934906, "learning_rate": 0.00015967310376268837, "loss": 3.0349, "step": 40092 }, { "epoch": 1.96, "grad_norm": 0.6750726699829102, "learning_rate": 0.00015965949590961156, "loss": 2.8189, "step": 40093 }, { "epoch": 1.96, "grad_norm": 0.6377853751182556, "learning_rate": 0.00015964588842616446, "loss": 2.7829, "step": 40094 }, { "epoch": 1.96, "grad_norm": 0.7172527313232422, "learning_rate": 0.00015963228131238324, "loss": 3.0597, "step": 40095 }, { "epoch": 1.97, "grad_norm": 0.6895819902420044, "learning_rate": 0.00015961867456830339, "loss": 3.0861, "step": 40096 }, { "epoch": 1.97, "grad_norm": 0.7082463502883911, "learning_rate": 0.0001596050681939611, "loss": 2.7711, "step": 40097 }, { "epoch": 1.97, "grad_norm": 0.6395615339279175, "learning_rate": 0.00015959146218939193, "loss": 3.0708, "step": 40098 }, { "epoch": 1.97, "grad_norm": 0.6931965351104736, "learning_rate": 0.00015957785655463193, "loss": 2.9829, "step": 40099 }, { "epoch": 1.97, "grad_norm": 0.6493892073631287, "learning_rate": 0.00015956425128971682, "loss": 3.0342, "step": 40100 }, { "epoch": 1.97, "grad_norm": 0.6747897267341614, "learning_rate": 0.0001595506463946823, "loss": 2.8114, "step": 40101 }, { "epoch": 1.97, "grad_norm": 0.6698330044746399, "learning_rate": 0.00015953704186956452, "loss": 3.0373, "step": 40102 }, { "epoch": 1.97, "grad_norm": 0.6742802262306213, "learning_rate": 0.00015952343771439903, "loss": 3.0902, "step": 40103 }, { "epoch": 1.97, "grad_norm": 0.6375805735588074, "learning_rate": 0.00015950983392922175, "loss": 3.0517, "step": 40104 }, { "epoch": 1.97, "grad_norm": 0.6356774568557739, "learning_rate": 0.0001594962305140686, "loss": 2.8752, "step": 40105 }, { "epoch": 1.97, "grad_norm": 0.6412781476974487, "learning_rate": 0.00015948262746897526, "loss": 3.0477, "step": 40106 }, { "epoch": 1.97, "grad_norm": 0.6381820440292358, "learning_rate": 0.00015946902479397777, "loss": 3.0045, "step": 40107 }, { "epoch": 1.97, "grad_norm": 0.6312680840492249, "learning_rate": 0.0001594554224891118, "loss": 2.9117, "step": 40108 }, { "epoch": 1.97, "grad_norm": 0.6366028785705566, "learning_rate": 0.0001594418205544131, "loss": 3.0584, "step": 40109 }, { "epoch": 1.97, "grad_norm": 0.6530911922454834, "learning_rate": 0.0001594282189899177, "loss": 2.8788, "step": 40110 }, { "epoch": 1.97, "grad_norm": 0.653728187084198, "learning_rate": 0.0001594146177956612, "loss": 2.8951, "step": 40111 }, { "epoch": 1.97, "grad_norm": 0.6730470061302185, "learning_rate": 0.0001594010169716797, "loss": 3.071, "step": 40112 }, { "epoch": 1.97, "grad_norm": 0.6829031705856323, "learning_rate": 0.00015938741651800868, "loss": 2.9737, "step": 40113 }, { "epoch": 1.97, "grad_norm": 0.6463053226470947, "learning_rate": 0.00015937381643468427, "loss": 3.0131, "step": 40114 }, { "epoch": 1.97, "grad_norm": 0.6664642095565796, "learning_rate": 0.00015936021672174216, "loss": 2.7908, "step": 40115 }, { "epoch": 1.97, "grad_norm": 0.6486683487892151, "learning_rate": 0.00015934661737921808, "loss": 3.1284, "step": 40116 }, { "epoch": 1.97, "grad_norm": 0.6821164488792419, "learning_rate": 0.00015933301840714804, "loss": 3.0977, "step": 40117 }, { "epoch": 1.97, "grad_norm": 0.6829363107681274, "learning_rate": 0.00015931941980556767, "loss": 3.0408, "step": 40118 }, { "epoch": 1.97, "grad_norm": 0.7041226625442505, "learning_rate": 0.00015930582157451284, "loss": 2.9775, "step": 40119 }, { "epoch": 1.97, "grad_norm": 0.6732624769210815, "learning_rate": 0.00015929222371401956, "loss": 2.9952, "step": 40120 }, { "epoch": 1.97, "grad_norm": 0.6260468363761902, "learning_rate": 0.00015927862622412345, "loss": 3.1095, "step": 40121 }, { "epoch": 1.97, "grad_norm": 0.6741329431533813, "learning_rate": 0.00015926502910486033, "loss": 2.9427, "step": 40122 }, { "epoch": 1.97, "grad_norm": 0.6579768657684326, "learning_rate": 0.00015925143235626595, "loss": 3.0081, "step": 40123 }, { "epoch": 1.97, "grad_norm": 0.6937235593795776, "learning_rate": 0.00015923783597837635, "loss": 3.1918, "step": 40124 }, { "epoch": 1.97, "grad_norm": 0.6266592144966125, "learning_rate": 0.00015922423997122705, "loss": 2.9933, "step": 40125 }, { "epoch": 1.97, "grad_norm": 0.6419720649719238, "learning_rate": 0.00015921064433485404, "loss": 2.8718, "step": 40126 }, { "epoch": 1.97, "grad_norm": 0.7935446500778198, "learning_rate": 0.00015919704906929317, "loss": 2.76, "step": 40127 }, { "epoch": 1.97, "grad_norm": 0.6469537615776062, "learning_rate": 0.00015918345417458008, "loss": 2.8866, "step": 40128 }, { "epoch": 1.97, "grad_norm": 0.6699641346931458, "learning_rate": 0.00015916985965075078, "loss": 2.925, "step": 40129 }, { "epoch": 1.97, "grad_norm": 0.6339029669761658, "learning_rate": 0.000159156265497841, "loss": 2.9374, "step": 40130 }, { "epoch": 1.97, "grad_norm": 0.6614867448806763, "learning_rate": 0.00015914267171588637, "loss": 2.8457, "step": 40131 }, { "epoch": 1.97, "grad_norm": 0.6942468881607056, "learning_rate": 0.00015912907830492292, "loss": 3.162, "step": 40132 }, { "epoch": 1.97, "grad_norm": 0.6900768280029297, "learning_rate": 0.00015911548526498627, "loss": 3.1054, "step": 40133 }, { "epoch": 1.97, "grad_norm": 0.6508978009223938, "learning_rate": 0.00015910189259611248, "loss": 2.7635, "step": 40134 }, { "epoch": 1.97, "grad_norm": 0.6943615674972534, "learning_rate": 0.00015908830029833702, "loss": 3.0908, "step": 40135 }, { "epoch": 1.97, "grad_norm": 0.6539590954780579, "learning_rate": 0.00015907470837169598, "loss": 2.8507, "step": 40136 }, { "epoch": 1.97, "grad_norm": 0.6708890795707703, "learning_rate": 0.0001590611168162251, "loss": 3.1577, "step": 40137 }, { "epoch": 1.97, "grad_norm": 0.7016001343727112, "learning_rate": 0.00015904752563195994, "loss": 2.9599, "step": 40138 }, { "epoch": 1.97, "grad_norm": 0.716665506362915, "learning_rate": 0.00015903393481893658, "loss": 3.1311, "step": 40139 }, { "epoch": 1.97, "grad_norm": 0.6903090476989746, "learning_rate": 0.0001590203443771906, "loss": 2.9423, "step": 40140 }, { "epoch": 1.97, "grad_norm": 0.6860828995704651, "learning_rate": 0.0001590067543067579, "loss": 3.0349, "step": 40141 }, { "epoch": 1.97, "grad_norm": 0.6501972675323486, "learning_rate": 0.00015899316460767437, "loss": 2.9293, "step": 40142 }, { "epoch": 1.97, "grad_norm": 0.6735811233520508, "learning_rate": 0.00015897957527997574, "loss": 3.2349, "step": 40143 }, { "epoch": 1.97, "grad_norm": 0.6617088317871094, "learning_rate": 0.00015896598632369774, "loss": 2.9299, "step": 40144 }, { "epoch": 1.97, "grad_norm": 0.6840248703956604, "learning_rate": 0.00015895239773887611, "loss": 3.0315, "step": 40145 }, { "epoch": 1.97, "grad_norm": 0.6260735988616943, "learning_rate": 0.0001589388095255467, "loss": 2.7975, "step": 40146 }, { "epoch": 1.97, "grad_norm": 0.7066246867179871, "learning_rate": 0.00015892522168374547, "loss": 2.9681, "step": 40147 }, { "epoch": 1.97, "grad_norm": 0.6458545327186584, "learning_rate": 0.00015891163421350788, "loss": 2.8748, "step": 40148 }, { "epoch": 1.97, "grad_norm": 0.6603613495826721, "learning_rate": 0.00015889804711487007, "loss": 3.029, "step": 40149 }, { "epoch": 1.97, "grad_norm": 0.6358199715614319, "learning_rate": 0.00015888446038786746, "loss": 2.9619, "step": 40150 }, { "epoch": 1.97, "grad_norm": 0.7179542779922485, "learning_rate": 0.00015887087403253618, "loss": 2.9119, "step": 40151 }, { "epoch": 1.97, "grad_norm": 0.6448338627815247, "learning_rate": 0.00015885728804891186, "loss": 2.9725, "step": 40152 }, { "epoch": 1.97, "grad_norm": 0.7053864002227783, "learning_rate": 0.00015884370243703018, "loss": 2.6094, "step": 40153 }, { "epoch": 1.97, "grad_norm": 0.6579211354255676, "learning_rate": 0.0001588301171969271, "loss": 2.9604, "step": 40154 }, { "epoch": 1.97, "grad_norm": 0.6514232754707336, "learning_rate": 0.00015881653232863822, "loss": 3.1176, "step": 40155 }, { "epoch": 1.97, "grad_norm": 0.6942654848098755, "learning_rate": 0.00015880294783219943, "loss": 3.1528, "step": 40156 }, { "epoch": 1.97, "grad_norm": 0.7441583871841431, "learning_rate": 0.0001587893637076466, "loss": 2.9195, "step": 40157 }, { "epoch": 1.97, "grad_norm": 0.6600908041000366, "learning_rate": 0.00015877577995501542, "loss": 2.9988, "step": 40158 }, { "epoch": 1.97, "grad_norm": 0.6638275980949402, "learning_rate": 0.00015876219657434164, "loss": 2.9348, "step": 40159 }, { "epoch": 1.97, "grad_norm": 0.6445525288581848, "learning_rate": 0.00015874861356566096, "loss": 2.8587, "step": 40160 }, { "epoch": 1.97, "grad_norm": 0.6753540635108948, "learning_rate": 0.00015873503092900924, "loss": 2.8209, "step": 40161 }, { "epoch": 1.97, "grad_norm": 0.6722790002822876, "learning_rate": 0.0001587214486644224, "loss": 2.9628, "step": 40162 }, { "epoch": 1.97, "grad_norm": 0.7291383147239685, "learning_rate": 0.00015870786677193592, "loss": 3.0806, "step": 40163 }, { "epoch": 1.97, "grad_norm": 0.6721978187561035, "learning_rate": 0.00015869428525158588, "loss": 3.0379, "step": 40164 }, { "epoch": 1.97, "grad_norm": 0.7075223922729492, "learning_rate": 0.00015868070410340787, "loss": 3.0957, "step": 40165 }, { "epoch": 1.97, "grad_norm": 0.6854174733161926, "learning_rate": 0.00015866712332743757, "loss": 2.9606, "step": 40166 }, { "epoch": 1.97, "grad_norm": 0.678521454334259, "learning_rate": 0.00015865354292371098, "loss": 2.8023, "step": 40167 }, { "epoch": 1.97, "grad_norm": 0.6438807249069214, "learning_rate": 0.0001586399628922637, "loss": 3.0427, "step": 40168 }, { "epoch": 1.97, "grad_norm": 0.6650844216346741, "learning_rate": 0.00015862638323313157, "loss": 3.129, "step": 40169 }, { "epoch": 1.97, "grad_norm": 0.6512045860290527, "learning_rate": 0.00015861280394635028, "loss": 3.1556, "step": 40170 }, { "epoch": 1.97, "grad_norm": 0.6742256879806519, "learning_rate": 0.00015859922503195563, "loss": 2.9698, "step": 40171 }, { "epoch": 1.97, "grad_norm": 0.6796380877494812, "learning_rate": 0.00015858564648998355, "loss": 2.9838, "step": 40172 }, { "epoch": 1.97, "grad_norm": 0.6988964080810547, "learning_rate": 0.00015857206832046967, "loss": 2.9046, "step": 40173 }, { "epoch": 1.97, "grad_norm": 0.6292441487312317, "learning_rate": 0.00015855849052344974, "loss": 3.0492, "step": 40174 }, { "epoch": 1.97, "grad_norm": 0.6308088898658752, "learning_rate": 0.0001585449130989594, "loss": 2.9864, "step": 40175 }, { "epoch": 1.97, "grad_norm": 0.6453452706336975, "learning_rate": 0.00015853133604703452, "loss": 3.0101, "step": 40176 }, { "epoch": 1.97, "grad_norm": 0.7138879299163818, "learning_rate": 0.00015851775936771102, "loss": 3.0329, "step": 40177 }, { "epoch": 1.97, "grad_norm": 0.687667191028595, "learning_rate": 0.00015850418306102436, "loss": 2.7237, "step": 40178 }, { "epoch": 1.97, "grad_norm": 0.720391571521759, "learning_rate": 0.0001584906071270106, "loss": 3.1241, "step": 40179 }, { "epoch": 1.97, "grad_norm": 0.6851097345352173, "learning_rate": 0.0001584770315657053, "loss": 2.8208, "step": 40180 }, { "epoch": 1.97, "grad_norm": 0.6699183583259583, "learning_rate": 0.00015846345637714418, "loss": 3.0279, "step": 40181 }, { "epoch": 1.97, "grad_norm": 0.6366994976997375, "learning_rate": 0.00015844988156136323, "loss": 2.9347, "step": 40182 }, { "epoch": 1.97, "grad_norm": 0.6660268306732178, "learning_rate": 0.00015843630711839788, "loss": 2.9707, "step": 40183 }, { "epoch": 1.97, "grad_norm": 0.6552741527557373, "learning_rate": 0.00015842273304828415, "loss": 2.951, "step": 40184 }, { "epoch": 1.97, "grad_norm": 0.7053703665733337, "learning_rate": 0.0001584091593510576, "loss": 2.9394, "step": 40185 }, { "epoch": 1.97, "grad_norm": 0.6609983444213867, "learning_rate": 0.00015839558602675423, "loss": 2.9316, "step": 40186 }, { "epoch": 1.97, "grad_norm": 0.6549779772758484, "learning_rate": 0.00015838201307540959, "loss": 2.8942, "step": 40187 }, { "epoch": 1.97, "grad_norm": 0.6527225971221924, "learning_rate": 0.0001583684404970594, "loss": 3.155, "step": 40188 }, { "epoch": 1.97, "grad_norm": 0.6407042145729065, "learning_rate": 0.00015835486829173954, "loss": 2.8456, "step": 40189 }, { "epoch": 1.97, "grad_norm": 0.6975271701812744, "learning_rate": 0.00015834129645948564, "loss": 2.7692, "step": 40190 }, { "epoch": 1.97, "grad_norm": 0.6264335513114929, "learning_rate": 0.00015832772500033344, "loss": 2.8132, "step": 40191 }, { "epoch": 1.97, "grad_norm": 0.6382396817207336, "learning_rate": 0.00015831415391431893, "loss": 2.8311, "step": 40192 }, { "epoch": 1.97, "grad_norm": 0.6645261645317078, "learning_rate": 0.0001583005832014775, "loss": 3.2165, "step": 40193 }, { "epoch": 1.97, "grad_norm": 0.6498206257820129, "learning_rate": 0.00015828701286184517, "loss": 2.9257, "step": 40194 }, { "epoch": 1.97, "grad_norm": 0.6160268187522888, "learning_rate": 0.00015827344289545765, "loss": 2.9067, "step": 40195 }, { "epoch": 1.97, "grad_norm": 0.6547241806983948, "learning_rate": 0.00015825987330235044, "loss": 2.8953, "step": 40196 }, { "epoch": 1.97, "grad_norm": 0.6759021282196045, "learning_rate": 0.00015824630408255955, "loss": 2.8691, "step": 40197 }, { "epoch": 1.97, "grad_norm": 0.6542566418647766, "learning_rate": 0.0001582327352361205, "loss": 2.8604, "step": 40198 }, { "epoch": 1.97, "grad_norm": 0.6945884227752686, "learning_rate": 0.0001582191667630693, "loss": 3.0368, "step": 40199 }, { "epoch": 1.97, "grad_norm": 0.6422370076179504, "learning_rate": 0.00015820559866344138, "loss": 2.9877, "step": 40200 }, { "epoch": 1.97, "grad_norm": 0.6469019651412964, "learning_rate": 0.00015819203093727271, "loss": 3.0606, "step": 40201 }, { "epoch": 1.97, "grad_norm": 0.6708106398582458, "learning_rate": 0.000158178463584599, "loss": 3.2249, "step": 40202 }, { "epoch": 1.97, "grad_norm": 0.6726407408714294, "learning_rate": 0.00015816489660545579, "loss": 2.8245, "step": 40203 }, { "epoch": 1.97, "grad_norm": 0.6494171619415283, "learning_rate": 0.00015815132999987905, "loss": 3.1015, "step": 40204 }, { "epoch": 1.97, "grad_norm": 0.6671391725540161, "learning_rate": 0.00015813776376790426, "loss": 2.937, "step": 40205 }, { "epoch": 1.97, "grad_norm": 0.6737003922462463, "learning_rate": 0.00015812419790956744, "loss": 2.9281, "step": 40206 }, { "epoch": 1.97, "grad_norm": 0.6515318155288696, "learning_rate": 0.00015811063242490407, "loss": 3.1345, "step": 40207 }, { "epoch": 1.97, "grad_norm": 0.6995930671691895, "learning_rate": 0.00015809706731395002, "loss": 2.8716, "step": 40208 }, { "epoch": 1.97, "grad_norm": 0.6816954612731934, "learning_rate": 0.00015808350257674113, "loss": 3.0883, "step": 40209 }, { "epoch": 1.97, "grad_norm": 0.6604844331741333, "learning_rate": 0.00015806993821331283, "loss": 3.192, "step": 40210 }, { "epoch": 1.97, "grad_norm": 0.6579017043113708, "learning_rate": 0.0001580563742237011, "loss": 2.839, "step": 40211 }, { "epoch": 1.97, "grad_norm": 0.6625229716300964, "learning_rate": 0.00015804281060794142, "loss": 2.9405, "step": 40212 }, { "epoch": 1.97, "grad_norm": 0.7139385938644409, "learning_rate": 0.00015802924736606965, "loss": 3.0133, "step": 40213 }, { "epoch": 1.97, "grad_norm": 0.6507391333580017, "learning_rate": 0.00015801568449812166, "loss": 3.032, "step": 40214 }, { "epoch": 1.97, "grad_norm": 0.6921162009239197, "learning_rate": 0.00015800212200413292, "loss": 2.862, "step": 40215 }, { "epoch": 1.97, "grad_norm": 0.6766071319580078, "learning_rate": 0.00015798855988413937, "loss": 3.1456, "step": 40216 }, { "epoch": 1.97, "grad_norm": 0.7535398602485657, "learning_rate": 0.00015797499813817665, "loss": 2.9585, "step": 40217 }, { "epoch": 1.97, "grad_norm": 0.6730259656906128, "learning_rate": 0.0001579614367662803, "loss": 3.04, "step": 40218 }, { "epoch": 1.97, "grad_norm": 0.6672115921974182, "learning_rate": 0.0001579478757684863, "loss": 2.9337, "step": 40219 }, { "epoch": 1.97, "grad_norm": 0.708581805229187, "learning_rate": 0.00015793431514483016, "loss": 2.9796, "step": 40220 }, { "epoch": 1.97, "grad_norm": 0.6547756791114807, "learning_rate": 0.00015792075489534782, "loss": 2.9422, "step": 40221 }, { "epoch": 1.97, "grad_norm": 0.6528579592704773, "learning_rate": 0.0001579071950200747, "loss": 2.9878, "step": 40222 }, { "epoch": 1.97, "grad_norm": 0.6749711036682129, "learning_rate": 0.00015789363551904685, "loss": 2.8666, "step": 40223 }, { "epoch": 1.97, "grad_norm": 0.6884815692901611, "learning_rate": 0.0001578800763922998, "loss": 2.9652, "step": 40224 }, { "epoch": 1.97, "grad_norm": 0.6515948176383972, "learning_rate": 0.00015786651763986912, "loss": 2.9444, "step": 40225 }, { "epoch": 1.97, "grad_norm": 0.7087914347648621, "learning_rate": 0.00015785295926179087, "loss": 2.9341, "step": 40226 }, { "epoch": 1.97, "grad_norm": 0.6495019197463989, "learning_rate": 0.0001578394012581004, "loss": 3.0155, "step": 40227 }, { "epoch": 1.97, "grad_norm": 0.6784089207649231, "learning_rate": 0.00015782584362883362, "loss": 3.0786, "step": 40228 }, { "epoch": 1.97, "grad_norm": 0.6523367166519165, "learning_rate": 0.00015781228637402629, "loss": 2.9048, "step": 40229 }, { "epoch": 1.97, "grad_norm": 0.6989542245864868, "learning_rate": 0.00015779872949371395, "loss": 3.0083, "step": 40230 }, { "epoch": 1.97, "grad_norm": 0.6683775186538696, "learning_rate": 0.00015778517298793249, "loss": 2.8116, "step": 40231 }, { "epoch": 1.97, "grad_norm": 0.7025153636932373, "learning_rate": 0.0001577716168567175, "loss": 3.2109, "step": 40232 }, { "epoch": 1.97, "grad_norm": 0.6500304937362671, "learning_rate": 0.0001577580611001046, "loss": 3.2659, "step": 40233 }, { "epoch": 1.97, "grad_norm": 0.6912034749984741, "learning_rate": 0.0001577445057181297, "loss": 2.8978, "step": 40234 }, { "epoch": 1.97, "grad_norm": 0.6490848660469055, "learning_rate": 0.00015773095071082825, "loss": 3.2104, "step": 40235 }, { "epoch": 1.97, "grad_norm": 0.7083659172058105, "learning_rate": 0.00015771739607823627, "loss": 2.9851, "step": 40236 }, { "epoch": 1.97, "grad_norm": 0.6967908143997192, "learning_rate": 0.00015770384182038914, "loss": 3.0441, "step": 40237 }, { "epoch": 1.97, "grad_norm": 0.7174261808395386, "learning_rate": 0.00015769028793732282, "loss": 3.1049, "step": 40238 }, { "epoch": 1.97, "grad_norm": 0.6764041185379028, "learning_rate": 0.0001576767344290729, "loss": 2.8842, "step": 40239 }, { "epoch": 1.97, "grad_norm": 0.6803184747695923, "learning_rate": 0.00015766318129567498, "loss": 3.1289, "step": 40240 }, { "epoch": 1.97, "grad_norm": 0.6668773293495178, "learning_rate": 0.0001576496285371649, "loss": 3.0398, "step": 40241 }, { "epoch": 1.97, "grad_norm": 0.639971137046814, "learning_rate": 0.00015763607615357821, "loss": 3.1722, "step": 40242 }, { "epoch": 1.97, "grad_norm": 0.6707262396812439, "learning_rate": 0.00015762252414495073, "loss": 2.9338, "step": 40243 }, { "epoch": 1.97, "grad_norm": 0.6912310123443604, "learning_rate": 0.00015760897251131822, "loss": 2.9795, "step": 40244 }, { "epoch": 1.97, "grad_norm": 0.6759024858474731, "learning_rate": 0.0001575954212527163, "loss": 3.0455, "step": 40245 }, { "epoch": 1.97, "grad_norm": 0.6769490242004395, "learning_rate": 0.00015758187036918054, "loss": 2.9084, "step": 40246 }, { "epoch": 1.97, "grad_norm": 0.7240374684333801, "learning_rate": 0.0001575683198607467, "loss": 3.026, "step": 40247 }, { "epoch": 1.97, "grad_norm": 0.7014404535293579, "learning_rate": 0.0001575547697274505, "loss": 2.8883, "step": 40248 }, { "epoch": 1.97, "grad_norm": 0.6260035634040833, "learning_rate": 0.0001575412199693277, "loss": 2.9092, "step": 40249 }, { "epoch": 1.97, "grad_norm": 0.6681224703788757, "learning_rate": 0.00015752767058641376, "loss": 3.019, "step": 40250 }, { "epoch": 1.97, "grad_norm": 0.6676934957504272, "learning_rate": 0.00015751412157874467, "loss": 3.0937, "step": 40251 }, { "epoch": 1.97, "grad_norm": 0.7086468935012817, "learning_rate": 0.00015750057294635584, "loss": 2.9019, "step": 40252 }, { "epoch": 1.97, "grad_norm": 0.6498782634735107, "learning_rate": 0.00015748702468928322, "loss": 2.9223, "step": 40253 }, { "epoch": 1.97, "grad_norm": 0.6463537216186523, "learning_rate": 0.0001574734768075623, "loss": 2.8543, "step": 40254 }, { "epoch": 1.97, "grad_norm": 0.66627037525177, "learning_rate": 0.00015745992930122873, "loss": 2.9977, "step": 40255 }, { "epoch": 1.97, "grad_norm": 0.6671886444091797, "learning_rate": 0.0001574463821703184, "loss": 3.0417, "step": 40256 }, { "epoch": 1.97, "grad_norm": 0.6478363275527954, "learning_rate": 0.00015743283541486673, "loss": 2.8345, "step": 40257 }, { "epoch": 1.97, "grad_norm": 0.7030870914459229, "learning_rate": 0.00015741928903490954, "loss": 2.8857, "step": 40258 }, { "epoch": 1.97, "grad_norm": 0.6763714551925659, "learning_rate": 0.0001574057430304826, "loss": 3.0909, "step": 40259 }, { "epoch": 1.97, "grad_norm": 0.6961510181427002, "learning_rate": 0.0001573921974016215, "loss": 2.933, "step": 40260 }, { "epoch": 1.97, "grad_norm": 0.6534474492073059, "learning_rate": 0.00015737865214836193, "loss": 3.1245, "step": 40261 }, { "epoch": 1.97, "grad_norm": 0.6820818185806274, "learning_rate": 0.0001573651072707394, "loss": 3.09, "step": 40262 }, { "epoch": 1.97, "grad_norm": 0.6295596361160278, "learning_rate": 0.00015735156276878976, "loss": 3.1183, "step": 40263 }, { "epoch": 1.97, "grad_norm": 0.6853962540626526, "learning_rate": 0.00015733801864254874, "loss": 3.0549, "step": 40264 }, { "epoch": 1.97, "grad_norm": 0.7549799084663391, "learning_rate": 0.0001573244748920518, "loss": 3.0955, "step": 40265 }, { "epoch": 1.97, "grad_norm": 0.6778276562690735, "learning_rate": 0.00015731093151733484, "loss": 2.9153, "step": 40266 }, { "epoch": 1.97, "grad_norm": 0.6821315884590149, "learning_rate": 0.00015729738851843344, "loss": 3.0041, "step": 40267 }, { "epoch": 1.97, "grad_norm": 0.7044394016265869, "learning_rate": 0.00015728384589538315, "loss": 2.8476, "step": 40268 }, { "epoch": 1.97, "grad_norm": 0.6207550764083862, "learning_rate": 0.00015727030364821984, "loss": 3.1545, "step": 40269 }, { "epoch": 1.97, "grad_norm": 0.6819803714752197, "learning_rate": 0.000157256761776979, "loss": 3.0553, "step": 40270 }, { "epoch": 1.97, "grad_norm": 0.6631947159767151, "learning_rate": 0.00015724322028169645, "loss": 2.9441, "step": 40271 }, { "epoch": 1.97, "grad_norm": 0.6724246144294739, "learning_rate": 0.0001572296791624077, "loss": 3.1259, "step": 40272 }, { "epoch": 1.97, "grad_norm": 0.6821426749229431, "learning_rate": 0.00015721613841914852, "loss": 3.0238, "step": 40273 }, { "epoch": 1.97, "grad_norm": 0.6199890971183777, "learning_rate": 0.0001572025980519546, "loss": 3.0075, "step": 40274 }, { "epoch": 1.97, "grad_norm": 0.637385368347168, "learning_rate": 0.00015718905806086158, "loss": 2.9653, "step": 40275 }, { "epoch": 1.97, "grad_norm": 0.6946399211883545, "learning_rate": 0.00015717551844590512, "loss": 3.0097, "step": 40276 }, { "epoch": 1.97, "grad_norm": 0.6899637579917908, "learning_rate": 0.00015716197920712072, "loss": 2.9472, "step": 40277 }, { "epoch": 1.97, "grad_norm": 0.6917797923088074, "learning_rate": 0.00015714844034454432, "loss": 2.9553, "step": 40278 }, { "epoch": 1.97, "grad_norm": 0.6806402206420898, "learning_rate": 0.00015713490185821128, "loss": 3.078, "step": 40279 }, { "epoch": 1.97, "grad_norm": 0.6591494679450989, "learning_rate": 0.00015712136374815744, "loss": 3.1675, "step": 40280 }, { "epoch": 1.97, "grad_norm": 0.632714033126831, "learning_rate": 0.00015710782601441854, "loss": 2.9648, "step": 40281 }, { "epoch": 1.97, "grad_norm": 0.6618544459342957, "learning_rate": 0.00015709428865703012, "loss": 3.0405, "step": 40282 }, { "epoch": 1.97, "grad_norm": 0.6480012536048889, "learning_rate": 0.00015708075167602785, "loss": 2.8349, "step": 40283 }, { "epoch": 1.97, "grad_norm": 0.6463007926940918, "learning_rate": 0.00015706721507144727, "loss": 3.0346, "step": 40284 }, { "epoch": 1.97, "grad_norm": 0.652336597442627, "learning_rate": 0.00015705367884332413, "loss": 3.1047, "step": 40285 }, { "epoch": 1.97, "grad_norm": 0.6886484026908875, "learning_rate": 0.0001570401429916942, "loss": 2.8904, "step": 40286 }, { "epoch": 1.97, "grad_norm": 0.632122814655304, "learning_rate": 0.00015702660751659288, "loss": 3.155, "step": 40287 }, { "epoch": 1.97, "grad_norm": 0.7023600339889526, "learning_rate": 0.00015701307241805613, "loss": 2.8844, "step": 40288 }, { "epoch": 1.97, "grad_norm": 0.6865475177764893, "learning_rate": 0.00015699953769611938, "loss": 3.0419, "step": 40289 }, { "epoch": 1.97, "grad_norm": 0.676014244556427, "learning_rate": 0.0001569860033508182, "loss": 2.9082, "step": 40290 }, { "epoch": 1.97, "grad_norm": 0.6413493752479553, "learning_rate": 0.00015697246938218854, "loss": 2.8572, "step": 40291 }, { "epoch": 1.97, "grad_norm": 0.6815982460975647, "learning_rate": 0.00015695893579026576, "loss": 3.0746, "step": 40292 }, { "epoch": 1.97, "grad_norm": 0.6308925151824951, "learning_rate": 0.00015694540257508565, "loss": 2.9461, "step": 40293 }, { "epoch": 1.97, "grad_norm": 0.7394782304763794, "learning_rate": 0.00015693186973668376, "loss": 2.8717, "step": 40294 }, { "epoch": 1.97, "grad_norm": 0.656547486782074, "learning_rate": 0.00015691833727509577, "loss": 2.8996, "step": 40295 }, { "epoch": 1.97, "grad_norm": 0.6732604503631592, "learning_rate": 0.00015690480519035745, "loss": 3.065, "step": 40296 }, { "epoch": 1.97, "grad_norm": 0.6596916317939758, "learning_rate": 0.00015689127348250434, "loss": 3.0307, "step": 40297 }, { "epoch": 1.97, "grad_norm": 0.7515655159950256, "learning_rate": 0.0001568777421515721, "loss": 2.9068, "step": 40298 }, { "epoch": 1.97, "grad_norm": 0.6503599286079407, "learning_rate": 0.0001568642111975962, "loss": 2.922, "step": 40299 }, { "epoch": 1.98, "grad_norm": 0.6916700601577759, "learning_rate": 0.00015685068062061243, "loss": 2.9162, "step": 40300 }, { "epoch": 1.98, "grad_norm": 0.6582286357879639, "learning_rate": 0.00015683715042065654, "loss": 2.8721, "step": 40301 }, { "epoch": 1.98, "grad_norm": 0.6865876317024231, "learning_rate": 0.00015682362059776393, "loss": 3.0664, "step": 40302 }, { "epoch": 1.98, "grad_norm": 0.7114717960357666, "learning_rate": 0.00015681009115197048, "loss": 3.087, "step": 40303 }, { "epoch": 1.98, "grad_norm": 0.6616129875183105, "learning_rate": 0.00015679656208331165, "loss": 3.0272, "step": 40304 }, { "epoch": 1.98, "grad_norm": 0.6594287753105164, "learning_rate": 0.00015678303339182305, "loss": 2.9298, "step": 40305 }, { "epoch": 1.98, "grad_norm": 0.7083821892738342, "learning_rate": 0.00015676950507754044, "loss": 3.2278, "step": 40306 }, { "epoch": 1.98, "grad_norm": 0.6950168013572693, "learning_rate": 0.00015675597714049925, "loss": 2.9107, "step": 40307 }, { "epoch": 1.98, "grad_norm": 0.7014042139053345, "learning_rate": 0.00015674244958073543, "loss": 2.892, "step": 40308 }, { "epoch": 1.98, "grad_norm": 0.7078503370285034, "learning_rate": 0.00015672892239828432, "loss": 2.8923, "step": 40309 }, { "epoch": 1.98, "grad_norm": 0.6410806179046631, "learning_rate": 0.00015671539559318162, "loss": 3.0353, "step": 40310 }, { "epoch": 1.98, "grad_norm": 0.6679813265800476, "learning_rate": 0.00015670186916546318, "loss": 2.9274, "step": 40311 }, { "epoch": 1.98, "grad_norm": 0.6931245923042297, "learning_rate": 0.00015668834311516428, "loss": 2.9186, "step": 40312 }, { "epoch": 1.98, "grad_norm": 0.6886323094367981, "learning_rate": 0.00015667481744232082, "loss": 3.0251, "step": 40313 }, { "epoch": 1.98, "grad_norm": 0.6314588785171509, "learning_rate": 0.00015666129214696815, "loss": 3.0817, "step": 40314 }, { "epoch": 1.98, "grad_norm": 0.6671131253242493, "learning_rate": 0.00015664776722914205, "loss": 2.987, "step": 40315 }, { "epoch": 1.98, "grad_norm": 0.6521415114402771, "learning_rate": 0.00015663424268887828, "loss": 2.884, "step": 40316 }, { "epoch": 1.98, "grad_norm": 0.6604424118995667, "learning_rate": 0.00015662071852621222, "loss": 2.767, "step": 40317 }, { "epoch": 1.98, "grad_norm": 0.6475251317024231, "learning_rate": 0.00015660719474117968, "loss": 3.0083, "step": 40318 }, { "epoch": 1.98, "grad_norm": 0.6570774912834167, "learning_rate": 0.0001565936713338162, "loss": 3.0535, "step": 40319 }, { "epoch": 1.98, "grad_norm": 0.6820668578147888, "learning_rate": 0.00015658014830415728, "loss": 2.9305, "step": 40320 }, { "epoch": 1.98, "grad_norm": 0.6360170841217041, "learning_rate": 0.00015656662565223878, "loss": 3.0399, "step": 40321 }, { "epoch": 1.98, "grad_norm": 0.6604784727096558, "learning_rate": 0.00015655310337809604, "loss": 3.2472, "step": 40322 }, { "epoch": 1.98, "grad_norm": 0.694609522819519, "learning_rate": 0.00015653958148176492, "loss": 2.9491, "step": 40323 }, { "epoch": 1.98, "grad_norm": 0.6777598857879639, "learning_rate": 0.00015652605996328085, "loss": 3.0202, "step": 40324 }, { "epoch": 1.98, "grad_norm": 0.6691681742668152, "learning_rate": 0.00015651253882267965, "loss": 3.0187, "step": 40325 }, { "epoch": 1.98, "grad_norm": 0.6447973251342773, "learning_rate": 0.0001564990180599968, "loss": 3.2181, "step": 40326 }, { "epoch": 1.98, "grad_norm": 0.6347460746765137, "learning_rate": 0.00015648549767526777, "loss": 3.0601, "step": 40327 }, { "epoch": 1.98, "grad_norm": 0.6752090454101562, "learning_rate": 0.00015647197766852848, "loss": 2.9086, "step": 40328 }, { "epoch": 1.98, "grad_norm": 0.6812394261360168, "learning_rate": 0.00015645845803981425, "loss": 2.9608, "step": 40329 }, { "epoch": 1.98, "grad_norm": 0.7588211894035339, "learning_rate": 0.00015644493878916077, "loss": 3.021, "step": 40330 }, { "epoch": 1.98, "grad_norm": 0.6755926012992859, "learning_rate": 0.00015643141991660388, "loss": 3.0217, "step": 40331 }, { "epoch": 1.98, "grad_norm": 0.6614903211593628, "learning_rate": 0.00015641790142217882, "loss": 3.1655, "step": 40332 }, { "epoch": 1.98, "grad_norm": 0.6844372749328613, "learning_rate": 0.00015640438330592158, "loss": 2.9864, "step": 40333 }, { "epoch": 1.98, "grad_norm": 0.6596974730491638, "learning_rate": 0.00015639086556786747, "loss": 3.099, "step": 40334 }, { "epoch": 1.98, "grad_norm": 0.6417086720466614, "learning_rate": 0.00015637734820805212, "loss": 3.0785, "step": 40335 }, { "epoch": 1.98, "grad_norm": 0.6517664194107056, "learning_rate": 0.00015636383122651125, "loss": 2.8524, "step": 40336 }, { "epoch": 1.98, "grad_norm": 0.7032948136329651, "learning_rate": 0.00015635031462328034, "loss": 3.0513, "step": 40337 }, { "epoch": 1.98, "grad_norm": 0.666313886642456, "learning_rate": 0.0001563367983983952, "loss": 3.2258, "step": 40338 }, { "epoch": 1.98, "grad_norm": 0.6794551610946655, "learning_rate": 0.0001563232825518911, "loss": 3.0646, "step": 40339 }, { "epoch": 1.98, "grad_norm": 0.6857566833496094, "learning_rate": 0.00015630976708380396, "loss": 3.1632, "step": 40340 }, { "epoch": 1.98, "grad_norm": 0.6492341756820679, "learning_rate": 0.00015629625199416926, "loss": 3.1526, "step": 40341 }, { "epoch": 1.98, "grad_norm": 0.6422709226608276, "learning_rate": 0.00015628273728302241, "loss": 3.0216, "step": 40342 }, { "epoch": 1.98, "grad_norm": 0.677402138710022, "learning_rate": 0.00015626922295039936, "loss": 2.9903, "step": 40343 }, { "epoch": 1.98, "grad_norm": 0.6274300217628479, "learning_rate": 0.00015625570899633533, "loss": 2.9704, "step": 40344 }, { "epoch": 1.98, "grad_norm": 0.6816664934158325, "learning_rate": 0.00015624219542086613, "loss": 3.1336, "step": 40345 }, { "epoch": 1.98, "grad_norm": 0.719647228717804, "learning_rate": 0.00015622868222402747, "loss": 3.0212, "step": 40346 }, { "epoch": 1.98, "grad_norm": 0.7077556252479553, "learning_rate": 0.00015621516940585474, "loss": 2.945, "step": 40347 }, { "epoch": 1.98, "grad_norm": 0.7448094487190247, "learning_rate": 0.0001562016569663836, "loss": 2.854, "step": 40348 }, { "epoch": 1.98, "grad_norm": 0.6531862616539001, "learning_rate": 0.0001561881449056495, "loss": 3.0116, "step": 40349 }, { "epoch": 1.98, "grad_norm": 0.6494091749191284, "learning_rate": 0.00015617463322368814, "loss": 2.9924, "step": 40350 }, { "epoch": 1.98, "grad_norm": 0.6503402590751648, "learning_rate": 0.00015616112192053526, "loss": 3.1069, "step": 40351 }, { "epoch": 1.98, "grad_norm": 0.7476397752761841, "learning_rate": 0.00015614761099622617, "loss": 2.9541, "step": 40352 }, { "epoch": 1.98, "grad_norm": 0.6769456267356873, "learning_rate": 0.0001561341004507967, "loss": 2.9051, "step": 40353 }, { "epoch": 1.98, "grad_norm": 0.6509860157966614, "learning_rate": 0.00015612059028428224, "loss": 2.997, "step": 40354 }, { "epoch": 1.98, "grad_norm": 0.6968262195587158, "learning_rate": 0.0001561070804967185, "loss": 2.9891, "step": 40355 }, { "epoch": 1.98, "grad_norm": 0.6519584655761719, "learning_rate": 0.0001560935710881411, "loss": 3.1037, "step": 40356 }, { "epoch": 1.98, "grad_norm": 0.6796095967292786, "learning_rate": 0.00015608006205858539, "loss": 2.8397, "step": 40357 }, { "epoch": 1.98, "grad_norm": 0.6914886832237244, "learning_rate": 0.00015606655340808718, "loss": 3.1213, "step": 40358 }, { "epoch": 1.98, "grad_norm": 0.6436863541603088, "learning_rate": 0.00015605304513668187, "loss": 3.0416, "step": 40359 }, { "epoch": 1.98, "grad_norm": 0.7205463647842407, "learning_rate": 0.00015603953724440528, "loss": 3.1417, "step": 40360 }, { "epoch": 1.98, "grad_norm": 0.6410875916481018, "learning_rate": 0.00015602602973129273, "loss": 2.9679, "step": 40361 }, { "epoch": 1.98, "grad_norm": 0.6640878915786743, "learning_rate": 0.00015601252259737997, "loss": 3.0664, "step": 40362 }, { "epoch": 1.98, "grad_norm": 0.6714800000190735, "learning_rate": 0.00015599901584270257, "loss": 2.942, "step": 40363 }, { "epoch": 1.98, "grad_norm": 0.6468148231506348, "learning_rate": 0.0001559855094672959, "loss": 2.7303, "step": 40364 }, { "epoch": 1.98, "grad_norm": 0.6648983955383301, "learning_rate": 0.00015597200347119581, "loss": 3.0801, "step": 40365 }, { "epoch": 1.98, "grad_norm": 0.784553050994873, "learning_rate": 0.00015595849785443762, "loss": 2.8801, "step": 40366 }, { "epoch": 1.98, "grad_norm": 0.6907336711883545, "learning_rate": 0.00015594499261705703, "loss": 2.9111, "step": 40367 }, { "epoch": 1.98, "grad_norm": 0.6566920876502991, "learning_rate": 0.00015593148775908973, "loss": 2.9668, "step": 40368 }, { "epoch": 1.98, "grad_norm": 0.6529011130332947, "learning_rate": 0.00015591798328057113, "loss": 2.9668, "step": 40369 }, { "epoch": 1.98, "grad_norm": 0.6959037184715271, "learning_rate": 0.00015590447918153687, "loss": 3.1346, "step": 40370 }, { "epoch": 1.98, "grad_norm": 0.6820028424263, "learning_rate": 0.00015589097546202235, "loss": 2.9913, "step": 40371 }, { "epoch": 1.98, "grad_norm": 0.688714325428009, "learning_rate": 0.0001558774721220633, "loss": 3.0301, "step": 40372 }, { "epoch": 1.98, "grad_norm": 0.6635189056396484, "learning_rate": 0.00015586396916169532, "loss": 3.0752, "step": 40373 }, { "epoch": 1.98, "grad_norm": 0.6321013569831848, "learning_rate": 0.00015585046658095383, "loss": 3.0512, "step": 40374 }, { "epoch": 1.98, "grad_norm": 0.6665242910385132, "learning_rate": 0.00015583696437987459, "loss": 3.0112, "step": 40375 }, { "epoch": 1.98, "grad_norm": 0.6853665709495544, "learning_rate": 0.00015582346255849287, "loss": 3.0421, "step": 40376 }, { "epoch": 1.98, "grad_norm": 0.6680829524993896, "learning_rate": 0.00015580996111684456, "loss": 3.0632, "step": 40377 }, { "epoch": 1.98, "grad_norm": 0.6497752070426941, "learning_rate": 0.00015579646005496505, "loss": 2.9086, "step": 40378 }, { "epoch": 1.98, "grad_norm": 0.6857454776763916, "learning_rate": 0.00015578295937288979, "loss": 2.7439, "step": 40379 }, { "epoch": 1.98, "grad_norm": 0.6618149280548096, "learning_rate": 0.00015576945907065456, "loss": 2.9654, "step": 40380 }, { "epoch": 1.98, "grad_norm": 0.6831642389297485, "learning_rate": 0.00015575595914829474, "loss": 3.0545, "step": 40381 }, { "epoch": 1.98, "grad_norm": 0.6822869777679443, "learning_rate": 0.00015574245960584595, "loss": 3.1841, "step": 40382 }, { "epoch": 1.98, "grad_norm": 0.6751419305801392, "learning_rate": 0.00015572896044334386, "loss": 2.9026, "step": 40383 }, { "epoch": 1.98, "grad_norm": 0.7170292735099792, "learning_rate": 0.00015571546166082397, "loss": 3.2591, "step": 40384 }, { "epoch": 1.98, "grad_norm": 0.6834322214126587, "learning_rate": 0.00015570196325832175, "loss": 2.9228, "step": 40385 }, { "epoch": 1.98, "grad_norm": 0.675579309463501, "learning_rate": 0.00015568846523587263, "loss": 3.0748, "step": 40386 }, { "epoch": 1.98, "grad_norm": 0.694878101348877, "learning_rate": 0.00015567496759351238, "loss": 3.1112, "step": 40387 }, { "epoch": 1.98, "grad_norm": 0.6923894882202148, "learning_rate": 0.00015566147033127656, "loss": 3.0667, "step": 40388 }, { "epoch": 1.98, "grad_norm": 0.7370680570602417, "learning_rate": 0.00015564797344920058, "loss": 3.0021, "step": 40389 }, { "epoch": 1.98, "grad_norm": 0.7134600281715393, "learning_rate": 0.00015563447694732016, "loss": 3.0047, "step": 40390 }, { "epoch": 1.98, "grad_norm": 0.6903583407402039, "learning_rate": 0.00015562098082567071, "loss": 3.044, "step": 40391 }, { "epoch": 1.98, "grad_norm": 0.6875481605529785, "learning_rate": 0.00015560748508428775, "loss": 2.8929, "step": 40392 }, { "epoch": 1.98, "grad_norm": 0.6734123826026917, "learning_rate": 0.00015559398972320694, "loss": 2.7966, "step": 40393 }, { "epoch": 1.98, "grad_norm": 0.6842463612556458, "learning_rate": 0.0001555804947424637, "loss": 2.912, "step": 40394 }, { "epoch": 1.98, "grad_norm": 0.6658815741539001, "learning_rate": 0.00015556700014209376, "loss": 3.0623, "step": 40395 }, { "epoch": 1.98, "grad_norm": 0.6950148344039917, "learning_rate": 0.00015555350592213237, "loss": 2.833, "step": 40396 }, { "epoch": 1.98, "grad_norm": 0.6506776809692383, "learning_rate": 0.0001555400120826153, "loss": 2.842, "step": 40397 }, { "epoch": 1.98, "grad_norm": 0.6653673052787781, "learning_rate": 0.00015552651862357818, "loss": 3.0188, "step": 40398 }, { "epoch": 1.98, "grad_norm": 0.6891602873802185, "learning_rate": 0.00015551302554505633, "loss": 2.92, "step": 40399 }, { "epoch": 1.98, "grad_norm": 0.6852928400039673, "learning_rate": 0.00015549953284708543, "loss": 3.1038, "step": 40400 }, { "epoch": 1.98, "grad_norm": 0.6566252708435059, "learning_rate": 0.00015548604052970078, "loss": 3.0613, "step": 40401 }, { "epoch": 1.98, "grad_norm": 0.7153254151344299, "learning_rate": 0.00015547254859293812, "loss": 2.9753, "step": 40402 }, { "epoch": 1.98, "grad_norm": 0.6642997860908508, "learning_rate": 0.00015545905703683304, "loss": 2.9404, "step": 40403 }, { "epoch": 1.98, "grad_norm": 0.6532772779464722, "learning_rate": 0.0001554455658614209, "loss": 2.8544, "step": 40404 }, { "epoch": 1.98, "grad_norm": 0.6967506408691406, "learning_rate": 0.00015543207506673744, "loss": 2.8222, "step": 40405 }, { "epoch": 1.98, "grad_norm": 0.6587045788764954, "learning_rate": 0.00015541858465281807, "loss": 2.8928, "step": 40406 }, { "epoch": 1.98, "grad_norm": 0.668938398361206, "learning_rate": 0.00015540509461969818, "loss": 3.1886, "step": 40407 }, { "epoch": 1.98, "grad_norm": 0.6699063181877136, "learning_rate": 0.00015539160496741357, "loss": 3.013, "step": 40408 }, { "epoch": 1.98, "grad_norm": 0.6921393275260925, "learning_rate": 0.00015537811569599954, "loss": 2.9297, "step": 40409 }, { "epoch": 1.98, "grad_norm": 0.7287542223930359, "learning_rate": 0.0001553646268054918, "loss": 3.1772, "step": 40410 }, { "epoch": 1.98, "grad_norm": 0.6881520748138428, "learning_rate": 0.0001553511382959257, "loss": 2.9042, "step": 40411 }, { "epoch": 1.98, "grad_norm": 0.6746994256973267, "learning_rate": 0.000155337650167337, "loss": 3.0089, "step": 40412 }, { "epoch": 1.98, "grad_norm": 0.6475083827972412, "learning_rate": 0.00015532416241976108, "loss": 2.9789, "step": 40413 }, { "epoch": 1.98, "grad_norm": 0.7340616583824158, "learning_rate": 0.00015531067505323334, "loss": 2.9834, "step": 40414 }, { "epoch": 1.98, "grad_norm": 0.6629500389099121, "learning_rate": 0.00015529718806778955, "loss": 3.1256, "step": 40415 }, { "epoch": 1.98, "grad_norm": 0.6916855573654175, "learning_rate": 0.00015528370146346504, "loss": 2.9015, "step": 40416 }, { "epoch": 1.98, "grad_norm": 0.6919158697128296, "learning_rate": 0.0001552702152402954, "loss": 3.1538, "step": 40417 }, { "epoch": 1.98, "grad_norm": 0.6587050557136536, "learning_rate": 0.00015525672939831623, "loss": 2.9421, "step": 40418 }, { "epoch": 1.98, "grad_norm": 0.6316267848014832, "learning_rate": 0.00015524324393756292, "loss": 2.9828, "step": 40419 }, { "epoch": 1.98, "grad_norm": 0.6622289419174194, "learning_rate": 0.0001552297588580711, "loss": 2.9457, "step": 40420 }, { "epoch": 1.98, "grad_norm": 0.656522274017334, "learning_rate": 0.0001552162741598763, "loss": 2.9718, "step": 40421 }, { "epoch": 1.98, "grad_norm": 0.6395564079284668, "learning_rate": 0.0001552027898430138, "loss": 3.034, "step": 40422 }, { "epoch": 1.98, "grad_norm": 0.6881017684936523, "learning_rate": 0.00015518930590751942, "loss": 3.075, "step": 40423 }, { "epoch": 1.98, "grad_norm": 0.7126860022544861, "learning_rate": 0.0001551758223534284, "loss": 2.8778, "step": 40424 }, { "epoch": 1.98, "grad_norm": 0.6271448135375977, "learning_rate": 0.00015516233918077657, "loss": 3.032, "step": 40425 }, { "epoch": 1.98, "grad_norm": 0.6470649838447571, "learning_rate": 0.0001551488563895991, "loss": 2.8064, "step": 40426 }, { "epoch": 1.98, "grad_norm": 0.6680401563644409, "learning_rate": 0.0001551353739799318, "loss": 2.9065, "step": 40427 }, { "epoch": 1.98, "grad_norm": 0.716007649898529, "learning_rate": 0.00015512189195181003, "loss": 3.0163, "step": 40428 }, { "epoch": 1.98, "grad_norm": 0.6913284063339233, "learning_rate": 0.0001551084103052692, "loss": 3.1034, "step": 40429 }, { "epoch": 1.98, "grad_norm": 0.689062774181366, "learning_rate": 0.00015509492904034505, "loss": 3.1922, "step": 40430 }, { "epoch": 1.98, "grad_norm": 0.6667220592498779, "learning_rate": 0.0001550814481570729, "loss": 2.9831, "step": 40431 }, { "epoch": 1.98, "grad_norm": 0.665783166885376, "learning_rate": 0.00015506796765548823, "loss": 2.9346, "step": 40432 }, { "epoch": 1.98, "grad_norm": 0.6425278186798096, "learning_rate": 0.00015505448753562682, "loss": 2.8453, "step": 40433 }, { "epoch": 1.98, "grad_norm": 0.6473648548126221, "learning_rate": 0.00015504100779752385, "loss": 2.9768, "step": 40434 }, { "epoch": 1.98, "grad_norm": 0.6833198070526123, "learning_rate": 0.00015502752844121512, "loss": 2.8831, "step": 40435 }, { "epoch": 1.98, "grad_norm": 0.9269675016403198, "learning_rate": 0.00015501404946673597, "loss": 3.0654, "step": 40436 }, { "epoch": 1.98, "grad_norm": 0.7202673554420471, "learning_rate": 0.0001550005708741219, "loss": 3.1508, "step": 40437 }, { "epoch": 1.98, "grad_norm": 0.6838274598121643, "learning_rate": 0.0001549870926634083, "loss": 2.8832, "step": 40438 }, { "epoch": 1.98, "grad_norm": 0.6578678488731384, "learning_rate": 0.0001549736148346308, "loss": 3.1325, "step": 40439 }, { "epoch": 1.98, "grad_norm": 0.6588073968887329, "learning_rate": 0.00015496013738782501, "loss": 2.9813, "step": 40440 }, { "epoch": 1.98, "grad_norm": 0.6366355419158936, "learning_rate": 0.00015494666032302617, "loss": 2.9525, "step": 40441 }, { "epoch": 1.98, "grad_norm": 0.6619384288787842, "learning_rate": 0.00015493318364027007, "loss": 2.9329, "step": 40442 }, { "epoch": 1.98, "grad_norm": 0.6706056594848633, "learning_rate": 0.00015491970733959198, "loss": 2.832, "step": 40443 }, { "epoch": 1.98, "grad_norm": 0.638305127620697, "learning_rate": 0.00015490623142102737, "loss": 2.9308, "step": 40444 }, { "epoch": 1.98, "grad_norm": 0.6442788243293762, "learning_rate": 0.00015489275588461196, "loss": 3.0546, "step": 40445 }, { "epoch": 1.98, "grad_norm": 0.6786911487579346, "learning_rate": 0.00015487928073038094, "loss": 3.0076, "step": 40446 }, { "epoch": 1.98, "grad_norm": 0.6867267489433289, "learning_rate": 0.00015486580595837013, "loss": 2.8399, "step": 40447 }, { "epoch": 1.98, "grad_norm": 0.6981363296508789, "learning_rate": 0.0001548523315686147, "loss": 2.8704, "step": 40448 }, { "epoch": 1.98, "grad_norm": 0.6754472851753235, "learning_rate": 0.00015483885756115045, "loss": 3.1064, "step": 40449 }, { "epoch": 1.98, "grad_norm": 0.7413714528083801, "learning_rate": 0.0001548253839360127, "loss": 2.9747, "step": 40450 }, { "epoch": 1.98, "grad_norm": 0.6990171670913696, "learning_rate": 0.0001548119106932368, "loss": 2.9777, "step": 40451 }, { "epoch": 1.98, "grad_norm": 0.7089271545410156, "learning_rate": 0.00015479843783285856, "loss": 2.9411, "step": 40452 }, { "epoch": 1.98, "grad_norm": 0.6640129089355469, "learning_rate": 0.00015478496535491315, "loss": 3.1461, "step": 40453 }, { "epoch": 1.98, "grad_norm": 0.6691185832023621, "learning_rate": 0.00015477149325943617, "loss": 3.0974, "step": 40454 }, { "epoch": 1.98, "grad_norm": 0.6627618074417114, "learning_rate": 0.00015475802154646327, "loss": 2.9525, "step": 40455 }, { "epoch": 1.98, "grad_norm": 0.7101806998252869, "learning_rate": 0.00015474455021602967, "loss": 2.8795, "step": 40456 }, { "epoch": 1.98, "grad_norm": 0.6481982469558716, "learning_rate": 0.00015473107926817107, "loss": 3.1016, "step": 40457 }, { "epoch": 1.98, "grad_norm": 0.6829726696014404, "learning_rate": 0.00015471760870292285, "loss": 3.1465, "step": 40458 }, { "epoch": 1.98, "grad_norm": 0.708656370639801, "learning_rate": 0.00015470413852032038, "loss": 2.91, "step": 40459 }, { "epoch": 1.98, "grad_norm": 0.6740055084228516, "learning_rate": 0.0001546906687203994, "loss": 2.9464, "step": 40460 }, { "epoch": 1.98, "grad_norm": 0.6535217761993408, "learning_rate": 0.00015467719930319508, "loss": 3.3205, "step": 40461 }, { "epoch": 1.98, "grad_norm": 0.656358540058136, "learning_rate": 0.00015466373026874313, "loss": 3.0558, "step": 40462 }, { "epoch": 1.98, "grad_norm": 0.6297536492347717, "learning_rate": 0.00015465026161707888, "loss": 3.3642, "step": 40463 }, { "epoch": 1.98, "grad_norm": 0.6395627856254578, "learning_rate": 0.00015463679334823795, "loss": 2.8474, "step": 40464 }, { "epoch": 1.98, "grad_norm": 0.6379209756851196, "learning_rate": 0.00015462332546225578, "loss": 3.1087, "step": 40465 }, { "epoch": 1.98, "grad_norm": 0.7360820770263672, "learning_rate": 0.00015460985795916766, "loss": 2.8954, "step": 40466 }, { "epoch": 1.98, "grad_norm": 0.6527162790298462, "learning_rate": 0.00015459639083900928, "loss": 3.1293, "step": 40467 }, { "epoch": 1.98, "grad_norm": 0.6693764328956604, "learning_rate": 0.0001545829241018159, "loss": 2.9546, "step": 40468 }, { "epoch": 1.98, "grad_norm": 0.7010257244110107, "learning_rate": 0.00015456945774762316, "loss": 2.8822, "step": 40469 }, { "epoch": 1.98, "grad_norm": 0.6681190133094788, "learning_rate": 0.00015455599177646655, "loss": 2.9926, "step": 40470 }, { "epoch": 1.98, "grad_norm": 0.7095162868499756, "learning_rate": 0.0001545425261883815, "loss": 3.2781, "step": 40471 }, { "epoch": 1.98, "grad_norm": 0.6761109828948975, "learning_rate": 0.0001545290609834034, "loss": 2.8825, "step": 40472 }, { "epoch": 1.98, "grad_norm": 0.6583576202392578, "learning_rate": 0.00015451559616156766, "loss": 2.9249, "step": 40473 }, { "epoch": 1.98, "grad_norm": 0.6421191692352295, "learning_rate": 0.0001545021317229099, "loss": 2.8429, "step": 40474 }, { "epoch": 1.98, "grad_norm": 0.6621305346488953, "learning_rate": 0.00015448866766746552, "loss": 2.9998, "step": 40475 }, { "epoch": 1.98, "grad_norm": 0.708996057510376, "learning_rate": 0.00015447520399526998, "loss": 2.8448, "step": 40476 }, { "epoch": 1.98, "grad_norm": 0.6437864899635315, "learning_rate": 0.00015446174070635882, "loss": 2.9074, "step": 40477 }, { "epoch": 1.98, "grad_norm": 0.6887125372886658, "learning_rate": 0.00015444827780076726, "loss": 3.0311, "step": 40478 }, { "epoch": 1.98, "grad_norm": 0.6327663660049438, "learning_rate": 0.00015443481527853113, "loss": 3.1015, "step": 40479 }, { "epoch": 1.98, "grad_norm": 0.6891899704933167, "learning_rate": 0.00015442135313968564, "loss": 2.9709, "step": 40480 }, { "epoch": 1.98, "grad_norm": 0.6350082159042358, "learning_rate": 0.00015440789138426614, "loss": 3.0892, "step": 40481 }, { "epoch": 1.98, "grad_norm": 0.6630797386169434, "learning_rate": 0.00015439443001230842, "loss": 3.0736, "step": 40482 }, { "epoch": 1.98, "grad_norm": 0.6615853309631348, "learning_rate": 0.0001543809690238476, "loss": 3.1135, "step": 40483 }, { "epoch": 1.98, "grad_norm": 0.69512939453125, "learning_rate": 0.00015436750841891925, "loss": 3.0358, "step": 40484 }, { "epoch": 1.98, "grad_norm": 0.7393690943717957, "learning_rate": 0.000154354048197559, "loss": 2.8721, "step": 40485 }, { "epoch": 1.98, "grad_norm": 0.6448101997375488, "learning_rate": 0.0001543405883598022, "loss": 2.8879, "step": 40486 }, { "epoch": 1.98, "grad_norm": 0.6300211548805237, "learning_rate": 0.00015432712890568422, "loss": 2.9113, "step": 40487 }, { "epoch": 1.98, "grad_norm": 0.7020927667617798, "learning_rate": 0.00015431366983524041, "loss": 2.9078, "step": 40488 }, { "epoch": 1.98, "grad_norm": 0.717994213104248, "learning_rate": 0.00015430021114850635, "loss": 2.9872, "step": 40489 }, { "epoch": 1.98, "grad_norm": 0.6843698620796204, "learning_rate": 0.00015428675284551765, "loss": 2.9769, "step": 40490 }, { "epoch": 1.98, "grad_norm": 0.6766223907470703, "learning_rate": 0.00015427329492630944, "loss": 2.8667, "step": 40491 }, { "epoch": 1.98, "grad_norm": 0.6386144161224365, "learning_rate": 0.00015425983739091747, "loss": 2.972, "step": 40492 }, { "epoch": 1.98, "grad_norm": 0.6815845966339111, "learning_rate": 0.00015424638023937707, "loss": 3.0052, "step": 40493 }, { "epoch": 1.98, "grad_norm": 0.6865285038948059, "learning_rate": 0.00015423292347172348, "loss": 2.8614, "step": 40494 }, { "epoch": 1.98, "grad_norm": 0.8084060549736023, "learning_rate": 0.00015421946708799248, "loss": 2.7633, "step": 40495 }, { "epoch": 1.98, "grad_norm": 0.6883880496025085, "learning_rate": 0.00015420601108821918, "loss": 3.018, "step": 40496 }, { "epoch": 1.98, "grad_norm": 0.6464810967445374, "learning_rate": 0.00015419255547243936, "loss": 2.8539, "step": 40497 }, { "epoch": 1.98, "grad_norm": 0.6759091019630432, "learning_rate": 0.00015417910024068815, "loss": 2.9933, "step": 40498 }, { "epoch": 1.98, "grad_norm": 0.7044882774353027, "learning_rate": 0.0001541656453930011, "loss": 3.1197, "step": 40499 }, { "epoch": 1.98, "grad_norm": 0.6467495560646057, "learning_rate": 0.0001541521909294138, "loss": 2.8981, "step": 40500 }, { "epoch": 1.98, "grad_norm": 0.6994920372962952, "learning_rate": 0.00015413873684996156, "loss": 3.0469, "step": 40501 }, { "epoch": 1.98, "grad_norm": 0.6590608954429626, "learning_rate": 0.00015412528315467987, "loss": 2.7399, "step": 40502 }, { "epoch": 1.98, "grad_norm": 0.6317247748374939, "learning_rate": 0.0001541118298436039, "loss": 2.947, "step": 40503 }, { "epoch": 1.99, "grad_norm": 0.7404329180717468, "learning_rate": 0.0001540983769167694, "loss": 2.9812, "step": 40504 }, { "epoch": 1.99, "grad_norm": 0.6499910950660706, "learning_rate": 0.00015408492437421175, "loss": 2.9587, "step": 40505 }, { "epoch": 1.99, "grad_norm": 0.6785842180252075, "learning_rate": 0.0001540714722159662, "loss": 2.9186, "step": 40506 }, { "epoch": 1.99, "grad_norm": 0.7388772368431091, "learning_rate": 0.00015405802044206844, "loss": 2.8404, "step": 40507 }, { "epoch": 1.99, "grad_norm": 0.6410542726516724, "learning_rate": 0.00015404456905255377, "loss": 2.897, "step": 40508 }, { "epoch": 1.99, "grad_norm": 0.6782169342041016, "learning_rate": 0.0001540311180474575, "loss": 3.1239, "step": 40509 }, { "epoch": 1.99, "grad_norm": 0.6322621703147888, "learning_rate": 0.00015401766742681528, "loss": 2.882, "step": 40510 }, { "epoch": 1.99, "grad_norm": 0.6843157410621643, "learning_rate": 0.0001540042171906623, "loss": 2.9253, "step": 40511 }, { "epoch": 1.99, "grad_norm": 0.6755958795547485, "learning_rate": 0.00015399076733903425, "loss": 2.7574, "step": 40512 }, { "epoch": 1.99, "grad_norm": 0.6259028911590576, "learning_rate": 0.00015397731787196632, "loss": 2.7801, "step": 40513 }, { "epoch": 1.99, "grad_norm": 0.6746711134910583, "learning_rate": 0.0001539638687894941, "loss": 2.9839, "step": 40514 }, { "epoch": 1.99, "grad_norm": 0.639659583568573, "learning_rate": 0.000153950420091653, "loss": 3.1256, "step": 40515 }, { "epoch": 1.99, "grad_norm": 0.6220987439155579, "learning_rate": 0.00015393697177847824, "loss": 2.8792, "step": 40516 }, { "epoch": 1.99, "grad_norm": 0.6759223937988281, "learning_rate": 0.0001539235238500055, "loss": 2.9302, "step": 40517 }, { "epoch": 1.99, "grad_norm": 0.6941930055618286, "learning_rate": 0.00015391007630627, "loss": 2.8693, "step": 40518 }, { "epoch": 1.99, "grad_norm": 0.6453860402107239, "learning_rate": 0.00015389662914730732, "loss": 2.9384, "step": 40519 }, { "epoch": 1.99, "grad_norm": 0.6453847885131836, "learning_rate": 0.0001538831823731527, "loss": 2.9717, "step": 40520 }, { "epoch": 1.99, "grad_norm": 0.6334241032600403, "learning_rate": 0.00015386973598384166, "loss": 2.8373, "step": 40521 }, { "epoch": 1.99, "grad_norm": 0.6756218075752258, "learning_rate": 0.00015385628997940972, "loss": 2.9777, "step": 40522 }, { "epoch": 1.99, "grad_norm": 0.6967214345932007, "learning_rate": 0.00015384284435989218, "loss": 2.9146, "step": 40523 }, { "epoch": 1.99, "grad_norm": 0.692237138748169, "learning_rate": 0.00015382939912532447, "loss": 2.9344, "step": 40524 }, { "epoch": 1.99, "grad_norm": 0.6731816530227661, "learning_rate": 0.00015381595427574189, "loss": 3.0359, "step": 40525 }, { "epoch": 1.99, "grad_norm": 0.6681119203567505, "learning_rate": 0.00015380250981117993, "loss": 2.9214, "step": 40526 }, { "epoch": 1.99, "grad_norm": 0.6464305520057678, "learning_rate": 0.00015378906573167413, "loss": 2.9634, "step": 40527 }, { "epoch": 1.99, "grad_norm": 0.6641029119491577, "learning_rate": 0.0001537756220372597, "loss": 3.0965, "step": 40528 }, { "epoch": 1.99, "grad_norm": 0.6519691944122314, "learning_rate": 0.00015376217872797228, "loss": 3.0628, "step": 40529 }, { "epoch": 1.99, "grad_norm": 0.7187452912330627, "learning_rate": 0.00015374873580384707, "loss": 3.1994, "step": 40530 }, { "epoch": 1.99, "grad_norm": 0.666256308555603, "learning_rate": 0.0001537352932649195, "loss": 2.9878, "step": 40531 }, { "epoch": 1.99, "grad_norm": 0.6944901347160339, "learning_rate": 0.0001537218511112251, "loss": 3.0209, "step": 40532 }, { "epoch": 1.99, "grad_norm": 0.7129674553871155, "learning_rate": 0.00015370840934279906, "loss": 3.0064, "step": 40533 }, { "epoch": 1.99, "grad_norm": 0.6722531914710999, "learning_rate": 0.00015369496795967705, "loss": 2.8155, "step": 40534 }, { "epoch": 1.99, "grad_norm": 0.6533281803131104, "learning_rate": 0.0001536815269618942, "loss": 2.9369, "step": 40535 }, { "epoch": 1.99, "grad_norm": 0.6529192328453064, "learning_rate": 0.00015366808634948605, "loss": 2.8809, "step": 40536 }, { "epoch": 1.99, "grad_norm": 0.6997420787811279, "learning_rate": 0.00015365464612248825, "loss": 3.0539, "step": 40537 }, { "epoch": 1.99, "grad_norm": 0.7066969275474548, "learning_rate": 0.00015364120628093573, "loss": 2.9932, "step": 40538 }, { "epoch": 1.99, "grad_norm": 0.6391503810882568, "learning_rate": 0.0001536277668248642, "loss": 2.9198, "step": 40539 }, { "epoch": 1.99, "grad_norm": 0.6769759058952332, "learning_rate": 0.00015361432775430885, "loss": 2.9239, "step": 40540 }, { "epoch": 1.99, "grad_norm": 0.6748432517051697, "learning_rate": 0.0001536008890693052, "loss": 3.0284, "step": 40541 }, { "epoch": 1.99, "grad_norm": 0.7206935882568359, "learning_rate": 0.00015358745076988873, "loss": 2.8153, "step": 40542 }, { "epoch": 1.99, "grad_norm": 0.6426916122436523, "learning_rate": 0.0001535740128560946, "loss": 2.8724, "step": 40543 }, { "epoch": 1.99, "grad_norm": 0.7100978493690491, "learning_rate": 0.00015356057532795855, "loss": 2.7467, "step": 40544 }, { "epoch": 1.99, "grad_norm": 0.6432538032531738, "learning_rate": 0.00015354713818551568, "loss": 2.8214, "step": 40545 }, { "epoch": 1.99, "grad_norm": 0.6779876351356506, "learning_rate": 0.00015353370142880132, "loss": 2.8472, "step": 40546 }, { "epoch": 1.99, "grad_norm": 0.6484413743019104, "learning_rate": 0.0001535202650578512, "loss": 2.9393, "step": 40547 }, { "epoch": 1.99, "grad_norm": 0.6245802640914917, "learning_rate": 0.00015350682907270033, "loss": 3.1935, "step": 40548 }, { "epoch": 1.99, "grad_norm": 0.6747826337814331, "learning_rate": 0.00015349339347338442, "loss": 3.0041, "step": 40549 }, { "epoch": 1.99, "grad_norm": 0.6607273817062378, "learning_rate": 0.0001534799582599386, "loss": 2.8044, "step": 40550 }, { "epoch": 1.99, "grad_norm": 0.6659206748008728, "learning_rate": 0.0001534665234323985, "loss": 2.9159, "step": 40551 }, { "epoch": 1.99, "grad_norm": 0.6552886962890625, "learning_rate": 0.00015345308899079936, "loss": 2.8379, "step": 40552 }, { "epoch": 1.99, "grad_norm": 0.6883482933044434, "learning_rate": 0.0001534396549351765, "loss": 2.922, "step": 40553 }, { "epoch": 1.99, "grad_norm": 0.6916592121124268, "learning_rate": 0.00015342622126556546, "loss": 2.9854, "step": 40554 }, { "epoch": 1.99, "grad_norm": 0.6768868565559387, "learning_rate": 0.00015341278798200141, "loss": 2.969, "step": 40555 }, { "epoch": 1.99, "grad_norm": 0.6460238695144653, "learning_rate": 0.0001533993550845199, "loss": 3.0113, "step": 40556 }, { "epoch": 1.99, "grad_norm": 0.7118356823921204, "learning_rate": 0.00015338592257315638, "loss": 2.9523, "step": 40557 }, { "epoch": 1.99, "grad_norm": 0.6956257224082947, "learning_rate": 0.00015337249044794602, "loss": 2.9509, "step": 40558 }, { "epoch": 1.99, "grad_norm": 0.702347457408905, "learning_rate": 0.0001533590587089244, "loss": 2.9354, "step": 40559 }, { "epoch": 1.99, "grad_norm": 0.6793229579925537, "learning_rate": 0.0001533456273561268, "loss": 2.9578, "step": 40560 }, { "epoch": 1.99, "grad_norm": 0.738370418548584, "learning_rate": 0.00015333219638958849, "loss": 3.0186, "step": 40561 }, { "epoch": 1.99, "grad_norm": 0.7036491632461548, "learning_rate": 0.00015331876580934505, "loss": 3.0268, "step": 40562 }, { "epoch": 1.99, "grad_norm": 0.7223859429359436, "learning_rate": 0.00015330533561543164, "loss": 2.8532, "step": 40563 }, { "epoch": 1.99, "grad_norm": 0.6854945421218872, "learning_rate": 0.00015329190580788387, "loss": 3.0173, "step": 40564 }, { "epoch": 1.99, "grad_norm": 0.6289524435997009, "learning_rate": 0.00015327847638673688, "loss": 2.9122, "step": 40565 }, { "epoch": 1.99, "grad_norm": 0.7152692675590515, "learning_rate": 0.00015326504735202622, "loss": 2.9671, "step": 40566 }, { "epoch": 1.99, "grad_norm": 0.6731276512145996, "learning_rate": 0.0001532516187037872, "loss": 3.0716, "step": 40567 }, { "epoch": 1.99, "grad_norm": 0.6647647619247437, "learning_rate": 0.00015323819044205507, "loss": 2.9302, "step": 40568 }, { "epoch": 1.99, "grad_norm": 0.7299318909645081, "learning_rate": 0.0001532247625668654, "loss": 2.6701, "step": 40569 }, { "epoch": 1.99, "grad_norm": 0.7116023302078247, "learning_rate": 0.0001532113350782534, "loss": 2.8473, "step": 40570 }, { "epoch": 1.99, "grad_norm": 0.6508423089981079, "learning_rate": 0.0001531979079762544, "loss": 3.0764, "step": 40571 }, { "epoch": 1.99, "grad_norm": 0.6736328601837158, "learning_rate": 0.00015318448126090407, "loss": 3.047, "step": 40572 }, { "epoch": 1.99, "grad_norm": 0.6313000917434692, "learning_rate": 0.00015317105493223746, "loss": 2.8944, "step": 40573 }, { "epoch": 1.99, "grad_norm": 0.6281668543815613, "learning_rate": 0.0001531576289902901, "loss": 2.7681, "step": 40574 }, { "epoch": 1.99, "grad_norm": 0.6827503442764282, "learning_rate": 0.00015314420343509716, "loss": 3.1714, "step": 40575 }, { "epoch": 1.99, "grad_norm": 0.6493374109268188, "learning_rate": 0.00015313077826669414, "loss": 3.0646, "step": 40576 }, { "epoch": 1.99, "grad_norm": 0.702488899230957, "learning_rate": 0.00015311735348511648, "loss": 2.9156, "step": 40577 }, { "epoch": 1.99, "grad_norm": 0.6838111281394958, "learning_rate": 0.00015310392909039932, "loss": 2.7314, "step": 40578 }, { "epoch": 1.99, "grad_norm": 0.7085728645324707, "learning_rate": 0.0001530905050825783, "loss": 2.9128, "step": 40579 }, { "epoch": 1.99, "grad_norm": 0.6542535424232483, "learning_rate": 0.0001530770814616884, "loss": 3.0594, "step": 40580 }, { "epoch": 1.99, "grad_norm": 0.6909346580505371, "learning_rate": 0.0001530636582277654, "loss": 2.9486, "step": 40581 }, { "epoch": 1.99, "grad_norm": 0.6850855350494385, "learning_rate": 0.0001530502353808444, "loss": 3.1318, "step": 40582 }, { "epoch": 1.99, "grad_norm": 0.633678138256073, "learning_rate": 0.0001530368129209607, "loss": 3.0352, "step": 40583 }, { "epoch": 1.99, "grad_norm": 0.6679724454879761, "learning_rate": 0.0001530233908481499, "loss": 3.1599, "step": 40584 }, { "epoch": 1.99, "grad_norm": 0.7710044384002686, "learning_rate": 0.00015300996916244703, "loss": 3.0995, "step": 40585 }, { "epoch": 1.99, "grad_norm": 0.6416032314300537, "learning_rate": 0.00015299654786388766, "loss": 2.9725, "step": 40586 }, { "epoch": 1.99, "grad_norm": 0.6761838793754578, "learning_rate": 0.0001529831269525072, "loss": 3.2538, "step": 40587 }, { "epoch": 1.99, "grad_norm": 0.6844536066055298, "learning_rate": 0.00015296970642834088, "loss": 2.9308, "step": 40588 }, { "epoch": 1.99, "grad_norm": 0.6797305941581726, "learning_rate": 0.000152956286291424, "loss": 2.9528, "step": 40589 }, { "epoch": 1.99, "grad_norm": 0.6660833954811096, "learning_rate": 0.00015294286654179192, "loss": 3.0079, "step": 40590 }, { "epoch": 1.99, "grad_norm": 0.6612690091133118, "learning_rate": 0.0001529294471794801, "loss": 3.0193, "step": 40591 }, { "epoch": 1.99, "grad_norm": 0.6777284145355225, "learning_rate": 0.00015291602820452374, "loss": 3.0346, "step": 40592 }, { "epoch": 1.99, "grad_norm": 0.678779125213623, "learning_rate": 0.00015290260961695824, "loss": 2.9215, "step": 40593 }, { "epoch": 1.99, "grad_norm": 0.6552702784538269, "learning_rate": 0.00015288919141681905, "loss": 3.193, "step": 40594 }, { "epoch": 1.99, "grad_norm": 0.66253662109375, "learning_rate": 0.00015287577360414146, "loss": 2.8081, "step": 40595 }, { "epoch": 1.99, "grad_norm": 0.6897073984146118, "learning_rate": 0.00015286235617896073, "loss": 3.0626, "step": 40596 }, { "epoch": 1.99, "grad_norm": 0.6485527753829956, "learning_rate": 0.0001528489391413121, "loss": 3.1438, "step": 40597 }, { "epoch": 1.99, "grad_norm": 0.6479106545448303, "learning_rate": 0.00015283552249123112, "loss": 3.1592, "step": 40598 }, { "epoch": 1.99, "grad_norm": 0.6787962317466736, "learning_rate": 0.00015282210622875308, "loss": 3.2376, "step": 40599 }, { "epoch": 1.99, "grad_norm": 0.656581699848175, "learning_rate": 0.00015280869035391324, "loss": 3.0471, "step": 40600 }, { "epoch": 1.99, "grad_norm": 0.6656479835510254, "learning_rate": 0.00015279527486674707, "loss": 2.8886, "step": 40601 }, { "epoch": 1.99, "grad_norm": 0.6412723660469055, "learning_rate": 0.00015278185976728972, "loss": 3.0208, "step": 40602 }, { "epoch": 1.99, "grad_norm": 0.7223705649375916, "learning_rate": 0.0001527684450555767, "loss": 3.1058, "step": 40603 }, { "epoch": 1.99, "grad_norm": 0.7059385180473328, "learning_rate": 0.00015275503073164327, "loss": 3.0413, "step": 40604 }, { "epoch": 1.99, "grad_norm": 0.7070484161376953, "learning_rate": 0.00015274161679552464, "loss": 3.0564, "step": 40605 }, { "epoch": 1.99, "grad_norm": 0.6791709661483765, "learning_rate": 0.00015272820324725635, "loss": 2.8612, "step": 40606 }, { "epoch": 1.99, "grad_norm": 0.6438567638397217, "learning_rate": 0.00015271479008687359, "loss": 2.9881, "step": 40607 }, { "epoch": 1.99, "grad_norm": 0.631389856338501, "learning_rate": 0.00015270137731441164, "loss": 3.0258, "step": 40608 }, { "epoch": 1.99, "grad_norm": 0.7261651754379272, "learning_rate": 0.00015268796492990603, "loss": 2.9647, "step": 40609 }, { "epoch": 1.99, "grad_norm": 0.6476674675941467, "learning_rate": 0.00015267455293339205, "loss": 2.9719, "step": 40610 }, { "epoch": 1.99, "grad_norm": 0.6464186310768127, "learning_rate": 0.00015266114132490488, "loss": 3.1635, "step": 40611 }, { "epoch": 1.99, "grad_norm": 0.6697189211845398, "learning_rate": 0.0001526477301044798, "loss": 2.9862, "step": 40612 }, { "epoch": 1.99, "grad_norm": 0.6742683053016663, "learning_rate": 0.00015263431927215224, "loss": 2.984, "step": 40613 }, { "epoch": 1.99, "grad_norm": 0.6472331881523132, "learning_rate": 0.00015262090882795765, "loss": 3.1692, "step": 40614 }, { "epoch": 1.99, "grad_norm": 0.6672264337539673, "learning_rate": 0.0001526074987719311, "loss": 2.9423, "step": 40615 }, { "epoch": 1.99, "grad_norm": 0.6128020286560059, "learning_rate": 0.00015259408910410816, "loss": 3.0283, "step": 40616 }, { "epoch": 1.99, "grad_norm": 0.665045976638794, "learning_rate": 0.00015258067982452402, "loss": 2.9884, "step": 40617 }, { "epoch": 1.99, "grad_norm": 0.6567105650901794, "learning_rate": 0.00015256727093321387, "loss": 3.0353, "step": 40618 }, { "epoch": 1.99, "grad_norm": 0.6515582203865051, "learning_rate": 0.00015255386243021328, "loss": 2.936, "step": 40619 }, { "epoch": 1.99, "grad_norm": 0.6716775298118591, "learning_rate": 0.0001525404543155573, "loss": 2.9665, "step": 40620 }, { "epoch": 1.99, "grad_norm": 0.651340901851654, "learning_rate": 0.00015252704658928152, "loss": 3.1269, "step": 40621 }, { "epoch": 1.99, "grad_norm": 0.6484601497650146, "learning_rate": 0.00015251363925142098, "loss": 3.0912, "step": 40622 }, { "epoch": 1.99, "grad_norm": 0.6475279331207275, "learning_rate": 0.00015250023230201114, "loss": 2.8503, "step": 40623 }, { "epoch": 1.99, "grad_norm": 0.653603732585907, "learning_rate": 0.00015248682574108745, "loss": 2.9839, "step": 40624 }, { "epoch": 1.99, "grad_norm": 0.6498973965644836, "learning_rate": 0.00015247341956868505, "loss": 2.8576, "step": 40625 }, { "epoch": 1.99, "grad_norm": 0.6715899705886841, "learning_rate": 0.00015246001378483925, "loss": 2.9632, "step": 40626 }, { "epoch": 1.99, "grad_norm": 0.6394970417022705, "learning_rate": 0.00015244660838958528, "loss": 2.8658, "step": 40627 }, { "epoch": 1.99, "grad_norm": 0.6495617032051086, "learning_rate": 0.00015243320338295856, "loss": 2.9026, "step": 40628 }, { "epoch": 1.99, "grad_norm": 0.710526168346405, "learning_rate": 0.00015241979876499446, "loss": 3.0276, "step": 40629 }, { "epoch": 1.99, "grad_norm": 0.6743832230567932, "learning_rate": 0.00015240639453572812, "loss": 2.8014, "step": 40630 }, { "epoch": 1.99, "grad_norm": 0.6878013610839844, "learning_rate": 0.00015239299069519504, "loss": 2.8822, "step": 40631 }, { "epoch": 1.99, "grad_norm": 0.6774446368217468, "learning_rate": 0.00015237958724343044, "loss": 2.9242, "step": 40632 }, { "epoch": 1.99, "grad_norm": 0.6619464159011841, "learning_rate": 0.00015236618418046943, "loss": 3.0985, "step": 40633 }, { "epoch": 1.99, "grad_norm": 0.7469950914382935, "learning_rate": 0.00015235278150634764, "loss": 3.0865, "step": 40634 }, { "epoch": 1.99, "grad_norm": 0.6729745864868164, "learning_rate": 0.0001523393792211001, "loss": 2.9876, "step": 40635 }, { "epoch": 1.99, "grad_norm": 0.665226936340332, "learning_rate": 0.00015232597732476227, "loss": 2.9907, "step": 40636 }, { "epoch": 1.99, "grad_norm": 0.6587421298027039, "learning_rate": 0.00015231257581736933, "loss": 3.2124, "step": 40637 }, { "epoch": 1.99, "grad_norm": 0.6516327261924744, "learning_rate": 0.00015229917469895664, "loss": 3.0794, "step": 40638 }, { "epoch": 1.99, "grad_norm": 1.108006477355957, "learning_rate": 0.00015228577396955974, "loss": 2.8191, "step": 40639 }, { "epoch": 1.99, "grad_norm": 0.7497507929801941, "learning_rate": 0.0001522723736292134, "loss": 2.9436, "step": 40640 }, { "epoch": 1.99, "grad_norm": 0.6455642580986023, "learning_rate": 0.0001522589736779534, "loss": 3.1215, "step": 40641 }, { "epoch": 1.99, "grad_norm": 0.6802120804786682, "learning_rate": 0.00015224557411581464, "loss": 2.6826, "step": 40642 }, { "epoch": 1.99, "grad_norm": 0.6976883411407471, "learning_rate": 0.0001522321749428326, "loss": 3.0047, "step": 40643 }, { "epoch": 1.99, "grad_norm": 0.6384730339050293, "learning_rate": 0.0001522187761590427, "loss": 2.865, "step": 40644 }, { "epoch": 1.99, "grad_norm": 0.635405957698822, "learning_rate": 0.00015220537776448, "loss": 2.9101, "step": 40645 }, { "epoch": 1.99, "grad_norm": 0.6480907797813416, "learning_rate": 0.00015219197975918002, "loss": 3.0178, "step": 40646 }, { "epoch": 1.99, "grad_norm": 0.6745615005493164, "learning_rate": 0.00015217858214317792, "loss": 2.8982, "step": 40647 }, { "epoch": 1.99, "grad_norm": 0.658542811870575, "learning_rate": 0.0001521651849165088, "loss": 3.2529, "step": 40648 }, { "epoch": 1.99, "grad_norm": 0.6688123941421509, "learning_rate": 0.00015215178807920834, "loss": 3.1468, "step": 40649 }, { "epoch": 1.99, "grad_norm": 0.6520625352859497, "learning_rate": 0.00015213839163131146, "loss": 3.004, "step": 40650 }, { "epoch": 1.99, "grad_norm": 0.6656911969184875, "learning_rate": 0.00015212499557285374, "loss": 3.0078, "step": 40651 }, { "epoch": 1.99, "grad_norm": 0.6434037685394287, "learning_rate": 0.00015211159990387015, "loss": 3.0344, "step": 40652 }, { "epoch": 1.99, "grad_norm": 0.6860767006874084, "learning_rate": 0.0001520982046243963, "loss": 3.1034, "step": 40653 }, { "epoch": 1.99, "grad_norm": 0.6536909341812134, "learning_rate": 0.00015208480973446733, "loss": 3.039, "step": 40654 }, { "epoch": 1.99, "grad_norm": 0.6466705203056335, "learning_rate": 0.00015207141523411834, "loss": 3.0428, "step": 40655 }, { "epoch": 1.99, "grad_norm": 0.65459805727005, "learning_rate": 0.00015205802112338492, "loss": 2.9133, "step": 40656 }, { "epoch": 1.99, "grad_norm": 0.6901729106903076, "learning_rate": 0.0001520446274023021, "loss": 3.2814, "step": 40657 }, { "epoch": 1.99, "grad_norm": 0.6484500765800476, "learning_rate": 0.00015203123407090522, "loss": 2.9905, "step": 40658 }, { "epoch": 1.99, "grad_norm": 0.6647442579269409, "learning_rate": 0.00015201784112922975, "loss": 3.1639, "step": 40659 }, { "epoch": 1.99, "grad_norm": 0.6574181914329529, "learning_rate": 0.00015200444857731062, "loss": 2.8888, "step": 40660 }, { "epoch": 1.99, "grad_norm": 0.6742486953735352, "learning_rate": 0.0001519910564151835, "loss": 2.9154, "step": 40661 }, { "epoch": 1.99, "grad_norm": 0.6795637607574463, "learning_rate": 0.00015197766464288337, "loss": 2.9062, "step": 40662 }, { "epoch": 1.99, "grad_norm": 0.642927885055542, "learning_rate": 0.00015196427326044553, "loss": 3.0751, "step": 40663 }, { "epoch": 1.99, "grad_norm": 0.6584546566009521, "learning_rate": 0.00015195088226790536, "loss": 3.1254, "step": 40664 }, { "epoch": 1.99, "grad_norm": 0.6724053621292114, "learning_rate": 0.00015193749166529804, "loss": 3.3392, "step": 40665 }, { "epoch": 1.99, "grad_norm": 0.6559529900550842, "learning_rate": 0.00015192410145265891, "loss": 3.0559, "step": 40666 }, { "epoch": 1.99, "grad_norm": 0.6671188473701477, "learning_rate": 0.00015191071163002308, "loss": 3.1581, "step": 40667 }, { "epoch": 1.99, "grad_norm": 0.6583215594291687, "learning_rate": 0.0001518973221974261, "loss": 2.9865, "step": 40668 }, { "epoch": 1.99, "grad_norm": 0.7388840317726135, "learning_rate": 0.00015188393315490308, "loss": 2.8475, "step": 40669 }, { "epoch": 1.99, "grad_norm": 0.6607951521873474, "learning_rate": 0.0001518705445024891, "loss": 3.153, "step": 40670 }, { "epoch": 1.99, "grad_norm": 0.6548836827278137, "learning_rate": 0.00015185715624021977, "loss": 2.8508, "step": 40671 }, { "epoch": 1.99, "grad_norm": 0.741969883441925, "learning_rate": 0.00015184376836813003, "loss": 3.0824, "step": 40672 }, { "epoch": 1.99, "grad_norm": 0.6918534636497498, "learning_rate": 0.0001518303808862554, "loss": 2.9278, "step": 40673 }, { "epoch": 1.99, "grad_norm": 0.628976583480835, "learning_rate": 0.00015181699379463094, "loss": 2.9431, "step": 40674 }, { "epoch": 1.99, "grad_norm": 0.6541244983673096, "learning_rate": 0.0001518036070932921, "loss": 2.7107, "step": 40675 }, { "epoch": 1.99, "grad_norm": 0.6655003428459167, "learning_rate": 0.00015179022078227406, "loss": 3.0281, "step": 40676 }, { "epoch": 1.99, "grad_norm": 0.6962637901306152, "learning_rate": 0.00015177683486161195, "loss": 3.0287, "step": 40677 }, { "epoch": 1.99, "grad_norm": 0.6648634672164917, "learning_rate": 0.00015176344933134117, "loss": 3.0775, "step": 40678 }, { "epoch": 1.99, "grad_norm": 0.6377243995666504, "learning_rate": 0.0001517500641914969, "loss": 2.7844, "step": 40679 }, { "epoch": 1.99, "grad_norm": 0.6637604236602783, "learning_rate": 0.0001517366794421144, "loss": 3.2034, "step": 40680 }, { "epoch": 1.99, "grad_norm": 0.6475324630737305, "learning_rate": 0.00015172329508322908, "loss": 3.0258, "step": 40681 }, { "epoch": 1.99, "grad_norm": 0.7287402749061584, "learning_rate": 0.0001517099111148759, "loss": 2.7741, "step": 40682 }, { "epoch": 1.99, "grad_norm": 0.6636556386947632, "learning_rate": 0.00015169652753709045, "loss": 2.9621, "step": 40683 }, { "epoch": 1.99, "grad_norm": 0.6482304334640503, "learning_rate": 0.0001516831443499078, "loss": 2.9935, "step": 40684 }, { "epoch": 1.99, "grad_norm": 0.6574996709823608, "learning_rate": 0.00015166976155336304, "loss": 2.9147, "step": 40685 }, { "epoch": 1.99, "grad_norm": 0.6734505891799927, "learning_rate": 0.0001516563791474917, "loss": 3.0778, "step": 40686 }, { "epoch": 1.99, "grad_norm": 0.6687676906585693, "learning_rate": 0.00015164299713232886, "loss": 2.9375, "step": 40687 }, { "epoch": 1.99, "grad_norm": 0.6934749484062195, "learning_rate": 0.00015162961550790988, "loss": 3.0035, "step": 40688 }, { "epoch": 1.99, "grad_norm": 0.7008736729621887, "learning_rate": 0.00015161623427426983, "loss": 2.7388, "step": 40689 }, { "epoch": 1.99, "grad_norm": 0.6899448037147522, "learning_rate": 0.00015160285343144417, "loss": 2.8754, "step": 40690 }, { "epoch": 1.99, "grad_norm": 0.6953763961791992, "learning_rate": 0.00015158947297946805, "loss": 3.1054, "step": 40691 }, { "epoch": 1.99, "grad_norm": 0.7021351456642151, "learning_rate": 0.00015157609291837658, "loss": 2.9845, "step": 40692 }, { "epoch": 1.99, "grad_norm": 0.657197892665863, "learning_rate": 0.00015156271324820523, "loss": 2.8504, "step": 40693 }, { "epoch": 1.99, "grad_norm": 0.6748011708259583, "learning_rate": 0.000151549333968989, "loss": 2.7495, "step": 40694 }, { "epoch": 1.99, "grad_norm": 0.6754131317138672, "learning_rate": 0.00015153595508076326, "loss": 2.8573, "step": 40695 }, { "epoch": 1.99, "grad_norm": 0.6298975944519043, "learning_rate": 0.00015152257658356336, "loss": 2.8821, "step": 40696 }, { "epoch": 1.99, "grad_norm": 0.6881123185157776, "learning_rate": 0.0001515091984774244, "loss": 2.8627, "step": 40697 }, { "epoch": 1.99, "grad_norm": 0.6599323153495789, "learning_rate": 0.00015149582076238164, "loss": 2.8482, "step": 40698 }, { "epoch": 1.99, "grad_norm": 0.640341579914093, "learning_rate": 0.00015148244343847025, "loss": 3.0448, "step": 40699 }, { "epoch": 1.99, "grad_norm": 0.7159940004348755, "learning_rate": 0.00015146906650572546, "loss": 3.1676, "step": 40700 }, { "epoch": 1.99, "grad_norm": 0.6907761096954346, "learning_rate": 0.00015145568996418268, "loss": 2.9688, "step": 40701 }, { "epoch": 1.99, "grad_norm": 0.6505234837532043, "learning_rate": 0.00015144231381387693, "loss": 2.9647, "step": 40702 }, { "epoch": 1.99, "grad_norm": 0.6479827761650085, "learning_rate": 0.00015142893805484363, "loss": 3.0348, "step": 40703 }, { "epoch": 1.99, "grad_norm": 0.7323071956634521, "learning_rate": 0.00015141556268711783, "loss": 2.7454, "step": 40704 }, { "epoch": 1.99, "grad_norm": 0.6864801049232483, "learning_rate": 0.00015140218771073495, "loss": 2.9794, "step": 40705 }, { "epoch": 1.99, "grad_norm": 0.675094485282898, "learning_rate": 0.00015138881312573007, "loss": 2.9768, "step": 40706 }, { "epoch": 1.99, "grad_norm": 0.6760757565498352, "learning_rate": 0.00015137543893213836, "loss": 2.6509, "step": 40707 }, { "epoch": 2.0, "grad_norm": 0.6964802145957947, "learning_rate": 0.00015136206512999527, "loss": 2.8603, "step": 40708 }, { "epoch": 2.0, "grad_norm": 0.6976985931396484, "learning_rate": 0.00015134869171933577, "loss": 2.8392, "step": 40709 }, { "epoch": 2.0, "grad_norm": 0.6337934136390686, "learning_rate": 0.00015133531870019525, "loss": 3.0877, "step": 40710 }, { "epoch": 2.0, "grad_norm": 0.6604355573654175, "learning_rate": 0.00015132194607260892, "loss": 2.9733, "step": 40711 }, { "epoch": 2.0, "grad_norm": 0.6681331992149353, "learning_rate": 0.00015130857383661203, "loss": 2.9892, "step": 40712 }, { "epoch": 2.0, "grad_norm": 0.7403008937835693, "learning_rate": 0.00015129520199223974, "loss": 2.8967, "step": 40713 }, { "epoch": 2.0, "grad_norm": 0.6657782196998596, "learning_rate": 0.00015128183053952714, "loss": 2.9276, "step": 40714 }, { "epoch": 2.0, "grad_norm": 0.6958048343658447, "learning_rate": 0.00015126845947850955, "loss": 2.9373, "step": 40715 }, { "epoch": 2.0, "grad_norm": 0.6533171534538269, "learning_rate": 0.00015125508880922236, "loss": 3.2072, "step": 40716 }, { "epoch": 2.0, "grad_norm": 0.6738389730453491, "learning_rate": 0.00015124171853170054, "loss": 3.0335, "step": 40717 }, { "epoch": 2.0, "grad_norm": 0.6788296699523926, "learning_rate": 0.0001512283486459795, "loss": 3.0401, "step": 40718 }, { "epoch": 2.0, "grad_norm": 0.6417179703712463, "learning_rate": 0.00015121497915209436, "loss": 2.66, "step": 40719 }, { "epoch": 2.0, "grad_norm": 0.6341338157653809, "learning_rate": 0.00015120161005008017, "loss": 3.0029, "step": 40720 }, { "epoch": 2.0, "grad_norm": 0.6832126975059509, "learning_rate": 0.0001511882413399725, "loss": 2.7799, "step": 40721 }, { "epoch": 2.0, "grad_norm": 0.7208033800125122, "learning_rate": 0.0001511748730218062, "loss": 2.8199, "step": 40722 }, { "epoch": 2.0, "grad_norm": 0.7194510698318481, "learning_rate": 0.00015116150509561675, "loss": 3.077, "step": 40723 }, { "epoch": 2.0, "grad_norm": 0.665046751499176, "learning_rate": 0.00015114813756143917, "loss": 3.0192, "step": 40724 }, { "epoch": 2.0, "grad_norm": 0.6552749872207642, "learning_rate": 0.0001511347704193087, "loss": 3.1042, "step": 40725 }, { "epoch": 2.0, "grad_norm": 0.7193647623062134, "learning_rate": 0.00015112140366926074, "loss": 3.11, "step": 40726 }, { "epoch": 2.0, "grad_norm": 0.718508243560791, "learning_rate": 0.00015110803731133038, "loss": 2.9426, "step": 40727 }, { "epoch": 2.0, "grad_norm": 0.6804858446121216, "learning_rate": 0.00015109467134555277, "loss": 3.0969, "step": 40728 }, { "epoch": 2.0, "grad_norm": 0.6869458556175232, "learning_rate": 0.00015108130577196302, "loss": 3.13, "step": 40729 }, { "epoch": 2.0, "grad_norm": 0.669521152973175, "learning_rate": 0.00015106794059059647, "loss": 2.9521, "step": 40730 }, { "epoch": 2.0, "grad_norm": 0.6213893294334412, "learning_rate": 0.00015105457580148838, "loss": 3.031, "step": 40731 }, { "epoch": 2.0, "grad_norm": 0.6913130879402161, "learning_rate": 0.00015104121140467378, "loss": 2.8968, "step": 40732 }, { "epoch": 2.0, "grad_norm": 0.676311731338501, "learning_rate": 0.00015102784740018803, "loss": 3.1907, "step": 40733 }, { "epoch": 2.0, "grad_norm": 0.6928296685218811, "learning_rate": 0.00015101448378806632, "loss": 2.9377, "step": 40734 }, { "epoch": 2.0, "grad_norm": 0.6946173906326294, "learning_rate": 0.00015100112056834367, "loss": 3.0254, "step": 40735 }, { "epoch": 2.0, "grad_norm": 0.6546354293823242, "learning_rate": 0.0001509877577410555, "loss": 3.2543, "step": 40736 }, { "epoch": 2.0, "grad_norm": 0.707718551158905, "learning_rate": 0.00015097439530623675, "loss": 2.915, "step": 40737 }, { "epoch": 2.0, "grad_norm": 0.6598696708679199, "learning_rate": 0.00015096103326392291, "loss": 2.7935, "step": 40738 }, { "epoch": 2.0, "grad_norm": 0.626047670841217, "learning_rate": 0.0001509476716141489, "loss": 3.126, "step": 40739 }, { "epoch": 2.0, "grad_norm": 0.6716310977935791, "learning_rate": 0.00015093431035695013, "loss": 3.1383, "step": 40740 }, { "epoch": 2.0, "grad_norm": 0.6620728969573975, "learning_rate": 0.0001509209494923617, "loss": 3.0502, "step": 40741 }, { "epoch": 2.0, "grad_norm": 0.6665850877761841, "learning_rate": 0.00015090758902041874, "loss": 3.2004, "step": 40742 }, { "epoch": 2.0, "grad_norm": 0.6744237542152405, "learning_rate": 0.00015089422894115656, "loss": 3.0869, "step": 40743 }, { "epoch": 2.0, "grad_norm": 0.6411026120185852, "learning_rate": 0.00015088086925461018, "loss": 3.0758, "step": 40744 }, { "epoch": 2.0, "grad_norm": 0.6751980781555176, "learning_rate": 0.00015086750996081493, "loss": 2.9283, "step": 40745 }, { "epoch": 2.0, "grad_norm": 0.6658977270126343, "learning_rate": 0.000150854151059806, "loss": 3.2339, "step": 40746 }, { "epoch": 2.0, "grad_norm": 0.6690980195999146, "learning_rate": 0.0001508407925516185, "loss": 2.8654, "step": 40747 }, { "epoch": 2.0, "grad_norm": 0.7223593592643738, "learning_rate": 0.0001508274344362877, "loss": 2.8638, "step": 40748 }, { "epoch": 2.0, "grad_norm": 0.673263669013977, "learning_rate": 0.00015081407671384874, "loss": 2.9679, "step": 40749 }, { "epoch": 2.0, "grad_norm": 0.706080436706543, "learning_rate": 0.0001508007193843368, "loss": 3.065, "step": 40750 }, { "epoch": 2.0, "grad_norm": 0.6620662212371826, "learning_rate": 0.00015078736244778692, "loss": 2.858, "step": 40751 }, { "epoch": 2.0, "grad_norm": 0.6945991516113281, "learning_rate": 0.00015077400590423443, "loss": 2.972, "step": 40752 }, { "epoch": 2.0, "grad_norm": 0.6781968474388123, "learning_rate": 0.0001507606497537146, "loss": 2.8857, "step": 40753 }, { "epoch": 2.0, "grad_norm": 0.6542848348617554, "learning_rate": 0.00015074729399626243, "loss": 2.7439, "step": 40754 }, { "epoch": 2.0, "grad_norm": 0.6352688670158386, "learning_rate": 0.0001507339386319132, "loss": 2.9773, "step": 40755 }, { "epoch": 2.0, "grad_norm": 0.6776319742202759, "learning_rate": 0.00015072058366070207, "loss": 2.7851, "step": 40756 }, { "epoch": 2.0, "grad_norm": 0.6314222812652588, "learning_rate": 0.00015070722908266409, "loss": 2.9906, "step": 40757 }, { "epoch": 2.0, "grad_norm": 0.6974877715110779, "learning_rate": 0.00015069387489783466, "loss": 2.8347, "step": 40758 }, { "epoch": 2.0, "grad_norm": 0.6645646691322327, "learning_rate": 0.0001506805211062487, "loss": 2.8755, "step": 40759 }, { "epoch": 2.0, "grad_norm": 0.6492670774459839, "learning_rate": 0.00015066716770794164, "loss": 2.9298, "step": 40760 }, { "epoch": 2.0, "grad_norm": 0.6646486520767212, "learning_rate": 0.00015065381470294838, "loss": 3.1262, "step": 40761 }, { "epoch": 2.0, "grad_norm": 0.6600433588027954, "learning_rate": 0.00015064046209130425, "loss": 3.1513, "step": 40762 }, { "epoch": 2.0, "grad_norm": 0.6794731616973877, "learning_rate": 0.0001506271098730445, "loss": 2.994, "step": 40763 }, { "epoch": 2.0, "grad_norm": 0.6728107929229736, "learning_rate": 0.00015061375804820422, "loss": 3.0659, "step": 40764 }, { "epoch": 2.0, "grad_norm": 0.6461384296417236, "learning_rate": 0.00015060040661681853, "loss": 2.8795, "step": 40765 }, { "epoch": 2.0, "grad_norm": 0.6834413409233093, "learning_rate": 0.0001505870555789225, "loss": 2.884, "step": 40766 }, { "epoch": 2.0, "grad_norm": 0.6609336733818054, "learning_rate": 0.00015057370493455142, "loss": 2.9974, "step": 40767 }, { "epoch": 2.0, "grad_norm": 0.6304715275764465, "learning_rate": 0.00015056035468374054, "loss": 2.9703, "step": 40768 }, { "epoch": 2.0, "grad_norm": 0.6766150593757629, "learning_rate": 0.00015054700482652482, "loss": 2.9168, "step": 40769 }, { "epoch": 2.0, "grad_norm": 0.6861264705657959, "learning_rate": 0.00015053365536293967, "loss": 2.6705, "step": 40770 }, { "epoch": 2.0, "grad_norm": 0.7163991928100586, "learning_rate": 0.00015052030629302007, "loss": 2.8756, "step": 40771 }, { "epoch": 2.0, "grad_norm": 0.6539211869239807, "learning_rate": 0.00015050695761680108, "loss": 2.8508, "step": 40772 }, { "epoch": 2.0, "grad_norm": 0.6503493785858154, "learning_rate": 0.00015049360933431816, "loss": 2.8387, "step": 40773 }, { "epoch": 2.0, "grad_norm": 0.6681285500526428, "learning_rate": 0.00015048026144560617, "loss": 3.1689, "step": 40774 }, { "epoch": 2.0, "grad_norm": 0.6761608719825745, "learning_rate": 0.0001504669139507005, "loss": 2.8518, "step": 40775 }, { "epoch": 2.0, "grad_norm": 0.7086718082427979, "learning_rate": 0.00015045356684963612, "loss": 3.0821, "step": 40776 }, { "epoch": 2.0, "grad_norm": 0.6684795618057251, "learning_rate": 0.00015044022014244835, "loss": 3.0127, "step": 40777 }, { "epoch": 2.0, "grad_norm": 0.6545731425285339, "learning_rate": 0.00015042687382917224, "loss": 3.1053, "step": 40778 }, { "epoch": 2.0, "grad_norm": 0.7030220627784729, "learning_rate": 0.00015041352790984286, "loss": 2.9971, "step": 40779 }, { "epoch": 2.0, "grad_norm": 0.6445237994194031, "learning_rate": 0.00015040018238449558, "loss": 2.959, "step": 40780 }, { "epoch": 2.0, "grad_norm": 0.702018678188324, "learning_rate": 0.0001503868372531653, "loss": 2.873, "step": 40781 }, { "epoch": 2.0, "grad_norm": 0.6564288139343262, "learning_rate": 0.00015037349251588732, "loss": 2.7837, "step": 40782 }, { "epoch": 2.0, "grad_norm": 0.732306957244873, "learning_rate": 0.00015036014817269687, "loss": 3.0326, "step": 40783 }, { "epoch": 2.0, "grad_norm": 0.6501794457435608, "learning_rate": 0.00015034680422362884, "loss": 2.9075, "step": 40784 }, { "epoch": 2.0, "grad_norm": 0.6539938449859619, "learning_rate": 0.00015033346066871866, "loss": 2.8234, "step": 40785 }, { "epoch": 2.0, "grad_norm": 0.6749308705329895, "learning_rate": 0.0001503201175080014, "loss": 3.2814, "step": 40786 }, { "epoch": 2.0, "grad_norm": 0.6837059855461121, "learning_rate": 0.00015030677474151195, "loss": 2.97, "step": 40787 }, { "epoch": 2.0, "grad_norm": 0.6461108922958374, "learning_rate": 0.00015029343236928582, "loss": 3.1164, "step": 40788 }, { "epoch": 2.0, "grad_norm": 0.6855670213699341, "learning_rate": 0.00015028009039135781, "loss": 2.8901, "step": 40789 }, { "epoch": 2.0, "grad_norm": 0.683847188949585, "learning_rate": 0.00015026674880776338, "loss": 2.9545, "step": 40790 }, { "epoch": 2.0, "grad_norm": 0.6807911396026611, "learning_rate": 0.00015025340761853738, "loss": 2.9702, "step": 40791 }, { "epoch": 2.0, "grad_norm": 0.689594566822052, "learning_rate": 0.00015024006682371523, "loss": 3.1826, "step": 40792 }, { "epoch": 2.0, "grad_norm": 0.667519211769104, "learning_rate": 0.00015022672642333188, "loss": 2.9398, "step": 40793 }, { "epoch": 2.0, "grad_norm": 0.7207837700843811, "learning_rate": 0.00015021338641742245, "loss": 2.7841, "step": 40794 }, { "epoch": 2.0, "grad_norm": 0.661036491394043, "learning_rate": 0.00015020004680602218, "loss": 3.011, "step": 40795 }, { "epoch": 2.0, "grad_norm": 0.72581946849823, "learning_rate": 0.00015018670758916607, "loss": 3.1146, "step": 40796 }, { "epoch": 2.0, "grad_norm": 0.6660535335540771, "learning_rate": 0.00015017336876688934, "loss": 3.1452, "step": 40797 }, { "epoch": 2.0, "grad_norm": 0.6611814498901367, "learning_rate": 0.00015016003033922726, "loss": 2.895, "step": 40798 }, { "epoch": 2.0, "grad_norm": 0.6168580651283264, "learning_rate": 0.00015014669230621483, "loss": 3.0379, "step": 40799 }, { "epoch": 2.0, "grad_norm": 0.6940125226974487, "learning_rate": 0.00015013335466788712, "loss": 2.9946, "step": 40800 }, { "epoch": 2.0, "grad_norm": 0.6904090642929077, "learning_rate": 0.00015012001742427923, "loss": 2.9315, "step": 40801 }, { "epoch": 2.0, "grad_norm": 0.6492003202438354, "learning_rate": 0.00015010668057542637, "loss": 2.9914, "step": 40802 }, { "epoch": 2.0, "grad_norm": 0.652450442314148, "learning_rate": 0.00015009334412136377, "loss": 3.0156, "step": 40803 }, { "epoch": 2.0, "grad_norm": 0.6898772716522217, "learning_rate": 0.00015008000806212635, "loss": 3.0286, "step": 40804 }, { "epoch": 2.0, "grad_norm": 0.6315729022026062, "learning_rate": 0.00015006667239774943, "loss": 2.6668, "step": 40805 }, { "epoch": 2.0, "grad_norm": 0.6902825832366943, "learning_rate": 0.00015005333712826794, "loss": 2.9763, "step": 40806 }, { "epoch": 2.0, "grad_norm": 0.6964982151985168, "learning_rate": 0.0001500400022537172, "loss": 2.8922, "step": 40807 }, { "epoch": 2.0, "grad_norm": 0.6511028409004211, "learning_rate": 0.00015002666777413229, "loss": 3.2217, "step": 40808 }, { "epoch": 2.0, "grad_norm": 0.6448655128479004, "learning_rate": 0.0001500133336895481, "loss": 3.1245, "step": 40809 }, { "epoch": 2.0, "grad_norm": 0.8458114862442017, "learning_rate": 0.00015000000000000004, "loss": 2.9976, "step": 40810 }, { "epoch": 2.0, "grad_norm": 0.6726029515266418, "learning_rate": 0.00014998666670552302, "loss": 2.8374, "step": 40811 }, { "epoch": 2.0, "grad_norm": 0.6877138614654541, "learning_rate": 0.00014997333380615223, "loss": 2.915, "step": 40812 }, { "epoch": 2.0, "grad_norm": 0.6989501714706421, "learning_rate": 0.00014996000130192293, "loss": 2.9306, "step": 40813 }, { "epoch": 2.0, "grad_norm": 0.6564236283302307, "learning_rate": 0.00014994666919287012, "loss": 2.9193, "step": 40814 }, { "epoch": 2.0, "grad_norm": 0.6724837422370911, "learning_rate": 0.0001499333374790289, "loss": 2.9177, "step": 40815 }, { "epoch": 2.0, "grad_norm": 0.6566126346588135, "learning_rate": 0.00014992000616043428, "loss": 3.009, "step": 40816 }, { "epoch": 2.0, "grad_norm": 0.6281498074531555, "learning_rate": 0.00014990667523712145, "loss": 3.0989, "step": 40817 }, { "epoch": 2.0, "grad_norm": 0.6152768731117249, "learning_rate": 0.0001498933447091257, "loss": 2.8553, "step": 40818 }, { "epoch": 2.0, "grad_norm": 0.649753749370575, "learning_rate": 0.00014988001457648186, "loss": 2.8566, "step": 40819 }, { "epoch": 2.0, "grad_norm": 0.6632745862007141, "learning_rate": 0.0001498666848392253, "loss": 2.9984, "step": 40820 }, { "epoch": 2.0, "grad_norm": 0.6726300120353699, "learning_rate": 0.00014985335549739097, "loss": 2.6237, "step": 40821 }, { "epoch": 2.0, "grad_norm": 0.6407365202903748, "learning_rate": 0.00014984002655101393, "loss": 2.9663, "step": 40822 }, { "epoch": 2.0, "grad_norm": 0.6597623825073242, "learning_rate": 0.00014982669800012944, "loss": 2.8987, "step": 40823 }, { "epoch": 2.0, "grad_norm": 0.689781904220581, "learning_rate": 0.00014981336984477243, "loss": 2.9595, "step": 40824 }, { "epoch": 2.0, "grad_norm": 0.6714945435523987, "learning_rate": 0.0001498000420849782, "loss": 2.9263, "step": 40825 }, { "epoch": 2.0, "grad_norm": 0.635680615901947, "learning_rate": 0.00014978671472078167, "loss": 2.8758, "step": 40826 }, { "epoch": 2.0, "grad_norm": 0.6559689044952393, "learning_rate": 0.0001497733877522181, "loss": 3.0803, "step": 40827 }, { "epoch": 2.0, "grad_norm": 0.6731073260307312, "learning_rate": 0.00014976006117932243, "loss": 2.9924, "step": 40828 }, { "epoch": 2.0, "grad_norm": 0.6569121479988098, "learning_rate": 0.00014974673500212996, "loss": 2.9098, "step": 40829 }, { "epoch": 2.0, "grad_norm": 0.6949353814125061, "learning_rate": 0.00014973340922067564, "loss": 2.73, "step": 40830 }, { "epoch": 2.0, "grad_norm": 0.6641110777854919, "learning_rate": 0.0001497200838349945, "loss": 3.1152, "step": 40831 }, { "epoch": 2.0, "grad_norm": 0.7344586253166199, "learning_rate": 0.00014970675884512188, "loss": 2.8915, "step": 40832 }, { "epoch": 2.0, "grad_norm": 0.7318583726882935, "learning_rate": 0.00014969343425109258, "loss": 2.968, "step": 40833 }, { "epoch": 2.0, "grad_norm": 0.7301783561706543, "learning_rate": 0.00014968011005294187, "loss": 3.0687, "step": 40834 }, { "epoch": 2.0, "grad_norm": 0.6690212488174438, "learning_rate": 0.00014966678625070494, "loss": 2.7896, "step": 40835 }, { "epoch": 2.0, "grad_norm": 0.6483722925186157, "learning_rate": 0.00014965346284441674, "loss": 2.9243, "step": 40836 }, { "epoch": 2.0, "grad_norm": 0.6564923524856567, "learning_rate": 0.00014964013983411237, "loss": 2.853, "step": 40837 }, { "epoch": 2.0, "grad_norm": 0.6334251165390015, "learning_rate": 0.00014962681721982686, "loss": 2.8823, "step": 40838 }, { "epoch": 2.0, "grad_norm": 0.7064406275749207, "learning_rate": 0.00014961349500159537, "loss": 2.8174, "step": 40839 }, { "epoch": 2.0, "grad_norm": 0.7101202607154846, "learning_rate": 0.0001496001731794531, "loss": 3.037, "step": 40840 }, { "epoch": 2.0, "grad_norm": 0.688157320022583, "learning_rate": 0.0001495868517534349, "loss": 2.8154, "step": 40841 }, { "epoch": 2.0, "grad_norm": 0.6820224523544312, "learning_rate": 0.0001495735307235761, "loss": 2.9316, "step": 40842 }, { "epoch": 2.0, "grad_norm": 0.6670019030570984, "learning_rate": 0.0001495602100899117, "loss": 2.9859, "step": 40843 }, { "epoch": 2.0, "grad_norm": 0.7046992778778076, "learning_rate": 0.0001495468898524766, "loss": 2.7433, "step": 40844 }, { "epoch": 2.0, "grad_norm": 0.6730843782424927, "learning_rate": 0.00014953357001130617, "loss": 2.7432, "step": 40845 }, { "epoch": 2.0, "grad_norm": 0.6858168840408325, "learning_rate": 0.00014952025056643524, "loss": 2.9308, "step": 40846 }, { "epoch": 2.0, "grad_norm": 0.6220602989196777, "learning_rate": 0.00014950693151789913, "loss": 2.903, "step": 40847 }, { "epoch": 2.0, "grad_norm": 0.6544325947761536, "learning_rate": 0.0001494936128657327, "loss": 3.0168, "step": 40848 }, { "epoch": 2.0, "grad_norm": 0.668447732925415, "learning_rate": 0.0001494802946099711, "loss": 2.9114, "step": 40849 }, { "epoch": 2.0, "grad_norm": 0.6595551371574402, "learning_rate": 0.00014946697675064955, "loss": 2.9192, "step": 40850 }, { "epoch": 2.0, "grad_norm": 0.6544173359870911, "learning_rate": 0.00014945365928780306, "loss": 3.0946, "step": 40851 }, { "epoch": 2.0, "grad_norm": 0.7191558480262756, "learning_rate": 0.0001494403422214666, "loss": 3.0198, "step": 40852 }, { "epoch": 2.0, "grad_norm": 0.6780735850334167, "learning_rate": 0.00014942702555167521, "loss": 2.9645, "step": 40853 }, { "epoch": 2.0, "grad_norm": 0.6664487719535828, "learning_rate": 0.00014941370927846407, "loss": 2.9704, "step": 40854 }, { "epoch": 2.0, "grad_norm": 0.706179678440094, "learning_rate": 0.00014940039340186835, "loss": 2.8314, "step": 40855 }, { "epoch": 2.0, "grad_norm": 0.6947509050369263, "learning_rate": 0.00014938707792192286, "loss": 2.823, "step": 40856 }, { "epoch": 2.0, "grad_norm": 0.6411890387535095, "learning_rate": 0.00014937376283866296, "loss": 3.0037, "step": 40857 }, { "epoch": 2.0, "grad_norm": 0.7116650938987732, "learning_rate": 0.00014936044815212356, "loss": 2.8026, "step": 40858 }, { "epoch": 2.0, "grad_norm": 0.6230390071868896, "learning_rate": 0.00014934713386233968, "loss": 2.948, "step": 40859 }, { "epoch": 2.0, "grad_norm": 0.6088177561759949, "learning_rate": 0.00014933381996934655, "loss": 2.8931, "step": 40860 }, { "epoch": 2.0, "grad_norm": 0.68245530128479, "learning_rate": 0.00014932050647317903, "loss": 3.0912, "step": 40861 }, { "epoch": 2.0, "grad_norm": 0.6987802386283875, "learning_rate": 0.0001493071933738724, "loss": 2.9147, "step": 40862 }, { "epoch": 2.0, "grad_norm": 0.6714006066322327, "learning_rate": 0.0001492938806714615, "loss": 2.9907, "step": 40863 }, { "epoch": 2.0, "grad_norm": 0.6982893347740173, "learning_rate": 0.0001492805683659815, "loss": 2.9568, "step": 40864 }, { "epoch": 2.0, "grad_norm": 0.6661611795425415, "learning_rate": 0.00014926725645746775, "loss": 2.8846, "step": 40865 }, { "epoch": 2.0, "grad_norm": 0.7231001257896423, "learning_rate": 0.0001492539449459548, "loss": 2.9071, "step": 40866 }, { "epoch": 2.0, "grad_norm": 0.6181085705757141, "learning_rate": 0.00014924063383147805, "loss": 2.9526, "step": 40867 }, { "epoch": 2.0, "grad_norm": 0.6579425930976868, "learning_rate": 0.00014922732311407235, "loss": 2.8463, "step": 40868 }, { "epoch": 2.0, "grad_norm": 0.6170904636383057, "learning_rate": 0.00014921401279377285, "loss": 2.8966, "step": 40869 }, { "epoch": 2.0, "grad_norm": 0.6523815393447876, "learning_rate": 0.00014920070287061473, "loss": 2.8954, "step": 40870 }, { "epoch": 2.0, "grad_norm": 0.7443393468856812, "learning_rate": 0.00014918739334463286, "loss": 2.858, "step": 40871 }, { "epoch": 2.0, "grad_norm": 0.6416648030281067, "learning_rate": 0.00014917408421586247, "loss": 2.8031, "step": 40872 }, { "epoch": 2.0, "grad_norm": 0.6666995286941528, "learning_rate": 0.0001491607754843385, "loss": 2.951, "step": 40873 }, { "epoch": 2.0, "grad_norm": 0.642076849937439, "learning_rate": 0.0001491474671500959, "loss": 2.7547, "step": 40874 }, { "epoch": 2.0, "grad_norm": 0.6719287037849426, "learning_rate": 0.00014913415921316995, "loss": 2.9337, "step": 40875 }, { "epoch": 2.0, "grad_norm": 0.6449657678604126, "learning_rate": 0.0001491208516735955, "loss": 2.9809, "step": 40876 }, { "epoch": 2.0, "grad_norm": 0.6794679164886475, "learning_rate": 0.00014910754453140775, "loss": 2.9901, "step": 40877 }, { "epoch": 2.0, "grad_norm": 0.6926255822181702, "learning_rate": 0.00014909423778664165, "loss": 2.8638, "step": 40878 }, { "epoch": 2.0, "grad_norm": 0.6646115779876709, "learning_rate": 0.00014908093143933235, "loss": 2.7086, "step": 40879 }, { "epoch": 2.0, "grad_norm": 0.6892146468162537, "learning_rate": 0.00014906762548951485, "loss": 2.8719, "step": 40880 }, { "epoch": 2.0, "grad_norm": 0.6423749923706055, "learning_rate": 0.00014905431993722407, "loss": 2.862, "step": 40881 }, { "epoch": 2.0, "grad_norm": 0.7053487300872803, "learning_rate": 0.00014904101478249525, "loss": 3.0384, "step": 40882 }, { "epoch": 2.0, "grad_norm": 0.6942588090896606, "learning_rate": 0.00014902771002536325, "loss": 2.9993, "step": 40883 }, { "epoch": 2.0, "grad_norm": 0.773780345916748, "learning_rate": 0.0001490144056658632, "loss": 2.9149, "step": 40884 }, { "epoch": 2.0, "grad_norm": 0.7698376774787903, "learning_rate": 0.00014900110170403027, "loss": 2.89, "step": 40885 }, { "epoch": 2.0, "grad_norm": 0.7158568501472473, "learning_rate": 0.00014898779813989925, "loss": 2.7576, "step": 40886 }, { "epoch": 2.0, "grad_norm": 0.671528697013855, "learning_rate": 0.00014897449497350545, "loss": 2.9737, "step": 40887 }, { "epoch": 2.0, "grad_norm": 0.6958116888999939, "learning_rate": 0.0001489611922048837, "loss": 2.8742, "step": 40888 }, { "epoch": 2.0, "grad_norm": 0.6262465119361877, "learning_rate": 0.00014894788983406905, "loss": 2.7978, "step": 40889 }, { "epoch": 2.0, "grad_norm": 0.7286080718040466, "learning_rate": 0.0001489345878610967, "loss": 2.6667, "step": 40890 }, { "epoch": 2.0, "grad_norm": 0.686791718006134, "learning_rate": 0.0001489212862860014, "loss": 2.9975, "step": 40891 }, { "epoch": 2.0, "grad_norm": 0.6629527807235718, "learning_rate": 0.00014890798510881854, "loss": 2.7829, "step": 40892 }, { "epoch": 2.0, "grad_norm": 0.6882234215736389, "learning_rate": 0.00014889468432958283, "loss": 2.8711, "step": 40893 }, { "epoch": 2.0, "grad_norm": 0.6697384119033813, "learning_rate": 0.00014888138394832955, "loss": 3.0293, "step": 40894 }, { "epoch": 2.0, "grad_norm": 0.6356438994407654, "learning_rate": 0.0001488680839650936, "loss": 2.9981, "step": 40895 }, { "epoch": 2.0, "grad_norm": 0.6785897612571716, "learning_rate": 0.00014885478437990998, "loss": 2.9045, "step": 40896 }, { "epoch": 2.0, "grad_norm": 0.6552438139915466, "learning_rate": 0.0001488414851928138, "loss": 3.0607, "step": 40897 }, { "epoch": 2.0, "grad_norm": 0.6709671020507812, "learning_rate": 0.00014882818640384, "loss": 3.0093, "step": 40898 }, { "epoch": 2.0, "grad_norm": 0.632817268371582, "learning_rate": 0.00014881488801302367, "loss": 3.0937, "step": 40899 }, { "epoch": 2.0, "grad_norm": 0.6626100540161133, "learning_rate": 0.0001488015900203999, "loss": 2.7311, "step": 40900 }, { "epoch": 2.0, "grad_norm": 0.7310886979103088, "learning_rate": 0.0001487882924260037, "loss": 2.8512, "step": 40901 }, { "epoch": 2.0, "grad_norm": 0.6690772771835327, "learning_rate": 0.00014877499522986996, "loss": 2.8298, "step": 40902 }, { "epoch": 2.0, "grad_norm": 0.6155535578727722, "learning_rate": 0.0001487616984320337, "loss": 2.8708, "step": 40903 }, { "epoch": 2.0, "grad_norm": 0.6756638884544373, "learning_rate": 0.00014874840203253016, "loss": 3.0289, "step": 40904 }, { "epoch": 2.0, "grad_norm": 0.6889369487762451, "learning_rate": 0.00014873510603139408, "loss": 2.9191, "step": 40905 }, { "epoch": 2.0, "grad_norm": 0.6636484265327454, "learning_rate": 0.0001487218104286606, "loss": 2.8135, "step": 40906 }, { "epoch": 2.0, "grad_norm": 0.7536406517028809, "learning_rate": 0.0001487085152243649, "loss": 3.1152, "step": 40907 }, { "epoch": 2.0, "grad_norm": 0.6910778880119324, "learning_rate": 0.00014869522041854175, "loss": 2.8008, "step": 40908 }, { "epoch": 2.0, "grad_norm": 0.7060720324516296, "learning_rate": 0.00014868192601122637, "loss": 3.0552, "step": 40909 }, { "epoch": 2.0, "grad_norm": 0.6742306351661682, "learning_rate": 0.0001486686320024537, "loss": 2.7962, "step": 40910 }, { "epoch": 2.0, "grad_norm": 0.6626346111297607, "learning_rate": 0.00014865533839225858, "loss": 2.9618, "step": 40911 }, { "epoch": 2.0, "grad_norm": 0.6891857385635376, "learning_rate": 0.00014864204518067628, "loss": 2.9439, "step": 40912 }, { "epoch": 2.01, "grad_norm": 0.650107741355896, "learning_rate": 0.00014862875236774164, "loss": 2.9421, "step": 40913 }, { "epoch": 2.01, "grad_norm": 0.7103627324104309, "learning_rate": 0.0001486154599534898, "loss": 3.0348, "step": 40914 }, { "epoch": 2.01, "grad_norm": 0.6941289901733398, "learning_rate": 0.0001486021679379556, "loss": 3.0479, "step": 40915 }, { "epoch": 2.01, "grad_norm": 0.6747994422912598, "learning_rate": 0.00014858887632117424, "loss": 3.063, "step": 40916 }, { "epoch": 2.01, "grad_norm": 0.6820676922798157, "learning_rate": 0.00014857558510318072, "loss": 3.0172, "step": 40917 }, { "epoch": 2.01, "grad_norm": 0.6499535441398621, "learning_rate": 0.0001485622942840098, "loss": 2.9887, "step": 40918 }, { "epoch": 2.01, "grad_norm": 0.6234838962554932, "learning_rate": 0.00014854900386369676, "loss": 2.8712, "step": 40919 }, { "epoch": 2.01, "grad_norm": 0.7115194201469421, "learning_rate": 0.00014853571384227637, "loss": 2.9185, "step": 40920 }, { "epoch": 2.01, "grad_norm": 0.6853716373443604, "learning_rate": 0.00014852242421978378, "loss": 2.8748, "step": 40921 }, { "epoch": 2.01, "grad_norm": 0.7544880509376526, "learning_rate": 0.00014850913499625412, "loss": 2.7981, "step": 40922 }, { "epoch": 2.01, "grad_norm": 0.6600387692451477, "learning_rate": 0.00014849584617172222, "loss": 2.9013, "step": 40923 }, { "epoch": 2.01, "grad_norm": 0.6780473589897156, "learning_rate": 0.0001484825577462231, "loss": 2.8438, "step": 40924 }, { "epoch": 2.01, "grad_norm": 0.667023241519928, "learning_rate": 0.00014846926971979162, "loss": 2.8672, "step": 40925 }, { "epoch": 2.01, "grad_norm": 0.6934083104133606, "learning_rate": 0.00014845598209246294, "loss": 2.7644, "step": 40926 }, { "epoch": 2.01, "grad_norm": 0.7000552415847778, "learning_rate": 0.00014844269486427217, "loss": 2.9773, "step": 40927 }, { "epoch": 2.01, "grad_norm": 0.6767484545707703, "learning_rate": 0.00014842940803525405, "loss": 2.5985, "step": 40928 }, { "epoch": 2.01, "grad_norm": 0.6668387055397034, "learning_rate": 0.00014841612160544382, "loss": 2.9201, "step": 40929 }, { "epoch": 2.01, "grad_norm": 0.6447761654853821, "learning_rate": 0.0001484028355748762, "loss": 3.0512, "step": 40930 }, { "epoch": 2.01, "grad_norm": 0.6750105619430542, "learning_rate": 0.00014838954994358648, "loss": 3.1484, "step": 40931 }, { "epoch": 2.01, "grad_norm": 0.6737284064292908, "learning_rate": 0.00014837626471160952, "loss": 2.8619, "step": 40932 }, { "epoch": 2.01, "grad_norm": 0.6524981260299683, "learning_rate": 0.00014836297987898018, "loss": 3.1476, "step": 40933 }, { "epoch": 2.01, "grad_norm": 0.6673911809921265, "learning_rate": 0.00014834969544573367, "loss": 2.9744, "step": 40934 }, { "epoch": 2.01, "grad_norm": 0.6576171517372131, "learning_rate": 0.00014833641141190476, "loss": 2.8008, "step": 40935 }, { "epoch": 2.01, "grad_norm": 0.6706809997558594, "learning_rate": 0.00014832312777752856, "loss": 2.8009, "step": 40936 }, { "epoch": 2.01, "grad_norm": 0.6633245348930359, "learning_rate": 0.00014830984454264015, "loss": 2.7447, "step": 40937 }, { "epoch": 2.01, "grad_norm": 0.6687329411506653, "learning_rate": 0.00014829656170727443, "loss": 2.8792, "step": 40938 }, { "epoch": 2.01, "grad_norm": 0.7036616206169128, "learning_rate": 0.0001482832792714664, "loss": 2.9527, "step": 40939 }, { "epoch": 2.01, "grad_norm": 0.6736128330230713, "learning_rate": 0.00014826999723525084, "loss": 2.9944, "step": 40940 }, { "epoch": 2.01, "grad_norm": 0.6891912817955017, "learning_rate": 0.00014825671559866294, "loss": 2.9328, "step": 40941 }, { "epoch": 2.01, "grad_norm": 0.6677616834640503, "learning_rate": 0.00014824343436173774, "loss": 2.9777, "step": 40942 }, { "epoch": 2.01, "grad_norm": 0.6840057969093323, "learning_rate": 0.00014823015352451006, "loss": 2.9738, "step": 40943 }, { "epoch": 2.01, "grad_norm": 0.7241817712783813, "learning_rate": 0.00014821687308701502, "loss": 2.8735, "step": 40944 }, { "epoch": 2.01, "grad_norm": 0.7008278369903564, "learning_rate": 0.0001482035930492875, "loss": 2.8839, "step": 40945 }, { "epoch": 2.01, "grad_norm": 0.639018714427948, "learning_rate": 0.00014819031341136246, "loss": 2.9546, "step": 40946 }, { "epoch": 2.01, "grad_norm": 0.6628701686859131, "learning_rate": 0.000148177034173275, "loss": 2.8829, "step": 40947 }, { "epoch": 2.01, "grad_norm": 0.7044912576675415, "learning_rate": 0.00014816375533505992, "loss": 2.7321, "step": 40948 }, { "epoch": 2.01, "grad_norm": 0.658323347568512, "learning_rate": 0.00014815047689675234, "loss": 2.9143, "step": 40949 }, { "epoch": 2.01, "grad_norm": 0.6978631615638733, "learning_rate": 0.00014813719885838712, "loss": 2.7587, "step": 40950 }, { "epoch": 2.01, "grad_norm": 0.6369495987892151, "learning_rate": 0.00014812392121999927, "loss": 2.9732, "step": 40951 }, { "epoch": 2.01, "grad_norm": 0.6941462159156799, "learning_rate": 0.00014811064398162388, "loss": 2.7965, "step": 40952 }, { "epoch": 2.01, "grad_norm": 0.6733207106590271, "learning_rate": 0.00014809736714329588, "loss": 2.9032, "step": 40953 }, { "epoch": 2.01, "grad_norm": 0.6448726058006287, "learning_rate": 0.00014808409070505012, "loss": 2.8493, "step": 40954 }, { "epoch": 2.01, "grad_norm": 0.6968315839767456, "learning_rate": 0.00014807081466692155, "loss": 2.8681, "step": 40955 }, { "epoch": 2.01, "grad_norm": 0.6684523820877075, "learning_rate": 0.0001480575390289452, "loss": 2.9887, "step": 40956 }, { "epoch": 2.01, "grad_norm": 0.6307798027992249, "learning_rate": 0.0001480442637911562, "loss": 2.8416, "step": 40957 }, { "epoch": 2.01, "grad_norm": 0.660571277141571, "learning_rate": 0.0001480309889535892, "loss": 3.104, "step": 40958 }, { "epoch": 2.01, "grad_norm": 0.6901208758354187, "learning_rate": 0.00014801771451627944, "loss": 2.873, "step": 40959 }, { "epoch": 2.01, "grad_norm": 0.6825695037841797, "learning_rate": 0.0001480044404792618, "loss": 2.9064, "step": 40960 }, { "epoch": 2.01, "grad_norm": 0.6804020404815674, "learning_rate": 0.00014799116684257108, "loss": 2.9686, "step": 40961 }, { "epoch": 2.01, "grad_norm": 0.6763026714324951, "learning_rate": 0.00014797789360624248, "loss": 2.9093, "step": 40962 }, { "epoch": 2.01, "grad_norm": 0.6673914194107056, "learning_rate": 0.00014796462077031075, "loss": 3.0376, "step": 40963 }, { "epoch": 2.01, "grad_norm": 0.6648275852203369, "learning_rate": 0.0001479513483348111, "loss": 2.8259, "step": 40964 }, { "epoch": 2.01, "grad_norm": 0.6990496516227722, "learning_rate": 0.00014793807629977817, "loss": 2.86, "step": 40965 }, { "epoch": 2.01, "grad_norm": 0.6605084538459778, "learning_rate": 0.0001479248046652471, "loss": 3.1202, "step": 40966 }, { "epoch": 2.01, "grad_norm": 0.7304693460464478, "learning_rate": 0.00014791153343125305, "loss": 2.9801, "step": 40967 }, { "epoch": 2.01, "grad_norm": 0.7068866491317749, "learning_rate": 0.0001478982625978305, "loss": 2.8674, "step": 40968 }, { "epoch": 2.01, "grad_norm": 0.6797716021537781, "learning_rate": 0.00014788499216501481, "loss": 2.8571, "step": 40969 }, { "epoch": 2.01, "grad_norm": 0.6691450476646423, "learning_rate": 0.00014787172213284066, "loss": 2.9016, "step": 40970 }, { "epoch": 2.01, "grad_norm": 0.6694939732551575, "learning_rate": 0.0001478584525013431, "loss": 2.7443, "step": 40971 }, { "epoch": 2.01, "grad_norm": 0.6383887529373169, "learning_rate": 0.00014784518327055722, "loss": 2.8351, "step": 40972 }, { "epoch": 2.01, "grad_norm": 0.6808183193206787, "learning_rate": 0.0001478319144405177, "loss": 2.9814, "step": 40973 }, { "epoch": 2.01, "grad_norm": 0.6715272665023804, "learning_rate": 0.00014781864601125976, "loss": 3.099, "step": 40974 }, { "epoch": 2.01, "grad_norm": 0.7185418009757996, "learning_rate": 0.00014780537798281823, "loss": 2.9109, "step": 40975 }, { "epoch": 2.01, "grad_norm": 0.6826639175415039, "learning_rate": 0.00014779211035522794, "loss": 2.8128, "step": 40976 }, { "epoch": 2.01, "grad_norm": 0.6765093207359314, "learning_rate": 0.00014777884312852405, "loss": 2.9096, "step": 40977 }, { "epoch": 2.01, "grad_norm": 0.691008448600769, "learning_rate": 0.00014776557630274127, "loss": 2.8658, "step": 40978 }, { "epoch": 2.01, "grad_norm": 0.6892204880714417, "learning_rate": 0.00014775230987791477, "loss": 2.9025, "step": 40979 }, { "epoch": 2.01, "grad_norm": 0.7367006540298462, "learning_rate": 0.00014773904385407925, "loss": 2.7799, "step": 40980 }, { "epoch": 2.01, "grad_norm": 0.7002752423286438, "learning_rate": 0.00014772577823126993, "loss": 2.981, "step": 40981 }, { "epoch": 2.01, "grad_norm": 0.6693331003189087, "learning_rate": 0.00014771251300952162, "loss": 2.9569, "step": 40982 }, { "epoch": 2.01, "grad_norm": 0.6542279124259949, "learning_rate": 0.00014769924818886908, "loss": 2.99, "step": 40983 }, { "epoch": 2.01, "grad_norm": 0.7216241955757141, "learning_rate": 0.00014768598376934758, "loss": 2.9451, "step": 40984 }, { "epoch": 2.01, "grad_norm": 0.6984073519706726, "learning_rate": 0.00014767271975099172, "loss": 3.0107, "step": 40985 }, { "epoch": 2.01, "grad_norm": 0.6856890320777893, "learning_rate": 0.00014765945613383675, "loss": 2.8309, "step": 40986 }, { "epoch": 2.01, "grad_norm": 0.6620368361473083, "learning_rate": 0.00014764619291791732, "loss": 2.8103, "step": 40987 }, { "epoch": 2.01, "grad_norm": 0.6587978005409241, "learning_rate": 0.00014763293010326855, "loss": 2.8216, "step": 40988 }, { "epoch": 2.01, "grad_norm": 0.6450566649436951, "learning_rate": 0.00014761966768992536, "loss": 3.0106, "step": 40989 }, { "epoch": 2.01, "grad_norm": 0.6524011492729187, "learning_rate": 0.00014760640567792265, "loss": 2.8636, "step": 40990 }, { "epoch": 2.01, "grad_norm": 0.6809319257736206, "learning_rate": 0.00014759314406729537, "loss": 2.8167, "step": 40991 }, { "epoch": 2.01, "grad_norm": 0.6798052787780762, "learning_rate": 0.0001475798828580783, "loss": 2.8203, "step": 40992 }, { "epoch": 2.01, "grad_norm": 0.6698510050773621, "learning_rate": 0.00014756662205030646, "loss": 2.9901, "step": 40993 }, { "epoch": 2.01, "grad_norm": 0.6708970665931702, "learning_rate": 0.00014755336164401492, "loss": 2.9513, "step": 40994 }, { "epoch": 2.01, "grad_norm": 0.7181163430213928, "learning_rate": 0.00014754010163923838, "loss": 2.9335, "step": 40995 }, { "epoch": 2.01, "grad_norm": 0.6596902012825012, "learning_rate": 0.00014752684203601197, "loss": 3.0563, "step": 40996 }, { "epoch": 2.01, "grad_norm": 0.681885838508606, "learning_rate": 0.00014751358283437055, "loss": 2.9588, "step": 40997 }, { "epoch": 2.01, "grad_norm": 0.6878100037574768, "learning_rate": 0.00014750032403434884, "loss": 2.9884, "step": 40998 }, { "epoch": 2.01, "grad_norm": 0.696635365486145, "learning_rate": 0.0001474870656359821, "loss": 2.9776, "step": 40999 }, { "epoch": 2.01, "grad_norm": 0.6515666842460632, "learning_rate": 0.00014747380763930492, "loss": 3.0161, "step": 41000 }, { "epoch": 2.01, "grad_norm": 0.673594057559967, "learning_rate": 0.0001474605500443525, "loss": 3.0754, "step": 41001 }, { "epoch": 2.01, "grad_norm": 0.6816129088401794, "learning_rate": 0.00014744729285115954, "loss": 2.928, "step": 41002 }, { "epoch": 2.01, "grad_norm": 0.6874986290931702, "learning_rate": 0.00014743403605976113, "loss": 2.9407, "step": 41003 }, { "epoch": 2.01, "grad_norm": 0.6564611792564392, "learning_rate": 0.00014742077967019215, "loss": 2.8386, "step": 41004 }, { "epoch": 2.01, "grad_norm": 0.6538634300231934, "learning_rate": 0.00014740752368248733, "loss": 2.9536, "step": 41005 }, { "epoch": 2.01, "grad_norm": 0.6837107539176941, "learning_rate": 0.00014739426809668186, "loss": 2.7744, "step": 41006 }, { "epoch": 2.01, "grad_norm": 0.7023835778236389, "learning_rate": 0.00014738101291281036, "loss": 2.8523, "step": 41007 }, { "epoch": 2.01, "grad_norm": 0.6810896396636963, "learning_rate": 0.00014736775813090793, "loss": 3.1852, "step": 41008 }, { "epoch": 2.01, "grad_norm": 0.6751614809036255, "learning_rate": 0.0001473545037510096, "loss": 2.9334, "step": 41009 }, { "epoch": 2.01, "grad_norm": 0.6512244939804077, "learning_rate": 0.00014734124977315, "loss": 2.8679, "step": 41010 }, { "epoch": 2.01, "grad_norm": 0.6602606773376465, "learning_rate": 0.00014732799619736423, "loss": 2.7867, "step": 41011 }, { "epoch": 2.01, "grad_norm": 0.6242491006851196, "learning_rate": 0.00014731474302368715, "loss": 3.0989, "step": 41012 }, { "epoch": 2.01, "grad_norm": 0.6728277206420898, "learning_rate": 0.00014730149025215356, "loss": 2.9024, "step": 41013 }, { "epoch": 2.01, "grad_norm": 0.6889778971672058, "learning_rate": 0.00014728823788279859, "loss": 2.952, "step": 41014 }, { "epoch": 2.01, "grad_norm": 0.709487795829773, "learning_rate": 0.00014727498591565687, "loss": 3.0053, "step": 41015 }, { "epoch": 2.01, "grad_norm": 0.7062227129936218, "learning_rate": 0.00014726173435076356, "loss": 2.8784, "step": 41016 }, { "epoch": 2.01, "grad_norm": 0.6593536138534546, "learning_rate": 0.00014724848318815333, "loss": 3.0108, "step": 41017 }, { "epoch": 2.01, "grad_norm": 0.6188623309135437, "learning_rate": 0.0001472352324278613, "loss": 2.8337, "step": 41018 }, { "epoch": 2.01, "grad_norm": 0.6538429260253906, "learning_rate": 0.00014722198206992232, "loss": 2.7436, "step": 41019 }, { "epoch": 2.01, "grad_norm": 0.7170898914337158, "learning_rate": 0.00014720873211437107, "loss": 2.916, "step": 41020 }, { "epoch": 2.01, "grad_norm": 0.6724970936775208, "learning_rate": 0.00014719548256124276, "loss": 2.9541, "step": 41021 }, { "epoch": 2.01, "grad_norm": 0.6659535765647888, "learning_rate": 0.00014718223341057203, "loss": 3.0944, "step": 41022 }, { "epoch": 2.01, "grad_norm": 0.6524555683135986, "learning_rate": 0.00014716898466239384, "loss": 3.1875, "step": 41023 }, { "epoch": 2.01, "grad_norm": 0.6858758926391602, "learning_rate": 0.00014715573631674328, "loss": 2.8407, "step": 41024 }, { "epoch": 2.01, "grad_norm": 0.64168781042099, "learning_rate": 0.0001471424883736551, "loss": 2.8617, "step": 41025 }, { "epoch": 2.01, "grad_norm": 0.6399775743484497, "learning_rate": 0.00014712924083316417, "loss": 3.0222, "step": 41026 }, { "epoch": 2.01, "grad_norm": 0.6765655875205994, "learning_rate": 0.0001471159936953053, "loss": 2.8945, "step": 41027 }, { "epoch": 2.01, "grad_norm": 0.6615230441093445, "learning_rate": 0.00014710274696011347, "loss": 2.7149, "step": 41028 }, { "epoch": 2.01, "grad_norm": 0.726775586605072, "learning_rate": 0.00014708950062762368, "loss": 2.961, "step": 41029 }, { "epoch": 2.01, "grad_norm": 0.7074599862098694, "learning_rate": 0.00014707625469787065, "loss": 3.0504, "step": 41030 }, { "epoch": 2.01, "grad_norm": 0.6598540544509888, "learning_rate": 0.0001470630091708894, "loss": 2.9812, "step": 41031 }, { "epoch": 2.01, "grad_norm": 0.6536500453948975, "learning_rate": 0.00014704976404671466, "loss": 3.0781, "step": 41032 }, { "epoch": 2.01, "grad_norm": 0.7111579775810242, "learning_rate": 0.00014703651932538152, "loss": 3.1258, "step": 41033 }, { "epoch": 2.01, "grad_norm": 0.6571623682975769, "learning_rate": 0.00014702327500692474, "loss": 3.0178, "step": 41034 }, { "epoch": 2.01, "grad_norm": 0.6660772562026978, "learning_rate": 0.00014701003109137908, "loss": 2.8174, "step": 41035 }, { "epoch": 2.01, "grad_norm": 0.6512477993965149, "learning_rate": 0.0001469967875787797, "loss": 3.073, "step": 41036 }, { "epoch": 2.01, "grad_norm": 0.6898970603942871, "learning_rate": 0.00014698354446916124, "loss": 2.8279, "step": 41037 }, { "epoch": 2.01, "grad_norm": 0.6960706114768982, "learning_rate": 0.00014697030176255864, "loss": 3.0873, "step": 41038 }, { "epoch": 2.01, "grad_norm": 0.6520676612854004, "learning_rate": 0.00014695705945900691, "loss": 3.0638, "step": 41039 }, { "epoch": 2.01, "grad_norm": 0.716157853603363, "learning_rate": 0.00014694381755854088, "loss": 2.9452, "step": 41040 }, { "epoch": 2.01, "grad_norm": 0.7131884098052979, "learning_rate": 0.00014693057606119533, "loss": 3.0743, "step": 41041 }, { "epoch": 2.01, "grad_norm": 0.6574112772941589, "learning_rate": 0.0001469173349670051, "loss": 2.943, "step": 41042 }, { "epoch": 2.01, "grad_norm": 0.6817065477371216, "learning_rate": 0.00014690409427600516, "loss": 2.8438, "step": 41043 }, { "epoch": 2.01, "grad_norm": 0.6450731754302979, "learning_rate": 0.00014689085398823047, "loss": 3.0053, "step": 41044 }, { "epoch": 2.01, "grad_norm": 0.6768467426300049, "learning_rate": 0.0001468776141037157, "loss": 3.0556, "step": 41045 }, { "epoch": 2.01, "grad_norm": 0.6716314554214478, "learning_rate": 0.00014686437462249592, "loss": 2.6741, "step": 41046 }, { "epoch": 2.01, "grad_norm": 0.6731881499290466, "learning_rate": 0.0001468511355446059, "loss": 2.8936, "step": 41047 }, { "epoch": 2.01, "grad_norm": 0.705536961555481, "learning_rate": 0.00014683789687008043, "loss": 2.8119, "step": 41048 }, { "epoch": 2.01, "grad_norm": 0.6849344372749329, "learning_rate": 0.00014682465859895458, "loss": 3.0863, "step": 41049 }, { "epoch": 2.01, "grad_norm": 0.7398621439933777, "learning_rate": 0.00014681142073126295, "loss": 2.9127, "step": 41050 }, { "epoch": 2.01, "grad_norm": 0.6680430769920349, "learning_rate": 0.00014679818326704072, "loss": 2.9321, "step": 41051 }, { "epoch": 2.01, "grad_norm": 0.6883407235145569, "learning_rate": 0.00014678494620632242, "loss": 2.9762, "step": 41052 }, { "epoch": 2.01, "grad_norm": 0.6741495132446289, "learning_rate": 0.00014677170954914312, "loss": 2.9253, "step": 41053 }, { "epoch": 2.01, "grad_norm": 0.6511622071266174, "learning_rate": 0.00014675847329553772, "loss": 3.0779, "step": 41054 }, { "epoch": 2.01, "grad_norm": 0.6725462079048157, "learning_rate": 0.00014674523744554109, "loss": 2.8217, "step": 41055 }, { "epoch": 2.01, "grad_norm": 0.67629075050354, "learning_rate": 0.00014673200199918796, "loss": 2.9472, "step": 41056 }, { "epoch": 2.01, "grad_norm": 0.6784157752990723, "learning_rate": 0.0001467187669565131, "loss": 2.8195, "step": 41057 }, { "epoch": 2.01, "grad_norm": 0.6392921209335327, "learning_rate": 0.00014670553231755165, "loss": 3.0216, "step": 41058 }, { "epoch": 2.01, "grad_norm": 0.6643005013465881, "learning_rate": 0.00014669229808233822, "loss": 3.0051, "step": 41059 }, { "epoch": 2.01, "grad_norm": 0.617713987827301, "learning_rate": 0.00014667906425090775, "loss": 3.0391, "step": 41060 }, { "epoch": 2.01, "grad_norm": 0.6412746906280518, "learning_rate": 0.00014666583082329526, "loss": 2.7262, "step": 41061 }, { "epoch": 2.01, "grad_norm": 0.6644930243492126, "learning_rate": 0.00014665259779953545, "loss": 2.8943, "step": 41062 }, { "epoch": 2.01, "grad_norm": 0.67530357837677, "learning_rate": 0.00014663936517966315, "loss": 2.7935, "step": 41063 }, { "epoch": 2.01, "grad_norm": 0.7180463075637817, "learning_rate": 0.00014662613296371316, "loss": 2.7565, "step": 41064 }, { "epoch": 2.01, "grad_norm": 0.6856942176818848, "learning_rate": 0.00014661290115172045, "loss": 2.9684, "step": 41065 }, { "epoch": 2.01, "grad_norm": 0.672465443611145, "learning_rate": 0.00014659966974371993, "loss": 2.8019, "step": 41066 }, { "epoch": 2.01, "grad_norm": 0.6677531003952026, "learning_rate": 0.00014658643873974623, "loss": 2.8741, "step": 41067 }, { "epoch": 2.01, "grad_norm": 0.6654563546180725, "learning_rate": 0.00014657320813983446, "loss": 2.8794, "step": 41068 }, { "epoch": 2.01, "grad_norm": 0.6987980604171753, "learning_rate": 0.00014655997794401932, "loss": 2.9344, "step": 41069 }, { "epoch": 2.01, "grad_norm": 0.6206268668174744, "learning_rate": 0.00014654674815233555, "loss": 2.898, "step": 41070 }, { "epoch": 2.01, "grad_norm": 0.6681779026985168, "learning_rate": 0.00014653351876481823, "loss": 2.8274, "step": 41071 }, { "epoch": 2.01, "grad_norm": 0.6701809167861938, "learning_rate": 0.00014652028978150196, "loss": 2.7613, "step": 41072 }, { "epoch": 2.01, "grad_norm": 0.6777552962303162, "learning_rate": 0.00014650706120242186, "loss": 2.967, "step": 41073 }, { "epoch": 2.01, "grad_norm": 0.7489084601402283, "learning_rate": 0.00014649383302761247, "loss": 3.0146, "step": 41074 }, { "epoch": 2.01, "grad_norm": 0.6484798192977905, "learning_rate": 0.0001464806052571088, "loss": 3.095, "step": 41075 }, { "epoch": 2.01, "grad_norm": 0.716480553150177, "learning_rate": 0.00014646737789094578, "loss": 2.8885, "step": 41076 }, { "epoch": 2.01, "grad_norm": 0.6795092821121216, "learning_rate": 0.00014645415092915813, "loss": 2.5886, "step": 41077 }, { "epoch": 2.01, "grad_norm": 0.7201026678085327, "learning_rate": 0.00014644092437178074, "loss": 2.8806, "step": 41078 }, { "epoch": 2.01, "grad_norm": 0.6745376586914062, "learning_rate": 0.00014642769821884824, "loss": 3.0144, "step": 41079 }, { "epoch": 2.01, "grad_norm": 0.7620112895965576, "learning_rate": 0.0001464144724703956, "loss": 2.797, "step": 41080 }, { "epoch": 2.01, "grad_norm": 0.6425158381462097, "learning_rate": 0.00014640124712645786, "loss": 2.6399, "step": 41081 }, { "epoch": 2.01, "grad_norm": 0.6472249031066895, "learning_rate": 0.00014638802218706955, "loss": 3.0311, "step": 41082 }, { "epoch": 2.01, "grad_norm": 0.6940520405769348, "learning_rate": 0.00014637479765226572, "loss": 2.9101, "step": 41083 }, { "epoch": 2.01, "grad_norm": 0.6446316242218018, "learning_rate": 0.00014636157352208113, "loss": 2.8199, "step": 41084 }, { "epoch": 2.01, "grad_norm": 0.662503719329834, "learning_rate": 0.00014634834979655042, "loss": 3.034, "step": 41085 }, { "epoch": 2.01, "grad_norm": 0.6603543758392334, "learning_rate": 0.00014633512647570877, "loss": 2.9694, "step": 41086 }, { "epoch": 2.01, "grad_norm": 0.6480185389518738, "learning_rate": 0.00014632190355959066, "loss": 3.0205, "step": 41087 }, { "epoch": 2.01, "grad_norm": 0.681262195110321, "learning_rate": 0.00014630868104823118, "loss": 3.0011, "step": 41088 }, { "epoch": 2.01, "grad_norm": 0.6547122001647949, "learning_rate": 0.000146295458941665, "loss": 2.9701, "step": 41089 }, { "epoch": 2.01, "grad_norm": 0.6928819417953491, "learning_rate": 0.00014628223723992694, "loss": 2.9301, "step": 41090 }, { "epoch": 2.01, "grad_norm": 0.6732745170593262, "learning_rate": 0.00014626901594305203, "loss": 2.7723, "step": 41091 }, { "epoch": 2.01, "grad_norm": 0.6303524971008301, "learning_rate": 0.00014625579505107497, "loss": 2.7936, "step": 41092 }, { "epoch": 2.01, "grad_norm": 0.675657331943512, "learning_rate": 0.00014624257456403054, "loss": 2.9224, "step": 41093 }, { "epoch": 2.01, "grad_norm": 0.7712878584861755, "learning_rate": 0.00014622935448195345, "loss": 2.9784, "step": 41094 }, { "epoch": 2.01, "grad_norm": 0.6558848023414612, "learning_rate": 0.00014621613480487864, "loss": 2.9069, "step": 41095 }, { "epoch": 2.01, "grad_norm": 0.7321421504020691, "learning_rate": 0.00014620291553284106, "loss": 2.8179, "step": 41096 }, { "epoch": 2.01, "grad_norm": 0.6873998641967773, "learning_rate": 0.0001461896966658753, "loss": 2.9708, "step": 41097 }, { "epoch": 2.01, "grad_norm": 0.7613605856895447, "learning_rate": 0.00014617647820401637, "loss": 3.1689, "step": 41098 }, { "epoch": 2.01, "grad_norm": 0.6851860284805298, "learning_rate": 0.000146163260147299, "loss": 3.0292, "step": 41099 }, { "epoch": 2.01, "grad_norm": 0.6968518495559692, "learning_rate": 0.0001461500424957579, "loss": 2.7074, "step": 41100 }, { "epoch": 2.01, "grad_norm": 0.6626551151275635, "learning_rate": 0.00014613682524942804, "loss": 2.8939, "step": 41101 }, { "epoch": 2.01, "grad_norm": 0.6604430079460144, "learning_rate": 0.00014612360840834411, "loss": 2.9987, "step": 41102 }, { "epoch": 2.01, "grad_norm": 0.711830735206604, "learning_rate": 0.00014611039197254106, "loss": 3.0629, "step": 41103 }, { "epoch": 2.01, "grad_norm": 0.6499634385108948, "learning_rate": 0.00014609717594205355, "loss": 2.7851, "step": 41104 }, { "epoch": 2.01, "grad_norm": 0.6638641953468323, "learning_rate": 0.00014608396031691657, "loss": 2.8837, "step": 41105 }, { "epoch": 2.01, "grad_norm": 0.6749254465103149, "learning_rate": 0.00014607074509716478, "loss": 3.0239, "step": 41106 }, { "epoch": 2.01, "grad_norm": 0.6623033285140991, "learning_rate": 0.00014605753028283292, "loss": 2.9147, "step": 41107 }, { "epoch": 2.01, "grad_norm": 0.7307236790657043, "learning_rate": 0.00014604431587395602, "loss": 2.833, "step": 41108 }, { "epoch": 2.01, "grad_norm": 0.6709109544754028, "learning_rate": 0.00014603110187056865, "loss": 2.9349, "step": 41109 }, { "epoch": 2.01, "grad_norm": 0.6868018507957458, "learning_rate": 0.00014601788827270574, "loss": 3.0805, "step": 41110 }, { "epoch": 2.01, "grad_norm": 0.6887187361717224, "learning_rate": 0.00014600467508040218, "loss": 2.9418, "step": 41111 }, { "epoch": 2.01, "grad_norm": 0.6684630513191223, "learning_rate": 0.00014599146229369252, "loss": 2.9893, "step": 41112 }, { "epoch": 2.01, "grad_norm": 0.6830692291259766, "learning_rate": 0.00014597824991261185, "loss": 2.9589, "step": 41113 }, { "epoch": 2.01, "grad_norm": 0.6974941492080688, "learning_rate": 0.00014596503793719482, "loss": 2.9326, "step": 41114 }, { "epoch": 2.01, "grad_norm": 0.638469934463501, "learning_rate": 0.00014595182636747614, "loss": 2.9861, "step": 41115 }, { "epoch": 2.01, "grad_norm": 0.6490311026573181, "learning_rate": 0.0001459386152034908, "loss": 3.0936, "step": 41116 }, { "epoch": 2.02, "grad_norm": 0.6784543991088867, "learning_rate": 0.0001459254044452734, "loss": 3.0954, "step": 41117 }, { "epoch": 2.02, "grad_norm": 0.6834042072296143, "learning_rate": 0.00014591219409285894, "loss": 2.914, "step": 41118 }, { "epoch": 2.02, "grad_norm": 0.6517180800437927, "learning_rate": 0.00014589898414628198, "loss": 3.13, "step": 41119 }, { "epoch": 2.02, "grad_norm": 0.6616392731666565, "learning_rate": 0.00014588577460557755, "loss": 3.1142, "step": 41120 }, { "epoch": 2.02, "grad_norm": 0.6413401365280151, "learning_rate": 0.00014587256547078035, "loss": 3.092, "step": 41121 }, { "epoch": 2.02, "grad_norm": 0.6451615691184998, "learning_rate": 0.00014585935674192504, "loss": 2.8525, "step": 41122 }, { "epoch": 2.02, "grad_norm": 0.6675742268562317, "learning_rate": 0.0001458461484190466, "loss": 3.0727, "step": 41123 }, { "epoch": 2.02, "grad_norm": 0.6669958829879761, "learning_rate": 0.00014583294050217967, "loss": 2.8502, "step": 41124 }, { "epoch": 2.02, "grad_norm": 0.6869798302650452, "learning_rate": 0.0001458197329913591, "loss": 2.9034, "step": 41125 }, { "epoch": 2.02, "grad_norm": 0.6724489331245422, "learning_rate": 0.00014580652588661974, "loss": 2.9271, "step": 41126 }, { "epoch": 2.02, "grad_norm": 0.6963558793067932, "learning_rate": 0.00014579331918799634, "loss": 2.8939, "step": 41127 }, { "epoch": 2.02, "grad_norm": 0.6525717973709106, "learning_rate": 0.0001457801128955237, "loss": 2.8894, "step": 41128 }, { "epoch": 2.02, "grad_norm": 0.6995739936828613, "learning_rate": 0.0001457669070092364, "loss": 2.807, "step": 41129 }, { "epoch": 2.02, "grad_norm": 0.6798244714736938, "learning_rate": 0.0001457537015291694, "loss": 3.1188, "step": 41130 }, { "epoch": 2.02, "grad_norm": 0.6413742303848267, "learning_rate": 0.0001457404964553576, "loss": 2.8515, "step": 41131 }, { "epoch": 2.02, "grad_norm": 0.68084317445755, "learning_rate": 0.0001457272917878355, "loss": 2.7809, "step": 41132 }, { "epoch": 2.02, "grad_norm": 0.6480937004089355, "learning_rate": 0.00014571408752663817, "loss": 2.7539, "step": 41133 }, { "epoch": 2.02, "grad_norm": 0.6867110729217529, "learning_rate": 0.00014570088367180006, "loss": 3.0271, "step": 41134 }, { "epoch": 2.02, "grad_norm": 0.7035594582557678, "learning_rate": 0.00014568768022335632, "loss": 3.1673, "step": 41135 }, { "epoch": 2.02, "grad_norm": 0.6588776707649231, "learning_rate": 0.00014567447718134147, "loss": 2.9375, "step": 41136 }, { "epoch": 2.02, "grad_norm": 0.6488538980484009, "learning_rate": 0.00014566127454579026, "loss": 2.9297, "step": 41137 }, { "epoch": 2.02, "grad_norm": 0.6975100040435791, "learning_rate": 0.00014564807231673768, "loss": 3.0464, "step": 41138 }, { "epoch": 2.02, "grad_norm": 0.6509141325950623, "learning_rate": 0.00014563487049421825, "loss": 2.8602, "step": 41139 }, { "epoch": 2.02, "grad_norm": 0.6687818169593811, "learning_rate": 0.00014562166907826696, "loss": 2.992, "step": 41140 }, { "epoch": 2.02, "grad_norm": 0.6703136563301086, "learning_rate": 0.00014560846806891837, "loss": 2.8122, "step": 41141 }, { "epoch": 2.02, "grad_norm": 0.7211496829986572, "learning_rate": 0.00014559526746620748, "loss": 2.7898, "step": 41142 }, { "epoch": 2.02, "grad_norm": 0.6537548303604126, "learning_rate": 0.00014558206727016896, "loss": 2.7898, "step": 41143 }, { "epoch": 2.02, "grad_norm": 0.6834998726844788, "learning_rate": 0.00014556886748083743, "loss": 2.9153, "step": 41144 }, { "epoch": 2.02, "grad_norm": 0.6743541359901428, "learning_rate": 0.00014555566809824788, "loss": 2.9644, "step": 41145 }, { "epoch": 2.02, "grad_norm": 0.6714619994163513, "learning_rate": 0.00014554246912243488, "loss": 2.9662, "step": 41146 }, { "epoch": 2.02, "grad_norm": 0.6737746596336365, "learning_rate": 0.0001455292705534333, "loss": 2.7696, "step": 41147 }, { "epoch": 2.02, "grad_norm": 0.654802143573761, "learning_rate": 0.00014551607239127797, "loss": 2.8942, "step": 41148 }, { "epoch": 2.02, "grad_norm": 0.6912543177604675, "learning_rate": 0.0001455028746360036, "loss": 3.193, "step": 41149 }, { "epoch": 2.02, "grad_norm": 0.6937560439109802, "learning_rate": 0.00014548967728764492, "loss": 2.8186, "step": 41150 }, { "epoch": 2.02, "grad_norm": 0.6445648074150085, "learning_rate": 0.00014547648034623657, "loss": 2.8501, "step": 41151 }, { "epoch": 2.02, "grad_norm": 0.7112509608268738, "learning_rate": 0.00014546328381181346, "loss": 2.6133, "step": 41152 }, { "epoch": 2.02, "grad_norm": 0.6472128629684448, "learning_rate": 0.00014545008768441042, "loss": 2.8535, "step": 41153 }, { "epoch": 2.02, "grad_norm": 0.6357482075691223, "learning_rate": 0.00014543689196406197, "loss": 2.9497, "step": 41154 }, { "epoch": 2.02, "grad_norm": 0.6605250239372253, "learning_rate": 0.00014542369665080315, "loss": 2.8173, "step": 41155 }, { "epoch": 2.02, "grad_norm": 0.6524414420127869, "learning_rate": 0.00014541050174466844, "loss": 3.0099, "step": 41156 }, { "epoch": 2.02, "grad_norm": 0.6539895534515381, "learning_rate": 0.0001453973072456928, "loss": 3.0537, "step": 41157 }, { "epoch": 2.02, "grad_norm": 0.6876203417778015, "learning_rate": 0.00014538411315391094, "loss": 2.9975, "step": 41158 }, { "epoch": 2.02, "grad_norm": 0.6869372725486755, "learning_rate": 0.00014537091946935742, "loss": 3.1229, "step": 41159 }, { "epoch": 2.02, "grad_norm": 0.6883721947669983, "learning_rate": 0.0001453577261920673, "loss": 2.8087, "step": 41160 }, { "epoch": 2.02, "grad_norm": 0.6697179675102234, "learning_rate": 0.000145344533322075, "loss": 2.9453, "step": 41161 }, { "epoch": 2.02, "grad_norm": 0.6436740159988403, "learning_rate": 0.00014533134085941546, "loss": 3.0862, "step": 41162 }, { "epoch": 2.02, "grad_norm": 0.6619797945022583, "learning_rate": 0.0001453181488041235, "loss": 3.0743, "step": 41163 }, { "epoch": 2.02, "grad_norm": 0.6886985898017883, "learning_rate": 0.0001453049571562338, "loss": 2.8477, "step": 41164 }, { "epoch": 2.02, "grad_norm": 0.6293699145317078, "learning_rate": 0.00014529176591578105, "loss": 2.8921, "step": 41165 }, { "epoch": 2.02, "grad_norm": 0.7019802331924438, "learning_rate": 0.00014527857508279992, "loss": 2.9149, "step": 41166 }, { "epoch": 2.02, "grad_norm": 0.6821876168251038, "learning_rate": 0.00014526538465732522, "loss": 2.7066, "step": 41167 }, { "epoch": 2.02, "grad_norm": 0.7083041071891785, "learning_rate": 0.00014525219463939183, "loss": 2.9684, "step": 41168 }, { "epoch": 2.02, "grad_norm": 0.7001522183418274, "learning_rate": 0.0001452390050290343, "loss": 2.9329, "step": 41169 }, { "epoch": 2.02, "grad_norm": 0.7127074003219604, "learning_rate": 0.00014522581582628754, "loss": 2.958, "step": 41170 }, { "epoch": 2.02, "grad_norm": 0.6899229884147644, "learning_rate": 0.00014521262703118617, "loss": 2.9127, "step": 41171 }, { "epoch": 2.02, "grad_norm": 0.6654265522956848, "learning_rate": 0.00014519943864376486, "loss": 2.997, "step": 41172 }, { "epoch": 2.02, "grad_norm": 0.670941174030304, "learning_rate": 0.00014518625066405852, "loss": 2.6803, "step": 41173 }, { "epoch": 2.02, "grad_norm": 0.6345453858375549, "learning_rate": 0.00014517306309210176, "loss": 3.1015, "step": 41174 }, { "epoch": 2.02, "grad_norm": 0.6700384616851807, "learning_rate": 0.0001451598759279294, "loss": 3.0479, "step": 41175 }, { "epoch": 2.02, "grad_norm": 0.686065673828125, "learning_rate": 0.00014514668917157603, "loss": 2.9507, "step": 41176 }, { "epoch": 2.02, "grad_norm": 0.6618055701255798, "learning_rate": 0.00014513350282307648, "loss": 2.918, "step": 41177 }, { "epoch": 2.02, "grad_norm": 0.7136662006378174, "learning_rate": 0.00014512031688246563, "loss": 3.132, "step": 41178 }, { "epoch": 2.02, "grad_norm": 0.6863377690315247, "learning_rate": 0.00014510713134977804, "loss": 2.8091, "step": 41179 }, { "epoch": 2.02, "grad_norm": 0.6750915050506592, "learning_rate": 0.00014509394622504843, "loss": 2.9447, "step": 41180 }, { "epoch": 2.02, "grad_norm": 0.6594275832176208, "learning_rate": 0.00014508076150831145, "loss": 3.0114, "step": 41181 }, { "epoch": 2.02, "grad_norm": 0.6762315630912781, "learning_rate": 0.00014506757719960192, "loss": 2.8729, "step": 41182 }, { "epoch": 2.02, "grad_norm": 0.7663244605064392, "learning_rate": 0.0001450543932989547, "loss": 2.8163, "step": 41183 }, { "epoch": 2.02, "grad_norm": 0.7097091674804688, "learning_rate": 0.0001450412098064043, "loss": 2.8352, "step": 41184 }, { "epoch": 2.02, "grad_norm": 0.7125594615936279, "learning_rate": 0.0001450280267219856, "loss": 3.0027, "step": 41185 }, { "epoch": 2.02, "grad_norm": 0.6626073718070984, "learning_rate": 0.00014501484404573327, "loss": 2.8722, "step": 41186 }, { "epoch": 2.02, "grad_norm": 0.6956759691238403, "learning_rate": 0.0001450016617776819, "loss": 2.7935, "step": 41187 }, { "epoch": 2.02, "grad_norm": 0.638950765132904, "learning_rate": 0.00014498847991786644, "loss": 2.8794, "step": 41188 }, { "epoch": 2.02, "grad_norm": 0.6870553493499756, "learning_rate": 0.00014497529846632136, "loss": 3.0417, "step": 41189 }, { "epoch": 2.02, "grad_norm": 0.681865394115448, "learning_rate": 0.00014496211742308165, "loss": 2.9795, "step": 41190 }, { "epoch": 2.02, "grad_norm": 0.6893475651741028, "learning_rate": 0.00014494893678818174, "loss": 2.7124, "step": 41191 }, { "epoch": 2.02, "grad_norm": 0.6525934338569641, "learning_rate": 0.00014493575656165654, "loss": 2.8924, "step": 41192 }, { "epoch": 2.02, "grad_norm": 0.7462884783744812, "learning_rate": 0.0001449225767435409, "loss": 2.8665, "step": 41193 }, { "epoch": 2.02, "grad_norm": 0.662916898727417, "learning_rate": 0.00014490939733386915, "loss": 3.0731, "step": 41194 }, { "epoch": 2.02, "grad_norm": 0.639997661113739, "learning_rate": 0.00014489621833267628, "loss": 3.1213, "step": 41195 }, { "epoch": 2.02, "grad_norm": 0.6598368883132935, "learning_rate": 0.00014488303973999683, "loss": 2.8662, "step": 41196 }, { "epoch": 2.02, "grad_norm": 0.6675333380699158, "learning_rate": 0.0001448698615558656, "loss": 2.9091, "step": 41197 }, { "epoch": 2.02, "grad_norm": 0.6575494408607483, "learning_rate": 0.0001448566837803174, "loss": 3.143, "step": 41198 }, { "epoch": 2.02, "grad_norm": 0.7101870775222778, "learning_rate": 0.00014484350641338672, "loss": 2.8739, "step": 41199 }, { "epoch": 2.02, "grad_norm": 0.7055264711380005, "learning_rate": 0.00014483032945510852, "loss": 3.0787, "step": 41200 }, { "epoch": 2.02, "grad_norm": 0.6996819376945496, "learning_rate": 0.00014481715290551734, "loss": 3.099, "step": 41201 }, { "epoch": 2.02, "grad_norm": 0.665325939655304, "learning_rate": 0.0001448039767646478, "loss": 2.913, "step": 41202 }, { "epoch": 2.02, "grad_norm": 0.6452702879905701, "learning_rate": 0.00014479080103253487, "loss": 2.9902, "step": 41203 }, { "epoch": 2.02, "grad_norm": 0.6679407358169556, "learning_rate": 0.00014477762570921294, "loss": 3.0373, "step": 41204 }, { "epoch": 2.02, "grad_norm": 0.6822876930236816, "learning_rate": 0.000144764450794717, "loss": 2.9035, "step": 41205 }, { "epoch": 2.02, "grad_norm": 0.6947330236434937, "learning_rate": 0.00014475127628908152, "loss": 3.0485, "step": 41206 }, { "epoch": 2.02, "grad_norm": 0.6485691070556641, "learning_rate": 0.00014473810219234142, "loss": 2.6635, "step": 41207 }, { "epoch": 2.02, "grad_norm": 0.6676596999168396, "learning_rate": 0.00014472492850453126, "loss": 2.921, "step": 41208 }, { "epoch": 2.02, "grad_norm": 0.676518440246582, "learning_rate": 0.00014471175522568568, "loss": 3.0537, "step": 41209 }, { "epoch": 2.02, "grad_norm": 0.6761108040809631, "learning_rate": 0.0001446985823558395, "loss": 2.8734, "step": 41210 }, { "epoch": 2.02, "grad_norm": 0.6406627893447876, "learning_rate": 0.0001446854098950273, "loss": 3.074, "step": 41211 }, { "epoch": 2.02, "grad_norm": 0.6818718314170837, "learning_rate": 0.00014467223784328382, "loss": 3.0591, "step": 41212 }, { "epoch": 2.02, "grad_norm": 0.6850602030754089, "learning_rate": 0.00014465906620064387, "loss": 3.0959, "step": 41213 }, { "epoch": 2.02, "grad_norm": 0.6739491820335388, "learning_rate": 0.00014464589496714198, "loss": 2.9633, "step": 41214 }, { "epoch": 2.02, "grad_norm": 0.7058461308479309, "learning_rate": 0.00014463272414281297, "loss": 2.8375, "step": 41215 }, { "epoch": 2.02, "grad_norm": 0.6772422790527344, "learning_rate": 0.00014461955372769149, "loss": 2.969, "step": 41216 }, { "epoch": 2.02, "grad_norm": 0.64620441198349, "learning_rate": 0.00014460638372181217, "loss": 2.8003, "step": 41217 }, { "epoch": 2.02, "grad_norm": 0.6821702718734741, "learning_rate": 0.00014459321412520963, "loss": 2.873, "step": 41218 }, { "epoch": 2.02, "grad_norm": 0.6443877220153809, "learning_rate": 0.00014458004493791868, "loss": 2.7943, "step": 41219 }, { "epoch": 2.02, "grad_norm": 0.651509165763855, "learning_rate": 0.00014456687615997408, "loss": 2.7859, "step": 41220 }, { "epoch": 2.02, "grad_norm": 0.7200280427932739, "learning_rate": 0.0001445537077914103, "loss": 2.7788, "step": 41221 }, { "epoch": 2.02, "grad_norm": 0.71668940782547, "learning_rate": 0.00014454053983226225, "loss": 2.7504, "step": 41222 }, { "epoch": 2.02, "grad_norm": 0.6477024555206299, "learning_rate": 0.00014452737228256448, "loss": 2.8924, "step": 41223 }, { "epoch": 2.02, "grad_norm": 0.6733539700508118, "learning_rate": 0.00014451420514235158, "loss": 2.9713, "step": 41224 }, { "epoch": 2.02, "grad_norm": 0.6498569250106812, "learning_rate": 0.0001445010384116585, "loss": 2.7993, "step": 41225 }, { "epoch": 2.02, "grad_norm": 0.8216848373413086, "learning_rate": 0.00014448787209051962, "loss": 2.8396, "step": 41226 }, { "epoch": 2.02, "grad_norm": 0.7104951739311218, "learning_rate": 0.00014447470617896986, "loss": 2.8338, "step": 41227 }, { "epoch": 2.02, "grad_norm": 0.6697823405265808, "learning_rate": 0.00014446154067704366, "loss": 2.9435, "step": 41228 }, { "epoch": 2.02, "grad_norm": 0.6788064241409302, "learning_rate": 0.00014444837558477596, "loss": 2.8841, "step": 41229 }, { "epoch": 2.02, "grad_norm": 0.6955843567848206, "learning_rate": 0.0001444352109022013, "loss": 2.8783, "step": 41230 }, { "epoch": 2.02, "grad_norm": 0.6401929259300232, "learning_rate": 0.00014442204662935425, "loss": 2.9695, "step": 41231 }, { "epoch": 2.02, "grad_norm": 0.6757379174232483, "learning_rate": 0.0001444088827662697, "loss": 3.1244, "step": 41232 }, { "epoch": 2.02, "grad_norm": 0.6907119154930115, "learning_rate": 0.0001443957193129821, "loss": 2.9758, "step": 41233 }, { "epoch": 2.02, "grad_norm": 0.659367561340332, "learning_rate": 0.00014438255626952624, "loss": 2.9205, "step": 41234 }, { "epoch": 2.02, "grad_norm": 0.685149073600769, "learning_rate": 0.00014436939363593685, "loss": 3.0351, "step": 41235 }, { "epoch": 2.02, "grad_norm": 0.6743383407592773, "learning_rate": 0.00014435623141224844, "loss": 2.8732, "step": 41236 }, { "epoch": 2.02, "grad_norm": 0.7082371115684509, "learning_rate": 0.00014434306959849587, "loss": 2.9676, "step": 41237 }, { "epoch": 2.02, "grad_norm": 0.6763203144073486, "learning_rate": 0.0001443299081947137, "loss": 3.0997, "step": 41238 }, { "epoch": 2.02, "grad_norm": 0.7034807205200195, "learning_rate": 0.00014431674720093648, "loss": 3.0158, "step": 41239 }, { "epoch": 2.02, "grad_norm": 0.693721354007721, "learning_rate": 0.00014430358661719908, "loss": 2.9763, "step": 41240 }, { "epoch": 2.02, "grad_norm": 0.6822949051856995, "learning_rate": 0.000144290426443536, "loss": 2.9631, "step": 41241 }, { "epoch": 2.02, "grad_norm": 0.6607871055603027, "learning_rate": 0.00014427726667998203, "loss": 3.0862, "step": 41242 }, { "epoch": 2.02, "grad_norm": 0.6698300838470459, "learning_rate": 0.00014426410732657172, "loss": 2.8321, "step": 41243 }, { "epoch": 2.02, "grad_norm": 0.6640063524246216, "learning_rate": 0.00014425094838333986, "loss": 2.9378, "step": 41244 }, { "epoch": 2.02, "grad_norm": 0.6912232637405396, "learning_rate": 0.000144237789850321, "loss": 3.0132, "step": 41245 }, { "epoch": 2.02, "grad_norm": 0.6797363758087158, "learning_rate": 0.00014422463172754973, "loss": 2.7892, "step": 41246 }, { "epoch": 2.02, "grad_norm": 0.7383774518966675, "learning_rate": 0.0001442114740150609, "loss": 2.9162, "step": 41247 }, { "epoch": 2.02, "grad_norm": 0.7191385626792908, "learning_rate": 0.000144198316712889, "loss": 2.8664, "step": 41248 }, { "epoch": 2.02, "grad_norm": 0.733830988407135, "learning_rate": 0.0001441851598210687, "loss": 2.8591, "step": 41249 }, { "epoch": 2.02, "grad_norm": 0.6828484535217285, "learning_rate": 0.00014417200333963486, "loss": 3.1103, "step": 41250 }, { "epoch": 2.02, "grad_norm": 0.6779079437255859, "learning_rate": 0.0001441588472686219, "loss": 2.8456, "step": 41251 }, { "epoch": 2.02, "grad_norm": 0.6722716093063354, "learning_rate": 0.0001441456916080646, "loss": 2.9776, "step": 41252 }, { "epoch": 2.02, "grad_norm": 0.7105075716972351, "learning_rate": 0.00014413253635799743, "loss": 3.1086, "step": 41253 }, { "epoch": 2.02, "grad_norm": 0.6589248180389404, "learning_rate": 0.00014411938151845518, "loss": 3.064, "step": 41254 }, { "epoch": 2.02, "grad_norm": 0.6822506785392761, "learning_rate": 0.00014410622708947257, "loss": 2.9046, "step": 41255 }, { "epoch": 2.02, "grad_norm": 0.7151714563369751, "learning_rate": 0.00014409307307108405, "loss": 2.8397, "step": 41256 }, { "epoch": 2.02, "grad_norm": 0.684058427810669, "learning_rate": 0.0001440799194633245, "loss": 2.7317, "step": 41257 }, { "epoch": 2.02, "grad_norm": 0.6461659073829651, "learning_rate": 0.00014406676626622827, "loss": 3.0171, "step": 41258 }, { "epoch": 2.02, "grad_norm": 0.6536449790000916, "learning_rate": 0.00014405361347983033, "loss": 3.0362, "step": 41259 }, { "epoch": 2.02, "grad_norm": 0.6458353400230408, "learning_rate": 0.00014404046110416517, "loss": 2.7066, "step": 41260 }, { "epoch": 2.02, "grad_norm": 0.7059634923934937, "learning_rate": 0.00014402730913926724, "loss": 2.8764, "step": 41261 }, { "epoch": 2.02, "grad_norm": 0.6733324527740479, "learning_rate": 0.0001440141575851715, "loss": 2.9203, "step": 41262 }, { "epoch": 2.02, "grad_norm": 0.6528270244598389, "learning_rate": 0.0001440010064419124, "loss": 2.8455, "step": 41263 }, { "epoch": 2.02, "grad_norm": 0.6702911257743835, "learning_rate": 0.00014398785570952458, "loss": 2.8677, "step": 41264 }, { "epoch": 2.02, "grad_norm": 0.671634316444397, "learning_rate": 0.00014397470538804282, "loss": 3.0517, "step": 41265 }, { "epoch": 2.02, "grad_norm": 0.7339351177215576, "learning_rate": 0.00014396155547750166, "loss": 2.8713, "step": 41266 }, { "epoch": 2.02, "grad_norm": 0.6775936484336853, "learning_rate": 0.00014394840597793576, "loss": 2.8699, "step": 41267 }, { "epoch": 2.02, "grad_norm": 0.6924892663955688, "learning_rate": 0.0001439352568893796, "loss": 3.0556, "step": 41268 }, { "epoch": 2.02, "grad_norm": 0.6678718328475952, "learning_rate": 0.0001439221082118679, "loss": 3.0232, "step": 41269 }, { "epoch": 2.02, "grad_norm": 0.7233895063400269, "learning_rate": 0.0001439089599454355, "loss": 2.8655, "step": 41270 }, { "epoch": 2.02, "grad_norm": 0.6184765100479126, "learning_rate": 0.0001438958120901167, "loss": 3.0533, "step": 41271 }, { "epoch": 2.02, "grad_norm": 0.7089718580245972, "learning_rate": 0.00014388266464594645, "loss": 3.1122, "step": 41272 }, { "epoch": 2.02, "grad_norm": 0.6720311045646667, "learning_rate": 0.0001438695176129592, "loss": 2.9664, "step": 41273 }, { "epoch": 2.02, "grad_norm": 0.66576087474823, "learning_rate": 0.00014385637099118944, "loss": 2.8472, "step": 41274 }, { "epoch": 2.02, "grad_norm": 0.6577184200286865, "learning_rate": 0.0001438432247806721, "loss": 2.7898, "step": 41275 }, { "epoch": 2.02, "grad_norm": 0.6298528909683228, "learning_rate": 0.00014383007898144151, "loss": 2.9125, "step": 41276 }, { "epoch": 2.02, "grad_norm": 0.6644882559776306, "learning_rate": 0.0001438169335935326, "loss": 2.9001, "step": 41277 }, { "epoch": 2.02, "grad_norm": 0.6496797800064087, "learning_rate": 0.00014380378861697968, "loss": 2.8136, "step": 41278 }, { "epoch": 2.02, "grad_norm": 0.6571464538574219, "learning_rate": 0.00014379064405181752, "loss": 2.9994, "step": 41279 }, { "epoch": 2.02, "grad_norm": 0.6770309209823608, "learning_rate": 0.00014377749989808087, "loss": 2.9402, "step": 41280 }, { "epoch": 2.02, "grad_norm": 0.6546334028244019, "learning_rate": 0.0001437643561558042, "loss": 2.9614, "step": 41281 }, { "epoch": 2.02, "grad_norm": 0.6517234444618225, "learning_rate": 0.00014375121282502216, "loss": 2.8285, "step": 41282 }, { "epoch": 2.02, "grad_norm": 0.6837851405143738, "learning_rate": 0.00014373806990576924, "loss": 3.1049, "step": 41283 }, { "epoch": 2.02, "grad_norm": 0.6652602553367615, "learning_rate": 0.00014372492739808017, "loss": 2.958, "step": 41284 }, { "epoch": 2.02, "grad_norm": 0.6891589760780334, "learning_rate": 0.0001437117853019897, "loss": 3.0539, "step": 41285 }, { "epoch": 2.02, "grad_norm": 0.6521453857421875, "learning_rate": 0.00014369864361753217, "loss": 2.7239, "step": 41286 }, { "epoch": 2.02, "grad_norm": 0.642013669013977, "learning_rate": 0.00014368550234474244, "loss": 3.1218, "step": 41287 }, { "epoch": 2.02, "grad_norm": 0.6464647650718689, "learning_rate": 0.00014367236148365504, "loss": 2.8118, "step": 41288 }, { "epoch": 2.02, "grad_norm": 0.6746804714202881, "learning_rate": 0.0001436592210343044, "loss": 2.9244, "step": 41289 }, { "epoch": 2.02, "grad_norm": 0.6726866364479065, "learning_rate": 0.00014364608099672543, "loss": 2.8907, "step": 41290 }, { "epoch": 2.02, "grad_norm": 0.686701774597168, "learning_rate": 0.00014363294137095247, "loss": 2.9126, "step": 41291 }, { "epoch": 2.02, "grad_norm": 0.6961394548416138, "learning_rate": 0.00014361980215702035, "loss": 3.023, "step": 41292 }, { "epoch": 2.02, "grad_norm": 0.662632167339325, "learning_rate": 0.0001436066633549635, "loss": 2.9869, "step": 41293 }, { "epoch": 2.02, "grad_norm": 0.6637516617774963, "learning_rate": 0.00014359352496481667, "loss": 2.8747, "step": 41294 }, { "epoch": 2.02, "grad_norm": 0.6575713753700256, "learning_rate": 0.00014358038698661442, "loss": 2.8826, "step": 41295 }, { "epoch": 2.02, "grad_norm": 0.6427962779998779, "learning_rate": 0.00014356724942039118, "loss": 3.012, "step": 41296 }, { "epoch": 2.02, "grad_norm": 0.6798694729804993, "learning_rate": 0.00014355411226618184, "loss": 2.9795, "step": 41297 }, { "epoch": 2.02, "grad_norm": 0.680004894733429, "learning_rate": 0.00014354097552402077, "loss": 3.1077, "step": 41298 }, { "epoch": 2.02, "grad_norm": 0.6552472114562988, "learning_rate": 0.00014352783919394273, "loss": 2.9108, "step": 41299 }, { "epoch": 2.02, "grad_norm": 0.6746847629547119, "learning_rate": 0.00014351470327598215, "loss": 2.8963, "step": 41300 }, { "epoch": 2.02, "grad_norm": 0.6662331223487854, "learning_rate": 0.0001435015677701737, "loss": 2.7933, "step": 41301 }, { "epoch": 2.02, "grad_norm": 0.6595388650894165, "learning_rate": 0.00014348843267655219, "loss": 2.7368, "step": 41302 }, { "epoch": 2.02, "grad_norm": 0.6785330176353455, "learning_rate": 0.00014347529799515197, "loss": 2.8712, "step": 41303 }, { "epoch": 2.02, "grad_norm": 0.6657465696334839, "learning_rate": 0.00014346216372600768, "loss": 2.7114, "step": 41304 }, { "epoch": 2.02, "grad_norm": 0.6740917563438416, "learning_rate": 0.00014344902986915386, "loss": 2.8561, "step": 41305 }, { "epoch": 2.02, "grad_norm": 0.6666470170021057, "learning_rate": 0.00014343589642462512, "loss": 2.9495, "step": 41306 }, { "epoch": 2.02, "grad_norm": 0.71805739402771, "learning_rate": 0.00014342276339245622, "loss": 3.068, "step": 41307 }, { "epoch": 2.02, "grad_norm": 0.7400341033935547, "learning_rate": 0.00014340963077268153, "loss": 3.0277, "step": 41308 }, { "epoch": 2.02, "grad_norm": 0.6645949482917786, "learning_rate": 0.00014339649856533583, "loss": 2.9738, "step": 41309 }, { "epoch": 2.02, "grad_norm": 0.6728555560112, "learning_rate": 0.00014338336677045365, "loss": 3.0966, "step": 41310 }, { "epoch": 2.02, "grad_norm": 0.6942307353019714, "learning_rate": 0.0001433702353880694, "loss": 2.7839, "step": 41311 }, { "epoch": 2.02, "grad_norm": 0.705938458442688, "learning_rate": 0.0001433571044182179, "loss": 2.6229, "step": 41312 }, { "epoch": 2.02, "grad_norm": 0.6518526077270508, "learning_rate": 0.00014334397386093356, "loss": 2.8772, "step": 41313 }, { "epoch": 2.02, "grad_norm": 0.7186595797538757, "learning_rate": 0.00014333084371625116, "loss": 2.8394, "step": 41314 }, { "epoch": 2.02, "grad_norm": 0.6730000376701355, "learning_rate": 0.00014331771398420502, "loss": 2.9375, "step": 41315 }, { "epoch": 2.02, "grad_norm": 0.672404408454895, "learning_rate": 0.00014330458466482986, "loss": 2.8477, "step": 41316 }, { "epoch": 2.02, "grad_norm": 0.6947441697120667, "learning_rate": 0.00014329145575816044, "loss": 2.8878, "step": 41317 }, { "epoch": 2.02, "grad_norm": 0.7594353556632996, "learning_rate": 0.00014327832726423114, "loss": 3.0445, "step": 41318 }, { "epoch": 2.02, "grad_norm": 0.6902020573616028, "learning_rate": 0.00014326519918307653, "loss": 3.1234, "step": 41319 }, { "epoch": 2.02, "grad_norm": 0.641372799873352, "learning_rate": 0.00014325207151473114, "loss": 2.9663, "step": 41320 }, { "epoch": 2.03, "grad_norm": 0.6673992276191711, "learning_rate": 0.00014323894425922963, "loss": 2.906, "step": 41321 }, { "epoch": 2.03, "grad_norm": 0.6893587708473206, "learning_rate": 0.00014322581741660668, "loss": 2.7704, "step": 41322 }, { "epoch": 2.03, "grad_norm": 0.6781730055809021, "learning_rate": 0.00014321269098689664, "loss": 2.6757, "step": 41323 }, { "epoch": 2.03, "grad_norm": 0.6808968782424927, "learning_rate": 0.0001431995649701343, "loss": 3.0846, "step": 41324 }, { "epoch": 2.03, "grad_norm": 0.650240421295166, "learning_rate": 0.00014318643936635415, "loss": 2.9279, "step": 41325 }, { "epoch": 2.03, "grad_norm": 0.6758670210838318, "learning_rate": 0.0001431733141755906, "loss": 2.9108, "step": 41326 }, { "epoch": 2.03, "grad_norm": 0.6735976934432983, "learning_rate": 0.0001431601893978785, "loss": 3.1225, "step": 41327 }, { "epoch": 2.03, "grad_norm": 0.6825075745582581, "learning_rate": 0.00014314706503325213, "loss": 2.8253, "step": 41328 }, { "epoch": 2.03, "grad_norm": 0.6795275211334229, "learning_rate": 0.00014313394108174637, "loss": 3.0741, "step": 41329 }, { "epoch": 2.03, "grad_norm": 0.6708423495292664, "learning_rate": 0.00014312081754339545, "loss": 2.9368, "step": 41330 }, { "epoch": 2.03, "grad_norm": 0.7268308997154236, "learning_rate": 0.00014310769441823424, "loss": 2.8793, "step": 41331 }, { "epoch": 2.03, "grad_norm": 0.662429690361023, "learning_rate": 0.00014309457170629718, "loss": 2.9008, "step": 41332 }, { "epoch": 2.03, "grad_norm": 0.6782928109169006, "learning_rate": 0.00014308144940761868, "loss": 2.9867, "step": 41333 }, { "epoch": 2.03, "grad_norm": 0.6727951765060425, "learning_rate": 0.00014306832752223358, "loss": 3.1139, "step": 41334 }, { "epoch": 2.03, "grad_norm": 0.6834380626678467, "learning_rate": 0.00014305520605017617, "loss": 3.0817, "step": 41335 }, { "epoch": 2.03, "grad_norm": 0.6775288581848145, "learning_rate": 0.00014304208499148118, "loss": 3.0145, "step": 41336 }, { "epoch": 2.03, "grad_norm": 0.6631368398666382, "learning_rate": 0.00014302896434618322, "loss": 3.1274, "step": 41337 }, { "epoch": 2.03, "grad_norm": 0.65370112657547, "learning_rate": 0.00014301584411431664, "loss": 3.0237, "step": 41338 }, { "epoch": 2.03, "grad_norm": 0.6937180757522583, "learning_rate": 0.00014300272429591625, "loss": 3.0834, "step": 41339 }, { "epoch": 2.03, "grad_norm": 0.6696368455886841, "learning_rate": 0.00014298960489101642, "loss": 3.1994, "step": 41340 }, { "epoch": 2.03, "grad_norm": 0.6452273726463318, "learning_rate": 0.00014297648589965169, "loss": 2.9717, "step": 41341 }, { "epoch": 2.03, "grad_norm": 0.6574122309684753, "learning_rate": 0.0001429633673218568, "loss": 2.9185, "step": 41342 }, { "epoch": 2.03, "grad_norm": 0.6667985320091248, "learning_rate": 0.000142950249157666, "loss": 2.9371, "step": 41343 }, { "epoch": 2.03, "grad_norm": 0.6507903337478638, "learning_rate": 0.00014293713140711416, "loss": 2.9564, "step": 41344 }, { "epoch": 2.03, "grad_norm": 0.6721150875091553, "learning_rate": 0.0001429240140702356, "loss": 2.9744, "step": 41345 }, { "epoch": 2.03, "grad_norm": 0.6516289710998535, "learning_rate": 0.00014291089714706505, "loss": 2.9591, "step": 41346 }, { "epoch": 2.03, "grad_norm": 0.6478187441825867, "learning_rate": 0.00014289778063763698, "loss": 2.8334, "step": 41347 }, { "epoch": 2.03, "grad_norm": 0.6945951581001282, "learning_rate": 0.0001428846645419858, "loss": 3.1982, "step": 41348 }, { "epoch": 2.03, "grad_norm": 0.7314687967300415, "learning_rate": 0.00014287154886014628, "loss": 2.7935, "step": 41349 }, { "epoch": 2.03, "grad_norm": 0.7077362537384033, "learning_rate": 0.00014285843359215274, "loss": 2.9902, "step": 41350 }, { "epoch": 2.03, "grad_norm": 0.676292359828949, "learning_rate": 0.00014284531873803987, "loss": 3.0049, "step": 41351 }, { "epoch": 2.03, "grad_norm": 0.6695364117622375, "learning_rate": 0.00014283220429784228, "loss": 3.1166, "step": 41352 }, { "epoch": 2.03, "grad_norm": 0.7214575409889221, "learning_rate": 0.00014281909027159443, "loss": 2.924, "step": 41353 }, { "epoch": 2.03, "grad_norm": 0.6631299257278442, "learning_rate": 0.00014280597665933083, "loss": 3.1442, "step": 41354 }, { "epoch": 2.03, "grad_norm": 0.6750709414482117, "learning_rate": 0.00014279286346108593, "loss": 3.065, "step": 41355 }, { "epoch": 2.03, "grad_norm": 0.7027854323387146, "learning_rate": 0.0001427797506768944, "loss": 3.1048, "step": 41356 }, { "epoch": 2.03, "grad_norm": 0.6556305885314941, "learning_rate": 0.00014276663830679081, "loss": 2.944, "step": 41357 }, { "epoch": 2.03, "grad_norm": 0.7109807729721069, "learning_rate": 0.00014275352635080956, "loss": 2.8792, "step": 41358 }, { "epoch": 2.03, "grad_norm": 0.7511147260665894, "learning_rate": 0.00014274041480898534, "loss": 2.8699, "step": 41359 }, { "epoch": 2.03, "grad_norm": 0.665418803691864, "learning_rate": 0.0001427273036813525, "loss": 2.9801, "step": 41360 }, { "epoch": 2.03, "grad_norm": 0.7250953316688538, "learning_rate": 0.0001427141929679458, "loss": 3.0798, "step": 41361 }, { "epoch": 2.03, "grad_norm": 0.6798324584960938, "learning_rate": 0.00014270108266879964, "loss": 3.0348, "step": 41362 }, { "epoch": 2.03, "grad_norm": 0.6868711113929749, "learning_rate": 0.00014268797278394843, "loss": 2.9232, "step": 41363 }, { "epoch": 2.03, "grad_norm": 0.683142900466919, "learning_rate": 0.00014267486331342695, "loss": 2.918, "step": 41364 }, { "epoch": 2.03, "grad_norm": 0.6508060097694397, "learning_rate": 0.00014266175425726952, "loss": 2.8509, "step": 41365 }, { "epoch": 2.03, "grad_norm": 0.6772064566612244, "learning_rate": 0.0001426486456155107, "loss": 2.9454, "step": 41366 }, { "epoch": 2.03, "grad_norm": 0.6569616794586182, "learning_rate": 0.00014263553738818517, "loss": 2.8806, "step": 41367 }, { "epoch": 2.03, "grad_norm": 0.6555266380310059, "learning_rate": 0.0001426224295753274, "loss": 2.7536, "step": 41368 }, { "epoch": 2.03, "grad_norm": 0.6969777345657349, "learning_rate": 0.00014260932217697182, "loss": 3.0595, "step": 41369 }, { "epoch": 2.03, "grad_norm": 0.6618245840072632, "learning_rate": 0.00014259621519315292, "loss": 2.9002, "step": 41370 }, { "epoch": 2.03, "grad_norm": 0.6819080710411072, "learning_rate": 0.0001425831086239054, "loss": 2.9353, "step": 41371 }, { "epoch": 2.03, "grad_norm": 0.8382446765899658, "learning_rate": 0.00014257000246926352, "loss": 2.6845, "step": 41372 }, { "epoch": 2.03, "grad_norm": 0.6515592336654663, "learning_rate": 0.00014255689672926198, "loss": 3.1173, "step": 41373 }, { "epoch": 2.03, "grad_norm": 0.6922911405563354, "learning_rate": 0.0001425437914039354, "loss": 2.8563, "step": 41374 }, { "epoch": 2.03, "grad_norm": 0.6700411438941956, "learning_rate": 0.00014253068649331817, "loss": 3.0166, "step": 41375 }, { "epoch": 2.03, "grad_norm": 0.666197657585144, "learning_rate": 0.0001425175819974448, "loss": 2.7031, "step": 41376 }, { "epoch": 2.03, "grad_norm": 0.7301174402236938, "learning_rate": 0.0001425044779163497, "loss": 2.8902, "step": 41377 }, { "epoch": 2.03, "grad_norm": 0.6734825968742371, "learning_rate": 0.0001424913742500675, "loss": 3.0026, "step": 41378 }, { "epoch": 2.03, "grad_norm": 0.7192350625991821, "learning_rate": 0.0001424782709986328, "loss": 3.0602, "step": 41379 }, { "epoch": 2.03, "grad_norm": 0.7443360090255737, "learning_rate": 0.00014246516816207995, "loss": 2.7758, "step": 41380 }, { "epoch": 2.03, "grad_norm": 0.6618134379386902, "learning_rate": 0.00014245206574044358, "loss": 2.6856, "step": 41381 }, { "epoch": 2.03, "grad_norm": 0.7043996453285217, "learning_rate": 0.00014243896373375807, "loss": 2.8153, "step": 41382 }, { "epoch": 2.03, "grad_norm": 0.7315269708633423, "learning_rate": 0.00014242586214205814, "loss": 2.9694, "step": 41383 }, { "epoch": 2.03, "grad_norm": 0.6584864258766174, "learning_rate": 0.0001424127609653781, "loss": 2.7616, "step": 41384 }, { "epoch": 2.03, "grad_norm": 0.6800797581672668, "learning_rate": 0.00014239966020375246, "loss": 2.8515, "step": 41385 }, { "epoch": 2.03, "grad_norm": 0.7683088183403015, "learning_rate": 0.0001423865598572159, "loss": 2.9446, "step": 41386 }, { "epoch": 2.03, "grad_norm": 0.6943177580833435, "learning_rate": 0.00014237345992580266, "loss": 3.2236, "step": 41387 }, { "epoch": 2.03, "grad_norm": 0.7042199969291687, "learning_rate": 0.0001423603604095474, "loss": 2.973, "step": 41388 }, { "epoch": 2.03, "grad_norm": 0.6742187738418579, "learning_rate": 0.00014234726130848472, "loss": 2.6971, "step": 41389 }, { "epoch": 2.03, "grad_norm": 0.6640881299972534, "learning_rate": 0.00014233416262264903, "loss": 2.7863, "step": 41390 }, { "epoch": 2.03, "grad_norm": 0.6789493560791016, "learning_rate": 0.0001423210643520748, "loss": 2.9233, "step": 41391 }, { "epoch": 2.03, "grad_norm": 0.6761623620986938, "learning_rate": 0.00014230796649679643, "loss": 3.1005, "step": 41392 }, { "epoch": 2.03, "grad_norm": 0.6618014574050903, "learning_rate": 0.00014229486905684854, "loss": 3.047, "step": 41393 }, { "epoch": 2.03, "grad_norm": 0.7283720970153809, "learning_rate": 0.00014228177203226574, "loss": 3.1616, "step": 41394 }, { "epoch": 2.03, "grad_norm": 0.6539528965950012, "learning_rate": 0.00014226867542308226, "loss": 2.833, "step": 41395 }, { "epoch": 2.03, "grad_norm": 0.6887137293815613, "learning_rate": 0.00014225557922933286, "loss": 3.0419, "step": 41396 }, { "epoch": 2.03, "grad_norm": 0.6629752516746521, "learning_rate": 0.00014224248345105192, "loss": 2.8952, "step": 41397 }, { "epoch": 2.03, "grad_norm": 0.6697224378585815, "learning_rate": 0.00014222938808827377, "loss": 3.0586, "step": 41398 }, { "epoch": 2.03, "grad_norm": 0.6652210354804993, "learning_rate": 0.00014221629314103317, "loss": 3.0246, "step": 41399 }, { "epoch": 2.03, "grad_norm": 0.6590397953987122, "learning_rate": 0.0001422031986093644, "loss": 2.7692, "step": 41400 }, { "epoch": 2.03, "grad_norm": 0.7004410028457642, "learning_rate": 0.0001421901044933021, "loss": 2.9138, "step": 41401 }, { "epoch": 2.03, "grad_norm": 0.7048691511154175, "learning_rate": 0.00014217701079288062, "loss": 2.7807, "step": 41402 }, { "epoch": 2.03, "grad_norm": 0.7057774066925049, "learning_rate": 0.00014216391750813452, "loss": 2.8305, "step": 41403 }, { "epoch": 2.03, "grad_norm": 0.6988634467124939, "learning_rate": 0.00014215082463909837, "loss": 2.8967, "step": 41404 }, { "epoch": 2.03, "grad_norm": 0.644032895565033, "learning_rate": 0.00014213773218580658, "loss": 2.9624, "step": 41405 }, { "epoch": 2.03, "grad_norm": 0.659142792224884, "learning_rate": 0.00014212464014829362, "loss": 3.1234, "step": 41406 }, { "epoch": 2.03, "grad_norm": 0.7245345115661621, "learning_rate": 0.00014211154852659388, "loss": 3.121, "step": 41407 }, { "epoch": 2.03, "grad_norm": 0.6617531180381775, "learning_rate": 0.0001420984573207419, "loss": 2.9157, "step": 41408 }, { "epoch": 2.03, "grad_norm": 0.6753281354904175, "learning_rate": 0.00014208536653077236, "loss": 2.8922, "step": 41409 }, { "epoch": 2.03, "grad_norm": 0.7002780437469482, "learning_rate": 0.00014207227615671943, "loss": 3.0797, "step": 41410 }, { "epoch": 2.03, "grad_norm": 0.7127296328544617, "learning_rate": 0.00014205918619861785, "loss": 3.0361, "step": 41411 }, { "epoch": 2.03, "grad_norm": 0.6621199250221252, "learning_rate": 0.00014204609665650198, "loss": 2.8687, "step": 41412 }, { "epoch": 2.03, "grad_norm": 0.6912297010421753, "learning_rate": 0.00014203300753040616, "loss": 2.8506, "step": 41413 }, { "epoch": 2.03, "grad_norm": 0.6624032258987427, "learning_rate": 0.00014201991882036513, "loss": 2.7342, "step": 41414 }, { "epoch": 2.03, "grad_norm": 0.7326284646987915, "learning_rate": 0.00014200683052641307, "loss": 2.9032, "step": 41415 }, { "epoch": 2.03, "grad_norm": 0.6524210572242737, "learning_rate": 0.00014199374264858478, "loss": 3.0304, "step": 41416 }, { "epoch": 2.03, "grad_norm": 0.6589338183403015, "learning_rate": 0.00014198065518691443, "loss": 2.9891, "step": 41417 }, { "epoch": 2.03, "grad_norm": 0.7966728806495667, "learning_rate": 0.00014196756814143665, "loss": 2.8156, "step": 41418 }, { "epoch": 2.03, "grad_norm": 0.6796247363090515, "learning_rate": 0.00014195448151218607, "loss": 2.8564, "step": 41419 }, { "epoch": 2.03, "grad_norm": 0.9536327123641968, "learning_rate": 0.00014194139529919675, "loss": 3.0352, "step": 41420 }, { "epoch": 2.03, "grad_norm": 0.7161813378334045, "learning_rate": 0.0001419283095025035, "loss": 2.9973, "step": 41421 }, { "epoch": 2.03, "grad_norm": 0.7191630005836487, "learning_rate": 0.00014191522412214055, "loss": 2.8155, "step": 41422 }, { "epoch": 2.03, "grad_norm": 0.6631558537483215, "learning_rate": 0.00014190213915814246, "loss": 2.923, "step": 41423 }, { "epoch": 2.03, "grad_norm": 0.7255247235298157, "learning_rate": 0.00014188905461054384, "loss": 2.9193, "step": 41424 }, { "epoch": 2.03, "grad_norm": 0.733752965927124, "learning_rate": 0.0001418759704793789, "loss": 2.7613, "step": 41425 }, { "epoch": 2.03, "grad_norm": 0.7541595697402954, "learning_rate": 0.00014186288676468236, "loss": 2.9079, "step": 41426 }, { "epoch": 2.03, "grad_norm": 0.6659170389175415, "learning_rate": 0.00014184980346648848, "loss": 2.9627, "step": 41427 }, { "epoch": 2.03, "grad_norm": 0.6909793019294739, "learning_rate": 0.00014183672058483174, "loss": 2.79, "step": 41428 }, { "epoch": 2.03, "grad_norm": 0.6981406807899475, "learning_rate": 0.00014182363811974667, "loss": 3.0426, "step": 41429 }, { "epoch": 2.03, "grad_norm": 0.6924818754196167, "learning_rate": 0.00014181055607126764, "loss": 2.6948, "step": 41430 }, { "epoch": 2.03, "grad_norm": 0.6846038699150085, "learning_rate": 0.00014179747443942927, "loss": 2.9601, "step": 41431 }, { "epoch": 2.03, "grad_norm": 0.722434937953949, "learning_rate": 0.0001417843932242658, "loss": 2.7735, "step": 41432 }, { "epoch": 2.03, "grad_norm": 0.6762760877609253, "learning_rate": 0.0001417713124258119, "loss": 3.0094, "step": 41433 }, { "epoch": 2.03, "grad_norm": 0.7446743845939636, "learning_rate": 0.0001417582320441019, "loss": 3.1196, "step": 41434 }, { "epoch": 2.03, "grad_norm": 0.6979431509971619, "learning_rate": 0.00014174515207917014, "loss": 3.1234, "step": 41435 }, { "epoch": 2.03, "grad_norm": 0.6926884055137634, "learning_rate": 0.0001417320725310513, "loss": 3.0273, "step": 41436 }, { "epoch": 2.03, "grad_norm": 0.6868926286697388, "learning_rate": 0.00014171899339977963, "loss": 2.8642, "step": 41437 }, { "epoch": 2.03, "grad_norm": 0.6802986860275269, "learning_rate": 0.00014170591468538966, "loss": 2.9907, "step": 41438 }, { "epoch": 2.03, "grad_norm": 0.7366862893104553, "learning_rate": 0.00014169283638791597, "loss": 2.7683, "step": 41439 }, { "epoch": 2.03, "grad_norm": 0.6840645670890808, "learning_rate": 0.00014167975850739277, "loss": 3.0044, "step": 41440 }, { "epoch": 2.03, "grad_norm": 0.6888478398323059, "learning_rate": 0.00014166668104385473, "loss": 3.0112, "step": 41441 }, { "epoch": 2.03, "grad_norm": 0.6686787009239197, "learning_rate": 0.00014165360399733616, "loss": 2.9337, "step": 41442 }, { "epoch": 2.03, "grad_norm": 0.6644048690795898, "learning_rate": 0.0001416405273678714, "loss": 3.073, "step": 41443 }, { "epoch": 2.03, "grad_norm": 0.6644816994667053, "learning_rate": 0.0001416274511554952, "loss": 2.9437, "step": 41444 }, { "epoch": 2.03, "grad_norm": 0.6657946705818176, "learning_rate": 0.00014161437536024162, "loss": 2.9644, "step": 41445 }, { "epoch": 2.03, "grad_norm": 0.6964468955993652, "learning_rate": 0.00014160129998214545, "loss": 3.0717, "step": 41446 }, { "epoch": 2.03, "grad_norm": 0.7129657864570618, "learning_rate": 0.00014158822502124084, "loss": 3.0621, "step": 41447 }, { "epoch": 2.03, "grad_norm": 0.6289086937904358, "learning_rate": 0.00014157515047756247, "loss": 2.828, "step": 41448 }, { "epoch": 2.03, "grad_norm": 0.6725035905838013, "learning_rate": 0.00014156207635114467, "loss": 2.9882, "step": 41449 }, { "epoch": 2.03, "grad_norm": 0.6678059101104736, "learning_rate": 0.00014154900264202176, "loss": 2.9306, "step": 41450 }, { "epoch": 2.03, "grad_norm": 0.6416679620742798, "learning_rate": 0.0001415359293502284, "loss": 2.7726, "step": 41451 }, { "epoch": 2.03, "grad_norm": 0.6881377100944519, "learning_rate": 0.00014152285647579878, "loss": 2.6733, "step": 41452 }, { "epoch": 2.03, "grad_norm": 0.6680883765220642, "learning_rate": 0.00014150978401876755, "loss": 2.9299, "step": 41453 }, { "epoch": 2.03, "grad_norm": 0.6864650249481201, "learning_rate": 0.000141496711979169, "loss": 2.9543, "step": 41454 }, { "epoch": 2.03, "grad_norm": 0.7557768225669861, "learning_rate": 0.00014148364035703763, "loss": 3.0048, "step": 41455 }, { "epoch": 2.03, "grad_norm": 0.6464671492576599, "learning_rate": 0.0001414705691524079, "loss": 2.8257, "step": 41456 }, { "epoch": 2.03, "grad_norm": 0.6590653657913208, "learning_rate": 0.00014145749836531405, "loss": 2.8427, "step": 41457 }, { "epoch": 2.03, "grad_norm": 0.6637300252914429, "learning_rate": 0.00014144442799579075, "loss": 2.8594, "step": 41458 }, { "epoch": 2.03, "grad_norm": 0.7155690789222717, "learning_rate": 0.00014143135804387221, "loss": 2.8372, "step": 41459 }, { "epoch": 2.03, "grad_norm": 0.6667417287826538, "learning_rate": 0.00014141828850959296, "loss": 2.8662, "step": 41460 }, { "epoch": 2.03, "grad_norm": 0.7127690315246582, "learning_rate": 0.00014140521939298755, "loss": 2.9396, "step": 41461 }, { "epoch": 2.03, "grad_norm": 0.6322091817855835, "learning_rate": 0.00014139215069409013, "loss": 2.8889, "step": 41462 }, { "epoch": 2.03, "grad_norm": 0.6614790558815002, "learning_rate": 0.00014137908241293538, "loss": 2.8967, "step": 41463 }, { "epoch": 2.03, "grad_norm": 0.6741194128990173, "learning_rate": 0.0001413660145495576, "loss": 2.9764, "step": 41464 }, { "epoch": 2.03, "grad_norm": 0.6606133580207825, "learning_rate": 0.0001413529471039911, "loss": 2.7991, "step": 41465 }, { "epoch": 2.03, "grad_norm": 0.6671067476272583, "learning_rate": 0.0001413398800762705, "loss": 3.006, "step": 41466 }, { "epoch": 2.03, "grad_norm": 0.6627110242843628, "learning_rate": 0.00014132681346643, "loss": 2.884, "step": 41467 }, { "epoch": 2.03, "grad_norm": 0.7146353721618652, "learning_rate": 0.00014131374727450427, "loss": 2.889, "step": 41468 }, { "epoch": 2.03, "grad_norm": 0.6611478924751282, "learning_rate": 0.0001413006815005275, "loss": 2.8787, "step": 41469 }, { "epoch": 2.03, "grad_norm": 0.6627159118652344, "learning_rate": 0.00014128761614453424, "loss": 3.0298, "step": 41470 }, { "epoch": 2.03, "grad_norm": 0.6639991998672485, "learning_rate": 0.0001412745512065589, "loss": 2.9859, "step": 41471 }, { "epoch": 2.03, "grad_norm": 0.6625120639801025, "learning_rate": 0.0001412614866866357, "loss": 2.9387, "step": 41472 }, { "epoch": 2.03, "grad_norm": 0.6913546919822693, "learning_rate": 0.00014124842258479936, "loss": 2.8833, "step": 41473 }, { "epoch": 2.03, "grad_norm": 0.6938081979751587, "learning_rate": 0.00014123535890108398, "loss": 2.8589, "step": 41474 }, { "epoch": 2.03, "grad_norm": 0.6782478094100952, "learning_rate": 0.0001412222956355241, "loss": 3.0988, "step": 41475 }, { "epoch": 2.03, "grad_norm": 0.6729618906974792, "learning_rate": 0.00014120923278815428, "loss": 2.9759, "step": 41476 }, { "epoch": 2.03, "grad_norm": 0.670555055141449, "learning_rate": 0.00014119617035900875, "loss": 2.8674, "step": 41477 }, { "epoch": 2.03, "grad_norm": 0.7138615250587463, "learning_rate": 0.00014118310834812194, "loss": 2.9513, "step": 41478 }, { "epoch": 2.03, "grad_norm": 0.6860772967338562, "learning_rate": 0.00014117004675552817, "loss": 2.845, "step": 41479 }, { "epoch": 2.03, "grad_norm": 0.7426106333732605, "learning_rate": 0.00014115698558126195, "loss": 2.8868, "step": 41480 }, { "epoch": 2.03, "grad_norm": 0.6855692267417908, "learning_rate": 0.00014114392482535772, "loss": 2.9, "step": 41481 }, { "epoch": 2.03, "grad_norm": 0.7088014483451843, "learning_rate": 0.00014113086448784974, "loss": 2.9, "step": 41482 }, { "epoch": 2.03, "grad_norm": 0.6505365967750549, "learning_rate": 0.0001411178045687726, "loss": 3.0525, "step": 41483 }, { "epoch": 2.03, "grad_norm": 0.702044665813446, "learning_rate": 0.00014110474506816043, "loss": 2.9941, "step": 41484 }, { "epoch": 2.03, "grad_norm": 0.6758970022201538, "learning_rate": 0.00014109168598604795, "loss": 3.1789, "step": 41485 }, { "epoch": 2.03, "grad_norm": 0.6437978744506836, "learning_rate": 0.00014107862732246935, "loss": 2.6538, "step": 41486 }, { "epoch": 2.03, "grad_norm": 0.7227490544319153, "learning_rate": 0.00014106556907745896, "loss": 2.884, "step": 41487 }, { "epoch": 2.03, "grad_norm": 0.7027838826179504, "learning_rate": 0.00014105251125105139, "loss": 3.0361, "step": 41488 }, { "epoch": 2.03, "grad_norm": 0.6415818333625793, "learning_rate": 0.0001410394538432808, "loss": 2.9468, "step": 41489 }, { "epoch": 2.03, "grad_norm": 0.6897194981575012, "learning_rate": 0.0001410263968541817, "loss": 2.9501, "step": 41490 }, { "epoch": 2.03, "grad_norm": 0.6428824663162231, "learning_rate": 0.0001410133402837886, "loss": 2.9695, "step": 41491 }, { "epoch": 2.03, "grad_norm": 0.7042983770370483, "learning_rate": 0.00014100028413213572, "loss": 2.9618, "step": 41492 }, { "epoch": 2.03, "grad_norm": 0.7326884269714355, "learning_rate": 0.00014098722839925754, "loss": 3.2218, "step": 41493 }, { "epoch": 2.03, "grad_norm": 0.6782544255256653, "learning_rate": 0.00014097417308518825, "loss": 2.9167, "step": 41494 }, { "epoch": 2.03, "grad_norm": 0.6886657476425171, "learning_rate": 0.0001409611181899624, "loss": 2.8998, "step": 41495 }, { "epoch": 2.03, "grad_norm": 0.7487680912017822, "learning_rate": 0.00014094806371361447, "loss": 2.9263, "step": 41496 }, { "epoch": 2.03, "grad_norm": 0.654858410358429, "learning_rate": 0.0001409350096561786, "loss": 3.0155, "step": 41497 }, { "epoch": 2.03, "grad_norm": 0.6685404181480408, "learning_rate": 0.00014092195601768945, "loss": 3.0298, "step": 41498 }, { "epoch": 2.03, "grad_norm": 0.7062793970108032, "learning_rate": 0.0001409089027981812, "loss": 2.883, "step": 41499 }, { "epoch": 2.03, "grad_norm": 0.7158371210098267, "learning_rate": 0.00014089584999768821, "loss": 2.9065, "step": 41500 }, { "epoch": 2.03, "grad_norm": 0.7162919640541077, "learning_rate": 0.00014088279761624505, "loss": 3.1325, "step": 41501 }, { "epoch": 2.03, "grad_norm": 0.6152122616767883, "learning_rate": 0.00014086974565388587, "loss": 2.8914, "step": 41502 }, { "epoch": 2.03, "grad_norm": 0.7153500318527222, "learning_rate": 0.00014085669411064523, "loss": 3.1672, "step": 41503 }, { "epoch": 2.03, "grad_norm": 0.6505460143089294, "learning_rate": 0.00014084364298655734, "loss": 3.0139, "step": 41504 }, { "epoch": 2.03, "grad_norm": 0.7156624794006348, "learning_rate": 0.00014083059228165665, "loss": 2.918, "step": 41505 }, { "epoch": 2.03, "grad_norm": 0.657463550567627, "learning_rate": 0.00014081754199597767, "loss": 3.0337, "step": 41506 }, { "epoch": 2.03, "grad_norm": 0.6673892736434937, "learning_rate": 0.00014080449212955468, "loss": 2.8168, "step": 41507 }, { "epoch": 2.03, "grad_norm": 0.6680115461349487, "learning_rate": 0.00014079144268242197, "loss": 2.916, "step": 41508 }, { "epoch": 2.03, "grad_norm": 0.676265299320221, "learning_rate": 0.00014077839365461386, "loss": 3.0468, "step": 41509 }, { "epoch": 2.03, "grad_norm": 0.6526424288749695, "learning_rate": 0.00014076534504616484, "loss": 2.9184, "step": 41510 }, { "epoch": 2.03, "grad_norm": 0.695165753364563, "learning_rate": 0.00014075229685710935, "loss": 3.1055, "step": 41511 }, { "epoch": 2.03, "grad_norm": 0.6782224178314209, "learning_rate": 0.00014073924908748154, "loss": 2.893, "step": 41512 }, { "epoch": 2.03, "grad_norm": 0.6610283255577087, "learning_rate": 0.00014072620173731604, "loss": 3.0361, "step": 41513 }, { "epoch": 2.03, "grad_norm": 0.6884645223617554, "learning_rate": 0.00014071315480664704, "loss": 3.005, "step": 41514 }, { "epoch": 2.03, "grad_norm": 0.7342187166213989, "learning_rate": 0.00014070010829550885, "loss": 2.9996, "step": 41515 }, { "epoch": 2.03, "grad_norm": 0.6680518388748169, "learning_rate": 0.000140687062203936, "loss": 3.1241, "step": 41516 }, { "epoch": 2.03, "grad_norm": 0.6695300936698914, "learning_rate": 0.00014067401653196268, "loss": 3.0162, "step": 41517 }, { "epoch": 2.03, "grad_norm": 0.6923040747642517, "learning_rate": 0.00014066097127962346, "loss": 2.6873, "step": 41518 }, { "epoch": 2.03, "grad_norm": 0.7008634209632874, "learning_rate": 0.00014064792644695242, "loss": 2.9065, "step": 41519 }, { "epoch": 2.03, "grad_norm": 0.6829542517662048, "learning_rate": 0.0001406348820339841, "loss": 2.8814, "step": 41520 }, { "epoch": 2.03, "grad_norm": 0.6533739566802979, "learning_rate": 0.00014062183804075307, "loss": 2.969, "step": 41521 }, { "epoch": 2.03, "grad_norm": 0.6787754893302917, "learning_rate": 0.0001406087944672932, "loss": 2.8614, "step": 41522 }, { "epoch": 2.03, "grad_norm": 0.6453655958175659, "learning_rate": 0.00014059575131363918, "loss": 2.9535, "step": 41523 }, { "epoch": 2.03, "grad_norm": 0.733506441116333, "learning_rate": 0.00014058270857982523, "loss": 2.9048, "step": 41524 }, { "epoch": 2.04, "grad_norm": 0.6687877774238586, "learning_rate": 0.00014056966626588578, "loss": 2.7681, "step": 41525 }, { "epoch": 2.04, "grad_norm": 0.6705677509307861, "learning_rate": 0.0001405566243718551, "loss": 3.2083, "step": 41526 }, { "epoch": 2.04, "grad_norm": 0.6572078466415405, "learning_rate": 0.00014054358289776754, "loss": 2.8929, "step": 41527 }, { "epoch": 2.04, "grad_norm": 0.6767847537994385, "learning_rate": 0.0001405305418436576, "loss": 2.8771, "step": 41528 }, { "epoch": 2.04, "grad_norm": 0.6408040523529053, "learning_rate": 0.00014051750120955956, "loss": 2.841, "step": 41529 }, { "epoch": 2.04, "grad_norm": 0.6896417140960693, "learning_rate": 0.00014050446099550773, "loss": 2.8297, "step": 41530 }, { "epoch": 2.04, "grad_norm": 0.7329106330871582, "learning_rate": 0.00014049142120153632, "loss": 3.0008, "step": 41531 }, { "epoch": 2.04, "grad_norm": 0.6867200136184692, "learning_rate": 0.0001404783818276798, "loss": 2.6561, "step": 41532 }, { "epoch": 2.04, "grad_norm": 0.6792352199554443, "learning_rate": 0.00014046534287397264, "loss": 3.0706, "step": 41533 }, { "epoch": 2.04, "grad_norm": 0.7385411858558655, "learning_rate": 0.00014045230434044895, "loss": 2.5935, "step": 41534 }, { "epoch": 2.04, "grad_norm": 0.662626326084137, "learning_rate": 0.00014043926622714332, "loss": 2.9917, "step": 41535 }, { "epoch": 2.04, "grad_norm": 0.734079897403717, "learning_rate": 0.00014042622853408993, "loss": 3.1046, "step": 41536 }, { "epoch": 2.04, "grad_norm": 0.646392822265625, "learning_rate": 0.0001404131912613231, "loss": 2.8517, "step": 41537 }, { "epoch": 2.04, "grad_norm": 0.6550691723823547, "learning_rate": 0.00014040015440887723, "loss": 3.0403, "step": 41538 }, { "epoch": 2.04, "grad_norm": 0.7334123849868774, "learning_rate": 0.00014038711797678658, "loss": 2.8179, "step": 41539 }, { "epoch": 2.04, "grad_norm": 0.6451386213302612, "learning_rate": 0.00014037408196508562, "loss": 2.9276, "step": 41540 }, { "epoch": 2.04, "grad_norm": 0.6706496477127075, "learning_rate": 0.00014036104637380853, "loss": 2.8136, "step": 41541 }, { "epoch": 2.04, "grad_norm": 0.6933931112289429, "learning_rate": 0.00014034801120298967, "loss": 3.1359, "step": 41542 }, { "epoch": 2.04, "grad_norm": 0.6907314658164978, "learning_rate": 0.00014033497645266356, "loss": 2.8287, "step": 41543 }, { "epoch": 2.04, "grad_norm": 0.6742256879806519, "learning_rate": 0.00014032194212286442, "loss": 2.9517, "step": 41544 }, { "epoch": 2.04, "grad_norm": 0.6563650369644165, "learning_rate": 0.00014030890821362655, "loss": 2.9116, "step": 41545 }, { "epoch": 2.04, "grad_norm": 0.665621280670166, "learning_rate": 0.00014029587472498415, "loss": 2.9368, "step": 41546 }, { "epoch": 2.04, "grad_norm": 0.6880399584770203, "learning_rate": 0.00014028284165697167, "loss": 2.9152, "step": 41547 }, { "epoch": 2.04, "grad_norm": 0.7508476376533508, "learning_rate": 0.00014026980900962358, "loss": 3.043, "step": 41548 }, { "epoch": 2.04, "grad_norm": 0.7052724957466125, "learning_rate": 0.00014025677678297394, "loss": 3.0261, "step": 41549 }, { "epoch": 2.04, "grad_norm": 0.698532223701477, "learning_rate": 0.00014024374497705736, "loss": 3.0202, "step": 41550 }, { "epoch": 2.04, "grad_norm": 0.6969287395477295, "learning_rate": 0.00014023071359190795, "loss": 2.8709, "step": 41551 }, { "epoch": 2.04, "grad_norm": 0.7008760571479797, "learning_rate": 0.00014021768262756002, "loss": 2.9338, "step": 41552 }, { "epoch": 2.04, "grad_norm": 0.6544531583786011, "learning_rate": 0.00014020465208404808, "loss": 2.8513, "step": 41553 }, { "epoch": 2.04, "grad_norm": 0.659297525882721, "learning_rate": 0.00014019162196140622, "loss": 2.8528, "step": 41554 }, { "epoch": 2.04, "grad_norm": 0.6890259385108948, "learning_rate": 0.00014017859225966896, "loss": 2.9265, "step": 41555 }, { "epoch": 2.04, "grad_norm": 0.6693836450576782, "learning_rate": 0.00014016556297887043, "loss": 2.9833, "step": 41556 }, { "epoch": 2.04, "grad_norm": 0.7449944615364075, "learning_rate": 0.00014015253411904519, "loss": 2.8482, "step": 41557 }, { "epoch": 2.04, "grad_norm": 0.6576284766197205, "learning_rate": 0.00014013950568022735, "loss": 2.9817, "step": 41558 }, { "epoch": 2.04, "grad_norm": 0.6823240518569946, "learning_rate": 0.0001401264776624512, "loss": 2.7373, "step": 41559 }, { "epoch": 2.04, "grad_norm": 0.6630463600158691, "learning_rate": 0.00014011345006575127, "loss": 2.9244, "step": 41560 }, { "epoch": 2.04, "grad_norm": 0.6817547082901001, "learning_rate": 0.00014010042289016163, "loss": 3.0798, "step": 41561 }, { "epoch": 2.04, "grad_norm": 0.697632908821106, "learning_rate": 0.00014008739613571668, "loss": 2.9706, "step": 41562 }, { "epoch": 2.04, "grad_norm": 0.6645799279212952, "learning_rate": 0.00014007436980245088, "loss": 3.0473, "step": 41563 }, { "epoch": 2.04, "grad_norm": 0.7861841320991516, "learning_rate": 0.00014006134389039828, "loss": 2.9287, "step": 41564 }, { "epoch": 2.04, "grad_norm": 0.7191126942634583, "learning_rate": 0.00014004831839959346, "loss": 2.8517, "step": 41565 }, { "epoch": 2.04, "grad_norm": 0.6533079743385315, "learning_rate": 0.0001400352933300706, "loss": 2.7748, "step": 41566 }, { "epoch": 2.04, "grad_norm": 0.6802554130554199, "learning_rate": 0.00014002226868186382, "loss": 2.985, "step": 41567 }, { "epoch": 2.04, "grad_norm": 0.6917102932929993, "learning_rate": 0.00014000924445500774, "loss": 2.9208, "step": 41568 }, { "epoch": 2.04, "grad_norm": 0.6812888383865356, "learning_rate": 0.0001399962206495364, "loss": 3.2155, "step": 41569 }, { "epoch": 2.04, "grad_norm": 0.6770250797271729, "learning_rate": 0.00013998319726548435, "loss": 2.7714, "step": 41570 }, { "epoch": 2.04, "grad_norm": 0.6774451732635498, "learning_rate": 0.00013997017430288566, "loss": 2.9362, "step": 41571 }, { "epoch": 2.04, "grad_norm": 0.6566124558448792, "learning_rate": 0.00013995715176177482, "loss": 2.9055, "step": 41572 }, { "epoch": 2.04, "grad_norm": 0.6735152006149292, "learning_rate": 0.00013994412964218604, "loss": 2.8136, "step": 41573 }, { "epoch": 2.04, "grad_norm": 0.6460737586021423, "learning_rate": 0.00013993110794415352, "loss": 2.9369, "step": 41574 }, { "epoch": 2.04, "grad_norm": 0.6937167644500732, "learning_rate": 0.00013991808666771177, "loss": 2.94, "step": 41575 }, { "epoch": 2.04, "grad_norm": 0.6455183625221252, "learning_rate": 0.00013990506581289483, "loss": 2.7323, "step": 41576 }, { "epoch": 2.04, "grad_norm": 0.6627008318901062, "learning_rate": 0.00013989204537973715, "loss": 2.8377, "step": 41577 }, { "epoch": 2.04, "grad_norm": 0.6939973831176758, "learning_rate": 0.00013987902536827314, "loss": 2.9573, "step": 41578 }, { "epoch": 2.04, "grad_norm": 0.7010552883148193, "learning_rate": 0.00013986600577853696, "loss": 2.984, "step": 41579 }, { "epoch": 2.04, "grad_norm": 0.6695321798324585, "learning_rate": 0.0001398529866105629, "loss": 2.967, "step": 41580 }, { "epoch": 2.04, "grad_norm": 0.7119990587234497, "learning_rate": 0.0001398399678643851, "loss": 2.8192, "step": 41581 }, { "epoch": 2.04, "grad_norm": 0.6729029417037964, "learning_rate": 0.00013982694954003805, "loss": 2.8459, "step": 41582 }, { "epoch": 2.04, "grad_norm": 0.6772317290306091, "learning_rate": 0.0001398139316375561, "loss": 2.9779, "step": 41583 }, { "epoch": 2.04, "grad_norm": 0.6854050159454346, "learning_rate": 0.0001398009141569733, "loss": 2.9556, "step": 41584 }, { "epoch": 2.04, "grad_norm": 0.6610407829284668, "learning_rate": 0.00013978789709832414, "loss": 2.862, "step": 41585 }, { "epoch": 2.04, "grad_norm": 0.7166022658348083, "learning_rate": 0.00013977488046164276, "loss": 3.1247, "step": 41586 }, { "epoch": 2.04, "grad_norm": 0.673302948474884, "learning_rate": 0.00013976186424696362, "loss": 2.812, "step": 41587 }, { "epoch": 2.04, "grad_norm": 0.6916917562484741, "learning_rate": 0.00013974884845432088, "loss": 3.0233, "step": 41588 }, { "epoch": 2.04, "grad_norm": 0.6967342495918274, "learning_rate": 0.0001397358330837487, "loss": 2.9757, "step": 41589 }, { "epoch": 2.04, "grad_norm": 0.741462767124176, "learning_rate": 0.0001397228181352816, "loss": 2.9999, "step": 41590 }, { "epoch": 2.04, "grad_norm": 0.7056967616081238, "learning_rate": 0.0001397098036089537, "loss": 2.8734, "step": 41591 }, { "epoch": 2.04, "grad_norm": 0.6610565781593323, "learning_rate": 0.00013969678950479926, "loss": 2.9993, "step": 41592 }, { "epoch": 2.04, "grad_norm": 0.653553307056427, "learning_rate": 0.00013968377582285278, "loss": 2.873, "step": 41593 }, { "epoch": 2.04, "grad_norm": 0.724814772605896, "learning_rate": 0.0001396707625631484, "loss": 2.916, "step": 41594 }, { "epoch": 2.04, "grad_norm": 0.6634653210639954, "learning_rate": 0.00013965774972572032, "loss": 2.9582, "step": 41595 }, { "epoch": 2.04, "grad_norm": 0.6641644835472107, "learning_rate": 0.00013964473731060276, "loss": 3.0106, "step": 41596 }, { "epoch": 2.04, "grad_norm": 0.6954382061958313, "learning_rate": 0.00013963172531783013, "loss": 3.1128, "step": 41597 }, { "epoch": 2.04, "grad_norm": 0.6707383394241333, "learning_rate": 0.00013961871374743677, "loss": 2.7836, "step": 41598 }, { "epoch": 2.04, "grad_norm": 0.6706055402755737, "learning_rate": 0.00013960570259945675, "loss": 2.9707, "step": 41599 }, { "epoch": 2.04, "grad_norm": 0.6660712361335754, "learning_rate": 0.00013959269187392457, "loss": 2.9676, "step": 41600 }, { "epoch": 2.04, "grad_norm": 0.7226710319519043, "learning_rate": 0.00013957968157087434, "loss": 3.0102, "step": 41601 }, { "epoch": 2.04, "grad_norm": 0.70147705078125, "learning_rate": 0.00013956667169034021, "loss": 3.0778, "step": 41602 }, { "epoch": 2.04, "grad_norm": 0.7111939787864685, "learning_rate": 0.00013955366223235679, "loss": 2.958, "step": 41603 }, { "epoch": 2.04, "grad_norm": 0.6577525734901428, "learning_rate": 0.00013954065319695796, "loss": 3.0126, "step": 41604 }, { "epoch": 2.04, "grad_norm": 0.6963541507720947, "learning_rate": 0.00013952764458417832, "loss": 2.9144, "step": 41605 }, { "epoch": 2.04, "grad_norm": 0.6417427659034729, "learning_rate": 0.00013951463639405186, "loss": 2.8289, "step": 41606 }, { "epoch": 2.04, "grad_norm": 0.6816616654396057, "learning_rate": 0.00013950162862661305, "loss": 2.8721, "step": 41607 }, { "epoch": 2.04, "grad_norm": 0.7183780670166016, "learning_rate": 0.00013948862128189597, "loss": 2.8842, "step": 41608 }, { "epoch": 2.04, "grad_norm": 0.6773726940155029, "learning_rate": 0.00013947561435993507, "loss": 2.8267, "step": 41609 }, { "epoch": 2.04, "grad_norm": 0.715185821056366, "learning_rate": 0.00013946260786076452, "loss": 2.8606, "step": 41610 }, { "epoch": 2.04, "grad_norm": 0.6540136337280273, "learning_rate": 0.00013944960178441848, "loss": 3.098, "step": 41611 }, { "epoch": 2.04, "grad_norm": 0.6640399098396301, "learning_rate": 0.00013943659613093137, "loss": 2.9417, "step": 41612 }, { "epoch": 2.04, "grad_norm": 0.6632117629051208, "learning_rate": 0.00013942359090033724, "loss": 2.8215, "step": 41613 }, { "epoch": 2.04, "grad_norm": 0.7612461447715759, "learning_rate": 0.00013941058609267048, "loss": 2.7336, "step": 41614 }, { "epoch": 2.04, "grad_norm": 0.6554035544395447, "learning_rate": 0.00013939758170796547, "loss": 2.7637, "step": 41615 }, { "epoch": 2.04, "grad_norm": 0.6623538732528687, "learning_rate": 0.0001393845777462563, "loss": 3.0458, "step": 41616 }, { "epoch": 2.04, "grad_norm": 0.6795579195022583, "learning_rate": 0.00013937157420757722, "loss": 2.8377, "step": 41617 }, { "epoch": 2.04, "grad_norm": 0.6572181582450867, "learning_rate": 0.0001393585710919624, "loss": 2.7483, "step": 41618 }, { "epoch": 2.04, "grad_norm": 0.6754372715950012, "learning_rate": 0.00013934556839944622, "loss": 2.8756, "step": 41619 }, { "epoch": 2.04, "grad_norm": 0.6733306646347046, "learning_rate": 0.00013933256613006298, "loss": 2.8464, "step": 41620 }, { "epoch": 2.04, "grad_norm": 0.6594640016555786, "learning_rate": 0.0001393195642838467, "loss": 2.9303, "step": 41621 }, { "epoch": 2.04, "grad_norm": 0.6582934260368347, "learning_rate": 0.00013930656286083193, "loss": 2.913, "step": 41622 }, { "epoch": 2.04, "grad_norm": 0.6640312671661377, "learning_rate": 0.00013929356186105272, "loss": 2.8359, "step": 41623 }, { "epoch": 2.04, "grad_norm": 0.643187403678894, "learning_rate": 0.0001392805612845432, "loss": 3.2914, "step": 41624 }, { "epoch": 2.04, "grad_norm": 0.7015854120254517, "learning_rate": 0.00013926756113133792, "loss": 3.1108, "step": 41625 }, { "epoch": 2.04, "grad_norm": 0.6656563878059387, "learning_rate": 0.0001392545614014708, "loss": 2.9735, "step": 41626 }, { "epoch": 2.04, "grad_norm": 0.6701093316078186, "learning_rate": 0.00013924156209497638, "loss": 2.7599, "step": 41627 }, { "epoch": 2.04, "grad_norm": 0.6650725603103638, "learning_rate": 0.0001392285632118886, "loss": 2.8968, "step": 41628 }, { "epoch": 2.04, "grad_norm": 0.6751466989517212, "learning_rate": 0.00013921556475224184, "loss": 2.9247, "step": 41629 }, { "epoch": 2.04, "grad_norm": 0.6479591727256775, "learning_rate": 0.00013920256671607047, "loss": 3.0724, "step": 41630 }, { "epoch": 2.04, "grad_norm": 0.7332056164741516, "learning_rate": 0.00013918956910340856, "loss": 2.8461, "step": 41631 }, { "epoch": 2.04, "grad_norm": 0.690640926361084, "learning_rate": 0.00013917657191429042, "loss": 2.9417, "step": 41632 }, { "epoch": 2.04, "grad_norm": 0.6897100806236267, "learning_rate": 0.0001391635751487501, "loss": 2.9565, "step": 41633 }, { "epoch": 2.04, "grad_norm": 0.6536319851875305, "learning_rate": 0.000139150578806822, "loss": 2.6714, "step": 41634 }, { "epoch": 2.04, "grad_norm": 0.8222829699516296, "learning_rate": 0.0001391375828885404, "loss": 2.9552, "step": 41635 }, { "epoch": 2.04, "grad_norm": 0.7058513164520264, "learning_rate": 0.00013912458739393933, "loss": 2.6434, "step": 41636 }, { "epoch": 2.04, "grad_norm": 0.6645085215568542, "learning_rate": 0.00013911159232305327, "loss": 3.0102, "step": 41637 }, { "epoch": 2.04, "grad_norm": 0.6806157231330872, "learning_rate": 0.0001390985976759163, "loss": 2.9867, "step": 41638 }, { "epoch": 2.04, "grad_norm": 0.7031665444374084, "learning_rate": 0.00013908560345256255, "loss": 3.0195, "step": 41639 }, { "epoch": 2.04, "grad_norm": 0.6819475889205933, "learning_rate": 0.00013907260965302648, "loss": 3.0876, "step": 41640 }, { "epoch": 2.04, "grad_norm": 0.6787337064743042, "learning_rate": 0.00013905961627734204, "loss": 2.7164, "step": 41641 }, { "epoch": 2.04, "grad_norm": 0.7834779024124146, "learning_rate": 0.00013904662332554372, "loss": 2.7447, "step": 41642 }, { "epoch": 2.04, "grad_norm": 0.6589922904968262, "learning_rate": 0.0001390336307976655, "loss": 2.8369, "step": 41643 }, { "epoch": 2.04, "grad_norm": 0.6925103664398193, "learning_rate": 0.00013902063869374175, "loss": 2.7699, "step": 41644 }, { "epoch": 2.04, "grad_norm": 0.6515282392501831, "learning_rate": 0.00013900764701380675, "loss": 2.9382, "step": 41645 }, { "epoch": 2.04, "grad_norm": 0.7050473690032959, "learning_rate": 0.0001389946557578946, "loss": 2.8169, "step": 41646 }, { "epoch": 2.04, "grad_norm": 0.7375444769859314, "learning_rate": 0.00013898166492603957, "loss": 2.8523, "step": 41647 }, { "epoch": 2.04, "grad_norm": 0.8539415597915649, "learning_rate": 0.00013896867451827576, "loss": 2.9165, "step": 41648 }, { "epoch": 2.04, "grad_norm": 0.7187871336936951, "learning_rate": 0.0001389556845346374, "loss": 2.6562, "step": 41649 }, { "epoch": 2.04, "grad_norm": 0.6538518071174622, "learning_rate": 0.00013894269497515894, "loss": 2.9595, "step": 41650 }, { "epoch": 2.04, "grad_norm": 0.6808871030807495, "learning_rate": 0.00013892970583987427, "loss": 2.7846, "step": 41651 }, { "epoch": 2.04, "grad_norm": 0.6848717927932739, "learning_rate": 0.0001389167171288179, "loss": 2.8772, "step": 41652 }, { "epoch": 2.04, "grad_norm": 0.6991068720817566, "learning_rate": 0.0001389037288420239, "loss": 3.1921, "step": 41653 }, { "epoch": 2.04, "grad_norm": 0.701252818107605, "learning_rate": 0.00013889074097952637, "loss": 2.9685, "step": 41654 }, { "epoch": 2.04, "grad_norm": 0.6782150864601135, "learning_rate": 0.00013887775354135971, "loss": 2.807, "step": 41655 }, { "epoch": 2.04, "grad_norm": 0.6534834504127502, "learning_rate": 0.00013886476652755794, "loss": 2.9893, "step": 41656 }, { "epoch": 2.04, "grad_norm": 0.6911651492118835, "learning_rate": 0.00013885177993815548, "loss": 2.8997, "step": 41657 }, { "epoch": 2.04, "grad_norm": 0.6815614700317383, "learning_rate": 0.00013883879377318627, "loss": 2.8842, "step": 41658 }, { "epoch": 2.04, "grad_norm": 0.681822657585144, "learning_rate": 0.00013882580803268482, "loss": 2.9365, "step": 41659 }, { "epoch": 2.04, "grad_norm": 0.6272294521331787, "learning_rate": 0.00013881282271668515, "loss": 2.9309, "step": 41660 }, { "epoch": 2.04, "grad_norm": 0.7433640360832214, "learning_rate": 0.0001387998378252214, "loss": 2.9243, "step": 41661 }, { "epoch": 2.04, "grad_norm": 0.6774015426635742, "learning_rate": 0.00013878685335832789, "loss": 3.0579, "step": 41662 }, { "epoch": 2.04, "grad_norm": 0.6565629839897156, "learning_rate": 0.00013877386931603874, "loss": 3.0814, "step": 41663 }, { "epoch": 2.04, "grad_norm": 0.6806601285934448, "learning_rate": 0.00013876088569838815, "loss": 2.9186, "step": 41664 }, { "epoch": 2.04, "grad_norm": 0.6668409109115601, "learning_rate": 0.0001387479025054105, "loss": 2.8923, "step": 41665 }, { "epoch": 2.04, "grad_norm": 0.6895928978919983, "learning_rate": 0.0001387349197371397, "loss": 3.0788, "step": 41666 }, { "epoch": 2.04, "grad_norm": 0.634085476398468, "learning_rate": 0.00013872193739361023, "loss": 2.8866, "step": 41667 }, { "epoch": 2.04, "grad_norm": 0.704136073589325, "learning_rate": 0.0001387089554748561, "loss": 3.0615, "step": 41668 }, { "epoch": 2.04, "grad_norm": 0.6691809296607971, "learning_rate": 0.00013869597398091148, "loss": 3.0299, "step": 41669 }, { "epoch": 2.04, "grad_norm": 0.6336991190910339, "learning_rate": 0.00013868299291181068, "loss": 2.7573, "step": 41670 }, { "epoch": 2.04, "grad_norm": 0.698936939239502, "learning_rate": 0.00013867001226758774, "loss": 2.8835, "step": 41671 }, { "epoch": 2.04, "grad_norm": 0.7142788767814636, "learning_rate": 0.00013865703204827707, "loss": 3.3196, "step": 41672 }, { "epoch": 2.04, "grad_norm": 0.694664716720581, "learning_rate": 0.0001386440522539126, "loss": 2.9352, "step": 41673 }, { "epoch": 2.04, "grad_norm": 0.666140079498291, "learning_rate": 0.00013863107288452877, "loss": 2.9518, "step": 41674 }, { "epoch": 2.04, "grad_norm": 0.722091555595398, "learning_rate": 0.00013861809394015963, "loss": 2.8776, "step": 41675 }, { "epoch": 2.04, "grad_norm": 0.6839096546173096, "learning_rate": 0.00013860511542083923, "loss": 3.0636, "step": 41676 }, { "epoch": 2.04, "grad_norm": 0.666094183921814, "learning_rate": 0.000138592137326602, "loss": 2.9686, "step": 41677 }, { "epoch": 2.04, "grad_norm": 0.696448802947998, "learning_rate": 0.00013857915965748195, "loss": 3.0072, "step": 41678 }, { "epoch": 2.04, "grad_norm": 0.7537996172904968, "learning_rate": 0.00013856618241351332, "loss": 2.8667, "step": 41679 }, { "epoch": 2.04, "grad_norm": 0.6860277652740479, "learning_rate": 0.00013855320559473037, "loss": 2.8746, "step": 41680 }, { "epoch": 2.04, "grad_norm": 0.6779218316078186, "learning_rate": 0.00013854022920116725, "loss": 2.9084, "step": 41681 }, { "epoch": 2.04, "grad_norm": 0.680435836315155, "learning_rate": 0.00013852725323285807, "loss": 2.9952, "step": 41682 }, { "epoch": 2.04, "grad_norm": 0.6816911697387695, "learning_rate": 0.00013851427768983693, "loss": 2.7457, "step": 41683 }, { "epoch": 2.04, "grad_norm": 0.7383108735084534, "learning_rate": 0.0001385013025721382, "loss": 3.1948, "step": 41684 }, { "epoch": 2.04, "grad_norm": 0.6533733606338501, "learning_rate": 0.00013848832787979586, "loss": 2.7713, "step": 41685 }, { "epoch": 2.04, "grad_norm": 0.6958843469619751, "learning_rate": 0.00013847535361284418, "loss": 3.1565, "step": 41686 }, { "epoch": 2.04, "grad_norm": 0.6978946924209595, "learning_rate": 0.00013846237977131742, "loss": 2.8521, "step": 41687 }, { "epoch": 2.04, "grad_norm": 0.6730976104736328, "learning_rate": 0.0001384494063552496, "loss": 3.025, "step": 41688 }, { "epoch": 2.04, "grad_norm": 0.6826785802841187, "learning_rate": 0.000138436433364675, "loss": 2.9969, "step": 41689 }, { "epoch": 2.04, "grad_norm": 0.7159188389778137, "learning_rate": 0.0001384234607996278, "loss": 2.9785, "step": 41690 }, { "epoch": 2.04, "grad_norm": 0.6930112838745117, "learning_rate": 0.00013841048866014197, "loss": 2.9677, "step": 41691 }, { "epoch": 2.04, "grad_norm": 0.6859667897224426, "learning_rate": 0.00013839751694625196, "loss": 2.9933, "step": 41692 }, { "epoch": 2.04, "grad_norm": 0.7143214344978333, "learning_rate": 0.00013838454565799163, "loss": 2.9476, "step": 41693 }, { "epoch": 2.04, "grad_norm": 0.7018111944198608, "learning_rate": 0.00013837157479539542, "loss": 2.7747, "step": 41694 }, { "epoch": 2.04, "grad_norm": 0.6992251873016357, "learning_rate": 0.00013835860435849727, "loss": 3.0552, "step": 41695 }, { "epoch": 2.04, "grad_norm": 0.6739795207977295, "learning_rate": 0.00013834563434733156, "loss": 2.8918, "step": 41696 }, { "epoch": 2.04, "grad_norm": 0.7193833589553833, "learning_rate": 0.00013833266476193237, "loss": 3.0882, "step": 41697 }, { "epoch": 2.04, "grad_norm": 0.6857161521911621, "learning_rate": 0.0001383196956023337, "loss": 3.0136, "step": 41698 }, { "epoch": 2.04, "grad_norm": 0.73293137550354, "learning_rate": 0.0001383067268685699, "loss": 2.7954, "step": 41699 }, { "epoch": 2.04, "grad_norm": 0.6800030469894409, "learning_rate": 0.000138293758560675, "loss": 3.0618, "step": 41700 }, { "epoch": 2.04, "grad_norm": 0.6845666766166687, "learning_rate": 0.0001382807906786832, "loss": 3.0205, "step": 41701 }, { "epoch": 2.04, "grad_norm": 0.6896492838859558, "learning_rate": 0.00013826782322262881, "loss": 2.8907, "step": 41702 }, { "epoch": 2.04, "grad_norm": 0.6447883248329163, "learning_rate": 0.00013825485619254582, "loss": 2.8803, "step": 41703 }, { "epoch": 2.04, "grad_norm": 0.6869139671325684, "learning_rate": 0.00013824188958846845, "loss": 3.2243, "step": 41704 }, { "epoch": 2.04, "grad_norm": 0.6388272047042847, "learning_rate": 0.00013822892341043067, "loss": 2.7329, "step": 41705 }, { "epoch": 2.04, "grad_norm": 0.6573567986488342, "learning_rate": 0.00013821595765846676, "loss": 2.9315, "step": 41706 }, { "epoch": 2.04, "grad_norm": 0.6946920156478882, "learning_rate": 0.00013820299233261102, "loss": 2.8361, "step": 41707 }, { "epoch": 2.04, "grad_norm": 0.7050734758377075, "learning_rate": 0.00013819002743289732, "loss": 2.8692, "step": 41708 }, { "epoch": 2.04, "grad_norm": 0.7093161940574646, "learning_rate": 0.00013817706295936007, "loss": 2.8641, "step": 41709 }, { "epoch": 2.04, "grad_norm": 0.6960554122924805, "learning_rate": 0.0001381640989120332, "loss": 2.9397, "step": 41710 }, { "epoch": 2.04, "grad_norm": 0.6445857882499695, "learning_rate": 0.00013815113529095104, "loss": 2.9978, "step": 41711 }, { "epoch": 2.04, "grad_norm": 0.7119023203849792, "learning_rate": 0.00013813817209614766, "loss": 2.9848, "step": 41712 }, { "epoch": 2.04, "grad_norm": 0.6626527309417725, "learning_rate": 0.00013812520932765702, "loss": 2.8887, "step": 41713 }, { "epoch": 2.04, "grad_norm": 0.6804422736167908, "learning_rate": 0.00013811224698551358, "loss": 3.0319, "step": 41714 }, { "epoch": 2.04, "grad_norm": 0.7170805335044861, "learning_rate": 0.0001380992850697512, "loss": 2.9286, "step": 41715 }, { "epoch": 2.04, "grad_norm": 0.6595010161399841, "learning_rate": 0.0001380863235804042, "loss": 2.9433, "step": 41716 }, { "epoch": 2.04, "grad_norm": 0.6733608245849609, "learning_rate": 0.0001380733625175067, "loss": 2.8411, "step": 41717 }, { "epoch": 2.04, "grad_norm": 0.8234678506851196, "learning_rate": 0.00013806040188109282, "loss": 2.8652, "step": 41718 }, { "epoch": 2.04, "grad_norm": 0.7054716348648071, "learning_rate": 0.00013804744167119666, "loss": 2.7808, "step": 41719 }, { "epoch": 2.04, "grad_norm": 0.7219303250312805, "learning_rate": 0.00013803448188785228, "loss": 2.8326, "step": 41720 }, { "epoch": 2.04, "grad_norm": 0.6587951183319092, "learning_rate": 0.0001380215225310939, "loss": 2.8626, "step": 41721 }, { "epoch": 2.04, "grad_norm": 0.6685791611671448, "learning_rate": 0.0001380085636009558, "loss": 2.9384, "step": 41722 }, { "epoch": 2.04, "grad_norm": 0.7046728134155273, "learning_rate": 0.00013799560509747185, "loss": 2.9112, "step": 41723 }, { "epoch": 2.04, "grad_norm": 1.1903138160705566, "learning_rate": 0.00013798264702067636, "loss": 2.7223, "step": 41724 }, { "epoch": 2.04, "grad_norm": 0.6698146462440491, "learning_rate": 0.00013796968937060345, "loss": 3.0392, "step": 41725 }, { "epoch": 2.04, "grad_norm": 0.6656641960144043, "learning_rate": 0.00013795673214728703, "loss": 3.1732, "step": 41726 }, { "epoch": 2.04, "grad_norm": 0.6809387803077698, "learning_rate": 0.00013794377535076153, "loss": 3.057, "step": 41727 }, { "epoch": 2.04, "grad_norm": 0.7437176704406738, "learning_rate": 0.00013793081898106081, "loss": 3.0012, "step": 41728 }, { "epoch": 2.05, "grad_norm": 0.6619541645050049, "learning_rate": 0.0001379178630382193, "loss": 3.0339, "step": 41729 }, { "epoch": 2.05, "grad_norm": 0.6481929421424866, "learning_rate": 0.00013790490752227074, "loss": 2.806, "step": 41730 }, { "epoch": 2.05, "grad_norm": 0.6587005853652954, "learning_rate": 0.00013789195243324952, "loss": 3.1365, "step": 41731 }, { "epoch": 2.05, "grad_norm": 0.6814006567001343, "learning_rate": 0.00013787899777118978, "loss": 2.8502, "step": 41732 }, { "epoch": 2.05, "grad_norm": 0.6785389184951782, "learning_rate": 0.00013786604353612558, "loss": 2.901, "step": 41733 }, { "epoch": 2.05, "grad_norm": 0.6514621376991272, "learning_rate": 0.00013785308972809102, "loss": 2.7796, "step": 41734 }, { "epoch": 2.05, "grad_norm": 0.7052819728851318, "learning_rate": 0.00013784013634712009, "loss": 2.825, "step": 41735 }, { "epoch": 2.05, "grad_norm": 0.6751008629798889, "learning_rate": 0.00013782718339324702, "loss": 2.9968, "step": 41736 }, { "epoch": 2.05, "grad_norm": 0.6512966752052307, "learning_rate": 0.00013781423086650608, "loss": 2.9316, "step": 41737 }, { "epoch": 2.05, "grad_norm": 0.6523333787918091, "learning_rate": 0.00013780127876693112, "loss": 3.0541, "step": 41738 }, { "epoch": 2.05, "grad_norm": 0.7050060033798218, "learning_rate": 0.0001377883270945565, "loss": 3.0663, "step": 41739 }, { "epoch": 2.05, "grad_norm": 0.7077402472496033, "learning_rate": 0.00013777537584941618, "loss": 3.0436, "step": 41740 }, { "epoch": 2.05, "grad_norm": 0.6588230133056641, "learning_rate": 0.0001377624250315442, "loss": 2.858, "step": 41741 }, { "epoch": 2.05, "grad_norm": 0.708586573600769, "learning_rate": 0.00013774947464097488, "loss": 2.8115, "step": 41742 }, { "epoch": 2.05, "grad_norm": 0.6874078512191772, "learning_rate": 0.0001377365246777421, "loss": 2.7809, "step": 41743 }, { "epoch": 2.05, "grad_norm": 0.6789014935493469, "learning_rate": 0.00013772357514188022, "loss": 3.0268, "step": 41744 }, { "epoch": 2.05, "grad_norm": 0.7179741263389587, "learning_rate": 0.00013771062603342308, "loss": 3.13, "step": 41745 }, { "epoch": 2.05, "grad_norm": 0.6951627731323242, "learning_rate": 0.00013769767735240493, "loss": 3.0357, "step": 41746 }, { "epoch": 2.05, "grad_norm": 0.6996464133262634, "learning_rate": 0.00013768472909886007, "loss": 2.9498, "step": 41747 }, { "epoch": 2.05, "grad_norm": 0.6561077237129211, "learning_rate": 0.0001376717812728222, "loss": 2.8745, "step": 41748 }, { "epoch": 2.05, "grad_norm": 0.736611008644104, "learning_rate": 0.00013765883387432568, "loss": 2.8959, "step": 41749 }, { "epoch": 2.05, "grad_norm": 0.6801815629005432, "learning_rate": 0.00013764588690340447, "loss": 3.0878, "step": 41750 }, { "epoch": 2.05, "grad_norm": 0.6809715032577515, "learning_rate": 0.00013763294036009274, "loss": 2.9666, "step": 41751 }, { "epoch": 2.05, "grad_norm": 0.7071965336799622, "learning_rate": 0.00013761999424442473, "loss": 2.9907, "step": 41752 }, { "epoch": 2.05, "grad_norm": 0.6758064031600952, "learning_rate": 0.00013760704855643428, "loss": 2.7936, "step": 41753 }, { "epoch": 2.05, "grad_norm": 0.6896607875823975, "learning_rate": 0.0001375941032961557, "loss": 2.8758, "step": 41754 }, { "epoch": 2.05, "grad_norm": 0.6973819136619568, "learning_rate": 0.000137581158463623, "loss": 2.994, "step": 41755 }, { "epoch": 2.05, "grad_norm": 0.6735284924507141, "learning_rate": 0.0001375682140588702, "loss": 2.8614, "step": 41756 }, { "epoch": 2.05, "grad_norm": 0.680656373500824, "learning_rate": 0.00013755527008193154, "loss": 2.9821, "step": 41757 }, { "epoch": 2.05, "grad_norm": 0.6982008814811707, "learning_rate": 0.00013754232653284093, "loss": 2.8955, "step": 41758 }, { "epoch": 2.05, "grad_norm": 0.683266282081604, "learning_rate": 0.0001375293834116327, "loss": 3.0358, "step": 41759 }, { "epoch": 2.05, "grad_norm": 0.6826072335243225, "learning_rate": 0.00013751644071834066, "loss": 2.7453, "step": 41760 }, { "epoch": 2.05, "grad_norm": 0.7050215005874634, "learning_rate": 0.0001375034984529992, "loss": 2.7758, "step": 41761 }, { "epoch": 2.05, "grad_norm": 0.7060586214065552, "learning_rate": 0.0001374905566156422, "loss": 2.8637, "step": 41762 }, { "epoch": 2.05, "grad_norm": 0.7489438056945801, "learning_rate": 0.0001374776152063037, "loss": 2.8327, "step": 41763 }, { "epoch": 2.05, "grad_norm": 0.7340999841690063, "learning_rate": 0.000137464674225018, "loss": 2.963, "step": 41764 }, { "epoch": 2.05, "grad_norm": 0.6897082328796387, "learning_rate": 0.00013745173367181899, "loss": 2.8568, "step": 41765 }, { "epoch": 2.05, "grad_norm": 0.7091054320335388, "learning_rate": 0.0001374387935467409, "loss": 2.9136, "step": 41766 }, { "epoch": 2.05, "grad_norm": 0.7176274657249451, "learning_rate": 0.00013742585384981767, "loss": 2.8918, "step": 41767 }, { "epoch": 2.05, "grad_norm": 0.675040602684021, "learning_rate": 0.00013741291458108342, "loss": 2.8829, "step": 41768 }, { "epoch": 2.05, "grad_norm": 0.6755308508872986, "learning_rate": 0.00013739997574057237, "loss": 3.0007, "step": 41769 }, { "epoch": 2.05, "grad_norm": 0.698590874671936, "learning_rate": 0.0001373870373283185, "loss": 3.1754, "step": 41770 }, { "epoch": 2.05, "grad_norm": 0.6927210688591003, "learning_rate": 0.00013737409934435588, "loss": 2.8816, "step": 41771 }, { "epoch": 2.05, "grad_norm": 0.7321736812591553, "learning_rate": 0.00013736116178871848, "loss": 2.995, "step": 41772 }, { "epoch": 2.05, "grad_norm": 0.7068862318992615, "learning_rate": 0.0001373482246614405, "loss": 2.895, "step": 41773 }, { "epoch": 2.05, "grad_norm": 0.6535940170288086, "learning_rate": 0.00013733528796255605, "loss": 2.9816, "step": 41774 }, { "epoch": 2.05, "grad_norm": 0.6542276740074158, "learning_rate": 0.00013732235169209908, "loss": 2.8163, "step": 41775 }, { "epoch": 2.05, "grad_norm": 0.7153056859970093, "learning_rate": 0.0001373094158501038, "loss": 2.7931, "step": 41776 }, { "epoch": 2.05, "grad_norm": 0.7129161357879639, "learning_rate": 0.00013729648043660423, "loss": 2.7878, "step": 41777 }, { "epoch": 2.05, "grad_norm": 0.656928300857544, "learning_rate": 0.0001372835454516343, "loss": 2.8457, "step": 41778 }, { "epoch": 2.05, "grad_norm": 0.733238935470581, "learning_rate": 0.00013727061089522833, "loss": 2.9476, "step": 41779 }, { "epoch": 2.05, "grad_norm": 0.6839013695716858, "learning_rate": 0.0001372576767674201, "loss": 2.9249, "step": 41780 }, { "epoch": 2.05, "grad_norm": 0.7424287796020508, "learning_rate": 0.000137244743068244, "loss": 2.9104, "step": 41781 }, { "epoch": 2.05, "grad_norm": 0.7072665691375732, "learning_rate": 0.00013723180979773378, "loss": 2.9528, "step": 41782 }, { "epoch": 2.05, "grad_norm": 0.7041286826133728, "learning_rate": 0.00013721887695592372, "loss": 2.7528, "step": 41783 }, { "epoch": 2.05, "grad_norm": 0.7332327961921692, "learning_rate": 0.00013720594454284788, "loss": 2.7556, "step": 41784 }, { "epoch": 2.05, "grad_norm": 0.6847696900367737, "learning_rate": 0.0001371930125585401, "loss": 2.7119, "step": 41785 }, { "epoch": 2.05, "grad_norm": 0.6434175968170166, "learning_rate": 0.00013718008100303472, "loss": 2.8625, "step": 41786 }, { "epoch": 2.05, "grad_norm": 0.6727170944213867, "learning_rate": 0.00013716714987636555, "loss": 2.9554, "step": 41787 }, { "epoch": 2.05, "grad_norm": 0.6850985884666443, "learning_rate": 0.00013715421917856677, "loss": 3.069, "step": 41788 }, { "epoch": 2.05, "grad_norm": 0.6658537983894348, "learning_rate": 0.00013714128890967252, "loss": 2.8858, "step": 41789 }, { "epoch": 2.05, "grad_norm": 0.6863417625427246, "learning_rate": 0.00013712835906971667, "loss": 2.8266, "step": 41790 }, { "epoch": 2.05, "grad_norm": 0.7340531349182129, "learning_rate": 0.00013711542965873352, "loss": 2.7212, "step": 41791 }, { "epoch": 2.05, "grad_norm": 0.7077760696411133, "learning_rate": 0.00013710250067675697, "loss": 2.9836, "step": 41792 }, { "epoch": 2.05, "grad_norm": 0.7007636427879333, "learning_rate": 0.000137089572123821, "loss": 2.7947, "step": 41793 }, { "epoch": 2.05, "grad_norm": 0.6507523655891418, "learning_rate": 0.0001370766439999598, "loss": 2.7803, "step": 41794 }, { "epoch": 2.05, "grad_norm": 0.6822836995124817, "learning_rate": 0.00013706371630520727, "loss": 2.7836, "step": 41795 }, { "epoch": 2.05, "grad_norm": 1.2069634199142456, "learning_rate": 0.00013705078903959768, "loss": 2.9114, "step": 41796 }, { "epoch": 2.05, "grad_norm": 0.6869301795959473, "learning_rate": 0.00013703786220316483, "loss": 2.7778, "step": 41797 }, { "epoch": 2.05, "grad_norm": 0.6838812828063965, "learning_rate": 0.000137024935795943, "loss": 2.9701, "step": 41798 }, { "epoch": 2.05, "grad_norm": 0.6679784655570984, "learning_rate": 0.00013701200981796615, "loss": 2.873, "step": 41799 }, { "epoch": 2.05, "grad_norm": 0.6981244683265686, "learning_rate": 0.00013699908426926814, "loss": 3.1318, "step": 41800 }, { "epoch": 2.05, "grad_norm": 0.6437106728553772, "learning_rate": 0.00013698615914988334, "loss": 2.8379, "step": 41801 }, { "epoch": 2.05, "grad_norm": 0.6813018321990967, "learning_rate": 0.00013697323445984545, "loss": 2.8552, "step": 41802 }, { "epoch": 2.05, "grad_norm": 0.7143990993499756, "learning_rate": 0.00013696031019918873, "loss": 2.9512, "step": 41803 }, { "epoch": 2.05, "grad_norm": 0.7127072215080261, "learning_rate": 0.00013694738636794727, "loss": 2.966, "step": 41804 }, { "epoch": 2.05, "grad_norm": 0.7043535709381104, "learning_rate": 0.000136934462966155, "loss": 2.801, "step": 41805 }, { "epoch": 2.05, "grad_norm": 0.6857683062553406, "learning_rate": 0.000136921539993846, "loss": 2.8319, "step": 41806 }, { "epoch": 2.05, "grad_norm": 0.6381077766418457, "learning_rate": 0.00013690861745105414, "loss": 3.0228, "step": 41807 }, { "epoch": 2.05, "grad_norm": 0.6954429745674133, "learning_rate": 0.00013689569533781364, "loss": 2.9477, "step": 41808 }, { "epoch": 2.05, "grad_norm": 0.6456500291824341, "learning_rate": 0.00013688277365415854, "loss": 3.0681, "step": 41809 }, { "epoch": 2.05, "grad_norm": 0.6630203127861023, "learning_rate": 0.00013686985240012276, "loss": 2.8573, "step": 41810 }, { "epoch": 2.05, "grad_norm": 0.6759008169174194, "learning_rate": 0.00013685693157574046, "loss": 2.7842, "step": 41811 }, { "epoch": 2.05, "grad_norm": 0.6748966574668884, "learning_rate": 0.00013684401118104552, "loss": 2.9473, "step": 41812 }, { "epoch": 2.05, "grad_norm": 0.7730910778045654, "learning_rate": 0.00013683109121607218, "loss": 2.636, "step": 41813 }, { "epoch": 2.05, "grad_norm": 0.6848281025886536, "learning_rate": 0.00013681817168085432, "loss": 2.8989, "step": 41814 }, { "epoch": 2.05, "grad_norm": 0.6651986241340637, "learning_rate": 0.0001368052525754259, "loss": 2.7469, "step": 41815 }, { "epoch": 2.05, "grad_norm": 0.6602210998535156, "learning_rate": 0.00013679233389982112, "loss": 3.0205, "step": 41816 }, { "epoch": 2.05, "grad_norm": 0.7078651189804077, "learning_rate": 0.00013677941565407384, "loss": 2.8248, "step": 41817 }, { "epoch": 2.05, "grad_norm": 0.6716862320899963, "learning_rate": 0.00013676649783821813, "loss": 2.8242, "step": 41818 }, { "epoch": 2.05, "grad_norm": 0.6786720156669617, "learning_rate": 0.00013675358045228821, "loss": 3.0426, "step": 41819 }, { "epoch": 2.05, "grad_norm": 0.6606864929199219, "learning_rate": 0.00013674066349631791, "loss": 3.0192, "step": 41820 }, { "epoch": 2.05, "grad_norm": 0.6600707769393921, "learning_rate": 0.00013672774697034127, "loss": 3.1479, "step": 41821 }, { "epoch": 2.05, "grad_norm": 0.7069227695465088, "learning_rate": 0.00013671483087439223, "loss": 2.8687, "step": 41822 }, { "epoch": 2.05, "grad_norm": 0.6884543895721436, "learning_rate": 0.0001367019152085049, "loss": 2.9547, "step": 41823 }, { "epoch": 2.05, "grad_norm": 0.6656718850135803, "learning_rate": 0.0001366889999727134, "loss": 2.9701, "step": 41824 }, { "epoch": 2.05, "grad_norm": 0.6968078017234802, "learning_rate": 0.00013667608516705154, "loss": 3.0355, "step": 41825 }, { "epoch": 2.05, "grad_norm": 0.7189076542854309, "learning_rate": 0.0001366631707915536, "loss": 2.9283, "step": 41826 }, { "epoch": 2.05, "grad_norm": 0.6748219728469849, "learning_rate": 0.00013665025684625335, "loss": 3.0839, "step": 41827 }, { "epoch": 2.05, "grad_norm": 0.6655088067054749, "learning_rate": 0.00013663734333118483, "loss": 2.8599, "step": 41828 }, { "epoch": 2.05, "grad_norm": 0.6688360571861267, "learning_rate": 0.00013662443024638218, "loss": 3.2349, "step": 41829 }, { "epoch": 2.05, "grad_norm": 0.7066716551780701, "learning_rate": 0.00013661151759187927, "loss": 2.9871, "step": 41830 }, { "epoch": 2.05, "grad_norm": 0.6709800362586975, "learning_rate": 0.00013659860536771023, "loss": 2.815, "step": 41831 }, { "epoch": 2.05, "grad_norm": 0.7102221250534058, "learning_rate": 0.00013658569357390897, "loss": 2.8565, "step": 41832 }, { "epoch": 2.05, "grad_norm": 0.6634939312934875, "learning_rate": 0.0001365727822105095, "loss": 2.7641, "step": 41833 }, { "epoch": 2.05, "grad_norm": 0.6929799318313599, "learning_rate": 0.000136559871277546, "loss": 2.8275, "step": 41834 }, { "epoch": 2.05, "grad_norm": 0.695609450340271, "learning_rate": 0.00013654696077505232, "loss": 3.0188, "step": 41835 }, { "epoch": 2.05, "grad_norm": 0.6726747751235962, "learning_rate": 0.00013653405070306252, "loss": 2.9284, "step": 41836 }, { "epoch": 2.05, "grad_norm": 0.7245111465454102, "learning_rate": 0.00013652114106161043, "loss": 2.8936, "step": 41837 }, { "epoch": 2.05, "grad_norm": 0.7520313858985901, "learning_rate": 0.00013650823185073035, "loss": 2.9826, "step": 41838 }, { "epoch": 2.05, "grad_norm": 0.6968186497688293, "learning_rate": 0.00013649532307045595, "loss": 3.0164, "step": 41839 }, { "epoch": 2.05, "grad_norm": 0.7093591094017029, "learning_rate": 0.00013648241472082143, "loss": 3.0614, "step": 41840 }, { "epoch": 2.05, "grad_norm": 0.6695505380630493, "learning_rate": 0.0001364695068018609, "loss": 2.957, "step": 41841 }, { "epoch": 2.05, "grad_norm": 0.721322238445282, "learning_rate": 0.0001364565993136082, "loss": 2.7591, "step": 41842 }, { "epoch": 2.05, "grad_norm": 0.6672532558441162, "learning_rate": 0.00013644369225609734, "loss": 3.027, "step": 41843 }, { "epoch": 2.05, "grad_norm": 0.7743553519248962, "learning_rate": 0.00013643078562936218, "loss": 2.7336, "step": 41844 }, { "epoch": 2.05, "grad_norm": 0.7321560978889465, "learning_rate": 0.00013641787943343686, "loss": 2.8884, "step": 41845 }, { "epoch": 2.05, "grad_norm": 0.7255355715751648, "learning_rate": 0.0001364049736683555, "loss": 2.8195, "step": 41846 }, { "epoch": 2.05, "grad_norm": 0.721168577671051, "learning_rate": 0.00013639206833415183, "loss": 2.7635, "step": 41847 }, { "epoch": 2.05, "grad_norm": 0.6950680613517761, "learning_rate": 0.0001363791634308601, "loss": 2.7786, "step": 41848 }, { "epoch": 2.05, "grad_norm": 0.7091295719146729, "learning_rate": 0.00013636625895851413, "loss": 3.0322, "step": 41849 }, { "epoch": 2.05, "grad_norm": 0.7242528796195984, "learning_rate": 0.00013635335491714786, "loss": 2.8183, "step": 41850 }, { "epoch": 2.05, "grad_norm": 0.7005438804626465, "learning_rate": 0.00013634045130679544, "loss": 2.9768, "step": 41851 }, { "epoch": 2.05, "grad_norm": 0.6915015578269958, "learning_rate": 0.00013632754812749066, "loss": 3.1398, "step": 41852 }, { "epoch": 2.05, "grad_norm": 0.6812850832939148, "learning_rate": 0.00013631464537926775, "loss": 3.0446, "step": 41853 }, { "epoch": 2.05, "grad_norm": 0.69903165102005, "learning_rate": 0.00013630174306216044, "loss": 2.9758, "step": 41854 }, { "epoch": 2.05, "grad_norm": 0.6556075215339661, "learning_rate": 0.00013628884117620284, "loss": 2.8251, "step": 41855 }, { "epoch": 2.05, "grad_norm": 0.9440958499908447, "learning_rate": 0.00013627593972142903, "loss": 2.9879, "step": 41856 }, { "epoch": 2.05, "grad_norm": 0.6949568390846252, "learning_rate": 0.00013626303869787292, "loss": 2.7712, "step": 41857 }, { "epoch": 2.05, "grad_norm": 0.7072689533233643, "learning_rate": 0.0001362501381055684, "loss": 2.9006, "step": 41858 }, { "epoch": 2.05, "grad_norm": 0.7018836140632629, "learning_rate": 0.00013623723794454944, "loss": 3.0257, "step": 41859 }, { "epoch": 2.05, "grad_norm": 0.6747689247131348, "learning_rate": 0.00013622433821485006, "loss": 2.9541, "step": 41860 }, { "epoch": 2.05, "grad_norm": 0.704422116279602, "learning_rate": 0.00013621143891650437, "loss": 2.9823, "step": 41861 }, { "epoch": 2.05, "grad_norm": 0.6954941749572754, "learning_rate": 0.00013619854004954612, "loss": 3.0837, "step": 41862 }, { "epoch": 2.05, "grad_norm": 0.7778325080871582, "learning_rate": 0.00013618564161400947, "loss": 2.797, "step": 41863 }, { "epoch": 2.05, "grad_norm": 0.7002326846122742, "learning_rate": 0.00013617274360992832, "loss": 2.8489, "step": 41864 }, { "epoch": 2.05, "grad_norm": 0.6892054080963135, "learning_rate": 0.00013615984603733655, "loss": 2.9662, "step": 41865 }, { "epoch": 2.05, "grad_norm": 0.6607648730278015, "learning_rate": 0.0001361469488962683, "loss": 2.9692, "step": 41866 }, { "epoch": 2.05, "grad_norm": 0.6442403197288513, "learning_rate": 0.00013613405218675735, "loss": 2.8593, "step": 41867 }, { "epoch": 2.05, "grad_norm": 0.6695894002914429, "learning_rate": 0.0001361211559088379, "loss": 2.7185, "step": 41868 }, { "epoch": 2.05, "grad_norm": 0.725757360458374, "learning_rate": 0.00013610826006254368, "loss": 2.9114, "step": 41869 }, { "epoch": 2.05, "grad_norm": 0.7685546278953552, "learning_rate": 0.00013609536464790871, "loss": 2.8982, "step": 41870 }, { "epoch": 2.05, "grad_norm": 0.6293439865112305, "learning_rate": 0.00013608246966496714, "loss": 2.8666, "step": 41871 }, { "epoch": 2.05, "grad_norm": 0.7110158205032349, "learning_rate": 0.00013606957511375285, "loss": 2.7713, "step": 41872 }, { "epoch": 2.05, "grad_norm": 0.687812864780426, "learning_rate": 0.00013605668099429967, "loss": 3.1352, "step": 41873 }, { "epoch": 2.05, "grad_norm": 0.6826140284538269, "learning_rate": 0.00013604378730664156, "loss": 2.7359, "step": 41874 }, { "epoch": 2.05, "grad_norm": 0.6955284476280212, "learning_rate": 0.0001360308940508126, "loss": 2.8839, "step": 41875 }, { "epoch": 2.05, "grad_norm": 0.744838535785675, "learning_rate": 0.00013601800122684676, "loss": 2.8754, "step": 41876 }, { "epoch": 2.05, "grad_norm": 0.7015159130096436, "learning_rate": 0.00013600510883477786, "loss": 2.9217, "step": 41877 }, { "epoch": 2.05, "grad_norm": 0.6986700296401978, "learning_rate": 0.00013599221687464005, "loss": 2.8404, "step": 41878 }, { "epoch": 2.05, "grad_norm": 0.7709964513778687, "learning_rate": 0.00013597932534646718, "loss": 2.8286, "step": 41879 }, { "epoch": 2.05, "grad_norm": 0.6718994379043579, "learning_rate": 0.0001359664342502931, "loss": 2.9081, "step": 41880 }, { "epoch": 2.05, "grad_norm": 0.7286249399185181, "learning_rate": 0.00013595354358615194, "loss": 2.9856, "step": 41881 }, { "epoch": 2.05, "grad_norm": 0.6866543889045715, "learning_rate": 0.00013594065335407752, "loss": 2.8534, "step": 41882 }, { "epoch": 2.05, "grad_norm": 0.6790507435798645, "learning_rate": 0.00013592776355410394, "loss": 2.9359, "step": 41883 }, { "epoch": 2.05, "grad_norm": 0.6834301948547363, "learning_rate": 0.0001359148741862649, "loss": 2.9917, "step": 41884 }, { "epoch": 2.05, "grad_norm": 0.6941182613372803, "learning_rate": 0.00013590198525059466, "loss": 2.926, "step": 41885 }, { "epoch": 2.05, "grad_norm": 0.7055644392967224, "learning_rate": 0.00013588909674712698, "loss": 2.7773, "step": 41886 }, { "epoch": 2.05, "grad_norm": 0.6956052780151367, "learning_rate": 0.0001358762086758957, "loss": 2.8471, "step": 41887 }, { "epoch": 2.05, "grad_norm": 0.6483541131019592, "learning_rate": 0.00013586332103693506, "loss": 3.033, "step": 41888 }, { "epoch": 2.05, "grad_norm": 0.6963681578636169, "learning_rate": 0.00013585043383027874, "loss": 3.1238, "step": 41889 }, { "epoch": 2.05, "grad_norm": 0.6419037580490112, "learning_rate": 0.00013583754705596076, "loss": 2.8311, "step": 41890 }, { "epoch": 2.05, "grad_norm": 0.6917277574539185, "learning_rate": 0.0001358246607140152, "loss": 2.8747, "step": 41891 }, { "epoch": 2.05, "grad_norm": 0.7247151732444763, "learning_rate": 0.00013581177480447576, "loss": 2.9386, "step": 41892 }, { "epoch": 2.05, "grad_norm": 0.71748948097229, "learning_rate": 0.00013579888932737665, "loss": 3.047, "step": 41893 }, { "epoch": 2.05, "grad_norm": 0.6673287749290466, "learning_rate": 0.00013578600428275166, "loss": 3.0716, "step": 41894 }, { "epoch": 2.05, "grad_norm": 0.7329942584037781, "learning_rate": 0.00013577311967063462, "loss": 2.7382, "step": 41895 }, { "epoch": 2.05, "grad_norm": 0.652654767036438, "learning_rate": 0.00013576023549105966, "loss": 3.2103, "step": 41896 }, { "epoch": 2.05, "grad_norm": 0.730176568031311, "learning_rate": 0.00013574735174406055, "loss": 2.8715, "step": 41897 }, { "epoch": 2.05, "grad_norm": 0.6587347984313965, "learning_rate": 0.0001357344684296714, "loss": 2.7973, "step": 41898 }, { "epoch": 2.05, "grad_norm": 0.6811947822570801, "learning_rate": 0.00013572158554792595, "loss": 2.9729, "step": 41899 }, { "epoch": 2.05, "grad_norm": 0.6677283644676208, "learning_rate": 0.0001357087030988583, "loss": 3.0673, "step": 41900 }, { "epoch": 2.05, "grad_norm": 0.7097472548484802, "learning_rate": 0.00013569582108250236, "loss": 2.7919, "step": 41901 }, { "epoch": 2.05, "grad_norm": 0.6857821345329285, "learning_rate": 0.00013568293949889186, "loss": 3.1361, "step": 41902 }, { "epoch": 2.05, "grad_norm": 0.6500460505485535, "learning_rate": 0.000135670058348061, "loss": 2.869, "step": 41903 }, { "epoch": 2.05, "grad_norm": 0.6371486186981201, "learning_rate": 0.00013565717763004348, "loss": 2.9958, "step": 41904 }, { "epoch": 2.05, "grad_norm": 0.6974950432777405, "learning_rate": 0.0001356442973448733, "loss": 2.9836, "step": 41905 }, { "epoch": 2.05, "grad_norm": 0.727595329284668, "learning_rate": 0.00013563141749258455, "loss": 2.9796, "step": 41906 }, { "epoch": 2.05, "grad_norm": 0.681317150592804, "learning_rate": 0.000135618538073211, "loss": 2.7705, "step": 41907 }, { "epoch": 2.05, "grad_norm": 0.6713190674781799, "learning_rate": 0.00013560565908678654, "loss": 2.934, "step": 41908 }, { "epoch": 2.05, "grad_norm": 0.698500394821167, "learning_rate": 0.00013559278053334507, "loss": 2.9041, "step": 41909 }, { "epoch": 2.05, "grad_norm": 0.6464803814888, "learning_rate": 0.0001355799024129206, "loss": 2.9814, "step": 41910 }, { "epoch": 2.05, "grad_norm": 0.7684870362281799, "learning_rate": 0.00013556702472554712, "loss": 2.9662, "step": 41911 }, { "epoch": 2.05, "grad_norm": 0.7648671269416809, "learning_rate": 0.00013555414747125832, "loss": 2.8802, "step": 41912 }, { "epoch": 2.05, "grad_norm": 0.6603399515151978, "learning_rate": 0.00013554127065008837, "loss": 3.0558, "step": 41913 }, { "epoch": 2.05, "grad_norm": 0.6440830230712891, "learning_rate": 0.00013552839426207093, "loss": 2.8296, "step": 41914 }, { "epoch": 2.05, "grad_norm": 0.6936287879943848, "learning_rate": 0.00013551551830724018, "loss": 2.9706, "step": 41915 }, { "epoch": 2.05, "grad_norm": 0.6846458911895752, "learning_rate": 0.0001355026427856299, "loss": 3.0088, "step": 41916 }, { "epoch": 2.05, "grad_norm": 0.6697417497634888, "learning_rate": 0.00013548976769727386, "loss": 2.8968, "step": 41917 }, { "epoch": 2.05, "grad_norm": 0.7135060429573059, "learning_rate": 0.00013547689304220625, "loss": 3.0002, "step": 41918 }, { "epoch": 2.05, "grad_norm": 0.69883131980896, "learning_rate": 0.0001354640188204607, "loss": 2.8384, "step": 41919 }, { "epoch": 2.05, "grad_norm": 0.663644552230835, "learning_rate": 0.00013545114503207142, "loss": 2.6999, "step": 41920 }, { "epoch": 2.05, "grad_norm": 0.6672549247741699, "learning_rate": 0.00013543827167707202, "loss": 3.0655, "step": 41921 }, { "epoch": 2.05, "grad_norm": 0.7014299035072327, "learning_rate": 0.00013542539875549664, "loss": 2.8712, "step": 41922 }, { "epoch": 2.05, "grad_norm": 0.6401073336601257, "learning_rate": 0.00013541252626737908, "loss": 2.9725, "step": 41923 }, { "epoch": 2.05, "grad_norm": 0.7074952125549316, "learning_rate": 0.00013539965421275317, "loss": 2.8557, "step": 41924 }, { "epoch": 2.05, "grad_norm": 0.6953228712081909, "learning_rate": 0.00013538678259165297, "loss": 2.674, "step": 41925 }, { "epoch": 2.05, "grad_norm": 0.6561875939369202, "learning_rate": 0.00013537391140411222, "loss": 3.0387, "step": 41926 }, { "epoch": 2.05, "grad_norm": 0.70853191614151, "learning_rate": 0.00013536104065016492, "loss": 2.6825, "step": 41927 }, { "epoch": 2.05, "grad_norm": 0.7176726460456848, "learning_rate": 0.00013534817032984504, "loss": 2.6226, "step": 41928 }, { "epoch": 2.05, "grad_norm": 0.7297140955924988, "learning_rate": 0.0001353353004431864, "loss": 2.9366, "step": 41929 }, { "epoch": 2.05, "grad_norm": 0.6884056329727173, "learning_rate": 0.00013532243099022287, "loss": 2.9259, "step": 41930 }, { "epoch": 2.05, "grad_norm": 0.7293509244918823, "learning_rate": 0.0001353095619709883, "loss": 2.9394, "step": 41931 }, { "epoch": 2.05, "grad_norm": 0.7084832191467285, "learning_rate": 0.0001352966933855166, "loss": 3.1906, "step": 41932 }, { "epoch": 2.06, "grad_norm": 0.7524933815002441, "learning_rate": 0.00013528382523384183, "loss": 2.9092, "step": 41933 }, { "epoch": 2.06, "grad_norm": 0.6961275935173035, "learning_rate": 0.00013527095751599767, "loss": 2.8531, "step": 41934 }, { "epoch": 2.06, "grad_norm": 0.6843053698539734, "learning_rate": 0.0001352580902320182, "loss": 2.7244, "step": 41935 }, { "epoch": 2.06, "grad_norm": 0.7608031034469604, "learning_rate": 0.0001352452233819371, "loss": 3.1026, "step": 41936 }, { "epoch": 2.06, "grad_norm": 0.6482999324798584, "learning_rate": 0.0001352323569657885, "loss": 2.9609, "step": 41937 }, { "epoch": 2.06, "grad_norm": 0.6896520853042603, "learning_rate": 0.00013521949098360616, "loss": 2.9405, "step": 41938 }, { "epoch": 2.06, "grad_norm": 0.7269614934921265, "learning_rate": 0.00013520662543542385, "loss": 2.7086, "step": 41939 }, { "epoch": 2.06, "grad_norm": 0.6921659111976624, "learning_rate": 0.00013519376032127567, "loss": 2.7784, "step": 41940 }, { "epoch": 2.06, "grad_norm": 0.6634232401847839, "learning_rate": 0.00013518089564119534, "loss": 3.0583, "step": 41941 }, { "epoch": 2.06, "grad_norm": 0.7075629830360413, "learning_rate": 0.00013516803139521677, "loss": 2.8788, "step": 41942 }, { "epoch": 2.06, "grad_norm": 0.7123188972473145, "learning_rate": 0.000135155167583374, "loss": 2.802, "step": 41943 }, { "epoch": 2.06, "grad_norm": 0.6718761920928955, "learning_rate": 0.00013514230420570085, "loss": 2.9926, "step": 41944 }, { "epoch": 2.06, "grad_norm": 0.6959552764892578, "learning_rate": 0.00013512944126223107, "loss": 2.9625, "step": 41945 }, { "epoch": 2.06, "grad_norm": 0.7005223035812378, "learning_rate": 0.00013511657875299853, "loss": 2.8976, "step": 41946 }, { "epoch": 2.06, "grad_norm": 0.6967735290527344, "learning_rate": 0.00013510371667803717, "loss": 2.9729, "step": 41947 }, { "epoch": 2.06, "grad_norm": 0.6776131391525269, "learning_rate": 0.00013509085503738102, "loss": 2.8577, "step": 41948 }, { "epoch": 2.06, "grad_norm": 0.6918829679489136, "learning_rate": 0.0001350779938310637, "loss": 3.0947, "step": 41949 }, { "epoch": 2.06, "grad_norm": 0.7643651366233826, "learning_rate": 0.00013506513305911933, "loss": 3.176, "step": 41950 }, { "epoch": 2.06, "grad_norm": 0.6947302222251892, "learning_rate": 0.00013505227272158163, "loss": 2.9131, "step": 41951 }, { "epoch": 2.06, "grad_norm": 0.7408885359764099, "learning_rate": 0.00013503941281848439, "loss": 2.9874, "step": 41952 }, { "epoch": 2.06, "grad_norm": 0.7771589159965515, "learning_rate": 0.00013502655334986173, "loss": 3.0083, "step": 41953 }, { "epoch": 2.06, "grad_norm": 0.6549042463302612, "learning_rate": 0.00013501369431574725, "loss": 2.8394, "step": 41954 }, { "epoch": 2.06, "grad_norm": 0.687390148639679, "learning_rate": 0.00013500083571617508, "loss": 2.7261, "step": 41955 }, { "epoch": 2.06, "grad_norm": 0.6740281581878662, "learning_rate": 0.0001349879775511788, "loss": 2.8224, "step": 41956 }, { "epoch": 2.06, "grad_norm": 0.6710662841796875, "learning_rate": 0.00013497511982079246, "loss": 2.709, "step": 41957 }, { "epoch": 2.06, "grad_norm": 0.7352544665336609, "learning_rate": 0.00013496226252504998, "loss": 3.0141, "step": 41958 }, { "epoch": 2.06, "grad_norm": 0.6972978115081787, "learning_rate": 0.0001349494056639852, "loss": 2.9088, "step": 41959 }, { "epoch": 2.06, "grad_norm": 0.6581162214279175, "learning_rate": 0.00013493654923763183, "loss": 2.8458, "step": 41960 }, { "epoch": 2.06, "grad_norm": 0.6925591230392456, "learning_rate": 0.00013492369324602375, "loss": 2.9503, "step": 41961 }, { "epoch": 2.06, "grad_norm": 0.663356602191925, "learning_rate": 0.00013491083768919494, "loss": 2.8988, "step": 41962 }, { "epoch": 2.06, "grad_norm": 0.7245420813560486, "learning_rate": 0.00013489798256717926, "loss": 2.8092, "step": 41963 }, { "epoch": 2.06, "grad_norm": 0.6705074310302734, "learning_rate": 0.00013488512788001043, "loss": 2.8578, "step": 41964 }, { "epoch": 2.06, "grad_norm": 0.6748921871185303, "learning_rate": 0.00013487227362772252, "loss": 3.1966, "step": 41965 }, { "epoch": 2.06, "grad_norm": 0.728326141834259, "learning_rate": 0.00013485941981034922, "loss": 2.8332, "step": 41966 }, { "epoch": 2.06, "grad_norm": 0.7034401297569275, "learning_rate": 0.00013484656642792436, "loss": 2.9848, "step": 41967 }, { "epoch": 2.06, "grad_norm": 0.68290776014328, "learning_rate": 0.00013483371348048197, "loss": 2.8918, "step": 41968 }, { "epoch": 2.06, "grad_norm": 0.6936345100402832, "learning_rate": 0.00013482086096805565, "loss": 2.779, "step": 41969 }, { "epoch": 2.06, "grad_norm": 0.7086421251296997, "learning_rate": 0.00013480800889067955, "loss": 2.9097, "step": 41970 }, { "epoch": 2.06, "grad_norm": 0.7316939830780029, "learning_rate": 0.00013479515724838722, "loss": 2.8459, "step": 41971 }, { "epoch": 2.06, "grad_norm": 0.6890463829040527, "learning_rate": 0.00013478230604121265, "loss": 3.0289, "step": 41972 }, { "epoch": 2.06, "grad_norm": 0.6866118907928467, "learning_rate": 0.00013476945526918978, "loss": 2.7847, "step": 41973 }, { "epoch": 2.06, "grad_norm": 0.7601082921028137, "learning_rate": 0.00013475660493235237, "loss": 2.8525, "step": 41974 }, { "epoch": 2.06, "grad_norm": 0.7042220234870911, "learning_rate": 0.00013474375503073433, "loss": 2.945, "step": 41975 }, { "epoch": 2.06, "grad_norm": 0.6664708852767944, "learning_rate": 0.00013473090556436927, "loss": 2.9917, "step": 41976 }, { "epoch": 2.06, "grad_norm": 0.6848310232162476, "learning_rate": 0.00013471805653329118, "loss": 2.8884, "step": 41977 }, { "epoch": 2.06, "grad_norm": 0.6613977551460266, "learning_rate": 0.00013470520793753407, "loss": 2.9344, "step": 41978 }, { "epoch": 2.06, "grad_norm": 0.7326992750167847, "learning_rate": 0.00013469235977713148, "loss": 2.8053, "step": 41979 }, { "epoch": 2.06, "grad_norm": 0.6662197113037109, "learning_rate": 0.00013467951205211755, "loss": 2.8492, "step": 41980 }, { "epoch": 2.06, "grad_norm": 0.6890004873275757, "learning_rate": 0.00013466666476252595, "loss": 2.9529, "step": 41981 }, { "epoch": 2.06, "grad_norm": 0.6676129102706909, "learning_rate": 0.00013465381790839042, "loss": 2.8345, "step": 41982 }, { "epoch": 2.06, "grad_norm": 0.676196813583374, "learning_rate": 0.00013464097148974504, "loss": 3.2066, "step": 41983 }, { "epoch": 2.06, "grad_norm": 0.6789147257804871, "learning_rate": 0.0001346281255066234, "loss": 3.0088, "step": 41984 }, { "epoch": 2.06, "grad_norm": 0.7471140623092651, "learning_rate": 0.00013461527995905955, "loss": 2.9146, "step": 41985 }, { "epoch": 2.06, "grad_norm": 0.6926723718643188, "learning_rate": 0.00013460243484708713, "loss": 2.9489, "step": 41986 }, { "epoch": 2.06, "grad_norm": 0.7110167145729065, "learning_rate": 0.00013458959017074017, "loss": 2.8836, "step": 41987 }, { "epoch": 2.06, "grad_norm": 0.7335017323493958, "learning_rate": 0.0001345767459300524, "loss": 2.9397, "step": 41988 }, { "epoch": 2.06, "grad_norm": 0.7071158289909363, "learning_rate": 0.0001345639021250575, "loss": 2.8792, "step": 41989 }, { "epoch": 2.06, "grad_norm": 0.7941440343856812, "learning_rate": 0.0001345510587557896, "loss": 2.9275, "step": 41990 }, { "epoch": 2.06, "grad_norm": 0.6561496257781982, "learning_rate": 0.00013453821582228224, "loss": 2.9598, "step": 41991 }, { "epoch": 2.06, "grad_norm": 0.7104381918907166, "learning_rate": 0.0001345253733245694, "loss": 2.897, "step": 41992 }, { "epoch": 2.06, "grad_norm": 0.702156662940979, "learning_rate": 0.00013451253126268496, "loss": 2.8701, "step": 41993 }, { "epoch": 2.06, "grad_norm": 0.7097292542457581, "learning_rate": 0.00013449968963666253, "loss": 2.7465, "step": 41994 }, { "epoch": 2.06, "grad_norm": 0.6726009249687195, "learning_rate": 0.00013448684844653624, "loss": 2.6797, "step": 41995 }, { "epoch": 2.06, "grad_norm": 0.6798008680343628, "learning_rate": 0.0001344740076923397, "loss": 2.8157, "step": 41996 }, { "epoch": 2.06, "grad_norm": 0.7259616255760193, "learning_rate": 0.00013446116737410676, "loss": 2.9134, "step": 41997 }, { "epoch": 2.06, "grad_norm": 0.7127224802970886, "learning_rate": 0.00013444832749187114, "loss": 3.0528, "step": 41998 }, { "epoch": 2.06, "grad_norm": 0.6722402572631836, "learning_rate": 0.00013443548804566678, "loss": 2.7478, "step": 41999 }, { "epoch": 2.06, "grad_norm": 0.7285507321357727, "learning_rate": 0.0001344226490355276, "loss": 2.9906, "step": 42000 }, { "epoch": 2.06, "grad_norm": 0.6551629304885864, "learning_rate": 0.00013440981046148717, "loss": 2.8337, "step": 42001 }, { "epoch": 2.06, "grad_norm": 0.678065836429596, "learning_rate": 0.00013439697232357954, "loss": 2.8061, "step": 42002 }, { "epoch": 2.06, "grad_norm": 0.7075507044792175, "learning_rate": 0.00013438413462183842, "loss": 2.7313, "step": 42003 }, { "epoch": 2.06, "grad_norm": 0.658933699131012, "learning_rate": 0.00013437129735629748, "loss": 2.8594, "step": 42004 }, { "epoch": 2.06, "grad_norm": 0.6496484279632568, "learning_rate": 0.0001343584605269908, "loss": 3.0021, "step": 42005 }, { "epoch": 2.06, "grad_norm": 0.6887893080711365, "learning_rate": 0.00013434562413395195, "loss": 2.803, "step": 42006 }, { "epoch": 2.06, "grad_norm": 0.6488357782363892, "learning_rate": 0.00013433278817721496, "loss": 2.9725, "step": 42007 }, { "epoch": 2.06, "grad_norm": 0.7380465865135193, "learning_rate": 0.00013431995265681337, "loss": 3.0395, "step": 42008 }, { "epoch": 2.06, "grad_norm": 0.703506350517273, "learning_rate": 0.0001343071175727813, "loss": 3.0208, "step": 42009 }, { "epoch": 2.06, "grad_norm": 0.7320290803909302, "learning_rate": 0.00013429428292515238, "loss": 2.8408, "step": 42010 }, { "epoch": 2.06, "grad_norm": 0.6865428686141968, "learning_rate": 0.0001342814487139603, "loss": 3.1323, "step": 42011 }, { "epoch": 2.06, "grad_norm": 0.6920920014381409, "learning_rate": 0.00013426861493923915, "loss": 2.8396, "step": 42012 }, { "epoch": 2.06, "grad_norm": 0.7023778557777405, "learning_rate": 0.00013425578160102238, "loss": 2.9399, "step": 42013 }, { "epoch": 2.06, "grad_norm": 0.648321270942688, "learning_rate": 0.00013424294869934407, "loss": 3.1877, "step": 42014 }, { "epoch": 2.06, "grad_norm": 0.6870456337928772, "learning_rate": 0.000134230116234238, "loss": 3.2613, "step": 42015 }, { "epoch": 2.06, "grad_norm": 0.7093237638473511, "learning_rate": 0.00013421728420573778, "loss": 2.8811, "step": 42016 }, { "epoch": 2.06, "grad_norm": 0.6749858260154724, "learning_rate": 0.00013420445261387747, "loss": 2.7412, "step": 42017 }, { "epoch": 2.06, "grad_norm": 0.7406046986579895, "learning_rate": 0.0001341916214586907, "loss": 2.8874, "step": 42018 }, { "epoch": 2.06, "grad_norm": 0.6919505596160889, "learning_rate": 0.00013417879074021117, "loss": 2.9656, "step": 42019 }, { "epoch": 2.06, "grad_norm": 0.7033610939979553, "learning_rate": 0.00013416596045847293, "loss": 3.0286, "step": 42020 }, { "epoch": 2.06, "grad_norm": 0.6647442579269409, "learning_rate": 0.00013415313061350948, "loss": 2.861, "step": 42021 }, { "epoch": 2.06, "grad_norm": 0.6930309534072876, "learning_rate": 0.00013414030120535491, "loss": 3.1232, "step": 42022 }, { "epoch": 2.06, "grad_norm": 0.6791087985038757, "learning_rate": 0.00013412747223404275, "loss": 2.9181, "step": 42023 }, { "epoch": 2.06, "grad_norm": 0.6947936415672302, "learning_rate": 0.00013411464369960706, "loss": 2.7641, "step": 42024 }, { "epoch": 2.06, "grad_norm": 0.7209773659706116, "learning_rate": 0.00013410181560208142, "loss": 2.9464, "step": 42025 }, { "epoch": 2.06, "grad_norm": 0.7256671190261841, "learning_rate": 0.00013408898794149956, "loss": 2.8287, "step": 42026 }, { "epoch": 2.06, "grad_norm": 0.67732834815979, "learning_rate": 0.00013407616071789552, "loss": 3.0294, "step": 42027 }, { "epoch": 2.06, "grad_norm": 0.6837236285209656, "learning_rate": 0.00013406333393130277, "loss": 2.7781, "step": 42028 }, { "epoch": 2.06, "grad_norm": 0.6771765351295471, "learning_rate": 0.0001340505075817553, "loss": 2.9683, "step": 42029 }, { "epoch": 2.06, "grad_norm": 0.685924768447876, "learning_rate": 0.00013403768166928698, "loss": 2.9958, "step": 42030 }, { "epoch": 2.06, "grad_norm": 0.6353333592414856, "learning_rate": 0.00013402485619393146, "loss": 2.9384, "step": 42031 }, { "epoch": 2.06, "grad_norm": 0.697657585144043, "learning_rate": 0.00013401203115572254, "loss": 2.921, "step": 42032 }, { "epoch": 2.06, "grad_norm": 0.6873392462730408, "learning_rate": 0.00013399920655469383, "loss": 2.7485, "step": 42033 }, { "epoch": 2.06, "grad_norm": 0.6944345235824585, "learning_rate": 0.0001339863823908793, "loss": 2.8502, "step": 42034 }, { "epoch": 2.06, "grad_norm": 0.6621580719947815, "learning_rate": 0.0001339735586643128, "loss": 2.8554, "step": 42035 }, { "epoch": 2.06, "grad_norm": 0.6833303570747375, "learning_rate": 0.00013396073537502782, "loss": 3.2196, "step": 42036 }, { "epoch": 2.06, "grad_norm": 0.6794368028640747, "learning_rate": 0.00013394791252305848, "loss": 2.8268, "step": 42037 }, { "epoch": 2.06, "grad_norm": 0.7149304747581482, "learning_rate": 0.00013393509010843827, "loss": 3.1625, "step": 42038 }, { "epoch": 2.06, "grad_norm": 0.6875823736190796, "learning_rate": 0.00013392226813120118, "loss": 2.8485, "step": 42039 }, { "epoch": 2.06, "grad_norm": 0.7069306969642639, "learning_rate": 0.00013390944659138085, "loss": 3.0752, "step": 42040 }, { "epoch": 2.06, "grad_norm": 0.6851629614830017, "learning_rate": 0.000133896625489011, "loss": 3.107, "step": 42041 }, { "epoch": 2.06, "grad_norm": 0.6919379234313965, "learning_rate": 0.00013388380482412554, "loss": 3.0562, "step": 42042 }, { "epoch": 2.06, "grad_norm": 0.724163830280304, "learning_rate": 0.0001338709845967581, "loss": 3.1261, "step": 42043 }, { "epoch": 2.06, "grad_norm": 0.6805170178413391, "learning_rate": 0.00013385816480694247, "loss": 2.8018, "step": 42044 }, { "epoch": 2.06, "grad_norm": 0.660524845123291, "learning_rate": 0.0001338453454547126, "loss": 2.8098, "step": 42045 }, { "epoch": 2.06, "grad_norm": 0.6695048809051514, "learning_rate": 0.00013383252654010212, "loss": 3.0506, "step": 42046 }, { "epoch": 2.06, "grad_norm": 0.6940860748291016, "learning_rate": 0.00013381970806314474, "loss": 2.9447, "step": 42047 }, { "epoch": 2.06, "grad_norm": 0.6937212944030762, "learning_rate": 0.0001338068900238742, "loss": 2.954, "step": 42048 }, { "epoch": 2.06, "grad_norm": 0.6832905411720276, "learning_rate": 0.0001337940724223243, "loss": 2.8383, "step": 42049 }, { "epoch": 2.06, "grad_norm": 0.6810662746429443, "learning_rate": 0.00013378125525852897, "loss": 2.6696, "step": 42050 }, { "epoch": 2.06, "grad_norm": 0.6970690488815308, "learning_rate": 0.0001337684385325217, "loss": 3.0258, "step": 42051 }, { "epoch": 2.06, "grad_norm": 0.6836695671081543, "learning_rate": 0.00013375562224433646, "loss": 3.0219, "step": 42052 }, { "epoch": 2.06, "grad_norm": 0.6802341341972351, "learning_rate": 0.00013374280639400694, "loss": 3.1268, "step": 42053 }, { "epoch": 2.06, "grad_norm": 0.6934910416603088, "learning_rate": 0.00013372999098156674, "loss": 2.9171, "step": 42054 }, { "epoch": 2.06, "grad_norm": 0.6853800415992737, "learning_rate": 0.00013371717600704986, "loss": 2.9585, "step": 42055 }, { "epoch": 2.06, "grad_norm": 0.9536087512969971, "learning_rate": 0.00013370436147048986, "loss": 2.9071, "step": 42056 }, { "epoch": 2.06, "grad_norm": 0.6992372274398804, "learning_rate": 0.00013369154737192065, "loss": 2.9655, "step": 42057 }, { "epoch": 2.06, "grad_norm": 0.6866870522499084, "learning_rate": 0.00013367873371137576, "loss": 2.9159, "step": 42058 }, { "epoch": 2.06, "grad_norm": 0.6795086860656738, "learning_rate": 0.0001336659204888891, "loss": 2.8051, "step": 42059 }, { "epoch": 2.06, "grad_norm": 0.6785724759101868, "learning_rate": 0.00013365310770449454, "loss": 3.0133, "step": 42060 }, { "epoch": 2.06, "grad_norm": 0.6523509621620178, "learning_rate": 0.00013364029535822564, "loss": 2.8145, "step": 42061 }, { "epoch": 2.06, "grad_norm": 0.7120859026908875, "learning_rate": 0.0001336274834501162, "loss": 3.0528, "step": 42062 }, { "epoch": 2.06, "grad_norm": 0.6999312043190002, "learning_rate": 0.00013361467198019984, "loss": 2.9157, "step": 42063 }, { "epoch": 2.06, "grad_norm": 0.6902523040771484, "learning_rate": 0.00013360186094851038, "loss": 2.6057, "step": 42064 }, { "epoch": 2.06, "grad_norm": 0.6437488198280334, "learning_rate": 0.00013358905035508172, "loss": 2.9131, "step": 42065 }, { "epoch": 2.06, "grad_norm": 0.6648966670036316, "learning_rate": 0.0001335762401999474, "loss": 2.6556, "step": 42066 }, { "epoch": 2.06, "grad_norm": 0.7151343822479248, "learning_rate": 0.00013356343048314133, "loss": 2.9348, "step": 42067 }, { "epoch": 2.06, "grad_norm": 0.6936889290809631, "learning_rate": 0.0001335506212046971, "loss": 2.8895, "step": 42068 }, { "epoch": 2.06, "grad_norm": 0.6722537279129028, "learning_rate": 0.00013353781236464846, "loss": 3.094, "step": 42069 }, { "epoch": 2.06, "grad_norm": 0.6585366725921631, "learning_rate": 0.00013352500396302927, "loss": 2.9741, "step": 42070 }, { "epoch": 2.06, "grad_norm": 0.7267591953277588, "learning_rate": 0.0001335121959998731, "loss": 2.9534, "step": 42071 }, { "epoch": 2.06, "grad_norm": 0.6568361520767212, "learning_rate": 0.00013349938847521383, "loss": 2.7674, "step": 42072 }, { "epoch": 2.06, "grad_norm": 0.700718343257904, "learning_rate": 0.00013348658138908503, "loss": 2.9192, "step": 42073 }, { "epoch": 2.06, "grad_norm": 0.7025381326675415, "learning_rate": 0.00013347377474152064, "loss": 3.0146, "step": 42074 }, { "epoch": 2.06, "grad_norm": 0.7157812714576721, "learning_rate": 0.00013346096853255427, "loss": 2.8323, "step": 42075 }, { "epoch": 2.06, "grad_norm": 0.6481377482414246, "learning_rate": 0.00013344816276221957, "loss": 3.0237, "step": 42076 }, { "epoch": 2.06, "grad_norm": 0.6517822742462158, "learning_rate": 0.00013343535743055043, "loss": 2.7866, "step": 42077 }, { "epoch": 2.06, "grad_norm": 0.6869397759437561, "learning_rate": 0.00013342255253758042, "loss": 2.9609, "step": 42078 }, { "epoch": 2.06, "grad_norm": 0.6711219549179077, "learning_rate": 0.00013340974808334345, "loss": 3.0348, "step": 42079 }, { "epoch": 2.06, "grad_norm": 0.7152694463729858, "learning_rate": 0.00013339694406787304, "loss": 3.1945, "step": 42080 }, { "epoch": 2.06, "grad_norm": 0.7110965251922607, "learning_rate": 0.00013338414049120302, "loss": 2.8687, "step": 42081 }, { "epoch": 2.06, "grad_norm": 0.6851297616958618, "learning_rate": 0.0001333713373533672, "loss": 3.0068, "step": 42082 }, { "epoch": 2.06, "grad_norm": 0.6575678586959839, "learning_rate": 0.00013335853465439924, "loss": 2.7863, "step": 42083 }, { "epoch": 2.06, "grad_norm": 0.6966585516929626, "learning_rate": 0.0001333457323943328, "loss": 2.9452, "step": 42084 }, { "epoch": 2.06, "grad_norm": 0.6733703017234802, "learning_rate": 0.0001333329305732015, "loss": 3.0046, "step": 42085 }, { "epoch": 2.06, "grad_norm": 0.7381994128227234, "learning_rate": 0.00013332012919103922, "loss": 2.8515, "step": 42086 }, { "epoch": 2.06, "grad_norm": 0.6656725406646729, "learning_rate": 0.00013330732824787974, "loss": 2.9327, "step": 42087 }, { "epoch": 2.06, "grad_norm": 0.6955260634422302, "learning_rate": 0.00013329452774375657, "loss": 3.0594, "step": 42088 }, { "epoch": 2.06, "grad_norm": 0.6607492566108704, "learning_rate": 0.00013328172767870363, "loss": 2.8572, "step": 42089 }, { "epoch": 2.06, "grad_norm": 0.6896944046020508, "learning_rate": 0.00013326892805275457, "loss": 2.8783, "step": 42090 }, { "epoch": 2.06, "grad_norm": 0.7322929501533508, "learning_rate": 0.00013325612886594293, "loss": 2.9448, "step": 42091 }, { "epoch": 2.06, "grad_norm": 0.6637055277824402, "learning_rate": 0.00013324333011830266, "loss": 2.9994, "step": 42092 }, { "epoch": 2.06, "grad_norm": 0.66081303358078, "learning_rate": 0.00013323053180986724, "loss": 2.9542, "step": 42093 }, { "epoch": 2.06, "grad_norm": 0.7156060934066772, "learning_rate": 0.00013321773394067064, "loss": 2.8346, "step": 42094 }, { "epoch": 2.06, "grad_norm": 0.7147414088249207, "learning_rate": 0.0001332049365107463, "loss": 2.9021, "step": 42095 }, { "epoch": 2.06, "grad_norm": 0.6858595013618469, "learning_rate": 0.0001331921395201281, "loss": 2.8172, "step": 42096 }, { "epoch": 2.06, "grad_norm": 0.6633896231651306, "learning_rate": 0.00013317934296884978, "loss": 2.9635, "step": 42097 }, { "epoch": 2.06, "grad_norm": 0.6843210458755493, "learning_rate": 0.00013316654685694497, "loss": 3.0135, "step": 42098 }, { "epoch": 2.06, "grad_norm": 0.7373406887054443, "learning_rate": 0.00013315375118444733, "loss": 2.8179, "step": 42099 }, { "epoch": 2.06, "grad_norm": 0.6996890306472778, "learning_rate": 0.00013314095595139057, "loss": 2.9772, "step": 42100 }, { "epoch": 2.06, "grad_norm": 0.6776704788208008, "learning_rate": 0.00013312816115780836, "loss": 2.874, "step": 42101 }, { "epoch": 2.06, "grad_norm": 0.7402383089065552, "learning_rate": 0.00013311536680373458, "loss": 3.0208, "step": 42102 }, { "epoch": 2.06, "grad_norm": 0.6806477308273315, "learning_rate": 0.0001331025728892027, "loss": 2.8178, "step": 42103 }, { "epoch": 2.06, "grad_norm": 0.7480725049972534, "learning_rate": 0.00013308977941424663, "loss": 2.759, "step": 42104 }, { "epoch": 2.06, "grad_norm": 0.6960058808326721, "learning_rate": 0.00013307698637889996, "loss": 3.0031, "step": 42105 }, { "epoch": 2.06, "grad_norm": 0.7045592665672302, "learning_rate": 0.00013306419378319626, "loss": 2.8392, "step": 42106 }, { "epoch": 2.06, "grad_norm": 0.6933001279830933, "learning_rate": 0.00013305140162716948, "loss": 2.8758, "step": 42107 }, { "epoch": 2.06, "grad_norm": 0.6979026794433594, "learning_rate": 0.00013303860991085306, "loss": 2.8514, "step": 42108 }, { "epoch": 2.06, "grad_norm": 0.6984500885009766, "learning_rate": 0.00013302581863428094, "loss": 2.853, "step": 42109 }, { "epoch": 2.06, "grad_norm": 0.7042974829673767, "learning_rate": 0.00013301302779748653, "loss": 2.9368, "step": 42110 }, { "epoch": 2.06, "grad_norm": 0.6589822173118591, "learning_rate": 0.00013300023740050378, "loss": 2.8031, "step": 42111 }, { "epoch": 2.06, "grad_norm": 0.6807518005371094, "learning_rate": 0.0001329874474433663, "loss": 3.1159, "step": 42112 }, { "epoch": 2.06, "grad_norm": 0.7369489669799805, "learning_rate": 0.00013297465792610761, "loss": 3.026, "step": 42113 }, { "epoch": 2.06, "grad_norm": 0.6795954704284668, "learning_rate": 0.00013296186884876164, "loss": 2.8004, "step": 42114 }, { "epoch": 2.06, "grad_norm": 0.6816520690917969, "learning_rate": 0.00013294908021136185, "loss": 2.9879, "step": 42115 }, { "epoch": 2.06, "grad_norm": 0.6890973448753357, "learning_rate": 0.00013293629201394203, "loss": 2.9887, "step": 42116 }, { "epoch": 2.06, "grad_norm": 0.6854380369186401, "learning_rate": 0.00013292350425653596, "loss": 3.0083, "step": 42117 }, { "epoch": 2.06, "grad_norm": 0.7107696533203125, "learning_rate": 0.00013291071693917713, "loss": 2.912, "step": 42118 }, { "epoch": 2.06, "grad_norm": 0.6896004676818848, "learning_rate": 0.00013289793006189942, "loss": 2.9846, "step": 42119 }, { "epoch": 2.06, "grad_norm": 0.7244950532913208, "learning_rate": 0.0001328851436247364, "loss": 2.9464, "step": 42120 }, { "epoch": 2.06, "grad_norm": 0.6931073069572449, "learning_rate": 0.00013287235762772165, "loss": 2.7884, "step": 42121 }, { "epoch": 2.06, "grad_norm": 0.6745514869689941, "learning_rate": 0.00013285957207088907, "loss": 2.7589, "step": 42122 }, { "epoch": 2.06, "grad_norm": 0.737249493598938, "learning_rate": 0.00013284678695427206, "loss": 2.9354, "step": 42123 }, { "epoch": 2.06, "grad_norm": 0.6523367762565613, "learning_rate": 0.0001328340022779046, "loss": 2.7846, "step": 42124 }, { "epoch": 2.06, "grad_norm": 0.6890686750411987, "learning_rate": 0.0001328212180418201, "loss": 3.1741, "step": 42125 }, { "epoch": 2.06, "grad_norm": 0.7183402180671692, "learning_rate": 0.00013280843424605239, "loss": 2.9694, "step": 42126 }, { "epoch": 2.06, "grad_norm": 0.6588950157165527, "learning_rate": 0.0001327956508906351, "loss": 2.9488, "step": 42127 }, { "epoch": 2.06, "grad_norm": 0.6817463040351868, "learning_rate": 0.00013278286797560184, "loss": 2.8579, "step": 42128 }, { "epoch": 2.06, "grad_norm": 0.6694334745407104, "learning_rate": 0.00013277008550098638, "loss": 2.9406, "step": 42129 }, { "epoch": 2.06, "grad_norm": 0.6861422657966614, "learning_rate": 0.0001327573034668222, "loss": 3.0737, "step": 42130 }, { "epoch": 2.06, "grad_norm": 0.6940646171569824, "learning_rate": 0.00013274452187314316, "loss": 2.8372, "step": 42131 }, { "epoch": 2.06, "grad_norm": 0.6737503409385681, "learning_rate": 0.0001327317407199829, "loss": 3.0897, "step": 42132 }, { "epoch": 2.06, "grad_norm": 0.692583441734314, "learning_rate": 0.00013271896000737512, "loss": 2.9895, "step": 42133 }, { "epoch": 2.06, "grad_norm": 0.7276341915130615, "learning_rate": 0.00013270617973535332, "loss": 2.7632, "step": 42134 }, { "epoch": 2.06, "grad_norm": 0.6521527767181396, "learning_rate": 0.0001326933999039512, "loss": 2.7446, "step": 42135 }, { "epoch": 2.06, "grad_norm": 0.7015560865402222, "learning_rate": 0.00013268062051320246, "loss": 3.0649, "step": 42136 }, { "epoch": 2.07, "grad_norm": 0.6922124028205872, "learning_rate": 0.00013266784156314087, "loss": 2.8697, "step": 42137 }, { "epoch": 2.07, "grad_norm": 0.7114979028701782, "learning_rate": 0.00013265506305379983, "loss": 2.9049, "step": 42138 }, { "epoch": 2.07, "grad_norm": 0.6223366260528564, "learning_rate": 0.0001326422849852133, "loss": 2.7975, "step": 42139 }, { "epoch": 2.07, "grad_norm": 0.7140401005744934, "learning_rate": 0.00013262950735741467, "loss": 3.1222, "step": 42140 }, { "epoch": 2.07, "grad_norm": 0.6884499192237854, "learning_rate": 0.00013261673017043782, "loss": 2.8993, "step": 42141 }, { "epoch": 2.07, "grad_norm": 0.6920923590660095, "learning_rate": 0.00013260395342431627, "loss": 2.9722, "step": 42142 }, { "epoch": 2.07, "grad_norm": 0.662397563457489, "learning_rate": 0.00013259117711908358, "loss": 3.0986, "step": 42143 }, { "epoch": 2.07, "grad_norm": 0.6610293388366699, "learning_rate": 0.00013257840125477363, "loss": 3.0419, "step": 42144 }, { "epoch": 2.07, "grad_norm": 0.7399603724479675, "learning_rate": 0.00013256562583141985, "loss": 2.8981, "step": 42145 }, { "epoch": 2.07, "grad_norm": 0.681763768196106, "learning_rate": 0.00013255285084905598, "loss": 2.756, "step": 42146 }, { "epoch": 2.07, "grad_norm": 0.7297256588935852, "learning_rate": 0.0001325400763077158, "loss": 3.0475, "step": 42147 }, { "epoch": 2.07, "grad_norm": 0.6886538863182068, "learning_rate": 0.0001325273022074328, "loss": 3.1051, "step": 42148 }, { "epoch": 2.07, "grad_norm": 0.7075888514518738, "learning_rate": 0.00013251452854824068, "loss": 3.0244, "step": 42149 }, { "epoch": 2.07, "grad_norm": 0.6723986864089966, "learning_rate": 0.00013250175533017293, "loss": 3.032, "step": 42150 }, { "epoch": 2.07, "grad_norm": 0.6775681972503662, "learning_rate": 0.00013248898255326346, "loss": 2.9657, "step": 42151 }, { "epoch": 2.07, "grad_norm": 0.6784015893936157, "learning_rate": 0.00013247621021754562, "loss": 2.8706, "step": 42152 }, { "epoch": 2.07, "grad_norm": 0.6836172342300415, "learning_rate": 0.00013246343832305324, "loss": 2.9455, "step": 42153 }, { "epoch": 2.07, "grad_norm": 0.7069783806800842, "learning_rate": 0.00013245066686982, "loss": 2.92, "step": 42154 }, { "epoch": 2.07, "grad_norm": 0.6457093358039856, "learning_rate": 0.00013243789585787948, "loss": 2.9114, "step": 42155 }, { "epoch": 2.07, "grad_norm": 0.7186350226402283, "learning_rate": 0.0001324251252872653, "loss": 2.7841, "step": 42156 }, { "epoch": 2.07, "grad_norm": 0.672428548336029, "learning_rate": 0.00013241235515801093, "loss": 3.0373, "step": 42157 }, { "epoch": 2.07, "grad_norm": 0.7136212587356567, "learning_rate": 0.0001323995854701502, "loss": 2.8389, "step": 42158 }, { "epoch": 2.07, "grad_norm": 0.6720640659332275, "learning_rate": 0.0001323868162237168, "loss": 2.7359, "step": 42159 }, { "epoch": 2.07, "grad_norm": 0.6694479584693909, "learning_rate": 0.0001323740474187442, "loss": 2.7698, "step": 42160 }, { "epoch": 2.07, "grad_norm": 0.6907969117164612, "learning_rate": 0.0001323612790552662, "loss": 2.7868, "step": 42161 }, { "epoch": 2.07, "grad_norm": 0.6790691018104553, "learning_rate": 0.0001323485111333162, "loss": 3.055, "step": 42162 }, { "epoch": 2.07, "grad_norm": 0.7087593674659729, "learning_rate": 0.00013233574365292803, "loss": 2.9253, "step": 42163 }, { "epoch": 2.07, "grad_norm": 0.6997819542884827, "learning_rate": 0.0001323229766141353, "loss": 2.972, "step": 42164 }, { "epoch": 2.07, "grad_norm": 0.7132028341293335, "learning_rate": 0.00013231021001697146, "loss": 2.8296, "step": 42165 }, { "epoch": 2.07, "grad_norm": 0.6946109533309937, "learning_rate": 0.00013229744386147036, "loss": 2.8329, "step": 42166 }, { "epoch": 2.07, "grad_norm": 0.7032156586647034, "learning_rate": 0.0001322846781476654, "loss": 2.7633, "step": 42167 }, { "epoch": 2.07, "grad_norm": 0.6949759721755981, "learning_rate": 0.0001322719128755903, "loss": 3.0027, "step": 42168 }, { "epoch": 2.07, "grad_norm": 0.6636202335357666, "learning_rate": 0.00013225914804527883, "loss": 2.9693, "step": 42169 }, { "epoch": 2.07, "grad_norm": 0.6825640797615051, "learning_rate": 0.00013224638365676451, "loss": 3.0983, "step": 42170 }, { "epoch": 2.07, "grad_norm": 0.669562816619873, "learning_rate": 0.0001322336197100809, "loss": 2.9744, "step": 42171 }, { "epoch": 2.07, "grad_norm": 0.6761650443077087, "learning_rate": 0.00013222085620526153, "loss": 2.8963, "step": 42172 }, { "epoch": 2.07, "grad_norm": 0.7180191278457642, "learning_rate": 0.00013220809314234017, "loss": 2.925, "step": 42173 }, { "epoch": 2.07, "grad_norm": 0.6580066084861755, "learning_rate": 0.00013219533052135051, "loss": 2.7424, "step": 42174 }, { "epoch": 2.07, "grad_norm": 0.7591055035591125, "learning_rate": 0.0001321825683423259, "loss": 2.7706, "step": 42175 }, { "epoch": 2.07, "grad_norm": 0.6858645677566528, "learning_rate": 0.00013216980660530025, "loss": 2.8627, "step": 42176 }, { "epoch": 2.07, "grad_norm": 0.7282005548477173, "learning_rate": 0.000132157045310307, "loss": 2.7958, "step": 42177 }, { "epoch": 2.07, "grad_norm": 0.6546103954315186, "learning_rate": 0.00013214428445737974, "loss": 2.7244, "step": 42178 }, { "epoch": 2.07, "grad_norm": 0.7295477986335754, "learning_rate": 0.00013213152404655218, "loss": 2.9721, "step": 42179 }, { "epoch": 2.07, "grad_norm": 0.6619624495506287, "learning_rate": 0.00013211876407785779, "loss": 2.9897, "step": 42180 }, { "epoch": 2.07, "grad_norm": 0.7149553298950195, "learning_rate": 0.00013210600455133037, "loss": 2.7235, "step": 42181 }, { "epoch": 2.07, "grad_norm": 0.6860978603363037, "learning_rate": 0.00013209324546700334, "loss": 3.0681, "step": 42182 }, { "epoch": 2.07, "grad_norm": 0.6987245678901672, "learning_rate": 0.00013208048682491038, "loss": 2.7269, "step": 42183 }, { "epoch": 2.07, "grad_norm": 0.6757438778877258, "learning_rate": 0.0001320677286250852, "loss": 2.8652, "step": 42184 }, { "epoch": 2.07, "grad_norm": 0.6813808679580688, "learning_rate": 0.0001320549708675613, "loss": 2.8605, "step": 42185 }, { "epoch": 2.07, "grad_norm": 0.6611071825027466, "learning_rate": 0.0001320422135523723, "loss": 2.7863, "step": 42186 }, { "epoch": 2.07, "grad_norm": 0.6753425598144531, "learning_rate": 0.00013202945667955163, "loss": 2.9375, "step": 42187 }, { "epoch": 2.07, "grad_norm": 0.6801412105560303, "learning_rate": 0.00013201670024913308, "loss": 2.9191, "step": 42188 }, { "epoch": 2.07, "grad_norm": 0.6811955571174622, "learning_rate": 0.00013200394426115034, "loss": 2.8201, "step": 42189 }, { "epoch": 2.07, "grad_norm": 0.6670415997505188, "learning_rate": 0.00013199118871563674, "loss": 2.7303, "step": 42190 }, { "epoch": 2.07, "grad_norm": 0.7115445733070374, "learning_rate": 0.00013197843361262613, "loss": 2.8177, "step": 42191 }, { "epoch": 2.07, "grad_norm": 0.6806511878967285, "learning_rate": 0.00013196567895215202, "loss": 3.0169, "step": 42192 }, { "epoch": 2.07, "grad_norm": 0.6739731431007385, "learning_rate": 0.00013195292473424782, "loss": 2.9967, "step": 42193 }, { "epoch": 2.07, "grad_norm": 0.6881250143051147, "learning_rate": 0.0001319401709589474, "loss": 2.8704, "step": 42194 }, { "epoch": 2.07, "grad_norm": 0.6715559363365173, "learning_rate": 0.0001319274176262841, "loss": 2.9177, "step": 42195 }, { "epoch": 2.07, "grad_norm": 0.6683921813964844, "learning_rate": 0.0001319146647362918, "loss": 2.8413, "step": 42196 }, { "epoch": 2.07, "grad_norm": 0.7129725813865662, "learning_rate": 0.00013190191228900378, "loss": 2.8465, "step": 42197 }, { "epoch": 2.07, "grad_norm": 0.6918213963508606, "learning_rate": 0.00013188916028445376, "loss": 2.9743, "step": 42198 }, { "epoch": 2.07, "grad_norm": 0.7041875720024109, "learning_rate": 0.00013187640872267545, "loss": 2.9971, "step": 42199 }, { "epoch": 2.07, "grad_norm": 0.7519069314002991, "learning_rate": 0.00013186365760370233, "loss": 2.8209, "step": 42200 }, { "epoch": 2.07, "grad_norm": 0.7009527087211609, "learning_rate": 0.00013185090692756796, "loss": 2.8803, "step": 42201 }, { "epoch": 2.07, "grad_norm": 0.7051267623901367, "learning_rate": 0.00013183815669430587, "loss": 2.7405, "step": 42202 }, { "epoch": 2.07, "grad_norm": 0.7135884761810303, "learning_rate": 0.00013182540690394968, "loss": 2.9261, "step": 42203 }, { "epoch": 2.07, "grad_norm": 0.6404792070388794, "learning_rate": 0.00013181265755653313, "loss": 2.755, "step": 42204 }, { "epoch": 2.07, "grad_norm": 0.7060980200767517, "learning_rate": 0.00013179990865208957, "loss": 2.8539, "step": 42205 }, { "epoch": 2.07, "grad_norm": 0.6715150475502014, "learning_rate": 0.00013178716019065275, "loss": 2.8695, "step": 42206 }, { "epoch": 2.07, "grad_norm": 0.7175034880638123, "learning_rate": 0.0001317744121722562, "loss": 2.8646, "step": 42207 }, { "epoch": 2.07, "grad_norm": 0.669849157333374, "learning_rate": 0.00013176166459693338, "loss": 2.9067, "step": 42208 }, { "epoch": 2.07, "grad_norm": 0.6535002589225769, "learning_rate": 0.00013174891746471805, "loss": 2.9545, "step": 42209 }, { "epoch": 2.07, "grad_norm": 0.6995859146118164, "learning_rate": 0.00013173617077564357, "loss": 2.938, "step": 42210 }, { "epoch": 2.07, "grad_norm": 0.6866672039031982, "learning_rate": 0.00013172342452974378, "loss": 2.8747, "step": 42211 }, { "epoch": 2.07, "grad_norm": 0.7818604111671448, "learning_rate": 0.00013171067872705196, "loss": 3.0297, "step": 42212 }, { "epoch": 2.07, "grad_norm": 0.6974015831947327, "learning_rate": 0.00013169793336760194, "loss": 2.9315, "step": 42213 }, { "epoch": 2.07, "grad_norm": 0.683866560459137, "learning_rate": 0.00013168518845142716, "loss": 2.9169, "step": 42214 }, { "epoch": 2.07, "grad_norm": 0.6737753748893738, "learning_rate": 0.00013167244397856113, "loss": 3.0056, "step": 42215 }, { "epoch": 2.07, "grad_norm": 0.7229675054550171, "learning_rate": 0.00013165969994903757, "loss": 3.1931, "step": 42216 }, { "epoch": 2.07, "grad_norm": 0.7247565388679504, "learning_rate": 0.00013164695636288986, "loss": 2.9204, "step": 42217 }, { "epoch": 2.07, "grad_norm": 0.7432151436805725, "learning_rate": 0.00013163421322015168, "loss": 3.1582, "step": 42218 }, { "epoch": 2.07, "grad_norm": 0.6654666662216187, "learning_rate": 0.00013162147052085666, "loss": 3.0593, "step": 42219 }, { "epoch": 2.07, "grad_norm": 0.6909993886947632, "learning_rate": 0.00013160872826503816, "loss": 2.7321, "step": 42220 }, { "epoch": 2.07, "grad_norm": 0.7081809043884277, "learning_rate": 0.00013159598645273002, "loss": 2.8623, "step": 42221 }, { "epoch": 2.07, "grad_norm": 0.6888182163238525, "learning_rate": 0.00013158324508396563, "loss": 2.9768, "step": 42222 }, { "epoch": 2.07, "grad_norm": 0.6894894242286682, "learning_rate": 0.00013157050415877843, "loss": 3.0968, "step": 42223 }, { "epoch": 2.07, "grad_norm": 0.6939984560012817, "learning_rate": 0.00013155776367720223, "loss": 3.0215, "step": 42224 }, { "epoch": 2.07, "grad_norm": 0.7307678461074829, "learning_rate": 0.00013154502363927037, "loss": 2.9241, "step": 42225 }, { "epoch": 2.07, "grad_norm": 0.6858737468719482, "learning_rate": 0.00013153228404501658, "loss": 2.9914, "step": 42226 }, { "epoch": 2.07, "grad_norm": 0.6769899725914001, "learning_rate": 0.00013151954489447424, "loss": 2.9458, "step": 42227 }, { "epoch": 2.07, "grad_norm": 0.6825265884399414, "learning_rate": 0.0001315068061876771, "loss": 2.8933, "step": 42228 }, { "epoch": 2.07, "grad_norm": 0.6815954446792603, "learning_rate": 0.00013149406792465859, "loss": 3.093, "step": 42229 }, { "epoch": 2.07, "grad_norm": 0.652065634727478, "learning_rate": 0.00013148133010545214, "loss": 3.0218, "step": 42230 }, { "epoch": 2.07, "grad_norm": 0.6865938901901245, "learning_rate": 0.00013146859273009158, "loss": 2.7974, "step": 42231 }, { "epoch": 2.07, "grad_norm": 0.6675371527671814, "learning_rate": 0.00013145585579861022, "loss": 2.9922, "step": 42232 }, { "epoch": 2.07, "grad_norm": 0.6868575811386108, "learning_rate": 0.0001314431193110418, "loss": 2.9725, "step": 42233 }, { "epoch": 2.07, "grad_norm": 0.689803421497345, "learning_rate": 0.00013143038326741963, "loss": 2.7922, "step": 42234 }, { "epoch": 2.07, "grad_norm": 0.7004591822624207, "learning_rate": 0.00013141764766777753, "loss": 2.8264, "step": 42235 }, { "epoch": 2.07, "grad_norm": 0.7188933491706848, "learning_rate": 0.00013140491251214888, "loss": 2.9029, "step": 42236 }, { "epoch": 2.07, "grad_norm": 0.6429142355918884, "learning_rate": 0.00013139217780056713, "loss": 2.8006, "step": 42237 }, { "epoch": 2.07, "grad_norm": 0.6708921194076538, "learning_rate": 0.00013137944353306603, "loss": 2.9435, "step": 42238 }, { "epoch": 2.07, "grad_norm": 0.6704015135765076, "learning_rate": 0.00013136670970967897, "loss": 2.9552, "step": 42239 }, { "epoch": 2.07, "grad_norm": 0.7032222151756287, "learning_rate": 0.0001313539763304395, "loss": 2.898, "step": 42240 }, { "epoch": 2.07, "grad_norm": 0.6727782487869263, "learning_rate": 0.0001313412433953813, "loss": 3.1218, "step": 42241 }, { "epoch": 2.07, "grad_norm": 0.7007694244384766, "learning_rate": 0.00013132851090453775, "loss": 3.0368, "step": 42242 }, { "epoch": 2.07, "grad_norm": 0.6816314458847046, "learning_rate": 0.00013131577885794248, "loss": 2.9854, "step": 42243 }, { "epoch": 2.07, "grad_norm": 0.7471539974212646, "learning_rate": 0.00013130304725562906, "loss": 2.9592, "step": 42244 }, { "epoch": 2.07, "grad_norm": 0.6539857983589172, "learning_rate": 0.0001312903160976308, "loss": 2.988, "step": 42245 }, { "epoch": 2.07, "grad_norm": 0.6955440044403076, "learning_rate": 0.00013127758538398145, "loss": 3.0305, "step": 42246 }, { "epoch": 2.07, "grad_norm": 0.6992368698120117, "learning_rate": 0.0001312648551147144, "loss": 2.9075, "step": 42247 }, { "epoch": 2.07, "grad_norm": 0.7383258938789368, "learning_rate": 0.00013125212528986336, "loss": 2.7885, "step": 42248 }, { "epoch": 2.07, "grad_norm": 0.7086347341537476, "learning_rate": 0.00013123939590946164, "loss": 3.0402, "step": 42249 }, { "epoch": 2.07, "grad_norm": 0.715691089630127, "learning_rate": 0.00013122666697354296, "loss": 2.9558, "step": 42250 }, { "epoch": 2.07, "grad_norm": 0.6848446726799011, "learning_rate": 0.00013121393848214077, "loss": 2.9925, "step": 42251 }, { "epoch": 2.07, "grad_norm": 0.679757297039032, "learning_rate": 0.00013120121043528845, "loss": 2.9478, "step": 42252 }, { "epoch": 2.07, "grad_norm": 0.6886526346206665, "learning_rate": 0.00013118848283301981, "loss": 3.0612, "step": 42253 }, { "epoch": 2.07, "grad_norm": 0.7555577158927917, "learning_rate": 0.00013117575567536807, "loss": 3.0184, "step": 42254 }, { "epoch": 2.07, "grad_norm": 0.6762755513191223, "learning_rate": 0.00013116302896236693, "loss": 2.84, "step": 42255 }, { "epoch": 2.07, "grad_norm": 0.6835176944732666, "learning_rate": 0.00013115030269404996, "loss": 2.9008, "step": 42256 }, { "epoch": 2.07, "grad_norm": 0.6507018804550171, "learning_rate": 0.00013113757687045063, "loss": 2.7376, "step": 42257 }, { "epoch": 2.07, "grad_norm": 0.7082822322845459, "learning_rate": 0.0001311248514916024, "loss": 3.1054, "step": 42258 }, { "epoch": 2.07, "grad_norm": 0.6697868704795837, "learning_rate": 0.0001311121265575387, "loss": 2.9395, "step": 42259 }, { "epoch": 2.07, "grad_norm": 0.7068183422088623, "learning_rate": 0.00013109940206829316, "loss": 2.9129, "step": 42260 }, { "epoch": 2.07, "grad_norm": 0.7108052968978882, "learning_rate": 0.0001310866780238994, "loss": 3.0556, "step": 42261 }, { "epoch": 2.07, "grad_norm": 0.7249935865402222, "learning_rate": 0.00013107395442439073, "loss": 3.0242, "step": 42262 }, { "epoch": 2.07, "grad_norm": 0.6782382130622864, "learning_rate": 0.00013106123126980083, "loss": 2.988, "step": 42263 }, { "epoch": 2.07, "grad_norm": 0.6726512908935547, "learning_rate": 0.00013104850856016304, "loss": 2.9917, "step": 42264 }, { "epoch": 2.07, "grad_norm": 0.6596064567565918, "learning_rate": 0.00013103578629551104, "loss": 3.1468, "step": 42265 }, { "epoch": 2.07, "grad_norm": 0.6861616969108582, "learning_rate": 0.0001310230644758783, "loss": 2.9792, "step": 42266 }, { "epoch": 2.07, "grad_norm": 0.7167417407035828, "learning_rate": 0.0001310103431012982, "loss": 2.8683, "step": 42267 }, { "epoch": 2.07, "grad_norm": 0.671953558921814, "learning_rate": 0.0001309976221718044, "loss": 3.0194, "step": 42268 }, { "epoch": 2.07, "grad_norm": 0.6684266924858093, "learning_rate": 0.00013098490168743024, "loss": 2.8826, "step": 42269 }, { "epoch": 2.07, "grad_norm": 0.6714996099472046, "learning_rate": 0.00013097218164820934, "loss": 2.8523, "step": 42270 }, { "epoch": 2.07, "grad_norm": 0.6615957617759705, "learning_rate": 0.00013095946205417527, "loss": 2.9081, "step": 42271 }, { "epoch": 2.07, "grad_norm": 0.7182602882385254, "learning_rate": 0.00013094674290536144, "loss": 2.9164, "step": 42272 }, { "epoch": 2.07, "grad_norm": 0.6356480717658997, "learning_rate": 0.00013093402420180137, "loss": 2.974, "step": 42273 }, { "epoch": 2.07, "grad_norm": 0.6807072162628174, "learning_rate": 0.00013092130594352845, "loss": 2.9493, "step": 42274 }, { "epoch": 2.07, "grad_norm": 0.7285915613174438, "learning_rate": 0.00013090858813057622, "loss": 3.148, "step": 42275 }, { "epoch": 2.07, "grad_norm": 0.6464709639549255, "learning_rate": 0.0001308958707629784, "loss": 3.0066, "step": 42276 }, { "epoch": 2.07, "grad_norm": 0.7085345387458801, "learning_rate": 0.00013088315384076813, "loss": 2.9503, "step": 42277 }, { "epoch": 2.07, "grad_norm": 0.7010959386825562, "learning_rate": 0.00013087043736397923, "loss": 2.9879, "step": 42278 }, { "epoch": 2.07, "grad_norm": 0.6488636136054993, "learning_rate": 0.00013085772133264505, "loss": 3.0544, "step": 42279 }, { "epoch": 2.07, "grad_norm": 0.6806224584579468, "learning_rate": 0.00013084500574679896, "loss": 3.0751, "step": 42280 }, { "epoch": 2.07, "grad_norm": 0.7017746567726135, "learning_rate": 0.00013083229060647473, "loss": 2.8714, "step": 42281 }, { "epoch": 2.07, "grad_norm": 0.7165690660476685, "learning_rate": 0.00013081957591170551, "loss": 2.8805, "step": 42282 }, { "epoch": 2.07, "grad_norm": 0.6798978447914124, "learning_rate": 0.00013080686166252515, "loss": 2.9131, "step": 42283 }, { "epoch": 2.07, "grad_norm": 0.6638495922088623, "learning_rate": 0.00013079414785896676, "loss": 2.9656, "step": 42284 }, { "epoch": 2.07, "grad_norm": 0.7176745533943176, "learning_rate": 0.00013078143450106407, "loss": 2.8036, "step": 42285 }, { "epoch": 2.07, "grad_norm": 0.6890358328819275, "learning_rate": 0.00013076872158885062, "loss": 2.8643, "step": 42286 }, { "epoch": 2.07, "grad_norm": 0.6619595289230347, "learning_rate": 0.0001307560091223598, "loss": 2.8526, "step": 42287 }, { "epoch": 2.07, "grad_norm": 0.682287871837616, "learning_rate": 0.00013074329710162507, "loss": 2.9174, "step": 42288 }, { "epoch": 2.07, "grad_norm": 0.6877726912498474, "learning_rate": 0.00013073058552667978, "loss": 3.0925, "step": 42289 }, { "epoch": 2.07, "grad_norm": 0.6723368167877197, "learning_rate": 0.0001307178743975576, "loss": 2.8354, "step": 42290 }, { "epoch": 2.07, "grad_norm": 0.6675642132759094, "learning_rate": 0.00013070516371429204, "loss": 2.9596, "step": 42291 }, { "epoch": 2.07, "grad_norm": 0.6696404218673706, "learning_rate": 0.0001306924534769164, "loss": 2.8843, "step": 42292 }, { "epoch": 2.07, "grad_norm": 1.3090136051177979, "learning_rate": 0.00013067974368546437, "loss": 3.0016, "step": 42293 }, { "epoch": 2.07, "grad_norm": 0.6999812722206116, "learning_rate": 0.0001306670343399693, "loss": 2.9312, "step": 42294 }, { "epoch": 2.07, "grad_norm": 0.6614927053451538, "learning_rate": 0.00013065432544046455, "loss": 2.9818, "step": 42295 }, { "epoch": 2.07, "grad_norm": 0.6659789085388184, "learning_rate": 0.00013064161698698383, "loss": 2.8157, "step": 42296 }, { "epoch": 2.07, "grad_norm": 0.6961437463760376, "learning_rate": 0.0001306289089795604, "loss": 2.8556, "step": 42297 }, { "epoch": 2.07, "grad_norm": 0.6694318056106567, "learning_rate": 0.00013061620141822792, "loss": 2.9884, "step": 42298 }, { "epoch": 2.07, "grad_norm": 0.6987858414649963, "learning_rate": 0.0001306034943030197, "loss": 2.8139, "step": 42299 }, { "epoch": 2.07, "grad_norm": 0.6902264952659607, "learning_rate": 0.00013059078763396923, "loss": 2.9773, "step": 42300 }, { "epoch": 2.07, "grad_norm": 0.6752839088439941, "learning_rate": 0.00013057808141111026, "loss": 2.8498, "step": 42301 }, { "epoch": 2.07, "grad_norm": 0.7169557809829712, "learning_rate": 0.00013056537563447578, "loss": 2.7842, "step": 42302 }, { "epoch": 2.07, "grad_norm": 0.7090547680854797, "learning_rate": 0.00013055267030409964, "loss": 3.2104, "step": 42303 }, { "epoch": 2.07, "grad_norm": 0.6589310169219971, "learning_rate": 0.00013053996542001503, "loss": 2.7599, "step": 42304 }, { "epoch": 2.07, "grad_norm": 0.7081121802330017, "learning_rate": 0.00013052726098225563, "loss": 3.0029, "step": 42305 }, { "epoch": 2.07, "grad_norm": 0.6937905550003052, "learning_rate": 0.0001305145569908547, "loss": 3.0007, "step": 42306 }, { "epoch": 2.07, "grad_norm": 0.6769243478775024, "learning_rate": 0.0001305018534458458, "loss": 2.8763, "step": 42307 }, { "epoch": 2.07, "grad_norm": 0.7453411221504211, "learning_rate": 0.00013048915034726254, "loss": 2.8378, "step": 42308 }, { "epoch": 2.07, "grad_norm": 0.674867570400238, "learning_rate": 0.0001304764476951382, "loss": 3.0571, "step": 42309 }, { "epoch": 2.07, "grad_norm": 0.707789421081543, "learning_rate": 0.00013046374548950632, "loss": 3.1763, "step": 42310 }, { "epoch": 2.07, "grad_norm": 0.7105609178543091, "learning_rate": 0.00013045104373040015, "loss": 2.9595, "step": 42311 }, { "epoch": 2.07, "grad_norm": 0.7053055167198181, "learning_rate": 0.0001304383424178533, "loss": 2.9741, "step": 42312 }, { "epoch": 2.07, "grad_norm": 0.6682379841804504, "learning_rate": 0.00013042564155189937, "loss": 2.9207, "step": 42313 }, { "epoch": 2.07, "grad_norm": 0.6992611289024353, "learning_rate": 0.00013041294113257153, "loss": 2.9781, "step": 42314 }, { "epoch": 2.07, "grad_norm": 0.6636302471160889, "learning_rate": 0.0001304002411599035, "loss": 2.9719, "step": 42315 }, { "epoch": 2.07, "grad_norm": 0.6672903299331665, "learning_rate": 0.00013038754163392858, "loss": 3.0387, "step": 42316 }, { "epoch": 2.07, "grad_norm": 0.7034769654273987, "learning_rate": 0.0001303748425546801, "loss": 2.8552, "step": 42317 }, { "epoch": 2.07, "grad_norm": 0.665863037109375, "learning_rate": 0.00013036214392219176, "loss": 2.9452, "step": 42318 }, { "epoch": 2.07, "grad_norm": 0.6769158244132996, "learning_rate": 0.0001303494457364968, "loss": 2.951, "step": 42319 }, { "epoch": 2.07, "grad_norm": 0.6977242827415466, "learning_rate": 0.00013033674799762884, "loss": 2.9524, "step": 42320 }, { "epoch": 2.07, "grad_norm": 0.6679743528366089, "learning_rate": 0.00013032405070562112, "loss": 2.8681, "step": 42321 }, { "epoch": 2.07, "grad_norm": 0.7165648341178894, "learning_rate": 0.0001303113538605072, "loss": 3.006, "step": 42322 }, { "epoch": 2.07, "grad_norm": 0.7313899993896484, "learning_rate": 0.00013029865746232063, "loss": 2.9289, "step": 42323 }, { "epoch": 2.07, "grad_norm": 0.6785582304000854, "learning_rate": 0.00013028596151109473, "loss": 2.9195, "step": 42324 }, { "epoch": 2.07, "grad_norm": 0.6553201079368591, "learning_rate": 0.00013027326600686294, "loss": 2.9507, "step": 42325 }, { "epoch": 2.07, "grad_norm": 0.7384634017944336, "learning_rate": 0.0001302605709496586, "loss": 2.8676, "step": 42326 }, { "epoch": 2.07, "grad_norm": 0.6803930401802063, "learning_rate": 0.00013024787633951528, "loss": 2.7254, "step": 42327 }, { "epoch": 2.07, "grad_norm": 0.7045084834098816, "learning_rate": 0.0001302351821764665, "loss": 2.9962, "step": 42328 }, { "epoch": 2.07, "grad_norm": 0.7233037352561951, "learning_rate": 0.00013022248846054538, "loss": 2.6192, "step": 42329 }, { "epoch": 2.07, "grad_norm": 0.7065985798835754, "learning_rate": 0.00013020979519178574, "loss": 3.0464, "step": 42330 }, { "epoch": 2.07, "grad_norm": 0.6664899587631226, "learning_rate": 0.00013019710237022078, "loss": 2.9836, "step": 42331 }, { "epoch": 2.07, "grad_norm": 0.6708837151527405, "learning_rate": 0.0001301844099958839, "loss": 3.0906, "step": 42332 }, { "epoch": 2.07, "grad_norm": 0.6945251822471619, "learning_rate": 0.0001301717180688087, "loss": 2.8813, "step": 42333 }, { "epoch": 2.07, "grad_norm": 0.6718724370002747, "learning_rate": 0.00013015902658902837, "loss": 3.0567, "step": 42334 }, { "epoch": 2.07, "grad_norm": 0.6948716044425964, "learning_rate": 0.00013014633555657665, "loss": 3.2154, "step": 42335 }, { "epoch": 2.07, "grad_norm": 0.6696521639823914, "learning_rate": 0.00013013364497148661, "loss": 2.8059, "step": 42336 }, { "epoch": 2.07, "grad_norm": 0.723652184009552, "learning_rate": 0.000130120954833792, "loss": 2.7653, "step": 42337 }, { "epoch": 2.07, "grad_norm": 0.7283807992935181, "learning_rate": 0.00013010826514352615, "loss": 2.9663, "step": 42338 }, { "epoch": 2.07, "grad_norm": 0.7475623488426208, "learning_rate": 0.00013009557590072226, "loss": 3.0521, "step": 42339 }, { "epoch": 2.07, "grad_norm": 0.6831005811691284, "learning_rate": 0.00013008288710541406, "loss": 3.1819, "step": 42340 }, { "epoch": 2.08, "grad_norm": 0.6755504012107849, "learning_rate": 0.0001300701987576347, "loss": 2.8866, "step": 42341 }, { "epoch": 2.08, "grad_norm": 0.6740015149116516, "learning_rate": 0.00013005751085741777, "loss": 2.8911, "step": 42342 }, { "epoch": 2.08, "grad_norm": 0.6407496929168701, "learning_rate": 0.00013004482340479672, "loss": 2.9256, "step": 42343 }, { "epoch": 2.08, "grad_norm": 0.7576172947883606, "learning_rate": 0.0001300321363998048, "loss": 3.1794, "step": 42344 }, { "epoch": 2.08, "grad_norm": 0.6567968726158142, "learning_rate": 0.00013001944984247565, "loss": 2.9828, "step": 42345 }, { "epoch": 2.08, "grad_norm": 0.7320488691329956, "learning_rate": 0.00013000676373284254, "loss": 2.9482, "step": 42346 }, { "epoch": 2.08, "grad_norm": 0.646527886390686, "learning_rate": 0.0001299940780709388, "loss": 2.8785, "step": 42347 }, { "epoch": 2.08, "grad_norm": 0.6653791069984436, "learning_rate": 0.00012998139285679802, "loss": 3.0511, "step": 42348 }, { "epoch": 2.08, "grad_norm": 0.6764574646949768, "learning_rate": 0.00012996870809045346, "loss": 2.9864, "step": 42349 }, { "epoch": 2.08, "grad_norm": 0.6396170854568481, "learning_rate": 0.00012995602377193868, "loss": 2.8156, "step": 42350 }, { "epoch": 2.08, "grad_norm": 0.6769349575042725, "learning_rate": 0.00012994333990128693, "loss": 2.9659, "step": 42351 }, { "epoch": 2.08, "grad_norm": 0.711395263671875, "learning_rate": 0.00012993065647853177, "loss": 3.0182, "step": 42352 }, { "epoch": 2.08, "grad_norm": 0.6921821236610413, "learning_rate": 0.00012991797350370656, "loss": 2.8534, "step": 42353 }, { "epoch": 2.08, "grad_norm": 0.686550498008728, "learning_rate": 0.00012990529097684455, "loss": 2.9083, "step": 42354 }, { "epoch": 2.08, "grad_norm": 0.7336958646774292, "learning_rate": 0.00012989260889797934, "loss": 2.7641, "step": 42355 }, { "epoch": 2.08, "grad_norm": 0.7120491862297058, "learning_rate": 0.0001298799272671442, "loss": 2.9055, "step": 42356 }, { "epoch": 2.08, "grad_norm": 0.6996833086013794, "learning_rate": 0.0001298672460843726, "loss": 2.9461, "step": 42357 }, { "epoch": 2.08, "grad_norm": 0.7006560564041138, "learning_rate": 0.000129854565349698, "loss": 2.8888, "step": 42358 }, { "epoch": 2.08, "grad_norm": 0.6544038653373718, "learning_rate": 0.0001298418850631538, "loss": 2.8266, "step": 42359 }, { "epoch": 2.08, "grad_norm": 0.7305658459663391, "learning_rate": 0.00012982920522477324, "loss": 3.1084, "step": 42360 }, { "epoch": 2.08, "grad_norm": 0.6678386330604553, "learning_rate": 0.00012981652583458973, "loss": 2.7814, "step": 42361 }, { "epoch": 2.08, "grad_norm": 0.6706693768501282, "learning_rate": 0.00012980384689263675, "loss": 2.9772, "step": 42362 }, { "epoch": 2.08, "grad_norm": 0.7006844878196716, "learning_rate": 0.00012979116839894779, "loss": 2.9108, "step": 42363 }, { "epoch": 2.08, "grad_norm": 0.7549619078636169, "learning_rate": 0.000129778490353556, "loss": 3.0531, "step": 42364 }, { "epoch": 2.08, "grad_norm": 0.6899545192718506, "learning_rate": 0.00012976581275649503, "loss": 2.8675, "step": 42365 }, { "epoch": 2.08, "grad_norm": 0.6634476184844971, "learning_rate": 0.00012975313560779805, "loss": 3.0111, "step": 42366 }, { "epoch": 2.08, "grad_norm": 0.6710315942764282, "learning_rate": 0.00012974045890749866, "loss": 2.9529, "step": 42367 }, { "epoch": 2.08, "grad_norm": 0.6514169573783875, "learning_rate": 0.0001297277826556301, "loss": 2.697, "step": 42368 }, { "epoch": 2.08, "grad_norm": 0.6755321025848389, "learning_rate": 0.00012971510685222572, "loss": 3.0741, "step": 42369 }, { "epoch": 2.08, "grad_norm": 0.7050230503082275, "learning_rate": 0.00012970243149731904, "loss": 2.6275, "step": 42370 }, { "epoch": 2.08, "grad_norm": 0.7091357707977295, "learning_rate": 0.00012968975659094332, "loss": 2.6535, "step": 42371 }, { "epoch": 2.08, "grad_norm": 0.6768811941146851, "learning_rate": 0.00012967708213313197, "loss": 3.0096, "step": 42372 }, { "epoch": 2.08, "grad_norm": 0.6693670153617859, "learning_rate": 0.00012966440812391854, "loss": 2.8613, "step": 42373 }, { "epoch": 2.08, "grad_norm": 0.7181096076965332, "learning_rate": 0.00012965173456333626, "loss": 2.7873, "step": 42374 }, { "epoch": 2.08, "grad_norm": 0.7134459614753723, "learning_rate": 0.00012963906145141855, "loss": 2.8422, "step": 42375 }, { "epoch": 2.08, "grad_norm": 0.7182124853134155, "learning_rate": 0.00012962638878819864, "loss": 2.9213, "step": 42376 }, { "epoch": 2.08, "grad_norm": 0.7274218201637268, "learning_rate": 0.00012961371657371003, "loss": 2.7016, "step": 42377 }, { "epoch": 2.08, "grad_norm": 0.6675017476081848, "learning_rate": 0.0001296010448079862, "loss": 2.8717, "step": 42378 }, { "epoch": 2.08, "grad_norm": 0.7228912115097046, "learning_rate": 0.00012958837349106032, "loss": 3.0004, "step": 42379 }, { "epoch": 2.08, "grad_norm": 0.6356688737869263, "learning_rate": 0.000129575702622966, "loss": 2.5134, "step": 42380 }, { "epoch": 2.08, "grad_norm": 0.663016140460968, "learning_rate": 0.00012956303220373647, "loss": 2.9052, "step": 42381 }, { "epoch": 2.08, "grad_norm": 0.6874765157699585, "learning_rate": 0.00012955036223340498, "loss": 3.127, "step": 42382 }, { "epoch": 2.08, "grad_norm": 0.7107385396957397, "learning_rate": 0.00012953769271200518, "loss": 2.9775, "step": 42383 }, { "epoch": 2.08, "grad_norm": 0.7340335249900818, "learning_rate": 0.00012952502363957014, "loss": 2.8887, "step": 42384 }, { "epoch": 2.08, "grad_norm": 0.6798539161682129, "learning_rate": 0.00012951235501613352, "loss": 2.9121, "step": 42385 }, { "epoch": 2.08, "grad_norm": 0.7123425006866455, "learning_rate": 0.00012949968684172845, "loss": 3.0914, "step": 42386 }, { "epoch": 2.08, "grad_norm": 0.6899136900901794, "learning_rate": 0.00012948701911638845, "loss": 2.9722, "step": 42387 }, { "epoch": 2.08, "grad_norm": 0.7115585207939148, "learning_rate": 0.00012947435184014673, "loss": 3.0044, "step": 42388 }, { "epoch": 2.08, "grad_norm": 0.6917968392372131, "learning_rate": 0.0001294616850130369, "loss": 3.1324, "step": 42389 }, { "epoch": 2.08, "grad_norm": 0.6570953726768494, "learning_rate": 0.00012944901863509213, "loss": 2.9288, "step": 42390 }, { "epoch": 2.08, "grad_norm": 0.6744385957717896, "learning_rate": 0.00012943635270634574, "loss": 2.7654, "step": 42391 }, { "epoch": 2.08, "grad_norm": 0.7136799693107605, "learning_rate": 0.00012942368722683125, "loss": 2.899, "step": 42392 }, { "epoch": 2.08, "grad_norm": 0.6737086772918701, "learning_rate": 0.00012941102219658182, "loss": 2.7251, "step": 42393 }, { "epoch": 2.08, "grad_norm": 0.6692909598350525, "learning_rate": 0.00012939835761563095, "loss": 3.0238, "step": 42394 }, { "epoch": 2.08, "grad_norm": 0.6992953419685364, "learning_rate": 0.0001293856934840121, "loss": 2.8346, "step": 42395 }, { "epoch": 2.08, "grad_norm": 0.6643773913383484, "learning_rate": 0.00012937302980175848, "loss": 2.7622, "step": 42396 }, { "epoch": 2.08, "grad_norm": 0.6850830912590027, "learning_rate": 0.00012936036656890345, "loss": 2.9824, "step": 42397 }, { "epoch": 2.08, "grad_norm": 0.7364450097084045, "learning_rate": 0.0001293477037854803, "loss": 2.953, "step": 42398 }, { "epoch": 2.08, "grad_norm": 0.7071236968040466, "learning_rate": 0.00012933504145152245, "loss": 3.1013, "step": 42399 }, { "epoch": 2.08, "grad_norm": 0.7141284942626953, "learning_rate": 0.00012932237956706332, "loss": 2.7942, "step": 42400 }, { "epoch": 2.08, "grad_norm": 0.7371499538421631, "learning_rate": 0.00012930971813213612, "loss": 2.533, "step": 42401 }, { "epoch": 2.08, "grad_norm": 0.7052841782569885, "learning_rate": 0.0001292970571467744, "loss": 2.8772, "step": 42402 }, { "epoch": 2.08, "grad_norm": 0.7750392556190491, "learning_rate": 0.00012928439661101133, "loss": 2.9739, "step": 42403 }, { "epoch": 2.08, "grad_norm": 0.7135288715362549, "learning_rate": 0.00012927173652488023, "loss": 2.8134, "step": 42404 }, { "epoch": 2.08, "grad_norm": 0.6652043461799622, "learning_rate": 0.00012925907688841464, "loss": 3.063, "step": 42405 }, { "epoch": 2.08, "grad_norm": 0.6913604736328125, "learning_rate": 0.00012924641770164762, "loss": 2.7423, "step": 42406 }, { "epoch": 2.08, "grad_norm": 0.7233631014823914, "learning_rate": 0.0001292337589646128, "loss": 2.9022, "step": 42407 }, { "epoch": 2.08, "grad_norm": 0.6916614174842834, "learning_rate": 0.0001292211006773433, "loss": 2.9954, "step": 42408 }, { "epoch": 2.08, "grad_norm": 0.7121450901031494, "learning_rate": 0.00012920844283987256, "loss": 3.0694, "step": 42409 }, { "epoch": 2.08, "grad_norm": 0.7093349695205688, "learning_rate": 0.00012919578545223404, "loss": 3.1103, "step": 42410 }, { "epoch": 2.08, "grad_norm": 0.6733999848365784, "learning_rate": 0.0001291831285144609, "loss": 2.8649, "step": 42411 }, { "epoch": 2.08, "grad_norm": 0.6598990559577942, "learning_rate": 0.00012917047202658656, "loss": 3.0327, "step": 42412 }, { "epoch": 2.08, "grad_norm": 0.6923961043357849, "learning_rate": 0.00012915781598864422, "loss": 2.8068, "step": 42413 }, { "epoch": 2.08, "grad_norm": 0.6872490048408508, "learning_rate": 0.00012914516040066727, "loss": 2.7399, "step": 42414 }, { "epoch": 2.08, "grad_norm": 0.6673179268836975, "learning_rate": 0.00012913250526268923, "loss": 2.9066, "step": 42415 }, { "epoch": 2.08, "grad_norm": 0.6778900027275085, "learning_rate": 0.00012911985057474315, "loss": 3.0026, "step": 42416 }, { "epoch": 2.08, "grad_norm": 0.7145534157752991, "learning_rate": 0.00012910719633686264, "loss": 2.9472, "step": 42417 }, { "epoch": 2.08, "grad_norm": 0.6881458163261414, "learning_rate": 0.00012909454254908084, "loss": 2.8058, "step": 42418 }, { "epoch": 2.08, "grad_norm": 0.6922593712806702, "learning_rate": 0.00012908188921143104, "loss": 2.9075, "step": 42419 }, { "epoch": 2.08, "grad_norm": 0.6974817514419556, "learning_rate": 0.00012906923632394676, "loss": 3.0196, "step": 42420 }, { "epoch": 2.08, "grad_norm": 0.6990817189216614, "learning_rate": 0.0001290565838866611, "loss": 2.8226, "step": 42421 }, { "epoch": 2.08, "grad_norm": 0.7017726302146912, "learning_rate": 0.00012904393189960763, "loss": 2.9575, "step": 42422 }, { "epoch": 2.08, "grad_norm": 0.7260956168174744, "learning_rate": 0.00012903128036281943, "loss": 2.9157, "step": 42423 }, { "epoch": 2.08, "grad_norm": 0.675983726978302, "learning_rate": 0.00012901862927632994, "loss": 2.8013, "step": 42424 }, { "epoch": 2.08, "grad_norm": 0.7136183977127075, "learning_rate": 0.00012900597864017258, "loss": 2.784, "step": 42425 }, { "epoch": 2.08, "grad_norm": 0.7095462679862976, "learning_rate": 0.00012899332845438052, "loss": 2.9734, "step": 42426 }, { "epoch": 2.08, "grad_norm": 0.6412888765335083, "learning_rate": 0.00012898067871898716, "loss": 3.1996, "step": 42427 }, { "epoch": 2.08, "grad_norm": 0.7176571488380432, "learning_rate": 0.00012896802943402564, "loss": 2.721, "step": 42428 }, { "epoch": 2.08, "grad_norm": 0.685215950012207, "learning_rate": 0.00012895538059952947, "loss": 3.2016, "step": 42429 }, { "epoch": 2.08, "grad_norm": 0.7202776670455933, "learning_rate": 0.00012894273221553197, "loss": 2.8996, "step": 42430 }, { "epoch": 2.08, "grad_norm": 0.6567890644073486, "learning_rate": 0.0001289300842820663, "loss": 3.0631, "step": 42431 }, { "epoch": 2.08, "grad_norm": 0.7814381718635559, "learning_rate": 0.00012891743679916597, "loss": 2.7118, "step": 42432 }, { "epoch": 2.08, "grad_norm": 0.6643449068069458, "learning_rate": 0.00012890478976686418, "loss": 2.8286, "step": 42433 }, { "epoch": 2.08, "grad_norm": 0.6698668003082275, "learning_rate": 0.00012889214318519417, "loss": 2.9327, "step": 42434 }, { "epoch": 2.08, "grad_norm": 0.6875276565551758, "learning_rate": 0.00012887949705418938, "loss": 2.9944, "step": 42435 }, { "epoch": 2.08, "grad_norm": 0.7161490321159363, "learning_rate": 0.00012886685137388298, "loss": 2.9799, "step": 42436 }, { "epoch": 2.08, "grad_norm": 0.6984416246414185, "learning_rate": 0.00012885420614430846, "loss": 2.8483, "step": 42437 }, { "epoch": 2.08, "grad_norm": 0.6920896768569946, "learning_rate": 0.00012884156136549893, "loss": 3.0116, "step": 42438 }, { "epoch": 2.08, "grad_norm": 0.704794704914093, "learning_rate": 0.00012882891703748793, "loss": 2.6875, "step": 42439 }, { "epoch": 2.08, "grad_norm": 0.7424866557121277, "learning_rate": 0.00012881627316030858, "loss": 2.78, "step": 42440 }, { "epoch": 2.08, "grad_norm": 0.6924298405647278, "learning_rate": 0.0001288036297339941, "loss": 2.8718, "step": 42441 }, { "epoch": 2.08, "grad_norm": 0.6861541271209717, "learning_rate": 0.000128790986758578, "loss": 2.786, "step": 42442 }, { "epoch": 2.08, "grad_norm": 0.720872700214386, "learning_rate": 0.00012877834423409345, "loss": 2.7509, "step": 42443 }, { "epoch": 2.08, "grad_norm": 0.6972219347953796, "learning_rate": 0.00012876570216057375, "loss": 3.0085, "step": 42444 }, { "epoch": 2.08, "grad_norm": 0.6819028854370117, "learning_rate": 0.00012875306053805236, "loss": 2.9576, "step": 42445 }, { "epoch": 2.08, "grad_norm": 0.6733766794204712, "learning_rate": 0.0001287404193665623, "loss": 2.9113, "step": 42446 }, { "epoch": 2.08, "grad_norm": 0.6779769062995911, "learning_rate": 0.00012872777864613718, "loss": 2.7558, "step": 42447 }, { "epoch": 2.08, "grad_norm": 0.7019845843315125, "learning_rate": 0.00012871513837681007, "loss": 2.8872, "step": 42448 }, { "epoch": 2.08, "grad_norm": 0.7672656178474426, "learning_rate": 0.00012870249855861428, "loss": 3.0762, "step": 42449 }, { "epoch": 2.08, "grad_norm": 0.6681340932846069, "learning_rate": 0.00012868985919158322, "loss": 3.0499, "step": 42450 }, { "epoch": 2.08, "grad_norm": 0.7144621014595032, "learning_rate": 0.00012867722027574998, "loss": 3.2119, "step": 42451 }, { "epoch": 2.08, "grad_norm": 0.671342134475708, "learning_rate": 0.0001286645818111481, "loss": 3.0014, "step": 42452 }, { "epoch": 2.08, "grad_norm": 0.6688801646232605, "learning_rate": 0.00012865194379781063, "loss": 2.8508, "step": 42453 }, { "epoch": 2.08, "grad_norm": 0.6864688396453857, "learning_rate": 0.00012863930623577103, "loss": 3.1261, "step": 42454 }, { "epoch": 2.08, "grad_norm": 0.7038698792457581, "learning_rate": 0.00012862666912506255, "loss": 2.8566, "step": 42455 }, { "epoch": 2.08, "grad_norm": 0.6805946230888367, "learning_rate": 0.00012861403246571836, "loss": 2.8152, "step": 42456 }, { "epoch": 2.08, "grad_norm": 0.6786236763000488, "learning_rate": 0.00012860139625777187, "loss": 2.89, "step": 42457 }, { "epoch": 2.08, "grad_norm": 0.694636881351471, "learning_rate": 0.00012858876050125627, "loss": 2.8622, "step": 42458 }, { "epoch": 2.08, "grad_norm": 0.7160537838935852, "learning_rate": 0.00012857612519620487, "loss": 2.8614, "step": 42459 }, { "epoch": 2.08, "grad_norm": 0.675896406173706, "learning_rate": 0.00012856349034265105, "loss": 2.8066, "step": 42460 }, { "epoch": 2.08, "grad_norm": 0.6733073592185974, "learning_rate": 0.000128550855940628, "loss": 3.0653, "step": 42461 }, { "epoch": 2.08, "grad_norm": 0.7059203386306763, "learning_rate": 0.000128538221990169, "loss": 3.1194, "step": 42462 }, { "epoch": 2.08, "grad_norm": 0.6924806237220764, "learning_rate": 0.00012852558849130725, "loss": 2.6967, "step": 42463 }, { "epoch": 2.08, "grad_norm": 0.6774609684944153, "learning_rate": 0.00012851295544407618, "loss": 3.0024, "step": 42464 }, { "epoch": 2.08, "grad_norm": 0.6729218363761902, "learning_rate": 0.0001285003228485089, "loss": 2.94, "step": 42465 }, { "epoch": 2.08, "grad_norm": 0.673554539680481, "learning_rate": 0.0001284876907046387, "loss": 2.9425, "step": 42466 }, { "epoch": 2.08, "grad_norm": 0.7064787149429321, "learning_rate": 0.00012847505901249912, "loss": 2.8488, "step": 42467 }, { "epoch": 2.08, "grad_norm": 0.6640927791595459, "learning_rate": 0.00012846242777212304, "loss": 2.7895, "step": 42468 }, { "epoch": 2.08, "grad_norm": 0.6812401413917542, "learning_rate": 0.00012844979698354406, "loss": 2.8939, "step": 42469 }, { "epoch": 2.08, "grad_norm": 0.6853512525558472, "learning_rate": 0.00012843716664679528, "loss": 3.0094, "step": 42470 }, { "epoch": 2.08, "grad_norm": 0.673121988773346, "learning_rate": 0.00012842453676190988, "loss": 2.8918, "step": 42471 }, { "epoch": 2.08, "grad_norm": 0.7033911347389221, "learning_rate": 0.00012841190732892135, "loss": 2.8784, "step": 42472 }, { "epoch": 2.08, "grad_norm": 0.6943319439888, "learning_rate": 0.00012839927834786268, "loss": 2.8472, "step": 42473 }, { "epoch": 2.08, "grad_norm": 0.6493386626243591, "learning_rate": 0.00012838664981876743, "loss": 2.7605, "step": 42474 }, { "epoch": 2.08, "grad_norm": 0.6886091232299805, "learning_rate": 0.00012837402174166863, "loss": 2.9289, "step": 42475 }, { "epoch": 2.08, "grad_norm": 0.6950169205665588, "learning_rate": 0.0001283613941165997, "loss": 3.0136, "step": 42476 }, { "epoch": 2.08, "grad_norm": 0.7558622360229492, "learning_rate": 0.00012834876694359386, "loss": 2.9728, "step": 42477 }, { "epoch": 2.08, "grad_norm": 0.7118024230003357, "learning_rate": 0.00012833614022268418, "loss": 2.9034, "step": 42478 }, { "epoch": 2.08, "grad_norm": 0.6858274340629578, "learning_rate": 0.0001283235139539042, "loss": 2.7353, "step": 42479 }, { "epoch": 2.08, "grad_norm": 0.6761425733566284, "learning_rate": 0.00012831088813728694, "loss": 2.8738, "step": 42480 }, { "epoch": 2.08, "grad_norm": 0.686199426651001, "learning_rate": 0.00012829826277286577, "loss": 2.7441, "step": 42481 }, { "epoch": 2.08, "grad_norm": 0.7111159563064575, "learning_rate": 0.00012828563786067398, "loss": 2.8098, "step": 42482 }, { "epoch": 2.08, "grad_norm": 0.7450211644172668, "learning_rate": 0.00012827301340074485, "loss": 3.027, "step": 42483 }, { "epoch": 2.08, "grad_norm": 0.6911284923553467, "learning_rate": 0.0001282603893931115, "loss": 3.0419, "step": 42484 }, { "epoch": 2.08, "grad_norm": 0.6608761548995972, "learning_rate": 0.00012824776583780716, "loss": 2.6919, "step": 42485 }, { "epoch": 2.08, "grad_norm": 0.687088131904602, "learning_rate": 0.00012823514273486511, "loss": 2.9157, "step": 42486 }, { "epoch": 2.08, "grad_norm": 0.6786913871765137, "learning_rate": 0.00012822252008431878, "loss": 2.8025, "step": 42487 }, { "epoch": 2.08, "grad_norm": 0.6923184394836426, "learning_rate": 0.00012820989788620119, "loss": 2.9774, "step": 42488 }, { "epoch": 2.08, "grad_norm": 0.7364439964294434, "learning_rate": 0.0001281972761405457, "loss": 3.0177, "step": 42489 }, { "epoch": 2.08, "grad_norm": 0.6883888840675354, "learning_rate": 0.00012818465484738548, "loss": 3.1355, "step": 42490 }, { "epoch": 2.08, "grad_norm": 0.6888550519943237, "learning_rate": 0.0001281720340067539, "loss": 2.9592, "step": 42491 }, { "epoch": 2.08, "grad_norm": 0.6707346439361572, "learning_rate": 0.0001281594136186841, "loss": 2.9254, "step": 42492 }, { "epoch": 2.08, "grad_norm": 0.695807158946991, "learning_rate": 0.00012814679368320923, "loss": 2.833, "step": 42493 }, { "epoch": 2.08, "grad_norm": 0.7045168876647949, "learning_rate": 0.00012813417420036273, "loss": 2.9055, "step": 42494 }, { "epoch": 2.08, "grad_norm": 0.6547208428382874, "learning_rate": 0.00012812155517017763, "loss": 2.7095, "step": 42495 }, { "epoch": 2.08, "grad_norm": 0.6461189985275269, "learning_rate": 0.0001281089365926873, "loss": 2.9019, "step": 42496 }, { "epoch": 2.08, "grad_norm": 0.6801860332489014, "learning_rate": 0.00012809631846792503, "loss": 2.8929, "step": 42497 }, { "epoch": 2.08, "grad_norm": 0.704507052898407, "learning_rate": 0.00012808370079592398, "loss": 2.8624, "step": 42498 }, { "epoch": 2.08, "grad_norm": 0.6679760217666626, "learning_rate": 0.00012807108357671738, "loss": 2.8551, "step": 42499 }, { "epoch": 2.08, "grad_norm": 0.6944798231124878, "learning_rate": 0.0001280584668103384, "loss": 3.0003, "step": 42500 }, { "epoch": 2.08, "grad_norm": 0.7155947089195251, "learning_rate": 0.00012804585049682024, "loss": 2.9066, "step": 42501 }, { "epoch": 2.08, "grad_norm": 0.7650623321533203, "learning_rate": 0.00012803323463619638, "loss": 2.8278, "step": 42502 }, { "epoch": 2.08, "grad_norm": 0.6742417812347412, "learning_rate": 0.00012802061922849976, "loss": 2.7601, "step": 42503 }, { "epoch": 2.08, "grad_norm": 0.6518091559410095, "learning_rate": 0.00012800800427376384, "loss": 3.0368, "step": 42504 }, { "epoch": 2.08, "grad_norm": 0.7054563164710999, "learning_rate": 0.00012799538977202173, "loss": 2.8426, "step": 42505 }, { "epoch": 2.08, "grad_norm": 0.7084836959838867, "learning_rate": 0.00012798277572330656, "loss": 3.0179, "step": 42506 }, { "epoch": 2.08, "grad_norm": 0.6683297753334045, "learning_rate": 0.00012797016212765175, "loss": 2.9062, "step": 42507 }, { "epoch": 2.08, "grad_norm": 0.6678385734558105, "learning_rate": 0.00012795754898509035, "loss": 3.106, "step": 42508 }, { "epoch": 2.08, "grad_norm": 0.7056717276573181, "learning_rate": 0.00012794493629565576, "loss": 2.9415, "step": 42509 }, { "epoch": 2.08, "grad_norm": 0.7119282484054565, "learning_rate": 0.00012793232405938095, "loss": 2.7636, "step": 42510 }, { "epoch": 2.08, "grad_norm": 0.743801474571228, "learning_rate": 0.0001279197122762993, "loss": 2.6962, "step": 42511 }, { "epoch": 2.08, "grad_norm": 0.7152933478355408, "learning_rate": 0.0001279071009464441, "loss": 2.87, "step": 42512 }, { "epoch": 2.08, "grad_norm": 0.6678538918495178, "learning_rate": 0.00012789449006984853, "loss": 3.0891, "step": 42513 }, { "epoch": 2.08, "grad_norm": 0.7106472849845886, "learning_rate": 0.0001278818796465457, "loss": 2.802, "step": 42514 }, { "epoch": 2.08, "grad_norm": 0.7291449904441833, "learning_rate": 0.0001278692696765688, "loss": 2.8319, "step": 42515 }, { "epoch": 2.08, "grad_norm": 0.6768184304237366, "learning_rate": 0.00012785666015995107, "loss": 2.8618, "step": 42516 }, { "epoch": 2.08, "grad_norm": 0.6788813471794128, "learning_rate": 0.00012784405109672595, "loss": 2.8516, "step": 42517 }, { "epoch": 2.08, "grad_norm": 0.6908031702041626, "learning_rate": 0.0001278314424869263, "loss": 3.0243, "step": 42518 }, { "epoch": 2.08, "grad_norm": 0.7076452374458313, "learning_rate": 0.0001278188343305856, "loss": 2.8712, "step": 42519 }, { "epoch": 2.08, "grad_norm": 0.6761187314987183, "learning_rate": 0.00012780622662773698, "loss": 2.963, "step": 42520 }, { "epoch": 2.08, "grad_norm": 0.7147707939147949, "learning_rate": 0.0001277936193784135, "loss": 3.1233, "step": 42521 }, { "epoch": 2.08, "grad_norm": 1.0204122066497803, "learning_rate": 0.0001277810125826486, "loss": 2.9935, "step": 42522 }, { "epoch": 2.08, "grad_norm": 0.690827488899231, "learning_rate": 0.00012776840624047526, "loss": 2.8386, "step": 42523 }, { "epoch": 2.08, "grad_norm": 0.6465492248535156, "learning_rate": 0.00012775580035192692, "loss": 2.8819, "step": 42524 }, { "epoch": 2.08, "grad_norm": 0.6738273501396179, "learning_rate": 0.00012774319491703652, "loss": 2.924, "step": 42525 }, { "epoch": 2.08, "grad_norm": 0.6924593448638916, "learning_rate": 0.00012773058993583735, "loss": 3.0896, "step": 42526 }, { "epoch": 2.08, "grad_norm": 0.6848966479301453, "learning_rate": 0.00012771798540836283, "loss": 2.8388, "step": 42527 }, { "epoch": 2.08, "grad_norm": 0.7153657674789429, "learning_rate": 0.00012770538133464595, "loss": 3.1066, "step": 42528 }, { "epoch": 2.08, "grad_norm": 0.6560115814208984, "learning_rate": 0.00012769277771471996, "loss": 2.9721, "step": 42529 }, { "epoch": 2.08, "grad_norm": 0.6960185766220093, "learning_rate": 0.0001276801745486179, "loss": 2.9123, "step": 42530 }, { "epoch": 2.08, "grad_norm": 0.64568030834198, "learning_rate": 0.00012766757183637308, "loss": 3.2167, "step": 42531 }, { "epoch": 2.08, "grad_norm": 0.697411060333252, "learning_rate": 0.00012765496957801886, "loss": 2.7463, "step": 42532 }, { "epoch": 2.08, "grad_norm": 0.6693733334541321, "learning_rate": 0.00012764236777358816, "loss": 2.99, "step": 42533 }, { "epoch": 2.08, "grad_norm": 0.6906144618988037, "learning_rate": 0.00012762976642311438, "loss": 2.9348, "step": 42534 }, { "epoch": 2.08, "grad_norm": 0.7639672160148621, "learning_rate": 0.00012761716552663063, "loss": 2.8812, "step": 42535 }, { "epoch": 2.08, "grad_norm": 0.6580451130867004, "learning_rate": 0.00012760456508417002, "loss": 2.9266, "step": 42536 }, { "epoch": 2.08, "grad_norm": 0.7195175886154175, "learning_rate": 0.00012759196509576587, "loss": 2.6738, "step": 42537 }, { "epoch": 2.08, "grad_norm": 0.7359403371810913, "learning_rate": 0.0001275793655614512, "loss": 2.9124, "step": 42538 }, { "epoch": 2.08, "grad_norm": 0.7050012946128845, "learning_rate": 0.00012756676648125942, "loss": 2.9858, "step": 42539 }, { "epoch": 2.08, "grad_norm": 0.6860030293464661, "learning_rate": 0.00012755416785522347, "loss": 2.7879, "step": 42540 }, { "epoch": 2.08, "grad_norm": 0.667359471321106, "learning_rate": 0.00012754156968337678, "loss": 3.0746, "step": 42541 }, { "epoch": 2.08, "grad_norm": 0.6795210242271423, "learning_rate": 0.0001275289719657524, "loss": 2.9643, "step": 42542 }, { "epoch": 2.08, "grad_norm": 0.6692383885383606, "learning_rate": 0.0001275163747023834, "loss": 2.9197, "step": 42543 }, { "epoch": 2.08, "grad_norm": 0.6974451541900635, "learning_rate": 0.0001275037778933032, "loss": 2.857, "step": 42544 }, { "epoch": 2.09, "grad_norm": 0.7106644511222839, "learning_rate": 0.0001274911815385447, "loss": 2.8773, "step": 42545 }, { "epoch": 2.09, "grad_norm": 0.7244455814361572, "learning_rate": 0.00012747858563814138, "loss": 2.9333, "step": 42546 }, { "epoch": 2.09, "grad_norm": 0.6804713606834412, "learning_rate": 0.00012746599019212613, "loss": 2.7637, "step": 42547 }, { "epoch": 2.09, "grad_norm": 0.7047104835510254, "learning_rate": 0.0001274533952005323, "loss": 2.9819, "step": 42548 }, { "epoch": 2.09, "grad_norm": 0.6896386742591858, "learning_rate": 0.00012744080066339305, "loss": 2.9393, "step": 42549 }, { "epoch": 2.09, "grad_norm": 0.7217565178871155, "learning_rate": 0.00012742820658074157, "loss": 3.0425, "step": 42550 }, { "epoch": 2.09, "grad_norm": 0.6903233528137207, "learning_rate": 0.000127415612952611, "loss": 3.207, "step": 42551 }, { "epoch": 2.09, "grad_norm": 0.6575881838798523, "learning_rate": 0.00012740301977903434, "loss": 2.8382, "step": 42552 }, { "epoch": 2.09, "grad_norm": 0.6848930716514587, "learning_rate": 0.00012739042706004495, "loss": 2.8969, "step": 42553 }, { "epoch": 2.09, "grad_norm": 0.6955052614212036, "learning_rate": 0.00012737783479567605, "loss": 3.1267, "step": 42554 }, { "epoch": 2.09, "grad_norm": 0.669148862361908, "learning_rate": 0.00012736524298596061, "loss": 3.1156, "step": 42555 }, { "epoch": 2.09, "grad_norm": 0.6611831188201904, "learning_rate": 0.00012735265163093198, "loss": 2.8058, "step": 42556 }, { "epoch": 2.09, "grad_norm": 0.6889923810958862, "learning_rate": 0.00012734006073062326, "loss": 2.7144, "step": 42557 }, { "epoch": 2.09, "grad_norm": 0.6790809631347656, "learning_rate": 0.00012732747028506745, "loss": 3.0519, "step": 42558 }, { "epoch": 2.09, "grad_norm": 0.6864499449729919, "learning_rate": 0.00012731488029429797, "loss": 2.8055, "step": 42559 }, { "epoch": 2.09, "grad_norm": 0.6939069628715515, "learning_rate": 0.00012730229075834777, "loss": 2.8274, "step": 42560 }, { "epoch": 2.09, "grad_norm": 0.6930176019668579, "learning_rate": 0.00012728970167725023, "loss": 2.8174, "step": 42561 }, { "epoch": 2.09, "grad_norm": 0.7337930798530579, "learning_rate": 0.00012727711305103822, "loss": 2.9908, "step": 42562 }, { "epoch": 2.09, "grad_norm": 0.6973217725753784, "learning_rate": 0.0001272645248797452, "loss": 3.1223, "step": 42563 }, { "epoch": 2.09, "grad_norm": 0.6723378896713257, "learning_rate": 0.00012725193716340414, "loss": 3.0629, "step": 42564 }, { "epoch": 2.09, "grad_norm": 0.7197105884552002, "learning_rate": 0.00012723934990204816, "loss": 3.0432, "step": 42565 }, { "epoch": 2.09, "grad_norm": 0.6942613124847412, "learning_rate": 0.00012722676309571057, "loss": 2.7, "step": 42566 }, { "epoch": 2.09, "grad_norm": 0.6984099745750427, "learning_rate": 0.00012721417674442433, "loss": 3.1964, "step": 42567 }, { "epoch": 2.09, "grad_norm": 0.6897585988044739, "learning_rate": 0.0001272015908482227, "loss": 2.9698, "step": 42568 }, { "epoch": 2.09, "grad_norm": 0.6685306429862976, "learning_rate": 0.00012718900540713892, "loss": 2.8009, "step": 42569 }, { "epoch": 2.09, "grad_norm": 0.7120461463928223, "learning_rate": 0.00012717642042120596, "loss": 2.9525, "step": 42570 }, { "epoch": 2.09, "grad_norm": 0.6789481043815613, "learning_rate": 0.00012716383589045715, "loss": 2.6623, "step": 42571 }, { "epoch": 2.09, "grad_norm": 0.6555876135826111, "learning_rate": 0.00012715125181492554, "loss": 2.8546, "step": 42572 }, { "epoch": 2.09, "grad_norm": 0.7006644010543823, "learning_rate": 0.00012713866819464412, "loss": 2.9681, "step": 42573 }, { "epoch": 2.09, "grad_norm": 0.681387722492218, "learning_rate": 0.0001271260850296463, "loss": 2.846, "step": 42574 }, { "epoch": 2.09, "grad_norm": 0.7118346095085144, "learning_rate": 0.00012711350231996502, "loss": 3.0905, "step": 42575 }, { "epoch": 2.09, "grad_norm": 0.673995316028595, "learning_rate": 0.0001271009200656336, "loss": 2.8904, "step": 42576 }, { "epoch": 2.09, "grad_norm": 0.6982349157333374, "learning_rate": 0.00012708833826668495, "loss": 2.7196, "step": 42577 }, { "epoch": 2.09, "grad_norm": 0.6825549006462097, "learning_rate": 0.00012707575692315248, "loss": 3.1081, "step": 42578 }, { "epoch": 2.09, "grad_norm": 0.6906778812408447, "learning_rate": 0.00012706317603506916, "loss": 2.7565, "step": 42579 }, { "epoch": 2.09, "grad_norm": 0.6175122857093811, "learning_rate": 0.00012705059560246806, "loss": 3.0223, "step": 42580 }, { "epoch": 2.09, "grad_norm": 0.681134819984436, "learning_rate": 0.00012703801562538252, "loss": 2.924, "step": 42581 }, { "epoch": 2.09, "grad_norm": 0.6651497483253479, "learning_rate": 0.00012702543610384546, "loss": 2.8553, "step": 42582 }, { "epoch": 2.09, "grad_norm": 0.6877047419548035, "learning_rate": 0.00012701285703789006, "loss": 3.2676, "step": 42583 }, { "epoch": 2.09, "grad_norm": 0.737549901008606, "learning_rate": 0.00012700027842754968, "loss": 2.7642, "step": 42584 }, { "epoch": 2.09, "grad_norm": 0.669161319732666, "learning_rate": 0.0001269877002728572, "loss": 2.8865, "step": 42585 }, { "epoch": 2.09, "grad_norm": 0.6651346683502197, "learning_rate": 0.00012697512257384586, "loss": 3.0816, "step": 42586 }, { "epoch": 2.09, "grad_norm": 0.6895802021026611, "learning_rate": 0.00012696254533054867, "loss": 3.0483, "step": 42587 }, { "epoch": 2.09, "grad_norm": 0.6694716811180115, "learning_rate": 0.0001269499685429988, "loss": 3.1017, "step": 42588 }, { "epoch": 2.09, "grad_norm": 0.7255198955535889, "learning_rate": 0.0001269373922112295, "loss": 3.0076, "step": 42589 }, { "epoch": 2.09, "grad_norm": 0.6456109881401062, "learning_rate": 0.0001269248163352737, "loss": 2.855, "step": 42590 }, { "epoch": 2.09, "grad_norm": 0.6860771775245667, "learning_rate": 0.00012691224091516476, "loss": 3.0306, "step": 42591 }, { "epoch": 2.09, "grad_norm": 0.6903136968612671, "learning_rate": 0.00012689966595093557, "loss": 3.0801, "step": 42592 }, { "epoch": 2.09, "grad_norm": 0.7179170846939087, "learning_rate": 0.00012688709144261938, "loss": 2.8021, "step": 42593 }, { "epoch": 2.09, "grad_norm": 0.6981700658798218, "learning_rate": 0.00012687451739024934, "loss": 2.8115, "step": 42594 }, { "epoch": 2.09, "grad_norm": 0.6771155595779419, "learning_rate": 0.0001268619437938584, "loss": 2.8058, "step": 42595 }, { "epoch": 2.09, "grad_norm": 0.6560533046722412, "learning_rate": 0.00012684937065347984, "loss": 2.8685, "step": 42596 }, { "epoch": 2.09, "grad_norm": 0.6963851451873779, "learning_rate": 0.00012683679796914664, "loss": 2.8669, "step": 42597 }, { "epoch": 2.09, "grad_norm": 0.7008418440818787, "learning_rate": 0.00012682422574089198, "loss": 3.0551, "step": 42598 }, { "epoch": 2.09, "grad_norm": 0.7024134993553162, "learning_rate": 0.00012681165396874908, "loss": 2.8471, "step": 42599 }, { "epoch": 2.09, "grad_norm": 0.7309966683387756, "learning_rate": 0.00012679908265275094, "loss": 2.7808, "step": 42600 }, { "epoch": 2.09, "grad_norm": 0.6708288192749023, "learning_rate": 0.00012678651179293072, "loss": 2.6906, "step": 42601 }, { "epoch": 2.09, "grad_norm": 0.6789960861206055, "learning_rate": 0.00012677394138932137, "loss": 2.7973, "step": 42602 }, { "epoch": 2.09, "grad_norm": 0.7056165337562561, "learning_rate": 0.0001267613714419561, "loss": 3.0738, "step": 42603 }, { "epoch": 2.09, "grad_norm": 0.6883794069290161, "learning_rate": 0.0001267488019508682, "loss": 3.0586, "step": 42604 }, { "epoch": 2.09, "grad_norm": 0.7074939012527466, "learning_rate": 0.00012673623291609043, "loss": 2.7751, "step": 42605 }, { "epoch": 2.09, "grad_norm": 0.7160875797271729, "learning_rate": 0.00012672366433765623, "loss": 2.8472, "step": 42606 }, { "epoch": 2.09, "grad_norm": 0.7391530871391296, "learning_rate": 0.00012671109621559856, "loss": 2.7816, "step": 42607 }, { "epoch": 2.09, "grad_norm": 0.7099108695983887, "learning_rate": 0.00012669852854995037, "loss": 2.8312, "step": 42608 }, { "epoch": 2.09, "grad_norm": 0.6549707651138306, "learning_rate": 0.00012668596134074504, "loss": 2.9045, "step": 42609 }, { "epoch": 2.09, "grad_norm": 0.6979371309280396, "learning_rate": 0.00012667339458801542, "loss": 2.9533, "step": 42610 }, { "epoch": 2.09, "grad_norm": 0.7130467295646667, "learning_rate": 0.0001266608282917948, "loss": 2.8201, "step": 42611 }, { "epoch": 2.09, "grad_norm": 0.674332857131958, "learning_rate": 0.00012664826245211612, "loss": 2.9441, "step": 42612 }, { "epoch": 2.09, "grad_norm": 0.6953020095825195, "learning_rate": 0.0001266356970690126, "loss": 2.7931, "step": 42613 }, { "epoch": 2.09, "grad_norm": 0.6988925933837891, "learning_rate": 0.0001266231321425173, "loss": 2.7282, "step": 42614 }, { "epoch": 2.09, "grad_norm": 0.7261149287223816, "learning_rate": 0.00012661056767266333, "loss": 2.8631, "step": 42615 }, { "epoch": 2.09, "grad_norm": 0.667811393737793, "learning_rate": 0.0001265980036594838, "loss": 2.9077, "step": 42616 }, { "epoch": 2.09, "grad_norm": 0.694728434085846, "learning_rate": 0.00012658544010301165, "loss": 3.0601, "step": 42617 }, { "epoch": 2.09, "grad_norm": 0.7233669757843018, "learning_rate": 0.00012657287700328012, "loss": 2.75, "step": 42618 }, { "epoch": 2.09, "grad_norm": 0.7053804993629456, "learning_rate": 0.00012656031436032218, "loss": 3.0824, "step": 42619 }, { "epoch": 2.09, "grad_norm": 0.6913226246833801, "learning_rate": 0.000126547752174171, "loss": 2.8967, "step": 42620 }, { "epoch": 2.09, "grad_norm": 0.720045804977417, "learning_rate": 0.00012653519044485975, "loss": 2.7501, "step": 42621 }, { "epoch": 2.09, "grad_norm": 0.7346246242523193, "learning_rate": 0.00012652262917242144, "loss": 3.0411, "step": 42622 }, { "epoch": 2.09, "grad_norm": 0.6882084012031555, "learning_rate": 0.00012651006835688917, "loss": 2.87, "step": 42623 }, { "epoch": 2.09, "grad_norm": 0.6975372433662415, "learning_rate": 0.00012649750799829585, "loss": 2.9828, "step": 42624 }, { "epoch": 2.09, "grad_norm": 0.6522185802459717, "learning_rate": 0.00012648494809667469, "loss": 3.0208, "step": 42625 }, { "epoch": 2.09, "grad_norm": 0.709596574306488, "learning_rate": 0.0001264723886520589, "loss": 2.7963, "step": 42626 }, { "epoch": 2.09, "grad_norm": 0.6858445405960083, "learning_rate": 0.00012645982966448134, "loss": 3.0234, "step": 42627 }, { "epoch": 2.09, "grad_norm": 0.6798964142799377, "learning_rate": 0.00012644727113397532, "loss": 3.0091, "step": 42628 }, { "epoch": 2.09, "grad_norm": 0.7389910221099854, "learning_rate": 0.00012643471306057372, "loss": 2.7046, "step": 42629 }, { "epoch": 2.09, "grad_norm": 0.6868226528167725, "learning_rate": 0.00012642215544430964, "loss": 2.7799, "step": 42630 }, { "epoch": 2.09, "grad_norm": 0.6818183064460754, "learning_rate": 0.00012640959828521628, "loss": 3.1201, "step": 42631 }, { "epoch": 2.09, "grad_norm": 0.6521576046943665, "learning_rate": 0.0001263970415833265, "loss": 3.1, "step": 42632 }, { "epoch": 2.09, "grad_norm": 0.7379617691040039, "learning_rate": 0.00012638448533867366, "loss": 2.8706, "step": 42633 }, { "epoch": 2.09, "grad_norm": 0.7239534258842468, "learning_rate": 0.00012637192955129053, "loss": 2.6417, "step": 42634 }, { "epoch": 2.09, "grad_norm": 0.7074881792068481, "learning_rate": 0.00012635937422121035, "loss": 2.987, "step": 42635 }, { "epoch": 2.09, "grad_norm": 0.7031037211418152, "learning_rate": 0.00012634681934846624, "loss": 2.9419, "step": 42636 }, { "epoch": 2.09, "grad_norm": 0.6728188991546631, "learning_rate": 0.0001263342649330912, "loss": 2.7092, "step": 42637 }, { "epoch": 2.09, "grad_norm": 0.7104039788246155, "learning_rate": 0.00012632171097511828, "loss": 2.6515, "step": 42638 }, { "epoch": 2.09, "grad_norm": 0.6465256214141846, "learning_rate": 0.00012630915747458043, "loss": 3.0457, "step": 42639 }, { "epoch": 2.09, "grad_norm": 0.690544605255127, "learning_rate": 0.00012629660443151086, "loss": 2.8131, "step": 42640 }, { "epoch": 2.09, "grad_norm": 0.6940127611160278, "learning_rate": 0.0001262840518459427, "loss": 2.9941, "step": 42641 }, { "epoch": 2.09, "grad_norm": 0.6851858496665955, "learning_rate": 0.0001262714997179088, "loss": 2.9212, "step": 42642 }, { "epoch": 2.09, "grad_norm": 0.6655691862106323, "learning_rate": 0.00012625894804744244, "loss": 2.8587, "step": 42643 }, { "epoch": 2.09, "grad_norm": 0.688534140586853, "learning_rate": 0.0001262463968345766, "loss": 3.0816, "step": 42644 }, { "epoch": 2.09, "grad_norm": 0.7009671330451965, "learning_rate": 0.00012623384607934415, "loss": 2.8889, "step": 42645 }, { "epoch": 2.09, "grad_norm": 0.6391334533691406, "learning_rate": 0.00012622129578177845, "loss": 2.8735, "step": 42646 }, { "epoch": 2.09, "grad_norm": 0.6934916973114014, "learning_rate": 0.0001262087459419123, "loss": 2.8965, "step": 42647 }, { "epoch": 2.09, "grad_norm": 0.6752188205718994, "learning_rate": 0.00012619619655977897, "loss": 2.9981, "step": 42648 }, { "epoch": 2.09, "grad_norm": 0.6940574645996094, "learning_rate": 0.0001261836476354113, "loss": 2.7263, "step": 42649 }, { "epoch": 2.09, "grad_norm": 0.7268062829971313, "learning_rate": 0.00012617109916884247, "loss": 3.1159, "step": 42650 }, { "epoch": 2.09, "grad_norm": 0.656769871711731, "learning_rate": 0.0001261585511601056, "loss": 2.9465, "step": 42651 }, { "epoch": 2.09, "grad_norm": 0.6630927920341492, "learning_rate": 0.00012614600360923367, "loss": 2.9908, "step": 42652 }, { "epoch": 2.09, "grad_norm": 0.6990768909454346, "learning_rate": 0.0001261334565162597, "loss": 2.9062, "step": 42653 }, { "epoch": 2.09, "grad_norm": 0.689720630645752, "learning_rate": 0.00012612090988121661, "loss": 2.9548, "step": 42654 }, { "epoch": 2.09, "grad_norm": 0.7280600666999817, "learning_rate": 0.00012610836370413763, "loss": 2.8982, "step": 42655 }, { "epoch": 2.09, "grad_norm": 0.6745010614395142, "learning_rate": 0.0001260958179850558, "loss": 2.8903, "step": 42656 }, { "epoch": 2.09, "grad_norm": 0.7021997570991516, "learning_rate": 0.00012608327272400406, "loss": 3.0062, "step": 42657 }, { "epoch": 2.09, "grad_norm": 0.648439884185791, "learning_rate": 0.00012607072792101558, "loss": 2.9718, "step": 42658 }, { "epoch": 2.09, "grad_norm": 0.7959494590759277, "learning_rate": 0.00012605818357612334, "loss": 2.9918, "step": 42659 }, { "epoch": 2.09, "grad_norm": 0.676099419593811, "learning_rate": 0.00012604563968936027, "loss": 3.0113, "step": 42660 }, { "epoch": 2.09, "grad_norm": 0.6749578714370728, "learning_rate": 0.00012603309626075962, "loss": 2.8392, "step": 42661 }, { "epoch": 2.09, "grad_norm": 0.6763250231742859, "learning_rate": 0.0001260205532903542, "loss": 3.0267, "step": 42662 }, { "epoch": 2.09, "grad_norm": 0.7066740989685059, "learning_rate": 0.00012600801077817728, "loss": 2.9946, "step": 42663 }, { "epoch": 2.09, "grad_norm": 0.6909931898117065, "learning_rate": 0.00012599546872426164, "loss": 2.8585, "step": 42664 }, { "epoch": 2.09, "grad_norm": 0.6577308177947998, "learning_rate": 0.00012598292712864058, "loss": 2.7868, "step": 42665 }, { "epoch": 2.09, "grad_norm": 0.7595558166503906, "learning_rate": 0.000125970385991347, "loss": 2.8711, "step": 42666 }, { "epoch": 2.09, "grad_norm": 0.670626699924469, "learning_rate": 0.0001259578453124138, "loss": 2.9837, "step": 42667 }, { "epoch": 2.09, "grad_norm": 0.7183277010917664, "learning_rate": 0.00012594530509187424, "loss": 2.9113, "step": 42668 }, { "epoch": 2.09, "grad_norm": 0.6675026416778564, "learning_rate": 0.00012593276532976117, "loss": 3.0022, "step": 42669 }, { "epoch": 2.09, "grad_norm": 0.703376293182373, "learning_rate": 0.00012592022602610768, "loss": 2.9697, "step": 42670 }, { "epoch": 2.09, "grad_norm": 0.6627922654151917, "learning_rate": 0.00012590768718094694, "loss": 3.0211, "step": 42671 }, { "epoch": 2.09, "grad_norm": 0.727324366569519, "learning_rate": 0.00012589514879431172, "loss": 2.8978, "step": 42672 }, { "epoch": 2.09, "grad_norm": 0.6731564402580261, "learning_rate": 0.00012588261086623527, "loss": 2.9288, "step": 42673 }, { "epoch": 2.09, "grad_norm": 0.6762987971305847, "learning_rate": 0.00012587007339675052, "loss": 2.8168, "step": 42674 }, { "epoch": 2.09, "grad_norm": 0.7034830451011658, "learning_rate": 0.0001258575363858904, "loss": 2.9776, "step": 42675 }, { "epoch": 2.09, "grad_norm": 0.6867435574531555, "learning_rate": 0.00012584499983368807, "loss": 2.8703, "step": 42676 }, { "epoch": 2.09, "grad_norm": 0.6966498494148254, "learning_rate": 0.00012583246374017645, "loss": 2.6529, "step": 42677 }, { "epoch": 2.09, "grad_norm": 0.6629147529602051, "learning_rate": 0.00012581992810538868, "loss": 2.7213, "step": 42678 }, { "epoch": 2.09, "grad_norm": 0.6690661907196045, "learning_rate": 0.00012580739292935759, "loss": 3.1114, "step": 42679 }, { "epoch": 2.09, "grad_norm": 0.7012913227081299, "learning_rate": 0.00012579485821211643, "loss": 2.9342, "step": 42680 }, { "epoch": 2.09, "grad_norm": 0.7117915749549866, "learning_rate": 0.00012578232395369808, "loss": 2.8775, "step": 42681 }, { "epoch": 2.09, "grad_norm": 0.6585233807563782, "learning_rate": 0.00012576979015413542, "loss": 2.899, "step": 42682 }, { "epoch": 2.09, "grad_norm": 0.744594156742096, "learning_rate": 0.00012575725681346174, "loss": 2.9088, "step": 42683 }, { "epoch": 2.09, "grad_norm": 0.6832166910171509, "learning_rate": 0.00012574472393170978, "loss": 3.0143, "step": 42684 }, { "epoch": 2.09, "grad_norm": 0.7105337977409363, "learning_rate": 0.0001257321915089127, "loss": 3.03, "step": 42685 }, { "epoch": 2.09, "grad_norm": 0.6521598100662231, "learning_rate": 0.00012571965954510358, "loss": 2.9901, "step": 42686 }, { "epoch": 2.09, "grad_norm": 0.6741447448730469, "learning_rate": 0.00012570712804031535, "loss": 2.9124, "step": 42687 }, { "epoch": 2.09, "grad_norm": 0.6789092421531677, "learning_rate": 0.000125694596994581, "loss": 2.9516, "step": 42688 }, { "epoch": 2.09, "grad_norm": 0.7190807461738586, "learning_rate": 0.00012568206640793344, "loss": 2.7755, "step": 42689 }, { "epoch": 2.09, "grad_norm": 0.7378038763999939, "learning_rate": 0.00012566953628040574, "loss": 2.8331, "step": 42690 }, { "epoch": 2.09, "grad_norm": 0.695372998714447, "learning_rate": 0.00012565700661203107, "loss": 3.0781, "step": 42691 }, { "epoch": 2.09, "grad_norm": 0.7172736525535583, "learning_rate": 0.00012564447740284217, "loss": 2.9251, "step": 42692 }, { "epoch": 2.09, "grad_norm": 0.6886417865753174, "learning_rate": 0.00012563194865287228, "loss": 2.979, "step": 42693 }, { "epoch": 2.09, "grad_norm": 0.7642080187797546, "learning_rate": 0.00012561942036215415, "loss": 2.9911, "step": 42694 }, { "epoch": 2.09, "grad_norm": 0.6821017265319824, "learning_rate": 0.00012560689253072102, "loss": 2.8739, "step": 42695 }, { "epoch": 2.09, "grad_norm": 0.7164608240127563, "learning_rate": 0.00012559436515860575, "loss": 2.6133, "step": 42696 }, { "epoch": 2.09, "grad_norm": 0.6764235496520996, "learning_rate": 0.0001255818382458413, "loss": 3.0949, "step": 42697 }, { "epoch": 2.09, "grad_norm": 0.7024563550949097, "learning_rate": 0.0001255693117924608, "loss": 2.678, "step": 42698 }, { "epoch": 2.09, "grad_norm": 0.7356938719749451, "learning_rate": 0.00012555678579849707, "loss": 2.7922, "step": 42699 }, { "epoch": 2.09, "grad_norm": 0.6670063734054565, "learning_rate": 0.00012554426026398327, "loss": 2.9197, "step": 42700 }, { "epoch": 2.09, "grad_norm": 0.709257185459137, "learning_rate": 0.00012553173518895224, "loss": 2.8767, "step": 42701 }, { "epoch": 2.09, "grad_norm": 0.6837866902351379, "learning_rate": 0.00012551921057343715, "loss": 2.9562, "step": 42702 }, { "epoch": 2.09, "grad_norm": 0.6681262850761414, "learning_rate": 0.00012550668641747085, "loss": 2.9842, "step": 42703 }, { "epoch": 2.09, "grad_norm": 0.7419562339782715, "learning_rate": 0.00012549416272108625, "loss": 2.988, "step": 42704 }, { "epoch": 2.09, "grad_norm": 0.7583781480789185, "learning_rate": 0.0001254816394843166, "loss": 3.1105, "step": 42705 }, { "epoch": 2.09, "grad_norm": 0.6745335459709167, "learning_rate": 0.00012546911670719457, "loss": 2.9673, "step": 42706 }, { "epoch": 2.09, "grad_norm": 0.6698441505432129, "learning_rate": 0.00012545659438975332, "loss": 2.9013, "step": 42707 }, { "epoch": 2.09, "grad_norm": 0.7208741903305054, "learning_rate": 0.00012544407253202588, "loss": 2.9384, "step": 42708 }, { "epoch": 2.09, "grad_norm": 0.6900094747543335, "learning_rate": 0.0001254315511340452, "loss": 2.9388, "step": 42709 }, { "epoch": 2.09, "grad_norm": 0.7146252393722534, "learning_rate": 0.00012541903019584418, "loss": 3.1268, "step": 42710 }, { "epoch": 2.09, "grad_norm": 0.6867741942405701, "learning_rate": 0.0001254065097174558, "loss": 2.9237, "step": 42711 }, { "epoch": 2.09, "grad_norm": 0.7199037075042725, "learning_rate": 0.00012539398969891298, "loss": 2.8591, "step": 42712 }, { "epoch": 2.09, "grad_norm": 0.7059955596923828, "learning_rate": 0.00012538147014024895, "loss": 2.8159, "step": 42713 }, { "epoch": 2.09, "grad_norm": 0.6868063807487488, "learning_rate": 0.00012536895104149642, "loss": 3.0604, "step": 42714 }, { "epoch": 2.09, "grad_norm": 0.6989514231681824, "learning_rate": 0.00012535643240268852, "loss": 2.9723, "step": 42715 }, { "epoch": 2.09, "grad_norm": 0.7568475008010864, "learning_rate": 0.0001253439142238581, "loss": 3.1044, "step": 42716 }, { "epoch": 2.09, "grad_norm": 0.672238290309906, "learning_rate": 0.0001253313965050383, "loss": 2.7832, "step": 42717 }, { "epoch": 2.09, "grad_norm": 0.7147806882858276, "learning_rate": 0.000125318879246262, "loss": 2.7591, "step": 42718 }, { "epoch": 2.09, "grad_norm": 0.6832359433174133, "learning_rate": 0.000125306362447562, "loss": 2.829, "step": 42719 }, { "epoch": 2.09, "grad_norm": 0.6849291324615479, "learning_rate": 0.00012529384610897158, "loss": 3.1068, "step": 42720 }, { "epoch": 2.09, "grad_norm": 0.6654471755027771, "learning_rate": 0.0001252813302305234, "loss": 2.8561, "step": 42721 }, { "epoch": 2.09, "grad_norm": 0.7064377069473267, "learning_rate": 0.0001252688148122506, "loss": 2.907, "step": 42722 }, { "epoch": 2.09, "grad_norm": 0.665331244468689, "learning_rate": 0.0001252562998541862, "loss": 2.8152, "step": 42723 }, { "epoch": 2.09, "grad_norm": 0.738166332244873, "learning_rate": 0.0001252437853563631, "loss": 2.8205, "step": 42724 }, { "epoch": 2.09, "grad_norm": 0.7051920890808105, "learning_rate": 0.0001252312713188142, "loss": 2.7961, "step": 42725 }, { "epoch": 2.09, "grad_norm": 0.7235719561576843, "learning_rate": 0.00012521875774157242, "loss": 3.0615, "step": 42726 }, { "epoch": 2.09, "grad_norm": 0.7330026626586914, "learning_rate": 0.00012520624462467077, "loss": 2.9727, "step": 42727 }, { "epoch": 2.09, "grad_norm": 0.6940485835075378, "learning_rate": 0.00012519373196814238, "loss": 2.731, "step": 42728 }, { "epoch": 2.09, "grad_norm": 0.6567136645317078, "learning_rate": 0.00012518121977201993, "loss": 2.971, "step": 42729 }, { "epoch": 2.09, "grad_norm": 0.6531281471252441, "learning_rate": 0.0001251687080363366, "loss": 2.893, "step": 42730 }, { "epoch": 2.09, "grad_norm": 0.6377910375595093, "learning_rate": 0.00012515619676112526, "loss": 2.8912, "step": 42731 }, { "epoch": 2.09, "grad_norm": 0.6578353643417358, "learning_rate": 0.0001251436859464187, "loss": 2.7222, "step": 42732 }, { "epoch": 2.09, "grad_norm": 0.6784064173698425, "learning_rate": 0.00012513117559225016, "loss": 2.9927, "step": 42733 }, { "epoch": 2.09, "grad_norm": 0.6903968453407288, "learning_rate": 0.00012511866569865236, "loss": 2.8334, "step": 42734 }, { "epoch": 2.09, "grad_norm": 0.6765158772468567, "learning_rate": 0.00012510615626565842, "loss": 2.9354, "step": 42735 }, { "epoch": 2.09, "grad_norm": 0.7345734238624573, "learning_rate": 0.0001250936472933011, "loss": 2.8646, "step": 42736 }, { "epoch": 2.09, "grad_norm": 0.6818332076072693, "learning_rate": 0.00012508113878161348, "loss": 3.0219, "step": 42737 }, { "epoch": 2.09, "grad_norm": 0.6421600580215454, "learning_rate": 0.00012506863073062855, "loss": 2.9526, "step": 42738 }, { "epoch": 2.09, "grad_norm": 0.7006136178970337, "learning_rate": 0.00012505612314037918, "loss": 2.8891, "step": 42739 }, { "epoch": 2.09, "grad_norm": 0.6548375487327576, "learning_rate": 0.00012504361601089833, "loss": 2.9922, "step": 42740 }, { "epoch": 2.09, "grad_norm": 0.6888555884361267, "learning_rate": 0.0001250311093422188, "loss": 2.828, "step": 42741 }, { "epoch": 2.09, "grad_norm": 0.7467426657676697, "learning_rate": 0.00012501860313437368, "loss": 3.1437, "step": 42742 }, { "epoch": 2.09, "grad_norm": 0.6846618056297302, "learning_rate": 0.00012500609738739596, "loss": 2.9356, "step": 42743 }, { "epoch": 2.09, "grad_norm": 0.6674962043762207, "learning_rate": 0.00012499359210131842, "loss": 2.9799, "step": 42744 }, { "epoch": 2.09, "grad_norm": 0.6744292378425598, "learning_rate": 0.00012498108727617416, "loss": 2.8018, "step": 42745 }, { "epoch": 2.09, "grad_norm": 0.6720545291900635, "learning_rate": 0.00012496858291199608, "loss": 2.9251, "step": 42746 }, { "epoch": 2.09, "grad_norm": 0.702659547328949, "learning_rate": 0.00012495607900881692, "loss": 2.9121, "step": 42747 }, { "epoch": 2.09, "grad_norm": 0.6851487159729004, "learning_rate": 0.00012494357556666987, "loss": 2.9867, "step": 42748 }, { "epoch": 2.1, "grad_norm": 0.6916007995605469, "learning_rate": 0.00012493107258558763, "loss": 2.9223, "step": 42749 }, { "epoch": 2.1, "grad_norm": 0.6949058771133423, "learning_rate": 0.0001249185700656034, "loss": 2.7184, "step": 42750 }, { "epoch": 2.1, "grad_norm": 0.6698328256607056, "learning_rate": 0.00012490606800674982, "loss": 2.7598, "step": 42751 }, { "epoch": 2.1, "grad_norm": 0.678553581237793, "learning_rate": 0.00012489356640906, "loss": 2.9062, "step": 42752 }, { "epoch": 2.1, "grad_norm": 0.6635074615478516, "learning_rate": 0.0001248810652725669, "loss": 3.1156, "step": 42753 }, { "epoch": 2.1, "grad_norm": 0.6807247400283813, "learning_rate": 0.00012486856459730336, "loss": 2.9481, "step": 42754 }, { "epoch": 2.1, "grad_norm": 0.7227141261100769, "learning_rate": 0.00012485606438330236, "loss": 2.9745, "step": 42755 }, { "epoch": 2.1, "grad_norm": 0.7141606211662292, "learning_rate": 0.00012484356463059662, "loss": 2.7561, "step": 42756 }, { "epoch": 2.1, "grad_norm": 0.7132400870323181, "learning_rate": 0.00012483106533921925, "loss": 2.8926, "step": 42757 }, { "epoch": 2.1, "grad_norm": 0.7027788162231445, "learning_rate": 0.00012481856650920327, "loss": 2.9338, "step": 42758 }, { "epoch": 2.1, "grad_norm": 0.6948914527893066, "learning_rate": 0.00012480606814058132, "loss": 2.8984, "step": 42759 }, { "epoch": 2.1, "grad_norm": 0.6889869570732117, "learning_rate": 0.0001247935702333866, "loss": 2.8425, "step": 42760 }, { "epoch": 2.1, "grad_norm": 0.7095881104469299, "learning_rate": 0.00012478107278765188, "loss": 2.9341, "step": 42761 }, { "epoch": 2.1, "grad_norm": 0.7111302018165588, "learning_rate": 0.00012476857580341, "loss": 2.8932, "step": 42762 }, { "epoch": 2.1, "grad_norm": 0.6622797846794128, "learning_rate": 0.00012475607928069408, "loss": 2.7765, "step": 42763 }, { "epoch": 2.1, "grad_norm": 0.6576592922210693, "learning_rate": 0.00012474358321953681, "loss": 2.9659, "step": 42764 }, { "epoch": 2.1, "grad_norm": 0.68706876039505, "learning_rate": 0.00012473108761997133, "loss": 2.8225, "step": 42765 }, { "epoch": 2.1, "grad_norm": 0.7006784081459045, "learning_rate": 0.0001247185924820303, "loss": 2.9659, "step": 42766 }, { "epoch": 2.1, "grad_norm": 0.7574266195297241, "learning_rate": 0.00012470609780574688, "loss": 3.0306, "step": 42767 }, { "epoch": 2.1, "grad_norm": 0.678636372089386, "learning_rate": 0.00012469360359115389, "loss": 2.8967, "step": 42768 }, { "epoch": 2.1, "grad_norm": 0.7261688113212585, "learning_rate": 0.00012468110983828408, "loss": 2.8648, "step": 42769 }, { "epoch": 2.1, "grad_norm": 0.6693586707115173, "learning_rate": 0.00012466861654717058, "loss": 3.0083, "step": 42770 }, { "epoch": 2.1, "grad_norm": 0.699759840965271, "learning_rate": 0.00012465612371784611, "loss": 2.8289, "step": 42771 }, { "epoch": 2.1, "grad_norm": 0.6655587553977966, "learning_rate": 0.0001246436313503437, "loss": 2.9072, "step": 42772 }, { "epoch": 2.1, "grad_norm": 0.6945595741271973, "learning_rate": 0.00012463113944469626, "loss": 2.9781, "step": 42773 }, { "epoch": 2.1, "grad_norm": 0.6992049217224121, "learning_rate": 0.00012461864800093655, "loss": 2.8408, "step": 42774 }, { "epoch": 2.1, "grad_norm": 0.6617697477340698, "learning_rate": 0.0001246061570190977, "loss": 2.8496, "step": 42775 }, { "epoch": 2.1, "grad_norm": 0.6566224098205566, "learning_rate": 0.00012459366649921247, "loss": 2.8566, "step": 42776 }, { "epoch": 2.1, "grad_norm": 0.6914481520652771, "learning_rate": 0.00012458117644131375, "loss": 2.9731, "step": 42777 }, { "epoch": 2.1, "grad_norm": 0.6699321866035461, "learning_rate": 0.00012456868684543437, "loss": 2.8621, "step": 42778 }, { "epoch": 2.1, "grad_norm": 0.7015001177787781, "learning_rate": 0.00012455619771160732, "loss": 2.9381, "step": 42779 }, { "epoch": 2.1, "grad_norm": 0.7836304306983948, "learning_rate": 0.00012454370903986555, "loss": 2.8052, "step": 42780 }, { "epoch": 2.1, "grad_norm": 0.6836073994636536, "learning_rate": 0.0001245312208302418, "loss": 2.8882, "step": 42781 }, { "epoch": 2.1, "grad_norm": 0.6994935870170593, "learning_rate": 0.00012451873308276913, "loss": 3.035, "step": 42782 }, { "epoch": 2.1, "grad_norm": 0.6761261224746704, "learning_rate": 0.00012450624579748035, "loss": 2.9866, "step": 42783 }, { "epoch": 2.1, "grad_norm": 0.6878436207771301, "learning_rate": 0.00012449375897440824, "loss": 2.9566, "step": 42784 }, { "epoch": 2.1, "grad_norm": 0.6756868958473206, "learning_rate": 0.0001244812726135859, "loss": 2.941, "step": 42785 }, { "epoch": 2.1, "grad_norm": 0.669620156288147, "learning_rate": 0.00012446878671504603, "loss": 2.9263, "step": 42786 }, { "epoch": 2.1, "grad_norm": 0.6921489834785461, "learning_rate": 0.00012445630127882166, "loss": 2.9708, "step": 42787 }, { "epoch": 2.1, "grad_norm": 0.7075048089027405, "learning_rate": 0.00012444381630494553, "loss": 2.9319, "step": 42788 }, { "epoch": 2.1, "grad_norm": 0.7006393074989319, "learning_rate": 0.0001244313317934507, "loss": 2.8783, "step": 42789 }, { "epoch": 2.1, "grad_norm": 0.698784351348877, "learning_rate": 0.00012441884774436993, "loss": 3.0471, "step": 42790 }, { "epoch": 2.1, "grad_norm": 0.6695194840431213, "learning_rate": 0.00012440636415773606, "loss": 2.7919, "step": 42791 }, { "epoch": 2.1, "grad_norm": 0.6728737354278564, "learning_rate": 0.0001243938810335821, "loss": 2.995, "step": 42792 }, { "epoch": 2.1, "grad_norm": 0.6940301656723022, "learning_rate": 0.00012438139837194076, "loss": 2.8124, "step": 42793 }, { "epoch": 2.1, "grad_norm": 0.683421790599823, "learning_rate": 0.00012436891617284503, "loss": 2.7807, "step": 42794 }, { "epoch": 2.1, "grad_norm": 0.6776772141456604, "learning_rate": 0.00012435643443632787, "loss": 2.8754, "step": 42795 }, { "epoch": 2.1, "grad_norm": 0.6897674202919006, "learning_rate": 0.00012434395316242197, "loss": 3.0021, "step": 42796 }, { "epoch": 2.1, "grad_norm": 0.6728845238685608, "learning_rate": 0.00012433147235116037, "loss": 3.2454, "step": 42797 }, { "epoch": 2.1, "grad_norm": 0.7249746918678284, "learning_rate": 0.0001243189920025759, "loss": 2.924, "step": 42798 }, { "epoch": 2.1, "grad_norm": 0.7298779487609863, "learning_rate": 0.00012430651211670129, "loss": 3.0579, "step": 42799 }, { "epoch": 2.1, "grad_norm": 0.8019329905509949, "learning_rate": 0.00012429403269356956, "loss": 3.0202, "step": 42800 }, { "epoch": 2.1, "grad_norm": 0.7995365858078003, "learning_rate": 0.00012428155373321348, "loss": 2.7468, "step": 42801 }, { "epoch": 2.1, "grad_norm": 0.672685980796814, "learning_rate": 0.00012426907523566607, "loss": 2.8585, "step": 42802 }, { "epoch": 2.1, "grad_norm": 0.7194468379020691, "learning_rate": 0.00012425659720096, "loss": 2.9569, "step": 42803 }, { "epoch": 2.1, "grad_norm": 0.728124737739563, "learning_rate": 0.0001242441196291283, "loss": 2.766, "step": 42804 }, { "epoch": 2.1, "grad_norm": 0.7027937173843384, "learning_rate": 0.00012423164252020377, "loss": 2.9544, "step": 42805 }, { "epoch": 2.1, "grad_norm": 0.7222430109977722, "learning_rate": 0.00012421916587421918, "loss": 2.8564, "step": 42806 }, { "epoch": 2.1, "grad_norm": 0.693401038646698, "learning_rate": 0.0001242066896912076, "loss": 2.9747, "step": 42807 }, { "epoch": 2.1, "grad_norm": 0.7571882605552673, "learning_rate": 0.00012419421397120162, "loss": 2.9809, "step": 42808 }, { "epoch": 2.1, "grad_norm": 0.6618812680244446, "learning_rate": 0.00012418173871423427, "loss": 3.0709, "step": 42809 }, { "epoch": 2.1, "grad_norm": 0.6777374148368835, "learning_rate": 0.00012416926392033847, "loss": 2.8907, "step": 42810 }, { "epoch": 2.1, "grad_norm": 0.6922290325164795, "learning_rate": 0.00012415678958954703, "loss": 2.8615, "step": 42811 }, { "epoch": 2.1, "grad_norm": 0.7021549940109253, "learning_rate": 0.00012414431572189272, "loss": 2.8617, "step": 42812 }, { "epoch": 2.1, "grad_norm": 0.7654609084129333, "learning_rate": 0.00012413184231740835, "loss": 3.0118, "step": 42813 }, { "epoch": 2.1, "grad_norm": 0.6596476435661316, "learning_rate": 0.00012411936937612686, "loss": 2.9509, "step": 42814 }, { "epoch": 2.1, "grad_norm": 0.7117953896522522, "learning_rate": 0.00012410689689808122, "loss": 3.0015, "step": 42815 }, { "epoch": 2.1, "grad_norm": 0.7276239991188049, "learning_rate": 0.00012409442488330404, "loss": 2.7217, "step": 42816 }, { "epoch": 2.1, "grad_norm": 0.7083224058151245, "learning_rate": 0.0001240819533318284, "loss": 2.9467, "step": 42817 }, { "epoch": 2.1, "grad_norm": 0.6544482111930847, "learning_rate": 0.0001240694822436869, "loss": 2.9033, "step": 42818 }, { "epoch": 2.1, "grad_norm": 0.6893659234046936, "learning_rate": 0.00012405701161891268, "loss": 2.8934, "step": 42819 }, { "epoch": 2.1, "grad_norm": 0.6625657677650452, "learning_rate": 0.0001240445414575384, "loss": 2.988, "step": 42820 }, { "epoch": 2.1, "grad_norm": 0.6781535148620605, "learning_rate": 0.00012403207175959688, "loss": 2.9722, "step": 42821 }, { "epoch": 2.1, "grad_norm": 0.6784567832946777, "learning_rate": 0.00012401960252512108, "loss": 2.8418, "step": 42822 }, { "epoch": 2.1, "grad_norm": 0.6653459668159485, "learning_rate": 0.0001240071337541437, "loss": 2.7502, "step": 42823 }, { "epoch": 2.1, "grad_norm": 0.7122622132301331, "learning_rate": 0.00012399466544669763, "loss": 2.9881, "step": 42824 }, { "epoch": 2.1, "grad_norm": 0.6495645642280579, "learning_rate": 0.00012398219760281586, "loss": 2.9813, "step": 42825 }, { "epoch": 2.1, "grad_norm": 0.6739984154701233, "learning_rate": 0.0001239697302225311, "loss": 2.9362, "step": 42826 }, { "epoch": 2.1, "grad_norm": 0.7379757165908813, "learning_rate": 0.00012395726330587619, "loss": 2.8813, "step": 42827 }, { "epoch": 2.1, "grad_norm": 0.6480869054794312, "learning_rate": 0.00012394479685288385, "loss": 2.7832, "step": 42828 }, { "epoch": 2.1, "grad_norm": 0.7142587304115295, "learning_rate": 0.00012393233086358706, "loss": 2.8355, "step": 42829 }, { "epoch": 2.1, "grad_norm": 0.6860468983650208, "learning_rate": 0.00012391986533801872, "loss": 2.9661, "step": 42830 }, { "epoch": 2.1, "grad_norm": 0.6586563587188721, "learning_rate": 0.00012390740027621146, "loss": 2.6982, "step": 42831 }, { "epoch": 2.1, "grad_norm": 0.6689882278442383, "learning_rate": 0.00012389493567819832, "loss": 3.0087, "step": 42832 }, { "epoch": 2.1, "grad_norm": 0.6792545914649963, "learning_rate": 0.00012388247154401205, "loss": 3.1049, "step": 42833 }, { "epoch": 2.1, "grad_norm": 0.7196153998374939, "learning_rate": 0.0001238700078736853, "loss": 3.0143, "step": 42834 }, { "epoch": 2.1, "grad_norm": 0.6919540762901306, "learning_rate": 0.00012385754466725122, "loss": 2.8527, "step": 42835 }, { "epoch": 2.1, "grad_norm": 0.6894993185997009, "learning_rate": 0.00012384508192474233, "loss": 2.8945, "step": 42836 }, { "epoch": 2.1, "grad_norm": 0.6774792075157166, "learning_rate": 0.00012383261964619171, "loss": 2.8883, "step": 42837 }, { "epoch": 2.1, "grad_norm": 0.6447596549987793, "learning_rate": 0.00012382015783163195, "loss": 3.1129, "step": 42838 }, { "epoch": 2.1, "grad_norm": 0.6821197271347046, "learning_rate": 0.000123807696481096, "loss": 2.8552, "step": 42839 }, { "epoch": 2.1, "grad_norm": 0.6983603835105896, "learning_rate": 0.00012379523559461674, "loss": 2.7279, "step": 42840 }, { "epoch": 2.1, "grad_norm": 0.7073538303375244, "learning_rate": 0.00012378277517222693, "loss": 2.8989, "step": 42841 }, { "epoch": 2.1, "grad_norm": 0.6931063532829285, "learning_rate": 0.00012377031521395942, "loss": 2.8065, "step": 42842 }, { "epoch": 2.1, "grad_norm": 0.6739084124565125, "learning_rate": 0.00012375785571984682, "loss": 3.0719, "step": 42843 }, { "epoch": 2.1, "grad_norm": 0.8324639797210693, "learning_rate": 0.00012374539668992212, "loss": 2.7407, "step": 42844 }, { "epoch": 2.1, "grad_norm": 0.6686299443244934, "learning_rate": 0.00012373293812421827, "loss": 2.8201, "step": 42845 }, { "epoch": 2.1, "grad_norm": 0.6707850694656372, "learning_rate": 0.00012372048002276778, "loss": 2.8545, "step": 42846 }, { "epoch": 2.1, "grad_norm": 0.6706002354621887, "learning_rate": 0.00012370802238560378, "loss": 2.9133, "step": 42847 }, { "epoch": 2.1, "grad_norm": 0.6939883828163147, "learning_rate": 0.0001236955652127589, "loss": 2.7663, "step": 42848 }, { "epoch": 2.1, "grad_norm": 0.7302611470222473, "learning_rate": 0.00012368310850426585, "loss": 2.8755, "step": 42849 }, { "epoch": 2.1, "grad_norm": 0.6648881435394287, "learning_rate": 0.00012367065226015768, "loss": 3.1254, "step": 42850 }, { "epoch": 2.1, "grad_norm": 0.7274592518806458, "learning_rate": 0.00012365819648046697, "loss": 2.9416, "step": 42851 }, { "epoch": 2.1, "grad_norm": 0.7390041947364807, "learning_rate": 0.0001236457411652267, "loss": 2.8705, "step": 42852 }, { "epoch": 2.1, "grad_norm": 0.7336735129356384, "learning_rate": 0.00012363328631446956, "loss": 2.8143, "step": 42853 }, { "epoch": 2.1, "grad_norm": 0.7421209216117859, "learning_rate": 0.00012362083192822847, "loss": 2.9359, "step": 42854 }, { "epoch": 2.1, "grad_norm": 0.6754651665687561, "learning_rate": 0.00012360837800653608, "loss": 2.9147, "step": 42855 }, { "epoch": 2.1, "grad_norm": 0.6838744282722473, "learning_rate": 0.00012359592454942538, "loss": 2.9293, "step": 42856 }, { "epoch": 2.1, "grad_norm": 0.6924825310707092, "learning_rate": 0.00012358347155692906, "loss": 2.908, "step": 42857 }, { "epoch": 2.1, "grad_norm": 0.6746190190315247, "learning_rate": 0.00012357101902907983, "loss": 3.1511, "step": 42858 }, { "epoch": 2.1, "grad_norm": 0.7157896757125854, "learning_rate": 0.00012355856696591065, "loss": 3.0123, "step": 42859 }, { "epoch": 2.1, "grad_norm": 0.6908437609672546, "learning_rate": 0.00012354611536745416, "loss": 2.7298, "step": 42860 }, { "epoch": 2.1, "grad_norm": 0.6571826934814453, "learning_rate": 0.00012353366423374323, "loss": 2.7174, "step": 42861 }, { "epoch": 2.1, "grad_norm": 0.7128993272781372, "learning_rate": 0.00012352121356481083, "loss": 2.9394, "step": 42862 }, { "epoch": 2.1, "grad_norm": 0.7356405258178711, "learning_rate": 0.00012350876336068953, "loss": 2.8878, "step": 42863 }, { "epoch": 2.1, "grad_norm": 0.7242211699485779, "learning_rate": 0.0001234963136214122, "loss": 3.0483, "step": 42864 }, { "epoch": 2.1, "grad_norm": 0.6854356527328491, "learning_rate": 0.0001234838643470115, "loss": 2.7815, "step": 42865 }, { "epoch": 2.1, "grad_norm": 0.7101714015007019, "learning_rate": 0.00012347141553752035, "loss": 3.0021, "step": 42866 }, { "epoch": 2.1, "grad_norm": 0.6805014610290527, "learning_rate": 0.0001234589671929716, "loss": 2.8371, "step": 42867 }, { "epoch": 2.1, "grad_norm": 0.70173180103302, "learning_rate": 0.00012344651931339785, "loss": 3.0057, "step": 42868 }, { "epoch": 2.1, "grad_norm": 0.6723647117614746, "learning_rate": 0.0001234340718988321, "loss": 2.9304, "step": 42869 }, { "epoch": 2.1, "grad_norm": 0.6882205009460449, "learning_rate": 0.00012342162494930704, "loss": 2.8999, "step": 42870 }, { "epoch": 2.1, "grad_norm": 0.7191662788391113, "learning_rate": 0.0001234091784648553, "loss": 3.0633, "step": 42871 }, { "epoch": 2.1, "grad_norm": 0.6749732494354248, "learning_rate": 0.00012339673244550993, "loss": 2.9349, "step": 42872 }, { "epoch": 2.1, "grad_norm": 0.7107117772102356, "learning_rate": 0.00012338428689130348, "loss": 3.0515, "step": 42873 }, { "epoch": 2.1, "grad_norm": 0.6865886449813843, "learning_rate": 0.00012337184180226892, "loss": 2.7808, "step": 42874 }, { "epoch": 2.1, "grad_norm": 0.6923301219940186, "learning_rate": 0.00012335939717843882, "loss": 2.9724, "step": 42875 }, { "epoch": 2.1, "grad_norm": 0.6706522107124329, "learning_rate": 0.00012334695301984607, "loss": 3.0265, "step": 42876 }, { "epoch": 2.1, "grad_norm": 0.6823163628578186, "learning_rate": 0.00012333450932652357, "loss": 3.1459, "step": 42877 }, { "epoch": 2.1, "grad_norm": 0.7010007500648499, "learning_rate": 0.00012332206609850399, "loss": 2.9557, "step": 42878 }, { "epoch": 2.1, "grad_norm": 0.7058342695236206, "learning_rate": 0.00012330962333582005, "loss": 2.85, "step": 42879 }, { "epoch": 2.1, "grad_norm": 0.7387443780899048, "learning_rate": 0.00012329718103850447, "loss": 2.7243, "step": 42880 }, { "epoch": 2.1, "grad_norm": 0.6735854744911194, "learning_rate": 0.0001232847392065901, "loss": 2.9853, "step": 42881 }, { "epoch": 2.1, "grad_norm": 0.7551968693733215, "learning_rate": 0.00012327229784010982, "loss": 2.844, "step": 42882 }, { "epoch": 2.1, "grad_norm": 0.6592845916748047, "learning_rate": 0.0001232598569390962, "loss": 2.9419, "step": 42883 }, { "epoch": 2.1, "grad_norm": 0.7430188655853271, "learning_rate": 0.00012324741650358221, "loss": 2.8737, "step": 42884 }, { "epoch": 2.1, "grad_norm": 0.683355450630188, "learning_rate": 0.0001232349765336005, "loss": 2.8757, "step": 42885 }, { "epoch": 2.1, "grad_norm": 0.7336805462837219, "learning_rate": 0.00012322253702918374, "loss": 2.922, "step": 42886 }, { "epoch": 2.1, "grad_norm": 0.7294582724571228, "learning_rate": 0.0001232100979903649, "loss": 2.9502, "step": 42887 }, { "epoch": 2.1, "grad_norm": 0.7106903195381165, "learning_rate": 0.0001231976594171765, "loss": 2.7697, "step": 42888 }, { "epoch": 2.1, "grad_norm": 0.702437698841095, "learning_rate": 0.00012318522130965162, "loss": 3.0185, "step": 42889 }, { "epoch": 2.1, "grad_norm": 0.673358142375946, "learning_rate": 0.0001231727836678227, "loss": 3.0657, "step": 42890 }, { "epoch": 2.1, "grad_norm": 0.7490993142127991, "learning_rate": 0.00012316034649172272, "loss": 2.8019, "step": 42891 }, { "epoch": 2.1, "grad_norm": 0.6476032137870789, "learning_rate": 0.00012314790978138436, "loss": 2.7008, "step": 42892 }, { "epoch": 2.1, "grad_norm": 0.6794806122779846, "learning_rate": 0.00012313547353684027, "loss": 2.9429, "step": 42893 }, { "epoch": 2.1, "grad_norm": 0.6979109048843384, "learning_rate": 0.0001231230377581234, "loss": 2.7561, "step": 42894 }, { "epoch": 2.1, "grad_norm": 0.7019332647323608, "learning_rate": 0.0001231106024452663, "loss": 2.9071, "step": 42895 }, { "epoch": 2.1, "grad_norm": 0.7101191878318787, "learning_rate": 0.0001230981675983019, "loss": 3.0134, "step": 42896 }, { "epoch": 2.1, "grad_norm": 0.6974600553512573, "learning_rate": 0.00012308573321726288, "loss": 2.9611, "step": 42897 }, { "epoch": 2.1, "grad_norm": 0.6884080171585083, "learning_rate": 0.00012307329930218196, "loss": 2.8888, "step": 42898 }, { "epoch": 2.1, "grad_norm": 0.7027839422225952, "learning_rate": 0.00012306086585309197, "loss": 3.136, "step": 42899 }, { "epoch": 2.1, "grad_norm": 0.6527525186538696, "learning_rate": 0.00012304843287002566, "loss": 2.8276, "step": 42900 }, { "epoch": 2.1, "grad_norm": 0.7058283686637878, "learning_rate": 0.00012303600035301558, "loss": 2.8065, "step": 42901 }, { "epoch": 2.1, "grad_norm": 0.6885665655136108, "learning_rate": 0.00012302356830209477, "loss": 2.69, "step": 42902 }, { "epoch": 2.1, "grad_norm": 0.6902278661727905, "learning_rate": 0.00012301113671729565, "loss": 2.8309, "step": 42903 }, { "epoch": 2.1, "grad_norm": 0.6766996383666992, "learning_rate": 0.0001229987055986513, "loss": 2.8522, "step": 42904 }, { "epoch": 2.1, "grad_norm": 0.7072258591651917, "learning_rate": 0.00012298627494619415, "loss": 2.952, "step": 42905 }, { "epoch": 2.1, "grad_norm": 0.7212955355644226, "learning_rate": 0.0001229738447599572, "loss": 2.9739, "step": 42906 }, { "epoch": 2.1, "grad_norm": 0.6990088224411011, "learning_rate": 0.0001229614150399731, "loss": 2.8831, "step": 42907 }, { "epoch": 2.1, "grad_norm": 0.6565753221511841, "learning_rate": 0.0001229489857862744, "loss": 3.0789, "step": 42908 }, { "epoch": 2.1, "grad_norm": 0.7555392384529114, "learning_rate": 0.00012293655699889414, "loss": 2.8962, "step": 42909 }, { "epoch": 2.1, "grad_norm": 0.6769063472747803, "learning_rate": 0.0001229241286778648, "loss": 2.825, "step": 42910 }, { "epoch": 2.1, "grad_norm": 0.7057018876075745, "learning_rate": 0.00012291170082321924, "loss": 2.8159, "step": 42911 }, { "epoch": 2.1, "grad_norm": 0.6804623007774353, "learning_rate": 0.0001228992734349903, "loss": 2.8989, "step": 42912 }, { "epoch": 2.1, "grad_norm": 0.6761884093284607, "learning_rate": 0.00012288684651321055, "loss": 2.9359, "step": 42913 }, { "epoch": 2.1, "grad_norm": 0.7361788153648376, "learning_rate": 0.00012287442005791275, "loss": 2.9682, "step": 42914 }, { "epoch": 2.1, "grad_norm": 0.7586960792541504, "learning_rate": 0.00012286199406912957, "loss": 2.6563, "step": 42915 }, { "epoch": 2.1, "grad_norm": 0.7192031145095825, "learning_rate": 0.00012284956854689377, "loss": 2.9592, "step": 42916 }, { "epoch": 2.1, "grad_norm": 0.673794686794281, "learning_rate": 0.00012283714349123825, "loss": 2.9138, "step": 42917 }, { "epoch": 2.1, "grad_norm": 0.6666536331176758, "learning_rate": 0.00012282471890219548, "loss": 2.8351, "step": 42918 }, { "epoch": 2.1, "grad_norm": 0.6872618794441223, "learning_rate": 0.0001228122947797984, "loss": 3.0766, "step": 42919 }, { "epoch": 2.1, "grad_norm": 0.6858128309249878, "learning_rate": 0.00012279987112407954, "loss": 2.9871, "step": 42920 }, { "epoch": 2.1, "grad_norm": 0.6915315389633179, "learning_rate": 0.00012278744793507184, "loss": 2.796, "step": 42921 }, { "epoch": 2.1, "grad_norm": 0.6912660002708435, "learning_rate": 0.0001227750252128079, "loss": 2.8063, "step": 42922 }, { "epoch": 2.1, "grad_norm": 0.7054868340492249, "learning_rate": 0.00012276260295732028, "loss": 2.9651, "step": 42923 }, { "epoch": 2.1, "grad_norm": 0.6961238980293274, "learning_rate": 0.00012275018116864202, "loss": 3.0532, "step": 42924 }, { "epoch": 2.1, "grad_norm": 0.7195843458175659, "learning_rate": 0.00012273775984680553, "loss": 2.7511, "step": 42925 }, { "epoch": 2.1, "grad_norm": 0.6970654129981995, "learning_rate": 0.00012272533899184367, "loss": 3.1432, "step": 42926 }, { "epoch": 2.1, "grad_norm": 0.6991729736328125, "learning_rate": 0.00012271291860378925, "loss": 3.0489, "step": 42927 }, { "epoch": 2.1, "grad_norm": 0.6705859303474426, "learning_rate": 0.00012270049868267488, "loss": 2.7341, "step": 42928 }, { "epoch": 2.1, "grad_norm": 0.679558277130127, "learning_rate": 0.0001226880792285333, "loss": 2.8421, "step": 42929 }, { "epoch": 2.1, "grad_norm": 0.7303120493888855, "learning_rate": 0.00012267566024139705, "loss": 3.0701, "step": 42930 }, { "epoch": 2.1, "grad_norm": 0.6934303641319275, "learning_rate": 0.00012266324172129913, "loss": 2.879, "step": 42931 }, { "epoch": 2.1, "grad_norm": 0.6617268323898315, "learning_rate": 0.00012265082366827196, "loss": 3.037, "step": 42932 }, { "epoch": 2.1, "grad_norm": 0.7124339938163757, "learning_rate": 0.0001226384060823484, "loss": 2.8715, "step": 42933 }, { "epoch": 2.1, "grad_norm": 0.710786759853363, "learning_rate": 0.00012262598896356128, "loss": 3.0071, "step": 42934 }, { "epoch": 2.1, "grad_norm": 0.708827793598175, "learning_rate": 0.00012261357231194313, "loss": 3.0311, "step": 42935 }, { "epoch": 2.1, "grad_norm": 0.7814995646476746, "learning_rate": 0.00012260115612752672, "loss": 3.0753, "step": 42936 }, { "epoch": 2.1, "grad_norm": 0.6750069260597229, "learning_rate": 0.0001225887404103446, "loss": 2.8407, "step": 42937 }, { "epoch": 2.1, "grad_norm": 0.6857645511627197, "learning_rate": 0.0001225763251604296, "loss": 3.0635, "step": 42938 }, { "epoch": 2.1, "grad_norm": 0.7252575755119324, "learning_rate": 0.00012256391037781454, "loss": 3.1004, "step": 42939 }, { "epoch": 2.1, "grad_norm": 0.671009361743927, "learning_rate": 0.00012255149606253183, "loss": 3.0177, "step": 42940 }, { "epoch": 2.1, "grad_norm": 0.7091543674468994, "learning_rate": 0.0001225390822146145, "loss": 3.0017, "step": 42941 }, { "epoch": 2.1, "grad_norm": 0.7315735816955566, "learning_rate": 0.00012252666883409493, "loss": 2.9222, "step": 42942 }, { "epoch": 2.1, "grad_norm": 0.7119027376174927, "learning_rate": 0.00012251425592100607, "loss": 2.8792, "step": 42943 }, { "epoch": 2.1, "grad_norm": 0.6784539222717285, "learning_rate": 0.0001225018434753805, "loss": 2.9135, "step": 42944 }, { "epoch": 2.1, "grad_norm": 0.7480369806289673, "learning_rate": 0.00012248943149725084, "loss": 2.8613, "step": 42945 }, { "epoch": 2.1, "grad_norm": 0.7432013154029846, "learning_rate": 0.00012247701998664994, "loss": 2.8941, "step": 42946 }, { "epoch": 2.1, "grad_norm": 0.7087968587875366, "learning_rate": 0.0001224646089436103, "loss": 2.9759, "step": 42947 }, { "epoch": 2.1, "grad_norm": 0.7181050181388855, "learning_rate": 0.0001224521983681647, "loss": 2.8121, "step": 42948 }, { "epoch": 2.1, "grad_norm": 0.6602891087532043, "learning_rate": 0.00012243978826034595, "loss": 3.1781, "step": 42949 }, { "epoch": 2.1, "grad_norm": 0.7048685550689697, "learning_rate": 0.00012242737862018662, "loss": 2.9432, "step": 42950 }, { "epoch": 2.1, "grad_norm": 0.6855930685997009, "learning_rate": 0.0001224149694477194, "loss": 2.8298, "step": 42951 }, { "epoch": 2.1, "grad_norm": 0.6815904378890991, "learning_rate": 0.00012240256074297687, "loss": 2.8817, "step": 42952 }, { "epoch": 2.11, "grad_norm": 0.6977430582046509, "learning_rate": 0.00012239015250599183, "loss": 2.9694, "step": 42953 }, { "epoch": 2.11, "grad_norm": 0.7615177631378174, "learning_rate": 0.00012237774473679704, "loss": 2.9469, "step": 42954 }, { "epoch": 2.11, "grad_norm": 0.6653721332550049, "learning_rate": 0.00012236533743542496, "loss": 2.8976, "step": 42955 }, { "epoch": 2.11, "grad_norm": 0.6686670184135437, "learning_rate": 0.00012235293060190855, "loss": 2.8846, "step": 42956 }, { "epoch": 2.11, "grad_norm": 0.6878864765167236, "learning_rate": 0.00012234052423628033, "loss": 2.6982, "step": 42957 }, { "epoch": 2.11, "grad_norm": 0.720576822757721, "learning_rate": 0.00012232811833857283, "loss": 2.9867, "step": 42958 }, { "epoch": 2.11, "grad_norm": 0.686820387840271, "learning_rate": 0.00012231571290881898, "loss": 3.0024, "step": 42959 }, { "epoch": 2.11, "grad_norm": 0.6706504821777344, "learning_rate": 0.00012230330794705125, "loss": 2.9889, "step": 42960 }, { "epoch": 2.11, "grad_norm": 0.672160804271698, "learning_rate": 0.00012229090345330254, "loss": 3.0674, "step": 42961 }, { "epoch": 2.11, "grad_norm": 0.7444672584533691, "learning_rate": 0.00012227849942760528, "loss": 2.9431, "step": 42962 }, { "epoch": 2.11, "grad_norm": 0.7115716338157654, "learning_rate": 0.00012226609586999223, "loss": 2.8684, "step": 42963 }, { "epoch": 2.11, "grad_norm": 0.768132746219635, "learning_rate": 0.00012225369278049617, "loss": 2.7876, "step": 42964 }, { "epoch": 2.11, "grad_norm": 0.7231113314628601, "learning_rate": 0.0001222412901591497, "loss": 2.8806, "step": 42965 }, { "epoch": 2.11, "grad_norm": 0.6965844035148621, "learning_rate": 0.00012222888800598544, "loss": 2.8743, "step": 42966 }, { "epoch": 2.11, "grad_norm": 0.6945066452026367, "learning_rate": 0.000122216486321036, "loss": 2.7055, "step": 42967 }, { "epoch": 2.11, "grad_norm": 0.675247073173523, "learning_rate": 0.00012220408510433409, "loss": 2.7753, "step": 42968 }, { "epoch": 2.11, "grad_norm": 0.6834036707878113, "learning_rate": 0.00012219168435591252, "loss": 2.7571, "step": 42969 }, { "epoch": 2.11, "grad_norm": 0.6711874008178711, "learning_rate": 0.00012217928407580377, "loss": 2.9645, "step": 42970 }, { "epoch": 2.11, "grad_norm": 0.6867322325706482, "learning_rate": 0.00012216688426404062, "loss": 2.949, "step": 42971 }, { "epoch": 2.11, "grad_norm": 0.713107168674469, "learning_rate": 0.00012215448492065568, "loss": 2.974, "step": 42972 }, { "epoch": 2.11, "grad_norm": 0.6954575777053833, "learning_rate": 0.00012214208604568148, "loss": 2.8547, "step": 42973 }, { "epoch": 2.11, "grad_norm": 0.7084396481513977, "learning_rate": 0.00012212968763915095, "loss": 3.0541, "step": 42974 }, { "epoch": 2.11, "grad_norm": 0.6738825440406799, "learning_rate": 0.00012211728970109643, "loss": 2.9123, "step": 42975 }, { "epoch": 2.11, "grad_norm": 0.730086624622345, "learning_rate": 0.00012210489223155087, "loss": 3.0752, "step": 42976 }, { "epoch": 2.11, "grad_norm": 0.7121165990829468, "learning_rate": 0.00012209249523054667, "loss": 2.9596, "step": 42977 }, { "epoch": 2.11, "grad_norm": 0.6749788522720337, "learning_rate": 0.00012208009869811663, "loss": 2.9951, "step": 42978 }, { "epoch": 2.11, "grad_norm": 0.7120500206947327, "learning_rate": 0.00012206770263429344, "loss": 2.8199, "step": 42979 }, { "epoch": 2.11, "grad_norm": 0.71989506483078, "learning_rate": 0.00012205530703910967, "loss": 3.0151, "step": 42980 }, { "epoch": 2.11, "grad_norm": 0.698340117931366, "learning_rate": 0.00012204291191259799, "loss": 2.9014, "step": 42981 }, { "epoch": 2.11, "grad_norm": 0.6799280047416687, "learning_rate": 0.00012203051725479094, "loss": 2.9977, "step": 42982 }, { "epoch": 2.11, "grad_norm": 0.6946273446083069, "learning_rate": 0.00012201812306572124, "loss": 2.6704, "step": 42983 }, { "epoch": 2.11, "grad_norm": 0.7306190133094788, "learning_rate": 0.00012200572934542167, "loss": 2.9042, "step": 42984 }, { "epoch": 2.11, "grad_norm": 0.7344149947166443, "learning_rate": 0.00012199333609392462, "loss": 2.9512, "step": 42985 }, { "epoch": 2.11, "grad_norm": 0.7163786292076111, "learning_rate": 0.00012198094331126299, "loss": 2.9448, "step": 42986 }, { "epoch": 2.11, "grad_norm": 0.7278892397880554, "learning_rate": 0.00012196855099746928, "loss": 3.0489, "step": 42987 }, { "epoch": 2.11, "grad_norm": 0.7170532941818237, "learning_rate": 0.00012195615915257607, "loss": 2.9299, "step": 42988 }, { "epoch": 2.11, "grad_norm": 0.6998884677886963, "learning_rate": 0.00012194376777661615, "loss": 2.6829, "step": 42989 }, { "epoch": 2.11, "grad_norm": 0.7183066606521606, "learning_rate": 0.00012193137686962197, "loss": 2.6611, "step": 42990 }, { "epoch": 2.11, "grad_norm": 0.6729427576065063, "learning_rate": 0.00012191898643162638, "loss": 3.1633, "step": 42991 }, { "epoch": 2.11, "grad_norm": 0.7029131054878235, "learning_rate": 0.00012190659646266182, "loss": 2.9584, "step": 42992 }, { "epoch": 2.11, "grad_norm": 0.709124743938446, "learning_rate": 0.0001218942069627611, "loss": 3.1043, "step": 42993 }, { "epoch": 2.11, "grad_norm": 0.640663743019104, "learning_rate": 0.00012188181793195679, "loss": 3.0283, "step": 42994 }, { "epoch": 2.11, "grad_norm": 0.6382255554199219, "learning_rate": 0.00012186942937028135, "loss": 2.9777, "step": 42995 }, { "epoch": 2.11, "grad_norm": 0.6668031215667725, "learning_rate": 0.00012185704127776767, "loss": 2.933, "step": 42996 }, { "epoch": 2.11, "grad_norm": 0.6796296834945679, "learning_rate": 0.00012184465365444815, "loss": 2.7516, "step": 42997 }, { "epoch": 2.11, "grad_norm": 0.6396820545196533, "learning_rate": 0.00012183226650035555, "loss": 2.989, "step": 42998 }, { "epoch": 2.11, "grad_norm": 0.6721262335777283, "learning_rate": 0.00012181987981552255, "loss": 3.1257, "step": 42999 }, { "epoch": 2.11, "grad_norm": 0.7053427696228027, "learning_rate": 0.0001218074935999816, "loss": 2.7152, "step": 43000 }, { "epoch": 2.11, "grad_norm": 0.6978045105934143, "learning_rate": 0.00012179510785376552, "loss": 3.0351, "step": 43001 }, { "epoch": 2.11, "grad_norm": 0.6725461483001709, "learning_rate": 0.00012178272257690682, "loss": 2.95, "step": 43002 }, { "epoch": 2.11, "grad_norm": 0.7001270651817322, "learning_rate": 0.00012177033776943805, "loss": 2.9442, "step": 43003 }, { "epoch": 2.11, "grad_norm": 0.7013981938362122, "learning_rate": 0.00012175795343139199, "loss": 2.8046, "step": 43004 }, { "epoch": 2.11, "grad_norm": 0.7114749550819397, "learning_rate": 0.00012174556956280109, "loss": 2.9528, "step": 43005 }, { "epoch": 2.11, "grad_norm": 0.687053918838501, "learning_rate": 0.00012173318616369815, "loss": 3.0319, "step": 43006 }, { "epoch": 2.11, "grad_norm": 0.7071757912635803, "learning_rate": 0.00012172080323411557, "loss": 3.0435, "step": 43007 }, { "epoch": 2.11, "grad_norm": 0.7466889023780823, "learning_rate": 0.0001217084207740862, "loss": 3.0157, "step": 43008 }, { "epoch": 2.11, "grad_norm": 0.6620727777481079, "learning_rate": 0.00012169603878364255, "loss": 2.9545, "step": 43009 }, { "epoch": 2.11, "grad_norm": 0.7407063245773315, "learning_rate": 0.0001216836572628171, "loss": 3.0802, "step": 43010 }, { "epoch": 2.11, "grad_norm": 0.6958205699920654, "learning_rate": 0.0001216712762116427, "loss": 2.79, "step": 43011 }, { "epoch": 2.11, "grad_norm": 0.7473426461219788, "learning_rate": 0.00012165889563015171, "loss": 2.844, "step": 43012 }, { "epoch": 2.11, "grad_norm": 0.701392412185669, "learning_rate": 0.00012164651551837698, "loss": 3.0328, "step": 43013 }, { "epoch": 2.11, "grad_norm": 0.7354457974433899, "learning_rate": 0.00012163413587635091, "loss": 3.0237, "step": 43014 }, { "epoch": 2.11, "grad_norm": 0.6868544220924377, "learning_rate": 0.0001216217567041063, "loss": 3.0438, "step": 43015 }, { "epoch": 2.11, "grad_norm": 0.6519982814788818, "learning_rate": 0.00012160937800167564, "loss": 2.9513, "step": 43016 }, { "epoch": 2.11, "grad_norm": 0.6850939393043518, "learning_rate": 0.00012159699976909144, "loss": 2.9938, "step": 43017 }, { "epoch": 2.11, "grad_norm": 0.6990240812301636, "learning_rate": 0.00012158462200638655, "loss": 3.0489, "step": 43018 }, { "epoch": 2.11, "grad_norm": 0.6919039487838745, "learning_rate": 0.0001215722447135933, "loss": 2.9913, "step": 43019 }, { "epoch": 2.11, "grad_norm": 0.6520496606826782, "learning_rate": 0.00012155986789074439, "loss": 2.8828, "step": 43020 }, { "epoch": 2.11, "grad_norm": 0.6608403921127319, "learning_rate": 0.00012154749153787257, "loss": 2.7283, "step": 43021 }, { "epoch": 2.11, "grad_norm": 0.6845476031303406, "learning_rate": 0.00012153511565501023, "loss": 3.0882, "step": 43022 }, { "epoch": 2.11, "grad_norm": 0.6670951247215271, "learning_rate": 0.00012152274024219016, "loss": 2.8529, "step": 43023 }, { "epoch": 2.11, "grad_norm": 0.684021532535553, "learning_rate": 0.0001215103652994448, "loss": 2.8678, "step": 43024 }, { "epoch": 2.11, "grad_norm": 0.7129572033882141, "learning_rate": 0.00012149799082680672, "loss": 3.1019, "step": 43025 }, { "epoch": 2.11, "grad_norm": 0.6857742667198181, "learning_rate": 0.00012148561682430866, "loss": 2.7703, "step": 43026 }, { "epoch": 2.11, "grad_norm": 0.6729403138160706, "learning_rate": 0.00012147324329198302, "loss": 2.7764, "step": 43027 }, { "epoch": 2.11, "grad_norm": 0.7263168692588806, "learning_rate": 0.00012146087022986264, "loss": 2.9186, "step": 43028 }, { "epoch": 2.11, "grad_norm": 0.6728549003601074, "learning_rate": 0.00012144849763797981, "loss": 2.9232, "step": 43029 }, { "epoch": 2.11, "grad_norm": 0.7353836894035339, "learning_rate": 0.00012143612551636741, "loss": 3.0423, "step": 43030 }, { "epoch": 2.11, "grad_norm": 0.7096356749534607, "learning_rate": 0.00012142375386505791, "loss": 3.0581, "step": 43031 }, { "epoch": 2.11, "grad_norm": 0.6555191874504089, "learning_rate": 0.00012141138268408373, "loss": 2.6584, "step": 43032 }, { "epoch": 2.11, "grad_norm": 0.7258883118629456, "learning_rate": 0.00012139901197347775, "loss": 2.9691, "step": 43033 }, { "epoch": 2.11, "grad_norm": 0.6590479016304016, "learning_rate": 0.00012138664173327224, "loss": 2.7755, "step": 43034 }, { "epoch": 2.11, "grad_norm": 0.6529909372329712, "learning_rate": 0.00012137427196349996, "loss": 2.913, "step": 43035 }, { "epoch": 2.11, "grad_norm": 0.6840808391571045, "learning_rate": 0.00012136190266419361, "loss": 2.9598, "step": 43036 }, { "epoch": 2.11, "grad_norm": 0.6781811714172363, "learning_rate": 0.0001213495338353856, "loss": 2.9957, "step": 43037 }, { "epoch": 2.11, "grad_norm": 0.7219123840332031, "learning_rate": 0.00012133716547710852, "loss": 3.1447, "step": 43038 }, { "epoch": 2.11, "grad_norm": 0.6630083322525024, "learning_rate": 0.00012132479758939487, "loss": 2.7962, "step": 43039 }, { "epoch": 2.11, "grad_norm": 0.6876730918884277, "learning_rate": 0.00012131243017227734, "loss": 2.7907, "step": 43040 }, { "epoch": 2.11, "grad_norm": 0.6795483827590942, "learning_rate": 0.00012130006322578856, "loss": 2.9542, "step": 43041 }, { "epoch": 2.11, "grad_norm": 0.6933289766311646, "learning_rate": 0.00012128769674996093, "loss": 2.8632, "step": 43042 }, { "epoch": 2.11, "grad_norm": 0.7193173170089722, "learning_rate": 0.00012127533074482718, "loss": 2.946, "step": 43043 }, { "epoch": 2.11, "grad_norm": 0.7053383588790894, "learning_rate": 0.00012126296521041976, "loss": 2.74, "step": 43044 }, { "epoch": 2.11, "grad_norm": 0.6752833127975464, "learning_rate": 0.00012125060014677137, "loss": 2.6228, "step": 43045 }, { "epoch": 2.11, "grad_norm": 0.6935940980911255, "learning_rate": 0.00012123823555391449, "loss": 3.0145, "step": 43046 }, { "epoch": 2.11, "grad_norm": 0.6865079998970032, "learning_rate": 0.00012122587143188162, "loss": 2.8768, "step": 43047 }, { "epoch": 2.11, "grad_norm": 0.6913439631462097, "learning_rate": 0.00012121350778070547, "loss": 2.8929, "step": 43048 }, { "epoch": 2.11, "grad_norm": 0.7718340754508972, "learning_rate": 0.00012120114460041844, "loss": 3.1285, "step": 43049 }, { "epoch": 2.11, "grad_norm": 0.6865830421447754, "learning_rate": 0.00012118878189105318, "loss": 2.8353, "step": 43050 }, { "epoch": 2.11, "grad_norm": 0.6636896729469299, "learning_rate": 0.00012117641965264238, "loss": 2.7779, "step": 43051 }, { "epoch": 2.11, "grad_norm": 0.7035448551177979, "learning_rate": 0.00012116405788521848, "loss": 3.0683, "step": 43052 }, { "epoch": 2.11, "grad_norm": 0.6768655180931091, "learning_rate": 0.00012115169658881401, "loss": 3.105, "step": 43053 }, { "epoch": 2.11, "grad_norm": 0.688899576663971, "learning_rate": 0.00012113933576346143, "loss": 2.7824, "step": 43054 }, { "epoch": 2.11, "grad_norm": 0.7167072892189026, "learning_rate": 0.00012112697540919346, "loss": 2.7641, "step": 43055 }, { "epoch": 2.11, "grad_norm": 0.7082327008247375, "learning_rate": 0.0001211146155260427, "loss": 2.829, "step": 43056 }, { "epoch": 2.11, "grad_norm": 0.6907978653907776, "learning_rate": 0.0001211022561140415, "loss": 3.096, "step": 43057 }, { "epoch": 2.11, "grad_norm": 0.6979928016662598, "learning_rate": 0.00012108989717322266, "loss": 2.9573, "step": 43058 }, { "epoch": 2.11, "grad_norm": 0.7088404297828674, "learning_rate": 0.00012107753870361857, "loss": 2.769, "step": 43059 }, { "epoch": 2.11, "grad_norm": 0.6992489695549011, "learning_rate": 0.0001210651807052617, "loss": 2.806, "step": 43060 }, { "epoch": 2.11, "grad_norm": 0.6478296518325806, "learning_rate": 0.00012105282317818484, "loss": 2.7097, "step": 43061 }, { "epoch": 2.11, "grad_norm": 0.6695234775543213, "learning_rate": 0.00012104046612242028, "loss": 3.1377, "step": 43062 }, { "epoch": 2.11, "grad_norm": 0.6910408735275269, "learning_rate": 0.0001210281095380008, "loss": 2.951, "step": 43063 }, { "epoch": 2.11, "grad_norm": 0.6836959719657898, "learning_rate": 0.00012101575342495874, "loss": 2.9718, "step": 43064 }, { "epoch": 2.11, "grad_norm": 0.6675450801849365, "learning_rate": 0.00012100339778332671, "loss": 2.9584, "step": 43065 }, { "epoch": 2.11, "grad_norm": 0.6959847807884216, "learning_rate": 0.00012099104261313742, "loss": 2.9065, "step": 43066 }, { "epoch": 2.11, "grad_norm": 0.6748732924461365, "learning_rate": 0.00012097868791442327, "loss": 2.9597, "step": 43067 }, { "epoch": 2.11, "grad_norm": 0.6882777810096741, "learning_rate": 0.00012096633368721678, "loss": 2.9002, "step": 43068 }, { "epoch": 2.11, "grad_norm": 0.7168293595314026, "learning_rate": 0.00012095397993155042, "loss": 2.871, "step": 43069 }, { "epoch": 2.11, "grad_norm": 0.73194420337677, "learning_rate": 0.00012094162664745682, "loss": 2.8233, "step": 43070 }, { "epoch": 2.11, "grad_norm": 0.7441113591194153, "learning_rate": 0.00012092927383496862, "loss": 2.9003, "step": 43071 }, { "epoch": 2.11, "grad_norm": 0.6556605696678162, "learning_rate": 0.00012091692149411815, "loss": 2.7515, "step": 43072 }, { "epoch": 2.11, "grad_norm": 0.6657657623291016, "learning_rate": 0.00012090456962493811, "loss": 2.959, "step": 43073 }, { "epoch": 2.11, "grad_norm": 0.6989374756813049, "learning_rate": 0.00012089221822746101, "loss": 3.0809, "step": 43074 }, { "epoch": 2.11, "grad_norm": 0.6639280319213867, "learning_rate": 0.00012087986730171923, "loss": 2.8231, "step": 43075 }, { "epoch": 2.11, "grad_norm": 0.7291037440299988, "learning_rate": 0.00012086751684774548, "loss": 2.933, "step": 43076 }, { "epoch": 2.11, "grad_norm": 0.7148304581642151, "learning_rate": 0.00012085516686557214, "loss": 2.7072, "step": 43077 }, { "epoch": 2.11, "grad_norm": 0.693949818611145, "learning_rate": 0.00012084281735523191, "loss": 2.8575, "step": 43078 }, { "epoch": 2.11, "grad_norm": 0.7028672695159912, "learning_rate": 0.00012083046831675711, "loss": 2.8634, "step": 43079 }, { "epoch": 2.11, "grad_norm": 0.7147821187973022, "learning_rate": 0.0001208181197501804, "loss": 2.8629, "step": 43080 }, { "epoch": 2.11, "grad_norm": 0.706278383731842, "learning_rate": 0.00012080577165553434, "loss": 2.8998, "step": 43081 }, { "epoch": 2.11, "grad_norm": 0.6842054128646851, "learning_rate": 0.00012079342403285143, "loss": 2.9569, "step": 43082 }, { "epoch": 2.11, "grad_norm": 0.6698657870292664, "learning_rate": 0.00012078107688216415, "loss": 2.9066, "step": 43083 }, { "epoch": 2.11, "grad_norm": 0.6596769690513611, "learning_rate": 0.00012076873020350489, "loss": 2.799, "step": 43084 }, { "epoch": 2.11, "grad_norm": 0.7220476865768433, "learning_rate": 0.00012075638399690642, "loss": 2.8281, "step": 43085 }, { "epoch": 2.11, "grad_norm": 0.7004108428955078, "learning_rate": 0.00012074403826240103, "loss": 2.7315, "step": 43086 }, { "epoch": 2.11, "grad_norm": 0.68475341796875, "learning_rate": 0.00012073169300002136, "loss": 2.9782, "step": 43087 }, { "epoch": 2.11, "grad_norm": 0.7112210988998413, "learning_rate": 0.0001207193482098, "loss": 2.8447, "step": 43088 }, { "epoch": 2.11, "grad_norm": 0.6924461722373962, "learning_rate": 0.00012070700389176937, "loss": 2.9229, "step": 43089 }, { "epoch": 2.11, "grad_norm": 0.6814031004905701, "learning_rate": 0.000120694660045962, "loss": 2.8308, "step": 43090 }, { "epoch": 2.11, "grad_norm": 0.7307980060577393, "learning_rate": 0.00012068231667241027, "loss": 3.0753, "step": 43091 }, { "epoch": 2.11, "grad_norm": 0.729032576084137, "learning_rate": 0.0001206699737711468, "loss": 2.8441, "step": 43092 }, { "epoch": 2.11, "grad_norm": 0.6686051487922668, "learning_rate": 0.00012065763134220422, "loss": 2.9681, "step": 43093 }, { "epoch": 2.11, "grad_norm": 0.7233748435974121, "learning_rate": 0.00012064528938561483, "loss": 2.8097, "step": 43094 }, { "epoch": 2.11, "grad_norm": 0.6904696822166443, "learning_rate": 0.00012063294790141134, "loss": 2.8041, "step": 43095 }, { "epoch": 2.11, "grad_norm": 0.6654314994812012, "learning_rate": 0.00012062060688962611, "loss": 2.9394, "step": 43096 }, { "epoch": 2.11, "grad_norm": 0.6917867064476013, "learning_rate": 0.0001206082663502916, "loss": 2.9945, "step": 43097 }, { "epoch": 2.11, "grad_norm": 0.6887367963790894, "learning_rate": 0.00012059592628344051, "loss": 2.899, "step": 43098 }, { "epoch": 2.11, "grad_norm": 0.7017031311988831, "learning_rate": 0.00012058358668910509, "loss": 2.7895, "step": 43099 }, { "epoch": 2.11, "grad_norm": 0.6768372654914856, "learning_rate": 0.00012057124756731808, "loss": 2.9466, "step": 43100 }, { "epoch": 2.11, "grad_norm": 0.7897763252258301, "learning_rate": 0.0001205589089181118, "loss": 3.1558, "step": 43101 }, { "epoch": 2.11, "grad_norm": 0.7289750576019287, "learning_rate": 0.00012054657074151879, "loss": 2.8482, "step": 43102 }, { "epoch": 2.11, "grad_norm": 0.6732428073883057, "learning_rate": 0.00012053423303757167, "loss": 2.9046, "step": 43103 }, { "epoch": 2.11, "grad_norm": 0.6708196997642517, "learning_rate": 0.00012052189580630288, "loss": 2.9676, "step": 43104 }, { "epoch": 2.11, "grad_norm": 0.7545298933982849, "learning_rate": 0.00012050955904774486, "loss": 3.1395, "step": 43105 }, { "epoch": 2.11, "grad_norm": 0.7323744893074036, "learning_rate": 0.00012049722276192999, "loss": 2.966, "step": 43106 }, { "epoch": 2.11, "grad_norm": 0.6957453489303589, "learning_rate": 0.0001204848869488909, "loss": 2.8968, "step": 43107 }, { "epoch": 2.11, "grad_norm": 0.6939908266067505, "learning_rate": 0.0001204725516086602, "loss": 2.7944, "step": 43108 }, { "epoch": 2.11, "grad_norm": 0.6847617626190186, "learning_rate": 0.00012046021674127011, "loss": 2.9505, "step": 43109 }, { "epoch": 2.11, "grad_norm": 0.7532222867012024, "learning_rate": 0.0001204478823467534, "loss": 2.8696, "step": 43110 }, { "epoch": 2.11, "grad_norm": 0.6626277565956116, "learning_rate": 0.00012043554842514238, "loss": 3.0799, "step": 43111 }, { "epoch": 2.11, "grad_norm": 0.6772818565368652, "learning_rate": 0.00012042321497646946, "loss": 2.9087, "step": 43112 }, { "epoch": 2.11, "grad_norm": 0.6805033683776855, "learning_rate": 0.00012041088200076734, "loss": 2.8525, "step": 43113 }, { "epoch": 2.11, "grad_norm": 0.7096461057662964, "learning_rate": 0.00012039854949806833, "loss": 2.9631, "step": 43114 }, { "epoch": 2.11, "grad_norm": 0.7710531949996948, "learning_rate": 0.00012038621746840501, "loss": 3.0431, "step": 43115 }, { "epoch": 2.11, "grad_norm": 0.6965796947479248, "learning_rate": 0.00012037388591180978, "loss": 2.9314, "step": 43116 }, { "epoch": 2.11, "grad_norm": 0.6995751857757568, "learning_rate": 0.00012036155482831526, "loss": 2.9496, "step": 43117 }, { "epoch": 2.11, "grad_norm": 0.6960641741752625, "learning_rate": 0.00012034922421795378, "loss": 3.0477, "step": 43118 }, { "epoch": 2.11, "grad_norm": 0.6957606673240662, "learning_rate": 0.00012033689408075783, "loss": 2.8089, "step": 43119 }, { "epoch": 2.11, "grad_norm": 0.7263481616973877, "learning_rate": 0.00012032456441675999, "loss": 3.0083, "step": 43120 }, { "epoch": 2.11, "grad_norm": 0.6750003695487976, "learning_rate": 0.0001203122352259926, "loss": 2.727, "step": 43121 }, { "epoch": 2.11, "grad_norm": 0.6566562056541443, "learning_rate": 0.00012029990650848817, "loss": 2.8773, "step": 43122 }, { "epoch": 2.11, "grad_norm": 0.6913471221923828, "learning_rate": 0.00012028757826427931, "loss": 3.0402, "step": 43123 }, { "epoch": 2.11, "grad_norm": 0.6827965974807739, "learning_rate": 0.00012027525049339829, "loss": 2.7393, "step": 43124 }, { "epoch": 2.11, "grad_norm": 0.6656849980354309, "learning_rate": 0.00012026292319587776, "loss": 3.0347, "step": 43125 }, { "epoch": 2.11, "grad_norm": 0.7655985355377197, "learning_rate": 0.00012025059637175012, "loss": 2.9697, "step": 43126 }, { "epoch": 2.11, "grad_norm": 0.7039027810096741, "learning_rate": 0.00012023827002104767, "loss": 2.8746, "step": 43127 }, { "epoch": 2.11, "grad_norm": 0.6682581901550293, "learning_rate": 0.00012022594414380317, "loss": 3.0307, "step": 43128 }, { "epoch": 2.11, "grad_norm": 0.6695263981819153, "learning_rate": 0.00012021361874004882, "loss": 2.9126, "step": 43129 }, { "epoch": 2.11, "grad_norm": 0.6603823900222778, "learning_rate": 0.00012020129380981729, "loss": 3.0492, "step": 43130 }, { "epoch": 2.11, "grad_norm": 0.6570489406585693, "learning_rate": 0.00012018896935314086, "loss": 2.8269, "step": 43131 }, { "epoch": 2.11, "grad_norm": 0.7224562764167786, "learning_rate": 0.00012017664537005215, "loss": 2.9248, "step": 43132 }, { "epoch": 2.11, "grad_norm": 0.7442463636398315, "learning_rate": 0.00012016432186058359, "loss": 2.8481, "step": 43133 }, { "epoch": 2.11, "grad_norm": 0.6883277893066406, "learning_rate": 0.00012015199882476748, "loss": 2.8931, "step": 43134 }, { "epoch": 2.11, "grad_norm": 0.6893338561058044, "learning_rate": 0.0001201396762626365, "loss": 3.1737, "step": 43135 }, { "epoch": 2.11, "grad_norm": 0.6887253522872925, "learning_rate": 0.00012012735417422288, "loss": 2.9977, "step": 43136 }, { "epoch": 2.11, "grad_norm": 0.7243923544883728, "learning_rate": 0.0001201150325595592, "loss": 2.78, "step": 43137 }, { "epoch": 2.11, "grad_norm": 0.6988846659660339, "learning_rate": 0.000120102711418678, "loss": 2.8849, "step": 43138 }, { "epoch": 2.11, "grad_norm": 0.6805744171142578, "learning_rate": 0.00012009039075161162, "loss": 2.8165, "step": 43139 }, { "epoch": 2.11, "grad_norm": 0.7949154376983643, "learning_rate": 0.00012007807055839257, "loss": 2.8517, "step": 43140 }, { "epoch": 2.11, "grad_norm": 0.6621174216270447, "learning_rate": 0.00012006575083905312, "loss": 2.9517, "step": 43141 }, { "epoch": 2.11, "grad_norm": 0.702193021774292, "learning_rate": 0.00012005343159362588, "loss": 2.8943, "step": 43142 }, { "epoch": 2.11, "grad_norm": 0.6978039145469666, "learning_rate": 0.00012004111282214336, "loss": 2.9092, "step": 43143 }, { "epoch": 2.11, "grad_norm": 0.7211534976959229, "learning_rate": 0.0001200287945246378, "loss": 2.7052, "step": 43144 }, { "epoch": 2.11, "grad_norm": 0.7075209617614746, "learning_rate": 0.00012001647670114186, "loss": 3.0433, "step": 43145 }, { "epoch": 2.11, "grad_norm": 0.7127019166946411, "learning_rate": 0.0001200041593516878, "loss": 3.0556, "step": 43146 }, { "epoch": 2.11, "grad_norm": 0.728064239025116, "learning_rate": 0.00011999184247630822, "loss": 3.0178, "step": 43147 }, { "epoch": 2.11, "grad_norm": 0.6802637577056885, "learning_rate": 0.00011997952607503553, "loss": 2.7863, "step": 43148 }, { "epoch": 2.11, "grad_norm": 0.716955840587616, "learning_rate": 0.00011996721014790195, "loss": 2.9714, "step": 43149 }, { "epoch": 2.11, "grad_norm": 0.772565484046936, "learning_rate": 0.00011995489469494026, "loss": 2.9096, "step": 43150 }, { "epoch": 2.11, "grad_norm": 0.6997973322868347, "learning_rate": 0.00011994257971618261, "loss": 2.8717, "step": 43151 }, { "epoch": 2.11, "grad_norm": 0.7149245142936707, "learning_rate": 0.00011993026521166154, "loss": 2.8153, "step": 43152 }, { "epoch": 2.11, "grad_norm": 0.7250232696533203, "learning_rate": 0.0001199179511814096, "loss": 2.9153, "step": 43153 }, { "epoch": 2.11, "grad_norm": 0.7074378728866577, "learning_rate": 0.0001199056376254591, "loss": 3.0232, "step": 43154 }, { "epoch": 2.11, "grad_norm": 0.7458380460739136, "learning_rate": 0.0001198933245438425, "loss": 2.7417, "step": 43155 }, { "epoch": 2.11, "grad_norm": 0.6658608913421631, "learning_rate": 0.00011988101193659214, "loss": 2.7459, "step": 43156 }, { "epoch": 2.12, "grad_norm": 0.698132336139679, "learning_rate": 0.00011986869980374052, "loss": 3.1057, "step": 43157 }, { "epoch": 2.12, "grad_norm": 0.6716579794883728, "learning_rate": 0.00011985638814532016, "loss": 3.0451, "step": 43158 }, { "epoch": 2.12, "grad_norm": 0.7294808030128479, "learning_rate": 0.00011984407696136332, "loss": 3.0347, "step": 43159 }, { "epoch": 2.12, "grad_norm": 0.7262159585952759, "learning_rate": 0.00011983176625190261, "loss": 2.9116, "step": 43160 }, { "epoch": 2.12, "grad_norm": 0.7426044344902039, "learning_rate": 0.00011981945601697036, "loss": 3.1963, "step": 43161 }, { "epoch": 2.12, "grad_norm": 0.6831039190292358, "learning_rate": 0.0001198071462565989, "loss": 2.8845, "step": 43162 }, { "epoch": 2.12, "grad_norm": 0.717583954334259, "learning_rate": 0.0001197948369708208, "loss": 2.8779, "step": 43163 }, { "epoch": 2.12, "grad_norm": 0.7191320061683655, "learning_rate": 0.00011978252815966833, "loss": 2.7909, "step": 43164 }, { "epoch": 2.12, "grad_norm": 0.7239760756492615, "learning_rate": 0.00011977021982317412, "loss": 3.0215, "step": 43165 }, { "epoch": 2.12, "grad_norm": 0.7211264967918396, "learning_rate": 0.00011975791196137032, "loss": 3.0887, "step": 43166 }, { "epoch": 2.12, "grad_norm": 0.7530267238616943, "learning_rate": 0.00011974560457428965, "loss": 2.6428, "step": 43167 }, { "epoch": 2.12, "grad_norm": 0.7176784873008728, "learning_rate": 0.00011973329766196422, "loss": 3.0019, "step": 43168 }, { "epoch": 2.12, "grad_norm": 0.6822056174278259, "learning_rate": 0.0001197209912244267, "loss": 3.0091, "step": 43169 }, { "epoch": 2.12, "grad_norm": 0.7103027701377869, "learning_rate": 0.00011970868526170942, "loss": 2.9864, "step": 43170 }, { "epoch": 2.12, "grad_norm": 0.6895686984062195, "learning_rate": 0.00011969637977384464, "loss": 3.0177, "step": 43171 }, { "epoch": 2.12, "grad_norm": 0.7093585729598999, "learning_rate": 0.00011968407476086502, "loss": 3.0156, "step": 43172 }, { "epoch": 2.12, "grad_norm": 0.7251325249671936, "learning_rate": 0.0001196717702228027, "loss": 2.7788, "step": 43173 }, { "epoch": 2.12, "grad_norm": 0.7053051590919495, "learning_rate": 0.00011965946615969028, "loss": 3.0574, "step": 43174 }, { "epoch": 2.12, "grad_norm": 0.6850510239601135, "learning_rate": 0.00011964716257156019, "loss": 3.1772, "step": 43175 }, { "epoch": 2.12, "grad_norm": 0.768746554851532, "learning_rate": 0.00011963485945844477, "loss": 2.9536, "step": 43176 }, { "epoch": 2.12, "grad_norm": 0.7009607553482056, "learning_rate": 0.00011962255682037643, "loss": 2.8981, "step": 43177 }, { "epoch": 2.12, "grad_norm": 0.714339017868042, "learning_rate": 0.00011961025465738745, "loss": 2.7134, "step": 43178 }, { "epoch": 2.12, "grad_norm": 0.6838600039482117, "learning_rate": 0.00011959795296951032, "loss": 2.9917, "step": 43179 }, { "epoch": 2.12, "grad_norm": 0.6880089640617371, "learning_rate": 0.00011958565175677759, "loss": 2.663, "step": 43180 }, { "epoch": 2.12, "grad_norm": 0.7251917123794556, "learning_rate": 0.00011957335101922145, "loss": 3.0461, "step": 43181 }, { "epoch": 2.12, "grad_norm": 0.7145901918411255, "learning_rate": 0.00011956105075687445, "loss": 3.0669, "step": 43182 }, { "epoch": 2.12, "grad_norm": 0.7030085325241089, "learning_rate": 0.00011954875096976883, "loss": 3.0314, "step": 43183 }, { "epoch": 2.12, "grad_norm": 0.6625970005989075, "learning_rate": 0.00011953645165793717, "loss": 2.8142, "step": 43184 }, { "epoch": 2.12, "grad_norm": 0.718930184841156, "learning_rate": 0.00011952415282141176, "loss": 2.8112, "step": 43185 }, { "epoch": 2.12, "grad_norm": 0.7372531294822693, "learning_rate": 0.0001195118544602249, "loss": 2.9687, "step": 43186 }, { "epoch": 2.12, "grad_norm": 0.714538037776947, "learning_rate": 0.00011949955657440918, "loss": 2.6542, "step": 43187 }, { "epoch": 2.12, "grad_norm": 0.6826031804084778, "learning_rate": 0.0001194872591639968, "loss": 2.8859, "step": 43188 }, { "epoch": 2.12, "grad_norm": 0.7082632780075073, "learning_rate": 0.0001194749622290202, "loss": 2.933, "step": 43189 }, { "epoch": 2.12, "grad_norm": 0.6917764544487, "learning_rate": 0.00011946266576951195, "loss": 3.0133, "step": 43190 }, { "epoch": 2.12, "grad_norm": 0.7382910251617432, "learning_rate": 0.00011945036978550428, "loss": 2.9339, "step": 43191 }, { "epoch": 2.12, "grad_norm": 0.7105042338371277, "learning_rate": 0.00011943807427702957, "loss": 3.0067, "step": 43192 }, { "epoch": 2.12, "grad_norm": 0.6918284296989441, "learning_rate": 0.00011942577924412016, "loss": 3.0175, "step": 43193 }, { "epoch": 2.12, "grad_norm": 0.7218431830406189, "learning_rate": 0.00011941348468680847, "loss": 2.8759, "step": 43194 }, { "epoch": 2.12, "grad_norm": 0.695606529712677, "learning_rate": 0.00011940119060512702, "loss": 2.9613, "step": 43195 }, { "epoch": 2.12, "grad_norm": 0.7161937952041626, "learning_rate": 0.00011938889699910794, "loss": 2.9276, "step": 43196 }, { "epoch": 2.12, "grad_norm": 0.6780910491943359, "learning_rate": 0.00011937660386878388, "loss": 2.9779, "step": 43197 }, { "epoch": 2.12, "grad_norm": 0.6835953593254089, "learning_rate": 0.00011936431121418708, "loss": 2.7212, "step": 43198 }, { "epoch": 2.12, "grad_norm": 0.7107114791870117, "learning_rate": 0.00011935201903534981, "loss": 3.214, "step": 43199 }, { "epoch": 2.12, "grad_norm": 0.6647638082504272, "learning_rate": 0.00011933972733230469, "loss": 2.9332, "step": 43200 }, { "epoch": 2.12, "grad_norm": 0.7023340463638306, "learning_rate": 0.00011932743610508383, "loss": 3.1139, "step": 43201 }, { "epoch": 2.12, "grad_norm": 0.7224500775337219, "learning_rate": 0.00011931514535371983, "loss": 2.8405, "step": 43202 }, { "epoch": 2.12, "grad_norm": 0.6990615725517273, "learning_rate": 0.00011930285507824487, "loss": 2.9217, "step": 43203 }, { "epoch": 2.12, "grad_norm": 0.7267166972160339, "learning_rate": 0.0001192905652786914, "loss": 2.9494, "step": 43204 }, { "epoch": 2.12, "grad_norm": 0.6758955717086792, "learning_rate": 0.00011927827595509191, "loss": 3.1795, "step": 43205 }, { "epoch": 2.12, "grad_norm": 0.7026833295822144, "learning_rate": 0.00011926598710747867, "loss": 3.0531, "step": 43206 }, { "epoch": 2.12, "grad_norm": 0.7141755819320679, "learning_rate": 0.00011925369873588404, "loss": 2.865, "step": 43207 }, { "epoch": 2.12, "grad_norm": 0.6519131064414978, "learning_rate": 0.00011924141084034027, "loss": 2.8592, "step": 43208 }, { "epoch": 2.12, "grad_norm": 0.6646400690078735, "learning_rate": 0.00011922912342087986, "loss": 2.6698, "step": 43209 }, { "epoch": 2.12, "grad_norm": 0.7201839685440063, "learning_rate": 0.00011921683647753523, "loss": 2.891, "step": 43210 }, { "epoch": 2.12, "grad_norm": 0.7078610062599182, "learning_rate": 0.00011920455001033855, "loss": 2.9393, "step": 43211 }, { "epoch": 2.12, "grad_norm": 0.7181376218795776, "learning_rate": 0.0001191922640193224, "loss": 2.7381, "step": 43212 }, { "epoch": 2.12, "grad_norm": 0.6957877278327942, "learning_rate": 0.00011917997850451902, "loss": 2.9347, "step": 43213 }, { "epoch": 2.12, "grad_norm": 0.7163203358650208, "learning_rate": 0.00011916769346596069, "loss": 3.1533, "step": 43214 }, { "epoch": 2.12, "grad_norm": 0.7181594967842102, "learning_rate": 0.00011915540890367997, "loss": 2.8268, "step": 43215 }, { "epoch": 2.12, "grad_norm": 0.6955720782279968, "learning_rate": 0.00011914312481770898, "loss": 3.059, "step": 43216 }, { "epoch": 2.12, "grad_norm": 0.6729768514633179, "learning_rate": 0.00011913084120808029, "loss": 2.8435, "step": 43217 }, { "epoch": 2.12, "grad_norm": 0.7270863056182861, "learning_rate": 0.00011911855807482606, "loss": 2.7089, "step": 43218 }, { "epoch": 2.12, "grad_norm": 0.6758031845092773, "learning_rate": 0.00011910627541797882, "loss": 2.683, "step": 43219 }, { "epoch": 2.12, "grad_norm": 0.6879879236221313, "learning_rate": 0.00011909399323757088, "loss": 3.0191, "step": 43220 }, { "epoch": 2.12, "grad_norm": 0.6507390737533569, "learning_rate": 0.00011908171153363439, "loss": 2.9589, "step": 43221 }, { "epoch": 2.12, "grad_norm": 0.7297090291976929, "learning_rate": 0.000119069430306202, "loss": 3.0383, "step": 43222 }, { "epoch": 2.12, "grad_norm": 0.6910653114318848, "learning_rate": 0.00011905714955530578, "loss": 2.8401, "step": 43223 }, { "epoch": 2.12, "grad_norm": 0.7071283459663391, "learning_rate": 0.00011904486928097823, "loss": 2.9656, "step": 43224 }, { "epoch": 2.12, "grad_norm": 0.6900044083595276, "learning_rate": 0.00011903258948325173, "loss": 2.885, "step": 43225 }, { "epoch": 2.12, "grad_norm": 0.7246055006980896, "learning_rate": 0.0001190203101621585, "loss": 3.0716, "step": 43226 }, { "epoch": 2.12, "grad_norm": 0.6860259175300598, "learning_rate": 0.00011900803131773104, "loss": 3.0308, "step": 43227 }, { "epoch": 2.12, "grad_norm": 0.6874629855155945, "learning_rate": 0.00011899575295000155, "loss": 2.9469, "step": 43228 }, { "epoch": 2.12, "grad_norm": 0.6858367919921875, "learning_rate": 0.00011898347505900236, "loss": 2.8059, "step": 43229 }, { "epoch": 2.12, "grad_norm": 0.6908622980117798, "learning_rate": 0.00011897119764476593, "loss": 2.7786, "step": 43230 }, { "epoch": 2.12, "grad_norm": 0.729853630065918, "learning_rate": 0.00011895892070732445, "loss": 2.9305, "step": 43231 }, { "epoch": 2.12, "grad_norm": 0.6964291334152222, "learning_rate": 0.00011894664424671043, "loss": 2.8263, "step": 43232 }, { "epoch": 2.12, "grad_norm": 0.7044500112533569, "learning_rate": 0.00011893436826295597, "loss": 2.928, "step": 43233 }, { "epoch": 2.12, "grad_norm": 0.7081640958786011, "learning_rate": 0.00011892209275609367, "loss": 2.9927, "step": 43234 }, { "epoch": 2.12, "grad_norm": 0.7295190095901489, "learning_rate": 0.00011890981772615573, "loss": 3.0935, "step": 43235 }, { "epoch": 2.12, "grad_norm": 0.6798872351646423, "learning_rate": 0.00011889754317317436, "loss": 2.8526, "step": 43236 }, { "epoch": 2.12, "grad_norm": 0.7571244835853577, "learning_rate": 0.00011888526909718213, "loss": 2.9359, "step": 43237 }, { "epoch": 2.12, "grad_norm": 0.7170717120170593, "learning_rate": 0.00011887299549821114, "loss": 2.925, "step": 43238 }, { "epoch": 2.12, "grad_norm": 0.7144536375999451, "learning_rate": 0.00011886072237629378, "loss": 2.9109, "step": 43239 }, { "epoch": 2.12, "grad_norm": 0.7104244232177734, "learning_rate": 0.00011884844973146254, "loss": 2.8062, "step": 43240 }, { "epoch": 2.12, "grad_norm": 0.672351062297821, "learning_rate": 0.00011883617756374963, "loss": 2.9023, "step": 43241 }, { "epoch": 2.12, "grad_norm": 0.6850356459617615, "learning_rate": 0.00011882390587318737, "loss": 3.0001, "step": 43242 }, { "epoch": 2.12, "grad_norm": 0.7461333870887756, "learning_rate": 0.00011881163465980797, "loss": 2.7922, "step": 43243 }, { "epoch": 2.12, "grad_norm": 0.6776725053787231, "learning_rate": 0.00011879936392364395, "loss": 3.152, "step": 43244 }, { "epoch": 2.12, "grad_norm": 0.7232372760772705, "learning_rate": 0.00011878709366472742, "loss": 2.9213, "step": 43245 }, { "epoch": 2.12, "grad_norm": 0.6616763472557068, "learning_rate": 0.00011877482388309084, "loss": 3.0254, "step": 43246 }, { "epoch": 2.12, "grad_norm": 0.6551421880722046, "learning_rate": 0.00011876255457876656, "loss": 2.9206, "step": 43247 }, { "epoch": 2.12, "grad_norm": 0.712017834186554, "learning_rate": 0.00011875028575178676, "loss": 2.8227, "step": 43248 }, { "epoch": 2.12, "grad_norm": 0.6586719751358032, "learning_rate": 0.00011873801740218392, "loss": 3.1015, "step": 43249 }, { "epoch": 2.12, "grad_norm": 0.7056365013122559, "learning_rate": 0.00011872574952999024, "loss": 2.7413, "step": 43250 }, { "epoch": 2.12, "grad_norm": 0.6891767978668213, "learning_rate": 0.00011871348213523796, "loss": 2.8436, "step": 43251 }, { "epoch": 2.12, "grad_norm": 0.7506184577941895, "learning_rate": 0.0001187012152179596, "loss": 2.9205, "step": 43252 }, { "epoch": 2.12, "grad_norm": 0.7506493926048279, "learning_rate": 0.00011868894877818721, "loss": 2.7396, "step": 43253 }, { "epoch": 2.12, "grad_norm": 0.7198980450630188, "learning_rate": 0.00011867668281595336, "loss": 2.991, "step": 43254 }, { "epoch": 2.12, "grad_norm": 0.6733829975128174, "learning_rate": 0.00011866441733129013, "loss": 2.8639, "step": 43255 }, { "epoch": 2.12, "grad_norm": 0.726628839969635, "learning_rate": 0.00011865215232423003, "loss": 2.8438, "step": 43256 }, { "epoch": 2.12, "grad_norm": 0.6809700727462769, "learning_rate": 0.00011863988779480524, "loss": 2.8867, "step": 43257 }, { "epoch": 2.12, "grad_norm": 0.6893267631530762, "learning_rate": 0.00011862762374304798, "loss": 3.0118, "step": 43258 }, { "epoch": 2.12, "grad_norm": 0.7157814502716064, "learning_rate": 0.00011861536016899074, "loss": 3.1546, "step": 43259 }, { "epoch": 2.12, "grad_norm": 0.6700130701065063, "learning_rate": 0.00011860309707266568, "loss": 3.0809, "step": 43260 }, { "epoch": 2.12, "grad_norm": 0.6558499932289124, "learning_rate": 0.00011859083445410513, "loss": 2.9951, "step": 43261 }, { "epoch": 2.12, "grad_norm": 0.7448561787605286, "learning_rate": 0.0001185785723133415, "loss": 2.9421, "step": 43262 }, { "epoch": 2.12, "grad_norm": 0.716131865978241, "learning_rate": 0.000118566310650407, "loss": 2.95, "step": 43263 }, { "epoch": 2.12, "grad_norm": 0.709511399269104, "learning_rate": 0.00011855404946533395, "loss": 3.1927, "step": 43264 }, { "epoch": 2.12, "grad_norm": 0.7204448580741882, "learning_rate": 0.00011854178875815446, "loss": 2.855, "step": 43265 }, { "epoch": 2.12, "grad_norm": 0.6846634745597839, "learning_rate": 0.000118529528528901, "loss": 2.9704, "step": 43266 }, { "epoch": 2.12, "grad_norm": 0.6869163513183594, "learning_rate": 0.00011851726877760594, "loss": 2.8469, "step": 43267 }, { "epoch": 2.12, "grad_norm": 0.7922883033752441, "learning_rate": 0.00011850500950430138, "loss": 2.7902, "step": 43268 }, { "epoch": 2.12, "grad_norm": 0.672174870967865, "learning_rate": 0.00011849275070901979, "loss": 3.0636, "step": 43269 }, { "epoch": 2.12, "grad_norm": 0.7000868916511536, "learning_rate": 0.00011848049239179324, "loss": 2.9595, "step": 43270 }, { "epoch": 2.12, "grad_norm": 0.6964728236198425, "learning_rate": 0.00011846823455265425, "loss": 2.8609, "step": 43271 }, { "epoch": 2.12, "grad_norm": 0.7069092392921448, "learning_rate": 0.00011845597719163501, "loss": 3.1638, "step": 43272 }, { "epoch": 2.12, "grad_norm": 0.6975874304771423, "learning_rate": 0.00011844372030876769, "loss": 2.9583, "step": 43273 }, { "epoch": 2.12, "grad_norm": 0.6885384917259216, "learning_rate": 0.00011843146390408476, "loss": 2.817, "step": 43274 }, { "epoch": 2.12, "grad_norm": 0.7786584496498108, "learning_rate": 0.00011841920797761829, "loss": 2.8851, "step": 43275 }, { "epoch": 2.12, "grad_norm": 0.6855915784835815, "learning_rate": 0.0001184069525294007, "loss": 2.7401, "step": 43276 }, { "epoch": 2.12, "grad_norm": 0.6739755272865295, "learning_rate": 0.00011839469755946432, "loss": 2.9035, "step": 43277 }, { "epoch": 2.12, "grad_norm": 0.6957394480705261, "learning_rate": 0.0001183824430678414, "loss": 3.0632, "step": 43278 }, { "epoch": 2.12, "grad_norm": 0.6879928112030029, "learning_rate": 0.00011837018905456415, "loss": 2.8398, "step": 43279 }, { "epoch": 2.12, "grad_norm": 0.701741635799408, "learning_rate": 0.00011835793551966477, "loss": 2.8844, "step": 43280 }, { "epoch": 2.12, "grad_norm": 0.6822145581245422, "learning_rate": 0.00011834568246317562, "loss": 2.8215, "step": 43281 }, { "epoch": 2.12, "grad_norm": 0.6779221296310425, "learning_rate": 0.00011833342988512909, "loss": 3.0876, "step": 43282 }, { "epoch": 2.12, "grad_norm": 0.6709139347076416, "learning_rate": 0.00011832117778555727, "loss": 2.7494, "step": 43283 }, { "epoch": 2.12, "grad_norm": 0.7509815096855164, "learning_rate": 0.00011830892616449257, "loss": 2.8125, "step": 43284 }, { "epoch": 2.12, "grad_norm": 0.6872658133506775, "learning_rate": 0.00011829667502196722, "loss": 2.7154, "step": 43285 }, { "epoch": 2.12, "grad_norm": 0.7320376634597778, "learning_rate": 0.00011828442435801331, "loss": 2.9323, "step": 43286 }, { "epoch": 2.12, "grad_norm": 0.6770885586738586, "learning_rate": 0.00011827217417266339, "loss": 2.9723, "step": 43287 }, { "epoch": 2.12, "grad_norm": 0.7534797191619873, "learning_rate": 0.00011825992446594948, "loss": 3.0001, "step": 43288 }, { "epoch": 2.12, "grad_norm": 0.720881998538971, "learning_rate": 0.00011824767523790406, "loss": 2.8738, "step": 43289 }, { "epoch": 2.12, "grad_norm": 0.6875128149986267, "learning_rate": 0.00011823542648855916, "loss": 3.1074, "step": 43290 }, { "epoch": 2.12, "grad_norm": 0.7054754495620728, "learning_rate": 0.00011822317821794718, "loss": 2.8814, "step": 43291 }, { "epoch": 2.12, "grad_norm": 0.647639811038971, "learning_rate": 0.00011821093042610049, "loss": 2.9831, "step": 43292 }, { "epoch": 2.12, "grad_norm": 0.6805564165115356, "learning_rate": 0.0001181986831130512, "loss": 2.9677, "step": 43293 }, { "epoch": 2.12, "grad_norm": 0.724385678768158, "learning_rate": 0.0001181864362788316, "loss": 2.9741, "step": 43294 }, { "epoch": 2.12, "grad_norm": 0.6952561736106873, "learning_rate": 0.00011817418992347381, "loss": 2.7953, "step": 43295 }, { "epoch": 2.12, "grad_norm": 0.7037989497184753, "learning_rate": 0.00011816194404701024, "loss": 2.5987, "step": 43296 }, { "epoch": 2.12, "grad_norm": 0.6977366209030151, "learning_rate": 0.0001181496986494732, "loss": 2.6767, "step": 43297 }, { "epoch": 2.12, "grad_norm": 0.6811829805374146, "learning_rate": 0.00011813745373089473, "loss": 3.0466, "step": 43298 }, { "epoch": 2.12, "grad_norm": 0.6867378354072571, "learning_rate": 0.00011812520929130736, "loss": 3.0597, "step": 43299 }, { "epoch": 2.12, "grad_norm": 0.6996347308158875, "learning_rate": 0.00011811296533074315, "loss": 2.8842, "step": 43300 }, { "epoch": 2.12, "grad_norm": 0.661796510219574, "learning_rate": 0.00011810072184923429, "loss": 3.1053, "step": 43301 }, { "epoch": 2.12, "grad_norm": 0.6767133474349976, "learning_rate": 0.00011808847884681323, "loss": 2.8145, "step": 43302 }, { "epoch": 2.12, "grad_norm": 0.710540771484375, "learning_rate": 0.00011807623632351201, "loss": 2.9525, "step": 43303 }, { "epoch": 2.12, "grad_norm": 0.6866672039031982, "learning_rate": 0.00011806399427936308, "loss": 2.9246, "step": 43304 }, { "epoch": 2.12, "grad_norm": 0.6833668947219849, "learning_rate": 0.00011805175271439846, "loss": 3.2081, "step": 43305 }, { "epoch": 2.12, "grad_norm": 0.6920410394668579, "learning_rate": 0.00011803951162865048, "loss": 3.0166, "step": 43306 }, { "epoch": 2.12, "grad_norm": 0.7049962878227234, "learning_rate": 0.00011802727102215155, "loss": 2.7362, "step": 43307 }, { "epoch": 2.12, "grad_norm": 0.727013349533081, "learning_rate": 0.00011801503089493376, "loss": 2.8944, "step": 43308 }, { "epoch": 2.12, "grad_norm": 0.6701471209526062, "learning_rate": 0.00011800279124702936, "loss": 2.7099, "step": 43309 }, { "epoch": 2.12, "grad_norm": 0.6716243624687195, "learning_rate": 0.00011799055207847045, "loss": 3.0203, "step": 43310 }, { "epoch": 2.12, "grad_norm": 0.7197967171669006, "learning_rate": 0.00011797831338928943, "loss": 2.8381, "step": 43311 }, { "epoch": 2.12, "grad_norm": 0.6998694539070129, "learning_rate": 0.00011796607517951858, "loss": 2.9622, "step": 43312 }, { "epoch": 2.12, "grad_norm": 0.6807000041007996, "learning_rate": 0.00011795383744918996, "loss": 3.1586, "step": 43313 }, { "epoch": 2.12, "grad_norm": 0.6802100539207458, "learning_rate": 0.00011794160019833599, "loss": 2.6665, "step": 43314 }, { "epoch": 2.12, "grad_norm": 0.6740386486053467, "learning_rate": 0.00011792936342698882, "loss": 2.8575, "step": 43315 }, { "epoch": 2.12, "grad_norm": 0.6891768574714661, "learning_rate": 0.00011791712713518056, "loss": 2.8772, "step": 43316 }, { "epoch": 2.12, "grad_norm": 0.7108803391456604, "learning_rate": 0.00011790489132294368, "loss": 3.0195, "step": 43317 }, { "epoch": 2.12, "grad_norm": 0.7088941335678101, "learning_rate": 0.00011789265599031012, "loss": 2.92, "step": 43318 }, { "epoch": 2.12, "grad_norm": 0.6881635785102844, "learning_rate": 0.00011788042113731239, "loss": 2.8271, "step": 43319 }, { "epoch": 2.12, "grad_norm": 0.6677702069282532, "learning_rate": 0.00011786818676398243, "loss": 3.0857, "step": 43320 }, { "epoch": 2.12, "grad_norm": 0.7288718223571777, "learning_rate": 0.00011785595287035276, "loss": 3.0319, "step": 43321 }, { "epoch": 2.12, "grad_norm": 0.6868347525596619, "learning_rate": 0.00011784371945645546, "loss": 2.5492, "step": 43322 }, { "epoch": 2.12, "grad_norm": 0.6867601275444031, "learning_rate": 0.00011783148652232264, "loss": 2.7741, "step": 43323 }, { "epoch": 2.12, "grad_norm": 0.715054988861084, "learning_rate": 0.0001178192540679867, "loss": 3.0581, "step": 43324 }, { "epoch": 2.12, "grad_norm": 0.6956353187561035, "learning_rate": 0.0001178070220934797, "loss": 2.9861, "step": 43325 }, { "epoch": 2.12, "grad_norm": 0.7079643607139587, "learning_rate": 0.00011779479059883407, "loss": 2.8541, "step": 43326 }, { "epoch": 2.12, "grad_norm": 0.712270200252533, "learning_rate": 0.00011778255958408175, "loss": 2.7609, "step": 43327 }, { "epoch": 2.12, "grad_norm": 0.6944162845611572, "learning_rate": 0.00011777032904925512, "loss": 2.9822, "step": 43328 }, { "epoch": 2.12, "grad_norm": 0.6619276404380798, "learning_rate": 0.00011775809899438645, "loss": 2.9275, "step": 43329 }, { "epoch": 2.12, "grad_norm": 0.7741801738739014, "learning_rate": 0.0001177458694195079, "loss": 2.7809, "step": 43330 }, { "epoch": 2.12, "grad_norm": 0.7182871103286743, "learning_rate": 0.00011773364032465167, "loss": 2.8416, "step": 43331 }, { "epoch": 2.12, "grad_norm": 0.6957319974899292, "learning_rate": 0.00011772141170984984, "loss": 2.8044, "step": 43332 }, { "epoch": 2.12, "grad_norm": 0.7118658423423767, "learning_rate": 0.0001177091835751347, "loss": 2.9776, "step": 43333 }, { "epoch": 2.12, "grad_norm": 0.7111032605171204, "learning_rate": 0.00011769695592053861, "loss": 2.8035, "step": 43334 }, { "epoch": 2.12, "grad_norm": 0.7072035074234009, "learning_rate": 0.00011768472874609355, "loss": 2.9224, "step": 43335 }, { "epoch": 2.12, "grad_norm": 0.6957293748855591, "learning_rate": 0.00011767250205183195, "loss": 2.8217, "step": 43336 }, { "epoch": 2.12, "grad_norm": 0.6990187764167786, "learning_rate": 0.00011766027583778586, "loss": 3.1363, "step": 43337 }, { "epoch": 2.12, "grad_norm": 0.6784077882766724, "learning_rate": 0.00011764805010398743, "loss": 2.9643, "step": 43338 }, { "epoch": 2.12, "grad_norm": 0.6869450211524963, "learning_rate": 0.00011763582485046902, "loss": 2.9682, "step": 43339 }, { "epoch": 2.12, "grad_norm": 0.7190410494804382, "learning_rate": 0.00011762360007726266, "loss": 2.8506, "step": 43340 }, { "epoch": 2.12, "grad_norm": 0.667700469493866, "learning_rate": 0.00011761137578440076, "loss": 3.0683, "step": 43341 }, { "epoch": 2.12, "grad_norm": 0.7749731540679932, "learning_rate": 0.00011759915197191527, "loss": 2.8538, "step": 43342 }, { "epoch": 2.12, "grad_norm": 0.7492234706878662, "learning_rate": 0.00011758692863983863, "loss": 2.7344, "step": 43343 }, { "epoch": 2.12, "grad_norm": 0.6971567273139954, "learning_rate": 0.0001175747057882029, "loss": 2.8228, "step": 43344 }, { "epoch": 2.12, "grad_norm": 0.6927439570426941, "learning_rate": 0.00011756248341704017, "loss": 2.9262, "step": 43345 }, { "epoch": 2.12, "grad_norm": 0.6876089572906494, "learning_rate": 0.00011755026152638288, "loss": 3.01, "step": 43346 }, { "epoch": 2.12, "grad_norm": 0.7251725792884827, "learning_rate": 0.00011753804011626297, "loss": 2.8696, "step": 43347 }, { "epoch": 2.12, "grad_norm": 0.6793904900550842, "learning_rate": 0.00011752581918671276, "loss": 3.1156, "step": 43348 }, { "epoch": 2.12, "grad_norm": 0.7280372381210327, "learning_rate": 0.00011751359873776453, "loss": 3.0312, "step": 43349 }, { "epoch": 2.12, "grad_norm": 0.7209813594818115, "learning_rate": 0.00011750137876945021, "loss": 3.1338, "step": 43350 }, { "epoch": 2.12, "grad_norm": 0.7237643003463745, "learning_rate": 0.00011748915928180228, "loss": 2.7921, "step": 43351 }, { "epoch": 2.12, "grad_norm": 0.7128032445907593, "learning_rate": 0.0001174769402748528, "loss": 2.8238, "step": 43352 }, { "epoch": 2.12, "grad_norm": 0.694153368473053, "learning_rate": 0.0001174647217486338, "loss": 2.8686, "step": 43353 }, { "epoch": 2.12, "grad_norm": 0.7112940549850464, "learning_rate": 0.00011745250370317769, "loss": 2.7379, "step": 43354 }, { "epoch": 2.12, "grad_norm": 0.7519726753234863, "learning_rate": 0.00011744028613851646, "loss": 2.7854, "step": 43355 }, { "epoch": 2.12, "grad_norm": 0.7124344706535339, "learning_rate": 0.00011742806905468248, "loss": 3.0715, "step": 43356 }, { "epoch": 2.12, "grad_norm": 0.6832916736602783, "learning_rate": 0.00011741585245170773, "loss": 3.0614, "step": 43357 }, { "epoch": 2.12, "grad_norm": 0.7317155599594116, "learning_rate": 0.0001174036363296246, "loss": 3.0663, "step": 43358 }, { "epoch": 2.12, "grad_norm": 0.684130072593689, "learning_rate": 0.00011739142068846512, "loss": 2.7761, "step": 43359 }, { "epoch": 2.12, "grad_norm": 0.7288141846656799, "learning_rate": 0.00011737920552826142, "loss": 2.9951, "step": 43360 }, { "epoch": 2.13, "grad_norm": 0.7078750133514404, "learning_rate": 0.00011736699084904584, "loss": 2.8456, "step": 43361 }, { "epoch": 2.13, "grad_norm": 0.6609907746315002, "learning_rate": 0.00011735477665085035, "loss": 2.8533, "step": 43362 }, { "epoch": 2.13, "grad_norm": 0.6616264581680298, "learning_rate": 0.00011734256293370722, "loss": 2.7806, "step": 43363 }, { "epoch": 2.13, "grad_norm": 0.7512163519859314, "learning_rate": 0.00011733034969764876, "loss": 2.9013, "step": 43364 }, { "epoch": 2.13, "grad_norm": 0.6941211819648743, "learning_rate": 0.00011731813694270699, "loss": 2.8684, "step": 43365 }, { "epoch": 2.13, "grad_norm": 0.6857606768608093, "learning_rate": 0.00011730592466891406, "loss": 2.8198, "step": 43366 }, { "epoch": 2.13, "grad_norm": 0.6707267165184021, "learning_rate": 0.0001172937128763021, "loss": 2.9227, "step": 43367 }, { "epoch": 2.13, "grad_norm": 0.6939967274665833, "learning_rate": 0.00011728150156490332, "loss": 2.8507, "step": 43368 }, { "epoch": 2.13, "grad_norm": 0.7025797367095947, "learning_rate": 0.00011726929073475, "loss": 2.8585, "step": 43369 }, { "epoch": 2.13, "grad_norm": 0.7331152558326721, "learning_rate": 0.0001172570803858741, "loss": 3.0419, "step": 43370 }, { "epoch": 2.13, "grad_norm": 0.669756293296814, "learning_rate": 0.000117244870518308, "loss": 2.9938, "step": 43371 }, { "epoch": 2.13, "grad_norm": 0.6814187169075012, "learning_rate": 0.00011723266113208363, "loss": 2.9681, "step": 43372 }, { "epoch": 2.13, "grad_norm": 0.6948679089546204, "learning_rate": 0.00011722045222723333, "loss": 3.1171, "step": 43373 }, { "epoch": 2.13, "grad_norm": 0.6597676277160645, "learning_rate": 0.00011720824380378922, "loss": 2.6783, "step": 43374 }, { "epoch": 2.13, "grad_norm": 0.6691949367523193, "learning_rate": 0.00011719603586178329, "loss": 2.845, "step": 43375 }, { "epoch": 2.13, "grad_norm": 0.6968749165534973, "learning_rate": 0.00011718382840124793, "loss": 3.0667, "step": 43376 }, { "epoch": 2.13, "grad_norm": 0.7081393599510193, "learning_rate": 0.0001171716214222151, "loss": 2.9093, "step": 43377 }, { "epoch": 2.13, "grad_norm": 0.677230954170227, "learning_rate": 0.00011715941492471702, "loss": 2.9554, "step": 43378 }, { "epoch": 2.13, "grad_norm": 0.6978651285171509, "learning_rate": 0.00011714720890878592, "loss": 2.8337, "step": 43379 }, { "epoch": 2.13, "grad_norm": 0.7071506381034851, "learning_rate": 0.00011713500337445392, "loss": 3.0446, "step": 43380 }, { "epoch": 2.13, "grad_norm": 0.7174202799797058, "learning_rate": 0.00011712279832175313, "loss": 2.9749, "step": 43381 }, { "epoch": 2.13, "grad_norm": 0.7007304430007935, "learning_rate": 0.00011711059375071558, "loss": 3.0169, "step": 43382 }, { "epoch": 2.13, "grad_norm": 0.7151933908462524, "learning_rate": 0.00011709838966137354, "loss": 3.0288, "step": 43383 }, { "epoch": 2.13, "grad_norm": 0.7130842208862305, "learning_rate": 0.00011708618605375923, "loss": 3.0052, "step": 43384 }, { "epoch": 2.13, "grad_norm": 0.7058139443397522, "learning_rate": 0.0001170739829279046, "loss": 2.9758, "step": 43385 }, { "epoch": 2.13, "grad_norm": 0.6666308641433716, "learning_rate": 0.000117061780283842, "loss": 2.9686, "step": 43386 }, { "epoch": 2.13, "grad_norm": 0.7057749032974243, "learning_rate": 0.00011704957812160347, "loss": 2.9458, "step": 43387 }, { "epoch": 2.13, "grad_norm": 0.6779221892356873, "learning_rate": 0.00011703737644122103, "loss": 2.8908, "step": 43388 }, { "epoch": 2.13, "grad_norm": 0.690578818321228, "learning_rate": 0.00011702517524272703, "loss": 3.1712, "step": 43389 }, { "epoch": 2.13, "grad_norm": 0.6773284077644348, "learning_rate": 0.00011701297452615342, "loss": 2.9113, "step": 43390 }, { "epoch": 2.13, "grad_norm": 0.700973629951477, "learning_rate": 0.00011700077429153249, "loss": 2.9496, "step": 43391 }, { "epoch": 2.13, "grad_norm": 0.6661964654922485, "learning_rate": 0.00011698857453889621, "loss": 2.9452, "step": 43392 }, { "epoch": 2.13, "grad_norm": 0.6915501952171326, "learning_rate": 0.00011697637526827682, "loss": 2.8738, "step": 43393 }, { "epoch": 2.13, "grad_norm": 0.6811676621437073, "learning_rate": 0.00011696417647970654, "loss": 3.0137, "step": 43394 }, { "epoch": 2.13, "grad_norm": 0.6550053358078003, "learning_rate": 0.00011695197817321736, "loss": 2.9058, "step": 43395 }, { "epoch": 2.13, "grad_norm": 0.7086590528488159, "learning_rate": 0.00011693978034884147, "loss": 2.7958, "step": 43396 }, { "epoch": 2.13, "grad_norm": 0.6640387177467346, "learning_rate": 0.00011692758300661087, "loss": 2.9049, "step": 43397 }, { "epoch": 2.13, "grad_norm": 0.7309964299201965, "learning_rate": 0.00011691538614655786, "loss": 2.6874, "step": 43398 }, { "epoch": 2.13, "grad_norm": 0.7164502143859863, "learning_rate": 0.0001169031897687144, "loss": 3.0238, "step": 43399 }, { "epoch": 2.13, "grad_norm": 0.6886323094367981, "learning_rate": 0.00011689099387311268, "loss": 3.0394, "step": 43400 }, { "epoch": 2.13, "grad_norm": 0.6922336220741272, "learning_rate": 0.00011687879845978499, "loss": 2.758, "step": 43401 }, { "epoch": 2.13, "grad_norm": 0.6996184587478638, "learning_rate": 0.00011686660352876325, "loss": 2.8422, "step": 43402 }, { "epoch": 2.13, "grad_norm": 0.6815118789672852, "learning_rate": 0.00011685440908007968, "loss": 2.984, "step": 43403 }, { "epoch": 2.13, "grad_norm": 0.7413270473480225, "learning_rate": 0.00011684221511376619, "loss": 2.9322, "step": 43404 }, { "epoch": 2.13, "grad_norm": 0.6742251515388489, "learning_rate": 0.00011683002162985512, "loss": 3.0762, "step": 43405 }, { "epoch": 2.13, "grad_norm": 0.6736028790473938, "learning_rate": 0.00011681782862837858, "loss": 2.8401, "step": 43406 }, { "epoch": 2.13, "grad_norm": 0.6667304635047913, "learning_rate": 0.00011680563610936853, "loss": 2.9525, "step": 43407 }, { "epoch": 2.13, "grad_norm": 0.7072355151176453, "learning_rate": 0.0001167934440728573, "loss": 2.7609, "step": 43408 }, { "epoch": 2.13, "grad_norm": 0.6967011094093323, "learning_rate": 0.00011678125251887677, "loss": 2.8879, "step": 43409 }, { "epoch": 2.13, "grad_norm": 0.6703258156776428, "learning_rate": 0.00011676906144745923, "loss": 3.1206, "step": 43410 }, { "epoch": 2.13, "grad_norm": 0.7139511108398438, "learning_rate": 0.00011675687085863677, "loss": 2.9037, "step": 43411 }, { "epoch": 2.13, "grad_norm": 0.7220322489738464, "learning_rate": 0.0001167446807524413, "loss": 2.8709, "step": 43412 }, { "epoch": 2.13, "grad_norm": 0.6931965351104736, "learning_rate": 0.00011673249112890518, "loss": 3.021, "step": 43413 }, { "epoch": 2.13, "grad_norm": 0.7006539106369019, "learning_rate": 0.00011672030198806031, "loss": 3.0521, "step": 43414 }, { "epoch": 2.13, "grad_norm": 0.7006272673606873, "learning_rate": 0.0001167081133299389, "loss": 2.8595, "step": 43415 }, { "epoch": 2.13, "grad_norm": 0.7924833297729492, "learning_rate": 0.00011669592515457318, "loss": 2.7673, "step": 43416 }, { "epoch": 2.13, "grad_norm": 0.683752179145813, "learning_rate": 0.00011668373746199508, "loss": 2.8574, "step": 43417 }, { "epoch": 2.13, "grad_norm": 0.7187227010726929, "learning_rate": 0.00011667155025223674, "loss": 2.9725, "step": 43418 }, { "epoch": 2.13, "grad_norm": 0.6991389393806458, "learning_rate": 0.00011665936352533013, "loss": 3.0743, "step": 43419 }, { "epoch": 2.13, "grad_norm": 0.7020309567451477, "learning_rate": 0.00011664717728130751, "loss": 2.9786, "step": 43420 }, { "epoch": 2.13, "grad_norm": 0.6862619519233704, "learning_rate": 0.00011663499152020101, "loss": 2.9249, "step": 43421 }, { "epoch": 2.13, "grad_norm": 0.6912420392036438, "learning_rate": 0.00011662280624204258, "loss": 2.787, "step": 43422 }, { "epoch": 2.13, "grad_norm": 0.7102240920066833, "learning_rate": 0.00011661062144686448, "loss": 2.9573, "step": 43423 }, { "epoch": 2.13, "grad_norm": 0.6816980838775635, "learning_rate": 0.0001165984371346987, "loss": 2.8612, "step": 43424 }, { "epoch": 2.13, "grad_norm": 0.6912957429885864, "learning_rate": 0.00011658625330557722, "loss": 2.8478, "step": 43425 }, { "epoch": 2.13, "grad_norm": 0.690959095954895, "learning_rate": 0.00011657406995953237, "loss": 2.9166, "step": 43426 }, { "epoch": 2.13, "grad_norm": 0.7246711254119873, "learning_rate": 0.00011656188709659603, "loss": 2.7915, "step": 43427 }, { "epoch": 2.13, "grad_norm": 0.6712881326675415, "learning_rate": 0.00011654970471680046, "loss": 3.0455, "step": 43428 }, { "epoch": 2.13, "grad_norm": 0.7149039506912231, "learning_rate": 0.00011653752282017754, "loss": 2.949, "step": 43429 }, { "epoch": 2.13, "grad_norm": 0.7015359401702881, "learning_rate": 0.00011652534140675951, "loss": 2.9687, "step": 43430 }, { "epoch": 2.13, "grad_norm": 0.694905161857605, "learning_rate": 0.00011651316047657853, "loss": 2.9374, "step": 43431 }, { "epoch": 2.13, "grad_norm": 0.6754507422447205, "learning_rate": 0.00011650098002966656, "loss": 3.0613, "step": 43432 }, { "epoch": 2.13, "grad_norm": 0.695451021194458, "learning_rate": 0.00011648880006605567, "loss": 3.0479, "step": 43433 }, { "epoch": 2.13, "grad_norm": 0.7183724045753479, "learning_rate": 0.00011647662058577789, "loss": 2.8461, "step": 43434 }, { "epoch": 2.13, "grad_norm": 0.6836041808128357, "learning_rate": 0.00011646444158886538, "loss": 2.9376, "step": 43435 }, { "epoch": 2.13, "grad_norm": 0.6931686401367188, "learning_rate": 0.00011645226307535027, "loss": 3.1012, "step": 43436 }, { "epoch": 2.13, "grad_norm": 0.682621419429779, "learning_rate": 0.00011644008504526452, "loss": 3.041, "step": 43437 }, { "epoch": 2.13, "grad_norm": 0.685973584651947, "learning_rate": 0.00011642790749864034, "loss": 2.728, "step": 43438 }, { "epoch": 2.13, "grad_norm": 0.737464427947998, "learning_rate": 0.0001164157304355097, "loss": 2.8201, "step": 43439 }, { "epoch": 2.13, "grad_norm": 0.6804065108299255, "learning_rate": 0.00011640355385590462, "loss": 2.7606, "step": 43440 }, { "epoch": 2.13, "grad_norm": 0.7319862842559814, "learning_rate": 0.00011639137775985736, "loss": 2.9008, "step": 43441 }, { "epoch": 2.13, "grad_norm": 0.6785258650779724, "learning_rate": 0.00011637920214739974, "loss": 2.857, "step": 43442 }, { "epoch": 2.13, "grad_norm": 0.7257917523384094, "learning_rate": 0.0001163670270185641, "loss": 2.9225, "step": 43443 }, { "epoch": 2.13, "grad_norm": 0.7215557098388672, "learning_rate": 0.00011635485237338224, "loss": 3.1464, "step": 43444 }, { "epoch": 2.13, "grad_norm": 0.7050979733467102, "learning_rate": 0.00011634267821188646, "loss": 2.9606, "step": 43445 }, { "epoch": 2.13, "grad_norm": 0.6802178025245667, "learning_rate": 0.00011633050453410873, "loss": 2.981, "step": 43446 }, { "epoch": 2.13, "grad_norm": 0.7454336285591125, "learning_rate": 0.00011631833134008099, "loss": 2.9528, "step": 43447 }, { "epoch": 2.13, "grad_norm": 0.6547221541404724, "learning_rate": 0.00011630615862983551, "loss": 2.689, "step": 43448 }, { "epoch": 2.13, "grad_norm": 0.7083719372749329, "learning_rate": 0.00011629398640340418, "loss": 2.882, "step": 43449 }, { "epoch": 2.13, "grad_norm": 0.72013258934021, "learning_rate": 0.00011628181466081912, "loss": 2.7815, "step": 43450 }, { "epoch": 2.13, "grad_norm": 0.6839480400085449, "learning_rate": 0.00011626964340211249, "loss": 2.9452, "step": 43451 }, { "epoch": 2.13, "grad_norm": 0.7362170815467834, "learning_rate": 0.00011625747262731617, "loss": 2.8755, "step": 43452 }, { "epoch": 2.13, "grad_norm": 0.6892048716545105, "learning_rate": 0.00011624530233646241, "loss": 3.0673, "step": 43453 }, { "epoch": 2.13, "grad_norm": 0.7359662652015686, "learning_rate": 0.00011623313252958317, "loss": 3.0499, "step": 43454 }, { "epoch": 2.13, "grad_norm": 0.7010781168937683, "learning_rate": 0.00011622096320671038, "loss": 2.8973, "step": 43455 }, { "epoch": 2.13, "grad_norm": 0.6811627745628357, "learning_rate": 0.00011620879436787631, "loss": 2.8123, "step": 43456 }, { "epoch": 2.13, "grad_norm": 0.7134902477264404, "learning_rate": 0.0001161966260131128, "loss": 2.972, "step": 43457 }, { "epoch": 2.13, "grad_norm": 0.7200930714607239, "learning_rate": 0.00011618445814245211, "loss": 2.9097, "step": 43458 }, { "epoch": 2.13, "grad_norm": 0.7086115479469299, "learning_rate": 0.00011617229075592606, "loss": 3.1063, "step": 43459 }, { "epoch": 2.13, "grad_norm": 0.7229481935501099, "learning_rate": 0.00011616012385356693, "loss": 3.1506, "step": 43460 }, { "epoch": 2.13, "grad_norm": 0.6957486271858215, "learning_rate": 0.00011614795743540666, "loss": 2.677, "step": 43461 }, { "epoch": 2.13, "grad_norm": 0.7093002200126648, "learning_rate": 0.00011613579150147718, "loss": 2.6256, "step": 43462 }, { "epoch": 2.13, "grad_norm": 0.7090989351272583, "learning_rate": 0.00011612362605181073, "loss": 2.9412, "step": 43463 }, { "epoch": 2.13, "grad_norm": 0.7614361047744751, "learning_rate": 0.0001161114610864392, "loss": 2.6719, "step": 43464 }, { "epoch": 2.13, "grad_norm": 0.7168323993682861, "learning_rate": 0.00011609929660539467, "loss": 2.8601, "step": 43465 }, { "epoch": 2.13, "grad_norm": 0.7068440914154053, "learning_rate": 0.0001160871326087093, "loss": 2.883, "step": 43466 }, { "epoch": 2.13, "grad_norm": 0.7084940671920776, "learning_rate": 0.000116074969096415, "loss": 2.763, "step": 43467 }, { "epoch": 2.13, "grad_norm": 0.6470160484313965, "learning_rate": 0.00011606280606854385, "loss": 2.9537, "step": 43468 }, { "epoch": 2.13, "grad_norm": 0.7235172986984253, "learning_rate": 0.00011605064352512779, "loss": 3.1622, "step": 43469 }, { "epoch": 2.13, "grad_norm": 0.7104700207710266, "learning_rate": 0.00011603848146619893, "loss": 2.9741, "step": 43470 }, { "epoch": 2.13, "grad_norm": 0.7352017164230347, "learning_rate": 0.0001160263198917894, "loss": 2.8902, "step": 43471 }, { "epoch": 2.13, "grad_norm": 0.6944370269775391, "learning_rate": 0.000116014158801931, "loss": 2.685, "step": 43472 }, { "epoch": 2.13, "grad_norm": 0.6649462580680847, "learning_rate": 0.00011600199819665605, "loss": 2.8065, "step": 43473 }, { "epoch": 2.13, "grad_norm": 0.7043724060058594, "learning_rate": 0.0001159898380759963, "loss": 2.8261, "step": 43474 }, { "epoch": 2.13, "grad_norm": 0.7225465178489685, "learning_rate": 0.000115977678439984, "loss": 3.0143, "step": 43475 }, { "epoch": 2.13, "grad_norm": 0.7245696187019348, "learning_rate": 0.00011596551928865109, "loss": 2.8686, "step": 43476 }, { "epoch": 2.13, "grad_norm": 0.7076426148414612, "learning_rate": 0.0001159533606220295, "loss": 2.9664, "step": 43477 }, { "epoch": 2.13, "grad_norm": 0.7165865302085876, "learning_rate": 0.00011594120244015143, "loss": 2.9488, "step": 43478 }, { "epoch": 2.13, "grad_norm": 0.7248859405517578, "learning_rate": 0.00011592904474304867, "loss": 2.8313, "step": 43479 }, { "epoch": 2.13, "grad_norm": 0.6996978521347046, "learning_rate": 0.00011591688753075351, "loss": 2.9924, "step": 43480 }, { "epoch": 2.13, "grad_norm": 0.7273635268211365, "learning_rate": 0.00011590473080329775, "loss": 3.0422, "step": 43481 }, { "epoch": 2.13, "grad_norm": 0.6897189021110535, "learning_rate": 0.0001158925745607136, "loss": 2.9329, "step": 43482 }, { "epoch": 2.13, "grad_norm": 0.717872142791748, "learning_rate": 0.00011588041880303297, "loss": 3.0231, "step": 43483 }, { "epoch": 2.13, "grad_norm": 0.717965304851532, "learning_rate": 0.00011586826353028777, "loss": 2.9274, "step": 43484 }, { "epoch": 2.13, "grad_norm": 0.7113906741142273, "learning_rate": 0.00011585610874251022, "loss": 2.8744, "step": 43485 }, { "epoch": 2.13, "grad_norm": 0.6816174387931824, "learning_rate": 0.00011584395443973216, "loss": 3.0101, "step": 43486 }, { "epoch": 2.13, "grad_norm": 0.6823785305023193, "learning_rate": 0.00011583180062198567, "loss": 2.9838, "step": 43487 }, { "epoch": 2.13, "grad_norm": 0.7035296559333801, "learning_rate": 0.00011581964728930291, "loss": 2.6212, "step": 43488 }, { "epoch": 2.13, "grad_norm": 0.699919581413269, "learning_rate": 0.0001158074944417157, "loss": 2.8144, "step": 43489 }, { "epoch": 2.13, "grad_norm": 0.7117913961410522, "learning_rate": 0.00011579534207925614, "loss": 2.9497, "step": 43490 }, { "epoch": 2.13, "grad_norm": 0.7389628887176514, "learning_rate": 0.00011578319020195607, "loss": 2.7566, "step": 43491 }, { "epoch": 2.13, "grad_norm": 0.6526266932487488, "learning_rate": 0.00011577103880984764, "loss": 2.7454, "step": 43492 }, { "epoch": 2.13, "grad_norm": 0.6850768327713013, "learning_rate": 0.00011575888790296294, "loss": 2.7901, "step": 43493 }, { "epoch": 2.13, "grad_norm": 0.70717853307724, "learning_rate": 0.00011574673748133377, "loss": 2.8612, "step": 43494 }, { "epoch": 2.13, "grad_norm": 0.6925551295280457, "learning_rate": 0.00011573458754499231, "loss": 2.8711, "step": 43495 }, { "epoch": 2.13, "grad_norm": 0.6883793473243713, "learning_rate": 0.00011572243809397042, "loss": 2.872, "step": 43496 }, { "epoch": 2.13, "grad_norm": 0.6649243235588074, "learning_rate": 0.00011571028912830027, "loss": 2.8072, "step": 43497 }, { "epoch": 2.13, "grad_norm": 0.7801764011383057, "learning_rate": 0.00011569814064801373, "loss": 2.809, "step": 43498 }, { "epoch": 2.13, "grad_norm": 0.6581512093544006, "learning_rate": 0.0001156859926531427, "loss": 3.2304, "step": 43499 }, { "epoch": 2.13, "grad_norm": 0.6470869779586792, "learning_rate": 0.0001156738451437194, "loss": 3.0452, "step": 43500 }, { "epoch": 2.13, "grad_norm": 0.6938720345497131, "learning_rate": 0.00011566169811977564, "loss": 3.0109, "step": 43501 }, { "epoch": 2.13, "grad_norm": 0.7173318266868591, "learning_rate": 0.00011564955158134347, "loss": 2.6295, "step": 43502 }, { "epoch": 2.13, "grad_norm": 0.6866092085838318, "learning_rate": 0.000115637405528455, "loss": 2.907, "step": 43503 }, { "epoch": 2.13, "grad_norm": 0.6871329545974731, "learning_rate": 0.00011562525996114216, "loss": 3.1121, "step": 43504 }, { "epoch": 2.13, "grad_norm": 0.7441400289535522, "learning_rate": 0.00011561311487943686, "loss": 2.8122, "step": 43505 }, { "epoch": 2.13, "grad_norm": 0.7241978645324707, "learning_rate": 0.00011560097028337107, "loss": 2.8481, "step": 43506 }, { "epoch": 2.13, "grad_norm": 0.7289275527000427, "learning_rate": 0.0001155888261729768, "loss": 2.8336, "step": 43507 }, { "epoch": 2.13, "grad_norm": 0.6423391103744507, "learning_rate": 0.00011557668254828618, "loss": 2.8059, "step": 43508 }, { "epoch": 2.13, "grad_norm": 0.7227512001991272, "learning_rate": 0.00011556453940933098, "loss": 2.8517, "step": 43509 }, { "epoch": 2.13, "grad_norm": 0.6960424780845642, "learning_rate": 0.00011555239675614337, "loss": 2.8775, "step": 43510 }, { "epoch": 2.13, "grad_norm": 0.7161725163459778, "learning_rate": 0.0001155402545887553, "loss": 2.7589, "step": 43511 }, { "epoch": 2.13, "grad_norm": 0.7142117619514465, "learning_rate": 0.00011552811290719856, "loss": 3.1114, "step": 43512 }, { "epoch": 2.13, "grad_norm": 0.7251697182655334, "learning_rate": 0.00011551597171150539, "loss": 2.9898, "step": 43513 }, { "epoch": 2.13, "grad_norm": 0.6893921494483948, "learning_rate": 0.00011550383100170752, "loss": 2.9428, "step": 43514 }, { "epoch": 2.13, "grad_norm": 0.6587451100349426, "learning_rate": 0.0001154916907778372, "loss": 2.6993, "step": 43515 }, { "epoch": 2.13, "grad_norm": 0.6924602389335632, "learning_rate": 0.00011547955103992611, "loss": 2.8499, "step": 43516 }, { "epoch": 2.13, "grad_norm": 0.6500839591026306, "learning_rate": 0.00011546741178800637, "loss": 3.2, "step": 43517 }, { "epoch": 2.13, "grad_norm": 0.7085121273994446, "learning_rate": 0.00011545527302211009, "loss": 2.839, "step": 43518 }, { "epoch": 2.13, "grad_norm": 0.6867219805717468, "learning_rate": 0.0001154431347422691, "loss": 2.7854, "step": 43519 }, { "epoch": 2.13, "grad_norm": 0.7688367366790771, "learning_rate": 0.00011543099694851534, "loss": 2.9432, "step": 43520 }, { "epoch": 2.13, "grad_norm": 0.6721521019935608, "learning_rate": 0.00011541885964088074, "loss": 2.9551, "step": 43521 }, { "epoch": 2.13, "grad_norm": 0.6870953440666199, "learning_rate": 0.00011540672281939731, "loss": 2.9556, "step": 43522 }, { "epoch": 2.13, "grad_norm": 0.7174733281135559, "learning_rate": 0.00011539458648409718, "loss": 2.8465, "step": 43523 }, { "epoch": 2.13, "grad_norm": 0.6979454159736633, "learning_rate": 0.00011538245063501205, "loss": 2.8541, "step": 43524 }, { "epoch": 2.13, "grad_norm": 0.7371723651885986, "learning_rate": 0.00011537031527217417, "loss": 3.0364, "step": 43525 }, { "epoch": 2.13, "grad_norm": 0.6876466274261475, "learning_rate": 0.0001153581803956153, "loss": 2.9516, "step": 43526 }, { "epoch": 2.13, "grad_norm": 0.7251150608062744, "learning_rate": 0.00011534604600536737, "loss": 2.7975, "step": 43527 }, { "epoch": 2.13, "grad_norm": 0.7008710503578186, "learning_rate": 0.00011533391210146249, "loss": 2.9521, "step": 43528 }, { "epoch": 2.13, "grad_norm": 0.690662145614624, "learning_rate": 0.00011532177868393248, "loss": 2.7882, "step": 43529 }, { "epoch": 2.13, "grad_norm": 0.6789386868476868, "learning_rate": 0.00011530964575280944, "loss": 3.1341, "step": 43530 }, { "epoch": 2.13, "grad_norm": 0.6649258732795715, "learning_rate": 0.00011529751330812516, "loss": 2.8537, "step": 43531 }, { "epoch": 2.13, "grad_norm": 0.7068399786949158, "learning_rate": 0.00011528538134991167, "loss": 2.5939, "step": 43532 }, { "epoch": 2.13, "grad_norm": 0.7076483964920044, "learning_rate": 0.00011527324987820104, "loss": 3.0746, "step": 43533 }, { "epoch": 2.13, "grad_norm": 0.6737373471260071, "learning_rate": 0.00011526111889302511, "loss": 3.0245, "step": 43534 }, { "epoch": 2.13, "grad_norm": 0.7310186624526978, "learning_rate": 0.00011524898839441589, "loss": 2.9546, "step": 43535 }, { "epoch": 2.13, "grad_norm": 0.6874125003814697, "learning_rate": 0.00011523685838240512, "loss": 2.9512, "step": 43536 }, { "epoch": 2.13, "grad_norm": 0.7353755831718445, "learning_rate": 0.00011522472885702491, "loss": 2.8711, "step": 43537 }, { "epoch": 2.13, "grad_norm": 0.6856021285057068, "learning_rate": 0.00011521259981830732, "loss": 2.8758, "step": 43538 }, { "epoch": 2.13, "grad_norm": 0.7210877537727356, "learning_rate": 0.00011520047126628409, "loss": 3.0204, "step": 43539 }, { "epoch": 2.13, "grad_norm": 0.6792193055152893, "learning_rate": 0.00011518834320098735, "loss": 2.9862, "step": 43540 }, { "epoch": 2.13, "grad_norm": 0.67461097240448, "learning_rate": 0.00011517621562244893, "loss": 2.9252, "step": 43541 }, { "epoch": 2.13, "grad_norm": 0.6909298896789551, "learning_rate": 0.0001151640885307007, "loss": 2.9947, "step": 43542 }, { "epoch": 2.13, "grad_norm": 0.695124626159668, "learning_rate": 0.0001151519619257748, "loss": 2.9083, "step": 43543 }, { "epoch": 2.13, "grad_norm": 0.6613640189170837, "learning_rate": 0.00011513983580770294, "loss": 2.9106, "step": 43544 }, { "epoch": 2.13, "grad_norm": 0.6796993017196655, "learning_rate": 0.00011512771017651727, "loss": 2.9502, "step": 43545 }, { "epoch": 2.13, "grad_norm": 0.6530843377113342, "learning_rate": 0.00011511558503224954, "loss": 2.8476, "step": 43546 }, { "epoch": 2.13, "grad_norm": 0.7151798605918884, "learning_rate": 0.0001151034603749319, "loss": 2.8159, "step": 43547 }, { "epoch": 2.13, "grad_norm": 0.7040915489196777, "learning_rate": 0.00011509133620459615, "loss": 3.0598, "step": 43548 }, { "epoch": 2.13, "grad_norm": 0.7070721983909607, "learning_rate": 0.00011507921252127411, "loss": 2.9802, "step": 43549 }, { "epoch": 2.13, "grad_norm": 0.6628305315971375, "learning_rate": 0.00011506708932499795, "loss": 3.0236, "step": 43550 }, { "epoch": 2.13, "grad_norm": 0.6968042850494385, "learning_rate": 0.00011505496661579937, "loss": 2.8769, "step": 43551 }, { "epoch": 2.13, "grad_norm": 0.7575791478157043, "learning_rate": 0.00011504284439371044, "loss": 2.7383, "step": 43552 }, { "epoch": 2.13, "grad_norm": 0.6684389114379883, "learning_rate": 0.00011503072265876314, "loss": 3.0318, "step": 43553 }, { "epoch": 2.13, "grad_norm": 0.67277991771698, "learning_rate": 0.00011501860141098924, "loss": 2.9531, "step": 43554 }, { "epoch": 2.13, "grad_norm": 0.6650310754776001, "learning_rate": 0.00011500648065042082, "loss": 3.0134, "step": 43555 }, { "epoch": 2.13, "grad_norm": 0.7093574404716492, "learning_rate": 0.00011499436037708975, "loss": 2.7774, "step": 43556 }, { "epoch": 2.13, "grad_norm": 0.701271653175354, "learning_rate": 0.00011498224059102793, "loss": 2.8878, "step": 43557 }, { "epoch": 2.13, "grad_norm": 0.6936562657356262, "learning_rate": 0.00011497012129226714, "loss": 2.7935, "step": 43558 }, { "epoch": 2.13, "grad_norm": 0.6467651724815369, "learning_rate": 0.00011495800248083947, "loss": 2.8307, "step": 43559 }, { "epoch": 2.13, "grad_norm": 0.6694419384002686, "learning_rate": 0.00011494588415677694, "loss": 2.8483, "step": 43560 }, { "epoch": 2.13, "grad_norm": 0.7048700451850891, "learning_rate": 0.00011493376632011121, "loss": 2.719, "step": 43561 }, { "epoch": 2.13, "grad_norm": 0.6826921701431274, "learning_rate": 0.0001149216489708744, "loss": 2.984, "step": 43562 }, { "epoch": 2.13, "grad_norm": 0.7334887385368347, "learning_rate": 0.00011490953210909837, "loss": 3.0855, "step": 43563 }, { "epoch": 2.13, "grad_norm": 0.7349691987037659, "learning_rate": 0.00011489741573481491, "loss": 2.7208, "step": 43564 }, { "epoch": 2.14, "grad_norm": 0.7117055058479309, "learning_rate": 0.00011488529984805614, "loss": 2.8431, "step": 43565 }, { "epoch": 2.14, "grad_norm": 0.7462189197540283, "learning_rate": 0.00011487318444885377, "loss": 3.0923, "step": 43566 }, { "epoch": 2.14, "grad_norm": 0.7095943689346313, "learning_rate": 0.00011486106953723987, "loss": 3.0918, "step": 43567 }, { "epoch": 2.14, "grad_norm": 0.7356390953063965, "learning_rate": 0.0001148489551132462, "loss": 2.9536, "step": 43568 }, { "epoch": 2.14, "grad_norm": 0.6879743337631226, "learning_rate": 0.00011483684117690487, "loss": 2.749, "step": 43569 }, { "epoch": 2.14, "grad_norm": 0.6889345645904541, "learning_rate": 0.00011482472772824765, "loss": 2.9549, "step": 43570 }, { "epoch": 2.14, "grad_norm": 0.7350010871887207, "learning_rate": 0.00011481261476730634, "loss": 2.9164, "step": 43571 }, { "epoch": 2.14, "grad_norm": 0.7565581202507019, "learning_rate": 0.00011480050229411307, "loss": 2.873, "step": 43572 }, { "epoch": 2.14, "grad_norm": 0.7308693528175354, "learning_rate": 0.00011478839030869956, "loss": 2.8584, "step": 43573 }, { "epoch": 2.14, "grad_norm": 0.7340457439422607, "learning_rate": 0.00011477627881109776, "loss": 3.0429, "step": 43574 }, { "epoch": 2.14, "grad_norm": 0.7506102323532104, "learning_rate": 0.00011476416780133972, "loss": 3.0489, "step": 43575 }, { "epoch": 2.14, "grad_norm": 0.7139230966567993, "learning_rate": 0.0001147520572794571, "loss": 2.948, "step": 43576 }, { "epoch": 2.14, "grad_norm": 0.7269354462623596, "learning_rate": 0.00011473994724548199, "loss": 2.8071, "step": 43577 }, { "epoch": 2.14, "grad_norm": 0.7375877499580383, "learning_rate": 0.00011472783769944623, "loss": 2.8066, "step": 43578 }, { "epoch": 2.14, "grad_norm": 0.701732337474823, "learning_rate": 0.00011471572864138158, "loss": 2.7454, "step": 43579 }, { "epoch": 2.14, "grad_norm": 0.6951040029525757, "learning_rate": 0.00011470362007132013, "loss": 3.1121, "step": 43580 }, { "epoch": 2.14, "grad_norm": 0.745621919631958, "learning_rate": 0.00011469151198929362, "loss": 2.6036, "step": 43581 }, { "epoch": 2.14, "grad_norm": 0.7371377944946289, "learning_rate": 0.00011467940439533406, "loss": 2.7472, "step": 43582 }, { "epoch": 2.14, "grad_norm": 0.7030379176139832, "learning_rate": 0.00011466729728947321, "loss": 2.8501, "step": 43583 }, { "epoch": 2.14, "grad_norm": 0.6986172795295715, "learning_rate": 0.00011465519067174312, "loss": 2.8457, "step": 43584 }, { "epoch": 2.14, "grad_norm": 0.673969566822052, "learning_rate": 0.0001146430845421756, "loss": 2.8234, "step": 43585 }, { "epoch": 2.14, "grad_norm": 0.6397623419761658, "learning_rate": 0.00011463097890080241, "loss": 3.3042, "step": 43586 }, { "epoch": 2.14, "grad_norm": 0.7095158696174622, "learning_rate": 0.00011461887374765564, "loss": 2.8524, "step": 43587 }, { "epoch": 2.14, "grad_norm": 0.6966813206672668, "learning_rate": 0.000114606769082767, "loss": 3.1101, "step": 43588 }, { "epoch": 2.14, "grad_norm": 0.7600648999214172, "learning_rate": 0.00011459466490616845, "loss": 2.838, "step": 43589 }, { "epoch": 2.14, "grad_norm": 0.7137221693992615, "learning_rate": 0.00011458256121789194, "loss": 2.8731, "step": 43590 }, { "epoch": 2.14, "grad_norm": 0.6784641742706299, "learning_rate": 0.00011457045801796932, "loss": 3.0268, "step": 43591 }, { "epoch": 2.14, "grad_norm": 0.7136608362197876, "learning_rate": 0.0001145583553064324, "loss": 2.9876, "step": 43592 }, { "epoch": 2.14, "grad_norm": 0.7022787928581238, "learning_rate": 0.00011454625308331298, "loss": 2.7072, "step": 43593 }, { "epoch": 2.14, "grad_norm": 0.6970979571342468, "learning_rate": 0.00011453415134864305, "loss": 3.2014, "step": 43594 }, { "epoch": 2.14, "grad_norm": 0.6926916241645813, "learning_rate": 0.00011452205010245456, "loss": 2.9012, "step": 43595 }, { "epoch": 2.14, "grad_norm": 0.6950446367263794, "learning_rate": 0.00011450994934477922, "loss": 3.2737, "step": 43596 }, { "epoch": 2.14, "grad_norm": 0.6955012083053589, "learning_rate": 0.00011449784907564908, "loss": 2.9293, "step": 43597 }, { "epoch": 2.14, "grad_norm": 0.6871076226234436, "learning_rate": 0.00011448574929509578, "loss": 3.1272, "step": 43598 }, { "epoch": 2.14, "grad_norm": 0.7409434914588928, "learning_rate": 0.0001144736500031514, "loss": 2.9277, "step": 43599 }, { "epoch": 2.14, "grad_norm": 0.7080227732658386, "learning_rate": 0.00011446155119984774, "loss": 3.0437, "step": 43600 }, { "epoch": 2.14, "grad_norm": 0.7121559977531433, "learning_rate": 0.00011444945288521653, "loss": 2.964, "step": 43601 }, { "epoch": 2.14, "grad_norm": 0.7132676839828491, "learning_rate": 0.00011443735505928989, "loss": 2.9648, "step": 43602 }, { "epoch": 2.14, "grad_norm": 0.7137494683265686, "learning_rate": 0.0001144252577220994, "loss": 2.8898, "step": 43603 }, { "epoch": 2.14, "grad_norm": 0.7248656153678894, "learning_rate": 0.00011441316087367711, "loss": 2.808, "step": 43604 }, { "epoch": 2.14, "grad_norm": 0.6689263582229614, "learning_rate": 0.00011440106451405492, "loss": 2.6834, "step": 43605 }, { "epoch": 2.14, "grad_norm": 0.6864780783653259, "learning_rate": 0.0001143889686432646, "loss": 2.8517, "step": 43606 }, { "epoch": 2.14, "grad_norm": 0.7175418734550476, "learning_rate": 0.000114376873261338, "loss": 3.1024, "step": 43607 }, { "epoch": 2.14, "grad_norm": 0.723504900932312, "learning_rate": 0.00011436477836830692, "loss": 2.971, "step": 43608 }, { "epoch": 2.14, "grad_norm": 0.7824511528015137, "learning_rate": 0.00011435268396420327, "loss": 2.944, "step": 43609 }, { "epoch": 2.14, "grad_norm": 0.7112360596656799, "learning_rate": 0.00011434059004905903, "loss": 2.7707, "step": 43610 }, { "epoch": 2.14, "grad_norm": 0.7221190929412842, "learning_rate": 0.00011432849662290584, "loss": 3.0907, "step": 43611 }, { "epoch": 2.14, "grad_norm": 0.6884748339653015, "learning_rate": 0.00011431640368577577, "loss": 2.9186, "step": 43612 }, { "epoch": 2.14, "grad_norm": 0.7097779512405396, "learning_rate": 0.00011430431123770052, "loss": 3.1311, "step": 43613 }, { "epoch": 2.14, "grad_norm": 0.6831579804420471, "learning_rate": 0.00011429221927871192, "loss": 2.7814, "step": 43614 }, { "epoch": 2.14, "grad_norm": 0.7085390686988831, "learning_rate": 0.00011428012780884197, "loss": 2.8956, "step": 43615 }, { "epoch": 2.14, "grad_norm": 0.7171388268470764, "learning_rate": 0.0001142680368281223, "loss": 3.1167, "step": 43616 }, { "epoch": 2.14, "grad_norm": 0.679538905620575, "learning_rate": 0.00011425594633658499, "loss": 3.0024, "step": 43617 }, { "epoch": 2.14, "grad_norm": 0.7115911245346069, "learning_rate": 0.00011424385633426165, "loss": 2.8164, "step": 43618 }, { "epoch": 2.14, "grad_norm": 0.7108883857727051, "learning_rate": 0.00011423176682118425, "loss": 2.9752, "step": 43619 }, { "epoch": 2.14, "grad_norm": 0.6663087010383606, "learning_rate": 0.00011421967779738475, "loss": 2.8533, "step": 43620 }, { "epoch": 2.14, "grad_norm": 0.6840021014213562, "learning_rate": 0.00011420758926289487, "loss": 3.0553, "step": 43621 }, { "epoch": 2.14, "grad_norm": 0.7471802234649658, "learning_rate": 0.00011419550121774641, "loss": 2.944, "step": 43622 }, { "epoch": 2.14, "grad_norm": 0.6915409564971924, "learning_rate": 0.00011418341366197113, "loss": 2.7266, "step": 43623 }, { "epoch": 2.14, "grad_norm": 0.6648789048194885, "learning_rate": 0.00011417132659560102, "loss": 2.988, "step": 43624 }, { "epoch": 2.14, "grad_norm": 0.7293175458908081, "learning_rate": 0.00011415924001866795, "loss": 2.966, "step": 43625 }, { "epoch": 2.14, "grad_norm": 0.6726661324501038, "learning_rate": 0.00011414715393120358, "loss": 3.2105, "step": 43626 }, { "epoch": 2.14, "grad_norm": 0.6855407357215881, "learning_rate": 0.00011413506833323991, "loss": 2.7334, "step": 43627 }, { "epoch": 2.14, "grad_norm": 0.7026200890541077, "learning_rate": 0.00011412298322480873, "loss": 2.9252, "step": 43628 }, { "epoch": 2.14, "grad_norm": 0.7490710020065308, "learning_rate": 0.00011411089860594174, "loss": 2.815, "step": 43629 }, { "epoch": 2.14, "grad_norm": 0.6966449022293091, "learning_rate": 0.00011409881447667098, "loss": 2.8681, "step": 43630 }, { "epoch": 2.14, "grad_norm": 0.7190315127372742, "learning_rate": 0.00011408673083702801, "loss": 3.0971, "step": 43631 }, { "epoch": 2.14, "grad_norm": 0.6999360918998718, "learning_rate": 0.00011407464768704497, "loss": 3.0055, "step": 43632 }, { "epoch": 2.14, "grad_norm": 0.7241888046264648, "learning_rate": 0.00011406256502675338, "loss": 2.8215, "step": 43633 }, { "epoch": 2.14, "grad_norm": 0.6731426119804382, "learning_rate": 0.00011405048285618535, "loss": 3.0833, "step": 43634 }, { "epoch": 2.14, "grad_norm": 0.6741295456886292, "learning_rate": 0.00011403840117537243, "loss": 2.9559, "step": 43635 }, { "epoch": 2.14, "grad_norm": 0.6998021602630615, "learning_rate": 0.00011402631998434671, "loss": 2.9883, "step": 43636 }, { "epoch": 2.14, "grad_norm": 0.7177157402038574, "learning_rate": 0.00011401423928313985, "loss": 2.9022, "step": 43637 }, { "epoch": 2.14, "grad_norm": 0.764556348323822, "learning_rate": 0.00011400215907178359, "loss": 3.133, "step": 43638 }, { "epoch": 2.14, "grad_norm": 0.7035207748413086, "learning_rate": 0.00011399007935030994, "loss": 3.065, "step": 43639 }, { "epoch": 2.14, "grad_norm": 0.6932657361030579, "learning_rate": 0.00011397800011875052, "loss": 2.9998, "step": 43640 }, { "epoch": 2.14, "grad_norm": 0.7348163723945618, "learning_rate": 0.00011396592137713725, "loss": 2.9956, "step": 43641 }, { "epoch": 2.14, "grad_norm": 0.6634372472763062, "learning_rate": 0.00011395384312550207, "loss": 2.6726, "step": 43642 }, { "epoch": 2.14, "grad_norm": 0.667530357837677, "learning_rate": 0.00011394176536387664, "loss": 2.8978, "step": 43643 }, { "epoch": 2.14, "grad_norm": 0.6972176432609558, "learning_rate": 0.00011392968809229277, "loss": 3.0545, "step": 43644 }, { "epoch": 2.14, "grad_norm": 0.7097292542457581, "learning_rate": 0.00011391761131078224, "loss": 2.9076, "step": 43645 }, { "epoch": 2.14, "grad_norm": 0.66300368309021, "learning_rate": 0.00011390553501937687, "loss": 2.765, "step": 43646 }, { "epoch": 2.14, "grad_norm": 0.6970599889755249, "learning_rate": 0.00011389345921810862, "loss": 2.7709, "step": 43647 }, { "epoch": 2.14, "grad_norm": 0.6801822781562805, "learning_rate": 0.00011388138390700906, "loss": 2.7156, "step": 43648 }, { "epoch": 2.14, "grad_norm": 0.6642677187919617, "learning_rate": 0.00011386930908611024, "loss": 2.9296, "step": 43649 }, { "epoch": 2.14, "grad_norm": 0.726485550403595, "learning_rate": 0.00011385723475544383, "loss": 2.9243, "step": 43650 }, { "epoch": 2.14, "grad_norm": 0.6999016404151917, "learning_rate": 0.00011384516091504156, "loss": 2.9357, "step": 43651 }, { "epoch": 2.14, "grad_norm": 0.6688977479934692, "learning_rate": 0.0001138330875649354, "loss": 2.9612, "step": 43652 }, { "epoch": 2.14, "grad_norm": 0.6783860921859741, "learning_rate": 0.00011382101470515694, "loss": 2.9645, "step": 43653 }, { "epoch": 2.14, "grad_norm": 0.6920573711395264, "learning_rate": 0.0001138089423357382, "loss": 2.8786, "step": 43654 }, { "epoch": 2.14, "grad_norm": 0.6857960820198059, "learning_rate": 0.0001137968704567108, "loss": 2.9031, "step": 43655 }, { "epoch": 2.14, "grad_norm": 0.7088309526443481, "learning_rate": 0.00011378479906810658, "loss": 2.9249, "step": 43656 }, { "epoch": 2.14, "grad_norm": 0.6725496649742126, "learning_rate": 0.00011377272816995746, "loss": 2.9255, "step": 43657 }, { "epoch": 2.14, "grad_norm": 0.7024503350257874, "learning_rate": 0.00011376065776229515, "loss": 3.0291, "step": 43658 }, { "epoch": 2.14, "grad_norm": 0.7618658542633057, "learning_rate": 0.00011374858784515143, "loss": 2.8862, "step": 43659 }, { "epoch": 2.14, "grad_norm": 0.7083359956741333, "learning_rate": 0.00011373651841855797, "loss": 2.9298, "step": 43660 }, { "epoch": 2.14, "grad_norm": 0.7162266373634338, "learning_rate": 0.00011372444948254667, "loss": 3.0221, "step": 43661 }, { "epoch": 2.14, "grad_norm": 0.6709370613098145, "learning_rate": 0.00011371238103714944, "loss": 2.74, "step": 43662 }, { "epoch": 2.14, "grad_norm": 0.6915350556373596, "learning_rate": 0.00011370031308239783, "loss": 2.9314, "step": 43663 }, { "epoch": 2.14, "grad_norm": 0.7247147560119629, "learning_rate": 0.00011368824561832383, "loss": 3.0371, "step": 43664 }, { "epoch": 2.14, "grad_norm": 0.8002133965492249, "learning_rate": 0.00011367617864495916, "loss": 3.008, "step": 43665 }, { "epoch": 2.14, "grad_norm": 0.6851048469543457, "learning_rate": 0.00011366411216233548, "loss": 2.9939, "step": 43666 }, { "epoch": 2.14, "grad_norm": 0.7082476019859314, "learning_rate": 0.00011365204617048474, "loss": 3.0785, "step": 43667 }, { "epoch": 2.14, "grad_norm": 0.668368399143219, "learning_rate": 0.00011363998066943857, "loss": 2.8655, "step": 43668 }, { "epoch": 2.14, "grad_norm": 0.6958811283111572, "learning_rate": 0.00011362791565922892, "loss": 2.7918, "step": 43669 }, { "epoch": 2.14, "grad_norm": 0.7040616273880005, "learning_rate": 0.00011361585113988737, "loss": 3.1085, "step": 43670 }, { "epoch": 2.14, "grad_norm": 0.7405644655227661, "learning_rate": 0.00011360378711144588, "loss": 2.7158, "step": 43671 }, { "epoch": 2.14, "grad_norm": 0.66157066822052, "learning_rate": 0.00011359172357393618, "loss": 2.8066, "step": 43672 }, { "epoch": 2.14, "grad_norm": 0.8532086610794067, "learning_rate": 0.00011357966052738988, "loss": 2.8688, "step": 43673 }, { "epoch": 2.14, "grad_norm": 0.7060077786445618, "learning_rate": 0.00011356759797183896, "loss": 2.921, "step": 43674 }, { "epoch": 2.14, "grad_norm": 0.71628737449646, "learning_rate": 0.00011355553590731503, "loss": 3.0828, "step": 43675 }, { "epoch": 2.14, "grad_norm": 0.6801706552505493, "learning_rate": 0.00011354347433384993, "loss": 2.9732, "step": 43676 }, { "epoch": 2.14, "grad_norm": 0.6853070259094238, "learning_rate": 0.00011353141325147555, "loss": 2.8799, "step": 43677 }, { "epoch": 2.14, "grad_norm": 0.6793110966682434, "learning_rate": 0.00011351935266022343, "loss": 2.8989, "step": 43678 }, { "epoch": 2.14, "grad_norm": 0.6548277735710144, "learning_rate": 0.00011350729256012555, "loss": 2.7669, "step": 43679 }, { "epoch": 2.14, "grad_norm": 0.7266643047332764, "learning_rate": 0.00011349523295121356, "loss": 2.8731, "step": 43680 }, { "epoch": 2.14, "grad_norm": 0.7011723518371582, "learning_rate": 0.00011348317383351915, "loss": 3.0052, "step": 43681 }, { "epoch": 2.14, "grad_norm": 0.6877959370613098, "learning_rate": 0.00011347111520707424, "loss": 3.0605, "step": 43682 }, { "epoch": 2.14, "grad_norm": 0.7112438678741455, "learning_rate": 0.00011345905707191044, "loss": 3.0148, "step": 43683 }, { "epoch": 2.14, "grad_norm": 0.7566985487937927, "learning_rate": 0.00011344699942805966, "loss": 3.1465, "step": 43684 }, { "epoch": 2.14, "grad_norm": 0.7064225673675537, "learning_rate": 0.0001134349422755535, "loss": 3.0024, "step": 43685 }, { "epoch": 2.14, "grad_norm": 0.6750404238700867, "learning_rate": 0.00011342288561442386, "loss": 2.8038, "step": 43686 }, { "epoch": 2.14, "grad_norm": 0.6803438663482666, "learning_rate": 0.00011341082944470248, "loss": 2.9661, "step": 43687 }, { "epoch": 2.14, "grad_norm": 0.6766833066940308, "learning_rate": 0.00011339877376642095, "loss": 3.184, "step": 43688 }, { "epoch": 2.14, "grad_norm": 0.70738685131073, "learning_rate": 0.00011338671857961125, "loss": 2.949, "step": 43689 }, { "epoch": 2.14, "grad_norm": 0.6981825828552246, "learning_rate": 0.00011337466388430489, "loss": 2.8143, "step": 43690 }, { "epoch": 2.14, "grad_norm": 0.6753236651420593, "learning_rate": 0.00011336260968053374, "loss": 3.0315, "step": 43691 }, { "epoch": 2.14, "grad_norm": 0.7474436163902283, "learning_rate": 0.0001133505559683297, "loss": 2.7655, "step": 43692 }, { "epoch": 2.14, "grad_norm": 0.717900812625885, "learning_rate": 0.00011333850274772433, "loss": 2.8381, "step": 43693 }, { "epoch": 2.14, "grad_norm": 0.6985374093055725, "learning_rate": 0.00011332645001874947, "loss": 2.7779, "step": 43694 }, { "epoch": 2.14, "grad_norm": 0.6623815298080444, "learning_rate": 0.00011331439778143669, "loss": 2.7031, "step": 43695 }, { "epoch": 2.14, "grad_norm": 0.7060630321502686, "learning_rate": 0.00011330234603581782, "loss": 3.1725, "step": 43696 }, { "epoch": 2.14, "grad_norm": 0.6988420486450195, "learning_rate": 0.00011329029478192478, "loss": 2.9677, "step": 43697 }, { "epoch": 2.14, "grad_norm": 0.7007870078086853, "learning_rate": 0.00011327824401978907, "loss": 2.9562, "step": 43698 }, { "epoch": 2.14, "grad_norm": 0.6870179176330566, "learning_rate": 0.00011326619374944263, "loss": 3.0245, "step": 43699 }, { "epoch": 2.14, "grad_norm": 0.7430259585380554, "learning_rate": 0.00011325414397091699, "loss": 2.8442, "step": 43700 }, { "epoch": 2.14, "grad_norm": 0.7168817520141602, "learning_rate": 0.0001132420946842441, "loss": 2.5137, "step": 43701 }, { "epoch": 2.14, "grad_norm": 0.7056675553321838, "learning_rate": 0.00011323004588945558, "loss": 2.8598, "step": 43702 }, { "epoch": 2.14, "grad_norm": 0.6785736680030823, "learning_rate": 0.0001132179975865831, "loss": 2.6708, "step": 43703 }, { "epoch": 2.14, "grad_norm": 0.6528010368347168, "learning_rate": 0.00011320594977565854, "loss": 2.8608, "step": 43704 }, { "epoch": 2.14, "grad_norm": 0.7285547256469727, "learning_rate": 0.00011319390245671346, "loss": 2.785, "step": 43705 }, { "epoch": 2.14, "grad_norm": 0.6859037280082703, "learning_rate": 0.00011318185562977969, "loss": 2.9215, "step": 43706 }, { "epoch": 2.14, "grad_norm": 0.683616578578949, "learning_rate": 0.00011316980929488908, "loss": 2.921, "step": 43707 }, { "epoch": 2.14, "grad_norm": 0.7073354125022888, "learning_rate": 0.00011315776345207322, "loss": 3.038, "step": 43708 }, { "epoch": 2.14, "grad_norm": 0.6951118111610413, "learning_rate": 0.00011314571810136386, "loss": 3.0371, "step": 43709 }, { "epoch": 2.14, "grad_norm": 0.7080778479576111, "learning_rate": 0.00011313367324279263, "loss": 3.0615, "step": 43710 }, { "epoch": 2.14, "grad_norm": 0.712817907333374, "learning_rate": 0.0001131216288763914, "loss": 2.7927, "step": 43711 }, { "epoch": 2.14, "grad_norm": 0.7057470679283142, "learning_rate": 0.00011310958500219177, "loss": 3.0071, "step": 43712 }, { "epoch": 2.14, "grad_norm": 0.7209771275520325, "learning_rate": 0.00011309754162022551, "loss": 2.9617, "step": 43713 }, { "epoch": 2.14, "grad_norm": 0.6734073162078857, "learning_rate": 0.00011308549873052447, "loss": 2.7933, "step": 43714 }, { "epoch": 2.14, "grad_norm": 0.6648086905479431, "learning_rate": 0.00011307345633312024, "loss": 2.9428, "step": 43715 }, { "epoch": 2.14, "grad_norm": 0.6905327439308167, "learning_rate": 0.00011306141442804456, "loss": 2.8132, "step": 43716 }, { "epoch": 2.14, "grad_norm": 0.7069293856620789, "learning_rate": 0.00011304937301532901, "loss": 2.7406, "step": 43717 }, { "epoch": 2.14, "grad_norm": 0.6863256096839905, "learning_rate": 0.00011303733209500548, "loss": 2.9026, "step": 43718 }, { "epoch": 2.14, "grad_norm": 0.6748086810112, "learning_rate": 0.0001130252916671057, "loss": 2.9089, "step": 43719 }, { "epoch": 2.14, "grad_norm": 0.68429034948349, "learning_rate": 0.0001130132517316612, "loss": 2.9713, "step": 43720 }, { "epoch": 2.14, "grad_norm": 0.6415186524391174, "learning_rate": 0.00011300121228870396, "loss": 2.6823, "step": 43721 }, { "epoch": 2.14, "grad_norm": 0.6968873143196106, "learning_rate": 0.0001129891733382654, "loss": 2.8784, "step": 43722 }, { "epoch": 2.14, "grad_norm": 0.7179439067840576, "learning_rate": 0.00011297713488037748, "loss": 2.8995, "step": 43723 }, { "epoch": 2.14, "grad_norm": 0.7167483568191528, "learning_rate": 0.0001129650969150718, "loss": 2.8282, "step": 43724 }, { "epoch": 2.14, "grad_norm": 0.7356143593788147, "learning_rate": 0.00011295305944237995, "loss": 2.6649, "step": 43725 }, { "epoch": 2.14, "grad_norm": 0.6669628024101257, "learning_rate": 0.00011294102246233385, "loss": 2.8961, "step": 43726 }, { "epoch": 2.14, "grad_norm": 0.7157459855079651, "learning_rate": 0.000112928985974965, "loss": 2.9598, "step": 43727 }, { "epoch": 2.14, "grad_norm": 0.6857950091362, "learning_rate": 0.00011291694998030517, "loss": 2.9189, "step": 43728 }, { "epoch": 2.14, "grad_norm": 0.7263401746749878, "learning_rate": 0.00011290491447838622, "loss": 2.8394, "step": 43729 }, { "epoch": 2.14, "grad_norm": 0.7011303901672363, "learning_rate": 0.00011289287946923973, "loss": 2.8701, "step": 43730 }, { "epoch": 2.14, "grad_norm": 0.7553688287734985, "learning_rate": 0.00011288084495289737, "loss": 2.8729, "step": 43731 }, { "epoch": 2.14, "grad_norm": 0.6972824931144714, "learning_rate": 0.00011286881092939074, "loss": 2.9316, "step": 43732 }, { "epoch": 2.14, "grad_norm": 0.7040778398513794, "learning_rate": 0.00011285677739875163, "loss": 2.7414, "step": 43733 }, { "epoch": 2.14, "grad_norm": 0.6931003332138062, "learning_rate": 0.00011284474436101188, "loss": 2.7205, "step": 43734 }, { "epoch": 2.14, "grad_norm": 0.6897353529930115, "learning_rate": 0.00011283271181620295, "loss": 2.8586, "step": 43735 }, { "epoch": 2.14, "grad_norm": 0.716933012008667, "learning_rate": 0.00011282067976435676, "loss": 3.0455, "step": 43736 }, { "epoch": 2.14, "grad_norm": 0.7096711993217468, "learning_rate": 0.00011280864820550474, "loss": 2.9453, "step": 43737 }, { "epoch": 2.14, "grad_norm": 0.7447996139526367, "learning_rate": 0.00011279661713967882, "loss": 2.9867, "step": 43738 }, { "epoch": 2.14, "grad_norm": 0.6898730993270874, "learning_rate": 0.0001127845865669106, "loss": 2.8042, "step": 43739 }, { "epoch": 2.14, "grad_norm": 0.6772977709770203, "learning_rate": 0.00011277255648723163, "loss": 2.9059, "step": 43740 }, { "epoch": 2.14, "grad_norm": 0.7092178463935852, "learning_rate": 0.00011276052690067382, "loss": 2.8711, "step": 43741 }, { "epoch": 2.14, "grad_norm": 0.6839644312858582, "learning_rate": 0.00011274849780726866, "loss": 2.8759, "step": 43742 }, { "epoch": 2.14, "grad_norm": 0.7505874037742615, "learning_rate": 0.00011273646920704792, "loss": 3.0373, "step": 43743 }, { "epoch": 2.14, "grad_norm": 0.7028711438179016, "learning_rate": 0.00011272444110004335, "loss": 2.7585, "step": 43744 }, { "epoch": 2.14, "grad_norm": 0.7121570706367493, "learning_rate": 0.00011271241348628659, "loss": 2.9666, "step": 43745 }, { "epoch": 2.14, "grad_norm": 0.7014894485473633, "learning_rate": 0.00011270038636580928, "loss": 2.9703, "step": 43746 }, { "epoch": 2.14, "grad_norm": 0.7833147644996643, "learning_rate": 0.000112688359738643, "loss": 2.7485, "step": 43747 }, { "epoch": 2.14, "grad_norm": 0.6829766631126404, "learning_rate": 0.00011267633360481954, "loss": 3.0008, "step": 43748 }, { "epoch": 2.14, "grad_norm": 0.7053651809692383, "learning_rate": 0.00011266430796437067, "loss": 3.0635, "step": 43749 }, { "epoch": 2.14, "grad_norm": 0.7544901371002197, "learning_rate": 0.00011265228281732784, "loss": 2.9556, "step": 43750 }, { "epoch": 2.14, "grad_norm": 0.6905205845832825, "learning_rate": 0.00011264025816372298, "loss": 3.0138, "step": 43751 }, { "epoch": 2.14, "grad_norm": 0.7467920780181885, "learning_rate": 0.00011262823400358756, "loss": 2.7328, "step": 43752 }, { "epoch": 2.14, "grad_norm": 0.6714316606521606, "learning_rate": 0.00011261621033695327, "loss": 2.9427, "step": 43753 }, { "epoch": 2.14, "grad_norm": 0.6609356999397278, "learning_rate": 0.00011260418716385189, "loss": 2.8976, "step": 43754 }, { "epoch": 2.14, "grad_norm": 0.728022038936615, "learning_rate": 0.00011259216448431493, "loss": 2.8679, "step": 43755 }, { "epoch": 2.14, "grad_norm": 0.7265163660049438, "learning_rate": 0.00011258014229837424, "loss": 2.9928, "step": 43756 }, { "epoch": 2.14, "grad_norm": 0.6698508262634277, "learning_rate": 0.00011256812060606127, "loss": 2.8256, "step": 43757 }, { "epoch": 2.14, "grad_norm": 0.7141711115837097, "learning_rate": 0.0001125560994074078, "loss": 2.6912, "step": 43758 }, { "epoch": 2.14, "grad_norm": 0.705557644367218, "learning_rate": 0.0001125440787024456, "loss": 2.8845, "step": 43759 }, { "epoch": 2.14, "grad_norm": 0.7180753946304321, "learning_rate": 0.00011253205849120621, "loss": 2.883, "step": 43760 }, { "epoch": 2.14, "grad_norm": 0.6521044969558716, "learning_rate": 0.00011252003877372132, "loss": 2.9457, "step": 43761 }, { "epoch": 2.14, "grad_norm": 0.7582170367240906, "learning_rate": 0.00011250801955002245, "loss": 3.0808, "step": 43762 }, { "epoch": 2.14, "grad_norm": 0.6841397881507874, "learning_rate": 0.00011249600082014135, "loss": 2.8299, "step": 43763 }, { "epoch": 2.14, "grad_norm": 0.6913440227508545, "learning_rate": 0.00011248398258410986, "loss": 2.8295, "step": 43764 }, { "epoch": 2.14, "grad_norm": 0.6824356317520142, "learning_rate": 0.00011247196484195933, "loss": 2.8794, "step": 43765 }, { "epoch": 2.14, "grad_norm": 0.7842134237289429, "learning_rate": 0.00011245994759372164, "loss": 2.8076, "step": 43766 }, { "epoch": 2.14, "grad_norm": 0.7321650981903076, "learning_rate": 0.00011244793083942838, "loss": 3.0722, "step": 43767 }, { "epoch": 2.14, "grad_norm": 0.7260737419128418, "learning_rate": 0.00011243591457911107, "loss": 2.994, "step": 43768 }, { "epoch": 2.15, "grad_norm": 0.7154833674430847, "learning_rate": 0.00011242389881280158, "loss": 2.985, "step": 43769 }, { "epoch": 2.15, "grad_norm": 0.6974570155143738, "learning_rate": 0.00011241188354053131, "loss": 2.9196, "step": 43770 }, { "epoch": 2.15, "grad_norm": 0.7746239900588989, "learning_rate": 0.00011239986876233214, "loss": 2.7981, "step": 43771 }, { "epoch": 2.15, "grad_norm": 0.6849657297134399, "learning_rate": 0.00011238785447823552, "loss": 2.9164, "step": 43772 }, { "epoch": 2.15, "grad_norm": 0.7732822895050049, "learning_rate": 0.0001123758406882733, "loss": 2.8262, "step": 43773 }, { "epoch": 2.15, "grad_norm": 0.7015544176101685, "learning_rate": 0.000112363827392477, "loss": 2.9393, "step": 43774 }, { "epoch": 2.15, "grad_norm": 0.6607134342193604, "learning_rate": 0.00011235181459087816, "loss": 2.9479, "step": 43775 }, { "epoch": 2.15, "grad_norm": 0.7178341150283813, "learning_rate": 0.00011233980228350863, "loss": 2.9219, "step": 43776 }, { "epoch": 2.15, "grad_norm": 0.6694148778915405, "learning_rate": 0.00011232779047039983, "loss": 2.7359, "step": 43777 }, { "epoch": 2.15, "grad_norm": 0.7360499501228333, "learning_rate": 0.00011231577915158353, "loss": 2.9348, "step": 43778 }, { "epoch": 2.15, "grad_norm": 0.672639787197113, "learning_rate": 0.00011230376832709144, "loss": 2.846, "step": 43779 }, { "epoch": 2.15, "grad_norm": 0.7251139879226685, "learning_rate": 0.00011229175799695498, "loss": 3.0, "step": 43780 }, { "epoch": 2.15, "grad_norm": 0.6625097393989563, "learning_rate": 0.00011227974816120603, "loss": 2.9168, "step": 43781 }, { "epoch": 2.15, "grad_norm": 0.6882855296134949, "learning_rate": 0.0001122677388198761, "loss": 3.1654, "step": 43782 }, { "epoch": 2.15, "grad_norm": 0.6938197612762451, "learning_rate": 0.0001122557299729967, "loss": 2.8398, "step": 43783 }, { "epoch": 2.15, "grad_norm": 0.701835572719574, "learning_rate": 0.00011224372162059972, "loss": 3.03, "step": 43784 }, { "epoch": 2.15, "grad_norm": 0.6981818675994873, "learning_rate": 0.0001122317137627165, "loss": 2.9271, "step": 43785 }, { "epoch": 2.15, "grad_norm": 0.7233262658119202, "learning_rate": 0.00011221970639937891, "loss": 2.7305, "step": 43786 }, { "epoch": 2.15, "grad_norm": 0.7583865523338318, "learning_rate": 0.00011220769953061837, "loss": 2.9369, "step": 43787 }, { "epoch": 2.15, "grad_norm": 0.6952337622642517, "learning_rate": 0.00011219569315646672, "loss": 2.8671, "step": 43788 }, { "epoch": 2.15, "grad_norm": 0.6880810260772705, "learning_rate": 0.00011218368727695546, "loss": 2.9465, "step": 43789 }, { "epoch": 2.15, "grad_norm": 0.6928128600120544, "learning_rate": 0.00011217168189211614, "loss": 2.7894, "step": 43790 }, { "epoch": 2.15, "grad_norm": 0.6935843825340271, "learning_rate": 0.00011215967700198057, "loss": 2.9268, "step": 43791 }, { "epoch": 2.15, "grad_norm": 0.7136497497558594, "learning_rate": 0.00011214767260658011, "loss": 3.0224, "step": 43792 }, { "epoch": 2.15, "grad_norm": 0.7205958962440491, "learning_rate": 0.00011213566870594668, "loss": 2.8294, "step": 43793 }, { "epoch": 2.15, "grad_norm": 0.714300811290741, "learning_rate": 0.00011212366530011162, "loss": 3.0795, "step": 43794 }, { "epoch": 2.15, "grad_norm": 0.6874080300331116, "learning_rate": 0.00011211166238910677, "loss": 3.0615, "step": 43795 }, { "epoch": 2.15, "grad_norm": 0.7105938196182251, "learning_rate": 0.00011209965997296363, "loss": 2.9694, "step": 43796 }, { "epoch": 2.15, "grad_norm": 0.7178138494491577, "learning_rate": 0.00011208765805171373, "loss": 3.0575, "step": 43797 }, { "epoch": 2.15, "grad_norm": 0.7538326978683472, "learning_rate": 0.0001120756566253889, "loss": 3.0899, "step": 43798 }, { "epoch": 2.15, "grad_norm": 0.6940291523933411, "learning_rate": 0.00011206365569402051, "loss": 2.9107, "step": 43799 }, { "epoch": 2.15, "grad_norm": 0.754417896270752, "learning_rate": 0.00011205165525764026, "loss": 2.9723, "step": 43800 }, { "epoch": 2.15, "grad_norm": 0.7753744125366211, "learning_rate": 0.00011203965531627991, "loss": 2.9622, "step": 43801 }, { "epoch": 2.15, "grad_norm": 0.7098350524902344, "learning_rate": 0.00011202765586997082, "loss": 2.8377, "step": 43802 }, { "epoch": 2.15, "grad_norm": 0.7141963839530945, "learning_rate": 0.00011201565691874482, "loss": 2.887, "step": 43803 }, { "epoch": 2.15, "grad_norm": 0.6638633608818054, "learning_rate": 0.00011200365846263338, "loss": 2.9423, "step": 43804 }, { "epoch": 2.15, "grad_norm": 0.6971076726913452, "learning_rate": 0.00011199166050166801, "loss": 2.8963, "step": 43805 }, { "epoch": 2.15, "grad_norm": 0.6292498111724854, "learning_rate": 0.00011197966303588055, "loss": 3.1008, "step": 43806 }, { "epoch": 2.15, "grad_norm": 0.696465790271759, "learning_rate": 0.00011196766606530238, "loss": 2.8679, "step": 43807 }, { "epoch": 2.15, "grad_norm": 0.6825982332229614, "learning_rate": 0.00011195566958996526, "loss": 3.0, "step": 43808 }, { "epoch": 2.15, "grad_norm": 0.7092751264572144, "learning_rate": 0.00011194367360990063, "loss": 3.0249, "step": 43809 }, { "epoch": 2.15, "grad_norm": 0.7213112115859985, "learning_rate": 0.0001119316781251403, "loss": 2.9623, "step": 43810 }, { "epoch": 2.15, "grad_norm": 0.6883493065834045, "learning_rate": 0.0001119196831357157, "loss": 3.0142, "step": 43811 }, { "epoch": 2.15, "grad_norm": 0.6896858215332031, "learning_rate": 0.00011190768864165837, "loss": 2.941, "step": 43812 }, { "epoch": 2.15, "grad_norm": 0.7189692258834839, "learning_rate": 0.00011189569464300007, "loss": 2.9459, "step": 43813 }, { "epoch": 2.15, "grad_norm": 0.67086261510849, "learning_rate": 0.00011188370113977226, "loss": 3.2535, "step": 43814 }, { "epoch": 2.15, "grad_norm": 0.6902649998664856, "learning_rate": 0.00011187170813200654, "loss": 3.2002, "step": 43815 }, { "epoch": 2.15, "grad_norm": 0.6927818655967712, "learning_rate": 0.00011185971561973463, "loss": 2.8208, "step": 43816 }, { "epoch": 2.15, "grad_norm": 0.728071391582489, "learning_rate": 0.00011184772360298804, "loss": 3.154, "step": 43817 }, { "epoch": 2.15, "grad_norm": 0.6964516043663025, "learning_rate": 0.00011183573208179832, "loss": 3.2181, "step": 43818 }, { "epoch": 2.15, "grad_norm": 0.7113494277000427, "learning_rate": 0.00011182374105619696, "loss": 2.8732, "step": 43819 }, { "epoch": 2.15, "grad_norm": 0.7100608944892883, "learning_rate": 0.00011181175052621568, "loss": 3.012, "step": 43820 }, { "epoch": 2.15, "grad_norm": 0.6832837462425232, "learning_rate": 0.00011179976049188609, "loss": 2.7346, "step": 43821 }, { "epoch": 2.15, "grad_norm": 0.7172902226448059, "learning_rate": 0.00011178777095323966, "loss": 3.0578, "step": 43822 }, { "epoch": 2.15, "grad_norm": 0.7123684883117676, "learning_rate": 0.00011177578191030807, "loss": 3.0289, "step": 43823 }, { "epoch": 2.15, "grad_norm": 0.7194732427597046, "learning_rate": 0.00011176379336312279, "loss": 2.8659, "step": 43824 }, { "epoch": 2.15, "grad_norm": 0.7032181620597839, "learning_rate": 0.00011175180531171553, "loss": 2.9368, "step": 43825 }, { "epoch": 2.15, "grad_norm": 0.7210255265235901, "learning_rate": 0.0001117398177561178, "loss": 2.8385, "step": 43826 }, { "epoch": 2.15, "grad_norm": 0.7119678258895874, "learning_rate": 0.00011172783069636106, "loss": 2.687, "step": 43827 }, { "epoch": 2.15, "grad_norm": 0.6808481812477112, "learning_rate": 0.00011171584413247706, "loss": 2.9508, "step": 43828 }, { "epoch": 2.15, "grad_norm": 0.6783161759376526, "learning_rate": 0.00011170385806449719, "loss": 3.0674, "step": 43829 }, { "epoch": 2.15, "grad_norm": 0.7140722274780273, "learning_rate": 0.00011169187249245313, "loss": 2.7876, "step": 43830 }, { "epoch": 2.15, "grad_norm": 0.6937413215637207, "learning_rate": 0.00011167988741637652, "loss": 3.2024, "step": 43831 }, { "epoch": 2.15, "grad_norm": 0.706088125705719, "learning_rate": 0.00011166790283629889, "loss": 2.8087, "step": 43832 }, { "epoch": 2.15, "grad_norm": 0.7085302472114563, "learning_rate": 0.0001116559187522517, "loss": 2.7154, "step": 43833 }, { "epoch": 2.15, "grad_norm": 0.6916400790214539, "learning_rate": 0.0001116439351642665, "loss": 2.9563, "step": 43834 }, { "epoch": 2.15, "grad_norm": 0.6995882987976074, "learning_rate": 0.00011163195207237493, "loss": 2.7844, "step": 43835 }, { "epoch": 2.15, "grad_norm": 0.7213637232780457, "learning_rate": 0.00011161996947660864, "loss": 3.0642, "step": 43836 }, { "epoch": 2.15, "grad_norm": 0.6776930689811707, "learning_rate": 0.000111607987376999, "loss": 3.0639, "step": 43837 }, { "epoch": 2.15, "grad_norm": 0.6928814649581909, "learning_rate": 0.00011159600577357778, "loss": 2.9717, "step": 43838 }, { "epoch": 2.15, "grad_norm": 0.6714714169502258, "learning_rate": 0.0001115840246663764, "loss": 2.9155, "step": 43839 }, { "epoch": 2.15, "grad_norm": 0.6734461784362793, "learning_rate": 0.00011157204405542631, "loss": 3.2625, "step": 43840 }, { "epoch": 2.15, "grad_norm": 0.7229616641998291, "learning_rate": 0.00011156006394075935, "loss": 2.9042, "step": 43841 }, { "epoch": 2.15, "grad_norm": 0.7451503276824951, "learning_rate": 0.00011154808432240677, "loss": 2.969, "step": 43842 }, { "epoch": 2.15, "grad_norm": 0.6939261555671692, "learning_rate": 0.00011153610520040039, "loss": 2.8367, "step": 43843 }, { "epoch": 2.15, "grad_norm": 0.7914009690284729, "learning_rate": 0.00011152412657477155, "loss": 2.8234, "step": 43844 }, { "epoch": 2.15, "grad_norm": 0.7722774744033813, "learning_rate": 0.00011151214844555186, "loss": 2.8591, "step": 43845 }, { "epoch": 2.15, "grad_norm": 1.1138639450073242, "learning_rate": 0.00011150017081277302, "loss": 3.0576, "step": 43846 }, { "epoch": 2.15, "grad_norm": 0.7328840494155884, "learning_rate": 0.00011148819367646643, "loss": 2.8833, "step": 43847 }, { "epoch": 2.15, "grad_norm": 0.6928589344024658, "learning_rate": 0.00011147621703666368, "loss": 2.7493, "step": 43848 }, { "epoch": 2.15, "grad_norm": 0.7033262848854065, "learning_rate": 0.00011146424089339616, "loss": 2.7993, "step": 43849 }, { "epoch": 2.15, "grad_norm": 0.7417920827865601, "learning_rate": 0.0001114522652466956, "loss": 3.0088, "step": 43850 }, { "epoch": 2.15, "grad_norm": 0.7107601761817932, "learning_rate": 0.00011144029009659352, "loss": 2.8515, "step": 43851 }, { "epoch": 2.15, "grad_norm": 0.6659914255142212, "learning_rate": 0.00011142831544312139, "loss": 2.9079, "step": 43852 }, { "epoch": 2.15, "grad_norm": 0.6738490462303162, "learning_rate": 0.00011141634128631082, "loss": 2.926, "step": 43853 }, { "epoch": 2.15, "grad_norm": 0.708010733127594, "learning_rate": 0.00011140436762619336, "loss": 2.9225, "step": 43854 }, { "epoch": 2.15, "grad_norm": 0.7281856536865234, "learning_rate": 0.00011139239446280039, "loss": 3.0498, "step": 43855 }, { "epoch": 2.15, "grad_norm": 0.7190080881118774, "learning_rate": 0.00011138042179616364, "loss": 2.6951, "step": 43856 }, { "epoch": 2.15, "grad_norm": 0.7010225057601929, "learning_rate": 0.00011136844962631449, "loss": 2.8787, "step": 43857 }, { "epoch": 2.15, "grad_norm": 0.7194762229919434, "learning_rate": 0.00011135647795328461, "loss": 2.806, "step": 43858 }, { "epoch": 2.15, "grad_norm": 0.7051863670349121, "learning_rate": 0.0001113445067771054, "loss": 2.9298, "step": 43859 }, { "epoch": 2.15, "grad_norm": 0.7123686671257019, "learning_rate": 0.00011133253609780841, "loss": 2.8497, "step": 43860 }, { "epoch": 2.15, "grad_norm": 0.7194857597351074, "learning_rate": 0.00011132056591542535, "loss": 2.9762, "step": 43861 }, { "epoch": 2.15, "grad_norm": 0.705207884311676, "learning_rate": 0.00011130859622998755, "loss": 2.7528, "step": 43862 }, { "epoch": 2.15, "grad_norm": 0.6921209692955017, "learning_rate": 0.00011129662704152663, "loss": 2.9349, "step": 43863 }, { "epoch": 2.15, "grad_norm": 0.6830700635910034, "learning_rate": 0.00011128465835007397, "loss": 2.8637, "step": 43864 }, { "epoch": 2.15, "grad_norm": 0.7572240829467773, "learning_rate": 0.00011127269015566133, "loss": 3.0668, "step": 43865 }, { "epoch": 2.15, "grad_norm": 0.6755383014678955, "learning_rate": 0.00011126072245831996, "loss": 2.814, "step": 43866 }, { "epoch": 2.15, "grad_norm": 0.7219648361206055, "learning_rate": 0.00011124875525808156, "loss": 2.7501, "step": 43867 }, { "epoch": 2.15, "grad_norm": 0.7269349098205566, "learning_rate": 0.00011123678855497768, "loss": 2.7667, "step": 43868 }, { "epoch": 2.15, "grad_norm": 0.7479811310768127, "learning_rate": 0.0001112248223490398, "loss": 2.907, "step": 43869 }, { "epoch": 2.15, "grad_norm": 0.7241057753562927, "learning_rate": 0.00011121285664029938, "loss": 3.0555, "step": 43870 }, { "epoch": 2.15, "grad_norm": 0.6962974071502686, "learning_rate": 0.00011120089142878789, "loss": 2.985, "step": 43871 }, { "epoch": 2.15, "grad_norm": 0.7270846366882324, "learning_rate": 0.0001111889267145369, "loss": 2.7828, "step": 43872 }, { "epoch": 2.15, "grad_norm": 0.7334160208702087, "learning_rate": 0.00011117696249757801, "loss": 2.902, "step": 43873 }, { "epoch": 2.15, "grad_norm": 0.7237550616264343, "learning_rate": 0.00011116499877794262, "loss": 3.0554, "step": 43874 }, { "epoch": 2.15, "grad_norm": 0.7188262343406677, "learning_rate": 0.00011115303555566233, "loss": 2.7096, "step": 43875 }, { "epoch": 2.15, "grad_norm": 0.7280434370040894, "learning_rate": 0.00011114107283076862, "loss": 2.7683, "step": 43876 }, { "epoch": 2.15, "grad_norm": 0.6606218218803406, "learning_rate": 0.0001111291106032929, "loss": 2.9093, "step": 43877 }, { "epoch": 2.15, "grad_norm": 0.7308887839317322, "learning_rate": 0.00011111714887326684, "loss": 2.7953, "step": 43878 }, { "epoch": 2.15, "grad_norm": 0.7169768214225769, "learning_rate": 0.00011110518764072174, "loss": 2.6644, "step": 43879 }, { "epoch": 2.15, "grad_norm": 0.7009234428405762, "learning_rate": 0.00011109322690568934, "loss": 2.9879, "step": 43880 }, { "epoch": 2.15, "grad_norm": 0.7171613574028015, "learning_rate": 0.00011108126666820094, "loss": 3.0013, "step": 43881 }, { "epoch": 2.15, "grad_norm": 0.7147977352142334, "learning_rate": 0.0001110693069282881, "loss": 2.7512, "step": 43882 }, { "epoch": 2.15, "grad_norm": 0.7143003344535828, "learning_rate": 0.00011105734768598244, "loss": 2.9142, "step": 43883 }, { "epoch": 2.15, "grad_norm": 0.6658389568328857, "learning_rate": 0.00011104538894131537, "loss": 2.8317, "step": 43884 }, { "epoch": 2.15, "grad_norm": 0.7165668606758118, "learning_rate": 0.00011103343069431838, "loss": 2.9532, "step": 43885 }, { "epoch": 2.15, "grad_norm": 0.7112611532211304, "learning_rate": 0.00011102147294502287, "loss": 3.2125, "step": 43886 }, { "epoch": 2.15, "grad_norm": 0.6863123178482056, "learning_rate": 0.00011100951569346045, "loss": 2.9475, "step": 43887 }, { "epoch": 2.15, "grad_norm": 0.7303819060325623, "learning_rate": 0.00011099755893966266, "loss": 2.982, "step": 43888 }, { "epoch": 2.15, "grad_norm": 0.7144728899002075, "learning_rate": 0.00011098560268366087, "loss": 2.8729, "step": 43889 }, { "epoch": 2.15, "grad_norm": 0.6913188099861145, "learning_rate": 0.00011097364692548669, "loss": 2.8669, "step": 43890 }, { "epoch": 2.15, "grad_norm": 0.7074322700500488, "learning_rate": 0.00011096169166517153, "loss": 3.1753, "step": 43891 }, { "epoch": 2.15, "grad_norm": 0.6860098242759705, "learning_rate": 0.00011094973690274684, "loss": 2.9712, "step": 43892 }, { "epoch": 2.15, "grad_norm": 0.6915674209594727, "learning_rate": 0.00011093778263824422, "loss": 2.9525, "step": 43893 }, { "epoch": 2.15, "grad_norm": 0.7016196846961975, "learning_rate": 0.00011092582887169501, "loss": 2.9581, "step": 43894 }, { "epoch": 2.15, "grad_norm": 0.6921491622924805, "learning_rate": 0.00011091387560313088, "loss": 2.9924, "step": 43895 }, { "epoch": 2.15, "grad_norm": 0.7028470635414124, "learning_rate": 0.00011090192283258312, "loss": 2.7785, "step": 43896 }, { "epoch": 2.15, "grad_norm": 0.6804261803627014, "learning_rate": 0.0001108899705600834, "loss": 2.8979, "step": 43897 }, { "epoch": 2.15, "grad_norm": 0.7549912929534912, "learning_rate": 0.00011087801878566308, "loss": 3.1259, "step": 43898 }, { "epoch": 2.15, "grad_norm": 0.6933797001838684, "learning_rate": 0.00011086606750935358, "loss": 2.9454, "step": 43899 }, { "epoch": 2.15, "grad_norm": 0.6788550615310669, "learning_rate": 0.00011085411673118654, "loss": 2.9116, "step": 43900 }, { "epoch": 2.15, "grad_norm": 0.7214063405990601, "learning_rate": 0.00011084216645119328, "loss": 2.8206, "step": 43901 }, { "epoch": 2.15, "grad_norm": 0.6881868839263916, "learning_rate": 0.00011083021666940533, "loss": 2.8979, "step": 43902 }, { "epoch": 2.15, "grad_norm": 0.7121565937995911, "learning_rate": 0.00011081826738585428, "loss": 2.8872, "step": 43903 }, { "epoch": 2.15, "grad_norm": 0.6824859380722046, "learning_rate": 0.00011080631860057144, "loss": 2.7616, "step": 43904 }, { "epoch": 2.15, "grad_norm": 0.7560449242591858, "learning_rate": 0.00011079437031358844, "loss": 3.0213, "step": 43905 }, { "epoch": 2.15, "grad_norm": 0.7062668204307556, "learning_rate": 0.00011078242252493663, "loss": 3.0686, "step": 43906 }, { "epoch": 2.15, "grad_norm": 0.6976759433746338, "learning_rate": 0.00011077047523464738, "loss": 2.8067, "step": 43907 }, { "epoch": 2.15, "grad_norm": 0.7655328512191772, "learning_rate": 0.00011075852844275242, "loss": 2.8811, "step": 43908 }, { "epoch": 2.15, "grad_norm": 0.6743826270103455, "learning_rate": 0.00011074658214928298, "loss": 2.7917, "step": 43909 }, { "epoch": 2.15, "grad_norm": 0.6853476762771606, "learning_rate": 0.00011073463635427068, "loss": 3.049, "step": 43910 }, { "epoch": 2.15, "grad_norm": 0.6936709880828857, "learning_rate": 0.00011072269105774686, "loss": 3.0572, "step": 43911 }, { "epoch": 2.15, "grad_norm": 0.7399135231971741, "learning_rate": 0.00011071074625974316, "loss": 2.7455, "step": 43912 }, { "epoch": 2.15, "grad_norm": 0.6544350385665894, "learning_rate": 0.00011069880196029089, "loss": 2.8913, "step": 43913 }, { "epoch": 2.15, "grad_norm": 0.7409467101097107, "learning_rate": 0.00011068685815942146, "loss": 2.8284, "step": 43914 }, { "epoch": 2.15, "grad_norm": 0.7109572291374207, "learning_rate": 0.00011067491485716651, "loss": 2.8065, "step": 43915 }, { "epoch": 2.15, "grad_norm": 0.6747780442237854, "learning_rate": 0.00011066297205355728, "loss": 2.9514, "step": 43916 }, { "epoch": 2.15, "grad_norm": 0.7135562896728516, "learning_rate": 0.00011065102974862537, "loss": 2.9687, "step": 43917 }, { "epoch": 2.15, "grad_norm": 0.6812368035316467, "learning_rate": 0.00011063908794240228, "loss": 2.9524, "step": 43918 }, { "epoch": 2.15, "grad_norm": 0.8200016617774963, "learning_rate": 0.00011062714663491941, "loss": 2.8971, "step": 43919 }, { "epoch": 2.15, "grad_norm": 0.6595801115036011, "learning_rate": 0.00011061520582620818, "loss": 3.0807, "step": 43920 }, { "epoch": 2.15, "grad_norm": 0.6876057982444763, "learning_rate": 0.00011060326551629993, "loss": 2.9015, "step": 43921 }, { "epoch": 2.15, "grad_norm": 0.6769374012947083, "learning_rate": 0.00011059132570522627, "loss": 2.9048, "step": 43922 }, { "epoch": 2.15, "grad_norm": 0.7245048880577087, "learning_rate": 0.00011057938639301868, "loss": 3.0612, "step": 43923 }, { "epoch": 2.15, "grad_norm": 0.6851451396942139, "learning_rate": 0.00011056744757970844, "loss": 2.8888, "step": 43924 }, { "epoch": 2.15, "grad_norm": 0.7699195742607117, "learning_rate": 0.00011055550926532718, "loss": 2.7047, "step": 43925 }, { "epoch": 2.15, "grad_norm": 0.6714342832565308, "learning_rate": 0.00011054357144990615, "loss": 3.0598, "step": 43926 }, { "epoch": 2.15, "grad_norm": 0.7293652892112732, "learning_rate": 0.00011053163413347701, "loss": 2.7219, "step": 43927 }, { "epoch": 2.15, "grad_norm": 0.7357663512229919, "learning_rate": 0.00011051969731607106, "loss": 3.0263, "step": 43928 }, { "epoch": 2.15, "grad_norm": 0.6910032033920288, "learning_rate": 0.00011050776099771966, "loss": 2.9287, "step": 43929 }, { "epoch": 2.15, "grad_norm": 0.6952933669090271, "learning_rate": 0.00011049582517845445, "loss": 2.846, "step": 43930 }, { "epoch": 2.15, "grad_norm": 0.742231547832489, "learning_rate": 0.00011048388985830668, "loss": 2.9032, "step": 43931 }, { "epoch": 2.15, "grad_norm": 0.7026122212409973, "learning_rate": 0.00011047195503730786, "loss": 2.7614, "step": 43932 }, { "epoch": 2.15, "grad_norm": 0.7984775304794312, "learning_rate": 0.00011046002071548954, "loss": 2.631, "step": 43933 }, { "epoch": 2.15, "grad_norm": 0.6924632787704468, "learning_rate": 0.00011044808689288306, "loss": 2.9778, "step": 43934 }, { "epoch": 2.15, "grad_norm": 0.7308608889579773, "learning_rate": 0.00011043615356951982, "loss": 2.9257, "step": 43935 }, { "epoch": 2.15, "grad_norm": 0.719944953918457, "learning_rate": 0.00011042422074543118, "loss": 2.8804, "step": 43936 }, { "epoch": 2.15, "grad_norm": 0.7189018130302429, "learning_rate": 0.00011041228842064864, "loss": 2.9876, "step": 43937 }, { "epoch": 2.15, "grad_norm": 0.7342965006828308, "learning_rate": 0.00011040035659520375, "loss": 3.0549, "step": 43938 }, { "epoch": 2.15, "grad_norm": 0.7213689684867859, "learning_rate": 0.00011038842526912776, "loss": 2.8142, "step": 43939 }, { "epoch": 2.15, "grad_norm": 0.7366634011268616, "learning_rate": 0.00011037649444245225, "loss": 2.757, "step": 43940 }, { "epoch": 2.15, "grad_norm": 0.7220673561096191, "learning_rate": 0.00011036456411520853, "loss": 2.5639, "step": 43941 }, { "epoch": 2.15, "grad_norm": 0.705657422542572, "learning_rate": 0.00011035263428742799, "loss": 2.9537, "step": 43942 }, { "epoch": 2.15, "grad_norm": 0.7454379200935364, "learning_rate": 0.00011034070495914219, "loss": 2.7032, "step": 43943 }, { "epoch": 2.15, "grad_norm": 0.7317204475402832, "learning_rate": 0.00011032877613038238, "loss": 2.9766, "step": 43944 }, { "epoch": 2.15, "grad_norm": 0.7271674871444702, "learning_rate": 0.00011031684780118016, "loss": 2.9546, "step": 43945 }, { "epoch": 2.15, "grad_norm": 0.6818316578865051, "learning_rate": 0.00011030491997156679, "loss": 2.8576, "step": 43946 }, { "epoch": 2.15, "grad_norm": 0.6842644810676575, "learning_rate": 0.00011029299264157382, "loss": 3.1527, "step": 43947 }, { "epoch": 2.15, "grad_norm": 0.6735297441482544, "learning_rate": 0.00011028106581123253, "loss": 3.0781, "step": 43948 }, { "epoch": 2.15, "grad_norm": 0.745730996131897, "learning_rate": 0.00011026913948057448, "loss": 2.9958, "step": 43949 }, { "epoch": 2.15, "grad_norm": 0.7029087543487549, "learning_rate": 0.000110257213649631, "loss": 2.845, "step": 43950 }, { "epoch": 2.15, "grad_norm": 0.6922839879989624, "learning_rate": 0.0001102452883184334, "loss": 2.9185, "step": 43951 }, { "epoch": 2.15, "grad_norm": 0.7484878301620483, "learning_rate": 0.00011023336348701332, "loss": 3.0122, "step": 43952 }, { "epoch": 2.15, "grad_norm": 0.7015834450721741, "learning_rate": 0.00011022143915540189, "loss": 2.9061, "step": 43953 }, { "epoch": 2.15, "grad_norm": 0.7120810151100159, "learning_rate": 0.0001102095153236307, "loss": 2.984, "step": 43954 }, { "epoch": 2.15, "grad_norm": 0.730353832244873, "learning_rate": 0.00011019759199173123, "loss": 2.9169, "step": 43955 }, { "epoch": 2.15, "grad_norm": 0.7234374284744263, "learning_rate": 0.00011018566915973477, "loss": 3.1308, "step": 43956 }, { "epoch": 2.15, "grad_norm": 0.6735356450080872, "learning_rate": 0.0001101737468276727, "loss": 2.9635, "step": 43957 }, { "epoch": 2.15, "grad_norm": 0.6654012799263, "learning_rate": 0.00011016182499557638, "loss": 2.8069, "step": 43958 }, { "epoch": 2.15, "grad_norm": 0.6770622134208679, "learning_rate": 0.00011014990366347725, "loss": 2.9357, "step": 43959 }, { "epoch": 2.15, "grad_norm": 0.7623130083084106, "learning_rate": 0.00011013798283140686, "loss": 3.0011, "step": 43960 }, { "epoch": 2.15, "grad_norm": 0.7139075398445129, "learning_rate": 0.00011012606249939635, "loss": 3.1044, "step": 43961 }, { "epoch": 2.15, "grad_norm": 0.7361212372779846, "learning_rate": 0.00011011414266747738, "loss": 2.9142, "step": 43962 }, { "epoch": 2.15, "grad_norm": 0.6977233290672302, "learning_rate": 0.00011010222333568108, "loss": 3.1171, "step": 43963 }, { "epoch": 2.15, "grad_norm": 0.6813346743583679, "learning_rate": 0.0001100903045040391, "loss": 3.0365, "step": 43964 }, { "epoch": 2.15, "grad_norm": 0.7580142021179199, "learning_rate": 0.00011007838617258272, "loss": 2.8071, "step": 43965 }, { "epoch": 2.15, "grad_norm": 0.6768600940704346, "learning_rate": 0.0001100664683413432, "loss": 2.8438, "step": 43966 }, { "epoch": 2.15, "grad_norm": 0.6694023013114929, "learning_rate": 0.00011005455101035216, "loss": 2.9374, "step": 43967 }, { "epoch": 2.15, "grad_norm": 0.6759893894195557, "learning_rate": 0.00011004263417964081, "loss": 2.9976, "step": 43968 }, { "epoch": 2.15, "grad_norm": 0.6973206996917725, "learning_rate": 0.00011003071784924054, "loss": 3.0073, "step": 43969 }, { "epoch": 2.15, "grad_norm": 0.7266998291015625, "learning_rate": 0.00011001880201918295, "loss": 2.9469, "step": 43970 }, { "epoch": 2.15, "grad_norm": 0.6822577118873596, "learning_rate": 0.00011000688668949927, "loss": 2.8839, "step": 43971 }, { "epoch": 2.15, "grad_norm": 0.7668481469154358, "learning_rate": 0.00010999497186022086, "loss": 3.0304, "step": 43972 }, { "epoch": 2.16, "grad_norm": 0.7106690406799316, "learning_rate": 0.00010998305753137901, "loss": 2.7812, "step": 43973 }, { "epoch": 2.16, "grad_norm": 0.6828577518463135, "learning_rate": 0.00010997114370300523, "loss": 2.889, "step": 43974 }, { "epoch": 2.16, "grad_norm": 0.6851549744606018, "learning_rate": 0.00010995923037513101, "loss": 2.9489, "step": 43975 }, { "epoch": 2.16, "grad_norm": 0.6917469501495361, "learning_rate": 0.00010994731754778749, "loss": 3.2144, "step": 43976 }, { "epoch": 2.16, "grad_norm": 0.6860583424568176, "learning_rate": 0.00010993540522100626, "loss": 2.9111, "step": 43977 }, { "epoch": 2.16, "grad_norm": 0.6750058531761169, "learning_rate": 0.0001099234933948186, "loss": 2.7888, "step": 43978 }, { "epoch": 2.16, "grad_norm": 0.6400216817855835, "learning_rate": 0.00010991158206925578, "loss": 3.1329, "step": 43979 }, { "epoch": 2.16, "grad_norm": 0.6475121378898621, "learning_rate": 0.00010989967124434934, "loss": 2.9708, "step": 43980 }, { "epoch": 2.16, "grad_norm": 0.6716324687004089, "learning_rate": 0.0001098877609201305, "loss": 2.8048, "step": 43981 }, { "epoch": 2.16, "grad_norm": 0.7722020745277405, "learning_rate": 0.00010987585109663082, "loss": 2.9072, "step": 43982 }, { "epoch": 2.16, "grad_norm": 0.750815212726593, "learning_rate": 0.00010986394177388144, "loss": 2.8756, "step": 43983 }, { "epoch": 2.16, "grad_norm": 0.725193977355957, "learning_rate": 0.00010985203295191385, "loss": 2.9399, "step": 43984 }, { "epoch": 2.16, "grad_norm": 0.7670903205871582, "learning_rate": 0.00010984012463075953, "loss": 2.6659, "step": 43985 }, { "epoch": 2.16, "grad_norm": 0.6852924823760986, "learning_rate": 0.0001098282168104497, "loss": 2.96, "step": 43986 }, { "epoch": 2.16, "grad_norm": 0.7253130078315735, "learning_rate": 0.00010981630949101578, "loss": 2.5994, "step": 43987 }, { "epoch": 2.16, "grad_norm": 0.7172092795372009, "learning_rate": 0.00010980440267248896, "loss": 3.1027, "step": 43988 }, { "epoch": 2.16, "grad_norm": 0.7548342347145081, "learning_rate": 0.00010979249635490075, "loss": 2.9589, "step": 43989 }, { "epoch": 2.16, "grad_norm": 0.7267433404922485, "learning_rate": 0.0001097805905382826, "loss": 2.8408, "step": 43990 }, { "epoch": 2.16, "grad_norm": 0.6913394927978516, "learning_rate": 0.00010976868522266569, "loss": 2.9435, "step": 43991 }, { "epoch": 2.16, "grad_norm": 0.7468941807746887, "learning_rate": 0.00010975678040808153, "loss": 2.8856, "step": 43992 }, { "epoch": 2.16, "grad_norm": 0.7527220249176025, "learning_rate": 0.00010974487609456138, "loss": 2.7722, "step": 43993 }, { "epoch": 2.16, "grad_norm": 0.8277668952941895, "learning_rate": 0.00010973297228213654, "loss": 2.9391, "step": 43994 }, { "epoch": 2.16, "grad_norm": 0.7322959899902344, "learning_rate": 0.00010972106897083851, "loss": 2.8405, "step": 43995 }, { "epoch": 2.16, "grad_norm": 0.6588741540908813, "learning_rate": 0.0001097091661606985, "loss": 2.8649, "step": 43996 }, { "epoch": 2.16, "grad_norm": 0.7032271027565002, "learning_rate": 0.00010969726385174799, "loss": 2.939, "step": 43997 }, { "epoch": 2.16, "grad_norm": 0.666032075881958, "learning_rate": 0.00010968536204401817, "loss": 3.0517, "step": 43998 }, { "epoch": 2.16, "grad_norm": 0.6766948699951172, "learning_rate": 0.00010967346073754061, "loss": 2.9951, "step": 43999 }, { "epoch": 2.16, "grad_norm": 0.6832386255264282, "learning_rate": 0.0001096615599323465, "loss": 2.7958, "step": 44000 }, { "epoch": 2.16, "grad_norm": 0.6713371276855469, "learning_rate": 0.00010964965962846708, "loss": 2.9533, "step": 44001 }, { "epoch": 2.16, "grad_norm": 0.7049140930175781, "learning_rate": 0.00010963775982593396, "loss": 3.0323, "step": 44002 }, { "epoch": 2.16, "grad_norm": 0.7148593068122864, "learning_rate": 0.00010962586052477825, "loss": 3.0128, "step": 44003 }, { "epoch": 2.16, "grad_norm": 0.7361739277839661, "learning_rate": 0.00010961396172503137, "loss": 2.8242, "step": 44004 }, { "epoch": 2.16, "grad_norm": 0.6860854029655457, "learning_rate": 0.00010960206342672477, "loss": 2.8884, "step": 44005 }, { "epoch": 2.16, "grad_norm": 0.8418459892272949, "learning_rate": 0.00010959016562988962, "loss": 3.0021, "step": 44006 }, { "epoch": 2.16, "grad_norm": 0.728326678276062, "learning_rate": 0.00010957826833455737, "loss": 2.8175, "step": 44007 }, { "epoch": 2.16, "grad_norm": 0.7612557411193848, "learning_rate": 0.00010956637154075938, "loss": 2.8144, "step": 44008 }, { "epoch": 2.16, "grad_norm": 0.7064254283905029, "learning_rate": 0.00010955447524852677, "loss": 2.8047, "step": 44009 }, { "epoch": 2.16, "grad_norm": 0.694352924823761, "learning_rate": 0.00010954257945789114, "loss": 3.0043, "step": 44010 }, { "epoch": 2.16, "grad_norm": 0.6741226315498352, "learning_rate": 0.00010953068416888359, "loss": 2.9749, "step": 44011 }, { "epoch": 2.16, "grad_norm": 0.6707633137702942, "learning_rate": 0.00010951878938153567, "loss": 3.0853, "step": 44012 }, { "epoch": 2.16, "grad_norm": 0.7317514419555664, "learning_rate": 0.00010950689509587851, "loss": 2.8133, "step": 44013 }, { "epoch": 2.16, "grad_norm": 0.6689852476119995, "learning_rate": 0.00010949500131194364, "loss": 2.7656, "step": 44014 }, { "epoch": 2.16, "grad_norm": 0.7170653939247131, "learning_rate": 0.00010948310802976224, "loss": 2.8662, "step": 44015 }, { "epoch": 2.16, "grad_norm": 0.6306023597717285, "learning_rate": 0.0001094712152493656, "loss": 2.8672, "step": 44016 }, { "epoch": 2.16, "grad_norm": 0.6976908445358276, "learning_rate": 0.00010945932297078516, "loss": 3.2326, "step": 44017 }, { "epoch": 2.16, "grad_norm": 0.6834633946418762, "learning_rate": 0.00010944743119405214, "loss": 2.9629, "step": 44018 }, { "epoch": 2.16, "grad_norm": 0.666874885559082, "learning_rate": 0.00010943553991919788, "loss": 2.8406, "step": 44019 }, { "epoch": 2.16, "grad_norm": 0.6879183650016785, "learning_rate": 0.00010942364914625387, "loss": 2.7367, "step": 44020 }, { "epoch": 2.16, "grad_norm": 0.6888954043388367, "learning_rate": 0.00010941175887525128, "loss": 3.0009, "step": 44021 }, { "epoch": 2.16, "grad_norm": 0.7139040231704712, "learning_rate": 0.00010939986910622141, "loss": 2.979, "step": 44022 }, { "epoch": 2.16, "grad_norm": 0.7036026120185852, "learning_rate": 0.00010938797983919553, "loss": 2.9986, "step": 44023 }, { "epoch": 2.16, "grad_norm": 0.7085335850715637, "learning_rate": 0.00010937609107420512, "loss": 2.8668, "step": 44024 }, { "epoch": 2.16, "grad_norm": 0.7103490233421326, "learning_rate": 0.0001093642028112813, "loss": 2.8923, "step": 44025 }, { "epoch": 2.16, "grad_norm": 0.6850371956825256, "learning_rate": 0.00010935231505045547, "loss": 2.8388, "step": 44026 }, { "epoch": 2.16, "grad_norm": 0.6872725486755371, "learning_rate": 0.00010934042779175905, "loss": 3.0184, "step": 44027 }, { "epoch": 2.16, "grad_norm": 0.6997257471084595, "learning_rate": 0.00010932854103522318, "loss": 3.055, "step": 44028 }, { "epoch": 2.16, "grad_norm": 0.6553372144699097, "learning_rate": 0.00010931665478087933, "loss": 2.7495, "step": 44029 }, { "epoch": 2.16, "grad_norm": 0.7009562253952026, "learning_rate": 0.0001093047690287587, "loss": 3.0277, "step": 44030 }, { "epoch": 2.16, "grad_norm": 0.7105560302734375, "learning_rate": 0.00010929288377889249, "loss": 3.132, "step": 44031 }, { "epoch": 2.16, "grad_norm": 0.6748614311218262, "learning_rate": 0.00010928099903131225, "loss": 3.0997, "step": 44032 }, { "epoch": 2.16, "grad_norm": 0.7473457455635071, "learning_rate": 0.00010926911478604903, "loss": 2.9778, "step": 44033 }, { "epoch": 2.16, "grad_norm": 0.6980936527252197, "learning_rate": 0.00010925723104313439, "loss": 3.0761, "step": 44034 }, { "epoch": 2.16, "grad_norm": 0.6893765330314636, "learning_rate": 0.00010924534780259937, "loss": 2.7932, "step": 44035 }, { "epoch": 2.16, "grad_norm": 0.6904683113098145, "learning_rate": 0.0001092334650644755, "loss": 2.8992, "step": 44036 }, { "epoch": 2.16, "grad_norm": 0.7253763675689697, "learning_rate": 0.00010922158282879398, "loss": 2.9312, "step": 44037 }, { "epoch": 2.16, "grad_norm": 0.6974086165428162, "learning_rate": 0.00010920970109558597, "loss": 2.9157, "step": 44038 }, { "epoch": 2.16, "grad_norm": 0.7278842329978943, "learning_rate": 0.00010919781986488302, "loss": 2.8189, "step": 44039 }, { "epoch": 2.16, "grad_norm": 0.73302161693573, "learning_rate": 0.00010918593913671615, "loss": 2.7879, "step": 44040 }, { "epoch": 2.16, "grad_norm": 0.7178779244422913, "learning_rate": 0.00010917405891111684, "loss": 2.8211, "step": 44041 }, { "epoch": 2.16, "grad_norm": 0.7002609372138977, "learning_rate": 0.00010916217918811642, "loss": 2.9447, "step": 44042 }, { "epoch": 2.16, "grad_norm": 0.7161980867385864, "learning_rate": 0.00010915029996774607, "loss": 2.8354, "step": 44043 }, { "epoch": 2.16, "grad_norm": 0.7740674614906311, "learning_rate": 0.0001091384212500371, "loss": 3.0059, "step": 44044 }, { "epoch": 2.16, "grad_norm": 0.7472088932991028, "learning_rate": 0.00010912654303502072, "loss": 2.5457, "step": 44045 }, { "epoch": 2.16, "grad_norm": 0.6775591373443604, "learning_rate": 0.00010911466532272827, "loss": 2.7964, "step": 44046 }, { "epoch": 2.16, "grad_norm": 0.6801954507827759, "learning_rate": 0.00010910278811319119, "loss": 2.9941, "step": 44047 }, { "epoch": 2.16, "grad_norm": 0.7444169521331787, "learning_rate": 0.00010909091140644049, "loss": 3.0355, "step": 44048 }, { "epoch": 2.16, "grad_norm": 0.7006654739379883, "learning_rate": 0.0001090790352025077, "loss": 2.8252, "step": 44049 }, { "epoch": 2.16, "grad_norm": 0.7231435775756836, "learning_rate": 0.00010906715950142389, "loss": 2.8966, "step": 44050 }, { "epoch": 2.16, "grad_norm": 0.6741910576820374, "learning_rate": 0.00010905528430322054, "loss": 2.9011, "step": 44051 }, { "epoch": 2.16, "grad_norm": 0.6673889756202698, "learning_rate": 0.0001090434096079288, "loss": 2.8311, "step": 44052 }, { "epoch": 2.16, "grad_norm": 0.7044496536254883, "learning_rate": 0.00010903153541557988, "loss": 2.8997, "step": 44053 }, { "epoch": 2.16, "grad_norm": 0.7047674655914307, "learning_rate": 0.00010901966172620527, "loss": 2.9994, "step": 44054 }, { "epoch": 2.16, "grad_norm": 0.7237603664398193, "learning_rate": 0.00010900778853983599, "loss": 2.6291, "step": 44055 }, { "epoch": 2.16, "grad_norm": 0.8270739316940308, "learning_rate": 0.00010899591585650345, "loss": 2.7419, "step": 44056 }, { "epoch": 2.16, "grad_norm": 0.7122185230255127, "learning_rate": 0.000108984043676239, "loss": 3.0659, "step": 44057 }, { "epoch": 2.16, "grad_norm": 0.6983858346939087, "learning_rate": 0.00010897217199907381, "loss": 3.0083, "step": 44058 }, { "epoch": 2.16, "grad_norm": 0.7358377575874329, "learning_rate": 0.00010896030082503915, "loss": 2.6745, "step": 44059 }, { "epoch": 2.16, "grad_norm": 0.7250675559043884, "learning_rate": 0.00010894843015416622, "loss": 2.9184, "step": 44060 }, { "epoch": 2.16, "grad_norm": 0.7548567056655884, "learning_rate": 0.00010893655998648634, "loss": 3.1155, "step": 44061 }, { "epoch": 2.16, "grad_norm": 0.7145891189575195, "learning_rate": 0.00010892469032203088, "loss": 2.9655, "step": 44062 }, { "epoch": 2.16, "grad_norm": 0.7194769382476807, "learning_rate": 0.00010891282116083093, "loss": 2.9013, "step": 44063 }, { "epoch": 2.16, "grad_norm": 0.6655535101890564, "learning_rate": 0.00010890095250291792, "loss": 3.0525, "step": 44064 }, { "epoch": 2.16, "grad_norm": 0.7517229318618774, "learning_rate": 0.00010888908434832294, "loss": 3.1682, "step": 44065 }, { "epoch": 2.16, "grad_norm": 0.6540136337280273, "learning_rate": 0.00010887721669707742, "loss": 2.9677, "step": 44066 }, { "epoch": 2.16, "grad_norm": 0.7223973274230957, "learning_rate": 0.00010886534954921253, "loss": 2.8192, "step": 44067 }, { "epoch": 2.16, "grad_norm": 0.7570539116859436, "learning_rate": 0.00010885348290475943, "loss": 3.0616, "step": 44068 }, { "epoch": 2.16, "grad_norm": 0.6754083037376404, "learning_rate": 0.00010884161676374957, "loss": 3.1191, "step": 44069 }, { "epoch": 2.16, "grad_norm": 0.6810846328735352, "learning_rate": 0.00010882975112621401, "loss": 3.0448, "step": 44070 }, { "epoch": 2.16, "grad_norm": 0.7092759609222412, "learning_rate": 0.00010881788599218406, "loss": 2.939, "step": 44071 }, { "epoch": 2.16, "grad_norm": 0.6637600064277649, "learning_rate": 0.00010880602136169116, "loss": 2.9407, "step": 44072 }, { "epoch": 2.16, "grad_norm": 0.7038546800613403, "learning_rate": 0.00010879415723476636, "loss": 3.0425, "step": 44073 }, { "epoch": 2.16, "grad_norm": 0.7373028993606567, "learning_rate": 0.00010878229361144098, "loss": 3.0257, "step": 44074 }, { "epoch": 2.16, "grad_norm": 0.6334795355796814, "learning_rate": 0.00010877043049174608, "loss": 2.9988, "step": 44075 }, { "epoch": 2.16, "grad_norm": 0.716715395450592, "learning_rate": 0.0001087585678757131, "loss": 2.8645, "step": 44076 }, { "epoch": 2.16, "grad_norm": 0.7330484986305237, "learning_rate": 0.00010874670576337335, "loss": 3.0347, "step": 44077 }, { "epoch": 2.16, "grad_norm": 0.6826630234718323, "learning_rate": 0.00010873484415475789, "loss": 3.0281, "step": 44078 }, { "epoch": 2.16, "grad_norm": 0.6680867671966553, "learning_rate": 0.00010872298304989811, "loss": 3.0422, "step": 44079 }, { "epoch": 2.16, "grad_norm": 0.6701593995094299, "learning_rate": 0.00010871112244882519, "loss": 2.7085, "step": 44080 }, { "epoch": 2.16, "grad_norm": 0.7055572271347046, "learning_rate": 0.00010869926235157028, "loss": 3.1402, "step": 44081 }, { "epoch": 2.16, "grad_norm": 0.6829777359962463, "learning_rate": 0.00010868740275816478, "loss": 2.7673, "step": 44082 }, { "epoch": 2.16, "grad_norm": 0.6810120344161987, "learning_rate": 0.00010867554366863972, "loss": 3.0536, "step": 44083 }, { "epoch": 2.16, "grad_norm": 0.6956943869590759, "learning_rate": 0.00010866368508302658, "loss": 2.9712, "step": 44084 }, { "epoch": 2.16, "grad_norm": 0.7180123329162598, "learning_rate": 0.00010865182700135638, "loss": 2.8563, "step": 44085 }, { "epoch": 2.16, "grad_norm": 0.6888797283172607, "learning_rate": 0.00010863996942366042, "loss": 3.0218, "step": 44086 }, { "epoch": 2.16, "grad_norm": 0.6845893263816833, "learning_rate": 0.00010862811234997007, "loss": 2.9983, "step": 44087 }, { "epoch": 2.16, "grad_norm": 0.7172102928161621, "learning_rate": 0.00010861625578031642, "loss": 2.9244, "step": 44088 }, { "epoch": 2.16, "grad_norm": 0.7032397985458374, "learning_rate": 0.00010860439971473072, "loss": 2.9089, "step": 44089 }, { "epoch": 2.16, "grad_norm": 0.7030555605888367, "learning_rate": 0.00010859254415324413, "loss": 2.8213, "step": 44090 }, { "epoch": 2.16, "grad_norm": 0.7154149413108826, "learning_rate": 0.0001085806890958879, "loss": 2.6665, "step": 44091 }, { "epoch": 2.16, "grad_norm": 0.6783580780029297, "learning_rate": 0.00010856883454269342, "loss": 2.9993, "step": 44092 }, { "epoch": 2.16, "grad_norm": 0.7258152365684509, "learning_rate": 0.0001085569804936917, "loss": 2.9013, "step": 44093 }, { "epoch": 2.16, "grad_norm": 0.6536148190498352, "learning_rate": 0.00010854512694891413, "loss": 2.8165, "step": 44094 }, { "epoch": 2.16, "grad_norm": 0.712685763835907, "learning_rate": 0.00010853327390839184, "loss": 2.9026, "step": 44095 }, { "epoch": 2.16, "grad_norm": 0.6730918288230896, "learning_rate": 0.00010852142137215597, "loss": 2.9592, "step": 44096 }, { "epoch": 2.16, "grad_norm": 0.6709897518157959, "learning_rate": 0.00010850956934023795, "loss": 2.5844, "step": 44097 }, { "epoch": 2.16, "grad_norm": 0.6992678642272949, "learning_rate": 0.00010849771781266872, "loss": 2.9257, "step": 44098 }, { "epoch": 2.16, "grad_norm": 0.7160495519638062, "learning_rate": 0.0001084858667894798, "loss": 3.0139, "step": 44099 }, { "epoch": 2.16, "grad_norm": 0.6836722493171692, "learning_rate": 0.00010847401627070212, "loss": 3.0412, "step": 44100 }, { "epoch": 2.16, "grad_norm": 0.6511483192443848, "learning_rate": 0.00010846216625636712, "loss": 2.8481, "step": 44101 }, { "epoch": 2.16, "grad_norm": 0.6964370012283325, "learning_rate": 0.00010845031674650589, "loss": 2.901, "step": 44102 }, { "epoch": 2.16, "grad_norm": 0.7739323377609253, "learning_rate": 0.0001084384677411496, "loss": 3.06, "step": 44103 }, { "epoch": 2.16, "grad_norm": 0.7379111051559448, "learning_rate": 0.0001084266192403296, "loss": 2.8302, "step": 44104 }, { "epoch": 2.16, "grad_norm": 0.7733993530273438, "learning_rate": 0.00010841477124407694, "loss": 2.986, "step": 44105 }, { "epoch": 2.16, "grad_norm": 0.7553689479827881, "learning_rate": 0.00010840292375242298, "loss": 2.9365, "step": 44106 }, { "epoch": 2.16, "grad_norm": 0.6967439651489258, "learning_rate": 0.00010839107676539875, "loss": 2.9238, "step": 44107 }, { "epoch": 2.16, "grad_norm": 0.6885391473770142, "learning_rate": 0.00010837923028303555, "loss": 2.9053, "step": 44108 }, { "epoch": 2.16, "grad_norm": 0.6458328366279602, "learning_rate": 0.00010836738430536466, "loss": 2.945, "step": 44109 }, { "epoch": 2.16, "grad_norm": 0.7221814393997192, "learning_rate": 0.00010835553883241723, "loss": 2.827, "step": 44110 }, { "epoch": 2.16, "grad_norm": 0.6976704001426697, "learning_rate": 0.00010834369386422441, "loss": 2.8029, "step": 44111 }, { "epoch": 2.16, "grad_norm": 0.7037103176116943, "learning_rate": 0.0001083318494008173, "loss": 2.8437, "step": 44112 }, { "epoch": 2.16, "grad_norm": 0.7122330069541931, "learning_rate": 0.00010832000544222726, "loss": 2.9479, "step": 44113 }, { "epoch": 2.16, "grad_norm": 0.705687403678894, "learning_rate": 0.0001083081619884855, "loss": 2.8805, "step": 44114 }, { "epoch": 2.16, "grad_norm": 0.6933003067970276, "learning_rate": 0.00010829631903962304, "loss": 2.9338, "step": 44115 }, { "epoch": 2.16, "grad_norm": 0.7246118783950806, "learning_rate": 0.00010828447659567132, "loss": 2.9069, "step": 44116 }, { "epoch": 2.16, "grad_norm": 0.6804153323173523, "learning_rate": 0.00010827263465666137, "loss": 2.8831, "step": 44117 }, { "epoch": 2.16, "grad_norm": 0.7856549024581909, "learning_rate": 0.00010826079322262427, "loss": 2.8996, "step": 44118 }, { "epoch": 2.16, "grad_norm": 0.6795721650123596, "learning_rate": 0.00010824895229359146, "loss": 2.716, "step": 44119 }, { "epoch": 2.16, "grad_norm": 0.7031411528587341, "learning_rate": 0.00010823711186959392, "loss": 2.9412, "step": 44120 }, { "epoch": 2.16, "grad_norm": 0.6899999976158142, "learning_rate": 0.00010822527195066301, "loss": 3.0925, "step": 44121 }, { "epoch": 2.16, "grad_norm": 0.7404148578643799, "learning_rate": 0.00010821343253682977, "loss": 2.9588, "step": 44122 }, { "epoch": 2.16, "grad_norm": 0.7039032578468323, "learning_rate": 0.0001082015936281255, "loss": 2.9895, "step": 44123 }, { "epoch": 2.16, "grad_norm": 0.6816439032554626, "learning_rate": 0.0001081897552245813, "loss": 2.707, "step": 44124 }, { "epoch": 2.16, "grad_norm": 0.7398577928543091, "learning_rate": 0.00010817791732622829, "loss": 3.0734, "step": 44125 }, { "epoch": 2.16, "grad_norm": 0.7134485244750977, "learning_rate": 0.00010816607993309785, "loss": 2.9215, "step": 44126 }, { "epoch": 2.16, "grad_norm": 0.7148016691207886, "learning_rate": 0.0001081542430452209, "loss": 3.0477, "step": 44127 }, { "epoch": 2.16, "grad_norm": 0.7097327709197998, "learning_rate": 0.00010814240666262879, "loss": 2.9591, "step": 44128 }, { "epoch": 2.16, "grad_norm": 0.7393547892570496, "learning_rate": 0.00010813057078535272, "loss": 3.0054, "step": 44129 }, { "epoch": 2.16, "grad_norm": 0.7232438325881958, "learning_rate": 0.00010811873541342372, "loss": 2.7291, "step": 44130 }, { "epoch": 2.16, "grad_norm": 0.8301907181739807, "learning_rate": 0.00010810690054687315, "loss": 2.7839, "step": 44131 }, { "epoch": 2.16, "grad_norm": 0.751732349395752, "learning_rate": 0.00010809506618573207, "loss": 2.8497, "step": 44132 }, { "epoch": 2.16, "grad_norm": 0.7149835228919983, "learning_rate": 0.00010808323233003154, "loss": 3.0302, "step": 44133 }, { "epoch": 2.16, "grad_norm": 0.7186267375946045, "learning_rate": 0.00010807139897980295, "loss": 2.8241, "step": 44134 }, { "epoch": 2.16, "grad_norm": 0.6884257197380066, "learning_rate": 0.00010805956613507726, "loss": 2.8621, "step": 44135 }, { "epoch": 2.16, "grad_norm": 0.7444910407066345, "learning_rate": 0.00010804773379588582, "loss": 2.8246, "step": 44136 }, { "epoch": 2.16, "grad_norm": 0.7724918127059937, "learning_rate": 0.00010803590196225963, "loss": 2.9742, "step": 44137 }, { "epoch": 2.16, "grad_norm": 0.707662045955658, "learning_rate": 0.00010802407063423001, "loss": 2.7956, "step": 44138 }, { "epoch": 2.16, "grad_norm": 0.7242353558540344, "learning_rate": 0.00010801223981182806, "loss": 3.0574, "step": 44139 }, { "epoch": 2.16, "grad_norm": 0.6966133713722229, "learning_rate": 0.0001080004094950848, "loss": 3.0594, "step": 44140 }, { "epoch": 2.16, "grad_norm": 0.7180776000022888, "learning_rate": 0.00010798857968403162, "loss": 2.9338, "step": 44141 }, { "epoch": 2.16, "grad_norm": 0.97182697057724, "learning_rate": 0.00010797675037869947, "loss": 2.9121, "step": 44142 }, { "epoch": 2.16, "grad_norm": 0.7087554335594177, "learning_rate": 0.0001079649215791196, "loss": 2.6782, "step": 44143 }, { "epoch": 2.16, "grad_norm": 0.6923799514770508, "learning_rate": 0.00010795309328532327, "loss": 2.8516, "step": 44144 }, { "epoch": 2.16, "grad_norm": 0.6832737922668457, "learning_rate": 0.00010794126549734155, "loss": 2.8526, "step": 44145 }, { "epoch": 2.16, "grad_norm": 0.6955265402793884, "learning_rate": 0.00010792943821520554, "loss": 2.892, "step": 44146 }, { "epoch": 2.16, "grad_norm": 0.7082833051681519, "learning_rate": 0.00010791761143894635, "loss": 2.7952, "step": 44147 }, { "epoch": 2.16, "grad_norm": 0.6620471477508545, "learning_rate": 0.00010790578516859517, "loss": 3.1079, "step": 44148 }, { "epoch": 2.16, "grad_norm": 0.7037109136581421, "learning_rate": 0.00010789395940418331, "loss": 3.0704, "step": 44149 }, { "epoch": 2.16, "grad_norm": 0.6921175122261047, "learning_rate": 0.0001078821341457417, "loss": 2.9341, "step": 44150 }, { "epoch": 2.16, "grad_norm": 0.7180284261703491, "learning_rate": 0.00010787030939330165, "loss": 2.9196, "step": 44151 }, { "epoch": 2.16, "grad_norm": 0.6754325032234192, "learning_rate": 0.00010785848514689414, "loss": 2.8187, "step": 44152 }, { "epoch": 2.16, "grad_norm": 0.7477900981903076, "learning_rate": 0.00010784666140655051, "loss": 3.0184, "step": 44153 }, { "epoch": 2.16, "grad_norm": 0.7051811814308167, "learning_rate": 0.0001078348381723018, "loss": 2.8238, "step": 44154 }, { "epoch": 2.16, "grad_norm": 0.7260898947715759, "learning_rate": 0.00010782301544417903, "loss": 2.8263, "step": 44155 }, { "epoch": 2.16, "grad_norm": 0.7143333554267883, "learning_rate": 0.00010781119322221354, "loss": 2.773, "step": 44156 }, { "epoch": 2.16, "grad_norm": 0.7197613716125488, "learning_rate": 0.00010779937150643628, "loss": 2.9153, "step": 44157 }, { "epoch": 2.16, "grad_norm": 0.7276310324668884, "learning_rate": 0.00010778755029687852, "loss": 2.8371, "step": 44158 }, { "epoch": 2.16, "grad_norm": 0.6611834764480591, "learning_rate": 0.00010777572959357143, "loss": 3.0825, "step": 44159 }, { "epoch": 2.16, "grad_norm": 0.6871948838233948, "learning_rate": 0.0001077639093965461, "loss": 3.0084, "step": 44160 }, { "epoch": 2.16, "grad_norm": 0.7019148468971252, "learning_rate": 0.00010775208970583359, "loss": 2.9931, "step": 44161 }, { "epoch": 2.16, "grad_norm": 0.6876431703567505, "learning_rate": 0.000107740270521465, "loss": 3.141, "step": 44162 }, { "epoch": 2.16, "grad_norm": 0.667335033416748, "learning_rate": 0.00010772845184347154, "loss": 2.7994, "step": 44163 }, { "epoch": 2.16, "grad_norm": 0.6756302118301392, "learning_rate": 0.00010771663367188445, "loss": 3.0605, "step": 44164 }, { "epoch": 2.16, "grad_norm": 0.7222525477409363, "learning_rate": 0.00010770481600673463, "loss": 3.2013, "step": 44165 }, { "epoch": 2.16, "grad_norm": 0.7120800018310547, "learning_rate": 0.0001076929988480534, "loss": 2.8396, "step": 44166 }, { "epoch": 2.16, "grad_norm": 0.698549211025238, "learning_rate": 0.00010768118219587182, "loss": 2.9708, "step": 44167 }, { "epoch": 2.16, "grad_norm": 0.6790973544120789, "learning_rate": 0.00010766936605022088, "loss": 3.1835, "step": 44168 }, { "epoch": 2.16, "grad_norm": 0.6756714582443237, "learning_rate": 0.00010765755041113194, "loss": 2.6655, "step": 44169 }, { "epoch": 2.16, "grad_norm": 0.7276606559753418, "learning_rate": 0.00010764573527863587, "loss": 2.8426, "step": 44170 }, { "epoch": 2.16, "grad_norm": 0.6843422055244446, "learning_rate": 0.00010763392065276404, "loss": 2.9493, "step": 44171 }, { "epoch": 2.16, "grad_norm": 0.7065037488937378, "learning_rate": 0.00010762210653354733, "loss": 2.8149, "step": 44172 }, { "epoch": 2.16, "grad_norm": 0.7013691067695618, "learning_rate": 0.00010761029292101693, "loss": 2.811, "step": 44173 }, { "epoch": 2.16, "grad_norm": 0.727316677570343, "learning_rate": 0.00010759847981520412, "loss": 3.0262, "step": 44174 }, { "epoch": 2.16, "grad_norm": 0.7056801319122314, "learning_rate": 0.0001075866672161399, "loss": 2.8486, "step": 44175 }, { "epoch": 2.16, "grad_norm": 0.6833690404891968, "learning_rate": 0.00010757485512385535, "loss": 2.8004, "step": 44176 }, { "epoch": 2.17, "grad_norm": 0.6740131974220276, "learning_rate": 0.00010756304353838152, "loss": 2.829, "step": 44177 }, { "epoch": 2.17, "grad_norm": 0.7866957187652588, "learning_rate": 0.00010755123245974969, "loss": 2.8718, "step": 44178 }, { "epoch": 2.17, "grad_norm": 0.6585379838943481, "learning_rate": 0.00010753942188799077, "loss": 2.9546, "step": 44179 }, { "epoch": 2.17, "grad_norm": 0.7182008624076843, "learning_rate": 0.00010752761182313597, "loss": 2.8891, "step": 44180 }, { "epoch": 2.17, "grad_norm": 0.695152223110199, "learning_rate": 0.0001075158022652165, "loss": 3.0113, "step": 44181 }, { "epoch": 2.17, "grad_norm": 0.7504631280899048, "learning_rate": 0.00010750399321426336, "loss": 2.8999, "step": 44182 }, { "epoch": 2.17, "grad_norm": 0.6874749064445496, "learning_rate": 0.00010749218467030767, "loss": 2.7791, "step": 44183 }, { "epoch": 2.17, "grad_norm": 0.725240170955658, "learning_rate": 0.00010748037663338039, "loss": 2.7461, "step": 44184 }, { "epoch": 2.17, "grad_norm": 0.7044048309326172, "learning_rate": 0.00010746856910351275, "loss": 2.9411, "step": 44185 }, { "epoch": 2.17, "grad_norm": 0.7227499485015869, "learning_rate": 0.00010745676208073597, "loss": 2.9253, "step": 44186 }, { "epoch": 2.17, "grad_norm": 0.700607180595398, "learning_rate": 0.0001074449555650809, "loss": 2.886, "step": 44187 }, { "epoch": 2.17, "grad_norm": 0.6638893485069275, "learning_rate": 0.00010743314955657889, "loss": 2.8618, "step": 44188 }, { "epoch": 2.17, "grad_norm": 0.7097645401954651, "learning_rate": 0.00010742134405526078, "loss": 3.0554, "step": 44189 }, { "epoch": 2.17, "grad_norm": 0.7432897686958313, "learning_rate": 0.0001074095390611579, "loss": 3.0488, "step": 44190 }, { "epoch": 2.17, "grad_norm": 0.7031722068786621, "learning_rate": 0.00010739773457430122, "loss": 2.9038, "step": 44191 }, { "epoch": 2.17, "grad_norm": 0.6727100610733032, "learning_rate": 0.00010738593059472174, "loss": 2.8709, "step": 44192 }, { "epoch": 2.17, "grad_norm": 0.7058899402618408, "learning_rate": 0.00010737412712245074, "loss": 3.1592, "step": 44193 }, { "epoch": 2.17, "grad_norm": 0.6950834393501282, "learning_rate": 0.00010736232415751913, "loss": 3.0163, "step": 44194 }, { "epoch": 2.17, "grad_norm": 0.7071797251701355, "learning_rate": 0.00010735052169995809, "loss": 2.9139, "step": 44195 }, { "epoch": 2.17, "grad_norm": 0.6854180097579956, "learning_rate": 0.00010733871974979878, "loss": 2.8134, "step": 44196 }, { "epoch": 2.17, "grad_norm": 0.7189562916755676, "learning_rate": 0.00010732691830707221, "loss": 2.8558, "step": 44197 }, { "epoch": 2.17, "grad_norm": 0.6695606112480164, "learning_rate": 0.00010731511737180947, "loss": 3.0807, "step": 44198 }, { "epoch": 2.17, "grad_norm": 0.6888101696968079, "learning_rate": 0.00010730331694404153, "loss": 3.014, "step": 44199 }, { "epoch": 2.17, "grad_norm": 0.6768573522567749, "learning_rate": 0.00010729151702379955, "loss": 2.9749, "step": 44200 }, { "epoch": 2.17, "grad_norm": 0.7207431197166443, "learning_rate": 0.00010727971761111472, "loss": 3.0053, "step": 44201 }, { "epoch": 2.17, "grad_norm": 0.7181470394134521, "learning_rate": 0.00010726791870601795, "loss": 2.881, "step": 44202 }, { "epoch": 2.17, "grad_norm": 0.7218087911605835, "learning_rate": 0.00010725612030854048, "loss": 2.86, "step": 44203 }, { "epoch": 2.17, "grad_norm": 0.7341431379318237, "learning_rate": 0.00010724432241871329, "loss": 2.9529, "step": 44204 }, { "epoch": 2.17, "grad_norm": 0.7279463410377502, "learning_rate": 0.00010723252503656737, "loss": 2.9883, "step": 44205 }, { "epoch": 2.17, "grad_norm": 0.6712480187416077, "learning_rate": 0.00010722072816213396, "loss": 2.8739, "step": 44206 }, { "epoch": 2.17, "grad_norm": 0.6901949048042297, "learning_rate": 0.00010720893179544397, "loss": 2.8589, "step": 44207 }, { "epoch": 2.17, "grad_norm": 0.7036940455436707, "learning_rate": 0.00010719713593652866, "loss": 2.8149, "step": 44208 }, { "epoch": 2.17, "grad_norm": 0.7346253991127014, "learning_rate": 0.00010718534058541885, "loss": 2.9119, "step": 44209 }, { "epoch": 2.17, "grad_norm": 0.7171415686607361, "learning_rate": 0.00010717354574214577, "loss": 3.0225, "step": 44210 }, { "epoch": 2.17, "grad_norm": 0.7323898673057556, "learning_rate": 0.00010716175140674055, "loss": 2.8658, "step": 44211 }, { "epoch": 2.17, "grad_norm": 0.7688388824462891, "learning_rate": 0.00010714995757923418, "loss": 2.8879, "step": 44212 }, { "epoch": 2.17, "grad_norm": 0.6723164319992065, "learning_rate": 0.00010713816425965768, "loss": 2.9502, "step": 44213 }, { "epoch": 2.17, "grad_norm": 0.6992418766021729, "learning_rate": 0.00010712637144804207, "loss": 2.9745, "step": 44214 }, { "epoch": 2.17, "grad_norm": 0.6746936440467834, "learning_rate": 0.00010711457914441846, "loss": 2.9747, "step": 44215 }, { "epoch": 2.17, "grad_norm": 0.7169411182403564, "learning_rate": 0.00010710278734881804, "loss": 3.0951, "step": 44216 }, { "epoch": 2.17, "grad_norm": 0.7070459127426147, "learning_rate": 0.00010709099606127165, "loss": 2.8509, "step": 44217 }, { "epoch": 2.17, "grad_norm": 0.6970311999320984, "learning_rate": 0.00010707920528181053, "loss": 2.991, "step": 44218 }, { "epoch": 2.17, "grad_norm": 0.7649372816085815, "learning_rate": 0.00010706741501046566, "loss": 3.022, "step": 44219 }, { "epoch": 2.17, "grad_norm": 0.727436900138855, "learning_rate": 0.00010705562524726802, "loss": 2.9837, "step": 44220 }, { "epoch": 2.17, "grad_norm": 0.6991956830024719, "learning_rate": 0.0001070438359922488, "loss": 2.7914, "step": 44221 }, { "epoch": 2.17, "grad_norm": 0.6980074048042297, "learning_rate": 0.0001070320472454389, "loss": 3.0317, "step": 44222 }, { "epoch": 2.17, "grad_norm": 0.6773894429206848, "learning_rate": 0.00010702025900686954, "loss": 2.8222, "step": 44223 }, { "epoch": 2.17, "grad_norm": 0.7050642967224121, "learning_rate": 0.00010700847127657159, "loss": 2.8748, "step": 44224 }, { "epoch": 2.17, "grad_norm": 0.7063111662864685, "learning_rate": 0.00010699668405457625, "loss": 3.1069, "step": 44225 }, { "epoch": 2.17, "grad_norm": 0.7139179706573486, "learning_rate": 0.00010698489734091454, "loss": 2.855, "step": 44226 }, { "epoch": 2.17, "grad_norm": 0.6964961886405945, "learning_rate": 0.00010697311113561733, "loss": 3.0488, "step": 44227 }, { "epoch": 2.17, "grad_norm": 0.6961595416069031, "learning_rate": 0.00010696132543871591, "loss": 2.9491, "step": 44228 }, { "epoch": 2.17, "grad_norm": 0.6952822208404541, "learning_rate": 0.00010694954025024115, "loss": 3.0702, "step": 44229 }, { "epoch": 2.17, "grad_norm": 0.7170188426971436, "learning_rate": 0.00010693775557022407, "loss": 2.9606, "step": 44230 }, { "epoch": 2.17, "grad_norm": 0.7045404314994812, "learning_rate": 0.00010692597139869595, "loss": 2.8441, "step": 44231 }, { "epoch": 2.17, "grad_norm": 0.7429175972938538, "learning_rate": 0.00010691418773568752, "loss": 3.0192, "step": 44232 }, { "epoch": 2.17, "grad_norm": 0.7014785408973694, "learning_rate": 0.00010690240458123008, "loss": 2.8359, "step": 44233 }, { "epoch": 2.17, "grad_norm": 0.7238576412200928, "learning_rate": 0.00010689062193535456, "loss": 2.9959, "step": 44234 }, { "epoch": 2.17, "grad_norm": 0.701926052570343, "learning_rate": 0.00010687883979809183, "loss": 2.9024, "step": 44235 }, { "epoch": 2.17, "grad_norm": 0.7221980094909668, "learning_rate": 0.00010686705816947323, "loss": 2.9919, "step": 44236 }, { "epoch": 2.17, "grad_norm": 0.7259619235992432, "learning_rate": 0.0001068552770495295, "loss": 2.8936, "step": 44237 }, { "epoch": 2.17, "grad_norm": 0.7003749012947083, "learning_rate": 0.00010684349643829188, "loss": 3.1794, "step": 44238 }, { "epoch": 2.17, "grad_norm": 0.7119761109352112, "learning_rate": 0.00010683171633579124, "loss": 2.933, "step": 44239 }, { "epoch": 2.17, "grad_norm": 0.7327340841293335, "learning_rate": 0.00010681993674205877, "loss": 2.743, "step": 44240 }, { "epoch": 2.17, "grad_norm": 0.7414251565933228, "learning_rate": 0.00010680815765712543, "loss": 2.885, "step": 44241 }, { "epoch": 2.17, "grad_norm": 0.683208703994751, "learning_rate": 0.0001067963790810221, "loss": 2.9057, "step": 44242 }, { "epoch": 2.17, "grad_norm": 0.711431086063385, "learning_rate": 0.00010678460101378003, "loss": 2.8266, "step": 44243 }, { "epoch": 2.17, "grad_norm": 0.711889386177063, "learning_rate": 0.00010677282345543005, "loss": 2.8303, "step": 44244 }, { "epoch": 2.17, "grad_norm": 0.7022669315338135, "learning_rate": 0.00010676104640600324, "loss": 2.8831, "step": 44245 }, { "epoch": 2.17, "grad_norm": 0.684972882270813, "learning_rate": 0.00010674926986553074, "loss": 2.8915, "step": 44246 }, { "epoch": 2.17, "grad_norm": 0.6765041947364807, "learning_rate": 0.0001067374938340435, "loss": 2.8127, "step": 44247 }, { "epoch": 2.17, "grad_norm": 0.6731730103492737, "learning_rate": 0.00010672571831157251, "loss": 2.9058, "step": 44248 }, { "epoch": 2.17, "grad_norm": 0.7448628544807434, "learning_rate": 0.00010671394329814864, "loss": 2.6898, "step": 44249 }, { "epoch": 2.17, "grad_norm": 0.7296499609947205, "learning_rate": 0.00010670216879380308, "loss": 3.0543, "step": 44250 }, { "epoch": 2.17, "grad_norm": 0.7346038222312927, "learning_rate": 0.00010669039479856691, "loss": 2.8413, "step": 44251 }, { "epoch": 2.17, "grad_norm": 0.6547524929046631, "learning_rate": 0.00010667862131247092, "loss": 2.7705, "step": 44252 }, { "epoch": 2.17, "grad_norm": 0.719100832939148, "learning_rate": 0.00010666684833554635, "loss": 3.0006, "step": 44253 }, { "epoch": 2.17, "grad_norm": 0.7397176027297974, "learning_rate": 0.00010665507586782399, "loss": 2.7199, "step": 44254 }, { "epoch": 2.17, "grad_norm": 0.7125775814056396, "learning_rate": 0.00010664330390933505, "loss": 2.9314, "step": 44255 }, { "epoch": 2.17, "grad_norm": 0.7293382287025452, "learning_rate": 0.00010663153246011042, "loss": 3.1108, "step": 44256 }, { "epoch": 2.17, "grad_norm": 0.6914783120155334, "learning_rate": 0.00010661976152018101, "loss": 2.8427, "step": 44257 }, { "epoch": 2.17, "grad_norm": 0.7073983550071716, "learning_rate": 0.00010660799108957806, "loss": 2.8991, "step": 44258 }, { "epoch": 2.17, "grad_norm": 0.7683893442153931, "learning_rate": 0.00010659622116833233, "loss": 2.8923, "step": 44259 }, { "epoch": 2.17, "grad_norm": 0.7236352562904358, "learning_rate": 0.00010658445175647506, "loss": 2.9805, "step": 44260 }, { "epoch": 2.17, "grad_norm": 0.7174046635627747, "learning_rate": 0.00010657268285403697, "loss": 3.0995, "step": 44261 }, { "epoch": 2.17, "grad_norm": 0.6961384415626526, "learning_rate": 0.00010656091446104936, "loss": 2.8501, "step": 44262 }, { "epoch": 2.17, "grad_norm": 0.7074950933456421, "learning_rate": 0.00010654914657754304, "loss": 2.8524, "step": 44263 }, { "epoch": 2.17, "grad_norm": 0.6755015850067139, "learning_rate": 0.00010653737920354896, "loss": 2.8667, "step": 44264 }, { "epoch": 2.17, "grad_norm": 0.673994779586792, "learning_rate": 0.00010652561233909828, "loss": 2.8819, "step": 44265 }, { "epoch": 2.17, "grad_norm": 0.6857726573944092, "learning_rate": 0.00010651384598422178, "loss": 2.9646, "step": 44266 }, { "epoch": 2.17, "grad_norm": 0.7020795941352844, "learning_rate": 0.00010650208013895057, "loss": 2.888, "step": 44267 }, { "epoch": 2.17, "grad_norm": 0.7138441801071167, "learning_rate": 0.00010649031480331577, "loss": 2.9728, "step": 44268 }, { "epoch": 2.17, "grad_norm": 0.6965253949165344, "learning_rate": 0.00010647854997734824, "loss": 2.8346, "step": 44269 }, { "epoch": 2.17, "grad_norm": 0.6731846332550049, "learning_rate": 0.00010646678566107895, "loss": 2.8683, "step": 44270 }, { "epoch": 2.17, "grad_norm": 0.7317405939102173, "learning_rate": 0.0001064550218545388, "loss": 2.9809, "step": 44271 }, { "epoch": 2.17, "grad_norm": 0.7305527925491333, "learning_rate": 0.00010644325855775889, "loss": 2.9701, "step": 44272 }, { "epoch": 2.17, "grad_norm": 0.6821311712265015, "learning_rate": 0.00010643149577077027, "loss": 2.8188, "step": 44273 }, { "epoch": 2.17, "grad_norm": 0.7498180866241455, "learning_rate": 0.00010641973349360374, "loss": 3.0598, "step": 44274 }, { "epoch": 2.17, "grad_norm": 0.731899619102478, "learning_rate": 0.00010640797172629051, "loss": 2.8892, "step": 44275 }, { "epoch": 2.17, "grad_norm": 0.7006483674049377, "learning_rate": 0.00010639621046886128, "loss": 2.86, "step": 44276 }, { "epoch": 2.17, "grad_norm": 0.7303549647331238, "learning_rate": 0.00010638444972134728, "loss": 2.597, "step": 44277 }, { "epoch": 2.17, "grad_norm": 0.6863207817077637, "learning_rate": 0.00010637268948377939, "loss": 2.6882, "step": 44278 }, { "epoch": 2.17, "grad_norm": 0.66311115026474, "learning_rate": 0.00010636092975618847, "loss": 2.9305, "step": 44279 }, { "epoch": 2.17, "grad_norm": 0.7255011796951294, "learning_rate": 0.00010634917053860569, "loss": 2.8173, "step": 44280 }, { "epoch": 2.17, "grad_norm": 0.6959855556488037, "learning_rate": 0.00010633741183106184, "loss": 2.8846, "step": 44281 }, { "epoch": 2.17, "grad_norm": 0.6742451190948486, "learning_rate": 0.00010632565363358797, "loss": 3.0126, "step": 44282 }, { "epoch": 2.17, "grad_norm": 0.7093662023544312, "learning_rate": 0.00010631389594621515, "loss": 3.0639, "step": 44283 }, { "epoch": 2.17, "grad_norm": 0.6777709126472473, "learning_rate": 0.00010630213876897423, "loss": 2.9066, "step": 44284 }, { "epoch": 2.17, "grad_norm": 0.700195848941803, "learning_rate": 0.00010629038210189623, "loss": 2.9751, "step": 44285 }, { "epoch": 2.17, "grad_norm": 0.6865321397781372, "learning_rate": 0.00010627862594501196, "loss": 3.0077, "step": 44286 }, { "epoch": 2.17, "grad_norm": 0.6771736741065979, "learning_rate": 0.0001062668702983525, "loss": 2.7006, "step": 44287 }, { "epoch": 2.17, "grad_norm": 0.7320778965950012, "learning_rate": 0.00010625511516194895, "loss": 2.9485, "step": 44288 }, { "epoch": 2.17, "grad_norm": 0.7206733226776123, "learning_rate": 0.00010624336053583202, "loss": 2.8812, "step": 44289 }, { "epoch": 2.17, "grad_norm": 0.6806963682174683, "learning_rate": 0.00010623160642003292, "loss": 2.8639, "step": 44290 }, { "epoch": 2.17, "grad_norm": 0.7328031659126282, "learning_rate": 0.00010621985281458233, "loss": 2.9097, "step": 44291 }, { "epoch": 2.17, "grad_norm": 0.7521789073944092, "learning_rate": 0.00010620809971951148, "loss": 2.605, "step": 44292 }, { "epoch": 2.17, "grad_norm": 0.7122011780738831, "learning_rate": 0.00010619634713485122, "loss": 2.9234, "step": 44293 }, { "epoch": 2.17, "grad_norm": 0.7520004510879517, "learning_rate": 0.00010618459506063235, "loss": 2.8301, "step": 44294 }, { "epoch": 2.17, "grad_norm": 0.7129939794540405, "learning_rate": 0.00010617284349688607, "loss": 2.9049, "step": 44295 }, { "epoch": 2.17, "grad_norm": 0.6909685134887695, "learning_rate": 0.0001061610924436431, "loss": 2.9319, "step": 44296 }, { "epoch": 2.17, "grad_norm": 0.6826851963996887, "learning_rate": 0.00010614934190093454, "loss": 2.7563, "step": 44297 }, { "epoch": 2.17, "grad_norm": 0.6831250190734863, "learning_rate": 0.00010613759186879138, "loss": 2.9022, "step": 44298 }, { "epoch": 2.17, "grad_norm": 0.6965422034263611, "learning_rate": 0.00010612584234724452, "loss": 3.158, "step": 44299 }, { "epoch": 2.17, "grad_norm": 0.7100363969802856, "learning_rate": 0.00010611409333632486, "loss": 2.6984, "step": 44300 }, { "epoch": 2.17, "grad_norm": 0.7082723379135132, "learning_rate": 0.00010610234483606325, "loss": 2.8544, "step": 44301 }, { "epoch": 2.17, "grad_norm": 0.7065490484237671, "learning_rate": 0.00010609059684649076, "loss": 3.014, "step": 44302 }, { "epoch": 2.17, "grad_norm": 0.7121196389198303, "learning_rate": 0.00010607884936763841, "loss": 2.8571, "step": 44303 }, { "epoch": 2.17, "grad_norm": 0.7432886958122253, "learning_rate": 0.00010606710239953697, "loss": 2.9936, "step": 44304 }, { "epoch": 2.17, "grad_norm": 0.6981472969055176, "learning_rate": 0.00010605535594221757, "loss": 2.8229, "step": 44305 }, { "epoch": 2.17, "grad_norm": 0.7373912334442139, "learning_rate": 0.000106043609995711, "loss": 2.8332, "step": 44306 }, { "epoch": 2.17, "grad_norm": 0.7022235989570618, "learning_rate": 0.00010603186456004816, "loss": 2.783, "step": 44307 }, { "epoch": 2.17, "grad_norm": 0.7700810432434082, "learning_rate": 0.00010602011963526013, "loss": 2.9577, "step": 44308 }, { "epoch": 2.17, "grad_norm": 0.7263349294662476, "learning_rate": 0.0001060083752213777, "loss": 2.8884, "step": 44309 }, { "epoch": 2.17, "grad_norm": 0.6605125665664673, "learning_rate": 0.00010599663131843197, "loss": 2.9842, "step": 44310 }, { "epoch": 2.17, "grad_norm": 0.680346667766571, "learning_rate": 0.00010598488792645367, "loss": 3.0116, "step": 44311 }, { "epoch": 2.17, "grad_norm": 0.696643054485321, "learning_rate": 0.00010597314504547386, "loss": 2.5903, "step": 44312 }, { "epoch": 2.17, "grad_norm": 0.7104796171188354, "learning_rate": 0.00010596140267552351, "loss": 3.0219, "step": 44313 }, { "epoch": 2.17, "grad_norm": 0.8690118193626404, "learning_rate": 0.00010594966081663352, "loss": 2.9789, "step": 44314 }, { "epoch": 2.17, "grad_norm": 0.7163686156272888, "learning_rate": 0.00010593791946883473, "loss": 3.0463, "step": 44315 }, { "epoch": 2.17, "grad_norm": 0.7209365367889404, "learning_rate": 0.00010592617863215806, "loss": 2.8777, "step": 44316 }, { "epoch": 2.17, "grad_norm": 0.6892098188400269, "learning_rate": 0.00010591443830663447, "loss": 2.7619, "step": 44317 }, { "epoch": 2.17, "grad_norm": 0.7479181289672852, "learning_rate": 0.00010590269849229497, "loss": 2.9923, "step": 44318 }, { "epoch": 2.17, "grad_norm": 0.6602400541305542, "learning_rate": 0.00010589095918917032, "loss": 3.0618, "step": 44319 }, { "epoch": 2.17, "grad_norm": 0.6800590753555298, "learning_rate": 0.00010587922039729162, "loss": 2.837, "step": 44320 }, { "epoch": 2.17, "grad_norm": 0.6915212869644165, "learning_rate": 0.00010586748211668975, "loss": 3.046, "step": 44321 }, { "epoch": 2.17, "grad_norm": 0.6551668643951416, "learning_rate": 0.00010585574434739542, "loss": 2.8772, "step": 44322 }, { "epoch": 2.17, "grad_norm": 0.7143257856369019, "learning_rate": 0.00010584400708943979, "loss": 2.9301, "step": 44323 }, { "epoch": 2.17, "grad_norm": 0.6705986857414246, "learning_rate": 0.00010583227034285357, "loss": 2.8245, "step": 44324 }, { "epoch": 2.17, "grad_norm": 0.6597686409950256, "learning_rate": 0.00010582053410766792, "loss": 2.9585, "step": 44325 }, { "epoch": 2.17, "grad_norm": 0.7265187501907349, "learning_rate": 0.00010580879838391349, "loss": 2.9596, "step": 44326 }, { "epoch": 2.17, "grad_norm": 0.7158097624778748, "learning_rate": 0.00010579706317162145, "loss": 2.7455, "step": 44327 }, { "epoch": 2.17, "grad_norm": 0.7624508142471313, "learning_rate": 0.0001057853284708225, "loss": 2.8151, "step": 44328 }, { "epoch": 2.17, "grad_norm": 0.7447002530097961, "learning_rate": 0.00010577359428154759, "loss": 2.5754, "step": 44329 }, { "epoch": 2.17, "grad_norm": 0.6778799295425415, "learning_rate": 0.00010576186060382771, "loss": 2.855, "step": 44330 }, { "epoch": 2.17, "grad_norm": 0.7103135585784912, "learning_rate": 0.0001057501274376936, "loss": 2.9367, "step": 44331 }, { "epoch": 2.17, "grad_norm": 0.6705018281936646, "learning_rate": 0.0001057383947831763, "loss": 3.1099, "step": 44332 }, { "epoch": 2.17, "grad_norm": 0.7252798676490784, "learning_rate": 0.00010572666264030677, "loss": 2.8609, "step": 44333 }, { "epoch": 2.17, "grad_norm": 0.7287251353263855, "learning_rate": 0.00010571493100911573, "loss": 2.8493, "step": 44334 }, { "epoch": 2.17, "grad_norm": 0.7141810059547424, "learning_rate": 0.00010570319988963424, "loss": 2.855, "step": 44335 }, { "epoch": 2.17, "grad_norm": 0.7226595878601074, "learning_rate": 0.00010569146928189316, "loss": 2.6597, "step": 44336 }, { "epoch": 2.17, "grad_norm": 0.7063793540000916, "learning_rate": 0.00010567973918592334, "loss": 3.0757, "step": 44337 }, { "epoch": 2.17, "grad_norm": 0.7134846448898315, "learning_rate": 0.0001056680096017556, "loss": 2.8873, "step": 44338 }, { "epoch": 2.17, "grad_norm": 0.7262305021286011, "learning_rate": 0.00010565628052942091, "loss": 3.1055, "step": 44339 }, { "epoch": 2.17, "grad_norm": 0.729193389415741, "learning_rate": 0.00010564455196895029, "loss": 2.9053, "step": 44340 }, { "epoch": 2.17, "grad_norm": 0.7212259769439697, "learning_rate": 0.00010563282392037443, "loss": 2.9384, "step": 44341 }, { "epoch": 2.17, "grad_norm": 0.7601874470710754, "learning_rate": 0.00010562109638372436, "loss": 2.7878, "step": 44342 }, { "epoch": 2.17, "grad_norm": 0.6938914656639099, "learning_rate": 0.00010560936935903093, "loss": 3.0421, "step": 44343 }, { "epoch": 2.17, "grad_norm": 0.7167030572891235, "learning_rate": 0.00010559764284632493, "loss": 3.0963, "step": 44344 }, { "epoch": 2.17, "grad_norm": 0.7385327219963074, "learning_rate": 0.0001055859168456374, "loss": 3.0804, "step": 44345 }, { "epoch": 2.17, "grad_norm": 0.7111912369728088, "learning_rate": 0.00010557419135699906, "loss": 3.0677, "step": 44346 }, { "epoch": 2.17, "grad_norm": 0.7527398467063904, "learning_rate": 0.000105562466380441, "loss": 3.0916, "step": 44347 }, { "epoch": 2.17, "grad_norm": 0.6765745282173157, "learning_rate": 0.00010555074191599387, "loss": 2.9669, "step": 44348 }, { "epoch": 2.17, "grad_norm": 0.6760923266410828, "learning_rate": 0.00010553901796368876, "loss": 2.8139, "step": 44349 }, { "epoch": 2.17, "grad_norm": 0.7211750149726868, "learning_rate": 0.00010552729452355644, "loss": 2.7989, "step": 44350 }, { "epoch": 2.17, "grad_norm": 0.6986032128334045, "learning_rate": 0.00010551557159562769, "loss": 3.0181, "step": 44351 }, { "epoch": 2.17, "grad_norm": 0.7039467692375183, "learning_rate": 0.00010550384917993362, "loss": 2.9076, "step": 44352 }, { "epoch": 2.17, "grad_norm": 0.7151457667350769, "learning_rate": 0.00010549212727650487, "loss": 3.0128, "step": 44353 }, { "epoch": 2.17, "grad_norm": 0.7260016202926636, "learning_rate": 0.00010548040588537244, "loss": 3.0107, "step": 44354 }, { "epoch": 2.17, "grad_norm": 0.7048295140266418, "learning_rate": 0.00010546868500656728, "loss": 2.9732, "step": 44355 }, { "epoch": 2.17, "grad_norm": 0.6705439686775208, "learning_rate": 0.00010545696464012006, "loss": 2.736, "step": 44356 }, { "epoch": 2.17, "grad_norm": 0.6685308814048767, "learning_rate": 0.00010544524478606185, "loss": 3.0948, "step": 44357 }, { "epoch": 2.17, "grad_norm": 0.7032225728034973, "learning_rate": 0.00010543352544442342, "loss": 2.8816, "step": 44358 }, { "epoch": 2.17, "grad_norm": 0.7694846987724304, "learning_rate": 0.00010542180661523555, "loss": 3.0432, "step": 44359 }, { "epoch": 2.17, "grad_norm": 0.6599303483963013, "learning_rate": 0.00010541008829852929, "loss": 2.9444, "step": 44360 }, { "epoch": 2.17, "grad_norm": 0.7349076271057129, "learning_rate": 0.00010539837049433531, "loss": 2.981, "step": 44361 }, { "epoch": 2.17, "grad_norm": 0.7268679141998291, "learning_rate": 0.00010538665320268466, "loss": 2.9341, "step": 44362 }, { "epoch": 2.17, "grad_norm": 0.6871877312660217, "learning_rate": 0.00010537493642360802, "loss": 2.8963, "step": 44363 }, { "epoch": 2.17, "grad_norm": 0.669917643070221, "learning_rate": 0.00010536322015713642, "loss": 2.921, "step": 44364 }, { "epoch": 2.17, "grad_norm": 0.7450718283653259, "learning_rate": 0.00010535150440330068, "loss": 2.6976, "step": 44365 }, { "epoch": 2.17, "grad_norm": 0.6828457713127136, "learning_rate": 0.00010533978916213148, "loss": 2.8831, "step": 44366 }, { "epoch": 2.17, "grad_norm": 0.7049870491027832, "learning_rate": 0.00010532807443365994, "loss": 2.9459, "step": 44367 }, { "epoch": 2.17, "grad_norm": 0.6955314874649048, "learning_rate": 0.00010531636021791664, "loss": 2.8951, "step": 44368 }, { "epoch": 2.17, "grad_norm": 0.6996759176254272, "learning_rate": 0.00010530464651493263, "loss": 2.9312, "step": 44369 }, { "epoch": 2.17, "grad_norm": 0.7178577184677124, "learning_rate": 0.00010529293332473879, "loss": 2.7958, "step": 44370 }, { "epoch": 2.17, "grad_norm": 0.6655074954032898, "learning_rate": 0.00010528122064736591, "loss": 2.9293, "step": 44371 }, { "epoch": 2.17, "grad_norm": 0.6931262016296387, "learning_rate": 0.00010526950848284482, "loss": 2.9426, "step": 44372 }, { "epoch": 2.17, "grad_norm": 0.6952087879180908, "learning_rate": 0.00010525779683120621, "loss": 2.921, "step": 44373 }, { "epoch": 2.17, "grad_norm": 0.6865946054458618, "learning_rate": 0.00010524608569248113, "loss": 2.8322, "step": 44374 }, { "epoch": 2.17, "grad_norm": 0.6755927801132202, "learning_rate": 0.00010523437506670052, "loss": 2.7113, "step": 44375 }, { "epoch": 2.17, "grad_norm": 0.704673171043396, "learning_rate": 0.00010522266495389491, "loss": 2.8718, "step": 44376 }, { "epoch": 2.17, "grad_norm": 0.6965922713279724, "learning_rate": 0.00010521095535409547, "loss": 2.8379, "step": 44377 }, { "epoch": 2.17, "grad_norm": 0.7056841850280762, "learning_rate": 0.00010519924626733275, "loss": 3.0354, "step": 44378 }, { "epoch": 2.17, "grad_norm": 0.7823092937469482, "learning_rate": 0.00010518753769363782, "loss": 3.1314, "step": 44379 }, { "epoch": 2.17, "grad_norm": 0.6921338438987732, "learning_rate": 0.00010517582963304146, "loss": 3.0036, "step": 44380 }, { "epoch": 2.18, "grad_norm": 0.6734832525253296, "learning_rate": 0.00010516412208557434, "loss": 2.9865, "step": 44381 }, { "epoch": 2.18, "grad_norm": 0.7287729382514954, "learning_rate": 0.00010515241505126754, "loss": 3.0678, "step": 44382 }, { "epoch": 2.18, "grad_norm": 0.6677972674369812, "learning_rate": 0.00010514070853015169, "loss": 2.9314, "step": 44383 }, { "epoch": 2.18, "grad_norm": 0.7038114070892334, "learning_rate": 0.00010512900252225766, "loss": 3.0285, "step": 44384 }, { "epoch": 2.18, "grad_norm": 0.6778184175491333, "learning_rate": 0.00010511729702761648, "loss": 2.8647, "step": 44385 }, { "epoch": 2.18, "grad_norm": 0.7425260543823242, "learning_rate": 0.00010510559204625882, "loss": 2.9822, "step": 44386 }, { "epoch": 2.18, "grad_norm": 0.720942497253418, "learning_rate": 0.00010509388757821555, "loss": 2.9104, "step": 44387 }, { "epoch": 2.18, "grad_norm": 0.7403482794761658, "learning_rate": 0.00010508218362351735, "loss": 2.881, "step": 44388 }, { "epoch": 2.18, "grad_norm": 0.6942797303199768, "learning_rate": 0.00010507048018219515, "loss": 3.1106, "step": 44389 }, { "epoch": 2.18, "grad_norm": 0.7015599012374878, "learning_rate": 0.00010505877725427991, "loss": 2.8412, "step": 44390 }, { "epoch": 2.18, "grad_norm": 0.7615519165992737, "learning_rate": 0.00010504707483980221, "loss": 2.9333, "step": 44391 }, { "epoch": 2.18, "grad_norm": 0.711105465888977, "learning_rate": 0.00010503537293879313, "loss": 3.13, "step": 44392 }, { "epoch": 2.18, "grad_norm": 0.7053470015525818, "learning_rate": 0.00010502367155128332, "loss": 2.9103, "step": 44393 }, { "epoch": 2.18, "grad_norm": 0.6969516277313232, "learning_rate": 0.00010501197067730357, "loss": 2.8205, "step": 44394 }, { "epoch": 2.18, "grad_norm": 0.7566748857498169, "learning_rate": 0.00010500027031688482, "loss": 3.0093, "step": 44395 }, { "epoch": 2.18, "grad_norm": 0.6897950172424316, "learning_rate": 0.00010498857047005777, "loss": 3.0615, "step": 44396 }, { "epoch": 2.18, "grad_norm": 0.7079964280128479, "learning_rate": 0.00010497687113685337, "loss": 3.1859, "step": 44397 }, { "epoch": 2.18, "grad_norm": 0.72109454870224, "learning_rate": 0.00010496517231730228, "loss": 2.4967, "step": 44398 }, { "epoch": 2.18, "grad_norm": 0.7014322876930237, "learning_rate": 0.00010495347401143538, "loss": 3.1057, "step": 44399 }, { "epoch": 2.18, "grad_norm": 0.7019551396369934, "learning_rate": 0.00010494177621928358, "loss": 2.9177, "step": 44400 }, { "epoch": 2.18, "grad_norm": 0.721088171005249, "learning_rate": 0.00010493007894087762, "loss": 3.1077, "step": 44401 }, { "epoch": 2.18, "grad_norm": 0.6936663389205933, "learning_rate": 0.00010491838217624829, "loss": 2.9189, "step": 44402 }, { "epoch": 2.18, "grad_norm": 0.7555628418922424, "learning_rate": 0.0001049066859254263, "loss": 2.906, "step": 44403 }, { "epoch": 2.18, "grad_norm": 0.6784551739692688, "learning_rate": 0.00010489499018844253, "loss": 2.9672, "step": 44404 }, { "epoch": 2.18, "grad_norm": 0.7072699069976807, "learning_rate": 0.00010488329496532791, "loss": 2.966, "step": 44405 }, { "epoch": 2.18, "grad_norm": 0.7025425434112549, "learning_rate": 0.00010487160025611305, "loss": 2.966, "step": 44406 }, { "epoch": 2.18, "grad_norm": 0.7028836607933044, "learning_rate": 0.00010485990606082895, "loss": 2.991, "step": 44407 }, { "epoch": 2.18, "grad_norm": 0.7300164103507996, "learning_rate": 0.0001048482123795063, "loss": 2.8152, "step": 44408 }, { "epoch": 2.18, "grad_norm": 0.6703711748123169, "learning_rate": 0.0001048365192121758, "loss": 2.9551, "step": 44409 }, { "epoch": 2.18, "grad_norm": 0.7038151621818542, "learning_rate": 0.00010482482655886841, "loss": 2.5322, "step": 44410 }, { "epoch": 2.18, "grad_norm": 0.7217896580696106, "learning_rate": 0.0001048131344196148, "loss": 2.7781, "step": 44411 }, { "epoch": 2.18, "grad_norm": 0.7087219953536987, "learning_rate": 0.00010480144279444595, "loss": 2.8338, "step": 44412 }, { "epoch": 2.18, "grad_norm": 0.7262712121009827, "learning_rate": 0.0001047897516833924, "loss": 3.0148, "step": 44413 }, { "epoch": 2.18, "grad_norm": 0.6909366846084595, "learning_rate": 0.0001047780610864852, "loss": 2.9738, "step": 44414 }, { "epoch": 2.18, "grad_norm": 0.6847735047340393, "learning_rate": 0.0001047663710037549, "loss": 2.733, "step": 44415 }, { "epoch": 2.18, "grad_norm": 0.7341592907905579, "learning_rate": 0.00010475468143523248, "loss": 2.8791, "step": 44416 }, { "epoch": 2.18, "grad_norm": 0.7717563509941101, "learning_rate": 0.0001047429923809487, "loss": 2.9478, "step": 44417 }, { "epoch": 2.18, "grad_norm": 0.6956798434257507, "learning_rate": 0.00010473130384093417, "loss": 2.9691, "step": 44418 }, { "epoch": 2.18, "grad_norm": 0.7189202904701233, "learning_rate": 0.00010471961581521993, "loss": 3.0525, "step": 44419 }, { "epoch": 2.18, "grad_norm": 0.6681233048439026, "learning_rate": 0.0001047079283038365, "loss": 2.8649, "step": 44420 }, { "epoch": 2.18, "grad_norm": 0.7311338186264038, "learning_rate": 0.00010469624130681485, "loss": 3.015, "step": 44421 }, { "epoch": 2.18, "grad_norm": 0.722417950630188, "learning_rate": 0.00010468455482418577, "loss": 2.8841, "step": 44422 }, { "epoch": 2.18, "grad_norm": 0.6731986403465271, "learning_rate": 0.00010467286885598003, "loss": 2.7704, "step": 44423 }, { "epoch": 2.18, "grad_norm": 0.6961910724639893, "learning_rate": 0.00010466118340222833, "loss": 2.8648, "step": 44424 }, { "epoch": 2.18, "grad_norm": 0.7090938687324524, "learning_rate": 0.00010464949846296138, "loss": 2.8517, "step": 44425 }, { "epoch": 2.18, "grad_norm": 0.690933108329773, "learning_rate": 0.00010463781403821004, "loss": 2.8032, "step": 44426 }, { "epoch": 2.18, "grad_norm": 0.6868634819984436, "learning_rate": 0.00010462613012800524, "loss": 3.0002, "step": 44427 }, { "epoch": 2.18, "grad_norm": 0.6949028968811035, "learning_rate": 0.00010461444673237747, "loss": 2.784, "step": 44428 }, { "epoch": 2.18, "grad_norm": 0.7235555648803711, "learning_rate": 0.00010460276385135778, "loss": 2.958, "step": 44429 }, { "epoch": 2.18, "grad_norm": 0.7306112051010132, "learning_rate": 0.00010459108148497681, "loss": 2.9012, "step": 44430 }, { "epoch": 2.18, "grad_norm": 0.7331579327583313, "learning_rate": 0.0001045793996332652, "loss": 2.8728, "step": 44431 }, { "epoch": 2.18, "grad_norm": 0.7387495636940002, "learning_rate": 0.00010456771829625396, "loss": 2.9061, "step": 44432 }, { "epoch": 2.18, "grad_norm": 0.6881783604621887, "learning_rate": 0.0001045560374739736, "loss": 2.9365, "step": 44433 }, { "epoch": 2.18, "grad_norm": 0.7136433720588684, "learning_rate": 0.00010454435716645518, "loss": 2.8599, "step": 44434 }, { "epoch": 2.18, "grad_norm": 0.6872485280036926, "learning_rate": 0.00010453267737372915, "loss": 2.7754, "step": 44435 }, { "epoch": 2.18, "grad_norm": 0.7080912590026855, "learning_rate": 0.00010452099809582646, "loss": 3.0945, "step": 44436 }, { "epoch": 2.18, "grad_norm": 0.7158118486404419, "learning_rate": 0.00010450931933277795, "loss": 2.9047, "step": 44437 }, { "epoch": 2.18, "grad_norm": 0.6851445436477661, "learning_rate": 0.00010449764108461425, "loss": 2.8758, "step": 44438 }, { "epoch": 2.18, "grad_norm": 0.7490003705024719, "learning_rate": 0.00010448596335136615, "loss": 2.8528, "step": 44439 }, { "epoch": 2.18, "grad_norm": 0.7051994800567627, "learning_rate": 0.00010447428613306428, "loss": 3.0044, "step": 44440 }, { "epoch": 2.18, "grad_norm": 0.7388154864311218, "learning_rate": 0.00010446260942973953, "loss": 3.0324, "step": 44441 }, { "epoch": 2.18, "grad_norm": 0.7008410096168518, "learning_rate": 0.00010445093324142272, "loss": 3.035, "step": 44442 }, { "epoch": 2.18, "grad_norm": 0.6961836218833923, "learning_rate": 0.00010443925756814442, "loss": 3.0496, "step": 44443 }, { "epoch": 2.18, "grad_norm": 0.7032221555709839, "learning_rate": 0.00010442758240993557, "loss": 2.9266, "step": 44444 }, { "epoch": 2.18, "grad_norm": 0.7082571387290955, "learning_rate": 0.00010441590776682686, "loss": 3.0056, "step": 44445 }, { "epoch": 2.18, "grad_norm": 0.7039412260055542, "learning_rate": 0.00010440423363884888, "loss": 2.8115, "step": 44446 }, { "epoch": 2.18, "grad_norm": 0.6998090744018555, "learning_rate": 0.00010439256002603261, "loss": 3.0467, "step": 44447 }, { "epoch": 2.18, "grad_norm": 0.7542231678962708, "learning_rate": 0.0001043808869284086, "loss": 2.9567, "step": 44448 }, { "epoch": 2.18, "grad_norm": 0.7332549095153809, "learning_rate": 0.00010436921434600778, "loss": 2.8982, "step": 44449 }, { "epoch": 2.18, "grad_norm": 0.6957454681396484, "learning_rate": 0.00010435754227886072, "loss": 2.8315, "step": 44450 }, { "epoch": 2.18, "grad_norm": 0.715060293674469, "learning_rate": 0.00010434587072699832, "loss": 2.8012, "step": 44451 }, { "epoch": 2.18, "grad_norm": 0.7155557870864868, "learning_rate": 0.00010433419969045122, "loss": 3.2461, "step": 44452 }, { "epoch": 2.18, "grad_norm": 0.7036327123641968, "learning_rate": 0.00010432252916925012, "loss": 3.0977, "step": 44453 }, { "epoch": 2.18, "grad_norm": 0.7026028037071228, "learning_rate": 0.00010431085916342589, "loss": 2.854, "step": 44454 }, { "epoch": 2.18, "grad_norm": 0.7156589031219482, "learning_rate": 0.00010429918967300912, "loss": 2.8383, "step": 44455 }, { "epoch": 2.18, "grad_norm": 0.7051274180412292, "learning_rate": 0.00010428752069803063, "loss": 2.9757, "step": 44456 }, { "epoch": 2.18, "grad_norm": 0.6463907957077026, "learning_rate": 0.00010427585223852122, "loss": 2.9478, "step": 44457 }, { "epoch": 2.18, "grad_norm": 0.7040029764175415, "learning_rate": 0.00010426418429451144, "loss": 2.6731, "step": 44458 }, { "epoch": 2.18, "grad_norm": 0.7455715537071228, "learning_rate": 0.00010425251686603225, "loss": 2.8721, "step": 44459 }, { "epoch": 2.18, "grad_norm": 0.7634180784225464, "learning_rate": 0.00010424084995311426, "loss": 3.066, "step": 44460 }, { "epoch": 2.18, "grad_norm": 0.7320479154586792, "learning_rate": 0.0001042291835557881, "loss": 2.9601, "step": 44461 }, { "epoch": 2.18, "grad_norm": 0.7387556433677673, "learning_rate": 0.00010421751767408468, "loss": 2.9843, "step": 44462 }, { "epoch": 2.18, "grad_norm": 0.7048659324645996, "learning_rate": 0.00010420585230803452, "loss": 2.8313, "step": 44463 }, { "epoch": 2.18, "grad_norm": 0.7122004628181458, "learning_rate": 0.00010419418745766858, "loss": 2.9771, "step": 44464 }, { "epoch": 2.18, "grad_norm": 0.7229511141777039, "learning_rate": 0.0001041825231230174, "loss": 2.9935, "step": 44465 }, { "epoch": 2.18, "grad_norm": 0.7182523608207703, "learning_rate": 0.00010417085930411185, "loss": 2.9483, "step": 44466 }, { "epoch": 2.18, "grad_norm": 0.7179908156394958, "learning_rate": 0.00010415919600098255, "loss": 2.85, "step": 44467 }, { "epoch": 2.18, "grad_norm": 0.7596323490142822, "learning_rate": 0.00010414753321366015, "loss": 2.7197, "step": 44468 }, { "epoch": 2.18, "grad_norm": 0.7082833051681519, "learning_rate": 0.00010413587094217552, "loss": 2.9153, "step": 44469 }, { "epoch": 2.18, "grad_norm": 0.6668033003807068, "learning_rate": 0.00010412420918655925, "loss": 3.0079, "step": 44470 }, { "epoch": 2.18, "grad_norm": 0.7020968794822693, "learning_rate": 0.00010411254794684212, "loss": 2.8986, "step": 44471 }, { "epoch": 2.18, "grad_norm": 0.6882535815238953, "learning_rate": 0.00010410088722305493, "loss": 2.9372, "step": 44472 }, { "epoch": 2.18, "grad_norm": 0.7956342101097107, "learning_rate": 0.0001040892270152283, "loss": 3.0606, "step": 44473 }, { "epoch": 2.18, "grad_norm": 0.7361034750938416, "learning_rate": 0.00010407756732339293, "loss": 2.9101, "step": 44474 }, { "epoch": 2.18, "grad_norm": 0.7044380903244019, "learning_rate": 0.00010406590814757946, "loss": 2.798, "step": 44475 }, { "epoch": 2.18, "grad_norm": 0.6986120343208313, "learning_rate": 0.00010405424948781869, "loss": 2.7251, "step": 44476 }, { "epoch": 2.18, "grad_norm": 0.712506890296936, "learning_rate": 0.0001040425913441414, "loss": 2.9249, "step": 44477 }, { "epoch": 2.18, "grad_norm": 0.7093831896781921, "learning_rate": 0.00010403093371657807, "loss": 2.7835, "step": 44478 }, { "epoch": 2.18, "grad_norm": 0.7020789384841919, "learning_rate": 0.00010401927660515972, "loss": 2.7917, "step": 44479 }, { "epoch": 2.18, "grad_norm": 0.70029616355896, "learning_rate": 0.00010400762000991675, "loss": 3.0307, "step": 44480 }, { "epoch": 2.18, "grad_norm": 0.7338650226593018, "learning_rate": 0.00010399596393088007, "loss": 3.0209, "step": 44481 }, { "epoch": 2.18, "grad_norm": 0.700697124004364, "learning_rate": 0.00010398430836808032, "loss": 2.859, "step": 44482 }, { "epoch": 2.18, "grad_norm": 0.6929804086685181, "learning_rate": 0.00010397265332154807, "loss": 2.814, "step": 44483 }, { "epoch": 2.18, "grad_norm": 0.6898903250694275, "learning_rate": 0.00010396099879131422, "loss": 2.8862, "step": 44484 }, { "epoch": 2.18, "grad_norm": 0.7237740755081177, "learning_rate": 0.00010394934477740928, "loss": 3.033, "step": 44485 }, { "epoch": 2.18, "grad_norm": 0.6766157150268555, "learning_rate": 0.00010393769127986403, "loss": 2.9624, "step": 44486 }, { "epoch": 2.18, "grad_norm": 0.7159266471862793, "learning_rate": 0.00010392603829870928, "loss": 2.8887, "step": 44487 }, { "epoch": 2.18, "grad_norm": 0.7595884799957275, "learning_rate": 0.0001039143858339756, "loss": 3.0751, "step": 44488 }, { "epoch": 2.18, "grad_norm": 0.6963126063346863, "learning_rate": 0.00010390273388569367, "loss": 2.8867, "step": 44489 }, { "epoch": 2.18, "grad_norm": 0.810042142868042, "learning_rate": 0.00010389108245389412, "loss": 2.8458, "step": 44490 }, { "epoch": 2.18, "grad_norm": 0.7496387362480164, "learning_rate": 0.00010387943153860781, "loss": 2.8515, "step": 44491 }, { "epoch": 2.18, "grad_norm": 0.7184833884239197, "learning_rate": 0.00010386778113986525, "loss": 2.8776, "step": 44492 }, { "epoch": 2.18, "grad_norm": 0.7305948734283447, "learning_rate": 0.00010385613125769719, "loss": 2.8932, "step": 44493 }, { "epoch": 2.18, "grad_norm": 0.7090479135513306, "learning_rate": 0.00010384448189213446, "loss": 2.8305, "step": 44494 }, { "epoch": 2.18, "grad_norm": 0.7010915279388428, "learning_rate": 0.00010383283304320758, "loss": 2.9246, "step": 44495 }, { "epoch": 2.18, "grad_norm": 0.7132809162139893, "learning_rate": 0.0001038211847109473, "loss": 2.831, "step": 44496 }, { "epoch": 2.18, "grad_norm": 0.7011138796806335, "learning_rate": 0.00010380953689538413, "loss": 2.9277, "step": 44497 }, { "epoch": 2.18, "grad_norm": 0.7759160995483398, "learning_rate": 0.00010379788959654889, "loss": 2.8512, "step": 44498 }, { "epoch": 2.18, "grad_norm": 0.7896947264671326, "learning_rate": 0.00010378624281447236, "loss": 2.8438, "step": 44499 }, { "epoch": 2.18, "grad_norm": 0.7089138627052307, "learning_rate": 0.000103774596549185, "loss": 2.9216, "step": 44500 }, { "epoch": 2.18, "grad_norm": 0.7258244752883911, "learning_rate": 0.00010376295080071769, "loss": 2.8647, "step": 44501 }, { "epoch": 2.18, "grad_norm": 0.7142013311386108, "learning_rate": 0.00010375130556910088, "loss": 2.9314, "step": 44502 }, { "epoch": 2.18, "grad_norm": 0.7016758322715759, "learning_rate": 0.00010373966085436548, "loss": 3.0253, "step": 44503 }, { "epoch": 2.18, "grad_norm": 0.7166877388954163, "learning_rate": 0.00010372801665654205, "loss": 2.858, "step": 44504 }, { "epoch": 2.18, "grad_norm": 0.6986833214759827, "learning_rate": 0.00010371637297566112, "loss": 3.0418, "step": 44505 }, { "epoch": 2.18, "grad_norm": 0.7260124683380127, "learning_rate": 0.00010370472981175361, "loss": 2.9055, "step": 44506 }, { "epoch": 2.18, "grad_norm": 0.7071568369865417, "learning_rate": 0.00010369308716484999, "loss": 2.8121, "step": 44507 }, { "epoch": 2.18, "grad_norm": 0.6688234210014343, "learning_rate": 0.00010368144503498097, "loss": 3.0533, "step": 44508 }, { "epoch": 2.18, "grad_norm": 0.6707281470298767, "learning_rate": 0.00010366980342217734, "loss": 3.061, "step": 44509 }, { "epoch": 2.18, "grad_norm": 0.7021904587745667, "learning_rate": 0.00010365816232646965, "loss": 2.7661, "step": 44510 }, { "epoch": 2.18, "grad_norm": 0.7200233936309814, "learning_rate": 0.0001036465217478886, "loss": 3.0572, "step": 44511 }, { "epoch": 2.18, "grad_norm": 0.7074561715126038, "learning_rate": 0.00010363488168646473, "loss": 3.0943, "step": 44512 }, { "epoch": 2.18, "grad_norm": 0.7289097309112549, "learning_rate": 0.00010362324214222876, "loss": 2.9338, "step": 44513 }, { "epoch": 2.18, "grad_norm": 0.7148504257202148, "learning_rate": 0.00010361160311521148, "loss": 3.0845, "step": 44514 }, { "epoch": 2.18, "grad_norm": 0.6821744441986084, "learning_rate": 0.00010359996460544335, "loss": 3.1103, "step": 44515 }, { "epoch": 2.18, "grad_norm": 0.6762775182723999, "learning_rate": 0.00010358832661295524, "loss": 2.9369, "step": 44516 }, { "epoch": 2.18, "grad_norm": 0.7053170204162598, "learning_rate": 0.00010357668913777756, "loss": 2.8441, "step": 44517 }, { "epoch": 2.18, "grad_norm": 0.7255358099937439, "learning_rate": 0.00010356505217994118, "loss": 2.8484, "step": 44518 }, { "epoch": 2.18, "grad_norm": 0.6944178938865662, "learning_rate": 0.00010355341573947665, "loss": 2.8011, "step": 44519 }, { "epoch": 2.18, "grad_norm": 0.722078800201416, "learning_rate": 0.00010354177981641449, "loss": 2.8922, "step": 44520 }, { "epoch": 2.18, "grad_norm": 0.7471203207969666, "learning_rate": 0.00010353014441078558, "loss": 2.8489, "step": 44521 }, { "epoch": 2.18, "grad_norm": 0.6949812173843384, "learning_rate": 0.00010351850952262036, "loss": 2.863, "step": 44522 }, { "epoch": 2.18, "grad_norm": 0.7270025610923767, "learning_rate": 0.0001035068751519496, "loss": 3.0245, "step": 44523 }, { "epoch": 2.18, "grad_norm": 0.7315390110015869, "learning_rate": 0.00010349524129880399, "loss": 2.8959, "step": 44524 }, { "epoch": 2.18, "grad_norm": 0.747600793838501, "learning_rate": 0.00010348360796321411, "loss": 2.7812, "step": 44525 }, { "epoch": 2.18, "grad_norm": 0.7093079686164856, "learning_rate": 0.00010347197514521058, "loss": 2.6684, "step": 44526 }, { "epoch": 2.18, "grad_norm": 0.7230992317199707, "learning_rate": 0.00010346034284482392, "loss": 2.9073, "step": 44527 }, { "epoch": 2.18, "grad_norm": 0.6705207824707031, "learning_rate": 0.00010344871106208492, "loss": 2.9985, "step": 44528 }, { "epoch": 2.18, "grad_norm": 0.6426035761833191, "learning_rate": 0.00010343707979702429, "loss": 2.997, "step": 44529 }, { "epoch": 2.18, "grad_norm": 0.7323707938194275, "learning_rate": 0.00010342544904967247, "loss": 2.7661, "step": 44530 }, { "epoch": 2.18, "grad_norm": 0.7027085423469543, "learning_rate": 0.00010341381882006027, "loss": 2.7511, "step": 44531 }, { "epoch": 2.18, "grad_norm": 0.6900554299354553, "learning_rate": 0.00010340218910821823, "loss": 2.9398, "step": 44532 }, { "epoch": 2.18, "grad_norm": 0.7302149534225464, "learning_rate": 0.00010339055991417688, "loss": 3.0751, "step": 44533 }, { "epoch": 2.18, "grad_norm": 0.7446348071098328, "learning_rate": 0.0001033789312379671, "loss": 2.897, "step": 44534 }, { "epoch": 2.18, "grad_norm": 0.7004778385162354, "learning_rate": 0.00010336730307961925, "loss": 2.9385, "step": 44535 }, { "epoch": 2.18, "grad_norm": 0.6848006844520569, "learning_rate": 0.00010335567543916417, "loss": 3.0406, "step": 44536 }, { "epoch": 2.18, "grad_norm": 0.6743120551109314, "learning_rate": 0.00010334404831663232, "loss": 2.8368, "step": 44537 }, { "epoch": 2.18, "grad_norm": 0.6830146908760071, "learning_rate": 0.00010333242171205436, "loss": 2.7874, "step": 44538 }, { "epoch": 2.18, "grad_norm": 0.6831412315368652, "learning_rate": 0.00010332079562546108, "loss": 2.5951, "step": 44539 }, { "epoch": 2.18, "grad_norm": 0.7036148309707642, "learning_rate": 0.00010330917005688297, "loss": 2.7791, "step": 44540 }, { "epoch": 2.18, "grad_norm": 0.7623158693313599, "learning_rate": 0.00010329754500635067, "loss": 2.9707, "step": 44541 }, { "epoch": 2.18, "grad_norm": 0.6715494394302368, "learning_rate": 0.00010328592047389464, "loss": 3.103, "step": 44542 }, { "epoch": 2.18, "grad_norm": 0.7432985305786133, "learning_rate": 0.00010327429645954567, "loss": 2.717, "step": 44543 }, { "epoch": 2.18, "grad_norm": 0.7097046375274658, "learning_rate": 0.00010326267296333445, "loss": 3.0353, "step": 44544 }, { "epoch": 2.18, "grad_norm": 0.7098416090011597, "learning_rate": 0.00010325104998529136, "loss": 2.8349, "step": 44545 }, { "epoch": 2.18, "grad_norm": 0.719925582408905, "learning_rate": 0.00010323942752544725, "loss": 2.8611, "step": 44546 }, { "epoch": 2.18, "grad_norm": 0.7523378133773804, "learning_rate": 0.0001032278055838326, "loss": 2.8775, "step": 44547 }, { "epoch": 2.18, "grad_norm": 0.7218157649040222, "learning_rate": 0.00010321618416047796, "loss": 2.8731, "step": 44548 }, { "epoch": 2.18, "grad_norm": 0.6982775926589966, "learning_rate": 0.00010320456325541412, "loss": 2.6777, "step": 44549 }, { "epoch": 2.18, "grad_norm": 0.7226266264915466, "learning_rate": 0.0001031929428686715, "loss": 3.0359, "step": 44550 }, { "epoch": 2.18, "grad_norm": 0.7298018932342529, "learning_rate": 0.00010318132300028087, "loss": 2.7433, "step": 44551 }, { "epoch": 2.18, "grad_norm": 0.725264310836792, "learning_rate": 0.00010316970365027267, "loss": 2.873, "step": 44552 }, { "epoch": 2.18, "grad_norm": 0.70024573802948, "learning_rate": 0.00010315808481867765, "loss": 3.1367, "step": 44553 }, { "epoch": 2.18, "grad_norm": 0.6710588335990906, "learning_rate": 0.0001031464665055264, "loss": 2.8933, "step": 44554 }, { "epoch": 2.18, "grad_norm": 0.7196180820465088, "learning_rate": 0.00010313484871084935, "loss": 2.9405, "step": 44555 }, { "epoch": 2.18, "grad_norm": 0.6892144083976746, "learning_rate": 0.00010312323143467734, "loss": 2.879, "step": 44556 }, { "epoch": 2.18, "grad_norm": 0.7082641124725342, "learning_rate": 0.00010311161467704073, "loss": 2.9238, "step": 44557 }, { "epoch": 2.18, "grad_norm": 0.709845244884491, "learning_rate": 0.00010309999843797025, "loss": 2.761, "step": 44558 }, { "epoch": 2.18, "grad_norm": 0.7715518474578857, "learning_rate": 0.00010308838271749657, "loss": 2.8075, "step": 44559 }, { "epoch": 2.18, "grad_norm": 0.6767800450325012, "learning_rate": 0.0001030767675156501, "loss": 2.8401, "step": 44560 }, { "epoch": 2.18, "grad_norm": 0.74344402551651, "learning_rate": 0.00010306515283246161, "loss": 2.967, "step": 44561 }, { "epoch": 2.18, "grad_norm": 0.7046286463737488, "learning_rate": 0.00010305353866796163, "loss": 3.0008, "step": 44562 }, { "epoch": 2.18, "grad_norm": 0.6904906034469604, "learning_rate": 0.00010304192502218059, "loss": 2.8009, "step": 44563 }, { "epoch": 2.18, "grad_norm": 0.7006396055221558, "learning_rate": 0.00010303031189514934, "loss": 2.9043, "step": 44564 }, { "epoch": 2.18, "grad_norm": 0.6934382915496826, "learning_rate": 0.00010301869928689824, "loss": 2.9064, "step": 44565 }, { "epoch": 2.18, "grad_norm": 0.7191388010978699, "learning_rate": 0.00010300708719745807, "loss": 2.9023, "step": 44566 }, { "epoch": 2.18, "grad_norm": 0.6918665170669556, "learning_rate": 0.00010299547562685924, "loss": 2.9055, "step": 44567 }, { "epoch": 2.18, "grad_norm": 0.7260489463806152, "learning_rate": 0.00010298386457513247, "loss": 3.0175, "step": 44568 }, { "epoch": 2.18, "grad_norm": 0.688124418258667, "learning_rate": 0.00010297225404230834, "loss": 2.9031, "step": 44569 }, { "epoch": 2.18, "grad_norm": 0.6864445805549622, "learning_rate": 0.00010296064402841724, "loss": 2.9554, "step": 44570 }, { "epoch": 2.18, "grad_norm": 0.6903675198554993, "learning_rate": 0.00010294903453349, "loss": 2.8524, "step": 44571 }, { "epoch": 2.18, "grad_norm": 0.6836926937103271, "learning_rate": 0.00010293742555755695, "loss": 3.106, "step": 44572 }, { "epoch": 2.18, "grad_norm": 0.6697653532028198, "learning_rate": 0.00010292581710064892, "loss": 2.6941, "step": 44573 }, { "epoch": 2.18, "grad_norm": 0.7098059058189392, "learning_rate": 0.00010291420916279625, "loss": 2.8763, "step": 44574 }, { "epoch": 2.18, "grad_norm": 0.6917982697486877, "learning_rate": 0.00010290260174402974, "loss": 2.9143, "step": 44575 }, { "epoch": 2.18, "grad_norm": 0.7418469786643982, "learning_rate": 0.00010289099484437982, "loss": 2.8845, "step": 44576 }, { "epoch": 2.18, "grad_norm": 0.6864647269248962, "learning_rate": 0.00010287938846387701, "loss": 2.9388, "step": 44577 }, { "epoch": 2.18, "grad_norm": 0.6718887686729431, "learning_rate": 0.00010286778260255203, "loss": 3.0018, "step": 44578 }, { "epoch": 2.18, "grad_norm": 0.7265290021896362, "learning_rate": 0.00010285617726043529, "loss": 2.9683, "step": 44579 }, { "epoch": 2.18, "grad_norm": 0.6988957524299622, "learning_rate": 0.00010284457243755743, "loss": 2.8479, "step": 44580 }, { "epoch": 2.18, "grad_norm": 0.7348703145980835, "learning_rate": 0.00010283296813394911, "loss": 2.7909, "step": 44581 }, { "epoch": 2.18, "grad_norm": 0.7384020090103149, "learning_rate": 0.00010282136434964072, "loss": 2.8118, "step": 44582 }, { "epoch": 2.18, "grad_norm": 0.7310593724250793, "learning_rate": 0.00010280976108466301, "loss": 2.8275, "step": 44583 }, { "epoch": 2.18, "grad_norm": 0.7117740511894226, "learning_rate": 0.00010279815833904645, "loss": 2.8532, "step": 44584 }, { "epoch": 2.19, "grad_norm": 0.6952208280563354, "learning_rate": 0.0001027865561128215, "loss": 2.6818, "step": 44585 }, { "epoch": 2.19, "grad_norm": 0.6998794078826904, "learning_rate": 0.00010277495440601887, "loss": 2.7961, "step": 44586 }, { "epoch": 2.19, "grad_norm": 0.71541827917099, "learning_rate": 0.00010276335321866896, "loss": 3.0561, "step": 44587 }, { "epoch": 2.19, "grad_norm": 0.7112535834312439, "learning_rate": 0.00010275175255080251, "loss": 2.9963, "step": 44588 }, { "epoch": 2.19, "grad_norm": 0.7849178314208984, "learning_rate": 0.00010274015240244992, "loss": 2.8191, "step": 44589 }, { "epoch": 2.19, "grad_norm": 0.7260420918464661, "learning_rate": 0.00010272855277364188, "loss": 2.6095, "step": 44590 }, { "epoch": 2.19, "grad_norm": 0.6618551015853882, "learning_rate": 0.00010271695366440885, "loss": 2.9215, "step": 44591 }, { "epoch": 2.19, "grad_norm": 0.6712572574615479, "learning_rate": 0.00010270535507478135, "loss": 2.9213, "step": 44592 }, { "epoch": 2.19, "grad_norm": 0.7594893574714661, "learning_rate": 0.00010269375700479002, "loss": 2.7903, "step": 44593 }, { "epoch": 2.19, "grad_norm": 0.7108433246612549, "learning_rate": 0.00010268215945446529, "loss": 2.9075, "step": 44594 }, { "epoch": 2.19, "grad_norm": 0.725123941898346, "learning_rate": 0.00010267056242383777, "loss": 2.88, "step": 44595 }, { "epoch": 2.19, "grad_norm": 0.6654282808303833, "learning_rate": 0.0001026589659129381, "loss": 2.8843, "step": 44596 }, { "epoch": 2.19, "grad_norm": 0.751349687576294, "learning_rate": 0.00010264736992179675, "loss": 3.0375, "step": 44597 }, { "epoch": 2.19, "grad_norm": 0.7495384216308594, "learning_rate": 0.00010263577445044425, "loss": 2.9641, "step": 44598 }, { "epoch": 2.19, "grad_norm": 0.7475329041481018, "learning_rate": 0.00010262417949891104, "loss": 2.908, "step": 44599 }, { "epoch": 2.19, "grad_norm": 0.6864625811576843, "learning_rate": 0.00010261258506722772, "loss": 2.9052, "step": 44600 }, { "epoch": 2.19, "grad_norm": 0.6639381647109985, "learning_rate": 0.00010260099115542499, "loss": 2.8416, "step": 44601 }, { "epoch": 2.19, "grad_norm": 0.7066051959991455, "learning_rate": 0.00010258939776353316, "loss": 2.7455, "step": 44602 }, { "epoch": 2.19, "grad_norm": 0.6839228868484497, "learning_rate": 0.00010257780489158295, "loss": 3.0094, "step": 44603 }, { "epoch": 2.19, "grad_norm": 0.7170974016189575, "learning_rate": 0.00010256621253960474, "loss": 2.9012, "step": 44604 }, { "epoch": 2.19, "grad_norm": 0.7041524052619934, "learning_rate": 0.00010255462070762921, "loss": 3.0658, "step": 44605 }, { "epoch": 2.19, "grad_norm": 0.6791936159133911, "learning_rate": 0.0001025430293956868, "loss": 2.9157, "step": 44606 }, { "epoch": 2.19, "grad_norm": 0.7103440165519714, "learning_rate": 0.00010253143860380793, "loss": 2.9272, "step": 44607 }, { "epoch": 2.19, "grad_norm": 0.6542874574661255, "learning_rate": 0.00010251984833202337, "loss": 2.9294, "step": 44608 }, { "epoch": 2.19, "grad_norm": 0.7594596147537231, "learning_rate": 0.00010250825858036346, "loss": 2.8874, "step": 44609 }, { "epoch": 2.19, "grad_norm": 0.7474169731140137, "learning_rate": 0.00010249666934885874, "loss": 2.8206, "step": 44610 }, { "epoch": 2.19, "grad_norm": 0.7913607954978943, "learning_rate": 0.0001024850806375399, "loss": 2.8971, "step": 44611 }, { "epoch": 2.19, "grad_norm": 0.6927233934402466, "learning_rate": 0.00010247349244643734, "loss": 2.7587, "step": 44612 }, { "epoch": 2.19, "grad_norm": 0.6824236512184143, "learning_rate": 0.0001024619047755816, "loss": 2.9516, "step": 44613 }, { "epoch": 2.19, "grad_norm": 0.7336830496788025, "learning_rate": 0.00010245031762500306, "loss": 2.9245, "step": 44614 }, { "epoch": 2.19, "grad_norm": 0.7069776654243469, "learning_rate": 0.0001024387309947324, "loss": 2.7558, "step": 44615 }, { "epoch": 2.19, "grad_norm": 0.6687659621238708, "learning_rate": 0.00010242714488480018, "loss": 2.8424, "step": 44616 }, { "epoch": 2.19, "grad_norm": 0.7121585607528687, "learning_rate": 0.00010241555929523673, "loss": 2.8238, "step": 44617 }, { "epoch": 2.19, "grad_norm": 0.6747063398361206, "learning_rate": 0.00010240397422607277, "loss": 3.1629, "step": 44618 }, { "epoch": 2.19, "grad_norm": 0.7251124978065491, "learning_rate": 0.00010239238967733865, "loss": 2.8919, "step": 44619 }, { "epoch": 2.19, "grad_norm": 0.6380995512008667, "learning_rate": 0.000102380805649065, "loss": 2.9561, "step": 44620 }, { "epoch": 2.19, "grad_norm": 0.7634221315383911, "learning_rate": 0.0001023692221412823, "loss": 2.9855, "step": 44621 }, { "epoch": 2.19, "grad_norm": 0.6780490875244141, "learning_rate": 0.00010235763915402094, "loss": 2.9611, "step": 44622 }, { "epoch": 2.19, "grad_norm": 0.715461254119873, "learning_rate": 0.0001023460566873116, "loss": 2.8342, "step": 44623 }, { "epoch": 2.19, "grad_norm": 0.6985108852386475, "learning_rate": 0.0001023344747411846, "loss": 2.9235, "step": 44624 }, { "epoch": 2.19, "grad_norm": 0.7491456866264343, "learning_rate": 0.00010232289331567057, "loss": 2.9031, "step": 44625 }, { "epoch": 2.19, "grad_norm": 0.709112823009491, "learning_rate": 0.00010231131241080009, "loss": 2.8829, "step": 44626 }, { "epoch": 2.19, "grad_norm": 0.6923708319664001, "learning_rate": 0.00010229973202660358, "loss": 3.0084, "step": 44627 }, { "epoch": 2.19, "grad_norm": 0.7466208934783936, "learning_rate": 0.00010228815216311152, "loss": 2.9558, "step": 44628 }, { "epoch": 2.19, "grad_norm": 0.7512304782867432, "learning_rate": 0.00010227657282035431, "loss": 2.763, "step": 44629 }, { "epoch": 2.19, "grad_norm": 0.7179058790206909, "learning_rate": 0.00010226499399836256, "loss": 2.9326, "step": 44630 }, { "epoch": 2.19, "grad_norm": 0.7273489236831665, "learning_rate": 0.00010225341569716686, "loss": 2.8466, "step": 44631 }, { "epoch": 2.19, "grad_norm": 0.6925496459007263, "learning_rate": 0.00010224183791679751, "loss": 3.0426, "step": 44632 }, { "epoch": 2.19, "grad_norm": 0.7936264276504517, "learning_rate": 0.00010223026065728517, "loss": 2.8253, "step": 44633 }, { "epoch": 2.19, "grad_norm": 0.7013174295425415, "learning_rate": 0.00010221868391866031, "loss": 2.9166, "step": 44634 }, { "epoch": 2.19, "grad_norm": 0.7175893187522888, "learning_rate": 0.00010220710770095326, "loss": 2.8768, "step": 44635 }, { "epoch": 2.19, "grad_norm": 0.6898190975189209, "learning_rate": 0.00010219553200419469, "loss": 2.7969, "step": 44636 }, { "epoch": 2.19, "grad_norm": 0.7262145280838013, "learning_rate": 0.00010218395682841494, "loss": 2.6368, "step": 44637 }, { "epoch": 2.19, "grad_norm": 0.693634033203125, "learning_rate": 0.00010217238217364471, "loss": 3.0816, "step": 44638 }, { "epoch": 2.19, "grad_norm": 0.7244294285774231, "learning_rate": 0.0001021608080399142, "loss": 2.8939, "step": 44639 }, { "epoch": 2.19, "grad_norm": 0.6989253163337708, "learning_rate": 0.00010214923442725407, "loss": 2.8912, "step": 44640 }, { "epoch": 2.19, "grad_norm": 0.694697380065918, "learning_rate": 0.00010213766133569488, "loss": 2.85, "step": 44641 }, { "epoch": 2.19, "grad_norm": 0.7070184350013733, "learning_rate": 0.000102126088765267, "loss": 3.0055, "step": 44642 }, { "epoch": 2.19, "grad_norm": 0.7489998936653137, "learning_rate": 0.00010211451671600095, "loss": 2.6904, "step": 44643 }, { "epoch": 2.19, "grad_norm": 0.7626986503601074, "learning_rate": 0.00010210294518792708, "loss": 2.8428, "step": 44644 }, { "epoch": 2.19, "grad_norm": 0.6796693801879883, "learning_rate": 0.00010209137418107604, "loss": 3.1733, "step": 44645 }, { "epoch": 2.19, "grad_norm": 0.7073202133178711, "learning_rate": 0.00010207980369547814, "loss": 3.0606, "step": 44646 }, { "epoch": 2.19, "grad_norm": 0.7209928035736084, "learning_rate": 0.00010206823373116391, "loss": 2.6608, "step": 44647 }, { "epoch": 2.19, "grad_norm": 0.6928178668022156, "learning_rate": 0.00010205666428816403, "loss": 2.8192, "step": 44648 }, { "epoch": 2.19, "grad_norm": 0.8096620440483093, "learning_rate": 0.00010204509536650876, "loss": 2.9148, "step": 44649 }, { "epoch": 2.19, "grad_norm": 0.75314861536026, "learning_rate": 0.00010203352696622862, "loss": 2.6021, "step": 44650 }, { "epoch": 2.19, "grad_norm": 0.7307797074317932, "learning_rate": 0.00010202195908735396, "loss": 2.9043, "step": 44651 }, { "epoch": 2.19, "grad_norm": 0.7347580790519714, "learning_rate": 0.00010201039172991536, "loss": 2.7061, "step": 44652 }, { "epoch": 2.19, "grad_norm": 0.7171032428741455, "learning_rate": 0.00010199882489394339, "loss": 3.0612, "step": 44653 }, { "epoch": 2.19, "grad_norm": 0.7112158536911011, "learning_rate": 0.0001019872585794683, "loss": 2.8196, "step": 44654 }, { "epoch": 2.19, "grad_norm": 0.6790067553520203, "learning_rate": 0.0001019756927865208, "loss": 2.8819, "step": 44655 }, { "epoch": 2.19, "grad_norm": 0.7215171456336975, "learning_rate": 0.00010196412751513117, "loss": 2.9303, "step": 44656 }, { "epoch": 2.19, "grad_norm": 0.7309430241584778, "learning_rate": 0.00010195256276532984, "loss": 3.0757, "step": 44657 }, { "epoch": 2.19, "grad_norm": 0.7545703649520874, "learning_rate": 0.00010194099853714742, "loss": 2.9345, "step": 44658 }, { "epoch": 2.19, "grad_norm": 0.7092612981796265, "learning_rate": 0.00010192943483061423, "loss": 2.8953, "step": 44659 }, { "epoch": 2.19, "grad_norm": 0.6743860840797424, "learning_rate": 0.00010191787164576088, "loss": 3.0049, "step": 44660 }, { "epoch": 2.19, "grad_norm": 0.676878035068512, "learning_rate": 0.00010190630898261759, "loss": 2.8814, "step": 44661 }, { "epoch": 2.19, "grad_norm": 0.7018594741821289, "learning_rate": 0.000101894746841215, "loss": 2.926, "step": 44662 }, { "epoch": 2.19, "grad_norm": 0.6919565200805664, "learning_rate": 0.00010188318522158362, "loss": 3.0665, "step": 44663 }, { "epoch": 2.19, "grad_norm": 0.7357789278030396, "learning_rate": 0.00010187162412375378, "loss": 2.9906, "step": 44664 }, { "epoch": 2.19, "grad_norm": 0.7537810802459717, "learning_rate": 0.00010186006354775598, "loss": 2.9191, "step": 44665 }, { "epoch": 2.19, "grad_norm": 0.7341231107711792, "learning_rate": 0.00010184850349362051, "loss": 3.0268, "step": 44666 }, { "epoch": 2.19, "grad_norm": 0.6830435991287231, "learning_rate": 0.00010183694396137797, "loss": 2.8872, "step": 44667 }, { "epoch": 2.19, "grad_norm": 0.7135305404663086, "learning_rate": 0.00010182538495105886, "loss": 2.8633, "step": 44668 }, { "epoch": 2.19, "grad_norm": 0.7441238164901733, "learning_rate": 0.00010181382646269347, "loss": 2.9731, "step": 44669 }, { "epoch": 2.19, "grad_norm": 0.7147581577301025, "learning_rate": 0.00010180226849631244, "loss": 2.7636, "step": 44670 }, { "epoch": 2.19, "grad_norm": 0.7436352968215942, "learning_rate": 0.00010179071105194603, "loss": 2.9936, "step": 44671 }, { "epoch": 2.19, "grad_norm": 0.7712357640266418, "learning_rate": 0.0001017791541296247, "loss": 2.7682, "step": 44672 }, { "epoch": 2.19, "grad_norm": 0.6808838248252869, "learning_rate": 0.00010176759772937898, "loss": 2.7931, "step": 44673 }, { "epoch": 2.19, "grad_norm": 0.6870922446250916, "learning_rate": 0.0001017560418512392, "loss": 2.9253, "step": 44674 }, { "epoch": 2.19, "grad_norm": 0.7315462827682495, "learning_rate": 0.0001017444864952359, "loss": 3.0074, "step": 44675 }, { "epoch": 2.19, "grad_norm": 0.7336083054542542, "learning_rate": 0.00010173293166139942, "loss": 2.8301, "step": 44676 }, { "epoch": 2.19, "grad_norm": 0.7012789845466614, "learning_rate": 0.00010172137734976032, "loss": 2.7756, "step": 44677 }, { "epoch": 2.19, "grad_norm": 0.704544723033905, "learning_rate": 0.00010170982356034896, "loss": 2.9755, "step": 44678 }, { "epoch": 2.19, "grad_norm": 0.6943780183792114, "learning_rate": 0.00010169827029319562, "loss": 2.9111, "step": 44679 }, { "epoch": 2.19, "grad_norm": 0.7300577759742737, "learning_rate": 0.00010168671754833104, "loss": 2.8051, "step": 44680 }, { "epoch": 2.19, "grad_norm": 0.7285476922988892, "learning_rate": 0.00010167516532578535, "loss": 2.6524, "step": 44681 }, { "epoch": 2.19, "grad_norm": 0.7168903946876526, "learning_rate": 0.00010166361362558908, "loss": 2.9434, "step": 44682 }, { "epoch": 2.19, "grad_norm": 0.6996946930885315, "learning_rate": 0.00010165206244777282, "loss": 2.8507, "step": 44683 }, { "epoch": 2.19, "grad_norm": 0.6721283197402954, "learning_rate": 0.00010164051179236674, "loss": 2.8241, "step": 44684 }, { "epoch": 2.19, "grad_norm": 0.6588842272758484, "learning_rate": 0.00010162896165940148, "loss": 2.8318, "step": 44685 }, { "epoch": 2.19, "grad_norm": 0.6902130842208862, "learning_rate": 0.00010161741204890734, "loss": 2.8502, "step": 44686 }, { "epoch": 2.19, "grad_norm": 0.6781286001205444, "learning_rate": 0.00010160586296091468, "loss": 3.0355, "step": 44687 }, { "epoch": 2.19, "grad_norm": 0.7113543748855591, "learning_rate": 0.00010159431439545409, "loss": 2.9687, "step": 44688 }, { "epoch": 2.19, "grad_norm": 0.737820029258728, "learning_rate": 0.00010158276635255579, "loss": 2.955, "step": 44689 }, { "epoch": 2.19, "grad_norm": 0.7128449082374573, "learning_rate": 0.0001015712188322504, "loss": 2.8879, "step": 44690 }, { "epoch": 2.19, "grad_norm": 0.6655691862106323, "learning_rate": 0.0001015596718345681, "loss": 2.86, "step": 44691 }, { "epoch": 2.19, "grad_norm": 0.7450001239776611, "learning_rate": 0.00010154812535953957, "loss": 2.9949, "step": 44692 }, { "epoch": 2.19, "grad_norm": 0.6946448087692261, "learning_rate": 0.0001015365794071951, "loss": 3.2081, "step": 44693 }, { "epoch": 2.19, "grad_norm": 0.6991720199584961, "learning_rate": 0.00010152503397756493, "loss": 2.9359, "step": 44694 }, { "epoch": 2.19, "grad_norm": 0.7270272970199585, "learning_rate": 0.00010151348907067978, "loss": 2.7, "step": 44695 }, { "epoch": 2.19, "grad_norm": 0.6899583339691162, "learning_rate": 0.00010150194468656975, "loss": 3.1077, "step": 44696 }, { "epoch": 2.19, "grad_norm": 0.7620866894721985, "learning_rate": 0.00010149040082526541, "loss": 2.9878, "step": 44697 }, { "epoch": 2.19, "grad_norm": 0.6977541446685791, "learning_rate": 0.0001014788574867973, "loss": 2.9698, "step": 44698 }, { "epoch": 2.19, "grad_norm": 0.7355969548225403, "learning_rate": 0.00010146731467119562, "loss": 2.9087, "step": 44699 }, { "epoch": 2.19, "grad_norm": 0.6769136190414429, "learning_rate": 0.00010145577237849085, "loss": 2.988, "step": 44700 }, { "epoch": 2.19, "grad_norm": 0.6845611333847046, "learning_rate": 0.00010144423060871322, "loss": 2.7466, "step": 44701 }, { "epoch": 2.19, "grad_norm": 0.6977527141571045, "learning_rate": 0.00010143268936189332, "loss": 2.8652, "step": 44702 }, { "epoch": 2.19, "grad_norm": 0.7107285261154175, "learning_rate": 0.00010142114863806158, "loss": 3.1307, "step": 44703 }, { "epoch": 2.19, "grad_norm": 0.7218692898750305, "learning_rate": 0.00010140960843724822, "loss": 2.9057, "step": 44704 }, { "epoch": 2.19, "grad_norm": 0.6744379997253418, "learning_rate": 0.00010139806875948385, "loss": 2.8445, "step": 44705 }, { "epoch": 2.19, "grad_norm": 0.7107676267623901, "learning_rate": 0.00010138652960479864, "loss": 3.0127, "step": 44706 }, { "epoch": 2.19, "grad_norm": 0.7380881309509277, "learning_rate": 0.00010137499097322315, "loss": 2.743, "step": 44707 }, { "epoch": 2.19, "grad_norm": 0.6708388924598694, "learning_rate": 0.00010136345286478777, "loss": 2.9925, "step": 44708 }, { "epoch": 2.19, "grad_norm": 0.7307834625244141, "learning_rate": 0.0001013519152795227, "loss": 2.9303, "step": 44709 }, { "epoch": 2.19, "grad_norm": 0.7044564485549927, "learning_rate": 0.00010134037821745853, "loss": 2.8851, "step": 44710 }, { "epoch": 2.19, "grad_norm": 0.7049198746681213, "learning_rate": 0.00010132884167862551, "loss": 2.8379, "step": 44711 }, { "epoch": 2.19, "grad_norm": 0.7499628067016602, "learning_rate": 0.00010131730566305404, "loss": 2.8606, "step": 44712 }, { "epoch": 2.19, "grad_norm": 0.7331987023353577, "learning_rate": 0.00010130577017077468, "loss": 3.0568, "step": 44713 }, { "epoch": 2.19, "grad_norm": 0.7273868322372437, "learning_rate": 0.00010129423520181768, "loss": 2.8243, "step": 44714 }, { "epoch": 2.19, "grad_norm": 0.7468082308769226, "learning_rate": 0.00010128270075621343, "loss": 2.9944, "step": 44715 }, { "epoch": 2.19, "grad_norm": 0.6693267226219177, "learning_rate": 0.00010127116683399219, "loss": 2.9631, "step": 44716 }, { "epoch": 2.19, "grad_norm": 0.7424618005752563, "learning_rate": 0.00010125963343518446, "loss": 3.0344, "step": 44717 }, { "epoch": 2.19, "grad_norm": 0.6935816407203674, "learning_rate": 0.00010124810055982069, "loss": 3.0167, "step": 44718 }, { "epoch": 2.19, "grad_norm": 0.7153097987174988, "learning_rate": 0.00010123656820793108, "loss": 3.0504, "step": 44719 }, { "epoch": 2.19, "grad_norm": 0.7567762136459351, "learning_rate": 0.0001012250363795462, "loss": 2.9657, "step": 44720 }, { "epoch": 2.19, "grad_norm": 0.7075486183166504, "learning_rate": 0.00010121350507469632, "loss": 2.9507, "step": 44721 }, { "epoch": 2.19, "grad_norm": 0.7259413599967957, "learning_rate": 0.00010120197429341169, "loss": 2.8704, "step": 44722 }, { "epoch": 2.19, "grad_norm": 0.6765598654747009, "learning_rate": 0.00010119044403572293, "loss": 2.7903, "step": 44723 }, { "epoch": 2.19, "grad_norm": 0.6906706690788269, "learning_rate": 0.00010117891430166018, "loss": 2.7619, "step": 44724 }, { "epoch": 2.19, "grad_norm": 0.7371125221252441, "learning_rate": 0.00010116738509125397, "loss": 2.7714, "step": 44725 }, { "epoch": 2.19, "grad_norm": 0.711427628993988, "learning_rate": 0.00010115585640453453, "loss": 2.9452, "step": 44726 }, { "epoch": 2.19, "grad_norm": 0.6924580931663513, "learning_rate": 0.00010114432824153239, "loss": 2.8359, "step": 44727 }, { "epoch": 2.19, "grad_norm": 0.7518722414970398, "learning_rate": 0.00010113280060227773, "loss": 2.8871, "step": 44728 }, { "epoch": 2.19, "grad_norm": 0.671915590763092, "learning_rate": 0.00010112127348680109, "loss": 3.0578, "step": 44729 }, { "epoch": 2.19, "grad_norm": 0.7346581220626831, "learning_rate": 0.00010110974689513275, "loss": 2.9824, "step": 44730 }, { "epoch": 2.19, "grad_norm": 0.6971009373664856, "learning_rate": 0.00010109822082730295, "loss": 2.8534, "step": 44731 }, { "epoch": 2.19, "grad_norm": 0.7431304454803467, "learning_rate": 0.00010108669528334225, "loss": 2.9716, "step": 44732 }, { "epoch": 2.19, "grad_norm": 0.7102420330047607, "learning_rate": 0.00010107517026328083, "loss": 3.0343, "step": 44733 }, { "epoch": 2.19, "grad_norm": 0.7316977381706238, "learning_rate": 0.0001010636457671491, "loss": 3.0913, "step": 44734 }, { "epoch": 2.19, "grad_norm": 0.705828845500946, "learning_rate": 0.00010105212179497757, "loss": 3.0188, "step": 44735 }, { "epoch": 2.19, "grad_norm": 0.7134692072868347, "learning_rate": 0.00010104059834679643, "loss": 3.1093, "step": 44736 }, { "epoch": 2.19, "grad_norm": 0.7043414115905762, "learning_rate": 0.00010102907542263608, "loss": 2.8323, "step": 44737 }, { "epoch": 2.19, "grad_norm": 0.694769561290741, "learning_rate": 0.00010101755302252678, "loss": 2.843, "step": 44738 }, { "epoch": 2.19, "grad_norm": 0.7042750120162964, "learning_rate": 0.00010100603114649892, "loss": 2.9738, "step": 44739 }, { "epoch": 2.19, "grad_norm": 0.7265387177467346, "learning_rate": 0.000100994509794583, "loss": 2.8053, "step": 44740 }, { "epoch": 2.19, "grad_norm": 0.743010401725769, "learning_rate": 0.00010098298896680912, "loss": 2.7329, "step": 44741 }, { "epoch": 2.19, "grad_norm": 0.6783542633056641, "learning_rate": 0.00010097146866320784, "loss": 3.1741, "step": 44742 }, { "epoch": 2.19, "grad_norm": 0.6887816190719604, "learning_rate": 0.00010095994888380932, "loss": 2.9669, "step": 44743 }, { "epoch": 2.19, "grad_norm": 0.6920436024665833, "learning_rate": 0.00010094842962864411, "loss": 3.0444, "step": 44744 }, { "epoch": 2.19, "grad_norm": 0.6976600289344788, "learning_rate": 0.0001009369108977424, "loss": 3.287, "step": 44745 }, { "epoch": 2.19, "grad_norm": 0.7294341921806335, "learning_rate": 0.00010092539269113444, "loss": 2.784, "step": 44746 }, { "epoch": 2.19, "grad_norm": 0.687296986579895, "learning_rate": 0.00010091387500885083, "loss": 2.8358, "step": 44747 }, { "epoch": 2.19, "grad_norm": 0.7157872319221497, "learning_rate": 0.00010090235785092164, "loss": 2.8321, "step": 44748 }, { "epoch": 2.19, "grad_norm": 0.6645101308822632, "learning_rate": 0.0001008908412173773, "loss": 3.1118, "step": 44749 }, { "epoch": 2.19, "grad_norm": 0.7363380193710327, "learning_rate": 0.00010087932510824829, "loss": 2.7542, "step": 44750 }, { "epoch": 2.19, "grad_norm": 0.732443630695343, "learning_rate": 0.00010086780952356481, "loss": 3.1261, "step": 44751 }, { "epoch": 2.19, "grad_norm": 0.7466281652450562, "learning_rate": 0.00010085629446335718, "loss": 2.8356, "step": 44752 }, { "epoch": 2.19, "grad_norm": 0.705704391002655, "learning_rate": 0.00010084477992765566, "loss": 2.7202, "step": 44753 }, { "epoch": 2.19, "grad_norm": 0.6960504055023193, "learning_rate": 0.00010083326591649065, "loss": 2.9017, "step": 44754 }, { "epoch": 2.19, "grad_norm": 0.7246660590171814, "learning_rate": 0.0001008217524298926, "loss": 2.9993, "step": 44755 }, { "epoch": 2.19, "grad_norm": 0.7061540484428406, "learning_rate": 0.00010081023946789163, "loss": 2.8255, "step": 44756 }, { "epoch": 2.19, "grad_norm": 0.7059864401817322, "learning_rate": 0.00010079872703051826, "loss": 2.7661, "step": 44757 }, { "epoch": 2.19, "grad_norm": 0.7149134278297424, "learning_rate": 0.00010078721511780266, "loss": 2.8085, "step": 44758 }, { "epoch": 2.19, "grad_norm": 0.6905004978179932, "learning_rate": 0.00010077570372977513, "loss": 2.9624, "step": 44759 }, { "epoch": 2.19, "grad_norm": 0.7025719285011292, "learning_rate": 0.00010076419286646614, "loss": 2.9586, "step": 44760 }, { "epoch": 2.19, "grad_norm": 0.7020415663719177, "learning_rate": 0.00010075268252790586, "loss": 2.8406, "step": 44761 }, { "epoch": 2.19, "grad_norm": 0.7688156366348267, "learning_rate": 0.00010074117271412473, "loss": 2.9296, "step": 44762 }, { "epoch": 2.19, "grad_norm": 0.7104856967926025, "learning_rate": 0.0001007296634251529, "loss": 2.875, "step": 44763 }, { "epoch": 2.19, "grad_norm": 0.7477237582206726, "learning_rate": 0.0001007181546610208, "loss": 2.8623, "step": 44764 }, { "epoch": 2.19, "grad_norm": 0.7128356099128723, "learning_rate": 0.00010070664642175885, "loss": 2.9696, "step": 44765 }, { "epoch": 2.19, "grad_norm": 0.730711042881012, "learning_rate": 0.0001006951387073972, "loss": 2.933, "step": 44766 }, { "epoch": 2.19, "grad_norm": 0.7141094207763672, "learning_rate": 0.0001006836315179662, "loss": 2.9074, "step": 44767 }, { "epoch": 2.19, "grad_norm": 0.6910660266876221, "learning_rate": 0.00010067212485349609, "loss": 2.9853, "step": 44768 }, { "epoch": 2.19, "grad_norm": 0.6767820715904236, "learning_rate": 0.00010066061871401721, "loss": 2.9947, "step": 44769 }, { "epoch": 2.19, "grad_norm": 0.7095887660980225, "learning_rate": 0.00010064911309956001, "loss": 2.7702, "step": 44770 }, { "epoch": 2.19, "grad_norm": 0.7199665904045105, "learning_rate": 0.00010063760801015457, "loss": 2.8923, "step": 44771 }, { "epoch": 2.19, "grad_norm": 0.7285208702087402, "learning_rate": 0.0001006261034458314, "loss": 2.9582, "step": 44772 }, { "epoch": 2.19, "grad_norm": 0.7214688658714294, "learning_rate": 0.00010061459940662073, "loss": 2.7663, "step": 44773 }, { "epoch": 2.19, "grad_norm": 0.7294226288795471, "learning_rate": 0.00010060309589255273, "loss": 3.0265, "step": 44774 }, { "epoch": 2.19, "grad_norm": 0.7086969614028931, "learning_rate": 0.00010059159290365788, "loss": 2.8846, "step": 44775 }, { "epoch": 2.19, "grad_norm": 0.7389560341835022, "learning_rate": 0.00010058009043996628, "loss": 2.8932, "step": 44776 }, { "epoch": 2.19, "grad_norm": 0.6759814620018005, "learning_rate": 0.00010056858850150849, "loss": 2.9601, "step": 44777 }, { "epoch": 2.19, "grad_norm": 0.7065625786781311, "learning_rate": 0.00010055708708831451, "loss": 2.806, "step": 44778 }, { "epoch": 2.19, "grad_norm": 0.7095101475715637, "learning_rate": 0.00010054558620041492, "loss": 2.9055, "step": 44779 }, { "epoch": 2.19, "grad_norm": 0.791003942489624, "learning_rate": 0.00010053408583783984, "loss": 2.9966, "step": 44780 }, { "epoch": 2.19, "grad_norm": 0.7229596376419067, "learning_rate": 0.00010052258600061953, "loss": 2.7862, "step": 44781 }, { "epoch": 2.19, "grad_norm": 0.7416842579841614, "learning_rate": 0.0001005110866887844, "loss": 2.8895, "step": 44782 }, { "epoch": 2.19, "grad_norm": 0.737678050994873, "learning_rate": 0.0001004995879023646, "loss": 3.0674, "step": 44783 }, { "epoch": 2.19, "grad_norm": 0.6982100605964661, "learning_rate": 0.00010048808964139045, "loss": 2.7872, "step": 44784 }, { "epoch": 2.19, "grad_norm": 0.7631065845489502, "learning_rate": 0.00010047659190589243, "loss": 2.8958, "step": 44785 }, { "epoch": 2.19, "grad_norm": 0.6911045908927917, "learning_rate": 0.00010046509469590054, "loss": 2.7337, "step": 44786 }, { "epoch": 2.19, "grad_norm": 0.6925402283668518, "learning_rate": 0.00010045359801144526, "loss": 2.8405, "step": 44787 }, { "epoch": 2.19, "grad_norm": 0.7005817890167236, "learning_rate": 0.00010044210185255685, "loss": 3.0554, "step": 44788 }, { "epoch": 2.2, "grad_norm": 0.7038406133651733, "learning_rate": 0.0001004306062192654, "loss": 2.8066, "step": 44789 }, { "epoch": 2.2, "grad_norm": 0.7171939015388489, "learning_rate": 0.00010041911111160143, "loss": 2.9208, "step": 44790 }, { "epoch": 2.2, "grad_norm": 0.757029116153717, "learning_rate": 0.00010040761652959505, "loss": 2.892, "step": 44791 }, { "epoch": 2.2, "grad_norm": 0.703034520149231, "learning_rate": 0.00010039612247327666, "loss": 2.908, "step": 44792 }, { "epoch": 2.2, "grad_norm": 0.7032923698425293, "learning_rate": 0.00010038462894267639, "loss": 2.9547, "step": 44793 }, { "epoch": 2.2, "grad_norm": 0.7094815373420715, "learning_rate": 0.00010037313593782467, "loss": 2.9108, "step": 44794 }, { "epoch": 2.2, "grad_norm": 0.6859133839607239, "learning_rate": 0.00010036164345875168, "loss": 2.9368, "step": 44795 }, { "epoch": 2.2, "grad_norm": 0.7258820533752441, "learning_rate": 0.00010035015150548763, "loss": 2.901, "step": 44796 }, { "epoch": 2.2, "grad_norm": 0.7605149149894714, "learning_rate": 0.00010033866007806296, "loss": 2.7665, "step": 44797 }, { "epoch": 2.2, "grad_norm": 0.7210118174552917, "learning_rate": 0.00010032716917650771, "loss": 3.0509, "step": 44798 }, { "epoch": 2.2, "grad_norm": 0.7114245891571045, "learning_rate": 0.0001003156788008523, "loss": 2.6478, "step": 44799 }, { "epoch": 2.2, "grad_norm": 0.7188319563865662, "learning_rate": 0.00010030418895112701, "loss": 2.9248, "step": 44800 }, { "epoch": 2.2, "grad_norm": 0.7011023759841919, "learning_rate": 0.00010029269962736209, "loss": 2.8442, "step": 44801 }, { "epoch": 2.2, "grad_norm": 0.7070204019546509, "learning_rate": 0.00010028121082958777, "loss": 2.9462, "step": 44802 }, { "epoch": 2.2, "grad_norm": 0.7259615659713745, "learning_rate": 0.0001002697225578342, "loss": 3.1676, "step": 44803 }, { "epoch": 2.2, "grad_norm": 0.6928755640983582, "learning_rate": 0.00010025823481213183, "loss": 2.977, "step": 44804 }, { "epoch": 2.2, "grad_norm": 0.7410005331039429, "learning_rate": 0.00010024674759251074, "loss": 3.0249, "step": 44805 }, { "epoch": 2.2, "grad_norm": 0.6871789693832397, "learning_rate": 0.00010023526089900125, "loss": 2.867, "step": 44806 }, { "epoch": 2.2, "grad_norm": 0.6890696883201599, "learning_rate": 0.00010022377473163378, "loss": 3.096, "step": 44807 }, { "epoch": 2.2, "grad_norm": 0.6946797966957092, "learning_rate": 0.00010021228909043831, "loss": 2.9074, "step": 44808 }, { "epoch": 2.2, "grad_norm": 0.7045637369155884, "learning_rate": 0.0001002008039754453, "loss": 3.0301, "step": 44809 }, { "epoch": 2.2, "grad_norm": 0.7553611397743225, "learning_rate": 0.00010018931938668497, "loss": 2.8607, "step": 44810 }, { "epoch": 2.2, "grad_norm": 0.6659156680107117, "learning_rate": 0.00010017783532418738, "loss": 2.957, "step": 44811 }, { "epoch": 2.2, "grad_norm": 0.7131342887878418, "learning_rate": 0.00010016635178798304, "loss": 2.9252, "step": 44812 }, { "epoch": 2.2, "grad_norm": 0.741779625415802, "learning_rate": 0.00010015486877810194, "loss": 2.9722, "step": 44813 }, { "epoch": 2.2, "grad_norm": 0.6962339282035828, "learning_rate": 0.0001001433862945746, "loss": 2.8588, "step": 44814 }, { "epoch": 2.2, "grad_norm": 0.6637634038925171, "learning_rate": 0.00010013190433743097, "loss": 2.912, "step": 44815 }, { "epoch": 2.2, "grad_norm": 0.6877065896987915, "learning_rate": 0.00010012042290670158, "loss": 3.0355, "step": 44816 }, { "epoch": 2.2, "grad_norm": 0.7660030126571655, "learning_rate": 0.00010010894200241648, "loss": 3.0438, "step": 44817 }, { "epoch": 2.2, "grad_norm": 0.711549699306488, "learning_rate": 0.00010009746162460592, "loss": 2.8984, "step": 44818 }, { "epoch": 2.2, "grad_norm": 0.7448240518569946, "learning_rate": 0.00010008598177330022, "loss": 2.9206, "step": 44819 }, { "epoch": 2.2, "grad_norm": 0.6869280934333801, "learning_rate": 0.00010007450244852951, "loss": 2.9734, "step": 44820 }, { "epoch": 2.2, "grad_norm": 0.8840466141700745, "learning_rate": 0.00010006302365032404, "loss": 2.6196, "step": 44821 }, { "epoch": 2.2, "grad_norm": 0.7302159070968628, "learning_rate": 0.00010005154537871423, "loss": 2.9981, "step": 44822 }, { "epoch": 2.2, "grad_norm": 0.7159656882286072, "learning_rate": 0.00010004006763373014, "loss": 2.7305, "step": 44823 }, { "epoch": 2.2, "grad_norm": 0.7011760473251343, "learning_rate": 0.00010002859041540202, "loss": 2.9915, "step": 44824 }, { "epoch": 2.2, "grad_norm": 0.8024114370346069, "learning_rate": 0.00010001711372376005, "loss": 2.9443, "step": 44825 }, { "epoch": 2.2, "grad_norm": 0.7106544971466064, "learning_rate": 0.00010000563755883449, "loss": 2.8404, "step": 44826 }, { "epoch": 2.2, "grad_norm": 0.7113576531410217, "learning_rate": 9.999416192065568e-05, "loss": 3.0292, "step": 44827 }, { "epoch": 2.2, "grad_norm": 0.7601901888847351, "learning_rate": 9.998268680925368e-05, "loss": 2.8893, "step": 44828 }, { "epoch": 2.2, "grad_norm": 0.721528172492981, "learning_rate": 9.997121222465886e-05, "loss": 2.8252, "step": 44829 }, { "epoch": 2.2, "grad_norm": 0.7221820950508118, "learning_rate": 9.99597381669013e-05, "loss": 2.986, "step": 44830 }, { "epoch": 2.2, "grad_norm": 0.733727216720581, "learning_rate": 9.994826463601136e-05, "loss": 2.9532, "step": 44831 }, { "epoch": 2.2, "grad_norm": 0.6860153675079346, "learning_rate": 9.993679163201921e-05, "loss": 2.9451, "step": 44832 }, { "epoch": 2.2, "grad_norm": 0.7270806431770325, "learning_rate": 9.992531915495494e-05, "loss": 2.8611, "step": 44833 }, { "epoch": 2.2, "grad_norm": 0.6891951560974121, "learning_rate": 9.9913847204849e-05, "loss": 3.1389, "step": 44834 }, { "epoch": 2.2, "grad_norm": 0.7794495224952698, "learning_rate": 9.990237578173135e-05, "loss": 2.7893, "step": 44835 }, { "epoch": 2.2, "grad_norm": 0.707525908946991, "learning_rate": 9.989090488563233e-05, "loss": 2.9123, "step": 44836 }, { "epoch": 2.2, "grad_norm": 0.788293719291687, "learning_rate": 9.987943451658228e-05, "loss": 2.8435, "step": 44837 }, { "epoch": 2.2, "grad_norm": 0.7396550178527832, "learning_rate": 9.98679646746113e-05, "loss": 2.9739, "step": 44838 }, { "epoch": 2.2, "grad_norm": 0.7474350929260254, "learning_rate": 9.985649535974954e-05, "loss": 2.8834, "step": 44839 }, { "epoch": 2.2, "grad_norm": 0.7349193096160889, "learning_rate": 9.984502657202716e-05, "loss": 2.9247, "step": 44840 }, { "epoch": 2.2, "grad_norm": 0.6847119927406311, "learning_rate": 9.983355831147446e-05, "loss": 3.1132, "step": 44841 }, { "epoch": 2.2, "grad_norm": 0.7022256255149841, "learning_rate": 9.982209057812177e-05, "loss": 2.9059, "step": 44842 }, { "epoch": 2.2, "grad_norm": 0.7146331667900085, "learning_rate": 9.981062337199905e-05, "loss": 2.7936, "step": 44843 }, { "epoch": 2.2, "grad_norm": 0.7405462265014648, "learning_rate": 9.979915669313672e-05, "loss": 2.8458, "step": 44844 }, { "epoch": 2.2, "grad_norm": 0.7254295945167542, "learning_rate": 9.97876905415648e-05, "loss": 3.0363, "step": 44845 }, { "epoch": 2.2, "grad_norm": 0.6980922222137451, "learning_rate": 9.977622491731364e-05, "loss": 3.0195, "step": 44846 }, { "epoch": 2.2, "grad_norm": 0.6992781758308411, "learning_rate": 9.97647598204134e-05, "loss": 3.0519, "step": 44847 }, { "epoch": 2.2, "grad_norm": 0.7055832147598267, "learning_rate": 9.975329525089412e-05, "loss": 2.8358, "step": 44848 }, { "epoch": 2.2, "grad_norm": 0.6944383382797241, "learning_rate": 9.974183120878624e-05, "loss": 3.0116, "step": 44849 }, { "epoch": 2.2, "grad_norm": 0.714631199836731, "learning_rate": 9.973036769411972e-05, "loss": 3.0383, "step": 44850 }, { "epoch": 2.2, "grad_norm": 0.7210114598274231, "learning_rate": 9.971890470692489e-05, "loss": 2.9004, "step": 44851 }, { "epoch": 2.2, "grad_norm": 0.7096450924873352, "learning_rate": 9.970744224723201e-05, "loss": 2.8675, "step": 44852 }, { "epoch": 2.2, "grad_norm": 0.6983294486999512, "learning_rate": 9.969598031507119e-05, "loss": 3.1334, "step": 44853 }, { "epoch": 2.2, "grad_norm": 0.6857281923294067, "learning_rate": 9.968451891047259e-05, "loss": 2.967, "step": 44854 }, { "epoch": 2.2, "grad_norm": 0.6746247410774231, "learning_rate": 9.967305803346633e-05, "loss": 3.0097, "step": 44855 }, { "epoch": 2.2, "grad_norm": 0.7361533045768738, "learning_rate": 9.966159768408267e-05, "loss": 2.8268, "step": 44856 }, { "epoch": 2.2, "grad_norm": 0.7003144025802612, "learning_rate": 9.965013786235192e-05, "loss": 3.0137, "step": 44857 }, { "epoch": 2.2, "grad_norm": 0.7251600623130798, "learning_rate": 9.963867856830404e-05, "loss": 2.7261, "step": 44858 }, { "epoch": 2.2, "grad_norm": 0.7403432130813599, "learning_rate": 9.962721980196942e-05, "loss": 3.0198, "step": 44859 }, { "epoch": 2.2, "grad_norm": 0.7124742865562439, "learning_rate": 9.961576156337814e-05, "loss": 2.8851, "step": 44860 }, { "epoch": 2.2, "grad_norm": 0.6854257583618164, "learning_rate": 9.960430385256029e-05, "loss": 3.0093, "step": 44861 }, { "epoch": 2.2, "grad_norm": 0.7400795221328735, "learning_rate": 9.95928466695462e-05, "loss": 3.0098, "step": 44862 }, { "epoch": 2.2, "grad_norm": 0.7203598022460938, "learning_rate": 9.958139001436593e-05, "loss": 2.8234, "step": 44863 }, { "epoch": 2.2, "grad_norm": 0.7296319007873535, "learning_rate": 9.956993388704977e-05, "loss": 3.0328, "step": 44864 }, { "epoch": 2.2, "grad_norm": 0.7356207966804504, "learning_rate": 9.955847828762778e-05, "loss": 2.9695, "step": 44865 }, { "epoch": 2.2, "grad_norm": 0.7014438509941101, "learning_rate": 9.954702321613015e-05, "loss": 3.0264, "step": 44866 }, { "epoch": 2.2, "grad_norm": 0.740602970123291, "learning_rate": 9.953556867258717e-05, "loss": 2.8827, "step": 44867 }, { "epoch": 2.2, "grad_norm": 0.699821412563324, "learning_rate": 9.952411465702893e-05, "loss": 2.8705, "step": 44868 }, { "epoch": 2.2, "grad_norm": 0.7270916104316711, "learning_rate": 9.95126611694856e-05, "loss": 3.002, "step": 44869 }, { "epoch": 2.2, "grad_norm": 0.6713517904281616, "learning_rate": 9.950120820998721e-05, "loss": 2.8885, "step": 44870 }, { "epoch": 2.2, "grad_norm": 0.7152098417282104, "learning_rate": 9.948975577856409e-05, "loss": 3.0077, "step": 44871 }, { "epoch": 2.2, "grad_norm": 0.703411340713501, "learning_rate": 9.947830387524644e-05, "loss": 2.8513, "step": 44872 }, { "epoch": 2.2, "grad_norm": 0.7669411301612854, "learning_rate": 9.946685250006427e-05, "loss": 3.0245, "step": 44873 }, { "epoch": 2.2, "grad_norm": 0.6936809420585632, "learning_rate": 9.945540165304789e-05, "loss": 2.8511, "step": 44874 }, { "epoch": 2.2, "grad_norm": 0.7249215841293335, "learning_rate": 9.944395133422741e-05, "loss": 2.9093, "step": 44875 }, { "epoch": 2.2, "grad_norm": 0.7058155536651611, "learning_rate": 9.943250154363283e-05, "loss": 3.0376, "step": 44876 }, { "epoch": 2.2, "grad_norm": 0.7191633582115173, "learning_rate": 9.942105228129456e-05, "loss": 3.0574, "step": 44877 }, { "epoch": 2.2, "grad_norm": 0.7496932744979858, "learning_rate": 9.940960354724255e-05, "loss": 2.8689, "step": 44878 }, { "epoch": 2.2, "grad_norm": 0.7125669121742249, "learning_rate": 9.939815534150711e-05, "loss": 3.145, "step": 44879 }, { "epoch": 2.2, "grad_norm": 0.7709475159645081, "learning_rate": 9.938670766411825e-05, "loss": 2.8035, "step": 44880 }, { "epoch": 2.2, "grad_norm": 0.705838680267334, "learning_rate": 9.937526051510629e-05, "loss": 2.9948, "step": 44881 }, { "epoch": 2.2, "grad_norm": 0.7062419056892395, "learning_rate": 9.936381389450129e-05, "loss": 2.7527, "step": 44882 }, { "epoch": 2.2, "grad_norm": 0.7100111246109009, "learning_rate": 9.935236780233325e-05, "loss": 2.9641, "step": 44883 }, { "epoch": 2.2, "grad_norm": 0.6903637051582336, "learning_rate": 9.934092223863261e-05, "loss": 2.9546, "step": 44884 }, { "epoch": 2.2, "grad_norm": 0.6916725039482117, "learning_rate": 9.932947720342921e-05, "loss": 2.8971, "step": 44885 }, { "epoch": 2.2, "grad_norm": 0.7668691277503967, "learning_rate": 9.931803269675351e-05, "loss": 2.7209, "step": 44886 }, { "epoch": 2.2, "grad_norm": 0.6978892087936401, "learning_rate": 9.930658871863533e-05, "loss": 2.9635, "step": 44887 }, { "epoch": 2.2, "grad_norm": 0.7086914777755737, "learning_rate": 9.929514526910499e-05, "loss": 3.0682, "step": 44888 }, { "epoch": 2.2, "grad_norm": 0.7077818512916565, "learning_rate": 9.928370234819274e-05, "loss": 2.9384, "step": 44889 }, { "epoch": 2.2, "grad_norm": 0.6918936967849731, "learning_rate": 9.927225995592855e-05, "loss": 2.7194, "step": 44890 }, { "epoch": 2.2, "grad_norm": 0.7205008864402771, "learning_rate": 9.926081809234262e-05, "loss": 2.9189, "step": 44891 }, { "epoch": 2.2, "grad_norm": 0.7491716146469116, "learning_rate": 9.924937675746493e-05, "loss": 3.0693, "step": 44892 }, { "epoch": 2.2, "grad_norm": 0.7079654932022095, "learning_rate": 9.923793595132577e-05, "loss": 3.0083, "step": 44893 }, { "epoch": 2.2, "grad_norm": 0.7141336798667908, "learning_rate": 9.922649567395537e-05, "loss": 2.8574, "step": 44894 }, { "epoch": 2.2, "grad_norm": 0.7455933690071106, "learning_rate": 9.921505592538362e-05, "loss": 3.0319, "step": 44895 }, { "epoch": 2.2, "grad_norm": 0.7107555270195007, "learning_rate": 9.920361670564087e-05, "loss": 2.7566, "step": 44896 }, { "epoch": 2.2, "grad_norm": 0.7711759209632874, "learning_rate": 9.919217801475715e-05, "loss": 2.9126, "step": 44897 }, { "epoch": 2.2, "grad_norm": 0.6815950870513916, "learning_rate": 9.918073985276248e-05, "loss": 2.7761, "step": 44898 }, { "epoch": 2.2, "grad_norm": 0.7206303477287292, "learning_rate": 9.916930221968724e-05, "loss": 3.123, "step": 44899 }, { "epoch": 2.2, "grad_norm": 0.675313413143158, "learning_rate": 9.915786511556123e-05, "loss": 2.9108, "step": 44900 }, { "epoch": 2.2, "grad_norm": 0.7314799427986145, "learning_rate": 9.91464285404149e-05, "loss": 2.7628, "step": 44901 }, { "epoch": 2.2, "grad_norm": 0.7563438415527344, "learning_rate": 9.913499249427812e-05, "loss": 2.858, "step": 44902 }, { "epoch": 2.2, "grad_norm": 0.7086213231086731, "learning_rate": 9.912355697718119e-05, "loss": 2.8109, "step": 44903 }, { "epoch": 2.2, "grad_norm": 0.7429339289665222, "learning_rate": 9.911212198915415e-05, "loss": 2.8593, "step": 44904 }, { "epoch": 2.2, "grad_norm": 0.6543605327606201, "learning_rate": 9.910068753022706e-05, "loss": 2.8576, "step": 44905 }, { "epoch": 2.2, "grad_norm": 0.7347574830055237, "learning_rate": 9.908925360043015e-05, "loss": 2.725, "step": 44906 }, { "epoch": 2.2, "grad_norm": 0.7006809115409851, "learning_rate": 9.907782019979343e-05, "loss": 2.8389, "step": 44907 }, { "epoch": 2.2, "grad_norm": 0.7191192507743835, "learning_rate": 9.906638732834701e-05, "loss": 2.9768, "step": 44908 }, { "epoch": 2.2, "grad_norm": 0.7019370794296265, "learning_rate": 9.905495498612118e-05, "loss": 2.7632, "step": 44909 }, { "epoch": 2.2, "grad_norm": 0.7217749357223511, "learning_rate": 9.904352317314583e-05, "loss": 2.9252, "step": 44910 }, { "epoch": 2.2, "grad_norm": 0.7480356097221375, "learning_rate": 9.903209188945127e-05, "loss": 2.9351, "step": 44911 }, { "epoch": 2.2, "grad_norm": 0.7302222847938538, "learning_rate": 9.902066113506747e-05, "loss": 2.8074, "step": 44912 }, { "epoch": 2.2, "grad_norm": 0.7058594226837158, "learning_rate": 9.90092309100245e-05, "loss": 2.976, "step": 44913 }, { "epoch": 2.2, "grad_norm": 0.7320786118507385, "learning_rate": 9.899780121435262e-05, "loss": 2.8483, "step": 44914 }, { "epoch": 2.2, "grad_norm": 0.7021784782409668, "learning_rate": 9.898637204808176e-05, "loss": 2.9878, "step": 44915 }, { "epoch": 2.2, "grad_norm": 0.7117753028869629, "learning_rate": 9.89749434112422e-05, "loss": 2.871, "step": 44916 }, { "epoch": 2.2, "grad_norm": 0.7381766438484192, "learning_rate": 9.896351530386385e-05, "loss": 2.8505, "step": 44917 }, { "epoch": 2.2, "grad_norm": 0.69048011302948, "learning_rate": 9.895208772597705e-05, "loss": 3.1488, "step": 44918 }, { "epoch": 2.2, "grad_norm": 0.7255758047103882, "learning_rate": 9.894066067761171e-05, "loss": 2.8735, "step": 44919 }, { "epoch": 2.2, "grad_norm": 0.6982675790786743, "learning_rate": 9.89292341587979e-05, "loss": 2.9529, "step": 44920 }, { "epoch": 2.2, "grad_norm": 0.7159181237220764, "learning_rate": 9.891780816956587e-05, "loss": 2.9456, "step": 44921 }, { "epoch": 2.2, "grad_norm": 0.6875530481338501, "learning_rate": 9.890638270994553e-05, "loss": 2.8369, "step": 44922 }, { "epoch": 2.2, "grad_norm": 0.6837921142578125, "learning_rate": 9.889495777996709e-05, "loss": 3.0856, "step": 44923 }, { "epoch": 2.2, "grad_norm": 0.7514594793319702, "learning_rate": 9.888353337966075e-05, "loss": 3.0671, "step": 44924 }, { "epoch": 2.2, "grad_norm": 0.7067429423332214, "learning_rate": 9.887210950905643e-05, "loss": 2.7875, "step": 44925 }, { "epoch": 2.2, "grad_norm": 0.7050120830535889, "learning_rate": 9.88606861681843e-05, "loss": 3.0002, "step": 44926 }, { "epoch": 2.2, "grad_norm": 0.7361913919448853, "learning_rate": 9.884926335707431e-05, "loss": 2.9435, "step": 44927 }, { "epoch": 2.2, "grad_norm": 0.7067899107933044, "learning_rate": 9.883784107575665e-05, "loss": 2.8466, "step": 44928 }, { "epoch": 2.2, "grad_norm": 0.7761995792388916, "learning_rate": 9.882641932426149e-05, "loss": 2.9587, "step": 44929 }, { "epoch": 2.2, "grad_norm": 0.7403863668441772, "learning_rate": 9.881499810261871e-05, "loss": 2.7738, "step": 44930 }, { "epoch": 2.2, "grad_norm": 0.6781747341156006, "learning_rate": 9.880357741085865e-05, "loss": 2.973, "step": 44931 }, { "epoch": 2.2, "grad_norm": 0.7273198366165161, "learning_rate": 9.879215724901109e-05, "loss": 2.8573, "step": 44932 }, { "epoch": 2.2, "grad_norm": 0.7114270925521851, "learning_rate": 9.878073761710639e-05, "loss": 2.7882, "step": 44933 }, { "epoch": 2.2, "grad_norm": 0.7011170387268066, "learning_rate": 9.876931851517454e-05, "loss": 2.8851, "step": 44934 }, { "epoch": 2.2, "grad_norm": 0.7164783477783203, "learning_rate": 9.875789994324544e-05, "loss": 3.0467, "step": 44935 }, { "epoch": 2.2, "grad_norm": 0.7922382950782776, "learning_rate": 9.874648190134941e-05, "loss": 3.0482, "step": 44936 }, { "epoch": 2.2, "grad_norm": 0.6847321391105652, "learning_rate": 9.87350643895163e-05, "loss": 2.9396, "step": 44937 }, { "epoch": 2.2, "grad_norm": 0.7641184329986572, "learning_rate": 9.872364740777632e-05, "loss": 3.0284, "step": 44938 }, { "epoch": 2.2, "grad_norm": 0.7687472105026245, "learning_rate": 9.871223095615959e-05, "loss": 3.0097, "step": 44939 }, { "epoch": 2.2, "grad_norm": 0.7540838122367859, "learning_rate": 9.87008150346961e-05, "loss": 2.9874, "step": 44940 }, { "epoch": 2.2, "grad_norm": 0.8029380440711975, "learning_rate": 9.868939964341597e-05, "loss": 2.6693, "step": 44941 }, { "epoch": 2.2, "grad_norm": 0.7017350792884827, "learning_rate": 9.86779847823491e-05, "loss": 3.0033, "step": 44942 }, { "epoch": 2.2, "grad_norm": 0.7199008464813232, "learning_rate": 9.866657045152564e-05, "loss": 3.0994, "step": 44943 }, { "epoch": 2.2, "grad_norm": 0.6676777005195618, "learning_rate": 9.865515665097581e-05, "loss": 2.7228, "step": 44944 }, { "epoch": 2.2, "grad_norm": 0.6894500851631165, "learning_rate": 9.864374338072945e-05, "loss": 2.9413, "step": 44945 }, { "epoch": 2.2, "grad_norm": 0.7256870269775391, "learning_rate": 9.863233064081682e-05, "loss": 2.7942, "step": 44946 }, { "epoch": 2.2, "grad_norm": 0.7502337098121643, "learning_rate": 9.862091843126777e-05, "loss": 3.1109, "step": 44947 }, { "epoch": 2.2, "grad_norm": 0.6998251080513, "learning_rate": 9.860950675211255e-05, "loss": 2.8859, "step": 44948 }, { "epoch": 2.2, "grad_norm": 0.7343108057975769, "learning_rate": 9.859809560338115e-05, "loss": 3.0134, "step": 44949 }, { "epoch": 2.2, "grad_norm": 0.7494150996208191, "learning_rate": 9.85866849851035e-05, "loss": 2.7913, "step": 44950 }, { "epoch": 2.2, "grad_norm": 0.6747055649757385, "learning_rate": 9.857527489730987e-05, "loss": 2.9572, "step": 44951 }, { "epoch": 2.2, "grad_norm": 0.6779135465621948, "learning_rate": 9.856386534003011e-05, "loss": 2.8614, "step": 44952 }, { "epoch": 2.2, "grad_norm": 0.713498592376709, "learning_rate": 9.855245631329432e-05, "loss": 2.8165, "step": 44953 }, { "epoch": 2.2, "grad_norm": 0.6696012616157532, "learning_rate": 9.854104781713272e-05, "loss": 3.0592, "step": 44954 }, { "epoch": 2.2, "grad_norm": 0.7587345242500305, "learning_rate": 9.852963985157525e-05, "loss": 2.8299, "step": 44955 }, { "epoch": 2.2, "grad_norm": 0.7578355669975281, "learning_rate": 9.851823241665188e-05, "loss": 2.9793, "step": 44956 }, { "epoch": 2.2, "grad_norm": 0.6964207291603088, "learning_rate": 9.850682551239265e-05, "loss": 2.9437, "step": 44957 }, { "epoch": 2.2, "grad_norm": 0.8269992470741272, "learning_rate": 9.849541913882774e-05, "loss": 2.9062, "step": 44958 }, { "epoch": 2.2, "grad_norm": 0.6761069893836975, "learning_rate": 9.848401329598705e-05, "loss": 2.9209, "step": 44959 }, { "epoch": 2.2, "grad_norm": 0.7033849954605103, "learning_rate": 9.847260798390064e-05, "loss": 2.974, "step": 44960 }, { "epoch": 2.2, "grad_norm": 0.7039138078689575, "learning_rate": 9.846120320259873e-05, "loss": 2.9274, "step": 44961 }, { "epoch": 2.2, "grad_norm": 0.6891793608665466, "learning_rate": 9.844979895211121e-05, "loss": 2.7909, "step": 44962 }, { "epoch": 2.2, "grad_norm": 0.6782719492912292, "learning_rate": 9.843839523246815e-05, "loss": 2.8842, "step": 44963 }, { "epoch": 2.2, "grad_norm": 0.7017290592193604, "learning_rate": 9.842699204369944e-05, "loss": 3.038, "step": 44964 }, { "epoch": 2.2, "grad_norm": 0.8234224915504456, "learning_rate": 9.841558938583525e-05, "loss": 2.8788, "step": 44965 }, { "epoch": 2.2, "grad_norm": 0.720818817615509, "learning_rate": 9.840418725890569e-05, "loss": 2.7221, "step": 44966 }, { "epoch": 2.2, "grad_norm": 0.7300722599029541, "learning_rate": 9.839278566294063e-05, "loss": 2.9979, "step": 44967 }, { "epoch": 2.2, "grad_norm": 0.6792693138122559, "learning_rate": 9.838138459797026e-05, "loss": 2.7892, "step": 44968 }, { "epoch": 2.2, "grad_norm": 0.7436376214027405, "learning_rate": 9.83699840640244e-05, "loss": 3.0367, "step": 44969 }, { "epoch": 2.2, "grad_norm": 0.6573380827903748, "learning_rate": 9.835858406113333e-05, "loss": 2.9167, "step": 44970 }, { "epoch": 2.2, "grad_norm": 0.6874247789382935, "learning_rate": 9.834718458932692e-05, "loss": 2.7693, "step": 44971 }, { "epoch": 2.2, "grad_norm": 0.7144591808319092, "learning_rate": 9.833578564863514e-05, "loss": 2.7311, "step": 44972 }, { "epoch": 2.2, "grad_norm": 0.72472083568573, "learning_rate": 9.832438723908814e-05, "loss": 3.0349, "step": 44973 }, { "epoch": 2.2, "grad_norm": 0.7377344369888306, "learning_rate": 9.831298936071586e-05, "loss": 3.0525, "step": 44974 }, { "epoch": 2.2, "grad_norm": 0.7162564992904663, "learning_rate": 9.830159201354828e-05, "loss": 2.935, "step": 44975 }, { "epoch": 2.2, "grad_norm": 0.7077290415763855, "learning_rate": 9.829019519761564e-05, "loss": 3.0306, "step": 44976 }, { "epoch": 2.2, "grad_norm": 0.6880072951316833, "learning_rate": 9.827879891294777e-05, "loss": 2.8846, "step": 44977 }, { "epoch": 2.2, "grad_norm": 0.7291051149368286, "learning_rate": 9.826740315957476e-05, "loss": 2.8189, "step": 44978 }, { "epoch": 2.2, "grad_norm": 0.7197936177253723, "learning_rate": 9.825600793752647e-05, "loss": 2.816, "step": 44979 }, { "epoch": 2.2, "grad_norm": 0.6987482309341431, "learning_rate": 9.824461324683304e-05, "loss": 2.7839, "step": 44980 }, { "epoch": 2.2, "grad_norm": 0.7558214664459229, "learning_rate": 9.823321908752454e-05, "loss": 2.8651, "step": 44981 }, { "epoch": 2.2, "grad_norm": 0.7327513694763184, "learning_rate": 9.822182545963082e-05, "loss": 2.8579, "step": 44982 }, { "epoch": 2.2, "grad_norm": 0.7057175040245056, "learning_rate": 9.821043236318209e-05, "loss": 2.9509, "step": 44983 }, { "epoch": 2.2, "grad_norm": 0.7285510301589966, "learning_rate": 9.819903979820825e-05, "loss": 2.9622, "step": 44984 }, { "epoch": 2.2, "grad_norm": 0.6786232590675354, "learning_rate": 9.81876477647392e-05, "loss": 3.0846, "step": 44985 }, { "epoch": 2.2, "grad_norm": 0.7109321355819702, "learning_rate": 9.817625626280511e-05, "loss": 2.8528, "step": 44986 }, { "epoch": 2.2, "grad_norm": 0.7196544408798218, "learning_rate": 9.816486529243587e-05, "loss": 2.7905, "step": 44987 }, { "epoch": 2.2, "grad_norm": 0.6925801634788513, "learning_rate": 9.815347485366162e-05, "loss": 2.8368, "step": 44988 }, { "epoch": 2.2, "grad_norm": 0.687707245349884, "learning_rate": 9.814208494651214e-05, "loss": 2.8255, "step": 44989 }, { "epoch": 2.2, "grad_norm": 0.7053709030151367, "learning_rate": 9.81306955710176e-05, "loss": 3.0476, "step": 44990 }, { "epoch": 2.2, "grad_norm": 0.6974628567695618, "learning_rate": 9.811930672720803e-05, "loss": 2.739, "step": 44991 }, { "epoch": 2.2, "grad_norm": 0.7128167152404785, "learning_rate": 9.810791841511336e-05, "loss": 3.0011, "step": 44992 }, { "epoch": 2.2, "grad_norm": 0.694808304309845, "learning_rate": 9.809653063476358e-05, "loss": 2.8174, "step": 44993 }, { "epoch": 2.21, "grad_norm": 0.7266348600387573, "learning_rate": 9.808514338618856e-05, "loss": 2.8004, "step": 44994 }, { "epoch": 2.21, "grad_norm": 0.6718068718910217, "learning_rate": 9.807375666941846e-05, "loss": 2.9927, "step": 44995 }, { "epoch": 2.21, "grad_norm": 0.7475922107696533, "learning_rate": 9.806237048448329e-05, "loss": 2.8641, "step": 44996 }, { "epoch": 2.21, "grad_norm": 0.6909314393997192, "learning_rate": 9.805098483141287e-05, "loss": 2.8757, "step": 44997 }, { "epoch": 2.21, "grad_norm": 0.7345043420791626, "learning_rate": 9.803959971023742e-05, "loss": 2.8709, "step": 44998 }, { "epoch": 2.21, "grad_norm": 0.7070412635803223, "learning_rate": 9.802821512098677e-05, "loss": 2.997, "step": 44999 }, { "epoch": 2.21, "grad_norm": 0.6929289698600769, "learning_rate": 9.801683106369082e-05, "loss": 3.0065, "step": 45000 }, { "epoch": 2.21, "grad_norm": 0.7302191257476807, "learning_rate": 9.800544753837978e-05, "loss": 2.8856, "step": 45001 }, { "epoch": 2.21, "grad_norm": 0.7192659378051758, "learning_rate": 9.799406454508343e-05, "loss": 2.8797, "step": 45002 }, { "epoch": 2.21, "grad_norm": 0.757664680480957, "learning_rate": 9.798268208383192e-05, "loss": 2.4703, "step": 45003 }, { "epoch": 2.21, "grad_norm": 0.7271679043769836, "learning_rate": 9.797130015465507e-05, "loss": 2.98, "step": 45004 }, { "epoch": 2.21, "grad_norm": 0.7153617739677429, "learning_rate": 9.7959918757583e-05, "loss": 2.992, "step": 45005 }, { "epoch": 2.21, "grad_norm": 0.7023801207542419, "learning_rate": 9.794853789264564e-05, "loss": 2.8245, "step": 45006 }, { "epoch": 2.21, "grad_norm": 0.7436354160308838, "learning_rate": 9.793715755987283e-05, "loss": 2.9209, "step": 45007 }, { "epoch": 2.21, "grad_norm": 0.7024301886558533, "learning_rate": 9.792577775929476e-05, "loss": 3.0383, "step": 45008 }, { "epoch": 2.21, "grad_norm": 0.680385410785675, "learning_rate": 9.791439849094124e-05, "loss": 2.8592, "step": 45009 }, { "epoch": 2.21, "grad_norm": 0.6959872245788574, "learning_rate": 9.790301975484228e-05, "loss": 2.9236, "step": 45010 }, { "epoch": 2.21, "grad_norm": 0.7033017873764038, "learning_rate": 9.789164155102795e-05, "loss": 3.054, "step": 45011 }, { "epoch": 2.21, "grad_norm": 0.6920191645622253, "learning_rate": 9.788026387952807e-05, "loss": 2.8266, "step": 45012 }, { "epoch": 2.21, "grad_norm": 0.6858896017074585, "learning_rate": 9.786888674037276e-05, "loss": 2.9281, "step": 45013 }, { "epoch": 2.21, "grad_norm": 0.6978284120559692, "learning_rate": 9.785751013359189e-05, "loss": 2.9375, "step": 45014 }, { "epoch": 2.21, "grad_norm": 0.7071654200553894, "learning_rate": 9.784613405921535e-05, "loss": 2.8444, "step": 45015 }, { "epoch": 2.21, "grad_norm": 0.6944267749786377, "learning_rate": 9.78347585172733e-05, "loss": 2.8455, "step": 45016 }, { "epoch": 2.21, "grad_norm": 0.7331633567810059, "learning_rate": 9.782338350779548e-05, "loss": 3.0644, "step": 45017 }, { "epoch": 2.21, "grad_norm": 0.7539132237434387, "learning_rate": 9.781200903081206e-05, "loss": 2.8949, "step": 45018 }, { "epoch": 2.21, "grad_norm": 0.7396433353424072, "learning_rate": 9.780063508635279e-05, "loss": 2.938, "step": 45019 }, { "epoch": 2.21, "grad_norm": 0.7210110425949097, "learning_rate": 9.778926167444781e-05, "loss": 2.6836, "step": 45020 }, { "epoch": 2.21, "grad_norm": 0.7514802813529968, "learning_rate": 9.777788879512702e-05, "loss": 2.9036, "step": 45021 }, { "epoch": 2.21, "grad_norm": 0.7779740691184998, "learning_rate": 9.776651644842026e-05, "loss": 2.8719, "step": 45022 }, { "epoch": 2.21, "grad_norm": 0.7061024308204651, "learning_rate": 9.775514463435767e-05, "loss": 2.7755, "step": 45023 }, { "epoch": 2.21, "grad_norm": 0.7684993743896484, "learning_rate": 9.7743773352969e-05, "loss": 2.855, "step": 45024 }, { "epoch": 2.21, "grad_norm": 0.7269738912582397, "learning_rate": 9.773240260428431e-05, "loss": 3.2446, "step": 45025 }, { "epoch": 2.21, "grad_norm": 0.6850230693817139, "learning_rate": 9.772103238833364e-05, "loss": 3.0956, "step": 45026 }, { "epoch": 2.21, "grad_norm": 0.6893491148948669, "learning_rate": 9.770966270514684e-05, "loss": 2.9326, "step": 45027 }, { "epoch": 2.21, "grad_norm": 0.7849996089935303, "learning_rate": 9.769829355475388e-05, "loss": 2.8876, "step": 45028 }, { "epoch": 2.21, "grad_norm": 0.7221559882164001, "learning_rate": 9.768692493718457e-05, "loss": 2.9922, "step": 45029 }, { "epoch": 2.21, "grad_norm": 0.6811572909355164, "learning_rate": 9.767555685246896e-05, "loss": 2.9712, "step": 45030 }, { "epoch": 2.21, "grad_norm": 0.7301697731018066, "learning_rate": 9.766418930063709e-05, "loss": 3.1497, "step": 45031 }, { "epoch": 2.21, "grad_norm": 0.7387075424194336, "learning_rate": 9.765282228171869e-05, "loss": 2.731, "step": 45032 }, { "epoch": 2.21, "grad_norm": 0.7403630018234253, "learning_rate": 9.764145579574394e-05, "loss": 3.0009, "step": 45033 }, { "epoch": 2.21, "grad_norm": 0.7094658613204956, "learning_rate": 9.763008984274254e-05, "loss": 2.7886, "step": 45034 }, { "epoch": 2.21, "grad_norm": 0.6980571150779724, "learning_rate": 9.761872442274464e-05, "loss": 3.1573, "step": 45035 }, { "epoch": 2.21, "grad_norm": 0.7270064949989319, "learning_rate": 9.760735953578006e-05, "loss": 2.9224, "step": 45036 }, { "epoch": 2.21, "grad_norm": 0.8115057349205017, "learning_rate": 9.759599518187867e-05, "loss": 3.1933, "step": 45037 }, { "epoch": 2.21, "grad_norm": 0.7091026306152344, "learning_rate": 9.758463136107055e-05, "loss": 2.6673, "step": 45038 }, { "epoch": 2.21, "grad_norm": 0.7223027348518372, "learning_rate": 9.757326807338548e-05, "loss": 2.7039, "step": 45039 }, { "epoch": 2.21, "grad_norm": 0.7187199592590332, "learning_rate": 9.756190531885353e-05, "loss": 2.971, "step": 45040 }, { "epoch": 2.21, "grad_norm": 0.7661718726158142, "learning_rate": 9.755054309750451e-05, "loss": 2.9357, "step": 45041 }, { "epoch": 2.21, "grad_norm": 0.710060179233551, "learning_rate": 9.753918140936846e-05, "loss": 3.0332, "step": 45042 }, { "epoch": 2.21, "grad_norm": 0.6724956035614014, "learning_rate": 9.752782025447523e-05, "loss": 3.0016, "step": 45043 }, { "epoch": 2.21, "grad_norm": 0.6956879496574402, "learning_rate": 9.751645963285469e-05, "loss": 3.0643, "step": 45044 }, { "epoch": 2.21, "grad_norm": 0.7180279493331909, "learning_rate": 9.750509954453689e-05, "loss": 3.1198, "step": 45045 }, { "epoch": 2.21, "grad_norm": 0.6832389235496521, "learning_rate": 9.749373998955163e-05, "loss": 2.9694, "step": 45046 }, { "epoch": 2.21, "grad_norm": 0.6939116716384888, "learning_rate": 9.748238096792886e-05, "loss": 2.8727, "step": 45047 }, { "epoch": 2.21, "grad_norm": 0.6981992721557617, "learning_rate": 9.747102247969862e-05, "loss": 2.8948, "step": 45048 }, { "epoch": 2.21, "grad_norm": 0.7490467429161072, "learning_rate": 9.745966452489072e-05, "loss": 2.6799, "step": 45049 }, { "epoch": 2.21, "grad_norm": 0.7155380845069885, "learning_rate": 9.74483071035351e-05, "loss": 2.9145, "step": 45050 }, { "epoch": 2.21, "grad_norm": 0.7536515593528748, "learning_rate": 9.743695021566157e-05, "loss": 2.81, "step": 45051 }, { "epoch": 2.21, "grad_norm": 0.6787039041519165, "learning_rate": 9.742559386130012e-05, "loss": 2.8627, "step": 45052 }, { "epoch": 2.21, "grad_norm": 0.7021150588989258, "learning_rate": 9.741423804048077e-05, "loss": 2.9647, "step": 45053 }, { "epoch": 2.21, "grad_norm": 0.7255040407180786, "learning_rate": 9.740288275323321e-05, "loss": 3.0502, "step": 45054 }, { "epoch": 2.21, "grad_norm": 0.7342783212661743, "learning_rate": 9.739152799958758e-05, "loss": 2.8024, "step": 45055 }, { "epoch": 2.21, "grad_norm": 0.6887534260749817, "learning_rate": 9.73801737795736e-05, "loss": 2.7358, "step": 45056 }, { "epoch": 2.21, "grad_norm": 0.6920632123947144, "learning_rate": 9.736882009322131e-05, "loss": 2.8555, "step": 45057 }, { "epoch": 2.21, "grad_norm": 0.7355381846427917, "learning_rate": 9.735746694056055e-05, "loss": 3.0263, "step": 45058 }, { "epoch": 2.21, "grad_norm": 0.7083019018173218, "learning_rate": 9.734611432162114e-05, "loss": 2.8918, "step": 45059 }, { "epoch": 2.21, "grad_norm": 0.7241811156272888, "learning_rate": 9.733476223643317e-05, "loss": 2.8173, "step": 45060 }, { "epoch": 2.21, "grad_norm": 0.7107488512992859, "learning_rate": 9.73234106850263e-05, "loss": 2.9891, "step": 45061 }, { "epoch": 2.21, "grad_norm": 0.7031468152999878, "learning_rate": 9.731205966743058e-05, "loss": 2.7615, "step": 45062 }, { "epoch": 2.21, "grad_norm": 0.6482835412025452, "learning_rate": 9.730070918367598e-05, "loss": 2.8392, "step": 45063 }, { "epoch": 2.21, "grad_norm": 0.696492612361908, "learning_rate": 9.728935923379234e-05, "loss": 2.9115, "step": 45064 }, { "epoch": 2.21, "grad_norm": 0.6701128482818604, "learning_rate": 9.727800981780946e-05, "loss": 2.6987, "step": 45065 }, { "epoch": 2.21, "grad_norm": 0.6930135488510132, "learning_rate": 9.726666093575723e-05, "loss": 3.1348, "step": 45066 }, { "epoch": 2.21, "grad_norm": 0.7013523578643799, "learning_rate": 9.725531258766555e-05, "loss": 2.9139, "step": 45067 }, { "epoch": 2.21, "grad_norm": 0.6763277649879456, "learning_rate": 9.72439647735645e-05, "loss": 2.9529, "step": 45068 }, { "epoch": 2.21, "grad_norm": 0.7029493451118469, "learning_rate": 9.723261749348373e-05, "loss": 2.7961, "step": 45069 }, { "epoch": 2.21, "grad_norm": 0.6882089376449585, "learning_rate": 9.72212707474533e-05, "loss": 2.9743, "step": 45070 }, { "epoch": 2.21, "grad_norm": 0.7075545191764832, "learning_rate": 9.720992453550292e-05, "loss": 3.0006, "step": 45071 }, { "epoch": 2.21, "grad_norm": 0.7261462211608887, "learning_rate": 9.719857885766266e-05, "loss": 2.9622, "step": 45072 }, { "epoch": 2.21, "grad_norm": 0.7145310044288635, "learning_rate": 9.718723371396234e-05, "loss": 2.9799, "step": 45073 }, { "epoch": 2.21, "grad_norm": 0.7117692828178406, "learning_rate": 9.71758891044317e-05, "loss": 2.9716, "step": 45074 }, { "epoch": 2.21, "grad_norm": 0.7104676961898804, "learning_rate": 9.716454502910082e-05, "loss": 3.0304, "step": 45075 }, { "epoch": 2.21, "grad_norm": 0.7376676797866821, "learning_rate": 9.715320148799942e-05, "loss": 2.8945, "step": 45076 }, { "epoch": 2.21, "grad_norm": 0.7351451516151428, "learning_rate": 9.714185848115746e-05, "loss": 2.8031, "step": 45077 }, { "epoch": 2.21, "grad_norm": 0.7405024766921997, "learning_rate": 9.713051600860486e-05, "loss": 2.973, "step": 45078 }, { "epoch": 2.21, "grad_norm": 0.7368142604827881, "learning_rate": 9.711917407037147e-05, "loss": 3.0478, "step": 45079 }, { "epoch": 2.21, "grad_norm": 0.756643533706665, "learning_rate": 9.71078326664871e-05, "loss": 3.123, "step": 45080 }, { "epoch": 2.21, "grad_norm": 0.742498517036438, "learning_rate": 9.709649179698156e-05, "loss": 2.9582, "step": 45081 }, { "epoch": 2.21, "grad_norm": 0.7527225017547607, "learning_rate": 9.708515146188482e-05, "loss": 2.8509, "step": 45082 }, { "epoch": 2.21, "grad_norm": 0.7525281310081482, "learning_rate": 9.707381166122685e-05, "loss": 3.034, "step": 45083 }, { "epoch": 2.21, "grad_norm": 0.69659823179245, "learning_rate": 9.70624723950373e-05, "loss": 2.8755, "step": 45084 }, { "epoch": 2.21, "grad_norm": 0.7091747522354126, "learning_rate": 9.705113366334622e-05, "loss": 2.8933, "step": 45085 }, { "epoch": 2.21, "grad_norm": 0.6765292882919312, "learning_rate": 9.70397954661834e-05, "loss": 2.8283, "step": 45086 }, { "epoch": 2.21, "grad_norm": 0.7405532002449036, "learning_rate": 9.702845780357864e-05, "loss": 2.9788, "step": 45087 }, { "epoch": 2.21, "grad_norm": 0.7477160692214966, "learning_rate": 9.701712067556192e-05, "loss": 2.8926, "step": 45088 }, { "epoch": 2.21, "grad_norm": 0.7491059303283691, "learning_rate": 9.700578408216296e-05, "loss": 2.7266, "step": 45089 }, { "epoch": 2.21, "grad_norm": 0.6882579326629639, "learning_rate": 9.69944480234118e-05, "loss": 3.1125, "step": 45090 }, { "epoch": 2.21, "grad_norm": 0.7268614768981934, "learning_rate": 9.698311249933807e-05, "loss": 2.9157, "step": 45091 }, { "epoch": 2.21, "grad_norm": 0.7116838693618774, "learning_rate": 9.697177750997175e-05, "loss": 2.7934, "step": 45092 }, { "epoch": 2.21, "grad_norm": 0.718450129032135, "learning_rate": 9.696044305534281e-05, "loss": 2.9242, "step": 45093 }, { "epoch": 2.21, "grad_norm": 0.7193343043327332, "learning_rate": 9.694910913548099e-05, "loss": 2.7036, "step": 45094 }, { "epoch": 2.21, "grad_norm": 0.6873836517333984, "learning_rate": 9.693777575041613e-05, "loss": 2.9068, "step": 45095 }, { "epoch": 2.21, "grad_norm": 0.7232233881950378, "learning_rate": 9.692644290017801e-05, "loss": 3.0087, "step": 45096 }, { "epoch": 2.21, "grad_norm": 0.6899810433387756, "learning_rate": 9.691511058479653e-05, "loss": 3.0144, "step": 45097 }, { "epoch": 2.21, "grad_norm": 0.7038057446479797, "learning_rate": 9.690377880430168e-05, "loss": 2.8663, "step": 45098 }, { "epoch": 2.21, "grad_norm": 0.6957258582115173, "learning_rate": 9.689244755872308e-05, "loss": 3.0446, "step": 45099 }, { "epoch": 2.21, "grad_norm": 0.7041094899177551, "learning_rate": 9.688111684809078e-05, "loss": 2.7151, "step": 45100 }, { "epoch": 2.21, "grad_norm": 0.668713390827179, "learning_rate": 9.686978667243453e-05, "loss": 3.0839, "step": 45101 }, { "epoch": 2.21, "grad_norm": 0.7322125434875488, "learning_rate": 9.685845703178408e-05, "loss": 2.8446, "step": 45102 }, { "epoch": 2.21, "grad_norm": 0.7117242217063904, "learning_rate": 9.684712792616946e-05, "loss": 3.0491, "step": 45103 }, { "epoch": 2.21, "grad_norm": 0.724531352519989, "learning_rate": 9.683579935562029e-05, "loss": 2.8113, "step": 45104 }, { "epoch": 2.21, "grad_norm": 0.7037317752838135, "learning_rate": 9.682447132016664e-05, "loss": 2.9621, "step": 45105 }, { "epoch": 2.21, "grad_norm": 0.7283707857131958, "learning_rate": 9.681314381983811e-05, "loss": 3.037, "step": 45106 }, { "epoch": 2.21, "grad_norm": 0.6818432807922363, "learning_rate": 9.680181685466477e-05, "loss": 2.7621, "step": 45107 }, { "epoch": 2.21, "grad_norm": 0.6957379579544067, "learning_rate": 9.679049042467636e-05, "loss": 2.7626, "step": 45108 }, { "epoch": 2.21, "grad_norm": 0.6879173517227173, "learning_rate": 9.677916452990255e-05, "loss": 2.795, "step": 45109 }, { "epoch": 2.21, "grad_norm": 0.6849760413169861, "learning_rate": 9.676783917037343e-05, "loss": 2.8795, "step": 45110 }, { "epoch": 2.21, "grad_norm": 0.7054252028465271, "learning_rate": 9.67565143461186e-05, "loss": 2.7672, "step": 45111 }, { "epoch": 2.21, "grad_norm": 0.6849786043167114, "learning_rate": 9.674519005716809e-05, "loss": 2.8216, "step": 45112 }, { "epoch": 2.21, "grad_norm": 0.6915918588638306, "learning_rate": 9.673386630355158e-05, "loss": 2.9443, "step": 45113 }, { "epoch": 2.21, "grad_norm": 0.6977018117904663, "learning_rate": 9.672254308529891e-05, "loss": 2.933, "step": 45114 }, { "epoch": 2.21, "grad_norm": 0.7735402584075928, "learning_rate": 9.671122040244004e-05, "loss": 2.7578, "step": 45115 }, { "epoch": 2.21, "grad_norm": 0.7309532165527344, "learning_rate": 9.669989825500467e-05, "loss": 2.9269, "step": 45116 }, { "epoch": 2.21, "grad_norm": 0.7018609046936035, "learning_rate": 9.668857664302269e-05, "loss": 2.8062, "step": 45117 }, { "epoch": 2.21, "grad_norm": 0.7004704475402832, "learning_rate": 9.667725556652372e-05, "loss": 2.9491, "step": 45118 }, { "epoch": 2.21, "grad_norm": 0.7916359305381775, "learning_rate": 9.666593502553776e-05, "loss": 3.0852, "step": 45119 }, { "epoch": 2.21, "grad_norm": 0.7221834659576416, "learning_rate": 9.665461502009468e-05, "loss": 2.9854, "step": 45120 }, { "epoch": 2.21, "grad_norm": 0.706676721572876, "learning_rate": 9.664329555022415e-05, "loss": 2.9534, "step": 45121 }, { "epoch": 2.21, "grad_norm": 0.7531946301460266, "learning_rate": 9.663197661595609e-05, "loss": 2.8679, "step": 45122 }, { "epoch": 2.21, "grad_norm": 0.7091394066810608, "learning_rate": 9.66206582173203e-05, "loss": 3.028, "step": 45123 }, { "epoch": 2.21, "grad_norm": 0.7693554162979126, "learning_rate": 9.660934035434644e-05, "loss": 2.9596, "step": 45124 }, { "epoch": 2.21, "grad_norm": 0.7340413928031921, "learning_rate": 9.659802302706452e-05, "loss": 2.7628, "step": 45125 }, { "epoch": 2.21, "grad_norm": 0.7072880864143372, "learning_rate": 9.65867062355042e-05, "loss": 2.7449, "step": 45126 }, { "epoch": 2.21, "grad_norm": 0.6786409020423889, "learning_rate": 9.657538997969542e-05, "loss": 2.7573, "step": 45127 }, { "epoch": 2.21, "grad_norm": 0.6778995394706726, "learning_rate": 9.656407425966782e-05, "loss": 3.0114, "step": 45128 }, { "epoch": 2.21, "grad_norm": 0.7381721138954163, "learning_rate": 9.655275907545139e-05, "loss": 2.7733, "step": 45129 }, { "epoch": 2.21, "grad_norm": 0.7340525984764099, "learning_rate": 9.654144442707583e-05, "loss": 2.7722, "step": 45130 }, { "epoch": 2.21, "grad_norm": 0.72030109167099, "learning_rate": 9.653013031457089e-05, "loss": 2.9553, "step": 45131 }, { "epoch": 2.21, "grad_norm": 0.7066547870635986, "learning_rate": 9.65188167379665e-05, "loss": 2.9542, "step": 45132 }, { "epoch": 2.21, "grad_norm": 0.7587066292762756, "learning_rate": 9.650750369729228e-05, "loss": 2.8834, "step": 45133 }, { "epoch": 2.21, "grad_norm": 0.7313148379325867, "learning_rate": 9.649619119257813e-05, "loss": 2.7555, "step": 45134 }, { "epoch": 2.21, "grad_norm": 0.713611900806427, "learning_rate": 9.648487922385397e-05, "loss": 2.8716, "step": 45135 }, { "epoch": 2.21, "grad_norm": 0.6982900500297546, "learning_rate": 9.647356779114934e-05, "loss": 2.9958, "step": 45136 }, { "epoch": 2.21, "grad_norm": 0.7607917189598083, "learning_rate": 9.646225689449428e-05, "loss": 2.9409, "step": 45137 }, { "epoch": 2.21, "grad_norm": 0.6986991763114929, "learning_rate": 9.645094653391848e-05, "loss": 2.8637, "step": 45138 }, { "epoch": 2.21, "grad_norm": 0.7355843782424927, "learning_rate": 9.643963670945159e-05, "loss": 2.914, "step": 45139 }, { "epoch": 2.21, "grad_norm": 0.746160089969635, "learning_rate": 9.642832742112363e-05, "loss": 2.9648, "step": 45140 }, { "epoch": 2.21, "grad_norm": 0.718352198600769, "learning_rate": 9.641701866896418e-05, "loss": 2.7866, "step": 45141 }, { "epoch": 2.21, "grad_norm": 0.6826595067977905, "learning_rate": 9.640571045300323e-05, "loss": 2.7788, "step": 45142 }, { "epoch": 2.21, "grad_norm": 0.7046639919281006, "learning_rate": 9.639440277327033e-05, "loss": 2.8692, "step": 45143 }, { "epoch": 2.21, "grad_norm": 0.6994836926460266, "learning_rate": 9.638309562979551e-05, "loss": 2.9952, "step": 45144 }, { "epoch": 2.21, "grad_norm": 0.7048764824867249, "learning_rate": 9.637178902260843e-05, "loss": 3.0218, "step": 45145 }, { "epoch": 2.21, "grad_norm": 0.6403487920761108, "learning_rate": 9.636048295173873e-05, "loss": 2.8368, "step": 45146 }, { "epoch": 2.21, "grad_norm": 0.7118167281150818, "learning_rate": 9.634917741721648e-05, "loss": 2.7272, "step": 45147 }, { "epoch": 2.21, "grad_norm": 0.7441112399101257, "learning_rate": 9.633787241907117e-05, "loss": 3.164, "step": 45148 }, { "epoch": 2.21, "grad_norm": 0.707645833492279, "learning_rate": 9.632656795733271e-05, "loss": 3.0041, "step": 45149 }, { "epoch": 2.21, "grad_norm": 0.6987390518188477, "learning_rate": 9.631526403203098e-05, "loss": 2.8839, "step": 45150 }, { "epoch": 2.21, "grad_norm": 0.7065211534500122, "learning_rate": 9.630396064319565e-05, "loss": 2.8892, "step": 45151 }, { "epoch": 2.21, "grad_norm": 0.7638340592384338, "learning_rate": 9.629265779085647e-05, "loss": 2.8871, "step": 45152 }, { "epoch": 2.21, "grad_norm": 0.7217139005661011, "learning_rate": 9.628135547504315e-05, "loss": 3.0046, "step": 45153 }, { "epoch": 2.21, "grad_norm": 0.7055973410606384, "learning_rate": 9.627005369578553e-05, "loss": 2.738, "step": 45154 }, { "epoch": 2.21, "grad_norm": 0.7258812785148621, "learning_rate": 9.625875245311345e-05, "loss": 3.0541, "step": 45155 }, { "epoch": 2.21, "grad_norm": 0.7114151120185852, "learning_rate": 9.624745174705655e-05, "loss": 3.0669, "step": 45156 }, { "epoch": 2.21, "grad_norm": 0.6896607875823975, "learning_rate": 9.623615157764473e-05, "loss": 2.8723, "step": 45157 }, { "epoch": 2.21, "grad_norm": 0.7319231033325195, "learning_rate": 9.622485194490757e-05, "loss": 3.1262, "step": 45158 }, { "epoch": 2.21, "grad_norm": 0.6564500331878662, "learning_rate": 9.621355284887502e-05, "loss": 2.8374, "step": 45159 }, { "epoch": 2.21, "grad_norm": 0.7160139083862305, "learning_rate": 9.620225428957677e-05, "loss": 2.93, "step": 45160 }, { "epoch": 2.21, "grad_norm": 0.7266965508460999, "learning_rate": 9.619095626704244e-05, "loss": 2.8838, "step": 45161 }, { "epoch": 2.21, "grad_norm": 0.7377114295959473, "learning_rate": 9.617965878130203e-05, "loss": 2.9046, "step": 45162 }, { "epoch": 2.21, "grad_norm": 0.7144932746887207, "learning_rate": 9.616836183238504e-05, "loss": 3.0129, "step": 45163 }, { "epoch": 2.21, "grad_norm": 0.6912056803703308, "learning_rate": 9.61570654203214e-05, "loss": 2.7707, "step": 45164 }, { "epoch": 2.21, "grad_norm": 0.730238139629364, "learning_rate": 9.614576954514088e-05, "loss": 3.1059, "step": 45165 }, { "epoch": 2.21, "grad_norm": 0.6824841499328613, "learning_rate": 9.613447420687319e-05, "loss": 2.7708, "step": 45166 }, { "epoch": 2.21, "grad_norm": 0.7248892188072205, "learning_rate": 9.612317940554806e-05, "loss": 2.8777, "step": 45167 }, { "epoch": 2.21, "grad_norm": 0.7106010913848877, "learning_rate": 9.611188514119515e-05, "loss": 2.8532, "step": 45168 }, { "epoch": 2.21, "grad_norm": 0.7336381077766418, "learning_rate": 9.610059141384427e-05, "loss": 2.8722, "step": 45169 }, { "epoch": 2.21, "grad_norm": 0.7006831765174866, "learning_rate": 9.60892982235253e-05, "loss": 2.8112, "step": 45170 }, { "epoch": 2.21, "grad_norm": 0.7395011782646179, "learning_rate": 9.607800557026777e-05, "loss": 2.9595, "step": 45171 }, { "epoch": 2.21, "grad_norm": 0.730022668838501, "learning_rate": 9.606671345410164e-05, "loss": 3.002, "step": 45172 }, { "epoch": 2.21, "grad_norm": 0.7525355815887451, "learning_rate": 9.605542187505643e-05, "loss": 2.8871, "step": 45173 }, { "epoch": 2.21, "grad_norm": 0.6975340247154236, "learning_rate": 9.60441308331621e-05, "loss": 2.8554, "step": 45174 }, { "epoch": 2.21, "grad_norm": 0.7521911859512329, "learning_rate": 9.603284032844828e-05, "loss": 2.772, "step": 45175 }, { "epoch": 2.21, "grad_norm": 0.7508547306060791, "learning_rate": 9.60215503609446e-05, "loss": 2.8397, "step": 45176 }, { "epoch": 2.21, "grad_norm": 0.7319634556770325, "learning_rate": 9.601026093068098e-05, "loss": 2.9338, "step": 45177 }, { "epoch": 2.21, "grad_norm": 0.6809556484222412, "learning_rate": 9.5998972037687e-05, "loss": 2.8386, "step": 45178 }, { "epoch": 2.21, "grad_norm": 0.7330763936042786, "learning_rate": 9.598768368199245e-05, "loss": 2.6866, "step": 45179 }, { "epoch": 2.21, "grad_norm": 0.7155676484107971, "learning_rate": 9.59763958636272e-05, "loss": 2.8636, "step": 45180 }, { "epoch": 2.21, "grad_norm": 0.6808676719665527, "learning_rate": 9.596510858262085e-05, "loss": 2.9295, "step": 45181 }, { "epoch": 2.21, "grad_norm": 0.7002042531967163, "learning_rate": 9.595382183900312e-05, "loss": 3.0021, "step": 45182 }, { "epoch": 2.21, "grad_norm": 0.7323654294013977, "learning_rate": 9.594253563280364e-05, "loss": 2.8457, "step": 45183 }, { "epoch": 2.21, "grad_norm": 0.677535891532898, "learning_rate": 9.593124996405228e-05, "loss": 2.9339, "step": 45184 }, { "epoch": 2.21, "grad_norm": 0.7651351094245911, "learning_rate": 9.591996483277882e-05, "loss": 2.7807, "step": 45185 }, { "epoch": 2.21, "grad_norm": 0.6927827000617981, "learning_rate": 9.590868023901283e-05, "loss": 2.9166, "step": 45186 }, { "epoch": 2.21, "grad_norm": 0.731319010257721, "learning_rate": 9.589739618278415e-05, "loss": 2.9789, "step": 45187 }, { "epoch": 2.21, "grad_norm": 0.7109827995300293, "learning_rate": 9.588611266412247e-05, "loss": 2.8897, "step": 45188 }, { "epoch": 2.21, "grad_norm": 0.7135148048400879, "learning_rate": 9.58748296830574e-05, "loss": 2.9604, "step": 45189 }, { "epoch": 2.21, "grad_norm": 0.6991270184516907, "learning_rate": 9.586354723961883e-05, "loss": 3.079, "step": 45190 }, { "epoch": 2.21, "grad_norm": 0.692098081111908, "learning_rate": 9.585226533383627e-05, "loss": 2.877, "step": 45191 }, { "epoch": 2.21, "grad_norm": 0.7232987880706787, "learning_rate": 9.584098396573969e-05, "loss": 3.0033, "step": 45192 }, { "epoch": 2.21, "grad_norm": 0.7077509760856628, "learning_rate": 9.582970313535857e-05, "loss": 3.0322, "step": 45193 }, { "epoch": 2.21, "grad_norm": 0.7513511776924133, "learning_rate": 9.581842284272279e-05, "loss": 2.9617, "step": 45194 }, { "epoch": 2.21, "grad_norm": 0.7315356135368347, "learning_rate": 9.58071430878619e-05, "loss": 3.0344, "step": 45195 }, { "epoch": 2.21, "grad_norm": 0.745606005191803, "learning_rate": 9.57958638708058e-05, "loss": 2.9354, "step": 45196 }, { "epoch": 2.21, "grad_norm": 0.7538384795188904, "learning_rate": 9.578458519158409e-05, "loss": 3.0113, "step": 45197 }, { "epoch": 2.22, "grad_norm": 0.7272838354110718, "learning_rate": 9.577330705022637e-05, "loss": 2.9533, "step": 45198 }, { "epoch": 2.22, "grad_norm": 0.6927639842033386, "learning_rate": 9.576202944676254e-05, "loss": 3.0826, "step": 45199 }, { "epoch": 2.22, "grad_norm": 0.6876212358474731, "learning_rate": 9.575075238122217e-05, "loss": 2.7837, "step": 45200 }, { "epoch": 2.22, "grad_norm": 0.721787691116333, "learning_rate": 9.573947585363499e-05, "loss": 2.7393, "step": 45201 }, { "epoch": 2.22, "grad_norm": 0.7061600685119629, "learning_rate": 9.572819986403079e-05, "loss": 2.8766, "step": 45202 }, { "epoch": 2.22, "grad_norm": 0.7322124242782593, "learning_rate": 9.571692441243925e-05, "loss": 2.9802, "step": 45203 }, { "epoch": 2.22, "grad_norm": 0.7068262696266174, "learning_rate": 9.570564949888998e-05, "loss": 3.033, "step": 45204 }, { "epoch": 2.22, "grad_norm": 0.7690457105636597, "learning_rate": 9.569437512341261e-05, "loss": 2.9735, "step": 45205 }, { "epoch": 2.22, "grad_norm": 0.7033405303955078, "learning_rate": 9.568310128603696e-05, "loss": 3.0367, "step": 45206 }, { "epoch": 2.22, "grad_norm": 0.7443763613700867, "learning_rate": 9.567182798679278e-05, "loss": 2.7348, "step": 45207 }, { "epoch": 2.22, "grad_norm": 0.7035591006278992, "learning_rate": 9.56605552257096e-05, "loss": 2.8969, "step": 45208 }, { "epoch": 2.22, "grad_norm": 0.693112313747406, "learning_rate": 9.564928300281726e-05, "loss": 2.8667, "step": 45209 }, { "epoch": 2.22, "grad_norm": 0.7365559339523315, "learning_rate": 9.563801131814539e-05, "loss": 2.7897, "step": 45210 }, { "epoch": 2.22, "grad_norm": 0.7358099818229675, "learning_rate": 9.56267401717236e-05, "loss": 3.0661, "step": 45211 }, { "epoch": 2.22, "grad_norm": 0.7144678235054016, "learning_rate": 9.56154695635817e-05, "loss": 3.0905, "step": 45212 }, { "epoch": 2.22, "grad_norm": 0.7409480810165405, "learning_rate": 9.560419949374927e-05, "loss": 3.028, "step": 45213 }, { "epoch": 2.22, "grad_norm": 0.7363123297691345, "learning_rate": 9.55929299622561e-05, "loss": 2.9172, "step": 45214 }, { "epoch": 2.22, "grad_norm": 0.7069172263145447, "learning_rate": 9.558166096913173e-05, "loss": 2.9742, "step": 45215 }, { "epoch": 2.22, "grad_norm": 0.7248828411102295, "learning_rate": 9.55703925144059e-05, "loss": 3.0391, "step": 45216 }, { "epoch": 2.22, "grad_norm": 0.7333267331123352, "learning_rate": 9.555912459810845e-05, "loss": 2.8933, "step": 45217 }, { "epoch": 2.22, "grad_norm": 0.6944749355316162, "learning_rate": 9.554785722026892e-05, "loss": 2.8961, "step": 45218 }, { "epoch": 2.22, "grad_norm": 0.7157734036445618, "learning_rate": 9.553659038091695e-05, "loss": 2.8081, "step": 45219 }, { "epoch": 2.22, "grad_norm": 0.743134081363678, "learning_rate": 9.552532408008216e-05, "loss": 2.7779, "step": 45220 }, { "epoch": 2.22, "grad_norm": 0.6870113611221313, "learning_rate": 9.551405831779431e-05, "loss": 2.8549, "step": 45221 }, { "epoch": 2.22, "grad_norm": 0.7241684198379517, "learning_rate": 9.550279309408321e-05, "loss": 2.9022, "step": 45222 }, { "epoch": 2.22, "grad_norm": 0.6814631819725037, "learning_rate": 9.549152840897827e-05, "loss": 2.82, "step": 45223 }, { "epoch": 2.22, "grad_norm": 0.6824125647544861, "learning_rate": 9.54802642625094e-05, "loss": 3.0936, "step": 45224 }, { "epoch": 2.22, "grad_norm": 0.7024969458580017, "learning_rate": 9.546900065470615e-05, "loss": 2.9814, "step": 45225 }, { "epoch": 2.22, "grad_norm": 0.7450183629989624, "learning_rate": 9.545773758559809e-05, "loss": 2.8562, "step": 45226 }, { "epoch": 2.22, "grad_norm": 0.6997722387313843, "learning_rate": 9.544647505521508e-05, "loss": 3.0313, "step": 45227 }, { "epoch": 2.22, "grad_norm": 0.7680618166923523, "learning_rate": 9.543521306358657e-05, "loss": 3.1848, "step": 45228 }, { "epoch": 2.22, "grad_norm": 0.7047203779220581, "learning_rate": 9.542395161074246e-05, "loss": 2.8618, "step": 45229 }, { "epoch": 2.22, "grad_norm": 0.6786448359489441, "learning_rate": 9.541269069671221e-05, "loss": 2.9918, "step": 45230 }, { "epoch": 2.22, "grad_norm": 0.7172592878341675, "learning_rate": 9.540143032152564e-05, "loss": 3.0713, "step": 45231 }, { "epoch": 2.22, "grad_norm": 0.7199307680130005, "learning_rate": 9.539017048521233e-05, "loss": 3.0211, "step": 45232 }, { "epoch": 2.22, "grad_norm": 0.7008538842201233, "learning_rate": 9.537891118780181e-05, "loss": 2.8901, "step": 45233 }, { "epoch": 2.22, "grad_norm": 0.6756240129470825, "learning_rate": 9.536765242932396e-05, "loss": 2.9633, "step": 45234 }, { "epoch": 2.22, "grad_norm": 0.6890457272529602, "learning_rate": 9.535639420980826e-05, "loss": 2.9048, "step": 45235 }, { "epoch": 2.22, "grad_norm": 0.6927510499954224, "learning_rate": 9.534513652928442e-05, "loss": 2.8731, "step": 45236 }, { "epoch": 2.22, "grad_norm": 0.6908006072044373, "learning_rate": 9.53338793877822e-05, "loss": 2.7577, "step": 45237 }, { "epoch": 2.22, "grad_norm": 0.7486088871955872, "learning_rate": 9.532262278533107e-05, "loss": 2.8153, "step": 45238 }, { "epoch": 2.22, "grad_norm": 0.7346619963645935, "learning_rate": 9.531136672196082e-05, "loss": 2.8707, "step": 45239 }, { "epoch": 2.22, "grad_norm": 0.7260370254516602, "learning_rate": 9.530011119770107e-05, "loss": 2.7412, "step": 45240 }, { "epoch": 2.22, "grad_norm": 0.743890106678009, "learning_rate": 9.528885621258134e-05, "loss": 2.8874, "step": 45241 }, { "epoch": 2.22, "grad_norm": 0.7786975502967834, "learning_rate": 9.527760176663143e-05, "loss": 2.8397, "step": 45242 }, { "epoch": 2.22, "grad_norm": 0.6872302293777466, "learning_rate": 9.526634785988087e-05, "loss": 2.9666, "step": 45243 }, { "epoch": 2.22, "grad_norm": 0.6927504539489746, "learning_rate": 9.52550944923594e-05, "loss": 2.873, "step": 45244 }, { "epoch": 2.22, "grad_norm": 0.7176833152770996, "learning_rate": 9.524384166409653e-05, "loss": 3.0863, "step": 45245 }, { "epoch": 2.22, "grad_norm": 0.71924889087677, "learning_rate": 9.523258937512204e-05, "loss": 2.7674, "step": 45246 }, { "epoch": 2.22, "grad_norm": 0.6952587962150574, "learning_rate": 9.522133762546555e-05, "loss": 2.9272, "step": 45247 }, { "epoch": 2.22, "grad_norm": 0.7094781398773193, "learning_rate": 9.521008641515653e-05, "loss": 2.8632, "step": 45248 }, { "epoch": 2.22, "grad_norm": 0.7094677686691284, "learning_rate": 9.519883574422481e-05, "loss": 2.9484, "step": 45249 }, { "epoch": 2.22, "grad_norm": 0.6779662370681763, "learning_rate": 9.518758561269988e-05, "loss": 2.7069, "step": 45250 }, { "epoch": 2.22, "grad_norm": 0.6990089416503906, "learning_rate": 9.517633602061141e-05, "loss": 3.1154, "step": 45251 }, { "epoch": 2.22, "grad_norm": 0.7468245625495911, "learning_rate": 9.516508696798914e-05, "loss": 3.0644, "step": 45252 }, { "epoch": 2.22, "grad_norm": 0.7292611598968506, "learning_rate": 9.515383845486263e-05, "loss": 3.1084, "step": 45253 }, { "epoch": 2.22, "grad_norm": 0.6730436682701111, "learning_rate": 9.514259048126147e-05, "loss": 2.9853, "step": 45254 }, { "epoch": 2.22, "grad_norm": 0.6685360074043274, "learning_rate": 9.513134304721521e-05, "loss": 2.9323, "step": 45255 }, { "epoch": 2.22, "grad_norm": 0.7453835606575012, "learning_rate": 9.512009615275356e-05, "loss": 2.8677, "step": 45256 }, { "epoch": 2.22, "grad_norm": 0.7116197347640991, "learning_rate": 9.510884979790624e-05, "loss": 2.7648, "step": 45257 }, { "epoch": 2.22, "grad_norm": 0.7128415703773499, "learning_rate": 9.50976039827027e-05, "loss": 2.8006, "step": 45258 }, { "epoch": 2.22, "grad_norm": 0.6778522729873657, "learning_rate": 9.508635870717272e-05, "loss": 3.0609, "step": 45259 }, { "epoch": 2.22, "grad_norm": 0.6794968843460083, "learning_rate": 9.507511397134574e-05, "loss": 2.8762, "step": 45260 }, { "epoch": 2.22, "grad_norm": 0.6920288801193237, "learning_rate": 9.506386977525156e-05, "loss": 2.7519, "step": 45261 }, { "epoch": 2.22, "grad_norm": 0.7245926856994629, "learning_rate": 9.50526261189197e-05, "loss": 2.9135, "step": 45262 }, { "epoch": 2.22, "grad_norm": 0.7115115523338318, "learning_rate": 9.50413830023797e-05, "loss": 3.035, "step": 45263 }, { "epoch": 2.22, "grad_norm": 0.6681340336799622, "learning_rate": 9.503014042566135e-05, "loss": 2.8618, "step": 45264 }, { "epoch": 2.22, "grad_norm": 0.6481229662895203, "learning_rate": 9.501889838879407e-05, "loss": 2.9743, "step": 45265 }, { "epoch": 2.22, "grad_norm": 0.7262416481971741, "learning_rate": 9.500765689180754e-05, "loss": 2.9994, "step": 45266 }, { "epoch": 2.22, "grad_norm": 0.7244082093238831, "learning_rate": 9.499641593473152e-05, "loss": 2.7771, "step": 45267 }, { "epoch": 2.22, "grad_norm": 0.7363994121551514, "learning_rate": 9.498517551759548e-05, "loss": 3.0425, "step": 45268 }, { "epoch": 2.22, "grad_norm": 0.7266939878463745, "learning_rate": 9.497393564042901e-05, "loss": 2.9354, "step": 45269 }, { "epoch": 2.22, "grad_norm": 0.7804268002510071, "learning_rate": 9.496269630326165e-05, "loss": 2.9353, "step": 45270 }, { "epoch": 2.22, "grad_norm": 0.6953659653663635, "learning_rate": 9.495145750612319e-05, "loss": 3.292, "step": 45271 }, { "epoch": 2.22, "grad_norm": 0.6974464654922485, "learning_rate": 9.494021924904302e-05, "loss": 2.9793, "step": 45272 }, { "epoch": 2.22, "grad_norm": 0.7132477760314941, "learning_rate": 9.492898153205085e-05, "loss": 2.9696, "step": 45273 }, { "epoch": 2.22, "grad_norm": 0.6814945340156555, "learning_rate": 9.491774435517637e-05, "loss": 2.5446, "step": 45274 }, { "epoch": 2.22, "grad_norm": 0.6983672976493835, "learning_rate": 9.490650771844899e-05, "loss": 2.9157, "step": 45275 }, { "epoch": 2.22, "grad_norm": 0.7214673161506653, "learning_rate": 9.48952716218986e-05, "loss": 2.794, "step": 45276 }, { "epoch": 2.22, "grad_norm": 0.7465963363647461, "learning_rate": 9.48840360655544e-05, "loss": 2.991, "step": 45277 }, { "epoch": 2.22, "grad_norm": 0.7039017677307129, "learning_rate": 9.487280104944616e-05, "loss": 2.845, "step": 45278 }, { "epoch": 2.22, "grad_norm": 0.737330436706543, "learning_rate": 9.486156657360357e-05, "loss": 2.8189, "step": 45279 }, { "epoch": 2.22, "grad_norm": 0.6810418367385864, "learning_rate": 9.485033263805607e-05, "loss": 3.0818, "step": 45280 }, { "epoch": 2.22, "grad_norm": 0.6977468729019165, "learning_rate": 9.483909924283338e-05, "loss": 2.782, "step": 45281 }, { "epoch": 2.22, "grad_norm": 0.7160059213638306, "learning_rate": 9.482786638796496e-05, "loss": 2.9945, "step": 45282 }, { "epoch": 2.22, "grad_norm": 0.7127458453178406, "learning_rate": 9.48166340734805e-05, "loss": 2.7842, "step": 45283 }, { "epoch": 2.22, "grad_norm": 0.6878350973129272, "learning_rate": 9.480540229940956e-05, "loss": 2.7996, "step": 45284 }, { "epoch": 2.22, "grad_norm": 0.6775646209716797, "learning_rate": 9.47941710657816e-05, "loss": 2.926, "step": 45285 }, { "epoch": 2.22, "grad_norm": 0.7190422415733337, "learning_rate": 9.478294037262642e-05, "loss": 2.9772, "step": 45286 }, { "epoch": 2.22, "grad_norm": 0.6985235214233398, "learning_rate": 9.477171021997335e-05, "loss": 3.0118, "step": 45287 }, { "epoch": 2.22, "grad_norm": 0.7282534241676331, "learning_rate": 9.476048060785213e-05, "loss": 2.8941, "step": 45288 }, { "epoch": 2.22, "grad_norm": 0.7488526701927185, "learning_rate": 9.474925153629238e-05, "loss": 3.0031, "step": 45289 }, { "epoch": 2.22, "grad_norm": 0.7149317860603333, "learning_rate": 9.473802300532361e-05, "loss": 3.0255, "step": 45290 }, { "epoch": 2.22, "grad_norm": 0.6947160959243774, "learning_rate": 9.472679501497538e-05, "loss": 2.9155, "step": 45291 }, { "epoch": 2.22, "grad_norm": 0.7043709754943848, "learning_rate": 9.471556756527716e-05, "loss": 2.9663, "step": 45292 }, { "epoch": 2.22, "grad_norm": 0.7300408482551575, "learning_rate": 9.470434065625865e-05, "loss": 2.7279, "step": 45293 }, { "epoch": 2.22, "grad_norm": 0.6901246309280396, "learning_rate": 9.469311428794948e-05, "loss": 2.9823, "step": 45294 }, { "epoch": 2.22, "grad_norm": 0.719524621963501, "learning_rate": 9.468188846037904e-05, "loss": 3.1144, "step": 45295 }, { "epoch": 2.22, "grad_norm": 0.7352761030197144, "learning_rate": 9.467066317357707e-05, "loss": 2.8261, "step": 45296 }, { "epoch": 2.22, "grad_norm": 0.7071006894111633, "learning_rate": 9.4659438427573e-05, "loss": 3.0008, "step": 45297 }, { "epoch": 2.22, "grad_norm": 0.7212498188018799, "learning_rate": 9.46482142223965e-05, "loss": 2.8313, "step": 45298 }, { "epoch": 2.22, "grad_norm": 0.7842340469360352, "learning_rate": 9.463699055807713e-05, "loss": 2.8263, "step": 45299 }, { "epoch": 2.22, "grad_norm": 0.6890537142753601, "learning_rate": 9.462576743464426e-05, "loss": 2.9741, "step": 45300 }, { "epoch": 2.22, "grad_norm": 0.7267663478851318, "learning_rate": 9.461454485212774e-05, "loss": 2.9529, "step": 45301 }, { "epoch": 2.22, "grad_norm": 0.7003971934318542, "learning_rate": 9.460332281055684e-05, "loss": 2.9145, "step": 45302 }, { "epoch": 2.22, "grad_norm": 0.7068154215812683, "learning_rate": 9.45921013099613e-05, "loss": 2.8808, "step": 45303 }, { "epoch": 2.22, "grad_norm": 0.7120148539543152, "learning_rate": 9.45808803503707e-05, "loss": 2.8749, "step": 45304 }, { "epoch": 2.22, "grad_norm": 0.6946321725845337, "learning_rate": 9.456965993181453e-05, "loss": 2.9578, "step": 45305 }, { "epoch": 2.22, "grad_norm": 0.699981689453125, "learning_rate": 9.455844005432234e-05, "loss": 2.8174, "step": 45306 }, { "epoch": 2.22, "grad_norm": 0.6788482666015625, "learning_rate": 9.45472207179236e-05, "loss": 2.8667, "step": 45307 }, { "epoch": 2.22, "grad_norm": 0.7016308307647705, "learning_rate": 9.453600192264792e-05, "loss": 2.7427, "step": 45308 }, { "epoch": 2.22, "grad_norm": 0.7483291625976562, "learning_rate": 9.452478366852498e-05, "loss": 2.8937, "step": 45309 }, { "epoch": 2.22, "grad_norm": 0.6895776987075806, "learning_rate": 9.451356595558412e-05, "loss": 2.9125, "step": 45310 }, { "epoch": 2.22, "grad_norm": 0.7158215641975403, "learning_rate": 9.450234878385505e-05, "loss": 2.9937, "step": 45311 }, { "epoch": 2.22, "grad_norm": 0.7149104475975037, "learning_rate": 9.449113215336727e-05, "loss": 2.8569, "step": 45312 }, { "epoch": 2.22, "grad_norm": 0.7052308320999146, "learning_rate": 9.447991606415021e-05, "loss": 3.0051, "step": 45313 }, { "epoch": 2.22, "grad_norm": 0.7073119878768921, "learning_rate": 9.446870051623358e-05, "loss": 2.9697, "step": 45314 }, { "epoch": 2.22, "grad_norm": 0.7047192454338074, "learning_rate": 9.445748550964675e-05, "loss": 3.0161, "step": 45315 }, { "epoch": 2.22, "grad_norm": 0.7001875042915344, "learning_rate": 9.444627104441944e-05, "loss": 2.9102, "step": 45316 }, { "epoch": 2.22, "grad_norm": 0.6942550539970398, "learning_rate": 9.443505712058098e-05, "loss": 3.0716, "step": 45317 }, { "epoch": 2.22, "grad_norm": 0.6810586452484131, "learning_rate": 9.4423843738161e-05, "loss": 2.7442, "step": 45318 }, { "epoch": 2.22, "grad_norm": 0.688390851020813, "learning_rate": 9.441263089718919e-05, "loss": 2.7613, "step": 45319 }, { "epoch": 2.22, "grad_norm": 0.7119620442390442, "learning_rate": 9.440141859769494e-05, "loss": 3.0136, "step": 45320 }, { "epoch": 2.22, "grad_norm": 0.7164089679718018, "learning_rate": 9.439020683970778e-05, "loss": 2.7685, "step": 45321 }, { "epoch": 2.22, "grad_norm": 0.7110692858695984, "learning_rate": 9.437899562325715e-05, "loss": 2.7599, "step": 45322 }, { "epoch": 2.22, "grad_norm": 0.8134690523147583, "learning_rate": 9.436778494837264e-05, "loss": 2.9939, "step": 45323 }, { "epoch": 2.22, "grad_norm": 0.7323058843612671, "learning_rate": 9.435657481508393e-05, "loss": 2.9119, "step": 45324 }, { "epoch": 2.22, "grad_norm": 0.6736330389976501, "learning_rate": 9.434536522342033e-05, "loss": 3.0363, "step": 45325 }, { "epoch": 2.22, "grad_norm": 0.710964560508728, "learning_rate": 9.433415617341156e-05, "loss": 2.7434, "step": 45326 }, { "epoch": 2.22, "grad_norm": 0.6646226644515991, "learning_rate": 9.432294766508705e-05, "loss": 3.0675, "step": 45327 }, { "epoch": 2.22, "grad_norm": 0.7395647168159485, "learning_rate": 9.43117396984762e-05, "loss": 2.9635, "step": 45328 }, { "epoch": 2.22, "grad_norm": 0.691923975944519, "learning_rate": 9.430053227360875e-05, "loss": 2.7976, "step": 45329 }, { "epoch": 2.22, "grad_norm": 0.6718361377716064, "learning_rate": 9.428932539051399e-05, "loss": 2.9561, "step": 45330 }, { "epoch": 2.22, "grad_norm": 0.7138594388961792, "learning_rate": 9.427811904922167e-05, "loss": 3.0096, "step": 45331 }, { "epoch": 2.22, "grad_norm": 0.7084429860115051, "learning_rate": 9.426691324976113e-05, "loss": 2.8364, "step": 45332 }, { "epoch": 2.22, "grad_norm": 0.7155402898788452, "learning_rate": 9.4255707992162e-05, "loss": 3.0008, "step": 45333 }, { "epoch": 2.22, "grad_norm": 0.7018200755119324, "learning_rate": 9.424450327645374e-05, "loss": 2.7881, "step": 45334 }, { "epoch": 2.22, "grad_norm": 0.7858231663703918, "learning_rate": 9.423329910266576e-05, "loss": 2.9398, "step": 45335 }, { "epoch": 2.22, "grad_norm": 0.69808030128479, "learning_rate": 9.42220954708278e-05, "loss": 2.9118, "step": 45336 }, { "epoch": 2.22, "grad_norm": 0.7413972020149231, "learning_rate": 9.42108923809691e-05, "loss": 2.9831, "step": 45337 }, { "epoch": 2.22, "grad_norm": 0.7176880836486816, "learning_rate": 9.419968983311934e-05, "loss": 2.7619, "step": 45338 }, { "epoch": 2.22, "grad_norm": 0.7590557336807251, "learning_rate": 9.418848782730805e-05, "loss": 3.0727, "step": 45339 }, { "epoch": 2.22, "grad_norm": 0.6953451633453369, "learning_rate": 9.417728636356459e-05, "loss": 3.0103, "step": 45340 }, { "epoch": 2.22, "grad_norm": 0.7350430488586426, "learning_rate": 9.416608544191866e-05, "loss": 3.0276, "step": 45341 }, { "epoch": 2.22, "grad_norm": 0.6567845344543457, "learning_rate": 9.415488506239964e-05, "loss": 2.8636, "step": 45342 }, { "epoch": 2.22, "grad_norm": 0.7473620772361755, "learning_rate": 9.414368522503695e-05, "loss": 2.96, "step": 45343 }, { "epoch": 2.22, "grad_norm": 0.7204223871231079, "learning_rate": 9.413248592986025e-05, "loss": 3.0137, "step": 45344 }, { "epoch": 2.22, "grad_norm": 0.7221093773841858, "learning_rate": 9.412128717689888e-05, "loss": 3.0327, "step": 45345 }, { "epoch": 2.22, "grad_norm": 0.7267528176307678, "learning_rate": 9.41100889661825e-05, "loss": 2.7521, "step": 45346 }, { "epoch": 2.22, "grad_norm": 0.7274415493011475, "learning_rate": 9.409889129774043e-05, "loss": 2.9669, "step": 45347 }, { "epoch": 2.22, "grad_norm": 0.7493104934692383, "learning_rate": 9.408769417160237e-05, "loss": 2.8927, "step": 45348 }, { "epoch": 2.22, "grad_norm": 0.7367200255393982, "learning_rate": 9.407649758779766e-05, "loss": 3.0401, "step": 45349 }, { "epoch": 2.22, "grad_norm": 0.6605836749076843, "learning_rate": 9.406530154635574e-05, "loss": 2.9622, "step": 45350 }, { "epoch": 2.22, "grad_norm": 0.7259601950645447, "learning_rate": 9.405410604730629e-05, "loss": 3.0617, "step": 45351 }, { "epoch": 2.22, "grad_norm": 1.0172241926193237, "learning_rate": 9.40429110906786e-05, "loss": 2.8292, "step": 45352 }, { "epoch": 2.22, "grad_norm": 0.6789811849594116, "learning_rate": 9.403171667650235e-05, "loss": 3.0832, "step": 45353 }, { "epoch": 2.22, "grad_norm": 0.7042773962020874, "learning_rate": 9.402052280480677e-05, "loss": 2.8346, "step": 45354 }, { "epoch": 2.22, "grad_norm": 0.7092491388320923, "learning_rate": 9.400932947562165e-05, "loss": 2.7202, "step": 45355 }, { "epoch": 2.22, "grad_norm": 0.7057832479476929, "learning_rate": 9.399813668897625e-05, "loss": 3.0595, "step": 45356 }, { "epoch": 2.22, "grad_norm": 0.751794159412384, "learning_rate": 9.398694444490006e-05, "loss": 2.8588, "step": 45357 }, { "epoch": 2.22, "grad_norm": 0.7302895784378052, "learning_rate": 9.39757527434227e-05, "loss": 2.9267, "step": 45358 }, { "epoch": 2.22, "grad_norm": 0.6791434288024902, "learning_rate": 9.396456158457348e-05, "loss": 2.9432, "step": 45359 }, { "epoch": 2.22, "grad_norm": 0.7375596761703491, "learning_rate": 9.395337096838191e-05, "loss": 2.9125, "step": 45360 }, { "epoch": 2.22, "grad_norm": 0.7245837450027466, "learning_rate": 9.394218089487763e-05, "loss": 2.8705, "step": 45361 }, { "epoch": 2.22, "grad_norm": 0.7496141195297241, "learning_rate": 9.393099136408989e-05, "loss": 2.8422, "step": 45362 }, { "epoch": 2.22, "grad_norm": 0.7486103773117065, "learning_rate": 9.391980237604834e-05, "loss": 2.9145, "step": 45363 }, { "epoch": 2.22, "grad_norm": 0.6848008632659912, "learning_rate": 9.390861393078237e-05, "loss": 2.8837, "step": 45364 }, { "epoch": 2.22, "grad_norm": 0.70891273021698, "learning_rate": 9.389742602832137e-05, "loss": 2.8657, "step": 45365 }, { "epoch": 2.22, "grad_norm": 0.7543848752975464, "learning_rate": 9.388623866869498e-05, "loss": 3.0147, "step": 45366 }, { "epoch": 2.22, "grad_norm": 0.7381017804145813, "learning_rate": 9.387505185193246e-05, "loss": 2.9008, "step": 45367 }, { "epoch": 2.22, "grad_norm": 0.7030778527259827, "learning_rate": 9.38638655780635e-05, "loss": 2.7483, "step": 45368 }, { "epoch": 2.22, "grad_norm": 0.6804453730583191, "learning_rate": 9.385267984711733e-05, "loss": 2.8687, "step": 45369 }, { "epoch": 2.22, "grad_norm": 0.7367046475410461, "learning_rate": 9.384149465912362e-05, "loss": 2.8755, "step": 45370 }, { "epoch": 2.22, "grad_norm": 0.7181659936904907, "learning_rate": 9.383031001411177e-05, "loss": 3.0086, "step": 45371 }, { "epoch": 2.22, "grad_norm": 0.7256984114646912, "learning_rate": 9.381912591211108e-05, "loss": 3.0784, "step": 45372 }, { "epoch": 2.22, "grad_norm": 0.6880785226821899, "learning_rate": 9.380794235315125e-05, "loss": 3.0513, "step": 45373 }, { "epoch": 2.22, "grad_norm": 0.7282678484916687, "learning_rate": 9.379675933726149e-05, "loss": 2.742, "step": 45374 }, { "epoch": 2.22, "grad_norm": 0.7389535307884216, "learning_rate": 9.378557686447138e-05, "loss": 2.7666, "step": 45375 }, { "epoch": 2.22, "grad_norm": 0.6855918169021606, "learning_rate": 9.377439493481051e-05, "loss": 2.9682, "step": 45376 }, { "epoch": 2.22, "grad_norm": 0.6976229548454285, "learning_rate": 9.376321354830815e-05, "loss": 3.0365, "step": 45377 }, { "epoch": 2.22, "grad_norm": 0.6974002122879028, "learning_rate": 9.375203270499382e-05, "loss": 2.7856, "step": 45378 }, { "epoch": 2.22, "grad_norm": 0.7219595313072205, "learning_rate": 9.374085240489686e-05, "loss": 3.002, "step": 45379 }, { "epoch": 2.22, "grad_norm": 0.7123830318450928, "learning_rate": 9.372967264804678e-05, "loss": 2.9279, "step": 45380 }, { "epoch": 2.22, "grad_norm": 0.7480422258377075, "learning_rate": 9.371849343447313e-05, "loss": 2.6871, "step": 45381 }, { "epoch": 2.22, "grad_norm": 0.7664074897766113, "learning_rate": 9.370731476420521e-05, "loss": 2.9509, "step": 45382 }, { "epoch": 2.22, "grad_norm": 0.7201911211013794, "learning_rate": 9.369613663727257e-05, "loss": 2.9775, "step": 45383 }, { "epoch": 2.22, "grad_norm": 0.7252894639968872, "learning_rate": 9.368495905370454e-05, "loss": 2.9017, "step": 45384 }, { "epoch": 2.22, "grad_norm": 0.7937513589859009, "learning_rate": 9.367378201353072e-05, "loss": 3.3099, "step": 45385 }, { "epoch": 2.22, "grad_norm": 0.7466810345649719, "learning_rate": 9.366260551678043e-05, "loss": 2.7903, "step": 45386 }, { "epoch": 2.22, "grad_norm": 0.7079269289970398, "learning_rate": 9.365142956348303e-05, "loss": 2.878, "step": 45387 }, { "epoch": 2.22, "grad_norm": 0.75801682472229, "learning_rate": 9.364025415366815e-05, "loss": 2.7751, "step": 45388 }, { "epoch": 2.22, "grad_norm": 0.7508972883224487, "learning_rate": 9.362907928736504e-05, "loss": 2.8401, "step": 45389 }, { "epoch": 2.22, "grad_norm": 0.6906934380531311, "learning_rate": 9.361790496460321e-05, "loss": 2.8999, "step": 45390 }, { "epoch": 2.22, "grad_norm": 0.7488749027252197, "learning_rate": 9.36067311854122e-05, "loss": 2.8891, "step": 45391 }, { "epoch": 2.22, "grad_norm": 0.7199647426605225, "learning_rate": 9.359555794982136e-05, "loss": 2.8431, "step": 45392 }, { "epoch": 2.22, "grad_norm": 0.7045316100120544, "learning_rate": 9.358438525786006e-05, "loss": 2.8693, "step": 45393 }, { "epoch": 2.22, "grad_norm": 0.7193567156791687, "learning_rate": 9.357321310955766e-05, "loss": 3.1406, "step": 45394 }, { "epoch": 2.22, "grad_norm": 0.6742414832115173, "learning_rate": 9.356204150494374e-05, "loss": 2.8068, "step": 45395 }, { "epoch": 2.22, "grad_norm": 0.7182538509368896, "learning_rate": 9.355087044404772e-05, "loss": 3.1212, "step": 45396 }, { "epoch": 2.22, "grad_norm": 0.7040326595306396, "learning_rate": 9.353969992689891e-05, "loss": 3.1036, "step": 45397 }, { "epoch": 2.22, "grad_norm": 0.7186934947967529, "learning_rate": 9.352852995352689e-05, "loss": 2.7082, "step": 45398 }, { "epoch": 2.22, "grad_norm": 0.7571741342544556, "learning_rate": 9.351736052396088e-05, "loss": 3.0488, "step": 45399 }, { "epoch": 2.22, "grad_norm": 0.7321252822875977, "learning_rate": 9.350619163823051e-05, "loss": 2.9676, "step": 45400 }, { "epoch": 2.22, "grad_norm": 0.7001104950904846, "learning_rate": 9.349502329636505e-05, "loss": 2.819, "step": 45401 }, { "epoch": 2.23, "grad_norm": 0.718051016330719, "learning_rate": 9.34838554983939e-05, "loss": 2.897, "step": 45402 }, { "epoch": 2.23, "grad_norm": 0.7028896808624268, "learning_rate": 9.347268824434662e-05, "loss": 2.8919, "step": 45403 }, { "epoch": 2.23, "grad_norm": 0.7708454728126526, "learning_rate": 9.346152153425245e-05, "loss": 2.753, "step": 45404 }, { "epoch": 2.23, "grad_norm": 0.712142288684845, "learning_rate": 9.345035536814087e-05, "loss": 2.8239, "step": 45405 }, { "epoch": 2.23, "grad_norm": 0.7452940344810486, "learning_rate": 9.34391897460414e-05, "loss": 2.6923, "step": 45406 }, { "epoch": 2.23, "grad_norm": 0.7222115993499756, "learning_rate": 9.342802466798334e-05, "loss": 2.8162, "step": 45407 }, { "epoch": 2.23, "grad_norm": 0.7443071603775024, "learning_rate": 9.341686013399609e-05, "loss": 3.0949, "step": 45408 }, { "epoch": 2.23, "grad_norm": 0.7208560109138489, "learning_rate": 9.340569614410902e-05, "loss": 3.0639, "step": 45409 }, { "epoch": 2.23, "grad_norm": 0.7204750180244446, "learning_rate": 9.339453269835158e-05, "loss": 2.9431, "step": 45410 }, { "epoch": 2.23, "grad_norm": 0.6994853615760803, "learning_rate": 9.338336979675323e-05, "loss": 2.8151, "step": 45411 }, { "epoch": 2.23, "grad_norm": 0.6930583119392395, "learning_rate": 9.337220743934328e-05, "loss": 3.2522, "step": 45412 }, { "epoch": 2.23, "grad_norm": 0.783711314201355, "learning_rate": 9.336104562615123e-05, "loss": 2.9764, "step": 45413 }, { "epoch": 2.23, "grad_norm": 0.766595721244812, "learning_rate": 9.334988435720643e-05, "loss": 2.9608, "step": 45414 }, { "epoch": 2.23, "grad_norm": 0.7293518781661987, "learning_rate": 9.333872363253817e-05, "loss": 3.0567, "step": 45415 }, { "epoch": 2.23, "grad_norm": 0.6588782668113708, "learning_rate": 9.332756345217604e-05, "loss": 3.0559, "step": 45416 }, { "epoch": 2.23, "grad_norm": 0.7530074715614319, "learning_rate": 9.331640381614923e-05, "loss": 2.9012, "step": 45417 }, { "epoch": 2.23, "grad_norm": 0.740515410900116, "learning_rate": 9.330524472448735e-05, "loss": 2.8191, "step": 45418 }, { "epoch": 2.23, "grad_norm": 0.7402000427246094, "learning_rate": 9.329408617721953e-05, "loss": 2.8501, "step": 45419 }, { "epoch": 2.23, "grad_norm": 0.7017174959182739, "learning_rate": 9.328292817437535e-05, "loss": 3.007, "step": 45420 }, { "epoch": 2.23, "grad_norm": 0.7643221616744995, "learning_rate": 9.327177071598425e-05, "loss": 3.0573, "step": 45421 }, { "epoch": 2.23, "grad_norm": 0.7307689785957336, "learning_rate": 9.326061380207548e-05, "loss": 3.0476, "step": 45422 }, { "epoch": 2.23, "grad_norm": 0.7625921368598938, "learning_rate": 9.324945743267852e-05, "loss": 2.6859, "step": 45423 }, { "epoch": 2.23, "grad_norm": 0.7725873589515686, "learning_rate": 9.323830160782256e-05, "loss": 2.987, "step": 45424 }, { "epoch": 2.23, "grad_norm": 0.7039408683776855, "learning_rate": 9.322714632753721e-05, "loss": 3.0487, "step": 45425 }, { "epoch": 2.23, "grad_norm": 0.725426435470581, "learning_rate": 9.321599159185171e-05, "loss": 2.9296, "step": 45426 }, { "epoch": 2.23, "grad_norm": 0.7159402966499329, "learning_rate": 9.320483740079549e-05, "loss": 2.8903, "step": 45427 }, { "epoch": 2.23, "grad_norm": 0.7357923984527588, "learning_rate": 9.319368375439798e-05, "loss": 2.6201, "step": 45428 }, { "epoch": 2.23, "grad_norm": 0.7285069227218628, "learning_rate": 9.318253065268856e-05, "loss": 2.908, "step": 45429 }, { "epoch": 2.23, "grad_norm": 0.7228646278381348, "learning_rate": 9.317137809569651e-05, "loss": 3.1464, "step": 45430 }, { "epoch": 2.23, "grad_norm": 0.6842017769813538, "learning_rate": 9.316022608345115e-05, "loss": 3.0632, "step": 45431 }, { "epoch": 2.23, "grad_norm": 0.7215100526809692, "learning_rate": 9.314907461598198e-05, "loss": 2.8692, "step": 45432 }, { "epoch": 2.23, "grad_norm": 0.6963567137718201, "learning_rate": 9.31379236933184e-05, "loss": 2.8567, "step": 45433 }, { "epoch": 2.23, "grad_norm": 0.6841099262237549, "learning_rate": 9.312677331548965e-05, "loss": 2.6787, "step": 45434 }, { "epoch": 2.23, "grad_norm": 0.6942437291145325, "learning_rate": 9.311562348252523e-05, "loss": 3.0439, "step": 45435 }, { "epoch": 2.23, "grad_norm": 0.7410614490509033, "learning_rate": 9.310447419445446e-05, "loss": 2.9797, "step": 45436 }, { "epoch": 2.23, "grad_norm": 0.685966432094574, "learning_rate": 9.30933254513066e-05, "loss": 2.8331, "step": 45437 }, { "epoch": 2.23, "grad_norm": 0.7045655846595764, "learning_rate": 9.30821772531112e-05, "loss": 2.9347, "step": 45438 }, { "epoch": 2.23, "grad_norm": 0.6830521821975708, "learning_rate": 9.307102959989741e-05, "loss": 2.9615, "step": 45439 }, { "epoch": 2.23, "grad_norm": 0.6648380756378174, "learning_rate": 9.305988249169481e-05, "loss": 3.034, "step": 45440 }, { "epoch": 2.23, "grad_norm": 0.7092429995536804, "learning_rate": 9.304873592853253e-05, "loss": 2.9286, "step": 45441 }, { "epoch": 2.23, "grad_norm": 0.7085046172142029, "learning_rate": 9.303758991044007e-05, "loss": 2.8802, "step": 45442 }, { "epoch": 2.23, "grad_norm": 0.7178999781608582, "learning_rate": 9.302644443744689e-05, "loss": 2.7642, "step": 45443 }, { "epoch": 2.23, "grad_norm": 0.7461972236633301, "learning_rate": 9.301529950958218e-05, "loss": 2.9643, "step": 45444 }, { "epoch": 2.23, "grad_norm": 0.7783269286155701, "learning_rate": 9.300415512687538e-05, "loss": 3.0405, "step": 45445 }, { "epoch": 2.23, "grad_norm": 0.7068384885787964, "learning_rate": 9.299301128935568e-05, "loss": 2.7719, "step": 45446 }, { "epoch": 2.23, "grad_norm": 0.7588903903961182, "learning_rate": 9.298186799705251e-05, "loss": 2.9524, "step": 45447 }, { "epoch": 2.23, "grad_norm": 0.7695575952529907, "learning_rate": 9.297072524999541e-05, "loss": 2.719, "step": 45448 }, { "epoch": 2.23, "grad_norm": 0.6897878646850586, "learning_rate": 9.295958304821344e-05, "loss": 2.7618, "step": 45449 }, { "epoch": 2.23, "grad_norm": 0.7157047390937805, "learning_rate": 9.294844139173622e-05, "loss": 2.966, "step": 45450 }, { "epoch": 2.23, "grad_norm": 0.7025874257087708, "learning_rate": 9.293730028059295e-05, "loss": 3.0808, "step": 45451 }, { "epoch": 2.23, "grad_norm": 0.6616024374961853, "learning_rate": 9.292615971481287e-05, "loss": 3.149, "step": 45452 }, { "epoch": 2.23, "grad_norm": 0.6998037099838257, "learning_rate": 9.291501969442551e-05, "loss": 2.9832, "step": 45453 }, { "epoch": 2.23, "grad_norm": 0.7034969925880432, "learning_rate": 9.290388021946005e-05, "loss": 2.8652, "step": 45454 }, { "epoch": 2.23, "grad_norm": 0.6883959174156189, "learning_rate": 9.289274128994603e-05, "loss": 2.9851, "step": 45455 }, { "epoch": 2.23, "grad_norm": 0.6983670592308044, "learning_rate": 9.288160290591255e-05, "loss": 2.6393, "step": 45456 }, { "epoch": 2.23, "grad_norm": 0.7119463086128235, "learning_rate": 9.287046506738919e-05, "loss": 2.8454, "step": 45457 }, { "epoch": 2.23, "grad_norm": 0.6975045204162598, "learning_rate": 9.285932777440517e-05, "loss": 2.897, "step": 45458 }, { "epoch": 2.23, "grad_norm": 0.7082361578941345, "learning_rate": 9.284819102698967e-05, "loss": 2.7897, "step": 45459 }, { "epoch": 2.23, "grad_norm": 0.6975535154342651, "learning_rate": 9.283705482517231e-05, "loss": 2.9471, "step": 45460 }, { "epoch": 2.23, "grad_norm": 0.6530376076698303, "learning_rate": 9.282591916898216e-05, "loss": 2.8358, "step": 45461 }, { "epoch": 2.23, "grad_norm": 0.7379580140113831, "learning_rate": 9.281478405844866e-05, "loss": 2.7979, "step": 45462 }, { "epoch": 2.23, "grad_norm": 0.7067075967788696, "learning_rate": 9.280364949360125e-05, "loss": 2.9221, "step": 45463 }, { "epoch": 2.23, "grad_norm": 0.7226753234863281, "learning_rate": 9.279251547446906e-05, "loss": 2.8154, "step": 45464 }, { "epoch": 2.23, "grad_norm": 0.72322678565979, "learning_rate": 9.278138200108161e-05, "loss": 2.9727, "step": 45465 }, { "epoch": 2.23, "grad_norm": 0.6826564073562622, "learning_rate": 9.277024907346812e-05, "loss": 2.9562, "step": 45466 }, { "epoch": 2.23, "grad_norm": 0.7021982669830322, "learning_rate": 9.27591166916578e-05, "loss": 2.8354, "step": 45467 }, { "epoch": 2.23, "grad_norm": 0.7087491154670715, "learning_rate": 9.274798485568019e-05, "loss": 2.7002, "step": 45468 }, { "epoch": 2.23, "grad_norm": 0.7043088674545288, "learning_rate": 9.273685356556437e-05, "loss": 2.8718, "step": 45469 }, { "epoch": 2.23, "grad_norm": 0.7019631266593933, "learning_rate": 9.272572282133992e-05, "loss": 2.7755, "step": 45470 }, { "epoch": 2.23, "grad_norm": 0.6537641882896423, "learning_rate": 9.271459262303594e-05, "loss": 3.0271, "step": 45471 }, { "epoch": 2.23, "grad_norm": 0.7400332689285278, "learning_rate": 9.270346297068192e-05, "loss": 2.8694, "step": 45472 }, { "epoch": 2.23, "grad_norm": 0.7204238772392273, "learning_rate": 9.269233386430705e-05, "loss": 2.9346, "step": 45473 }, { "epoch": 2.23, "grad_norm": 0.6749469637870789, "learning_rate": 9.26812053039406e-05, "loss": 2.7155, "step": 45474 }, { "epoch": 2.23, "grad_norm": 0.6849311590194702, "learning_rate": 9.267007728961205e-05, "loss": 2.9678, "step": 45475 }, { "epoch": 2.23, "grad_norm": 0.6702030301094055, "learning_rate": 9.265894982135054e-05, "loss": 2.946, "step": 45476 }, { "epoch": 2.23, "grad_norm": 0.7221550345420837, "learning_rate": 9.264782289918542e-05, "loss": 3.0065, "step": 45477 }, { "epoch": 2.23, "grad_norm": 0.6980705857276917, "learning_rate": 9.263669652314615e-05, "loss": 3.0259, "step": 45478 }, { "epoch": 2.23, "grad_norm": 0.7406960725784302, "learning_rate": 9.26255706932619e-05, "loss": 2.8508, "step": 45479 }, { "epoch": 2.23, "grad_norm": 0.7291260957717896, "learning_rate": 9.261444540956201e-05, "loss": 2.8585, "step": 45480 }, { "epoch": 2.23, "grad_norm": 0.7162569165229797, "learning_rate": 9.260332067207563e-05, "loss": 3.0759, "step": 45481 }, { "epoch": 2.23, "grad_norm": 0.6985812783241272, "learning_rate": 9.25921964808322e-05, "loss": 2.9614, "step": 45482 }, { "epoch": 2.23, "grad_norm": 0.7387300133705139, "learning_rate": 9.258107283586111e-05, "loss": 2.8777, "step": 45483 }, { "epoch": 2.23, "grad_norm": 0.7236722111701965, "learning_rate": 9.256994973719148e-05, "loss": 2.9205, "step": 45484 }, { "epoch": 2.23, "grad_norm": 0.8212042450904846, "learning_rate": 9.255882718485272e-05, "loss": 2.83, "step": 45485 }, { "epoch": 2.23, "grad_norm": 0.7353643774986267, "learning_rate": 9.254770517887403e-05, "loss": 2.8415, "step": 45486 }, { "epoch": 2.23, "grad_norm": 0.6906113624572754, "learning_rate": 9.253658371928487e-05, "loss": 2.7953, "step": 45487 }, { "epoch": 2.23, "grad_norm": 0.7319288849830627, "learning_rate": 9.25254628061144e-05, "loss": 3.0537, "step": 45488 }, { "epoch": 2.23, "grad_norm": 0.6931775212287903, "learning_rate": 9.251434243939183e-05, "loss": 2.9983, "step": 45489 }, { "epoch": 2.23, "grad_norm": 0.6803140640258789, "learning_rate": 9.250322261914666e-05, "loss": 2.9426, "step": 45490 }, { "epoch": 2.23, "grad_norm": 0.7066940665245056, "learning_rate": 9.249210334540798e-05, "loss": 2.713, "step": 45491 }, { "epoch": 2.23, "grad_norm": 0.6657136082649231, "learning_rate": 9.248098461820513e-05, "loss": 2.6789, "step": 45492 }, { "epoch": 2.23, "grad_norm": 0.7160326242446899, "learning_rate": 9.246986643756755e-05, "loss": 2.6241, "step": 45493 }, { "epoch": 2.23, "grad_norm": 0.7386571764945984, "learning_rate": 9.245874880352441e-05, "loss": 3.0217, "step": 45494 }, { "epoch": 2.23, "grad_norm": 0.7494421601295471, "learning_rate": 9.244763171610494e-05, "loss": 2.9368, "step": 45495 }, { "epoch": 2.23, "grad_norm": 0.7182496786117554, "learning_rate": 9.24365151753384e-05, "loss": 3.0068, "step": 45496 }, { "epoch": 2.23, "grad_norm": 0.7699613571166992, "learning_rate": 9.242539918125416e-05, "loss": 2.7448, "step": 45497 }, { "epoch": 2.23, "grad_norm": 0.6990972757339478, "learning_rate": 9.241428373388152e-05, "loss": 2.9623, "step": 45498 }, { "epoch": 2.23, "grad_norm": 0.7023851275444031, "learning_rate": 9.240316883324964e-05, "loss": 2.8577, "step": 45499 }, { "epoch": 2.23, "grad_norm": 0.710388720035553, "learning_rate": 9.239205447938793e-05, "loss": 2.885, "step": 45500 }, { "epoch": 2.23, "grad_norm": 0.7227920889854431, "learning_rate": 9.238094067232551e-05, "loss": 2.8459, "step": 45501 }, { "epoch": 2.23, "grad_norm": 0.7624646425247192, "learning_rate": 9.236982741209191e-05, "loss": 2.9227, "step": 45502 }, { "epoch": 2.23, "grad_norm": 0.7066624164581299, "learning_rate": 9.235871469871607e-05, "loss": 3.1932, "step": 45503 }, { "epoch": 2.23, "grad_norm": 0.7189279198646545, "learning_rate": 9.23476025322274e-05, "loss": 2.8258, "step": 45504 }, { "epoch": 2.23, "grad_norm": 0.6847929954528809, "learning_rate": 9.233649091265529e-05, "loss": 2.6352, "step": 45505 }, { "epoch": 2.23, "grad_norm": 0.7308363318443298, "learning_rate": 9.232537984002878e-05, "loss": 3.0242, "step": 45506 }, { "epoch": 2.23, "grad_norm": 0.7426416873931885, "learning_rate": 9.231426931437734e-05, "loss": 2.9488, "step": 45507 }, { "epoch": 2.23, "grad_norm": 0.6900115609169006, "learning_rate": 9.230315933573005e-05, "loss": 2.8308, "step": 45508 }, { "epoch": 2.23, "grad_norm": 0.7146220207214355, "learning_rate": 9.229204990411637e-05, "loss": 2.9156, "step": 45509 }, { "epoch": 2.23, "grad_norm": 0.733957052230835, "learning_rate": 9.228094101956546e-05, "loss": 2.8853, "step": 45510 }, { "epoch": 2.23, "grad_norm": 0.7220903635025024, "learning_rate": 9.226983268210649e-05, "loss": 2.9089, "step": 45511 }, { "epoch": 2.23, "grad_norm": 0.7509836554527283, "learning_rate": 9.225872489176888e-05, "loss": 2.9585, "step": 45512 }, { "epoch": 2.23, "grad_norm": 0.7305395603179932, "learning_rate": 9.224761764858172e-05, "loss": 3.0472, "step": 45513 }, { "epoch": 2.23, "grad_norm": 0.7344761490821838, "learning_rate": 9.223651095257435e-05, "loss": 2.9322, "step": 45514 }, { "epoch": 2.23, "grad_norm": 0.693290650844574, "learning_rate": 9.222540480377613e-05, "loss": 2.8171, "step": 45515 }, { "epoch": 2.23, "grad_norm": 0.6867750287055969, "learning_rate": 9.22142992022162e-05, "loss": 3.0071, "step": 45516 }, { "epoch": 2.23, "grad_norm": 0.7052892446517944, "learning_rate": 9.220319414792382e-05, "loss": 2.9504, "step": 45517 }, { "epoch": 2.23, "grad_norm": 0.7113419771194458, "learning_rate": 9.219208964092812e-05, "loss": 2.9487, "step": 45518 }, { "epoch": 2.23, "grad_norm": 0.7192926406860352, "learning_rate": 9.218098568125849e-05, "loss": 2.9342, "step": 45519 }, { "epoch": 2.23, "grad_norm": 0.7447081208229065, "learning_rate": 9.216988226894423e-05, "loss": 2.9044, "step": 45520 }, { "epoch": 2.23, "grad_norm": 0.7151831388473511, "learning_rate": 9.215877940401441e-05, "loss": 3.0041, "step": 45521 }, { "epoch": 2.23, "grad_norm": 0.7142324447631836, "learning_rate": 9.214767708649845e-05, "loss": 2.8919, "step": 45522 }, { "epoch": 2.23, "grad_norm": 0.7118008732795715, "learning_rate": 9.213657531642541e-05, "loss": 2.7913, "step": 45523 }, { "epoch": 2.23, "grad_norm": 0.7367550730705261, "learning_rate": 9.212547409382473e-05, "loss": 3.1136, "step": 45524 }, { "epoch": 2.23, "grad_norm": 0.7201369404792786, "learning_rate": 9.211437341872557e-05, "loss": 2.8379, "step": 45525 }, { "epoch": 2.23, "grad_norm": 0.7369980216026306, "learning_rate": 9.2103273291157e-05, "loss": 2.9005, "step": 45526 }, { "epoch": 2.23, "grad_norm": 0.7533488869667053, "learning_rate": 9.209217371114852e-05, "loss": 3.1198, "step": 45527 }, { "epoch": 2.23, "grad_norm": 0.6949545741081238, "learning_rate": 9.208107467872914e-05, "loss": 2.8966, "step": 45528 }, { "epoch": 2.23, "grad_norm": 0.7416513562202454, "learning_rate": 9.20699761939282e-05, "loss": 2.7839, "step": 45529 }, { "epoch": 2.23, "grad_norm": 0.6859050989151001, "learning_rate": 9.205887825677504e-05, "loss": 2.9379, "step": 45530 }, { "epoch": 2.23, "grad_norm": 0.7521802186965942, "learning_rate": 9.204778086729875e-05, "loss": 3.0114, "step": 45531 }, { "epoch": 2.23, "grad_norm": 0.6823815703392029, "learning_rate": 9.203668402552857e-05, "loss": 2.9262, "step": 45532 }, { "epoch": 2.23, "grad_norm": 0.7390176057815552, "learning_rate": 9.202558773149368e-05, "loss": 2.6721, "step": 45533 }, { "epoch": 2.23, "grad_norm": 0.7297099232673645, "learning_rate": 9.201449198522334e-05, "loss": 2.9628, "step": 45534 }, { "epoch": 2.23, "grad_norm": 0.7210890054702759, "learning_rate": 9.20033967867469e-05, "loss": 2.8604, "step": 45535 }, { "epoch": 2.23, "grad_norm": 0.7243456244468689, "learning_rate": 9.199230213609342e-05, "loss": 2.7529, "step": 45536 }, { "epoch": 2.23, "grad_norm": 0.7083402872085571, "learning_rate": 9.198120803329225e-05, "loss": 3.0748, "step": 45537 }, { "epoch": 2.23, "grad_norm": 0.7190839648246765, "learning_rate": 9.197011447837253e-05, "loss": 2.7782, "step": 45538 }, { "epoch": 2.23, "grad_norm": 0.7521587014198303, "learning_rate": 9.19590214713634e-05, "loss": 3.3249, "step": 45539 }, { "epoch": 2.23, "grad_norm": 0.7044076323509216, "learning_rate": 9.194792901229429e-05, "loss": 2.9068, "step": 45540 }, { "epoch": 2.23, "grad_norm": 0.7235004901885986, "learning_rate": 9.19368371011942e-05, "loss": 3.2096, "step": 45541 }, { "epoch": 2.23, "grad_norm": 0.728836715221405, "learning_rate": 9.19257457380925e-05, "loss": 2.7914, "step": 45542 }, { "epoch": 2.23, "grad_norm": 0.7259882688522339, "learning_rate": 9.191465492301827e-05, "loss": 2.8201, "step": 45543 }, { "epoch": 2.23, "grad_norm": 0.6856926083564758, "learning_rate": 9.190356465600081e-05, "loss": 3.0713, "step": 45544 }, { "epoch": 2.23, "grad_norm": 0.7389464974403381, "learning_rate": 9.189247493706939e-05, "loss": 3.0653, "step": 45545 }, { "epoch": 2.23, "grad_norm": 0.7572897672653198, "learning_rate": 9.188138576625314e-05, "loss": 3.0166, "step": 45546 }, { "epoch": 2.23, "grad_norm": 0.7614259123802185, "learning_rate": 9.187029714358124e-05, "loss": 2.9215, "step": 45547 }, { "epoch": 2.23, "grad_norm": 0.8179017305374146, "learning_rate": 9.185920906908288e-05, "loss": 2.6157, "step": 45548 }, { "epoch": 2.23, "grad_norm": 0.7034313082695007, "learning_rate": 9.184812154278726e-05, "loss": 3.0541, "step": 45549 }, { "epoch": 2.23, "grad_norm": 0.7329689264297485, "learning_rate": 9.183703456472373e-05, "loss": 3.0281, "step": 45550 }, { "epoch": 2.23, "grad_norm": 0.7217639088630676, "learning_rate": 9.18259481349213e-05, "loss": 2.6217, "step": 45551 }, { "epoch": 2.23, "grad_norm": 0.7059627771377563, "learning_rate": 9.181486225340938e-05, "loss": 2.8865, "step": 45552 }, { "epoch": 2.23, "grad_norm": 0.726000964641571, "learning_rate": 9.180377692021702e-05, "loss": 2.8841, "step": 45553 }, { "epoch": 2.23, "grad_norm": 0.7023661136627197, "learning_rate": 9.179269213537334e-05, "loss": 3.087, "step": 45554 }, { "epoch": 2.23, "grad_norm": 0.736512303352356, "learning_rate": 9.178160789890776e-05, "loss": 2.9646, "step": 45555 }, { "epoch": 2.23, "grad_norm": 0.7114843130111694, "learning_rate": 9.177052421084923e-05, "loss": 2.8579, "step": 45556 }, { "epoch": 2.23, "grad_norm": 0.7629355192184448, "learning_rate": 9.175944107122718e-05, "loss": 2.6514, "step": 45557 }, { "epoch": 2.23, "grad_norm": 0.7430128455162048, "learning_rate": 9.17483584800706e-05, "loss": 2.9601, "step": 45558 }, { "epoch": 2.23, "grad_norm": 0.6909064650535583, "learning_rate": 9.173727643740882e-05, "loss": 2.8679, "step": 45559 }, { "epoch": 2.23, "grad_norm": 0.6793957948684692, "learning_rate": 9.172619494327101e-05, "loss": 2.9662, "step": 45560 }, { "epoch": 2.23, "grad_norm": 0.6956231594085693, "learning_rate": 9.171511399768621e-05, "loss": 2.8303, "step": 45561 }, { "epoch": 2.23, "grad_norm": 0.7205344438552856, "learning_rate": 9.170403360068381e-05, "loss": 2.8042, "step": 45562 }, { "epoch": 2.23, "grad_norm": 0.7068600058555603, "learning_rate": 9.16929537522928e-05, "loss": 2.7959, "step": 45563 }, { "epoch": 2.23, "grad_norm": 0.7382459044456482, "learning_rate": 9.168187445254247e-05, "loss": 3.0018, "step": 45564 }, { "epoch": 2.23, "grad_norm": 0.7958078980445862, "learning_rate": 9.167079570146207e-05, "loss": 2.7408, "step": 45565 }, { "epoch": 2.23, "grad_norm": 0.7165841460227966, "learning_rate": 9.165971749908063e-05, "loss": 2.958, "step": 45566 }, { "epoch": 2.23, "grad_norm": 0.7124565839767456, "learning_rate": 9.164863984542746e-05, "loss": 2.9686, "step": 45567 }, { "epoch": 2.23, "grad_norm": 0.7308372855186462, "learning_rate": 9.163756274053171e-05, "loss": 2.7848, "step": 45568 }, { "epoch": 2.23, "grad_norm": 0.6912724375724792, "learning_rate": 9.162648618442239e-05, "loss": 2.6554, "step": 45569 }, { "epoch": 2.23, "grad_norm": 0.7064422965049744, "learning_rate": 9.161541017712893e-05, "loss": 2.9808, "step": 45570 }, { "epoch": 2.23, "grad_norm": 0.7386999130249023, "learning_rate": 9.160433471868026e-05, "loss": 2.8824, "step": 45571 }, { "epoch": 2.23, "grad_norm": 0.7071724534034729, "learning_rate": 9.159325980910578e-05, "loss": 3.0607, "step": 45572 }, { "epoch": 2.23, "grad_norm": 0.734996497631073, "learning_rate": 9.158218544843442e-05, "loss": 2.6605, "step": 45573 }, { "epoch": 2.23, "grad_norm": 0.7159255743026733, "learning_rate": 9.15711116366956e-05, "loss": 3.0333, "step": 45574 }, { "epoch": 2.23, "grad_norm": 0.6946538686752319, "learning_rate": 9.156003837391834e-05, "loss": 3.0226, "step": 45575 }, { "epoch": 2.23, "grad_norm": 0.678464949131012, "learning_rate": 9.154896566013172e-05, "loss": 2.7583, "step": 45576 }, { "epoch": 2.23, "grad_norm": 0.7494169473648071, "learning_rate": 9.153789349536511e-05, "loss": 3.1025, "step": 45577 }, { "epoch": 2.23, "grad_norm": 0.7178239822387695, "learning_rate": 9.152682187964749e-05, "loss": 3.0465, "step": 45578 }, { "epoch": 2.23, "grad_norm": 0.7179162502288818, "learning_rate": 9.151575081300807e-05, "loss": 2.8249, "step": 45579 }, { "epoch": 2.23, "grad_norm": 0.6873793005943298, "learning_rate": 9.150468029547616e-05, "loss": 3.0525, "step": 45580 }, { "epoch": 2.23, "grad_norm": 0.7262808680534363, "learning_rate": 9.14936103270808e-05, "loss": 2.9266, "step": 45581 }, { "epoch": 2.23, "grad_norm": 0.6978574991226196, "learning_rate": 9.148254090785113e-05, "loss": 2.8802, "step": 45582 }, { "epoch": 2.23, "grad_norm": 0.7047426104545593, "learning_rate": 9.147147203781622e-05, "loss": 2.6446, "step": 45583 }, { "epoch": 2.23, "grad_norm": 0.7362146377563477, "learning_rate": 9.14604037170054e-05, "loss": 2.7547, "step": 45584 }, { "epoch": 2.23, "grad_norm": 0.6840211153030396, "learning_rate": 9.14493359454477e-05, "loss": 3.0048, "step": 45585 }, { "epoch": 2.23, "grad_norm": 0.727241575717926, "learning_rate": 9.143826872317224e-05, "loss": 2.8566, "step": 45586 }, { "epoch": 2.23, "grad_norm": 0.6769251823425293, "learning_rate": 9.14272020502084e-05, "loss": 2.8878, "step": 45587 }, { "epoch": 2.23, "grad_norm": 0.7173831462860107, "learning_rate": 9.141613592658504e-05, "loss": 2.9605, "step": 45588 }, { "epoch": 2.23, "grad_norm": 0.7770647406578064, "learning_rate": 9.140507035233154e-05, "loss": 2.8595, "step": 45589 }, { "epoch": 2.23, "grad_norm": 0.7519656419754028, "learning_rate": 9.139400532747694e-05, "loss": 2.6777, "step": 45590 }, { "epoch": 2.23, "grad_norm": 0.7570061087608337, "learning_rate": 9.138294085205031e-05, "loss": 2.9145, "step": 45591 }, { "epoch": 2.23, "grad_norm": 0.7038795351982117, "learning_rate": 9.137187692608091e-05, "loss": 2.9259, "step": 45592 }, { "epoch": 2.23, "grad_norm": 0.7403584718704224, "learning_rate": 9.136081354959777e-05, "loss": 3.0187, "step": 45593 }, { "epoch": 2.23, "grad_norm": 0.6944155693054199, "learning_rate": 9.134975072263022e-05, "loss": 2.928, "step": 45594 }, { "epoch": 2.23, "grad_norm": 0.7204017043113708, "learning_rate": 9.133868844520712e-05, "loss": 2.8462, "step": 45595 }, { "epoch": 2.23, "grad_norm": 0.7419741153717041, "learning_rate": 9.132762671735789e-05, "loss": 3.0493, "step": 45596 }, { "epoch": 2.23, "grad_norm": 0.6750164031982422, "learning_rate": 9.13165655391115e-05, "loss": 2.957, "step": 45597 }, { "epoch": 2.23, "grad_norm": 0.7366441488265991, "learning_rate": 9.130550491049705e-05, "loss": 2.8784, "step": 45598 }, { "epoch": 2.23, "grad_norm": 0.8148265480995178, "learning_rate": 9.129444483154379e-05, "loss": 3.0984, "step": 45599 }, { "epoch": 2.23, "grad_norm": 0.7063391804695129, "learning_rate": 9.128338530228074e-05, "loss": 2.8162, "step": 45600 }, { "epoch": 2.23, "grad_norm": 0.7022308707237244, "learning_rate": 9.127232632273708e-05, "loss": 2.8945, "step": 45601 }, { "epoch": 2.23, "grad_norm": 0.6840649247169495, "learning_rate": 9.126126789294203e-05, "loss": 2.8795, "step": 45602 }, { "epoch": 2.23, "grad_norm": 0.7260749340057373, "learning_rate": 9.125021001292462e-05, "loss": 2.9185, "step": 45603 }, { "epoch": 2.23, "grad_norm": 0.7081303596496582, "learning_rate": 9.1239152682714e-05, "loss": 3.0542, "step": 45604 }, { "epoch": 2.23, "grad_norm": 0.6989736557006836, "learning_rate": 9.122809590233916e-05, "loss": 3.1102, "step": 45605 }, { "epoch": 2.24, "grad_norm": 0.6952615976333618, "learning_rate": 9.121703967182934e-05, "loss": 2.8599, "step": 45606 }, { "epoch": 2.24, "grad_norm": 0.714263916015625, "learning_rate": 9.120598399121375e-05, "loss": 2.8885, "step": 45607 }, { "epoch": 2.24, "grad_norm": 0.7635734677314758, "learning_rate": 9.119492886052133e-05, "loss": 2.9962, "step": 45608 }, { "epoch": 2.24, "grad_norm": 0.690173864364624, "learning_rate": 9.118387427978137e-05, "loss": 2.9021, "step": 45609 }, { "epoch": 2.24, "grad_norm": 0.6887124180793762, "learning_rate": 9.117282024902282e-05, "loss": 2.9197, "step": 45610 }, { "epoch": 2.24, "grad_norm": 0.6885898113250732, "learning_rate": 9.116176676827496e-05, "loss": 2.9658, "step": 45611 }, { "epoch": 2.24, "grad_norm": 0.6705561280250549, "learning_rate": 9.115071383756681e-05, "loss": 3.082, "step": 45612 }, { "epoch": 2.24, "grad_norm": 0.6776615977287292, "learning_rate": 9.113966145692737e-05, "loss": 3.0067, "step": 45613 }, { "epoch": 2.24, "grad_norm": 0.7082581520080566, "learning_rate": 9.112860962638598e-05, "loss": 3.1007, "step": 45614 }, { "epoch": 2.24, "grad_norm": 0.7481296062469482, "learning_rate": 9.111755834597155e-05, "loss": 2.9351, "step": 45615 }, { "epoch": 2.24, "grad_norm": 0.6857460737228394, "learning_rate": 9.110650761571325e-05, "loss": 2.7641, "step": 45616 }, { "epoch": 2.24, "grad_norm": 0.7014007568359375, "learning_rate": 9.109545743564032e-05, "loss": 2.897, "step": 45617 }, { "epoch": 2.24, "grad_norm": 0.7194677591323853, "learning_rate": 9.108440780578172e-05, "loss": 2.8158, "step": 45618 }, { "epoch": 2.24, "grad_norm": 0.7104588150978088, "learning_rate": 9.107335872616661e-05, "loss": 2.8991, "step": 45619 }, { "epoch": 2.24, "grad_norm": 0.7148900628089905, "learning_rate": 9.106231019682396e-05, "loss": 2.6667, "step": 45620 }, { "epoch": 2.24, "grad_norm": 0.7613111734390259, "learning_rate": 9.105126221778299e-05, "loss": 2.9804, "step": 45621 }, { "epoch": 2.24, "grad_norm": 0.7180856466293335, "learning_rate": 9.104021478907289e-05, "loss": 3.0473, "step": 45622 }, { "epoch": 2.24, "grad_norm": 0.7277432680130005, "learning_rate": 9.102916791072252e-05, "loss": 3.0207, "step": 45623 }, { "epoch": 2.24, "grad_norm": 0.6842195987701416, "learning_rate": 9.101812158276121e-05, "loss": 2.8484, "step": 45624 }, { "epoch": 2.24, "grad_norm": 0.719687819480896, "learning_rate": 9.100707580521785e-05, "loss": 3.1122, "step": 45625 }, { "epoch": 2.24, "grad_norm": 0.7382856607437134, "learning_rate": 9.099603057812173e-05, "loss": 2.9134, "step": 45626 }, { "epoch": 2.24, "grad_norm": 0.7554395198822021, "learning_rate": 9.098498590150183e-05, "loss": 2.8051, "step": 45627 }, { "epoch": 2.24, "grad_norm": 0.7509669661521912, "learning_rate": 9.097394177538717e-05, "loss": 2.9297, "step": 45628 }, { "epoch": 2.24, "grad_norm": 1.0624964237213135, "learning_rate": 9.096289819980701e-05, "loss": 2.9979, "step": 45629 }, { "epoch": 2.24, "grad_norm": 0.6812724471092224, "learning_rate": 9.095185517479025e-05, "loss": 2.8224, "step": 45630 }, { "epoch": 2.24, "grad_norm": 0.7406055927276611, "learning_rate": 9.094081270036608e-05, "loss": 2.8994, "step": 45631 }, { "epoch": 2.24, "grad_norm": 0.7246912717819214, "learning_rate": 9.092977077656366e-05, "loss": 2.7739, "step": 45632 }, { "epoch": 2.24, "grad_norm": 0.7108069658279419, "learning_rate": 9.091872940341199e-05, "loss": 2.8253, "step": 45633 }, { "epoch": 2.24, "grad_norm": 0.7075560688972473, "learning_rate": 9.090768858094015e-05, "loss": 3.2586, "step": 45634 }, { "epoch": 2.24, "grad_norm": 0.7471787333488464, "learning_rate": 9.089664830917713e-05, "loss": 2.915, "step": 45635 }, { "epoch": 2.24, "grad_norm": 0.6982598304748535, "learning_rate": 9.088560858815208e-05, "loss": 2.8928, "step": 45636 }, { "epoch": 2.24, "grad_norm": 0.6782969236373901, "learning_rate": 9.087456941789421e-05, "loss": 2.8622, "step": 45637 }, { "epoch": 2.24, "grad_norm": 0.7105681896209717, "learning_rate": 9.086353079843238e-05, "loss": 2.9933, "step": 45638 }, { "epoch": 2.24, "grad_norm": 0.719509482383728, "learning_rate": 9.085249272979582e-05, "loss": 2.9826, "step": 45639 }, { "epoch": 2.24, "grad_norm": 0.7250756025314331, "learning_rate": 9.084145521201357e-05, "loss": 2.8474, "step": 45640 }, { "epoch": 2.24, "grad_norm": 0.7311588525772095, "learning_rate": 9.083041824511455e-05, "loss": 2.9898, "step": 45641 }, { "epoch": 2.24, "grad_norm": 0.7223825454711914, "learning_rate": 9.081938182912809e-05, "loss": 2.9584, "step": 45642 }, { "epoch": 2.24, "grad_norm": 0.7354487180709839, "learning_rate": 9.080834596408301e-05, "loss": 2.9169, "step": 45643 }, { "epoch": 2.24, "grad_norm": 0.7054993510246277, "learning_rate": 9.079731065000858e-05, "loss": 2.7169, "step": 45644 }, { "epoch": 2.24, "grad_norm": 0.7050856351852417, "learning_rate": 9.07862758869337e-05, "loss": 2.8571, "step": 45645 }, { "epoch": 2.24, "grad_norm": 0.7472595572471619, "learning_rate": 9.077524167488746e-05, "loss": 3.0456, "step": 45646 }, { "epoch": 2.24, "grad_norm": 0.7568950653076172, "learning_rate": 9.076420801389909e-05, "loss": 2.7929, "step": 45647 }, { "epoch": 2.24, "grad_norm": 0.6783512830734253, "learning_rate": 9.075317490399752e-05, "loss": 2.9669, "step": 45648 }, { "epoch": 2.24, "grad_norm": 0.706605851650238, "learning_rate": 9.074214234521182e-05, "loss": 2.7641, "step": 45649 }, { "epoch": 2.24, "grad_norm": 0.6669334173202515, "learning_rate": 9.073111033757094e-05, "loss": 2.7432, "step": 45650 }, { "epoch": 2.24, "grad_norm": 0.7224730253219604, "learning_rate": 9.072007888110404e-05, "loss": 2.7118, "step": 45651 }, { "epoch": 2.24, "grad_norm": 0.7125197052955627, "learning_rate": 9.070904797584029e-05, "loss": 2.9566, "step": 45652 }, { "epoch": 2.24, "grad_norm": 0.7195077538490295, "learning_rate": 9.069801762180853e-05, "loss": 2.6613, "step": 45653 }, { "epoch": 2.24, "grad_norm": 0.7205254435539246, "learning_rate": 9.0686987819038e-05, "loss": 3.0001, "step": 45654 }, { "epoch": 2.24, "grad_norm": 0.7503374218940735, "learning_rate": 9.067595856755765e-05, "loss": 2.7024, "step": 45655 }, { "epoch": 2.24, "grad_norm": 0.6984966397285461, "learning_rate": 9.066492986739647e-05, "loss": 2.7526, "step": 45656 }, { "epoch": 2.24, "grad_norm": 0.6726536154747009, "learning_rate": 9.065390171858366e-05, "loss": 2.9738, "step": 45657 }, { "epoch": 2.24, "grad_norm": 0.6979069113731384, "learning_rate": 9.064287412114808e-05, "loss": 2.9208, "step": 45658 }, { "epoch": 2.24, "grad_norm": 0.695800244808197, "learning_rate": 9.063184707511899e-05, "loss": 2.9246, "step": 45659 }, { "epoch": 2.24, "grad_norm": 0.7155253887176514, "learning_rate": 9.06208205805252e-05, "loss": 2.8638, "step": 45660 }, { "epoch": 2.24, "grad_norm": 0.6801446676254272, "learning_rate": 9.060979463739598e-05, "loss": 2.9644, "step": 45661 }, { "epoch": 2.24, "grad_norm": 0.6838302612304688, "learning_rate": 9.059876924576029e-05, "loss": 2.9361, "step": 45662 }, { "epoch": 2.24, "grad_norm": 0.7186485528945923, "learning_rate": 9.0587744405647e-05, "loss": 2.8708, "step": 45663 }, { "epoch": 2.24, "grad_norm": 0.664668083190918, "learning_rate": 9.057672011708541e-05, "loss": 2.7418, "step": 45664 }, { "epoch": 2.24, "grad_norm": 0.7575511336326599, "learning_rate": 9.056569638010436e-05, "loss": 2.9639, "step": 45665 }, { "epoch": 2.24, "grad_norm": 0.7134124040603638, "learning_rate": 9.055467319473303e-05, "loss": 2.6629, "step": 45666 }, { "epoch": 2.24, "grad_norm": 0.7106820344924927, "learning_rate": 9.054365056100027e-05, "loss": 2.8322, "step": 45667 }, { "epoch": 2.24, "grad_norm": 0.6775752902030945, "learning_rate": 9.053262847893526e-05, "loss": 2.8653, "step": 45668 }, { "epoch": 2.24, "grad_norm": 0.7096874713897705, "learning_rate": 9.052160694856706e-05, "loss": 3.0051, "step": 45669 }, { "epoch": 2.24, "grad_norm": 0.7215758562088013, "learning_rate": 9.051058596992463e-05, "loss": 2.9697, "step": 45670 }, { "epoch": 2.24, "grad_norm": 0.7285184860229492, "learning_rate": 9.049956554303702e-05, "loss": 2.8607, "step": 45671 }, { "epoch": 2.24, "grad_norm": 0.7512555122375488, "learning_rate": 9.048854566793314e-05, "loss": 3.0024, "step": 45672 }, { "epoch": 2.24, "grad_norm": 0.7152148485183716, "learning_rate": 9.04775263446421e-05, "loss": 3.0288, "step": 45673 }, { "epoch": 2.24, "grad_norm": 0.7303699851036072, "learning_rate": 9.046650757319304e-05, "loss": 2.9171, "step": 45674 }, { "epoch": 2.24, "grad_norm": 0.6994890570640564, "learning_rate": 9.045548935361475e-05, "loss": 2.7558, "step": 45675 }, { "epoch": 2.24, "grad_norm": 0.7585562467575073, "learning_rate": 9.044447168593649e-05, "loss": 3.004, "step": 45676 }, { "epoch": 2.24, "grad_norm": 0.7422113418579102, "learning_rate": 9.043345457018718e-05, "loss": 2.9295, "step": 45677 }, { "epoch": 2.24, "grad_norm": 0.7175257802009583, "learning_rate": 9.042243800639569e-05, "loss": 2.9137, "step": 45678 }, { "epoch": 2.24, "grad_norm": 0.7064406275749207, "learning_rate": 9.041142199459127e-05, "loss": 2.9965, "step": 45679 }, { "epoch": 2.24, "grad_norm": 0.7073319554328918, "learning_rate": 9.040040653480272e-05, "loss": 2.8982, "step": 45680 }, { "epoch": 2.24, "grad_norm": 0.7129002213478088, "learning_rate": 9.038939162705926e-05, "loss": 2.8788, "step": 45681 }, { "epoch": 2.24, "grad_norm": 0.7113178968429565, "learning_rate": 9.037837727138974e-05, "loss": 3.0197, "step": 45682 }, { "epoch": 2.24, "grad_norm": 0.7069679498672485, "learning_rate": 9.036736346782331e-05, "loss": 2.7209, "step": 45683 }, { "epoch": 2.24, "grad_norm": 0.7247236371040344, "learning_rate": 9.035635021638889e-05, "loss": 2.9684, "step": 45684 }, { "epoch": 2.24, "grad_norm": 0.685911238193512, "learning_rate": 9.034533751711539e-05, "loss": 3.061, "step": 45685 }, { "epoch": 2.24, "grad_norm": 0.7022624611854553, "learning_rate": 9.033432537003204e-05, "loss": 2.9441, "step": 45686 }, { "epoch": 2.24, "grad_norm": 0.7100083827972412, "learning_rate": 9.032331377516764e-05, "loss": 2.7339, "step": 45687 }, { "epoch": 2.24, "grad_norm": 0.7229495048522949, "learning_rate": 9.031230273255124e-05, "loss": 2.8123, "step": 45688 }, { "epoch": 2.24, "grad_norm": 0.7110562920570374, "learning_rate": 9.030129224221201e-05, "loss": 2.9297, "step": 45689 }, { "epoch": 2.24, "grad_norm": 0.6981791257858276, "learning_rate": 9.029028230417871e-05, "loss": 3.0289, "step": 45690 }, { "epoch": 2.24, "grad_norm": 0.7141596674919128, "learning_rate": 9.027927291848055e-05, "loss": 2.9254, "step": 45691 }, { "epoch": 2.24, "grad_norm": 0.7503494620323181, "learning_rate": 9.026826408514641e-05, "loss": 2.9531, "step": 45692 }, { "epoch": 2.24, "grad_norm": 0.729244589805603, "learning_rate": 9.025725580420521e-05, "loss": 2.9759, "step": 45693 }, { "epoch": 2.24, "grad_norm": 0.7077925801277161, "learning_rate": 9.024624807568615e-05, "loss": 2.9944, "step": 45694 }, { "epoch": 2.24, "grad_norm": 0.7241457104682922, "learning_rate": 9.023524089961797e-05, "loss": 2.8535, "step": 45695 }, { "epoch": 2.24, "grad_norm": 0.7095908522605896, "learning_rate": 9.022423427602991e-05, "loss": 2.9944, "step": 45696 }, { "epoch": 2.24, "grad_norm": 0.717788815498352, "learning_rate": 9.021322820495074e-05, "loss": 2.9462, "step": 45697 }, { "epoch": 2.24, "grad_norm": 0.6879000663757324, "learning_rate": 9.020222268640968e-05, "loss": 2.8645, "step": 45698 }, { "epoch": 2.24, "grad_norm": 0.7336751222610474, "learning_rate": 9.019121772043554e-05, "loss": 2.8715, "step": 45699 }, { "epoch": 2.24, "grad_norm": 0.7165769338607788, "learning_rate": 9.01802133070573e-05, "loss": 3.0437, "step": 45700 }, { "epoch": 2.24, "grad_norm": 0.7212401032447815, "learning_rate": 9.016920944630407e-05, "loss": 2.9006, "step": 45701 }, { "epoch": 2.24, "grad_norm": 0.7075793147087097, "learning_rate": 9.015820613820468e-05, "loss": 2.9832, "step": 45702 }, { "epoch": 2.24, "grad_norm": 0.6998143196105957, "learning_rate": 9.014720338278818e-05, "loss": 2.8956, "step": 45703 }, { "epoch": 2.24, "grad_norm": 0.6937516927719116, "learning_rate": 9.013620118008365e-05, "loss": 3.1962, "step": 45704 }, { "epoch": 2.24, "grad_norm": 0.6879575252532959, "learning_rate": 9.012519953011999e-05, "loss": 2.9001, "step": 45705 }, { "epoch": 2.24, "grad_norm": 0.7310060858726501, "learning_rate": 9.011419843292615e-05, "loss": 2.6941, "step": 45706 }, { "epoch": 2.24, "grad_norm": 0.6938087344169617, "learning_rate": 9.010319788853102e-05, "loss": 2.8176, "step": 45707 }, { "epoch": 2.24, "grad_norm": 0.7372106313705444, "learning_rate": 9.009219789696369e-05, "loss": 2.8348, "step": 45708 }, { "epoch": 2.24, "grad_norm": 0.7054558992385864, "learning_rate": 9.00811984582532e-05, "loss": 2.8556, "step": 45709 }, { "epoch": 2.24, "grad_norm": 0.709180474281311, "learning_rate": 9.007019957242831e-05, "loss": 2.8494, "step": 45710 }, { "epoch": 2.24, "grad_norm": 0.714429497718811, "learning_rate": 9.005920123951824e-05, "loss": 3.0477, "step": 45711 }, { "epoch": 2.24, "grad_norm": 0.7273450493812561, "learning_rate": 9.004820345955173e-05, "loss": 2.9587, "step": 45712 }, { "epoch": 2.24, "grad_norm": 0.7173202633857727, "learning_rate": 9.003720623255795e-05, "loss": 3.0399, "step": 45713 }, { "epoch": 2.24, "grad_norm": 0.7252642512321472, "learning_rate": 9.002620955856574e-05, "loss": 3.1725, "step": 45714 }, { "epoch": 2.24, "grad_norm": 0.707578182220459, "learning_rate": 9.0015213437604e-05, "loss": 2.9012, "step": 45715 }, { "epoch": 2.24, "grad_norm": 0.7207184433937073, "learning_rate": 9.000421786970188e-05, "loss": 2.5106, "step": 45716 }, { "epoch": 2.24, "grad_norm": 0.7439934611320496, "learning_rate": 8.999322285488814e-05, "loss": 2.9038, "step": 45717 }, { "epoch": 2.24, "grad_norm": 0.6846615076065063, "learning_rate": 8.998222839319182e-05, "loss": 2.8526, "step": 45718 }, { "epoch": 2.24, "grad_norm": 0.6808404326438904, "learning_rate": 8.997123448464199e-05, "loss": 3.0216, "step": 45719 }, { "epoch": 2.24, "grad_norm": 0.6881184577941895, "learning_rate": 8.99602411292675e-05, "loss": 2.9798, "step": 45720 }, { "epoch": 2.24, "grad_norm": 0.7790664434432983, "learning_rate": 8.994924832709732e-05, "loss": 3.0434, "step": 45721 }, { "epoch": 2.24, "grad_norm": 0.7797252535820007, "learning_rate": 8.993825607816031e-05, "loss": 2.7815, "step": 45722 }, { "epoch": 2.24, "grad_norm": 0.6707017421722412, "learning_rate": 8.99272643824855e-05, "loss": 3.0048, "step": 45723 }, { "epoch": 2.24, "grad_norm": 0.7407448887825012, "learning_rate": 8.991627324010193e-05, "loss": 2.8102, "step": 45724 }, { "epoch": 2.24, "grad_norm": 0.7850496768951416, "learning_rate": 8.990528265103839e-05, "loss": 3.0061, "step": 45725 }, { "epoch": 2.24, "grad_norm": 0.7100532054901123, "learning_rate": 8.989429261532398e-05, "loss": 2.9508, "step": 45726 }, { "epoch": 2.24, "grad_norm": 0.7451736927032471, "learning_rate": 8.988330313298748e-05, "loss": 2.9667, "step": 45727 }, { "epoch": 2.24, "grad_norm": 0.7049230337142944, "learning_rate": 8.9872314204058e-05, "loss": 2.787, "step": 45728 }, { "epoch": 2.24, "grad_norm": 0.6660589575767517, "learning_rate": 8.986132582856442e-05, "loss": 2.7769, "step": 45729 }, { "epoch": 2.24, "grad_norm": 0.7338511347770691, "learning_rate": 8.985033800653558e-05, "loss": 2.8149, "step": 45730 }, { "epoch": 2.24, "grad_norm": 0.6853067278862, "learning_rate": 8.983935073800059e-05, "loss": 2.7641, "step": 45731 }, { "epoch": 2.24, "grad_norm": 0.7548957467079163, "learning_rate": 8.98283640229882e-05, "loss": 2.7663, "step": 45732 }, { "epoch": 2.24, "grad_norm": 0.7389004826545715, "learning_rate": 8.981737786152748e-05, "loss": 3.224, "step": 45733 }, { "epoch": 2.24, "grad_norm": 0.6959095001220703, "learning_rate": 8.98063922536474e-05, "loss": 2.9059, "step": 45734 }, { "epoch": 2.24, "grad_norm": 0.7141698002815247, "learning_rate": 8.979540719937684e-05, "loss": 3.022, "step": 45735 }, { "epoch": 2.24, "grad_norm": 0.7193446159362793, "learning_rate": 8.978442269874473e-05, "loss": 2.8698, "step": 45736 }, { "epoch": 2.24, "grad_norm": 0.6984773874282837, "learning_rate": 8.977343875177988e-05, "loss": 2.8872, "step": 45737 }, { "epoch": 2.24, "grad_norm": 0.7044937610626221, "learning_rate": 8.976245535851145e-05, "loss": 2.9768, "step": 45738 }, { "epoch": 2.24, "grad_norm": 0.732440173625946, "learning_rate": 8.975147251896816e-05, "loss": 2.8359, "step": 45739 }, { "epoch": 2.24, "grad_norm": 0.7215022444725037, "learning_rate": 8.974049023317898e-05, "loss": 2.7151, "step": 45740 }, { "epoch": 2.24, "grad_norm": 0.741974949836731, "learning_rate": 8.972950850117303e-05, "loss": 2.7282, "step": 45741 }, { "epoch": 2.24, "grad_norm": 0.702601432800293, "learning_rate": 8.971852732297908e-05, "loss": 2.9357, "step": 45742 }, { "epoch": 2.24, "grad_norm": 0.690036952495575, "learning_rate": 8.970754669862605e-05, "loss": 2.7958, "step": 45743 }, { "epoch": 2.24, "grad_norm": 0.7078272700309753, "learning_rate": 8.969656662814278e-05, "loss": 2.6961, "step": 45744 }, { "epoch": 2.24, "grad_norm": 0.7023754119873047, "learning_rate": 8.968558711155828e-05, "loss": 2.9431, "step": 45745 }, { "epoch": 2.24, "grad_norm": 0.7015004754066467, "learning_rate": 8.967460814890154e-05, "loss": 2.9241, "step": 45746 }, { "epoch": 2.24, "grad_norm": 0.7229611277580261, "learning_rate": 8.966362974020135e-05, "loss": 2.9218, "step": 45747 }, { "epoch": 2.24, "grad_norm": 0.7046184539794922, "learning_rate": 8.965265188548675e-05, "loss": 2.7128, "step": 45748 }, { "epoch": 2.24, "grad_norm": 0.6686332821846008, "learning_rate": 8.964167458478649e-05, "loss": 2.9473, "step": 45749 }, { "epoch": 2.24, "grad_norm": 0.7126154899597168, "learning_rate": 8.963069783812966e-05, "loss": 3.0076, "step": 45750 }, { "epoch": 2.24, "grad_norm": 0.720814049243927, "learning_rate": 8.961972164554509e-05, "loss": 2.8959, "step": 45751 }, { "epoch": 2.24, "grad_norm": 0.767537534236908, "learning_rate": 8.960874600706163e-05, "loss": 2.9654, "step": 45752 }, { "epoch": 2.24, "grad_norm": 0.7737765312194824, "learning_rate": 8.95977709227083e-05, "loss": 2.9991, "step": 45753 }, { "epoch": 2.24, "grad_norm": 0.7451054453849792, "learning_rate": 8.958679639251383e-05, "loss": 2.76, "step": 45754 }, { "epoch": 2.24, "grad_norm": 0.682572066783905, "learning_rate": 8.957582241650729e-05, "loss": 3.0767, "step": 45755 }, { "epoch": 2.24, "grad_norm": 0.7428975701332092, "learning_rate": 8.956484899471761e-05, "loss": 2.8986, "step": 45756 }, { "epoch": 2.24, "grad_norm": 0.7675819993019104, "learning_rate": 8.955387612717362e-05, "loss": 2.911, "step": 45757 }, { "epoch": 2.24, "grad_norm": 0.7058717012405396, "learning_rate": 8.954290381390422e-05, "loss": 2.8263, "step": 45758 }, { "epoch": 2.24, "grad_norm": 0.7053318023681641, "learning_rate": 8.953193205493819e-05, "loss": 2.8086, "step": 45759 }, { "epoch": 2.24, "grad_norm": 0.7443513870239258, "learning_rate": 8.952096085030458e-05, "loss": 2.9749, "step": 45760 }, { "epoch": 2.24, "grad_norm": 0.6907855868339539, "learning_rate": 8.950999020003236e-05, "loss": 2.86, "step": 45761 }, { "epoch": 2.24, "grad_norm": 0.6709179282188416, "learning_rate": 8.949902010415016e-05, "loss": 2.8877, "step": 45762 }, { "epoch": 2.24, "grad_norm": 0.6978246569633484, "learning_rate": 8.948805056268718e-05, "loss": 2.9097, "step": 45763 }, { "epoch": 2.24, "grad_norm": 0.7394630908966064, "learning_rate": 8.947708157567213e-05, "loss": 2.9971, "step": 45764 }, { "epoch": 2.24, "grad_norm": 0.7391385436058044, "learning_rate": 8.946611314313386e-05, "loss": 2.8428, "step": 45765 }, { "epoch": 2.24, "grad_norm": 0.7328847646713257, "learning_rate": 8.945514526510141e-05, "loss": 2.82, "step": 45766 }, { "epoch": 2.24, "grad_norm": 0.7323940992355347, "learning_rate": 8.944417794160352e-05, "loss": 3.0405, "step": 45767 }, { "epoch": 2.24, "grad_norm": 0.7202035784721375, "learning_rate": 8.94332111726692e-05, "loss": 2.8026, "step": 45768 }, { "epoch": 2.24, "grad_norm": 0.6832515597343445, "learning_rate": 8.942224495832721e-05, "loss": 2.8618, "step": 45769 }, { "epoch": 2.24, "grad_norm": 0.6816558241844177, "learning_rate": 8.941127929860645e-05, "loss": 2.9306, "step": 45770 }, { "epoch": 2.24, "grad_norm": 0.7403445839881897, "learning_rate": 8.9400314193536e-05, "loss": 3.0726, "step": 45771 }, { "epoch": 2.24, "grad_norm": 0.6996352672576904, "learning_rate": 8.938934964314456e-05, "loss": 2.7873, "step": 45772 }, { "epoch": 2.24, "grad_norm": 0.7047386169433594, "learning_rate": 8.937838564746107e-05, "loss": 2.8053, "step": 45773 }, { "epoch": 2.24, "grad_norm": 0.7159958481788635, "learning_rate": 8.936742220651427e-05, "loss": 2.9682, "step": 45774 }, { "epoch": 2.24, "grad_norm": 0.6776139736175537, "learning_rate": 8.935645932033311e-05, "loss": 2.88, "step": 45775 }, { "epoch": 2.24, "grad_norm": 0.6902870535850525, "learning_rate": 8.934549698894664e-05, "loss": 2.8853, "step": 45776 }, { "epoch": 2.24, "grad_norm": 0.7082247138023376, "learning_rate": 8.933453521238348e-05, "loss": 2.9993, "step": 45777 }, { "epoch": 2.24, "grad_norm": 0.694507360458374, "learning_rate": 8.93235739906727e-05, "loss": 2.9914, "step": 45778 }, { "epoch": 2.24, "grad_norm": 0.6993120908737183, "learning_rate": 8.931261332384308e-05, "loss": 2.9088, "step": 45779 }, { "epoch": 2.24, "grad_norm": 0.701582670211792, "learning_rate": 8.930165321192341e-05, "loss": 2.9265, "step": 45780 }, { "epoch": 2.24, "grad_norm": 0.7343215942382812, "learning_rate": 8.92906936549427e-05, "loss": 2.856, "step": 45781 }, { "epoch": 2.24, "grad_norm": 0.7673470973968506, "learning_rate": 8.927973465292967e-05, "loss": 3.0379, "step": 45782 }, { "epoch": 2.24, "grad_norm": 0.7404628396034241, "learning_rate": 8.926877620591339e-05, "loss": 3.0584, "step": 45783 }, { "epoch": 2.24, "grad_norm": 0.7033378481864929, "learning_rate": 8.925781831392246e-05, "loss": 2.9548, "step": 45784 }, { "epoch": 2.24, "grad_norm": 0.70482337474823, "learning_rate": 8.9246860976986e-05, "loss": 2.8364, "step": 45785 }, { "epoch": 2.24, "grad_norm": 0.6887717247009277, "learning_rate": 8.923590419513273e-05, "loss": 3.0289, "step": 45786 }, { "epoch": 2.24, "grad_norm": 0.7255544066429138, "learning_rate": 8.922494796839143e-05, "loss": 2.9807, "step": 45787 }, { "epoch": 2.24, "grad_norm": 0.6836867928504944, "learning_rate": 8.921399229679118e-05, "loss": 2.8207, "step": 45788 }, { "epoch": 2.24, "grad_norm": 0.707114040851593, "learning_rate": 8.920303718036058e-05, "loss": 2.8818, "step": 45789 }, { "epoch": 2.24, "grad_norm": 0.7001112699508667, "learning_rate": 8.919208261912862e-05, "loss": 3.0064, "step": 45790 }, { "epoch": 2.24, "grad_norm": 0.7056055068969727, "learning_rate": 8.918112861312422e-05, "loss": 2.915, "step": 45791 }, { "epoch": 2.24, "grad_norm": 0.71812504529953, "learning_rate": 8.91701751623761e-05, "loss": 2.6304, "step": 45792 }, { "epoch": 2.24, "grad_norm": 0.7017490267753601, "learning_rate": 8.915922226691323e-05, "loss": 3.0009, "step": 45793 }, { "epoch": 2.24, "grad_norm": 0.6928110718727112, "learning_rate": 8.914826992676439e-05, "loss": 2.8037, "step": 45794 }, { "epoch": 2.24, "grad_norm": 0.7178500890731812, "learning_rate": 8.913731814195836e-05, "loss": 3.0088, "step": 45795 }, { "epoch": 2.24, "grad_norm": 0.7326123118400574, "learning_rate": 8.91263669125241e-05, "loss": 2.7809, "step": 45796 }, { "epoch": 2.24, "grad_norm": 0.7544444799423218, "learning_rate": 8.911541623849035e-05, "loss": 2.9505, "step": 45797 }, { "epoch": 2.24, "grad_norm": 0.6959039568901062, "learning_rate": 8.910446611988609e-05, "loss": 2.9881, "step": 45798 }, { "epoch": 2.24, "grad_norm": 0.7174462676048279, "learning_rate": 8.909351655674e-05, "loss": 2.8899, "step": 45799 }, { "epoch": 2.24, "grad_norm": 0.7137072682380676, "learning_rate": 8.908256754908108e-05, "loss": 2.8453, "step": 45800 }, { "epoch": 2.24, "grad_norm": 0.7283577919006348, "learning_rate": 8.907161909693806e-05, "loss": 2.7934, "step": 45801 }, { "epoch": 2.24, "grad_norm": 0.732901930809021, "learning_rate": 8.906067120033975e-05, "loss": 2.6958, "step": 45802 }, { "epoch": 2.24, "grad_norm": 0.7311907410621643, "learning_rate": 8.904972385931509e-05, "loss": 2.8987, "step": 45803 }, { "epoch": 2.24, "grad_norm": 0.6583136320114136, "learning_rate": 8.903877707389279e-05, "loss": 2.809, "step": 45804 }, { "epoch": 2.24, "grad_norm": 0.6969159245491028, "learning_rate": 8.902783084410174e-05, "loss": 2.8369, "step": 45805 }, { "epoch": 2.24, "grad_norm": 0.7024007439613342, "learning_rate": 8.901688516997091e-05, "loss": 2.8592, "step": 45806 }, { "epoch": 2.24, "grad_norm": 0.8098918795585632, "learning_rate": 8.900594005152896e-05, "loss": 2.9321, "step": 45807 }, { "epoch": 2.24, "grad_norm": 0.7050246000289917, "learning_rate": 8.899499548880476e-05, "loss": 2.8934, "step": 45808 }, { "epoch": 2.24, "grad_norm": 0.7347972393035889, "learning_rate": 8.898405148182708e-05, "loss": 2.9272, "step": 45809 }, { "epoch": 2.25, "grad_norm": 0.7350515127182007, "learning_rate": 8.897310803062475e-05, "loss": 2.971, "step": 45810 }, { "epoch": 2.25, "grad_norm": 0.7553649544715881, "learning_rate": 8.896216513522676e-05, "loss": 3.0858, "step": 45811 }, { "epoch": 2.25, "grad_norm": 0.736858069896698, "learning_rate": 8.895122279566168e-05, "loss": 2.8214, "step": 45812 }, { "epoch": 2.25, "grad_norm": 0.7918192744255066, "learning_rate": 8.894028101195859e-05, "loss": 2.7361, "step": 45813 }, { "epoch": 2.25, "grad_norm": 0.70150226354599, "learning_rate": 8.89293397841461e-05, "loss": 2.9136, "step": 45814 }, { "epoch": 2.25, "grad_norm": 0.7298532724380493, "learning_rate": 8.89183991122532e-05, "loss": 2.9162, "step": 45815 }, { "epoch": 2.25, "grad_norm": 0.6884034276008606, "learning_rate": 8.890745899630857e-05, "loss": 2.7884, "step": 45816 }, { "epoch": 2.25, "grad_norm": 0.7185069918632507, "learning_rate": 8.889651943634101e-05, "loss": 2.9132, "step": 45817 }, { "epoch": 2.25, "grad_norm": 0.7185012698173523, "learning_rate": 8.888558043237946e-05, "loss": 2.8284, "step": 45818 }, { "epoch": 2.25, "grad_norm": 0.6928901672363281, "learning_rate": 8.88746419844526e-05, "loss": 2.8727, "step": 45819 }, { "epoch": 2.25, "grad_norm": 0.7991043329238892, "learning_rate": 8.886370409258935e-05, "loss": 3.0365, "step": 45820 }, { "epoch": 2.25, "grad_norm": 0.7154514193534851, "learning_rate": 8.885276675681842e-05, "loss": 2.7826, "step": 45821 }, { "epoch": 2.25, "grad_norm": 0.6965875625610352, "learning_rate": 8.884182997716874e-05, "loss": 3.0469, "step": 45822 }, { "epoch": 2.25, "grad_norm": 0.7708733677864075, "learning_rate": 8.883089375366904e-05, "loss": 2.8216, "step": 45823 }, { "epoch": 2.25, "grad_norm": 0.7230860590934753, "learning_rate": 8.881995808634801e-05, "loss": 3.0006, "step": 45824 }, { "epoch": 2.25, "grad_norm": 0.6626743674278259, "learning_rate": 8.880902297523471e-05, "loss": 2.7648, "step": 45825 }, { "epoch": 2.25, "grad_norm": 0.7058641910552979, "learning_rate": 8.879808842035769e-05, "loss": 2.9872, "step": 45826 }, { "epoch": 2.25, "grad_norm": 0.7745085954666138, "learning_rate": 8.878715442174585e-05, "loss": 2.9538, "step": 45827 }, { "epoch": 2.25, "grad_norm": 0.6953856945037842, "learning_rate": 8.877622097942807e-05, "loss": 2.8034, "step": 45828 }, { "epoch": 2.25, "grad_norm": 0.8223050236701965, "learning_rate": 8.876528809343302e-05, "loss": 2.8538, "step": 45829 }, { "epoch": 2.25, "grad_norm": 0.7568581700325012, "learning_rate": 8.87543557637897e-05, "loss": 2.9885, "step": 45830 }, { "epoch": 2.25, "grad_norm": 0.7025486826896667, "learning_rate": 8.874342399052658e-05, "loss": 2.6614, "step": 45831 }, { "epoch": 2.25, "grad_norm": 0.7281057834625244, "learning_rate": 8.873249277367262e-05, "loss": 2.9158, "step": 45832 }, { "epoch": 2.25, "grad_norm": 0.7436516284942627, "learning_rate": 8.872156211325673e-05, "loss": 2.8681, "step": 45833 }, { "epoch": 2.25, "grad_norm": 0.6978209018707275, "learning_rate": 8.871063200930747e-05, "loss": 2.8703, "step": 45834 }, { "epoch": 2.25, "grad_norm": 0.7288390398025513, "learning_rate": 8.869970246185382e-05, "loss": 2.9508, "step": 45835 }, { "epoch": 2.25, "grad_norm": 0.6683169007301331, "learning_rate": 8.868877347092445e-05, "loss": 3.0472, "step": 45836 }, { "epoch": 2.25, "grad_norm": 0.7435875535011292, "learning_rate": 8.867784503654823e-05, "loss": 3.1275, "step": 45837 }, { "epoch": 2.25, "grad_norm": 0.751097559928894, "learning_rate": 8.86669171587539e-05, "loss": 2.6619, "step": 45838 }, { "epoch": 2.25, "grad_norm": 0.7168517708778381, "learning_rate": 8.865598983757017e-05, "loss": 2.9837, "step": 45839 }, { "epoch": 2.25, "grad_norm": 0.7347090840339661, "learning_rate": 8.864506307302596e-05, "loss": 2.6833, "step": 45840 }, { "epoch": 2.25, "grad_norm": 0.6629171967506409, "learning_rate": 8.863413686514986e-05, "loss": 2.9058, "step": 45841 }, { "epoch": 2.25, "grad_norm": 0.7456681132316589, "learning_rate": 8.862321121397079e-05, "loss": 2.7433, "step": 45842 }, { "epoch": 2.25, "grad_norm": 0.6898133158683777, "learning_rate": 8.861228611951762e-05, "loss": 2.9031, "step": 45843 }, { "epoch": 2.25, "grad_norm": 0.7045638561248779, "learning_rate": 8.860136158181898e-05, "loss": 2.8671, "step": 45844 }, { "epoch": 2.25, "grad_norm": 0.7033824324607849, "learning_rate": 8.859043760090368e-05, "loss": 3.0791, "step": 45845 }, { "epoch": 2.25, "grad_norm": 0.7060011029243469, "learning_rate": 8.857951417680036e-05, "loss": 2.7681, "step": 45846 }, { "epoch": 2.25, "grad_norm": 0.6827572584152222, "learning_rate": 8.856859130953795e-05, "loss": 3.0132, "step": 45847 }, { "epoch": 2.25, "grad_norm": 0.725695788860321, "learning_rate": 8.855766899914527e-05, "loss": 3.0672, "step": 45848 }, { "epoch": 2.25, "grad_norm": 0.7227092981338501, "learning_rate": 8.854674724565086e-05, "loss": 3.0037, "step": 45849 }, { "epoch": 2.25, "grad_norm": 0.6998618245124817, "learning_rate": 8.853582604908374e-05, "loss": 2.7627, "step": 45850 }, { "epoch": 2.25, "grad_norm": 0.7095021605491638, "learning_rate": 8.852490540947249e-05, "loss": 2.8435, "step": 45851 }, { "epoch": 2.25, "grad_norm": 0.7027034163475037, "learning_rate": 8.8513985326846e-05, "loss": 2.8237, "step": 45852 }, { "epoch": 2.25, "grad_norm": 0.7380695939064026, "learning_rate": 8.850306580123298e-05, "loss": 2.7772, "step": 45853 }, { "epoch": 2.25, "grad_norm": 0.7337902784347534, "learning_rate": 8.849214683266208e-05, "loss": 3.0559, "step": 45854 }, { "epoch": 2.25, "grad_norm": 0.7061793804168701, "learning_rate": 8.848122842116227e-05, "loss": 2.8721, "step": 45855 }, { "epoch": 2.25, "grad_norm": 0.7373761534690857, "learning_rate": 8.847031056676211e-05, "loss": 2.9133, "step": 45856 }, { "epoch": 2.25, "grad_norm": 0.7317122220993042, "learning_rate": 8.84593932694904e-05, "loss": 2.997, "step": 45857 }, { "epoch": 2.25, "grad_norm": 0.7210045456886292, "learning_rate": 8.844847652937609e-05, "loss": 2.9035, "step": 45858 }, { "epoch": 2.25, "grad_norm": 0.7218835949897766, "learning_rate": 8.843756034644773e-05, "loss": 2.761, "step": 45859 }, { "epoch": 2.25, "grad_norm": 0.6628503799438477, "learning_rate": 8.842664472073415e-05, "loss": 2.9027, "step": 45860 }, { "epoch": 2.25, "grad_norm": 0.694086492061615, "learning_rate": 8.841572965226396e-05, "loss": 2.9911, "step": 45861 }, { "epoch": 2.25, "grad_norm": 0.7303391098976135, "learning_rate": 8.840481514106602e-05, "loss": 2.8838, "step": 45862 }, { "epoch": 2.25, "grad_norm": 0.7094843983650208, "learning_rate": 8.839390118716915e-05, "loss": 3.0449, "step": 45863 }, { "epoch": 2.25, "grad_norm": 0.6973925828933716, "learning_rate": 8.838298779060198e-05, "loss": 2.9713, "step": 45864 }, { "epoch": 2.25, "grad_norm": 0.7205259799957275, "learning_rate": 8.837207495139336e-05, "loss": 2.8643, "step": 45865 }, { "epoch": 2.25, "grad_norm": 0.6511806845664978, "learning_rate": 8.836116266957193e-05, "loss": 2.7828, "step": 45866 }, { "epoch": 2.25, "grad_norm": 0.7268171906471252, "learning_rate": 8.83502509451664e-05, "loss": 2.982, "step": 45867 }, { "epoch": 2.25, "grad_norm": 0.751723051071167, "learning_rate": 8.833933977820569e-05, "loss": 3.0784, "step": 45868 }, { "epoch": 2.25, "grad_norm": 0.7350054979324341, "learning_rate": 8.83284291687183e-05, "loss": 2.9919, "step": 45869 }, { "epoch": 2.25, "grad_norm": 0.7295348644256592, "learning_rate": 8.83175191167332e-05, "loss": 2.9076, "step": 45870 }, { "epoch": 2.25, "grad_norm": 0.7354061603546143, "learning_rate": 8.830660962227892e-05, "loss": 2.8714, "step": 45871 }, { "epoch": 2.25, "grad_norm": 0.7122185230255127, "learning_rate": 8.82957006853843e-05, "loss": 2.8408, "step": 45872 }, { "epoch": 2.25, "grad_norm": 0.7535485029220581, "learning_rate": 8.828479230607814e-05, "loss": 3.0789, "step": 45873 }, { "epoch": 2.25, "grad_norm": 0.7035789489746094, "learning_rate": 8.827388448438908e-05, "loss": 2.9393, "step": 45874 }, { "epoch": 2.25, "grad_norm": 0.7172368764877319, "learning_rate": 8.826297722034589e-05, "loss": 2.8124, "step": 45875 }, { "epoch": 2.25, "grad_norm": 0.7242939472198486, "learning_rate": 8.825207051397717e-05, "loss": 2.8569, "step": 45876 }, { "epoch": 2.25, "grad_norm": 0.7067150473594666, "learning_rate": 8.824116436531174e-05, "loss": 2.7216, "step": 45877 }, { "epoch": 2.25, "grad_norm": 0.6949864029884338, "learning_rate": 8.823025877437839e-05, "loss": 3.1062, "step": 45878 }, { "epoch": 2.25, "grad_norm": 0.6859537363052368, "learning_rate": 8.821935374120572e-05, "loss": 2.8185, "step": 45879 }, { "epoch": 2.25, "grad_norm": 0.7593329548835754, "learning_rate": 8.82084492658226e-05, "loss": 2.8668, "step": 45880 }, { "epoch": 2.25, "grad_norm": 0.6818985342979431, "learning_rate": 8.819754534825766e-05, "loss": 2.8889, "step": 45881 }, { "epoch": 2.25, "grad_norm": 0.7531813383102417, "learning_rate": 8.818664198853954e-05, "loss": 2.6414, "step": 45882 }, { "epoch": 2.25, "grad_norm": 0.6858770847320557, "learning_rate": 8.817573918669716e-05, "loss": 2.6542, "step": 45883 }, { "epoch": 2.25, "grad_norm": 0.7132046222686768, "learning_rate": 8.8164836942759e-05, "loss": 2.9236, "step": 45884 }, { "epoch": 2.25, "grad_norm": 0.7180655598640442, "learning_rate": 8.815393525675397e-05, "loss": 2.9675, "step": 45885 }, { "epoch": 2.25, "grad_norm": 0.6883565187454224, "learning_rate": 8.814303412871063e-05, "loss": 2.8287, "step": 45886 }, { "epoch": 2.25, "grad_norm": 0.7425944209098816, "learning_rate": 8.813213355865783e-05, "loss": 2.9131, "step": 45887 }, { "epoch": 2.25, "grad_norm": 0.736083447933197, "learning_rate": 8.812123354662427e-05, "loss": 2.8409, "step": 45888 }, { "epoch": 2.25, "grad_norm": 0.6757725477218628, "learning_rate": 8.811033409263844e-05, "loss": 2.7647, "step": 45889 }, { "epoch": 2.25, "grad_norm": 0.7013656497001648, "learning_rate": 8.809943519672936e-05, "loss": 2.9786, "step": 45890 }, { "epoch": 2.25, "grad_norm": 0.7240048050880432, "learning_rate": 8.808853685892545e-05, "loss": 3.0001, "step": 45891 }, { "epoch": 2.25, "grad_norm": 0.7245268821716309, "learning_rate": 8.807763907925568e-05, "loss": 2.9762, "step": 45892 }, { "epoch": 2.25, "grad_norm": 0.7503712177276611, "learning_rate": 8.806674185774852e-05, "loss": 2.8701, "step": 45893 }, { "epoch": 2.25, "grad_norm": 0.7144389152526855, "learning_rate": 8.805584519443276e-05, "loss": 2.8848, "step": 45894 }, { "epoch": 2.25, "grad_norm": 0.6713013052940369, "learning_rate": 8.804494908933723e-05, "loss": 3.0602, "step": 45895 }, { "epoch": 2.25, "grad_norm": 0.7255938053131104, "learning_rate": 8.803405354249049e-05, "loss": 2.8985, "step": 45896 }, { "epoch": 2.25, "grad_norm": 0.7103670835494995, "learning_rate": 8.802315855392127e-05, "loss": 2.865, "step": 45897 }, { "epoch": 2.25, "grad_norm": 0.7415536046028137, "learning_rate": 8.801226412365818e-05, "loss": 2.9041, "step": 45898 }, { "epoch": 2.25, "grad_norm": 0.7270449995994568, "learning_rate": 8.800137025172998e-05, "loss": 2.9277, "step": 45899 }, { "epoch": 2.25, "grad_norm": 0.7240244150161743, "learning_rate": 8.799047693816545e-05, "loss": 2.9637, "step": 45900 }, { "epoch": 2.25, "grad_norm": 0.6903491616249084, "learning_rate": 8.79795841829931e-05, "loss": 2.8616, "step": 45901 }, { "epoch": 2.25, "grad_norm": 0.6921778917312622, "learning_rate": 8.796869198624182e-05, "loss": 3.0011, "step": 45902 }, { "epoch": 2.25, "grad_norm": 0.7207080125808716, "learning_rate": 8.795780034794024e-05, "loss": 2.7592, "step": 45903 }, { "epoch": 2.25, "grad_norm": 0.7446100115776062, "learning_rate": 8.794690926811687e-05, "loss": 3.0125, "step": 45904 }, { "epoch": 2.25, "grad_norm": 0.7436581254005432, "learning_rate": 8.793601874680061e-05, "loss": 2.8783, "step": 45905 }, { "epoch": 2.25, "grad_norm": 0.69056236743927, "learning_rate": 8.792512878402001e-05, "loss": 2.9237, "step": 45906 }, { "epoch": 2.25, "grad_norm": 0.743360698223114, "learning_rate": 8.791423937980387e-05, "loss": 2.9432, "step": 45907 }, { "epoch": 2.25, "grad_norm": 0.7039749026298523, "learning_rate": 8.790335053418074e-05, "loss": 3.0534, "step": 45908 }, { "epoch": 2.25, "grad_norm": 0.6992817521095276, "learning_rate": 8.789246224717945e-05, "loss": 3.0331, "step": 45909 }, { "epoch": 2.25, "grad_norm": 0.6970164179801941, "learning_rate": 8.78815745188286e-05, "loss": 2.9252, "step": 45910 }, { "epoch": 2.25, "grad_norm": 0.7451713681221008, "learning_rate": 8.787068734915673e-05, "loss": 3.0809, "step": 45911 }, { "epoch": 2.25, "grad_norm": 0.7480039000511169, "learning_rate": 8.78598007381928e-05, "loss": 3.0333, "step": 45912 }, { "epoch": 2.25, "grad_norm": 0.7085638046264648, "learning_rate": 8.784891468596517e-05, "loss": 2.881, "step": 45913 }, { "epoch": 2.25, "grad_norm": 0.728363573551178, "learning_rate": 8.783802919250271e-05, "loss": 2.8696, "step": 45914 }, { "epoch": 2.25, "grad_norm": 0.708055317401886, "learning_rate": 8.782714425783414e-05, "loss": 3.2101, "step": 45915 }, { "epoch": 2.25, "grad_norm": 0.6821580529212952, "learning_rate": 8.781625988198792e-05, "loss": 3.1762, "step": 45916 }, { "epoch": 2.25, "grad_norm": 0.7033885717391968, "learning_rate": 8.780537606499297e-05, "loss": 2.8277, "step": 45917 }, { "epoch": 2.25, "grad_norm": 0.7111839652061462, "learning_rate": 8.779449280687778e-05, "loss": 2.9337, "step": 45918 }, { "epoch": 2.25, "grad_norm": 0.7263917326927185, "learning_rate": 8.778361010767099e-05, "loss": 2.8357, "step": 45919 }, { "epoch": 2.25, "grad_norm": 0.7069695591926575, "learning_rate": 8.777272796740139e-05, "loss": 2.7324, "step": 45920 }, { "epoch": 2.25, "grad_norm": 0.7394420504570007, "learning_rate": 8.77618463860975e-05, "loss": 2.7066, "step": 45921 }, { "epoch": 2.25, "grad_norm": 0.7445680499076843, "learning_rate": 8.77509653637882e-05, "loss": 2.8956, "step": 45922 }, { "epoch": 2.25, "grad_norm": 0.7384760975837708, "learning_rate": 8.774008490050185e-05, "loss": 2.944, "step": 45923 }, { "epoch": 2.25, "grad_norm": 0.7003813982009888, "learning_rate": 8.772920499626741e-05, "loss": 2.7802, "step": 45924 }, { "epoch": 2.25, "grad_norm": 0.7218019366264343, "learning_rate": 8.771832565111335e-05, "loss": 2.6604, "step": 45925 }, { "epoch": 2.25, "grad_norm": 0.7279051542282104, "learning_rate": 8.770744686506828e-05, "loss": 2.8343, "step": 45926 }, { "epoch": 2.25, "grad_norm": 0.6893950700759888, "learning_rate": 8.769656863816106e-05, "loss": 2.986, "step": 45927 }, { "epoch": 2.25, "grad_norm": 0.7431288361549377, "learning_rate": 8.768569097042011e-05, "loss": 2.7472, "step": 45928 }, { "epoch": 2.25, "grad_norm": 0.7378048896789551, "learning_rate": 8.767481386187418e-05, "loss": 2.8564, "step": 45929 }, { "epoch": 2.25, "grad_norm": 0.7334486842155457, "learning_rate": 8.766393731255205e-05, "loss": 2.9231, "step": 45930 }, { "epoch": 2.25, "grad_norm": 0.7250813841819763, "learning_rate": 8.76530613224822e-05, "loss": 2.9032, "step": 45931 }, { "epoch": 2.25, "grad_norm": 0.7708786725997925, "learning_rate": 8.764218589169338e-05, "loss": 2.8288, "step": 45932 }, { "epoch": 2.25, "grad_norm": 0.7491968274116516, "learning_rate": 8.763131102021402e-05, "loss": 2.9354, "step": 45933 }, { "epoch": 2.25, "grad_norm": 0.7650907039642334, "learning_rate": 8.762043670807293e-05, "loss": 2.8662, "step": 45934 }, { "epoch": 2.25, "grad_norm": 0.7074927091598511, "learning_rate": 8.760956295529887e-05, "loss": 3.0082, "step": 45935 }, { "epoch": 2.25, "grad_norm": 0.6994934678077698, "learning_rate": 8.75986897619202e-05, "loss": 2.7367, "step": 45936 }, { "epoch": 2.25, "grad_norm": 0.7439029812812805, "learning_rate": 8.758781712796584e-05, "loss": 2.9358, "step": 45937 }, { "epoch": 2.25, "grad_norm": 0.737886369228363, "learning_rate": 8.757694505346416e-05, "loss": 2.7807, "step": 45938 }, { "epoch": 2.25, "grad_norm": 0.6919266581535339, "learning_rate": 8.756607353844405e-05, "loss": 2.9541, "step": 45939 }, { "epoch": 2.25, "grad_norm": 0.6991779804229736, "learning_rate": 8.7555202582934e-05, "loss": 2.8848, "step": 45940 }, { "epoch": 2.25, "grad_norm": 0.7198300361633301, "learning_rate": 8.754433218696258e-05, "loss": 2.7478, "step": 45941 }, { "epoch": 2.25, "grad_norm": 0.7473375201225281, "learning_rate": 8.753346235055857e-05, "loss": 2.7545, "step": 45942 }, { "epoch": 2.25, "grad_norm": 0.6938808560371399, "learning_rate": 8.752259307375046e-05, "loss": 2.8858, "step": 45943 }, { "epoch": 2.25, "grad_norm": 0.6992427110671997, "learning_rate": 8.751172435656695e-05, "loss": 3.0062, "step": 45944 }, { "epoch": 2.25, "grad_norm": 0.7342805862426758, "learning_rate": 8.750085619903672e-05, "loss": 3.0506, "step": 45945 }, { "epoch": 2.25, "grad_norm": 0.714984655380249, "learning_rate": 8.748998860118838e-05, "loss": 2.9983, "step": 45946 }, { "epoch": 2.25, "grad_norm": 0.6858196258544922, "learning_rate": 8.74791215630505e-05, "loss": 2.8878, "step": 45947 }, { "epoch": 2.25, "grad_norm": 0.7082827687263489, "learning_rate": 8.74682550846516e-05, "loss": 2.883, "step": 45948 }, { "epoch": 2.25, "grad_norm": 0.7259243726730347, "learning_rate": 8.745738916602044e-05, "loss": 2.6689, "step": 45949 }, { "epoch": 2.25, "grad_norm": 0.7247135639190674, "learning_rate": 8.744652380718568e-05, "loss": 2.9188, "step": 45950 }, { "epoch": 2.25, "grad_norm": 0.7118974924087524, "learning_rate": 8.743565900817582e-05, "loss": 2.8574, "step": 45951 }, { "epoch": 2.25, "grad_norm": 0.6941021680831909, "learning_rate": 8.742479476901958e-05, "loss": 3.0137, "step": 45952 }, { "epoch": 2.25, "grad_norm": 0.7223904728889465, "learning_rate": 8.741393108974545e-05, "loss": 2.9941, "step": 45953 }, { "epoch": 2.25, "grad_norm": 0.7473703026771545, "learning_rate": 8.740306797038217e-05, "loss": 2.8769, "step": 45954 }, { "epoch": 2.25, "grad_norm": 0.6923856735229492, "learning_rate": 8.739220541095831e-05, "loss": 3.0033, "step": 45955 }, { "epoch": 2.25, "grad_norm": 0.7007562518119812, "learning_rate": 8.73813434115024e-05, "loss": 3.1408, "step": 45956 }, { "epoch": 2.25, "grad_norm": 0.7167068123817444, "learning_rate": 8.737048197204317e-05, "loss": 2.8446, "step": 45957 }, { "epoch": 2.25, "grad_norm": 0.7058669328689575, "learning_rate": 8.73596210926091e-05, "loss": 2.9255, "step": 45958 }, { "epoch": 2.25, "grad_norm": 0.7336747646331787, "learning_rate": 8.734876077322887e-05, "loss": 2.907, "step": 45959 }, { "epoch": 2.25, "grad_norm": 0.6712983250617981, "learning_rate": 8.733790101393114e-05, "loss": 2.8126, "step": 45960 }, { "epoch": 2.25, "grad_norm": 0.7041860818862915, "learning_rate": 8.732704181474448e-05, "loss": 2.9029, "step": 45961 }, { "epoch": 2.25, "grad_norm": 0.739032506942749, "learning_rate": 8.731618317569742e-05, "loss": 2.9452, "step": 45962 }, { "epoch": 2.25, "grad_norm": 0.790059506893158, "learning_rate": 8.730532509681853e-05, "loss": 3.131, "step": 45963 }, { "epoch": 2.25, "grad_norm": 0.6874712109565735, "learning_rate": 8.729446757813651e-05, "loss": 2.8963, "step": 45964 }, { "epoch": 2.25, "grad_norm": 0.718464732170105, "learning_rate": 8.728361061967998e-05, "loss": 2.9917, "step": 45965 }, { "epoch": 2.25, "grad_norm": 0.7371983528137207, "learning_rate": 8.727275422147743e-05, "loss": 3.0787, "step": 45966 }, { "epoch": 2.25, "grad_norm": 0.689892590045929, "learning_rate": 8.726189838355756e-05, "loss": 2.9387, "step": 45967 }, { "epoch": 2.25, "grad_norm": 0.7095820903778076, "learning_rate": 8.725104310594893e-05, "loss": 2.8798, "step": 45968 }, { "epoch": 2.25, "grad_norm": 0.7040510773658752, "learning_rate": 8.724018838867999e-05, "loss": 2.8971, "step": 45969 }, { "epoch": 2.25, "grad_norm": 0.713518500328064, "learning_rate": 8.722933423177956e-05, "loss": 2.9733, "step": 45970 }, { "epoch": 2.25, "grad_norm": 0.6998233199119568, "learning_rate": 8.721848063527601e-05, "loss": 2.6608, "step": 45971 }, { "epoch": 2.25, "grad_norm": 0.6755211353302002, "learning_rate": 8.720762759919814e-05, "loss": 2.9071, "step": 45972 }, { "epoch": 2.25, "grad_norm": 0.7184854745864868, "learning_rate": 8.719677512357432e-05, "loss": 2.8504, "step": 45973 }, { "epoch": 2.25, "grad_norm": 0.704601526260376, "learning_rate": 8.718592320843335e-05, "loss": 2.9019, "step": 45974 }, { "epoch": 2.25, "grad_norm": 0.7312195301055908, "learning_rate": 8.717507185380359e-05, "loss": 2.9244, "step": 45975 }, { "epoch": 2.25, "grad_norm": 0.7099519371986389, "learning_rate": 8.716422105971385e-05, "loss": 2.9385, "step": 45976 }, { "epoch": 2.25, "grad_norm": 0.7298969626426697, "learning_rate": 8.715337082619256e-05, "loss": 2.9582, "step": 45977 }, { "epoch": 2.25, "grad_norm": 0.7228435277938843, "learning_rate": 8.714252115326825e-05, "loss": 3.0337, "step": 45978 }, { "epoch": 2.25, "grad_norm": 0.7032173275947571, "learning_rate": 8.713167204096967e-05, "loss": 2.8777, "step": 45979 }, { "epoch": 2.25, "grad_norm": 0.7508836388587952, "learning_rate": 8.712082348932518e-05, "loss": 2.8243, "step": 45980 }, { "epoch": 2.25, "grad_norm": 0.7287681102752686, "learning_rate": 8.71099754983635e-05, "loss": 3.0163, "step": 45981 }, { "epoch": 2.25, "grad_norm": 0.7005704045295715, "learning_rate": 8.709912806811327e-05, "loss": 2.8342, "step": 45982 }, { "epoch": 2.25, "grad_norm": 0.7210054993629456, "learning_rate": 8.708828119860295e-05, "loss": 3.0389, "step": 45983 }, { "epoch": 2.25, "grad_norm": 0.693412184715271, "learning_rate": 8.70774348898611e-05, "loss": 2.8852, "step": 45984 }, { "epoch": 2.25, "grad_norm": 0.687518835067749, "learning_rate": 8.706658914191625e-05, "loss": 2.9454, "step": 45985 }, { "epoch": 2.25, "grad_norm": 0.7263759970664978, "learning_rate": 8.7055743954797e-05, "loss": 2.9181, "step": 45986 }, { "epoch": 2.25, "grad_norm": 0.7387471199035645, "learning_rate": 8.704489932853203e-05, "loss": 2.8456, "step": 45987 }, { "epoch": 2.25, "grad_norm": 0.6722941398620605, "learning_rate": 8.703405526314975e-05, "loss": 2.8528, "step": 45988 }, { "epoch": 2.25, "grad_norm": 0.6813567876815796, "learning_rate": 8.702321175867884e-05, "loss": 2.9437, "step": 45989 }, { "epoch": 2.25, "grad_norm": 0.7118014693260193, "learning_rate": 8.701236881514784e-05, "loss": 2.8783, "step": 45990 }, { "epoch": 2.25, "grad_norm": 0.7127276659011841, "learning_rate": 8.700152643258516e-05, "loss": 3.0367, "step": 45991 }, { "epoch": 2.25, "grad_norm": 0.8109259605407715, "learning_rate": 8.699068461101955e-05, "loss": 2.8818, "step": 45992 }, { "epoch": 2.25, "grad_norm": 0.689415693283081, "learning_rate": 8.697984335047939e-05, "loss": 3.0599, "step": 45993 }, { "epoch": 2.25, "grad_norm": 0.7123958468437195, "learning_rate": 8.696900265099343e-05, "loss": 2.8822, "step": 45994 }, { "epoch": 2.25, "grad_norm": 0.7250252366065979, "learning_rate": 8.695816251259002e-05, "loss": 2.9265, "step": 45995 }, { "epoch": 2.25, "grad_norm": 0.7128042578697205, "learning_rate": 8.694732293529783e-05, "loss": 3.0612, "step": 45996 }, { "epoch": 2.25, "grad_norm": 0.7381935119628906, "learning_rate": 8.693648391914543e-05, "loss": 2.7554, "step": 45997 }, { "epoch": 2.25, "grad_norm": 0.6966440677642822, "learning_rate": 8.692564546416137e-05, "loss": 2.7047, "step": 45998 }, { "epoch": 2.25, "grad_norm": 0.7341095209121704, "learning_rate": 8.691480757037414e-05, "loss": 3.1269, "step": 45999 }, { "epoch": 2.25, "grad_norm": 0.6964755654335022, "learning_rate": 8.690397023781219e-05, "loss": 3.019, "step": 46000 }, { "epoch": 2.25, "grad_norm": 0.7326573729515076, "learning_rate": 8.689313346650419e-05, "loss": 2.7295, "step": 46001 }, { "epoch": 2.25, "grad_norm": 0.7106379866600037, "learning_rate": 8.688229725647876e-05, "loss": 2.8008, "step": 46002 }, { "epoch": 2.25, "grad_norm": 0.6738712787628174, "learning_rate": 8.687146160776424e-05, "loss": 2.7775, "step": 46003 }, { "epoch": 2.25, "grad_norm": 0.6668511033058167, "learning_rate": 8.686062652038939e-05, "loss": 2.9989, "step": 46004 }, { "epoch": 2.25, "grad_norm": 0.8261511921882629, "learning_rate": 8.684979199438259e-05, "loss": 2.7222, "step": 46005 }, { "epoch": 2.25, "grad_norm": 0.6966729760169983, "learning_rate": 8.683895802977236e-05, "loss": 3.1088, "step": 46006 }, { "epoch": 2.25, "grad_norm": 0.7278913259506226, "learning_rate": 8.682812462658736e-05, "loss": 3.029, "step": 46007 }, { "epoch": 2.25, "grad_norm": 0.6950763463973999, "learning_rate": 8.681729178485595e-05, "loss": 2.7216, "step": 46008 }, { "epoch": 2.25, "grad_norm": 0.753512442111969, "learning_rate": 8.68064595046069e-05, "loss": 2.9456, "step": 46009 }, { "epoch": 2.25, "grad_norm": 0.7178856730461121, "learning_rate": 8.679562778586848e-05, "loss": 2.7979, "step": 46010 }, { "epoch": 2.25, "grad_norm": 0.6799273490905762, "learning_rate": 8.678479662866944e-05, "loss": 3.0293, "step": 46011 }, { "epoch": 2.25, "grad_norm": 0.7092909812927246, "learning_rate": 8.677396603303821e-05, "loss": 2.8555, "step": 46012 }, { "epoch": 2.25, "grad_norm": 0.744310736656189, "learning_rate": 8.676313599900325e-05, "loss": 3.0313, "step": 46013 }, { "epoch": 2.26, "grad_norm": 0.7400776743888855, "learning_rate": 8.675230652659322e-05, "loss": 2.9776, "step": 46014 }, { "epoch": 2.26, "grad_norm": 0.7001795768737793, "learning_rate": 8.674147761583649e-05, "loss": 3.0375, "step": 46015 }, { "epoch": 2.26, "grad_norm": 0.7090204954147339, "learning_rate": 8.673064926676166e-05, "loss": 3.1105, "step": 46016 }, { "epoch": 2.26, "grad_norm": 0.6803956031799316, "learning_rate": 8.671982147939733e-05, "loss": 2.9111, "step": 46017 }, { "epoch": 2.26, "grad_norm": 0.6881393790245056, "learning_rate": 8.670899425377191e-05, "loss": 2.9803, "step": 46018 }, { "epoch": 2.26, "grad_norm": 0.7794377207756042, "learning_rate": 8.669816758991397e-05, "loss": 3.0788, "step": 46019 }, { "epoch": 2.26, "grad_norm": 0.6823427677154541, "learning_rate": 8.668734148785205e-05, "loss": 2.7918, "step": 46020 }, { "epoch": 2.26, "grad_norm": 0.7123182415962219, "learning_rate": 8.667651594761452e-05, "loss": 2.896, "step": 46021 }, { "epoch": 2.26, "grad_norm": 0.7293609976768494, "learning_rate": 8.666569096923007e-05, "loss": 3.039, "step": 46022 }, { "epoch": 2.26, "grad_norm": 0.7326741218566895, "learning_rate": 8.665486655272708e-05, "loss": 3.0871, "step": 46023 }, { "epoch": 2.26, "grad_norm": 0.7077974677085876, "learning_rate": 8.664404269813416e-05, "loss": 2.9412, "step": 46024 }, { "epoch": 2.26, "grad_norm": 0.7263396382331848, "learning_rate": 8.663321940547971e-05, "loss": 2.8853, "step": 46025 }, { "epoch": 2.26, "grad_norm": 0.6902568340301514, "learning_rate": 8.662239667479238e-05, "loss": 2.8761, "step": 46026 }, { "epoch": 2.26, "grad_norm": 0.6834216713905334, "learning_rate": 8.661157450610058e-05, "loss": 2.987, "step": 46027 }, { "epoch": 2.26, "grad_norm": 0.7192767262458801, "learning_rate": 8.660075289943274e-05, "loss": 3.0016, "step": 46028 }, { "epoch": 2.26, "grad_norm": 0.711889922618866, "learning_rate": 8.658993185481752e-05, "loss": 2.9948, "step": 46029 }, { "epoch": 2.26, "grad_norm": 0.7222273349761963, "learning_rate": 8.657911137228328e-05, "loss": 2.9268, "step": 46030 }, { "epoch": 2.26, "grad_norm": 0.7341220378875732, "learning_rate": 8.656829145185858e-05, "loss": 2.8919, "step": 46031 }, { "epoch": 2.26, "grad_norm": 0.6676339507102966, "learning_rate": 8.655747209357203e-05, "loss": 2.9097, "step": 46032 }, { "epoch": 2.26, "grad_norm": 0.7094964385032654, "learning_rate": 8.654665329745199e-05, "loss": 3.0597, "step": 46033 }, { "epoch": 2.26, "grad_norm": 0.7654356360435486, "learning_rate": 8.653583506352702e-05, "loss": 2.7896, "step": 46034 }, { "epoch": 2.26, "grad_norm": 0.7312438488006592, "learning_rate": 8.652501739182546e-05, "loss": 2.9281, "step": 46035 }, { "epoch": 2.26, "grad_norm": 0.6991698741912842, "learning_rate": 8.651420028237594e-05, "loss": 2.9008, "step": 46036 }, { "epoch": 2.26, "grad_norm": 0.725242018699646, "learning_rate": 8.650338373520702e-05, "loss": 2.9847, "step": 46037 }, { "epoch": 2.26, "grad_norm": 0.6978802680969238, "learning_rate": 8.6492567750347e-05, "loss": 2.6138, "step": 46038 }, { "epoch": 2.26, "grad_norm": 0.7490013241767883, "learning_rate": 8.648175232782458e-05, "loss": 2.8943, "step": 46039 }, { "epoch": 2.26, "grad_norm": 0.7092497944831848, "learning_rate": 8.647093746766799e-05, "loss": 2.9318, "step": 46040 }, { "epoch": 2.26, "grad_norm": 0.709816575050354, "learning_rate": 8.6460123169906e-05, "loss": 2.881, "step": 46041 }, { "epoch": 2.26, "grad_norm": 0.7483521699905396, "learning_rate": 8.644930943456693e-05, "loss": 2.7731, "step": 46042 }, { "epoch": 2.26, "grad_norm": 0.7091911435127258, "learning_rate": 8.643849626167917e-05, "loss": 2.8516, "step": 46043 }, { "epoch": 2.26, "grad_norm": 0.7004270553588867, "learning_rate": 8.642768365127141e-05, "loss": 2.8595, "step": 46044 }, { "epoch": 2.26, "grad_norm": 0.703554093837738, "learning_rate": 8.641687160337195e-05, "loss": 2.5378, "step": 46045 }, { "epoch": 2.26, "grad_norm": 0.68243408203125, "learning_rate": 8.640606011800935e-05, "loss": 2.8221, "step": 46046 }, { "epoch": 2.26, "grad_norm": 0.743057906627655, "learning_rate": 8.639524919521213e-05, "loss": 2.9218, "step": 46047 }, { "epoch": 2.26, "grad_norm": 0.6928690075874329, "learning_rate": 8.638443883500877e-05, "loss": 2.9012, "step": 46048 }, { "epoch": 2.26, "grad_norm": 0.7314662337303162, "learning_rate": 8.637362903742768e-05, "loss": 2.9544, "step": 46049 }, { "epoch": 2.26, "grad_norm": 0.6911783218383789, "learning_rate": 8.63628198024972e-05, "loss": 2.8013, "step": 46050 }, { "epoch": 2.26, "grad_norm": 0.8123417496681213, "learning_rate": 8.635201113024607e-05, "loss": 2.9374, "step": 46051 }, { "epoch": 2.26, "grad_norm": 0.7197860479354858, "learning_rate": 8.634120302070254e-05, "loss": 3.0019, "step": 46052 }, { "epoch": 2.26, "grad_norm": 0.7315776944160461, "learning_rate": 8.633039547389511e-05, "loss": 2.7753, "step": 46053 }, { "epoch": 2.26, "grad_norm": 0.790367841720581, "learning_rate": 8.631958848985241e-05, "loss": 2.9611, "step": 46054 }, { "epoch": 2.26, "grad_norm": 0.8130670189857483, "learning_rate": 8.630878206860275e-05, "loss": 2.861, "step": 46055 }, { "epoch": 2.26, "grad_norm": 0.7119998931884766, "learning_rate": 8.629797621017475e-05, "loss": 2.7987, "step": 46056 }, { "epoch": 2.26, "grad_norm": 0.7341967821121216, "learning_rate": 8.62871709145966e-05, "loss": 2.9332, "step": 46057 }, { "epoch": 2.26, "grad_norm": 0.7142411470413208, "learning_rate": 8.62763661818969e-05, "loss": 2.8448, "step": 46058 }, { "epoch": 2.26, "grad_norm": 0.7042397260665894, "learning_rate": 8.626556201210424e-05, "loss": 2.8789, "step": 46059 }, { "epoch": 2.26, "grad_norm": 0.7835875153541565, "learning_rate": 8.625475840524681e-05, "loss": 2.9927, "step": 46060 }, { "epoch": 2.26, "grad_norm": 0.718367338180542, "learning_rate": 8.624395536135336e-05, "loss": 3.0031, "step": 46061 }, { "epoch": 2.26, "grad_norm": 0.742847204208374, "learning_rate": 8.623315288045207e-05, "loss": 2.9724, "step": 46062 }, { "epoch": 2.26, "grad_norm": 0.7116962671279907, "learning_rate": 8.62223509625716e-05, "loss": 2.7999, "step": 46063 }, { "epoch": 2.26, "grad_norm": 0.7158562541007996, "learning_rate": 8.621154960774035e-05, "loss": 3.0854, "step": 46064 }, { "epoch": 2.26, "grad_norm": 0.7016698122024536, "learning_rate": 8.620074881598662e-05, "loss": 2.6261, "step": 46065 }, { "epoch": 2.26, "grad_norm": 0.7218732833862305, "learning_rate": 8.618994858733906e-05, "loss": 3.0268, "step": 46066 }, { "epoch": 2.26, "grad_norm": 0.7430708408355713, "learning_rate": 8.617914892182592e-05, "loss": 3.0253, "step": 46067 }, { "epoch": 2.26, "grad_norm": 0.7524659633636475, "learning_rate": 8.616834981947577e-05, "loss": 2.8653, "step": 46068 }, { "epoch": 2.26, "grad_norm": 0.6865372061729431, "learning_rate": 8.615755128031714e-05, "loss": 3.1231, "step": 46069 }, { "epoch": 2.26, "grad_norm": 0.7529311180114746, "learning_rate": 8.614675330437835e-05, "loss": 2.8919, "step": 46070 }, { "epoch": 2.26, "grad_norm": 0.7249378561973572, "learning_rate": 8.613595589168787e-05, "loss": 2.9534, "step": 46071 }, { "epoch": 2.26, "grad_norm": 0.7141491770744324, "learning_rate": 8.612515904227401e-05, "loss": 2.8534, "step": 46072 }, { "epoch": 2.26, "grad_norm": 0.6884422302246094, "learning_rate": 8.611436275616534e-05, "loss": 2.8794, "step": 46073 }, { "epoch": 2.26, "grad_norm": 0.7267909646034241, "learning_rate": 8.610356703339033e-05, "loss": 2.7915, "step": 46074 }, { "epoch": 2.26, "grad_norm": 0.7484807968139648, "learning_rate": 8.609277187397728e-05, "loss": 2.9957, "step": 46075 }, { "epoch": 2.26, "grad_norm": 0.7652937769889832, "learning_rate": 8.60819772779548e-05, "loss": 2.8617, "step": 46076 }, { "epoch": 2.26, "grad_norm": 0.6917968392372131, "learning_rate": 8.607118324535114e-05, "loss": 2.7486, "step": 46077 }, { "epoch": 2.26, "grad_norm": 0.7388394474983215, "learning_rate": 8.606038977619492e-05, "loss": 2.768, "step": 46078 }, { "epoch": 2.26, "grad_norm": 0.7591552734375, "learning_rate": 8.604959687051444e-05, "loss": 2.7493, "step": 46079 }, { "epoch": 2.26, "grad_norm": 0.7432824969291687, "learning_rate": 8.603880452833805e-05, "loss": 2.7448, "step": 46080 }, { "epoch": 2.26, "grad_norm": 0.6959530115127563, "learning_rate": 8.602801274969436e-05, "loss": 3.0574, "step": 46081 }, { "epoch": 2.26, "grad_norm": 0.7385079264640808, "learning_rate": 8.601722153461162e-05, "loss": 2.8571, "step": 46082 }, { "epoch": 2.26, "grad_norm": 0.6802938580513, "learning_rate": 8.600643088311833e-05, "loss": 2.9388, "step": 46083 }, { "epoch": 2.26, "grad_norm": 0.7278597950935364, "learning_rate": 8.599564079524302e-05, "loss": 2.9041, "step": 46084 }, { "epoch": 2.26, "grad_norm": 0.7153709530830383, "learning_rate": 8.598485127101399e-05, "loss": 2.9392, "step": 46085 }, { "epoch": 2.26, "grad_norm": 0.7356995344161987, "learning_rate": 8.59740623104597e-05, "loss": 3.0901, "step": 46086 }, { "epoch": 2.26, "grad_norm": 0.7170118689537048, "learning_rate": 8.596327391360843e-05, "loss": 3.0913, "step": 46087 }, { "epoch": 2.26, "grad_norm": 0.7780619859695435, "learning_rate": 8.595248608048869e-05, "loss": 2.8865, "step": 46088 }, { "epoch": 2.26, "grad_norm": 0.7607010006904602, "learning_rate": 8.594169881112902e-05, "loss": 2.9598, "step": 46089 }, { "epoch": 2.26, "grad_norm": 0.7569159269332886, "learning_rate": 8.593091210555764e-05, "loss": 2.8838, "step": 46090 }, { "epoch": 2.26, "grad_norm": 0.738321840763092, "learning_rate": 8.592012596380311e-05, "loss": 2.94, "step": 46091 }, { "epoch": 2.26, "grad_norm": 0.722771406173706, "learning_rate": 8.59093403858938e-05, "loss": 3.0879, "step": 46092 }, { "epoch": 2.26, "grad_norm": 0.7251681089401245, "learning_rate": 8.589855537185794e-05, "loss": 3.1147, "step": 46093 }, { "epoch": 2.26, "grad_norm": 0.7316486239433289, "learning_rate": 8.58877709217242e-05, "loss": 2.9141, "step": 46094 }, { "epoch": 2.26, "grad_norm": 0.6975620985031128, "learning_rate": 8.587698703552078e-05, "loss": 2.7911, "step": 46095 }, { "epoch": 2.26, "grad_norm": 0.69795161485672, "learning_rate": 8.586620371327625e-05, "loss": 2.9119, "step": 46096 }, { "epoch": 2.26, "grad_norm": 0.7358713746070862, "learning_rate": 8.585542095501883e-05, "loss": 2.9688, "step": 46097 }, { "epoch": 2.26, "grad_norm": 0.692922830581665, "learning_rate": 8.584463876077703e-05, "loss": 3.0083, "step": 46098 }, { "epoch": 2.26, "grad_norm": 0.7178481817245483, "learning_rate": 8.58338571305793e-05, "loss": 2.887, "step": 46099 }, { "epoch": 2.26, "grad_norm": 0.6919596195220947, "learning_rate": 8.582307606445402e-05, "loss": 2.746, "step": 46100 }, { "epoch": 2.26, "grad_norm": 0.7563260197639465, "learning_rate": 8.581229556242951e-05, "loss": 2.7533, "step": 46101 }, { "epoch": 2.26, "grad_norm": 0.7244362831115723, "learning_rate": 8.58015156245341e-05, "loss": 3.0494, "step": 46102 }, { "epoch": 2.26, "grad_norm": 0.7256781458854675, "learning_rate": 8.579073625079627e-05, "loss": 2.8626, "step": 46103 }, { "epoch": 2.26, "grad_norm": 0.7332637310028076, "learning_rate": 8.577995744124451e-05, "loss": 2.8991, "step": 46104 }, { "epoch": 2.26, "grad_norm": 0.7425779700279236, "learning_rate": 8.576917919590704e-05, "loss": 2.7907, "step": 46105 }, { "epoch": 2.26, "grad_norm": 0.7489483952522278, "learning_rate": 8.57584015148124e-05, "loss": 2.9985, "step": 46106 }, { "epoch": 2.26, "grad_norm": 0.7290831208229065, "learning_rate": 8.57476243979889e-05, "loss": 2.969, "step": 46107 }, { "epoch": 2.26, "grad_norm": 0.752694308757782, "learning_rate": 8.573684784546485e-05, "loss": 3.069, "step": 46108 }, { "epoch": 2.26, "grad_norm": 0.7758665084838867, "learning_rate": 8.572607185726879e-05, "loss": 2.8742, "step": 46109 }, { "epoch": 2.26, "grad_norm": 0.7011808753013611, "learning_rate": 8.571529643342891e-05, "loss": 3.0316, "step": 46110 }, { "epoch": 2.26, "grad_norm": 0.7500031590461731, "learning_rate": 8.570452157397378e-05, "loss": 3.0522, "step": 46111 }, { "epoch": 2.26, "grad_norm": 0.7314960956573486, "learning_rate": 8.569374727893164e-05, "loss": 3.0971, "step": 46112 }, { "epoch": 2.26, "grad_norm": 0.6992918252944946, "learning_rate": 8.568297354833102e-05, "loss": 2.9469, "step": 46113 }, { "epoch": 2.26, "grad_norm": 0.7275790572166443, "learning_rate": 8.567220038220016e-05, "loss": 3.1835, "step": 46114 }, { "epoch": 2.26, "grad_norm": 0.7153680920600891, "learning_rate": 8.566142778056743e-05, "loss": 2.79, "step": 46115 }, { "epoch": 2.26, "grad_norm": 0.7057393193244934, "learning_rate": 8.565065574346132e-05, "loss": 3.0257, "step": 46116 }, { "epoch": 2.26, "grad_norm": 0.7389050126075745, "learning_rate": 8.563988427091005e-05, "loss": 2.9956, "step": 46117 }, { "epoch": 2.26, "grad_norm": 0.69362473487854, "learning_rate": 8.562911336294204e-05, "loss": 2.6957, "step": 46118 }, { "epoch": 2.26, "grad_norm": 0.7017712593078613, "learning_rate": 8.56183430195858e-05, "loss": 2.9043, "step": 46119 }, { "epoch": 2.26, "grad_norm": 0.7296519875526428, "learning_rate": 8.560757324086951e-05, "loss": 3.0865, "step": 46120 }, { "epoch": 2.26, "grad_norm": 0.7155781388282776, "learning_rate": 8.559680402682168e-05, "loss": 2.9978, "step": 46121 }, { "epoch": 2.26, "grad_norm": 0.7151504158973694, "learning_rate": 8.558603537747062e-05, "loss": 2.8498, "step": 46122 }, { "epoch": 2.26, "grad_norm": 0.7091836929321289, "learning_rate": 8.557526729284459e-05, "loss": 3.0795, "step": 46123 }, { "epoch": 2.26, "grad_norm": 0.701084315776825, "learning_rate": 8.55644997729721e-05, "loss": 2.8647, "step": 46124 }, { "epoch": 2.26, "grad_norm": 0.7470046281814575, "learning_rate": 8.55537328178814e-05, "loss": 2.8312, "step": 46125 }, { "epoch": 2.26, "grad_norm": 0.7091386318206787, "learning_rate": 8.554296642760095e-05, "loss": 2.9283, "step": 46126 }, { "epoch": 2.26, "grad_norm": 0.7808635234832764, "learning_rate": 8.5532200602159e-05, "loss": 2.9434, "step": 46127 }, { "epoch": 2.26, "grad_norm": 0.6947294473648071, "learning_rate": 8.552143534158403e-05, "loss": 2.8923, "step": 46128 }, { "epoch": 2.26, "grad_norm": 0.6899562478065491, "learning_rate": 8.551067064590432e-05, "loss": 2.7862, "step": 46129 }, { "epoch": 2.26, "grad_norm": 0.6616258025169373, "learning_rate": 8.54999065151481e-05, "loss": 2.9572, "step": 46130 }, { "epoch": 2.26, "grad_norm": 0.7294912338256836, "learning_rate": 8.548914294934399e-05, "loss": 3.0817, "step": 46131 }, { "epoch": 2.26, "grad_norm": 0.7399985194206238, "learning_rate": 8.547837994852007e-05, "loss": 2.8683, "step": 46132 }, { "epoch": 2.26, "grad_norm": 0.6928227543830872, "learning_rate": 8.546761751270492e-05, "loss": 2.9985, "step": 46133 }, { "epoch": 2.26, "grad_norm": 0.7888341546058655, "learning_rate": 8.545685564192669e-05, "loss": 2.8663, "step": 46134 }, { "epoch": 2.26, "grad_norm": 0.7424514889717102, "learning_rate": 8.544609433621388e-05, "loss": 2.8363, "step": 46135 }, { "epoch": 2.26, "grad_norm": 0.7457790970802307, "learning_rate": 8.543533359559479e-05, "loss": 2.7649, "step": 46136 }, { "epoch": 2.26, "grad_norm": 0.7245312333106995, "learning_rate": 8.542457342009765e-05, "loss": 2.641, "step": 46137 }, { "epoch": 2.26, "grad_norm": 0.761008620262146, "learning_rate": 8.541381380975096e-05, "loss": 2.9057, "step": 46138 }, { "epoch": 2.26, "grad_norm": 0.7968191504478455, "learning_rate": 8.54030547645829e-05, "loss": 2.6337, "step": 46139 }, { "epoch": 2.26, "grad_norm": 0.7402268648147583, "learning_rate": 8.53922962846219e-05, "loss": 3.0397, "step": 46140 }, { "epoch": 2.26, "grad_norm": 0.8710418939590454, "learning_rate": 8.538153836989635e-05, "loss": 2.8891, "step": 46141 }, { "epoch": 2.26, "grad_norm": 0.734683096408844, "learning_rate": 8.537078102043447e-05, "loss": 2.929, "step": 46142 }, { "epoch": 2.26, "grad_norm": 0.7593229413032532, "learning_rate": 8.536002423626475e-05, "loss": 2.8161, "step": 46143 }, { "epoch": 2.26, "grad_norm": 0.6786648035049438, "learning_rate": 8.534926801741538e-05, "loss": 2.9832, "step": 46144 }, { "epoch": 2.26, "grad_norm": 0.7301282286643982, "learning_rate": 8.533851236391463e-05, "loss": 2.9853, "step": 46145 }, { "epoch": 2.26, "grad_norm": 0.725745439529419, "learning_rate": 8.532775727579104e-05, "loss": 3.1637, "step": 46146 }, { "epoch": 2.26, "grad_norm": 0.7195970416069031, "learning_rate": 8.531700275307274e-05, "loss": 2.8625, "step": 46147 }, { "epoch": 2.26, "grad_norm": 0.7576825618743896, "learning_rate": 8.530624879578824e-05, "loss": 2.8941, "step": 46148 }, { "epoch": 2.26, "grad_norm": 0.7363550066947937, "learning_rate": 8.529549540396564e-05, "loss": 3.004, "step": 46149 }, { "epoch": 2.26, "grad_norm": 0.6844567656517029, "learning_rate": 8.528474257763346e-05, "loss": 2.9282, "step": 46150 }, { "epoch": 2.26, "grad_norm": 0.6960411667823792, "learning_rate": 8.527399031682e-05, "loss": 3.001, "step": 46151 }, { "epoch": 2.26, "grad_norm": 0.7222846150398254, "learning_rate": 8.52632386215534e-05, "loss": 2.987, "step": 46152 }, { "epoch": 2.26, "grad_norm": 0.7160438895225525, "learning_rate": 8.525248749186219e-05, "loss": 2.9166, "step": 46153 }, { "epoch": 2.26, "grad_norm": 0.7318093776702881, "learning_rate": 8.52417369277745e-05, "loss": 2.9424, "step": 46154 }, { "epoch": 2.26, "grad_norm": 0.7212334275245667, "learning_rate": 8.523098692931877e-05, "loss": 2.9443, "step": 46155 }, { "epoch": 2.26, "grad_norm": 0.7121982574462891, "learning_rate": 8.522023749652338e-05, "loss": 2.9703, "step": 46156 }, { "epoch": 2.26, "grad_norm": 0.722030520439148, "learning_rate": 8.520948862941646e-05, "loss": 3.0426, "step": 46157 }, { "epoch": 2.26, "grad_norm": 0.7229368090629578, "learning_rate": 8.519874032802658e-05, "loss": 3.1051, "step": 46158 }, { "epoch": 2.26, "grad_norm": 0.7376280426979065, "learning_rate": 8.518799259238167e-05, "loss": 2.8847, "step": 46159 }, { "epoch": 2.26, "grad_norm": 0.6587594151496887, "learning_rate": 8.517724542251026e-05, "loss": 2.7757, "step": 46160 }, { "epoch": 2.26, "grad_norm": 0.7172147631645203, "learning_rate": 8.516649881844077e-05, "loss": 2.6424, "step": 46161 }, { "epoch": 2.26, "grad_norm": 0.7306764125823975, "learning_rate": 8.515575278020125e-05, "loss": 2.9843, "step": 46162 }, { "epoch": 2.26, "grad_norm": 0.7316793203353882, "learning_rate": 8.514500730782023e-05, "loss": 2.9263, "step": 46163 }, { "epoch": 2.26, "grad_norm": 0.8195810317993164, "learning_rate": 8.513426240132584e-05, "loss": 2.9318, "step": 46164 }, { "epoch": 2.26, "grad_norm": 0.7335458397865295, "learning_rate": 8.51235180607465e-05, "loss": 2.8091, "step": 46165 }, { "epoch": 2.26, "grad_norm": 0.6977335810661316, "learning_rate": 8.511277428611048e-05, "loss": 3.0965, "step": 46166 }, { "epoch": 2.26, "grad_norm": 0.6865370273590088, "learning_rate": 8.510203107744598e-05, "loss": 2.9652, "step": 46167 }, { "epoch": 2.26, "grad_norm": 0.737516462802887, "learning_rate": 8.509128843478145e-05, "loss": 2.9361, "step": 46168 }, { "epoch": 2.26, "grad_norm": 0.7313123345375061, "learning_rate": 8.508054635814501e-05, "loss": 2.9974, "step": 46169 }, { "epoch": 2.26, "grad_norm": 0.7545174360275269, "learning_rate": 8.506980484756507e-05, "loss": 3.1408, "step": 46170 }, { "epoch": 2.26, "grad_norm": 0.7121573090553284, "learning_rate": 8.505906390306994e-05, "loss": 2.9658, "step": 46171 }, { "epoch": 2.26, "grad_norm": 0.7106088399887085, "learning_rate": 8.504832352468795e-05, "loss": 2.9336, "step": 46172 }, { "epoch": 2.26, "grad_norm": 0.741375744342804, "learning_rate": 8.503758371244724e-05, "loss": 3.0173, "step": 46173 }, { "epoch": 2.26, "grad_norm": 0.7445472478866577, "learning_rate": 8.502684446637608e-05, "loss": 2.7716, "step": 46174 }, { "epoch": 2.26, "grad_norm": 0.7355408668518066, "learning_rate": 8.501610578650289e-05, "loss": 2.6886, "step": 46175 }, { "epoch": 2.26, "grad_norm": 0.7312442660331726, "learning_rate": 8.500536767285595e-05, "loss": 2.8622, "step": 46176 }, { "epoch": 2.26, "grad_norm": 0.7817408442497253, "learning_rate": 8.49946301254634e-05, "loss": 2.7271, "step": 46177 }, { "epoch": 2.26, "grad_norm": 0.7041735649108887, "learning_rate": 8.498389314435372e-05, "loss": 2.8909, "step": 46178 }, { "epoch": 2.26, "grad_norm": 0.7182589173316956, "learning_rate": 8.497315672955498e-05, "loss": 2.8677, "step": 46179 }, { "epoch": 2.26, "grad_norm": 0.6795207262039185, "learning_rate": 8.496242088109569e-05, "loss": 2.7094, "step": 46180 }, { "epoch": 2.26, "grad_norm": 0.6875749826431274, "learning_rate": 8.495168559900397e-05, "loss": 2.8307, "step": 46181 }, { "epoch": 2.26, "grad_norm": 0.7328691482543945, "learning_rate": 8.494095088330803e-05, "loss": 2.9532, "step": 46182 }, { "epoch": 2.26, "grad_norm": 0.7125970721244812, "learning_rate": 8.493021673403635e-05, "loss": 2.7698, "step": 46183 }, { "epoch": 2.26, "grad_norm": 0.7266592383384705, "learning_rate": 8.491948315121696e-05, "loss": 2.8341, "step": 46184 }, { "epoch": 2.26, "grad_norm": 0.7378901243209839, "learning_rate": 8.490875013487831e-05, "loss": 2.9183, "step": 46185 }, { "epoch": 2.26, "grad_norm": 0.7208519577980042, "learning_rate": 8.489801768504868e-05, "loss": 3.2133, "step": 46186 }, { "epoch": 2.26, "grad_norm": 0.7134647369384766, "learning_rate": 8.488728580175625e-05, "loss": 2.9837, "step": 46187 }, { "epoch": 2.26, "grad_norm": 0.7949679493904114, "learning_rate": 8.487655448502934e-05, "loss": 2.9484, "step": 46188 }, { "epoch": 2.26, "grad_norm": 0.7748767137527466, "learning_rate": 8.48658237348961e-05, "loss": 2.7965, "step": 46189 }, { "epoch": 2.26, "grad_norm": 0.7083140015602112, "learning_rate": 8.485509355138485e-05, "loss": 2.912, "step": 46190 }, { "epoch": 2.26, "grad_norm": 0.6907854676246643, "learning_rate": 8.484436393452399e-05, "loss": 2.6598, "step": 46191 }, { "epoch": 2.26, "grad_norm": 0.7154836058616638, "learning_rate": 8.483363488434156e-05, "loss": 2.9983, "step": 46192 }, { "epoch": 2.26, "grad_norm": 0.6884499788284302, "learning_rate": 8.482290640086602e-05, "loss": 2.947, "step": 46193 }, { "epoch": 2.26, "grad_norm": 0.723860502243042, "learning_rate": 8.481217848412554e-05, "loss": 2.8889, "step": 46194 }, { "epoch": 2.26, "grad_norm": 0.7943236827850342, "learning_rate": 8.480145113414828e-05, "loss": 2.9106, "step": 46195 }, { "epoch": 2.26, "grad_norm": 0.7377872467041016, "learning_rate": 8.479072435096265e-05, "loss": 2.8235, "step": 46196 }, { "epoch": 2.26, "grad_norm": 0.6710457801818848, "learning_rate": 8.477999813459678e-05, "loss": 2.9226, "step": 46197 }, { "epoch": 2.26, "grad_norm": 0.6965027451515198, "learning_rate": 8.476927248507904e-05, "loss": 3.1496, "step": 46198 }, { "epoch": 2.26, "grad_norm": 0.6677283048629761, "learning_rate": 8.475854740243755e-05, "loss": 2.8667, "step": 46199 }, { "epoch": 2.26, "grad_norm": 0.7114673256874084, "learning_rate": 8.474782288670057e-05, "loss": 3.0478, "step": 46200 }, { "epoch": 2.26, "grad_norm": 0.734982967376709, "learning_rate": 8.473709893789654e-05, "loss": 3.0154, "step": 46201 }, { "epoch": 2.26, "grad_norm": 0.7224012613296509, "learning_rate": 8.472637555605356e-05, "loss": 2.8927, "step": 46202 }, { "epoch": 2.26, "grad_norm": 0.7512854337692261, "learning_rate": 8.471565274119984e-05, "loss": 2.7395, "step": 46203 }, { "epoch": 2.26, "grad_norm": 0.7039549946784973, "learning_rate": 8.470493049336361e-05, "loss": 2.8335, "step": 46204 }, { "epoch": 2.26, "grad_norm": 0.7275938987731934, "learning_rate": 8.469420881257322e-05, "loss": 2.8, "step": 46205 }, { "epoch": 2.26, "grad_norm": 0.6578214168548584, "learning_rate": 8.468348769885678e-05, "loss": 2.8258, "step": 46206 }, { "epoch": 2.26, "grad_norm": 0.6866016387939453, "learning_rate": 8.467276715224257e-05, "loss": 2.9636, "step": 46207 }, { "epoch": 2.26, "grad_norm": 0.7433063983917236, "learning_rate": 8.466204717275894e-05, "loss": 3.011, "step": 46208 }, { "epoch": 2.26, "grad_norm": 0.6856465935707092, "learning_rate": 8.465132776043407e-05, "loss": 2.7571, "step": 46209 }, { "epoch": 2.26, "grad_norm": 0.6921495795249939, "learning_rate": 8.464060891529612e-05, "loss": 2.729, "step": 46210 }, { "epoch": 2.26, "grad_norm": 0.7133631706237793, "learning_rate": 8.46298906373733e-05, "loss": 2.9614, "step": 46211 }, { "epoch": 2.26, "grad_norm": 0.6963069438934326, "learning_rate": 8.461917292669389e-05, "loss": 2.9202, "step": 46212 }, { "epoch": 2.26, "grad_norm": 0.7098976373672485, "learning_rate": 8.46084557832862e-05, "loss": 2.7825, "step": 46213 }, { "epoch": 2.26, "grad_norm": 0.6936521530151367, "learning_rate": 8.459773920717831e-05, "loss": 2.7475, "step": 46214 }, { "epoch": 2.26, "grad_norm": 0.6678090691566467, "learning_rate": 8.458702319839861e-05, "loss": 2.8444, "step": 46215 }, { "epoch": 2.26, "grad_norm": 0.8158767223358154, "learning_rate": 8.457630775697522e-05, "loss": 2.8724, "step": 46216 }, { "epoch": 2.26, "grad_norm": 0.7068122625350952, "learning_rate": 8.456559288293632e-05, "loss": 2.9663, "step": 46217 }, { "epoch": 2.27, "grad_norm": 0.7318935990333557, "learning_rate": 8.455487857631023e-05, "loss": 2.9523, "step": 46218 }, { "epoch": 2.27, "grad_norm": 0.7318170666694641, "learning_rate": 8.454416483712505e-05, "loss": 2.8784, "step": 46219 }, { "epoch": 2.27, "grad_norm": 0.718914270401001, "learning_rate": 8.45334516654092e-05, "loss": 2.9895, "step": 46220 }, { "epoch": 2.27, "grad_norm": 0.7228567600250244, "learning_rate": 8.452273906119064e-05, "loss": 2.9328, "step": 46221 }, { "epoch": 2.27, "grad_norm": 0.7042717933654785, "learning_rate": 8.451202702449773e-05, "loss": 3.196, "step": 46222 }, { "epoch": 2.27, "grad_norm": 0.7152907848358154, "learning_rate": 8.450131555535876e-05, "loss": 2.9061, "step": 46223 }, { "epoch": 2.27, "grad_norm": 0.7358282208442688, "learning_rate": 8.449060465380188e-05, "loss": 2.9795, "step": 46224 }, { "epoch": 2.27, "grad_norm": 0.6989845037460327, "learning_rate": 8.447989431985521e-05, "loss": 2.791, "step": 46225 }, { "epoch": 2.27, "grad_norm": 0.7169927954673767, "learning_rate": 8.446918455354699e-05, "loss": 2.9684, "step": 46226 }, { "epoch": 2.27, "grad_norm": 0.7589333653450012, "learning_rate": 8.44584753549054e-05, "loss": 2.7864, "step": 46227 }, { "epoch": 2.27, "grad_norm": 0.7092520594596863, "learning_rate": 8.444776672395886e-05, "loss": 3.1328, "step": 46228 }, { "epoch": 2.27, "grad_norm": 0.7001926898956299, "learning_rate": 8.443705866073526e-05, "loss": 2.773, "step": 46229 }, { "epoch": 2.27, "grad_norm": 0.713153600692749, "learning_rate": 8.44263511652631e-05, "loss": 2.9185, "step": 46230 }, { "epoch": 2.27, "grad_norm": 0.7044378519058228, "learning_rate": 8.441564423757045e-05, "loss": 2.8194, "step": 46231 }, { "epoch": 2.27, "grad_norm": 0.7447625398635864, "learning_rate": 8.44049378776854e-05, "loss": 2.9084, "step": 46232 }, { "epoch": 2.27, "grad_norm": 0.7260729074478149, "learning_rate": 8.439423208563633e-05, "loss": 2.9366, "step": 46233 }, { "epoch": 2.27, "grad_norm": 0.6695955991744995, "learning_rate": 8.438352686145124e-05, "loss": 2.9983, "step": 46234 }, { "epoch": 2.27, "grad_norm": 0.7658150792121887, "learning_rate": 8.43728222051586e-05, "loss": 2.9357, "step": 46235 }, { "epoch": 2.27, "grad_norm": 0.6993694305419922, "learning_rate": 8.436211811678632e-05, "loss": 2.9622, "step": 46236 }, { "epoch": 2.27, "grad_norm": 0.7462490200996399, "learning_rate": 8.435141459636281e-05, "loss": 2.8533, "step": 46237 }, { "epoch": 2.27, "grad_norm": 0.7425763607025146, "learning_rate": 8.43407116439162e-05, "loss": 2.9425, "step": 46238 }, { "epoch": 2.27, "grad_norm": 0.7130029201507568, "learning_rate": 8.433000925947452e-05, "loss": 2.857, "step": 46239 }, { "epoch": 2.27, "grad_norm": 0.6981950402259827, "learning_rate": 8.43193074430662e-05, "loss": 2.7975, "step": 46240 }, { "epoch": 2.27, "grad_norm": 0.6925346255302429, "learning_rate": 8.430860619471921e-05, "loss": 2.9858, "step": 46241 }, { "epoch": 2.27, "grad_norm": 0.6922650933265686, "learning_rate": 8.429790551446186e-05, "loss": 2.8014, "step": 46242 }, { "epoch": 2.27, "grad_norm": 0.7049052119255066, "learning_rate": 8.42872054023224e-05, "loss": 3.0081, "step": 46243 }, { "epoch": 2.27, "grad_norm": 0.7104154825210571, "learning_rate": 8.427650585832884e-05, "loss": 2.9192, "step": 46244 }, { "epoch": 2.27, "grad_norm": 0.6965845227241516, "learning_rate": 8.426580688250952e-05, "loss": 3.0567, "step": 46245 }, { "epoch": 2.27, "grad_norm": 0.6756129264831543, "learning_rate": 8.425510847489257e-05, "loss": 2.8244, "step": 46246 }, { "epoch": 2.27, "grad_norm": 0.7112818956375122, "learning_rate": 8.424441063550603e-05, "loss": 2.77, "step": 46247 }, { "epoch": 2.27, "grad_norm": 0.7068428993225098, "learning_rate": 8.42337133643783e-05, "loss": 2.8858, "step": 46248 }, { "epoch": 2.27, "grad_norm": 0.6985403299331665, "learning_rate": 8.422301666153734e-05, "loss": 2.8649, "step": 46249 }, { "epoch": 2.27, "grad_norm": 0.6854354739189148, "learning_rate": 8.421232052701152e-05, "loss": 2.9009, "step": 46250 }, { "epoch": 2.27, "grad_norm": 0.6568443775177002, "learning_rate": 8.42016249608288e-05, "loss": 2.7745, "step": 46251 }, { "epoch": 2.27, "grad_norm": 0.7257750034332275, "learning_rate": 8.419092996301761e-05, "loss": 2.9674, "step": 46252 }, { "epoch": 2.27, "grad_norm": 0.7708278894424438, "learning_rate": 8.418023553360596e-05, "loss": 2.837, "step": 46253 }, { "epoch": 2.27, "grad_norm": 0.7032803893089294, "learning_rate": 8.416954167262193e-05, "loss": 3.0264, "step": 46254 }, { "epoch": 2.27, "grad_norm": 0.6957308650016785, "learning_rate": 8.415884838009388e-05, "loss": 2.9592, "step": 46255 }, { "epoch": 2.27, "grad_norm": 0.7218675017356873, "learning_rate": 8.414815565604983e-05, "loss": 2.8956, "step": 46256 }, { "epoch": 2.27, "grad_norm": 0.7233446836471558, "learning_rate": 8.413746350051797e-05, "loss": 2.9225, "step": 46257 }, { "epoch": 2.27, "grad_norm": 0.7115861177444458, "learning_rate": 8.412677191352656e-05, "loss": 3.0288, "step": 46258 }, { "epoch": 2.27, "grad_norm": 0.7438410520553589, "learning_rate": 8.41160808951037e-05, "loss": 2.8894, "step": 46259 }, { "epoch": 2.27, "grad_norm": 0.700964629650116, "learning_rate": 8.410539044527753e-05, "loss": 3.0023, "step": 46260 }, { "epoch": 2.27, "grad_norm": 0.7393155694007874, "learning_rate": 8.409470056407614e-05, "loss": 2.9852, "step": 46261 }, { "epoch": 2.27, "grad_norm": 0.6867033243179321, "learning_rate": 8.408401125152776e-05, "loss": 2.9322, "step": 46262 }, { "epoch": 2.27, "grad_norm": 0.6977315545082092, "learning_rate": 8.407332250766063e-05, "loss": 2.9155, "step": 46263 }, { "epoch": 2.27, "grad_norm": 0.6820677518844604, "learning_rate": 8.40626343325027e-05, "loss": 2.9321, "step": 46264 }, { "epoch": 2.27, "grad_norm": 0.7445458173751831, "learning_rate": 8.405194672608233e-05, "loss": 2.9113, "step": 46265 }, { "epoch": 2.27, "grad_norm": 0.6989564895629883, "learning_rate": 8.40412596884275e-05, "loss": 3.0236, "step": 46266 }, { "epoch": 2.27, "grad_norm": 0.7086830735206604, "learning_rate": 8.403057321956648e-05, "loss": 2.9824, "step": 46267 }, { "epoch": 2.27, "grad_norm": 0.7059872150421143, "learning_rate": 8.401988731952741e-05, "loss": 3.0183, "step": 46268 }, { "epoch": 2.27, "grad_norm": 0.7030812501907349, "learning_rate": 8.400920198833829e-05, "loss": 3.0954, "step": 46269 }, { "epoch": 2.27, "grad_norm": 0.7751390337944031, "learning_rate": 8.399851722602743e-05, "loss": 2.9932, "step": 46270 }, { "epoch": 2.27, "grad_norm": 0.7409400343894958, "learning_rate": 8.398783303262283e-05, "loss": 2.9746, "step": 46271 }, { "epoch": 2.27, "grad_norm": 0.7673764228820801, "learning_rate": 8.39771494081527e-05, "loss": 2.8363, "step": 46272 }, { "epoch": 2.27, "grad_norm": 0.7108684182167053, "learning_rate": 8.396646635264528e-05, "loss": 3.0562, "step": 46273 }, { "epoch": 2.27, "grad_norm": 0.6982866525650024, "learning_rate": 8.395578386612861e-05, "loss": 2.8194, "step": 46274 }, { "epoch": 2.27, "grad_norm": 0.7467199563980103, "learning_rate": 8.394510194863085e-05, "loss": 3.0077, "step": 46275 }, { "epoch": 2.27, "grad_norm": 0.7095677256584167, "learning_rate": 8.393442060017998e-05, "loss": 2.9502, "step": 46276 }, { "epoch": 2.27, "grad_norm": 0.7236535549163818, "learning_rate": 8.392373982080428e-05, "loss": 2.7709, "step": 46277 }, { "epoch": 2.27, "grad_norm": 0.7553972005844116, "learning_rate": 8.391305961053197e-05, "loss": 2.9195, "step": 46278 }, { "epoch": 2.27, "grad_norm": 0.7126523852348328, "learning_rate": 8.390237996939098e-05, "loss": 2.9921, "step": 46279 }, { "epoch": 2.27, "grad_norm": 0.743773341178894, "learning_rate": 8.389170089740962e-05, "loss": 2.9643, "step": 46280 }, { "epoch": 2.27, "grad_norm": 0.7017213106155396, "learning_rate": 8.388102239461582e-05, "loss": 3.0562, "step": 46281 }, { "epoch": 2.27, "grad_norm": 0.7224516868591309, "learning_rate": 8.387034446103794e-05, "loss": 2.9265, "step": 46282 }, { "epoch": 2.27, "grad_norm": 0.7359697818756104, "learning_rate": 8.385966709670395e-05, "loss": 3.0036, "step": 46283 }, { "epoch": 2.27, "grad_norm": 0.6996152400970459, "learning_rate": 8.384899030164189e-05, "loss": 2.9987, "step": 46284 }, { "epoch": 2.27, "grad_norm": 0.6811954975128174, "learning_rate": 8.383831407588011e-05, "loss": 3.0215, "step": 46285 }, { "epoch": 2.27, "grad_norm": 0.746880829334259, "learning_rate": 8.382763841944654e-05, "loss": 3.0656, "step": 46286 }, { "epoch": 2.27, "grad_norm": 0.7244267463684082, "learning_rate": 8.381696333236944e-05, "loss": 2.9721, "step": 46287 }, { "epoch": 2.27, "grad_norm": 0.7403231263160706, "learning_rate": 8.380628881467674e-05, "loss": 3.0218, "step": 46288 }, { "epoch": 2.27, "grad_norm": 0.7562906742095947, "learning_rate": 8.379561486639679e-05, "loss": 2.9035, "step": 46289 }, { "epoch": 2.27, "grad_norm": 0.7179818749427795, "learning_rate": 8.378494148755753e-05, "loss": 2.8786, "step": 46290 }, { "epoch": 2.27, "grad_norm": 0.7321277856826782, "learning_rate": 8.377426867818708e-05, "loss": 2.9989, "step": 46291 }, { "epoch": 2.27, "grad_norm": 0.7414536476135254, "learning_rate": 8.376359643831369e-05, "loss": 2.8159, "step": 46292 }, { "epoch": 2.27, "grad_norm": 0.7076948285102844, "learning_rate": 8.375292476796524e-05, "loss": 2.7403, "step": 46293 }, { "epoch": 2.27, "grad_norm": 0.7004036903381348, "learning_rate": 8.374225366717002e-05, "loss": 2.9799, "step": 46294 }, { "epoch": 2.27, "grad_norm": 0.728317379951477, "learning_rate": 8.373158313595612e-05, "loss": 2.6187, "step": 46295 }, { "epoch": 2.27, "grad_norm": 0.7136837840080261, "learning_rate": 8.372091317435164e-05, "loss": 2.8687, "step": 46296 }, { "epoch": 2.27, "grad_norm": 0.6955955028533936, "learning_rate": 8.371024378238464e-05, "loss": 2.9601, "step": 46297 }, { "epoch": 2.27, "grad_norm": 0.6891588568687439, "learning_rate": 8.369957496008314e-05, "loss": 2.9733, "step": 46298 }, { "epoch": 2.27, "grad_norm": 0.7407911419868469, "learning_rate": 8.368890670747535e-05, "loss": 2.8449, "step": 46299 }, { "epoch": 2.27, "grad_norm": 0.7350627183914185, "learning_rate": 8.367823902458941e-05, "loss": 2.957, "step": 46300 }, { "epoch": 2.27, "grad_norm": 0.7319565415382385, "learning_rate": 8.366757191145328e-05, "loss": 2.77, "step": 46301 }, { "epoch": 2.27, "grad_norm": 0.7195742130279541, "learning_rate": 8.365690536809525e-05, "loss": 2.8073, "step": 46302 }, { "epoch": 2.27, "grad_norm": 0.7700269818305969, "learning_rate": 8.364623939454318e-05, "loss": 2.8723, "step": 46303 }, { "epoch": 2.27, "grad_norm": 0.7323468923568726, "learning_rate": 8.363557399082535e-05, "loss": 2.8773, "step": 46304 }, { "epoch": 2.27, "grad_norm": 0.7363598346710205, "learning_rate": 8.362490915696981e-05, "loss": 2.843, "step": 46305 }, { "epoch": 2.27, "grad_norm": 0.7405996322631836, "learning_rate": 8.36142448930045e-05, "loss": 3.0222, "step": 46306 }, { "epoch": 2.27, "grad_norm": 0.7685582041740417, "learning_rate": 8.360358119895773e-05, "loss": 2.8072, "step": 46307 }, { "epoch": 2.27, "grad_norm": 0.7079715728759766, "learning_rate": 8.35929180748574e-05, "loss": 2.8948, "step": 46308 }, { "epoch": 2.27, "grad_norm": 0.7493276000022888, "learning_rate": 8.358225552073166e-05, "loss": 2.8799, "step": 46309 }, { "epoch": 2.27, "grad_norm": 0.7022292613983154, "learning_rate": 8.357159353660871e-05, "loss": 3.0506, "step": 46310 }, { "epoch": 2.27, "grad_norm": 0.7002536058425903, "learning_rate": 8.356093212251652e-05, "loss": 2.9324, "step": 46311 }, { "epoch": 2.27, "grad_norm": 0.7278270721435547, "learning_rate": 8.35502712784832e-05, "loss": 2.9016, "step": 46312 }, { "epoch": 2.27, "grad_norm": 0.6652935147285461, "learning_rate": 8.353961100453668e-05, "loss": 3.0654, "step": 46313 }, { "epoch": 2.27, "grad_norm": 0.7217055559158325, "learning_rate": 8.352895130070518e-05, "loss": 2.75, "step": 46314 }, { "epoch": 2.27, "grad_norm": 0.7330880165100098, "learning_rate": 8.351829216701685e-05, "loss": 2.9563, "step": 46315 }, { "epoch": 2.27, "grad_norm": 0.6977697014808655, "learning_rate": 8.350763360349961e-05, "loss": 3.0273, "step": 46316 }, { "epoch": 2.27, "grad_norm": 0.7105409502983093, "learning_rate": 8.349697561018167e-05, "loss": 2.9189, "step": 46317 }, { "epoch": 2.27, "grad_norm": 0.7222777009010315, "learning_rate": 8.348631818709103e-05, "loss": 2.8138, "step": 46318 }, { "epoch": 2.27, "grad_norm": 0.6798412203788757, "learning_rate": 8.347566133425568e-05, "loss": 2.9272, "step": 46319 }, { "epoch": 2.27, "grad_norm": 0.6820101737976074, "learning_rate": 8.346500505170385e-05, "loss": 3.1816, "step": 46320 }, { "epoch": 2.27, "grad_norm": 0.7206324934959412, "learning_rate": 8.345434933946345e-05, "loss": 2.8773, "step": 46321 }, { "epoch": 2.27, "grad_norm": 0.7084947228431702, "learning_rate": 8.344369419756268e-05, "loss": 2.8789, "step": 46322 }, { "epoch": 2.27, "grad_norm": 0.7819315195083618, "learning_rate": 8.343303962602948e-05, "loss": 2.7798, "step": 46323 }, { "epoch": 2.27, "grad_norm": 0.7215809226036072, "learning_rate": 8.342238562489192e-05, "loss": 3.1057, "step": 46324 }, { "epoch": 2.27, "grad_norm": 0.7359743118286133, "learning_rate": 8.341173219417826e-05, "loss": 2.7669, "step": 46325 }, { "epoch": 2.27, "grad_norm": 0.7675909996032715, "learning_rate": 8.340107933391641e-05, "loss": 2.9143, "step": 46326 }, { "epoch": 2.27, "grad_norm": 0.7441685199737549, "learning_rate": 8.339042704413443e-05, "loss": 2.8385, "step": 46327 }, { "epoch": 2.27, "grad_norm": 0.6832141280174255, "learning_rate": 8.337977532486024e-05, "loss": 2.7445, "step": 46328 }, { "epoch": 2.27, "grad_norm": 0.6876572966575623, "learning_rate": 8.336912417612208e-05, "loss": 3.1471, "step": 46329 }, { "epoch": 2.27, "grad_norm": 0.6860120892524719, "learning_rate": 8.335847359794805e-05, "loss": 3.0647, "step": 46330 }, { "epoch": 2.27, "grad_norm": 0.6835752725601196, "learning_rate": 8.334782359036599e-05, "loss": 3.1026, "step": 46331 }, { "epoch": 2.27, "grad_norm": 0.7607629299163818, "learning_rate": 8.333717415340419e-05, "loss": 2.8308, "step": 46332 }, { "epoch": 2.27, "grad_norm": 0.7346097826957703, "learning_rate": 8.332652528709054e-05, "loss": 2.8343, "step": 46333 }, { "epoch": 2.27, "grad_norm": 0.7202624678611755, "learning_rate": 8.331587699145303e-05, "loss": 2.9422, "step": 46334 }, { "epoch": 2.27, "grad_norm": 0.7167713642120361, "learning_rate": 8.330522926651992e-05, "loss": 2.8753, "step": 46335 }, { "epoch": 2.27, "grad_norm": 0.6617588400840759, "learning_rate": 8.329458211231902e-05, "loss": 2.9843, "step": 46336 }, { "epoch": 2.27, "grad_norm": 0.7064581513404846, "learning_rate": 8.32839355288786e-05, "loss": 2.9314, "step": 46337 }, { "epoch": 2.27, "grad_norm": 0.670749306678772, "learning_rate": 8.327328951622646e-05, "loss": 2.8856, "step": 46338 }, { "epoch": 2.27, "grad_norm": 0.6679714918136597, "learning_rate": 8.326264407439088e-05, "loss": 2.9836, "step": 46339 }, { "epoch": 2.27, "grad_norm": 0.729651153087616, "learning_rate": 8.325199920339978e-05, "loss": 2.982, "step": 46340 }, { "epoch": 2.27, "grad_norm": 0.706098735332489, "learning_rate": 8.324135490328112e-05, "loss": 2.766, "step": 46341 }, { "epoch": 2.27, "grad_norm": 0.7295530438423157, "learning_rate": 8.32307111740631e-05, "loss": 3.0328, "step": 46342 }, { "epoch": 2.27, "grad_norm": 0.6685080528259277, "learning_rate": 8.322006801577356e-05, "loss": 2.9178, "step": 46343 }, { "epoch": 2.27, "grad_norm": 0.6723880171775818, "learning_rate": 8.320942542844065e-05, "loss": 2.8969, "step": 46344 }, { "epoch": 2.27, "grad_norm": 0.741836428642273, "learning_rate": 8.319878341209248e-05, "loss": 3.0116, "step": 46345 }, { "epoch": 2.27, "grad_norm": 0.7528204321861267, "learning_rate": 8.318814196675689e-05, "loss": 2.9499, "step": 46346 }, { "epoch": 2.27, "grad_norm": 0.7128376364707947, "learning_rate": 8.317750109246213e-05, "loss": 2.8801, "step": 46347 }, { "epoch": 2.27, "grad_norm": 0.7809441685676575, "learning_rate": 8.316686078923608e-05, "loss": 2.8204, "step": 46348 }, { "epoch": 2.27, "grad_norm": 0.7417147755622864, "learning_rate": 8.315622105710672e-05, "loss": 2.7649, "step": 46349 }, { "epoch": 2.27, "grad_norm": 0.6948042511940002, "learning_rate": 8.31455818961022e-05, "loss": 2.8864, "step": 46350 }, { "epoch": 2.27, "grad_norm": 0.757095456123352, "learning_rate": 8.313494330625041e-05, "loss": 2.9817, "step": 46351 }, { "epoch": 2.27, "grad_norm": 0.6737290024757385, "learning_rate": 8.312430528757954e-05, "loss": 2.9095, "step": 46352 }, { "epoch": 2.27, "grad_norm": 0.7008237838745117, "learning_rate": 8.31136678401174e-05, "loss": 2.9619, "step": 46353 }, { "epoch": 2.27, "grad_norm": 0.7046443223953247, "learning_rate": 8.310303096389224e-05, "loss": 2.9833, "step": 46354 }, { "epoch": 2.27, "grad_norm": 0.7089089751243591, "learning_rate": 8.309239465893191e-05, "loss": 2.67, "step": 46355 }, { "epoch": 2.27, "grad_norm": 0.7592144012451172, "learning_rate": 8.30817589252644e-05, "loss": 2.9825, "step": 46356 }, { "epoch": 2.27, "grad_norm": 0.6998030543327332, "learning_rate": 8.307112376291789e-05, "loss": 2.8194, "step": 46357 }, { "epoch": 2.27, "grad_norm": 0.714889407157898, "learning_rate": 8.306048917192021e-05, "loss": 2.9081, "step": 46358 }, { "epoch": 2.27, "grad_norm": 0.7006431818008423, "learning_rate": 8.304985515229943e-05, "loss": 3.1693, "step": 46359 }, { "epoch": 2.27, "grad_norm": 0.7032368183135986, "learning_rate": 8.303922170408366e-05, "loss": 2.8906, "step": 46360 }, { "epoch": 2.27, "grad_norm": 0.7039076089859009, "learning_rate": 8.302858882730085e-05, "loss": 3.0944, "step": 46361 }, { "epoch": 2.27, "grad_norm": 0.7378221750259399, "learning_rate": 8.301795652197898e-05, "loss": 2.7746, "step": 46362 }, { "epoch": 2.27, "grad_norm": 0.738791823387146, "learning_rate": 8.300732478814594e-05, "loss": 2.9973, "step": 46363 }, { "epoch": 2.27, "grad_norm": 0.7050439715385437, "learning_rate": 8.299669362582995e-05, "loss": 2.8897, "step": 46364 }, { "epoch": 2.27, "grad_norm": 0.7334348559379578, "learning_rate": 8.298606303505883e-05, "loss": 2.8901, "step": 46365 }, { "epoch": 2.27, "grad_norm": 0.6942351460456848, "learning_rate": 8.297543301586062e-05, "loss": 3.1832, "step": 46366 }, { "epoch": 2.27, "grad_norm": 0.7496381998062134, "learning_rate": 8.296480356826347e-05, "loss": 2.9803, "step": 46367 }, { "epoch": 2.27, "grad_norm": 0.7410102486610413, "learning_rate": 8.295417469229514e-05, "loss": 2.7448, "step": 46368 }, { "epoch": 2.27, "grad_norm": 0.6778762936592102, "learning_rate": 8.294354638798386e-05, "loss": 2.9249, "step": 46369 }, { "epoch": 2.27, "grad_norm": 0.7150377035140991, "learning_rate": 8.293291865535748e-05, "loss": 2.9564, "step": 46370 }, { "epoch": 2.27, "grad_norm": 0.7311868071556091, "learning_rate": 8.292229149444394e-05, "loss": 2.8055, "step": 46371 }, { "epoch": 2.27, "grad_norm": 0.7008099555969238, "learning_rate": 8.291166490527143e-05, "loss": 2.923, "step": 46372 }, { "epoch": 2.27, "grad_norm": 0.7243220210075378, "learning_rate": 8.290103888786768e-05, "loss": 3.0309, "step": 46373 }, { "epoch": 2.27, "grad_norm": 0.7139156460762024, "learning_rate": 8.289041344226091e-05, "loss": 3.1218, "step": 46374 }, { "epoch": 2.27, "grad_norm": 0.7168537378311157, "learning_rate": 8.287978856847894e-05, "loss": 2.9758, "step": 46375 }, { "epoch": 2.27, "grad_norm": 0.7514629364013672, "learning_rate": 8.286916426654988e-05, "loss": 2.6922, "step": 46376 }, { "epoch": 2.27, "grad_norm": 0.7429746985435486, "learning_rate": 8.285854053650167e-05, "loss": 3.0634, "step": 46377 }, { "epoch": 2.27, "grad_norm": 0.8082495331764221, "learning_rate": 8.284791737836219e-05, "loss": 2.968, "step": 46378 }, { "epoch": 2.27, "grad_norm": 0.7452201843261719, "learning_rate": 8.283729479215959e-05, "loss": 2.9215, "step": 46379 }, { "epoch": 2.27, "grad_norm": 0.7076332569122314, "learning_rate": 8.282667277792168e-05, "loss": 2.6926, "step": 46380 }, { "epoch": 2.27, "grad_norm": 0.6897568106651306, "learning_rate": 8.281605133567652e-05, "loss": 2.9806, "step": 46381 }, { "epoch": 2.27, "grad_norm": 0.7367916703224182, "learning_rate": 8.280543046545215e-05, "loss": 2.9092, "step": 46382 }, { "epoch": 2.27, "grad_norm": 0.7310504913330078, "learning_rate": 8.27948101672764e-05, "loss": 2.8276, "step": 46383 }, { "epoch": 2.27, "grad_norm": 0.7102162837982178, "learning_rate": 8.278419044117749e-05, "loss": 2.9182, "step": 46384 }, { "epoch": 2.27, "grad_norm": 0.6854374408721924, "learning_rate": 8.277357128718305e-05, "loss": 2.9456, "step": 46385 }, { "epoch": 2.27, "grad_norm": 0.7474464178085327, "learning_rate": 8.276295270532119e-05, "loss": 2.8981, "step": 46386 }, { "epoch": 2.27, "grad_norm": 0.7687355875968933, "learning_rate": 8.275233469562004e-05, "loss": 2.7403, "step": 46387 }, { "epoch": 2.27, "grad_norm": 0.7094236612319946, "learning_rate": 8.274171725810729e-05, "loss": 2.9686, "step": 46388 }, { "epoch": 2.27, "grad_norm": 0.7075121402740479, "learning_rate": 8.273110039281117e-05, "loss": 2.8354, "step": 46389 }, { "epoch": 2.27, "grad_norm": 0.724186360836029, "learning_rate": 8.272048409975942e-05, "loss": 2.7629, "step": 46390 }, { "epoch": 2.27, "grad_norm": 0.7304971218109131, "learning_rate": 8.27098683789802e-05, "loss": 2.7759, "step": 46391 }, { "epoch": 2.27, "grad_norm": 0.7718026041984558, "learning_rate": 8.269925323050134e-05, "loss": 3.0894, "step": 46392 }, { "epoch": 2.27, "grad_norm": 0.7265807390213013, "learning_rate": 8.268863865435074e-05, "loss": 2.9327, "step": 46393 }, { "epoch": 2.27, "grad_norm": 0.6995274424552917, "learning_rate": 8.267802465055656e-05, "loss": 2.6844, "step": 46394 }, { "epoch": 2.27, "grad_norm": 0.7589938044548035, "learning_rate": 8.266741121914653e-05, "loss": 2.9637, "step": 46395 }, { "epoch": 2.27, "grad_norm": 0.7459881901741028, "learning_rate": 8.265679836014872e-05, "loss": 2.7266, "step": 46396 }, { "epoch": 2.27, "grad_norm": 0.7490134239196777, "learning_rate": 8.264618607359115e-05, "loss": 2.9801, "step": 46397 }, { "epoch": 2.27, "grad_norm": 0.7336806058883667, "learning_rate": 8.263557435950172e-05, "loss": 2.8788, "step": 46398 }, { "epoch": 2.27, "grad_norm": 0.6905733942985535, "learning_rate": 8.262496321790831e-05, "loss": 2.9608, "step": 46399 }, { "epoch": 2.27, "grad_norm": 0.6890795230865479, "learning_rate": 8.261435264883887e-05, "loss": 2.9883, "step": 46400 }, { "epoch": 2.27, "grad_norm": 0.7028673887252808, "learning_rate": 8.260374265232135e-05, "loss": 2.8123, "step": 46401 }, { "epoch": 2.27, "grad_norm": 0.7058315873146057, "learning_rate": 8.259313322838386e-05, "loss": 2.807, "step": 46402 }, { "epoch": 2.27, "grad_norm": 0.7144296169281006, "learning_rate": 8.258252437705411e-05, "loss": 2.912, "step": 46403 }, { "epoch": 2.27, "grad_norm": 0.7268756628036499, "learning_rate": 8.257191609836024e-05, "loss": 2.9637, "step": 46404 }, { "epoch": 2.27, "grad_norm": 0.6951913237571716, "learning_rate": 8.256130839233001e-05, "loss": 2.7703, "step": 46405 }, { "epoch": 2.27, "grad_norm": 0.7235651016235352, "learning_rate": 8.255070125899151e-05, "loss": 3.0491, "step": 46406 }, { "epoch": 2.27, "grad_norm": 0.6981080770492554, "learning_rate": 8.254009469837264e-05, "loss": 2.9714, "step": 46407 }, { "epoch": 2.27, "grad_norm": 0.6944175362586975, "learning_rate": 8.252948871050122e-05, "loss": 2.8881, "step": 46408 }, { "epoch": 2.27, "grad_norm": 0.7283467054367065, "learning_rate": 8.251888329540536e-05, "loss": 2.8444, "step": 46409 }, { "epoch": 2.27, "grad_norm": 0.7321710586547852, "learning_rate": 8.250827845311283e-05, "loss": 3.0853, "step": 46410 }, { "epoch": 2.27, "grad_norm": 0.747199535369873, "learning_rate": 8.249767418365162e-05, "loss": 3.0333, "step": 46411 }, { "epoch": 2.27, "grad_norm": 0.693293571472168, "learning_rate": 8.248707048704975e-05, "loss": 2.949, "step": 46412 }, { "epoch": 2.27, "grad_norm": 0.6886857151985168, "learning_rate": 8.247646736333511e-05, "loss": 3.0812, "step": 46413 }, { "epoch": 2.27, "grad_norm": 0.7032137513160706, "learning_rate": 8.24658648125356e-05, "loss": 2.7775, "step": 46414 }, { "epoch": 2.27, "grad_norm": 0.7268944382667542, "learning_rate": 8.2455262834679e-05, "loss": 2.8439, "step": 46415 }, { "epoch": 2.27, "grad_norm": 0.6935317516326904, "learning_rate": 8.244466142979341e-05, "loss": 2.8731, "step": 46416 }, { "epoch": 2.27, "grad_norm": 0.7060828804969788, "learning_rate": 8.243406059790676e-05, "loss": 2.8517, "step": 46417 }, { "epoch": 2.27, "grad_norm": 0.6857595443725586, "learning_rate": 8.242346033904688e-05, "loss": 2.9907, "step": 46418 }, { "epoch": 2.27, "grad_norm": 0.7835713624954224, "learning_rate": 8.24128606532418e-05, "loss": 2.6536, "step": 46419 }, { "epoch": 2.27, "grad_norm": 0.7131385803222656, "learning_rate": 8.240226154051936e-05, "loss": 2.8273, "step": 46420 }, { "epoch": 2.27, "grad_norm": 0.7091208100318909, "learning_rate": 8.23916630009074e-05, "loss": 2.8953, "step": 46421 }, { "epoch": 2.28, "grad_norm": 0.7581696510314941, "learning_rate": 8.238106503443402e-05, "loss": 2.8396, "step": 46422 }, { "epoch": 2.28, "grad_norm": 0.7144935131072998, "learning_rate": 8.237046764112694e-05, "loss": 2.9896, "step": 46423 }, { "epoch": 2.28, "grad_norm": 0.7232292890548706, "learning_rate": 8.235987082101427e-05, "loss": 2.5879, "step": 46424 }, { "epoch": 2.28, "grad_norm": 0.7325698137283325, "learning_rate": 8.234927457412371e-05, "loss": 2.9548, "step": 46425 }, { "epoch": 2.28, "grad_norm": 0.7658261656761169, "learning_rate": 8.233867890048327e-05, "loss": 2.8471, "step": 46426 }, { "epoch": 2.28, "grad_norm": 0.7133269906044006, "learning_rate": 8.232808380012095e-05, "loss": 3.0446, "step": 46427 }, { "epoch": 2.28, "grad_norm": 0.7498300075531006, "learning_rate": 8.231748927306456e-05, "loss": 2.9269, "step": 46428 }, { "epoch": 2.28, "grad_norm": 0.7016955614089966, "learning_rate": 8.230689531934203e-05, "loss": 3.0732, "step": 46429 }, { "epoch": 2.28, "grad_norm": 0.6857497692108154, "learning_rate": 8.229630193898111e-05, "loss": 2.9937, "step": 46430 }, { "epoch": 2.28, "grad_norm": 0.7271170020103455, "learning_rate": 8.228570913200987e-05, "loss": 2.8245, "step": 46431 }, { "epoch": 2.28, "grad_norm": 0.720839262008667, "learning_rate": 8.227511689845627e-05, "loss": 3.0504, "step": 46432 }, { "epoch": 2.28, "grad_norm": 0.720252513885498, "learning_rate": 8.2264525238348e-05, "loss": 3.0355, "step": 46433 }, { "epoch": 2.28, "grad_norm": 0.7447952032089233, "learning_rate": 8.225393415171316e-05, "loss": 3.0458, "step": 46434 }, { "epoch": 2.28, "grad_norm": 0.720052182674408, "learning_rate": 8.224334363857954e-05, "loss": 2.916, "step": 46435 }, { "epoch": 2.28, "grad_norm": 0.7244362235069275, "learning_rate": 8.223275369897497e-05, "loss": 2.7844, "step": 46436 }, { "epoch": 2.28, "grad_norm": 0.7059257626533508, "learning_rate": 8.222216433292751e-05, "loss": 3.0789, "step": 46437 }, { "epoch": 2.28, "grad_norm": 0.6963223218917847, "learning_rate": 8.221157554046486e-05, "loss": 2.9243, "step": 46438 }, { "epoch": 2.28, "grad_norm": 0.7097539305686951, "learning_rate": 8.22009873216151e-05, "loss": 2.9694, "step": 46439 }, { "epoch": 2.28, "grad_norm": 0.70842045545578, "learning_rate": 8.219039967640594e-05, "loss": 2.9949, "step": 46440 }, { "epoch": 2.28, "grad_norm": 0.730398952960968, "learning_rate": 8.217981260486542e-05, "loss": 2.9813, "step": 46441 }, { "epoch": 2.28, "grad_norm": 0.7620899081230164, "learning_rate": 8.216922610702136e-05, "loss": 2.8141, "step": 46442 }, { "epoch": 2.28, "grad_norm": 0.7119383811950684, "learning_rate": 8.215864018290154e-05, "loss": 2.92, "step": 46443 }, { "epoch": 2.28, "grad_norm": 0.7197368144989014, "learning_rate": 8.214805483253403e-05, "loss": 2.9015, "step": 46444 }, { "epoch": 2.28, "grad_norm": 0.7236267924308777, "learning_rate": 8.213747005594655e-05, "loss": 2.8312, "step": 46445 }, { "epoch": 2.28, "grad_norm": 0.743288516998291, "learning_rate": 8.21268858531671e-05, "loss": 3.0393, "step": 46446 }, { "epoch": 2.28, "grad_norm": 0.6969639658927917, "learning_rate": 8.211630222422343e-05, "loss": 3.0242, "step": 46447 }, { "epoch": 2.28, "grad_norm": 0.9204838275909424, "learning_rate": 8.210571916914348e-05, "loss": 2.927, "step": 46448 }, { "epoch": 2.28, "grad_norm": 0.7029668092727661, "learning_rate": 8.209513668795523e-05, "loss": 2.8216, "step": 46449 }, { "epoch": 2.28, "grad_norm": 0.7012711763381958, "learning_rate": 8.208455478068646e-05, "loss": 2.8378, "step": 46450 }, { "epoch": 2.28, "grad_norm": 0.7693458199501038, "learning_rate": 8.207397344736502e-05, "loss": 2.926, "step": 46451 }, { "epoch": 2.28, "grad_norm": 0.6860461831092834, "learning_rate": 8.20633926880187e-05, "loss": 2.8357, "step": 46452 }, { "epoch": 2.28, "grad_norm": 0.7062203288078308, "learning_rate": 8.20528125026755e-05, "loss": 3.0731, "step": 46453 }, { "epoch": 2.28, "grad_norm": 0.7307056188583374, "learning_rate": 8.20422328913633e-05, "loss": 2.6463, "step": 46454 }, { "epoch": 2.28, "grad_norm": 0.6875963807106018, "learning_rate": 8.203165385410981e-05, "loss": 2.8983, "step": 46455 }, { "epoch": 2.28, "grad_norm": 0.8253306150436401, "learning_rate": 8.202107539094311e-05, "loss": 2.8164, "step": 46456 }, { "epoch": 2.28, "grad_norm": 0.703727662563324, "learning_rate": 8.201049750189094e-05, "loss": 2.9208, "step": 46457 }, { "epoch": 2.28, "grad_norm": 0.7636400461196899, "learning_rate": 8.19999201869811e-05, "loss": 2.9006, "step": 46458 }, { "epoch": 2.28, "grad_norm": 0.6992866396903992, "learning_rate": 8.198934344624156e-05, "loss": 3.0271, "step": 46459 }, { "epoch": 2.28, "grad_norm": 0.7500672340393066, "learning_rate": 8.197876727970006e-05, "loss": 2.8297, "step": 46460 }, { "epoch": 2.28, "grad_norm": 0.682694137096405, "learning_rate": 8.196819168738465e-05, "loss": 2.9394, "step": 46461 }, { "epoch": 2.28, "grad_norm": 0.6699866056442261, "learning_rate": 8.195761666932291e-05, "loss": 2.9927, "step": 46462 }, { "epoch": 2.28, "grad_norm": 0.7636361718177795, "learning_rate": 8.194704222554297e-05, "loss": 2.6734, "step": 46463 }, { "epoch": 2.28, "grad_norm": 0.8084840774536133, "learning_rate": 8.193646835607256e-05, "loss": 2.8364, "step": 46464 }, { "epoch": 2.28, "grad_norm": 0.7584612965583801, "learning_rate": 8.192589506093943e-05, "loss": 2.8917, "step": 46465 }, { "epoch": 2.28, "grad_norm": 0.8246785998344421, "learning_rate": 8.191532234017162e-05, "loss": 2.7962, "step": 46466 }, { "epoch": 2.28, "grad_norm": 0.7840319871902466, "learning_rate": 8.190475019379678e-05, "loss": 2.8299, "step": 46467 }, { "epoch": 2.28, "grad_norm": 0.7328280806541443, "learning_rate": 8.189417862184287e-05, "loss": 2.7687, "step": 46468 }, { "epoch": 2.28, "grad_norm": 0.7843512892723083, "learning_rate": 8.18836076243378e-05, "loss": 3.0041, "step": 46469 }, { "epoch": 2.28, "grad_norm": 0.7056668400764465, "learning_rate": 8.187303720130922e-05, "loss": 3.0747, "step": 46470 }, { "epoch": 2.28, "grad_norm": 0.7091785669326782, "learning_rate": 8.186246735278518e-05, "loss": 2.9689, "step": 46471 }, { "epoch": 2.28, "grad_norm": 0.7240222096443176, "learning_rate": 8.185189807879345e-05, "loss": 2.8462, "step": 46472 }, { "epoch": 2.28, "grad_norm": 0.6857366561889648, "learning_rate": 8.18413293793617e-05, "loss": 3.0774, "step": 46473 }, { "epoch": 2.28, "grad_norm": 0.7089368104934692, "learning_rate": 8.183076125451803e-05, "loss": 2.8356, "step": 46474 }, { "epoch": 2.28, "grad_norm": 0.8050333261489868, "learning_rate": 8.182019370429005e-05, "loss": 3.0691, "step": 46475 }, { "epoch": 2.28, "grad_norm": 0.740153431892395, "learning_rate": 8.180962672870577e-05, "loss": 2.796, "step": 46476 }, { "epoch": 2.28, "grad_norm": 0.7142683267593384, "learning_rate": 8.179906032779283e-05, "loss": 2.8724, "step": 46477 }, { "epoch": 2.28, "grad_norm": 0.6894727349281311, "learning_rate": 8.178849450157932e-05, "loss": 2.6895, "step": 46478 }, { "epoch": 2.28, "grad_norm": 0.7132574915885925, "learning_rate": 8.177792925009286e-05, "loss": 2.925, "step": 46479 }, { "epoch": 2.28, "grad_norm": 0.7323726415634155, "learning_rate": 8.176736457336129e-05, "loss": 2.7728, "step": 46480 }, { "epoch": 2.28, "grad_norm": 0.7239104509353638, "learning_rate": 8.175680047141254e-05, "loss": 2.7761, "step": 46481 }, { "epoch": 2.28, "grad_norm": 0.8582285046577454, "learning_rate": 8.174623694427433e-05, "loss": 3.2315, "step": 46482 }, { "epoch": 2.28, "grad_norm": 0.6943944692611694, "learning_rate": 8.173567399197448e-05, "loss": 2.8219, "step": 46483 }, { "epoch": 2.28, "grad_norm": 0.7097330689430237, "learning_rate": 8.172511161454095e-05, "loss": 2.7027, "step": 46484 }, { "epoch": 2.28, "grad_norm": 0.7035298347473145, "learning_rate": 8.17145498120015e-05, "loss": 2.9617, "step": 46485 }, { "epoch": 2.28, "grad_norm": 0.7039844393730164, "learning_rate": 8.17039885843839e-05, "loss": 2.9092, "step": 46486 }, { "epoch": 2.28, "grad_norm": 0.7377235889434814, "learning_rate": 8.16934279317159e-05, "loss": 2.912, "step": 46487 }, { "epoch": 2.28, "grad_norm": 0.6866671442985535, "learning_rate": 8.168286785402537e-05, "loss": 2.9288, "step": 46488 }, { "epoch": 2.28, "grad_norm": 0.7029548287391663, "learning_rate": 8.167230835134027e-05, "loss": 2.9683, "step": 46489 }, { "epoch": 2.28, "grad_norm": 0.7336953282356262, "learning_rate": 8.16617494236882e-05, "loss": 2.921, "step": 46490 }, { "epoch": 2.28, "grad_norm": 0.6844274997711182, "learning_rate": 8.165119107109714e-05, "loss": 2.8364, "step": 46491 }, { "epoch": 2.28, "grad_norm": 0.7402933835983276, "learning_rate": 8.164063329359473e-05, "loss": 2.6783, "step": 46492 }, { "epoch": 2.28, "grad_norm": 0.6989611983299255, "learning_rate": 8.163007609120897e-05, "loss": 3.0188, "step": 46493 }, { "epoch": 2.28, "grad_norm": 0.7093426585197449, "learning_rate": 8.161951946396756e-05, "loss": 2.6546, "step": 46494 }, { "epoch": 2.28, "grad_norm": 0.7372518181800842, "learning_rate": 8.160896341189821e-05, "loss": 2.7915, "step": 46495 }, { "epoch": 2.28, "grad_norm": 0.7127835154533386, "learning_rate": 8.159840793502891e-05, "loss": 2.8474, "step": 46496 }, { "epoch": 2.28, "grad_norm": 0.7472658157348633, "learning_rate": 8.158785303338731e-05, "loss": 2.912, "step": 46497 }, { "epoch": 2.28, "grad_norm": 0.7016239166259766, "learning_rate": 8.157729870700125e-05, "loss": 2.835, "step": 46498 }, { "epoch": 2.28, "grad_norm": 0.7203468680381775, "learning_rate": 8.156674495589862e-05, "loss": 2.7075, "step": 46499 }, { "epoch": 2.28, "grad_norm": 0.6911539435386658, "learning_rate": 8.155619178010717e-05, "loss": 2.7875, "step": 46500 }, { "epoch": 2.28, "grad_norm": 0.8295294046401978, "learning_rate": 8.154563917965465e-05, "loss": 2.7513, "step": 46501 }, { "epoch": 2.28, "grad_norm": 0.715740978717804, "learning_rate": 8.15350871545688e-05, "loss": 2.8735, "step": 46502 }, { "epoch": 2.28, "grad_norm": 0.7473269104957581, "learning_rate": 8.152453570487749e-05, "loss": 2.6743, "step": 46503 }, { "epoch": 2.28, "grad_norm": 0.7766650319099426, "learning_rate": 8.151398483060859e-05, "loss": 2.8402, "step": 46504 }, { "epoch": 2.28, "grad_norm": 0.6803699731826782, "learning_rate": 8.150343453178971e-05, "loss": 2.6261, "step": 46505 }, { "epoch": 2.28, "grad_norm": 0.9094350934028625, "learning_rate": 8.149288480844884e-05, "loss": 2.9338, "step": 46506 }, { "epoch": 2.28, "grad_norm": 0.7277110815048218, "learning_rate": 8.148233566061352e-05, "loss": 3.097, "step": 46507 }, { "epoch": 2.28, "grad_norm": 0.7063420414924622, "learning_rate": 8.147178708831181e-05, "loss": 2.9711, "step": 46508 }, { "epoch": 2.28, "grad_norm": 0.7325363159179688, "learning_rate": 8.146123909157137e-05, "loss": 2.838, "step": 46509 }, { "epoch": 2.28, "grad_norm": 0.6835830211639404, "learning_rate": 8.145069167041982e-05, "loss": 2.8725, "step": 46510 }, { "epoch": 2.28, "grad_norm": 0.7027117609977722, "learning_rate": 8.144014482488523e-05, "loss": 2.8241, "step": 46511 }, { "epoch": 2.28, "grad_norm": 0.7053887248039246, "learning_rate": 8.142959855499511e-05, "loss": 2.895, "step": 46512 }, { "epoch": 2.28, "grad_norm": 0.6964938044548035, "learning_rate": 8.141905286077736e-05, "loss": 3.0054, "step": 46513 }, { "epoch": 2.28, "grad_norm": 0.7192620038986206, "learning_rate": 8.140850774225988e-05, "loss": 2.8054, "step": 46514 }, { "epoch": 2.28, "grad_norm": 0.7804059386253357, "learning_rate": 8.139796319947032e-05, "loss": 2.9574, "step": 46515 }, { "epoch": 2.28, "grad_norm": 0.6990054845809937, "learning_rate": 8.138741923243643e-05, "loss": 2.7702, "step": 46516 }, { "epoch": 2.28, "grad_norm": 0.7165387272834778, "learning_rate": 8.137687584118594e-05, "loss": 2.841, "step": 46517 }, { "epoch": 2.28, "grad_norm": 0.7471354603767395, "learning_rate": 8.136633302574678e-05, "loss": 2.9077, "step": 46518 }, { "epoch": 2.28, "grad_norm": 0.7257309556007385, "learning_rate": 8.13557907861465e-05, "loss": 3.0477, "step": 46519 }, { "epoch": 2.28, "grad_norm": 0.7215765118598938, "learning_rate": 8.1345249122413e-05, "loss": 2.9011, "step": 46520 }, { "epoch": 2.28, "grad_norm": 0.6833771467208862, "learning_rate": 8.133470803457413e-05, "loss": 2.7987, "step": 46521 }, { "epoch": 2.28, "grad_norm": 0.7473302483558655, "learning_rate": 8.132416752265754e-05, "loss": 2.707, "step": 46522 }, { "epoch": 2.28, "grad_norm": 0.7858054637908936, "learning_rate": 8.131362758669103e-05, "loss": 3.0084, "step": 46523 }, { "epoch": 2.28, "grad_norm": 0.7041542530059814, "learning_rate": 8.130308822670223e-05, "loss": 2.665, "step": 46524 }, { "epoch": 2.28, "grad_norm": 0.705814778804779, "learning_rate": 8.1292549442719e-05, "loss": 3.0473, "step": 46525 }, { "epoch": 2.28, "grad_norm": 0.7002612352371216, "learning_rate": 8.128201123476923e-05, "loss": 2.7564, "step": 46526 }, { "epoch": 2.28, "grad_norm": 0.746374249458313, "learning_rate": 8.127147360288043e-05, "loss": 2.9071, "step": 46527 }, { "epoch": 2.28, "grad_norm": 0.7429247498512268, "learning_rate": 8.126093654708056e-05, "loss": 2.9663, "step": 46528 }, { "epoch": 2.28, "grad_norm": 0.7674092650413513, "learning_rate": 8.125040006739722e-05, "loss": 3.1434, "step": 46529 }, { "epoch": 2.28, "grad_norm": 0.6966801285743713, "learning_rate": 8.123986416385831e-05, "loss": 2.9157, "step": 46530 }, { "epoch": 2.28, "grad_norm": 0.7209636569023132, "learning_rate": 8.122932883649147e-05, "loss": 3.0265, "step": 46531 }, { "epoch": 2.28, "grad_norm": 0.7358286380767822, "learning_rate": 8.121879408532439e-05, "loss": 3.1313, "step": 46532 }, { "epoch": 2.28, "grad_norm": 0.7346293926239014, "learning_rate": 8.120825991038502e-05, "loss": 2.9752, "step": 46533 }, { "epoch": 2.28, "grad_norm": 0.7059187889099121, "learning_rate": 8.119772631170088e-05, "loss": 2.8991, "step": 46534 }, { "epoch": 2.28, "grad_norm": 0.6938578486442566, "learning_rate": 8.118719328929983e-05, "loss": 2.9862, "step": 46535 }, { "epoch": 2.28, "grad_norm": 0.7114361524581909, "learning_rate": 8.117666084320967e-05, "loss": 2.7991, "step": 46536 }, { "epoch": 2.28, "grad_norm": 0.7124288082122803, "learning_rate": 8.116612897345807e-05, "loss": 3.0468, "step": 46537 }, { "epoch": 2.28, "grad_norm": 0.7127500772476196, "learning_rate": 8.115559768007278e-05, "loss": 2.8632, "step": 46538 }, { "epoch": 2.28, "grad_norm": 0.6895161867141724, "learning_rate": 8.114506696308147e-05, "loss": 2.7845, "step": 46539 }, { "epoch": 2.28, "grad_norm": 0.7293110489845276, "learning_rate": 8.113453682251188e-05, "loss": 2.8978, "step": 46540 }, { "epoch": 2.28, "grad_norm": 0.7473793029785156, "learning_rate": 8.112400725839193e-05, "loss": 3.1251, "step": 46541 }, { "epoch": 2.28, "grad_norm": 0.710602343082428, "learning_rate": 8.111347827074912e-05, "loss": 2.9912, "step": 46542 }, { "epoch": 2.28, "grad_norm": 0.6810015439987183, "learning_rate": 8.110294985961136e-05, "loss": 2.8958, "step": 46543 }, { "epoch": 2.28, "grad_norm": 0.6719366312026978, "learning_rate": 8.10924220250063e-05, "loss": 3.0995, "step": 46544 }, { "epoch": 2.28, "grad_norm": 0.6884929537773132, "learning_rate": 8.108189476696162e-05, "loss": 3.2298, "step": 46545 }, { "epoch": 2.28, "grad_norm": 0.7371322512626648, "learning_rate": 8.107136808550513e-05, "loss": 2.9997, "step": 46546 }, { "epoch": 2.28, "grad_norm": 0.7723472118377686, "learning_rate": 8.106084198066448e-05, "loss": 2.9791, "step": 46547 }, { "epoch": 2.28, "grad_norm": 0.7253080606460571, "learning_rate": 8.105031645246752e-05, "loss": 2.6226, "step": 46548 }, { "epoch": 2.28, "grad_norm": 0.7322795987129211, "learning_rate": 8.103979150094178e-05, "loss": 2.8257, "step": 46549 }, { "epoch": 2.28, "grad_norm": 0.7000114321708679, "learning_rate": 8.102926712611508e-05, "loss": 3.0231, "step": 46550 }, { "epoch": 2.28, "grad_norm": 0.7282766699790955, "learning_rate": 8.101874332801526e-05, "loss": 3.0256, "step": 46551 }, { "epoch": 2.28, "grad_norm": 0.702883243560791, "learning_rate": 8.100822010666995e-05, "loss": 2.8617, "step": 46552 }, { "epoch": 2.28, "grad_norm": 0.7281079292297363, "learning_rate": 8.099769746210681e-05, "loss": 3.0817, "step": 46553 }, { "epoch": 2.28, "grad_norm": 0.7449455261230469, "learning_rate": 8.098717539435349e-05, "loss": 2.7433, "step": 46554 }, { "epoch": 2.28, "grad_norm": 0.7251495122909546, "learning_rate": 8.09766539034378e-05, "loss": 2.6756, "step": 46555 }, { "epoch": 2.28, "grad_norm": 0.7186842560768127, "learning_rate": 8.096613298938758e-05, "loss": 2.9822, "step": 46556 }, { "epoch": 2.28, "grad_norm": 0.7262799739837646, "learning_rate": 8.095561265223028e-05, "loss": 2.6999, "step": 46557 }, { "epoch": 2.28, "grad_norm": 0.7726730108261108, "learning_rate": 8.094509289199384e-05, "loss": 2.7446, "step": 46558 }, { "epoch": 2.28, "grad_norm": 0.7000957727432251, "learning_rate": 8.093457370870588e-05, "loss": 2.8547, "step": 46559 }, { "epoch": 2.28, "grad_norm": 0.7143601179122925, "learning_rate": 8.092405510239402e-05, "loss": 2.8717, "step": 46560 }, { "epoch": 2.28, "grad_norm": 0.6854216456413269, "learning_rate": 8.09135370730861e-05, "loss": 2.9581, "step": 46561 }, { "epoch": 2.28, "grad_norm": 0.745839536190033, "learning_rate": 8.090301962080968e-05, "loss": 2.9961, "step": 46562 }, { "epoch": 2.28, "grad_norm": 0.7243121862411499, "learning_rate": 8.089250274559262e-05, "loss": 2.821, "step": 46563 }, { "epoch": 2.28, "grad_norm": 0.7288719415664673, "learning_rate": 8.088198644746244e-05, "loss": 2.6618, "step": 46564 }, { "epoch": 2.28, "grad_norm": 0.7389212250709534, "learning_rate": 8.087147072644704e-05, "loss": 2.9932, "step": 46565 }, { "epoch": 2.28, "grad_norm": 0.7060454487800598, "learning_rate": 8.086095558257402e-05, "loss": 2.9091, "step": 46566 }, { "epoch": 2.28, "grad_norm": 0.7329028248786926, "learning_rate": 8.085044101587097e-05, "loss": 2.9153, "step": 46567 }, { "epoch": 2.28, "grad_norm": 0.7609493136405945, "learning_rate": 8.083992702636578e-05, "loss": 3.0979, "step": 46568 }, { "epoch": 2.28, "grad_norm": 0.7355934381484985, "learning_rate": 8.082941361408593e-05, "loss": 2.9956, "step": 46569 }, { "epoch": 2.28, "grad_norm": 0.6936449408531189, "learning_rate": 8.081890077905925e-05, "loss": 3.0962, "step": 46570 }, { "epoch": 2.28, "grad_norm": 0.7607849836349487, "learning_rate": 8.08083885213135e-05, "loss": 2.7487, "step": 46571 }, { "epoch": 2.28, "grad_norm": 0.7081700563430786, "learning_rate": 8.079787684087614e-05, "loss": 2.7324, "step": 46572 }, { "epoch": 2.28, "grad_norm": 0.7279213070869446, "learning_rate": 8.07873657377751e-05, "loss": 2.8963, "step": 46573 }, { "epoch": 2.28, "grad_norm": 0.7351059317588806, "learning_rate": 8.077685521203795e-05, "loss": 2.8639, "step": 46574 }, { "epoch": 2.28, "grad_norm": 0.7259153127670288, "learning_rate": 8.07663452636923e-05, "loss": 2.8075, "step": 46575 }, { "epoch": 2.28, "grad_norm": 0.7282455563545227, "learning_rate": 8.0755835892766e-05, "loss": 3.0483, "step": 46576 }, { "epoch": 2.28, "grad_norm": 0.6987950801849365, "learning_rate": 8.074532709928651e-05, "loss": 2.9277, "step": 46577 }, { "epoch": 2.28, "grad_norm": 0.7334350347518921, "learning_rate": 8.073481888328174e-05, "loss": 2.9942, "step": 46578 }, { "epoch": 2.28, "grad_norm": 0.7089470624923706, "learning_rate": 8.072431124477917e-05, "loss": 2.9821, "step": 46579 }, { "epoch": 2.28, "grad_norm": 0.7162338495254517, "learning_rate": 8.071380418380663e-05, "loss": 2.7657, "step": 46580 }, { "epoch": 2.28, "grad_norm": 0.7498306035995483, "learning_rate": 8.070329770039175e-05, "loss": 2.8501, "step": 46581 }, { "epoch": 2.28, "grad_norm": 0.735768735408783, "learning_rate": 8.069279179456209e-05, "loss": 2.9595, "step": 46582 }, { "epoch": 2.28, "grad_norm": 0.7566409111022949, "learning_rate": 8.068228646634549e-05, "loss": 2.9093, "step": 46583 }, { "epoch": 2.28, "grad_norm": 0.7465990781784058, "learning_rate": 8.067178171576945e-05, "loss": 2.9164, "step": 46584 }, { "epoch": 2.28, "grad_norm": 0.7334000468254089, "learning_rate": 8.066127754286173e-05, "loss": 3.0872, "step": 46585 }, { "epoch": 2.28, "grad_norm": 0.8011319041252136, "learning_rate": 8.06507739476501e-05, "loss": 2.9013, "step": 46586 }, { "epoch": 2.28, "grad_norm": 0.729567289352417, "learning_rate": 8.064027093016208e-05, "loss": 2.8262, "step": 46587 }, { "epoch": 2.28, "grad_norm": 0.7324111461639404, "learning_rate": 8.062976849042539e-05, "loss": 2.7336, "step": 46588 }, { "epoch": 2.28, "grad_norm": 0.7233032584190369, "learning_rate": 8.06192666284676e-05, "loss": 2.4499, "step": 46589 }, { "epoch": 2.28, "grad_norm": 0.7346892952919006, "learning_rate": 8.060876534431642e-05, "loss": 2.9368, "step": 46590 }, { "epoch": 2.28, "grad_norm": 0.7054365277290344, "learning_rate": 8.059826463799961e-05, "loss": 2.6252, "step": 46591 }, { "epoch": 2.28, "grad_norm": 0.7693349719047546, "learning_rate": 8.058776450954467e-05, "loss": 2.9058, "step": 46592 }, { "epoch": 2.28, "grad_norm": 0.7086447477340698, "learning_rate": 8.057726495897941e-05, "loss": 2.8654, "step": 46593 }, { "epoch": 2.28, "grad_norm": 0.7039918303489685, "learning_rate": 8.056676598633134e-05, "loss": 2.985, "step": 46594 }, { "epoch": 2.28, "grad_norm": 0.722930371761322, "learning_rate": 8.055626759162824e-05, "loss": 2.9315, "step": 46595 }, { "epoch": 2.28, "grad_norm": 0.7084254026412964, "learning_rate": 8.054576977489771e-05, "loss": 2.7143, "step": 46596 }, { "epoch": 2.28, "grad_norm": 0.7326437830924988, "learning_rate": 8.053527253616731e-05, "loss": 2.91, "step": 46597 }, { "epoch": 2.28, "grad_norm": 0.709077000617981, "learning_rate": 8.052477587546486e-05, "loss": 2.8769, "step": 46598 }, { "epoch": 2.28, "grad_norm": 0.7226983308792114, "learning_rate": 8.051427979281779e-05, "loss": 2.8338, "step": 46599 }, { "epoch": 2.28, "grad_norm": 0.7126448154449463, "learning_rate": 8.050378428825398e-05, "loss": 2.8205, "step": 46600 }, { "epoch": 2.28, "grad_norm": 0.6979407668113708, "learning_rate": 8.049328936180087e-05, "loss": 2.9157, "step": 46601 }, { "epoch": 2.28, "grad_norm": 0.7206785678863525, "learning_rate": 8.048279501348624e-05, "loss": 2.8154, "step": 46602 }, { "epoch": 2.28, "grad_norm": 0.711652934551239, "learning_rate": 8.047230124333771e-05, "loss": 3.0502, "step": 46603 }, { "epoch": 2.28, "grad_norm": 1.0295687913894653, "learning_rate": 8.04618080513828e-05, "loss": 2.8277, "step": 46604 }, { "epoch": 2.28, "grad_norm": 0.7558762431144714, "learning_rate": 8.045131543764933e-05, "loss": 2.846, "step": 46605 }, { "epoch": 2.28, "grad_norm": 0.7087622284889221, "learning_rate": 8.044082340216474e-05, "loss": 2.8821, "step": 46606 }, { "epoch": 2.28, "grad_norm": 0.7025753259658813, "learning_rate": 8.043033194495678e-05, "loss": 3.1409, "step": 46607 }, { "epoch": 2.28, "grad_norm": 0.6761602163314819, "learning_rate": 8.041984106605316e-05, "loss": 2.7138, "step": 46608 }, { "epoch": 2.28, "grad_norm": 0.7491973042488098, "learning_rate": 8.040935076548135e-05, "loss": 2.8269, "step": 46609 }, { "epoch": 2.28, "grad_norm": 0.7301766276359558, "learning_rate": 8.039886104326921e-05, "loss": 3.1797, "step": 46610 }, { "epoch": 2.28, "grad_norm": 0.6868095993995667, "learning_rate": 8.0388371899444e-05, "loss": 3.0877, "step": 46611 }, { "epoch": 2.28, "grad_norm": 0.7199649214744568, "learning_rate": 8.037788333403357e-05, "loss": 3.0416, "step": 46612 }, { "epoch": 2.28, "grad_norm": 0.6923564076423645, "learning_rate": 8.036739534706563e-05, "loss": 2.9795, "step": 46613 }, { "epoch": 2.28, "grad_norm": 0.7084708213806152, "learning_rate": 8.03569079385676e-05, "loss": 2.8675, "step": 46614 }, { "epoch": 2.28, "grad_norm": 0.7493816614151001, "learning_rate": 8.03464211085673e-05, "loss": 3.133, "step": 46615 }, { "epoch": 2.28, "grad_norm": 0.6935444474220276, "learning_rate": 8.033593485709214e-05, "loss": 2.8476, "step": 46616 }, { "epoch": 2.28, "grad_norm": 0.7323094606399536, "learning_rate": 8.032544918416996e-05, "loss": 3.0348, "step": 46617 }, { "epoch": 2.28, "grad_norm": 0.7361232042312622, "learning_rate": 8.03149640898283e-05, "loss": 2.812, "step": 46618 }, { "epoch": 2.28, "grad_norm": 0.7444538474082947, "learning_rate": 8.030447957409464e-05, "loss": 2.874, "step": 46619 }, { "epoch": 2.28, "grad_norm": 0.718134343624115, "learning_rate": 8.029399563699674e-05, "loss": 2.8746, "step": 46620 }, { "epoch": 2.28, "grad_norm": 0.7226566076278687, "learning_rate": 8.028351227856213e-05, "loss": 3.0168, "step": 46621 }, { "epoch": 2.28, "grad_norm": 0.844632089138031, "learning_rate": 8.027302949881845e-05, "loss": 2.8895, "step": 46622 }, { "epoch": 2.28, "grad_norm": 0.7833714485168457, "learning_rate": 8.026254729779344e-05, "loss": 2.9055, "step": 46623 }, { "epoch": 2.28, "grad_norm": 0.7003939747810364, "learning_rate": 8.025206567551455e-05, "loss": 3.0071, "step": 46624 }, { "epoch": 2.28, "grad_norm": 0.6972513794898987, "learning_rate": 8.024158463200946e-05, "loss": 2.9896, "step": 46625 }, { "epoch": 2.29, "grad_norm": 0.7376452684402466, "learning_rate": 8.023110416730564e-05, "loss": 2.8573, "step": 46626 }, { "epoch": 2.29, "grad_norm": 0.720854640007019, "learning_rate": 8.022062428143076e-05, "loss": 3.1533, "step": 46627 }, { "epoch": 2.29, "grad_norm": 0.7451248168945312, "learning_rate": 8.02101449744126e-05, "loss": 3.0563, "step": 46628 }, { "epoch": 2.29, "grad_norm": 0.7125359773635864, "learning_rate": 8.019966624627852e-05, "loss": 2.8611, "step": 46629 }, { "epoch": 2.29, "grad_norm": 0.7607391476631165, "learning_rate": 8.018918809705628e-05, "loss": 2.8573, "step": 46630 }, { "epoch": 2.29, "grad_norm": 0.7929643988609314, "learning_rate": 8.017871052677335e-05, "loss": 2.8047, "step": 46631 }, { "epoch": 2.29, "grad_norm": 0.7148974537849426, "learning_rate": 8.016823353545749e-05, "loss": 2.8755, "step": 46632 }, { "epoch": 2.29, "grad_norm": 0.8018413782119751, "learning_rate": 8.015775712313616e-05, "loss": 2.7893, "step": 46633 }, { "epoch": 2.29, "grad_norm": 0.6894913911819458, "learning_rate": 8.014728128983693e-05, "loss": 2.7284, "step": 46634 }, { "epoch": 2.29, "grad_norm": 0.7120516896247864, "learning_rate": 8.013680603558754e-05, "loss": 2.9376, "step": 46635 }, { "epoch": 2.29, "grad_norm": 0.7336519360542297, "learning_rate": 8.012633136041537e-05, "loss": 2.8246, "step": 46636 }, { "epoch": 2.29, "grad_norm": 0.7027100920677185, "learning_rate": 8.011585726434813e-05, "loss": 2.955, "step": 46637 }, { "epoch": 2.29, "grad_norm": 0.7373955249786377, "learning_rate": 8.010538374741355e-05, "loss": 2.8527, "step": 46638 }, { "epoch": 2.29, "grad_norm": 0.6577633619308472, "learning_rate": 8.0094910809639e-05, "loss": 2.8324, "step": 46639 }, { "epoch": 2.29, "grad_norm": 0.7087864279747009, "learning_rate": 8.008443845105216e-05, "loss": 2.7932, "step": 46640 }, { "epoch": 2.29, "grad_norm": 0.7115554809570312, "learning_rate": 8.007396667168052e-05, "loss": 2.8594, "step": 46641 }, { "epoch": 2.29, "grad_norm": 0.7160695791244507, "learning_rate": 8.006349547155171e-05, "loss": 2.9263, "step": 46642 }, { "epoch": 2.29, "grad_norm": 0.7285076379776001, "learning_rate": 8.005302485069339e-05, "loss": 2.9813, "step": 46643 }, { "epoch": 2.29, "grad_norm": 0.7814637422561646, "learning_rate": 8.004255480913302e-05, "loss": 2.8582, "step": 46644 }, { "epoch": 2.29, "grad_norm": 0.6893579959869385, "learning_rate": 8.00320853468983e-05, "loss": 2.6703, "step": 46645 }, { "epoch": 2.29, "grad_norm": 0.8030416965484619, "learning_rate": 8.002161646401671e-05, "loss": 2.9652, "step": 46646 }, { "epoch": 2.29, "grad_norm": 0.7509509325027466, "learning_rate": 8.00111481605158e-05, "loss": 2.639, "step": 46647 }, { "epoch": 2.29, "grad_norm": 0.6878439784049988, "learning_rate": 8.000068043642325e-05, "loss": 3.2898, "step": 46648 }, { "epoch": 2.29, "grad_norm": 0.6887226104736328, "learning_rate": 7.999021329176649e-05, "loss": 2.7212, "step": 46649 }, { "epoch": 2.29, "grad_norm": 0.7231206297874451, "learning_rate": 7.997974672657325e-05, "loss": 2.9418, "step": 46650 }, { "epoch": 2.29, "grad_norm": 0.7704378366470337, "learning_rate": 7.996928074087092e-05, "loss": 2.7953, "step": 46651 }, { "epoch": 2.29, "grad_norm": 0.6910778284072876, "learning_rate": 7.995881533468717e-05, "loss": 3.2411, "step": 46652 }, { "epoch": 2.29, "grad_norm": 0.6970378160476685, "learning_rate": 7.994835050804966e-05, "loss": 2.6902, "step": 46653 }, { "epoch": 2.29, "grad_norm": 0.6877171993255615, "learning_rate": 7.993788626098583e-05, "loss": 2.701, "step": 46654 }, { "epoch": 2.29, "grad_norm": 0.7191632390022278, "learning_rate": 7.992742259352324e-05, "loss": 2.7879, "step": 46655 }, { "epoch": 2.29, "grad_norm": 0.7293527722358704, "learning_rate": 7.991695950568939e-05, "loss": 2.9878, "step": 46656 }, { "epoch": 2.29, "grad_norm": 0.6775935292243958, "learning_rate": 7.99064969975119e-05, "loss": 2.8923, "step": 46657 }, { "epoch": 2.29, "grad_norm": 0.7595655918121338, "learning_rate": 7.989603506901842e-05, "loss": 2.995, "step": 46658 }, { "epoch": 2.29, "grad_norm": 0.6996396780014038, "learning_rate": 7.988557372023637e-05, "loss": 2.8157, "step": 46659 }, { "epoch": 2.29, "grad_norm": 0.7048630714416504, "learning_rate": 7.987511295119342e-05, "loss": 2.8022, "step": 46660 }, { "epoch": 2.29, "grad_norm": 0.7288792133331299, "learning_rate": 7.986465276191708e-05, "loss": 2.8384, "step": 46661 }, { "epoch": 2.29, "grad_norm": 0.6870715618133545, "learning_rate": 7.985419315243476e-05, "loss": 3.0466, "step": 46662 }, { "epoch": 2.29, "grad_norm": 0.7056323289871216, "learning_rate": 7.984373412277422e-05, "loss": 2.8514, "step": 46663 }, { "epoch": 2.29, "grad_norm": 0.6987048387527466, "learning_rate": 7.983327567296284e-05, "loss": 3.0196, "step": 46664 }, { "epoch": 2.29, "grad_norm": 0.7563616037368774, "learning_rate": 7.982281780302834e-05, "loss": 2.9289, "step": 46665 }, { "epoch": 2.29, "grad_norm": 0.684380054473877, "learning_rate": 7.981236051299806e-05, "loss": 2.8782, "step": 46666 }, { "epoch": 2.29, "grad_norm": 0.715981662273407, "learning_rate": 7.980190380289973e-05, "loss": 2.9129, "step": 46667 }, { "epoch": 2.29, "grad_norm": 0.7292078733444214, "learning_rate": 7.97914476727608e-05, "loss": 2.8948, "step": 46668 }, { "epoch": 2.29, "grad_norm": 0.791476845741272, "learning_rate": 7.978099212260877e-05, "loss": 2.6953, "step": 46669 }, { "epoch": 2.29, "grad_norm": 0.7197558879852295, "learning_rate": 7.977053715247127e-05, "loss": 2.9594, "step": 46670 }, { "epoch": 2.29, "grad_norm": 0.7770285606384277, "learning_rate": 7.976008276237572e-05, "loss": 2.7839, "step": 46671 }, { "epoch": 2.29, "grad_norm": 0.6870531439781189, "learning_rate": 7.97496289523498e-05, "loss": 2.8836, "step": 46672 }, { "epoch": 2.29, "grad_norm": 0.7081472277641296, "learning_rate": 7.973917572242091e-05, "loss": 2.8042, "step": 46673 }, { "epoch": 2.29, "grad_norm": 0.702302098274231, "learning_rate": 7.972872307261661e-05, "loss": 2.8638, "step": 46674 }, { "epoch": 2.29, "grad_norm": 0.7121188044548035, "learning_rate": 7.971827100296456e-05, "loss": 2.8277, "step": 46675 }, { "epoch": 2.29, "grad_norm": 0.7616391777992249, "learning_rate": 7.970781951349218e-05, "loss": 2.8267, "step": 46676 }, { "epoch": 2.29, "grad_norm": 0.7238693237304688, "learning_rate": 7.969736860422704e-05, "loss": 3.0476, "step": 46677 }, { "epoch": 2.29, "grad_norm": 0.7182580828666687, "learning_rate": 7.968691827519653e-05, "loss": 3.1638, "step": 46678 }, { "epoch": 2.29, "grad_norm": 0.708895742893219, "learning_rate": 7.967646852642827e-05, "loss": 3.0584, "step": 46679 }, { "epoch": 2.29, "grad_norm": 0.7667748332023621, "learning_rate": 7.96660193579499e-05, "loss": 2.935, "step": 46680 }, { "epoch": 2.29, "grad_norm": 0.7279300093650818, "learning_rate": 7.965557076978871e-05, "loss": 2.8778, "step": 46681 }, { "epoch": 2.29, "grad_norm": 0.6909590363502502, "learning_rate": 7.964512276197244e-05, "loss": 2.6265, "step": 46682 }, { "epoch": 2.29, "grad_norm": 0.7029200792312622, "learning_rate": 7.963467533452851e-05, "loss": 2.8785, "step": 46683 }, { "epoch": 2.29, "grad_norm": 0.6875868439674377, "learning_rate": 7.962422848748434e-05, "loss": 2.9, "step": 46684 }, { "epoch": 2.29, "grad_norm": 0.7104182243347168, "learning_rate": 7.961378222086765e-05, "loss": 2.7144, "step": 46685 }, { "epoch": 2.29, "grad_norm": 0.7271749973297119, "learning_rate": 7.960333653470575e-05, "loss": 2.947, "step": 46686 }, { "epoch": 2.29, "grad_norm": 0.7475045919418335, "learning_rate": 7.959289142902633e-05, "loss": 2.7681, "step": 46687 }, { "epoch": 2.29, "grad_norm": 0.7369611263275146, "learning_rate": 7.958244690385669e-05, "loss": 3.0237, "step": 46688 }, { "epoch": 2.29, "grad_norm": 0.7487080097198486, "learning_rate": 7.95720029592246e-05, "loss": 2.9306, "step": 46689 }, { "epoch": 2.29, "grad_norm": 0.7130888104438782, "learning_rate": 7.956155959515741e-05, "loss": 2.7723, "step": 46690 }, { "epoch": 2.29, "grad_norm": 0.7896283268928528, "learning_rate": 7.955111681168256e-05, "loss": 2.7895, "step": 46691 }, { "epoch": 2.29, "grad_norm": 0.783572256565094, "learning_rate": 7.954067460882776e-05, "loss": 2.797, "step": 46692 }, { "epoch": 2.29, "grad_norm": 0.7980788350105286, "learning_rate": 7.953023298662029e-05, "loss": 2.9751, "step": 46693 }, { "epoch": 2.29, "grad_norm": 0.7563785314559937, "learning_rate": 7.951979194508772e-05, "loss": 2.9569, "step": 46694 }, { "epoch": 2.29, "grad_norm": 0.7462531328201294, "learning_rate": 7.950935148425772e-05, "loss": 2.9621, "step": 46695 }, { "epoch": 2.29, "grad_norm": 0.7288581132888794, "learning_rate": 7.949891160415755e-05, "loss": 2.8099, "step": 46696 }, { "epoch": 2.29, "grad_norm": 0.7490408420562744, "learning_rate": 7.948847230481491e-05, "loss": 2.7926, "step": 46697 }, { "epoch": 2.29, "grad_norm": 0.726197361946106, "learning_rate": 7.947803358625716e-05, "loss": 2.9515, "step": 46698 }, { "epoch": 2.29, "grad_norm": 0.7250626087188721, "learning_rate": 7.946759544851178e-05, "loss": 2.9783, "step": 46699 }, { "epoch": 2.29, "grad_norm": 0.7216305732727051, "learning_rate": 7.945715789160637e-05, "loss": 2.7268, "step": 46700 }, { "epoch": 2.29, "grad_norm": 0.7879276275634766, "learning_rate": 7.944672091556829e-05, "loss": 2.9204, "step": 46701 }, { "epoch": 2.29, "grad_norm": 0.7333400845527649, "learning_rate": 7.94362845204252e-05, "loss": 2.8761, "step": 46702 }, { "epoch": 2.29, "grad_norm": 0.7444015145301819, "learning_rate": 7.942584870620439e-05, "loss": 3.0149, "step": 46703 }, { "epoch": 2.29, "grad_norm": 0.6867544651031494, "learning_rate": 7.941541347293353e-05, "loss": 2.848, "step": 46704 }, { "epoch": 2.29, "grad_norm": 0.7290630340576172, "learning_rate": 7.940497882064e-05, "loss": 3.0744, "step": 46705 }, { "epoch": 2.29, "grad_norm": 0.7640529870986938, "learning_rate": 7.939454474935122e-05, "loss": 3.1576, "step": 46706 }, { "epoch": 2.29, "grad_norm": 0.7068827748298645, "learning_rate": 7.938411125909487e-05, "loss": 2.9971, "step": 46707 }, { "epoch": 2.29, "grad_norm": 0.6828535199165344, "learning_rate": 7.937367834989821e-05, "loss": 2.9971, "step": 46708 }, { "epoch": 2.29, "grad_norm": 0.6947302222251892, "learning_rate": 7.93632460217888e-05, "loss": 2.9175, "step": 46709 }, { "epoch": 2.29, "grad_norm": 0.7065187096595764, "learning_rate": 7.935281427479425e-05, "loss": 3.0409, "step": 46710 }, { "epoch": 2.29, "grad_norm": 0.7438076734542847, "learning_rate": 7.934238310894183e-05, "loss": 2.8061, "step": 46711 }, { "epoch": 2.29, "grad_norm": 0.7637118101119995, "learning_rate": 7.933195252425929e-05, "loss": 2.8936, "step": 46712 }, { "epoch": 2.29, "grad_norm": 0.7260427474975586, "learning_rate": 7.932152252077374e-05, "loss": 2.9526, "step": 46713 }, { "epoch": 2.29, "grad_norm": 0.7055556774139404, "learning_rate": 7.931109309851283e-05, "loss": 2.802, "step": 46714 }, { "epoch": 2.29, "grad_norm": 0.7367199659347534, "learning_rate": 7.93006642575041e-05, "loss": 3.0472, "step": 46715 }, { "epoch": 2.29, "grad_norm": 0.6921574473381042, "learning_rate": 7.929023599777484e-05, "loss": 2.6497, "step": 46716 }, { "epoch": 2.29, "grad_norm": 0.6976795792579651, "learning_rate": 7.927980831935276e-05, "loss": 2.7202, "step": 46717 }, { "epoch": 2.29, "grad_norm": 0.7396517992019653, "learning_rate": 7.926938122226508e-05, "loss": 3.1099, "step": 46718 }, { "epoch": 2.29, "grad_norm": 0.7003223299980164, "learning_rate": 7.925895470653943e-05, "loss": 2.8682, "step": 46719 }, { "epoch": 2.29, "grad_norm": 0.7250968217849731, "learning_rate": 7.924852877220324e-05, "loss": 3.0162, "step": 46720 }, { "epoch": 2.29, "grad_norm": 0.7336358428001404, "learning_rate": 7.923810341928385e-05, "loss": 2.7529, "step": 46721 }, { "epoch": 2.29, "grad_norm": 0.6997026205062866, "learning_rate": 7.92276786478089e-05, "loss": 2.8156, "step": 46722 }, { "epoch": 2.29, "grad_norm": 0.7014487981796265, "learning_rate": 7.921725445780567e-05, "loss": 3.0013, "step": 46723 }, { "epoch": 2.29, "grad_norm": 0.6987916827201843, "learning_rate": 7.920683084930166e-05, "loss": 3.029, "step": 46724 }, { "epoch": 2.29, "grad_norm": 0.7134968042373657, "learning_rate": 7.919640782232451e-05, "loss": 2.8053, "step": 46725 }, { "epoch": 2.29, "grad_norm": 0.7232719659805298, "learning_rate": 7.91859853769015e-05, "loss": 3.2245, "step": 46726 }, { "epoch": 2.29, "grad_norm": 0.718622624874115, "learning_rate": 7.917556351306011e-05, "loss": 2.8381, "step": 46727 }, { "epoch": 2.29, "grad_norm": 0.708051860332489, "learning_rate": 7.916514223082768e-05, "loss": 2.9364, "step": 46728 }, { "epoch": 2.29, "grad_norm": 0.7392677068710327, "learning_rate": 7.91547215302318e-05, "loss": 3.0071, "step": 46729 }, { "epoch": 2.29, "grad_norm": 0.7383646368980408, "learning_rate": 7.914430141129994e-05, "loss": 2.6716, "step": 46730 }, { "epoch": 2.29, "grad_norm": 0.6965837478637695, "learning_rate": 7.913388187405943e-05, "loss": 2.9672, "step": 46731 }, { "epoch": 2.29, "grad_norm": 0.7411499619483948, "learning_rate": 7.912346291853783e-05, "loss": 2.9199, "step": 46732 }, { "epoch": 2.29, "grad_norm": 0.7892962098121643, "learning_rate": 7.911304454476244e-05, "loss": 2.9208, "step": 46733 }, { "epoch": 2.29, "grad_norm": 0.6945703029632568, "learning_rate": 7.910262675276087e-05, "loss": 2.8022, "step": 46734 }, { "epoch": 2.29, "grad_norm": 0.7150418162345886, "learning_rate": 7.909220954256046e-05, "loss": 2.8295, "step": 46735 }, { "epoch": 2.29, "grad_norm": 0.6972780823707581, "learning_rate": 7.90817929141886e-05, "loss": 2.9666, "step": 46736 }, { "epoch": 2.29, "grad_norm": 0.7024328708648682, "learning_rate": 7.907137686767281e-05, "loss": 2.7697, "step": 46737 }, { "epoch": 2.29, "grad_norm": 0.7076515555381775, "learning_rate": 7.906096140304046e-05, "loss": 2.7878, "step": 46738 }, { "epoch": 2.29, "grad_norm": 0.7264235615730286, "learning_rate": 7.905054652031897e-05, "loss": 2.7891, "step": 46739 }, { "epoch": 2.29, "grad_norm": 0.7383031845092773, "learning_rate": 7.904013221953594e-05, "loss": 3.013, "step": 46740 }, { "epoch": 2.29, "grad_norm": 0.7196172475814819, "learning_rate": 7.90297185007187e-05, "loss": 2.5622, "step": 46741 }, { "epoch": 2.29, "grad_norm": 0.7266930341720581, "learning_rate": 7.901930536389461e-05, "loss": 3.0448, "step": 46742 }, { "epoch": 2.29, "grad_norm": 0.6897668242454529, "learning_rate": 7.900889280909106e-05, "loss": 2.7908, "step": 46743 }, { "epoch": 2.29, "grad_norm": 0.7381284832954407, "learning_rate": 7.899848083633556e-05, "loss": 2.889, "step": 46744 }, { "epoch": 2.29, "grad_norm": 0.6909975409507751, "learning_rate": 7.898806944565563e-05, "loss": 3.0759, "step": 46745 }, { "epoch": 2.29, "grad_norm": 0.791771411895752, "learning_rate": 7.897765863707848e-05, "loss": 2.8085, "step": 46746 }, { "epoch": 2.29, "grad_norm": 0.7384874224662781, "learning_rate": 7.896724841063176e-05, "loss": 2.9682, "step": 46747 }, { "epoch": 2.29, "grad_norm": 0.7547340989112854, "learning_rate": 7.895683876634274e-05, "loss": 2.9999, "step": 46748 }, { "epoch": 2.29, "grad_norm": 0.7127857208251953, "learning_rate": 7.894642970423878e-05, "loss": 2.5953, "step": 46749 }, { "epoch": 2.29, "grad_norm": 0.7140387296676636, "learning_rate": 7.893602122434748e-05, "loss": 2.8312, "step": 46750 }, { "epoch": 2.29, "grad_norm": 0.6861268877983093, "learning_rate": 7.892561332669609e-05, "loss": 2.9191, "step": 46751 }, { "epoch": 2.29, "grad_norm": 0.731544017791748, "learning_rate": 7.891520601131213e-05, "loss": 2.9862, "step": 46752 }, { "epoch": 2.29, "grad_norm": 0.7233723402023315, "learning_rate": 7.89047992782229e-05, "loss": 3.1886, "step": 46753 }, { "epoch": 2.29, "grad_norm": 0.7004035115242004, "learning_rate": 7.889439312745598e-05, "loss": 2.8099, "step": 46754 }, { "epoch": 2.29, "grad_norm": 0.7136520743370056, "learning_rate": 7.888398755903857e-05, "loss": 2.9018, "step": 46755 }, { "epoch": 2.29, "grad_norm": 0.7034738659858704, "learning_rate": 7.88735825729983e-05, "loss": 2.9447, "step": 46756 }, { "epoch": 2.29, "grad_norm": 0.7353273630142212, "learning_rate": 7.886317816936242e-05, "loss": 2.9035, "step": 46757 }, { "epoch": 2.29, "grad_norm": 0.7724609971046448, "learning_rate": 7.885277434815828e-05, "loss": 2.8167, "step": 46758 }, { "epoch": 2.29, "grad_norm": 0.9952621459960938, "learning_rate": 7.884237110941348e-05, "loss": 3.0053, "step": 46759 }, { "epoch": 2.29, "grad_norm": 0.7721501588821411, "learning_rate": 7.883196845315521e-05, "loss": 2.9342, "step": 46760 }, { "epoch": 2.29, "grad_norm": 0.7446574568748474, "learning_rate": 7.882156637941098e-05, "loss": 2.8327, "step": 46761 }, { "epoch": 2.29, "grad_norm": 0.7871111631393433, "learning_rate": 7.881116488820827e-05, "loss": 2.8406, "step": 46762 }, { "epoch": 2.29, "grad_norm": 0.7120261788368225, "learning_rate": 7.880076397957438e-05, "loss": 2.7964, "step": 46763 }, { "epoch": 2.29, "grad_norm": 0.8707037568092346, "learning_rate": 7.879036365353672e-05, "loss": 2.9292, "step": 46764 }, { "epoch": 2.29, "grad_norm": 0.699736475944519, "learning_rate": 7.877996391012256e-05, "loss": 2.8143, "step": 46765 }, { "epoch": 2.29, "grad_norm": 0.731170117855072, "learning_rate": 7.876956474935941e-05, "loss": 2.8974, "step": 46766 }, { "epoch": 2.29, "grad_norm": 0.7486166954040527, "learning_rate": 7.875916617127475e-05, "loss": 3.0931, "step": 46767 }, { "epoch": 2.29, "grad_norm": 0.6976475119590759, "learning_rate": 7.874876817589572e-05, "loss": 3.0062, "step": 46768 }, { "epoch": 2.29, "grad_norm": 0.76833176612854, "learning_rate": 7.873837076325001e-05, "loss": 2.9005, "step": 46769 }, { "epoch": 2.29, "grad_norm": 0.7090509533882141, "learning_rate": 7.872797393336482e-05, "loss": 3.1187, "step": 46770 }, { "epoch": 2.29, "grad_norm": 0.7460590600967407, "learning_rate": 7.871757768626749e-05, "loss": 2.9877, "step": 46771 }, { "epoch": 2.29, "grad_norm": 0.7121726870536804, "learning_rate": 7.870718202198555e-05, "loss": 2.9237, "step": 46772 }, { "epoch": 2.29, "grad_norm": 0.7376371026039124, "learning_rate": 7.869678694054622e-05, "loss": 2.9882, "step": 46773 }, { "epoch": 2.29, "grad_norm": 0.7384995818138123, "learning_rate": 7.868639244197704e-05, "loss": 2.9219, "step": 46774 }, { "epoch": 2.29, "grad_norm": 0.8270297646522522, "learning_rate": 7.867599852630523e-05, "loss": 2.8947, "step": 46775 }, { "epoch": 2.29, "grad_norm": 0.765021026134491, "learning_rate": 7.866560519355825e-05, "loss": 2.8757, "step": 46776 }, { "epoch": 2.29, "grad_norm": 0.6961572170257568, "learning_rate": 7.865521244376353e-05, "loss": 3.0606, "step": 46777 }, { "epoch": 2.29, "grad_norm": 0.7141311764717102, "learning_rate": 7.864482027694842e-05, "loss": 2.6212, "step": 46778 }, { "epoch": 2.29, "grad_norm": 0.6754100322723389, "learning_rate": 7.863442869314024e-05, "loss": 2.7227, "step": 46779 }, { "epoch": 2.29, "grad_norm": 0.7049227952957153, "learning_rate": 7.862403769236628e-05, "loss": 3.0977, "step": 46780 }, { "epoch": 2.29, "grad_norm": 0.780094563961029, "learning_rate": 7.861364727465399e-05, "loss": 2.8449, "step": 46781 }, { "epoch": 2.29, "grad_norm": 0.6903862357139587, "learning_rate": 7.860325744003085e-05, "loss": 3.1181, "step": 46782 }, { "epoch": 2.29, "grad_norm": 0.6869261860847473, "learning_rate": 7.859286818852401e-05, "loss": 2.9011, "step": 46783 }, { "epoch": 2.29, "grad_norm": 0.7356374859809875, "learning_rate": 7.858247952016106e-05, "loss": 3.0236, "step": 46784 }, { "epoch": 2.29, "grad_norm": 0.6988884210586548, "learning_rate": 7.857209143496925e-05, "loss": 2.8257, "step": 46785 }, { "epoch": 2.29, "grad_norm": 0.7464091777801514, "learning_rate": 7.856170393297582e-05, "loss": 2.5629, "step": 46786 }, { "epoch": 2.29, "grad_norm": 0.7155094742774963, "learning_rate": 7.855131701420836e-05, "loss": 3.0864, "step": 46787 }, { "epoch": 2.29, "grad_norm": 0.7148899435997009, "learning_rate": 7.854093067869401e-05, "loss": 2.9552, "step": 46788 }, { "epoch": 2.29, "grad_norm": 0.7054211497306824, "learning_rate": 7.853054492646033e-05, "loss": 3.0602, "step": 46789 }, { "epoch": 2.29, "grad_norm": 0.7065744996070862, "learning_rate": 7.852015975753447e-05, "loss": 2.9686, "step": 46790 }, { "epoch": 2.29, "grad_norm": 0.7648019194602966, "learning_rate": 7.850977517194397e-05, "loss": 3.0381, "step": 46791 }, { "epoch": 2.29, "grad_norm": 0.7037829756736755, "learning_rate": 7.849939116971608e-05, "loss": 3.0974, "step": 46792 }, { "epoch": 2.29, "grad_norm": 0.7240031957626343, "learning_rate": 7.84890077508781e-05, "loss": 3.1276, "step": 46793 }, { "epoch": 2.29, "grad_norm": 0.7773902416229248, "learning_rate": 7.847862491545755e-05, "loss": 2.8614, "step": 46794 }, { "epoch": 2.29, "grad_norm": 0.708374559879303, "learning_rate": 7.846824266348153e-05, "loss": 2.7868, "step": 46795 }, { "epoch": 2.29, "grad_norm": 0.7300131916999817, "learning_rate": 7.845786099497757e-05, "loss": 2.8318, "step": 46796 }, { "epoch": 2.29, "grad_norm": 0.7228390574455261, "learning_rate": 7.844747990997305e-05, "loss": 3.0077, "step": 46797 }, { "epoch": 2.29, "grad_norm": 0.6896500587463379, "learning_rate": 7.843709940849513e-05, "loss": 2.915, "step": 46798 }, { "epoch": 2.29, "grad_norm": 0.7400826811790466, "learning_rate": 7.842671949057134e-05, "loss": 3.0408, "step": 46799 }, { "epoch": 2.29, "grad_norm": 0.7104637622833252, "learning_rate": 7.841634015622891e-05, "loss": 2.9581, "step": 46800 }, { "epoch": 2.29, "grad_norm": 0.7338810563087463, "learning_rate": 7.840596140549514e-05, "loss": 3.0433, "step": 46801 }, { "epoch": 2.29, "grad_norm": 0.727415919303894, "learning_rate": 7.83955832383975e-05, "loss": 3.0346, "step": 46802 }, { "epoch": 2.29, "grad_norm": 0.7342788577079773, "learning_rate": 7.838520565496314e-05, "loss": 2.829, "step": 46803 }, { "epoch": 2.29, "grad_norm": 0.7318736910820007, "learning_rate": 7.837482865521959e-05, "loss": 3.0128, "step": 46804 }, { "epoch": 2.29, "grad_norm": 0.7315552830696106, "learning_rate": 7.836445223919401e-05, "loss": 3.0684, "step": 46805 }, { "epoch": 2.29, "grad_norm": 0.7073324918746948, "learning_rate": 7.835407640691388e-05, "loss": 2.7697, "step": 46806 }, { "epoch": 2.29, "grad_norm": 0.659433126449585, "learning_rate": 7.834370115840647e-05, "loss": 2.8127, "step": 46807 }, { "epoch": 2.29, "grad_norm": 0.7416350245475769, "learning_rate": 7.833332649369902e-05, "loss": 2.9297, "step": 46808 }, { "epoch": 2.29, "grad_norm": 0.6925668120384216, "learning_rate": 7.832295241281898e-05, "loss": 2.7095, "step": 46809 }, { "epoch": 2.29, "grad_norm": 0.7669311761856079, "learning_rate": 7.831257891579358e-05, "loss": 2.9366, "step": 46810 }, { "epoch": 2.29, "grad_norm": 0.7601991891860962, "learning_rate": 7.830220600265014e-05, "loss": 2.8255, "step": 46811 }, { "epoch": 2.29, "grad_norm": 0.6947373747825623, "learning_rate": 7.829183367341613e-05, "loss": 2.9179, "step": 46812 }, { "epoch": 2.29, "grad_norm": 0.7191194295883179, "learning_rate": 7.828146192811877e-05, "loss": 2.9629, "step": 46813 }, { "epoch": 2.29, "grad_norm": 0.753840982913971, "learning_rate": 7.827109076678533e-05, "loss": 2.7047, "step": 46814 }, { "epoch": 2.29, "grad_norm": 0.7168121933937073, "learning_rate": 7.826072018944312e-05, "loss": 3.094, "step": 46815 }, { "epoch": 2.29, "grad_norm": 0.75309157371521, "learning_rate": 7.825035019611948e-05, "loss": 2.8804, "step": 46816 }, { "epoch": 2.29, "grad_norm": 0.6898667216300964, "learning_rate": 7.82399807868418e-05, "loss": 3.1949, "step": 46817 }, { "epoch": 2.29, "grad_norm": 0.7027104496955872, "learning_rate": 7.82296119616373e-05, "loss": 2.7947, "step": 46818 }, { "epoch": 2.29, "grad_norm": 0.7549039125442505, "learning_rate": 7.821924372053338e-05, "loss": 3.0059, "step": 46819 }, { "epoch": 2.29, "grad_norm": 0.7107914090156555, "learning_rate": 7.82088760635572e-05, "loss": 3.0119, "step": 46820 }, { "epoch": 2.29, "grad_norm": 0.7330605387687683, "learning_rate": 7.819850899073625e-05, "loss": 3.0567, "step": 46821 }, { "epoch": 2.29, "grad_norm": 0.7199442982673645, "learning_rate": 7.818814250209775e-05, "loss": 2.9784, "step": 46822 }, { "epoch": 2.29, "grad_norm": 0.7049921751022339, "learning_rate": 7.81777765976689e-05, "loss": 3.0305, "step": 46823 }, { "epoch": 2.29, "grad_norm": 0.7271491289138794, "learning_rate": 7.816741127747716e-05, "loss": 2.9861, "step": 46824 }, { "epoch": 2.29, "grad_norm": 0.7731443643569946, "learning_rate": 7.815704654154971e-05, "loss": 2.9895, "step": 46825 }, { "epoch": 2.29, "grad_norm": 0.7125991582870483, "learning_rate": 7.81466823899139e-05, "loss": 3.0968, "step": 46826 }, { "epoch": 2.29, "grad_norm": 0.7187132239341736, "learning_rate": 7.813631882259708e-05, "loss": 2.8701, "step": 46827 }, { "epoch": 2.29, "grad_norm": 0.7413823008537292, "learning_rate": 7.812595583962653e-05, "loss": 2.9285, "step": 46828 }, { "epoch": 2.29, "grad_norm": 0.6879110932350159, "learning_rate": 7.811559344102953e-05, "loss": 3.0651, "step": 46829 }, { "epoch": 2.3, "grad_norm": 0.6981056332588196, "learning_rate": 7.810523162683323e-05, "loss": 2.7413, "step": 46830 }, { "epoch": 2.3, "grad_norm": 0.7995371222496033, "learning_rate": 7.809487039706517e-05, "loss": 2.7898, "step": 46831 }, { "epoch": 2.3, "grad_norm": 0.6955015063285828, "learning_rate": 7.808450975175239e-05, "loss": 2.8881, "step": 46832 }, { "epoch": 2.3, "grad_norm": 0.7273989319801331, "learning_rate": 7.807414969092233e-05, "loss": 3.0188, "step": 46833 }, { "epoch": 2.3, "grad_norm": 0.7289535403251648, "learning_rate": 7.806379021460233e-05, "loss": 2.9289, "step": 46834 }, { "epoch": 2.3, "grad_norm": 0.7082308530807495, "learning_rate": 7.805343132281951e-05, "loss": 2.8786, "step": 46835 }, { "epoch": 2.3, "grad_norm": 0.736067533493042, "learning_rate": 7.804307301560131e-05, "loss": 3.1124, "step": 46836 }, { "epoch": 2.3, "grad_norm": 0.751034677028656, "learning_rate": 7.803271529297496e-05, "loss": 2.9974, "step": 46837 }, { "epoch": 2.3, "grad_norm": 0.7050460577011108, "learning_rate": 7.802235815496762e-05, "loss": 2.7239, "step": 46838 }, { "epoch": 2.3, "grad_norm": 0.7277799844741821, "learning_rate": 7.801200160160676e-05, "loss": 3.0167, "step": 46839 }, { "epoch": 2.3, "grad_norm": 0.7088861465454102, "learning_rate": 7.800164563291947e-05, "loss": 2.8003, "step": 46840 }, { "epoch": 2.3, "grad_norm": 0.6924530267715454, "learning_rate": 7.79912902489332e-05, "loss": 2.7845, "step": 46841 }, { "epoch": 2.3, "grad_norm": 0.7400920391082764, "learning_rate": 7.798093544967509e-05, "loss": 3.0177, "step": 46842 }, { "epoch": 2.3, "grad_norm": 0.7346185445785522, "learning_rate": 7.797058123517253e-05, "loss": 3.1688, "step": 46843 }, { "epoch": 2.3, "grad_norm": 0.7445531487464905, "learning_rate": 7.796022760545272e-05, "loss": 2.8967, "step": 46844 }, { "epoch": 2.3, "grad_norm": 0.7266702651977539, "learning_rate": 7.794987456054288e-05, "loss": 2.9966, "step": 46845 }, { "epoch": 2.3, "grad_norm": 0.7435703277587891, "learning_rate": 7.793952210047039e-05, "loss": 2.9924, "step": 46846 }, { "epoch": 2.3, "grad_norm": 0.7759212255477905, "learning_rate": 7.792917022526239e-05, "loss": 2.6906, "step": 46847 }, { "epoch": 2.3, "grad_norm": 0.7034785747528076, "learning_rate": 7.791881893494622e-05, "loss": 3.0356, "step": 46848 }, { "epoch": 2.3, "grad_norm": 0.7417486906051636, "learning_rate": 7.790846822954922e-05, "loss": 2.8477, "step": 46849 }, { "epoch": 2.3, "grad_norm": 0.73434978723526, "learning_rate": 7.78981181090986e-05, "loss": 2.7031, "step": 46850 }, { "epoch": 2.3, "grad_norm": 0.7167014479637146, "learning_rate": 7.788776857362157e-05, "loss": 2.9902, "step": 46851 }, { "epoch": 2.3, "grad_norm": 0.717901885509491, "learning_rate": 7.787741962314533e-05, "loss": 3.0099, "step": 46852 }, { "epoch": 2.3, "grad_norm": 0.7119569778442383, "learning_rate": 7.78670712576972e-05, "loss": 2.8717, "step": 46853 }, { "epoch": 2.3, "grad_norm": 0.7578343152999878, "learning_rate": 7.785672347730459e-05, "loss": 2.9561, "step": 46854 }, { "epoch": 2.3, "grad_norm": 0.741181492805481, "learning_rate": 7.784637628199448e-05, "loss": 2.934, "step": 46855 }, { "epoch": 2.3, "grad_norm": 0.7272793054580688, "learning_rate": 7.783602967179438e-05, "loss": 2.8121, "step": 46856 }, { "epoch": 2.3, "grad_norm": 0.7096351981163025, "learning_rate": 7.78256836467313e-05, "loss": 3.0762, "step": 46857 }, { "epoch": 2.3, "grad_norm": 0.703946590423584, "learning_rate": 7.781533820683272e-05, "loss": 2.9331, "step": 46858 }, { "epoch": 2.3, "grad_norm": 0.7070743441581726, "learning_rate": 7.780499335212576e-05, "loss": 2.8825, "step": 46859 }, { "epoch": 2.3, "grad_norm": 0.7188315987586975, "learning_rate": 7.779464908263762e-05, "loss": 2.9131, "step": 46860 }, { "epoch": 2.3, "grad_norm": 0.6855785846710205, "learning_rate": 7.778430539839569e-05, "loss": 2.9938, "step": 46861 }, { "epoch": 2.3, "grad_norm": 0.7094498872756958, "learning_rate": 7.777396229942705e-05, "loss": 2.9944, "step": 46862 }, { "epoch": 2.3, "grad_norm": 0.7395783066749573, "learning_rate": 7.776361978575902e-05, "loss": 2.9292, "step": 46863 }, { "epoch": 2.3, "grad_norm": 0.729633629322052, "learning_rate": 7.775327785741894e-05, "loss": 2.8902, "step": 46864 }, { "epoch": 2.3, "grad_norm": 0.7409253716468811, "learning_rate": 7.774293651443394e-05, "loss": 3.0206, "step": 46865 }, { "epoch": 2.3, "grad_norm": 0.7665625810623169, "learning_rate": 7.773259575683131e-05, "loss": 2.5951, "step": 46866 }, { "epoch": 2.3, "grad_norm": 0.7642672657966614, "learning_rate": 7.772225558463813e-05, "loss": 2.8159, "step": 46867 }, { "epoch": 2.3, "grad_norm": 0.7062388062477112, "learning_rate": 7.771191599788174e-05, "loss": 2.9574, "step": 46868 }, { "epoch": 2.3, "grad_norm": 0.7327240705490112, "learning_rate": 7.770157699658947e-05, "loss": 2.9558, "step": 46869 }, { "epoch": 2.3, "grad_norm": 0.7051011323928833, "learning_rate": 7.76912385807884e-05, "loss": 2.7563, "step": 46870 }, { "epoch": 2.3, "grad_norm": 0.721517026424408, "learning_rate": 7.768090075050588e-05, "loss": 2.9567, "step": 46871 }, { "epoch": 2.3, "grad_norm": 0.7337429523468018, "learning_rate": 7.767056350576911e-05, "loss": 2.8228, "step": 46872 }, { "epoch": 2.3, "grad_norm": 0.7145420908927917, "learning_rate": 7.76602268466052e-05, "loss": 2.8212, "step": 46873 }, { "epoch": 2.3, "grad_norm": 0.717994749546051, "learning_rate": 7.764989077304153e-05, "loss": 2.808, "step": 46874 }, { "epoch": 2.3, "grad_norm": 0.6848964095115662, "learning_rate": 7.763955528510516e-05, "loss": 3.0177, "step": 46875 }, { "epoch": 2.3, "grad_norm": 0.7062221765518188, "learning_rate": 7.762922038282352e-05, "loss": 2.721, "step": 46876 }, { "epoch": 2.3, "grad_norm": 0.7325926423072815, "learning_rate": 7.761888606622362e-05, "loss": 2.8309, "step": 46877 }, { "epoch": 2.3, "grad_norm": 0.7448124289512634, "learning_rate": 7.760855233533279e-05, "loss": 2.8649, "step": 46878 }, { "epoch": 2.3, "grad_norm": 0.6935537457466125, "learning_rate": 7.759821919017831e-05, "loss": 2.824, "step": 46879 }, { "epoch": 2.3, "grad_norm": 0.7377654314041138, "learning_rate": 7.75878866307873e-05, "loss": 2.9616, "step": 46880 }, { "epoch": 2.3, "grad_norm": 0.7462741732597351, "learning_rate": 7.757755465718703e-05, "loss": 2.763, "step": 46881 }, { "epoch": 2.3, "grad_norm": 0.7333121299743652, "learning_rate": 7.756722326940455e-05, "loss": 3.0852, "step": 46882 }, { "epoch": 2.3, "grad_norm": 0.7244988083839417, "learning_rate": 7.755689246746721e-05, "loss": 2.975, "step": 46883 }, { "epoch": 2.3, "grad_norm": 0.7320109605789185, "learning_rate": 7.754656225140229e-05, "loss": 2.8481, "step": 46884 }, { "epoch": 2.3, "grad_norm": 0.6962860226631165, "learning_rate": 7.753623262123683e-05, "loss": 2.9392, "step": 46885 }, { "epoch": 2.3, "grad_norm": 0.761470377445221, "learning_rate": 7.75259035769982e-05, "loss": 3.0863, "step": 46886 }, { "epoch": 2.3, "grad_norm": 0.6684213876724243, "learning_rate": 7.751557511871356e-05, "loss": 2.7991, "step": 46887 }, { "epoch": 2.3, "grad_norm": 0.6986576318740845, "learning_rate": 7.750524724640996e-05, "loss": 2.8605, "step": 46888 }, { "epoch": 2.3, "grad_norm": 0.7003403902053833, "learning_rate": 7.74949199601148e-05, "loss": 2.8998, "step": 46889 }, { "epoch": 2.3, "grad_norm": 0.7219340205192566, "learning_rate": 7.748459325985512e-05, "loss": 2.9807, "step": 46890 }, { "epoch": 2.3, "grad_norm": 0.7780990600585938, "learning_rate": 7.747426714565828e-05, "loss": 2.9845, "step": 46891 }, { "epoch": 2.3, "grad_norm": 0.7375266551971436, "learning_rate": 7.74639416175513e-05, "loss": 2.8539, "step": 46892 }, { "epoch": 2.3, "grad_norm": 0.702833890914917, "learning_rate": 7.745361667556159e-05, "loss": 2.7213, "step": 46893 }, { "epoch": 2.3, "grad_norm": 0.7389039397239685, "learning_rate": 7.744329231971621e-05, "loss": 3.0499, "step": 46894 }, { "epoch": 2.3, "grad_norm": 0.7284427285194397, "learning_rate": 7.743296855004224e-05, "loss": 2.8602, "step": 46895 }, { "epoch": 2.3, "grad_norm": 0.7277424335479736, "learning_rate": 7.742264536656713e-05, "loss": 2.7597, "step": 46896 }, { "epoch": 2.3, "grad_norm": 0.7396055459976196, "learning_rate": 7.741232276931785e-05, "loss": 2.9429, "step": 46897 }, { "epoch": 2.3, "grad_norm": 0.7248284220695496, "learning_rate": 7.740200075832164e-05, "loss": 2.803, "step": 46898 }, { "epoch": 2.3, "grad_norm": 0.7068817019462585, "learning_rate": 7.739167933360584e-05, "loss": 2.8822, "step": 46899 }, { "epoch": 2.3, "grad_norm": 0.724687933921814, "learning_rate": 7.73813584951974e-05, "loss": 2.7615, "step": 46900 }, { "epoch": 2.3, "grad_norm": 0.7012940645217896, "learning_rate": 7.73710382431237e-05, "loss": 3.0116, "step": 46901 }, { "epoch": 2.3, "grad_norm": 0.706241250038147, "learning_rate": 7.736071857741188e-05, "loss": 3.0091, "step": 46902 }, { "epoch": 2.3, "grad_norm": 0.8327338099479675, "learning_rate": 7.735039949808897e-05, "loss": 2.8047, "step": 46903 }, { "epoch": 2.3, "grad_norm": 0.7665473818778992, "learning_rate": 7.734008100518233e-05, "loss": 2.9935, "step": 46904 }, { "epoch": 2.3, "grad_norm": 0.6795613169670105, "learning_rate": 7.732976309871901e-05, "loss": 2.8637, "step": 46905 }, { "epoch": 2.3, "grad_norm": 0.7590937614440918, "learning_rate": 7.731944577872629e-05, "loss": 2.9044, "step": 46906 }, { "epoch": 2.3, "grad_norm": 0.6913984417915344, "learning_rate": 7.730912904523123e-05, "loss": 2.8735, "step": 46907 }, { "epoch": 2.3, "grad_norm": 0.7234129309654236, "learning_rate": 7.729881289826116e-05, "loss": 2.7942, "step": 46908 }, { "epoch": 2.3, "grad_norm": 0.7211498618125916, "learning_rate": 7.728849733784313e-05, "loss": 3.0061, "step": 46909 }, { "epoch": 2.3, "grad_norm": 0.7252943515777588, "learning_rate": 7.72781823640043e-05, "loss": 2.9296, "step": 46910 }, { "epoch": 2.3, "grad_norm": 0.7195742130279541, "learning_rate": 7.726786797677191e-05, "loss": 3.0563, "step": 46911 }, { "epoch": 2.3, "grad_norm": 0.7221367955207825, "learning_rate": 7.7257554176173e-05, "loss": 3.0016, "step": 46912 }, { "epoch": 2.3, "grad_norm": 0.7329986095428467, "learning_rate": 7.724724096223496e-05, "loss": 3.0884, "step": 46913 }, { "epoch": 2.3, "grad_norm": 0.7828996181488037, "learning_rate": 7.723692833498471e-05, "loss": 2.7201, "step": 46914 }, { "epoch": 2.3, "grad_norm": 0.7411604523658752, "learning_rate": 7.722661629444959e-05, "loss": 2.9872, "step": 46915 }, { "epoch": 2.3, "grad_norm": 0.7232510447502136, "learning_rate": 7.72163048406567e-05, "loss": 2.959, "step": 46916 }, { "epoch": 2.3, "grad_norm": 0.7410550713539124, "learning_rate": 7.720599397363309e-05, "loss": 2.7832, "step": 46917 }, { "epoch": 2.3, "grad_norm": 0.6873605251312256, "learning_rate": 7.719568369340611e-05, "loss": 2.9664, "step": 46918 }, { "epoch": 2.3, "grad_norm": 0.6807610392570496, "learning_rate": 7.718537400000275e-05, "loss": 2.8011, "step": 46919 }, { "epoch": 2.3, "grad_norm": 0.7145941257476807, "learning_rate": 7.71750648934502e-05, "loss": 2.9894, "step": 46920 }, { "epoch": 2.3, "grad_norm": 0.6965793371200562, "learning_rate": 7.716475637377575e-05, "loss": 3.0861, "step": 46921 }, { "epoch": 2.3, "grad_norm": 0.7429308295249939, "learning_rate": 7.715444844100637e-05, "loss": 2.9534, "step": 46922 }, { "epoch": 2.3, "grad_norm": 0.7118332386016846, "learning_rate": 7.714414109516937e-05, "loss": 3.0844, "step": 46923 }, { "epoch": 2.3, "grad_norm": 0.7275873422622681, "learning_rate": 7.71338343362918e-05, "loss": 2.7865, "step": 46924 }, { "epoch": 2.3, "grad_norm": 0.779687762260437, "learning_rate": 7.712352816440072e-05, "loss": 3.0541, "step": 46925 }, { "epoch": 2.3, "grad_norm": 0.7450523376464844, "learning_rate": 7.711322257952349e-05, "loss": 2.8133, "step": 46926 }, { "epoch": 2.3, "grad_norm": 0.725862979888916, "learning_rate": 7.7102917581687e-05, "loss": 2.8848, "step": 46927 }, { "epoch": 2.3, "grad_norm": 0.705909788608551, "learning_rate": 7.709261317091869e-05, "loss": 2.7166, "step": 46928 }, { "epoch": 2.3, "grad_norm": 0.7430562376976013, "learning_rate": 7.70823093472454e-05, "loss": 2.7686, "step": 46929 }, { "epoch": 2.3, "grad_norm": 0.717370867729187, "learning_rate": 7.70720061106945e-05, "loss": 2.9506, "step": 46930 }, { "epoch": 2.3, "grad_norm": 0.7363516688346863, "learning_rate": 7.706170346129303e-05, "loss": 2.9871, "step": 46931 }, { "epoch": 2.3, "grad_norm": 0.7221070528030396, "learning_rate": 7.705140139906806e-05, "loss": 2.7575, "step": 46932 }, { "epoch": 2.3, "grad_norm": 0.6913543939590454, "learning_rate": 7.704109992404684e-05, "loss": 3.0035, "step": 46933 }, { "epoch": 2.3, "grad_norm": 0.7119285464286804, "learning_rate": 7.703079903625639e-05, "loss": 2.9111, "step": 46934 }, { "epoch": 2.3, "grad_norm": 0.7075133323669434, "learning_rate": 7.702049873572392e-05, "loss": 2.7502, "step": 46935 }, { "epoch": 2.3, "grad_norm": 0.717917263507843, "learning_rate": 7.701019902247661e-05, "loss": 2.8735, "step": 46936 }, { "epoch": 2.3, "grad_norm": 0.735043466091156, "learning_rate": 7.699989989654145e-05, "loss": 2.7548, "step": 46937 }, { "epoch": 2.3, "grad_norm": 0.7249195575714111, "learning_rate": 7.698960135794581e-05, "loss": 2.8316, "step": 46938 }, { "epoch": 2.3, "grad_norm": 0.7049363255500793, "learning_rate": 7.697930340671641e-05, "loss": 3.0058, "step": 46939 }, { "epoch": 2.3, "grad_norm": 0.7325100302696228, "learning_rate": 7.696900604288066e-05, "loss": 2.9678, "step": 46940 }, { "epoch": 2.3, "grad_norm": 0.7083897590637207, "learning_rate": 7.695870926646573e-05, "loss": 2.813, "step": 46941 }, { "epoch": 2.3, "grad_norm": 0.7134562730789185, "learning_rate": 7.69484130774985e-05, "loss": 3.0571, "step": 46942 }, { "epoch": 2.3, "grad_norm": 0.7213225364685059, "learning_rate": 7.693811747600633e-05, "loss": 2.9821, "step": 46943 }, { "epoch": 2.3, "grad_norm": 0.6950119137763977, "learning_rate": 7.692782246201615e-05, "loss": 2.9263, "step": 46944 }, { "epoch": 2.3, "grad_norm": 0.6881087422370911, "learning_rate": 7.691752803555524e-05, "loss": 2.8272, "step": 46945 }, { "epoch": 2.3, "grad_norm": 0.7488217353820801, "learning_rate": 7.69072341966506e-05, "loss": 2.9328, "step": 46946 }, { "epoch": 2.3, "grad_norm": 0.7348080277442932, "learning_rate": 7.68969409453293e-05, "loss": 2.8666, "step": 46947 }, { "epoch": 2.3, "grad_norm": 0.738717257976532, "learning_rate": 7.688664828161861e-05, "loss": 2.9959, "step": 46948 }, { "epoch": 2.3, "grad_norm": 0.7138335108757019, "learning_rate": 7.687635620554549e-05, "loss": 3.0209, "step": 46949 }, { "epoch": 2.3, "grad_norm": 0.6965306997299194, "learning_rate": 7.686606471713708e-05, "loss": 2.7597, "step": 46950 }, { "epoch": 2.3, "grad_norm": 0.702285885810852, "learning_rate": 7.685577381642059e-05, "loss": 2.8853, "step": 46951 }, { "epoch": 2.3, "grad_norm": 0.7388809323310852, "learning_rate": 7.684548350342305e-05, "loss": 2.8097, "step": 46952 }, { "epoch": 2.3, "grad_norm": 0.7757939696311951, "learning_rate": 7.683519377817156e-05, "loss": 2.7593, "step": 46953 }, { "epoch": 2.3, "grad_norm": 0.7448767423629761, "learning_rate": 7.682490464069315e-05, "loss": 2.9256, "step": 46954 }, { "epoch": 2.3, "grad_norm": 0.7056995630264282, "learning_rate": 7.6814616091015e-05, "loss": 2.8403, "step": 46955 }, { "epoch": 2.3, "grad_norm": 0.6833332180976868, "learning_rate": 7.680432812916427e-05, "loss": 2.839, "step": 46956 }, { "epoch": 2.3, "grad_norm": 0.6968466639518738, "learning_rate": 7.679404075516786e-05, "loss": 2.8719, "step": 46957 }, { "epoch": 2.3, "grad_norm": 0.7767067551612854, "learning_rate": 7.678375396905314e-05, "loss": 3.0657, "step": 46958 }, { "epoch": 2.3, "grad_norm": 0.690322995185852, "learning_rate": 7.677346777084691e-05, "loss": 2.9365, "step": 46959 }, { "epoch": 2.3, "grad_norm": 0.7153850197792053, "learning_rate": 7.676318216057654e-05, "loss": 2.8756, "step": 46960 }, { "epoch": 2.3, "grad_norm": 0.6876060962677002, "learning_rate": 7.675289713826896e-05, "loss": 2.8716, "step": 46961 }, { "epoch": 2.3, "grad_norm": 0.6872727870941162, "learning_rate": 7.674261270395119e-05, "loss": 2.7886, "step": 46962 }, { "epoch": 2.3, "grad_norm": 0.7221353054046631, "learning_rate": 7.673232885765053e-05, "loss": 2.7449, "step": 46963 }, { "epoch": 2.3, "grad_norm": 0.7340434789657593, "learning_rate": 7.672204559939381e-05, "loss": 2.7857, "step": 46964 }, { "epoch": 2.3, "grad_norm": 0.7158043384552002, "learning_rate": 7.671176292920828e-05, "loss": 2.9561, "step": 46965 }, { "epoch": 2.3, "grad_norm": 0.68252032995224, "learning_rate": 7.67014808471211e-05, "loss": 2.8707, "step": 46966 }, { "epoch": 2.3, "grad_norm": 0.7790015339851379, "learning_rate": 7.669119935315923e-05, "loss": 2.9822, "step": 46967 }, { "epoch": 2.3, "grad_norm": 0.7351877689361572, "learning_rate": 7.668091844734975e-05, "loss": 2.7752, "step": 46968 }, { "epoch": 2.3, "grad_norm": 0.7354260683059692, "learning_rate": 7.667063812971968e-05, "loss": 2.8619, "step": 46969 }, { "epoch": 2.3, "grad_norm": 0.7240676283836365, "learning_rate": 7.666035840029615e-05, "loss": 2.9733, "step": 46970 }, { "epoch": 2.3, "grad_norm": 0.6971290111541748, "learning_rate": 7.665007925910635e-05, "loss": 2.8701, "step": 46971 }, { "epoch": 2.3, "grad_norm": 0.7421733140945435, "learning_rate": 7.663980070617721e-05, "loss": 2.8046, "step": 46972 }, { "epoch": 2.3, "grad_norm": 0.7057068347930908, "learning_rate": 7.66295227415359e-05, "loss": 2.9185, "step": 46973 }, { "epoch": 2.3, "grad_norm": 0.704230010509491, "learning_rate": 7.661924536520943e-05, "loss": 2.961, "step": 46974 }, { "epoch": 2.3, "grad_norm": 0.7011248469352722, "learning_rate": 7.66089685772248e-05, "loss": 3.1366, "step": 46975 }, { "epoch": 2.3, "grad_norm": 0.7493485808372498, "learning_rate": 7.659869237760925e-05, "loss": 2.9313, "step": 46976 }, { "epoch": 2.3, "grad_norm": 0.7271580696105957, "learning_rate": 7.658841676638963e-05, "loss": 2.659, "step": 46977 }, { "epoch": 2.3, "grad_norm": 0.7556811571121216, "learning_rate": 7.657814174359323e-05, "loss": 2.7887, "step": 46978 }, { "epoch": 2.3, "grad_norm": 0.786780595779419, "learning_rate": 7.656786730924692e-05, "loss": 2.8375, "step": 46979 }, { "epoch": 2.3, "grad_norm": 0.7101176381111145, "learning_rate": 7.655759346337788e-05, "loss": 3.0459, "step": 46980 }, { "epoch": 2.3, "grad_norm": 0.753944456577301, "learning_rate": 7.654732020601318e-05, "loss": 3.0886, "step": 46981 }, { "epoch": 2.3, "grad_norm": 0.6981664896011353, "learning_rate": 7.653704753717983e-05, "loss": 2.9098, "step": 46982 }, { "epoch": 2.3, "grad_norm": 0.7053401470184326, "learning_rate": 7.652677545690493e-05, "loss": 2.8841, "step": 46983 }, { "epoch": 2.3, "grad_norm": 0.7201017737388611, "learning_rate": 7.65165039652154e-05, "loss": 2.8831, "step": 46984 }, { "epoch": 2.3, "grad_norm": 0.7298591732978821, "learning_rate": 7.650623306213845e-05, "loss": 2.8965, "step": 46985 }, { "epoch": 2.3, "grad_norm": 0.7369044423103333, "learning_rate": 7.649596274770098e-05, "loss": 2.7928, "step": 46986 }, { "epoch": 2.3, "grad_norm": 0.7387498021125793, "learning_rate": 7.648569302193014e-05, "loss": 3.0365, "step": 46987 }, { "epoch": 2.3, "grad_norm": 0.7449101209640503, "learning_rate": 7.647542388485308e-05, "loss": 2.7906, "step": 46988 }, { "epoch": 2.3, "grad_norm": 0.7396485805511475, "learning_rate": 7.64651553364967e-05, "loss": 2.9116, "step": 46989 }, { "epoch": 2.3, "grad_norm": 0.6997805237770081, "learning_rate": 7.64548873768881e-05, "loss": 2.776, "step": 46990 }, { "epoch": 2.3, "grad_norm": 0.6954943537712097, "learning_rate": 7.644462000605423e-05, "loss": 2.9855, "step": 46991 }, { "epoch": 2.3, "grad_norm": 0.7475723028182983, "learning_rate": 7.64343532240222e-05, "loss": 3.1449, "step": 46992 }, { "epoch": 2.3, "grad_norm": 0.7167398929595947, "learning_rate": 7.642408703081911e-05, "loss": 2.9414, "step": 46993 }, { "epoch": 2.3, "grad_norm": 0.7235413789749146, "learning_rate": 7.64138214264719e-05, "loss": 3.0369, "step": 46994 }, { "epoch": 2.3, "grad_norm": 0.7205052971839905, "learning_rate": 7.640355641100773e-05, "loss": 2.9086, "step": 46995 }, { "epoch": 2.3, "grad_norm": 0.7066383957862854, "learning_rate": 7.639329198445354e-05, "loss": 3.0573, "step": 46996 }, { "epoch": 2.3, "grad_norm": 0.7013208270072937, "learning_rate": 7.638302814683632e-05, "loss": 2.8161, "step": 46997 }, { "epoch": 2.3, "grad_norm": 0.7191861271858215, "learning_rate": 7.637276489818325e-05, "loss": 2.9867, "step": 46998 }, { "epoch": 2.3, "grad_norm": 0.7596316337585449, "learning_rate": 7.63625022385212e-05, "loss": 2.9672, "step": 46999 }, { "epoch": 2.3, "grad_norm": 0.7377591729164124, "learning_rate": 7.635224016787734e-05, "loss": 3.1616, "step": 47000 }, { "epoch": 2.3, "grad_norm": 0.6754733324050903, "learning_rate": 7.634197868627858e-05, "loss": 2.9541, "step": 47001 }, { "epoch": 2.3, "grad_norm": 0.6994279026985168, "learning_rate": 7.633171779375199e-05, "loss": 2.7826, "step": 47002 }, { "epoch": 2.3, "grad_norm": 0.7444949746131897, "learning_rate": 7.632145749032468e-05, "loss": 2.913, "step": 47003 }, { "epoch": 2.3, "grad_norm": 0.7129213213920593, "learning_rate": 7.631119777602365e-05, "loss": 2.9534, "step": 47004 }, { "epoch": 2.3, "grad_norm": 0.7305833101272583, "learning_rate": 7.630093865087584e-05, "loss": 2.8632, "step": 47005 }, { "epoch": 2.3, "grad_norm": 0.7327616214752197, "learning_rate": 7.629068011490822e-05, "loss": 2.9634, "step": 47006 }, { "epoch": 2.3, "grad_norm": 0.7405668497085571, "learning_rate": 7.628042216814789e-05, "loss": 3.079, "step": 47007 }, { "epoch": 2.3, "grad_norm": 0.721222996711731, "learning_rate": 7.627016481062196e-05, "loss": 3.0155, "step": 47008 }, { "epoch": 2.3, "grad_norm": 0.7441813945770264, "learning_rate": 7.625990804235729e-05, "loss": 2.9115, "step": 47009 }, { "epoch": 2.3, "grad_norm": 0.7018464803695679, "learning_rate": 7.624965186338106e-05, "loss": 2.7319, "step": 47010 }, { "epoch": 2.3, "grad_norm": 0.7066758275032043, "learning_rate": 7.623939627372015e-05, "loss": 2.9433, "step": 47011 }, { "epoch": 2.3, "grad_norm": 0.7435897588729858, "learning_rate": 7.622914127340156e-05, "loss": 3.0037, "step": 47012 }, { "epoch": 2.3, "grad_norm": 0.7094926238059998, "learning_rate": 7.62188868624524e-05, "loss": 2.9102, "step": 47013 }, { "epoch": 2.3, "grad_norm": 0.7312250733375549, "learning_rate": 7.620863304089955e-05, "loss": 2.9911, "step": 47014 }, { "epoch": 2.3, "grad_norm": 0.7396236658096313, "learning_rate": 7.619837980877019e-05, "loss": 3.1673, "step": 47015 }, { "epoch": 2.3, "grad_norm": 0.7736823558807373, "learning_rate": 7.61881271660911e-05, "loss": 2.6794, "step": 47016 }, { "epoch": 2.3, "grad_norm": 0.7479914426803589, "learning_rate": 7.617787511288953e-05, "loss": 3.0262, "step": 47017 }, { "epoch": 2.3, "grad_norm": 0.7253764271736145, "learning_rate": 7.616762364919236e-05, "loss": 2.7085, "step": 47018 }, { "epoch": 2.3, "grad_norm": 0.6933542490005493, "learning_rate": 7.615737277502649e-05, "loss": 3.026, "step": 47019 }, { "epoch": 2.3, "grad_norm": 0.7153028845787048, "learning_rate": 7.614712249041912e-05, "loss": 3.0809, "step": 47020 }, { "epoch": 2.3, "grad_norm": 0.7277600765228271, "learning_rate": 7.613687279539706e-05, "loss": 3.0096, "step": 47021 }, { "epoch": 2.3, "grad_norm": 0.7183275818824768, "learning_rate": 7.612662368998736e-05, "loss": 2.9682, "step": 47022 }, { "epoch": 2.3, "grad_norm": 0.7380486726760864, "learning_rate": 7.611637517421717e-05, "loss": 2.8966, "step": 47023 }, { "epoch": 2.3, "grad_norm": 0.7355697154998779, "learning_rate": 7.61061272481133e-05, "loss": 3.1274, "step": 47024 }, { "epoch": 2.3, "grad_norm": 0.6961773633956909, "learning_rate": 7.609587991170284e-05, "loss": 2.9941, "step": 47025 }, { "epoch": 2.3, "grad_norm": 0.7682431936264038, "learning_rate": 7.608563316501276e-05, "loss": 2.8109, "step": 47026 }, { "epoch": 2.3, "grad_norm": 0.7720531225204468, "learning_rate": 7.607538700806994e-05, "loss": 2.9825, "step": 47027 }, { "epoch": 2.3, "grad_norm": 0.725392758846283, "learning_rate": 7.606514144090154e-05, "loss": 3.0161, "step": 47028 }, { "epoch": 2.3, "grad_norm": 0.7036164999008179, "learning_rate": 7.605489646353437e-05, "loss": 2.7043, "step": 47029 }, { "epoch": 2.3, "grad_norm": 0.734400749206543, "learning_rate": 7.604465207599561e-05, "loss": 3.0003, "step": 47030 }, { "epoch": 2.3, "grad_norm": 0.7298409342765808, "learning_rate": 7.603440827831203e-05, "loss": 2.6084, "step": 47031 }, { "epoch": 2.3, "grad_norm": 0.7258648872375488, "learning_rate": 7.60241650705108e-05, "loss": 2.769, "step": 47032 }, { "epoch": 2.3, "grad_norm": 0.676993191242218, "learning_rate": 7.601392245261883e-05, "loss": 2.8381, "step": 47033 }, { "epoch": 2.31, "grad_norm": 0.7960457801818848, "learning_rate": 7.600368042466298e-05, "loss": 2.8848, "step": 47034 }, { "epoch": 2.31, "grad_norm": 0.7140025496482849, "learning_rate": 7.599343898667042e-05, "loss": 2.9983, "step": 47035 }, { "epoch": 2.31, "grad_norm": 0.707834005355835, "learning_rate": 7.598319813866794e-05, "loss": 2.9884, "step": 47036 }, { "epoch": 2.31, "grad_norm": 0.8292667269706726, "learning_rate": 7.59729578806826e-05, "loss": 2.8562, "step": 47037 }, { "epoch": 2.31, "grad_norm": 0.6989453434944153, "learning_rate": 7.596271821274148e-05, "loss": 2.7451, "step": 47038 }, { "epoch": 2.31, "grad_norm": 0.7435256838798523, "learning_rate": 7.595247913487135e-05, "loss": 2.8466, "step": 47039 }, { "epoch": 2.31, "grad_norm": 0.7675233483314514, "learning_rate": 7.594224064709946e-05, "loss": 2.978, "step": 47040 }, { "epoch": 2.31, "grad_norm": 0.7073809504508972, "learning_rate": 7.593200274945238e-05, "loss": 2.8363, "step": 47041 }, { "epoch": 2.31, "grad_norm": 0.7317488789558411, "learning_rate": 7.592176544195729e-05, "loss": 2.9328, "step": 47042 }, { "epoch": 2.31, "grad_norm": 0.7460283041000366, "learning_rate": 7.591152872464123e-05, "loss": 2.8801, "step": 47043 }, { "epoch": 2.31, "grad_norm": 0.7187976241111755, "learning_rate": 7.5901292597531e-05, "loss": 2.9518, "step": 47044 }, { "epoch": 2.31, "grad_norm": 0.7074181437492371, "learning_rate": 7.589105706065371e-05, "loss": 2.8221, "step": 47045 }, { "epoch": 2.31, "grad_norm": 0.6958547830581665, "learning_rate": 7.588082211403617e-05, "loss": 2.9221, "step": 47046 }, { "epoch": 2.31, "grad_norm": 0.7111318707466125, "learning_rate": 7.58705877577055e-05, "loss": 2.8711, "step": 47047 }, { "epoch": 2.31, "grad_norm": 0.6999523639678955, "learning_rate": 7.586035399168856e-05, "loss": 2.839, "step": 47048 }, { "epoch": 2.31, "grad_norm": 0.726325511932373, "learning_rate": 7.585012081601222e-05, "loss": 2.83, "step": 47049 }, { "epoch": 2.31, "grad_norm": 0.7136778235435486, "learning_rate": 7.583988823070362e-05, "loss": 2.9703, "step": 47050 }, { "epoch": 2.31, "grad_norm": 0.7415741086006165, "learning_rate": 7.582965623578954e-05, "loss": 2.8102, "step": 47051 }, { "epoch": 2.31, "grad_norm": 0.83095782995224, "learning_rate": 7.581942483129697e-05, "loss": 2.9474, "step": 47052 }, { "epoch": 2.31, "grad_norm": 0.7691175937652588, "learning_rate": 7.580919401725303e-05, "loss": 2.9403, "step": 47053 }, { "epoch": 2.31, "grad_norm": 0.7467166185379028, "learning_rate": 7.579896379368449e-05, "loss": 3.0011, "step": 47054 }, { "epoch": 2.31, "grad_norm": 0.6988757252693176, "learning_rate": 7.578873416061833e-05, "loss": 3.056, "step": 47055 }, { "epoch": 2.31, "grad_norm": 0.666217029094696, "learning_rate": 7.577850511808139e-05, "loss": 2.89, "step": 47056 }, { "epoch": 2.31, "grad_norm": 0.7455030083656311, "learning_rate": 7.576827666610075e-05, "loss": 2.9968, "step": 47057 }, { "epoch": 2.31, "grad_norm": 0.742010235786438, "learning_rate": 7.575804880470338e-05, "loss": 3.046, "step": 47058 }, { "epoch": 2.31, "grad_norm": 0.7573196291923523, "learning_rate": 7.574782153391608e-05, "loss": 2.8716, "step": 47059 }, { "epoch": 2.31, "grad_norm": 0.725332498550415, "learning_rate": 7.573759485376593e-05, "loss": 2.8598, "step": 47060 }, { "epoch": 2.31, "grad_norm": 0.7262852787971497, "learning_rate": 7.572736876427973e-05, "loss": 2.8723, "step": 47061 }, { "epoch": 2.31, "grad_norm": 0.6897485852241516, "learning_rate": 7.571714326548454e-05, "loss": 2.8583, "step": 47062 }, { "epoch": 2.31, "grad_norm": 0.6986570954322815, "learning_rate": 7.570691835740725e-05, "loss": 2.6931, "step": 47063 }, { "epoch": 2.31, "grad_norm": 0.7163300514221191, "learning_rate": 7.569669404007471e-05, "loss": 3.034, "step": 47064 }, { "epoch": 2.31, "grad_norm": 0.7340909838676453, "learning_rate": 7.568647031351396e-05, "loss": 2.7613, "step": 47065 }, { "epoch": 2.31, "grad_norm": 0.75508713722229, "learning_rate": 7.567624717775179e-05, "loss": 3.004, "step": 47066 }, { "epoch": 2.31, "grad_norm": 0.6939140558242798, "learning_rate": 7.566602463281528e-05, "loss": 2.7456, "step": 47067 }, { "epoch": 2.31, "grad_norm": 0.7123984694480896, "learning_rate": 7.565580267873124e-05, "loss": 2.9987, "step": 47068 }, { "epoch": 2.31, "grad_norm": 0.7077834010124207, "learning_rate": 7.56455813155267e-05, "loss": 2.9786, "step": 47069 }, { "epoch": 2.31, "grad_norm": 0.7424761652946472, "learning_rate": 7.563536054322856e-05, "loss": 2.8784, "step": 47070 }, { "epoch": 2.31, "grad_norm": 0.7440283894538879, "learning_rate": 7.562514036186359e-05, "loss": 2.7986, "step": 47071 }, { "epoch": 2.31, "grad_norm": 0.7975735068321228, "learning_rate": 7.56149207714589e-05, "loss": 2.9229, "step": 47072 }, { "epoch": 2.31, "grad_norm": 0.6961278915405273, "learning_rate": 7.560470177204124e-05, "loss": 2.8992, "step": 47073 }, { "epoch": 2.31, "grad_norm": 0.6985488533973694, "learning_rate": 7.55944833636376e-05, "loss": 2.6921, "step": 47074 }, { "epoch": 2.31, "grad_norm": 0.7156649827957153, "learning_rate": 7.558426554627499e-05, "loss": 2.8174, "step": 47075 }, { "epoch": 2.31, "grad_norm": 0.776537299156189, "learning_rate": 7.557404831998028e-05, "loss": 2.9881, "step": 47076 }, { "epoch": 2.31, "grad_norm": 0.7213156819343567, "learning_rate": 7.55638316847803e-05, "loss": 2.8593, "step": 47077 }, { "epoch": 2.31, "grad_norm": 0.7126505374908447, "learning_rate": 7.55536156407019e-05, "loss": 2.7945, "step": 47078 }, { "epoch": 2.31, "grad_norm": 0.7194722890853882, "learning_rate": 7.554340018777207e-05, "loss": 2.8476, "step": 47079 }, { "epoch": 2.31, "grad_norm": 0.718202531337738, "learning_rate": 7.553318532601785e-05, "loss": 2.6935, "step": 47080 }, { "epoch": 2.31, "grad_norm": 0.7163863778114319, "learning_rate": 7.552297105546588e-05, "loss": 3.0641, "step": 47081 }, { "epoch": 2.31, "grad_norm": 0.7556397914886475, "learning_rate": 7.551275737614335e-05, "loss": 2.9546, "step": 47082 }, { "epoch": 2.31, "grad_norm": 0.7361882925033569, "learning_rate": 7.55025442880769e-05, "loss": 2.9487, "step": 47083 }, { "epoch": 2.31, "grad_norm": 0.7101148962974548, "learning_rate": 7.549233179129362e-05, "loss": 2.9951, "step": 47084 }, { "epoch": 2.31, "grad_norm": 0.7225397229194641, "learning_rate": 7.548211988582035e-05, "loss": 3.103, "step": 47085 }, { "epoch": 2.31, "grad_norm": 0.7092863917350769, "learning_rate": 7.547190857168387e-05, "loss": 2.7833, "step": 47086 }, { "epoch": 2.31, "grad_norm": 0.6932755708694458, "learning_rate": 7.546169784891127e-05, "loss": 2.8817, "step": 47087 }, { "epoch": 2.31, "grad_norm": 0.6869648098945618, "learning_rate": 7.545148771752925e-05, "loss": 2.8556, "step": 47088 }, { "epoch": 2.31, "grad_norm": 0.7577425837516785, "learning_rate": 7.54412781775648e-05, "loss": 2.88, "step": 47089 }, { "epoch": 2.31, "grad_norm": 0.7419918179512024, "learning_rate": 7.543106922904488e-05, "loss": 2.9787, "step": 47090 }, { "epoch": 2.31, "grad_norm": 0.7026674747467041, "learning_rate": 7.542086087199631e-05, "loss": 2.8453, "step": 47091 }, { "epoch": 2.31, "grad_norm": 0.7323837280273438, "learning_rate": 7.541065310644597e-05, "loss": 2.9609, "step": 47092 }, { "epoch": 2.31, "grad_norm": 0.6933581233024597, "learning_rate": 7.540044593242067e-05, "loss": 3.0251, "step": 47093 }, { "epoch": 2.31, "grad_norm": 0.7326129078865051, "learning_rate": 7.539023934994734e-05, "loss": 2.8222, "step": 47094 }, { "epoch": 2.31, "grad_norm": 0.7666764259338379, "learning_rate": 7.5380033359053e-05, "loss": 2.9111, "step": 47095 }, { "epoch": 2.31, "grad_norm": 0.7760982513427734, "learning_rate": 7.536982795976436e-05, "loss": 2.896, "step": 47096 }, { "epoch": 2.31, "grad_norm": 0.6980563998222351, "learning_rate": 7.535962315210843e-05, "loss": 2.9076, "step": 47097 }, { "epoch": 2.31, "grad_norm": 0.730495810508728, "learning_rate": 7.534941893611201e-05, "loss": 3.0254, "step": 47098 }, { "epoch": 2.31, "grad_norm": 0.6933460235595703, "learning_rate": 7.533921531180191e-05, "loss": 2.8862, "step": 47099 }, { "epoch": 2.31, "grad_norm": 0.7039830088615417, "learning_rate": 7.532901227920517e-05, "loss": 2.9391, "step": 47100 }, { "epoch": 2.31, "grad_norm": 0.7359967827796936, "learning_rate": 7.531880983834845e-05, "loss": 2.8176, "step": 47101 }, { "epoch": 2.31, "grad_norm": 0.6860753893852234, "learning_rate": 7.530860798925888e-05, "loss": 2.8287, "step": 47102 }, { "epoch": 2.31, "grad_norm": 0.7534465789794922, "learning_rate": 7.529840673196309e-05, "loss": 2.7622, "step": 47103 }, { "epoch": 2.31, "grad_norm": 0.7103568315505981, "learning_rate": 7.528820606648805e-05, "loss": 3.1858, "step": 47104 }, { "epoch": 2.31, "grad_norm": 0.6781308054924011, "learning_rate": 7.527800599286073e-05, "loss": 3.0361, "step": 47105 }, { "epoch": 2.31, "grad_norm": 0.7126346230506897, "learning_rate": 7.526780651110788e-05, "loss": 2.8852, "step": 47106 }, { "epoch": 2.31, "grad_norm": 0.7078046798706055, "learning_rate": 7.525760762125636e-05, "loss": 2.8481, "step": 47107 }, { "epoch": 2.31, "grad_norm": 0.7195985317230225, "learning_rate": 7.524740932333298e-05, "loss": 2.8783, "step": 47108 }, { "epoch": 2.31, "grad_norm": 0.7203580737113953, "learning_rate": 7.523721161736468e-05, "loss": 2.9165, "step": 47109 }, { "epoch": 2.31, "grad_norm": 0.7243421673774719, "learning_rate": 7.522701450337837e-05, "loss": 2.8335, "step": 47110 }, { "epoch": 2.31, "grad_norm": 0.7248387336730957, "learning_rate": 7.521681798140076e-05, "loss": 2.9192, "step": 47111 }, { "epoch": 2.31, "grad_norm": 0.7272186875343323, "learning_rate": 7.520662205145888e-05, "loss": 2.9508, "step": 47112 }, { "epoch": 2.31, "grad_norm": 0.6788175702095032, "learning_rate": 7.519642671357951e-05, "loss": 2.9344, "step": 47113 }, { "epoch": 2.31, "grad_norm": 0.7410783767700195, "learning_rate": 7.518623196778939e-05, "loss": 2.9244, "step": 47114 }, { "epoch": 2.31, "grad_norm": 0.7222927212715149, "learning_rate": 7.517603781411558e-05, "loss": 2.7248, "step": 47115 }, { "epoch": 2.31, "grad_norm": 0.7629684805870056, "learning_rate": 7.516584425258471e-05, "loss": 3.0434, "step": 47116 }, { "epoch": 2.31, "grad_norm": 0.762963056564331, "learning_rate": 7.515565128322382e-05, "loss": 3.1039, "step": 47117 }, { "epoch": 2.31, "grad_norm": 0.7189714908599854, "learning_rate": 7.51454589060596e-05, "loss": 2.8823, "step": 47118 }, { "epoch": 2.31, "grad_norm": 0.7012630105018616, "learning_rate": 7.513526712111904e-05, "loss": 2.8067, "step": 47119 }, { "epoch": 2.31, "grad_norm": 0.734362006187439, "learning_rate": 7.512507592842892e-05, "loss": 2.7973, "step": 47120 }, { "epoch": 2.31, "grad_norm": 0.7629452347755432, "learning_rate": 7.511488532801598e-05, "loss": 3.0645, "step": 47121 }, { "epoch": 2.31, "grad_norm": 0.7741067409515381, "learning_rate": 7.510469531990723e-05, "loss": 2.7464, "step": 47122 }, { "epoch": 2.31, "grad_norm": 0.7644525170326233, "learning_rate": 7.509450590412936e-05, "loss": 2.8866, "step": 47123 }, { "epoch": 2.31, "grad_norm": 0.7327432036399841, "learning_rate": 7.508431708070927e-05, "loss": 2.83, "step": 47124 }, { "epoch": 2.31, "grad_norm": 0.6900554299354553, "learning_rate": 7.507412884967387e-05, "loss": 2.9412, "step": 47125 }, { "epoch": 2.31, "grad_norm": 0.742673933506012, "learning_rate": 7.506394121104988e-05, "loss": 2.7973, "step": 47126 }, { "epoch": 2.31, "grad_norm": 0.7525250315666199, "learning_rate": 7.505375416486423e-05, "loss": 2.7082, "step": 47127 }, { "epoch": 2.31, "grad_norm": 0.6825203895568848, "learning_rate": 7.50435677111437e-05, "loss": 2.8795, "step": 47128 }, { "epoch": 2.31, "grad_norm": 0.6943187117576599, "learning_rate": 7.503338184991506e-05, "loss": 3.0387, "step": 47129 }, { "epoch": 2.31, "grad_norm": 0.781815767288208, "learning_rate": 7.502319658120526e-05, "loss": 3.0341, "step": 47130 }, { "epoch": 2.31, "grad_norm": 0.7488953471183777, "learning_rate": 7.5013011905041e-05, "loss": 2.9886, "step": 47131 }, { "epoch": 2.31, "grad_norm": 0.7378432750701904, "learning_rate": 7.500282782144923e-05, "loss": 2.9618, "step": 47132 }, { "epoch": 2.31, "grad_norm": 0.6865012049674988, "learning_rate": 7.499264433045663e-05, "loss": 2.6925, "step": 47133 }, { "epoch": 2.31, "grad_norm": 0.7527883648872375, "learning_rate": 7.498246143209022e-05, "loss": 2.9324, "step": 47134 }, { "epoch": 2.31, "grad_norm": 0.7522280216217041, "learning_rate": 7.497227912637666e-05, "loss": 3.007, "step": 47135 }, { "epoch": 2.31, "grad_norm": 0.7576622366905212, "learning_rate": 7.496209741334274e-05, "loss": 2.9856, "step": 47136 }, { "epoch": 2.31, "grad_norm": 0.7035020589828491, "learning_rate": 7.49519162930154e-05, "loss": 3.0091, "step": 47137 }, { "epoch": 2.31, "grad_norm": 0.7081121206283569, "learning_rate": 7.494173576542137e-05, "loss": 2.7836, "step": 47138 }, { "epoch": 2.31, "grad_norm": 0.7172082662582397, "learning_rate": 7.493155583058747e-05, "loss": 2.775, "step": 47139 }, { "epoch": 2.31, "grad_norm": 0.7473821043968201, "learning_rate": 7.492137648854062e-05, "loss": 3.1286, "step": 47140 }, { "epoch": 2.31, "grad_norm": 0.7130926847457886, "learning_rate": 7.491119773930754e-05, "loss": 2.9354, "step": 47141 }, { "epoch": 2.31, "grad_norm": 0.7745637893676758, "learning_rate": 7.490101958291504e-05, "loss": 2.7904, "step": 47142 }, { "epoch": 2.31, "grad_norm": 0.6880003213882446, "learning_rate": 7.489084201938985e-05, "loss": 2.8231, "step": 47143 }, { "epoch": 2.31, "grad_norm": 0.7113720178604126, "learning_rate": 7.488066504875897e-05, "loss": 2.9742, "step": 47144 }, { "epoch": 2.31, "grad_norm": 0.710329532623291, "learning_rate": 7.4870488671049e-05, "loss": 3.0068, "step": 47145 }, { "epoch": 2.31, "grad_norm": 0.7468894720077515, "learning_rate": 7.486031288628682e-05, "loss": 2.9647, "step": 47146 }, { "epoch": 2.31, "grad_norm": 0.7187853455543518, "learning_rate": 7.485013769449935e-05, "loss": 2.9264, "step": 47147 }, { "epoch": 2.31, "grad_norm": 0.6992723941802979, "learning_rate": 7.483996309571319e-05, "loss": 2.8219, "step": 47148 }, { "epoch": 2.31, "grad_norm": 0.7436049580574036, "learning_rate": 7.482978908995532e-05, "loss": 2.9799, "step": 47149 }, { "epoch": 2.31, "grad_norm": 0.7207313776016235, "learning_rate": 7.481961567725246e-05, "loss": 2.915, "step": 47150 }, { "epoch": 2.31, "grad_norm": 0.7421787977218628, "learning_rate": 7.480944285763127e-05, "loss": 2.8734, "step": 47151 }, { "epoch": 2.31, "grad_norm": 0.73797607421875, "learning_rate": 7.479927063111879e-05, "loss": 2.9639, "step": 47152 }, { "epoch": 2.31, "grad_norm": 0.7333713173866272, "learning_rate": 7.478909899774161e-05, "loss": 2.8489, "step": 47153 }, { "epoch": 2.31, "grad_norm": 0.717434823513031, "learning_rate": 7.477892795752669e-05, "loss": 2.7659, "step": 47154 }, { "epoch": 2.31, "grad_norm": 0.715705156326294, "learning_rate": 7.476875751050063e-05, "loss": 2.7922, "step": 47155 }, { "epoch": 2.31, "grad_norm": 0.7075499892234802, "learning_rate": 7.475858765669038e-05, "loss": 3.0544, "step": 47156 }, { "epoch": 2.31, "grad_norm": 0.7804867625236511, "learning_rate": 7.474841839612269e-05, "loss": 2.7703, "step": 47157 }, { "epoch": 2.31, "grad_norm": 0.7036874890327454, "learning_rate": 7.473824972882422e-05, "loss": 2.846, "step": 47158 }, { "epoch": 2.31, "grad_norm": 0.7310692071914673, "learning_rate": 7.472808165482195e-05, "loss": 2.9657, "step": 47159 }, { "epoch": 2.31, "grad_norm": 0.7222773432731628, "learning_rate": 7.471791417414246e-05, "loss": 3.0682, "step": 47160 }, { "epoch": 2.31, "grad_norm": 0.6930948495864868, "learning_rate": 7.47077472868126e-05, "loss": 2.6749, "step": 47161 }, { "epoch": 2.31, "grad_norm": 0.748211681842804, "learning_rate": 7.46975809928593e-05, "loss": 2.9393, "step": 47162 }, { "epoch": 2.31, "grad_norm": 0.7243354916572571, "learning_rate": 7.468741529230911e-05, "loss": 2.8963, "step": 47163 }, { "epoch": 2.31, "grad_norm": 0.7261446714401245, "learning_rate": 7.4677250185189e-05, "loss": 2.9313, "step": 47164 }, { "epoch": 2.31, "grad_norm": 0.7376651167869568, "learning_rate": 7.466708567152565e-05, "loss": 2.7539, "step": 47165 }, { "epoch": 2.31, "grad_norm": 0.7748557925224304, "learning_rate": 7.465692175134575e-05, "loss": 2.8347, "step": 47166 }, { "epoch": 2.31, "grad_norm": 0.6955622434616089, "learning_rate": 7.46467584246762e-05, "loss": 2.6828, "step": 47167 }, { "epoch": 2.31, "grad_norm": 0.7150453329086304, "learning_rate": 7.463659569154367e-05, "loss": 2.9824, "step": 47168 }, { "epoch": 2.31, "grad_norm": 0.7358688712120056, "learning_rate": 7.462643355197506e-05, "loss": 2.9723, "step": 47169 }, { "epoch": 2.31, "grad_norm": 0.7066740393638611, "learning_rate": 7.461627200599694e-05, "loss": 2.6598, "step": 47170 }, { "epoch": 2.31, "grad_norm": 0.7047446370124817, "learning_rate": 7.46061110536363e-05, "loss": 2.9277, "step": 47171 }, { "epoch": 2.31, "grad_norm": 0.7581452131271362, "learning_rate": 7.459595069491976e-05, "loss": 2.9114, "step": 47172 }, { "epoch": 2.31, "grad_norm": 0.7496849894523621, "learning_rate": 7.4585790929874e-05, "loss": 2.9983, "step": 47173 }, { "epoch": 2.31, "grad_norm": 0.7440602779388428, "learning_rate": 7.4575631758526e-05, "loss": 3.0909, "step": 47174 }, { "epoch": 2.31, "grad_norm": 0.7057591080665588, "learning_rate": 7.456547318090234e-05, "loss": 2.9252, "step": 47175 }, { "epoch": 2.31, "grad_norm": 0.7352858781814575, "learning_rate": 7.455531519702978e-05, "loss": 2.806, "step": 47176 }, { "epoch": 2.31, "grad_norm": 0.7194220423698425, "learning_rate": 7.454515780693527e-05, "loss": 2.9611, "step": 47177 }, { "epoch": 2.31, "grad_norm": 0.7085871696472168, "learning_rate": 7.453500101064542e-05, "loss": 3.1392, "step": 47178 }, { "epoch": 2.31, "grad_norm": 0.7426517605781555, "learning_rate": 7.452484480818694e-05, "loss": 2.7845, "step": 47179 }, { "epoch": 2.31, "grad_norm": 0.671974241733551, "learning_rate": 7.451468919958657e-05, "loss": 2.9442, "step": 47180 }, { "epoch": 2.31, "grad_norm": 0.6885994672775269, "learning_rate": 7.450453418487112e-05, "loss": 2.8299, "step": 47181 }, { "epoch": 2.31, "grad_norm": 0.7266910672187805, "learning_rate": 7.44943797640674e-05, "loss": 3.0384, "step": 47182 }, { "epoch": 2.31, "grad_norm": 0.7851859927177429, "learning_rate": 7.448422593720203e-05, "loss": 2.8663, "step": 47183 }, { "epoch": 2.31, "grad_norm": 0.704609751701355, "learning_rate": 7.447407270430185e-05, "loss": 2.9252, "step": 47184 }, { "epoch": 2.31, "grad_norm": 0.705119252204895, "learning_rate": 7.446392006539351e-05, "loss": 3.0968, "step": 47185 }, { "epoch": 2.31, "grad_norm": 0.739789605140686, "learning_rate": 7.445376802050387e-05, "loss": 2.7645, "step": 47186 }, { "epoch": 2.31, "grad_norm": 0.7058214545249939, "learning_rate": 7.444361656965956e-05, "loss": 2.7778, "step": 47187 }, { "epoch": 2.31, "grad_norm": 0.6922550201416016, "learning_rate": 7.443346571288733e-05, "loss": 3.1309, "step": 47188 }, { "epoch": 2.31, "grad_norm": 0.7196448445320129, "learning_rate": 7.4423315450214e-05, "loss": 2.9757, "step": 47189 }, { "epoch": 2.31, "grad_norm": 0.7384520173072815, "learning_rate": 7.441316578166617e-05, "loss": 2.8736, "step": 47190 }, { "epoch": 2.31, "grad_norm": 0.8098287582397461, "learning_rate": 7.440301670727062e-05, "loss": 2.8157, "step": 47191 }, { "epoch": 2.31, "grad_norm": 0.7096481919288635, "learning_rate": 7.439286822705423e-05, "loss": 2.9116, "step": 47192 }, { "epoch": 2.31, "grad_norm": 0.7278345823287964, "learning_rate": 7.438272034104356e-05, "loss": 2.9966, "step": 47193 }, { "epoch": 2.31, "grad_norm": 0.7431257367134094, "learning_rate": 7.437257304926541e-05, "loss": 2.9264, "step": 47194 }, { "epoch": 2.31, "grad_norm": 0.7055538892745972, "learning_rate": 7.436242635174638e-05, "loss": 2.9575, "step": 47195 }, { "epoch": 2.31, "grad_norm": 0.7002086639404297, "learning_rate": 7.43522802485133e-05, "loss": 2.7348, "step": 47196 }, { "epoch": 2.31, "grad_norm": 0.7327378988265991, "learning_rate": 7.434213473959299e-05, "loss": 2.9672, "step": 47197 }, { "epoch": 2.31, "grad_norm": 0.6708829402923584, "learning_rate": 7.4331989825012e-05, "loss": 2.9387, "step": 47198 }, { "epoch": 2.31, "grad_norm": 0.6982355117797852, "learning_rate": 7.432184550479715e-05, "loss": 2.7249, "step": 47199 }, { "epoch": 2.31, "grad_norm": 0.719652533531189, "learning_rate": 7.431170177897514e-05, "loss": 3.0177, "step": 47200 }, { "epoch": 2.31, "grad_norm": 0.7651442885398865, "learning_rate": 7.430155864757261e-05, "loss": 3.0903, "step": 47201 }, { "epoch": 2.31, "grad_norm": 0.7121278047561646, "learning_rate": 7.42914161106164e-05, "loss": 2.9088, "step": 47202 }, { "epoch": 2.31, "grad_norm": 0.6944317817687988, "learning_rate": 7.42812741681331e-05, "loss": 3.1477, "step": 47203 }, { "epoch": 2.31, "grad_norm": 0.7282336950302124, "learning_rate": 7.427113282014955e-05, "loss": 2.9578, "step": 47204 }, { "epoch": 2.31, "grad_norm": 0.6893202662467957, "learning_rate": 7.426099206669234e-05, "loss": 2.8417, "step": 47205 }, { "epoch": 2.31, "grad_norm": 0.7046981453895569, "learning_rate": 7.425085190778818e-05, "loss": 2.9142, "step": 47206 }, { "epoch": 2.31, "grad_norm": 0.694952130317688, "learning_rate": 7.424071234346395e-05, "loss": 3.0216, "step": 47207 }, { "epoch": 2.31, "grad_norm": 0.7959489822387695, "learning_rate": 7.423057337374623e-05, "loss": 2.7588, "step": 47208 }, { "epoch": 2.31, "grad_norm": 0.8016543388366699, "learning_rate": 7.422043499866174e-05, "loss": 2.9142, "step": 47209 }, { "epoch": 2.31, "grad_norm": 0.7541260719299316, "learning_rate": 7.421029721823706e-05, "loss": 2.8306, "step": 47210 }, { "epoch": 2.31, "grad_norm": 0.7044775485992432, "learning_rate": 7.420016003249904e-05, "loss": 2.8552, "step": 47211 }, { "epoch": 2.31, "grad_norm": 0.704594075679779, "learning_rate": 7.419002344147441e-05, "loss": 3.0789, "step": 47212 }, { "epoch": 2.31, "grad_norm": 0.7283633351325989, "learning_rate": 7.417988744518968e-05, "loss": 2.8004, "step": 47213 }, { "epoch": 2.31, "grad_norm": 0.9223310351371765, "learning_rate": 7.416975204367182e-05, "loss": 2.9323, "step": 47214 }, { "epoch": 2.31, "grad_norm": 0.7180284857749939, "learning_rate": 7.41596172369473e-05, "loss": 3.002, "step": 47215 }, { "epoch": 2.31, "grad_norm": 0.7672765254974365, "learning_rate": 7.414948302504284e-05, "loss": 3.1372, "step": 47216 }, { "epoch": 2.31, "grad_norm": 0.7458218932151794, "learning_rate": 7.413934940798525e-05, "loss": 2.8517, "step": 47217 }, { "epoch": 2.31, "grad_norm": 0.6984786987304688, "learning_rate": 7.412921638580106e-05, "loss": 2.946, "step": 47218 }, { "epoch": 2.31, "grad_norm": 0.7315978407859802, "learning_rate": 7.411908395851715e-05, "loss": 2.9058, "step": 47219 }, { "epoch": 2.31, "grad_norm": 0.7283531427383423, "learning_rate": 7.410895212616e-05, "loss": 3.0075, "step": 47220 }, { "epoch": 2.31, "grad_norm": 0.7277526259422302, "learning_rate": 7.409882088875649e-05, "loss": 2.9993, "step": 47221 }, { "epoch": 2.31, "grad_norm": 0.7374258637428284, "learning_rate": 7.408869024633321e-05, "loss": 2.9424, "step": 47222 }, { "epoch": 2.31, "grad_norm": 0.7135753035545349, "learning_rate": 7.407856019891673e-05, "loss": 2.9005, "step": 47223 }, { "epoch": 2.31, "grad_norm": 0.7380639910697937, "learning_rate": 7.406843074653395e-05, "loss": 3.0794, "step": 47224 }, { "epoch": 2.31, "grad_norm": 0.7215073108673096, "learning_rate": 7.405830188921134e-05, "loss": 3.0077, "step": 47225 }, { "epoch": 2.31, "grad_norm": 0.7313400506973267, "learning_rate": 7.404817362697579e-05, "loss": 3.0572, "step": 47226 }, { "epoch": 2.31, "grad_norm": 0.7082253694534302, "learning_rate": 7.403804595985377e-05, "loss": 2.9114, "step": 47227 }, { "epoch": 2.31, "grad_norm": 0.7084292769432068, "learning_rate": 7.402791888787204e-05, "loss": 2.8432, "step": 47228 }, { "epoch": 2.31, "grad_norm": 0.6946665048599243, "learning_rate": 7.401779241105739e-05, "loss": 2.9464, "step": 47229 }, { "epoch": 2.31, "grad_norm": 0.697638988494873, "learning_rate": 7.400766652943636e-05, "loss": 2.7472, "step": 47230 }, { "epoch": 2.31, "grad_norm": 0.7209494709968567, "learning_rate": 7.399754124303568e-05, "loss": 2.576, "step": 47231 }, { "epoch": 2.31, "grad_norm": 0.7583011984825134, "learning_rate": 7.398741655188184e-05, "loss": 3.1185, "step": 47232 }, { "epoch": 2.31, "grad_norm": 0.8109094500541687, "learning_rate": 7.39772924560017e-05, "loss": 3.1152, "step": 47233 }, { "epoch": 2.31, "grad_norm": 0.7156438231468201, "learning_rate": 7.396716895542191e-05, "loss": 2.9722, "step": 47234 }, { "epoch": 2.31, "grad_norm": 0.712462842464447, "learning_rate": 7.395704605016903e-05, "loss": 2.8642, "step": 47235 }, { "epoch": 2.31, "grad_norm": 0.6899236440658569, "learning_rate": 7.394692374026989e-05, "loss": 2.9877, "step": 47236 }, { "epoch": 2.31, "grad_norm": 0.7041913270950317, "learning_rate": 7.393680202575106e-05, "loss": 2.9676, "step": 47237 }, { "epoch": 2.32, "grad_norm": 0.7307323217391968, "learning_rate": 7.392668090663905e-05, "loss": 2.9229, "step": 47238 }, { "epoch": 2.32, "grad_norm": 0.7059488892555237, "learning_rate": 7.39165603829608e-05, "loss": 3.1652, "step": 47239 }, { "epoch": 2.32, "grad_norm": 0.723430335521698, "learning_rate": 7.39064404547427e-05, "loss": 2.8094, "step": 47240 }, { "epoch": 2.32, "grad_norm": 0.700806200504303, "learning_rate": 7.38963211220116e-05, "loss": 2.7546, "step": 47241 }, { "epoch": 2.32, "grad_norm": 0.7575515508651733, "learning_rate": 7.388620238479399e-05, "loss": 2.9442, "step": 47242 }, { "epoch": 2.32, "grad_norm": 0.7537017464637756, "learning_rate": 7.387608424311671e-05, "loss": 2.818, "step": 47243 }, { "epoch": 2.32, "grad_norm": 0.7906556129455566, "learning_rate": 7.38659666970063e-05, "loss": 2.6717, "step": 47244 }, { "epoch": 2.32, "grad_norm": 0.7230514287948608, "learning_rate": 7.385584974648932e-05, "loss": 2.937, "step": 47245 }, { "epoch": 2.32, "grad_norm": 0.7493695020675659, "learning_rate": 7.384573339159261e-05, "loss": 2.8982, "step": 47246 }, { "epoch": 2.32, "grad_norm": 0.6845947504043579, "learning_rate": 7.383561763234262e-05, "loss": 2.7624, "step": 47247 }, { "epoch": 2.32, "grad_norm": 0.6852810382843018, "learning_rate": 7.382550246876609e-05, "loss": 2.8006, "step": 47248 }, { "epoch": 2.32, "grad_norm": 0.7320659756660461, "learning_rate": 7.381538790088974e-05, "loss": 2.9585, "step": 47249 }, { "epoch": 2.32, "grad_norm": 0.704412579536438, "learning_rate": 7.380527392874004e-05, "loss": 2.7925, "step": 47250 }, { "epoch": 2.32, "grad_norm": 0.7940717935562134, "learning_rate": 7.379516055234381e-05, "loss": 3.0233, "step": 47251 }, { "epoch": 2.32, "grad_norm": 0.7513147592544556, "learning_rate": 7.378504777172757e-05, "loss": 3.0405, "step": 47252 }, { "epoch": 2.32, "grad_norm": 0.7274273633956909, "learning_rate": 7.377493558691793e-05, "loss": 3.1427, "step": 47253 }, { "epoch": 2.32, "grad_norm": 0.7024219632148743, "learning_rate": 7.376482399794164e-05, "loss": 2.915, "step": 47254 }, { "epoch": 2.32, "grad_norm": 0.7163192629814148, "learning_rate": 7.375471300482517e-05, "loss": 3.0311, "step": 47255 }, { "epoch": 2.32, "grad_norm": 0.685631513595581, "learning_rate": 7.374460260759531e-05, "loss": 2.7272, "step": 47256 }, { "epoch": 2.32, "grad_norm": 0.7799549102783203, "learning_rate": 7.373449280627858e-05, "loss": 2.7492, "step": 47257 }, { "epoch": 2.32, "grad_norm": 0.6964437365531921, "learning_rate": 7.372438360090174e-05, "loss": 2.9227, "step": 47258 }, { "epoch": 2.32, "grad_norm": 0.7885956764221191, "learning_rate": 7.371427499149127e-05, "loss": 2.8981, "step": 47259 }, { "epoch": 2.32, "grad_norm": 0.7466039657592773, "learning_rate": 7.370416697807384e-05, "loss": 2.7457, "step": 47260 }, { "epoch": 2.32, "grad_norm": 0.7249606251716614, "learning_rate": 7.369405956067612e-05, "loss": 2.8658, "step": 47261 }, { "epoch": 2.32, "grad_norm": 0.7615867853164673, "learning_rate": 7.368395273932459e-05, "loss": 2.8922, "step": 47262 }, { "epoch": 2.32, "grad_norm": 0.6936567425727844, "learning_rate": 7.367384651404602e-05, "loss": 2.9543, "step": 47263 }, { "epoch": 2.32, "grad_norm": 0.7578475475311279, "learning_rate": 7.366374088486705e-05, "loss": 2.954, "step": 47264 }, { "epoch": 2.32, "grad_norm": 0.7116115689277649, "learning_rate": 7.365363585181413e-05, "loss": 3.0167, "step": 47265 }, { "epoch": 2.32, "grad_norm": 0.7062137722969055, "learning_rate": 7.364353141491413e-05, "loss": 2.9352, "step": 47266 }, { "epoch": 2.32, "grad_norm": 0.7953941226005554, "learning_rate": 7.363342757419338e-05, "loss": 2.9842, "step": 47267 }, { "epoch": 2.32, "grad_norm": 0.7345401644706726, "learning_rate": 7.362332432967857e-05, "loss": 2.936, "step": 47268 }, { "epoch": 2.32, "grad_norm": 0.768124520778656, "learning_rate": 7.361322168139647e-05, "loss": 2.9616, "step": 47269 }, { "epoch": 2.32, "grad_norm": 0.7298167943954468, "learning_rate": 7.360311962937347e-05, "loss": 3.0135, "step": 47270 }, { "epoch": 2.32, "grad_norm": 0.6858828067779541, "learning_rate": 7.35930181736364e-05, "loss": 3.1113, "step": 47271 }, { "epoch": 2.32, "grad_norm": 0.7418141961097717, "learning_rate": 7.358291731421165e-05, "loss": 2.8592, "step": 47272 }, { "epoch": 2.32, "grad_norm": 0.7051644921302795, "learning_rate": 7.357281705112598e-05, "loss": 2.9234, "step": 47273 }, { "epoch": 2.32, "grad_norm": 0.7438302040100098, "learning_rate": 7.356271738440597e-05, "loss": 2.9264, "step": 47274 }, { "epoch": 2.32, "grad_norm": 0.700322151184082, "learning_rate": 7.355261831407807e-05, "loss": 2.9011, "step": 47275 }, { "epoch": 2.32, "grad_norm": 0.7566295862197876, "learning_rate": 7.354251984016907e-05, "loss": 3.0113, "step": 47276 }, { "epoch": 2.32, "grad_norm": 0.7644765377044678, "learning_rate": 7.353242196270545e-05, "loss": 2.7803, "step": 47277 }, { "epoch": 2.32, "grad_norm": 0.7297467589378357, "learning_rate": 7.352232468171381e-05, "loss": 2.927, "step": 47278 }, { "epoch": 2.32, "grad_norm": 0.7533126473426819, "learning_rate": 7.351222799722087e-05, "loss": 2.7635, "step": 47279 }, { "epoch": 2.32, "grad_norm": 0.7412834763526917, "learning_rate": 7.350213190925316e-05, "loss": 2.8367, "step": 47280 }, { "epoch": 2.32, "grad_norm": 0.7042602300643921, "learning_rate": 7.349203641783722e-05, "loss": 2.9655, "step": 47281 }, { "epoch": 2.32, "grad_norm": 0.7818167209625244, "learning_rate": 7.348194152299956e-05, "loss": 2.9304, "step": 47282 }, { "epoch": 2.32, "grad_norm": 0.6732924580574036, "learning_rate": 7.34718472247669e-05, "loss": 2.804, "step": 47283 }, { "epoch": 2.32, "grad_norm": 0.7412101030349731, "learning_rate": 7.346175352316587e-05, "loss": 2.9378, "step": 47284 }, { "epoch": 2.32, "grad_norm": 0.7184247374534607, "learning_rate": 7.345166041822291e-05, "loss": 2.9313, "step": 47285 }, { "epoch": 2.32, "grad_norm": 0.7510634064674377, "learning_rate": 7.344156790996476e-05, "loss": 2.8656, "step": 47286 }, { "epoch": 2.32, "grad_norm": 0.6889684200286865, "learning_rate": 7.343147599841783e-05, "loss": 2.844, "step": 47287 }, { "epoch": 2.32, "grad_norm": 0.688102126121521, "learning_rate": 7.342138468360885e-05, "loss": 2.9992, "step": 47288 }, { "epoch": 2.32, "grad_norm": 0.7358808517456055, "learning_rate": 7.341129396556437e-05, "loss": 3.0776, "step": 47289 }, { "epoch": 2.32, "grad_norm": 0.7751854062080383, "learning_rate": 7.340120384431084e-05, "loss": 2.8425, "step": 47290 }, { "epoch": 2.32, "grad_norm": 0.7157254815101624, "learning_rate": 7.339111431987501e-05, "loss": 2.9434, "step": 47291 }, { "epoch": 2.32, "grad_norm": 0.703119695186615, "learning_rate": 7.338102539228328e-05, "loss": 2.9188, "step": 47292 }, { "epoch": 2.32, "grad_norm": 0.6870105862617493, "learning_rate": 7.337093706156232e-05, "loss": 3.0188, "step": 47293 }, { "epoch": 2.32, "grad_norm": 0.753438413143158, "learning_rate": 7.336084932773878e-05, "loss": 2.8878, "step": 47294 }, { "epoch": 2.32, "grad_norm": 0.715949535369873, "learning_rate": 7.335076219083915e-05, "loss": 2.9588, "step": 47295 }, { "epoch": 2.32, "grad_norm": 0.6934475898742676, "learning_rate": 7.334067565089001e-05, "loss": 2.8501, "step": 47296 }, { "epoch": 2.32, "grad_norm": 0.696337878704071, "learning_rate": 7.333058970791781e-05, "loss": 2.8632, "step": 47297 }, { "epoch": 2.32, "grad_norm": 0.7058655023574829, "learning_rate": 7.332050436194928e-05, "loss": 3.0071, "step": 47298 }, { "epoch": 2.32, "grad_norm": 0.7588328719139099, "learning_rate": 7.331041961301086e-05, "loss": 2.8746, "step": 47299 }, { "epoch": 2.32, "grad_norm": 0.7144933938980103, "learning_rate": 7.330033546112915e-05, "loss": 2.7623, "step": 47300 }, { "epoch": 2.32, "grad_norm": 0.7363554239273071, "learning_rate": 7.329025190633081e-05, "loss": 2.9335, "step": 47301 }, { "epoch": 2.32, "grad_norm": 0.7441504597663879, "learning_rate": 7.328016894864232e-05, "loss": 2.7166, "step": 47302 }, { "epoch": 2.32, "grad_norm": 0.7541495561599731, "learning_rate": 7.327008658809025e-05, "loss": 2.8355, "step": 47303 }, { "epoch": 2.32, "grad_norm": 0.7827197909355164, "learning_rate": 7.326000482470102e-05, "loss": 3.0587, "step": 47304 }, { "epoch": 2.32, "grad_norm": 0.7094131112098694, "learning_rate": 7.324992365850133e-05, "loss": 2.8584, "step": 47305 }, { "epoch": 2.32, "grad_norm": 0.7611328959465027, "learning_rate": 7.323984308951778e-05, "loss": 2.8228, "step": 47306 }, { "epoch": 2.32, "grad_norm": 0.6947973370552063, "learning_rate": 7.322976311777675e-05, "loss": 2.6941, "step": 47307 }, { "epoch": 2.32, "grad_norm": 0.7067661881446838, "learning_rate": 7.321968374330495e-05, "loss": 3.1039, "step": 47308 }, { "epoch": 2.32, "grad_norm": 0.7562273144721985, "learning_rate": 7.320960496612881e-05, "loss": 2.8796, "step": 47309 }, { "epoch": 2.32, "grad_norm": 0.7289214134216309, "learning_rate": 7.319952678627499e-05, "loss": 3.0467, "step": 47310 }, { "epoch": 2.32, "grad_norm": 0.7128106951713562, "learning_rate": 7.318944920376997e-05, "loss": 2.882, "step": 47311 }, { "epoch": 2.32, "grad_norm": 0.7181450128555298, "learning_rate": 7.31793722186402e-05, "loss": 3.0642, "step": 47312 }, { "epoch": 2.32, "grad_norm": 0.7351670265197754, "learning_rate": 7.316929583091238e-05, "loss": 2.862, "step": 47313 }, { "epoch": 2.32, "grad_norm": 0.693118691444397, "learning_rate": 7.315922004061292e-05, "loss": 2.9532, "step": 47314 }, { "epoch": 2.32, "grad_norm": 0.7117541432380676, "learning_rate": 7.314914484776841e-05, "loss": 2.8341, "step": 47315 }, { "epoch": 2.32, "grad_norm": 0.690302312374115, "learning_rate": 7.313907025240549e-05, "loss": 2.7515, "step": 47316 }, { "epoch": 2.32, "grad_norm": 0.7544472217559814, "learning_rate": 7.312899625455057e-05, "loss": 2.8819, "step": 47317 }, { "epoch": 2.32, "grad_norm": 0.7129111886024475, "learning_rate": 7.311892285423025e-05, "loss": 2.878, "step": 47318 }, { "epoch": 2.32, "grad_norm": 0.7087661623954773, "learning_rate": 7.31088500514709e-05, "loss": 2.8482, "step": 47319 }, { "epoch": 2.32, "grad_norm": 0.7653520703315735, "learning_rate": 7.30987778462992e-05, "loss": 2.7622, "step": 47320 }, { "epoch": 2.32, "grad_norm": 0.6924532055854797, "learning_rate": 7.308870623874171e-05, "loss": 2.8243, "step": 47321 }, { "epoch": 2.32, "grad_norm": 0.7128939032554626, "learning_rate": 7.307863522882483e-05, "loss": 2.8937, "step": 47322 }, { "epoch": 2.32, "grad_norm": 0.6976691484451294, "learning_rate": 7.306856481657526e-05, "loss": 2.7315, "step": 47323 }, { "epoch": 2.32, "grad_norm": 0.7400538921356201, "learning_rate": 7.30584950020194e-05, "loss": 2.7653, "step": 47324 }, { "epoch": 2.32, "grad_norm": 0.734775960445404, "learning_rate": 7.30484257851837e-05, "loss": 3.0132, "step": 47325 }, { "epoch": 2.32, "grad_norm": 0.7255701422691345, "learning_rate": 7.303835716609482e-05, "loss": 2.882, "step": 47326 }, { "epoch": 2.32, "grad_norm": 0.7620897889137268, "learning_rate": 7.302828914477918e-05, "loss": 2.9522, "step": 47327 }, { "epoch": 2.32, "grad_norm": 0.8050826787948608, "learning_rate": 7.301822172126344e-05, "loss": 2.9349, "step": 47328 }, { "epoch": 2.32, "grad_norm": 0.737463116645813, "learning_rate": 7.300815489557394e-05, "loss": 2.7589, "step": 47329 }, { "epoch": 2.32, "grad_norm": 0.7167901992797852, "learning_rate": 7.299808866773722e-05, "loss": 3.015, "step": 47330 }, { "epoch": 2.32, "grad_norm": 0.7121860384941101, "learning_rate": 7.298802303777999e-05, "loss": 2.9854, "step": 47331 }, { "epoch": 2.32, "grad_norm": 0.7176076769828796, "learning_rate": 7.297795800572858e-05, "loss": 2.8822, "step": 47332 }, { "epoch": 2.32, "grad_norm": 0.7200085520744324, "learning_rate": 7.296789357160958e-05, "loss": 3.0043, "step": 47333 }, { "epoch": 2.32, "grad_norm": 0.7072357535362244, "learning_rate": 7.295782973544932e-05, "loss": 2.8592, "step": 47334 }, { "epoch": 2.32, "grad_norm": 0.7582478523254395, "learning_rate": 7.294776649727446e-05, "loss": 3.0922, "step": 47335 }, { "epoch": 2.32, "grad_norm": 0.767278254032135, "learning_rate": 7.29377038571116e-05, "loss": 2.7644, "step": 47336 }, { "epoch": 2.32, "grad_norm": 0.7298552989959717, "learning_rate": 7.2927641814987e-05, "loss": 2.8319, "step": 47337 }, { "epoch": 2.32, "grad_norm": 0.6883513331413269, "learning_rate": 7.291758037092739e-05, "loss": 3.0519, "step": 47338 }, { "epoch": 2.32, "grad_norm": 0.7653185725212097, "learning_rate": 7.290751952495918e-05, "loss": 3.0538, "step": 47339 }, { "epoch": 2.32, "grad_norm": 0.7106703519821167, "learning_rate": 7.289745927710878e-05, "loss": 2.7821, "step": 47340 }, { "epoch": 2.32, "grad_norm": 0.7490907311439514, "learning_rate": 7.288739962740285e-05, "loss": 3.0965, "step": 47341 }, { "epoch": 2.32, "grad_norm": 0.7440788149833679, "learning_rate": 7.287734057586768e-05, "loss": 2.9511, "step": 47342 }, { "epoch": 2.32, "grad_norm": 0.7107495665550232, "learning_rate": 7.286728212252997e-05, "loss": 2.9284, "step": 47343 }, { "epoch": 2.32, "grad_norm": 0.7320402264595032, "learning_rate": 7.285722426741608e-05, "loss": 2.8895, "step": 47344 }, { "epoch": 2.32, "grad_norm": 0.7153285145759583, "learning_rate": 7.284716701055263e-05, "loss": 2.7128, "step": 47345 }, { "epoch": 2.32, "grad_norm": 0.7777250409126282, "learning_rate": 7.283711035196602e-05, "loss": 2.8695, "step": 47346 }, { "epoch": 2.32, "grad_norm": 0.7311616539955139, "learning_rate": 7.282705429168263e-05, "loss": 3.0246, "step": 47347 }, { "epoch": 2.32, "grad_norm": 0.7358239889144897, "learning_rate": 7.281699882972916e-05, "loss": 2.912, "step": 47348 }, { "epoch": 2.32, "grad_norm": 0.7160769701004028, "learning_rate": 7.280694396613194e-05, "loss": 3.0558, "step": 47349 }, { "epoch": 2.32, "grad_norm": 0.8619295358657837, "learning_rate": 7.279688970091744e-05, "loss": 2.6971, "step": 47350 }, { "epoch": 2.32, "grad_norm": 0.7436091303825378, "learning_rate": 7.278683603411235e-05, "loss": 2.9886, "step": 47351 }, { "epoch": 2.32, "grad_norm": 0.7118484973907471, "learning_rate": 7.277678296574288e-05, "loss": 2.9102, "step": 47352 }, { "epoch": 2.32, "grad_norm": 0.7110477089881897, "learning_rate": 7.276673049583576e-05, "loss": 2.9047, "step": 47353 }, { "epoch": 2.32, "grad_norm": 0.7068123817443848, "learning_rate": 7.275667862441731e-05, "loss": 3.0401, "step": 47354 }, { "epoch": 2.32, "grad_norm": 0.7033482789993286, "learning_rate": 7.274662735151396e-05, "loss": 2.8322, "step": 47355 }, { "epoch": 2.32, "grad_norm": 0.707513689994812, "learning_rate": 7.273657667715235e-05, "loss": 2.9297, "step": 47356 }, { "epoch": 2.32, "grad_norm": 0.7551565170288086, "learning_rate": 7.272652660135877e-05, "loss": 3.0123, "step": 47357 }, { "epoch": 2.32, "grad_norm": 0.7359175682067871, "learning_rate": 7.271647712415987e-05, "loss": 2.9357, "step": 47358 }, { "epoch": 2.32, "grad_norm": 0.6989636421203613, "learning_rate": 7.270642824558192e-05, "loss": 2.9436, "step": 47359 }, { "epoch": 2.32, "grad_norm": 0.7528616189956665, "learning_rate": 7.269637996565159e-05, "loss": 2.9145, "step": 47360 }, { "epoch": 2.32, "grad_norm": 0.7683088183403015, "learning_rate": 7.268633228439526e-05, "loss": 2.9134, "step": 47361 }, { "epoch": 2.32, "grad_norm": 0.7808603048324585, "learning_rate": 7.267628520183927e-05, "loss": 2.794, "step": 47362 }, { "epoch": 2.32, "grad_norm": 0.763262927532196, "learning_rate": 7.266623871801032e-05, "loss": 2.8442, "step": 47363 }, { "epoch": 2.32, "grad_norm": 0.7707550525665283, "learning_rate": 7.265619283293464e-05, "loss": 2.8606, "step": 47364 }, { "epoch": 2.32, "grad_norm": 0.7410865426063538, "learning_rate": 7.26461475466388e-05, "loss": 2.995, "step": 47365 }, { "epoch": 2.32, "grad_norm": 0.7402563095092773, "learning_rate": 7.263610285914933e-05, "loss": 2.8527, "step": 47366 }, { "epoch": 2.32, "grad_norm": 0.774781346321106, "learning_rate": 7.26260587704925e-05, "loss": 2.9038, "step": 47367 }, { "epoch": 2.32, "grad_norm": 0.7119985222816467, "learning_rate": 7.261601528069507e-05, "loss": 2.8396, "step": 47368 }, { "epoch": 2.32, "grad_norm": 0.8101449608802795, "learning_rate": 7.260597238978316e-05, "loss": 2.9007, "step": 47369 }, { "epoch": 2.32, "grad_norm": 0.7611187696456909, "learning_rate": 7.259593009778329e-05, "loss": 2.9946, "step": 47370 }, { "epoch": 2.32, "grad_norm": 0.7523656487464905, "learning_rate": 7.258588840472209e-05, "loss": 2.9665, "step": 47371 }, { "epoch": 2.32, "grad_norm": 0.7050994634628296, "learning_rate": 7.257584731062583e-05, "loss": 2.9543, "step": 47372 }, { "epoch": 2.32, "grad_norm": 0.7177386283874512, "learning_rate": 7.256580681552106e-05, "loss": 3.003, "step": 47373 }, { "epoch": 2.32, "grad_norm": 0.7362361550331116, "learning_rate": 7.255576691943413e-05, "loss": 2.6465, "step": 47374 }, { "epoch": 2.32, "grad_norm": 0.7049803733825684, "learning_rate": 7.25457276223916e-05, "loss": 2.9968, "step": 47375 }, { "epoch": 2.32, "grad_norm": 0.6966225504875183, "learning_rate": 7.253568892441988e-05, "loss": 2.8923, "step": 47376 }, { "epoch": 2.32, "grad_norm": 0.733306884765625, "learning_rate": 7.252565082554526e-05, "loss": 2.816, "step": 47377 }, { "epoch": 2.32, "grad_norm": 0.7428738474845886, "learning_rate": 7.251561332579441e-05, "loss": 2.5529, "step": 47378 }, { "epoch": 2.32, "grad_norm": 0.7241402268409729, "learning_rate": 7.250557642519354e-05, "loss": 2.6333, "step": 47379 }, { "epoch": 2.32, "grad_norm": 0.7124062776565552, "learning_rate": 7.249554012376931e-05, "loss": 2.9414, "step": 47380 }, { "epoch": 2.32, "grad_norm": 0.721976101398468, "learning_rate": 7.248550442154797e-05, "loss": 2.9294, "step": 47381 }, { "epoch": 2.32, "grad_norm": 0.7031937837600708, "learning_rate": 7.247546931855609e-05, "loss": 2.6558, "step": 47382 }, { "epoch": 2.32, "grad_norm": 0.7368708848953247, "learning_rate": 7.246543481482002e-05, "loss": 2.6632, "step": 47383 }, { "epoch": 2.32, "grad_norm": 0.7131219506263733, "learning_rate": 7.245540091036614e-05, "loss": 2.7965, "step": 47384 }, { "epoch": 2.32, "grad_norm": 0.7883298993110657, "learning_rate": 7.244536760522103e-05, "loss": 2.7718, "step": 47385 }, { "epoch": 2.32, "grad_norm": 0.8104693293571472, "learning_rate": 7.243533489941094e-05, "loss": 2.874, "step": 47386 }, { "epoch": 2.32, "grad_norm": 0.7419458031654358, "learning_rate": 7.242530279296239e-05, "loss": 2.8306, "step": 47387 }, { "epoch": 2.32, "grad_norm": 0.7081753015518188, "learning_rate": 7.241527128590185e-05, "loss": 2.8082, "step": 47388 }, { "epoch": 2.32, "grad_norm": 0.7221062183380127, "learning_rate": 7.240524037825561e-05, "loss": 2.83, "step": 47389 }, { "epoch": 2.32, "grad_norm": 0.736663281917572, "learning_rate": 7.239521007005026e-05, "loss": 2.8742, "step": 47390 }, { "epoch": 2.32, "grad_norm": 0.7576841711997986, "learning_rate": 7.238518036131214e-05, "loss": 2.9401, "step": 47391 }, { "epoch": 2.32, "grad_norm": 0.7570251822471619, "learning_rate": 7.237515125206755e-05, "loss": 2.7961, "step": 47392 }, { "epoch": 2.32, "grad_norm": 0.7038759589195251, "learning_rate": 7.236512274234308e-05, "loss": 2.9395, "step": 47393 }, { "epoch": 2.32, "grad_norm": 0.714281439781189, "learning_rate": 7.235509483216501e-05, "loss": 3.0216, "step": 47394 }, { "epoch": 2.32, "grad_norm": 0.7493951916694641, "learning_rate": 7.234506752155986e-05, "loss": 2.8871, "step": 47395 }, { "epoch": 2.32, "grad_norm": 0.7389973998069763, "learning_rate": 7.233504081055393e-05, "loss": 3.0943, "step": 47396 }, { "epoch": 2.32, "grad_norm": 0.7300322651863098, "learning_rate": 7.232501469917377e-05, "loss": 2.9865, "step": 47397 }, { "epoch": 2.32, "grad_norm": 0.6961638331413269, "learning_rate": 7.231498918744567e-05, "loss": 2.7805, "step": 47398 }, { "epoch": 2.32, "grad_norm": 0.7474333047866821, "learning_rate": 7.230496427539601e-05, "loss": 2.7427, "step": 47399 }, { "epoch": 2.32, "grad_norm": 0.7698484063148499, "learning_rate": 7.229493996305135e-05, "loss": 2.8524, "step": 47400 }, { "epoch": 2.32, "grad_norm": 0.7011622190475464, "learning_rate": 7.228491625043791e-05, "loss": 3.0087, "step": 47401 }, { "epoch": 2.32, "grad_norm": 0.7040664553642273, "learning_rate": 7.227489313758214e-05, "loss": 2.7225, "step": 47402 }, { "epoch": 2.32, "grad_norm": 0.7392282485961914, "learning_rate": 7.226487062451061e-05, "loss": 2.9377, "step": 47403 }, { "epoch": 2.32, "grad_norm": 0.697232723236084, "learning_rate": 7.225484871124953e-05, "loss": 2.9972, "step": 47404 }, { "epoch": 2.32, "grad_norm": 0.6810755133628845, "learning_rate": 7.224482739782538e-05, "loss": 3.0003, "step": 47405 }, { "epoch": 2.32, "grad_norm": 0.7662976384162903, "learning_rate": 7.223480668426444e-05, "loss": 2.6729, "step": 47406 }, { "epoch": 2.32, "grad_norm": 0.7100536227226257, "learning_rate": 7.222478657059318e-05, "loss": 2.8443, "step": 47407 }, { "epoch": 2.32, "grad_norm": 0.7035341262817383, "learning_rate": 7.221476705683807e-05, "loss": 2.8649, "step": 47408 }, { "epoch": 2.32, "grad_norm": 0.7688108086585999, "learning_rate": 7.220474814302536e-05, "loss": 2.8819, "step": 47409 }, { "epoch": 2.32, "grad_norm": 0.7461357116699219, "learning_rate": 7.219472982918155e-05, "loss": 2.9177, "step": 47410 }, { "epoch": 2.32, "grad_norm": 0.727735161781311, "learning_rate": 7.218471211533293e-05, "loss": 2.9527, "step": 47411 }, { "epoch": 2.32, "grad_norm": 0.7366343140602112, "learning_rate": 7.217469500150602e-05, "loss": 2.886, "step": 47412 }, { "epoch": 2.32, "grad_norm": 0.76343834400177, "learning_rate": 7.21646784877271e-05, "loss": 2.9228, "step": 47413 }, { "epoch": 2.32, "grad_norm": 0.7279701828956604, "learning_rate": 7.215466257402252e-05, "loss": 2.9934, "step": 47414 }, { "epoch": 2.32, "grad_norm": 0.7330734133720398, "learning_rate": 7.214464726041873e-05, "loss": 2.8277, "step": 47415 }, { "epoch": 2.32, "grad_norm": 0.7413004636764526, "learning_rate": 7.213463254694206e-05, "loss": 2.9595, "step": 47416 }, { "epoch": 2.32, "grad_norm": 0.7532314658164978, "learning_rate": 7.212461843361888e-05, "loss": 2.9631, "step": 47417 }, { "epoch": 2.32, "grad_norm": 0.7230085134506226, "learning_rate": 7.211460492047571e-05, "loss": 3.1592, "step": 47418 }, { "epoch": 2.32, "grad_norm": 0.7556701302528381, "learning_rate": 7.21045920075388e-05, "loss": 2.9073, "step": 47419 }, { "epoch": 2.32, "grad_norm": 0.713424801826477, "learning_rate": 7.209457969483454e-05, "loss": 3.1115, "step": 47420 }, { "epoch": 2.32, "grad_norm": 0.7252991199493408, "learning_rate": 7.20845679823892e-05, "loss": 2.8228, "step": 47421 }, { "epoch": 2.32, "grad_norm": 0.7200642228126526, "learning_rate": 7.207455687022924e-05, "loss": 2.9238, "step": 47422 }, { "epoch": 2.32, "grad_norm": 0.7238421440124512, "learning_rate": 7.206454635838115e-05, "loss": 2.826, "step": 47423 }, { "epoch": 2.32, "grad_norm": 0.7494496703147888, "learning_rate": 7.205453644687107e-05, "loss": 2.9615, "step": 47424 }, { "epoch": 2.32, "grad_norm": 0.7229695916175842, "learning_rate": 7.204452713572555e-05, "loss": 2.6852, "step": 47425 }, { "epoch": 2.32, "grad_norm": 0.7765418887138367, "learning_rate": 7.20345184249709e-05, "loss": 2.8497, "step": 47426 }, { "epoch": 2.32, "grad_norm": 0.7109969258308411, "learning_rate": 7.202451031463334e-05, "loss": 2.8812, "step": 47427 }, { "epoch": 2.32, "grad_norm": 0.7664215564727783, "learning_rate": 7.201450280473943e-05, "loss": 2.7231, "step": 47428 }, { "epoch": 2.32, "grad_norm": 0.7280341982841492, "learning_rate": 7.20044958953154e-05, "loss": 3.0021, "step": 47429 }, { "epoch": 2.32, "grad_norm": 0.7474103569984436, "learning_rate": 7.199448958638768e-05, "loss": 2.6722, "step": 47430 }, { "epoch": 2.32, "grad_norm": 0.8022735714912415, "learning_rate": 7.198448387798253e-05, "loss": 2.6483, "step": 47431 }, { "epoch": 2.32, "grad_norm": 0.7711005210876465, "learning_rate": 7.19744787701264e-05, "loss": 2.7537, "step": 47432 }, { "epoch": 2.32, "grad_norm": 0.7397340536117554, "learning_rate": 7.196447426284565e-05, "loss": 2.9846, "step": 47433 }, { "epoch": 2.32, "grad_norm": 0.7473747730255127, "learning_rate": 7.195447035616661e-05, "loss": 2.7482, "step": 47434 }, { "epoch": 2.32, "grad_norm": 0.7164973020553589, "learning_rate": 7.194446705011561e-05, "loss": 3.0895, "step": 47435 }, { "epoch": 2.32, "grad_norm": 0.7285258769989014, "learning_rate": 7.19344643447189e-05, "loss": 2.9992, "step": 47436 }, { "epoch": 2.32, "grad_norm": 0.741519033908844, "learning_rate": 7.19244622400029e-05, "loss": 2.8107, "step": 47437 }, { "epoch": 2.32, "grad_norm": 0.7468116283416748, "learning_rate": 7.191446073599408e-05, "loss": 3.1146, "step": 47438 }, { "epoch": 2.32, "grad_norm": 0.7286897301673889, "learning_rate": 7.19044598327186e-05, "loss": 2.855, "step": 47439 }, { "epoch": 2.32, "grad_norm": 0.7384865880012512, "learning_rate": 7.189445953020292e-05, "loss": 2.6907, "step": 47440 }, { "epoch": 2.32, "grad_norm": 0.7932009696960449, "learning_rate": 7.188445982847336e-05, "loss": 2.9254, "step": 47441 }, { "epoch": 2.33, "grad_norm": 0.7377317547798157, "learning_rate": 7.187446072755613e-05, "loss": 2.8892, "step": 47442 }, { "epoch": 2.33, "grad_norm": 0.7107809782028198, "learning_rate": 7.18644622274778e-05, "loss": 2.9628, "step": 47443 }, { "epoch": 2.33, "grad_norm": 0.8373969197273254, "learning_rate": 7.185446432826444e-05, "loss": 2.7945, "step": 47444 }, { "epoch": 2.33, "grad_norm": 0.7064989805221558, "learning_rate": 7.184446702994262e-05, "loss": 2.7455, "step": 47445 }, { "epoch": 2.33, "grad_norm": 0.7400098443031311, "learning_rate": 7.183447033253845e-05, "loss": 2.8944, "step": 47446 }, { "epoch": 2.33, "grad_norm": 0.8077271580696106, "learning_rate": 7.182447423607852e-05, "loss": 3.0493, "step": 47447 }, { "epoch": 2.33, "grad_norm": 0.6956033706665039, "learning_rate": 7.181447874058895e-05, "loss": 2.9979, "step": 47448 }, { "epoch": 2.33, "grad_norm": 0.6707293391227722, "learning_rate": 7.180448384609606e-05, "loss": 2.9002, "step": 47449 }, { "epoch": 2.33, "grad_norm": 0.6920158863067627, "learning_rate": 7.179448955262634e-05, "loss": 3.0506, "step": 47450 }, { "epoch": 2.33, "grad_norm": 0.7085862159729004, "learning_rate": 7.17844958602059e-05, "loss": 2.8566, "step": 47451 }, { "epoch": 2.33, "grad_norm": 0.7076865434646606, "learning_rate": 7.177450276886129e-05, "loss": 2.936, "step": 47452 }, { "epoch": 2.33, "grad_norm": 0.7326858043670654, "learning_rate": 7.176451027861863e-05, "loss": 3.1567, "step": 47453 }, { "epoch": 2.33, "grad_norm": 0.6912264823913574, "learning_rate": 7.175451838950433e-05, "loss": 3.0702, "step": 47454 }, { "epoch": 2.33, "grad_norm": 0.7292463779449463, "learning_rate": 7.174452710154475e-05, "loss": 2.7248, "step": 47455 }, { "epoch": 2.33, "grad_norm": 0.7355402112007141, "learning_rate": 7.173453641476622e-05, "loss": 2.8516, "step": 47456 }, { "epoch": 2.33, "grad_norm": 0.7186945080757141, "learning_rate": 7.172454632919493e-05, "loss": 2.7671, "step": 47457 }, { "epoch": 2.33, "grad_norm": 0.7160497903823853, "learning_rate": 7.171455684485721e-05, "loss": 2.8474, "step": 47458 }, { "epoch": 2.33, "grad_norm": 0.7210008502006531, "learning_rate": 7.170456796177939e-05, "loss": 2.6995, "step": 47459 }, { "epoch": 2.33, "grad_norm": 0.7707409858703613, "learning_rate": 7.16945796799879e-05, "loss": 2.9242, "step": 47460 }, { "epoch": 2.33, "grad_norm": 0.6895835399627686, "learning_rate": 7.168459199950884e-05, "loss": 2.9707, "step": 47461 }, { "epoch": 2.33, "grad_norm": 0.734785795211792, "learning_rate": 7.167460492036871e-05, "loss": 2.9447, "step": 47462 }, { "epoch": 2.33, "grad_norm": 0.7238161563873291, "learning_rate": 7.166461844259376e-05, "loss": 2.8899, "step": 47463 }, { "epoch": 2.33, "grad_norm": 0.6985582709312439, "learning_rate": 7.165463256621016e-05, "loss": 3.0099, "step": 47464 }, { "epoch": 2.33, "grad_norm": 0.8546037673950195, "learning_rate": 7.164464729124439e-05, "loss": 2.9665, "step": 47465 }, { "epoch": 2.33, "grad_norm": 0.7167295813560486, "learning_rate": 7.163466261772261e-05, "loss": 2.8678, "step": 47466 }, { "epoch": 2.33, "grad_norm": 0.7737851142883301, "learning_rate": 7.162467854567125e-05, "loss": 2.9525, "step": 47467 }, { "epoch": 2.33, "grad_norm": 0.741790771484375, "learning_rate": 7.161469507511642e-05, "loss": 3.0993, "step": 47468 }, { "epoch": 2.33, "grad_norm": 0.7221540808677673, "learning_rate": 7.160471220608466e-05, "loss": 2.7828, "step": 47469 }, { "epoch": 2.33, "grad_norm": 0.7722229957580566, "learning_rate": 7.159472993860212e-05, "loss": 2.8201, "step": 47470 }, { "epoch": 2.33, "grad_norm": 0.7124035358428955, "learning_rate": 7.1584748272695e-05, "loss": 2.8623, "step": 47471 }, { "epoch": 2.33, "grad_norm": 0.6828311681747437, "learning_rate": 7.157476720838981e-05, "loss": 3.045, "step": 47472 }, { "epoch": 2.33, "grad_norm": 0.6699220538139343, "learning_rate": 7.156478674571261e-05, "loss": 2.8361, "step": 47473 }, { "epoch": 2.33, "grad_norm": 0.7215057015419006, "learning_rate": 7.15548068846898e-05, "loss": 3.0085, "step": 47474 }, { "epoch": 2.33, "grad_norm": 0.7264130115509033, "learning_rate": 7.154482762534776e-05, "loss": 2.7652, "step": 47475 }, { "epoch": 2.33, "grad_norm": 0.7632395029067993, "learning_rate": 7.15348489677126e-05, "loss": 2.9227, "step": 47476 }, { "epoch": 2.33, "grad_norm": 0.7025119662284851, "learning_rate": 7.152487091181074e-05, "loss": 2.9366, "step": 47477 }, { "epoch": 2.33, "grad_norm": 0.700484037399292, "learning_rate": 7.151489345766842e-05, "loss": 2.8368, "step": 47478 }, { "epoch": 2.33, "grad_norm": 0.7083252668380737, "learning_rate": 7.150491660531179e-05, "loss": 2.9546, "step": 47479 }, { "epoch": 2.33, "grad_norm": 0.7263261675834656, "learning_rate": 7.149494035476732e-05, "loss": 3.1148, "step": 47480 }, { "epoch": 2.33, "grad_norm": 0.755137026309967, "learning_rate": 7.148496470606114e-05, "loss": 2.9635, "step": 47481 }, { "epoch": 2.33, "grad_norm": 0.7463118433952332, "learning_rate": 7.147498965921965e-05, "loss": 2.8403, "step": 47482 }, { "epoch": 2.33, "grad_norm": 0.6622539162635803, "learning_rate": 7.1465015214269e-05, "loss": 2.8063, "step": 47483 }, { "epoch": 2.33, "grad_norm": 0.7079895734786987, "learning_rate": 7.145504137123555e-05, "loss": 2.8559, "step": 47484 }, { "epoch": 2.33, "grad_norm": 0.6947416067123413, "learning_rate": 7.144506813014557e-05, "loss": 2.9942, "step": 47485 }, { "epoch": 2.33, "grad_norm": 0.7773357629776001, "learning_rate": 7.143509549102524e-05, "loss": 3.0241, "step": 47486 }, { "epoch": 2.33, "grad_norm": 0.7051486372947693, "learning_rate": 7.142512345390093e-05, "loss": 2.9468, "step": 47487 }, { "epoch": 2.33, "grad_norm": 0.7427360415458679, "learning_rate": 7.141515201879877e-05, "loss": 2.8745, "step": 47488 }, { "epoch": 2.33, "grad_norm": 0.6730073094367981, "learning_rate": 7.14051811857451e-05, "loss": 2.7274, "step": 47489 }, { "epoch": 2.33, "grad_norm": 0.6964417695999146, "learning_rate": 7.139521095476629e-05, "loss": 2.8322, "step": 47490 }, { "epoch": 2.33, "grad_norm": 0.6890613436698914, "learning_rate": 7.13852413258884e-05, "loss": 3.0825, "step": 47491 }, { "epoch": 2.33, "grad_norm": 0.7256914973258972, "learning_rate": 7.137527229913796e-05, "loss": 2.9446, "step": 47492 }, { "epoch": 2.33, "grad_norm": 0.6925685405731201, "learning_rate": 7.136530387454089e-05, "loss": 2.8681, "step": 47493 }, { "epoch": 2.33, "grad_norm": 0.712538480758667, "learning_rate": 7.135533605212363e-05, "loss": 3.152, "step": 47494 }, { "epoch": 2.33, "grad_norm": 0.7189764380455017, "learning_rate": 7.134536883191244e-05, "loss": 2.7111, "step": 47495 }, { "epoch": 2.33, "grad_norm": 0.73213130235672, "learning_rate": 7.133540221393349e-05, "loss": 2.8043, "step": 47496 }, { "epoch": 2.33, "grad_norm": 0.704254150390625, "learning_rate": 7.132543619821318e-05, "loss": 2.9035, "step": 47497 }, { "epoch": 2.33, "grad_norm": 0.7130454778671265, "learning_rate": 7.131547078477757e-05, "loss": 2.6674, "step": 47498 }, { "epoch": 2.33, "grad_norm": 0.6928410530090332, "learning_rate": 7.130550597365308e-05, "loss": 2.9636, "step": 47499 }, { "epoch": 2.33, "grad_norm": 0.6888012886047363, "learning_rate": 7.129554176486587e-05, "loss": 2.921, "step": 47500 }, { "epoch": 2.33, "grad_norm": 0.7133302688598633, "learning_rate": 7.12855781584421e-05, "loss": 2.9925, "step": 47501 }, { "epoch": 2.33, "grad_norm": 0.7603508234024048, "learning_rate": 7.127561515440818e-05, "loss": 2.8541, "step": 47502 }, { "epoch": 2.33, "grad_norm": 0.8669589757919312, "learning_rate": 7.126565275279019e-05, "loss": 2.982, "step": 47503 }, { "epoch": 2.33, "grad_norm": 0.7202988862991333, "learning_rate": 7.125569095361447e-05, "loss": 2.8785, "step": 47504 }, { "epoch": 2.33, "grad_norm": 0.6969466805458069, "learning_rate": 7.124572975690731e-05, "loss": 3.0048, "step": 47505 }, { "epoch": 2.33, "grad_norm": 0.7168153524398804, "learning_rate": 7.123576916269487e-05, "loss": 2.8723, "step": 47506 }, { "epoch": 2.33, "grad_norm": 0.7444748282432556, "learning_rate": 7.122580917100338e-05, "loss": 3.053, "step": 47507 }, { "epoch": 2.33, "grad_norm": 0.7564586997032166, "learning_rate": 7.121584978185903e-05, "loss": 2.9999, "step": 47508 }, { "epoch": 2.33, "grad_norm": 0.7514041066169739, "learning_rate": 7.120589099528807e-05, "loss": 2.8877, "step": 47509 }, { "epoch": 2.33, "grad_norm": 0.7307299971580505, "learning_rate": 7.119593281131684e-05, "loss": 3.0657, "step": 47510 }, { "epoch": 2.33, "grad_norm": 0.7431744933128357, "learning_rate": 7.118597522997142e-05, "loss": 2.7934, "step": 47511 }, { "epoch": 2.33, "grad_norm": 0.7438200116157532, "learning_rate": 7.117601825127817e-05, "loss": 2.9412, "step": 47512 }, { "epoch": 2.33, "grad_norm": 0.7108175754547119, "learning_rate": 7.11660618752632e-05, "loss": 2.8302, "step": 47513 }, { "epoch": 2.33, "grad_norm": 0.6957178711891174, "learning_rate": 7.115610610195285e-05, "loss": 3.1017, "step": 47514 }, { "epoch": 2.33, "grad_norm": 0.7372037768363953, "learning_rate": 7.114615093137326e-05, "loss": 2.8933, "step": 47515 }, { "epoch": 2.33, "grad_norm": 0.7208225131034851, "learning_rate": 7.113619636355061e-05, "loss": 3.0632, "step": 47516 }, { "epoch": 2.33, "grad_norm": 0.7117692232131958, "learning_rate": 7.112624239851123e-05, "loss": 2.95, "step": 47517 }, { "epoch": 2.33, "grad_norm": 0.7726485133171082, "learning_rate": 7.11162890362812e-05, "loss": 3.0097, "step": 47518 }, { "epoch": 2.33, "grad_norm": 0.7577906847000122, "learning_rate": 7.110633627688684e-05, "loss": 2.875, "step": 47519 }, { "epoch": 2.33, "grad_norm": 0.7112172245979309, "learning_rate": 7.109638412035439e-05, "loss": 2.795, "step": 47520 }, { "epoch": 2.33, "grad_norm": 0.7526218891143799, "learning_rate": 7.108643256671002e-05, "loss": 3.0337, "step": 47521 }, { "epoch": 2.33, "grad_norm": 0.883800208568573, "learning_rate": 7.107648161597993e-05, "loss": 2.7492, "step": 47522 }, { "epoch": 2.33, "grad_norm": 0.7418673634529114, "learning_rate": 7.106653126819027e-05, "loss": 2.9126, "step": 47523 }, { "epoch": 2.33, "grad_norm": 0.7279163002967834, "learning_rate": 7.10565815233673e-05, "loss": 3.0511, "step": 47524 }, { "epoch": 2.33, "grad_norm": 0.7238751649856567, "learning_rate": 7.104663238153731e-05, "loss": 2.8631, "step": 47525 }, { "epoch": 2.33, "grad_norm": 0.7138600945472717, "learning_rate": 7.103668384272632e-05, "loss": 2.7983, "step": 47526 }, { "epoch": 2.33, "grad_norm": 0.7913941144943237, "learning_rate": 7.102673590696074e-05, "loss": 3.039, "step": 47527 }, { "epoch": 2.33, "grad_norm": 0.7563286423683167, "learning_rate": 7.101678857426669e-05, "loss": 2.974, "step": 47528 }, { "epoch": 2.33, "grad_norm": 0.744836151599884, "learning_rate": 7.100684184467025e-05, "loss": 2.9618, "step": 47529 }, { "epoch": 2.33, "grad_norm": 0.7190981507301331, "learning_rate": 7.099689571819778e-05, "loss": 3.0592, "step": 47530 }, { "epoch": 2.33, "grad_norm": 0.7641801834106445, "learning_rate": 7.098695019487535e-05, "loss": 2.817, "step": 47531 }, { "epoch": 2.33, "grad_norm": 0.7573862075805664, "learning_rate": 7.097700527472934e-05, "loss": 2.9914, "step": 47532 }, { "epoch": 2.33, "grad_norm": 0.7357966899871826, "learning_rate": 7.096706095778567e-05, "loss": 2.8585, "step": 47533 }, { "epoch": 2.33, "grad_norm": 0.8040800094604492, "learning_rate": 7.095711724407081e-05, "loss": 3.1379, "step": 47534 }, { "epoch": 2.33, "grad_norm": 0.7585517764091492, "learning_rate": 7.094717413361075e-05, "loss": 2.8646, "step": 47535 }, { "epoch": 2.33, "grad_norm": 0.7271699905395508, "learning_rate": 7.09372316264318e-05, "loss": 3.0099, "step": 47536 }, { "epoch": 2.33, "grad_norm": 0.7077714800834656, "learning_rate": 7.092728972256013e-05, "loss": 2.8668, "step": 47537 }, { "epoch": 2.33, "grad_norm": 0.7180951237678528, "learning_rate": 7.09173484220218e-05, "loss": 2.758, "step": 47538 }, { "epoch": 2.33, "grad_norm": 0.7497140765190125, "learning_rate": 7.090740772484315e-05, "loss": 2.8124, "step": 47539 }, { "epoch": 2.33, "grad_norm": 0.7486867904663086, "learning_rate": 7.089746763105021e-05, "loss": 2.8029, "step": 47540 }, { "epoch": 2.33, "grad_norm": 0.6781291961669922, "learning_rate": 7.088752814066926e-05, "loss": 3.086, "step": 47541 }, { "epoch": 2.33, "grad_norm": 0.7236211895942688, "learning_rate": 7.087758925372658e-05, "loss": 2.9152, "step": 47542 }, { "epoch": 2.33, "grad_norm": 0.7155327200889587, "learning_rate": 7.08676509702482e-05, "loss": 2.8893, "step": 47543 }, { "epoch": 2.33, "grad_norm": 0.7060009837150574, "learning_rate": 7.085771329026036e-05, "loss": 2.9783, "step": 47544 }, { "epoch": 2.33, "grad_norm": 0.7284157872200012, "learning_rate": 7.084777621378908e-05, "loss": 2.9028, "step": 47545 }, { "epoch": 2.33, "grad_norm": 0.7081487774848938, "learning_rate": 7.083783974086068e-05, "loss": 2.9875, "step": 47546 }, { "epoch": 2.33, "grad_norm": 0.7967720627784729, "learning_rate": 7.082790387150137e-05, "loss": 2.9191, "step": 47547 }, { "epoch": 2.33, "grad_norm": 0.6838148832321167, "learning_rate": 7.081796860573721e-05, "loss": 2.9943, "step": 47548 }, { "epoch": 2.33, "grad_norm": 0.7207189798355103, "learning_rate": 7.080803394359448e-05, "loss": 2.8106, "step": 47549 }, { "epoch": 2.33, "grad_norm": 0.7479190826416016, "learning_rate": 7.079809988509926e-05, "loss": 2.8428, "step": 47550 }, { "epoch": 2.33, "grad_norm": 0.6582784652709961, "learning_rate": 7.078816643027767e-05, "loss": 2.7417, "step": 47551 }, { "epoch": 2.33, "grad_norm": 0.7573397755622864, "learning_rate": 7.077823357915602e-05, "loss": 3.0219, "step": 47552 }, { "epoch": 2.33, "grad_norm": 0.7158548831939697, "learning_rate": 7.076830133176032e-05, "loss": 2.9151, "step": 47553 }, { "epoch": 2.33, "grad_norm": 0.7018261551856995, "learning_rate": 7.075836968811687e-05, "loss": 2.9753, "step": 47554 }, { "epoch": 2.33, "grad_norm": 0.7017635703086853, "learning_rate": 7.074843864825164e-05, "loss": 3.038, "step": 47555 }, { "epoch": 2.33, "grad_norm": 0.7433605194091797, "learning_rate": 7.073850821219094e-05, "loss": 2.915, "step": 47556 }, { "epoch": 2.33, "grad_norm": 0.7094408273696899, "learning_rate": 7.072857837996098e-05, "loss": 2.9466, "step": 47557 }, { "epoch": 2.33, "grad_norm": 0.7520463466644287, "learning_rate": 7.071864915158777e-05, "loss": 2.8484, "step": 47558 }, { "epoch": 2.33, "grad_norm": 0.7429606318473816, "learning_rate": 7.070872052709754e-05, "loss": 2.9665, "step": 47559 }, { "epoch": 2.33, "grad_norm": 0.7459293603897095, "learning_rate": 7.069879250651634e-05, "loss": 3.1393, "step": 47560 }, { "epoch": 2.33, "grad_norm": 0.7327176928520203, "learning_rate": 7.06888650898704e-05, "loss": 2.6332, "step": 47561 }, { "epoch": 2.33, "grad_norm": 0.7204429507255554, "learning_rate": 7.06789382771859e-05, "loss": 2.6896, "step": 47562 }, { "epoch": 2.33, "grad_norm": 0.751361608505249, "learning_rate": 7.066901206848888e-05, "loss": 3.0084, "step": 47563 }, { "epoch": 2.33, "grad_norm": 0.7837120294570923, "learning_rate": 7.065908646380565e-05, "loss": 2.9443, "step": 47564 }, { "epoch": 2.33, "grad_norm": 0.7149581909179688, "learning_rate": 7.064916146316221e-05, "loss": 3.048, "step": 47565 }, { "epoch": 2.33, "grad_norm": 0.7094987630844116, "learning_rate": 7.063923706658469e-05, "loss": 2.8596, "step": 47566 }, { "epoch": 2.33, "grad_norm": 0.7471241354942322, "learning_rate": 7.062931327409933e-05, "loss": 2.9456, "step": 47567 }, { "epoch": 2.33, "grad_norm": 0.8007349967956543, "learning_rate": 7.061939008573216e-05, "loss": 3.0527, "step": 47568 }, { "epoch": 2.33, "grad_norm": 0.7094486355781555, "learning_rate": 7.060946750150944e-05, "loss": 2.8813, "step": 47569 }, { "epoch": 2.33, "grad_norm": 0.6963364481925964, "learning_rate": 7.059954552145718e-05, "loss": 2.9723, "step": 47570 }, { "epoch": 2.33, "grad_norm": 0.737665057182312, "learning_rate": 7.058962414560161e-05, "loss": 2.8979, "step": 47571 }, { "epoch": 2.33, "grad_norm": 0.7348478436470032, "learning_rate": 7.057970337396884e-05, "loss": 3.1241, "step": 47572 }, { "epoch": 2.33, "grad_norm": 0.7251835465431213, "learning_rate": 7.05697832065849e-05, "loss": 2.9297, "step": 47573 }, { "epoch": 2.33, "grad_norm": 0.7356771230697632, "learning_rate": 7.055986364347608e-05, "loss": 2.9944, "step": 47574 }, { "epoch": 2.33, "grad_norm": 0.7503240704536438, "learning_rate": 7.05499446846683e-05, "loss": 2.7325, "step": 47575 }, { "epoch": 2.33, "grad_norm": 0.7172363996505737, "learning_rate": 7.054002633018785e-05, "loss": 2.8792, "step": 47576 }, { "epoch": 2.33, "grad_norm": 0.7425442934036255, "learning_rate": 7.05301085800609e-05, "loss": 2.9969, "step": 47577 }, { "epoch": 2.33, "grad_norm": 0.7278176546096802, "learning_rate": 7.052019143431336e-05, "loss": 2.8364, "step": 47578 }, { "epoch": 2.33, "grad_norm": 0.711143970489502, "learning_rate": 7.051027489297157e-05, "loss": 2.8685, "step": 47579 }, { "epoch": 2.33, "grad_norm": 0.7150108218193054, "learning_rate": 7.050035895606156e-05, "loss": 2.9239, "step": 47580 }, { "epoch": 2.33, "grad_norm": 0.7470396161079407, "learning_rate": 7.049044362360938e-05, "loss": 2.9947, "step": 47581 }, { "epoch": 2.33, "grad_norm": 0.6868398189544678, "learning_rate": 7.048052889564125e-05, "loss": 3.0457, "step": 47582 }, { "epoch": 2.33, "grad_norm": 0.7090058326721191, "learning_rate": 7.047061477218312e-05, "loss": 2.9331, "step": 47583 }, { "epoch": 2.33, "grad_norm": 0.7475082278251648, "learning_rate": 7.046070125326135e-05, "loss": 2.9428, "step": 47584 }, { "epoch": 2.33, "grad_norm": 0.7163534164428711, "learning_rate": 7.045078833890183e-05, "loss": 2.9627, "step": 47585 }, { "epoch": 2.33, "grad_norm": 0.7448176741600037, "learning_rate": 7.044087602913084e-05, "loss": 2.9439, "step": 47586 }, { "epoch": 2.33, "grad_norm": 0.7112734317779541, "learning_rate": 7.043096432397437e-05, "loss": 2.8171, "step": 47587 }, { "epoch": 2.33, "grad_norm": 0.7507283687591553, "learning_rate": 7.042105322345847e-05, "loss": 2.9742, "step": 47588 }, { "epoch": 2.33, "grad_norm": 0.749320924282074, "learning_rate": 7.041114272760948e-05, "loss": 2.8345, "step": 47589 }, { "epoch": 2.33, "grad_norm": 0.7169381976127625, "learning_rate": 7.040123283645322e-05, "loss": 3.0269, "step": 47590 }, { "epoch": 2.33, "grad_norm": 0.7252839207649231, "learning_rate": 7.039132355001592e-05, "loss": 2.8734, "step": 47591 }, { "epoch": 2.33, "grad_norm": 0.7459837794303894, "learning_rate": 7.03814148683238e-05, "loss": 2.8828, "step": 47592 }, { "epoch": 2.33, "grad_norm": 0.7205169200897217, "learning_rate": 7.037150679140276e-05, "loss": 2.8298, "step": 47593 }, { "epoch": 2.33, "grad_norm": 0.7566838264465332, "learning_rate": 7.036159931927911e-05, "loss": 2.6596, "step": 47594 }, { "epoch": 2.33, "grad_norm": 0.7399532198905945, "learning_rate": 7.035169245197867e-05, "loss": 2.791, "step": 47595 }, { "epoch": 2.33, "grad_norm": 0.744898796081543, "learning_rate": 7.034178618952769e-05, "loss": 2.9397, "step": 47596 }, { "epoch": 2.33, "grad_norm": 0.741045355796814, "learning_rate": 7.03318805319523e-05, "loss": 2.9292, "step": 47597 }, { "epoch": 2.33, "grad_norm": 0.7274682521820068, "learning_rate": 7.032197547927847e-05, "loss": 2.7797, "step": 47598 }, { "epoch": 2.33, "grad_norm": 0.7332300543785095, "learning_rate": 7.031207103153241e-05, "loss": 2.758, "step": 47599 }, { "epoch": 2.33, "grad_norm": 0.7493497729301453, "learning_rate": 7.030216718874011e-05, "loss": 3.1175, "step": 47600 }, { "epoch": 2.33, "grad_norm": 0.7280975580215454, "learning_rate": 7.029226395092774e-05, "loss": 2.8282, "step": 47601 }, { "epoch": 2.33, "grad_norm": 0.7699424028396606, "learning_rate": 7.028236131812138e-05, "loss": 2.8258, "step": 47602 }, { "epoch": 2.33, "grad_norm": 0.7214636206626892, "learning_rate": 7.027245929034695e-05, "loss": 2.7958, "step": 47603 }, { "epoch": 2.33, "grad_norm": 0.7424951195716858, "learning_rate": 7.026255786763075e-05, "loss": 2.8481, "step": 47604 }, { "epoch": 2.33, "grad_norm": 0.7581061124801636, "learning_rate": 7.025265704999867e-05, "loss": 2.9243, "step": 47605 }, { "epoch": 2.33, "grad_norm": 0.6965128779411316, "learning_rate": 7.024275683747688e-05, "loss": 2.958, "step": 47606 }, { "epoch": 2.33, "grad_norm": 0.7387304902076721, "learning_rate": 7.023285723009155e-05, "loss": 3.0201, "step": 47607 }, { "epoch": 2.33, "grad_norm": 0.7237225770950317, "learning_rate": 7.022295822786861e-05, "loss": 2.7551, "step": 47608 }, { "epoch": 2.33, "grad_norm": 0.6971107721328735, "learning_rate": 7.021305983083423e-05, "loss": 2.8338, "step": 47609 }, { "epoch": 2.33, "grad_norm": 0.7018718719482422, "learning_rate": 7.020316203901432e-05, "loss": 2.6294, "step": 47610 }, { "epoch": 2.33, "grad_norm": 0.678143322467804, "learning_rate": 7.019326485243513e-05, "loss": 3.0442, "step": 47611 }, { "epoch": 2.33, "grad_norm": 0.718937873840332, "learning_rate": 7.018336827112257e-05, "loss": 2.7917, "step": 47612 }, { "epoch": 2.33, "grad_norm": 0.701008677482605, "learning_rate": 7.01734722951028e-05, "loss": 2.7951, "step": 47613 }, { "epoch": 2.33, "grad_norm": 0.7457568645477295, "learning_rate": 7.016357692440193e-05, "loss": 2.8648, "step": 47614 }, { "epoch": 2.33, "grad_norm": 0.6575247049331665, "learning_rate": 7.015368215904593e-05, "loss": 2.987, "step": 47615 }, { "epoch": 2.33, "grad_norm": 0.7346352338790894, "learning_rate": 7.014378799906095e-05, "loss": 2.8045, "step": 47616 }, { "epoch": 2.33, "grad_norm": 0.7044388055801392, "learning_rate": 7.013389444447301e-05, "loss": 2.7003, "step": 47617 }, { "epoch": 2.33, "grad_norm": 0.7355045676231384, "learning_rate": 7.012400149530804e-05, "loss": 2.9629, "step": 47618 }, { "epoch": 2.33, "grad_norm": 0.7335948348045349, "learning_rate": 7.011410915159232e-05, "loss": 2.8067, "step": 47619 }, { "epoch": 2.33, "grad_norm": 0.7035436630249023, "learning_rate": 7.010421741335168e-05, "loss": 2.7938, "step": 47620 }, { "epoch": 2.33, "grad_norm": 0.6967812180519104, "learning_rate": 7.009432628061243e-05, "loss": 2.8902, "step": 47621 }, { "epoch": 2.33, "grad_norm": 0.7366414070129395, "learning_rate": 7.008443575340032e-05, "loss": 2.6877, "step": 47622 }, { "epoch": 2.33, "grad_norm": 0.7091039419174194, "learning_rate": 7.00745458317417e-05, "loss": 2.7761, "step": 47623 }, { "epoch": 2.33, "grad_norm": 0.7306919693946838, "learning_rate": 7.006465651566247e-05, "loss": 2.782, "step": 47624 }, { "epoch": 2.33, "grad_norm": 0.6974079012870789, "learning_rate": 7.005476780518856e-05, "loss": 2.9357, "step": 47625 }, { "epoch": 2.33, "grad_norm": 0.7919893860816956, "learning_rate": 7.004487970034627e-05, "loss": 2.9349, "step": 47626 }, { "epoch": 2.33, "grad_norm": 0.7445873022079468, "learning_rate": 7.00349922011614e-05, "loss": 3.0683, "step": 47627 }, { "epoch": 2.33, "grad_norm": 0.7282187342643738, "learning_rate": 7.00251053076601e-05, "loss": 2.9457, "step": 47628 }, { "epoch": 2.33, "grad_norm": 0.6826276779174805, "learning_rate": 7.001521901986848e-05, "loss": 2.7284, "step": 47629 }, { "epoch": 2.33, "grad_norm": 0.7542269825935364, "learning_rate": 7.000533333781256e-05, "loss": 2.8766, "step": 47630 }, { "epoch": 2.33, "grad_norm": 0.763077974319458, "learning_rate": 6.999544826151829e-05, "loss": 2.846, "step": 47631 }, { "epoch": 2.33, "grad_norm": 0.7759633660316467, "learning_rate": 6.99855637910117e-05, "loss": 3.0532, "step": 47632 }, { "epoch": 2.33, "grad_norm": 0.7288686037063599, "learning_rate": 6.997567992631883e-05, "loss": 3.0587, "step": 47633 }, { "epoch": 2.33, "grad_norm": 0.7107670307159424, "learning_rate": 6.996579666746584e-05, "loss": 2.9452, "step": 47634 }, { "epoch": 2.33, "grad_norm": 0.7164334654808044, "learning_rate": 6.995591401447858e-05, "loss": 3.1213, "step": 47635 }, { "epoch": 2.33, "grad_norm": 0.7293843030929565, "learning_rate": 6.994603196738326e-05, "loss": 2.9488, "step": 47636 }, { "epoch": 2.33, "grad_norm": 0.7001025676727295, "learning_rate": 6.993615052620576e-05, "loss": 2.8525, "step": 47637 }, { "epoch": 2.33, "grad_norm": 0.736518919467926, "learning_rate": 6.992626969097221e-05, "loss": 3.0508, "step": 47638 }, { "epoch": 2.33, "grad_norm": 0.6913434267044067, "learning_rate": 6.991638946170861e-05, "loss": 2.8179, "step": 47639 }, { "epoch": 2.33, "grad_norm": 0.6732978820800781, "learning_rate": 6.990650983844087e-05, "loss": 3.0415, "step": 47640 }, { "epoch": 2.33, "grad_norm": 0.7510814070701599, "learning_rate": 6.989663082119518e-05, "loss": 2.9317, "step": 47641 }, { "epoch": 2.33, "grad_norm": 0.6988676190376282, "learning_rate": 6.98867524099974e-05, "loss": 2.9012, "step": 47642 }, { "epoch": 2.33, "grad_norm": 0.7889525890350342, "learning_rate": 6.987687460487362e-05, "loss": 2.7416, "step": 47643 }, { "epoch": 2.33, "grad_norm": 0.6934266686439514, "learning_rate": 6.986699740584998e-05, "loss": 3.0119, "step": 47644 }, { "epoch": 2.33, "grad_norm": 0.7091248631477356, "learning_rate": 6.985712081295235e-05, "loss": 2.8714, "step": 47645 }, { "epoch": 2.34, "grad_norm": 0.7630372047424316, "learning_rate": 6.984724482620678e-05, "loss": 3.0798, "step": 47646 }, { "epoch": 2.34, "grad_norm": 0.7124559879302979, "learning_rate": 6.983736944563919e-05, "loss": 3.0748, "step": 47647 }, { "epoch": 2.34, "grad_norm": 0.7879320979118347, "learning_rate": 6.982749467127566e-05, "loss": 2.7061, "step": 47648 }, { "epoch": 2.34, "grad_norm": 0.7572959065437317, "learning_rate": 6.981762050314232e-05, "loss": 3.1158, "step": 47649 }, { "epoch": 2.34, "grad_norm": 0.7393097877502441, "learning_rate": 6.980774694126495e-05, "loss": 2.8247, "step": 47650 }, { "epoch": 2.34, "grad_norm": 0.7403393387794495, "learning_rate": 6.979787398566979e-05, "loss": 2.862, "step": 47651 }, { "epoch": 2.34, "grad_norm": 0.7622966170310974, "learning_rate": 6.978800163638268e-05, "loss": 2.9045, "step": 47652 }, { "epoch": 2.34, "grad_norm": 0.7208401560783386, "learning_rate": 6.977812989342964e-05, "loss": 2.8656, "step": 47653 }, { "epoch": 2.34, "grad_norm": 0.7213475108146667, "learning_rate": 6.976825875683674e-05, "loss": 2.9971, "step": 47654 }, { "epoch": 2.34, "grad_norm": 0.7142321467399597, "learning_rate": 6.975838822662987e-05, "loss": 3.0197, "step": 47655 }, { "epoch": 2.34, "grad_norm": 0.8058083057403564, "learning_rate": 6.97485183028352e-05, "loss": 2.7902, "step": 47656 }, { "epoch": 2.34, "grad_norm": 0.7557923197746277, "learning_rate": 6.973864898547852e-05, "loss": 2.9234, "step": 47657 }, { "epoch": 2.34, "grad_norm": 0.6770955324172974, "learning_rate": 6.97287802745859e-05, "loss": 2.6823, "step": 47658 }, { "epoch": 2.34, "grad_norm": 0.7257001399993896, "learning_rate": 6.971891217018349e-05, "loss": 2.9746, "step": 47659 }, { "epoch": 2.34, "grad_norm": 0.665259063243866, "learning_rate": 6.97090446722971e-05, "loss": 2.6029, "step": 47660 }, { "epoch": 2.34, "grad_norm": 0.7421854734420776, "learning_rate": 6.96991777809528e-05, "loss": 2.9455, "step": 47661 }, { "epoch": 2.34, "grad_norm": 0.7707141041755676, "learning_rate": 6.968931149617643e-05, "loss": 3.1785, "step": 47662 }, { "epoch": 2.34, "grad_norm": 0.7522193789482117, "learning_rate": 6.967944581799409e-05, "loss": 2.8248, "step": 47663 }, { "epoch": 2.34, "grad_norm": 0.747096836566925, "learning_rate": 6.966958074643189e-05, "loss": 3.0049, "step": 47664 }, { "epoch": 2.34, "grad_norm": 0.7807623744010925, "learning_rate": 6.965971628151558e-05, "loss": 2.9797, "step": 47665 }, { "epoch": 2.34, "grad_norm": 0.6890551447868347, "learning_rate": 6.964985242327132e-05, "loss": 2.7737, "step": 47666 }, { "epoch": 2.34, "grad_norm": 0.744841456413269, "learning_rate": 6.963998917172501e-05, "loss": 2.9702, "step": 47667 }, { "epoch": 2.34, "grad_norm": 0.7255067825317383, "learning_rate": 6.963012652690257e-05, "loss": 2.9437, "step": 47668 }, { "epoch": 2.34, "grad_norm": 0.7375694513320923, "learning_rate": 6.962026448883013e-05, "loss": 2.8523, "step": 47669 }, { "epoch": 2.34, "grad_norm": 0.7139278054237366, "learning_rate": 6.961040305753347e-05, "loss": 2.7384, "step": 47670 }, { "epoch": 2.34, "grad_norm": 0.74039626121521, "learning_rate": 6.960054223303875e-05, "loss": 2.9185, "step": 47671 }, { "epoch": 2.34, "grad_norm": 0.7546448707580566, "learning_rate": 6.95906820153718e-05, "loss": 2.917, "step": 47672 }, { "epoch": 2.34, "grad_norm": 0.726020872592926, "learning_rate": 6.958082240455871e-05, "loss": 2.8537, "step": 47673 }, { "epoch": 2.34, "grad_norm": 0.7269747257232666, "learning_rate": 6.95709634006254e-05, "loss": 2.9355, "step": 47674 }, { "epoch": 2.34, "grad_norm": 0.7227620482444763, "learning_rate": 6.956110500359776e-05, "loss": 2.8675, "step": 47675 }, { "epoch": 2.34, "grad_norm": 0.6997947096824646, "learning_rate": 6.955124721350187e-05, "loss": 2.8877, "step": 47676 }, { "epoch": 2.34, "grad_norm": 0.7475396990776062, "learning_rate": 6.954139003036357e-05, "loss": 2.9018, "step": 47677 }, { "epoch": 2.34, "grad_norm": 0.6795568466186523, "learning_rate": 6.953153345420892e-05, "loss": 2.901, "step": 47678 }, { "epoch": 2.34, "grad_norm": 0.7249543070793152, "learning_rate": 6.952167748506393e-05, "loss": 2.9914, "step": 47679 }, { "epoch": 2.34, "grad_norm": 0.715927243232727, "learning_rate": 6.951182212295438e-05, "loss": 2.8587, "step": 47680 }, { "epoch": 2.34, "grad_norm": 0.7298700213432312, "learning_rate": 6.950196736790645e-05, "loss": 2.942, "step": 47681 }, { "epoch": 2.34, "grad_norm": 0.7658237814903259, "learning_rate": 6.949211321994597e-05, "loss": 3.0521, "step": 47682 }, { "epoch": 2.34, "grad_norm": 0.7058979272842407, "learning_rate": 6.948225967909878e-05, "loss": 2.8072, "step": 47683 }, { "epoch": 2.34, "grad_norm": 0.6835278868675232, "learning_rate": 6.947240674539109e-05, "loss": 3.063, "step": 47684 }, { "epoch": 2.34, "grad_norm": 0.7149368524551392, "learning_rate": 6.946255441884858e-05, "loss": 3.0687, "step": 47685 }, { "epoch": 2.34, "grad_norm": 0.7184211611747742, "learning_rate": 6.945270269949744e-05, "loss": 2.9289, "step": 47686 }, { "epoch": 2.34, "grad_norm": 0.8042710423469543, "learning_rate": 6.944285158736344e-05, "loss": 2.8712, "step": 47687 }, { "epoch": 2.34, "grad_norm": 0.7624470591545105, "learning_rate": 6.943300108247268e-05, "loss": 2.86, "step": 47688 }, { "epoch": 2.34, "grad_norm": 0.6996687650680542, "learning_rate": 6.9423151184851e-05, "loss": 2.894, "step": 47689 }, { "epoch": 2.34, "grad_norm": 0.7101437449455261, "learning_rate": 6.941330189452431e-05, "loss": 2.7634, "step": 47690 }, { "epoch": 2.34, "grad_norm": 0.7409912943840027, "learning_rate": 6.940345321151867e-05, "loss": 2.7299, "step": 47691 }, { "epoch": 2.34, "grad_norm": 0.7612147331237793, "learning_rate": 6.939360513585988e-05, "loss": 2.9949, "step": 47692 }, { "epoch": 2.34, "grad_norm": 0.7287157773971558, "learning_rate": 6.938375766757401e-05, "loss": 2.9987, "step": 47693 }, { "epoch": 2.34, "grad_norm": 0.7364184260368347, "learning_rate": 6.937391080668691e-05, "loss": 2.9102, "step": 47694 }, { "epoch": 2.34, "grad_norm": 0.7477836608886719, "learning_rate": 6.936406455322458e-05, "loss": 2.9471, "step": 47695 }, { "epoch": 2.34, "grad_norm": 0.746062159538269, "learning_rate": 6.935421890721293e-05, "loss": 2.8956, "step": 47696 }, { "epoch": 2.34, "grad_norm": 0.7618799209594727, "learning_rate": 6.934437386867779e-05, "loss": 3.0421, "step": 47697 }, { "epoch": 2.34, "grad_norm": 0.6989330053329468, "learning_rate": 6.933452943764527e-05, "loss": 3.0442, "step": 47698 }, { "epoch": 2.34, "grad_norm": 0.7096387147903442, "learning_rate": 6.932468561414115e-05, "loss": 2.7441, "step": 47699 }, { "epoch": 2.34, "grad_norm": 0.7569087147712708, "learning_rate": 6.931484239819134e-05, "loss": 2.8035, "step": 47700 }, { "epoch": 2.34, "grad_norm": 0.7354052662849426, "learning_rate": 6.930499978982198e-05, "loss": 3.0321, "step": 47701 }, { "epoch": 2.34, "grad_norm": 0.723621129989624, "learning_rate": 6.929515778905878e-05, "loss": 2.9771, "step": 47702 }, { "epoch": 2.34, "grad_norm": 0.7257896661758423, "learning_rate": 6.928531639592778e-05, "loss": 2.5832, "step": 47703 }, { "epoch": 2.34, "grad_norm": 0.7249346375465393, "learning_rate": 6.927547561045487e-05, "loss": 2.8028, "step": 47704 }, { "epoch": 2.34, "grad_norm": 0.7552078366279602, "learning_rate": 6.926563543266587e-05, "loss": 2.9345, "step": 47705 }, { "epoch": 2.34, "grad_norm": 0.7495518922805786, "learning_rate": 6.925579586258689e-05, "loss": 2.7828, "step": 47706 }, { "epoch": 2.34, "grad_norm": 0.6926343441009521, "learning_rate": 6.92459569002436e-05, "loss": 3.0711, "step": 47707 }, { "epoch": 2.34, "grad_norm": 0.7301257252693176, "learning_rate": 6.923611854566219e-05, "loss": 2.9068, "step": 47708 }, { "epoch": 2.34, "grad_norm": 0.7383841276168823, "learning_rate": 6.922628079886829e-05, "loss": 2.9509, "step": 47709 }, { "epoch": 2.34, "grad_norm": 0.7082839608192444, "learning_rate": 6.921644365988807e-05, "loss": 2.8345, "step": 47710 }, { "epoch": 2.34, "grad_norm": 0.8540667295455933, "learning_rate": 6.920660712874732e-05, "loss": 2.9104, "step": 47711 }, { "epoch": 2.34, "grad_norm": 0.6855905652046204, "learning_rate": 6.919677120547189e-05, "loss": 3.1228, "step": 47712 }, { "epoch": 2.34, "grad_norm": 0.7041099667549133, "learning_rate": 6.91869358900878e-05, "loss": 2.6503, "step": 47713 }, { "epoch": 2.34, "grad_norm": 0.7083460688591003, "learning_rate": 6.91771011826208e-05, "loss": 2.8676, "step": 47714 }, { "epoch": 2.34, "grad_norm": 0.7578673362731934, "learning_rate": 6.916726708309693e-05, "loss": 3.0593, "step": 47715 }, { "epoch": 2.34, "grad_norm": 0.7105020880699158, "learning_rate": 6.915743359154212e-05, "loss": 2.7786, "step": 47716 }, { "epoch": 2.34, "grad_norm": 0.7636972665786743, "learning_rate": 6.91476007079821e-05, "loss": 2.8917, "step": 47717 }, { "epoch": 2.34, "grad_norm": 0.7262598276138306, "learning_rate": 6.9137768432443e-05, "loss": 3.1578, "step": 47718 }, { "epoch": 2.34, "grad_norm": 0.7343432307243347, "learning_rate": 6.912793676495057e-05, "loss": 3.0025, "step": 47719 }, { "epoch": 2.34, "grad_norm": 0.7269200086593628, "learning_rate": 6.911810570553063e-05, "loss": 2.8975, "step": 47720 }, { "epoch": 2.34, "grad_norm": 0.7389751672744751, "learning_rate": 6.910827525420923e-05, "loss": 3.0294, "step": 47721 }, { "epoch": 2.34, "grad_norm": 0.7205168604850769, "learning_rate": 6.909844541101213e-05, "loss": 2.8921, "step": 47722 }, { "epoch": 2.34, "grad_norm": 0.7263239622116089, "learning_rate": 6.908861617596538e-05, "loss": 2.9286, "step": 47723 }, { "epoch": 2.34, "grad_norm": 0.7304479479789734, "learning_rate": 6.907878754909467e-05, "loss": 2.722, "step": 47724 }, { "epoch": 2.34, "grad_norm": 0.7433699369430542, "learning_rate": 6.906895953042611e-05, "loss": 2.9203, "step": 47725 }, { "epoch": 2.34, "grad_norm": 0.7258157730102539, "learning_rate": 6.905913211998546e-05, "loss": 2.8464, "step": 47726 }, { "epoch": 2.34, "grad_norm": 0.7196770906448364, "learning_rate": 6.904930531779852e-05, "loss": 2.9407, "step": 47727 }, { "epoch": 2.34, "grad_norm": 0.736857533454895, "learning_rate": 6.903947912389133e-05, "loss": 2.902, "step": 47728 }, { "epoch": 2.34, "grad_norm": 0.8262715935707092, "learning_rate": 6.902965353828963e-05, "loss": 3.0228, "step": 47729 }, { "epoch": 2.34, "grad_norm": 0.740009069442749, "learning_rate": 6.901982856101938e-05, "loss": 2.985, "step": 47730 }, { "epoch": 2.34, "grad_norm": 0.7470229864120483, "learning_rate": 6.901000419210652e-05, "loss": 2.9201, "step": 47731 }, { "epoch": 2.34, "grad_norm": 0.705930769443512, "learning_rate": 6.900018043157686e-05, "loss": 2.4926, "step": 47732 }, { "epoch": 2.34, "grad_norm": 0.7490159869194031, "learning_rate": 6.899035727945624e-05, "loss": 2.8409, "step": 47733 }, { "epoch": 2.34, "grad_norm": 0.7825806736946106, "learning_rate": 6.898053473577052e-05, "loss": 2.9334, "step": 47734 }, { "epoch": 2.34, "grad_norm": 0.7251452207565308, "learning_rate": 6.897071280054558e-05, "loss": 2.864, "step": 47735 }, { "epoch": 2.34, "grad_norm": 0.7280852794647217, "learning_rate": 6.896089147380743e-05, "loss": 2.8302, "step": 47736 }, { "epoch": 2.34, "grad_norm": 0.7135215401649475, "learning_rate": 6.895107075558173e-05, "loss": 2.8002, "step": 47737 }, { "epoch": 2.34, "grad_norm": 0.7429882884025574, "learning_rate": 6.894125064589455e-05, "loss": 2.7959, "step": 47738 }, { "epoch": 2.34, "grad_norm": 0.7092627882957458, "learning_rate": 6.893143114477152e-05, "loss": 2.8633, "step": 47739 }, { "epoch": 2.34, "grad_norm": 0.7626802921295166, "learning_rate": 6.892161225223875e-05, "loss": 2.9394, "step": 47740 }, { "epoch": 2.34, "grad_norm": 0.7585441470146179, "learning_rate": 6.891179396832196e-05, "loss": 2.8403, "step": 47741 }, { "epoch": 2.34, "grad_norm": 0.6958228945732117, "learning_rate": 6.890197629304694e-05, "loss": 2.9464, "step": 47742 }, { "epoch": 2.34, "grad_norm": 0.7759091854095459, "learning_rate": 6.889215922643975e-05, "loss": 2.838, "step": 47743 }, { "epoch": 2.34, "grad_norm": 0.7096752524375916, "learning_rate": 6.888234276852604e-05, "loss": 2.9345, "step": 47744 }, { "epoch": 2.34, "grad_norm": 0.7344189882278442, "learning_rate": 6.887252691933175e-05, "loss": 3.2115, "step": 47745 }, { "epoch": 2.34, "grad_norm": 0.7086279392242432, "learning_rate": 6.886271167888284e-05, "loss": 2.7383, "step": 47746 }, { "epoch": 2.34, "grad_norm": 0.7072312235832214, "learning_rate": 6.885289704720507e-05, "loss": 2.9756, "step": 47747 }, { "epoch": 2.34, "grad_norm": 0.7190287709236145, "learning_rate": 6.884308302432428e-05, "loss": 2.8416, "step": 47748 }, { "epoch": 2.34, "grad_norm": 0.7287060618400574, "learning_rate": 6.883326961026621e-05, "loss": 2.9435, "step": 47749 }, { "epoch": 2.34, "grad_norm": 0.7083621621131897, "learning_rate": 6.882345680505684e-05, "loss": 2.9013, "step": 47750 }, { "epoch": 2.34, "grad_norm": 0.7850791215896606, "learning_rate": 6.881364460872209e-05, "loss": 2.8539, "step": 47751 }, { "epoch": 2.34, "grad_norm": 0.7175219655036926, "learning_rate": 6.880383302128761e-05, "loss": 2.8696, "step": 47752 }, { "epoch": 2.34, "grad_norm": 0.7923054099082947, "learning_rate": 6.879402204277945e-05, "loss": 2.7013, "step": 47753 }, { "epoch": 2.34, "grad_norm": 0.7685131430625916, "learning_rate": 6.878421167322329e-05, "loss": 2.782, "step": 47754 }, { "epoch": 2.34, "grad_norm": 0.6883244514465332, "learning_rate": 6.877440191264497e-05, "loss": 2.882, "step": 47755 }, { "epoch": 2.34, "grad_norm": 0.7355750203132629, "learning_rate": 6.876459276107043e-05, "loss": 2.6442, "step": 47756 }, { "epoch": 2.34, "grad_norm": 0.7222840189933777, "learning_rate": 6.875478421852539e-05, "loss": 2.9603, "step": 47757 }, { "epoch": 2.34, "grad_norm": 0.7285621762275696, "learning_rate": 6.874497628503582e-05, "loss": 2.9548, "step": 47758 }, { "epoch": 2.34, "grad_norm": 0.7584347724914551, "learning_rate": 6.87351689606274e-05, "loss": 2.9329, "step": 47759 }, { "epoch": 2.34, "grad_norm": 0.8179945945739746, "learning_rate": 6.872536224532602e-05, "loss": 3.0256, "step": 47760 }, { "epoch": 2.34, "grad_norm": 0.7475566267967224, "learning_rate": 6.871555613915764e-05, "loss": 3.0588, "step": 47761 }, { "epoch": 2.34, "grad_norm": 0.6885706186294556, "learning_rate": 6.870575064214795e-05, "loss": 3.1584, "step": 47762 }, { "epoch": 2.34, "grad_norm": 0.713989794254303, "learning_rate": 6.869594575432282e-05, "loss": 2.9286, "step": 47763 }, { "epoch": 2.34, "grad_norm": 0.7147015929222107, "learning_rate": 6.868614147570795e-05, "loss": 2.9192, "step": 47764 }, { "epoch": 2.34, "grad_norm": 0.718229353427887, "learning_rate": 6.867633780632934e-05, "loss": 2.8925, "step": 47765 }, { "epoch": 2.34, "grad_norm": 0.7605919241905212, "learning_rate": 6.866653474621268e-05, "loss": 2.8551, "step": 47766 }, { "epoch": 2.34, "grad_norm": 0.7661386728286743, "learning_rate": 6.865673229538384e-05, "loss": 2.9573, "step": 47767 }, { "epoch": 2.34, "grad_norm": 0.7024096846580505, "learning_rate": 6.864693045386872e-05, "loss": 2.832, "step": 47768 }, { "epoch": 2.34, "grad_norm": 0.7051783800125122, "learning_rate": 6.863712922169305e-05, "loss": 2.7672, "step": 47769 }, { "epoch": 2.34, "grad_norm": 0.7214155793190002, "learning_rate": 6.862732859888267e-05, "loss": 2.6812, "step": 47770 }, { "epoch": 2.34, "grad_norm": 0.7348127365112305, "learning_rate": 6.861752858546328e-05, "loss": 2.9103, "step": 47771 }, { "epoch": 2.34, "grad_norm": 0.6963437795639038, "learning_rate": 6.860772918146082e-05, "loss": 2.7701, "step": 47772 }, { "epoch": 2.34, "grad_norm": 0.733690619468689, "learning_rate": 6.859793038690113e-05, "loss": 2.8199, "step": 47773 }, { "epoch": 2.34, "grad_norm": 0.6885663270950317, "learning_rate": 6.858813220180988e-05, "loss": 2.8728, "step": 47774 }, { "epoch": 2.34, "grad_norm": 0.708145797252655, "learning_rate": 6.857833462621303e-05, "loss": 2.7329, "step": 47775 }, { "epoch": 2.34, "grad_norm": 0.71966552734375, "learning_rate": 6.85685376601363e-05, "loss": 2.8923, "step": 47776 }, { "epoch": 2.34, "grad_norm": 0.7468180656433105, "learning_rate": 6.855874130360546e-05, "loss": 2.9768, "step": 47777 }, { "epoch": 2.34, "grad_norm": 0.7484171986579895, "learning_rate": 6.85489455566464e-05, "loss": 2.7614, "step": 47778 }, { "epoch": 2.34, "grad_norm": 0.7327077388763428, "learning_rate": 6.853915041928479e-05, "loss": 3.0613, "step": 47779 }, { "epoch": 2.34, "grad_norm": 0.7350683212280273, "learning_rate": 6.85293558915466e-05, "loss": 2.9889, "step": 47780 }, { "epoch": 2.34, "grad_norm": 0.7374870181083679, "learning_rate": 6.851956197345747e-05, "loss": 2.9741, "step": 47781 }, { "epoch": 2.34, "grad_norm": 0.7582040429115295, "learning_rate": 6.850976866504325e-05, "loss": 2.8146, "step": 47782 }, { "epoch": 2.34, "grad_norm": 0.6711180806159973, "learning_rate": 6.849997596632983e-05, "loss": 3.099, "step": 47783 }, { "epoch": 2.34, "grad_norm": 0.7463443279266357, "learning_rate": 6.849018387734293e-05, "loss": 2.9372, "step": 47784 }, { "epoch": 2.34, "grad_norm": 0.74971604347229, "learning_rate": 6.848039239810832e-05, "loss": 2.8886, "step": 47785 }, { "epoch": 2.34, "grad_norm": 0.7736210227012634, "learning_rate": 6.847060152865173e-05, "loss": 2.7884, "step": 47786 }, { "epoch": 2.34, "grad_norm": 0.8005310297012329, "learning_rate": 6.846081126899903e-05, "loss": 2.9747, "step": 47787 }, { "epoch": 2.34, "grad_norm": 0.7372254133224487, "learning_rate": 6.845102161917604e-05, "loss": 2.6831, "step": 47788 }, { "epoch": 2.34, "grad_norm": 0.7566565275192261, "learning_rate": 6.844123257920845e-05, "loss": 3.0529, "step": 47789 }, { "epoch": 2.34, "grad_norm": 0.726959764957428, "learning_rate": 6.843144414912214e-05, "loss": 2.8369, "step": 47790 }, { "epoch": 2.34, "grad_norm": 0.7393621206283569, "learning_rate": 6.842165632894286e-05, "loss": 3.0986, "step": 47791 }, { "epoch": 2.34, "grad_norm": 0.7415285110473633, "learning_rate": 6.841186911869628e-05, "loss": 2.9621, "step": 47792 }, { "epoch": 2.34, "grad_norm": 0.7591820955276489, "learning_rate": 6.840208251840834e-05, "loss": 2.9591, "step": 47793 }, { "epoch": 2.34, "grad_norm": 0.7299678325653076, "learning_rate": 6.839229652810467e-05, "loss": 2.8123, "step": 47794 }, { "epoch": 2.34, "grad_norm": 0.727067232131958, "learning_rate": 6.838251114781118e-05, "loss": 3.0317, "step": 47795 }, { "epoch": 2.34, "grad_norm": 0.721146285533905, "learning_rate": 6.837272637755351e-05, "loss": 2.9183, "step": 47796 }, { "epoch": 2.34, "grad_norm": 0.7278393507003784, "learning_rate": 6.836294221735761e-05, "loss": 2.8804, "step": 47797 }, { "epoch": 2.34, "grad_norm": 0.6947075724601746, "learning_rate": 6.835315866724911e-05, "loss": 2.8544, "step": 47798 }, { "epoch": 2.34, "grad_norm": 0.7457790970802307, "learning_rate": 6.834337572725372e-05, "loss": 2.9784, "step": 47799 }, { "epoch": 2.34, "grad_norm": 0.7459518313407898, "learning_rate": 6.83335933973974e-05, "loss": 2.8344, "step": 47800 }, { "epoch": 2.34, "grad_norm": 0.7571932673454285, "learning_rate": 6.832381167770574e-05, "loss": 2.968, "step": 47801 }, { "epoch": 2.34, "grad_norm": 0.6742973327636719, "learning_rate": 6.831403056820455e-05, "loss": 2.9231, "step": 47802 }, { "epoch": 2.34, "grad_norm": 0.7140032052993774, "learning_rate": 6.83042500689197e-05, "loss": 2.7132, "step": 47803 }, { "epoch": 2.34, "grad_norm": 0.7056100368499756, "learning_rate": 6.82944701798768e-05, "loss": 2.9182, "step": 47804 }, { "epoch": 2.34, "grad_norm": 0.7132856845855713, "learning_rate": 6.828469090110172e-05, "loss": 2.9506, "step": 47805 }, { "epoch": 2.34, "grad_norm": 0.7402212619781494, "learning_rate": 6.827491223262018e-05, "loss": 2.9935, "step": 47806 }, { "epoch": 2.34, "grad_norm": 0.7678213119506836, "learning_rate": 6.826513417445785e-05, "loss": 2.8589, "step": 47807 }, { "epoch": 2.34, "grad_norm": 0.6965996623039246, "learning_rate": 6.825535672664063e-05, "loss": 2.8132, "step": 47808 }, { "epoch": 2.34, "grad_norm": 0.7514899969100952, "learning_rate": 6.824557988919414e-05, "loss": 2.8645, "step": 47809 }, { "epoch": 2.34, "grad_norm": 0.7812216281890869, "learning_rate": 6.823580366214428e-05, "loss": 2.6188, "step": 47810 }, { "epoch": 2.34, "grad_norm": 0.7255864143371582, "learning_rate": 6.822602804551659e-05, "loss": 2.8784, "step": 47811 }, { "epoch": 2.34, "grad_norm": 0.7047956585884094, "learning_rate": 6.821625303933703e-05, "loss": 2.9166, "step": 47812 }, { "epoch": 2.34, "grad_norm": 0.6879226565361023, "learning_rate": 6.820647864363128e-05, "loss": 2.9923, "step": 47813 }, { "epoch": 2.34, "grad_norm": 0.7026329636573792, "learning_rate": 6.819670485842495e-05, "loss": 2.7337, "step": 47814 }, { "epoch": 2.34, "grad_norm": 0.7578959465026855, "learning_rate": 6.818693168374398e-05, "loss": 2.8791, "step": 47815 }, { "epoch": 2.34, "grad_norm": 0.6991091966629028, "learning_rate": 6.817715911961391e-05, "loss": 2.6325, "step": 47816 }, { "epoch": 2.34, "grad_norm": 0.769070029258728, "learning_rate": 6.81673871660606e-05, "loss": 3.1683, "step": 47817 }, { "epoch": 2.34, "grad_norm": 0.72699373960495, "learning_rate": 6.815761582310986e-05, "loss": 2.7933, "step": 47818 }, { "epoch": 2.34, "grad_norm": 0.756587028503418, "learning_rate": 6.814784509078728e-05, "loss": 2.7654, "step": 47819 }, { "epoch": 2.34, "grad_norm": 0.7062783241271973, "learning_rate": 6.813807496911882e-05, "loss": 2.9737, "step": 47820 }, { "epoch": 2.34, "grad_norm": 0.708519458770752, "learning_rate": 6.812830545812985e-05, "loss": 2.8675, "step": 47821 }, { "epoch": 2.34, "grad_norm": 0.7568124532699585, "learning_rate": 6.811853655784633e-05, "loss": 2.9477, "step": 47822 }, { "epoch": 2.34, "grad_norm": 0.7141517400741577, "learning_rate": 6.810876826829403e-05, "loss": 2.8733, "step": 47823 }, { "epoch": 2.34, "grad_norm": 0.732783854007721, "learning_rate": 6.80990005894985e-05, "loss": 2.923, "step": 47824 }, { "epoch": 2.34, "grad_norm": 0.7289860248565674, "learning_rate": 6.808923352148567e-05, "loss": 2.9246, "step": 47825 }, { "epoch": 2.34, "grad_norm": 0.7409937977790833, "learning_rate": 6.80794670642811e-05, "loss": 3.007, "step": 47826 }, { "epoch": 2.34, "grad_norm": 0.7329813838005066, "learning_rate": 6.806970121791062e-05, "loss": 2.8461, "step": 47827 }, { "epoch": 2.34, "grad_norm": 0.6916979551315308, "learning_rate": 6.805993598239998e-05, "loss": 2.9873, "step": 47828 }, { "epoch": 2.34, "grad_norm": 0.7569551467895508, "learning_rate": 6.805017135777469e-05, "loss": 2.7445, "step": 47829 }, { "epoch": 2.34, "grad_norm": 0.7574961185455322, "learning_rate": 6.804040734406071e-05, "loss": 2.7435, "step": 47830 }, { "epoch": 2.34, "grad_norm": 0.7416138648986816, "learning_rate": 6.803064394128355e-05, "loss": 2.873, "step": 47831 }, { "epoch": 2.34, "grad_norm": 0.7477595210075378, "learning_rate": 6.802088114946908e-05, "loss": 2.9572, "step": 47832 }, { "epoch": 2.34, "grad_norm": 0.6934207677841187, "learning_rate": 6.801111896864301e-05, "loss": 2.7564, "step": 47833 }, { "epoch": 2.34, "grad_norm": 0.7345800995826721, "learning_rate": 6.800135739883103e-05, "loss": 2.9987, "step": 47834 }, { "epoch": 2.34, "grad_norm": 0.7357404232025146, "learning_rate": 6.799159644005882e-05, "loss": 2.8571, "step": 47835 }, { "epoch": 2.34, "grad_norm": 0.7066752314567566, "learning_rate": 6.7981836092352e-05, "loss": 2.9571, "step": 47836 }, { "epoch": 2.34, "grad_norm": 0.766917884349823, "learning_rate": 6.797207635573636e-05, "loss": 2.8954, "step": 47837 }, { "epoch": 2.34, "grad_norm": 0.8036574721336365, "learning_rate": 6.796231723023772e-05, "loss": 2.9591, "step": 47838 }, { "epoch": 2.34, "grad_norm": 0.7077456116676331, "learning_rate": 6.795255871588157e-05, "loss": 2.9171, "step": 47839 }, { "epoch": 2.34, "grad_norm": 0.7191286087036133, "learning_rate": 6.794280081269385e-05, "loss": 2.8068, "step": 47840 }, { "epoch": 2.34, "grad_norm": 0.6867917776107788, "learning_rate": 6.79330435207e-05, "loss": 2.9191, "step": 47841 }, { "epoch": 2.34, "grad_norm": 0.7825203537940979, "learning_rate": 6.792328683992598e-05, "loss": 2.9092, "step": 47842 }, { "epoch": 2.34, "grad_norm": 0.7153095602989197, "learning_rate": 6.791353077039733e-05, "loss": 2.7737, "step": 47843 }, { "epoch": 2.34, "grad_norm": 0.768951416015625, "learning_rate": 6.790377531213969e-05, "loss": 2.8843, "step": 47844 }, { "epoch": 2.34, "grad_norm": 0.8321551084518433, "learning_rate": 6.789402046517896e-05, "loss": 2.6498, "step": 47845 }, { "epoch": 2.34, "grad_norm": 0.726223886013031, "learning_rate": 6.788426622954059e-05, "loss": 2.914, "step": 47846 }, { "epoch": 2.34, "grad_norm": 0.7852296233177185, "learning_rate": 6.787451260525045e-05, "loss": 2.9952, "step": 47847 }, { "epoch": 2.34, "grad_norm": 0.7488097548484802, "learning_rate": 6.786475959233414e-05, "loss": 3.0178, "step": 47848 }, { "epoch": 2.34, "grad_norm": 0.7254324555397034, "learning_rate": 6.785500719081742e-05, "loss": 3.0343, "step": 47849 }, { "epoch": 2.35, "grad_norm": 0.7108405828475952, "learning_rate": 6.784525540072596e-05, "loss": 2.789, "step": 47850 }, { "epoch": 2.35, "grad_norm": 0.7349480986595154, "learning_rate": 6.783550422208534e-05, "loss": 2.6636, "step": 47851 }, { "epoch": 2.35, "grad_norm": 0.7456988096237183, "learning_rate": 6.782575365492138e-05, "loss": 2.9112, "step": 47852 }, { "epoch": 2.35, "grad_norm": 0.7272082567214966, "learning_rate": 6.781600369925962e-05, "loss": 3.1821, "step": 47853 }, { "epoch": 2.35, "grad_norm": 0.7798593640327454, "learning_rate": 6.780625435512585e-05, "loss": 2.9526, "step": 47854 }, { "epoch": 2.35, "grad_norm": 0.7535591721534729, "learning_rate": 6.77965056225458e-05, "loss": 2.8622, "step": 47855 }, { "epoch": 2.35, "grad_norm": 0.680900514125824, "learning_rate": 6.778675750154506e-05, "loss": 2.9729, "step": 47856 }, { "epoch": 2.35, "grad_norm": 0.7286996245384216, "learning_rate": 6.777700999214928e-05, "loss": 3.0362, "step": 47857 }, { "epoch": 2.35, "grad_norm": 0.772244930267334, "learning_rate": 6.776726309438408e-05, "loss": 3.0197, "step": 47858 }, { "epoch": 2.35, "grad_norm": 0.7076226472854614, "learning_rate": 6.775751680827525e-05, "loss": 3.0025, "step": 47859 }, { "epoch": 2.35, "grad_norm": 0.7190698385238647, "learning_rate": 6.774777113384848e-05, "loss": 3.1247, "step": 47860 }, { "epoch": 2.35, "grad_norm": 0.7115904688835144, "learning_rate": 6.773802607112934e-05, "loss": 2.7755, "step": 47861 }, { "epoch": 2.35, "grad_norm": 0.7206274271011353, "learning_rate": 6.772828162014359e-05, "loss": 2.8821, "step": 47862 }, { "epoch": 2.35, "grad_norm": 0.7294838428497314, "learning_rate": 6.771853778091678e-05, "loss": 2.8908, "step": 47863 }, { "epoch": 2.35, "grad_norm": 0.728641927242279, "learning_rate": 6.77087945534747e-05, "loss": 2.984, "step": 47864 }, { "epoch": 2.35, "grad_norm": 0.6940865516662598, "learning_rate": 6.769905193784295e-05, "loss": 2.7064, "step": 47865 }, { "epoch": 2.35, "grad_norm": 0.7145792245864868, "learning_rate": 6.768930993404712e-05, "loss": 2.7834, "step": 47866 }, { "epoch": 2.35, "grad_norm": 0.7666870951652527, "learning_rate": 6.767956854211302e-05, "loss": 2.8824, "step": 47867 }, { "epoch": 2.35, "grad_norm": 0.6863735318183899, "learning_rate": 6.766982776206616e-05, "loss": 2.7799, "step": 47868 }, { "epoch": 2.35, "grad_norm": 0.714529275894165, "learning_rate": 6.766008759393222e-05, "loss": 2.8986, "step": 47869 }, { "epoch": 2.35, "grad_norm": 0.6909911036491394, "learning_rate": 6.7650348037737e-05, "loss": 2.6996, "step": 47870 }, { "epoch": 2.35, "grad_norm": 0.6930615901947021, "learning_rate": 6.764060909350604e-05, "loss": 3.0113, "step": 47871 }, { "epoch": 2.35, "grad_norm": 0.7450976967811584, "learning_rate": 6.7630870761265e-05, "loss": 2.7526, "step": 47872 }, { "epoch": 2.35, "grad_norm": 0.7528846263885498, "learning_rate": 6.762113304103947e-05, "loss": 3.0688, "step": 47873 }, { "epoch": 2.35, "grad_norm": 0.7500634789466858, "learning_rate": 6.761139593285512e-05, "loss": 2.8791, "step": 47874 }, { "epoch": 2.35, "grad_norm": 0.7247843146324158, "learning_rate": 6.760165943673774e-05, "loss": 3.054, "step": 47875 }, { "epoch": 2.35, "grad_norm": 0.6777768731117249, "learning_rate": 6.759192355271272e-05, "loss": 3.0018, "step": 47876 }, { "epoch": 2.35, "grad_norm": 0.7233495116233826, "learning_rate": 6.758218828080598e-05, "loss": 2.7997, "step": 47877 }, { "epoch": 2.35, "grad_norm": 0.7728328108787537, "learning_rate": 6.757245362104303e-05, "loss": 3.1007, "step": 47878 }, { "epoch": 2.35, "grad_norm": 0.6918376684188843, "learning_rate": 6.756271957344938e-05, "loss": 2.9147, "step": 47879 }, { "epoch": 2.35, "grad_norm": 0.6865652799606323, "learning_rate": 6.755298613805092e-05, "loss": 3.0058, "step": 47880 }, { "epoch": 2.35, "grad_norm": 0.7141489386558533, "learning_rate": 6.754325331487308e-05, "loss": 2.8366, "step": 47881 }, { "epoch": 2.35, "grad_norm": 0.7318657636642456, "learning_rate": 6.753352110394163e-05, "loss": 2.7465, "step": 47882 }, { "epoch": 2.35, "grad_norm": 0.7256183624267578, "learning_rate": 6.752378950528206e-05, "loss": 2.975, "step": 47883 }, { "epoch": 2.35, "grad_norm": 0.8105762600898743, "learning_rate": 6.751405851892009e-05, "loss": 2.629, "step": 47884 }, { "epoch": 2.35, "grad_norm": 0.7484055757522583, "learning_rate": 6.750432814488145e-05, "loss": 2.9615, "step": 47885 }, { "epoch": 2.35, "grad_norm": 0.7243873476982117, "learning_rate": 6.749459838319164e-05, "loss": 2.8934, "step": 47886 }, { "epoch": 2.35, "grad_norm": 0.7047001719474792, "learning_rate": 6.748486923387636e-05, "loss": 2.9622, "step": 47887 }, { "epoch": 2.35, "grad_norm": 0.6941709518432617, "learning_rate": 6.747514069696104e-05, "loss": 3.0624, "step": 47888 }, { "epoch": 2.35, "grad_norm": 0.7410936951637268, "learning_rate": 6.74654127724715e-05, "loss": 2.6276, "step": 47889 }, { "epoch": 2.35, "grad_norm": 0.74365234375, "learning_rate": 6.745568546043338e-05, "loss": 3.0654, "step": 47890 }, { "epoch": 2.35, "grad_norm": 0.7380276322364807, "learning_rate": 6.744595876087211e-05, "loss": 2.9712, "step": 47891 }, { "epoch": 2.35, "grad_norm": 0.6733323335647583, "learning_rate": 6.743623267381355e-05, "loss": 2.6858, "step": 47892 }, { "epoch": 2.35, "grad_norm": 0.7377191781997681, "learning_rate": 6.742650719928317e-05, "loss": 2.7803, "step": 47893 }, { "epoch": 2.35, "grad_norm": 0.7515687942504883, "learning_rate": 6.741678233730654e-05, "loss": 3.0201, "step": 47894 }, { "epoch": 2.35, "grad_norm": 0.7286888360977173, "learning_rate": 6.740705808790943e-05, "loss": 3.0113, "step": 47895 }, { "epoch": 2.35, "grad_norm": 0.7062298059463501, "learning_rate": 6.739733445111728e-05, "loss": 2.7799, "step": 47896 }, { "epoch": 2.35, "grad_norm": 0.7720447778701782, "learning_rate": 6.738761142695588e-05, "loss": 2.9522, "step": 47897 }, { "epoch": 2.35, "grad_norm": 0.714799165725708, "learning_rate": 6.737788901545067e-05, "loss": 2.951, "step": 47898 }, { "epoch": 2.35, "grad_norm": 0.7219805717468262, "learning_rate": 6.736816721662737e-05, "loss": 2.8175, "step": 47899 }, { "epoch": 2.35, "grad_norm": 0.7763769626617432, "learning_rate": 6.735844603051158e-05, "loss": 2.9363, "step": 47900 }, { "epoch": 2.35, "grad_norm": 0.7317289113998413, "learning_rate": 6.73487254571288e-05, "loss": 2.9334, "step": 47901 }, { "epoch": 2.35, "grad_norm": 0.7248978018760681, "learning_rate": 6.733900549650475e-05, "loss": 3.0164, "step": 47902 }, { "epoch": 2.35, "grad_norm": 0.7333472967147827, "learning_rate": 6.732928614866494e-05, "loss": 3.0241, "step": 47903 }, { "epoch": 2.35, "grad_norm": 0.7930324673652649, "learning_rate": 6.731956741363498e-05, "loss": 2.8516, "step": 47904 }, { "epoch": 2.35, "grad_norm": 0.7575118541717529, "learning_rate": 6.730984929144058e-05, "loss": 2.876, "step": 47905 }, { "epoch": 2.35, "grad_norm": 0.7359734773635864, "learning_rate": 6.730013178210717e-05, "loss": 2.8377, "step": 47906 }, { "epoch": 2.35, "grad_norm": 0.7023991942405701, "learning_rate": 6.729041488566054e-05, "loss": 2.9254, "step": 47907 }, { "epoch": 2.35, "grad_norm": 0.7352596521377563, "learning_rate": 6.728069860212616e-05, "loss": 2.9139, "step": 47908 }, { "epoch": 2.35, "grad_norm": 0.6935974359512329, "learning_rate": 6.727098293152952e-05, "loss": 2.9124, "step": 47909 }, { "epoch": 2.35, "grad_norm": 0.7225365042686462, "learning_rate": 6.726126787389645e-05, "loss": 2.8963, "step": 47910 }, { "epoch": 2.35, "grad_norm": 0.7281026840209961, "learning_rate": 6.725155342925228e-05, "loss": 2.9505, "step": 47911 }, { "epoch": 2.35, "grad_norm": 0.7165417075157166, "learning_rate": 6.724183959762285e-05, "loss": 2.8035, "step": 47912 }, { "epoch": 2.35, "grad_norm": 0.7453783750534058, "learning_rate": 6.72321263790335e-05, "loss": 2.9435, "step": 47913 }, { "epoch": 2.35, "grad_norm": 0.7814623117446899, "learning_rate": 6.722241377351006e-05, "loss": 2.9833, "step": 47914 }, { "epoch": 2.35, "grad_norm": 0.7536753416061401, "learning_rate": 6.721270178107794e-05, "loss": 2.8194, "step": 47915 }, { "epoch": 2.35, "grad_norm": 0.7346058487892151, "learning_rate": 6.72029904017627e-05, "loss": 2.7661, "step": 47916 }, { "epoch": 2.35, "grad_norm": 0.7435902953147888, "learning_rate": 6.719327963559005e-05, "loss": 2.7599, "step": 47917 }, { "epoch": 2.35, "grad_norm": 0.7150677442550659, "learning_rate": 6.718356948258543e-05, "loss": 2.984, "step": 47918 }, { "epoch": 2.35, "grad_norm": 0.7524111270904541, "learning_rate": 6.717385994277446e-05, "loss": 2.838, "step": 47919 }, { "epoch": 2.35, "grad_norm": 0.7796128988265991, "learning_rate": 6.716415101618284e-05, "loss": 2.7455, "step": 47920 }, { "epoch": 2.35, "grad_norm": 0.7386764883995056, "learning_rate": 6.715444270283598e-05, "loss": 2.9067, "step": 47921 }, { "epoch": 2.35, "grad_norm": 0.7747188806533813, "learning_rate": 6.71447350027596e-05, "loss": 2.8656, "step": 47922 }, { "epoch": 2.35, "grad_norm": 0.7116460800170898, "learning_rate": 6.713502791597906e-05, "loss": 2.8489, "step": 47923 }, { "epoch": 2.35, "grad_norm": 0.7225497961044312, "learning_rate": 6.712532144252014e-05, "loss": 2.9257, "step": 47924 }, { "epoch": 2.35, "grad_norm": 0.7185220122337341, "learning_rate": 6.711561558240818e-05, "loss": 2.9328, "step": 47925 }, { "epoch": 2.35, "grad_norm": 0.7107573747634888, "learning_rate": 6.710591033566888e-05, "loss": 2.8896, "step": 47926 }, { "epoch": 2.35, "grad_norm": 0.6854578852653503, "learning_rate": 6.709620570232787e-05, "loss": 3.0585, "step": 47927 }, { "epoch": 2.35, "grad_norm": 0.7601986527442932, "learning_rate": 6.708650168241055e-05, "loss": 2.7619, "step": 47928 }, { "epoch": 2.35, "grad_norm": 0.7219326496124268, "learning_rate": 6.707679827594266e-05, "loss": 3.0518, "step": 47929 }, { "epoch": 2.35, "grad_norm": 0.7219486236572266, "learning_rate": 6.706709548294963e-05, "loss": 2.8814, "step": 47930 }, { "epoch": 2.35, "grad_norm": 0.7470270395278931, "learning_rate": 6.705739330345699e-05, "loss": 2.8936, "step": 47931 }, { "epoch": 2.35, "grad_norm": 0.7633888125419617, "learning_rate": 6.70476917374904e-05, "loss": 3.121, "step": 47932 }, { "epoch": 2.35, "grad_norm": 0.7289944291114807, "learning_rate": 6.703799078507527e-05, "loss": 2.9113, "step": 47933 }, { "epoch": 2.35, "grad_norm": 0.7318068742752075, "learning_rate": 6.702829044623736e-05, "loss": 2.9518, "step": 47934 }, { "epoch": 2.35, "grad_norm": 0.6930985450744629, "learning_rate": 6.701859072100197e-05, "loss": 3.0098, "step": 47935 }, { "epoch": 2.35, "grad_norm": 0.691349983215332, "learning_rate": 6.700889160939488e-05, "loss": 3.0021, "step": 47936 }, { "epoch": 2.35, "grad_norm": 0.6962461471557617, "learning_rate": 6.699919311144151e-05, "loss": 2.9648, "step": 47937 }, { "epoch": 2.35, "grad_norm": 0.6997219324111938, "learning_rate": 6.698949522716737e-05, "loss": 3.0215, "step": 47938 }, { "epoch": 2.35, "grad_norm": 0.7151573896408081, "learning_rate": 6.69797979565981e-05, "loss": 2.9138, "step": 47939 }, { "epoch": 2.35, "grad_norm": 0.7079306840896606, "learning_rate": 6.697010129975917e-05, "loss": 2.7507, "step": 47940 }, { "epoch": 2.35, "grad_norm": 0.7613853216171265, "learning_rate": 6.69604052566761e-05, "loss": 2.7584, "step": 47941 }, { "epoch": 2.35, "grad_norm": 0.7670363187789917, "learning_rate": 6.695070982737456e-05, "loss": 2.9046, "step": 47942 }, { "epoch": 2.35, "grad_norm": 0.7608156204223633, "learning_rate": 6.694101501187993e-05, "loss": 2.9756, "step": 47943 }, { "epoch": 2.35, "grad_norm": 0.7187494039535522, "learning_rate": 6.693132081021788e-05, "loss": 2.8245, "step": 47944 }, { "epoch": 2.35, "grad_norm": 0.766250729560852, "learning_rate": 6.69216272224139e-05, "loss": 2.7639, "step": 47945 }, { "epoch": 2.35, "grad_norm": 0.7323248386383057, "learning_rate": 6.69119342484934e-05, "loss": 2.975, "step": 47946 }, { "epoch": 2.35, "grad_norm": 0.7261918187141418, "learning_rate": 6.690224188848208e-05, "loss": 3.0653, "step": 47947 }, { "epoch": 2.35, "grad_norm": 0.7363904118537903, "learning_rate": 6.689255014240533e-05, "loss": 2.8412, "step": 47948 }, { "epoch": 2.35, "grad_norm": 0.7276012301445007, "learning_rate": 6.688285901028883e-05, "loss": 3.0029, "step": 47949 }, { "epoch": 2.35, "grad_norm": 0.7301309108734131, "learning_rate": 6.68731684921579e-05, "loss": 2.9247, "step": 47950 }, { "epoch": 2.35, "grad_norm": 0.7242274880409241, "learning_rate": 6.686347858803827e-05, "loss": 2.767, "step": 47951 }, { "epoch": 2.35, "grad_norm": 0.7706395387649536, "learning_rate": 6.685378929795541e-05, "loss": 3.0477, "step": 47952 }, { "epoch": 2.35, "grad_norm": 0.7164594531059265, "learning_rate": 6.684410062193467e-05, "loss": 2.8957, "step": 47953 }, { "epoch": 2.35, "grad_norm": 0.7162006497383118, "learning_rate": 6.683441256000182e-05, "loss": 2.9765, "step": 47954 }, { "epoch": 2.35, "grad_norm": 0.7521647214889526, "learning_rate": 6.682472511218213e-05, "loss": 2.8028, "step": 47955 }, { "epoch": 2.35, "grad_norm": 0.7329918146133423, "learning_rate": 6.681503827850124e-05, "loss": 3.0094, "step": 47956 }, { "epoch": 2.35, "grad_norm": 0.7082628011703491, "learning_rate": 6.680535205898476e-05, "loss": 3.1608, "step": 47957 }, { "epoch": 2.35, "grad_norm": 0.721556544303894, "learning_rate": 6.679566645365813e-05, "loss": 2.7341, "step": 47958 }, { "epoch": 2.35, "grad_norm": 0.7148202657699585, "learning_rate": 6.67859814625468e-05, "loss": 3.0916, "step": 47959 }, { "epoch": 2.35, "grad_norm": 0.7291754484176636, "learning_rate": 6.677629708567624e-05, "loss": 2.8334, "step": 47960 }, { "epoch": 2.35, "grad_norm": 0.8292950391769409, "learning_rate": 6.676661332307201e-05, "loss": 3.0277, "step": 47961 }, { "epoch": 2.35, "grad_norm": 0.7232643365859985, "learning_rate": 6.675693017475972e-05, "loss": 2.8574, "step": 47962 }, { "epoch": 2.35, "grad_norm": 0.7282116413116455, "learning_rate": 6.674724764076471e-05, "loss": 3.191, "step": 47963 }, { "epoch": 2.35, "grad_norm": 0.7802262902259827, "learning_rate": 6.673756572111268e-05, "loss": 2.9908, "step": 47964 }, { "epoch": 2.35, "grad_norm": 0.732752799987793, "learning_rate": 6.672788441582886e-05, "loss": 2.8625, "step": 47965 }, { "epoch": 2.35, "grad_norm": 0.7601721286773682, "learning_rate": 6.671820372493901e-05, "loss": 2.9282, "step": 47966 }, { "epoch": 2.35, "grad_norm": 0.7560071349143982, "learning_rate": 6.670852364846855e-05, "loss": 3.0195, "step": 47967 }, { "epoch": 2.35, "grad_norm": 0.694574236869812, "learning_rate": 6.669884418644281e-05, "loss": 3.183, "step": 47968 }, { "epoch": 2.35, "grad_norm": 0.7665233016014099, "learning_rate": 6.668916533888752e-05, "loss": 3.1757, "step": 47969 }, { "epoch": 2.35, "grad_norm": 0.7764652371406555, "learning_rate": 6.667948710582798e-05, "loss": 2.9517, "step": 47970 }, { "epoch": 2.35, "grad_norm": 0.7054910063743591, "learning_rate": 6.666980948728978e-05, "loss": 2.9967, "step": 47971 }, { "epoch": 2.35, "grad_norm": 0.7566725611686707, "learning_rate": 6.666013248329845e-05, "loss": 2.9945, "step": 47972 }, { "epoch": 2.35, "grad_norm": 0.7435179948806763, "learning_rate": 6.665045609387944e-05, "loss": 2.9586, "step": 47973 }, { "epoch": 2.35, "grad_norm": 0.6983252167701721, "learning_rate": 6.664078031905822e-05, "loss": 2.8248, "step": 47974 }, { "epoch": 2.35, "grad_norm": 0.7251109480857849, "learning_rate": 6.663110515886021e-05, "loss": 3.0567, "step": 47975 }, { "epoch": 2.35, "grad_norm": 0.7957937717437744, "learning_rate": 6.662143061331093e-05, "loss": 2.6457, "step": 47976 }, { "epoch": 2.35, "grad_norm": 0.7234058380126953, "learning_rate": 6.661175668243596e-05, "loss": 3.1194, "step": 47977 }, { "epoch": 2.35, "grad_norm": 0.7288504242897034, "learning_rate": 6.660208336626066e-05, "loss": 2.9923, "step": 47978 }, { "epoch": 2.35, "grad_norm": 0.7363245487213135, "learning_rate": 6.659241066481063e-05, "loss": 3.0909, "step": 47979 }, { "epoch": 2.35, "grad_norm": 0.7586546540260315, "learning_rate": 6.658273857811123e-05, "loss": 2.919, "step": 47980 }, { "epoch": 2.35, "grad_norm": 0.7015671730041504, "learning_rate": 6.657306710618794e-05, "loss": 2.8089, "step": 47981 }, { "epoch": 2.35, "grad_norm": 0.7294623851776123, "learning_rate": 6.656339624906633e-05, "loss": 3.0357, "step": 47982 }, { "epoch": 2.35, "grad_norm": 0.7198174595832825, "learning_rate": 6.655372600677171e-05, "loss": 2.9374, "step": 47983 }, { "epoch": 2.35, "grad_norm": 0.7496002316474915, "learning_rate": 6.654405637932976e-05, "loss": 2.7852, "step": 47984 }, { "epoch": 2.35, "grad_norm": 0.71015465259552, "learning_rate": 6.653438736676575e-05, "loss": 2.9953, "step": 47985 }, { "epoch": 2.35, "grad_norm": 0.7362428307533264, "learning_rate": 6.65247189691052e-05, "loss": 2.6275, "step": 47986 }, { "epoch": 2.35, "grad_norm": 0.7148375511169434, "learning_rate": 6.651505118637374e-05, "loss": 2.952, "step": 47987 }, { "epoch": 2.35, "grad_norm": 0.7447728514671326, "learning_rate": 6.650538401859665e-05, "loss": 3.0321, "step": 47988 }, { "epoch": 2.35, "grad_norm": 0.716418981552124, "learning_rate": 6.649571746579946e-05, "loss": 2.7179, "step": 47989 }, { "epoch": 2.35, "grad_norm": 0.7445757389068604, "learning_rate": 6.648605152800753e-05, "loss": 2.9258, "step": 47990 }, { "epoch": 2.35, "grad_norm": 0.7147215008735657, "learning_rate": 6.647638620524642e-05, "loss": 2.9995, "step": 47991 }, { "epoch": 2.35, "grad_norm": 0.7629702091217041, "learning_rate": 6.646672149754161e-05, "loss": 2.941, "step": 47992 }, { "epoch": 2.35, "grad_norm": 0.6876111030578613, "learning_rate": 6.645705740491847e-05, "loss": 2.9911, "step": 47993 }, { "epoch": 2.35, "grad_norm": 0.7215240597724915, "learning_rate": 6.644739392740255e-05, "loss": 2.8185, "step": 47994 }, { "epoch": 2.35, "grad_norm": 0.7500731348991394, "learning_rate": 6.643773106501925e-05, "loss": 2.9694, "step": 47995 }, { "epoch": 2.35, "grad_norm": 0.7356647253036499, "learning_rate": 6.642806881779392e-05, "loss": 2.8889, "step": 47996 }, { "epoch": 2.35, "grad_norm": 0.7396929860115051, "learning_rate": 6.641840718575221e-05, "loss": 2.9491, "step": 47997 }, { "epoch": 2.35, "grad_norm": 0.7047430276870728, "learning_rate": 6.640874616891938e-05, "loss": 2.9185, "step": 47998 }, { "epoch": 2.35, "grad_norm": 0.7342384457588196, "learning_rate": 6.639908576732105e-05, "loss": 2.8062, "step": 47999 }, { "epoch": 2.35, "grad_norm": 0.7048220634460449, "learning_rate": 6.638942598098245e-05, "loss": 3.0256, "step": 48000 }, { "epoch": 2.35, "grad_norm": 0.6992772817611694, "learning_rate": 6.637976680992926e-05, "loss": 2.8008, "step": 48001 }, { "epoch": 2.35, "grad_norm": 0.7305018305778503, "learning_rate": 6.63701082541868e-05, "loss": 2.9617, "step": 48002 }, { "epoch": 2.35, "grad_norm": 0.7258499264717102, "learning_rate": 6.636045031378041e-05, "loss": 2.9443, "step": 48003 }, { "epoch": 2.35, "grad_norm": 0.7828282713890076, "learning_rate": 6.635079298873572e-05, "loss": 3.0035, "step": 48004 }, { "epoch": 2.35, "grad_norm": 0.7275944948196411, "learning_rate": 6.634113627907798e-05, "loss": 2.8979, "step": 48005 }, { "epoch": 2.35, "grad_norm": 0.7850326299667358, "learning_rate": 6.633148018483281e-05, "loss": 3.0463, "step": 48006 }, { "epoch": 2.35, "grad_norm": 0.7134331464767456, "learning_rate": 6.63218247060255e-05, "loss": 2.8976, "step": 48007 }, { "epoch": 2.35, "grad_norm": 0.7655074596405029, "learning_rate": 6.631216984268149e-05, "loss": 2.9853, "step": 48008 }, { "epoch": 2.35, "grad_norm": 0.7371658682823181, "learning_rate": 6.630251559482633e-05, "loss": 2.8503, "step": 48009 }, { "epoch": 2.35, "grad_norm": 0.7096714973449707, "learning_rate": 6.629286196248539e-05, "loss": 2.905, "step": 48010 }, { "epoch": 2.35, "grad_norm": 0.7324158549308777, "learning_rate": 6.628320894568407e-05, "loss": 3.0072, "step": 48011 }, { "epoch": 2.35, "grad_norm": 0.7133394479751587, "learning_rate": 6.627355654444774e-05, "loss": 2.8081, "step": 48012 }, { "epoch": 2.35, "grad_norm": 0.7191671133041382, "learning_rate": 6.626390475880183e-05, "loss": 2.7956, "step": 48013 }, { "epoch": 2.35, "grad_norm": 0.7937895655632019, "learning_rate": 6.625425358877194e-05, "loss": 2.8923, "step": 48014 }, { "epoch": 2.35, "grad_norm": 0.7353552579879761, "learning_rate": 6.624460303438325e-05, "loss": 2.9375, "step": 48015 }, { "epoch": 2.35, "grad_norm": 0.7080419063568115, "learning_rate": 6.623495309566138e-05, "loss": 2.9092, "step": 48016 }, { "epoch": 2.35, "grad_norm": 0.7076489925384521, "learning_rate": 6.622530377263164e-05, "loss": 3.1517, "step": 48017 }, { "epoch": 2.35, "grad_norm": 0.7452599406242371, "learning_rate": 6.621565506531939e-05, "loss": 2.9966, "step": 48018 }, { "epoch": 2.35, "grad_norm": 0.7066365480422974, "learning_rate": 6.620600697375019e-05, "loss": 2.7327, "step": 48019 }, { "epoch": 2.35, "grad_norm": 0.6905827522277832, "learning_rate": 6.619635949794929e-05, "loss": 3.0837, "step": 48020 }, { "epoch": 2.35, "grad_norm": 0.7231424450874329, "learning_rate": 6.618671263794226e-05, "loss": 3.1329, "step": 48021 }, { "epoch": 2.35, "grad_norm": 0.7269807457923889, "learning_rate": 6.617706639375437e-05, "loss": 2.7666, "step": 48022 }, { "epoch": 2.35, "grad_norm": 0.7062513828277588, "learning_rate": 6.616742076541117e-05, "loss": 2.7635, "step": 48023 }, { "epoch": 2.35, "grad_norm": 0.7632284760475159, "learning_rate": 6.615777575293797e-05, "loss": 2.9912, "step": 48024 }, { "epoch": 2.35, "grad_norm": 0.7120453715324402, "learning_rate": 6.614813135636008e-05, "loss": 3.2382, "step": 48025 }, { "epoch": 2.35, "grad_norm": 0.7771921157836914, "learning_rate": 6.61384875757031e-05, "loss": 2.8899, "step": 48026 }, { "epoch": 2.35, "grad_norm": 0.71280837059021, "learning_rate": 6.612884441099229e-05, "loss": 2.8302, "step": 48027 }, { "epoch": 2.35, "grad_norm": 0.7356342077255249, "learning_rate": 6.611920186225305e-05, "loss": 3.1114, "step": 48028 }, { "epoch": 2.35, "grad_norm": 0.7350482940673828, "learning_rate": 6.610955992951092e-05, "loss": 3.127, "step": 48029 }, { "epoch": 2.35, "grad_norm": 0.7893431782722473, "learning_rate": 6.60999186127911e-05, "loss": 2.8438, "step": 48030 }, { "epoch": 2.35, "grad_norm": 0.716373085975647, "learning_rate": 6.609027791211919e-05, "loss": 2.901, "step": 48031 }, { "epoch": 2.35, "grad_norm": 0.7196996212005615, "learning_rate": 6.608063782752045e-05, "loss": 2.7733, "step": 48032 }, { "epoch": 2.35, "grad_norm": 0.7472556829452515, "learning_rate": 6.607099835902023e-05, "loss": 3.0575, "step": 48033 }, { "epoch": 2.35, "grad_norm": 0.7139104604721069, "learning_rate": 6.606135950664403e-05, "loss": 2.9093, "step": 48034 }, { "epoch": 2.35, "grad_norm": 0.8291552662849426, "learning_rate": 6.605172127041715e-05, "loss": 2.989, "step": 48035 }, { "epoch": 2.35, "grad_norm": 0.7301540970802307, "learning_rate": 6.604208365036507e-05, "loss": 2.89, "step": 48036 }, { "epoch": 2.35, "grad_norm": 0.7189569473266602, "learning_rate": 6.603244664651301e-05, "loss": 2.8514, "step": 48037 }, { "epoch": 2.35, "grad_norm": 0.7034281492233276, "learning_rate": 6.602281025888656e-05, "loss": 2.8714, "step": 48038 }, { "epoch": 2.35, "grad_norm": 0.7544010281562805, "learning_rate": 6.601317448751101e-05, "loss": 2.8995, "step": 48039 }, { "epoch": 2.35, "grad_norm": 0.6748170256614685, "learning_rate": 6.600353933241164e-05, "loss": 2.6243, "step": 48040 }, { "epoch": 2.35, "grad_norm": 0.685340940952301, "learning_rate": 6.599390479361398e-05, "loss": 2.7344, "step": 48041 }, { "epoch": 2.35, "grad_norm": 0.6887692213058472, "learning_rate": 6.598427087114326e-05, "loss": 3.0116, "step": 48042 }, { "epoch": 2.35, "grad_norm": 0.7316964864730835, "learning_rate": 6.597463756502492e-05, "loss": 3.0474, "step": 48043 }, { "epoch": 2.35, "grad_norm": 0.7430610060691833, "learning_rate": 6.596500487528447e-05, "loss": 2.7318, "step": 48044 }, { "epoch": 2.35, "grad_norm": 0.71198570728302, "learning_rate": 6.595537280194704e-05, "loss": 2.9316, "step": 48045 }, { "epoch": 2.35, "grad_norm": 0.720703661441803, "learning_rate": 6.594574134503822e-05, "loss": 3.1858, "step": 48046 }, { "epoch": 2.35, "grad_norm": 0.7150319218635559, "learning_rate": 6.593611050458325e-05, "loss": 2.8819, "step": 48047 }, { "epoch": 2.35, "grad_norm": 0.7347413897514343, "learning_rate": 6.592648028060746e-05, "loss": 3.1495, "step": 48048 }, { "epoch": 2.35, "grad_norm": 0.7172079682350159, "learning_rate": 6.591685067313634e-05, "loss": 2.856, "step": 48049 }, { "epoch": 2.35, "grad_norm": 0.7229471802711487, "learning_rate": 6.59072216821951e-05, "loss": 2.9738, "step": 48050 }, { "epoch": 2.35, "grad_norm": 0.7183575630187988, "learning_rate": 6.589759330780926e-05, "loss": 3.0105, "step": 48051 }, { "epoch": 2.35, "grad_norm": 0.7318481802940369, "learning_rate": 6.588796555000403e-05, "loss": 2.8139, "step": 48052 }, { "epoch": 2.35, "grad_norm": 0.7228243350982666, "learning_rate": 6.587833840880493e-05, "loss": 2.8365, "step": 48053 }, { "epoch": 2.36, "grad_norm": 0.7609462738037109, "learning_rate": 6.586871188423724e-05, "loss": 2.6898, "step": 48054 }, { "epoch": 2.36, "grad_norm": 0.7056893110275269, "learning_rate": 6.585908597632622e-05, "loss": 2.9007, "step": 48055 }, { "epoch": 2.36, "grad_norm": 0.7319220304489136, "learning_rate": 6.584946068509737e-05, "loss": 2.9661, "step": 48056 }, { "epoch": 2.36, "grad_norm": 0.7054177522659302, "learning_rate": 6.583983601057593e-05, "loss": 2.7722, "step": 48057 }, { "epoch": 2.36, "grad_norm": 0.7039554119110107, "learning_rate": 6.583021195278726e-05, "loss": 2.7155, "step": 48058 }, { "epoch": 2.36, "grad_norm": 0.7091907858848572, "learning_rate": 6.582058851175683e-05, "loss": 3.0179, "step": 48059 }, { "epoch": 2.36, "grad_norm": 0.7543073892593384, "learning_rate": 6.581096568750993e-05, "loss": 2.9976, "step": 48060 }, { "epoch": 2.36, "grad_norm": 0.7862129211425781, "learning_rate": 6.580134348007183e-05, "loss": 2.6822, "step": 48061 }, { "epoch": 2.36, "grad_norm": 0.7358943223953247, "learning_rate": 6.579172188946789e-05, "loss": 2.8732, "step": 48062 }, { "epoch": 2.36, "grad_norm": 0.7497488260269165, "learning_rate": 6.578210091572344e-05, "loss": 2.9566, "step": 48063 }, { "epoch": 2.36, "grad_norm": 0.6996098160743713, "learning_rate": 6.577248055886396e-05, "loss": 3.085, "step": 48064 }, { "epoch": 2.36, "grad_norm": 0.7074849009513855, "learning_rate": 6.576286081891462e-05, "loss": 2.998, "step": 48065 }, { "epoch": 2.36, "grad_norm": 0.7271658182144165, "learning_rate": 6.575324169590086e-05, "loss": 3.0539, "step": 48066 }, { "epoch": 2.36, "grad_norm": 0.8061689138412476, "learning_rate": 6.574362318984792e-05, "loss": 2.6798, "step": 48067 }, { "epoch": 2.36, "grad_norm": 0.7243710160255432, "learning_rate": 6.573400530078129e-05, "loss": 2.8028, "step": 48068 }, { "epoch": 2.36, "grad_norm": 0.7632570862770081, "learning_rate": 6.57243880287262e-05, "loss": 2.8476, "step": 48069 }, { "epoch": 2.36, "grad_norm": 0.7301486134529114, "learning_rate": 6.571477137370788e-05, "loss": 2.9998, "step": 48070 }, { "epoch": 2.36, "grad_norm": 0.7374549508094788, "learning_rate": 6.570515533575188e-05, "loss": 3.0035, "step": 48071 }, { "epoch": 2.36, "grad_norm": 0.7524304986000061, "learning_rate": 6.569553991488332e-05, "loss": 2.8548, "step": 48072 }, { "epoch": 2.36, "grad_norm": 0.706081748008728, "learning_rate": 6.568592511112759e-05, "loss": 2.8905, "step": 48073 }, { "epoch": 2.36, "grad_norm": 0.7375027537345886, "learning_rate": 6.567631092451014e-05, "loss": 3.1871, "step": 48074 }, { "epoch": 2.36, "grad_norm": 0.7398857474327087, "learning_rate": 6.566669735505619e-05, "loss": 2.9136, "step": 48075 }, { "epoch": 2.36, "grad_norm": 0.7311112880706787, "learning_rate": 6.565708440279106e-05, "loss": 2.6949, "step": 48076 }, { "epoch": 2.36, "grad_norm": 0.7611480355262756, "learning_rate": 6.564747206773996e-05, "loss": 3.016, "step": 48077 }, { "epoch": 2.36, "grad_norm": 0.7483656406402588, "learning_rate": 6.563786034992843e-05, "loss": 3.0053, "step": 48078 }, { "epoch": 2.36, "grad_norm": 0.7387386560440063, "learning_rate": 6.562824924938162e-05, "loss": 2.9809, "step": 48079 }, { "epoch": 2.36, "grad_norm": 0.745112955570221, "learning_rate": 6.561863876612484e-05, "loss": 2.9586, "step": 48080 }, { "epoch": 2.36, "grad_norm": 0.7196276783943176, "learning_rate": 6.560902890018357e-05, "loss": 3.2109, "step": 48081 }, { "epoch": 2.36, "grad_norm": 0.7283993363380432, "learning_rate": 6.559941965158297e-05, "loss": 3.1374, "step": 48082 }, { "epoch": 2.36, "grad_norm": 0.7114156484603882, "learning_rate": 6.558981102034844e-05, "loss": 3.015, "step": 48083 }, { "epoch": 2.36, "grad_norm": 0.7603095769882202, "learning_rate": 6.55802030065051e-05, "loss": 2.9397, "step": 48084 }, { "epoch": 2.36, "grad_norm": 0.6935713291168213, "learning_rate": 6.557059561007843e-05, "loss": 3.1052, "step": 48085 }, { "epoch": 2.36, "grad_norm": 0.6886460185050964, "learning_rate": 6.556098883109378e-05, "loss": 2.9214, "step": 48086 }, { "epoch": 2.36, "grad_norm": 0.7370249032974243, "learning_rate": 6.555138266957627e-05, "loss": 2.9907, "step": 48087 }, { "epoch": 2.36, "grad_norm": 0.7281854748725891, "learning_rate": 6.554177712555138e-05, "loss": 2.7215, "step": 48088 }, { "epoch": 2.36, "grad_norm": 0.7895329594612122, "learning_rate": 6.553217219904424e-05, "loss": 2.8622, "step": 48089 }, { "epoch": 2.36, "grad_norm": 0.7070068717002869, "learning_rate": 6.552256789008031e-05, "loss": 2.8635, "step": 48090 }, { "epoch": 2.36, "grad_norm": 0.8132027387619019, "learning_rate": 6.551296419868481e-05, "loss": 2.7608, "step": 48091 }, { "epoch": 2.36, "grad_norm": 0.7146351933479309, "learning_rate": 6.5503361124883e-05, "loss": 2.5896, "step": 48092 }, { "epoch": 2.36, "grad_norm": 0.7069380879402161, "learning_rate": 6.549375866870023e-05, "loss": 3.038, "step": 48093 }, { "epoch": 2.36, "grad_norm": 0.7371593713760376, "learning_rate": 6.548415683016169e-05, "loss": 2.9843, "step": 48094 }, { "epoch": 2.36, "grad_norm": 0.7395793795585632, "learning_rate": 6.547455560929278e-05, "loss": 2.6887, "step": 48095 }, { "epoch": 2.36, "grad_norm": 0.7200589179992676, "learning_rate": 6.546495500611882e-05, "loss": 2.8007, "step": 48096 }, { "epoch": 2.36, "grad_norm": 0.7023578882217407, "learning_rate": 6.545535502066505e-05, "loss": 2.632, "step": 48097 }, { "epoch": 2.36, "grad_norm": 0.7165631055831909, "learning_rate": 6.544575565295672e-05, "loss": 2.9052, "step": 48098 }, { "epoch": 2.36, "grad_norm": 0.7894178032875061, "learning_rate": 6.543615690301903e-05, "loss": 2.7053, "step": 48099 }, { "epoch": 2.36, "grad_norm": 0.6898087859153748, "learning_rate": 6.54265587708774e-05, "loss": 2.7166, "step": 48100 }, { "epoch": 2.36, "grad_norm": 0.7402461171150208, "learning_rate": 6.541696125655711e-05, "loss": 3.0452, "step": 48101 }, { "epoch": 2.36, "grad_norm": 0.7221176028251648, "learning_rate": 6.540736436008333e-05, "loss": 2.9999, "step": 48102 }, { "epoch": 2.36, "grad_norm": 0.7226662039756775, "learning_rate": 6.539776808148148e-05, "loss": 2.8164, "step": 48103 }, { "epoch": 2.36, "grad_norm": 0.7799202799797058, "learning_rate": 6.538817242077676e-05, "loss": 2.9798, "step": 48104 }, { "epoch": 2.36, "grad_norm": 0.7280610203742981, "learning_rate": 6.537857737799438e-05, "loss": 2.9594, "step": 48105 }, { "epoch": 2.36, "grad_norm": 0.7227134108543396, "learning_rate": 6.536898295315975e-05, "loss": 2.6752, "step": 48106 }, { "epoch": 2.36, "grad_norm": 0.6850351691246033, "learning_rate": 6.535938914629797e-05, "loss": 2.8372, "step": 48107 }, { "epoch": 2.36, "grad_norm": 0.716296911239624, "learning_rate": 6.534979595743449e-05, "loss": 2.9671, "step": 48108 }, { "epoch": 2.36, "grad_norm": 0.6905850172042847, "learning_rate": 6.534020338659439e-05, "loss": 2.728, "step": 48109 }, { "epoch": 2.36, "grad_norm": 0.7352511882781982, "learning_rate": 6.533061143380304e-05, "loss": 2.9374, "step": 48110 }, { "epoch": 2.36, "grad_norm": 0.7677817940711975, "learning_rate": 6.53210200990858e-05, "loss": 2.7527, "step": 48111 }, { "epoch": 2.36, "grad_norm": 0.7257785201072693, "learning_rate": 6.531142938246781e-05, "loss": 3.1291, "step": 48112 }, { "epoch": 2.36, "grad_norm": 0.7301912903785706, "learning_rate": 6.530183928397435e-05, "loss": 2.7511, "step": 48113 }, { "epoch": 2.36, "grad_norm": 0.7411414384841919, "learning_rate": 6.529224980363062e-05, "loss": 2.822, "step": 48114 }, { "epoch": 2.36, "grad_norm": 0.7773602604866028, "learning_rate": 6.528266094146191e-05, "loss": 2.8123, "step": 48115 }, { "epoch": 2.36, "grad_norm": 0.7311223149299622, "learning_rate": 6.527307269749358e-05, "loss": 2.9162, "step": 48116 }, { "epoch": 2.36, "grad_norm": 0.7010335922241211, "learning_rate": 6.526348507175073e-05, "loss": 2.9099, "step": 48117 }, { "epoch": 2.36, "grad_norm": 0.7506341338157654, "learning_rate": 6.525389806425875e-05, "loss": 2.963, "step": 48118 }, { "epoch": 2.36, "grad_norm": 0.7527403235435486, "learning_rate": 6.524431167504283e-05, "loss": 2.9887, "step": 48119 }, { "epoch": 2.36, "grad_norm": 0.7092116475105286, "learning_rate": 6.523472590412814e-05, "loss": 2.7487, "step": 48120 }, { "epoch": 2.36, "grad_norm": 0.7055248618125916, "learning_rate": 6.522514075154006e-05, "loss": 3.0072, "step": 48121 }, { "epoch": 2.36, "grad_norm": 0.7026886940002441, "learning_rate": 6.521555621730372e-05, "loss": 2.7419, "step": 48122 }, { "epoch": 2.36, "grad_norm": 0.6752901673316956, "learning_rate": 6.52059723014445e-05, "loss": 2.918, "step": 48123 }, { "epoch": 2.36, "grad_norm": 0.715085506439209, "learning_rate": 6.519638900398747e-05, "loss": 2.9607, "step": 48124 }, { "epoch": 2.36, "grad_norm": 0.7532249093055725, "learning_rate": 6.518680632495805e-05, "loss": 2.8491, "step": 48125 }, { "epoch": 2.36, "grad_norm": 0.7319445610046387, "learning_rate": 6.517722426438139e-05, "loss": 2.9336, "step": 48126 }, { "epoch": 2.36, "grad_norm": 0.7512108683586121, "learning_rate": 6.516764282228264e-05, "loss": 2.6582, "step": 48127 }, { "epoch": 2.36, "grad_norm": 0.7304036021232605, "learning_rate": 6.515806199868722e-05, "loss": 3.0088, "step": 48128 }, { "epoch": 2.36, "grad_norm": 0.7679119110107422, "learning_rate": 6.514848179362017e-05, "loss": 2.9008, "step": 48129 }, { "epoch": 2.36, "grad_norm": 0.6933061480522156, "learning_rate": 6.513890220710682e-05, "loss": 3.1265, "step": 48130 }, { "epoch": 2.36, "grad_norm": 0.7610794305801392, "learning_rate": 6.512932323917251e-05, "loss": 2.8423, "step": 48131 }, { "epoch": 2.36, "grad_norm": 0.7362642288208008, "learning_rate": 6.511974488984226e-05, "loss": 2.924, "step": 48132 }, { "epoch": 2.36, "grad_norm": 0.7452098727226257, "learning_rate": 6.511016715914147e-05, "loss": 2.9748, "step": 48133 }, { "epoch": 2.36, "grad_norm": 0.7530220150947571, "learning_rate": 6.51005900470953e-05, "loss": 3.0002, "step": 48134 }, { "epoch": 2.36, "grad_norm": 0.8055080771446228, "learning_rate": 6.50910135537289e-05, "loss": 3.0089, "step": 48135 }, { "epoch": 2.36, "grad_norm": 0.722628653049469, "learning_rate": 6.508143767906761e-05, "loss": 2.9678, "step": 48136 }, { "epoch": 2.36, "grad_norm": 0.7451878190040588, "learning_rate": 6.507186242313657e-05, "loss": 2.8849, "step": 48137 }, { "epoch": 2.36, "grad_norm": 0.7400360107421875, "learning_rate": 6.50622877859611e-05, "loss": 2.9511, "step": 48138 }, { "epoch": 2.36, "grad_norm": 0.7950756549835205, "learning_rate": 6.505271376756625e-05, "loss": 2.7903, "step": 48139 }, { "epoch": 2.36, "grad_norm": 0.7754967212677002, "learning_rate": 6.504314036797742e-05, "loss": 2.8494, "step": 48140 }, { "epoch": 2.36, "grad_norm": 0.719846785068512, "learning_rate": 6.503356758721974e-05, "loss": 3.1789, "step": 48141 }, { "epoch": 2.36, "grad_norm": 0.7460191249847412, "learning_rate": 6.502399542531835e-05, "loss": 2.7791, "step": 48142 }, { "epoch": 2.36, "grad_norm": 0.6921855211257935, "learning_rate": 6.501442388229862e-05, "loss": 2.8102, "step": 48143 }, { "epoch": 2.36, "grad_norm": 0.8616728782653809, "learning_rate": 6.500485295818557e-05, "loss": 2.8678, "step": 48144 }, { "epoch": 2.36, "grad_norm": 0.7126127481460571, "learning_rate": 6.499528265300453e-05, "loss": 3.0098, "step": 48145 }, { "epoch": 2.36, "grad_norm": 0.723812460899353, "learning_rate": 6.498571296678075e-05, "loss": 2.8521, "step": 48146 }, { "epoch": 2.36, "grad_norm": 0.7311473488807678, "learning_rate": 6.497614389953932e-05, "loss": 3.0363, "step": 48147 }, { "epoch": 2.36, "grad_norm": 0.7247294783592224, "learning_rate": 6.496657545130565e-05, "loss": 2.7482, "step": 48148 }, { "epoch": 2.36, "grad_norm": 0.7272924184799194, "learning_rate": 6.495700762210459e-05, "loss": 3.0696, "step": 48149 }, { "epoch": 2.36, "grad_norm": 0.7352733016014099, "learning_rate": 6.494744041196158e-05, "loss": 2.9417, "step": 48150 }, { "epoch": 2.36, "grad_norm": 0.7280297875404358, "learning_rate": 6.493787382090186e-05, "loss": 3.0833, "step": 48151 }, { "epoch": 2.36, "grad_norm": 0.6772411465644836, "learning_rate": 6.492830784895042e-05, "loss": 3.0068, "step": 48152 }, { "epoch": 2.36, "grad_norm": 0.7198385000228882, "learning_rate": 6.491874249613267e-05, "loss": 3.0322, "step": 48153 }, { "epoch": 2.36, "grad_norm": 0.7675883173942566, "learning_rate": 6.490917776247364e-05, "loss": 3.009, "step": 48154 }, { "epoch": 2.36, "grad_norm": 0.7238621115684509, "learning_rate": 6.489961364799868e-05, "loss": 2.8473, "step": 48155 }, { "epoch": 2.36, "grad_norm": 0.804047703742981, "learning_rate": 6.489005015273288e-05, "loss": 2.7081, "step": 48156 }, { "epoch": 2.36, "grad_norm": 0.688144862651825, "learning_rate": 6.488048727670136e-05, "loss": 3.1035, "step": 48157 }, { "epoch": 2.36, "grad_norm": 0.8125393390655518, "learning_rate": 6.487092501992946e-05, "loss": 2.8867, "step": 48158 }, { "epoch": 2.36, "grad_norm": 0.7027606964111328, "learning_rate": 6.486136338244222e-05, "loss": 3.0313, "step": 48159 }, { "epoch": 2.36, "grad_norm": 0.7894521355628967, "learning_rate": 6.485180236426499e-05, "loss": 2.9952, "step": 48160 }, { "epoch": 2.36, "grad_norm": 0.7138534188270569, "learning_rate": 6.484224196542276e-05, "loss": 2.9025, "step": 48161 }, { "epoch": 2.36, "grad_norm": 0.7329240441322327, "learning_rate": 6.48326821859409e-05, "loss": 2.9142, "step": 48162 }, { "epoch": 2.36, "grad_norm": 0.7437616586685181, "learning_rate": 6.482312302584448e-05, "loss": 2.9068, "step": 48163 }, { "epoch": 2.36, "grad_norm": 0.7317550182342529, "learning_rate": 6.481356448515863e-05, "loss": 2.7661, "step": 48164 }, { "epoch": 2.36, "grad_norm": 0.7197153568267822, "learning_rate": 6.480400656390866e-05, "loss": 2.8229, "step": 48165 }, { "epoch": 2.36, "grad_norm": 0.7194134593009949, "learning_rate": 6.479444926211959e-05, "loss": 2.7384, "step": 48166 }, { "epoch": 2.36, "grad_norm": 0.7750094532966614, "learning_rate": 6.478489257981667e-05, "loss": 2.7421, "step": 48167 }, { "epoch": 2.36, "grad_norm": 0.7340207695960999, "learning_rate": 6.477533651702517e-05, "loss": 2.7373, "step": 48168 }, { "epoch": 2.36, "grad_norm": 0.7263909578323364, "learning_rate": 6.476578107377008e-05, "loss": 2.9162, "step": 48169 }, { "epoch": 2.36, "grad_norm": 0.7772607207298279, "learning_rate": 6.475622625007674e-05, "loss": 3.024, "step": 48170 }, { "epoch": 2.36, "grad_norm": 0.7200969457626343, "learning_rate": 6.47466720459702e-05, "loss": 3.0136, "step": 48171 }, { "epoch": 2.36, "grad_norm": 0.7096529006958008, "learning_rate": 6.473711846147557e-05, "loss": 3.0276, "step": 48172 }, { "epoch": 2.36, "grad_norm": 0.7910388708114624, "learning_rate": 6.47275654966182e-05, "loss": 2.8153, "step": 48173 }, { "epoch": 2.36, "grad_norm": 0.753176212310791, "learning_rate": 6.471801315142304e-05, "loss": 2.9071, "step": 48174 }, { "epoch": 2.36, "grad_norm": 0.7686089277267456, "learning_rate": 6.470846142591545e-05, "loss": 2.9368, "step": 48175 }, { "epoch": 2.36, "grad_norm": 0.7298809885978699, "learning_rate": 6.46989103201204e-05, "loss": 2.8194, "step": 48176 }, { "epoch": 2.36, "grad_norm": 0.7550878524780273, "learning_rate": 6.46893598340632e-05, "loss": 3.235, "step": 48177 }, { "epoch": 2.36, "grad_norm": 0.7033806443214417, "learning_rate": 6.467980996776898e-05, "loss": 2.869, "step": 48178 }, { "epoch": 2.36, "grad_norm": 0.743836522102356, "learning_rate": 6.467026072126275e-05, "loss": 2.7897, "step": 48179 }, { "epoch": 2.36, "grad_norm": 0.7496225833892822, "learning_rate": 6.466071209456984e-05, "loss": 2.9425, "step": 48180 }, { "epoch": 2.36, "grad_norm": 0.7606794238090515, "learning_rate": 6.465116408771526e-05, "loss": 2.9058, "step": 48181 }, { "epoch": 2.36, "grad_norm": 0.7309315204620361, "learning_rate": 6.46416167007242e-05, "loss": 2.6442, "step": 48182 }, { "epoch": 2.36, "grad_norm": 0.7935462594032288, "learning_rate": 6.463206993362191e-05, "loss": 2.9268, "step": 48183 }, { "epoch": 2.36, "grad_norm": 0.7384037971496582, "learning_rate": 6.462252378643345e-05, "loss": 3.1264, "step": 48184 }, { "epoch": 2.36, "grad_norm": 0.7172885537147522, "learning_rate": 6.461297825918394e-05, "loss": 2.9865, "step": 48185 }, { "epoch": 2.36, "grad_norm": 0.7741020321846008, "learning_rate": 6.460343335189849e-05, "loss": 2.7657, "step": 48186 }, { "epoch": 2.36, "grad_norm": 0.742149293422699, "learning_rate": 6.45938890646023e-05, "loss": 2.8609, "step": 48187 }, { "epoch": 2.36, "grad_norm": 0.6724493503570557, "learning_rate": 6.458434539732058e-05, "loss": 2.9769, "step": 48188 }, { "epoch": 2.36, "grad_norm": 0.7558503746986389, "learning_rate": 6.457480235007829e-05, "loss": 2.8232, "step": 48189 }, { "epoch": 2.36, "grad_norm": 0.7640053033828735, "learning_rate": 6.456525992290075e-05, "loss": 2.8353, "step": 48190 }, { "epoch": 2.36, "grad_norm": 0.7217065095901489, "learning_rate": 6.455571811581293e-05, "loss": 2.814, "step": 48191 }, { "epoch": 2.36, "grad_norm": 0.7169857025146484, "learning_rate": 6.454617692884014e-05, "loss": 2.9869, "step": 48192 }, { "epoch": 2.36, "grad_norm": 0.7590023279190063, "learning_rate": 6.453663636200738e-05, "loss": 2.8972, "step": 48193 }, { "epoch": 2.36, "grad_norm": 0.6975011825561523, "learning_rate": 6.45270964153397e-05, "loss": 3.0994, "step": 48194 }, { "epoch": 2.36, "grad_norm": 0.7145510911941528, "learning_rate": 6.451755708886243e-05, "loss": 2.8702, "step": 48195 }, { "epoch": 2.36, "grad_norm": 0.7827848196029663, "learning_rate": 6.450801838260055e-05, "loss": 3.2263, "step": 48196 }, { "epoch": 2.36, "grad_norm": 0.7232081890106201, "learning_rate": 6.449848029657918e-05, "loss": 2.9435, "step": 48197 }, { "epoch": 2.36, "grad_norm": 0.7153977155685425, "learning_rate": 6.448894283082358e-05, "loss": 2.8311, "step": 48198 }, { "epoch": 2.36, "grad_norm": 0.7476628422737122, "learning_rate": 6.44794059853588e-05, "loss": 2.9139, "step": 48199 }, { "epoch": 2.36, "grad_norm": 0.7741516828536987, "learning_rate": 6.446986976020994e-05, "loss": 2.8474, "step": 48200 }, { "epoch": 2.36, "grad_norm": 0.7252960801124573, "learning_rate": 6.4460334155402e-05, "loss": 3.0896, "step": 48201 }, { "epoch": 2.36, "grad_norm": 0.711409866809845, "learning_rate": 6.445079917096024e-05, "loss": 2.9098, "step": 48202 }, { "epoch": 2.36, "grad_norm": 0.6946399211883545, "learning_rate": 6.44412648069098e-05, "loss": 2.8107, "step": 48203 }, { "epoch": 2.36, "grad_norm": 0.7085345983505249, "learning_rate": 6.443173106327568e-05, "loss": 3.0131, "step": 48204 }, { "epoch": 2.36, "grad_norm": 0.7308792471885681, "learning_rate": 6.44221979400831e-05, "loss": 3.0106, "step": 48205 }, { "epoch": 2.36, "grad_norm": 0.7140591144561768, "learning_rate": 6.441266543735712e-05, "loss": 3.0825, "step": 48206 }, { "epoch": 2.36, "grad_norm": 0.7082121968269348, "learning_rate": 6.440313355512277e-05, "loss": 2.9419, "step": 48207 }, { "epoch": 2.36, "grad_norm": 0.7402740120887756, "learning_rate": 6.43936022934053e-05, "loss": 3.0304, "step": 48208 }, { "epoch": 2.36, "grad_norm": 0.7371035814285278, "learning_rate": 6.438407165222964e-05, "loss": 2.9194, "step": 48209 }, { "epoch": 2.36, "grad_norm": 0.695787250995636, "learning_rate": 6.437454163162108e-05, "loss": 3.0194, "step": 48210 }, { "epoch": 2.36, "grad_norm": 0.7401152849197388, "learning_rate": 6.436501223160456e-05, "loss": 3.0295, "step": 48211 }, { "epoch": 2.36, "grad_norm": 0.7056068778038025, "learning_rate": 6.435548345220523e-05, "loss": 3.2774, "step": 48212 }, { "epoch": 2.36, "grad_norm": 0.7429165840148926, "learning_rate": 6.434595529344832e-05, "loss": 2.6779, "step": 48213 }, { "epoch": 2.36, "grad_norm": 0.7436327338218689, "learning_rate": 6.43364277553588e-05, "loss": 2.8903, "step": 48214 }, { "epoch": 2.36, "grad_norm": 0.7075293064117432, "learning_rate": 6.432690083796177e-05, "loss": 2.9532, "step": 48215 }, { "epoch": 2.36, "grad_norm": 0.728111982345581, "learning_rate": 6.431737454128224e-05, "loss": 2.8952, "step": 48216 }, { "epoch": 2.36, "grad_norm": 0.7516396045684814, "learning_rate": 6.43078488653454e-05, "loss": 3.0249, "step": 48217 }, { "epoch": 2.36, "grad_norm": 0.7346088290214539, "learning_rate": 6.42983238101764e-05, "loss": 2.7594, "step": 48218 }, { "epoch": 2.36, "grad_norm": 0.7349608540534973, "learning_rate": 6.428879937580015e-05, "loss": 2.7937, "step": 48219 }, { "epoch": 2.36, "grad_norm": 0.7432407140731812, "learning_rate": 6.427927556224196e-05, "loss": 2.8822, "step": 48220 }, { "epoch": 2.36, "grad_norm": 0.7252591848373413, "learning_rate": 6.426975236952675e-05, "loss": 2.7845, "step": 48221 }, { "epoch": 2.36, "grad_norm": 0.6931779384613037, "learning_rate": 6.42602297976796e-05, "loss": 2.9773, "step": 48222 }, { "epoch": 2.36, "grad_norm": 0.7488669157028198, "learning_rate": 6.425070784672571e-05, "loss": 2.9075, "step": 48223 }, { "epoch": 2.36, "grad_norm": 0.7043687105178833, "learning_rate": 6.424118651668998e-05, "loss": 3.07, "step": 48224 }, { "epoch": 2.36, "grad_norm": 0.726883053779602, "learning_rate": 6.423166580759771e-05, "loss": 2.6815, "step": 48225 }, { "epoch": 2.36, "grad_norm": 0.7561427354812622, "learning_rate": 6.422214571947376e-05, "loss": 2.7455, "step": 48226 }, { "epoch": 2.36, "grad_norm": 0.6870282292366028, "learning_rate": 6.421262625234337e-05, "loss": 2.6591, "step": 48227 }, { "epoch": 2.36, "grad_norm": 0.7265474200248718, "learning_rate": 6.420310740623154e-05, "loss": 2.7049, "step": 48228 }, { "epoch": 2.36, "grad_norm": 0.7331658601760864, "learning_rate": 6.419358918116329e-05, "loss": 2.8969, "step": 48229 }, { "epoch": 2.36, "grad_norm": 0.7479391098022461, "learning_rate": 6.418407157716381e-05, "loss": 3.0111, "step": 48230 }, { "epoch": 2.36, "grad_norm": 0.738805890083313, "learning_rate": 6.417455459425804e-05, "loss": 2.7992, "step": 48231 }, { "epoch": 2.36, "grad_norm": 0.7408674955368042, "learning_rate": 6.416503823247115e-05, "loss": 2.7176, "step": 48232 }, { "epoch": 2.36, "grad_norm": 0.7549987435340881, "learning_rate": 6.415552249182812e-05, "loss": 2.8767, "step": 48233 }, { "epoch": 2.36, "grad_norm": 0.7115577459335327, "learning_rate": 6.414600737235402e-05, "loss": 2.8056, "step": 48234 }, { "epoch": 2.36, "grad_norm": 0.7305412292480469, "learning_rate": 6.413649287407406e-05, "loss": 2.9721, "step": 48235 }, { "epoch": 2.36, "grad_norm": 0.7211993336677551, "learning_rate": 6.412697899701315e-05, "loss": 2.8724, "step": 48236 }, { "epoch": 2.36, "grad_norm": 0.7803027629852295, "learning_rate": 6.411746574119642e-05, "loss": 2.7537, "step": 48237 }, { "epoch": 2.36, "grad_norm": 0.7594745755195618, "learning_rate": 6.410795310664878e-05, "loss": 3.07, "step": 48238 }, { "epoch": 2.36, "grad_norm": 0.7324116230010986, "learning_rate": 6.409844109339542e-05, "loss": 2.8141, "step": 48239 }, { "epoch": 2.36, "grad_norm": 0.6955828070640564, "learning_rate": 6.408892970146143e-05, "loss": 2.7801, "step": 48240 }, { "epoch": 2.36, "grad_norm": 0.7122442722320557, "learning_rate": 6.407941893087172e-05, "loss": 2.8966, "step": 48241 }, { "epoch": 2.36, "grad_norm": 0.7142869234085083, "learning_rate": 6.40699087816515e-05, "loss": 2.817, "step": 48242 }, { "epoch": 2.36, "grad_norm": 0.7723212838172913, "learning_rate": 6.40603992538257e-05, "loss": 2.9792, "step": 48243 }, { "epoch": 2.36, "grad_norm": 0.7571322917938232, "learning_rate": 6.405089034741937e-05, "loss": 2.9604, "step": 48244 }, { "epoch": 2.36, "grad_norm": 0.730701744556427, "learning_rate": 6.404138206245765e-05, "loss": 2.9751, "step": 48245 }, { "epoch": 2.36, "grad_norm": 0.7998728156089783, "learning_rate": 6.403187439896541e-05, "loss": 3.1012, "step": 48246 }, { "epoch": 2.36, "grad_norm": 0.6990532279014587, "learning_rate": 6.402236735696793e-05, "loss": 2.839, "step": 48247 }, { "epoch": 2.36, "grad_norm": 0.7441544532775879, "learning_rate": 6.401286093648998e-05, "loss": 2.9951, "step": 48248 }, { "epoch": 2.36, "grad_norm": 0.6886500716209412, "learning_rate": 6.400335513755679e-05, "loss": 2.8154, "step": 48249 }, { "epoch": 2.36, "grad_norm": 0.7198939323425293, "learning_rate": 6.399384996019349e-05, "loss": 3.056, "step": 48250 }, { "epoch": 2.36, "grad_norm": 0.7159644961357117, "learning_rate": 6.398434540442479e-05, "loss": 2.8903, "step": 48251 }, { "epoch": 2.36, "grad_norm": 0.733924388885498, "learning_rate": 6.3974841470276e-05, "loss": 2.9423, "step": 48252 }, { "epoch": 2.36, "grad_norm": 0.7536724209785461, "learning_rate": 6.396533815777197e-05, "loss": 2.8155, "step": 48253 }, { "epoch": 2.36, "grad_norm": 0.7304123044013977, "learning_rate": 6.395583546693781e-05, "loss": 2.8958, "step": 48254 }, { "epoch": 2.36, "grad_norm": 0.7245292663574219, "learning_rate": 6.394633339779865e-05, "loss": 2.9736, "step": 48255 }, { "epoch": 2.36, "grad_norm": 0.7507905960083008, "learning_rate": 6.393683195037931e-05, "loss": 2.8121, "step": 48256 }, { "epoch": 2.36, "grad_norm": 0.7602225542068481, "learning_rate": 6.392733112470502e-05, "loss": 3.0037, "step": 48257 }, { "epoch": 2.37, "grad_norm": 0.7352206707000732, "learning_rate": 6.391783092080069e-05, "loss": 2.929, "step": 48258 }, { "epoch": 2.37, "grad_norm": 0.7542200684547424, "learning_rate": 6.390833133869132e-05, "loss": 2.9124, "step": 48259 }, { "epoch": 2.37, "grad_norm": 0.7620871663093567, "learning_rate": 6.389883237840201e-05, "loss": 2.9583, "step": 48260 }, { "epoch": 2.37, "grad_norm": 0.756596028804779, "learning_rate": 6.388933403995768e-05, "loss": 2.8351, "step": 48261 }, { "epoch": 2.37, "grad_norm": 0.7041810750961304, "learning_rate": 6.387983632338346e-05, "loss": 2.9144, "step": 48262 }, { "epoch": 2.37, "grad_norm": 0.7106823921203613, "learning_rate": 6.387033922870425e-05, "loss": 3.1492, "step": 48263 }, { "epoch": 2.37, "grad_norm": 0.7551729083061218, "learning_rate": 6.386084275594519e-05, "loss": 2.929, "step": 48264 }, { "epoch": 2.37, "grad_norm": 0.7914819717407227, "learning_rate": 6.385134690513122e-05, "loss": 2.8709, "step": 48265 }, { "epoch": 2.37, "grad_norm": 0.7370805144309998, "learning_rate": 6.384185167628726e-05, "loss": 2.7838, "step": 48266 }, { "epoch": 2.37, "grad_norm": 0.7235316634178162, "learning_rate": 6.383235706943852e-05, "loss": 3.0178, "step": 48267 }, { "epoch": 2.37, "grad_norm": 0.7227551341056824, "learning_rate": 6.382286308460982e-05, "loss": 3.1076, "step": 48268 }, { "epoch": 2.37, "grad_norm": 0.7112234830856323, "learning_rate": 6.381336972182624e-05, "loss": 2.9721, "step": 48269 }, { "epoch": 2.37, "grad_norm": 0.7669709920883179, "learning_rate": 6.380387698111288e-05, "loss": 2.8578, "step": 48270 }, { "epoch": 2.37, "grad_norm": 0.7495376467704773, "learning_rate": 6.379438486249457e-05, "loss": 2.8566, "step": 48271 }, { "epoch": 2.37, "grad_norm": 0.6867067813873291, "learning_rate": 6.378489336599644e-05, "loss": 3.2184, "step": 48272 }, { "epoch": 2.37, "grad_norm": 0.6908702254295349, "learning_rate": 6.377540249164348e-05, "loss": 2.8861, "step": 48273 }, { "epoch": 2.37, "grad_norm": 0.7597622871398926, "learning_rate": 6.376591223946054e-05, "loss": 2.673, "step": 48274 }, { "epoch": 2.37, "grad_norm": 0.7196870446205139, "learning_rate": 6.375642260947278e-05, "loss": 2.8766, "step": 48275 }, { "epoch": 2.37, "grad_norm": 0.7505701780319214, "learning_rate": 6.374693360170512e-05, "loss": 2.9577, "step": 48276 }, { "epoch": 2.37, "grad_norm": 0.7584325671195984, "learning_rate": 6.373744521618261e-05, "loss": 2.9858, "step": 48277 }, { "epoch": 2.37, "grad_norm": 0.7458747625350952, "learning_rate": 6.372795745293013e-05, "loss": 3.0763, "step": 48278 }, { "epoch": 2.37, "grad_norm": 0.79670649766922, "learning_rate": 6.371847031197283e-05, "loss": 2.8207, "step": 48279 }, { "epoch": 2.37, "grad_norm": 0.67808997631073, "learning_rate": 6.37089837933356e-05, "loss": 2.705, "step": 48280 }, { "epoch": 2.37, "grad_norm": 0.741486132144928, "learning_rate": 6.369949789704332e-05, "loss": 2.8786, "step": 48281 }, { "epoch": 2.37, "grad_norm": 0.7459058165550232, "learning_rate": 6.369001262312118e-05, "loss": 3.1757, "step": 48282 }, { "epoch": 2.37, "grad_norm": 0.6977839469909668, "learning_rate": 6.368052797159402e-05, "loss": 2.8612, "step": 48283 }, { "epoch": 2.37, "grad_norm": 0.7027153968811035, "learning_rate": 6.367104394248684e-05, "loss": 2.9531, "step": 48284 }, { "epoch": 2.37, "grad_norm": 0.6924808621406555, "learning_rate": 6.366156053582475e-05, "loss": 2.8736, "step": 48285 }, { "epoch": 2.37, "grad_norm": 0.7258365750312805, "learning_rate": 6.365207775163259e-05, "loss": 2.7555, "step": 48286 }, { "epoch": 2.37, "grad_norm": 0.7599617838859558, "learning_rate": 6.36425955899354e-05, "loss": 2.9788, "step": 48287 }, { "epoch": 2.37, "grad_norm": 0.6839301586151123, "learning_rate": 6.363311405075803e-05, "loss": 3.2282, "step": 48288 }, { "epoch": 2.37, "grad_norm": 0.7343278527259827, "learning_rate": 6.362363313412557e-05, "loss": 2.9432, "step": 48289 }, { "epoch": 2.37, "grad_norm": 0.739058256149292, "learning_rate": 6.361415284006301e-05, "loss": 2.7162, "step": 48290 }, { "epoch": 2.37, "grad_norm": 0.7455694675445557, "learning_rate": 6.360467316859522e-05, "loss": 3.0939, "step": 48291 }, { "epoch": 2.37, "grad_norm": 0.7573563456535339, "learning_rate": 6.359519411974733e-05, "loss": 2.9621, "step": 48292 }, { "epoch": 2.37, "grad_norm": 0.8001168370246887, "learning_rate": 6.358571569354408e-05, "loss": 2.9751, "step": 48293 }, { "epoch": 2.37, "grad_norm": 0.7187392115592957, "learning_rate": 6.357623789001064e-05, "loss": 2.984, "step": 48294 }, { "epoch": 2.37, "grad_norm": 0.6924291849136353, "learning_rate": 6.356676070917192e-05, "loss": 2.9977, "step": 48295 }, { "epoch": 2.37, "grad_norm": 0.7191535830497742, "learning_rate": 6.355728415105276e-05, "loss": 2.8086, "step": 48296 }, { "epoch": 2.37, "grad_norm": 0.7379903793334961, "learning_rate": 6.354780821567828e-05, "loss": 2.7732, "step": 48297 }, { "epoch": 2.37, "grad_norm": 0.7109218239784241, "learning_rate": 6.353833290307329e-05, "loss": 2.6506, "step": 48298 }, { "epoch": 2.37, "grad_norm": 0.7022459506988525, "learning_rate": 6.35288582132628e-05, "loss": 2.8524, "step": 48299 }, { "epoch": 2.37, "grad_norm": 0.734695553779602, "learning_rate": 6.351938414627191e-05, "loss": 2.8649, "step": 48300 }, { "epoch": 2.37, "grad_norm": 0.7235419750213623, "learning_rate": 6.350991070212544e-05, "loss": 2.6928, "step": 48301 }, { "epoch": 2.37, "grad_norm": 0.7387106418609619, "learning_rate": 6.350043788084835e-05, "loss": 2.7351, "step": 48302 }, { "epoch": 2.37, "grad_norm": 0.709895133972168, "learning_rate": 6.349096568246549e-05, "loss": 2.8434, "step": 48303 }, { "epoch": 2.37, "grad_norm": 0.757611870765686, "learning_rate": 6.348149410700192e-05, "loss": 2.9994, "step": 48304 }, { "epoch": 2.37, "grad_norm": 0.6948244571685791, "learning_rate": 6.347202315448265e-05, "loss": 2.7651, "step": 48305 }, { "epoch": 2.37, "grad_norm": 0.7108848094940186, "learning_rate": 6.346255282493248e-05, "loss": 2.6992, "step": 48306 }, { "epoch": 2.37, "grad_norm": 0.7500197291374207, "learning_rate": 6.34530831183765e-05, "loss": 2.9149, "step": 48307 }, { "epoch": 2.37, "grad_norm": 0.7581573724746704, "learning_rate": 6.34436140348396e-05, "loss": 2.8101, "step": 48308 }, { "epoch": 2.37, "grad_norm": 0.7715035676956177, "learning_rate": 6.343414557434658e-05, "loss": 2.795, "step": 48309 }, { "epoch": 2.37, "grad_norm": 0.7194180488586426, "learning_rate": 6.342467773692258e-05, "loss": 2.9186, "step": 48310 }, { "epoch": 2.37, "grad_norm": 0.7068958282470703, "learning_rate": 6.341521052259236e-05, "loss": 2.9039, "step": 48311 }, { "epoch": 2.37, "grad_norm": 0.7456514239311218, "learning_rate": 6.340574393138104e-05, "loss": 2.9809, "step": 48312 }, { "epoch": 2.37, "grad_norm": 0.7299491763114929, "learning_rate": 6.339627796331338e-05, "loss": 3.1068, "step": 48313 }, { "epoch": 2.37, "grad_norm": 0.7144024968147278, "learning_rate": 6.338681261841447e-05, "loss": 2.7794, "step": 48314 }, { "epoch": 2.37, "grad_norm": 0.7453769445419312, "learning_rate": 6.337734789670907e-05, "loss": 2.8954, "step": 48315 }, { "epoch": 2.37, "grad_norm": 0.7227847576141357, "learning_rate": 6.336788379822226e-05, "loss": 2.9263, "step": 48316 }, { "epoch": 2.37, "grad_norm": 0.7394989132881165, "learning_rate": 6.335842032297892e-05, "loss": 2.9357, "step": 48317 }, { "epoch": 2.37, "grad_norm": 0.7643879652023315, "learning_rate": 6.334895747100388e-05, "loss": 2.8486, "step": 48318 }, { "epoch": 2.37, "grad_norm": 0.710767388343811, "learning_rate": 6.333949524232222e-05, "loss": 2.8892, "step": 48319 }, { "epoch": 2.37, "grad_norm": 0.743543803691864, "learning_rate": 6.33300336369587e-05, "loss": 3.076, "step": 48320 }, { "epoch": 2.37, "grad_norm": 0.7929369211196899, "learning_rate": 6.332057265493834e-05, "loss": 2.9037, "step": 48321 }, { "epoch": 2.37, "grad_norm": 0.7415796518325806, "learning_rate": 6.33111122962861e-05, "loss": 2.8008, "step": 48322 }, { "epoch": 2.37, "grad_norm": 0.7029473185539246, "learning_rate": 6.330165256102687e-05, "loss": 2.8482, "step": 48323 }, { "epoch": 2.37, "grad_norm": 0.7240558862686157, "learning_rate": 6.32921934491855e-05, "loss": 2.8949, "step": 48324 }, { "epoch": 2.37, "grad_norm": 0.9161472320556641, "learning_rate": 6.328273496078686e-05, "loss": 2.7527, "step": 48325 }, { "epoch": 2.37, "grad_norm": 0.7548016309738159, "learning_rate": 6.327327709585597e-05, "loss": 2.7239, "step": 48326 }, { "epoch": 2.37, "grad_norm": 0.7531619071960449, "learning_rate": 6.326381985441776e-05, "loss": 2.9115, "step": 48327 }, { "epoch": 2.37, "grad_norm": 0.7124982476234436, "learning_rate": 6.3254363236497e-05, "loss": 2.8327, "step": 48328 }, { "epoch": 2.37, "grad_norm": 0.7740418910980225, "learning_rate": 6.324490724211879e-05, "loss": 2.811, "step": 48329 }, { "epoch": 2.37, "grad_norm": 0.7585882544517517, "learning_rate": 6.32354518713079e-05, "loss": 2.7411, "step": 48330 }, { "epoch": 2.37, "grad_norm": 0.6965351104736328, "learning_rate": 6.322599712408922e-05, "loss": 2.8907, "step": 48331 }, { "epoch": 2.37, "grad_norm": 0.7372415065765381, "learning_rate": 6.321654300048774e-05, "loss": 3.0206, "step": 48332 }, { "epoch": 2.37, "grad_norm": 0.7417186498641968, "learning_rate": 6.320708950052827e-05, "loss": 2.8534, "step": 48333 }, { "epoch": 2.37, "grad_norm": 0.701483964920044, "learning_rate": 6.31976366242358e-05, "loss": 2.8217, "step": 48334 }, { "epoch": 2.37, "grad_norm": 0.7164797186851501, "learning_rate": 6.318818437163514e-05, "loss": 2.8596, "step": 48335 }, { "epoch": 2.37, "grad_norm": 0.727675199508667, "learning_rate": 6.31787327427512e-05, "loss": 2.7906, "step": 48336 }, { "epoch": 2.37, "grad_norm": 0.6889267563819885, "learning_rate": 6.3169281737609e-05, "loss": 3.2159, "step": 48337 }, { "epoch": 2.37, "grad_norm": 0.7056227326393127, "learning_rate": 6.315983135623331e-05, "loss": 2.9742, "step": 48338 }, { "epoch": 2.37, "grad_norm": 0.7440009713172913, "learning_rate": 6.315038159864905e-05, "loss": 2.872, "step": 48339 }, { "epoch": 2.37, "grad_norm": 0.7045519948005676, "learning_rate": 6.314093246488101e-05, "loss": 3.1583, "step": 48340 }, { "epoch": 2.37, "grad_norm": 0.7586000561714172, "learning_rate": 6.313148395495418e-05, "loss": 2.7775, "step": 48341 }, { "epoch": 2.37, "grad_norm": 0.7050657272338867, "learning_rate": 6.312203606889356e-05, "loss": 2.7034, "step": 48342 }, { "epoch": 2.37, "grad_norm": 0.734698474407196, "learning_rate": 6.311258880672376e-05, "loss": 2.8888, "step": 48343 }, { "epoch": 2.37, "grad_norm": 0.7230560183525085, "learning_rate": 6.310314216846992e-05, "loss": 2.8722, "step": 48344 }, { "epoch": 2.37, "grad_norm": 0.7460218667984009, "learning_rate": 6.309369615415681e-05, "loss": 2.943, "step": 48345 }, { "epoch": 2.37, "grad_norm": 0.7174545526504517, "learning_rate": 6.308425076380925e-05, "loss": 2.6023, "step": 48346 }, { "epoch": 2.37, "grad_norm": 0.7263779044151306, "learning_rate": 6.307480599745225e-05, "loss": 2.9191, "step": 48347 }, { "epoch": 2.37, "grad_norm": 0.7491353154182434, "learning_rate": 6.306536185511053e-05, "loss": 2.9063, "step": 48348 }, { "epoch": 2.37, "grad_norm": 0.691290557384491, "learning_rate": 6.30559183368091e-05, "loss": 3.0822, "step": 48349 }, { "epoch": 2.37, "grad_norm": 0.776136577129364, "learning_rate": 6.304647544257273e-05, "loss": 2.7441, "step": 48350 }, { "epoch": 2.37, "grad_norm": 0.7215666174888611, "learning_rate": 6.30370331724264e-05, "loss": 2.9793, "step": 48351 }, { "epoch": 2.37, "grad_norm": 0.7477503418922424, "learning_rate": 6.302759152639496e-05, "loss": 3.0098, "step": 48352 }, { "epoch": 2.37, "grad_norm": 0.7336341142654419, "learning_rate": 6.301815050450313e-05, "loss": 2.9931, "step": 48353 }, { "epoch": 2.37, "grad_norm": 0.8228724598884583, "learning_rate": 6.300871010677596e-05, "loss": 2.9006, "step": 48354 }, { "epoch": 2.37, "grad_norm": 0.7225062847137451, "learning_rate": 6.299927033323817e-05, "loss": 3.0748, "step": 48355 }, { "epoch": 2.37, "grad_norm": 0.7133581638336182, "learning_rate": 6.298983118391467e-05, "loss": 2.8641, "step": 48356 }, { "epoch": 2.37, "grad_norm": 0.7188670635223389, "learning_rate": 6.298039265883044e-05, "loss": 2.9304, "step": 48357 }, { "epoch": 2.37, "grad_norm": 0.6973915100097656, "learning_rate": 6.297095475801014e-05, "loss": 3.0409, "step": 48358 }, { "epoch": 2.37, "grad_norm": 0.7095557451248169, "learning_rate": 6.296151748147883e-05, "loss": 3.026, "step": 48359 }, { "epoch": 2.37, "grad_norm": 0.75076824426651, "learning_rate": 6.295208082926126e-05, "loss": 3.1059, "step": 48360 }, { "epoch": 2.37, "grad_norm": 0.7055838704109192, "learning_rate": 6.294264480138222e-05, "loss": 2.9436, "step": 48361 }, { "epoch": 2.37, "grad_norm": 0.7464839220046997, "learning_rate": 6.29332093978667e-05, "loss": 3.007, "step": 48362 }, { "epoch": 2.37, "grad_norm": 0.715093731880188, "learning_rate": 6.29237746187394e-05, "loss": 2.8428, "step": 48363 }, { "epoch": 2.37, "grad_norm": 0.7014065384864807, "learning_rate": 6.291434046402533e-05, "loss": 3.0259, "step": 48364 }, { "epoch": 2.37, "grad_norm": 0.7219024896621704, "learning_rate": 6.290490693374915e-05, "loss": 3.1076, "step": 48365 }, { "epoch": 2.37, "grad_norm": 0.7014488577842712, "learning_rate": 6.289547402793594e-05, "loss": 2.9713, "step": 48366 }, { "epoch": 2.37, "grad_norm": 0.703105092048645, "learning_rate": 6.288604174661041e-05, "loss": 2.9675, "step": 48367 }, { "epoch": 2.37, "grad_norm": 0.7211623191833496, "learning_rate": 6.287661008979732e-05, "loss": 2.8565, "step": 48368 }, { "epoch": 2.37, "grad_norm": 0.7292293310165405, "learning_rate": 6.286717905752169e-05, "loss": 3.0596, "step": 48369 }, { "epoch": 2.37, "grad_norm": 0.7422485947608948, "learning_rate": 6.285774864980819e-05, "loss": 2.8302, "step": 48370 }, { "epoch": 2.37, "grad_norm": 0.7382919192314148, "learning_rate": 6.284831886668175e-05, "loss": 2.8995, "step": 48371 }, { "epoch": 2.37, "grad_norm": 0.7146931886672974, "learning_rate": 6.283888970816726e-05, "loss": 3.0275, "step": 48372 }, { "epoch": 2.37, "grad_norm": 0.6943666934967041, "learning_rate": 6.282946117428942e-05, "loss": 2.9928, "step": 48373 }, { "epoch": 2.37, "grad_norm": 0.730255126953125, "learning_rate": 6.282003326507323e-05, "loss": 2.9221, "step": 48374 }, { "epoch": 2.37, "grad_norm": 0.6991233825683594, "learning_rate": 6.281060598054342e-05, "loss": 2.6633, "step": 48375 }, { "epoch": 2.37, "grad_norm": 0.7602810859680176, "learning_rate": 6.280117932072474e-05, "loss": 2.8916, "step": 48376 }, { "epoch": 2.37, "grad_norm": 0.6797572374343872, "learning_rate": 6.279175328564217e-05, "loss": 2.6735, "step": 48377 }, { "epoch": 2.37, "grad_norm": 0.8254128694534302, "learning_rate": 6.278232787532041e-05, "loss": 2.8041, "step": 48378 }, { "epoch": 2.37, "grad_norm": 0.701127827167511, "learning_rate": 6.277290308978445e-05, "loss": 2.8629, "step": 48379 }, { "epoch": 2.37, "grad_norm": 0.7156503796577454, "learning_rate": 6.276347892905887e-05, "loss": 2.8143, "step": 48380 }, { "epoch": 2.37, "grad_norm": 0.7461564540863037, "learning_rate": 6.275405539316876e-05, "loss": 3.0213, "step": 48381 }, { "epoch": 2.37, "grad_norm": 0.7387633323669434, "learning_rate": 6.274463248213877e-05, "loss": 3.003, "step": 48382 }, { "epoch": 2.37, "grad_norm": 0.7231044173240662, "learning_rate": 6.27352101959937e-05, "loss": 2.9757, "step": 48383 }, { "epoch": 2.37, "grad_norm": 0.7560230493545532, "learning_rate": 6.27257885347585e-05, "loss": 2.792, "step": 48384 }, { "epoch": 2.37, "grad_norm": 0.7416335344314575, "learning_rate": 6.271636749845783e-05, "loss": 2.8567, "step": 48385 }, { "epoch": 2.37, "grad_norm": 0.7156395316123962, "learning_rate": 6.270694708711656e-05, "loss": 2.8991, "step": 48386 }, { "epoch": 2.37, "grad_norm": 0.7483652830123901, "learning_rate": 6.269752730075962e-05, "loss": 2.8864, "step": 48387 }, { "epoch": 2.37, "grad_norm": 0.7325050234794617, "learning_rate": 6.268810813941173e-05, "loss": 2.7133, "step": 48388 }, { "epoch": 2.37, "grad_norm": 0.7214424014091492, "learning_rate": 6.26786896030977e-05, "loss": 2.7828, "step": 48389 }, { "epoch": 2.37, "grad_norm": 0.7492168545722961, "learning_rate": 6.266927169184223e-05, "loss": 2.9989, "step": 48390 }, { "epoch": 2.37, "grad_norm": 0.7547978758811951, "learning_rate": 6.265985440567029e-05, "loss": 2.6112, "step": 48391 }, { "epoch": 2.37, "grad_norm": 0.6953510642051697, "learning_rate": 6.265043774460659e-05, "loss": 3.0589, "step": 48392 }, { "epoch": 2.37, "grad_norm": 0.7603529095649719, "learning_rate": 6.264102170867588e-05, "loss": 2.8438, "step": 48393 }, { "epoch": 2.37, "grad_norm": 0.6982311606407166, "learning_rate": 6.263160629790318e-05, "loss": 2.8759, "step": 48394 }, { "epoch": 2.37, "grad_norm": 0.6933879852294922, "learning_rate": 6.262219151231305e-05, "loss": 3.0471, "step": 48395 }, { "epoch": 2.37, "grad_norm": 0.7062197923660278, "learning_rate": 6.261277735193047e-05, "loss": 2.9002, "step": 48396 }, { "epoch": 2.37, "grad_norm": 0.7573047280311584, "learning_rate": 6.260336381678011e-05, "loss": 2.892, "step": 48397 }, { "epoch": 2.37, "grad_norm": 0.6713040471076965, "learning_rate": 6.259395090688678e-05, "loss": 2.9715, "step": 48398 }, { "epoch": 2.37, "grad_norm": 0.7490809559822083, "learning_rate": 6.258453862227534e-05, "loss": 2.9012, "step": 48399 }, { "epoch": 2.37, "grad_norm": 0.7417430877685547, "learning_rate": 6.257512696297047e-05, "loss": 2.9564, "step": 48400 }, { "epoch": 2.37, "grad_norm": 0.7003105878829956, "learning_rate": 6.256571592899708e-05, "loss": 2.9267, "step": 48401 }, { "epoch": 2.37, "grad_norm": 0.7170599102973938, "learning_rate": 6.255630552037983e-05, "loss": 2.8603, "step": 48402 }, { "epoch": 2.37, "grad_norm": 0.7321606874465942, "learning_rate": 6.254689573714363e-05, "loss": 3.1191, "step": 48403 }, { "epoch": 2.37, "grad_norm": 0.7242743372917175, "learning_rate": 6.253748657931326e-05, "loss": 2.9785, "step": 48404 }, { "epoch": 2.37, "grad_norm": 0.7323653101921082, "learning_rate": 6.252807804691333e-05, "loss": 2.8328, "step": 48405 }, { "epoch": 2.37, "grad_norm": 0.7143465280532837, "learning_rate": 6.251867013996884e-05, "loss": 2.9296, "step": 48406 }, { "epoch": 2.37, "grad_norm": 0.7798712849617004, "learning_rate": 6.250926285850437e-05, "loss": 3.0471, "step": 48407 }, { "epoch": 2.37, "grad_norm": 0.7423224449157715, "learning_rate": 6.249985620254479e-05, "loss": 2.9293, "step": 48408 }, { "epoch": 2.37, "grad_norm": 0.7109093070030212, "learning_rate": 6.249045017211499e-05, "loss": 2.9326, "step": 48409 }, { "epoch": 2.37, "grad_norm": 0.7001209259033203, "learning_rate": 6.248104476723963e-05, "loss": 2.6705, "step": 48410 }, { "epoch": 2.37, "grad_norm": 0.7019423842430115, "learning_rate": 6.247163998794349e-05, "loss": 2.7414, "step": 48411 }, { "epoch": 2.37, "grad_norm": 0.7206318378448486, "learning_rate": 6.246223583425122e-05, "loss": 2.8257, "step": 48412 }, { "epoch": 2.37, "grad_norm": 0.7256973385810852, "learning_rate": 6.245283230618773e-05, "loss": 2.7982, "step": 48413 }, { "epoch": 2.37, "grad_norm": 0.7365603446960449, "learning_rate": 6.244342940377787e-05, "loss": 2.8841, "step": 48414 }, { "epoch": 2.37, "grad_norm": 0.7689672708511353, "learning_rate": 6.243402712704618e-05, "loss": 2.956, "step": 48415 }, { "epoch": 2.37, "grad_norm": 0.7122086882591248, "learning_rate": 6.242462547601762e-05, "loss": 2.8686, "step": 48416 }, { "epoch": 2.37, "grad_norm": 0.7387104034423828, "learning_rate": 6.241522445071683e-05, "loss": 3.0019, "step": 48417 }, { "epoch": 2.37, "grad_norm": 0.7708308100700378, "learning_rate": 6.240582405116865e-05, "loss": 2.8456, "step": 48418 }, { "epoch": 2.37, "grad_norm": 0.7489545345306396, "learning_rate": 6.239642427739783e-05, "loss": 2.8216, "step": 48419 }, { "epoch": 2.37, "grad_norm": 0.7343196272850037, "learning_rate": 6.238702512942904e-05, "loss": 2.7546, "step": 48420 }, { "epoch": 2.37, "grad_norm": 0.7246786952018738, "learning_rate": 6.237762660728712e-05, "loss": 3.0248, "step": 48421 }, { "epoch": 2.37, "grad_norm": 0.7109408378601074, "learning_rate": 6.236822871099678e-05, "loss": 2.9897, "step": 48422 }, { "epoch": 2.37, "grad_norm": 0.7401118278503418, "learning_rate": 6.235883144058275e-05, "loss": 2.7363, "step": 48423 }, { "epoch": 2.37, "grad_norm": 0.6865191459655762, "learning_rate": 6.234943479606993e-05, "loss": 2.7305, "step": 48424 }, { "epoch": 2.37, "grad_norm": 0.7721907496452332, "learning_rate": 6.234003877748295e-05, "loss": 2.9704, "step": 48425 }, { "epoch": 2.37, "grad_norm": 0.728862464427948, "learning_rate": 6.233064338484656e-05, "loss": 2.7578, "step": 48426 }, { "epoch": 2.37, "grad_norm": 0.7266650199890137, "learning_rate": 6.232124861818543e-05, "loss": 2.904, "step": 48427 }, { "epoch": 2.37, "grad_norm": 0.698026716709137, "learning_rate": 6.231185447752444e-05, "loss": 3.1348, "step": 48428 }, { "epoch": 2.37, "grad_norm": 0.7453986406326294, "learning_rate": 6.23024609628883e-05, "loss": 2.982, "step": 48429 }, { "epoch": 2.37, "grad_norm": 0.7807963490486145, "learning_rate": 6.229306807430168e-05, "loss": 3.0589, "step": 48430 }, { "epoch": 2.37, "grad_norm": 0.7143594622612, "learning_rate": 6.228367581178945e-05, "loss": 2.7484, "step": 48431 }, { "epoch": 2.37, "grad_norm": 0.7836489081382751, "learning_rate": 6.227428417537629e-05, "loss": 2.9194, "step": 48432 }, { "epoch": 2.37, "grad_norm": 0.702145516872406, "learning_rate": 6.226489316508678e-05, "loss": 2.9083, "step": 48433 }, { "epoch": 2.37, "grad_norm": 0.8490208387374878, "learning_rate": 6.225550278094591e-05, "loss": 2.8837, "step": 48434 }, { "epoch": 2.37, "grad_norm": 0.6956592798233032, "learning_rate": 6.224611302297821e-05, "loss": 3.056, "step": 48435 }, { "epoch": 2.37, "grad_norm": 0.7090912461280823, "learning_rate": 6.223672389120855e-05, "loss": 2.9451, "step": 48436 }, { "epoch": 2.37, "grad_norm": 0.7443285584449768, "learning_rate": 6.222733538566153e-05, "loss": 2.851, "step": 48437 }, { "epoch": 2.37, "grad_norm": 0.7666028738021851, "learning_rate": 6.221794750636195e-05, "loss": 2.8243, "step": 48438 }, { "epoch": 2.37, "grad_norm": 0.7153770923614502, "learning_rate": 6.220856025333462e-05, "loss": 2.7963, "step": 48439 }, { "epoch": 2.37, "grad_norm": 0.7110601663589478, "learning_rate": 6.219917362660419e-05, "loss": 2.9492, "step": 48440 }, { "epoch": 2.37, "grad_norm": 0.7466623187065125, "learning_rate": 6.218978762619533e-05, "loss": 2.8231, "step": 48441 }, { "epoch": 2.37, "grad_norm": 0.746893584728241, "learning_rate": 6.218040225213277e-05, "loss": 2.8501, "step": 48442 }, { "epoch": 2.37, "grad_norm": 0.7358802556991577, "learning_rate": 6.217101750444124e-05, "loss": 2.9031, "step": 48443 }, { "epoch": 2.37, "grad_norm": 0.6966966986656189, "learning_rate": 6.216163338314555e-05, "loss": 3.0072, "step": 48444 }, { "epoch": 2.37, "grad_norm": 0.7609187960624695, "learning_rate": 6.215224988827027e-05, "loss": 2.7928, "step": 48445 }, { "epoch": 2.37, "grad_norm": 0.6976915001869202, "learning_rate": 6.214286701984028e-05, "loss": 3.0634, "step": 48446 }, { "epoch": 2.37, "grad_norm": 0.7734736204147339, "learning_rate": 6.213348477788017e-05, "loss": 3.026, "step": 48447 }, { "epoch": 2.37, "grad_norm": 0.7737905383110046, "learning_rate": 6.212410316241465e-05, "loss": 2.9261, "step": 48448 }, { "epoch": 2.37, "grad_norm": 0.7344973683357239, "learning_rate": 6.21147221734685e-05, "loss": 2.6841, "step": 48449 }, { "epoch": 2.37, "grad_norm": 0.687893807888031, "learning_rate": 6.210534181106634e-05, "loss": 2.7757, "step": 48450 }, { "epoch": 2.37, "grad_norm": 0.7061976194381714, "learning_rate": 6.209596207523299e-05, "loss": 3.0322, "step": 48451 }, { "epoch": 2.37, "grad_norm": 0.7552989721298218, "learning_rate": 6.2086582965993e-05, "loss": 2.8587, "step": 48452 }, { "epoch": 2.37, "grad_norm": 0.7478323578834534, "learning_rate": 6.207720448337127e-05, "loss": 2.93, "step": 48453 }, { "epoch": 2.37, "grad_norm": 0.7599266767501831, "learning_rate": 6.206782662739236e-05, "loss": 2.9545, "step": 48454 }, { "epoch": 2.37, "grad_norm": 0.752623975276947, "learning_rate": 6.205844939808096e-05, "loss": 2.79, "step": 48455 }, { "epoch": 2.37, "grad_norm": 0.7089788913726807, "learning_rate": 6.204907279546188e-05, "loss": 2.9757, "step": 48456 }, { "epoch": 2.37, "grad_norm": 0.7725157737731934, "learning_rate": 6.203969681955967e-05, "loss": 3.0366, "step": 48457 }, { "epoch": 2.37, "grad_norm": 0.6831510663032532, "learning_rate": 6.203032147039909e-05, "loss": 3.068, "step": 48458 }, { "epoch": 2.37, "grad_norm": 0.7839120626449585, "learning_rate": 6.202094674800492e-05, "loss": 2.8344, "step": 48459 }, { "epoch": 2.37, "grad_norm": 0.7446629405021667, "learning_rate": 6.20115726524017e-05, "loss": 2.8361, "step": 48460 }, { "epoch": 2.37, "grad_norm": 0.7064789533615112, "learning_rate": 6.200219918361431e-05, "loss": 2.8209, "step": 48461 }, { "epoch": 2.38, "grad_norm": 0.7748939394950867, "learning_rate": 6.199282634166726e-05, "loss": 2.9435, "step": 48462 }, { "epoch": 2.38, "grad_norm": 0.7227396368980408, "learning_rate": 6.198345412658529e-05, "loss": 2.6958, "step": 48463 }, { "epoch": 2.38, "grad_norm": 0.705480694770813, "learning_rate": 6.197408253839314e-05, "loss": 2.9662, "step": 48464 }, { "epoch": 2.38, "grad_norm": 0.704855740070343, "learning_rate": 6.196471157711535e-05, "loss": 3.0984, "step": 48465 }, { "epoch": 2.38, "grad_norm": 0.8089683651924133, "learning_rate": 6.19553412427768e-05, "loss": 2.8041, "step": 48466 }, { "epoch": 2.38, "grad_norm": 0.7625295519828796, "learning_rate": 6.194597153540199e-05, "loss": 2.6505, "step": 48467 }, { "epoch": 2.38, "grad_norm": 0.7611942887306213, "learning_rate": 6.193660245501575e-05, "loss": 2.988, "step": 48468 }, { "epoch": 2.38, "grad_norm": 0.7407697439193726, "learning_rate": 6.192723400164268e-05, "loss": 2.9857, "step": 48469 }, { "epoch": 2.38, "grad_norm": 0.7640385627746582, "learning_rate": 6.191786617530739e-05, "loss": 2.7547, "step": 48470 }, { "epoch": 2.38, "grad_norm": 0.7157742977142334, "learning_rate": 6.190849897603472e-05, "loss": 2.9381, "step": 48471 }, { "epoch": 2.38, "grad_norm": 0.7664361596107483, "learning_rate": 6.189913240384911e-05, "loss": 2.8453, "step": 48472 }, { "epoch": 2.38, "grad_norm": 0.7323439121246338, "learning_rate": 6.188976645877548e-05, "loss": 2.9364, "step": 48473 }, { "epoch": 2.38, "grad_norm": 0.7110058069229126, "learning_rate": 6.188040114083829e-05, "loss": 2.8488, "step": 48474 }, { "epoch": 2.38, "grad_norm": 0.7112416625022888, "learning_rate": 6.187103645006228e-05, "loss": 2.9664, "step": 48475 }, { "epoch": 2.38, "grad_norm": 0.7495659589767456, "learning_rate": 6.186167238647232e-05, "loss": 2.6364, "step": 48476 }, { "epoch": 2.38, "grad_norm": 0.7033055424690247, "learning_rate": 6.18523089500927e-05, "loss": 2.9183, "step": 48477 }, { "epoch": 2.38, "grad_norm": 0.6921057105064392, "learning_rate": 6.184294614094835e-05, "loss": 2.8459, "step": 48478 }, { "epoch": 2.38, "grad_norm": 0.7195352911949158, "learning_rate": 6.183358395906376e-05, "loss": 2.8092, "step": 48479 }, { "epoch": 2.38, "grad_norm": 0.6981593370437622, "learning_rate": 6.182422240446372e-05, "loss": 2.9155, "step": 48480 }, { "epoch": 2.38, "grad_norm": 0.715376079082489, "learning_rate": 6.181486147717287e-05, "loss": 2.7584, "step": 48481 }, { "epoch": 2.38, "grad_norm": 0.7249025106430054, "learning_rate": 6.180550117721577e-05, "loss": 2.8825, "step": 48482 }, { "epoch": 2.38, "grad_norm": 0.7154632806777954, "learning_rate": 6.179614150461722e-05, "loss": 2.695, "step": 48483 }, { "epoch": 2.38, "grad_norm": 0.7087334394454956, "learning_rate": 6.178678245940179e-05, "loss": 2.7371, "step": 48484 }, { "epoch": 2.38, "grad_norm": 0.7212601900100708, "learning_rate": 6.177742404159404e-05, "loss": 2.6432, "step": 48485 }, { "epoch": 2.38, "grad_norm": 0.7712526321411133, "learning_rate": 6.176806625121882e-05, "loss": 2.7397, "step": 48486 }, { "epoch": 2.38, "grad_norm": 0.7613638639450073, "learning_rate": 6.175870908830056e-05, "loss": 2.9963, "step": 48487 }, { "epoch": 2.38, "grad_norm": 0.7476872205734253, "learning_rate": 6.174935255286408e-05, "loss": 2.7339, "step": 48488 }, { "epoch": 2.38, "grad_norm": 0.7133546471595764, "learning_rate": 6.17399966449339e-05, "loss": 2.8263, "step": 48489 }, { "epoch": 2.38, "grad_norm": 0.736723005771637, "learning_rate": 6.173064136453477e-05, "loss": 2.7688, "step": 48490 }, { "epoch": 2.38, "grad_norm": 0.7491676807403564, "learning_rate": 6.172128671169128e-05, "loss": 3.0156, "step": 48491 }, { "epoch": 2.38, "grad_norm": 0.7248450517654419, "learning_rate": 6.171193268642799e-05, "loss": 2.8164, "step": 48492 }, { "epoch": 2.38, "grad_norm": 0.7055536508560181, "learning_rate": 6.170257928876969e-05, "loss": 2.9714, "step": 48493 }, { "epoch": 2.38, "grad_norm": 0.7106114029884338, "learning_rate": 6.169322651874085e-05, "loss": 3.0877, "step": 48494 }, { "epoch": 2.38, "grad_norm": 0.7218091487884521, "learning_rate": 6.168387437636619e-05, "loss": 2.9154, "step": 48495 }, { "epoch": 2.38, "grad_norm": 0.7130762338638306, "learning_rate": 6.167452286167042e-05, "loss": 2.9306, "step": 48496 }, { "epoch": 2.38, "grad_norm": 0.7284539937973022, "learning_rate": 6.1665171974678e-05, "loss": 2.9702, "step": 48497 }, { "epoch": 2.38, "grad_norm": 0.736548125743866, "learning_rate": 6.165582171541375e-05, "loss": 2.7348, "step": 48498 }, { "epoch": 2.38, "grad_norm": 0.7026635408401489, "learning_rate": 6.164647208390217e-05, "loss": 3.0233, "step": 48499 }, { "epoch": 2.38, "grad_norm": 0.7796565890312195, "learning_rate": 6.163712308016788e-05, "loss": 2.9025, "step": 48500 }, { "epoch": 2.38, "grad_norm": 0.8084689378738403, "learning_rate": 6.162777470423555e-05, "loss": 2.9667, "step": 48501 }, { "epoch": 2.38, "grad_norm": 0.7033407092094421, "learning_rate": 6.161842695612974e-05, "loss": 2.8283, "step": 48502 }, { "epoch": 2.38, "grad_norm": 0.7982221841812134, "learning_rate": 6.160907983587521e-05, "loss": 3.0055, "step": 48503 }, { "epoch": 2.38, "grad_norm": 0.7438850402832031, "learning_rate": 6.159973334349637e-05, "loss": 2.9091, "step": 48504 }, { "epoch": 2.38, "grad_norm": 0.7206811904907227, "learning_rate": 6.159038747901804e-05, "loss": 2.7498, "step": 48505 }, { "epoch": 2.38, "grad_norm": 0.6850907206535339, "learning_rate": 6.158104224246477e-05, "loss": 2.9421, "step": 48506 }, { "epoch": 2.38, "grad_norm": 0.7406187057495117, "learning_rate": 6.157169763386102e-05, "loss": 2.954, "step": 48507 }, { "epoch": 2.38, "grad_norm": 0.728126585483551, "learning_rate": 6.156235365323165e-05, "loss": 2.7771, "step": 48508 }, { "epoch": 2.38, "grad_norm": 0.6951186656951904, "learning_rate": 6.15530103006011e-05, "loss": 2.8559, "step": 48509 }, { "epoch": 2.38, "grad_norm": 0.7429779767990112, "learning_rate": 6.154366757599399e-05, "loss": 2.92, "step": 48510 }, { "epoch": 2.38, "grad_norm": 0.6777859330177307, "learning_rate": 6.153432547943504e-05, "loss": 2.9665, "step": 48511 }, { "epoch": 2.38, "grad_norm": 0.7383310794830322, "learning_rate": 6.15249840109488e-05, "loss": 2.7535, "step": 48512 }, { "epoch": 2.38, "grad_norm": 0.7504093050956726, "learning_rate": 6.151564317055986e-05, "loss": 2.9476, "step": 48513 }, { "epoch": 2.38, "grad_norm": 0.6900618672370911, "learning_rate": 6.150630295829275e-05, "loss": 2.7624, "step": 48514 }, { "epoch": 2.38, "grad_norm": 0.7341283559799194, "learning_rate": 6.149696337417213e-05, "loss": 2.8167, "step": 48515 }, { "epoch": 2.38, "grad_norm": 0.7598667144775391, "learning_rate": 6.148762441822266e-05, "loss": 2.7693, "step": 48516 }, { "epoch": 2.38, "grad_norm": 0.7878901362419128, "learning_rate": 6.147828609046882e-05, "loss": 2.7556, "step": 48517 }, { "epoch": 2.38, "grad_norm": 0.7257072329521179, "learning_rate": 6.146894839093535e-05, "loss": 3.006, "step": 48518 }, { "epoch": 2.38, "grad_norm": 0.7379465103149414, "learning_rate": 6.145961131964671e-05, "loss": 3.0419, "step": 48519 }, { "epoch": 2.38, "grad_norm": 0.7503682374954224, "learning_rate": 6.145027487662762e-05, "loss": 2.8252, "step": 48520 }, { "epoch": 2.38, "grad_norm": 0.7141533493995667, "learning_rate": 6.144093906190256e-05, "loss": 2.9226, "step": 48521 }, { "epoch": 2.38, "grad_norm": 0.7183959484100342, "learning_rate": 6.14316038754961e-05, "loss": 2.8882, "step": 48522 }, { "epoch": 2.38, "grad_norm": 0.7660282254219055, "learning_rate": 6.142226931743296e-05, "loss": 2.8312, "step": 48523 }, { "epoch": 2.38, "grad_norm": 0.7023894190788269, "learning_rate": 6.141293538773758e-05, "loss": 2.787, "step": 48524 }, { "epoch": 2.38, "grad_norm": 0.7554915547370911, "learning_rate": 6.140360208643459e-05, "loss": 2.9541, "step": 48525 }, { "epoch": 2.38, "grad_norm": 0.6967014670372009, "learning_rate": 6.139426941354866e-05, "loss": 2.9179, "step": 48526 }, { "epoch": 2.38, "grad_norm": 0.7923035025596619, "learning_rate": 6.138493736910433e-05, "loss": 3.1075, "step": 48527 }, { "epoch": 2.38, "grad_norm": 0.7242387533187866, "learning_rate": 6.137560595312614e-05, "loss": 2.7576, "step": 48528 }, { "epoch": 2.38, "grad_norm": 0.7367933392524719, "learning_rate": 6.136627516563861e-05, "loss": 2.8662, "step": 48529 }, { "epoch": 2.38, "grad_norm": 0.7342579364776611, "learning_rate": 6.135694500666638e-05, "loss": 2.8216, "step": 48530 }, { "epoch": 2.38, "grad_norm": 0.7151744365692139, "learning_rate": 6.134761547623413e-05, "loss": 2.8982, "step": 48531 }, { "epoch": 2.38, "grad_norm": 0.7614980340003967, "learning_rate": 6.133828657436622e-05, "loss": 2.969, "step": 48532 }, { "epoch": 2.38, "grad_norm": 0.7628613114356995, "learning_rate": 6.132895830108744e-05, "loss": 2.946, "step": 48533 }, { "epoch": 2.38, "grad_norm": 0.7301518321037292, "learning_rate": 6.131963065642221e-05, "loss": 2.8227, "step": 48534 }, { "epoch": 2.38, "grad_norm": 0.7378628253936768, "learning_rate": 6.13103036403951e-05, "loss": 3.0836, "step": 48535 }, { "epoch": 2.38, "grad_norm": 0.7157490253448486, "learning_rate": 6.130097725303078e-05, "loss": 2.8048, "step": 48536 }, { "epoch": 2.38, "grad_norm": 0.7131938338279724, "learning_rate": 6.129165149435364e-05, "loss": 3.134, "step": 48537 }, { "epoch": 2.38, "grad_norm": 0.7120147943496704, "learning_rate": 6.128232636438846e-05, "loss": 2.9152, "step": 48538 }, { "epoch": 2.38, "grad_norm": 0.8592355847358704, "learning_rate": 6.127300186315962e-05, "loss": 2.9719, "step": 48539 }, { "epoch": 2.38, "grad_norm": 0.7270697951316833, "learning_rate": 6.126367799069172e-05, "loss": 2.8289, "step": 48540 }, { "epoch": 2.38, "grad_norm": 0.7252556681632996, "learning_rate": 6.125435474700944e-05, "loss": 2.7769, "step": 48541 }, { "epoch": 2.38, "grad_norm": 0.8012216687202454, "learning_rate": 6.124503213213723e-05, "loss": 2.9533, "step": 48542 }, { "epoch": 2.38, "grad_norm": 0.7498360872268677, "learning_rate": 6.123571014609967e-05, "loss": 2.8852, "step": 48543 }, { "epoch": 2.38, "grad_norm": 0.7274046540260315, "learning_rate": 6.122638878892121e-05, "loss": 3.0165, "step": 48544 }, { "epoch": 2.38, "grad_norm": 0.713111400604248, "learning_rate": 6.12170680606266e-05, "loss": 2.8686, "step": 48545 }, { "epoch": 2.38, "grad_norm": 0.7824342250823975, "learning_rate": 6.120774796124017e-05, "loss": 2.973, "step": 48546 }, { "epoch": 2.38, "grad_norm": 0.7419819235801697, "learning_rate": 6.119842849078656e-05, "loss": 2.9885, "step": 48547 }, { "epoch": 2.38, "grad_norm": 0.7952620983123779, "learning_rate": 6.118910964929042e-05, "loss": 2.8178, "step": 48548 }, { "epoch": 2.38, "grad_norm": 0.7857388854026794, "learning_rate": 6.117979143677623e-05, "loss": 2.9158, "step": 48549 }, { "epoch": 2.38, "grad_norm": 0.7016808986663818, "learning_rate": 6.117047385326851e-05, "loss": 3.0688, "step": 48550 }, { "epoch": 2.38, "grad_norm": 0.7236056923866272, "learning_rate": 6.116115689879168e-05, "loss": 3.1444, "step": 48551 }, { "epoch": 2.38, "grad_norm": 0.7185097336769104, "learning_rate": 6.11518405733704e-05, "loss": 2.6668, "step": 48552 }, { "epoch": 2.38, "grad_norm": 0.7458245754241943, "learning_rate": 6.114252487702931e-05, "loss": 2.8227, "step": 48553 }, { "epoch": 2.38, "grad_norm": 0.7336084246635437, "learning_rate": 6.113320980979275e-05, "loss": 2.7382, "step": 48554 }, { "epoch": 2.38, "grad_norm": 0.750745415687561, "learning_rate": 6.112389537168544e-05, "loss": 2.87, "step": 48555 }, { "epoch": 2.38, "grad_norm": 0.7281879186630249, "learning_rate": 6.11145815627318e-05, "loss": 2.9631, "step": 48556 }, { "epoch": 2.38, "grad_norm": 0.777651309967041, "learning_rate": 6.110526838295631e-05, "loss": 2.7747, "step": 48557 }, { "epoch": 2.38, "grad_norm": 0.7897119522094727, "learning_rate": 6.109595583238363e-05, "loss": 2.9692, "step": 48558 }, { "epoch": 2.38, "grad_norm": 0.7822614312171936, "learning_rate": 6.108664391103815e-05, "loss": 2.7721, "step": 48559 }, { "epoch": 2.38, "grad_norm": 0.7523203492164612, "learning_rate": 6.107733261894457e-05, "loss": 2.9797, "step": 48560 }, { "epoch": 2.38, "grad_norm": 0.747348964214325, "learning_rate": 6.10680219561272e-05, "loss": 3.1613, "step": 48561 }, { "epoch": 2.38, "grad_norm": 0.7030299305915833, "learning_rate": 6.105871192261068e-05, "loss": 2.8633, "step": 48562 }, { "epoch": 2.38, "grad_norm": 0.703755259513855, "learning_rate": 6.104940251841966e-05, "loss": 2.968, "step": 48563 }, { "epoch": 2.38, "grad_norm": 0.7231656908988953, "learning_rate": 6.104009374357846e-05, "loss": 2.7655, "step": 48564 }, { "epoch": 2.38, "grad_norm": 0.7645540833473206, "learning_rate": 6.10307855981117e-05, "loss": 3.0025, "step": 48565 }, { "epoch": 2.38, "grad_norm": 0.7383845448493958, "learning_rate": 6.1021478082043765e-05, "loss": 2.9483, "step": 48566 }, { "epoch": 2.38, "grad_norm": 0.7293666005134583, "learning_rate": 6.101217119539927e-05, "loss": 3.0159, "step": 48567 }, { "epoch": 2.38, "grad_norm": 0.736205518245697, "learning_rate": 6.100286493820281e-05, "loss": 2.806, "step": 48568 }, { "epoch": 2.38, "grad_norm": 0.7195806503295898, "learning_rate": 6.099355931047872e-05, "loss": 2.8859, "step": 48569 }, { "epoch": 2.38, "grad_norm": 0.7205501198768616, "learning_rate": 6.098425431225169e-05, "loss": 2.6709, "step": 48570 }, { "epoch": 2.38, "grad_norm": 0.7264590263366699, "learning_rate": 6.097494994354612e-05, "loss": 2.8121, "step": 48571 }, { "epoch": 2.38, "grad_norm": 0.7377135753631592, "learning_rate": 6.0965646204386445e-05, "loss": 2.7602, "step": 48572 }, { "epoch": 2.38, "grad_norm": 0.7292772531509399, "learning_rate": 6.0956343094797355e-05, "loss": 2.9107, "step": 48573 }, { "epoch": 2.38, "grad_norm": 0.8081817626953125, "learning_rate": 6.0947040614803144e-05, "loss": 2.7616, "step": 48574 }, { "epoch": 2.38, "grad_norm": 0.6972966194152832, "learning_rate": 6.093773876442852e-05, "loss": 2.8046, "step": 48575 }, { "epoch": 2.38, "grad_norm": 0.6994813084602356, "learning_rate": 6.09284375436978e-05, "loss": 2.872, "step": 48576 }, { "epoch": 2.38, "grad_norm": 0.7522056102752686, "learning_rate": 6.091913695263557e-05, "loss": 3.0694, "step": 48577 }, { "epoch": 2.38, "grad_norm": 0.7025183439254761, "learning_rate": 6.0909836991266506e-05, "loss": 2.9151, "step": 48578 }, { "epoch": 2.38, "grad_norm": 0.7216269373893738, "learning_rate": 6.090053765961471e-05, "loss": 2.8595, "step": 48579 }, { "epoch": 2.38, "grad_norm": 0.7327861189842224, "learning_rate": 6.089123895770498e-05, "loss": 2.7038, "step": 48580 }, { "epoch": 2.38, "grad_norm": 0.7535680532455444, "learning_rate": 6.088194088556165e-05, "loss": 2.866, "step": 48581 }, { "epoch": 2.38, "grad_norm": 0.7392609119415283, "learning_rate": 6.0872643443209246e-05, "loss": 2.8061, "step": 48582 }, { "epoch": 2.38, "grad_norm": 0.7233937382698059, "learning_rate": 6.0863346630672336e-05, "loss": 2.8853, "step": 48583 }, { "epoch": 2.38, "grad_norm": 0.7154392600059509, "learning_rate": 6.085405044797531e-05, "loss": 3.0673, "step": 48584 }, { "epoch": 2.38, "grad_norm": 0.7913565039634705, "learning_rate": 6.084475489514274e-05, "loss": 3.0256, "step": 48585 }, { "epoch": 2.38, "grad_norm": 0.7132130861282349, "learning_rate": 6.0835459972199086e-05, "loss": 2.9848, "step": 48586 }, { "epoch": 2.38, "grad_norm": 0.7644349932670593, "learning_rate": 6.082616567916871e-05, "loss": 2.9009, "step": 48587 }, { "epoch": 2.38, "grad_norm": 0.7143843173980713, "learning_rate": 6.081687201607628e-05, "loss": 2.6485, "step": 48588 }, { "epoch": 2.38, "grad_norm": 0.6977376341819763, "learning_rate": 6.080757898294606e-05, "loss": 2.8017, "step": 48589 }, { "epoch": 2.38, "grad_norm": 0.7115483283996582, "learning_rate": 6.0798286579802765e-05, "loss": 2.9167, "step": 48590 }, { "epoch": 2.38, "grad_norm": 0.7304779887199402, "learning_rate": 6.0788994806670656e-05, "loss": 2.9317, "step": 48591 }, { "epoch": 2.38, "grad_norm": 0.7648419141769409, "learning_rate": 6.0779703663574364e-05, "loss": 3.0281, "step": 48592 }, { "epoch": 2.38, "grad_norm": 0.7392321825027466, "learning_rate": 6.077041315053828e-05, "loss": 2.9541, "step": 48593 }, { "epoch": 2.38, "grad_norm": 0.8053700923919678, "learning_rate": 6.076112326758684e-05, "loss": 3.0026, "step": 48594 }, { "epoch": 2.38, "grad_norm": 0.7006090879440308, "learning_rate": 6.075183401474464e-05, "loss": 3.0143, "step": 48595 }, { "epoch": 2.38, "grad_norm": 0.7133437395095825, "learning_rate": 6.0742545392035945e-05, "loss": 2.8935, "step": 48596 }, { "epoch": 2.38, "grad_norm": 0.7549411654472351, "learning_rate": 6.073325739948538e-05, "loss": 2.9046, "step": 48597 }, { "epoch": 2.38, "grad_norm": 0.6961774826049805, "learning_rate": 6.072397003711742e-05, "loss": 2.9923, "step": 48598 }, { "epoch": 2.38, "grad_norm": 0.8232783675193787, "learning_rate": 6.0714683304956415e-05, "loss": 2.8209, "step": 48599 }, { "epoch": 2.38, "grad_norm": 0.7344974875450134, "learning_rate": 6.070539720302694e-05, "loss": 2.8822, "step": 48600 }, { "epoch": 2.38, "grad_norm": 0.7219202518463135, "learning_rate": 6.0696111731353414e-05, "loss": 2.8987, "step": 48601 }, { "epoch": 2.38, "grad_norm": 0.7357993721961975, "learning_rate": 6.0686826889960185e-05, "loss": 2.9248, "step": 48602 }, { "epoch": 2.38, "grad_norm": 0.7212379574775696, "learning_rate": 6.0677542678871894e-05, "loss": 2.7127, "step": 48603 }, { "epoch": 2.38, "grad_norm": 0.7509791254997253, "learning_rate": 6.066825909811279e-05, "loss": 2.7341, "step": 48604 }, { "epoch": 2.38, "grad_norm": 0.6835814714431763, "learning_rate": 6.065897614770753e-05, "loss": 2.8247, "step": 48605 }, { "epoch": 2.38, "grad_norm": 0.7493883371353149, "learning_rate": 6.0649693827680376e-05, "loss": 2.8594, "step": 48606 }, { "epoch": 2.38, "grad_norm": 0.7076842784881592, "learning_rate": 6.064041213805596e-05, "loss": 2.963, "step": 48607 }, { "epoch": 2.38, "grad_norm": 0.7497107982635498, "learning_rate": 6.063113107885862e-05, "loss": 2.8824, "step": 48608 }, { "epoch": 2.38, "grad_norm": 0.7138265371322632, "learning_rate": 6.0621850650112735e-05, "loss": 2.7468, "step": 48609 }, { "epoch": 2.38, "grad_norm": 0.7438095211982727, "learning_rate": 6.061257085184292e-05, "loss": 2.7966, "step": 48610 }, { "epoch": 2.38, "grad_norm": 0.7051562666893005, "learning_rate": 6.060329168407343e-05, "loss": 2.8157, "step": 48611 }, { "epoch": 2.38, "grad_norm": 0.7326208353042603, "learning_rate": 6.059401314682877e-05, "loss": 2.7358, "step": 48612 }, { "epoch": 2.38, "grad_norm": 0.7127295732498169, "learning_rate": 6.058473524013352e-05, "loss": 2.9006, "step": 48613 }, { "epoch": 2.38, "grad_norm": 0.7416107654571533, "learning_rate": 6.0575457964012e-05, "loss": 2.9238, "step": 48614 }, { "epoch": 2.38, "grad_norm": 0.7051231265068054, "learning_rate": 6.0566181318488614e-05, "loss": 2.9534, "step": 48615 }, { "epoch": 2.38, "grad_norm": 0.6944036483764648, "learning_rate": 6.0556905303587786e-05, "loss": 2.7596, "step": 48616 }, { "epoch": 2.38, "grad_norm": 0.7137592434883118, "learning_rate": 6.054762991933396e-05, "loss": 2.7521, "step": 48617 }, { "epoch": 2.38, "grad_norm": 0.7165379524230957, "learning_rate": 6.053835516575168e-05, "loss": 2.7684, "step": 48618 }, { "epoch": 2.38, "grad_norm": 0.7815676331520081, "learning_rate": 6.052908104286524e-05, "loss": 2.6846, "step": 48619 }, { "epoch": 2.38, "grad_norm": 0.7360538840293884, "learning_rate": 6.0519807550699164e-05, "loss": 2.9516, "step": 48620 }, { "epoch": 2.38, "grad_norm": 0.7272911667823792, "learning_rate": 6.0510534689277746e-05, "loss": 2.9062, "step": 48621 }, { "epoch": 2.38, "grad_norm": 0.761579155921936, "learning_rate": 6.050126245862557e-05, "loss": 2.8845, "step": 48622 }, { "epoch": 2.38, "grad_norm": 0.7632530331611633, "learning_rate": 6.049199085876697e-05, "loss": 2.9801, "step": 48623 }, { "epoch": 2.38, "grad_norm": 0.8465824723243713, "learning_rate": 6.04827198897263e-05, "loss": 2.775, "step": 48624 }, { "epoch": 2.38, "grad_norm": 0.7626355290412903, "learning_rate": 6.047344955152812e-05, "loss": 2.7857, "step": 48625 }, { "epoch": 2.38, "grad_norm": 0.7368438839912415, "learning_rate": 6.0464179844196693e-05, "loss": 2.8184, "step": 48626 }, { "epoch": 2.38, "grad_norm": 0.72593754529953, "learning_rate": 6.04549107677566e-05, "loss": 2.9202, "step": 48627 }, { "epoch": 2.38, "grad_norm": 0.7318313717842102, "learning_rate": 6.04456423222321e-05, "loss": 2.9295, "step": 48628 }, { "epoch": 2.38, "grad_norm": 0.7129902243614197, "learning_rate": 6.043637450764777e-05, "loss": 2.8795, "step": 48629 }, { "epoch": 2.38, "grad_norm": 0.7956576943397522, "learning_rate": 6.042710732402791e-05, "loss": 2.9245, "step": 48630 }, { "epoch": 2.38, "grad_norm": 0.7019463777542114, "learning_rate": 6.0417840771396854e-05, "loss": 2.6454, "step": 48631 }, { "epoch": 2.38, "grad_norm": 0.7243613600730896, "learning_rate": 6.04085748497792e-05, "loss": 2.916, "step": 48632 }, { "epoch": 2.38, "grad_norm": 0.6990789771080017, "learning_rate": 6.039930955919914e-05, "loss": 3.0681, "step": 48633 }, { "epoch": 2.38, "grad_norm": 0.7699520587921143, "learning_rate": 6.039004489968121e-05, "loss": 2.863, "step": 48634 }, { "epoch": 2.38, "grad_norm": 0.7257212400436401, "learning_rate": 6.038078087124983e-05, "loss": 2.7365, "step": 48635 }, { "epoch": 2.38, "grad_norm": 0.7219493985176086, "learning_rate": 6.037151747392939e-05, "loss": 2.9616, "step": 48636 }, { "epoch": 2.38, "grad_norm": 0.7039251327514648, "learning_rate": 6.036225470774425e-05, "loss": 2.5781, "step": 48637 }, { "epoch": 2.38, "grad_norm": 0.7816781401634216, "learning_rate": 6.035299257271874e-05, "loss": 2.8161, "step": 48638 }, { "epoch": 2.38, "grad_norm": 0.7119601368904114, "learning_rate": 6.034373106887733e-05, "loss": 2.9124, "step": 48639 }, { "epoch": 2.38, "grad_norm": 0.7373980283737183, "learning_rate": 6.033447019624448e-05, "loss": 2.7964, "step": 48640 }, { "epoch": 2.38, "grad_norm": 0.6862812042236328, "learning_rate": 6.032520995484442e-05, "loss": 2.9101, "step": 48641 }, { "epoch": 2.38, "grad_norm": 0.7756862640380859, "learning_rate": 6.031595034470171e-05, "loss": 2.986, "step": 48642 }, { "epoch": 2.38, "grad_norm": 0.6969619393348694, "learning_rate": 6.0306691365840596e-05, "loss": 2.9996, "step": 48643 }, { "epoch": 2.38, "grad_norm": 0.7498837113380432, "learning_rate": 6.029743301828559e-05, "loss": 2.8987, "step": 48644 }, { "epoch": 2.38, "grad_norm": 0.7080926895141602, "learning_rate": 6.028817530206104e-05, "loss": 2.9105, "step": 48645 }, { "epoch": 2.38, "grad_norm": 0.7312246561050415, "learning_rate": 6.027891821719122e-05, "loss": 3.199, "step": 48646 }, { "epoch": 2.38, "grad_norm": 0.7515241503715515, "learning_rate": 6.026966176370065e-05, "loss": 2.9646, "step": 48647 }, { "epoch": 2.38, "grad_norm": 0.7634444832801819, "learning_rate": 6.026040594161358e-05, "loss": 3.049, "step": 48648 }, { "epoch": 2.38, "grad_norm": 0.7122647166252136, "learning_rate": 6.025115075095448e-05, "loss": 2.6741, "step": 48649 }, { "epoch": 2.38, "grad_norm": 0.6937204003334045, "learning_rate": 6.0241896191747774e-05, "loss": 2.8746, "step": 48650 }, { "epoch": 2.38, "grad_norm": 0.7191386222839355, "learning_rate": 6.023264226401777e-05, "loss": 2.843, "step": 48651 }, { "epoch": 2.38, "grad_norm": 0.7549066543579102, "learning_rate": 6.022338896778882e-05, "loss": 2.9481, "step": 48652 }, { "epoch": 2.38, "grad_norm": 0.731148898601532, "learning_rate": 6.021413630308527e-05, "loss": 2.8964, "step": 48653 }, { "epoch": 2.38, "grad_norm": 0.7206429243087769, "learning_rate": 6.020488426993153e-05, "loss": 2.709, "step": 48654 }, { "epoch": 2.38, "grad_norm": 0.7050144076347351, "learning_rate": 6.019563286835205e-05, "loss": 2.7941, "step": 48655 }, { "epoch": 2.38, "grad_norm": 0.7599301338195801, "learning_rate": 6.0186382098371047e-05, "loss": 2.9327, "step": 48656 }, { "epoch": 2.38, "grad_norm": 0.6911978721618652, "learning_rate": 6.0177131960013056e-05, "loss": 2.8945, "step": 48657 }, { "epoch": 2.38, "grad_norm": 0.7535004019737244, "learning_rate": 6.016788245330231e-05, "loss": 2.9582, "step": 48658 }, { "epoch": 2.38, "grad_norm": 0.7646125555038452, "learning_rate": 6.015863357826314e-05, "loss": 3.0724, "step": 48659 }, { "epoch": 2.38, "grad_norm": 0.7803003787994385, "learning_rate": 6.014938533492006e-05, "loss": 2.959, "step": 48660 }, { "epoch": 2.38, "grad_norm": 0.730233371257782, "learning_rate": 6.014013772329727e-05, "loss": 2.8426, "step": 48661 }, { "epoch": 2.38, "grad_norm": 0.7188946008682251, "learning_rate": 6.0130890743419225e-05, "loss": 3.0968, "step": 48662 }, { "epoch": 2.38, "grad_norm": 0.7240670323371887, "learning_rate": 6.012164439531022e-05, "loss": 2.8007, "step": 48663 }, { "epoch": 2.38, "grad_norm": 0.7316474318504333, "learning_rate": 6.011239867899463e-05, "loss": 2.9304, "step": 48664 }, { "epoch": 2.38, "grad_norm": 0.8154338598251343, "learning_rate": 6.010315359449688e-05, "loss": 2.9659, "step": 48665 }, { "epoch": 2.39, "grad_norm": 0.7354592084884644, "learning_rate": 6.0093909141841266e-05, "loss": 2.9925, "step": 48666 }, { "epoch": 2.39, "grad_norm": 0.7725946307182312, "learning_rate": 6.008466532105212e-05, "loss": 2.8038, "step": 48667 }, { "epoch": 2.39, "grad_norm": 0.7260931134223938, "learning_rate": 6.0075422132153705e-05, "loss": 2.8687, "step": 48668 }, { "epoch": 2.39, "grad_norm": 0.7305506467819214, "learning_rate": 6.0066179575170445e-05, "loss": 2.9901, "step": 48669 }, { "epoch": 2.39, "grad_norm": 0.8036696910858154, "learning_rate": 6.005693765012679e-05, "loss": 2.9703, "step": 48670 }, { "epoch": 2.39, "grad_norm": 0.7172324061393738, "learning_rate": 6.004769635704691e-05, "loss": 2.9001, "step": 48671 }, { "epoch": 2.39, "grad_norm": 0.7303609251976013, "learning_rate": 6.0038455695955265e-05, "loss": 3.0238, "step": 48672 }, { "epoch": 2.39, "grad_norm": 0.7004274725914001, "learning_rate": 6.0029215666876194e-05, "loss": 2.9343, "step": 48673 }, { "epoch": 2.39, "grad_norm": 0.7272935509681702, "learning_rate": 6.001997626983388e-05, "loss": 2.738, "step": 48674 }, { "epoch": 2.39, "grad_norm": 0.7348398566246033, "learning_rate": 6.001073750485283e-05, "loss": 2.7692, "step": 48675 }, { "epoch": 2.39, "grad_norm": 0.7049981951713562, "learning_rate": 6.000149937195724e-05, "loss": 2.9259, "step": 48676 }, { "epoch": 2.39, "grad_norm": 0.7831222414970398, "learning_rate": 5.999226187117161e-05, "loss": 2.91, "step": 48677 }, { "epoch": 2.39, "grad_norm": 0.7655361294746399, "learning_rate": 5.998302500252009e-05, "loss": 2.6398, "step": 48678 }, { "epoch": 2.39, "grad_norm": 0.7228807210922241, "learning_rate": 5.997378876602713e-05, "loss": 2.789, "step": 48679 }, { "epoch": 2.39, "grad_norm": 0.7596110105514526, "learning_rate": 5.996455316171708e-05, "loss": 2.8773, "step": 48680 }, { "epoch": 2.39, "grad_norm": 0.7604156136512756, "learning_rate": 5.995531818961408e-05, "loss": 2.9434, "step": 48681 }, { "epoch": 2.39, "grad_norm": 0.7096068859100342, "learning_rate": 5.9946083849742645e-05, "loss": 2.9145, "step": 48682 }, { "epoch": 2.39, "grad_norm": 0.7317915558815002, "learning_rate": 5.993685014212696e-05, "loss": 2.9156, "step": 48683 }, { "epoch": 2.39, "grad_norm": 0.6918458342552185, "learning_rate": 5.992761706679143e-05, "loss": 2.7525, "step": 48684 }, { "epoch": 2.39, "grad_norm": 0.7611396908760071, "learning_rate": 5.991838462376042e-05, "loss": 2.9744, "step": 48685 }, { "epoch": 2.39, "grad_norm": 0.7494409680366516, "learning_rate": 5.990915281305809e-05, "loss": 3.0139, "step": 48686 }, { "epoch": 2.39, "grad_norm": 0.7255983352661133, "learning_rate": 5.9899921634708936e-05, "loss": 3.0039, "step": 48687 }, { "epoch": 2.39, "grad_norm": 0.7247331738471985, "learning_rate": 5.9890691088737165e-05, "loss": 2.9438, "step": 48688 }, { "epoch": 2.39, "grad_norm": 0.7208533883094788, "learning_rate": 5.988146117516702e-05, "loss": 2.7187, "step": 48689 }, { "epoch": 2.39, "grad_norm": 0.7259299755096436, "learning_rate": 5.987223189402299e-05, "loss": 3.0192, "step": 48690 }, { "epoch": 2.39, "grad_norm": 0.7051576375961304, "learning_rate": 5.986300324532922e-05, "loss": 3.1708, "step": 48691 }, { "epoch": 2.39, "grad_norm": 0.8053131699562073, "learning_rate": 5.985377522911016e-05, "loss": 2.8649, "step": 48692 }, { "epoch": 2.39, "grad_norm": 0.7644606828689575, "learning_rate": 5.984454784538994e-05, "loss": 2.9055, "step": 48693 }, { "epoch": 2.39, "grad_norm": 0.7201364636421204, "learning_rate": 5.9835321094193035e-05, "loss": 2.8487, "step": 48694 }, { "epoch": 2.39, "grad_norm": 0.7057961821556091, "learning_rate": 5.9826094975543705e-05, "loss": 3.0138, "step": 48695 }, { "epoch": 2.39, "grad_norm": 0.7159346342086792, "learning_rate": 5.9816869489466135e-05, "loss": 2.6607, "step": 48696 }, { "epoch": 2.39, "grad_norm": 0.7882915735244751, "learning_rate": 5.9807644635984775e-05, "loss": 2.9877, "step": 48697 }, { "epoch": 2.39, "grad_norm": 0.7171709537506104, "learning_rate": 5.9798420415123774e-05, "loss": 2.8216, "step": 48698 }, { "epoch": 2.39, "grad_norm": 0.7261692881584167, "learning_rate": 5.97891968269075e-05, "loss": 2.7935, "step": 48699 }, { "epoch": 2.39, "grad_norm": 0.7348390221595764, "learning_rate": 5.977997387136032e-05, "loss": 3.0635, "step": 48700 }, { "epoch": 2.39, "grad_norm": 0.7532333731651306, "learning_rate": 5.97707515485064e-05, "loss": 2.9131, "step": 48701 }, { "epoch": 2.39, "grad_norm": 0.6886923313140869, "learning_rate": 5.976152985837026e-05, "loss": 2.7078, "step": 48702 }, { "epoch": 2.39, "grad_norm": 0.7542856335639954, "learning_rate": 5.975230880097581e-05, "loss": 2.6983, "step": 48703 }, { "epoch": 2.39, "grad_norm": 0.7375133037567139, "learning_rate": 5.974308837634767e-05, "loss": 2.6483, "step": 48704 }, { "epoch": 2.39, "grad_norm": 0.7276037335395813, "learning_rate": 5.973386858450988e-05, "loss": 2.8235, "step": 48705 }, { "epoch": 2.39, "grad_norm": 0.7167425751686096, "learning_rate": 5.972464942548686e-05, "loss": 2.6771, "step": 48706 }, { "epoch": 2.39, "grad_norm": 0.760288655757904, "learning_rate": 5.9715430899302954e-05, "loss": 2.7104, "step": 48707 }, { "epoch": 2.39, "grad_norm": 0.7757988572120667, "learning_rate": 5.970621300598227e-05, "loss": 2.8529, "step": 48708 }, { "epoch": 2.39, "grad_norm": 0.7403610348701477, "learning_rate": 5.969699574554927e-05, "loss": 2.9158, "step": 48709 }, { "epoch": 2.39, "grad_norm": 0.7176191210746765, "learning_rate": 5.968777911802812e-05, "loss": 3.0029, "step": 48710 }, { "epoch": 2.39, "grad_norm": 0.711616039276123, "learning_rate": 5.9678563123443045e-05, "loss": 3.1245, "step": 48711 }, { "epoch": 2.39, "grad_norm": 0.7264795303344727, "learning_rate": 5.9669347761818455e-05, "loss": 2.7855, "step": 48712 }, { "epoch": 2.39, "grad_norm": 0.7248842120170593, "learning_rate": 5.966013303317846e-05, "loss": 2.8838, "step": 48713 }, { "epoch": 2.39, "grad_norm": 0.7030981183052063, "learning_rate": 5.965091893754751e-05, "loss": 3.0203, "step": 48714 }, { "epoch": 2.39, "grad_norm": 0.7812050580978394, "learning_rate": 5.964170547494972e-05, "loss": 2.9395, "step": 48715 }, { "epoch": 2.39, "grad_norm": 0.762490451335907, "learning_rate": 5.9632492645409467e-05, "loss": 2.9752, "step": 48716 }, { "epoch": 2.39, "grad_norm": 0.68095463514328, "learning_rate": 5.962328044895097e-05, "loss": 2.803, "step": 48717 }, { "epoch": 2.39, "grad_norm": 0.6975252628326416, "learning_rate": 5.961406888559844e-05, "loss": 3.1397, "step": 48718 }, { "epoch": 2.39, "grad_norm": 0.7371811866760254, "learning_rate": 5.960485795537626e-05, "loss": 2.9046, "step": 48719 }, { "epoch": 2.39, "grad_norm": 0.7280109524726868, "learning_rate": 5.959564765830854e-05, "loss": 2.8761, "step": 48720 }, { "epoch": 2.39, "grad_norm": 0.7430052161216736, "learning_rate": 5.95864379944196e-05, "loss": 2.8337, "step": 48721 }, { "epoch": 2.39, "grad_norm": 0.7345473170280457, "learning_rate": 5.957722896373381e-05, "loss": 2.9135, "step": 48722 }, { "epoch": 2.39, "grad_norm": 0.7026997208595276, "learning_rate": 5.9568020566275235e-05, "loss": 2.9278, "step": 48723 }, { "epoch": 2.39, "grad_norm": 0.7376667857170105, "learning_rate": 5.9558812802068335e-05, "loss": 2.7952, "step": 48724 }, { "epoch": 2.39, "grad_norm": 0.7143561840057373, "learning_rate": 5.95496056711372e-05, "loss": 2.7486, "step": 48725 }, { "epoch": 2.39, "grad_norm": 0.7290409803390503, "learning_rate": 5.954039917350608e-05, "loss": 2.697, "step": 48726 }, { "epoch": 2.39, "grad_norm": 0.7321444749832153, "learning_rate": 5.953119330919935e-05, "loss": 2.8251, "step": 48727 }, { "epoch": 2.39, "grad_norm": 0.7561767101287842, "learning_rate": 5.95219880782411e-05, "loss": 2.7957, "step": 48728 }, { "epoch": 2.39, "grad_norm": 0.750032365322113, "learning_rate": 5.95127834806557e-05, "loss": 3.2731, "step": 48729 }, { "epoch": 2.39, "grad_norm": 0.7444902658462524, "learning_rate": 5.95035795164673e-05, "loss": 2.8988, "step": 48730 }, { "epoch": 2.39, "grad_norm": 0.735755205154419, "learning_rate": 5.9494376185700256e-05, "loss": 2.9861, "step": 48731 }, { "epoch": 2.39, "grad_norm": 0.7328507900238037, "learning_rate": 5.948517348837874e-05, "loss": 2.9542, "step": 48732 }, { "epoch": 2.39, "grad_norm": 0.7095339894294739, "learning_rate": 5.947597142452693e-05, "loss": 2.7841, "step": 48733 }, { "epoch": 2.39, "grad_norm": 0.7547805309295654, "learning_rate": 5.946676999416917e-05, "loss": 2.9635, "step": 48734 }, { "epoch": 2.39, "grad_norm": 0.6976374387741089, "learning_rate": 5.945756919732956e-05, "loss": 2.8442, "step": 48735 }, { "epoch": 2.39, "grad_norm": 0.7196562886238098, "learning_rate": 5.944836903403246e-05, "loss": 3.1681, "step": 48736 }, { "epoch": 2.39, "grad_norm": 0.7531615495681763, "learning_rate": 5.94391695043021e-05, "loss": 3.1516, "step": 48737 }, { "epoch": 2.39, "grad_norm": 0.7510145306587219, "learning_rate": 5.942997060816268e-05, "loss": 2.8634, "step": 48738 }, { "epoch": 2.39, "grad_norm": 0.7500221133232117, "learning_rate": 5.9420772345638423e-05, "loss": 3.1042, "step": 48739 }, { "epoch": 2.39, "grad_norm": 0.7888630032539368, "learning_rate": 5.9411574716753474e-05, "loss": 2.7464, "step": 48740 }, { "epoch": 2.39, "grad_norm": 0.7947269082069397, "learning_rate": 5.9402377721532144e-05, "loss": 2.9241, "step": 48741 }, { "epoch": 2.39, "grad_norm": 0.6809054613113403, "learning_rate": 5.939318135999872e-05, "loss": 2.7824, "step": 48742 }, { "epoch": 2.39, "grad_norm": 0.785391628742218, "learning_rate": 5.938398563217728e-05, "loss": 2.9763, "step": 48743 }, { "epoch": 2.39, "grad_norm": 0.7565441131591797, "learning_rate": 5.9374790538092166e-05, "loss": 3.02, "step": 48744 }, { "epoch": 2.39, "grad_norm": 0.719491720199585, "learning_rate": 5.93655960777675e-05, "loss": 3.204, "step": 48745 }, { "epoch": 2.39, "grad_norm": 0.7319931983947754, "learning_rate": 5.9356402251227585e-05, "loss": 2.8652, "step": 48746 }, { "epoch": 2.39, "grad_norm": 0.7423975467681885, "learning_rate": 5.9347209058496615e-05, "loss": 2.956, "step": 48747 }, { "epoch": 2.39, "grad_norm": 0.7304463386535645, "learning_rate": 5.9338016499598725e-05, "loss": 2.9522, "step": 48748 }, { "epoch": 2.39, "grad_norm": 0.7549775838851929, "learning_rate": 5.932882457455821e-05, "loss": 2.9967, "step": 48749 }, { "epoch": 2.39, "grad_norm": 0.6921327710151672, "learning_rate": 5.93196332833992e-05, "loss": 2.9852, "step": 48750 }, { "epoch": 2.39, "grad_norm": 0.7499932646751404, "learning_rate": 5.931044262614596e-05, "loss": 3.0477, "step": 48751 }, { "epoch": 2.39, "grad_norm": 0.7360295057296753, "learning_rate": 5.9301252602822794e-05, "loss": 3.2046, "step": 48752 }, { "epoch": 2.39, "grad_norm": 0.70384281873703, "learning_rate": 5.929206321345378e-05, "loss": 2.6769, "step": 48753 }, { "epoch": 2.39, "grad_norm": 0.7233741879463196, "learning_rate": 5.9282874458063136e-05, "loss": 2.8938, "step": 48754 }, { "epoch": 2.39, "grad_norm": 0.7333195805549622, "learning_rate": 5.9273686336675007e-05, "loss": 2.8446, "step": 48755 }, { "epoch": 2.39, "grad_norm": 0.7375016808509827, "learning_rate": 5.9264498849313644e-05, "loss": 3.0102, "step": 48756 }, { "epoch": 2.39, "grad_norm": 0.7300297617912292, "learning_rate": 5.925531199600336e-05, "loss": 2.8862, "step": 48757 }, { "epoch": 2.39, "grad_norm": 0.7252567410469055, "learning_rate": 5.924612577676817e-05, "loss": 2.9107, "step": 48758 }, { "epoch": 2.39, "grad_norm": 0.7813132405281067, "learning_rate": 5.923694019163242e-05, "loss": 2.8536, "step": 48759 }, { "epoch": 2.39, "grad_norm": 0.714250385761261, "learning_rate": 5.9227755240620254e-05, "loss": 2.9715, "step": 48760 }, { "epoch": 2.39, "grad_norm": 0.7643177509307861, "learning_rate": 5.921857092375576e-05, "loss": 2.942, "step": 48761 }, { "epoch": 2.39, "grad_norm": 0.7428061366081238, "learning_rate": 5.920938724106328e-05, "loss": 2.9505, "step": 48762 }, { "epoch": 2.39, "grad_norm": 0.7188575267791748, "learning_rate": 5.9200204192566856e-05, "loss": 2.9623, "step": 48763 }, { "epoch": 2.39, "grad_norm": 0.7388699650764465, "learning_rate": 5.9191021778290817e-05, "loss": 3.0572, "step": 48764 }, { "epoch": 2.39, "grad_norm": 0.7034398913383484, "learning_rate": 5.918183999825923e-05, "loss": 2.8113, "step": 48765 }, { "epoch": 2.39, "grad_norm": 0.7972329258918762, "learning_rate": 5.917265885249631e-05, "loss": 3.0816, "step": 48766 }, { "epoch": 2.39, "grad_norm": 0.7589764595031738, "learning_rate": 5.916347834102635e-05, "loss": 2.9365, "step": 48767 }, { "epoch": 2.39, "grad_norm": 0.7065830826759338, "learning_rate": 5.915429846387342e-05, "loss": 2.9464, "step": 48768 }, { "epoch": 2.39, "grad_norm": 0.8015071153640747, "learning_rate": 5.914511922106173e-05, "loss": 3.0396, "step": 48769 }, { "epoch": 2.39, "grad_norm": 0.7204930186271667, "learning_rate": 5.913594061261534e-05, "loss": 2.7337, "step": 48770 }, { "epoch": 2.39, "grad_norm": 0.7478088140487671, "learning_rate": 5.9126762638558556e-05, "loss": 2.9955, "step": 48771 }, { "epoch": 2.39, "grad_norm": 0.7554129362106323, "learning_rate": 5.911758529891559e-05, "loss": 2.9007, "step": 48772 }, { "epoch": 2.39, "grad_norm": 0.7005829811096191, "learning_rate": 5.9108408593710465e-05, "loss": 2.9987, "step": 48773 }, { "epoch": 2.39, "grad_norm": 0.7382482290267944, "learning_rate": 5.909923252296752e-05, "loss": 3.1134, "step": 48774 }, { "epoch": 2.39, "grad_norm": 0.8164120316505432, "learning_rate": 5.909005708671084e-05, "loss": 2.9471, "step": 48775 }, { "epoch": 2.39, "grad_norm": 0.706451416015625, "learning_rate": 5.9080882284964506e-05, "loss": 3.1401, "step": 48776 }, { "epoch": 2.39, "grad_norm": 0.7515252232551575, "learning_rate": 5.907170811775283e-05, "loss": 2.8985, "step": 48777 }, { "epoch": 2.39, "grad_norm": 0.6926350593566895, "learning_rate": 5.906253458509982e-05, "loss": 2.9885, "step": 48778 }, { "epoch": 2.39, "grad_norm": 0.7306018471717834, "learning_rate": 5.90533616870298e-05, "loss": 2.8483, "step": 48779 }, { "epoch": 2.39, "grad_norm": 0.6878507733345032, "learning_rate": 5.904418942356678e-05, "loss": 2.9312, "step": 48780 }, { "epoch": 2.39, "grad_norm": 0.728437602519989, "learning_rate": 5.9035017794735107e-05, "loss": 2.9679, "step": 48781 }, { "epoch": 2.39, "grad_norm": 0.710863471031189, "learning_rate": 5.902584680055876e-05, "loss": 2.9908, "step": 48782 }, { "epoch": 2.39, "grad_norm": 0.7090634703636169, "learning_rate": 5.901667644106193e-05, "loss": 3.095, "step": 48783 }, { "epoch": 2.39, "grad_norm": 0.7385855913162231, "learning_rate": 5.900750671626885e-05, "loss": 3.1334, "step": 48784 }, { "epoch": 2.39, "grad_norm": 0.705529510974884, "learning_rate": 5.899833762620354e-05, "loss": 2.8821, "step": 48785 }, { "epoch": 2.39, "grad_norm": 0.716135561466217, "learning_rate": 5.8989169170890316e-05, "loss": 2.8941, "step": 48786 }, { "epoch": 2.39, "grad_norm": 0.690018355846405, "learning_rate": 5.898000135035316e-05, "loss": 2.8815, "step": 48787 }, { "epoch": 2.39, "grad_norm": 0.7214192748069763, "learning_rate": 5.8970834164616273e-05, "loss": 2.8851, "step": 48788 }, { "epoch": 2.39, "grad_norm": 0.8130916357040405, "learning_rate": 5.8961667613703923e-05, "loss": 2.9957, "step": 48789 }, { "epoch": 2.39, "grad_norm": 0.7414829730987549, "learning_rate": 5.8952501697640145e-05, "loss": 2.6515, "step": 48790 }, { "epoch": 2.39, "grad_norm": 0.7804091572761536, "learning_rate": 5.894333641644908e-05, "loss": 2.7691, "step": 48791 }, { "epoch": 2.39, "grad_norm": 0.7167255878448486, "learning_rate": 5.8934171770154824e-05, "loss": 2.9098, "step": 48792 }, { "epoch": 2.39, "grad_norm": 0.7627856135368347, "learning_rate": 5.892500775878155e-05, "loss": 3.1166, "step": 48793 }, { "epoch": 2.39, "grad_norm": 0.7267736792564392, "learning_rate": 5.8915844382353474e-05, "loss": 3.0016, "step": 48794 }, { "epoch": 2.39, "grad_norm": 0.7814797163009644, "learning_rate": 5.89066816408946e-05, "loss": 2.6712, "step": 48795 }, { "epoch": 2.39, "grad_norm": 0.7293899059295654, "learning_rate": 5.889751953442919e-05, "loss": 2.9074, "step": 48796 }, { "epoch": 2.39, "grad_norm": 0.7417458891868591, "learning_rate": 5.888835806298132e-05, "loss": 2.8068, "step": 48797 }, { "epoch": 2.39, "grad_norm": 0.742914080619812, "learning_rate": 5.8879197226575034e-05, "loss": 2.6278, "step": 48798 }, { "epoch": 2.39, "grad_norm": 0.7378799915313721, "learning_rate": 5.887003702523462e-05, "loss": 2.7636, "step": 48799 }, { "epoch": 2.39, "grad_norm": 0.7445825338363647, "learning_rate": 5.886087745898405e-05, "loss": 2.9709, "step": 48800 }, { "epoch": 2.39, "grad_norm": 0.7399541139602661, "learning_rate": 5.88517185278476e-05, "loss": 2.699, "step": 48801 }, { "epoch": 2.39, "grad_norm": 0.7600640654563904, "learning_rate": 5.884256023184924e-05, "loss": 2.9231, "step": 48802 }, { "epoch": 2.39, "grad_norm": 0.7569655776023865, "learning_rate": 5.883340257101312e-05, "loss": 3.065, "step": 48803 }, { "epoch": 2.39, "grad_norm": 0.7483676671981812, "learning_rate": 5.8824245545363615e-05, "loss": 2.8574, "step": 48804 }, { "epoch": 2.39, "grad_norm": 0.7173334360122681, "learning_rate": 5.8815089154924456e-05, "loss": 2.865, "step": 48805 }, { "epoch": 2.39, "grad_norm": 0.7077659964561462, "learning_rate": 5.8805933399719993e-05, "loss": 2.8537, "step": 48806 }, { "epoch": 2.39, "grad_norm": 0.7615037560462952, "learning_rate": 5.879677827977425e-05, "loss": 2.7053, "step": 48807 }, { "epoch": 2.39, "grad_norm": 0.7203567028045654, "learning_rate": 5.878762379511133e-05, "loss": 2.8675, "step": 48808 }, { "epoch": 2.39, "grad_norm": 0.7756044864654541, "learning_rate": 5.877846994575548e-05, "loss": 2.9412, "step": 48809 }, { "epoch": 2.39, "grad_norm": 0.7094483971595764, "learning_rate": 5.876931673173062e-05, "loss": 2.9699, "step": 48810 }, { "epoch": 2.39, "grad_norm": 0.7442003488540649, "learning_rate": 5.8760164153061064e-05, "loss": 2.912, "step": 48811 }, { "epoch": 2.39, "grad_norm": 0.7222998142242432, "learning_rate": 5.8751012209770786e-05, "loss": 2.8013, "step": 48812 }, { "epoch": 2.39, "grad_norm": 0.7189565300941467, "learning_rate": 5.8741860901883844e-05, "loss": 2.8706, "step": 48813 }, { "epoch": 2.39, "grad_norm": 0.7589866518974304, "learning_rate": 5.8732710229424507e-05, "loss": 3.0814, "step": 48814 }, { "epoch": 2.39, "grad_norm": 0.7047926783561707, "learning_rate": 5.8723560192416665e-05, "loss": 2.7251, "step": 48815 }, { "epoch": 2.39, "grad_norm": 0.7266496419906616, "learning_rate": 5.8714410790884626e-05, "loss": 2.8967, "step": 48816 }, { "epoch": 2.39, "grad_norm": 0.7001505494117737, "learning_rate": 5.8705262024852304e-05, "loss": 2.843, "step": 48817 }, { "epoch": 2.39, "grad_norm": 0.7681334018707275, "learning_rate": 5.869611389434399e-05, "loss": 2.7958, "step": 48818 }, { "epoch": 2.39, "grad_norm": 0.7098401784896851, "learning_rate": 5.868696639938365e-05, "loss": 2.9353, "step": 48819 }, { "epoch": 2.39, "grad_norm": 0.7518061995506287, "learning_rate": 5.867781953999531e-05, "loss": 2.8312, "step": 48820 }, { "epoch": 2.39, "grad_norm": 0.7705138325691223, "learning_rate": 5.866867331620324e-05, "loss": 2.8373, "step": 48821 }, { "epoch": 2.39, "grad_norm": 0.7576525211334229, "learning_rate": 5.865952772803136e-05, "loss": 2.8809, "step": 48822 }, { "epoch": 2.39, "grad_norm": 0.7420570850372314, "learning_rate": 5.8650382775503855e-05, "loss": 3.1835, "step": 48823 }, { "epoch": 2.39, "grad_norm": 0.7061996459960938, "learning_rate": 5.8641238458644835e-05, "loss": 2.9064, "step": 48824 }, { "epoch": 2.39, "grad_norm": 0.7823624610900879, "learning_rate": 5.8632094777478275e-05, "loss": 2.9696, "step": 48825 }, { "epoch": 2.39, "grad_norm": 0.7167471647262573, "learning_rate": 5.8622951732028435e-05, "loss": 3.052, "step": 48826 }, { "epoch": 2.39, "grad_norm": 0.7196730375289917, "learning_rate": 5.861380932231925e-05, "loss": 2.752, "step": 48827 }, { "epoch": 2.39, "grad_norm": 0.758525013923645, "learning_rate": 5.8604667548374776e-05, "loss": 2.8304, "step": 48828 }, { "epoch": 2.39, "grad_norm": 0.7166337370872498, "learning_rate": 5.859552641021922e-05, "loss": 3.0448, "step": 48829 }, { "epoch": 2.39, "grad_norm": 0.6951048970222473, "learning_rate": 5.8586385907876534e-05, "loss": 2.9832, "step": 48830 }, { "epoch": 2.39, "grad_norm": 0.7193121314048767, "learning_rate": 5.8577246041370896e-05, "loss": 2.9533, "step": 48831 }, { "epoch": 2.39, "grad_norm": 0.7775723934173584, "learning_rate": 5.856810681072626e-05, "loss": 2.7201, "step": 48832 }, { "epoch": 2.39, "grad_norm": 0.7279043793678284, "learning_rate": 5.855896821596683e-05, "loss": 2.7716, "step": 48833 }, { "epoch": 2.39, "grad_norm": 0.7554843425750732, "learning_rate": 5.854983025711662e-05, "loss": 2.8317, "step": 48834 }, { "epoch": 2.39, "grad_norm": 0.7403421401977539, "learning_rate": 5.854069293419963e-05, "loss": 3.1781, "step": 48835 }, { "epoch": 2.39, "grad_norm": 0.70306396484375, "learning_rate": 5.8531556247240054e-05, "loss": 2.8058, "step": 48836 }, { "epoch": 2.39, "grad_norm": 0.716789960861206, "learning_rate": 5.852242019626181e-05, "loss": 2.9193, "step": 48837 }, { "epoch": 2.39, "grad_norm": 0.7575057148933411, "learning_rate": 5.851328478128902e-05, "loss": 2.7854, "step": 48838 }, { "epoch": 2.39, "grad_norm": 0.7853378057479858, "learning_rate": 5.850415000234585e-05, "loss": 3.17, "step": 48839 }, { "epoch": 2.39, "grad_norm": 0.7358184456825256, "learning_rate": 5.8495015859456294e-05, "loss": 2.8608, "step": 48840 }, { "epoch": 2.39, "grad_norm": 0.6891388893127441, "learning_rate": 5.8485882352644366e-05, "loss": 2.8855, "step": 48841 }, { "epoch": 2.39, "grad_norm": 0.7379217743873596, "learning_rate": 5.847674948193407e-05, "loss": 2.6099, "step": 48842 }, { "epoch": 2.39, "grad_norm": 0.7153098583221436, "learning_rate": 5.846761724734953e-05, "loss": 2.8372, "step": 48843 }, { "epoch": 2.39, "grad_norm": 0.750810980796814, "learning_rate": 5.845848564891489e-05, "loss": 2.8569, "step": 48844 }, { "epoch": 2.39, "grad_norm": 0.7509940266609192, "learning_rate": 5.8449354686654025e-05, "loss": 2.6827, "step": 48845 }, { "epoch": 2.39, "grad_norm": 0.7342125773429871, "learning_rate": 5.844022436059116e-05, "loss": 2.7864, "step": 48846 }, { "epoch": 2.39, "grad_norm": 0.7348741292953491, "learning_rate": 5.8431094670750165e-05, "loss": 2.8741, "step": 48847 }, { "epoch": 2.39, "grad_norm": 0.7125906348228455, "learning_rate": 5.842196561715526e-05, "loss": 3.0068, "step": 48848 }, { "epoch": 2.39, "grad_norm": 0.7036176323890686, "learning_rate": 5.84128371998304e-05, "loss": 2.9852, "step": 48849 }, { "epoch": 2.39, "grad_norm": 0.7853854298591614, "learning_rate": 5.8403709418799546e-05, "loss": 2.9464, "step": 48850 }, { "epoch": 2.39, "grad_norm": 0.7350966930389404, "learning_rate": 5.8394582274086935e-05, "loss": 2.9554, "step": 48851 }, { "epoch": 2.39, "grad_norm": 0.7737547755241394, "learning_rate": 5.838545576571639e-05, "loss": 2.9423, "step": 48852 }, { "epoch": 2.39, "grad_norm": 0.7212133407592773, "learning_rate": 5.837632989371205e-05, "loss": 2.8444, "step": 48853 }, { "epoch": 2.39, "grad_norm": 0.7580508589744568, "learning_rate": 5.836720465809808e-05, "loss": 2.9049, "step": 48854 }, { "epoch": 2.39, "grad_norm": 0.7183226943016052, "learning_rate": 5.8358080058898326e-05, "loss": 2.894, "step": 48855 }, { "epoch": 2.39, "grad_norm": 0.7060414552688599, "learning_rate": 5.8348956096136945e-05, "loss": 2.6954, "step": 48856 }, { "epoch": 2.39, "grad_norm": 0.7525982856750488, "learning_rate": 5.833983276983778e-05, "loss": 3.1094, "step": 48857 }, { "epoch": 2.39, "grad_norm": 0.7521215677261353, "learning_rate": 5.833071008002508e-05, "loss": 2.8246, "step": 48858 }, { "epoch": 2.39, "grad_norm": 0.7773438096046448, "learning_rate": 5.832158802672272e-05, "loss": 2.7105, "step": 48859 }, { "epoch": 2.39, "grad_norm": 0.7317736148834229, "learning_rate": 5.8312466609954756e-05, "loss": 3.0221, "step": 48860 }, { "epoch": 2.39, "grad_norm": 0.7350138425827026, "learning_rate": 5.830334582974533e-05, "loss": 2.9349, "step": 48861 }, { "epoch": 2.39, "grad_norm": 0.7256360650062561, "learning_rate": 5.829422568611839e-05, "loss": 2.9305, "step": 48862 }, { "epoch": 2.39, "grad_norm": 0.7844482660293579, "learning_rate": 5.828510617909795e-05, "loss": 3.0334, "step": 48863 }, { "epoch": 2.39, "grad_norm": 0.7349040508270264, "learning_rate": 5.827598730870793e-05, "loss": 2.9445, "step": 48864 }, { "epoch": 2.39, "grad_norm": 0.7633810639381409, "learning_rate": 5.8266869074972436e-05, "loss": 2.8846, "step": 48865 }, { "epoch": 2.39, "grad_norm": 0.7389031052589417, "learning_rate": 5.825775147791555e-05, "loss": 2.833, "step": 48866 }, { "epoch": 2.39, "grad_norm": 0.7971253395080566, "learning_rate": 5.8248634517561156e-05, "loss": 2.8842, "step": 48867 }, { "epoch": 2.39, "grad_norm": 0.7472830414772034, "learning_rate": 5.82395181939334e-05, "loss": 2.6718, "step": 48868 }, { "epoch": 2.39, "grad_norm": 1.0377117395401, "learning_rate": 5.8230402507056165e-05, "loss": 2.8771, "step": 48869 }, { "epoch": 2.4, "grad_norm": 0.7521938681602478, "learning_rate": 5.822128745695359e-05, "loss": 2.8167, "step": 48870 }, { "epoch": 2.4, "grad_norm": 0.7492688298225403, "learning_rate": 5.821217304364963e-05, "loss": 2.8992, "step": 48871 }, { "epoch": 2.4, "grad_norm": 0.7289814949035645, "learning_rate": 5.820305926716817e-05, "loss": 2.9214, "step": 48872 }, { "epoch": 2.4, "grad_norm": 0.6818594336509705, "learning_rate": 5.819394612753342e-05, "loss": 2.7029, "step": 48873 }, { "epoch": 2.4, "grad_norm": 0.7078396677970886, "learning_rate": 5.818483362476919e-05, "loss": 2.6411, "step": 48874 }, { "epoch": 2.4, "grad_norm": 0.7174808979034424, "learning_rate": 5.8175721758899566e-05, "loss": 2.9228, "step": 48875 }, { "epoch": 2.4, "grad_norm": 0.7881730794906616, "learning_rate": 5.8166610529948624e-05, "loss": 2.882, "step": 48876 }, { "epoch": 2.4, "grad_norm": 0.7044768929481506, "learning_rate": 5.8157499937940275e-05, "loss": 2.976, "step": 48877 }, { "epoch": 2.4, "grad_norm": 0.72385573387146, "learning_rate": 5.814838998289855e-05, "loss": 2.8273, "step": 48878 }, { "epoch": 2.4, "grad_norm": 0.68699049949646, "learning_rate": 5.813928066484734e-05, "loss": 2.8484, "step": 48879 }, { "epoch": 2.4, "grad_norm": 0.7593944072723389, "learning_rate": 5.813017198381071e-05, "loss": 2.8658, "step": 48880 }, { "epoch": 2.4, "grad_norm": 0.7795584201812744, "learning_rate": 5.8121063939812776e-05, "loss": 2.9373, "step": 48881 }, { "epoch": 2.4, "grad_norm": 0.7256165742874146, "learning_rate": 5.811195653287727e-05, "loss": 2.8412, "step": 48882 }, { "epoch": 2.4, "grad_norm": 0.8968254923820496, "learning_rate": 5.8102849763028444e-05, "loss": 2.9702, "step": 48883 }, { "epoch": 2.4, "grad_norm": 0.7178112864494324, "learning_rate": 5.8093743630290125e-05, "loss": 2.5675, "step": 48884 }, { "epoch": 2.4, "grad_norm": 0.75639408826828, "learning_rate": 5.808463813468628e-05, "loss": 2.9394, "step": 48885 }, { "epoch": 2.4, "grad_norm": 0.7796924114227295, "learning_rate": 5.807553327624099e-05, "loss": 2.9314, "step": 48886 }, { "epoch": 2.4, "grad_norm": 0.7454410195350647, "learning_rate": 5.806642905497811e-05, "loss": 3.0443, "step": 48887 }, { "epoch": 2.4, "grad_norm": 0.7198060750961304, "learning_rate": 5.805732547092179e-05, "loss": 2.7605, "step": 48888 }, { "epoch": 2.4, "grad_norm": 0.6760872006416321, "learning_rate": 5.8048222524095833e-05, "loss": 2.7732, "step": 48889 }, { "epoch": 2.4, "grad_norm": 0.7360389828681946, "learning_rate": 5.8039120214524293e-05, "loss": 2.6977, "step": 48890 }, { "epoch": 2.4, "grad_norm": 0.7301698327064514, "learning_rate": 5.803001854223122e-05, "loss": 2.8464, "step": 48891 }, { "epoch": 2.4, "grad_norm": 0.7200692296028137, "learning_rate": 5.8020917507240516e-05, "loss": 3.0045, "step": 48892 }, { "epoch": 2.4, "grad_norm": 0.7441954016685486, "learning_rate": 5.8011817109576145e-05, "loss": 3.1825, "step": 48893 }, { "epoch": 2.4, "grad_norm": 0.7359301447868347, "learning_rate": 5.800271734926197e-05, "loss": 2.9192, "step": 48894 }, { "epoch": 2.4, "grad_norm": 0.7263439893722534, "learning_rate": 5.799361822632209e-05, "loss": 2.9108, "step": 48895 }, { "epoch": 2.4, "grad_norm": 0.7272346615791321, "learning_rate": 5.798451974078051e-05, "loss": 2.8907, "step": 48896 }, { "epoch": 2.4, "grad_norm": 0.7894495129585266, "learning_rate": 5.7975421892661045e-05, "loss": 2.9208, "step": 48897 }, { "epoch": 2.4, "grad_norm": 0.8710970282554626, "learning_rate": 5.796632468198784e-05, "loss": 3.0931, "step": 48898 }, { "epoch": 2.4, "grad_norm": 0.6832615733146667, "learning_rate": 5.795722810878475e-05, "loss": 2.9985, "step": 48899 }, { "epoch": 2.4, "grad_norm": 0.7081145644187927, "learning_rate": 5.794813217307565e-05, "loss": 3.0453, "step": 48900 }, { "epoch": 2.4, "grad_norm": 0.7711806893348694, "learning_rate": 5.793903687488468e-05, "loss": 2.8482, "step": 48901 }, { "epoch": 2.4, "grad_norm": 0.8075990676879883, "learning_rate": 5.79299422142356e-05, "loss": 3.1117, "step": 48902 }, { "epoch": 2.4, "grad_norm": 0.7430427670478821, "learning_rate": 5.7920848191152555e-05, "loss": 2.9316, "step": 48903 }, { "epoch": 2.4, "grad_norm": 0.7735027074813843, "learning_rate": 5.791175480565936e-05, "loss": 2.8317, "step": 48904 }, { "epoch": 2.4, "grad_norm": 0.7572428584098816, "learning_rate": 5.790266205778006e-05, "loss": 2.9534, "step": 48905 }, { "epoch": 2.4, "grad_norm": 0.721071720123291, "learning_rate": 5.789356994753853e-05, "loss": 2.9628, "step": 48906 }, { "epoch": 2.4, "grad_norm": 0.7726892828941345, "learning_rate": 5.788447847495869e-05, "loss": 2.8635, "step": 48907 }, { "epoch": 2.4, "grad_norm": 0.7207630276679993, "learning_rate": 5.787538764006463e-05, "loss": 2.9928, "step": 48908 }, { "epoch": 2.4, "grad_norm": 0.7308686971664429, "learning_rate": 5.786629744288012e-05, "loss": 2.8482, "step": 48909 }, { "epoch": 2.4, "grad_norm": 0.8075633645057678, "learning_rate": 5.785720788342917e-05, "loss": 2.8302, "step": 48910 }, { "epoch": 2.4, "grad_norm": 0.7197324633598328, "learning_rate": 5.78481189617358e-05, "loss": 2.9422, "step": 48911 }, { "epoch": 2.4, "grad_norm": 0.7496452927589417, "learning_rate": 5.783903067782383e-05, "loss": 2.7668, "step": 48912 }, { "epoch": 2.4, "grad_norm": 0.7587379813194275, "learning_rate": 5.7829943031717295e-05, "loss": 2.8343, "step": 48913 }, { "epoch": 2.4, "grad_norm": 0.7361343502998352, "learning_rate": 5.782085602344012e-05, "loss": 2.9595, "step": 48914 }, { "epoch": 2.4, "grad_norm": 0.6897303462028503, "learning_rate": 5.7811769653016117e-05, "loss": 2.8756, "step": 48915 }, { "epoch": 2.4, "grad_norm": 0.7227274179458618, "learning_rate": 5.7802683920469364e-05, "loss": 3.046, "step": 48916 }, { "epoch": 2.4, "grad_norm": 0.7711227536201477, "learning_rate": 5.779359882582364e-05, "loss": 3.0059, "step": 48917 }, { "epoch": 2.4, "grad_norm": 0.6948781609535217, "learning_rate": 5.7784514369103064e-05, "loss": 2.9202, "step": 48918 }, { "epoch": 2.4, "grad_norm": 0.7455137968063354, "learning_rate": 5.777543055033138e-05, "loss": 2.9468, "step": 48919 }, { "epoch": 2.4, "grad_norm": 0.738844096660614, "learning_rate": 5.776634736953267e-05, "loss": 3.0334, "step": 48920 }, { "epoch": 2.4, "grad_norm": 0.7826026082038879, "learning_rate": 5.7757264826730786e-05, "loss": 2.8642, "step": 48921 }, { "epoch": 2.4, "grad_norm": 0.7317241430282593, "learning_rate": 5.774818292194957e-05, "loss": 2.8831, "step": 48922 }, { "epoch": 2.4, "grad_norm": 0.745648980140686, "learning_rate": 5.773910165521311e-05, "loss": 3.1187, "step": 48923 }, { "epoch": 2.4, "grad_norm": 0.7066932916641235, "learning_rate": 5.7730021026545107e-05, "loss": 2.9324, "step": 48924 }, { "epoch": 2.4, "grad_norm": 0.719154417514801, "learning_rate": 5.772094103596965e-05, "loss": 2.9116, "step": 48925 }, { "epoch": 2.4, "grad_norm": 0.7419759631156921, "learning_rate": 5.7711861683510654e-05, "loss": 2.7544, "step": 48926 }, { "epoch": 2.4, "grad_norm": 0.7109057307243347, "learning_rate": 5.770278296919193e-05, "loss": 2.6735, "step": 48927 }, { "epoch": 2.4, "grad_norm": 0.6973155736923218, "learning_rate": 5.769370489303749e-05, "loss": 2.9472, "step": 48928 }, { "epoch": 2.4, "grad_norm": 0.7412685751914978, "learning_rate": 5.768462745507123e-05, "loss": 2.96, "step": 48929 }, { "epoch": 2.4, "grad_norm": 0.726198136806488, "learning_rate": 5.767555065531695e-05, "loss": 2.9192, "step": 48930 }, { "epoch": 2.4, "grad_norm": 0.7647451162338257, "learning_rate": 5.76664744937987e-05, "loss": 2.9987, "step": 48931 }, { "epoch": 2.4, "grad_norm": 0.790251612663269, "learning_rate": 5.7657398970540226e-05, "loss": 2.8102, "step": 48932 }, { "epoch": 2.4, "grad_norm": 0.7141204476356506, "learning_rate": 5.764832408556561e-05, "loss": 3.3217, "step": 48933 }, { "epoch": 2.4, "grad_norm": 0.742522120475769, "learning_rate": 5.76392498388986e-05, "loss": 2.9367, "step": 48934 }, { "epoch": 2.4, "grad_norm": 0.6904557347297668, "learning_rate": 5.763017623056325e-05, "loss": 2.5379, "step": 48935 }, { "epoch": 2.4, "grad_norm": 0.6816965937614441, "learning_rate": 5.762110326058337e-05, "loss": 2.8829, "step": 48936 }, { "epoch": 2.4, "grad_norm": 0.7373006343841553, "learning_rate": 5.761203092898277e-05, "loss": 2.9315, "step": 48937 }, { "epoch": 2.4, "grad_norm": 0.7109895348548889, "learning_rate": 5.76029592357855e-05, "loss": 2.756, "step": 48938 }, { "epoch": 2.4, "grad_norm": 0.7374069690704346, "learning_rate": 5.75938881810153e-05, "loss": 3.042, "step": 48939 }, { "epoch": 2.4, "grad_norm": 0.7400507926940918, "learning_rate": 5.758481776469623e-05, "loss": 2.9827, "step": 48940 }, { "epoch": 2.4, "grad_norm": 0.7280579209327698, "learning_rate": 5.757574798685204e-05, "loss": 2.8857, "step": 48941 }, { "epoch": 2.4, "grad_norm": 0.7616007924079895, "learning_rate": 5.756667884750673e-05, "loss": 2.8854, "step": 48942 }, { "epoch": 2.4, "grad_norm": 0.7113364338874817, "learning_rate": 5.755761034668413e-05, "loss": 3.0651, "step": 48943 }, { "epoch": 2.4, "grad_norm": 0.7248356938362122, "learning_rate": 5.754854248440807e-05, "loss": 2.927, "step": 48944 }, { "epoch": 2.4, "grad_norm": 0.7483928799629211, "learning_rate": 5.753947526070254e-05, "loss": 2.9758, "step": 48945 }, { "epoch": 2.4, "grad_norm": 0.7303197383880615, "learning_rate": 5.753040867559132e-05, "loss": 3.0986, "step": 48946 }, { "epoch": 2.4, "grad_norm": 0.7678343653678894, "learning_rate": 5.752134272909833e-05, "loss": 2.8695, "step": 48947 }, { "epoch": 2.4, "grad_norm": 0.7396532893180847, "learning_rate": 5.751227742124751e-05, "loss": 3.1046, "step": 48948 }, { "epoch": 2.4, "grad_norm": 0.7055127620697021, "learning_rate": 5.7503212752062646e-05, "loss": 2.9769, "step": 48949 }, { "epoch": 2.4, "grad_norm": 0.8863729238510132, "learning_rate": 5.7494148721567676e-05, "loss": 2.8523, "step": 48950 }, { "epoch": 2.4, "grad_norm": 0.7113443613052368, "learning_rate": 5.748508532978649e-05, "loss": 2.8839, "step": 48951 }, { "epoch": 2.4, "grad_norm": 0.7774294018745422, "learning_rate": 5.747602257674284e-05, "loss": 2.8566, "step": 48952 }, { "epoch": 2.4, "grad_norm": 0.747187614440918, "learning_rate": 5.746696046246073e-05, "loss": 2.8786, "step": 48953 }, { "epoch": 2.4, "grad_norm": 0.6938170790672302, "learning_rate": 5.745789898696388e-05, "loss": 2.8971, "step": 48954 }, { "epoch": 2.4, "grad_norm": 0.7477014064788818, "learning_rate": 5.7448838150276366e-05, "loss": 2.9451, "step": 48955 }, { "epoch": 2.4, "grad_norm": 0.7510203123092651, "learning_rate": 5.7439777952421805e-05, "loss": 2.918, "step": 48956 }, { "epoch": 2.4, "grad_norm": 0.7017959356307983, "learning_rate": 5.7430718393424315e-05, "loss": 2.7877, "step": 48957 }, { "epoch": 2.4, "grad_norm": 0.7510301470756531, "learning_rate": 5.7421659473307605e-05, "loss": 2.9053, "step": 48958 }, { "epoch": 2.4, "grad_norm": 0.6991977095603943, "learning_rate": 5.7412601192095454e-05, "loss": 3.0594, "step": 48959 }, { "epoch": 2.4, "grad_norm": 0.6977912783622742, "learning_rate": 5.740354354981195e-05, "loss": 3.0323, "step": 48960 }, { "epoch": 2.4, "grad_norm": 0.7294617295265198, "learning_rate": 5.73944865464807e-05, "loss": 3.0129, "step": 48961 }, { "epoch": 2.4, "grad_norm": 0.7957404255867004, "learning_rate": 5.738543018212573e-05, "loss": 2.7911, "step": 48962 }, { "epoch": 2.4, "grad_norm": 0.850426971912384, "learning_rate": 5.73763744567709e-05, "loss": 2.9213, "step": 48963 }, { "epoch": 2.4, "grad_norm": 0.7113354802131653, "learning_rate": 5.7367319370440014e-05, "loss": 2.8616, "step": 48964 }, { "epoch": 2.4, "grad_norm": 0.77390456199646, "learning_rate": 5.735826492315687e-05, "loss": 3.1204, "step": 48965 }, { "epoch": 2.4, "grad_norm": 0.734718382358551, "learning_rate": 5.7349211114945325e-05, "loss": 2.8761, "step": 48966 }, { "epoch": 2.4, "grad_norm": 0.9392869472503662, "learning_rate": 5.734015794582926e-05, "loss": 2.9756, "step": 48967 }, { "epoch": 2.4, "grad_norm": 0.7067123055458069, "learning_rate": 5.733110541583258e-05, "loss": 3.0634, "step": 48968 }, { "epoch": 2.4, "grad_norm": 0.7297945618629456, "learning_rate": 5.732205352497901e-05, "loss": 2.9535, "step": 48969 }, { "epoch": 2.4, "grad_norm": 0.7064141631126404, "learning_rate": 5.731300227329249e-05, "loss": 3.0467, "step": 48970 }, { "epoch": 2.4, "grad_norm": 0.7072996497154236, "learning_rate": 5.730395166079677e-05, "loss": 2.9136, "step": 48971 }, { "epoch": 2.4, "grad_norm": 0.749560534954071, "learning_rate": 5.7294901687515795e-05, "loss": 2.8613, "step": 48972 }, { "epoch": 2.4, "grad_norm": 0.735333263874054, "learning_rate": 5.7285852353473315e-05, "loss": 2.983, "step": 48973 }, { "epoch": 2.4, "grad_norm": 0.7418922185897827, "learning_rate": 5.727680365869315e-05, "loss": 3.1744, "step": 48974 }, { "epoch": 2.4, "grad_norm": 0.7562124133110046, "learning_rate": 5.726775560319924e-05, "loss": 2.8761, "step": 48975 }, { "epoch": 2.4, "grad_norm": 0.724221408367157, "learning_rate": 5.725870818701527e-05, "loss": 2.9732, "step": 48976 }, { "epoch": 2.4, "grad_norm": 0.7300118207931519, "learning_rate": 5.724966141016515e-05, "loss": 2.77, "step": 48977 }, { "epoch": 2.4, "grad_norm": 0.7261303663253784, "learning_rate": 5.7240615272672763e-05, "loss": 2.919, "step": 48978 }, { "epoch": 2.4, "grad_norm": 0.7926436066627502, "learning_rate": 5.723156977456189e-05, "loss": 2.8964, "step": 48979 }, { "epoch": 2.4, "grad_norm": 0.7375580072402954, "learning_rate": 5.7222524915856314e-05, "loss": 3.0317, "step": 48980 }, { "epoch": 2.4, "grad_norm": 0.7428630590438843, "learning_rate": 5.721348069657985e-05, "loss": 2.9936, "step": 48981 }, { "epoch": 2.4, "grad_norm": 0.766548216342926, "learning_rate": 5.720443711675631e-05, "loss": 2.9699, "step": 48982 }, { "epoch": 2.4, "grad_norm": 0.7440727949142456, "learning_rate": 5.719539417640967e-05, "loss": 3.1029, "step": 48983 }, { "epoch": 2.4, "grad_norm": 0.7492101192474365, "learning_rate": 5.718635187556355e-05, "loss": 2.7294, "step": 48984 }, { "epoch": 2.4, "grad_norm": 0.8655734658241272, "learning_rate": 5.71773102142419e-05, "loss": 2.683, "step": 48985 }, { "epoch": 2.4, "grad_norm": 0.7333561182022095, "learning_rate": 5.71682691924685e-05, "loss": 2.9099, "step": 48986 }, { "epoch": 2.4, "grad_norm": 0.7671554684638977, "learning_rate": 5.715922881026705e-05, "loss": 2.9234, "step": 48987 }, { "epoch": 2.4, "grad_norm": 0.6992800235748291, "learning_rate": 5.715018906766156e-05, "loss": 3.0187, "step": 48988 }, { "epoch": 2.4, "grad_norm": 0.707787036895752, "learning_rate": 5.714114996467565e-05, "loss": 2.8687, "step": 48989 }, { "epoch": 2.4, "grad_norm": 0.7215185165405273, "learning_rate": 5.713211150133328e-05, "loss": 2.97, "step": 48990 }, { "epoch": 2.4, "grad_norm": 0.7288063168525696, "learning_rate": 5.7123073677658126e-05, "loss": 3.0844, "step": 48991 }, { "epoch": 2.4, "grad_norm": 0.6853630542755127, "learning_rate": 5.7114036493674045e-05, "loss": 3.0357, "step": 48992 }, { "epoch": 2.4, "grad_norm": 0.694296658039093, "learning_rate": 5.710499994940491e-05, "loss": 3.081, "step": 48993 }, { "epoch": 2.4, "grad_norm": 0.7616432905197144, "learning_rate": 5.7095964044874496e-05, "loss": 3.1458, "step": 48994 }, { "epoch": 2.4, "grad_norm": 0.7581666707992554, "learning_rate": 5.708692878010653e-05, "loss": 2.9142, "step": 48995 }, { "epoch": 2.4, "grad_norm": 0.8747384548187256, "learning_rate": 5.7077894155124795e-05, "loss": 2.6311, "step": 48996 }, { "epoch": 2.4, "grad_norm": 0.71066814661026, "learning_rate": 5.706886016995309e-05, "loss": 2.8955, "step": 48997 }, { "epoch": 2.4, "grad_norm": 0.7173435688018799, "learning_rate": 5.7059826824615377e-05, "loss": 2.8613, "step": 48998 }, { "epoch": 2.4, "grad_norm": 0.7304638028144836, "learning_rate": 5.705079411913522e-05, "loss": 2.5475, "step": 48999 }, { "epoch": 2.4, "grad_norm": 0.7528974413871765, "learning_rate": 5.704176205353662e-05, "loss": 2.7217, "step": 49000 }, { "epoch": 2.4, "grad_norm": 0.7631033062934875, "learning_rate": 5.703273062784321e-05, "loss": 2.8355, "step": 49001 }, { "epoch": 2.4, "grad_norm": 0.734296441078186, "learning_rate": 5.702369984207882e-05, "loss": 2.9648, "step": 49002 }, { "epoch": 2.4, "grad_norm": 0.6912224888801575, "learning_rate": 5.701466969626728e-05, "loss": 3.0517, "step": 49003 }, { "epoch": 2.4, "grad_norm": 0.7426257133483887, "learning_rate": 5.700564019043224e-05, "loss": 2.8531, "step": 49004 }, { "epoch": 2.4, "grad_norm": 0.795962393283844, "learning_rate": 5.6996611324597695e-05, "loss": 2.8197, "step": 49005 }, { "epoch": 2.4, "grad_norm": 0.7349714040756226, "learning_rate": 5.698758309878721e-05, "loss": 2.938, "step": 49006 }, { "epoch": 2.4, "grad_norm": 0.7070464491844177, "learning_rate": 5.697855551302475e-05, "loss": 3.0683, "step": 49007 }, { "epoch": 2.4, "grad_norm": 0.7317219972610474, "learning_rate": 5.696952856733401e-05, "loss": 2.8491, "step": 49008 }, { "epoch": 2.4, "grad_norm": 0.7292014956474304, "learning_rate": 5.696050226173865e-05, "loss": 2.8261, "step": 49009 }, { "epoch": 2.4, "grad_norm": 0.7460183501243591, "learning_rate": 5.695147659626265e-05, "loss": 2.7796, "step": 49010 }, { "epoch": 2.4, "grad_norm": 0.7135501503944397, "learning_rate": 5.6942451570929614e-05, "loss": 3.0784, "step": 49011 }, { "epoch": 2.4, "grad_norm": 0.7707333564758301, "learning_rate": 5.6933427185763466e-05, "loss": 3.0009, "step": 49012 }, { "epoch": 2.4, "grad_norm": 0.7286562323570251, "learning_rate": 5.692440344078779e-05, "loss": 2.8597, "step": 49013 }, { "epoch": 2.4, "grad_norm": 0.7598409056663513, "learning_rate": 5.6915380336026494e-05, "loss": 3.0185, "step": 49014 }, { "epoch": 2.4, "grad_norm": 0.7345313429832458, "learning_rate": 5.6906357871503326e-05, "loss": 3.0258, "step": 49015 }, { "epoch": 2.4, "grad_norm": 0.7436714172363281, "learning_rate": 5.689733604724207e-05, "loss": 2.8548, "step": 49016 }, { "epoch": 2.4, "grad_norm": 0.750116765499115, "learning_rate": 5.6888314863266436e-05, "loss": 2.9594, "step": 49017 }, { "epoch": 2.4, "grad_norm": 0.7049277424812317, "learning_rate": 5.687929431960011e-05, "loss": 2.9449, "step": 49018 }, { "epoch": 2.4, "grad_norm": 0.7214593291282654, "learning_rate": 5.6870274416266934e-05, "loss": 3.0394, "step": 49019 }, { "epoch": 2.4, "grad_norm": 0.7183932662010193, "learning_rate": 5.6861255153290765e-05, "loss": 2.8671, "step": 49020 }, { "epoch": 2.4, "grad_norm": 0.7463213801383972, "learning_rate": 5.685223653069514e-05, "loss": 2.714, "step": 49021 }, { "epoch": 2.4, "grad_norm": 0.7598058581352234, "learning_rate": 5.684321854850401e-05, "loss": 2.9408, "step": 49022 }, { "epoch": 2.4, "grad_norm": 0.7609604597091675, "learning_rate": 5.683420120674106e-05, "loss": 2.9313, "step": 49023 }, { "epoch": 2.4, "grad_norm": 0.7309617400169373, "learning_rate": 5.682518450542996e-05, "loss": 2.8476, "step": 49024 }, { "epoch": 2.4, "grad_norm": 0.7466161251068115, "learning_rate": 5.6816168444594576e-05, "loss": 2.8262, "step": 49025 }, { "epoch": 2.4, "grad_norm": 0.7562272548675537, "learning_rate": 5.680715302425855e-05, "loss": 2.8261, "step": 49026 }, { "epoch": 2.4, "grad_norm": 0.7695786952972412, "learning_rate": 5.6798138244445745e-05, "loss": 2.9201, "step": 49027 }, { "epoch": 2.4, "grad_norm": 0.686572790145874, "learning_rate": 5.678912410517976e-05, "loss": 2.8701, "step": 49028 }, { "epoch": 2.4, "grad_norm": 0.746677815914154, "learning_rate": 5.678011060648441e-05, "loss": 2.8989, "step": 49029 }, { "epoch": 2.4, "grad_norm": 0.7117769122123718, "learning_rate": 5.677109774838363e-05, "loss": 2.8207, "step": 49030 }, { "epoch": 2.4, "grad_norm": 0.7121254205703735, "learning_rate": 5.676208553090078e-05, "loss": 3.1207, "step": 49031 }, { "epoch": 2.4, "grad_norm": 0.7127178907394409, "learning_rate": 5.6753073954059866e-05, "loss": 3.032, "step": 49032 }, { "epoch": 2.4, "grad_norm": 0.7273410558700562, "learning_rate": 5.6744063017884447e-05, "loss": 2.8643, "step": 49033 }, { "epoch": 2.4, "grad_norm": 0.7432453632354736, "learning_rate": 5.673505272239839e-05, "loss": 2.9869, "step": 49034 }, { "epoch": 2.4, "grad_norm": 0.7287410497665405, "learning_rate": 5.6726043067625447e-05, "loss": 2.8154, "step": 49035 }, { "epoch": 2.4, "grad_norm": 0.7052610516548157, "learning_rate": 5.6717034053589205e-05, "loss": 2.8844, "step": 49036 }, { "epoch": 2.4, "grad_norm": 0.7654694318771362, "learning_rate": 5.670802568031354e-05, "loss": 2.9412, "step": 49037 }, { "epoch": 2.4, "grad_norm": 0.7241032123565674, "learning_rate": 5.6699017947822137e-05, "loss": 2.8518, "step": 49038 }, { "epoch": 2.4, "grad_norm": 0.7288097143173218, "learning_rate": 5.66900108561386e-05, "loss": 2.8373, "step": 49039 }, { "epoch": 2.4, "grad_norm": 0.7443707585334778, "learning_rate": 5.6681004405286826e-05, "loss": 2.9401, "step": 49040 }, { "epoch": 2.4, "grad_norm": 0.7623165249824524, "learning_rate": 5.667199859529038e-05, "loss": 2.8009, "step": 49041 }, { "epoch": 2.4, "grad_norm": 0.7050398588180542, "learning_rate": 5.666299342617315e-05, "loss": 2.7906, "step": 49042 }, { "epoch": 2.4, "grad_norm": 0.7741838097572327, "learning_rate": 5.665398889795869e-05, "loss": 2.944, "step": 49043 }, { "epoch": 2.4, "grad_norm": 0.7164863348007202, "learning_rate": 5.6644985010670877e-05, "loss": 2.9287, "step": 49044 }, { "epoch": 2.4, "grad_norm": 0.7640129327774048, "learning_rate": 5.663598176433332e-05, "loss": 2.988, "step": 49045 }, { "epoch": 2.4, "grad_norm": 0.7587370872497559, "learning_rate": 5.662697915896967e-05, "loss": 2.884, "step": 49046 }, { "epoch": 2.4, "grad_norm": 0.8077144026756287, "learning_rate": 5.66179771946038e-05, "loss": 2.9191, "step": 49047 }, { "epoch": 2.4, "grad_norm": 0.7830314636230469, "learning_rate": 5.660897587125928e-05, "loss": 2.935, "step": 49048 }, { "epoch": 2.4, "grad_norm": 0.7073858976364136, "learning_rate": 5.6599975188959835e-05, "loss": 3.1283, "step": 49049 }, { "epoch": 2.4, "grad_norm": 0.7274441719055176, "learning_rate": 5.659097514772932e-05, "loss": 2.8923, "step": 49050 }, { "epoch": 2.4, "grad_norm": 0.7369108200073242, "learning_rate": 5.658197574759126e-05, "loss": 2.8339, "step": 49051 }, { "epoch": 2.4, "grad_norm": 0.7142553925514221, "learning_rate": 5.6572976988569486e-05, "loss": 2.8431, "step": 49052 }, { "epoch": 2.4, "grad_norm": 0.7000463604927063, "learning_rate": 5.656397887068765e-05, "loss": 2.8695, "step": 49053 }, { "epoch": 2.4, "grad_norm": 0.693121612071991, "learning_rate": 5.655498139396938e-05, "loss": 2.7841, "step": 49054 }, { "epoch": 2.4, "grad_norm": 0.6943217515945435, "learning_rate": 5.654598455843851e-05, "loss": 3.0532, "step": 49055 }, { "epoch": 2.4, "grad_norm": 0.7242630124092102, "learning_rate": 5.653698836411855e-05, "loss": 2.8341, "step": 49056 }, { "epoch": 2.4, "grad_norm": 0.7549852132797241, "learning_rate": 5.6527992811033427e-05, "loss": 2.8954, "step": 49057 }, { "epoch": 2.4, "grad_norm": 0.6854625940322876, "learning_rate": 5.6518997899206616e-05, "loss": 3.1429, "step": 49058 }, { "epoch": 2.4, "grad_norm": 0.7364606857299805, "learning_rate": 5.6510003628662006e-05, "loss": 2.7754, "step": 49059 }, { "epoch": 2.4, "grad_norm": 0.7241211533546448, "learning_rate": 5.6501009999423164e-05, "loss": 3.0467, "step": 49060 }, { "epoch": 2.4, "grad_norm": 0.7505494952201843, "learning_rate": 5.649201701151371e-05, "loss": 3.0433, "step": 49061 }, { "epoch": 2.4, "grad_norm": 0.7099612355232239, "learning_rate": 5.648302466495753e-05, "loss": 2.716, "step": 49062 }, { "epoch": 2.4, "grad_norm": 0.7239779233932495, "learning_rate": 5.6474032959778104e-05, "loss": 2.9557, "step": 49063 }, { "epoch": 2.4, "grad_norm": 0.7321217656135559, "learning_rate": 5.6465041895999205e-05, "loss": 2.7211, "step": 49064 }, { "epoch": 2.4, "grad_norm": 0.7070171236991882, "learning_rate": 5.645605147364459e-05, "loss": 2.7646, "step": 49065 }, { "epoch": 2.4, "grad_norm": 0.6989107131958008, "learning_rate": 5.644706169273787e-05, "loss": 2.9561, "step": 49066 }, { "epoch": 2.4, "grad_norm": 0.730031430721283, "learning_rate": 5.6438072553302725e-05, "loss": 2.9178, "step": 49067 }, { "epoch": 2.4, "grad_norm": 0.7238811254501343, "learning_rate": 5.642908405536274e-05, "loss": 2.7931, "step": 49068 }, { "epoch": 2.4, "grad_norm": 0.7353603839874268, "learning_rate": 5.6420096198941654e-05, "loss": 2.9666, "step": 49069 }, { "epoch": 2.4, "grad_norm": 0.738623321056366, "learning_rate": 5.6411108984063255e-05, "loss": 2.9291, "step": 49070 }, { "epoch": 2.4, "grad_norm": 0.7306110858917236, "learning_rate": 5.640212241075103e-05, "loss": 2.9059, "step": 49071 }, { "epoch": 2.4, "grad_norm": 0.7487077713012695, "learning_rate": 5.639313647902881e-05, "loss": 2.8363, "step": 49072 }, { "epoch": 2.4, "grad_norm": 0.7644553184509277, "learning_rate": 5.638415118892009e-05, "loss": 2.9736, "step": 49073 }, { "epoch": 2.4, "grad_norm": 1.27792489528656, "learning_rate": 5.6375166540448744e-05, "loss": 3.1355, "step": 49074 }, { "epoch": 2.41, "grad_norm": 0.7529338598251343, "learning_rate": 5.636618253363829e-05, "loss": 2.8665, "step": 49075 }, { "epoch": 2.41, "grad_norm": 0.7581132650375366, "learning_rate": 5.635719916851238e-05, "loss": 3.0295, "step": 49076 }, { "epoch": 2.41, "grad_norm": 0.7204800248146057, "learning_rate": 5.634821644509479e-05, "loss": 2.7356, "step": 49077 }, { "epoch": 2.41, "grad_norm": 0.7828012704849243, "learning_rate": 5.6339234363408994e-05, "loss": 2.927, "step": 49078 }, { "epoch": 2.41, "grad_norm": 0.786764919757843, "learning_rate": 5.633025292347878e-05, "loss": 2.9383, "step": 49079 }, { "epoch": 2.41, "grad_norm": 0.7644099593162537, "learning_rate": 5.6321272125327865e-05, "loss": 2.9283, "step": 49080 }, { "epoch": 2.41, "grad_norm": 0.7182053923606873, "learning_rate": 5.631229196897983e-05, "loss": 2.9578, "step": 49081 }, { "epoch": 2.41, "grad_norm": 0.7207154631614685, "learning_rate": 5.6303312454458316e-05, "loss": 3.0525, "step": 49082 }, { "epoch": 2.41, "grad_norm": 0.773795485496521, "learning_rate": 5.6294333581786875e-05, "loss": 3.0164, "step": 49083 }, { "epoch": 2.41, "grad_norm": 0.7547546625137329, "learning_rate": 5.628535535098929e-05, "loss": 3.0012, "step": 49084 }, { "epoch": 2.41, "grad_norm": 0.7342237830162048, "learning_rate": 5.6276377762089243e-05, "loss": 2.9923, "step": 49085 }, { "epoch": 2.41, "grad_norm": 0.7141954898834229, "learning_rate": 5.626740081511024e-05, "loss": 2.6876, "step": 49086 }, { "epoch": 2.41, "grad_norm": 0.7377126216888428, "learning_rate": 5.6258424510076074e-05, "loss": 2.8332, "step": 49087 }, { "epoch": 2.41, "grad_norm": 0.7284415364265442, "learning_rate": 5.624944884701028e-05, "loss": 3.057, "step": 49088 }, { "epoch": 2.41, "grad_norm": 0.7273825407028198, "learning_rate": 5.624047382593655e-05, "loss": 2.7143, "step": 49089 }, { "epoch": 2.41, "grad_norm": 0.7405761480331421, "learning_rate": 5.6231499446878424e-05, "loss": 2.8364, "step": 49090 }, { "epoch": 2.41, "grad_norm": 0.7068529725074768, "learning_rate": 5.622252570985959e-05, "loss": 2.8806, "step": 49091 }, { "epoch": 2.41, "grad_norm": 0.6906842589378357, "learning_rate": 5.621355261490382e-05, "loss": 2.7804, "step": 49092 }, { "epoch": 2.41, "grad_norm": 0.7543202638626099, "learning_rate": 5.620458016203454e-05, "loss": 2.6974, "step": 49093 }, { "epoch": 2.41, "grad_norm": 0.735177218914032, "learning_rate": 5.619560835127556e-05, "loss": 3.0508, "step": 49094 }, { "epoch": 2.41, "grad_norm": 0.733551025390625, "learning_rate": 5.618663718265032e-05, "loss": 2.8695, "step": 49095 }, { "epoch": 2.41, "grad_norm": 0.7386725544929504, "learning_rate": 5.617766665618267e-05, "loss": 2.9166, "step": 49096 }, { "epoch": 2.41, "grad_norm": 0.7156366109848022, "learning_rate": 5.61686967718961e-05, "loss": 2.7209, "step": 49097 }, { "epoch": 2.41, "grad_norm": 0.7398567199707031, "learning_rate": 5.615972752981419e-05, "loss": 2.716, "step": 49098 }, { "epoch": 2.41, "grad_norm": 0.7204150557518005, "learning_rate": 5.6150758929960715e-05, "loss": 3.0253, "step": 49099 }, { "epoch": 2.41, "grad_norm": 0.7159655690193176, "learning_rate": 5.614179097235913e-05, "loss": 2.8584, "step": 49100 }, { "epoch": 2.41, "grad_norm": 0.6986108422279358, "learning_rate": 5.61328236570331e-05, "loss": 2.7455, "step": 49101 }, { "epoch": 2.41, "grad_norm": 0.7308990955352783, "learning_rate": 5.612385698400639e-05, "loss": 3.1152, "step": 49102 }, { "epoch": 2.41, "grad_norm": 0.7399376034736633, "learning_rate": 5.611489095330251e-05, "loss": 2.8173, "step": 49103 }, { "epoch": 2.41, "grad_norm": 0.7102044820785522, "learning_rate": 5.6105925564945066e-05, "loss": 2.8044, "step": 49104 }, { "epoch": 2.41, "grad_norm": 0.7224224805831909, "learning_rate": 5.609696081895758e-05, "loss": 2.9648, "step": 49105 }, { "epoch": 2.41, "grad_norm": 0.6727746725082397, "learning_rate": 5.608799671536377e-05, "loss": 2.7387, "step": 49106 }, { "epoch": 2.41, "grad_norm": 0.7503066062927246, "learning_rate": 5.607903325418731e-05, "loss": 3.0684, "step": 49107 }, { "epoch": 2.41, "grad_norm": 0.7269359827041626, "learning_rate": 5.6070070435451685e-05, "loss": 2.9476, "step": 49108 }, { "epoch": 2.41, "grad_norm": 0.7300078868865967, "learning_rate": 5.6061108259180575e-05, "loss": 2.7939, "step": 49109 }, { "epoch": 2.41, "grad_norm": 0.7930198311805725, "learning_rate": 5.60521467253976e-05, "loss": 2.9471, "step": 49110 }, { "epoch": 2.41, "grad_norm": 0.7446059584617615, "learning_rate": 5.604318583412622e-05, "loss": 2.9892, "step": 49111 }, { "epoch": 2.41, "grad_norm": 0.6801463961601257, "learning_rate": 5.6034225585390245e-05, "loss": 2.8705, "step": 49112 }, { "epoch": 2.41, "grad_norm": 0.82508385181427, "learning_rate": 5.6025265979213074e-05, "loss": 2.7941, "step": 49113 }, { "epoch": 2.41, "grad_norm": 0.7679817080497742, "learning_rate": 5.6016307015618454e-05, "loss": 2.8535, "step": 49114 }, { "epoch": 2.41, "grad_norm": 0.7149144411087036, "learning_rate": 5.600734869462987e-05, "loss": 2.7853, "step": 49115 }, { "epoch": 2.41, "grad_norm": 0.7261170148849487, "learning_rate": 5.599839101627097e-05, "loss": 2.8658, "step": 49116 }, { "epoch": 2.41, "grad_norm": 0.7425785660743713, "learning_rate": 5.5989433980565435e-05, "loss": 2.9014, "step": 49117 }, { "epoch": 2.41, "grad_norm": 0.7709639072418213, "learning_rate": 5.5980477587536775e-05, "loss": 2.8676, "step": 49118 }, { "epoch": 2.41, "grad_norm": 0.6940927505493164, "learning_rate": 5.5971521837208546e-05, "loss": 2.9155, "step": 49119 }, { "epoch": 2.41, "grad_norm": 0.747685968875885, "learning_rate": 5.5962566729604317e-05, "loss": 2.6554, "step": 49120 }, { "epoch": 2.41, "grad_norm": 0.7228460907936096, "learning_rate": 5.595361226474768e-05, "loss": 2.7611, "step": 49121 }, { "epoch": 2.41, "grad_norm": 0.6861898303031921, "learning_rate": 5.5944658442662385e-05, "loss": 2.8041, "step": 49122 }, { "epoch": 2.41, "grad_norm": 0.7472571134567261, "learning_rate": 5.593570526337176e-05, "loss": 2.7697, "step": 49123 }, { "epoch": 2.41, "grad_norm": 0.7464053630828857, "learning_rate": 5.592675272689965e-05, "loss": 2.8793, "step": 49124 }, { "epoch": 2.41, "grad_norm": 0.6809940338134766, "learning_rate": 5.591780083326948e-05, "loss": 2.857, "step": 49125 }, { "epoch": 2.41, "grad_norm": 0.6927462220191956, "learning_rate": 5.5908849582504764e-05, "loss": 2.9232, "step": 49126 }, { "epoch": 2.41, "grad_norm": 0.7227660417556763, "learning_rate": 5.589989897462923e-05, "loss": 2.784, "step": 49127 }, { "epoch": 2.41, "grad_norm": 0.7501949071884155, "learning_rate": 5.589094900966632e-05, "loss": 2.7866, "step": 49128 }, { "epoch": 2.41, "grad_norm": 0.7532203197479248, "learning_rate": 5.5881999687639746e-05, "loss": 2.9745, "step": 49129 }, { "epoch": 2.41, "grad_norm": 0.7279804944992065, "learning_rate": 5.587305100857292e-05, "loss": 2.9465, "step": 49130 }, { "epoch": 2.41, "grad_norm": 0.7034523487091064, "learning_rate": 5.58641029724895e-05, "loss": 2.9068, "step": 49131 }, { "epoch": 2.41, "grad_norm": 0.7209697961807251, "learning_rate": 5.585515557941322e-05, "loss": 2.7805, "step": 49132 }, { "epoch": 2.41, "grad_norm": 0.7370447516441345, "learning_rate": 5.584620882936731e-05, "loss": 2.8805, "step": 49133 }, { "epoch": 2.41, "grad_norm": 0.7712350487709045, "learning_rate": 5.5837262722375574e-05, "loss": 2.7915, "step": 49134 }, { "epoch": 2.41, "grad_norm": 0.7178522944450378, "learning_rate": 5.58283172584614e-05, "loss": 2.9186, "step": 49135 }, { "epoch": 2.41, "grad_norm": 0.730476438999176, "learning_rate": 5.581937243764846e-05, "loss": 2.9216, "step": 49136 }, { "epoch": 2.41, "grad_norm": 0.7608109712600708, "learning_rate": 5.5810428259960375e-05, "loss": 3.1027, "step": 49137 }, { "epoch": 2.41, "grad_norm": 0.7009885907173157, "learning_rate": 5.580148472542055e-05, "loss": 2.7478, "step": 49138 }, { "epoch": 2.41, "grad_norm": 0.7496047019958496, "learning_rate": 5.579254183405272e-05, "loss": 2.9551, "step": 49139 }, { "epoch": 2.41, "grad_norm": 0.7258723974227905, "learning_rate": 5.578359958588028e-05, "loss": 2.7385, "step": 49140 }, { "epoch": 2.41, "grad_norm": 0.7414917945861816, "learning_rate": 5.5774657980926815e-05, "loss": 2.8253, "step": 49141 }, { "epoch": 2.41, "grad_norm": 0.7586607933044434, "learning_rate": 5.576571701921595e-05, "loss": 2.8642, "step": 49142 }, { "epoch": 2.41, "grad_norm": 0.7755255699157715, "learning_rate": 5.575677670077112e-05, "loss": 2.964, "step": 49143 }, { "epoch": 2.41, "grad_norm": 0.7838589549064636, "learning_rate": 5.574783702561598e-05, "loss": 2.9531, "step": 49144 }, { "epoch": 2.41, "grad_norm": 0.7484715580940247, "learning_rate": 5.573889799377398e-05, "loss": 2.9606, "step": 49145 }, { "epoch": 2.41, "grad_norm": 0.7210723757743835, "learning_rate": 5.572995960526879e-05, "loss": 2.9659, "step": 49146 }, { "epoch": 2.41, "grad_norm": 0.7425600290298462, "learning_rate": 5.572102186012387e-05, "loss": 3.0856, "step": 49147 }, { "epoch": 2.41, "grad_norm": 0.7232694625854492, "learning_rate": 5.571208475836269e-05, "loss": 2.9543, "step": 49148 }, { "epoch": 2.41, "grad_norm": 0.713165819644928, "learning_rate": 5.570314830000895e-05, "loss": 2.8556, "step": 49149 }, { "epoch": 2.41, "grad_norm": 0.741438627243042, "learning_rate": 5.569421248508601e-05, "loss": 3.0215, "step": 49150 }, { "epoch": 2.41, "grad_norm": 0.7061321139335632, "learning_rate": 5.56852773136175e-05, "loss": 3.0182, "step": 49151 }, { "epoch": 2.41, "grad_norm": 0.7206674814224243, "learning_rate": 5.5676342785627045e-05, "loss": 2.8039, "step": 49152 }, { "epoch": 2.41, "grad_norm": 0.7030647397041321, "learning_rate": 5.566740890113799e-05, "loss": 3.02, "step": 49153 }, { "epoch": 2.41, "grad_norm": 0.7327060699462891, "learning_rate": 5.565847566017403e-05, "loss": 2.7408, "step": 49154 }, { "epoch": 2.41, "grad_norm": 0.7196124196052551, "learning_rate": 5.5649543062758617e-05, "loss": 2.8843, "step": 49155 }, { "epoch": 2.41, "grad_norm": 0.7554773092269897, "learning_rate": 5.5640611108915204e-05, "loss": 2.8299, "step": 49156 }, { "epoch": 2.41, "grad_norm": 0.7063097357749939, "learning_rate": 5.5631679798667495e-05, "loss": 3.0298, "step": 49157 }, { "epoch": 2.41, "grad_norm": 0.7428682446479797, "learning_rate": 5.5622749132038815e-05, "loss": 3.0282, "step": 49158 }, { "epoch": 2.41, "grad_norm": 0.7211458086967468, "learning_rate": 5.561381910905287e-05, "loss": 2.7996, "step": 49159 }, { "epoch": 2.41, "grad_norm": 0.7217656373977661, "learning_rate": 5.5604889729733016e-05, "loss": 2.6599, "step": 49160 }, { "epoch": 2.41, "grad_norm": 0.7122716903686523, "learning_rate": 5.55959609941029e-05, "loss": 2.7535, "step": 49161 }, { "epoch": 2.41, "grad_norm": 0.7155596613883972, "learning_rate": 5.558703290218599e-05, "loss": 2.8666, "step": 49162 }, { "epoch": 2.41, "grad_norm": 0.7553215622901917, "learning_rate": 5.557810545400575e-05, "loss": 2.9545, "step": 49163 }, { "epoch": 2.41, "grad_norm": 0.727124810218811, "learning_rate": 5.556917864958579e-05, "loss": 2.8262, "step": 49164 }, { "epoch": 2.41, "grad_norm": 0.7256839871406555, "learning_rate": 5.556025248894952e-05, "loss": 2.8445, "step": 49165 }, { "epoch": 2.41, "grad_norm": 0.7141228318214417, "learning_rate": 5.555132697212049e-05, "loss": 2.7558, "step": 49166 }, { "epoch": 2.41, "grad_norm": 0.7409408688545227, "learning_rate": 5.5542402099122287e-05, "loss": 2.7731, "step": 49167 }, { "epoch": 2.41, "grad_norm": 0.7171137928962708, "learning_rate": 5.553347786997835e-05, "loss": 3.0352, "step": 49168 }, { "epoch": 2.41, "grad_norm": 0.7588046789169312, "learning_rate": 5.5524554284712196e-05, "loss": 3.0276, "step": 49169 }, { "epoch": 2.41, "grad_norm": 0.7045204639434814, "learning_rate": 5.5515631343347244e-05, "loss": 2.807, "step": 49170 }, { "epoch": 2.41, "grad_norm": 0.7897858619689941, "learning_rate": 5.550670904590714e-05, "loss": 2.7827, "step": 49171 }, { "epoch": 2.41, "grad_norm": 0.7354269623756409, "learning_rate": 5.5497787392415227e-05, "loss": 2.6785, "step": 49172 }, { "epoch": 2.41, "grad_norm": 0.7657710313796997, "learning_rate": 5.548886638289506e-05, "loss": 2.7895, "step": 49173 }, { "epoch": 2.41, "grad_norm": 0.789874255657196, "learning_rate": 5.547994601737028e-05, "loss": 2.8425, "step": 49174 }, { "epoch": 2.41, "grad_norm": 0.7321118712425232, "learning_rate": 5.547102629586417e-05, "loss": 2.8132, "step": 49175 }, { "epoch": 2.41, "grad_norm": 0.7470834851264954, "learning_rate": 5.546210721840039e-05, "loss": 2.9483, "step": 49176 }, { "epoch": 2.41, "grad_norm": 0.725573718547821, "learning_rate": 5.5453188785002366e-05, "loss": 3.0163, "step": 49177 }, { "epoch": 2.41, "grad_norm": 0.7329528331756592, "learning_rate": 5.5444270995693496e-05, "loss": 2.7606, "step": 49178 }, { "epoch": 2.41, "grad_norm": 0.6969563364982605, "learning_rate": 5.543535385049742e-05, "loss": 2.8289, "step": 49179 }, { "epoch": 2.41, "grad_norm": 0.7398406267166138, "learning_rate": 5.542643734943749e-05, "loss": 3.0499, "step": 49180 }, { "epoch": 2.41, "grad_norm": 0.7166543006896973, "learning_rate": 5.541752149253735e-05, "loss": 2.9627, "step": 49181 }, { "epoch": 2.41, "grad_norm": 0.7646118998527527, "learning_rate": 5.540860627982029e-05, "loss": 2.8877, "step": 49182 }, { "epoch": 2.41, "grad_norm": 0.7465002536773682, "learning_rate": 5.5399691711309944e-05, "loss": 2.8294, "step": 49183 }, { "epoch": 2.41, "grad_norm": 0.7415258288383484, "learning_rate": 5.539077778702977e-05, "loss": 3.1178, "step": 49184 }, { "epoch": 2.41, "grad_norm": 0.7385164499282837, "learning_rate": 5.538186450700312e-05, "loss": 3.0409, "step": 49185 }, { "epoch": 2.41, "grad_norm": 0.7214295864105225, "learning_rate": 5.537295187125366e-05, "loss": 2.9575, "step": 49186 }, { "epoch": 2.41, "grad_norm": 0.742912769317627, "learning_rate": 5.5364039879804646e-05, "loss": 2.7598, "step": 49187 }, { "epoch": 2.41, "grad_norm": 0.726211428642273, "learning_rate": 5.535512853267969e-05, "loss": 3.0765, "step": 49188 }, { "epoch": 2.41, "grad_norm": 0.6831627488136292, "learning_rate": 5.534621782990232e-05, "loss": 2.8655, "step": 49189 }, { "epoch": 2.41, "grad_norm": 0.7041841745376587, "learning_rate": 5.5337307771495933e-05, "loss": 2.9758, "step": 49190 }, { "epoch": 2.41, "grad_norm": 0.7825841307640076, "learning_rate": 5.5328398357483986e-05, "loss": 2.8501, "step": 49191 }, { "epoch": 2.41, "grad_norm": 0.795807957649231, "learning_rate": 5.5319489587889886e-05, "loss": 2.7646, "step": 49192 }, { "epoch": 2.41, "grad_norm": 0.7698444724082947, "learning_rate": 5.5310581462737155e-05, "loss": 2.7806, "step": 49193 }, { "epoch": 2.41, "grad_norm": 0.6966370940208435, "learning_rate": 5.5301673982049314e-05, "loss": 3.0312, "step": 49194 }, { "epoch": 2.41, "grad_norm": 0.7309635877609253, "learning_rate": 5.5292767145849726e-05, "loss": 2.7085, "step": 49195 }, { "epoch": 2.41, "grad_norm": 0.7802844643592834, "learning_rate": 5.528386095416195e-05, "loss": 2.9802, "step": 49196 }, { "epoch": 2.41, "grad_norm": 0.7086724638938904, "learning_rate": 5.5274955407009305e-05, "loss": 3.0243, "step": 49197 }, { "epoch": 2.41, "grad_norm": 0.8560823798179626, "learning_rate": 5.5266050504415424e-05, "loss": 2.8772, "step": 49198 }, { "epoch": 2.41, "grad_norm": 0.6955791115760803, "learning_rate": 5.525714624640367e-05, "loss": 2.7175, "step": 49199 }, { "epoch": 2.41, "grad_norm": 0.7265035510063171, "learning_rate": 5.524824263299738e-05, "loss": 2.8838, "step": 49200 }, { "epoch": 2.41, "grad_norm": 0.7440124750137329, "learning_rate": 5.523933966422025e-05, "loss": 2.9098, "step": 49201 }, { "epoch": 2.41, "grad_norm": 0.7409399151802063, "learning_rate": 5.523043734009548e-05, "loss": 2.7065, "step": 49202 }, { "epoch": 2.41, "grad_norm": 0.7265484929084778, "learning_rate": 5.522153566064662e-05, "loss": 3.1762, "step": 49203 }, { "epoch": 2.41, "grad_norm": 0.7463249564170837, "learning_rate": 5.521263462589722e-05, "loss": 2.8447, "step": 49204 }, { "epoch": 2.41, "grad_norm": 0.7473812699317932, "learning_rate": 5.520373423587064e-05, "loss": 2.7834, "step": 49205 }, { "epoch": 2.41, "grad_norm": 0.725105881690979, "learning_rate": 5.51948344905903e-05, "loss": 3.1029, "step": 49206 }, { "epoch": 2.41, "grad_norm": 0.694555938243866, "learning_rate": 5.518593539007957e-05, "loss": 2.9168, "step": 49207 }, { "epoch": 2.41, "grad_norm": 0.7044192552566528, "learning_rate": 5.5177036934362e-05, "loss": 2.7009, "step": 49208 }, { "epoch": 2.41, "grad_norm": 0.7090144753456116, "learning_rate": 5.516813912346102e-05, "loss": 3.0191, "step": 49209 }, { "epoch": 2.41, "grad_norm": 0.7357432842254639, "learning_rate": 5.515924195740002e-05, "loss": 2.8309, "step": 49210 }, { "epoch": 2.41, "grad_norm": 0.6832718253135681, "learning_rate": 5.5150345436202495e-05, "loss": 3.1344, "step": 49211 }, { "epoch": 2.41, "grad_norm": 0.6962417960166931, "learning_rate": 5.514144955989187e-05, "loss": 2.9699, "step": 49212 }, { "epoch": 2.41, "grad_norm": 0.7619498372077942, "learning_rate": 5.513255432849144e-05, "loss": 2.7511, "step": 49213 }, { "epoch": 2.41, "grad_norm": 0.7212022542953491, "learning_rate": 5.512365974202482e-05, "loss": 2.8064, "step": 49214 }, { "epoch": 2.41, "grad_norm": 0.7095093131065369, "learning_rate": 5.511476580051528e-05, "loss": 2.7405, "step": 49215 }, { "epoch": 2.41, "grad_norm": 0.7469050884246826, "learning_rate": 5.510587250398638e-05, "loss": 2.773, "step": 49216 }, { "epoch": 2.41, "grad_norm": 0.708609938621521, "learning_rate": 5.509697985246143e-05, "loss": 2.9376, "step": 49217 }, { "epoch": 2.41, "grad_norm": 0.7691136598587036, "learning_rate": 5.508808784596388e-05, "loss": 2.9028, "step": 49218 }, { "epoch": 2.41, "grad_norm": 0.7040303945541382, "learning_rate": 5.5079196484517274e-05, "loss": 2.8717, "step": 49219 }, { "epoch": 2.41, "grad_norm": 0.7086603045463562, "learning_rate": 5.507030576814493e-05, "loss": 2.9845, "step": 49220 }, { "epoch": 2.41, "grad_norm": 0.7217570543289185, "learning_rate": 5.5061415696870223e-05, "loss": 3.0093, "step": 49221 }, { "epoch": 2.41, "grad_norm": 0.7315422892570496, "learning_rate": 5.505252627071655e-05, "loss": 2.9514, "step": 49222 }, { "epoch": 2.41, "grad_norm": 0.7391554117202759, "learning_rate": 5.5043637489707406e-05, "loss": 2.9218, "step": 49223 }, { "epoch": 2.41, "grad_norm": 0.7177310585975647, "learning_rate": 5.503474935386621e-05, "loss": 2.8227, "step": 49224 }, { "epoch": 2.41, "grad_norm": 0.7659426331520081, "learning_rate": 5.502586186321629e-05, "loss": 2.9178, "step": 49225 }, { "epoch": 2.41, "grad_norm": 0.7270904183387756, "learning_rate": 5.5016975017781184e-05, "loss": 2.9833, "step": 49226 }, { "epoch": 2.41, "grad_norm": 0.6957828998565674, "learning_rate": 5.500808881758421e-05, "loss": 2.883, "step": 49227 }, { "epoch": 2.41, "grad_norm": 0.7378817200660706, "learning_rate": 5.49992032626487e-05, "loss": 2.8653, "step": 49228 }, { "epoch": 2.41, "grad_norm": 0.750215470790863, "learning_rate": 5.499031835299823e-05, "loss": 2.5859, "step": 49229 }, { "epoch": 2.41, "grad_norm": 0.790286123752594, "learning_rate": 5.4981434088656026e-05, "loss": 2.9305, "step": 49230 }, { "epoch": 2.41, "grad_norm": 0.7604691386222839, "learning_rate": 5.497255046964565e-05, "loss": 2.8586, "step": 49231 }, { "epoch": 2.41, "grad_norm": 0.7047723531723022, "learning_rate": 5.496366749599034e-05, "loss": 2.9883, "step": 49232 }, { "epoch": 2.41, "grad_norm": 0.7287726998329163, "learning_rate": 5.4954785167713645e-05, "loss": 2.9042, "step": 49233 }, { "epoch": 2.41, "grad_norm": 0.7202035188674927, "learning_rate": 5.494590348483888e-05, "loss": 2.8477, "step": 49234 }, { "epoch": 2.41, "grad_norm": 0.7223737239837646, "learning_rate": 5.4937022447389365e-05, "loss": 2.9645, "step": 49235 }, { "epoch": 2.41, "grad_norm": 0.7385289669036865, "learning_rate": 5.4928142055388645e-05, "loss": 2.9676, "step": 49236 }, { "epoch": 2.41, "grad_norm": 0.7490689754486084, "learning_rate": 5.491926230885997e-05, "loss": 2.8101, "step": 49237 }, { "epoch": 2.41, "grad_norm": 0.7428318858146667, "learning_rate": 5.491038320782678e-05, "loss": 2.9541, "step": 49238 }, { "epoch": 2.41, "grad_norm": 0.7282332181930542, "learning_rate": 5.490150475231257e-05, "loss": 3.0235, "step": 49239 }, { "epoch": 2.41, "grad_norm": 0.7052958011627197, "learning_rate": 5.4892626942340533e-05, "loss": 2.8107, "step": 49240 }, { "epoch": 2.41, "grad_norm": 0.7355846166610718, "learning_rate": 5.488374977793421e-05, "loss": 3.0651, "step": 49241 }, { "epoch": 2.41, "grad_norm": 0.7115058898925781, "learning_rate": 5.4874873259116924e-05, "loss": 2.8074, "step": 49242 }, { "epoch": 2.41, "grad_norm": 0.6894238591194153, "learning_rate": 5.4865997385911976e-05, "loss": 2.9663, "step": 49243 }, { "epoch": 2.41, "grad_norm": 0.7079063653945923, "learning_rate": 5.48571221583429e-05, "loss": 2.9716, "step": 49244 }, { "epoch": 2.41, "grad_norm": 0.7443049550056458, "learning_rate": 5.484824757643289e-05, "loss": 2.9997, "step": 49245 }, { "epoch": 2.41, "grad_norm": 0.7488800883293152, "learning_rate": 5.483937364020552e-05, "loss": 2.8637, "step": 49246 }, { "epoch": 2.41, "grad_norm": 0.6989515423774719, "learning_rate": 5.483050034968397e-05, "loss": 2.9274, "step": 49247 }, { "epoch": 2.41, "grad_norm": 0.66061931848526, "learning_rate": 5.4821627704891756e-05, "loss": 2.9987, "step": 49248 }, { "epoch": 2.41, "grad_norm": 0.7440872192382812, "learning_rate": 5.4812755705852194e-05, "loss": 3.2342, "step": 49249 }, { "epoch": 2.41, "grad_norm": 0.7412754893302917, "learning_rate": 5.4803884352588566e-05, "loss": 2.899, "step": 49250 }, { "epoch": 2.41, "grad_norm": 0.7123976945877075, "learning_rate": 5.479501364512442e-05, "loss": 2.9107, "step": 49251 }, { "epoch": 2.41, "grad_norm": 0.7113731503486633, "learning_rate": 5.478614358348293e-05, "loss": 3.1027, "step": 49252 }, { "epoch": 2.41, "grad_norm": 0.7375507354736328, "learning_rate": 5.4777274167687625e-05, "loss": 2.7018, "step": 49253 }, { "epoch": 2.41, "grad_norm": 0.7448770999908447, "learning_rate": 5.4768405397761674e-05, "loss": 2.9676, "step": 49254 }, { "epoch": 2.41, "grad_norm": 0.7443416714668274, "learning_rate": 5.4759537273728595e-05, "loss": 2.904, "step": 49255 }, { "epoch": 2.41, "grad_norm": 0.7311747670173645, "learning_rate": 5.475066979561174e-05, "loss": 3.1093, "step": 49256 }, { "epoch": 2.41, "grad_norm": 0.728496253490448, "learning_rate": 5.474180296343444e-05, "loss": 3.0179, "step": 49257 }, { "epoch": 2.41, "grad_norm": 0.7192690968513489, "learning_rate": 5.4732936777220035e-05, "loss": 2.9779, "step": 49258 }, { "epoch": 2.41, "grad_norm": 0.7940871715545654, "learning_rate": 5.472407123699176e-05, "loss": 2.894, "step": 49259 }, { "epoch": 2.41, "grad_norm": 0.7260642647743225, "learning_rate": 5.471520634277313e-05, "loss": 2.9742, "step": 49260 }, { "epoch": 2.41, "grad_norm": 0.7162463665008545, "learning_rate": 5.470634209458746e-05, "loss": 2.9621, "step": 49261 }, { "epoch": 2.41, "grad_norm": 0.7385729551315308, "learning_rate": 5.469747849245803e-05, "loss": 2.6355, "step": 49262 }, { "epoch": 2.41, "grad_norm": 0.7240334153175354, "learning_rate": 5.468861553640833e-05, "loss": 2.7548, "step": 49263 }, { "epoch": 2.41, "grad_norm": 0.7283338308334351, "learning_rate": 5.46797532264616e-05, "loss": 2.7219, "step": 49264 }, { "epoch": 2.41, "grad_norm": 0.8645117282867432, "learning_rate": 5.467089156264109e-05, "loss": 2.8017, "step": 49265 }, { "epoch": 2.41, "grad_norm": 0.7759827375411987, "learning_rate": 5.46620305449703e-05, "loss": 2.8121, "step": 49266 }, { "epoch": 2.41, "grad_norm": 0.7253652811050415, "learning_rate": 5.465317017347244e-05, "loss": 2.9753, "step": 49267 }, { "epoch": 2.41, "grad_norm": 0.720482349395752, "learning_rate": 5.4644310448171e-05, "loss": 3.0358, "step": 49268 }, { "epoch": 2.41, "grad_norm": 0.7371129393577576, "learning_rate": 5.463545136908915e-05, "loss": 2.7949, "step": 49269 }, { "epoch": 2.41, "grad_norm": 0.6884015202522278, "learning_rate": 5.462659293625038e-05, "loss": 2.8906, "step": 49270 }, { "epoch": 2.41, "grad_norm": 0.7280858159065247, "learning_rate": 5.4617735149677925e-05, "loss": 3.058, "step": 49271 }, { "epoch": 2.41, "grad_norm": 0.7321854829788208, "learning_rate": 5.4608878009395084e-05, "loss": 3.049, "step": 49272 }, { "epoch": 2.41, "grad_norm": 0.7545171976089478, "learning_rate": 5.4600021515425284e-05, "loss": 2.9722, "step": 49273 }, { "epoch": 2.41, "grad_norm": 0.7591923475265503, "learning_rate": 5.45911656677917e-05, "loss": 2.9183, "step": 49274 }, { "epoch": 2.41, "grad_norm": 0.6920126676559448, "learning_rate": 5.458231046651779e-05, "loss": 2.8652, "step": 49275 }, { "epoch": 2.41, "grad_norm": 0.7270497679710388, "learning_rate": 5.457345591162692e-05, "loss": 2.8887, "step": 49276 }, { "epoch": 2.41, "grad_norm": 0.8276441693305969, "learning_rate": 5.456460200314224e-05, "loss": 2.7831, "step": 49277 }, { "epoch": 2.41, "grad_norm": 0.7535173892974854, "learning_rate": 5.455574874108725e-05, "loss": 3.0215, "step": 49278 }, { "epoch": 2.42, "grad_norm": 0.7337012887001038, "learning_rate": 5.4546896125485173e-05, "loss": 2.7542, "step": 49279 }, { "epoch": 2.42, "grad_norm": 0.7610710859298706, "learning_rate": 5.453804415635927e-05, "loss": 3.0278, "step": 49280 }, { "epoch": 2.42, "grad_norm": 0.751594066619873, "learning_rate": 5.4529192833732996e-05, "loss": 3.1184, "step": 49281 }, { "epoch": 2.42, "grad_norm": 0.6849843859672546, "learning_rate": 5.452034215762948e-05, "loss": 3.1234, "step": 49282 }, { "epoch": 2.42, "grad_norm": 0.7445322871208191, "learning_rate": 5.4511492128072255e-05, "loss": 2.7937, "step": 49283 }, { "epoch": 2.42, "grad_norm": 0.7667579054832458, "learning_rate": 5.4502642745084425e-05, "loss": 2.9009, "step": 49284 }, { "epoch": 2.42, "grad_norm": 0.6802947521209717, "learning_rate": 5.449379400868944e-05, "loss": 3.005, "step": 49285 }, { "epoch": 2.42, "grad_norm": 0.7073135375976562, "learning_rate": 5.4484945918910585e-05, "loss": 2.8548, "step": 49286 }, { "epoch": 2.42, "grad_norm": 0.7205443978309631, "learning_rate": 5.4476098475771033e-05, "loss": 2.8307, "step": 49287 }, { "epoch": 2.42, "grad_norm": 0.7851844429969788, "learning_rate": 5.446725167929428e-05, "loss": 2.8409, "step": 49288 }, { "epoch": 2.42, "grad_norm": 0.7031289339065552, "learning_rate": 5.445840552950346e-05, "loss": 2.8974, "step": 49289 }, { "epoch": 2.42, "grad_norm": 0.7477833032608032, "learning_rate": 5.4449560026421924e-05, "loss": 2.9867, "step": 49290 }, { "epoch": 2.42, "grad_norm": 0.7260494828224182, "learning_rate": 5.444071517007306e-05, "loss": 2.6814, "step": 49291 }, { "epoch": 2.42, "grad_norm": 0.7753791809082031, "learning_rate": 5.443187096048011e-05, "loss": 2.9645, "step": 49292 }, { "epoch": 2.42, "grad_norm": 0.8131752610206604, "learning_rate": 5.4423027397666354e-05, "loss": 3.0435, "step": 49293 }, { "epoch": 2.42, "grad_norm": 0.7920990586280823, "learning_rate": 5.441418448165501e-05, "loss": 2.957, "step": 49294 }, { "epoch": 2.42, "grad_norm": 0.7970216274261475, "learning_rate": 5.4405342212469427e-05, "loss": 2.7669, "step": 49295 }, { "epoch": 2.42, "grad_norm": 0.7318688631057739, "learning_rate": 5.4396500590132984e-05, "loss": 2.9577, "step": 49296 }, { "epoch": 2.42, "grad_norm": 0.7441763281822205, "learning_rate": 5.4387659614668834e-05, "loss": 2.9414, "step": 49297 }, { "epoch": 2.42, "grad_norm": 0.7256457209587097, "learning_rate": 5.437881928610035e-05, "loss": 2.8141, "step": 49298 }, { "epoch": 2.42, "grad_norm": 0.790198564529419, "learning_rate": 5.436997960445075e-05, "loss": 3.0438, "step": 49299 }, { "epoch": 2.42, "grad_norm": 0.7521136999130249, "learning_rate": 5.4361140569743423e-05, "loss": 2.976, "step": 49300 }, { "epoch": 2.42, "grad_norm": 0.7699940800666809, "learning_rate": 5.435230218200157e-05, "loss": 2.8781, "step": 49301 }, { "epoch": 2.42, "grad_norm": 0.7283733487129211, "learning_rate": 5.4343464441248384e-05, "loss": 2.7156, "step": 49302 }, { "epoch": 2.42, "grad_norm": 0.7702507376670837, "learning_rate": 5.433462734750734e-05, "loss": 3.0198, "step": 49303 }, { "epoch": 2.42, "grad_norm": 1.300162672996521, "learning_rate": 5.432579090080152e-05, "loss": 2.9121, "step": 49304 }, { "epoch": 2.42, "grad_norm": 0.7907041907310486, "learning_rate": 5.431695510115428e-05, "loss": 2.9838, "step": 49305 }, { "epoch": 2.42, "grad_norm": 0.7288923859596252, "learning_rate": 5.4308119948588994e-05, "loss": 2.9276, "step": 49306 }, { "epoch": 2.42, "grad_norm": 0.7126811742782593, "learning_rate": 5.4299285443128806e-05, "loss": 2.9427, "step": 49307 }, { "epoch": 2.42, "grad_norm": 0.7134640216827393, "learning_rate": 5.429045158479703e-05, "loss": 2.8017, "step": 49308 }, { "epoch": 2.42, "grad_norm": 0.7535879015922546, "learning_rate": 5.428161837361686e-05, "loss": 2.7213, "step": 49309 }, { "epoch": 2.42, "grad_norm": 0.6850473880767822, "learning_rate": 5.4272785809611606e-05, "loss": 2.9667, "step": 49310 }, { "epoch": 2.42, "grad_norm": 0.7917988896369934, "learning_rate": 5.4263953892804614e-05, "loss": 2.9065, "step": 49311 }, { "epoch": 2.42, "grad_norm": 0.7340681552886963, "learning_rate": 5.4255122623218994e-05, "loss": 2.9701, "step": 49312 }, { "epoch": 2.42, "grad_norm": 0.7131487131118774, "learning_rate": 5.4246292000878164e-05, "loss": 2.864, "step": 49313 }, { "epoch": 2.42, "grad_norm": 0.7326993942260742, "learning_rate": 5.423746202580528e-05, "loss": 2.9615, "step": 49314 }, { "epoch": 2.42, "grad_norm": 0.7834832668304443, "learning_rate": 5.422863269802358e-05, "loss": 3.0211, "step": 49315 }, { "epoch": 2.42, "grad_norm": 0.7300680875778198, "learning_rate": 5.421980401755645e-05, "loss": 2.9014, "step": 49316 }, { "epoch": 2.42, "grad_norm": 0.7191560864448547, "learning_rate": 5.421097598442693e-05, "loss": 2.6949, "step": 49317 }, { "epoch": 2.42, "grad_norm": 0.7470296025276184, "learning_rate": 5.420214859865851e-05, "loss": 2.8543, "step": 49318 }, { "epoch": 2.42, "grad_norm": 0.7371562719345093, "learning_rate": 5.419332186027424e-05, "loss": 2.9266, "step": 49319 }, { "epoch": 2.42, "grad_norm": 0.7162860631942749, "learning_rate": 5.418449576929746e-05, "loss": 2.8709, "step": 49320 }, { "epoch": 2.42, "grad_norm": 0.7437654137611389, "learning_rate": 5.41756703257515e-05, "loss": 2.857, "step": 49321 }, { "epoch": 2.42, "grad_norm": 0.7650477290153503, "learning_rate": 5.416684552965949e-05, "loss": 2.6571, "step": 49322 }, { "epoch": 2.42, "grad_norm": 0.743986189365387, "learning_rate": 5.415802138104468e-05, "loss": 2.8046, "step": 49323 }, { "epoch": 2.42, "grad_norm": 0.7592489719390869, "learning_rate": 5.4149197879930296e-05, "loss": 2.8845, "step": 49324 }, { "epoch": 2.42, "grad_norm": 0.700624406337738, "learning_rate": 5.414037502633965e-05, "loss": 2.8208, "step": 49325 }, { "epoch": 2.42, "grad_norm": 0.7299220561981201, "learning_rate": 5.4131552820295875e-05, "loss": 2.7412, "step": 49326 }, { "epoch": 2.42, "grad_norm": 0.756610095500946, "learning_rate": 5.412273126182227e-05, "loss": 3.0103, "step": 49327 }, { "epoch": 2.42, "grad_norm": 0.7145278453826904, "learning_rate": 5.411391035094218e-05, "loss": 2.8637, "step": 49328 }, { "epoch": 2.42, "grad_norm": 0.749764621257782, "learning_rate": 5.410509008767868e-05, "loss": 2.8035, "step": 49329 }, { "epoch": 2.42, "grad_norm": 0.7430508136749268, "learning_rate": 5.409627047205509e-05, "loss": 2.7404, "step": 49330 }, { "epoch": 2.42, "grad_norm": 0.7532656788825989, "learning_rate": 5.4087451504094494e-05, "loss": 3.022, "step": 49331 }, { "epoch": 2.42, "grad_norm": 0.7438516020774841, "learning_rate": 5.407863318382023e-05, "loss": 3.084, "step": 49332 }, { "epoch": 2.42, "grad_norm": 0.7108733654022217, "learning_rate": 5.4069815511255585e-05, "loss": 2.985, "step": 49333 }, { "epoch": 2.42, "grad_norm": 0.7077670693397522, "learning_rate": 5.406099848642368e-05, "loss": 3.0003, "step": 49334 }, { "epoch": 2.42, "grad_norm": 0.7985681891441345, "learning_rate": 5.405218210934782e-05, "loss": 2.78, "step": 49335 }, { "epoch": 2.42, "grad_norm": 0.7880358695983887, "learning_rate": 5.404336638005116e-05, "loss": 3.0255, "step": 49336 }, { "epoch": 2.42, "grad_norm": 0.7047920227050781, "learning_rate": 5.403455129855687e-05, "loss": 3.151, "step": 49337 }, { "epoch": 2.42, "grad_norm": 0.7189116477966309, "learning_rate": 5.4025736864888346e-05, "loss": 2.9223, "step": 49338 }, { "epoch": 2.42, "grad_norm": 0.7386056780815125, "learning_rate": 5.4016923079068565e-05, "loss": 2.9541, "step": 49339 }, { "epoch": 2.42, "grad_norm": 0.7559934854507446, "learning_rate": 5.400810994112097e-05, "loss": 2.9204, "step": 49340 }, { "epoch": 2.42, "grad_norm": 0.7456420063972473, "learning_rate": 5.3999297451068614e-05, "loss": 3.1086, "step": 49341 }, { "epoch": 2.42, "grad_norm": 0.748192310333252, "learning_rate": 5.399048560893474e-05, "loss": 2.7161, "step": 49342 }, { "epoch": 2.42, "grad_norm": 0.7148739099502563, "learning_rate": 5.398167441474267e-05, "loss": 2.9892, "step": 49343 }, { "epoch": 2.42, "grad_norm": 0.7266228795051575, "learning_rate": 5.397286386851552e-05, "loss": 3.0108, "step": 49344 }, { "epoch": 2.42, "grad_norm": 0.7317888736724854, "learning_rate": 5.396405397027649e-05, "loss": 3.0233, "step": 49345 }, { "epoch": 2.42, "grad_norm": 0.7267348766326904, "learning_rate": 5.3955244720048704e-05, "loss": 2.9689, "step": 49346 }, { "epoch": 2.42, "grad_norm": 0.7102034687995911, "learning_rate": 5.394643611785549e-05, "loss": 2.6048, "step": 49347 }, { "epoch": 2.42, "grad_norm": 0.7675157785415649, "learning_rate": 5.393762816372004e-05, "loss": 3.0444, "step": 49348 }, { "epoch": 2.42, "grad_norm": 0.7861278653144836, "learning_rate": 5.392882085766548e-05, "loss": 2.9381, "step": 49349 }, { "epoch": 2.42, "grad_norm": 0.7278153300285339, "learning_rate": 5.392001419971512e-05, "loss": 2.6851, "step": 49350 }, { "epoch": 2.42, "grad_norm": 0.7110613584518433, "learning_rate": 5.391120818989205e-05, "loss": 3.0432, "step": 49351 }, { "epoch": 2.42, "grad_norm": 0.7356806993484497, "learning_rate": 5.390240282821945e-05, "loss": 2.8609, "step": 49352 }, { "epoch": 2.42, "grad_norm": 0.73133784532547, "learning_rate": 5.389359811472066e-05, "loss": 2.9498, "step": 49353 }, { "epoch": 2.42, "grad_norm": 0.7367465496063232, "learning_rate": 5.388479404941863e-05, "loss": 2.9129, "step": 49354 }, { "epoch": 2.42, "grad_norm": 0.7416877746582031, "learning_rate": 5.387599063233682e-05, "loss": 2.9267, "step": 49355 }, { "epoch": 2.42, "grad_norm": 0.7233571410179138, "learning_rate": 5.386718786349819e-05, "loss": 3.1255, "step": 49356 }, { "epoch": 2.42, "grad_norm": 0.7607924342155457, "learning_rate": 5.3858385742926016e-05, "loss": 2.7879, "step": 49357 }, { "epoch": 2.42, "grad_norm": 0.7684453129768372, "learning_rate": 5.384958427064362e-05, "loss": 2.8697, "step": 49358 }, { "epoch": 2.42, "grad_norm": 0.7534241676330566, "learning_rate": 5.3840783446673895e-05, "loss": 2.7628, "step": 49359 }, { "epoch": 2.42, "grad_norm": 0.7305625677108765, "learning_rate": 5.383198327104026e-05, "loss": 2.663, "step": 49360 }, { "epoch": 2.42, "grad_norm": 0.7438086867332458, "learning_rate": 5.3823183743765716e-05, "loss": 2.9783, "step": 49361 }, { "epoch": 2.42, "grad_norm": 0.7273472547531128, "learning_rate": 5.381438486487356e-05, "loss": 2.9193, "step": 49362 }, { "epoch": 2.42, "grad_norm": 0.7327690124511719, "learning_rate": 5.3805586634387e-05, "loss": 2.7964, "step": 49363 }, { "epoch": 2.42, "grad_norm": 0.7488052248954773, "learning_rate": 5.379678905232905e-05, "loss": 2.9159, "step": 49364 }, { "epoch": 2.42, "grad_norm": 0.7673367857933044, "learning_rate": 5.378799211872306e-05, "loss": 3.1029, "step": 49365 }, { "epoch": 2.42, "grad_norm": 0.7909415364265442, "learning_rate": 5.37791958335921e-05, "loss": 3.0563, "step": 49366 }, { "epoch": 2.42, "grad_norm": 0.7198272347450256, "learning_rate": 5.377040019695931e-05, "loss": 2.782, "step": 49367 }, { "epoch": 2.42, "grad_norm": 0.7427800893783569, "learning_rate": 5.3761605208847944e-05, "loss": 3.2383, "step": 49368 }, { "epoch": 2.42, "grad_norm": 0.6806737184524536, "learning_rate": 5.375281086928104e-05, "loss": 2.7696, "step": 49369 }, { "epoch": 2.42, "grad_norm": 0.7873852252960205, "learning_rate": 5.374401717828193e-05, "loss": 2.9428, "step": 49370 }, { "epoch": 2.42, "grad_norm": 0.7423635721206665, "learning_rate": 5.373522413587363e-05, "loss": 2.8635, "step": 49371 }, { "epoch": 2.42, "grad_norm": 0.7548320293426514, "learning_rate": 5.37264317420794e-05, "loss": 3.1002, "step": 49372 }, { "epoch": 2.42, "grad_norm": 0.711924135684967, "learning_rate": 5.371763999692238e-05, "loss": 3.0354, "step": 49373 }, { "epoch": 2.42, "grad_norm": 0.773473858833313, "learning_rate": 5.370884890042558e-05, "loss": 2.8397, "step": 49374 }, { "epoch": 2.42, "grad_norm": 0.7292789220809937, "learning_rate": 5.370005845261239e-05, "loss": 2.9119, "step": 49375 }, { "epoch": 2.42, "grad_norm": 0.7399142980575562, "learning_rate": 5.369126865350576e-05, "loss": 2.8673, "step": 49376 }, { "epoch": 2.42, "grad_norm": 0.7260851263999939, "learning_rate": 5.368247950312889e-05, "loss": 2.8445, "step": 49377 }, { "epoch": 2.42, "grad_norm": 0.7297760248184204, "learning_rate": 5.367369100150508e-05, "loss": 2.8716, "step": 49378 }, { "epoch": 2.42, "grad_norm": 0.7921993136405945, "learning_rate": 5.366490314865727e-05, "loss": 3.0418, "step": 49379 }, { "epoch": 2.42, "grad_norm": 0.7311543226242065, "learning_rate": 5.36561159446088e-05, "loss": 2.768, "step": 49380 }, { "epoch": 2.42, "grad_norm": 0.7326648831367493, "learning_rate": 5.364732938938266e-05, "loss": 3.0984, "step": 49381 }, { "epoch": 2.42, "grad_norm": 0.6894707083702087, "learning_rate": 5.363854348300201e-05, "loss": 2.6443, "step": 49382 }, { "epoch": 2.42, "grad_norm": 0.754602313041687, "learning_rate": 5.362975822549007e-05, "loss": 2.7576, "step": 49383 }, { "epoch": 2.42, "grad_norm": 0.7371578216552734, "learning_rate": 5.362097361686988e-05, "loss": 2.9871, "step": 49384 }, { "epoch": 2.42, "grad_norm": 0.7002536058425903, "learning_rate": 5.36121896571647e-05, "loss": 2.982, "step": 49385 }, { "epoch": 2.42, "grad_norm": 0.7025123238563538, "learning_rate": 5.3603406346397504e-05, "loss": 2.8782, "step": 49386 }, { "epoch": 2.42, "grad_norm": 0.7461484670639038, "learning_rate": 5.359462368459162e-05, "loss": 3.0057, "step": 49387 }, { "epoch": 2.42, "grad_norm": 0.7868021130561829, "learning_rate": 5.3585841671770036e-05, "loss": 2.9907, "step": 49388 }, { "epoch": 2.42, "grad_norm": 0.7514649033546448, "learning_rate": 5.3577060307955867e-05, "loss": 3.0199, "step": 49389 }, { "epoch": 2.42, "grad_norm": 0.7672540545463562, "learning_rate": 5.356827959317239e-05, "loss": 2.8189, "step": 49390 }, { "epoch": 2.42, "grad_norm": 0.6891894340515137, "learning_rate": 5.355949952744253e-05, "loss": 2.785, "step": 49391 }, { "epoch": 2.42, "grad_norm": 0.775546669960022, "learning_rate": 5.355072011078953e-05, "loss": 2.8854, "step": 49392 }, { "epoch": 2.42, "grad_norm": 0.7490105628967285, "learning_rate": 5.35419413432366e-05, "loss": 2.9474, "step": 49393 }, { "epoch": 2.42, "grad_norm": 0.7415432333946228, "learning_rate": 5.3533163224806755e-05, "loss": 2.5371, "step": 49394 }, { "epoch": 2.42, "grad_norm": 0.7412639260292053, "learning_rate": 5.352438575552312e-05, "loss": 2.828, "step": 49395 }, { "epoch": 2.42, "grad_norm": 0.7198470234870911, "learning_rate": 5.351560893540873e-05, "loss": 2.9522, "step": 49396 }, { "epoch": 2.42, "grad_norm": 0.7310846447944641, "learning_rate": 5.3506832764486776e-05, "loss": 3.1216, "step": 49397 }, { "epoch": 2.42, "grad_norm": 0.776674747467041, "learning_rate": 5.34980572427805e-05, "loss": 2.9169, "step": 49398 }, { "epoch": 2.42, "grad_norm": 0.7521768808364868, "learning_rate": 5.348928237031279e-05, "loss": 3.0317, "step": 49399 }, { "epoch": 2.42, "grad_norm": 0.7370387315750122, "learning_rate": 5.348050814710696e-05, "loss": 3.0164, "step": 49400 }, { "epoch": 2.42, "grad_norm": 0.727357804775238, "learning_rate": 5.347173457318595e-05, "loss": 2.8261, "step": 49401 }, { "epoch": 2.42, "grad_norm": 0.697250247001648, "learning_rate": 5.346296164857305e-05, "loss": 2.782, "step": 49402 }, { "epoch": 2.42, "grad_norm": 0.7319405674934387, "learning_rate": 5.3454189373291246e-05, "loss": 2.8101, "step": 49403 }, { "epoch": 2.42, "grad_norm": 0.7242304682731628, "learning_rate": 5.344541774736357e-05, "loss": 2.9944, "step": 49404 }, { "epoch": 2.42, "grad_norm": 0.7132266163825989, "learning_rate": 5.3436646770813286e-05, "loss": 2.9694, "step": 49405 }, { "epoch": 2.42, "grad_norm": 0.7501279711723328, "learning_rate": 5.342787644366337e-05, "loss": 2.7354, "step": 49406 }, { "epoch": 2.42, "grad_norm": 0.6961253881454468, "learning_rate": 5.3419106765937025e-05, "loss": 2.8499, "step": 49407 }, { "epoch": 2.42, "grad_norm": 0.7209061980247498, "learning_rate": 5.3410337737657216e-05, "loss": 3.1587, "step": 49408 }, { "epoch": 2.42, "grad_norm": 0.718574047088623, "learning_rate": 5.340156935884722e-05, "loss": 2.867, "step": 49409 }, { "epoch": 2.42, "grad_norm": 0.8040580153465271, "learning_rate": 5.3392801629530015e-05, "loss": 2.8241, "step": 49410 }, { "epoch": 2.42, "grad_norm": 0.717680037021637, "learning_rate": 5.338403454972865e-05, "loss": 3.0446, "step": 49411 }, { "epoch": 2.42, "grad_norm": 0.7457809448242188, "learning_rate": 5.337526811946634e-05, "loss": 2.5612, "step": 49412 }, { "epoch": 2.42, "grad_norm": 0.7153359055519104, "learning_rate": 5.336650233876604e-05, "loss": 3.0441, "step": 49413 }, { "epoch": 2.42, "grad_norm": 0.7195101976394653, "learning_rate": 5.3357737207650886e-05, "loss": 3.1782, "step": 49414 }, { "epoch": 2.42, "grad_norm": 0.6793602705001831, "learning_rate": 5.3348972726144104e-05, "loss": 2.7251, "step": 49415 }, { "epoch": 2.42, "grad_norm": 0.7900977730751038, "learning_rate": 5.334020889426863e-05, "loss": 2.7028, "step": 49416 }, { "epoch": 2.42, "grad_norm": 0.7003040909767151, "learning_rate": 5.333144571204756e-05, "loss": 2.7757, "step": 49417 }, { "epoch": 2.42, "grad_norm": 0.7215965986251831, "learning_rate": 5.332268317950393e-05, "loss": 2.8256, "step": 49418 }, { "epoch": 2.42, "grad_norm": 0.6919942498207092, "learning_rate": 5.33139212966609e-05, "loss": 2.7628, "step": 49419 }, { "epoch": 2.42, "grad_norm": 0.7243634462356567, "learning_rate": 5.3305160063541576e-05, "loss": 2.8523, "step": 49420 }, { "epoch": 2.42, "grad_norm": 0.7183544635772705, "learning_rate": 5.329639948016891e-05, "loss": 2.7497, "step": 49421 }, { "epoch": 2.42, "grad_norm": 0.7528842687606812, "learning_rate": 5.3287639546566144e-05, "loss": 2.9952, "step": 49422 }, { "epoch": 2.42, "grad_norm": 0.7461656332015991, "learning_rate": 5.327888026275616e-05, "loss": 2.9245, "step": 49423 }, { "epoch": 2.42, "grad_norm": 0.7598857879638672, "learning_rate": 5.327012162876219e-05, "loss": 2.8832, "step": 49424 }, { "epoch": 2.42, "grad_norm": 0.7612981200218201, "learning_rate": 5.3261363644607255e-05, "loss": 3.0593, "step": 49425 }, { "epoch": 2.42, "grad_norm": 0.7197171449661255, "learning_rate": 5.325260631031433e-05, "loss": 2.6926, "step": 49426 }, { "epoch": 2.42, "grad_norm": 0.7351194620132446, "learning_rate": 5.324384962590661e-05, "loss": 3.1384, "step": 49427 }, { "epoch": 2.42, "grad_norm": 0.7707248330116272, "learning_rate": 5.323509359140702e-05, "loss": 2.9092, "step": 49428 }, { "epoch": 2.42, "grad_norm": 0.7522650957107544, "learning_rate": 5.322633820683874e-05, "loss": 2.8542, "step": 49429 }, { "epoch": 2.42, "grad_norm": 0.7015370726585388, "learning_rate": 5.321758347222481e-05, "loss": 2.7529, "step": 49430 }, { "epoch": 2.42, "grad_norm": 0.733875036239624, "learning_rate": 5.320882938758833e-05, "loss": 2.9341, "step": 49431 }, { "epoch": 2.42, "grad_norm": 0.7439164519309998, "learning_rate": 5.320007595295226e-05, "loss": 3.0875, "step": 49432 }, { "epoch": 2.42, "grad_norm": 0.7210975289344788, "learning_rate": 5.319132316833963e-05, "loss": 2.8665, "step": 49433 }, { "epoch": 2.42, "grad_norm": 0.7494742274284363, "learning_rate": 5.3182571033773543e-05, "loss": 2.8854, "step": 49434 }, { "epoch": 2.42, "grad_norm": 0.7517364025115967, "learning_rate": 5.3173819549277164e-05, "loss": 2.7901, "step": 49435 }, { "epoch": 2.42, "grad_norm": 0.8002448678016663, "learning_rate": 5.316506871487336e-05, "loss": 2.639, "step": 49436 }, { "epoch": 2.42, "grad_norm": 0.6921734809875488, "learning_rate": 5.3156318530585315e-05, "loss": 2.8351, "step": 49437 }, { "epoch": 2.42, "grad_norm": 0.7180572152137756, "learning_rate": 5.314756899643605e-05, "loss": 2.8778, "step": 49438 }, { "epoch": 2.42, "grad_norm": 0.7430548667907715, "learning_rate": 5.313882011244848e-05, "loss": 3.0747, "step": 49439 }, { "epoch": 2.42, "grad_norm": 0.7170864939689636, "learning_rate": 5.313007187864584e-05, "loss": 2.9257, "step": 49440 }, { "epoch": 2.42, "grad_norm": 0.7517858743667603, "learning_rate": 5.312132429505099e-05, "loss": 2.9093, "step": 49441 }, { "epoch": 2.42, "grad_norm": 0.7107861042022705, "learning_rate": 5.311257736168715e-05, "loss": 2.7721, "step": 49442 }, { "epoch": 2.42, "grad_norm": 0.7427153587341309, "learning_rate": 5.310383107857718e-05, "loss": 2.8472, "step": 49443 }, { "epoch": 2.42, "grad_norm": 0.7173393964767456, "learning_rate": 5.309508544574419e-05, "loss": 2.7931, "step": 49444 }, { "epoch": 2.42, "grad_norm": 0.7084386944770813, "learning_rate": 5.308634046321131e-05, "loss": 2.6603, "step": 49445 }, { "epoch": 2.42, "grad_norm": 0.6978672742843628, "learning_rate": 5.307759613100152e-05, "loss": 2.8571, "step": 49446 }, { "epoch": 2.42, "grad_norm": 0.7057198286056519, "learning_rate": 5.3068852449137775e-05, "loss": 2.7171, "step": 49447 }, { "epoch": 2.42, "grad_norm": 0.7628961801528931, "learning_rate": 5.306010941764313e-05, "loss": 2.8952, "step": 49448 }, { "epoch": 2.42, "grad_norm": 0.7418630123138428, "learning_rate": 5.305136703654058e-05, "loss": 2.7643, "step": 49449 }, { "epoch": 2.42, "grad_norm": 0.7383913993835449, "learning_rate": 5.304262530585329e-05, "loss": 2.8741, "step": 49450 }, { "epoch": 2.42, "grad_norm": 0.7368167042732239, "learning_rate": 5.303388422560414e-05, "loss": 2.8344, "step": 49451 }, { "epoch": 2.42, "grad_norm": 0.7100529670715332, "learning_rate": 5.302514379581627e-05, "loss": 2.9651, "step": 49452 }, { "epoch": 2.42, "grad_norm": 0.7233177423477173, "learning_rate": 5.301640401651267e-05, "loss": 3.1435, "step": 49453 }, { "epoch": 2.42, "grad_norm": 0.7448151111602783, "learning_rate": 5.3007664887716216e-05, "loss": 2.6896, "step": 49454 }, { "epoch": 2.42, "grad_norm": 0.7152411341667175, "learning_rate": 5.2998926409450126e-05, "loss": 2.9082, "step": 49455 }, { "epoch": 2.42, "grad_norm": 0.7502825260162354, "learning_rate": 5.299018858173725e-05, "loss": 2.7657, "step": 49456 }, { "epoch": 2.42, "grad_norm": 0.7130440473556519, "learning_rate": 5.298145140460076e-05, "loss": 2.8697, "step": 49457 }, { "epoch": 2.42, "grad_norm": 0.7099788188934326, "learning_rate": 5.297271487806352e-05, "loss": 2.6939, "step": 49458 }, { "epoch": 2.42, "grad_norm": 0.706955075263977, "learning_rate": 5.2963979002148596e-05, "loss": 2.7304, "step": 49459 }, { "epoch": 2.42, "grad_norm": 0.7011268734931946, "learning_rate": 5.295524377687914e-05, "loss": 2.7672, "step": 49460 }, { "epoch": 2.42, "grad_norm": 0.7620493769645691, "learning_rate": 5.294650920227791e-05, "loss": 2.7686, "step": 49461 }, { "epoch": 2.42, "grad_norm": 0.7569233775138855, "learning_rate": 5.293777527836808e-05, "loss": 2.7607, "step": 49462 }, { "epoch": 2.42, "grad_norm": 0.7677346467971802, "learning_rate": 5.292904200517253e-05, "loss": 2.9383, "step": 49463 }, { "epoch": 2.42, "grad_norm": 0.7182645201683044, "learning_rate": 5.292030938271434e-05, "loss": 2.9959, "step": 49464 }, { "epoch": 2.42, "grad_norm": 0.7732892036437988, "learning_rate": 5.291157741101657e-05, "loss": 2.8637, "step": 49465 }, { "epoch": 2.42, "grad_norm": 0.7590673565864563, "learning_rate": 5.290284609010209e-05, "loss": 2.7361, "step": 49466 }, { "epoch": 2.42, "grad_norm": 0.6932297945022583, "learning_rate": 5.289411541999406e-05, "loss": 3.0343, "step": 49467 }, { "epoch": 2.42, "grad_norm": 0.7510533332824707, "learning_rate": 5.288538540071534e-05, "loss": 2.8191, "step": 49468 }, { "epoch": 2.42, "grad_norm": 0.7724784016609192, "learning_rate": 5.287665603228887e-05, "loss": 2.7771, "step": 49469 }, { "epoch": 2.42, "grad_norm": 0.7106216549873352, "learning_rate": 5.2867927314737815e-05, "loss": 2.9812, "step": 49470 }, { "epoch": 2.42, "grad_norm": 0.7474266290664673, "learning_rate": 5.2859199248084994e-05, "loss": 2.8478, "step": 49471 }, { "epoch": 2.42, "grad_norm": 0.7339901924133301, "learning_rate": 5.2850471832353594e-05, "loss": 2.9416, "step": 49472 }, { "epoch": 2.42, "grad_norm": 0.7034941911697388, "learning_rate": 5.284174506756636e-05, "loss": 2.9021, "step": 49473 }, { "epoch": 2.42, "grad_norm": 0.7442716956138611, "learning_rate": 5.28330189537465e-05, "loss": 2.9255, "step": 49474 }, { "epoch": 2.42, "grad_norm": 0.7037724852561951, "learning_rate": 5.28242934909169e-05, "loss": 2.6829, "step": 49475 }, { "epoch": 2.42, "grad_norm": 0.7233594655990601, "learning_rate": 5.281556867910048e-05, "loss": 2.8918, "step": 49476 }, { "epoch": 2.42, "grad_norm": 0.7807363867759705, "learning_rate": 5.280684451832032e-05, "loss": 2.9158, "step": 49477 }, { "epoch": 2.42, "grad_norm": 0.7810481786727905, "learning_rate": 5.279812100859934e-05, "loss": 2.9131, "step": 49478 }, { "epoch": 2.42, "grad_norm": 0.7510912418365479, "learning_rate": 5.278939814996057e-05, "loss": 2.8349, "step": 49479 }, { "epoch": 2.42, "grad_norm": 0.8030551671981812, "learning_rate": 5.2780675942426874e-05, "loss": 2.7635, "step": 49480 }, { "epoch": 2.42, "grad_norm": 0.7197742462158203, "learning_rate": 5.2771954386021296e-05, "loss": 3.1214, "step": 49481 }, { "epoch": 2.42, "grad_norm": 0.7838296890258789, "learning_rate": 5.276323348076689e-05, "loss": 3.0325, "step": 49482 }, { "epoch": 2.43, "grad_norm": 0.777555525302887, "learning_rate": 5.2754513226686556e-05, "loss": 2.9526, "step": 49483 }, { "epoch": 2.43, "grad_norm": 0.7960125207901001, "learning_rate": 5.274579362380322e-05, "loss": 2.8986, "step": 49484 }, { "epoch": 2.43, "grad_norm": 0.7355430126190186, "learning_rate": 5.2737074672139837e-05, "loss": 2.8928, "step": 49485 }, { "epoch": 2.43, "grad_norm": 0.7454689741134644, "learning_rate": 5.272835637171941e-05, "loss": 2.8384, "step": 49486 }, { "epoch": 2.43, "grad_norm": 0.7542171478271484, "learning_rate": 5.2719638722564986e-05, "loss": 3.036, "step": 49487 }, { "epoch": 2.43, "grad_norm": 0.7559950947761536, "learning_rate": 5.271092172469935e-05, "loss": 2.6835, "step": 49488 }, { "epoch": 2.43, "grad_norm": 0.6909935474395752, "learning_rate": 5.270220537814562e-05, "loss": 2.8508, "step": 49489 }, { "epoch": 2.43, "grad_norm": 0.7282992005348206, "learning_rate": 5.269348968292671e-05, "loss": 2.9179, "step": 49490 }, { "epoch": 2.43, "grad_norm": 0.7650557160377502, "learning_rate": 5.268477463906548e-05, "loss": 2.8272, "step": 49491 }, { "epoch": 2.43, "grad_norm": 0.744831383228302, "learning_rate": 5.267606024658502e-05, "loss": 2.8916, "step": 49492 }, { "epoch": 2.43, "grad_norm": 0.7628011107444763, "learning_rate": 5.266734650550813e-05, "loss": 2.6101, "step": 49493 }, { "epoch": 2.43, "grad_norm": 0.7256711721420288, "learning_rate": 5.265863341585794e-05, "loss": 3.0817, "step": 49494 }, { "epoch": 2.43, "grad_norm": 0.7441546320915222, "learning_rate": 5.2649920977657225e-05, "loss": 2.9964, "step": 49495 }, { "epoch": 2.43, "grad_norm": 0.7593109607696533, "learning_rate": 5.264120919092911e-05, "loss": 2.8387, "step": 49496 }, { "epoch": 2.43, "grad_norm": 0.722559928894043, "learning_rate": 5.2632498055696424e-05, "loss": 3.0698, "step": 49497 }, { "epoch": 2.43, "grad_norm": 0.7306622862815857, "learning_rate": 5.2623787571982044e-05, "loss": 2.8901, "step": 49498 }, { "epoch": 2.43, "grad_norm": 0.7256033420562744, "learning_rate": 5.261507773980912e-05, "loss": 2.958, "step": 49499 }, { "epoch": 2.43, "grad_norm": 0.7559966444969177, "learning_rate": 5.260636855920036e-05, "loss": 2.9003, "step": 49500 }, { "epoch": 2.43, "grad_norm": 0.7539312839508057, "learning_rate": 5.2597660030178825e-05, "loss": 2.7591, "step": 49501 }, { "epoch": 2.43, "grad_norm": 0.7615699172019958, "learning_rate": 5.258895215276752e-05, "loss": 2.8271, "step": 49502 }, { "epoch": 2.43, "grad_norm": 0.720807671546936, "learning_rate": 5.258024492698919e-05, "loss": 2.739, "step": 49503 }, { "epoch": 2.43, "grad_norm": 0.8670095801353455, "learning_rate": 5.2571538352866994e-05, "loss": 2.8535, "step": 49504 }, { "epoch": 2.43, "grad_norm": 0.708129346370697, "learning_rate": 5.256283243042374e-05, "loss": 3.1879, "step": 49505 }, { "epoch": 2.43, "grad_norm": 0.6828888058662415, "learning_rate": 5.255412715968228e-05, "loss": 3.0061, "step": 49506 }, { "epoch": 2.43, "grad_norm": 0.722131073474884, "learning_rate": 5.254542254066569e-05, "loss": 3.0957, "step": 49507 }, { "epoch": 2.43, "grad_norm": 0.7353353500366211, "learning_rate": 5.2536718573396796e-05, "loss": 2.8735, "step": 49508 }, { "epoch": 2.43, "grad_norm": 0.7344322204589844, "learning_rate": 5.2528015257898605e-05, "loss": 2.8081, "step": 49509 }, { "epoch": 2.43, "grad_norm": 0.7135079503059387, "learning_rate": 5.251931259419393e-05, "loss": 2.9412, "step": 49510 }, { "epoch": 2.43, "grad_norm": 0.7717560529708862, "learning_rate": 5.251061058230582e-05, "loss": 2.8145, "step": 49511 }, { "epoch": 2.43, "grad_norm": 0.7272468209266663, "learning_rate": 5.250190922225717e-05, "loss": 2.9457, "step": 49512 }, { "epoch": 2.43, "grad_norm": 0.7593507170677185, "learning_rate": 5.2493208514070736e-05, "loss": 2.7668, "step": 49513 }, { "epoch": 2.43, "grad_norm": 0.7987231612205505, "learning_rate": 5.2484508457769657e-05, "loss": 2.7785, "step": 49514 }, { "epoch": 2.43, "grad_norm": 0.6929014325141907, "learning_rate": 5.247580905337667e-05, "loss": 2.7896, "step": 49515 }, { "epoch": 2.43, "grad_norm": 0.7104371190071106, "learning_rate": 5.246711030091476e-05, "loss": 2.8161, "step": 49516 }, { "epoch": 2.43, "grad_norm": 0.6814081072807312, "learning_rate": 5.245841220040691e-05, "loss": 2.8394, "step": 49517 }, { "epoch": 2.43, "grad_norm": 0.691145658493042, "learning_rate": 5.244971475187599e-05, "loss": 2.9412, "step": 49518 }, { "epoch": 2.43, "grad_norm": 0.7448225021362305, "learning_rate": 5.244101795534487e-05, "loss": 2.8151, "step": 49519 }, { "epoch": 2.43, "grad_norm": 0.7341607213020325, "learning_rate": 5.243232181083638e-05, "loss": 2.8329, "step": 49520 }, { "epoch": 2.43, "grad_norm": 0.7958085536956787, "learning_rate": 5.24236263183735e-05, "loss": 2.9727, "step": 49521 }, { "epoch": 2.43, "grad_norm": 0.7446455359458923, "learning_rate": 5.2414931477979225e-05, "loss": 3.0168, "step": 49522 }, { "epoch": 2.43, "grad_norm": 0.7200234532356262, "learning_rate": 5.24062372896763e-05, "loss": 2.8772, "step": 49523 }, { "epoch": 2.43, "grad_norm": 0.7179722785949707, "learning_rate": 5.239754375348777e-05, "loss": 2.9839, "step": 49524 }, { "epoch": 2.43, "grad_norm": 0.7151455879211426, "learning_rate": 5.2388850869436385e-05, "loss": 2.7769, "step": 49525 }, { "epoch": 2.43, "grad_norm": 0.7526031732559204, "learning_rate": 5.238015863754519e-05, "loss": 3.0104, "step": 49526 }, { "epoch": 2.43, "grad_norm": 0.7519762516021729, "learning_rate": 5.2371467057837003e-05, "loss": 2.9088, "step": 49527 }, { "epoch": 2.43, "grad_norm": 0.7097587585449219, "learning_rate": 5.236277613033461e-05, "loss": 2.985, "step": 49528 }, { "epoch": 2.43, "grad_norm": 0.6919777989387512, "learning_rate": 5.235408585506111e-05, "loss": 2.8978, "step": 49529 }, { "epoch": 2.43, "grad_norm": 0.7104206681251526, "learning_rate": 5.2345396232039205e-05, "loss": 2.9985, "step": 49530 }, { "epoch": 2.43, "grad_norm": 0.721882164478302, "learning_rate": 5.2336707261291865e-05, "loss": 2.9761, "step": 49531 }, { "epoch": 2.43, "grad_norm": 0.7098190784454346, "learning_rate": 5.2328018942842044e-05, "loss": 2.9406, "step": 49532 }, { "epoch": 2.43, "grad_norm": 0.7680677771568298, "learning_rate": 5.231933127671258e-05, "loss": 2.9672, "step": 49533 }, { "epoch": 2.43, "grad_norm": 0.7131537795066833, "learning_rate": 5.2310644262926305e-05, "loss": 2.9661, "step": 49534 }, { "epoch": 2.43, "grad_norm": 0.7221238017082214, "learning_rate": 5.2301957901506054e-05, "loss": 2.9696, "step": 49535 }, { "epoch": 2.43, "grad_norm": 0.7687680125236511, "learning_rate": 5.229327219247478e-05, "loss": 2.8797, "step": 49536 }, { "epoch": 2.43, "grad_norm": 0.7497595548629761, "learning_rate": 5.228458713585543e-05, "loss": 2.8141, "step": 49537 }, { "epoch": 2.43, "grad_norm": 0.7552316784858704, "learning_rate": 5.2275902731670716e-05, "loss": 2.9196, "step": 49538 }, { "epoch": 2.43, "grad_norm": 0.7658446431159973, "learning_rate": 5.2267218979943695e-05, "loss": 2.9684, "step": 49539 }, { "epoch": 2.43, "grad_norm": 0.7868543267250061, "learning_rate": 5.225853588069714e-05, "loss": 2.8374, "step": 49540 }, { "epoch": 2.43, "grad_norm": 0.72367262840271, "learning_rate": 5.224985343395384e-05, "loss": 2.9543, "step": 49541 }, { "epoch": 2.43, "grad_norm": 0.7303585410118103, "learning_rate": 5.22411716397368e-05, "loss": 2.9499, "step": 49542 }, { "epoch": 2.43, "grad_norm": 0.7236695885658264, "learning_rate": 5.223249049806878e-05, "loss": 3.0774, "step": 49543 }, { "epoch": 2.43, "grad_norm": 0.7127900123596191, "learning_rate": 5.222381000897279e-05, "loss": 3.1424, "step": 49544 }, { "epoch": 2.43, "grad_norm": 0.7575745582580566, "learning_rate": 5.2215130172471476e-05, "loss": 3.0335, "step": 49545 }, { "epoch": 2.43, "grad_norm": 0.7566746473312378, "learning_rate": 5.220645098858786e-05, "loss": 2.8638, "step": 49546 }, { "epoch": 2.43, "grad_norm": 0.7185924053192139, "learning_rate": 5.219777245734484e-05, "loss": 2.9908, "step": 49547 }, { "epoch": 2.43, "grad_norm": 0.7872166633605957, "learning_rate": 5.218909457876515e-05, "loss": 2.8108, "step": 49548 }, { "epoch": 2.43, "grad_norm": 0.7263684868812561, "learning_rate": 5.2180417352871736e-05, "loss": 2.7144, "step": 49549 }, { "epoch": 2.43, "grad_norm": 0.7238753437995911, "learning_rate": 5.2171740779687346e-05, "loss": 2.9509, "step": 49550 }, { "epoch": 2.43, "grad_norm": 0.7120121121406555, "learning_rate": 5.216306485923485e-05, "loss": 2.8628, "step": 49551 }, { "epoch": 2.43, "grad_norm": 0.6943619251251221, "learning_rate": 5.2154389591537236e-05, "loss": 2.7111, "step": 49552 }, { "epoch": 2.43, "grad_norm": 0.709274411201477, "learning_rate": 5.214571497661723e-05, "loss": 2.7601, "step": 49553 }, { "epoch": 2.43, "grad_norm": 0.7480226755142212, "learning_rate": 5.2137041014497726e-05, "loss": 2.9873, "step": 49554 }, { "epoch": 2.43, "grad_norm": 0.7200178503990173, "learning_rate": 5.2128367705201594e-05, "loss": 2.7593, "step": 49555 }, { "epoch": 2.43, "grad_norm": 0.7735962867736816, "learning_rate": 5.211969504875153e-05, "loss": 2.8678, "step": 49556 }, { "epoch": 2.43, "grad_norm": 0.7781702876091003, "learning_rate": 5.2111023045170565e-05, "loss": 2.9205, "step": 49557 }, { "epoch": 2.43, "grad_norm": 0.7568464875221252, "learning_rate": 5.21023516944814e-05, "loss": 3.0284, "step": 49558 }, { "epoch": 2.43, "grad_norm": 0.7191801071166992, "learning_rate": 5.2093680996707e-05, "loss": 2.9578, "step": 49559 }, { "epoch": 2.43, "grad_norm": 0.7512852549552917, "learning_rate": 5.208501095187003e-05, "loss": 3.0233, "step": 49560 }, { "epoch": 2.43, "grad_norm": 0.7531700134277344, "learning_rate": 5.207634155999355e-05, "loss": 2.6399, "step": 49561 }, { "epoch": 2.43, "grad_norm": 0.7530140280723572, "learning_rate": 5.2067672821100234e-05, "loss": 3.0416, "step": 49562 }, { "epoch": 2.43, "grad_norm": 0.7887686491012573, "learning_rate": 5.2059004735212904e-05, "loss": 3.0472, "step": 49563 }, { "epoch": 2.43, "grad_norm": 0.7277945280075073, "learning_rate": 5.20503373023545e-05, "loss": 2.7531, "step": 49564 }, { "epoch": 2.43, "grad_norm": 0.7439764142036438, "learning_rate": 5.2041670522547715e-05, "loss": 2.9304, "step": 49565 }, { "epoch": 2.43, "grad_norm": 0.737322986125946, "learning_rate": 5.203300439581556e-05, "loss": 3.0708, "step": 49566 }, { "epoch": 2.43, "grad_norm": 0.7382588982582092, "learning_rate": 5.2024338922180606e-05, "loss": 2.8939, "step": 49567 }, { "epoch": 2.43, "grad_norm": 0.729706883430481, "learning_rate": 5.201567410166588e-05, "loss": 3.2045, "step": 49568 }, { "epoch": 2.43, "grad_norm": 0.7282545566558838, "learning_rate": 5.20070099342942e-05, "loss": 2.8105, "step": 49569 }, { "epoch": 2.43, "grad_norm": 0.7177003026008606, "learning_rate": 5.19983464200883e-05, "loss": 2.7949, "step": 49570 }, { "epoch": 2.43, "grad_norm": 0.7702353000640869, "learning_rate": 5.1989683559071074e-05, "loss": 2.9669, "step": 49571 }, { "epoch": 2.43, "grad_norm": 0.7214787602424622, "learning_rate": 5.198102135126516e-05, "loss": 2.8121, "step": 49572 }, { "epoch": 2.43, "grad_norm": 0.8282303214073181, "learning_rate": 5.1972359796693554e-05, "loss": 2.9693, "step": 49573 }, { "epoch": 2.43, "grad_norm": 0.7337009906768799, "learning_rate": 5.196369889537906e-05, "loss": 2.7557, "step": 49574 }, { "epoch": 2.43, "grad_norm": 0.7773966789245605, "learning_rate": 5.195503864734439e-05, "loss": 2.9181, "step": 49575 }, { "epoch": 2.43, "grad_norm": 0.6771313548088074, "learning_rate": 5.194637905261247e-05, "loss": 2.6734, "step": 49576 }, { "epoch": 2.43, "grad_norm": 0.7496398091316223, "learning_rate": 5.1937720111206085e-05, "loss": 2.9345, "step": 49577 }, { "epoch": 2.43, "grad_norm": 0.7166499495506287, "learning_rate": 5.19290618231479e-05, "loss": 2.7787, "step": 49578 }, { "epoch": 2.43, "grad_norm": 0.7843281626701355, "learning_rate": 5.192040418846091e-05, "loss": 2.7845, "step": 49579 }, { "epoch": 2.43, "grad_norm": 0.7141745686531067, "learning_rate": 5.1911747207167754e-05, "loss": 2.7912, "step": 49580 }, { "epoch": 2.43, "grad_norm": 0.7347038388252258, "learning_rate": 5.1903090879291384e-05, "loss": 2.7822, "step": 49581 }, { "epoch": 2.43, "grad_norm": 0.7614521980285645, "learning_rate": 5.189443520485442e-05, "loss": 2.749, "step": 49582 }, { "epoch": 2.43, "grad_norm": 0.7512968182563782, "learning_rate": 5.188578018387981e-05, "loss": 2.9849, "step": 49583 }, { "epoch": 2.43, "grad_norm": 0.7517346143722534, "learning_rate": 5.1877125816390365e-05, "loss": 2.7312, "step": 49584 }, { "epoch": 2.43, "grad_norm": 0.728527307510376, "learning_rate": 5.1868472102408834e-05, "loss": 2.9731, "step": 49585 }, { "epoch": 2.43, "grad_norm": 0.7840355634689331, "learning_rate": 5.185981904195796e-05, "loss": 2.8932, "step": 49586 }, { "epoch": 2.43, "grad_norm": 0.7409562468528748, "learning_rate": 5.18511666350605e-05, "loss": 3.0215, "step": 49587 }, { "epoch": 2.43, "grad_norm": 0.756843626499176, "learning_rate": 5.184251488173934e-05, "loss": 3.0411, "step": 49588 }, { "epoch": 2.43, "grad_norm": 0.7006279826164246, "learning_rate": 5.1833863782017284e-05, "loss": 2.7748, "step": 49589 }, { "epoch": 2.43, "grad_norm": 0.708868145942688, "learning_rate": 5.182521333591698e-05, "loss": 2.8136, "step": 49590 }, { "epoch": 2.43, "grad_norm": 0.779094934463501, "learning_rate": 5.181656354346142e-05, "loss": 2.9222, "step": 49591 }, { "epoch": 2.43, "grad_norm": 0.7329817414283752, "learning_rate": 5.1807914404673244e-05, "loss": 2.7675, "step": 49592 }, { "epoch": 2.43, "grad_norm": 0.7352889180183411, "learning_rate": 5.1799265919575175e-05, "loss": 2.6653, "step": 49593 }, { "epoch": 2.43, "grad_norm": 0.737707793712616, "learning_rate": 5.179061808819015e-05, "loss": 3.0523, "step": 49594 }, { "epoch": 2.43, "grad_norm": 0.737191915512085, "learning_rate": 5.178197091054079e-05, "loss": 3.004, "step": 49595 }, { "epoch": 2.43, "grad_norm": 0.7531353831291199, "learning_rate": 5.177332438665005e-05, "loss": 3.0624, "step": 49596 }, { "epoch": 2.43, "grad_norm": 0.6949726939201355, "learning_rate": 5.17646785165405e-05, "loss": 2.8186, "step": 49597 }, { "epoch": 2.43, "grad_norm": 0.6904205083847046, "learning_rate": 5.17560333002351e-05, "loss": 3.0604, "step": 49598 }, { "epoch": 2.43, "grad_norm": 0.7312195301055908, "learning_rate": 5.174738873775652e-05, "loss": 3.1245, "step": 49599 }, { "epoch": 2.43, "grad_norm": 0.7542881965637207, "learning_rate": 5.173874482912749e-05, "loss": 2.9555, "step": 49600 }, { "epoch": 2.43, "grad_norm": 0.7179239988327026, "learning_rate": 5.173010157437088e-05, "loss": 2.8191, "step": 49601 }, { "epoch": 2.43, "grad_norm": 0.7660733461380005, "learning_rate": 5.172145897350933e-05, "loss": 2.7249, "step": 49602 }, { "epoch": 2.43, "grad_norm": 0.7874742150306702, "learning_rate": 5.171281702656568e-05, "loss": 3.0061, "step": 49603 }, { "epoch": 2.43, "grad_norm": 0.7039852738380432, "learning_rate": 5.170417573356276e-05, "loss": 3.0712, "step": 49604 }, { "epoch": 2.43, "grad_norm": 0.7054070830345154, "learning_rate": 5.169553509452319e-05, "loss": 2.7834, "step": 49605 }, { "epoch": 2.43, "grad_norm": 0.7984621524810791, "learning_rate": 5.168689510946985e-05, "loss": 2.8207, "step": 49606 }, { "epoch": 2.43, "grad_norm": 0.7172672748565674, "learning_rate": 5.167825577842546e-05, "loss": 2.8539, "step": 49607 }, { "epoch": 2.43, "grad_norm": 0.7304677367210388, "learning_rate": 5.1669617101412663e-05, "loss": 2.7716, "step": 49608 }, { "epoch": 2.43, "grad_norm": 0.6986484527587891, "learning_rate": 5.166097907845438e-05, "loss": 3.0376, "step": 49609 }, { "epoch": 2.43, "grad_norm": 0.7038609981536865, "learning_rate": 5.165234170957319e-05, "loss": 2.8795, "step": 49610 }, { "epoch": 2.43, "grad_norm": 0.7226280570030212, "learning_rate": 5.164370499479204e-05, "loss": 2.8304, "step": 49611 }, { "epoch": 2.43, "grad_norm": 0.7109252214431763, "learning_rate": 5.16350689341335e-05, "loss": 2.8575, "step": 49612 }, { "epoch": 2.43, "grad_norm": 0.7385297417640686, "learning_rate": 5.162643352762045e-05, "loss": 2.8261, "step": 49613 }, { "epoch": 2.43, "grad_norm": 0.7628830671310425, "learning_rate": 5.1617798775275576e-05, "loss": 2.9239, "step": 49614 }, { "epoch": 2.43, "grad_norm": 0.787331759929657, "learning_rate": 5.160916467712154e-05, "loss": 3.0273, "step": 49615 }, { "epoch": 2.43, "grad_norm": 0.7409013509750366, "learning_rate": 5.160053123318125e-05, "loss": 2.9011, "step": 49616 }, { "epoch": 2.43, "grad_norm": 0.7271203994750977, "learning_rate": 5.1591898443477263e-05, "loss": 2.9749, "step": 49617 }, { "epoch": 2.43, "grad_norm": 0.7120565176010132, "learning_rate": 5.158326630803242e-05, "loss": 2.8932, "step": 49618 }, { "epoch": 2.43, "grad_norm": 0.706852376461029, "learning_rate": 5.1574634826869544e-05, "loss": 3.0749, "step": 49619 }, { "epoch": 2.43, "grad_norm": 0.7524295449256897, "learning_rate": 5.156600400001124e-05, "loss": 2.855, "step": 49620 }, { "epoch": 2.43, "grad_norm": 0.7389339208602905, "learning_rate": 5.1557373827480264e-05, "loss": 2.967, "step": 49621 }, { "epoch": 2.43, "grad_norm": 0.7299748063087463, "learning_rate": 5.1548744309299295e-05, "loss": 2.9577, "step": 49622 }, { "epoch": 2.43, "grad_norm": 0.7077644467353821, "learning_rate": 5.154011544549108e-05, "loss": 2.8618, "step": 49623 }, { "epoch": 2.43, "grad_norm": 0.7535231113433838, "learning_rate": 5.153148723607851e-05, "loss": 2.6616, "step": 49624 }, { "epoch": 2.43, "grad_norm": 0.7145181894302368, "learning_rate": 5.1522859681084084e-05, "loss": 2.9405, "step": 49625 }, { "epoch": 2.43, "grad_norm": 0.7331188917160034, "learning_rate": 5.151423278053069e-05, "loss": 2.9301, "step": 49626 }, { "epoch": 2.43, "grad_norm": 0.756357729434967, "learning_rate": 5.150560653444093e-05, "loss": 2.965, "step": 49627 }, { "epoch": 2.43, "grad_norm": 0.7148869633674622, "learning_rate": 5.149698094283765e-05, "loss": 2.9109, "step": 49628 }, { "epoch": 2.43, "grad_norm": 0.7142993211746216, "learning_rate": 5.148835600574349e-05, "loss": 3.0988, "step": 49629 }, { "epoch": 2.43, "grad_norm": 0.8061341047286987, "learning_rate": 5.14797317231811e-05, "loss": 2.7739, "step": 49630 }, { "epoch": 2.43, "grad_norm": 0.7073236107826233, "learning_rate": 5.147110809517334e-05, "loss": 2.8595, "step": 49631 }, { "epoch": 2.43, "grad_norm": 0.7803508043289185, "learning_rate": 5.146248512174278e-05, "loss": 2.7204, "step": 49632 }, { "epoch": 2.43, "grad_norm": 0.758151113986969, "learning_rate": 5.1453862802912195e-05, "loss": 2.9361, "step": 49633 }, { "epoch": 2.43, "grad_norm": 0.7642468214035034, "learning_rate": 5.1445241138704395e-05, "loss": 2.6602, "step": 49634 }, { "epoch": 2.43, "grad_norm": 0.7350394129753113, "learning_rate": 5.143662012914197e-05, "loss": 2.734, "step": 49635 }, { "epoch": 2.43, "grad_norm": 0.6950120329856873, "learning_rate": 5.1427999774247664e-05, "loss": 2.6326, "step": 49636 }, { "epoch": 2.43, "grad_norm": 0.7212783694267273, "learning_rate": 5.141938007404409e-05, "loss": 2.8121, "step": 49637 }, { "epoch": 2.43, "grad_norm": 0.7310404777526855, "learning_rate": 5.14107610285541e-05, "loss": 2.9614, "step": 49638 }, { "epoch": 2.43, "grad_norm": 0.7168389558792114, "learning_rate": 5.1402142637800246e-05, "loss": 2.9321, "step": 49639 }, { "epoch": 2.43, "grad_norm": 0.7117984294891357, "learning_rate": 5.139352490180534e-05, "loss": 2.7888, "step": 49640 }, { "epoch": 2.43, "grad_norm": 0.7920997738838196, "learning_rate": 5.138490782059209e-05, "loss": 2.9202, "step": 49641 }, { "epoch": 2.43, "grad_norm": 0.6791305541992188, "learning_rate": 5.1376291394183154e-05, "loss": 2.5108, "step": 49642 }, { "epoch": 2.43, "grad_norm": 0.7199430465698242, "learning_rate": 5.136767562260121e-05, "loss": 2.7794, "step": 49643 }, { "epoch": 2.43, "grad_norm": 0.6989941596984863, "learning_rate": 5.135906050586888e-05, "loss": 2.9884, "step": 49644 }, { "epoch": 2.43, "grad_norm": 0.7715273499488831, "learning_rate": 5.135044604400893e-05, "loss": 2.8518, "step": 49645 }, { "epoch": 2.43, "grad_norm": 0.7331072688102722, "learning_rate": 5.134183223704412e-05, "loss": 2.9443, "step": 49646 }, { "epoch": 2.43, "grad_norm": 0.7313644886016846, "learning_rate": 5.1333219084997036e-05, "loss": 2.8292, "step": 49647 }, { "epoch": 2.43, "grad_norm": 0.7467599511146545, "learning_rate": 5.1324606587890416e-05, "loss": 3.0896, "step": 49648 }, { "epoch": 2.43, "grad_norm": 0.7694347500801086, "learning_rate": 5.131599474574688e-05, "loss": 3.0131, "step": 49649 }, { "epoch": 2.43, "grad_norm": 0.7463330626487732, "learning_rate": 5.130738355858924e-05, "loss": 3.0468, "step": 49650 }, { "epoch": 2.43, "grad_norm": 0.7481557130813599, "learning_rate": 5.1298773026440056e-05, "loss": 2.9252, "step": 49651 }, { "epoch": 2.43, "grad_norm": 0.705471932888031, "learning_rate": 5.129016314932199e-05, "loss": 2.9475, "step": 49652 }, { "epoch": 2.43, "grad_norm": 0.7567241191864014, "learning_rate": 5.128155392725785e-05, "loss": 2.8548, "step": 49653 }, { "epoch": 2.43, "grad_norm": 0.6947323083877563, "learning_rate": 5.127294536027012e-05, "loss": 2.675, "step": 49654 }, { "epoch": 2.43, "grad_norm": 0.7445532083511353, "learning_rate": 5.126433744838161e-05, "loss": 3.0083, "step": 49655 }, { "epoch": 2.43, "grad_norm": 0.7622706294059753, "learning_rate": 5.1255730191615055e-05, "loss": 2.7358, "step": 49656 }, { "epoch": 2.43, "grad_norm": 0.7756778597831726, "learning_rate": 5.1247123589993e-05, "loss": 3.0063, "step": 49657 }, { "epoch": 2.43, "grad_norm": 0.7175281047821045, "learning_rate": 5.123851764353817e-05, "loss": 2.9401, "step": 49658 }, { "epoch": 2.43, "grad_norm": 0.710747480392456, "learning_rate": 5.122991235227314e-05, "loss": 2.8863, "step": 49659 }, { "epoch": 2.43, "grad_norm": 0.7191362977027893, "learning_rate": 5.122130771622063e-05, "loss": 2.8623, "step": 49660 }, { "epoch": 2.43, "grad_norm": 0.7421351075172424, "learning_rate": 5.121270373540338e-05, "loss": 2.8252, "step": 49661 }, { "epoch": 2.43, "grad_norm": 0.7567232847213745, "learning_rate": 5.120410040984395e-05, "loss": 3.1095, "step": 49662 }, { "epoch": 2.43, "grad_norm": 0.7147611975669861, "learning_rate": 5.1195497739565086e-05, "loss": 2.9548, "step": 49663 }, { "epoch": 2.43, "grad_norm": 0.7359359264373779, "learning_rate": 5.11868957245894e-05, "loss": 2.8819, "step": 49664 }, { "epoch": 2.43, "grad_norm": 0.7486942410469055, "learning_rate": 5.1178294364939464e-05, "loss": 3.0448, "step": 49665 }, { "epoch": 2.43, "grad_norm": 0.7085438966751099, "learning_rate": 5.116969366063811e-05, "loss": 2.9634, "step": 49666 }, { "epoch": 2.43, "grad_norm": 0.7525193095207214, "learning_rate": 5.1161093611707816e-05, "loss": 2.8993, "step": 49667 }, { "epoch": 2.43, "grad_norm": 0.760326623916626, "learning_rate": 5.115249421817136e-05, "loss": 3.0144, "step": 49668 }, { "epoch": 2.43, "grad_norm": 0.75190669298172, "learning_rate": 5.114389548005129e-05, "loss": 2.6683, "step": 49669 }, { "epoch": 2.43, "grad_norm": 0.7118992805480957, "learning_rate": 5.113529739737029e-05, "loss": 3.0528, "step": 49670 }, { "epoch": 2.43, "grad_norm": 0.7461121082305908, "learning_rate": 5.112669997015111e-05, "loss": 2.8172, "step": 49671 }, { "epoch": 2.43, "grad_norm": 0.7285375595092773, "learning_rate": 5.1118103198416325e-05, "loss": 2.8323, "step": 49672 }, { "epoch": 2.43, "grad_norm": 0.7130274176597595, "learning_rate": 5.110950708218853e-05, "loss": 2.9635, "step": 49673 }, { "epoch": 2.43, "grad_norm": 0.7329024076461792, "learning_rate": 5.110091162149033e-05, "loss": 2.9495, "step": 49674 }, { "epoch": 2.43, "grad_norm": 0.7448126077651978, "learning_rate": 5.1092316816344404e-05, "loss": 3.0704, "step": 49675 }, { "epoch": 2.43, "grad_norm": 0.7353721857070923, "learning_rate": 5.108372266677351e-05, "loss": 2.918, "step": 49676 }, { "epoch": 2.43, "grad_norm": 0.7445089817047119, "learning_rate": 5.107512917280012e-05, "loss": 3.0014, "step": 49677 }, { "epoch": 2.43, "grad_norm": 0.7183898091316223, "learning_rate": 5.1066536334447e-05, "loss": 2.6663, "step": 49678 }, { "epoch": 2.43, "grad_norm": 0.7457320690155029, "learning_rate": 5.105794415173671e-05, "loss": 2.7268, "step": 49679 }, { "epoch": 2.43, "grad_norm": 0.7721786499023438, "learning_rate": 5.104935262469181e-05, "loss": 2.8341, "step": 49680 }, { "epoch": 2.43, "grad_norm": 0.7069118022918701, "learning_rate": 5.1040761753335044e-05, "loss": 2.6973, "step": 49681 }, { "epoch": 2.43, "grad_norm": 0.7151609659194946, "learning_rate": 5.103217153768897e-05, "loss": 2.901, "step": 49682 }, { "epoch": 2.43, "grad_norm": 0.7430471777915955, "learning_rate": 5.1023581977776296e-05, "loss": 3.03, "step": 49683 }, { "epoch": 2.43, "grad_norm": 0.7141035199165344, "learning_rate": 5.1014993073619536e-05, "loss": 2.9409, "step": 49684 }, { "epoch": 2.43, "grad_norm": 0.7425801753997803, "learning_rate": 5.100640482524134e-05, "loss": 2.962, "step": 49685 }, { "epoch": 2.43, "grad_norm": 0.8909529447555542, "learning_rate": 5.099781723266453e-05, "loss": 2.8787, "step": 49686 }, { "epoch": 2.44, "grad_norm": 0.7250303030014038, "learning_rate": 5.098923029591138e-05, "loss": 2.8883, "step": 49687 }, { "epoch": 2.44, "grad_norm": 0.7350665926933289, "learning_rate": 5.098064401500477e-05, "loss": 2.9089, "step": 49688 }, { "epoch": 2.44, "grad_norm": 0.7405259013175964, "learning_rate": 5.0972058389967116e-05, "loss": 2.9799, "step": 49689 }, { "epoch": 2.44, "grad_norm": 0.7162747383117676, "learning_rate": 5.096347342082114e-05, "loss": 2.8799, "step": 49690 }, { "epoch": 2.44, "grad_norm": 0.7429664731025696, "learning_rate": 5.095488910758956e-05, "loss": 2.8375, "step": 49691 }, { "epoch": 2.44, "grad_norm": 0.7161949872970581, "learning_rate": 5.094630545029475e-05, "loss": 2.8939, "step": 49692 }, { "epoch": 2.44, "grad_norm": 0.7380734086036682, "learning_rate": 5.093772244895952e-05, "loss": 2.928, "step": 49693 }, { "epoch": 2.44, "grad_norm": 0.778752326965332, "learning_rate": 5.092914010360642e-05, "loss": 2.9385, "step": 49694 }, { "epoch": 2.44, "grad_norm": 0.6871793866157532, "learning_rate": 5.092055841425795e-05, "loss": 2.8888, "step": 49695 }, { "epoch": 2.44, "grad_norm": 0.7245429158210754, "learning_rate": 5.091197738093684e-05, "loss": 3.1834, "step": 49696 }, { "epoch": 2.44, "grad_norm": 0.7372999787330627, "learning_rate": 5.0903397003665616e-05, "loss": 2.8991, "step": 49697 }, { "epoch": 2.44, "grad_norm": 0.7384405732154846, "learning_rate": 5.089481728246695e-05, "loss": 3.0387, "step": 49698 }, { "epoch": 2.44, "grad_norm": 0.7918789386749268, "learning_rate": 5.088623821736333e-05, "loss": 2.6822, "step": 49699 }, { "epoch": 2.44, "grad_norm": 0.6995186805725098, "learning_rate": 5.087765980837747e-05, "loss": 2.7499, "step": 49700 }, { "epoch": 2.44, "grad_norm": 0.7654358148574829, "learning_rate": 5.0869082055531915e-05, "loss": 2.7592, "step": 49701 }, { "epoch": 2.44, "grad_norm": 0.7654218077659607, "learning_rate": 5.0860504958849194e-05, "loss": 2.7973, "step": 49702 }, { "epoch": 2.44, "grad_norm": 0.7214524149894714, "learning_rate": 5.0851928518352e-05, "loss": 2.9906, "step": 49703 }, { "epoch": 2.44, "grad_norm": 0.737037718296051, "learning_rate": 5.084335273406284e-05, "loss": 2.8326, "step": 49704 }, { "epoch": 2.44, "grad_norm": 0.6899082660675049, "learning_rate": 5.083477760600431e-05, "loss": 3.0577, "step": 49705 }, { "epoch": 2.44, "grad_norm": 0.717755138874054, "learning_rate": 5.0826203134199095e-05, "loss": 2.9758, "step": 49706 }, { "epoch": 2.44, "grad_norm": 0.7414624094963074, "learning_rate": 5.081762931866965e-05, "loss": 2.791, "step": 49707 }, { "epoch": 2.44, "grad_norm": 0.7816479802131653, "learning_rate": 5.0809056159438686e-05, "loss": 2.9598, "step": 49708 }, { "epoch": 2.44, "grad_norm": 0.7575608491897583, "learning_rate": 5.080048365652872e-05, "loss": 2.9766, "step": 49709 }, { "epoch": 2.44, "grad_norm": 0.7834826707839966, "learning_rate": 5.079191180996224e-05, "loss": 2.8922, "step": 49710 }, { "epoch": 2.44, "grad_norm": 0.7255894541740417, "learning_rate": 5.078334061976195e-05, "loss": 3.0187, "step": 49711 }, { "epoch": 2.44, "grad_norm": 0.7802708745002747, "learning_rate": 5.0774770085950336e-05, "loss": 2.9521, "step": 49712 }, { "epoch": 2.44, "grad_norm": 0.6934137940406799, "learning_rate": 5.076620020855008e-05, "loss": 2.8263, "step": 49713 }, { "epoch": 2.44, "grad_norm": 0.7279984951019287, "learning_rate": 5.0757630987583596e-05, "loss": 2.8758, "step": 49714 }, { "epoch": 2.44, "grad_norm": 0.7556265592575073, "learning_rate": 5.074906242307364e-05, "loss": 2.8469, "step": 49715 }, { "epoch": 2.44, "grad_norm": 0.784926176071167, "learning_rate": 5.074049451504265e-05, "loss": 2.8924, "step": 49716 }, { "epoch": 2.44, "grad_norm": 0.7134363651275635, "learning_rate": 5.0731927263513185e-05, "loss": 2.9432, "step": 49717 }, { "epoch": 2.44, "grad_norm": 0.7337852120399475, "learning_rate": 5.072336066850792e-05, "loss": 3.0742, "step": 49718 }, { "epoch": 2.44, "grad_norm": 0.7417206168174744, "learning_rate": 5.071479473004927e-05, "loss": 2.9438, "step": 49719 }, { "epoch": 2.44, "grad_norm": 0.7676438689231873, "learning_rate": 5.0706229448159954e-05, "loss": 3.0447, "step": 49720 }, { "epoch": 2.44, "grad_norm": 0.7735446691513062, "learning_rate": 5.069766482286238e-05, "loss": 3.1604, "step": 49721 }, { "epoch": 2.44, "grad_norm": 0.7267362475395203, "learning_rate": 5.0689100854179245e-05, "loss": 2.8411, "step": 49722 }, { "epoch": 2.44, "grad_norm": 0.7745874524116516, "learning_rate": 5.068053754213305e-05, "loss": 3.0884, "step": 49723 }, { "epoch": 2.44, "grad_norm": 0.7192590236663818, "learning_rate": 5.067197488674625e-05, "loss": 2.8488, "step": 49724 }, { "epoch": 2.44, "grad_norm": 0.766343891620636, "learning_rate": 5.066341288804155e-05, "loss": 2.883, "step": 49725 }, { "epoch": 2.44, "grad_norm": 0.7043959498405457, "learning_rate": 5.0654851546041356e-05, "loss": 2.8619, "step": 49726 }, { "epoch": 2.44, "grad_norm": 0.7229441404342651, "learning_rate": 5.064629086076832e-05, "loss": 2.9009, "step": 49727 }, { "epoch": 2.44, "grad_norm": 0.71528559923172, "learning_rate": 5.063773083224502e-05, "loss": 2.8358, "step": 49728 }, { "epoch": 2.44, "grad_norm": 0.7294989824295044, "learning_rate": 5.062917146049388e-05, "loss": 2.5765, "step": 49729 }, { "epoch": 2.44, "grad_norm": 0.7297892570495605, "learning_rate": 5.062061274553759e-05, "loss": 3.1941, "step": 49730 }, { "epoch": 2.44, "grad_norm": 0.7526649832725525, "learning_rate": 5.061205468739861e-05, "loss": 2.9326, "step": 49731 }, { "epoch": 2.44, "grad_norm": 0.7116498351097107, "learning_rate": 5.06034972860994e-05, "loss": 2.7313, "step": 49732 }, { "epoch": 2.44, "grad_norm": 0.7177486419677734, "learning_rate": 5.0594940541662664e-05, "loss": 2.9048, "step": 49733 }, { "epoch": 2.44, "grad_norm": 0.7500801682472229, "learning_rate": 5.058638445411077e-05, "loss": 2.8435, "step": 49734 }, { "epoch": 2.44, "grad_norm": 0.7371991872787476, "learning_rate": 5.0577829023466446e-05, "loss": 2.8987, "step": 49735 }, { "epoch": 2.44, "grad_norm": 0.7661231756210327, "learning_rate": 5.056927424975199e-05, "loss": 2.7352, "step": 49736 }, { "epoch": 2.44, "grad_norm": 0.7188680171966553, "learning_rate": 5.056072013299016e-05, "loss": 3.0446, "step": 49737 }, { "epoch": 2.44, "grad_norm": 0.7141361832618713, "learning_rate": 5.055216667320341e-05, "loss": 3.023, "step": 49738 }, { "epoch": 2.44, "grad_norm": 0.7152678966522217, "learning_rate": 5.0543613870414144e-05, "loss": 2.9097, "step": 49739 }, { "epoch": 2.44, "grad_norm": 0.7155481576919556, "learning_rate": 5.0535061724645086e-05, "loss": 2.9771, "step": 49740 }, { "epoch": 2.44, "grad_norm": 0.7778782844543457, "learning_rate": 5.052651023591858e-05, "loss": 3.011, "step": 49741 }, { "epoch": 2.44, "grad_norm": 0.6794896125793457, "learning_rate": 5.0517959404257215e-05, "loss": 2.8614, "step": 49742 }, { "epoch": 2.44, "grad_norm": 0.7529333233833313, "learning_rate": 5.050940922968363e-05, "loss": 3.0088, "step": 49743 }, { "epoch": 2.44, "grad_norm": 0.7487618923187256, "learning_rate": 5.0500859712220245e-05, "loss": 2.8297, "step": 49744 }, { "epoch": 2.44, "grad_norm": 0.735675573348999, "learning_rate": 5.0492310851889575e-05, "loss": 2.8424, "step": 49745 }, { "epoch": 2.44, "grad_norm": 0.7913566827774048, "learning_rate": 5.048376264871406e-05, "loss": 2.986, "step": 49746 }, { "epoch": 2.44, "grad_norm": 0.668773353099823, "learning_rate": 5.0475215102716306e-05, "loss": 2.8925, "step": 49747 }, { "epoch": 2.44, "grad_norm": 0.7151376008987427, "learning_rate": 5.046666821391887e-05, "loss": 2.7141, "step": 49748 }, { "epoch": 2.44, "grad_norm": 0.7672746777534485, "learning_rate": 5.0458121982344144e-05, "loss": 2.8902, "step": 49749 }, { "epoch": 2.44, "grad_norm": 0.6590734124183655, "learning_rate": 5.044957640801475e-05, "loss": 2.8578, "step": 49750 }, { "epoch": 2.44, "grad_norm": 0.7381081581115723, "learning_rate": 5.044103149095309e-05, "loss": 2.8413, "step": 49751 }, { "epoch": 2.44, "grad_norm": 0.7295042872428894, "learning_rate": 5.043248723118182e-05, "loss": 2.9015, "step": 49752 }, { "epoch": 2.44, "grad_norm": 0.7408165335655212, "learning_rate": 5.042394362872332e-05, "loss": 2.8261, "step": 49753 }, { "epoch": 2.44, "grad_norm": 0.7612532377243042, "learning_rate": 5.041540068360004e-05, "loss": 2.8644, "step": 49754 }, { "epoch": 2.44, "grad_norm": 0.7137171030044556, "learning_rate": 5.0406858395834656e-05, "loss": 3.0074, "step": 49755 }, { "epoch": 2.44, "grad_norm": 0.6950783133506775, "learning_rate": 5.03983167654495e-05, "loss": 2.9107, "step": 49756 }, { "epoch": 2.44, "grad_norm": 0.7441567778587341, "learning_rate": 5.0389775792467134e-05, "loss": 2.7601, "step": 49757 }, { "epoch": 2.44, "grad_norm": 0.7424207925796509, "learning_rate": 5.038123547691012e-05, "loss": 2.8487, "step": 49758 }, { "epoch": 2.44, "grad_norm": 0.6980047225952148, "learning_rate": 5.03726958188009e-05, "loss": 2.7768, "step": 49759 }, { "epoch": 2.44, "grad_norm": 0.7469981908798218, "learning_rate": 5.036415681816196e-05, "loss": 2.9057, "step": 49760 }, { "epoch": 2.44, "grad_norm": 0.7088938355445862, "learning_rate": 5.0355618475015716e-05, "loss": 2.9294, "step": 49761 }, { "epoch": 2.44, "grad_norm": 0.7196033000946045, "learning_rate": 5.034708078938474e-05, "loss": 2.9308, "step": 49762 }, { "epoch": 2.44, "grad_norm": 0.7394540309906006, "learning_rate": 5.033854376129156e-05, "loss": 2.7985, "step": 49763 }, { "epoch": 2.44, "grad_norm": 0.7375257611274719, "learning_rate": 5.033000739075852e-05, "loss": 2.968, "step": 49764 }, { "epoch": 2.44, "grad_norm": 0.7190216779708862, "learning_rate": 5.0321471677808304e-05, "loss": 2.9031, "step": 49765 }, { "epoch": 2.44, "grad_norm": 0.769286572933197, "learning_rate": 5.031293662246325e-05, "loss": 2.5801, "step": 49766 }, { "epoch": 2.44, "grad_norm": 0.7621033191680908, "learning_rate": 5.030440222474582e-05, "loss": 2.91, "step": 49767 }, { "epoch": 2.44, "grad_norm": 0.7350282669067383, "learning_rate": 5.0295868484678595e-05, "loss": 2.8721, "step": 49768 }, { "epoch": 2.44, "grad_norm": 0.7469178438186646, "learning_rate": 5.028733540228391e-05, "loss": 2.9342, "step": 49769 }, { "epoch": 2.44, "grad_norm": 0.8052636384963989, "learning_rate": 5.027880297758443e-05, "loss": 3.0468, "step": 49770 }, { "epoch": 2.44, "grad_norm": 0.7492589354515076, "learning_rate": 5.027027121060243e-05, "loss": 2.9264, "step": 49771 }, { "epoch": 2.44, "grad_norm": 0.7447794675827026, "learning_rate": 5.026174010136046e-05, "loss": 2.9722, "step": 49772 }, { "epoch": 2.44, "grad_norm": 0.7605386972427368, "learning_rate": 5.0253209649881096e-05, "loss": 2.9998, "step": 49773 }, { "epoch": 2.44, "grad_norm": 0.7599433660507202, "learning_rate": 5.024467985618673e-05, "loss": 2.7689, "step": 49774 }, { "epoch": 2.44, "grad_norm": 0.7454763650894165, "learning_rate": 5.023615072029977e-05, "loss": 2.8439, "step": 49775 }, { "epoch": 2.44, "grad_norm": 0.7649785876274109, "learning_rate": 5.022762224224267e-05, "loss": 2.7798, "step": 49776 }, { "epoch": 2.44, "grad_norm": 0.7098384499549866, "learning_rate": 5.021909442203794e-05, "loss": 2.8681, "step": 49777 }, { "epoch": 2.44, "grad_norm": 0.7371013760566711, "learning_rate": 5.0210567259708135e-05, "loss": 2.8648, "step": 49778 }, { "epoch": 2.44, "grad_norm": 0.7675368785858154, "learning_rate": 5.020204075527553e-05, "loss": 2.9071, "step": 49779 }, { "epoch": 2.44, "grad_norm": 0.7284151315689087, "learning_rate": 5.0193514908762775e-05, "loss": 2.9625, "step": 49780 }, { "epoch": 2.44, "grad_norm": 0.7707260251045227, "learning_rate": 5.0184989720192194e-05, "loss": 2.9693, "step": 49781 }, { "epoch": 2.44, "grad_norm": 0.7367619276046753, "learning_rate": 5.017646518958619e-05, "loss": 2.9056, "step": 49782 }, { "epoch": 2.44, "grad_norm": 0.7275775074958801, "learning_rate": 5.0167941316967417e-05, "loss": 2.9327, "step": 49783 }, { "epoch": 2.44, "grad_norm": 0.7671332359313965, "learning_rate": 5.015941810235809e-05, "loss": 2.7362, "step": 49784 }, { "epoch": 2.44, "grad_norm": 0.7026022672653198, "learning_rate": 5.015089554578087e-05, "loss": 2.8569, "step": 49785 }, { "epoch": 2.44, "grad_norm": 0.7949316501617432, "learning_rate": 5.014237364725801e-05, "loss": 2.9605, "step": 49786 }, { "epoch": 2.44, "grad_norm": 0.7388706207275391, "learning_rate": 5.013385240681216e-05, "loss": 2.7446, "step": 49787 }, { "epoch": 2.44, "grad_norm": 0.7240619659423828, "learning_rate": 5.012533182446561e-05, "loss": 2.7338, "step": 49788 }, { "epoch": 2.44, "grad_norm": 0.7435773015022278, "learning_rate": 5.011681190024082e-05, "loss": 2.9415, "step": 49789 }, { "epoch": 2.44, "grad_norm": 0.7612718939781189, "learning_rate": 5.0108292634160294e-05, "loss": 2.857, "step": 49790 }, { "epoch": 2.44, "grad_norm": 0.7587111592292786, "learning_rate": 5.0099774026246366e-05, "loss": 3.0447, "step": 49791 }, { "epoch": 2.44, "grad_norm": 0.7427108287811279, "learning_rate": 5.009125607652163e-05, "loss": 2.682, "step": 49792 }, { "epoch": 2.44, "grad_norm": 0.75941002368927, "learning_rate": 5.008273878500836e-05, "loss": 2.8607, "step": 49793 }, { "epoch": 2.44, "grad_norm": 0.7439859509468079, "learning_rate": 5.007422215172905e-05, "loss": 2.8351, "step": 49794 }, { "epoch": 2.44, "grad_norm": 0.758802056312561, "learning_rate": 5.006570617670619e-05, "loss": 2.8682, "step": 49795 }, { "epoch": 2.44, "grad_norm": 0.7315456867218018, "learning_rate": 5.005719085996216e-05, "loss": 3.1513, "step": 49796 }, { "epoch": 2.44, "grad_norm": 0.7400312423706055, "learning_rate": 5.004867620151941e-05, "loss": 2.9039, "step": 49797 }, { "epoch": 2.44, "grad_norm": 0.766927182674408, "learning_rate": 5.0040162201400256e-05, "loss": 3.0392, "step": 49798 }, { "epoch": 2.44, "grad_norm": 0.756879985332489, "learning_rate": 5.003164885962722e-05, "loss": 2.956, "step": 49799 }, { "epoch": 2.44, "grad_norm": 0.7322831153869629, "learning_rate": 5.00231361762228e-05, "loss": 2.8099, "step": 49800 }, { "epoch": 2.44, "grad_norm": 0.7429162859916687, "learning_rate": 5.0014624151209224e-05, "loss": 2.9539, "step": 49801 }, { "epoch": 2.44, "grad_norm": 0.7556778192520142, "learning_rate": 5.000611278460911e-05, "loss": 3.0639, "step": 49802 }, { "epoch": 2.44, "grad_norm": 0.7347615957260132, "learning_rate": 4.99976020764448e-05, "loss": 2.9699, "step": 49803 }, { "epoch": 2.44, "grad_norm": 0.7194696068763733, "learning_rate": 4.998909202673858e-05, "loss": 2.9908, "step": 49804 }, { "epoch": 2.44, "grad_norm": 0.7030249238014221, "learning_rate": 4.998058263551306e-05, "loss": 2.8154, "step": 49805 }, { "epoch": 2.44, "grad_norm": 0.7510625123977661, "learning_rate": 4.997207390279052e-05, "loss": 2.9402, "step": 49806 }, { "epoch": 2.44, "grad_norm": 0.7080065011978149, "learning_rate": 4.9963565828593455e-05, "loss": 2.8348, "step": 49807 }, { "epoch": 2.44, "grad_norm": 0.7457221150398254, "learning_rate": 4.9955058412944205e-05, "loss": 3.1089, "step": 49808 }, { "epoch": 2.44, "grad_norm": 0.7274998426437378, "learning_rate": 4.994655165586519e-05, "loss": 2.8142, "step": 49809 }, { "epoch": 2.44, "grad_norm": 0.7139880657196045, "learning_rate": 4.993804555737888e-05, "loss": 2.9572, "step": 49810 }, { "epoch": 2.44, "grad_norm": 0.7105646133422852, "learning_rate": 4.9929540117507674e-05, "loss": 2.8571, "step": 49811 }, { "epoch": 2.44, "grad_norm": 0.6922122240066528, "learning_rate": 4.9921035336273904e-05, "loss": 2.9435, "step": 49812 }, { "epoch": 2.44, "grad_norm": 0.7425985932350159, "learning_rate": 4.991253121369993e-05, "loss": 2.8171, "step": 49813 }, { "epoch": 2.44, "grad_norm": 0.7572745084762573, "learning_rate": 4.990402774980824e-05, "loss": 3.0096, "step": 49814 }, { "epoch": 2.44, "grad_norm": 0.7443933486938477, "learning_rate": 4.989552494462127e-05, "loss": 2.6377, "step": 49815 }, { "epoch": 2.44, "grad_norm": 0.7220988273620605, "learning_rate": 4.9887022798161266e-05, "loss": 3.0297, "step": 49816 }, { "epoch": 2.44, "grad_norm": 0.7042877078056335, "learning_rate": 4.987852131045079e-05, "loss": 2.9561, "step": 49817 }, { "epoch": 2.44, "grad_norm": 0.7358666062355042, "learning_rate": 4.987002048151215e-05, "loss": 3.0252, "step": 49818 }, { "epoch": 2.44, "grad_norm": 0.7317989468574524, "learning_rate": 4.986152031136766e-05, "loss": 2.9775, "step": 49819 }, { "epoch": 2.44, "grad_norm": 0.7053791284561157, "learning_rate": 4.985302080003988e-05, "loss": 2.7853, "step": 49820 }, { "epoch": 2.44, "grad_norm": 0.7373185753822327, "learning_rate": 4.984452194755101e-05, "loss": 2.8829, "step": 49821 }, { "epoch": 2.44, "grad_norm": 0.7272002100944519, "learning_rate": 4.98360237539236e-05, "loss": 3.0424, "step": 49822 }, { "epoch": 2.44, "grad_norm": 0.7341743111610413, "learning_rate": 4.9827526219179914e-05, "loss": 2.8273, "step": 49823 }, { "epoch": 2.44, "grad_norm": 0.7472624778747559, "learning_rate": 4.981902934334242e-05, "loss": 2.8973, "step": 49824 }, { "epoch": 2.44, "grad_norm": 0.7138078808784485, "learning_rate": 4.9810533126433475e-05, "loss": 2.9605, "step": 49825 }, { "epoch": 2.44, "grad_norm": 0.7111098766326904, "learning_rate": 4.980203756847532e-05, "loss": 3.0122, "step": 49826 }, { "epoch": 2.44, "grad_norm": 0.7603220343589783, "learning_rate": 4.979354266949057e-05, "loss": 2.9321, "step": 49827 }, { "epoch": 2.44, "grad_norm": 0.7600682973861694, "learning_rate": 4.978504842950138e-05, "loss": 2.6835, "step": 49828 }, { "epoch": 2.44, "grad_norm": 0.7622936367988586, "learning_rate": 4.9776554848530205e-05, "loss": 2.6772, "step": 49829 }, { "epoch": 2.44, "grad_norm": 0.7368950843811035, "learning_rate": 4.976806192659951e-05, "loss": 2.8547, "step": 49830 }, { "epoch": 2.44, "grad_norm": 0.7676935195922852, "learning_rate": 4.975956966373153e-05, "loss": 2.8766, "step": 49831 }, { "epoch": 2.44, "grad_norm": 0.7402030229568481, "learning_rate": 4.975107805994872e-05, "loss": 2.8607, "step": 49832 }, { "epoch": 2.44, "grad_norm": 0.745424211025238, "learning_rate": 4.974258711527346e-05, "loss": 2.6656, "step": 49833 }, { "epoch": 2.44, "grad_norm": 0.756466269493103, "learning_rate": 4.973409682972794e-05, "loss": 2.926, "step": 49834 }, { "epoch": 2.44, "grad_norm": 0.7675303816795349, "learning_rate": 4.972560720333473e-05, "loss": 2.8321, "step": 49835 }, { "epoch": 2.44, "grad_norm": 0.758357584476471, "learning_rate": 4.971711823611606e-05, "loss": 3.0498, "step": 49836 }, { "epoch": 2.44, "grad_norm": 0.8041175603866577, "learning_rate": 4.970862992809436e-05, "loss": 2.9132, "step": 49837 }, { "epoch": 2.44, "grad_norm": 0.7109980583190918, "learning_rate": 4.9700142279291933e-05, "loss": 2.7802, "step": 49838 }, { "epoch": 2.44, "grad_norm": 0.785557746887207, "learning_rate": 4.9691655289731205e-05, "loss": 3.0044, "step": 49839 }, { "epoch": 2.44, "grad_norm": 0.726588249206543, "learning_rate": 4.968316895943448e-05, "loss": 2.7941, "step": 49840 }, { "epoch": 2.44, "grad_norm": 0.7485399842262268, "learning_rate": 4.967468328842406e-05, "loss": 2.9152, "step": 49841 }, { "epoch": 2.44, "grad_norm": 0.740860104560852, "learning_rate": 4.96661982767224e-05, "loss": 2.8871, "step": 49842 }, { "epoch": 2.44, "grad_norm": 0.790346622467041, "learning_rate": 4.965771392435174e-05, "loss": 3.1134, "step": 49843 }, { "epoch": 2.44, "grad_norm": 0.7316777110099792, "learning_rate": 4.964923023133448e-05, "loss": 3.0394, "step": 49844 }, { "epoch": 2.44, "grad_norm": 0.7692133784294128, "learning_rate": 4.9640747197693007e-05, "loss": 2.8604, "step": 49845 }, { "epoch": 2.44, "grad_norm": 0.7087564468383789, "learning_rate": 4.963226482344968e-05, "loss": 2.8347, "step": 49846 }, { "epoch": 2.44, "grad_norm": 0.7064667344093323, "learning_rate": 4.962378310862672e-05, "loss": 3.0958, "step": 49847 }, { "epoch": 2.44, "grad_norm": 0.7500705122947693, "learning_rate": 4.961530205324651e-05, "loss": 2.8577, "step": 49848 }, { "epoch": 2.44, "grad_norm": 0.7233101725578308, "learning_rate": 4.960682165733137e-05, "loss": 2.8902, "step": 49849 }, { "epoch": 2.44, "grad_norm": 0.7733365893363953, "learning_rate": 4.959834192090377e-05, "loss": 2.8184, "step": 49850 }, { "epoch": 2.44, "grad_norm": 0.7586425542831421, "learning_rate": 4.9589862843985886e-05, "loss": 2.9315, "step": 49851 }, { "epoch": 2.44, "grad_norm": 0.6858609914779663, "learning_rate": 4.958138442660016e-05, "loss": 2.6727, "step": 49852 }, { "epoch": 2.44, "grad_norm": 0.6984794735908508, "learning_rate": 4.957290666876881e-05, "loss": 2.9782, "step": 49853 }, { "epoch": 2.44, "grad_norm": 0.7984606027603149, "learning_rate": 4.956442957051429e-05, "loss": 3.0055, "step": 49854 }, { "epoch": 2.44, "grad_norm": 0.7043806314468384, "learning_rate": 4.9555953131858875e-05, "loss": 2.9892, "step": 49855 }, { "epoch": 2.44, "grad_norm": 0.732420802116394, "learning_rate": 4.9547477352824815e-05, "loss": 2.8946, "step": 49856 }, { "epoch": 2.44, "grad_norm": 0.7292214632034302, "learning_rate": 4.9539002233434535e-05, "loss": 2.8447, "step": 49857 }, { "epoch": 2.44, "grad_norm": 0.7352430820465088, "learning_rate": 4.95305277737103e-05, "loss": 2.9061, "step": 49858 }, { "epoch": 2.44, "grad_norm": 0.7342036962509155, "learning_rate": 4.952205397367441e-05, "loss": 3.0595, "step": 49859 }, { "epoch": 2.44, "grad_norm": 0.6796708106994629, "learning_rate": 4.951358083334931e-05, "loss": 3.1384, "step": 49860 }, { "epoch": 2.44, "grad_norm": 0.7156011462211609, "learning_rate": 4.950510835275724e-05, "loss": 2.8325, "step": 49861 }, { "epoch": 2.44, "grad_norm": 0.7859975099563599, "learning_rate": 4.9496636531920484e-05, "loss": 2.9611, "step": 49862 }, { "epoch": 2.44, "grad_norm": 0.7750837206840515, "learning_rate": 4.948816537086133e-05, "loss": 3.0251, "step": 49863 }, { "epoch": 2.44, "grad_norm": 0.7656334638595581, "learning_rate": 4.9479694869602123e-05, "loss": 2.9598, "step": 49864 }, { "epoch": 2.44, "grad_norm": 0.720266580581665, "learning_rate": 4.9471225028165285e-05, "loss": 2.8096, "step": 49865 }, { "epoch": 2.44, "grad_norm": 0.7152171730995178, "learning_rate": 4.946275584657292e-05, "loss": 3.0708, "step": 49866 }, { "epoch": 2.44, "grad_norm": 0.7371180653572083, "learning_rate": 4.945428732484752e-05, "loss": 2.7161, "step": 49867 }, { "epoch": 2.44, "grad_norm": 0.7790018916130066, "learning_rate": 4.9445819463011294e-05, "loss": 2.8841, "step": 49868 }, { "epoch": 2.44, "grad_norm": 0.7312780022621155, "learning_rate": 4.9437352261086594e-05, "loss": 3.1128, "step": 49869 }, { "epoch": 2.44, "grad_norm": 0.7291393876075745, "learning_rate": 4.94288857190956e-05, "loss": 3.0187, "step": 49870 }, { "epoch": 2.44, "grad_norm": 0.7577651739120483, "learning_rate": 4.94204198370607e-05, "loss": 2.9457, "step": 49871 }, { "epoch": 2.44, "grad_norm": 0.7718584537506104, "learning_rate": 4.941195461500424e-05, "loss": 2.8328, "step": 49872 }, { "epoch": 2.44, "grad_norm": 0.7502276301383972, "learning_rate": 4.940349005294839e-05, "loss": 2.9141, "step": 49873 }, { "epoch": 2.44, "grad_norm": 0.7148169279098511, "learning_rate": 4.939502615091562e-05, "loss": 2.9375, "step": 49874 }, { "epoch": 2.44, "grad_norm": 0.7954051494598389, "learning_rate": 4.938656290892803e-05, "loss": 2.9624, "step": 49875 }, { "epoch": 2.44, "grad_norm": 0.7510395646095276, "learning_rate": 4.937810032700811e-05, "loss": 2.9211, "step": 49876 }, { "epoch": 2.44, "grad_norm": 0.7023428678512573, "learning_rate": 4.9369638405178e-05, "loss": 3.2698, "step": 49877 }, { "epoch": 2.44, "grad_norm": 0.7281398773193359, "learning_rate": 4.936117714345996e-05, "loss": 2.9131, "step": 49878 }, { "epoch": 2.44, "grad_norm": 0.7942187190055847, "learning_rate": 4.9352716541876436e-05, "loss": 2.9639, "step": 49879 }, { "epoch": 2.44, "grad_norm": 0.70033860206604, "learning_rate": 4.934425660044951e-05, "loss": 2.9204, "step": 49880 }, { "epoch": 2.44, "grad_norm": 0.7631633281707764, "learning_rate": 4.933579731920162e-05, "loss": 3.0097, "step": 49881 }, { "epoch": 2.44, "grad_norm": 0.7145242691040039, "learning_rate": 4.932733869815503e-05, "loss": 2.8329, "step": 49882 }, { "epoch": 2.44, "grad_norm": 0.7550216913223267, "learning_rate": 4.9318880737332015e-05, "loss": 2.8755, "step": 49883 }, { "epoch": 2.44, "grad_norm": 0.7773282527923584, "learning_rate": 4.931042343675482e-05, "loss": 2.9673, "step": 49884 }, { "epoch": 2.44, "grad_norm": 0.7082733511924744, "learning_rate": 4.9301966796445636e-05, "loss": 2.9873, "step": 49885 }, { "epoch": 2.44, "grad_norm": 0.6855611801147461, "learning_rate": 4.9293510816426807e-05, "loss": 2.7685, "step": 49886 }, { "epoch": 2.44, "grad_norm": 0.696873664855957, "learning_rate": 4.928505549672072e-05, "loss": 3.0811, "step": 49887 }, { "epoch": 2.44, "grad_norm": 0.7408643364906311, "learning_rate": 4.927660083734948e-05, "loss": 2.7622, "step": 49888 }, { "epoch": 2.44, "grad_norm": 0.7321268916130066, "learning_rate": 4.926814683833547e-05, "loss": 2.9631, "step": 49889 }, { "epoch": 2.44, "grad_norm": 0.7376499176025391, "learning_rate": 4.925969349970093e-05, "loss": 3.2182, "step": 49890 }, { "epoch": 2.45, "grad_norm": 0.6759738922119141, "learning_rate": 4.925124082146804e-05, "loss": 2.8273, "step": 49891 }, { "epoch": 2.45, "grad_norm": 0.7222961783409119, "learning_rate": 4.924278880365917e-05, "loss": 2.7853, "step": 49892 }, { "epoch": 2.45, "grad_norm": 0.7377529740333557, "learning_rate": 4.923433744629647e-05, "loss": 3.1511, "step": 49893 }, { "epoch": 2.45, "grad_norm": 0.8030790686607361, "learning_rate": 4.922588674940231e-05, "loss": 2.9492, "step": 49894 }, { "epoch": 2.45, "grad_norm": 0.7189480066299438, "learning_rate": 4.921743671299888e-05, "loss": 2.8765, "step": 49895 }, { "epoch": 2.45, "grad_norm": 0.7487554550170898, "learning_rate": 4.920898733710843e-05, "loss": 2.9909, "step": 49896 }, { "epoch": 2.45, "grad_norm": 0.7432091236114502, "learning_rate": 4.92005386217533e-05, "loss": 2.8594, "step": 49897 }, { "epoch": 2.45, "grad_norm": 0.7807390093803406, "learning_rate": 4.919209056695572e-05, "loss": 3.0344, "step": 49898 }, { "epoch": 2.45, "grad_norm": 0.7431802749633789, "learning_rate": 4.918364317273786e-05, "loss": 2.9692, "step": 49899 }, { "epoch": 2.45, "grad_norm": 0.7242401242256165, "learning_rate": 4.917519643912198e-05, "loss": 2.6837, "step": 49900 }, { "epoch": 2.45, "grad_norm": 0.7620343565940857, "learning_rate": 4.9166750366130345e-05, "loss": 2.7063, "step": 49901 }, { "epoch": 2.45, "grad_norm": 0.7530914545059204, "learning_rate": 4.9158304953785286e-05, "loss": 2.923, "step": 49902 }, { "epoch": 2.45, "grad_norm": 0.7380638122558594, "learning_rate": 4.914986020210892e-05, "loss": 3.0402, "step": 49903 }, { "epoch": 2.45, "grad_norm": 0.7419988512992859, "learning_rate": 4.914141611112361e-05, "loss": 2.7586, "step": 49904 }, { "epoch": 2.45, "grad_norm": 0.7261704802513123, "learning_rate": 4.913297268085152e-05, "loss": 3.0975, "step": 49905 }, { "epoch": 2.45, "grad_norm": 0.7934512495994568, "learning_rate": 4.912452991131483e-05, "loss": 2.8156, "step": 49906 }, { "epoch": 2.45, "grad_norm": 0.7128746509552002, "learning_rate": 4.911608780253593e-05, "loss": 2.8872, "step": 49907 }, { "epoch": 2.45, "grad_norm": 0.7485001683235168, "learning_rate": 4.910764635453689e-05, "loss": 2.9185, "step": 49908 }, { "epoch": 2.45, "grad_norm": 0.7733572721481323, "learning_rate": 4.909920556734009e-05, "loss": 2.8178, "step": 49909 }, { "epoch": 2.45, "grad_norm": 0.7357202172279358, "learning_rate": 4.909076544096763e-05, "loss": 2.7962, "step": 49910 }, { "epoch": 2.45, "grad_norm": 1.1617584228515625, "learning_rate": 4.9082325975441805e-05, "loss": 3.1216, "step": 49911 }, { "epoch": 2.45, "grad_norm": 0.7278109788894653, "learning_rate": 4.907388717078498e-05, "loss": 3.0274, "step": 49912 }, { "epoch": 2.45, "grad_norm": 0.7125959992408752, "learning_rate": 4.9065449027019125e-05, "loss": 2.8246, "step": 49913 }, { "epoch": 2.45, "grad_norm": 0.8183368444442749, "learning_rate": 4.9057011544166625e-05, "loss": 2.6864, "step": 49914 }, { "epoch": 2.45, "grad_norm": 0.7317843437194824, "learning_rate": 4.90485747222496e-05, "loss": 2.7996, "step": 49915 }, { "epoch": 2.45, "grad_norm": 0.8040397763252258, "learning_rate": 4.904013856129036e-05, "loss": 3.1255, "step": 49916 }, { "epoch": 2.45, "grad_norm": 0.6812474131584167, "learning_rate": 4.903170306131113e-05, "loss": 2.9811, "step": 49917 }, { "epoch": 2.45, "grad_norm": 0.7134155631065369, "learning_rate": 4.9023268222334054e-05, "loss": 2.9236, "step": 49918 }, { "epoch": 2.45, "grad_norm": 0.730156421661377, "learning_rate": 4.901483404438145e-05, "loss": 3.0052, "step": 49919 }, { "epoch": 2.45, "grad_norm": 0.7486832737922668, "learning_rate": 4.9006400527475455e-05, "loss": 2.9662, "step": 49920 }, { "epoch": 2.45, "grad_norm": 0.7389925122261047, "learning_rate": 4.8997967671638225e-05, "loss": 2.913, "step": 49921 }, { "epoch": 2.45, "grad_norm": 0.7639115452766418, "learning_rate": 4.898953547689212e-05, "loss": 2.9612, "step": 49922 }, { "epoch": 2.45, "grad_norm": 0.8156792521476746, "learning_rate": 4.89811039432592e-05, "loss": 3.0135, "step": 49923 }, { "epoch": 2.45, "grad_norm": 0.7284063696861267, "learning_rate": 4.8972673070761836e-05, "loss": 2.9221, "step": 49924 }, { "epoch": 2.45, "grad_norm": 0.7258008122444153, "learning_rate": 4.8964242859422033e-05, "loss": 2.9088, "step": 49925 }, { "epoch": 2.45, "grad_norm": 0.7349685430526733, "learning_rate": 4.8955813309262204e-05, "loss": 3.1959, "step": 49926 }, { "epoch": 2.45, "grad_norm": 0.7146064043045044, "learning_rate": 4.894738442030444e-05, "loss": 2.7013, "step": 49927 }, { "epoch": 2.45, "grad_norm": 0.75479656457901, "learning_rate": 4.8938956192570847e-05, "loss": 3.0321, "step": 49928 }, { "epoch": 2.45, "grad_norm": 0.7491357922554016, "learning_rate": 4.8930528626083846e-05, "loss": 2.6871, "step": 49929 }, { "epoch": 2.45, "grad_norm": 0.7528855800628662, "learning_rate": 4.8922101720865384e-05, "loss": 2.7284, "step": 49930 }, { "epoch": 2.45, "grad_norm": 0.7925986647605896, "learning_rate": 4.891367547693784e-05, "loss": 2.9621, "step": 49931 }, { "epoch": 2.45, "grad_norm": 0.7412816882133484, "learning_rate": 4.890524989432341e-05, "loss": 2.8603, "step": 49932 }, { "epoch": 2.45, "grad_norm": 0.7685806751251221, "learning_rate": 4.889682497304412e-05, "loss": 2.7904, "step": 49933 }, { "epoch": 2.45, "grad_norm": 0.7178759574890137, "learning_rate": 4.8888400713122376e-05, "loss": 2.9155, "step": 49934 }, { "epoch": 2.45, "grad_norm": 0.7180420160293579, "learning_rate": 4.887997711458025e-05, "loss": 2.8599, "step": 49935 }, { "epoch": 2.45, "grad_norm": 0.7158687710762024, "learning_rate": 4.887155417743985e-05, "loss": 2.684, "step": 49936 }, { "epoch": 2.45, "grad_norm": 0.7684953212738037, "learning_rate": 4.886313190172354e-05, "loss": 2.7244, "step": 49937 }, { "epoch": 2.45, "grad_norm": 0.7468008399009705, "learning_rate": 4.885471028745332e-05, "loss": 2.9189, "step": 49938 }, { "epoch": 2.45, "grad_norm": 0.7421904802322388, "learning_rate": 4.8846289334651554e-05, "loss": 2.9611, "step": 49939 }, { "epoch": 2.45, "grad_norm": 0.7222840785980225, "learning_rate": 4.883786904334021e-05, "loss": 2.7801, "step": 49940 }, { "epoch": 2.45, "grad_norm": 0.7169114351272583, "learning_rate": 4.882944941354169e-05, "loss": 3.0289, "step": 49941 }, { "epoch": 2.45, "grad_norm": 0.7277195453643799, "learning_rate": 4.882103044527805e-05, "loss": 2.8314, "step": 49942 }, { "epoch": 2.45, "grad_norm": 0.7513947486877441, "learning_rate": 4.8812612138571426e-05, "loss": 2.7394, "step": 49943 }, { "epoch": 2.45, "grad_norm": 0.7872393727302551, "learning_rate": 4.880419449344406e-05, "loss": 2.8735, "step": 49944 }, { "epoch": 2.45, "grad_norm": 0.8093016743659973, "learning_rate": 4.8795777509918064e-05, "loss": 2.8194, "step": 49945 }, { "epoch": 2.45, "grad_norm": 0.7250414490699768, "learning_rate": 4.878736118801566e-05, "loss": 2.9011, "step": 49946 }, { "epoch": 2.45, "grad_norm": 0.7930007576942444, "learning_rate": 4.877894552775906e-05, "loss": 2.9038, "step": 49947 }, { "epoch": 2.45, "grad_norm": 0.8064557909965515, "learning_rate": 4.877053052917035e-05, "loss": 2.7648, "step": 49948 }, { "epoch": 2.45, "grad_norm": 0.7449289560317993, "learning_rate": 4.876211619227174e-05, "loss": 2.8408, "step": 49949 }, { "epoch": 2.45, "grad_norm": 0.7603309154510498, "learning_rate": 4.875370251708528e-05, "loss": 2.8804, "step": 49950 }, { "epoch": 2.45, "grad_norm": 0.7044733762741089, "learning_rate": 4.874528950363325e-05, "loss": 2.9399, "step": 49951 }, { "epoch": 2.45, "grad_norm": 0.7044036388397217, "learning_rate": 4.873687715193775e-05, "loss": 2.7616, "step": 49952 }, { "epoch": 2.45, "grad_norm": 0.7298387885093689, "learning_rate": 4.8728465462020935e-05, "loss": 2.9534, "step": 49953 }, { "epoch": 2.45, "grad_norm": 0.7330809831619263, "learning_rate": 4.872005443390508e-05, "loss": 3.0583, "step": 49954 }, { "epoch": 2.45, "grad_norm": 0.7165080904960632, "learning_rate": 4.8711644067612145e-05, "loss": 2.9493, "step": 49955 }, { "epoch": 2.45, "grad_norm": 0.7864888310432434, "learning_rate": 4.870323436316446e-05, "loss": 2.9212, "step": 49956 }, { "epoch": 2.45, "grad_norm": 0.7288803458213806, "learning_rate": 4.8694825320584097e-05, "loss": 2.8663, "step": 49957 }, { "epoch": 2.45, "grad_norm": 0.7244741320610046, "learning_rate": 4.8686416939893114e-05, "loss": 2.874, "step": 49958 }, { "epoch": 2.45, "grad_norm": 0.7267864346504211, "learning_rate": 4.8678009221113826e-05, "loss": 2.8159, "step": 49959 }, { "epoch": 2.45, "grad_norm": 0.7464987635612488, "learning_rate": 4.866960216426821e-05, "loss": 2.8817, "step": 49960 }, { "epoch": 2.45, "grad_norm": 0.7408061027526855, "learning_rate": 4.866119576937858e-05, "loss": 2.9639, "step": 49961 }, { "epoch": 2.45, "grad_norm": 0.7390897870063782, "learning_rate": 4.8652790036466925e-05, "loss": 3.0278, "step": 49962 }, { "epoch": 2.45, "grad_norm": 0.7427120804786682, "learning_rate": 4.86443849655555e-05, "loss": 3.0713, "step": 49963 }, { "epoch": 2.45, "grad_norm": 0.7576953172683716, "learning_rate": 4.86359805566664e-05, "loss": 2.905, "step": 49964 }, { "epoch": 2.45, "grad_norm": 0.7212951183319092, "learning_rate": 4.8627576809821654e-05, "loss": 3.0014, "step": 49965 }, { "epoch": 2.45, "grad_norm": 0.761062741279602, "learning_rate": 4.861917372504361e-05, "loss": 2.7335, "step": 49966 }, { "epoch": 2.45, "grad_norm": 0.7463256120681763, "learning_rate": 4.861077130235419e-05, "loss": 2.7925, "step": 49967 }, { "epoch": 2.45, "grad_norm": 0.7250059843063354, "learning_rate": 4.86023695417756e-05, "loss": 2.9038, "step": 49968 }, { "epoch": 2.45, "grad_norm": 0.7397086024284363, "learning_rate": 4.859396844333009e-05, "loss": 3.0888, "step": 49969 }, { "epoch": 2.45, "grad_norm": 0.7244906425476074, "learning_rate": 4.858556800703964e-05, "loss": 3.0914, "step": 49970 }, { "epoch": 2.45, "grad_norm": 0.7587701082229614, "learning_rate": 4.857716823292644e-05, "loss": 2.8498, "step": 49971 }, { "epoch": 2.45, "grad_norm": 0.7259935736656189, "learning_rate": 4.856876912101254e-05, "loss": 2.7492, "step": 49972 }, { "epoch": 2.45, "grad_norm": 0.7055765390396118, "learning_rate": 4.856037067132012e-05, "loss": 3.1046, "step": 49973 }, { "epoch": 2.45, "grad_norm": 0.7304884195327759, "learning_rate": 4.8551972883871313e-05, "loss": 2.8807, "step": 49974 }, { "epoch": 2.45, "grad_norm": 0.7568566203117371, "learning_rate": 4.854357575868819e-05, "loss": 2.9917, "step": 49975 }, { "epoch": 2.45, "grad_norm": 0.7316765189170837, "learning_rate": 4.8535179295792957e-05, "loss": 3.0772, "step": 49976 }, { "epoch": 2.45, "grad_norm": 0.7427469491958618, "learning_rate": 4.8526783495207565e-05, "loss": 2.8457, "step": 49977 }, { "epoch": 2.45, "grad_norm": 0.7722180485725403, "learning_rate": 4.851838835695433e-05, "loss": 3.0185, "step": 49978 }, { "epoch": 2.45, "grad_norm": 0.7619324922561646, "learning_rate": 4.850999388105527e-05, "loss": 2.7384, "step": 49979 }, { "epoch": 2.45, "grad_norm": 0.7218577861785889, "learning_rate": 4.850160006753239e-05, "loss": 2.9204, "step": 49980 }, { "epoch": 2.45, "grad_norm": 0.7448925375938416, "learning_rate": 4.8493206916407976e-05, "loss": 2.8308, "step": 49981 }, { "epoch": 2.45, "grad_norm": 0.7064295411109924, "learning_rate": 4.8484814427703986e-05, "loss": 3.0217, "step": 49982 }, { "epoch": 2.45, "grad_norm": 0.7662560939788818, "learning_rate": 4.8476422601442564e-05, "loss": 2.6942, "step": 49983 }, { "epoch": 2.45, "grad_norm": 0.7750674486160278, "learning_rate": 4.846803143764593e-05, "loss": 2.761, "step": 49984 }, { "epoch": 2.45, "grad_norm": 0.7387879490852356, "learning_rate": 4.845964093633612e-05, "loss": 2.8892, "step": 49985 }, { "epoch": 2.45, "grad_norm": 0.7031064033508301, "learning_rate": 4.845125109753516e-05, "loss": 3.0094, "step": 49986 }, { "epoch": 2.45, "grad_norm": 0.7086855173110962, "learning_rate": 4.844286192126513e-05, "loss": 2.8456, "step": 49987 }, { "epoch": 2.45, "grad_norm": 0.7467719912528992, "learning_rate": 4.843447340754821e-05, "loss": 2.914, "step": 49988 }, { "epoch": 2.45, "grad_norm": 0.7186830639839172, "learning_rate": 4.8426085556406523e-05, "loss": 3.1394, "step": 49989 }, { "epoch": 2.45, "grad_norm": 0.7500671744346619, "learning_rate": 4.841769836786205e-05, "loss": 2.966, "step": 49990 }, { "epoch": 2.45, "grad_norm": 0.7453409433364868, "learning_rate": 4.840931184193703e-05, "loss": 2.7501, "step": 49991 }, { "epoch": 2.45, "grad_norm": 0.7155488133430481, "learning_rate": 4.840092597865342e-05, "loss": 2.8779, "step": 49992 }, { "epoch": 2.45, "grad_norm": 0.7144157290458679, "learning_rate": 4.8392540778033295e-05, "loss": 3.0127, "step": 49993 }, { "epoch": 2.45, "grad_norm": 0.7175836563110352, "learning_rate": 4.838415624009888e-05, "loss": 2.9094, "step": 49994 }, { "epoch": 2.45, "grad_norm": 0.7162435054779053, "learning_rate": 4.837577236487209e-05, "loss": 2.8378, "step": 49995 }, { "epoch": 2.45, "grad_norm": 0.7388350963592529, "learning_rate": 4.8367389152375166e-05, "loss": 2.8836, "step": 49996 }, { "epoch": 2.45, "grad_norm": 0.7505469918251038, "learning_rate": 4.835900660263003e-05, "loss": 2.8015, "step": 49997 }, { "epoch": 2.45, "grad_norm": 0.734264612197876, "learning_rate": 4.835062471565887e-05, "loss": 2.9549, "step": 49998 }, { "epoch": 2.45, "grad_norm": 0.753314733505249, "learning_rate": 4.834224349148379e-05, "loss": 2.9851, "step": 49999 }, { "epoch": 2.45, "grad_norm": 0.7283154726028442, "learning_rate": 4.833386293012681e-05, "loss": 2.9575, "step": 50000 }, { "epoch": 2.45, "grad_norm": 0.6950429081916809, "learning_rate": 4.832548303160998e-05, "loss": 3.0719, "step": 50001 }, { "epoch": 2.45, "grad_norm": 0.7324820756912231, "learning_rate": 4.831710379595535e-05, "loss": 2.906, "step": 50002 }, { "epoch": 2.45, "grad_norm": 0.7295659780502319, "learning_rate": 4.8308725223185e-05, "loss": 2.8802, "step": 50003 }, { "epoch": 2.45, "grad_norm": 0.7294290661811829, "learning_rate": 4.830034731332111e-05, "loss": 3.0532, "step": 50004 }, { "epoch": 2.45, "grad_norm": 0.764670729637146, "learning_rate": 4.82919700663856e-05, "loss": 3.0021, "step": 50005 }, { "epoch": 2.45, "grad_norm": 0.7712165713310242, "learning_rate": 4.828359348240069e-05, "loss": 2.8191, "step": 50006 }, { "epoch": 2.45, "grad_norm": 0.749573290348053, "learning_rate": 4.827521756138831e-05, "loss": 3.0938, "step": 50007 }, { "epoch": 2.45, "grad_norm": 0.7555610537528992, "learning_rate": 4.8266842303370534e-05, "loss": 2.7916, "step": 50008 }, { "epoch": 2.45, "grad_norm": 0.716448187828064, "learning_rate": 4.8258467708369495e-05, "loss": 3.0179, "step": 50009 }, { "epoch": 2.45, "grad_norm": 0.7162739038467407, "learning_rate": 4.825009377640712e-05, "loss": 2.8407, "step": 50010 }, { "epoch": 2.45, "grad_norm": 0.7964804768562317, "learning_rate": 4.824172050750564e-05, "loss": 2.8375, "step": 50011 }, { "epoch": 2.45, "grad_norm": 0.7051910161972046, "learning_rate": 4.823334790168693e-05, "loss": 2.7792, "step": 50012 }, { "epoch": 2.45, "grad_norm": 0.7237557768821716, "learning_rate": 4.822497595897315e-05, "loss": 2.9245, "step": 50013 }, { "epoch": 2.45, "grad_norm": 0.7709342837333679, "learning_rate": 4.821660467938645e-05, "loss": 2.8221, "step": 50014 }, { "epoch": 2.45, "grad_norm": 0.7040632367134094, "learning_rate": 4.8208234062948626e-05, "loss": 2.7416, "step": 50015 }, { "epoch": 2.45, "grad_norm": 0.6828323602676392, "learning_rate": 4.819986410968192e-05, "loss": 2.9322, "step": 50016 }, { "epoch": 2.45, "grad_norm": 0.7632225751876831, "learning_rate": 4.819149481960825e-05, "loss": 2.7827, "step": 50017 }, { "epoch": 2.45, "grad_norm": 0.7432723641395569, "learning_rate": 4.8183126192749686e-05, "loss": 2.7461, "step": 50018 }, { "epoch": 2.45, "grad_norm": 0.7697473764419556, "learning_rate": 4.81747582291284e-05, "loss": 2.8574, "step": 50019 }, { "epoch": 2.45, "grad_norm": 0.6835653185844421, "learning_rate": 4.8166390928766286e-05, "loss": 2.9083, "step": 50020 }, { "epoch": 2.45, "grad_norm": 0.6899628639221191, "learning_rate": 4.815802429168547e-05, "loss": 2.7771, "step": 50021 }, { "epoch": 2.45, "grad_norm": 0.7158491015434265, "learning_rate": 4.814965831790797e-05, "loss": 2.8758, "step": 50022 }, { "epoch": 2.45, "grad_norm": 0.7541047930717468, "learning_rate": 4.8141293007455727e-05, "loss": 2.8541, "step": 50023 }, { "epoch": 2.45, "grad_norm": 0.7383518815040588, "learning_rate": 4.8132928360350934e-05, "loss": 2.932, "step": 50024 }, { "epoch": 2.45, "grad_norm": 0.736765444278717, "learning_rate": 4.8124564376615424e-05, "loss": 2.6686, "step": 50025 }, { "epoch": 2.45, "grad_norm": 0.7256408929824829, "learning_rate": 4.811620105627143e-05, "loss": 3.1262, "step": 50026 }, { "epoch": 2.45, "grad_norm": 0.7852066159248352, "learning_rate": 4.810783839934082e-05, "loss": 2.8989, "step": 50027 }, { "epoch": 2.45, "grad_norm": 0.7440248131752014, "learning_rate": 4.809947640584576e-05, "loss": 2.8755, "step": 50028 }, { "epoch": 2.45, "grad_norm": 0.7258433699607849, "learning_rate": 4.809111507580819e-05, "loss": 2.9438, "step": 50029 }, { "epoch": 2.45, "grad_norm": 0.7557896375656128, "learning_rate": 4.808275440925005e-05, "loss": 2.8328, "step": 50030 }, { "epoch": 2.45, "grad_norm": 0.7530323266983032, "learning_rate": 4.8074394406193564e-05, "loss": 2.973, "step": 50031 }, { "epoch": 2.45, "grad_norm": 0.7720298767089844, "learning_rate": 4.8066035066660544e-05, "loss": 2.9985, "step": 50032 }, { "epoch": 2.45, "grad_norm": 0.7010929584503174, "learning_rate": 4.8057676390673184e-05, "loss": 2.9103, "step": 50033 }, { "epoch": 2.45, "grad_norm": 0.8532752990722656, "learning_rate": 4.804931837825332e-05, "loss": 2.9676, "step": 50034 }, { "epoch": 2.45, "grad_norm": 0.7763757705688477, "learning_rate": 4.804096102942311e-05, "loss": 2.7373, "step": 50035 }, { "epoch": 2.45, "grad_norm": 0.7673393487930298, "learning_rate": 4.803260434420456e-05, "loss": 2.762, "step": 50036 }, { "epoch": 2.45, "grad_norm": 0.722378134727478, "learning_rate": 4.802424832261963e-05, "loss": 3.0049, "step": 50037 }, { "epoch": 2.45, "grad_norm": 0.7699034810066223, "learning_rate": 4.8015892964690364e-05, "loss": 3.0195, "step": 50038 }, { "epoch": 2.45, "grad_norm": 0.8004751801490784, "learning_rate": 4.800753827043864e-05, "loss": 2.8241, "step": 50039 }, { "epoch": 2.45, "grad_norm": 0.7232542037963867, "learning_rate": 4.799918423988658e-05, "loss": 2.8742, "step": 50040 }, { "epoch": 2.45, "grad_norm": 0.778252363204956, "learning_rate": 4.799083087305623e-05, "loss": 2.7134, "step": 50041 }, { "epoch": 2.45, "grad_norm": 0.7834492325782776, "learning_rate": 4.798247816996948e-05, "loss": 2.9848, "step": 50042 }, { "epoch": 2.45, "grad_norm": 0.7058895230293274, "learning_rate": 4.7974126130648425e-05, "loss": 3.0141, "step": 50043 }, { "epoch": 2.45, "grad_norm": 0.7730516791343689, "learning_rate": 4.7965774755115035e-05, "loss": 2.8361, "step": 50044 }, { "epoch": 2.45, "grad_norm": 0.7547017931938171, "learning_rate": 4.795742404339122e-05, "loss": 2.7368, "step": 50045 }, { "epoch": 2.45, "grad_norm": 0.747089684009552, "learning_rate": 4.79490739954991e-05, "loss": 2.7331, "step": 50046 }, { "epoch": 2.45, "grad_norm": 0.7834062576293945, "learning_rate": 4.794072461146055e-05, "loss": 2.8744, "step": 50047 }, { "epoch": 2.45, "grad_norm": 0.7755687236785889, "learning_rate": 4.793237589129772e-05, "loss": 2.6205, "step": 50048 }, { "epoch": 2.45, "grad_norm": 0.7240215539932251, "learning_rate": 4.7924027835032385e-05, "loss": 3.0052, "step": 50049 }, { "epoch": 2.45, "grad_norm": 0.7895364165306091, "learning_rate": 4.7915680442686745e-05, "loss": 3.0121, "step": 50050 }, { "epoch": 2.45, "grad_norm": 0.7438759207725525, "learning_rate": 4.790733371428267e-05, "loss": 2.6353, "step": 50051 }, { "epoch": 2.45, "grad_norm": 0.7612919211387634, "learning_rate": 4.789898764984211e-05, "loss": 2.974, "step": 50052 }, { "epoch": 2.45, "grad_norm": 0.7200536727905273, "learning_rate": 4.7890642249387154e-05, "loss": 2.8291, "step": 50053 }, { "epoch": 2.45, "grad_norm": 0.744355320930481, "learning_rate": 4.788229751293965e-05, "loss": 2.8287, "step": 50054 }, { "epoch": 2.45, "grad_norm": 0.7230464816093445, "learning_rate": 4.787395344052167e-05, "loss": 2.8453, "step": 50055 }, { "epoch": 2.45, "grad_norm": 0.7152355909347534, "learning_rate": 4.7865610032155246e-05, "loss": 2.9215, "step": 50056 }, { "epoch": 2.45, "grad_norm": 0.7902154922485352, "learning_rate": 4.785726728786221e-05, "loss": 2.7708, "step": 50057 }, { "epoch": 2.45, "grad_norm": 0.7618465423583984, "learning_rate": 4.784892520766466e-05, "loss": 2.8228, "step": 50058 }, { "epoch": 2.45, "grad_norm": 0.8009930849075317, "learning_rate": 4.784058379158453e-05, "loss": 2.6846, "step": 50059 }, { "epoch": 2.45, "grad_norm": 0.7363435626029968, "learning_rate": 4.7832243039643714e-05, "loss": 2.7056, "step": 50060 }, { "epoch": 2.45, "grad_norm": 0.7347774505615234, "learning_rate": 4.782390295186429e-05, "loss": 2.8223, "step": 50061 }, { "epoch": 2.45, "grad_norm": 0.7615084648132324, "learning_rate": 4.7815563528268105e-05, "loss": 3.0836, "step": 50062 }, { "epoch": 2.45, "grad_norm": 0.7269819378852844, "learning_rate": 4.780722476887725e-05, "loss": 2.7853, "step": 50063 }, { "epoch": 2.45, "grad_norm": 0.7375494837760925, "learning_rate": 4.779888667371356e-05, "loss": 2.589, "step": 50064 }, { "epoch": 2.45, "grad_norm": 0.7187495827674866, "learning_rate": 4.779054924279917e-05, "loss": 2.8879, "step": 50065 }, { "epoch": 2.45, "grad_norm": 0.7238545417785645, "learning_rate": 4.778221247615591e-05, "loss": 2.8817, "step": 50066 }, { "epoch": 2.45, "grad_norm": 0.7386276125907898, "learning_rate": 4.7773876373805676e-05, "loss": 2.8928, "step": 50067 }, { "epoch": 2.45, "grad_norm": 0.720185399055481, "learning_rate": 4.776554093577061e-05, "loss": 2.9978, "step": 50068 }, { "epoch": 2.45, "grad_norm": 0.7330771684646606, "learning_rate": 4.775720616207249e-05, "loss": 2.6761, "step": 50069 }, { "epoch": 2.45, "grad_norm": 0.6842424273490906, "learning_rate": 4.774887205273335e-05, "loss": 2.7894, "step": 50070 }, { "epoch": 2.45, "grad_norm": 0.7490212321281433, "learning_rate": 4.774053860777518e-05, "loss": 2.9307, "step": 50071 }, { "epoch": 2.45, "grad_norm": 0.7413821220397949, "learning_rate": 4.773220582721988e-05, "loss": 2.7725, "step": 50072 }, { "epoch": 2.45, "grad_norm": 0.6934046149253845, "learning_rate": 4.7723873711089436e-05, "loss": 2.8426, "step": 50073 }, { "epoch": 2.45, "grad_norm": 0.7559138536453247, "learning_rate": 4.7715542259405645e-05, "loss": 2.9685, "step": 50074 }, { "epoch": 2.45, "grad_norm": 0.7740827798843384, "learning_rate": 4.770721147219061e-05, "loss": 3.0002, "step": 50075 }, { "epoch": 2.45, "grad_norm": 0.7439709305763245, "learning_rate": 4.7698881349466265e-05, "loss": 3.0176, "step": 50076 }, { "epoch": 2.45, "grad_norm": 0.7715969085693359, "learning_rate": 4.7690551891254436e-05, "loss": 2.862, "step": 50077 }, { "epoch": 2.45, "grad_norm": 0.7283157110214233, "learning_rate": 4.76822230975772e-05, "loss": 3.0025, "step": 50078 }, { "epoch": 2.45, "grad_norm": 0.7790409326553345, "learning_rate": 4.7673894968456375e-05, "loss": 2.9668, "step": 50079 }, { "epoch": 2.45, "grad_norm": 0.7363762259483337, "learning_rate": 4.766556750391404e-05, "loss": 2.8291, "step": 50080 }, { "epoch": 2.45, "grad_norm": 0.723741888999939, "learning_rate": 4.765724070397201e-05, "loss": 3.0666, "step": 50081 }, { "epoch": 2.45, "grad_norm": 0.7611827254295349, "learning_rate": 4.7648914568652176e-05, "loss": 2.9559, "step": 50082 }, { "epoch": 2.45, "grad_norm": 0.7597978115081787, "learning_rate": 4.764058909797661e-05, "loss": 2.7526, "step": 50083 }, { "epoch": 2.45, "grad_norm": 0.7658380270004272, "learning_rate": 4.763226429196707e-05, "loss": 2.6695, "step": 50084 }, { "epoch": 2.45, "grad_norm": 0.7255105972290039, "learning_rate": 4.76239401506456e-05, "loss": 2.7215, "step": 50085 }, { "epoch": 2.45, "grad_norm": 0.715186595916748, "learning_rate": 4.761561667403418e-05, "loss": 2.8727, "step": 50086 }, { "epoch": 2.45, "grad_norm": 0.7413427233695984, "learning_rate": 4.760729386215464e-05, "loss": 2.8856, "step": 50087 }, { "epoch": 2.45, "grad_norm": 0.7388500571250916, "learning_rate": 4.759897171502891e-05, "loss": 3.0116, "step": 50088 }, { "epoch": 2.45, "grad_norm": 0.6811955571174622, "learning_rate": 4.759065023267885e-05, "loss": 3.0865, "step": 50089 }, { "epoch": 2.45, "grad_norm": 0.7523310780525208, "learning_rate": 4.758232941512644e-05, "loss": 2.9596, "step": 50090 }, { "epoch": 2.45, "grad_norm": 0.7621850371360779, "learning_rate": 4.757400926239366e-05, "loss": 2.8558, "step": 50091 }, { "epoch": 2.45, "grad_norm": 0.697986900806427, "learning_rate": 4.75656897745023e-05, "loss": 2.8994, "step": 50092 }, { "epoch": 2.45, "grad_norm": 0.722909688949585, "learning_rate": 4.7557370951474404e-05, "loss": 2.8762, "step": 50093 }, { "epoch": 2.45, "grad_norm": 0.7483739852905273, "learning_rate": 4.754905279333182e-05, "loss": 2.8685, "step": 50094 }, { "epoch": 2.46, "grad_norm": 0.7431113123893738, "learning_rate": 4.754073530009633e-05, "loss": 2.8286, "step": 50095 }, { "epoch": 2.46, "grad_norm": 0.750974178314209, "learning_rate": 4.7532418471790055e-05, "loss": 3.0905, "step": 50096 }, { "epoch": 2.46, "grad_norm": 0.7384655475616455, "learning_rate": 4.752410230843474e-05, "loss": 2.9023, "step": 50097 }, { "epoch": 2.46, "grad_norm": 0.7771185636520386, "learning_rate": 4.75157868100524e-05, "loss": 2.6056, "step": 50098 }, { "epoch": 2.46, "grad_norm": 0.7554793357849121, "learning_rate": 4.750747197666482e-05, "loss": 2.7112, "step": 50099 }, { "epoch": 2.46, "grad_norm": 0.7633035182952881, "learning_rate": 4.749915780829399e-05, "loss": 2.9715, "step": 50100 }, { "epoch": 2.46, "grad_norm": 0.7684073448181152, "learning_rate": 4.749084430496184e-05, "loss": 3.0109, "step": 50101 }, { "epoch": 2.46, "grad_norm": 0.8110712170600891, "learning_rate": 4.74825314666902e-05, "loss": 2.9253, "step": 50102 }, { "epoch": 2.46, "grad_norm": 0.6965272426605225, "learning_rate": 4.747421929350098e-05, "loss": 2.757, "step": 50103 }, { "epoch": 2.46, "grad_norm": 0.834354817867279, "learning_rate": 4.7465907785416005e-05, "loss": 2.9619, "step": 50104 }, { "epoch": 2.46, "grad_norm": 0.7369504570960999, "learning_rate": 4.7457596942457274e-05, "loss": 3.0316, "step": 50105 }, { "epoch": 2.46, "grad_norm": 0.727476954460144, "learning_rate": 4.744928676464659e-05, "loss": 2.924, "step": 50106 }, { "epoch": 2.46, "grad_norm": 0.7420288920402527, "learning_rate": 4.744097725200585e-05, "loss": 2.7727, "step": 50107 }, { "epoch": 2.46, "grad_norm": 0.7241715788841248, "learning_rate": 4.7432668404557054e-05, "loss": 2.7901, "step": 50108 }, { "epoch": 2.46, "grad_norm": 0.70522540807724, "learning_rate": 4.7424360222322e-05, "loss": 2.6263, "step": 50109 }, { "epoch": 2.46, "grad_norm": 0.7506316304206848, "learning_rate": 4.7416052705322584e-05, "loss": 3.0343, "step": 50110 }, { "epoch": 2.46, "grad_norm": 0.7315349578857422, "learning_rate": 4.740774585358058e-05, "loss": 3.0413, "step": 50111 }, { "epoch": 2.46, "grad_norm": 0.7280415892601013, "learning_rate": 4.739943966711798e-05, "loss": 2.9838, "step": 50112 }, { "epoch": 2.46, "grad_norm": 0.7337613701820374, "learning_rate": 4.739113414595672e-05, "loss": 3.1778, "step": 50113 }, { "epoch": 2.46, "grad_norm": 0.7724540829658508, "learning_rate": 4.73828292901185e-05, "loss": 2.7373, "step": 50114 }, { "epoch": 2.46, "grad_norm": 0.728657603263855, "learning_rate": 4.737452509962538e-05, "loss": 2.6146, "step": 50115 }, { "epoch": 2.46, "grad_norm": 0.774933397769928, "learning_rate": 4.736622157449913e-05, "loss": 2.9025, "step": 50116 }, { "epoch": 2.46, "grad_norm": 0.7137971520423889, "learning_rate": 4.735791871476159e-05, "loss": 2.8057, "step": 50117 }, { "epoch": 2.46, "grad_norm": 0.723777711391449, "learning_rate": 4.734961652043471e-05, "loss": 2.9628, "step": 50118 }, { "epoch": 2.46, "grad_norm": 0.7265045642852783, "learning_rate": 4.734131499154027e-05, "loss": 2.6844, "step": 50119 }, { "epoch": 2.46, "grad_norm": 0.7293531894683838, "learning_rate": 4.733301412810022e-05, "loss": 2.9703, "step": 50120 }, { "epoch": 2.46, "grad_norm": 0.7541471719741821, "learning_rate": 4.7324713930136346e-05, "loss": 2.9009, "step": 50121 }, { "epoch": 2.46, "grad_norm": 0.7486446499824524, "learning_rate": 4.7316414397670535e-05, "loss": 2.8286, "step": 50122 }, { "epoch": 2.46, "grad_norm": 0.7592146396636963, "learning_rate": 4.730811553072472e-05, "loss": 2.7649, "step": 50123 }, { "epoch": 2.46, "grad_norm": 0.7211746573448181, "learning_rate": 4.72998173293207e-05, "loss": 2.611, "step": 50124 }, { "epoch": 2.46, "grad_norm": 0.7064350843429565, "learning_rate": 4.729151979348032e-05, "loss": 2.9046, "step": 50125 }, { "epoch": 2.46, "grad_norm": 0.7288532257080078, "learning_rate": 4.7283222923225385e-05, "loss": 2.9871, "step": 50126 }, { "epoch": 2.46, "grad_norm": 0.7138639092445374, "learning_rate": 4.727492671857782e-05, "loss": 3.0573, "step": 50127 }, { "epoch": 2.46, "grad_norm": 0.7234009504318237, "learning_rate": 4.726663117955951e-05, "loss": 2.9079, "step": 50128 }, { "epoch": 2.46, "grad_norm": 0.7255566716194153, "learning_rate": 4.725833630619217e-05, "loss": 2.8949, "step": 50129 }, { "epoch": 2.46, "grad_norm": 0.7464993596076965, "learning_rate": 4.725004209849784e-05, "loss": 2.8752, "step": 50130 }, { "epoch": 2.46, "grad_norm": 0.7366446256637573, "learning_rate": 4.724174855649822e-05, "loss": 2.8406, "step": 50131 }, { "epoch": 2.46, "grad_norm": 0.7235312461853027, "learning_rate": 4.723345568021514e-05, "loss": 2.7292, "step": 50132 }, { "epoch": 2.46, "grad_norm": 0.7639561891555786, "learning_rate": 4.7225163469670555e-05, "loss": 2.93, "step": 50133 }, { "epoch": 2.46, "grad_norm": 0.7084791660308838, "learning_rate": 4.721687192488616e-05, "loss": 3.0532, "step": 50134 }, { "epoch": 2.46, "grad_norm": 0.7618387937545776, "learning_rate": 4.720858104588399e-05, "loss": 2.9025, "step": 50135 }, { "epoch": 2.46, "grad_norm": 0.7836553454399109, "learning_rate": 4.720029083268566e-05, "loss": 2.9431, "step": 50136 }, { "epoch": 2.46, "grad_norm": 0.7372074127197266, "learning_rate": 4.7192001285313095e-05, "loss": 2.4869, "step": 50137 }, { "epoch": 2.46, "grad_norm": 0.7809687852859497, "learning_rate": 4.718371240378824e-05, "loss": 2.9196, "step": 50138 }, { "epoch": 2.46, "grad_norm": 0.7471933960914612, "learning_rate": 4.717542418813285e-05, "loss": 3.0387, "step": 50139 }, { "epoch": 2.46, "grad_norm": 0.7411444187164307, "learning_rate": 4.7167136638368694e-05, "loss": 3.055, "step": 50140 }, { "epoch": 2.46, "grad_norm": 0.7536376118659973, "learning_rate": 4.7158849754517604e-05, "loss": 2.8807, "step": 50141 }, { "epoch": 2.46, "grad_norm": 0.7287087440490723, "learning_rate": 4.715056353660142e-05, "loss": 3.0737, "step": 50142 }, { "epoch": 2.46, "grad_norm": 0.7162652611732483, "learning_rate": 4.7142277984642084e-05, "loss": 3.0043, "step": 50143 }, { "epoch": 2.46, "grad_norm": 0.7836141586303711, "learning_rate": 4.7133993098661226e-05, "loss": 2.9177, "step": 50144 }, { "epoch": 2.46, "grad_norm": 0.7386676669120789, "learning_rate": 4.712570887868086e-05, "loss": 2.6577, "step": 50145 }, { "epoch": 2.46, "grad_norm": 0.7532510757446289, "learning_rate": 4.711742532472269e-05, "loss": 2.8556, "step": 50146 }, { "epoch": 2.46, "grad_norm": 0.7517036199569702, "learning_rate": 4.710914243680851e-05, "loss": 2.9074, "step": 50147 }, { "epoch": 2.46, "grad_norm": 0.7926651239395142, "learning_rate": 4.710086021496024e-05, "loss": 2.6681, "step": 50148 }, { "epoch": 2.46, "grad_norm": 0.7204487919807434, "learning_rate": 4.709257865919954e-05, "loss": 2.8429, "step": 50149 }, { "epoch": 2.46, "grad_norm": 0.7931594252586365, "learning_rate": 4.708429776954838e-05, "loss": 2.6764, "step": 50150 }, { "epoch": 2.46, "grad_norm": 0.7327009439468384, "learning_rate": 4.7076017546028474e-05, "loss": 2.8861, "step": 50151 }, { "epoch": 2.46, "grad_norm": 0.7598114609718323, "learning_rate": 4.7067737988661706e-05, "loss": 2.5865, "step": 50152 }, { "epoch": 2.46, "grad_norm": 0.7243731617927551, "learning_rate": 4.705945909746984e-05, "loss": 2.872, "step": 50153 }, { "epoch": 2.46, "grad_norm": 0.7335036396980286, "learning_rate": 4.705118087247461e-05, "loss": 2.701, "step": 50154 }, { "epoch": 2.46, "grad_norm": 0.7911424040794373, "learning_rate": 4.7042903313697966e-05, "loss": 2.7696, "step": 50155 }, { "epoch": 2.46, "grad_norm": 0.6929256916046143, "learning_rate": 4.703462642116158e-05, "loss": 2.9583, "step": 50156 }, { "epoch": 2.46, "grad_norm": 0.7362402081489563, "learning_rate": 4.7026350194887276e-05, "loss": 3.0049, "step": 50157 }, { "epoch": 2.46, "grad_norm": 0.6989345550537109, "learning_rate": 4.7018074634896966e-05, "loss": 3.0146, "step": 50158 }, { "epoch": 2.46, "grad_norm": 0.7882124781608582, "learning_rate": 4.700979974121226e-05, "loss": 2.9058, "step": 50159 }, { "epoch": 2.46, "grad_norm": 0.7394413948059082, "learning_rate": 4.7001525513855155e-05, "loss": 3.002, "step": 50160 }, { "epoch": 2.46, "grad_norm": 0.7787472009658813, "learning_rate": 4.699325195284732e-05, "loss": 2.9563, "step": 50161 }, { "epoch": 2.46, "grad_norm": 0.7208383083343506, "learning_rate": 4.698497905821052e-05, "loss": 2.9109, "step": 50162 }, { "epoch": 2.46, "grad_norm": 0.7691134214401245, "learning_rate": 4.6976706829966646e-05, "loss": 2.6999, "step": 50163 }, { "epoch": 2.46, "grad_norm": 0.7439152598381042, "learning_rate": 4.696843526813737e-05, "loss": 2.7271, "step": 50164 }, { "epoch": 2.46, "grad_norm": 0.7308996915817261, "learning_rate": 4.69601643727446e-05, "loss": 2.6876, "step": 50165 }, { "epoch": 2.46, "grad_norm": 0.8340305685997009, "learning_rate": 4.695189414380999e-05, "loss": 3.0734, "step": 50166 }, { "epoch": 2.46, "grad_norm": 0.6861405968666077, "learning_rate": 4.694362458135544e-05, "loss": 2.9448, "step": 50167 }, { "epoch": 2.46, "grad_norm": 0.7236627340316772, "learning_rate": 4.693535568540272e-05, "loss": 2.7754, "step": 50168 }, { "epoch": 2.46, "grad_norm": 0.7470681071281433, "learning_rate": 4.6927087455973466e-05, "loss": 2.9247, "step": 50169 }, { "epoch": 2.46, "grad_norm": 0.7589225172996521, "learning_rate": 4.6918819893089667e-05, "loss": 2.902, "step": 50170 }, { "epoch": 2.46, "grad_norm": 0.7107172608375549, "learning_rate": 4.69105529967729e-05, "loss": 2.9717, "step": 50171 }, { "epoch": 2.46, "grad_norm": 0.7193456888198853, "learning_rate": 4.690228676704502e-05, "loss": 2.9345, "step": 50172 }, { "epoch": 2.46, "grad_norm": 0.7701574563980103, "learning_rate": 4.689402120392787e-05, "loss": 2.6803, "step": 50173 }, { "epoch": 2.46, "grad_norm": 0.7131604552268982, "learning_rate": 4.688575630744317e-05, "loss": 3.0103, "step": 50174 }, { "epoch": 2.46, "grad_norm": 0.7473611831665039, "learning_rate": 4.68774920776127e-05, "loss": 2.9097, "step": 50175 }, { "epoch": 2.46, "grad_norm": 0.7232372164726257, "learning_rate": 4.6869228514458076e-05, "loss": 2.8485, "step": 50176 }, { "epoch": 2.46, "grad_norm": 0.7372998595237732, "learning_rate": 4.6860965618001223e-05, "loss": 2.696, "step": 50177 }, { "epoch": 2.46, "grad_norm": 0.7132263779640198, "learning_rate": 4.6852703388263946e-05, "loss": 2.7469, "step": 50178 }, { "epoch": 2.46, "grad_norm": 0.7559736967086792, "learning_rate": 4.684444182526787e-05, "loss": 2.8729, "step": 50179 }, { "epoch": 2.46, "grad_norm": 0.7392177581787109, "learning_rate": 4.6836180929034826e-05, "loss": 2.8812, "step": 50180 }, { "epoch": 2.46, "grad_norm": 0.7340937256813049, "learning_rate": 4.6827920699586555e-05, "loss": 2.896, "step": 50181 }, { "epoch": 2.46, "grad_norm": 0.7307325005531311, "learning_rate": 4.681966113694485e-05, "loss": 2.8654, "step": 50182 }, { "epoch": 2.46, "grad_norm": 0.7325087785720825, "learning_rate": 4.6811402241131445e-05, "loss": 2.8743, "step": 50183 }, { "epoch": 2.46, "grad_norm": 0.7375662326812744, "learning_rate": 4.680314401216798e-05, "loss": 2.886, "step": 50184 }, { "epoch": 2.46, "grad_norm": 0.7653541564941406, "learning_rate": 4.679488645007641e-05, "loss": 2.9494, "step": 50185 }, { "epoch": 2.46, "grad_norm": 0.7421068549156189, "learning_rate": 4.678662955487831e-05, "loss": 3.1609, "step": 50186 }, { "epoch": 2.46, "grad_norm": 0.7914893627166748, "learning_rate": 4.6778373326595534e-05, "loss": 3.062, "step": 50187 }, { "epoch": 2.46, "grad_norm": 0.7131171226501465, "learning_rate": 4.677011776524973e-05, "loss": 2.5862, "step": 50188 }, { "epoch": 2.46, "grad_norm": 0.7471457123756409, "learning_rate": 4.6761862870862776e-05, "loss": 3.0483, "step": 50189 }, { "epoch": 2.46, "grad_norm": 0.7291505932807922, "learning_rate": 4.6753608643456365e-05, "loss": 2.9675, "step": 50190 }, { "epoch": 2.46, "grad_norm": 0.7441790103912354, "learning_rate": 4.674535508305211e-05, "loss": 2.9056, "step": 50191 }, { "epoch": 2.46, "grad_norm": 0.7517664432525635, "learning_rate": 4.6737102189671916e-05, "loss": 2.8592, "step": 50192 }, { "epoch": 2.46, "grad_norm": 0.7586925029754639, "learning_rate": 4.6728849963337415e-05, "loss": 2.7071, "step": 50193 }, { "epoch": 2.46, "grad_norm": 0.754487931728363, "learning_rate": 4.672059840407034e-05, "loss": 3.0674, "step": 50194 }, { "epoch": 2.46, "grad_norm": 0.7265577912330627, "learning_rate": 4.6712347511892556e-05, "loss": 2.9569, "step": 50195 }, { "epoch": 2.46, "grad_norm": 0.7128158807754517, "learning_rate": 4.67040972868257e-05, "loss": 3.096, "step": 50196 }, { "epoch": 2.46, "grad_norm": 0.6750921607017517, "learning_rate": 4.66958477288915e-05, "loss": 2.7767, "step": 50197 }, { "epoch": 2.46, "grad_norm": 0.724058210849762, "learning_rate": 4.668759883811163e-05, "loss": 2.9174, "step": 50198 }, { "epoch": 2.46, "grad_norm": 0.7566876411437988, "learning_rate": 4.667935061450787e-05, "loss": 2.9974, "step": 50199 }, { "epoch": 2.46, "grad_norm": 0.7634623050689697, "learning_rate": 4.667110305810201e-05, "loss": 2.8993, "step": 50200 }, { "epoch": 2.46, "grad_norm": 0.7499482035636902, "learning_rate": 4.666285616891563e-05, "loss": 3.0086, "step": 50201 }, { "epoch": 2.46, "grad_norm": 0.7458711862564087, "learning_rate": 4.6654609946970614e-05, "loss": 3.0176, "step": 50202 }, { "epoch": 2.46, "grad_norm": 0.7508730292320251, "learning_rate": 4.664636439228854e-05, "loss": 2.8624, "step": 50203 }, { "epoch": 2.46, "grad_norm": 0.7427207231521606, "learning_rate": 4.663811950489122e-05, "loss": 2.8796, "step": 50204 }, { "epoch": 2.46, "grad_norm": 0.7459037899971008, "learning_rate": 4.6629875284800385e-05, "loss": 2.821, "step": 50205 }, { "epoch": 2.46, "grad_norm": 0.6995728611946106, "learning_rate": 4.662163173203758e-05, "loss": 2.9301, "step": 50206 }, { "epoch": 2.46, "grad_norm": 0.7462338805198669, "learning_rate": 4.661338884662471e-05, "loss": 2.9296, "step": 50207 }, { "epoch": 2.46, "grad_norm": 0.7074584364891052, "learning_rate": 4.6605146628583365e-05, "loss": 2.956, "step": 50208 }, { "epoch": 2.46, "grad_norm": 0.745572030544281, "learning_rate": 4.6596905077935266e-05, "loss": 2.9507, "step": 50209 }, { "epoch": 2.46, "grad_norm": 0.7482078075408936, "learning_rate": 4.658866419470225e-05, "loss": 2.9588, "step": 50210 }, { "epoch": 2.46, "grad_norm": 0.6879667043685913, "learning_rate": 4.658042397890591e-05, "loss": 2.7677, "step": 50211 }, { "epoch": 2.46, "grad_norm": 0.7026304602622986, "learning_rate": 4.657218443056796e-05, "loss": 3.1308, "step": 50212 }, { "epoch": 2.46, "grad_norm": 0.7168485522270203, "learning_rate": 4.656394554971002e-05, "loss": 2.6956, "step": 50213 }, { "epoch": 2.46, "grad_norm": 0.7557949423789978, "learning_rate": 4.655570733635389e-05, "loss": 2.959, "step": 50214 }, { "epoch": 2.46, "grad_norm": 0.7124577760696411, "learning_rate": 4.654746979052133e-05, "loss": 2.9676, "step": 50215 }, { "epoch": 2.46, "grad_norm": 0.7355839014053345, "learning_rate": 4.653923291223386e-05, "loss": 3.0602, "step": 50216 }, { "epoch": 2.46, "grad_norm": 0.7148059606552124, "learning_rate": 4.653099670151336e-05, "loss": 2.8388, "step": 50217 }, { "epoch": 2.46, "grad_norm": 0.7521902918815613, "learning_rate": 4.6522761158381407e-05, "loss": 2.7998, "step": 50218 }, { "epoch": 2.46, "grad_norm": 0.7478179335594177, "learning_rate": 4.651452628285969e-05, "loss": 3.0467, "step": 50219 }, { "epoch": 2.46, "grad_norm": 0.7289862036705017, "learning_rate": 4.6506292074969955e-05, "loss": 2.8849, "step": 50220 }, { "epoch": 2.46, "grad_norm": 0.7552449703216553, "learning_rate": 4.649805853473382e-05, "loss": 2.9719, "step": 50221 }, { "epoch": 2.46, "grad_norm": 0.7331597805023193, "learning_rate": 4.648982566217307e-05, "loss": 3.1323, "step": 50222 }, { "epoch": 2.46, "grad_norm": 0.7304408550262451, "learning_rate": 4.648159345730927e-05, "loss": 2.7518, "step": 50223 }, { "epoch": 2.46, "grad_norm": 0.7600448727607727, "learning_rate": 4.647336192016413e-05, "loss": 2.8241, "step": 50224 }, { "epoch": 2.46, "grad_norm": 0.7154588103294373, "learning_rate": 4.646513105075949e-05, "loss": 2.7548, "step": 50225 }, { "epoch": 2.46, "grad_norm": 0.729921281337738, "learning_rate": 4.645690084911685e-05, "loss": 2.94, "step": 50226 }, { "epoch": 2.46, "grad_norm": 0.7225162386894226, "learning_rate": 4.644867131525795e-05, "loss": 2.8544, "step": 50227 }, { "epoch": 2.46, "grad_norm": 0.7441264390945435, "learning_rate": 4.6440442449204386e-05, "loss": 3.0495, "step": 50228 }, { "epoch": 2.46, "grad_norm": 0.7257991433143616, "learning_rate": 4.643221425097792e-05, "loss": 2.7045, "step": 50229 }, { "epoch": 2.46, "grad_norm": 0.7100118398666382, "learning_rate": 4.642398672060026e-05, "loss": 2.7428, "step": 50230 }, { "epoch": 2.46, "grad_norm": 0.7242577075958252, "learning_rate": 4.6415759858092926e-05, "loss": 2.9569, "step": 50231 }, { "epoch": 2.46, "grad_norm": 0.7217210531234741, "learning_rate": 4.6407533663477766e-05, "loss": 3.0893, "step": 50232 }, { "epoch": 2.46, "grad_norm": 0.7589887380599976, "learning_rate": 4.6399308136776336e-05, "loss": 2.7432, "step": 50233 }, { "epoch": 2.46, "grad_norm": 0.6991437077522278, "learning_rate": 4.639108327801028e-05, "loss": 3.0003, "step": 50234 }, { "epoch": 2.46, "grad_norm": 0.7275975942611694, "learning_rate": 4.638285908720137e-05, "loss": 2.8634, "step": 50235 }, { "epoch": 2.46, "grad_norm": 0.7588894367218018, "learning_rate": 4.637463556437111e-05, "loss": 3.0073, "step": 50236 }, { "epoch": 2.46, "grad_norm": 0.7242416739463806, "learning_rate": 4.6366412709541336e-05, "loss": 3.0063, "step": 50237 }, { "epoch": 2.46, "grad_norm": 0.7257980704307556, "learning_rate": 4.635819052273353e-05, "loss": 3.0214, "step": 50238 }, { "epoch": 2.46, "grad_norm": 0.7471466064453125, "learning_rate": 4.6349969003969444e-05, "loss": 2.9178, "step": 50239 }, { "epoch": 2.46, "grad_norm": 0.746959924697876, "learning_rate": 4.6341748153270864e-05, "loss": 2.9513, "step": 50240 }, { "epoch": 2.46, "grad_norm": 0.7836167812347412, "learning_rate": 4.633352797065917e-05, "loss": 2.9061, "step": 50241 }, { "epoch": 2.46, "grad_norm": 0.764286994934082, "learning_rate": 4.632530845615621e-05, "loss": 2.8445, "step": 50242 }, { "epoch": 2.46, "grad_norm": 0.7559465765953064, "learning_rate": 4.63170896097835e-05, "loss": 2.8471, "step": 50243 }, { "epoch": 2.46, "grad_norm": 0.7415547966957092, "learning_rate": 4.630887143156276e-05, "loss": 2.8428, "step": 50244 }, { "epoch": 2.46, "grad_norm": 0.7760792970657349, "learning_rate": 4.6300653921515665e-05, "loss": 2.8229, "step": 50245 }, { "epoch": 2.46, "grad_norm": 0.7538564801216125, "learning_rate": 4.6292437079663766e-05, "loss": 2.8987, "step": 50246 }, { "epoch": 2.46, "grad_norm": 0.7254053354263306, "learning_rate": 4.6284220906028854e-05, "loss": 2.9147, "step": 50247 }, { "epoch": 2.46, "grad_norm": 0.808498203754425, "learning_rate": 4.627600540063248e-05, "loss": 2.7537, "step": 50248 }, { "epoch": 2.46, "grad_norm": 0.7465435862541199, "learning_rate": 4.626779056349617e-05, "loss": 2.7917, "step": 50249 }, { "epoch": 2.46, "grad_norm": 0.7259182929992676, "learning_rate": 4.625957639464177e-05, "loss": 2.9146, "step": 50250 }, { "epoch": 2.46, "grad_norm": 0.7807119488716125, "learning_rate": 4.6251362894090715e-05, "loss": 2.967, "step": 50251 }, { "epoch": 2.46, "grad_norm": 0.744609534740448, "learning_rate": 4.624315006186482e-05, "loss": 2.8336, "step": 50252 }, { "epoch": 2.46, "grad_norm": 0.703451931476593, "learning_rate": 4.623493789798557e-05, "loss": 2.8774, "step": 50253 }, { "epoch": 2.46, "grad_norm": 0.8264244198799133, "learning_rate": 4.622672640247471e-05, "loss": 2.7383, "step": 50254 }, { "epoch": 2.46, "grad_norm": 0.736638069152832, "learning_rate": 4.621851557535383e-05, "loss": 2.7002, "step": 50255 }, { "epoch": 2.46, "grad_norm": 0.7538456320762634, "learning_rate": 4.621030541664444e-05, "loss": 2.9702, "step": 50256 }, { "epoch": 2.46, "grad_norm": 0.7488062977790833, "learning_rate": 4.6202095926368366e-05, "loss": 2.812, "step": 50257 }, { "epoch": 2.46, "grad_norm": 0.7549282908439636, "learning_rate": 4.619388710454704e-05, "loss": 2.9381, "step": 50258 }, { "epoch": 2.46, "grad_norm": 0.7125292420387268, "learning_rate": 4.618567895120223e-05, "loss": 3.0659, "step": 50259 }, { "epoch": 2.46, "grad_norm": 0.8093187808990479, "learning_rate": 4.617747146635544e-05, "loss": 2.9393, "step": 50260 }, { "epoch": 2.46, "grad_norm": 0.7467921376228333, "learning_rate": 4.616926465002836e-05, "loss": 2.9444, "step": 50261 }, { "epoch": 2.46, "grad_norm": 0.774636447429657, "learning_rate": 4.616105850224263e-05, "loss": 2.9592, "step": 50262 }, { "epoch": 2.46, "grad_norm": 0.7182961702346802, "learning_rate": 4.615285302301981e-05, "loss": 2.8498, "step": 50263 }, { "epoch": 2.46, "grad_norm": 0.7876618504524231, "learning_rate": 4.614464821238154e-05, "loss": 2.8896, "step": 50264 }, { "epoch": 2.46, "grad_norm": 0.7740433812141418, "learning_rate": 4.613644407034934e-05, "loss": 3.0115, "step": 50265 }, { "epoch": 2.46, "grad_norm": 0.7827932834625244, "learning_rate": 4.612824059694486e-05, "loss": 3.0513, "step": 50266 }, { "epoch": 2.46, "grad_norm": 0.7410049438476562, "learning_rate": 4.6120037792189854e-05, "loss": 2.7407, "step": 50267 }, { "epoch": 2.46, "grad_norm": 0.7162790298461914, "learning_rate": 4.61118356561057e-05, "loss": 2.7981, "step": 50268 }, { "epoch": 2.46, "grad_norm": 0.7440741658210754, "learning_rate": 4.6103634188714214e-05, "loss": 2.9791, "step": 50269 }, { "epoch": 2.46, "grad_norm": 0.715707540512085, "learning_rate": 4.6095433390036876e-05, "loss": 2.86, "step": 50270 }, { "epoch": 2.46, "grad_norm": 0.7865996956825256, "learning_rate": 4.60872332600952e-05, "loss": 2.8264, "step": 50271 }, { "epoch": 2.46, "grad_norm": 0.7152213454246521, "learning_rate": 4.607903379891098e-05, "loss": 2.9668, "step": 50272 }, { "epoch": 2.46, "grad_norm": 0.7056474089622498, "learning_rate": 4.6070835006505655e-05, "loss": 2.9386, "step": 50273 }, { "epoch": 2.46, "grad_norm": 0.7383698225021362, "learning_rate": 4.606263688290095e-05, "loss": 3.0749, "step": 50274 }, { "epoch": 2.46, "grad_norm": 0.7201040387153625, "learning_rate": 4.6054439428118306e-05, "loss": 2.8478, "step": 50275 }, { "epoch": 2.46, "grad_norm": 0.7062273025512695, "learning_rate": 4.6046242642179474e-05, "loss": 2.9832, "step": 50276 }, { "epoch": 2.46, "grad_norm": 0.687692403793335, "learning_rate": 4.6038046525105974e-05, "loss": 2.6567, "step": 50277 }, { "epoch": 2.46, "grad_norm": 0.7824928760528564, "learning_rate": 4.602985107691929e-05, "loss": 2.8693, "step": 50278 }, { "epoch": 2.46, "grad_norm": 0.7675195932388306, "learning_rate": 4.60216562976412e-05, "loss": 2.7285, "step": 50279 }, { "epoch": 2.46, "grad_norm": 0.7547996640205383, "learning_rate": 4.6013462187293085e-05, "loss": 2.709, "step": 50280 }, { "epoch": 2.46, "grad_norm": 0.7081446647644043, "learning_rate": 4.600526874589666e-05, "loss": 2.9257, "step": 50281 }, { "epoch": 2.46, "grad_norm": 0.7581326961517334, "learning_rate": 4.599707597347352e-05, "loss": 2.8561, "step": 50282 }, { "epoch": 2.46, "grad_norm": 0.6948836445808411, "learning_rate": 4.5988883870045135e-05, "loss": 2.9192, "step": 50283 }, { "epoch": 2.46, "grad_norm": 0.7415758371353149, "learning_rate": 4.598069243563323e-05, "loss": 2.7314, "step": 50284 }, { "epoch": 2.46, "grad_norm": 0.7500677704811096, "learning_rate": 4.5972501670259275e-05, "loss": 2.8871, "step": 50285 }, { "epoch": 2.46, "grad_norm": 0.739080548286438, "learning_rate": 4.59643115739448e-05, "loss": 3.08, "step": 50286 }, { "epoch": 2.46, "grad_norm": 0.7174320816993713, "learning_rate": 4.595612214671151e-05, "loss": 3.0523, "step": 50287 }, { "epoch": 2.46, "grad_norm": 0.7595369219779968, "learning_rate": 4.5947933388580835e-05, "loss": 3.0033, "step": 50288 }, { "epoch": 2.46, "grad_norm": 0.741030216217041, "learning_rate": 4.593974529957448e-05, "loss": 2.7456, "step": 50289 }, { "epoch": 2.46, "grad_norm": 0.7448769807815552, "learning_rate": 4.5931557879713865e-05, "loss": 2.9838, "step": 50290 }, { "epoch": 2.46, "grad_norm": 0.744935154914856, "learning_rate": 4.5923371129020704e-05, "loss": 2.8322, "step": 50291 }, { "epoch": 2.46, "grad_norm": 0.7401701211929321, "learning_rate": 4.591518504751648e-05, "loss": 3.0576, "step": 50292 }, { "epoch": 2.46, "grad_norm": 0.748890221118927, "learning_rate": 4.5906999635222706e-05, "loss": 2.9098, "step": 50293 }, { "epoch": 2.46, "grad_norm": 0.7550206780433655, "learning_rate": 4.589881489216104e-05, "loss": 2.846, "step": 50294 }, { "epoch": 2.46, "grad_norm": 0.7691561579704285, "learning_rate": 4.5890630818352926e-05, "loss": 2.8187, "step": 50295 }, { "epoch": 2.46, "grad_norm": 0.7871925234794617, "learning_rate": 4.5882447413820015e-05, "loss": 2.7289, "step": 50296 }, { "epoch": 2.46, "grad_norm": 0.7003697752952576, "learning_rate": 4.587426467858388e-05, "loss": 2.8819, "step": 50297 }, { "epoch": 2.46, "grad_norm": 0.7725687026977539, "learning_rate": 4.586608261266602e-05, "loss": 2.8305, "step": 50298 }, { "epoch": 2.47, "grad_norm": 0.7272656559944153, "learning_rate": 4.585790121608798e-05, "loss": 3.1106, "step": 50299 }, { "epoch": 2.47, "grad_norm": 0.7602006196975708, "learning_rate": 4.584972048887127e-05, "loss": 2.934, "step": 50300 }, { "epoch": 2.47, "grad_norm": 0.7348072528839111, "learning_rate": 4.584154043103747e-05, "loss": 3.0664, "step": 50301 }, { "epoch": 2.47, "grad_norm": 0.752991795539856, "learning_rate": 4.583336104260821e-05, "loss": 2.8116, "step": 50302 }, { "epoch": 2.47, "grad_norm": 0.7089395523071289, "learning_rate": 4.582518232360489e-05, "loss": 2.9561, "step": 50303 }, { "epoch": 2.47, "grad_norm": 0.7611316442489624, "learning_rate": 4.5817004274049205e-05, "loss": 2.9413, "step": 50304 }, { "epoch": 2.47, "grad_norm": 0.7629949450492859, "learning_rate": 4.5808826893962535e-05, "loss": 2.7534, "step": 50305 }, { "epoch": 2.47, "grad_norm": 0.7246274948120117, "learning_rate": 4.580065018336656e-05, "loss": 2.9361, "step": 50306 }, { "epoch": 2.47, "grad_norm": 0.7618169784545898, "learning_rate": 4.5792474142282766e-05, "loss": 2.8229, "step": 50307 }, { "epoch": 2.47, "grad_norm": 0.7503147721290588, "learning_rate": 4.5784298770732595e-05, "loss": 3.0257, "step": 50308 }, { "epoch": 2.47, "grad_norm": 0.721960723400116, "learning_rate": 4.577612406873774e-05, "loss": 2.9242, "step": 50309 }, { "epoch": 2.47, "grad_norm": 0.7781165242195129, "learning_rate": 4.576795003631954e-05, "loss": 2.9042, "step": 50310 }, { "epoch": 2.47, "grad_norm": 0.7347601056098938, "learning_rate": 4.575977667349966e-05, "loss": 2.8163, "step": 50311 }, { "epoch": 2.47, "grad_norm": 0.7000023722648621, "learning_rate": 4.575160398029967e-05, "loss": 2.8964, "step": 50312 }, { "epoch": 2.47, "grad_norm": 0.7539365887641907, "learning_rate": 4.574343195674102e-05, "loss": 3.0921, "step": 50313 }, { "epoch": 2.47, "grad_norm": 0.6888923645019531, "learning_rate": 4.573526060284523e-05, "loss": 2.8024, "step": 50314 }, { "epoch": 2.47, "grad_norm": 0.7311072945594788, "learning_rate": 4.572708991863374e-05, "loss": 2.6267, "step": 50315 }, { "epoch": 2.47, "grad_norm": 0.7336209416389465, "learning_rate": 4.571891990412822e-05, "loss": 2.8586, "step": 50316 }, { "epoch": 2.47, "grad_norm": 0.7051305174827576, "learning_rate": 4.5710750559350126e-05, "loss": 2.9322, "step": 50317 }, { "epoch": 2.47, "grad_norm": 0.7536783814430237, "learning_rate": 4.5702581884320964e-05, "loss": 2.7619, "step": 50318 }, { "epoch": 2.47, "grad_norm": 0.7377327680587769, "learning_rate": 4.569441387906231e-05, "loss": 2.7176, "step": 50319 }, { "epoch": 2.47, "grad_norm": 0.7616892457008362, "learning_rate": 4.568624654359561e-05, "loss": 2.8448, "step": 50320 }, { "epoch": 2.47, "grad_norm": 0.7389867305755615, "learning_rate": 4.567807987794234e-05, "loss": 2.8724, "step": 50321 }, { "epoch": 2.47, "grad_norm": 0.7348322868347168, "learning_rate": 4.566991388212413e-05, "loss": 3.0046, "step": 50322 }, { "epoch": 2.47, "grad_norm": 0.7657172679901123, "learning_rate": 4.5661748556162357e-05, "loss": 2.9771, "step": 50323 }, { "epoch": 2.47, "grad_norm": 0.6982909440994263, "learning_rate": 4.5653583900078636e-05, "loss": 2.8583, "step": 50324 }, { "epoch": 2.47, "grad_norm": 0.8317862153053284, "learning_rate": 4.564541991389439e-05, "loss": 2.9129, "step": 50325 }, { "epoch": 2.47, "grad_norm": 0.6974571943283081, "learning_rate": 4.563725659763112e-05, "loss": 2.9483, "step": 50326 }, { "epoch": 2.47, "grad_norm": 0.813891589641571, "learning_rate": 4.5629093951310456e-05, "loss": 2.9055, "step": 50327 }, { "epoch": 2.47, "grad_norm": 0.7852758765220642, "learning_rate": 4.5620931974953815e-05, "loss": 2.8643, "step": 50328 }, { "epoch": 2.47, "grad_norm": 0.7360036373138428, "learning_rate": 4.5612770668582664e-05, "loss": 2.8992, "step": 50329 }, { "epoch": 2.47, "grad_norm": 0.7240647077560425, "learning_rate": 4.560461003221844e-05, "loss": 2.815, "step": 50330 }, { "epoch": 2.47, "grad_norm": 0.7547604441642761, "learning_rate": 4.559645006588274e-05, "loss": 3.0039, "step": 50331 }, { "epoch": 2.47, "grad_norm": 0.7343550324440002, "learning_rate": 4.558829076959706e-05, "loss": 2.8534, "step": 50332 }, { "epoch": 2.47, "grad_norm": 0.7282590866088867, "learning_rate": 4.5580132143382805e-05, "loss": 3.1069, "step": 50333 }, { "epoch": 2.47, "grad_norm": 0.7644495964050293, "learning_rate": 4.557197418726161e-05, "loss": 3.1148, "step": 50334 }, { "epoch": 2.47, "grad_norm": 0.7662924528121948, "learning_rate": 4.556381690125487e-05, "loss": 3.0473, "step": 50335 }, { "epoch": 2.47, "grad_norm": 0.7156445980072021, "learning_rate": 4.555566028538398e-05, "loss": 2.9062, "step": 50336 }, { "epoch": 2.47, "grad_norm": 0.7530838251113892, "learning_rate": 4.554750433967058e-05, "loss": 2.9819, "step": 50337 }, { "epoch": 2.47, "grad_norm": 0.7620644569396973, "learning_rate": 4.5539349064136034e-05, "loss": 2.9154, "step": 50338 }, { "epoch": 2.47, "grad_norm": 0.7103702425956726, "learning_rate": 4.5531194458801914e-05, "loss": 3.012, "step": 50339 }, { "epoch": 2.47, "grad_norm": 0.6881940960884094, "learning_rate": 4.55230405236896e-05, "loss": 3.1089, "step": 50340 }, { "epoch": 2.47, "grad_norm": 0.7752044796943665, "learning_rate": 4.5514887258820686e-05, "loss": 2.8773, "step": 50341 }, { "epoch": 2.47, "grad_norm": 0.7745306491851807, "learning_rate": 4.550673466421658e-05, "loss": 2.8716, "step": 50342 }, { "epoch": 2.47, "grad_norm": 0.7594026923179626, "learning_rate": 4.5498582739898705e-05, "loss": 2.9974, "step": 50343 }, { "epoch": 2.47, "grad_norm": 0.7362309694290161, "learning_rate": 4.549043148588863e-05, "loss": 2.9818, "step": 50344 }, { "epoch": 2.47, "grad_norm": 0.7302587032318115, "learning_rate": 4.548228090220773e-05, "loss": 2.8843, "step": 50345 }, { "epoch": 2.47, "grad_norm": 0.7178745865821838, "learning_rate": 4.547413098887755e-05, "loss": 2.8736, "step": 50346 }, { "epoch": 2.47, "grad_norm": 0.753461480140686, "learning_rate": 4.546598174591951e-05, "loss": 3.1034, "step": 50347 }, { "epoch": 2.47, "grad_norm": 0.7480427026748657, "learning_rate": 4.545783317335504e-05, "loss": 3.1151, "step": 50348 }, { "epoch": 2.47, "grad_norm": 0.7292770147323608, "learning_rate": 4.544968527120576e-05, "loss": 2.8363, "step": 50349 }, { "epoch": 2.47, "grad_norm": 0.7527563571929932, "learning_rate": 4.5441538039492985e-05, "loss": 2.9367, "step": 50350 }, { "epoch": 2.47, "grad_norm": 0.7489085793495178, "learning_rate": 4.543339147823824e-05, "loss": 2.9372, "step": 50351 }, { "epoch": 2.47, "grad_norm": 0.7443061470985413, "learning_rate": 4.5425245587462855e-05, "loss": 2.9903, "step": 50352 }, { "epoch": 2.47, "grad_norm": 0.73963463306427, "learning_rate": 4.5417100367188365e-05, "loss": 2.977, "step": 50353 }, { "epoch": 2.47, "grad_norm": 0.7527986168861389, "learning_rate": 4.5408955817436344e-05, "loss": 2.7493, "step": 50354 }, { "epoch": 2.47, "grad_norm": 0.7716648578643799, "learning_rate": 4.540081193822804e-05, "loss": 2.9532, "step": 50355 }, { "epoch": 2.47, "grad_norm": 0.6952173113822937, "learning_rate": 4.539266872958508e-05, "loss": 3.0827, "step": 50356 }, { "epoch": 2.47, "grad_norm": 0.7592951059341431, "learning_rate": 4.538452619152884e-05, "loss": 2.9578, "step": 50357 }, { "epoch": 2.47, "grad_norm": 0.7521546483039856, "learning_rate": 4.537638432408066e-05, "loss": 2.7848, "step": 50358 }, { "epoch": 2.47, "grad_norm": 0.7431459426879883, "learning_rate": 4.536824312726217e-05, "loss": 2.8052, "step": 50359 }, { "epoch": 2.47, "grad_norm": 0.7520620822906494, "learning_rate": 4.536010260109465e-05, "loss": 2.8657, "step": 50360 }, { "epoch": 2.47, "grad_norm": 0.7569798827171326, "learning_rate": 4.535196274559968e-05, "loss": 2.9279, "step": 50361 }, { "epoch": 2.47, "grad_norm": 0.771931529045105, "learning_rate": 4.5343823560798574e-05, "loss": 2.8402, "step": 50362 }, { "epoch": 2.47, "grad_norm": 0.7233810424804688, "learning_rate": 4.533568504671282e-05, "loss": 2.8706, "step": 50363 }, { "epoch": 2.47, "grad_norm": 0.6947945952415466, "learning_rate": 4.53275472033639e-05, "loss": 2.771, "step": 50364 }, { "epoch": 2.47, "grad_norm": 0.7521064281463623, "learning_rate": 4.531941003077326e-05, "loss": 2.7515, "step": 50365 }, { "epoch": 2.47, "grad_norm": 0.7488537430763245, "learning_rate": 4.531127352896226e-05, "loss": 2.7538, "step": 50366 }, { "epoch": 2.47, "grad_norm": 0.78842693567276, "learning_rate": 4.5303137697952266e-05, "loss": 3.043, "step": 50367 }, { "epoch": 2.47, "grad_norm": 0.7508882284164429, "learning_rate": 4.5295002537764804e-05, "loss": 2.9645, "step": 50368 }, { "epoch": 2.47, "grad_norm": 0.8009318113327026, "learning_rate": 4.528686804842136e-05, "loss": 2.9572, "step": 50369 }, { "epoch": 2.47, "grad_norm": 0.7622132897377014, "learning_rate": 4.5278734229943204e-05, "loss": 2.6743, "step": 50370 }, { "epoch": 2.47, "grad_norm": 0.7067914605140686, "learning_rate": 4.52706010823519e-05, "loss": 2.8212, "step": 50371 }, { "epoch": 2.47, "grad_norm": 0.7103236317634583, "learning_rate": 4.5262468605668823e-05, "loss": 2.9323, "step": 50372 }, { "epoch": 2.47, "grad_norm": 0.7678020596504211, "learning_rate": 4.525433679991529e-05, "loss": 3.0528, "step": 50373 }, { "epoch": 2.47, "grad_norm": 0.7323145866394043, "learning_rate": 4.5246205665112924e-05, "loss": 2.7942, "step": 50374 }, { "epoch": 2.47, "grad_norm": 0.713549792766571, "learning_rate": 4.52380752012829e-05, "loss": 2.9964, "step": 50375 }, { "epoch": 2.47, "grad_norm": 0.7168814539909363, "learning_rate": 4.5229945408446865e-05, "loss": 2.8764, "step": 50376 }, { "epoch": 2.47, "grad_norm": 0.7501139044761658, "learning_rate": 4.522181628662605e-05, "loss": 2.8077, "step": 50377 }, { "epoch": 2.47, "grad_norm": 0.7667758464813232, "learning_rate": 4.5213687835841983e-05, "loss": 2.8093, "step": 50378 }, { "epoch": 2.47, "grad_norm": 0.7523639798164368, "learning_rate": 4.520556005611603e-05, "loss": 2.8804, "step": 50379 }, { "epoch": 2.47, "grad_norm": 0.7287237048149109, "learning_rate": 4.5197432947469536e-05, "loss": 3.0469, "step": 50380 }, { "epoch": 2.47, "grad_norm": 0.7388063073158264, "learning_rate": 4.518930650992405e-05, "loss": 2.8228, "step": 50381 }, { "epoch": 2.47, "grad_norm": 0.751380980014801, "learning_rate": 4.518118074350079e-05, "loss": 2.8182, "step": 50382 }, { "epoch": 2.47, "grad_norm": 0.7500567436218262, "learning_rate": 4.517305564822127e-05, "loss": 3.1013, "step": 50383 }, { "epoch": 2.47, "grad_norm": 0.7376142740249634, "learning_rate": 4.516493122410697e-05, "loss": 2.8591, "step": 50384 }, { "epoch": 2.47, "grad_norm": 0.6814765334129333, "learning_rate": 4.515680747117911e-05, "loss": 2.7155, "step": 50385 }, { "epoch": 2.47, "grad_norm": 0.7675392031669617, "learning_rate": 4.5148684389459244e-05, "loss": 2.9702, "step": 50386 }, { "epoch": 2.47, "grad_norm": 0.7462222576141357, "learning_rate": 4.514056197896868e-05, "loss": 2.7139, "step": 50387 }, { "epoch": 2.47, "grad_norm": 0.7849350571632385, "learning_rate": 4.5132440239728774e-05, "loss": 3.1364, "step": 50388 }, { "epoch": 2.47, "grad_norm": 0.7008315324783325, "learning_rate": 4.512431917176107e-05, "loss": 3.0296, "step": 50389 }, { "epoch": 2.47, "grad_norm": 0.7495970726013184, "learning_rate": 4.5116198775086756e-05, "loss": 2.5677, "step": 50390 }, { "epoch": 2.47, "grad_norm": 0.732743501663208, "learning_rate": 4.510807904972737e-05, "loss": 2.8116, "step": 50391 }, { "epoch": 2.47, "grad_norm": 0.7251850962638855, "learning_rate": 4.509995999570422e-05, "loss": 2.9006, "step": 50392 }, { "epoch": 2.47, "grad_norm": 0.7376589775085449, "learning_rate": 4.509184161303876e-05, "loss": 3.0105, "step": 50393 }, { "epoch": 2.47, "grad_norm": 0.7272580862045288, "learning_rate": 4.508372390175235e-05, "loss": 2.7939, "step": 50394 }, { "epoch": 2.47, "grad_norm": 0.7777386903762817, "learning_rate": 4.5075606861866285e-05, "loss": 2.7377, "step": 50395 }, { "epoch": 2.47, "grad_norm": 0.7213007211685181, "learning_rate": 4.5067490493402066e-05, "loss": 2.7687, "step": 50396 }, { "epoch": 2.47, "grad_norm": 0.707280158996582, "learning_rate": 4.505937479638092e-05, "loss": 2.8246, "step": 50397 }, { "epoch": 2.47, "grad_norm": 0.74385666847229, "learning_rate": 4.505125977082436e-05, "loss": 2.7228, "step": 50398 }, { "epoch": 2.47, "grad_norm": 0.738721489906311, "learning_rate": 4.5043145416753755e-05, "loss": 2.8539, "step": 50399 }, { "epoch": 2.47, "grad_norm": 0.7245712876319885, "learning_rate": 4.5035031734190476e-05, "loss": 3.0549, "step": 50400 }, { "epoch": 2.47, "grad_norm": 0.7531771063804626, "learning_rate": 4.5026918723155826e-05, "loss": 2.9992, "step": 50401 }, { "epoch": 2.47, "grad_norm": 0.7361901998519897, "learning_rate": 4.501880638367112e-05, "loss": 2.7009, "step": 50402 }, { "epoch": 2.47, "grad_norm": 0.7059527039527893, "learning_rate": 4.5010694715757826e-05, "loss": 2.8706, "step": 50403 }, { "epoch": 2.47, "grad_norm": 0.7453323602676392, "learning_rate": 4.500258371943737e-05, "loss": 2.6934, "step": 50404 }, { "epoch": 2.47, "grad_norm": 0.7383242845535278, "learning_rate": 4.4994473394730944e-05, "loss": 2.8522, "step": 50405 }, { "epoch": 2.47, "grad_norm": 0.723354697227478, "learning_rate": 4.498636374166007e-05, "loss": 2.944, "step": 50406 }, { "epoch": 2.47, "grad_norm": 0.7948548793792725, "learning_rate": 4.4978254760245955e-05, "loss": 2.9672, "step": 50407 }, { "epoch": 2.47, "grad_norm": 0.7579402923583984, "learning_rate": 4.4970146450510124e-05, "loss": 2.704, "step": 50408 }, { "epoch": 2.47, "grad_norm": 0.7572686672210693, "learning_rate": 4.496203881247385e-05, "loss": 2.9605, "step": 50409 }, { "epoch": 2.47, "grad_norm": 0.7329517006874084, "learning_rate": 4.49539318461584e-05, "loss": 2.8146, "step": 50410 }, { "epoch": 2.47, "grad_norm": 0.744154691696167, "learning_rate": 4.49458255515853e-05, "loss": 2.7126, "step": 50411 }, { "epoch": 2.47, "grad_norm": 0.7456027269363403, "learning_rate": 4.493771992877572e-05, "loss": 2.9384, "step": 50412 }, { "epoch": 2.47, "grad_norm": 0.7537271976470947, "learning_rate": 4.492961497775109e-05, "loss": 2.9007, "step": 50413 }, { "epoch": 2.47, "grad_norm": 0.7661435008049011, "learning_rate": 4.4921510698532846e-05, "loss": 2.7748, "step": 50414 }, { "epoch": 2.47, "grad_norm": 0.7212929129600525, "learning_rate": 4.4913407091142265e-05, "loss": 3.0509, "step": 50415 }, { "epoch": 2.47, "grad_norm": 0.7772074341773987, "learning_rate": 4.490530415560065e-05, "loss": 2.6768, "step": 50416 }, { "epoch": 2.47, "grad_norm": 0.7107312083244324, "learning_rate": 4.48972018919293e-05, "loss": 3.0139, "step": 50417 }, { "epoch": 2.47, "grad_norm": 0.7618693709373474, "learning_rate": 4.488910030014973e-05, "loss": 2.992, "step": 50418 }, { "epoch": 2.47, "grad_norm": 0.7590107321739197, "learning_rate": 4.4880999380283075e-05, "loss": 2.7462, "step": 50419 }, { "epoch": 2.47, "grad_norm": 0.7364644408226013, "learning_rate": 4.487289913235077e-05, "loss": 2.9629, "step": 50420 }, { "epoch": 2.47, "grad_norm": 0.726629376411438, "learning_rate": 4.486479955637422e-05, "loss": 3.1113, "step": 50421 }, { "epoch": 2.47, "grad_norm": 0.7420285940170288, "learning_rate": 4.4856700652374714e-05, "loss": 2.9697, "step": 50422 }, { "epoch": 2.47, "grad_norm": 0.7878907322883606, "learning_rate": 4.484860242037352e-05, "loss": 2.7718, "step": 50423 }, { "epoch": 2.47, "grad_norm": 0.7129849791526794, "learning_rate": 4.484050486039195e-05, "loss": 3.0591, "step": 50424 }, { "epoch": 2.47, "grad_norm": 0.7504441142082214, "learning_rate": 4.4832407972451354e-05, "loss": 2.9215, "step": 50425 }, { "epoch": 2.47, "grad_norm": 0.7380017042160034, "learning_rate": 4.4824311756573204e-05, "loss": 2.9334, "step": 50426 }, { "epoch": 2.47, "grad_norm": 0.7509652376174927, "learning_rate": 4.481621621277861e-05, "loss": 3.0772, "step": 50427 }, { "epoch": 2.47, "grad_norm": 0.7229263186454773, "learning_rate": 4.4808121341089054e-05, "loss": 2.8744, "step": 50428 }, { "epoch": 2.47, "grad_norm": 0.7127883434295654, "learning_rate": 4.480002714152575e-05, "loss": 2.7404, "step": 50429 }, { "epoch": 2.47, "grad_norm": 0.77323979139328, "learning_rate": 4.479193361411012e-05, "loss": 3.0917, "step": 50430 }, { "epoch": 2.47, "grad_norm": 0.7353416085243225, "learning_rate": 4.478384075886341e-05, "loss": 2.8433, "step": 50431 }, { "epoch": 2.47, "grad_norm": 0.7446988821029663, "learning_rate": 4.477574857580687e-05, "loss": 3.1125, "step": 50432 }, { "epoch": 2.47, "grad_norm": 0.7732658386230469, "learning_rate": 4.476765706496198e-05, "loss": 2.7603, "step": 50433 }, { "epoch": 2.47, "grad_norm": 0.7612114548683167, "learning_rate": 4.475956622634986e-05, "loss": 3.1181, "step": 50434 }, { "epoch": 2.47, "grad_norm": 0.9173521399497986, "learning_rate": 4.475147605999196e-05, "loss": 2.8608, "step": 50435 }, { "epoch": 2.47, "grad_norm": 0.7881463766098022, "learning_rate": 4.47433865659096e-05, "loss": 2.8996, "step": 50436 }, { "epoch": 2.47, "grad_norm": 0.7628815174102783, "learning_rate": 4.473529774412405e-05, "loss": 2.9975, "step": 50437 }, { "epoch": 2.47, "grad_norm": 0.710648238658905, "learning_rate": 4.472720959465658e-05, "loss": 3.011, "step": 50438 }, { "epoch": 2.47, "grad_norm": 0.705659806728363, "learning_rate": 4.471912211752845e-05, "loss": 3.1302, "step": 50439 }, { "epoch": 2.47, "grad_norm": 0.7531771659851074, "learning_rate": 4.471103531276102e-05, "loss": 2.7541, "step": 50440 }, { "epoch": 2.47, "grad_norm": 0.7266751527786255, "learning_rate": 4.470294918037565e-05, "loss": 2.8924, "step": 50441 }, { "epoch": 2.47, "grad_norm": 0.7597355246543884, "learning_rate": 4.4694863720393536e-05, "loss": 2.7383, "step": 50442 }, { "epoch": 2.47, "grad_norm": 0.7361400723457336, "learning_rate": 4.4686778932836043e-05, "loss": 2.8306, "step": 50443 }, { "epoch": 2.47, "grad_norm": 0.7326222062110901, "learning_rate": 4.467869481772448e-05, "loss": 2.7957, "step": 50444 }, { "epoch": 2.47, "grad_norm": 0.7179027199745178, "learning_rate": 4.467061137508e-05, "loss": 2.7548, "step": 50445 }, { "epoch": 2.47, "grad_norm": 0.7314981818199158, "learning_rate": 4.466252860492409e-05, "loss": 2.7318, "step": 50446 }, { "epoch": 2.47, "grad_norm": 0.8546863794326782, "learning_rate": 4.465444650727785e-05, "loss": 2.7946, "step": 50447 }, { "epoch": 2.47, "grad_norm": 0.7847939729690552, "learning_rate": 4.4646365082162715e-05, "loss": 2.8562, "step": 50448 }, { "epoch": 2.47, "grad_norm": 0.7865023016929626, "learning_rate": 4.463828432959986e-05, "loss": 2.7398, "step": 50449 }, { "epoch": 2.47, "grad_norm": 0.7689087390899658, "learning_rate": 4.463020424961064e-05, "loss": 2.9209, "step": 50450 }, { "epoch": 2.47, "grad_norm": 0.7183415293693542, "learning_rate": 4.462212484221636e-05, "loss": 2.8917, "step": 50451 }, { "epoch": 2.47, "grad_norm": 0.7046708464622498, "learning_rate": 4.461404610743824e-05, "loss": 3.1845, "step": 50452 }, { "epoch": 2.47, "grad_norm": 0.7757652401924133, "learning_rate": 4.4605968045297566e-05, "loss": 2.8422, "step": 50453 }, { "epoch": 2.47, "grad_norm": 0.702682614326477, "learning_rate": 4.459789065581559e-05, "loss": 2.8507, "step": 50454 }, { "epoch": 2.47, "grad_norm": 0.7367978692054749, "learning_rate": 4.45898139390136e-05, "loss": 2.7709, "step": 50455 }, { "epoch": 2.47, "grad_norm": 0.7395363450050354, "learning_rate": 4.458173789491293e-05, "loss": 2.6215, "step": 50456 }, { "epoch": 2.47, "grad_norm": 0.8202866315841675, "learning_rate": 4.457366252353477e-05, "loss": 2.9662, "step": 50457 }, { "epoch": 2.47, "grad_norm": 0.738206148147583, "learning_rate": 4.4565587824900475e-05, "loss": 2.843, "step": 50458 }, { "epoch": 2.47, "grad_norm": 0.6997504830360413, "learning_rate": 4.455751379903129e-05, "loss": 2.8662, "step": 50459 }, { "epoch": 2.47, "grad_norm": 0.7250627875328064, "learning_rate": 4.454944044594836e-05, "loss": 2.9364, "step": 50460 }, { "epoch": 2.47, "grad_norm": 0.7618042230606079, "learning_rate": 4.4541367765673105e-05, "loss": 2.7884, "step": 50461 }, { "epoch": 2.47, "grad_norm": 0.7377805113792419, "learning_rate": 4.453329575822665e-05, "loss": 2.7566, "step": 50462 }, { "epoch": 2.47, "grad_norm": 0.7210468649864197, "learning_rate": 4.4525224423630434e-05, "loss": 2.8037, "step": 50463 }, { "epoch": 2.47, "grad_norm": 0.7345592975616455, "learning_rate": 4.451715376190551e-05, "loss": 2.8834, "step": 50464 }, { "epoch": 2.47, "grad_norm": 0.7215968370437622, "learning_rate": 4.4509083773073226e-05, "loss": 2.9943, "step": 50465 }, { "epoch": 2.47, "grad_norm": 0.7119701504707336, "learning_rate": 4.4501014457154935e-05, "loss": 2.8041, "step": 50466 }, { "epoch": 2.47, "grad_norm": 0.7098177075386047, "learning_rate": 4.449294581417182e-05, "loss": 2.8293, "step": 50467 }, { "epoch": 2.47, "grad_norm": 0.7481569647789001, "learning_rate": 4.448487784414506e-05, "loss": 2.802, "step": 50468 }, { "epoch": 2.47, "grad_norm": 0.7269555330276489, "learning_rate": 4.447681054709594e-05, "loss": 2.7692, "step": 50469 }, { "epoch": 2.47, "grad_norm": 0.6905206441879272, "learning_rate": 4.446874392304568e-05, "loss": 2.6578, "step": 50470 }, { "epoch": 2.47, "grad_norm": 0.7605257630348206, "learning_rate": 4.446067797201569e-05, "loss": 2.8615, "step": 50471 }, { "epoch": 2.47, "grad_norm": 0.7663816213607788, "learning_rate": 4.445261269402699e-05, "loss": 2.7005, "step": 50472 }, { "epoch": 2.47, "grad_norm": 0.7191436290740967, "learning_rate": 4.4444548089101e-05, "loss": 3.0061, "step": 50473 }, { "epoch": 2.47, "grad_norm": 0.7263677716255188, "learning_rate": 4.443648415725889e-05, "loss": 2.8994, "step": 50474 }, { "epoch": 2.47, "grad_norm": 0.7229388952255249, "learning_rate": 4.442842089852182e-05, "loss": 2.9245, "step": 50475 }, { "epoch": 2.47, "grad_norm": 0.8069564700126648, "learning_rate": 4.442035831291117e-05, "loss": 2.7624, "step": 50476 }, { "epoch": 2.47, "grad_norm": 0.7363615036010742, "learning_rate": 4.4412296400448064e-05, "loss": 2.8038, "step": 50477 }, { "epoch": 2.47, "grad_norm": 0.7362646460533142, "learning_rate": 4.4404235161153846e-05, "loss": 2.9683, "step": 50478 }, { "epoch": 2.47, "grad_norm": 0.7199697494506836, "learning_rate": 4.439617459504959e-05, "loss": 2.852, "step": 50479 }, { "epoch": 2.47, "grad_norm": 0.7283482551574707, "learning_rate": 4.43881147021567e-05, "loss": 2.7507, "step": 50480 }, { "epoch": 2.47, "grad_norm": 0.7187907695770264, "learning_rate": 4.43800554824963e-05, "loss": 3.07, "step": 50481 }, { "epoch": 2.47, "grad_norm": 0.711829423904419, "learning_rate": 4.4371996936089595e-05, "loss": 2.7205, "step": 50482 }, { "epoch": 2.47, "grad_norm": 0.7250786423683167, "learning_rate": 4.436393906295792e-05, "loss": 2.7172, "step": 50483 }, { "epoch": 2.47, "grad_norm": 0.8321327567100525, "learning_rate": 4.4355881863122367e-05, "loss": 2.9453, "step": 50484 }, { "epoch": 2.47, "grad_norm": 0.7267419099807739, "learning_rate": 4.4347825336604216e-05, "loss": 2.6185, "step": 50485 }, { "epoch": 2.47, "grad_norm": 0.7733017802238464, "learning_rate": 4.4339769483424783e-05, "loss": 3.1191, "step": 50486 }, { "epoch": 2.47, "grad_norm": 0.75758957862854, "learning_rate": 4.433171430360508e-05, "loss": 2.7548, "step": 50487 }, { "epoch": 2.47, "grad_norm": 0.7199147939682007, "learning_rate": 4.432365979716653e-05, "loss": 2.8798, "step": 50488 }, { "epoch": 2.47, "grad_norm": 0.723355233669281, "learning_rate": 4.431560596413025e-05, "loss": 2.8854, "step": 50489 }, { "epoch": 2.47, "grad_norm": 0.7314903140068054, "learning_rate": 4.430755280451739e-05, "loss": 2.8459, "step": 50490 }, { "epoch": 2.47, "grad_norm": 0.8310138583183289, "learning_rate": 4.429950031834932e-05, "loss": 3.1573, "step": 50491 }, { "epoch": 2.47, "grad_norm": 0.728308916091919, "learning_rate": 4.4291448505647043e-05, "loss": 3.0318, "step": 50492 }, { "epoch": 2.47, "grad_norm": 0.776800274848938, "learning_rate": 4.4283397366431964e-05, "loss": 2.8144, "step": 50493 }, { "epoch": 2.47, "grad_norm": 0.7904463410377502, "learning_rate": 4.427534690072516e-05, "loss": 2.8312, "step": 50494 }, { "epoch": 2.47, "grad_norm": 0.734760046005249, "learning_rate": 4.426729710854793e-05, "loss": 3.018, "step": 50495 }, { "epoch": 2.47, "grad_norm": 0.7029054760932922, "learning_rate": 4.425924798992141e-05, "loss": 2.8469, "step": 50496 }, { "epoch": 2.47, "grad_norm": 0.7326534390449524, "learning_rate": 4.4251199544866755e-05, "loss": 2.8889, "step": 50497 }, { "epoch": 2.47, "grad_norm": 0.7710874676704407, "learning_rate": 4.424315177340528e-05, "loss": 2.9888, "step": 50498 }, { "epoch": 2.47, "grad_norm": 0.7983696460723877, "learning_rate": 4.423510467555804e-05, "loss": 3.0389, "step": 50499 }, { "epoch": 2.47, "grad_norm": 0.7880064249038696, "learning_rate": 4.4227058251346416e-05, "loss": 2.8939, "step": 50500 }, { "epoch": 2.47, "grad_norm": 0.7095093131065369, "learning_rate": 4.421901250079138e-05, "loss": 2.9274, "step": 50501 }, { "epoch": 2.47, "grad_norm": 0.732288658618927, "learning_rate": 4.4210967423914366e-05, "loss": 3.0009, "step": 50502 }, { "epoch": 2.48, "grad_norm": 0.7357195615768433, "learning_rate": 4.420292302073638e-05, "loss": 3.0076, "step": 50503 }, { "epoch": 2.48, "grad_norm": 0.6846479773521423, "learning_rate": 4.419487929127864e-05, "loss": 2.9429, "step": 50504 }, { "epoch": 2.48, "grad_norm": 0.7166612148284912, "learning_rate": 4.41868362355624e-05, "loss": 2.7909, "step": 50505 }, { "epoch": 2.48, "grad_norm": 0.7130656242370605, "learning_rate": 4.417879385360876e-05, "loss": 2.7799, "step": 50506 }, { "epoch": 2.48, "grad_norm": 0.7403449416160583, "learning_rate": 4.417075214543893e-05, "loss": 3.0642, "step": 50507 }, { "epoch": 2.48, "grad_norm": 0.7145542502403259, "learning_rate": 4.416271111107417e-05, "loss": 2.9387, "step": 50508 }, { "epoch": 2.48, "grad_norm": 0.7323039174079895, "learning_rate": 4.41546707505355e-05, "loss": 2.8822, "step": 50509 }, { "epoch": 2.48, "grad_norm": 0.7286449074745178, "learning_rate": 4.414663106384428e-05, "loss": 3.0583, "step": 50510 }, { "epoch": 2.48, "grad_norm": 0.773391842842102, "learning_rate": 4.4138592051021595e-05, "loss": 2.8229, "step": 50511 }, { "epoch": 2.48, "grad_norm": 0.7383174300193787, "learning_rate": 4.413055371208854e-05, "loss": 3.0271, "step": 50512 }, { "epoch": 2.48, "grad_norm": 0.7910652160644531, "learning_rate": 4.4122516047066457e-05, "loss": 2.7217, "step": 50513 }, { "epoch": 2.48, "grad_norm": 0.874261200428009, "learning_rate": 4.4114479055976316e-05, "loss": 3.0323, "step": 50514 }, { "epoch": 2.48, "grad_norm": 0.7225984930992126, "learning_rate": 4.41064427388395e-05, "loss": 3.0949, "step": 50515 }, { "epoch": 2.48, "grad_norm": 0.7266954779624939, "learning_rate": 4.4098407095676956e-05, "loss": 2.8574, "step": 50516 }, { "epoch": 2.48, "grad_norm": 0.7139513492584229, "learning_rate": 4.409037212651004e-05, "loss": 2.6369, "step": 50517 }, { "epoch": 2.48, "grad_norm": 0.7322560548782349, "learning_rate": 4.408233783135984e-05, "loss": 2.8167, "step": 50518 }, { "epoch": 2.48, "grad_norm": 0.7208927869796753, "learning_rate": 4.407430421024746e-05, "loss": 2.8963, "step": 50519 }, { "epoch": 2.48, "grad_norm": 0.7421464323997498, "learning_rate": 4.406627126319415e-05, "loss": 2.9859, "step": 50520 }, { "epoch": 2.48, "grad_norm": 0.7248895764350891, "learning_rate": 4.4058238990220975e-05, "loss": 2.9614, "step": 50521 }, { "epoch": 2.48, "grad_norm": 0.7308017611503601, "learning_rate": 4.405020739134914e-05, "loss": 2.8708, "step": 50522 }, { "epoch": 2.48, "grad_norm": 0.7517200708389282, "learning_rate": 4.404217646659985e-05, "loss": 2.8794, "step": 50523 }, { "epoch": 2.48, "grad_norm": 0.7376325726509094, "learning_rate": 4.403414621599421e-05, "loss": 2.8069, "step": 50524 }, { "epoch": 2.48, "grad_norm": 0.7713701128959656, "learning_rate": 4.4026116639553364e-05, "loss": 2.7789, "step": 50525 }, { "epoch": 2.48, "grad_norm": 0.7685970067977905, "learning_rate": 4.401808773729842e-05, "loss": 2.8286, "step": 50526 }, { "epoch": 2.48, "grad_norm": 0.753248393535614, "learning_rate": 4.401005950925054e-05, "loss": 2.903, "step": 50527 }, { "epoch": 2.48, "grad_norm": 0.7071647047996521, "learning_rate": 4.4002031955430994e-05, "loss": 2.7273, "step": 50528 }, { "epoch": 2.48, "grad_norm": 0.7305782437324524, "learning_rate": 4.399400507586074e-05, "loss": 3.0772, "step": 50529 }, { "epoch": 2.48, "grad_norm": 0.7409292459487915, "learning_rate": 4.398597887056109e-05, "loss": 2.8982, "step": 50530 }, { "epoch": 2.48, "grad_norm": 0.7586793303489685, "learning_rate": 4.397795333955303e-05, "loss": 3.0227, "step": 50531 }, { "epoch": 2.48, "grad_norm": 0.7480147480964661, "learning_rate": 4.396992848285781e-05, "loss": 2.5625, "step": 50532 }, { "epoch": 2.48, "grad_norm": 0.7570845484733582, "learning_rate": 4.3961904300496574e-05, "loss": 2.9181, "step": 50533 }, { "epoch": 2.48, "grad_norm": 0.7564980983734131, "learning_rate": 4.3953880792490285e-05, "loss": 2.9893, "step": 50534 }, { "epoch": 2.48, "grad_norm": 0.7372446060180664, "learning_rate": 4.394585795886031e-05, "loss": 2.7992, "step": 50535 }, { "epoch": 2.48, "grad_norm": 0.789084792137146, "learning_rate": 4.3937835799627577e-05, "loss": 2.991, "step": 50536 }, { "epoch": 2.48, "grad_norm": 0.7723736763000488, "learning_rate": 4.392981431481329e-05, "loss": 2.9124, "step": 50537 }, { "epoch": 2.48, "grad_norm": 0.7522666454315186, "learning_rate": 4.3921793504438685e-05, "loss": 2.9372, "step": 50538 }, { "epoch": 2.48, "grad_norm": 0.765546977519989, "learning_rate": 4.391377336852476e-05, "loss": 2.8857, "step": 50539 }, { "epoch": 2.48, "grad_norm": 0.8160582780838013, "learning_rate": 4.390575390709271e-05, "loss": 3.0308, "step": 50540 }, { "epoch": 2.48, "grad_norm": 0.7867605686187744, "learning_rate": 4.3897735120163544e-05, "loss": 2.9236, "step": 50541 }, { "epoch": 2.48, "grad_norm": 0.7485517263412476, "learning_rate": 4.3889717007758416e-05, "loss": 3.0409, "step": 50542 }, { "epoch": 2.48, "grad_norm": 0.7289761900901794, "learning_rate": 4.388169956989861e-05, "loss": 3.1161, "step": 50543 }, { "epoch": 2.48, "grad_norm": 0.7518919110298157, "learning_rate": 4.3873682806605004e-05, "loss": 2.7525, "step": 50544 }, { "epoch": 2.48, "grad_norm": 0.758308470249176, "learning_rate": 4.3865666717898915e-05, "loss": 2.7692, "step": 50545 }, { "epoch": 2.48, "grad_norm": 0.7173359394073486, "learning_rate": 4.385765130380137e-05, "loss": 2.7888, "step": 50546 }, { "epoch": 2.48, "grad_norm": 0.7279924154281616, "learning_rate": 4.384963656433337e-05, "loss": 2.8149, "step": 50547 }, { "epoch": 2.48, "grad_norm": 0.754176139831543, "learning_rate": 4.384162249951624e-05, "loss": 2.9872, "step": 50548 }, { "epoch": 2.48, "grad_norm": 0.7126150727272034, "learning_rate": 4.383360910937089e-05, "loss": 2.9256, "step": 50549 }, { "epoch": 2.48, "grad_norm": 0.7775080800056458, "learning_rate": 4.3825596393918586e-05, "loss": 2.9169, "step": 50550 }, { "epoch": 2.48, "grad_norm": 0.7692570686340332, "learning_rate": 4.381758435318029e-05, "loss": 2.8355, "step": 50551 }, { "epoch": 2.48, "grad_norm": 0.7409884929656982, "learning_rate": 4.380957298717717e-05, "loss": 2.9527, "step": 50552 }, { "epoch": 2.48, "grad_norm": 0.7356708645820618, "learning_rate": 4.3801562295930395e-05, "loss": 2.7726, "step": 50553 }, { "epoch": 2.48, "grad_norm": 0.7332990169525146, "learning_rate": 4.379355227946102e-05, "loss": 2.7626, "step": 50554 }, { "epoch": 2.48, "grad_norm": 0.7492383718490601, "learning_rate": 4.37855429377901e-05, "loss": 2.7641, "step": 50555 }, { "epoch": 2.48, "grad_norm": 0.7193925976753235, "learning_rate": 4.377753427093867e-05, "loss": 2.7948, "step": 50556 }, { "epoch": 2.48, "grad_norm": 0.7237381339073181, "learning_rate": 4.3769526278927935e-05, "loss": 2.8049, "step": 50557 }, { "epoch": 2.48, "grad_norm": 0.7913342118263245, "learning_rate": 4.376151896177903e-05, "loss": 2.9314, "step": 50558 }, { "epoch": 2.48, "grad_norm": 0.6835290789604187, "learning_rate": 4.3753512319512876e-05, "loss": 3.2759, "step": 50559 }, { "epoch": 2.48, "grad_norm": 0.7239914536476135, "learning_rate": 4.3745506352150715e-05, "loss": 2.7385, "step": 50560 }, { "epoch": 2.48, "grad_norm": 0.7153903841972351, "learning_rate": 4.373750105971361e-05, "loss": 3.0137, "step": 50561 }, { "epoch": 2.48, "grad_norm": 0.7405564785003662, "learning_rate": 4.37294964422225e-05, "loss": 2.8258, "step": 50562 }, { "epoch": 2.48, "grad_norm": 0.7112692594528198, "learning_rate": 4.3721492499698674e-05, "loss": 2.9587, "step": 50563 }, { "epoch": 2.48, "grad_norm": 0.7458570003509521, "learning_rate": 4.3713489232163014e-05, "loss": 2.8849, "step": 50564 }, { "epoch": 2.48, "grad_norm": 0.7017235159873962, "learning_rate": 4.37054866396368e-05, "loss": 2.9045, "step": 50565 }, { "epoch": 2.48, "grad_norm": 0.7555688619613647, "learning_rate": 4.3697484722140917e-05, "loss": 2.9832, "step": 50566 }, { "epoch": 2.48, "grad_norm": 0.7248188853263855, "learning_rate": 4.368948347969655e-05, "loss": 2.955, "step": 50567 }, { "epoch": 2.48, "grad_norm": 0.7523334622383118, "learning_rate": 4.368148291232492e-05, "loss": 2.9627, "step": 50568 }, { "epoch": 2.48, "grad_norm": 0.7347005009651184, "learning_rate": 4.3673483020046764e-05, "loss": 3.0475, "step": 50569 }, { "epoch": 2.48, "grad_norm": 0.7141953110694885, "learning_rate": 4.366548380288337e-05, "loss": 2.8616, "step": 50570 }, { "epoch": 2.48, "grad_norm": 0.7329511046409607, "learning_rate": 4.365748526085573e-05, "loss": 2.9409, "step": 50571 }, { "epoch": 2.48, "grad_norm": 0.7447880506515503, "learning_rate": 4.364948739398498e-05, "loss": 2.8136, "step": 50572 }, { "epoch": 2.48, "grad_norm": 0.7083245515823364, "learning_rate": 4.3641490202292086e-05, "loss": 2.616, "step": 50573 }, { "epoch": 2.48, "grad_norm": 0.7109681367874146, "learning_rate": 4.363349368579818e-05, "loss": 3.0928, "step": 50574 }, { "epoch": 2.48, "grad_norm": 0.8247482180595398, "learning_rate": 4.362549784452436e-05, "loss": 2.8209, "step": 50575 }, { "epoch": 2.48, "grad_norm": 0.7620562314987183, "learning_rate": 4.3617502678491666e-05, "loss": 3.0653, "step": 50576 }, { "epoch": 2.48, "grad_norm": 0.7446070313453674, "learning_rate": 4.360950818772112e-05, "loss": 2.795, "step": 50577 }, { "epoch": 2.48, "grad_norm": 0.7512825727462769, "learning_rate": 4.36015143722337e-05, "loss": 2.978, "step": 50578 }, { "epoch": 2.48, "grad_norm": 0.7243783473968506, "learning_rate": 4.3593521232050564e-05, "loss": 3.0133, "step": 50579 }, { "epoch": 2.48, "grad_norm": 0.7371456027030945, "learning_rate": 4.358552876719282e-05, "loss": 2.8551, "step": 50580 }, { "epoch": 2.48, "grad_norm": 0.7351969480514526, "learning_rate": 4.357753697768136e-05, "loss": 3.0309, "step": 50581 }, { "epoch": 2.48, "grad_norm": 0.7740240693092346, "learning_rate": 4.356954586353739e-05, "loss": 2.8982, "step": 50582 }, { "epoch": 2.48, "grad_norm": 0.7251624464988708, "learning_rate": 4.356155542478187e-05, "loss": 2.8026, "step": 50583 }, { "epoch": 2.48, "grad_norm": 0.734462559223175, "learning_rate": 4.3553565661435804e-05, "loss": 2.9494, "step": 50584 }, { "epoch": 2.48, "grad_norm": 0.7287883758544922, "learning_rate": 4.354557657352039e-05, "loss": 2.782, "step": 50585 }, { "epoch": 2.48, "grad_norm": 0.7678456902503967, "learning_rate": 4.3537588161056466e-05, "loss": 2.7814, "step": 50586 }, { "epoch": 2.48, "grad_norm": 0.722035825252533, "learning_rate": 4.352960042406526e-05, "loss": 2.8701, "step": 50587 }, { "epoch": 2.48, "grad_norm": 0.7033929824829102, "learning_rate": 4.352161336256768e-05, "loss": 2.9547, "step": 50588 }, { "epoch": 2.48, "grad_norm": 0.7072238326072693, "learning_rate": 4.351362697658477e-05, "loss": 2.9044, "step": 50589 }, { "epoch": 2.48, "grad_norm": 0.7303126454353333, "learning_rate": 4.350564126613769e-05, "loss": 2.7136, "step": 50590 }, { "epoch": 2.48, "grad_norm": 0.7483873963356018, "learning_rate": 4.349765623124739e-05, "loss": 2.8554, "step": 50591 }, { "epoch": 2.48, "grad_norm": 0.722395658493042, "learning_rate": 4.3489671871934915e-05, "loss": 2.8171, "step": 50592 }, { "epoch": 2.48, "grad_norm": 0.71732497215271, "learning_rate": 4.3481688188221185e-05, "loss": 2.9246, "step": 50593 }, { "epoch": 2.48, "grad_norm": 0.7207403182983398, "learning_rate": 4.347370518012731e-05, "loss": 3.0359, "step": 50594 }, { "epoch": 2.48, "grad_norm": 0.7453340291976929, "learning_rate": 4.346572284767442e-05, "loss": 2.9454, "step": 50595 }, { "epoch": 2.48, "grad_norm": 0.7322558164596558, "learning_rate": 4.345774119088339e-05, "loss": 3.0738, "step": 50596 }, { "epoch": 2.48, "grad_norm": 0.7502358555793762, "learning_rate": 4.344976020977533e-05, "loss": 2.9023, "step": 50597 }, { "epoch": 2.48, "grad_norm": 0.7384856939315796, "learning_rate": 4.3441779904371234e-05, "loss": 2.8681, "step": 50598 }, { "epoch": 2.48, "grad_norm": 0.7567711472511292, "learning_rate": 4.343380027469208e-05, "loss": 3.0825, "step": 50599 }, { "epoch": 2.48, "grad_norm": 0.7449147701263428, "learning_rate": 4.342582132075895e-05, "loss": 2.8255, "step": 50600 }, { "epoch": 2.48, "grad_norm": 0.7349591255187988, "learning_rate": 4.341784304259276e-05, "loss": 2.8695, "step": 50601 }, { "epoch": 2.48, "grad_norm": 0.7392820119857788, "learning_rate": 4.340986544021466e-05, "loss": 3.0477, "step": 50602 }, { "epoch": 2.48, "grad_norm": 0.7246425747871399, "learning_rate": 4.3401888513645545e-05, "loss": 2.7809, "step": 50603 }, { "epoch": 2.48, "grad_norm": 0.7254135608673096, "learning_rate": 4.339391226290655e-05, "loss": 2.8379, "step": 50604 }, { "epoch": 2.48, "grad_norm": 0.7831563353538513, "learning_rate": 4.3385936688018564e-05, "loss": 2.6841, "step": 50605 }, { "epoch": 2.48, "grad_norm": 0.7377023100852966, "learning_rate": 4.33779617890026e-05, "loss": 2.8574, "step": 50606 }, { "epoch": 2.48, "grad_norm": 0.7295764088630676, "learning_rate": 4.336998756587975e-05, "loss": 2.7794, "step": 50607 }, { "epoch": 2.48, "grad_norm": 0.7548102736473083, "learning_rate": 4.336201401867089e-05, "loss": 2.8338, "step": 50608 }, { "epoch": 2.48, "grad_norm": 0.7400689721107483, "learning_rate": 4.33540411473971e-05, "loss": 3.0272, "step": 50609 }, { "epoch": 2.48, "grad_norm": 0.7324529886245728, "learning_rate": 4.334606895207944e-05, "loss": 2.8919, "step": 50610 }, { "epoch": 2.48, "grad_norm": 0.7176007628440857, "learning_rate": 4.333809743273878e-05, "loss": 2.9282, "step": 50611 }, { "epoch": 2.48, "grad_norm": 0.7532137632369995, "learning_rate": 4.333012658939625e-05, "loss": 2.8807, "step": 50612 }, { "epoch": 2.48, "grad_norm": 0.7730857133865356, "learning_rate": 4.332215642207276e-05, "loss": 2.9552, "step": 50613 }, { "epoch": 2.48, "grad_norm": 0.7338061332702637, "learning_rate": 4.3314186930789226e-05, "loss": 2.8655, "step": 50614 }, { "epoch": 2.48, "grad_norm": 0.7536339163780212, "learning_rate": 4.33062181155668e-05, "loss": 2.9174, "step": 50615 }, { "epoch": 2.48, "grad_norm": 0.7680534720420837, "learning_rate": 4.3298249976426336e-05, "loss": 2.8697, "step": 50616 }, { "epoch": 2.48, "grad_norm": 0.7779417634010315, "learning_rate": 4.329028251338897e-05, "loss": 2.6885, "step": 50617 }, { "epoch": 2.48, "grad_norm": 0.7388231754302979, "learning_rate": 4.3282315726475505e-05, "loss": 2.8754, "step": 50618 }, { "epoch": 2.48, "grad_norm": 0.7245052456855774, "learning_rate": 4.327434961570707e-05, "loss": 2.9758, "step": 50619 }, { "epoch": 2.48, "grad_norm": 0.7409709692001343, "learning_rate": 4.3266384181104595e-05, "loss": 2.9113, "step": 50620 }, { "epoch": 2.48, "grad_norm": 0.7566462755203247, "learning_rate": 4.3258419422688995e-05, "loss": 3.0348, "step": 50621 }, { "epoch": 2.48, "grad_norm": 0.7550995349884033, "learning_rate": 4.325045534048135e-05, "loss": 2.9427, "step": 50622 }, { "epoch": 2.48, "grad_norm": 0.6849644184112549, "learning_rate": 4.324249193450254e-05, "loss": 2.708, "step": 50623 }, { "epoch": 2.48, "grad_norm": 0.7459685206413269, "learning_rate": 4.3234529204773624e-05, "loss": 2.933, "step": 50624 }, { "epoch": 2.48, "grad_norm": 0.753688633441925, "learning_rate": 4.3226567151315585e-05, "loss": 2.771, "step": 50625 }, { "epoch": 2.48, "grad_norm": 0.7236641645431519, "learning_rate": 4.321860577414933e-05, "loss": 2.9226, "step": 50626 }, { "epoch": 2.48, "grad_norm": 0.7358285188674927, "learning_rate": 4.321064507329588e-05, "loss": 2.8284, "step": 50627 }, { "epoch": 2.48, "grad_norm": 0.769458532333374, "learning_rate": 4.3202685048776085e-05, "loss": 2.8737, "step": 50628 }, { "epoch": 2.48, "grad_norm": 0.721544086933136, "learning_rate": 4.3194725700611e-05, "loss": 2.9255, "step": 50629 }, { "epoch": 2.48, "grad_norm": 0.7584481239318848, "learning_rate": 4.3186767028821666e-05, "loss": 2.8509, "step": 50630 }, { "epoch": 2.48, "grad_norm": 0.8179458975791931, "learning_rate": 4.317880903342887e-05, "loss": 2.7925, "step": 50631 }, { "epoch": 2.48, "grad_norm": 0.7520755529403687, "learning_rate": 4.317085171445377e-05, "loss": 2.8761, "step": 50632 }, { "epoch": 2.48, "grad_norm": 0.7519469261169434, "learning_rate": 4.31628950719171e-05, "loss": 2.8604, "step": 50633 }, { "epoch": 2.48, "grad_norm": 0.7431280016899109, "learning_rate": 4.3154939105840056e-05, "loss": 2.6998, "step": 50634 }, { "epoch": 2.48, "grad_norm": 0.7596693634986877, "learning_rate": 4.314698381624342e-05, "loss": 3.019, "step": 50635 }, { "epoch": 2.48, "grad_norm": 0.6919360756874084, "learning_rate": 4.313902920314817e-05, "loss": 2.8299, "step": 50636 }, { "epoch": 2.48, "grad_norm": 0.7272117733955383, "learning_rate": 4.313107526657532e-05, "loss": 3.0207, "step": 50637 }, { "epoch": 2.48, "grad_norm": 0.7418824434280396, "learning_rate": 4.312312200654573e-05, "loss": 2.9491, "step": 50638 }, { "epoch": 2.48, "grad_norm": 0.759490966796875, "learning_rate": 4.3115169423080406e-05, "loss": 2.7287, "step": 50639 }, { "epoch": 2.48, "grad_norm": 0.7105665802955627, "learning_rate": 4.310721751620034e-05, "loss": 2.8817, "step": 50640 }, { "epoch": 2.48, "grad_norm": 0.6908241510391235, "learning_rate": 4.309926628592645e-05, "loss": 2.9844, "step": 50641 }, { "epoch": 2.48, "grad_norm": 0.7713239789009094, "learning_rate": 4.309131573227964e-05, "loss": 2.9163, "step": 50642 }, { "epoch": 2.48, "grad_norm": 0.7205983400344849, "learning_rate": 4.308336585528077e-05, "loss": 2.8435, "step": 50643 }, { "epoch": 2.48, "grad_norm": 0.7385979294776917, "learning_rate": 4.307541665495089e-05, "loss": 2.8786, "step": 50644 }, { "epoch": 2.48, "grad_norm": 0.753772497177124, "learning_rate": 4.306746813131098e-05, "loss": 2.8506, "step": 50645 }, { "epoch": 2.48, "grad_norm": 0.7099587321281433, "learning_rate": 4.305952028438186e-05, "loss": 2.9241, "step": 50646 }, { "epoch": 2.48, "grad_norm": 0.7350571155548096, "learning_rate": 4.305157311418457e-05, "loss": 3.0212, "step": 50647 }, { "epoch": 2.48, "grad_norm": 0.7650042176246643, "learning_rate": 4.304362662073997e-05, "loss": 3.1281, "step": 50648 }, { "epoch": 2.48, "grad_norm": 0.761756181716919, "learning_rate": 4.303568080406901e-05, "loss": 2.9067, "step": 50649 }, { "epoch": 2.48, "grad_norm": 0.7702816724777222, "learning_rate": 4.302773566419256e-05, "loss": 3.1063, "step": 50650 }, { "epoch": 2.48, "grad_norm": 0.7798253893852234, "learning_rate": 4.301979120113159e-05, "loss": 2.8491, "step": 50651 }, { "epoch": 2.48, "grad_norm": 0.7569634318351746, "learning_rate": 4.301184741490711e-05, "loss": 2.9733, "step": 50652 }, { "epoch": 2.48, "grad_norm": 0.7624911665916443, "learning_rate": 4.3003904305539896e-05, "loss": 3.0518, "step": 50653 }, { "epoch": 2.48, "grad_norm": 0.7225568294525146, "learning_rate": 4.2995961873051e-05, "loss": 2.8497, "step": 50654 }, { "epoch": 2.48, "grad_norm": 0.6899399757385254, "learning_rate": 4.298802011746121e-05, "loss": 2.7573, "step": 50655 }, { "epoch": 2.48, "grad_norm": 0.7465593218803406, "learning_rate": 4.298007903879157e-05, "loss": 2.9092, "step": 50656 }, { "epoch": 2.48, "grad_norm": 0.7861872315406799, "learning_rate": 4.297213863706294e-05, "loss": 2.9574, "step": 50657 }, { "epoch": 2.48, "grad_norm": 0.7941358685493469, "learning_rate": 4.296419891229619e-05, "loss": 3.0628, "step": 50658 }, { "epoch": 2.48, "grad_norm": 0.7803980112075806, "learning_rate": 4.295625986451231e-05, "loss": 2.8888, "step": 50659 }, { "epoch": 2.48, "grad_norm": 0.7576730847358704, "learning_rate": 4.294832149373212e-05, "loss": 2.9702, "step": 50660 }, { "epoch": 2.48, "grad_norm": 0.6931632161140442, "learning_rate": 4.294038379997656e-05, "loss": 2.9009, "step": 50661 }, { "epoch": 2.48, "grad_norm": 0.7237907648086548, "learning_rate": 4.293244678326666e-05, "loss": 3.0382, "step": 50662 }, { "epoch": 2.48, "grad_norm": 0.7643592953681946, "learning_rate": 4.292451044362319e-05, "loss": 3.0474, "step": 50663 }, { "epoch": 2.48, "grad_norm": 0.7706605195999146, "learning_rate": 4.2916574781067116e-05, "loss": 2.8414, "step": 50664 }, { "epoch": 2.48, "grad_norm": 0.7509461045265198, "learning_rate": 4.2908639795619204e-05, "loss": 2.8932, "step": 50665 }, { "epoch": 2.48, "grad_norm": 0.7358827590942383, "learning_rate": 4.290070548730048e-05, "loss": 2.8797, "step": 50666 }, { "epoch": 2.48, "grad_norm": 0.7475077509880066, "learning_rate": 4.289277185613189e-05, "loss": 2.8333, "step": 50667 }, { "epoch": 2.48, "grad_norm": 0.7222772240638733, "learning_rate": 4.288483890213416e-05, "loss": 2.7275, "step": 50668 }, { "epoch": 2.48, "grad_norm": 0.7573405504226685, "learning_rate": 4.287690662532839e-05, "loss": 2.9758, "step": 50669 }, { "epoch": 2.48, "grad_norm": 0.7598530054092407, "learning_rate": 4.286897502573534e-05, "loss": 3.048, "step": 50670 }, { "epoch": 2.48, "grad_norm": 0.7809165716171265, "learning_rate": 4.286104410337586e-05, "loss": 3.0113, "step": 50671 }, { "epoch": 2.48, "grad_norm": 0.7310870885848999, "learning_rate": 4.2853113858270974e-05, "loss": 2.9604, "step": 50672 }, { "epoch": 2.48, "grad_norm": 0.7275199294090271, "learning_rate": 4.2845184290441415e-05, "loss": 2.9085, "step": 50673 }, { "epoch": 2.48, "grad_norm": 0.7422723174095154, "learning_rate": 4.283725539990821e-05, "loss": 2.789, "step": 50674 }, { "epoch": 2.48, "grad_norm": 0.7516480088233948, "learning_rate": 4.282932718669214e-05, "loss": 2.8959, "step": 50675 }, { "epoch": 2.48, "grad_norm": 0.7623327374458313, "learning_rate": 4.2821399650814126e-05, "loss": 2.8064, "step": 50676 }, { "epoch": 2.48, "grad_norm": 0.7669196724891663, "learning_rate": 4.281347279229511e-05, "loss": 2.8621, "step": 50677 }, { "epoch": 2.48, "grad_norm": 0.726617157459259, "learning_rate": 4.2805546611155916e-05, "loss": 2.616, "step": 50678 }, { "epoch": 2.48, "grad_norm": 0.7164513468742371, "learning_rate": 4.279762110741742e-05, "loss": 2.7553, "step": 50679 }, { "epoch": 2.48, "grad_norm": 0.7366183400154114, "learning_rate": 4.2789696281100417e-05, "loss": 3.0931, "step": 50680 }, { "epoch": 2.48, "grad_norm": 0.7067437171936035, "learning_rate": 4.278177213222584e-05, "loss": 2.9771, "step": 50681 }, { "epoch": 2.48, "grad_norm": 0.8154669404029846, "learning_rate": 4.2773848660814656e-05, "loss": 2.7634, "step": 50682 }, { "epoch": 2.48, "grad_norm": 0.7058912515640259, "learning_rate": 4.2765925866887574e-05, "loss": 2.9507, "step": 50683 }, { "epoch": 2.48, "grad_norm": 0.7300341129302979, "learning_rate": 4.275800375046561e-05, "loss": 3.0809, "step": 50684 }, { "epoch": 2.48, "grad_norm": 0.7966801524162292, "learning_rate": 4.275008231156954e-05, "loss": 2.898, "step": 50685 }, { "epoch": 2.48, "grad_norm": 0.7171053290367126, "learning_rate": 4.27421615502202e-05, "loss": 3.0457, "step": 50686 }, { "epoch": 2.48, "grad_norm": 0.7475465536117554, "learning_rate": 4.2734241466438554e-05, "loss": 2.9942, "step": 50687 }, { "epoch": 2.48, "grad_norm": 0.738580584526062, "learning_rate": 4.272632206024533e-05, "loss": 3.0752, "step": 50688 }, { "epoch": 2.48, "grad_norm": 0.7871452569961548, "learning_rate": 4.2718403331661547e-05, "loss": 3.007, "step": 50689 }, { "epoch": 2.48, "grad_norm": 0.745241105556488, "learning_rate": 4.2710485280707873e-05, "loss": 2.8626, "step": 50690 }, { "epoch": 2.48, "grad_norm": 0.7506486773490906, "learning_rate": 4.2702567907405305e-05, "loss": 2.9813, "step": 50691 }, { "epoch": 2.48, "grad_norm": 0.7289913296699524, "learning_rate": 4.269465121177469e-05, "loss": 2.7847, "step": 50692 }, { "epoch": 2.48, "grad_norm": 0.7110291123390198, "learning_rate": 4.268673519383687e-05, "loss": 3.036, "step": 50693 }, { "epoch": 2.48, "grad_norm": 0.773071825504303, "learning_rate": 4.2678819853612646e-05, "loss": 2.926, "step": 50694 }, { "epoch": 2.48, "grad_norm": 0.7590256333351135, "learning_rate": 4.267090519112285e-05, "loss": 2.9054, "step": 50695 }, { "epoch": 2.48, "grad_norm": 0.7588886022567749, "learning_rate": 4.266299120638834e-05, "loss": 2.8203, "step": 50696 }, { "epoch": 2.48, "grad_norm": 0.7508629560470581, "learning_rate": 4.265507789943007e-05, "loss": 2.8658, "step": 50697 }, { "epoch": 2.48, "grad_norm": 0.7774369120597839, "learning_rate": 4.264716527026871e-05, "loss": 3.0411, "step": 50698 }, { "epoch": 2.48, "grad_norm": 0.7264341115951538, "learning_rate": 4.263925331892526e-05, "loss": 2.9546, "step": 50699 }, { "epoch": 2.48, "grad_norm": 0.7322808504104614, "learning_rate": 4.2631342045420523e-05, "loss": 2.8151, "step": 50700 }, { "epoch": 2.48, "grad_norm": 0.7444933652877808, "learning_rate": 4.2623431449775194e-05, "loss": 2.7928, "step": 50701 }, { "epoch": 2.48, "grad_norm": 0.7420282959938049, "learning_rate": 4.261552153201028e-05, "loss": 2.8152, "step": 50702 }, { "epoch": 2.48, "grad_norm": 0.7798295021057129, "learning_rate": 4.2607612292146496e-05, "loss": 2.8232, "step": 50703 }, { "epoch": 2.48, "grad_norm": 0.7416540384292603, "learning_rate": 4.259970373020477e-05, "loss": 2.9673, "step": 50704 }, { "epoch": 2.48, "grad_norm": 0.6930776238441467, "learning_rate": 4.2591795846205844e-05, "loss": 2.8342, "step": 50705 }, { "epoch": 2.48, "grad_norm": 0.7511429786682129, "learning_rate": 4.2583888640170674e-05, "loss": 2.8983, "step": 50706 }, { "epoch": 2.49, "grad_norm": 0.7975016236305237, "learning_rate": 4.257598211211997e-05, "loss": 2.861, "step": 50707 }, { "epoch": 2.49, "grad_norm": 0.7396607995033264, "learning_rate": 4.256807626207451e-05, "loss": 2.6752, "step": 50708 }, { "epoch": 2.49, "grad_norm": 0.7415522336959839, "learning_rate": 4.256017109005526e-05, "loss": 2.7366, "step": 50709 }, { "epoch": 2.49, "grad_norm": 0.7048690915107727, "learning_rate": 4.2552266596082926e-05, "loss": 2.9888, "step": 50710 }, { "epoch": 2.49, "grad_norm": 0.787742555141449, "learning_rate": 4.2544362780178366e-05, "loss": 2.7388, "step": 50711 }, { "epoch": 2.49, "grad_norm": 0.8210556507110596, "learning_rate": 4.253645964236249e-05, "loss": 2.6602, "step": 50712 }, { "epoch": 2.49, "grad_norm": 0.7322614789009094, "learning_rate": 4.252855718265594e-05, "loss": 2.8288, "step": 50713 }, { "epoch": 2.49, "grad_norm": 0.7463746070861816, "learning_rate": 4.252065540107972e-05, "loss": 2.9142, "step": 50714 }, { "epoch": 2.49, "grad_norm": 0.7110394835472107, "learning_rate": 4.25127542976545e-05, "loss": 2.7829, "step": 50715 }, { "epoch": 2.49, "grad_norm": 0.7307591438293457, "learning_rate": 4.250485387240109e-05, "loss": 3.1279, "step": 50716 }, { "epoch": 2.49, "grad_norm": 0.7428984045982361, "learning_rate": 4.249695412534039e-05, "loss": 2.9116, "step": 50717 }, { "epoch": 2.49, "grad_norm": 0.7223466634750366, "learning_rate": 4.24890550564931e-05, "loss": 2.9612, "step": 50718 }, { "epoch": 2.49, "grad_norm": 0.7388598322868347, "learning_rate": 4.2481156665880154e-05, "loss": 3.0812, "step": 50719 }, { "epoch": 2.49, "grad_norm": 0.7969038486480713, "learning_rate": 4.247325895352219e-05, "loss": 2.5446, "step": 50720 }, { "epoch": 2.49, "grad_norm": 0.7594483494758606, "learning_rate": 4.2465361919440165e-05, "loss": 2.7923, "step": 50721 }, { "epoch": 2.49, "grad_norm": 0.7183117270469666, "learning_rate": 4.2457465563654824e-05, "loss": 2.7408, "step": 50722 }, { "epoch": 2.49, "grad_norm": 0.7206244468688965, "learning_rate": 4.244956988618692e-05, "loss": 3.0161, "step": 50723 }, { "epoch": 2.49, "grad_norm": 0.781730592250824, "learning_rate": 4.24416748870573e-05, "loss": 2.9401, "step": 50724 }, { "epoch": 2.49, "grad_norm": 0.7326677441596985, "learning_rate": 4.243378056628668e-05, "loss": 2.9432, "step": 50725 }, { "epoch": 2.49, "grad_norm": 0.7561776041984558, "learning_rate": 4.242588692389595e-05, "loss": 2.7459, "step": 50726 }, { "epoch": 2.49, "grad_norm": 0.7506656646728516, "learning_rate": 4.241799395990588e-05, "loss": 2.7, "step": 50727 }, { "epoch": 2.49, "grad_norm": 0.7250350713729858, "learning_rate": 4.241010167433727e-05, "loss": 2.8735, "step": 50728 }, { "epoch": 2.49, "grad_norm": 0.7116358876228333, "learning_rate": 4.24022100672109e-05, "loss": 2.829, "step": 50729 }, { "epoch": 2.49, "grad_norm": 0.7642216682434082, "learning_rate": 4.239431913854744e-05, "loss": 2.8395, "step": 50730 }, { "epoch": 2.49, "grad_norm": 0.7662079334259033, "learning_rate": 4.238642888836782e-05, "loss": 2.9226, "step": 50731 }, { "epoch": 2.49, "grad_norm": 0.7560353875160217, "learning_rate": 4.237853931669272e-05, "loss": 2.8221, "step": 50732 }, { "epoch": 2.49, "grad_norm": 0.7635805010795593, "learning_rate": 4.237065042354295e-05, "loss": 2.9538, "step": 50733 }, { "epoch": 2.49, "grad_norm": 0.750133752822876, "learning_rate": 4.236276220893937e-05, "loss": 3.0837, "step": 50734 }, { "epoch": 2.49, "grad_norm": 0.7176488041877747, "learning_rate": 4.2354874672902664e-05, "loss": 2.914, "step": 50735 }, { "epoch": 2.49, "grad_norm": 0.7769431471824646, "learning_rate": 4.234698781545367e-05, "loss": 3.0444, "step": 50736 }, { "epoch": 2.49, "grad_norm": 0.7291898727416992, "learning_rate": 4.2339101636613116e-05, "loss": 2.7426, "step": 50737 }, { "epoch": 2.49, "grad_norm": 0.7305982112884521, "learning_rate": 4.2331216136401716e-05, "loss": 2.8365, "step": 50738 }, { "epoch": 2.49, "grad_norm": 0.7421076893806458, "learning_rate": 4.232333131484038e-05, "loss": 2.9467, "step": 50739 }, { "epoch": 2.49, "grad_norm": 0.7870148420333862, "learning_rate": 4.2315447171949735e-05, "loss": 2.9623, "step": 50740 }, { "epoch": 2.49, "grad_norm": 0.7365769147872925, "learning_rate": 4.230756370775066e-05, "loss": 3.0139, "step": 50741 }, { "epoch": 2.49, "grad_norm": 0.7033193707466125, "learning_rate": 4.2299680922263835e-05, "loss": 2.8967, "step": 50742 }, { "epoch": 2.49, "grad_norm": 0.7226850390434265, "learning_rate": 4.229179881551008e-05, "loss": 2.9138, "step": 50743 }, { "epoch": 2.49, "grad_norm": 0.763777494430542, "learning_rate": 4.228391738751015e-05, "loss": 2.7401, "step": 50744 }, { "epoch": 2.49, "grad_norm": 0.7105336785316467, "learning_rate": 4.227603663828472e-05, "loss": 3.0466, "step": 50745 }, { "epoch": 2.49, "grad_norm": 0.7253413200378418, "learning_rate": 4.226815656785467e-05, "loss": 2.976, "step": 50746 }, { "epoch": 2.49, "grad_norm": 0.7211529612541199, "learning_rate": 4.226027717624063e-05, "loss": 2.8548, "step": 50747 }, { "epoch": 2.49, "grad_norm": 0.7225125432014465, "learning_rate": 4.225239846346341e-05, "loss": 2.841, "step": 50748 }, { "epoch": 2.49, "grad_norm": 0.7354315519332886, "learning_rate": 4.224452042954386e-05, "loss": 2.6947, "step": 50749 }, { "epoch": 2.49, "grad_norm": 0.7653220891952515, "learning_rate": 4.223664307450263e-05, "loss": 2.9488, "step": 50750 }, { "epoch": 2.49, "grad_norm": 0.7226547598838806, "learning_rate": 4.2228766398360434e-05, "loss": 2.9566, "step": 50751 }, { "epoch": 2.49, "grad_norm": 0.7502910494804382, "learning_rate": 4.222089040113803e-05, "loss": 2.8887, "step": 50752 }, { "epoch": 2.49, "grad_norm": 0.7303741574287415, "learning_rate": 4.221301508285619e-05, "loss": 3.1261, "step": 50753 }, { "epoch": 2.49, "grad_norm": 0.7288039326667786, "learning_rate": 4.2205140443535714e-05, "loss": 2.8225, "step": 50754 }, { "epoch": 2.49, "grad_norm": 0.7314246296882629, "learning_rate": 4.21972664831972e-05, "loss": 2.9364, "step": 50755 }, { "epoch": 2.49, "grad_norm": 0.7128660678863525, "learning_rate": 4.218939320186158e-05, "loss": 2.7165, "step": 50756 }, { "epoch": 2.49, "grad_norm": 0.7295336127281189, "learning_rate": 4.218152059954939e-05, "loss": 2.7854, "step": 50757 }, { "epoch": 2.49, "grad_norm": 0.7478935122489929, "learning_rate": 4.2173648676281524e-05, "loss": 2.8468, "step": 50758 }, { "epoch": 2.49, "grad_norm": 0.7409915328025818, "learning_rate": 4.216577743207863e-05, "loss": 2.678, "step": 50759 }, { "epoch": 2.49, "grad_norm": 0.742350161075592, "learning_rate": 4.2157906866961425e-05, "loss": 2.8212, "step": 50760 }, { "epoch": 2.49, "grad_norm": 0.7968260645866394, "learning_rate": 4.215003698095072e-05, "loss": 2.9907, "step": 50761 }, { "epoch": 2.49, "grad_norm": 0.712583601474762, "learning_rate": 4.2142167774067104e-05, "loss": 3.0309, "step": 50762 }, { "epoch": 2.49, "grad_norm": 0.7087548971176147, "learning_rate": 4.213429924633143e-05, "loss": 2.7685, "step": 50763 }, { "epoch": 2.49, "grad_norm": 0.7460187673568726, "learning_rate": 4.212643139776444e-05, "loss": 2.9495, "step": 50764 }, { "epoch": 2.49, "grad_norm": 0.8229163885116577, "learning_rate": 4.211856422838679e-05, "loss": 2.9066, "step": 50765 }, { "epoch": 2.49, "grad_norm": 0.7447080612182617, "learning_rate": 4.211069773821922e-05, "loss": 3.06, "step": 50766 }, { "epoch": 2.49, "grad_norm": 0.712235689163208, "learning_rate": 4.2102831927282364e-05, "loss": 2.8577, "step": 50767 }, { "epoch": 2.49, "grad_norm": 0.7422438859939575, "learning_rate": 4.209496679559703e-05, "loss": 2.9714, "step": 50768 }, { "epoch": 2.49, "grad_norm": 0.7196982502937317, "learning_rate": 4.2087102343183995e-05, "loss": 2.5705, "step": 50769 }, { "epoch": 2.49, "grad_norm": 0.7336692214012146, "learning_rate": 4.207923857006379e-05, "loss": 2.9921, "step": 50770 }, { "epoch": 2.49, "grad_norm": 0.8158669471740723, "learning_rate": 4.2071375476257316e-05, "loss": 2.8008, "step": 50771 }, { "epoch": 2.49, "grad_norm": 0.7556344270706177, "learning_rate": 4.2063513061785236e-05, "loss": 2.9787, "step": 50772 }, { "epoch": 2.49, "grad_norm": 0.7272917628288269, "learning_rate": 4.2055651326668126e-05, "loss": 2.8565, "step": 50773 }, { "epoch": 2.49, "grad_norm": 0.7603312730789185, "learning_rate": 4.204779027092684e-05, "loss": 2.9942, "step": 50774 }, { "epoch": 2.49, "grad_norm": 0.728800892829895, "learning_rate": 4.203992989458199e-05, "loss": 2.9436, "step": 50775 }, { "epoch": 2.49, "grad_norm": 0.7117544412612915, "learning_rate": 4.2032070197654364e-05, "loss": 2.9086, "step": 50776 }, { "epoch": 2.49, "grad_norm": 0.7658325433731079, "learning_rate": 4.2024211180164545e-05, "loss": 2.9249, "step": 50777 }, { "epoch": 2.49, "grad_norm": 0.6998578906059265, "learning_rate": 4.201635284213335e-05, "loss": 2.9473, "step": 50778 }, { "epoch": 2.49, "grad_norm": 0.7814579606056213, "learning_rate": 4.200849518358146e-05, "loss": 2.9595, "step": 50779 }, { "epoch": 2.49, "grad_norm": 0.7035899758338928, "learning_rate": 4.200063820452956e-05, "loss": 2.8746, "step": 50780 }, { "epoch": 2.49, "grad_norm": 0.7148592472076416, "learning_rate": 4.1992781904998295e-05, "loss": 2.9529, "step": 50781 }, { "epoch": 2.49, "grad_norm": 0.7354755401611328, "learning_rate": 4.1984926285008355e-05, "loss": 2.8767, "step": 50782 }, { "epoch": 2.49, "grad_norm": 0.723908543586731, "learning_rate": 4.197707134458046e-05, "loss": 2.9528, "step": 50783 }, { "epoch": 2.49, "grad_norm": 0.7492967844009399, "learning_rate": 4.196921708373535e-05, "loss": 3.0366, "step": 50784 }, { "epoch": 2.49, "grad_norm": 0.7645527720451355, "learning_rate": 4.1961363502493615e-05, "loss": 2.9825, "step": 50785 }, { "epoch": 2.49, "grad_norm": 0.7469053268432617, "learning_rate": 4.1953510600876036e-05, "loss": 2.858, "step": 50786 }, { "epoch": 2.49, "grad_norm": 0.7710843682289124, "learning_rate": 4.194565837890327e-05, "loss": 2.8289, "step": 50787 }, { "epoch": 2.49, "grad_norm": 0.7673295140266418, "learning_rate": 4.193780683659589e-05, "loss": 3.0426, "step": 50788 }, { "epoch": 2.49, "grad_norm": 0.7491339445114136, "learning_rate": 4.192995597397475e-05, "loss": 3.077, "step": 50789 }, { "epoch": 2.49, "grad_norm": 0.756820023059845, "learning_rate": 4.192210579106037e-05, "loss": 3.0091, "step": 50790 }, { "epoch": 2.49, "grad_norm": 0.7435303926467896, "learning_rate": 4.1914256287873525e-05, "loss": 2.8404, "step": 50791 }, { "epoch": 2.49, "grad_norm": 0.7386417388916016, "learning_rate": 4.190640746443484e-05, "loss": 2.8662, "step": 50792 }, { "epoch": 2.49, "grad_norm": 0.7664363980293274, "learning_rate": 4.189855932076497e-05, "loss": 3.0039, "step": 50793 }, { "epoch": 2.49, "grad_norm": 0.7032740116119385, "learning_rate": 4.1890711856884786e-05, "loss": 2.7883, "step": 50794 }, { "epoch": 2.49, "grad_norm": 0.7009146809577942, "learning_rate": 4.188286507281464e-05, "loss": 2.8372, "step": 50795 }, { "epoch": 2.49, "grad_norm": 0.7520615458488464, "learning_rate": 4.187501896857542e-05, "loss": 2.9782, "step": 50796 }, { "epoch": 2.49, "grad_norm": 0.726274847984314, "learning_rate": 4.1867173544187685e-05, "loss": 2.7904, "step": 50797 }, { "epoch": 2.49, "grad_norm": 0.7836074233055115, "learning_rate": 4.18593287996721e-05, "loss": 3.0073, "step": 50798 }, { "epoch": 2.49, "grad_norm": 0.7328587770462036, "learning_rate": 4.1851484735049425e-05, "loss": 2.7654, "step": 50799 }, { "epoch": 2.49, "grad_norm": 0.7041208744049072, "learning_rate": 4.1843641350340205e-05, "loss": 3.1183, "step": 50800 }, { "epoch": 2.49, "grad_norm": 0.7885406017303467, "learning_rate": 4.183579864556523e-05, "loss": 3.0173, "step": 50801 }, { "epoch": 2.49, "grad_norm": 0.7367026805877686, "learning_rate": 4.182795662074505e-05, "loss": 2.8478, "step": 50802 }, { "epoch": 2.49, "grad_norm": 0.7012369632720947, "learning_rate": 4.182011527590031e-05, "loss": 2.9023, "step": 50803 }, { "epoch": 2.49, "grad_norm": 0.8011801838874817, "learning_rate": 4.1812274611051765e-05, "loss": 2.8189, "step": 50804 }, { "epoch": 2.49, "grad_norm": 0.7242926359176636, "learning_rate": 4.1804434626219896e-05, "loss": 2.8217, "step": 50805 }, { "epoch": 2.49, "grad_norm": 0.6962056159973145, "learning_rate": 4.179659532142555e-05, "loss": 2.7622, "step": 50806 }, { "epoch": 2.49, "grad_norm": 0.728546142578125, "learning_rate": 4.1788756696689216e-05, "loss": 3.0023, "step": 50807 }, { "epoch": 2.49, "grad_norm": 0.7366439700126648, "learning_rate": 4.1780918752031644e-05, "loss": 2.9501, "step": 50808 }, { "epoch": 2.49, "grad_norm": 0.7231490612030029, "learning_rate": 4.177308148747345e-05, "loss": 2.9292, "step": 50809 }, { "epoch": 2.49, "grad_norm": 0.7084015011787415, "learning_rate": 4.176524490303521e-05, "loss": 2.782, "step": 50810 }, { "epoch": 2.49, "grad_norm": 0.7416843175888062, "learning_rate": 4.175740899873768e-05, "loss": 2.898, "step": 50811 }, { "epoch": 2.49, "grad_norm": 0.7785187363624573, "learning_rate": 4.1749573774601354e-05, "loss": 3.0888, "step": 50812 }, { "epoch": 2.49, "grad_norm": 0.7746663689613342, "learning_rate": 4.174173923064703e-05, "loss": 2.8559, "step": 50813 }, { "epoch": 2.49, "grad_norm": 0.7415159344673157, "learning_rate": 4.173390536689517e-05, "loss": 2.9723, "step": 50814 }, { "epoch": 2.49, "grad_norm": 1.2300888299942017, "learning_rate": 4.172607218336652e-05, "loss": 2.9255, "step": 50815 }, { "epoch": 2.49, "grad_norm": 0.7681926488876343, "learning_rate": 4.171823968008177e-05, "loss": 2.791, "step": 50816 }, { "epoch": 2.49, "grad_norm": 0.7152865529060364, "learning_rate": 4.171040785706146e-05, "loss": 2.6684, "step": 50817 }, { "epoch": 2.49, "grad_norm": 0.7356628179550171, "learning_rate": 4.1702576714326205e-05, "loss": 2.873, "step": 50818 }, { "epoch": 2.49, "grad_norm": 0.7573530673980713, "learning_rate": 4.1694746251896606e-05, "loss": 2.8615, "step": 50819 }, { "epoch": 2.49, "grad_norm": 0.7217455506324768, "learning_rate": 4.168691646979333e-05, "loss": 2.9256, "step": 50820 }, { "epoch": 2.49, "grad_norm": 0.7954509854316711, "learning_rate": 4.1679087368037066e-05, "loss": 2.9018, "step": 50821 }, { "epoch": 2.49, "grad_norm": 0.7794967293739319, "learning_rate": 4.16712589466483e-05, "loss": 2.9567, "step": 50822 }, { "epoch": 2.49, "grad_norm": 0.719433605670929, "learning_rate": 4.166343120564781e-05, "loss": 2.7478, "step": 50823 }, { "epoch": 2.49, "grad_norm": 0.7607887387275696, "learning_rate": 4.1655604145056085e-05, "loss": 2.9903, "step": 50824 }, { "epoch": 2.49, "grad_norm": 0.7895941734313965, "learning_rate": 4.1647777764893745e-05, "loss": 2.8344, "step": 50825 }, { "epoch": 2.49, "grad_norm": 0.7224075794219971, "learning_rate": 4.163995206518149e-05, "loss": 3.0332, "step": 50826 }, { "epoch": 2.49, "grad_norm": 0.7128440737724304, "learning_rate": 4.1632127045939826e-05, "loss": 2.6762, "step": 50827 }, { "epoch": 2.49, "grad_norm": 0.7446962594985962, "learning_rate": 4.162430270718948e-05, "loss": 2.9153, "step": 50828 }, { "epoch": 2.49, "grad_norm": 0.7283123731613159, "learning_rate": 4.161647904895092e-05, "loss": 2.8714, "step": 50829 }, { "epoch": 2.49, "grad_norm": 0.7481231093406677, "learning_rate": 4.1608656071244897e-05, "loss": 2.9547, "step": 50830 }, { "epoch": 2.49, "grad_norm": 0.7285983562469482, "learning_rate": 4.1600833774091945e-05, "loss": 2.8458, "step": 50831 }, { "epoch": 2.49, "grad_norm": 0.7572833299636841, "learning_rate": 4.159301215751259e-05, "loss": 2.9878, "step": 50832 }, { "epoch": 2.49, "grad_norm": 0.748000979423523, "learning_rate": 4.1585191221527595e-05, "loss": 2.9091, "step": 50833 }, { "epoch": 2.49, "grad_norm": 0.7040547132492065, "learning_rate": 4.157737096615742e-05, "loss": 2.8364, "step": 50834 }, { "epoch": 2.49, "grad_norm": 0.7504925727844238, "learning_rate": 4.1569551391422695e-05, "loss": 2.8602, "step": 50835 }, { "epoch": 2.49, "grad_norm": 0.7645800113677979, "learning_rate": 4.15617324973441e-05, "loss": 2.9762, "step": 50836 }, { "epoch": 2.49, "grad_norm": 0.7223212122917175, "learning_rate": 4.1553914283942094e-05, "loss": 3.0629, "step": 50837 }, { "epoch": 2.49, "grad_norm": 0.7501276135444641, "learning_rate": 4.154609675123741e-05, "loss": 2.8408, "step": 50838 }, { "epoch": 2.49, "grad_norm": 0.7350544929504395, "learning_rate": 4.153827989925055e-05, "loss": 2.9287, "step": 50839 }, { "epoch": 2.49, "grad_norm": 0.7663587331771851, "learning_rate": 4.153046372800208e-05, "loss": 2.9415, "step": 50840 }, { "epoch": 2.49, "grad_norm": 0.8215747475624084, "learning_rate": 4.1522648237512666e-05, "loss": 2.8608, "step": 50841 }, { "epoch": 2.49, "grad_norm": 0.7529155015945435, "learning_rate": 4.151483342780282e-05, "loss": 2.9685, "step": 50842 }, { "epoch": 2.49, "grad_norm": 0.7362189888954163, "learning_rate": 4.1507019298893186e-05, "loss": 2.8494, "step": 50843 }, { "epoch": 2.49, "grad_norm": 0.7676265835762024, "learning_rate": 4.149920585080429e-05, "loss": 2.8628, "step": 50844 }, { "epoch": 2.49, "grad_norm": 0.7176138758659363, "learning_rate": 4.1491393083556766e-05, "loss": 3.0375, "step": 50845 }, { "epoch": 2.49, "grad_norm": 0.7533155083656311, "learning_rate": 4.1483580997171186e-05, "loss": 2.8913, "step": 50846 }, { "epoch": 2.49, "grad_norm": 0.7027636170387268, "learning_rate": 4.147576959166802e-05, "loss": 2.9655, "step": 50847 }, { "epoch": 2.49, "grad_norm": 0.7691273093223572, "learning_rate": 4.146795886706798e-05, "loss": 2.9198, "step": 50848 }, { "epoch": 2.49, "grad_norm": 0.7978218793869019, "learning_rate": 4.146014882339154e-05, "loss": 2.9319, "step": 50849 }, { "epoch": 2.49, "grad_norm": 0.7753750085830688, "learning_rate": 4.145233946065929e-05, "loss": 2.8632, "step": 50850 }, { "epoch": 2.49, "grad_norm": 0.7168684601783752, "learning_rate": 4.144453077889189e-05, "loss": 3.0417, "step": 50851 }, { "epoch": 2.49, "grad_norm": 0.730107307434082, "learning_rate": 4.143672277810983e-05, "loss": 2.9331, "step": 50852 }, { "epoch": 2.49, "grad_norm": 0.7644616365432739, "learning_rate": 4.142891545833369e-05, "loss": 2.7204, "step": 50853 }, { "epoch": 2.49, "grad_norm": 0.7631624937057495, "learning_rate": 4.142110881958398e-05, "loss": 3.1044, "step": 50854 }, { "epoch": 2.49, "grad_norm": 0.7302795648574829, "learning_rate": 4.1413302861881256e-05, "loss": 2.8666, "step": 50855 }, { "epoch": 2.49, "grad_norm": 0.7175984382629395, "learning_rate": 4.140549758524624e-05, "loss": 2.8372, "step": 50856 }, { "epoch": 2.49, "grad_norm": 0.7876809239387512, "learning_rate": 4.139769298969927e-05, "loss": 2.9315, "step": 50857 }, { "epoch": 2.49, "grad_norm": 0.7984057068824768, "learning_rate": 4.1389889075261105e-05, "loss": 2.756, "step": 50858 }, { "epoch": 2.49, "grad_norm": 0.8823890686035156, "learning_rate": 4.138208584195213e-05, "loss": 2.8131, "step": 50859 }, { "epoch": 2.49, "grad_norm": 0.7620226144790649, "learning_rate": 4.137428328979302e-05, "loss": 3.0526, "step": 50860 }, { "epoch": 2.49, "grad_norm": 0.7054426670074463, "learning_rate": 4.1366481418804266e-05, "loss": 3.0109, "step": 50861 }, { "epoch": 2.49, "grad_norm": 0.7472173571586609, "learning_rate": 4.135868022900638e-05, "loss": 2.9188, "step": 50862 }, { "epoch": 2.49, "grad_norm": 0.7145726084709167, "learning_rate": 4.1350879720420014e-05, "loss": 2.9618, "step": 50863 }, { "epoch": 2.49, "grad_norm": 0.7612037062644958, "learning_rate": 4.1343079893065586e-05, "loss": 2.781, "step": 50864 }, { "epoch": 2.49, "grad_norm": 0.7213705778121948, "learning_rate": 4.133528074696372e-05, "loss": 2.8342, "step": 50865 }, { "epoch": 2.49, "grad_norm": 0.8010908365249634, "learning_rate": 4.132748228213498e-05, "loss": 2.7692, "step": 50866 }, { "epoch": 2.49, "grad_norm": 0.7797710299491882, "learning_rate": 4.1319684498599906e-05, "loss": 3.0265, "step": 50867 }, { "epoch": 2.49, "grad_norm": 0.7466559410095215, "learning_rate": 4.131188739637896e-05, "loss": 2.8454, "step": 50868 }, { "epoch": 2.49, "grad_norm": 0.7062391638755798, "learning_rate": 4.130409097549268e-05, "loss": 2.9389, "step": 50869 }, { "epoch": 2.49, "grad_norm": 0.7370114326477051, "learning_rate": 4.1296295235961627e-05, "loss": 2.7059, "step": 50870 }, { "epoch": 2.49, "grad_norm": 0.7448911070823669, "learning_rate": 4.128850017780641e-05, "loss": 3.0902, "step": 50871 }, { "epoch": 2.49, "grad_norm": 0.7794041633605957, "learning_rate": 4.128070580104742e-05, "loss": 3.0043, "step": 50872 }, { "epoch": 2.49, "grad_norm": 0.7436469197273254, "learning_rate": 4.127291210570536e-05, "loss": 2.7554, "step": 50873 }, { "epoch": 2.49, "grad_norm": 0.8578379154205322, "learning_rate": 4.12651190918006e-05, "loss": 2.766, "step": 50874 }, { "epoch": 2.49, "grad_norm": 0.7621721029281616, "learning_rate": 4.12573267593537e-05, "loss": 2.7681, "step": 50875 }, { "epoch": 2.49, "grad_norm": 0.6907230019569397, "learning_rate": 4.1249535108385246e-05, "loss": 2.9351, "step": 50876 }, { "epoch": 2.49, "grad_norm": 0.7239318490028381, "learning_rate": 4.1241744138915676e-05, "loss": 2.8717, "step": 50877 }, { "epoch": 2.49, "grad_norm": 0.7087956070899963, "learning_rate": 4.1233953850965616e-05, "loss": 2.7214, "step": 50878 }, { "epoch": 2.49, "grad_norm": 0.7738022804260254, "learning_rate": 4.122616424455545e-05, "loss": 2.937, "step": 50879 }, { "epoch": 2.49, "grad_norm": 0.7342925071716309, "learning_rate": 4.121837531970575e-05, "loss": 2.8623, "step": 50880 }, { "epoch": 2.49, "grad_norm": 0.7471789121627808, "learning_rate": 4.121058707643712e-05, "loss": 2.9401, "step": 50881 }, { "epoch": 2.49, "grad_norm": 0.7654462456703186, "learning_rate": 4.120279951476999e-05, "loss": 2.8794, "step": 50882 }, { "epoch": 2.49, "grad_norm": 0.6907823085784912, "learning_rate": 4.119501263472489e-05, "loss": 2.8218, "step": 50883 }, { "epoch": 2.49, "grad_norm": 0.7497276067733765, "learning_rate": 4.1187226436322265e-05, "loss": 2.9644, "step": 50884 }, { "epoch": 2.49, "grad_norm": 0.6869165301322937, "learning_rate": 4.117944091958273e-05, "loss": 3.0914, "step": 50885 }, { "epoch": 2.49, "grad_norm": 0.7467650175094604, "learning_rate": 4.117165608452667e-05, "loss": 2.7657, "step": 50886 }, { "epoch": 2.49, "grad_norm": 0.7433682084083557, "learning_rate": 4.1163871931174674e-05, "loss": 2.8362, "step": 50887 }, { "epoch": 2.49, "grad_norm": 0.7507684826850891, "learning_rate": 4.1156088459547254e-05, "loss": 2.918, "step": 50888 }, { "epoch": 2.49, "grad_norm": 0.7188553810119629, "learning_rate": 4.114830566966489e-05, "loss": 2.9672, "step": 50889 }, { "epoch": 2.49, "grad_norm": 0.7091402411460876, "learning_rate": 4.114052356154811e-05, "loss": 2.8625, "step": 50890 }, { "epoch": 2.49, "grad_norm": 0.7486855983734131, "learning_rate": 4.113274213521726e-05, "loss": 2.8445, "step": 50891 }, { "epoch": 2.49, "grad_norm": 0.7630000114440918, "learning_rate": 4.112496139069298e-05, "loss": 2.8836, "step": 50892 }, { "epoch": 2.49, "grad_norm": 0.8397104740142822, "learning_rate": 4.11171813279958e-05, "loss": 2.9828, "step": 50893 }, { "epoch": 2.49, "grad_norm": 0.7878146767616272, "learning_rate": 4.110940194714603e-05, "loss": 3.0389, "step": 50894 }, { "epoch": 2.49, "grad_norm": 0.744600236415863, "learning_rate": 4.1101623248164386e-05, "loss": 2.859, "step": 50895 }, { "epoch": 2.49, "grad_norm": 0.7375232577323914, "learning_rate": 4.109384523107122e-05, "loss": 3.0262, "step": 50896 }, { "epoch": 2.49, "grad_norm": 0.7445172071456909, "learning_rate": 4.108606789588695e-05, "loss": 2.8573, "step": 50897 }, { "epoch": 2.49, "grad_norm": 0.7916731834411621, "learning_rate": 4.107829124263227e-05, "loss": 2.5277, "step": 50898 }, { "epoch": 2.49, "grad_norm": 0.7740074396133423, "learning_rate": 4.107051527132741e-05, "loss": 2.8962, "step": 50899 }, { "epoch": 2.49, "grad_norm": 0.7415305376052856, "learning_rate": 4.10627399819931e-05, "loss": 3.0435, "step": 50900 }, { "epoch": 2.49, "grad_norm": 0.7645403146743774, "learning_rate": 4.105496537464958e-05, "loss": 3.0199, "step": 50901 }, { "epoch": 2.49, "grad_norm": 0.7634096741676331, "learning_rate": 4.104719144931749e-05, "loss": 2.9406, "step": 50902 }, { "epoch": 2.49, "grad_norm": 0.7687584161758423, "learning_rate": 4.103941820601731e-05, "loss": 2.9617, "step": 50903 }, { "epoch": 2.49, "grad_norm": 0.7485208511352539, "learning_rate": 4.1031645644769494e-05, "loss": 2.8768, "step": 50904 }, { "epoch": 2.49, "grad_norm": 0.7247251868247986, "learning_rate": 4.102387376559444e-05, "loss": 2.923, "step": 50905 }, { "epoch": 2.49, "grad_norm": 0.8152522444725037, "learning_rate": 4.101610256851263e-05, "loss": 2.8562, "step": 50906 }, { "epoch": 2.49, "grad_norm": 0.7078531384468079, "learning_rate": 4.100833205354452e-05, "loss": 3.0635, "step": 50907 }, { "epoch": 2.49, "grad_norm": 0.7330393195152283, "learning_rate": 4.100056222071073e-05, "loss": 2.6665, "step": 50908 }, { "epoch": 2.49, "grad_norm": 0.7819045186042786, "learning_rate": 4.099279307003154e-05, "loss": 3.0064, "step": 50909 }, { "epoch": 2.49, "grad_norm": 0.8102908730506897, "learning_rate": 4.098502460152753e-05, "loss": 3.0131, "step": 50910 }, { "epoch": 2.5, "grad_norm": 0.7458033561706543, "learning_rate": 4.097725681521912e-05, "loss": 2.8156, "step": 50911 }, { "epoch": 2.5, "grad_norm": 0.750956654548645, "learning_rate": 4.0969489711126726e-05, "loss": 3.1724, "step": 50912 }, { "epoch": 2.5, "grad_norm": 0.7348149418830872, "learning_rate": 4.09617232892709e-05, "loss": 2.8127, "step": 50913 }, { "epoch": 2.5, "grad_norm": 0.7550942897796631, "learning_rate": 4.095395754967196e-05, "loss": 2.8972, "step": 50914 }, { "epoch": 2.5, "grad_norm": 0.7148580551147461, "learning_rate": 4.0946192492350514e-05, "loss": 2.8255, "step": 50915 }, { "epoch": 2.5, "grad_norm": 0.7225046157836914, "learning_rate": 4.0938428117326895e-05, "loss": 2.7282, "step": 50916 }, { "epoch": 2.5, "grad_norm": 0.7666391134262085, "learning_rate": 4.093066442462157e-05, "loss": 2.7273, "step": 50917 }, { "epoch": 2.5, "grad_norm": 0.746823787689209, "learning_rate": 4.092290141425511e-05, "loss": 2.9481, "step": 50918 }, { "epoch": 2.5, "grad_norm": 0.7512428164482117, "learning_rate": 4.091513908624784e-05, "loss": 2.8239, "step": 50919 }, { "epoch": 2.5, "grad_norm": 0.7290512919425964, "learning_rate": 4.090737744062023e-05, "loss": 2.7134, "step": 50920 }, { "epoch": 2.5, "grad_norm": 0.7385398149490356, "learning_rate": 4.089961647739268e-05, "loss": 3.15, "step": 50921 }, { "epoch": 2.5, "grad_norm": 0.7204491496086121, "learning_rate": 4.089185619658566e-05, "loss": 2.7842, "step": 50922 }, { "epoch": 2.5, "grad_norm": 0.7641138434410095, "learning_rate": 4.088409659821971e-05, "loss": 2.9864, "step": 50923 }, { "epoch": 2.5, "grad_norm": 0.7317721843719482, "learning_rate": 4.087633768231511e-05, "loss": 2.883, "step": 50924 }, { "epoch": 2.5, "grad_norm": 0.7656816244125366, "learning_rate": 4.0868579448892415e-05, "loss": 2.813, "step": 50925 }, { "epoch": 2.5, "grad_norm": 0.7583822011947632, "learning_rate": 4.086082189797204e-05, "loss": 2.9214, "step": 50926 }, { "epoch": 2.5, "grad_norm": 0.7554618716239929, "learning_rate": 4.085306502957427e-05, "loss": 2.8041, "step": 50927 }, { "epoch": 2.5, "grad_norm": 0.7482122778892517, "learning_rate": 4.0845308843719756e-05, "loss": 2.8939, "step": 50928 }, { "epoch": 2.5, "grad_norm": 0.8274185657501221, "learning_rate": 4.0837553340428784e-05, "loss": 2.7364, "step": 50929 }, { "epoch": 2.5, "grad_norm": 0.7026564478874207, "learning_rate": 4.082979851972183e-05, "loss": 2.7929, "step": 50930 }, { "epoch": 2.5, "grad_norm": 0.8461477160453796, "learning_rate": 4.082204438161929e-05, "loss": 2.9104, "step": 50931 }, { "epoch": 2.5, "grad_norm": 0.7340132594108582, "learning_rate": 4.0814290926141636e-05, "loss": 2.814, "step": 50932 }, { "epoch": 2.5, "grad_norm": 0.7171696424484253, "learning_rate": 4.0806538153309284e-05, "loss": 2.979, "step": 50933 }, { "epoch": 2.5, "grad_norm": 0.7311767935752869, "learning_rate": 4.079878606314253e-05, "loss": 2.9879, "step": 50934 }, { "epoch": 2.5, "grad_norm": 0.7532653212547302, "learning_rate": 4.079103465566198e-05, "loss": 2.8224, "step": 50935 }, { "epoch": 2.5, "grad_norm": 0.7105627059936523, "learning_rate": 4.078328393088789e-05, "loss": 2.803, "step": 50936 }, { "epoch": 2.5, "grad_norm": 0.7665572166442871, "learning_rate": 4.077553388884074e-05, "loss": 3.1236, "step": 50937 }, { "epoch": 2.5, "grad_norm": 0.7285357713699341, "learning_rate": 4.076778452954101e-05, "loss": 2.8497, "step": 50938 }, { "epoch": 2.5, "grad_norm": 0.7463434338569641, "learning_rate": 4.0760035853009e-05, "loss": 2.943, "step": 50939 }, { "epoch": 2.5, "grad_norm": 0.7962722778320312, "learning_rate": 4.075228785926521e-05, "loss": 3.0252, "step": 50940 }, { "epoch": 2.5, "grad_norm": 0.7439616322517395, "learning_rate": 4.0744540548329996e-05, "loss": 2.759, "step": 50941 }, { "epoch": 2.5, "grad_norm": 0.7312211990356445, "learning_rate": 4.073679392022371e-05, "loss": 2.6245, "step": 50942 }, { "epoch": 2.5, "grad_norm": 0.7195311188697815, "learning_rate": 4.072904797496686e-05, "loss": 3.0517, "step": 50943 }, { "epoch": 2.5, "grad_norm": 0.6905331015586853, "learning_rate": 4.072130271257975e-05, "loss": 2.9166, "step": 50944 }, { "epoch": 2.5, "grad_norm": 0.7037015557289124, "learning_rate": 4.071355813308291e-05, "loss": 2.9205, "step": 50945 }, { "epoch": 2.5, "grad_norm": 0.7677614688873291, "learning_rate": 4.070581423649657e-05, "loss": 2.7968, "step": 50946 }, { "epoch": 2.5, "grad_norm": 0.7849667072296143, "learning_rate": 4.069807102284127e-05, "loss": 2.8588, "step": 50947 }, { "epoch": 2.5, "grad_norm": 0.7227040529251099, "learning_rate": 4.0690328492137366e-05, "loss": 2.7918, "step": 50948 }, { "epoch": 2.5, "grad_norm": 0.8007916212081909, "learning_rate": 4.068258664440518e-05, "loss": 3.1101, "step": 50949 }, { "epoch": 2.5, "grad_norm": 0.7214465141296387, "learning_rate": 4.0674845479665185e-05, "loss": 2.6496, "step": 50950 }, { "epoch": 2.5, "grad_norm": 0.7386294603347778, "learning_rate": 4.066710499793771e-05, "loss": 2.728, "step": 50951 }, { "epoch": 2.5, "grad_norm": 0.7470434904098511, "learning_rate": 4.065936519924315e-05, "loss": 2.9676, "step": 50952 }, { "epoch": 2.5, "grad_norm": 0.7561085820198059, "learning_rate": 4.065162608360198e-05, "loss": 2.8916, "step": 50953 }, { "epoch": 2.5, "grad_norm": 0.7485124468803406, "learning_rate": 4.0643887651034514e-05, "loss": 2.6446, "step": 50954 }, { "epoch": 2.5, "grad_norm": 0.7447924017906189, "learning_rate": 4.063614990156112e-05, "loss": 2.8693, "step": 50955 }, { "epoch": 2.5, "grad_norm": 0.729457437992096, "learning_rate": 4.062841283520213e-05, "loss": 2.7354, "step": 50956 }, { "epoch": 2.5, "grad_norm": 0.7371611595153809, "learning_rate": 4.062067645197802e-05, "loss": 3.0109, "step": 50957 }, { "epoch": 2.5, "grad_norm": 0.7636083960533142, "learning_rate": 4.061294075190914e-05, "loss": 2.6421, "step": 50958 }, { "epoch": 2.5, "grad_norm": 0.8881843090057373, "learning_rate": 4.060520573501581e-05, "loss": 2.8784, "step": 50959 }, { "epoch": 2.5, "grad_norm": 0.7290064692497253, "learning_rate": 4.059747140131855e-05, "loss": 3.0156, "step": 50960 }, { "epoch": 2.5, "grad_norm": 0.7382261157035828, "learning_rate": 4.0589737750837506e-05, "loss": 2.779, "step": 50961 }, { "epoch": 2.5, "grad_norm": 0.7628456354141235, "learning_rate": 4.058200478359326e-05, "loss": 2.9542, "step": 50962 }, { "epoch": 2.5, "grad_norm": 0.7667835354804993, "learning_rate": 4.05742724996061e-05, "loss": 3.0102, "step": 50963 }, { "epoch": 2.5, "grad_norm": 0.7994858026504517, "learning_rate": 4.0566540898896275e-05, "loss": 2.8177, "step": 50964 }, { "epoch": 2.5, "grad_norm": 0.7050232887268066, "learning_rate": 4.0558809981484364e-05, "loss": 2.9967, "step": 50965 }, { "epoch": 2.5, "grad_norm": 0.7449204325675964, "learning_rate": 4.055107974739053e-05, "loss": 2.8754, "step": 50966 }, { "epoch": 2.5, "grad_norm": 0.7724292874336243, "learning_rate": 4.054335019663527e-05, "loss": 2.6654, "step": 50967 }, { "epoch": 2.5, "grad_norm": 0.7795135378837585, "learning_rate": 4.0535621329238857e-05, "loss": 2.7718, "step": 50968 }, { "epoch": 2.5, "grad_norm": 0.6958447098731995, "learning_rate": 4.052789314522172e-05, "loss": 2.8641, "step": 50969 }, { "epoch": 2.5, "grad_norm": 0.7487135529518127, "learning_rate": 4.05201656446042e-05, "loss": 2.8888, "step": 50970 }, { "epoch": 2.5, "grad_norm": 0.7429445385932922, "learning_rate": 4.051243882740652e-05, "loss": 2.8184, "step": 50971 }, { "epoch": 2.5, "grad_norm": 0.7123727798461914, "learning_rate": 4.050471269364926e-05, "loss": 3.1401, "step": 50972 }, { "epoch": 2.5, "grad_norm": 0.7284667491912842, "learning_rate": 4.049698724335254e-05, "loss": 2.7846, "step": 50973 }, { "epoch": 2.5, "grad_norm": 0.7588906288146973, "learning_rate": 4.04892624765368e-05, "loss": 2.6116, "step": 50974 }, { "epoch": 2.5, "grad_norm": 0.7456479072570801, "learning_rate": 4.0481538393222516e-05, "loss": 2.9805, "step": 50975 }, { "epoch": 2.5, "grad_norm": 0.7160006761550903, "learning_rate": 4.047381499342986e-05, "loss": 2.7069, "step": 50976 }, { "epoch": 2.5, "grad_norm": 0.7530381083488464, "learning_rate": 4.0466092277179274e-05, "loss": 2.8844, "step": 50977 }, { "epoch": 2.5, "grad_norm": 0.7419160604476929, "learning_rate": 4.0458370244490954e-05, "loss": 2.5117, "step": 50978 }, { "epoch": 2.5, "grad_norm": 0.7657301425933838, "learning_rate": 4.045064889538536e-05, "loss": 2.5939, "step": 50979 }, { "epoch": 2.5, "grad_norm": 0.7660251259803772, "learning_rate": 4.044292822988284e-05, "loss": 2.7927, "step": 50980 }, { "epoch": 2.5, "grad_norm": 0.8668144941329956, "learning_rate": 4.043520824800368e-05, "loss": 2.9519, "step": 50981 }, { "epoch": 2.5, "grad_norm": 0.7728575468063354, "learning_rate": 4.042748894976826e-05, "loss": 3.0094, "step": 50982 }, { "epoch": 2.5, "grad_norm": 0.7155167460441589, "learning_rate": 4.04197703351968e-05, "loss": 2.6734, "step": 50983 }, { "epoch": 2.5, "grad_norm": 0.7189810276031494, "learning_rate": 4.041205240430981e-05, "loss": 2.9716, "step": 50984 }, { "epoch": 2.5, "grad_norm": 0.728173553943634, "learning_rate": 4.040433515712749e-05, "loss": 2.8533, "step": 50985 }, { "epoch": 2.5, "grad_norm": 0.7641592025756836, "learning_rate": 4.039661859367014e-05, "loss": 3.3096, "step": 50986 }, { "epoch": 2.5, "grad_norm": 0.7122716903686523, "learning_rate": 4.0388902713958184e-05, "loss": 2.7429, "step": 50987 }, { "epoch": 2.5, "grad_norm": 0.7600113153457642, "learning_rate": 4.038118751801184e-05, "loss": 2.947, "step": 50988 }, { "epoch": 2.5, "grad_norm": 0.7478308081626892, "learning_rate": 4.0373473005851454e-05, "loss": 2.8651, "step": 50989 }, { "epoch": 2.5, "grad_norm": 0.7460908889770508, "learning_rate": 4.036575917749748e-05, "loss": 2.9738, "step": 50990 }, { "epoch": 2.5, "grad_norm": 0.7135355472564697, "learning_rate": 4.03580460329701e-05, "loss": 2.7612, "step": 50991 }, { "epoch": 2.5, "grad_norm": 0.743721604347229, "learning_rate": 4.0350333572289675e-05, "loss": 2.7419, "step": 50992 }, { "epoch": 2.5, "grad_norm": 0.737909197807312, "learning_rate": 4.034262179547644e-05, "loss": 2.8635, "step": 50993 }, { "epoch": 2.5, "grad_norm": 0.7796975374221802, "learning_rate": 4.033491070255075e-05, "loss": 2.8157, "step": 50994 }, { "epoch": 2.5, "grad_norm": 0.7635996341705322, "learning_rate": 4.032720029353302e-05, "loss": 2.777, "step": 50995 }, { "epoch": 2.5, "grad_norm": 0.7595275640487671, "learning_rate": 4.031949056844338e-05, "loss": 2.8294, "step": 50996 }, { "epoch": 2.5, "grad_norm": 0.7468879222869873, "learning_rate": 4.03117815273023e-05, "loss": 2.8661, "step": 50997 }, { "epoch": 2.5, "grad_norm": 0.7194596529006958, "learning_rate": 4.0304073170130036e-05, "loss": 3.0833, "step": 50998 }, { "epoch": 2.5, "grad_norm": 0.7229351997375488, "learning_rate": 4.029636549694677e-05, "loss": 3.0329, "step": 50999 }, { "epoch": 2.5, "grad_norm": 0.7696372270584106, "learning_rate": 4.028865850777295e-05, "loss": 3.0217, "step": 51000 }, { "epoch": 2.5, "grad_norm": 0.730970561504364, "learning_rate": 4.028095220262877e-05, "loss": 2.8736, "step": 51001 }, { "epoch": 2.5, "grad_norm": 0.7224681973457336, "learning_rate": 4.0273246581534636e-05, "loss": 2.857, "step": 51002 }, { "epoch": 2.5, "grad_norm": 0.7172154188156128, "learning_rate": 4.026554164451073e-05, "loss": 2.9542, "step": 51003 }, { "epoch": 2.5, "grad_norm": 0.7283074259757996, "learning_rate": 4.025783739157738e-05, "loss": 2.8613, "step": 51004 }, { "epoch": 2.5, "grad_norm": 0.7916626930236816, "learning_rate": 4.0250133822755004e-05, "loss": 3.0261, "step": 51005 }, { "epoch": 2.5, "grad_norm": 0.751471757888794, "learning_rate": 4.0242430938063776e-05, "loss": 2.9181, "step": 51006 }, { "epoch": 2.5, "grad_norm": 0.7159141898155212, "learning_rate": 4.023472873752398e-05, "loss": 2.8968, "step": 51007 }, { "epoch": 2.5, "grad_norm": 0.769489049911499, "learning_rate": 4.022702722115584e-05, "loss": 2.9992, "step": 51008 }, { "epoch": 2.5, "grad_norm": 0.7926540970802307, "learning_rate": 4.0219326388979736e-05, "loss": 2.8284, "step": 51009 }, { "epoch": 2.5, "grad_norm": 0.7663864493370056, "learning_rate": 4.021162624101598e-05, "loss": 2.996, "step": 51010 }, { "epoch": 2.5, "grad_norm": 0.8369364142417908, "learning_rate": 4.020392677728473e-05, "loss": 2.7815, "step": 51011 }, { "epoch": 2.5, "grad_norm": 0.7207963466644287, "learning_rate": 4.019622799780644e-05, "loss": 3.0512, "step": 51012 }, { "epoch": 2.5, "grad_norm": 0.826167106628418, "learning_rate": 4.018852990260125e-05, "loss": 2.9079, "step": 51013 }, { "epoch": 2.5, "grad_norm": 0.7820410132408142, "learning_rate": 4.018083249168942e-05, "loss": 3.0379, "step": 51014 }, { "epoch": 2.5, "grad_norm": 0.7185248136520386, "learning_rate": 4.0173135765091325e-05, "loss": 2.8409, "step": 51015 }, { "epoch": 2.5, "grad_norm": 0.7619501948356628, "learning_rate": 4.016543972282715e-05, "loss": 2.7795, "step": 51016 }, { "epoch": 2.5, "grad_norm": 0.7843639254570007, "learning_rate": 4.0157744364917254e-05, "loss": 2.994, "step": 51017 }, { "epoch": 2.5, "grad_norm": 0.7171763777732849, "learning_rate": 4.015004969138178e-05, "loss": 2.8102, "step": 51018 }, { "epoch": 2.5, "grad_norm": 0.7367191910743713, "learning_rate": 4.014235570224105e-05, "loss": 2.9537, "step": 51019 }, { "epoch": 2.5, "grad_norm": 0.7253427505493164, "learning_rate": 4.013466239751544e-05, "loss": 2.9189, "step": 51020 }, { "epoch": 2.5, "grad_norm": 0.7575175166130066, "learning_rate": 4.0126969777225105e-05, "loss": 2.8425, "step": 51021 }, { "epoch": 2.5, "grad_norm": 0.7350121140480042, "learning_rate": 4.0119277841390296e-05, "loss": 2.9173, "step": 51022 }, { "epoch": 2.5, "grad_norm": 0.7644230723381042, "learning_rate": 4.0111586590031264e-05, "loss": 2.844, "step": 51023 }, { "epoch": 2.5, "grad_norm": 0.7468816041946411, "learning_rate": 4.010389602316829e-05, "loss": 3.0427, "step": 51024 }, { "epoch": 2.5, "grad_norm": 0.7907638549804688, "learning_rate": 4.0096206140821695e-05, "loss": 2.8312, "step": 51025 }, { "epoch": 2.5, "grad_norm": 0.7258133888244629, "learning_rate": 4.00885169430116e-05, "loss": 2.8525, "step": 51026 }, { "epoch": 2.5, "grad_norm": 0.7306505441665649, "learning_rate": 4.00808284297584e-05, "loss": 2.9856, "step": 51027 }, { "epoch": 2.5, "grad_norm": 0.7286311388015747, "learning_rate": 4.007314060108228e-05, "loss": 3.0388, "step": 51028 }, { "epoch": 2.5, "grad_norm": 0.7742034792900085, "learning_rate": 4.006545345700343e-05, "loss": 2.9805, "step": 51029 }, { "epoch": 2.5, "grad_norm": 0.7489369511604309, "learning_rate": 4.005776699754217e-05, "loss": 2.8488, "step": 51030 }, { "epoch": 2.5, "grad_norm": 0.7013357877731323, "learning_rate": 4.00500812227187e-05, "loss": 2.9688, "step": 51031 }, { "epoch": 2.5, "grad_norm": 0.7368605136871338, "learning_rate": 4.0042396132553355e-05, "loss": 2.7995, "step": 51032 }, { "epoch": 2.5, "grad_norm": 0.7846304178237915, "learning_rate": 4.003471172706623e-05, "loss": 2.9969, "step": 51033 }, { "epoch": 2.5, "grad_norm": 0.7510572075843811, "learning_rate": 4.00270280062777e-05, "loss": 2.8879, "step": 51034 }, { "epoch": 2.5, "grad_norm": 0.7406590580940247, "learning_rate": 4.0019344970207976e-05, "loss": 2.8439, "step": 51035 }, { "epoch": 2.5, "grad_norm": 0.7419261336326599, "learning_rate": 4.001166261887716e-05, "loss": 2.7751, "step": 51036 }, { "epoch": 2.5, "grad_norm": 0.7146857976913452, "learning_rate": 4.000398095230569e-05, "loss": 3.0476, "step": 51037 }, { "epoch": 2.5, "grad_norm": 0.7753526568412781, "learning_rate": 3.999629997051359e-05, "loss": 2.6755, "step": 51038 }, { "epoch": 2.5, "grad_norm": 0.7410012483596802, "learning_rate": 3.998861967352131e-05, "loss": 2.756, "step": 51039 }, { "epoch": 2.5, "grad_norm": 0.7166079878807068, "learning_rate": 3.998094006134885e-05, "loss": 2.9648, "step": 51040 }, { "epoch": 2.5, "grad_norm": 0.7411850094795227, "learning_rate": 3.997326113401659e-05, "loss": 2.8957, "step": 51041 }, { "epoch": 2.5, "grad_norm": 0.7261947989463806, "learning_rate": 3.996558289154479e-05, "loss": 2.9451, "step": 51042 }, { "epoch": 2.5, "grad_norm": 0.7302013635635376, "learning_rate": 3.9957905333953574e-05, "loss": 3.0935, "step": 51043 }, { "epoch": 2.5, "grad_norm": 0.7443523406982422, "learning_rate": 3.99502284612632e-05, "loss": 3.1287, "step": 51044 }, { "epoch": 2.5, "grad_norm": 0.7418161630630493, "learning_rate": 3.994255227349381e-05, "loss": 2.9928, "step": 51045 }, { "epoch": 2.5, "grad_norm": 0.7650214433670044, "learning_rate": 3.9934876770665694e-05, "loss": 2.4832, "step": 51046 }, { "epoch": 2.5, "grad_norm": 0.7540137767791748, "learning_rate": 3.9927201952799094e-05, "loss": 2.8556, "step": 51047 }, { "epoch": 2.5, "grad_norm": 0.7868987321853638, "learning_rate": 3.991952781991417e-05, "loss": 3.0147, "step": 51048 }, { "epoch": 2.5, "grad_norm": 0.6718930006027222, "learning_rate": 3.9911854372031196e-05, "loss": 2.7966, "step": 51049 }, { "epoch": 2.5, "grad_norm": 0.722568690776825, "learning_rate": 3.990418160917036e-05, "loss": 2.8758, "step": 51050 }, { "epoch": 2.5, "grad_norm": 0.7367124557495117, "learning_rate": 3.989650953135178e-05, "loss": 2.8975, "step": 51051 }, { "epoch": 2.5, "grad_norm": 0.7118638157844543, "learning_rate": 3.988883813859578e-05, "loss": 2.8865, "step": 51052 }, { "epoch": 2.5, "grad_norm": 0.7357069849967957, "learning_rate": 3.98811674309225e-05, "loss": 3.1935, "step": 51053 }, { "epoch": 2.5, "grad_norm": 0.7383196949958801, "learning_rate": 3.9873497408352194e-05, "loss": 3.1177, "step": 51054 }, { "epoch": 2.5, "grad_norm": 0.7383975982666016, "learning_rate": 3.986582807090498e-05, "loss": 2.8114, "step": 51055 }, { "epoch": 2.5, "grad_norm": 0.7209492325782776, "learning_rate": 3.9858159418601174e-05, "loss": 2.8342, "step": 51056 }, { "epoch": 2.5, "grad_norm": 0.7390090823173523, "learning_rate": 3.98504914514609e-05, "loss": 2.7422, "step": 51057 }, { "epoch": 2.5, "grad_norm": 0.7286245226860046, "learning_rate": 3.984282416950433e-05, "loss": 2.655, "step": 51058 }, { "epoch": 2.5, "grad_norm": 0.7232625484466553, "learning_rate": 3.983515757275172e-05, "loss": 3.0018, "step": 51059 }, { "epoch": 2.5, "grad_norm": 0.7462209463119507, "learning_rate": 3.9827491661223185e-05, "loss": 2.7941, "step": 51060 }, { "epoch": 2.5, "grad_norm": 0.7977537512779236, "learning_rate": 3.981982643493898e-05, "loss": 2.9345, "step": 51061 }, { "epoch": 2.5, "grad_norm": 0.7521597743034363, "learning_rate": 3.9812161893919356e-05, "loss": 2.7811, "step": 51062 }, { "epoch": 2.5, "grad_norm": 0.7260125279426575, "learning_rate": 3.980449803818433e-05, "loss": 2.8128, "step": 51063 }, { "epoch": 2.5, "grad_norm": 0.7464675307273865, "learning_rate": 3.9796834867754244e-05, "loss": 3.1766, "step": 51064 }, { "epoch": 2.5, "grad_norm": 0.71709144115448, "learning_rate": 3.9789172382649226e-05, "loss": 2.724, "step": 51065 }, { "epoch": 2.5, "grad_norm": 0.7842869162559509, "learning_rate": 3.978151058288939e-05, "loss": 2.8037, "step": 51066 }, { "epoch": 2.5, "grad_norm": 0.7320528626441956, "learning_rate": 3.9773849468495044e-05, "loss": 2.8024, "step": 51067 }, { "epoch": 2.5, "grad_norm": 0.7437505125999451, "learning_rate": 3.976618903948622e-05, "loss": 2.9678, "step": 51068 }, { "epoch": 2.5, "grad_norm": 0.7431596517562866, "learning_rate": 3.9758529295883226e-05, "loss": 2.9204, "step": 51069 }, { "epoch": 2.5, "grad_norm": 0.7559123635292053, "learning_rate": 3.9750870237706146e-05, "loss": 2.9171, "step": 51070 }, { "epoch": 2.5, "grad_norm": 0.7823855876922607, "learning_rate": 3.974321186497523e-05, "loss": 2.9853, "step": 51071 }, { "epoch": 2.5, "grad_norm": 0.7621415853500366, "learning_rate": 3.97355541777106e-05, "loss": 2.9533, "step": 51072 }, { "epoch": 2.5, "grad_norm": 0.7120107412338257, "learning_rate": 3.9727897175932377e-05, "loss": 2.8572, "step": 51073 }, { "epoch": 2.5, "grad_norm": 0.7418361306190491, "learning_rate": 3.972024085966087e-05, "loss": 2.9235, "step": 51074 }, { "epoch": 2.5, "grad_norm": 0.7263070940971375, "learning_rate": 3.9712585228916064e-05, "loss": 2.7967, "step": 51075 }, { "epoch": 2.5, "grad_norm": 0.7198815941810608, "learning_rate": 3.970493028371824e-05, "loss": 3.0169, "step": 51076 }, { "epoch": 2.5, "grad_norm": 0.7565320134162903, "learning_rate": 3.9697276024087555e-05, "loss": 2.8488, "step": 51077 }, { "epoch": 2.5, "grad_norm": 0.7032371759414673, "learning_rate": 3.968962245004419e-05, "loss": 3.0858, "step": 51078 }, { "epoch": 2.5, "grad_norm": 0.7568981051445007, "learning_rate": 3.968196956160826e-05, "loss": 3.0574, "step": 51079 }, { "epoch": 2.5, "grad_norm": 0.7816786170005798, "learning_rate": 3.967431735879982e-05, "loss": 2.8361, "step": 51080 }, { "epoch": 2.5, "grad_norm": 0.7889500260353088, "learning_rate": 3.966666584163916e-05, "loss": 2.8982, "step": 51081 }, { "epoch": 2.5, "grad_norm": 0.8262700438499451, "learning_rate": 3.965901501014648e-05, "loss": 2.9173, "step": 51082 }, { "epoch": 2.5, "grad_norm": 0.7360769510269165, "learning_rate": 3.965136486434174e-05, "loss": 2.8857, "step": 51083 }, { "epoch": 2.5, "grad_norm": 0.7041099071502686, "learning_rate": 3.964371540424529e-05, "loss": 2.8569, "step": 51084 }, { "epoch": 2.5, "grad_norm": 0.7271643280982971, "learning_rate": 3.963606662987712e-05, "loss": 2.841, "step": 51085 }, { "epoch": 2.5, "grad_norm": 0.7795175313949585, "learning_rate": 3.9628418541257534e-05, "loss": 2.9807, "step": 51086 }, { "epoch": 2.5, "grad_norm": 0.7626325488090515, "learning_rate": 3.962077113840656e-05, "loss": 2.9521, "step": 51087 }, { "epoch": 2.5, "grad_norm": 0.7495712637901306, "learning_rate": 3.9613124421344275e-05, "loss": 2.8875, "step": 51088 }, { "epoch": 2.5, "grad_norm": 0.7421875, "learning_rate": 3.960547839009101e-05, "loss": 2.8907, "step": 51089 }, { "epoch": 2.5, "grad_norm": 0.7435591816902161, "learning_rate": 3.959783304466673e-05, "loss": 2.9217, "step": 51090 }, { "epoch": 2.5, "grad_norm": 0.7713919878005981, "learning_rate": 3.959018838509162e-05, "loss": 2.7192, "step": 51091 }, { "epoch": 2.5, "grad_norm": 0.7549655437469482, "learning_rate": 3.958254441138592e-05, "loss": 2.9587, "step": 51092 }, { "epoch": 2.5, "grad_norm": 0.7387896776199341, "learning_rate": 3.9574901123569655e-05, "loss": 2.9832, "step": 51093 }, { "epoch": 2.5, "grad_norm": 0.7287462949752808, "learning_rate": 3.956725852166302e-05, "loss": 2.7358, "step": 51094 }, { "epoch": 2.5, "grad_norm": 0.7182101011276245, "learning_rate": 3.9559616605686e-05, "loss": 2.9946, "step": 51095 }, { "epoch": 2.5, "grad_norm": 0.7613502740859985, "learning_rate": 3.955197537565885e-05, "loss": 2.8062, "step": 51096 }, { "epoch": 2.5, "grad_norm": 0.7491579055786133, "learning_rate": 3.954433483160174e-05, "loss": 2.9498, "step": 51097 }, { "epoch": 2.5, "grad_norm": 0.7342544794082642, "learning_rate": 3.953669497353467e-05, "loss": 2.814, "step": 51098 }, { "epoch": 2.5, "grad_norm": 0.7425305843353271, "learning_rate": 3.952905580147787e-05, "loss": 2.8258, "step": 51099 }, { "epoch": 2.5, "grad_norm": 0.73570716381073, "learning_rate": 3.952141731545141e-05, "loss": 2.7309, "step": 51100 }, { "epoch": 2.5, "grad_norm": 0.7669528722763062, "learning_rate": 3.951377951547533e-05, "loss": 2.9622, "step": 51101 }, { "epoch": 2.5, "grad_norm": 0.8032728433609009, "learning_rate": 3.950614240156992e-05, "loss": 2.6455, "step": 51102 }, { "epoch": 2.5, "grad_norm": 0.7362860441207886, "learning_rate": 3.949850597375509e-05, "loss": 2.8745, "step": 51103 }, { "epoch": 2.5, "grad_norm": 0.7232388854026794, "learning_rate": 3.9490870232051164e-05, "loss": 2.8219, "step": 51104 }, { "epoch": 2.5, "grad_norm": 0.7235947847366333, "learning_rate": 3.948323517647806e-05, "loss": 2.9228, "step": 51105 }, { "epoch": 2.5, "grad_norm": 0.7498955726623535, "learning_rate": 3.9475600807055986e-05, "loss": 3.1506, "step": 51106 }, { "epoch": 2.5, "grad_norm": 0.7524616122245789, "learning_rate": 3.94679671238051e-05, "loss": 2.9171, "step": 51107 }, { "epoch": 2.5, "grad_norm": 0.707608163356781, "learning_rate": 3.9460334126745455e-05, "loss": 2.8342, "step": 51108 }, { "epoch": 2.5, "grad_norm": 0.7663477659225464, "learning_rate": 3.945270181589716e-05, "loss": 2.8276, "step": 51109 }, { "epoch": 2.5, "grad_norm": 0.7328577041625977, "learning_rate": 3.94450701912802e-05, "loss": 2.7313, "step": 51110 }, { "epoch": 2.5, "grad_norm": 0.7494615912437439, "learning_rate": 3.9437439252914805e-05, "loss": 2.828, "step": 51111 }, { "epoch": 2.5, "grad_norm": 0.7223064303398132, "learning_rate": 3.9429809000821145e-05, "loss": 2.8811, "step": 51112 }, { "epoch": 2.5, "grad_norm": 0.746543288230896, "learning_rate": 3.9422179435019106e-05, "loss": 2.8726, "step": 51113 }, { "epoch": 2.5, "grad_norm": 0.7731015682220459, "learning_rate": 3.941455055552898e-05, "loss": 2.7396, "step": 51114 }, { "epoch": 2.51, "grad_norm": 0.7490095496177673, "learning_rate": 3.9406922362370775e-05, "loss": 2.8321, "step": 51115 }, { "epoch": 2.51, "grad_norm": 0.7323755621910095, "learning_rate": 3.9399294855564545e-05, "loss": 2.8937, "step": 51116 }, { "epoch": 2.51, "grad_norm": 0.7344057559967041, "learning_rate": 3.9391668035130444e-05, "loss": 2.7432, "step": 51117 }, { "epoch": 2.51, "grad_norm": 0.744623601436615, "learning_rate": 3.938404190108848e-05, "loss": 2.8554, "step": 51118 }, { "epoch": 2.51, "grad_norm": 0.7350231409072876, "learning_rate": 3.937641645345888e-05, "loss": 2.9645, "step": 51119 }, { "epoch": 2.51, "grad_norm": 0.7532517910003662, "learning_rate": 3.936879169226156e-05, "loss": 2.8945, "step": 51120 }, { "epoch": 2.51, "grad_norm": 0.7580457329750061, "learning_rate": 3.936116761751676e-05, "loss": 2.9476, "step": 51121 }, { "epoch": 2.51, "grad_norm": 0.7381550073623657, "learning_rate": 3.935354422924445e-05, "loss": 2.6959, "step": 51122 }, { "epoch": 2.51, "grad_norm": 0.7573136687278748, "learning_rate": 3.93459215274647e-05, "loss": 2.9742, "step": 51123 }, { "epoch": 2.51, "grad_norm": 0.7315176129341125, "learning_rate": 3.933829951219769e-05, "loss": 3.0369, "step": 51124 }, { "epoch": 2.51, "grad_norm": 0.8132463693618774, "learning_rate": 3.9330678183463385e-05, "loss": 2.9298, "step": 51125 }, { "epoch": 2.51, "grad_norm": 0.7474046945571899, "learning_rate": 3.932305754128196e-05, "loss": 2.8276, "step": 51126 }, { "epoch": 2.51, "grad_norm": 0.7960267663002014, "learning_rate": 3.9315437585673356e-05, "loss": 2.8031, "step": 51127 }, { "epoch": 2.51, "grad_norm": 0.7314231395721436, "learning_rate": 3.9307818316657744e-05, "loss": 2.7175, "step": 51128 }, { "epoch": 2.51, "grad_norm": 0.7347849011421204, "learning_rate": 3.930019973425523e-05, "loss": 2.9909, "step": 51129 }, { "epoch": 2.51, "grad_norm": 0.7521358728408813, "learning_rate": 3.92925818384858e-05, "loss": 2.9316, "step": 51130 }, { "epoch": 2.51, "grad_norm": 0.7546024322509766, "learning_rate": 3.9284964629369545e-05, "loss": 3.0779, "step": 51131 }, { "epoch": 2.51, "grad_norm": 0.7242218255996704, "learning_rate": 3.927734810692643e-05, "loss": 3.0528, "step": 51132 }, { "epoch": 2.51, "grad_norm": 0.7637183666229248, "learning_rate": 3.9269732271176624e-05, "loss": 2.8383, "step": 51133 }, { "epoch": 2.51, "grad_norm": 0.7487627267837524, "learning_rate": 3.926211712214024e-05, "loss": 3.0692, "step": 51134 }, { "epoch": 2.51, "grad_norm": 0.7518205642700195, "learning_rate": 3.925450265983718e-05, "loss": 2.7788, "step": 51135 }, { "epoch": 2.51, "grad_norm": 0.7417492270469666, "learning_rate": 3.924688888428765e-05, "loss": 2.9289, "step": 51136 }, { "epoch": 2.51, "grad_norm": 0.7587374448776245, "learning_rate": 3.923927579551163e-05, "loss": 3.0257, "step": 51137 }, { "epoch": 2.51, "grad_norm": 0.7488877177238464, "learning_rate": 3.923166339352912e-05, "loss": 2.7045, "step": 51138 }, { "epoch": 2.51, "grad_norm": 0.743396520614624, "learning_rate": 3.9224051678360256e-05, "loss": 2.9128, "step": 51139 }, { "epoch": 2.51, "grad_norm": 0.739387571811676, "learning_rate": 3.921644065002502e-05, "loss": 2.8156, "step": 51140 }, { "epoch": 2.51, "grad_norm": 0.771299421787262, "learning_rate": 3.920883030854355e-05, "loss": 2.7683, "step": 51141 }, { "epoch": 2.51, "grad_norm": 0.7252259850502014, "learning_rate": 3.9201220653935735e-05, "loss": 2.9843, "step": 51142 }, { "epoch": 2.51, "grad_norm": 0.7235883474349976, "learning_rate": 3.919361168622174e-05, "loss": 2.9401, "step": 51143 }, { "epoch": 2.51, "grad_norm": 0.73777174949646, "learning_rate": 3.918600340542163e-05, "loss": 2.9255, "step": 51144 }, { "epoch": 2.51, "grad_norm": 0.7344832420349121, "learning_rate": 3.9178395811555396e-05, "loss": 2.7266, "step": 51145 }, { "epoch": 2.51, "grad_norm": 0.7370460033416748, "learning_rate": 3.917078890464309e-05, "loss": 2.7727, "step": 51146 }, { "epoch": 2.51, "grad_norm": 0.7238647937774658, "learning_rate": 3.916318268470466e-05, "loss": 2.9441, "step": 51147 }, { "epoch": 2.51, "grad_norm": 0.7683873176574707, "learning_rate": 3.9155577151760195e-05, "loss": 2.7424, "step": 51148 }, { "epoch": 2.51, "grad_norm": 0.7333675026893616, "learning_rate": 3.914797230582981e-05, "loss": 2.9641, "step": 51149 }, { "epoch": 2.51, "grad_norm": 0.789627730846405, "learning_rate": 3.914036814693339e-05, "loss": 2.6447, "step": 51150 }, { "epoch": 2.51, "grad_norm": 0.7240229845046997, "learning_rate": 3.913276467509111e-05, "loss": 2.7824, "step": 51151 }, { "epoch": 2.51, "grad_norm": 0.7708064317703247, "learning_rate": 3.91251618903229e-05, "loss": 2.9563, "step": 51152 }, { "epoch": 2.51, "grad_norm": 0.7075161337852478, "learning_rate": 3.9117559792648764e-05, "loss": 2.9594, "step": 51153 }, { "epoch": 2.51, "grad_norm": 0.8024198412895203, "learning_rate": 3.910995838208883e-05, "loss": 2.9412, "step": 51154 }, { "epoch": 2.51, "grad_norm": 0.7190766334533691, "learning_rate": 3.910235765866302e-05, "loss": 3.0936, "step": 51155 }, { "epoch": 2.51, "grad_norm": 0.7699955701828003, "learning_rate": 3.9094757622391407e-05, "loss": 3.0168, "step": 51156 }, { "epoch": 2.51, "grad_norm": 0.7096518874168396, "learning_rate": 3.908715827329394e-05, "loss": 2.9694, "step": 51157 }, { "epoch": 2.51, "grad_norm": 0.7942549586296082, "learning_rate": 3.907955961139078e-05, "loss": 2.8103, "step": 51158 }, { "epoch": 2.51, "grad_norm": 0.7407145500183105, "learning_rate": 3.90719616367018e-05, "loss": 2.7899, "step": 51159 }, { "epoch": 2.51, "grad_norm": 0.7580335736274719, "learning_rate": 3.9064364349247025e-05, "loss": 2.9164, "step": 51160 }, { "epoch": 2.51, "grad_norm": 0.7507249116897583, "learning_rate": 3.9056767749046534e-05, "loss": 2.795, "step": 51161 }, { "epoch": 2.51, "grad_norm": 0.7054608464241028, "learning_rate": 3.9049171836120255e-05, "loss": 3.0047, "step": 51162 }, { "epoch": 2.51, "grad_norm": 0.8000778555870056, "learning_rate": 3.9041576610488226e-05, "loss": 2.8907, "step": 51163 }, { "epoch": 2.51, "grad_norm": 0.7749502062797546, "learning_rate": 3.903398207217053e-05, "loss": 2.9108, "step": 51164 }, { "epoch": 2.51, "grad_norm": 0.7396555542945862, "learning_rate": 3.902638822118703e-05, "loss": 2.8869, "step": 51165 }, { "epoch": 2.51, "grad_norm": 0.7471228241920471, "learning_rate": 3.9018795057557874e-05, "loss": 2.7746, "step": 51166 }, { "epoch": 2.51, "grad_norm": 0.7587604522705078, "learning_rate": 3.901120258130297e-05, "loss": 2.8944, "step": 51167 }, { "epoch": 2.51, "grad_norm": 0.729351818561554, "learning_rate": 3.900361079244228e-05, "loss": 3.1015, "step": 51168 }, { "epoch": 2.51, "grad_norm": 0.7310697436332703, "learning_rate": 3.899601969099591e-05, "loss": 2.7458, "step": 51169 }, { "epoch": 2.51, "grad_norm": 0.7379913330078125, "learning_rate": 3.898842927698371e-05, "loss": 2.7591, "step": 51170 }, { "epoch": 2.51, "grad_norm": 0.8379819393157959, "learning_rate": 3.8980839550425846e-05, "loss": 2.8154, "step": 51171 }, { "epoch": 2.51, "grad_norm": 0.7568002343177795, "learning_rate": 3.8973250511342127e-05, "loss": 2.9693, "step": 51172 }, { "epoch": 2.51, "grad_norm": 0.7051762938499451, "learning_rate": 3.89656621597527e-05, "loss": 2.9149, "step": 51173 }, { "epoch": 2.51, "grad_norm": 0.7339165210723877, "learning_rate": 3.895807449567752e-05, "loss": 2.9697, "step": 51174 }, { "epoch": 2.51, "grad_norm": 0.7606975436210632, "learning_rate": 3.895048751913643e-05, "loss": 2.8402, "step": 51175 }, { "epoch": 2.51, "grad_norm": 0.7482293248176575, "learning_rate": 3.894290123014959e-05, "loss": 3.0584, "step": 51176 }, { "epoch": 2.51, "grad_norm": 0.727558970451355, "learning_rate": 3.8935315628736816e-05, "loss": 2.9286, "step": 51177 }, { "epoch": 2.51, "grad_norm": 0.7616907954216003, "learning_rate": 3.892773071491823e-05, "loss": 2.9106, "step": 51178 }, { "epoch": 2.51, "grad_norm": 0.7025411128997803, "learning_rate": 3.89201464887138e-05, "loss": 2.7646, "step": 51179 }, { "epoch": 2.51, "grad_norm": 0.7629191279411316, "learning_rate": 3.891256295014342e-05, "loss": 3.0237, "step": 51180 }, { "epoch": 2.51, "grad_norm": 0.776381254196167, "learning_rate": 3.8904980099227145e-05, "loss": 3.0137, "step": 51181 }, { "epoch": 2.51, "grad_norm": 0.7896276116371155, "learning_rate": 3.8897397935984854e-05, "loss": 2.7922, "step": 51182 }, { "epoch": 2.51, "grad_norm": 0.6885989904403687, "learning_rate": 3.888981646043653e-05, "loss": 2.9651, "step": 51183 }, { "epoch": 2.51, "grad_norm": 0.7584837675094604, "learning_rate": 3.888223567260226e-05, "loss": 3.0849, "step": 51184 }, { "epoch": 2.51, "grad_norm": 0.7259736061096191, "learning_rate": 3.887465557250186e-05, "loss": 2.7324, "step": 51185 }, { "epoch": 2.51, "grad_norm": 0.7147319912910461, "learning_rate": 3.886707616015541e-05, "loss": 2.9832, "step": 51186 }, { "epoch": 2.51, "grad_norm": 0.7638548016548157, "learning_rate": 3.8859497435582764e-05, "loss": 2.7991, "step": 51187 }, { "epoch": 2.51, "grad_norm": 0.7534319162368774, "learning_rate": 3.885191939880404e-05, "loss": 2.9444, "step": 51188 }, { "epoch": 2.51, "grad_norm": 0.7515311241149902, "learning_rate": 3.8844342049839065e-05, "loss": 2.7472, "step": 51189 }, { "epoch": 2.51, "grad_norm": 0.7729167342185974, "learning_rate": 3.88367653887078e-05, "loss": 2.9857, "step": 51190 }, { "epoch": 2.51, "grad_norm": 0.735460102558136, "learning_rate": 3.8829189415430285e-05, "loss": 2.9408, "step": 51191 }, { "epoch": 2.51, "grad_norm": 0.7101142406463623, "learning_rate": 3.882161413002636e-05, "loss": 3.033, "step": 51192 }, { "epoch": 2.51, "grad_norm": 0.7463287115097046, "learning_rate": 3.8814039532516005e-05, "loss": 2.7537, "step": 51193 }, { "epoch": 2.51, "grad_norm": 0.8116194009780884, "learning_rate": 3.880646562291931e-05, "loss": 3.1006, "step": 51194 }, { "epoch": 2.51, "grad_norm": 0.7226468324661255, "learning_rate": 3.87988924012561e-05, "loss": 2.9244, "step": 51195 }, { "epoch": 2.51, "grad_norm": 0.7452834248542786, "learning_rate": 3.8791319867546345e-05, "loss": 2.8646, "step": 51196 }, { "epoch": 2.51, "grad_norm": 0.78560471534729, "learning_rate": 3.878374802180994e-05, "loss": 2.895, "step": 51197 }, { "epoch": 2.51, "grad_norm": 0.7502028942108154, "learning_rate": 3.8776176864066886e-05, "loss": 2.9412, "step": 51198 }, { "epoch": 2.51, "grad_norm": 0.753572940826416, "learning_rate": 3.8768606394337086e-05, "loss": 2.994, "step": 51199 }, { "epoch": 2.51, "grad_norm": 0.7555680871009827, "learning_rate": 3.876103661264048e-05, "loss": 2.7353, "step": 51200 }, { "epoch": 2.51, "grad_norm": 0.7278770804405212, "learning_rate": 3.8753467518997094e-05, "loss": 2.8839, "step": 51201 }, { "epoch": 2.51, "grad_norm": 0.7377351522445679, "learning_rate": 3.87458991134268e-05, "loss": 2.8566, "step": 51202 }, { "epoch": 2.51, "grad_norm": 0.7519879341125488, "learning_rate": 3.873833139594953e-05, "loss": 2.9622, "step": 51203 }, { "epoch": 2.51, "grad_norm": 0.7733878493309021, "learning_rate": 3.873076436658512e-05, "loss": 2.902, "step": 51204 }, { "epoch": 2.51, "grad_norm": 0.7269855737686157, "learning_rate": 3.872319802535363e-05, "loss": 3.0837, "step": 51205 }, { "epoch": 2.51, "grad_norm": 0.7522500157356262, "learning_rate": 3.8715632372275005e-05, "loss": 2.8483, "step": 51206 }, { "epoch": 2.51, "grad_norm": 0.7667732834815979, "learning_rate": 3.870806740736907e-05, "loss": 2.8965, "step": 51207 }, { "epoch": 2.51, "grad_norm": 0.7667376399040222, "learning_rate": 3.8700503130655836e-05, "loss": 2.6717, "step": 51208 }, { "epoch": 2.51, "grad_norm": 0.7345377206802368, "learning_rate": 3.869293954215516e-05, "loss": 2.9421, "step": 51209 }, { "epoch": 2.51, "grad_norm": 0.8869428038597107, "learning_rate": 3.868537664188701e-05, "loss": 2.8926, "step": 51210 }, { "epoch": 2.51, "grad_norm": 0.725968062877655, "learning_rate": 3.8677814429871324e-05, "loss": 2.996, "step": 51211 }, { "epoch": 2.51, "grad_norm": 0.805472195148468, "learning_rate": 3.8670252906127905e-05, "loss": 2.8731, "step": 51212 }, { "epoch": 2.51, "grad_norm": 0.7062275409698486, "learning_rate": 3.866269207067681e-05, "loss": 2.8816, "step": 51213 }, { "epoch": 2.51, "grad_norm": 0.7009338736534119, "learning_rate": 3.865513192353783e-05, "loss": 2.8552, "step": 51214 }, { "epoch": 2.51, "grad_norm": 0.7251269817352295, "learning_rate": 3.864757246473094e-05, "loss": 2.9754, "step": 51215 }, { "epoch": 2.51, "grad_norm": 0.7357146739959717, "learning_rate": 3.864001369427608e-05, "loss": 3.0632, "step": 51216 }, { "epoch": 2.51, "grad_norm": 0.7163044810295105, "learning_rate": 3.863245561219316e-05, "loss": 2.9741, "step": 51217 }, { "epoch": 2.51, "grad_norm": 0.7487192153930664, "learning_rate": 3.862489821850204e-05, "loss": 2.9866, "step": 51218 }, { "epoch": 2.51, "grad_norm": 0.727894127368927, "learning_rate": 3.8617341513222554e-05, "loss": 2.9668, "step": 51219 }, { "epoch": 2.51, "grad_norm": 0.7058404088020325, "learning_rate": 3.8609785496374714e-05, "loss": 2.8872, "step": 51220 }, { "epoch": 2.51, "grad_norm": 0.7830812931060791, "learning_rate": 3.8602230167978434e-05, "loss": 2.5873, "step": 51221 }, { "epoch": 2.51, "grad_norm": 0.7016585469245911, "learning_rate": 3.859467552805353e-05, "loss": 2.812, "step": 51222 }, { "epoch": 2.51, "grad_norm": 0.7951351404190063, "learning_rate": 3.8587121576619996e-05, "loss": 2.7951, "step": 51223 }, { "epoch": 2.51, "grad_norm": 0.7332335114479065, "learning_rate": 3.8579568313697674e-05, "loss": 3.1449, "step": 51224 }, { "epoch": 2.51, "grad_norm": 0.7628679275512695, "learning_rate": 3.8572015739306386e-05, "loss": 2.8698, "step": 51225 }, { "epoch": 2.51, "grad_norm": 0.7135177850723267, "learning_rate": 3.8564463853466175e-05, "loss": 2.8874, "step": 51226 }, { "epoch": 2.51, "grad_norm": 0.7211930751800537, "learning_rate": 3.855691265619677e-05, "loss": 2.8293, "step": 51227 }, { "epoch": 2.51, "grad_norm": 0.7237968444824219, "learning_rate": 3.854936214751822e-05, "loss": 2.8799, "step": 51228 }, { "epoch": 2.51, "grad_norm": 0.7675337791442871, "learning_rate": 3.854181232745026e-05, "loss": 2.9046, "step": 51229 }, { "epoch": 2.51, "grad_norm": 0.7495302557945251, "learning_rate": 3.853426319601286e-05, "loss": 3.2686, "step": 51230 }, { "epoch": 2.51, "grad_norm": 0.7274502515792847, "learning_rate": 3.852671475322593e-05, "loss": 2.9237, "step": 51231 }, { "epoch": 2.51, "grad_norm": 0.7208870053291321, "learning_rate": 3.851916699910932e-05, "loss": 3.0848, "step": 51232 }, { "epoch": 2.51, "grad_norm": 0.7488343715667725, "learning_rate": 3.851161993368291e-05, "loss": 2.658, "step": 51233 }, { "epoch": 2.51, "grad_norm": 0.749234139919281, "learning_rate": 3.850407355696653e-05, "loss": 2.8975, "step": 51234 }, { "epoch": 2.51, "grad_norm": 0.7662747502326965, "learning_rate": 3.849652786898005e-05, "loss": 2.8386, "step": 51235 }, { "epoch": 2.51, "grad_norm": 0.779507040977478, "learning_rate": 3.848898286974347e-05, "loss": 2.7845, "step": 51236 }, { "epoch": 2.51, "grad_norm": 0.7223421931266785, "learning_rate": 3.848143855927652e-05, "loss": 2.8635, "step": 51237 }, { "epoch": 2.51, "grad_norm": 0.7322688102722168, "learning_rate": 3.84738949375992e-05, "loss": 3.0542, "step": 51238 }, { "epoch": 2.51, "grad_norm": 0.7349992990493774, "learning_rate": 3.846635200473129e-05, "loss": 2.9684, "step": 51239 }, { "epoch": 2.51, "grad_norm": 0.7478016018867493, "learning_rate": 3.845880976069261e-05, "loss": 3.0442, "step": 51240 }, { "epoch": 2.51, "grad_norm": 0.7727739214897156, "learning_rate": 3.845126820550317e-05, "loss": 2.7984, "step": 51241 }, { "epoch": 2.51, "grad_norm": 0.7362459301948547, "learning_rate": 3.8443727339182695e-05, "loss": 2.8293, "step": 51242 }, { "epoch": 2.51, "grad_norm": 0.7158995866775513, "learning_rate": 3.843618716175116e-05, "loss": 2.8788, "step": 51243 }, { "epoch": 2.51, "grad_norm": 0.7135204672813416, "learning_rate": 3.8428647673228294e-05, "loss": 2.933, "step": 51244 }, { "epoch": 2.51, "grad_norm": 0.7790452241897583, "learning_rate": 3.842110887363404e-05, "loss": 2.9307, "step": 51245 }, { "epoch": 2.51, "grad_norm": 0.7330326437950134, "learning_rate": 3.8413570762988325e-05, "loss": 2.6545, "step": 51246 }, { "epoch": 2.51, "grad_norm": 0.7503302693367004, "learning_rate": 3.8406033341310884e-05, "loss": 2.8702, "step": 51247 }, { "epoch": 2.51, "grad_norm": 0.8307878375053406, "learning_rate": 3.8398496608621644e-05, "loss": 3.0346, "step": 51248 }, { "epoch": 2.51, "grad_norm": 0.7150816917419434, "learning_rate": 3.839096056494032e-05, "loss": 2.9994, "step": 51249 }, { "epoch": 2.51, "grad_norm": 0.7854032516479492, "learning_rate": 3.8383425210286864e-05, "loss": 2.895, "step": 51250 }, { "epoch": 2.51, "grad_norm": 0.7631329298019409, "learning_rate": 3.8375890544681195e-05, "loss": 2.9736, "step": 51251 }, { "epoch": 2.51, "grad_norm": 0.7520675659179688, "learning_rate": 3.8368356568143036e-05, "loss": 3.052, "step": 51252 }, { "epoch": 2.51, "grad_norm": 0.7277513742446899, "learning_rate": 3.836082328069229e-05, "loss": 2.9834, "step": 51253 }, { "epoch": 2.51, "grad_norm": 0.7684758901596069, "learning_rate": 3.8353290682348814e-05, "loss": 2.7606, "step": 51254 }, { "epoch": 2.51, "grad_norm": 0.7762331366539001, "learning_rate": 3.834575877313233e-05, "loss": 3.0253, "step": 51255 }, { "epoch": 2.51, "grad_norm": 0.7357913851737976, "learning_rate": 3.833822755306285e-05, "loss": 3.0781, "step": 51256 }, { "epoch": 2.51, "grad_norm": 0.7422073483467102, "learning_rate": 3.833069702216003e-05, "loss": 2.9321, "step": 51257 }, { "epoch": 2.51, "grad_norm": 0.7310648560523987, "learning_rate": 3.832316718044388e-05, "loss": 2.8499, "step": 51258 }, { "epoch": 2.51, "grad_norm": 0.6889393925666809, "learning_rate": 3.831563802793409e-05, "loss": 3.0701, "step": 51259 }, { "epoch": 2.51, "grad_norm": 1.1530731916427612, "learning_rate": 3.830810956465058e-05, "loss": 3.111, "step": 51260 }, { "epoch": 2.51, "grad_norm": 0.7462144494056702, "learning_rate": 3.830058179061316e-05, "loss": 2.8284, "step": 51261 }, { "epoch": 2.51, "grad_norm": 0.7419637441635132, "learning_rate": 3.829305470584158e-05, "loss": 3.0065, "step": 51262 }, { "epoch": 2.51, "grad_norm": 0.7542158365249634, "learning_rate": 3.828552831035581e-05, "loss": 2.8055, "step": 51263 }, { "epoch": 2.51, "grad_norm": 0.7965158224105835, "learning_rate": 3.82780026041755e-05, "loss": 2.7436, "step": 51264 }, { "epoch": 2.51, "grad_norm": 0.743840217590332, "learning_rate": 3.8270477587320566e-05, "loss": 2.7356, "step": 51265 }, { "epoch": 2.51, "grad_norm": 0.7825320959091187, "learning_rate": 3.826295325981091e-05, "loss": 2.7954, "step": 51266 }, { "epoch": 2.51, "grad_norm": 0.7897096276283264, "learning_rate": 3.825542962166619e-05, "loss": 2.9194, "step": 51267 }, { "epoch": 2.51, "grad_norm": 0.7811319828033447, "learning_rate": 3.8247906672906345e-05, "loss": 2.7623, "step": 51268 }, { "epoch": 2.51, "grad_norm": 0.7193589806556702, "learning_rate": 3.824038441355114e-05, "loss": 2.8256, "step": 51269 }, { "epoch": 2.51, "grad_norm": 0.7284131050109863, "learning_rate": 3.823286284362031e-05, "loss": 2.8813, "step": 51270 }, { "epoch": 2.51, "grad_norm": 0.7225155830383301, "learning_rate": 3.822534196313383e-05, "loss": 2.9283, "step": 51271 }, { "epoch": 2.51, "grad_norm": 0.7368038296699524, "learning_rate": 3.8217821772111354e-05, "loss": 2.7434, "step": 51272 }, { "epoch": 2.51, "grad_norm": 0.7491246461868286, "learning_rate": 3.821030227057279e-05, "loss": 2.9343, "step": 51273 }, { "epoch": 2.51, "grad_norm": 0.7268712520599365, "learning_rate": 3.820278345853788e-05, "loss": 2.9003, "step": 51274 }, { "epoch": 2.51, "grad_norm": 0.7777917981147766, "learning_rate": 3.81952653360265e-05, "loss": 3.0695, "step": 51275 }, { "epoch": 2.51, "grad_norm": 0.6771141886711121, "learning_rate": 3.818774790305842e-05, "loss": 2.813, "step": 51276 }, { "epoch": 2.51, "grad_norm": 0.7285410165786743, "learning_rate": 3.818023115965334e-05, "loss": 2.9205, "step": 51277 }, { "epoch": 2.51, "grad_norm": 0.7287654876708984, "learning_rate": 3.817271510583122e-05, "loss": 2.9855, "step": 51278 }, { "epoch": 2.51, "grad_norm": 0.7238658666610718, "learning_rate": 3.816519974161171e-05, "loss": 2.6708, "step": 51279 }, { "epoch": 2.51, "grad_norm": 0.756721556186676, "learning_rate": 3.815768506701472e-05, "loss": 2.866, "step": 51280 }, { "epoch": 2.51, "grad_norm": 0.7326313853263855, "learning_rate": 3.8150171082059976e-05, "loss": 2.8672, "step": 51281 }, { "epoch": 2.51, "grad_norm": 0.7391651272773743, "learning_rate": 3.8142657786767324e-05, "loss": 2.893, "step": 51282 }, { "epoch": 2.51, "grad_norm": 0.7335715889930725, "learning_rate": 3.813514518115649e-05, "loss": 2.8543, "step": 51283 }, { "epoch": 2.51, "grad_norm": 0.7516505718231201, "learning_rate": 3.812763326524725e-05, "loss": 3.074, "step": 51284 }, { "epoch": 2.51, "grad_norm": 0.6861124038696289, "learning_rate": 3.812012203905949e-05, "loss": 2.9163, "step": 51285 }, { "epoch": 2.51, "grad_norm": 0.784110426902771, "learning_rate": 3.8112611502612886e-05, "loss": 2.7284, "step": 51286 }, { "epoch": 2.51, "grad_norm": 0.7559488415718079, "learning_rate": 3.810510165592724e-05, "loss": 3.0254, "step": 51287 }, { "epoch": 2.51, "grad_norm": 0.7138981223106384, "learning_rate": 3.8097592499022424e-05, "loss": 2.9743, "step": 51288 }, { "epoch": 2.51, "grad_norm": 0.7892126441001892, "learning_rate": 3.809008403191806e-05, "loss": 2.8434, "step": 51289 }, { "epoch": 2.51, "grad_norm": 0.7221898436546326, "learning_rate": 3.808257625463409e-05, "loss": 3.0125, "step": 51290 }, { "epoch": 2.51, "grad_norm": 0.745223879814148, "learning_rate": 3.807506916719021e-05, "loss": 2.8629, "step": 51291 }, { "epoch": 2.51, "grad_norm": 0.7798271775245667, "learning_rate": 3.8067562769606096e-05, "loss": 2.8282, "step": 51292 }, { "epoch": 2.51, "grad_norm": 0.7572090029716492, "learning_rate": 3.8060057061901704e-05, "loss": 2.9673, "step": 51293 }, { "epoch": 2.51, "grad_norm": 0.7191266417503357, "learning_rate": 3.8052552044096655e-05, "loss": 2.8256, "step": 51294 }, { "epoch": 2.51, "grad_norm": 0.7357266545295715, "learning_rate": 3.804504771621082e-05, "loss": 2.7246, "step": 51295 }, { "epoch": 2.51, "grad_norm": 0.742739737033844, "learning_rate": 3.803754407826386e-05, "loss": 3.0129, "step": 51296 }, { "epoch": 2.51, "grad_norm": 0.7276623249053955, "learning_rate": 3.8030041130275625e-05, "loss": 2.9153, "step": 51297 }, { "epoch": 2.51, "grad_norm": 0.7444546222686768, "learning_rate": 3.80225388722659e-05, "loss": 2.9379, "step": 51298 }, { "epoch": 2.51, "grad_norm": 0.727475643157959, "learning_rate": 3.801503730425429e-05, "loss": 2.8814, "step": 51299 }, { "epoch": 2.51, "grad_norm": 0.714273989200592, "learning_rate": 3.80075364262607e-05, "loss": 2.8059, "step": 51300 }, { "epoch": 2.51, "grad_norm": 0.7392892837524414, "learning_rate": 3.80000362383048e-05, "loss": 2.6062, "step": 51301 }, { "epoch": 2.51, "grad_norm": 0.7788336277008057, "learning_rate": 3.799253674040638e-05, "loss": 2.6913, "step": 51302 }, { "epoch": 2.51, "grad_norm": 0.76091468334198, "learning_rate": 3.798503793258525e-05, "loss": 2.9406, "step": 51303 }, { "epoch": 2.51, "grad_norm": 0.8225963115692139, "learning_rate": 3.7977539814861105e-05, "loss": 2.7359, "step": 51304 }, { "epoch": 2.51, "grad_norm": 0.7406080365180969, "learning_rate": 3.797004238725372e-05, "loss": 3.0082, "step": 51305 }, { "epoch": 2.51, "grad_norm": 0.7237011194229126, "learning_rate": 3.7962545649782715e-05, "loss": 2.7815, "step": 51306 }, { "epoch": 2.51, "grad_norm": 0.7329822778701782, "learning_rate": 3.7955049602467944e-05, "loss": 2.8379, "step": 51307 }, { "epoch": 2.51, "grad_norm": 0.7319344282150269, "learning_rate": 3.794755424532919e-05, "loss": 2.7743, "step": 51308 }, { "epoch": 2.51, "grad_norm": 0.7189127206802368, "learning_rate": 3.794005957838611e-05, "loss": 2.8306, "step": 51309 }, { "epoch": 2.51, "grad_norm": 0.7663130760192871, "learning_rate": 3.7932565601658535e-05, "loss": 2.7893, "step": 51310 }, { "epoch": 2.51, "grad_norm": 0.7873907089233398, "learning_rate": 3.792507231516607e-05, "loss": 2.7739, "step": 51311 }, { "epoch": 2.51, "grad_norm": 0.7518800497055054, "learning_rate": 3.791757971892859e-05, "loss": 2.6856, "step": 51312 }, { "epoch": 2.51, "grad_norm": 0.7844192981719971, "learning_rate": 3.791008781296578e-05, "loss": 2.8565, "step": 51313 }, { "epoch": 2.51, "grad_norm": 0.8025628924369812, "learning_rate": 3.79025965972973e-05, "loss": 2.7685, "step": 51314 }, { "epoch": 2.51, "grad_norm": 0.7403668165206909, "learning_rate": 3.789510607194298e-05, "loss": 2.9046, "step": 51315 }, { "epoch": 2.51, "grad_norm": 0.7729617953300476, "learning_rate": 3.788761623692246e-05, "loss": 3.0336, "step": 51316 }, { "epoch": 2.51, "grad_norm": 0.7558656930923462, "learning_rate": 3.7880127092255505e-05, "loss": 2.9393, "step": 51317 }, { "epoch": 2.51, "grad_norm": 0.7228742837905884, "learning_rate": 3.787263863796195e-05, "loss": 3.109, "step": 51318 }, { "epoch": 2.52, "grad_norm": 0.7505278587341309, "learning_rate": 3.786515087406137e-05, "loss": 3.0069, "step": 51319 }, { "epoch": 2.52, "grad_norm": 0.6907845735549927, "learning_rate": 3.785766380057354e-05, "loss": 2.9476, "step": 51320 }, { "epoch": 2.52, "grad_norm": 0.7361884713172913, "learning_rate": 3.7850177417518135e-05, "loss": 2.8551, "step": 51321 }, { "epoch": 2.52, "grad_norm": 0.7802038192749023, "learning_rate": 3.7842691724914884e-05, "loss": 3.0691, "step": 51322 }, { "epoch": 2.52, "grad_norm": 0.7692674398422241, "learning_rate": 3.783520672278362e-05, "loss": 2.9041, "step": 51323 }, { "epoch": 2.52, "grad_norm": 0.7283543348312378, "learning_rate": 3.782772241114388e-05, "loss": 2.753, "step": 51324 }, { "epoch": 2.52, "grad_norm": 0.7569485306739807, "learning_rate": 3.7820238790015526e-05, "loss": 3.0877, "step": 51325 }, { "epoch": 2.52, "grad_norm": 0.8705662488937378, "learning_rate": 3.781275585941823e-05, "loss": 2.8212, "step": 51326 }, { "epoch": 2.52, "grad_norm": 0.7248303890228271, "learning_rate": 3.780527361937159e-05, "loss": 2.9386, "step": 51327 }, { "epoch": 2.52, "grad_norm": 0.7549064755439758, "learning_rate": 3.779779206989548e-05, "loss": 2.8461, "step": 51328 }, { "epoch": 2.52, "grad_norm": 0.7630690932273865, "learning_rate": 3.7790311211009426e-05, "loss": 2.9711, "step": 51329 }, { "epoch": 2.52, "grad_norm": 0.7439910769462585, "learning_rate": 3.778283104273334e-05, "loss": 2.9077, "step": 51330 }, { "epoch": 2.52, "grad_norm": 0.7504767179489136, "learning_rate": 3.777535156508671e-05, "loss": 2.7569, "step": 51331 }, { "epoch": 2.52, "grad_norm": 0.7491384148597717, "learning_rate": 3.776787277808935e-05, "loss": 2.7003, "step": 51332 }, { "epoch": 2.52, "grad_norm": 0.770693302154541, "learning_rate": 3.7760394681761006e-05, "loss": 2.9364, "step": 51333 }, { "epoch": 2.52, "grad_norm": 0.7890000343322754, "learning_rate": 3.775291727612134e-05, "loss": 3.0071, "step": 51334 }, { "epoch": 2.52, "grad_norm": 0.711444616317749, "learning_rate": 3.774544056119e-05, "loss": 2.8482, "step": 51335 }, { "epoch": 2.52, "grad_norm": 0.7917689085006714, "learning_rate": 3.773796453698663e-05, "loss": 2.8997, "step": 51336 }, { "epoch": 2.52, "grad_norm": 0.6970062255859375, "learning_rate": 3.773048920353099e-05, "loss": 2.9283, "step": 51337 }, { "epoch": 2.52, "grad_norm": 0.7180047631263733, "learning_rate": 3.7723014560842825e-05, "loss": 2.8176, "step": 51338 }, { "epoch": 2.52, "grad_norm": 0.743811309337616, "learning_rate": 3.771554060894172e-05, "loss": 3.1697, "step": 51339 }, { "epoch": 2.52, "grad_norm": 0.7111158967018127, "learning_rate": 3.7708067347847424e-05, "loss": 2.7527, "step": 51340 }, { "epoch": 2.52, "grad_norm": 0.7135360240936279, "learning_rate": 3.770059477757966e-05, "loss": 2.7748, "step": 51341 }, { "epoch": 2.52, "grad_norm": 0.767493724822998, "learning_rate": 3.7693122898157945e-05, "loss": 3.1682, "step": 51342 }, { "epoch": 2.52, "grad_norm": 0.7691839337348938, "learning_rate": 3.7685651709602126e-05, "loss": 3.0069, "step": 51343 }, { "epoch": 2.52, "grad_norm": 0.7307931780815125, "learning_rate": 3.767818121193176e-05, "loss": 2.963, "step": 51344 }, { "epoch": 2.52, "grad_norm": 0.7612271308898926, "learning_rate": 3.767071140516665e-05, "loss": 2.7677, "step": 51345 }, { "epoch": 2.52, "grad_norm": 0.7013478875160217, "learning_rate": 3.766324228932633e-05, "loss": 2.7819, "step": 51346 }, { "epoch": 2.52, "grad_norm": 0.7199133038520813, "learning_rate": 3.765577386443055e-05, "loss": 2.7997, "step": 51347 }, { "epoch": 2.52, "grad_norm": 0.7004144787788391, "learning_rate": 3.764830613049902e-05, "loss": 2.9246, "step": 51348 }, { "epoch": 2.52, "grad_norm": 0.7983659505844116, "learning_rate": 3.7640839087551386e-05, "loss": 3.0463, "step": 51349 }, { "epoch": 2.52, "grad_norm": 0.7310093641281128, "learning_rate": 3.763337273560728e-05, "loss": 2.8928, "step": 51350 }, { "epoch": 2.52, "grad_norm": 0.7364878058433533, "learning_rate": 3.762590707468631e-05, "loss": 2.8745, "step": 51351 }, { "epoch": 2.52, "grad_norm": 0.7587478756904602, "learning_rate": 3.7618442104808266e-05, "loss": 2.9023, "step": 51352 }, { "epoch": 2.52, "grad_norm": 0.7633194327354431, "learning_rate": 3.761097782599266e-05, "loss": 2.8162, "step": 51353 }, { "epoch": 2.52, "grad_norm": 0.7305599451065063, "learning_rate": 3.760351423825928e-05, "loss": 2.9469, "step": 51354 }, { "epoch": 2.52, "grad_norm": 0.7444048523902893, "learning_rate": 3.759605134162781e-05, "loss": 3.1084, "step": 51355 }, { "epoch": 2.52, "grad_norm": 0.7268261909484863, "learning_rate": 3.758858913611783e-05, "loss": 2.8725, "step": 51356 }, { "epoch": 2.52, "grad_norm": 0.7650936841964722, "learning_rate": 3.7581127621748996e-05, "loss": 2.9345, "step": 51357 }, { "epoch": 2.52, "grad_norm": 0.7452462315559387, "learning_rate": 3.757366679854089e-05, "loss": 2.8995, "step": 51358 }, { "epoch": 2.52, "grad_norm": 0.7164917588233948, "learning_rate": 3.756620666651327e-05, "loss": 2.8016, "step": 51359 }, { "epoch": 2.52, "grad_norm": 0.7461237907409668, "learning_rate": 3.75587472256858e-05, "loss": 2.7738, "step": 51360 }, { "epoch": 2.52, "grad_norm": 0.7313634753227234, "learning_rate": 3.755128847607802e-05, "loss": 2.7154, "step": 51361 }, { "epoch": 2.52, "grad_norm": 0.7086673378944397, "learning_rate": 3.754383041770971e-05, "loss": 3.009, "step": 51362 }, { "epoch": 2.52, "grad_norm": 0.7295812368392944, "learning_rate": 3.753637305060044e-05, "loss": 2.7669, "step": 51363 }, { "epoch": 2.52, "grad_norm": 0.7354058027267456, "learning_rate": 3.752891637476978e-05, "loss": 3.046, "step": 51364 }, { "epoch": 2.52, "grad_norm": 0.7307260632514954, "learning_rate": 3.752146039023753e-05, "loss": 2.8995, "step": 51365 }, { "epoch": 2.52, "grad_norm": 0.7760553359985352, "learning_rate": 3.7514005097023125e-05, "loss": 3.0448, "step": 51366 }, { "epoch": 2.52, "grad_norm": 0.8252313733100891, "learning_rate": 3.750655049514644e-05, "loss": 2.8753, "step": 51367 }, { "epoch": 2.52, "grad_norm": 0.7388860583305359, "learning_rate": 3.7499096584626854e-05, "loss": 2.8755, "step": 51368 }, { "epoch": 2.52, "grad_norm": 0.7460933327674866, "learning_rate": 3.7491643365484156e-05, "loss": 3.0417, "step": 51369 }, { "epoch": 2.52, "grad_norm": 0.7309091091156006, "learning_rate": 3.748419083773804e-05, "loss": 3.0214, "step": 51370 }, { "epoch": 2.52, "grad_norm": 0.7197194695472717, "learning_rate": 3.7476739001408006e-05, "loss": 2.9985, "step": 51371 }, { "epoch": 2.52, "grad_norm": 0.7402889132499695, "learning_rate": 3.7469287856513755e-05, "loss": 3.0035, "step": 51372 }, { "epoch": 2.52, "grad_norm": 0.7771908640861511, "learning_rate": 3.746183740307476e-05, "loss": 3.1793, "step": 51373 }, { "epoch": 2.52, "grad_norm": 0.7580812573432922, "learning_rate": 3.7454387641110804e-05, "loss": 2.6364, "step": 51374 }, { "epoch": 2.52, "grad_norm": 0.7813465595245361, "learning_rate": 3.7446938570641515e-05, "loss": 2.84, "step": 51375 }, { "epoch": 2.52, "grad_norm": 0.7351908087730408, "learning_rate": 3.7439490191686374e-05, "loss": 2.9712, "step": 51376 }, { "epoch": 2.52, "grad_norm": 0.7609416842460632, "learning_rate": 3.743204250426516e-05, "loss": 2.6509, "step": 51377 }, { "epoch": 2.52, "grad_norm": 0.7777287364006042, "learning_rate": 3.742459550839743e-05, "loss": 3.0221, "step": 51378 }, { "epoch": 2.52, "grad_norm": 0.7442467212677002, "learning_rate": 3.741714920410269e-05, "loss": 2.8488, "step": 51379 }, { "epoch": 2.52, "grad_norm": 0.7483687400817871, "learning_rate": 3.740970359140074e-05, "loss": 2.9856, "step": 51380 }, { "epoch": 2.52, "grad_norm": 0.7566103935241699, "learning_rate": 3.740225867031103e-05, "loss": 2.9011, "step": 51381 }, { "epoch": 2.52, "grad_norm": 0.7553043365478516, "learning_rate": 3.739481444085326e-05, "loss": 2.8707, "step": 51382 }, { "epoch": 2.52, "grad_norm": 0.7112729549407959, "learning_rate": 3.7387370903046963e-05, "loss": 2.8141, "step": 51383 }, { "epoch": 2.52, "grad_norm": 0.7292284965515137, "learning_rate": 3.737992805691189e-05, "loss": 3.0078, "step": 51384 }, { "epoch": 2.52, "grad_norm": 0.7168455123901367, "learning_rate": 3.737248590246749e-05, "loss": 2.8198, "step": 51385 }, { "epoch": 2.52, "grad_norm": 0.7240537405014038, "learning_rate": 3.736504443973337e-05, "loss": 2.6507, "step": 51386 }, { "epoch": 2.52, "grad_norm": 0.7349284291267395, "learning_rate": 3.7357603668729264e-05, "loss": 2.8739, "step": 51387 }, { "epoch": 2.52, "grad_norm": 0.7479743957519531, "learning_rate": 3.7350163589474615e-05, "loss": 2.7535, "step": 51388 }, { "epoch": 2.52, "grad_norm": 0.7725386619567871, "learning_rate": 3.734272420198907e-05, "loss": 2.9225, "step": 51389 }, { "epoch": 2.52, "grad_norm": 0.7183440923690796, "learning_rate": 3.733528550629232e-05, "loss": 2.6772, "step": 51390 }, { "epoch": 2.52, "grad_norm": 0.7492996454238892, "learning_rate": 3.732784750240378e-05, "loss": 2.9754, "step": 51391 }, { "epoch": 2.52, "grad_norm": 0.7240472435951233, "learning_rate": 3.732041019034324e-05, "loss": 2.9141, "step": 51392 }, { "epoch": 2.52, "grad_norm": 0.7172340750694275, "learning_rate": 3.731297357013018e-05, "loss": 2.8972, "step": 51393 }, { "epoch": 2.52, "grad_norm": 0.8641431927680969, "learning_rate": 3.730553764178411e-05, "loss": 2.8425, "step": 51394 }, { "epoch": 2.52, "grad_norm": 0.9124081134796143, "learning_rate": 3.729810240532476e-05, "loss": 2.9483, "step": 51395 }, { "epoch": 2.52, "grad_norm": 0.7566990256309509, "learning_rate": 3.7290667860771606e-05, "loss": 2.8396, "step": 51396 }, { "epoch": 2.52, "grad_norm": 0.7051795721054077, "learning_rate": 3.728323400814434e-05, "loss": 2.8282, "step": 51397 }, { "epoch": 2.52, "grad_norm": 0.7272311449050903, "learning_rate": 3.72758008474624e-05, "loss": 2.8787, "step": 51398 }, { "epoch": 2.52, "grad_norm": 0.7363135814666748, "learning_rate": 3.7268368378745486e-05, "loss": 3.0014, "step": 51399 }, { "epoch": 2.52, "grad_norm": 0.7577966451644897, "learning_rate": 3.7260936602013136e-05, "loss": 2.8576, "step": 51400 }, { "epoch": 2.52, "grad_norm": 0.7400948405265808, "learning_rate": 3.7253505517284876e-05, "loss": 2.6637, "step": 51401 }, { "epoch": 2.52, "grad_norm": 0.7425705194473267, "learning_rate": 3.724607512458035e-05, "loss": 3.0053, "step": 51402 }, { "epoch": 2.52, "grad_norm": 0.8094008564949036, "learning_rate": 3.7238645423919054e-05, "loss": 2.8479, "step": 51403 }, { "epoch": 2.52, "grad_norm": 0.7409995198249817, "learning_rate": 3.723121641532056e-05, "loss": 2.8932, "step": 51404 }, { "epoch": 2.52, "grad_norm": 0.7713068723678589, "learning_rate": 3.7223788098804565e-05, "loss": 2.8011, "step": 51405 }, { "epoch": 2.52, "grad_norm": 0.7903899550437927, "learning_rate": 3.721636047439054e-05, "loss": 2.9123, "step": 51406 }, { "epoch": 2.52, "grad_norm": 0.7373391389846802, "learning_rate": 3.720893354209804e-05, "loss": 2.7982, "step": 51407 }, { "epoch": 2.52, "grad_norm": 0.720302164554596, "learning_rate": 3.7201507301946556e-05, "loss": 2.796, "step": 51408 }, { "epoch": 2.52, "grad_norm": 0.7427999973297119, "learning_rate": 3.719408175395573e-05, "loss": 2.8756, "step": 51409 }, { "epoch": 2.52, "grad_norm": 0.7941123843193054, "learning_rate": 3.718665689814522e-05, "loss": 3.0277, "step": 51410 }, { "epoch": 2.52, "grad_norm": 0.724496603012085, "learning_rate": 3.717923273453437e-05, "loss": 2.8808, "step": 51411 }, { "epoch": 2.52, "grad_norm": 0.7771130204200745, "learning_rate": 3.71718092631429e-05, "loss": 2.6689, "step": 51412 }, { "epoch": 2.52, "grad_norm": 0.7540898323059082, "learning_rate": 3.716438648399027e-05, "loss": 3.088, "step": 51413 }, { "epoch": 2.52, "grad_norm": 0.7112698554992676, "learning_rate": 3.715696439709609e-05, "loss": 2.8278, "step": 51414 }, { "epoch": 2.52, "grad_norm": 0.7136939167976379, "learning_rate": 3.714954300247991e-05, "loss": 2.9853, "step": 51415 }, { "epoch": 2.52, "grad_norm": 0.7295254468917847, "learning_rate": 3.714212230016118e-05, "loss": 2.9651, "step": 51416 }, { "epoch": 2.52, "grad_norm": 0.7394436597824097, "learning_rate": 3.713470229015956e-05, "loss": 2.8072, "step": 51417 }, { "epoch": 2.52, "grad_norm": 0.6955799460411072, "learning_rate": 3.712728297249449e-05, "loss": 2.9469, "step": 51418 }, { "epoch": 2.52, "grad_norm": 0.8046622276306152, "learning_rate": 3.7119864347185556e-05, "loss": 3.0347, "step": 51419 }, { "epoch": 2.52, "grad_norm": 0.729836642742157, "learning_rate": 3.7112446414252386e-05, "loss": 2.9362, "step": 51420 }, { "epoch": 2.52, "grad_norm": 0.722966730594635, "learning_rate": 3.710502917371442e-05, "loss": 3.0038, "step": 51421 }, { "epoch": 2.52, "grad_norm": 0.7136669158935547, "learning_rate": 3.709761262559124e-05, "loss": 2.904, "step": 51422 }, { "epoch": 2.52, "grad_norm": 0.6891632676124573, "learning_rate": 3.709019676990228e-05, "loss": 2.9472, "step": 51423 }, { "epoch": 2.52, "grad_norm": 0.7332636117935181, "learning_rate": 3.708278160666711e-05, "loss": 3.063, "step": 51424 }, { "epoch": 2.52, "grad_norm": 0.7345249056816101, "learning_rate": 3.707536713590539e-05, "loss": 2.8518, "step": 51425 }, { "epoch": 2.52, "grad_norm": 0.7762575745582581, "learning_rate": 3.706795335763649e-05, "loss": 3.0165, "step": 51426 }, { "epoch": 2.52, "grad_norm": 0.7118173837661743, "learning_rate": 3.7060540271880044e-05, "loss": 2.721, "step": 51427 }, { "epoch": 2.52, "grad_norm": 0.841128408908844, "learning_rate": 3.705312787865557e-05, "loss": 2.938, "step": 51428 }, { "epoch": 2.52, "grad_norm": 0.6889047622680664, "learning_rate": 3.70457161779825e-05, "loss": 2.7814, "step": 51429 }, { "epoch": 2.52, "grad_norm": 0.7486089468002319, "learning_rate": 3.7038305169880367e-05, "loss": 3.0047, "step": 51430 }, { "epoch": 2.52, "grad_norm": 0.7171392440795898, "learning_rate": 3.703089485436872e-05, "loss": 2.7904, "step": 51431 }, { "epoch": 2.52, "grad_norm": 0.7496344447135925, "learning_rate": 3.702348523146718e-05, "loss": 3.0611, "step": 51432 }, { "epoch": 2.52, "grad_norm": 0.7369357943534851, "learning_rate": 3.7016076301195084e-05, "loss": 2.9536, "step": 51433 }, { "epoch": 2.52, "grad_norm": 0.7333647012710571, "learning_rate": 3.70086680635721e-05, "loss": 3.0673, "step": 51434 }, { "epoch": 2.52, "grad_norm": 0.7265602946281433, "learning_rate": 3.7001260518617604e-05, "loss": 3.0387, "step": 51435 }, { "epoch": 2.52, "grad_norm": 0.7276657223701477, "learning_rate": 3.6993853666351214e-05, "loss": 2.9921, "step": 51436 }, { "epoch": 2.52, "grad_norm": 0.75041264295578, "learning_rate": 3.6986447506792414e-05, "loss": 2.9, "step": 51437 }, { "epoch": 2.52, "grad_norm": 0.77069091796875, "learning_rate": 3.697904203996066e-05, "loss": 2.801, "step": 51438 }, { "epoch": 2.52, "grad_norm": 0.7159631848335266, "learning_rate": 3.6971637265875495e-05, "loss": 2.9552, "step": 51439 }, { "epoch": 2.52, "grad_norm": 0.7610557079315186, "learning_rate": 3.6964233184556415e-05, "loss": 2.9516, "step": 51440 }, { "epoch": 2.52, "grad_norm": 0.7705284953117371, "learning_rate": 3.695682979602289e-05, "loss": 2.8762, "step": 51441 }, { "epoch": 2.52, "grad_norm": 0.7517569065093994, "learning_rate": 3.694942710029452e-05, "loss": 2.9491, "step": 51442 }, { "epoch": 2.52, "grad_norm": 0.774166464805603, "learning_rate": 3.6942025097390705e-05, "loss": 2.8259, "step": 51443 }, { "epoch": 2.52, "grad_norm": 0.7174193263053894, "learning_rate": 3.693462378733101e-05, "loss": 2.8627, "step": 51444 }, { "epoch": 2.52, "grad_norm": 0.7680938839912415, "learning_rate": 3.692722317013481e-05, "loss": 2.9821, "step": 51445 }, { "epoch": 2.52, "grad_norm": 0.7469550371170044, "learning_rate": 3.691982324582167e-05, "loss": 2.8813, "step": 51446 }, { "epoch": 2.52, "grad_norm": 0.8036483526229858, "learning_rate": 3.6912424014411156e-05, "loss": 2.7214, "step": 51447 }, { "epoch": 2.52, "grad_norm": 0.7748443484306335, "learning_rate": 3.6905025475922634e-05, "loss": 2.9701, "step": 51448 }, { "epoch": 2.52, "grad_norm": 0.7516213059425354, "learning_rate": 3.689762763037568e-05, "loss": 3.0216, "step": 51449 }, { "epoch": 2.52, "grad_norm": 0.7000873684883118, "learning_rate": 3.6890230477789754e-05, "loss": 2.8373, "step": 51450 }, { "epoch": 2.52, "grad_norm": 0.7015297412872314, "learning_rate": 3.688283401818426e-05, "loss": 2.9047, "step": 51451 }, { "epoch": 2.52, "grad_norm": 0.717642068862915, "learning_rate": 3.6875438251578794e-05, "loss": 2.8453, "step": 51452 }, { "epoch": 2.52, "grad_norm": 0.7319772243499756, "learning_rate": 3.6868043177992736e-05, "loss": 2.8335, "step": 51453 }, { "epoch": 2.52, "grad_norm": 0.7205098867416382, "learning_rate": 3.686064879744567e-05, "loss": 3.0223, "step": 51454 }, { "epoch": 2.52, "grad_norm": 0.7658298015594482, "learning_rate": 3.685325510995695e-05, "loss": 2.8972, "step": 51455 }, { "epoch": 2.52, "grad_norm": 0.7169014811515808, "learning_rate": 3.684586211554609e-05, "loss": 3.1171, "step": 51456 }, { "epoch": 2.52, "grad_norm": 0.7414019703865051, "learning_rate": 3.683846981423267e-05, "loss": 3.0538, "step": 51457 }, { "epoch": 2.52, "grad_norm": 0.772877037525177, "learning_rate": 3.683107820603609e-05, "loss": 2.8904, "step": 51458 }, { "epoch": 2.52, "grad_norm": 0.7601258158683777, "learning_rate": 3.682368729097579e-05, "loss": 2.7492, "step": 51459 }, { "epoch": 2.52, "grad_norm": 0.7426812052726746, "learning_rate": 3.681629706907115e-05, "loss": 2.7345, "step": 51460 }, { "epoch": 2.52, "grad_norm": 0.7161027193069458, "learning_rate": 3.680890754034177e-05, "loss": 3.0118, "step": 51461 }, { "epoch": 2.52, "grad_norm": 0.7432463765144348, "learning_rate": 3.6801518704807156e-05, "loss": 2.8489, "step": 51462 }, { "epoch": 2.52, "grad_norm": 0.7574390769004822, "learning_rate": 3.679413056248659e-05, "loss": 2.8106, "step": 51463 }, { "epoch": 2.52, "grad_norm": 0.7752411961555481, "learning_rate": 3.6786743113399696e-05, "loss": 2.9405, "step": 51464 }, { "epoch": 2.52, "grad_norm": 0.7201191782951355, "learning_rate": 3.677935635756586e-05, "loss": 2.943, "step": 51465 }, { "epoch": 2.52, "grad_norm": 0.7174774408340454, "learning_rate": 3.67719702950045e-05, "loss": 2.8726, "step": 51466 }, { "epoch": 2.52, "grad_norm": 0.7147430777549744, "learning_rate": 3.676458492573515e-05, "loss": 3.0209, "step": 51467 }, { "epoch": 2.52, "grad_norm": 0.757570743560791, "learning_rate": 3.675720024977718e-05, "loss": 3.0395, "step": 51468 }, { "epoch": 2.52, "grad_norm": 0.6996948719024658, "learning_rate": 3.674981626715015e-05, "loss": 2.7247, "step": 51469 }, { "epoch": 2.52, "grad_norm": 0.8142804503440857, "learning_rate": 3.674243297787335e-05, "loss": 2.7782, "step": 51470 }, { "epoch": 2.52, "grad_norm": 0.7229151129722595, "learning_rate": 3.6735050381966346e-05, "loss": 2.7468, "step": 51471 }, { "epoch": 2.52, "grad_norm": 0.7174829840660095, "learning_rate": 3.672766847944859e-05, "loss": 2.9069, "step": 51472 }, { "epoch": 2.52, "grad_norm": 0.7757089734077454, "learning_rate": 3.67202872703395e-05, "loss": 3.1334, "step": 51473 }, { "epoch": 2.52, "grad_norm": 0.7175819873809814, "learning_rate": 3.6712906754658524e-05, "loss": 3.0466, "step": 51474 }, { "epoch": 2.52, "grad_norm": 0.739388108253479, "learning_rate": 3.6705526932424975e-05, "loss": 2.9729, "step": 51475 }, { "epoch": 2.52, "grad_norm": 0.7365408539772034, "learning_rate": 3.669814780365842e-05, "loss": 2.8922, "step": 51476 }, { "epoch": 2.52, "grad_norm": 0.6910611987113953, "learning_rate": 3.669076936837836e-05, "loss": 2.7283, "step": 51477 }, { "epoch": 2.52, "grad_norm": 0.7462716102600098, "learning_rate": 3.668339162660402e-05, "loss": 2.814, "step": 51478 }, { "epoch": 2.52, "grad_norm": 0.7815803289413452, "learning_rate": 3.667601457835505e-05, "loss": 2.8589, "step": 51479 }, { "epoch": 2.52, "grad_norm": 0.7381653189659119, "learning_rate": 3.666863822365077e-05, "loss": 2.882, "step": 51480 }, { "epoch": 2.52, "grad_norm": 0.7210900783538818, "learning_rate": 3.6661262562510566e-05, "loss": 3.0684, "step": 51481 }, { "epoch": 2.52, "grad_norm": 0.7968168258666992, "learning_rate": 3.665388759495398e-05, "loss": 3.0299, "step": 51482 }, { "epoch": 2.52, "grad_norm": 0.7475718855857849, "learning_rate": 3.664651332100027e-05, "loss": 2.8383, "step": 51483 }, { "epoch": 2.52, "grad_norm": 0.8005061149597168, "learning_rate": 3.6639139740669055e-05, "loss": 3.1725, "step": 51484 }, { "epoch": 2.52, "grad_norm": 0.7502545118331909, "learning_rate": 3.663176685397962e-05, "loss": 2.7869, "step": 51485 }, { "epoch": 2.52, "grad_norm": 0.7680295705795288, "learning_rate": 3.662439466095144e-05, "loss": 2.8294, "step": 51486 }, { "epoch": 2.52, "grad_norm": 0.710451066493988, "learning_rate": 3.661702316160394e-05, "loss": 2.7683, "step": 51487 }, { "epoch": 2.52, "grad_norm": 0.6979348063468933, "learning_rate": 3.660965235595644e-05, "loss": 2.9291, "step": 51488 }, { "epoch": 2.52, "grad_norm": 0.7553119659423828, "learning_rate": 3.660228224402849e-05, "loss": 2.9579, "step": 51489 }, { "epoch": 2.52, "grad_norm": 0.7529929876327515, "learning_rate": 3.659491282583941e-05, "loss": 2.8826, "step": 51490 }, { "epoch": 2.52, "grad_norm": 0.7481595277786255, "learning_rate": 3.65875441014086e-05, "loss": 2.8124, "step": 51491 }, { "epoch": 2.52, "grad_norm": 0.7778012156486511, "learning_rate": 3.6580176070755576e-05, "loss": 2.7338, "step": 51492 }, { "epoch": 2.52, "grad_norm": 0.7474700808525085, "learning_rate": 3.657280873389959e-05, "loss": 3.1009, "step": 51493 }, { "epoch": 2.52, "grad_norm": 0.7289611101150513, "learning_rate": 3.656544209086022e-05, "loss": 3.0016, "step": 51494 }, { "epoch": 2.52, "grad_norm": 0.815883219242096, "learning_rate": 3.6558076141656766e-05, "loss": 2.9222, "step": 51495 }, { "epoch": 2.52, "grad_norm": 0.7379744648933411, "learning_rate": 3.655071088630855e-05, "loss": 3.0274, "step": 51496 }, { "epoch": 2.52, "grad_norm": 0.7309653162956238, "learning_rate": 3.654334632483516e-05, "loss": 2.8887, "step": 51497 }, { "epoch": 2.52, "grad_norm": 0.7571339011192322, "learning_rate": 3.6535982457255817e-05, "loss": 2.9202, "step": 51498 }, { "epoch": 2.52, "grad_norm": 0.7372366786003113, "learning_rate": 3.6528619283590066e-05, "loss": 3.1448, "step": 51499 }, { "epoch": 2.52, "grad_norm": 0.7438984513282776, "learning_rate": 3.652125680385712e-05, "loss": 2.9245, "step": 51500 }, { "epoch": 2.52, "grad_norm": 0.7498157024383545, "learning_rate": 3.651389501807658e-05, "loss": 3.1159, "step": 51501 }, { "epoch": 2.52, "grad_norm": 0.7399702072143555, "learning_rate": 3.650653392626775e-05, "loss": 3.0412, "step": 51502 }, { "epoch": 2.52, "grad_norm": 0.7499619126319885, "learning_rate": 3.6499173528449874e-05, "loss": 2.8588, "step": 51503 }, { "epoch": 2.52, "grad_norm": 0.784655749797821, "learning_rate": 3.649181382464256e-05, "loss": 3.1072, "step": 51504 }, { "epoch": 2.52, "grad_norm": 0.742243230342865, "learning_rate": 3.648445481486504e-05, "loss": 3.0878, "step": 51505 }, { "epoch": 2.52, "grad_norm": 0.7311636805534363, "learning_rate": 3.6477096499136735e-05, "loss": 2.9728, "step": 51506 }, { "epoch": 2.52, "grad_norm": 0.7376363277435303, "learning_rate": 3.646973887747714e-05, "loss": 2.9388, "step": 51507 }, { "epoch": 2.52, "grad_norm": 0.746273934841156, "learning_rate": 3.646238194990549e-05, "loss": 2.919, "step": 51508 }, { "epoch": 2.52, "grad_norm": 0.7192387580871582, "learning_rate": 3.645502571644122e-05, "loss": 2.8624, "step": 51509 }, { "epoch": 2.52, "grad_norm": 0.7488130331039429, "learning_rate": 3.644767017710364e-05, "loss": 2.7476, "step": 51510 }, { "epoch": 2.52, "grad_norm": 0.7002290487289429, "learning_rate": 3.644031533191223e-05, "loss": 2.8554, "step": 51511 }, { "epoch": 2.52, "grad_norm": 0.7030738592147827, "learning_rate": 3.6432961180886246e-05, "loss": 2.9592, "step": 51512 }, { "epoch": 2.52, "grad_norm": 0.8064178228378296, "learning_rate": 3.642560772404514e-05, "loss": 2.8433, "step": 51513 }, { "epoch": 2.52, "grad_norm": 0.7585741877555847, "learning_rate": 3.641825496140829e-05, "loss": 2.8685, "step": 51514 }, { "epoch": 2.52, "grad_norm": 0.7327173352241516, "learning_rate": 3.6410902892994953e-05, "loss": 2.8906, "step": 51515 }, { "epoch": 2.52, "grad_norm": 0.7685412168502808, "learning_rate": 3.640355151882468e-05, "loss": 2.6577, "step": 51516 }, { "epoch": 2.52, "grad_norm": 0.7944349646568298, "learning_rate": 3.639620083891669e-05, "loss": 2.9258, "step": 51517 }, { "epoch": 2.52, "grad_norm": 0.7449418902397156, "learning_rate": 3.6388850853290294e-05, "loss": 2.7102, "step": 51518 }, { "epoch": 2.52, "grad_norm": 0.7388597130775452, "learning_rate": 3.638150156196502e-05, "loss": 2.9237, "step": 51519 }, { "epoch": 2.52, "grad_norm": 0.7266759872436523, "learning_rate": 3.6374152964960076e-05, "loss": 2.9021, "step": 51520 }, { "epoch": 2.52, "grad_norm": 0.7184749841690063, "learning_rate": 3.6366805062294956e-05, "loss": 3.0353, "step": 51521 }, { "epoch": 2.52, "grad_norm": 0.7220653891563416, "learning_rate": 3.6359457853988835e-05, "loss": 2.8162, "step": 51522 }, { "epoch": 2.53, "grad_norm": 0.7334814667701721, "learning_rate": 3.635211134006124e-05, "loss": 2.9972, "step": 51523 }, { "epoch": 2.53, "grad_norm": 0.7480335831642151, "learning_rate": 3.634476552053145e-05, "loss": 2.8407, "step": 51524 }, { "epoch": 2.53, "grad_norm": 0.7650389671325684, "learning_rate": 3.633742039541875e-05, "loss": 2.8594, "step": 51525 }, { "epoch": 2.53, "grad_norm": 0.7087839841842651, "learning_rate": 3.6330075964742586e-05, "loss": 2.7335, "step": 51526 }, { "epoch": 2.53, "grad_norm": 0.7374259829521179, "learning_rate": 3.632273222852222e-05, "loss": 2.954, "step": 51527 }, { "epoch": 2.53, "grad_norm": 0.7621423006057739, "learning_rate": 3.6315389186777e-05, "loss": 2.9835, "step": 51528 }, { "epoch": 2.53, "grad_norm": 0.7097713351249695, "learning_rate": 3.630804683952637e-05, "loss": 2.8251, "step": 51529 }, { "epoch": 2.53, "grad_norm": 0.7970553636550903, "learning_rate": 3.6300705186789627e-05, "loss": 2.8793, "step": 51530 }, { "epoch": 2.53, "grad_norm": 0.7120518088340759, "learning_rate": 3.629336422858604e-05, "loss": 2.8642, "step": 51531 }, { "epoch": 2.53, "grad_norm": 0.731629490852356, "learning_rate": 3.628602396493494e-05, "loss": 2.9259, "step": 51532 }, { "epoch": 2.53, "grad_norm": 0.7600454688072205, "learning_rate": 3.6278684395855676e-05, "loss": 2.7663, "step": 51533 }, { "epoch": 2.53, "grad_norm": 0.7035639882087708, "learning_rate": 3.627134552136769e-05, "loss": 2.8127, "step": 51534 }, { "epoch": 2.53, "grad_norm": 0.7026732563972473, "learning_rate": 3.6264007341490145e-05, "loss": 2.805, "step": 51535 }, { "epoch": 2.53, "grad_norm": 0.7547394633293152, "learning_rate": 3.6256669856242516e-05, "loss": 2.8502, "step": 51536 }, { "epoch": 2.53, "grad_norm": 0.7437660694122314, "learning_rate": 3.624933306564399e-05, "loss": 2.8254, "step": 51537 }, { "epoch": 2.53, "grad_norm": 0.7333990335464478, "learning_rate": 3.624199696971403e-05, "loss": 2.9906, "step": 51538 }, { "epoch": 2.53, "grad_norm": 0.7599939107894897, "learning_rate": 3.62346615684719e-05, "loss": 2.7959, "step": 51539 }, { "epoch": 2.53, "grad_norm": 0.8379149436950684, "learning_rate": 3.622732686193683e-05, "loss": 2.9625, "step": 51540 }, { "epoch": 2.53, "grad_norm": 0.7774891257286072, "learning_rate": 3.621999285012828e-05, "loss": 2.7655, "step": 51541 }, { "epoch": 2.53, "grad_norm": 0.7588175535202026, "learning_rate": 3.621265953306545e-05, "loss": 2.7889, "step": 51542 }, { "epoch": 2.53, "grad_norm": 0.7654353380203247, "learning_rate": 3.62053269107677e-05, "loss": 2.9883, "step": 51543 }, { "epoch": 2.53, "grad_norm": 0.744993269443512, "learning_rate": 3.619799498325441e-05, "loss": 2.7745, "step": 51544 }, { "epoch": 2.53, "grad_norm": 0.7357940077781677, "learning_rate": 3.6190663750544833e-05, "loss": 2.6419, "step": 51545 }, { "epoch": 2.53, "grad_norm": 0.737634539604187, "learning_rate": 3.6183333212658294e-05, "loss": 2.9255, "step": 51546 }, { "epoch": 2.53, "grad_norm": 0.7712468504905701, "learning_rate": 3.6176003369614e-05, "loss": 3.0007, "step": 51547 }, { "epoch": 2.53, "grad_norm": 0.7073559761047363, "learning_rate": 3.616867422143135e-05, "loss": 2.9726, "step": 51548 }, { "epoch": 2.53, "grad_norm": 0.7305967807769775, "learning_rate": 3.616134576812971e-05, "loss": 2.8264, "step": 51549 }, { "epoch": 2.53, "grad_norm": 0.7160578370094299, "learning_rate": 3.6154018009728216e-05, "loss": 2.9483, "step": 51550 }, { "epoch": 2.53, "grad_norm": 0.7831854820251465, "learning_rate": 3.614669094624634e-05, "loss": 2.7699, "step": 51551 }, { "epoch": 2.53, "grad_norm": 0.7335165739059448, "learning_rate": 3.61393645777033e-05, "loss": 3.0746, "step": 51552 }, { "epoch": 2.53, "grad_norm": 0.8146199584007263, "learning_rate": 3.6132038904118356e-05, "loss": 2.8505, "step": 51553 }, { "epoch": 2.53, "grad_norm": 0.7218393087387085, "learning_rate": 3.6124713925510854e-05, "loss": 2.8958, "step": 51554 }, { "epoch": 2.53, "grad_norm": 0.7315160036087036, "learning_rate": 3.6117389641900044e-05, "loss": 2.7957, "step": 51555 }, { "epoch": 2.53, "grad_norm": 0.7376806735992432, "learning_rate": 3.611006605330527e-05, "loss": 2.8755, "step": 51556 }, { "epoch": 2.53, "grad_norm": 0.8134010434150696, "learning_rate": 3.610274315974577e-05, "loss": 2.8359, "step": 51557 }, { "epoch": 2.53, "grad_norm": 0.7388611435890198, "learning_rate": 3.6095420961240854e-05, "loss": 2.6661, "step": 51558 }, { "epoch": 2.53, "grad_norm": 0.7700594663619995, "learning_rate": 3.608809945780986e-05, "loss": 2.9496, "step": 51559 }, { "epoch": 2.53, "grad_norm": 0.7100481986999512, "learning_rate": 3.608077864947202e-05, "loss": 2.7765, "step": 51560 }, { "epoch": 2.53, "grad_norm": 0.7788733243942261, "learning_rate": 3.6073458536246616e-05, "loss": 3.0668, "step": 51561 }, { "epoch": 2.53, "grad_norm": 0.7740787863731384, "learning_rate": 3.6066139118152894e-05, "loss": 3.0295, "step": 51562 }, { "epoch": 2.53, "grad_norm": 0.7355701327323914, "learning_rate": 3.6058820395210105e-05, "loss": 2.6675, "step": 51563 }, { "epoch": 2.53, "grad_norm": 0.7832901477813721, "learning_rate": 3.605150236743771e-05, "loss": 2.7939, "step": 51564 }, { "epoch": 2.53, "grad_norm": 0.725982129573822, "learning_rate": 3.604418503485476e-05, "loss": 2.9671, "step": 51565 }, { "epoch": 2.53, "grad_norm": 0.7156925797462463, "learning_rate": 3.603686839748072e-05, "loss": 2.908, "step": 51566 }, { "epoch": 2.53, "grad_norm": 0.801988959312439, "learning_rate": 3.602955245533473e-05, "loss": 2.8038, "step": 51567 }, { "epoch": 2.53, "grad_norm": 0.7519605159759521, "learning_rate": 3.602223720843607e-05, "loss": 2.9654, "step": 51568 }, { "epoch": 2.53, "grad_norm": 0.7388525605201721, "learning_rate": 3.601492265680408e-05, "loss": 3.1603, "step": 51569 }, { "epoch": 2.53, "grad_norm": 0.7522304654121399, "learning_rate": 3.600760880045792e-05, "loss": 2.9316, "step": 51570 }, { "epoch": 2.53, "grad_norm": 0.7278318405151367, "learning_rate": 3.6000295639416974e-05, "loss": 2.791, "step": 51571 }, { "epoch": 2.53, "grad_norm": 0.7324314117431641, "learning_rate": 3.599298317370036e-05, "loss": 3.0109, "step": 51572 }, { "epoch": 2.53, "grad_norm": 0.7239487767219543, "learning_rate": 3.598567140332747e-05, "loss": 2.8985, "step": 51573 }, { "epoch": 2.53, "grad_norm": 0.731003999710083, "learning_rate": 3.597836032831754e-05, "loss": 2.9196, "step": 51574 }, { "epoch": 2.53, "grad_norm": 0.746536910533905, "learning_rate": 3.597104994868979e-05, "loss": 2.985, "step": 51575 }, { "epoch": 2.53, "grad_norm": 0.7338457703590393, "learning_rate": 3.596374026446348e-05, "loss": 2.9579, "step": 51576 }, { "epoch": 2.53, "grad_norm": 0.8143104314804077, "learning_rate": 3.595643127565779e-05, "loss": 2.9791, "step": 51577 }, { "epoch": 2.53, "grad_norm": 0.7997439503669739, "learning_rate": 3.594912298229208e-05, "loss": 2.8689, "step": 51578 }, { "epoch": 2.53, "grad_norm": 0.755618691444397, "learning_rate": 3.594181538438562e-05, "loss": 2.9621, "step": 51579 }, { "epoch": 2.53, "grad_norm": 0.7650567293167114, "learning_rate": 3.59345084819575e-05, "loss": 2.8828, "step": 51580 }, { "epoch": 2.53, "grad_norm": 0.7664803266525269, "learning_rate": 3.5927202275027176e-05, "loss": 3.0211, "step": 51581 }, { "epoch": 2.53, "grad_norm": 0.6953228116035461, "learning_rate": 3.591989676361373e-05, "loss": 2.8972, "step": 51582 }, { "epoch": 2.53, "grad_norm": 0.7946908473968506, "learning_rate": 3.591259194773641e-05, "loss": 2.993, "step": 51583 }, { "epoch": 2.53, "grad_norm": 0.716802179813385, "learning_rate": 3.5905287827414574e-05, "loss": 2.9496, "step": 51584 }, { "epoch": 2.53, "grad_norm": 0.7397177219390869, "learning_rate": 3.589798440266734e-05, "loss": 2.8957, "step": 51585 }, { "epoch": 2.53, "grad_norm": 0.7066813707351685, "learning_rate": 3.589068167351401e-05, "loss": 2.8603, "step": 51586 }, { "epoch": 2.53, "grad_norm": 0.7754790186882019, "learning_rate": 3.588337963997375e-05, "loss": 3.0405, "step": 51587 }, { "epoch": 2.53, "grad_norm": 0.7783805131912231, "learning_rate": 3.587607830206588e-05, "loss": 3.0427, "step": 51588 }, { "epoch": 2.53, "grad_norm": 0.7516245245933533, "learning_rate": 3.586877765980961e-05, "loss": 2.9936, "step": 51589 }, { "epoch": 2.53, "grad_norm": 0.7606446146965027, "learning_rate": 3.5861477713224096e-05, "loss": 3.0245, "step": 51590 }, { "epoch": 2.53, "grad_norm": 0.7204053401947021, "learning_rate": 3.5854178462328654e-05, "loss": 2.9596, "step": 51591 }, { "epoch": 2.53, "grad_norm": 0.7473350763320923, "learning_rate": 3.58468799071424e-05, "loss": 2.8518, "step": 51592 }, { "epoch": 2.53, "grad_norm": 0.7622993588447571, "learning_rate": 3.583958204768472e-05, "loss": 2.7923, "step": 51593 }, { "epoch": 2.53, "grad_norm": 0.7171756029129028, "learning_rate": 3.583228488397464e-05, "loss": 2.9466, "step": 51594 }, { "epoch": 2.53, "grad_norm": 0.7305697202682495, "learning_rate": 3.582498841603153e-05, "loss": 2.7989, "step": 51595 }, { "epoch": 2.53, "grad_norm": 0.7551934123039246, "learning_rate": 3.581769264387459e-05, "loss": 2.8935, "step": 51596 }, { "epoch": 2.53, "grad_norm": 0.7103335857391357, "learning_rate": 3.581039756752302e-05, "loss": 2.8689, "step": 51597 }, { "epoch": 2.53, "grad_norm": 0.7509092688560486, "learning_rate": 3.5803103186995985e-05, "loss": 3.0213, "step": 51598 }, { "epoch": 2.53, "grad_norm": 0.7707995772361755, "learning_rate": 3.57958095023127e-05, "loss": 2.9597, "step": 51599 }, { "epoch": 2.53, "grad_norm": 0.7239026427268982, "learning_rate": 3.578851651349238e-05, "loss": 2.9482, "step": 51600 }, { "epoch": 2.53, "grad_norm": 0.699456512928009, "learning_rate": 3.578122422055435e-05, "loss": 3.0693, "step": 51601 }, { "epoch": 2.53, "grad_norm": 0.7883853316307068, "learning_rate": 3.577393262351766e-05, "loss": 2.8765, "step": 51602 }, { "epoch": 2.53, "grad_norm": 0.7522447109222412, "learning_rate": 3.576664172240165e-05, "loss": 2.7847, "step": 51603 }, { "epoch": 2.53, "grad_norm": 0.8980352282524109, "learning_rate": 3.575935151722541e-05, "loss": 3.0591, "step": 51604 }, { "epoch": 2.53, "grad_norm": 0.7412164211273193, "learning_rate": 3.5752062008008163e-05, "loss": 2.887, "step": 51605 }, { "epoch": 2.53, "grad_norm": 0.7191179990768433, "learning_rate": 3.574477319476919e-05, "loss": 3.2386, "step": 51606 }, { "epoch": 2.53, "grad_norm": 0.8027909398078918, "learning_rate": 3.573748507752755e-05, "loss": 2.7148, "step": 51607 }, { "epoch": 2.53, "grad_norm": 0.7330635786056519, "learning_rate": 3.5730197656302585e-05, "loss": 2.9588, "step": 51608 }, { "epoch": 2.53, "grad_norm": 0.7735836505889893, "learning_rate": 3.5722910931113345e-05, "loss": 3.1469, "step": 51609 }, { "epoch": 2.53, "grad_norm": 0.7595041990280151, "learning_rate": 3.5715624901979154e-05, "loss": 2.7322, "step": 51610 }, { "epoch": 2.53, "grad_norm": 0.683493435382843, "learning_rate": 3.5708339568919164e-05, "loss": 2.8669, "step": 51611 }, { "epoch": 2.53, "grad_norm": 0.7193924188613892, "learning_rate": 3.570105493195249e-05, "loss": 2.9912, "step": 51612 }, { "epoch": 2.53, "grad_norm": 0.7553349137306213, "learning_rate": 3.5693770991098417e-05, "loss": 3.0392, "step": 51613 }, { "epoch": 2.53, "grad_norm": 0.7322878837585449, "learning_rate": 3.5686487746376027e-05, "loss": 2.989, "step": 51614 }, { "epoch": 2.53, "grad_norm": 0.7660918235778809, "learning_rate": 3.567920519780454e-05, "loss": 2.9561, "step": 51615 }, { "epoch": 2.53, "grad_norm": 0.7109578847885132, "learning_rate": 3.567192334540324e-05, "loss": 2.8681, "step": 51616 }, { "epoch": 2.53, "grad_norm": 0.743213415145874, "learning_rate": 3.566464218919115e-05, "loss": 2.8795, "step": 51617 }, { "epoch": 2.53, "grad_norm": 0.7197250723838806, "learning_rate": 3.565736172918762e-05, "loss": 2.8877, "step": 51618 }, { "epoch": 2.53, "grad_norm": 0.692306399345398, "learning_rate": 3.565008196541169e-05, "loss": 2.9454, "step": 51619 }, { "epoch": 2.53, "grad_norm": 0.7972225546836853, "learning_rate": 3.564280289788249e-05, "loss": 2.827, "step": 51620 }, { "epoch": 2.53, "grad_norm": 0.7207708358764648, "learning_rate": 3.563552452661938e-05, "loss": 2.9103, "step": 51621 }, { "epoch": 2.53, "grad_norm": 0.7996771931648254, "learning_rate": 3.562824685164132e-05, "loss": 2.7235, "step": 51622 }, { "epoch": 2.53, "grad_norm": 0.6930966973304749, "learning_rate": 3.5620969872967644e-05, "loss": 2.818, "step": 51623 }, { "epoch": 2.53, "grad_norm": 0.6936909556388855, "learning_rate": 3.56136935906174e-05, "loss": 2.8292, "step": 51624 }, { "epoch": 2.53, "grad_norm": 0.7523632049560547, "learning_rate": 3.560641800460987e-05, "loss": 2.9122, "step": 51625 }, { "epoch": 2.53, "grad_norm": 0.7502445578575134, "learning_rate": 3.5599143114964143e-05, "loss": 2.8885, "step": 51626 }, { "epoch": 2.53, "grad_norm": 0.7347469329833984, "learning_rate": 3.559186892169934e-05, "loss": 2.8755, "step": 51627 }, { "epoch": 2.53, "grad_norm": 0.7211341857910156, "learning_rate": 3.558459542483474e-05, "loss": 3.0235, "step": 51628 }, { "epoch": 2.53, "grad_norm": 0.7356860637664795, "learning_rate": 3.557732262438932e-05, "loss": 2.7142, "step": 51629 }, { "epoch": 2.53, "grad_norm": 0.7704647779464722, "learning_rate": 3.5570050520382386e-05, "loss": 2.9018, "step": 51630 }, { "epoch": 2.53, "grad_norm": 0.6986088156700134, "learning_rate": 3.55627791128331e-05, "loss": 2.9044, "step": 51631 }, { "epoch": 2.53, "grad_norm": 0.8135074377059937, "learning_rate": 3.555550840176057e-05, "loss": 2.9279, "step": 51632 }, { "epoch": 2.53, "grad_norm": 0.7185907959938049, "learning_rate": 3.554823838718393e-05, "loss": 2.9699, "step": 51633 }, { "epoch": 2.53, "grad_norm": 0.7311407327651978, "learning_rate": 3.554096906912226e-05, "loss": 2.9253, "step": 51634 }, { "epoch": 2.53, "grad_norm": 0.7427823543548584, "learning_rate": 3.553370044759476e-05, "loss": 2.9968, "step": 51635 }, { "epoch": 2.53, "grad_norm": 0.7487204074859619, "learning_rate": 3.5526432522620704e-05, "loss": 2.8834, "step": 51636 }, { "epoch": 2.53, "grad_norm": 0.745222806930542, "learning_rate": 3.551916529421906e-05, "loss": 2.8098, "step": 51637 }, { "epoch": 2.53, "grad_norm": 0.7326517105102539, "learning_rate": 3.551189876240905e-05, "loss": 2.9652, "step": 51638 }, { "epoch": 2.53, "grad_norm": 0.8086389899253845, "learning_rate": 3.5504632927209786e-05, "loss": 3.0676, "step": 51639 }, { "epoch": 2.53, "grad_norm": 0.7548852562904358, "learning_rate": 3.5497367788640474e-05, "loss": 3.0023, "step": 51640 }, { "epoch": 2.53, "grad_norm": 0.7331960201263428, "learning_rate": 3.5490103346720166e-05, "loss": 2.9562, "step": 51641 }, { "epoch": 2.53, "grad_norm": 0.7696003913879395, "learning_rate": 3.548283960146795e-05, "loss": 2.9089, "step": 51642 }, { "epoch": 2.53, "grad_norm": 0.6968817710876465, "learning_rate": 3.5475576552903105e-05, "loss": 3.0244, "step": 51643 }, { "epoch": 2.53, "grad_norm": 0.7616210579872131, "learning_rate": 3.5468314201044624e-05, "loss": 2.6087, "step": 51644 }, { "epoch": 2.53, "grad_norm": 0.8356584310531616, "learning_rate": 3.546105254591172e-05, "loss": 2.9079, "step": 51645 }, { "epoch": 2.53, "grad_norm": 0.7322165369987488, "learning_rate": 3.5453791587523504e-05, "loss": 2.9639, "step": 51646 }, { "epoch": 2.53, "grad_norm": 0.7493136525154114, "learning_rate": 3.544653132589911e-05, "loss": 2.6329, "step": 51647 }, { "epoch": 2.53, "grad_norm": 0.7908979654312134, "learning_rate": 3.5439271761057645e-05, "loss": 2.7651, "step": 51648 }, { "epoch": 2.53, "grad_norm": 0.7793807983398438, "learning_rate": 3.543201289301816e-05, "loss": 2.8588, "step": 51649 }, { "epoch": 2.53, "grad_norm": 0.7199450135231018, "learning_rate": 3.542475472179981e-05, "loss": 3.1165, "step": 51650 }, { "epoch": 2.53, "grad_norm": 0.7530863285064697, "learning_rate": 3.5417497247421844e-05, "loss": 2.7485, "step": 51651 }, { "epoch": 2.53, "grad_norm": 0.7242304086685181, "learning_rate": 3.541024046990318e-05, "loss": 3.0574, "step": 51652 }, { "epoch": 2.53, "grad_norm": 0.7129125595092773, "learning_rate": 3.540298438926308e-05, "loss": 2.7127, "step": 51653 }, { "epoch": 2.53, "grad_norm": 0.7528355717658997, "learning_rate": 3.539572900552061e-05, "loss": 2.8536, "step": 51654 }, { "epoch": 2.53, "grad_norm": 0.7420986890792847, "learning_rate": 3.53884743186948e-05, "loss": 2.8166, "step": 51655 }, { "epoch": 2.53, "grad_norm": 0.7624697089195251, "learning_rate": 3.538122032880487e-05, "loss": 3.1127, "step": 51656 }, { "epoch": 2.53, "grad_norm": 0.7528888583183289, "learning_rate": 3.537396703586983e-05, "loss": 3.0215, "step": 51657 }, { "epoch": 2.53, "grad_norm": 0.7675604224205017, "learning_rate": 3.5366714439908905e-05, "loss": 2.966, "step": 51658 }, { "epoch": 2.53, "grad_norm": 0.7084840536117554, "learning_rate": 3.535946254094104e-05, "loss": 2.7934, "step": 51659 }, { "epoch": 2.53, "grad_norm": 0.7773526310920715, "learning_rate": 3.5352211338985425e-05, "loss": 2.9984, "step": 51660 }, { "epoch": 2.53, "grad_norm": 0.7066696882247925, "learning_rate": 3.534496083406121e-05, "loss": 2.8627, "step": 51661 }, { "epoch": 2.53, "grad_norm": 0.7332533597946167, "learning_rate": 3.533771102618744e-05, "loss": 2.885, "step": 51662 }, { "epoch": 2.53, "grad_norm": 0.7373495697975159, "learning_rate": 3.533046191538321e-05, "loss": 3.0135, "step": 51663 }, { "epoch": 2.53, "grad_norm": 0.7333385944366455, "learning_rate": 3.532321350166757e-05, "loss": 2.8784, "step": 51664 }, { "epoch": 2.53, "grad_norm": 0.726533055305481, "learning_rate": 3.531596578505966e-05, "loss": 2.9567, "step": 51665 }, { "epoch": 2.53, "grad_norm": 0.7740032076835632, "learning_rate": 3.530871876557852e-05, "loss": 2.9343, "step": 51666 }, { "epoch": 2.53, "grad_norm": 0.7073380351066589, "learning_rate": 3.530147244324327e-05, "loss": 2.7355, "step": 51667 }, { "epoch": 2.53, "grad_norm": 0.7208665013313293, "learning_rate": 3.5294226818073056e-05, "loss": 2.9038, "step": 51668 }, { "epoch": 2.53, "grad_norm": 0.7397687435150146, "learning_rate": 3.528698189008692e-05, "loss": 2.8913, "step": 51669 }, { "epoch": 2.53, "grad_norm": 0.7066610455513, "learning_rate": 3.527973765930394e-05, "loss": 2.8714, "step": 51670 }, { "epoch": 2.53, "grad_norm": 0.7497684359550476, "learning_rate": 3.527249412574308e-05, "loss": 2.8729, "step": 51671 }, { "epoch": 2.53, "grad_norm": 0.7150473594665527, "learning_rate": 3.5265251289423544e-05, "loss": 2.8148, "step": 51672 }, { "epoch": 2.53, "grad_norm": 0.7562102675437927, "learning_rate": 3.525800915036446e-05, "loss": 2.991, "step": 51673 }, { "epoch": 2.53, "grad_norm": 0.7330208420753479, "learning_rate": 3.525076770858475e-05, "loss": 2.917, "step": 51674 }, { "epoch": 2.53, "grad_norm": 0.7095870971679688, "learning_rate": 3.5243526964103655e-05, "loss": 2.8007, "step": 51675 }, { "epoch": 2.53, "grad_norm": 0.7798789739608765, "learning_rate": 3.523628691694006e-05, "loss": 2.918, "step": 51676 }, { "epoch": 2.53, "grad_norm": 0.7631597518920898, "learning_rate": 3.522904756711322e-05, "loss": 2.9471, "step": 51677 }, { "epoch": 2.53, "grad_norm": 0.8316076993942261, "learning_rate": 3.522180891464209e-05, "loss": 3.0965, "step": 51678 }, { "epoch": 2.53, "grad_norm": 0.739554762840271, "learning_rate": 3.521457095954572e-05, "loss": 2.9548, "step": 51679 }, { "epoch": 2.53, "grad_norm": 0.7704421877861023, "learning_rate": 3.5207333701843255e-05, "loss": 2.9717, "step": 51680 }, { "epoch": 2.53, "grad_norm": 0.7329279780387878, "learning_rate": 3.520009714155365e-05, "loss": 2.9602, "step": 51681 }, { "epoch": 2.53, "grad_norm": 0.7119458913803101, "learning_rate": 3.5192861278696025e-05, "loss": 2.7271, "step": 51682 }, { "epoch": 2.53, "grad_norm": 0.7648967504501343, "learning_rate": 3.51856261132895e-05, "loss": 2.7971, "step": 51683 }, { "epoch": 2.53, "grad_norm": 0.7791581749916077, "learning_rate": 3.5178391645353085e-05, "loss": 2.8109, "step": 51684 }, { "epoch": 2.53, "grad_norm": 0.7299320697784424, "learning_rate": 3.517115787490581e-05, "loss": 2.6825, "step": 51685 }, { "epoch": 2.53, "grad_norm": 0.7683125734329224, "learning_rate": 3.5163924801966684e-05, "loss": 2.733, "step": 51686 }, { "epoch": 2.53, "grad_norm": 0.7981067895889282, "learning_rate": 3.515669242655482e-05, "loss": 2.8742, "step": 51687 }, { "epoch": 2.53, "grad_norm": 0.7027236819267273, "learning_rate": 3.5149460748689315e-05, "loss": 2.9845, "step": 51688 }, { "epoch": 2.53, "grad_norm": 0.7060105204582214, "learning_rate": 3.5142229768389085e-05, "loss": 2.709, "step": 51689 }, { "epoch": 2.53, "grad_norm": 0.7767622470855713, "learning_rate": 3.513499948567331e-05, "loss": 2.9245, "step": 51690 }, { "epoch": 2.53, "grad_norm": 0.7285616993904114, "learning_rate": 3.512776990056098e-05, "loss": 3.0044, "step": 51691 }, { "epoch": 2.53, "grad_norm": 0.7438006401062012, "learning_rate": 3.512054101307107e-05, "loss": 2.9719, "step": 51692 }, { "epoch": 2.53, "grad_norm": 0.7528930306434631, "learning_rate": 3.511331282322274e-05, "loss": 3.2001, "step": 51693 }, { "epoch": 2.53, "grad_norm": 0.7622228860855103, "learning_rate": 3.5106085331034873e-05, "loss": 2.8069, "step": 51694 }, { "epoch": 2.53, "grad_norm": 0.8420156836509705, "learning_rate": 3.509885853652668e-05, "loss": 2.8725, "step": 51695 }, { "epoch": 2.53, "grad_norm": 0.7685613632202148, "learning_rate": 3.5091632439717065e-05, "loss": 3.0688, "step": 51696 }, { "epoch": 2.53, "grad_norm": 0.7986699342727661, "learning_rate": 3.5084407040625094e-05, "loss": 2.8336, "step": 51697 }, { "epoch": 2.53, "grad_norm": 0.8661927580833435, "learning_rate": 3.507718233926989e-05, "loss": 2.9024, "step": 51698 }, { "epoch": 2.53, "grad_norm": 0.8043288588523865, "learning_rate": 3.506995833567038e-05, "loss": 2.8473, "step": 51699 }, { "epoch": 2.53, "grad_norm": 0.7660233974456787, "learning_rate": 3.5062735029845634e-05, "loss": 2.9776, "step": 51700 }, { "epoch": 2.53, "grad_norm": 0.8327121734619141, "learning_rate": 3.505551242181458e-05, "loss": 2.9838, "step": 51701 }, { "epoch": 2.53, "grad_norm": 0.786426842212677, "learning_rate": 3.5048290511596334e-05, "loss": 2.7796, "step": 51702 }, { "epoch": 2.53, "grad_norm": 0.7408648133277893, "learning_rate": 3.5041069299209944e-05, "loss": 3.0313, "step": 51703 }, { "epoch": 2.53, "grad_norm": 0.7075411677360535, "learning_rate": 3.50338487846743e-05, "loss": 2.8537, "step": 51704 }, { "epoch": 2.53, "grad_norm": 0.7919765114784241, "learning_rate": 3.5026628968008616e-05, "loss": 3.0262, "step": 51705 }, { "epoch": 2.53, "grad_norm": 0.726375937461853, "learning_rate": 3.501940984923175e-05, "loss": 2.9436, "step": 51706 }, { "epoch": 2.53, "grad_norm": 0.7896806597709656, "learning_rate": 3.501219142836275e-05, "loss": 3.012, "step": 51707 }, { "epoch": 2.53, "grad_norm": 0.741202175617218, "learning_rate": 3.500497370542066e-05, "loss": 2.7639, "step": 51708 }, { "epoch": 2.53, "grad_norm": 0.7486560940742493, "learning_rate": 3.4997756680424404e-05, "loss": 3.1071, "step": 51709 }, { "epoch": 2.53, "grad_norm": 0.7122935652732849, "learning_rate": 3.4990540353393136e-05, "loss": 2.9733, "step": 51710 }, { "epoch": 2.53, "grad_norm": 0.739435076713562, "learning_rate": 3.498332472434574e-05, "loss": 2.9056, "step": 51711 }, { "epoch": 2.53, "grad_norm": 0.7047844529151917, "learning_rate": 3.49761097933013e-05, "loss": 2.8209, "step": 51712 }, { "epoch": 2.53, "grad_norm": 0.7015252113342285, "learning_rate": 3.4968895560278796e-05, "loss": 2.9192, "step": 51713 }, { "epoch": 2.53, "grad_norm": 0.8801385760307312, "learning_rate": 3.4961682025297125e-05, "loss": 3.0679, "step": 51714 }, { "epoch": 2.53, "grad_norm": 0.7651318311691284, "learning_rate": 3.495446918837545e-05, "loss": 2.8485, "step": 51715 }, { "epoch": 2.53, "grad_norm": 0.7404123544692993, "learning_rate": 3.494725704953265e-05, "loss": 2.8387, "step": 51716 }, { "epoch": 2.53, "grad_norm": 0.7318333387374878, "learning_rate": 3.494004560878776e-05, "loss": 2.6693, "step": 51717 }, { "epoch": 2.53, "grad_norm": 0.7572652697563171, "learning_rate": 3.493283486615983e-05, "loss": 2.8279, "step": 51718 }, { "epoch": 2.53, "grad_norm": 0.7449601888656616, "learning_rate": 3.4925624821667755e-05, "loss": 2.6999, "step": 51719 }, { "epoch": 2.53, "grad_norm": 0.7463000416755676, "learning_rate": 3.4918415475330606e-05, "loss": 2.8917, "step": 51720 }, { "epoch": 2.53, "grad_norm": 0.7627390623092651, "learning_rate": 3.491120682716735e-05, "loss": 3.0798, "step": 51721 }, { "epoch": 2.53, "grad_norm": 0.75359708070755, "learning_rate": 3.490399887719689e-05, "loss": 2.8438, "step": 51722 }, { "epoch": 2.53, "grad_norm": 0.6942808628082275, "learning_rate": 3.489679162543836e-05, "loss": 2.7695, "step": 51723 }, { "epoch": 2.53, "grad_norm": 0.7942733764648438, "learning_rate": 3.488958507191057e-05, "loss": 3.0674, "step": 51724 }, { "epoch": 2.53, "grad_norm": 0.7533266544342041, "learning_rate": 3.488237921663266e-05, "loss": 3.0008, "step": 51725 }, { "epoch": 2.53, "grad_norm": 0.760011613368988, "learning_rate": 3.48751740596235e-05, "loss": 2.8886, "step": 51726 }, { "epoch": 2.54, "grad_norm": 0.7659701704978943, "learning_rate": 3.4867969600902135e-05, "loss": 2.9795, "step": 51727 }, { "epoch": 2.54, "grad_norm": 0.7460302710533142, "learning_rate": 3.4860765840487545e-05, "loss": 2.9011, "step": 51728 }, { "epoch": 2.54, "grad_norm": 0.7115795016288757, "learning_rate": 3.485356277839859e-05, "loss": 3.0382, "step": 51729 }, { "epoch": 2.54, "grad_norm": 0.7279272675514221, "learning_rate": 3.4846360414654384e-05, "loss": 3.0661, "step": 51730 }, { "epoch": 2.54, "grad_norm": 0.7116730809211731, "learning_rate": 3.483915874927378e-05, "loss": 3.0457, "step": 51731 }, { "epoch": 2.54, "grad_norm": 0.7574999928474426, "learning_rate": 3.4831957782275785e-05, "loss": 3.053, "step": 51732 }, { "epoch": 2.54, "grad_norm": 0.752925455570221, "learning_rate": 3.4824757513679466e-05, "loss": 2.9957, "step": 51733 }, { "epoch": 2.54, "grad_norm": 0.7213291525840759, "learning_rate": 3.481755794350367e-05, "loss": 2.759, "step": 51734 }, { "epoch": 2.54, "grad_norm": 0.7119690775871277, "learning_rate": 3.481035907176741e-05, "loss": 2.8161, "step": 51735 }, { "epoch": 2.54, "grad_norm": 0.6793041229248047, "learning_rate": 3.480316089848958e-05, "loss": 2.7194, "step": 51736 }, { "epoch": 2.54, "grad_norm": 0.7623587250709534, "learning_rate": 3.479596342368919e-05, "loss": 2.6806, "step": 51737 }, { "epoch": 2.54, "grad_norm": 0.7153224349021912, "learning_rate": 3.478876664738522e-05, "loss": 2.838, "step": 51738 }, { "epoch": 2.54, "grad_norm": 0.7242084741592407, "learning_rate": 3.478157056959657e-05, "loss": 2.9762, "step": 51739 }, { "epoch": 2.54, "grad_norm": 0.7159818410873413, "learning_rate": 3.477437519034225e-05, "loss": 2.732, "step": 51740 }, { "epoch": 2.54, "grad_norm": 0.7122927904129028, "learning_rate": 3.4767180509641144e-05, "loss": 2.8784, "step": 51741 }, { "epoch": 2.54, "grad_norm": 0.745762825012207, "learning_rate": 3.475998652751231e-05, "loss": 2.8922, "step": 51742 }, { "epoch": 2.54, "grad_norm": 0.7402251958847046, "learning_rate": 3.4752793243974584e-05, "loss": 3.2264, "step": 51743 }, { "epoch": 2.54, "grad_norm": 0.7608932256698608, "learning_rate": 3.4745600659046936e-05, "loss": 2.9075, "step": 51744 }, { "epoch": 2.54, "grad_norm": 0.7457864284515381, "learning_rate": 3.473840877274837e-05, "loss": 2.9522, "step": 51745 }, { "epoch": 2.54, "grad_norm": 0.7215996384620667, "learning_rate": 3.4731217585097716e-05, "loss": 3.1533, "step": 51746 }, { "epoch": 2.54, "grad_norm": 0.7540760636329651, "learning_rate": 3.472402709611405e-05, "loss": 3.0193, "step": 51747 }, { "epoch": 2.54, "grad_norm": 0.7310689091682434, "learning_rate": 3.471683730581619e-05, "loss": 2.9448, "step": 51748 }, { "epoch": 2.54, "grad_norm": 0.7167690992355347, "learning_rate": 3.4709648214223166e-05, "loss": 2.8068, "step": 51749 }, { "epoch": 2.54, "grad_norm": 0.7109416127204895, "learning_rate": 3.470245982135388e-05, "loss": 2.8514, "step": 51750 }, { "epoch": 2.54, "grad_norm": 0.7370871305465698, "learning_rate": 3.469527212722719e-05, "loss": 2.8947, "step": 51751 }, { "epoch": 2.54, "grad_norm": 0.7531648278236389, "learning_rate": 3.468808513186219e-05, "loss": 2.9873, "step": 51752 }, { "epoch": 2.54, "grad_norm": 0.7331883311271667, "learning_rate": 3.468089883527762e-05, "loss": 2.7971, "step": 51753 }, { "epoch": 2.54, "grad_norm": 0.7884783744812012, "learning_rate": 3.4673713237492496e-05, "loss": 2.839, "step": 51754 }, { "epoch": 2.54, "grad_norm": 0.7267898321151733, "learning_rate": 3.4666528338525846e-05, "loss": 2.8982, "step": 51755 }, { "epoch": 2.54, "grad_norm": 0.7529955506324768, "learning_rate": 3.465934413839645e-05, "loss": 3.0185, "step": 51756 }, { "epoch": 2.54, "grad_norm": 0.7315231561660767, "learning_rate": 3.4652160637123327e-05, "loss": 2.8829, "step": 51757 }, { "epoch": 2.54, "grad_norm": 0.7301185131072998, "learning_rate": 3.464497783472523e-05, "loss": 2.7218, "step": 51758 }, { "epoch": 2.54, "grad_norm": 0.7687699198722839, "learning_rate": 3.463779573122124e-05, "loss": 3.0069, "step": 51759 }, { "epoch": 2.54, "grad_norm": 0.7508442401885986, "learning_rate": 3.463061432663025e-05, "loss": 3.0149, "step": 51760 }, { "epoch": 2.54, "grad_norm": 0.7293921113014221, "learning_rate": 3.462343362097113e-05, "loss": 2.8922, "step": 51761 }, { "epoch": 2.54, "grad_norm": 0.7335087656974792, "learning_rate": 3.4616253614262846e-05, "loss": 2.7265, "step": 51762 }, { "epoch": 2.54, "grad_norm": 0.6986225247383118, "learning_rate": 3.460907430652421e-05, "loss": 3.1249, "step": 51763 }, { "epoch": 2.54, "grad_norm": 0.7187238931655884, "learning_rate": 3.4601895697774274e-05, "loss": 2.8012, "step": 51764 }, { "epoch": 2.54, "grad_norm": 0.7392198443412781, "learning_rate": 3.4594717788031853e-05, "loss": 2.8605, "step": 51765 }, { "epoch": 2.54, "grad_norm": 0.7785656452178955, "learning_rate": 3.458754057731581e-05, "loss": 2.8712, "step": 51766 }, { "epoch": 2.54, "grad_norm": 0.7567797899246216, "learning_rate": 3.458036406564518e-05, "loss": 2.7214, "step": 51767 }, { "epoch": 2.54, "grad_norm": 0.8188661932945251, "learning_rate": 3.4573188253038696e-05, "loss": 3.044, "step": 51768 }, { "epoch": 2.54, "grad_norm": 0.8261837959289551, "learning_rate": 3.45660131395154e-05, "loss": 2.8088, "step": 51769 }, { "epoch": 2.54, "grad_norm": 0.7429391145706177, "learning_rate": 3.4558838725094154e-05, "loss": 2.9782, "step": 51770 }, { "epoch": 2.54, "grad_norm": 0.7897228002548218, "learning_rate": 3.45516650097939e-05, "loss": 2.9229, "step": 51771 }, { "epoch": 2.54, "grad_norm": 0.8158841729164124, "learning_rate": 3.4544491993633415e-05, "loss": 3.0301, "step": 51772 }, { "epoch": 2.54, "grad_norm": 0.7363582253456116, "learning_rate": 3.45373196766316e-05, "loss": 3.0389, "step": 51773 }, { "epoch": 2.54, "grad_norm": 0.7058192491531372, "learning_rate": 3.453014805880744e-05, "loss": 2.7813, "step": 51774 }, { "epoch": 2.54, "grad_norm": 0.7266924977302551, "learning_rate": 3.452297714017981e-05, "loss": 2.8937, "step": 51775 }, { "epoch": 2.54, "grad_norm": 0.7514150738716125, "learning_rate": 3.45158069207675e-05, "loss": 3.0089, "step": 51776 }, { "epoch": 2.54, "grad_norm": 0.7654863595962524, "learning_rate": 3.4508637400589526e-05, "loss": 2.95, "step": 51777 }, { "epoch": 2.54, "grad_norm": 0.7285271286964417, "learning_rate": 3.45014685796647e-05, "loss": 2.9363, "step": 51778 }, { "epoch": 2.54, "grad_norm": 0.7652084827423096, "learning_rate": 3.4494300458011856e-05, "loss": 2.9796, "step": 51779 }, { "epoch": 2.54, "grad_norm": 0.7151200771331787, "learning_rate": 3.448713303564997e-05, "loss": 3.1283, "step": 51780 }, { "epoch": 2.54, "grad_norm": 0.7224323749542236, "learning_rate": 3.447996631259783e-05, "loss": 3.054, "step": 51781 }, { "epoch": 2.54, "grad_norm": 0.7343427538871765, "learning_rate": 3.447280028887445e-05, "loss": 2.9277, "step": 51782 }, { "epoch": 2.54, "grad_norm": 0.7346922755241394, "learning_rate": 3.446563496449851e-05, "loss": 3.0434, "step": 51783 }, { "epoch": 2.54, "grad_norm": 0.7594969868659973, "learning_rate": 3.445847033948901e-05, "loss": 2.8904, "step": 51784 }, { "epoch": 2.54, "grad_norm": 0.7676674723625183, "learning_rate": 3.445130641386482e-05, "loss": 2.7869, "step": 51785 }, { "epoch": 2.54, "grad_norm": 0.7546485066413879, "learning_rate": 3.44441431876448e-05, "loss": 2.9141, "step": 51786 }, { "epoch": 2.54, "grad_norm": 0.7444491982460022, "learning_rate": 3.4436980660847836e-05, "loss": 2.9501, "step": 51787 }, { "epoch": 2.54, "grad_norm": 0.7077409625053406, "learning_rate": 3.442981883349264e-05, "loss": 2.9668, "step": 51788 }, { "epoch": 2.54, "grad_norm": 0.7607021331787109, "learning_rate": 3.442265770559823e-05, "loss": 2.7839, "step": 51789 }, { "epoch": 2.54, "grad_norm": 0.790345311164856, "learning_rate": 3.441549727718347e-05, "loss": 2.909, "step": 51790 }, { "epoch": 2.54, "grad_norm": 0.7288457155227661, "learning_rate": 3.440833754826713e-05, "loss": 2.85, "step": 51791 }, { "epoch": 2.54, "grad_norm": 0.7285025119781494, "learning_rate": 3.4401178518868163e-05, "loss": 3.0604, "step": 51792 }, { "epoch": 2.54, "grad_norm": 0.7310810685157776, "learning_rate": 3.4394020189005364e-05, "loss": 2.8663, "step": 51793 }, { "epoch": 2.54, "grad_norm": 0.725584089756012, "learning_rate": 3.438686255869758e-05, "loss": 3.018, "step": 51794 }, { "epoch": 2.54, "grad_norm": 0.7113367319107056, "learning_rate": 3.43797056279637e-05, "loss": 2.9908, "step": 51795 }, { "epoch": 2.54, "grad_norm": 0.8494749665260315, "learning_rate": 3.4372549396822526e-05, "loss": 2.9683, "step": 51796 }, { "epoch": 2.54, "grad_norm": 0.7407485246658325, "learning_rate": 3.436539386529299e-05, "loss": 2.7897, "step": 51797 }, { "epoch": 2.54, "grad_norm": 0.7727249264717102, "learning_rate": 3.4358239033393806e-05, "loss": 2.8892, "step": 51798 }, { "epoch": 2.54, "grad_norm": 0.8405013084411621, "learning_rate": 3.43510849011439e-05, "loss": 3.0491, "step": 51799 }, { "epoch": 2.54, "grad_norm": 0.7366344332695007, "learning_rate": 3.434393146856218e-05, "loss": 2.8452, "step": 51800 }, { "epoch": 2.54, "grad_norm": 0.7424528002738953, "learning_rate": 3.433677873566743e-05, "loss": 2.9742, "step": 51801 }, { "epoch": 2.54, "grad_norm": 0.721091091632843, "learning_rate": 3.4329626702478455e-05, "loss": 2.9626, "step": 51802 }, { "epoch": 2.54, "grad_norm": 0.770412027835846, "learning_rate": 3.432247536901408e-05, "loss": 2.8381, "step": 51803 }, { "epoch": 2.54, "grad_norm": 0.7273968458175659, "learning_rate": 3.431532473529314e-05, "loss": 2.6277, "step": 51804 }, { "epoch": 2.54, "grad_norm": 0.7612076997756958, "learning_rate": 3.430817480133458e-05, "loss": 2.9437, "step": 51805 }, { "epoch": 2.54, "grad_norm": 0.7737910151481628, "learning_rate": 3.4301025567157084e-05, "loss": 3.02, "step": 51806 }, { "epoch": 2.54, "grad_norm": 0.8094823956489563, "learning_rate": 3.429387703277964e-05, "loss": 2.9737, "step": 51807 }, { "epoch": 2.54, "grad_norm": 0.7228150963783264, "learning_rate": 3.428672919822096e-05, "loss": 2.7584, "step": 51808 }, { "epoch": 2.54, "grad_norm": 0.7346405982971191, "learning_rate": 3.4279582063499845e-05, "loss": 2.948, "step": 51809 }, { "epoch": 2.54, "grad_norm": 0.7100589275360107, "learning_rate": 3.427243562863523e-05, "loss": 2.7957, "step": 51810 }, { "epoch": 2.54, "grad_norm": 0.7395407557487488, "learning_rate": 3.426528989364584e-05, "loss": 2.9512, "step": 51811 }, { "epoch": 2.54, "grad_norm": 0.7701348066329956, "learning_rate": 3.425814485855056e-05, "loss": 3.0815, "step": 51812 }, { "epoch": 2.54, "grad_norm": 0.8105944991111755, "learning_rate": 3.425100052336814e-05, "loss": 2.8488, "step": 51813 }, { "epoch": 2.54, "grad_norm": 0.7398055791854858, "learning_rate": 3.424385688811751e-05, "loss": 2.9291, "step": 51814 }, { "epoch": 2.54, "grad_norm": 0.7211189866065979, "learning_rate": 3.4236713952817394e-05, "loss": 2.9474, "step": 51815 }, { "epoch": 2.54, "grad_norm": 0.7564665675163269, "learning_rate": 3.4229571717486606e-05, "loss": 2.7423, "step": 51816 }, { "epoch": 2.54, "grad_norm": 0.7213353514671326, "learning_rate": 3.4222430182144015e-05, "loss": 2.8272, "step": 51817 }, { "epoch": 2.54, "grad_norm": 0.7244040966033936, "learning_rate": 3.42152893468083e-05, "loss": 2.907, "step": 51818 }, { "epoch": 2.54, "grad_norm": 0.6921009421348572, "learning_rate": 3.420814921149847e-05, "loss": 2.9494, "step": 51819 }, { "epoch": 2.54, "grad_norm": 0.7488122582435608, "learning_rate": 3.420100977623312e-05, "loss": 2.8671, "step": 51820 }, { "epoch": 2.54, "grad_norm": 0.7758278846740723, "learning_rate": 3.419387104103118e-05, "loss": 2.7779, "step": 51821 }, { "epoch": 2.54, "grad_norm": 0.7232535481452942, "learning_rate": 3.41867330059115e-05, "loss": 2.8373, "step": 51822 }, { "epoch": 2.54, "grad_norm": 0.716827392578125, "learning_rate": 3.41795956708928e-05, "loss": 2.9449, "step": 51823 }, { "epoch": 2.54, "grad_norm": 0.7418515086174011, "learning_rate": 3.417245903599389e-05, "loss": 2.9679, "step": 51824 }, { "epoch": 2.54, "grad_norm": 0.7049885392189026, "learning_rate": 3.41653231012335e-05, "loss": 2.8945, "step": 51825 }, { "epoch": 2.54, "grad_norm": 0.757987916469574, "learning_rate": 3.415818786663047e-05, "loss": 2.8117, "step": 51826 }, { "epoch": 2.54, "grad_norm": 0.7701760530471802, "learning_rate": 3.415105333220369e-05, "loss": 2.8238, "step": 51827 }, { "epoch": 2.54, "grad_norm": 0.75865238904953, "learning_rate": 3.414391949797182e-05, "loss": 3.0257, "step": 51828 }, { "epoch": 2.54, "grad_norm": 0.698841392993927, "learning_rate": 3.413678636395373e-05, "loss": 2.8413, "step": 51829 }, { "epoch": 2.54, "grad_norm": 0.7989115715026855, "learning_rate": 3.412965393016821e-05, "loss": 2.8112, "step": 51830 }, { "epoch": 2.54, "grad_norm": 0.6878560185432434, "learning_rate": 3.412252219663392e-05, "loss": 2.8933, "step": 51831 }, { "epoch": 2.54, "grad_norm": 0.7924463748931885, "learning_rate": 3.411539116336983e-05, "loss": 2.914, "step": 51832 }, { "epoch": 2.54, "grad_norm": 0.7778879404067993, "learning_rate": 3.410826083039454e-05, "loss": 2.887, "step": 51833 }, { "epoch": 2.54, "grad_norm": 0.7213044762611389, "learning_rate": 3.4101131197726996e-05, "loss": 2.6942, "step": 51834 }, { "epoch": 2.54, "grad_norm": 0.7068144679069519, "learning_rate": 3.4094002265385846e-05, "loss": 2.758, "step": 51835 }, { "epoch": 2.54, "grad_norm": 0.7102383375167847, "learning_rate": 3.4086874033389974e-05, "loss": 3.0547, "step": 51836 }, { "epoch": 2.54, "grad_norm": 0.7071000337600708, "learning_rate": 3.407974650175811e-05, "loss": 2.9646, "step": 51837 }, { "epoch": 2.54, "grad_norm": 0.7384864687919617, "learning_rate": 3.4072619670508925e-05, "loss": 2.7728, "step": 51838 }, { "epoch": 2.54, "grad_norm": 0.739606499671936, "learning_rate": 3.406549353966135e-05, "loss": 2.8419, "step": 51839 }, { "epoch": 2.54, "grad_norm": 0.8046254515647888, "learning_rate": 3.405836810923406e-05, "loss": 3.0422, "step": 51840 }, { "epoch": 2.54, "grad_norm": 0.7787489891052246, "learning_rate": 3.405124337924581e-05, "loss": 3.0144, "step": 51841 }, { "epoch": 2.54, "grad_norm": 0.7587060332298279, "learning_rate": 3.404411934971548e-05, "loss": 3.011, "step": 51842 }, { "epoch": 2.54, "grad_norm": 0.7384954690933228, "learning_rate": 3.403699602066167e-05, "loss": 2.9519, "step": 51843 }, { "epoch": 2.54, "grad_norm": 0.7247627973556519, "learning_rate": 3.402987339210331e-05, "loss": 2.9142, "step": 51844 }, { "epoch": 2.54, "grad_norm": 0.7663771510124207, "learning_rate": 3.402275146405907e-05, "loss": 2.8239, "step": 51845 }, { "epoch": 2.54, "grad_norm": 0.7334901094436646, "learning_rate": 3.401563023654766e-05, "loss": 3.017, "step": 51846 }, { "epoch": 2.54, "grad_norm": 0.7264374494552612, "learning_rate": 3.400850970958797e-05, "loss": 2.8086, "step": 51847 }, { "epoch": 2.54, "grad_norm": 0.7451301217079163, "learning_rate": 3.4001389883198584e-05, "loss": 2.8351, "step": 51848 }, { "epoch": 2.54, "grad_norm": 0.7210157513618469, "learning_rate": 3.3994270757398415e-05, "loss": 2.8837, "step": 51849 }, { "epoch": 2.54, "grad_norm": 0.7271472811698914, "learning_rate": 3.398715233220608e-05, "loss": 2.8446, "step": 51850 }, { "epoch": 2.54, "grad_norm": 0.7448669075965881, "learning_rate": 3.398003460764044e-05, "loss": 2.896, "step": 51851 }, { "epoch": 2.54, "grad_norm": 0.7264134287834167, "learning_rate": 3.397291758372021e-05, "loss": 2.7349, "step": 51852 }, { "epoch": 2.54, "grad_norm": 0.7169459462165833, "learning_rate": 3.396580126046403e-05, "loss": 3.0358, "step": 51853 }, { "epoch": 2.54, "grad_norm": 0.7395771145820618, "learning_rate": 3.395868563789084e-05, "loss": 2.7717, "step": 51854 }, { "epoch": 2.54, "grad_norm": 0.7105684876441956, "learning_rate": 3.395157071601917e-05, "loss": 2.9395, "step": 51855 }, { "epoch": 2.54, "grad_norm": 0.7309338450431824, "learning_rate": 3.394445649486785e-05, "loss": 2.8171, "step": 51856 }, { "epoch": 2.54, "grad_norm": 0.7164181470870972, "learning_rate": 3.393734297445574e-05, "loss": 2.8214, "step": 51857 }, { "epoch": 2.54, "grad_norm": 0.7518510818481445, "learning_rate": 3.393023015480144e-05, "loss": 2.8898, "step": 51858 }, { "epoch": 2.54, "grad_norm": 0.7415131330490112, "learning_rate": 3.392311803592368e-05, "loss": 2.9758, "step": 51859 }, { "epoch": 2.54, "grad_norm": 0.746828556060791, "learning_rate": 3.3916006617841175e-05, "loss": 2.955, "step": 51860 }, { "epoch": 2.54, "grad_norm": 0.7056711316108704, "learning_rate": 3.390889590057271e-05, "loss": 2.8957, "step": 51861 }, { "epoch": 2.54, "grad_norm": 0.8097109198570251, "learning_rate": 3.390178588413707e-05, "loss": 2.9484, "step": 51862 }, { "epoch": 2.54, "grad_norm": 0.7409765720367432, "learning_rate": 3.3894676568552834e-05, "loss": 2.7206, "step": 51863 }, { "epoch": 2.54, "grad_norm": 0.8118706345558167, "learning_rate": 3.388756795383887e-05, "loss": 2.8796, "step": 51864 }, { "epoch": 2.54, "grad_norm": 0.744475781917572, "learning_rate": 3.3880460040013815e-05, "loss": 2.8114, "step": 51865 }, { "epoch": 2.54, "grad_norm": 0.7227175831794739, "learning_rate": 3.387335282709642e-05, "loss": 3.1192, "step": 51866 }, { "epoch": 2.54, "grad_norm": 0.7669481635093689, "learning_rate": 3.3866246315105415e-05, "loss": 2.7376, "step": 51867 }, { "epoch": 2.54, "grad_norm": 0.7482892274856567, "learning_rate": 3.3859140504059446e-05, "loss": 2.8004, "step": 51868 }, { "epoch": 2.54, "grad_norm": 0.7517153024673462, "learning_rate": 3.385203539397733e-05, "loss": 2.7044, "step": 51869 }, { "epoch": 2.54, "grad_norm": 0.7399962544441223, "learning_rate": 3.384493098487768e-05, "loss": 3.0025, "step": 51870 }, { "epoch": 2.54, "grad_norm": 0.7390328049659729, "learning_rate": 3.383782727677926e-05, "loss": 2.8516, "step": 51871 }, { "epoch": 2.54, "grad_norm": 0.7233524322509766, "learning_rate": 3.383072426970085e-05, "loss": 2.804, "step": 51872 }, { "epoch": 2.54, "grad_norm": 0.7317050695419312, "learning_rate": 3.382362196366106e-05, "loss": 2.9562, "step": 51873 }, { "epoch": 2.54, "grad_norm": 0.7686887383460999, "learning_rate": 3.3816520358678655e-05, "loss": 2.997, "step": 51874 }, { "epoch": 2.54, "grad_norm": 0.7436739802360535, "learning_rate": 3.3809419454772214e-05, "loss": 2.8637, "step": 51875 }, { "epoch": 2.54, "grad_norm": 0.701702892780304, "learning_rate": 3.3802319251960554e-05, "loss": 2.7465, "step": 51876 }, { "epoch": 2.54, "grad_norm": 0.7306755185127258, "learning_rate": 3.379521975026239e-05, "loss": 2.7137, "step": 51877 }, { "epoch": 2.54, "grad_norm": 0.7624945044517517, "learning_rate": 3.378812094969635e-05, "loss": 3.029, "step": 51878 }, { "epoch": 2.54, "grad_norm": 0.7094208002090454, "learning_rate": 3.378102285028121e-05, "loss": 3.0725, "step": 51879 }, { "epoch": 2.54, "grad_norm": 0.7654349207878113, "learning_rate": 3.377392545203567e-05, "loss": 3.1119, "step": 51880 }, { "epoch": 2.54, "grad_norm": 0.7229974865913391, "learning_rate": 3.376682875497826e-05, "loss": 2.8137, "step": 51881 }, { "epoch": 2.54, "grad_norm": 0.7172889709472656, "learning_rate": 3.375973275912784e-05, "loss": 2.8475, "step": 51882 }, { "epoch": 2.54, "grad_norm": 0.7357591986656189, "learning_rate": 3.375263746450301e-05, "loss": 2.9436, "step": 51883 }, { "epoch": 2.54, "grad_norm": 0.7437008619308472, "learning_rate": 3.374554287112256e-05, "loss": 3.1229, "step": 51884 }, { "epoch": 2.54, "grad_norm": 0.7146857976913452, "learning_rate": 3.373844897900504e-05, "loss": 2.7566, "step": 51885 }, { "epoch": 2.54, "grad_norm": 0.7032164335250854, "learning_rate": 3.3731355788169176e-05, "loss": 2.8108, "step": 51886 }, { "epoch": 2.54, "grad_norm": 0.738189697265625, "learning_rate": 3.372426329863377e-05, "loss": 2.9045, "step": 51887 }, { "epoch": 2.54, "grad_norm": 0.7533491253852844, "learning_rate": 3.3717171510417396e-05, "loss": 2.9161, "step": 51888 }, { "epoch": 2.54, "grad_norm": 0.7233903408050537, "learning_rate": 3.371008042353874e-05, "loss": 2.9135, "step": 51889 }, { "epoch": 2.54, "grad_norm": 0.7648455500602722, "learning_rate": 3.370299003801643e-05, "loss": 2.9126, "step": 51890 }, { "epoch": 2.54, "grad_norm": 0.742719829082489, "learning_rate": 3.369590035386921e-05, "loss": 2.9763, "step": 51891 }, { "epoch": 2.54, "grad_norm": 0.7518740892410278, "learning_rate": 3.368881137111577e-05, "loss": 3.0087, "step": 51892 }, { "epoch": 2.54, "grad_norm": 0.7219116687774658, "learning_rate": 3.3681723089774714e-05, "loss": 3.0071, "step": 51893 }, { "epoch": 2.54, "grad_norm": 0.7408004403114319, "learning_rate": 3.367463550986478e-05, "loss": 2.9956, "step": 51894 }, { "epoch": 2.54, "grad_norm": 0.7436817288398743, "learning_rate": 3.3667548631404604e-05, "loss": 2.7571, "step": 51895 }, { "epoch": 2.54, "grad_norm": 0.6922502517700195, "learning_rate": 3.366046245441282e-05, "loss": 2.9126, "step": 51896 }, { "epoch": 2.54, "grad_norm": 0.7235960364341736, "learning_rate": 3.365337697890813e-05, "loss": 2.8327, "step": 51897 }, { "epoch": 2.54, "grad_norm": 0.7371718287467957, "learning_rate": 3.3646292204909164e-05, "loss": 2.8076, "step": 51898 }, { "epoch": 2.54, "grad_norm": 0.7488693594932556, "learning_rate": 3.3639208132434635e-05, "loss": 2.727, "step": 51899 }, { "epoch": 2.54, "grad_norm": 0.7194048762321472, "learning_rate": 3.363212476150313e-05, "loss": 2.8375, "step": 51900 }, { "epoch": 2.54, "grad_norm": 0.7807648777961731, "learning_rate": 3.3625042092133406e-05, "loss": 2.9876, "step": 51901 }, { "epoch": 2.54, "grad_norm": 0.741208016872406, "learning_rate": 3.3617960124344e-05, "loss": 2.77, "step": 51902 }, { "epoch": 2.54, "grad_norm": 0.7476096749305725, "learning_rate": 3.361087885815368e-05, "loss": 3.1433, "step": 51903 }, { "epoch": 2.54, "grad_norm": 0.8125763535499573, "learning_rate": 3.3603798293581016e-05, "loss": 2.8455, "step": 51904 }, { "epoch": 2.54, "grad_norm": 0.7425322532653809, "learning_rate": 3.359671843064464e-05, "loss": 3.098, "step": 51905 }, { "epoch": 2.54, "grad_norm": 0.7436474561691284, "learning_rate": 3.358963926936327e-05, "loss": 2.9924, "step": 51906 }, { "epoch": 2.54, "grad_norm": 0.8901980519294739, "learning_rate": 3.358256080975548e-05, "loss": 2.8039, "step": 51907 }, { "epoch": 2.54, "grad_norm": 0.7209775447845459, "learning_rate": 3.357548305183996e-05, "loss": 3.2191, "step": 51908 }, { "epoch": 2.54, "grad_norm": 0.7874775528907776, "learning_rate": 3.3568405995635393e-05, "loss": 2.9348, "step": 51909 }, { "epoch": 2.54, "grad_norm": 0.7514326572418213, "learning_rate": 3.3561329641160376e-05, "loss": 3.0049, "step": 51910 }, { "epoch": 2.54, "grad_norm": 0.7351415157318115, "learning_rate": 3.3554253988433513e-05, "loss": 2.8308, "step": 51911 }, { "epoch": 2.54, "grad_norm": 0.7183605432510376, "learning_rate": 3.354717903747343e-05, "loss": 2.9062, "step": 51912 }, { "epoch": 2.54, "grad_norm": 1.2949225902557373, "learning_rate": 3.354010478829881e-05, "loss": 3.0845, "step": 51913 }, { "epoch": 2.54, "grad_norm": 0.7274050116539001, "learning_rate": 3.35330312409283e-05, "loss": 3.066, "step": 51914 }, { "epoch": 2.54, "grad_norm": 0.7558318376541138, "learning_rate": 3.352595839538046e-05, "loss": 2.9214, "step": 51915 }, { "epoch": 2.54, "grad_norm": 0.7233791351318359, "learning_rate": 3.3518886251674004e-05, "loss": 2.8911, "step": 51916 }, { "epoch": 2.54, "grad_norm": 0.7367327809333801, "learning_rate": 3.351181480982755e-05, "loss": 2.8778, "step": 51917 }, { "epoch": 2.54, "grad_norm": 0.7649989128112793, "learning_rate": 3.350474406985959e-05, "loss": 2.802, "step": 51918 }, { "epoch": 2.54, "grad_norm": 0.7352694869041443, "learning_rate": 3.3497674031788935e-05, "loss": 2.871, "step": 51919 }, { "epoch": 2.54, "grad_norm": 0.7183537483215332, "learning_rate": 3.3490604695634073e-05, "loss": 2.9789, "step": 51920 }, { "epoch": 2.54, "grad_norm": 0.7267533540725708, "learning_rate": 3.348353606141369e-05, "loss": 2.9397, "step": 51921 }, { "epoch": 2.54, "grad_norm": 0.7344212532043457, "learning_rate": 3.347646812914633e-05, "loss": 2.9678, "step": 51922 }, { "epoch": 2.54, "grad_norm": 0.7288460731506348, "learning_rate": 3.346940089885065e-05, "loss": 3.0565, "step": 51923 }, { "epoch": 2.54, "grad_norm": 0.77585369348526, "learning_rate": 3.346233437054538e-05, "loss": 2.9148, "step": 51924 }, { "epoch": 2.54, "grad_norm": 0.7512287497520447, "learning_rate": 3.345526854424898e-05, "loss": 2.9999, "step": 51925 }, { "epoch": 2.54, "grad_norm": 0.7456086874008179, "learning_rate": 3.344820341998012e-05, "loss": 3.0275, "step": 51926 }, { "epoch": 2.54, "grad_norm": 0.7627673149108887, "learning_rate": 3.344113899775732e-05, "loss": 2.8022, "step": 51927 }, { "epoch": 2.54, "grad_norm": 0.701185405254364, "learning_rate": 3.343407527759929e-05, "loss": 2.6771, "step": 51928 }, { "epoch": 2.54, "grad_norm": 0.7408918738365173, "learning_rate": 3.3427012259524623e-05, "loss": 2.8085, "step": 51929 }, { "epoch": 2.54, "grad_norm": 0.7534040212631226, "learning_rate": 3.341994994355186e-05, "loss": 3.0257, "step": 51930 }, { "epoch": 2.55, "grad_norm": 0.7601995468139648, "learning_rate": 3.341288832969973e-05, "loss": 2.8248, "step": 51931 }, { "epoch": 2.55, "grad_norm": 0.7661672830581665, "learning_rate": 3.340582741798671e-05, "loss": 3.0491, "step": 51932 }, { "epoch": 2.55, "grad_norm": 0.7597264051437378, "learning_rate": 3.339876720843139e-05, "loss": 2.9227, "step": 51933 }, { "epoch": 2.55, "grad_norm": 0.726146936416626, "learning_rate": 3.3391707701052484e-05, "loss": 2.941, "step": 51934 }, { "epoch": 2.55, "grad_norm": 0.7189964652061462, "learning_rate": 3.338464889586842e-05, "loss": 2.6657, "step": 51935 }, { "epoch": 2.55, "grad_norm": 0.7691967487335205, "learning_rate": 3.337759079289797e-05, "loss": 2.7853, "step": 51936 }, { "epoch": 2.55, "grad_norm": 0.7658074498176575, "learning_rate": 3.3370533392159557e-05, "loss": 2.9422, "step": 51937 }, { "epoch": 2.55, "grad_norm": 0.7648226618766785, "learning_rate": 3.336347669367191e-05, "loss": 2.8923, "step": 51938 }, { "epoch": 2.55, "grad_norm": 0.7221632599830627, "learning_rate": 3.3356420697453535e-05, "loss": 2.7206, "step": 51939 }, { "epoch": 2.55, "grad_norm": 0.7435133457183838, "learning_rate": 3.3349365403522986e-05, "loss": 2.8739, "step": 51940 }, { "epoch": 2.55, "grad_norm": 0.744724690914154, "learning_rate": 3.3342310811898923e-05, "loss": 2.8973, "step": 51941 }, { "epoch": 2.55, "grad_norm": 0.7191993594169617, "learning_rate": 3.3335256922599886e-05, "loss": 2.7973, "step": 51942 }, { "epoch": 2.55, "grad_norm": 0.7432810068130493, "learning_rate": 3.33282037356444e-05, "loss": 2.9852, "step": 51943 }, { "epoch": 2.55, "grad_norm": 0.7368122339248657, "learning_rate": 3.3321151251051216e-05, "loss": 3.0071, "step": 51944 }, { "epoch": 2.55, "grad_norm": 0.7477759122848511, "learning_rate": 3.331409946883872e-05, "loss": 2.988, "step": 51945 }, { "epoch": 2.55, "grad_norm": 0.8247474431991577, "learning_rate": 3.330704838902563e-05, "loss": 2.8604, "step": 51946 }, { "epoch": 2.55, "grad_norm": 0.7637084126472473, "learning_rate": 3.3299998011630424e-05, "loss": 2.9437, "step": 51947 }, { "epoch": 2.55, "grad_norm": 0.7311221361160278, "learning_rate": 3.3292948336671635e-05, "loss": 2.9512, "step": 51948 }, { "epoch": 2.55, "grad_norm": 0.7546210885047913, "learning_rate": 3.328589936416797e-05, "loss": 2.8564, "step": 51949 }, { "epoch": 2.55, "grad_norm": 0.7457635402679443, "learning_rate": 3.327885109413785e-05, "loss": 2.8564, "step": 51950 }, { "epoch": 2.55, "grad_norm": 0.7271265983581543, "learning_rate": 3.327180352659996e-05, "loss": 3.0531, "step": 51951 }, { "epoch": 2.55, "grad_norm": 0.7246139049530029, "learning_rate": 3.326475666157275e-05, "loss": 2.6776, "step": 51952 }, { "epoch": 2.55, "grad_norm": 0.7810348868370056, "learning_rate": 3.325771049907492e-05, "loss": 2.8616, "step": 51953 }, { "epoch": 2.55, "grad_norm": 0.7812083959579468, "learning_rate": 3.32506650391249e-05, "loss": 3.0086, "step": 51954 }, { "epoch": 2.55, "grad_norm": 0.7078095078468323, "learning_rate": 3.3243620281741254e-05, "loss": 2.9684, "step": 51955 }, { "epoch": 2.55, "grad_norm": 0.7382186651229858, "learning_rate": 3.323657622694264e-05, "loss": 3.0665, "step": 51956 }, { "epoch": 2.55, "grad_norm": 0.7355473041534424, "learning_rate": 3.322953287474749e-05, "loss": 2.7563, "step": 51957 }, { "epoch": 2.55, "grad_norm": 0.7071644067764282, "learning_rate": 3.32224902251744e-05, "loss": 2.8823, "step": 51958 }, { "epoch": 2.55, "grad_norm": 0.7337292432785034, "learning_rate": 3.321544827824196e-05, "loss": 2.8112, "step": 51959 }, { "epoch": 2.55, "grad_norm": 0.7419911026954651, "learning_rate": 3.320840703396872e-05, "loss": 2.8555, "step": 51960 }, { "epoch": 2.55, "grad_norm": 0.7350503206253052, "learning_rate": 3.3201366492373195e-05, "loss": 3.1303, "step": 51961 }, { "epoch": 2.55, "grad_norm": 0.7591217160224915, "learning_rate": 3.3194326653473836e-05, "loss": 2.8945, "step": 51962 }, { "epoch": 2.55, "grad_norm": 0.7995786666870117, "learning_rate": 3.318728751728929e-05, "loss": 2.9841, "step": 51963 }, { "epoch": 2.55, "grad_norm": 0.7559335231781006, "learning_rate": 3.318024908383816e-05, "loss": 2.9551, "step": 51964 }, { "epoch": 2.55, "grad_norm": 0.7337844371795654, "learning_rate": 3.3173211353138816e-05, "loss": 3.106, "step": 51965 }, { "epoch": 2.55, "grad_norm": 0.7507258057594299, "learning_rate": 3.316617432520995e-05, "loss": 2.6799, "step": 51966 }, { "epoch": 2.55, "grad_norm": 0.7264440059661865, "learning_rate": 3.315913800006994e-05, "loss": 2.8526, "step": 51967 }, { "epoch": 2.55, "grad_norm": 0.7675163745880127, "learning_rate": 3.315210237773751e-05, "loss": 3.0471, "step": 51968 }, { "epoch": 2.55, "grad_norm": 0.7322779893875122, "learning_rate": 3.314506745823108e-05, "loss": 2.9175, "step": 51969 }, { "epoch": 2.55, "grad_norm": 0.7262869477272034, "learning_rate": 3.31380332415691e-05, "loss": 2.886, "step": 51970 }, { "epoch": 2.55, "grad_norm": 0.7132663726806641, "learning_rate": 3.313099972777025e-05, "loss": 2.7285, "step": 51971 }, { "epoch": 2.55, "grad_norm": 0.7315619587898254, "learning_rate": 3.3123966916852915e-05, "loss": 2.9182, "step": 51972 }, { "epoch": 2.55, "grad_norm": 0.7278238534927368, "learning_rate": 3.3116934808835726e-05, "loss": 2.7525, "step": 51973 }, { "epoch": 2.55, "grad_norm": 0.7661489248275757, "learning_rate": 3.310990340373719e-05, "loss": 2.9219, "step": 51974 }, { "epoch": 2.55, "grad_norm": 0.7610893845558167, "learning_rate": 3.3102872701575836e-05, "loss": 2.8953, "step": 51975 }, { "epoch": 2.55, "grad_norm": 0.7103796601295471, "learning_rate": 3.309584270237013e-05, "loss": 2.8619, "step": 51976 }, { "epoch": 2.55, "grad_norm": 0.733978271484375, "learning_rate": 3.3088813406138545e-05, "loss": 2.899, "step": 51977 }, { "epoch": 2.55, "grad_norm": 0.7456706762313843, "learning_rate": 3.308178481289976e-05, "loss": 2.9013, "step": 51978 }, { "epoch": 2.55, "grad_norm": 0.7410678863525391, "learning_rate": 3.3074756922672085e-05, "loss": 2.8654, "step": 51979 }, { "epoch": 2.55, "grad_norm": 0.7431950569152832, "learning_rate": 3.3067729735474115e-05, "loss": 2.8395, "step": 51980 }, { "epoch": 2.55, "grad_norm": 0.715957522392273, "learning_rate": 3.306070325132446e-05, "loss": 2.9237, "step": 51981 }, { "epoch": 2.55, "grad_norm": 0.7735357284545898, "learning_rate": 3.3053677470241544e-05, "loss": 2.9148, "step": 51982 }, { "epoch": 2.55, "grad_norm": 0.7633320689201355, "learning_rate": 3.304665239224385e-05, "loss": 2.6345, "step": 51983 }, { "epoch": 2.55, "grad_norm": 0.6995530128479004, "learning_rate": 3.303962801734984e-05, "loss": 3.1224, "step": 51984 }, { "epoch": 2.55, "grad_norm": 0.7412270903587341, "learning_rate": 3.303260434557808e-05, "loss": 2.9119, "step": 51985 }, { "epoch": 2.55, "grad_norm": 0.7831922173500061, "learning_rate": 3.302558137694711e-05, "loss": 2.9452, "step": 51986 }, { "epoch": 2.55, "grad_norm": 0.7183090448379517, "learning_rate": 3.301855911147533e-05, "loss": 2.9392, "step": 51987 }, { "epoch": 2.55, "grad_norm": 0.7193189263343811, "learning_rate": 3.3011537549181335e-05, "loss": 3.1388, "step": 51988 }, { "epoch": 2.55, "grad_norm": 0.7489942312240601, "learning_rate": 3.3004516690083505e-05, "loss": 2.946, "step": 51989 }, { "epoch": 2.55, "grad_norm": 0.7693890333175659, "learning_rate": 3.2997496534200464e-05, "loss": 2.8692, "step": 51990 }, { "epoch": 2.55, "grad_norm": 0.7755201458930969, "learning_rate": 3.2990477081550624e-05, "loss": 2.934, "step": 51991 }, { "epoch": 2.55, "grad_norm": 0.7237342596054077, "learning_rate": 3.2983458332152415e-05, "loss": 2.7404, "step": 51992 }, { "epoch": 2.55, "grad_norm": 0.711622953414917, "learning_rate": 3.2976440286024474e-05, "loss": 3.0484, "step": 51993 }, { "epoch": 2.55, "grad_norm": 0.7582530975341797, "learning_rate": 3.2969422943185095e-05, "loss": 2.8522, "step": 51994 }, { "epoch": 2.55, "grad_norm": 0.7546455264091492, "learning_rate": 3.29624063036529e-05, "loss": 2.8292, "step": 51995 }, { "epoch": 2.55, "grad_norm": 0.7362459301948547, "learning_rate": 3.29553903674464e-05, "loss": 2.8576, "step": 51996 }, { "epoch": 2.55, "grad_norm": 0.7272038459777832, "learning_rate": 3.2948375134583984e-05, "loss": 2.9262, "step": 51997 }, { "epoch": 2.55, "grad_norm": 0.787972092628479, "learning_rate": 3.2941360605084166e-05, "loss": 2.8167, "step": 51998 }, { "epoch": 2.55, "grad_norm": 0.7820271253585815, "learning_rate": 3.293434677896534e-05, "loss": 2.9891, "step": 51999 }, { "epoch": 2.55, "grad_norm": 0.7015359997749329, "learning_rate": 3.292733365624605e-05, "loss": 2.8756, "step": 52000 }, { "epoch": 2.55, "grad_norm": 0.7298908233642578, "learning_rate": 3.292032123694481e-05, "loss": 2.7706, "step": 52001 }, { "epoch": 2.55, "grad_norm": 0.7448387145996094, "learning_rate": 3.2913309521080026e-05, "loss": 2.8888, "step": 52002 }, { "epoch": 2.55, "grad_norm": 0.7021864056587219, "learning_rate": 3.2906298508670195e-05, "loss": 2.9838, "step": 52003 }, { "epoch": 2.55, "grad_norm": 0.7519044280052185, "learning_rate": 3.289928819973381e-05, "loss": 2.8275, "step": 52004 }, { "epoch": 2.55, "grad_norm": 0.7517359256744385, "learning_rate": 3.289227859428919e-05, "loss": 2.9263, "step": 52005 }, { "epoch": 2.55, "grad_norm": 0.7345372438430786, "learning_rate": 3.288526969235499e-05, "loss": 2.9273, "step": 52006 }, { "epoch": 2.55, "grad_norm": 0.702253520488739, "learning_rate": 3.287826149394952e-05, "loss": 3.0666, "step": 52007 }, { "epoch": 2.55, "grad_norm": 0.7254359126091003, "learning_rate": 3.2871253999091377e-05, "loss": 2.7956, "step": 52008 }, { "epoch": 2.55, "grad_norm": 0.7682890295982361, "learning_rate": 3.2864247207798864e-05, "loss": 2.8114, "step": 52009 }, { "epoch": 2.55, "grad_norm": 0.718375027179718, "learning_rate": 3.2857241120090515e-05, "loss": 2.8538, "step": 52010 }, { "epoch": 2.55, "grad_norm": 0.7394582629203796, "learning_rate": 3.285023573598484e-05, "loss": 2.9124, "step": 52011 }, { "epoch": 2.55, "grad_norm": 0.723473310470581, "learning_rate": 3.284323105550023e-05, "loss": 3.0113, "step": 52012 }, { "epoch": 2.55, "grad_norm": 0.6897356510162354, "learning_rate": 3.283622707865513e-05, "loss": 2.8036, "step": 52013 }, { "epoch": 2.55, "grad_norm": 0.7808870077133179, "learning_rate": 3.282922380546793e-05, "loss": 2.8074, "step": 52014 }, { "epoch": 2.55, "grad_norm": 0.7283383011817932, "learning_rate": 3.2822221235957145e-05, "loss": 2.8458, "step": 52015 }, { "epoch": 2.55, "grad_norm": 0.7218790650367737, "learning_rate": 3.2815219370141264e-05, "loss": 3.0506, "step": 52016 }, { "epoch": 2.55, "grad_norm": 0.7908827066421509, "learning_rate": 3.280821820803861e-05, "loss": 3.0478, "step": 52017 }, { "epoch": 2.55, "grad_norm": 0.7488206028938293, "learning_rate": 3.280121774966775e-05, "loss": 2.793, "step": 52018 }, { "epoch": 2.55, "grad_norm": 0.7366180419921875, "learning_rate": 3.2794217995047054e-05, "loss": 3.0707, "step": 52019 }, { "epoch": 2.55, "grad_norm": 0.7356551885604858, "learning_rate": 3.278721894419494e-05, "loss": 2.9331, "step": 52020 }, { "epoch": 2.55, "grad_norm": 0.7605820298194885, "learning_rate": 3.278022059712988e-05, "loss": 3.1874, "step": 52021 }, { "epoch": 2.55, "grad_norm": 0.7392238974571228, "learning_rate": 3.2773222953870236e-05, "loss": 2.8032, "step": 52022 }, { "epoch": 2.55, "grad_norm": 0.7410476803779602, "learning_rate": 3.2766226014434596e-05, "loss": 2.8852, "step": 52023 }, { "epoch": 2.55, "grad_norm": 0.7377091646194458, "learning_rate": 3.2759229778841166e-05, "loss": 2.7027, "step": 52024 }, { "epoch": 2.55, "grad_norm": 0.7168262004852295, "learning_rate": 3.275223424710854e-05, "loss": 2.813, "step": 52025 }, { "epoch": 2.55, "grad_norm": 0.7007832527160645, "learning_rate": 3.2745239419255145e-05, "loss": 2.9249, "step": 52026 }, { "epoch": 2.55, "grad_norm": 0.7348765134811401, "learning_rate": 3.273824529529935e-05, "loss": 2.8679, "step": 52027 }, { "epoch": 2.55, "grad_norm": 0.7212089896202087, "learning_rate": 3.273125187525958e-05, "loss": 2.7796, "step": 52028 }, { "epoch": 2.55, "grad_norm": 0.7870141863822937, "learning_rate": 3.2724259159154186e-05, "loss": 2.965, "step": 52029 }, { "epoch": 2.55, "grad_norm": 0.728225588798523, "learning_rate": 3.2717267147001656e-05, "loss": 2.9082, "step": 52030 }, { "epoch": 2.55, "grad_norm": 0.7556633353233337, "learning_rate": 3.271027583882048e-05, "loss": 2.9904, "step": 52031 }, { "epoch": 2.55, "grad_norm": 0.7504242658615112, "learning_rate": 3.270328523462893e-05, "loss": 2.9538, "step": 52032 }, { "epoch": 2.55, "grad_norm": 0.7282172441482544, "learning_rate": 3.2696295334445566e-05, "loss": 2.9926, "step": 52033 }, { "epoch": 2.55, "grad_norm": 0.7356157302856445, "learning_rate": 3.2689306138288675e-05, "loss": 2.938, "step": 52034 }, { "epoch": 2.55, "grad_norm": 0.6893876194953918, "learning_rate": 3.268231764617667e-05, "loss": 2.9002, "step": 52035 }, { "epoch": 2.55, "grad_norm": 0.7842584848403931, "learning_rate": 3.267532985812808e-05, "loss": 2.8638, "step": 52036 }, { "epoch": 2.55, "grad_norm": 0.7505773901939392, "learning_rate": 3.2668342774161105e-05, "loss": 3.0113, "step": 52037 }, { "epoch": 2.55, "grad_norm": 0.7909178733825684, "learning_rate": 3.266135639429438e-05, "loss": 2.8813, "step": 52038 }, { "epoch": 2.55, "grad_norm": 0.7430846095085144, "learning_rate": 3.265437071854609e-05, "loss": 3.109, "step": 52039 }, { "epoch": 2.55, "grad_norm": 0.7298345565795898, "learning_rate": 3.264738574693481e-05, "loss": 3.0871, "step": 52040 }, { "epoch": 2.55, "grad_norm": 0.7301103472709656, "learning_rate": 3.264040147947889e-05, "loss": 2.9725, "step": 52041 }, { "epoch": 2.55, "grad_norm": 0.7341718077659607, "learning_rate": 3.26334179161966e-05, "loss": 2.6133, "step": 52042 }, { "epoch": 2.55, "grad_norm": 0.7384917140007019, "learning_rate": 3.262643505710651e-05, "loss": 2.7426, "step": 52043 }, { "epoch": 2.55, "grad_norm": 0.754947304725647, "learning_rate": 3.2619452902226885e-05, "loss": 2.7363, "step": 52044 }, { "epoch": 2.55, "grad_norm": 0.7241181135177612, "learning_rate": 3.261247145157613e-05, "loss": 2.871, "step": 52045 }, { "epoch": 2.55, "grad_norm": 0.7246091365814209, "learning_rate": 3.2605490705172734e-05, "loss": 2.969, "step": 52046 }, { "epoch": 2.55, "grad_norm": 0.7573584914207458, "learning_rate": 3.259851066303498e-05, "loss": 2.9414, "step": 52047 }, { "epoch": 2.55, "grad_norm": 0.7820131182670593, "learning_rate": 3.259153132518133e-05, "loss": 3.2345, "step": 52048 }, { "epoch": 2.55, "grad_norm": 0.7728943824768066, "learning_rate": 3.2584552691630125e-05, "loss": 2.8005, "step": 52049 }, { "epoch": 2.55, "grad_norm": 0.7590402960777283, "learning_rate": 3.257757476239966e-05, "loss": 2.6722, "step": 52050 }, { "epoch": 2.55, "grad_norm": 0.7413563132286072, "learning_rate": 3.257059753750848e-05, "loss": 2.722, "step": 52051 }, { "epoch": 2.55, "grad_norm": 0.7250611782073975, "learning_rate": 3.256362101697484e-05, "loss": 2.6977, "step": 52052 }, { "epoch": 2.55, "grad_norm": 0.7582236528396606, "learning_rate": 3.255664520081719e-05, "loss": 3.023, "step": 52053 }, { "epoch": 2.55, "grad_norm": 0.7816905975341797, "learning_rate": 3.254967008905378e-05, "loss": 2.8675, "step": 52054 }, { "epoch": 2.55, "grad_norm": 0.7680097818374634, "learning_rate": 3.254269568170317e-05, "loss": 2.8494, "step": 52055 }, { "epoch": 2.55, "grad_norm": 0.7169177532196045, "learning_rate": 3.25357219787836e-05, "loss": 2.951, "step": 52056 }, { "epoch": 2.55, "grad_norm": 0.7729882597923279, "learning_rate": 3.25287489803134e-05, "loss": 2.806, "step": 52057 }, { "epoch": 2.55, "grad_norm": 0.7043116688728333, "learning_rate": 3.252177668631109e-05, "loss": 3.1595, "step": 52058 }, { "epoch": 2.55, "grad_norm": 0.8605642318725586, "learning_rate": 3.2514805096794835e-05, "loss": 2.8124, "step": 52059 }, { "epoch": 2.55, "grad_norm": 0.7150072455406189, "learning_rate": 3.2507834211783214e-05, "loss": 2.7993, "step": 52060 }, { "epoch": 2.55, "grad_norm": 0.7066315412521362, "learning_rate": 3.250086403129439e-05, "loss": 2.9328, "step": 52061 }, { "epoch": 2.55, "grad_norm": 0.7706637978553772, "learning_rate": 3.2493894555346866e-05, "loss": 3.0378, "step": 52062 }, { "epoch": 2.55, "grad_norm": 0.7277361750602722, "learning_rate": 3.248692578395898e-05, "loss": 2.8421, "step": 52063 }, { "epoch": 2.55, "grad_norm": 0.7525270581245422, "learning_rate": 3.2479957717148966e-05, "loss": 2.9334, "step": 52064 }, { "epoch": 2.55, "grad_norm": 0.7777685523033142, "learning_rate": 3.247299035493529e-05, "loss": 2.6474, "step": 52065 }, { "epoch": 2.55, "grad_norm": 0.7585991621017456, "learning_rate": 3.246602369733622e-05, "loss": 2.9298, "step": 52066 }, { "epoch": 2.55, "grad_norm": 0.7407344579696655, "learning_rate": 3.24590577443702e-05, "loss": 2.8826, "step": 52067 }, { "epoch": 2.55, "grad_norm": 0.7381274700164795, "learning_rate": 3.245209249605555e-05, "loss": 2.8998, "step": 52068 }, { "epoch": 2.55, "grad_norm": 0.7720596790313721, "learning_rate": 3.2445127952410554e-05, "loss": 3.0174, "step": 52069 }, { "epoch": 2.55, "grad_norm": 0.7466363310813904, "learning_rate": 3.243816411345364e-05, "loss": 2.9214, "step": 52070 }, { "epoch": 2.55, "grad_norm": 0.7581253051757812, "learning_rate": 3.243120097920311e-05, "loss": 2.9456, "step": 52071 }, { "epoch": 2.55, "grad_norm": 0.7453657388687134, "learning_rate": 3.2424238549677265e-05, "loss": 2.8891, "step": 52072 }, { "epoch": 2.55, "grad_norm": 0.7070062756538391, "learning_rate": 3.2417276824894514e-05, "loss": 3.0115, "step": 52073 }, { "epoch": 2.55, "grad_norm": 0.7681336402893066, "learning_rate": 3.2410315804873114e-05, "loss": 2.6473, "step": 52074 }, { "epoch": 2.55, "grad_norm": 0.7477194666862488, "learning_rate": 3.240335548963151e-05, "loss": 2.9022, "step": 52075 }, { "epoch": 2.55, "grad_norm": 0.7516164183616638, "learning_rate": 3.23963958791879e-05, "loss": 2.9424, "step": 52076 }, { "epoch": 2.55, "grad_norm": 0.7572428584098816, "learning_rate": 3.2389436973560754e-05, "loss": 2.8975, "step": 52077 }, { "epoch": 2.55, "grad_norm": 0.7532944679260254, "learning_rate": 3.238247877276831e-05, "loss": 2.9374, "step": 52078 }, { "epoch": 2.55, "grad_norm": 0.7349500060081482, "learning_rate": 3.2375521276828866e-05, "loss": 2.7757, "step": 52079 }, { "epoch": 2.55, "grad_norm": 0.7738845944404602, "learning_rate": 3.236856448576085e-05, "loss": 3.076, "step": 52080 }, { "epoch": 2.55, "grad_norm": 0.7966225743293762, "learning_rate": 3.2361608399582454e-05, "loss": 2.8035, "step": 52081 }, { "epoch": 2.55, "grad_norm": 0.6876529455184937, "learning_rate": 3.235465301831208e-05, "loss": 2.9115, "step": 52082 }, { "epoch": 2.55, "grad_norm": 0.7187408208847046, "learning_rate": 3.2347698341968095e-05, "loss": 2.8788, "step": 52083 }, { "epoch": 2.55, "grad_norm": 0.7126637697219849, "learning_rate": 3.2340744370568783e-05, "loss": 3.1122, "step": 52084 }, { "epoch": 2.55, "grad_norm": 0.7737832069396973, "learning_rate": 3.233379110413242e-05, "loss": 2.8319, "step": 52085 }, { "epoch": 2.55, "grad_norm": 0.7817880511283875, "learning_rate": 3.23268385426773e-05, "loss": 2.8349, "step": 52086 }, { "epoch": 2.55, "grad_norm": 0.7609059810638428, "learning_rate": 3.231988668622174e-05, "loss": 3.1345, "step": 52087 }, { "epoch": 2.55, "grad_norm": 0.7383360266685486, "learning_rate": 3.2312935534784154e-05, "loss": 2.9601, "step": 52088 }, { "epoch": 2.55, "grad_norm": 0.7559726238250732, "learning_rate": 3.2305985088382736e-05, "loss": 2.8189, "step": 52089 }, { "epoch": 2.55, "grad_norm": 0.7402336001396179, "learning_rate": 3.2299035347035896e-05, "loss": 2.9232, "step": 52090 }, { "epoch": 2.55, "grad_norm": 0.8144344687461853, "learning_rate": 3.229208631076179e-05, "loss": 2.8724, "step": 52091 }, { "epoch": 2.55, "grad_norm": 0.7357531785964966, "learning_rate": 3.22851379795789e-05, "loss": 2.8621, "step": 52092 }, { "epoch": 2.55, "grad_norm": 0.7645347714424133, "learning_rate": 3.227819035350542e-05, "loss": 2.7017, "step": 52093 }, { "epoch": 2.55, "grad_norm": 0.7534157037734985, "learning_rate": 3.227124343255959e-05, "loss": 2.8386, "step": 52094 }, { "epoch": 2.55, "grad_norm": 0.7897367477416992, "learning_rate": 3.226429721675984e-05, "loss": 2.8818, "step": 52095 }, { "epoch": 2.55, "grad_norm": 0.7672426700592041, "learning_rate": 3.225735170612435e-05, "loss": 2.8568, "step": 52096 }, { "epoch": 2.55, "grad_norm": 0.7439735531806946, "learning_rate": 3.2250406900671466e-05, "loss": 2.8941, "step": 52097 }, { "epoch": 2.55, "grad_norm": 0.7530331611633301, "learning_rate": 3.224346280041955e-05, "loss": 2.8021, "step": 52098 }, { "epoch": 2.55, "grad_norm": 0.7255603671073914, "learning_rate": 3.223651940538682e-05, "loss": 3.0742, "step": 52099 }, { "epoch": 2.55, "grad_norm": 0.7502477765083313, "learning_rate": 3.2229576715591555e-05, "loss": 2.8602, "step": 52100 }, { "epoch": 2.55, "grad_norm": 0.8245431184768677, "learning_rate": 3.222263473105198e-05, "loss": 2.7466, "step": 52101 }, { "epoch": 2.55, "grad_norm": 0.7484453320503235, "learning_rate": 3.2215693451786474e-05, "loss": 2.9278, "step": 52102 }, { "epoch": 2.55, "grad_norm": 0.720234215259552, "learning_rate": 3.2208752877813357e-05, "loss": 3.0219, "step": 52103 }, { "epoch": 2.55, "grad_norm": 0.7335119843482971, "learning_rate": 3.220181300915079e-05, "loss": 2.795, "step": 52104 }, { "epoch": 2.55, "grad_norm": 0.7557951807975769, "learning_rate": 3.219487384581715e-05, "loss": 3.0479, "step": 52105 }, { "epoch": 2.55, "grad_norm": 0.7810825705528259, "learning_rate": 3.218793538783069e-05, "loss": 2.9214, "step": 52106 }, { "epoch": 2.55, "grad_norm": 0.7902493476867676, "learning_rate": 3.218099763520959e-05, "loss": 2.8729, "step": 52107 }, { "epoch": 2.55, "grad_norm": 0.8319610953330994, "learning_rate": 3.217406058797225e-05, "loss": 2.9528, "step": 52108 }, { "epoch": 2.55, "grad_norm": 0.7634714841842651, "learning_rate": 3.2167124246136876e-05, "loss": 2.9904, "step": 52109 }, { "epoch": 2.55, "grad_norm": 0.7566140294075012, "learning_rate": 3.216018860972176e-05, "loss": 2.822, "step": 52110 }, { "epoch": 2.55, "grad_norm": 0.8258447051048279, "learning_rate": 3.215325367874512e-05, "loss": 2.7601, "step": 52111 }, { "epoch": 2.55, "grad_norm": 0.7029900550842285, "learning_rate": 3.2146319453225266e-05, "loss": 3.0402, "step": 52112 }, { "epoch": 2.55, "grad_norm": 0.7495381832122803, "learning_rate": 3.2139385933180526e-05, "loss": 2.8941, "step": 52113 }, { "epoch": 2.55, "grad_norm": 0.7451898455619812, "learning_rate": 3.213245311862909e-05, "loss": 2.9559, "step": 52114 }, { "epoch": 2.55, "grad_norm": 0.7116692662239075, "learning_rate": 3.212552100958919e-05, "loss": 3.0828, "step": 52115 }, { "epoch": 2.55, "grad_norm": 0.7679808735847473, "learning_rate": 3.21185896060791e-05, "loss": 2.9068, "step": 52116 }, { "epoch": 2.55, "grad_norm": 0.7367801070213318, "learning_rate": 3.2111658908117065e-05, "loss": 2.9342, "step": 52117 }, { "epoch": 2.55, "grad_norm": 0.7176463007926941, "learning_rate": 3.210472891572143e-05, "loss": 3.1503, "step": 52118 }, { "epoch": 2.55, "grad_norm": 0.6963474750518799, "learning_rate": 3.2097799628910295e-05, "loss": 2.9381, "step": 52119 }, { "epoch": 2.55, "grad_norm": 0.7654833197593689, "learning_rate": 3.209087104770209e-05, "loss": 2.8578, "step": 52120 }, { "epoch": 2.55, "grad_norm": 0.7469300627708435, "learning_rate": 3.208394317211496e-05, "loss": 3.1074, "step": 52121 }, { "epoch": 2.55, "grad_norm": 0.702953577041626, "learning_rate": 3.20770160021671e-05, "loss": 2.6675, "step": 52122 }, { "epoch": 2.55, "grad_norm": 0.7122648358345032, "learning_rate": 3.207008953787688e-05, "loss": 2.9066, "step": 52123 }, { "epoch": 2.55, "grad_norm": 0.7659664750099182, "learning_rate": 3.206316377926244e-05, "loss": 2.8832, "step": 52124 }, { "epoch": 2.55, "grad_norm": 0.747031033039093, "learning_rate": 3.20562387263421e-05, "loss": 2.965, "step": 52125 }, { "epoch": 2.55, "grad_norm": 0.7918295860290527, "learning_rate": 3.204931437913398e-05, "loss": 2.8316, "step": 52126 }, { "epoch": 2.55, "grad_norm": 0.7406182289123535, "learning_rate": 3.204239073765641e-05, "loss": 2.9045, "step": 52127 }, { "epoch": 2.55, "grad_norm": 0.7530829906463623, "learning_rate": 3.2035467801927696e-05, "loss": 2.9757, "step": 52128 }, { "epoch": 2.55, "grad_norm": 0.7679747343063354, "learning_rate": 3.2028545571965994e-05, "loss": 2.9212, "step": 52129 }, { "epoch": 2.55, "grad_norm": 0.7454062104225159, "learning_rate": 3.202162404778949e-05, "loss": 2.9671, "step": 52130 }, { "epoch": 2.55, "grad_norm": 0.7858791947364807, "learning_rate": 3.2014703229416426e-05, "loss": 2.8775, "step": 52131 }, { "epoch": 2.55, "grad_norm": 0.7846336364746094, "learning_rate": 3.2007783116865104e-05, "loss": 2.9201, "step": 52132 }, { "epoch": 2.55, "grad_norm": 0.7320681810379028, "learning_rate": 3.2000863710153635e-05, "loss": 3.0948, "step": 52133 }, { "epoch": 2.55, "grad_norm": 0.8238586783409119, "learning_rate": 3.1993945009300336e-05, "loss": 2.9597, "step": 52134 }, { "epoch": 2.56, "grad_norm": 0.7886196970939636, "learning_rate": 3.198702701432346e-05, "loss": 2.9869, "step": 52135 }, { "epoch": 2.56, "grad_norm": 0.7563168406486511, "learning_rate": 3.19801097252412e-05, "loss": 2.9357, "step": 52136 }, { "epoch": 2.56, "grad_norm": 0.8196958899497986, "learning_rate": 3.197319314207169e-05, "loss": 2.9284, "step": 52137 }, { "epoch": 2.56, "grad_norm": 0.7236234545707703, "learning_rate": 3.196627726483321e-05, "loss": 2.8384, "step": 52138 }, { "epoch": 2.56, "grad_norm": 0.773149847984314, "learning_rate": 3.195936209354395e-05, "loss": 2.8404, "step": 52139 }, { "epoch": 2.56, "grad_norm": 0.7744463682174683, "learning_rate": 3.1952447628222176e-05, "loss": 2.7944, "step": 52140 }, { "epoch": 2.56, "grad_norm": 0.7418228387832642, "learning_rate": 3.194553386888604e-05, "loss": 2.9338, "step": 52141 }, { "epoch": 2.56, "grad_norm": 0.7621162533760071, "learning_rate": 3.193862081555383e-05, "loss": 2.9538, "step": 52142 }, { "epoch": 2.56, "grad_norm": 0.7875955104827881, "learning_rate": 3.193170846824369e-05, "loss": 2.9326, "step": 52143 }, { "epoch": 2.56, "grad_norm": 0.7359634637832642, "learning_rate": 3.192479682697381e-05, "loss": 2.7486, "step": 52144 }, { "epoch": 2.56, "grad_norm": 0.7574135661125183, "learning_rate": 3.191788589176244e-05, "loss": 2.8255, "step": 52145 }, { "epoch": 2.56, "grad_norm": 0.7023094296455383, "learning_rate": 3.1910975662627734e-05, "loss": 2.9397, "step": 52146 }, { "epoch": 2.56, "grad_norm": 0.7390046119689941, "learning_rate": 3.190406613958798e-05, "loss": 3.0639, "step": 52147 }, { "epoch": 2.56, "grad_norm": 0.7670930027961731, "learning_rate": 3.189715732266126e-05, "loss": 2.6807, "step": 52148 }, { "epoch": 2.56, "grad_norm": 0.711762011051178, "learning_rate": 3.1890249211865826e-05, "loss": 2.7934, "step": 52149 }, { "epoch": 2.56, "grad_norm": 0.7005589008331299, "learning_rate": 3.1883341807219965e-05, "loss": 2.9111, "step": 52150 }, { "epoch": 2.56, "grad_norm": 0.7569049596786499, "learning_rate": 3.1876435108741725e-05, "loss": 2.8733, "step": 52151 }, { "epoch": 2.56, "grad_norm": 0.8212657570838928, "learning_rate": 3.18695291164494e-05, "loss": 2.9394, "step": 52152 }, { "epoch": 2.56, "grad_norm": 0.7402049899101257, "learning_rate": 3.186262383036107e-05, "loss": 2.8861, "step": 52153 }, { "epoch": 2.56, "grad_norm": 0.7410562038421631, "learning_rate": 3.185571925049495e-05, "loss": 3.0182, "step": 52154 }, { "epoch": 2.56, "grad_norm": 0.7571350336074829, "learning_rate": 3.1848815376869366e-05, "loss": 2.7717, "step": 52155 }, { "epoch": 2.56, "grad_norm": 0.7215419411659241, "learning_rate": 3.1841912209502294e-05, "loss": 2.86, "step": 52156 }, { "epoch": 2.56, "grad_norm": 0.751708984375, "learning_rate": 3.18350097484121e-05, "loss": 3.1578, "step": 52157 }, { "epoch": 2.56, "grad_norm": 0.7202328443527222, "learning_rate": 3.182810799361689e-05, "loss": 2.8002, "step": 52158 }, { "epoch": 2.56, "grad_norm": 0.7052213549613953, "learning_rate": 3.182120694513476e-05, "loss": 3.1323, "step": 52159 }, { "epoch": 2.56, "grad_norm": 0.7351638674736023, "learning_rate": 3.181430660298402e-05, "loss": 2.8612, "step": 52160 }, { "epoch": 2.56, "grad_norm": 0.7632426619529724, "learning_rate": 3.180740696718269e-05, "loss": 3.0659, "step": 52161 }, { "epoch": 2.56, "grad_norm": 0.7189815640449524, "learning_rate": 3.180050803774913e-05, "loss": 3.0106, "step": 52162 }, { "epoch": 2.56, "grad_norm": 0.7511869668960571, "learning_rate": 3.179360981470138e-05, "loss": 3.0049, "step": 52163 }, { "epoch": 2.56, "grad_norm": 0.7162873148918152, "learning_rate": 3.178671229805767e-05, "loss": 2.9714, "step": 52164 }, { "epoch": 2.56, "grad_norm": 0.8090042471885681, "learning_rate": 3.177981548783615e-05, "loss": 2.873, "step": 52165 }, { "epoch": 2.56, "grad_norm": 0.7131547927856445, "learning_rate": 3.1772919384054897e-05, "loss": 2.6244, "step": 52166 }, { "epoch": 2.56, "grad_norm": 0.7170288562774658, "learning_rate": 3.176602398673224e-05, "loss": 3.1226, "step": 52167 }, { "epoch": 2.56, "grad_norm": 0.7279961705207825, "learning_rate": 3.1759129295886184e-05, "loss": 2.9755, "step": 52168 }, { "epoch": 2.56, "grad_norm": 0.7459250688552856, "learning_rate": 3.175223531153497e-05, "loss": 2.7423, "step": 52169 }, { "epoch": 2.56, "grad_norm": 0.806098461151123, "learning_rate": 3.1745342033696764e-05, "loss": 2.8613, "step": 52170 }, { "epoch": 2.56, "grad_norm": 0.7358162999153137, "learning_rate": 3.1738449462389656e-05, "loss": 2.8704, "step": 52171 }, { "epoch": 2.56, "grad_norm": 0.7452125549316406, "learning_rate": 3.173155759763194e-05, "loss": 2.9032, "step": 52172 }, { "epoch": 2.56, "grad_norm": 0.765661358833313, "learning_rate": 3.1724666439441626e-05, "loss": 2.9674, "step": 52173 }, { "epoch": 2.56, "grad_norm": 0.7364770770072937, "learning_rate": 3.171777598783687e-05, "loss": 2.7907, "step": 52174 }, { "epoch": 2.56, "grad_norm": 0.740664005279541, "learning_rate": 3.171088624283592e-05, "loss": 2.8336, "step": 52175 }, { "epoch": 2.56, "grad_norm": 0.7158206701278687, "learning_rate": 3.17039972044568e-05, "loss": 2.734, "step": 52176 }, { "epoch": 2.56, "grad_norm": 0.7403131127357483, "learning_rate": 3.169710887271779e-05, "loss": 2.9151, "step": 52177 }, { "epoch": 2.56, "grad_norm": 0.7874448299407959, "learning_rate": 3.169022124763688e-05, "loss": 2.9587, "step": 52178 }, { "epoch": 2.56, "grad_norm": 0.7513279318809509, "learning_rate": 3.168333432923236e-05, "loss": 2.9218, "step": 52179 }, { "epoch": 2.56, "grad_norm": 0.715566873550415, "learning_rate": 3.16764481175223e-05, "loss": 2.8593, "step": 52180 }, { "epoch": 2.56, "grad_norm": 0.7262329459190369, "learning_rate": 3.166956261252477e-05, "loss": 2.9149, "step": 52181 }, { "epoch": 2.56, "grad_norm": 0.7445135712623596, "learning_rate": 3.166267781425804e-05, "loss": 2.9653, "step": 52182 }, { "epoch": 2.56, "grad_norm": 0.731885552406311, "learning_rate": 3.165579372274011e-05, "loss": 2.8945, "step": 52183 }, { "epoch": 2.56, "grad_norm": 0.7854867577552795, "learning_rate": 3.164891033798919e-05, "loss": 2.7433, "step": 52184 }, { "epoch": 2.56, "grad_norm": 0.7866995334625244, "learning_rate": 3.164202766002343e-05, "loss": 2.8823, "step": 52185 }, { "epoch": 2.56, "grad_norm": 0.7712637782096863, "learning_rate": 3.1635145688860916e-05, "loss": 2.944, "step": 52186 }, { "epoch": 2.56, "grad_norm": 0.758851170539856, "learning_rate": 3.162826442451981e-05, "loss": 2.9999, "step": 52187 }, { "epoch": 2.56, "grad_norm": 0.7671577334403992, "learning_rate": 3.162138386701812e-05, "loss": 2.9154, "step": 52188 }, { "epoch": 2.56, "grad_norm": 0.7265655398368835, "learning_rate": 3.161450401637407e-05, "loss": 2.9974, "step": 52189 }, { "epoch": 2.56, "grad_norm": 0.7680802941322327, "learning_rate": 3.160762487260582e-05, "loss": 2.9132, "step": 52190 }, { "epoch": 2.56, "grad_norm": 0.7444314360618591, "learning_rate": 3.160074643573137e-05, "loss": 2.8905, "step": 52191 }, { "epoch": 2.56, "grad_norm": 0.7568455934524536, "learning_rate": 3.1593868705768955e-05, "loss": 2.7307, "step": 52192 }, { "epoch": 2.56, "grad_norm": 0.7822867631912231, "learning_rate": 3.1586991682736585e-05, "loss": 2.93, "step": 52193 }, { "epoch": 2.56, "grad_norm": 0.7543140053749084, "learning_rate": 3.158011536665248e-05, "loss": 3.1412, "step": 52194 }, { "epoch": 2.56, "grad_norm": 0.853643000125885, "learning_rate": 3.15732397575347e-05, "loss": 2.9038, "step": 52195 }, { "epoch": 2.56, "grad_norm": 0.8402360677719116, "learning_rate": 3.1566364855401285e-05, "loss": 2.9202, "step": 52196 }, { "epoch": 2.56, "grad_norm": 0.7252039313316345, "learning_rate": 3.155949066027046e-05, "loss": 2.977, "step": 52197 }, { "epoch": 2.56, "grad_norm": 0.7317278385162354, "learning_rate": 3.155261717216021e-05, "loss": 2.7395, "step": 52198 }, { "epoch": 2.56, "grad_norm": 0.7297314405441284, "learning_rate": 3.154574439108869e-05, "loss": 2.8092, "step": 52199 }, { "epoch": 2.56, "grad_norm": 0.7250798940658569, "learning_rate": 3.153887231707408e-05, "loss": 3.1066, "step": 52200 }, { "epoch": 2.56, "grad_norm": 0.7466351389884949, "learning_rate": 3.153200095013444e-05, "loss": 3.1056, "step": 52201 }, { "epoch": 2.56, "grad_norm": 0.7196610569953918, "learning_rate": 3.1525130290287816e-05, "loss": 2.9067, "step": 52202 }, { "epoch": 2.56, "grad_norm": 0.7877478003501892, "learning_rate": 3.151826033755229e-05, "loss": 2.6556, "step": 52203 }, { "epoch": 2.56, "grad_norm": 0.7279103398323059, "learning_rate": 3.151139109194596e-05, "loss": 2.9329, "step": 52204 }, { "epoch": 2.56, "grad_norm": 0.7403162717819214, "learning_rate": 3.1504522553487035e-05, "loss": 2.9713, "step": 52205 }, { "epoch": 2.56, "grad_norm": 0.748703122138977, "learning_rate": 3.1497654722193466e-05, "loss": 2.7022, "step": 52206 }, { "epoch": 2.56, "grad_norm": 0.7111794948577881, "learning_rate": 3.149078759808348e-05, "loss": 3.1579, "step": 52207 }, { "epoch": 2.56, "grad_norm": 0.7396597266197205, "learning_rate": 3.148392118117505e-05, "loss": 2.6688, "step": 52208 }, { "epoch": 2.56, "grad_norm": 0.7661385536193848, "learning_rate": 3.1477055471486315e-05, "loss": 3.2509, "step": 52209 }, { "epoch": 2.56, "grad_norm": 0.7724836468696594, "learning_rate": 3.1470190469035274e-05, "loss": 3.0427, "step": 52210 }, { "epoch": 2.56, "grad_norm": 0.7351310849189758, "learning_rate": 3.146332617384009e-05, "loss": 2.949, "step": 52211 }, { "epoch": 2.56, "grad_norm": 0.7441316246986389, "learning_rate": 3.145646258591884e-05, "loss": 2.9306, "step": 52212 }, { "epoch": 2.56, "grad_norm": 0.7312101125717163, "learning_rate": 3.1449599705289554e-05, "loss": 2.9497, "step": 52213 }, { "epoch": 2.56, "grad_norm": 0.733696699142456, "learning_rate": 3.144273753197041e-05, "loss": 2.8247, "step": 52214 }, { "epoch": 2.56, "grad_norm": 0.7087571620941162, "learning_rate": 3.143587606597933e-05, "loss": 2.8334, "step": 52215 }, { "epoch": 2.56, "grad_norm": 0.7312347292900085, "learning_rate": 3.142901530733453e-05, "loss": 2.869, "step": 52216 }, { "epoch": 2.56, "grad_norm": 0.6981326341629028, "learning_rate": 3.1422155256054024e-05, "loss": 2.8639, "step": 52217 }, { "epoch": 2.56, "grad_norm": 0.7454235553741455, "learning_rate": 3.141529591215578e-05, "loss": 3.1351, "step": 52218 }, { "epoch": 2.56, "grad_norm": 0.7256028652191162, "learning_rate": 3.1408437275658074e-05, "loss": 2.776, "step": 52219 }, { "epoch": 2.56, "grad_norm": 0.7506396174430847, "learning_rate": 3.1401579346578753e-05, "loss": 2.7559, "step": 52220 }, { "epoch": 2.56, "grad_norm": 0.7295669913291931, "learning_rate": 3.139472212493598e-05, "loss": 2.8567, "step": 52221 }, { "epoch": 2.56, "grad_norm": 0.731708288192749, "learning_rate": 3.1387865610747864e-05, "loss": 2.7866, "step": 52222 }, { "epoch": 2.56, "grad_norm": 0.718250036239624, "learning_rate": 3.138100980403243e-05, "loss": 2.9393, "step": 52223 }, { "epoch": 2.56, "grad_norm": 0.7222898006439209, "learning_rate": 3.137415470480773e-05, "loss": 2.9184, "step": 52224 }, { "epoch": 2.56, "grad_norm": 0.7631383538246155, "learning_rate": 3.136730031309175e-05, "loss": 2.9492, "step": 52225 }, { "epoch": 2.56, "grad_norm": 0.7158065438270569, "learning_rate": 3.13604466289026e-05, "loss": 3.0133, "step": 52226 }, { "epoch": 2.56, "grad_norm": 0.7890329360961914, "learning_rate": 3.135359365225838e-05, "loss": 3.0817, "step": 52227 }, { "epoch": 2.56, "grad_norm": 0.7782434225082397, "learning_rate": 3.134674138317703e-05, "loss": 3.0138, "step": 52228 }, { "epoch": 2.56, "grad_norm": 0.7466015815734863, "learning_rate": 3.133988982167671e-05, "loss": 2.823, "step": 52229 }, { "epoch": 2.56, "grad_norm": 0.712049663066864, "learning_rate": 3.1333038967775404e-05, "loss": 2.9232, "step": 52230 }, { "epoch": 2.56, "grad_norm": 0.7094315886497498, "learning_rate": 3.1326188821491196e-05, "loss": 2.7197, "step": 52231 }, { "epoch": 2.56, "grad_norm": 0.7306052446365356, "learning_rate": 3.131933938284207e-05, "loss": 2.9825, "step": 52232 }, { "epoch": 2.56, "grad_norm": 0.6810746788978577, "learning_rate": 3.131249065184608e-05, "loss": 2.7697, "step": 52233 }, { "epoch": 2.56, "grad_norm": 0.748337984085083, "learning_rate": 3.130564262852131e-05, "loss": 2.9846, "step": 52234 }, { "epoch": 2.56, "grad_norm": 0.7128395438194275, "learning_rate": 3.1298795312885716e-05, "loss": 3.0067, "step": 52235 }, { "epoch": 2.56, "grad_norm": 0.7342303395271301, "learning_rate": 3.1291948704957416e-05, "loss": 3.1391, "step": 52236 }, { "epoch": 2.56, "grad_norm": 0.7613341808319092, "learning_rate": 3.1285102804754424e-05, "loss": 2.9238, "step": 52237 }, { "epoch": 2.56, "grad_norm": 0.8025845289230347, "learning_rate": 3.1278257612294765e-05, "loss": 3.0679, "step": 52238 }, { "epoch": 2.56, "grad_norm": 0.7101073265075684, "learning_rate": 3.127141312759649e-05, "loss": 2.7353, "step": 52239 }, { "epoch": 2.56, "grad_norm": 0.7481630444526672, "learning_rate": 3.126456935067748e-05, "loss": 2.8775, "step": 52240 }, { "epoch": 2.56, "grad_norm": 0.747065544128418, "learning_rate": 3.125772628155593e-05, "loss": 2.9329, "step": 52241 }, { "epoch": 2.56, "grad_norm": 0.7435370683670044, "learning_rate": 3.125088392024985e-05, "loss": 2.9153, "step": 52242 }, { "epoch": 2.56, "grad_norm": 0.781072199344635, "learning_rate": 3.124404226677712e-05, "loss": 2.9379, "step": 52243 }, { "epoch": 2.56, "grad_norm": 0.7462438344955444, "learning_rate": 3.123720132115598e-05, "loss": 2.897, "step": 52244 }, { "epoch": 2.56, "grad_norm": 0.7858874797821045, "learning_rate": 3.12303610834043e-05, "loss": 2.8322, "step": 52245 }, { "epoch": 2.56, "grad_norm": 0.7304945588111877, "learning_rate": 3.1223521553540064e-05, "loss": 2.8908, "step": 52246 }, { "epoch": 2.56, "grad_norm": 0.7903547286987305, "learning_rate": 3.12166827315814e-05, "loss": 2.8174, "step": 52247 }, { "epoch": 2.56, "grad_norm": 0.7469950914382935, "learning_rate": 3.120984461754622e-05, "loss": 2.9731, "step": 52248 }, { "epoch": 2.56, "grad_norm": 0.7844982743263245, "learning_rate": 3.120300721145261e-05, "loss": 2.8559, "step": 52249 }, { "epoch": 2.56, "grad_norm": 0.7452136278152466, "learning_rate": 3.119617051331853e-05, "loss": 3.1268, "step": 52250 }, { "epoch": 2.56, "grad_norm": 0.7502493262290955, "learning_rate": 3.1189334523161954e-05, "loss": 2.8521, "step": 52251 }, { "epoch": 2.56, "grad_norm": 0.6964902877807617, "learning_rate": 3.1182499241001045e-05, "loss": 2.8879, "step": 52252 }, { "epoch": 2.56, "grad_norm": 0.7556952834129333, "learning_rate": 3.1175664666853686e-05, "loss": 2.8724, "step": 52253 }, { "epoch": 2.56, "grad_norm": 0.6863505244255066, "learning_rate": 3.116883080073789e-05, "loss": 2.9686, "step": 52254 }, { "epoch": 2.56, "grad_norm": 0.7049617767333984, "learning_rate": 3.1161997642671574e-05, "loss": 2.9675, "step": 52255 }, { "epoch": 2.56, "grad_norm": 0.7351288199424744, "learning_rate": 3.115516519267284e-05, "loss": 2.7977, "step": 52256 }, { "epoch": 2.56, "grad_norm": 0.7706930637359619, "learning_rate": 3.114833345075972e-05, "loss": 2.9956, "step": 52257 }, { "epoch": 2.56, "grad_norm": 0.771697998046875, "learning_rate": 3.114150241695008e-05, "loss": 2.7364, "step": 52258 }, { "epoch": 2.56, "grad_norm": 0.7425963878631592, "learning_rate": 3.1134672091262035e-05, "loss": 2.963, "step": 52259 }, { "epoch": 2.56, "grad_norm": 0.7799915075302124, "learning_rate": 3.11278424737135e-05, "loss": 2.9248, "step": 52260 }, { "epoch": 2.56, "grad_norm": 0.7096208930015564, "learning_rate": 3.112101356432247e-05, "loss": 2.7819, "step": 52261 }, { "epoch": 2.56, "grad_norm": 0.747329831123352, "learning_rate": 3.111418536310699e-05, "loss": 2.8839, "step": 52262 }, { "epoch": 2.56, "grad_norm": 0.7840654253959656, "learning_rate": 3.110735787008492e-05, "loss": 2.8895, "step": 52263 }, { "epoch": 2.56, "grad_norm": 0.7264786958694458, "learning_rate": 3.1100531085274394e-05, "loss": 2.8788, "step": 52264 }, { "epoch": 2.56, "grad_norm": 0.7728217840194702, "learning_rate": 3.1093705008693245e-05, "loss": 2.9602, "step": 52265 }, { "epoch": 2.56, "grad_norm": 0.7420971393585205, "learning_rate": 3.1086879640359596e-05, "loss": 2.8934, "step": 52266 }, { "epoch": 2.56, "grad_norm": 0.714773952960968, "learning_rate": 3.108005498029136e-05, "loss": 2.8666, "step": 52267 }, { "epoch": 2.56, "grad_norm": 0.7264063954353333, "learning_rate": 3.107323102850642e-05, "loss": 2.8107, "step": 52268 }, { "epoch": 2.56, "grad_norm": 0.7227268815040588, "learning_rate": 3.1066407785022895e-05, "loss": 2.8292, "step": 52269 }, { "epoch": 2.56, "grad_norm": 0.7567482590675354, "learning_rate": 3.1059585249858675e-05, "loss": 2.8256, "step": 52270 }, { "epoch": 2.56, "grad_norm": 0.7323058247566223, "learning_rate": 3.1052763423031714e-05, "loss": 2.9365, "step": 52271 }, { "epoch": 2.56, "grad_norm": 0.7168264389038086, "learning_rate": 3.104594230456009e-05, "loss": 3.0581, "step": 52272 }, { "epoch": 2.56, "grad_norm": 0.7276723980903625, "learning_rate": 3.1039121894461625e-05, "loss": 2.7855, "step": 52273 }, { "epoch": 2.56, "grad_norm": 0.7067778706550598, "learning_rate": 3.103230219275441e-05, "loss": 3.0388, "step": 52274 }, { "epoch": 2.56, "grad_norm": 0.7861632704734802, "learning_rate": 3.1025483199456327e-05, "loss": 2.954, "step": 52275 }, { "epoch": 2.56, "grad_norm": 0.6696493625640869, "learning_rate": 3.1018664914585326e-05, "loss": 2.9169, "step": 52276 }, { "epoch": 2.56, "grad_norm": 0.7829317450523376, "learning_rate": 3.1011847338159454e-05, "loss": 2.9342, "step": 52277 }, { "epoch": 2.56, "grad_norm": 0.7882497310638428, "learning_rate": 3.1005030470196514e-05, "loss": 2.8775, "step": 52278 }, { "epoch": 2.56, "grad_norm": 0.7552138566970825, "learning_rate": 3.099821431071464e-05, "loss": 2.9103, "step": 52279 }, { "epoch": 2.56, "grad_norm": 0.7698383927345276, "learning_rate": 3.099139885973166e-05, "loss": 2.8783, "step": 52280 }, { "epoch": 2.56, "grad_norm": 0.782609760761261, "learning_rate": 3.09845841172656e-05, "loss": 2.975, "step": 52281 }, { "epoch": 2.56, "grad_norm": 0.7387914061546326, "learning_rate": 3.0977770083334366e-05, "loss": 2.8243, "step": 52282 }, { "epoch": 2.56, "grad_norm": 0.7554448843002319, "learning_rate": 3.097095675795588e-05, "loss": 2.9946, "step": 52283 }, { "epoch": 2.56, "grad_norm": 0.7315139770507812, "learning_rate": 3.096414414114813e-05, "loss": 2.9418, "step": 52284 }, { "epoch": 2.56, "grad_norm": 0.7560511827468872, "learning_rate": 3.0957332232929036e-05, "loss": 3.0498, "step": 52285 }, { "epoch": 2.56, "grad_norm": 0.7678272724151611, "learning_rate": 3.095052103331651e-05, "loss": 2.7948, "step": 52286 }, { "epoch": 2.56, "grad_norm": 0.7785819172859192, "learning_rate": 3.0943710542328613e-05, "loss": 2.7721, "step": 52287 }, { "epoch": 2.56, "grad_norm": 0.7176530361175537, "learning_rate": 3.093690075998323e-05, "loss": 2.9313, "step": 52288 }, { "epoch": 2.56, "grad_norm": 0.7264245748519897, "learning_rate": 3.0930091686298206e-05, "loss": 2.7374, "step": 52289 }, { "epoch": 2.56, "grad_norm": 0.7617298364639282, "learning_rate": 3.092328332129154e-05, "loss": 2.8061, "step": 52290 }, { "epoch": 2.56, "grad_norm": 0.7552651166915894, "learning_rate": 3.0916475664981165e-05, "loss": 2.9829, "step": 52291 }, { "epoch": 2.56, "grad_norm": 0.7785048484802246, "learning_rate": 3.0909668717384986e-05, "loss": 2.8478, "step": 52292 }, { "epoch": 2.56, "grad_norm": 0.719987154006958, "learning_rate": 3.090286247852094e-05, "loss": 2.8228, "step": 52293 }, { "epoch": 2.56, "grad_norm": 0.735836386680603, "learning_rate": 3.089605694840702e-05, "loss": 2.9333, "step": 52294 }, { "epoch": 2.56, "grad_norm": 0.763145923614502, "learning_rate": 3.0889252127061045e-05, "loss": 3.0371, "step": 52295 }, { "epoch": 2.56, "grad_norm": 0.7010157108306885, "learning_rate": 3.088244801450104e-05, "loss": 3.0665, "step": 52296 }, { "epoch": 2.56, "grad_norm": 0.7367874979972839, "learning_rate": 3.0875644610744873e-05, "loss": 2.93, "step": 52297 }, { "epoch": 2.56, "grad_norm": 0.7362725138664246, "learning_rate": 3.0868841915810414e-05, "loss": 2.8234, "step": 52298 }, { "epoch": 2.56, "grad_norm": 0.6777426600456238, "learning_rate": 3.086203992971568e-05, "loss": 2.9562, "step": 52299 }, { "epoch": 2.56, "grad_norm": 0.7489884495735168, "learning_rate": 3.085523865247851e-05, "loss": 2.8091, "step": 52300 }, { "epoch": 2.56, "grad_norm": 0.7176619172096252, "learning_rate": 3.084843808411687e-05, "loss": 3.1135, "step": 52301 }, { "epoch": 2.56, "grad_norm": 0.7485775351524353, "learning_rate": 3.084163822464857e-05, "loss": 2.8253, "step": 52302 }, { "epoch": 2.56, "grad_norm": 0.7570214867591858, "learning_rate": 3.08348390740917e-05, "loss": 2.7535, "step": 52303 }, { "epoch": 2.56, "grad_norm": 0.7418150305747986, "learning_rate": 3.082804063246401e-05, "loss": 3.0901, "step": 52304 }, { "epoch": 2.56, "grad_norm": 0.6917791962623596, "learning_rate": 3.0821242899783425e-05, "loss": 2.8208, "step": 52305 }, { "epoch": 2.56, "grad_norm": 0.7729413509368896, "learning_rate": 3.081444587606796e-05, "loss": 2.76, "step": 52306 }, { "epoch": 2.56, "grad_norm": 0.7332080006599426, "learning_rate": 3.0807649561335335e-05, "loss": 3.0991, "step": 52307 }, { "epoch": 2.56, "grad_norm": 0.7556661367416382, "learning_rate": 3.08008539556036e-05, "loss": 2.9618, "step": 52308 }, { "epoch": 2.56, "grad_norm": 0.7616263031959534, "learning_rate": 3.079405905889064e-05, "loss": 2.7836, "step": 52309 }, { "epoch": 2.56, "grad_norm": 0.7406206727027893, "learning_rate": 3.078726487121431e-05, "loss": 2.9042, "step": 52310 }, { "epoch": 2.56, "grad_norm": 0.7701447010040283, "learning_rate": 3.078047139259253e-05, "loss": 3.0468, "step": 52311 }, { "epoch": 2.56, "grad_norm": 0.7630977630615234, "learning_rate": 3.077367862304312e-05, "loss": 2.8137, "step": 52312 }, { "epoch": 2.56, "grad_norm": 0.7166601419448853, "learning_rate": 3.0766886562584024e-05, "loss": 2.9846, "step": 52313 }, { "epoch": 2.56, "grad_norm": 0.7290651202201843, "learning_rate": 3.07600952112332e-05, "loss": 2.9109, "step": 52314 }, { "epoch": 2.56, "grad_norm": 0.7534510493278503, "learning_rate": 3.075330456900841e-05, "loss": 3.0675, "step": 52315 }, { "epoch": 2.56, "grad_norm": 0.7178420424461365, "learning_rate": 3.074651463592762e-05, "loss": 2.9887, "step": 52316 }, { "epoch": 2.56, "grad_norm": 0.7508209347724915, "learning_rate": 3.0739725412008665e-05, "loss": 2.9242, "step": 52317 }, { "epoch": 2.56, "grad_norm": 0.7520042061805725, "learning_rate": 3.073293689726952e-05, "loss": 2.9403, "step": 52318 }, { "epoch": 2.56, "grad_norm": 0.7215971946716309, "learning_rate": 3.072614909172798e-05, "loss": 3.038, "step": 52319 }, { "epoch": 2.56, "grad_norm": 0.761830747127533, "learning_rate": 3.071936199540189e-05, "loss": 2.9039, "step": 52320 }, { "epoch": 2.56, "grad_norm": 0.7258598208427429, "learning_rate": 3.0712575608309206e-05, "loss": 2.8936, "step": 52321 }, { "epoch": 2.56, "grad_norm": 0.7424689531326294, "learning_rate": 3.070578993046775e-05, "loss": 2.8634, "step": 52322 }, { "epoch": 2.56, "grad_norm": 0.7346766591072083, "learning_rate": 3.06990049618954e-05, "loss": 3.0474, "step": 52323 }, { "epoch": 2.56, "grad_norm": 0.7245206236839294, "learning_rate": 3.0692220702610115e-05, "loss": 2.8944, "step": 52324 }, { "epoch": 2.56, "grad_norm": 0.7064627408981323, "learning_rate": 3.0685437152629676e-05, "loss": 2.9548, "step": 52325 }, { "epoch": 2.56, "grad_norm": 0.7573798894882202, "learning_rate": 3.0678654311971974e-05, "loss": 2.772, "step": 52326 }, { "epoch": 2.56, "grad_norm": 0.6997696161270142, "learning_rate": 3.0671872180654825e-05, "loss": 2.7829, "step": 52327 }, { "epoch": 2.56, "grad_norm": 0.7369567155838013, "learning_rate": 3.066509075869611e-05, "loss": 2.819, "step": 52328 }, { "epoch": 2.56, "grad_norm": 0.7621482014656067, "learning_rate": 3.065831004611375e-05, "loss": 2.9502, "step": 52329 }, { "epoch": 2.56, "grad_norm": 0.7581804394721985, "learning_rate": 3.065153004292554e-05, "loss": 2.7999, "step": 52330 }, { "epoch": 2.56, "grad_norm": 0.7452824711799622, "learning_rate": 3.064475074914939e-05, "loss": 3.0601, "step": 52331 }, { "epoch": 2.56, "grad_norm": 0.7450490593910217, "learning_rate": 3.063797216480316e-05, "loss": 2.7313, "step": 52332 }, { "epoch": 2.56, "grad_norm": 0.7387819290161133, "learning_rate": 3.063119428990459e-05, "loss": 3.134, "step": 52333 }, { "epoch": 2.56, "grad_norm": 0.7497106790542603, "learning_rate": 3.062441712447171e-05, "loss": 2.9456, "step": 52334 }, { "epoch": 2.56, "grad_norm": 0.7112284898757935, "learning_rate": 3.061764066852217e-05, "loss": 2.6568, "step": 52335 }, { "epoch": 2.56, "grad_norm": 0.753256618976593, "learning_rate": 3.0610864922073985e-05, "loss": 2.8336, "step": 52336 }, { "epoch": 2.56, "grad_norm": 0.6917237639427185, "learning_rate": 3.0604089885144914e-05, "loss": 2.8997, "step": 52337 }, { "epoch": 2.56, "grad_norm": 0.7741192579269409, "learning_rate": 3.05973155577528e-05, "loss": 2.942, "step": 52338 }, { "epoch": 2.57, "grad_norm": 0.7585358619689941, "learning_rate": 3.059054193991557e-05, "loss": 2.755, "step": 52339 }, { "epoch": 2.57, "grad_norm": 0.7429476380348206, "learning_rate": 3.058376903165097e-05, "loss": 3.0445, "step": 52340 }, { "epoch": 2.57, "grad_norm": 0.7957443594932556, "learning_rate": 3.057699683297693e-05, "loss": 3.0225, "step": 52341 }, { "epoch": 2.57, "grad_norm": 0.7200005650520325, "learning_rate": 3.057022534391113e-05, "loss": 3.0441, "step": 52342 }, { "epoch": 2.57, "grad_norm": 0.7461026310920715, "learning_rate": 3.056345456447151e-05, "loss": 2.8421, "step": 52343 }, { "epoch": 2.57, "grad_norm": 0.7428950071334839, "learning_rate": 3.055668449467598e-05, "loss": 2.6936, "step": 52344 }, { "epoch": 2.57, "grad_norm": 0.7124583721160889, "learning_rate": 3.05499151345422e-05, "loss": 3.1548, "step": 52345 }, { "epoch": 2.57, "grad_norm": 0.7571352124214172, "learning_rate": 3.054314648408815e-05, "loss": 2.9257, "step": 52346 }, { "epoch": 2.57, "grad_norm": 0.7435238361358643, "learning_rate": 3.0536378543331616e-05, "loss": 2.7556, "step": 52347 }, { "epoch": 2.57, "grad_norm": 0.733097493648529, "learning_rate": 3.052961131229032e-05, "loss": 2.807, "step": 52348 }, { "epoch": 2.57, "grad_norm": 0.7678351998329163, "learning_rate": 3.052284479098225e-05, "loss": 2.7884, "step": 52349 }, { "epoch": 2.57, "grad_norm": 0.7466233372688293, "learning_rate": 3.051607897942504e-05, "loss": 2.9157, "step": 52350 }, { "epoch": 2.57, "grad_norm": 0.7528746724128723, "learning_rate": 3.050931387763671e-05, "loss": 2.8802, "step": 52351 }, { "epoch": 2.57, "grad_norm": 0.7836788296699524, "learning_rate": 3.0502549485634908e-05, "loss": 2.6929, "step": 52352 }, { "epoch": 2.57, "grad_norm": 0.7055717706680298, "learning_rate": 3.0495785803437557e-05, "loss": 2.9488, "step": 52353 }, { "epoch": 2.57, "grad_norm": 0.7400059103965759, "learning_rate": 3.0489022831062472e-05, "loss": 2.755, "step": 52354 }, { "epoch": 2.57, "grad_norm": 0.7582606077194214, "learning_rate": 3.048226056852744e-05, "loss": 2.8012, "step": 52355 }, { "epoch": 2.57, "grad_norm": 0.751268208026886, "learning_rate": 3.0475499015850247e-05, "loss": 2.6739, "step": 52356 }, { "epoch": 2.57, "grad_norm": 0.741468071937561, "learning_rate": 3.0468738173048678e-05, "loss": 2.9144, "step": 52357 }, { "epoch": 2.57, "grad_norm": 0.6897956132888794, "learning_rate": 3.0461978040140557e-05, "loss": 3.1279, "step": 52358 }, { "epoch": 2.57, "grad_norm": 0.7293424606323242, "learning_rate": 3.0455218617143796e-05, "loss": 3.0598, "step": 52359 }, { "epoch": 2.57, "grad_norm": 0.8046936988830566, "learning_rate": 3.044845990407605e-05, "loss": 2.8674, "step": 52360 }, { "epoch": 2.57, "grad_norm": 0.7003205418586731, "learning_rate": 3.044170190095524e-05, "loss": 2.8303, "step": 52361 }, { "epoch": 2.57, "grad_norm": 0.7055088877677917, "learning_rate": 3.0434944607799116e-05, "loss": 2.8384, "step": 52362 }, { "epoch": 2.57, "grad_norm": 0.7319667935371399, "learning_rate": 3.04281880246254e-05, "loss": 2.9059, "step": 52363 }, { "epoch": 2.57, "grad_norm": 0.7617000937461853, "learning_rate": 3.042143215145201e-05, "loss": 2.8728, "step": 52364 }, { "epoch": 2.57, "grad_norm": 0.7651677131652832, "learning_rate": 3.041467698829666e-05, "loss": 2.963, "step": 52365 }, { "epoch": 2.57, "grad_norm": 0.7394811511039734, "learning_rate": 3.0407922535177176e-05, "loss": 2.7771, "step": 52366 }, { "epoch": 2.57, "grad_norm": 0.7691500782966614, "learning_rate": 3.040116879211131e-05, "loss": 2.866, "step": 52367 }, { "epoch": 2.57, "grad_norm": 0.7116948962211609, "learning_rate": 3.039441575911694e-05, "loss": 2.9602, "step": 52368 }, { "epoch": 2.57, "grad_norm": 0.7746070623397827, "learning_rate": 3.0387663436211796e-05, "loss": 2.8734, "step": 52369 }, { "epoch": 2.57, "grad_norm": 0.743488073348999, "learning_rate": 3.038091182341359e-05, "loss": 2.7054, "step": 52370 }, { "epoch": 2.57, "grad_norm": 0.7217757105827332, "learning_rate": 3.0374160920740242e-05, "loss": 2.8834, "step": 52371 }, { "epoch": 2.57, "grad_norm": 0.7398301362991333, "learning_rate": 3.0367410728209408e-05, "loss": 3.0355, "step": 52372 }, { "epoch": 2.57, "grad_norm": 0.7479260563850403, "learning_rate": 3.036066124583897e-05, "loss": 2.9872, "step": 52373 }, { "epoch": 2.57, "grad_norm": 0.7478479743003845, "learning_rate": 3.0353912473646615e-05, "loss": 2.9438, "step": 52374 }, { "epoch": 2.57, "grad_norm": 0.7435922026634216, "learning_rate": 3.0347164411650126e-05, "loss": 2.8484, "step": 52375 }, { "epoch": 2.57, "grad_norm": 0.7409965991973877, "learning_rate": 3.0340417059867396e-05, "loss": 2.9877, "step": 52376 }, { "epoch": 2.57, "grad_norm": 0.748325526714325, "learning_rate": 3.0333670418316103e-05, "loss": 2.9524, "step": 52377 }, { "epoch": 2.57, "grad_norm": 0.7367417216300964, "learning_rate": 3.0326924487014005e-05, "loss": 2.951, "step": 52378 }, { "epoch": 2.57, "grad_norm": 0.753197431564331, "learning_rate": 3.032017926597885e-05, "loss": 3.1165, "step": 52379 }, { "epoch": 2.57, "grad_norm": 0.7246016263961792, "learning_rate": 3.031343475522846e-05, "loss": 2.8368, "step": 52380 }, { "epoch": 2.57, "grad_norm": 0.7062520980834961, "learning_rate": 3.0306690954780622e-05, "loss": 2.9998, "step": 52381 }, { "epoch": 2.57, "grad_norm": 0.73405522108078, "learning_rate": 3.0299947864652984e-05, "loss": 2.8911, "step": 52382 }, { "epoch": 2.57, "grad_norm": 0.7310712337493896, "learning_rate": 3.0293205484863435e-05, "loss": 2.8359, "step": 52383 }, { "epoch": 2.57, "grad_norm": 0.759092390537262, "learning_rate": 3.0286463815429695e-05, "loss": 3.0487, "step": 52384 }, { "epoch": 2.57, "grad_norm": 0.7694963812828064, "learning_rate": 3.0279722856369447e-05, "loss": 2.7858, "step": 52385 }, { "epoch": 2.57, "grad_norm": 0.7476155161857605, "learning_rate": 3.0272982607700546e-05, "loss": 3.0141, "step": 52386 }, { "epoch": 2.57, "grad_norm": 0.7267019152641296, "learning_rate": 3.0266243069440645e-05, "loss": 2.9373, "step": 52387 }, { "epoch": 2.57, "grad_norm": 0.7420546412467957, "learning_rate": 3.0259504241607625e-05, "loss": 3.0561, "step": 52388 }, { "epoch": 2.57, "grad_norm": 0.7914209961891174, "learning_rate": 3.0252766124219076e-05, "loss": 3.0165, "step": 52389 }, { "epoch": 2.57, "grad_norm": 0.7075768113136292, "learning_rate": 3.024602871729288e-05, "loss": 3.1116, "step": 52390 }, { "epoch": 2.57, "grad_norm": 0.7824947237968445, "learning_rate": 3.023929202084676e-05, "loss": 2.9569, "step": 52391 }, { "epoch": 2.57, "grad_norm": 0.7449918985366821, "learning_rate": 3.0232556034898336e-05, "loss": 2.8483, "step": 52392 }, { "epoch": 2.57, "grad_norm": 0.7132568359375, "learning_rate": 3.0225820759465525e-05, "loss": 2.9501, "step": 52393 }, { "epoch": 2.57, "grad_norm": 0.8174427151679993, "learning_rate": 3.0219086194565913e-05, "loss": 2.925, "step": 52394 }, { "epoch": 2.57, "grad_norm": 0.7686358690261841, "learning_rate": 3.0212352340217283e-05, "loss": 2.7436, "step": 52395 }, { "epoch": 2.57, "grad_norm": 0.7603972554206848, "learning_rate": 3.020561919643749e-05, "loss": 2.8535, "step": 52396 }, { "epoch": 2.57, "grad_norm": 0.7688196301460266, "learning_rate": 3.0198886763244124e-05, "loss": 2.8584, "step": 52397 }, { "epoch": 2.57, "grad_norm": 0.7407582402229309, "learning_rate": 3.0192155040655e-05, "loss": 2.8672, "step": 52398 }, { "epoch": 2.57, "grad_norm": 0.7440164685249329, "learning_rate": 3.01854240286878e-05, "loss": 2.8893, "step": 52399 }, { "epoch": 2.57, "grad_norm": 0.768831193447113, "learning_rate": 3.0178693727360247e-05, "loss": 2.7461, "step": 52400 }, { "epoch": 2.57, "grad_norm": 0.7303265333175659, "learning_rate": 3.0171964136690098e-05, "loss": 2.874, "step": 52401 }, { "epoch": 2.57, "grad_norm": 0.7380855083465576, "learning_rate": 3.016523525669503e-05, "loss": 2.6449, "step": 52402 }, { "epoch": 2.57, "grad_norm": 0.7118068933486938, "learning_rate": 3.015850708739287e-05, "loss": 2.978, "step": 52403 }, { "epoch": 2.57, "grad_norm": 0.7120081782341003, "learning_rate": 3.0151779628801198e-05, "loss": 2.8128, "step": 52404 }, { "epoch": 2.57, "grad_norm": 0.7590805292129517, "learning_rate": 3.0145052880937903e-05, "loss": 2.8198, "step": 52405 }, { "epoch": 2.57, "grad_norm": 0.7395599484443665, "learning_rate": 3.013832684382057e-05, "loss": 2.7616, "step": 52406 }, { "epoch": 2.57, "grad_norm": 0.7489631772041321, "learning_rate": 3.0131601517466885e-05, "loss": 2.9875, "step": 52407 }, { "epoch": 2.57, "grad_norm": 0.7233737707138062, "learning_rate": 3.012487690189467e-05, "loss": 2.8304, "step": 52408 }, { "epoch": 2.57, "grad_norm": 0.808874785900116, "learning_rate": 3.011815299712157e-05, "loss": 2.8516, "step": 52409 }, { "epoch": 2.57, "grad_norm": 0.7949105501174927, "learning_rate": 3.0111429803165277e-05, "loss": 2.6781, "step": 52410 }, { "epoch": 2.57, "grad_norm": 0.7281270027160645, "learning_rate": 3.0104707320043643e-05, "loss": 2.8587, "step": 52411 }, { "epoch": 2.57, "grad_norm": 0.7180661559104919, "learning_rate": 3.0097985547774216e-05, "loss": 2.7965, "step": 52412 }, { "epoch": 2.57, "grad_norm": 0.7658478021621704, "learning_rate": 3.0091264486374788e-05, "loss": 2.6224, "step": 52413 }, { "epoch": 2.57, "grad_norm": 0.7765020132064819, "learning_rate": 3.0084544135862943e-05, "loss": 3.065, "step": 52414 }, { "epoch": 2.57, "grad_norm": 0.7255092263221741, "learning_rate": 3.0077824496256496e-05, "loss": 2.8975, "step": 52415 }, { "epoch": 2.57, "grad_norm": 0.7271882891654968, "learning_rate": 3.007110556757314e-05, "loss": 2.7939, "step": 52416 }, { "epoch": 2.57, "grad_norm": 0.7662795782089233, "learning_rate": 3.0064387349830486e-05, "loss": 2.9394, "step": 52417 }, { "epoch": 2.57, "grad_norm": 0.7720481753349304, "learning_rate": 3.005766984304636e-05, "loss": 3.0693, "step": 52418 }, { "epoch": 2.57, "grad_norm": 0.7715625762939453, "learning_rate": 3.005095304723831e-05, "loss": 2.8276, "step": 52419 }, { "epoch": 2.57, "grad_norm": 0.7382410764694214, "learning_rate": 3.0044236962424162e-05, "loss": 2.9642, "step": 52420 }, { "epoch": 2.57, "grad_norm": 0.7307043075561523, "learning_rate": 3.0037521588621527e-05, "loss": 2.7719, "step": 52421 }, { "epoch": 2.57, "grad_norm": 0.7969446182250977, "learning_rate": 3.0030806925848062e-05, "loss": 2.9994, "step": 52422 }, { "epoch": 2.57, "grad_norm": 0.7565550208091736, "learning_rate": 3.0024092974121517e-05, "loss": 2.9169, "step": 52423 }, { "epoch": 2.57, "grad_norm": 0.7802337408065796, "learning_rate": 3.0017379733459545e-05, "loss": 2.9471, "step": 52424 }, { "epoch": 2.57, "grad_norm": 0.7368726134300232, "learning_rate": 3.00106672038798e-05, "loss": 2.9638, "step": 52425 }, { "epoch": 2.57, "grad_norm": 0.7419968247413635, "learning_rate": 3.000395538540007e-05, "loss": 2.9303, "step": 52426 }, { "epoch": 2.57, "grad_norm": 0.7627372741699219, "learning_rate": 2.9997244278037967e-05, "loss": 3.1326, "step": 52427 }, { "epoch": 2.57, "grad_norm": 0.7172695994377136, "learning_rate": 2.999053388181115e-05, "loss": 2.9077, "step": 52428 }, { "epoch": 2.57, "grad_norm": 0.7625597715377808, "learning_rate": 2.9983824196737237e-05, "loss": 2.6087, "step": 52429 }, { "epoch": 2.57, "grad_norm": 0.7408584952354431, "learning_rate": 2.997711522283398e-05, "loss": 3.0352, "step": 52430 }, { "epoch": 2.57, "grad_norm": 0.7761721611022949, "learning_rate": 2.9970406960119096e-05, "loss": 2.7375, "step": 52431 }, { "epoch": 2.57, "grad_norm": 0.7523345351219177, "learning_rate": 2.996369940861011e-05, "loss": 3.0537, "step": 52432 }, { "epoch": 2.57, "grad_norm": 0.7538020610809326, "learning_rate": 2.9956992568324833e-05, "loss": 2.8131, "step": 52433 }, { "epoch": 2.57, "grad_norm": 0.7494543790817261, "learning_rate": 2.995028643928089e-05, "loss": 2.9724, "step": 52434 }, { "epoch": 2.57, "grad_norm": 0.8455016016960144, "learning_rate": 2.9943581021495833e-05, "loss": 2.8156, "step": 52435 }, { "epoch": 2.57, "grad_norm": 0.7785493731498718, "learning_rate": 2.9936876314987513e-05, "loss": 2.874, "step": 52436 }, { "epoch": 2.57, "grad_norm": 0.740427553653717, "learning_rate": 2.9930172319773383e-05, "loss": 2.8312, "step": 52437 }, { "epoch": 2.57, "grad_norm": 0.7532118558883667, "learning_rate": 2.9923469035871294e-05, "loss": 2.6936, "step": 52438 }, { "epoch": 2.57, "grad_norm": 0.7429023385047913, "learning_rate": 2.9916766463298736e-05, "loss": 2.7284, "step": 52439 }, { "epoch": 2.57, "grad_norm": 0.7911916971206665, "learning_rate": 2.9910064602073426e-05, "loss": 2.9946, "step": 52440 }, { "epoch": 2.57, "grad_norm": 0.7958114743232727, "learning_rate": 2.990336345221308e-05, "loss": 2.8705, "step": 52441 }, { "epoch": 2.57, "grad_norm": 0.7775363326072693, "learning_rate": 2.9896663013735324e-05, "loss": 2.8921, "step": 52442 }, { "epoch": 2.57, "grad_norm": 0.7311971187591553, "learning_rate": 2.9889963286657737e-05, "loss": 2.9018, "step": 52443 }, { "epoch": 2.57, "grad_norm": 0.7621795535087585, "learning_rate": 2.9883264270997977e-05, "loss": 3.0524, "step": 52444 }, { "epoch": 2.57, "grad_norm": 0.7280380725860596, "learning_rate": 2.987656596677376e-05, "loss": 2.9503, "step": 52445 }, { "epoch": 2.57, "grad_norm": 0.7853645086288452, "learning_rate": 2.9869868374002637e-05, "loss": 2.7609, "step": 52446 }, { "epoch": 2.57, "grad_norm": 0.7341321706771851, "learning_rate": 2.9863171492702263e-05, "loss": 2.7723, "step": 52447 }, { "epoch": 2.57, "grad_norm": 0.7179156541824341, "learning_rate": 2.9856475322890396e-05, "loss": 2.89, "step": 52448 }, { "epoch": 2.57, "grad_norm": 0.7569622993469238, "learning_rate": 2.9849779864584577e-05, "loss": 2.9834, "step": 52449 }, { "epoch": 2.57, "grad_norm": 0.7575824856758118, "learning_rate": 2.9843085117802433e-05, "loss": 2.9927, "step": 52450 }, { "epoch": 2.57, "grad_norm": 0.7787556052207947, "learning_rate": 2.983639108256155e-05, "loss": 2.9165, "step": 52451 }, { "epoch": 2.57, "grad_norm": 0.7417843341827393, "learning_rate": 2.9829697758879645e-05, "loss": 3.0732, "step": 52452 }, { "epoch": 2.57, "grad_norm": 0.756346583366394, "learning_rate": 2.9823005146774336e-05, "loss": 3.1536, "step": 52453 }, { "epoch": 2.57, "grad_norm": 0.7447629570960999, "learning_rate": 2.981631324626321e-05, "loss": 2.9677, "step": 52454 }, { "epoch": 2.57, "grad_norm": 0.7455997467041016, "learning_rate": 2.9809622057363992e-05, "loss": 2.8583, "step": 52455 }, { "epoch": 2.57, "grad_norm": 0.7502301335334778, "learning_rate": 2.9802931580094126e-05, "loss": 2.9962, "step": 52456 }, { "epoch": 2.57, "grad_norm": 0.7398320436477661, "learning_rate": 2.9796241814471432e-05, "loss": 3.0755, "step": 52457 }, { "epoch": 2.57, "grad_norm": 0.8331220149993896, "learning_rate": 2.9789552760513403e-05, "loss": 2.8654, "step": 52458 }, { "epoch": 2.57, "grad_norm": 0.711363673210144, "learning_rate": 2.978286441823765e-05, "loss": 3.0171, "step": 52459 }, { "epoch": 2.57, "grad_norm": 0.7725111246109009, "learning_rate": 2.97761767876619e-05, "loss": 2.9687, "step": 52460 }, { "epoch": 2.57, "grad_norm": 0.7222541570663452, "learning_rate": 2.9769489868803598e-05, "loss": 2.7685, "step": 52461 }, { "epoch": 2.57, "grad_norm": 0.7226037979125977, "learning_rate": 2.9762803661680502e-05, "loss": 2.8108, "step": 52462 }, { "epoch": 2.57, "grad_norm": 0.7615007162094116, "learning_rate": 2.9756118166310193e-05, "loss": 2.9072, "step": 52463 }, { "epoch": 2.57, "grad_norm": 0.7283841967582703, "learning_rate": 2.9749433382710265e-05, "loss": 2.8891, "step": 52464 }, { "epoch": 2.57, "grad_norm": 0.7138139605522156, "learning_rate": 2.9742749310898327e-05, "loss": 2.6945, "step": 52465 }, { "epoch": 2.57, "grad_norm": 0.7099862694740295, "learning_rate": 2.9736065950891908e-05, "loss": 2.9704, "step": 52466 }, { "epoch": 2.57, "grad_norm": 0.6979156136512756, "learning_rate": 2.9729383302708688e-05, "loss": 2.8211, "step": 52467 }, { "epoch": 2.57, "grad_norm": 0.734841525554657, "learning_rate": 2.972270136636632e-05, "loss": 3.0207, "step": 52468 }, { "epoch": 2.57, "grad_norm": 0.7860302329063416, "learning_rate": 2.971602014188229e-05, "loss": 2.7577, "step": 52469 }, { "epoch": 2.57, "grad_norm": 0.7355887293815613, "learning_rate": 2.9709339629274285e-05, "loss": 3.0183, "step": 52470 }, { "epoch": 2.57, "grad_norm": 0.737175464630127, "learning_rate": 2.970265982855986e-05, "loss": 2.9608, "step": 52471 }, { "epoch": 2.57, "grad_norm": 0.7097364664077759, "learning_rate": 2.9695980739756564e-05, "loss": 2.9803, "step": 52472 }, { "epoch": 2.57, "grad_norm": 0.7723646759986877, "learning_rate": 2.9689302362882084e-05, "loss": 2.8259, "step": 52473 }, { "epoch": 2.57, "grad_norm": 0.7352739572525024, "learning_rate": 2.9682624697953873e-05, "loss": 2.9392, "step": 52474 }, { "epoch": 2.57, "grad_norm": 0.7245145440101624, "learning_rate": 2.9675947744989716e-05, "loss": 2.7403, "step": 52475 }, { "epoch": 2.57, "grad_norm": 0.7283601760864258, "learning_rate": 2.9669271504006997e-05, "loss": 2.7105, "step": 52476 }, { "epoch": 2.57, "grad_norm": 0.7669225335121155, "learning_rate": 2.9662595975023406e-05, "loss": 2.8408, "step": 52477 }, { "epoch": 2.57, "grad_norm": 0.7420752048492432, "learning_rate": 2.965592115805656e-05, "loss": 2.8891, "step": 52478 }, { "epoch": 2.57, "grad_norm": 0.7681543827056885, "learning_rate": 2.964924705312398e-05, "loss": 2.959, "step": 52479 }, { "epoch": 2.57, "grad_norm": 0.8160313367843628, "learning_rate": 2.9642573660243252e-05, "loss": 3.0549, "step": 52480 }, { "epoch": 2.57, "grad_norm": 0.7001559138298035, "learning_rate": 2.9635900979431927e-05, "loss": 2.8459, "step": 52481 }, { "epoch": 2.57, "grad_norm": 0.7368727326393127, "learning_rate": 2.9629229010707588e-05, "loss": 2.7764, "step": 52482 }, { "epoch": 2.57, "grad_norm": 0.7512417435646057, "learning_rate": 2.962255775408786e-05, "loss": 2.8818, "step": 52483 }, { "epoch": 2.57, "grad_norm": 0.7434666752815247, "learning_rate": 2.9615887209590263e-05, "loss": 2.8293, "step": 52484 }, { "epoch": 2.57, "grad_norm": 0.7331339120864868, "learning_rate": 2.960921737723241e-05, "loss": 2.8604, "step": 52485 }, { "epoch": 2.57, "grad_norm": 0.7475157976150513, "learning_rate": 2.9602548257031854e-05, "loss": 2.8809, "step": 52486 }, { "epoch": 2.57, "grad_norm": 0.7474045753479004, "learning_rate": 2.959587984900609e-05, "loss": 3.0424, "step": 52487 }, { "epoch": 2.57, "grad_norm": 0.7134113311767578, "learning_rate": 2.9589212153172794e-05, "loss": 2.8692, "step": 52488 }, { "epoch": 2.57, "grad_norm": 0.7441120147705078, "learning_rate": 2.958254516954939e-05, "loss": 2.7825, "step": 52489 }, { "epoch": 2.57, "grad_norm": 0.7090118527412415, "learning_rate": 2.9575878898153594e-05, "loss": 2.9669, "step": 52490 }, { "epoch": 2.57, "grad_norm": 0.7831944227218628, "learning_rate": 2.956921333900286e-05, "loss": 2.934, "step": 52491 }, { "epoch": 2.57, "grad_norm": 0.7588694095611572, "learning_rate": 2.9562548492114814e-05, "loss": 2.9626, "step": 52492 }, { "epoch": 2.57, "grad_norm": 0.7268178462982178, "learning_rate": 2.955588435750693e-05, "loss": 2.8937, "step": 52493 }, { "epoch": 2.57, "grad_norm": 0.7331435084342957, "learning_rate": 2.954922093519677e-05, "loss": 2.879, "step": 52494 }, { "epoch": 2.57, "grad_norm": 0.7241976261138916, "learning_rate": 2.9542558225201984e-05, "loss": 2.7338, "step": 52495 }, { "epoch": 2.57, "grad_norm": 0.7318156957626343, "learning_rate": 2.953589622753999e-05, "loss": 2.8821, "step": 52496 }, { "epoch": 2.57, "grad_norm": 0.7937421202659607, "learning_rate": 2.9529234942228376e-05, "loss": 2.8645, "step": 52497 }, { "epoch": 2.57, "grad_norm": 0.7748422622680664, "learning_rate": 2.952257436928476e-05, "loss": 2.8881, "step": 52498 }, { "epoch": 2.57, "grad_norm": 0.7224046587944031, "learning_rate": 2.951591450872659e-05, "loss": 2.9246, "step": 52499 }, { "epoch": 2.57, "grad_norm": 0.7512742877006531, "learning_rate": 2.9509255360571493e-05, "loss": 2.9546, "step": 52500 }, { "epoch": 2.57, "grad_norm": 0.7295030951499939, "learning_rate": 2.9502596924836952e-05, "loss": 2.9228, "step": 52501 }, { "epoch": 2.57, "grad_norm": 0.7399193644523621, "learning_rate": 2.949593920154045e-05, "loss": 2.7575, "step": 52502 }, { "epoch": 2.57, "grad_norm": 0.7585479021072388, "learning_rate": 2.948928219069968e-05, "loss": 2.8837, "step": 52503 }, { "epoch": 2.57, "grad_norm": 0.8097811341285706, "learning_rate": 2.948262589233199e-05, "loss": 2.9717, "step": 52504 }, { "epoch": 2.57, "grad_norm": 0.7251771688461304, "learning_rate": 2.9475970306455065e-05, "loss": 2.9217, "step": 52505 }, { "epoch": 2.57, "grad_norm": 0.7347126603126526, "learning_rate": 2.9469315433086327e-05, "loss": 2.8441, "step": 52506 }, { "epoch": 2.57, "grad_norm": 0.7708790302276611, "learning_rate": 2.9462661272243394e-05, "loss": 2.7953, "step": 52507 }, { "epoch": 2.57, "grad_norm": 0.7497082948684692, "learning_rate": 2.9456007823943716e-05, "loss": 2.8825, "step": 52508 }, { "epoch": 2.57, "grad_norm": 0.7288712859153748, "learning_rate": 2.9449355088204817e-05, "loss": 2.8716, "step": 52509 }, { "epoch": 2.57, "grad_norm": 0.745967447757721, "learning_rate": 2.9442703065044314e-05, "loss": 2.9418, "step": 52510 }, { "epoch": 2.57, "grad_norm": 0.7650946378707886, "learning_rate": 2.9436051754479595e-05, "loss": 3.0354, "step": 52511 }, { "epoch": 2.57, "grad_norm": 0.7825707793235779, "learning_rate": 2.9429401156528242e-05, "loss": 2.9016, "step": 52512 }, { "epoch": 2.57, "grad_norm": 0.775109052658081, "learning_rate": 2.9422751271207845e-05, "loss": 2.8788, "step": 52513 }, { "epoch": 2.57, "grad_norm": 0.7160129547119141, "learning_rate": 2.941610209853582e-05, "loss": 2.8551, "step": 52514 }, { "epoch": 2.57, "grad_norm": 0.7564792037010193, "learning_rate": 2.9409453638529755e-05, "loss": 2.9526, "step": 52515 }, { "epoch": 2.57, "grad_norm": 0.7617117166519165, "learning_rate": 2.9402805891207006e-05, "loss": 2.9431, "step": 52516 }, { "epoch": 2.57, "grad_norm": 0.7508881688117981, "learning_rate": 2.9396158856585216e-05, "loss": 2.9826, "step": 52517 }, { "epoch": 2.57, "grad_norm": 0.7413908839225769, "learning_rate": 2.9389512534681914e-05, "loss": 2.6746, "step": 52518 }, { "epoch": 2.57, "grad_norm": 0.749812126159668, "learning_rate": 2.938286692551448e-05, "loss": 2.9919, "step": 52519 }, { "epoch": 2.57, "grad_norm": 0.7722899317741394, "learning_rate": 2.937622202910057e-05, "loss": 3.0812, "step": 52520 }, { "epoch": 2.57, "grad_norm": 0.7824468016624451, "learning_rate": 2.936957784545757e-05, "loss": 2.9144, "step": 52521 }, { "epoch": 2.57, "grad_norm": 0.7449400424957275, "learning_rate": 2.936293437460303e-05, "loss": 3.1251, "step": 52522 }, { "epoch": 2.57, "grad_norm": 0.7934387922286987, "learning_rate": 2.9356291616554473e-05, "loss": 2.9064, "step": 52523 }, { "epoch": 2.57, "grad_norm": 0.7479174733161926, "learning_rate": 2.934964957132928e-05, "loss": 3.1097, "step": 52524 }, { "epoch": 2.57, "grad_norm": 0.7839178442955017, "learning_rate": 2.934300823894511e-05, "loss": 2.9528, "step": 52525 }, { "epoch": 2.57, "grad_norm": 0.7495297789573669, "learning_rate": 2.9336367619419276e-05, "loss": 2.9153, "step": 52526 }, { "epoch": 2.57, "grad_norm": 0.7545944452285767, "learning_rate": 2.9329727712769435e-05, "loss": 2.9136, "step": 52527 }, { "epoch": 2.57, "grad_norm": 0.7327878475189209, "learning_rate": 2.9323088519012938e-05, "loss": 3.0669, "step": 52528 }, { "epoch": 2.57, "grad_norm": 0.7432766556739807, "learning_rate": 2.9316450038167372e-05, "loss": 3.0313, "step": 52529 }, { "epoch": 2.57, "grad_norm": 0.7075245380401611, "learning_rate": 2.930981227025022e-05, "loss": 2.9219, "step": 52530 }, { "epoch": 2.57, "grad_norm": 0.740532398223877, "learning_rate": 2.930317521527884e-05, "loss": 2.7855, "step": 52531 }, { "epoch": 2.57, "grad_norm": 0.7468576431274414, "learning_rate": 2.9296538873270847e-05, "loss": 2.8628, "step": 52532 }, { "epoch": 2.57, "grad_norm": 0.7329097390174866, "learning_rate": 2.9289903244243662e-05, "loss": 2.8902, "step": 52533 }, { "epoch": 2.57, "grad_norm": 0.7266642451286316, "learning_rate": 2.9283268328214737e-05, "loss": 3.0384, "step": 52534 }, { "epoch": 2.57, "grad_norm": 0.7914463877677917, "learning_rate": 2.9276634125201627e-05, "loss": 3.044, "step": 52535 }, { "epoch": 2.57, "grad_norm": 0.7478669285774231, "learning_rate": 2.927000063522178e-05, "loss": 2.7363, "step": 52536 }, { "epoch": 2.57, "grad_norm": 0.7697821855545044, "learning_rate": 2.9263367858292653e-05, "loss": 2.6884, "step": 52537 }, { "epoch": 2.57, "grad_norm": 0.7177620530128479, "learning_rate": 2.9256735794431662e-05, "loss": 3.0663, "step": 52538 }, { "epoch": 2.57, "grad_norm": 0.7495757937431335, "learning_rate": 2.9250104443656295e-05, "loss": 2.8574, "step": 52539 }, { "epoch": 2.57, "grad_norm": 0.7898780703544617, "learning_rate": 2.924347380598414e-05, "loss": 3.1215, "step": 52540 }, { "epoch": 2.57, "grad_norm": 0.7186577320098877, "learning_rate": 2.9236843881432482e-05, "loss": 2.8895, "step": 52541 }, { "epoch": 2.57, "grad_norm": 0.7339335680007935, "learning_rate": 2.9230214670018936e-05, "loss": 3.0893, "step": 52542 }, { "epoch": 2.58, "grad_norm": 0.7203998565673828, "learning_rate": 2.9223586171760827e-05, "loss": 2.7479, "step": 52543 }, { "epoch": 2.58, "grad_norm": 0.7361055612564087, "learning_rate": 2.9216958386675738e-05, "loss": 2.9828, "step": 52544 }, { "epoch": 2.58, "grad_norm": 0.7605053186416626, "learning_rate": 2.9210331314781088e-05, "loss": 2.8262, "step": 52545 }, { "epoch": 2.58, "grad_norm": 0.7568164467811584, "learning_rate": 2.920370495609423e-05, "loss": 2.8678, "step": 52546 }, { "epoch": 2.58, "grad_norm": 0.703392744064331, "learning_rate": 2.9197079310632787e-05, "loss": 2.7076, "step": 52547 }, { "epoch": 2.58, "grad_norm": 0.7546159625053406, "learning_rate": 2.9190454378414007e-05, "loss": 3.0159, "step": 52548 }, { "epoch": 2.58, "grad_norm": 0.7149426937103271, "learning_rate": 2.9183830159455513e-05, "loss": 2.7881, "step": 52549 }, { "epoch": 2.58, "grad_norm": 0.7450437545776367, "learning_rate": 2.9177206653774722e-05, "loss": 2.8648, "step": 52550 }, { "epoch": 2.58, "grad_norm": 0.7561696171760559, "learning_rate": 2.917058386138902e-05, "loss": 2.7633, "step": 52551 }, { "epoch": 2.58, "grad_norm": 0.7118037939071655, "learning_rate": 2.9163961782315924e-05, "loss": 2.9854, "step": 52552 }, { "epoch": 2.58, "grad_norm": 0.7305014133453369, "learning_rate": 2.9157340416572727e-05, "loss": 2.6679, "step": 52553 }, { "epoch": 2.58, "grad_norm": 0.7449045181274414, "learning_rate": 2.915071976417701e-05, "loss": 2.8047, "step": 52554 }, { "epoch": 2.58, "grad_norm": 0.7418058514595032, "learning_rate": 2.9144099825146194e-05, "loss": 2.8945, "step": 52555 }, { "epoch": 2.58, "grad_norm": 0.7265969514846802, "learning_rate": 2.9137480599497664e-05, "loss": 3.0225, "step": 52556 }, { "epoch": 2.58, "grad_norm": 0.7401600480079651, "learning_rate": 2.913086208724894e-05, "loss": 2.9199, "step": 52557 }, { "epoch": 2.58, "grad_norm": 0.7653848528862, "learning_rate": 2.912424428841731e-05, "loss": 2.897, "step": 52558 }, { "epoch": 2.58, "grad_norm": 0.7716215252876282, "learning_rate": 2.9117627203020354e-05, "loss": 3.0183, "step": 52559 }, { "epoch": 2.58, "grad_norm": 0.7675553560256958, "learning_rate": 2.9111010831075467e-05, "loss": 2.9278, "step": 52560 }, { "epoch": 2.58, "grad_norm": 0.8107246160507202, "learning_rate": 2.910439517259996e-05, "loss": 2.7924, "step": 52561 }, { "epoch": 2.58, "grad_norm": 0.7437394261360168, "learning_rate": 2.909778022761139e-05, "loss": 2.9086, "step": 52562 }, { "epoch": 2.58, "grad_norm": 0.7718828320503235, "learning_rate": 2.909116599612711e-05, "loss": 2.9937, "step": 52563 }, { "epoch": 2.58, "grad_norm": 0.7451728582382202, "learning_rate": 2.9084552478164537e-05, "loss": 2.7001, "step": 52564 }, { "epoch": 2.58, "grad_norm": 0.7395138144493103, "learning_rate": 2.9077939673741158e-05, "loss": 3.1498, "step": 52565 }, { "epoch": 2.58, "grad_norm": 0.7220340967178345, "learning_rate": 2.907132758287436e-05, "loss": 3.1024, "step": 52566 }, { "epoch": 2.58, "grad_norm": 0.7452659010887146, "learning_rate": 2.9064716205581527e-05, "loss": 3.0528, "step": 52567 }, { "epoch": 2.58, "grad_norm": 0.7208335995674133, "learning_rate": 2.905810554188005e-05, "loss": 2.9171, "step": 52568 }, { "epoch": 2.58, "grad_norm": 0.7047619223594666, "learning_rate": 2.9051495591787376e-05, "loss": 2.9405, "step": 52569 }, { "epoch": 2.58, "grad_norm": 0.7219128608703613, "learning_rate": 2.9044886355320996e-05, "loss": 2.9738, "step": 52570 }, { "epoch": 2.58, "grad_norm": 0.7547860145568848, "learning_rate": 2.9038277832498157e-05, "loss": 2.9313, "step": 52571 }, { "epoch": 2.58, "grad_norm": 0.7108772397041321, "learning_rate": 2.9031670023336418e-05, "loss": 3.0976, "step": 52572 }, { "epoch": 2.58, "grad_norm": 0.7670025825500488, "learning_rate": 2.9025062927853094e-05, "loss": 2.9123, "step": 52573 }, { "epoch": 2.58, "grad_norm": 0.7210864424705505, "learning_rate": 2.901845654606554e-05, "loss": 2.8945, "step": 52574 }, { "epoch": 2.58, "grad_norm": 0.7419854998588562, "learning_rate": 2.9011850877991306e-05, "loss": 3.1037, "step": 52575 }, { "epoch": 2.58, "grad_norm": 0.7315715551376343, "learning_rate": 2.9005245923647614e-05, "loss": 2.96, "step": 52576 }, { "epoch": 2.58, "grad_norm": 0.7764679789543152, "learning_rate": 2.8998641683052047e-05, "loss": 2.8197, "step": 52577 }, { "epoch": 2.58, "grad_norm": 0.7059036493301392, "learning_rate": 2.899203815622183e-05, "loss": 2.9622, "step": 52578 }, { "epoch": 2.58, "grad_norm": 0.8265849947929382, "learning_rate": 2.8985435343174414e-05, "loss": 2.9801, "step": 52579 }, { "epoch": 2.58, "grad_norm": 0.8718390464782715, "learning_rate": 2.897883324392728e-05, "loss": 2.6337, "step": 52580 }, { "epoch": 2.58, "grad_norm": 0.7311578989028931, "learning_rate": 2.897223185849772e-05, "loss": 2.6949, "step": 52581 }, { "epoch": 2.58, "grad_norm": 0.7237415909767151, "learning_rate": 2.896563118690315e-05, "loss": 2.9607, "step": 52582 }, { "epoch": 2.58, "grad_norm": 0.730317234992981, "learning_rate": 2.895903122916089e-05, "loss": 2.8626, "step": 52583 }, { "epoch": 2.58, "grad_norm": 0.7898061871528625, "learning_rate": 2.895243198528836e-05, "loss": 2.8689, "step": 52584 }, { "epoch": 2.58, "grad_norm": 0.7148504853248596, "learning_rate": 2.8945833455303045e-05, "loss": 2.8797, "step": 52585 }, { "epoch": 2.58, "grad_norm": 0.79593425989151, "learning_rate": 2.893923563922217e-05, "loss": 2.9757, "step": 52586 }, { "epoch": 2.58, "grad_norm": 0.7755904793739319, "learning_rate": 2.8932638537063245e-05, "loss": 2.9006, "step": 52587 }, { "epoch": 2.58, "grad_norm": 0.7300575375556946, "learning_rate": 2.8926042148843566e-05, "loss": 2.7823, "step": 52588 }, { "epoch": 2.58, "grad_norm": 0.7470569014549255, "learning_rate": 2.8919446474580477e-05, "loss": 2.8235, "step": 52589 }, { "epoch": 2.58, "grad_norm": 0.7756972908973694, "learning_rate": 2.8912851514291468e-05, "loss": 2.8028, "step": 52590 }, { "epoch": 2.58, "grad_norm": 0.7556537389755249, "learning_rate": 2.890625726799376e-05, "loss": 2.7269, "step": 52591 }, { "epoch": 2.58, "grad_norm": 0.7390879392623901, "learning_rate": 2.889966373570487e-05, "loss": 2.8715, "step": 52592 }, { "epoch": 2.58, "grad_norm": 0.7211794853210449, "learning_rate": 2.889307091744202e-05, "loss": 2.8193, "step": 52593 }, { "epoch": 2.58, "grad_norm": 0.7417266964912415, "learning_rate": 2.888647881322269e-05, "loss": 2.8811, "step": 52594 }, { "epoch": 2.58, "grad_norm": 0.738362193107605, "learning_rate": 2.887988742306424e-05, "loss": 2.9706, "step": 52595 }, { "epoch": 2.58, "grad_norm": 0.7565693855285645, "learning_rate": 2.8873296746983888e-05, "loss": 2.7883, "step": 52596 }, { "epoch": 2.58, "grad_norm": 0.786318838596344, "learning_rate": 2.8866706784999184e-05, "loss": 2.8329, "step": 52597 }, { "epoch": 2.58, "grad_norm": 0.7326686978340149, "learning_rate": 2.8860117537127314e-05, "loss": 2.9177, "step": 52598 }, { "epoch": 2.58, "grad_norm": 0.741766095161438, "learning_rate": 2.8853529003385767e-05, "loss": 2.8992, "step": 52599 }, { "epoch": 2.58, "grad_norm": 0.7155507206916809, "learning_rate": 2.8846941183791793e-05, "loss": 2.9925, "step": 52600 }, { "epoch": 2.58, "grad_norm": 0.7600530385971069, "learning_rate": 2.8840354078362782e-05, "loss": 2.8607, "step": 52601 }, { "epoch": 2.58, "grad_norm": 0.7748110294342041, "learning_rate": 2.8833767687116184e-05, "loss": 2.8176, "step": 52602 }, { "epoch": 2.58, "grad_norm": 0.7540781497955322, "learning_rate": 2.8827182010069217e-05, "loss": 2.8926, "step": 52603 }, { "epoch": 2.58, "grad_norm": 0.7902787327766418, "learning_rate": 2.882059704723927e-05, "loss": 3.0199, "step": 52604 }, { "epoch": 2.58, "grad_norm": 0.7752910852432251, "learning_rate": 2.8814012798643627e-05, "loss": 2.8814, "step": 52605 }, { "epoch": 2.58, "grad_norm": 0.7527629137039185, "learning_rate": 2.8807429264299708e-05, "loss": 2.6384, "step": 52606 }, { "epoch": 2.58, "grad_norm": 0.7904459834098816, "learning_rate": 2.8800846444224867e-05, "loss": 2.8519, "step": 52607 }, { "epoch": 2.58, "grad_norm": 0.6968057155609131, "learning_rate": 2.8794264338436325e-05, "loss": 2.8082, "step": 52608 }, { "epoch": 2.58, "grad_norm": 0.7696243524551392, "learning_rate": 2.8787682946951596e-05, "loss": 2.8339, "step": 52609 }, { "epoch": 2.58, "grad_norm": 0.8164247870445251, "learning_rate": 2.878110226978787e-05, "loss": 2.82, "step": 52610 }, { "epoch": 2.58, "grad_norm": 0.7112769484519958, "learning_rate": 2.87745223069625e-05, "loss": 2.7388, "step": 52611 }, { "epoch": 2.58, "grad_norm": 0.7222334146499634, "learning_rate": 2.876794305849287e-05, "loss": 2.8311, "step": 52612 }, { "epoch": 2.58, "grad_norm": 0.7716138362884521, "learning_rate": 2.8761364524396234e-05, "loss": 2.9988, "step": 52613 }, { "epoch": 2.58, "grad_norm": 0.6985487341880798, "learning_rate": 2.8754786704690012e-05, "loss": 2.7424, "step": 52614 }, { "epoch": 2.58, "grad_norm": 0.7328038811683655, "learning_rate": 2.874820959939146e-05, "loss": 2.8344, "step": 52615 }, { "epoch": 2.58, "grad_norm": 0.7214050889015198, "learning_rate": 2.8741633208517922e-05, "loss": 2.8049, "step": 52616 }, { "epoch": 2.58, "grad_norm": 0.7892204523086548, "learning_rate": 2.873505753208676e-05, "loss": 2.8323, "step": 52617 }, { "epoch": 2.58, "grad_norm": 0.743575930595398, "learning_rate": 2.8728482570115185e-05, "loss": 2.9289, "step": 52618 }, { "epoch": 2.58, "grad_norm": 0.7116771936416626, "learning_rate": 2.8721908322620625e-05, "loss": 2.827, "step": 52619 }, { "epoch": 2.58, "grad_norm": 0.7374192476272583, "learning_rate": 2.871533478962029e-05, "loss": 3.0038, "step": 52620 }, { "epoch": 2.58, "grad_norm": 0.7295014262199402, "learning_rate": 2.8708761971131577e-05, "loss": 2.6753, "step": 52621 }, { "epoch": 2.58, "grad_norm": 0.7758492827415466, "learning_rate": 2.87021898671718e-05, "loss": 3.0055, "step": 52622 }, { "epoch": 2.58, "grad_norm": 0.7198835015296936, "learning_rate": 2.869561847775821e-05, "loss": 2.8754, "step": 52623 }, { "epoch": 2.58, "grad_norm": 0.7312856316566467, "learning_rate": 2.8689047802908193e-05, "loss": 2.7471, "step": 52624 }, { "epoch": 2.58, "grad_norm": 0.7765367031097412, "learning_rate": 2.8682477842639007e-05, "loss": 2.952, "step": 52625 }, { "epoch": 2.58, "grad_norm": 0.7989129424095154, "learning_rate": 2.86759085969679e-05, "loss": 2.8156, "step": 52626 }, { "epoch": 2.58, "grad_norm": 0.7187029719352722, "learning_rate": 2.8669340065912295e-05, "loss": 3.0734, "step": 52627 }, { "epoch": 2.58, "grad_norm": 0.7583007216453552, "learning_rate": 2.8662772249489373e-05, "loss": 2.8671, "step": 52628 }, { "epoch": 2.58, "grad_norm": 0.7283254265785217, "learning_rate": 2.865620514771656e-05, "loss": 2.874, "step": 52629 }, { "epoch": 2.58, "grad_norm": 0.764652669429779, "learning_rate": 2.8649638760611004e-05, "loss": 2.9456, "step": 52630 }, { "epoch": 2.58, "grad_norm": 0.7280563116073608, "learning_rate": 2.8643073088190126e-05, "loss": 3.0906, "step": 52631 }, { "epoch": 2.58, "grad_norm": 0.7654474973678589, "learning_rate": 2.863650813047118e-05, "loss": 3.0266, "step": 52632 }, { "epoch": 2.58, "grad_norm": 0.7615987062454224, "learning_rate": 2.8629943887471418e-05, "loss": 2.9073, "step": 52633 }, { "epoch": 2.58, "grad_norm": 0.7482545375823975, "learning_rate": 2.8623380359208194e-05, "loss": 2.8876, "step": 52634 }, { "epoch": 2.58, "grad_norm": 0.7490363121032715, "learning_rate": 2.861681754569869e-05, "loss": 2.7844, "step": 52635 }, { "epoch": 2.58, "grad_norm": 0.7606791853904724, "learning_rate": 2.861025544696026e-05, "loss": 2.9984, "step": 52636 }, { "epoch": 2.58, "grad_norm": 0.7316489815711975, "learning_rate": 2.860369406301026e-05, "loss": 2.8326, "step": 52637 }, { "epoch": 2.58, "grad_norm": 0.7653703093528748, "learning_rate": 2.859713339386588e-05, "loss": 2.8809, "step": 52638 }, { "epoch": 2.58, "grad_norm": 0.7543324828147888, "learning_rate": 2.8590573439544427e-05, "loss": 2.7399, "step": 52639 }, { "epoch": 2.58, "grad_norm": 0.739680290222168, "learning_rate": 2.8584014200063132e-05, "loss": 2.6263, "step": 52640 }, { "epoch": 2.58, "grad_norm": 0.7400057315826416, "learning_rate": 2.8577455675439277e-05, "loss": 2.9609, "step": 52641 }, { "epoch": 2.58, "grad_norm": 0.7001802325248718, "learning_rate": 2.8570897865690247e-05, "loss": 2.8452, "step": 52642 }, { "epoch": 2.58, "grad_norm": 0.7247951030731201, "learning_rate": 2.856434077083316e-05, "loss": 2.8749, "step": 52643 }, { "epoch": 2.58, "grad_norm": 0.7461290955543518, "learning_rate": 2.855778439088544e-05, "loss": 2.8568, "step": 52644 }, { "epoch": 2.58, "grad_norm": 0.7752999067306519, "learning_rate": 2.8551228725864205e-05, "loss": 2.9557, "step": 52645 }, { "epoch": 2.58, "grad_norm": 0.7620311379432678, "learning_rate": 2.8544673775786842e-05, "loss": 2.9193, "step": 52646 }, { "epoch": 2.58, "grad_norm": 0.7228102087974548, "learning_rate": 2.853811954067057e-05, "loss": 2.7064, "step": 52647 }, { "epoch": 2.58, "grad_norm": 0.7312363386154175, "learning_rate": 2.8531566020532604e-05, "loss": 2.8944, "step": 52648 }, { "epoch": 2.58, "grad_norm": 0.7253252267837524, "learning_rate": 2.8525013215390268e-05, "loss": 3.0867, "step": 52649 }, { "epoch": 2.58, "grad_norm": 0.7094355225563049, "learning_rate": 2.8518461125260782e-05, "loss": 3.0171, "step": 52650 }, { "epoch": 2.58, "grad_norm": 0.7288451194763184, "learning_rate": 2.85119097501614e-05, "loss": 2.8495, "step": 52651 }, { "epoch": 2.58, "grad_norm": 0.7485461235046387, "learning_rate": 2.850535909010947e-05, "loss": 2.9792, "step": 52652 }, { "epoch": 2.58, "grad_norm": 0.7915205359458923, "learning_rate": 2.8498809145122182e-05, "loss": 2.8687, "step": 52653 }, { "epoch": 2.58, "grad_norm": 0.8087054491043091, "learning_rate": 2.8492259915216754e-05, "loss": 2.746, "step": 52654 }, { "epoch": 2.58, "grad_norm": 0.7752242088317871, "learning_rate": 2.848571140041044e-05, "loss": 2.8696, "step": 52655 }, { "epoch": 2.58, "grad_norm": 0.7321742177009583, "learning_rate": 2.8479163600720458e-05, "loss": 3.0578, "step": 52656 }, { "epoch": 2.58, "grad_norm": 0.7605643272399902, "learning_rate": 2.847261651616419e-05, "loss": 2.7562, "step": 52657 }, { "epoch": 2.58, "grad_norm": 0.7781768441200256, "learning_rate": 2.8466070146758734e-05, "loss": 2.7825, "step": 52658 }, { "epoch": 2.58, "grad_norm": 0.7603253722190857, "learning_rate": 2.845952449252147e-05, "loss": 2.8124, "step": 52659 }, { "epoch": 2.58, "grad_norm": 0.750426173210144, "learning_rate": 2.845297955346951e-05, "loss": 3.084, "step": 52660 }, { "epoch": 2.58, "grad_norm": 0.765870213508606, "learning_rate": 2.8446435329620122e-05, "loss": 2.8798, "step": 52661 }, { "epoch": 2.58, "grad_norm": 0.7285685539245605, "learning_rate": 2.843989182099061e-05, "loss": 2.8378, "step": 52662 }, { "epoch": 2.58, "grad_norm": 0.7301684617996216, "learning_rate": 2.8433349027598107e-05, "loss": 2.9406, "step": 52663 }, { "epoch": 2.58, "grad_norm": 0.7690022587776184, "learning_rate": 2.8426806949459956e-05, "loss": 2.8652, "step": 52664 }, { "epoch": 2.58, "grad_norm": 0.766257107257843, "learning_rate": 2.842026558659325e-05, "loss": 2.9153, "step": 52665 }, { "epoch": 2.58, "grad_norm": 0.7352449893951416, "learning_rate": 2.8413724939015336e-05, "loss": 3.1021, "step": 52666 }, { "epoch": 2.58, "grad_norm": 0.777466893196106, "learning_rate": 2.840718500674344e-05, "loss": 2.8008, "step": 52667 }, { "epoch": 2.58, "grad_norm": 0.8289501667022705, "learning_rate": 2.840064578979474e-05, "loss": 2.8083, "step": 52668 }, { "epoch": 2.58, "grad_norm": 0.7426512837409973, "learning_rate": 2.8394107288186496e-05, "loss": 2.7525, "step": 52669 }, { "epoch": 2.58, "grad_norm": 0.7094964981079102, "learning_rate": 2.8387569501935825e-05, "loss": 3.0038, "step": 52670 }, { "epoch": 2.58, "grad_norm": 0.7811385989189148, "learning_rate": 2.8381032431060013e-05, "loss": 2.8479, "step": 52671 }, { "epoch": 2.58, "grad_norm": 0.7416138052940369, "learning_rate": 2.837449607557635e-05, "loss": 3.1491, "step": 52672 }, { "epoch": 2.58, "grad_norm": 0.7839840650558472, "learning_rate": 2.8367960435501946e-05, "loss": 2.6479, "step": 52673 }, { "epoch": 2.58, "grad_norm": 0.78972327709198, "learning_rate": 2.8361425510854096e-05, "loss": 2.8504, "step": 52674 }, { "epoch": 2.58, "grad_norm": 0.7591714262962341, "learning_rate": 2.8354891301649985e-05, "loss": 3.0018, "step": 52675 }, { "epoch": 2.58, "grad_norm": 0.7825368046760559, "learning_rate": 2.8348357807906764e-05, "loss": 2.7301, "step": 52676 }, { "epoch": 2.58, "grad_norm": 0.727197527885437, "learning_rate": 2.8341825029641717e-05, "loss": 2.9514, "step": 52677 }, { "epoch": 2.58, "grad_norm": 0.7893258333206177, "learning_rate": 2.8335292966872003e-05, "loss": 2.8807, "step": 52678 }, { "epoch": 2.58, "grad_norm": 0.7999505996704102, "learning_rate": 2.832876161961487e-05, "loss": 2.8625, "step": 52679 }, { "epoch": 2.58, "grad_norm": 0.7148997187614441, "learning_rate": 2.832223098788744e-05, "loss": 2.9241, "step": 52680 }, { "epoch": 2.58, "grad_norm": 0.7615914344787598, "learning_rate": 2.831570107170703e-05, "loss": 2.8288, "step": 52681 }, { "epoch": 2.58, "grad_norm": 0.7838194370269775, "learning_rate": 2.830917187109073e-05, "loss": 2.7449, "step": 52682 }, { "epoch": 2.58, "grad_norm": 0.7049852609634399, "learning_rate": 2.8302643386055855e-05, "loss": 2.8416, "step": 52683 }, { "epoch": 2.58, "grad_norm": 0.7759522199630737, "learning_rate": 2.8296115616619497e-05, "loss": 2.8045, "step": 52684 }, { "epoch": 2.58, "grad_norm": 0.726335883140564, "learning_rate": 2.8289588562798836e-05, "loss": 2.8608, "step": 52685 }, { "epoch": 2.58, "grad_norm": 0.7436689138412476, "learning_rate": 2.8283062224611165e-05, "loss": 2.8539, "step": 52686 }, { "epoch": 2.58, "grad_norm": 0.7301530838012695, "learning_rate": 2.827653660207353e-05, "loss": 2.9732, "step": 52687 }, { "epoch": 2.58, "grad_norm": 0.7957533001899719, "learning_rate": 2.8270011695203222e-05, "loss": 2.867, "step": 52688 }, { "epoch": 2.58, "grad_norm": 0.6973696947097778, "learning_rate": 2.826348750401749e-05, "loss": 2.9357, "step": 52689 }, { "epoch": 2.58, "grad_norm": 0.8173954486846924, "learning_rate": 2.8256964028533425e-05, "loss": 2.7172, "step": 52690 }, { "epoch": 2.58, "grad_norm": 0.7530957460403442, "learning_rate": 2.8250441268768174e-05, "loss": 3.071, "step": 52691 }, { "epoch": 2.58, "grad_norm": 0.7341398596763611, "learning_rate": 2.8243919224738964e-05, "loss": 2.9068, "step": 52692 }, { "epoch": 2.58, "grad_norm": 0.7384122014045715, "learning_rate": 2.8237397896462944e-05, "loss": 2.7069, "step": 52693 }, { "epoch": 2.58, "grad_norm": 0.7298815250396729, "learning_rate": 2.8230877283957365e-05, "loss": 3.116, "step": 52694 }, { "epoch": 2.58, "grad_norm": 0.7248790860176086, "learning_rate": 2.8224357387239316e-05, "loss": 2.8794, "step": 52695 }, { "epoch": 2.58, "grad_norm": 0.739068329334259, "learning_rate": 2.821783820632605e-05, "loss": 3.0346, "step": 52696 }, { "epoch": 2.58, "grad_norm": 0.7384850382804871, "learning_rate": 2.8211319741234684e-05, "loss": 2.8443, "step": 52697 }, { "epoch": 2.58, "grad_norm": 0.7337626218795776, "learning_rate": 2.820480199198234e-05, "loss": 2.7513, "step": 52698 }, { "epoch": 2.58, "grad_norm": 0.7873049378395081, "learning_rate": 2.8198284958586302e-05, "loss": 2.8253, "step": 52699 }, { "epoch": 2.58, "grad_norm": 0.7321355938911438, "learning_rate": 2.8191768641063628e-05, "loss": 2.7804, "step": 52700 }, { "epoch": 2.58, "grad_norm": 0.7380569577217102, "learning_rate": 2.8185253039431566e-05, "loss": 2.9944, "step": 52701 }, { "epoch": 2.58, "grad_norm": 0.8345293402671814, "learning_rate": 2.8178738153707202e-05, "loss": 3.1125, "step": 52702 }, { "epoch": 2.58, "grad_norm": 0.779485821723938, "learning_rate": 2.8172223983907692e-05, "loss": 2.6968, "step": 52703 }, { "epoch": 2.58, "grad_norm": 0.7306099534034729, "learning_rate": 2.8165710530050322e-05, "loss": 2.9204, "step": 52704 }, { "epoch": 2.58, "grad_norm": 0.8526567816734314, "learning_rate": 2.8159197792152112e-05, "loss": 2.9036, "step": 52705 }, { "epoch": 2.58, "grad_norm": 0.7224339246749878, "learning_rate": 2.815268577023031e-05, "loss": 3.0588, "step": 52706 }, { "epoch": 2.58, "grad_norm": 0.7223872542381287, "learning_rate": 2.8146174464301907e-05, "loss": 3.0034, "step": 52707 }, { "epoch": 2.58, "grad_norm": 0.7458459138870239, "learning_rate": 2.813966387438419e-05, "loss": 3.0477, "step": 52708 }, { "epoch": 2.58, "grad_norm": 0.7503674626350403, "learning_rate": 2.813315400049434e-05, "loss": 2.8695, "step": 52709 }, { "epoch": 2.58, "grad_norm": 0.7620118260383606, "learning_rate": 2.8126644842649382e-05, "loss": 2.8405, "step": 52710 }, { "epoch": 2.58, "grad_norm": 0.7874787449836731, "learning_rate": 2.8120136400866566e-05, "loss": 2.8848, "step": 52711 }, { "epoch": 2.58, "grad_norm": 0.7471845746040344, "learning_rate": 2.811362867516298e-05, "loss": 2.8329, "step": 52712 }, { "epoch": 2.58, "grad_norm": 0.8011273741722107, "learning_rate": 2.8107121665555743e-05, "loss": 2.7753, "step": 52713 }, { "epoch": 2.58, "grad_norm": 0.6914375424385071, "learning_rate": 2.8100615372062042e-05, "loss": 2.9165, "step": 52714 }, { "epoch": 2.58, "grad_norm": 0.7664171457290649, "learning_rate": 2.809410979469896e-05, "loss": 2.8735, "step": 52715 }, { "epoch": 2.58, "grad_norm": 0.7507244348526001, "learning_rate": 2.808760493348372e-05, "loss": 2.8179, "step": 52716 }, { "epoch": 2.58, "grad_norm": 0.7342262864112854, "learning_rate": 2.808110078843331e-05, "loss": 2.9128, "step": 52717 }, { "epoch": 2.58, "grad_norm": 0.7676442265510559, "learning_rate": 2.8074597359565045e-05, "loss": 2.9506, "step": 52718 }, { "epoch": 2.58, "grad_norm": 0.7457807064056396, "learning_rate": 2.8068094646895945e-05, "loss": 3.0169, "step": 52719 }, { "epoch": 2.58, "grad_norm": 0.700971782207489, "learning_rate": 2.8061592650443065e-05, "loss": 2.9474, "step": 52720 }, { "epoch": 2.58, "grad_norm": 0.7742531895637512, "learning_rate": 2.8055091370223692e-05, "loss": 2.8221, "step": 52721 }, { "epoch": 2.58, "grad_norm": 0.7480669617652893, "learning_rate": 2.8048590806254813e-05, "loss": 2.7798, "step": 52722 }, { "epoch": 2.58, "grad_norm": 0.7456562519073486, "learning_rate": 2.804209095855361e-05, "loss": 2.9058, "step": 52723 }, { "epoch": 2.58, "grad_norm": 0.865938127040863, "learning_rate": 2.803559182713727e-05, "loss": 2.8553, "step": 52724 }, { "epoch": 2.58, "grad_norm": 0.7843738794326782, "learning_rate": 2.8029093412022785e-05, "loss": 2.737, "step": 52725 }, { "epoch": 2.58, "grad_norm": 0.7294140458106995, "learning_rate": 2.802259571322737e-05, "loss": 2.752, "step": 52726 }, { "epoch": 2.58, "grad_norm": 0.7814497351646423, "learning_rate": 2.801609873076808e-05, "loss": 3.033, "step": 52727 }, { "epoch": 2.58, "grad_norm": 0.7317684888839722, "learning_rate": 2.8009602464661996e-05, "loss": 2.8929, "step": 52728 }, { "epoch": 2.58, "grad_norm": 0.7592427134513855, "learning_rate": 2.8003106914926344e-05, "loss": 2.9463, "step": 52729 }, { "epoch": 2.58, "grad_norm": 1.053802728652954, "learning_rate": 2.7996612081578074e-05, "loss": 2.9645, "step": 52730 }, { "epoch": 2.58, "grad_norm": 0.7302281856536865, "learning_rate": 2.7990117964634475e-05, "loss": 2.8003, "step": 52731 }, { "epoch": 2.58, "grad_norm": 0.7234871983528137, "learning_rate": 2.7983624564112494e-05, "loss": 2.8953, "step": 52732 }, { "epoch": 2.58, "grad_norm": 0.7112483382225037, "learning_rate": 2.7977131880029324e-05, "loss": 2.7117, "step": 52733 }, { "epoch": 2.58, "grad_norm": 0.7166719436645508, "learning_rate": 2.7970639912402047e-05, "loss": 2.9582, "step": 52734 }, { "epoch": 2.58, "grad_norm": 0.7226663827896118, "learning_rate": 2.796414866124769e-05, "loss": 2.8281, "step": 52735 }, { "epoch": 2.58, "grad_norm": 0.7304128408432007, "learning_rate": 2.7957658126583494e-05, "loss": 3.0941, "step": 52736 }, { "epoch": 2.58, "grad_norm": 0.7455897331237793, "learning_rate": 2.7951168308426387e-05, "loss": 2.9331, "step": 52737 }, { "epoch": 2.58, "grad_norm": 0.7342404127120972, "learning_rate": 2.7944679206793553e-05, "loss": 2.9336, "step": 52738 }, { "epoch": 2.58, "grad_norm": 0.7524769902229309, "learning_rate": 2.7938190821702145e-05, "loss": 2.8062, "step": 52739 }, { "epoch": 2.58, "grad_norm": 0.7616720199584961, "learning_rate": 2.7931703153169182e-05, "loss": 2.9295, "step": 52740 }, { "epoch": 2.58, "grad_norm": 0.7344446182250977, "learning_rate": 2.792521620121172e-05, "loss": 2.8753, "step": 52741 }, { "epoch": 2.58, "grad_norm": 0.7952720522880554, "learning_rate": 2.7918729965846876e-05, "loss": 2.8517, "step": 52742 }, { "epoch": 2.58, "grad_norm": 0.7229446768760681, "learning_rate": 2.79122444470917e-05, "loss": 2.7266, "step": 52743 }, { "epoch": 2.58, "grad_norm": 0.751518964767456, "learning_rate": 2.7905759644963355e-05, "loss": 2.9713, "step": 52744 }, { "epoch": 2.58, "grad_norm": 0.7525866627693176, "learning_rate": 2.7899275559478818e-05, "loss": 2.9393, "step": 52745 }, { "epoch": 2.58, "grad_norm": 0.752153217792511, "learning_rate": 2.7892792190655312e-05, "loss": 2.991, "step": 52746 }, { "epoch": 2.59, "grad_norm": 0.7784674167633057, "learning_rate": 2.788630953850972e-05, "loss": 2.9634, "step": 52747 }, { "epoch": 2.59, "grad_norm": 0.7399433255195618, "learning_rate": 2.78798276030593e-05, "loss": 2.9404, "step": 52748 }, { "epoch": 2.59, "grad_norm": 0.7343171834945679, "learning_rate": 2.787334638432104e-05, "loss": 3.0185, "step": 52749 }, { "epoch": 2.59, "grad_norm": 0.7647814750671387, "learning_rate": 2.7866865882311985e-05, "loss": 3.0713, "step": 52750 }, { "epoch": 2.59, "grad_norm": 0.7639553546905518, "learning_rate": 2.7860386097049257e-05, "loss": 2.974, "step": 52751 }, { "epoch": 2.59, "grad_norm": 0.7180127501487732, "learning_rate": 2.785390702854985e-05, "loss": 2.7201, "step": 52752 }, { "epoch": 2.59, "grad_norm": 0.7458502054214478, "learning_rate": 2.7847428676830876e-05, "loss": 2.8863, "step": 52753 }, { "epoch": 2.59, "grad_norm": 0.7427824139595032, "learning_rate": 2.7840951041909455e-05, "loss": 2.755, "step": 52754 }, { "epoch": 2.59, "grad_norm": 0.7325022220611572, "learning_rate": 2.7834474123802574e-05, "loss": 3.1051, "step": 52755 }, { "epoch": 2.59, "grad_norm": 0.8776165246963501, "learning_rate": 2.7827997922527324e-05, "loss": 2.9282, "step": 52756 }, { "epoch": 2.59, "grad_norm": 0.7489161491394043, "learning_rate": 2.7821522438100685e-05, "loss": 3.0858, "step": 52757 }, { "epoch": 2.59, "grad_norm": 0.7696153521537781, "learning_rate": 2.781504767053985e-05, "loss": 2.8448, "step": 52758 }, { "epoch": 2.59, "grad_norm": 0.7301738858222961, "learning_rate": 2.780857361986173e-05, "loss": 3.0967, "step": 52759 }, { "epoch": 2.59, "grad_norm": 0.7664462924003601, "learning_rate": 2.780210028608345e-05, "loss": 2.7462, "step": 52760 }, { "epoch": 2.59, "grad_norm": 0.7320300340652466, "learning_rate": 2.77956276692221e-05, "loss": 3.0839, "step": 52761 }, { "epoch": 2.59, "grad_norm": 0.7702510356903076, "learning_rate": 2.7789155769294657e-05, "loss": 3.0114, "step": 52762 }, { "epoch": 2.59, "grad_norm": 0.7481235861778259, "learning_rate": 2.7782684586318216e-05, "loss": 2.9768, "step": 52763 }, { "epoch": 2.59, "grad_norm": 0.779035747051239, "learning_rate": 2.7776214120309726e-05, "loss": 3.0398, "step": 52764 }, { "epoch": 2.59, "grad_norm": 0.7233691215515137, "learning_rate": 2.7769744371286308e-05, "loss": 2.8899, "step": 52765 }, { "epoch": 2.59, "grad_norm": 0.7301437854766846, "learning_rate": 2.7763275339265044e-05, "loss": 2.8502, "step": 52766 }, { "epoch": 2.59, "grad_norm": 0.7371978759765625, "learning_rate": 2.7756807024262863e-05, "loss": 2.8388, "step": 52767 }, { "epoch": 2.59, "grad_norm": 0.7594659328460693, "learning_rate": 2.7750339426296908e-05, "loss": 2.7252, "step": 52768 }, { "epoch": 2.59, "grad_norm": 0.7644268870353699, "learning_rate": 2.774387254538414e-05, "loss": 2.8623, "step": 52769 }, { "epoch": 2.59, "grad_norm": 0.7400708794593811, "learning_rate": 2.7737406381541637e-05, "loss": 2.8575, "step": 52770 }, { "epoch": 2.59, "grad_norm": 0.7719500064849854, "learning_rate": 2.7730940934786426e-05, "loss": 2.852, "step": 52771 }, { "epoch": 2.59, "grad_norm": 0.7356804609298706, "learning_rate": 2.7724476205135425e-05, "loss": 2.996, "step": 52772 }, { "epoch": 2.59, "grad_norm": 0.7251750230789185, "learning_rate": 2.7718012192605854e-05, "loss": 2.897, "step": 52773 }, { "epoch": 2.59, "grad_norm": 0.7100973129272461, "learning_rate": 2.7711548897214562e-05, "loss": 2.9647, "step": 52774 }, { "epoch": 2.59, "grad_norm": 0.7529676556587219, "learning_rate": 2.770508631897864e-05, "loss": 3.0783, "step": 52775 }, { "epoch": 2.59, "grad_norm": 0.7607330083847046, "learning_rate": 2.7698624457915175e-05, "loss": 2.8693, "step": 52776 }, { "epoch": 2.59, "grad_norm": 0.7323965430259705, "learning_rate": 2.769216331404115e-05, "loss": 2.9267, "step": 52777 }, { "epoch": 2.59, "grad_norm": 0.7149645686149597, "learning_rate": 2.768570288737352e-05, "loss": 2.8751, "step": 52778 }, { "epoch": 2.59, "grad_norm": 0.8055224418640137, "learning_rate": 2.7679243177929332e-05, "loss": 2.9824, "step": 52779 }, { "epoch": 2.59, "grad_norm": 0.7920107841491699, "learning_rate": 2.7672784185725584e-05, "loss": 3.1636, "step": 52780 }, { "epoch": 2.59, "grad_norm": 0.6878994107246399, "learning_rate": 2.766632591077935e-05, "loss": 2.9685, "step": 52781 }, { "epoch": 2.59, "grad_norm": 0.7483735084533691, "learning_rate": 2.7659868353107563e-05, "loss": 2.7888, "step": 52782 }, { "epoch": 2.59, "grad_norm": 0.7039675116539001, "learning_rate": 2.765341151272733e-05, "loss": 2.8918, "step": 52783 }, { "epoch": 2.59, "grad_norm": 0.752712070941925, "learning_rate": 2.7646955389655544e-05, "loss": 3.069, "step": 52784 }, { "epoch": 2.59, "grad_norm": 0.7546181082725525, "learning_rate": 2.7640499983909326e-05, "loss": 2.7278, "step": 52785 }, { "epoch": 2.59, "grad_norm": 0.7798104286193848, "learning_rate": 2.763404529550559e-05, "loss": 2.8005, "step": 52786 }, { "epoch": 2.59, "grad_norm": 0.6820088028907776, "learning_rate": 2.7627591324461295e-05, "loss": 2.9015, "step": 52787 }, { "epoch": 2.59, "grad_norm": 0.7711160182952881, "learning_rate": 2.762113807079359e-05, "loss": 2.7493, "step": 52788 }, { "epoch": 2.59, "grad_norm": 0.77363121509552, "learning_rate": 2.7614685534519332e-05, "loss": 2.8504, "step": 52789 }, { "epoch": 2.59, "grad_norm": 0.7659868597984314, "learning_rate": 2.760823371565557e-05, "loss": 2.6641, "step": 52790 }, { "epoch": 2.59, "grad_norm": 0.7323107719421387, "learning_rate": 2.7601782614219326e-05, "loss": 2.7258, "step": 52791 }, { "epoch": 2.59, "grad_norm": 0.7489632368087769, "learning_rate": 2.7595332230227584e-05, "loss": 3.0457, "step": 52792 }, { "epoch": 2.59, "grad_norm": 0.7547075748443604, "learning_rate": 2.75888825636973e-05, "loss": 2.9536, "step": 52793 }, { "epoch": 2.59, "grad_norm": 0.7597982883453369, "learning_rate": 2.758243361464543e-05, "loss": 2.7106, "step": 52794 }, { "epoch": 2.59, "grad_norm": 0.7556941509246826, "learning_rate": 2.7575985383089018e-05, "loss": 2.908, "step": 52795 }, { "epoch": 2.59, "grad_norm": 0.7499327659606934, "learning_rate": 2.7569537869045055e-05, "loss": 3.0735, "step": 52796 }, { "epoch": 2.59, "grad_norm": 0.780232310295105, "learning_rate": 2.7563091072530465e-05, "loss": 2.8993, "step": 52797 }, { "epoch": 2.59, "grad_norm": 0.7690110206604004, "learning_rate": 2.7556644993562295e-05, "loss": 2.6908, "step": 52798 }, { "epoch": 2.59, "grad_norm": 0.7913340926170349, "learning_rate": 2.75501996321575e-05, "loss": 2.8206, "step": 52799 }, { "epoch": 2.59, "grad_norm": 0.7513294219970703, "learning_rate": 2.7543754988333e-05, "loss": 2.8581, "step": 52800 }, { "epoch": 2.59, "grad_norm": 0.7517967820167542, "learning_rate": 2.753731106210585e-05, "loss": 2.8545, "step": 52801 }, { "epoch": 2.59, "grad_norm": 0.7514926195144653, "learning_rate": 2.7530867853492933e-05, "loss": 3.0491, "step": 52802 }, { "epoch": 2.59, "grad_norm": 0.7327825427055359, "learning_rate": 2.7524425362511337e-05, "loss": 2.9297, "step": 52803 }, { "epoch": 2.59, "grad_norm": 0.7828764915466309, "learning_rate": 2.7517983589177917e-05, "loss": 2.8612, "step": 52804 }, { "epoch": 2.59, "grad_norm": 0.7338088154792786, "learning_rate": 2.751154253350969e-05, "loss": 3.1315, "step": 52805 }, { "epoch": 2.59, "grad_norm": 0.7811881303787231, "learning_rate": 2.7505102195523675e-05, "loss": 2.6917, "step": 52806 }, { "epoch": 2.59, "grad_norm": 0.7544569969177246, "learning_rate": 2.7498662575236762e-05, "loss": 2.8917, "step": 52807 }, { "epoch": 2.59, "grad_norm": 0.7520846128463745, "learning_rate": 2.7492223672665937e-05, "loss": 2.9407, "step": 52808 }, { "epoch": 2.59, "grad_norm": 0.7595059871673584, "learning_rate": 2.7485785487828083e-05, "loss": 3.0334, "step": 52809 }, { "epoch": 2.59, "grad_norm": 0.7427202463150024, "learning_rate": 2.7479348020740223e-05, "loss": 2.8506, "step": 52810 }, { "epoch": 2.59, "grad_norm": 0.7459549903869629, "learning_rate": 2.7472911271419374e-05, "loss": 2.9055, "step": 52811 }, { "epoch": 2.59, "grad_norm": 0.7440264225006104, "learning_rate": 2.746647523988239e-05, "loss": 2.9919, "step": 52812 }, { "epoch": 2.59, "grad_norm": 0.7285496592521667, "learning_rate": 2.7460039926146293e-05, "loss": 3.0071, "step": 52813 }, { "epoch": 2.59, "grad_norm": 0.7452382445335388, "learning_rate": 2.7453605330228e-05, "loss": 2.8223, "step": 52814 }, { "epoch": 2.59, "grad_norm": 0.8756013512611389, "learning_rate": 2.74471714521444e-05, "loss": 2.8339, "step": 52815 }, { "epoch": 2.59, "grad_norm": 0.8711315393447876, "learning_rate": 2.744073829191258e-05, "loss": 3.0862, "step": 52816 }, { "epoch": 2.59, "grad_norm": 0.7606200575828552, "learning_rate": 2.743430584954932e-05, "loss": 2.7368, "step": 52817 }, { "epoch": 2.59, "grad_norm": 0.7606265544891357, "learning_rate": 2.7427874125071714e-05, "loss": 2.8173, "step": 52818 }, { "epoch": 2.59, "grad_norm": 0.7671118378639221, "learning_rate": 2.7421443118496544e-05, "loss": 3.0363, "step": 52819 }, { "epoch": 2.59, "grad_norm": 0.7543085813522339, "learning_rate": 2.7415012829840933e-05, "loss": 2.8662, "step": 52820 }, { "epoch": 2.59, "grad_norm": 0.7956048846244812, "learning_rate": 2.7408583259121697e-05, "loss": 2.999, "step": 52821 }, { "epoch": 2.59, "grad_norm": 0.7940734028816223, "learning_rate": 2.7402154406355726e-05, "loss": 2.9409, "step": 52822 }, { "epoch": 2.59, "grad_norm": 0.7047262191772461, "learning_rate": 2.739572627156007e-05, "loss": 2.9213, "step": 52823 }, { "epoch": 2.59, "grad_norm": 0.7310606241226196, "learning_rate": 2.7389298854751586e-05, "loss": 2.7791, "step": 52824 }, { "epoch": 2.59, "grad_norm": 0.7745860815048218, "learning_rate": 2.738287215594719e-05, "loss": 3.0216, "step": 52825 }, { "epoch": 2.59, "grad_norm": 0.7691651582717896, "learning_rate": 2.7376446175163903e-05, "loss": 2.6237, "step": 52826 }, { "epoch": 2.59, "grad_norm": 0.7258844971656799, "learning_rate": 2.7370020912418543e-05, "loss": 2.9344, "step": 52827 }, { "epoch": 2.59, "grad_norm": 0.7222638130187988, "learning_rate": 2.7363596367728135e-05, "loss": 2.5812, "step": 52828 }, { "epoch": 2.59, "grad_norm": 0.7643725872039795, "learning_rate": 2.735717254110953e-05, "loss": 3.0799, "step": 52829 }, { "epoch": 2.59, "grad_norm": 0.7751563191413879, "learning_rate": 2.735074943257961e-05, "loss": 2.8096, "step": 52830 }, { "epoch": 2.59, "grad_norm": 0.7652669548988342, "learning_rate": 2.73443270421554e-05, "loss": 2.9164, "step": 52831 }, { "epoch": 2.59, "grad_norm": 0.7439308166503906, "learning_rate": 2.733790536985372e-05, "loss": 2.983, "step": 52832 }, { "epoch": 2.59, "grad_norm": 0.7255779504776001, "learning_rate": 2.7331484415691584e-05, "loss": 2.8595, "step": 52833 }, { "epoch": 2.59, "grad_norm": 0.7431380152702332, "learning_rate": 2.7325064179685753e-05, "loss": 2.9898, "step": 52834 }, { "epoch": 2.59, "grad_norm": 0.7448784708976746, "learning_rate": 2.7318644661853307e-05, "loss": 2.9715, "step": 52835 }, { "epoch": 2.59, "grad_norm": 0.7783845067024231, "learning_rate": 2.7312225862211067e-05, "loss": 2.908, "step": 52836 }, { "epoch": 2.59, "grad_norm": 0.7135199308395386, "learning_rate": 2.7305807780775856e-05, "loss": 2.8114, "step": 52837 }, { "epoch": 2.59, "grad_norm": 0.7589677572250366, "learning_rate": 2.729939041756476e-05, "loss": 2.9255, "step": 52838 }, { "epoch": 2.59, "grad_norm": 0.7436524033546448, "learning_rate": 2.7292973772594527e-05, "loss": 2.9792, "step": 52839 }, { "epoch": 2.59, "grad_norm": 0.786425769329071, "learning_rate": 2.7286557845882185e-05, "loss": 2.6977, "step": 52840 }, { "epoch": 2.59, "grad_norm": 0.7813881039619446, "learning_rate": 2.728014263744448e-05, "loss": 3.0427, "step": 52841 }, { "epoch": 2.59, "grad_norm": 0.7063042521476746, "learning_rate": 2.7273728147298466e-05, "loss": 2.9036, "step": 52842 }, { "epoch": 2.59, "grad_norm": 0.7354524731636047, "learning_rate": 2.7267314375460968e-05, "loss": 2.9452, "step": 52843 }, { "epoch": 2.59, "grad_norm": 0.7480248212814331, "learning_rate": 2.72609013219488e-05, "loss": 2.9096, "step": 52844 }, { "epoch": 2.59, "grad_norm": 0.7572676539421082, "learning_rate": 2.725448898677899e-05, "loss": 2.8905, "step": 52845 }, { "epoch": 2.59, "grad_norm": 0.7566613554954529, "learning_rate": 2.7248077369968312e-05, "loss": 2.8314, "step": 52846 }, { "epoch": 2.59, "grad_norm": 0.733538806438446, "learning_rate": 2.724166647153373e-05, "loss": 2.9796, "step": 52847 }, { "epoch": 2.59, "grad_norm": 0.7428073287010193, "learning_rate": 2.7235256291492124e-05, "loss": 2.7486, "step": 52848 }, { "epoch": 2.59, "grad_norm": 0.7868484258651733, "learning_rate": 2.7228846829860318e-05, "loss": 2.7314, "step": 52849 }, { "epoch": 2.59, "grad_norm": 0.7082564830780029, "learning_rate": 2.7222438086655295e-05, "loss": 3.042, "step": 52850 }, { "epoch": 2.59, "grad_norm": 0.7359206676483154, "learning_rate": 2.721603006189388e-05, "loss": 3.0088, "step": 52851 }, { "epoch": 2.59, "grad_norm": 0.7429801821708679, "learning_rate": 2.7209622755592852e-05, "loss": 2.9179, "step": 52852 }, { "epoch": 2.59, "grad_norm": 0.7138932347297668, "learning_rate": 2.720321616776927e-05, "loss": 2.8191, "step": 52853 }, { "epoch": 2.59, "grad_norm": 0.7627942562103271, "learning_rate": 2.7196810298439852e-05, "loss": 2.9606, "step": 52854 }, { "epoch": 2.59, "grad_norm": 0.7788562178611755, "learning_rate": 2.7190405147621587e-05, "loss": 2.7794, "step": 52855 }, { "epoch": 2.59, "grad_norm": 0.7393107414245605, "learning_rate": 2.7184000715331222e-05, "loss": 2.879, "step": 52856 }, { "epoch": 2.59, "grad_norm": 0.7325177192687988, "learning_rate": 2.7177597001585782e-05, "loss": 2.9611, "step": 52857 }, { "epoch": 2.59, "grad_norm": 0.7476690411567688, "learning_rate": 2.717119400640202e-05, "loss": 2.9348, "step": 52858 }, { "epoch": 2.59, "grad_norm": 0.7651125192642212, "learning_rate": 2.7164791729796753e-05, "loss": 2.7011, "step": 52859 }, { "epoch": 2.59, "grad_norm": 0.7937335968017578, "learning_rate": 2.7158390171787003e-05, "loss": 3.0789, "step": 52860 }, { "epoch": 2.59, "grad_norm": 0.7635074257850647, "learning_rate": 2.715198933238949e-05, "loss": 2.9107, "step": 52861 }, { "epoch": 2.59, "grad_norm": 0.7641463875770569, "learning_rate": 2.7145589211621132e-05, "loss": 2.8345, "step": 52862 }, { "epoch": 2.59, "grad_norm": 0.8104754090309143, "learning_rate": 2.713918980949882e-05, "loss": 2.9759, "step": 52863 }, { "epoch": 2.59, "grad_norm": 0.7333548069000244, "learning_rate": 2.7132791126039367e-05, "loss": 3.0388, "step": 52864 }, { "epoch": 2.59, "grad_norm": 0.752964437007904, "learning_rate": 2.712639316125963e-05, "loss": 2.9652, "step": 52865 }, { "epoch": 2.59, "grad_norm": 0.73786860704422, "learning_rate": 2.7119995915176396e-05, "loss": 3.0213, "step": 52866 }, { "epoch": 2.59, "grad_norm": 0.7262935638427734, "learning_rate": 2.7113599387806582e-05, "loss": 2.8607, "step": 52867 }, { "epoch": 2.59, "grad_norm": 0.7377870678901672, "learning_rate": 2.710720357916708e-05, "loss": 2.9051, "step": 52868 }, { "epoch": 2.59, "grad_norm": 0.7554672956466675, "learning_rate": 2.7100808489274638e-05, "loss": 2.7477, "step": 52869 }, { "epoch": 2.59, "grad_norm": 0.7196999192237854, "learning_rate": 2.7094414118146213e-05, "loss": 2.8733, "step": 52870 }, { "epoch": 2.59, "grad_norm": 0.7686446309089661, "learning_rate": 2.708802046579849e-05, "loss": 3.0189, "step": 52871 }, { "epoch": 2.59, "grad_norm": 0.7374565601348877, "learning_rate": 2.7081627532248486e-05, "loss": 3.0224, "step": 52872 }, { "epoch": 2.59, "grad_norm": 0.727088212966919, "learning_rate": 2.7075235317512923e-05, "loss": 2.7337, "step": 52873 }, { "epoch": 2.59, "grad_norm": 0.7013735175132751, "learning_rate": 2.706884382160862e-05, "loss": 2.9789, "step": 52874 }, { "epoch": 2.59, "grad_norm": 0.7275285124778748, "learning_rate": 2.7062453044552536e-05, "loss": 2.843, "step": 52875 }, { "epoch": 2.59, "grad_norm": 0.7339413166046143, "learning_rate": 2.705606298636135e-05, "loss": 2.786, "step": 52876 }, { "epoch": 2.59, "grad_norm": 0.8053582906723022, "learning_rate": 2.704967364705195e-05, "loss": 3.011, "step": 52877 }, { "epoch": 2.59, "grad_norm": 0.770816445350647, "learning_rate": 2.7043285026641258e-05, "loss": 3.0154, "step": 52878 }, { "epoch": 2.59, "grad_norm": 0.7276818752288818, "learning_rate": 2.7036897125145995e-05, "loss": 3.0039, "step": 52879 }, { "epoch": 2.59, "grad_norm": 0.7705271244049072, "learning_rate": 2.7030509942583013e-05, "loss": 2.8089, "step": 52880 }, { "epoch": 2.59, "grad_norm": 0.764750063419342, "learning_rate": 2.7024123478969095e-05, "loss": 2.9019, "step": 52881 }, { "epoch": 2.59, "grad_norm": 0.8144281506538391, "learning_rate": 2.70177377343211e-05, "loss": 2.7056, "step": 52882 }, { "epoch": 2.59, "grad_norm": 0.7661029696464539, "learning_rate": 2.7011352708655908e-05, "loss": 2.907, "step": 52883 }, { "epoch": 2.59, "grad_norm": 0.7692121267318726, "learning_rate": 2.700496840199021e-05, "loss": 2.8668, "step": 52884 }, { "epoch": 2.59, "grad_norm": 0.7304009199142456, "learning_rate": 2.699858481434096e-05, "loss": 2.9795, "step": 52885 }, { "epoch": 2.59, "grad_norm": 0.7058482766151428, "learning_rate": 2.6992201945724878e-05, "loss": 2.8171, "step": 52886 }, { "epoch": 2.59, "grad_norm": 0.8239343762397766, "learning_rate": 2.6985819796158746e-05, "loss": 2.7458, "step": 52887 }, { "epoch": 2.59, "grad_norm": 0.76352459192276, "learning_rate": 2.6979438365659456e-05, "loss": 2.9991, "step": 52888 }, { "epoch": 2.59, "grad_norm": 0.7338518500328064, "learning_rate": 2.6973057654243756e-05, "loss": 2.8587, "step": 52889 }, { "epoch": 2.59, "grad_norm": 0.7867611050605774, "learning_rate": 2.6966677661928504e-05, "loss": 2.6982, "step": 52890 }, { "epoch": 2.59, "grad_norm": 0.7336326837539673, "learning_rate": 2.696029838873045e-05, "loss": 2.8797, "step": 52891 }, { "epoch": 2.59, "grad_norm": 0.7487276196479797, "learning_rate": 2.6953919834666417e-05, "loss": 2.8777, "step": 52892 }, { "epoch": 2.59, "grad_norm": 0.7201322913169861, "learning_rate": 2.6947541999753253e-05, "loss": 2.806, "step": 52893 }, { "epoch": 2.59, "grad_norm": 0.731619119644165, "learning_rate": 2.6941164884007715e-05, "loss": 2.782, "step": 52894 }, { "epoch": 2.59, "grad_norm": 0.7519465684890747, "learning_rate": 2.6934788487446623e-05, "loss": 2.7115, "step": 52895 }, { "epoch": 2.59, "grad_norm": 0.7223137021064758, "learning_rate": 2.6928412810086665e-05, "loss": 2.8679, "step": 52896 }, { "epoch": 2.59, "grad_norm": 0.7320907711982727, "learning_rate": 2.692203785194472e-05, "loss": 3.1397, "step": 52897 }, { "epoch": 2.59, "grad_norm": 0.7210008502006531, "learning_rate": 2.6915663613037618e-05, "loss": 2.9665, "step": 52898 }, { "epoch": 2.59, "grad_norm": 0.6909995675086975, "learning_rate": 2.690929009338204e-05, "loss": 2.9763, "step": 52899 }, { "epoch": 2.59, "grad_norm": 0.7912375926971436, "learning_rate": 2.6902917292994906e-05, "loss": 2.9634, "step": 52900 }, { "epoch": 2.59, "grad_norm": 0.7833209037780762, "learning_rate": 2.6896545211892938e-05, "loss": 2.7593, "step": 52901 }, { "epoch": 2.59, "grad_norm": 0.7575777769088745, "learning_rate": 2.6890173850092855e-05, "loss": 2.8503, "step": 52902 }, { "epoch": 2.59, "grad_norm": 0.7618677616119385, "learning_rate": 2.6883803207611543e-05, "loss": 3.023, "step": 52903 }, { "epoch": 2.59, "grad_norm": 0.7287328839302063, "learning_rate": 2.6877433284465656e-05, "loss": 2.6533, "step": 52904 }, { "epoch": 2.59, "grad_norm": 0.7309133410453796, "learning_rate": 2.687106408067211e-05, "loss": 2.9655, "step": 52905 }, { "epoch": 2.59, "grad_norm": 0.7655085921287537, "learning_rate": 2.6864695596247566e-05, "loss": 2.9034, "step": 52906 }, { "epoch": 2.59, "grad_norm": 0.7788105010986328, "learning_rate": 2.685832783120887e-05, "loss": 2.8593, "step": 52907 }, { "epoch": 2.59, "grad_norm": 0.7491944432258606, "learning_rate": 2.6851960785572813e-05, "loss": 2.7122, "step": 52908 }, { "epoch": 2.59, "grad_norm": 0.7422747015953064, "learning_rate": 2.684559445935611e-05, "loss": 2.8433, "step": 52909 }, { "epoch": 2.59, "grad_norm": 0.7405474781990051, "learning_rate": 2.6839228852575523e-05, "loss": 2.984, "step": 52910 }, { "epoch": 2.59, "grad_norm": 0.719196617603302, "learning_rate": 2.683286396524783e-05, "loss": 2.7548, "step": 52911 }, { "epoch": 2.59, "grad_norm": 0.7382709980010986, "learning_rate": 2.6826499797389822e-05, "loss": 2.9205, "step": 52912 }, { "epoch": 2.59, "grad_norm": 0.7906863689422607, "learning_rate": 2.6820136349018184e-05, "loss": 2.7707, "step": 52913 }, { "epoch": 2.59, "grad_norm": 0.753178596496582, "learning_rate": 2.681377362014977e-05, "loss": 2.8353, "step": 52914 }, { "epoch": 2.59, "grad_norm": 0.7814869284629822, "learning_rate": 2.68074116108013e-05, "loss": 2.7692, "step": 52915 }, { "epoch": 2.59, "grad_norm": 0.7554951310157776, "learning_rate": 2.680105032098956e-05, "loss": 2.6514, "step": 52916 }, { "epoch": 2.59, "grad_norm": 0.7147135138511658, "learning_rate": 2.67946897507313e-05, "loss": 3.0395, "step": 52917 }, { "epoch": 2.59, "grad_norm": 0.7095446586608887, "learning_rate": 2.6788329900043147e-05, "loss": 3.0561, "step": 52918 }, { "epoch": 2.59, "grad_norm": 0.7295847535133362, "learning_rate": 2.6781970768941985e-05, "loss": 3.0686, "step": 52919 }, { "epoch": 2.59, "grad_norm": 0.7395632863044739, "learning_rate": 2.6775612357444564e-05, "loss": 3.1226, "step": 52920 }, { "epoch": 2.59, "grad_norm": 0.7481321096420288, "learning_rate": 2.6769254665567575e-05, "loss": 2.9356, "step": 52921 }, { "epoch": 2.59, "grad_norm": 0.7256965637207031, "learning_rate": 2.67628976933278e-05, "loss": 2.8715, "step": 52922 }, { "epoch": 2.59, "grad_norm": 0.7288814783096313, "learning_rate": 2.6756541440741996e-05, "loss": 3.017, "step": 52923 }, { "epoch": 2.59, "grad_norm": 0.7406113743782043, "learning_rate": 2.675018590782678e-05, "loss": 3.1521, "step": 52924 }, { "epoch": 2.59, "grad_norm": 0.7255039811134338, "learning_rate": 2.6743831094599077e-05, "loss": 2.865, "step": 52925 }, { "epoch": 2.59, "grad_norm": 0.7836463451385498, "learning_rate": 2.673747700107547e-05, "loss": 2.8227, "step": 52926 }, { "epoch": 2.59, "grad_norm": 0.7792579531669617, "learning_rate": 2.673112362727281e-05, "loss": 2.8882, "step": 52927 }, { "epoch": 2.59, "grad_norm": 0.7477447986602783, "learning_rate": 2.6724770973207754e-05, "loss": 3.0428, "step": 52928 }, { "epoch": 2.59, "grad_norm": 0.7685944437980652, "learning_rate": 2.671841903889702e-05, "loss": 2.9894, "step": 52929 }, { "epoch": 2.59, "grad_norm": 0.7276458740234375, "learning_rate": 2.671206782435743e-05, "loss": 2.9397, "step": 52930 }, { "epoch": 2.59, "grad_norm": 0.766247034072876, "learning_rate": 2.670571732960567e-05, "loss": 3.0083, "step": 52931 }, { "epoch": 2.59, "grad_norm": 0.759260356426239, "learning_rate": 2.669936755465846e-05, "loss": 2.7322, "step": 52932 }, { "epoch": 2.59, "grad_norm": 0.7431995272636414, "learning_rate": 2.6693018499532483e-05, "loss": 2.9465, "step": 52933 }, { "epoch": 2.59, "grad_norm": 0.744221568107605, "learning_rate": 2.668667016424446e-05, "loss": 3.0348, "step": 52934 }, { "epoch": 2.59, "grad_norm": 0.8303017616271973, "learning_rate": 2.6680322548811216e-05, "loss": 2.8454, "step": 52935 }, { "epoch": 2.59, "grad_norm": 0.7433030605316162, "learning_rate": 2.6673975653249336e-05, "loss": 2.8534, "step": 52936 }, { "epoch": 2.59, "grad_norm": 0.7234331965446472, "learning_rate": 2.666762947757567e-05, "loss": 2.7294, "step": 52937 }, { "epoch": 2.59, "grad_norm": 0.7342804074287415, "learning_rate": 2.666128402180684e-05, "loss": 3.0371, "step": 52938 }, { "epoch": 2.59, "grad_norm": 0.7552002668380737, "learning_rate": 2.6654939285959564e-05, "loss": 3.075, "step": 52939 }, { "epoch": 2.59, "grad_norm": 0.6919820308685303, "learning_rate": 2.6648595270050598e-05, "loss": 2.9163, "step": 52940 }, { "epoch": 2.59, "grad_norm": 0.7407262921333313, "learning_rate": 2.6642251974096597e-05, "loss": 2.9234, "step": 52941 }, { "epoch": 2.59, "grad_norm": 0.7760119438171387, "learning_rate": 2.663590939811434e-05, "loss": 3.0161, "step": 52942 }, { "epoch": 2.59, "grad_norm": 0.7739540338516235, "learning_rate": 2.6629567542120422e-05, "loss": 2.8185, "step": 52943 }, { "epoch": 2.59, "grad_norm": 0.7417166829109192, "learning_rate": 2.662322640613166e-05, "loss": 2.893, "step": 52944 }, { "epoch": 2.59, "grad_norm": 0.7252646088600159, "learning_rate": 2.661688599016474e-05, "loss": 2.8558, "step": 52945 }, { "epoch": 2.59, "grad_norm": 0.7560940980911255, "learning_rate": 2.6610546294236245e-05, "loss": 2.9693, "step": 52946 }, { "epoch": 2.59, "grad_norm": 0.7531810402870178, "learning_rate": 2.6604207318363003e-05, "loss": 3.0296, "step": 52947 }, { "epoch": 2.59, "grad_norm": 0.7631690502166748, "learning_rate": 2.6597869062561627e-05, "loss": 3.013, "step": 52948 }, { "epoch": 2.59, "grad_norm": 0.7429794669151306, "learning_rate": 2.659153152684884e-05, "loss": 2.7711, "step": 52949 }, { "epoch": 2.59, "grad_norm": 0.7256251573562622, "learning_rate": 2.6585194711241397e-05, "loss": 3.0047, "step": 52950 }, { "epoch": 2.6, "grad_norm": 0.7361646890640259, "learning_rate": 2.6578858615755882e-05, "loss": 3.0397, "step": 52951 }, { "epoch": 2.6, "grad_norm": 0.7536941766738892, "learning_rate": 2.6572523240409082e-05, "loss": 2.8646, "step": 52952 }, { "epoch": 2.6, "grad_norm": 0.7746854424476624, "learning_rate": 2.6566188585217652e-05, "loss": 2.9317, "step": 52953 }, { "epoch": 2.6, "grad_norm": 0.7454334497451782, "learning_rate": 2.6559854650198174e-05, "loss": 2.8275, "step": 52954 }, { "epoch": 2.6, "grad_norm": 0.7651907205581665, "learning_rate": 2.6553521435367474e-05, "loss": 2.7026, "step": 52955 }, { "epoch": 2.6, "grad_norm": 0.7534736394882202, "learning_rate": 2.6547188940742136e-05, "loss": 2.9386, "step": 52956 }, { "epoch": 2.6, "grad_norm": 0.7591540217399597, "learning_rate": 2.6540857166338915e-05, "loss": 2.9647, "step": 52957 }, { "epoch": 2.6, "grad_norm": 0.8195220232009888, "learning_rate": 2.6534526112174392e-05, "loss": 2.8516, "step": 52958 }, { "epoch": 2.6, "grad_norm": 0.8042005300521851, "learning_rate": 2.652819577826536e-05, "loss": 2.8863, "step": 52959 }, { "epoch": 2.6, "grad_norm": 0.7308369278907776, "learning_rate": 2.6521866164628437e-05, "loss": 3.1317, "step": 52960 }, { "epoch": 2.6, "grad_norm": 0.7242509126663208, "learning_rate": 2.6515537271280206e-05, "loss": 2.8885, "step": 52961 }, { "epoch": 2.6, "grad_norm": 0.7605120539665222, "learning_rate": 2.6509209098237494e-05, "loss": 3.0218, "step": 52962 }, { "epoch": 2.6, "grad_norm": 0.729441225528717, "learning_rate": 2.6502881645516848e-05, "loss": 2.9623, "step": 52963 }, { "epoch": 2.6, "grad_norm": 0.7368941903114319, "learning_rate": 2.649655491313496e-05, "loss": 3.0673, "step": 52964 }, { "epoch": 2.6, "grad_norm": 0.7427717447280884, "learning_rate": 2.6490228901108546e-05, "loss": 2.8994, "step": 52965 }, { "epoch": 2.6, "grad_norm": 0.7409499883651733, "learning_rate": 2.648390360945426e-05, "loss": 2.772, "step": 52966 }, { "epoch": 2.6, "grad_norm": 0.7534723877906799, "learning_rate": 2.6477579038188722e-05, "loss": 3.1012, "step": 52967 }, { "epoch": 2.6, "grad_norm": 0.7700952887535095, "learning_rate": 2.647125518732852e-05, "loss": 2.9597, "step": 52968 }, { "epoch": 2.6, "grad_norm": 0.718622088432312, "learning_rate": 2.646493205689044e-05, "loss": 2.9911, "step": 52969 }, { "epoch": 2.6, "grad_norm": 0.7912169694900513, "learning_rate": 2.6458609646891106e-05, "loss": 3.0782, "step": 52970 }, { "epoch": 2.6, "grad_norm": 0.766054630279541, "learning_rate": 2.6452287957347095e-05, "loss": 3.1029, "step": 52971 }, { "epoch": 2.6, "grad_norm": 0.816627025604248, "learning_rate": 2.644596698827517e-05, "loss": 2.7479, "step": 52972 }, { "epoch": 2.6, "grad_norm": 0.779098391532898, "learning_rate": 2.6439646739691876e-05, "loss": 2.7438, "step": 52973 }, { "epoch": 2.6, "grad_norm": 0.7731761932373047, "learning_rate": 2.6433327211613974e-05, "loss": 2.9227, "step": 52974 }, { "epoch": 2.6, "grad_norm": 0.7547659277915955, "learning_rate": 2.6427008404058014e-05, "loss": 2.8468, "step": 52975 }, { "epoch": 2.6, "grad_norm": 0.7428092956542969, "learning_rate": 2.6420690317040616e-05, "loss": 2.9101, "step": 52976 }, { "epoch": 2.6, "grad_norm": 0.7234500050544739, "learning_rate": 2.6414372950578533e-05, "loss": 2.7825, "step": 52977 }, { "epoch": 2.6, "grad_norm": 0.7860729098320007, "learning_rate": 2.6408056304688253e-05, "loss": 2.832, "step": 52978 }, { "epoch": 2.6, "grad_norm": 0.7206407189369202, "learning_rate": 2.6401740379386526e-05, "loss": 2.6774, "step": 52979 }, { "epoch": 2.6, "grad_norm": 0.715305507183075, "learning_rate": 2.639542517469001e-05, "loss": 2.9311, "step": 52980 }, { "epoch": 2.6, "grad_norm": 0.7753638625144958, "learning_rate": 2.6389110690615288e-05, "loss": 2.929, "step": 52981 }, { "epoch": 2.6, "grad_norm": 0.7320675253868103, "learning_rate": 2.6382796927178984e-05, "loss": 2.7352, "step": 52982 }, { "epoch": 2.6, "grad_norm": 0.7664274573326111, "learning_rate": 2.6376483884397682e-05, "loss": 2.772, "step": 52983 }, { "epoch": 2.6, "grad_norm": 0.723810076713562, "learning_rate": 2.6370171562288068e-05, "loss": 2.9876, "step": 52984 }, { "epoch": 2.6, "grad_norm": 0.7204124927520752, "learning_rate": 2.6363859960866828e-05, "loss": 2.9197, "step": 52985 }, { "epoch": 2.6, "grad_norm": 0.748243510723114, "learning_rate": 2.6357549080150452e-05, "loss": 2.8726, "step": 52986 }, { "epoch": 2.6, "grad_norm": 0.7590616941452026, "learning_rate": 2.635123892015566e-05, "loss": 2.8701, "step": 52987 }, { "epoch": 2.6, "grad_norm": 0.7760512828826904, "learning_rate": 2.6344929480899068e-05, "loss": 2.7923, "step": 52988 }, { "epoch": 2.6, "grad_norm": 0.7821771502494812, "learning_rate": 2.6338620762397266e-05, "loss": 2.9296, "step": 52989 }, { "epoch": 2.6, "grad_norm": 0.7788903117179871, "learning_rate": 2.6332312764666808e-05, "loss": 2.7867, "step": 52990 }, { "epoch": 2.6, "grad_norm": 0.7191170454025269, "learning_rate": 2.6326005487724345e-05, "loss": 3.0973, "step": 52991 }, { "epoch": 2.6, "grad_norm": 0.7111521363258362, "learning_rate": 2.6319698931586598e-05, "loss": 2.9587, "step": 52992 }, { "epoch": 2.6, "grad_norm": 0.742542028427124, "learning_rate": 2.6313393096270053e-05, "loss": 2.8454, "step": 52993 }, { "epoch": 2.6, "grad_norm": 0.7643880844116211, "learning_rate": 2.6307087981791365e-05, "loss": 2.9678, "step": 52994 }, { "epoch": 2.6, "grad_norm": 0.7396053075790405, "learning_rate": 2.630078358816712e-05, "loss": 2.7297, "step": 52995 }, { "epoch": 2.6, "grad_norm": 0.7279162406921387, "learning_rate": 2.6294479915413968e-05, "loss": 2.7479, "step": 52996 }, { "epoch": 2.6, "grad_norm": 0.7429473996162415, "learning_rate": 2.62881769635485e-05, "loss": 2.9331, "step": 52997 }, { "epoch": 2.6, "grad_norm": 0.7297216057777405, "learning_rate": 2.6281874732587238e-05, "loss": 2.9241, "step": 52998 }, { "epoch": 2.6, "grad_norm": 0.7150318026542664, "learning_rate": 2.6275573222546898e-05, "loss": 2.9074, "step": 52999 }, { "epoch": 2.6, "grad_norm": 0.7341070175170898, "learning_rate": 2.6269272433443933e-05, "loss": 2.8459, "step": 53000 }, { "epoch": 2.6, "grad_norm": 0.741511881351471, "learning_rate": 2.6262972365295067e-05, "loss": 3.1279, "step": 53001 }, { "epoch": 2.6, "grad_norm": 0.7074028849601746, "learning_rate": 2.625667301811688e-05, "loss": 2.8032, "step": 53002 }, { "epoch": 2.6, "grad_norm": 0.7410276532173157, "learning_rate": 2.6250374391925933e-05, "loss": 2.6586, "step": 53003 }, { "epoch": 2.6, "grad_norm": 0.7217987775802612, "learning_rate": 2.624407648673881e-05, "loss": 2.6289, "step": 53004 }, { "epoch": 2.6, "grad_norm": 0.7761947512626648, "learning_rate": 2.623777930257206e-05, "loss": 2.86, "step": 53005 }, { "epoch": 2.6, "grad_norm": 0.7420976161956787, "learning_rate": 2.6231482839442307e-05, "loss": 3.0115, "step": 53006 }, { "epoch": 2.6, "grad_norm": 0.7364739179611206, "learning_rate": 2.6225187097366206e-05, "loss": 2.8002, "step": 53007 }, { "epoch": 2.6, "grad_norm": 0.7955524325370789, "learning_rate": 2.621889207636021e-05, "loss": 2.9859, "step": 53008 }, { "epoch": 2.6, "grad_norm": 0.7260406613349915, "learning_rate": 2.6212597776441034e-05, "loss": 2.896, "step": 53009 }, { "epoch": 2.6, "grad_norm": 0.7395499348640442, "learning_rate": 2.6206304197625107e-05, "loss": 2.9315, "step": 53010 }, { "epoch": 2.6, "grad_norm": 0.7385746836662292, "learning_rate": 2.6200011339929138e-05, "loss": 2.8443, "step": 53011 }, { "epoch": 2.6, "grad_norm": 0.7338225245475769, "learning_rate": 2.6193719203369658e-05, "loss": 2.9998, "step": 53012 }, { "epoch": 2.6, "grad_norm": 0.7368943691253662, "learning_rate": 2.6187427787963143e-05, "loss": 2.812, "step": 53013 }, { "epoch": 2.6, "grad_norm": 0.7941758036613464, "learning_rate": 2.6181137093726358e-05, "loss": 2.964, "step": 53014 }, { "epoch": 2.6, "grad_norm": 0.8088509440422058, "learning_rate": 2.6174847120675646e-05, "loss": 3.0882, "step": 53015 }, { "epoch": 2.6, "grad_norm": 0.7890652418136597, "learning_rate": 2.6168557868827733e-05, "loss": 2.8184, "step": 53016 }, { "epoch": 2.6, "grad_norm": 0.7560929656028748, "learning_rate": 2.6162269338199172e-05, "loss": 2.9436, "step": 53017 }, { "epoch": 2.6, "grad_norm": 0.7155969738960266, "learning_rate": 2.615598152880648e-05, "loss": 2.9308, "step": 53018 }, { "epoch": 2.6, "grad_norm": 0.7594807147979736, "learning_rate": 2.6149694440666246e-05, "loss": 2.8756, "step": 53019 }, { "epoch": 2.6, "grad_norm": 0.7847410440444946, "learning_rate": 2.6143408073794957e-05, "loss": 3.0293, "step": 53020 }, { "epoch": 2.6, "grad_norm": 0.8248916864395142, "learning_rate": 2.6137122428209234e-05, "loss": 2.7244, "step": 53021 }, { "epoch": 2.6, "grad_norm": 0.7598782777786255, "learning_rate": 2.6130837503925662e-05, "loss": 2.9428, "step": 53022 }, { "epoch": 2.6, "grad_norm": 0.8169916868209839, "learning_rate": 2.6124553300960725e-05, "loss": 2.9181, "step": 53023 }, { "epoch": 2.6, "grad_norm": 0.7497507333755493, "learning_rate": 2.611826981933105e-05, "loss": 2.7606, "step": 53024 }, { "epoch": 2.6, "grad_norm": 0.7234208583831787, "learning_rate": 2.611198705905312e-05, "loss": 2.9675, "step": 53025 }, { "epoch": 2.6, "grad_norm": 0.6970089077949524, "learning_rate": 2.6105705020143487e-05, "loss": 2.9544, "step": 53026 }, { "epoch": 2.6, "grad_norm": 0.702825129032135, "learning_rate": 2.6099423702618738e-05, "loss": 2.8923, "step": 53027 }, { "epoch": 2.6, "grad_norm": 0.7164490222930908, "learning_rate": 2.6093143106495362e-05, "loss": 3.0565, "step": 53028 }, { "epoch": 2.6, "grad_norm": 0.8021504878997803, "learning_rate": 2.6086863231789978e-05, "loss": 2.9865, "step": 53029 }, { "epoch": 2.6, "grad_norm": 0.7775247097015381, "learning_rate": 2.608058407851904e-05, "loss": 2.9305, "step": 53030 }, { "epoch": 2.6, "grad_norm": 0.903372049331665, "learning_rate": 2.60743056466991e-05, "loss": 2.945, "step": 53031 }, { "epoch": 2.6, "grad_norm": 0.7477425932884216, "learning_rate": 2.6068027936346814e-05, "loss": 2.6972, "step": 53032 }, { "epoch": 2.6, "grad_norm": 0.7447633147239685, "learning_rate": 2.6061750947478598e-05, "loss": 3.0102, "step": 53033 }, { "epoch": 2.6, "grad_norm": 0.7586161494255066, "learning_rate": 2.605547468011101e-05, "loss": 2.9697, "step": 53034 }, { "epoch": 2.6, "grad_norm": 0.7298742532730103, "learning_rate": 2.6049199134260535e-05, "loss": 2.9407, "step": 53035 }, { "epoch": 2.6, "grad_norm": 0.7363532185554504, "learning_rate": 2.6042924309943724e-05, "loss": 3.0353, "step": 53036 }, { "epoch": 2.6, "grad_norm": 0.7362156510353088, "learning_rate": 2.6036650207177202e-05, "loss": 2.9521, "step": 53037 }, { "epoch": 2.6, "grad_norm": 0.7948529720306396, "learning_rate": 2.603037682597735e-05, "loss": 2.9741, "step": 53038 }, { "epoch": 2.6, "grad_norm": 0.7342039942741394, "learning_rate": 2.6024104166360826e-05, "loss": 2.9473, "step": 53039 }, { "epoch": 2.6, "grad_norm": 0.7215148210525513, "learning_rate": 2.601783222834408e-05, "loss": 3.0339, "step": 53040 }, { "epoch": 2.6, "grad_norm": 0.7647704482078552, "learning_rate": 2.601156101194357e-05, "loss": 2.7878, "step": 53041 }, { "epoch": 2.6, "grad_norm": 0.7760405540466309, "learning_rate": 2.6005290517175948e-05, "loss": 2.7535, "step": 53042 }, { "epoch": 2.6, "grad_norm": 0.7145057916641235, "learning_rate": 2.5999020744057598e-05, "loss": 2.9127, "step": 53043 }, { "epoch": 2.6, "grad_norm": 0.7269275784492493, "learning_rate": 2.599275169260514e-05, "loss": 2.6882, "step": 53044 }, { "epoch": 2.6, "grad_norm": 0.7115113735198975, "learning_rate": 2.5986483362835e-05, "loss": 2.9195, "step": 53045 }, { "epoch": 2.6, "grad_norm": 0.7383373379707336, "learning_rate": 2.5980215754763788e-05, "loss": 2.9078, "step": 53046 }, { "epoch": 2.6, "grad_norm": 0.6950742602348328, "learning_rate": 2.5973948868407936e-05, "loss": 2.8095, "step": 53047 }, { "epoch": 2.6, "grad_norm": 0.7306373715400696, "learning_rate": 2.596768270378392e-05, "loss": 2.9741, "step": 53048 }, { "epoch": 2.6, "grad_norm": 0.7336162328720093, "learning_rate": 2.5961417260908335e-05, "loss": 2.9961, "step": 53049 }, { "epoch": 2.6, "grad_norm": 0.7189518213272095, "learning_rate": 2.5955152539797598e-05, "loss": 3.0896, "step": 53050 }, { "epoch": 2.6, "grad_norm": 0.8078373670578003, "learning_rate": 2.5948888540468226e-05, "loss": 2.5814, "step": 53051 }, { "epoch": 2.6, "grad_norm": 0.7885643839836121, "learning_rate": 2.594262526293681e-05, "loss": 3.1085, "step": 53052 }, { "epoch": 2.6, "grad_norm": 0.7464584112167358, "learning_rate": 2.5936362707219704e-05, "loss": 2.8979, "step": 53053 }, { "epoch": 2.6, "grad_norm": 0.7252410650253296, "learning_rate": 2.593010087333356e-05, "loss": 2.8299, "step": 53054 }, { "epoch": 2.6, "grad_norm": 0.7533923387527466, "learning_rate": 2.5923839761294762e-05, "loss": 2.694, "step": 53055 }, { "epoch": 2.6, "grad_norm": 0.700678825378418, "learning_rate": 2.5917579371119768e-05, "loss": 2.9615, "step": 53056 }, { "epoch": 2.6, "grad_norm": 0.7296587824821472, "learning_rate": 2.591131970282516e-05, "loss": 2.9133, "step": 53057 }, { "epoch": 2.6, "grad_norm": 0.7433834671974182, "learning_rate": 2.5905060756427365e-05, "loss": 2.9297, "step": 53058 }, { "epoch": 2.6, "grad_norm": 0.7818628549575806, "learning_rate": 2.589880253194293e-05, "loss": 2.846, "step": 53059 }, { "epoch": 2.6, "grad_norm": 0.775662362575531, "learning_rate": 2.5892545029388245e-05, "loss": 2.7622, "step": 53060 }, { "epoch": 2.6, "grad_norm": 0.7537034153938293, "learning_rate": 2.5886288248779896e-05, "loss": 3.0378, "step": 53061 }, { "epoch": 2.6, "grad_norm": 0.7831193804740906, "learning_rate": 2.5880032190134303e-05, "loss": 2.782, "step": 53062 }, { "epoch": 2.6, "grad_norm": 0.7730798125267029, "learning_rate": 2.587377685346792e-05, "loss": 2.9905, "step": 53063 }, { "epoch": 2.6, "grad_norm": 0.7518443465232849, "learning_rate": 2.5867522238797266e-05, "loss": 2.8506, "step": 53064 }, { "epoch": 2.6, "grad_norm": 0.7517948150634766, "learning_rate": 2.5861268346138796e-05, "loss": 2.8146, "step": 53065 }, { "epoch": 2.6, "grad_norm": 0.7461619973182678, "learning_rate": 2.5855015175508998e-05, "loss": 2.994, "step": 53066 }, { "epoch": 2.6, "grad_norm": 0.7383305430412292, "learning_rate": 2.5848762726924287e-05, "loss": 2.8297, "step": 53067 }, { "epoch": 2.6, "grad_norm": 0.7097854614257812, "learning_rate": 2.584251100040122e-05, "loss": 2.6958, "step": 53068 }, { "epoch": 2.6, "grad_norm": 0.7683972120285034, "learning_rate": 2.5836259995956254e-05, "loss": 2.7007, "step": 53069 }, { "epoch": 2.6, "grad_norm": 0.7571167945861816, "learning_rate": 2.583000971360574e-05, "loss": 2.9229, "step": 53070 }, { "epoch": 2.6, "grad_norm": 0.7330532670021057, "learning_rate": 2.5823760153366258e-05, "loss": 2.9509, "step": 53071 }, { "epoch": 2.6, "grad_norm": 0.7204066514968872, "learning_rate": 2.581751131525417e-05, "loss": 2.9927, "step": 53072 }, { "epoch": 2.6, "grad_norm": 0.7530893683433533, "learning_rate": 2.5811263199285992e-05, "loss": 2.9785, "step": 53073 }, { "epoch": 2.6, "grad_norm": 0.7310274839401245, "learning_rate": 2.5805015805478245e-05, "loss": 2.8125, "step": 53074 }, { "epoch": 2.6, "grad_norm": 0.7056111097335815, "learning_rate": 2.579876913384725e-05, "loss": 2.8229, "step": 53075 }, { "epoch": 2.6, "grad_norm": 0.7395758628845215, "learning_rate": 2.5792523184409597e-05, "loss": 3.0898, "step": 53076 }, { "epoch": 2.6, "grad_norm": 0.7391893267631531, "learning_rate": 2.578627795718167e-05, "loss": 2.7922, "step": 53077 }, { "epoch": 2.6, "grad_norm": 0.719342052936554, "learning_rate": 2.5780033452179815e-05, "loss": 2.8224, "step": 53078 }, { "epoch": 2.6, "grad_norm": 0.7190227508544922, "learning_rate": 2.5773789669420662e-05, "loss": 2.9418, "step": 53079 }, { "epoch": 2.6, "grad_norm": 0.7570742964744568, "learning_rate": 2.576754660892053e-05, "loss": 2.5958, "step": 53080 }, { "epoch": 2.6, "grad_norm": 0.7037019729614258, "learning_rate": 2.5761304270695936e-05, "loss": 2.9506, "step": 53081 }, { "epoch": 2.6, "grad_norm": 0.7258500456809998, "learning_rate": 2.5755062654763237e-05, "loss": 2.6689, "step": 53082 }, { "epoch": 2.6, "grad_norm": 0.7900306582450867, "learning_rate": 2.5748821761138982e-05, "loss": 3.0713, "step": 53083 }, { "epoch": 2.6, "grad_norm": 0.7837575078010559, "learning_rate": 2.5742581589839563e-05, "loss": 2.656, "step": 53084 }, { "epoch": 2.6, "grad_norm": 0.715522050857544, "learning_rate": 2.5736342140881326e-05, "loss": 2.9993, "step": 53085 }, { "epoch": 2.6, "grad_norm": 0.7533067464828491, "learning_rate": 2.5730103414280834e-05, "loss": 3.0117, "step": 53086 }, { "epoch": 2.6, "grad_norm": 0.7553693652153015, "learning_rate": 2.5723865410054434e-05, "loss": 2.9827, "step": 53087 }, { "epoch": 2.6, "grad_norm": 0.7062312364578247, "learning_rate": 2.5717628128218548e-05, "loss": 3.022, "step": 53088 }, { "epoch": 2.6, "grad_norm": 0.7795845866203308, "learning_rate": 2.571139156878973e-05, "loss": 2.8414, "step": 53089 }, { "epoch": 2.6, "grad_norm": 0.7023152709007263, "learning_rate": 2.5705155731784333e-05, "loss": 2.8888, "step": 53090 }, { "epoch": 2.6, "grad_norm": 0.7439648509025574, "learning_rate": 2.5698920617218743e-05, "loss": 2.8256, "step": 53091 }, { "epoch": 2.6, "grad_norm": 0.7769492268562317, "learning_rate": 2.569268622510935e-05, "loss": 2.7845, "step": 53092 }, { "epoch": 2.6, "grad_norm": 0.7483662962913513, "learning_rate": 2.5686452555472637e-05, "loss": 2.8814, "step": 53093 }, { "epoch": 2.6, "grad_norm": 0.7647069096565247, "learning_rate": 2.568021960832506e-05, "loss": 2.6949, "step": 53094 }, { "epoch": 2.6, "grad_norm": 0.7527300715446472, "learning_rate": 2.5673987383682936e-05, "loss": 3.0339, "step": 53095 }, { "epoch": 2.6, "grad_norm": 0.7335328459739685, "learning_rate": 2.566775588156279e-05, "loss": 2.869, "step": 53096 }, { "epoch": 2.6, "grad_norm": 0.7456409335136414, "learning_rate": 2.566152510198094e-05, "loss": 2.9675, "step": 53097 }, { "epoch": 2.6, "grad_norm": 0.7429218888282776, "learning_rate": 2.5655295044953873e-05, "loss": 2.9004, "step": 53098 }, { "epoch": 2.6, "grad_norm": 0.7677924036979675, "learning_rate": 2.5649065710497974e-05, "loss": 2.9512, "step": 53099 }, { "epoch": 2.6, "grad_norm": 0.7493651509284973, "learning_rate": 2.5642837098629566e-05, "loss": 2.9513, "step": 53100 }, { "epoch": 2.6, "grad_norm": 0.7118934392929077, "learning_rate": 2.5636609209365168e-05, "loss": 2.6457, "step": 53101 }, { "epoch": 2.6, "grad_norm": 0.8193805813789368, "learning_rate": 2.5630382042721098e-05, "loss": 2.668, "step": 53102 }, { "epoch": 2.6, "grad_norm": 0.6974309086799622, "learning_rate": 2.5624155598713815e-05, "loss": 2.8641, "step": 53103 }, { "epoch": 2.6, "grad_norm": 0.7470758557319641, "learning_rate": 2.5617929877359732e-05, "loss": 2.9929, "step": 53104 }, { "epoch": 2.6, "grad_norm": 0.7814863324165344, "learning_rate": 2.5611704878675244e-05, "loss": 2.8942, "step": 53105 }, { "epoch": 2.6, "grad_norm": 0.7379601001739502, "learning_rate": 2.5605480602676698e-05, "loss": 3.0074, "step": 53106 }, { "epoch": 2.6, "grad_norm": 0.7781730890274048, "learning_rate": 2.5599257049380452e-05, "loss": 2.663, "step": 53107 }, { "epoch": 2.6, "grad_norm": 0.7469813227653503, "learning_rate": 2.5593034218802956e-05, "loss": 2.8231, "step": 53108 }, { "epoch": 2.6, "grad_norm": 0.7671642303466797, "learning_rate": 2.5586812110960665e-05, "loss": 2.9087, "step": 53109 }, { "epoch": 2.6, "grad_norm": 0.7788114547729492, "learning_rate": 2.558059072586983e-05, "loss": 2.7514, "step": 53110 }, { "epoch": 2.6, "grad_norm": 0.7065609693527222, "learning_rate": 2.5574370063546944e-05, "loss": 2.991, "step": 53111 }, { "epoch": 2.6, "grad_norm": 0.725106418132782, "learning_rate": 2.5568150124008324e-05, "loss": 2.8381, "step": 53112 }, { "epoch": 2.6, "grad_norm": 0.7204715013504028, "learning_rate": 2.5561930907270456e-05, "loss": 2.9109, "step": 53113 }, { "epoch": 2.6, "grad_norm": 0.7312618494033813, "learning_rate": 2.5555712413349594e-05, "loss": 3.0618, "step": 53114 }, { "epoch": 2.6, "grad_norm": 0.7476996779441833, "learning_rate": 2.554949464226216e-05, "loss": 2.8415, "step": 53115 }, { "epoch": 2.6, "grad_norm": 0.763667106628418, "learning_rate": 2.554327759402457e-05, "loss": 2.8231, "step": 53116 }, { "epoch": 2.6, "grad_norm": 0.759936511516571, "learning_rate": 2.5537061268653114e-05, "loss": 2.825, "step": 53117 }, { "epoch": 2.6, "grad_norm": 0.7484183311462402, "learning_rate": 2.5530845666164212e-05, "loss": 2.7023, "step": 53118 }, { "epoch": 2.6, "grad_norm": 0.676468014717102, "learning_rate": 2.5524630786574285e-05, "loss": 3.0118, "step": 53119 }, { "epoch": 2.6, "grad_norm": 0.7524356842041016, "learning_rate": 2.5518416629899687e-05, "loss": 2.7162, "step": 53120 }, { "epoch": 2.6, "grad_norm": 0.7328376173973083, "learning_rate": 2.5512203196156735e-05, "loss": 2.9075, "step": 53121 }, { "epoch": 2.6, "grad_norm": 0.7236394286155701, "learning_rate": 2.550599048536175e-05, "loss": 2.8896, "step": 53122 }, { "epoch": 2.6, "grad_norm": 0.7498464584350586, "learning_rate": 2.5499778497531188e-05, "loss": 2.8998, "step": 53123 }, { "epoch": 2.6, "grad_norm": 0.7795760631561279, "learning_rate": 2.54935672326814e-05, "loss": 2.6818, "step": 53124 }, { "epoch": 2.6, "grad_norm": 0.7325364351272583, "learning_rate": 2.5487356690828707e-05, "loss": 2.8785, "step": 53125 }, { "epoch": 2.6, "grad_norm": 0.7392412424087524, "learning_rate": 2.54811468719895e-05, "loss": 2.9473, "step": 53126 }, { "epoch": 2.6, "grad_norm": 0.7106963992118835, "learning_rate": 2.547493777618016e-05, "loss": 2.9328, "step": 53127 }, { "epoch": 2.6, "grad_norm": 0.7459375262260437, "learning_rate": 2.546872940341691e-05, "loss": 2.8673, "step": 53128 }, { "epoch": 2.6, "grad_norm": 0.7813073992729187, "learning_rate": 2.546252175371627e-05, "loss": 2.8378, "step": 53129 }, { "epoch": 2.6, "grad_norm": 0.7099730372428894, "learning_rate": 2.545631482709446e-05, "loss": 2.8736, "step": 53130 }, { "epoch": 2.6, "grad_norm": 0.739006757736206, "learning_rate": 2.5450108623567933e-05, "loss": 2.7253, "step": 53131 }, { "epoch": 2.6, "grad_norm": 0.7848395109176636, "learning_rate": 2.5443903143152912e-05, "loss": 2.7427, "step": 53132 }, { "epoch": 2.6, "grad_norm": 0.7411284446716309, "learning_rate": 2.543769838586581e-05, "loss": 2.8126, "step": 53133 }, { "epoch": 2.6, "grad_norm": 0.8115738034248352, "learning_rate": 2.5431494351723058e-05, "loss": 3.1662, "step": 53134 }, { "epoch": 2.6, "grad_norm": 0.7761831879615784, "learning_rate": 2.542529104074087e-05, "loss": 2.9151, "step": 53135 }, { "epoch": 2.6, "grad_norm": 0.7265602350234985, "learning_rate": 2.541908845293563e-05, "loss": 3.0349, "step": 53136 }, { "epoch": 2.6, "grad_norm": 0.721573531627655, "learning_rate": 2.5412886588323634e-05, "loss": 2.9599, "step": 53137 }, { "epoch": 2.6, "grad_norm": 0.7648131251335144, "learning_rate": 2.5406685446921228e-05, "loss": 2.9716, "step": 53138 }, { "epoch": 2.6, "grad_norm": 0.7431436777114868, "learning_rate": 2.5400485028744833e-05, "loss": 2.8647, "step": 53139 }, { "epoch": 2.6, "grad_norm": 0.7635769844055176, "learning_rate": 2.5394285333810637e-05, "loss": 3.0528, "step": 53140 }, { "epoch": 2.6, "grad_norm": 0.7431437373161316, "learning_rate": 2.538808636213513e-05, "loss": 3.0272, "step": 53141 }, { "epoch": 2.6, "grad_norm": 0.735590934753418, "learning_rate": 2.5381888113734528e-05, "loss": 2.8362, "step": 53142 }, { "epoch": 2.6, "grad_norm": 0.7066057920455933, "learning_rate": 2.537569058862512e-05, "loss": 3.017, "step": 53143 }, { "epoch": 2.6, "grad_norm": 0.7370970845222473, "learning_rate": 2.5369493786823358e-05, "loss": 2.9394, "step": 53144 }, { "epoch": 2.6, "grad_norm": 0.721082866191864, "learning_rate": 2.5363297708345465e-05, "loss": 2.8916, "step": 53145 }, { "epoch": 2.6, "grad_norm": 0.7915112376213074, "learning_rate": 2.5357102353207827e-05, "loss": 2.8815, "step": 53146 }, { "epoch": 2.6, "grad_norm": 0.7461155652999878, "learning_rate": 2.5350907721426663e-05, "loss": 2.838, "step": 53147 }, { "epoch": 2.6, "grad_norm": 0.7456420660018921, "learning_rate": 2.5344713813018425e-05, "loss": 2.8849, "step": 53148 }, { "epoch": 2.6, "grad_norm": 0.7861379384994507, "learning_rate": 2.5338520627999337e-05, "loss": 2.6795, "step": 53149 }, { "epoch": 2.6, "grad_norm": 0.7006232738494873, "learning_rate": 2.533232816638565e-05, "loss": 3.0351, "step": 53150 }, { "epoch": 2.6, "grad_norm": 0.7393845319747925, "learning_rate": 2.5326136428193823e-05, "loss": 2.7669, "step": 53151 }, { "epoch": 2.6, "grad_norm": 0.7685011029243469, "learning_rate": 2.531994541344007e-05, "loss": 2.8029, "step": 53152 }, { "epoch": 2.6, "grad_norm": 0.7489452958106995, "learning_rate": 2.531375512214071e-05, "loss": 3.1667, "step": 53153 }, { "epoch": 2.6, "grad_norm": 0.7370985150337219, "learning_rate": 2.5307565554312038e-05, "loss": 2.8493, "step": 53154 }, { "epoch": 2.6, "grad_norm": 0.7306023240089417, "learning_rate": 2.5301376709970368e-05, "loss": 2.9711, "step": 53155 }, { "epoch": 2.61, "grad_norm": 0.7589641809463501, "learning_rate": 2.5295188589132054e-05, "loss": 2.9249, "step": 53156 }, { "epoch": 2.61, "grad_norm": 0.7181271314620972, "learning_rate": 2.5289001191813353e-05, "loss": 2.8004, "step": 53157 }, { "epoch": 2.61, "grad_norm": 0.765256941318512, "learning_rate": 2.5282814518030547e-05, "loss": 2.964, "step": 53158 }, { "epoch": 2.61, "grad_norm": 0.6980361342430115, "learning_rate": 2.5276628567799894e-05, "loss": 2.819, "step": 53159 }, { "epoch": 2.61, "grad_norm": 0.6991076469421387, "learning_rate": 2.5270443341137714e-05, "loss": 3.0081, "step": 53160 }, { "epoch": 2.61, "grad_norm": 0.8100741505622864, "learning_rate": 2.526425883806039e-05, "loss": 2.9064, "step": 53161 }, { "epoch": 2.61, "grad_norm": 0.7279528975486755, "learning_rate": 2.5258075058584047e-05, "loss": 3.0166, "step": 53162 }, { "epoch": 2.61, "grad_norm": 0.7336760759353638, "learning_rate": 2.5251892002725138e-05, "loss": 2.7742, "step": 53163 }, { "epoch": 2.61, "grad_norm": 0.7446197271347046, "learning_rate": 2.524570967049988e-05, "loss": 3.0359, "step": 53164 }, { "epoch": 2.61, "grad_norm": 0.7815460562705994, "learning_rate": 2.5239528061924462e-05, "loss": 3.029, "step": 53165 }, { "epoch": 2.61, "grad_norm": 0.8095081448554993, "learning_rate": 2.5233347177015307e-05, "loss": 2.8958, "step": 53166 }, { "epoch": 2.61, "grad_norm": 0.7752934694290161, "learning_rate": 2.5227167015788597e-05, "loss": 2.8065, "step": 53167 }, { "epoch": 2.61, "grad_norm": 0.7434821724891663, "learning_rate": 2.5220987578260688e-05, "loss": 2.9032, "step": 53168 }, { "epoch": 2.61, "grad_norm": 0.7885562181472778, "learning_rate": 2.5214808864447765e-05, "loss": 3.0296, "step": 53169 }, { "epoch": 2.61, "grad_norm": 0.7058457136154175, "learning_rate": 2.520863087436622e-05, "loss": 2.7885, "step": 53170 }, { "epoch": 2.61, "grad_norm": 0.7025486826896667, "learning_rate": 2.5202453608032236e-05, "loss": 2.8992, "step": 53171 }, { "epoch": 2.61, "grad_norm": 0.7743384838104248, "learning_rate": 2.5196277065462066e-05, "loss": 2.8032, "step": 53172 }, { "epoch": 2.61, "grad_norm": 0.7513494491577148, "learning_rate": 2.5190101246672068e-05, "loss": 2.8837, "step": 53173 }, { "epoch": 2.61, "grad_norm": 0.7396803498268127, "learning_rate": 2.5183926151678392e-05, "loss": 2.8922, "step": 53174 }, { "epoch": 2.61, "grad_norm": 0.7196879386901855, "learning_rate": 2.5177751780497358e-05, "loss": 3.0125, "step": 53175 }, { "epoch": 2.61, "grad_norm": 0.7186749577522278, "learning_rate": 2.517157813314532e-05, "loss": 2.7121, "step": 53176 }, { "epoch": 2.61, "grad_norm": 0.7624224424362183, "learning_rate": 2.5165405209638366e-05, "loss": 2.9514, "step": 53177 }, { "epoch": 2.61, "grad_norm": 0.7520177364349365, "learning_rate": 2.5159233009992917e-05, "loss": 3.0939, "step": 53178 }, { "epoch": 2.61, "grad_norm": 0.7938098907470703, "learning_rate": 2.5153061534225128e-05, "loss": 2.8881, "step": 53179 }, { "epoch": 2.61, "grad_norm": 0.7314983606338501, "learning_rate": 2.514689078235125e-05, "loss": 2.8824, "step": 53180 }, { "epoch": 2.61, "grad_norm": 0.7542241811752319, "learning_rate": 2.5140720754387634e-05, "loss": 2.9394, "step": 53181 }, { "epoch": 2.61, "grad_norm": 0.7400225400924683, "learning_rate": 2.513455145035037e-05, "loss": 2.7797, "step": 53182 }, { "epoch": 2.61, "grad_norm": 0.747697651386261, "learning_rate": 2.5128382870255882e-05, "loss": 2.9337, "step": 53183 }, { "epoch": 2.61, "grad_norm": 0.8165611624717712, "learning_rate": 2.5122215014120252e-05, "loss": 2.7655, "step": 53184 }, { "epoch": 2.61, "grad_norm": 0.7541787624359131, "learning_rate": 2.5116047881959866e-05, "loss": 2.9598, "step": 53185 }, { "epoch": 2.61, "grad_norm": 0.7399340271949768, "learning_rate": 2.5109881473790914e-05, "loss": 2.9438, "step": 53186 }, { "epoch": 2.61, "grad_norm": 0.7630729079246521, "learning_rate": 2.5103715789629585e-05, "loss": 2.9433, "step": 53187 }, { "epoch": 2.61, "grad_norm": 0.7590308785438538, "learning_rate": 2.509755082949223e-05, "loss": 3.0062, "step": 53188 }, { "epoch": 2.61, "grad_norm": 0.7435056567192078, "learning_rate": 2.5091386593394935e-05, "loss": 2.7753, "step": 53189 }, { "epoch": 2.61, "grad_norm": 0.7512586712837219, "learning_rate": 2.5085223081354023e-05, "loss": 2.9262, "step": 53190 }, { "epoch": 2.61, "grad_norm": 0.723823070526123, "learning_rate": 2.507906029338581e-05, "loss": 2.9895, "step": 53191 }, { "epoch": 2.61, "grad_norm": 0.743118166923523, "learning_rate": 2.5072898229506423e-05, "loss": 2.8739, "step": 53192 }, { "epoch": 2.61, "grad_norm": 0.7139184474945068, "learning_rate": 2.506673688973211e-05, "loss": 2.7876, "step": 53193 }, { "epoch": 2.61, "grad_norm": 0.8011366724967957, "learning_rate": 2.5060576274079024e-05, "loss": 3.0433, "step": 53194 }, { "epoch": 2.61, "grad_norm": 0.7116770148277283, "learning_rate": 2.505441638256349e-05, "loss": 3.1634, "step": 53195 }, { "epoch": 2.61, "grad_norm": 0.7735740542411804, "learning_rate": 2.5048257215201762e-05, "loss": 2.8701, "step": 53196 }, { "epoch": 2.61, "grad_norm": 0.7631982564926147, "learning_rate": 2.5042098772009955e-05, "loss": 2.9832, "step": 53197 }, { "epoch": 2.61, "grad_norm": 0.7225585579872131, "learning_rate": 2.5035941053004392e-05, "loss": 3.0262, "step": 53198 }, { "epoch": 2.61, "grad_norm": 0.744941234588623, "learning_rate": 2.5029784058201197e-05, "loss": 2.9931, "step": 53199 }, { "epoch": 2.61, "grad_norm": 0.7380294799804688, "learning_rate": 2.5023627787616685e-05, "loss": 2.8191, "step": 53200 }, { "epoch": 2.61, "grad_norm": 0.6965009570121765, "learning_rate": 2.501747224126701e-05, "loss": 2.9667, "step": 53201 }, { "epoch": 2.61, "grad_norm": 0.767867922782898, "learning_rate": 2.501131741916833e-05, "loss": 2.8884, "step": 53202 }, { "epoch": 2.61, "grad_norm": 0.7580838203430176, "learning_rate": 2.5005163321336996e-05, "loss": 2.9514, "step": 53203 }, { "epoch": 2.61, "grad_norm": 0.7149063944816589, "learning_rate": 2.499900994778906e-05, "loss": 2.8192, "step": 53204 }, { "epoch": 2.61, "grad_norm": 0.771682858467102, "learning_rate": 2.4992857298540813e-05, "loss": 2.8842, "step": 53205 }, { "epoch": 2.61, "grad_norm": 0.7764263153076172, "learning_rate": 2.498670537360854e-05, "loss": 2.8959, "step": 53206 }, { "epoch": 2.61, "grad_norm": 0.7236824631690979, "learning_rate": 2.4980554173008326e-05, "loss": 2.844, "step": 53207 }, { "epoch": 2.61, "grad_norm": 0.7100843191146851, "learning_rate": 2.4974403696756394e-05, "loss": 2.8599, "step": 53208 }, { "epoch": 2.61, "grad_norm": 0.7423948049545288, "learning_rate": 2.4968253944868933e-05, "loss": 2.8558, "step": 53209 }, { "epoch": 2.61, "grad_norm": 0.698421061038971, "learning_rate": 2.4962104917362124e-05, "loss": 2.9021, "step": 53210 }, { "epoch": 2.61, "grad_norm": 0.7575321793556213, "learning_rate": 2.495595661425226e-05, "loss": 3.1162, "step": 53211 }, { "epoch": 2.61, "grad_norm": 0.710386335849762, "learning_rate": 2.4949809035555425e-05, "loss": 2.816, "step": 53212 }, { "epoch": 2.61, "grad_norm": 0.7282596826553345, "learning_rate": 2.4943662181287937e-05, "loss": 2.9855, "step": 53213 }, { "epoch": 2.61, "grad_norm": 0.7500553727149963, "learning_rate": 2.4937516051465888e-05, "loss": 2.7933, "step": 53214 }, { "epoch": 2.61, "grad_norm": 0.8065747022628784, "learning_rate": 2.493137064610543e-05, "loss": 2.8999, "step": 53215 }, { "epoch": 2.61, "grad_norm": 0.7221754789352417, "learning_rate": 2.492522596522285e-05, "loss": 2.7116, "step": 53216 }, { "epoch": 2.61, "grad_norm": 0.7535700798034668, "learning_rate": 2.4919082008834236e-05, "loss": 2.842, "step": 53217 }, { "epoch": 2.61, "grad_norm": 0.7541929483413696, "learning_rate": 2.4912938776955872e-05, "loss": 2.9126, "step": 53218 }, { "epoch": 2.61, "grad_norm": 0.7476422190666199, "learning_rate": 2.4906796269603847e-05, "loss": 2.9692, "step": 53219 }, { "epoch": 2.61, "grad_norm": 0.7244073152542114, "learning_rate": 2.4900654486794346e-05, "loss": 2.8498, "step": 53220 }, { "epoch": 2.61, "grad_norm": 0.7529792189598083, "learning_rate": 2.489451342854366e-05, "loss": 3.0183, "step": 53221 }, { "epoch": 2.61, "grad_norm": 0.7540156841278076, "learning_rate": 2.4888373094867876e-05, "loss": 2.6817, "step": 53222 }, { "epoch": 2.61, "grad_norm": 0.7746258974075317, "learning_rate": 2.4882233485783144e-05, "loss": 3.0357, "step": 53223 }, { "epoch": 2.61, "grad_norm": 0.7871257662773132, "learning_rate": 2.487609460130565e-05, "loss": 2.6511, "step": 53224 }, { "epoch": 2.61, "grad_norm": 0.7155481576919556, "learning_rate": 2.4869956441451587e-05, "loss": 3.1273, "step": 53225 }, { "epoch": 2.61, "grad_norm": 0.7250311970710754, "learning_rate": 2.4863819006237072e-05, "loss": 2.9099, "step": 53226 }, { "epoch": 2.61, "grad_norm": 0.7574231028556824, "learning_rate": 2.4857682295678326e-05, "loss": 3.0626, "step": 53227 }, { "epoch": 2.61, "grad_norm": 0.7355062961578369, "learning_rate": 2.4851546309791536e-05, "loss": 2.9971, "step": 53228 }, { "epoch": 2.61, "grad_norm": 0.7345170974731445, "learning_rate": 2.484541104859282e-05, "loss": 3.0486, "step": 53229 }, { "epoch": 2.61, "grad_norm": 0.6943804621696472, "learning_rate": 2.4839276512098304e-05, "loss": 2.799, "step": 53230 }, { "epoch": 2.61, "grad_norm": 0.7673830389976501, "learning_rate": 2.483314270032417e-05, "loss": 2.7265, "step": 53231 }, { "epoch": 2.61, "grad_norm": 0.7196083068847656, "learning_rate": 2.4827009613286575e-05, "loss": 2.816, "step": 53232 }, { "epoch": 2.61, "grad_norm": 0.734291136264801, "learning_rate": 2.4820877251001702e-05, "loss": 2.8073, "step": 53233 }, { "epoch": 2.61, "grad_norm": 0.7641286253929138, "learning_rate": 2.4814745613485643e-05, "loss": 3.1057, "step": 53234 }, { "epoch": 2.61, "grad_norm": 0.7599497437477112, "learning_rate": 2.4808614700754647e-05, "loss": 2.8137, "step": 53235 }, { "epoch": 2.61, "grad_norm": 0.7263058423995972, "learning_rate": 2.480248451282474e-05, "loss": 2.8649, "step": 53236 }, { "epoch": 2.61, "grad_norm": 0.7398238182067871, "learning_rate": 2.479635504971217e-05, "loss": 2.7207, "step": 53237 }, { "epoch": 2.61, "grad_norm": 0.7094271779060364, "learning_rate": 2.4790226311433025e-05, "loss": 2.9685, "step": 53238 }, { "epoch": 2.61, "grad_norm": 0.7355355620384216, "learning_rate": 2.478409829800343e-05, "loss": 2.8561, "step": 53239 }, { "epoch": 2.61, "grad_norm": 0.747589111328125, "learning_rate": 2.4777971009439602e-05, "loss": 2.9834, "step": 53240 }, { "epoch": 2.61, "grad_norm": 0.7668625116348267, "learning_rate": 2.477184444575756e-05, "loss": 2.7875, "step": 53241 }, { "epoch": 2.61, "grad_norm": 0.7156922221183777, "learning_rate": 2.476571860697353e-05, "loss": 2.7711, "step": 53242 }, { "epoch": 2.61, "grad_norm": 0.7356650233268738, "learning_rate": 2.475959349310369e-05, "loss": 2.8436, "step": 53243 }, { "epoch": 2.61, "grad_norm": 0.7467832565307617, "learning_rate": 2.47534691041641e-05, "loss": 2.7338, "step": 53244 }, { "epoch": 2.61, "grad_norm": 0.7915269732475281, "learning_rate": 2.4747345440170885e-05, "loss": 2.8669, "step": 53245 }, { "epoch": 2.61, "grad_norm": 0.7523009181022644, "learning_rate": 2.4741222501140155e-05, "loss": 2.8385, "step": 53246 }, { "epoch": 2.61, "grad_norm": 0.8412205576896667, "learning_rate": 2.4735100287088038e-05, "loss": 3.0301, "step": 53247 }, { "epoch": 2.61, "grad_norm": 0.7007306814193726, "learning_rate": 2.4728978798030787e-05, "loss": 3.0305, "step": 53248 }, { "epoch": 2.61, "grad_norm": 0.7333875298500061, "learning_rate": 2.472285803398435e-05, "loss": 2.9612, "step": 53249 }, { "epoch": 2.61, "grad_norm": 0.8363428711891174, "learning_rate": 2.4716737994964986e-05, "loss": 2.8032, "step": 53250 }, { "epoch": 2.61, "grad_norm": 0.7706584334373474, "learning_rate": 2.4710618680988746e-05, "loss": 3.0626, "step": 53251 }, { "epoch": 2.61, "grad_norm": 0.7772002816200256, "learning_rate": 2.4704500092071723e-05, "loss": 2.8498, "step": 53252 }, { "epoch": 2.61, "grad_norm": 0.7295860648155212, "learning_rate": 2.4698382228230095e-05, "loss": 2.9828, "step": 53253 }, { "epoch": 2.61, "grad_norm": 0.763123631477356, "learning_rate": 2.4692265089479922e-05, "loss": 2.9188, "step": 53254 }, { "epoch": 2.61, "grad_norm": 0.8055310845375061, "learning_rate": 2.468614867583736e-05, "loss": 2.993, "step": 53255 }, { "epoch": 2.61, "grad_norm": 0.7372824549674988, "learning_rate": 2.4680032987318454e-05, "loss": 2.5725, "step": 53256 }, { "epoch": 2.61, "grad_norm": 0.8814607262611389, "learning_rate": 2.4673918023939364e-05, "loss": 2.8363, "step": 53257 }, { "epoch": 2.61, "grad_norm": 0.7216055393218994, "learning_rate": 2.4667803785716245e-05, "loss": 2.7199, "step": 53258 }, { "epoch": 2.61, "grad_norm": 0.7859689593315125, "learning_rate": 2.4661690272665114e-05, "loss": 2.9632, "step": 53259 }, { "epoch": 2.61, "grad_norm": 0.7562228441238403, "learning_rate": 2.4655577484802125e-05, "loss": 2.9153, "step": 53260 }, { "epoch": 2.61, "grad_norm": 0.7715989351272583, "learning_rate": 2.46494654221433e-05, "loss": 2.8636, "step": 53261 }, { "epoch": 2.61, "grad_norm": 0.7629818916320801, "learning_rate": 2.464335408470479e-05, "loss": 3.0001, "step": 53262 }, { "epoch": 2.61, "grad_norm": 0.8690528273582458, "learning_rate": 2.4637243472502723e-05, "loss": 2.9002, "step": 53263 }, { "epoch": 2.61, "grad_norm": 0.7542612552642822, "learning_rate": 2.4631133585553144e-05, "loss": 2.8384, "step": 53264 }, { "epoch": 2.61, "grad_norm": 0.7296197414398193, "learning_rate": 2.462502442387221e-05, "loss": 2.9057, "step": 53265 }, { "epoch": 2.61, "grad_norm": 0.7588483095169067, "learning_rate": 2.461891598747594e-05, "loss": 2.5271, "step": 53266 }, { "epoch": 2.61, "grad_norm": 0.750971794128418, "learning_rate": 2.461280827638039e-05, "loss": 2.8642, "step": 53267 }, { "epoch": 2.61, "grad_norm": 0.7146446704864502, "learning_rate": 2.4606701290601783e-05, "loss": 2.8347, "step": 53268 }, { "epoch": 2.61, "grad_norm": 0.755339503288269, "learning_rate": 2.4600595030156066e-05, "loss": 2.8039, "step": 53269 }, { "epoch": 2.61, "grad_norm": 0.7227151393890381, "learning_rate": 2.4594489495059434e-05, "loss": 3.0266, "step": 53270 }, { "epoch": 2.61, "grad_norm": 0.7845844626426697, "learning_rate": 2.4588384685327865e-05, "loss": 2.7491, "step": 53271 }, { "epoch": 2.61, "grad_norm": 0.7608388066291809, "learning_rate": 2.4582280600977522e-05, "loss": 3.0839, "step": 53272 }, { "epoch": 2.61, "grad_norm": 0.7561764121055603, "learning_rate": 2.4576177242024453e-05, "loss": 3.1059, "step": 53273 }, { "epoch": 2.61, "grad_norm": 0.7092791199684143, "learning_rate": 2.457007460848468e-05, "loss": 2.9953, "step": 53274 }, { "epoch": 2.61, "grad_norm": 0.7406812906265259, "learning_rate": 2.4563972700374357e-05, "loss": 3.1362, "step": 53275 }, { "epoch": 2.61, "grad_norm": 0.7598647475242615, "learning_rate": 2.4557871517709503e-05, "loss": 2.6158, "step": 53276 }, { "epoch": 2.61, "grad_norm": 0.7269623279571533, "learning_rate": 2.4551771060506177e-05, "loss": 2.9399, "step": 53277 }, { "epoch": 2.61, "grad_norm": 0.7937368154525757, "learning_rate": 2.4545671328780526e-05, "loss": 2.7327, "step": 53278 }, { "epoch": 2.61, "grad_norm": 0.7488956451416016, "learning_rate": 2.453957232254854e-05, "loss": 2.8383, "step": 53279 }, { "epoch": 2.61, "grad_norm": 0.7417857646942139, "learning_rate": 2.4533474041826338e-05, "loss": 2.8689, "step": 53280 }, { "epoch": 2.61, "grad_norm": 0.7532116770744324, "learning_rate": 2.4527376486629945e-05, "loss": 2.963, "step": 53281 }, { "epoch": 2.61, "grad_norm": 0.7646198868751526, "learning_rate": 2.452127965697538e-05, "loss": 3.0262, "step": 53282 }, { "epoch": 2.61, "grad_norm": 0.7706944346427917, "learning_rate": 2.4515183552878824e-05, "loss": 2.781, "step": 53283 }, { "epoch": 2.61, "grad_norm": 0.7329809665679932, "learning_rate": 2.450908817435617e-05, "loss": 2.8377, "step": 53284 }, { "epoch": 2.61, "grad_norm": 0.7508072257041931, "learning_rate": 2.4502993521423643e-05, "loss": 2.8186, "step": 53285 }, { "epoch": 2.61, "grad_norm": 0.755642831325531, "learning_rate": 2.4496899594097118e-05, "loss": 2.888, "step": 53286 }, { "epoch": 2.61, "grad_norm": 0.7248871922492981, "learning_rate": 2.4490806392392826e-05, "loss": 2.7755, "step": 53287 }, { "epoch": 2.61, "grad_norm": 0.7461181879043579, "learning_rate": 2.4484713916326715e-05, "loss": 2.8476, "step": 53288 }, { "epoch": 2.61, "grad_norm": 0.7649577856063843, "learning_rate": 2.4478622165914774e-05, "loss": 2.784, "step": 53289 }, { "epoch": 2.61, "grad_norm": 0.7114865779876709, "learning_rate": 2.447253114117319e-05, "loss": 2.9854, "step": 53290 }, { "epoch": 2.61, "grad_norm": 0.7199516892433167, "learning_rate": 2.4466440842117886e-05, "loss": 2.9036, "step": 53291 }, { "epoch": 2.61, "grad_norm": 0.7920954823493958, "learning_rate": 2.4460351268764943e-05, "loss": 2.7358, "step": 53292 }, { "epoch": 2.61, "grad_norm": 0.762523889541626, "learning_rate": 2.4454262421130456e-05, "loss": 2.8051, "step": 53293 }, { "epoch": 2.61, "grad_norm": 0.7442542910575867, "learning_rate": 2.4448174299230404e-05, "loss": 2.7057, "step": 53294 }, { "epoch": 2.61, "grad_norm": 0.7322526574134827, "learning_rate": 2.4442086903080815e-05, "loss": 2.954, "step": 53295 }, { "epoch": 2.61, "grad_norm": 0.7498273849487305, "learning_rate": 2.4436000232697706e-05, "loss": 2.9617, "step": 53296 }, { "epoch": 2.61, "grad_norm": 0.8006744384765625, "learning_rate": 2.4429914288097162e-05, "loss": 2.8717, "step": 53297 }, { "epoch": 2.61, "grad_norm": 0.7188971638679504, "learning_rate": 2.4423829069295207e-05, "loss": 2.9337, "step": 53298 }, { "epoch": 2.61, "grad_norm": 0.728274941444397, "learning_rate": 2.4417744576307796e-05, "loss": 2.8373, "step": 53299 }, { "epoch": 2.61, "grad_norm": 0.7777703404426575, "learning_rate": 2.441166080915108e-05, "loss": 2.7575, "step": 53300 }, { "epoch": 2.61, "grad_norm": 0.8000141978263855, "learning_rate": 2.4405577767840946e-05, "loss": 3.0339, "step": 53301 }, { "epoch": 2.61, "grad_norm": 0.749862551689148, "learning_rate": 2.4399495452393547e-05, "loss": 2.854, "step": 53302 }, { "epoch": 2.61, "grad_norm": 0.7590504884719849, "learning_rate": 2.439341386282484e-05, "loss": 2.7715, "step": 53303 }, { "epoch": 2.61, "grad_norm": 0.7074437737464905, "learning_rate": 2.4387332999150778e-05, "loss": 2.9909, "step": 53304 }, { "epoch": 2.61, "grad_norm": 0.7344616651535034, "learning_rate": 2.4381252861387478e-05, "loss": 3.0727, "step": 53305 }, { "epoch": 2.61, "grad_norm": 0.7661343812942505, "learning_rate": 2.43751734495509e-05, "loss": 2.9525, "step": 53306 }, { "epoch": 2.61, "grad_norm": 0.7796639204025269, "learning_rate": 2.4369094763657093e-05, "loss": 2.9349, "step": 53307 }, { "epoch": 2.61, "grad_norm": 0.749068021774292, "learning_rate": 2.4363016803721978e-05, "loss": 3.0218, "step": 53308 }, { "epoch": 2.61, "grad_norm": 0.7495163083076477, "learning_rate": 2.4356939569761713e-05, "loss": 2.7993, "step": 53309 }, { "epoch": 2.61, "grad_norm": 0.7630246877670288, "learning_rate": 2.435086306179218e-05, "loss": 2.8242, "step": 53310 }, { "epoch": 2.61, "grad_norm": 0.7312656044960022, "learning_rate": 2.43447872798294e-05, "loss": 2.8078, "step": 53311 }, { "epoch": 2.61, "grad_norm": 0.7347916960716248, "learning_rate": 2.4338712223889467e-05, "loss": 2.7272, "step": 53312 }, { "epoch": 2.61, "grad_norm": 0.7420660853385925, "learning_rate": 2.4332637893988228e-05, "loss": 3.0838, "step": 53313 }, { "epoch": 2.61, "grad_norm": 0.7539196014404297, "learning_rate": 2.4326564290141802e-05, "loss": 2.8889, "step": 53314 }, { "epoch": 2.61, "grad_norm": 0.7311657071113586, "learning_rate": 2.4320491412366183e-05, "loss": 2.8895, "step": 53315 }, { "epoch": 2.61, "grad_norm": 0.7519007921218872, "learning_rate": 2.431441926067732e-05, "loss": 2.7742, "step": 53316 }, { "epoch": 2.61, "grad_norm": 0.7469418048858643, "learning_rate": 2.4308347835091234e-05, "loss": 2.7949, "step": 53317 }, { "epoch": 2.61, "grad_norm": 0.7581943273544312, "learning_rate": 2.4302277135623883e-05, "loss": 2.8109, "step": 53318 }, { "epoch": 2.61, "grad_norm": 0.7415329813957214, "learning_rate": 2.4296207162291213e-05, "loss": 3.1124, "step": 53319 }, { "epoch": 2.61, "grad_norm": 0.784935712814331, "learning_rate": 2.4290137915109386e-05, "loss": 2.8482, "step": 53320 }, { "epoch": 2.61, "grad_norm": 0.6955008506774902, "learning_rate": 2.4284069394094187e-05, "loss": 2.8681, "step": 53321 }, { "epoch": 2.61, "grad_norm": 0.7275387644767761, "learning_rate": 2.4278001599261732e-05, "loss": 2.9916, "step": 53322 }, { "epoch": 2.61, "grad_norm": 0.7459524273872375, "learning_rate": 2.4271934530627913e-05, "loss": 2.751, "step": 53323 }, { "epoch": 2.61, "grad_norm": 0.7747052311897278, "learning_rate": 2.4265868188208813e-05, "loss": 2.9919, "step": 53324 }, { "epoch": 2.61, "grad_norm": 0.7346550822257996, "learning_rate": 2.425980257202036e-05, "loss": 2.6747, "step": 53325 }, { "epoch": 2.61, "grad_norm": 0.7680572271347046, "learning_rate": 2.4253737682078433e-05, "loss": 2.7634, "step": 53326 }, { "epoch": 2.61, "grad_norm": 0.7524115443229675, "learning_rate": 2.424767351839919e-05, "loss": 3.0331, "step": 53327 }, { "epoch": 2.61, "grad_norm": 0.7559760808944702, "learning_rate": 2.424161008099842e-05, "loss": 2.8906, "step": 53328 }, { "epoch": 2.61, "grad_norm": 0.7822624444961548, "learning_rate": 2.4235547369892174e-05, "loss": 2.6455, "step": 53329 }, { "epoch": 2.61, "grad_norm": 0.6873143315315247, "learning_rate": 2.4229485385096504e-05, "loss": 2.9405, "step": 53330 }, { "epoch": 2.61, "grad_norm": 0.7661668658256531, "learning_rate": 2.4223424126627267e-05, "loss": 2.9843, "step": 53331 }, { "epoch": 2.61, "grad_norm": 0.7725754380226135, "learning_rate": 2.4217363594500484e-05, "loss": 2.8333, "step": 53332 }, { "epoch": 2.61, "grad_norm": 0.7318544983863831, "learning_rate": 2.421130378873204e-05, "loss": 2.8713, "step": 53333 }, { "epoch": 2.61, "grad_norm": 0.7380740642547607, "learning_rate": 2.4205244709337923e-05, "loss": 3.1337, "step": 53334 }, { "epoch": 2.61, "grad_norm": 0.7293857932090759, "learning_rate": 2.4199186356334156e-05, "loss": 2.7604, "step": 53335 }, { "epoch": 2.61, "grad_norm": 0.8121163845062256, "learning_rate": 2.419312872973662e-05, "loss": 2.935, "step": 53336 }, { "epoch": 2.61, "grad_norm": 0.7472822070121765, "learning_rate": 2.4187071829561376e-05, "loss": 2.801, "step": 53337 }, { "epoch": 2.61, "grad_norm": 0.7153192758560181, "learning_rate": 2.418101565582421e-05, "loss": 2.7313, "step": 53338 }, { "epoch": 2.61, "grad_norm": 0.7651809453964233, "learning_rate": 2.4174960208541238e-05, "loss": 3.0331, "step": 53339 }, { "epoch": 2.61, "grad_norm": 0.7362973093986511, "learning_rate": 2.4168905487728353e-05, "loss": 2.9481, "step": 53340 }, { "epoch": 2.61, "grad_norm": 0.8571172952651978, "learning_rate": 2.4162851493401403e-05, "loss": 2.7162, "step": 53341 }, { "epoch": 2.61, "grad_norm": 0.766938328742981, "learning_rate": 2.4156798225576478e-05, "loss": 3.0625, "step": 53342 }, { "epoch": 2.61, "grad_norm": 0.7803354859352112, "learning_rate": 2.4150745684269436e-05, "loss": 2.8998, "step": 53343 }, { "epoch": 2.61, "grad_norm": 0.7079799771308899, "learning_rate": 2.414469386949619e-05, "loss": 2.8232, "step": 53344 }, { "epoch": 2.61, "grad_norm": 0.7367957830429077, "learning_rate": 2.413864278127283e-05, "loss": 2.5717, "step": 53345 }, { "epoch": 2.61, "grad_norm": 0.7332743406295776, "learning_rate": 2.4132592419615184e-05, "loss": 2.8145, "step": 53346 }, { "epoch": 2.61, "grad_norm": 0.724522590637207, "learning_rate": 2.412654278453916e-05, "loss": 3.0007, "step": 53347 }, { "epoch": 2.61, "grad_norm": 0.7491048574447632, "learning_rate": 2.412049387606072e-05, "loss": 2.6494, "step": 53348 }, { "epoch": 2.61, "grad_norm": 0.7689740061759949, "learning_rate": 2.4114445694195783e-05, "loss": 2.8103, "step": 53349 }, { "epoch": 2.61, "grad_norm": 0.8025519251823425, "learning_rate": 2.410839823896037e-05, "loss": 2.7399, "step": 53350 }, { "epoch": 2.61, "grad_norm": 0.7537023425102234, "learning_rate": 2.4102351510370267e-05, "loss": 2.8394, "step": 53351 }, { "epoch": 2.61, "grad_norm": 0.7922477722167969, "learning_rate": 2.409630550844156e-05, "loss": 2.9793, "step": 53352 }, { "epoch": 2.61, "grad_norm": 0.77238529920578, "learning_rate": 2.409026023319004e-05, "loss": 2.83, "step": 53353 }, { "epoch": 2.61, "grad_norm": 0.7189983129501343, "learning_rate": 2.408421568463166e-05, "loss": 3.0147, "step": 53354 }, { "epoch": 2.61, "grad_norm": 0.7614080905914307, "learning_rate": 2.4078171862782403e-05, "loss": 2.7063, "step": 53355 }, { "epoch": 2.61, "grad_norm": 0.749724805355072, "learning_rate": 2.4072128767658062e-05, "loss": 2.812, "step": 53356 }, { "epoch": 2.61, "grad_norm": 0.7478910684585571, "learning_rate": 2.4066086399274687e-05, "loss": 2.8834, "step": 53357 }, { "epoch": 2.61, "grad_norm": 0.8119695782661438, "learning_rate": 2.4060044757648102e-05, "loss": 3.0316, "step": 53358 }, { "epoch": 2.61, "grad_norm": 0.7431463003158569, "learning_rate": 2.4054003842794257e-05, "loss": 2.8187, "step": 53359 }, { "epoch": 2.62, "grad_norm": 0.7376152276992798, "learning_rate": 2.4047963654729075e-05, "loss": 2.7177, "step": 53360 }, { "epoch": 2.62, "grad_norm": 0.7031460404396057, "learning_rate": 2.4041924193468475e-05, "loss": 2.753, "step": 53361 }, { "epoch": 2.62, "grad_norm": 0.7614917755126953, "learning_rate": 2.4035885459028347e-05, "loss": 2.8834, "step": 53362 }, { "epoch": 2.62, "grad_norm": 0.7040819525718689, "learning_rate": 2.4029847451424512e-05, "loss": 2.9195, "step": 53363 }, { "epoch": 2.62, "grad_norm": 0.7586194276809692, "learning_rate": 2.4023810170672954e-05, "loss": 3.0339, "step": 53364 }, { "epoch": 2.62, "grad_norm": 0.7714332938194275, "learning_rate": 2.4017773616789628e-05, "loss": 2.9253, "step": 53365 }, { "epoch": 2.62, "grad_norm": 0.7194476127624512, "learning_rate": 2.401173778979032e-05, "loss": 2.8969, "step": 53366 }, { "epoch": 2.62, "grad_norm": 0.7926198840141296, "learning_rate": 2.400570268969102e-05, "loss": 3.1839, "step": 53367 }, { "epoch": 2.62, "grad_norm": 0.7163087129592896, "learning_rate": 2.3999668316507582e-05, "loss": 2.7451, "step": 53368 }, { "epoch": 2.62, "grad_norm": 0.7153766751289368, "learning_rate": 2.3993634670255856e-05, "loss": 2.8174, "step": 53369 }, { "epoch": 2.62, "grad_norm": 0.7822369933128357, "learning_rate": 2.3987601750951835e-05, "loss": 2.7719, "step": 53370 }, { "epoch": 2.62, "grad_norm": 0.7908663749694824, "learning_rate": 2.3981569558611303e-05, "loss": 2.7651, "step": 53371 }, { "epoch": 2.62, "grad_norm": 0.7336825132369995, "learning_rate": 2.3975538093250245e-05, "loss": 2.8495, "step": 53372 }, { "epoch": 2.62, "grad_norm": 0.8331197500228882, "learning_rate": 2.396950735488442e-05, "loss": 2.895, "step": 53373 }, { "epoch": 2.62, "grad_norm": 0.7461316585540771, "learning_rate": 2.396347734352988e-05, "loss": 2.8857, "step": 53374 }, { "epoch": 2.62, "grad_norm": 0.7184967994689941, "learning_rate": 2.395744805920238e-05, "loss": 2.9948, "step": 53375 }, { "epoch": 2.62, "grad_norm": 0.8150850534439087, "learning_rate": 2.39514195019178e-05, "loss": 2.8987, "step": 53376 }, { "epoch": 2.62, "grad_norm": 0.750407338142395, "learning_rate": 2.3945391671692138e-05, "loss": 3.0958, "step": 53377 }, { "epoch": 2.62, "grad_norm": 0.7473511099815369, "learning_rate": 2.3939364568541075e-05, "loss": 2.9058, "step": 53378 }, { "epoch": 2.62, "grad_norm": 0.7648722529411316, "learning_rate": 2.39333381924807e-05, "loss": 2.9031, "step": 53379 }, { "epoch": 2.62, "grad_norm": 0.7389246821403503, "learning_rate": 2.39273125435267e-05, "loss": 3.0294, "step": 53380 }, { "epoch": 2.62, "grad_norm": 0.7998449206352234, "learning_rate": 2.392128762169503e-05, "loss": 2.9568, "step": 53381 }, { "epoch": 2.62, "grad_norm": 0.7724308371543884, "learning_rate": 2.3915263427001608e-05, "loss": 2.7814, "step": 53382 }, { "epoch": 2.62, "grad_norm": 0.7729584574699402, "learning_rate": 2.3909239959462258e-05, "loss": 2.9317, "step": 53383 }, { "epoch": 2.62, "grad_norm": 0.7403950095176697, "learning_rate": 2.390321721909283e-05, "loss": 2.8143, "step": 53384 }, { "epoch": 2.62, "grad_norm": 0.7252776026725769, "learning_rate": 2.3897195205909148e-05, "loss": 2.7213, "step": 53385 }, { "epoch": 2.62, "grad_norm": 0.7848640084266663, "learning_rate": 2.389117391992713e-05, "loss": 2.8572, "step": 53386 }, { "epoch": 2.62, "grad_norm": 0.7389460206031799, "learning_rate": 2.3885153361162636e-05, "loss": 2.7936, "step": 53387 }, { "epoch": 2.62, "grad_norm": 0.7464485764503479, "learning_rate": 2.3879133529631477e-05, "loss": 2.8893, "step": 53388 }, { "epoch": 2.62, "grad_norm": 0.7308782339096069, "learning_rate": 2.3873114425349584e-05, "loss": 2.8139, "step": 53389 }, { "epoch": 2.62, "grad_norm": 0.7255467772483826, "learning_rate": 2.38670960483328e-05, "loss": 3.0575, "step": 53390 }, { "epoch": 2.62, "grad_norm": 0.7490675449371338, "learning_rate": 2.3861078398596857e-05, "loss": 2.8725, "step": 53391 }, { "epoch": 2.62, "grad_norm": 0.7539382576942444, "learning_rate": 2.3855061476157733e-05, "loss": 2.81, "step": 53392 }, { "epoch": 2.62, "grad_norm": 0.7484925389289856, "learning_rate": 2.3849045281031188e-05, "loss": 2.9626, "step": 53393 }, { "epoch": 2.62, "grad_norm": 0.7690110802650452, "learning_rate": 2.3843029813233173e-05, "loss": 3.0192, "step": 53394 }, { "epoch": 2.62, "grad_norm": 0.697595477104187, "learning_rate": 2.383701507277941e-05, "loss": 3.0619, "step": 53395 }, { "epoch": 2.62, "grad_norm": 0.7498865723609924, "learning_rate": 2.3831001059685884e-05, "loss": 2.7544, "step": 53396 }, { "epoch": 2.62, "grad_norm": 0.6930327415466309, "learning_rate": 2.382498777396832e-05, "loss": 2.9071, "step": 53397 }, { "epoch": 2.62, "grad_norm": 0.7832930088043213, "learning_rate": 2.3818975215642532e-05, "loss": 2.8844, "step": 53398 }, { "epoch": 2.62, "grad_norm": 0.7338464856147766, "learning_rate": 2.381296338472448e-05, "loss": 2.8968, "step": 53399 }, { "epoch": 2.62, "grad_norm": 0.7855767607688904, "learning_rate": 2.3806952281229853e-05, "loss": 2.6963, "step": 53400 }, { "epoch": 2.62, "grad_norm": 0.7518692016601562, "learning_rate": 2.3800941905174563e-05, "loss": 2.9422, "step": 53401 }, { "epoch": 2.62, "grad_norm": 0.7118070721626282, "learning_rate": 2.3794932256574505e-05, "loss": 2.9071, "step": 53402 }, { "epoch": 2.62, "grad_norm": 0.8377758264541626, "learning_rate": 2.3788923335445398e-05, "loss": 3.1013, "step": 53403 }, { "epoch": 2.62, "grad_norm": 0.7513577342033386, "learning_rate": 2.3782915141803127e-05, "loss": 3.0399, "step": 53404 }, { "epoch": 2.62, "grad_norm": 0.796913206577301, "learning_rate": 2.3776907675663513e-05, "loss": 2.7059, "step": 53405 }, { "epoch": 2.62, "grad_norm": 0.7465709447860718, "learning_rate": 2.3770900937042314e-05, "loss": 2.7208, "step": 53406 }, { "epoch": 2.62, "grad_norm": 0.7374803423881531, "learning_rate": 2.376489492595548e-05, "loss": 2.7431, "step": 53407 }, { "epoch": 2.62, "grad_norm": 0.7477961182594299, "learning_rate": 2.3758889642418665e-05, "loss": 2.7817, "step": 53408 }, { "epoch": 2.62, "grad_norm": 0.8048363327980042, "learning_rate": 2.3752885086447827e-05, "loss": 2.9601, "step": 53409 }, { "epoch": 2.62, "grad_norm": 0.7863703370094299, "learning_rate": 2.374688125805868e-05, "loss": 2.7946, "step": 53410 }, { "epoch": 2.62, "grad_norm": 0.7128931283950806, "learning_rate": 2.3740878157267118e-05, "loss": 2.9408, "step": 53411 }, { "epoch": 2.62, "grad_norm": 0.7168523073196411, "learning_rate": 2.3734875784088926e-05, "loss": 2.8748, "step": 53412 }, { "epoch": 2.62, "grad_norm": 0.7598782181739807, "learning_rate": 2.3728874138539856e-05, "loss": 2.8996, "step": 53413 }, { "epoch": 2.62, "grad_norm": 0.7533124089241028, "learning_rate": 2.3722873220635795e-05, "loss": 2.7102, "step": 53414 }, { "epoch": 2.62, "grad_norm": 0.7148170471191406, "learning_rate": 2.3716873030392504e-05, "loss": 2.9118, "step": 53415 }, { "epoch": 2.62, "grad_norm": 0.7307083606719971, "learning_rate": 2.371087356782576e-05, "loss": 2.8594, "step": 53416 }, { "epoch": 2.62, "grad_norm": 0.7332124710083008, "learning_rate": 2.3704874832951458e-05, "loss": 3.065, "step": 53417 }, { "epoch": 2.62, "grad_norm": 0.7090801000595093, "learning_rate": 2.369887682578535e-05, "loss": 2.8158, "step": 53418 }, { "epoch": 2.62, "grad_norm": 0.7521113157272339, "learning_rate": 2.3692879546343224e-05, "loss": 2.92, "step": 53419 }, { "epoch": 2.62, "grad_norm": 0.7190861105918884, "learning_rate": 2.368688299464083e-05, "loss": 2.8477, "step": 53420 }, { "epoch": 2.62, "grad_norm": 0.7460111975669861, "learning_rate": 2.3680887170693995e-05, "loss": 2.887, "step": 53421 }, { "epoch": 2.62, "grad_norm": 0.7339314222335815, "learning_rate": 2.3674892074518602e-05, "loss": 2.8122, "step": 53422 }, { "epoch": 2.62, "grad_norm": 0.7279649972915649, "learning_rate": 2.366889770613031e-05, "loss": 2.9055, "step": 53423 }, { "epoch": 2.62, "grad_norm": 0.7642202973365784, "learning_rate": 2.3662904065544996e-05, "loss": 2.8235, "step": 53424 }, { "epoch": 2.62, "grad_norm": 0.726921021938324, "learning_rate": 2.3656911152778357e-05, "loss": 2.9702, "step": 53425 }, { "epoch": 2.62, "grad_norm": 0.7702478170394897, "learning_rate": 2.3650918967846276e-05, "loss": 2.729, "step": 53426 }, { "epoch": 2.62, "grad_norm": 0.7660560011863708, "learning_rate": 2.364492751076451e-05, "loss": 3.0538, "step": 53427 }, { "epoch": 2.62, "grad_norm": 0.7796090245246887, "learning_rate": 2.3638936781548745e-05, "loss": 2.9503, "step": 53428 }, { "epoch": 2.62, "grad_norm": 0.7895865440368652, "learning_rate": 2.36329467802149e-05, "loss": 2.9029, "step": 53429 }, { "epoch": 2.62, "grad_norm": 0.7662897109985352, "learning_rate": 2.362695750677863e-05, "loss": 2.8069, "step": 53430 }, { "epoch": 2.62, "grad_norm": 0.7575547695159912, "learning_rate": 2.362096896125576e-05, "loss": 3.0073, "step": 53431 }, { "epoch": 2.62, "grad_norm": 0.753297746181488, "learning_rate": 2.3614981143662136e-05, "loss": 2.9955, "step": 53432 }, { "epoch": 2.62, "grad_norm": 0.7785336971282959, "learning_rate": 2.3608994054013452e-05, "loss": 2.8248, "step": 53433 }, { "epoch": 2.62, "grad_norm": 0.7514358758926392, "learning_rate": 2.3603007692325493e-05, "loss": 2.9667, "step": 53434 }, { "epoch": 2.62, "grad_norm": 0.7518543004989624, "learning_rate": 2.3597022058613946e-05, "loss": 3.0098, "step": 53435 }, { "epoch": 2.62, "grad_norm": 0.8257125616073608, "learning_rate": 2.3591037152894664e-05, "loss": 2.9063, "step": 53436 }, { "epoch": 2.62, "grad_norm": 0.7336806654930115, "learning_rate": 2.358505297518344e-05, "loss": 2.9125, "step": 53437 }, { "epoch": 2.62, "grad_norm": 0.7743903398513794, "learning_rate": 2.3579069525495918e-05, "loss": 2.756, "step": 53438 }, { "epoch": 2.62, "grad_norm": 0.7176278233528137, "learning_rate": 2.3573086803848028e-05, "loss": 2.8686, "step": 53439 }, { "epoch": 2.62, "grad_norm": 0.7256504893302917, "learning_rate": 2.3567104810255323e-05, "loss": 2.8576, "step": 53440 }, { "epoch": 2.62, "grad_norm": 0.7542241215705872, "learning_rate": 2.356112354473375e-05, "loss": 2.9628, "step": 53441 }, { "epoch": 2.62, "grad_norm": 0.7405779957771301, "learning_rate": 2.355514300729897e-05, "loss": 2.891, "step": 53442 }, { "epoch": 2.62, "grad_norm": 0.7476978898048401, "learning_rate": 2.354916319796667e-05, "loss": 2.7838, "step": 53443 }, { "epoch": 2.62, "grad_norm": 0.7352883219718933, "learning_rate": 2.3543184116752766e-05, "loss": 2.9383, "step": 53444 }, { "epoch": 2.62, "grad_norm": 0.766408383846283, "learning_rate": 2.3537205763672816e-05, "loss": 3.1493, "step": 53445 }, { "epoch": 2.62, "grad_norm": 0.7481442093849182, "learning_rate": 2.3531228138742675e-05, "loss": 2.8828, "step": 53446 }, { "epoch": 2.62, "grad_norm": 0.7440130710601807, "learning_rate": 2.352525124197813e-05, "loss": 2.9028, "step": 53447 }, { "epoch": 2.62, "grad_norm": 0.7087607383728027, "learning_rate": 2.351927507339486e-05, "loss": 2.7698, "step": 53448 }, { "epoch": 2.62, "grad_norm": 0.7708470225334167, "learning_rate": 2.3513299633008597e-05, "loss": 2.8684, "step": 53449 }, { "epoch": 2.62, "grad_norm": 0.7144055962562561, "learning_rate": 2.3507324920835057e-05, "loss": 2.8738, "step": 53450 }, { "epoch": 2.62, "grad_norm": 0.7633524537086487, "learning_rate": 2.3501350936889995e-05, "loss": 2.8275, "step": 53451 }, { "epoch": 2.62, "grad_norm": 0.6785310506820679, "learning_rate": 2.349537768118923e-05, "loss": 2.8683, "step": 53452 }, { "epoch": 2.62, "grad_norm": 0.7202582359313965, "learning_rate": 2.348940515374835e-05, "loss": 2.7919, "step": 53453 }, { "epoch": 2.62, "grad_norm": 0.7880463004112244, "learning_rate": 2.3483433354583214e-05, "loss": 2.7937, "step": 53454 }, { "epoch": 2.62, "grad_norm": 0.7895649075508118, "learning_rate": 2.3477462283709535e-05, "loss": 2.97, "step": 53455 }, { "epoch": 2.62, "grad_norm": 0.741170346736908, "learning_rate": 2.3471491941142972e-05, "loss": 2.8845, "step": 53456 }, { "epoch": 2.62, "grad_norm": 0.7909066081047058, "learning_rate": 2.3465522326899245e-05, "loss": 2.7116, "step": 53457 }, { "epoch": 2.62, "grad_norm": 0.7080055475234985, "learning_rate": 2.3459553440994107e-05, "loss": 2.9053, "step": 53458 }, { "epoch": 2.62, "grad_norm": 0.7602565884590149, "learning_rate": 2.345358528344331e-05, "loss": 2.9799, "step": 53459 }, { "epoch": 2.62, "grad_norm": 0.7593586444854736, "learning_rate": 2.344761785426251e-05, "loss": 3.0791, "step": 53460 }, { "epoch": 2.62, "grad_norm": 0.760124146938324, "learning_rate": 2.3441651153467532e-05, "loss": 2.8708, "step": 53461 }, { "epoch": 2.62, "grad_norm": 0.725273847579956, "learning_rate": 2.343568518107396e-05, "loss": 2.902, "step": 53462 }, { "epoch": 2.62, "grad_norm": 0.7621003985404968, "learning_rate": 2.3429719937097614e-05, "loss": 2.8048, "step": 53463 }, { "epoch": 2.62, "grad_norm": 0.7457227110862732, "learning_rate": 2.342375542155418e-05, "loss": 3.0858, "step": 53464 }, { "epoch": 2.62, "grad_norm": 0.7379422187805176, "learning_rate": 2.3417791634459248e-05, "loss": 2.7287, "step": 53465 }, { "epoch": 2.62, "grad_norm": 0.7121053338050842, "learning_rate": 2.3411828575828707e-05, "loss": 2.7895, "step": 53466 }, { "epoch": 2.62, "grad_norm": 0.7786771655082703, "learning_rate": 2.340586624567814e-05, "loss": 3.047, "step": 53467 }, { "epoch": 2.62, "grad_norm": 0.7441116571426392, "learning_rate": 2.339990464402327e-05, "loss": 2.9813, "step": 53468 }, { "epoch": 2.62, "grad_norm": 0.7259562611579895, "learning_rate": 2.339394377087985e-05, "loss": 2.8765, "step": 53469 }, { "epoch": 2.62, "grad_norm": 0.7485330700874329, "learning_rate": 2.3387983626263575e-05, "loss": 2.7566, "step": 53470 }, { "epoch": 2.62, "grad_norm": 0.7584130764007568, "learning_rate": 2.338202421019012e-05, "loss": 3.0932, "step": 53471 }, { "epoch": 2.62, "grad_norm": 0.7374604940414429, "learning_rate": 2.3376065522675113e-05, "loss": 3.0193, "step": 53472 }, { "epoch": 2.62, "grad_norm": 0.7430775761604309, "learning_rate": 2.337010756373431e-05, "loss": 3.0866, "step": 53473 }, { "epoch": 2.62, "grad_norm": 0.7617211937904358, "learning_rate": 2.336415033338346e-05, "loss": 2.6868, "step": 53474 }, { "epoch": 2.62, "grad_norm": 0.7515766620635986, "learning_rate": 2.335819383163815e-05, "loss": 2.9058, "step": 53475 }, { "epoch": 2.62, "grad_norm": 0.7782991528511047, "learning_rate": 2.335223805851417e-05, "loss": 2.9492, "step": 53476 }, { "epoch": 2.62, "grad_norm": 0.7339718341827393, "learning_rate": 2.3346283014027144e-05, "loss": 3.0604, "step": 53477 }, { "epoch": 2.62, "grad_norm": 0.7309208512306213, "learning_rate": 2.334032869819272e-05, "loss": 2.9403, "step": 53478 }, { "epoch": 2.62, "grad_norm": 0.7868647575378418, "learning_rate": 2.3334375111026693e-05, "loss": 2.9261, "step": 53479 }, { "epoch": 2.62, "grad_norm": 0.7272360920906067, "learning_rate": 2.332842225254461e-05, "loss": 2.7392, "step": 53480 }, { "epoch": 2.62, "grad_norm": 0.719589114189148, "learning_rate": 2.332247012276226e-05, "loss": 2.952, "step": 53481 }, { "epoch": 2.62, "grad_norm": 0.7240522503852844, "learning_rate": 2.331651872169523e-05, "loss": 2.7877, "step": 53482 }, { "epoch": 2.62, "grad_norm": 0.7426586747169495, "learning_rate": 2.3310568049359245e-05, "loss": 2.9101, "step": 53483 }, { "epoch": 2.62, "grad_norm": 0.7182551026344299, "learning_rate": 2.3304618105770024e-05, "loss": 3.0309, "step": 53484 }, { "epoch": 2.62, "grad_norm": 0.7897670269012451, "learning_rate": 2.3298668890943183e-05, "loss": 2.8458, "step": 53485 }, { "epoch": 2.62, "grad_norm": 0.7790352702140808, "learning_rate": 2.3292720404894416e-05, "loss": 2.6728, "step": 53486 }, { "epoch": 2.62, "grad_norm": 0.7496531009674072, "learning_rate": 2.3286772647639307e-05, "loss": 3.0795, "step": 53487 }, { "epoch": 2.62, "grad_norm": 0.7485788464546204, "learning_rate": 2.3280825619193577e-05, "loss": 2.8998, "step": 53488 }, { "epoch": 2.62, "grad_norm": 0.7316558361053467, "learning_rate": 2.3274879319572946e-05, "loss": 2.9014, "step": 53489 }, { "epoch": 2.62, "grad_norm": 0.766089141368866, "learning_rate": 2.3268933748792972e-05, "loss": 2.9425, "step": 53490 }, { "epoch": 2.62, "grad_norm": 0.7590373754501343, "learning_rate": 2.326298890686944e-05, "loss": 2.9089, "step": 53491 }, { "epoch": 2.62, "grad_norm": 0.7415793538093567, "learning_rate": 2.3257044793817904e-05, "loss": 2.9765, "step": 53492 }, { "epoch": 2.62, "grad_norm": 0.7299401760101318, "learning_rate": 2.3251101409654015e-05, "loss": 2.9363, "step": 53493 }, { "epoch": 2.62, "grad_norm": 0.714733898639679, "learning_rate": 2.3245158754393534e-05, "loss": 2.7415, "step": 53494 }, { "epoch": 2.62, "grad_norm": 0.7463199496269226, "learning_rate": 2.3239216828051975e-05, "loss": 2.841, "step": 53495 }, { "epoch": 2.62, "grad_norm": 0.7127196788787842, "learning_rate": 2.3233275630645098e-05, "loss": 2.7024, "step": 53496 }, { "epoch": 2.62, "grad_norm": 0.7690715193748474, "learning_rate": 2.3227335162188486e-05, "loss": 2.9423, "step": 53497 }, { "epoch": 2.62, "grad_norm": 0.7808687090873718, "learning_rate": 2.322139542269783e-05, "loss": 3.0039, "step": 53498 }, { "epoch": 2.62, "grad_norm": 0.7283824682235718, "learning_rate": 2.321545641218878e-05, "loss": 2.8473, "step": 53499 }, { "epoch": 2.62, "grad_norm": 0.762187659740448, "learning_rate": 2.3209518130676863e-05, "loss": 2.9331, "step": 53500 }, { "epoch": 2.62, "grad_norm": 0.7350523471832275, "learning_rate": 2.3203580578177894e-05, "loss": 3.0007, "step": 53501 }, { "epoch": 2.62, "grad_norm": 0.7983888983726501, "learning_rate": 2.319764375470733e-05, "loss": 2.7056, "step": 53502 }, { "epoch": 2.62, "grad_norm": 0.697699248790741, "learning_rate": 2.319170766028092e-05, "loss": 2.7097, "step": 53503 }, { "epoch": 2.62, "grad_norm": 0.7238744497299194, "learning_rate": 2.318577229491433e-05, "loss": 2.9083, "step": 53504 }, { "epoch": 2.62, "grad_norm": 0.7800061106681824, "learning_rate": 2.31798376586231e-05, "loss": 2.7768, "step": 53505 }, { "epoch": 2.62, "grad_norm": 0.7480177283287048, "learning_rate": 2.317390375142296e-05, "loss": 2.7823, "step": 53506 }, { "epoch": 2.62, "grad_norm": 0.7896789312362671, "learning_rate": 2.3167970573329463e-05, "loss": 2.7216, "step": 53507 }, { "epoch": 2.62, "grad_norm": 0.7196285724639893, "learning_rate": 2.3162038124358196e-05, "loss": 2.8438, "step": 53508 }, { "epoch": 2.62, "grad_norm": 0.8125433325767517, "learning_rate": 2.315610640452491e-05, "loss": 2.9548, "step": 53509 }, { "epoch": 2.62, "grad_norm": 0.8360536694526672, "learning_rate": 2.3150175413845094e-05, "loss": 2.8399, "step": 53510 }, { "epoch": 2.62, "grad_norm": 0.7761578559875488, "learning_rate": 2.3144245152334506e-05, "loss": 2.8066, "step": 53511 }, { "epoch": 2.62, "grad_norm": 0.7385191917419434, "learning_rate": 2.3138315620008628e-05, "loss": 2.8548, "step": 53512 }, { "epoch": 2.62, "grad_norm": 0.7106783390045166, "learning_rate": 2.313238681688322e-05, "loss": 2.7003, "step": 53513 }, { "epoch": 2.62, "grad_norm": 0.7793312668800354, "learning_rate": 2.3126458742973796e-05, "loss": 2.7662, "step": 53514 }, { "epoch": 2.62, "grad_norm": 0.7528009414672852, "learning_rate": 2.312053139829595e-05, "loss": 3.0059, "step": 53515 }, { "epoch": 2.62, "grad_norm": 0.7911593317985535, "learning_rate": 2.31146047828654e-05, "loss": 2.8624, "step": 53516 }, { "epoch": 2.62, "grad_norm": 0.7517163157463074, "learning_rate": 2.310867889669763e-05, "loss": 2.8081, "step": 53517 }, { "epoch": 2.62, "grad_norm": 0.7046359777450562, "learning_rate": 2.31027537398083e-05, "loss": 2.8519, "step": 53518 }, { "epoch": 2.62, "grad_norm": 0.7641608715057373, "learning_rate": 2.3096829312213093e-05, "loss": 2.8228, "step": 53519 }, { "epoch": 2.62, "grad_norm": 0.7242162227630615, "learning_rate": 2.3090905613927535e-05, "loss": 2.9697, "step": 53520 }, { "epoch": 2.62, "grad_norm": 0.7436241507530212, "learning_rate": 2.308498264496724e-05, "loss": 2.9085, "step": 53521 }, { "epoch": 2.62, "grad_norm": 0.7740703225135803, "learning_rate": 2.307906040534777e-05, "loss": 2.9504, "step": 53522 }, { "epoch": 2.62, "grad_norm": 0.7288923859596252, "learning_rate": 2.3073138895084742e-05, "loss": 2.8711, "step": 53523 }, { "epoch": 2.62, "grad_norm": 0.7444291114807129, "learning_rate": 2.3067218114193842e-05, "loss": 3.0138, "step": 53524 }, { "epoch": 2.62, "grad_norm": 0.7635215520858765, "learning_rate": 2.3061298062690525e-05, "loss": 2.8761, "step": 53525 }, { "epoch": 2.62, "grad_norm": 0.790448784828186, "learning_rate": 2.3055378740590514e-05, "loss": 2.8546, "step": 53526 }, { "epoch": 2.62, "grad_norm": 0.7522045969963074, "learning_rate": 2.3049460147909294e-05, "loss": 2.9095, "step": 53527 }, { "epoch": 2.62, "grad_norm": 0.7432355880737305, "learning_rate": 2.304354228466252e-05, "loss": 3.1744, "step": 53528 }, { "epoch": 2.62, "grad_norm": 0.759417712688446, "learning_rate": 2.303762515086578e-05, "loss": 2.956, "step": 53529 }, { "epoch": 2.62, "grad_norm": 0.7882241606712341, "learning_rate": 2.3031708746534562e-05, "loss": 2.8042, "step": 53530 }, { "epoch": 2.62, "grad_norm": 0.7573877573013306, "learning_rate": 2.3025793071684585e-05, "loss": 2.7942, "step": 53531 }, { "epoch": 2.62, "grad_norm": 0.8343915343284607, "learning_rate": 2.3019878126331304e-05, "loss": 2.9512, "step": 53532 }, { "epoch": 2.62, "grad_norm": 0.7674956917762756, "learning_rate": 2.301396391049034e-05, "loss": 2.8093, "step": 53533 }, { "epoch": 2.62, "grad_norm": 0.714229941368103, "learning_rate": 2.3008050424177384e-05, "loss": 2.9264, "step": 53534 }, { "epoch": 2.62, "grad_norm": 0.780677318572998, "learning_rate": 2.3002137667407884e-05, "loss": 3.0235, "step": 53535 }, { "epoch": 2.62, "grad_norm": 0.7305729985237122, "learning_rate": 2.2996225640197463e-05, "loss": 2.7816, "step": 53536 }, { "epoch": 2.62, "grad_norm": 0.773148775100708, "learning_rate": 2.2990314342561612e-05, "loss": 3.1407, "step": 53537 }, { "epoch": 2.62, "grad_norm": 0.7465836405754089, "learning_rate": 2.298440377451598e-05, "loss": 2.9568, "step": 53538 }, { "epoch": 2.62, "grad_norm": 0.8020625710487366, "learning_rate": 2.2978493936076125e-05, "loss": 2.9573, "step": 53539 }, { "epoch": 2.62, "grad_norm": 0.7247298955917358, "learning_rate": 2.2972584827257568e-05, "loss": 2.9254, "step": 53540 }, { "epoch": 2.62, "grad_norm": 0.7181655168533325, "learning_rate": 2.296667644807596e-05, "loss": 2.895, "step": 53541 }, { "epoch": 2.62, "grad_norm": 0.7766932249069214, "learning_rate": 2.2960768798546824e-05, "loss": 2.7412, "step": 53542 }, { "epoch": 2.62, "grad_norm": 0.7357663512229919, "learning_rate": 2.2954861878685684e-05, "loss": 2.7664, "step": 53543 }, { "epoch": 2.62, "grad_norm": 0.7322977185249329, "learning_rate": 2.2948955688508085e-05, "loss": 2.931, "step": 53544 }, { "epoch": 2.62, "grad_norm": 0.7766660451889038, "learning_rate": 2.294305022802959e-05, "loss": 2.9752, "step": 53545 }, { "epoch": 2.62, "grad_norm": 0.7280550003051758, "learning_rate": 2.293714549726585e-05, "loss": 2.6769, "step": 53546 }, { "epoch": 2.62, "grad_norm": 0.726729154586792, "learning_rate": 2.293124149623232e-05, "loss": 3.118, "step": 53547 }, { "epoch": 2.62, "grad_norm": 0.750844419002533, "learning_rate": 2.2925338224944588e-05, "loss": 2.9707, "step": 53548 }, { "epoch": 2.62, "grad_norm": 0.7047064304351807, "learning_rate": 2.2919435683418175e-05, "loss": 2.7327, "step": 53549 }, { "epoch": 2.62, "grad_norm": 0.7407240867614746, "learning_rate": 2.2913533871668667e-05, "loss": 2.7221, "step": 53550 }, { "epoch": 2.62, "grad_norm": 0.7109093070030212, "learning_rate": 2.2907632789711617e-05, "loss": 3.0363, "step": 53551 }, { "epoch": 2.62, "grad_norm": 0.7659242749214172, "learning_rate": 2.2901732437562447e-05, "loss": 2.7527, "step": 53552 }, { "epoch": 2.62, "grad_norm": 0.7269027829170227, "learning_rate": 2.2895832815236848e-05, "loss": 2.7867, "step": 53553 }, { "epoch": 2.62, "grad_norm": 0.739622950553894, "learning_rate": 2.288993392275027e-05, "loss": 2.8092, "step": 53554 }, { "epoch": 2.62, "grad_norm": 0.7424209713935852, "learning_rate": 2.2884035760118235e-05, "loss": 2.8659, "step": 53555 }, { "epoch": 2.62, "grad_norm": 0.7513794302940369, "learning_rate": 2.28781383273564e-05, "loss": 2.9606, "step": 53556 }, { "epoch": 2.62, "grad_norm": 0.7400728464126587, "learning_rate": 2.287224162448018e-05, "loss": 2.8118, "step": 53557 }, { "epoch": 2.62, "grad_norm": 0.7726714015007019, "learning_rate": 2.2866345651505168e-05, "loss": 3.0265, "step": 53558 }, { "epoch": 2.62, "grad_norm": 0.7855329513549805, "learning_rate": 2.2860450408446818e-05, "loss": 2.9305, "step": 53559 }, { "epoch": 2.62, "grad_norm": 0.7705275416374207, "learning_rate": 2.2854555895320713e-05, "loss": 2.7834, "step": 53560 }, { "epoch": 2.62, "grad_norm": 0.7320125699043274, "learning_rate": 2.2848662112142412e-05, "loss": 2.9377, "step": 53561 }, { "epoch": 2.62, "grad_norm": 0.7487272024154663, "learning_rate": 2.2842769058927336e-05, "loss": 3.0346, "step": 53562 }, { "epoch": 2.62, "grad_norm": 0.7231678366661072, "learning_rate": 2.2836876735691102e-05, "loss": 2.7278, "step": 53563 }, { "epoch": 2.63, "grad_norm": 0.8296177387237549, "learning_rate": 2.2830985142449164e-05, "loss": 2.9324, "step": 53564 }, { "epoch": 2.63, "grad_norm": 0.7314631342887878, "learning_rate": 2.2825094279217116e-05, "loss": 2.9992, "step": 53565 }, { "epoch": 2.63, "grad_norm": 0.7270010113716125, "learning_rate": 2.281920414601044e-05, "loss": 2.9121, "step": 53566 }, { "epoch": 2.63, "grad_norm": 0.7274070382118225, "learning_rate": 2.2813314742844556e-05, "loss": 3.0206, "step": 53567 }, { "epoch": 2.63, "grad_norm": 0.7393171787261963, "learning_rate": 2.2807426069735124e-05, "loss": 2.9025, "step": 53568 }, { "epoch": 2.63, "grad_norm": 0.7126226425170898, "learning_rate": 2.2801538126697527e-05, "loss": 2.7961, "step": 53569 }, { "epoch": 2.63, "grad_norm": 0.7575780153274536, "learning_rate": 2.2795650913747355e-05, "loss": 3.0125, "step": 53570 }, { "epoch": 2.63, "grad_norm": 0.7629463076591492, "learning_rate": 2.2789764430900126e-05, "loss": 2.834, "step": 53571 }, { "epoch": 2.63, "grad_norm": 0.7294546961784363, "learning_rate": 2.2783878678171295e-05, "loss": 2.7511, "step": 53572 }, { "epoch": 2.63, "grad_norm": 0.7730554938316345, "learning_rate": 2.2777993655576388e-05, "loss": 2.8679, "step": 53573 }, { "epoch": 2.63, "grad_norm": 0.7737736105918884, "learning_rate": 2.277210936313082e-05, "loss": 2.808, "step": 53574 }, { "epoch": 2.63, "grad_norm": 0.7201272249221802, "learning_rate": 2.276622580085018e-05, "loss": 3.0931, "step": 53575 }, { "epoch": 2.63, "grad_norm": 0.7674169540405273, "learning_rate": 2.2760342968750024e-05, "loss": 2.6019, "step": 53576 }, { "epoch": 2.63, "grad_norm": 0.6896553039550781, "learning_rate": 2.275446086684567e-05, "loss": 2.7181, "step": 53577 }, { "epoch": 2.63, "grad_norm": 0.7537925839424133, "learning_rate": 2.2748579495152808e-05, "loss": 3.0731, "step": 53578 }, { "epoch": 2.63, "grad_norm": 0.7853711247444153, "learning_rate": 2.274269885368679e-05, "loss": 3.0916, "step": 53579 }, { "epoch": 2.63, "grad_norm": 0.8356667160987854, "learning_rate": 2.273681894246311e-05, "loss": 2.9315, "step": 53580 }, { "epoch": 2.63, "grad_norm": 0.7744783759117126, "learning_rate": 2.2730939761497347e-05, "loss": 2.62, "step": 53581 }, { "epoch": 2.63, "grad_norm": 0.7801983952522278, "learning_rate": 2.272506131080486e-05, "loss": 2.8224, "step": 53582 }, { "epoch": 2.63, "grad_norm": 0.737760066986084, "learning_rate": 2.2719183590401237e-05, "loss": 3.0579, "step": 53583 }, { "epoch": 2.63, "grad_norm": 0.7177762389183044, "learning_rate": 2.2713306600301894e-05, "loss": 2.8598, "step": 53584 }, { "epoch": 2.63, "grad_norm": 0.7965755462646484, "learning_rate": 2.2707430340522326e-05, "loss": 2.925, "step": 53585 }, { "epoch": 2.63, "grad_norm": 0.7102624177932739, "learning_rate": 2.2701554811078083e-05, "loss": 2.8057, "step": 53586 }, { "epoch": 2.63, "grad_norm": 0.8156418800354004, "learning_rate": 2.2695680011984583e-05, "loss": 2.8195, "step": 53587 }, { "epoch": 2.63, "grad_norm": 0.7690003514289856, "learning_rate": 2.2689805943257257e-05, "loss": 2.8399, "step": 53588 }, { "epoch": 2.63, "grad_norm": 0.7723907828330994, "learning_rate": 2.268393260491158e-05, "loss": 3.0257, "step": 53589 }, { "epoch": 2.63, "grad_norm": 0.7428565621376038, "learning_rate": 2.267805999696308e-05, "loss": 2.9596, "step": 53590 }, { "epoch": 2.63, "grad_norm": 0.7959504127502441, "learning_rate": 2.2672188119427214e-05, "loss": 2.9929, "step": 53591 }, { "epoch": 2.63, "grad_norm": 0.7445618510246277, "learning_rate": 2.2666316972319397e-05, "loss": 2.9063, "step": 53592 }, { "epoch": 2.63, "grad_norm": 0.7014729976654053, "learning_rate": 2.266044655565519e-05, "loss": 2.7315, "step": 53593 }, { "epoch": 2.63, "grad_norm": 0.7331639528274536, "learning_rate": 2.2654576869449968e-05, "loss": 3.0117, "step": 53594 }, { "epoch": 2.63, "grad_norm": 0.7533635497093201, "learning_rate": 2.2648707913719165e-05, "loss": 2.9198, "step": 53595 }, { "epoch": 2.63, "grad_norm": 0.7588867545127869, "learning_rate": 2.2642839688478364e-05, "loss": 2.9348, "step": 53596 }, { "epoch": 2.63, "grad_norm": 0.669175922870636, "learning_rate": 2.2636972193742887e-05, "loss": 2.9407, "step": 53597 }, { "epoch": 2.63, "grad_norm": 0.7316837310791016, "learning_rate": 2.2631105429528284e-05, "loss": 2.952, "step": 53598 }, { "epoch": 2.63, "grad_norm": 0.767562210559845, "learning_rate": 2.2625239395849916e-05, "loss": 2.943, "step": 53599 }, { "epoch": 2.63, "grad_norm": 0.7083771228790283, "learning_rate": 2.261937409272333e-05, "loss": 2.9877, "step": 53600 }, { "epoch": 2.63, "grad_norm": 0.7206432223320007, "learning_rate": 2.2613509520163954e-05, "loss": 2.9687, "step": 53601 }, { "epoch": 2.63, "grad_norm": 0.7313346862792969, "learning_rate": 2.2607645678187138e-05, "loss": 2.8454, "step": 53602 }, { "epoch": 2.63, "grad_norm": 0.713829517364502, "learning_rate": 2.260178256680847e-05, "loss": 3.1016, "step": 53603 }, { "epoch": 2.63, "grad_norm": 0.695851743221283, "learning_rate": 2.2595920186043238e-05, "loss": 2.5597, "step": 53604 }, { "epoch": 2.63, "grad_norm": 0.7535444498062134, "learning_rate": 2.2590058535906996e-05, "loss": 2.924, "step": 53605 }, { "epoch": 2.63, "grad_norm": 0.7128457427024841, "learning_rate": 2.25841976164152e-05, "loss": 2.8773, "step": 53606 }, { "epoch": 2.63, "grad_norm": 0.7772616147994995, "learning_rate": 2.2578337427583172e-05, "loss": 3.1488, "step": 53607 }, { "epoch": 2.63, "grad_norm": 0.727367103099823, "learning_rate": 2.2572477969426462e-05, "loss": 2.798, "step": 53608 }, { "epoch": 2.63, "grad_norm": 0.7210254073143005, "learning_rate": 2.256661924196046e-05, "loss": 3.0219, "step": 53609 }, { "epoch": 2.63, "grad_norm": 0.7165561318397522, "learning_rate": 2.256076124520052e-05, "loss": 2.7132, "step": 53610 }, { "epoch": 2.63, "grad_norm": 0.7372732162475586, "learning_rate": 2.255490397916223e-05, "loss": 2.8915, "step": 53611 }, { "epoch": 2.63, "grad_norm": 0.7609841227531433, "learning_rate": 2.2549047443860845e-05, "loss": 3.088, "step": 53612 }, { "epoch": 2.63, "grad_norm": 0.744179904460907, "learning_rate": 2.2543191639311954e-05, "loss": 3.0776, "step": 53613 }, { "epoch": 2.63, "grad_norm": 0.7504658102989197, "learning_rate": 2.2537336565530807e-05, "loss": 2.9734, "step": 53614 }, { "epoch": 2.63, "grad_norm": 0.6780826449394226, "learning_rate": 2.2531482222532994e-05, "loss": 2.9569, "step": 53615 }, { "epoch": 2.63, "grad_norm": 0.7250926494598389, "learning_rate": 2.2525628610333835e-05, "loss": 2.8901, "step": 53616 }, { "epoch": 2.63, "grad_norm": 0.7274686694145203, "learning_rate": 2.2519775728948754e-05, "loss": 2.8393, "step": 53617 }, { "epoch": 2.63, "grad_norm": 0.7236843109130859, "learning_rate": 2.25139235783932e-05, "loss": 2.6181, "step": 53618 }, { "epoch": 2.63, "grad_norm": 0.7012773752212524, "learning_rate": 2.2508072158682535e-05, "loss": 2.9166, "step": 53619 }, { "epoch": 2.63, "grad_norm": 0.7341805696487427, "learning_rate": 2.2502221469832273e-05, "loss": 2.7775, "step": 53620 }, { "epoch": 2.63, "grad_norm": 0.7430617809295654, "learning_rate": 2.249637151185767e-05, "loss": 2.943, "step": 53621 }, { "epoch": 2.63, "grad_norm": 0.7874128222465515, "learning_rate": 2.249052228477428e-05, "loss": 2.9523, "step": 53622 }, { "epoch": 2.63, "grad_norm": 0.8702890276908875, "learning_rate": 2.248467378859743e-05, "loss": 2.6836, "step": 53623 }, { "epoch": 2.63, "grad_norm": 0.7678395509719849, "learning_rate": 2.24788260233425e-05, "loss": 2.9465, "step": 53624 }, { "epoch": 2.63, "grad_norm": 0.7276766300201416, "learning_rate": 2.247297898902498e-05, "loss": 2.8315, "step": 53625 }, { "epoch": 2.63, "grad_norm": 0.7727230191230774, "learning_rate": 2.2467132685660196e-05, "loss": 3.0265, "step": 53626 }, { "epoch": 2.63, "grad_norm": 0.7668293714523315, "learning_rate": 2.246128711326356e-05, "loss": 2.9094, "step": 53627 }, { "epoch": 2.63, "grad_norm": 0.7255916595458984, "learning_rate": 2.2455442271850532e-05, "loss": 2.8169, "step": 53628 }, { "epoch": 2.63, "grad_norm": 0.7411938905715942, "learning_rate": 2.24495981614364e-05, "loss": 2.9508, "step": 53629 }, { "epoch": 2.63, "grad_norm": 0.7330008745193481, "learning_rate": 2.2443754782036648e-05, "loss": 2.8352, "step": 53630 }, { "epoch": 2.63, "grad_norm": 0.7218151092529297, "learning_rate": 2.2437912133666634e-05, "loss": 2.706, "step": 53631 }, { "epoch": 2.63, "grad_norm": 0.7262304425239563, "learning_rate": 2.2432070216341715e-05, "loss": 2.8294, "step": 53632 }, { "epoch": 2.63, "grad_norm": 0.7365819215774536, "learning_rate": 2.2426229030077335e-05, "loss": 2.846, "step": 53633 }, { "epoch": 2.63, "grad_norm": 0.7333903312683105, "learning_rate": 2.2420388574888793e-05, "loss": 3.0362, "step": 53634 }, { "epoch": 2.63, "grad_norm": 0.7654792666435242, "learning_rate": 2.241454885079157e-05, "loss": 2.7503, "step": 53635 }, { "epoch": 2.63, "grad_norm": 0.7742012739181519, "learning_rate": 2.2408709857800988e-05, "loss": 2.7352, "step": 53636 }, { "epoch": 2.63, "grad_norm": 0.7555630803108215, "learning_rate": 2.240287159593247e-05, "loss": 2.9539, "step": 53637 }, { "epoch": 2.63, "grad_norm": 0.7577449083328247, "learning_rate": 2.2397034065201336e-05, "loss": 2.9425, "step": 53638 }, { "epoch": 2.63, "grad_norm": 0.7749025821685791, "learning_rate": 2.2391197265622975e-05, "loss": 3.015, "step": 53639 }, { "epoch": 2.63, "grad_norm": 0.7856140732765198, "learning_rate": 2.2385361197212805e-05, "loss": 2.7987, "step": 53640 }, { "epoch": 2.63, "grad_norm": 0.7463555932044983, "learning_rate": 2.2379525859986148e-05, "loss": 2.8118, "step": 53641 }, { "epoch": 2.63, "grad_norm": 0.7431473135948181, "learning_rate": 2.2373691253958358e-05, "loss": 2.8837, "step": 53642 }, { "epoch": 2.63, "grad_norm": 0.7310684323310852, "learning_rate": 2.2367857379144892e-05, "loss": 2.8888, "step": 53643 }, { "epoch": 2.63, "grad_norm": 0.7498796582221985, "learning_rate": 2.236202423556107e-05, "loss": 2.9496, "step": 53644 }, { "epoch": 2.63, "grad_norm": 0.740679919719696, "learning_rate": 2.2356191823222214e-05, "loss": 3.0156, "step": 53645 }, { "epoch": 2.63, "grad_norm": 0.7141246199607849, "learning_rate": 2.235036014214371e-05, "loss": 2.8782, "step": 53646 }, { "epoch": 2.63, "grad_norm": 0.7410897612571716, "learning_rate": 2.2344529192340876e-05, "loss": 2.6084, "step": 53647 }, { "epoch": 2.63, "grad_norm": 0.7512717247009277, "learning_rate": 2.2338698973829207e-05, "loss": 2.8707, "step": 53648 }, { "epoch": 2.63, "grad_norm": 0.7618011832237244, "learning_rate": 2.2332869486623884e-05, "loss": 2.8932, "step": 53649 }, { "epoch": 2.63, "grad_norm": 0.8862194418907166, "learning_rate": 2.2327040730740397e-05, "loss": 2.8414, "step": 53650 }, { "epoch": 2.63, "grad_norm": 0.7280387878417969, "learning_rate": 2.2321212706194037e-05, "loss": 2.7621, "step": 53651 }, { "epoch": 2.63, "grad_norm": 0.714809000492096, "learning_rate": 2.2315385413000152e-05, "loss": 2.623, "step": 53652 }, { "epoch": 2.63, "grad_norm": 0.7935085892677307, "learning_rate": 2.2309558851174138e-05, "loss": 2.7519, "step": 53653 }, { "epoch": 2.63, "grad_norm": 0.736251711845398, "learning_rate": 2.230373302073124e-05, "loss": 2.8681, "step": 53654 }, { "epoch": 2.63, "grad_norm": 0.7445536851882935, "learning_rate": 2.2297907921686953e-05, "loss": 3.1613, "step": 53655 }, { "epoch": 2.63, "grad_norm": 0.7414441108703613, "learning_rate": 2.2292083554056427e-05, "loss": 2.8987, "step": 53656 }, { "epoch": 2.63, "grad_norm": 0.7777810096740723, "learning_rate": 2.2286259917855152e-05, "loss": 2.6787, "step": 53657 }, { "epoch": 2.63, "grad_norm": 0.7500730156898499, "learning_rate": 2.2280437013098452e-05, "loss": 2.768, "step": 53658 }, { "epoch": 2.63, "grad_norm": 0.7526910901069641, "learning_rate": 2.2274614839801607e-05, "loss": 2.9552, "step": 53659 }, { "epoch": 2.63, "grad_norm": 0.7558931112289429, "learning_rate": 2.2268793397980013e-05, "loss": 3.0392, "step": 53660 }, { "epoch": 2.63, "grad_norm": 0.7288579344749451, "learning_rate": 2.2262972687648916e-05, "loss": 2.9854, "step": 53661 }, { "epoch": 2.63, "grad_norm": 0.7054191827774048, "learning_rate": 2.225715270882368e-05, "loss": 2.9694, "step": 53662 }, { "epoch": 2.63, "grad_norm": 0.7429760098457336, "learning_rate": 2.2251333461519717e-05, "loss": 2.8739, "step": 53663 }, { "epoch": 2.63, "grad_norm": 0.8079425692558289, "learning_rate": 2.224551494575222e-05, "loss": 2.8049, "step": 53664 }, { "epoch": 2.63, "grad_norm": 0.7400147318840027, "learning_rate": 2.2239697161536642e-05, "loss": 2.9006, "step": 53665 }, { "epoch": 2.63, "grad_norm": 0.7143925428390503, "learning_rate": 2.223388010888817e-05, "loss": 2.6057, "step": 53666 }, { "epoch": 2.63, "grad_norm": 0.7217711806297302, "learning_rate": 2.2228063787822292e-05, "loss": 2.896, "step": 53667 }, { "epoch": 2.63, "grad_norm": 0.8171392679214478, "learning_rate": 2.2222248198354198e-05, "loss": 3.0967, "step": 53668 }, { "epoch": 2.63, "grad_norm": 0.7449054718017578, "learning_rate": 2.221643334049924e-05, "loss": 2.9392, "step": 53669 }, { "epoch": 2.63, "grad_norm": 0.7087463140487671, "learning_rate": 2.221061921427274e-05, "loss": 2.9487, "step": 53670 }, { "epoch": 2.63, "grad_norm": 0.7898861169815063, "learning_rate": 2.2204805819689987e-05, "loss": 2.8475, "step": 53671 }, { "epoch": 2.63, "grad_norm": 0.762144923210144, "learning_rate": 2.21989931567663e-05, "loss": 2.948, "step": 53672 }, { "epoch": 2.63, "grad_norm": 0.7354418635368347, "learning_rate": 2.2193181225517065e-05, "loss": 2.6357, "step": 53673 }, { "epoch": 2.63, "grad_norm": 0.7770733833312988, "learning_rate": 2.2187370025957506e-05, "loss": 2.9122, "step": 53674 }, { "epoch": 2.63, "grad_norm": 0.7444289922714233, "learning_rate": 2.2181559558102947e-05, "loss": 2.8764, "step": 53675 }, { "epoch": 2.63, "grad_norm": 0.7131602764129639, "learning_rate": 2.217574982196867e-05, "loss": 2.8945, "step": 53676 }, { "epoch": 2.63, "grad_norm": 0.8043651580810547, "learning_rate": 2.2169940817569965e-05, "loss": 2.7772, "step": 53677 }, { "epoch": 2.63, "grad_norm": 0.8861671686172485, "learning_rate": 2.216413254492222e-05, "loss": 2.8719, "step": 53678 }, { "epoch": 2.63, "grad_norm": 0.782197892665863, "learning_rate": 2.2158325004040655e-05, "loss": 2.927, "step": 53679 }, { "epoch": 2.63, "grad_norm": 0.7546136379241943, "learning_rate": 2.2152518194940626e-05, "loss": 2.9038, "step": 53680 }, { "epoch": 2.63, "grad_norm": 0.7524154186248779, "learning_rate": 2.2146712117637388e-05, "loss": 2.8427, "step": 53681 }, { "epoch": 2.63, "grad_norm": 0.7260668277740479, "learning_rate": 2.2140906772146195e-05, "loss": 2.9093, "step": 53682 }, { "epoch": 2.63, "grad_norm": 0.7640723586082458, "learning_rate": 2.2135102158482432e-05, "loss": 2.7328, "step": 53683 }, { "epoch": 2.63, "grad_norm": 0.7455698251724243, "learning_rate": 2.2129298276661255e-05, "loss": 2.8891, "step": 53684 }, { "epoch": 2.63, "grad_norm": 0.7532725930213928, "learning_rate": 2.212349512669812e-05, "loss": 2.9091, "step": 53685 }, { "epoch": 2.63, "grad_norm": 0.7638143301010132, "learning_rate": 2.2117692708608148e-05, "loss": 3.1224, "step": 53686 }, { "epoch": 2.63, "grad_norm": 0.7833783626556396, "learning_rate": 2.211189102240669e-05, "loss": 2.9242, "step": 53687 }, { "epoch": 2.63, "grad_norm": 0.7878255844116211, "learning_rate": 2.2106090068109107e-05, "loss": 2.9914, "step": 53688 }, { "epoch": 2.63, "grad_norm": 0.7649726867675781, "learning_rate": 2.210028984573058e-05, "loss": 2.6449, "step": 53689 }, { "epoch": 2.63, "grad_norm": 0.7419236898422241, "learning_rate": 2.2094490355286397e-05, "loss": 3.0778, "step": 53690 }, { "epoch": 2.63, "grad_norm": 0.7762550115585327, "learning_rate": 2.2088691596791784e-05, "loss": 2.869, "step": 53691 }, { "epoch": 2.63, "grad_norm": 0.7269838452339172, "learning_rate": 2.2082893570262163e-05, "loss": 2.8345, "step": 53692 }, { "epoch": 2.63, "grad_norm": 0.7062630653381348, "learning_rate": 2.2077096275712648e-05, "loss": 2.7138, "step": 53693 }, { "epoch": 2.63, "grad_norm": 0.7006743550300598, "learning_rate": 2.2071299713158564e-05, "loss": 2.9197, "step": 53694 }, { "epoch": 2.63, "grad_norm": 0.738308846950531, "learning_rate": 2.206550388261523e-05, "loss": 2.6824, "step": 53695 }, { "epoch": 2.63, "grad_norm": 0.8569402694702148, "learning_rate": 2.2059708784097874e-05, "loss": 3.0809, "step": 53696 }, { "epoch": 2.63, "grad_norm": 0.7651105523109436, "learning_rate": 2.2053914417621776e-05, "loss": 2.6283, "step": 53697 }, { "epoch": 2.63, "grad_norm": 0.7134808897972107, "learning_rate": 2.2048120783202128e-05, "loss": 2.8568, "step": 53698 }, { "epoch": 2.63, "grad_norm": 0.7216563820838928, "learning_rate": 2.2042327880854215e-05, "loss": 2.7479, "step": 53699 }, { "epoch": 2.63, "grad_norm": 0.7495296597480774, "learning_rate": 2.2036535710593362e-05, "loss": 3.0511, "step": 53700 }, { "epoch": 2.63, "grad_norm": 0.7457597255706787, "learning_rate": 2.2030744272434754e-05, "loss": 2.8722, "step": 53701 }, { "epoch": 2.63, "grad_norm": 0.76603764295578, "learning_rate": 2.2024953566393678e-05, "loss": 3.0893, "step": 53702 }, { "epoch": 2.63, "grad_norm": 0.7615640163421631, "learning_rate": 2.201916359248542e-05, "loss": 3.0252, "step": 53703 }, { "epoch": 2.63, "grad_norm": 0.765140950679779, "learning_rate": 2.201337435072511e-05, "loss": 2.9289, "step": 53704 }, { "epoch": 2.63, "grad_norm": 0.7220942974090576, "learning_rate": 2.200758584112813e-05, "loss": 2.8011, "step": 53705 }, { "epoch": 2.63, "grad_norm": 0.7161285281181335, "learning_rate": 2.2001798063709632e-05, "loss": 2.7574, "step": 53706 }, { "epoch": 2.63, "grad_norm": 0.7688199281692505, "learning_rate": 2.1996011018484936e-05, "loss": 3.0325, "step": 53707 }, { "epoch": 2.63, "grad_norm": 0.7416340708732605, "learning_rate": 2.199022470546917e-05, "loss": 2.8929, "step": 53708 }, { "epoch": 2.63, "grad_norm": 0.7346240282058716, "learning_rate": 2.198443912467769e-05, "loss": 2.9675, "step": 53709 }, { "epoch": 2.63, "grad_norm": 0.7140522599220276, "learning_rate": 2.1978654276125708e-05, "loss": 2.9636, "step": 53710 }, { "epoch": 2.63, "grad_norm": 0.761841356754303, "learning_rate": 2.1972870159828482e-05, "loss": 2.8097, "step": 53711 }, { "epoch": 2.63, "grad_norm": 0.7178420424461365, "learning_rate": 2.196708677580117e-05, "loss": 2.9565, "step": 53712 }, { "epoch": 2.63, "grad_norm": 0.7384170889854431, "learning_rate": 2.196130412405902e-05, "loss": 2.9932, "step": 53713 }, { "epoch": 2.63, "grad_norm": 0.742639422416687, "learning_rate": 2.1955522204617258e-05, "loss": 2.967, "step": 53714 }, { "epoch": 2.63, "grad_norm": 0.7487492561340332, "learning_rate": 2.1949741017491206e-05, "loss": 2.8679, "step": 53715 }, { "epoch": 2.63, "grad_norm": 0.7340525984764099, "learning_rate": 2.1943960562695983e-05, "loss": 2.7455, "step": 53716 }, { "epoch": 2.63, "grad_norm": 0.7590996623039246, "learning_rate": 2.193818084024691e-05, "loss": 2.8737, "step": 53717 }, { "epoch": 2.63, "grad_norm": 0.7017925977706909, "learning_rate": 2.1932401850159142e-05, "loss": 2.7985, "step": 53718 }, { "epoch": 2.63, "grad_norm": 0.7645330429077148, "learning_rate": 2.1926623592447834e-05, "loss": 2.939, "step": 53719 }, { "epoch": 2.63, "grad_norm": 0.7296159863471985, "learning_rate": 2.1920846067128372e-05, "loss": 2.9681, "step": 53720 }, { "epoch": 2.63, "grad_norm": 0.7412862181663513, "learning_rate": 2.1915069274215812e-05, "loss": 2.8342, "step": 53721 }, { "epoch": 2.63, "grad_norm": 0.7657079100608826, "learning_rate": 2.190929321372551e-05, "loss": 3.0628, "step": 53722 }, { "epoch": 2.63, "grad_norm": 0.7571131587028503, "learning_rate": 2.1903517885672552e-05, "loss": 2.7669, "step": 53723 }, { "epoch": 2.63, "grad_norm": 0.7139904499053955, "learning_rate": 2.1897743290072257e-05, "loss": 2.8135, "step": 53724 }, { "epoch": 2.63, "grad_norm": 0.7659240365028381, "learning_rate": 2.1891969426939783e-05, "loss": 2.8425, "step": 53725 }, { "epoch": 2.63, "grad_norm": 0.7518433928489685, "learning_rate": 2.188619629629028e-05, "loss": 2.9292, "step": 53726 }, { "epoch": 2.63, "grad_norm": 0.8315146565437317, "learning_rate": 2.1880423898139078e-05, "loss": 2.921, "step": 53727 }, { "epoch": 2.63, "grad_norm": 0.7377954125404358, "learning_rate": 2.1874652232501255e-05, "loss": 2.7456, "step": 53728 }, { "epoch": 2.63, "grad_norm": 0.7297799587249756, "learning_rate": 2.186888129939207e-05, "loss": 2.5994, "step": 53729 }, { "epoch": 2.63, "grad_norm": 0.7472618818283081, "learning_rate": 2.186311109882678e-05, "loss": 2.7483, "step": 53730 }, { "epoch": 2.63, "grad_norm": 0.7658689618110657, "learning_rate": 2.1857341630820503e-05, "loss": 2.6907, "step": 53731 }, { "epoch": 2.63, "grad_norm": 0.7699265480041504, "learning_rate": 2.185157289538846e-05, "loss": 2.9277, "step": 53732 }, { "epoch": 2.63, "grad_norm": 0.8159839510917664, "learning_rate": 2.1845804892545872e-05, "loss": 2.8331, "step": 53733 }, { "epoch": 2.63, "grad_norm": 0.7433233857154846, "learning_rate": 2.1840037622307827e-05, "loss": 2.7793, "step": 53734 }, { "epoch": 2.63, "grad_norm": 0.7243334054946899, "learning_rate": 2.183427108468968e-05, "loss": 2.8757, "step": 53735 }, { "epoch": 2.63, "grad_norm": 0.7512285709381104, "learning_rate": 2.1828505279706454e-05, "loss": 2.91, "step": 53736 }, { "epoch": 2.63, "grad_norm": 0.761920690536499, "learning_rate": 2.182274020737347e-05, "loss": 2.9553, "step": 53737 }, { "epoch": 2.63, "grad_norm": 0.7395731210708618, "learning_rate": 2.181697586770578e-05, "loss": 2.8619, "step": 53738 }, { "epoch": 2.63, "grad_norm": 0.7716213464736938, "learning_rate": 2.18112122607187e-05, "loss": 2.9312, "step": 53739 }, { "epoch": 2.63, "grad_norm": 0.7732451558113098, "learning_rate": 2.1805449386427364e-05, "loss": 2.8891, "step": 53740 }, { "epoch": 2.63, "grad_norm": 0.7086454629898071, "learning_rate": 2.1799687244846852e-05, "loss": 3.0304, "step": 53741 }, { "epoch": 2.63, "grad_norm": 0.7271209359169006, "learning_rate": 2.1793925835992486e-05, "loss": 2.7164, "step": 53742 }, { "epoch": 2.63, "grad_norm": 0.7848542332649231, "learning_rate": 2.1788165159879356e-05, "loss": 2.7783, "step": 53743 }, { "epoch": 2.63, "grad_norm": 0.7181041240692139, "learning_rate": 2.1782405216522615e-05, "loss": 2.8273, "step": 53744 }, { "epoch": 2.63, "grad_norm": 0.7394196391105652, "learning_rate": 2.177664600593755e-05, "loss": 2.9158, "step": 53745 }, { "epoch": 2.63, "grad_norm": 0.7684555053710938, "learning_rate": 2.1770887528139245e-05, "loss": 2.8852, "step": 53746 }, { "epoch": 2.63, "grad_norm": 0.733661413192749, "learning_rate": 2.1765129783142897e-05, "loss": 2.8123, "step": 53747 }, { "epoch": 2.63, "grad_norm": 0.7537463307380676, "learning_rate": 2.1759372770963558e-05, "loss": 2.9477, "step": 53748 }, { "epoch": 2.63, "grad_norm": 0.752604067325592, "learning_rate": 2.175361649161651e-05, "loss": 2.8565, "step": 53749 }, { "epoch": 2.63, "grad_norm": 0.8050605654716492, "learning_rate": 2.1747860945116946e-05, "loss": 2.6993, "step": 53750 }, { "epoch": 2.63, "grad_norm": 0.7391548156738281, "learning_rate": 2.1742106131479886e-05, "loss": 2.7675, "step": 53751 }, { "epoch": 2.63, "grad_norm": 0.7486521005630493, "learning_rate": 2.173635205072065e-05, "loss": 2.7678, "step": 53752 }, { "epoch": 2.63, "grad_norm": 0.7553504705429077, "learning_rate": 2.173059870285423e-05, "loss": 2.8386, "step": 53753 }, { "epoch": 2.63, "grad_norm": 0.8861485123634338, "learning_rate": 2.1724846087895943e-05, "loss": 2.651, "step": 53754 }, { "epoch": 2.63, "grad_norm": 0.7568973898887634, "learning_rate": 2.171909420586081e-05, "loss": 2.9888, "step": 53755 }, { "epoch": 2.63, "grad_norm": 0.7912908792495728, "learning_rate": 2.171334305676402e-05, "loss": 2.9017, "step": 53756 }, { "epoch": 2.63, "grad_norm": 0.7801294326782227, "learning_rate": 2.1707592640620762e-05, "loss": 2.8552, "step": 53757 }, { "epoch": 2.63, "grad_norm": 0.749573290348053, "learning_rate": 2.1701842957446092e-05, "loss": 2.9134, "step": 53758 }, { "epoch": 2.63, "grad_norm": 0.778415322303772, "learning_rate": 2.1696094007255194e-05, "loss": 2.8841, "step": 53759 }, { "epoch": 2.63, "grad_norm": 0.7514265179634094, "learning_rate": 2.169034579006329e-05, "loss": 3.0026, "step": 53760 }, { "epoch": 2.63, "grad_norm": 0.7774766683578491, "learning_rate": 2.168459830588547e-05, "loss": 2.7802, "step": 53761 }, { "epoch": 2.63, "grad_norm": 0.7230890393257141, "learning_rate": 2.167885155473682e-05, "loss": 2.9125, "step": 53762 }, { "epoch": 2.63, "grad_norm": 0.7347655892372131, "learning_rate": 2.1673105536632497e-05, "loss": 3.0178, "step": 53763 }, { "epoch": 2.63, "grad_norm": 0.7173894643783569, "learning_rate": 2.166736025158762e-05, "loss": 3.0613, "step": 53764 }, { "epoch": 2.63, "grad_norm": 0.7474822402000427, "learning_rate": 2.1661615699617407e-05, "loss": 2.8945, "step": 53765 }, { "epoch": 2.63, "grad_norm": 0.7168195247650146, "learning_rate": 2.1655871880736887e-05, "loss": 2.8247, "step": 53766 }, { "epoch": 2.63, "grad_norm": 0.7810943722724915, "learning_rate": 2.1650128794961276e-05, "loss": 2.7536, "step": 53767 }, { "epoch": 2.64, "grad_norm": 0.7393297553062439, "learning_rate": 2.1644386442305627e-05, "loss": 2.957, "step": 53768 }, { "epoch": 2.64, "grad_norm": 0.758391797542572, "learning_rate": 2.1638644822785166e-05, "loss": 2.6468, "step": 53769 }, { "epoch": 2.64, "grad_norm": 0.764182448387146, "learning_rate": 2.1632903936414848e-05, "loss": 2.9961, "step": 53770 }, { "epoch": 2.64, "grad_norm": 0.7735395431518555, "learning_rate": 2.162716378320989e-05, "loss": 2.9979, "step": 53771 }, { "epoch": 2.64, "grad_norm": 0.7840405702590942, "learning_rate": 2.162142436318548e-05, "loss": 2.9104, "step": 53772 }, { "epoch": 2.64, "grad_norm": 0.7420419454574585, "learning_rate": 2.1615685676356577e-05, "loss": 3.1512, "step": 53773 }, { "epoch": 2.64, "grad_norm": 0.7456751465797424, "learning_rate": 2.160994772273846e-05, "loss": 2.9972, "step": 53774 }, { "epoch": 2.64, "grad_norm": 0.7378044128417969, "learning_rate": 2.16042105023461e-05, "loss": 2.7904, "step": 53775 }, { "epoch": 2.64, "grad_norm": 0.744476318359375, "learning_rate": 2.1598474015194733e-05, "loss": 2.807, "step": 53776 }, { "epoch": 2.64, "grad_norm": 0.7326436638832092, "learning_rate": 2.1592738261299393e-05, "loss": 2.8704, "step": 53777 }, { "epoch": 2.64, "grad_norm": 0.752892792224884, "learning_rate": 2.158700324067516e-05, "loss": 2.941, "step": 53778 }, { "epoch": 2.64, "grad_norm": 0.7676821947097778, "learning_rate": 2.1581268953337227e-05, "loss": 2.8962, "step": 53779 }, { "epoch": 2.64, "grad_norm": 0.788492739200592, "learning_rate": 2.157553539930058e-05, "loss": 3.0261, "step": 53780 }, { "epoch": 2.64, "grad_norm": 0.7496923804283142, "learning_rate": 2.1569802578580407e-05, "loss": 3.1289, "step": 53781 }, { "epoch": 2.64, "grad_norm": 0.7280400395393372, "learning_rate": 2.156407049119183e-05, "loss": 3.0553, "step": 53782 }, { "epoch": 2.64, "grad_norm": 0.8010007739067078, "learning_rate": 2.1558339137149904e-05, "loss": 2.7921, "step": 53783 }, { "epoch": 2.64, "grad_norm": 0.7570450305938721, "learning_rate": 2.155260851646975e-05, "loss": 3.0598, "step": 53784 }, { "epoch": 2.64, "grad_norm": 0.7340518832206726, "learning_rate": 2.1546878629166355e-05, "loss": 2.8535, "step": 53785 }, { "epoch": 2.64, "grad_norm": 0.7815223336219788, "learning_rate": 2.1541149475254903e-05, "loss": 3.059, "step": 53786 }, { "epoch": 2.64, "grad_norm": 0.7372803688049316, "learning_rate": 2.1535421054750558e-05, "loss": 3.0539, "step": 53787 }, { "epoch": 2.64, "grad_norm": 0.7437090277671814, "learning_rate": 2.1529693367668233e-05, "loss": 3.0135, "step": 53788 }, { "epoch": 2.64, "grad_norm": 0.7286654710769653, "learning_rate": 2.152396641402315e-05, "loss": 2.7342, "step": 53789 }, { "epoch": 2.64, "grad_norm": 0.7460008263587952, "learning_rate": 2.1518240193830306e-05, "loss": 3.0216, "step": 53790 }, { "epoch": 2.64, "grad_norm": 0.7398113012313843, "learning_rate": 2.151251470710488e-05, "loss": 3.0744, "step": 53791 }, { "epoch": 2.64, "grad_norm": 0.7812997698783875, "learning_rate": 2.150678995386189e-05, "loss": 2.8752, "step": 53792 }, { "epoch": 2.64, "grad_norm": 0.7865747809410095, "learning_rate": 2.1501065934116368e-05, "loss": 2.8237, "step": 53793 }, { "epoch": 2.64, "grad_norm": 0.7350339293479919, "learning_rate": 2.1495342647883496e-05, "loss": 2.929, "step": 53794 }, { "epoch": 2.64, "grad_norm": 0.7692163586616516, "learning_rate": 2.148962009517823e-05, "loss": 2.9889, "step": 53795 }, { "epoch": 2.64, "grad_norm": 0.7704609036445618, "learning_rate": 2.1483898276015688e-05, "loss": 2.9512, "step": 53796 }, { "epoch": 2.64, "grad_norm": 0.7423082590103149, "learning_rate": 2.147817719041103e-05, "loss": 2.7966, "step": 53797 }, { "epoch": 2.64, "grad_norm": 0.7098038196563721, "learning_rate": 2.147245683837927e-05, "loss": 3.0031, "step": 53798 }, { "epoch": 2.64, "grad_norm": 0.7264339923858643, "learning_rate": 2.1466737219935405e-05, "loss": 2.8246, "step": 53799 }, { "epoch": 2.64, "grad_norm": 0.7618200778961182, "learning_rate": 2.1461018335094548e-05, "loss": 3.1037, "step": 53800 }, { "epoch": 2.64, "grad_norm": 0.742719292640686, "learning_rate": 2.1455300183871728e-05, "loss": 2.9129, "step": 53801 }, { "epoch": 2.64, "grad_norm": 0.8027572631835938, "learning_rate": 2.1449582766282125e-05, "loss": 3.0597, "step": 53802 }, { "epoch": 2.64, "grad_norm": 0.7896417379379272, "learning_rate": 2.1443866082340632e-05, "loss": 3.015, "step": 53803 }, { "epoch": 2.64, "grad_norm": 0.7622906565666199, "learning_rate": 2.1438150132062436e-05, "loss": 3.051, "step": 53804 }, { "epoch": 2.64, "grad_norm": 0.7213104367256165, "learning_rate": 2.143243491546256e-05, "loss": 2.6144, "step": 53805 }, { "epoch": 2.64, "grad_norm": 0.754871666431427, "learning_rate": 2.142672043255599e-05, "loss": 2.8604, "step": 53806 }, { "epoch": 2.64, "grad_norm": 0.7541650533676147, "learning_rate": 2.1421006683357844e-05, "loss": 2.7727, "step": 53807 }, { "epoch": 2.64, "grad_norm": 0.7695738673210144, "learning_rate": 2.141529366788315e-05, "loss": 2.9745, "step": 53808 }, { "epoch": 2.64, "grad_norm": 0.7393126487731934, "learning_rate": 2.140958138614699e-05, "loss": 2.868, "step": 53809 }, { "epoch": 2.64, "grad_norm": 0.7608216404914856, "learning_rate": 2.140386983816429e-05, "loss": 2.8814, "step": 53810 }, { "epoch": 2.64, "grad_norm": 0.7869523763656616, "learning_rate": 2.1398159023950235e-05, "loss": 2.9099, "step": 53811 }, { "epoch": 2.64, "grad_norm": 0.7133737206459045, "learning_rate": 2.1392448943519812e-05, "loss": 2.9117, "step": 53812 }, { "epoch": 2.64, "grad_norm": 0.7602190375328064, "learning_rate": 2.1386739596888116e-05, "loss": 2.8612, "step": 53813 }, { "epoch": 2.64, "grad_norm": 0.7913804650306702, "learning_rate": 2.138103098407009e-05, "loss": 2.9442, "step": 53814 }, { "epoch": 2.64, "grad_norm": 0.7600404620170593, "learning_rate": 2.1375323105080768e-05, "loss": 2.8422, "step": 53815 }, { "epoch": 2.64, "grad_norm": 0.7508252263069153, "learning_rate": 2.1369615959935192e-05, "loss": 2.6938, "step": 53816 }, { "epoch": 2.64, "grad_norm": 0.7605404853820801, "learning_rate": 2.1363909548648527e-05, "loss": 2.6986, "step": 53817 }, { "epoch": 2.64, "grad_norm": 0.7708483934402466, "learning_rate": 2.1358203871235623e-05, "loss": 2.7698, "step": 53818 }, { "epoch": 2.64, "grad_norm": 0.7216460704803467, "learning_rate": 2.1352498927711603e-05, "loss": 2.9233, "step": 53819 }, { "epoch": 2.64, "grad_norm": 0.7428836226463318, "learning_rate": 2.134679471809152e-05, "loss": 2.8189, "step": 53820 }, { "epoch": 2.64, "grad_norm": 0.757232666015625, "learning_rate": 2.1341091242390296e-05, "loss": 2.8876, "step": 53821 }, { "epoch": 2.64, "grad_norm": 0.7275024652481079, "learning_rate": 2.133538850062305e-05, "loss": 2.7804, "step": 53822 }, { "epoch": 2.64, "grad_norm": 0.7810689210891724, "learning_rate": 2.132968649280471e-05, "loss": 2.8834, "step": 53823 }, { "epoch": 2.64, "grad_norm": 0.758514404296875, "learning_rate": 2.132398521895039e-05, "loss": 2.9051, "step": 53824 }, { "epoch": 2.64, "grad_norm": 0.7726489305496216, "learning_rate": 2.1318284679075016e-05, "loss": 3.0325, "step": 53825 }, { "epoch": 2.64, "grad_norm": 0.7153679728507996, "learning_rate": 2.1312584873193705e-05, "loss": 2.9691, "step": 53826 }, { "epoch": 2.64, "grad_norm": 0.7290853261947632, "learning_rate": 2.1306885801321384e-05, "loss": 3.0509, "step": 53827 }, { "epoch": 2.64, "grad_norm": 0.7138325572013855, "learning_rate": 2.1301187463473067e-05, "loss": 2.9116, "step": 53828 }, { "epoch": 2.64, "grad_norm": 0.7441121339797974, "learning_rate": 2.1295489859663817e-05, "loss": 2.9767, "step": 53829 }, { "epoch": 2.64, "grad_norm": 0.7585853934288025, "learning_rate": 2.1289792989908583e-05, "loss": 2.8873, "step": 53830 }, { "epoch": 2.64, "grad_norm": 0.7859900593757629, "learning_rate": 2.1284096854222388e-05, "loss": 2.9058, "step": 53831 }, { "epoch": 2.64, "grad_norm": 0.762042224407196, "learning_rate": 2.1278401452620287e-05, "loss": 2.8423, "step": 53832 }, { "epoch": 2.64, "grad_norm": 0.7395182251930237, "learning_rate": 2.1272706785117167e-05, "loss": 2.7781, "step": 53833 }, { "epoch": 2.64, "grad_norm": 0.7580464482307434, "learning_rate": 2.126701285172815e-05, "loss": 2.6739, "step": 53834 }, { "epoch": 2.64, "grad_norm": 0.7301293015480042, "learning_rate": 2.126131965246819e-05, "loss": 2.7645, "step": 53835 }, { "epoch": 2.64, "grad_norm": 0.7253274321556091, "learning_rate": 2.1255627187352244e-05, "loss": 2.7732, "step": 53836 }, { "epoch": 2.64, "grad_norm": 0.7815554738044739, "learning_rate": 2.124993545639533e-05, "loss": 2.714, "step": 53837 }, { "epoch": 2.64, "grad_norm": 0.7133461236953735, "learning_rate": 2.1244244459612436e-05, "loss": 2.8703, "step": 53838 }, { "epoch": 2.64, "grad_norm": 0.7465064525604248, "learning_rate": 2.1238554197018587e-05, "loss": 2.9164, "step": 53839 }, { "epoch": 2.64, "grad_norm": 0.7770967483520508, "learning_rate": 2.123286466862867e-05, "loss": 2.7855, "step": 53840 }, { "epoch": 2.64, "grad_norm": 0.7370714545249939, "learning_rate": 2.12271758744578e-05, "loss": 2.7894, "step": 53841 }, { "epoch": 2.64, "grad_norm": 0.747188925743103, "learning_rate": 2.122148781452091e-05, "loss": 3.0637, "step": 53842 }, { "epoch": 2.64, "grad_norm": 0.747463047504425, "learning_rate": 2.1215800488832913e-05, "loss": 2.6843, "step": 53843 }, { "epoch": 2.64, "grad_norm": 0.7289397120475769, "learning_rate": 2.1210113897408898e-05, "loss": 2.9257, "step": 53844 }, { "epoch": 2.64, "grad_norm": 0.7071889042854309, "learning_rate": 2.120442804026372e-05, "loss": 2.7178, "step": 53845 }, { "epoch": 2.64, "grad_norm": 0.7718331813812256, "learning_rate": 2.119874291741247e-05, "loss": 3.0457, "step": 53846 }, { "epoch": 2.64, "grad_norm": 0.7785387635231018, "learning_rate": 2.1193058528870066e-05, "loss": 2.9793, "step": 53847 }, { "epoch": 2.64, "grad_norm": 0.7537055015563965, "learning_rate": 2.11873748746515e-05, "loss": 2.723, "step": 53848 }, { "epoch": 2.64, "grad_norm": 0.7677419185638428, "learning_rate": 2.118169195477175e-05, "loss": 2.7947, "step": 53849 }, { "epoch": 2.64, "grad_norm": 0.7995899319648743, "learning_rate": 2.1176009769245683e-05, "loss": 3.0872, "step": 53850 }, { "epoch": 2.64, "grad_norm": 0.688861608505249, "learning_rate": 2.1170328318088447e-05, "loss": 2.8195, "step": 53851 }, { "epoch": 2.64, "grad_norm": 0.8810485005378723, "learning_rate": 2.11646476013148e-05, "loss": 2.9762, "step": 53852 }, { "epoch": 2.64, "grad_norm": 0.7384539842605591, "learning_rate": 2.1158967618939858e-05, "loss": 2.8733, "step": 53853 }, { "epoch": 2.64, "grad_norm": 0.7322173118591309, "learning_rate": 2.115328837097855e-05, "loss": 2.847, "step": 53854 }, { "epoch": 2.64, "grad_norm": 0.8136117458343506, "learning_rate": 2.114760985744576e-05, "loss": 2.916, "step": 53855 }, { "epoch": 2.64, "grad_norm": 0.7917467951774597, "learning_rate": 2.114193207835657e-05, "loss": 2.8955, "step": 53856 }, { "epoch": 2.64, "grad_norm": 0.7615936994552612, "learning_rate": 2.1136255033725846e-05, "loss": 3.037, "step": 53857 }, { "epoch": 2.64, "grad_norm": 0.7316914200782776, "learning_rate": 2.1130578723568538e-05, "loss": 3.0313, "step": 53858 }, { "epoch": 2.64, "grad_norm": 0.705625593662262, "learning_rate": 2.112490314789963e-05, "loss": 3.0341, "step": 53859 }, { "epoch": 2.64, "grad_norm": 0.7249335646629333, "learning_rate": 2.1119228306734014e-05, "loss": 3.0307, "step": 53860 }, { "epoch": 2.64, "grad_norm": 0.7347077131271362, "learning_rate": 2.1113554200086748e-05, "loss": 2.8507, "step": 53861 }, { "epoch": 2.64, "grad_norm": 0.734248161315918, "learning_rate": 2.1107880827972646e-05, "loss": 2.9392, "step": 53862 }, { "epoch": 2.64, "grad_norm": 0.7667978405952454, "learning_rate": 2.1102208190406767e-05, "loss": 2.7851, "step": 53863 }, { "epoch": 2.64, "grad_norm": 0.7558903098106384, "learning_rate": 2.1096536287403997e-05, "loss": 2.7192, "step": 53864 }, { "epoch": 2.64, "grad_norm": 0.8619865775108337, "learning_rate": 2.1090865118979194e-05, "loss": 2.8567, "step": 53865 }, { "epoch": 2.64, "grad_norm": 0.7786526083946228, "learning_rate": 2.1085194685147478e-05, "loss": 2.949, "step": 53866 }, { "epoch": 2.64, "grad_norm": 0.7425805330276489, "learning_rate": 2.1079524985923602e-05, "loss": 2.7537, "step": 53867 }, { "epoch": 2.64, "grad_norm": 0.6905446648597717, "learning_rate": 2.1073856021322555e-05, "loss": 2.6834, "step": 53868 }, { "epoch": 2.64, "grad_norm": 0.7856373190879822, "learning_rate": 2.1068187791359393e-05, "loss": 2.9288, "step": 53869 }, { "epoch": 2.64, "grad_norm": 0.7731509804725647, "learning_rate": 2.1062520296048903e-05, "loss": 2.9401, "step": 53870 }, { "epoch": 2.64, "grad_norm": 0.7262278199195862, "learning_rate": 2.105685353540607e-05, "loss": 2.8644, "step": 53871 }, { "epoch": 2.64, "grad_norm": 0.730444610118866, "learning_rate": 2.105118750944572e-05, "loss": 3.1441, "step": 53872 }, { "epoch": 2.64, "grad_norm": 0.7428088188171387, "learning_rate": 2.1045522218182908e-05, "loss": 2.9372, "step": 53873 }, { "epoch": 2.64, "grad_norm": 0.7640175819396973, "learning_rate": 2.103985766163252e-05, "loss": 3.0142, "step": 53874 }, { "epoch": 2.64, "grad_norm": 0.8164355158805847, "learning_rate": 2.103419383980941e-05, "loss": 2.9677, "step": 53875 }, { "epoch": 2.64, "grad_norm": 0.7192038893699646, "learning_rate": 2.10285307527286e-05, "loss": 2.8549, "step": 53876 }, { "epoch": 2.64, "grad_norm": 0.7759872078895569, "learning_rate": 2.1022868400404913e-05, "loss": 2.9487, "step": 53877 }, { "epoch": 2.64, "grad_norm": 0.7276328802108765, "learning_rate": 2.1017206782853334e-05, "loss": 2.7763, "step": 53878 }, { "epoch": 2.64, "grad_norm": 0.7632603049278259, "learning_rate": 2.1011545900088757e-05, "loss": 2.974, "step": 53879 }, { "epoch": 2.64, "grad_norm": 0.7106670141220093, "learning_rate": 2.1005885752126027e-05, "loss": 2.8494, "step": 53880 }, { "epoch": 2.64, "grad_norm": 0.7603490948677063, "learning_rate": 2.1000226338980143e-05, "loss": 2.8562, "step": 53881 }, { "epoch": 2.64, "grad_norm": 0.7455410361289978, "learning_rate": 2.0994567660665916e-05, "loss": 2.8456, "step": 53882 }, { "epoch": 2.64, "grad_norm": 0.7279428243637085, "learning_rate": 2.098890971719831e-05, "loss": 3.1957, "step": 53883 }, { "epoch": 2.64, "grad_norm": 0.8473446369171143, "learning_rate": 2.098325250859224e-05, "loss": 2.9781, "step": 53884 }, { "epoch": 2.64, "grad_norm": 0.7343671321868896, "learning_rate": 2.0977596034862632e-05, "loss": 2.9825, "step": 53885 }, { "epoch": 2.64, "grad_norm": 0.7839120030403137, "learning_rate": 2.0971940296024304e-05, "loss": 3.0148, "step": 53886 }, { "epoch": 2.64, "grad_norm": 0.7506952285766602, "learning_rate": 2.0966285292092144e-05, "loss": 3.0424, "step": 53887 }, { "epoch": 2.64, "grad_norm": 0.7599936723709106, "learning_rate": 2.0960631023081077e-05, "loss": 2.9168, "step": 53888 }, { "epoch": 2.64, "grad_norm": 0.7371647953987122, "learning_rate": 2.0954977489006085e-05, "loss": 2.5458, "step": 53889 }, { "epoch": 2.64, "grad_norm": 0.7845785021781921, "learning_rate": 2.0949324689881896e-05, "loss": 2.9261, "step": 53890 }, { "epoch": 2.64, "grad_norm": 0.7766330242156982, "learning_rate": 2.094367262572356e-05, "loss": 2.9509, "step": 53891 }, { "epoch": 2.64, "grad_norm": 0.7818014025688171, "learning_rate": 2.0938021296545804e-05, "loss": 2.9393, "step": 53892 }, { "epoch": 2.64, "grad_norm": 0.7388964891433716, "learning_rate": 2.0932370702363678e-05, "loss": 2.8543, "step": 53893 }, { "epoch": 2.64, "grad_norm": 0.7346888780593872, "learning_rate": 2.0926720843191936e-05, "loss": 2.8711, "step": 53894 }, { "epoch": 2.64, "grad_norm": 0.7887476086616516, "learning_rate": 2.0921071719045503e-05, "loss": 2.9683, "step": 53895 }, { "epoch": 2.64, "grad_norm": 0.7444952130317688, "learning_rate": 2.091542332993926e-05, "loss": 2.6975, "step": 53896 }, { "epoch": 2.64, "grad_norm": 0.7575118541717529, "learning_rate": 2.0909775675888073e-05, "loss": 2.5881, "step": 53897 }, { "epoch": 2.64, "grad_norm": 0.7286080121994019, "learning_rate": 2.090412875690679e-05, "loss": 2.7892, "step": 53898 }, { "epoch": 2.64, "grad_norm": 0.7232897281646729, "learning_rate": 2.08984825730104e-05, "loss": 2.9935, "step": 53899 }, { "epoch": 2.64, "grad_norm": 0.7478100061416626, "learning_rate": 2.0892837124213657e-05, "loss": 2.9702, "step": 53900 }, { "epoch": 2.64, "grad_norm": 0.7384991645812988, "learning_rate": 2.0887192410531483e-05, "loss": 2.8308, "step": 53901 }, { "epoch": 2.64, "grad_norm": 0.7648786902427673, "learning_rate": 2.08815484319787e-05, "loss": 2.9977, "step": 53902 }, { "epoch": 2.64, "grad_norm": 0.7234745621681213, "learning_rate": 2.0875905188570163e-05, "loss": 2.9809, "step": 53903 }, { "epoch": 2.64, "grad_norm": 0.7378318905830383, "learning_rate": 2.087026268032086e-05, "loss": 2.9467, "step": 53904 }, { "epoch": 2.64, "grad_norm": 0.7225425243377686, "learning_rate": 2.086462090724548e-05, "loss": 2.7495, "step": 53905 }, { "epoch": 2.64, "grad_norm": 0.744601309299469, "learning_rate": 2.085897986935904e-05, "loss": 2.9628, "step": 53906 }, { "epoch": 2.64, "grad_norm": 0.7627322673797607, "learning_rate": 2.085333956667633e-05, "loss": 2.8884, "step": 53907 }, { "epoch": 2.64, "grad_norm": 0.7031084299087524, "learning_rate": 2.084769999921214e-05, "loss": 3.1328, "step": 53908 }, { "epoch": 2.64, "grad_norm": 0.8040719032287598, "learning_rate": 2.0842061166981427e-05, "loss": 2.9711, "step": 53909 }, { "epoch": 2.64, "grad_norm": 0.733121931552887, "learning_rate": 2.0836423069998974e-05, "loss": 2.9283, "step": 53910 }, { "epoch": 2.64, "grad_norm": 0.7206590175628662, "learning_rate": 2.0830785708279673e-05, "loss": 2.8658, "step": 53911 }, { "epoch": 2.64, "grad_norm": 0.7238807678222656, "learning_rate": 2.082514908183831e-05, "loss": 2.9172, "step": 53912 }, { "epoch": 2.64, "grad_norm": 0.7584816217422485, "learning_rate": 2.081951319068981e-05, "loss": 2.977, "step": 53913 }, { "epoch": 2.64, "grad_norm": 0.8075535297393799, "learning_rate": 2.081387803484902e-05, "loss": 2.8249, "step": 53914 }, { "epoch": 2.64, "grad_norm": 0.7531391382217407, "learning_rate": 2.0808243614330733e-05, "loss": 2.8588, "step": 53915 }, { "epoch": 2.64, "grad_norm": 0.7435611486434937, "learning_rate": 2.0802609929149804e-05, "loss": 2.9846, "step": 53916 }, { "epoch": 2.64, "grad_norm": 0.7779226303100586, "learning_rate": 2.0796976979321024e-05, "loss": 2.9811, "step": 53917 }, { "epoch": 2.64, "grad_norm": 0.7348940372467041, "learning_rate": 2.079134476485924e-05, "loss": 2.8087, "step": 53918 }, { "epoch": 2.64, "grad_norm": 0.7662265300750732, "learning_rate": 2.0785713285779415e-05, "loss": 2.6446, "step": 53919 }, { "epoch": 2.64, "grad_norm": 0.8061109185218811, "learning_rate": 2.07800825420962e-05, "loss": 2.6772, "step": 53920 }, { "epoch": 2.64, "grad_norm": 0.744640052318573, "learning_rate": 2.0774452533824583e-05, "loss": 3.0171, "step": 53921 }, { "epoch": 2.64, "grad_norm": 0.7543174624443054, "learning_rate": 2.0768823260979318e-05, "loss": 2.7892, "step": 53922 }, { "epoch": 2.64, "grad_norm": 0.7616002559661865, "learning_rate": 2.0763194723575193e-05, "loss": 2.8543, "step": 53923 }, { "epoch": 2.64, "grad_norm": 0.725988507270813, "learning_rate": 2.07575669216271e-05, "loss": 2.82, "step": 53924 }, { "epoch": 2.64, "grad_norm": 0.7437060475349426, "learning_rate": 2.075193985514979e-05, "loss": 2.7369, "step": 53925 }, { "epoch": 2.64, "grad_norm": 0.7315745949745178, "learning_rate": 2.0746313524158188e-05, "loss": 2.7923, "step": 53926 }, { "epoch": 2.64, "grad_norm": 0.7858138680458069, "learning_rate": 2.0740687928666978e-05, "loss": 2.9287, "step": 53927 }, { "epoch": 2.64, "grad_norm": 0.7877421975135803, "learning_rate": 2.073506306869115e-05, "loss": 2.9113, "step": 53928 }, { "epoch": 2.64, "grad_norm": 0.8002856969833374, "learning_rate": 2.072943894424536e-05, "loss": 2.9444, "step": 53929 }, { "epoch": 2.64, "grad_norm": 0.7211002111434937, "learning_rate": 2.0723815555344457e-05, "loss": 2.7723, "step": 53930 }, { "epoch": 2.64, "grad_norm": 0.7559928894042969, "learning_rate": 2.0718192902003338e-05, "loss": 2.9722, "step": 53931 }, { "epoch": 2.64, "grad_norm": 0.7671013474464417, "learning_rate": 2.0712570984236686e-05, "loss": 2.9676, "step": 53932 }, { "epoch": 2.64, "grad_norm": 0.7174298167228699, "learning_rate": 2.070694980205946e-05, "loss": 2.8203, "step": 53933 }, { "epoch": 2.64, "grad_norm": 0.7641894817352295, "learning_rate": 2.0701329355486274e-05, "loss": 2.8446, "step": 53934 }, { "epoch": 2.64, "grad_norm": 0.6962066292762756, "learning_rate": 2.0695709644532056e-05, "loss": 2.7518, "step": 53935 }, { "epoch": 2.64, "grad_norm": 0.74459308385849, "learning_rate": 2.0690090669211658e-05, "loss": 2.8212, "step": 53936 }, { "epoch": 2.64, "grad_norm": 0.7321491837501526, "learning_rate": 2.0684472429539767e-05, "loss": 2.8309, "step": 53937 }, { "epoch": 2.64, "grad_norm": 0.739220917224884, "learning_rate": 2.067885492553124e-05, "loss": 3.009, "step": 53938 }, { "epoch": 2.64, "grad_norm": 0.7226603627204895, "learning_rate": 2.06732381572008e-05, "loss": 2.7967, "step": 53939 }, { "epoch": 2.64, "grad_norm": 0.7862400412559509, "learning_rate": 2.06676221245633e-05, "loss": 2.827, "step": 53940 }, { "epoch": 2.64, "grad_norm": 0.7336839437484741, "learning_rate": 2.0662006827633558e-05, "loss": 2.9242, "step": 53941 }, { "epoch": 2.64, "grad_norm": 0.7695524096488953, "learning_rate": 2.06563922664263e-05, "loss": 2.8123, "step": 53942 }, { "epoch": 2.64, "grad_norm": 0.7591333389282227, "learning_rate": 2.0650778440956383e-05, "loss": 2.9191, "step": 53943 }, { "epoch": 2.64, "grad_norm": 0.7534220218658447, "learning_rate": 2.0645165351238557e-05, "loss": 2.9842, "step": 53944 }, { "epoch": 2.64, "grad_norm": 0.7719517946243286, "learning_rate": 2.0639552997287546e-05, "loss": 2.8474, "step": 53945 }, { "epoch": 2.64, "grad_norm": 0.7812724113464355, "learning_rate": 2.0633941379118237e-05, "loss": 2.7268, "step": 53946 }, { "epoch": 2.64, "grad_norm": 0.7152418494224548, "learning_rate": 2.0628330496745317e-05, "loss": 2.833, "step": 53947 }, { "epoch": 2.64, "grad_norm": 0.7359592914581299, "learning_rate": 2.0622720350183674e-05, "loss": 2.8466, "step": 53948 }, { "epoch": 2.64, "grad_norm": 0.7347102761268616, "learning_rate": 2.0617110939447966e-05, "loss": 3.0976, "step": 53949 }, { "epoch": 2.64, "grad_norm": 0.7281254529953003, "learning_rate": 2.0611502264553048e-05, "loss": 3.0755, "step": 53950 }, { "epoch": 2.64, "grad_norm": 0.7379975914955139, "learning_rate": 2.0605894325513673e-05, "loss": 2.7204, "step": 53951 }, { "epoch": 2.64, "grad_norm": 0.7058905959129333, "learning_rate": 2.0600287122344562e-05, "loss": 2.7165, "step": 53952 }, { "epoch": 2.64, "grad_norm": 0.7494770884513855, "learning_rate": 2.0594680655060536e-05, "loss": 2.9196, "step": 53953 }, { "epoch": 2.64, "grad_norm": 0.7461589574813843, "learning_rate": 2.0589074923676352e-05, "loss": 2.7942, "step": 53954 }, { "epoch": 2.64, "grad_norm": 0.8033571839332581, "learning_rate": 2.058346992820673e-05, "loss": 2.6223, "step": 53955 }, { "epoch": 2.64, "grad_norm": 0.7271087169647217, "learning_rate": 2.0577865668666527e-05, "loss": 3.1622, "step": 53956 }, { "epoch": 2.64, "grad_norm": 0.7702967524528503, "learning_rate": 2.0572262145070427e-05, "loss": 2.827, "step": 53957 }, { "epoch": 2.64, "grad_norm": 0.7806286811828613, "learning_rate": 2.056665935743326e-05, "loss": 3.1208, "step": 53958 }, { "epoch": 2.64, "grad_norm": 0.7661564350128174, "learning_rate": 2.0561057305769703e-05, "loss": 2.7039, "step": 53959 }, { "epoch": 2.64, "grad_norm": 0.7610711455345154, "learning_rate": 2.0555455990094515e-05, "loss": 2.873, "step": 53960 }, { "epoch": 2.64, "grad_norm": 0.7484396696090698, "learning_rate": 2.054985541042252e-05, "loss": 2.7665, "step": 53961 }, { "epoch": 2.64, "grad_norm": 0.734775722026825, "learning_rate": 2.0544255566768376e-05, "loss": 3.0531, "step": 53962 }, { "epoch": 2.64, "grad_norm": 0.728737473487854, "learning_rate": 2.0538656459146928e-05, "loss": 3.0091, "step": 53963 }, { "epoch": 2.64, "grad_norm": 0.8020430207252502, "learning_rate": 2.0533058087572805e-05, "loss": 3.1086, "step": 53964 }, { "epoch": 2.64, "grad_norm": 0.7418114542961121, "learning_rate": 2.0527460452060895e-05, "loss": 2.8423, "step": 53965 }, { "epoch": 2.64, "grad_norm": 0.7765441536903381, "learning_rate": 2.0521863552625884e-05, "loss": 2.9005, "step": 53966 }, { "epoch": 2.64, "grad_norm": 0.7442047595977783, "learning_rate": 2.0516267389282426e-05, "loss": 2.8706, "step": 53967 }, { "epoch": 2.64, "grad_norm": 0.7826739549636841, "learning_rate": 2.051067196204538e-05, "loss": 2.861, "step": 53968 }, { "epoch": 2.64, "grad_norm": 0.7431984543800354, "learning_rate": 2.0505077270929392e-05, "loss": 2.8538, "step": 53969 }, { "epoch": 2.64, "grad_norm": 0.7477771043777466, "learning_rate": 2.0499483315949262e-05, "loss": 2.9119, "step": 53970 }, { "epoch": 2.64, "grad_norm": 0.772100031375885, "learning_rate": 2.0493890097119703e-05, "loss": 2.8383, "step": 53971 }, { "epoch": 2.65, "grad_norm": 0.7262080311775208, "learning_rate": 2.0488297614455474e-05, "loss": 2.9591, "step": 53972 }, { "epoch": 2.65, "grad_norm": 0.8009024262428284, "learning_rate": 2.0482705867971293e-05, "loss": 3.0581, "step": 53973 }, { "epoch": 2.65, "grad_norm": 0.7679193019866943, "learning_rate": 2.0477114857681787e-05, "loss": 2.7829, "step": 53974 }, { "epoch": 2.65, "grad_norm": 0.7369647026062012, "learning_rate": 2.047152458360177e-05, "loss": 3.0353, "step": 53975 }, { "epoch": 2.65, "grad_norm": 0.7575830817222595, "learning_rate": 2.046593504574604e-05, "loss": 2.9625, "step": 53976 }, { "epoch": 2.65, "grad_norm": 0.7336440682411194, "learning_rate": 2.0460346244129177e-05, "loss": 2.8509, "step": 53977 }, { "epoch": 2.65, "grad_norm": 0.7487339377403259, "learning_rate": 2.0454758178766007e-05, "loss": 2.6161, "step": 53978 }, { "epoch": 2.65, "grad_norm": 0.7532362341880798, "learning_rate": 2.044917084967115e-05, "loss": 2.8774, "step": 53979 }, { "epoch": 2.65, "grad_norm": 0.7453672885894775, "learning_rate": 2.0443584256859426e-05, "loss": 2.9005, "step": 53980 }, { "epoch": 2.65, "grad_norm": 0.750306248664856, "learning_rate": 2.0437998400345522e-05, "loss": 3.0054, "step": 53981 }, { "epoch": 2.65, "grad_norm": 0.7489728927612305, "learning_rate": 2.0432413280144035e-05, "loss": 3.005, "step": 53982 }, { "epoch": 2.65, "grad_norm": 0.7205755710601807, "learning_rate": 2.0426828896269843e-05, "loss": 2.9092, "step": 53983 }, { "epoch": 2.65, "grad_norm": 0.7374038696289062, "learning_rate": 2.042124524873754e-05, "loss": 2.7873, "step": 53984 }, { "epoch": 2.65, "grad_norm": 0.7091577649116516, "learning_rate": 2.041566233756188e-05, "loss": 2.8018, "step": 53985 }, { "epoch": 2.65, "grad_norm": 0.7290316820144653, "learning_rate": 2.041008016275758e-05, "loss": 2.9189, "step": 53986 }, { "epoch": 2.65, "grad_norm": 0.7330735921859741, "learning_rate": 2.0404498724339336e-05, "loss": 2.9191, "step": 53987 }, { "epoch": 2.65, "grad_norm": 0.7329016923904419, "learning_rate": 2.0398918022321796e-05, "loss": 3.0506, "step": 53988 }, { "epoch": 2.65, "grad_norm": 0.764037013053894, "learning_rate": 2.039333805671969e-05, "loss": 2.8124, "step": 53989 }, { "epoch": 2.65, "grad_norm": 0.7291437983512878, "learning_rate": 2.0387758827547696e-05, "loss": 2.7463, "step": 53990 }, { "epoch": 2.65, "grad_norm": 0.7491414546966553, "learning_rate": 2.038218033482061e-05, "loss": 2.7817, "step": 53991 }, { "epoch": 2.65, "grad_norm": 0.7548601031303406, "learning_rate": 2.037660257855298e-05, "loss": 2.7962, "step": 53992 }, { "epoch": 2.65, "grad_norm": 0.7716947197914124, "learning_rate": 2.0371025558759602e-05, "loss": 2.8948, "step": 53993 }, { "epoch": 2.65, "grad_norm": 0.7344406247138977, "learning_rate": 2.036544927545509e-05, "loss": 2.9549, "step": 53994 }, { "epoch": 2.65, "grad_norm": 0.7470876574516296, "learning_rate": 2.035987372865421e-05, "loss": 3.075, "step": 53995 }, { "epoch": 2.65, "grad_norm": 0.7501749992370605, "learning_rate": 2.0354298918371638e-05, "loss": 2.5949, "step": 53996 }, { "epoch": 2.65, "grad_norm": 0.7505838871002197, "learning_rate": 2.0348724844621932e-05, "loss": 3.097, "step": 53997 }, { "epoch": 2.65, "grad_norm": 0.7241684198379517, "learning_rate": 2.034315150741992e-05, "loss": 2.8974, "step": 53998 }, { "epoch": 2.65, "grad_norm": 0.7406726479530334, "learning_rate": 2.0337578906780185e-05, "loss": 2.8339, "step": 53999 }, { "epoch": 2.65, "grad_norm": 0.7717121839523315, "learning_rate": 2.0332007042717447e-05, "loss": 3.0096, "step": 54000 }, { "epoch": 2.65, "grad_norm": 0.7408150434494019, "learning_rate": 2.032643591524643e-05, "loss": 2.9066, "step": 54001 }, { "epoch": 2.65, "grad_norm": 0.7498103380203247, "learning_rate": 2.0320865524381758e-05, "loss": 2.8442, "step": 54002 }, { "epoch": 2.65, "grad_norm": 0.7494451999664307, "learning_rate": 2.0315295870138114e-05, "loss": 2.6605, "step": 54003 }, { "epoch": 2.65, "grad_norm": 0.7431146502494812, "learning_rate": 2.0309726952530092e-05, "loss": 2.8177, "step": 54004 }, { "epoch": 2.65, "grad_norm": 0.7047357559204102, "learning_rate": 2.0304158771572442e-05, "loss": 2.879, "step": 54005 }, { "epoch": 2.65, "grad_norm": 0.7716295123100281, "learning_rate": 2.029859132727979e-05, "loss": 3.1973, "step": 54006 }, { "epoch": 2.65, "grad_norm": 0.7560664415359497, "learning_rate": 2.029302461966682e-05, "loss": 3.088, "step": 54007 }, { "epoch": 2.65, "grad_norm": 0.7658318877220154, "learning_rate": 2.0287458648748257e-05, "loss": 2.7865, "step": 54008 }, { "epoch": 2.65, "grad_norm": 0.7343339323997498, "learning_rate": 2.0281893414538653e-05, "loss": 2.7412, "step": 54009 }, { "epoch": 2.65, "grad_norm": 0.7271828651428223, "learning_rate": 2.027632891705273e-05, "loss": 2.8022, "step": 54010 }, { "epoch": 2.65, "grad_norm": 0.7835471034049988, "learning_rate": 2.027076515630508e-05, "loss": 2.8315, "step": 54011 }, { "epoch": 2.65, "grad_norm": 0.7581990361213684, "learning_rate": 2.0265202132310388e-05, "loss": 2.8508, "step": 54012 }, { "epoch": 2.65, "grad_norm": 0.7217426896095276, "learning_rate": 2.0259639845083374e-05, "loss": 3.0623, "step": 54013 }, { "epoch": 2.65, "grad_norm": 0.7215791344642639, "learning_rate": 2.0254078294638598e-05, "loss": 2.8275, "step": 54014 }, { "epoch": 2.65, "grad_norm": 0.7520722150802612, "learning_rate": 2.0248517480990777e-05, "loss": 2.8071, "step": 54015 }, { "epoch": 2.65, "grad_norm": 0.7443130016326904, "learning_rate": 2.024295740415447e-05, "loss": 2.849, "step": 54016 }, { "epoch": 2.65, "grad_norm": 0.7317799925804138, "learning_rate": 2.0237398064144394e-05, "loss": 2.8952, "step": 54017 }, { "epoch": 2.65, "grad_norm": 0.7501115798950195, "learning_rate": 2.0231839460975206e-05, "loss": 3.1212, "step": 54018 }, { "epoch": 2.65, "grad_norm": 0.7234840393066406, "learning_rate": 2.0226281594661464e-05, "loss": 3.0704, "step": 54019 }, { "epoch": 2.65, "grad_norm": 0.7871589064598083, "learning_rate": 2.0220724465217884e-05, "loss": 2.9676, "step": 54020 }, { "epoch": 2.65, "grad_norm": 0.7186853885650635, "learning_rate": 2.0215168072659027e-05, "loss": 3.0373, "step": 54021 }, { "epoch": 2.65, "grad_norm": 0.7855047583580017, "learning_rate": 2.0209612416999578e-05, "loss": 2.8772, "step": 54022 }, { "epoch": 2.65, "grad_norm": 0.7410944104194641, "learning_rate": 2.0204057498254223e-05, "loss": 2.7401, "step": 54023 }, { "epoch": 2.65, "grad_norm": 0.7117852568626404, "learning_rate": 2.019850331643752e-05, "loss": 2.8494, "step": 54024 }, { "epoch": 2.65, "grad_norm": 0.7461109757423401, "learning_rate": 2.019294987156409e-05, "loss": 2.9391, "step": 54025 }, { "epoch": 2.65, "grad_norm": 0.769916296005249, "learning_rate": 2.0187397163648555e-05, "loss": 2.9171, "step": 54026 }, { "epoch": 2.65, "grad_norm": 0.7795796990394592, "learning_rate": 2.018184519270557e-05, "loss": 2.7288, "step": 54027 }, { "epoch": 2.65, "grad_norm": 0.7641432285308838, "learning_rate": 2.017629395874979e-05, "loss": 2.8498, "step": 54028 }, { "epoch": 2.65, "grad_norm": 0.7332988977432251, "learning_rate": 2.0170743461795736e-05, "loss": 2.8082, "step": 54029 }, { "epoch": 2.65, "grad_norm": 0.758630096912384, "learning_rate": 2.0165193701858163e-05, "loss": 2.8566, "step": 54030 }, { "epoch": 2.65, "grad_norm": 0.7000086307525635, "learning_rate": 2.015964467895159e-05, "loss": 2.8143, "step": 54031 }, { "epoch": 2.65, "grad_norm": 0.7220397591590881, "learning_rate": 2.0154096393090647e-05, "loss": 2.7702, "step": 54032 }, { "epoch": 2.65, "grad_norm": 0.7732095718383789, "learning_rate": 2.0148548844289946e-05, "loss": 2.875, "step": 54033 }, { "epoch": 2.65, "grad_norm": 0.7628332376480103, "learning_rate": 2.0143002032564116e-05, "loss": 2.7058, "step": 54034 }, { "epoch": 2.65, "grad_norm": 0.7517362833023071, "learning_rate": 2.0137455957927773e-05, "loss": 2.7692, "step": 54035 }, { "epoch": 2.65, "grad_norm": 0.7574434280395508, "learning_rate": 2.0131910620395476e-05, "loss": 2.7894, "step": 54036 }, { "epoch": 2.65, "grad_norm": 0.7853326797485352, "learning_rate": 2.0126366019981878e-05, "loss": 2.861, "step": 54037 }, { "epoch": 2.65, "grad_norm": 0.7405124306678772, "learning_rate": 2.01208221567016e-05, "loss": 2.9171, "step": 54038 }, { "epoch": 2.65, "grad_norm": 0.7227654457092285, "learning_rate": 2.0115279030569232e-05, "loss": 2.873, "step": 54039 }, { "epoch": 2.65, "grad_norm": 0.7326869368553162, "learning_rate": 2.010973664159933e-05, "loss": 2.8694, "step": 54040 }, { "epoch": 2.65, "grad_norm": 0.8230959177017212, "learning_rate": 2.0104194989806476e-05, "loss": 2.8616, "step": 54041 }, { "epoch": 2.65, "grad_norm": 0.7183669209480286, "learning_rate": 2.00986540752053e-05, "loss": 2.9716, "step": 54042 }, { "epoch": 2.65, "grad_norm": 0.7086861729621887, "learning_rate": 2.0093113897810453e-05, "loss": 2.7061, "step": 54043 }, { "epoch": 2.65, "grad_norm": 0.7914672493934631, "learning_rate": 2.0087574457636424e-05, "loss": 2.8385, "step": 54044 }, { "epoch": 2.65, "grad_norm": 0.7433429956436157, "learning_rate": 2.00820357546979e-05, "loss": 2.8681, "step": 54045 }, { "epoch": 2.65, "grad_norm": 0.7644243240356445, "learning_rate": 2.0076497789009437e-05, "loss": 3.195, "step": 54046 }, { "epoch": 2.65, "grad_norm": 0.76406329870224, "learning_rate": 2.007096056058556e-05, "loss": 2.6896, "step": 54047 }, { "epoch": 2.65, "grad_norm": 0.7108646035194397, "learning_rate": 2.0065424069440915e-05, "loss": 2.6872, "step": 54048 }, { "epoch": 2.65, "grad_norm": 0.7588872313499451, "learning_rate": 2.0059888315590068e-05, "loss": 3.081, "step": 54049 }, { "epoch": 2.65, "grad_norm": 0.7359708547592163, "learning_rate": 2.0054353299047598e-05, "loss": 2.7153, "step": 54050 }, { "epoch": 2.65, "grad_norm": 0.7428367733955383, "learning_rate": 2.0048819019828068e-05, "loss": 2.935, "step": 54051 }, { "epoch": 2.65, "grad_norm": 0.7662042379379272, "learning_rate": 2.0043285477946124e-05, "loss": 3.0226, "step": 54052 }, { "epoch": 2.65, "grad_norm": 0.7269078493118286, "learning_rate": 2.0037752673416264e-05, "loss": 2.9673, "step": 54053 }, { "epoch": 2.65, "grad_norm": 0.7469284534454346, "learning_rate": 2.0032220606253037e-05, "loss": 2.9737, "step": 54054 }, { "epoch": 2.65, "grad_norm": 0.7175904512405396, "learning_rate": 2.002668927647113e-05, "loss": 2.8703, "step": 54055 }, { "epoch": 2.65, "grad_norm": 0.7442259192466736, "learning_rate": 2.002115868408497e-05, "loss": 2.9262, "step": 54056 }, { "epoch": 2.65, "grad_norm": 0.7313393354415894, "learning_rate": 2.001562882910921e-05, "loss": 2.946, "step": 54057 }, { "epoch": 2.65, "grad_norm": 0.7050204873085022, "learning_rate": 2.0010099711558468e-05, "loss": 3.024, "step": 54058 }, { "epoch": 2.65, "grad_norm": 0.7282868027687073, "learning_rate": 2.000457133144714e-05, "loss": 2.8238, "step": 54059 }, { "epoch": 2.65, "grad_norm": 0.7803441882133484, "learning_rate": 1.9999043688789975e-05, "loss": 2.9185, "step": 54060 }, { "epoch": 2.65, "grad_norm": 0.7263804078102112, "learning_rate": 1.9993516783601426e-05, "loss": 2.9197, "step": 54061 }, { "epoch": 2.65, "grad_norm": 0.7453001141548157, "learning_rate": 1.998799061589602e-05, "loss": 2.7994, "step": 54062 }, { "epoch": 2.65, "grad_norm": 0.7197867631912231, "learning_rate": 1.998246518568841e-05, "loss": 3.0516, "step": 54063 }, { "epoch": 2.65, "grad_norm": 0.7825305461883545, "learning_rate": 1.997694049299302e-05, "loss": 2.9953, "step": 54064 }, { "epoch": 2.65, "grad_norm": 0.7243884801864624, "learning_rate": 1.997141653782457e-05, "loss": 2.828, "step": 54065 }, { "epoch": 2.65, "grad_norm": 0.7629521489143372, "learning_rate": 1.9965893320197447e-05, "loss": 2.8634, "step": 54066 }, { "epoch": 2.65, "grad_norm": 0.7828942537307739, "learning_rate": 1.9960370840126306e-05, "loss": 3.0588, "step": 54067 }, { "epoch": 2.65, "grad_norm": 0.7585510611534119, "learning_rate": 1.9954849097625668e-05, "loss": 2.7942, "step": 54068 }, { "epoch": 2.65, "grad_norm": 0.7742347717285156, "learning_rate": 1.9949328092709994e-05, "loss": 3.0219, "step": 54069 }, { "epoch": 2.65, "grad_norm": 0.736878514289856, "learning_rate": 1.9943807825393965e-05, "loss": 2.9672, "step": 54070 }, { "epoch": 2.65, "grad_norm": 0.7557036280632019, "learning_rate": 1.9938288295691974e-05, "loss": 3.0019, "step": 54071 }, { "epoch": 2.65, "grad_norm": 0.7458971738815308, "learning_rate": 1.993276950361864e-05, "loss": 2.762, "step": 54072 }, { "epoch": 2.65, "grad_norm": 0.7320734858512878, "learning_rate": 1.9927251449188552e-05, "loss": 3.0947, "step": 54073 }, { "epoch": 2.65, "grad_norm": 0.7172092199325562, "learning_rate": 1.992173413241617e-05, "loss": 2.8381, "step": 54074 }, { "epoch": 2.65, "grad_norm": 0.707459568977356, "learning_rate": 1.9916217553316037e-05, "loss": 2.9832, "step": 54075 }, { "epoch": 2.65, "grad_norm": 0.7637947797775269, "learning_rate": 1.9910701711902656e-05, "loss": 3.0281, "step": 54076 }, { "epoch": 2.65, "grad_norm": 0.7895542979240417, "learning_rate": 1.990518660819057e-05, "loss": 3.0742, "step": 54077 }, { "epoch": 2.65, "grad_norm": 0.707626223564148, "learning_rate": 1.9899672242194376e-05, "loss": 2.8941, "step": 54078 }, { "epoch": 2.65, "grad_norm": 0.743711531162262, "learning_rate": 1.989415861392849e-05, "loss": 3.0584, "step": 54079 }, { "epoch": 2.65, "grad_norm": 0.7679846286773682, "learning_rate": 1.9888645723407504e-05, "loss": 2.858, "step": 54080 }, { "epoch": 2.65, "grad_norm": 0.7698391675949097, "learning_rate": 1.9883133570645903e-05, "loss": 2.6755, "step": 54081 }, { "epoch": 2.65, "grad_norm": 0.7134325504302979, "learning_rate": 1.9877622155658246e-05, "loss": 2.8351, "step": 54082 }, { "epoch": 2.65, "grad_norm": 0.7812037467956543, "learning_rate": 1.9872111478459018e-05, "loss": 2.9758, "step": 54083 }, { "epoch": 2.65, "grad_norm": 0.7417611479759216, "learning_rate": 1.9866601539062677e-05, "loss": 2.8881, "step": 54084 }, { "epoch": 2.65, "grad_norm": 0.8070379495620728, "learning_rate": 1.9861092337483876e-05, "loss": 2.8155, "step": 54085 }, { "epoch": 2.65, "grad_norm": 0.7638986706733704, "learning_rate": 1.985558387373697e-05, "loss": 2.7645, "step": 54086 }, { "epoch": 2.65, "grad_norm": 0.6919122934341431, "learning_rate": 1.9850076147836613e-05, "loss": 2.8189, "step": 54087 }, { "epoch": 2.65, "grad_norm": 0.7425715923309326, "learning_rate": 1.9844569159797165e-05, "loss": 2.8075, "step": 54088 }, { "epoch": 2.65, "grad_norm": 0.7360454201698303, "learning_rate": 1.9839062909633274e-05, "loss": 2.898, "step": 54089 }, { "epoch": 2.65, "grad_norm": 0.7551466226577759, "learning_rate": 1.9833557397359366e-05, "loss": 2.9805, "step": 54090 }, { "epoch": 2.65, "grad_norm": 0.7435944080352783, "learning_rate": 1.98280526229899e-05, "loss": 2.9451, "step": 54091 }, { "epoch": 2.65, "grad_norm": 0.7457761168479919, "learning_rate": 1.9822548586539487e-05, "loss": 2.7013, "step": 54092 }, { "epoch": 2.65, "grad_norm": 0.8180214762687683, "learning_rate": 1.981704528802249e-05, "loss": 3.0459, "step": 54093 }, { "epoch": 2.65, "grad_norm": 0.7294940948486328, "learning_rate": 1.9811542727453467e-05, "loss": 2.8207, "step": 54094 }, { "epoch": 2.65, "grad_norm": 0.742396354675293, "learning_rate": 1.9806040904846997e-05, "loss": 2.9505, "step": 54095 }, { "epoch": 2.65, "grad_norm": 0.7712141275405884, "learning_rate": 1.9800539820217474e-05, "loss": 2.8266, "step": 54096 }, { "epoch": 2.65, "grad_norm": 0.7579017877578735, "learning_rate": 1.979503947357939e-05, "loss": 2.7108, "step": 54097 }, { "epoch": 2.65, "grad_norm": 0.7335031628608704, "learning_rate": 1.9789539864947224e-05, "loss": 2.7703, "step": 54098 }, { "epoch": 2.65, "grad_norm": 0.7949396967887878, "learning_rate": 1.9784040994335472e-05, "loss": 2.902, "step": 54099 }, { "epoch": 2.65, "grad_norm": 0.7702135443687439, "learning_rate": 1.977854286175865e-05, "loss": 2.8729, "step": 54100 }, { "epoch": 2.65, "grad_norm": 0.7160282731056213, "learning_rate": 1.9773045467231185e-05, "loss": 2.9078, "step": 54101 }, { "epoch": 2.65, "grad_norm": 0.7243478298187256, "learning_rate": 1.9767548810767663e-05, "loss": 2.9189, "step": 54102 }, { "epoch": 2.65, "grad_norm": 0.7236966490745544, "learning_rate": 1.9762052892382407e-05, "loss": 3.0583, "step": 54103 }, { "epoch": 2.65, "grad_norm": 0.7192164063453674, "learning_rate": 1.9756557712090037e-05, "loss": 2.7622, "step": 54104 }, { "epoch": 2.65, "grad_norm": 0.7658694386482239, "learning_rate": 1.9751063269904945e-05, "loss": 2.9541, "step": 54105 }, { "epoch": 2.65, "grad_norm": 0.7122344374656677, "learning_rate": 1.9745569565841578e-05, "loss": 2.9305, "step": 54106 }, { "epoch": 2.65, "grad_norm": 0.7958580851554871, "learning_rate": 1.9740076599914468e-05, "loss": 2.9211, "step": 54107 }, { "epoch": 2.65, "grad_norm": 0.7178252339363098, "learning_rate": 1.973458437213803e-05, "loss": 2.8776, "step": 54108 }, { "epoch": 2.65, "grad_norm": 0.7540079951286316, "learning_rate": 1.9729092882526788e-05, "loss": 2.8094, "step": 54109 }, { "epoch": 2.65, "grad_norm": 0.766581118106842, "learning_rate": 1.9723602131095195e-05, "loss": 2.7469, "step": 54110 }, { "epoch": 2.65, "grad_norm": 0.7104828953742981, "learning_rate": 1.9718112117857675e-05, "loss": 2.7799, "step": 54111 }, { "epoch": 2.65, "grad_norm": 0.7695900201797485, "learning_rate": 1.9712622842828718e-05, "loss": 3.009, "step": 54112 }, { "epoch": 2.65, "grad_norm": 0.7981126308441162, "learning_rate": 1.9707134306022744e-05, "loss": 2.7478, "step": 54113 }, { "epoch": 2.65, "grad_norm": 0.714474618434906, "learning_rate": 1.9701646507454237e-05, "loss": 2.9426, "step": 54114 }, { "epoch": 2.65, "grad_norm": 0.7519267201423645, "learning_rate": 1.9696159447137662e-05, "loss": 2.8552, "step": 54115 }, { "epoch": 2.65, "grad_norm": 0.7607603669166565, "learning_rate": 1.969067312508743e-05, "loss": 2.8663, "step": 54116 }, { "epoch": 2.65, "grad_norm": 0.7680455446243286, "learning_rate": 1.968518754131807e-05, "loss": 2.8828, "step": 54117 }, { "epoch": 2.65, "grad_norm": 0.7606989145278931, "learning_rate": 1.9679702695843935e-05, "loss": 2.7714, "step": 54118 }, { "epoch": 2.65, "grad_norm": 0.7711759805679321, "learning_rate": 1.9674218588679548e-05, "loss": 2.8402, "step": 54119 }, { "epoch": 2.65, "grad_norm": 0.7324842214584351, "learning_rate": 1.9668735219839327e-05, "loss": 2.7645, "step": 54120 }, { "epoch": 2.65, "grad_norm": 0.7444130182266235, "learning_rate": 1.966325258933763e-05, "loss": 2.8317, "step": 54121 }, { "epoch": 2.65, "grad_norm": 0.7366726398468018, "learning_rate": 1.965777069718908e-05, "loss": 2.8117, "step": 54122 }, { "epoch": 2.65, "grad_norm": 0.7467571496963501, "learning_rate": 1.9652289543407927e-05, "loss": 3.1177, "step": 54123 }, { "epoch": 2.65, "grad_norm": 0.7339689135551453, "learning_rate": 1.9646809128008668e-05, "loss": 2.7936, "step": 54124 }, { "epoch": 2.65, "grad_norm": 0.812752902507782, "learning_rate": 1.964132945100585e-05, "loss": 2.9288, "step": 54125 }, { "epoch": 2.65, "grad_norm": 0.777722954750061, "learning_rate": 1.9635850512413797e-05, "loss": 2.8443, "step": 54126 }, { "epoch": 2.65, "grad_norm": 0.8094779253005981, "learning_rate": 1.9630372312246933e-05, "loss": 2.9391, "step": 54127 }, { "epoch": 2.65, "grad_norm": 0.7842881083488464, "learning_rate": 1.962489485051968e-05, "loss": 2.8474, "step": 54128 }, { "epoch": 2.65, "grad_norm": 0.7789468765258789, "learning_rate": 1.9619418127246524e-05, "loss": 2.8385, "step": 54129 }, { "epoch": 2.65, "grad_norm": 0.8131735920906067, "learning_rate": 1.961394214244185e-05, "loss": 3.0392, "step": 54130 }, { "epoch": 2.65, "grad_norm": 0.7453126907348633, "learning_rate": 1.960846689612009e-05, "loss": 2.8532, "step": 54131 }, { "epoch": 2.65, "grad_norm": 0.7671250104904175, "learning_rate": 1.960299238829569e-05, "loss": 2.914, "step": 54132 }, { "epoch": 2.65, "grad_norm": 0.7735947370529175, "learning_rate": 1.9597518618983044e-05, "loss": 2.8704, "step": 54133 }, { "epoch": 2.65, "grad_norm": 0.762312650680542, "learning_rate": 1.9592045588196536e-05, "loss": 2.7795, "step": 54134 }, { "epoch": 2.65, "grad_norm": 0.7702953815460205, "learning_rate": 1.9586573295950658e-05, "loss": 2.7177, "step": 54135 }, { "epoch": 2.65, "grad_norm": 0.7665062546730042, "learning_rate": 1.958110174225973e-05, "loss": 3.0213, "step": 54136 }, { "epoch": 2.65, "grad_norm": 0.7448751926422119, "learning_rate": 1.9575630927138274e-05, "loss": 2.8374, "step": 54137 }, { "epoch": 2.65, "grad_norm": 0.7393977642059326, "learning_rate": 1.957016085060058e-05, "loss": 2.8929, "step": 54138 }, { "epoch": 2.65, "grad_norm": 0.7659404873847961, "learning_rate": 1.9564691512661102e-05, "loss": 2.7063, "step": 54139 }, { "epoch": 2.65, "grad_norm": 0.7388503551483154, "learning_rate": 1.9559222913334293e-05, "loss": 2.922, "step": 54140 }, { "epoch": 2.65, "grad_norm": 0.7335487604141235, "learning_rate": 1.9553755052634545e-05, "loss": 2.8808, "step": 54141 }, { "epoch": 2.65, "grad_norm": 0.7967286705970764, "learning_rate": 1.954828793057621e-05, "loss": 2.7343, "step": 54142 }, { "epoch": 2.65, "grad_norm": 0.7063218951225281, "learning_rate": 1.954282154717368e-05, "loss": 3.0011, "step": 54143 }, { "epoch": 2.65, "grad_norm": 0.7302457690238953, "learning_rate": 1.9537355902441408e-05, "loss": 3.0176, "step": 54144 }, { "epoch": 2.65, "grad_norm": 0.739165723323822, "learning_rate": 1.9531890996393785e-05, "loss": 2.6946, "step": 54145 }, { "epoch": 2.65, "grad_norm": 0.7565047144889832, "learning_rate": 1.9526426829045126e-05, "loss": 2.8469, "step": 54146 }, { "epoch": 2.65, "grad_norm": 0.7552198171615601, "learning_rate": 1.9520963400409927e-05, "loss": 2.8518, "step": 54147 }, { "epoch": 2.65, "grad_norm": 0.7470796704292297, "learning_rate": 1.9515500710502542e-05, "loss": 2.8107, "step": 54148 }, { "epoch": 2.65, "grad_norm": 0.764629602432251, "learning_rate": 1.9510038759337287e-05, "loss": 2.8671, "step": 54149 }, { "epoch": 2.65, "grad_norm": 0.7533672451972961, "learning_rate": 1.950457754692869e-05, "loss": 2.9093, "step": 54150 }, { "epoch": 2.65, "grad_norm": 0.7616235613822937, "learning_rate": 1.949911707329097e-05, "loss": 2.8767, "step": 54151 }, { "epoch": 2.65, "grad_norm": 0.7351246476173401, "learning_rate": 1.949365733843865e-05, "loss": 2.6712, "step": 54152 }, { "epoch": 2.65, "grad_norm": 0.80738765001297, "learning_rate": 1.948819834238602e-05, "loss": 2.7892, "step": 54153 }, { "epoch": 2.65, "grad_norm": 0.7314664721488953, "learning_rate": 1.948274008514753e-05, "loss": 2.8074, "step": 54154 }, { "epoch": 2.65, "grad_norm": 0.7750166058540344, "learning_rate": 1.9477282566737505e-05, "loss": 2.7988, "step": 54155 }, { "epoch": 2.65, "grad_norm": 0.739399790763855, "learning_rate": 1.9471825787170304e-05, "loss": 2.8234, "step": 54156 }, { "epoch": 2.65, "grad_norm": 0.7659290432929993, "learning_rate": 1.946636974646034e-05, "loss": 2.828, "step": 54157 }, { "epoch": 2.65, "grad_norm": 0.7248683571815491, "learning_rate": 1.9460914444621943e-05, "loss": 2.9416, "step": 54158 }, { "epoch": 2.65, "grad_norm": 0.7538133859634399, "learning_rate": 1.9455459881669566e-05, "loss": 3.1345, "step": 54159 }, { "epoch": 2.65, "grad_norm": 0.7005477547645569, "learning_rate": 1.945000605761743e-05, "loss": 2.7204, "step": 54160 }, { "epoch": 2.65, "grad_norm": 0.7675425410270691, "learning_rate": 1.944455297248002e-05, "loss": 2.7766, "step": 54161 }, { "epoch": 2.65, "grad_norm": 0.7757595777511597, "learning_rate": 1.9439100626271697e-05, "loss": 2.87, "step": 54162 }, { "epoch": 2.65, "grad_norm": 0.7428346276283264, "learning_rate": 1.943364901900678e-05, "loss": 2.8913, "step": 54163 }, { "epoch": 2.65, "grad_norm": 0.74378901720047, "learning_rate": 1.942819815069966e-05, "loss": 2.7817, "step": 54164 }, { "epoch": 2.65, "grad_norm": 0.7840437889099121, "learning_rate": 1.942274802136462e-05, "loss": 2.8597, "step": 54165 }, { "epoch": 2.65, "grad_norm": 0.735499382019043, "learning_rate": 1.941729863101602e-05, "loss": 2.672, "step": 54166 }, { "epoch": 2.65, "grad_norm": 0.7412136793136597, "learning_rate": 1.941184997966835e-05, "loss": 2.9249, "step": 54167 }, { "epoch": 2.65, "grad_norm": 0.7294532656669617, "learning_rate": 1.9406402067335792e-05, "loss": 2.917, "step": 54168 }, { "epoch": 2.65, "grad_norm": 0.7440246343612671, "learning_rate": 1.9400954894032837e-05, "loss": 2.9341, "step": 54169 }, { "epoch": 2.65, "grad_norm": 0.7838345766067505, "learning_rate": 1.9395508459773745e-05, "loss": 2.9521, "step": 54170 }, { "epoch": 2.65, "grad_norm": 0.7340986728668213, "learning_rate": 1.9390062764572865e-05, "loss": 3.0219, "step": 54171 }, { "epoch": 2.65, "grad_norm": 0.7282890677452087, "learning_rate": 1.938461780844456e-05, "loss": 2.8559, "step": 54172 }, { "epoch": 2.65, "grad_norm": 0.7802963256835938, "learning_rate": 1.9379173591403142e-05, "loss": 3.0019, "step": 54173 }, { "epoch": 2.65, "grad_norm": 0.7121721506118774, "learning_rate": 1.9373730113463037e-05, "loss": 3.0312, "step": 54174 }, { "epoch": 2.65, "grad_norm": 0.7408917546272278, "learning_rate": 1.9368287374638437e-05, "loss": 3.026, "step": 54175 }, { "epoch": 2.66, "grad_norm": 0.7899402976036072, "learning_rate": 1.9362845374943823e-05, "loss": 2.8091, "step": 54176 }, { "epoch": 2.66, "grad_norm": 0.7442259192466736, "learning_rate": 1.935740411439346e-05, "loss": 2.894, "step": 54177 }, { "epoch": 2.66, "grad_norm": 0.8089560270309448, "learning_rate": 1.9351963593001664e-05, "loss": 2.7595, "step": 54178 }, { "epoch": 2.66, "grad_norm": 0.7835502624511719, "learning_rate": 1.9346523810782788e-05, "loss": 2.8599, "step": 54179 }, { "epoch": 2.66, "grad_norm": 0.7364153265953064, "learning_rate": 1.9341084767751158e-05, "loss": 2.6572, "step": 54180 }, { "epoch": 2.66, "grad_norm": 1.0555264949798584, "learning_rate": 1.933564646392106e-05, "loss": 2.8671, "step": 54181 }, { "epoch": 2.66, "grad_norm": 0.7606764435768127, "learning_rate": 1.933020889930692e-05, "loss": 2.974, "step": 54182 }, { "epoch": 2.66, "grad_norm": 0.7183963656425476, "learning_rate": 1.9324772073922922e-05, "loss": 2.8173, "step": 54183 }, { "epoch": 2.66, "grad_norm": 0.7525346875190735, "learning_rate": 1.9319335987783523e-05, "loss": 2.8566, "step": 54184 }, { "epoch": 2.66, "grad_norm": 0.7471001744270325, "learning_rate": 1.9313900640902978e-05, "loss": 2.8165, "step": 54185 }, { "epoch": 2.66, "grad_norm": 0.7534440159797668, "learning_rate": 1.9308466033295544e-05, "loss": 2.922, "step": 54186 }, { "epoch": 2.66, "grad_norm": 0.7355985045433044, "learning_rate": 1.930303216497564e-05, "loss": 2.7781, "step": 54187 }, { "epoch": 2.66, "grad_norm": 0.7623375058174133, "learning_rate": 1.9297599035957457e-05, "loss": 2.872, "step": 54188 }, { "epoch": 2.66, "grad_norm": 0.7745537161827087, "learning_rate": 1.9292166646255447e-05, "loss": 2.624, "step": 54189 }, { "epoch": 2.66, "grad_norm": 0.7441405653953552, "learning_rate": 1.92867349958838e-05, "loss": 2.7594, "step": 54190 }, { "epoch": 2.66, "grad_norm": 0.7160558104515076, "learning_rate": 1.9281304084856942e-05, "loss": 2.8772, "step": 54191 }, { "epoch": 2.66, "grad_norm": 0.7814651727676392, "learning_rate": 1.9275873913189055e-05, "loss": 2.7706, "step": 54192 }, { "epoch": 2.66, "grad_norm": 0.7084113359451294, "learning_rate": 1.9270444480894463e-05, "loss": 3.0227, "step": 54193 }, { "epoch": 2.66, "grad_norm": 0.8035101890563965, "learning_rate": 1.9265015787987558e-05, "loss": 2.9222, "step": 54194 }, { "epoch": 2.66, "grad_norm": 0.7509192824363708, "learning_rate": 1.9259587834482493e-05, "loss": 3.0245, "step": 54195 }, { "epoch": 2.66, "grad_norm": 0.7465216517448425, "learning_rate": 1.9254160620393656e-05, "loss": 2.9304, "step": 54196 }, { "epoch": 2.66, "grad_norm": 0.7854889035224915, "learning_rate": 1.92487341457354e-05, "loss": 2.6707, "step": 54197 }, { "epoch": 2.66, "grad_norm": 0.7187995314598083, "learning_rate": 1.924330841052192e-05, "loss": 2.899, "step": 54198 }, { "epoch": 2.66, "grad_norm": 0.7536890506744385, "learning_rate": 1.923788341476753e-05, "loss": 2.8897, "step": 54199 }, { "epoch": 2.66, "grad_norm": 0.7636274695396423, "learning_rate": 1.9232459158486492e-05, "loss": 2.8353, "step": 54200 }, { "epoch": 2.66, "grad_norm": 0.7261160612106323, "learning_rate": 1.9227035641693123e-05, "loss": 2.7868, "step": 54201 }, { "epoch": 2.66, "grad_norm": 0.7559409141540527, "learning_rate": 1.9221612864401716e-05, "loss": 3.1095, "step": 54202 }, { "epoch": 2.66, "grad_norm": 0.8080114722251892, "learning_rate": 1.9216190826626555e-05, "loss": 3.0153, "step": 54203 }, { "epoch": 2.66, "grad_norm": 0.7144877314567566, "learning_rate": 1.9210769528381898e-05, "loss": 3.012, "step": 54204 }, { "epoch": 2.66, "grad_norm": 0.700718343257904, "learning_rate": 1.9205348969682032e-05, "loss": 2.8758, "step": 54205 }, { "epoch": 2.66, "grad_norm": 0.7655426263809204, "learning_rate": 1.919992915054125e-05, "loss": 3.0361, "step": 54206 }, { "epoch": 2.66, "grad_norm": 0.7685981392860413, "learning_rate": 1.9194510070973834e-05, "loss": 2.9705, "step": 54207 }, { "epoch": 2.66, "grad_norm": 0.7193219661712646, "learning_rate": 1.9189091730994012e-05, "loss": 2.799, "step": 54208 }, { "epoch": 2.66, "grad_norm": 0.7027367353439331, "learning_rate": 1.91836741306161e-05, "loss": 2.9511, "step": 54209 }, { "epoch": 2.66, "grad_norm": 0.744439423084259, "learning_rate": 1.917825726985429e-05, "loss": 2.788, "step": 54210 }, { "epoch": 2.66, "grad_norm": 0.7260327339172363, "learning_rate": 1.9172841148722905e-05, "loss": 2.7213, "step": 54211 }, { "epoch": 2.66, "grad_norm": 0.7319767475128174, "learning_rate": 1.9167425767236268e-05, "loss": 3.0538, "step": 54212 }, { "epoch": 2.66, "grad_norm": 0.7671077251434326, "learning_rate": 1.9162011125408595e-05, "loss": 2.8832, "step": 54213 }, { "epoch": 2.66, "grad_norm": 0.8228315711021423, "learning_rate": 1.915659722325411e-05, "loss": 2.8539, "step": 54214 }, { "epoch": 2.66, "grad_norm": 0.7222673296928406, "learning_rate": 1.9151184060787072e-05, "loss": 2.9277, "step": 54215 }, { "epoch": 2.66, "grad_norm": 0.7548918724060059, "learning_rate": 1.9145771638021767e-05, "loss": 2.8345, "step": 54216 }, { "epoch": 2.66, "grad_norm": 0.7558291554450989, "learning_rate": 1.9140359954972485e-05, "loss": 2.799, "step": 54217 }, { "epoch": 2.66, "grad_norm": 0.7385692000389099, "learning_rate": 1.913494901165341e-05, "loss": 2.9751, "step": 54218 }, { "epoch": 2.66, "grad_norm": 0.7456986904144287, "learning_rate": 1.9129538808078838e-05, "loss": 3.0317, "step": 54219 }, { "epoch": 2.66, "grad_norm": 0.7515402436256409, "learning_rate": 1.9124129344262984e-05, "loss": 3.0887, "step": 54220 }, { "epoch": 2.66, "grad_norm": 0.7937130928039551, "learning_rate": 1.9118720620220172e-05, "loss": 2.9111, "step": 54221 }, { "epoch": 2.66, "grad_norm": 0.8028191924095154, "learning_rate": 1.9113312635964593e-05, "loss": 2.9427, "step": 54222 }, { "epoch": 2.66, "grad_norm": 0.7735779881477356, "learning_rate": 1.9107905391510437e-05, "loss": 2.6901, "step": 54223 }, { "epoch": 2.66, "grad_norm": 0.7522846460342407, "learning_rate": 1.910249888687202e-05, "loss": 2.7894, "step": 54224 }, { "epoch": 2.66, "grad_norm": 0.7548792362213135, "learning_rate": 1.9097093122063534e-05, "loss": 2.8319, "step": 54225 }, { "epoch": 2.66, "grad_norm": 0.7677998542785645, "learning_rate": 1.9091688097099266e-05, "loss": 2.9, "step": 54226 }, { "epoch": 2.66, "grad_norm": 0.752396821975708, "learning_rate": 1.908628381199344e-05, "loss": 3.1708, "step": 54227 }, { "epoch": 2.66, "grad_norm": 0.7591312527656555, "learning_rate": 1.9080880266760312e-05, "loss": 2.8663, "step": 54228 }, { "epoch": 2.66, "grad_norm": 0.7363387942314148, "learning_rate": 1.9075477461414067e-05, "loss": 2.9872, "step": 54229 }, { "epoch": 2.66, "grad_norm": 0.7834699153900146, "learning_rate": 1.90700753959689e-05, "loss": 2.6909, "step": 54230 }, { "epoch": 2.66, "grad_norm": 0.7395800352096558, "learning_rate": 1.906467407043909e-05, "loss": 2.9373, "step": 54231 }, { "epoch": 2.66, "grad_norm": 0.7479794025421143, "learning_rate": 1.90592734848389e-05, "loss": 2.7438, "step": 54232 }, { "epoch": 2.66, "grad_norm": 0.7837538719177246, "learning_rate": 1.9053873639182484e-05, "loss": 2.7192, "step": 54233 }, { "epoch": 2.66, "grad_norm": 0.8077936768531799, "learning_rate": 1.9048474533484126e-05, "loss": 2.7285, "step": 54234 }, { "epoch": 2.66, "grad_norm": 0.7308048605918884, "learning_rate": 1.9043076167758055e-05, "loss": 3.0173, "step": 54235 }, { "epoch": 2.66, "grad_norm": 0.7778046131134033, "learning_rate": 1.9037678542018418e-05, "loss": 2.8399, "step": 54236 }, { "epoch": 2.66, "grad_norm": 0.7745692133903503, "learning_rate": 1.9032281656279412e-05, "loss": 2.9058, "step": 54237 }, { "epoch": 2.66, "grad_norm": 0.7720127105712891, "learning_rate": 1.9026885510555324e-05, "loss": 2.9293, "step": 54238 }, { "epoch": 2.66, "grad_norm": 0.7536343932151794, "learning_rate": 1.902149010486037e-05, "loss": 2.8993, "step": 54239 }, { "epoch": 2.66, "grad_norm": 0.8127491474151611, "learning_rate": 1.901609543920868e-05, "loss": 2.9735, "step": 54240 }, { "epoch": 2.66, "grad_norm": 0.7343817949295044, "learning_rate": 1.9010701513614603e-05, "loss": 2.9745, "step": 54241 }, { "epoch": 2.66, "grad_norm": 0.7082345485687256, "learning_rate": 1.9005308328092196e-05, "loss": 3.0584, "step": 54242 }, { "epoch": 2.66, "grad_norm": 0.7429682612419128, "learning_rate": 1.8999915882655746e-05, "loss": 2.7716, "step": 54243 }, { "epoch": 2.66, "grad_norm": 0.7653400897979736, "learning_rate": 1.899452417731948e-05, "loss": 2.8173, "step": 54244 }, { "epoch": 2.66, "grad_norm": 0.7767915725708008, "learning_rate": 1.8989133212097485e-05, "loss": 2.7113, "step": 54245 }, { "epoch": 2.66, "grad_norm": 0.7101877331733704, "learning_rate": 1.8983742987004113e-05, "loss": 2.7319, "step": 54246 }, { "epoch": 2.66, "grad_norm": 0.7132937908172607, "learning_rate": 1.8978353502053388e-05, "loss": 2.6421, "step": 54247 }, { "epoch": 2.66, "grad_norm": 0.7609696984291077, "learning_rate": 1.8972964757259633e-05, "loss": 3.0005, "step": 54248 }, { "epoch": 2.66, "grad_norm": 0.7206118106842041, "learning_rate": 1.8967576752637036e-05, "loss": 3.1412, "step": 54249 }, { "epoch": 2.66, "grad_norm": 0.7709072828292847, "learning_rate": 1.8962189488199754e-05, "loss": 2.8475, "step": 54250 }, { "epoch": 2.66, "grad_norm": 0.7561743259429932, "learning_rate": 1.895680296396197e-05, "loss": 2.7725, "step": 54251 }, { "epoch": 2.66, "grad_norm": 0.7318551540374756, "learning_rate": 1.8951417179937844e-05, "loss": 2.8117, "step": 54252 }, { "epoch": 2.66, "grad_norm": 0.7357078790664673, "learning_rate": 1.89460321361416e-05, "loss": 2.6899, "step": 54253 }, { "epoch": 2.66, "grad_norm": 0.7411123514175415, "learning_rate": 1.8940647832587452e-05, "loss": 2.9539, "step": 54254 }, { "epoch": 2.66, "grad_norm": 0.7445243000984192, "learning_rate": 1.8935264269289497e-05, "loss": 2.8118, "step": 54255 }, { "epoch": 2.66, "grad_norm": 0.7355711460113525, "learning_rate": 1.892988144626202e-05, "loss": 2.8894, "step": 54256 }, { "epoch": 2.66, "grad_norm": 0.7821559906005859, "learning_rate": 1.892449936351914e-05, "loss": 2.8614, "step": 54257 }, { "epoch": 2.66, "grad_norm": 0.7362728714942932, "learning_rate": 1.8919118021074985e-05, "loss": 3.1394, "step": 54258 }, { "epoch": 2.66, "grad_norm": 0.7443636655807495, "learning_rate": 1.891373741894384e-05, "loss": 2.877, "step": 54259 }, { "epoch": 2.66, "grad_norm": 0.745068371295929, "learning_rate": 1.8908357557139732e-05, "loss": 2.8646, "step": 54260 }, { "epoch": 2.66, "grad_norm": 0.7140920162200928, "learning_rate": 1.890297843567701e-05, "loss": 2.768, "step": 54261 }, { "epoch": 2.66, "grad_norm": 0.7033805847167969, "learning_rate": 1.889760005456966e-05, "loss": 2.8209, "step": 54262 }, { "epoch": 2.66, "grad_norm": 0.7535343766212463, "learning_rate": 1.8892222413831948e-05, "loss": 2.92, "step": 54263 }, { "epoch": 2.66, "grad_norm": 0.7832685112953186, "learning_rate": 1.8886845513478054e-05, "loss": 2.8499, "step": 54264 }, { "epoch": 2.66, "grad_norm": 0.7332696914672852, "learning_rate": 1.8881469353522107e-05, "loss": 3.1087, "step": 54265 }, { "epoch": 2.66, "grad_norm": 0.727424144744873, "learning_rate": 1.8876093933978285e-05, "loss": 3.0913, "step": 54266 }, { "epoch": 2.66, "grad_norm": 0.7173473834991455, "learning_rate": 1.8870719254860688e-05, "loss": 3.0397, "step": 54267 }, { "epoch": 2.66, "grad_norm": 0.7304912805557251, "learning_rate": 1.88653453161835e-05, "loss": 2.8918, "step": 54268 }, { "epoch": 2.66, "grad_norm": 0.8100420832633972, "learning_rate": 1.885997211796094e-05, "loss": 2.7616, "step": 54269 }, { "epoch": 2.66, "grad_norm": 0.8588573336601257, "learning_rate": 1.885459966020704e-05, "loss": 2.7817, "step": 54270 }, { "epoch": 2.66, "grad_norm": 0.7639005780220032, "learning_rate": 1.8849227942936074e-05, "loss": 3.0441, "step": 54271 }, { "epoch": 2.66, "grad_norm": 0.7512505054473877, "learning_rate": 1.884385696616214e-05, "loss": 2.9208, "step": 54272 }, { "epoch": 2.66, "grad_norm": 0.7321940660476685, "learning_rate": 1.8838486729899327e-05, "loss": 2.8442, "step": 54273 }, { "epoch": 2.66, "grad_norm": 0.7625482678413391, "learning_rate": 1.8833117234161886e-05, "loss": 2.9836, "step": 54274 }, { "epoch": 2.66, "grad_norm": 0.7443647384643555, "learning_rate": 1.8827748478963844e-05, "loss": 2.8452, "step": 54275 }, { "epoch": 2.66, "grad_norm": 0.7935992479324341, "learning_rate": 1.882238046431942e-05, "loss": 2.7888, "step": 54276 }, { "epoch": 2.66, "grad_norm": 0.7821605205535889, "learning_rate": 1.8817013190242734e-05, "loss": 2.7943, "step": 54277 }, { "epoch": 2.66, "grad_norm": 0.7425271272659302, "learning_rate": 1.8811646656747948e-05, "loss": 3.0639, "step": 54278 }, { "epoch": 2.66, "grad_norm": 0.702383816242218, "learning_rate": 1.8806280863849145e-05, "loss": 2.6989, "step": 54279 }, { "epoch": 2.66, "grad_norm": 0.7176997661590576, "learning_rate": 1.880091581156048e-05, "loss": 2.9766, "step": 54280 }, { "epoch": 2.66, "grad_norm": 0.7591765522956848, "learning_rate": 1.879555149989611e-05, "loss": 2.663, "step": 54281 }, { "epoch": 2.66, "grad_norm": 0.7460857033729553, "learning_rate": 1.8790187928870092e-05, "loss": 2.9084, "step": 54282 }, { "epoch": 2.66, "grad_norm": 0.7594284415245056, "learning_rate": 1.878482509849658e-05, "loss": 2.7448, "step": 54283 }, { "epoch": 2.66, "grad_norm": 0.7766778469085693, "learning_rate": 1.8779463008789797e-05, "loss": 2.9685, "step": 54284 }, { "epoch": 2.66, "grad_norm": 0.7691056132316589, "learning_rate": 1.877410165976373e-05, "loss": 3.0542, "step": 54285 }, { "epoch": 2.66, "grad_norm": 0.7344174385070801, "learning_rate": 1.8768741051432567e-05, "loss": 3.0607, "step": 54286 }, { "epoch": 2.66, "grad_norm": 0.7203677892684937, "learning_rate": 1.8763381183810468e-05, "loss": 2.8125, "step": 54287 }, { "epoch": 2.66, "grad_norm": 0.7336849570274353, "learning_rate": 1.8758022056911414e-05, "loss": 2.8562, "step": 54288 }, { "epoch": 2.66, "grad_norm": 0.7745854258537292, "learning_rate": 1.875266367074967e-05, "loss": 2.9174, "step": 54289 }, { "epoch": 2.66, "grad_norm": 0.7709341049194336, "learning_rate": 1.8747306025339247e-05, "loss": 2.6841, "step": 54290 }, { "epoch": 2.66, "grad_norm": 0.8152937293052673, "learning_rate": 1.874194912069431e-05, "loss": 2.9381, "step": 54291 }, { "epoch": 2.66, "grad_norm": 0.7402138710021973, "learning_rate": 1.873659295682891e-05, "loss": 2.6456, "step": 54292 }, { "epoch": 2.66, "grad_norm": 0.7518551349639893, "learning_rate": 1.8731237533757272e-05, "loss": 2.8282, "step": 54293 }, { "epoch": 2.66, "grad_norm": 0.7548336386680603, "learning_rate": 1.872588285149338e-05, "loss": 2.7088, "step": 54294 }, { "epoch": 2.66, "grad_norm": 0.7288030982017517, "learning_rate": 1.8720528910051356e-05, "loss": 2.9081, "step": 54295 }, { "epoch": 2.66, "grad_norm": 0.7580604553222656, "learning_rate": 1.8715175709445362e-05, "loss": 2.9925, "step": 54296 }, { "epoch": 2.66, "grad_norm": 0.7717550992965698, "learning_rate": 1.8709823249689414e-05, "loss": 2.8237, "step": 54297 }, { "epoch": 2.66, "grad_norm": 0.706786572933197, "learning_rate": 1.870447153079767e-05, "loss": 2.9454, "step": 54298 }, { "epoch": 2.66, "grad_norm": 0.7198885083198547, "learning_rate": 1.8699120552784252e-05, "loss": 2.728, "step": 54299 }, { "epoch": 2.66, "grad_norm": 0.7398534417152405, "learning_rate": 1.8693770315663215e-05, "loss": 3.0013, "step": 54300 }, { "epoch": 2.66, "grad_norm": 0.7463058829307556, "learning_rate": 1.8688420819448645e-05, "loss": 3.0623, "step": 54301 }, { "epoch": 2.66, "grad_norm": 0.7530163526535034, "learning_rate": 1.86830720641546e-05, "loss": 2.8843, "step": 54302 }, { "epoch": 2.66, "grad_norm": 0.7228906750679016, "learning_rate": 1.8677724049795174e-05, "loss": 3.0058, "step": 54303 }, { "epoch": 2.66, "grad_norm": 0.759463906288147, "learning_rate": 1.8672376776384547e-05, "loss": 2.649, "step": 54304 }, { "epoch": 2.66, "grad_norm": 0.7761176824569702, "learning_rate": 1.8667030243936708e-05, "loss": 2.94, "step": 54305 }, { "epoch": 2.66, "grad_norm": 0.7421549558639526, "learning_rate": 1.8661684452465787e-05, "loss": 2.8615, "step": 54306 }, { "epoch": 2.66, "grad_norm": 0.7677056193351746, "learning_rate": 1.8656339401985797e-05, "loss": 2.7058, "step": 54307 }, { "epoch": 2.66, "grad_norm": 0.7426798939704895, "learning_rate": 1.8650995092510934e-05, "loss": 2.8782, "step": 54308 }, { "epoch": 2.66, "grad_norm": 0.7160890698432922, "learning_rate": 1.8645651524055184e-05, "loss": 2.8999, "step": 54309 }, { "epoch": 2.66, "grad_norm": 0.7567172646522522, "learning_rate": 1.86403086966326e-05, "loss": 2.9189, "step": 54310 }, { "epoch": 2.66, "grad_norm": 0.7990744113922119, "learning_rate": 1.863496661025734e-05, "loss": 2.9437, "step": 54311 }, { "epoch": 2.66, "grad_norm": 0.7247997522354126, "learning_rate": 1.862962526494336e-05, "loss": 2.8656, "step": 54312 }, { "epoch": 2.66, "grad_norm": 0.7634393572807312, "learning_rate": 1.8624284660704815e-05, "loss": 3.0048, "step": 54313 }, { "epoch": 2.66, "grad_norm": 0.7473614811897278, "learning_rate": 1.8618944797555824e-05, "loss": 3.1732, "step": 54314 }, { "epoch": 2.66, "grad_norm": 0.7102734446525574, "learning_rate": 1.8613605675510345e-05, "loss": 2.7127, "step": 54315 }, { "epoch": 2.66, "grad_norm": 0.7932694554328918, "learning_rate": 1.860826729458247e-05, "loss": 2.9487, "step": 54316 }, { "epoch": 2.66, "grad_norm": 0.7079724669456482, "learning_rate": 1.8602929654786247e-05, "loss": 2.798, "step": 54317 }, { "epoch": 2.66, "grad_norm": 0.7377166748046875, "learning_rate": 1.859759275613577e-05, "loss": 2.9455, "step": 54318 }, { "epoch": 2.66, "grad_norm": 0.8409765958786011, "learning_rate": 1.859225659864506e-05, "loss": 2.8922, "step": 54319 }, { "epoch": 2.66, "grad_norm": 0.7548701167106628, "learning_rate": 1.8586921182328173e-05, "loss": 2.875, "step": 54320 }, { "epoch": 2.66, "grad_norm": 0.7892856001853943, "learning_rate": 1.858158650719923e-05, "loss": 3.0358, "step": 54321 }, { "epoch": 2.66, "grad_norm": 0.7447632551193237, "learning_rate": 1.8576252573272155e-05, "loss": 3.0135, "step": 54322 }, { "epoch": 2.66, "grad_norm": 0.7343568205833435, "learning_rate": 1.85709193805612e-05, "loss": 2.9174, "step": 54323 }, { "epoch": 2.66, "grad_norm": 0.7546588778495789, "learning_rate": 1.8565586929080156e-05, "loss": 3.1323, "step": 54324 }, { "epoch": 2.66, "grad_norm": 0.7940450310707092, "learning_rate": 1.8560255218843213e-05, "loss": 2.7931, "step": 54325 }, { "epoch": 2.66, "grad_norm": 0.7970552444458008, "learning_rate": 1.8554924249864423e-05, "loss": 3.0955, "step": 54326 }, { "epoch": 2.66, "grad_norm": 0.7288427352905273, "learning_rate": 1.8549594022157775e-05, "loss": 2.8776, "step": 54327 }, { "epoch": 2.66, "grad_norm": 0.8218358159065247, "learning_rate": 1.8544264535737364e-05, "loss": 2.8839, "step": 54328 }, { "epoch": 2.66, "grad_norm": 0.7489563226699829, "learning_rate": 1.8538935790617136e-05, "loss": 2.6361, "step": 54329 }, { "epoch": 2.66, "grad_norm": 0.7439214587211609, "learning_rate": 1.8533607786811257e-05, "loss": 2.7068, "step": 54330 }, { "epoch": 2.66, "grad_norm": 0.7576811909675598, "learning_rate": 1.8528280524333637e-05, "loss": 2.7559, "step": 54331 }, { "epoch": 2.66, "grad_norm": 0.7353628873825073, "learning_rate": 1.8522954003198342e-05, "loss": 2.9782, "step": 54332 }, { "epoch": 2.66, "grad_norm": 0.7459601163864136, "learning_rate": 1.851762822341949e-05, "loss": 2.9133, "step": 54333 }, { "epoch": 2.66, "grad_norm": 0.7878472208976746, "learning_rate": 1.8512303185010936e-05, "loss": 2.8641, "step": 54334 }, { "epoch": 2.66, "grad_norm": 0.7898440957069397, "learning_rate": 1.8506978887986835e-05, "loss": 2.9113, "step": 54335 }, { "epoch": 2.66, "grad_norm": 0.7459374666213989, "learning_rate": 1.850165533236121e-05, "loss": 3.0018, "step": 54336 }, { "epoch": 2.66, "grad_norm": 0.7519286870956421, "learning_rate": 1.849633251814805e-05, "loss": 2.7782, "step": 54337 }, { "epoch": 2.66, "grad_norm": 0.7953622341156006, "learning_rate": 1.8491010445361376e-05, "loss": 2.8511, "step": 54338 }, { "epoch": 2.66, "grad_norm": 0.7520739436149597, "learning_rate": 1.8485689114015146e-05, "loss": 2.993, "step": 54339 }, { "epoch": 2.66, "grad_norm": 0.7880058884620667, "learning_rate": 1.8480368524123445e-05, "loss": 2.6661, "step": 54340 }, { "epoch": 2.66, "grad_norm": 0.7255048751831055, "learning_rate": 1.84750486757003e-05, "loss": 2.8142, "step": 54341 }, { "epoch": 2.66, "grad_norm": 0.7529579997062683, "learning_rate": 1.8469729568759662e-05, "loss": 2.6264, "step": 54342 }, { "epoch": 2.66, "grad_norm": 0.7077673077583313, "learning_rate": 1.8464411203315655e-05, "loss": 3.071, "step": 54343 }, { "epoch": 2.66, "grad_norm": 0.7397609949111938, "learning_rate": 1.84590935793821e-05, "loss": 2.9625, "step": 54344 }, { "epoch": 2.66, "grad_norm": 0.7642784714698792, "learning_rate": 1.8453776696973187e-05, "loss": 2.8608, "step": 54345 }, { "epoch": 2.66, "grad_norm": 0.7922868728637695, "learning_rate": 1.844846055610284e-05, "loss": 2.6326, "step": 54346 }, { "epoch": 2.66, "grad_norm": 0.7658751606941223, "learning_rate": 1.8443145156785012e-05, "loss": 2.8669, "step": 54347 }, { "epoch": 2.66, "grad_norm": 0.7865391969680786, "learning_rate": 1.8437830499033822e-05, "loss": 2.7268, "step": 54348 }, { "epoch": 2.66, "grad_norm": 0.7619051337242126, "learning_rate": 1.843251658286313e-05, "loss": 2.6806, "step": 54349 }, { "epoch": 2.66, "grad_norm": 0.7652508616447449, "learning_rate": 1.8427203408286994e-05, "loss": 2.8923, "step": 54350 }, { "epoch": 2.66, "grad_norm": 0.718989372253418, "learning_rate": 1.8421890975319465e-05, "loss": 3.2317, "step": 54351 }, { "epoch": 2.66, "grad_norm": 0.757768988609314, "learning_rate": 1.84165792839745e-05, "loss": 2.825, "step": 54352 }, { "epoch": 2.66, "grad_norm": 0.7357819676399231, "learning_rate": 1.841126833426605e-05, "loss": 3.0808, "step": 54353 }, { "epoch": 2.66, "grad_norm": 0.7870209217071533, "learning_rate": 1.840595812620811e-05, "loss": 2.8403, "step": 54354 }, { "epoch": 2.66, "grad_norm": 0.7332582473754883, "learning_rate": 1.8400648659814664e-05, "loss": 2.9084, "step": 54355 }, { "epoch": 2.66, "grad_norm": 0.7562378644943237, "learning_rate": 1.8395339935099772e-05, "loss": 2.9404, "step": 54356 }, { "epoch": 2.66, "grad_norm": 0.7957108020782471, "learning_rate": 1.8390031952077287e-05, "loss": 2.9112, "step": 54357 }, { "epoch": 2.66, "grad_norm": 0.7108379006385803, "learning_rate": 1.8384724710761334e-05, "loss": 2.8825, "step": 54358 }, { "epoch": 2.66, "grad_norm": 0.7623836398124695, "learning_rate": 1.8379418211165796e-05, "loss": 3.0071, "step": 54359 }, { "epoch": 2.66, "grad_norm": 0.7817762494087219, "learning_rate": 1.8374112453304634e-05, "loss": 2.6759, "step": 54360 }, { "epoch": 2.66, "grad_norm": 0.8142674565315247, "learning_rate": 1.83688074371919e-05, "loss": 2.9088, "step": 54361 }, { "epoch": 2.66, "grad_norm": 0.7520138621330261, "learning_rate": 1.836350316284152e-05, "loss": 2.876, "step": 54362 }, { "epoch": 2.66, "grad_norm": 0.7290118336677551, "learning_rate": 1.835819963026748e-05, "loss": 2.8468, "step": 54363 }, { "epoch": 2.66, "grad_norm": 0.7808724641799927, "learning_rate": 1.83528968394837e-05, "loss": 2.9203, "step": 54364 }, { "epoch": 2.66, "grad_norm": 0.7678559422492981, "learning_rate": 1.834759479050417e-05, "loss": 2.9192, "step": 54365 }, { "epoch": 2.66, "grad_norm": 0.7148266434669495, "learning_rate": 1.834229348334292e-05, "loss": 2.7605, "step": 54366 }, { "epoch": 2.66, "grad_norm": 0.714754045009613, "learning_rate": 1.8336992918013894e-05, "loss": 2.8237, "step": 54367 }, { "epoch": 2.66, "grad_norm": 0.7212704420089722, "learning_rate": 1.8331693094530985e-05, "loss": 2.8664, "step": 54368 }, { "epoch": 2.66, "grad_norm": 0.7763723731040955, "learning_rate": 1.8326394012908153e-05, "loss": 2.7022, "step": 54369 }, { "epoch": 2.66, "grad_norm": 0.7871028780937195, "learning_rate": 1.8321095673159414e-05, "loss": 2.9555, "step": 54370 }, { "epoch": 2.66, "grad_norm": 0.7615554928779602, "learning_rate": 1.8315798075298728e-05, "loss": 2.8821, "step": 54371 }, { "epoch": 2.66, "grad_norm": 0.7369310259819031, "learning_rate": 1.8310501219339947e-05, "loss": 2.7315, "step": 54372 }, { "epoch": 2.66, "grad_norm": 0.7455233931541443, "learning_rate": 1.8305205105297162e-05, "loss": 3.1036, "step": 54373 }, { "epoch": 2.66, "grad_norm": 0.7491878867149353, "learning_rate": 1.829990973318426e-05, "loss": 2.8756, "step": 54374 }, { "epoch": 2.66, "grad_norm": 0.8159949779510498, "learning_rate": 1.829461510301513e-05, "loss": 2.8182, "step": 54375 }, { "epoch": 2.66, "grad_norm": 0.7366239428520203, "learning_rate": 1.828932121480383e-05, "loss": 2.8917, "step": 54376 }, { "epoch": 2.66, "grad_norm": 0.7913699150085449, "learning_rate": 1.8284028068564183e-05, "loss": 3.1585, "step": 54377 }, { "epoch": 2.66, "grad_norm": 0.7340931296348572, "learning_rate": 1.8278735664310208e-05, "loss": 2.9116, "step": 54378 }, { "epoch": 2.66, "grad_norm": 0.7172591686248779, "learning_rate": 1.8273444002055828e-05, "loss": 2.9317, "step": 54379 }, { "epoch": 2.67, "grad_norm": 0.7502052783966064, "learning_rate": 1.8268153081814994e-05, "loss": 2.9903, "step": 54380 }, { "epoch": 2.67, "grad_norm": 0.7538665533065796, "learning_rate": 1.8262862903601606e-05, "loss": 3.0005, "step": 54381 }, { "epoch": 2.67, "grad_norm": 0.7647122740745544, "learning_rate": 1.8257573467429608e-05, "loss": 2.9253, "step": 54382 }, { "epoch": 2.67, "grad_norm": 0.7346746921539307, "learning_rate": 1.8252284773312964e-05, "loss": 2.8669, "step": 54383 }, { "epoch": 2.67, "grad_norm": 0.7537041306495667, "learning_rate": 1.8246996821265557e-05, "loss": 2.7086, "step": 54384 }, { "epoch": 2.67, "grad_norm": 0.74997878074646, "learning_rate": 1.8241709611301313e-05, "loss": 2.7979, "step": 54385 }, { "epoch": 2.67, "grad_norm": 0.7360642552375793, "learning_rate": 1.823642314343422e-05, "loss": 2.7866, "step": 54386 }, { "epoch": 2.67, "grad_norm": 0.7375506162643433, "learning_rate": 1.8231137417678133e-05, "loss": 2.8911, "step": 54387 }, { "epoch": 2.67, "grad_norm": 0.7365680932998657, "learning_rate": 1.8225852434047038e-05, "loss": 2.8581, "step": 54388 }, { "epoch": 2.67, "grad_norm": 0.7538241147994995, "learning_rate": 1.822056819255483e-05, "loss": 2.9333, "step": 54389 }, { "epoch": 2.67, "grad_norm": 0.7558910846710205, "learning_rate": 1.821528469321536e-05, "loss": 2.7096, "step": 54390 }, { "epoch": 2.67, "grad_norm": 0.7916548252105713, "learning_rate": 1.8210001936042652e-05, "loss": 2.9807, "step": 54391 }, { "epoch": 2.67, "grad_norm": 1.1039891242980957, "learning_rate": 1.8204719921050492e-05, "loss": 3.1819, "step": 54392 }, { "epoch": 2.67, "grad_norm": 0.734824538230896, "learning_rate": 1.819943864825294e-05, "loss": 2.6976, "step": 54393 }, { "epoch": 2.67, "grad_norm": 0.7892003655433655, "learning_rate": 1.819415811766378e-05, "loss": 3.0551, "step": 54394 }, { "epoch": 2.67, "grad_norm": 0.7752202749252319, "learning_rate": 1.818887832929704e-05, "loss": 2.8143, "step": 54395 }, { "epoch": 2.67, "grad_norm": 0.7521406412124634, "learning_rate": 1.818359928316654e-05, "loss": 2.8164, "step": 54396 }, { "epoch": 2.67, "grad_norm": 0.7605895400047302, "learning_rate": 1.8178320979286164e-05, "loss": 2.945, "step": 54397 }, { "epoch": 2.67, "grad_norm": 0.8226302862167358, "learning_rate": 1.8173043417669908e-05, "loss": 2.8654, "step": 54398 }, { "epoch": 2.67, "grad_norm": 0.7271564602851868, "learning_rate": 1.8167766598331558e-05, "loss": 2.9806, "step": 54399 }, { "epoch": 2.67, "grad_norm": 0.7447865009307861, "learning_rate": 1.81624905212851e-05, "loss": 2.9206, "step": 54400 }, { "epoch": 2.67, "grad_norm": 0.7446723580360413, "learning_rate": 1.815721518654436e-05, "loss": 3.0564, "step": 54401 }, { "epoch": 2.67, "grad_norm": 0.7353099584579468, "learning_rate": 1.815194059412336e-05, "loss": 3.0469, "step": 54402 }, { "epoch": 2.67, "grad_norm": 0.7726219296455383, "learning_rate": 1.8146666744035852e-05, "loss": 2.9872, "step": 54403 }, { "epoch": 2.67, "grad_norm": 0.7748692035675049, "learning_rate": 1.8141393636295764e-05, "loss": 2.8217, "step": 54404 }, { "epoch": 2.67, "grad_norm": 0.7315913438796997, "learning_rate": 1.813612127091705e-05, "loss": 2.871, "step": 54405 }, { "epoch": 2.67, "grad_norm": 0.7552880644798279, "learning_rate": 1.8130849647913492e-05, "loss": 2.8242, "step": 54406 }, { "epoch": 2.67, "grad_norm": 0.726218581199646, "learning_rate": 1.812557876729902e-05, "loss": 2.8563, "step": 54407 }, { "epoch": 2.67, "grad_norm": 0.7617534399032593, "learning_rate": 1.8120308629087587e-05, "loss": 2.8983, "step": 54408 }, { "epoch": 2.67, "grad_norm": 0.7186073064804077, "learning_rate": 1.811503923329295e-05, "loss": 2.9901, "step": 54409 }, { "epoch": 2.67, "grad_norm": 0.7599871754646301, "learning_rate": 1.810977057992913e-05, "loss": 2.8198, "step": 54410 }, { "epoch": 2.67, "grad_norm": 0.7000196576118469, "learning_rate": 1.810450266900988e-05, "loss": 2.902, "step": 54411 }, { "epoch": 2.67, "grad_norm": 0.7205071449279785, "learning_rate": 1.8099235500549092e-05, "loss": 2.6609, "step": 54412 }, { "epoch": 2.67, "grad_norm": 0.7560635209083557, "learning_rate": 1.809396907456072e-05, "loss": 2.936, "step": 54413 }, { "epoch": 2.67, "grad_norm": 0.7410910129547119, "learning_rate": 1.8088703391058523e-05, "loss": 2.9418, "step": 54414 }, { "epoch": 2.67, "grad_norm": 0.7488538026809692, "learning_rate": 1.8083438450056487e-05, "loss": 2.6645, "step": 54415 }, { "epoch": 2.67, "grad_norm": 0.7629244923591614, "learning_rate": 1.8078174251568333e-05, "loss": 2.7683, "step": 54416 }, { "epoch": 2.67, "grad_norm": 0.7645202279090881, "learning_rate": 1.8072910795608088e-05, "loss": 2.9679, "step": 54417 }, { "epoch": 2.67, "grad_norm": 0.7587352395057678, "learning_rate": 1.8067648082189536e-05, "loss": 2.8207, "step": 54418 }, { "epoch": 2.67, "grad_norm": 0.7425807118415833, "learning_rate": 1.80623861113265e-05, "loss": 3.1311, "step": 54419 }, { "epoch": 2.67, "grad_norm": 0.7521542906761169, "learning_rate": 1.8057124883032904e-05, "loss": 3.1478, "step": 54420 }, { "epoch": 2.67, "grad_norm": 0.7295096516609192, "learning_rate": 1.805186439732257e-05, "loss": 3.0066, "step": 54421 }, { "epoch": 2.67, "grad_norm": 0.7017486095428467, "learning_rate": 1.8046604654209317e-05, "loss": 2.8773, "step": 54422 }, { "epoch": 2.67, "grad_norm": 0.7578108310699463, "learning_rate": 1.8041345653707106e-05, "loss": 2.8673, "step": 54423 }, { "epoch": 2.67, "grad_norm": 0.771789014339447, "learning_rate": 1.8036087395829758e-05, "loss": 2.9348, "step": 54424 }, { "epoch": 2.67, "grad_norm": 0.7727000117301941, "learning_rate": 1.8030829880591057e-05, "loss": 2.6452, "step": 54425 }, { "epoch": 2.67, "grad_norm": 0.7393551468849182, "learning_rate": 1.802557310800483e-05, "loss": 3.0339, "step": 54426 }, { "epoch": 2.67, "grad_norm": 0.7382111549377441, "learning_rate": 1.8020317078085e-05, "loss": 2.8841, "step": 54427 }, { "epoch": 2.67, "grad_norm": 0.7548832297325134, "learning_rate": 1.801506179084542e-05, "loss": 3.0882, "step": 54428 }, { "epoch": 2.67, "grad_norm": 0.747711718082428, "learning_rate": 1.800980724629988e-05, "loss": 2.8361, "step": 54429 }, { "epoch": 2.67, "grad_norm": 0.7639087438583374, "learning_rate": 1.8004553444462265e-05, "loss": 2.8952, "step": 54430 }, { "epoch": 2.67, "grad_norm": 0.7807071208953857, "learning_rate": 1.799930038534634e-05, "loss": 2.8251, "step": 54431 }, { "epoch": 2.67, "grad_norm": 0.7129517197608948, "learning_rate": 1.7994048068966018e-05, "loss": 2.9591, "step": 54432 }, { "epoch": 2.67, "grad_norm": 0.7340091466903687, "learning_rate": 1.7988796495335088e-05, "loss": 2.899, "step": 54433 }, { "epoch": 2.67, "grad_norm": 0.7638095617294312, "learning_rate": 1.798354566446738e-05, "loss": 2.9115, "step": 54434 }, { "epoch": 2.67, "grad_norm": 0.7314512729644775, "learning_rate": 1.797829557637678e-05, "loss": 3.0898, "step": 54435 }, { "epoch": 2.67, "grad_norm": 0.7057316899299622, "learning_rate": 1.7973046231077037e-05, "loss": 2.7938, "step": 54436 }, { "epoch": 2.67, "grad_norm": 0.7399982213973999, "learning_rate": 1.796779762858198e-05, "loss": 2.8381, "step": 54437 }, { "epoch": 2.67, "grad_norm": 0.7212401032447815, "learning_rate": 1.7962549768905532e-05, "loss": 2.5719, "step": 54438 }, { "epoch": 2.67, "grad_norm": 0.7532590627670288, "learning_rate": 1.7957302652061444e-05, "loss": 2.8661, "step": 54439 }, { "epoch": 2.67, "grad_norm": 0.7178873419761658, "learning_rate": 1.7952056278063542e-05, "loss": 2.9399, "step": 54440 }, { "epoch": 2.67, "grad_norm": 0.7503253221511841, "learning_rate": 1.794681064692558e-05, "loss": 3.0511, "step": 54441 }, { "epoch": 2.67, "grad_norm": 0.7387188673019409, "learning_rate": 1.794156575866148e-05, "loss": 2.8939, "step": 54442 }, { "epoch": 2.67, "grad_norm": 0.7133545279502869, "learning_rate": 1.793632161328503e-05, "loss": 2.8469, "step": 54443 }, { "epoch": 2.67, "grad_norm": 0.7400594353675842, "learning_rate": 1.7931078210809958e-05, "loss": 2.8812, "step": 54444 }, { "epoch": 2.67, "grad_norm": 0.7527070641517639, "learning_rate": 1.792583555125021e-05, "loss": 2.89, "step": 54445 }, { "epoch": 2.67, "grad_norm": 0.7184710502624512, "learning_rate": 1.7920593634619483e-05, "loss": 3.0791, "step": 54446 }, { "epoch": 2.67, "grad_norm": 0.7595903873443604, "learning_rate": 1.7915352460931664e-05, "loss": 2.9072, "step": 54447 }, { "epoch": 2.67, "grad_norm": 0.7046235799789429, "learning_rate": 1.7910112030200542e-05, "loss": 2.66, "step": 54448 }, { "epoch": 2.67, "grad_norm": 0.728029727935791, "learning_rate": 1.7904872342439803e-05, "loss": 2.9842, "step": 54449 }, { "epoch": 2.67, "grad_norm": 0.7607099413871765, "learning_rate": 1.7899633397663406e-05, "loss": 2.8981, "step": 54450 }, { "epoch": 2.67, "grad_norm": 0.7469753623008728, "learning_rate": 1.789439519588507e-05, "loss": 2.8449, "step": 54451 }, { "epoch": 2.67, "grad_norm": 0.7470847368240356, "learning_rate": 1.7889157737118554e-05, "loss": 3.0769, "step": 54452 }, { "epoch": 2.67, "grad_norm": 0.7539829611778259, "learning_rate": 1.7883921021377777e-05, "loss": 2.7504, "step": 54453 }, { "epoch": 2.67, "grad_norm": 0.7649708986282349, "learning_rate": 1.7878685048676467e-05, "loss": 2.929, "step": 54454 }, { "epoch": 2.67, "grad_norm": 0.7718356847763062, "learning_rate": 1.7873449819028372e-05, "loss": 3.0847, "step": 54455 }, { "epoch": 2.67, "grad_norm": 0.7208101153373718, "learning_rate": 1.786821533244729e-05, "loss": 3.0591, "step": 54456 }, { "epoch": 2.67, "grad_norm": 0.7297426462173462, "learning_rate": 1.7862981588947032e-05, "loss": 2.8899, "step": 54457 }, { "epoch": 2.67, "grad_norm": 0.7583280801773071, "learning_rate": 1.785774858854143e-05, "loss": 2.958, "step": 54458 }, { "epoch": 2.67, "grad_norm": 0.7504193186759949, "learning_rate": 1.785251633124417e-05, "loss": 2.8244, "step": 54459 }, { "epoch": 2.67, "grad_norm": 0.78138267993927, "learning_rate": 1.7847284817069107e-05, "loss": 2.8839, "step": 54460 }, { "epoch": 2.67, "grad_norm": 0.7497425675392151, "learning_rate": 1.784205404603003e-05, "loss": 2.8106, "step": 54461 }, { "epoch": 2.67, "grad_norm": 0.8312497735023499, "learning_rate": 1.7836824018140628e-05, "loss": 2.8887, "step": 54462 }, { "epoch": 2.67, "grad_norm": 0.7753313779830933, "learning_rate": 1.7831594733414757e-05, "loss": 2.8713, "step": 54463 }, { "epoch": 2.67, "grad_norm": 0.7584692239761353, "learning_rate": 1.7826366191866105e-05, "loss": 2.8697, "step": 54464 }, { "epoch": 2.67, "grad_norm": 0.799094021320343, "learning_rate": 1.782113839350856e-05, "loss": 2.9074, "step": 54465 }, { "epoch": 2.67, "grad_norm": 0.7895060777664185, "learning_rate": 1.781591133835578e-05, "loss": 2.7601, "step": 54466 }, { "epoch": 2.67, "grad_norm": 0.7699868083000183, "learning_rate": 1.7810685026421588e-05, "loss": 2.8981, "step": 54467 }, { "epoch": 2.67, "grad_norm": 0.7355355620384216, "learning_rate": 1.7805459457719805e-05, "loss": 2.941, "step": 54468 }, { "epoch": 2.67, "grad_norm": 0.7582590579986572, "learning_rate": 1.780023463226412e-05, "loss": 2.6234, "step": 54469 }, { "epoch": 2.67, "grad_norm": 0.7793235182762146, "learning_rate": 1.779501055006829e-05, "loss": 2.7773, "step": 54470 }, { "epoch": 2.67, "grad_norm": 0.7583927512168884, "learning_rate": 1.7789787211146067e-05, "loss": 2.8734, "step": 54471 }, { "epoch": 2.67, "grad_norm": 0.7218800187110901, "learning_rate": 1.7784564615511244e-05, "loss": 2.9254, "step": 54472 }, { "epoch": 2.67, "grad_norm": 0.7587677240371704, "learning_rate": 1.7779342763177572e-05, "loss": 2.8399, "step": 54473 }, { "epoch": 2.67, "grad_norm": 0.7498154640197754, "learning_rate": 1.7774121654158745e-05, "loss": 2.8976, "step": 54474 }, { "epoch": 2.67, "grad_norm": 0.7345309257507324, "learning_rate": 1.7768901288468652e-05, "loss": 2.9342, "step": 54475 }, { "epoch": 2.67, "grad_norm": 0.758578360080719, "learning_rate": 1.7763681666120942e-05, "loss": 2.8475, "step": 54476 }, { "epoch": 2.67, "grad_norm": 0.7470417618751526, "learning_rate": 1.7758462787129346e-05, "loss": 2.9, "step": 54477 }, { "epoch": 2.67, "grad_norm": 0.750644326210022, "learning_rate": 1.7753244651507648e-05, "loss": 2.934, "step": 54478 }, { "epoch": 2.67, "grad_norm": 0.7721165418624878, "learning_rate": 1.774802725926954e-05, "loss": 2.9427, "step": 54479 }, { "epoch": 2.67, "grad_norm": 0.7746807336807251, "learning_rate": 1.774281061042887e-05, "loss": 3.035, "step": 54480 }, { "epoch": 2.67, "grad_norm": 0.7496808767318726, "learning_rate": 1.773759470499927e-05, "loss": 3.0492, "step": 54481 }, { "epoch": 2.67, "grad_norm": 0.7302466630935669, "learning_rate": 1.773237954299459e-05, "loss": 2.8497, "step": 54482 }, { "epoch": 2.67, "grad_norm": 0.7932860851287842, "learning_rate": 1.7727165124428455e-05, "loss": 2.9827, "step": 54483 }, { "epoch": 2.67, "grad_norm": 0.7311192154884338, "learning_rate": 1.7721951449314654e-05, "loss": 3.0511, "step": 54484 }, { "epoch": 2.67, "grad_norm": 0.7317575812339783, "learning_rate": 1.7716738517666906e-05, "loss": 2.745, "step": 54485 }, { "epoch": 2.67, "grad_norm": 0.7372452020645142, "learning_rate": 1.7711526329498906e-05, "loss": 2.8255, "step": 54486 }, { "epoch": 2.67, "grad_norm": 0.7596213221549988, "learning_rate": 1.770631488482447e-05, "loss": 2.8587, "step": 54487 }, { "epoch": 2.67, "grad_norm": 0.7744680643081665, "learning_rate": 1.770110418365722e-05, "loss": 2.7446, "step": 54488 }, { "epoch": 2.67, "grad_norm": 0.7649564743041992, "learning_rate": 1.7695894226010953e-05, "loss": 2.7356, "step": 54489 }, { "epoch": 2.67, "grad_norm": 0.7747321724891663, "learning_rate": 1.7690685011899385e-05, "loss": 2.8946, "step": 54490 }, { "epoch": 2.67, "grad_norm": 0.7615075707435608, "learning_rate": 1.7685476541336242e-05, "loss": 2.9659, "step": 54491 }, { "epoch": 2.67, "grad_norm": 0.7637103796005249, "learning_rate": 1.768026881433521e-05, "loss": 2.7237, "step": 54492 }, { "epoch": 2.67, "grad_norm": 0.7580251693725586, "learning_rate": 1.7675061830909976e-05, "loss": 3.0487, "step": 54493 }, { "epoch": 2.67, "grad_norm": 0.7343547940254211, "learning_rate": 1.76698555910743e-05, "loss": 2.7701, "step": 54494 }, { "epoch": 2.67, "grad_norm": 0.7214182615280151, "learning_rate": 1.7664650094841937e-05, "loss": 2.8685, "step": 54495 }, { "epoch": 2.67, "grad_norm": 0.7279322147369385, "learning_rate": 1.7659445342226473e-05, "loss": 2.869, "step": 54496 }, { "epoch": 2.67, "grad_norm": 0.7043575048446655, "learning_rate": 1.765424133324177e-05, "loss": 3.072, "step": 54497 }, { "epoch": 2.67, "grad_norm": 0.7192384600639343, "learning_rate": 1.7649038067901445e-05, "loss": 2.9503, "step": 54498 }, { "epoch": 2.67, "grad_norm": 0.7618083357810974, "learning_rate": 1.764383554621919e-05, "loss": 2.9244, "step": 54499 }, { "epoch": 2.67, "grad_norm": 0.7713229656219482, "learning_rate": 1.7638633768208753e-05, "loss": 2.8477, "step": 54500 }, { "epoch": 2.67, "grad_norm": 0.761009931564331, "learning_rate": 1.763343273388377e-05, "loss": 2.7269, "step": 54501 }, { "epoch": 2.67, "grad_norm": 0.7590358257293701, "learning_rate": 1.7628232443258016e-05, "loss": 2.6942, "step": 54502 }, { "epoch": 2.67, "grad_norm": 0.7583818435668945, "learning_rate": 1.7623032896345123e-05, "loss": 2.8209, "step": 54503 }, { "epoch": 2.67, "grad_norm": 0.7613120675086975, "learning_rate": 1.7617834093158878e-05, "loss": 2.9961, "step": 54504 }, { "epoch": 2.67, "grad_norm": 0.7189532518386841, "learning_rate": 1.76126360337129e-05, "loss": 3.0106, "step": 54505 }, { "epoch": 2.67, "grad_norm": 0.7215641736984253, "learning_rate": 1.7607438718020848e-05, "loss": 2.9489, "step": 54506 }, { "epoch": 2.67, "grad_norm": 0.7565929889678955, "learning_rate": 1.760224214609648e-05, "loss": 2.9464, "step": 54507 }, { "epoch": 2.67, "grad_norm": 0.8237677216529846, "learning_rate": 1.7597046317953444e-05, "loss": 3.1464, "step": 54508 }, { "epoch": 2.67, "grad_norm": 0.7600015997886658, "learning_rate": 1.7591851233605405e-05, "loss": 2.7613, "step": 54509 }, { "epoch": 2.67, "grad_norm": 0.7842651009559631, "learning_rate": 1.7586656893066144e-05, "loss": 2.8621, "step": 54510 }, { "epoch": 2.67, "grad_norm": 0.7692767977714539, "learning_rate": 1.7581463296349252e-05, "loss": 2.752, "step": 54511 }, { "epoch": 2.67, "grad_norm": 0.7494772672653198, "learning_rate": 1.7576270443468454e-05, "loss": 2.8582, "step": 54512 }, { "epoch": 2.67, "grad_norm": 0.742486298084259, "learning_rate": 1.7571078334437405e-05, "loss": 2.8898, "step": 54513 }, { "epoch": 2.67, "grad_norm": 0.7653214335441589, "learning_rate": 1.7565886969269728e-05, "loss": 3.1248, "step": 54514 }, { "epoch": 2.67, "grad_norm": 0.7274088859558105, "learning_rate": 1.756069634797921e-05, "loss": 2.7404, "step": 54515 }, { "epoch": 2.67, "grad_norm": 0.7633993625640869, "learning_rate": 1.7555506470579404e-05, "loss": 2.8211, "step": 54516 }, { "epoch": 2.67, "grad_norm": 0.7414184808731079, "learning_rate": 1.7550317337084107e-05, "loss": 2.8421, "step": 54517 }, { "epoch": 2.67, "grad_norm": 0.7275792956352234, "learning_rate": 1.7545128947506836e-05, "loss": 2.9229, "step": 54518 }, { "epoch": 2.67, "grad_norm": 0.7488593459129333, "learning_rate": 1.7539941301861414e-05, "loss": 2.8447, "step": 54519 }, { "epoch": 2.67, "grad_norm": 0.8635866641998291, "learning_rate": 1.7534754400161398e-05, "loss": 2.927, "step": 54520 }, { "epoch": 2.67, "grad_norm": 0.7082469463348389, "learning_rate": 1.7529568242420445e-05, "loss": 2.8341, "step": 54521 }, { "epoch": 2.67, "grad_norm": 0.7854346632957458, "learning_rate": 1.7524382828652307e-05, "loss": 3.1416, "step": 54522 }, { "epoch": 2.67, "grad_norm": 0.7766318321228027, "learning_rate": 1.7519198158870507e-05, "loss": 3.0129, "step": 54523 }, { "epoch": 2.67, "grad_norm": 0.7670121788978577, "learning_rate": 1.7514014233088804e-05, "loss": 2.9533, "step": 54524 }, { "epoch": 2.67, "grad_norm": 0.7497897148132324, "learning_rate": 1.7508831051320848e-05, "loss": 2.9596, "step": 54525 }, { "epoch": 2.67, "grad_norm": 0.7375215888023376, "learning_rate": 1.7503648613580266e-05, "loss": 3.1485, "step": 54526 }, { "epoch": 2.67, "grad_norm": 0.7439850568771362, "learning_rate": 1.7498466919880717e-05, "loss": 2.9282, "step": 54527 }, { "epoch": 2.67, "grad_norm": 0.7792314887046814, "learning_rate": 1.7493285970235783e-05, "loss": 3.0288, "step": 54528 }, { "epoch": 2.67, "grad_norm": 0.7756353616714478, "learning_rate": 1.7488105764659156e-05, "loss": 2.7858, "step": 54529 }, { "epoch": 2.67, "grad_norm": 0.7475228905677795, "learning_rate": 1.7482926303164557e-05, "loss": 2.8076, "step": 54530 }, { "epoch": 2.67, "grad_norm": 0.7505745887756348, "learning_rate": 1.7477747585765478e-05, "loss": 2.9298, "step": 54531 }, { "epoch": 2.67, "grad_norm": 0.7236295342445374, "learning_rate": 1.7472569612475705e-05, "loss": 2.9612, "step": 54532 }, { "epoch": 2.67, "grad_norm": 0.7206342816352844, "learning_rate": 1.7467392383308765e-05, "loss": 2.8051, "step": 54533 }, { "epoch": 2.67, "grad_norm": 0.7881414294242859, "learning_rate": 1.7462215898278408e-05, "loss": 3.0171, "step": 54534 }, { "epoch": 2.67, "grad_norm": 0.7353194952011108, "learning_rate": 1.745704015739816e-05, "loss": 2.9517, "step": 54535 }, { "epoch": 2.67, "grad_norm": 0.7909258008003235, "learning_rate": 1.7451865160681678e-05, "loss": 2.8948, "step": 54536 }, { "epoch": 2.67, "grad_norm": 0.7583587765693665, "learning_rate": 1.7446690908142613e-05, "loss": 2.7776, "step": 54537 }, { "epoch": 2.67, "grad_norm": 0.7082347273826599, "learning_rate": 1.744151739979456e-05, "loss": 2.8779, "step": 54538 }, { "epoch": 2.67, "grad_norm": 0.7351507544517517, "learning_rate": 1.7436344635651167e-05, "loss": 3.0366, "step": 54539 }, { "epoch": 2.67, "grad_norm": 0.7974845170974731, "learning_rate": 1.743117261572613e-05, "loss": 2.8264, "step": 54540 }, { "epoch": 2.67, "grad_norm": 0.736272394657135, "learning_rate": 1.742600134003297e-05, "loss": 3.0235, "step": 54541 }, { "epoch": 2.67, "grad_norm": 0.8139263391494751, "learning_rate": 1.7420830808585373e-05, "loss": 2.8582, "step": 54542 }, { "epoch": 2.67, "grad_norm": 0.7663759589195251, "learning_rate": 1.7415661021396865e-05, "loss": 2.965, "step": 54543 }, { "epoch": 2.67, "grad_norm": 0.7662950754165649, "learning_rate": 1.74104919784811e-05, "loss": 2.7665, "step": 54544 }, { "epoch": 2.67, "grad_norm": 0.7392333149909973, "learning_rate": 1.7405323679851768e-05, "loss": 2.9475, "step": 54545 }, { "epoch": 2.67, "grad_norm": 0.7623760104179382, "learning_rate": 1.740015612552239e-05, "loss": 2.9041, "step": 54546 }, { "epoch": 2.67, "grad_norm": 0.7544460296630859, "learning_rate": 1.739498931550669e-05, "loss": 2.6681, "step": 54547 }, { "epoch": 2.67, "grad_norm": 0.7664691209793091, "learning_rate": 1.7389823249818124e-05, "loss": 2.902, "step": 54548 }, { "epoch": 2.67, "grad_norm": 0.7272012829780579, "learning_rate": 1.7384657928470448e-05, "loss": 2.7784, "step": 54549 }, { "epoch": 2.67, "grad_norm": 0.747775137424469, "learning_rate": 1.7379493351477114e-05, "loss": 2.6218, "step": 54550 }, { "epoch": 2.67, "grad_norm": 0.7551100850105286, "learning_rate": 1.7374329518851814e-05, "loss": 2.5569, "step": 54551 }, { "epoch": 2.67, "grad_norm": 0.7571572661399841, "learning_rate": 1.7369166430608172e-05, "loss": 2.973, "step": 54552 }, { "epoch": 2.67, "grad_norm": 0.7804607152938843, "learning_rate": 1.7364004086759707e-05, "loss": 2.9752, "step": 54553 }, { "epoch": 2.67, "grad_norm": 0.739106297492981, "learning_rate": 1.7358842487320113e-05, "loss": 2.9756, "step": 54554 }, { "epoch": 2.67, "grad_norm": 0.7254763841629028, "learning_rate": 1.7353681632302873e-05, "loss": 3.0763, "step": 54555 }, { "epoch": 2.67, "grad_norm": 0.7539757490158081, "learning_rate": 1.7348521521721713e-05, "loss": 2.7454, "step": 54556 }, { "epoch": 2.67, "grad_norm": 0.7677503228187561, "learning_rate": 1.7343362155590123e-05, "loss": 2.9946, "step": 54557 }, { "epoch": 2.67, "grad_norm": 0.7571077346801758, "learning_rate": 1.733820353392169e-05, "loss": 3.0087, "step": 54558 }, { "epoch": 2.67, "grad_norm": 0.7669923901557922, "learning_rate": 1.733304565673007e-05, "loss": 2.9143, "step": 54559 }, { "epoch": 2.67, "grad_norm": 0.73554927110672, "learning_rate": 1.7327888524028755e-05, "loss": 2.9558, "step": 54560 }, { "epoch": 2.67, "grad_norm": 0.7662096619606018, "learning_rate": 1.7322732135831396e-05, "loss": 2.7881, "step": 54561 }, { "epoch": 2.67, "grad_norm": 0.7233344912528992, "learning_rate": 1.7317576492151587e-05, "loss": 2.831, "step": 54562 }, { "epoch": 2.67, "grad_norm": 0.7593091726303101, "learning_rate": 1.731242159300288e-05, "loss": 2.9025, "step": 54563 }, { "epoch": 2.67, "grad_norm": 0.7726497650146484, "learning_rate": 1.7307267438398865e-05, "loss": 2.8003, "step": 54564 }, { "epoch": 2.67, "grad_norm": 0.765876829624176, "learning_rate": 1.7302114028353032e-05, "loss": 2.9848, "step": 54565 }, { "epoch": 2.67, "grad_norm": 0.7233142852783203, "learning_rate": 1.7296961362879036e-05, "loss": 2.9807, "step": 54566 }, { "epoch": 2.67, "grad_norm": 0.7348734140396118, "learning_rate": 1.72918094419905e-05, "loss": 2.7916, "step": 54567 }, { "epoch": 2.67, "grad_norm": 0.769679069519043, "learning_rate": 1.728665826570088e-05, "loss": 2.8728, "step": 54568 }, { "epoch": 2.67, "grad_norm": 0.7657221555709839, "learning_rate": 1.7281507834023832e-05, "loss": 2.8739, "step": 54569 }, { "epoch": 2.67, "grad_norm": 0.7933992743492126, "learning_rate": 1.727635814697281e-05, "loss": 2.812, "step": 54570 }, { "epoch": 2.67, "grad_norm": 0.7689788937568665, "learning_rate": 1.727120920456151e-05, "loss": 2.8414, "step": 54571 }, { "epoch": 2.67, "grad_norm": 0.7207183837890625, "learning_rate": 1.7266061006803444e-05, "loss": 2.7137, "step": 54572 }, { "epoch": 2.67, "grad_norm": 0.7436949610710144, "learning_rate": 1.7260913553712107e-05, "loss": 2.9456, "step": 54573 }, { "epoch": 2.67, "grad_norm": 0.7527655363082886, "learning_rate": 1.7255766845301157e-05, "loss": 2.8905, "step": 54574 }, { "epoch": 2.67, "grad_norm": 0.7638602256774902, "learning_rate": 1.7250620881584077e-05, "loss": 2.7258, "step": 54575 }, { "epoch": 2.67, "grad_norm": 0.7570220232009888, "learning_rate": 1.7245475662574428e-05, "loss": 2.6924, "step": 54576 }, { "epoch": 2.67, "grad_norm": 0.7669765949249268, "learning_rate": 1.724033118828583e-05, "loss": 2.797, "step": 54577 }, { "epoch": 2.67, "grad_norm": 0.7875416278839111, "learning_rate": 1.723518745873177e-05, "loss": 2.7294, "step": 54578 }, { "epoch": 2.67, "grad_norm": 0.7687140703201294, "learning_rate": 1.723004447392581e-05, "loss": 2.9238, "step": 54579 }, { "epoch": 2.67, "grad_norm": 0.7571902275085449, "learning_rate": 1.7224902233881433e-05, "loss": 2.9232, "step": 54580 }, { "epoch": 2.67, "grad_norm": 0.7242570519447327, "learning_rate": 1.7219760738612264e-05, "loss": 2.9942, "step": 54581 }, { "epoch": 2.67, "grad_norm": 0.7527267336845398, "learning_rate": 1.721461998813186e-05, "loss": 2.8456, "step": 54582 }, { "epoch": 2.67, "grad_norm": 0.7353515028953552, "learning_rate": 1.720947998245368e-05, "loss": 2.8948, "step": 54583 }, { "epoch": 2.68, "grad_norm": 0.7328376770019531, "learning_rate": 1.7204340721591335e-05, "loss": 2.8345, "step": 54584 }, { "epoch": 2.68, "grad_norm": 0.7656261920928955, "learning_rate": 1.719920220555836e-05, "loss": 2.8635, "step": 54585 }, { "epoch": 2.68, "grad_norm": 0.7430219650268555, "learning_rate": 1.7194064434368206e-05, "loss": 2.8203, "step": 54586 }, { "epoch": 2.68, "grad_norm": 0.7474393844604492, "learning_rate": 1.718892740803449e-05, "loss": 2.9497, "step": 54587 }, { "epoch": 2.68, "grad_norm": 0.75533527135849, "learning_rate": 1.7183791126570678e-05, "loss": 2.9873, "step": 54588 }, { "epoch": 2.68, "grad_norm": 0.776290237903595, "learning_rate": 1.7178655589990387e-05, "loss": 2.8499, "step": 54589 }, { "epoch": 2.68, "grad_norm": 0.743800699710846, "learning_rate": 1.717352079830704e-05, "loss": 2.7576, "step": 54590 }, { "epoch": 2.68, "grad_norm": 0.7225151658058167, "learning_rate": 1.7168386751534192e-05, "loss": 2.8802, "step": 54591 }, { "epoch": 2.68, "grad_norm": 0.7379876971244812, "learning_rate": 1.716325344968543e-05, "loss": 2.721, "step": 54592 }, { "epoch": 2.68, "grad_norm": 0.7471773028373718, "learning_rate": 1.715812089277422e-05, "loss": 2.7912, "step": 54593 }, { "epoch": 2.68, "grad_norm": 0.7949302792549133, "learning_rate": 1.7152989080814106e-05, "loss": 2.771, "step": 54594 }, { "epoch": 2.68, "grad_norm": 0.7417853474617004, "learning_rate": 1.7147858013818517e-05, "loss": 2.923, "step": 54595 }, { "epoch": 2.68, "grad_norm": 0.7578975558280945, "learning_rate": 1.714272769180104e-05, "loss": 2.8479, "step": 54596 }, { "epoch": 2.68, "grad_norm": 0.77840256690979, "learning_rate": 1.7137598114775232e-05, "loss": 2.7247, "step": 54597 }, { "epoch": 2.68, "grad_norm": 0.7270179986953735, "learning_rate": 1.7132469282754513e-05, "loss": 2.7254, "step": 54598 }, { "epoch": 2.68, "grad_norm": 0.7670302391052246, "learning_rate": 1.7127341195752475e-05, "loss": 2.8518, "step": 54599 }, { "epoch": 2.68, "grad_norm": 0.7435368895530701, "learning_rate": 1.7122213853782572e-05, "loss": 2.9362, "step": 54600 }, { "epoch": 2.68, "grad_norm": 0.7616455554962158, "learning_rate": 1.7117087256858262e-05, "loss": 2.7466, "step": 54601 }, { "epoch": 2.68, "grad_norm": 0.7222978472709656, "learning_rate": 1.711196140499317e-05, "loss": 2.826, "step": 54602 }, { "epoch": 2.68, "grad_norm": 0.7720111012458801, "learning_rate": 1.7106836298200675e-05, "loss": 2.9449, "step": 54603 }, { "epoch": 2.68, "grad_norm": 0.7425702214241028, "learning_rate": 1.7101711936494378e-05, "loss": 3.0421, "step": 54604 }, { "epoch": 2.68, "grad_norm": 0.7751484513282776, "learning_rate": 1.709658831988766e-05, "loss": 3.0768, "step": 54605 }, { "epoch": 2.68, "grad_norm": 0.731267511844635, "learning_rate": 1.7091465448394148e-05, "loss": 2.7284, "step": 54606 }, { "epoch": 2.68, "grad_norm": 0.7741591930389404, "learning_rate": 1.7086343322027263e-05, "loss": 2.9372, "step": 54607 }, { "epoch": 2.68, "grad_norm": 0.7876058220863342, "learning_rate": 1.708122194080046e-05, "loss": 2.9196, "step": 54608 }, { "epoch": 2.68, "grad_norm": 0.7788753509521484, "learning_rate": 1.7076101304727296e-05, "loss": 2.8865, "step": 54609 }, { "epoch": 2.68, "grad_norm": 0.7326127290725708, "learning_rate": 1.7070981413821227e-05, "loss": 2.9553, "step": 54610 }, { "epoch": 2.68, "grad_norm": 0.7484868764877319, "learning_rate": 1.706586226809571e-05, "loss": 2.9617, "step": 54611 }, { "epoch": 2.68, "grad_norm": 0.7754936814308167, "learning_rate": 1.706074386756433e-05, "loss": 2.8784, "step": 54612 }, { "epoch": 2.68, "grad_norm": 0.8197011947631836, "learning_rate": 1.7055626212240413e-05, "loss": 2.9881, "step": 54613 }, { "epoch": 2.68, "grad_norm": 0.7415262460708618, "learning_rate": 1.7050509302137584e-05, "loss": 2.9983, "step": 54614 }, { "epoch": 2.68, "grad_norm": 0.7331694960594177, "learning_rate": 1.704539313726929e-05, "loss": 2.9561, "step": 54615 }, { "epoch": 2.68, "grad_norm": 0.7985134720802307, "learning_rate": 1.70402777176489e-05, "loss": 2.9792, "step": 54616 }, { "epoch": 2.68, "grad_norm": 0.7267589569091797, "learning_rate": 1.7035163043290022e-05, "loss": 2.8766, "step": 54617 }, { "epoch": 2.68, "grad_norm": 0.7310107946395874, "learning_rate": 1.703004911420599e-05, "loss": 2.987, "step": 54618 }, { "epoch": 2.68, "grad_norm": 0.7549310326576233, "learning_rate": 1.702493593041042e-05, "loss": 2.8447, "step": 54619 }, { "epoch": 2.68, "grad_norm": 0.7123493552207947, "learning_rate": 1.7019823491916673e-05, "loss": 3.1187, "step": 54620 }, { "epoch": 2.68, "grad_norm": 0.7951881885528564, "learning_rate": 1.7014711798738268e-05, "loss": 2.8816, "step": 54621 }, { "epoch": 2.68, "grad_norm": 0.725439190864563, "learning_rate": 1.7009600850888693e-05, "loss": 2.9719, "step": 54622 }, { "epoch": 2.68, "grad_norm": 0.7655175924301147, "learning_rate": 1.7004490648381274e-05, "loss": 2.8131, "step": 54623 }, { "epoch": 2.68, "grad_norm": 0.7330092191696167, "learning_rate": 1.6999381191229634e-05, "loss": 2.9509, "step": 54624 }, { "epoch": 2.68, "grad_norm": 0.7745311856269836, "learning_rate": 1.6994272479447124e-05, "loss": 2.7781, "step": 54625 }, { "epoch": 2.68, "grad_norm": 0.7374331951141357, "learning_rate": 1.6989164513047238e-05, "loss": 3.0763, "step": 54626 }, { "epoch": 2.68, "grad_norm": 0.7071889042854309, "learning_rate": 1.698405729204343e-05, "loss": 2.7063, "step": 54627 }, { "epoch": 2.68, "grad_norm": 0.7430211901664734, "learning_rate": 1.6978950816449153e-05, "loss": 3.0131, "step": 54628 }, { "epoch": 2.68, "grad_norm": 0.746117889881134, "learning_rate": 1.697384508627787e-05, "loss": 2.7566, "step": 54629 }, { "epoch": 2.68, "grad_norm": 0.7581526041030884, "learning_rate": 1.6968740101542965e-05, "loss": 3.0198, "step": 54630 }, { "epoch": 2.68, "grad_norm": 0.7774915099143982, "learning_rate": 1.6963635862257996e-05, "loss": 3.039, "step": 54631 }, { "epoch": 2.68, "grad_norm": 0.7348177433013916, "learning_rate": 1.695853236843625e-05, "loss": 3.1106, "step": 54632 }, { "epoch": 2.68, "grad_norm": 0.7624771595001221, "learning_rate": 1.6953429620091284e-05, "loss": 3.0076, "step": 54633 }, { "epoch": 2.68, "grad_norm": 0.7285212874412537, "learning_rate": 1.6948327617236557e-05, "loss": 2.7591, "step": 54634 }, { "epoch": 2.68, "grad_norm": 0.8262482285499573, "learning_rate": 1.6943226359885385e-05, "loss": 3.0077, "step": 54635 }, { "epoch": 2.68, "grad_norm": 0.7384718656539917, "learning_rate": 1.6938125848051363e-05, "loss": 2.8514, "step": 54636 }, { "epoch": 2.68, "grad_norm": 0.7086389064788818, "learning_rate": 1.6933026081747814e-05, "loss": 2.9698, "step": 54637 }, { "epoch": 2.68, "grad_norm": 0.7307590246200562, "learning_rate": 1.6927927060988156e-05, "loss": 2.8567, "step": 54638 }, { "epoch": 2.68, "grad_norm": 0.7535133957862854, "learning_rate": 1.6922828785785912e-05, "loss": 2.821, "step": 54639 }, { "epoch": 2.68, "grad_norm": 0.7423950433731079, "learning_rate": 1.6917731256154443e-05, "loss": 2.8587, "step": 54640 }, { "epoch": 2.68, "grad_norm": 0.7522428035736084, "learning_rate": 1.69126344721072e-05, "loss": 2.9274, "step": 54641 }, { "epoch": 2.68, "grad_norm": 0.7286722660064697, "learning_rate": 1.6907538433657543e-05, "loss": 2.7226, "step": 54642 }, { "epoch": 2.68, "grad_norm": 0.7772533297538757, "learning_rate": 1.690244314081902e-05, "loss": 2.9504, "step": 54643 }, { "epoch": 2.68, "grad_norm": 0.7469460964202881, "learning_rate": 1.6897348593604965e-05, "loss": 2.8693, "step": 54644 }, { "epoch": 2.68, "grad_norm": 0.7319382429122925, "learning_rate": 1.6892254792028757e-05, "loss": 2.8529, "step": 54645 }, { "epoch": 2.68, "grad_norm": 0.7492722272872925, "learning_rate": 1.6887161736103893e-05, "loss": 2.8104, "step": 54646 }, { "epoch": 2.68, "grad_norm": 0.7667667865753174, "learning_rate": 1.688206942584376e-05, "loss": 2.8156, "step": 54647 }, { "epoch": 2.68, "grad_norm": 0.6894829869270325, "learning_rate": 1.6876977861261744e-05, "loss": 2.8207, "step": 54648 }, { "epoch": 2.68, "grad_norm": 0.7800548672676086, "learning_rate": 1.6871887042371334e-05, "loss": 2.9097, "step": 54649 }, { "epoch": 2.68, "grad_norm": 0.7656291127204895, "learning_rate": 1.6866796969185827e-05, "loss": 2.7117, "step": 54650 }, { "epoch": 2.68, "grad_norm": 0.7687351107597351, "learning_rate": 1.686170764171877e-05, "loss": 2.8556, "step": 54651 }, { "epoch": 2.68, "grad_norm": 0.7354574203491211, "learning_rate": 1.6856619059983423e-05, "loss": 2.8432, "step": 54652 }, { "epoch": 2.68, "grad_norm": 0.7067791819572449, "learning_rate": 1.685153122399321e-05, "loss": 2.976, "step": 54653 }, { "epoch": 2.68, "grad_norm": 0.7321593165397644, "learning_rate": 1.684644413376165e-05, "loss": 2.8863, "step": 54654 }, { "epoch": 2.68, "grad_norm": 0.7532573342323303, "learning_rate": 1.6841357789302e-05, "loss": 2.8874, "step": 54655 }, { "epoch": 2.68, "grad_norm": 0.7408707141876221, "learning_rate": 1.6836272190627787e-05, "loss": 3.2351, "step": 54656 }, { "epoch": 2.68, "grad_norm": 0.7293885946273804, "learning_rate": 1.683118733775226e-05, "loss": 2.8992, "step": 54657 }, { "epoch": 2.68, "grad_norm": 0.7179583311080933, "learning_rate": 1.682610323068895e-05, "loss": 2.8923, "step": 54658 }, { "epoch": 2.68, "grad_norm": 0.7808348536491394, "learning_rate": 1.6821019869451167e-05, "loss": 3.0609, "step": 54659 }, { "epoch": 2.68, "grad_norm": 0.7644177675247192, "learning_rate": 1.6815937254052314e-05, "loss": 2.9049, "step": 54660 }, { "epoch": 2.68, "grad_norm": 0.7662442326545715, "learning_rate": 1.6810855384505804e-05, "loss": 2.8631, "step": 54661 }, { "epoch": 2.68, "grad_norm": 0.7450445890426636, "learning_rate": 1.6805774260824968e-05, "loss": 2.835, "step": 54662 }, { "epoch": 2.68, "grad_norm": 0.7103903889656067, "learning_rate": 1.6800693883023185e-05, "loss": 2.7432, "step": 54663 }, { "epoch": 2.68, "grad_norm": 0.7254341244697571, "learning_rate": 1.6795614251113953e-05, "loss": 3.0087, "step": 54664 }, { "epoch": 2.68, "grad_norm": 0.7645226716995239, "learning_rate": 1.6790535365110557e-05, "loss": 2.6737, "step": 54665 }, { "epoch": 2.68, "grad_norm": 0.7188624143600464, "learning_rate": 1.6785457225026387e-05, "loss": 2.6871, "step": 54666 }, { "epoch": 2.68, "grad_norm": 0.7680506110191345, "learning_rate": 1.678037983087477e-05, "loss": 2.8942, "step": 54667 }, { "epoch": 2.68, "grad_norm": 0.7478795051574707, "learning_rate": 1.677530318266912e-05, "loss": 2.8025, "step": 54668 }, { "epoch": 2.68, "grad_norm": 0.7576417922973633, "learning_rate": 1.6770227280422866e-05, "loss": 2.8098, "step": 54669 }, { "epoch": 2.68, "grad_norm": 0.7802990078926086, "learning_rate": 1.6765152124149262e-05, "loss": 2.8422, "step": 54670 }, { "epoch": 2.68, "grad_norm": 0.7456069588661194, "learning_rate": 1.6760077713861797e-05, "loss": 2.8779, "step": 54671 }, { "epoch": 2.68, "grad_norm": 0.7156590819358826, "learning_rate": 1.675500404957376e-05, "loss": 2.8347, "step": 54672 }, { "epoch": 2.68, "grad_norm": 0.7358556389808655, "learning_rate": 1.6749931131298545e-05, "loss": 3.0632, "step": 54673 }, { "epoch": 2.68, "grad_norm": 0.7578690648078918, "learning_rate": 1.6744858959049502e-05, "loss": 2.8503, "step": 54674 }, { "epoch": 2.68, "grad_norm": 0.7345317602157593, "learning_rate": 1.6739787532839922e-05, "loss": 2.9099, "step": 54675 }, { "epoch": 2.68, "grad_norm": 0.7033917903900146, "learning_rate": 1.6734716852683295e-05, "loss": 2.906, "step": 54676 }, { "epoch": 2.68, "grad_norm": 0.6990805268287659, "learning_rate": 1.6729646918592875e-05, "loss": 2.9649, "step": 54677 }, { "epoch": 2.68, "grad_norm": 0.7403311729431152, "learning_rate": 1.6724577730582024e-05, "loss": 2.6907, "step": 54678 }, { "epoch": 2.68, "grad_norm": 0.770260214805603, "learning_rate": 1.671950928866419e-05, "loss": 2.8517, "step": 54679 }, { "epoch": 2.68, "grad_norm": 0.7441731691360474, "learning_rate": 1.6714441592852633e-05, "loss": 3.0706, "step": 54680 }, { "epoch": 2.68, "grad_norm": 0.7643983960151672, "learning_rate": 1.670937464316071e-05, "loss": 2.9851, "step": 54681 }, { "epoch": 2.68, "grad_norm": 0.7585827708244324, "learning_rate": 1.6704308439601745e-05, "loss": 2.8638, "step": 54682 }, { "epoch": 2.68, "grad_norm": 0.7482290863990784, "learning_rate": 1.6699242982189088e-05, "loss": 2.8541, "step": 54683 }, { "epoch": 2.68, "grad_norm": 0.7718914151191711, "learning_rate": 1.6694178270936167e-05, "loss": 2.9846, "step": 54684 }, { "epoch": 2.68, "grad_norm": 0.8131400346755981, "learning_rate": 1.66891143058562e-05, "loss": 2.7676, "step": 54685 }, { "epoch": 2.68, "grad_norm": 0.7344086170196533, "learning_rate": 1.6684051086962648e-05, "loss": 2.9111, "step": 54686 }, { "epoch": 2.68, "grad_norm": 0.7355520725250244, "learning_rate": 1.6678988614268762e-05, "loss": 3.0156, "step": 54687 }, { "epoch": 2.68, "grad_norm": 0.7679204940795898, "learning_rate": 1.667392688778787e-05, "loss": 2.7147, "step": 54688 }, { "epoch": 2.68, "grad_norm": 0.7418351173400879, "learning_rate": 1.6668865907533356e-05, "loss": 2.9869, "step": 54689 }, { "epoch": 2.68, "grad_norm": 0.7173027992248535, "learning_rate": 1.666380567351845e-05, "loss": 2.8766, "step": 54690 }, { "epoch": 2.68, "grad_norm": 0.7610350251197815, "learning_rate": 1.6658746185756632e-05, "loss": 3.1118, "step": 54691 }, { "epoch": 2.68, "grad_norm": 0.7524661421775818, "learning_rate": 1.66536874442611e-05, "loss": 2.8634, "step": 54692 }, { "epoch": 2.68, "grad_norm": 0.7599107623100281, "learning_rate": 1.6648629449045204e-05, "loss": 2.6746, "step": 54693 }, { "epoch": 2.68, "grad_norm": 0.7229357361793518, "learning_rate": 1.6643572200122336e-05, "loss": 2.9954, "step": 54694 }, { "epoch": 2.68, "grad_norm": 0.8016541600227356, "learning_rate": 1.663851569750578e-05, "loss": 2.8605, "step": 54695 }, { "epoch": 2.68, "grad_norm": 0.7787773609161377, "learning_rate": 1.663345994120884e-05, "loss": 3.0242, "step": 54696 }, { "epoch": 2.68, "grad_norm": 0.768181562423706, "learning_rate": 1.6628404931244755e-05, "loss": 2.7763, "step": 54697 }, { "epoch": 2.68, "grad_norm": 0.7640612721443176, "learning_rate": 1.6623350667626955e-05, "loss": 2.9869, "step": 54698 }, { "epoch": 2.68, "grad_norm": 0.7562863230705261, "learning_rate": 1.6618297150368733e-05, "loss": 2.7484, "step": 54699 }, { "epoch": 2.68, "grad_norm": 0.755027711391449, "learning_rate": 1.6613244379483336e-05, "loss": 2.9133, "step": 54700 }, { "epoch": 2.68, "grad_norm": 0.7754169702529907, "learning_rate": 1.6608192354984162e-05, "loss": 3.0882, "step": 54701 }, { "epoch": 2.68, "grad_norm": 0.7277271151542664, "learning_rate": 1.6603141076884495e-05, "loss": 2.8785, "step": 54702 }, { "epoch": 2.68, "grad_norm": 0.7051646113395691, "learning_rate": 1.659809054519753e-05, "loss": 2.8702, "step": 54703 }, { "epoch": 2.68, "grad_norm": 0.7471274137496948, "learning_rate": 1.6593040759936715e-05, "loss": 2.8466, "step": 54704 }, { "epoch": 2.68, "grad_norm": 0.7694349884986877, "learning_rate": 1.6587991721115245e-05, "loss": 2.6614, "step": 54705 }, { "epoch": 2.68, "grad_norm": 0.7603945732116699, "learning_rate": 1.658294342874651e-05, "loss": 2.8695, "step": 54706 }, { "epoch": 2.68, "grad_norm": 0.7334092259407043, "learning_rate": 1.6577895882843693e-05, "loss": 2.7742, "step": 54707 }, { "epoch": 2.68, "grad_norm": 0.7664968371391296, "learning_rate": 1.6572849083420225e-05, "loss": 2.977, "step": 54708 }, { "epoch": 2.68, "grad_norm": 0.7536723613739014, "learning_rate": 1.6567803030489292e-05, "loss": 2.8594, "step": 54709 }, { "epoch": 2.68, "grad_norm": 0.7267215251922607, "learning_rate": 1.656275772406418e-05, "loss": 2.8231, "step": 54710 }, { "epoch": 2.68, "grad_norm": 0.7520238161087036, "learning_rate": 1.6557713164158283e-05, "loss": 2.8955, "step": 54711 }, { "epoch": 2.68, "grad_norm": 0.7576550245285034, "learning_rate": 1.6552669350784753e-05, "loss": 2.8592, "step": 54712 }, { "epoch": 2.68, "grad_norm": 0.7513378262519836, "learning_rate": 1.6547626283957016e-05, "loss": 2.9951, "step": 54713 }, { "epoch": 2.68, "grad_norm": 0.7380017638206482, "learning_rate": 1.6542583963688195e-05, "loss": 2.9589, "step": 54714 }, { "epoch": 2.68, "grad_norm": 0.7709711194038391, "learning_rate": 1.6537542389991676e-05, "loss": 2.7643, "step": 54715 }, { "epoch": 2.68, "grad_norm": 0.7406794428825378, "learning_rate": 1.6532501562880784e-05, "loss": 3.0594, "step": 54716 }, { "epoch": 2.68, "grad_norm": 0.7626711130142212, "learning_rate": 1.652746148236871e-05, "loss": 2.7846, "step": 54717 }, { "epoch": 2.68, "grad_norm": 0.7286007404327393, "learning_rate": 1.6522422148468738e-05, "loss": 3.0798, "step": 54718 }, { "epoch": 2.68, "grad_norm": 0.740604817867279, "learning_rate": 1.65173835611941e-05, "loss": 2.8224, "step": 54719 }, { "epoch": 2.68, "grad_norm": 0.8206152319908142, "learning_rate": 1.651234572055814e-05, "loss": 2.9789, "step": 54720 }, { "epoch": 2.68, "grad_norm": 0.7075120210647583, "learning_rate": 1.6507308626574122e-05, "loss": 2.9661, "step": 54721 }, { "epoch": 2.68, "grad_norm": 0.775321364402771, "learning_rate": 1.650227227925527e-05, "loss": 2.9403, "step": 54722 }, { "epoch": 2.68, "grad_norm": 0.7323336005210876, "learning_rate": 1.64972366786149e-05, "loss": 2.9916, "step": 54723 }, { "epoch": 2.68, "grad_norm": 0.7589771151542664, "learning_rate": 1.6492201824666274e-05, "loss": 2.7981, "step": 54724 }, { "epoch": 2.68, "grad_norm": 0.7782710194587708, "learning_rate": 1.6487167717422544e-05, "loss": 2.9767, "step": 54725 }, { "epoch": 2.68, "grad_norm": 0.7076034545898438, "learning_rate": 1.6482134356897138e-05, "loss": 2.6416, "step": 54726 }, { "epoch": 2.68, "grad_norm": 0.7385815382003784, "learning_rate": 1.6477101743103138e-05, "loss": 2.7569, "step": 54727 }, { "epoch": 2.68, "grad_norm": 0.7414993047714233, "learning_rate": 1.6472069876053973e-05, "loss": 2.775, "step": 54728 }, { "epoch": 2.68, "grad_norm": 0.7520781755447388, "learning_rate": 1.646703875576273e-05, "loss": 2.914, "step": 54729 }, { "epoch": 2.68, "grad_norm": 0.7415902614593506, "learning_rate": 1.64620083822428e-05, "loss": 2.8865, "step": 54730 }, { "epoch": 2.68, "grad_norm": 0.7842666506767273, "learning_rate": 1.6456978755507366e-05, "loss": 3.0898, "step": 54731 }, { "epoch": 2.68, "grad_norm": 0.7630308866500854, "learning_rate": 1.6451949875569658e-05, "loss": 2.8664, "step": 54732 }, { "epoch": 2.68, "grad_norm": 0.7267240285873413, "learning_rate": 1.6446921742442963e-05, "loss": 2.9599, "step": 54733 }, { "epoch": 2.68, "grad_norm": 0.7458666563034058, "learning_rate": 1.6441894356140474e-05, "loss": 2.6751, "step": 54734 }, { "epoch": 2.68, "grad_norm": 0.7139987349510193, "learning_rate": 1.6436867716675473e-05, "loss": 2.7982, "step": 54735 }, { "epoch": 2.68, "grad_norm": 0.7688502073287964, "learning_rate": 1.643184182406122e-05, "loss": 2.8371, "step": 54736 }, { "epoch": 2.68, "grad_norm": 0.7603595852851868, "learning_rate": 1.6426816678310873e-05, "loss": 3.0501, "step": 54737 }, { "epoch": 2.68, "grad_norm": 0.7536769509315491, "learning_rate": 1.6421792279437752e-05, "loss": 2.9334, "step": 54738 }, { "epoch": 2.68, "grad_norm": 0.7776528000831604, "learning_rate": 1.641676862745508e-05, "loss": 2.8698, "step": 54739 }, { "epoch": 2.68, "grad_norm": 0.7579948902130127, "learning_rate": 1.6411745722376014e-05, "loss": 2.7414, "step": 54740 }, { "epoch": 2.68, "grad_norm": 0.7683404684066772, "learning_rate": 1.6406723564213843e-05, "loss": 2.9881, "step": 54741 }, { "epoch": 2.68, "grad_norm": 0.7273842692375183, "learning_rate": 1.640170215298179e-05, "loss": 2.7239, "step": 54742 }, { "epoch": 2.68, "grad_norm": 0.7698961496353149, "learning_rate": 1.6396681488693077e-05, "loss": 2.672, "step": 54743 }, { "epoch": 2.68, "grad_norm": 0.7344798445701599, "learning_rate": 1.639166157136089e-05, "loss": 2.9347, "step": 54744 }, { "epoch": 2.68, "grad_norm": 0.7438050508499146, "learning_rate": 1.638664240099856e-05, "loss": 2.9031, "step": 54745 }, { "epoch": 2.68, "grad_norm": 0.7391680479049683, "learning_rate": 1.6381623977619206e-05, "loss": 3.0264, "step": 54746 }, { "epoch": 2.68, "grad_norm": 0.7523117661476135, "learning_rate": 1.6376606301236017e-05, "loss": 2.9136, "step": 54747 }, { "epoch": 2.68, "grad_norm": 0.7371009588241577, "learning_rate": 1.6371589371862315e-05, "loss": 3.0831, "step": 54748 }, { "epoch": 2.68, "grad_norm": 0.718420147895813, "learning_rate": 1.6366573189511224e-05, "loss": 2.6528, "step": 54749 }, { "epoch": 2.68, "grad_norm": 0.7344409227371216, "learning_rate": 1.6361557754195997e-05, "loss": 2.9695, "step": 54750 }, { "epoch": 2.68, "grad_norm": 0.7529169321060181, "learning_rate": 1.6356543065929862e-05, "loss": 2.8281, "step": 54751 }, { "epoch": 2.68, "grad_norm": 0.7365326881408691, "learning_rate": 1.6351529124726005e-05, "loss": 2.85, "step": 54752 }, { "epoch": 2.68, "grad_norm": 0.7705813050270081, "learning_rate": 1.6346515930597648e-05, "loss": 3.0134, "step": 54753 }, { "epoch": 2.68, "grad_norm": 0.7423902750015259, "learning_rate": 1.634150348355795e-05, "loss": 2.9457, "step": 54754 }, { "epoch": 2.68, "grad_norm": 0.7638031244277954, "learning_rate": 1.6336491783620097e-05, "loss": 2.8813, "step": 54755 }, { "epoch": 2.68, "grad_norm": 0.7596058249473572, "learning_rate": 1.6331480830797416e-05, "loss": 2.9061, "step": 54756 }, { "epoch": 2.68, "grad_norm": 0.7403856515884399, "learning_rate": 1.632647062510296e-05, "loss": 3.0641, "step": 54757 }, { "epoch": 2.68, "grad_norm": 0.7983343005180359, "learning_rate": 1.632146116655002e-05, "loss": 2.8901, "step": 54758 }, { "epoch": 2.68, "grad_norm": 0.7619073987007141, "learning_rate": 1.631645245515172e-05, "loss": 2.8923, "step": 54759 }, { "epoch": 2.68, "grad_norm": 0.7451263666152954, "learning_rate": 1.6311444490921344e-05, "loss": 2.9146, "step": 54760 }, { "epoch": 2.68, "grad_norm": 0.7346713542938232, "learning_rate": 1.6306437273872018e-05, "loss": 2.8883, "step": 54761 }, { "epoch": 2.68, "grad_norm": 0.7482584118843079, "learning_rate": 1.6301430804016902e-05, "loss": 2.8736, "step": 54762 }, { "epoch": 2.68, "grad_norm": 0.7432484030723572, "learning_rate": 1.6296425081369248e-05, "loss": 2.9137, "step": 54763 }, { "epoch": 2.68, "grad_norm": 0.7410925030708313, "learning_rate": 1.6291420105942178e-05, "loss": 2.7335, "step": 54764 }, { "epoch": 2.68, "grad_norm": 0.7755419015884399, "learning_rate": 1.6286415877748915e-05, "loss": 2.6624, "step": 54765 }, { "epoch": 2.68, "grad_norm": 0.7352432608604431, "learning_rate": 1.628141239680265e-05, "loss": 2.8728, "step": 54766 }, { "epoch": 2.68, "grad_norm": 0.7407886385917664, "learning_rate": 1.627640966311654e-05, "loss": 2.9961, "step": 54767 }, { "epoch": 2.68, "grad_norm": 0.7732663750648499, "learning_rate": 1.6271407676703773e-05, "loss": 2.774, "step": 54768 }, { "epoch": 2.68, "grad_norm": 0.8005004525184631, "learning_rate": 1.6266406437577506e-05, "loss": 3.0023, "step": 54769 }, { "epoch": 2.68, "grad_norm": 0.7332467436790466, "learning_rate": 1.626140594575086e-05, "loss": 3.1547, "step": 54770 }, { "epoch": 2.68, "grad_norm": 0.7251038551330566, "learning_rate": 1.625640620123716e-05, "loss": 2.958, "step": 54771 }, { "epoch": 2.68, "grad_norm": 0.7420737743377686, "learning_rate": 1.6251407204049394e-05, "loss": 2.9342, "step": 54772 }, { "epoch": 2.68, "grad_norm": 0.7631248831748962, "learning_rate": 1.6246408954200883e-05, "loss": 2.882, "step": 54773 }, { "epoch": 2.68, "grad_norm": 0.7106769680976868, "learning_rate": 1.6241411451704656e-05, "loss": 2.754, "step": 54774 }, { "epoch": 2.68, "grad_norm": 0.77427077293396, "learning_rate": 1.6236414696573994e-05, "loss": 3.0064, "step": 54775 }, { "epoch": 2.68, "grad_norm": 0.7603651285171509, "learning_rate": 1.6231418688821997e-05, "loss": 2.6856, "step": 54776 }, { "epoch": 2.68, "grad_norm": 0.7737959623336792, "learning_rate": 1.622642342846181e-05, "loss": 2.7616, "step": 54777 }, { "epoch": 2.68, "grad_norm": 0.7463186979293823, "learning_rate": 1.622142891550663e-05, "loss": 2.7421, "step": 54778 }, { "epoch": 2.68, "grad_norm": 0.7298126816749573, "learning_rate": 1.6216435149969574e-05, "loss": 2.9434, "step": 54779 }, { "epoch": 2.68, "grad_norm": 0.7219801545143127, "learning_rate": 1.6211442131863772e-05, "loss": 2.9801, "step": 54780 }, { "epoch": 2.68, "grad_norm": 0.7264379858970642, "learning_rate": 1.620644986120251e-05, "loss": 2.9004, "step": 54781 }, { "epoch": 2.68, "grad_norm": 0.7708029747009277, "learning_rate": 1.6201458337998807e-05, "loss": 2.9669, "step": 54782 }, { "epoch": 2.68, "grad_norm": 0.7728287577629089, "learning_rate": 1.6196467562265857e-05, "loss": 2.7538, "step": 54783 }, { "epoch": 2.68, "grad_norm": 0.7423531413078308, "learning_rate": 1.619147753401675e-05, "loss": 2.9775, "step": 54784 }, { "epoch": 2.68, "grad_norm": 0.7586064338684082, "learning_rate": 1.6186488253264707e-05, "loss": 3.0552, "step": 54785 }, { "epoch": 2.68, "grad_norm": 0.7348181009292603, "learning_rate": 1.6181499720022784e-05, "loss": 2.7265, "step": 54786 }, { "epoch": 2.68, "grad_norm": 0.7246251702308655, "learning_rate": 1.617651193430417e-05, "loss": 2.7787, "step": 54787 }, { "epoch": 2.69, "grad_norm": 0.7123945355415344, "learning_rate": 1.6171524896122056e-05, "loss": 2.9102, "step": 54788 }, { "epoch": 2.69, "grad_norm": 0.7603493332862854, "learning_rate": 1.6166538605489497e-05, "loss": 2.6584, "step": 54789 }, { "epoch": 2.69, "grad_norm": 0.7190308570861816, "learning_rate": 1.6161553062419687e-05, "loss": 2.8735, "step": 54790 }, { "epoch": 2.69, "grad_norm": 0.7607088088989258, "learning_rate": 1.615656826692564e-05, "loss": 2.9559, "step": 54791 }, { "epoch": 2.69, "grad_norm": 0.7394618391990662, "learning_rate": 1.6151584219020585e-05, "loss": 2.9037, "step": 54792 }, { "epoch": 2.69, "grad_norm": 0.7496686577796936, "learning_rate": 1.6146600918717676e-05, "loss": 2.8412, "step": 54793 }, { "epoch": 2.69, "grad_norm": 0.7344440817832947, "learning_rate": 1.6141618366029974e-05, "loss": 2.8324, "step": 54794 }, { "epoch": 2.69, "grad_norm": 0.7307662963867188, "learning_rate": 1.6136636560970628e-05, "loss": 2.7831, "step": 54795 }, { "epoch": 2.69, "grad_norm": 0.7606189250946045, "learning_rate": 1.61316555035527e-05, "loss": 2.9232, "step": 54796 }, { "epoch": 2.69, "grad_norm": 0.7663336992263794, "learning_rate": 1.6126675193789406e-05, "loss": 2.7469, "step": 54797 }, { "epoch": 2.69, "grad_norm": 0.7470273375511169, "learning_rate": 1.6121695631693843e-05, "loss": 2.819, "step": 54798 }, { "epoch": 2.69, "grad_norm": 0.7757609486579895, "learning_rate": 1.611671681727903e-05, "loss": 2.9269, "step": 54799 }, { "epoch": 2.69, "grad_norm": 0.6888236999511719, "learning_rate": 1.6111738750558223e-05, "loss": 2.8834, "step": 54800 }, { "epoch": 2.69, "grad_norm": 0.7634980082511902, "learning_rate": 1.610676143154438e-05, "loss": 3.0092, "step": 54801 }, { "epoch": 2.69, "grad_norm": 0.7457658648490906, "learning_rate": 1.6101784860250722e-05, "loss": 2.8261, "step": 54802 }, { "epoch": 2.69, "grad_norm": 0.7501567602157593, "learning_rate": 1.6096809036690373e-05, "loss": 2.6977, "step": 54803 }, { "epoch": 2.69, "grad_norm": 0.771567165851593, "learning_rate": 1.6091833960876355e-05, "loss": 3.0102, "step": 54804 }, { "epoch": 2.69, "grad_norm": 0.7606413960456848, "learning_rate": 1.6086859632821826e-05, "loss": 3.0169, "step": 54805 }, { "epoch": 2.69, "grad_norm": 0.7576195001602173, "learning_rate": 1.608188605253984e-05, "loss": 2.7713, "step": 54806 }, { "epoch": 2.69, "grad_norm": 0.7587432861328125, "learning_rate": 1.6076913220043487e-05, "loss": 2.7597, "step": 54807 }, { "epoch": 2.69, "grad_norm": 0.7228325605392456, "learning_rate": 1.607194113534599e-05, "loss": 3.0178, "step": 54808 }, { "epoch": 2.69, "grad_norm": 0.7396401166915894, "learning_rate": 1.6066969798460306e-05, "loss": 2.9906, "step": 54809 }, { "epoch": 2.69, "grad_norm": 0.744340181350708, "learning_rate": 1.606199920939959e-05, "loss": 2.8594, "step": 54810 }, { "epoch": 2.69, "grad_norm": 0.7660406827926636, "learning_rate": 1.6057029368176933e-05, "loss": 2.9305, "step": 54811 }, { "epoch": 2.69, "grad_norm": 0.7232922911643982, "learning_rate": 1.6052060274805388e-05, "loss": 2.6945, "step": 54812 }, { "epoch": 2.69, "grad_norm": 0.7410985827445984, "learning_rate": 1.6047091929298118e-05, "loss": 2.826, "step": 54813 }, { "epoch": 2.69, "grad_norm": 0.7533641457557678, "learning_rate": 1.6042124331668106e-05, "loss": 2.9354, "step": 54814 }, { "epoch": 2.69, "grad_norm": 0.8169004321098328, "learning_rate": 1.6037157481928542e-05, "loss": 2.8995, "step": 54815 }, { "epoch": 2.69, "grad_norm": 0.7725902199745178, "learning_rate": 1.6032191380092385e-05, "loss": 2.8986, "step": 54816 }, { "epoch": 2.69, "grad_norm": 0.7658239603042603, "learning_rate": 1.6027226026172822e-05, "loss": 2.7877, "step": 54817 }, { "epoch": 2.69, "grad_norm": 0.7260845899581909, "learning_rate": 1.6022261420182915e-05, "loss": 2.7825, "step": 54818 }, { "epoch": 2.69, "grad_norm": 0.7706580758094788, "learning_rate": 1.601729756213571e-05, "loss": 2.6989, "step": 54819 }, { "epoch": 2.69, "grad_norm": 0.7238854169845581, "learning_rate": 1.6012334452044305e-05, "loss": 2.9405, "step": 54820 }, { "epoch": 2.69, "grad_norm": 0.8242838382720947, "learning_rate": 1.600737208992172e-05, "loss": 2.927, "step": 54821 }, { "epoch": 2.69, "grad_norm": 0.7421150803565979, "learning_rate": 1.6002410475781048e-05, "loss": 2.8967, "step": 54822 }, { "epoch": 2.69, "grad_norm": 0.7626473903656006, "learning_rate": 1.5997449609635404e-05, "loss": 2.8926, "step": 54823 }, { "epoch": 2.69, "grad_norm": 0.7503937482833862, "learning_rate": 1.5992489491497783e-05, "loss": 2.6612, "step": 54824 }, { "epoch": 2.69, "grad_norm": 0.8117401599884033, "learning_rate": 1.5987530121381344e-05, "loss": 2.6266, "step": 54825 }, { "epoch": 2.69, "grad_norm": 0.7335777282714844, "learning_rate": 1.59825714992991e-05, "loss": 2.9657, "step": 54826 }, { "epoch": 2.69, "grad_norm": 0.8089480400085449, "learning_rate": 1.5977613625264018e-05, "loss": 2.5981, "step": 54827 }, { "epoch": 2.69, "grad_norm": 0.7682913541793823, "learning_rate": 1.5972656499289317e-05, "loss": 2.8463, "step": 54828 }, { "epoch": 2.69, "grad_norm": 0.7823068499565125, "learning_rate": 1.596770012138795e-05, "loss": 3.0189, "step": 54829 }, { "epoch": 2.69, "grad_norm": 0.7197554707527161, "learning_rate": 1.596274449157301e-05, "loss": 2.952, "step": 54830 }, { "epoch": 2.69, "grad_norm": 0.8423383831977844, "learning_rate": 1.5957789609857486e-05, "loss": 2.9469, "step": 54831 }, { "epoch": 2.69, "grad_norm": 0.742664635181427, "learning_rate": 1.5952835476254534e-05, "loss": 2.9605, "step": 54832 }, { "epoch": 2.69, "grad_norm": 0.7450295090675354, "learning_rate": 1.5947882090777142e-05, "loss": 3.0918, "step": 54833 }, { "epoch": 2.69, "grad_norm": 0.7666531801223755, "learning_rate": 1.5942929453438335e-05, "loss": 2.7747, "step": 54834 }, { "epoch": 2.69, "grad_norm": 0.7433580756187439, "learning_rate": 1.5937977564251238e-05, "loss": 2.8439, "step": 54835 }, { "epoch": 2.69, "grad_norm": 0.7616721391677856, "learning_rate": 1.5933026423228766e-05, "loss": 2.9655, "step": 54836 }, { "epoch": 2.69, "grad_norm": 0.7969316840171814, "learning_rate": 1.5928076030384053e-05, "loss": 2.7916, "step": 54837 }, { "epoch": 2.69, "grad_norm": 0.7451574206352234, "learning_rate": 1.5923126385730145e-05, "loss": 2.7925, "step": 54838 }, { "epoch": 2.69, "grad_norm": 0.7240291833877563, "learning_rate": 1.5918177489280003e-05, "loss": 3.0301, "step": 54839 }, { "epoch": 2.69, "grad_norm": 0.7478523254394531, "learning_rate": 1.5913229341046752e-05, "loss": 2.9261, "step": 54840 }, { "epoch": 2.69, "grad_norm": 0.715190589427948, "learning_rate": 1.590828194104341e-05, "loss": 2.8159, "step": 54841 }, { "epoch": 2.69, "grad_norm": 0.7498929500579834, "learning_rate": 1.5903335289282903e-05, "loss": 2.826, "step": 54842 }, { "epoch": 2.69, "grad_norm": 0.7427136301994324, "learning_rate": 1.5898389385778388e-05, "loss": 3.1507, "step": 54843 }, { "epoch": 2.69, "grad_norm": 0.7392913699150085, "learning_rate": 1.589344423054282e-05, "loss": 2.8675, "step": 54844 }, { "epoch": 2.69, "grad_norm": 0.7416170835494995, "learning_rate": 1.588849982358925e-05, "loss": 2.9029, "step": 54845 }, { "epoch": 2.69, "grad_norm": 0.767384946346283, "learning_rate": 1.588355616493068e-05, "loss": 2.8468, "step": 54846 }, { "epoch": 2.69, "grad_norm": 0.8128290176391602, "learning_rate": 1.5878613254580153e-05, "loss": 2.8221, "step": 54847 }, { "epoch": 2.69, "grad_norm": 0.7421417832374573, "learning_rate": 1.58736710925507e-05, "loss": 2.8124, "step": 54848 }, { "epoch": 2.69, "grad_norm": 0.7256699800491333, "learning_rate": 1.5868729678855276e-05, "loss": 2.8736, "step": 54849 }, { "epoch": 2.69, "grad_norm": 0.7774531841278076, "learning_rate": 1.5863789013506966e-05, "loss": 3.0403, "step": 54850 }, { "epoch": 2.69, "grad_norm": 0.7691476941108704, "learning_rate": 1.5858849096518732e-05, "loss": 2.8938, "step": 54851 }, { "epoch": 2.69, "grad_norm": 0.8459373712539673, "learning_rate": 1.5853909927903596e-05, "loss": 2.6245, "step": 54852 }, { "epoch": 2.69, "grad_norm": 0.7290185689926147, "learning_rate": 1.5848971507674612e-05, "loss": 2.8709, "step": 54853 }, { "epoch": 2.69, "grad_norm": 0.7624875903129578, "learning_rate": 1.584403383584474e-05, "loss": 2.9471, "step": 54854 }, { "epoch": 2.69, "grad_norm": 0.7604098320007324, "learning_rate": 1.583909691242703e-05, "loss": 2.7269, "step": 54855 }, { "epoch": 2.69, "grad_norm": 0.7130759954452515, "learning_rate": 1.5834160737434377e-05, "loss": 2.8143, "step": 54856 }, { "epoch": 2.69, "grad_norm": 0.7458640933036804, "learning_rate": 1.5829225310879868e-05, "loss": 2.9933, "step": 54857 }, { "epoch": 2.69, "grad_norm": 0.7971216440200806, "learning_rate": 1.5824290632776525e-05, "loss": 2.9175, "step": 54858 }, { "epoch": 2.69, "grad_norm": 0.743493378162384, "learning_rate": 1.5819356703137275e-05, "loss": 2.7029, "step": 54859 }, { "epoch": 2.69, "grad_norm": 0.7640153765678406, "learning_rate": 1.5814423521975172e-05, "loss": 2.829, "step": 54860 }, { "epoch": 2.69, "grad_norm": 0.7117670774459839, "learning_rate": 1.5809491089303172e-05, "loss": 2.9363, "step": 54861 }, { "epoch": 2.69, "grad_norm": 0.7635127305984497, "learning_rate": 1.58045594051343e-05, "loss": 2.9184, "step": 54862 }, { "epoch": 2.69, "grad_norm": 0.7638167142868042, "learning_rate": 1.579962846948154e-05, "loss": 2.9043, "step": 54863 }, { "epoch": 2.69, "grad_norm": 0.7500227093696594, "learning_rate": 1.579469828235782e-05, "loss": 2.999, "step": 54864 }, { "epoch": 2.69, "grad_norm": 0.75905442237854, "learning_rate": 1.5789768843776195e-05, "loss": 2.7478, "step": 54865 }, { "epoch": 2.69, "grad_norm": 0.7460845112800598, "learning_rate": 1.578484015374959e-05, "loss": 2.9451, "step": 54866 }, { "epoch": 2.69, "grad_norm": 0.7751177549362183, "learning_rate": 1.577991221229106e-05, "loss": 2.8939, "step": 54867 }, { "epoch": 2.69, "grad_norm": 0.7179179191589355, "learning_rate": 1.577498501941349e-05, "loss": 2.9286, "step": 54868 }, { "epoch": 2.69, "grad_norm": 0.7725952863693237, "learning_rate": 1.5770058575129972e-05, "loss": 2.89, "step": 54869 }, { "epoch": 2.69, "grad_norm": 0.7968480587005615, "learning_rate": 1.57651328794534e-05, "loss": 2.9152, "step": 54870 }, { "epoch": 2.69, "grad_norm": 0.7715826630592346, "learning_rate": 1.576020793239673e-05, "loss": 2.9582, "step": 54871 }, { "epoch": 2.69, "grad_norm": 0.7265507578849792, "learning_rate": 1.575528373397301e-05, "loss": 2.8741, "step": 54872 }, { "epoch": 2.69, "grad_norm": 0.7969720363616943, "learning_rate": 1.5750360284195107e-05, "loss": 3.063, "step": 54873 }, { "epoch": 2.69, "grad_norm": 0.7510287761688232, "learning_rate": 1.574543758307607e-05, "loss": 2.8564, "step": 54874 }, { "epoch": 2.69, "grad_norm": 0.7693957090377808, "learning_rate": 1.574051563062889e-05, "loss": 2.9187, "step": 54875 }, { "epoch": 2.69, "grad_norm": 0.8029640316963196, "learning_rate": 1.573559442686646e-05, "loss": 2.7687, "step": 54876 }, { "epoch": 2.69, "grad_norm": 0.7065871953964233, "learning_rate": 1.5730673971801833e-05, "loss": 2.8054, "step": 54877 }, { "epoch": 2.69, "grad_norm": 0.7508147358894348, "learning_rate": 1.5725754265447798e-05, "loss": 3.1097, "step": 54878 }, { "epoch": 2.69, "grad_norm": 0.7067500352859497, "learning_rate": 1.5720835307817448e-05, "loss": 3.004, "step": 54879 }, { "epoch": 2.69, "grad_norm": 0.7348147034645081, "learning_rate": 1.5715917098923737e-05, "loss": 3.0635, "step": 54880 }, { "epoch": 2.69, "grad_norm": 0.7430323362350464, "learning_rate": 1.571099963877952e-05, "loss": 3.0832, "step": 54881 }, { "epoch": 2.69, "grad_norm": 0.7505277395248413, "learning_rate": 1.570608292739789e-05, "loss": 3.0346, "step": 54882 }, { "epoch": 2.69, "grad_norm": 0.7366390824317932, "learning_rate": 1.5701166964791667e-05, "loss": 3.0246, "step": 54883 }, { "epoch": 2.69, "grad_norm": 0.7447764277458191, "learning_rate": 1.569625175097391e-05, "loss": 2.9516, "step": 54884 }, { "epoch": 2.69, "grad_norm": 0.7516607046127319, "learning_rate": 1.5691337285957504e-05, "loss": 2.9391, "step": 54885 }, { "epoch": 2.69, "grad_norm": 0.7200809121131897, "learning_rate": 1.568642356975538e-05, "loss": 3.0, "step": 54886 }, { "epoch": 2.69, "grad_norm": 0.7547498941421509, "learning_rate": 1.568151060238052e-05, "loss": 2.9319, "step": 54887 }, { "epoch": 2.69, "grad_norm": 0.7287806868553162, "learning_rate": 1.5676598383845817e-05, "loss": 2.8652, "step": 54888 }, { "epoch": 2.69, "grad_norm": 0.756786048412323, "learning_rate": 1.5671686914164194e-05, "loss": 3.0127, "step": 54889 }, { "epoch": 2.69, "grad_norm": 0.7761844992637634, "learning_rate": 1.5666776193348706e-05, "loss": 2.817, "step": 54890 }, { "epoch": 2.69, "grad_norm": 0.7763392925262451, "learning_rate": 1.566186622141221e-05, "loss": 2.9292, "step": 54891 }, { "epoch": 2.69, "grad_norm": 0.7687212824821472, "learning_rate": 1.5656956998367596e-05, "loss": 3.0882, "step": 54892 }, { "epoch": 2.69, "grad_norm": 0.7391537427902222, "learning_rate": 1.5652048524227857e-05, "loss": 3.0195, "step": 54893 }, { "epoch": 2.69, "grad_norm": 0.7586456537246704, "learning_rate": 1.564714079900584e-05, "loss": 2.8656, "step": 54894 }, { "epoch": 2.69, "grad_norm": 0.7237671613693237, "learning_rate": 1.5642233822714612e-05, "loss": 3.0369, "step": 54895 }, { "epoch": 2.69, "grad_norm": 0.7231841683387756, "learning_rate": 1.5637327595366954e-05, "loss": 2.9005, "step": 54896 }, { "epoch": 2.69, "grad_norm": 0.7463599443435669, "learning_rate": 1.5632422116975863e-05, "loss": 2.8008, "step": 54897 }, { "epoch": 2.69, "grad_norm": 0.7573348879814148, "learning_rate": 1.5627517387554256e-05, "loss": 2.8954, "step": 54898 }, { "epoch": 2.69, "grad_norm": 0.7449231147766113, "learning_rate": 1.562261340711506e-05, "loss": 2.8526, "step": 54899 }, { "epoch": 2.69, "grad_norm": 0.7895654439926147, "learning_rate": 1.5617710175671162e-05, "loss": 2.8813, "step": 54900 }, { "epoch": 2.69, "grad_norm": 0.7453767657279968, "learning_rate": 1.561280769323545e-05, "loss": 2.8936, "step": 54901 }, { "epoch": 2.69, "grad_norm": 0.7513246536254883, "learning_rate": 1.5607905959820887e-05, "loss": 2.9529, "step": 54902 }, { "epoch": 2.69, "grad_norm": 0.752334475517273, "learning_rate": 1.5603004975440357e-05, "loss": 2.9262, "step": 54903 }, { "epoch": 2.69, "grad_norm": 0.7545332312583923, "learning_rate": 1.5598104740106788e-05, "loss": 2.6894, "step": 54904 }, { "epoch": 2.69, "grad_norm": 0.7315773963928223, "learning_rate": 1.5593205253833096e-05, "loss": 3.0838, "step": 54905 }, { "epoch": 2.69, "grad_norm": 0.7586761713027954, "learning_rate": 1.5588306516632175e-05, "loss": 2.8601, "step": 54906 }, { "epoch": 2.69, "grad_norm": 0.7025976181030273, "learning_rate": 1.5583408528516916e-05, "loss": 2.8994, "step": 54907 }, { "epoch": 2.69, "grad_norm": 0.7706955075263977, "learning_rate": 1.5578511289500174e-05, "loss": 2.8067, "step": 54908 }, { "epoch": 2.69, "grad_norm": 0.7359408736228943, "learning_rate": 1.5573614799594902e-05, "loss": 2.7985, "step": 54909 }, { "epoch": 2.69, "grad_norm": 0.7297497391700745, "learning_rate": 1.556871905881403e-05, "loss": 2.9224, "step": 54910 }, { "epoch": 2.69, "grad_norm": 0.7188913822174072, "learning_rate": 1.556382406717037e-05, "loss": 2.9255, "step": 54911 }, { "epoch": 2.69, "grad_norm": 0.7384754419326782, "learning_rate": 1.5558929824676892e-05, "loss": 2.882, "step": 54912 }, { "epoch": 2.69, "grad_norm": 0.7281101942062378, "learning_rate": 1.5554036331346444e-05, "loss": 3.1627, "step": 54913 }, { "epoch": 2.69, "grad_norm": 0.7926753163337708, "learning_rate": 1.5549143587191914e-05, "loss": 3.0256, "step": 54914 }, { "epoch": 2.69, "grad_norm": 0.7407711148262024, "learning_rate": 1.5544251592226198e-05, "loss": 3.0445, "step": 54915 }, { "epoch": 2.69, "grad_norm": 0.7497155666351318, "learning_rate": 1.553936034646218e-05, "loss": 3.0041, "step": 54916 }, { "epoch": 2.69, "grad_norm": 0.7246167063713074, "learning_rate": 1.5534469849912755e-05, "loss": 2.8765, "step": 54917 }, { "epoch": 2.69, "grad_norm": 0.7616339325904846, "learning_rate": 1.5529580102590743e-05, "loss": 2.975, "step": 54918 }, { "epoch": 2.69, "grad_norm": 0.7942472696304321, "learning_rate": 1.552469110450907e-05, "loss": 2.9639, "step": 54919 }, { "epoch": 2.69, "grad_norm": 0.7623060345649719, "learning_rate": 1.5519802855680685e-05, "loss": 2.798, "step": 54920 }, { "epoch": 2.69, "grad_norm": 0.7489006519317627, "learning_rate": 1.551491535611835e-05, "loss": 2.7875, "step": 54921 }, { "epoch": 2.69, "grad_norm": 0.7411266565322876, "learning_rate": 1.5510028605834988e-05, "loss": 2.9106, "step": 54922 }, { "epoch": 2.69, "grad_norm": 0.7480968832969666, "learning_rate": 1.5505142604843422e-05, "loss": 2.9138, "step": 54923 }, { "epoch": 2.69, "grad_norm": 0.7965890765190125, "learning_rate": 1.5500257353156574e-05, "loss": 2.8865, "step": 54924 }, { "epoch": 2.69, "grad_norm": 0.7516406774520874, "learning_rate": 1.5495372850787337e-05, "loss": 2.8225, "step": 54925 }, { "epoch": 2.69, "grad_norm": 0.8011462092399597, "learning_rate": 1.549048909774846e-05, "loss": 2.7632, "step": 54926 }, { "epoch": 2.69, "grad_norm": 0.7487853169441223, "learning_rate": 1.548560609405294e-05, "loss": 2.9072, "step": 54927 }, { "epoch": 2.69, "grad_norm": 0.7621021866798401, "learning_rate": 1.5480723839713592e-05, "loss": 2.98, "step": 54928 }, { "epoch": 2.69, "grad_norm": 0.7990534901618958, "learning_rate": 1.5475842334743216e-05, "loss": 2.9085, "step": 54929 }, { "epoch": 2.69, "grad_norm": 0.7323390245437622, "learning_rate": 1.5470961579154727e-05, "loss": 2.9067, "step": 54930 }, { "epoch": 2.69, "grad_norm": 0.7541958093643188, "learning_rate": 1.5466081572960952e-05, "loss": 2.6722, "step": 54931 }, { "epoch": 2.69, "grad_norm": 0.7375277876853943, "learning_rate": 1.5461202316174783e-05, "loss": 3.0888, "step": 54932 }, { "epoch": 2.69, "grad_norm": 0.7689357399940491, "learning_rate": 1.5456323808809033e-05, "loss": 2.816, "step": 54933 }, { "epoch": 2.69, "grad_norm": 0.7837892174720764, "learning_rate": 1.545144605087657e-05, "loss": 2.7925, "step": 54934 }, { "epoch": 2.69, "grad_norm": 0.7377240061759949, "learning_rate": 1.5446569042390246e-05, "loss": 2.8422, "step": 54935 }, { "epoch": 2.69, "grad_norm": 0.716027021408081, "learning_rate": 1.544169278336288e-05, "loss": 2.9464, "step": 54936 }, { "epoch": 2.69, "grad_norm": 0.7693297266960144, "learning_rate": 1.5436817273807335e-05, "loss": 2.8894, "step": 54937 }, { "epoch": 2.69, "grad_norm": 0.766839861869812, "learning_rate": 1.5431942513736428e-05, "loss": 2.981, "step": 54938 }, { "epoch": 2.69, "grad_norm": 0.7598301768302917, "learning_rate": 1.5427068503163052e-05, "loss": 3.0225, "step": 54939 }, { "epoch": 2.69, "grad_norm": 0.7385877966880798, "learning_rate": 1.5422195242099966e-05, "loss": 2.9501, "step": 54940 }, { "epoch": 2.69, "grad_norm": 0.7691483497619629, "learning_rate": 1.5417322730560055e-05, "loss": 2.7536, "step": 54941 }, { "epoch": 2.69, "grad_norm": 0.7527814507484436, "learning_rate": 1.541245096855621e-05, "loss": 2.7586, "step": 54942 }, { "epoch": 2.69, "grad_norm": 0.7725242376327515, "learning_rate": 1.5407579956101155e-05, "loss": 2.7225, "step": 54943 }, { "epoch": 2.69, "grad_norm": 0.7793623208999634, "learning_rate": 1.5402709693207816e-05, "loss": 2.852, "step": 54944 }, { "epoch": 2.69, "grad_norm": 0.7739753127098083, "learning_rate": 1.5397840179888876e-05, "loss": 2.8733, "step": 54945 }, { "epoch": 2.69, "grad_norm": 0.7654193043708801, "learning_rate": 1.5392971416157296e-05, "loss": 2.7769, "step": 54946 }, { "epoch": 2.69, "grad_norm": 0.7343190908432007, "learning_rate": 1.5388103402025863e-05, "loss": 3.0919, "step": 54947 }, { "epoch": 2.69, "grad_norm": 0.7587972283363342, "learning_rate": 1.538323613750737e-05, "loss": 3.0448, "step": 54948 }, { "epoch": 2.69, "grad_norm": 0.7687961459159851, "learning_rate": 1.537836962261467e-05, "loss": 2.8649, "step": 54949 }, { "epoch": 2.69, "grad_norm": 0.7500821948051453, "learning_rate": 1.5373503857360592e-05, "loss": 2.7358, "step": 54950 }, { "epoch": 2.69, "grad_norm": 0.7614321708679199, "learning_rate": 1.5368638841757885e-05, "loss": 3.0014, "step": 54951 }, { "epoch": 2.69, "grad_norm": 0.7479397058486938, "learning_rate": 1.5363774575819444e-05, "loss": 2.8803, "step": 54952 }, { "epoch": 2.69, "grad_norm": 0.7300918102264404, "learning_rate": 1.5358911059557986e-05, "loss": 2.969, "step": 54953 }, { "epoch": 2.69, "grad_norm": 0.7309346199035645, "learning_rate": 1.5354048292986444e-05, "loss": 2.9086, "step": 54954 }, { "epoch": 2.69, "grad_norm": 0.7472579479217529, "learning_rate": 1.5349186276117498e-05, "loss": 2.7324, "step": 54955 }, { "epoch": 2.69, "grad_norm": 0.7331668734550476, "learning_rate": 1.5344325008964075e-05, "loss": 2.9536, "step": 54956 }, { "epoch": 2.69, "grad_norm": 0.7437717318534851, "learning_rate": 1.53394644915389e-05, "loss": 2.7823, "step": 54957 }, { "epoch": 2.69, "grad_norm": 0.7608617544174194, "learning_rate": 1.5334604723854727e-05, "loss": 2.9898, "step": 54958 }, { "epoch": 2.69, "grad_norm": 0.8812674880027771, "learning_rate": 1.5329745705924512e-05, "loss": 2.9538, "step": 54959 }, { "epoch": 2.69, "grad_norm": 0.7427992224693298, "learning_rate": 1.532488743776088e-05, "loss": 2.6872, "step": 54960 }, { "epoch": 2.69, "grad_norm": 0.7801377773284912, "learning_rate": 1.532002991937672e-05, "loss": 2.7277, "step": 54961 }, { "epoch": 2.69, "grad_norm": 0.7416569590568542, "learning_rate": 1.5315173150784853e-05, "loss": 2.9374, "step": 54962 }, { "epoch": 2.69, "grad_norm": 0.7589424252510071, "learning_rate": 1.5310317131998007e-05, "loss": 2.9005, "step": 54963 }, { "epoch": 2.69, "grad_norm": 0.7642767429351807, "learning_rate": 1.5305461863029e-05, "loss": 2.8343, "step": 54964 }, { "epoch": 2.69, "grad_norm": 0.7222141027450562, "learning_rate": 1.530060734389066e-05, "loss": 2.8669, "step": 54965 }, { "epoch": 2.69, "grad_norm": 0.8169352412223816, "learning_rate": 1.529575357459567e-05, "loss": 2.7694, "step": 54966 }, { "epoch": 2.69, "grad_norm": 0.7054728269577026, "learning_rate": 1.5290900555156893e-05, "loss": 2.8031, "step": 54967 }, { "epoch": 2.69, "grad_norm": 0.7217891812324524, "learning_rate": 1.5286048285587082e-05, "loss": 2.7711, "step": 54968 }, { "epoch": 2.69, "grad_norm": 0.7295551300048828, "learning_rate": 1.528119676589903e-05, "loss": 2.9962, "step": 54969 }, { "epoch": 2.69, "grad_norm": 0.7057387232780457, "learning_rate": 1.527634599610549e-05, "loss": 3.0983, "step": 54970 }, { "epoch": 2.69, "grad_norm": 0.779369056224823, "learning_rate": 1.5271495976219316e-05, "loss": 2.9026, "step": 54971 }, { "epoch": 2.69, "grad_norm": 0.7789977788925171, "learning_rate": 1.5266646706253205e-05, "loss": 2.9408, "step": 54972 }, { "epoch": 2.69, "grad_norm": 0.7889942526817322, "learning_rate": 1.526179818621991e-05, "loss": 2.7538, "step": 54973 }, { "epoch": 2.69, "grad_norm": 0.7462673187255859, "learning_rate": 1.5256950416132286e-05, "loss": 2.8916, "step": 54974 }, { "epoch": 2.69, "grad_norm": 0.7497276067733765, "learning_rate": 1.5252103396003023e-05, "loss": 2.956, "step": 54975 }, { "epoch": 2.69, "grad_norm": 0.7257047891616821, "learning_rate": 1.524725712584488e-05, "loss": 2.831, "step": 54976 }, { "epoch": 2.69, "grad_norm": 0.7067046761512756, "learning_rate": 1.5242411605670746e-05, "loss": 2.7964, "step": 54977 }, { "epoch": 2.69, "grad_norm": 0.7375363111495972, "learning_rate": 1.5237566835493275e-05, "loss": 2.8754, "step": 54978 }, { "epoch": 2.69, "grad_norm": 0.7760704159736633, "learning_rate": 1.523272281532526e-05, "loss": 2.8453, "step": 54979 }, { "epoch": 2.69, "grad_norm": 0.772099494934082, "learning_rate": 1.5227879545179388e-05, "loss": 2.8503, "step": 54980 }, { "epoch": 2.69, "grad_norm": 0.7476798295974731, "learning_rate": 1.5223037025068485e-05, "loss": 2.9263, "step": 54981 }, { "epoch": 2.69, "grad_norm": 0.7684528827667236, "learning_rate": 1.521819525500534e-05, "loss": 2.8822, "step": 54982 }, { "epoch": 2.69, "grad_norm": 0.7638944387435913, "learning_rate": 1.5213354235002606e-05, "loss": 2.865, "step": 54983 }, { "epoch": 2.69, "grad_norm": 0.7224180102348328, "learning_rate": 1.5208513965073144e-05, "loss": 2.8717, "step": 54984 }, { "epoch": 2.69, "grad_norm": 0.8036991953849792, "learning_rate": 1.5203674445229608e-05, "loss": 2.8207, "step": 54985 }, { "epoch": 2.69, "grad_norm": 0.701972246170044, "learning_rate": 1.5198835675484821e-05, "loss": 2.6852, "step": 54986 }, { "epoch": 2.69, "grad_norm": 0.7487140893936157, "learning_rate": 1.5193997655851508e-05, "loss": 2.9361, "step": 54987 }, { "epoch": 2.69, "grad_norm": 0.7961825132369995, "learning_rate": 1.5189160386342324e-05, "loss": 2.8853, "step": 54988 }, { "epoch": 2.69, "grad_norm": 0.7591695785522461, "learning_rate": 1.5184323866970127e-05, "loss": 2.8568, "step": 54989 }, { "epoch": 2.69, "grad_norm": 0.8069265484809875, "learning_rate": 1.5179488097747571e-05, "loss": 2.7135, "step": 54990 }, { "epoch": 2.69, "grad_norm": 0.729277491569519, "learning_rate": 1.5174653078687449e-05, "loss": 2.9049, "step": 54991 }, { "epoch": 2.7, "grad_norm": 0.7785135507583618, "learning_rate": 1.516981880980248e-05, "loss": 2.9743, "step": 54992 }, { "epoch": 2.7, "grad_norm": 0.7566962242126465, "learning_rate": 1.5164985291105392e-05, "loss": 2.7836, "step": 54993 }, { "epoch": 2.7, "grad_norm": 0.7411288022994995, "learning_rate": 1.5160152522608937e-05, "loss": 2.9298, "step": 54994 }, { "epoch": 2.7, "grad_norm": 0.7546302676200867, "learning_rate": 1.5155320504325773e-05, "loss": 3.0254, "step": 54995 }, { "epoch": 2.7, "grad_norm": 0.7514198422431946, "learning_rate": 1.5150489236268692e-05, "loss": 2.8508, "step": 54996 }, { "epoch": 2.7, "grad_norm": 0.7835859060287476, "learning_rate": 1.5145658718450415e-05, "loss": 2.929, "step": 54997 }, { "epoch": 2.7, "grad_norm": 0.7201144695281982, "learning_rate": 1.5140828950883633e-05, "loss": 2.9676, "step": 54998 }, { "epoch": 2.7, "grad_norm": 0.7485184073448181, "learning_rate": 1.51359999335811e-05, "loss": 3.0914, "step": 54999 }, { "epoch": 2.7, "grad_norm": 0.7460274696350098, "learning_rate": 1.5131171666555507e-05, "loss": 2.8109, "step": 55000 }, { "epoch": 2.7, "grad_norm": 0.7447788715362549, "learning_rate": 1.5126344149819613e-05, "loss": 2.808, "step": 55001 }, { "epoch": 2.7, "grad_norm": 0.7432894706726074, "learning_rate": 1.5121517383386073e-05, "loss": 2.8884, "step": 55002 }, { "epoch": 2.7, "grad_norm": 0.7191660404205322, "learning_rate": 1.5116691367267642e-05, "loss": 2.7407, "step": 55003 }, { "epoch": 2.7, "grad_norm": 0.7479361295700073, "learning_rate": 1.511186610147701e-05, "loss": 2.7467, "step": 55004 }, { "epoch": 2.7, "grad_norm": 0.7603451609611511, "learning_rate": 1.5107041586026902e-05, "loss": 3.1482, "step": 55005 }, { "epoch": 2.7, "grad_norm": 0.7904460430145264, "learning_rate": 1.5102217820929974e-05, "loss": 2.9543, "step": 55006 }, { "epoch": 2.7, "grad_norm": 0.7052236795425415, "learning_rate": 1.5097394806199048e-05, "loss": 3.0005, "step": 55007 }, { "epoch": 2.7, "grad_norm": 0.824068546295166, "learning_rate": 1.5092572541846748e-05, "loss": 2.8321, "step": 55008 }, { "epoch": 2.7, "grad_norm": 0.7605258822441101, "learning_rate": 1.5087751027885764e-05, "loss": 2.7439, "step": 55009 }, { "epoch": 2.7, "grad_norm": 0.7721036076545715, "learning_rate": 1.5082930264328786e-05, "loss": 2.9184, "step": 55010 }, { "epoch": 2.7, "grad_norm": 0.7376681566238403, "learning_rate": 1.5078110251188535e-05, "loss": 2.7824, "step": 55011 }, { "epoch": 2.7, "grad_norm": 0.7896484732627869, "learning_rate": 1.5073290988477737e-05, "loss": 2.9285, "step": 55012 }, { "epoch": 2.7, "grad_norm": 0.7744725346565247, "learning_rate": 1.5068472476209048e-05, "loss": 2.919, "step": 55013 }, { "epoch": 2.7, "grad_norm": 0.7636879086494446, "learning_rate": 1.5063654714395157e-05, "loss": 2.7188, "step": 55014 }, { "epoch": 2.7, "grad_norm": 0.7519050240516663, "learning_rate": 1.5058837703048788e-05, "loss": 2.9375, "step": 55015 }, { "epoch": 2.7, "grad_norm": 0.7617154121398926, "learning_rate": 1.505402144218263e-05, "loss": 2.8453, "step": 55016 }, { "epoch": 2.7, "grad_norm": 0.7616671919822693, "learning_rate": 1.5049205931809238e-05, "loss": 2.9701, "step": 55017 }, { "epoch": 2.7, "grad_norm": 0.7466253042221069, "learning_rate": 1.5044391171941439e-05, "loss": 2.8328, "step": 55018 }, { "epoch": 2.7, "grad_norm": 0.7612420320510864, "learning_rate": 1.5039577162591921e-05, "loss": 2.83, "step": 55019 }, { "epoch": 2.7, "grad_norm": 0.7458781003952026, "learning_rate": 1.5034763903773239e-05, "loss": 2.8543, "step": 55020 }, { "epoch": 2.7, "grad_norm": 0.7177784442901611, "learning_rate": 1.5029951395498219e-05, "loss": 2.9089, "step": 55021 }, { "epoch": 2.7, "grad_norm": 0.7244158983230591, "learning_rate": 1.5025139637779415e-05, "loss": 2.7326, "step": 55022 }, { "epoch": 2.7, "grad_norm": 0.7530938386917114, "learning_rate": 1.5020328630629586e-05, "loss": 2.9726, "step": 55023 }, { "epoch": 2.7, "grad_norm": 0.7853320240974426, "learning_rate": 1.5015518374061352e-05, "loss": 2.8278, "step": 55024 }, { "epoch": 2.7, "grad_norm": 0.7918479442596436, "learning_rate": 1.5010708868087373e-05, "loss": 2.889, "step": 55025 }, { "epoch": 2.7, "grad_norm": 0.748648464679718, "learning_rate": 1.5005900112720371e-05, "loss": 3.0377, "step": 55026 }, { "epoch": 2.7, "grad_norm": 0.755605161190033, "learning_rate": 1.5001092107972968e-05, "loss": 2.9121, "step": 55027 }, { "epoch": 2.7, "grad_norm": 0.7698187828063965, "learning_rate": 1.4996284853857787e-05, "loss": 2.7703, "step": 55028 }, { "epoch": 2.7, "grad_norm": 0.683066189289093, "learning_rate": 1.499147835038762e-05, "loss": 2.8249, "step": 55029 }, { "epoch": 2.7, "grad_norm": 0.7477896809577942, "learning_rate": 1.4986672597575056e-05, "loss": 2.9249, "step": 55030 }, { "epoch": 2.7, "grad_norm": 0.7523871064186096, "learning_rate": 1.4981867595432718e-05, "loss": 2.9331, "step": 55031 }, { "epoch": 2.7, "grad_norm": 0.7780340909957886, "learning_rate": 1.4977063343973261e-05, "loss": 2.859, "step": 55032 }, { "epoch": 2.7, "grad_norm": 0.7800566554069519, "learning_rate": 1.4972259843209344e-05, "loss": 2.6747, "step": 55033 }, { "epoch": 2.7, "grad_norm": 0.7878885269165039, "learning_rate": 1.496745709315369e-05, "loss": 3.0291, "step": 55034 }, { "epoch": 2.7, "grad_norm": 0.7875646352767944, "learning_rate": 1.4962655093818887e-05, "loss": 2.8538, "step": 55035 }, { "epoch": 2.7, "grad_norm": 0.7212032079696655, "learning_rate": 1.4957853845217594e-05, "loss": 2.7787, "step": 55036 }, { "epoch": 2.7, "grad_norm": 0.7758206725120544, "learning_rate": 1.4953053347362498e-05, "loss": 2.8677, "step": 55037 }, { "epoch": 2.7, "grad_norm": 0.7322551012039185, "learning_rate": 1.4948253600266124e-05, "loss": 3.0312, "step": 55038 }, { "epoch": 2.7, "grad_norm": 0.7753440141677856, "learning_rate": 1.494345460394123e-05, "loss": 2.9871, "step": 55039 }, { "epoch": 2.7, "grad_norm": 0.7763801217079163, "learning_rate": 1.4938656358400402e-05, "loss": 2.8968, "step": 55040 }, { "epoch": 2.7, "grad_norm": 0.7767189741134644, "learning_rate": 1.4933858863656334e-05, "loss": 2.7693, "step": 55041 }, { "epoch": 2.7, "grad_norm": 0.749442458152771, "learning_rate": 1.4929062119721546e-05, "loss": 2.8442, "step": 55042 }, { "epoch": 2.7, "grad_norm": 0.7398672103881836, "learning_rate": 1.4924266126608763e-05, "loss": 2.7577, "step": 55043 }, { "epoch": 2.7, "grad_norm": 0.7189925909042358, "learning_rate": 1.4919470884330642e-05, "loss": 3.0464, "step": 55044 }, { "epoch": 2.7, "grad_norm": 0.758366584777832, "learning_rate": 1.4914676392899772e-05, "loss": 3.0093, "step": 55045 }, { "epoch": 2.7, "grad_norm": 0.7701453566551208, "learning_rate": 1.4909882652328775e-05, "loss": 3.0331, "step": 55046 }, { "epoch": 2.7, "grad_norm": 0.7704036831855774, "learning_rate": 1.4905089662630243e-05, "loss": 2.8136, "step": 55047 }, { "epoch": 2.7, "grad_norm": 0.781955897808075, "learning_rate": 1.4900297423816832e-05, "loss": 3.0821, "step": 55048 }, { "epoch": 2.7, "grad_norm": 0.7298367619514465, "learning_rate": 1.489550593590123e-05, "loss": 3.0272, "step": 55049 }, { "epoch": 2.7, "grad_norm": 0.7215245366096497, "learning_rate": 1.489071519889593e-05, "loss": 2.8109, "step": 55050 }, { "epoch": 2.7, "grad_norm": 0.7225742340087891, "learning_rate": 1.4885925212813688e-05, "loss": 2.7193, "step": 55051 }, { "epoch": 2.7, "grad_norm": 0.7542920708656311, "learning_rate": 1.4881135977667025e-05, "loss": 2.8853, "step": 55052 }, { "epoch": 2.7, "grad_norm": 0.7337054014205933, "learning_rate": 1.4876347493468532e-05, "loss": 2.9647, "step": 55053 }, { "epoch": 2.7, "grad_norm": 0.7353600859642029, "learning_rate": 1.48715597602309e-05, "loss": 2.9208, "step": 55054 }, { "epoch": 2.7, "grad_norm": 0.7551136016845703, "learning_rate": 1.4866772777966685e-05, "loss": 2.9143, "step": 55055 }, { "epoch": 2.7, "grad_norm": 0.7284271717071533, "learning_rate": 1.4861986546688543e-05, "loss": 2.7553, "step": 55056 }, { "epoch": 2.7, "grad_norm": 0.769647479057312, "learning_rate": 1.485720106640903e-05, "loss": 2.7599, "step": 55057 }, { "epoch": 2.7, "grad_norm": 0.793817937374115, "learning_rate": 1.485241633714077e-05, "loss": 2.8884, "step": 55058 }, { "epoch": 2.7, "grad_norm": 0.7645768523216248, "learning_rate": 1.4847632358896422e-05, "loss": 3.0118, "step": 55059 }, { "epoch": 2.7, "grad_norm": 0.7738844156265259, "learning_rate": 1.4842849131688472e-05, "loss": 3.0922, "step": 55060 }, { "epoch": 2.7, "grad_norm": 0.7235080003738403, "learning_rate": 1.4838066655529613e-05, "loss": 2.8157, "step": 55061 }, { "epoch": 2.7, "grad_norm": 0.7338597774505615, "learning_rate": 1.4833284930432366e-05, "loss": 3.099, "step": 55062 }, { "epoch": 2.7, "grad_norm": 0.7195146679878235, "learning_rate": 1.4828503956409355e-05, "loss": 2.7882, "step": 55063 }, { "epoch": 2.7, "grad_norm": 0.7406694889068604, "learning_rate": 1.4823723733473237e-05, "loss": 2.6486, "step": 55064 }, { "epoch": 2.7, "grad_norm": 0.7771344184875488, "learning_rate": 1.48189442616365e-05, "loss": 3.0643, "step": 55065 }, { "epoch": 2.7, "grad_norm": 0.7858314514160156, "learning_rate": 1.4814165540911805e-05, "loss": 2.9493, "step": 55066 }, { "epoch": 2.7, "grad_norm": 0.8330277800559998, "learning_rate": 1.4809387571311738e-05, "loss": 2.9517, "step": 55067 }, { "epoch": 2.7, "grad_norm": 0.7209185361862183, "learning_rate": 1.4804610352848823e-05, "loss": 2.8796, "step": 55068 }, { "epoch": 2.7, "grad_norm": 0.7240145802497864, "learning_rate": 1.4799833885535684e-05, "loss": 2.7944, "step": 55069 }, { "epoch": 2.7, "grad_norm": 0.7134544253349304, "learning_rate": 1.4795058169384877e-05, "loss": 2.7025, "step": 55070 }, { "epoch": 2.7, "grad_norm": 0.7314766049385071, "learning_rate": 1.4790283204409059e-05, "loss": 2.7722, "step": 55071 }, { "epoch": 2.7, "grad_norm": 0.7572324275970459, "learning_rate": 1.4785508990620654e-05, "loss": 3.037, "step": 55072 }, { "epoch": 2.7, "grad_norm": 0.7572822570800781, "learning_rate": 1.4780735528032418e-05, "loss": 2.9088, "step": 55073 }, { "epoch": 2.7, "grad_norm": 0.7391690611839294, "learning_rate": 1.4775962816656806e-05, "loss": 2.8931, "step": 55074 }, { "epoch": 2.7, "grad_norm": 0.7284706830978394, "learning_rate": 1.4771190856506377e-05, "loss": 3.114, "step": 55075 }, { "epoch": 2.7, "grad_norm": 0.7849894165992737, "learning_rate": 1.4766419647593785e-05, "loss": 2.9319, "step": 55076 }, { "epoch": 2.7, "grad_norm": 0.7333627939224243, "learning_rate": 1.476164918993149e-05, "loss": 3.0572, "step": 55077 }, { "epoch": 2.7, "grad_norm": 0.7046851515769958, "learning_rate": 1.4756879483532146e-05, "loss": 2.9404, "step": 55078 }, { "epoch": 2.7, "grad_norm": 0.7099255919456482, "learning_rate": 1.4752110528408312e-05, "loss": 2.7765, "step": 55079 }, { "epoch": 2.7, "grad_norm": 0.7698432803153992, "learning_rate": 1.4747342324572509e-05, "loss": 2.8692, "step": 55080 }, { "epoch": 2.7, "grad_norm": 0.7886145114898682, "learning_rate": 1.4742574872037327e-05, "loss": 2.9755, "step": 55081 }, { "epoch": 2.7, "grad_norm": 0.7380675077438354, "learning_rate": 1.4737808170815258e-05, "loss": 2.6219, "step": 55082 }, { "epoch": 2.7, "grad_norm": 0.80084228515625, "learning_rate": 1.4733042220918923e-05, "loss": 2.9648, "step": 55083 }, { "epoch": 2.7, "grad_norm": 0.7491824626922607, "learning_rate": 1.4728277022360846e-05, "loss": 2.7953, "step": 55084 }, { "epoch": 2.7, "grad_norm": 0.7680736184120178, "learning_rate": 1.4723512575153584e-05, "loss": 2.9031, "step": 55085 }, { "epoch": 2.7, "grad_norm": 0.76555997133255, "learning_rate": 1.4718748879309727e-05, "loss": 2.863, "step": 55086 }, { "epoch": 2.7, "grad_norm": 0.7460500597953796, "learning_rate": 1.4713985934841731e-05, "loss": 2.8605, "step": 55087 }, { "epoch": 2.7, "grad_norm": 0.7447842359542847, "learning_rate": 1.4709223741762221e-05, "loss": 2.8689, "step": 55088 }, { "epoch": 2.7, "grad_norm": 0.7283835411071777, "learning_rate": 1.470446230008372e-05, "loss": 2.7695, "step": 55089 }, { "epoch": 2.7, "grad_norm": 0.775287926197052, "learning_rate": 1.4699701609818714e-05, "loss": 2.8405, "step": 55090 }, { "epoch": 2.7, "grad_norm": 0.7444217801094055, "learning_rate": 1.4694941670979832e-05, "loss": 2.9038, "step": 55091 }, { "epoch": 2.7, "grad_norm": 0.7645972371101379, "learning_rate": 1.4690182483579527e-05, "loss": 2.7941, "step": 55092 }, { "epoch": 2.7, "grad_norm": 0.7570917010307312, "learning_rate": 1.468542404763039e-05, "loss": 2.9379, "step": 55093 }, { "epoch": 2.7, "grad_norm": 0.7789246439933777, "learning_rate": 1.4680666363144944e-05, "loss": 3.1158, "step": 55094 }, { "epoch": 2.7, "grad_norm": 0.7554366588592529, "learning_rate": 1.4675909430135713e-05, "loss": 2.782, "step": 55095 }, { "epoch": 2.7, "grad_norm": 0.7414656281471252, "learning_rate": 1.4671153248615253e-05, "loss": 2.6158, "step": 55096 }, { "epoch": 2.7, "grad_norm": 0.7360018491744995, "learning_rate": 1.4666397818596021e-05, "loss": 2.9317, "step": 55097 }, { "epoch": 2.7, "grad_norm": 0.740975022315979, "learning_rate": 1.4661643140090606e-05, "loss": 2.8232, "step": 55098 }, { "epoch": 2.7, "grad_norm": 0.7482433319091797, "learning_rate": 1.4656889213111467e-05, "loss": 2.8816, "step": 55099 }, { "epoch": 2.7, "grad_norm": 0.7271791696548462, "learning_rate": 1.4652136037671158e-05, "loss": 2.9859, "step": 55100 }, { "epoch": 2.7, "grad_norm": 0.7431482076644897, "learning_rate": 1.464738361378227e-05, "loss": 2.9572, "step": 55101 }, { "epoch": 2.7, "grad_norm": 0.7354775071144104, "learning_rate": 1.4642631941457194e-05, "loss": 3.016, "step": 55102 }, { "epoch": 2.7, "grad_norm": 0.7678640484809875, "learning_rate": 1.463788102070862e-05, "loss": 2.709, "step": 55103 }, { "epoch": 2.7, "grad_norm": 0.7526733875274658, "learning_rate": 1.4633130851548835e-05, "loss": 2.9976, "step": 55104 }, { "epoch": 2.7, "grad_norm": 0.740717887878418, "learning_rate": 1.4628381433990466e-05, "loss": 2.8201, "step": 55105 }, { "epoch": 2.7, "grad_norm": 0.7562867999076843, "learning_rate": 1.4623632768046101e-05, "loss": 2.9115, "step": 55106 }, { "epoch": 2.7, "grad_norm": 0.7273349761962891, "learning_rate": 1.4618884853728063e-05, "loss": 2.9555, "step": 55107 }, { "epoch": 2.7, "grad_norm": 0.7348599433898926, "learning_rate": 1.4614137691049044e-05, "loss": 2.7613, "step": 55108 }, { "epoch": 2.7, "grad_norm": 0.7380170226097107, "learning_rate": 1.46093912800214e-05, "loss": 2.8046, "step": 55109 }, { "epoch": 2.7, "grad_norm": 0.7288892269134521, "learning_rate": 1.4604645620657751e-05, "loss": 2.7613, "step": 55110 }, { "epoch": 2.7, "grad_norm": 0.7518560290336609, "learning_rate": 1.4599900712970524e-05, "loss": 2.834, "step": 55111 }, { "epoch": 2.7, "grad_norm": 0.792517900466919, "learning_rate": 1.459515655697221e-05, "loss": 3.1014, "step": 55112 }, { "epoch": 2.7, "grad_norm": 0.7188156247138977, "learning_rate": 1.4590413152675362e-05, "loss": 3.0312, "step": 55113 }, { "epoch": 2.7, "grad_norm": 0.7061635255813599, "learning_rate": 1.4585670500092405e-05, "loss": 2.9461, "step": 55114 }, { "epoch": 2.7, "grad_norm": 0.7597672939300537, "learning_rate": 1.458092859923583e-05, "loss": 2.8916, "step": 55115 }, { "epoch": 2.7, "grad_norm": 0.7571054697036743, "learning_rate": 1.4576187450118227e-05, "loss": 3.2875, "step": 55116 }, { "epoch": 2.7, "grad_norm": 0.7508683204650879, "learning_rate": 1.4571447052752017e-05, "loss": 3.0029, "step": 55117 }, { "epoch": 2.7, "grad_norm": 0.7968552708625793, "learning_rate": 1.4566707407149691e-05, "loss": 2.9155, "step": 55118 }, { "epoch": 2.7, "grad_norm": 0.7436490654945374, "learning_rate": 1.4561968513323641e-05, "loss": 2.799, "step": 55119 }, { "epoch": 2.7, "grad_norm": 0.6988527178764343, "learning_rate": 1.4557230371286488e-05, "loss": 2.8147, "step": 55120 }, { "epoch": 2.7, "grad_norm": 0.7517215609550476, "learning_rate": 1.4552492981050656e-05, "loss": 2.9274, "step": 55121 }, { "epoch": 2.7, "grad_norm": 0.7683263421058655, "learning_rate": 1.4547756342628603e-05, "loss": 2.8543, "step": 55122 }, { "epoch": 2.7, "grad_norm": 0.7408250570297241, "learning_rate": 1.454302045603285e-05, "loss": 2.9393, "step": 55123 }, { "epoch": 2.7, "grad_norm": 0.737571656703949, "learning_rate": 1.4538285321275823e-05, "loss": 2.8324, "step": 55124 }, { "epoch": 2.7, "grad_norm": 0.7951434850692749, "learning_rate": 1.4533550938370008e-05, "loss": 2.7856, "step": 55125 }, { "epoch": 2.7, "grad_norm": 0.7539615631103516, "learning_rate": 1.4528817307327933e-05, "loss": 2.8728, "step": 55126 }, { "epoch": 2.7, "grad_norm": 0.7781716585159302, "learning_rate": 1.4524084428161953e-05, "loss": 2.78, "step": 55127 }, { "epoch": 2.7, "grad_norm": 0.7636751532554626, "learning_rate": 1.4519352300884623e-05, "loss": 2.9116, "step": 55128 }, { "epoch": 2.7, "grad_norm": 0.7897639274597168, "learning_rate": 1.451462092550837e-05, "loss": 2.8244, "step": 55129 }, { "epoch": 2.7, "grad_norm": 0.7303542494773865, "learning_rate": 1.4509890302045612e-05, "loss": 2.9801, "step": 55130 }, { "epoch": 2.7, "grad_norm": 0.7917243838310242, "learning_rate": 1.4505160430508944e-05, "loss": 2.9172, "step": 55131 }, { "epoch": 2.7, "grad_norm": 0.7893748879432678, "learning_rate": 1.450043131091072e-05, "loss": 3.0641, "step": 55132 }, { "epoch": 2.7, "grad_norm": 0.7223455309867859, "learning_rate": 1.4495702943263432e-05, "loss": 2.8755, "step": 55133 }, { "epoch": 2.7, "grad_norm": 0.7736308574676514, "learning_rate": 1.4490975327579436e-05, "loss": 3.098, "step": 55134 }, { "epoch": 2.7, "grad_norm": 0.7550448179244995, "learning_rate": 1.4486248463871287e-05, "loss": 2.8793, "step": 55135 }, { "epoch": 2.7, "grad_norm": 0.7876956462860107, "learning_rate": 1.4481522352151475e-05, "loss": 2.8375, "step": 55136 }, { "epoch": 2.7, "grad_norm": 0.7860339283943176, "learning_rate": 1.4476796992432327e-05, "loss": 3.0902, "step": 55137 }, { "epoch": 2.7, "grad_norm": 0.7922260761260986, "learning_rate": 1.4472072384726363e-05, "loss": 2.7761, "step": 55138 }, { "epoch": 2.7, "grad_norm": 0.7500098347663879, "learning_rate": 1.4467348529046041e-05, "loss": 2.9636, "step": 55139 }, { "epoch": 2.7, "grad_norm": 0.7248625159263611, "learning_rate": 1.4462625425403718e-05, "loss": 2.9241, "step": 55140 }, { "epoch": 2.7, "grad_norm": 0.7339884638786316, "learning_rate": 1.4457903073811916e-05, "loss": 2.9563, "step": 55141 }, { "epoch": 2.7, "grad_norm": 0.7312856316566467, "learning_rate": 1.4453181474283026e-05, "loss": 2.8138, "step": 55142 }, { "epoch": 2.7, "grad_norm": 0.7703109979629517, "learning_rate": 1.4448460626829505e-05, "loss": 2.9011, "step": 55143 }, { "epoch": 2.7, "grad_norm": 0.7444011569023132, "learning_rate": 1.4443740531463777e-05, "loss": 2.8922, "step": 55144 }, { "epoch": 2.7, "grad_norm": 0.7480001449584961, "learning_rate": 1.4439021188198264e-05, "loss": 2.7758, "step": 55145 }, { "epoch": 2.7, "grad_norm": 0.7840633988380432, "learning_rate": 1.4434302597045454e-05, "loss": 3.0501, "step": 55146 }, { "epoch": 2.7, "grad_norm": 0.8444885015487671, "learning_rate": 1.4429584758017743e-05, "loss": 2.9258, "step": 55147 }, { "epoch": 2.7, "grad_norm": 0.7418254017829895, "learning_rate": 1.442486767112755e-05, "loss": 2.9051, "step": 55148 }, { "epoch": 2.7, "grad_norm": 0.75757896900177, "learning_rate": 1.4420151336387231e-05, "loss": 2.8969, "step": 55149 }, { "epoch": 2.7, "grad_norm": 0.7526350021362305, "learning_rate": 1.4415435753809313e-05, "loss": 2.8936, "step": 55150 }, { "epoch": 2.7, "grad_norm": 0.7957975268363953, "learning_rate": 1.4410720923406183e-05, "loss": 2.763, "step": 55151 }, { "epoch": 2.7, "grad_norm": 0.7414007186889648, "learning_rate": 1.4406006845190233e-05, "loss": 2.7035, "step": 55152 }, { "epoch": 2.7, "grad_norm": 0.7431672215461731, "learning_rate": 1.4401293519173918e-05, "loss": 2.8199, "step": 55153 }, { "epoch": 2.7, "grad_norm": 0.7612506151199341, "learning_rate": 1.439658094536963e-05, "loss": 2.7203, "step": 55154 }, { "epoch": 2.7, "grad_norm": 0.7056882977485657, "learning_rate": 1.4391869123789756e-05, "loss": 2.8934, "step": 55155 }, { "epoch": 2.7, "grad_norm": 0.7619946002960205, "learning_rate": 1.4387158054446756e-05, "loss": 2.7752, "step": 55156 }, { "epoch": 2.7, "grad_norm": 0.7698616981506348, "learning_rate": 1.4382447737352987e-05, "loss": 2.8134, "step": 55157 }, { "epoch": 2.7, "grad_norm": 0.8463789224624634, "learning_rate": 1.4377738172520903e-05, "loss": 2.8462, "step": 55158 }, { "epoch": 2.7, "grad_norm": 0.7743161916732788, "learning_rate": 1.4373029359962862e-05, "loss": 2.8833, "step": 55159 }, { "epoch": 2.7, "grad_norm": 0.7336245775222778, "learning_rate": 1.4368321299691355e-05, "loss": 2.9437, "step": 55160 }, { "epoch": 2.7, "grad_norm": 0.7166826128959656, "learning_rate": 1.436361399171867e-05, "loss": 3.0139, "step": 55161 }, { "epoch": 2.7, "grad_norm": 0.7472900748252869, "learning_rate": 1.4358907436057266e-05, "loss": 2.816, "step": 55162 }, { "epoch": 2.7, "grad_norm": 0.7503640055656433, "learning_rate": 1.435420163271953e-05, "loss": 2.651, "step": 55163 }, { "epoch": 2.7, "grad_norm": 0.7015929222106934, "learning_rate": 1.4349496581717823e-05, "loss": 2.8712, "step": 55164 }, { "epoch": 2.7, "grad_norm": 0.7873293161392212, "learning_rate": 1.4344792283064565e-05, "loss": 2.7243, "step": 55165 }, { "epoch": 2.7, "grad_norm": 0.7600153684616089, "learning_rate": 1.4340088736772214e-05, "loss": 3.0151, "step": 55166 }, { "epoch": 2.7, "grad_norm": 0.8037029504776001, "learning_rate": 1.4335385942853028e-05, "loss": 2.9545, "step": 55167 }, { "epoch": 2.7, "grad_norm": 0.7690410017967224, "learning_rate": 1.4330683901319496e-05, "loss": 2.8564, "step": 55168 }, { "epoch": 2.7, "grad_norm": 0.7575297355651855, "learning_rate": 1.4325982612183973e-05, "loss": 2.8627, "step": 55169 }, { "epoch": 2.7, "grad_norm": 0.7149128913879395, "learning_rate": 1.4321282075458784e-05, "loss": 2.8766, "step": 55170 }, { "epoch": 2.7, "grad_norm": 0.7330735325813293, "learning_rate": 1.431658229115642e-05, "loss": 2.8966, "step": 55171 }, { "epoch": 2.7, "grad_norm": 0.725845456123352, "learning_rate": 1.4311883259289136e-05, "loss": 2.7962, "step": 55172 }, { "epoch": 2.7, "grad_norm": 0.712296724319458, "learning_rate": 1.4307184979869391e-05, "loss": 2.8348, "step": 55173 }, { "epoch": 2.7, "grad_norm": 0.8258801102638245, "learning_rate": 1.4302487452909539e-05, "loss": 2.9634, "step": 55174 }, { "epoch": 2.7, "grad_norm": 0.7133929133415222, "learning_rate": 1.4297790678421972e-05, "loss": 2.7334, "step": 55175 }, { "epoch": 2.7, "grad_norm": 0.7767179012298584, "learning_rate": 1.4293094656419046e-05, "loss": 2.8227, "step": 55176 }, { "epoch": 2.7, "grad_norm": 0.7630220651626587, "learning_rate": 1.4288399386913086e-05, "loss": 2.9548, "step": 55177 }, { "epoch": 2.7, "grad_norm": 0.756858766078949, "learning_rate": 1.4283704869916512e-05, "loss": 3.0724, "step": 55178 }, { "epoch": 2.7, "grad_norm": 0.734491765499115, "learning_rate": 1.4279011105441651e-05, "loss": 2.8499, "step": 55179 }, { "epoch": 2.7, "grad_norm": 0.7555728554725647, "learning_rate": 1.4274318093500925e-05, "loss": 3.0742, "step": 55180 }, { "epoch": 2.7, "grad_norm": 0.779068648815155, "learning_rate": 1.4269625834106625e-05, "loss": 3.0113, "step": 55181 }, { "epoch": 2.7, "grad_norm": 0.7292513847351074, "learning_rate": 1.4264934327271172e-05, "loss": 2.9021, "step": 55182 }, { "epoch": 2.7, "grad_norm": 0.7384148240089417, "learning_rate": 1.4260243573006891e-05, "loss": 2.6843, "step": 55183 }, { "epoch": 2.7, "grad_norm": 0.72471022605896, "learning_rate": 1.4255553571326106e-05, "loss": 3.1414, "step": 55184 }, { "epoch": 2.7, "grad_norm": 0.761789083480835, "learning_rate": 1.425086432224124e-05, "loss": 3.0294, "step": 55185 }, { "epoch": 2.7, "grad_norm": 0.7743604779243469, "learning_rate": 1.424617582576455e-05, "loss": 2.8922, "step": 55186 }, { "epoch": 2.7, "grad_norm": 0.7332843542098999, "learning_rate": 1.4241488081908426e-05, "loss": 2.8659, "step": 55187 }, { "epoch": 2.7, "grad_norm": 0.739586353302002, "learning_rate": 1.423680109068529e-05, "loss": 2.7717, "step": 55188 }, { "epoch": 2.7, "grad_norm": 0.7435079216957092, "learning_rate": 1.4232114852107368e-05, "loss": 2.8082, "step": 55189 }, { "epoch": 2.7, "grad_norm": 0.7537873983383179, "learning_rate": 1.4227429366187115e-05, "loss": 2.7587, "step": 55190 }, { "epoch": 2.7, "grad_norm": 0.7387575507164001, "learning_rate": 1.4222744632936822e-05, "loss": 2.9726, "step": 55191 }, { "epoch": 2.7, "grad_norm": 0.762256920337677, "learning_rate": 1.421806065236878e-05, "loss": 2.7777, "step": 55192 }, { "epoch": 2.7, "grad_norm": 0.7154507637023926, "learning_rate": 1.4213377424495375e-05, "loss": 2.9309, "step": 55193 }, { "epoch": 2.7, "grad_norm": 0.7694316506385803, "learning_rate": 1.4208694949328903e-05, "loss": 2.4981, "step": 55194 }, { "epoch": 2.7, "grad_norm": 0.7170578241348267, "learning_rate": 1.4204013226881784e-05, "loss": 3.0198, "step": 55195 }, { "epoch": 2.71, "grad_norm": 0.7894667387008667, "learning_rate": 1.419933225716624e-05, "loss": 2.8454, "step": 55196 }, { "epoch": 2.71, "grad_norm": 0.7934728860855103, "learning_rate": 1.4194652040194698e-05, "loss": 3.0578, "step": 55197 }, { "epoch": 2.71, "grad_norm": 0.7518163919448853, "learning_rate": 1.4189972575979448e-05, "loss": 2.7094, "step": 55198 }, { "epoch": 2.71, "grad_norm": 0.7911694645881653, "learning_rate": 1.4185293864532743e-05, "loss": 3.0337, "step": 55199 }, { "epoch": 2.71, "grad_norm": 0.7032492756843567, "learning_rate": 1.418061590586701e-05, "loss": 2.9355, "step": 55200 }, { "epoch": 2.71, "grad_norm": 0.7293274402618408, "learning_rate": 1.4175938699994504e-05, "loss": 2.9049, "step": 55201 }, { "epoch": 2.71, "grad_norm": 0.7434591054916382, "learning_rate": 1.417126224692755e-05, "loss": 2.8035, "step": 55202 }, { "epoch": 2.71, "grad_norm": 0.7389995455741882, "learning_rate": 1.4166586546678538e-05, "loss": 2.8651, "step": 55203 }, { "epoch": 2.71, "grad_norm": 0.7393417358398438, "learning_rate": 1.416191159925969e-05, "loss": 2.8286, "step": 55204 }, { "epoch": 2.71, "grad_norm": 0.7257364988327026, "learning_rate": 1.415723740468343e-05, "loss": 2.9278, "step": 55205 }, { "epoch": 2.71, "grad_norm": 0.8316163420677185, "learning_rate": 1.4152563962961916e-05, "loss": 2.9318, "step": 55206 }, { "epoch": 2.71, "grad_norm": 0.718009889125824, "learning_rate": 1.4147891274107503e-05, "loss": 3.0043, "step": 55207 }, { "epoch": 2.71, "grad_norm": 0.7439994812011719, "learning_rate": 1.4143219338132616e-05, "loss": 2.8376, "step": 55208 }, { "epoch": 2.71, "grad_norm": 0.7357783317565918, "learning_rate": 1.4138548155049411e-05, "loss": 2.8897, "step": 55209 }, { "epoch": 2.71, "grad_norm": 0.7854210734367371, "learning_rate": 1.4133877724870312e-05, "loss": 2.7374, "step": 55210 }, { "epoch": 2.71, "grad_norm": 0.7327690124511719, "learning_rate": 1.412920804760751e-05, "loss": 2.7055, "step": 55211 }, { "epoch": 2.71, "grad_norm": 0.784988284111023, "learning_rate": 1.4124539123273392e-05, "loss": 2.7917, "step": 55212 }, { "epoch": 2.71, "grad_norm": 0.7121521830558777, "learning_rate": 1.4119870951880252e-05, "loss": 2.8236, "step": 55213 }, { "epoch": 2.71, "grad_norm": 0.7981328368186951, "learning_rate": 1.4115203533440278e-05, "loss": 3.0959, "step": 55214 }, { "epoch": 2.71, "grad_norm": 0.7848892211914062, "learning_rate": 1.4110536867965894e-05, "loss": 2.7955, "step": 55215 }, { "epoch": 2.71, "grad_norm": 0.7660860419273376, "learning_rate": 1.410587095546929e-05, "loss": 3.0071, "step": 55216 }, { "epoch": 2.71, "grad_norm": 0.7169570326805115, "learning_rate": 1.4101205795962788e-05, "loss": 2.6379, "step": 55217 }, { "epoch": 2.71, "grad_norm": 0.7584700584411621, "learning_rate": 1.409654138945875e-05, "loss": 2.9686, "step": 55218 }, { "epoch": 2.71, "grad_norm": 0.7803013920783997, "learning_rate": 1.4091877735969393e-05, "loss": 2.8314, "step": 55219 }, { "epoch": 2.71, "grad_norm": 0.7412531971931458, "learning_rate": 1.408721483550701e-05, "loss": 2.9973, "step": 55220 }, { "epoch": 2.71, "grad_norm": 0.8721296191215515, "learning_rate": 1.4082552688083827e-05, "loss": 2.917, "step": 55221 }, { "epoch": 2.71, "grad_norm": 0.7726236581802368, "learning_rate": 1.4077891293712162e-05, "loss": 2.9424, "step": 55222 }, { "epoch": 2.71, "grad_norm": 0.761952817440033, "learning_rate": 1.4073230652404378e-05, "loss": 2.6168, "step": 55223 }, { "epoch": 2.71, "grad_norm": 0.7393391132354736, "learning_rate": 1.4068570764172626e-05, "loss": 2.9725, "step": 55224 }, { "epoch": 2.71, "grad_norm": 0.7465383410453796, "learning_rate": 1.4063911629029268e-05, "loss": 2.8849, "step": 55225 }, { "epoch": 2.71, "grad_norm": 0.7391934394836426, "learning_rate": 1.405925324698649e-05, "loss": 2.9616, "step": 55226 }, { "epoch": 2.71, "grad_norm": 0.7626303434371948, "learning_rate": 1.4054595618056652e-05, "loss": 2.9186, "step": 55227 }, { "epoch": 2.71, "grad_norm": 0.7824798226356506, "learning_rate": 1.4049938742251976e-05, "loss": 3.2266, "step": 55228 }, { "epoch": 2.71, "grad_norm": 0.7267372012138367, "learning_rate": 1.4045282619584685e-05, "loss": 2.7209, "step": 55229 }, { "epoch": 2.71, "grad_norm": 0.7535328269004822, "learning_rate": 1.4040627250067138e-05, "loss": 2.9377, "step": 55230 }, { "epoch": 2.71, "grad_norm": 0.7468425035476685, "learning_rate": 1.403597263371149e-05, "loss": 2.8851, "step": 55231 }, { "epoch": 2.71, "grad_norm": 0.7248378396034241, "learning_rate": 1.4031318770530065e-05, "loss": 2.6145, "step": 55232 }, { "epoch": 2.71, "grad_norm": 0.7613323926925659, "learning_rate": 1.4026665660535152e-05, "loss": 2.9701, "step": 55233 }, { "epoch": 2.71, "grad_norm": 0.7856761813163757, "learning_rate": 1.4022013303738977e-05, "loss": 2.8771, "step": 55234 }, { "epoch": 2.71, "grad_norm": 0.7455305457115173, "learning_rate": 1.4017361700153762e-05, "loss": 2.9148, "step": 55235 }, { "epoch": 2.71, "grad_norm": 0.7511041760444641, "learning_rate": 1.4012710849791731e-05, "loss": 2.6375, "step": 55236 }, { "epoch": 2.71, "grad_norm": 0.7273489832878113, "learning_rate": 1.4008060752665206e-05, "loss": 2.7796, "step": 55237 }, { "epoch": 2.71, "grad_norm": 0.744505763053894, "learning_rate": 1.4003411408786414e-05, "loss": 3.0129, "step": 55238 }, { "epoch": 2.71, "grad_norm": 0.7265143394470215, "learning_rate": 1.3998762818167575e-05, "loss": 2.7688, "step": 55239 }, { "epoch": 2.71, "grad_norm": 0.8545629978179932, "learning_rate": 1.3994114980821014e-05, "loss": 2.9477, "step": 55240 }, { "epoch": 2.71, "grad_norm": 0.7982408404350281, "learning_rate": 1.3989467896758887e-05, "loss": 2.9287, "step": 55241 }, { "epoch": 2.71, "grad_norm": 0.7544611692428589, "learning_rate": 1.398482156599342e-05, "loss": 2.8994, "step": 55242 }, { "epoch": 2.71, "grad_norm": 0.8148059248924255, "learning_rate": 1.3980175988536935e-05, "loss": 2.81, "step": 55243 }, { "epoch": 2.71, "grad_norm": 0.7339653372764587, "learning_rate": 1.3975531164401555e-05, "loss": 2.9793, "step": 55244 }, { "epoch": 2.71, "grad_norm": 0.7554219961166382, "learning_rate": 1.397088709359967e-05, "loss": 3.0484, "step": 55245 }, { "epoch": 2.71, "grad_norm": 0.8348968625068665, "learning_rate": 1.3966243776143338e-05, "loss": 3.0106, "step": 55246 }, { "epoch": 2.71, "grad_norm": 0.7876124382019043, "learning_rate": 1.3961601212044881e-05, "loss": 2.8469, "step": 55247 }, { "epoch": 2.71, "grad_norm": 0.7836396098136902, "learning_rate": 1.3956959401316558e-05, "loss": 2.7991, "step": 55248 }, { "epoch": 2.71, "grad_norm": 0.7819403409957886, "learning_rate": 1.3952318343970559e-05, "loss": 3.033, "step": 55249 }, { "epoch": 2.71, "grad_norm": 0.7380668520927429, "learning_rate": 1.3947678040019105e-05, "loss": 2.8926, "step": 55250 }, { "epoch": 2.71, "grad_norm": 0.7316756248474121, "learning_rate": 1.3943038489474356e-05, "loss": 2.9032, "step": 55251 }, { "epoch": 2.71, "grad_norm": 0.7308788299560547, "learning_rate": 1.3938399692348667e-05, "loss": 2.911, "step": 55252 }, { "epoch": 2.71, "grad_norm": 0.7654592990875244, "learning_rate": 1.393376164865413e-05, "loss": 2.6837, "step": 55253 }, { "epoch": 2.71, "grad_norm": 0.7962247133255005, "learning_rate": 1.3929124358402999e-05, "loss": 2.9718, "step": 55254 }, { "epoch": 2.71, "grad_norm": 0.7708144187927246, "learning_rate": 1.3924487821607534e-05, "loss": 2.8453, "step": 55255 }, { "epoch": 2.71, "grad_norm": 0.7123581767082214, "learning_rate": 1.3919852038279888e-05, "loss": 2.7315, "step": 55256 }, { "epoch": 2.71, "grad_norm": 0.7534207105636597, "learning_rate": 1.3915217008432355e-05, "loss": 3.0169, "step": 55257 }, { "epoch": 2.71, "grad_norm": 0.7725705504417419, "learning_rate": 1.3910582732076991e-05, "loss": 2.8719, "step": 55258 }, { "epoch": 2.71, "grad_norm": 0.7859570384025574, "learning_rate": 1.3905949209226119e-05, "loss": 2.9936, "step": 55259 }, { "epoch": 2.71, "grad_norm": 0.7947921752929688, "learning_rate": 1.3901316439891962e-05, "loss": 2.7604, "step": 55260 }, { "epoch": 2.71, "grad_norm": 0.7809910774230957, "learning_rate": 1.3896684424086613e-05, "loss": 2.9163, "step": 55261 }, { "epoch": 2.71, "grad_norm": 0.7197073698043823, "learning_rate": 1.3892053161822392e-05, "loss": 2.9279, "step": 55262 }, { "epoch": 2.71, "grad_norm": 0.7367950677871704, "learning_rate": 1.3887422653111424e-05, "loss": 2.7356, "step": 55263 }, { "epoch": 2.71, "grad_norm": 0.7174615263938904, "learning_rate": 1.3882792897965899e-05, "loss": 3.0581, "step": 55264 }, { "epoch": 2.71, "grad_norm": 0.7707169055938721, "learning_rate": 1.3878163896398076e-05, "loss": 2.6856, "step": 55265 }, { "epoch": 2.71, "grad_norm": 0.7517051100730896, "learning_rate": 1.3873535648420043e-05, "loss": 2.8619, "step": 55266 }, { "epoch": 2.71, "grad_norm": 0.7613734602928162, "learning_rate": 1.3868908154044089e-05, "loss": 2.7424, "step": 55267 }, { "epoch": 2.71, "grad_norm": 0.7365747690200806, "learning_rate": 1.386428141328234e-05, "loss": 3.0878, "step": 55268 }, { "epoch": 2.71, "grad_norm": 0.7309760451316833, "learning_rate": 1.385965542614702e-05, "loss": 2.7943, "step": 55269 }, { "epoch": 2.71, "grad_norm": 0.7792554497718811, "learning_rate": 1.3855030192650318e-05, "loss": 2.9993, "step": 55270 }, { "epoch": 2.71, "grad_norm": 0.7756322622299194, "learning_rate": 1.385040571280439e-05, "loss": 2.5423, "step": 55271 }, { "epoch": 2.71, "grad_norm": 0.7910360097885132, "learning_rate": 1.3845781986621463e-05, "loss": 3.0719, "step": 55272 }, { "epoch": 2.71, "grad_norm": 0.7808008790016174, "learning_rate": 1.384115901411359e-05, "loss": 2.9894, "step": 55273 }, { "epoch": 2.71, "grad_norm": 0.7794164419174194, "learning_rate": 1.3836536795293063e-05, "loss": 2.6242, "step": 55274 }, { "epoch": 2.71, "grad_norm": 0.7962504625320435, "learning_rate": 1.3831915330172038e-05, "loss": 2.9159, "step": 55275 }, { "epoch": 2.71, "grad_norm": 0.7393701076507568, "learning_rate": 1.382729461876264e-05, "loss": 2.8513, "step": 55276 }, { "epoch": 2.71, "grad_norm": 0.7692285776138306, "learning_rate": 1.3822674661077126e-05, "loss": 2.7925, "step": 55277 }, { "epoch": 2.71, "grad_norm": 0.721187949180603, "learning_rate": 1.3818055457127586e-05, "loss": 3.0527, "step": 55278 }, { "epoch": 2.71, "grad_norm": 0.7725158929824829, "learning_rate": 1.3813437006926209e-05, "loss": 2.9564, "step": 55279 }, { "epoch": 2.71, "grad_norm": 0.7646429538726807, "learning_rate": 1.3808819310485186e-05, "loss": 2.8219, "step": 55280 }, { "epoch": 2.71, "grad_norm": 0.7827332019805908, "learning_rate": 1.3804202367816608e-05, "loss": 2.9961, "step": 55281 }, { "epoch": 2.71, "grad_norm": 0.7357837557792664, "learning_rate": 1.3799586178932697e-05, "loss": 2.9484, "step": 55282 }, { "epoch": 2.71, "grad_norm": 0.7550036311149597, "learning_rate": 1.3794970743845612e-05, "loss": 2.9146, "step": 55283 }, { "epoch": 2.71, "grad_norm": 0.7835947275161743, "learning_rate": 1.3790356062567475e-05, "loss": 2.8771, "step": 55284 }, { "epoch": 2.71, "grad_norm": 0.7576488852500916, "learning_rate": 1.378574213511051e-05, "loss": 2.6269, "step": 55285 }, { "epoch": 2.71, "grad_norm": 0.7808263301849365, "learning_rate": 1.3781128961486742e-05, "loss": 2.813, "step": 55286 }, { "epoch": 2.71, "grad_norm": 0.7787500619888306, "learning_rate": 1.377651654170846e-05, "loss": 2.8559, "step": 55287 }, { "epoch": 2.71, "grad_norm": 0.8117019534111023, "learning_rate": 1.3771904875787687e-05, "loss": 2.8457, "step": 55288 }, { "epoch": 2.71, "grad_norm": 0.712166965007782, "learning_rate": 1.3767293963736647e-05, "loss": 2.8873, "step": 55289 }, { "epoch": 2.71, "grad_norm": 0.7200855016708374, "learning_rate": 1.3762683805567498e-05, "loss": 2.8848, "step": 55290 }, { "epoch": 2.71, "grad_norm": 0.7672809958457947, "learning_rate": 1.3758074401292296e-05, "loss": 2.8106, "step": 55291 }, { "epoch": 2.71, "grad_norm": 0.7226176261901855, "learning_rate": 1.3753465750923297e-05, "loss": 2.8904, "step": 55292 }, { "epoch": 2.71, "grad_norm": 0.740872323513031, "learning_rate": 1.3748857854472594e-05, "loss": 2.9025, "step": 55293 }, { "epoch": 2.71, "grad_norm": 0.7510790824890137, "learning_rate": 1.3744250711952242e-05, "loss": 2.9526, "step": 55294 }, { "epoch": 2.71, "grad_norm": 0.7161646485328674, "learning_rate": 1.37396443233745e-05, "loss": 3.074, "step": 55295 }, { "epoch": 2.71, "grad_norm": 0.7294967770576477, "learning_rate": 1.3735038688751387e-05, "loss": 2.8639, "step": 55296 }, { "epoch": 2.71, "grad_norm": 0.7264935970306396, "learning_rate": 1.3730433808095165e-05, "loss": 2.8598, "step": 55297 }, { "epoch": 2.71, "grad_norm": 0.8006035685539246, "learning_rate": 1.3725829681417822e-05, "loss": 2.9085, "step": 55298 }, { "epoch": 2.71, "grad_norm": 0.7237944602966309, "learning_rate": 1.372122630873158e-05, "loss": 2.7831, "step": 55299 }, { "epoch": 2.71, "grad_norm": 0.7685567140579224, "learning_rate": 1.3716623690048534e-05, "loss": 3.0923, "step": 55300 }, { "epoch": 2.71, "grad_norm": 0.8011992573738098, "learning_rate": 1.371202182538077e-05, "loss": 2.9499, "step": 55301 }, { "epoch": 2.71, "grad_norm": 0.7467533349990845, "learning_rate": 1.370742071474048e-05, "loss": 3.0305, "step": 55302 }, { "epoch": 2.71, "grad_norm": 0.7417289614677429, "learning_rate": 1.3702820358139721e-05, "loss": 2.7992, "step": 55303 }, { "epoch": 2.71, "grad_norm": 0.7307183742523193, "learning_rate": 1.3698220755590616e-05, "loss": 2.9265, "step": 55304 }, { "epoch": 2.71, "grad_norm": 0.7761890888214111, "learning_rate": 1.3693621907105357e-05, "loss": 2.7917, "step": 55305 }, { "epoch": 2.71, "grad_norm": 0.7544965744018555, "learning_rate": 1.3689023812695999e-05, "loss": 2.9144, "step": 55306 }, { "epoch": 2.71, "grad_norm": 0.8068240284919739, "learning_rate": 1.3684426472374632e-05, "loss": 2.7519, "step": 55307 }, { "epoch": 2.71, "grad_norm": 0.7504112124443054, "learning_rate": 1.3679829886153349e-05, "loss": 2.7891, "step": 55308 }, { "epoch": 2.71, "grad_norm": 0.7085170149803162, "learning_rate": 1.367523405404427e-05, "loss": 2.6752, "step": 55309 }, { "epoch": 2.71, "grad_norm": 0.7529444694519043, "learning_rate": 1.3670638976059589e-05, "loss": 3.0175, "step": 55310 }, { "epoch": 2.71, "grad_norm": 0.7292396426200867, "learning_rate": 1.3666044652211295e-05, "loss": 2.9474, "step": 55311 }, { "epoch": 2.71, "grad_norm": 0.787841260433197, "learning_rate": 1.3661451082511577e-05, "loss": 2.6969, "step": 55312 }, { "epoch": 2.71, "grad_norm": 0.7327471375465393, "learning_rate": 1.3656858266972427e-05, "loss": 2.9287, "step": 55313 }, { "epoch": 2.71, "grad_norm": 0.7643541693687439, "learning_rate": 1.3652266205606065e-05, "loss": 2.8279, "step": 55314 }, { "epoch": 2.71, "grad_norm": 0.6762557625770569, "learning_rate": 1.364767489842452e-05, "loss": 2.8347, "step": 55315 }, { "epoch": 2.71, "grad_norm": 0.726814866065979, "learning_rate": 1.3643084345439847e-05, "loss": 2.7628, "step": 55316 }, { "epoch": 2.71, "grad_norm": 0.770945131778717, "learning_rate": 1.3638494546664236e-05, "loss": 2.8228, "step": 55317 }, { "epoch": 2.71, "grad_norm": 0.7129945158958435, "learning_rate": 1.3633905502109676e-05, "loss": 2.8855, "step": 55318 }, { "epoch": 2.71, "grad_norm": 0.7419125437736511, "learning_rate": 1.362931721178826e-05, "loss": 2.8758, "step": 55319 }, { "epoch": 2.71, "grad_norm": 0.7454127669334412, "learning_rate": 1.3624729675712176e-05, "loss": 2.9655, "step": 55320 }, { "epoch": 2.71, "grad_norm": 0.7969364523887634, "learning_rate": 1.362014289389345e-05, "loss": 3.003, "step": 55321 }, { "epoch": 2.71, "grad_norm": 0.7323818206787109, "learning_rate": 1.3615556866344135e-05, "loss": 2.8169, "step": 55322 }, { "epoch": 2.71, "grad_norm": 0.7280770540237427, "learning_rate": 1.3610971593076293e-05, "loss": 2.7861, "step": 55323 }, { "epoch": 2.71, "grad_norm": 0.808602511882782, "learning_rate": 1.3606387074102042e-05, "loss": 2.9433, "step": 55324 }, { "epoch": 2.71, "grad_norm": 0.717870831489563, "learning_rate": 1.360180330943348e-05, "loss": 2.9154, "step": 55325 }, { "epoch": 2.71, "grad_norm": 0.7248071432113647, "learning_rate": 1.3597220299082623e-05, "loss": 2.9233, "step": 55326 }, { "epoch": 2.71, "grad_norm": 0.7266684174537659, "learning_rate": 1.3592638043061598e-05, "loss": 3.1069, "step": 55327 }, { "epoch": 2.71, "grad_norm": 0.7501939535140991, "learning_rate": 1.3588056541382397e-05, "loss": 2.9581, "step": 55328 }, { "epoch": 2.71, "grad_norm": 0.7233149409294128, "learning_rate": 1.3583475794057207e-05, "loss": 2.8132, "step": 55329 }, { "epoch": 2.71, "grad_norm": 0.8018295168876648, "learning_rate": 1.3578895801097989e-05, "loss": 3.065, "step": 55330 }, { "epoch": 2.71, "grad_norm": 0.7703627347946167, "learning_rate": 1.3574316562516796e-05, "loss": 2.7958, "step": 55331 }, { "epoch": 2.71, "grad_norm": 0.7846996188163757, "learning_rate": 1.3569738078325788e-05, "loss": 2.72, "step": 55332 }, { "epoch": 2.71, "grad_norm": 0.8439915180206299, "learning_rate": 1.3565160348536918e-05, "loss": 2.673, "step": 55333 }, { "epoch": 2.71, "grad_norm": 0.7853070497512817, "learning_rate": 1.3560583373162315e-05, "loss": 2.8157, "step": 55334 }, { "epoch": 2.71, "grad_norm": 0.7770476937294006, "learning_rate": 1.3556007152213966e-05, "loss": 2.9171, "step": 55335 }, { "epoch": 2.71, "grad_norm": 0.7779586315155029, "learning_rate": 1.3551431685704028e-05, "loss": 2.9688, "step": 55336 }, { "epoch": 2.71, "grad_norm": 0.7522245049476624, "learning_rate": 1.3546856973644493e-05, "loss": 2.7268, "step": 55337 }, { "epoch": 2.71, "grad_norm": 0.7922734022140503, "learning_rate": 1.354228301604735e-05, "loss": 2.7946, "step": 55338 }, { "epoch": 2.71, "grad_norm": 0.7396979331970215, "learning_rate": 1.3537709812924757e-05, "loss": 2.9228, "step": 55339 }, { "epoch": 2.71, "grad_norm": 0.7690479755401611, "learning_rate": 1.3533137364288671e-05, "loss": 2.8392, "step": 55340 }, { "epoch": 2.71, "grad_norm": 0.8230964541435242, "learning_rate": 1.3528565670151149e-05, "loss": 2.7559, "step": 55341 }, { "epoch": 2.71, "grad_norm": 0.7428261637687683, "learning_rate": 1.3523994730524312e-05, "loss": 2.8789, "step": 55342 }, { "epoch": 2.71, "grad_norm": 0.7513474822044373, "learning_rate": 1.351942454542012e-05, "loss": 2.711, "step": 55343 }, { "epoch": 2.71, "grad_norm": 0.7638099193572998, "learning_rate": 1.351485511485063e-05, "loss": 2.6492, "step": 55344 }, { "epoch": 2.71, "grad_norm": 0.7367132306098938, "learning_rate": 1.3510286438827866e-05, "loss": 2.936, "step": 55345 }, { "epoch": 2.71, "grad_norm": 0.7802484035491943, "learning_rate": 1.3505718517363851e-05, "loss": 2.9882, "step": 55346 }, { "epoch": 2.71, "grad_norm": 0.7542453408241272, "learning_rate": 1.3501151350470673e-05, "loss": 2.8119, "step": 55347 }, { "epoch": 2.71, "grad_norm": 0.79583340883255, "learning_rate": 1.3496584938160292e-05, "loss": 2.8344, "step": 55348 }, { "epoch": 2.71, "grad_norm": 0.7419581413269043, "learning_rate": 1.3492019280444798e-05, "loss": 2.6465, "step": 55349 }, { "epoch": 2.71, "grad_norm": 0.7605627775192261, "learning_rate": 1.348745437733615e-05, "loss": 2.7418, "step": 55350 }, { "epoch": 2.71, "grad_norm": 0.7467504143714905, "learning_rate": 1.3482890228846466e-05, "loss": 2.8421, "step": 55351 }, { "epoch": 2.71, "grad_norm": 0.7766313552856445, "learning_rate": 1.3478326834987673e-05, "loss": 2.8587, "step": 55352 }, { "epoch": 2.71, "grad_norm": 0.7753242254257202, "learning_rate": 1.3473764195771797e-05, "loss": 2.7824, "step": 55353 }, { "epoch": 2.71, "grad_norm": 0.7845016121864319, "learning_rate": 1.3469202311210925e-05, "loss": 2.9468, "step": 55354 }, { "epoch": 2.71, "grad_norm": 0.7222416400909424, "learning_rate": 1.3464641181317015e-05, "loss": 3.0865, "step": 55355 }, { "epoch": 2.71, "grad_norm": 0.7512432932853699, "learning_rate": 1.3460080806102057e-05, "loss": 2.6878, "step": 55356 }, { "epoch": 2.71, "grad_norm": 0.7436035871505737, "learning_rate": 1.3455521185578177e-05, "loss": 2.7787, "step": 55357 }, { "epoch": 2.71, "grad_norm": 0.7184932827949524, "learning_rate": 1.3450962319757263e-05, "loss": 2.8446, "step": 55358 }, { "epoch": 2.71, "grad_norm": 0.748602569103241, "learning_rate": 1.3446404208651406e-05, "loss": 2.948, "step": 55359 }, { "epoch": 2.71, "grad_norm": 0.8889462947845459, "learning_rate": 1.344184685227253e-05, "loss": 2.9219, "step": 55360 }, { "epoch": 2.71, "grad_norm": 0.7768132090568542, "learning_rate": 1.343729025063266e-05, "loss": 2.8009, "step": 55361 }, { "epoch": 2.71, "grad_norm": 0.727043628692627, "learning_rate": 1.343273440374385e-05, "loss": 2.7721, "step": 55362 }, { "epoch": 2.71, "grad_norm": 0.7611899375915527, "learning_rate": 1.3428179311618058e-05, "loss": 2.8881, "step": 55363 }, { "epoch": 2.71, "grad_norm": 0.7601465582847595, "learning_rate": 1.3423624974267312e-05, "loss": 2.8482, "step": 55364 }, { "epoch": 2.71, "grad_norm": 0.7595193386077881, "learning_rate": 1.3419071391703596e-05, "loss": 2.8653, "step": 55365 }, { "epoch": 2.71, "grad_norm": 0.8344078660011292, "learning_rate": 1.3414518563938837e-05, "loss": 2.969, "step": 55366 }, { "epoch": 2.71, "grad_norm": 0.7677531242370605, "learning_rate": 1.3409966490985125e-05, "loss": 2.7756, "step": 55367 }, { "epoch": 2.71, "grad_norm": 0.7744829654693604, "learning_rate": 1.3405415172854384e-05, "loss": 2.9537, "step": 55368 }, { "epoch": 2.71, "grad_norm": 0.7642647624015808, "learning_rate": 1.3400864609558637e-05, "loss": 2.6661, "step": 55369 }, { "epoch": 2.71, "grad_norm": 0.7297582626342773, "learning_rate": 1.3396314801109808e-05, "loss": 2.977, "step": 55370 }, { "epoch": 2.71, "grad_norm": 0.7670851945877075, "learning_rate": 1.3391765747519956e-05, "loss": 2.9117, "step": 55371 }, { "epoch": 2.71, "grad_norm": 0.7472463250160217, "learning_rate": 1.3387217448801069e-05, "loss": 2.8681, "step": 55372 }, { "epoch": 2.71, "grad_norm": 0.7477490305900574, "learning_rate": 1.3382669904965105e-05, "loss": 2.8268, "step": 55373 }, { "epoch": 2.71, "grad_norm": 0.7499014735221863, "learning_rate": 1.3378123116023987e-05, "loss": 2.9298, "step": 55374 }, { "epoch": 2.71, "grad_norm": 0.748065710067749, "learning_rate": 1.3373577081989707e-05, "loss": 2.9597, "step": 55375 }, { "epoch": 2.71, "grad_norm": 0.7136443257331848, "learning_rate": 1.3369031802874286e-05, "loss": 2.8244, "step": 55376 }, { "epoch": 2.71, "grad_norm": 0.7487014532089233, "learning_rate": 1.3364487278689684e-05, "loss": 2.7234, "step": 55377 }, { "epoch": 2.71, "grad_norm": 0.7109510898590088, "learning_rate": 1.3359943509447858e-05, "loss": 3.0295, "step": 55378 }, { "epoch": 2.71, "grad_norm": 0.7548745274543762, "learning_rate": 1.3355400495160762e-05, "loss": 2.9223, "step": 55379 }, { "epoch": 2.71, "grad_norm": 0.7589823007583618, "learning_rate": 1.3350858235840423e-05, "loss": 2.8757, "step": 55380 }, { "epoch": 2.71, "grad_norm": 0.8094698190689087, "learning_rate": 1.3346316731498697e-05, "loss": 2.9904, "step": 55381 }, { "epoch": 2.71, "grad_norm": 0.7765593528747559, "learning_rate": 1.334177598214764e-05, "loss": 2.9588, "step": 55382 }, { "epoch": 2.71, "grad_norm": 0.7499661445617676, "learning_rate": 1.3337235987799144e-05, "loss": 2.8294, "step": 55383 }, { "epoch": 2.71, "grad_norm": 0.7597896456718445, "learning_rate": 1.3332696748465232e-05, "loss": 2.8076, "step": 55384 }, { "epoch": 2.71, "grad_norm": 0.7389174103736877, "learning_rate": 1.3328158264157762e-05, "loss": 3.1259, "step": 55385 }, { "epoch": 2.71, "grad_norm": 0.7625584006309509, "learning_rate": 1.3323620534888823e-05, "loss": 2.8586, "step": 55386 }, { "epoch": 2.71, "grad_norm": 0.7467266917228699, "learning_rate": 1.3319083560670308e-05, "loss": 3.0767, "step": 55387 }, { "epoch": 2.71, "grad_norm": 0.7760413885116577, "learning_rate": 1.3314547341514104e-05, "loss": 3.0268, "step": 55388 }, { "epoch": 2.71, "grad_norm": 0.7491160035133362, "learning_rate": 1.3310011877432236e-05, "loss": 2.9444, "step": 55389 }, { "epoch": 2.71, "grad_norm": 0.8000679612159729, "learning_rate": 1.3305477168436596e-05, "loss": 2.9765, "step": 55390 }, { "epoch": 2.71, "grad_norm": 1.0552611351013184, "learning_rate": 1.3300943214539138e-05, "loss": 2.9166, "step": 55391 }, { "epoch": 2.71, "grad_norm": 0.7722744345664978, "learning_rate": 1.3296410015751858e-05, "loss": 2.8614, "step": 55392 }, { "epoch": 2.71, "grad_norm": 0.7588552832603455, "learning_rate": 1.329187757208664e-05, "loss": 2.7327, "step": 55393 }, { "epoch": 2.71, "grad_norm": 0.7379563450813293, "learning_rate": 1.3287345883555445e-05, "loss": 2.8572, "step": 55394 }, { "epoch": 2.71, "grad_norm": 0.7401456236839294, "learning_rate": 1.328281495017023e-05, "loss": 2.8642, "step": 55395 }, { "epoch": 2.71, "grad_norm": 0.7446950078010559, "learning_rate": 1.3278284771942849e-05, "loss": 2.8721, "step": 55396 }, { "epoch": 2.71, "grad_norm": 0.788352370262146, "learning_rate": 1.3273755348885329e-05, "loss": 2.9431, "step": 55397 }, { "epoch": 2.71, "grad_norm": 0.8451722264289856, "learning_rate": 1.3269226681009525e-05, "loss": 2.822, "step": 55398 }, { "epoch": 2.71, "grad_norm": 0.7008662223815918, "learning_rate": 1.3264698768327432e-05, "loss": 3.1527, "step": 55399 }, { "epoch": 2.72, "grad_norm": 0.7523859143257141, "learning_rate": 1.3260171610850867e-05, "loss": 2.9673, "step": 55400 }, { "epoch": 2.72, "grad_norm": 0.7550097703933716, "learning_rate": 1.3255645208591893e-05, "loss": 2.9408, "step": 55401 }, { "epoch": 2.72, "grad_norm": 0.7374807596206665, "learning_rate": 1.3251119561562362e-05, "loss": 2.815, "step": 55402 }, { "epoch": 2.72, "grad_norm": 0.7401943802833557, "learning_rate": 1.3246594669774169e-05, "loss": 2.8424, "step": 55403 }, { "epoch": 2.72, "grad_norm": 0.7593181729316711, "learning_rate": 1.3242070533239302e-05, "loss": 2.9994, "step": 55404 }, { "epoch": 2.72, "grad_norm": 0.7926674485206604, "learning_rate": 1.3237547151969552e-05, "loss": 2.9654, "step": 55405 }, { "epoch": 2.72, "grad_norm": 0.7539723515510559, "learning_rate": 1.323302452597701e-05, "loss": 2.8782, "step": 55406 }, { "epoch": 2.72, "grad_norm": 0.776358962059021, "learning_rate": 1.3228502655273398e-05, "loss": 3.0188, "step": 55407 }, { "epoch": 2.72, "grad_norm": 0.7997563481330872, "learning_rate": 1.322398153987081e-05, "loss": 2.9002, "step": 55408 }, { "epoch": 2.72, "grad_norm": 0.7761156558990479, "learning_rate": 1.3219461179781033e-05, "loss": 2.8597, "step": 55409 }, { "epoch": 2.72, "grad_norm": 0.7830085158348083, "learning_rate": 1.3214941575015958e-05, "loss": 2.8994, "step": 55410 }, { "epoch": 2.72, "grad_norm": 0.7592552304267883, "learning_rate": 1.3210422725587577e-05, "loss": 2.755, "step": 55411 }, { "epoch": 2.72, "grad_norm": 0.716784656047821, "learning_rate": 1.3205904631507714e-05, "loss": 2.8458, "step": 55412 }, { "epoch": 2.72, "grad_norm": 0.7735759615898132, "learning_rate": 1.3201387292788324e-05, "loss": 2.9543, "step": 55413 }, { "epoch": 2.72, "grad_norm": 0.7621521949768066, "learning_rate": 1.3196870709441298e-05, "loss": 3.0033, "step": 55414 }, { "epoch": 2.72, "grad_norm": 0.7094641923904419, "learning_rate": 1.3192354881478496e-05, "loss": 3.1288, "step": 55415 }, { "epoch": 2.72, "grad_norm": 0.7761197090148926, "learning_rate": 1.3187839808911871e-05, "loss": 3.1512, "step": 55416 }, { "epoch": 2.72, "grad_norm": 0.8005975484848022, "learning_rate": 1.318332549175325e-05, "loss": 2.9146, "step": 55417 }, { "epoch": 2.72, "grad_norm": 0.7150023579597473, "learning_rate": 1.3178811930014554e-05, "loss": 2.6611, "step": 55418 }, { "epoch": 2.72, "grad_norm": 0.7599121332168579, "learning_rate": 1.3174299123707677e-05, "loss": 2.8705, "step": 55419 }, { "epoch": 2.72, "grad_norm": 0.8056515455245972, "learning_rate": 1.3169787072844473e-05, "loss": 2.8577, "step": 55420 }, { "epoch": 2.72, "grad_norm": 0.7697814106941223, "learning_rate": 1.3165275777436867e-05, "loss": 3.1584, "step": 55421 }, { "epoch": 2.72, "grad_norm": 0.7207685112953186, "learning_rate": 1.3160765237496685e-05, "loss": 3.1243, "step": 55422 }, { "epoch": 2.72, "grad_norm": 0.7401747107505798, "learning_rate": 1.3156255453035914e-05, "loss": 3.0593, "step": 55423 }, { "epoch": 2.72, "grad_norm": 0.8038659691810608, "learning_rate": 1.3151746424066312e-05, "loss": 2.9738, "step": 55424 }, { "epoch": 2.72, "grad_norm": 0.7309437990188599, "learning_rate": 1.3147238150599804e-05, "loss": 2.9488, "step": 55425 }, { "epoch": 2.72, "grad_norm": 0.752806544303894, "learning_rate": 1.3142730632648313e-05, "loss": 2.7383, "step": 55426 }, { "epoch": 2.72, "grad_norm": 0.72652268409729, "learning_rate": 1.3138223870223597e-05, "loss": 2.7769, "step": 55427 }, { "epoch": 2.72, "grad_norm": 0.75226891040802, "learning_rate": 1.3133717863337612e-05, "loss": 3.0681, "step": 55428 }, { "epoch": 2.72, "grad_norm": 0.7673428654670715, "learning_rate": 1.3129212612002215e-05, "loss": 2.7021, "step": 55429 }, { "epoch": 2.72, "grad_norm": 0.7407193779945374, "learning_rate": 1.3124708116229233e-05, "loss": 2.9799, "step": 55430 }, { "epoch": 2.72, "grad_norm": 0.7485948801040649, "learning_rate": 1.3120204376030652e-05, "loss": 3.0959, "step": 55431 }, { "epoch": 2.72, "grad_norm": 0.7666661739349365, "learning_rate": 1.3115701391418165e-05, "loss": 2.7309, "step": 55432 }, { "epoch": 2.72, "grad_norm": 0.7030153870582581, "learning_rate": 1.3111199162403697e-05, "loss": 2.716, "step": 55433 }, { "epoch": 2.72, "grad_norm": 0.6960538625717163, "learning_rate": 1.3106697688999169e-05, "loss": 2.7599, "step": 55434 }, { "epoch": 2.72, "grad_norm": 0.6722820997238159, "learning_rate": 1.310219697121634e-05, "loss": 2.941, "step": 55435 }, { "epoch": 2.72, "grad_norm": 0.7488369941711426, "learning_rate": 1.3097697009067132e-05, "loss": 2.8376, "step": 55436 }, { "epoch": 2.72, "grad_norm": 0.7796913981437683, "learning_rate": 1.3093197802563338e-05, "loss": 2.9606, "step": 55437 }, { "epoch": 2.72, "grad_norm": 0.6959371566772461, "learning_rate": 1.3088699351716914e-05, "loss": 2.9329, "step": 55438 }, { "epoch": 2.72, "grad_norm": 0.7195696234703064, "learning_rate": 1.3084201656539617e-05, "loss": 2.8452, "step": 55439 }, { "epoch": 2.72, "grad_norm": 0.7293813824653625, "learning_rate": 1.3079704717043272e-05, "loss": 2.7457, "step": 55440 }, { "epoch": 2.72, "grad_norm": 0.7376397252082825, "learning_rate": 1.3075208533239834e-05, "loss": 2.6492, "step": 55441 }, { "epoch": 2.72, "grad_norm": 0.7778947353363037, "learning_rate": 1.3070713105140996e-05, "loss": 2.8703, "step": 55442 }, { "epoch": 2.72, "grad_norm": 0.7461794018745422, "learning_rate": 1.3066218432758712e-05, "loss": 2.8688, "step": 55443 }, { "epoch": 2.72, "grad_norm": 0.7236195802688599, "learning_rate": 1.306172451610481e-05, "loss": 2.9004, "step": 55444 }, { "epoch": 2.72, "grad_norm": 0.8100160360336304, "learning_rate": 1.305723135519111e-05, "loss": 2.7097, "step": 55445 }, { "epoch": 2.72, "grad_norm": 0.7482362389564514, "learning_rate": 1.305273895002944e-05, "loss": 2.6586, "step": 55446 }, { "epoch": 2.72, "grad_norm": 0.7682929635047913, "learning_rate": 1.3048247300631587e-05, "loss": 2.9453, "step": 55447 }, { "epoch": 2.72, "grad_norm": 0.7298302054405212, "learning_rate": 1.304375640700941e-05, "loss": 2.8216, "step": 55448 }, { "epoch": 2.72, "grad_norm": 0.7340006828308105, "learning_rate": 1.303926626917483e-05, "loss": 2.8007, "step": 55449 }, { "epoch": 2.72, "grad_norm": 0.7444607615470886, "learning_rate": 1.3034776887139509e-05, "loss": 2.9086, "step": 55450 }, { "epoch": 2.72, "grad_norm": 0.7466793060302734, "learning_rate": 1.3030288260915433e-05, "loss": 3.0851, "step": 55451 }, { "epoch": 2.72, "grad_norm": 0.7335530519485474, "learning_rate": 1.3025800390514296e-05, "loss": 2.8182, "step": 55452 }, { "epoch": 2.72, "grad_norm": 0.7638733983039856, "learning_rate": 1.302131327594802e-05, "loss": 2.7318, "step": 55453 }, { "epoch": 2.72, "grad_norm": 0.8187944293022156, "learning_rate": 1.3016826917228362e-05, "loss": 3.0425, "step": 55454 }, { "epoch": 2.72, "grad_norm": 0.7817355990409851, "learning_rate": 1.3012341314367147e-05, "loss": 2.9438, "step": 55455 }, { "epoch": 2.72, "grad_norm": 0.7346907258033752, "learning_rate": 1.3007856467376198e-05, "loss": 3.1035, "step": 55456 }, { "epoch": 2.72, "grad_norm": 0.7391301989555359, "learning_rate": 1.3003372376267273e-05, "loss": 3.0594, "step": 55457 }, { "epoch": 2.72, "grad_norm": 0.7475512027740479, "learning_rate": 1.2998889041052263e-05, "loss": 3.0051, "step": 55458 }, { "epoch": 2.72, "grad_norm": 0.7690040469169617, "learning_rate": 1.2994406461742957e-05, "loss": 2.7683, "step": 55459 }, { "epoch": 2.72, "grad_norm": 0.7919315099716187, "learning_rate": 1.298992463835118e-05, "loss": 2.8209, "step": 55460 }, { "epoch": 2.72, "grad_norm": 0.7413168549537659, "learning_rate": 1.2985443570888687e-05, "loss": 2.9064, "step": 55461 }, { "epoch": 2.72, "grad_norm": 0.7446701526641846, "learning_rate": 1.2980963259367238e-05, "loss": 2.9471, "step": 55462 }, { "epoch": 2.72, "grad_norm": 0.7528586983680725, "learning_rate": 1.2976483703798724e-05, "loss": 2.7709, "step": 55463 }, { "epoch": 2.72, "grad_norm": 0.7157964706420898, "learning_rate": 1.2972004904194965e-05, "loss": 3.0465, "step": 55464 }, { "epoch": 2.72, "grad_norm": 0.7323057651519775, "learning_rate": 1.2967526860567623e-05, "loss": 2.8482, "step": 55465 }, { "epoch": 2.72, "grad_norm": 0.7675830721855164, "learning_rate": 1.2963049572928652e-05, "loss": 2.8903, "step": 55466 }, { "epoch": 2.72, "grad_norm": 0.7248068451881409, "learning_rate": 1.2958573041289744e-05, "loss": 2.8582, "step": 55467 }, { "epoch": 2.72, "grad_norm": 0.8281282186508179, "learning_rate": 1.2954097265662689e-05, "loss": 3.0083, "step": 55468 }, { "epoch": 2.72, "grad_norm": 0.7674946784973145, "learning_rate": 1.2949622246059311e-05, "loss": 2.9369, "step": 55469 }, { "epoch": 2.72, "grad_norm": 0.7439644932746887, "learning_rate": 1.2945147982491367e-05, "loss": 2.9113, "step": 55470 }, { "epoch": 2.72, "grad_norm": 0.7129039764404297, "learning_rate": 1.2940674474970713e-05, "loss": 2.8134, "step": 55471 }, { "epoch": 2.72, "grad_norm": 0.8135375380516052, "learning_rate": 1.293620172350901e-05, "loss": 2.8027, "step": 55472 }, { "epoch": 2.72, "grad_norm": 0.735965371131897, "learning_rate": 1.2931729728118112e-05, "loss": 2.8844, "step": 55473 }, { "epoch": 2.72, "grad_norm": 0.7808622717857361, "learning_rate": 1.2927258488809845e-05, "loss": 3.1059, "step": 55474 }, { "epoch": 2.72, "grad_norm": 0.6862757205963135, "learning_rate": 1.2922788005595896e-05, "loss": 2.6411, "step": 55475 }, { "epoch": 2.72, "grad_norm": 0.7670363187789917, "learning_rate": 1.2918318278488093e-05, "loss": 2.93, "step": 55476 }, { "epoch": 2.72, "grad_norm": 0.8030156493186951, "learning_rate": 1.2913849307498158e-05, "loss": 2.9131, "step": 55477 }, { "epoch": 2.72, "grad_norm": 0.7516763806343079, "learning_rate": 1.2909381092637883e-05, "loss": 2.9154, "step": 55478 }, { "epoch": 2.72, "grad_norm": 0.7685103416442871, "learning_rate": 1.290491363391909e-05, "loss": 2.8171, "step": 55479 }, { "epoch": 2.72, "grad_norm": 0.7892995476722717, "learning_rate": 1.290044693135347e-05, "loss": 2.8467, "step": 55480 }, { "epoch": 2.72, "grad_norm": 0.7564141154289246, "learning_rate": 1.2895980984952847e-05, "loss": 2.9793, "step": 55481 }, { "epoch": 2.72, "grad_norm": 0.7547937631607056, "learning_rate": 1.2891515794728979e-05, "loss": 2.723, "step": 55482 }, { "epoch": 2.72, "grad_norm": 0.7510333061218262, "learning_rate": 1.2887051360693524e-05, "loss": 2.8364, "step": 55483 }, { "epoch": 2.72, "grad_norm": 0.738362193107605, "learning_rate": 1.2882587682858403e-05, "loss": 3.0928, "step": 55484 }, { "epoch": 2.72, "grad_norm": 0.7294523119926453, "learning_rate": 1.2878124761235208e-05, "loss": 2.8948, "step": 55485 }, { "epoch": 2.72, "grad_norm": 0.7492228746414185, "learning_rate": 1.2873662595835833e-05, "loss": 2.9309, "step": 55486 }, { "epoch": 2.72, "grad_norm": 0.7402193546295166, "learning_rate": 1.286920118667193e-05, "loss": 2.9529, "step": 55487 }, { "epoch": 2.72, "grad_norm": 0.708809494972229, "learning_rate": 1.2864740533755357e-05, "loss": 2.8814, "step": 55488 }, { "epoch": 2.72, "grad_norm": 0.749814510345459, "learning_rate": 1.2860280637097776e-05, "loss": 3.1164, "step": 55489 }, { "epoch": 2.72, "grad_norm": 0.7216050624847412, "learning_rate": 1.2855821496710905e-05, "loss": 3.0579, "step": 55490 }, { "epoch": 2.72, "grad_norm": 0.7654860019683838, "learning_rate": 1.2851363112606572e-05, "loss": 2.9416, "step": 55491 }, { "epoch": 2.72, "grad_norm": 0.7191634178161621, "learning_rate": 1.2846905484796466e-05, "loss": 2.8358, "step": 55492 }, { "epoch": 2.72, "grad_norm": 0.7747024893760681, "learning_rate": 1.2842448613292378e-05, "loss": 2.883, "step": 55493 }, { "epoch": 2.72, "grad_norm": 0.7576825022697449, "learning_rate": 1.2837992498105998e-05, "loss": 2.858, "step": 55494 }, { "epoch": 2.72, "grad_norm": 0.7463220953941345, "learning_rate": 1.283353713924905e-05, "loss": 2.8142, "step": 55495 }, { "epoch": 2.72, "grad_norm": 0.7193440198898315, "learning_rate": 1.282908253673336e-05, "loss": 2.7727, "step": 55496 }, { "epoch": 2.72, "grad_norm": 0.7219351530075073, "learning_rate": 1.2824628690570582e-05, "loss": 2.8686, "step": 55497 }, { "epoch": 2.72, "grad_norm": 0.7690739035606384, "learning_rate": 1.2820175600772475e-05, "loss": 3.065, "step": 55498 }, { "epoch": 2.72, "grad_norm": 0.7162508964538574, "learning_rate": 1.281572326735073e-05, "loss": 2.8442, "step": 55499 }, { "epoch": 2.72, "grad_norm": 0.7276342511177063, "learning_rate": 1.281127169031707e-05, "loss": 3.0282, "step": 55500 }, { "epoch": 2.72, "grad_norm": 0.7825616002082825, "learning_rate": 1.2806820869683288e-05, "loss": 2.9913, "step": 55501 }, { "epoch": 2.72, "grad_norm": 0.742638111114502, "learning_rate": 1.280237080546107e-05, "loss": 2.76, "step": 55502 }, { "epoch": 2.72, "grad_norm": 0.750740647315979, "learning_rate": 1.2797921497662145e-05, "loss": 3.0382, "step": 55503 }, { "epoch": 2.72, "grad_norm": 0.7699446082115173, "learning_rate": 1.2793472946298233e-05, "loss": 2.8473, "step": 55504 }, { "epoch": 2.72, "grad_norm": 0.7575497627258301, "learning_rate": 1.2789025151380994e-05, "loss": 2.777, "step": 55505 }, { "epoch": 2.72, "grad_norm": 0.9555964469909668, "learning_rate": 1.2784578112922217e-05, "loss": 2.9411, "step": 55506 }, { "epoch": 2.72, "grad_norm": 0.785099983215332, "learning_rate": 1.278013183093356e-05, "loss": 2.6507, "step": 55507 }, { "epoch": 2.72, "grad_norm": 0.7826633453369141, "learning_rate": 1.277568630542678e-05, "loss": 2.8064, "step": 55508 }, { "epoch": 2.72, "grad_norm": 0.745834469795227, "learning_rate": 1.2771241536413534e-05, "loss": 2.7561, "step": 55509 }, { "epoch": 2.72, "grad_norm": 0.7851356267929077, "learning_rate": 1.2766797523905614e-05, "loss": 2.7275, "step": 55510 }, { "epoch": 2.72, "grad_norm": 0.747675895690918, "learning_rate": 1.2762354267914643e-05, "loss": 2.7524, "step": 55511 }, { "epoch": 2.72, "grad_norm": 0.6990408897399902, "learning_rate": 1.2757911768452344e-05, "loss": 2.7738, "step": 55512 }, { "epoch": 2.72, "grad_norm": 0.7718357443809509, "learning_rate": 1.2753470025530444e-05, "loss": 2.9192, "step": 55513 }, { "epoch": 2.72, "grad_norm": 0.7508569359779358, "learning_rate": 1.2749029039160597e-05, "loss": 2.9562, "step": 55514 }, { "epoch": 2.72, "grad_norm": 0.777606189250946, "learning_rate": 1.2744588809354495e-05, "loss": 3.0925, "step": 55515 }, { "epoch": 2.72, "grad_norm": 0.7705445885658264, "learning_rate": 1.2740149336123928e-05, "loss": 2.9461, "step": 55516 }, { "epoch": 2.72, "grad_norm": 0.7426809072494507, "learning_rate": 1.2735710619480488e-05, "loss": 2.9199, "step": 55517 }, { "epoch": 2.72, "grad_norm": 0.7444571852684021, "learning_rate": 1.2731272659435932e-05, "loss": 2.8854, "step": 55518 }, { "epoch": 2.72, "grad_norm": 0.7581663131713867, "learning_rate": 1.2726835456001916e-05, "loss": 2.9637, "step": 55519 }, { "epoch": 2.72, "grad_norm": 0.77213054895401, "learning_rate": 1.2722399009190098e-05, "loss": 2.7211, "step": 55520 }, { "epoch": 2.72, "grad_norm": 0.7387886643409729, "learning_rate": 1.2717963319012203e-05, "loss": 2.9042, "step": 55521 }, { "epoch": 2.72, "grad_norm": 0.7408025860786438, "learning_rate": 1.2713528385479888e-05, "loss": 2.9561, "step": 55522 }, { "epoch": 2.72, "grad_norm": 0.756806492805481, "learning_rate": 1.2709094208604908e-05, "loss": 2.6417, "step": 55523 }, { "epoch": 2.72, "grad_norm": 0.750133752822876, "learning_rate": 1.2704660788398824e-05, "loss": 3.0107, "step": 55524 }, { "epoch": 2.72, "grad_norm": 0.7915104627609253, "learning_rate": 1.270022812487339e-05, "loss": 2.6029, "step": 55525 }, { "epoch": 2.72, "grad_norm": 0.7304241061210632, "learning_rate": 1.26957962180403e-05, "loss": 2.8826, "step": 55526 }, { "epoch": 2.72, "grad_norm": 0.8078506588935852, "learning_rate": 1.269136506791114e-05, "loss": 2.917, "step": 55527 }, { "epoch": 2.72, "grad_norm": 0.7720636129379272, "learning_rate": 1.268693467449764e-05, "loss": 2.9377, "step": 55528 }, { "epoch": 2.72, "grad_norm": 0.7359655499458313, "learning_rate": 1.2682505037811452e-05, "loss": 2.8316, "step": 55529 }, { "epoch": 2.72, "grad_norm": 0.7408030033111572, "learning_rate": 1.2678076157864237e-05, "loss": 3.0292, "step": 55530 }, { "epoch": 2.72, "grad_norm": 0.7604274749755859, "learning_rate": 1.2673648034667749e-05, "loss": 2.8554, "step": 55531 }, { "epoch": 2.72, "grad_norm": 0.7211507558822632, "learning_rate": 1.2669220668233481e-05, "loss": 2.9626, "step": 55532 }, { "epoch": 2.72, "grad_norm": 0.7372564077377319, "learning_rate": 1.266479405857329e-05, "loss": 3.081, "step": 55533 }, { "epoch": 2.72, "grad_norm": 0.7603108882904053, "learning_rate": 1.2660368205698667e-05, "loss": 2.7713, "step": 55534 }, { "epoch": 2.72, "grad_norm": 0.7562865018844604, "learning_rate": 1.2655943109621337e-05, "loss": 2.7846, "step": 55535 }, { "epoch": 2.72, "grad_norm": 0.7241320013999939, "learning_rate": 1.2651518770352954e-05, "loss": 2.8448, "step": 55536 }, { "epoch": 2.72, "grad_norm": 0.7706020474433899, "learning_rate": 1.2647095187905177e-05, "loss": 3.0305, "step": 55537 }, { "epoch": 2.72, "grad_norm": 0.7894367575645447, "learning_rate": 1.2642672362289663e-05, "loss": 2.6124, "step": 55538 }, { "epoch": 2.72, "grad_norm": 0.7493867874145508, "learning_rate": 1.2638250293518004e-05, "loss": 2.8271, "step": 55539 }, { "epoch": 2.72, "grad_norm": 0.781067967414856, "learning_rate": 1.2633828981601923e-05, "loss": 3.0305, "step": 55540 }, { "epoch": 2.72, "grad_norm": 0.7739070653915405, "learning_rate": 1.2629408426553044e-05, "loss": 2.9046, "step": 55541 }, { "epoch": 2.72, "grad_norm": 0.7560089230537415, "learning_rate": 1.262498862838296e-05, "loss": 2.9307, "step": 55542 }, { "epoch": 2.72, "grad_norm": 0.7880851030349731, "learning_rate": 1.262056958710339e-05, "loss": 2.7827, "step": 55543 }, { "epoch": 2.72, "grad_norm": 0.7638664841651917, "learning_rate": 1.2616151302725896e-05, "loss": 2.9314, "step": 55544 }, { "epoch": 2.72, "grad_norm": 0.7615971565246582, "learning_rate": 1.2611733775262133e-05, "loss": 2.8241, "step": 55545 }, { "epoch": 2.72, "grad_norm": 0.7791383266448975, "learning_rate": 1.2607317004723827e-05, "loss": 2.9721, "step": 55546 }, { "epoch": 2.72, "grad_norm": 0.7734250426292419, "learning_rate": 1.26029009911225e-05, "loss": 3.0511, "step": 55547 }, { "epoch": 2.72, "grad_norm": 0.7525005340576172, "learning_rate": 1.2598485734469843e-05, "loss": 3.016, "step": 55548 }, { "epoch": 2.72, "grad_norm": 0.7330466508865356, "learning_rate": 1.2594071234777413e-05, "loss": 2.9917, "step": 55549 }, { "epoch": 2.72, "grad_norm": 0.7011294364929199, "learning_rate": 1.2589657492056904e-05, "loss": 2.9246, "step": 55550 }, { "epoch": 2.72, "grad_norm": 0.7659233212471008, "learning_rate": 1.2585244506319936e-05, "loss": 2.9226, "step": 55551 }, { "epoch": 2.72, "grad_norm": 0.7099422812461853, "learning_rate": 1.2580832277578102e-05, "loss": 2.7514, "step": 55552 }, { "epoch": 2.72, "grad_norm": 0.7204059958457947, "learning_rate": 1.257642080584309e-05, "loss": 2.8493, "step": 55553 }, { "epoch": 2.72, "grad_norm": 0.719089150428772, "learning_rate": 1.2572010091126395e-05, "loss": 2.8074, "step": 55554 }, { "epoch": 2.72, "grad_norm": 0.7707274556159973, "learning_rate": 1.256760013343977e-05, "loss": 2.8689, "step": 55555 }, { "epoch": 2.72, "grad_norm": 0.7827194333076477, "learning_rate": 1.2563190932794775e-05, "loss": 2.8556, "step": 55556 }, { "epoch": 2.72, "grad_norm": 0.724220335483551, "learning_rate": 1.2558782489202967e-05, "loss": 2.8183, "step": 55557 }, { "epoch": 2.72, "grad_norm": 0.7621506452560425, "learning_rate": 1.255437480267607e-05, "loss": 2.8624, "step": 55558 }, { "epoch": 2.72, "grad_norm": 0.7910413146018982, "learning_rate": 1.2549967873225542e-05, "loss": 2.9232, "step": 55559 }, { "epoch": 2.72, "grad_norm": 0.7383802533149719, "learning_rate": 1.2545561700863105e-05, "loss": 2.8597, "step": 55560 }, { "epoch": 2.72, "grad_norm": 0.7192094326019287, "learning_rate": 1.2541156285600385e-05, "loss": 2.9147, "step": 55561 }, { "epoch": 2.72, "grad_norm": 0.7950126528739929, "learning_rate": 1.2536751627448938e-05, "loss": 2.9614, "step": 55562 }, { "epoch": 2.72, "grad_norm": 0.7479918599128723, "learning_rate": 1.2532347726420355e-05, "loss": 3.1158, "step": 55563 }, { "epoch": 2.72, "grad_norm": 0.7508048415184021, "learning_rate": 1.2527944582526196e-05, "loss": 3.1672, "step": 55564 }, { "epoch": 2.72, "grad_norm": 0.7449748516082764, "learning_rate": 1.2523542195778147e-05, "loss": 2.9723, "step": 55565 }, { "epoch": 2.72, "grad_norm": 0.731986939907074, "learning_rate": 1.2519140566187735e-05, "loss": 3.0262, "step": 55566 }, { "epoch": 2.72, "grad_norm": 0.7509217262268066, "learning_rate": 1.2514739693766585e-05, "loss": 3.002, "step": 55567 }, { "epoch": 2.72, "grad_norm": 0.8176628947257996, "learning_rate": 1.2510339578526286e-05, "loss": 2.9815, "step": 55568 }, { "epoch": 2.72, "grad_norm": 0.7532127499580383, "learning_rate": 1.250594022047846e-05, "loss": 2.8316, "step": 55569 }, { "epoch": 2.72, "grad_norm": 0.7925772070884705, "learning_rate": 1.2501541619634636e-05, "loss": 2.817, "step": 55570 }, { "epoch": 2.72, "grad_norm": 0.7416542768478394, "learning_rate": 1.2497143776006402e-05, "loss": 3.0308, "step": 55571 }, { "epoch": 2.72, "grad_norm": 0.7497258186340332, "learning_rate": 1.2492746689605349e-05, "loss": 2.9368, "step": 55572 }, { "epoch": 2.72, "grad_norm": 0.7755299210548401, "learning_rate": 1.24883503604431e-05, "loss": 2.9622, "step": 55573 }, { "epoch": 2.72, "grad_norm": 0.772011399269104, "learning_rate": 1.248395478853118e-05, "loss": 2.8229, "step": 55574 }, { "epoch": 2.72, "grad_norm": 0.7620413899421692, "learning_rate": 1.2479559973881248e-05, "loss": 2.9222, "step": 55575 }, { "epoch": 2.72, "grad_norm": 0.7677061557769775, "learning_rate": 1.2475165916504726e-05, "loss": 3.0159, "step": 55576 }, { "epoch": 2.72, "grad_norm": 0.72245854139328, "learning_rate": 1.2470772616413371e-05, "loss": 2.8573, "step": 55577 }, { "epoch": 2.72, "grad_norm": 0.7207547426223755, "learning_rate": 1.2466380073618642e-05, "loss": 2.7114, "step": 55578 }, { "epoch": 2.72, "grad_norm": 0.7339671850204468, "learning_rate": 1.2461988288132097e-05, "loss": 2.9585, "step": 55579 }, { "epoch": 2.72, "grad_norm": 0.7937641143798828, "learning_rate": 1.2457597259965358e-05, "loss": 2.8606, "step": 55580 }, { "epoch": 2.72, "grad_norm": 0.7468560338020325, "learning_rate": 1.2453206989129949e-05, "loss": 2.8659, "step": 55581 }, { "epoch": 2.72, "grad_norm": 0.7348960638046265, "learning_rate": 1.2448817475637462e-05, "loss": 2.9035, "step": 55582 }, { "epoch": 2.72, "grad_norm": 0.7143893241882324, "learning_rate": 1.2444428719499488e-05, "loss": 2.892, "step": 55583 }, { "epoch": 2.72, "grad_norm": 0.787746012210846, "learning_rate": 1.2440040720727518e-05, "loss": 2.8582, "step": 55584 }, { "epoch": 2.72, "grad_norm": 0.7754120826721191, "learning_rate": 1.2435653479333173e-05, "loss": 2.696, "step": 55585 }, { "epoch": 2.72, "grad_norm": 0.7287518382072449, "learning_rate": 1.2431266995327916e-05, "loss": 2.9811, "step": 55586 }, { "epoch": 2.72, "grad_norm": 0.7545494437217712, "learning_rate": 1.2426881268723365e-05, "loss": 2.7484, "step": 55587 }, { "epoch": 2.72, "grad_norm": 0.7642377614974976, "learning_rate": 1.2422496299531081e-05, "loss": 2.8666, "step": 55588 }, { "epoch": 2.72, "grad_norm": 0.7176851034164429, "learning_rate": 1.2418112087762588e-05, "loss": 2.8306, "step": 55589 }, { "epoch": 2.72, "grad_norm": 0.7618530988693237, "learning_rate": 1.2413728633429476e-05, "loss": 2.8173, "step": 55590 }, { "epoch": 2.72, "grad_norm": 0.7074477076530457, "learning_rate": 1.2409345936543235e-05, "loss": 2.847, "step": 55591 }, { "epoch": 2.72, "grad_norm": 0.7396611571311951, "learning_rate": 1.2404963997115391e-05, "loss": 2.8979, "step": 55592 }, { "epoch": 2.72, "grad_norm": 0.7340872287750244, "learning_rate": 1.24005828151576e-05, "loss": 2.9145, "step": 55593 }, { "epoch": 2.72, "grad_norm": 0.7579055428504944, "learning_rate": 1.2396202390681254e-05, "loss": 2.8307, "step": 55594 }, { "epoch": 2.72, "grad_norm": 0.7400532364845276, "learning_rate": 1.2391822723698009e-05, "loss": 2.8784, "step": 55595 }, { "epoch": 2.72, "grad_norm": 0.7686823606491089, "learning_rate": 1.238744381421929e-05, "loss": 2.8043, "step": 55596 }, { "epoch": 2.72, "grad_norm": 0.7867351770401001, "learning_rate": 1.2383065662256719e-05, "loss": 2.8821, "step": 55597 }, { "epoch": 2.72, "grad_norm": 0.7557711005210876, "learning_rate": 1.2378688267821824e-05, "loss": 2.6193, "step": 55598 }, { "epoch": 2.72, "grad_norm": 0.7336000204086304, "learning_rate": 1.2374311630926126e-05, "loss": 2.7775, "step": 55599 }, { "epoch": 2.72, "grad_norm": 0.8032825589179993, "learning_rate": 1.2369935751581117e-05, "loss": 3.0363, "step": 55600 }, { "epoch": 2.72, "grad_norm": 0.7513951063156128, "learning_rate": 1.2365560629798322e-05, "loss": 2.9107, "step": 55601 }, { "epoch": 2.72, "grad_norm": 0.7409695386886597, "learning_rate": 1.2361186265589262e-05, "loss": 2.9644, "step": 55602 }, { "epoch": 2.72, "grad_norm": 0.7681583166122437, "learning_rate": 1.2356812658965532e-05, "loss": 2.9018, "step": 55603 }, { "epoch": 2.73, "grad_norm": 0.745311439037323, "learning_rate": 1.2352439809938585e-05, "loss": 2.8465, "step": 55604 }, { "epoch": 2.73, "grad_norm": 0.7664110660552979, "learning_rate": 1.2348067718519949e-05, "loss": 3.0799, "step": 55605 }, { "epoch": 2.73, "grad_norm": 0.7889949679374695, "learning_rate": 1.234369638472118e-05, "loss": 2.9645, "step": 55606 }, { "epoch": 2.73, "grad_norm": 0.7036034464836121, "learning_rate": 1.233932580855367e-05, "loss": 2.9552, "step": 55607 }, { "epoch": 2.73, "grad_norm": 0.726105272769928, "learning_rate": 1.2334955990029105e-05, "loss": 3.079, "step": 55608 }, { "epoch": 2.73, "grad_norm": 0.741583526134491, "learning_rate": 1.2330586929158814e-05, "loss": 2.8413, "step": 55609 }, { "epoch": 2.73, "grad_norm": 0.7955318689346313, "learning_rate": 1.2326218625954454e-05, "loss": 2.9633, "step": 55610 }, { "epoch": 2.73, "grad_norm": 0.874595046043396, "learning_rate": 1.2321851080427448e-05, "loss": 2.8424, "step": 55611 }, { "epoch": 2.73, "grad_norm": 0.7419061064720154, "learning_rate": 1.2317484292589352e-05, "loss": 2.7988, "step": 55612 }, { "epoch": 2.73, "grad_norm": 0.8153803944587708, "learning_rate": 1.2313118262451627e-05, "loss": 2.7867, "step": 55613 }, { "epoch": 2.73, "grad_norm": 0.7266865968704224, "learning_rate": 1.2308752990025761e-05, "loss": 2.5696, "step": 55614 }, { "epoch": 2.73, "grad_norm": 0.7737489938735962, "learning_rate": 1.230438847532328e-05, "loss": 3.1006, "step": 55615 }, { "epoch": 2.73, "grad_norm": 0.762378990650177, "learning_rate": 1.2300024718355673e-05, "loss": 3.0235, "step": 55616 }, { "epoch": 2.73, "grad_norm": 0.710425078868866, "learning_rate": 1.2295661719134397e-05, "loss": 2.9223, "step": 55617 }, { "epoch": 2.73, "grad_norm": 0.7368878722190857, "learning_rate": 1.2291299477671047e-05, "loss": 2.8503, "step": 55618 }, { "epoch": 2.73, "grad_norm": 0.7075875997543335, "learning_rate": 1.2286937993976976e-05, "loss": 2.8905, "step": 55619 }, { "epoch": 2.73, "grad_norm": 0.7213077545166016, "learning_rate": 1.2282577268063777e-05, "loss": 2.8651, "step": 55620 }, { "epoch": 2.73, "grad_norm": 0.7512006163597107, "learning_rate": 1.2278217299942939e-05, "loss": 2.6974, "step": 55621 }, { "epoch": 2.73, "grad_norm": 0.7457561492919922, "learning_rate": 1.2273858089625821e-05, "loss": 3.0884, "step": 55622 }, { "epoch": 2.73, "grad_norm": 0.8021249771118164, "learning_rate": 1.2269499637124047e-05, "loss": 2.9176, "step": 55623 }, { "epoch": 2.73, "grad_norm": 0.7707574963569641, "learning_rate": 1.2265141942448975e-05, "loss": 2.8524, "step": 55624 }, { "epoch": 2.73, "grad_norm": 0.7147628664970398, "learning_rate": 1.2260785005612195e-05, "loss": 2.8479, "step": 55625 }, { "epoch": 2.73, "grad_norm": 0.7359575629234314, "learning_rate": 1.2256428826625098e-05, "loss": 2.7975, "step": 55626 }, { "epoch": 2.73, "grad_norm": 0.7357754707336426, "learning_rate": 1.2252073405499241e-05, "loss": 2.9908, "step": 55627 }, { "epoch": 2.73, "grad_norm": 0.8136209845542908, "learning_rate": 1.2247718742246016e-05, "loss": 2.988, "step": 55628 }, { "epoch": 2.73, "grad_norm": 0.7402750253677368, "learning_rate": 1.2243364836876912e-05, "loss": 2.8614, "step": 55629 }, { "epoch": 2.73, "grad_norm": 0.7957865595817566, "learning_rate": 1.2239011689403422e-05, "loss": 2.7521, "step": 55630 }, { "epoch": 2.73, "grad_norm": 0.7921202182769775, "learning_rate": 1.2234659299836969e-05, "loss": 2.8699, "step": 55631 }, { "epoch": 2.73, "grad_norm": 0.7857522964477539, "learning_rate": 1.223030766818901e-05, "loss": 2.8791, "step": 55632 }, { "epoch": 2.73, "grad_norm": 0.7415589094161987, "learning_rate": 1.2225956794471103e-05, "loss": 2.8202, "step": 55633 }, { "epoch": 2.73, "grad_norm": 0.7329033017158508, "learning_rate": 1.222160667869464e-05, "loss": 2.8956, "step": 55634 }, { "epoch": 2.73, "grad_norm": 0.7467180490493774, "learning_rate": 1.2217257320871076e-05, "loss": 2.7925, "step": 55635 }, { "epoch": 2.73, "grad_norm": 0.841463029384613, "learning_rate": 1.2212908721011837e-05, "loss": 2.9325, "step": 55636 }, { "epoch": 2.73, "grad_norm": 0.7936882376670837, "learning_rate": 1.2208560879128416e-05, "loss": 2.9749, "step": 55637 }, { "epoch": 2.73, "grad_norm": 0.7346864938735962, "learning_rate": 1.2204213795232298e-05, "loss": 2.9747, "step": 55638 }, { "epoch": 2.73, "grad_norm": 0.7802562713623047, "learning_rate": 1.2199867469334845e-05, "loss": 2.9534, "step": 55639 }, { "epoch": 2.73, "grad_norm": 0.7328370213508606, "learning_rate": 1.2195521901447614e-05, "loss": 2.9276, "step": 55640 }, { "epoch": 2.73, "grad_norm": 0.7416670918464661, "learning_rate": 1.2191177091581927e-05, "loss": 2.9684, "step": 55641 }, { "epoch": 2.73, "grad_norm": 0.7478400468826294, "learning_rate": 1.2186833039749344e-05, "loss": 3.1439, "step": 55642 }, { "epoch": 2.73, "grad_norm": 0.7259265780448914, "learning_rate": 1.218248974596122e-05, "loss": 2.9068, "step": 55643 }, { "epoch": 2.73, "grad_norm": 0.7286504507064819, "learning_rate": 1.2178147210229017e-05, "loss": 2.7912, "step": 55644 }, { "epoch": 2.73, "grad_norm": 0.804137647151947, "learning_rate": 1.2173805432564221e-05, "loss": 3.0854, "step": 55645 }, { "epoch": 2.73, "grad_norm": 0.7600886821746826, "learning_rate": 1.2169464412978192e-05, "loss": 3.1028, "step": 55646 }, { "epoch": 2.73, "grad_norm": 0.7712217569351196, "learning_rate": 1.2165124151482419e-05, "loss": 3.0731, "step": 55647 }, { "epoch": 2.73, "grad_norm": 0.7367176413536072, "learning_rate": 1.2160784648088295e-05, "loss": 2.9482, "step": 55648 }, { "epoch": 2.73, "grad_norm": 0.7509863376617432, "learning_rate": 1.2156445902807278e-05, "loss": 2.8918, "step": 55649 }, { "epoch": 2.73, "grad_norm": 0.7302809357643127, "learning_rate": 1.2152107915650821e-05, "loss": 3.0575, "step": 55650 }, { "epoch": 2.73, "grad_norm": 0.7326139211654663, "learning_rate": 1.2147770686630254e-05, "loss": 2.9015, "step": 55651 }, { "epoch": 2.73, "grad_norm": 0.722706139087677, "learning_rate": 1.2143434215757097e-05, "loss": 2.8909, "step": 55652 }, { "epoch": 2.73, "grad_norm": 0.7505283355712891, "learning_rate": 1.213909850304271e-05, "loss": 2.7945, "step": 55653 }, { "epoch": 2.73, "grad_norm": 0.7195035815238953, "learning_rate": 1.213476354849855e-05, "loss": 2.7821, "step": 55654 }, { "epoch": 2.73, "grad_norm": 0.7279053330421448, "learning_rate": 1.2130429352136039e-05, "loss": 2.8279, "step": 55655 }, { "epoch": 2.73, "grad_norm": 0.7183561325073242, "learning_rate": 1.2126095913966538e-05, "loss": 2.8743, "step": 55656 }, { "epoch": 2.73, "grad_norm": 0.7786692976951599, "learning_rate": 1.2121763234001536e-05, "loss": 2.9192, "step": 55657 }, { "epoch": 2.73, "grad_norm": 0.7574261426925659, "learning_rate": 1.211743131225239e-05, "loss": 2.8047, "step": 55658 }, { "epoch": 2.73, "grad_norm": 0.7224125862121582, "learning_rate": 1.2113100148730526e-05, "loss": 3.0287, "step": 55659 }, { "epoch": 2.73, "grad_norm": 0.7393490076065063, "learning_rate": 1.2108769743447366e-05, "loss": 2.8423, "step": 55660 }, { "epoch": 2.73, "grad_norm": 0.746552586555481, "learning_rate": 1.2104440096414269e-05, "loss": 2.7179, "step": 55661 }, { "epoch": 2.73, "grad_norm": 0.7337487936019897, "learning_rate": 1.2100111207642693e-05, "loss": 2.864, "step": 55662 }, { "epoch": 2.73, "grad_norm": 0.7381230592727661, "learning_rate": 1.2095783077143994e-05, "loss": 2.9342, "step": 55663 }, { "epoch": 2.73, "grad_norm": 0.7539899945259094, "learning_rate": 1.2091455704929631e-05, "loss": 2.8978, "step": 55664 }, { "epoch": 2.73, "grad_norm": 0.8014904856681824, "learning_rate": 1.208712909101096e-05, "loss": 2.7139, "step": 55665 }, { "epoch": 2.73, "grad_norm": 0.7007734775543213, "learning_rate": 1.2082803235399374e-05, "loss": 2.9909, "step": 55666 }, { "epoch": 2.73, "grad_norm": 0.7687050700187683, "learning_rate": 1.2078478138106262e-05, "loss": 2.9276, "step": 55667 }, { "epoch": 2.73, "grad_norm": 0.7429559230804443, "learning_rate": 1.2074153799143016e-05, "loss": 2.8966, "step": 55668 }, { "epoch": 2.73, "grad_norm": 0.7226108312606812, "learning_rate": 1.2069830218521025e-05, "loss": 2.9716, "step": 55669 }, { "epoch": 2.73, "grad_norm": 0.7323675751686096, "learning_rate": 1.206550739625175e-05, "loss": 2.9101, "step": 55670 }, { "epoch": 2.73, "grad_norm": 0.7690631151199341, "learning_rate": 1.2061185332346478e-05, "loss": 2.8661, "step": 55671 }, { "epoch": 2.73, "grad_norm": 0.8044900894165039, "learning_rate": 1.2056864026816671e-05, "loss": 2.8463, "step": 55672 }, { "epoch": 2.73, "grad_norm": 0.7190569043159485, "learning_rate": 1.2052543479673582e-05, "loss": 3.0036, "step": 55673 }, { "epoch": 2.73, "grad_norm": 0.7784371376037598, "learning_rate": 1.2048223690928704e-05, "loss": 2.9049, "step": 55674 }, { "epoch": 2.73, "grad_norm": 0.7170578241348267, "learning_rate": 1.2043904660593429e-05, "loss": 2.8486, "step": 55675 }, { "epoch": 2.73, "grad_norm": 0.7676547765731812, "learning_rate": 1.2039586388679046e-05, "loss": 2.9257, "step": 55676 }, { "epoch": 2.73, "grad_norm": 0.7858740091323853, "learning_rate": 1.2035268875197014e-05, "loss": 2.7855, "step": 55677 }, { "epoch": 2.73, "grad_norm": 0.7509922981262207, "learning_rate": 1.2030952120158621e-05, "loss": 3.0946, "step": 55678 }, { "epoch": 2.73, "grad_norm": 0.7644749283790588, "learning_rate": 1.2026636123575329e-05, "loss": 2.892, "step": 55679 }, { "epoch": 2.73, "grad_norm": 0.7936258912086487, "learning_rate": 1.2022320885458458e-05, "loss": 2.7818, "step": 55680 }, { "epoch": 2.73, "grad_norm": 0.7399991750717163, "learning_rate": 1.2018006405819335e-05, "loss": 3.1416, "step": 55681 }, { "epoch": 2.73, "grad_norm": 0.7325205206871033, "learning_rate": 1.2013692684669386e-05, "loss": 2.7667, "step": 55682 }, { "epoch": 2.73, "grad_norm": 0.7739435434341431, "learning_rate": 1.2009379722019896e-05, "loss": 2.9389, "step": 55683 }, { "epoch": 2.73, "grad_norm": 0.769698441028595, "learning_rate": 1.2005067517882294e-05, "loss": 2.8377, "step": 55684 }, { "epoch": 2.73, "grad_norm": 0.752373456954956, "learning_rate": 1.2000756072267936e-05, "loss": 2.7622, "step": 55685 }, { "epoch": 2.73, "grad_norm": 0.7731847763061523, "learning_rate": 1.1996445385188181e-05, "loss": 3.0762, "step": 55686 }, { "epoch": 2.73, "grad_norm": 0.7705298662185669, "learning_rate": 1.199213545665435e-05, "loss": 2.8453, "step": 55687 }, { "epoch": 2.73, "grad_norm": 0.7608867287635803, "learning_rate": 1.1987826286677771e-05, "loss": 3.0201, "step": 55688 }, { "epoch": 2.73, "grad_norm": 0.7582928538322449, "learning_rate": 1.1983517875269832e-05, "loss": 2.9205, "step": 55689 }, { "epoch": 2.73, "grad_norm": 0.7286206483840942, "learning_rate": 1.1979210222441925e-05, "loss": 2.9747, "step": 55690 }, { "epoch": 2.73, "grad_norm": 0.7775378227233887, "learning_rate": 1.1974903328205276e-05, "loss": 2.792, "step": 55691 }, { "epoch": 2.73, "grad_norm": 0.7497450709342957, "learning_rate": 1.1970597192571374e-05, "loss": 2.9682, "step": 55692 }, { "epoch": 2.73, "grad_norm": 0.7265482544898987, "learning_rate": 1.1966291815551477e-05, "loss": 2.9189, "step": 55693 }, { "epoch": 2.73, "grad_norm": 0.7717911601066589, "learning_rate": 1.1961987197156876e-05, "loss": 3.1288, "step": 55694 }, { "epoch": 2.73, "grad_norm": 0.803691565990448, "learning_rate": 1.1957683337399026e-05, "loss": 2.978, "step": 55695 }, { "epoch": 2.73, "grad_norm": 0.7337557673454285, "learning_rate": 1.1953380236289156e-05, "loss": 2.7589, "step": 55696 }, { "epoch": 2.73, "grad_norm": 0.7882424592971802, "learning_rate": 1.1949077893838688e-05, "loss": 2.9169, "step": 55697 }, { "epoch": 2.73, "grad_norm": 0.7700529098510742, "learning_rate": 1.194477631005888e-05, "loss": 2.88, "step": 55698 }, { "epoch": 2.73, "grad_norm": 0.7207964658737183, "learning_rate": 1.1940475484961087e-05, "loss": 2.7995, "step": 55699 }, { "epoch": 2.73, "grad_norm": 0.7262753844261169, "learning_rate": 1.193617541855667e-05, "loss": 2.9706, "step": 55700 }, { "epoch": 2.73, "grad_norm": 0.7570127844810486, "learning_rate": 1.1931876110856953e-05, "loss": 2.9817, "step": 55701 }, { "epoch": 2.73, "grad_norm": 0.7883241176605225, "learning_rate": 1.1927577561873225e-05, "loss": 2.7953, "step": 55702 }, { "epoch": 2.73, "grad_norm": 0.773369550704956, "learning_rate": 1.1923279771616778e-05, "loss": 2.8493, "step": 55703 }, { "epoch": 2.73, "grad_norm": 0.7068461179733276, "learning_rate": 1.1918982740098971e-05, "loss": 3.0174, "step": 55704 }, { "epoch": 2.73, "grad_norm": 0.7062078714370728, "learning_rate": 1.1914686467331158e-05, "loss": 2.6588, "step": 55705 }, { "epoch": 2.73, "grad_norm": 0.7688685059547424, "learning_rate": 1.1910390953324567e-05, "loss": 2.9203, "step": 55706 }, { "epoch": 2.73, "grad_norm": 0.7777459621429443, "learning_rate": 1.190609619809062e-05, "loss": 2.9701, "step": 55707 }, { "epoch": 2.73, "grad_norm": 0.7545894384384155, "learning_rate": 1.1901802201640542e-05, "loss": 2.8712, "step": 55708 }, { "epoch": 2.73, "grad_norm": 0.7288576364517212, "learning_rate": 1.1897508963985659e-05, "loss": 2.7988, "step": 55709 }, { "epoch": 2.73, "grad_norm": 0.806057333946228, "learning_rate": 1.1893216485137325e-05, "loss": 2.8109, "step": 55710 }, { "epoch": 2.73, "grad_norm": 0.7674176692962646, "learning_rate": 1.18889247651068e-05, "loss": 2.8256, "step": 55711 }, { "epoch": 2.73, "grad_norm": 0.7585336565971375, "learning_rate": 1.1884633803905408e-05, "loss": 2.8867, "step": 55712 }, { "epoch": 2.73, "grad_norm": 0.7617801427841187, "learning_rate": 1.1880343601544406e-05, "loss": 2.9724, "step": 55713 }, { "epoch": 2.73, "grad_norm": 0.814563512802124, "learning_rate": 1.1876054158035152e-05, "loss": 2.8553, "step": 55714 }, { "epoch": 2.73, "grad_norm": 0.7293022871017456, "learning_rate": 1.187176547338894e-05, "loss": 2.9574, "step": 55715 }, { "epoch": 2.73, "grad_norm": 0.7652244567871094, "learning_rate": 1.1867477547616988e-05, "loss": 2.9344, "step": 55716 }, { "epoch": 2.73, "grad_norm": 0.7348045706748962, "learning_rate": 1.1863190380730725e-05, "loss": 3.0233, "step": 55717 }, { "epoch": 2.73, "grad_norm": 0.6973322033882141, "learning_rate": 1.1858903972741308e-05, "loss": 2.9189, "step": 55718 }, { "epoch": 2.73, "grad_norm": 0.7177212238311768, "learning_rate": 1.1854618323660092e-05, "loss": 2.5284, "step": 55719 }, { "epoch": 2.73, "grad_norm": 0.7871966361999512, "learning_rate": 1.1850333433498372e-05, "loss": 3.0959, "step": 55720 }, { "epoch": 2.73, "grad_norm": 0.7772104144096375, "learning_rate": 1.1846049302267369e-05, "loss": 2.8529, "step": 55721 }, { "epoch": 2.73, "grad_norm": 0.7406902313232422, "learning_rate": 1.1841765929978476e-05, "loss": 2.917, "step": 55722 }, { "epoch": 2.73, "grad_norm": 0.7036511301994324, "learning_rate": 1.1837483316642915e-05, "loss": 3.015, "step": 55723 }, { "epoch": 2.73, "grad_norm": 0.7832501530647278, "learning_rate": 1.183320146227198e-05, "loss": 2.8413, "step": 55724 }, { "epoch": 2.73, "grad_norm": 0.7509165406227112, "learning_rate": 1.182892036687686e-05, "loss": 2.9039, "step": 55725 }, { "epoch": 2.73, "grad_norm": 0.7857611179351807, "learning_rate": 1.1824640030468945e-05, "loss": 3.0673, "step": 55726 }, { "epoch": 2.73, "grad_norm": 0.7503923177719116, "learning_rate": 1.1820360453059463e-05, "loss": 2.8432, "step": 55727 }, { "epoch": 2.73, "grad_norm": 0.7694743275642395, "learning_rate": 1.1816081634659668e-05, "loss": 2.9747, "step": 55728 }, { "epoch": 2.73, "grad_norm": 0.7916954755783081, "learning_rate": 1.1811803575280921e-05, "loss": 2.7779, "step": 55729 }, { "epoch": 2.73, "grad_norm": 0.8194757699966431, "learning_rate": 1.1807526274934375e-05, "loss": 2.7108, "step": 55730 }, { "epoch": 2.73, "grad_norm": 0.7391253113746643, "learning_rate": 1.1803249733631326e-05, "loss": 3.1041, "step": 55731 }, { "epoch": 2.73, "grad_norm": 0.7366408705711365, "learning_rate": 1.1798973951383094e-05, "loss": 2.9705, "step": 55732 }, { "epoch": 2.73, "grad_norm": 0.7290082573890686, "learning_rate": 1.179469892820084e-05, "loss": 2.6135, "step": 55733 }, { "epoch": 2.73, "grad_norm": 0.7617344856262207, "learning_rate": 1.1790424664095954e-05, "loss": 3.099, "step": 55734 }, { "epoch": 2.73, "grad_norm": 0.7532030940055847, "learning_rate": 1.178615115907956e-05, "loss": 2.7876, "step": 55735 }, { "epoch": 2.73, "grad_norm": 0.7590699195861816, "learning_rate": 1.1781878413163015e-05, "loss": 2.9633, "step": 55736 }, { "epoch": 2.73, "grad_norm": 0.7126039862632751, "learning_rate": 1.1777606426357544e-05, "loss": 2.9342, "step": 55737 }, { "epoch": 2.73, "grad_norm": 0.7936747074127197, "learning_rate": 1.1773335198674338e-05, "loss": 2.7536, "step": 55738 }, { "epoch": 2.73, "grad_norm": 0.7752701044082642, "learning_rate": 1.1769064730124756e-05, "loss": 3.0024, "step": 55739 }, { "epoch": 2.73, "grad_norm": 0.7142334580421448, "learning_rate": 1.176479502071992e-05, "loss": 3.15, "step": 55740 }, { "epoch": 2.73, "grad_norm": 0.7375084757804871, "learning_rate": 1.1760526070471154e-05, "loss": 2.8894, "step": 55741 }, { "epoch": 2.73, "grad_norm": 0.7870582342147827, "learning_rate": 1.1756257879389752e-05, "loss": 2.8652, "step": 55742 }, { "epoch": 2.73, "grad_norm": 0.7322790026664734, "learning_rate": 1.1751990447486836e-05, "loss": 3.0524, "step": 55743 }, { "epoch": 2.73, "grad_norm": 0.7427972555160522, "learning_rate": 1.1747723774773733e-05, "loss": 2.6524, "step": 55744 }, { "epoch": 2.73, "grad_norm": 0.766864538192749, "learning_rate": 1.1743457861261662e-05, "loss": 2.7924, "step": 55745 }, { "epoch": 2.73, "grad_norm": 0.729448139667511, "learning_rate": 1.1739192706961786e-05, "loss": 2.9511, "step": 55746 }, { "epoch": 2.73, "grad_norm": 0.7730950117111206, "learning_rate": 1.1734928311885461e-05, "loss": 2.9649, "step": 55747 }, { "epoch": 2.73, "grad_norm": 0.7573014497756958, "learning_rate": 1.173066467604381e-05, "loss": 2.9444, "step": 55748 }, { "epoch": 2.73, "grad_norm": 0.768448531627655, "learning_rate": 1.172640179944816e-05, "loss": 2.8058, "step": 55749 }, { "epoch": 2.73, "grad_norm": 0.7829905152320862, "learning_rate": 1.1722139682109633e-05, "loss": 2.6185, "step": 55750 }, { "epoch": 2.73, "grad_norm": 0.7292187809944153, "learning_rate": 1.1717878324039554e-05, "loss": 2.919, "step": 55751 }, { "epoch": 2.73, "grad_norm": 0.7217176556587219, "learning_rate": 1.171361772524908e-05, "loss": 2.7037, "step": 55752 }, { "epoch": 2.73, "grad_norm": 0.7316499948501587, "learning_rate": 1.1709357885749437e-05, "loss": 2.9193, "step": 55753 }, { "epoch": 2.73, "grad_norm": 0.748083233833313, "learning_rate": 1.1705098805551916e-05, "loss": 2.8312, "step": 55754 }, { "epoch": 2.73, "grad_norm": 0.7519057393074036, "learning_rate": 1.1700840484667639e-05, "loss": 2.8975, "step": 55755 }, { "epoch": 2.73, "grad_norm": 0.7586154937744141, "learning_rate": 1.1696582923107866e-05, "loss": 2.8254, "step": 55756 }, { "epoch": 2.73, "grad_norm": 0.7953873872756958, "learning_rate": 1.1692326120883822e-05, "loss": 2.7867, "step": 55757 }, { "epoch": 2.73, "grad_norm": 0.733325183391571, "learning_rate": 1.1688070078006695e-05, "loss": 2.8756, "step": 55758 }, { "epoch": 2.73, "grad_norm": 0.7922911047935486, "learning_rate": 1.1683814794487744e-05, "loss": 3.1538, "step": 55759 }, { "epoch": 2.73, "grad_norm": 0.7304345369338989, "learning_rate": 1.1679560270338095e-05, "loss": 3.0398, "step": 55760 }, { "epoch": 2.73, "grad_norm": 0.8140565752983093, "learning_rate": 1.1675306505568972e-05, "loss": 2.7469, "step": 55761 }, { "epoch": 2.73, "grad_norm": 0.7204803824424744, "learning_rate": 1.1671053500191663e-05, "loss": 2.8597, "step": 55762 }, { "epoch": 2.73, "grad_norm": 0.7560591101646423, "learning_rate": 1.1666801254217262e-05, "loss": 2.7704, "step": 55763 }, { "epoch": 2.73, "grad_norm": 0.7345255017280579, "learning_rate": 1.1662549767657059e-05, "loss": 2.8837, "step": 55764 }, { "epoch": 2.73, "grad_norm": 0.7856339812278748, "learning_rate": 1.1658299040522145e-05, "loss": 2.748, "step": 55765 }, { "epoch": 2.73, "grad_norm": 0.7450258135795593, "learning_rate": 1.1654049072823845e-05, "loss": 2.8668, "step": 55766 }, { "epoch": 2.73, "grad_norm": 0.751708984375, "learning_rate": 1.1649799864573284e-05, "loss": 2.9728, "step": 55767 }, { "epoch": 2.73, "grad_norm": 0.7992194890975952, "learning_rate": 1.1645551415781618e-05, "loss": 2.9086, "step": 55768 }, { "epoch": 2.73, "grad_norm": 0.7067645192146301, "learning_rate": 1.1641303726460106e-05, "loss": 2.8046, "step": 55769 }, { "epoch": 2.73, "grad_norm": 0.7081477046012878, "learning_rate": 1.1637056796619871e-05, "loss": 3.1195, "step": 55770 }, { "epoch": 2.73, "grad_norm": 0.733273983001709, "learning_rate": 1.1632810626272104e-05, "loss": 2.7449, "step": 55771 }, { "epoch": 2.73, "grad_norm": 0.7776265740394592, "learning_rate": 1.1628565215428099e-05, "loss": 2.8875, "step": 55772 }, { "epoch": 2.73, "grad_norm": 0.7776868939399719, "learning_rate": 1.1624320564098911e-05, "loss": 2.5992, "step": 55773 }, { "epoch": 2.73, "grad_norm": 0.783747136592865, "learning_rate": 1.1620076672295797e-05, "loss": 2.9476, "step": 55774 }, { "epoch": 2.73, "grad_norm": 0.7857709527015686, "learning_rate": 1.161583354002985e-05, "loss": 3.107, "step": 55775 }, { "epoch": 2.73, "grad_norm": 0.729192316532135, "learning_rate": 1.1611591167312295e-05, "loss": 2.7004, "step": 55776 }, { "epoch": 2.73, "grad_norm": 0.7840185165405273, "learning_rate": 1.1607349554154355e-05, "loss": 3.0569, "step": 55777 }, { "epoch": 2.73, "grad_norm": 0.7330710887908936, "learning_rate": 1.1603108700567121e-05, "loss": 2.8325, "step": 55778 }, { "epoch": 2.73, "grad_norm": 0.7933141589164734, "learning_rate": 1.1598868606561817e-05, "loss": 2.984, "step": 55779 }, { "epoch": 2.73, "grad_norm": 0.7475676536560059, "learning_rate": 1.1594629272149568e-05, "loss": 2.7584, "step": 55780 }, { "epoch": 2.73, "grad_norm": 0.7124994397163391, "learning_rate": 1.1590390697341567e-05, "loss": 2.9443, "step": 55781 }, { "epoch": 2.73, "grad_norm": 0.7357338666915894, "learning_rate": 1.1586152882149002e-05, "loss": 3.0484, "step": 55782 }, { "epoch": 2.73, "grad_norm": 0.7515398859977722, "learning_rate": 1.1581915826582966e-05, "loss": 3.0142, "step": 55783 }, { "epoch": 2.73, "grad_norm": 0.8320146203041077, "learning_rate": 1.1577679530654681e-05, "loss": 2.9811, "step": 55784 }, { "epoch": 2.73, "grad_norm": 0.7434693574905396, "learning_rate": 1.1573443994375243e-05, "loss": 2.9766, "step": 55785 }, { "epoch": 2.73, "grad_norm": 0.7687200307846069, "learning_rate": 1.1569209217755871e-05, "loss": 2.8711, "step": 55786 }, { "epoch": 2.73, "grad_norm": 0.7312271595001221, "learning_rate": 1.1564975200807692e-05, "loss": 2.8606, "step": 55787 }, { "epoch": 2.73, "grad_norm": 0.77076256275177, "learning_rate": 1.1560741943541862e-05, "loss": 2.7525, "step": 55788 }, { "epoch": 2.73, "grad_norm": 0.7475319504737854, "learning_rate": 1.1556509445969542e-05, "loss": 2.8676, "step": 55789 }, { "epoch": 2.73, "grad_norm": 0.7481474876403809, "learning_rate": 1.155227770810182e-05, "loss": 2.707, "step": 55790 }, { "epoch": 2.73, "grad_norm": 0.7318161725997925, "learning_rate": 1.154804672994989e-05, "loss": 2.7252, "step": 55791 }, { "epoch": 2.73, "grad_norm": 0.7487803101539612, "learning_rate": 1.154381651152494e-05, "loss": 2.6743, "step": 55792 }, { "epoch": 2.73, "grad_norm": 0.7442032694816589, "learning_rate": 1.1539587052837995e-05, "loss": 2.8245, "step": 55793 }, { "epoch": 2.73, "grad_norm": 0.7299758195877075, "learning_rate": 1.1535358353900314e-05, "loss": 2.7304, "step": 55794 }, { "epoch": 2.73, "grad_norm": 0.7423626780509949, "learning_rate": 1.1531130414722988e-05, "loss": 2.8714, "step": 55795 }, { "epoch": 2.73, "grad_norm": 0.7554141879081726, "learning_rate": 1.1526903235317142e-05, "loss": 2.9592, "step": 55796 }, { "epoch": 2.73, "grad_norm": 0.7774835824966431, "learning_rate": 1.1522676815693899e-05, "loss": 2.9311, "step": 55797 }, { "epoch": 2.73, "grad_norm": 0.7268118262290955, "learning_rate": 1.1518451155864383e-05, "loss": 2.9979, "step": 55798 }, { "epoch": 2.73, "grad_norm": 0.7733696103096008, "learning_rate": 1.1514226255839786e-05, "loss": 2.7952, "step": 55799 }, { "epoch": 2.73, "grad_norm": 0.7510420680046082, "learning_rate": 1.1510002115631167e-05, "loss": 2.9407, "step": 55800 }, { "epoch": 2.73, "grad_norm": 0.7454401254653931, "learning_rate": 1.1505778735249715e-05, "loss": 2.8337, "step": 55801 }, { "epoch": 2.73, "grad_norm": 0.7559973001480103, "learning_rate": 1.1501556114706489e-05, "loss": 2.7546, "step": 55802 }, { "epoch": 2.73, "grad_norm": 0.7908011674880981, "learning_rate": 1.149733425401268e-05, "loss": 2.935, "step": 55803 }, { "epoch": 2.73, "grad_norm": 0.7167772650718689, "learning_rate": 1.1493113153179345e-05, "loss": 2.8562, "step": 55804 }, { "epoch": 2.73, "grad_norm": 0.8508129119873047, "learning_rate": 1.1488892812217609e-05, "loss": 2.9246, "step": 55805 }, { "epoch": 2.73, "grad_norm": 0.7644029855728149, "learning_rate": 1.148467323113863e-05, "loss": 2.9711, "step": 55806 }, { "epoch": 2.73, "grad_norm": 0.7415115237236023, "learning_rate": 1.1480454409953466e-05, "loss": 2.8969, "step": 55807 }, { "epoch": 2.74, "grad_norm": 0.7756478190422058, "learning_rate": 1.1476236348673273e-05, "loss": 3.0643, "step": 55808 }, { "epoch": 2.74, "grad_norm": 0.7457125782966614, "learning_rate": 1.1472019047309177e-05, "loss": 2.823, "step": 55809 }, { "epoch": 2.74, "grad_norm": 0.763870358467102, "learning_rate": 1.1467802505872236e-05, "loss": 3.125, "step": 55810 }, { "epoch": 2.74, "grad_norm": 0.8004308342933655, "learning_rate": 1.1463586724373574e-05, "loss": 2.7832, "step": 55811 }, { "epoch": 2.74, "grad_norm": 0.7365848422050476, "learning_rate": 1.1459371702824283e-05, "loss": 2.9564, "step": 55812 }, { "epoch": 2.74, "grad_norm": 0.746489942073822, "learning_rate": 1.1455157441235452e-05, "loss": 2.8939, "step": 55813 }, { "epoch": 2.74, "grad_norm": 0.7295230031013489, "learning_rate": 1.1450943939618273e-05, "loss": 2.8244, "step": 55814 }, { "epoch": 2.74, "grad_norm": 0.7783901691436768, "learning_rate": 1.1446731197983706e-05, "loss": 3.006, "step": 55815 }, { "epoch": 2.74, "grad_norm": 0.7580480575561523, "learning_rate": 1.1442519216342972e-05, "loss": 2.9875, "step": 55816 }, { "epoch": 2.74, "grad_norm": 0.7256256341934204, "learning_rate": 1.1438307994707097e-05, "loss": 3.119, "step": 55817 }, { "epoch": 2.74, "grad_norm": 0.7361882925033569, "learning_rate": 1.1434097533087139e-05, "loss": 2.7991, "step": 55818 }, { "epoch": 2.74, "grad_norm": 0.7718648910522461, "learning_rate": 1.1429887831494289e-05, "loss": 3.0094, "step": 55819 }, { "epoch": 2.74, "grad_norm": 0.7573488354682922, "learning_rate": 1.1425678889939538e-05, "loss": 2.8855, "step": 55820 }, { "epoch": 2.74, "grad_norm": 0.7509820461273193, "learning_rate": 1.1421470708434044e-05, "loss": 3.0757, "step": 55821 }, { "epoch": 2.74, "grad_norm": 0.7302854657173157, "learning_rate": 1.1417263286988798e-05, "loss": 3.0187, "step": 55822 }, { "epoch": 2.74, "grad_norm": 0.7011269927024841, "learning_rate": 1.1413056625614991e-05, "loss": 2.9904, "step": 55823 }, { "epoch": 2.74, "grad_norm": 0.8346005082130432, "learning_rate": 1.1408850724323648e-05, "loss": 3.1022, "step": 55824 }, { "epoch": 2.74, "grad_norm": 0.7266502380371094, "learning_rate": 1.140464558312586e-05, "loss": 2.9047, "step": 55825 }, { "epoch": 2.74, "grad_norm": 0.7775142788887024, "learning_rate": 1.1400441202032717e-05, "loss": 2.9262, "step": 55826 }, { "epoch": 2.74, "grad_norm": 0.7800171971321106, "learning_rate": 1.1396237581055212e-05, "loss": 2.7697, "step": 55827 }, { "epoch": 2.74, "grad_norm": 0.7560826539993286, "learning_rate": 1.1392034720204468e-05, "loss": 2.7127, "step": 55828 }, { "epoch": 2.74, "grad_norm": 0.7661461234092712, "learning_rate": 1.138783261949161e-05, "loss": 2.9436, "step": 55829 }, { "epoch": 2.74, "grad_norm": 0.7515476942062378, "learning_rate": 1.138363127892763e-05, "loss": 2.8215, "step": 55830 }, { "epoch": 2.74, "grad_norm": 0.7410834431648254, "learning_rate": 1.137943069852365e-05, "loss": 2.9039, "step": 55831 }, { "epoch": 2.74, "grad_norm": 0.7249680161476135, "learning_rate": 1.1375230878290697e-05, "loss": 2.8317, "step": 55832 }, { "epoch": 2.74, "grad_norm": 0.7653201818466187, "learning_rate": 1.1371031818239795e-05, "loss": 2.7605, "step": 55833 }, { "epoch": 2.74, "grad_norm": 0.7415288090705872, "learning_rate": 1.1366833518382102e-05, "loss": 3.0566, "step": 55834 }, { "epoch": 2.74, "grad_norm": 0.7455683946609497, "learning_rate": 1.1362635978728574e-05, "loss": 2.8293, "step": 55835 }, { "epoch": 2.74, "grad_norm": 0.7558901906013489, "learning_rate": 1.1358439199290337e-05, "loss": 2.8531, "step": 55836 }, { "epoch": 2.74, "grad_norm": 0.6817432641983032, "learning_rate": 1.1354243180078415e-05, "loss": 3.1373, "step": 55837 }, { "epoch": 2.74, "grad_norm": 0.7174025774002075, "learning_rate": 1.1350047921103866e-05, "loss": 2.8915, "step": 55838 }, { "epoch": 2.74, "grad_norm": 0.759171187877655, "learning_rate": 1.1345853422377749e-05, "loss": 2.965, "step": 55839 }, { "epoch": 2.74, "grad_norm": 0.7580920457839966, "learning_rate": 1.1341659683911086e-05, "loss": 3.0421, "step": 55840 }, { "epoch": 2.74, "grad_norm": 0.7607132792472839, "learning_rate": 1.133746670571497e-05, "loss": 2.8414, "step": 55841 }, { "epoch": 2.74, "grad_norm": 0.7223249077796936, "learning_rate": 1.133327448780036e-05, "loss": 3.2551, "step": 55842 }, { "epoch": 2.74, "grad_norm": 0.7324064373970032, "learning_rate": 1.1329083030178376e-05, "loss": 2.8549, "step": 55843 }, { "epoch": 2.74, "grad_norm": 0.8532474040985107, "learning_rate": 1.1324892332860047e-05, "loss": 2.9252, "step": 55844 }, { "epoch": 2.74, "grad_norm": 0.7820866703987122, "learning_rate": 1.1320702395856362e-05, "loss": 2.8606, "step": 55845 }, { "epoch": 2.74, "grad_norm": 0.7099783420562744, "learning_rate": 1.1316513219178413e-05, "loss": 2.785, "step": 55846 }, { "epoch": 2.74, "grad_norm": 0.7845707535743713, "learning_rate": 1.1312324802837225e-05, "loss": 2.8446, "step": 55847 }, { "epoch": 2.74, "grad_norm": 0.7959213852882385, "learning_rate": 1.1308137146843787e-05, "loss": 2.9116, "step": 55848 }, { "epoch": 2.74, "grad_norm": 0.780271589756012, "learning_rate": 1.130395025120916e-05, "loss": 2.7504, "step": 55849 }, { "epoch": 2.74, "grad_norm": 0.7800261378288269, "learning_rate": 1.1299764115944365e-05, "loss": 3.0029, "step": 55850 }, { "epoch": 2.74, "grad_norm": 0.7617143392562866, "learning_rate": 1.1295578741060463e-05, "loss": 2.8185, "step": 55851 }, { "epoch": 2.74, "grad_norm": 0.7690078616142273, "learning_rate": 1.1291394126568376e-05, "loss": 2.8685, "step": 55852 }, { "epoch": 2.74, "grad_norm": 0.7068555355072021, "learning_rate": 1.1287210272479264e-05, "loss": 2.9051, "step": 55853 }, { "epoch": 2.74, "grad_norm": 0.7279587388038635, "learning_rate": 1.1283027178804082e-05, "loss": 2.832, "step": 55854 }, { "epoch": 2.74, "grad_norm": 0.7110167145729065, "learning_rate": 1.1278844845553792e-05, "loss": 2.6555, "step": 55855 }, { "epoch": 2.74, "grad_norm": 0.7727231383323669, "learning_rate": 1.1274663272739482e-05, "loss": 2.8709, "step": 55856 }, { "epoch": 2.74, "grad_norm": 0.7116361856460571, "learning_rate": 1.127048246037211e-05, "loss": 3.0165, "step": 55857 }, { "epoch": 2.74, "grad_norm": 0.7671318054199219, "learning_rate": 1.1266302408462734e-05, "loss": 2.8943, "step": 55858 }, { "epoch": 2.74, "grad_norm": 0.7194930911064148, "learning_rate": 1.1262123117022382e-05, "loss": 2.8822, "step": 55859 }, { "epoch": 2.74, "grad_norm": 0.7566820979118347, "learning_rate": 1.1257944586061974e-05, "loss": 2.7942, "step": 55860 }, { "epoch": 2.74, "grad_norm": 0.717607855796814, "learning_rate": 1.1253766815592669e-05, "loss": 2.8937, "step": 55861 }, { "epoch": 2.74, "grad_norm": 0.7694738507270813, "learning_rate": 1.1249589805625292e-05, "loss": 2.8219, "step": 55862 }, { "epoch": 2.74, "grad_norm": 0.729606032371521, "learning_rate": 1.1245413556170901e-05, "loss": 2.5149, "step": 55863 }, { "epoch": 2.74, "grad_norm": 0.7487224340438843, "learning_rate": 1.1241238067240588e-05, "loss": 2.6698, "step": 55864 }, { "epoch": 2.74, "grad_norm": 0.7842611074447632, "learning_rate": 1.1237063338845209e-05, "loss": 2.8821, "step": 55865 }, { "epoch": 2.74, "grad_norm": 0.7518981695175171, "learning_rate": 1.123288937099589e-05, "loss": 2.8636, "step": 55866 }, { "epoch": 2.74, "grad_norm": 0.7385653257369995, "learning_rate": 1.1228716163703556e-05, "loss": 2.8735, "step": 55867 }, { "epoch": 2.74, "grad_norm": 0.7072750329971313, "learning_rate": 1.1224543716979195e-05, "loss": 2.8221, "step": 55868 }, { "epoch": 2.74, "grad_norm": 0.7266203165054321, "learning_rate": 1.1220372030833836e-05, "loss": 3.007, "step": 55869 }, { "epoch": 2.74, "grad_norm": 0.7844318151473999, "learning_rate": 1.12162011052784e-05, "loss": 2.8807, "step": 55870 }, { "epoch": 2.74, "grad_norm": 0.866727352142334, "learning_rate": 1.121203094032398e-05, "loss": 2.8678, "step": 55871 }, { "epoch": 2.74, "grad_norm": 0.7634037137031555, "learning_rate": 1.12078615359814e-05, "loss": 2.876, "step": 55872 }, { "epoch": 2.74, "grad_norm": 0.72465580701828, "learning_rate": 1.1203692892261785e-05, "loss": 2.9704, "step": 55873 }, { "epoch": 2.74, "grad_norm": 0.7288641929626465, "learning_rate": 1.1199525009176059e-05, "loss": 2.9168, "step": 55874 }, { "epoch": 2.74, "grad_norm": 0.7715079188346863, "learning_rate": 1.1195357886735213e-05, "loss": 2.7081, "step": 55875 }, { "epoch": 2.74, "grad_norm": 0.7418809533119202, "learning_rate": 1.1191191524950239e-05, "loss": 2.8808, "step": 55876 }, { "epoch": 2.74, "grad_norm": 0.7965562343597412, "learning_rate": 1.1187025923832027e-05, "loss": 3.0647, "step": 55877 }, { "epoch": 2.74, "grad_norm": 0.6859378218650818, "learning_rate": 1.1182861083391636e-05, "loss": 2.8672, "step": 55878 }, { "epoch": 2.74, "grad_norm": 0.7590106129646301, "learning_rate": 1.1178697003639958e-05, "loss": 2.8654, "step": 55879 }, { "epoch": 2.74, "grad_norm": 0.7052371501922607, "learning_rate": 1.117453368458805e-05, "loss": 2.7231, "step": 55880 }, { "epoch": 2.74, "grad_norm": 0.7554574608802795, "learning_rate": 1.1170371126246835e-05, "loss": 2.8292, "step": 55881 }, { "epoch": 2.74, "grad_norm": 0.7286297678947449, "learning_rate": 1.116620932862724e-05, "loss": 3.073, "step": 55882 }, { "epoch": 2.74, "grad_norm": 0.7669945359230042, "learning_rate": 1.1162048291740321e-05, "loss": 3.0026, "step": 55883 }, { "epoch": 2.74, "grad_norm": 0.7861215472221375, "learning_rate": 1.1157888015596972e-05, "loss": 2.9076, "step": 55884 }, { "epoch": 2.74, "grad_norm": 0.7036914825439453, "learning_rate": 1.1153728500208115e-05, "loss": 2.9243, "step": 55885 }, { "epoch": 2.74, "grad_norm": 0.7453054785728455, "learning_rate": 1.1149569745584774e-05, "loss": 2.9301, "step": 55886 }, { "epoch": 2.74, "grad_norm": 0.7406743764877319, "learning_rate": 1.1145411751737843e-05, "loss": 3.0202, "step": 55887 }, { "epoch": 2.74, "grad_norm": 0.7263259291648865, "learning_rate": 1.1141254518678344e-05, "loss": 3.0923, "step": 55888 }, { "epoch": 2.74, "grad_norm": 0.7786001563072205, "learning_rate": 1.113709804641717e-05, "loss": 3.0409, "step": 55889 }, { "epoch": 2.74, "grad_norm": 0.757474958896637, "learning_rate": 1.1132942334965311e-05, "loss": 2.7983, "step": 55890 }, { "epoch": 2.74, "grad_norm": 0.7858365774154663, "learning_rate": 1.1128787384333692e-05, "loss": 3.0745, "step": 55891 }, { "epoch": 2.74, "grad_norm": 0.7730826735496521, "learning_rate": 1.1124633194533206e-05, "loss": 2.7281, "step": 55892 }, { "epoch": 2.74, "grad_norm": 0.7334781885147095, "learning_rate": 1.1120479765574875e-05, "loss": 2.8114, "step": 55893 }, { "epoch": 2.74, "grad_norm": 0.7341852784156799, "learning_rate": 1.111632709746959e-05, "loss": 2.957, "step": 55894 }, { "epoch": 2.74, "grad_norm": 0.7085254788398743, "learning_rate": 1.1112175190228279e-05, "loss": 2.9096, "step": 55895 }, { "epoch": 2.74, "grad_norm": 0.7142402529716492, "learning_rate": 1.1108024043861963e-05, "loss": 3.1765, "step": 55896 }, { "epoch": 2.74, "grad_norm": 0.7631176710128784, "learning_rate": 1.1103873658381502e-05, "loss": 2.923, "step": 55897 }, { "epoch": 2.74, "grad_norm": 0.8920885920524597, "learning_rate": 1.109972403379782e-05, "loss": 3.0669, "step": 55898 }, { "epoch": 2.74, "grad_norm": 0.7416764497756958, "learning_rate": 1.109557517012184e-05, "loss": 2.7871, "step": 55899 }, { "epoch": 2.74, "grad_norm": 0.8150529265403748, "learning_rate": 1.1091427067364523e-05, "loss": 2.9976, "step": 55900 }, { "epoch": 2.74, "grad_norm": 0.7230525612831116, "learning_rate": 1.1087279725536824e-05, "loss": 2.9518, "step": 55901 }, { "epoch": 2.74, "grad_norm": 0.762174665927887, "learning_rate": 1.1083133144649569e-05, "loss": 3.2105, "step": 55902 }, { "epoch": 2.74, "grad_norm": 0.7683262228965759, "learning_rate": 1.1078987324713817e-05, "loss": 2.7405, "step": 55903 }, { "epoch": 2.74, "grad_norm": 0.7210031151771545, "learning_rate": 1.1074842265740325e-05, "loss": 3.0056, "step": 55904 }, { "epoch": 2.74, "grad_norm": 0.7431241273880005, "learning_rate": 1.1070697967740149e-05, "loss": 2.7492, "step": 55905 }, { "epoch": 2.74, "grad_norm": 0.7543492317199707, "learning_rate": 1.106655443072415e-05, "loss": 2.8373, "step": 55906 }, { "epoch": 2.74, "grad_norm": 0.7546671628952026, "learning_rate": 1.1062411654703218e-05, "loss": 2.8681, "step": 55907 }, { "epoch": 2.74, "grad_norm": 0.7264155745506287, "learning_rate": 1.105826963968831e-05, "loss": 2.7632, "step": 55908 }, { "epoch": 2.74, "grad_norm": 0.7544650435447693, "learning_rate": 1.1054128385690254e-05, "loss": 2.8906, "step": 55909 }, { "epoch": 2.74, "grad_norm": 0.7051803469657898, "learning_rate": 1.1049987892720036e-05, "loss": 2.8865, "step": 55910 }, { "epoch": 2.74, "grad_norm": 0.7877517938613892, "learning_rate": 1.1045848160788585e-05, "loss": 2.9269, "step": 55911 }, { "epoch": 2.74, "grad_norm": 0.7090002298355103, "learning_rate": 1.1041709189906756e-05, "loss": 3.0367, "step": 55912 }, { "epoch": 2.74, "grad_norm": 0.7225410342216492, "learning_rate": 1.1037570980085442e-05, "loss": 2.7446, "step": 55913 }, { "epoch": 2.74, "grad_norm": 0.7321907877922058, "learning_rate": 1.1033433531335533e-05, "loss": 2.8573, "step": 55914 }, { "epoch": 2.74, "grad_norm": 0.7515150904655457, "learning_rate": 1.1029296843667923e-05, "loss": 2.9509, "step": 55915 }, { "epoch": 2.74, "grad_norm": 0.7628140449523926, "learning_rate": 1.10251609170936e-05, "loss": 3.1318, "step": 55916 }, { "epoch": 2.74, "grad_norm": 0.7710598111152649, "learning_rate": 1.1021025751623324e-05, "loss": 2.8181, "step": 55917 }, { "epoch": 2.74, "grad_norm": 0.7168538570404053, "learning_rate": 1.1016891347268087e-05, "loss": 2.9143, "step": 55918 }, { "epoch": 2.74, "grad_norm": 0.7750139236450195, "learning_rate": 1.1012757704038778e-05, "loss": 2.9098, "step": 55919 }, { "epoch": 2.74, "grad_norm": 0.8607103824615479, "learning_rate": 1.1008624821946188e-05, "loss": 2.8608, "step": 55920 }, { "epoch": 2.74, "grad_norm": 0.7440026998519897, "learning_rate": 1.1004492701001278e-05, "loss": 2.8417, "step": 55921 }, { "epoch": 2.74, "grad_norm": 0.7108123898506165, "learning_rate": 1.1000361341214902e-05, "loss": 2.9667, "step": 55922 }, { "epoch": 2.74, "grad_norm": 0.7313125729560852, "learning_rate": 1.099623074259799e-05, "loss": 2.6806, "step": 55923 }, { "epoch": 2.74, "grad_norm": 0.8081473708152771, "learning_rate": 1.099210090516136e-05, "loss": 2.704, "step": 55924 }, { "epoch": 2.74, "grad_norm": 0.7507513761520386, "learning_rate": 1.098797182891591e-05, "loss": 2.9803, "step": 55925 }, { "epoch": 2.74, "grad_norm": 0.770849347114563, "learning_rate": 1.0983843513872559e-05, "loss": 3.0487, "step": 55926 }, { "epoch": 2.74, "grad_norm": 0.7244208455085754, "learning_rate": 1.0979715960042135e-05, "loss": 2.879, "step": 55927 }, { "epoch": 2.74, "grad_norm": 0.7546032667160034, "learning_rate": 1.0975589167435527e-05, "loss": 3.0663, "step": 55928 }, { "epoch": 2.74, "grad_norm": 0.8063235878944397, "learning_rate": 1.097146313606353e-05, "loss": 2.9658, "step": 55929 }, { "epoch": 2.74, "grad_norm": 0.714979887008667, "learning_rate": 1.0967337865937098e-05, "loss": 2.9623, "step": 55930 }, { "epoch": 2.74, "grad_norm": 0.7428481578826904, "learning_rate": 1.096321335706709e-05, "loss": 2.9092, "step": 55931 }, { "epoch": 2.74, "grad_norm": 0.7758767008781433, "learning_rate": 1.0959089609464334e-05, "loss": 2.7544, "step": 55932 }, { "epoch": 2.74, "grad_norm": 0.7635376453399658, "learning_rate": 1.0954966623139748e-05, "loss": 2.9114, "step": 55933 }, { "epoch": 2.74, "grad_norm": 0.7332720160484314, "learning_rate": 1.095084439810413e-05, "loss": 2.836, "step": 55934 }, { "epoch": 2.74, "grad_norm": 0.7916608452796936, "learning_rate": 1.0946722934368334e-05, "loss": 2.814, "step": 55935 }, { "epoch": 2.74, "grad_norm": 0.7521407008171082, "learning_rate": 1.0942602231943287e-05, "loss": 2.8053, "step": 55936 }, { "epoch": 2.74, "grad_norm": 0.8015977144241333, "learning_rate": 1.0938482290839745e-05, "loss": 2.9304, "step": 55937 }, { "epoch": 2.74, "grad_norm": 0.7351771593093872, "learning_rate": 1.0934363111068668e-05, "loss": 2.9438, "step": 55938 }, { "epoch": 2.74, "grad_norm": 0.7315554022789001, "learning_rate": 1.0930244692640777e-05, "loss": 2.8405, "step": 55939 }, { "epoch": 2.74, "grad_norm": 0.7486281394958496, "learning_rate": 1.0926127035567067e-05, "loss": 2.9316, "step": 55940 }, { "epoch": 2.74, "grad_norm": 0.721809446811676, "learning_rate": 1.0922010139858262e-05, "loss": 2.9926, "step": 55941 }, { "epoch": 2.74, "grad_norm": 0.7924941778182983, "learning_rate": 1.0917894005525251e-05, "loss": 2.8259, "step": 55942 }, { "epoch": 2.74, "grad_norm": 0.7480098009109497, "learning_rate": 1.0913778632578862e-05, "loss": 2.9488, "step": 55943 }, { "epoch": 2.74, "grad_norm": 0.7476303577423096, "learning_rate": 1.090966402102995e-05, "loss": 2.8346, "step": 55944 }, { "epoch": 2.74, "grad_norm": 0.7705007195472717, "learning_rate": 1.0905550170889344e-05, "loss": 2.9332, "step": 55945 }, { "epoch": 2.74, "grad_norm": 0.745185375213623, "learning_rate": 1.0901437082167896e-05, "loss": 2.8909, "step": 55946 }, { "epoch": 2.74, "grad_norm": 0.7382268905639648, "learning_rate": 1.0897324754876402e-05, "loss": 2.8413, "step": 55947 }, { "epoch": 2.74, "grad_norm": 0.8054601550102234, "learning_rate": 1.0893213189025751e-05, "loss": 2.7561, "step": 55948 }, { "epoch": 2.74, "grad_norm": 0.7425912022590637, "learning_rate": 1.0889102384626702e-05, "loss": 2.9528, "step": 55949 }, { "epoch": 2.74, "grad_norm": 0.7311367988586426, "learning_rate": 1.0884992341690113e-05, "loss": 2.9087, "step": 55950 }, { "epoch": 2.74, "grad_norm": 0.752752959728241, "learning_rate": 1.0880883060226875e-05, "loss": 3.0692, "step": 55951 }, { "epoch": 2.74, "grad_norm": 0.74880051612854, "learning_rate": 1.087677454024768e-05, "loss": 2.8871, "step": 55952 }, { "epoch": 2.74, "grad_norm": 0.776472806930542, "learning_rate": 1.087266678176345e-05, "loss": 2.823, "step": 55953 }, { "epoch": 2.74, "grad_norm": 0.7268320322036743, "learning_rate": 1.0868559784784947e-05, "loss": 2.6937, "step": 55954 }, { "epoch": 2.74, "grad_norm": 0.7472983598709106, "learning_rate": 1.086445354932306e-05, "loss": 3.2036, "step": 55955 }, { "epoch": 2.74, "grad_norm": 0.7563039064407349, "learning_rate": 1.0860348075388514e-05, "loss": 2.9643, "step": 55956 }, { "epoch": 2.74, "grad_norm": 0.72945237159729, "learning_rate": 1.0856243362992167e-05, "loss": 2.9227, "step": 55957 }, { "epoch": 2.74, "grad_norm": 0.7762035727500916, "learning_rate": 1.0852139412144877e-05, "loss": 2.5892, "step": 55958 }, { "epoch": 2.74, "grad_norm": 0.7469337582588196, "learning_rate": 1.0848036222857337e-05, "loss": 3.0108, "step": 55959 }, { "epoch": 2.74, "grad_norm": 0.6988431215286255, "learning_rate": 1.084393379514047e-05, "loss": 3.0516, "step": 55960 }, { "epoch": 2.74, "grad_norm": 0.7568329572677612, "learning_rate": 1.0839832129005e-05, "loss": 2.9372, "step": 55961 }, { "epoch": 2.74, "grad_norm": 0.7815048098564148, "learning_rate": 1.0835731224461786e-05, "loss": 2.7753, "step": 55962 }, { "epoch": 2.74, "grad_norm": 0.7493932843208313, "learning_rate": 1.0831631081521618e-05, "loss": 2.9575, "step": 55963 }, { "epoch": 2.74, "grad_norm": 0.7615856528282166, "learning_rate": 1.0827531700195224e-05, "loss": 2.6857, "step": 55964 }, { "epoch": 2.74, "grad_norm": 0.889343798160553, "learning_rate": 1.0823433080493526e-05, "loss": 2.6804, "step": 55965 }, { "epoch": 2.74, "grad_norm": 0.7530947923660278, "learning_rate": 1.0819335222427183e-05, "loss": 2.8449, "step": 55966 }, { "epoch": 2.74, "grad_norm": 0.7794963121414185, "learning_rate": 1.0815238126007086e-05, "loss": 2.8913, "step": 55967 }, { "epoch": 2.74, "grad_norm": 0.7236289381980896, "learning_rate": 1.0811141791244026e-05, "loss": 2.8246, "step": 55968 }, { "epoch": 2.74, "grad_norm": 0.7335292100906372, "learning_rate": 1.0807046218148729e-05, "loss": 2.9881, "step": 55969 }, { "epoch": 2.74, "grad_norm": 0.7387674450874329, "learning_rate": 1.0802951406732018e-05, "loss": 2.9562, "step": 55970 }, { "epoch": 2.74, "grad_norm": 0.754050076007843, "learning_rate": 1.0798857357004719e-05, "loss": 2.7057, "step": 55971 }, { "epoch": 2.74, "grad_norm": 0.7682558298110962, "learning_rate": 1.0794764068977524e-05, "loss": 2.7377, "step": 55972 }, { "epoch": 2.74, "grad_norm": 0.7760759592056274, "learning_rate": 1.0790671542661289e-05, "loss": 2.8588, "step": 55973 }, { "epoch": 2.74, "grad_norm": 0.74440997838974, "learning_rate": 1.0786579778066739e-05, "loss": 2.9655, "step": 55974 }, { "epoch": 2.74, "grad_norm": 0.7802349925041199, "learning_rate": 1.07824887752047e-05, "loss": 2.9066, "step": 55975 }, { "epoch": 2.74, "grad_norm": 0.7338876724243164, "learning_rate": 1.0778398534085898e-05, "loss": 3.0789, "step": 55976 }, { "epoch": 2.74, "grad_norm": 0.7808542251586914, "learning_rate": 1.0774309054721187e-05, "loss": 2.7526, "step": 55977 }, { "epoch": 2.74, "grad_norm": 0.7372686862945557, "learning_rate": 1.077022033712126e-05, "loss": 2.9052, "step": 55978 }, { "epoch": 2.74, "grad_norm": 0.8141536116600037, "learning_rate": 1.0766132381296876e-05, "loss": 2.8219, "step": 55979 }, { "epoch": 2.74, "grad_norm": 0.7406730651855469, "learning_rate": 1.076204518725886e-05, "loss": 2.9798, "step": 55980 }, { "epoch": 2.74, "grad_norm": 0.7653099298477173, "learning_rate": 1.0757958755017937e-05, "loss": 2.756, "step": 55981 }, { "epoch": 2.74, "grad_norm": 0.7098361849784851, "learning_rate": 1.0753873084584896e-05, "loss": 2.767, "step": 55982 }, { "epoch": 2.74, "grad_norm": 0.7831395268440247, "learning_rate": 1.0749788175970497e-05, "loss": 2.6796, "step": 55983 }, { "epoch": 2.74, "grad_norm": 0.79066401720047, "learning_rate": 1.0745704029185464e-05, "loss": 2.7994, "step": 55984 }, { "epoch": 2.74, "grad_norm": 0.7196535468101501, "learning_rate": 1.0741620644240622e-05, "loss": 2.7152, "step": 55985 }, { "epoch": 2.74, "grad_norm": 0.8042973875999451, "learning_rate": 1.0737538021146663e-05, "loss": 2.9082, "step": 55986 }, { "epoch": 2.74, "grad_norm": 0.8539407849311829, "learning_rate": 1.073345615991431e-05, "loss": 3.0031, "step": 55987 }, { "epoch": 2.74, "grad_norm": 0.7349897027015686, "learning_rate": 1.0729375060554424e-05, "loss": 2.8356, "step": 55988 }, { "epoch": 2.74, "grad_norm": 0.7442134022712708, "learning_rate": 1.072529472307766e-05, "loss": 2.9911, "step": 55989 }, { "epoch": 2.74, "grad_norm": 0.742074191570282, "learning_rate": 1.072121514749481e-05, "loss": 2.9064, "step": 55990 }, { "epoch": 2.74, "grad_norm": 0.7211217880249023, "learning_rate": 1.07171363338166e-05, "loss": 2.8363, "step": 55991 }, { "epoch": 2.74, "grad_norm": 0.7274338006973267, "learning_rate": 1.0713058282053788e-05, "loss": 2.9167, "step": 55992 }, { "epoch": 2.74, "grad_norm": 0.7637966871261597, "learning_rate": 1.0708980992217131e-05, "loss": 3.002, "step": 55993 }, { "epoch": 2.74, "grad_norm": 0.7385900616645813, "learning_rate": 1.0704904464317287e-05, "loss": 2.7543, "step": 55994 }, { "epoch": 2.74, "grad_norm": 0.7418367862701416, "learning_rate": 1.0700828698365117e-05, "loss": 2.9536, "step": 55995 }, { "epoch": 2.74, "grad_norm": 0.7280617356300354, "learning_rate": 1.0696753694371207e-05, "loss": 2.6508, "step": 55996 }, { "epoch": 2.74, "grad_norm": 0.7581601738929749, "learning_rate": 1.069267945234642e-05, "loss": 3.0828, "step": 55997 }, { "epoch": 2.74, "grad_norm": 0.7517957091331482, "learning_rate": 1.0688605972301444e-05, "loss": 2.8601, "step": 55998 }, { "epoch": 2.74, "grad_norm": 0.7825131416320801, "learning_rate": 1.0684533254246974e-05, "loss": 2.9613, "step": 55999 }, { "epoch": 2.74, "grad_norm": 0.7765049934387207, "learning_rate": 1.0680461298193799e-05, "loss": 2.908, "step": 56000 }, { "epoch": 2.74, "grad_norm": 0.7475263476371765, "learning_rate": 1.0676390104152577e-05, "loss": 2.9421, "step": 56001 }, { "epoch": 2.74, "grad_norm": 0.7581568360328674, "learning_rate": 1.0672319672134033e-05, "loss": 2.7795, "step": 56002 }, { "epoch": 2.74, "grad_norm": 0.7449358105659485, "learning_rate": 1.0668250002148993e-05, "loss": 2.8471, "step": 56003 }, { "epoch": 2.74, "grad_norm": 0.7291288375854492, "learning_rate": 1.0664181094208012e-05, "loss": 3.0978, "step": 56004 }, { "epoch": 2.74, "grad_norm": 0.7702457308769226, "learning_rate": 1.0660112948321953e-05, "loss": 2.9775, "step": 56005 }, { "epoch": 2.74, "grad_norm": 0.738068699836731, "learning_rate": 1.065604556450147e-05, "loss": 2.9591, "step": 56006 }, { "epoch": 2.74, "grad_norm": 0.7106137871742249, "learning_rate": 1.0651978942757256e-05, "loss": 2.7704, "step": 56007 }, { "epoch": 2.74, "grad_norm": 0.7228193879127502, "learning_rate": 1.0647913083100101e-05, "loss": 2.8967, "step": 56008 }, { "epoch": 2.74, "grad_norm": 0.728057324886322, "learning_rate": 1.0643847985540566e-05, "loss": 2.8884, "step": 56009 }, { "epoch": 2.74, "grad_norm": 0.8260751366615295, "learning_rate": 1.0639783650089506e-05, "loss": 3.0548, "step": 56010 }, { "epoch": 2.74, "grad_norm": 0.7846874594688416, "learning_rate": 1.0635720076757548e-05, "loss": 2.8825, "step": 56011 }, { "epoch": 2.75, "grad_norm": 0.7633529901504517, "learning_rate": 1.0631657265555382e-05, "loss": 2.8856, "step": 56012 }, { "epoch": 2.75, "grad_norm": 0.7845501899719238, "learning_rate": 1.0627595216493801e-05, "loss": 2.9424, "step": 56013 }, { "epoch": 2.75, "grad_norm": 0.7749620676040649, "learning_rate": 1.0623533929583461e-05, "loss": 2.994, "step": 56014 }, { "epoch": 2.75, "grad_norm": 0.7692545056343079, "learning_rate": 1.0619473404834988e-05, "loss": 3.0687, "step": 56015 }, { "epoch": 2.75, "grad_norm": 0.7629958391189575, "learning_rate": 1.0615413642259141e-05, "loss": 3.0601, "step": 56016 }, { "epoch": 2.75, "grad_norm": 0.7727113962173462, "learning_rate": 1.0611354641866576e-05, "loss": 2.9892, "step": 56017 }, { "epoch": 2.75, "grad_norm": 0.7810448408126831, "learning_rate": 1.0607296403668052e-05, "loss": 3.0302, "step": 56018 }, { "epoch": 2.75, "grad_norm": 0.7546478509902954, "learning_rate": 1.0603238927674197e-05, "loss": 3.0782, "step": 56019 }, { "epoch": 2.75, "grad_norm": 0.7549473643302917, "learning_rate": 1.0599182213895729e-05, "loss": 2.8536, "step": 56020 }, { "epoch": 2.75, "grad_norm": 0.712246298789978, "learning_rate": 1.0595126262343313e-05, "loss": 2.8927, "step": 56021 }, { "epoch": 2.75, "grad_norm": 0.7353459000587463, "learning_rate": 1.0591071073027601e-05, "loss": 2.7289, "step": 56022 }, { "epoch": 2.75, "grad_norm": 0.7388055920600891, "learning_rate": 1.0587016645959356e-05, "loss": 2.7817, "step": 56023 }, { "epoch": 2.75, "grad_norm": 0.7375566959381104, "learning_rate": 1.0582962981149167e-05, "loss": 2.7332, "step": 56024 }, { "epoch": 2.75, "grad_norm": 0.7719786167144775, "learning_rate": 1.0578910078607794e-05, "loss": 2.8114, "step": 56025 }, { "epoch": 2.75, "grad_norm": 0.7156137824058533, "learning_rate": 1.057485793834586e-05, "loss": 2.8983, "step": 56026 }, { "epoch": 2.75, "grad_norm": 0.7468981742858887, "learning_rate": 1.0570806560374023e-05, "loss": 2.8096, "step": 56027 }, { "epoch": 2.75, "grad_norm": 0.7423037886619568, "learning_rate": 1.0566755944703009e-05, "loss": 2.9614, "step": 56028 }, { "epoch": 2.75, "grad_norm": 0.7885465621948242, "learning_rate": 1.0562706091343475e-05, "loss": 2.8188, "step": 56029 }, { "epoch": 2.75, "grad_norm": 0.7959953546524048, "learning_rate": 1.055865700030608e-05, "loss": 2.7377, "step": 56030 }, { "epoch": 2.75, "grad_norm": 0.7130370736122131, "learning_rate": 1.0554608671601417e-05, "loss": 2.8816, "step": 56031 }, { "epoch": 2.75, "grad_norm": 0.724517822265625, "learning_rate": 1.055056110524024e-05, "loss": 2.898, "step": 56032 }, { "epoch": 2.75, "grad_norm": 0.7943875789642334, "learning_rate": 1.0546514301233178e-05, "loss": 2.956, "step": 56033 }, { "epoch": 2.75, "grad_norm": 0.7979520559310913, "learning_rate": 1.0542468259590853e-05, "loss": 2.7335, "step": 56034 }, { "epoch": 2.75, "grad_norm": 0.7505634427070618, "learning_rate": 1.0538422980324025e-05, "loss": 2.8819, "step": 56035 }, { "epoch": 2.75, "grad_norm": 0.761986255645752, "learning_rate": 1.0534378463443283e-05, "loss": 2.7005, "step": 56036 }, { "epoch": 2.75, "grad_norm": 0.7043732404708862, "learning_rate": 1.0530334708959255e-05, "loss": 2.8908, "step": 56037 }, { "epoch": 2.75, "grad_norm": 0.7133161425590515, "learning_rate": 1.0526291716882596e-05, "loss": 2.9596, "step": 56038 }, { "epoch": 2.75, "grad_norm": 0.7343462109565735, "learning_rate": 1.0522249487223966e-05, "loss": 2.8832, "step": 56039 }, { "epoch": 2.75, "grad_norm": 0.7378050684928894, "learning_rate": 1.0518208019994056e-05, "loss": 2.9357, "step": 56040 }, { "epoch": 2.75, "grad_norm": 0.7832443714141846, "learning_rate": 1.0514167315203458e-05, "loss": 3.0828, "step": 56041 }, { "epoch": 2.75, "grad_norm": 0.715339183807373, "learning_rate": 1.0510127372862831e-05, "loss": 2.8073, "step": 56042 }, { "epoch": 2.75, "grad_norm": 0.7633505463600159, "learning_rate": 1.0506088192982831e-05, "loss": 2.9054, "step": 56043 }, { "epoch": 2.75, "grad_norm": 0.7171920537948608, "learning_rate": 1.050204977557405e-05, "loss": 2.8434, "step": 56044 }, { "epoch": 2.75, "grad_norm": 0.7434439063072205, "learning_rate": 1.0498012120647215e-05, "loss": 2.8091, "step": 56045 }, { "epoch": 2.75, "grad_norm": 0.7520685791969299, "learning_rate": 1.0493975228212814e-05, "loss": 2.7975, "step": 56046 }, { "epoch": 2.75, "grad_norm": 0.7988944053649902, "learning_rate": 1.048993909828164e-05, "loss": 2.6339, "step": 56047 }, { "epoch": 2.75, "grad_norm": 0.7427051663398743, "learning_rate": 1.048590373086422e-05, "loss": 2.7419, "step": 56048 }, { "epoch": 2.75, "grad_norm": 0.7187119126319885, "learning_rate": 1.0481869125971176e-05, "loss": 2.964, "step": 56049 }, { "epoch": 2.75, "grad_norm": 0.7609080672264099, "learning_rate": 1.0477835283613233e-05, "loss": 2.6344, "step": 56050 }, { "epoch": 2.75, "grad_norm": 0.7719628810882568, "learning_rate": 1.0473802203800918e-05, "loss": 2.8794, "step": 56051 }, { "epoch": 2.75, "grad_norm": 0.807428240776062, "learning_rate": 1.0469769886544921e-05, "loss": 2.9451, "step": 56052 }, { "epoch": 2.75, "grad_norm": 0.7161225080490112, "learning_rate": 1.0465738331855767e-05, "loss": 2.7363, "step": 56053 }, { "epoch": 2.75, "grad_norm": 0.7533316612243652, "learning_rate": 1.0461707539744179e-05, "loss": 2.9044, "step": 56054 }, { "epoch": 2.75, "grad_norm": 0.7733919620513916, "learning_rate": 1.045767751022072e-05, "loss": 3.0109, "step": 56055 }, { "epoch": 2.75, "grad_norm": 0.7934540510177612, "learning_rate": 1.0453648243296008e-05, "loss": 2.6885, "step": 56056 }, { "epoch": 2.75, "grad_norm": 0.75040203332901, "learning_rate": 1.0449619738980675e-05, "loss": 3.0411, "step": 56057 }, { "epoch": 2.75, "grad_norm": 0.7220397591590881, "learning_rate": 1.0445591997285341e-05, "loss": 2.8238, "step": 56058 }, { "epoch": 2.75, "grad_norm": 0.7255467772483826, "learning_rate": 1.0441565018220532e-05, "loss": 3.0506, "step": 56059 }, { "epoch": 2.75, "grad_norm": 0.7547920346260071, "learning_rate": 1.0437538801796973e-05, "loss": 3.0475, "step": 56060 }, { "epoch": 2.75, "grad_norm": 0.7408540844917297, "learning_rate": 1.0433513348025158e-05, "loss": 2.8226, "step": 56061 }, { "epoch": 2.75, "grad_norm": 0.7349470853805542, "learning_rate": 1.0429488656915807e-05, "loss": 2.9271, "step": 56062 }, { "epoch": 2.75, "grad_norm": 0.7243402600288391, "learning_rate": 1.0425464728479382e-05, "loss": 2.8937, "step": 56063 }, { "epoch": 2.75, "grad_norm": 0.7938183546066284, "learning_rate": 1.0421441562726607e-05, "loss": 2.8097, "step": 56064 }, { "epoch": 2.75, "grad_norm": 0.7476469278335571, "learning_rate": 1.0417419159668006e-05, "loss": 2.5328, "step": 56065 }, { "epoch": 2.75, "grad_norm": 0.7038164734840393, "learning_rate": 1.041339751931417e-05, "loss": 2.7658, "step": 56066 }, { "epoch": 2.75, "grad_norm": 0.6968178749084473, "learning_rate": 1.0409376641675759e-05, "loss": 2.946, "step": 56067 }, { "epoch": 2.75, "grad_norm": 0.7435596585273743, "learning_rate": 1.0405356526763264e-05, "loss": 2.8976, "step": 56068 }, { "epoch": 2.75, "grad_norm": 0.7599115371704102, "learning_rate": 1.0401337174587344e-05, "loss": 2.6582, "step": 56069 }, { "epoch": 2.75, "grad_norm": 0.7440100312232971, "learning_rate": 1.0397318585158587e-05, "loss": 2.7613, "step": 56070 }, { "epoch": 2.75, "grad_norm": 0.7963297963142395, "learning_rate": 1.0393300758487522e-05, "loss": 2.7282, "step": 56071 }, { "epoch": 2.75, "grad_norm": 0.746436357498169, "learning_rate": 1.0389283694584805e-05, "loss": 2.777, "step": 56072 }, { "epoch": 2.75, "grad_norm": 0.7619957327842712, "learning_rate": 1.0385267393460994e-05, "loss": 2.6955, "step": 56073 }, { "epoch": 2.75, "grad_norm": 0.7717376947402954, "learning_rate": 1.0381251855126582e-05, "loss": 3.0108, "step": 56074 }, { "epoch": 2.75, "grad_norm": 0.7113671898841858, "learning_rate": 1.037723707959226e-05, "loss": 2.7965, "step": 56075 }, { "epoch": 2.75, "grad_norm": 0.7118061184883118, "learning_rate": 1.0373223066868552e-05, "loss": 2.8349, "step": 56076 }, { "epoch": 2.75, "grad_norm": 0.7409781217575073, "learning_rate": 1.0369209816966051e-05, "loss": 2.7353, "step": 56077 }, { "epoch": 2.75, "grad_norm": 0.7771452069282532, "learning_rate": 1.0365197329895281e-05, "loss": 2.8024, "step": 56078 }, { "epoch": 2.75, "grad_norm": 0.787254810333252, "learning_rate": 1.0361185605666866e-05, "loss": 2.937, "step": 56079 }, { "epoch": 2.75, "grad_norm": 0.7171097993850708, "learning_rate": 1.0357174644291332e-05, "loss": 2.9785, "step": 56080 }, { "epoch": 2.75, "grad_norm": 0.7196240425109863, "learning_rate": 1.0353164445779238e-05, "loss": 2.988, "step": 56081 }, { "epoch": 2.75, "grad_norm": 0.718910813331604, "learning_rate": 1.0349155010141208e-05, "loss": 2.6988, "step": 56082 }, { "epoch": 2.75, "grad_norm": 0.7512689828872681, "learning_rate": 1.03451463373877e-05, "loss": 3.0488, "step": 56083 }, { "epoch": 2.75, "grad_norm": 0.7203948497772217, "learning_rate": 1.0341138427529339e-05, "loss": 2.8411, "step": 56084 }, { "epoch": 2.75, "grad_norm": 0.7465165257453918, "learning_rate": 1.0337131280576716e-05, "loss": 2.9288, "step": 56085 }, { "epoch": 2.75, "grad_norm": 0.7512971758842468, "learning_rate": 1.0333124896540324e-05, "loss": 2.981, "step": 56086 }, { "epoch": 2.75, "grad_norm": 0.7287713885307312, "learning_rate": 1.0329119275430785e-05, "loss": 2.9989, "step": 56087 }, { "epoch": 2.75, "grad_norm": 0.7980208992958069, "learning_rate": 1.0325114417258529e-05, "loss": 2.9048, "step": 56088 }, { "epoch": 2.75, "grad_norm": 0.6989551782608032, "learning_rate": 1.0321110322034175e-05, "loss": 2.8285, "step": 56089 }, { "epoch": 2.75, "grad_norm": 0.7511205077171326, "learning_rate": 1.0317106989768287e-05, "loss": 2.8994, "step": 56090 }, { "epoch": 2.75, "grad_norm": 0.7218790650367737, "learning_rate": 1.0313104420471386e-05, "loss": 2.7294, "step": 56091 }, { "epoch": 2.75, "grad_norm": 0.7532414197921753, "learning_rate": 1.030910261415403e-05, "loss": 2.9913, "step": 56092 }, { "epoch": 2.75, "grad_norm": 0.7547132968902588, "learning_rate": 1.0305101570826713e-05, "loss": 2.8585, "step": 56093 }, { "epoch": 2.75, "grad_norm": 0.7735399603843689, "learning_rate": 1.0301101290500025e-05, "loss": 3.0481, "step": 56094 }, { "epoch": 2.75, "grad_norm": 0.728217363357544, "learning_rate": 1.0297101773184523e-05, "loss": 2.887, "step": 56095 }, { "epoch": 2.75, "grad_norm": 0.7460715174674988, "learning_rate": 1.0293103018890637e-05, "loss": 2.7163, "step": 56096 }, { "epoch": 2.75, "grad_norm": 0.7449331283569336, "learning_rate": 1.0289105027628985e-05, "loss": 2.903, "step": 56097 }, { "epoch": 2.75, "grad_norm": 0.738858699798584, "learning_rate": 1.0285107799410063e-05, "loss": 2.7658, "step": 56098 }, { "epoch": 2.75, "grad_norm": 0.7564826011657715, "learning_rate": 1.0281111334244396e-05, "loss": 2.8346, "step": 56099 }, { "epoch": 2.75, "grad_norm": 0.7498881816864014, "learning_rate": 1.0277115632142574e-05, "loss": 2.8828, "step": 56100 }, { "epoch": 2.75, "grad_norm": 0.7890697717666626, "learning_rate": 1.0273120693115056e-05, "loss": 2.8409, "step": 56101 }, { "epoch": 2.75, "grad_norm": 0.7382162809371948, "learning_rate": 1.02691265171724e-05, "loss": 2.915, "step": 56102 }, { "epoch": 2.75, "grad_norm": 0.7483049035072327, "learning_rate": 1.0265133104325063e-05, "loss": 2.6881, "step": 56103 }, { "epoch": 2.75, "grad_norm": 0.7414224147796631, "learning_rate": 1.0261140454583572e-05, "loss": 2.8794, "step": 56104 }, { "epoch": 2.75, "grad_norm": 0.7294985055923462, "learning_rate": 1.0257148567958551e-05, "loss": 2.8673, "step": 56105 }, { "epoch": 2.75, "grad_norm": 0.7478199005126953, "learning_rate": 1.0253157444460391e-05, "loss": 3.0047, "step": 56106 }, { "epoch": 2.75, "grad_norm": 0.7180767059326172, "learning_rate": 1.0249167084099685e-05, "loss": 2.7526, "step": 56107 }, { "epoch": 2.75, "grad_norm": 0.7234374284744263, "learning_rate": 1.0245177486886858e-05, "loss": 3.1474, "step": 56108 }, { "epoch": 2.75, "grad_norm": 0.7626802325248718, "learning_rate": 1.0241188652832533e-05, "loss": 2.8545, "step": 56109 }, { "epoch": 2.75, "grad_norm": 0.7502772212028503, "learning_rate": 1.0237200581947136e-05, "loss": 2.9798, "step": 56110 }, { "epoch": 2.75, "grad_norm": 0.7730661034584045, "learning_rate": 1.0233213274241158e-05, "loss": 3.0912, "step": 56111 }, { "epoch": 2.75, "grad_norm": 0.7321888208389282, "learning_rate": 1.022922672972516e-05, "loss": 2.8724, "step": 56112 }, { "epoch": 2.75, "grad_norm": 0.7915605902671814, "learning_rate": 1.0225240948409563e-05, "loss": 2.7356, "step": 56113 }, { "epoch": 2.75, "grad_norm": 0.7326378226280212, "learning_rate": 1.0221255930304961e-05, "loss": 2.7905, "step": 56114 }, { "epoch": 2.75, "grad_norm": 0.7629287838935852, "learning_rate": 1.0217271675421779e-05, "loss": 2.915, "step": 56115 }, { "epoch": 2.75, "grad_norm": 0.721251904964447, "learning_rate": 1.0213288183770574e-05, "loss": 2.9442, "step": 56116 }, { "epoch": 2.75, "grad_norm": 0.8092251420021057, "learning_rate": 1.0209305455361771e-05, "loss": 2.9196, "step": 56117 }, { "epoch": 2.75, "grad_norm": 0.7478073835372925, "learning_rate": 1.0205323490205863e-05, "loss": 2.8916, "step": 56118 }, { "epoch": 2.75, "grad_norm": 0.7610535621643066, "learning_rate": 1.0201342288313374e-05, "loss": 2.9998, "step": 56119 }, { "epoch": 2.75, "grad_norm": 0.7558162212371826, "learning_rate": 1.0197361849694763e-05, "loss": 2.7903, "step": 56120 }, { "epoch": 2.75, "grad_norm": 0.7537033557891846, "learning_rate": 1.0193382174360554e-05, "loss": 2.9119, "step": 56121 }, { "epoch": 2.75, "grad_norm": 0.7669283747673035, "learning_rate": 1.0189403262321205e-05, "loss": 2.9549, "step": 56122 }, { "epoch": 2.75, "grad_norm": 0.7572535872459412, "learning_rate": 1.0185425113587175e-05, "loss": 3.1026, "step": 56123 }, { "epoch": 2.75, "grad_norm": 0.7835591435432434, "learning_rate": 1.018144772816899e-05, "loss": 2.9209, "step": 56124 }, { "epoch": 2.75, "grad_norm": 0.8195697665214539, "learning_rate": 1.017747110607704e-05, "loss": 2.9498, "step": 56125 }, { "epoch": 2.75, "grad_norm": 0.7439544796943665, "learning_rate": 1.0173495247321884e-05, "loss": 2.8571, "step": 56126 }, { "epoch": 2.75, "grad_norm": 0.7486630082130432, "learning_rate": 1.016952015191398e-05, "loss": 2.938, "step": 56127 }, { "epoch": 2.75, "grad_norm": 0.7225660085678101, "learning_rate": 1.0165545819863751e-05, "loss": 2.8997, "step": 56128 }, { "epoch": 2.75, "grad_norm": 0.7393267750740051, "learning_rate": 1.0161572251181692e-05, "loss": 2.9516, "step": 56129 }, { "epoch": 2.75, "grad_norm": 0.7707771062850952, "learning_rate": 1.0157599445878296e-05, "loss": 2.9941, "step": 56130 }, { "epoch": 2.75, "grad_norm": 0.8051019906997681, "learning_rate": 1.0153627403963983e-05, "loss": 2.863, "step": 56131 }, { "epoch": 2.75, "grad_norm": 0.7474145293235779, "learning_rate": 1.014965612544928e-05, "loss": 2.6743, "step": 56132 }, { "epoch": 2.75, "grad_norm": 0.7893514633178711, "learning_rate": 1.0145685610344544e-05, "loss": 2.8199, "step": 56133 }, { "epoch": 2.75, "grad_norm": 0.7323321104049683, "learning_rate": 1.0141715858660305e-05, "loss": 2.9556, "step": 56134 }, { "epoch": 2.75, "grad_norm": 0.7719254493713379, "learning_rate": 1.0137746870406983e-05, "loss": 2.7592, "step": 56135 }, { "epoch": 2.75, "grad_norm": 0.7276996970176697, "learning_rate": 1.013377864559507e-05, "loss": 2.6894, "step": 56136 }, { "epoch": 2.75, "grad_norm": 0.750486433506012, "learning_rate": 1.0129811184234993e-05, "loss": 2.9752, "step": 56137 }, { "epoch": 2.75, "grad_norm": 0.7667336463928223, "learning_rate": 1.0125844486337242e-05, "loss": 2.9073, "step": 56138 }, { "epoch": 2.75, "grad_norm": 0.74593186378479, "learning_rate": 1.012187855191221e-05, "loss": 2.8232, "step": 56139 }, { "epoch": 2.75, "grad_norm": 0.7445417046546936, "learning_rate": 1.0117913380970355e-05, "loss": 2.8888, "step": 56140 }, { "epoch": 2.75, "grad_norm": 0.7170975208282471, "learning_rate": 1.0113948973522101e-05, "loss": 3.0145, "step": 56141 }, { "epoch": 2.75, "grad_norm": 0.7595486640930176, "learning_rate": 1.010998532957794e-05, "loss": 2.8864, "step": 56142 }, { "epoch": 2.75, "grad_norm": 0.7461397051811218, "learning_rate": 1.0106022449148266e-05, "loss": 2.9398, "step": 56143 }, { "epoch": 2.75, "grad_norm": 0.7885101437568665, "learning_rate": 1.01020603322436e-05, "loss": 2.6727, "step": 56144 }, { "epoch": 2.75, "grad_norm": 0.7303288578987122, "learning_rate": 1.009809897887427e-05, "loss": 2.9738, "step": 56145 }, { "epoch": 2.75, "grad_norm": 0.7623375654220581, "learning_rate": 1.0094138389050733e-05, "loss": 3.0115, "step": 56146 }, { "epoch": 2.75, "grad_norm": 0.9273803234100342, "learning_rate": 1.009017856278348e-05, "loss": 2.7699, "step": 56147 }, { "epoch": 2.75, "grad_norm": 0.7486945390701294, "learning_rate": 1.008621950008287e-05, "loss": 3.0087, "step": 56148 }, { "epoch": 2.75, "grad_norm": 0.7255769968032837, "learning_rate": 1.0082261200959397e-05, "loss": 2.9543, "step": 56149 }, { "epoch": 2.75, "grad_norm": 0.7860700488090515, "learning_rate": 1.0078303665423448e-05, "loss": 2.8486, "step": 56150 }, { "epoch": 2.75, "grad_norm": 0.7196409106254578, "learning_rate": 1.0074346893485418e-05, "loss": 2.7253, "step": 56151 }, { "epoch": 2.75, "grad_norm": 0.7964147329330444, "learning_rate": 1.00703908851558e-05, "loss": 2.9744, "step": 56152 }, { "epoch": 2.75, "grad_norm": 0.7458089590072632, "learning_rate": 1.0066435640444947e-05, "loss": 2.7989, "step": 56153 }, { "epoch": 2.75, "grad_norm": 0.7688996195793152, "learning_rate": 1.0062481159363356e-05, "loss": 3.0695, "step": 56154 }, { "epoch": 2.75, "grad_norm": 0.7817563414573669, "learning_rate": 1.0058527441921316e-05, "loss": 2.6904, "step": 56155 }, { "epoch": 2.75, "grad_norm": 0.6953161358833313, "learning_rate": 1.0054574488129318e-05, "loss": 2.7915, "step": 56156 }, { "epoch": 2.75, "grad_norm": 0.7510791420936584, "learning_rate": 1.0050622297997824e-05, "loss": 2.8286, "step": 56157 }, { "epoch": 2.75, "grad_norm": 0.7296958565711975, "learning_rate": 1.0046670871537154e-05, "loss": 3.0983, "step": 56158 }, { "epoch": 2.75, "grad_norm": 0.7343621850013733, "learning_rate": 1.0042720208757771e-05, "loss": 2.971, "step": 56159 }, { "epoch": 2.75, "grad_norm": 0.7544857263565063, "learning_rate": 1.0038770309670064e-05, "loss": 2.8618, "step": 56160 }, { "epoch": 2.75, "grad_norm": 0.7312347292900085, "learning_rate": 1.0034821174284391e-05, "loss": 2.8683, "step": 56161 }, { "epoch": 2.75, "grad_norm": 0.723470151424408, "learning_rate": 1.0030872802611245e-05, "loss": 3.1332, "step": 56162 }, { "epoch": 2.75, "grad_norm": 0.7472372055053711, "learning_rate": 1.0026925194660918e-05, "loss": 2.6761, "step": 56163 }, { "epoch": 2.75, "grad_norm": 0.7491852641105652, "learning_rate": 1.00229783504439e-05, "loss": 2.791, "step": 56164 }, { "epoch": 2.75, "grad_norm": 0.7424899339675903, "learning_rate": 1.001903226997055e-05, "loss": 2.9423, "step": 56165 }, { "epoch": 2.75, "grad_norm": 0.7745535373687744, "learning_rate": 1.0015086953251262e-05, "loss": 2.9281, "step": 56166 }, { "epoch": 2.75, "grad_norm": 0.8208742141723633, "learning_rate": 1.0011142400296424e-05, "loss": 2.8546, "step": 56167 }, { "epoch": 2.75, "grad_norm": 0.8146986365318298, "learning_rate": 1.0007198611116397e-05, "loss": 2.9061, "step": 56168 }, { "epoch": 2.75, "grad_norm": 0.7721244096755981, "learning_rate": 1.0003255585721636e-05, "loss": 3.1029, "step": 56169 }, { "epoch": 2.75, "grad_norm": 0.709169328212738, "learning_rate": 9.999313324122471e-06, "loss": 2.8192, "step": 56170 }, { "epoch": 2.75, "grad_norm": 0.7202749848365784, "learning_rate": 9.99537182632929e-06, "loss": 2.6902, "step": 56171 }, { "epoch": 2.75, "grad_norm": 0.7772741913795471, "learning_rate": 9.99143109235252e-06, "loss": 3.0664, "step": 56172 }, { "epoch": 2.75, "grad_norm": 0.7416417002677917, "learning_rate": 9.987491122202484e-06, "loss": 3.0866, "step": 56173 }, { "epoch": 2.75, "grad_norm": 0.7932614088058472, "learning_rate": 9.983551915889609e-06, "loss": 2.6837, "step": 56174 }, { "epoch": 2.75, "grad_norm": 0.7134155035018921, "learning_rate": 9.97961347342422e-06, "loss": 2.6047, "step": 56175 }, { "epoch": 2.75, "grad_norm": 0.736059308052063, "learning_rate": 9.975675794816707e-06, "loss": 2.8903, "step": 56176 }, { "epoch": 2.75, "grad_norm": 0.7584584951400757, "learning_rate": 9.971738880077495e-06, "loss": 2.9494, "step": 56177 }, { "epoch": 2.75, "grad_norm": 0.6984370946884155, "learning_rate": 9.967802729216844e-06, "loss": 2.7811, "step": 56178 }, { "epoch": 2.75, "grad_norm": 0.7537134885787964, "learning_rate": 9.963867342245213e-06, "loss": 2.9652, "step": 56179 }, { "epoch": 2.75, "grad_norm": 0.7283973693847656, "learning_rate": 9.959932719172924e-06, "loss": 2.8149, "step": 56180 }, { "epoch": 2.75, "grad_norm": 0.7228909730911255, "learning_rate": 9.955998860010372e-06, "loss": 3.0026, "step": 56181 }, { "epoch": 2.75, "grad_norm": 0.8229911923408508, "learning_rate": 9.952065764767913e-06, "loss": 2.7358, "step": 56182 }, { "epoch": 2.75, "grad_norm": 0.768810510635376, "learning_rate": 9.94813343345584e-06, "loss": 2.9345, "step": 56183 }, { "epoch": 2.75, "grad_norm": 0.7733370065689087, "learning_rate": 9.944201866084612e-06, "loss": 2.9988, "step": 56184 }, { "epoch": 2.75, "grad_norm": 0.7900514602661133, "learning_rate": 9.940271062664484e-06, "loss": 2.9167, "step": 56185 }, { "epoch": 2.75, "grad_norm": 0.797058641910553, "learning_rate": 9.936341023205918e-06, "loss": 2.7364, "step": 56186 }, { "epoch": 2.75, "grad_norm": 0.766906201839447, "learning_rate": 9.932411747719171e-06, "loss": 2.899, "step": 56187 }, { "epoch": 2.75, "grad_norm": 0.7349963784217834, "learning_rate": 9.928483236214634e-06, "loss": 2.7932, "step": 56188 }, { "epoch": 2.75, "grad_norm": 0.808515191078186, "learning_rate": 9.924555488702667e-06, "loss": 2.8549, "step": 56189 }, { "epoch": 2.75, "grad_norm": 0.7727739810943604, "learning_rate": 9.920628505193529e-06, "loss": 2.8072, "step": 56190 }, { "epoch": 2.75, "grad_norm": 0.761241614818573, "learning_rate": 9.916702285697708e-06, "loss": 2.6368, "step": 56191 }, { "epoch": 2.75, "grad_norm": 0.7611913681030273, "learning_rate": 9.9127768302254e-06, "loss": 2.9305, "step": 56192 }, { "epoch": 2.75, "grad_norm": 0.7446513772010803, "learning_rate": 9.908852138787027e-06, "loss": 2.8921, "step": 56193 }, { "epoch": 2.75, "grad_norm": 0.7649831771850586, "learning_rate": 9.904928211392915e-06, "loss": 2.7833, "step": 56194 }, { "epoch": 2.75, "grad_norm": 0.7408809065818787, "learning_rate": 9.90100504805339e-06, "loss": 2.9076, "step": 56195 }, { "epoch": 2.75, "grad_norm": 0.7453528642654419, "learning_rate": 9.897082648778809e-06, "loss": 2.7996, "step": 56196 }, { "epoch": 2.75, "grad_norm": 0.7997094392776489, "learning_rate": 9.893161013579499e-06, "loss": 2.9272, "step": 56197 }, { "epoch": 2.75, "grad_norm": 0.7552264928817749, "learning_rate": 9.889240142465715e-06, "loss": 2.9482, "step": 56198 }, { "epoch": 2.75, "grad_norm": 0.7569772005081177, "learning_rate": 9.885320035447853e-06, "loss": 2.9521, "step": 56199 }, { "epoch": 2.75, "grad_norm": 0.7403649091720581, "learning_rate": 9.881400692536235e-06, "loss": 2.9042, "step": 56200 }, { "epoch": 2.75, "grad_norm": 0.7069177627563477, "learning_rate": 9.877482113741186e-06, "loss": 2.686, "step": 56201 }, { "epoch": 2.75, "grad_norm": 0.7120211124420166, "learning_rate": 9.873564299072968e-06, "loss": 2.6873, "step": 56202 }, { "epoch": 2.75, "grad_norm": 0.7966209650039673, "learning_rate": 9.869647248542001e-06, "loss": 2.9283, "step": 56203 }, { "epoch": 2.75, "grad_norm": 0.6995888352394104, "learning_rate": 9.865730962158546e-06, "loss": 3.067, "step": 56204 }, { "epoch": 2.75, "grad_norm": 0.7135257124900818, "learning_rate": 9.861815439932864e-06, "loss": 2.9348, "step": 56205 }, { "epoch": 2.75, "grad_norm": 0.8033514022827148, "learning_rate": 9.857900681875374e-06, "loss": 2.8451, "step": 56206 }, { "epoch": 2.75, "grad_norm": 0.8119677901268005, "learning_rate": 9.853986687996273e-06, "loss": 2.8265, "step": 56207 }, { "epoch": 2.75, "grad_norm": 0.7657623291015625, "learning_rate": 9.850073458305952e-06, "loss": 2.9276, "step": 56208 }, { "epoch": 2.75, "grad_norm": 0.7248706817626953, "learning_rate": 9.846160992814734e-06, "loss": 2.9206, "step": 56209 }, { "epoch": 2.75, "grad_norm": 0.7554598450660706, "learning_rate": 9.842249291532878e-06, "loss": 2.9856, "step": 56210 }, { "epoch": 2.75, "grad_norm": 0.789419949054718, "learning_rate": 9.838338354470675e-06, "loss": 3.0536, "step": 56211 }, { "epoch": 2.75, "grad_norm": 0.7719467282295227, "learning_rate": 9.834428181638487e-06, "loss": 2.8967, "step": 56212 }, { "epoch": 2.75, "grad_norm": 0.7833728194236755, "learning_rate": 9.830518773046536e-06, "loss": 2.6544, "step": 56213 }, { "epoch": 2.75, "grad_norm": 0.7524414658546448, "learning_rate": 9.826610128705148e-06, "loss": 2.7656, "step": 56214 }, { "epoch": 2.75, "grad_norm": 0.7353940606117249, "learning_rate": 9.822702248624614e-06, "loss": 2.8325, "step": 56215 }, { "epoch": 2.76, "grad_norm": 0.7165583372116089, "learning_rate": 9.818795132815294e-06, "loss": 2.8988, "step": 56216 }, { "epoch": 2.76, "grad_norm": 0.6943876147270203, "learning_rate": 9.814888781287378e-06, "loss": 2.8794, "step": 56217 }, { "epoch": 2.76, "grad_norm": 0.7502477765083313, "learning_rate": 9.810983194051193e-06, "loss": 2.8726, "step": 56218 }, { "epoch": 2.76, "grad_norm": 0.7636768817901611, "learning_rate": 9.80707837111706e-06, "loss": 2.9083, "step": 56219 }, { "epoch": 2.76, "grad_norm": 0.7751126885414124, "learning_rate": 9.80317431249521e-06, "loss": 2.9759, "step": 56220 }, { "epoch": 2.76, "grad_norm": 0.7540944218635559, "learning_rate": 9.799271018195964e-06, "loss": 2.8304, "step": 56221 }, { "epoch": 2.76, "grad_norm": 0.7522205114364624, "learning_rate": 9.795368488229583e-06, "loss": 2.9295, "step": 56222 }, { "epoch": 2.76, "grad_norm": 0.7293267846107483, "learning_rate": 9.791466722606323e-06, "loss": 3.1562, "step": 56223 }, { "epoch": 2.76, "grad_norm": 0.7595233917236328, "learning_rate": 9.78756572133651e-06, "loss": 2.8134, "step": 56224 }, { "epoch": 2.76, "grad_norm": 0.7647051215171814, "learning_rate": 9.783665484430436e-06, "loss": 2.8663, "step": 56225 }, { "epoch": 2.76, "grad_norm": 0.7567137479782104, "learning_rate": 9.779766011898293e-06, "loss": 2.9385, "step": 56226 }, { "epoch": 2.76, "grad_norm": 0.7509698271751404, "learning_rate": 9.775867303750372e-06, "loss": 3.0103, "step": 56227 }, { "epoch": 2.76, "grad_norm": 0.7402850389480591, "learning_rate": 9.771969359996967e-06, "loss": 2.8077, "step": 56228 }, { "epoch": 2.76, "grad_norm": 0.7876865267753601, "learning_rate": 9.768072180648401e-06, "loss": 2.8664, "step": 56229 }, { "epoch": 2.76, "grad_norm": 0.7342726588249207, "learning_rate": 9.7641757657148e-06, "loss": 3.1046, "step": 56230 }, { "epoch": 2.76, "grad_norm": 0.8054348826408386, "learning_rate": 9.760280115206553e-06, "loss": 3.0441, "step": 56231 }, { "epoch": 2.76, "grad_norm": 0.7675466537475586, "learning_rate": 9.756385229133823e-06, "loss": 2.9284, "step": 56232 }, { "epoch": 2.76, "grad_norm": 0.7613694071769714, "learning_rate": 9.752491107506966e-06, "loss": 2.923, "step": 56233 }, { "epoch": 2.76, "grad_norm": 0.7603533267974854, "learning_rate": 9.748597750336207e-06, "loss": 2.7409, "step": 56234 }, { "epoch": 2.76, "grad_norm": 0.783909261226654, "learning_rate": 9.744705157631705e-06, "loss": 2.7609, "step": 56235 }, { "epoch": 2.76, "grad_norm": 0.7282220721244812, "learning_rate": 9.740813329403851e-06, "loss": 2.8911, "step": 56236 }, { "epoch": 2.76, "grad_norm": 0.7373564839363098, "learning_rate": 9.736922265662805e-06, "loss": 2.8638, "step": 56237 }, { "epoch": 2.76, "grad_norm": 0.7997520565986633, "learning_rate": 9.733031966418826e-06, "loss": 2.9606, "step": 56238 }, { "epoch": 2.76, "grad_norm": 0.7475751638412476, "learning_rate": 9.729142431682201e-06, "loss": 2.7857, "step": 56239 }, { "epoch": 2.76, "grad_norm": 0.7757993936538696, "learning_rate": 9.725253661463161e-06, "loss": 2.6651, "step": 56240 }, { "epoch": 2.76, "grad_norm": 0.7897785902023315, "learning_rate": 9.721365655771962e-06, "loss": 2.7296, "step": 56241 }, { "epoch": 2.76, "grad_norm": 0.7303987145423889, "learning_rate": 9.717478414618763e-06, "loss": 2.9488, "step": 56242 }, { "epoch": 2.76, "grad_norm": 0.7232341170310974, "learning_rate": 9.713591938013854e-06, "loss": 2.8354, "step": 56243 }, { "epoch": 2.76, "grad_norm": 0.7750096321105957, "learning_rate": 9.709706225967529e-06, "loss": 3.0524, "step": 56244 }, { "epoch": 2.76, "grad_norm": 0.7706706523895264, "learning_rate": 9.705821278489912e-06, "loss": 2.8424, "step": 56245 }, { "epoch": 2.76, "grad_norm": 0.8001003265380859, "learning_rate": 9.701937095591362e-06, "loss": 2.8951, "step": 56246 }, { "epoch": 2.76, "grad_norm": 0.7747387290000916, "learning_rate": 9.698053677282037e-06, "loss": 2.9802, "step": 56247 }, { "epoch": 2.76, "grad_norm": 0.7447709441184998, "learning_rate": 9.694171023572095e-06, "loss": 2.7805, "step": 56248 }, { "epoch": 2.76, "grad_norm": 0.7555181384086609, "learning_rate": 9.690289134471896e-06, "loss": 2.9242, "step": 56249 }, { "epoch": 2.76, "grad_norm": 0.7425608038902283, "learning_rate": 9.686408009991565e-06, "loss": 2.7431, "step": 56250 }, { "epoch": 2.76, "grad_norm": 0.7470229864120483, "learning_rate": 9.682527650141426e-06, "loss": 2.8323, "step": 56251 }, { "epoch": 2.76, "grad_norm": 0.7438477873802185, "learning_rate": 9.678648054931571e-06, "loss": 2.9314, "step": 56252 }, { "epoch": 2.76, "grad_norm": 0.7268569469451904, "learning_rate": 9.674769224372292e-06, "loss": 2.9639, "step": 56253 }, { "epoch": 2.76, "grad_norm": 0.7507839798927307, "learning_rate": 9.670891158473814e-06, "loss": 2.7267, "step": 56254 }, { "epoch": 2.76, "grad_norm": 0.7674872875213623, "learning_rate": 9.667013857246363e-06, "loss": 2.801, "step": 56255 }, { "epoch": 2.76, "grad_norm": 0.7318063974380493, "learning_rate": 9.663137320700098e-06, "loss": 2.9449, "step": 56256 }, { "epoch": 2.76, "grad_norm": 0.7555484771728516, "learning_rate": 9.659261548845243e-06, "loss": 2.8206, "step": 56257 }, { "epoch": 2.76, "grad_norm": 0.8074846267700195, "learning_rate": 9.655386541691989e-06, "loss": 2.9654, "step": 56258 }, { "epoch": 2.76, "grad_norm": 0.7630916833877563, "learning_rate": 9.65151229925063e-06, "loss": 2.931, "step": 56259 }, { "epoch": 2.76, "grad_norm": 0.7579756379127502, "learning_rate": 9.647638821531256e-06, "loss": 3.0187, "step": 56260 }, { "epoch": 2.76, "grad_norm": 0.7585728764533997, "learning_rate": 9.64376610854416e-06, "loss": 2.6944, "step": 56261 }, { "epoch": 2.76, "grad_norm": 0.7346169352531433, "learning_rate": 9.639894160299499e-06, "loss": 2.5751, "step": 56262 }, { "epoch": 2.76, "grad_norm": 0.7276310920715332, "learning_rate": 9.636022976807467e-06, "loss": 2.8546, "step": 56263 }, { "epoch": 2.76, "grad_norm": 0.7806369066238403, "learning_rate": 9.632152558078287e-06, "loss": 2.9523, "step": 56264 }, { "epoch": 2.76, "grad_norm": 0.7958837747573853, "learning_rate": 9.628282904122087e-06, "loss": 2.7377, "step": 56265 }, { "epoch": 2.76, "grad_norm": 0.7492504715919495, "learning_rate": 9.624414014949156e-06, "loss": 3.0474, "step": 56266 }, { "epoch": 2.76, "grad_norm": 0.7660962343215942, "learning_rate": 9.620545890569587e-06, "loss": 3.0414, "step": 56267 }, { "epoch": 2.76, "grad_norm": 0.775435745716095, "learning_rate": 9.616678530993671e-06, "loss": 3.2265, "step": 56268 }, { "epoch": 2.76, "grad_norm": 0.7225391268730164, "learning_rate": 9.6128119362315e-06, "loss": 2.8305, "step": 56269 }, { "epoch": 2.76, "grad_norm": 0.7277866005897522, "learning_rate": 9.608946106293302e-06, "loss": 2.715, "step": 56270 }, { "epoch": 2.76, "grad_norm": 0.7221185564994812, "learning_rate": 9.605081041189266e-06, "loss": 2.7211, "step": 56271 }, { "epoch": 2.76, "grad_norm": 0.7964680194854736, "learning_rate": 9.60121674092955e-06, "loss": 2.9876, "step": 56272 }, { "epoch": 2.76, "grad_norm": 0.7531980276107788, "learning_rate": 9.59735320552435e-06, "loss": 2.7597, "step": 56273 }, { "epoch": 2.76, "grad_norm": 0.74334716796875, "learning_rate": 9.593490434983787e-06, "loss": 2.8664, "step": 56274 }, { "epoch": 2.76, "grad_norm": 0.7619105577468872, "learning_rate": 9.58962842931812e-06, "loss": 2.6577, "step": 56275 }, { "epoch": 2.76, "grad_norm": 0.8216807842254639, "learning_rate": 9.585767188537474e-06, "loss": 2.9393, "step": 56276 }, { "epoch": 2.76, "grad_norm": 0.7856898903846741, "learning_rate": 9.581906712652043e-06, "loss": 2.8374, "step": 56277 }, { "epoch": 2.76, "grad_norm": 0.7424861788749695, "learning_rate": 9.578047001671984e-06, "loss": 2.8822, "step": 56278 }, { "epoch": 2.76, "grad_norm": 0.758293092250824, "learning_rate": 9.574188055607423e-06, "loss": 2.972, "step": 56279 }, { "epoch": 2.76, "grad_norm": 0.7339947819709778, "learning_rate": 9.570329874468553e-06, "loss": 3.113, "step": 56280 }, { "epoch": 2.76, "grad_norm": 0.7537075281143188, "learning_rate": 9.566472458265594e-06, "loss": 2.7329, "step": 56281 }, { "epoch": 2.76, "grad_norm": 0.7673822045326233, "learning_rate": 9.562615807008577e-06, "loss": 3.0186, "step": 56282 }, { "epoch": 2.76, "grad_norm": 0.7142574787139893, "learning_rate": 9.558759920707792e-06, "loss": 2.6684, "step": 56283 }, { "epoch": 2.76, "grad_norm": 0.7520612478256226, "learning_rate": 9.55490479937333e-06, "loss": 2.6922, "step": 56284 }, { "epoch": 2.76, "grad_norm": 0.7886697053909302, "learning_rate": 9.551050443015318e-06, "loss": 2.95, "step": 56285 }, { "epoch": 2.76, "grad_norm": 0.7925397157669067, "learning_rate": 9.54719685164398e-06, "loss": 2.953, "step": 56286 }, { "epoch": 2.76, "grad_norm": 0.7573472261428833, "learning_rate": 9.543344025269406e-06, "loss": 2.975, "step": 56287 }, { "epoch": 2.76, "grad_norm": 0.7683876752853394, "learning_rate": 9.539491963901757e-06, "loss": 2.8721, "step": 56288 }, { "epoch": 2.76, "grad_norm": 0.7631601095199585, "learning_rate": 9.535640667551192e-06, "loss": 3.1299, "step": 56289 }, { "epoch": 2.76, "grad_norm": 0.7261856198310852, "learning_rate": 9.531790136227869e-06, "loss": 2.7541, "step": 56290 }, { "epoch": 2.76, "grad_norm": 0.7447924017906189, "learning_rate": 9.527940369941911e-06, "loss": 2.9447, "step": 56291 }, { "epoch": 2.76, "grad_norm": 0.7883365154266357, "learning_rate": 9.524091368703413e-06, "loss": 2.8184, "step": 56292 }, { "epoch": 2.76, "grad_norm": 0.7671542763710022, "learning_rate": 9.520243132522597e-06, "loss": 2.9793, "step": 56293 }, { "epoch": 2.76, "grad_norm": 0.7295643091201782, "learning_rate": 9.516395661409526e-06, "loss": 3.0381, "step": 56294 }, { "epoch": 2.76, "grad_norm": 0.7493358850479126, "learning_rate": 9.512548955374355e-06, "loss": 2.9604, "step": 56295 }, { "epoch": 2.76, "grad_norm": 0.7209811806678772, "learning_rate": 9.508703014427278e-06, "loss": 3.0627, "step": 56296 }, { "epoch": 2.76, "grad_norm": 0.7741036415100098, "learning_rate": 9.504857838578317e-06, "loss": 2.9489, "step": 56297 }, { "epoch": 2.76, "grad_norm": 0.7175572514533997, "learning_rate": 9.501013427837666e-06, "loss": 2.8748, "step": 56298 }, { "epoch": 2.76, "grad_norm": 0.7129703760147095, "learning_rate": 9.497169782215485e-06, "loss": 2.9452, "step": 56299 }, { "epoch": 2.76, "grad_norm": 0.7331249117851257, "learning_rate": 9.493326901721798e-06, "loss": 2.7358, "step": 56300 }, { "epoch": 2.76, "grad_norm": 0.738678514957428, "learning_rate": 9.489484786366797e-06, "loss": 2.7248, "step": 56301 }, { "epoch": 2.76, "grad_norm": 0.7397850751876831, "learning_rate": 9.485643436160572e-06, "loss": 2.9859, "step": 56302 }, { "epoch": 2.76, "grad_norm": 0.7723121643066406, "learning_rate": 9.48180285111325e-06, "loss": 3.1972, "step": 56303 }, { "epoch": 2.76, "grad_norm": 0.7921167612075806, "learning_rate": 9.477963031234958e-06, "loss": 2.9502, "step": 56304 }, { "epoch": 2.76, "grad_norm": 0.7509664297103882, "learning_rate": 9.474123976535819e-06, "loss": 2.9147, "step": 56305 }, { "epoch": 2.76, "grad_norm": 0.8044818043708801, "learning_rate": 9.470285687025925e-06, "loss": 3.1336, "step": 56306 }, { "epoch": 2.76, "grad_norm": 0.7625141143798828, "learning_rate": 9.466448162715335e-06, "loss": 2.882, "step": 56307 }, { "epoch": 2.76, "grad_norm": 0.7402327656745911, "learning_rate": 9.46261140361424e-06, "loss": 2.8615, "step": 56308 }, { "epoch": 2.76, "grad_norm": 0.7257512807846069, "learning_rate": 9.4587754097327e-06, "loss": 3.0802, "step": 56309 }, { "epoch": 2.76, "grad_norm": 0.7531664371490479, "learning_rate": 9.454940181080839e-06, "loss": 2.9292, "step": 56310 }, { "epoch": 2.76, "grad_norm": 0.7305158972740173, "learning_rate": 9.45110571766875e-06, "loss": 2.9042, "step": 56311 }, { "epoch": 2.76, "grad_norm": 0.7849349975585938, "learning_rate": 9.447272019506524e-06, "loss": 2.812, "step": 56312 }, { "epoch": 2.76, "grad_norm": 0.7790784239768982, "learning_rate": 9.443439086604287e-06, "loss": 2.886, "step": 56313 }, { "epoch": 2.76, "grad_norm": 0.7468773126602173, "learning_rate": 9.439606918972097e-06, "loss": 2.9739, "step": 56314 }, { "epoch": 2.76, "grad_norm": 0.7422334551811218, "learning_rate": 9.435775516620049e-06, "loss": 2.983, "step": 56315 }, { "epoch": 2.76, "grad_norm": 0.7719339728355408, "learning_rate": 9.431944879558263e-06, "loss": 2.9528, "step": 56316 }, { "epoch": 2.76, "grad_norm": 0.7582297325134277, "learning_rate": 9.428115007796799e-06, "loss": 2.8616, "step": 56317 }, { "epoch": 2.76, "grad_norm": 0.7723546624183655, "learning_rate": 9.424285901345785e-06, "loss": 2.8218, "step": 56318 }, { "epoch": 2.76, "grad_norm": 0.7500159740447998, "learning_rate": 9.420457560215245e-06, "loss": 2.9789, "step": 56319 }, { "epoch": 2.76, "grad_norm": 0.7486183047294617, "learning_rate": 9.416629984415335e-06, "loss": 2.8109, "step": 56320 }, { "epoch": 2.76, "grad_norm": 0.7632740139961243, "learning_rate": 9.412803173956085e-06, "loss": 2.9913, "step": 56321 }, { "epoch": 2.76, "grad_norm": 0.7619692087173462, "learning_rate": 9.408977128847584e-06, "loss": 3.0314, "step": 56322 }, { "epoch": 2.76, "grad_norm": 0.7509559392929077, "learning_rate": 9.405151849099923e-06, "loss": 2.7083, "step": 56323 }, { "epoch": 2.76, "grad_norm": 0.7356145977973938, "learning_rate": 9.40132733472313e-06, "loss": 2.7204, "step": 56324 }, { "epoch": 2.76, "grad_norm": 0.7453984618186951, "learning_rate": 9.397503585727328e-06, "loss": 2.845, "step": 56325 }, { "epoch": 2.76, "grad_norm": 0.7576645612716675, "learning_rate": 9.393680602122578e-06, "loss": 2.8996, "step": 56326 }, { "epoch": 2.76, "grad_norm": 0.7218955755233765, "learning_rate": 9.389858383918969e-06, "loss": 2.8574, "step": 56327 }, { "epoch": 2.76, "grad_norm": 0.7386839389801025, "learning_rate": 9.386036931126529e-06, "loss": 2.9469, "step": 56328 }, { "epoch": 2.76, "grad_norm": 0.7222349047660828, "learning_rate": 9.382216243755313e-06, "loss": 2.7775, "step": 56329 }, { "epoch": 2.76, "grad_norm": 0.7363526225090027, "learning_rate": 9.378396321815419e-06, "loss": 2.9486, "step": 56330 }, { "epoch": 2.76, "grad_norm": 0.7235442996025085, "learning_rate": 9.374577165316932e-06, "loss": 3.037, "step": 56331 }, { "epoch": 2.76, "grad_norm": 0.7777265310287476, "learning_rate": 9.37075877426985e-06, "loss": 2.9327, "step": 56332 }, { "epoch": 2.76, "grad_norm": 0.7499440908432007, "learning_rate": 9.36694114868426e-06, "loss": 3.0098, "step": 56333 }, { "epoch": 2.76, "grad_norm": 0.7149835824966431, "learning_rate": 9.363124288570189e-06, "loss": 2.913, "step": 56334 }, { "epoch": 2.76, "grad_norm": 0.7982455492019653, "learning_rate": 9.359308193937765e-06, "loss": 3.024, "step": 56335 }, { "epoch": 2.76, "grad_norm": 0.7221076488494873, "learning_rate": 9.355492864796977e-06, "loss": 2.7995, "step": 56336 }, { "epoch": 2.76, "grad_norm": 0.6967481970787048, "learning_rate": 9.35167830115785e-06, "loss": 3.0547, "step": 56337 }, { "epoch": 2.76, "grad_norm": 0.7800359129905701, "learning_rate": 9.347864503030511e-06, "loss": 2.9932, "step": 56338 }, { "epoch": 2.76, "grad_norm": 0.7424789071083069, "learning_rate": 9.344051470424884e-06, "loss": 2.8447, "step": 56339 }, { "epoch": 2.76, "grad_norm": 0.7663801908493042, "learning_rate": 9.340239203351129e-06, "loss": 2.9393, "step": 56340 }, { "epoch": 2.76, "grad_norm": 0.7594396471977234, "learning_rate": 9.336427701819272e-06, "loss": 2.8301, "step": 56341 }, { "epoch": 2.76, "grad_norm": 0.7261060476303101, "learning_rate": 9.332616965839335e-06, "loss": 2.8903, "step": 56342 }, { "epoch": 2.76, "grad_norm": 0.7838093042373657, "learning_rate": 9.328806995421311e-06, "loss": 2.7609, "step": 56343 }, { "epoch": 2.76, "grad_norm": 0.7690815925598145, "learning_rate": 9.324997790575262e-06, "loss": 2.8137, "step": 56344 }, { "epoch": 2.76, "grad_norm": 0.7286287546157837, "learning_rate": 9.321189351311275e-06, "loss": 2.7991, "step": 56345 }, { "epoch": 2.76, "grad_norm": 0.7787858843803406, "learning_rate": 9.31738167763928e-06, "loss": 3.0519, "step": 56346 }, { "epoch": 2.76, "grad_norm": 0.7564954161643982, "learning_rate": 9.313574769569366e-06, "loss": 2.8195, "step": 56347 }, { "epoch": 2.76, "grad_norm": 0.7487925887107849, "learning_rate": 9.309768627111558e-06, "loss": 2.8282, "step": 56348 }, { "epoch": 2.76, "grad_norm": 0.7704711556434631, "learning_rate": 9.305963250275916e-06, "loss": 2.811, "step": 56349 }, { "epoch": 2.76, "grad_norm": 0.7544106841087341, "learning_rate": 9.302158639072399e-06, "loss": 3.0789, "step": 56350 }, { "epoch": 2.76, "grad_norm": 0.7106571793556213, "learning_rate": 9.298354793510998e-06, "loss": 2.7505, "step": 56351 }, { "epoch": 2.76, "grad_norm": 0.7845272421836853, "learning_rate": 9.294551713601805e-06, "loss": 2.916, "step": 56352 }, { "epoch": 2.76, "grad_norm": 0.7282042503356934, "learning_rate": 9.290749399354847e-06, "loss": 3.0183, "step": 56353 }, { "epoch": 2.76, "grad_norm": 0.7017480134963989, "learning_rate": 9.28694785078008e-06, "loss": 2.8869, "step": 56354 }, { "epoch": 2.76, "grad_norm": 0.7535594701766968, "learning_rate": 9.283147067887564e-06, "loss": 2.8366, "step": 56355 }, { "epoch": 2.76, "grad_norm": 0.7693657279014587, "learning_rate": 9.279347050687291e-06, "loss": 2.8678, "step": 56356 }, { "epoch": 2.76, "grad_norm": 0.7219848036766052, "learning_rate": 9.275547799189253e-06, "loss": 2.8987, "step": 56357 }, { "epoch": 2.76, "grad_norm": 0.7361370325088501, "learning_rate": 9.271749313403509e-06, "loss": 2.8727, "step": 56358 }, { "epoch": 2.76, "grad_norm": 0.7325665950775146, "learning_rate": 9.26795159333995e-06, "loss": 2.8354, "step": 56359 }, { "epoch": 2.76, "grad_norm": 0.7923938035964966, "learning_rate": 9.264154639008736e-06, "loss": 2.6835, "step": 56360 }, { "epoch": 2.76, "grad_norm": 0.7122077345848083, "learning_rate": 9.260358450419724e-06, "loss": 2.8346, "step": 56361 }, { "epoch": 2.76, "grad_norm": 0.7342128753662109, "learning_rate": 9.256563027582975e-06, "loss": 2.8962, "step": 56362 }, { "epoch": 2.76, "grad_norm": 0.7379400730133057, "learning_rate": 9.252768370508513e-06, "loss": 2.8967, "step": 56363 }, { "epoch": 2.76, "grad_norm": 0.7097499966621399, "learning_rate": 9.248974479206328e-06, "loss": 2.8588, "step": 56364 }, { "epoch": 2.76, "grad_norm": 0.7544580101966858, "learning_rate": 9.245181353686348e-06, "loss": 2.9067, "step": 56365 }, { "epoch": 2.76, "grad_norm": 0.7791709899902344, "learning_rate": 9.241388993958598e-06, "loss": 2.976, "step": 56366 }, { "epoch": 2.76, "grad_norm": 0.8830820918083191, "learning_rate": 9.23759740003307e-06, "loss": 3.0506, "step": 56367 }, { "epoch": 2.76, "grad_norm": 0.7556170225143433, "learning_rate": 9.233806571919788e-06, "loss": 2.7625, "step": 56368 }, { "epoch": 2.76, "grad_norm": 0.7081811428070068, "learning_rate": 9.230016509628679e-06, "loss": 2.8684, "step": 56369 }, { "epoch": 2.76, "grad_norm": 0.7261504530906677, "learning_rate": 9.226227213169735e-06, "loss": 2.8581, "step": 56370 }, { "epoch": 2.76, "grad_norm": 0.768895149230957, "learning_rate": 9.22243868255298e-06, "loss": 2.9572, "step": 56371 }, { "epoch": 2.76, "grad_norm": 0.7371829152107239, "learning_rate": 9.218650917788339e-06, "loss": 2.7464, "step": 56372 }, { "epoch": 2.76, "grad_norm": 0.7369073033332825, "learning_rate": 9.214863918885806e-06, "loss": 2.6433, "step": 56373 }, { "epoch": 2.76, "grad_norm": 0.7547151446342468, "learning_rate": 9.211077685855372e-06, "loss": 2.8662, "step": 56374 }, { "epoch": 2.76, "grad_norm": 0.7417933344841003, "learning_rate": 9.207292218706996e-06, "loss": 3.0114, "step": 56375 }, { "epoch": 2.76, "grad_norm": 0.7300156354904175, "learning_rate": 9.203507517450603e-06, "loss": 3.0027, "step": 56376 }, { "epoch": 2.76, "grad_norm": 0.7735124230384827, "learning_rate": 9.199723582096253e-06, "loss": 2.8641, "step": 56377 }, { "epoch": 2.76, "grad_norm": 0.7115468382835388, "learning_rate": 9.195940412653868e-06, "loss": 2.8722, "step": 56378 }, { "epoch": 2.76, "grad_norm": 0.7530733942985535, "learning_rate": 9.19215800913341e-06, "loss": 2.9117, "step": 56379 }, { "epoch": 2.76, "grad_norm": 0.74192875623703, "learning_rate": 9.188376371544837e-06, "loss": 2.8185, "step": 56380 }, { "epoch": 2.76, "grad_norm": 0.7729009389877319, "learning_rate": 9.184595499898073e-06, "loss": 2.9038, "step": 56381 }, { "epoch": 2.76, "grad_norm": 0.7162991166114807, "learning_rate": 9.180815394203145e-06, "loss": 3.1287, "step": 56382 }, { "epoch": 2.76, "grad_norm": 0.7535189390182495, "learning_rate": 9.177036054470011e-06, "loss": 2.8736, "step": 56383 }, { "epoch": 2.76, "grad_norm": 0.7650796175003052, "learning_rate": 9.17325748070853e-06, "loss": 3.0441, "step": 56384 }, { "epoch": 2.76, "grad_norm": 0.7328662872314453, "learning_rate": 9.169479672928759e-06, "loss": 2.8645, "step": 56385 }, { "epoch": 2.76, "grad_norm": 0.7376196384429932, "learning_rate": 9.165702631140625e-06, "loss": 2.7676, "step": 56386 }, { "epoch": 2.76, "grad_norm": 0.7664865255355835, "learning_rate": 9.161926355354022e-06, "loss": 3.0326, "step": 56387 }, { "epoch": 2.76, "grad_norm": 0.7669900059700012, "learning_rate": 9.158150845578971e-06, "loss": 2.8375, "step": 56388 }, { "epoch": 2.76, "grad_norm": 0.7463096976280212, "learning_rate": 9.154376101825333e-06, "loss": 2.9926, "step": 56389 }, { "epoch": 2.76, "grad_norm": 0.7658016085624695, "learning_rate": 9.1506021241031e-06, "loss": 2.7995, "step": 56390 }, { "epoch": 2.76, "grad_norm": 0.7736062407493591, "learning_rate": 9.146828912422199e-06, "loss": 2.858, "step": 56391 }, { "epoch": 2.76, "grad_norm": 0.7192699313163757, "learning_rate": 9.14305646679262e-06, "loss": 2.8394, "step": 56392 }, { "epoch": 2.76, "grad_norm": 0.7871600985527039, "learning_rate": 9.139284787224254e-06, "loss": 2.9598, "step": 56393 }, { "epoch": 2.76, "grad_norm": 0.7646672129631042, "learning_rate": 9.135513873726996e-06, "loss": 2.9829, "step": 56394 }, { "epoch": 2.76, "grad_norm": 0.7649540305137634, "learning_rate": 9.131743726310835e-06, "loss": 3.1376, "step": 56395 }, { "epoch": 2.76, "grad_norm": 0.7846724987030029, "learning_rate": 9.127974344985634e-06, "loss": 2.955, "step": 56396 }, { "epoch": 2.76, "grad_norm": 0.7587399482727051, "learning_rate": 9.124205729761414e-06, "loss": 2.7941, "step": 56397 }, { "epoch": 2.76, "grad_norm": 0.7872162461280823, "learning_rate": 9.120437880648068e-06, "loss": 2.9246, "step": 56398 }, { "epoch": 2.76, "grad_norm": 0.7765682339668274, "learning_rate": 9.116670797655456e-06, "loss": 2.6701, "step": 56399 }, { "epoch": 2.76, "grad_norm": 0.7208048701286316, "learning_rate": 9.112904480793604e-06, "loss": 2.915, "step": 56400 }, { "epoch": 2.76, "grad_norm": 0.7376406192779541, "learning_rate": 9.109138930072401e-06, "loss": 3.0416, "step": 56401 }, { "epoch": 2.76, "grad_norm": 0.7587045431137085, "learning_rate": 9.105374145501676e-06, "loss": 3.0432, "step": 56402 }, { "epoch": 2.76, "grad_norm": 0.7954713106155396, "learning_rate": 9.101610127091453e-06, "loss": 2.8843, "step": 56403 }, { "epoch": 2.76, "grad_norm": 0.783104419708252, "learning_rate": 9.097846874851588e-06, "loss": 3.0071, "step": 56404 }, { "epoch": 2.76, "grad_norm": 0.7557005286216736, "learning_rate": 9.094084388792012e-06, "loss": 2.8479, "step": 56405 }, { "epoch": 2.76, "grad_norm": 0.738347053527832, "learning_rate": 9.090322668922612e-06, "loss": 3.0269, "step": 56406 }, { "epoch": 2.76, "grad_norm": 0.7137772440910339, "learning_rate": 9.086561715253349e-06, "loss": 2.7575, "step": 56407 }, { "epoch": 2.76, "grad_norm": 0.7108136415481567, "learning_rate": 9.082801527794115e-06, "loss": 3.1663, "step": 56408 }, { "epoch": 2.76, "grad_norm": 0.773616373538971, "learning_rate": 9.079042106554734e-06, "loss": 2.8878, "step": 56409 }, { "epoch": 2.76, "grad_norm": 0.7863094806671143, "learning_rate": 9.075283451545202e-06, "loss": 3.0319, "step": 56410 }, { "epoch": 2.76, "grad_norm": 0.7484992146492004, "learning_rate": 9.071525562775373e-06, "loss": 2.9823, "step": 56411 }, { "epoch": 2.76, "grad_norm": 0.7688180804252625, "learning_rate": 9.067768440255141e-06, "loss": 2.727, "step": 56412 }, { "epoch": 2.76, "grad_norm": 0.7612563967704773, "learning_rate": 9.064012083994433e-06, "loss": 2.9456, "step": 56413 }, { "epoch": 2.76, "grad_norm": 0.7126185297966003, "learning_rate": 9.060256494003105e-06, "loss": 2.7921, "step": 56414 }, { "epoch": 2.76, "grad_norm": 0.7291039824485779, "learning_rate": 9.056501670291149e-06, "loss": 2.8641, "step": 56415 }, { "epoch": 2.76, "grad_norm": 0.7190093398094177, "learning_rate": 9.052747612868294e-06, "loss": 3.0766, "step": 56416 }, { "epoch": 2.76, "grad_norm": 0.7553203701972961, "learning_rate": 9.04899432174453e-06, "loss": 2.7905, "step": 56417 }, { "epoch": 2.76, "grad_norm": 0.7836436629295349, "learning_rate": 9.045241796929713e-06, "loss": 2.6367, "step": 56418 }, { "epoch": 2.76, "grad_norm": 0.7655666470527649, "learning_rate": 9.041490038433741e-06, "loss": 2.9534, "step": 56419 }, { "epoch": 2.77, "grad_norm": 0.7657229900360107, "learning_rate": 9.037739046266501e-06, "loss": 3.0535, "step": 56420 }, { "epoch": 2.77, "grad_norm": 0.7775158882141113, "learning_rate": 9.033988820437821e-06, "loss": 2.8456, "step": 56421 }, { "epoch": 2.77, "grad_norm": 0.773569643497467, "learning_rate": 9.030239360957658e-06, "loss": 2.8731, "step": 56422 }, { "epoch": 2.77, "grad_norm": 0.7964552640914917, "learning_rate": 9.02649066783584e-06, "loss": 2.8055, "step": 56423 }, { "epoch": 2.77, "grad_norm": 0.7493492960929871, "learning_rate": 9.022742741082223e-06, "loss": 2.7148, "step": 56424 }, { "epoch": 2.77, "grad_norm": 0.7704905867576599, "learning_rate": 9.018995580706733e-06, "loss": 2.834, "step": 56425 }, { "epoch": 2.77, "grad_norm": 0.773236095905304, "learning_rate": 9.015249186719164e-06, "loss": 2.7859, "step": 56426 }, { "epoch": 2.77, "grad_norm": 0.7138835787773132, "learning_rate": 9.011503559129474e-06, "loss": 3.0637, "step": 56427 }, { "epoch": 2.77, "grad_norm": 0.7717339396476746, "learning_rate": 9.007758697947453e-06, "loss": 3.0914, "step": 56428 }, { "epoch": 2.77, "grad_norm": 0.7018489837646484, "learning_rate": 9.004014603182996e-06, "loss": 2.8645, "step": 56429 }, { "epoch": 2.77, "grad_norm": 0.7404462099075317, "learning_rate": 9.000271274845994e-06, "loss": 2.8071, "step": 56430 }, { "epoch": 2.77, "grad_norm": 0.74892258644104, "learning_rate": 8.996528712946238e-06, "loss": 3.0029, "step": 56431 }, { "epoch": 2.77, "grad_norm": 0.8299066424369812, "learning_rate": 8.992786917493622e-06, "loss": 2.8981, "step": 56432 }, { "epoch": 2.77, "grad_norm": 0.7275087237358093, "learning_rate": 8.989045888498003e-06, "loss": 2.8402, "step": 56433 }, { "epoch": 2.77, "grad_norm": 0.73467618227005, "learning_rate": 8.985305625969209e-06, "loss": 2.5677, "step": 56434 }, { "epoch": 2.77, "grad_norm": 0.7989902496337891, "learning_rate": 8.981566129917162e-06, "loss": 2.9348, "step": 56435 }, { "epoch": 2.77, "grad_norm": 0.7334526777267456, "learning_rate": 8.977827400351623e-06, "loss": 2.8775, "step": 56436 }, { "epoch": 2.77, "grad_norm": 0.7814991474151611, "learning_rate": 8.974089437282517e-06, "loss": 3.0359, "step": 56437 }, { "epoch": 2.77, "grad_norm": 0.7895364165306091, "learning_rate": 8.970352240719637e-06, "loss": 3.0237, "step": 56438 }, { "epoch": 2.77, "grad_norm": 0.7760238647460938, "learning_rate": 8.966615810672805e-06, "loss": 2.7975, "step": 56439 }, { "epoch": 2.77, "grad_norm": 0.795097827911377, "learning_rate": 8.962880147151951e-06, "loss": 2.7911, "step": 56440 }, { "epoch": 2.77, "grad_norm": 0.7412228584289551, "learning_rate": 8.959145250166799e-06, "loss": 2.7482, "step": 56441 }, { "epoch": 2.77, "grad_norm": 0.7412148118019104, "learning_rate": 8.955411119727307e-06, "loss": 2.8579, "step": 56442 }, { "epoch": 2.77, "grad_norm": 0.740685224533081, "learning_rate": 8.951677755843201e-06, "loss": 2.8466, "step": 56443 }, { "epoch": 2.77, "grad_norm": 0.7448912262916565, "learning_rate": 8.947945158524372e-06, "loss": 2.7933, "step": 56444 }, { "epoch": 2.77, "grad_norm": 0.7429138422012329, "learning_rate": 8.94421332778068e-06, "loss": 2.8862, "step": 56445 }, { "epoch": 2.77, "grad_norm": 0.7522229552268982, "learning_rate": 8.940482263621884e-06, "loss": 2.8816, "step": 56446 }, { "epoch": 2.77, "grad_norm": 0.7623513340950012, "learning_rate": 8.936751966057842e-06, "loss": 2.8082, "step": 56447 }, { "epoch": 2.77, "grad_norm": 0.7645907402038574, "learning_rate": 8.93302243509838e-06, "loss": 2.9118, "step": 56448 }, { "epoch": 2.77, "grad_norm": 0.7380853891372681, "learning_rate": 8.92929367075329e-06, "loss": 2.8021, "step": 56449 }, { "epoch": 2.77, "grad_norm": 0.714244544506073, "learning_rate": 8.925565673032465e-06, "loss": 3.1272, "step": 56450 }, { "epoch": 2.77, "grad_norm": 0.7148142457008362, "learning_rate": 8.921838441945695e-06, "loss": 2.8177, "step": 56451 }, { "epoch": 2.77, "grad_norm": 0.7204702496528625, "learning_rate": 8.918111977502772e-06, "loss": 2.8349, "step": 56452 }, { "epoch": 2.77, "grad_norm": 0.7750880718231201, "learning_rate": 8.914386279713492e-06, "loss": 3.0569, "step": 56453 }, { "epoch": 2.77, "grad_norm": 0.7485977411270142, "learning_rate": 8.910661348587711e-06, "loss": 2.8497, "step": 56454 }, { "epoch": 2.77, "grad_norm": 0.7468576431274414, "learning_rate": 8.90693718413522e-06, "loss": 2.9756, "step": 56455 }, { "epoch": 2.77, "grad_norm": 0.7802996635437012, "learning_rate": 8.903213786365847e-06, "loss": 2.9238, "step": 56456 }, { "epoch": 2.77, "grad_norm": 0.7729799747467041, "learning_rate": 8.899491155289385e-06, "loss": 2.8883, "step": 56457 }, { "epoch": 2.77, "grad_norm": 0.7460412383079529, "learning_rate": 8.895769290915621e-06, "loss": 2.7785, "step": 56458 }, { "epoch": 2.77, "grad_norm": 0.8159653544425964, "learning_rate": 8.89204819325442e-06, "loss": 2.9526, "step": 56459 }, { "epoch": 2.77, "grad_norm": 0.7661879658699036, "learning_rate": 8.888327862315536e-06, "loss": 2.9368, "step": 56460 }, { "epoch": 2.77, "grad_norm": 0.769864022731781, "learning_rate": 8.884608298108731e-06, "loss": 2.9032, "step": 56461 }, { "epoch": 2.77, "grad_norm": 0.7339568734169006, "learning_rate": 8.880889500643861e-06, "loss": 2.8907, "step": 56462 }, { "epoch": 2.77, "grad_norm": 0.7468345761299133, "learning_rate": 8.877171469930689e-06, "loss": 2.7929, "step": 56463 }, { "epoch": 2.77, "grad_norm": 0.7140334844589233, "learning_rate": 8.873454205979002e-06, "loss": 2.695, "step": 56464 }, { "epoch": 2.77, "grad_norm": 0.7177031636238098, "learning_rate": 8.869737708798663e-06, "loss": 2.9442, "step": 56465 }, { "epoch": 2.77, "grad_norm": 0.7148962616920471, "learning_rate": 8.866021978399396e-06, "loss": 2.8238, "step": 56466 }, { "epoch": 2.77, "grad_norm": 0.771285891532898, "learning_rate": 8.862307014790992e-06, "loss": 2.9251, "step": 56467 }, { "epoch": 2.77, "grad_norm": 0.7293102741241455, "learning_rate": 8.858592817983212e-06, "loss": 3.0754, "step": 56468 }, { "epoch": 2.77, "grad_norm": 0.776573657989502, "learning_rate": 8.85487938798588e-06, "loss": 2.9431, "step": 56469 }, { "epoch": 2.77, "grad_norm": 0.7331776022911072, "learning_rate": 8.85116672480879e-06, "loss": 3.0407, "step": 56470 }, { "epoch": 2.77, "grad_norm": 0.7505592703819275, "learning_rate": 8.847454828461632e-06, "loss": 2.9197, "step": 56471 }, { "epoch": 2.77, "grad_norm": 0.7655132412910461, "learning_rate": 8.843743698954297e-06, "loss": 3.0714, "step": 56472 }, { "epoch": 2.77, "grad_norm": 0.7211343050003052, "learning_rate": 8.840033336296516e-06, "loss": 2.8666, "step": 56473 }, { "epoch": 2.77, "grad_norm": 0.7068425416946411, "learning_rate": 8.836323740498009e-06, "loss": 2.8191, "step": 56474 }, { "epoch": 2.77, "grad_norm": 0.6717082262039185, "learning_rate": 8.832614911568636e-06, "loss": 2.8159, "step": 56475 }, { "epoch": 2.77, "grad_norm": 0.7441225051879883, "learning_rate": 8.828906849518092e-06, "loss": 2.8976, "step": 56476 }, { "epoch": 2.77, "grad_norm": 0.7472232580184937, "learning_rate": 8.825199554356166e-06, "loss": 2.7381, "step": 56477 }, { "epoch": 2.77, "grad_norm": 0.7638667225837708, "learning_rate": 8.821493026092619e-06, "loss": 2.8284, "step": 56478 }, { "epoch": 2.77, "grad_norm": 0.7200614213943481, "learning_rate": 8.817787264737242e-06, "loss": 2.9348, "step": 56479 }, { "epoch": 2.77, "grad_norm": 0.7430014610290527, "learning_rate": 8.81408227029976e-06, "loss": 2.9065, "step": 56480 }, { "epoch": 2.77, "grad_norm": 0.7274750471115112, "learning_rate": 8.810378042789968e-06, "loss": 2.7577, "step": 56481 }, { "epoch": 2.77, "grad_norm": 0.7740485072135925, "learning_rate": 8.80667458221762e-06, "loss": 2.9349, "step": 56482 }, { "epoch": 2.77, "grad_norm": 0.7242769598960876, "learning_rate": 8.802971888592381e-06, "loss": 2.9105, "step": 56483 }, { "epoch": 2.77, "grad_norm": 0.7862998843193054, "learning_rate": 8.799269961924104e-06, "loss": 2.8458, "step": 56484 }, { "epoch": 2.77, "grad_norm": 0.7596186399459839, "learning_rate": 8.79556880222252e-06, "loss": 2.8756, "step": 56485 }, { "epoch": 2.77, "grad_norm": 0.7429347634315491, "learning_rate": 8.791868409497316e-06, "loss": 3.0002, "step": 56486 }, { "epoch": 2.77, "grad_norm": 0.7703331708908081, "learning_rate": 8.788168783758354e-06, "loss": 2.9832, "step": 56487 }, { "epoch": 2.77, "grad_norm": 0.7370572090148926, "learning_rate": 8.784469925015292e-06, "loss": 2.7631, "step": 56488 }, { "epoch": 2.77, "grad_norm": 0.7434906959533691, "learning_rate": 8.780771833277855e-06, "loss": 2.8704, "step": 56489 }, { "epoch": 2.77, "grad_norm": 0.7217363119125366, "learning_rate": 8.777074508555871e-06, "loss": 2.8909, "step": 56490 }, { "epoch": 2.77, "grad_norm": 0.7547829747200012, "learning_rate": 8.773377950858995e-06, "loss": 2.8094, "step": 56491 }, { "epoch": 2.77, "grad_norm": 0.7625556588172913, "learning_rate": 8.769682160196989e-06, "loss": 2.9916, "step": 56492 }, { "epoch": 2.77, "grad_norm": 0.7367002964019775, "learning_rate": 8.765987136579577e-06, "loss": 2.7937, "step": 56493 }, { "epoch": 2.77, "grad_norm": 0.7374213337898254, "learning_rate": 8.762292880016553e-06, "loss": 2.9097, "step": 56494 }, { "epoch": 2.77, "grad_norm": 0.8057516813278198, "learning_rate": 8.758599390517607e-06, "loss": 2.7651, "step": 56495 }, { "epoch": 2.77, "grad_norm": 0.7465009093284607, "learning_rate": 8.754906668092398e-06, "loss": 2.9281, "step": 56496 }, { "epoch": 2.77, "grad_norm": 0.7300065755844116, "learning_rate": 8.751214712750754e-06, "loss": 2.9257, "step": 56497 }, { "epoch": 2.77, "grad_norm": 0.7591533064842224, "learning_rate": 8.747523524502332e-06, "loss": 3.0157, "step": 56498 }, { "epoch": 2.77, "grad_norm": 0.7155272960662842, "learning_rate": 8.743833103356923e-06, "loss": 2.881, "step": 56499 }, { "epoch": 2.77, "grad_norm": 0.7301709651947021, "learning_rate": 8.740143449324155e-06, "loss": 3.1471, "step": 56500 }, { "epoch": 2.77, "grad_norm": 0.7165641784667969, "learning_rate": 8.736454562413786e-06, "loss": 2.7683, "step": 56501 }, { "epoch": 2.77, "grad_norm": 0.7362425923347473, "learning_rate": 8.732766442635607e-06, "loss": 2.9066, "step": 56502 }, { "epoch": 2.77, "grad_norm": 0.7725816965103149, "learning_rate": 8.729079089999246e-06, "loss": 2.9066, "step": 56503 }, { "epoch": 2.77, "grad_norm": 0.7778238654136658, "learning_rate": 8.725392504514429e-06, "loss": 2.8673, "step": 56504 }, { "epoch": 2.77, "grad_norm": 0.7198028564453125, "learning_rate": 8.721706686190843e-06, "loss": 2.9211, "step": 56505 }, { "epoch": 2.77, "grad_norm": 0.7817999720573425, "learning_rate": 8.71802163503822e-06, "loss": 2.8808, "step": 56506 }, { "epoch": 2.77, "grad_norm": 0.7423765063285828, "learning_rate": 8.714337351066314e-06, "loss": 2.7144, "step": 56507 }, { "epoch": 2.77, "grad_norm": 0.7737480998039246, "learning_rate": 8.710653834284753e-06, "loss": 2.9229, "step": 56508 }, { "epoch": 2.77, "grad_norm": 0.7494272589683533, "learning_rate": 8.706971084703295e-06, "loss": 2.9935, "step": 56509 }, { "epoch": 2.77, "grad_norm": 0.7207717895507812, "learning_rate": 8.7032891023316e-06, "loss": 3.0677, "step": 56510 }, { "epoch": 2.77, "grad_norm": 0.7812270522117615, "learning_rate": 8.699607887179361e-06, "loss": 2.7982, "step": 56511 }, { "epoch": 2.77, "grad_norm": 0.8005214929580688, "learning_rate": 8.695927439256334e-06, "loss": 2.6689, "step": 56512 }, { "epoch": 2.77, "grad_norm": 0.7496790885925293, "learning_rate": 8.692247758572146e-06, "loss": 2.8616, "step": 56513 }, { "epoch": 2.77, "grad_norm": 0.7499210238456726, "learning_rate": 8.688568845136523e-06, "loss": 2.803, "step": 56514 }, { "epoch": 2.77, "grad_norm": 0.7113456130027771, "learning_rate": 8.684890698959125e-06, "loss": 2.8449, "step": 56515 }, { "epoch": 2.77, "grad_norm": 0.7013689279556274, "learning_rate": 8.681213320049674e-06, "loss": 2.8038, "step": 56516 }, { "epoch": 2.77, "grad_norm": 0.7134240865707397, "learning_rate": 8.677536708417865e-06, "loss": 2.8203, "step": 56517 }, { "epoch": 2.77, "grad_norm": 0.7750184535980225, "learning_rate": 8.673860864073323e-06, "loss": 2.7887, "step": 56518 }, { "epoch": 2.77, "grad_norm": 0.7498697638511658, "learning_rate": 8.670185787025807e-06, "loss": 3.0101, "step": 56519 }, { "epoch": 2.77, "grad_norm": 0.7713249921798706, "learning_rate": 8.66651147728491e-06, "loss": 2.7569, "step": 56520 }, { "epoch": 2.77, "grad_norm": 0.7590934038162231, "learning_rate": 8.662837934860322e-06, "loss": 2.8114, "step": 56521 }, { "epoch": 2.77, "grad_norm": 0.729499101638794, "learning_rate": 8.659165159761838e-06, "loss": 2.8747, "step": 56522 }, { "epoch": 2.77, "grad_norm": 0.7570987939834595, "learning_rate": 8.655493151998983e-06, "loss": 3.1679, "step": 56523 }, { "epoch": 2.77, "grad_norm": 0.7177196145057678, "learning_rate": 8.651821911581513e-06, "loss": 2.9119, "step": 56524 }, { "epoch": 2.77, "grad_norm": 0.7343611121177673, "learning_rate": 8.648151438519091e-06, "loss": 3.0303, "step": 56525 }, { "epoch": 2.77, "grad_norm": 0.7677000164985657, "learning_rate": 8.644481732821306e-06, "loss": 2.9359, "step": 56526 }, { "epoch": 2.77, "grad_norm": 0.7537042498588562, "learning_rate": 8.640812794497953e-06, "loss": 2.7783, "step": 56527 }, { "epoch": 2.77, "grad_norm": 0.7486345171928406, "learning_rate": 8.637144623558557e-06, "loss": 2.7331, "step": 56528 }, { "epoch": 2.77, "grad_norm": 0.7860642671585083, "learning_rate": 8.633477220012908e-06, "loss": 2.9114, "step": 56529 }, { "epoch": 2.77, "grad_norm": 0.7259573340415955, "learning_rate": 8.629810583870567e-06, "loss": 2.9674, "step": 56530 }, { "epoch": 2.77, "grad_norm": 0.7208335995674133, "learning_rate": 8.62614471514126e-06, "loss": 2.7895, "step": 56531 }, { "epoch": 2.77, "grad_norm": 0.7486009001731873, "learning_rate": 8.62247961383461e-06, "loss": 2.948, "step": 56532 }, { "epoch": 2.77, "grad_norm": 0.7948107719421387, "learning_rate": 8.618815279960244e-06, "loss": 2.8621, "step": 56533 }, { "epoch": 2.77, "grad_norm": 0.7149216532707214, "learning_rate": 8.615151713527857e-06, "loss": 2.8832, "step": 56534 }, { "epoch": 2.77, "grad_norm": 0.7751069068908691, "learning_rate": 8.611488914547071e-06, "loss": 2.9773, "step": 56535 }, { "epoch": 2.77, "grad_norm": 0.7153540849685669, "learning_rate": 8.607826883027547e-06, "loss": 2.687, "step": 56536 }, { "epoch": 2.77, "grad_norm": 0.7932222485542297, "learning_rate": 8.604165618978942e-06, "loss": 3.1045, "step": 56537 }, { "epoch": 2.77, "grad_norm": 0.7211452126502991, "learning_rate": 8.60050512241085e-06, "loss": 2.8193, "step": 56538 }, { "epoch": 2.77, "grad_norm": 0.7320334911346436, "learning_rate": 8.596845393332997e-06, "loss": 2.969, "step": 56539 }, { "epoch": 2.77, "grad_norm": 0.7381606101989746, "learning_rate": 8.593186431754973e-06, "loss": 2.72, "step": 56540 }, { "epoch": 2.77, "grad_norm": 0.7109737992286682, "learning_rate": 8.589528237686372e-06, "loss": 2.9596, "step": 56541 }, { "epoch": 2.77, "grad_norm": 0.7556430101394653, "learning_rate": 8.585870811136885e-06, "loss": 3.0741, "step": 56542 }, { "epoch": 2.77, "grad_norm": 0.7720571160316467, "learning_rate": 8.58221415211614e-06, "loss": 3.1015, "step": 56543 }, { "epoch": 2.77, "grad_norm": 0.7691468000411987, "learning_rate": 8.57855826063376e-06, "loss": 2.952, "step": 56544 }, { "epoch": 2.77, "grad_norm": 0.8022409677505493, "learning_rate": 8.57490313669934e-06, "loss": 2.8529, "step": 56545 }, { "epoch": 2.77, "grad_norm": 0.7315953969955444, "learning_rate": 8.57124878032257e-06, "loss": 2.7401, "step": 56546 }, { "epoch": 2.77, "grad_norm": 0.7592272162437439, "learning_rate": 8.567595191513044e-06, "loss": 2.9078, "step": 56547 }, { "epoch": 2.77, "grad_norm": 0.7328471541404724, "learning_rate": 8.563942370280386e-06, "loss": 2.691, "step": 56548 }, { "epoch": 2.77, "grad_norm": 0.7198687195777893, "learning_rate": 8.560290316634222e-06, "loss": 2.7965, "step": 56549 }, { "epoch": 2.77, "grad_norm": 0.7822607159614563, "learning_rate": 8.556639030584112e-06, "loss": 2.9445, "step": 56550 }, { "epoch": 2.77, "grad_norm": 0.789264976978302, "learning_rate": 8.552988512139747e-06, "loss": 2.85, "step": 56551 }, { "epoch": 2.77, "grad_norm": 0.7752166390419006, "learning_rate": 8.549338761310753e-06, "loss": 2.541, "step": 56552 }, { "epoch": 2.77, "grad_norm": 0.7067755460739136, "learning_rate": 8.545689778106724e-06, "loss": 2.9171, "step": 56553 }, { "epoch": 2.77, "grad_norm": 0.7491674423217773, "learning_rate": 8.54204156253725e-06, "loss": 2.7476, "step": 56554 }, { "epoch": 2.77, "grad_norm": 0.8012225031852722, "learning_rate": 8.53839411461189e-06, "loss": 2.9535, "step": 56555 }, { "epoch": 2.77, "grad_norm": 0.7741601467132568, "learning_rate": 8.534747434340306e-06, "loss": 2.8989, "step": 56556 }, { "epoch": 2.77, "grad_norm": 0.735129714012146, "learning_rate": 8.531101521732153e-06, "loss": 2.8754, "step": 56557 }, { "epoch": 2.77, "grad_norm": 0.730719268321991, "learning_rate": 8.527456376796959e-06, "loss": 2.8079, "step": 56558 }, { "epoch": 2.77, "grad_norm": 0.8632533550262451, "learning_rate": 8.523811999544383e-06, "loss": 2.8242, "step": 56559 }, { "epoch": 2.77, "grad_norm": 0.7945080399513245, "learning_rate": 8.52016838998395e-06, "loss": 2.7674, "step": 56560 }, { "epoch": 2.77, "grad_norm": 0.7971088290214539, "learning_rate": 8.516525548125319e-06, "loss": 2.7071, "step": 56561 }, { "epoch": 2.77, "grad_norm": 0.7828199863433838, "learning_rate": 8.512883473978083e-06, "loss": 2.7697, "step": 56562 }, { "epoch": 2.77, "grad_norm": 0.7509476542472839, "learning_rate": 8.509242167551767e-06, "loss": 2.7219, "step": 56563 }, { "epoch": 2.77, "grad_norm": 0.7151806950569153, "learning_rate": 8.505601628856062e-06, "loss": 3.0501, "step": 56564 }, { "epoch": 2.77, "grad_norm": 0.7569729089736938, "learning_rate": 8.50196185790043e-06, "loss": 2.9743, "step": 56565 }, { "epoch": 2.77, "grad_norm": 0.7372108101844788, "learning_rate": 8.498322854694562e-06, "loss": 2.8322, "step": 56566 }, { "epoch": 2.77, "grad_norm": 0.7561090588569641, "learning_rate": 8.494684619248049e-06, "loss": 2.9245, "step": 56567 }, { "epoch": 2.77, "grad_norm": 0.7554269433021545, "learning_rate": 8.491047151570418e-06, "loss": 2.9634, "step": 56568 }, { "epoch": 2.77, "grad_norm": 0.7970679402351379, "learning_rate": 8.487410451671261e-06, "loss": 2.8352, "step": 56569 }, { "epoch": 2.77, "grad_norm": 0.7189784049987793, "learning_rate": 8.483774519560137e-06, "loss": 2.9002, "step": 56570 }, { "epoch": 2.77, "grad_norm": 0.768104612827301, "learning_rate": 8.480139355246674e-06, "loss": 3.1014, "step": 56571 }, { "epoch": 2.77, "grad_norm": 0.7429171800613403, "learning_rate": 8.476504958740427e-06, "loss": 2.9454, "step": 56572 }, { "epoch": 2.77, "grad_norm": 0.7118484973907471, "learning_rate": 8.472871330050923e-06, "loss": 3.0559, "step": 56573 }, { "epoch": 2.77, "grad_norm": 0.7167723178863525, "learning_rate": 8.46923846918779e-06, "loss": 2.8801, "step": 56574 }, { "epoch": 2.77, "grad_norm": 0.749420702457428, "learning_rate": 8.465606376160617e-06, "loss": 2.8208, "step": 56575 }, { "epoch": 2.77, "grad_norm": 0.8014590740203857, "learning_rate": 8.461975050978898e-06, "loss": 2.9306, "step": 56576 }, { "epoch": 2.77, "grad_norm": 0.855155348777771, "learning_rate": 8.458344493652225e-06, "loss": 2.9504, "step": 56577 }, { "epoch": 2.77, "grad_norm": 0.7464328408241272, "learning_rate": 8.454714704190125e-06, "loss": 2.8316, "step": 56578 }, { "epoch": 2.77, "grad_norm": 0.7837406992912292, "learning_rate": 8.451085682602255e-06, "loss": 2.8898, "step": 56579 }, { "epoch": 2.77, "grad_norm": 0.7430059313774109, "learning_rate": 8.447457428898108e-06, "loss": 2.8781, "step": 56580 }, { "epoch": 2.77, "grad_norm": 0.7241303324699402, "learning_rate": 8.443829943087244e-06, "loss": 3.1459, "step": 56581 }, { "epoch": 2.77, "grad_norm": 0.7481164932250977, "learning_rate": 8.440203225179187e-06, "loss": 2.721, "step": 56582 }, { "epoch": 2.77, "grad_norm": 0.766409158706665, "learning_rate": 8.436577275183564e-06, "loss": 3.0709, "step": 56583 }, { "epoch": 2.77, "grad_norm": 0.7170672416687012, "learning_rate": 8.432952093109901e-06, "loss": 3.1805, "step": 56584 }, { "epoch": 2.77, "grad_norm": 0.7190765142440796, "learning_rate": 8.429327678967656e-06, "loss": 2.8354, "step": 56585 }, { "epoch": 2.77, "grad_norm": 0.783243715763092, "learning_rate": 8.425704032766523e-06, "loss": 2.836, "step": 56586 }, { "epoch": 2.77, "grad_norm": 0.716397762298584, "learning_rate": 8.422081154515925e-06, "loss": 2.8644, "step": 56587 }, { "epoch": 2.77, "grad_norm": 0.766613781452179, "learning_rate": 8.418459044225456e-06, "loss": 2.8583, "step": 56588 }, { "epoch": 2.77, "grad_norm": 0.7538684606552124, "learning_rate": 8.414837701904642e-06, "loss": 2.8837, "step": 56589 }, { "epoch": 2.77, "grad_norm": 0.7688279151916504, "learning_rate": 8.411217127563075e-06, "loss": 2.9859, "step": 56590 }, { "epoch": 2.77, "grad_norm": 0.8095577359199524, "learning_rate": 8.407597321210214e-06, "loss": 2.7778, "step": 56591 }, { "epoch": 2.77, "grad_norm": 0.7337608337402344, "learning_rate": 8.403978282855617e-06, "loss": 2.8053, "step": 56592 }, { "epoch": 2.77, "grad_norm": 0.7341989278793335, "learning_rate": 8.40036001250881e-06, "loss": 2.9954, "step": 56593 }, { "epoch": 2.77, "grad_norm": 0.7581641674041748, "learning_rate": 8.396742510179388e-06, "loss": 3.1145, "step": 56594 }, { "epoch": 2.77, "grad_norm": 0.7279971241950989, "learning_rate": 8.393125775876775e-06, "loss": 2.8688, "step": 56595 }, { "epoch": 2.77, "grad_norm": 0.7769571542739868, "learning_rate": 8.389509809610562e-06, "loss": 2.6323, "step": 56596 }, { "epoch": 2.77, "grad_norm": 0.7689619660377502, "learning_rate": 8.385894611390276e-06, "loss": 2.814, "step": 56597 }, { "epoch": 2.77, "grad_norm": 0.7330824732780457, "learning_rate": 8.38228018122541e-06, "loss": 2.7816, "step": 56598 }, { "epoch": 2.77, "grad_norm": 0.7108086943626404, "learning_rate": 8.378666519125487e-06, "loss": 2.8276, "step": 56599 }, { "epoch": 2.77, "grad_norm": 0.7666386365890503, "learning_rate": 8.375053625100037e-06, "loss": 2.9543, "step": 56600 }, { "epoch": 2.77, "grad_norm": 0.7531747817993164, "learning_rate": 8.371441499158582e-06, "loss": 3.0132, "step": 56601 }, { "epoch": 2.77, "grad_norm": 0.7575362920761108, "learning_rate": 8.367830141310583e-06, "loss": 3.0232, "step": 56602 }, { "epoch": 2.77, "grad_norm": 0.741112232208252, "learning_rate": 8.364219551565631e-06, "loss": 2.7388, "step": 56603 }, { "epoch": 2.77, "grad_norm": 0.7264926433563232, "learning_rate": 8.360609729933187e-06, "loss": 2.987, "step": 56604 }, { "epoch": 2.77, "grad_norm": 0.8191945552825928, "learning_rate": 8.357000676422808e-06, "loss": 2.8198, "step": 56605 }, { "epoch": 2.77, "grad_norm": 0.8269351720809937, "learning_rate": 8.353392391043922e-06, "loss": 2.8031, "step": 56606 }, { "epoch": 2.77, "grad_norm": 0.7800099849700928, "learning_rate": 8.349784873806087e-06, "loss": 2.8467, "step": 56607 }, { "epoch": 2.77, "grad_norm": 0.7215859293937683, "learning_rate": 8.34617812471876e-06, "loss": 2.7812, "step": 56608 }, { "epoch": 2.77, "grad_norm": 0.8046889901161194, "learning_rate": 8.342572143791504e-06, "loss": 2.8322, "step": 56609 }, { "epoch": 2.77, "grad_norm": 0.753553569316864, "learning_rate": 8.338966931033741e-06, "loss": 2.8108, "step": 56610 }, { "epoch": 2.77, "grad_norm": 0.7166416049003601, "learning_rate": 8.335362486455066e-06, "loss": 2.8806, "step": 56611 }, { "epoch": 2.77, "grad_norm": 0.7240424156188965, "learning_rate": 8.331758810064903e-06, "loss": 3.0349, "step": 56612 }, { "epoch": 2.77, "grad_norm": 0.7516683340072632, "learning_rate": 8.328155901872713e-06, "loss": 2.9413, "step": 56613 }, { "epoch": 2.77, "grad_norm": 0.7496482729911804, "learning_rate": 8.324553761888053e-06, "loss": 2.8857, "step": 56614 }, { "epoch": 2.77, "grad_norm": 0.7219851016998291, "learning_rate": 8.320952390120383e-06, "loss": 2.9247, "step": 56615 }, { "epoch": 2.77, "grad_norm": 0.7659447193145752, "learning_rate": 8.317351786579196e-06, "loss": 2.9269, "step": 56616 }, { "epoch": 2.77, "grad_norm": 0.7846766114234924, "learning_rate": 8.31375195127395e-06, "loss": 2.7706, "step": 56617 }, { "epoch": 2.77, "grad_norm": 0.8673431277275085, "learning_rate": 8.310152884214173e-06, "loss": 3.0868, "step": 56618 }, { "epoch": 2.77, "grad_norm": 0.8626456260681152, "learning_rate": 8.30655458540932e-06, "loss": 2.8726, "step": 56619 }, { "epoch": 2.77, "grad_norm": 0.7804566621780396, "learning_rate": 8.30295705486882e-06, "loss": 2.7799, "step": 56620 }, { "epoch": 2.77, "grad_norm": 0.8007548451423645, "learning_rate": 8.29936029260223e-06, "loss": 2.9062, "step": 56621 }, { "epoch": 2.77, "grad_norm": 0.7293591499328613, "learning_rate": 8.295764298618946e-06, "loss": 2.8577, "step": 56622 }, { "epoch": 2.77, "grad_norm": 0.8285486698150635, "learning_rate": 8.29216907292849e-06, "loss": 2.9366, "step": 56623 }, { "epoch": 2.78, "grad_norm": 0.7310425043106079, "learning_rate": 8.288574615540355e-06, "loss": 2.931, "step": 56624 }, { "epoch": 2.78, "grad_norm": 0.7126893401145935, "learning_rate": 8.284980926463936e-06, "loss": 2.839, "step": 56625 }, { "epoch": 2.78, "grad_norm": 0.7340129017829895, "learning_rate": 8.28138800570879e-06, "loss": 2.925, "step": 56626 }, { "epoch": 2.78, "grad_norm": 0.7461667060852051, "learning_rate": 8.277795853284308e-06, "loss": 2.7116, "step": 56627 }, { "epoch": 2.78, "grad_norm": 0.7524672150611877, "learning_rate": 8.274204469199919e-06, "loss": 3.0097, "step": 56628 }, { "epoch": 2.78, "grad_norm": 0.7391806244850159, "learning_rate": 8.27061385346518e-06, "loss": 2.9233, "step": 56629 }, { "epoch": 2.78, "grad_norm": 0.7427461743354797, "learning_rate": 8.267024006089484e-06, "loss": 2.8764, "step": 56630 }, { "epoch": 2.78, "grad_norm": 0.7367878556251526, "learning_rate": 8.263434927082291e-06, "loss": 2.9975, "step": 56631 }, { "epoch": 2.78, "grad_norm": 0.7743518948554993, "learning_rate": 8.259846616453058e-06, "loss": 2.8299, "step": 56632 }, { "epoch": 2.78, "grad_norm": 0.9217862486839294, "learning_rate": 8.25625907421128e-06, "loss": 2.9932, "step": 56633 }, { "epoch": 2.78, "grad_norm": 0.7349005937576294, "learning_rate": 8.25267230036638e-06, "loss": 2.9098, "step": 56634 }, { "epoch": 2.78, "grad_norm": 0.7806947827339172, "learning_rate": 8.24908629492772e-06, "loss": 2.6354, "step": 56635 }, { "epoch": 2.78, "grad_norm": 0.7729756832122803, "learning_rate": 8.24550105790489e-06, "loss": 2.8451, "step": 56636 }, { "epoch": 2.78, "grad_norm": 0.7258002758026123, "learning_rate": 8.241916589307219e-06, "loss": 2.9029, "step": 56637 }, { "epoch": 2.78, "grad_norm": 0.7425938248634338, "learning_rate": 8.238332889144194e-06, "loss": 3.0206, "step": 56638 }, { "epoch": 2.78, "grad_norm": 0.7938366532325745, "learning_rate": 8.234749957425247e-06, "loss": 3.0195, "step": 56639 }, { "epoch": 2.78, "grad_norm": 0.7334127426147461, "learning_rate": 8.231167794159832e-06, "loss": 2.958, "step": 56640 }, { "epoch": 2.78, "grad_norm": 0.7349879145622253, "learning_rate": 8.22758639935741e-06, "loss": 2.8197, "step": 56641 }, { "epoch": 2.78, "grad_norm": 0.7187401056289673, "learning_rate": 8.22400577302731e-06, "loss": 2.9082, "step": 56642 }, { "epoch": 2.78, "grad_norm": 0.7203710079193115, "learning_rate": 8.220425915179018e-06, "loss": 2.7315, "step": 56643 }, { "epoch": 2.78, "grad_norm": 0.7575718760490417, "learning_rate": 8.216846825821999e-06, "loss": 2.829, "step": 56644 }, { "epoch": 2.78, "grad_norm": 0.7450301647186279, "learning_rate": 8.213268504965643e-06, "loss": 2.7181, "step": 56645 }, { "epoch": 2.78, "grad_norm": 0.8067429065704346, "learning_rate": 8.209690952619407e-06, "loss": 2.8633, "step": 56646 }, { "epoch": 2.78, "grad_norm": 0.795412540435791, "learning_rate": 8.206114168792622e-06, "loss": 2.8272, "step": 56647 }, { "epoch": 2.78, "grad_norm": 0.8101593852043152, "learning_rate": 8.202538153494842e-06, "loss": 3.12, "step": 56648 }, { "epoch": 2.78, "grad_norm": 0.7850880026817322, "learning_rate": 8.19896290673543e-06, "loss": 3.0746, "step": 56649 }, { "epoch": 2.78, "grad_norm": 0.7382371425628662, "learning_rate": 8.195388428523741e-06, "loss": 2.7615, "step": 56650 }, { "epoch": 2.78, "grad_norm": 0.7291812896728516, "learning_rate": 8.191814718869271e-06, "loss": 2.6933, "step": 56651 }, { "epoch": 2.78, "grad_norm": 0.7332150936126709, "learning_rate": 8.18824177778138e-06, "loss": 2.9268, "step": 56652 }, { "epoch": 2.78, "grad_norm": 0.7187190055847168, "learning_rate": 8.18466960526949e-06, "loss": 2.8866, "step": 56653 }, { "epoch": 2.78, "grad_norm": 0.7524594664573669, "learning_rate": 8.181098201343028e-06, "loss": 3.1315, "step": 56654 }, { "epoch": 2.78, "grad_norm": 0.7417961955070496, "learning_rate": 8.177527566011422e-06, "loss": 2.8357, "step": 56655 }, { "epoch": 2.78, "grad_norm": 0.7433927059173584, "learning_rate": 8.173957699284029e-06, "loss": 2.8197, "step": 56656 }, { "epoch": 2.78, "grad_norm": 0.7373107671737671, "learning_rate": 8.170388601170275e-06, "loss": 2.847, "step": 56657 }, { "epoch": 2.78, "grad_norm": 0.7926795482635498, "learning_rate": 8.166820271679552e-06, "loss": 2.6819, "step": 56658 }, { "epoch": 2.78, "grad_norm": 0.7191329598426819, "learning_rate": 8.163252710821222e-06, "loss": 2.9689, "step": 56659 }, { "epoch": 2.78, "grad_norm": 0.820548415184021, "learning_rate": 8.15968591860474e-06, "loss": 2.8692, "step": 56660 }, { "epoch": 2.78, "grad_norm": 0.7487797141075134, "learning_rate": 8.156119895039503e-06, "loss": 3.0475, "step": 56661 }, { "epoch": 2.78, "grad_norm": 0.78996741771698, "learning_rate": 8.152554640134834e-06, "loss": 3.1179, "step": 56662 }, { "epoch": 2.78, "grad_norm": 0.7686530351638794, "learning_rate": 8.148990153900225e-06, "loss": 2.9168, "step": 56663 }, { "epoch": 2.78, "grad_norm": 0.8370349407196045, "learning_rate": 8.145426436345004e-06, "loss": 2.891, "step": 56664 }, { "epoch": 2.78, "grad_norm": 0.756450355052948, "learning_rate": 8.141863487478528e-06, "loss": 2.8712, "step": 56665 }, { "epoch": 2.78, "grad_norm": 0.7915993332862854, "learning_rate": 8.138301307310224e-06, "loss": 2.7854, "step": 56666 }, { "epoch": 2.78, "grad_norm": 0.7291852235794067, "learning_rate": 8.134739895849452e-06, "loss": 2.91, "step": 56667 }, { "epoch": 2.78, "grad_norm": 0.7370338439941406, "learning_rate": 8.131179253105635e-06, "loss": 2.8508, "step": 56668 }, { "epoch": 2.78, "grad_norm": 0.7495514750480652, "learning_rate": 8.127619379088102e-06, "loss": 3.1002, "step": 56669 }, { "epoch": 2.78, "grad_norm": 0.6904610395431519, "learning_rate": 8.124060273806242e-06, "loss": 2.8653, "step": 56670 }, { "epoch": 2.78, "grad_norm": 0.7376260757446289, "learning_rate": 8.120501937269453e-06, "loss": 2.8539, "step": 56671 }, { "epoch": 2.78, "grad_norm": 0.7773568034172058, "learning_rate": 8.116944369487088e-06, "loss": 2.6758, "step": 56672 }, { "epoch": 2.78, "grad_norm": 0.8228160738945007, "learning_rate": 8.11338757046851e-06, "loss": 2.7239, "step": 56673 }, { "epoch": 2.78, "grad_norm": 0.7620078921318054, "learning_rate": 8.109831540223077e-06, "loss": 2.8486, "step": 56674 }, { "epoch": 2.78, "grad_norm": 0.7939218282699585, "learning_rate": 8.106276278760183e-06, "loss": 2.8879, "step": 56675 }, { "epoch": 2.78, "grad_norm": 0.7919220924377441, "learning_rate": 8.102721786089183e-06, "loss": 2.9234, "step": 56676 }, { "epoch": 2.78, "grad_norm": 0.7480902671813965, "learning_rate": 8.099168062219474e-06, "loss": 2.7817, "step": 56677 }, { "epoch": 2.78, "grad_norm": 0.7824358344078064, "learning_rate": 8.095615107160347e-06, "loss": 2.8962, "step": 56678 }, { "epoch": 2.78, "grad_norm": 0.7388315200805664, "learning_rate": 8.09206292092116e-06, "loss": 2.9825, "step": 56679 }, { "epoch": 2.78, "grad_norm": 0.749704122543335, "learning_rate": 8.088511503511342e-06, "loss": 2.8423, "step": 56680 }, { "epoch": 2.78, "grad_norm": 0.7328793406486511, "learning_rate": 8.084960854940181e-06, "loss": 2.9085, "step": 56681 }, { "epoch": 2.78, "grad_norm": 0.7136931419372559, "learning_rate": 8.081410975217073e-06, "loss": 2.9693, "step": 56682 }, { "epoch": 2.78, "grad_norm": 0.7360411286354065, "learning_rate": 8.077861864351343e-06, "loss": 2.7465, "step": 56683 }, { "epoch": 2.78, "grad_norm": 0.7628071308135986, "learning_rate": 8.074313522352315e-06, "loss": 2.7308, "step": 56684 }, { "epoch": 2.78, "grad_norm": 0.72130286693573, "learning_rate": 8.070765949229419e-06, "loss": 2.6445, "step": 56685 }, { "epoch": 2.78, "grad_norm": 0.7512112259864807, "learning_rate": 8.06721914499191e-06, "loss": 2.8764, "step": 56686 }, { "epoch": 2.78, "grad_norm": 0.7311689853668213, "learning_rate": 8.063673109649148e-06, "loss": 3.1255, "step": 56687 }, { "epoch": 2.78, "grad_norm": 0.806206226348877, "learning_rate": 8.060127843210528e-06, "loss": 2.7643, "step": 56688 }, { "epoch": 2.78, "grad_norm": 0.7583916187286377, "learning_rate": 8.056583345685308e-06, "loss": 3.0265, "step": 56689 }, { "epoch": 2.78, "grad_norm": 0.7623338103294373, "learning_rate": 8.053039617082846e-06, "loss": 2.8745, "step": 56690 }, { "epoch": 2.78, "grad_norm": 0.7402061820030212, "learning_rate": 8.049496657412535e-06, "loss": 2.9607, "step": 56691 }, { "epoch": 2.78, "grad_norm": 0.7634629607200623, "learning_rate": 8.045954466683668e-06, "loss": 3.1609, "step": 56692 }, { "epoch": 2.78, "grad_norm": 0.7831898927688599, "learning_rate": 8.042413044905572e-06, "loss": 2.6583, "step": 56693 }, { "epoch": 2.78, "grad_norm": 0.7754256129264832, "learning_rate": 8.038872392087537e-06, "loss": 2.5636, "step": 56694 }, { "epoch": 2.78, "grad_norm": 0.7307144999504089, "learning_rate": 8.035332508238923e-06, "loss": 2.8217, "step": 56695 }, { "epoch": 2.78, "grad_norm": 0.7803258895874023, "learning_rate": 8.031793393369057e-06, "loss": 2.9664, "step": 56696 }, { "epoch": 2.78, "grad_norm": 0.7296281456947327, "learning_rate": 8.028255047487265e-06, "loss": 2.8883, "step": 56697 }, { "epoch": 2.78, "grad_norm": 0.7694997191429138, "learning_rate": 8.02471747060287e-06, "loss": 2.7386, "step": 56698 }, { "epoch": 2.78, "grad_norm": 0.7115613222122192, "learning_rate": 8.021180662725169e-06, "loss": 2.8441, "step": 56699 }, { "epoch": 2.78, "grad_norm": 0.7649816274642944, "learning_rate": 8.017644623863484e-06, "loss": 2.9336, "step": 56700 }, { "epoch": 2.78, "grad_norm": 0.7806012630462646, "learning_rate": 8.014109354027142e-06, "loss": 2.9071, "step": 56701 }, { "epoch": 2.78, "grad_norm": 0.7786357402801514, "learning_rate": 8.010574853225405e-06, "loss": 2.8436, "step": 56702 }, { "epoch": 2.78, "grad_norm": 0.7318533658981323, "learning_rate": 8.00704112146766e-06, "loss": 2.9391, "step": 56703 }, { "epoch": 2.78, "grad_norm": 0.7530311942100525, "learning_rate": 8.003508158763138e-06, "loss": 3.1211, "step": 56704 }, { "epoch": 2.78, "grad_norm": 0.7505061626434326, "learning_rate": 7.999975965121164e-06, "loss": 3.1264, "step": 56705 }, { "epoch": 2.78, "grad_norm": 0.7486022114753723, "learning_rate": 7.996444540551128e-06, "loss": 2.7554, "step": 56706 }, { "epoch": 2.78, "grad_norm": 0.7808429002761841, "learning_rate": 7.992913885062224e-06, "loss": 2.8135, "step": 56707 }, { "epoch": 2.78, "grad_norm": 0.7680838704109192, "learning_rate": 7.989383998663812e-06, "loss": 2.9188, "step": 56708 }, { "epoch": 2.78, "grad_norm": 0.7538081407546997, "learning_rate": 7.985854881365117e-06, "loss": 2.7569, "step": 56709 }, { "epoch": 2.78, "grad_norm": 0.7408961057662964, "learning_rate": 7.982326533175466e-06, "loss": 2.9055, "step": 56710 }, { "epoch": 2.78, "grad_norm": 0.7339892983436584, "learning_rate": 7.978798954104216e-06, "loss": 2.8456, "step": 56711 }, { "epoch": 2.78, "grad_norm": 0.7620311975479126, "learning_rate": 7.975272144160594e-06, "loss": 2.9099, "step": 56712 }, { "epoch": 2.78, "grad_norm": 0.7628583908081055, "learning_rate": 7.971746103353926e-06, "loss": 2.7379, "step": 56713 }, { "epoch": 2.78, "grad_norm": 0.7465094327926636, "learning_rate": 7.968220831693473e-06, "loss": 2.7516, "step": 56714 }, { "epoch": 2.78, "grad_norm": 0.7672311067581177, "learning_rate": 7.964696329188492e-06, "loss": 2.8941, "step": 56715 }, { "epoch": 2.78, "grad_norm": 0.7001157402992249, "learning_rate": 7.96117259584831e-06, "loss": 2.779, "step": 56716 }, { "epoch": 2.78, "grad_norm": 0.7452120780944824, "learning_rate": 7.957649631682183e-06, "loss": 3.0356, "step": 56717 }, { "epoch": 2.78, "grad_norm": 0.7730103731155396, "learning_rate": 7.954127436699441e-06, "loss": 2.8371, "step": 56718 }, { "epoch": 2.78, "grad_norm": 0.7829596996307373, "learning_rate": 7.950606010909277e-06, "loss": 2.8766, "step": 56719 }, { "epoch": 2.78, "grad_norm": 0.733018696308136, "learning_rate": 7.947085354321047e-06, "loss": 2.9807, "step": 56720 }, { "epoch": 2.78, "grad_norm": 0.7372322678565979, "learning_rate": 7.94356546694398e-06, "loss": 2.8052, "step": 56721 }, { "epoch": 2.78, "grad_norm": 0.7484538555145264, "learning_rate": 7.940046348787332e-06, "loss": 2.9547, "step": 56722 }, { "epoch": 2.78, "grad_norm": 0.7925270795822144, "learning_rate": 7.936527999860432e-06, "loss": 2.9496, "step": 56723 }, { "epoch": 2.78, "grad_norm": 0.7163591980934143, "learning_rate": 7.93301042017247e-06, "loss": 2.7463, "step": 56724 }, { "epoch": 2.78, "grad_norm": 0.777164101600647, "learning_rate": 7.92949360973274e-06, "loss": 2.9165, "step": 56725 }, { "epoch": 2.78, "grad_norm": 0.7506938576698303, "learning_rate": 7.925977568550568e-06, "loss": 2.9781, "step": 56726 }, { "epoch": 2.78, "grad_norm": 0.7980271577835083, "learning_rate": 7.922462296635112e-06, "loss": 2.6755, "step": 56727 }, { "epoch": 2.78, "grad_norm": 0.7719228863716125, "learning_rate": 7.9189477939957e-06, "loss": 2.7433, "step": 56728 }, { "epoch": 2.78, "grad_norm": 0.7837870121002197, "learning_rate": 7.91543406064159e-06, "loss": 2.8218, "step": 56729 }, { "epoch": 2.78, "grad_norm": 0.7866212725639343, "learning_rate": 7.911921096581975e-06, "loss": 2.895, "step": 56730 }, { "epoch": 2.78, "grad_norm": 0.7204878926277161, "learning_rate": 7.908408901826147e-06, "loss": 2.9522, "step": 56731 }, { "epoch": 2.78, "grad_norm": 0.7222285270690918, "learning_rate": 7.904897476383365e-06, "loss": 2.9197, "step": 56732 }, { "epoch": 2.78, "grad_norm": 0.7325133085250854, "learning_rate": 7.90138682026289e-06, "loss": 2.6879, "step": 56733 }, { "epoch": 2.78, "grad_norm": 0.7699395418167114, "learning_rate": 7.897876933473912e-06, "loss": 2.8361, "step": 56734 }, { "epoch": 2.78, "grad_norm": 0.7754160165786743, "learning_rate": 7.894367816025726e-06, "loss": 2.81, "step": 56735 }, { "epoch": 2.78, "grad_norm": 0.7943594455718994, "learning_rate": 7.89085946792759e-06, "loss": 2.8848, "step": 56736 }, { "epoch": 2.78, "grad_norm": 0.7334774136543274, "learning_rate": 7.887351889188665e-06, "loss": 2.7652, "step": 56737 }, { "epoch": 2.78, "grad_norm": 0.7373555898666382, "learning_rate": 7.883845079818273e-06, "loss": 3.0028, "step": 56738 }, { "epoch": 2.78, "grad_norm": 0.7737788558006287, "learning_rate": 7.880339039825578e-06, "loss": 2.7939, "step": 56739 }, { "epoch": 2.78, "grad_norm": 0.7455878257751465, "learning_rate": 7.87683376921987e-06, "loss": 2.8291, "step": 56740 }, { "epoch": 2.78, "grad_norm": 0.7712538242340088, "learning_rate": 7.873329268010343e-06, "loss": 3.0044, "step": 56741 }, { "epoch": 2.78, "grad_norm": 0.7128819227218628, "learning_rate": 7.869825536206254e-06, "loss": 2.7565, "step": 56742 }, { "epoch": 2.78, "grad_norm": 0.72657710313797, "learning_rate": 7.866322573816864e-06, "loss": 2.8434, "step": 56743 }, { "epoch": 2.78, "grad_norm": 0.7716096639633179, "learning_rate": 7.862820380851299e-06, "loss": 2.961, "step": 56744 }, { "epoch": 2.78, "grad_norm": 0.7939549684524536, "learning_rate": 7.859318957318884e-06, "loss": 2.9316, "step": 56745 }, { "epoch": 2.78, "grad_norm": 0.7576471567153931, "learning_rate": 7.855818303228779e-06, "loss": 2.9361, "step": 56746 }, { "epoch": 2.78, "grad_norm": 0.7495378255844116, "learning_rate": 7.852318418590208e-06, "loss": 2.9346, "step": 56747 }, { "epoch": 2.78, "grad_norm": 0.7718560099601746, "learning_rate": 7.848819303412435e-06, "loss": 2.8327, "step": 56748 }, { "epoch": 2.78, "grad_norm": 0.7532568573951721, "learning_rate": 7.845320957704614e-06, "loss": 2.7433, "step": 56749 }, { "epoch": 2.78, "grad_norm": 0.7588929533958435, "learning_rate": 7.841823381476043e-06, "loss": 2.962, "step": 56750 }, { "epoch": 2.78, "grad_norm": 0.7554545998573303, "learning_rate": 7.838326574735875e-06, "loss": 3.0306, "step": 56751 }, { "epoch": 2.78, "grad_norm": 0.7670568227767944, "learning_rate": 7.834830537493275e-06, "loss": 2.9727, "step": 56752 }, { "epoch": 2.78, "grad_norm": 0.7550936341285706, "learning_rate": 7.831335269757532e-06, "loss": 2.9098, "step": 56753 }, { "epoch": 2.78, "grad_norm": 0.7789241075515747, "learning_rate": 7.827840771537808e-06, "loss": 2.7227, "step": 56754 }, { "epoch": 2.78, "grad_norm": 0.7854425311088562, "learning_rate": 7.82434704284336e-06, "loss": 2.8727, "step": 56755 }, { "epoch": 2.78, "grad_norm": 0.7839749455451965, "learning_rate": 7.820854083683314e-06, "loss": 2.9157, "step": 56756 }, { "epoch": 2.78, "grad_norm": 0.7156407833099365, "learning_rate": 7.81736189406693e-06, "loss": 3.0155, "step": 56757 }, { "epoch": 2.78, "grad_norm": 0.7155882716178894, "learning_rate": 7.813870474003403e-06, "loss": 2.8871, "step": 56758 }, { "epoch": 2.78, "grad_norm": 0.7472594380378723, "learning_rate": 7.810379823501855e-06, "loss": 3.0024, "step": 56759 }, { "epoch": 2.78, "grad_norm": 0.7841302156448364, "learning_rate": 7.806889942571582e-06, "loss": 2.7603, "step": 56760 }, { "epoch": 2.78, "grad_norm": 0.7454701662063599, "learning_rate": 7.803400831221707e-06, "loss": 2.7723, "step": 56761 }, { "epoch": 2.78, "grad_norm": 0.7371299862861633, "learning_rate": 7.799912489461423e-06, "loss": 2.9579, "step": 56762 }, { "epoch": 2.78, "grad_norm": 0.7872686386108398, "learning_rate": 7.796424917299959e-06, "loss": 2.9897, "step": 56763 }, { "epoch": 2.78, "grad_norm": 0.7827180624008179, "learning_rate": 7.792938114746439e-06, "loss": 3.0709, "step": 56764 }, { "epoch": 2.78, "grad_norm": 0.7246116399765015, "learning_rate": 7.78945208181012e-06, "loss": 2.8398, "step": 56765 }, { "epoch": 2.78, "grad_norm": 0.7627786993980408, "learning_rate": 7.785966818500166e-06, "loss": 3.0078, "step": 56766 }, { "epoch": 2.78, "grad_norm": 0.7191957831382751, "learning_rate": 7.7824823248257e-06, "loss": 3.119, "step": 56767 }, { "epoch": 2.78, "grad_norm": 0.7956440448760986, "learning_rate": 7.778998600795949e-06, "loss": 3.0264, "step": 56768 }, { "epoch": 2.78, "grad_norm": 0.7572888135910034, "learning_rate": 7.775515646420071e-06, "loss": 2.9061, "step": 56769 }, { "epoch": 2.78, "grad_norm": 0.7518506646156311, "learning_rate": 7.772033461707261e-06, "loss": 2.9363, "step": 56770 }, { "epoch": 2.78, "grad_norm": 0.7562054991722107, "learning_rate": 7.768552046666642e-06, "loss": 2.9044, "step": 56771 }, { "epoch": 2.78, "grad_norm": 0.7331022024154663, "learning_rate": 7.765071401307443e-06, "loss": 2.8371, "step": 56772 }, { "epoch": 2.78, "grad_norm": 0.7433456778526306, "learning_rate": 7.761591525638822e-06, "loss": 2.8736, "step": 56773 }, { "epoch": 2.78, "grad_norm": 0.7637978196144104, "learning_rate": 7.75811241966987e-06, "loss": 2.9288, "step": 56774 }, { "epoch": 2.78, "grad_norm": 0.7655687928199768, "learning_rate": 7.75463408340985e-06, "loss": 2.8614, "step": 56775 }, { "epoch": 2.78, "grad_norm": 0.7311526536941528, "learning_rate": 7.751156516867884e-06, "loss": 2.7973, "step": 56776 }, { "epoch": 2.78, "grad_norm": 0.7516604661941528, "learning_rate": 7.747679720053069e-06, "loss": 2.8696, "step": 56777 }, { "epoch": 2.78, "grad_norm": 0.7627346515655518, "learning_rate": 7.744203692974692e-06, "loss": 2.7741, "step": 56778 }, { "epoch": 2.78, "grad_norm": 0.8112263679504395, "learning_rate": 7.740728435641818e-06, "loss": 2.8013, "step": 56779 }, { "epoch": 2.78, "grad_norm": 0.7233673930168152, "learning_rate": 7.737253948063638e-06, "loss": 3.0573, "step": 56780 }, { "epoch": 2.78, "grad_norm": 0.7762109637260437, "learning_rate": 7.733780230249243e-06, "loss": 2.8076, "step": 56781 }, { "epoch": 2.78, "grad_norm": 0.7639904618263245, "learning_rate": 7.730307282207794e-06, "loss": 2.7387, "step": 56782 }, { "epoch": 2.78, "grad_norm": 0.7313291430473328, "learning_rate": 7.726835103948548e-06, "loss": 2.7993, "step": 56783 }, { "epoch": 2.78, "grad_norm": 0.7252292633056641, "learning_rate": 7.7233636954805e-06, "loss": 2.8332, "step": 56784 }, { "epoch": 2.78, "grad_norm": 0.7580043077468872, "learning_rate": 7.719893056812908e-06, "loss": 2.9502, "step": 56785 }, { "epoch": 2.78, "grad_norm": 0.7498751878738403, "learning_rate": 7.716423187954835e-06, "loss": 2.8644, "step": 56786 }, { "epoch": 2.78, "grad_norm": 0.7402743697166443, "learning_rate": 7.712954088915469e-06, "loss": 2.7173, "step": 56787 }, { "epoch": 2.78, "grad_norm": 0.7868022322654724, "learning_rate": 7.709485759703971e-06, "loss": 2.769, "step": 56788 }, { "epoch": 2.78, "grad_norm": 0.762657880783081, "learning_rate": 7.706018200329367e-06, "loss": 2.8656, "step": 56789 }, { "epoch": 2.78, "grad_norm": 0.8196261525154114, "learning_rate": 7.702551410800884e-06, "loss": 2.8877, "step": 56790 }, { "epoch": 2.78, "grad_norm": 0.7575928568840027, "learning_rate": 7.699085391127614e-06, "loss": 2.9055, "step": 56791 }, { "epoch": 2.78, "grad_norm": 0.7336806654930115, "learning_rate": 7.695620141318715e-06, "loss": 2.9307, "step": 56792 }, { "epoch": 2.78, "grad_norm": 0.7909324765205383, "learning_rate": 7.692155661383314e-06, "loss": 2.8882, "step": 56793 }, { "epoch": 2.78, "grad_norm": 0.778021514415741, "learning_rate": 7.688691951330506e-06, "loss": 2.8321, "step": 56794 }, { "epoch": 2.78, "grad_norm": 0.7695522904396057, "learning_rate": 7.685229011169447e-06, "loss": 2.7739, "step": 56795 }, { "epoch": 2.78, "grad_norm": 0.7543647289276123, "learning_rate": 7.681766840909197e-06, "loss": 2.9972, "step": 56796 }, { "epoch": 2.78, "grad_norm": 0.7531213164329529, "learning_rate": 7.678305440558884e-06, "loss": 2.674, "step": 56797 }, { "epoch": 2.78, "grad_norm": 0.7810092568397522, "learning_rate": 7.674844810127734e-06, "loss": 2.8329, "step": 56798 }, { "epoch": 2.78, "grad_norm": 0.7242538332939148, "learning_rate": 7.671384949624736e-06, "loss": 2.9462, "step": 56799 }, { "epoch": 2.78, "grad_norm": 0.7586749792098999, "learning_rate": 7.667925859059087e-06, "loss": 2.7034, "step": 56800 }, { "epoch": 2.78, "grad_norm": 0.757546067237854, "learning_rate": 7.664467538439845e-06, "loss": 2.7994, "step": 56801 }, { "epoch": 2.78, "grad_norm": 0.7679392695426941, "learning_rate": 7.6610099877761e-06, "loss": 2.8346, "step": 56802 }, { "epoch": 2.78, "grad_norm": 0.7457258701324463, "learning_rate": 7.657553207077049e-06, "loss": 2.8294, "step": 56803 }, { "epoch": 2.78, "grad_norm": 0.734887957572937, "learning_rate": 7.654097196351716e-06, "loss": 2.8035, "step": 56804 }, { "epoch": 2.78, "grad_norm": 0.7640114426612854, "learning_rate": 7.650641955609227e-06, "loss": 2.8279, "step": 56805 }, { "epoch": 2.78, "grad_norm": 0.7584049105644226, "learning_rate": 7.647187484858674e-06, "loss": 3.168, "step": 56806 }, { "epoch": 2.78, "grad_norm": 0.7257455587387085, "learning_rate": 7.643733784109151e-06, "loss": 2.6585, "step": 56807 }, { "epoch": 2.78, "grad_norm": 0.7480292916297913, "learning_rate": 7.640280853369784e-06, "loss": 2.7445, "step": 56808 }, { "epoch": 2.78, "grad_norm": 0.7356035709381104, "learning_rate": 7.6368286926497e-06, "loss": 2.9441, "step": 56809 }, { "epoch": 2.78, "grad_norm": 0.719250500202179, "learning_rate": 7.633377301957889e-06, "loss": 2.8578, "step": 56810 }, { "epoch": 2.78, "grad_norm": 0.7773850560188293, "learning_rate": 7.629926681303512e-06, "loss": 2.7369, "step": 56811 }, { "epoch": 2.78, "grad_norm": 0.7469416856765747, "learning_rate": 7.626476830695627e-06, "loss": 2.9075, "step": 56812 }, { "epoch": 2.78, "grad_norm": 0.768189549446106, "learning_rate": 7.623027750143329e-06, "loss": 2.776, "step": 56813 }, { "epoch": 2.78, "grad_norm": 0.7666859030723572, "learning_rate": 7.61957943965571e-06, "loss": 2.8001, "step": 56814 }, { "epoch": 2.78, "grad_norm": 0.734056293964386, "learning_rate": 7.616131899241862e-06, "loss": 2.8107, "step": 56815 }, { "epoch": 2.78, "grad_norm": 0.7309668064117432, "learning_rate": 7.612685128910878e-06, "loss": 3.0441, "step": 56816 }, { "epoch": 2.78, "grad_norm": 0.7235714793205261, "learning_rate": 7.609239128671785e-06, "loss": 2.8479, "step": 56817 }, { "epoch": 2.78, "grad_norm": 0.8136651515960693, "learning_rate": 7.6057938985336746e-06, "loss": 2.8428, "step": 56818 }, { "epoch": 2.78, "grad_norm": 0.7454987168312073, "learning_rate": 7.602349438505606e-06, "loss": 2.7822, "step": 56819 }, { "epoch": 2.78, "grad_norm": 0.775428831577301, "learning_rate": 7.59890574859674e-06, "loss": 2.9205, "step": 56820 }, { "epoch": 2.78, "grad_norm": 0.82244873046875, "learning_rate": 7.595462828816035e-06, "loss": 2.9713, "step": 56821 }, { "epoch": 2.78, "grad_norm": 0.7343226075172424, "learning_rate": 7.5920206791726165e-06, "loss": 2.7359, "step": 56822 }, { "epoch": 2.78, "grad_norm": 0.7130812406539917, "learning_rate": 7.588579299675579e-06, "loss": 3.0358, "step": 56823 }, { "epoch": 2.78, "grad_norm": 0.752113401889801, "learning_rate": 7.585138690333881e-06, "loss": 2.9996, "step": 56824 }, { "epoch": 2.78, "grad_norm": 0.7490484118461609, "learning_rate": 7.581698851156715e-06, "loss": 2.9933, "step": 56825 }, { "epoch": 2.78, "grad_norm": 0.7558857202529907, "learning_rate": 7.578259782153006e-06, "loss": 2.846, "step": 56826 }, { "epoch": 2.78, "grad_norm": 0.7821259498596191, "learning_rate": 7.57482148333195e-06, "loss": 2.5186, "step": 56827 }, { "epoch": 2.79, "grad_norm": 0.7697935700416565, "learning_rate": 7.571383954702504e-06, "loss": 2.9788, "step": 56828 }, { "epoch": 2.79, "grad_norm": 0.7339929342269897, "learning_rate": 7.567947196273727e-06, "loss": 3.0044, "step": 56829 }, { "epoch": 2.79, "grad_norm": 0.724521815776825, "learning_rate": 7.5645112080547464e-06, "loss": 2.8909, "step": 56830 }, { "epoch": 2.79, "grad_norm": 0.7704073786735535, "learning_rate": 7.561075990054555e-06, "loss": 3.2447, "step": 56831 }, { "epoch": 2.79, "grad_norm": 0.7758163213729858, "learning_rate": 7.5576415422822115e-06, "loss": 2.9757, "step": 56832 }, { "epoch": 2.79, "grad_norm": 0.7906506657600403, "learning_rate": 7.554207864746742e-06, "loss": 2.8299, "step": 56833 }, { "epoch": 2.79, "grad_norm": 0.7277704477310181, "learning_rate": 7.550774957457173e-06, "loss": 2.7329, "step": 56834 }, { "epoch": 2.79, "grad_norm": 0.7903933525085449, "learning_rate": 7.54734282042263e-06, "loss": 2.7854, "step": 56835 }, { "epoch": 2.79, "grad_norm": 0.7526589632034302, "learning_rate": 7.543911453652074e-06, "loss": 2.937, "step": 56836 }, { "epoch": 2.79, "grad_norm": 0.790673017501831, "learning_rate": 7.540480857154596e-06, "loss": 3.0866, "step": 56837 }, { "epoch": 2.79, "grad_norm": 0.7465153336524963, "learning_rate": 7.537051030939223e-06, "loss": 2.8482, "step": 56838 }, { "epoch": 2.79, "grad_norm": 0.7059990167617798, "learning_rate": 7.5336219750149475e-06, "loss": 2.9167, "step": 56839 }, { "epoch": 2.79, "grad_norm": 0.7449349164962769, "learning_rate": 7.530193689390828e-06, "loss": 2.6752, "step": 56840 }, { "epoch": 2.79, "grad_norm": 0.7419530153274536, "learning_rate": 7.52676617407586e-06, "loss": 2.9918, "step": 56841 }, { "epoch": 2.79, "grad_norm": 0.7866668105125427, "learning_rate": 7.5233394290791334e-06, "loss": 2.8204, "step": 56842 }, { "epoch": 2.79, "grad_norm": 0.8241013288497925, "learning_rate": 7.519913454409643e-06, "loss": 2.8006, "step": 56843 }, { "epoch": 2.79, "grad_norm": 0.793013334274292, "learning_rate": 7.516488250076414e-06, "loss": 2.8997, "step": 56844 }, { "epoch": 2.79, "grad_norm": 0.7368309497833252, "learning_rate": 7.513063816088472e-06, "loss": 2.9748, "step": 56845 }, { "epoch": 2.79, "grad_norm": 0.7466829419136047, "learning_rate": 7.50964015245481e-06, "loss": 2.785, "step": 56846 }, { "epoch": 2.79, "grad_norm": 0.7598251104354858, "learning_rate": 7.506217259184488e-06, "loss": 2.7236, "step": 56847 }, { "epoch": 2.79, "grad_norm": 0.7561330199241638, "learning_rate": 7.502795136286465e-06, "loss": 2.999, "step": 56848 }, { "epoch": 2.79, "grad_norm": 0.7700143456459045, "learning_rate": 7.499373783769768e-06, "loss": 2.8617, "step": 56849 }, { "epoch": 2.79, "grad_norm": 0.7483510971069336, "learning_rate": 7.49595320164349e-06, "loss": 2.8559, "step": 56850 }, { "epoch": 2.79, "grad_norm": 0.7109506726264954, "learning_rate": 7.4925333899165216e-06, "loss": 2.8932, "step": 56851 }, { "epoch": 2.79, "grad_norm": 0.7281533479690552, "learning_rate": 7.489114348597991e-06, "loss": 3.0285, "step": 56852 }, { "epoch": 2.79, "grad_norm": 0.7932998538017273, "learning_rate": 7.48569607769679e-06, "loss": 2.87, "step": 56853 }, { "epoch": 2.79, "grad_norm": 0.7219394445419312, "learning_rate": 7.4822785772219786e-06, "loss": 2.8928, "step": 56854 }, { "epoch": 2.79, "grad_norm": 0.7718573808670044, "learning_rate": 7.478861847182582e-06, "loss": 2.8527, "step": 56855 }, { "epoch": 2.79, "grad_norm": 0.7460842728614807, "learning_rate": 7.475445887587528e-06, "loss": 2.8612, "step": 56856 }, { "epoch": 2.79, "grad_norm": 0.7437963485717773, "learning_rate": 7.472030698445875e-06, "loss": 2.8628, "step": 56857 }, { "epoch": 2.79, "grad_norm": 0.7210949659347534, "learning_rate": 7.468616279766548e-06, "loss": 2.7437, "step": 56858 }, { "epoch": 2.79, "grad_norm": 0.6923820376396179, "learning_rate": 7.465202631558642e-06, "loss": 2.7774, "step": 56859 }, { "epoch": 2.79, "grad_norm": 0.7626507878303528, "learning_rate": 7.461789753831082e-06, "loss": 2.8715, "step": 56860 }, { "epoch": 2.79, "grad_norm": 0.7916741371154785, "learning_rate": 7.458377646592828e-06, "loss": 2.9131, "step": 56861 }, { "epoch": 2.79, "grad_norm": 0.7709150314331055, "learning_rate": 7.454966309852972e-06, "loss": 2.7865, "step": 56862 }, { "epoch": 2.79, "grad_norm": 0.7656967639923096, "learning_rate": 7.4515557436203746e-06, "loss": 2.9982, "step": 56863 }, { "epoch": 2.79, "grad_norm": 0.7165960669517517, "learning_rate": 7.448145947904094e-06, "loss": 2.8934, "step": 56864 }, { "epoch": 2.79, "grad_norm": 0.7551760673522949, "learning_rate": 7.444736922713091e-06, "loss": 2.9164, "step": 56865 }, { "epoch": 2.79, "grad_norm": 0.7682600021362305, "learning_rate": 7.441328668056357e-06, "loss": 2.9947, "step": 56866 }, { "epoch": 2.79, "grad_norm": 0.7713602781295776, "learning_rate": 7.4379211839428856e-06, "loss": 2.8302, "step": 56867 }, { "epoch": 2.79, "grad_norm": 0.7858055830001831, "learning_rate": 7.434514470381603e-06, "loss": 2.838, "step": 56868 }, { "epoch": 2.79, "grad_norm": 0.7591269016265869, "learning_rate": 7.431108527381469e-06, "loss": 2.764, "step": 56869 }, { "epoch": 2.79, "grad_norm": 0.7732019424438477, "learning_rate": 7.427703354951542e-06, "loss": 3.1269, "step": 56870 }, { "epoch": 2.79, "grad_norm": 0.7430728077888489, "learning_rate": 7.424298953100716e-06, "loss": 2.8704, "step": 56871 }, { "epoch": 2.79, "grad_norm": 0.7464427947998047, "learning_rate": 7.420895321837983e-06, "loss": 2.9217, "step": 56872 }, { "epoch": 2.79, "grad_norm": 0.7523966431617737, "learning_rate": 7.41749246117227e-06, "loss": 2.8089, "step": 56873 }, { "epoch": 2.79, "grad_norm": 0.7557439804077148, "learning_rate": 7.414090371112636e-06, "loss": 2.7274, "step": 56874 }, { "epoch": 2.79, "grad_norm": 0.7846986651420593, "learning_rate": 7.41068905166794e-06, "loss": 2.8855, "step": 56875 }, { "epoch": 2.79, "grad_norm": 0.7404799461364746, "learning_rate": 7.4072885028471755e-06, "loss": 2.7038, "step": 56876 }, { "epoch": 2.79, "grad_norm": 0.7172025442123413, "learning_rate": 7.403888724659334e-06, "loss": 2.9229, "step": 56877 }, { "epoch": 2.79, "grad_norm": 0.7421782612800598, "learning_rate": 7.400489717113278e-06, "loss": 2.8372, "step": 56878 }, { "epoch": 2.79, "grad_norm": 0.7487596869468689, "learning_rate": 7.397091480218065e-06, "loss": 2.6678, "step": 56879 }, { "epoch": 2.79, "grad_norm": 0.753854513168335, "learning_rate": 7.393694013982587e-06, "loss": 2.9184, "step": 56880 }, { "epoch": 2.79, "grad_norm": 0.7490181922912598, "learning_rate": 7.390297318415839e-06, "loss": 2.9672, "step": 56881 }, { "epoch": 2.79, "grad_norm": 0.7417166233062744, "learning_rate": 7.386901393526712e-06, "loss": 2.9964, "step": 56882 }, { "epoch": 2.79, "grad_norm": 0.7824398875236511, "learning_rate": 7.383506239324166e-06, "loss": 2.9129, "step": 56883 }, { "epoch": 2.79, "grad_norm": 0.7355906367301941, "learning_rate": 7.380111855817128e-06, "loss": 2.8434, "step": 56884 }, { "epoch": 2.79, "grad_norm": 0.7218039035797119, "learning_rate": 7.3767182430145895e-06, "loss": 2.8935, "step": 56885 }, { "epoch": 2.79, "grad_norm": 0.7684163451194763, "learning_rate": 7.373325400925445e-06, "loss": 2.976, "step": 56886 }, { "epoch": 2.79, "grad_norm": 0.7165760397911072, "learning_rate": 7.3699333295586526e-06, "loss": 2.6787, "step": 56887 }, { "epoch": 2.79, "grad_norm": 0.7151129841804504, "learning_rate": 7.366542028923139e-06, "loss": 2.7651, "step": 56888 }, { "epoch": 2.79, "grad_norm": 0.7509085536003113, "learning_rate": 7.3631514990278305e-06, "loss": 3.0883, "step": 56889 }, { "epoch": 2.79, "grad_norm": 0.7478203773498535, "learning_rate": 7.359761739881686e-06, "loss": 3.0038, "step": 56890 }, { "epoch": 2.79, "grad_norm": 0.77152019739151, "learning_rate": 7.3563727514935665e-06, "loss": 2.804, "step": 56891 }, { "epoch": 2.79, "grad_norm": 0.7167760729789734, "learning_rate": 7.352984533872464e-06, "loss": 3.1426, "step": 56892 }, { "epoch": 2.79, "grad_norm": 0.7409500479698181, "learning_rate": 7.34959708702727e-06, "loss": 2.9238, "step": 56893 }, { "epoch": 2.79, "grad_norm": 0.8209675550460815, "learning_rate": 7.346210410966946e-06, "loss": 2.8103, "step": 56894 }, { "epoch": 2.79, "grad_norm": 0.7210500836372375, "learning_rate": 7.342824505700351e-06, "loss": 2.6596, "step": 56895 }, { "epoch": 2.79, "grad_norm": 0.7498226761817932, "learning_rate": 7.339439371236444e-06, "loss": 2.8775, "step": 56896 }, { "epoch": 2.79, "grad_norm": 0.7758424282073975, "learning_rate": 7.336055007584152e-06, "loss": 2.7742, "step": 56897 }, { "epoch": 2.79, "grad_norm": 0.7447173595428467, "learning_rate": 7.3326714147523e-06, "loss": 2.8494, "step": 56898 }, { "epoch": 2.79, "grad_norm": 0.837697446346283, "learning_rate": 7.329288592749916e-06, "loss": 2.8712, "step": 56899 }, { "epoch": 2.79, "grad_norm": 0.7390157580375671, "learning_rate": 7.3259065415858575e-06, "loss": 2.8616, "step": 56900 }, { "epoch": 2.79, "grad_norm": 0.735798180103302, "learning_rate": 7.322525261269019e-06, "loss": 2.9517, "step": 56901 }, { "epoch": 2.79, "grad_norm": 0.7630640268325806, "learning_rate": 7.319144751808325e-06, "loss": 2.9783, "step": 56902 }, { "epoch": 2.79, "grad_norm": 0.7421290278434753, "learning_rate": 7.315765013212705e-06, "loss": 2.8817, "step": 56903 }, { "epoch": 2.79, "grad_norm": 0.7206147313117981, "learning_rate": 7.312386045491015e-06, "loss": 2.7915, "step": 56904 }, { "epoch": 2.79, "grad_norm": 0.7598541975021362, "learning_rate": 7.309007848652149e-06, "loss": 2.647, "step": 56905 }, { "epoch": 2.79, "grad_norm": 0.7555434703826904, "learning_rate": 7.305630422705033e-06, "loss": 2.7925, "step": 56906 }, { "epoch": 2.79, "grad_norm": 0.7854859232902527, "learning_rate": 7.3022537676585944e-06, "loss": 2.8326, "step": 56907 }, { "epoch": 2.79, "grad_norm": 0.7405829429626465, "learning_rate": 7.298877883521626e-06, "loss": 2.8358, "step": 56908 }, { "epoch": 2.79, "grad_norm": 0.7246938347816467, "learning_rate": 7.295502770303152e-06, "loss": 2.6278, "step": 56909 }, { "epoch": 2.79, "grad_norm": 0.7304655313491821, "learning_rate": 7.292128428011934e-06, "loss": 2.824, "step": 56910 }, { "epoch": 2.79, "grad_norm": 0.7219448685646057, "learning_rate": 7.288754856656964e-06, "loss": 3.1004, "step": 56911 }, { "epoch": 2.79, "grad_norm": 0.7531955242156982, "learning_rate": 7.285382056247069e-06, "loss": 2.8265, "step": 56912 }, { "epoch": 2.79, "grad_norm": 0.7347313761711121, "learning_rate": 7.282010026791107e-06, "loss": 2.758, "step": 56913 }, { "epoch": 2.79, "grad_norm": 0.740077555179596, "learning_rate": 7.27863876829804e-06, "loss": 2.9192, "step": 56914 }, { "epoch": 2.79, "grad_norm": 0.7309461832046509, "learning_rate": 7.275268280776659e-06, "loss": 2.8396, "step": 56915 }, { "epoch": 2.79, "grad_norm": 0.7398518919944763, "learning_rate": 7.2718985642358894e-06, "loss": 2.9457, "step": 56916 }, { "epoch": 2.79, "grad_norm": 0.7491543292999268, "learning_rate": 7.268529618684627e-06, "loss": 2.9195, "step": 56917 }, { "epoch": 2.79, "grad_norm": 0.7731919884681702, "learning_rate": 7.265161444131729e-06, "loss": 3.1226, "step": 56918 }, { "epoch": 2.79, "grad_norm": 0.802014946937561, "learning_rate": 7.261794040586055e-06, "loss": 2.9917, "step": 56919 }, { "epoch": 2.79, "grad_norm": 0.7600812315940857, "learning_rate": 7.258427408056467e-06, "loss": 2.9308, "step": 56920 }, { "epoch": 2.79, "grad_norm": 0.7489023804664612, "learning_rate": 7.255061546551821e-06, "loss": 3.0007, "step": 56921 }, { "epoch": 2.79, "grad_norm": 0.7434468269348145, "learning_rate": 7.251696456081046e-06, "loss": 2.9466, "step": 56922 }, { "epoch": 2.79, "grad_norm": 0.7413731813430786, "learning_rate": 7.248332136652901e-06, "loss": 3.0299, "step": 56923 }, { "epoch": 2.79, "grad_norm": 0.7570599913597107, "learning_rate": 7.2449685882763784e-06, "loss": 2.9096, "step": 56924 }, { "epoch": 2.79, "grad_norm": 0.7353522777557373, "learning_rate": 7.241605810960238e-06, "loss": 2.7581, "step": 56925 }, { "epoch": 2.79, "grad_norm": 0.7793547511100769, "learning_rate": 7.2382438047133395e-06, "loss": 3.1442, "step": 56926 }, { "epoch": 2.79, "grad_norm": 0.7294006943702698, "learning_rate": 7.234882569544609e-06, "loss": 3.0892, "step": 56927 }, { "epoch": 2.79, "grad_norm": 0.7296475172042847, "learning_rate": 7.231522105462806e-06, "loss": 2.8816, "step": 56928 }, { "epoch": 2.79, "grad_norm": 0.8307911157608032, "learning_rate": 7.22816241247689e-06, "loss": 2.9092, "step": 56929 }, { "epoch": 2.79, "grad_norm": 0.744428277015686, "learning_rate": 7.224803490595587e-06, "loss": 2.8779, "step": 56930 }, { "epoch": 2.79, "grad_norm": 0.758772075176239, "learning_rate": 7.221445339827825e-06, "loss": 2.9967, "step": 56931 }, { "epoch": 2.79, "grad_norm": 0.7177093029022217, "learning_rate": 7.218087960182428e-06, "loss": 2.8032, "step": 56932 }, { "epoch": 2.79, "grad_norm": 0.7423251867294312, "learning_rate": 7.21473135166829e-06, "loss": 2.9131, "step": 56933 }, { "epoch": 2.79, "grad_norm": 0.7697576880455017, "learning_rate": 7.211375514294171e-06, "loss": 3.128, "step": 56934 }, { "epoch": 2.79, "grad_norm": 0.7272599935531616, "learning_rate": 7.208020448068897e-06, "loss": 2.6826, "step": 56935 }, { "epoch": 2.79, "grad_norm": 0.7582986354827881, "learning_rate": 7.204666153001393e-06, "loss": 2.9562, "step": 56936 }, { "epoch": 2.79, "grad_norm": 0.757316529750824, "learning_rate": 7.201312629100453e-06, "loss": 2.9547, "step": 56937 }, { "epoch": 2.79, "grad_norm": 0.7660689949989319, "learning_rate": 7.197959876374871e-06, "loss": 2.8556, "step": 56938 }, { "epoch": 2.79, "grad_norm": 0.7317332625389099, "learning_rate": 7.194607894833537e-06, "loss": 2.8184, "step": 56939 }, { "epoch": 2.79, "grad_norm": 0.7949168682098389, "learning_rate": 7.1912566844852475e-06, "loss": 3.0257, "step": 56940 }, { "epoch": 2.79, "grad_norm": 0.7534281611442566, "learning_rate": 7.187906245338826e-06, "loss": 2.8538, "step": 56941 }, { "epoch": 2.79, "grad_norm": 0.7638459205627441, "learning_rate": 7.184556577403134e-06, "loss": 2.8792, "step": 56942 }, { "epoch": 2.79, "grad_norm": 0.761776864528656, "learning_rate": 7.181207680686929e-06, "loss": 2.9767, "step": 56943 }, { "epoch": 2.79, "grad_norm": 0.7768969535827637, "learning_rate": 7.1778595551991055e-06, "loss": 2.87, "step": 56944 }, { "epoch": 2.79, "grad_norm": 0.7668014168739319, "learning_rate": 7.1745122009484235e-06, "loss": 2.8332, "step": 56945 }, { "epoch": 2.79, "grad_norm": 0.7426735162734985, "learning_rate": 7.171165617943741e-06, "loss": 2.8732, "step": 56946 }, { "epoch": 2.79, "grad_norm": 0.7256327867507935, "learning_rate": 7.167819806193853e-06, "loss": 2.9715, "step": 56947 }, { "epoch": 2.79, "grad_norm": 0.8112422227859497, "learning_rate": 7.164474765707518e-06, "loss": 2.9562, "step": 56948 }, { "epoch": 2.79, "grad_norm": 0.7298985719680786, "learning_rate": 7.161130496493661e-06, "loss": 2.8423, "step": 56949 }, { "epoch": 2.79, "grad_norm": 0.7860491275787354, "learning_rate": 7.157786998560977e-06, "loss": 2.8787, "step": 56950 }, { "epoch": 2.79, "grad_norm": 0.815058708190918, "learning_rate": 7.154444271918358e-06, "loss": 2.8567, "step": 56951 }, { "epoch": 2.79, "grad_norm": 0.7936457395553589, "learning_rate": 7.151102316574564e-06, "loss": 2.978, "step": 56952 }, { "epoch": 2.79, "grad_norm": 0.7552315592765808, "learning_rate": 7.147761132538387e-06, "loss": 2.8227, "step": 56953 }, { "epoch": 2.79, "grad_norm": 0.7764397263526917, "learning_rate": 7.144420719818689e-06, "loss": 3.0444, "step": 56954 }, { "epoch": 2.79, "grad_norm": 0.7704004049301147, "learning_rate": 7.141081078424227e-06, "loss": 2.8896, "step": 56955 }, { "epoch": 2.79, "grad_norm": 0.749974250793457, "learning_rate": 7.137742208363762e-06, "loss": 2.7538, "step": 56956 }, { "epoch": 2.79, "grad_norm": 0.7289097309112549, "learning_rate": 7.1344041096461525e-06, "loss": 2.9296, "step": 56957 }, { "epoch": 2.79, "grad_norm": 0.7585612535476685, "learning_rate": 7.131066782280126e-06, "loss": 3.0644, "step": 56958 }, { "epoch": 2.79, "grad_norm": 0.7690691947937012, "learning_rate": 7.127730226274509e-06, "loss": 3.0722, "step": 56959 }, { "epoch": 2.79, "grad_norm": 0.7478421330451965, "learning_rate": 7.124394441638092e-06, "loss": 2.7564, "step": 56960 }, { "epoch": 2.79, "grad_norm": 0.7582880258560181, "learning_rate": 7.121059428379672e-06, "loss": 2.7356, "step": 56961 }, { "epoch": 2.79, "grad_norm": 0.7731198668479919, "learning_rate": 7.117725186508006e-06, "loss": 2.8882, "step": 56962 }, { "epoch": 2.79, "grad_norm": 0.7167331576347351, "learning_rate": 7.114391716031887e-06, "loss": 2.9934, "step": 56963 }, { "epoch": 2.79, "grad_norm": 0.7596493363380432, "learning_rate": 7.111059016960108e-06, "loss": 3.0592, "step": 56964 }, { "epoch": 2.79, "grad_norm": 0.7319817543029785, "learning_rate": 7.107727089301396e-06, "loss": 3.0243, "step": 56965 }, { "epoch": 2.79, "grad_norm": 0.7279293537139893, "learning_rate": 7.1043959330645774e-06, "loss": 2.96, "step": 56966 }, { "epoch": 2.79, "grad_norm": 0.7954239249229431, "learning_rate": 7.101065548258411e-06, "loss": 2.9018, "step": 56967 }, { "epoch": 2.79, "grad_norm": 0.7724497318267822, "learning_rate": 7.097735934891657e-06, "loss": 3.1738, "step": 56968 }, { "epoch": 2.79, "grad_norm": 0.753320574760437, "learning_rate": 7.094407092973142e-06, "loss": 2.981, "step": 56969 }, { "epoch": 2.79, "grad_norm": 0.8049317002296448, "learning_rate": 7.091079022511559e-06, "loss": 2.9705, "step": 56970 }, { "epoch": 2.79, "grad_norm": 0.7446392774581909, "learning_rate": 7.0877517235157e-06, "loss": 2.6321, "step": 56971 }, { "epoch": 2.79, "grad_norm": 0.7473111152648926, "learning_rate": 7.0844251959943255e-06, "loss": 2.8549, "step": 56972 }, { "epoch": 2.79, "grad_norm": 0.7095595598220825, "learning_rate": 7.081099439956195e-06, "loss": 2.7745, "step": 56973 }, { "epoch": 2.79, "grad_norm": 0.7316811084747314, "learning_rate": 7.077774455410102e-06, "loss": 2.6908, "step": 56974 }, { "epoch": 2.79, "grad_norm": 0.7425003051757812, "learning_rate": 7.074450242364738e-06, "loss": 2.8079, "step": 56975 }, { "epoch": 2.79, "grad_norm": 0.7391384840011597, "learning_rate": 7.07112680082893e-06, "loss": 2.8774, "step": 56976 }, { "epoch": 2.79, "grad_norm": 0.763640820980072, "learning_rate": 7.067804130811405e-06, "loss": 2.8268, "step": 56977 }, { "epoch": 2.79, "grad_norm": 0.7273032069206238, "learning_rate": 7.06448223232089e-06, "loss": 2.7182, "step": 56978 }, { "epoch": 2.79, "grad_norm": 0.7752225399017334, "learning_rate": 7.061161105366176e-06, "loss": 2.8235, "step": 56979 }, { "epoch": 2.79, "grad_norm": 0.755729615688324, "learning_rate": 7.057840749955956e-06, "loss": 2.8752, "step": 56980 }, { "epoch": 2.79, "grad_norm": 0.7277241349220276, "learning_rate": 7.054521166099026e-06, "loss": 2.7167, "step": 56981 }, { "epoch": 2.79, "grad_norm": 0.7544986605644226, "learning_rate": 7.0512023538040756e-06, "loss": 2.9328, "step": 56982 }, { "epoch": 2.79, "grad_norm": 0.7631686925888062, "learning_rate": 7.047884313079899e-06, "loss": 2.9111, "step": 56983 }, { "epoch": 2.79, "grad_norm": 0.7494498491287231, "learning_rate": 7.044567043935257e-06, "loss": 2.8915, "step": 56984 }, { "epoch": 2.79, "grad_norm": 0.7393820285797119, "learning_rate": 7.0412505463787734e-06, "loss": 3.0258, "step": 56985 }, { "epoch": 2.79, "grad_norm": 0.7384428381919861, "learning_rate": 7.037934820419311e-06, "loss": 2.8008, "step": 56986 }, { "epoch": 2.79, "grad_norm": 0.7311258316040039, "learning_rate": 7.034619866065494e-06, "loss": 2.9212, "step": 56987 }, { "epoch": 2.79, "grad_norm": 0.7373533248901367, "learning_rate": 7.0313056833261495e-06, "loss": 2.8112, "step": 56988 }, { "epoch": 2.79, "grad_norm": 0.7468560338020325, "learning_rate": 7.0279922722099365e-06, "loss": 2.9788, "step": 56989 }, { "epoch": 2.79, "grad_norm": 0.7494136691093445, "learning_rate": 7.024679632725616e-06, "loss": 2.9388, "step": 56990 }, { "epoch": 2.79, "grad_norm": 0.731553852558136, "learning_rate": 7.0213677648819135e-06, "loss": 2.7017, "step": 56991 }, { "epoch": 2.79, "grad_norm": 0.8054585456848145, "learning_rate": 7.018056668687555e-06, "loss": 2.8854, "step": 56992 }, { "epoch": 2.79, "grad_norm": 0.7437418103218079, "learning_rate": 7.014746344151234e-06, "loss": 2.8213, "step": 56993 }, { "epoch": 2.79, "grad_norm": 0.7302021980285645, "learning_rate": 7.01143679128171e-06, "loss": 2.8911, "step": 56994 }, { "epoch": 2.79, "grad_norm": 0.7534018158912659, "learning_rate": 7.00812801008761e-06, "loss": 2.8335, "step": 56995 }, { "epoch": 2.79, "grad_norm": 0.7726359367370605, "learning_rate": 7.004820000577793e-06, "loss": 2.9413, "step": 56996 }, { "epoch": 2.79, "grad_norm": 0.748771071434021, "learning_rate": 7.0015127627608195e-06, "loss": 2.7926, "step": 56997 }, { "epoch": 2.79, "grad_norm": 0.7633996605873108, "learning_rate": 6.998206296645515e-06, "loss": 2.8268, "step": 56998 }, { "epoch": 2.79, "grad_norm": 0.7452352643013, "learning_rate": 6.994900602240572e-06, "loss": 2.7504, "step": 56999 }, { "epoch": 2.79, "grad_norm": 0.7363735437393188, "learning_rate": 6.991595679554618e-06, "loss": 3.0628, "step": 57000 }, { "epoch": 2.79, "grad_norm": 0.7469814419746399, "learning_rate": 6.988291528596446e-06, "loss": 3.0343, "step": 57001 }, { "epoch": 2.79, "grad_norm": 0.7548006176948547, "learning_rate": 6.984988149374682e-06, "loss": 3.1684, "step": 57002 }, { "epoch": 2.79, "grad_norm": 0.7495371699333191, "learning_rate": 6.981685541898086e-06, "loss": 2.9532, "step": 57003 }, { "epoch": 2.79, "grad_norm": 0.7482824325561523, "learning_rate": 6.97838370617535e-06, "loss": 3.0097, "step": 57004 }, { "epoch": 2.79, "grad_norm": 0.7665938138961792, "learning_rate": 6.975082642215168e-06, "loss": 2.7349, "step": 57005 }, { "epoch": 2.79, "grad_norm": 0.7205333113670349, "learning_rate": 6.9717823500262e-06, "loss": 2.8713, "step": 57006 }, { "epoch": 2.79, "grad_norm": 0.7368008494377136, "learning_rate": 6.9684828296171725e-06, "loss": 2.7659, "step": 57007 }, { "epoch": 2.79, "grad_norm": 0.7547926306724548, "learning_rate": 6.965184080996744e-06, "loss": 2.9229, "step": 57008 }, { "epoch": 2.79, "grad_norm": 0.7362197041511536, "learning_rate": 6.9618861041736415e-06, "loss": 2.6904, "step": 57009 }, { "epoch": 2.79, "grad_norm": 0.7418313026428223, "learning_rate": 6.958588899156525e-06, "loss": 2.8835, "step": 57010 }, { "epoch": 2.79, "grad_norm": 0.7631124258041382, "learning_rate": 6.955292465954121e-06, "loss": 2.7645, "step": 57011 }, { "epoch": 2.79, "grad_norm": 0.7663556337356567, "learning_rate": 6.951996804575055e-06, "loss": 2.9622, "step": 57012 }, { "epoch": 2.79, "grad_norm": 0.7453634738922119, "learning_rate": 6.948701915028054e-06, "loss": 2.8942, "step": 57013 }, { "epoch": 2.79, "grad_norm": 0.7672542333602905, "learning_rate": 6.9454077973217444e-06, "loss": 2.9699, "step": 57014 }, { "epoch": 2.79, "grad_norm": 0.7669967412948608, "learning_rate": 6.9421144514648195e-06, "loss": 2.8731, "step": 57015 }, { "epoch": 2.79, "grad_norm": 0.7587398886680603, "learning_rate": 6.938821877466005e-06, "loss": 2.8788, "step": 57016 }, { "epoch": 2.79, "grad_norm": 0.7381514310836792, "learning_rate": 6.935530075333895e-06, "loss": 2.9416, "step": 57017 }, { "epoch": 2.79, "grad_norm": 0.7473018765449524, "learning_rate": 6.932239045077215e-06, "loss": 3.012, "step": 57018 }, { "epoch": 2.79, "grad_norm": 0.7571165561676025, "learning_rate": 6.928948786704624e-06, "loss": 2.8817, "step": 57019 }, { "epoch": 2.79, "grad_norm": 0.7279015183448792, "learning_rate": 6.925659300224784e-06, "loss": 3.0053, "step": 57020 }, { "epoch": 2.79, "grad_norm": 0.7720736861228943, "learning_rate": 6.9223705856463525e-06, "loss": 2.7265, "step": 57021 }, { "epoch": 2.79, "grad_norm": 0.7333340048789978, "learning_rate": 6.919082642977991e-06, "loss": 2.9004, "step": 57022 }, { "epoch": 2.79, "grad_norm": 0.7707780599594116, "learning_rate": 6.915795472228325e-06, "loss": 3.0141, "step": 57023 }, { "epoch": 2.79, "grad_norm": 0.7591186165809631, "learning_rate": 6.912509073406114e-06, "loss": 2.719, "step": 57024 }, { "epoch": 2.79, "grad_norm": 0.7694956064224243, "learning_rate": 6.909223446519885e-06, "loss": 2.7744, "step": 57025 }, { "epoch": 2.79, "grad_norm": 0.7224785089492798, "learning_rate": 6.9059385915783974e-06, "loss": 2.9739, "step": 57026 }, { "epoch": 2.79, "grad_norm": 0.8058514595031738, "learning_rate": 6.9026545085902445e-06, "loss": 3.1169, "step": 57027 }, { "epoch": 2.79, "grad_norm": 0.7608110308647156, "learning_rate": 6.899371197564085e-06, "loss": 2.9059, "step": 57028 }, { "epoch": 2.79, "grad_norm": 0.7489415407180786, "learning_rate": 6.896088658508581e-06, "loss": 2.8504, "step": 57029 }, { "epoch": 2.79, "grad_norm": 0.802652895450592, "learning_rate": 6.892806891432356e-06, "loss": 2.8233, "step": 57030 }, { "epoch": 2.79, "grad_norm": 0.7284513711929321, "learning_rate": 6.889525896344106e-06, "loss": 2.678, "step": 57031 }, { "epoch": 2.8, "grad_norm": 0.7275854349136353, "learning_rate": 6.886245673252389e-06, "loss": 3.0578, "step": 57032 }, { "epoch": 2.8, "grad_norm": 0.7676070332527161, "learning_rate": 6.882966222165898e-06, "loss": 2.9871, "step": 57033 }, { "epoch": 2.8, "grad_norm": 0.7430452108383179, "learning_rate": 6.879687543093293e-06, "loss": 2.8993, "step": 57034 }, { "epoch": 2.8, "grad_norm": 0.740665853023529, "learning_rate": 6.876409636043168e-06, "loss": 2.8878, "step": 57035 }, { "epoch": 2.8, "grad_norm": 0.744522750377655, "learning_rate": 6.8731325010241815e-06, "loss": 3.1277, "step": 57036 }, { "epoch": 2.8, "grad_norm": 0.7291746735572815, "learning_rate": 6.869856138044927e-06, "loss": 3.0474, "step": 57037 }, { "epoch": 2.8, "grad_norm": 0.7282450795173645, "learning_rate": 6.866580547114031e-06, "loss": 2.6989, "step": 57038 }, { "epoch": 2.8, "grad_norm": 0.7871297597885132, "learning_rate": 6.863305728240187e-06, "loss": 2.9547, "step": 57039 }, { "epoch": 2.8, "grad_norm": 0.7655256390571594, "learning_rate": 6.860031681431954e-06, "loss": 2.9997, "step": 57040 }, { "epoch": 2.8, "grad_norm": 0.7627614736557007, "learning_rate": 6.8567584066980265e-06, "loss": 2.9494, "step": 57041 }, { "epoch": 2.8, "grad_norm": 0.7453312873840332, "learning_rate": 6.853485904046962e-06, "loss": 3.21, "step": 57042 }, { "epoch": 2.8, "grad_norm": 0.7098878622055054, "learning_rate": 6.850214173487389e-06, "loss": 2.8033, "step": 57043 }, { "epoch": 2.8, "grad_norm": 0.79835045337677, "learning_rate": 6.846943215027934e-06, "loss": 2.8388, "step": 57044 }, { "epoch": 2.8, "grad_norm": 0.7601944804191589, "learning_rate": 6.843673028677188e-06, "loss": 3.1851, "step": 57045 }, { "epoch": 2.8, "grad_norm": 0.826819896697998, "learning_rate": 6.840403614443846e-06, "loss": 2.7549, "step": 57046 }, { "epoch": 2.8, "grad_norm": 0.7625347971916199, "learning_rate": 6.8371349723364e-06, "loss": 2.9151, "step": 57047 }, { "epoch": 2.8, "grad_norm": 0.7577785849571228, "learning_rate": 6.833867102363544e-06, "loss": 2.7609, "step": 57048 }, { "epoch": 2.8, "grad_norm": 0.7831384539604187, "learning_rate": 6.830600004533903e-06, "loss": 2.9857, "step": 57049 }, { "epoch": 2.8, "grad_norm": 0.7404026985168457, "learning_rate": 6.827333678855973e-06, "loss": 2.9262, "step": 57050 }, { "epoch": 2.8, "grad_norm": 0.7409828305244446, "learning_rate": 6.824068125338478e-06, "loss": 2.8237, "step": 57051 }, { "epoch": 2.8, "grad_norm": 0.7415507435798645, "learning_rate": 6.820803343989911e-06, "loss": 2.8341, "step": 57052 }, { "epoch": 2.8, "grad_norm": 0.7397423982620239, "learning_rate": 6.817539334818967e-06, "loss": 3.0505, "step": 57053 }, { "epoch": 2.8, "grad_norm": 0.7370979189872742, "learning_rate": 6.81427609783417e-06, "loss": 2.8316, "step": 57054 }, { "epoch": 2.8, "grad_norm": 0.7267847657203674, "learning_rate": 6.8110136330441485e-06, "loss": 2.8226, "step": 57055 }, { "epoch": 2.8, "grad_norm": 0.7403401136398315, "learning_rate": 6.807751940457529e-06, "loss": 3.0404, "step": 57056 }, { "epoch": 2.8, "grad_norm": 0.770000696182251, "learning_rate": 6.804491020082836e-06, "loss": 3.0793, "step": 57057 }, { "epoch": 2.8, "grad_norm": 0.7584140300750732, "learning_rate": 6.801230871928698e-06, "loss": 2.8472, "step": 57058 }, { "epoch": 2.8, "grad_norm": 0.7541932463645935, "learning_rate": 6.7979714960036735e-06, "loss": 2.9838, "step": 57059 }, { "epoch": 2.8, "grad_norm": 0.749034583568573, "learning_rate": 6.794712892316356e-06, "loss": 2.8473, "step": 57060 }, { "epoch": 2.8, "grad_norm": 0.7517021298408508, "learning_rate": 6.791455060875372e-06, "loss": 2.8163, "step": 57061 }, { "epoch": 2.8, "grad_norm": 0.746054470539093, "learning_rate": 6.788198001689249e-06, "loss": 3.0738, "step": 57062 }, { "epoch": 2.8, "grad_norm": 0.7522591352462769, "learning_rate": 6.784941714766579e-06, "loss": 2.8944, "step": 57063 }, { "epoch": 2.8, "grad_norm": 0.7277589440345764, "learning_rate": 6.781686200115987e-06, "loss": 2.8983, "step": 57064 }, { "epoch": 2.8, "grad_norm": 0.7314507961273193, "learning_rate": 6.7784314577459364e-06, "loss": 2.8121, "step": 57065 }, { "epoch": 2.8, "grad_norm": 0.7789071202278137, "learning_rate": 6.7751774876650845e-06, "loss": 3.0653, "step": 57066 }, { "epoch": 2.8, "grad_norm": 0.8103259205818176, "learning_rate": 6.7719242898819915e-06, "loss": 2.6541, "step": 57067 }, { "epoch": 2.8, "grad_norm": 0.7770789265632629, "learning_rate": 6.768671864405218e-06, "loss": 3.0381, "step": 57068 }, { "epoch": 2.8, "grad_norm": 0.7559372186660767, "learning_rate": 6.765420211243322e-06, "loss": 2.9393, "step": 57069 }, { "epoch": 2.8, "grad_norm": 0.722319483757019, "learning_rate": 6.7621693304048984e-06, "loss": 3.1307, "step": 57070 }, { "epoch": 2.8, "grad_norm": 0.7697762846946716, "learning_rate": 6.758919221898473e-06, "loss": 2.8636, "step": 57071 }, { "epoch": 2.8, "grad_norm": 0.7274612188339233, "learning_rate": 6.755669885732607e-06, "loss": 2.9749, "step": 57072 }, { "epoch": 2.8, "grad_norm": 0.7847182750701904, "learning_rate": 6.7524213219158905e-06, "loss": 2.8848, "step": 57073 }, { "epoch": 2.8, "grad_norm": 0.7678075432777405, "learning_rate": 6.749173530456819e-06, "loss": 2.7446, "step": 57074 }, { "epoch": 2.8, "grad_norm": 0.8545045852661133, "learning_rate": 6.745926511363986e-06, "loss": 2.9995, "step": 57075 }, { "epoch": 2.8, "grad_norm": 0.7134187817573547, "learning_rate": 6.742680264645983e-06, "loss": 2.8353, "step": 57076 }, { "epoch": 2.8, "grad_norm": 0.8113738298416138, "learning_rate": 6.7394347903113045e-06, "loss": 3.1161, "step": 57077 }, { "epoch": 2.8, "grad_norm": 0.7588936686515808, "learning_rate": 6.736190088368543e-06, "loss": 2.9629, "step": 57078 }, { "epoch": 2.8, "grad_norm": 0.7868394255638123, "learning_rate": 6.732946158826224e-06, "loss": 2.9781, "step": 57079 }, { "epoch": 2.8, "grad_norm": 0.7183898091316223, "learning_rate": 6.72970300169281e-06, "loss": 2.8361, "step": 57080 }, { "epoch": 2.8, "grad_norm": 0.7224221229553223, "learning_rate": 6.726460616976992e-06, "loss": 2.7046, "step": 57081 }, { "epoch": 2.8, "grad_norm": 0.724663257598877, "learning_rate": 6.723219004687197e-06, "loss": 2.8677, "step": 57082 }, { "epoch": 2.8, "grad_norm": 0.7192972302436829, "learning_rate": 6.7199781648320185e-06, "loss": 2.7882, "step": 57083 }, { "epoch": 2.8, "grad_norm": 0.7668144106864929, "learning_rate": 6.716738097419949e-06, "loss": 2.9125, "step": 57084 }, { "epoch": 2.8, "grad_norm": 0.7557151317596436, "learning_rate": 6.713498802459583e-06, "loss": 2.6406, "step": 57085 }, { "epoch": 2.8, "grad_norm": 0.7683967351913452, "learning_rate": 6.710260279959412e-06, "loss": 2.9452, "step": 57086 }, { "epoch": 2.8, "grad_norm": 0.7353421449661255, "learning_rate": 6.707022529927964e-06, "loss": 2.8917, "step": 57087 }, { "epoch": 2.8, "grad_norm": 0.7499251961708069, "learning_rate": 6.703785552373764e-06, "loss": 2.8837, "step": 57088 }, { "epoch": 2.8, "grad_norm": 0.7639588713645935, "learning_rate": 6.700549347305339e-06, "loss": 2.915, "step": 57089 }, { "epoch": 2.8, "grad_norm": 0.776925265789032, "learning_rate": 6.697313914731217e-06, "loss": 2.9083, "step": 57090 }, { "epoch": 2.8, "grad_norm": 0.755670428276062, "learning_rate": 6.694079254659957e-06, "loss": 2.8506, "step": 57091 }, { "epoch": 2.8, "grad_norm": 0.789338231086731, "learning_rate": 6.690845367100017e-06, "loss": 2.8634, "step": 57092 }, { "epoch": 2.8, "grad_norm": 0.7923612594604492, "learning_rate": 6.68761225205996e-06, "loss": 2.8226, "step": 57093 }, { "epoch": 2.8, "grad_norm": 0.731760561466217, "learning_rate": 6.6843799095482766e-06, "loss": 2.9201, "step": 57094 }, { "epoch": 2.8, "grad_norm": 0.7637943029403687, "learning_rate": 6.681148339573461e-06, "loss": 2.7399, "step": 57095 }, { "epoch": 2.8, "grad_norm": 0.759672224521637, "learning_rate": 6.677917542144107e-06, "loss": 3.0211, "step": 57096 }, { "epoch": 2.8, "grad_norm": 0.7676330208778381, "learning_rate": 6.674687517268606e-06, "loss": 2.6788, "step": 57097 }, { "epoch": 2.8, "grad_norm": 0.7366164326667786, "learning_rate": 6.671458264955554e-06, "loss": 2.8141, "step": 57098 }, { "epoch": 2.8, "grad_norm": 0.7516197562217712, "learning_rate": 6.668229785213441e-06, "loss": 3.1885, "step": 57099 }, { "epoch": 2.8, "grad_norm": 0.7431179285049438, "learning_rate": 6.6650020780507295e-06, "loss": 2.7607, "step": 57100 }, { "epoch": 2.8, "grad_norm": 0.7496405243873596, "learning_rate": 6.661775143475978e-06, "loss": 3.0291, "step": 57101 }, { "epoch": 2.8, "grad_norm": 0.7139589786529541, "learning_rate": 6.658548981497647e-06, "loss": 2.7426, "step": 57102 }, { "epoch": 2.8, "grad_norm": 0.7403964400291443, "learning_rate": 6.655323592124262e-06, "loss": 2.9492, "step": 57103 }, { "epoch": 2.8, "grad_norm": 0.7503599524497986, "learning_rate": 6.652098975364251e-06, "loss": 2.7976, "step": 57104 }, { "epoch": 2.8, "grad_norm": 0.7594204545021057, "learning_rate": 6.648875131226173e-06, "loss": 2.8069, "step": 57105 }, { "epoch": 2.8, "grad_norm": 0.7432661652565002, "learning_rate": 6.645652059718521e-06, "loss": 2.7111, "step": 57106 }, { "epoch": 2.8, "grad_norm": 0.7444175481796265, "learning_rate": 6.642429760849754e-06, "loss": 2.7686, "step": 57107 }, { "epoch": 2.8, "grad_norm": 0.7241867780685425, "learning_rate": 6.639208234628402e-06, "loss": 2.8685, "step": 57108 }, { "epoch": 2.8, "grad_norm": 0.8182801008224487, "learning_rate": 6.635987481062854e-06, "loss": 2.8802, "step": 57109 }, { "epoch": 2.8, "grad_norm": 0.7803454995155334, "learning_rate": 6.632767500161673e-06, "loss": 2.9425, "step": 57110 }, { "epoch": 2.8, "grad_norm": 0.7847126126289368, "learning_rate": 6.629548291933351e-06, "loss": 2.8971, "step": 57111 }, { "epoch": 2.8, "grad_norm": 0.7423437833786011, "learning_rate": 6.626329856386314e-06, "loss": 2.8664, "step": 57112 }, { "epoch": 2.8, "grad_norm": 0.7208489775657654, "learning_rate": 6.62311219352909e-06, "loss": 2.8964, "step": 57113 }, { "epoch": 2.8, "grad_norm": 0.7785851955413818, "learning_rate": 6.619895303370071e-06, "loss": 2.9177, "step": 57114 }, { "epoch": 2.8, "grad_norm": 0.7598791122436523, "learning_rate": 6.61667918591785e-06, "loss": 2.933, "step": 57115 }, { "epoch": 2.8, "grad_norm": 0.7468469142913818, "learning_rate": 6.613463841180822e-06, "loss": 3.0654, "step": 57116 }, { "epoch": 2.8, "grad_norm": 0.7566969394683838, "learning_rate": 6.610249269167445e-06, "loss": 2.978, "step": 57117 }, { "epoch": 2.8, "grad_norm": 0.7173053622245789, "learning_rate": 6.607035469886213e-06, "loss": 2.6706, "step": 57118 }, { "epoch": 2.8, "grad_norm": 0.7708240747451782, "learning_rate": 6.603822443345586e-06, "loss": 2.9867, "step": 57119 }, { "epoch": 2.8, "grad_norm": 0.7301391959190369, "learning_rate": 6.60061018955399e-06, "loss": 2.9992, "step": 57120 }, { "epoch": 2.8, "grad_norm": 0.7776297330856323, "learning_rate": 6.597398708519986e-06, "loss": 2.88, "step": 57121 }, { "epoch": 2.8, "grad_norm": 0.7679793834686279, "learning_rate": 6.594188000251932e-06, "loss": 2.8798, "step": 57122 }, { "epoch": 2.8, "grad_norm": 0.740397036075592, "learning_rate": 6.590978064758356e-06, "loss": 2.9748, "step": 57123 }, { "epoch": 2.8, "grad_norm": 0.7653552293777466, "learning_rate": 6.587768902047618e-06, "loss": 2.9602, "step": 57124 }, { "epoch": 2.8, "grad_norm": 0.70607590675354, "learning_rate": 6.584560512128278e-06, "loss": 3.0756, "step": 57125 }, { "epoch": 2.8, "grad_norm": 0.7225512266159058, "learning_rate": 6.581352895008696e-06, "loss": 2.8843, "step": 57126 }, { "epoch": 2.8, "grad_norm": 0.7174662351608276, "learning_rate": 6.578146050697364e-06, "loss": 2.8845, "step": 57127 }, { "epoch": 2.8, "grad_norm": 0.7353945374488831, "learning_rate": 6.574939979202776e-06, "loss": 2.7851, "step": 57128 }, { "epoch": 2.8, "grad_norm": 0.7632526159286499, "learning_rate": 6.571734680533292e-06, "loss": 2.8001, "step": 57129 }, { "epoch": 2.8, "grad_norm": 0.7371880412101746, "learning_rate": 6.568530154697404e-06, "loss": 2.8652, "step": 57130 }, { "epoch": 2.8, "grad_norm": 0.7216587066650391, "learning_rate": 6.5653264017035075e-06, "loss": 2.9515, "step": 57131 }, { "epoch": 2.8, "grad_norm": 0.7436114549636841, "learning_rate": 6.562123421560062e-06, "loss": 2.7013, "step": 57132 }, { "epoch": 2.8, "grad_norm": 0.7178574800491333, "learning_rate": 6.5589212142755255e-06, "loss": 2.8224, "step": 57133 }, { "epoch": 2.8, "grad_norm": 0.723756730556488, "learning_rate": 6.555719779858293e-06, "loss": 2.9817, "step": 57134 }, { "epoch": 2.8, "grad_norm": 0.7632118463516235, "learning_rate": 6.552519118316857e-06, "loss": 2.7139, "step": 57135 }, { "epoch": 2.8, "grad_norm": 0.7216606140136719, "learning_rate": 6.549319229659611e-06, "loss": 2.9235, "step": 57136 }, { "epoch": 2.8, "grad_norm": 0.7410150766372681, "learning_rate": 6.546120113894981e-06, "loss": 2.6225, "step": 57137 }, { "epoch": 2.8, "grad_norm": 0.7452429533004761, "learning_rate": 6.542921771031395e-06, "loss": 2.729, "step": 57138 }, { "epoch": 2.8, "grad_norm": 0.7587102651596069, "learning_rate": 6.5397242010772454e-06, "loss": 2.7456, "step": 57139 }, { "epoch": 2.8, "grad_norm": 0.7959299087524414, "learning_rate": 6.536527404041025e-06, "loss": 3.0694, "step": 57140 }, { "epoch": 2.8, "grad_norm": 0.7172296643257141, "learning_rate": 6.533331379931061e-06, "loss": 2.7504, "step": 57141 }, { "epoch": 2.8, "grad_norm": 0.773472785949707, "learning_rate": 6.5301361287558475e-06, "loss": 2.8911, "step": 57142 }, { "epoch": 2.8, "grad_norm": 0.7361921072006226, "learning_rate": 6.526941650523776e-06, "loss": 3.1896, "step": 57143 }, { "epoch": 2.8, "grad_norm": 0.7535603046417236, "learning_rate": 6.523747945243307e-06, "loss": 2.7142, "step": 57144 }, { "epoch": 2.8, "grad_norm": 0.7661078572273254, "learning_rate": 6.520555012922768e-06, "loss": 2.8727, "step": 57145 }, { "epoch": 2.8, "grad_norm": 0.8038497567176819, "learning_rate": 6.5173628535705846e-06, "loss": 2.9623, "step": 57146 }, { "epoch": 2.8, "grad_norm": 0.7339990139007568, "learning_rate": 6.514171467195184e-06, "loss": 2.9381, "step": 57147 }, { "epoch": 2.8, "grad_norm": 0.7659273743629456, "learning_rate": 6.510980853805026e-06, "loss": 2.6611, "step": 57148 }, { "epoch": 2.8, "grad_norm": 0.7171333432197571, "learning_rate": 6.507791013408403e-06, "loss": 2.8451, "step": 57149 }, { "epoch": 2.8, "grad_norm": 0.7424218058586121, "learning_rate": 6.50460194601381e-06, "loss": 2.8667, "step": 57150 }, { "epoch": 2.8, "grad_norm": 0.7660609483718872, "learning_rate": 6.501413651629605e-06, "loss": 2.9709, "step": 57151 }, { "epoch": 2.8, "grad_norm": 0.7458734512329102, "learning_rate": 6.498226130264183e-06, "loss": 2.931, "step": 57152 }, { "epoch": 2.8, "grad_norm": 0.7954019904136658, "learning_rate": 6.495039381925971e-06, "loss": 2.6553, "step": 57153 }, { "epoch": 2.8, "grad_norm": 0.760521650314331, "learning_rate": 6.491853406623293e-06, "loss": 2.7921, "step": 57154 }, { "epoch": 2.8, "grad_norm": 0.7642186284065247, "learning_rate": 6.488668204364644e-06, "loss": 2.8834, "step": 57155 }, { "epoch": 2.8, "grad_norm": 0.7433639764785767, "learning_rate": 6.485483775158317e-06, "loss": 3.1153, "step": 57156 }, { "epoch": 2.8, "grad_norm": 0.7498500943183899, "learning_rate": 6.482300119012706e-06, "loss": 2.9402, "step": 57157 }, { "epoch": 2.8, "grad_norm": 0.7769145965576172, "learning_rate": 6.479117235936304e-06, "loss": 2.9213, "step": 57158 }, { "epoch": 2.8, "grad_norm": 0.7752602100372314, "learning_rate": 6.475935125937404e-06, "loss": 3.0248, "step": 57159 }, { "epoch": 2.8, "grad_norm": 0.7823769450187683, "learning_rate": 6.472753789024365e-06, "loss": 3.0033, "step": 57160 }, { "epoch": 2.8, "grad_norm": 0.7235212922096252, "learning_rate": 6.469573225205615e-06, "loss": 2.8306, "step": 57161 }, { "epoch": 2.8, "grad_norm": 0.7436525225639343, "learning_rate": 6.466393434489514e-06, "loss": 2.8062, "step": 57162 }, { "epoch": 2.8, "grad_norm": 0.716906726360321, "learning_rate": 6.463214416884455e-06, "loss": 2.84, "step": 57163 }, { "epoch": 2.8, "grad_norm": 0.7609844207763672, "learning_rate": 6.460036172398764e-06, "loss": 2.8809, "step": 57164 }, { "epoch": 2.8, "grad_norm": 0.7516579031944275, "learning_rate": 6.4568587010409015e-06, "loss": 2.942, "step": 57165 }, { "epoch": 2.8, "grad_norm": 0.7477497458457947, "learning_rate": 6.453682002819161e-06, "loss": 2.815, "step": 57166 }, { "epoch": 2.8, "grad_norm": 0.7754057049751282, "learning_rate": 6.450506077741901e-06, "loss": 2.8088, "step": 57167 }, { "epoch": 2.8, "grad_norm": 0.7623452544212341, "learning_rate": 6.447330925817518e-06, "loss": 2.8484, "step": 57168 }, { "epoch": 2.8, "grad_norm": 0.7394402623176575, "learning_rate": 6.444156547054369e-06, "loss": 2.9405, "step": 57169 }, { "epoch": 2.8, "grad_norm": 0.779207706451416, "learning_rate": 6.440982941460848e-06, "loss": 3.1458, "step": 57170 }, { "epoch": 2.8, "grad_norm": 0.7456086874008179, "learning_rate": 6.437810109045249e-06, "loss": 3.0439, "step": 57171 }, { "epoch": 2.8, "grad_norm": 0.7450244426727295, "learning_rate": 6.434638049815966e-06, "loss": 2.8157, "step": 57172 }, { "epoch": 2.8, "grad_norm": 0.7540341019630432, "learning_rate": 6.4314667637813565e-06, "loss": 3.0942, "step": 57173 }, { "epoch": 2.8, "grad_norm": 0.7208600044250488, "learning_rate": 6.428296250949749e-06, "loss": 2.9982, "step": 57174 }, { "epoch": 2.8, "grad_norm": 0.7499627470970154, "learning_rate": 6.425126511329537e-06, "loss": 2.8532, "step": 57175 }, { "epoch": 2.8, "grad_norm": 0.7473828196525574, "learning_rate": 6.421957544929013e-06, "loss": 2.9155, "step": 57176 }, { "epoch": 2.8, "grad_norm": 0.7623142004013062, "learning_rate": 6.41878935175657e-06, "loss": 2.984, "step": 57177 }, { "epoch": 2.8, "grad_norm": 0.7398785352706909, "learning_rate": 6.415621931820536e-06, "loss": 2.7673, "step": 57178 }, { "epoch": 2.8, "grad_norm": 0.7367839813232422, "learning_rate": 6.412455285129236e-06, "loss": 3.085, "step": 57179 }, { "epoch": 2.8, "grad_norm": 0.7085956335067749, "learning_rate": 6.409289411691065e-06, "loss": 2.8524, "step": 57180 }, { "epoch": 2.8, "grad_norm": 0.741699755191803, "learning_rate": 6.406124311514316e-06, "loss": 2.7711, "step": 57181 }, { "epoch": 2.8, "grad_norm": 0.7420092225074768, "learning_rate": 6.402959984607281e-06, "loss": 3.0438, "step": 57182 }, { "epoch": 2.8, "grad_norm": 0.7586853504180908, "learning_rate": 6.3997964309783875e-06, "loss": 2.9436, "step": 57183 }, { "epoch": 2.8, "grad_norm": 0.7290069460868835, "learning_rate": 6.396633650635929e-06, "loss": 2.8053, "step": 57184 }, { "epoch": 2.8, "grad_norm": 0.7516114115715027, "learning_rate": 6.393471643588233e-06, "loss": 2.7453, "step": 57185 }, { "epoch": 2.8, "grad_norm": 0.7613343596458435, "learning_rate": 6.390310409843591e-06, "loss": 2.723, "step": 57186 }, { "epoch": 2.8, "grad_norm": 0.6871271729469299, "learning_rate": 6.387149949410398e-06, "loss": 2.9468, "step": 57187 }, { "epoch": 2.8, "grad_norm": 0.7292281985282898, "learning_rate": 6.383990262296945e-06, "loss": 3.0544, "step": 57188 }, { "epoch": 2.8, "grad_norm": 0.7564751505851746, "learning_rate": 6.380831348511528e-06, "loss": 2.873, "step": 57189 }, { "epoch": 2.8, "grad_norm": 0.7569683194160461, "learning_rate": 6.377673208062539e-06, "loss": 2.8391, "step": 57190 }, { "epoch": 2.8, "grad_norm": 0.7515416741371155, "learning_rate": 6.374515840958206e-06, "loss": 2.9058, "step": 57191 }, { "epoch": 2.8, "grad_norm": 0.7558345794677734, "learning_rate": 6.371359247206887e-06, "loss": 2.8219, "step": 57192 }, { "epoch": 2.8, "grad_norm": 0.7959738969802856, "learning_rate": 6.368203426816909e-06, "loss": 2.8152, "step": 57193 }, { "epoch": 2.8, "grad_norm": 0.7814188599586487, "learning_rate": 6.365048379796567e-06, "loss": 3.0779, "step": 57194 }, { "epoch": 2.8, "grad_norm": 0.7512837648391724, "learning_rate": 6.361894106154253e-06, "loss": 2.8264, "step": 57195 }, { "epoch": 2.8, "grad_norm": 0.7334195375442505, "learning_rate": 6.358740605898094e-06, "loss": 2.921, "step": 57196 }, { "epoch": 2.8, "grad_norm": 0.7542886137962341, "learning_rate": 6.35558787903655e-06, "loss": 2.8447, "step": 57197 }, { "epoch": 2.8, "grad_norm": 0.7210304737091064, "learning_rate": 6.352435925577848e-06, "loss": 2.9945, "step": 57198 }, { "epoch": 2.8, "grad_norm": 0.767892062664032, "learning_rate": 6.349284745530347e-06, "loss": 2.9185, "step": 57199 }, { "epoch": 2.8, "grad_norm": 0.7845554351806641, "learning_rate": 6.3461343389023086e-06, "loss": 3.0824, "step": 57200 }, { "epoch": 2.8, "grad_norm": 0.7469670176506042, "learning_rate": 6.342984705701992e-06, "loss": 2.7764, "step": 57201 }, { "epoch": 2.8, "grad_norm": 0.7826364636421204, "learning_rate": 6.339835845937791e-06, "loss": 2.9049, "step": 57202 }, { "epoch": 2.8, "grad_norm": 0.7540563344955444, "learning_rate": 6.336687759617965e-06, "loss": 2.9289, "step": 57203 }, { "epoch": 2.8, "grad_norm": 0.7304716110229492, "learning_rate": 6.33354044675074e-06, "loss": 2.9662, "step": 57204 }, { "epoch": 2.8, "grad_norm": 0.7389193773269653, "learning_rate": 6.330393907344478e-06, "loss": 2.9829, "step": 57205 }, { "epoch": 2.8, "grad_norm": 0.8157293200492859, "learning_rate": 6.3272481414074036e-06, "loss": 3.1955, "step": 57206 }, { "epoch": 2.8, "grad_norm": 0.775015652179718, "learning_rate": 6.324103148947879e-06, "loss": 2.8361, "step": 57207 }, { "epoch": 2.8, "grad_norm": 0.7417725920677185, "learning_rate": 6.320958929974096e-06, "loss": 3.1331, "step": 57208 }, { "epoch": 2.8, "grad_norm": 0.7489861845970154, "learning_rate": 6.3178154844944484e-06, "loss": 2.947, "step": 57209 }, { "epoch": 2.8, "grad_norm": 0.7145342826843262, "learning_rate": 6.31467281251713e-06, "loss": 3.0041, "step": 57210 }, { "epoch": 2.8, "grad_norm": 0.7301230430603027, "learning_rate": 6.311530914050433e-06, "loss": 2.8497, "step": 57211 }, { "epoch": 2.8, "grad_norm": 0.8062174916267395, "learning_rate": 6.3083897891026855e-06, "loss": 2.843, "step": 57212 }, { "epoch": 2.8, "grad_norm": 0.7669421434402466, "learning_rate": 6.305249437682047e-06, "loss": 2.9091, "step": 57213 }, { "epoch": 2.8, "grad_norm": 0.7261912822723389, "learning_rate": 6.302109859796878e-06, "loss": 2.7274, "step": 57214 }, { "epoch": 2.8, "grad_norm": 0.7557966709136963, "learning_rate": 6.298971055455471e-06, "loss": 2.7214, "step": 57215 }, { "epoch": 2.8, "grad_norm": 0.776716947555542, "learning_rate": 6.29583302466602e-06, "loss": 3.1463, "step": 57216 }, { "epoch": 2.8, "grad_norm": 0.7491167783737183, "learning_rate": 6.292695767436817e-06, "loss": 2.8682, "step": 57217 }, { "epoch": 2.8, "grad_norm": 0.7415738701820374, "learning_rate": 6.289559283776158e-06, "loss": 2.9238, "step": 57218 }, { "epoch": 2.8, "grad_norm": 0.731604278087616, "learning_rate": 6.286423573692234e-06, "loss": 2.946, "step": 57219 }, { "epoch": 2.8, "grad_norm": 0.7397477626800537, "learning_rate": 6.283288637193373e-06, "loss": 2.7806, "step": 57220 }, { "epoch": 2.8, "grad_norm": 0.761749804019928, "learning_rate": 6.280154474287769e-06, "loss": 2.791, "step": 57221 }, { "epoch": 2.8, "grad_norm": 0.7503309845924377, "learning_rate": 6.277021084983747e-06, "loss": 2.8343, "step": 57222 }, { "epoch": 2.8, "grad_norm": 0.7354109883308411, "learning_rate": 6.273888469289501e-06, "loss": 2.7524, "step": 57223 }, { "epoch": 2.8, "grad_norm": 0.7324250936508179, "learning_rate": 6.270756627213325e-06, "loss": 2.9752, "step": 57224 }, { "epoch": 2.8, "grad_norm": 0.7345625162124634, "learning_rate": 6.267625558763445e-06, "loss": 2.7768, "step": 57225 }, { "epoch": 2.8, "grad_norm": 0.7356343269348145, "learning_rate": 6.264495263948088e-06, "loss": 2.7994, "step": 57226 }, { "epoch": 2.8, "grad_norm": 0.72329181432724, "learning_rate": 6.261365742775548e-06, "loss": 2.7858, "step": 57227 }, { "epoch": 2.8, "grad_norm": 0.7713742852210999, "learning_rate": 6.258236995254018e-06, "loss": 2.6734, "step": 57228 }, { "epoch": 2.8, "grad_norm": 0.7680825591087341, "learning_rate": 6.255109021391724e-06, "loss": 3.1817, "step": 57229 }, { "epoch": 2.8, "grad_norm": 0.7381764650344849, "learning_rate": 6.251981821196994e-06, "loss": 2.9703, "step": 57230 }, { "epoch": 2.8, "grad_norm": 0.7881075739860535, "learning_rate": 6.24885539467802e-06, "loss": 2.8687, "step": 57231 }, { "epoch": 2.8, "grad_norm": 0.7882162928581238, "learning_rate": 6.245729741843031e-06, "loss": 2.9799, "step": 57232 }, { "epoch": 2.8, "grad_norm": 0.8076366782188416, "learning_rate": 6.242604862700218e-06, "loss": 2.7061, "step": 57233 }, { "epoch": 2.8, "grad_norm": 0.7945789694786072, "learning_rate": 6.239480757257842e-06, "loss": 2.9682, "step": 57234 }, { "epoch": 2.8, "grad_norm": 0.7510719895362854, "learning_rate": 6.236357425524164e-06, "loss": 3.0232, "step": 57235 }, { "epoch": 2.8, "grad_norm": 0.7661550045013428, "learning_rate": 6.233234867507375e-06, "loss": 2.9934, "step": 57236 }, { "epoch": 2.81, "grad_norm": 0.7666568160057068, "learning_rate": 6.2301130832157046e-06, "loss": 2.8113, "step": 57237 }, { "epoch": 2.81, "grad_norm": 0.7529629468917847, "learning_rate": 6.226992072657377e-06, "loss": 2.7529, "step": 57238 }, { "epoch": 2.81, "grad_norm": 0.7184775471687317, "learning_rate": 6.223871835840655e-06, "loss": 2.7139, "step": 57239 }, { "epoch": 2.81, "grad_norm": 0.7889968752861023, "learning_rate": 6.220752372773696e-06, "loss": 3.0028, "step": 57240 }, { "epoch": 2.81, "grad_norm": 0.7780910134315491, "learning_rate": 6.217633683464696e-06, "loss": 2.9095, "step": 57241 }, { "epoch": 2.81, "grad_norm": 0.7787454724311829, "learning_rate": 6.214515767921946e-06, "loss": 2.9341, "step": 57242 }, { "epoch": 2.81, "grad_norm": 0.7292056083679199, "learning_rate": 6.211398626153607e-06, "loss": 2.8772, "step": 57243 }, { "epoch": 2.81, "grad_norm": 0.7210398316383362, "learning_rate": 6.208282258167907e-06, "loss": 2.7536, "step": 57244 }, { "epoch": 2.81, "grad_norm": 0.7634195685386658, "learning_rate": 6.205166663973038e-06, "loss": 2.9861, "step": 57245 }, { "epoch": 2.81, "grad_norm": 0.7329500317573547, "learning_rate": 6.202051843577261e-06, "loss": 3.0233, "step": 57246 }, { "epoch": 2.81, "grad_norm": 0.7299970388412476, "learning_rate": 6.198937796988734e-06, "loss": 2.5595, "step": 57247 }, { "epoch": 2.81, "grad_norm": 0.7431080937385559, "learning_rate": 6.19582452421562e-06, "loss": 2.9949, "step": 57248 }, { "epoch": 2.81, "grad_norm": 0.7681297063827515, "learning_rate": 6.1927120252661444e-06, "loss": 3.1091, "step": 57249 }, { "epoch": 2.81, "grad_norm": 0.7236172556877136, "learning_rate": 6.189600300148567e-06, "loss": 2.9795, "step": 57250 }, { "epoch": 2.81, "grad_norm": 0.7395146489143372, "learning_rate": 6.186489348871016e-06, "loss": 3.0194, "step": 57251 }, { "epoch": 2.81, "grad_norm": 0.7280433773994446, "learning_rate": 6.183379171441716e-06, "loss": 2.8707, "step": 57252 }, { "epoch": 2.81, "grad_norm": 0.7451938390731812, "learning_rate": 6.180269767868862e-06, "loss": 2.9683, "step": 57253 }, { "epoch": 2.81, "grad_norm": 0.7515122890472412, "learning_rate": 6.177161138160614e-06, "loss": 2.8751, "step": 57254 }, { "epoch": 2.81, "grad_norm": 0.7310147881507874, "learning_rate": 6.174053282325198e-06, "loss": 2.7883, "step": 57255 }, { "epoch": 2.81, "grad_norm": 0.7602556943893433, "learning_rate": 6.170946200370741e-06, "loss": 3.0935, "step": 57256 }, { "epoch": 2.81, "grad_norm": 0.7617813348770142, "learning_rate": 6.1678398923055035e-06, "loss": 3.003, "step": 57257 }, { "epoch": 2.81, "grad_norm": 0.7423365116119385, "learning_rate": 6.164734358137579e-06, "loss": 2.8405, "step": 57258 }, { "epoch": 2.81, "grad_norm": 0.730692446231842, "learning_rate": 6.161629597875195e-06, "loss": 2.8546, "step": 57259 }, { "epoch": 2.81, "grad_norm": 0.7807807922363281, "learning_rate": 6.158525611526577e-06, "loss": 2.837, "step": 57260 }, { "epoch": 2.81, "grad_norm": 0.7491143345832825, "learning_rate": 6.155422399099852e-06, "loss": 3.056, "step": 57261 }, { "epoch": 2.81, "grad_norm": 0.7525192499160767, "learning_rate": 6.1523199606031806e-06, "loss": 2.7682, "step": 57262 }, { "epoch": 2.81, "grad_norm": 0.8082860708236694, "learning_rate": 6.149218296044722e-06, "loss": 2.7799, "step": 57263 }, { "epoch": 2.81, "grad_norm": 0.7616897821426392, "learning_rate": 6.146117405432638e-06, "loss": 2.6663, "step": 57264 }, { "epoch": 2.81, "grad_norm": 0.784067690372467, "learning_rate": 6.143017288775187e-06, "loss": 3.032, "step": 57265 }, { "epoch": 2.81, "grad_norm": 0.773425817489624, "learning_rate": 6.139917946080464e-06, "loss": 2.8674, "step": 57266 }, { "epoch": 2.81, "grad_norm": 0.7454070448875427, "learning_rate": 6.136819377356628e-06, "loss": 2.6727, "step": 57267 }, { "epoch": 2.81, "grad_norm": 0.7858648896217346, "learning_rate": 6.133721582611906e-06, "loss": 2.9446, "step": 57268 }, { "epoch": 2.81, "grad_norm": 0.7687196135520935, "learning_rate": 6.130624561854325e-06, "loss": 2.6409, "step": 57269 }, { "epoch": 2.81, "grad_norm": 0.7145859599113464, "learning_rate": 6.127528315092178e-06, "loss": 3.2014, "step": 57270 }, { "epoch": 2.81, "grad_norm": 0.7361599802970886, "learning_rate": 6.1244328423335265e-06, "loss": 3.015, "step": 57271 }, { "epoch": 2.81, "grad_norm": 0.7314938902854919, "learning_rate": 6.121338143586596e-06, "loss": 3.0502, "step": 57272 }, { "epoch": 2.81, "grad_norm": 0.7757865786552429, "learning_rate": 6.11824421885948e-06, "loss": 2.767, "step": 57273 }, { "epoch": 2.81, "grad_norm": 0.7310301661491394, "learning_rate": 6.115151068160373e-06, "loss": 2.7884, "step": 57274 }, { "epoch": 2.81, "grad_norm": 0.748052179813385, "learning_rate": 6.112058691497401e-06, "loss": 2.7668, "step": 57275 }, { "epoch": 2.81, "grad_norm": 0.7008647918701172, "learning_rate": 6.108967088878691e-06, "loss": 2.8638, "step": 57276 }, { "epoch": 2.81, "grad_norm": 0.7348527312278748, "learning_rate": 6.105876260312404e-06, "loss": 2.8012, "step": 57277 }, { "epoch": 2.81, "grad_norm": 0.7351037859916687, "learning_rate": 6.102786205806664e-06, "loss": 2.9923, "step": 57278 }, { "epoch": 2.81, "grad_norm": 0.7870645523071289, "learning_rate": 6.099696925369635e-06, "loss": 2.9804, "step": 57279 }, { "epoch": 2.81, "grad_norm": 0.7273508310317993, "learning_rate": 6.096608419009441e-06, "loss": 2.9093, "step": 57280 }, { "epoch": 2.81, "grad_norm": 0.7449030876159668, "learning_rate": 6.09352068673421e-06, "loss": 3.0118, "step": 57281 }, { "epoch": 2.81, "grad_norm": 0.7714568972587585, "learning_rate": 6.090433728552102e-06, "loss": 3.0912, "step": 57282 }, { "epoch": 2.81, "grad_norm": 0.7424775958061218, "learning_rate": 6.08734754447121e-06, "loss": 2.9796, "step": 57283 }, { "epoch": 2.81, "grad_norm": 0.7018720507621765, "learning_rate": 6.084262134499696e-06, "loss": 2.7236, "step": 57284 }, { "epoch": 2.81, "grad_norm": 0.8280545473098755, "learning_rate": 6.081177498645651e-06, "loss": 2.8014, "step": 57285 }, { "epoch": 2.81, "grad_norm": 0.7586017847061157, "learning_rate": 6.078093636917236e-06, "loss": 2.9029, "step": 57286 }, { "epoch": 2.81, "grad_norm": 0.7319121956825256, "learning_rate": 6.075010549322545e-06, "loss": 2.954, "step": 57287 }, { "epoch": 2.81, "grad_norm": 0.7442706823348999, "learning_rate": 6.071928235869705e-06, "loss": 2.993, "step": 57288 }, { "epoch": 2.81, "grad_norm": 0.7516229152679443, "learning_rate": 6.068846696566842e-06, "loss": 2.8055, "step": 57289 }, { "epoch": 2.81, "grad_norm": 0.7649211883544922, "learning_rate": 6.065765931422084e-06, "loss": 2.8407, "step": 57290 }, { "epoch": 2.81, "grad_norm": 0.7471636533737183, "learning_rate": 6.062685940443523e-06, "loss": 2.7969, "step": 57291 }, { "epoch": 2.81, "grad_norm": 0.7413542866706848, "learning_rate": 6.0596067236392874e-06, "loss": 2.9349, "step": 57292 }, { "epoch": 2.81, "grad_norm": 0.7625101804733276, "learning_rate": 6.056528281017436e-06, "loss": 3.0211, "step": 57293 }, { "epoch": 2.81, "grad_norm": 0.7629085779190063, "learning_rate": 6.0534506125861635e-06, "loss": 2.8523, "step": 57294 }, { "epoch": 2.81, "grad_norm": 0.7582331895828247, "learning_rate": 6.050373718353496e-06, "loss": 2.83, "step": 57295 }, { "epoch": 2.81, "grad_norm": 0.7564645409584045, "learning_rate": 6.047297598327561e-06, "loss": 2.7993, "step": 57296 }, { "epoch": 2.81, "grad_norm": 0.7481827735900879, "learning_rate": 6.044222252516551e-06, "loss": 2.9593, "step": 57297 }, { "epoch": 2.81, "grad_norm": 0.7227786183357239, "learning_rate": 6.041147680928427e-06, "loss": 3.0073, "step": 57298 }, { "epoch": 2.81, "grad_norm": 0.7628014087677002, "learning_rate": 6.038073883571348e-06, "loss": 3.0438, "step": 57299 }, { "epoch": 2.81, "grad_norm": 0.7520178556442261, "learning_rate": 6.035000860453409e-06, "loss": 2.8691, "step": 57300 }, { "epoch": 2.81, "grad_norm": 0.7767812013626099, "learning_rate": 6.031928611582704e-06, "loss": 2.8563, "step": 57301 }, { "epoch": 2.81, "grad_norm": 0.738016664981842, "learning_rate": 6.028857136967325e-06, "loss": 2.9511, "step": 57302 }, { "epoch": 2.81, "grad_norm": 0.7443080544471741, "learning_rate": 6.0257864366153655e-06, "loss": 2.7384, "step": 57303 }, { "epoch": 2.81, "grad_norm": 0.7690123915672302, "learning_rate": 6.02271651053492e-06, "loss": 2.7267, "step": 57304 }, { "epoch": 2.81, "grad_norm": 0.757183313369751, "learning_rate": 6.019647358734048e-06, "loss": 2.8482, "step": 57305 }, { "epoch": 2.81, "grad_norm": 0.7601922750473022, "learning_rate": 6.0165789812208435e-06, "loss": 2.9199, "step": 57306 }, { "epoch": 2.81, "grad_norm": 0.7741249799728394, "learning_rate": 6.0135113780034e-06, "loss": 3.0289, "step": 57307 }, { "epoch": 2.81, "grad_norm": 0.7413105368614197, "learning_rate": 6.010444549089777e-06, "loss": 2.9368, "step": 57308 }, { "epoch": 2.81, "grad_norm": 0.763995349407196, "learning_rate": 6.007378494488102e-06, "loss": 2.8091, "step": 57309 }, { "epoch": 2.81, "grad_norm": 0.742984414100647, "learning_rate": 6.004313214206369e-06, "loss": 2.9815, "step": 57310 }, { "epoch": 2.81, "grad_norm": 0.7137545347213745, "learning_rate": 6.001248708252704e-06, "loss": 2.8202, "step": 57311 }, { "epoch": 2.81, "grad_norm": 0.74273282289505, "learning_rate": 5.998184976635201e-06, "loss": 2.8075, "step": 57312 }, { "epoch": 2.81, "grad_norm": 0.7143562436103821, "learning_rate": 5.995122019361853e-06, "loss": 2.8815, "step": 57313 }, { "epoch": 2.81, "grad_norm": 0.713367760181427, "learning_rate": 5.992059836440788e-06, "loss": 2.8194, "step": 57314 }, { "epoch": 2.81, "grad_norm": 0.6914703845977783, "learning_rate": 5.988998427880032e-06, "loss": 2.8552, "step": 57315 }, { "epoch": 2.81, "grad_norm": 0.7266353368759155, "learning_rate": 5.985937793687679e-06, "loss": 2.7779, "step": 57316 }, { "epoch": 2.81, "grad_norm": 0.7659033536911011, "learning_rate": 5.982877933871822e-06, "loss": 3.0615, "step": 57317 }, { "epoch": 2.81, "grad_norm": 0.740502119064331, "learning_rate": 5.979818848440421e-06, "loss": 2.8445, "step": 57318 }, { "epoch": 2.81, "grad_norm": 0.7166577577590942, "learning_rate": 5.976760537401637e-06, "loss": 2.8114, "step": 57319 }, { "epoch": 2.81, "grad_norm": 0.7707402110099792, "learning_rate": 5.973703000763497e-06, "loss": 2.8491, "step": 57320 }, { "epoch": 2.81, "grad_norm": 0.7468615174293518, "learning_rate": 5.9706462385339936e-06, "loss": 2.8557, "step": 57321 }, { "epoch": 2.81, "grad_norm": 0.7503215670585632, "learning_rate": 5.967590250721222e-06, "loss": 2.9284, "step": 57322 }, { "epoch": 2.81, "grad_norm": 0.7697805166244507, "learning_rate": 5.96453503733324e-06, "loss": 2.8139, "step": 57323 }, { "epoch": 2.81, "grad_norm": 0.7666358351707458, "learning_rate": 5.96148059837811e-06, "loss": 3.2273, "step": 57324 }, { "epoch": 2.81, "grad_norm": 1.1106570959091187, "learning_rate": 5.958426933863791e-06, "loss": 2.9179, "step": 57325 }, { "epoch": 2.81, "grad_norm": 0.7379875183105469, "learning_rate": 5.955374043798444e-06, "loss": 2.8579, "step": 57326 }, { "epoch": 2.81, "grad_norm": 0.7157080173492432, "learning_rate": 5.952321928190029e-06, "loss": 2.6449, "step": 57327 }, { "epoch": 2.81, "grad_norm": 0.7337937355041504, "learning_rate": 5.949270587046606e-06, "loss": 2.6561, "step": 57328 }, { "epoch": 2.81, "grad_norm": 0.7767958641052246, "learning_rate": 5.946220020376235e-06, "loss": 2.7252, "step": 57329 }, { "epoch": 2.81, "grad_norm": 0.699264645576477, "learning_rate": 5.943170228186878e-06, "loss": 2.7687, "step": 57330 }, { "epoch": 2.81, "grad_norm": 0.769777238368988, "learning_rate": 5.940121210486659e-06, "loss": 3.0783, "step": 57331 }, { "epoch": 2.81, "grad_norm": 0.7534322738647461, "learning_rate": 5.93707296728354e-06, "loss": 2.7708, "step": 57332 }, { "epoch": 2.81, "grad_norm": 0.7632072567939758, "learning_rate": 5.934025498585615e-06, "loss": 2.759, "step": 57333 }, { "epoch": 2.81, "grad_norm": 0.7482007741928101, "learning_rate": 5.930978804400876e-06, "loss": 3.1279, "step": 57334 }, { "epoch": 2.81, "grad_norm": 0.7381764054298401, "learning_rate": 5.927932884737318e-06, "loss": 2.8895, "step": 57335 }, { "epoch": 2.81, "grad_norm": 0.7939985394477844, "learning_rate": 5.924887739602968e-06, "loss": 2.8252, "step": 57336 }, { "epoch": 2.81, "grad_norm": 0.7845645546913147, "learning_rate": 5.921843369005885e-06, "loss": 3.0684, "step": 57337 }, { "epoch": 2.81, "grad_norm": 0.7730729579925537, "learning_rate": 5.918799772954064e-06, "loss": 3.0756, "step": 57338 }, { "epoch": 2.81, "grad_norm": 0.7449852824211121, "learning_rate": 5.9157569514555635e-06, "loss": 3.0701, "step": 57339 }, { "epoch": 2.81, "grad_norm": 0.7435621023178101, "learning_rate": 5.912714904518312e-06, "loss": 2.9434, "step": 57340 }, { "epoch": 2.81, "grad_norm": 0.7603535652160645, "learning_rate": 5.909673632150436e-06, "loss": 3.0084, "step": 57341 }, { "epoch": 2.81, "grad_norm": 0.7573125958442688, "learning_rate": 5.9066331343598285e-06, "loss": 2.795, "step": 57342 }, { "epoch": 2.81, "grad_norm": 0.7497143745422363, "learning_rate": 5.90359341115455e-06, "loss": 2.8122, "step": 57343 }, { "epoch": 2.81, "grad_norm": 0.7319371104240417, "learning_rate": 5.900554462542662e-06, "loss": 2.9278, "step": 57344 }, { "epoch": 2.81, "grad_norm": 0.8178033828735352, "learning_rate": 5.897516288532056e-06, "loss": 2.8397, "step": 57345 }, { "epoch": 2.81, "grad_norm": 0.8565611243247986, "learning_rate": 5.894478889130794e-06, "loss": 2.8193, "step": 57346 }, { "epoch": 2.81, "grad_norm": 0.744626522064209, "learning_rate": 5.8914422643468685e-06, "loss": 2.9301, "step": 57347 }, { "epoch": 2.81, "grad_norm": 0.7861790060997009, "learning_rate": 5.88840641418834e-06, "loss": 2.8523, "step": 57348 }, { "epoch": 2.81, "grad_norm": 0.7643438577651978, "learning_rate": 5.885371338663103e-06, "loss": 2.8766, "step": 57349 }, { "epoch": 2.81, "grad_norm": 0.8093913197517395, "learning_rate": 5.882337037779183e-06, "loss": 3.0652, "step": 57350 }, { "epoch": 2.81, "grad_norm": 0.7430794835090637, "learning_rate": 5.879303511544576e-06, "loss": 2.8223, "step": 57351 }, { "epoch": 2.81, "grad_norm": 0.8071761131286621, "learning_rate": 5.876270759967305e-06, "loss": 2.9618, "step": 57352 }, { "epoch": 2.81, "grad_norm": 0.7300369143486023, "learning_rate": 5.873238783055334e-06, "loss": 2.9846, "step": 57353 }, { "epoch": 2.81, "grad_norm": 0.7400018572807312, "learning_rate": 5.870207580816655e-06, "loss": 2.6373, "step": 57354 }, { "epoch": 2.81, "grad_norm": 0.7521073818206787, "learning_rate": 5.8671771532592284e-06, "loss": 2.9761, "step": 57355 }, { "epoch": 2.81, "grad_norm": 0.7645672559738159, "learning_rate": 5.864147500391047e-06, "loss": 2.6966, "step": 57356 }, { "epoch": 2.81, "grad_norm": 0.7233006358146667, "learning_rate": 5.861118622220073e-06, "loss": 2.8905, "step": 57357 }, { "epoch": 2.81, "grad_norm": 0.7672364115715027, "learning_rate": 5.858090518754332e-06, "loss": 2.6379, "step": 57358 }, { "epoch": 2.81, "grad_norm": 0.7448053359985352, "learning_rate": 5.855063190001785e-06, "loss": 2.89, "step": 57359 }, { "epoch": 2.81, "grad_norm": 0.7966521978378296, "learning_rate": 5.8520366359703585e-06, "loss": 2.9453, "step": 57360 }, { "epoch": 2.81, "grad_norm": 0.851170003414154, "learning_rate": 5.849010856668079e-06, "loss": 2.8829, "step": 57361 }, { "epoch": 2.81, "grad_norm": 0.8057568073272705, "learning_rate": 5.845985852102874e-06, "loss": 2.6775, "step": 57362 }, { "epoch": 2.81, "grad_norm": 0.7434202432632446, "learning_rate": 5.842961622282738e-06, "loss": 2.8152, "step": 57363 }, { "epoch": 2.81, "grad_norm": 0.846804678440094, "learning_rate": 5.839938167215663e-06, "loss": 3.005, "step": 57364 }, { "epoch": 2.81, "grad_norm": 0.7839913368225098, "learning_rate": 5.836915486909544e-06, "loss": 2.8143, "step": 57365 }, { "epoch": 2.81, "grad_norm": 0.7269675731658936, "learning_rate": 5.833893581372373e-06, "loss": 2.9256, "step": 57366 }, { "epoch": 2.81, "grad_norm": 0.7645124793052673, "learning_rate": 5.830872450612112e-06, "loss": 2.9203, "step": 57367 }, { "epoch": 2.81, "grad_norm": 0.7519145011901855, "learning_rate": 5.82785209463672e-06, "loss": 2.7965, "step": 57368 }, { "epoch": 2.81, "grad_norm": 0.7483285665512085, "learning_rate": 5.824832513454192e-06, "loss": 2.703, "step": 57369 }, { "epoch": 2.81, "grad_norm": 0.767500638961792, "learning_rate": 5.82181370707242e-06, "loss": 2.6382, "step": 57370 }, { "epoch": 2.81, "grad_norm": 0.7527577877044678, "learning_rate": 5.8187956754993655e-06, "loss": 2.8592, "step": 57371 }, { "epoch": 2.81, "grad_norm": 0.7957414984703064, "learning_rate": 5.815778418742989e-06, "loss": 2.7785, "step": 57372 }, { "epoch": 2.81, "grad_norm": 0.7185649871826172, "learning_rate": 5.812761936811217e-06, "loss": 2.8294, "step": 57373 }, { "epoch": 2.81, "grad_norm": 0.7688578367233276, "learning_rate": 5.8097462297120425e-06, "loss": 2.8467, "step": 57374 }, { "epoch": 2.81, "grad_norm": 0.7571620941162109, "learning_rate": 5.806731297453326e-06, "loss": 2.8394, "step": 57375 }, { "epoch": 2.81, "grad_norm": 0.7824880480766296, "learning_rate": 5.803717140043129e-06, "loss": 2.8248, "step": 57376 }, { "epoch": 2.81, "grad_norm": 0.7680121064186096, "learning_rate": 5.800703757489311e-06, "loss": 3.0829, "step": 57377 }, { "epoch": 2.81, "grad_norm": 0.7361317276954651, "learning_rate": 5.7976911497997655e-06, "loss": 2.8745, "step": 57378 }, { "epoch": 2.81, "grad_norm": 0.7746545076370239, "learning_rate": 5.79467931698252e-06, "loss": 2.9367, "step": 57379 }, { "epoch": 2.81, "grad_norm": 0.7424500584602356, "learning_rate": 5.791668259045468e-06, "loss": 2.7415, "step": 57380 }, { "epoch": 2.81, "grad_norm": 0.9100118279457092, "learning_rate": 5.788657975996536e-06, "loss": 3.0618, "step": 57381 }, { "epoch": 2.81, "grad_norm": 0.768410325050354, "learning_rate": 5.785648467843651e-06, "loss": 2.8411, "step": 57382 }, { "epoch": 2.81, "grad_norm": 0.7742539644241333, "learning_rate": 5.782639734594741e-06, "loss": 2.8681, "step": 57383 }, { "epoch": 2.81, "grad_norm": 0.7879564762115479, "learning_rate": 5.779631776257765e-06, "loss": 2.8448, "step": 57384 }, { "epoch": 2.81, "grad_norm": 0.7167067527770996, "learning_rate": 5.776624592840584e-06, "loss": 2.8608, "step": 57385 }, { "epoch": 2.81, "grad_norm": 0.7402490377426147, "learning_rate": 5.773618184351192e-06, "loss": 3.1005, "step": 57386 }, { "epoch": 2.81, "grad_norm": 0.7294678092002869, "learning_rate": 5.770612550797416e-06, "loss": 2.8117, "step": 57387 }, { "epoch": 2.81, "grad_norm": 0.7386787533760071, "learning_rate": 5.767607692187215e-06, "loss": 2.7278, "step": 57388 }, { "epoch": 2.81, "grad_norm": 0.7458301782608032, "learning_rate": 5.7646036085285505e-06, "loss": 2.8675, "step": 57389 }, { "epoch": 2.81, "grad_norm": 0.7428733706474304, "learning_rate": 5.76160029982925e-06, "loss": 2.8125, "step": 57390 }, { "epoch": 2.81, "grad_norm": 0.7372967004776001, "learning_rate": 5.758597766097306e-06, "loss": 3.0284, "step": 57391 }, { "epoch": 2.81, "grad_norm": 0.7986836433410645, "learning_rate": 5.755596007340579e-06, "loss": 2.8643, "step": 57392 }, { "epoch": 2.81, "grad_norm": 0.7363787293434143, "learning_rate": 5.752595023566964e-06, "loss": 3.0305, "step": 57393 }, { "epoch": 2.81, "grad_norm": 0.7744364738464355, "learning_rate": 5.7495948147844194e-06, "loss": 2.8855, "step": 57394 }, { "epoch": 2.81, "grad_norm": 0.7604561448097229, "learning_rate": 5.746595381000774e-06, "loss": 2.8601, "step": 57395 }, { "epoch": 2.81, "grad_norm": 0.7391270399093628, "learning_rate": 5.74359672222402e-06, "loss": 2.9664, "step": 57396 }, { "epoch": 2.81, "grad_norm": 0.7320667505264282, "learning_rate": 5.740598838461951e-06, "loss": 3.0343, "step": 57397 }, { "epoch": 2.81, "grad_norm": 0.7917554378509521, "learning_rate": 5.73760172972253e-06, "loss": 2.9968, "step": 57398 }, { "epoch": 2.81, "grad_norm": 0.772669792175293, "learning_rate": 5.734605396013647e-06, "loss": 2.9576, "step": 57399 }, { "epoch": 2.81, "grad_norm": 0.7252935767173767, "learning_rate": 5.731609837343132e-06, "loss": 3.0033, "step": 57400 }, { "epoch": 2.81, "grad_norm": 0.7601112127304077, "learning_rate": 5.728615053718977e-06, "loss": 3.03, "step": 57401 }, { "epoch": 2.81, "grad_norm": 0.7780544757843018, "learning_rate": 5.725621045148976e-06, "loss": 2.9666, "step": 57402 }, { "epoch": 2.81, "grad_norm": 0.760009229183197, "learning_rate": 5.72262781164109e-06, "loss": 2.9728, "step": 57403 }, { "epoch": 2.81, "grad_norm": 0.7839109301567078, "learning_rate": 5.719635353203145e-06, "loss": 3.0657, "step": 57404 }, { "epoch": 2.81, "grad_norm": 0.7375196218490601, "learning_rate": 5.716643669843035e-06, "loss": 2.8473, "step": 57405 }, { "epoch": 2.81, "grad_norm": 0.7924519777297974, "learning_rate": 5.713652761568688e-06, "loss": 2.8607, "step": 57406 }, { "epoch": 2.81, "grad_norm": 0.7020806074142456, "learning_rate": 5.71066262838793e-06, "loss": 2.9978, "step": 57407 }, { "epoch": 2.81, "grad_norm": 0.744614839553833, "learning_rate": 5.707673270308655e-06, "loss": 3.1015, "step": 57408 }, { "epoch": 2.81, "grad_norm": 0.7562986612319946, "learning_rate": 5.704684687338723e-06, "loss": 2.7133, "step": 57409 }, { "epoch": 2.81, "grad_norm": 0.7199798226356506, "learning_rate": 5.7016968794859955e-06, "loss": 2.8839, "step": 57410 }, { "epoch": 2.81, "grad_norm": 0.7654950022697449, "learning_rate": 5.698709846758364e-06, "loss": 2.9377, "step": 57411 }, { "epoch": 2.81, "grad_norm": 0.737543523311615, "learning_rate": 5.695723589163692e-06, "loss": 2.7927, "step": 57412 }, { "epoch": 2.81, "grad_norm": 0.7589178681373596, "learning_rate": 5.692738106709871e-06, "loss": 2.9463, "step": 57413 }, { "epoch": 2.81, "grad_norm": 0.7573418021202087, "learning_rate": 5.689753399404729e-06, "loss": 2.8789, "step": 57414 }, { "epoch": 2.81, "grad_norm": 0.7789357900619507, "learning_rate": 5.686769467256125e-06, "loss": 2.8112, "step": 57415 }, { "epoch": 2.81, "grad_norm": 0.7354355454444885, "learning_rate": 5.683786310271921e-06, "loss": 2.6794, "step": 57416 }, { "epoch": 2.81, "grad_norm": 0.7149415016174316, "learning_rate": 5.680803928459976e-06, "loss": 2.7977, "step": 57417 }, { "epoch": 2.81, "grad_norm": 0.7695300579071045, "learning_rate": 5.677822321828152e-06, "loss": 2.9736, "step": 57418 }, { "epoch": 2.81, "grad_norm": 0.7486740350723267, "learning_rate": 5.674841490384341e-06, "loss": 2.9578, "step": 57419 }, { "epoch": 2.81, "grad_norm": 0.8076779842376709, "learning_rate": 5.671861434136304e-06, "loss": 2.9316, "step": 57420 }, { "epoch": 2.81, "grad_norm": 0.766984224319458, "learning_rate": 5.668882153091969e-06, "loss": 2.9472, "step": 57421 }, { "epoch": 2.81, "grad_norm": 0.7838339805603027, "learning_rate": 5.665903647259162e-06, "loss": 2.7034, "step": 57422 }, { "epoch": 2.81, "grad_norm": 0.764181911945343, "learning_rate": 5.662925916645677e-06, "loss": 2.7349, "step": 57423 }, { "epoch": 2.81, "grad_norm": 0.7412382960319519, "learning_rate": 5.659948961259442e-06, "loss": 3.072, "step": 57424 }, { "epoch": 2.81, "grad_norm": 0.7331834435462952, "learning_rate": 5.656972781108216e-06, "loss": 2.9661, "step": 57425 }, { "epoch": 2.81, "grad_norm": 0.7448732852935791, "learning_rate": 5.653997376199925e-06, "loss": 2.9843, "step": 57426 }, { "epoch": 2.81, "grad_norm": 0.739306628704071, "learning_rate": 5.6510227465423e-06, "loss": 2.8614, "step": 57427 }, { "epoch": 2.81, "grad_norm": 0.8040658235549927, "learning_rate": 5.6480488921432645e-06, "loss": 2.9276, "step": 57428 }, { "epoch": 2.81, "grad_norm": 0.7358850240707397, "learning_rate": 5.645075813010647e-06, "loss": 2.9018, "step": 57429 }, { "epoch": 2.81, "grad_norm": 0.7447613477706909, "learning_rate": 5.642103509152207e-06, "loss": 2.9166, "step": 57430 }, { "epoch": 2.81, "grad_norm": 0.8018415570259094, "learning_rate": 5.6391319805758396e-06, "loss": 2.8888, "step": 57431 }, { "epoch": 2.81, "grad_norm": 0.7493090033531189, "learning_rate": 5.6361612272893045e-06, "loss": 2.8736, "step": 57432 }, { "epoch": 2.81, "grad_norm": 0.770797073841095, "learning_rate": 5.633191249300529e-06, "loss": 2.8206, "step": 57433 }, { "epoch": 2.81, "grad_norm": 0.7289892435073853, "learning_rate": 5.630222046617205e-06, "loss": 2.8424, "step": 57434 }, { "epoch": 2.81, "grad_norm": 0.7826147079467773, "learning_rate": 5.627253619247296e-06, "loss": 2.9898, "step": 57435 }, { "epoch": 2.81, "grad_norm": 0.7384172081947327, "learning_rate": 5.624285967198528e-06, "loss": 2.7443, "step": 57436 }, { "epoch": 2.81, "grad_norm": 0.7605634927749634, "learning_rate": 5.621319090478693e-06, "loss": 2.8757, "step": 57437 }, { "epoch": 2.81, "grad_norm": 0.7587273120880127, "learning_rate": 5.6183529890957205e-06, "loss": 2.8125, "step": 57438 }, { "epoch": 2.81, "grad_norm": 0.6983641982078552, "learning_rate": 5.615387663057302e-06, "loss": 2.8265, "step": 57439 }, { "epoch": 2.81, "grad_norm": 0.7505119442939758, "learning_rate": 5.612423112371267e-06, "loss": 3.1734, "step": 57440 }, { "epoch": 2.82, "grad_norm": 0.7802063822746277, "learning_rate": 5.609459337045507e-06, "loss": 3.1378, "step": 57441 }, { "epoch": 2.82, "grad_norm": 0.7453617453575134, "learning_rate": 5.606496337087751e-06, "loss": 2.9801, "step": 57442 }, { "epoch": 2.82, "grad_norm": 0.7429621815681458, "learning_rate": 5.603534112505859e-06, "loss": 3.0071, "step": 57443 }, { "epoch": 2.82, "grad_norm": 0.7750768661499023, "learning_rate": 5.60057266330759e-06, "loss": 2.9811, "step": 57444 }, { "epoch": 2.82, "grad_norm": 0.767142117023468, "learning_rate": 5.5976119895007385e-06, "loss": 2.6831, "step": 57445 }, { "epoch": 2.82, "grad_norm": 0.8097765445709229, "learning_rate": 5.594652091093133e-06, "loss": 3.0158, "step": 57446 }, { "epoch": 2.82, "grad_norm": 0.735511064529419, "learning_rate": 5.591692968092532e-06, "loss": 2.8753, "step": 57447 }, { "epoch": 2.82, "grad_norm": 0.778668224811554, "learning_rate": 5.588734620506763e-06, "loss": 2.624, "step": 57448 }, { "epoch": 2.82, "grad_norm": 0.7237783670425415, "learning_rate": 5.585777048343587e-06, "loss": 2.9627, "step": 57449 }, { "epoch": 2.82, "grad_norm": 0.7732894420623779, "learning_rate": 5.582820251610864e-06, "loss": 2.9355, "step": 57450 }, { "epoch": 2.82, "grad_norm": 0.7555185556411743, "learning_rate": 5.5798642303163225e-06, "loss": 2.781, "step": 57451 }, { "epoch": 2.82, "grad_norm": 0.7874925136566162, "learning_rate": 5.576908984467721e-06, "loss": 2.9256, "step": 57452 }, { "epoch": 2.82, "grad_norm": 0.7143713235855103, "learning_rate": 5.573954514072887e-06, "loss": 2.8541, "step": 57453 }, { "epoch": 2.82, "grad_norm": 0.7951681613922119, "learning_rate": 5.571000819139582e-06, "loss": 2.9457, "step": 57454 }, { "epoch": 2.82, "grad_norm": 0.7409695982933044, "learning_rate": 5.568047899675599e-06, "loss": 3.0258, "step": 57455 }, { "epoch": 2.82, "grad_norm": 0.7536093592643738, "learning_rate": 5.565095755688731e-06, "loss": 2.9839, "step": 57456 }, { "epoch": 2.82, "grad_norm": 0.7845450639724731, "learning_rate": 5.56214438718674e-06, "loss": 2.8603, "step": 57457 }, { "epoch": 2.82, "grad_norm": 0.7714955806732178, "learning_rate": 5.559193794177419e-06, "loss": 3.0423, "step": 57458 }, { "epoch": 2.82, "grad_norm": 0.7862733602523804, "learning_rate": 5.556243976668462e-06, "loss": 2.7072, "step": 57459 }, { "epoch": 2.82, "grad_norm": 0.7352171540260315, "learning_rate": 5.553294934667696e-06, "loss": 3.0502, "step": 57460 }, { "epoch": 2.82, "grad_norm": 0.7902106642723083, "learning_rate": 5.5503466681829146e-06, "loss": 2.8126, "step": 57461 }, { "epoch": 2.82, "grad_norm": 0.7630064487457275, "learning_rate": 5.547399177221812e-06, "loss": 2.8373, "step": 57462 }, { "epoch": 2.82, "grad_norm": 0.7489845156669617, "learning_rate": 5.544452461792248e-06, "loss": 2.9392, "step": 57463 }, { "epoch": 2.82, "grad_norm": 0.7420606017112732, "learning_rate": 5.541506521901884e-06, "loss": 2.9887, "step": 57464 }, { "epoch": 2.82, "grad_norm": 0.7396260499954224, "learning_rate": 5.538561357558546e-06, "loss": 2.5488, "step": 57465 }, { "epoch": 2.82, "grad_norm": 0.7430559992790222, "learning_rate": 5.535616968769963e-06, "loss": 2.9337, "step": 57466 }, { "epoch": 2.82, "grad_norm": 0.7690236568450928, "learning_rate": 5.53267335554386e-06, "loss": 2.9588, "step": 57467 }, { "epoch": 2.82, "grad_norm": 0.733445405960083, "learning_rate": 5.529730517888064e-06, "loss": 2.8503, "step": 57468 }, { "epoch": 2.82, "grad_norm": 0.7059511542320251, "learning_rate": 5.526788455810271e-06, "loss": 2.9426, "step": 57469 }, { "epoch": 2.82, "grad_norm": 0.7520898580551147, "learning_rate": 5.52384716931824e-06, "loss": 2.9003, "step": 57470 }, { "epoch": 2.82, "grad_norm": 0.8237969875335693, "learning_rate": 5.520906658419733e-06, "loss": 2.8435, "step": 57471 }, { "epoch": 2.82, "grad_norm": 0.7647461891174316, "learning_rate": 5.517966923122508e-06, "loss": 2.9031, "step": 57472 }, { "epoch": 2.82, "grad_norm": 0.7231850624084473, "learning_rate": 5.51502796343426e-06, "loss": 2.7187, "step": 57473 }, { "epoch": 2.82, "grad_norm": 0.6980883479118347, "learning_rate": 5.512089779362716e-06, "loss": 2.8957, "step": 57474 }, { "epoch": 2.82, "grad_norm": 0.7455374598503113, "learning_rate": 5.50915237091567e-06, "loss": 2.9012, "step": 57475 }, { "epoch": 2.82, "grad_norm": 0.7731330990791321, "learning_rate": 5.506215738100883e-06, "loss": 3.0828, "step": 57476 }, { "epoch": 2.82, "grad_norm": 0.7092301249504089, "learning_rate": 5.503279880926015e-06, "loss": 2.9492, "step": 57477 }, { "epoch": 2.82, "grad_norm": 0.7573080062866211, "learning_rate": 5.5003447993988255e-06, "loss": 2.8745, "step": 57478 }, { "epoch": 2.82, "grad_norm": 0.7752591371536255, "learning_rate": 5.497410493527077e-06, "loss": 2.9229, "step": 57479 }, { "epoch": 2.82, "grad_norm": 0.7782385945320129, "learning_rate": 5.4944769633184616e-06, "loss": 3.0516, "step": 57480 }, { "epoch": 2.82, "grad_norm": 0.7580782175064087, "learning_rate": 5.491544208780707e-06, "loss": 2.9722, "step": 57481 }, { "epoch": 2.82, "grad_norm": 0.749438464641571, "learning_rate": 5.488612229921541e-06, "loss": 2.9657, "step": 57482 }, { "epoch": 2.82, "grad_norm": 0.7620990872383118, "learning_rate": 5.485681026748723e-06, "loss": 2.8918, "step": 57483 }, { "epoch": 2.82, "grad_norm": 0.7435450553894043, "learning_rate": 5.48275059926988e-06, "loss": 2.8467, "step": 57484 }, { "epoch": 2.82, "grad_norm": 0.7541936635971069, "learning_rate": 5.4798209474928075e-06, "loss": 2.6854, "step": 57485 }, { "epoch": 2.82, "grad_norm": 0.7550050020217896, "learning_rate": 5.4768920714252315e-06, "loss": 2.6602, "step": 57486 }, { "epoch": 2.82, "grad_norm": 0.7599036693572998, "learning_rate": 5.473963971074846e-06, "loss": 2.8434, "step": 57487 }, { "epoch": 2.82, "grad_norm": 0.714177668094635, "learning_rate": 5.471036646449378e-06, "loss": 2.844, "step": 57488 }, { "epoch": 2.82, "grad_norm": 0.7540480494499207, "learning_rate": 5.468110097556455e-06, "loss": 2.8204, "step": 57489 }, { "epoch": 2.82, "grad_norm": 0.7340836524963379, "learning_rate": 5.4651843244038705e-06, "loss": 2.8181, "step": 57490 }, { "epoch": 2.82, "grad_norm": 0.7581061124801636, "learning_rate": 5.462259326999319e-06, "loss": 2.8814, "step": 57491 }, { "epoch": 2.82, "grad_norm": 0.834175169467926, "learning_rate": 5.45933510535046e-06, "loss": 2.9842, "step": 57492 }, { "epoch": 2.82, "grad_norm": 0.7358565330505371, "learning_rate": 5.456411659465054e-06, "loss": 2.8919, "step": 57493 }, { "epoch": 2.82, "grad_norm": 0.7109126448631287, "learning_rate": 5.453488989350762e-06, "loss": 2.7427, "step": 57494 }, { "epoch": 2.82, "grad_norm": 0.724243700504303, "learning_rate": 5.450567095015312e-06, "loss": 2.5712, "step": 57495 }, { "epoch": 2.82, "grad_norm": 0.7592142224311829, "learning_rate": 5.447645976466364e-06, "loss": 2.9613, "step": 57496 }, { "epoch": 2.82, "grad_norm": 0.7279868125915527, "learning_rate": 5.444725633711611e-06, "loss": 2.8802, "step": 57497 }, { "epoch": 2.82, "grad_norm": 0.7003878355026245, "learning_rate": 5.4418060667588135e-06, "loss": 3.1777, "step": 57498 }, { "epoch": 2.82, "grad_norm": 0.7382853031158447, "learning_rate": 5.438887275615566e-06, "loss": 2.8485, "step": 57499 }, { "epoch": 2.82, "grad_norm": 0.7092037200927734, "learning_rate": 5.43596926028963e-06, "loss": 2.8656, "step": 57500 }, { "epoch": 2.82, "grad_norm": 0.756615936756134, "learning_rate": 5.433052020788631e-06, "loss": 3.0391, "step": 57501 }, { "epoch": 2.82, "grad_norm": 0.7301920056343079, "learning_rate": 5.4301355571202965e-06, "loss": 2.8625, "step": 57502 }, { "epoch": 2.82, "grad_norm": 0.753453254699707, "learning_rate": 5.427219869292288e-06, "loss": 2.8667, "step": 57503 }, { "epoch": 2.82, "grad_norm": 0.8032575845718384, "learning_rate": 5.424304957312298e-06, "loss": 2.8938, "step": 57504 }, { "epoch": 2.82, "grad_norm": 0.7729468941688538, "learning_rate": 5.421390821187988e-06, "loss": 3.0574, "step": 57505 }, { "epoch": 2.82, "grad_norm": 0.7835939526557922, "learning_rate": 5.418477460927051e-06, "loss": 2.7424, "step": 57506 }, { "epoch": 2.82, "grad_norm": 0.7130961418151855, "learning_rate": 5.415564876537149e-06, "loss": 2.6947, "step": 57507 }, { "epoch": 2.82, "grad_norm": 0.7707082033157349, "learning_rate": 5.412653068025941e-06, "loss": 2.8117, "step": 57508 }, { "epoch": 2.82, "grad_norm": 0.7709498405456543, "learning_rate": 5.409742035401154e-06, "loss": 2.7995, "step": 57509 }, { "epoch": 2.82, "grad_norm": 0.7126640677452087, "learning_rate": 5.406831778670351e-06, "loss": 2.8213, "step": 57510 }, { "epoch": 2.82, "grad_norm": 0.7614896893501282, "learning_rate": 5.403922297841323e-06, "loss": 2.8555, "step": 57511 }, { "epoch": 2.82, "grad_norm": 0.7267091870307922, "learning_rate": 5.4010135929216e-06, "loss": 2.9359, "step": 57512 }, { "epoch": 2.82, "grad_norm": 0.7336955070495605, "learning_rate": 5.398105663918972e-06, "loss": 2.9989, "step": 57513 }, { "epoch": 2.82, "grad_norm": 0.7180055975914001, "learning_rate": 5.39519851084097e-06, "loss": 3.036, "step": 57514 }, { "epoch": 2.82, "grad_norm": 0.8044130206108093, "learning_rate": 5.392292133695386e-06, "loss": 2.8144, "step": 57515 }, { "epoch": 2.82, "grad_norm": 0.7591328024864197, "learning_rate": 5.389386532489781e-06, "loss": 2.801, "step": 57516 }, { "epoch": 2.82, "grad_norm": 0.7301998138427734, "learning_rate": 5.386481707231816e-06, "loss": 2.7639, "step": 57517 }, { "epoch": 2.82, "grad_norm": 0.7751278281211853, "learning_rate": 5.3835776579291835e-06, "loss": 2.7864, "step": 57518 }, { "epoch": 2.82, "grad_norm": 0.7999823689460754, "learning_rate": 5.3806743845894784e-06, "loss": 2.9398, "step": 57519 }, { "epoch": 2.82, "grad_norm": 0.7624409794807434, "learning_rate": 5.377771887220394e-06, "loss": 2.5767, "step": 57520 }, { "epoch": 2.82, "grad_norm": 0.8037823438644409, "learning_rate": 5.374870165829559e-06, "loss": 2.9523, "step": 57521 }, { "epoch": 2.82, "grad_norm": 0.7656051516532898, "learning_rate": 5.371969220424599e-06, "loss": 2.8153, "step": 57522 }, { "epoch": 2.82, "grad_norm": 0.7563663125038147, "learning_rate": 5.369069051013208e-06, "loss": 2.8219, "step": 57523 }, { "epoch": 2.82, "grad_norm": 0.7026633620262146, "learning_rate": 5.366169657602948e-06, "loss": 2.8431, "step": 57524 }, { "epoch": 2.82, "grad_norm": 0.7382919192314148, "learning_rate": 5.363271040201511e-06, "loss": 2.9779, "step": 57525 }, { "epoch": 2.82, "grad_norm": 1.0166808366775513, "learning_rate": 5.360373198816492e-06, "loss": 2.9759, "step": 57526 }, { "epoch": 2.82, "grad_norm": 0.7511675357818604, "learning_rate": 5.357476133455552e-06, "loss": 2.8831, "step": 57527 }, { "epoch": 2.82, "grad_norm": 0.7667033076286316, "learning_rate": 5.354579844126317e-06, "loss": 2.8983, "step": 57528 }, { "epoch": 2.82, "grad_norm": 0.7955288290977478, "learning_rate": 5.351684330836414e-06, "loss": 2.9765, "step": 57529 }, { "epoch": 2.82, "grad_norm": 0.7648731470108032, "learning_rate": 5.348789593593505e-06, "loss": 2.9163, "step": 57530 }, { "epoch": 2.82, "grad_norm": 0.8392326235771179, "learning_rate": 5.3458956324051504e-06, "loss": 2.8243, "step": 57531 }, { "epoch": 2.82, "grad_norm": 0.7558670043945312, "learning_rate": 5.343002447278977e-06, "loss": 3.025, "step": 57532 }, { "epoch": 2.82, "grad_norm": 0.7797101736068726, "learning_rate": 5.340110038222644e-06, "loss": 3.0272, "step": 57533 }, { "epoch": 2.82, "grad_norm": 0.7675113081932068, "learning_rate": 5.337218405243748e-06, "loss": 3.2485, "step": 57534 }, { "epoch": 2.82, "grad_norm": 0.7596341371536255, "learning_rate": 5.334327548349948e-06, "loss": 3.0808, "step": 57535 }, { "epoch": 2.82, "grad_norm": 0.7368146777153015, "learning_rate": 5.3314374675487714e-06, "loss": 2.724, "step": 57536 }, { "epoch": 2.82, "grad_norm": 0.7727293968200684, "learning_rate": 5.328548162847879e-06, "loss": 2.7899, "step": 57537 }, { "epoch": 2.82, "grad_norm": 0.71891850233078, "learning_rate": 5.325659634254931e-06, "loss": 2.8625, "step": 57538 }, { "epoch": 2.82, "grad_norm": 0.7327159643173218, "learning_rate": 5.322771881777421e-06, "loss": 2.9382, "step": 57539 }, { "epoch": 2.82, "grad_norm": 0.7162185311317444, "learning_rate": 5.319884905423077e-06, "loss": 2.7911, "step": 57540 }, { "epoch": 2.82, "grad_norm": 0.740092396736145, "learning_rate": 5.316998705199393e-06, "loss": 2.9277, "step": 57541 }, { "epoch": 2.82, "grad_norm": 0.771077036857605, "learning_rate": 5.31411328111403e-06, "loss": 2.7363, "step": 57542 }, { "epoch": 2.82, "grad_norm": 0.728895902633667, "learning_rate": 5.31122863317458e-06, "loss": 2.8725, "step": 57543 }, { "epoch": 2.82, "grad_norm": 0.7603982090950012, "learning_rate": 5.308344761388639e-06, "loss": 2.8479, "step": 57544 }, { "epoch": 2.82, "grad_norm": 0.7482584118843079, "learning_rate": 5.305461665763833e-06, "loss": 2.876, "step": 57545 }, { "epoch": 2.82, "grad_norm": 0.7513059377670288, "learning_rate": 5.302579346307723e-06, "loss": 2.9574, "step": 57546 }, { "epoch": 2.82, "grad_norm": 0.7721797227859497, "learning_rate": 5.299697803027869e-06, "loss": 3.2145, "step": 57547 }, { "epoch": 2.82, "grad_norm": 0.7654960751533508, "learning_rate": 5.296817035931933e-06, "loss": 3.0258, "step": 57548 }, { "epoch": 2.82, "grad_norm": 0.793851912021637, "learning_rate": 5.293937045027441e-06, "loss": 2.9649, "step": 57549 }, { "epoch": 2.82, "grad_norm": 0.7595252394676208, "learning_rate": 5.2910578303220205e-06, "loss": 2.8733, "step": 57550 }, { "epoch": 2.82, "grad_norm": 0.7790116667747498, "learning_rate": 5.2881793918232e-06, "loss": 2.889, "step": 57551 }, { "epoch": 2.82, "grad_norm": 0.74222332239151, "learning_rate": 5.285301729538638e-06, "loss": 2.9103, "step": 57552 }, { "epoch": 2.82, "grad_norm": 0.793062150478363, "learning_rate": 5.282424843475863e-06, "loss": 2.8367, "step": 57553 }, { "epoch": 2.82, "grad_norm": 0.7489209175109863, "learning_rate": 5.279548733642436e-06, "loss": 2.8782, "step": 57554 }, { "epoch": 2.82, "grad_norm": 0.7790075540542603, "learning_rate": 5.276673400045983e-06, "loss": 2.9005, "step": 57555 }, { "epoch": 2.82, "grad_norm": 0.7113046646118164, "learning_rate": 5.273798842694033e-06, "loss": 2.7339, "step": 57556 }, { "epoch": 2.82, "grad_norm": 0.7641512751579285, "learning_rate": 5.2709250615941445e-06, "loss": 2.8315, "step": 57557 }, { "epoch": 2.82, "grad_norm": 0.7478774189949036, "learning_rate": 5.26805205675398e-06, "loss": 2.7826, "step": 57558 }, { "epoch": 2.82, "grad_norm": 0.7339596748352051, "learning_rate": 5.265179828181032e-06, "loss": 2.9666, "step": 57559 }, { "epoch": 2.82, "grad_norm": 0.7537974715232849, "learning_rate": 5.2623083758828624e-06, "loss": 2.9265, "step": 57560 }, { "epoch": 2.82, "grad_norm": 0.7144533395767212, "learning_rate": 5.259437699867064e-06, "loss": 2.8843, "step": 57561 }, { "epoch": 2.82, "grad_norm": 0.7479895949363708, "learning_rate": 5.256567800141165e-06, "loss": 2.9927, "step": 57562 }, { "epoch": 2.82, "grad_norm": 0.7054181098937988, "learning_rate": 5.253698676712759e-06, "loss": 2.8793, "step": 57563 }, { "epoch": 2.82, "grad_norm": 0.7824179530143738, "learning_rate": 5.250830329589339e-06, "loss": 2.8978, "step": 57564 }, { "epoch": 2.82, "grad_norm": 0.7174307703971863, "learning_rate": 5.2479627587785675e-06, "loss": 2.8663, "step": 57565 }, { "epoch": 2.82, "grad_norm": 0.7782198786735535, "learning_rate": 5.245095964287904e-06, "loss": 2.8694, "step": 57566 }, { "epoch": 2.82, "grad_norm": 0.7361534833908081, "learning_rate": 5.242229946124943e-06, "loss": 2.8386, "step": 57567 }, { "epoch": 2.82, "grad_norm": 0.7740105986595154, "learning_rate": 5.239364704297211e-06, "loss": 2.7616, "step": 57568 }, { "epoch": 2.82, "grad_norm": 0.7601125240325928, "learning_rate": 5.236500238812269e-06, "loss": 3.121, "step": 57569 }, { "epoch": 2.82, "grad_norm": 0.7358449697494507, "learning_rate": 5.233636549677678e-06, "loss": 2.883, "step": 57570 }, { "epoch": 2.82, "grad_norm": 0.7319931387901306, "learning_rate": 5.230773636900898e-06, "loss": 2.9506, "step": 57571 }, { "epoch": 2.82, "grad_norm": 0.7816628217697144, "learning_rate": 5.22791150048959e-06, "loss": 2.8939, "step": 57572 }, { "epoch": 2.82, "grad_norm": 0.7447727918624878, "learning_rate": 5.225050140451215e-06, "loss": 2.9117, "step": 57573 }, { "epoch": 2.82, "grad_norm": 0.7741653919219971, "learning_rate": 5.222189556793332e-06, "loss": 2.7785, "step": 57574 }, { "epoch": 2.82, "grad_norm": 0.7777344584465027, "learning_rate": 5.219329749523471e-06, "loss": 2.7844, "step": 57575 }, { "epoch": 2.82, "grad_norm": 0.7560518980026245, "learning_rate": 5.216470718649157e-06, "loss": 3.047, "step": 57576 }, { "epoch": 2.82, "grad_norm": 0.7396130561828613, "learning_rate": 5.213612464177918e-06, "loss": 2.8337, "step": 57577 }, { "epoch": 2.82, "grad_norm": 0.7314813137054443, "learning_rate": 5.210754986117316e-06, "loss": 2.9577, "step": 57578 }, { "epoch": 2.82, "grad_norm": 0.7825134992599487, "learning_rate": 5.207898284474809e-06, "loss": 2.8162, "step": 57579 }, { "epoch": 2.82, "grad_norm": 0.787757933139801, "learning_rate": 5.205042359257994e-06, "loss": 3.0029, "step": 57580 }, { "epoch": 2.82, "grad_norm": 0.7091009616851807, "learning_rate": 5.202187210474396e-06, "loss": 2.8445, "step": 57581 }, { "epoch": 2.82, "grad_norm": 0.7412608861923218, "learning_rate": 5.199332838131443e-06, "loss": 2.9387, "step": 57582 }, { "epoch": 2.82, "grad_norm": 0.7515140771865845, "learning_rate": 5.196479242236762e-06, "loss": 2.7867, "step": 57583 }, { "epoch": 2.82, "grad_norm": 0.7683895826339722, "learning_rate": 5.1936264227977806e-06, "loss": 3.0592, "step": 57584 }, { "epoch": 2.82, "grad_norm": 0.7576361298561096, "learning_rate": 5.1907743798220606e-06, "loss": 2.7596, "step": 57585 }, { "epoch": 2.82, "grad_norm": 0.7240979075431824, "learning_rate": 5.187923113317094e-06, "loss": 3.0416, "step": 57586 }, { "epoch": 2.82, "grad_norm": 0.7744309306144714, "learning_rate": 5.1850726232904095e-06, "loss": 2.9025, "step": 57587 }, { "epoch": 2.82, "grad_norm": 0.7236972451210022, "learning_rate": 5.182222909749501e-06, "loss": 2.9219, "step": 57588 }, { "epoch": 2.82, "grad_norm": 0.7626051306724548, "learning_rate": 5.179373972701895e-06, "loss": 3.0846, "step": 57589 }, { "epoch": 2.82, "grad_norm": 0.7822689414024353, "learning_rate": 5.176525812155085e-06, "loss": 2.8351, "step": 57590 }, { "epoch": 2.82, "grad_norm": 0.7402531504631042, "learning_rate": 5.173678428116534e-06, "loss": 2.8016, "step": 57591 }, { "epoch": 2.82, "grad_norm": 0.8098210692405701, "learning_rate": 5.170831820593802e-06, "loss": 3.0032, "step": 57592 }, { "epoch": 2.82, "grad_norm": 0.7769423723220825, "learning_rate": 5.167985989594348e-06, "loss": 2.9816, "step": 57593 }, { "epoch": 2.82, "grad_norm": 0.7293099761009216, "learning_rate": 5.1651409351256665e-06, "loss": 2.828, "step": 57594 }, { "epoch": 2.82, "grad_norm": 0.7770448923110962, "learning_rate": 5.162296657195253e-06, "loss": 2.9258, "step": 57595 }, { "epoch": 2.82, "grad_norm": 0.7682694792747498, "learning_rate": 5.159453155810633e-06, "loss": 2.7589, "step": 57596 }, { "epoch": 2.82, "grad_norm": 0.7171539068222046, "learning_rate": 5.156610430979302e-06, "loss": 2.7792, "step": 57597 }, { "epoch": 2.82, "grad_norm": 0.7691710591316223, "learning_rate": 5.1537684827086535e-06, "loss": 2.9293, "step": 57598 }, { "epoch": 2.82, "grad_norm": 0.7626630663871765, "learning_rate": 5.1509273110062475e-06, "loss": 2.9575, "step": 57599 }, { "epoch": 2.82, "grad_norm": 0.7831852436065674, "learning_rate": 5.148086915879579e-06, "loss": 2.7166, "step": 57600 }, { "epoch": 2.82, "grad_norm": 0.734420895576477, "learning_rate": 5.145247297336075e-06, "loss": 2.883, "step": 57601 }, { "epoch": 2.82, "grad_norm": 0.7334008812904358, "learning_rate": 5.142408455383262e-06, "loss": 3.034, "step": 57602 }, { "epoch": 2.82, "grad_norm": 0.7297717928886414, "learning_rate": 5.139570390028602e-06, "loss": 2.6785, "step": 57603 }, { "epoch": 2.82, "grad_norm": 0.7485255002975464, "learning_rate": 5.136733101279556e-06, "loss": 3.0351, "step": 57604 }, { "epoch": 2.82, "grad_norm": 0.7516413927078247, "learning_rate": 5.133896589143616e-06, "loss": 2.8879, "step": 57605 }, { "epoch": 2.82, "grad_norm": 0.8280989527702332, "learning_rate": 5.1310608536282105e-06, "loss": 2.9283, "step": 57606 }, { "epoch": 2.82, "grad_norm": 0.7576739192008972, "learning_rate": 5.128225894740867e-06, "loss": 2.8524, "step": 57607 }, { "epoch": 2.82, "grad_norm": 0.7196022868156433, "learning_rate": 5.125391712489047e-06, "loss": 2.8059, "step": 57608 }, { "epoch": 2.82, "grad_norm": 0.7515164017677307, "learning_rate": 5.122558306880143e-06, "loss": 2.6472, "step": 57609 }, { "epoch": 2.82, "grad_norm": 0.7482687830924988, "learning_rate": 5.119725677921715e-06, "loss": 3.0543, "step": 57610 }, { "epoch": 2.82, "grad_norm": 0.7849657535552979, "learning_rate": 5.116893825621194e-06, "loss": 2.604, "step": 57611 }, { "epoch": 2.82, "grad_norm": 0.737377941608429, "learning_rate": 5.114062749986003e-06, "loss": 2.8361, "step": 57612 }, { "epoch": 2.82, "grad_norm": 0.7955058217048645, "learning_rate": 5.1112324510236055e-06, "loss": 2.9009, "step": 57613 }, { "epoch": 2.82, "grad_norm": 0.7713934183120728, "learning_rate": 5.108402928741462e-06, "loss": 2.5417, "step": 57614 }, { "epoch": 2.82, "grad_norm": 0.7756964564323425, "learning_rate": 5.105574183147065e-06, "loss": 2.8647, "step": 57615 }, { "epoch": 2.82, "grad_norm": 0.7626116871833801, "learning_rate": 5.10274621424781e-06, "loss": 2.8613, "step": 57616 }, { "epoch": 2.82, "grad_norm": 0.7126087546348572, "learning_rate": 5.099919022051157e-06, "loss": 2.7191, "step": 57617 }, { "epoch": 2.82, "grad_norm": 0.7495712637901306, "learning_rate": 5.097092606564601e-06, "loss": 2.7508, "step": 57618 }, { "epoch": 2.82, "grad_norm": 0.743903398513794, "learning_rate": 5.094266967795502e-06, "loss": 2.9745, "step": 57619 }, { "epoch": 2.82, "grad_norm": 0.7743878960609436, "learning_rate": 5.091442105751387e-06, "loss": 2.6665, "step": 57620 }, { "epoch": 2.82, "grad_norm": 0.7580164074897766, "learning_rate": 5.0886180204396165e-06, "loss": 2.9658, "step": 57621 }, { "epoch": 2.82, "grad_norm": 0.7179275155067444, "learning_rate": 5.085794711867719e-06, "loss": 2.9423, "step": 57622 }, { "epoch": 2.82, "grad_norm": 0.7473773956298828, "learning_rate": 5.0829721800430544e-06, "loss": 2.9537, "step": 57623 }, { "epoch": 2.82, "grad_norm": 0.7446900606155396, "learning_rate": 5.080150424973051e-06, "loss": 2.8643, "step": 57624 }, { "epoch": 2.82, "grad_norm": 0.7130290865898132, "learning_rate": 5.077329446665235e-06, "loss": 2.8381, "step": 57625 }, { "epoch": 2.82, "grad_norm": 0.7973248362541199, "learning_rate": 5.0745092451269345e-06, "loss": 2.7452, "step": 57626 }, { "epoch": 2.82, "grad_norm": 0.7995738387107849, "learning_rate": 5.071689820365643e-06, "loss": 2.9036, "step": 57627 }, { "epoch": 2.82, "grad_norm": 0.8131319880485535, "learning_rate": 5.068871172388755e-06, "loss": 2.9542, "step": 57628 }, { "epoch": 2.82, "grad_norm": 0.7483770251274109, "learning_rate": 5.066053301203665e-06, "loss": 2.8858, "step": 57629 }, { "epoch": 2.82, "grad_norm": 0.7863949537277222, "learning_rate": 5.0632362068178665e-06, "loss": 2.9802, "step": 57630 }, { "epoch": 2.82, "grad_norm": 0.7554341554641724, "learning_rate": 5.0604198892387205e-06, "loss": 3.0817, "step": 57631 }, { "epoch": 2.82, "grad_norm": 0.7444338798522949, "learning_rate": 5.057604348473687e-06, "loss": 3.0471, "step": 57632 }, { "epoch": 2.82, "grad_norm": 0.8026116490364075, "learning_rate": 5.054789584530161e-06, "loss": 2.6147, "step": 57633 }, { "epoch": 2.82, "grad_norm": 0.7201833128929138, "learning_rate": 5.051975597415536e-06, "loss": 2.7799, "step": 57634 }, { "epoch": 2.82, "grad_norm": 0.7888780236244202, "learning_rate": 5.0491623871372396e-06, "loss": 3.0041, "step": 57635 }, { "epoch": 2.82, "grad_norm": 0.7324066162109375, "learning_rate": 5.0463499537027e-06, "loss": 2.7168, "step": 57636 }, { "epoch": 2.82, "grad_norm": 0.7467460632324219, "learning_rate": 5.043538297119309e-06, "loss": 2.7614, "step": 57637 }, { "epoch": 2.82, "grad_norm": 0.8163952827453613, "learning_rate": 5.040727417394464e-06, "loss": 3.0423, "step": 57638 }, { "epoch": 2.82, "grad_norm": 0.788611888885498, "learning_rate": 5.037917314535589e-06, "loss": 3.0102, "step": 57639 }, { "epoch": 2.82, "grad_norm": 0.7810007929801941, "learning_rate": 5.03510798855008e-06, "loss": 2.9274, "step": 57640 }, { "epoch": 2.82, "grad_norm": 0.8209431171417236, "learning_rate": 5.032299439445297e-06, "loss": 2.7132, "step": 57641 }, { "epoch": 2.82, "grad_norm": 0.7978459596633911, "learning_rate": 5.0294916672287025e-06, "loss": 2.7835, "step": 57642 }, { "epoch": 2.82, "grad_norm": 0.7896865010261536, "learning_rate": 5.026684671907655e-06, "loss": 2.7713, "step": 57643 }, { "epoch": 2.82, "grad_norm": 0.7331417202949524, "learning_rate": 5.023878453489516e-06, "loss": 2.8776, "step": 57644 }, { "epoch": 2.83, "grad_norm": 0.7624796628952026, "learning_rate": 5.0210730119817465e-06, "loss": 2.9129, "step": 57645 }, { "epoch": 2.83, "grad_norm": 0.7527821660041809, "learning_rate": 5.018268347391674e-06, "loss": 2.9413, "step": 57646 }, { "epoch": 2.83, "grad_norm": 0.7554188966751099, "learning_rate": 5.015464459726759e-06, "loss": 3.1033, "step": 57647 }, { "epoch": 2.83, "grad_norm": 0.7641181349754333, "learning_rate": 5.012661348994329e-06, "loss": 2.8095, "step": 57648 }, { "epoch": 2.83, "grad_norm": 0.7661232352256775, "learning_rate": 5.009859015201745e-06, "loss": 2.9625, "step": 57649 }, { "epoch": 2.83, "grad_norm": 0.7631051540374756, "learning_rate": 5.0070574583564335e-06, "loss": 2.8323, "step": 57650 }, { "epoch": 2.83, "grad_norm": 0.8133715391159058, "learning_rate": 5.004256678465757e-06, "loss": 2.9095, "step": 57651 }, { "epoch": 2.83, "grad_norm": 0.7536367774009705, "learning_rate": 5.001456675537108e-06, "loss": 2.8894, "step": 57652 }, { "epoch": 2.83, "grad_norm": 0.7896614074707031, "learning_rate": 4.998657449577848e-06, "loss": 3.0761, "step": 57653 }, { "epoch": 2.83, "grad_norm": 0.7261320352554321, "learning_rate": 4.995859000595337e-06, "loss": 2.9384, "step": 57654 }, { "epoch": 2.83, "grad_norm": 0.7240200638771057, "learning_rate": 4.99306132859697e-06, "loss": 2.6961, "step": 57655 }, { "epoch": 2.83, "grad_norm": 0.7582331895828247, "learning_rate": 4.990264433590108e-06, "loss": 2.7058, "step": 57656 }, { "epoch": 2.83, "grad_norm": 0.7294660210609436, "learning_rate": 4.98746831558211e-06, "loss": 2.9779, "step": 57657 }, { "epoch": 2.83, "grad_norm": 0.752255916595459, "learning_rate": 4.984672974580306e-06, "loss": 2.8786, "step": 57658 }, { "epoch": 2.83, "grad_norm": 0.751939594745636, "learning_rate": 4.981878410592122e-06, "loss": 2.8781, "step": 57659 }, { "epoch": 2.83, "grad_norm": 0.7334133982658386, "learning_rate": 4.979084623624918e-06, "loss": 2.9869, "step": 57660 }, { "epoch": 2.83, "grad_norm": 0.7954682111740112, "learning_rate": 4.976291613686023e-06, "loss": 3.1021, "step": 57661 }, { "epoch": 2.83, "grad_norm": 0.7579163312911987, "learning_rate": 4.973499380782797e-06, "loss": 2.9179, "step": 57662 }, { "epoch": 2.83, "grad_norm": 0.743008553981781, "learning_rate": 4.970707924922568e-06, "loss": 2.8985, "step": 57663 }, { "epoch": 2.83, "grad_norm": 0.7410622835159302, "learning_rate": 4.9679172461127295e-06, "loss": 3.0076, "step": 57664 }, { "epoch": 2.83, "grad_norm": 0.725355327129364, "learning_rate": 4.965127344360608e-06, "loss": 2.8972, "step": 57665 }, { "epoch": 2.83, "grad_norm": 0.7321346998214722, "learning_rate": 4.962338219673567e-06, "loss": 3.1501, "step": 57666 }, { "epoch": 2.83, "grad_norm": 0.7218225002288818, "learning_rate": 4.9595498720589655e-06, "loss": 2.7632, "step": 57667 }, { "epoch": 2.83, "grad_norm": 0.7458818554878235, "learning_rate": 4.9567623015240975e-06, "loss": 3.0234, "step": 57668 }, { "epoch": 2.83, "grad_norm": 0.7871835231781006, "learning_rate": 4.9539755080763575e-06, "loss": 2.8553, "step": 57669 }, { "epoch": 2.83, "grad_norm": 0.9065406322479248, "learning_rate": 4.95118949172304e-06, "loss": 2.8418, "step": 57670 }, { "epoch": 2.83, "grad_norm": 0.7578248977661133, "learning_rate": 4.948404252471539e-06, "loss": 2.8822, "step": 57671 }, { "epoch": 2.83, "grad_norm": 0.734098494052887, "learning_rate": 4.945619790329147e-06, "loss": 2.7124, "step": 57672 }, { "epoch": 2.83, "grad_norm": 0.7335187196731567, "learning_rate": 4.942836105303161e-06, "loss": 2.9535, "step": 57673 }, { "epoch": 2.83, "grad_norm": 0.7521882057189941, "learning_rate": 4.940053197401006e-06, "loss": 2.7288, "step": 57674 }, { "epoch": 2.83, "grad_norm": 0.8029778003692627, "learning_rate": 4.937271066629944e-06, "loss": 2.9228, "step": 57675 }, { "epoch": 2.83, "grad_norm": 0.8154861330986023, "learning_rate": 4.934489712997369e-06, "loss": 3.0219, "step": 57676 }, { "epoch": 2.83, "grad_norm": 0.7279578447341919, "learning_rate": 4.931709136510542e-06, "loss": 2.9194, "step": 57677 }, { "epoch": 2.83, "grad_norm": 0.7490201592445374, "learning_rate": 4.9289293371767905e-06, "loss": 2.8508, "step": 57678 }, { "epoch": 2.83, "grad_norm": 0.8128371834754944, "learning_rate": 4.926150315003441e-06, "loss": 2.6265, "step": 57679 }, { "epoch": 2.83, "grad_norm": 0.7674548029899597, "learning_rate": 4.923372069997822e-06, "loss": 2.8969, "step": 57680 }, { "epoch": 2.83, "grad_norm": 0.7656154036521912, "learning_rate": 4.920594602167261e-06, "loss": 2.8899, "step": 57681 }, { "epoch": 2.83, "grad_norm": 0.733749270439148, "learning_rate": 4.917817911519084e-06, "loss": 2.8042, "step": 57682 }, { "epoch": 2.83, "grad_norm": 0.7814816832542419, "learning_rate": 4.915041998060587e-06, "loss": 2.9326, "step": 57683 }, { "epoch": 2.83, "grad_norm": 0.7335500121116638, "learning_rate": 4.912266861799097e-06, "loss": 2.7703, "step": 57684 }, { "epoch": 2.83, "grad_norm": 0.7337460517883301, "learning_rate": 4.909492502741841e-06, "loss": 3.0456, "step": 57685 }, { "epoch": 2.83, "grad_norm": 0.7310239672660828, "learning_rate": 4.906718920896213e-06, "loss": 2.8439, "step": 57686 }, { "epoch": 2.83, "grad_norm": 0.7263157963752747, "learning_rate": 4.903946116269508e-06, "loss": 3.001, "step": 57687 }, { "epoch": 2.83, "grad_norm": 0.7620536088943481, "learning_rate": 4.90117408886902e-06, "loss": 3.0321, "step": 57688 }, { "epoch": 2.83, "grad_norm": 0.7526038289070129, "learning_rate": 4.898402838702043e-06, "loss": 2.9153, "step": 57689 }, { "epoch": 2.83, "grad_norm": 0.7336723804473877, "learning_rate": 4.89563236577587e-06, "loss": 3.0666, "step": 57690 }, { "epoch": 2.83, "grad_norm": 0.7981674671173096, "learning_rate": 4.89286267009783e-06, "loss": 2.7095, "step": 57691 }, { "epoch": 2.83, "grad_norm": 0.7469791173934937, "learning_rate": 4.890093751675183e-06, "loss": 2.9837, "step": 57692 }, { "epoch": 2.83, "grad_norm": 0.7180920243263245, "learning_rate": 4.887325610515225e-06, "loss": 2.9881, "step": 57693 }, { "epoch": 2.83, "grad_norm": 0.7620313763618469, "learning_rate": 4.884558246625248e-06, "loss": 2.7828, "step": 57694 }, { "epoch": 2.83, "grad_norm": 0.7135055661201477, "learning_rate": 4.8817916600125465e-06, "loss": 3.048, "step": 57695 }, { "epoch": 2.83, "grad_norm": 0.7366616129875183, "learning_rate": 4.879025850684415e-06, "loss": 2.9772, "step": 57696 }, { "epoch": 2.83, "grad_norm": 0.745126485824585, "learning_rate": 4.876260818648181e-06, "loss": 2.8637, "step": 57697 }, { "epoch": 2.83, "grad_norm": 0.7213049530982971, "learning_rate": 4.873496563911039e-06, "loss": 2.9629, "step": 57698 }, { "epoch": 2.83, "grad_norm": 0.7770732641220093, "learning_rate": 4.8707330864803166e-06, "loss": 2.9155, "step": 57699 }, { "epoch": 2.83, "grad_norm": 0.773853063583374, "learning_rate": 4.8679703863632735e-06, "loss": 2.8713, "step": 57700 }, { "epoch": 2.83, "grad_norm": 0.7769290804862976, "learning_rate": 4.865208463567172e-06, "loss": 2.8884, "step": 57701 }, { "epoch": 2.83, "grad_norm": 0.7928144335746765, "learning_rate": 4.8624473180993385e-06, "loss": 2.7892, "step": 57702 }, { "epoch": 2.83, "grad_norm": 0.7701162695884705, "learning_rate": 4.859686949967034e-06, "loss": 2.9104, "step": 57703 }, { "epoch": 2.83, "grad_norm": 0.7658120393753052, "learning_rate": 4.856927359177487e-06, "loss": 2.825, "step": 57704 }, { "epoch": 2.83, "grad_norm": 0.7138972282409668, "learning_rate": 4.854168545738024e-06, "loss": 3.0144, "step": 57705 }, { "epoch": 2.83, "grad_norm": 0.7636241316795349, "learning_rate": 4.8514105096558396e-06, "loss": 2.7451, "step": 57706 }, { "epoch": 2.83, "grad_norm": 0.7346954941749573, "learning_rate": 4.848653250938261e-06, "loss": 2.9506, "step": 57707 }, { "epoch": 2.83, "grad_norm": 0.7780959010124207, "learning_rate": 4.8458967695925166e-06, "loss": 2.8082, "step": 57708 }, { "epoch": 2.83, "grad_norm": 0.7654991149902344, "learning_rate": 4.843141065625899e-06, "loss": 2.7958, "step": 57709 }, { "epoch": 2.83, "grad_norm": 0.7330126166343689, "learning_rate": 4.840386139045604e-06, "loss": 2.9405, "step": 57710 }, { "epoch": 2.83, "grad_norm": 0.7495510578155518, "learning_rate": 4.837631989858959e-06, "loss": 2.8339, "step": 57711 }, { "epoch": 2.83, "grad_norm": 0.7610169053077698, "learning_rate": 4.83487861807319e-06, "loss": 2.6812, "step": 57712 }, { "epoch": 2.83, "grad_norm": 0.711486279964447, "learning_rate": 4.832126023695526e-06, "loss": 2.9422, "step": 57713 }, { "epoch": 2.83, "grad_norm": 0.7498459815979004, "learning_rate": 4.829374206733261e-06, "loss": 2.8501, "step": 57714 }, { "epoch": 2.83, "grad_norm": 0.755424439907074, "learning_rate": 4.826623167193588e-06, "loss": 2.9558, "step": 57715 }, { "epoch": 2.83, "grad_norm": 0.744842529296875, "learning_rate": 4.82387290508377e-06, "loss": 2.8703, "step": 57716 }, { "epoch": 2.83, "grad_norm": 0.7416000962257385, "learning_rate": 4.821123420411099e-06, "loss": 2.9342, "step": 57717 }, { "epoch": 2.83, "grad_norm": 0.782110869884491, "learning_rate": 4.818374713182771e-06, "loss": 2.8408, "step": 57718 }, { "epoch": 2.83, "grad_norm": 0.7619156241416931, "learning_rate": 4.815626783406046e-06, "loss": 2.8473, "step": 57719 }, { "epoch": 2.83, "grad_norm": 0.7477062940597534, "learning_rate": 4.812879631088151e-06, "loss": 3.0257, "step": 57720 }, { "epoch": 2.83, "grad_norm": 0.7299256920814514, "learning_rate": 4.810133256236282e-06, "loss": 3.0029, "step": 57721 }, { "epoch": 2.83, "grad_norm": 0.7986459136009216, "learning_rate": 4.807387658857764e-06, "loss": 2.4808, "step": 57722 }, { "epoch": 2.83, "grad_norm": 0.7746559381484985, "learning_rate": 4.804642838959727e-06, "loss": 3.0949, "step": 57723 }, { "epoch": 2.83, "grad_norm": 0.8096112012863159, "learning_rate": 4.8018987965494635e-06, "loss": 2.8608, "step": 57724 }, { "epoch": 2.83, "grad_norm": 0.7485320568084717, "learning_rate": 4.799155531634203e-06, "loss": 2.904, "step": 57725 }, { "epoch": 2.83, "grad_norm": 0.7414878010749817, "learning_rate": 4.796413044221136e-06, "loss": 2.7812, "step": 57726 }, { "epoch": 2.83, "grad_norm": 0.7604761123657227, "learning_rate": 4.793671334317528e-06, "loss": 2.8507, "step": 57727 }, { "epoch": 2.83, "grad_norm": 0.7408043146133423, "learning_rate": 4.7909304019305705e-06, "loss": 3.0301, "step": 57728 }, { "epoch": 2.83, "grad_norm": 0.8125025033950806, "learning_rate": 4.788190247067492e-06, "loss": 3.0556, "step": 57729 }, { "epoch": 2.83, "grad_norm": 0.7148528099060059, "learning_rate": 4.785450869735452e-06, "loss": 2.9549, "step": 57730 }, { "epoch": 2.83, "grad_norm": 0.7859530448913574, "learning_rate": 4.782712269941747e-06, "loss": 2.904, "step": 57731 }, { "epoch": 2.83, "grad_norm": 0.814154863357544, "learning_rate": 4.7799744476936015e-06, "loss": 2.845, "step": 57732 }, { "epoch": 2.83, "grad_norm": 0.7224166393280029, "learning_rate": 4.777237402998147e-06, "loss": 2.8495, "step": 57733 }, { "epoch": 2.83, "grad_norm": 0.7980074286460876, "learning_rate": 4.7745011358626406e-06, "loss": 2.9546, "step": 57734 }, { "epoch": 2.83, "grad_norm": 0.7275083661079407, "learning_rate": 4.7717656462943125e-06, "loss": 2.9702, "step": 57735 }, { "epoch": 2.83, "grad_norm": 0.7441866993904114, "learning_rate": 4.769030934300289e-06, "loss": 2.918, "step": 57736 }, { "epoch": 2.83, "grad_norm": 0.8218494653701782, "learning_rate": 4.766296999887864e-06, "loss": 3.025, "step": 57737 }, { "epoch": 2.83, "grad_norm": 0.7137600779533386, "learning_rate": 4.763563843064167e-06, "loss": 2.8481, "step": 57738 }, { "epoch": 2.83, "grad_norm": 0.8052471876144409, "learning_rate": 4.760831463836423e-06, "loss": 2.8723, "step": 57739 }, { "epoch": 2.83, "grad_norm": 0.7791904807090759, "learning_rate": 4.7580998622118285e-06, "loss": 2.9187, "step": 57740 }, { "epoch": 2.83, "grad_norm": 0.7228248119354248, "learning_rate": 4.755369038197576e-06, "loss": 3.0401, "step": 57741 }, { "epoch": 2.83, "grad_norm": 0.796574056148529, "learning_rate": 4.752638991800895e-06, "loss": 2.9358, "step": 57742 }, { "epoch": 2.83, "grad_norm": 0.7155166268348694, "learning_rate": 4.749909723028911e-06, "loss": 2.9773, "step": 57743 }, { "epoch": 2.83, "grad_norm": 0.731598973274231, "learning_rate": 4.747181231888852e-06, "loss": 2.8988, "step": 57744 }, { "epoch": 2.83, "grad_norm": 0.7063519954681396, "learning_rate": 4.74445351838788e-06, "loss": 2.8026, "step": 57745 }, { "epoch": 2.83, "grad_norm": 0.7575142979621887, "learning_rate": 4.741726582533189e-06, "loss": 2.9443, "step": 57746 }, { "epoch": 2.83, "grad_norm": 0.7412130832672119, "learning_rate": 4.739000424331973e-06, "loss": 3.0455, "step": 57747 }, { "epoch": 2.83, "grad_norm": 0.700270414352417, "learning_rate": 4.736275043791393e-06, "loss": 2.9013, "step": 57748 }, { "epoch": 2.83, "grad_norm": 0.8509933352470398, "learning_rate": 4.733550440918676e-06, "loss": 2.6319, "step": 57749 }, { "epoch": 2.83, "grad_norm": 0.7422335147857666, "learning_rate": 4.730826615720951e-06, "loss": 2.8367, "step": 57750 }, { "epoch": 2.83, "grad_norm": 0.7345046401023865, "learning_rate": 4.7281035682054105e-06, "loss": 3.0125, "step": 57751 }, { "epoch": 2.83, "grad_norm": 0.7160439491271973, "learning_rate": 4.725381298379216e-06, "loss": 2.8065, "step": 57752 }, { "epoch": 2.83, "grad_norm": 0.7227357625961304, "learning_rate": 4.722659806249496e-06, "loss": 2.978, "step": 57753 }, { "epoch": 2.83, "grad_norm": 0.7689091563224792, "learning_rate": 4.71993909182351e-06, "loss": 2.9034, "step": 57754 }, { "epoch": 2.83, "grad_norm": 0.7610191106796265, "learning_rate": 4.717219155108354e-06, "loss": 2.8094, "step": 57755 }, { "epoch": 2.83, "grad_norm": 0.777751088142395, "learning_rate": 4.714499996111254e-06, "loss": 2.7979, "step": 57756 }, { "epoch": 2.83, "grad_norm": 0.8778126239776611, "learning_rate": 4.711781614839305e-06, "loss": 2.9273, "step": 57757 }, { "epoch": 2.83, "grad_norm": 0.7235648036003113, "learning_rate": 4.709064011299668e-06, "loss": 2.98, "step": 57758 }, { "epoch": 2.83, "grad_norm": 0.7848798036575317, "learning_rate": 4.70634718549957e-06, "loss": 2.9884, "step": 57759 }, { "epoch": 2.83, "grad_norm": 0.7237803936004639, "learning_rate": 4.703631137446107e-06, "loss": 2.9722, "step": 57760 }, { "epoch": 2.83, "grad_norm": 0.7664092183113098, "learning_rate": 4.700915867146438e-06, "loss": 2.7523, "step": 57761 }, { "epoch": 2.83, "grad_norm": 0.7664266228675842, "learning_rate": 4.698201374607724e-06, "loss": 2.7736, "step": 57762 }, { "epoch": 2.83, "grad_norm": 0.7319455742835999, "learning_rate": 4.695487659837127e-06, "loss": 2.956, "step": 57763 }, { "epoch": 2.83, "grad_norm": 0.7283288836479187, "learning_rate": 4.692774722841807e-06, "loss": 2.8592, "step": 57764 }, { "epoch": 2.83, "grad_norm": 0.7501567602157593, "learning_rate": 4.69006256362886e-06, "loss": 2.7725, "step": 57765 }, { "epoch": 2.83, "grad_norm": 0.8033075332641602, "learning_rate": 4.687351182205446e-06, "loss": 2.627, "step": 57766 }, { "epoch": 2.83, "grad_norm": 0.7505348324775696, "learning_rate": 4.684640578578725e-06, "loss": 2.9283, "step": 57767 }, { "epoch": 2.83, "grad_norm": 0.7713310718536377, "learning_rate": 4.681930752755791e-06, "loss": 2.8058, "step": 57768 }, { "epoch": 2.83, "grad_norm": 0.7473810315132141, "learning_rate": 4.679221704743874e-06, "loss": 3.0927, "step": 57769 }, { "epoch": 2.83, "grad_norm": 0.7242414951324463, "learning_rate": 4.67651343455e-06, "loss": 2.848, "step": 57770 }, { "epoch": 2.83, "grad_norm": 0.7761023640632629, "learning_rate": 4.673805942181397e-06, "loss": 3.0277, "step": 57771 }, { "epoch": 2.83, "grad_norm": 0.7542916536331177, "learning_rate": 4.671099227645159e-06, "loss": 2.9206, "step": 57772 }, { "epoch": 2.83, "grad_norm": 0.7616918087005615, "learning_rate": 4.668393290948347e-06, "loss": 3.0023, "step": 57773 }, { "epoch": 2.83, "grad_norm": 0.7441499829292297, "learning_rate": 4.665688132098222e-06, "loss": 2.8658, "step": 57774 }, { "epoch": 2.83, "grad_norm": 0.736674964427948, "learning_rate": 4.662983751101779e-06, "loss": 2.8954, "step": 57775 }, { "epoch": 2.83, "grad_norm": 0.7363643050193787, "learning_rate": 4.660280147966211e-06, "loss": 3.0603, "step": 57776 }, { "epoch": 2.83, "grad_norm": 0.7268117070198059, "learning_rate": 4.657577322698647e-06, "loss": 2.9473, "step": 57777 }, { "epoch": 2.83, "grad_norm": 0.7369086742401123, "learning_rate": 4.65487527530618e-06, "loss": 2.6568, "step": 57778 }, { "epoch": 2.83, "grad_norm": 0.7631237506866455, "learning_rate": 4.652174005795939e-06, "loss": 3.0302, "step": 57779 }, { "epoch": 2.83, "grad_norm": 0.7441269755363464, "learning_rate": 4.649473514174984e-06, "loss": 2.884, "step": 57780 }, { "epoch": 2.83, "grad_norm": 0.7539127469062805, "learning_rate": 4.646773800450543e-06, "loss": 2.6154, "step": 57781 }, { "epoch": 2.83, "grad_norm": 0.7788946628570557, "learning_rate": 4.644074864629576e-06, "loss": 2.783, "step": 57782 }, { "epoch": 2.83, "grad_norm": 0.7208992838859558, "learning_rate": 4.641376706719313e-06, "loss": 2.7661, "step": 57783 }, { "epoch": 2.83, "grad_norm": 0.7645790576934814, "learning_rate": 4.6386793267268465e-06, "loss": 2.8266, "step": 57784 }, { "epoch": 2.83, "grad_norm": 0.7291238307952881, "learning_rate": 4.635982724659237e-06, "loss": 2.8439, "step": 57785 }, { "epoch": 2.83, "grad_norm": 0.7457494139671326, "learning_rate": 4.633286900523614e-06, "loss": 2.883, "step": 57786 }, { "epoch": 2.83, "grad_norm": 0.7361904382705688, "learning_rate": 4.630591854327037e-06, "loss": 3.0172, "step": 57787 }, { "epoch": 2.83, "grad_norm": 0.7620532512664795, "learning_rate": 4.627897586076667e-06, "loss": 2.8486, "step": 57788 }, { "epoch": 2.83, "grad_norm": 0.7753808498382568, "learning_rate": 4.625204095779566e-06, "loss": 2.9601, "step": 57789 }, { "epoch": 2.83, "grad_norm": 0.7386515736579895, "learning_rate": 4.6225113834427955e-06, "loss": 2.8754, "step": 57790 }, { "epoch": 2.83, "grad_norm": 0.7125532627105713, "learning_rate": 4.619819449073548e-06, "loss": 2.8305, "step": 57791 }, { "epoch": 2.83, "grad_norm": 0.7690471410751343, "learning_rate": 4.617128292678784e-06, "loss": 2.8357, "step": 57792 }, { "epoch": 2.83, "grad_norm": 0.717765212059021, "learning_rate": 4.614437914265701e-06, "loss": 2.9072, "step": 57793 }, { "epoch": 2.83, "grad_norm": 0.7456265091896057, "learning_rate": 4.611748313841324e-06, "loss": 2.7729, "step": 57794 }, { "epoch": 2.83, "grad_norm": 0.7556934952735901, "learning_rate": 4.6090594914127475e-06, "loss": 3.028, "step": 57795 }, { "epoch": 2.83, "grad_norm": 0.76350337266922, "learning_rate": 4.6063714469871005e-06, "loss": 2.9979, "step": 57796 }, { "epoch": 2.83, "grad_norm": 0.7897090911865234, "learning_rate": 4.6036841805713765e-06, "loss": 2.8443, "step": 57797 }, { "epoch": 2.83, "grad_norm": 0.7721547484397888, "learning_rate": 4.6009976921726695e-06, "loss": 3.1241, "step": 57798 }, { "epoch": 2.83, "grad_norm": 0.7226487398147583, "learning_rate": 4.598311981798142e-06, "loss": 2.8602, "step": 57799 }, { "epoch": 2.83, "grad_norm": 0.7703262567520142, "learning_rate": 4.59562704945482e-06, "loss": 2.8151, "step": 57800 }, { "epoch": 2.83, "grad_norm": 0.7636512517929077, "learning_rate": 4.592942895149732e-06, "loss": 2.9814, "step": 57801 }, { "epoch": 2.83, "grad_norm": 0.7318775057792664, "learning_rate": 4.590259518889971e-06, "loss": 2.7938, "step": 57802 }, { "epoch": 2.83, "grad_norm": 0.7615649700164795, "learning_rate": 4.587576920682601e-06, "loss": 2.8404, "step": 57803 }, { "epoch": 2.83, "grad_norm": 0.7781710624694824, "learning_rate": 4.584895100534747e-06, "loss": 3.0723, "step": 57804 }, { "epoch": 2.83, "grad_norm": 0.7660056948661804, "learning_rate": 4.582214058453404e-06, "loss": 2.945, "step": 57805 }, { "epoch": 2.83, "grad_norm": 0.7571398019790649, "learning_rate": 4.579533794445667e-06, "loss": 2.8892, "step": 57806 }, { "epoch": 2.83, "grad_norm": 0.793786346912384, "learning_rate": 4.576854308518563e-06, "loss": 3.0665, "step": 57807 }, { "epoch": 2.83, "grad_norm": 0.7984482645988464, "learning_rate": 4.574175600679186e-06, "loss": 2.8812, "step": 57808 }, { "epoch": 2.83, "grad_norm": 0.7446393966674805, "learning_rate": 4.571497670934565e-06, "loss": 2.9753, "step": 57809 }, { "epoch": 2.83, "grad_norm": 0.7223838567733765, "learning_rate": 4.56882051929176e-06, "loss": 2.9849, "step": 57810 }, { "epoch": 2.83, "grad_norm": 0.7122632265090942, "learning_rate": 4.566144145757833e-06, "loss": 3.0396, "step": 57811 }, { "epoch": 2.83, "grad_norm": 0.7451797723770142, "learning_rate": 4.56346855033981e-06, "loss": 2.9919, "step": 57812 }, { "epoch": 2.83, "grad_norm": 0.7426049113273621, "learning_rate": 4.560793733044754e-06, "loss": 2.8361, "step": 57813 }, { "epoch": 2.83, "grad_norm": 0.7814031839370728, "learning_rate": 4.558119693879725e-06, "loss": 2.8659, "step": 57814 }, { "epoch": 2.83, "grad_norm": 0.7301146388053894, "learning_rate": 4.55544643285175e-06, "loss": 2.7683, "step": 57815 }, { "epoch": 2.83, "grad_norm": 0.7373456954956055, "learning_rate": 4.552773949967892e-06, "loss": 3.0187, "step": 57816 }, { "epoch": 2.83, "grad_norm": 0.7450728416442871, "learning_rate": 4.5501022452351095e-06, "loss": 2.842, "step": 57817 }, { "epoch": 2.83, "grad_norm": 0.7419819831848145, "learning_rate": 4.5474313186605326e-06, "loss": 2.803, "step": 57818 }, { "epoch": 2.83, "grad_norm": 0.7292543649673462, "learning_rate": 4.544761170251154e-06, "loss": 3.1129, "step": 57819 }, { "epoch": 2.83, "grad_norm": 0.7770516276359558, "learning_rate": 4.542091800014003e-06, "loss": 2.9207, "step": 57820 }, { "epoch": 2.83, "grad_norm": 0.7825976610183716, "learning_rate": 4.5394232079561385e-06, "loss": 3.0597, "step": 57821 }, { "epoch": 2.83, "grad_norm": 0.7760852575302124, "learning_rate": 4.53675539408459e-06, "loss": 2.6327, "step": 57822 }, { "epoch": 2.83, "grad_norm": 0.7495847344398499, "learning_rate": 4.534088358406318e-06, "loss": 2.6761, "step": 57823 }, { "epoch": 2.83, "grad_norm": 0.7523453235626221, "learning_rate": 4.531422100928417e-06, "loss": 2.7543, "step": 57824 }, { "epoch": 2.83, "grad_norm": 0.7610305547714233, "learning_rate": 4.528756621657881e-06, "loss": 2.9113, "step": 57825 }, { "epoch": 2.83, "grad_norm": 0.7986166477203369, "learning_rate": 4.526091920601737e-06, "loss": 2.74, "step": 57826 }, { "epoch": 2.83, "grad_norm": 0.7516844272613525, "learning_rate": 4.523427997766982e-06, "loss": 2.9315, "step": 57827 }, { "epoch": 2.83, "grad_norm": 0.8079379796981812, "learning_rate": 4.520764853160674e-06, "loss": 3.0084, "step": 57828 }, { "epoch": 2.83, "grad_norm": 0.8045254349708557, "learning_rate": 4.518102486789843e-06, "loss": 2.6265, "step": 57829 }, { "epoch": 2.83, "grad_norm": 0.7560080289840698, "learning_rate": 4.515440898661382e-06, "loss": 2.8943, "step": 57830 }, { "epoch": 2.83, "grad_norm": 0.7986272573471069, "learning_rate": 4.512780088782453e-06, "loss": 2.8917, "step": 57831 }, { "epoch": 2.83, "grad_norm": 0.7806861400604248, "learning_rate": 4.510120057159949e-06, "loss": 2.9683, "step": 57832 }, { "epoch": 2.83, "grad_norm": 0.7481813430786133, "learning_rate": 4.507460803800933e-06, "loss": 2.7959, "step": 57833 }, { "epoch": 2.83, "grad_norm": 0.8359472155570984, "learning_rate": 4.504802328712398e-06, "loss": 2.8192, "step": 57834 }, { "epoch": 2.83, "grad_norm": 0.7175796031951904, "learning_rate": 4.502144631901339e-06, "loss": 3.0029, "step": 57835 }, { "epoch": 2.83, "grad_norm": 0.7417463660240173, "learning_rate": 4.499487713374783e-06, "loss": 3.0287, "step": 57836 }, { "epoch": 2.83, "grad_norm": 0.7876279950141907, "learning_rate": 4.496831573139692e-06, "loss": 2.8647, "step": 57837 }, { "epoch": 2.83, "grad_norm": 0.7579513788223267, "learning_rate": 4.494176211203093e-06, "loss": 3.0034, "step": 57838 }, { "epoch": 2.83, "grad_norm": 0.7461996078491211, "learning_rate": 4.491521627571948e-06, "loss": 2.7366, "step": 57839 }, { "epoch": 2.83, "grad_norm": 0.753563404083252, "learning_rate": 4.48886782225325e-06, "loss": 3.0483, "step": 57840 }, { "epoch": 2.83, "grad_norm": 0.7457968592643738, "learning_rate": 4.4862147952539954e-06, "loss": 2.7279, "step": 57841 }, { "epoch": 2.83, "grad_norm": 0.7886890769004822, "learning_rate": 4.4835625465812095e-06, "loss": 3.0078, "step": 57842 }, { "epoch": 2.83, "grad_norm": 0.7757450342178345, "learning_rate": 4.4809110762418225e-06, "loss": 2.752, "step": 57843 }, { "epoch": 2.83, "grad_norm": 0.731127917766571, "learning_rate": 4.47826038424286e-06, "loss": 2.929, "step": 57844 }, { "epoch": 2.83, "grad_norm": 0.7428131103515625, "learning_rate": 4.475610470591284e-06, "loss": 2.9674, "step": 57845 }, { "epoch": 2.83, "grad_norm": 0.8183236718177795, "learning_rate": 4.472961335294056e-06, "loss": 2.9806, "step": 57846 }, { "epoch": 2.83, "grad_norm": 0.7529081702232361, "learning_rate": 4.47031297835817e-06, "loss": 2.944, "step": 57847 }, { "epoch": 2.83, "grad_norm": 0.7067267894744873, "learning_rate": 4.467665399790654e-06, "loss": 2.9451, "step": 57848 }, { "epoch": 2.84, "grad_norm": 0.793649435043335, "learning_rate": 4.465018599598369e-06, "loss": 2.9698, "step": 57849 }, { "epoch": 2.84, "grad_norm": 0.7690135836601257, "learning_rate": 4.462372577788376e-06, "loss": 2.6836, "step": 57850 }, { "epoch": 2.84, "grad_norm": 0.7430137991905212, "learning_rate": 4.459727334367635e-06, "loss": 2.9179, "step": 57851 }, { "epoch": 2.84, "grad_norm": 0.7567710876464844, "learning_rate": 4.457082869343076e-06, "loss": 3.0389, "step": 57852 }, { "epoch": 2.84, "grad_norm": 0.7652338743209839, "learning_rate": 4.454439182721692e-06, "loss": 2.9754, "step": 57853 }, { "epoch": 2.84, "grad_norm": 0.7574499845504761, "learning_rate": 4.451796274510411e-06, "loss": 2.7802, "step": 57854 }, { "epoch": 2.84, "grad_norm": 0.7804326415061951, "learning_rate": 4.449154144716227e-06, "loss": 2.8233, "step": 57855 }, { "epoch": 2.84, "grad_norm": 0.777418315410614, "learning_rate": 4.446512793346102e-06, "loss": 2.8151, "step": 57856 }, { "epoch": 2.84, "grad_norm": 0.7421988248825073, "learning_rate": 4.443872220406963e-06, "loss": 2.8111, "step": 57857 }, { "epoch": 2.84, "grad_norm": 0.758773922920227, "learning_rate": 4.441232425905805e-06, "loss": 3.0517, "step": 57858 }, { "epoch": 2.84, "grad_norm": 0.8324280381202698, "learning_rate": 4.438593409849556e-06, "loss": 3.0362, "step": 57859 }, { "epoch": 2.84, "grad_norm": 0.7193976044654846, "learning_rate": 4.435955172245176e-06, "loss": 2.7782, "step": 57860 }, { "epoch": 2.84, "grad_norm": 0.7793514132499695, "learning_rate": 4.433317713099593e-06, "loss": 2.9797, "step": 57861 }, { "epoch": 2.84, "grad_norm": 0.7374630570411682, "learning_rate": 4.43068103241977e-06, "loss": 2.6941, "step": 57862 }, { "epoch": 2.84, "grad_norm": 0.7534642219543457, "learning_rate": 4.428045130212665e-06, "loss": 2.9008, "step": 57863 }, { "epoch": 2.84, "grad_norm": 0.7688164114952087, "learning_rate": 4.425410006485175e-06, "loss": 3.0181, "step": 57864 }, { "epoch": 2.84, "grad_norm": 0.7269670963287354, "learning_rate": 4.4227756612442936e-06, "loss": 2.9657, "step": 57865 }, { "epoch": 2.84, "grad_norm": 0.7430902123451233, "learning_rate": 4.420142094496948e-06, "loss": 2.9997, "step": 57866 }, { "epoch": 2.84, "grad_norm": 0.7281354069709778, "learning_rate": 4.417509306250033e-06, "loss": 2.9365, "step": 57867 }, { "epoch": 2.84, "grad_norm": 0.7742909789085388, "learning_rate": 4.414877296510544e-06, "loss": 2.9117, "step": 57868 }, { "epoch": 2.84, "grad_norm": 0.7314521670341492, "learning_rate": 4.412246065285341e-06, "loss": 2.8858, "step": 57869 }, { "epoch": 2.84, "grad_norm": 0.7391765713691711, "learning_rate": 4.409615612581418e-06, "loss": 3.1135, "step": 57870 }, { "epoch": 2.84, "grad_norm": 0.7244545221328735, "learning_rate": 4.406985938405672e-06, "loss": 2.8085, "step": 57871 }, { "epoch": 2.84, "grad_norm": 0.7341313362121582, "learning_rate": 4.404357042765028e-06, "loss": 2.8818, "step": 57872 }, { "epoch": 2.84, "grad_norm": 0.7704923152923584, "learning_rate": 4.4017289256664476e-06, "loss": 2.8435, "step": 57873 }, { "epoch": 2.84, "grad_norm": 0.8078938722610474, "learning_rate": 4.399101587116827e-06, "loss": 3.1185, "step": 57874 }, { "epoch": 2.84, "grad_norm": 0.7866788506507874, "learning_rate": 4.396475027123059e-06, "loss": 2.8965, "step": 57875 }, { "epoch": 2.84, "grad_norm": 0.7199814915657043, "learning_rate": 4.393849245692105e-06, "loss": 2.9652, "step": 57876 }, { "epoch": 2.84, "grad_norm": 0.7180720567703247, "learning_rate": 4.391224242830827e-06, "loss": 2.8224, "step": 57877 }, { "epoch": 2.84, "grad_norm": 0.731706976890564, "learning_rate": 4.388600018546218e-06, "loss": 2.8166, "step": 57878 }, { "epoch": 2.84, "grad_norm": 0.7510423064231873, "learning_rate": 4.385976572845107e-06, "loss": 2.8502, "step": 57879 }, { "epoch": 2.84, "grad_norm": 0.7061409950256348, "learning_rate": 4.383353905734488e-06, "loss": 2.8161, "step": 57880 }, { "epoch": 2.84, "grad_norm": 0.756230354309082, "learning_rate": 4.380732017221189e-06, "loss": 2.8982, "step": 57881 }, { "epoch": 2.84, "grad_norm": 0.7549283504486084, "learning_rate": 4.378110907312138e-06, "loss": 2.9815, "step": 57882 }, { "epoch": 2.84, "grad_norm": 0.7772846817970276, "learning_rate": 4.375490576014296e-06, "loss": 2.7412, "step": 57883 }, { "epoch": 2.84, "grad_norm": 0.7680914998054504, "learning_rate": 4.372871023334457e-06, "loss": 2.6235, "step": 57884 }, { "epoch": 2.84, "grad_norm": 0.7407665848731995, "learning_rate": 4.370252249279615e-06, "loss": 2.8474, "step": 57885 }, { "epoch": 2.84, "grad_norm": 0.7720359563827515, "learning_rate": 4.367634253856633e-06, "loss": 2.807, "step": 57886 }, { "epoch": 2.84, "grad_norm": 0.7055439352989197, "learning_rate": 4.365017037072405e-06, "loss": 2.6587, "step": 57887 }, { "epoch": 2.84, "grad_norm": 0.8091383576393127, "learning_rate": 4.362400598933824e-06, "loss": 2.8048, "step": 57888 }, { "epoch": 2.84, "grad_norm": 0.7102215886116028, "learning_rate": 4.3597849394477855e-06, "loss": 2.9037, "step": 57889 }, { "epoch": 2.84, "grad_norm": 0.7323922514915466, "learning_rate": 4.357170058621151e-06, "loss": 2.7498, "step": 57890 }, { "epoch": 2.84, "grad_norm": 0.7890856266021729, "learning_rate": 4.354555956460881e-06, "loss": 2.8841, "step": 57891 }, { "epoch": 2.84, "grad_norm": 0.782171905040741, "learning_rate": 4.3519426329737705e-06, "loss": 2.9299, "step": 57892 }, { "epoch": 2.84, "grad_norm": 0.7346777319908142, "learning_rate": 4.349330088166781e-06, "loss": 2.9818, "step": 57893 }, { "epoch": 2.84, "grad_norm": 0.7390797138214111, "learning_rate": 4.346718322046705e-06, "loss": 2.9289, "step": 57894 }, { "epoch": 2.84, "grad_norm": 0.710119366645813, "learning_rate": 4.344107334620539e-06, "loss": 2.9441, "step": 57895 }, { "epoch": 2.84, "grad_norm": 0.8123124241828918, "learning_rate": 4.341497125895044e-06, "loss": 3.1647, "step": 57896 }, { "epoch": 2.84, "grad_norm": 0.7103381156921387, "learning_rate": 4.33888769587718e-06, "loss": 2.8699, "step": 57897 }, { "epoch": 2.84, "grad_norm": 0.740925669670105, "learning_rate": 4.336279044573776e-06, "loss": 2.9577, "step": 57898 }, { "epoch": 2.84, "grad_norm": 0.7418505549430847, "learning_rate": 4.33367117199166e-06, "loss": 2.951, "step": 57899 }, { "epoch": 2.84, "grad_norm": 0.7685325145721436, "learning_rate": 4.331064078137791e-06, "loss": 2.8862, "step": 57900 }, { "epoch": 2.84, "grad_norm": 0.787038266658783, "learning_rate": 4.328457763019033e-06, "loss": 2.7146, "step": 57901 }, { "epoch": 2.84, "grad_norm": 0.7468493580818176, "learning_rate": 4.325852226642179e-06, "loss": 2.8801, "step": 57902 }, { "epoch": 2.84, "grad_norm": 0.7677332162857056, "learning_rate": 4.323247469014124e-06, "loss": 2.8442, "step": 57903 }, { "epoch": 2.84, "grad_norm": 0.7528624534606934, "learning_rate": 4.320643490141729e-06, "loss": 2.6777, "step": 57904 }, { "epoch": 2.84, "grad_norm": 0.7097703218460083, "learning_rate": 4.318040290031888e-06, "loss": 2.7669, "step": 57905 }, { "epoch": 2.84, "grad_norm": 0.6983256936073303, "learning_rate": 4.315437868691363e-06, "loss": 2.9228, "step": 57906 }, { "epoch": 2.84, "grad_norm": 0.8027907013893127, "learning_rate": 4.312836226127115e-06, "loss": 2.9804, "step": 57907 }, { "epoch": 2.84, "grad_norm": 0.7508549690246582, "learning_rate": 4.3102353623459376e-06, "loss": 2.9204, "step": 57908 }, { "epoch": 2.84, "grad_norm": 0.7843279242515564, "learning_rate": 4.3076352773546925e-06, "loss": 2.8192, "step": 57909 }, { "epoch": 2.84, "grad_norm": 0.7532942295074463, "learning_rate": 4.305035971160242e-06, "loss": 2.9188, "step": 57910 }, { "epoch": 2.84, "grad_norm": 0.7582915425300598, "learning_rate": 4.3024374437694135e-06, "loss": 2.9539, "step": 57911 }, { "epoch": 2.84, "grad_norm": 0.7695998549461365, "learning_rate": 4.299839695189034e-06, "loss": 2.9009, "step": 57912 }, { "epoch": 2.84, "grad_norm": 0.739365816116333, "learning_rate": 4.2972427254259974e-06, "loss": 2.8835, "step": 57913 }, { "epoch": 2.84, "grad_norm": 0.745867133140564, "learning_rate": 4.294646534487068e-06, "loss": 2.8355, "step": 57914 }, { "epoch": 2.84, "grad_norm": 0.7075350880622864, "learning_rate": 4.2920511223791695e-06, "loss": 2.8435, "step": 57915 }, { "epoch": 2.84, "grad_norm": 0.8138481974601746, "learning_rate": 4.2894564891091e-06, "loss": 3.016, "step": 57916 }, { "epoch": 2.84, "grad_norm": 0.9259201288223267, "learning_rate": 4.286862634683685e-06, "loss": 2.7868, "step": 57917 }, { "epoch": 2.84, "grad_norm": 0.7305136322975159, "learning_rate": 4.284269559109787e-06, "loss": 2.81, "step": 57918 }, { "epoch": 2.84, "grad_norm": 0.7330555319786072, "learning_rate": 4.2816772623941675e-06, "loss": 2.6985, "step": 57919 }, { "epoch": 2.84, "grad_norm": 0.7787506580352783, "learning_rate": 4.27908574454372e-06, "loss": 2.8231, "step": 57920 }, { "epoch": 2.84, "grad_norm": 0.7437611222267151, "learning_rate": 4.2764950055652395e-06, "loss": 2.8805, "step": 57921 }, { "epoch": 2.84, "grad_norm": 0.7861391305923462, "learning_rate": 4.273905045465553e-06, "loss": 2.9361, "step": 57922 }, { "epoch": 2.84, "grad_norm": 0.7396217584609985, "learning_rate": 4.27131586425149e-06, "loss": 2.7683, "step": 57923 }, { "epoch": 2.84, "grad_norm": 0.7936074137687683, "learning_rate": 4.268727461929877e-06, "loss": 2.794, "step": 57924 }, { "epoch": 2.84, "grad_norm": 0.7759957313537598, "learning_rate": 4.2661398385075426e-06, "loss": 2.7761, "step": 57925 }, { "epoch": 2.84, "grad_norm": 0.7496296167373657, "learning_rate": 4.263552993991248e-06, "loss": 2.6912, "step": 57926 }, { "epoch": 2.84, "grad_norm": 0.7550476789474487, "learning_rate": 4.2609669283878215e-06, "loss": 2.8715, "step": 57927 }, { "epoch": 2.84, "grad_norm": 0.7111081480979919, "learning_rate": 4.258381641704123e-06, "loss": 2.7266, "step": 57928 }, { "epoch": 2.84, "grad_norm": 0.7496201992034912, "learning_rate": 4.255797133946915e-06, "loss": 2.8366, "step": 57929 }, { "epoch": 2.84, "grad_norm": 0.802704393863678, "learning_rate": 4.253213405123057e-06, "loss": 2.9926, "step": 57930 }, { "epoch": 2.84, "grad_norm": 0.7297253608703613, "learning_rate": 4.250630455239312e-06, "loss": 3.123, "step": 57931 }, { "epoch": 2.84, "grad_norm": 0.71900874376297, "learning_rate": 4.248048284302441e-06, "loss": 2.8253, "step": 57932 }, { "epoch": 2.84, "grad_norm": 0.7305431962013245, "learning_rate": 4.245466892319338e-06, "loss": 2.8313, "step": 57933 }, { "epoch": 2.84, "grad_norm": 0.7327029705047607, "learning_rate": 4.242886279296731e-06, "loss": 2.9302, "step": 57934 }, { "epoch": 2.84, "grad_norm": 0.7928759455680847, "learning_rate": 4.240306445241448e-06, "loss": 2.8044, "step": 57935 }, { "epoch": 2.84, "grad_norm": 0.731713056564331, "learning_rate": 4.237727390160251e-06, "loss": 3.0344, "step": 57936 }, { "epoch": 2.84, "grad_norm": 0.736788809299469, "learning_rate": 4.235149114059966e-06, "loss": 2.8996, "step": 57937 }, { "epoch": 2.84, "grad_norm": 0.7987184524536133, "learning_rate": 4.232571616947422e-06, "loss": 2.8778, "step": 57938 }, { "epoch": 2.84, "grad_norm": 0.7894659638404846, "learning_rate": 4.229994898829348e-06, "loss": 2.9336, "step": 57939 }, { "epoch": 2.84, "grad_norm": 0.7339268922805786, "learning_rate": 4.227418959712536e-06, "loss": 2.855, "step": 57940 }, { "epoch": 2.84, "grad_norm": 0.7853479385375977, "learning_rate": 4.224843799603783e-06, "loss": 2.9414, "step": 57941 }, { "epoch": 2.84, "grad_norm": 0.8146184086799622, "learning_rate": 4.2222694185098495e-06, "loss": 2.8615, "step": 57942 }, { "epoch": 2.84, "grad_norm": 0.825524091720581, "learning_rate": 4.2196958164375625e-06, "loss": 2.9887, "step": 57943 }, { "epoch": 2.84, "grad_norm": 0.7087740898132324, "learning_rate": 4.2171229933936515e-06, "loss": 2.742, "step": 57944 }, { "epoch": 2.84, "grad_norm": 0.7632535099983215, "learning_rate": 4.21455094938491e-06, "loss": 2.7973, "step": 57945 }, { "epoch": 2.84, "grad_norm": 0.7354146838188171, "learning_rate": 4.211979684418165e-06, "loss": 3.1476, "step": 57946 }, { "epoch": 2.84, "grad_norm": 0.7568386793136597, "learning_rate": 4.20940919850008e-06, "loss": 2.8029, "step": 57947 }, { "epoch": 2.84, "grad_norm": 0.7767660617828369, "learning_rate": 4.206839491637515e-06, "loss": 2.9156, "step": 57948 }, { "epoch": 2.84, "grad_norm": 0.7283722162246704, "learning_rate": 4.204270563837198e-06, "loss": 2.8352, "step": 57949 }, { "epoch": 2.84, "grad_norm": 0.7674680948257446, "learning_rate": 4.201702415105923e-06, "loss": 2.6455, "step": 57950 }, { "epoch": 2.84, "grad_norm": 0.751236081123352, "learning_rate": 4.199135045450386e-06, "loss": 2.8124, "step": 57951 }, { "epoch": 2.84, "grad_norm": 0.7168905735015869, "learning_rate": 4.196568454877447e-06, "loss": 2.817, "step": 57952 }, { "epoch": 2.84, "grad_norm": 0.7038781046867371, "learning_rate": 4.194002643393834e-06, "loss": 2.5922, "step": 57953 }, { "epoch": 2.84, "grad_norm": 0.790746808052063, "learning_rate": 4.191437611006243e-06, "loss": 2.7898, "step": 57954 }, { "epoch": 2.84, "grad_norm": 0.7640130519866943, "learning_rate": 4.188873357721501e-06, "loss": 2.9048, "step": 57955 }, { "epoch": 2.84, "grad_norm": 0.7775148749351501, "learning_rate": 4.186309883546335e-06, "loss": 2.9071, "step": 57956 }, { "epoch": 2.84, "grad_norm": 0.7022102475166321, "learning_rate": 4.183747188487474e-06, "loss": 2.9208, "step": 57957 }, { "epoch": 2.84, "grad_norm": 0.8010618686676025, "learning_rate": 4.181185272551712e-06, "loss": 2.9989, "step": 57958 }, { "epoch": 2.84, "grad_norm": 0.7519500851631165, "learning_rate": 4.178624135745779e-06, "loss": 2.8834, "step": 57959 }, { "epoch": 2.84, "grad_norm": 0.745065450668335, "learning_rate": 4.176063778076433e-06, "loss": 2.8246, "step": 57960 }, { "epoch": 2.84, "grad_norm": 0.7399493455886841, "learning_rate": 4.17350419955037e-06, "loss": 2.997, "step": 57961 }, { "epoch": 2.84, "grad_norm": 0.7513877153396606, "learning_rate": 4.170945400174386e-06, "loss": 2.8798, "step": 57962 }, { "epoch": 2.84, "grad_norm": 0.7985098958015442, "learning_rate": 4.168387379955207e-06, "loss": 2.7076, "step": 57963 }, { "epoch": 2.84, "grad_norm": 0.7976999878883362, "learning_rate": 4.1658301388995285e-06, "loss": 2.992, "step": 57964 }, { "epoch": 2.84, "grad_norm": 0.7618657946586609, "learning_rate": 4.163273677014145e-06, "loss": 2.8952, "step": 57965 }, { "epoch": 2.84, "grad_norm": 0.9525282979011536, "learning_rate": 4.1607179943057176e-06, "loss": 2.7617, "step": 57966 }, { "epoch": 2.84, "grad_norm": 0.7083631753921509, "learning_rate": 4.1581630907810745e-06, "loss": 2.9582, "step": 57967 }, { "epoch": 2.84, "grad_norm": 0.7903333902359009, "learning_rate": 4.155608966446911e-06, "loss": 2.9069, "step": 57968 }, { "epoch": 2.84, "grad_norm": 0.7692322134971619, "learning_rate": 4.153055621309886e-06, "loss": 2.8151, "step": 57969 }, { "epoch": 2.84, "grad_norm": 0.8629139065742493, "learning_rate": 4.150503055376797e-06, "loss": 2.9081, "step": 57970 }, { "epoch": 2.84, "grad_norm": 0.7383148074150085, "learning_rate": 4.147951268654337e-06, "loss": 2.7084, "step": 57971 }, { "epoch": 2.84, "grad_norm": 0.7752810716629028, "learning_rate": 4.145400261149235e-06, "loss": 2.7297, "step": 57972 }, { "epoch": 2.84, "grad_norm": 0.7584665417671204, "learning_rate": 4.142850032868217e-06, "loss": 2.7295, "step": 57973 }, { "epoch": 2.84, "grad_norm": 0.7689148783683777, "learning_rate": 4.140300583817979e-06, "loss": 2.9014, "step": 57974 }, { "epoch": 2.84, "grad_norm": 0.7338578104972839, "learning_rate": 4.137751914005283e-06, "loss": 2.6482, "step": 57975 }, { "epoch": 2.84, "grad_norm": 0.7252892255783081, "learning_rate": 4.135204023436822e-06, "loss": 3.0192, "step": 57976 }, { "epoch": 2.84, "grad_norm": 0.7320716977119446, "learning_rate": 4.132656912119226e-06, "loss": 2.8298, "step": 57977 }, { "epoch": 2.84, "grad_norm": 0.7503402829170227, "learning_rate": 4.130110580059321e-06, "loss": 2.7203, "step": 57978 }, { "epoch": 2.84, "grad_norm": 0.756325364112854, "learning_rate": 4.127565027263735e-06, "loss": 2.9428, "step": 57979 }, { "epoch": 2.84, "grad_norm": 0.7448549270629883, "learning_rate": 4.125020253739231e-06, "loss": 2.8511, "step": 57980 }, { "epoch": 2.84, "grad_norm": 0.7655499577522278, "learning_rate": 4.122476259492469e-06, "loss": 2.8666, "step": 57981 }, { "epoch": 2.84, "grad_norm": 0.7265483736991882, "learning_rate": 4.119933044530177e-06, "loss": 2.8987, "step": 57982 }, { "epoch": 2.84, "grad_norm": 0.7448196411132812, "learning_rate": 4.117390608859017e-06, "loss": 2.792, "step": 57983 }, { "epoch": 2.84, "grad_norm": 0.7171865105628967, "learning_rate": 4.114848952485683e-06, "loss": 3.0098, "step": 57984 }, { "epoch": 2.84, "grad_norm": 0.8641087412834167, "learning_rate": 4.112308075416937e-06, "loss": 2.7329, "step": 57985 }, { "epoch": 2.84, "grad_norm": 0.7868609428405762, "learning_rate": 4.109767977659406e-06, "loss": 2.8346, "step": 57986 }, { "epoch": 2.84, "grad_norm": 0.7726017236709595, "learning_rate": 4.107228659219819e-06, "loss": 2.8714, "step": 57987 }, { "epoch": 2.84, "grad_norm": 0.7346259951591492, "learning_rate": 4.104690120104803e-06, "loss": 2.9884, "step": 57988 }, { "epoch": 2.84, "grad_norm": 0.7412421107292175, "learning_rate": 4.102152360321121e-06, "loss": 2.9378, "step": 57989 }, { "epoch": 2.84, "grad_norm": 0.7275198101997375, "learning_rate": 4.099615379875432e-06, "loss": 2.8435, "step": 57990 }, { "epoch": 2.84, "grad_norm": 0.7367424368858337, "learning_rate": 4.097079178774399e-06, "loss": 2.8953, "step": 57991 }, { "epoch": 2.84, "grad_norm": 0.7552502751350403, "learning_rate": 4.094543757024715e-06, "loss": 2.7933, "step": 57992 }, { "epoch": 2.84, "grad_norm": 0.7371522188186646, "learning_rate": 4.09200911463301e-06, "loss": 3.0965, "step": 57993 }, { "epoch": 2.84, "grad_norm": 0.785754919052124, "learning_rate": 4.089475251606045e-06, "loss": 2.8669, "step": 57994 }, { "epoch": 2.84, "grad_norm": 0.793599009513855, "learning_rate": 4.086942167950446e-06, "loss": 2.6429, "step": 57995 }, { "epoch": 2.84, "grad_norm": 0.7560648918151855, "learning_rate": 4.084409863672877e-06, "loss": 3.0258, "step": 57996 }, { "epoch": 2.84, "grad_norm": 0.7576151490211487, "learning_rate": 4.081878338780065e-06, "loss": 2.9662, "step": 57997 }, { "epoch": 2.84, "grad_norm": 0.7509180903434753, "learning_rate": 4.079347593278604e-06, "loss": 2.7924, "step": 57998 }, { "epoch": 2.84, "grad_norm": 0.7320362329483032, "learning_rate": 4.076817627175189e-06, "loss": 3.1727, "step": 57999 }, { "epoch": 2.84, "grad_norm": 0.7936714887619019, "learning_rate": 4.074288440476514e-06, "loss": 2.9039, "step": 58000 }, { "epoch": 2.84, "grad_norm": 0.7300601005554199, "learning_rate": 4.071760033189175e-06, "loss": 2.7854, "step": 58001 }, { "epoch": 2.84, "grad_norm": 0.8125167489051819, "learning_rate": 4.0692324053199e-06, "loss": 2.8679, "step": 58002 }, { "epoch": 2.84, "grad_norm": 0.768974244594574, "learning_rate": 4.066705556875283e-06, "loss": 2.8779, "step": 58003 }, { "epoch": 2.84, "grad_norm": 0.8136953711509705, "learning_rate": 4.0641794878620184e-06, "loss": 2.9216, "step": 58004 }, { "epoch": 2.84, "grad_norm": 0.7406480312347412, "learning_rate": 4.061654198286768e-06, "loss": 2.853, "step": 58005 }, { "epoch": 2.84, "grad_norm": 0.7325662970542908, "learning_rate": 4.059129688156127e-06, "loss": 2.8341, "step": 58006 }, { "epoch": 2.84, "grad_norm": 0.7513902187347412, "learning_rate": 4.056605957476822e-06, "loss": 3.0636, "step": 58007 }, { "epoch": 2.84, "grad_norm": 0.7709435224533081, "learning_rate": 4.0540830062554485e-06, "loss": 2.8569, "step": 58008 }, { "epoch": 2.84, "grad_norm": 0.7840178608894348, "learning_rate": 4.051560834498668e-06, "loss": 2.9059, "step": 58009 }, { "epoch": 2.84, "grad_norm": 0.7339916825294495, "learning_rate": 4.049039442213109e-06, "loss": 3.0242, "step": 58010 }, { "epoch": 2.84, "grad_norm": 0.7548692226409912, "learning_rate": 4.046518829405432e-06, "loss": 2.9171, "step": 58011 }, { "epoch": 2.84, "grad_norm": 0.7953229546546936, "learning_rate": 4.0439989960822985e-06, "loss": 3.0365, "step": 58012 }, { "epoch": 2.84, "grad_norm": 0.7593258023262024, "learning_rate": 4.04147994225027e-06, "loss": 2.9056, "step": 58013 }, { "epoch": 2.84, "grad_norm": 0.7055078744888306, "learning_rate": 4.0389616679160074e-06, "loss": 2.7517, "step": 58014 }, { "epoch": 2.84, "grad_norm": 0.7557649612426758, "learning_rate": 4.036444173086206e-06, "loss": 2.7834, "step": 58015 }, { "epoch": 2.84, "grad_norm": 0.7741184234619141, "learning_rate": 4.0339274577674275e-06, "loss": 2.812, "step": 58016 }, { "epoch": 2.84, "grad_norm": 0.7468675374984741, "learning_rate": 4.031411521966332e-06, "loss": 3.0378, "step": 58017 }, { "epoch": 2.84, "grad_norm": 0.7847301959991455, "learning_rate": 4.028896365689516e-06, "loss": 2.9056, "step": 58018 }, { "epoch": 2.84, "grad_norm": 0.7446919083595276, "learning_rate": 4.026381988943672e-06, "loss": 2.8941, "step": 58019 }, { "epoch": 2.84, "grad_norm": 0.7204757332801819, "learning_rate": 4.0238683917353634e-06, "loss": 2.852, "step": 58020 }, { "epoch": 2.84, "grad_norm": 0.7593426704406738, "learning_rate": 4.021355574071217e-06, "loss": 2.8235, "step": 58021 }, { "epoch": 2.84, "grad_norm": 0.7240512371063232, "learning_rate": 4.018843535957861e-06, "loss": 2.7313, "step": 58022 }, { "epoch": 2.84, "grad_norm": 0.7632853388786316, "learning_rate": 4.01633227740189e-06, "loss": 2.9329, "step": 58023 }, { "epoch": 2.84, "grad_norm": 0.753861665725708, "learning_rate": 4.013821798409933e-06, "loss": 2.913, "step": 58024 }, { "epoch": 2.84, "grad_norm": 0.8168571591377258, "learning_rate": 4.0113120989886176e-06, "loss": 2.8373, "step": 58025 }, { "epoch": 2.84, "grad_norm": 0.7445526719093323, "learning_rate": 4.008803179144571e-06, "loss": 2.951, "step": 58026 }, { "epoch": 2.84, "grad_norm": 0.7250993251800537, "learning_rate": 4.006295038884355e-06, "loss": 3.1706, "step": 58027 }, { "epoch": 2.84, "grad_norm": 0.7294281721115112, "learning_rate": 4.003787678214565e-06, "loss": 2.9983, "step": 58028 }, { "epoch": 2.84, "grad_norm": 0.8010278344154358, "learning_rate": 4.0012810971418285e-06, "loss": 2.7596, "step": 58029 }, { "epoch": 2.84, "grad_norm": 0.7343051433563232, "learning_rate": 3.998775295672773e-06, "loss": 2.9007, "step": 58030 }, { "epoch": 2.84, "grad_norm": 0.7521501779556274, "learning_rate": 3.9962702738139955e-06, "loss": 2.9278, "step": 58031 }, { "epoch": 2.84, "grad_norm": 0.7345162630081177, "learning_rate": 3.993766031572055e-06, "loss": 3.1938, "step": 58032 }, { "epoch": 2.84, "grad_norm": 0.7414243817329407, "learning_rate": 3.9912625689535795e-06, "loss": 2.9453, "step": 58033 }, { "epoch": 2.84, "grad_norm": 0.7303183674812317, "learning_rate": 3.988759885965131e-06, "loss": 2.8327, "step": 58034 }, { "epoch": 2.84, "grad_norm": 0.7680356502532959, "learning_rate": 3.986257982613339e-06, "loss": 2.9224, "step": 58035 }, { "epoch": 2.84, "grad_norm": 0.7452159523963928, "learning_rate": 3.983756858904763e-06, "loss": 2.9883, "step": 58036 }, { "epoch": 2.84, "grad_norm": 0.7528615593910217, "learning_rate": 3.981256514845999e-06, "loss": 2.7967, "step": 58037 }, { "epoch": 2.84, "grad_norm": 0.7940719723701477, "learning_rate": 3.97875695044364e-06, "loss": 2.8682, "step": 58038 }, { "epoch": 2.84, "grad_norm": 0.7146586179733276, "learning_rate": 3.9762581657042496e-06, "loss": 3.025, "step": 58039 }, { "epoch": 2.84, "grad_norm": 0.7448782920837402, "learning_rate": 3.973760160634453e-06, "loss": 2.828, "step": 58040 }, { "epoch": 2.84, "grad_norm": 0.7157915830612183, "learning_rate": 3.971262935240782e-06, "loss": 2.9029, "step": 58041 }, { "epoch": 2.84, "grad_norm": 0.824224591255188, "learning_rate": 3.968766489529862e-06, "loss": 2.7699, "step": 58042 }, { "epoch": 2.84, "grad_norm": 0.7506048679351807, "learning_rate": 3.966270823508189e-06, "loss": 2.8328, "step": 58043 }, { "epoch": 2.84, "grad_norm": 0.7083612084388733, "learning_rate": 3.963775937182423e-06, "loss": 2.8861, "step": 58044 }, { "epoch": 2.84, "grad_norm": 0.7859985828399658, "learning_rate": 3.9612818305590934e-06, "loss": 2.6817, "step": 58045 }, { "epoch": 2.84, "grad_norm": 0.7196922898292542, "learning_rate": 3.958788503644761e-06, "loss": 3.0316, "step": 58046 }, { "epoch": 2.84, "grad_norm": 0.7509509325027466, "learning_rate": 3.95629595644602e-06, "loss": 2.9547, "step": 58047 }, { "epoch": 2.84, "grad_norm": 0.7594537138938904, "learning_rate": 3.953804188969401e-06, "loss": 3.013, "step": 58048 }, { "epoch": 2.84, "grad_norm": 0.7385445237159729, "learning_rate": 3.951313201221495e-06, "loss": 2.6765, "step": 58049 }, { "epoch": 2.84, "grad_norm": 0.7691807746887207, "learning_rate": 3.9488229932088665e-06, "loss": 2.778, "step": 58050 }, { "epoch": 2.84, "grad_norm": 0.8221585154533386, "learning_rate": 3.946333564938042e-06, "loss": 2.7925, "step": 58051 }, { "epoch": 2.84, "grad_norm": 0.7647941708564758, "learning_rate": 3.943844916415617e-06, "loss": 2.9116, "step": 58052 }, { "epoch": 2.85, "grad_norm": 0.7364143133163452, "learning_rate": 3.941357047648119e-06, "loss": 2.872, "step": 58053 }, { "epoch": 2.85, "grad_norm": 0.7513567805290222, "learning_rate": 3.9388699586421104e-06, "loss": 2.9102, "step": 58054 }, { "epoch": 2.85, "grad_norm": 0.8263747692108154, "learning_rate": 3.936383649404151e-06, "loss": 3.1531, "step": 58055 }, { "epoch": 2.85, "grad_norm": 0.7924360632896423, "learning_rate": 3.93389811994077e-06, "loss": 3.0245, "step": 58056 }, { "epoch": 2.85, "grad_norm": 0.7682927250862122, "learning_rate": 3.931413370258529e-06, "loss": 2.8791, "step": 58057 }, { "epoch": 2.85, "grad_norm": 0.7378009557723999, "learning_rate": 3.928929400363922e-06, "loss": 2.8736, "step": 58058 }, { "epoch": 2.85, "grad_norm": 0.742211103439331, "learning_rate": 3.926446210263579e-06, "loss": 2.6486, "step": 58059 }, { "epoch": 2.85, "grad_norm": 0.7150681614875793, "learning_rate": 3.923963799963992e-06, "loss": 3.034, "step": 58060 }, { "epoch": 2.85, "grad_norm": 0.726213276386261, "learning_rate": 3.92148216947169e-06, "loss": 2.9354, "step": 58061 }, { "epoch": 2.85, "grad_norm": 0.7555464506149292, "learning_rate": 3.919001318793235e-06, "loss": 2.6879, "step": 58062 }, { "epoch": 2.85, "grad_norm": 0.7286418080329895, "learning_rate": 3.916521247935156e-06, "loss": 2.8126, "step": 58063 }, { "epoch": 2.85, "grad_norm": 0.7420178651809692, "learning_rate": 3.914041956903979e-06, "loss": 2.7675, "step": 58064 }, { "epoch": 2.85, "grad_norm": 0.7674248814582825, "learning_rate": 3.9115634457062e-06, "loss": 3.0303, "step": 58065 }, { "epoch": 2.85, "grad_norm": 0.7217745184898376, "learning_rate": 3.909085714348381e-06, "loss": 2.9029, "step": 58066 }, { "epoch": 2.85, "grad_norm": 0.8204011917114258, "learning_rate": 3.906608762837082e-06, "loss": 2.9336, "step": 58067 }, { "epoch": 2.85, "grad_norm": 0.744408369064331, "learning_rate": 3.904132591178766e-06, "loss": 2.9035, "step": 58068 }, { "epoch": 2.85, "grad_norm": 0.7998400926589966, "learning_rate": 3.901657199379993e-06, "loss": 2.8445, "step": 58069 }, { "epoch": 2.85, "grad_norm": 0.7387979030609131, "learning_rate": 3.899182587447258e-06, "loss": 2.7588, "step": 58070 }, { "epoch": 2.85, "grad_norm": 0.7533370852470398, "learning_rate": 3.89670875538709e-06, "loss": 2.8385, "step": 58071 }, { "epoch": 2.85, "grad_norm": 0.7749383449554443, "learning_rate": 3.8942357032060166e-06, "loss": 2.8858, "step": 58072 }, { "epoch": 2.85, "grad_norm": 0.7244678735733032, "learning_rate": 3.891763430910533e-06, "loss": 2.9102, "step": 58073 }, { "epoch": 2.85, "grad_norm": 0.7526236176490784, "learning_rate": 3.8892919385071664e-06, "loss": 2.7091, "step": 58074 }, { "epoch": 2.85, "grad_norm": 0.7498911619186401, "learning_rate": 3.886821226002379e-06, "loss": 2.7809, "step": 58075 }, { "epoch": 2.85, "grad_norm": 0.8182533383369446, "learning_rate": 3.884351293402766e-06, "loss": 2.8096, "step": 58076 }, { "epoch": 2.85, "grad_norm": 0.7636764645576477, "learning_rate": 3.8818821407147875e-06, "loss": 2.8166, "step": 58077 }, { "epoch": 2.85, "grad_norm": 0.7577216625213623, "learning_rate": 3.879413767944905e-06, "loss": 3.0238, "step": 58078 }, { "epoch": 2.85, "grad_norm": 0.7836326360702515, "learning_rate": 3.876946175099682e-06, "loss": 2.8152, "step": 58079 }, { "epoch": 2.85, "grad_norm": 0.7404009699821472, "learning_rate": 3.874479362185545e-06, "loss": 2.831, "step": 58080 }, { "epoch": 2.85, "grad_norm": 0.7890130877494812, "learning_rate": 3.872013329209089e-06, "loss": 2.7029, "step": 58081 }, { "epoch": 2.85, "grad_norm": 0.821405827999115, "learning_rate": 3.869548076176743e-06, "loss": 2.731, "step": 58082 }, { "epoch": 2.85, "grad_norm": 0.7491986155509949, "learning_rate": 3.867083603095e-06, "loss": 2.6854, "step": 58083 }, { "epoch": 2.85, "grad_norm": 0.7356759309768677, "learning_rate": 3.86461990997039e-06, "loss": 3.0514, "step": 58084 }, { "epoch": 2.85, "grad_norm": 0.7587080597877502, "learning_rate": 3.862156996809407e-06, "loss": 3.1083, "step": 58085 }, { "epoch": 2.85, "grad_norm": 0.7740174531936646, "learning_rate": 3.859694863618445e-06, "loss": 2.9711, "step": 58086 }, { "epoch": 2.85, "grad_norm": 0.761070191860199, "learning_rate": 3.857233510404101e-06, "loss": 3.0597, "step": 58087 }, { "epoch": 2.85, "grad_norm": 0.7623142004013062, "learning_rate": 3.854772937172768e-06, "loss": 2.7424, "step": 58088 }, { "epoch": 2.85, "grad_norm": 0.7792325615882874, "learning_rate": 3.852313143931007e-06, "loss": 2.8965, "step": 58089 }, { "epoch": 2.85, "grad_norm": 0.7377564311027527, "learning_rate": 3.849854130685248e-06, "loss": 2.9561, "step": 58090 }, { "epoch": 2.85, "grad_norm": 0.7853538393974304, "learning_rate": 3.847395897441985e-06, "loss": 3.1712, "step": 58091 }, { "epoch": 2.85, "grad_norm": 0.7288347482681274, "learning_rate": 3.844938444207679e-06, "loss": 2.7964, "step": 58092 }, { "epoch": 2.85, "grad_norm": 0.7623826265335083, "learning_rate": 3.842481770988792e-06, "loss": 2.8275, "step": 58093 }, { "epoch": 2.85, "grad_norm": 0.7762326598167419, "learning_rate": 3.840025877791852e-06, "loss": 2.961, "step": 58094 }, { "epoch": 2.85, "grad_norm": 0.7291229963302612, "learning_rate": 3.837570764623254e-06, "loss": 2.9422, "step": 58095 }, { "epoch": 2.85, "grad_norm": 0.7547798752784729, "learning_rate": 3.8351164314894935e-06, "loss": 2.7757, "step": 58096 }, { "epoch": 2.85, "grad_norm": 0.7603956460952759, "learning_rate": 3.8326628783970306e-06, "loss": 3.2117, "step": 58097 }, { "epoch": 2.85, "grad_norm": 0.7306252121925354, "learning_rate": 3.8302101053523605e-06, "loss": 2.7616, "step": 58098 }, { "epoch": 2.85, "grad_norm": 0.7425011992454529, "learning_rate": 3.8277581123619114e-06, "loss": 2.9913, "step": 58099 }, { "epoch": 2.85, "grad_norm": 0.8286489248275757, "learning_rate": 3.825306899432145e-06, "loss": 2.7551, "step": 58100 }, { "epoch": 2.85, "grad_norm": 0.7279012799263, "learning_rate": 3.8228564665695235e-06, "loss": 2.9012, "step": 58101 }, { "epoch": 2.85, "grad_norm": 0.713568925857544, "learning_rate": 3.820406813780508e-06, "loss": 2.9402, "step": 58102 }, { "epoch": 2.85, "grad_norm": 0.7257249355316162, "learning_rate": 3.8179579410715255e-06, "loss": 2.6947, "step": 58103 }, { "epoch": 2.85, "grad_norm": 0.7878603935241699, "learning_rate": 3.815509848449039e-06, "loss": 2.8945, "step": 58104 }, { "epoch": 2.85, "grad_norm": 0.7164974808692932, "learning_rate": 3.8130625359194756e-06, "loss": 3.0317, "step": 58105 }, { "epoch": 2.85, "grad_norm": 0.731782853603363, "learning_rate": 3.810616003489364e-06, "loss": 2.9602, "step": 58106 }, { "epoch": 2.85, "grad_norm": 0.7645469903945923, "learning_rate": 3.808170251165066e-06, "loss": 2.8414, "step": 58107 }, { "epoch": 2.85, "grad_norm": 0.7678006887435913, "learning_rate": 3.805725278953009e-06, "loss": 2.8792, "step": 58108 }, { "epoch": 2.85, "grad_norm": 0.727119505405426, "learning_rate": 3.8032810868596888e-06, "loss": 3.0004, "step": 58109 }, { "epoch": 2.85, "grad_norm": 0.8259202837944031, "learning_rate": 3.8008376748915324e-06, "loss": 2.7632, "step": 58110 }, { "epoch": 2.85, "grad_norm": 0.7696822285652161, "learning_rate": 3.798395043054936e-06, "loss": 2.7003, "step": 58111 }, { "epoch": 2.85, "grad_norm": 0.753521203994751, "learning_rate": 3.7959531913563932e-06, "loss": 2.9795, "step": 58112 }, { "epoch": 2.85, "grad_norm": 0.7405487298965454, "learning_rate": 3.7935121198023e-06, "loss": 2.8325, "step": 58113 }, { "epoch": 2.85, "grad_norm": 0.7650240063667297, "learning_rate": 3.7910718283990837e-06, "loss": 3.0367, "step": 58114 }, { "epoch": 2.85, "grad_norm": 0.7302567958831787, "learning_rate": 3.788632317153173e-06, "loss": 2.7721, "step": 58115 }, { "epoch": 2.85, "grad_norm": 0.7484518885612488, "learning_rate": 3.786193586070996e-06, "loss": 2.791, "step": 58116 }, { "epoch": 2.85, "grad_norm": 0.7105563282966614, "learning_rate": 3.7837556351589802e-06, "loss": 2.9578, "step": 58117 }, { "epoch": 2.85, "grad_norm": 0.7681797742843628, "learning_rate": 3.7813184644235217e-06, "loss": 3.0148, "step": 58118 }, { "epoch": 2.85, "grad_norm": 0.7496532797813416, "learning_rate": 3.7788820738711145e-06, "loss": 3.0347, "step": 58119 }, { "epoch": 2.85, "grad_norm": 0.7610386610031128, "learning_rate": 3.7764464635080873e-06, "loss": 2.8305, "step": 58120 }, { "epoch": 2.85, "grad_norm": 0.7656939625740051, "learning_rate": 3.774011633340901e-06, "loss": 2.881, "step": 58121 }, { "epoch": 2.85, "grad_norm": 0.7909092307090759, "learning_rate": 3.771577583375951e-06, "loss": 2.9376, "step": 58122 }, { "epoch": 2.85, "grad_norm": 0.7618745565414429, "learning_rate": 3.7691443136196653e-06, "loss": 2.592, "step": 58123 }, { "epoch": 2.85, "grad_norm": 0.7755120992660522, "learning_rate": 3.766711824078439e-06, "loss": 2.8602, "step": 58124 }, { "epoch": 2.85, "grad_norm": 0.7302433252334595, "learning_rate": 3.7642801147586664e-06, "loss": 2.7635, "step": 58125 }, { "epoch": 2.85, "grad_norm": 0.7398534417152405, "learning_rate": 3.7618491856667764e-06, "loss": 2.9394, "step": 58126 }, { "epoch": 2.85, "grad_norm": 0.7428982853889465, "learning_rate": 3.7594190368091638e-06, "loss": 2.8178, "step": 58127 }, { "epoch": 2.85, "grad_norm": 0.7572886347770691, "learning_rate": 3.756989668192256e-06, "loss": 2.7583, "step": 58128 }, { "epoch": 2.85, "grad_norm": 0.7500163912773132, "learning_rate": 3.7545610798224156e-06, "loss": 2.7213, "step": 58129 }, { "epoch": 2.85, "grad_norm": 0.7317116260528564, "learning_rate": 3.7521332717060372e-06, "loss": 2.727, "step": 58130 }, { "epoch": 2.85, "grad_norm": 0.7358356714248657, "learning_rate": 3.749706243849482e-06, "loss": 2.8328, "step": 58131 }, { "epoch": 2.85, "grad_norm": 0.7284991145133972, "learning_rate": 3.747279996259245e-06, "loss": 2.8797, "step": 58132 }, { "epoch": 2.85, "grad_norm": 0.7599244713783264, "learning_rate": 3.744854528941621e-06, "loss": 2.8693, "step": 58133 }, { "epoch": 2.85, "grad_norm": 0.791732907295227, "learning_rate": 3.742429841903072e-06, "loss": 2.6926, "step": 58134 }, { "epoch": 2.85, "grad_norm": 0.7750145792961121, "learning_rate": 3.740005935149959e-06, "loss": 3.0105, "step": 58135 }, { "epoch": 2.85, "grad_norm": 0.7046689391136169, "learning_rate": 3.7375828086886106e-06, "loss": 2.9164, "step": 58136 }, { "epoch": 2.85, "grad_norm": 0.729168176651001, "learning_rate": 3.735160462525455e-06, "loss": 3.0215, "step": 58137 }, { "epoch": 2.85, "grad_norm": 0.7688641548156738, "learning_rate": 3.732738896666887e-06, "loss": 2.8869, "step": 58138 }, { "epoch": 2.85, "grad_norm": 0.7332656383514404, "learning_rate": 3.730318111119268e-06, "loss": 2.721, "step": 58139 }, { "epoch": 2.85, "grad_norm": 0.7618536353111267, "learning_rate": 3.72789810588896e-06, "loss": 2.9654, "step": 58140 }, { "epoch": 2.85, "grad_norm": 0.7432697415351868, "learning_rate": 3.7254788809823577e-06, "loss": 2.8195, "step": 58141 }, { "epoch": 2.85, "grad_norm": 0.7419174313545227, "learning_rate": 3.7230604364057894e-06, "loss": 2.9438, "step": 58142 }, { "epoch": 2.85, "grad_norm": 0.7732837200164795, "learning_rate": 3.7206427721657162e-06, "loss": 2.9231, "step": 58143 }, { "epoch": 2.85, "grad_norm": 0.7398931980133057, "learning_rate": 3.7182258882684333e-06, "loss": 2.7001, "step": 58144 }, { "epoch": 2.85, "grad_norm": 0.7343015670776367, "learning_rate": 3.715809784720303e-06, "loss": 2.7543, "step": 58145 }, { "epoch": 2.85, "grad_norm": 0.7587280869483948, "learning_rate": 3.713394461527719e-06, "loss": 2.8208, "step": 58146 }, { "epoch": 2.85, "grad_norm": 0.74312824010849, "learning_rate": 3.7109799186970437e-06, "loss": 2.7435, "step": 58147 }, { "epoch": 2.85, "grad_norm": 0.7362629175186157, "learning_rate": 3.708566156234605e-06, "loss": 2.8137, "step": 58148 }, { "epoch": 2.85, "grad_norm": 0.7702608108520508, "learning_rate": 3.7061531741467975e-06, "loss": 2.9806, "step": 58149 }, { "epoch": 2.85, "grad_norm": 0.7515386343002319, "learning_rate": 3.7037409724399504e-06, "loss": 2.8289, "step": 58150 }, { "epoch": 2.85, "grad_norm": 0.762421190738678, "learning_rate": 3.7013295511204576e-06, "loss": 2.8378, "step": 58151 }, { "epoch": 2.85, "grad_norm": 0.7281259298324585, "learning_rate": 3.6989189101945817e-06, "loss": 2.8135, "step": 58152 }, { "epoch": 2.85, "grad_norm": 0.782008171081543, "learning_rate": 3.69650904966875e-06, "loss": 3.1768, "step": 58153 }, { "epoch": 2.85, "grad_norm": 0.7807068824768066, "learning_rate": 3.6940999695493247e-06, "loss": 2.9561, "step": 58154 }, { "epoch": 2.85, "grad_norm": 0.8265916705131531, "learning_rate": 3.691691669842567e-06, "loss": 2.907, "step": 58155 }, { "epoch": 2.85, "grad_norm": 0.7538347244262695, "learning_rate": 3.689284150554905e-06, "loss": 2.9961, "step": 58156 }, { "epoch": 2.85, "grad_norm": 0.7269682288169861, "learning_rate": 3.6868774116926346e-06, "loss": 2.894, "step": 58157 }, { "epoch": 2.85, "grad_norm": 0.7205626964569092, "learning_rate": 3.6844714532620836e-06, "loss": 2.9546, "step": 58158 }, { "epoch": 2.85, "grad_norm": 0.7215340733528137, "learning_rate": 3.6820662752696127e-06, "loss": 2.8961, "step": 58159 }, { "epoch": 2.85, "grad_norm": 0.7290651202201843, "learning_rate": 3.6796618777215516e-06, "loss": 2.9625, "step": 58160 }, { "epoch": 2.85, "grad_norm": 0.7407297492027283, "learning_rate": 3.6772582606242605e-06, "loss": 2.9215, "step": 58161 }, { "epoch": 2.85, "grad_norm": 0.8198636770248413, "learning_rate": 3.6748554239840023e-06, "loss": 2.954, "step": 58162 }, { "epoch": 2.85, "grad_norm": 0.7506884932518005, "learning_rate": 3.6724533678071376e-06, "loss": 2.9087, "step": 58163 }, { "epoch": 2.85, "grad_norm": 0.7431238889694214, "learning_rate": 3.670052092100062e-06, "loss": 2.8626, "step": 58164 }, { "epoch": 2.85, "grad_norm": 0.7417303323745728, "learning_rate": 3.6676515968690035e-06, "loss": 3.2512, "step": 58165 }, { "epoch": 2.85, "grad_norm": 0.7299365997314453, "learning_rate": 3.6652518821203236e-06, "loss": 2.8178, "step": 58166 }, { "epoch": 2.85, "grad_norm": 0.7602360248565674, "learning_rate": 3.6628529478603176e-06, "loss": 2.8408, "step": 58167 }, { "epoch": 2.85, "grad_norm": 0.7500886917114258, "learning_rate": 3.6604547940953468e-06, "loss": 2.9354, "step": 58168 }, { "epoch": 2.85, "grad_norm": 0.740761399269104, "learning_rate": 3.6580574208317058e-06, "loss": 3.0434, "step": 58169 }, { "epoch": 2.85, "grad_norm": 0.7828353643417358, "learning_rate": 3.6556608280757237e-06, "loss": 2.9932, "step": 58170 }, { "epoch": 2.85, "grad_norm": 0.8029820322990417, "learning_rate": 3.6532650158336952e-06, "loss": 2.9728, "step": 58171 }, { "epoch": 2.85, "grad_norm": 0.8066024780273438, "learning_rate": 3.650869984111915e-06, "loss": 2.7727, "step": 58172 }, { "epoch": 2.85, "grad_norm": 0.7479804158210754, "learning_rate": 3.6484757329167115e-06, "loss": 2.7737, "step": 58173 }, { "epoch": 2.85, "grad_norm": 0.7437139749526978, "learning_rate": 3.6460822622544126e-06, "loss": 2.8592, "step": 58174 }, { "epoch": 2.85, "grad_norm": 0.7401143312454224, "learning_rate": 3.6436895721312476e-06, "loss": 2.7352, "step": 58175 }, { "epoch": 2.85, "grad_norm": 0.7213879823684692, "learning_rate": 3.6412976625536105e-06, "loss": 3.0583, "step": 58176 }, { "epoch": 2.85, "grad_norm": 0.7483210563659668, "learning_rate": 3.6389065335277632e-06, "loss": 2.8794, "step": 58177 }, { "epoch": 2.85, "grad_norm": 0.7725558280944824, "learning_rate": 3.636516185059968e-06, "loss": 2.8978, "step": 58178 }, { "epoch": 2.85, "grad_norm": 0.7521719336509705, "learning_rate": 3.6341266171566185e-06, "loss": 2.8083, "step": 58179 }, { "epoch": 2.85, "grad_norm": 0.7107489705085754, "learning_rate": 3.6317378298238774e-06, "loss": 2.9908, "step": 58180 }, { "epoch": 2.85, "grad_norm": 0.7555409669876099, "learning_rate": 3.6293498230681397e-06, "loss": 2.6925, "step": 58181 }, { "epoch": 2.85, "grad_norm": 0.7039787769317627, "learning_rate": 3.6269625968955997e-06, "loss": 2.9181, "step": 58182 }, { "epoch": 2.85, "grad_norm": 0.7815123200416565, "learning_rate": 3.6245761513126525e-06, "loss": 2.9034, "step": 58183 }, { "epoch": 2.85, "grad_norm": 0.8096544146537781, "learning_rate": 3.622190486325527e-06, "loss": 2.9037, "step": 58184 }, { "epoch": 2.85, "grad_norm": 0.7660767436027527, "learning_rate": 3.6198056019404843e-06, "loss": 2.7394, "step": 58185 }, { "epoch": 2.85, "grad_norm": 0.7837504744529724, "learning_rate": 3.617421498163886e-06, "loss": 2.9246, "step": 58186 }, { "epoch": 2.85, "grad_norm": 0.7592591643333435, "learning_rate": 3.6150381750019276e-06, "loss": 2.9072, "step": 58187 }, { "epoch": 2.85, "grad_norm": 0.7563009858131409, "learning_rate": 3.6126556324609034e-06, "loss": 2.9115, "step": 58188 }, { "epoch": 2.85, "grad_norm": 0.7264691591262817, "learning_rate": 3.6102738705471425e-06, "loss": 2.9466, "step": 58189 }, { "epoch": 2.85, "grad_norm": 0.6998595595359802, "learning_rate": 3.6078928892668392e-06, "loss": 3.0228, "step": 58190 }, { "epoch": 2.85, "grad_norm": 0.7344692945480347, "learning_rate": 3.6055126886263554e-06, "loss": 2.8275, "step": 58191 }, { "epoch": 2.85, "grad_norm": 0.7686775326728821, "learning_rate": 3.6031332686318525e-06, "loss": 2.7886, "step": 58192 }, { "epoch": 2.85, "grad_norm": 0.8161377906799316, "learning_rate": 3.600754629289693e-06, "loss": 2.9067, "step": 58193 }, { "epoch": 2.85, "grad_norm": 0.7631113529205322, "learning_rate": 3.598376770606104e-06, "loss": 2.8216, "step": 58194 }, { "epoch": 2.85, "grad_norm": 0.7431334853172302, "learning_rate": 3.5959996925873145e-06, "loss": 2.821, "step": 58195 }, { "epoch": 2.85, "grad_norm": 0.7493770718574524, "learning_rate": 3.593623395239653e-06, "loss": 2.7067, "step": 58196 }, { "epoch": 2.85, "grad_norm": 0.7191861271858215, "learning_rate": 3.5912478785693144e-06, "loss": 2.8691, "step": 58197 }, { "epoch": 2.85, "grad_norm": 0.8084812760353088, "learning_rate": 3.5888731425825603e-06, "loss": 3.0337, "step": 58198 }, { "epoch": 2.85, "grad_norm": 0.7551262974739075, "learning_rate": 3.5864991872857186e-06, "loss": 2.9638, "step": 58199 }, { "epoch": 2.85, "grad_norm": 0.7352034449577332, "learning_rate": 3.5841260126849515e-06, "loss": 2.9625, "step": 58200 }, { "epoch": 2.85, "grad_norm": 0.7375224232673645, "learning_rate": 3.581753618786587e-06, "loss": 2.8832, "step": 58201 }, { "epoch": 2.85, "grad_norm": 0.7833859920501709, "learning_rate": 3.5793820055968535e-06, "loss": 2.7742, "step": 58202 }, { "epoch": 2.85, "grad_norm": 0.7902634143829346, "learning_rate": 3.577011173121913e-06, "loss": 3.0064, "step": 58203 }, { "epoch": 2.85, "grad_norm": 0.7929283380508423, "learning_rate": 3.574641121368127e-06, "loss": 3.0257, "step": 58204 }, { "epoch": 2.85, "grad_norm": 0.7383139729499817, "learning_rate": 3.5722718503416904e-06, "loss": 2.9243, "step": 58205 }, { "epoch": 2.85, "grad_norm": 0.7614343762397766, "learning_rate": 3.5699033600488313e-06, "loss": 3.0933, "step": 58206 }, { "epoch": 2.85, "grad_norm": 0.7375363707542419, "learning_rate": 3.5675356504957786e-06, "loss": 2.8334, "step": 58207 }, { "epoch": 2.85, "grad_norm": 0.7111465930938721, "learning_rate": 3.5651687216887938e-06, "loss": 2.6562, "step": 58208 }, { "epoch": 2.85, "grad_norm": 0.7754095196723938, "learning_rate": 3.5628025736341383e-06, "loss": 2.7742, "step": 58209 }, { "epoch": 2.85, "grad_norm": 0.7329739332199097, "learning_rate": 3.5604372063379737e-06, "loss": 2.8782, "step": 58210 }, { "epoch": 2.85, "grad_norm": 0.7588919401168823, "learning_rate": 3.5580726198065958e-06, "loss": 2.7934, "step": 58211 }, { "epoch": 2.85, "grad_norm": 0.7436473369598389, "learning_rate": 3.5557088140461655e-06, "loss": 2.8764, "step": 58212 }, { "epoch": 2.85, "grad_norm": 0.7899497151374817, "learning_rate": 3.553345789062978e-06, "loss": 2.7362, "step": 58213 }, { "epoch": 2.85, "grad_norm": 0.7157718539237976, "learning_rate": 3.550983544863195e-06, "loss": 3.0378, "step": 58214 }, { "epoch": 2.85, "grad_norm": 0.7522872686386108, "learning_rate": 3.548622081453112e-06, "loss": 2.7286, "step": 58215 }, { "epoch": 2.85, "grad_norm": 0.7334341406822205, "learning_rate": 3.5462613988388898e-06, "loss": 2.8558, "step": 58216 }, { "epoch": 2.85, "grad_norm": 0.7479165196418762, "learning_rate": 3.543901497026724e-06, "loss": 2.782, "step": 58217 }, { "epoch": 2.85, "grad_norm": 0.744875967502594, "learning_rate": 3.54154237602291e-06, "loss": 2.9049, "step": 58218 }, { "epoch": 2.85, "grad_norm": 0.7260187864303589, "learning_rate": 3.539184035833609e-06, "loss": 2.8262, "step": 58219 }, { "epoch": 2.85, "grad_norm": 0.7581130266189575, "learning_rate": 3.5368264764650157e-06, "loss": 2.9337, "step": 58220 }, { "epoch": 2.85, "grad_norm": 0.745539128780365, "learning_rate": 3.534469697923392e-06, "loss": 3.1823, "step": 58221 }, { "epoch": 2.85, "grad_norm": 0.7698248028755188, "learning_rate": 3.5321137002149e-06, "loss": 2.9272, "step": 58222 }, { "epoch": 2.85, "grad_norm": 0.7561793923377991, "learning_rate": 3.5297584833458015e-06, "loss": 2.6776, "step": 58223 }, { "epoch": 2.85, "grad_norm": 0.7512466311454773, "learning_rate": 3.5274040473222578e-06, "loss": 2.94, "step": 58224 }, { "epoch": 2.85, "grad_norm": 0.7217196226119995, "learning_rate": 3.5250503921504305e-06, "loss": 2.9191, "step": 58225 }, { "epoch": 2.85, "grad_norm": 0.7432135343551636, "learning_rate": 3.5226975178366144e-06, "loss": 3.0536, "step": 58226 }, { "epoch": 2.85, "grad_norm": 0.7715436220169067, "learning_rate": 3.5203454243869056e-06, "loss": 2.866, "step": 58227 }, { "epoch": 2.85, "grad_norm": 0.765325129032135, "learning_rate": 3.517994111807565e-06, "loss": 2.8093, "step": 58228 }, { "epoch": 2.85, "grad_norm": 0.7218096256256104, "learning_rate": 3.5156435801047877e-06, "loss": 2.7881, "step": 58229 }, { "epoch": 2.85, "grad_norm": 0.7781936526298523, "learning_rate": 3.5132938292847355e-06, "loss": 2.9954, "step": 58230 }, { "epoch": 2.85, "grad_norm": 0.759574294090271, "learning_rate": 3.510944859353604e-06, "loss": 2.7673, "step": 58231 }, { "epoch": 2.85, "grad_norm": 0.723047137260437, "learning_rate": 3.508596670317587e-06, "loss": 2.9904, "step": 58232 }, { "epoch": 2.85, "grad_norm": 0.7782467007637024, "learning_rate": 3.506249262182881e-06, "loss": 2.993, "step": 58233 }, { "epoch": 2.85, "grad_norm": 0.7288681864738464, "learning_rate": 3.5039026349556133e-06, "loss": 2.8057, "step": 58234 }, { "epoch": 2.85, "grad_norm": 0.7624087333679199, "learning_rate": 3.5015567886420125e-06, "loss": 2.8476, "step": 58235 }, { "epoch": 2.85, "grad_norm": 0.7774996757507324, "learning_rate": 3.499211723248274e-06, "loss": 2.8622, "step": 58236 }, { "epoch": 2.85, "grad_norm": 0.7889575958251953, "learning_rate": 3.4968674387805597e-06, "loss": 2.9525, "step": 58237 }, { "epoch": 2.85, "grad_norm": 0.8073905110359192, "learning_rate": 3.4945239352449974e-06, "loss": 2.9227, "step": 58238 }, { "epoch": 2.85, "grad_norm": 0.781495988368988, "learning_rate": 3.4921812126478153e-06, "loss": 3.019, "step": 58239 }, { "epoch": 2.85, "grad_norm": 0.7901045680046082, "learning_rate": 3.489839270995143e-06, "loss": 2.8305, "step": 58240 }, { "epoch": 2.85, "grad_norm": 0.8189294934272766, "learning_rate": 3.4874981102932074e-06, "loss": 2.8926, "step": 58241 }, { "epoch": 2.85, "grad_norm": 0.7915399670600891, "learning_rate": 3.4851577305481047e-06, "loss": 2.8993, "step": 58242 }, { "epoch": 2.85, "grad_norm": 0.7558654546737671, "learning_rate": 3.4828181317660296e-06, "loss": 2.8288, "step": 58243 }, { "epoch": 2.85, "grad_norm": 0.7701000571250916, "learning_rate": 3.480479313953144e-06, "loss": 2.6183, "step": 58244 }, { "epoch": 2.85, "grad_norm": 0.7547457814216614, "learning_rate": 3.4781412771156424e-06, "loss": 2.9023, "step": 58245 }, { "epoch": 2.85, "grad_norm": 0.7245228290557861, "learning_rate": 3.4758040212596206e-06, "loss": 2.8595, "step": 58246 }, { "epoch": 2.85, "grad_norm": 0.7754863500595093, "learning_rate": 3.473467546391273e-06, "loss": 2.9257, "step": 58247 }, { "epoch": 2.85, "grad_norm": 0.7499856948852539, "learning_rate": 3.471131852516762e-06, "loss": 2.9872, "step": 58248 }, { "epoch": 2.85, "grad_norm": 0.7747548222541809, "learning_rate": 3.468796939642182e-06, "loss": 2.9552, "step": 58249 }, { "epoch": 2.85, "grad_norm": 0.8232293128967285, "learning_rate": 3.466462807773729e-06, "loss": 3.0993, "step": 58250 }, { "epoch": 2.85, "grad_norm": 0.7535151839256287, "learning_rate": 3.464129456917564e-06, "loss": 3.1768, "step": 58251 }, { "epoch": 2.85, "grad_norm": 0.7690832018852234, "learning_rate": 3.4617968870798154e-06, "loss": 2.5916, "step": 58252 }, { "epoch": 2.85, "grad_norm": 0.7519869208335876, "learning_rate": 3.459465098266645e-06, "loss": 2.8841, "step": 58253 }, { "epoch": 2.85, "grad_norm": 0.7166727185249329, "learning_rate": 3.457134090484115e-06, "loss": 2.9734, "step": 58254 }, { "epoch": 2.85, "grad_norm": 0.742997944355011, "learning_rate": 3.4548038637384536e-06, "loss": 2.9862, "step": 58255 }, { "epoch": 2.85, "grad_norm": 0.7113354802131653, "learning_rate": 3.452474418035789e-06, "loss": 2.8711, "step": 58256 }, { "epoch": 2.86, "grad_norm": 0.785720944404602, "learning_rate": 3.450145753382183e-06, "loss": 2.9458, "step": 58257 }, { "epoch": 2.86, "grad_norm": 0.7967292666435242, "learning_rate": 3.4478178697838644e-06, "loss": 2.5159, "step": 58258 }, { "epoch": 2.86, "grad_norm": 0.7993360757827759, "learning_rate": 3.445490767246928e-06, "loss": 2.8102, "step": 58259 }, { "epoch": 2.86, "grad_norm": 0.7960979342460632, "learning_rate": 3.4431644457774356e-06, "loss": 3.063, "step": 58260 }, { "epoch": 2.86, "grad_norm": 0.7577957510948181, "learning_rate": 3.4408389053816486e-06, "loss": 2.733, "step": 58261 }, { "epoch": 2.86, "grad_norm": 0.8428319692611694, "learning_rate": 3.438514146065563e-06, "loss": 2.8959, "step": 58262 }, { "epoch": 2.86, "grad_norm": 0.740744411945343, "learning_rate": 3.4361901678353734e-06, "loss": 3.0609, "step": 58263 }, { "epoch": 2.86, "grad_norm": 0.7100527882575989, "learning_rate": 3.4338669706971743e-06, "loss": 2.7642, "step": 58264 }, { "epoch": 2.86, "grad_norm": 0.8007273077964783, "learning_rate": 3.4315445546570953e-06, "loss": 2.9573, "step": 58265 }, { "epoch": 2.86, "grad_norm": 0.7537129521369934, "learning_rate": 3.429222919721297e-06, "loss": 2.8178, "step": 58266 }, { "epoch": 2.86, "grad_norm": 0.8160079717636108, "learning_rate": 3.4269020658958093e-06, "loss": 2.7717, "step": 58267 }, { "epoch": 2.86, "grad_norm": 0.7378734350204468, "learning_rate": 3.4245819931867924e-06, "loss": 2.7693, "step": 58268 }, { "epoch": 2.86, "grad_norm": 0.7598668932914734, "learning_rate": 3.422262701600342e-06, "loss": 3.0711, "step": 58269 }, { "epoch": 2.86, "grad_norm": 0.7291812300682068, "learning_rate": 3.4199441911425874e-06, "loss": 2.9735, "step": 58270 }, { "epoch": 2.86, "grad_norm": 0.8131197094917297, "learning_rate": 3.4176264618196224e-06, "loss": 2.922, "step": 58271 }, { "epoch": 2.86, "grad_norm": 0.7432801127433777, "learning_rate": 3.415309513637543e-06, "loss": 2.7121, "step": 58272 }, { "epoch": 2.86, "grad_norm": 0.7480484843254089, "learning_rate": 3.412993346602444e-06, "loss": 3.0394, "step": 58273 }, { "epoch": 2.86, "grad_norm": 0.7310391664505005, "learning_rate": 3.410677960720487e-06, "loss": 3.0235, "step": 58274 }, { "epoch": 2.86, "grad_norm": 0.7628144025802612, "learning_rate": 3.4083633559976676e-06, "loss": 2.85, "step": 58275 }, { "epoch": 2.86, "grad_norm": 0.7469602227210999, "learning_rate": 3.4060495324401803e-06, "loss": 2.8982, "step": 58276 }, { "epoch": 2.86, "grad_norm": 0.8014663457870483, "learning_rate": 3.4037364900540544e-06, "loss": 2.9782, "step": 58277 }, { "epoch": 2.86, "grad_norm": 0.7652974724769592, "learning_rate": 3.4014242288454176e-06, "loss": 2.901, "step": 58278 }, { "epoch": 2.86, "grad_norm": 0.7459307909011841, "learning_rate": 3.3991127488203318e-06, "loss": 2.9058, "step": 58279 }, { "epoch": 2.86, "grad_norm": 0.8366508483886719, "learning_rate": 3.3968020499849256e-06, "loss": 2.9857, "step": 58280 }, { "epoch": 2.86, "grad_norm": 0.7131357192993164, "learning_rate": 3.3944921323452613e-06, "loss": 2.8848, "step": 58281 }, { "epoch": 2.86, "grad_norm": 0.7664152383804321, "learning_rate": 3.3921829959074e-06, "loss": 2.9115, "step": 58282 }, { "epoch": 2.86, "grad_norm": 0.7806337475776672, "learning_rate": 3.3898746406774703e-06, "loss": 2.8592, "step": 58283 }, { "epoch": 2.86, "grad_norm": 0.7475855350494385, "learning_rate": 3.3875670666615007e-06, "loss": 2.9584, "step": 58284 }, { "epoch": 2.86, "grad_norm": 0.754424512386322, "learning_rate": 3.3852602738655866e-06, "loss": 2.8993, "step": 58285 }, { "epoch": 2.86, "grad_norm": 0.7555055022239685, "learning_rate": 3.3829542622958563e-06, "loss": 3.0222, "step": 58286 }, { "epoch": 2.86, "grad_norm": 0.7311819791793823, "learning_rate": 3.3806490319583047e-06, "loss": 2.8419, "step": 58287 }, { "epoch": 2.86, "grad_norm": 0.7578787803649902, "learning_rate": 3.3783445828590605e-06, "loss": 2.871, "step": 58288 }, { "epoch": 2.86, "grad_norm": 0.9047726392745972, "learning_rate": 3.3760409150041524e-06, "loss": 2.8261, "step": 58289 }, { "epoch": 2.86, "grad_norm": 0.7717176079750061, "learning_rate": 3.373738028399675e-06, "loss": 2.8182, "step": 58290 }, { "epoch": 2.86, "grad_norm": 0.7483989000320435, "learning_rate": 3.3714359230516906e-06, "loss": 2.8682, "step": 58291 }, { "epoch": 2.86, "grad_norm": 0.8349034786224365, "learning_rate": 3.369134598966228e-06, "loss": 2.9128, "step": 58292 }, { "epoch": 2.86, "grad_norm": 0.7524193525314331, "learning_rate": 3.3668340561494145e-06, "loss": 2.8352, "step": 58293 }, { "epoch": 2.86, "grad_norm": 0.7450447082519531, "learning_rate": 3.364534294607246e-06, "loss": 3.0256, "step": 58294 }, { "epoch": 2.86, "grad_norm": 0.7853724360466003, "learning_rate": 3.362235314345818e-06, "loss": 2.8471, "step": 58295 }, { "epoch": 2.86, "grad_norm": 0.8045984506607056, "learning_rate": 3.3599371153711917e-06, "loss": 2.9097, "step": 58296 }, { "epoch": 2.86, "grad_norm": 0.7121866941452026, "learning_rate": 3.3576396976893626e-06, "loss": 2.7478, "step": 58297 }, { "epoch": 2.86, "grad_norm": 0.7629473805427551, "learning_rate": 3.355343061306459e-06, "loss": 2.8422, "step": 58298 }, { "epoch": 2.86, "grad_norm": 0.7671418786048889, "learning_rate": 3.353047206228443e-06, "loss": 3.1245, "step": 58299 }, { "epoch": 2.86, "grad_norm": 0.7228366136550903, "learning_rate": 3.350752132461443e-06, "loss": 3.0361, "step": 58300 }, { "epoch": 2.86, "grad_norm": 0.7080792188644409, "learning_rate": 3.3484578400114537e-06, "loss": 3.03, "step": 58301 }, { "epoch": 2.86, "grad_norm": 0.7466286420822144, "learning_rate": 3.3461643288845375e-06, "loss": 2.765, "step": 58302 }, { "epoch": 2.86, "grad_norm": 0.7471815943717957, "learning_rate": 3.343871599086756e-06, "loss": 2.9213, "step": 58303 }, { "epoch": 2.86, "grad_norm": 0.7371976375579834, "learning_rate": 3.3415796506241045e-06, "loss": 2.9685, "step": 58304 }, { "epoch": 2.86, "grad_norm": 0.6997452974319458, "learning_rate": 3.3392884835026444e-06, "loss": 2.9548, "step": 58305 }, { "epoch": 2.86, "grad_norm": 0.801490068435669, "learning_rate": 3.336998097728405e-06, "loss": 3.1318, "step": 58306 }, { "epoch": 2.86, "grad_norm": 0.7446818947792053, "learning_rate": 3.3347084933074143e-06, "loss": 2.9129, "step": 58307 }, { "epoch": 2.86, "grad_norm": 0.7398616075515747, "learning_rate": 3.3324196702457006e-06, "loss": 2.9168, "step": 58308 }, { "epoch": 2.86, "grad_norm": 0.7449392676353455, "learning_rate": 3.3301316285493263e-06, "loss": 3.0701, "step": 58309 }, { "epoch": 2.86, "grad_norm": 0.7428542375564575, "learning_rate": 3.327844368224253e-06, "loss": 2.6963, "step": 58310 }, { "epoch": 2.86, "grad_norm": 0.7714420557022095, "learning_rate": 3.3255578892765753e-06, "loss": 2.8949, "step": 58311 }, { "epoch": 2.86, "grad_norm": 0.7456600069999695, "learning_rate": 3.323272191712256e-06, "loss": 2.841, "step": 58312 }, { "epoch": 2.86, "grad_norm": 0.7867556810379028, "learning_rate": 3.3209872755373566e-06, "loss": 2.8416, "step": 58313 }, { "epoch": 2.86, "grad_norm": 0.7435100674629211, "learning_rate": 3.318703140757872e-06, "loss": 2.958, "step": 58314 }, { "epoch": 2.86, "grad_norm": 0.752841591835022, "learning_rate": 3.3164197873798316e-06, "loss": 3.1411, "step": 58315 }, { "epoch": 2.86, "grad_norm": 0.7200860381126404, "learning_rate": 3.3141372154092293e-06, "loss": 2.8908, "step": 58316 }, { "epoch": 2.86, "grad_norm": 0.6982949376106262, "learning_rate": 3.311855424852128e-06, "loss": 2.5464, "step": 58317 }, { "epoch": 2.86, "grad_norm": 0.7470500469207764, "learning_rate": 3.309574415714489e-06, "loss": 2.7845, "step": 58318 }, { "epoch": 2.86, "grad_norm": 0.7575530409812927, "learning_rate": 3.3072941880023075e-06, "loss": 2.935, "step": 58319 }, { "epoch": 2.86, "grad_norm": 0.7366733551025391, "learning_rate": 3.305014741721612e-06, "loss": 3.0839, "step": 58320 }, { "epoch": 2.86, "grad_norm": 0.7596519589424133, "learning_rate": 3.3027360768784318e-06, "loss": 2.7866, "step": 58321 }, { "epoch": 2.86, "grad_norm": 0.7350767254829407, "learning_rate": 3.3004581934787276e-06, "loss": 2.8478, "step": 58322 }, { "epoch": 2.86, "grad_norm": 0.730754554271698, "learning_rate": 3.298181091528529e-06, "loss": 2.8066, "step": 58323 }, { "epoch": 2.86, "grad_norm": 0.7694946527481079, "learning_rate": 3.295904771033797e-06, "loss": 2.6976, "step": 58324 }, { "epoch": 2.86, "grad_norm": 0.7357961535453796, "learning_rate": 3.293629232000561e-06, "loss": 2.8541, "step": 58325 }, { "epoch": 2.86, "grad_norm": 0.7392043471336365, "learning_rate": 3.2913544744348152e-06, "loss": 2.7927, "step": 58326 }, { "epoch": 2.86, "grad_norm": 0.7261336445808411, "learning_rate": 3.289080498342522e-06, "loss": 2.9183, "step": 58327 }, { "epoch": 2.86, "grad_norm": 0.7417194843292236, "learning_rate": 3.2868073037297104e-06, "loss": 2.9671, "step": 58328 }, { "epoch": 2.86, "grad_norm": 0.7913023829460144, "learning_rate": 3.2845348906023084e-06, "loss": 3.0921, "step": 58329 }, { "epoch": 2.86, "grad_norm": 0.7364912033081055, "learning_rate": 3.2822632589663776e-06, "loss": 2.9238, "step": 58330 }, { "epoch": 2.86, "grad_norm": 0.7688376903533936, "learning_rate": 3.279992408827847e-06, "loss": 2.9304, "step": 58331 }, { "epoch": 2.86, "grad_norm": 0.7457802295684814, "learning_rate": 3.277722340192712e-06, "loss": 3.1047, "step": 58332 }, { "epoch": 2.86, "grad_norm": 0.74704509973526, "learning_rate": 3.2754530530669343e-06, "loss": 2.9558, "step": 58333 }, { "epoch": 2.86, "grad_norm": 0.7239862680435181, "learning_rate": 3.2731845474565086e-06, "loss": 2.8178, "step": 58334 }, { "epoch": 2.86, "grad_norm": 0.7056124806404114, "learning_rate": 3.270916823367431e-06, "loss": 3.0095, "step": 58335 }, { "epoch": 2.86, "grad_norm": 0.7435981035232544, "learning_rate": 3.2686498808056296e-06, "loss": 2.7761, "step": 58336 }, { "epoch": 2.86, "grad_norm": 0.7090030908584595, "learning_rate": 3.266383719777099e-06, "loss": 2.9752, "step": 58337 }, { "epoch": 2.86, "grad_norm": 0.8070139288902283, "learning_rate": 3.2641183402878356e-06, "loss": 2.9494, "step": 58338 }, { "epoch": 2.86, "grad_norm": 0.7761786580085754, "learning_rate": 3.2618537423437672e-06, "loss": 2.7368, "step": 58339 }, { "epoch": 2.86, "grad_norm": 0.7421362400054932, "learning_rate": 3.259589925950856e-06, "loss": 2.8487, "step": 58340 }, { "epoch": 2.86, "grad_norm": 0.8992583155632019, "learning_rate": 3.257326891115064e-06, "loss": 2.9035, "step": 58341 }, { "epoch": 2.86, "grad_norm": 0.7621567249298096, "learning_rate": 3.255064637842386e-06, "loss": 2.8351, "step": 58342 }, { "epoch": 2.86, "grad_norm": 0.7418533563613892, "learning_rate": 3.252803166138751e-06, "loss": 3.0166, "step": 58343 }, { "epoch": 2.86, "grad_norm": 0.7442709803581238, "learning_rate": 3.2505424760101207e-06, "loss": 2.6756, "step": 58344 }, { "epoch": 2.86, "grad_norm": 0.7631102800369263, "learning_rate": 3.248282567462457e-06, "loss": 3.0295, "step": 58345 }, { "epoch": 2.86, "grad_norm": 0.8174268007278442, "learning_rate": 3.246023440501688e-06, "loss": 2.9048, "step": 58346 }, { "epoch": 2.86, "grad_norm": 0.736268162727356, "learning_rate": 3.243765095133777e-06, "loss": 2.951, "step": 58347 }, { "epoch": 2.86, "grad_norm": 0.7341980934143066, "learning_rate": 3.2415075313647177e-06, "loss": 2.8567, "step": 58348 }, { "epoch": 2.86, "grad_norm": 0.7812278866767883, "learning_rate": 3.239250749200373e-06, "loss": 2.882, "step": 58349 }, { "epoch": 2.86, "grad_norm": 0.7396385073661804, "learning_rate": 3.2369947486467707e-06, "loss": 3.0547, "step": 58350 }, { "epoch": 2.86, "grad_norm": 0.7556043267250061, "learning_rate": 3.2347395297097732e-06, "loss": 2.7976, "step": 58351 }, { "epoch": 2.86, "grad_norm": 0.7392325401306152, "learning_rate": 3.2324850923953426e-06, "loss": 2.9214, "step": 58352 }, { "epoch": 2.86, "grad_norm": 0.7273942232131958, "learning_rate": 3.230231436709474e-06, "loss": 2.8526, "step": 58353 }, { "epoch": 2.86, "grad_norm": 0.7350383996963501, "learning_rate": 3.2279785626580622e-06, "loss": 3.0958, "step": 58354 }, { "epoch": 2.86, "grad_norm": 0.7365357875823975, "learning_rate": 3.2257264702470363e-06, "loss": 2.8979, "step": 58355 }, { "epoch": 2.86, "grad_norm": 0.742205023765564, "learning_rate": 3.223475159482325e-06, "loss": 2.8678, "step": 58356 }, { "epoch": 2.86, "grad_norm": 0.7722266912460327, "learning_rate": 3.2212246303698565e-06, "loss": 2.7893, "step": 58357 }, { "epoch": 2.86, "grad_norm": 0.7623163461685181, "learning_rate": 3.2189748829155593e-06, "loss": 2.8126, "step": 58358 }, { "epoch": 2.86, "grad_norm": 0.794621467590332, "learning_rate": 3.2167259171253625e-06, "loss": 2.8671, "step": 58359 }, { "epoch": 2.86, "grad_norm": 0.737347424030304, "learning_rate": 3.2144777330052274e-06, "loss": 2.8197, "step": 58360 }, { "epoch": 2.86, "grad_norm": 0.7127636075019836, "learning_rate": 3.2122303305610162e-06, "loss": 2.803, "step": 58361 }, { "epoch": 2.86, "grad_norm": 0.7416470646858215, "learning_rate": 3.2099837097986914e-06, "loss": 2.7396, "step": 58362 }, { "epoch": 2.86, "grad_norm": 0.7706217765808105, "learning_rate": 3.2077378707241142e-06, "loss": 2.7705, "step": 58363 }, { "epoch": 2.86, "grad_norm": 0.7568953037261963, "learning_rate": 3.2054928133432467e-06, "loss": 2.9229, "step": 58364 }, { "epoch": 2.86, "grad_norm": 0.7913509607315063, "learning_rate": 3.2032485376620175e-06, "loss": 2.9663, "step": 58365 }, { "epoch": 2.86, "grad_norm": 0.777898371219635, "learning_rate": 3.2010050436862887e-06, "loss": 2.7852, "step": 58366 }, { "epoch": 2.86, "grad_norm": 0.7000056505203247, "learning_rate": 3.198762331421989e-06, "loss": 2.7677, "step": 58367 }, { "epoch": 2.86, "grad_norm": 0.7845345139503479, "learning_rate": 3.19652040087508e-06, "loss": 2.7989, "step": 58368 }, { "epoch": 2.86, "grad_norm": 0.7394914627075195, "learning_rate": 3.194279252051357e-06, "loss": 2.9253, "step": 58369 }, { "epoch": 2.86, "grad_norm": 0.741482675075531, "learning_rate": 3.1920388849568157e-06, "loss": 3.07, "step": 58370 }, { "epoch": 2.86, "grad_norm": 0.7681283950805664, "learning_rate": 3.1897992995973175e-06, "loss": 2.9483, "step": 58371 }, { "epoch": 2.86, "grad_norm": 0.7793736457824707, "learning_rate": 3.187560495978758e-06, "loss": 2.5742, "step": 58372 }, { "epoch": 2.86, "grad_norm": 0.7607495784759521, "learning_rate": 3.1853224741070326e-06, "loss": 3.0333, "step": 58373 }, { "epoch": 2.86, "grad_norm": 0.7457079887390137, "learning_rate": 3.1830852339880366e-06, "loss": 2.9459, "step": 58374 }, { "epoch": 2.86, "grad_norm": 0.7789652347564697, "learning_rate": 3.180848775627698e-06, "loss": 2.9568, "step": 58375 }, { "epoch": 2.86, "grad_norm": 0.7540808916091919, "learning_rate": 3.178613099031879e-06, "loss": 2.8091, "step": 58376 }, { "epoch": 2.86, "grad_norm": 0.7654391527175903, "learning_rate": 3.176378204206442e-06, "loss": 2.8447, "step": 58377 }, { "epoch": 2.86, "grad_norm": 0.7442813515663147, "learning_rate": 3.174144091157316e-06, "loss": 2.8316, "step": 58378 }, { "epoch": 2.86, "grad_norm": 0.7185924053192139, "learning_rate": 3.1719107598903615e-06, "loss": 2.8791, "step": 58379 }, { "epoch": 2.86, "grad_norm": 0.7041293978691101, "learning_rate": 3.169678210411475e-06, "loss": 2.8301, "step": 58380 }, { "epoch": 2.86, "grad_norm": 0.8128548860549927, "learning_rate": 3.167446442726518e-06, "loss": 2.9627, "step": 58381 }, { "epoch": 2.86, "grad_norm": 0.7193200588226318, "learning_rate": 3.165215456841419e-06, "loss": 2.8726, "step": 58382 }, { "epoch": 2.86, "grad_norm": 0.7626433372497559, "learning_rate": 3.162985252761974e-06, "loss": 2.8444, "step": 58383 }, { "epoch": 2.86, "grad_norm": 0.7526370286941528, "learning_rate": 3.1607558304941107e-06, "loss": 2.6661, "step": 58384 }, { "epoch": 2.86, "grad_norm": 0.7516125440597534, "learning_rate": 3.1585271900436913e-06, "loss": 2.9895, "step": 58385 }, { "epoch": 2.86, "grad_norm": 0.7663487792015076, "learning_rate": 3.1562993314165784e-06, "loss": 2.832, "step": 58386 }, { "epoch": 2.86, "grad_norm": 0.7314614653587341, "learning_rate": 3.154072254618667e-06, "loss": 3.0411, "step": 58387 }, { "epoch": 2.86, "grad_norm": 0.7240493893623352, "learning_rate": 3.151845959655752e-06, "loss": 2.9518, "step": 58388 }, { "epoch": 2.86, "grad_norm": 0.7454181909561157, "learning_rate": 3.1496204465337626e-06, "loss": 2.7987, "step": 58389 }, { "epoch": 2.86, "grad_norm": 0.6941863894462585, "learning_rate": 3.1473957152585936e-06, "loss": 3.1225, "step": 58390 }, { "epoch": 2.86, "grad_norm": 0.7873305678367615, "learning_rate": 3.1451717658360075e-06, "loss": 2.7987, "step": 58391 }, { "epoch": 2.86, "grad_norm": 0.7635713219642639, "learning_rate": 3.1429485982719326e-06, "loss": 2.6695, "step": 58392 }, { "epoch": 2.86, "grad_norm": 0.74675452709198, "learning_rate": 3.1407262125721645e-06, "loss": 2.7484, "step": 58393 }, { "epoch": 2.86, "grad_norm": 0.7379273176193237, "learning_rate": 3.138504608742598e-06, "loss": 2.9954, "step": 58394 }, { "epoch": 2.86, "grad_norm": 0.7270514965057373, "learning_rate": 3.1362837867890957e-06, "loss": 3.2038, "step": 58395 }, { "epoch": 2.86, "grad_norm": 0.7507408261299133, "learning_rate": 3.134063746717486e-06, "loss": 2.8354, "step": 58396 }, { "epoch": 2.86, "grad_norm": 0.7894320487976074, "learning_rate": 3.13184448853363e-06, "loss": 2.9427, "step": 58397 }, { "epoch": 2.86, "grad_norm": 0.7546813488006592, "learning_rate": 3.129626012243358e-06, "loss": 2.8778, "step": 58398 }, { "epoch": 2.86, "grad_norm": 0.7203192114830017, "learning_rate": 3.127408317852498e-06, "loss": 2.9069, "step": 58399 }, { "epoch": 2.86, "grad_norm": 0.7543355226516724, "learning_rate": 3.125191405366911e-06, "loss": 2.9173, "step": 58400 }, { "epoch": 2.86, "grad_norm": 0.7346734404563904, "learning_rate": 3.1229752747924606e-06, "loss": 2.8988, "step": 58401 }, { "epoch": 2.86, "grad_norm": 0.8272573351860046, "learning_rate": 3.1207599261349416e-06, "loss": 2.8884, "step": 58402 }, { "epoch": 2.86, "grad_norm": 0.7785396575927734, "learning_rate": 3.1185453594001818e-06, "loss": 2.7603, "step": 58403 }, { "epoch": 2.86, "grad_norm": 0.7275567650794983, "learning_rate": 3.1163315745940777e-06, "loss": 2.8534, "step": 58404 }, { "epoch": 2.86, "grad_norm": 0.7752771377563477, "learning_rate": 3.114118571722424e-06, "loss": 2.741, "step": 58405 }, { "epoch": 2.86, "grad_norm": 0.7439063191413879, "learning_rate": 3.111906350791016e-06, "loss": 3.0018, "step": 58406 }, { "epoch": 2.86, "grad_norm": 0.7584403157234192, "learning_rate": 3.109694911805716e-06, "loss": 3.0572, "step": 58407 }, { "epoch": 2.86, "grad_norm": 0.7162322998046875, "learning_rate": 3.1074842547723523e-06, "loss": 2.9417, "step": 58408 }, { "epoch": 2.86, "grad_norm": 0.7319763898849487, "learning_rate": 3.1052743796967207e-06, "loss": 3.1582, "step": 58409 }, { "epoch": 2.86, "grad_norm": 0.825232744216919, "learning_rate": 3.1030652865846828e-06, "loss": 2.8801, "step": 58410 }, { "epoch": 2.86, "grad_norm": 0.7098069787025452, "learning_rate": 3.100856975442001e-06, "loss": 2.9849, "step": 58411 }, { "epoch": 2.86, "grad_norm": 0.754648745059967, "learning_rate": 3.0986494462745372e-06, "loss": 3.0793, "step": 58412 }, { "epoch": 2.86, "grad_norm": 0.7526900768280029, "learning_rate": 3.0964426990880865e-06, "loss": 2.9493, "step": 58413 }, { "epoch": 2.86, "grad_norm": 0.7582096457481384, "learning_rate": 3.0942367338884775e-06, "loss": 2.8612, "step": 58414 }, { "epoch": 2.86, "grad_norm": 0.7545999884605408, "learning_rate": 3.0920315506815063e-06, "loss": 2.8995, "step": 58415 }, { "epoch": 2.86, "grad_norm": 0.7414794564247131, "learning_rate": 3.0898271494729677e-06, "loss": 2.8369, "step": 58416 }, { "epoch": 2.86, "grad_norm": 0.8262287378311157, "learning_rate": 3.08762353026869e-06, "loss": 2.9537, "step": 58417 }, { "epoch": 2.86, "grad_norm": 0.7526206374168396, "learning_rate": 3.0854206930744696e-06, "loss": 2.8687, "step": 58418 }, { "epoch": 2.86, "grad_norm": 0.7402799725532532, "learning_rate": 3.0832186378961343e-06, "loss": 2.8745, "step": 58419 }, { "epoch": 2.86, "grad_norm": 0.784649133682251, "learning_rate": 3.0810173647394463e-06, "loss": 2.6984, "step": 58420 }, { "epoch": 2.86, "grad_norm": 0.7722074389457703, "learning_rate": 3.0788168736102014e-06, "loss": 2.8043, "step": 58421 }, { "epoch": 2.86, "grad_norm": 0.8019753694534302, "learning_rate": 3.0766171645141946e-06, "loss": 2.8064, "step": 58422 }, { "epoch": 2.86, "grad_norm": 0.7800315618515015, "learning_rate": 3.0744182374572548e-06, "loss": 2.8841, "step": 58423 }, { "epoch": 2.86, "grad_norm": 0.7406907081604004, "learning_rate": 3.0722200924451434e-06, "loss": 2.9577, "step": 58424 }, { "epoch": 2.86, "grad_norm": 0.7482556104660034, "learning_rate": 3.0700227294836897e-06, "loss": 2.8487, "step": 58425 }, { "epoch": 2.86, "grad_norm": 0.7338652610778809, "learning_rate": 3.0678261485785892e-06, "loss": 3.0761, "step": 58426 }, { "epoch": 2.86, "grad_norm": 0.7629140019416809, "learning_rate": 3.0656303497357373e-06, "loss": 2.8937, "step": 58427 }, { "epoch": 2.86, "grad_norm": 0.7479938864707947, "learning_rate": 3.0634353329608954e-06, "loss": 2.6075, "step": 58428 }, { "epoch": 2.86, "grad_norm": 0.793538510799408, "learning_rate": 3.061241098259759e-06, "loss": 3.0803, "step": 58429 }, { "epoch": 2.86, "grad_norm": 0.7822150588035583, "learning_rate": 3.059047645638191e-06, "loss": 2.6345, "step": 58430 }, { "epoch": 2.86, "grad_norm": 0.7450809478759766, "learning_rate": 3.056854975101952e-06, "loss": 2.9367, "step": 58431 }, { "epoch": 2.86, "grad_norm": 0.7621826529502869, "learning_rate": 3.0546630866568057e-06, "loss": 3.0372, "step": 58432 }, { "epoch": 2.86, "grad_norm": 0.7780464887619019, "learning_rate": 3.052471980308513e-06, "loss": 2.9461, "step": 58433 }, { "epoch": 2.86, "grad_norm": 0.7118364572525024, "learning_rate": 3.0502816560628695e-06, "loss": 2.8477, "step": 58434 }, { "epoch": 2.86, "grad_norm": 0.7661958336830139, "learning_rate": 3.0480921139256378e-06, "loss": 2.9218, "step": 58435 }, { "epoch": 2.86, "grad_norm": 0.7564506530761719, "learning_rate": 3.045903353902579e-06, "loss": 2.9434, "step": 58436 }, { "epoch": 2.86, "grad_norm": 0.7816385626792908, "learning_rate": 3.043715375999456e-06, "loss": 2.8841, "step": 58437 }, { "epoch": 2.86, "grad_norm": 0.7606683373451233, "learning_rate": 3.0415281802220304e-06, "loss": 2.7296, "step": 58438 }, { "epoch": 2.86, "grad_norm": 0.7658647298812866, "learning_rate": 3.0393417665760976e-06, "loss": 3.0555, "step": 58439 }, { "epoch": 2.86, "grad_norm": 0.7768723964691162, "learning_rate": 3.0371561350673537e-06, "loss": 2.8682, "step": 58440 }, { "epoch": 2.86, "grad_norm": 0.7276012301445007, "learning_rate": 3.0349712857016263e-06, "loss": 2.7861, "step": 58441 }, { "epoch": 2.86, "grad_norm": 0.7483054995536804, "learning_rate": 3.0327872184846113e-06, "loss": 2.8149, "step": 58442 }, { "epoch": 2.86, "grad_norm": 0.7850237488746643, "learning_rate": 3.030603933422071e-06, "loss": 2.8655, "step": 58443 }, { "epoch": 2.86, "grad_norm": 0.764310359954834, "learning_rate": 3.0284214305197675e-06, "loss": 3.0662, "step": 58444 }, { "epoch": 2.86, "grad_norm": 0.7962012887001038, "learning_rate": 3.0262397097834956e-06, "loss": 2.8438, "step": 58445 }, { "epoch": 2.86, "grad_norm": 0.789114773273468, "learning_rate": 3.0240587712188848e-06, "loss": 3.0808, "step": 58446 }, { "epoch": 2.86, "grad_norm": 0.7645512819290161, "learning_rate": 3.0218786148317965e-06, "loss": 2.8729, "step": 58447 }, { "epoch": 2.86, "grad_norm": 0.7275797724723816, "learning_rate": 3.019699240627893e-06, "loss": 3.0125, "step": 58448 }, { "epoch": 2.86, "grad_norm": 0.7716579437255859, "learning_rate": 3.01752064861297e-06, "loss": 2.9713, "step": 58449 }, { "epoch": 2.86, "grad_norm": 0.781461775302887, "learning_rate": 3.015342838792756e-06, "loss": 2.8235, "step": 58450 }, { "epoch": 2.86, "grad_norm": 0.7181127667427063, "learning_rate": 3.0131658111729463e-06, "loss": 2.9488, "step": 58451 }, { "epoch": 2.86, "grad_norm": 0.8236247897148132, "learning_rate": 3.0109895657593365e-06, "loss": 2.9258, "step": 58452 }, { "epoch": 2.86, "grad_norm": 0.7421223521232605, "learning_rate": 3.0088141025575887e-06, "loss": 2.8039, "step": 58453 }, { "epoch": 2.86, "grad_norm": 0.7448740601539612, "learning_rate": 3.0066394215734645e-06, "loss": 2.7816, "step": 58454 }, { "epoch": 2.86, "grad_norm": 0.751390814781189, "learning_rate": 3.0044655228126937e-06, "loss": 2.8791, "step": 58455 }, { "epoch": 2.86, "grad_norm": 0.7451471090316772, "learning_rate": 3.0022924062810373e-06, "loss": 2.8559, "step": 58456 }, { "epoch": 2.86, "grad_norm": 0.7556320428848267, "learning_rate": 3.0001200719841578e-06, "loss": 2.661, "step": 58457 }, { "epoch": 2.86, "grad_norm": 0.7909460067749023, "learning_rate": 2.9979485199278175e-06, "loss": 3.1855, "step": 58458 }, { "epoch": 2.86, "grad_norm": 0.7238598465919495, "learning_rate": 2.9957777501177115e-06, "loss": 2.8134, "step": 58459 }, { "epoch": 2.86, "grad_norm": 0.7427256107330322, "learning_rate": 2.9936077625595356e-06, "loss": 3.0505, "step": 58460 }, { "epoch": 2.87, "grad_norm": 0.7333883047103882, "learning_rate": 2.9914385572590848e-06, "loss": 2.9026, "step": 58461 }, { "epoch": 2.87, "grad_norm": 0.8171818256378174, "learning_rate": 2.9892701342220215e-06, "loss": 2.8903, "step": 58462 }, { "epoch": 2.87, "grad_norm": 0.7717416286468506, "learning_rate": 2.987102493454041e-06, "loss": 2.8525, "step": 58463 }, { "epoch": 2.87, "grad_norm": 0.7565333247184753, "learning_rate": 2.9849356349608724e-06, "loss": 2.8967, "step": 58464 }, { "epoch": 2.87, "grad_norm": 0.7647886872291565, "learning_rate": 2.9827695587482434e-06, "loss": 2.9653, "step": 58465 }, { "epoch": 2.87, "grad_norm": 0.7452661395072937, "learning_rate": 2.9806042648217843e-06, "loss": 2.8538, "step": 58466 }, { "epoch": 2.87, "grad_norm": 0.7374056577682495, "learning_rate": 2.9784397531872894e-06, "loss": 2.9242, "step": 58467 }, { "epoch": 2.87, "grad_norm": 0.7226657271385193, "learning_rate": 2.9762760238504212e-06, "loss": 2.7999, "step": 58468 }, { "epoch": 2.87, "grad_norm": 0.7068235874176025, "learning_rate": 2.9741130768168752e-06, "loss": 2.7401, "step": 58469 }, { "epoch": 2.87, "grad_norm": 0.7041826248168945, "learning_rate": 2.971950912092347e-06, "loss": 2.8399, "step": 58470 }, { "epoch": 2.87, "grad_norm": 0.7928381562232971, "learning_rate": 2.9697895296825645e-06, "loss": 2.8014, "step": 58471 }, { "epoch": 2.87, "grad_norm": 0.7238459587097168, "learning_rate": 2.9676289295931577e-06, "loss": 2.9021, "step": 58472 }, { "epoch": 2.87, "grad_norm": 0.7136267423629761, "learning_rate": 2.9654691118298544e-06, "loss": 2.937, "step": 58473 }, { "epoch": 2.87, "grad_norm": 0.7531981468200684, "learning_rate": 2.963310076398351e-06, "loss": 2.7484, "step": 58474 }, { "epoch": 2.87, "grad_norm": 0.7254064083099365, "learning_rate": 2.9611518233043084e-06, "loss": 2.8743, "step": 58475 }, { "epoch": 2.87, "grad_norm": 0.74024498462677, "learning_rate": 2.9589943525534566e-06, "loss": 2.8673, "step": 58476 }, { "epoch": 2.87, "grad_norm": 0.7722803354263306, "learning_rate": 2.9568376641514237e-06, "loss": 3.0154, "step": 58477 }, { "epoch": 2.87, "grad_norm": 0.7294988036155701, "learning_rate": 2.9546817581039385e-06, "loss": 2.8068, "step": 58478 }, { "epoch": 2.87, "grad_norm": 0.7921958565711975, "learning_rate": 2.95252663441663e-06, "loss": 3.0001, "step": 58479 }, { "epoch": 2.87, "grad_norm": 0.7250039577484131, "learning_rate": 2.9503722930951933e-06, "loss": 2.9304, "step": 58480 }, { "epoch": 2.87, "grad_norm": 0.8294291496276855, "learning_rate": 2.948218734145291e-06, "loss": 2.7402, "step": 58481 }, { "epoch": 2.87, "grad_norm": 0.7494039535522461, "learning_rate": 2.946065957572652e-06, "loss": 2.7678, "step": 58482 }, { "epoch": 2.87, "grad_norm": 0.8283053040504456, "learning_rate": 2.9439139633828714e-06, "loss": 2.863, "step": 58483 }, { "epoch": 2.87, "grad_norm": 0.772268533706665, "learning_rate": 2.941762751581678e-06, "loss": 2.7249, "step": 58484 }, { "epoch": 2.87, "grad_norm": 0.7302533388137817, "learning_rate": 2.939612322174734e-06, "loss": 2.8957, "step": 58485 }, { "epoch": 2.87, "grad_norm": 0.787757933139801, "learning_rate": 2.9374626751676347e-06, "loss": 2.9369, "step": 58486 }, { "epoch": 2.87, "grad_norm": 0.7752677798271179, "learning_rate": 2.935313810566109e-06, "loss": 2.8947, "step": 58487 }, { "epoch": 2.87, "grad_norm": 0.7386978268623352, "learning_rate": 2.933165728375786e-06, "loss": 2.7775, "step": 58488 }, { "epoch": 2.87, "grad_norm": 0.7692047953605652, "learning_rate": 2.9310184286023276e-06, "loss": 3.0412, "step": 58489 }, { "epoch": 2.87, "grad_norm": 0.7471635937690735, "learning_rate": 2.928871911251396e-06, "loss": 2.7332, "step": 58490 }, { "epoch": 2.87, "grad_norm": 0.7705933451652527, "learning_rate": 2.926726176328653e-06, "loss": 3.0526, "step": 58491 }, { "epoch": 2.87, "grad_norm": 0.7803830504417419, "learning_rate": 2.9245812238397282e-06, "loss": 2.9832, "step": 58492 }, { "epoch": 2.87, "grad_norm": 0.7428114414215088, "learning_rate": 2.9224370537903163e-06, "loss": 2.7619, "step": 58493 }, { "epoch": 2.87, "grad_norm": 0.7303468585014343, "learning_rate": 2.9202936661860134e-06, "loss": 2.7892, "step": 58494 }, { "epoch": 2.87, "grad_norm": 0.7733170390129089, "learning_rate": 2.918151061032481e-06, "loss": 2.8862, "step": 58495 }, { "epoch": 2.87, "grad_norm": 0.7337782979011536, "learning_rate": 2.9160092383353483e-06, "loss": 2.9276, "step": 58496 }, { "epoch": 2.87, "grad_norm": 0.7292686700820923, "learning_rate": 2.9138681981002775e-06, "loss": 3.0101, "step": 58497 }, { "epoch": 2.87, "grad_norm": 0.7879104018211365, "learning_rate": 2.9117279403329307e-06, "loss": 2.9083, "step": 58498 }, { "epoch": 2.87, "grad_norm": 0.7869991660118103, "learning_rate": 2.9095884650389034e-06, "loss": 2.912, "step": 58499 }, { "epoch": 2.87, "grad_norm": 0.7370448112487793, "learning_rate": 2.9074497722238242e-06, "loss": 3.0133, "step": 58500 }, { "epoch": 2.87, "grad_norm": 0.7162608504295349, "learning_rate": 2.9053118618933556e-06, "loss": 3.022, "step": 58501 }, { "epoch": 2.87, "grad_norm": 0.7638165950775146, "learning_rate": 2.9031747340531264e-06, "loss": 2.965, "step": 58502 }, { "epoch": 2.87, "grad_norm": 0.731220543384552, "learning_rate": 2.9010383887087317e-06, "loss": 3.0144, "step": 58503 }, { "epoch": 2.87, "grad_norm": 0.778514564037323, "learning_rate": 2.8989028258658674e-06, "loss": 2.801, "step": 58504 }, { "epoch": 2.87, "grad_norm": 0.8099600076675415, "learning_rate": 2.8967680455300956e-06, "loss": 3.0025, "step": 58505 }, { "epoch": 2.87, "grad_norm": 0.7909402847290039, "learning_rate": 2.8946340477070452e-06, "loss": 2.8844, "step": 58506 }, { "epoch": 2.87, "grad_norm": 0.7233417630195618, "learning_rate": 2.892500832402378e-06, "loss": 2.7969, "step": 58507 }, { "epoch": 2.87, "grad_norm": 0.7570949792861938, "learning_rate": 2.8903683996216562e-06, "loss": 2.9808, "step": 58508 }, { "epoch": 2.87, "grad_norm": 0.7265278100967407, "learning_rate": 2.8882367493705425e-06, "loss": 2.9376, "step": 58509 }, { "epoch": 2.87, "grad_norm": 0.7586358189582825, "learning_rate": 2.886105881654599e-06, "loss": 2.8498, "step": 58510 }, { "epoch": 2.87, "grad_norm": 0.7601178884506226, "learning_rate": 2.8839757964794873e-06, "loss": 2.9384, "step": 58511 }, { "epoch": 2.87, "grad_norm": 0.7448052167892456, "learning_rate": 2.8818464938508366e-06, "loss": 2.997, "step": 58512 }, { "epoch": 2.87, "grad_norm": 0.7501569986343384, "learning_rate": 2.879717973774176e-06, "loss": 2.7712, "step": 58513 }, { "epoch": 2.87, "grad_norm": 0.7544119954109192, "learning_rate": 2.8775902362552004e-06, "loss": 2.9557, "step": 58514 }, { "epoch": 2.87, "grad_norm": 0.7905291318893433, "learning_rate": 2.875463281299439e-06, "loss": 3.0345, "step": 58515 }, { "epoch": 2.87, "grad_norm": 0.7742028832435608, "learning_rate": 2.8733371089125213e-06, "loss": 2.854, "step": 58516 }, { "epoch": 2.87, "grad_norm": 0.748437762260437, "learning_rate": 2.871211719100075e-06, "loss": 2.7507, "step": 58517 }, { "epoch": 2.87, "grad_norm": 0.7571814060211182, "learning_rate": 2.8690871118676295e-06, "loss": 2.6589, "step": 58518 }, { "epoch": 2.87, "grad_norm": 0.7534952759742737, "learning_rate": 2.8669632872208805e-06, "loss": 2.8444, "step": 58519 }, { "epoch": 2.87, "grad_norm": 0.710351824760437, "learning_rate": 2.864840245165323e-06, "loss": 2.7474, "step": 58520 }, { "epoch": 2.87, "grad_norm": 0.7488248944282532, "learning_rate": 2.8627179857066195e-06, "loss": 2.7427, "step": 58521 }, { "epoch": 2.87, "grad_norm": 0.7320175766944885, "learning_rate": 2.8605965088503324e-06, "loss": 2.8392, "step": 58522 }, { "epoch": 2.87, "grad_norm": 0.738500714302063, "learning_rate": 2.8584758146020236e-06, "loss": 2.6562, "step": 58523 }, { "epoch": 2.87, "grad_norm": 0.7765610218048096, "learning_rate": 2.856355902967322e-06, "loss": 3.1511, "step": 58524 }, { "epoch": 2.87, "grad_norm": 0.7138228416442871, "learning_rate": 2.85423677395179e-06, "loss": 2.9996, "step": 58525 }, { "epoch": 2.87, "grad_norm": 0.7560122013092041, "learning_rate": 2.8521184275609897e-06, "loss": 2.9072, "step": 58526 }, { "epoch": 2.87, "grad_norm": 0.7315943241119385, "learning_rate": 2.85000086380055e-06, "loss": 2.8984, "step": 58527 }, { "epoch": 2.87, "grad_norm": 0.7746604084968567, "learning_rate": 2.8478840826759996e-06, "loss": 2.7893, "step": 58528 }, { "epoch": 2.87, "grad_norm": 0.7341941595077515, "learning_rate": 2.8457680841929344e-06, "loss": 2.7045, "step": 58529 }, { "epoch": 2.87, "grad_norm": 0.7307849526405334, "learning_rate": 2.843652868356949e-06, "loss": 2.7645, "step": 58530 }, { "epoch": 2.87, "grad_norm": 0.7367571592330933, "learning_rate": 2.841538435173607e-06, "loss": 2.8463, "step": 58531 }, { "epoch": 2.87, "grad_norm": 0.7715376615524292, "learning_rate": 2.8394247846484363e-06, "loss": 2.8366, "step": 58532 }, { "epoch": 2.87, "grad_norm": 0.7914366722106934, "learning_rate": 2.8373119167869994e-06, "loss": 2.777, "step": 58533 }, { "epoch": 2.87, "grad_norm": 0.7575352191925049, "learning_rate": 2.835199831594959e-06, "loss": 3.0308, "step": 58534 }, { "epoch": 2.87, "grad_norm": 0.7674602270126343, "learning_rate": 2.8330885290777426e-06, "loss": 2.7934, "step": 58535 }, { "epoch": 2.87, "grad_norm": 0.7100291848182678, "learning_rate": 2.830978009241047e-06, "loss": 2.8665, "step": 58536 }, { "epoch": 2.87, "grad_norm": 0.7356333136558533, "learning_rate": 2.828868272090334e-06, "loss": 2.9221, "step": 58537 }, { "epoch": 2.87, "grad_norm": 0.7471137642860413, "learning_rate": 2.8267593176311665e-06, "loss": 3.0157, "step": 58538 }, { "epoch": 2.87, "grad_norm": 0.7663685083389282, "learning_rate": 2.8246511458691722e-06, "loss": 2.8722, "step": 58539 }, { "epoch": 2.87, "grad_norm": 0.7581278085708618, "learning_rate": 2.8225437568098143e-06, "loss": 2.776, "step": 58540 }, { "epoch": 2.87, "grad_norm": 0.7169347405433655, "learning_rate": 2.820437150458721e-06, "loss": 2.5599, "step": 58541 }, { "epoch": 2.87, "grad_norm": 0.7184317111968994, "learning_rate": 2.818331326821355e-06, "loss": 2.8356, "step": 58542 }, { "epoch": 2.87, "grad_norm": 0.7458834648132324, "learning_rate": 2.8162262859033447e-06, "loss": 2.8992, "step": 58543 }, { "epoch": 2.87, "grad_norm": 0.7352158427238464, "learning_rate": 2.8141220277102196e-06, "loss": 2.8397, "step": 58544 }, { "epoch": 2.87, "grad_norm": 0.716495156288147, "learning_rate": 2.8120185522474415e-06, "loss": 2.9937, "step": 58545 }, { "epoch": 2.87, "grad_norm": 0.7433305978775024, "learning_rate": 2.809915859520673e-06, "loss": 3.0178, "step": 58546 }, { "epoch": 2.87, "grad_norm": 0.7520804405212402, "learning_rate": 2.8078139495353423e-06, "loss": 2.777, "step": 58547 }, { "epoch": 2.87, "grad_norm": 0.728420078754425, "learning_rate": 2.8057128222970458e-06, "loss": 3.1996, "step": 58548 }, { "epoch": 2.87, "grad_norm": 0.816613495349884, "learning_rate": 2.8036124778113123e-06, "loss": 3.1029, "step": 58549 }, { "epoch": 2.87, "grad_norm": 0.7988955974578857, "learning_rate": 2.8015129160836704e-06, "loss": 2.818, "step": 58550 }, { "epoch": 2.87, "grad_norm": 0.7273043394088745, "learning_rate": 2.7994141371196487e-06, "loss": 2.9679, "step": 58551 }, { "epoch": 2.87, "grad_norm": 0.7395277619361877, "learning_rate": 2.797316140924777e-06, "loss": 2.86, "step": 58552 }, { "epoch": 2.87, "grad_norm": 0.7469648122787476, "learning_rate": 2.79521892750455e-06, "loss": 2.8988, "step": 58553 }, { "epoch": 2.87, "grad_norm": 0.789919376373291, "learning_rate": 2.7931224968645304e-06, "loss": 2.7315, "step": 58554 }, { "epoch": 2.87, "grad_norm": 0.7301388382911682, "learning_rate": 2.7910268490102473e-06, "loss": 3.0406, "step": 58555 }, { "epoch": 2.87, "grad_norm": 0.7471961975097656, "learning_rate": 2.7889319839471956e-06, "loss": 2.7793, "step": 58556 }, { "epoch": 2.87, "grad_norm": 0.8272915482521057, "learning_rate": 2.7868379016808717e-06, "loss": 2.7463, "step": 58557 }, { "epoch": 2.87, "grad_norm": 0.7283830046653748, "learning_rate": 2.7847446022168375e-06, "loss": 2.9614, "step": 58558 }, { "epoch": 2.87, "grad_norm": 0.697257399559021, "learning_rate": 2.782652085560588e-06, "loss": 2.6746, "step": 58559 }, { "epoch": 2.87, "grad_norm": 0.7829978466033936, "learning_rate": 2.7805603517176203e-06, "loss": 3.048, "step": 58560 }, { "epoch": 2.87, "grad_norm": 0.7517426609992981, "learning_rate": 2.7784694006934618e-06, "loss": 2.9577, "step": 58561 }, { "epoch": 2.87, "grad_norm": 0.8072444796562195, "learning_rate": 2.7763792324936083e-06, "loss": 2.8453, "step": 58562 }, { "epoch": 2.87, "grad_norm": 0.7768639326095581, "learning_rate": 2.7742898471235562e-06, "loss": 2.779, "step": 58563 }, { "epoch": 2.87, "grad_norm": 0.7537323832511902, "learning_rate": 2.7722012445888676e-06, "loss": 2.6229, "step": 58564 }, { "epoch": 2.87, "grad_norm": 0.7503150701522827, "learning_rate": 2.7701134248949707e-06, "loss": 2.7897, "step": 58565 }, { "epoch": 2.87, "grad_norm": 0.7244771122932434, "learning_rate": 2.7680263880473953e-06, "loss": 2.7409, "step": 58566 }, { "epoch": 2.87, "grad_norm": 0.7575398087501526, "learning_rate": 2.765940134051636e-06, "loss": 3.0002, "step": 58567 }, { "epoch": 2.87, "grad_norm": 0.8123334646224976, "learning_rate": 2.7638546629131566e-06, "loss": 3.0256, "step": 58568 }, { "epoch": 2.87, "grad_norm": 0.8360390663146973, "learning_rate": 2.7617699746375178e-06, "loss": 3.0575, "step": 58569 }, { "epoch": 2.87, "grad_norm": 0.7276893854141235, "learning_rate": 2.7596860692301492e-06, "loss": 2.9831, "step": 58570 }, { "epoch": 2.87, "grad_norm": 0.7142115831375122, "learning_rate": 2.75760294669658e-06, "loss": 3.0064, "step": 58571 }, { "epoch": 2.87, "grad_norm": 0.7129017114639282, "learning_rate": 2.755520607042272e-06, "loss": 2.8834, "step": 58572 }, { "epoch": 2.87, "grad_norm": 0.7552473545074463, "learning_rate": 2.753439050272721e-06, "loss": 2.7786, "step": 58573 }, { "epoch": 2.87, "grad_norm": 0.7475464940071106, "learning_rate": 2.751358276393423e-06, "loss": 2.9663, "step": 58574 }, { "epoch": 2.87, "grad_norm": 0.7509301900863647, "learning_rate": 2.749278285409806e-06, "loss": 2.6761, "step": 58575 }, { "epoch": 2.87, "grad_norm": 0.7471511960029602, "learning_rate": 2.7471990773274e-06, "loss": 2.9407, "step": 58576 }, { "epoch": 2.87, "grad_norm": 0.7476446628570557, "learning_rate": 2.7451206521516334e-06, "loss": 2.8716, "step": 58577 }, { "epoch": 2.87, "grad_norm": 0.7277437448501587, "learning_rate": 2.743043009888035e-06, "loss": 3.0029, "step": 58578 }, { "epoch": 2.87, "grad_norm": 0.7496187686920166, "learning_rate": 2.7409661505420675e-06, "loss": 2.9366, "step": 58579 }, { "epoch": 2.87, "grad_norm": 0.7967445254325867, "learning_rate": 2.738890074119193e-06, "loss": 2.8992, "step": 58580 }, { "epoch": 2.87, "grad_norm": 0.7687461376190186, "learning_rate": 2.736814780624874e-06, "loss": 2.6873, "step": 58581 }, { "epoch": 2.87, "grad_norm": 0.7759029269218445, "learning_rate": 2.7347402700645393e-06, "loss": 3.1086, "step": 58582 }, { "epoch": 2.87, "grad_norm": 0.8095799684524536, "learning_rate": 2.7326665424436844e-06, "loss": 2.8247, "step": 58583 }, { "epoch": 2.87, "grad_norm": 0.7569171786308289, "learning_rate": 2.730593597767805e-06, "loss": 2.8072, "step": 58584 }, { "epoch": 2.87, "grad_norm": 0.7547518014907837, "learning_rate": 2.72852143604233e-06, "loss": 2.8641, "step": 58585 }, { "epoch": 2.87, "grad_norm": 0.7172838449478149, "learning_rate": 2.7264500572727222e-06, "loss": 2.9092, "step": 58586 }, { "epoch": 2.87, "grad_norm": 0.7331339120864868, "learning_rate": 2.724379461464443e-06, "loss": 3.0988, "step": 58587 }, { "epoch": 2.87, "grad_norm": 0.7577721476554871, "learning_rate": 2.722309648622889e-06, "loss": 2.5406, "step": 58588 }, { "epoch": 2.87, "grad_norm": 0.7274854183197021, "learning_rate": 2.7202406187535887e-06, "loss": 2.9189, "step": 58589 }, { "epoch": 2.87, "grad_norm": 0.7488541007041931, "learning_rate": 2.7181723718619707e-06, "loss": 2.788, "step": 58590 }, { "epoch": 2.87, "grad_norm": 0.7572417855262756, "learning_rate": 2.7161049079534646e-06, "loss": 2.7814, "step": 58591 }, { "epoch": 2.87, "grad_norm": 0.7595935463905334, "learning_rate": 2.7140382270334994e-06, "loss": 2.8215, "step": 58592 }, { "epoch": 2.87, "grad_norm": 0.7744547724723816, "learning_rate": 2.71197232910757e-06, "loss": 2.7036, "step": 58593 }, { "epoch": 2.87, "grad_norm": 0.7563602924346924, "learning_rate": 2.709907214181106e-06, "loss": 2.7836, "step": 58594 }, { "epoch": 2.87, "grad_norm": 0.7460562586784363, "learning_rate": 2.7078428822595034e-06, "loss": 2.9322, "step": 58595 }, { "epoch": 2.87, "grad_norm": 0.7538842558860779, "learning_rate": 2.705779333348257e-06, "loss": 2.8706, "step": 58596 }, { "epoch": 2.87, "grad_norm": 0.7640366554260254, "learning_rate": 2.703716567452763e-06, "loss": 2.8659, "step": 58597 }, { "epoch": 2.87, "grad_norm": 0.7012739777565002, "learning_rate": 2.7016545845784497e-06, "loss": 2.9261, "step": 58598 }, { "epoch": 2.87, "grad_norm": 0.7518980503082275, "learning_rate": 2.6995933847307473e-06, "loss": 2.8051, "step": 58599 }, { "epoch": 2.87, "grad_norm": 0.7138656973838806, "learning_rate": 2.697532967915117e-06, "loss": 2.7991, "step": 58600 }, { "epoch": 2.87, "grad_norm": 0.7334188222885132, "learning_rate": 2.6954733341369884e-06, "loss": 2.888, "step": 58601 }, { "epoch": 2.87, "grad_norm": 0.7425467371940613, "learning_rate": 2.6934144834017567e-06, "loss": 2.5813, "step": 58602 }, { "epoch": 2.87, "grad_norm": 0.7227640151977539, "learning_rate": 2.6913564157148515e-06, "loss": 2.7079, "step": 58603 }, { "epoch": 2.87, "grad_norm": 0.7601994276046753, "learning_rate": 2.689299131081668e-06, "loss": 2.9336, "step": 58604 }, { "epoch": 2.87, "grad_norm": 0.7707348465919495, "learning_rate": 2.6872426295076686e-06, "loss": 3.0102, "step": 58605 }, { "epoch": 2.87, "grad_norm": 0.7580685019493103, "learning_rate": 2.685186910998283e-06, "loss": 3.0894, "step": 58606 }, { "epoch": 2.87, "grad_norm": 0.7323053479194641, "learning_rate": 2.6831319755588387e-06, "loss": 3.0489, "step": 58607 }, { "epoch": 2.87, "grad_norm": 0.7686645984649658, "learning_rate": 2.681077823194866e-06, "loss": 2.8456, "step": 58608 }, { "epoch": 2.87, "grad_norm": 0.7553392052650452, "learning_rate": 2.6790244539116935e-06, "loss": 2.83, "step": 58609 }, { "epoch": 2.87, "grad_norm": 0.7549324631690979, "learning_rate": 2.6769718677147167e-06, "loss": 3.0199, "step": 58610 }, { "epoch": 2.87, "grad_norm": 0.758166491985321, "learning_rate": 2.6749200646093982e-06, "loss": 2.9844, "step": 58611 }, { "epoch": 2.87, "grad_norm": 0.7355962991714478, "learning_rate": 2.6728690446011336e-06, "loss": 3.0423, "step": 58612 }, { "epoch": 2.87, "grad_norm": 0.7758812308311462, "learning_rate": 2.6708188076952854e-06, "loss": 2.8497, "step": 58613 }, { "epoch": 2.87, "grad_norm": 0.7655168175697327, "learning_rate": 2.668769353897282e-06, "loss": 2.911, "step": 58614 }, { "epoch": 2.87, "grad_norm": 0.731803834438324, "learning_rate": 2.6667206832125197e-06, "loss": 2.5736, "step": 58615 }, { "epoch": 2.87, "grad_norm": 0.727342426776886, "learning_rate": 2.6646727956463943e-06, "loss": 3.0545, "step": 58616 }, { "epoch": 2.87, "grad_norm": 0.7434524297714233, "learning_rate": 2.6626256912043343e-06, "loss": 3.0437, "step": 58617 }, { "epoch": 2.87, "grad_norm": 0.805879533290863, "learning_rate": 2.660579369891669e-06, "loss": 2.9437, "step": 58618 }, { "epoch": 2.87, "grad_norm": 0.76210618019104, "learning_rate": 2.658533831713794e-06, "loss": 2.8384, "step": 58619 }, { "epoch": 2.87, "grad_norm": 0.7613382935523987, "learning_rate": 2.6564890766761047e-06, "loss": 2.7502, "step": 58620 }, { "epoch": 2.87, "grad_norm": 0.7467081546783447, "learning_rate": 2.654445104784031e-06, "loss": 2.6637, "step": 58621 }, { "epoch": 2.87, "grad_norm": 0.7348057627677917, "learning_rate": 2.652401916042901e-06, "loss": 2.9316, "step": 58622 }, { "epoch": 2.87, "grad_norm": 0.8229193687438965, "learning_rate": 2.6503595104581444e-06, "loss": 3.0189, "step": 58623 }, { "epoch": 2.87, "grad_norm": 0.741585910320282, "learning_rate": 2.648317888035123e-06, "loss": 2.9296, "step": 58624 }, { "epoch": 2.87, "grad_norm": 0.7843425869941711, "learning_rate": 2.6462770487791663e-06, "loss": 2.562, "step": 58625 }, { "epoch": 2.87, "grad_norm": 0.7439048290252686, "learning_rate": 2.644236992695703e-06, "loss": 3.0224, "step": 58626 }, { "epoch": 2.87, "grad_norm": 0.7619640827178955, "learning_rate": 2.6421977197900956e-06, "loss": 2.9144, "step": 58627 }, { "epoch": 2.87, "grad_norm": 0.7499315142631531, "learning_rate": 2.64015923006774e-06, "loss": 2.5829, "step": 58628 }, { "epoch": 2.87, "grad_norm": 0.7854250073432922, "learning_rate": 2.6381215235339315e-06, "loss": 2.7211, "step": 58629 }, { "epoch": 2.87, "grad_norm": 0.7892012000083923, "learning_rate": 2.6360846001940994e-06, "loss": 2.8919, "step": 58630 }, { "epoch": 2.87, "grad_norm": 0.7646967768669128, "learning_rate": 2.634048460053573e-06, "loss": 3.0164, "step": 58631 }, { "epoch": 2.87, "grad_norm": 0.7222882509231567, "learning_rate": 2.6320131031177805e-06, "loss": 2.8806, "step": 58632 }, { "epoch": 2.87, "grad_norm": 0.7930301427841187, "learning_rate": 2.6299785293920184e-06, "loss": 2.9666, "step": 58633 }, { "epoch": 2.87, "grad_norm": 0.775130569934845, "learning_rate": 2.627944738881649e-06, "loss": 2.9753, "step": 58634 }, { "epoch": 2.87, "grad_norm": 0.7208459377288818, "learning_rate": 2.625911731592034e-06, "loss": 3.2084, "step": 58635 }, { "epoch": 2.87, "grad_norm": 0.7527031898498535, "learning_rate": 2.62387950752857e-06, "loss": 2.8473, "step": 58636 }, { "epoch": 2.87, "grad_norm": 0.7546898722648621, "learning_rate": 2.621848066696519e-06, "loss": 2.799, "step": 58637 }, { "epoch": 2.87, "grad_norm": 0.7190507054328918, "learning_rate": 2.6198174091013435e-06, "loss": 2.8375, "step": 58638 }, { "epoch": 2.87, "grad_norm": 0.7207900285720825, "learning_rate": 2.617787534748339e-06, "loss": 2.6189, "step": 58639 }, { "epoch": 2.87, "grad_norm": 0.7409490346908569, "learning_rate": 2.6157584436428016e-06, "loss": 2.9473, "step": 58640 }, { "epoch": 2.87, "grad_norm": 0.7580714225769043, "learning_rate": 2.6137301357901596e-06, "loss": 2.8106, "step": 58641 }, { "epoch": 2.87, "grad_norm": 0.792218804359436, "learning_rate": 2.6117026111957097e-06, "loss": 2.783, "step": 58642 }, { "epoch": 2.87, "grad_norm": 0.7418773770332336, "learning_rate": 2.6096758698648137e-06, "loss": 3.0417, "step": 58643 }, { "epoch": 2.87, "grad_norm": 0.7960341572761536, "learning_rate": 2.607649911802767e-06, "loss": 2.8398, "step": 58644 }, { "epoch": 2.87, "grad_norm": 0.7470495700836182, "learning_rate": 2.605624737014966e-06, "loss": 3.0263, "step": 58645 }, { "epoch": 2.87, "grad_norm": 0.7760054469108582, "learning_rate": 2.60360034550674e-06, "loss": 2.8589, "step": 58646 }, { "epoch": 2.87, "grad_norm": 0.7305024266242981, "learning_rate": 2.6015767372833507e-06, "loss": 2.8561, "step": 58647 }, { "epoch": 2.87, "grad_norm": 0.753297746181488, "learning_rate": 2.599553912350194e-06, "loss": 2.9018, "step": 58648 }, { "epoch": 2.87, "grad_norm": 0.7611088156700134, "learning_rate": 2.5975318707125325e-06, "loss": 2.9396, "step": 58649 }, { "epoch": 2.87, "grad_norm": 0.768486499786377, "learning_rate": 2.595510612375762e-06, "loss": 2.6987, "step": 58650 }, { "epoch": 2.87, "grad_norm": 0.8263199925422668, "learning_rate": 2.593490137345211e-06, "loss": 2.8593, "step": 58651 }, { "epoch": 2.87, "grad_norm": 0.7508795857429504, "learning_rate": 2.5914704456261428e-06, "loss": 2.9293, "step": 58652 }, { "epoch": 2.87, "grad_norm": 0.790963888168335, "learning_rate": 2.5894515372238854e-06, "loss": 2.7323, "step": 58653 }, { "epoch": 2.87, "grad_norm": 0.7731552124023438, "learning_rate": 2.5874334121438356e-06, "loss": 2.994, "step": 58654 }, { "epoch": 2.87, "grad_norm": 0.7483537197113037, "learning_rate": 2.5854160703911885e-06, "loss": 2.8272, "step": 58655 }, { "epoch": 2.87, "grad_norm": 0.749880850315094, "learning_rate": 2.5833995119713403e-06, "loss": 2.9543, "step": 58656 }, { "epoch": 2.87, "grad_norm": 0.7379136681556702, "learning_rate": 2.581383736889553e-06, "loss": 2.9406, "step": 58657 }, { "epoch": 2.87, "grad_norm": 0.753685474395752, "learning_rate": 2.5793687451512223e-06, "loss": 2.8851, "step": 58658 }, { "epoch": 2.87, "grad_norm": 0.7053732872009277, "learning_rate": 2.5773545367615444e-06, "loss": 2.9854, "step": 58659 }, { "epoch": 2.87, "grad_norm": 0.7595216035842896, "learning_rate": 2.5753411117258817e-06, "loss": 2.8745, "step": 58660 }, { "epoch": 2.87, "grad_norm": 0.7526916861534119, "learning_rate": 2.5733284700495295e-06, "loss": 2.9997, "step": 58661 }, { "epoch": 2.87, "grad_norm": 0.7947744131088257, "learning_rate": 2.5713166117377836e-06, "loss": 2.8262, "step": 58662 }, { "epoch": 2.87, "grad_norm": 0.7724664211273193, "learning_rate": 2.5693055367959737e-06, "loss": 2.7696, "step": 58663 }, { "epoch": 2.87, "grad_norm": 0.7890022397041321, "learning_rate": 2.5672952452293617e-06, "loss": 2.8872, "step": 58664 }, { "epoch": 2.88, "grad_norm": 0.7602983713150024, "learning_rate": 2.5652857370432432e-06, "loss": 2.9562, "step": 58665 }, { "epoch": 2.88, "grad_norm": 0.7163755297660828, "learning_rate": 2.5632770122429148e-06, "loss": 2.8898, "step": 58666 }, { "epoch": 2.88, "grad_norm": 0.7645668983459473, "learning_rate": 2.5612690708337046e-06, "loss": 2.8835, "step": 58667 }, { "epoch": 2.88, "grad_norm": 0.7817904949188232, "learning_rate": 2.5592619128208424e-06, "loss": 3.0378, "step": 58668 }, { "epoch": 2.88, "grad_norm": 0.7333517670631409, "learning_rate": 2.5572555382096236e-06, "loss": 2.9638, "step": 58669 }, { "epoch": 2.88, "grad_norm": 0.7220652103424072, "learning_rate": 2.555249947005378e-06, "loss": 2.7169, "step": 58670 }, { "epoch": 2.88, "grad_norm": 0.7610824108123779, "learning_rate": 2.5532451392133667e-06, "loss": 2.8406, "step": 58671 }, { "epoch": 2.88, "grad_norm": 0.8026726245880127, "learning_rate": 2.55124111483882e-06, "loss": 3.1763, "step": 58672 }, { "epoch": 2.88, "grad_norm": 0.7766146659851074, "learning_rate": 2.5492378738871e-06, "loss": 2.7902, "step": 58673 }, { "epoch": 2.88, "grad_norm": 0.7230070233345032, "learning_rate": 2.5472354163634357e-06, "loss": 2.7006, "step": 58674 }, { "epoch": 2.88, "grad_norm": 0.7731808423995972, "learning_rate": 2.5452337422730894e-06, "loss": 3.0314, "step": 58675 }, { "epoch": 2.88, "grad_norm": 0.7285119891166687, "learning_rate": 2.5432328516213574e-06, "loss": 2.8947, "step": 58676 }, { "epoch": 2.88, "grad_norm": 0.746861457824707, "learning_rate": 2.5412327444135017e-06, "loss": 2.9094, "step": 58677 }, { "epoch": 2.88, "grad_norm": 0.7949832677841187, "learning_rate": 2.539233420654818e-06, "loss": 2.8981, "step": 58678 }, { "epoch": 2.88, "grad_norm": 0.7782156467437744, "learning_rate": 2.5372348803505026e-06, "loss": 2.8062, "step": 58679 }, { "epoch": 2.88, "grad_norm": 0.7405202984809875, "learning_rate": 2.5352371235058845e-06, "loss": 2.943, "step": 58680 }, { "epoch": 2.88, "grad_norm": 0.7959614396095276, "learning_rate": 2.5332401501262256e-06, "loss": 2.7178, "step": 58681 }, { "epoch": 2.88, "grad_norm": 0.7623839974403381, "learning_rate": 2.5312439602167555e-06, "loss": 2.9395, "step": 58682 }, { "epoch": 2.88, "grad_norm": 0.7368699312210083, "learning_rate": 2.529248553782737e-06, "loss": 2.8478, "step": 58683 }, { "epoch": 2.88, "grad_norm": 0.7328764796257019, "learning_rate": 2.527253930829398e-06, "loss": 2.7829, "step": 58684 }, { "epoch": 2.88, "grad_norm": 0.7400158643722534, "learning_rate": 2.525260091362036e-06, "loss": 2.8827, "step": 58685 }, { "epoch": 2.88, "grad_norm": 0.7456002831459045, "learning_rate": 2.523267035385912e-06, "loss": 2.8261, "step": 58686 }, { "epoch": 2.88, "grad_norm": 0.726157009601593, "learning_rate": 2.5212747629062224e-06, "loss": 2.7118, "step": 58687 }, { "epoch": 2.88, "grad_norm": 0.7584595680236816, "learning_rate": 2.519283273928263e-06, "loss": 2.9325, "step": 58688 }, { "epoch": 2.88, "grad_norm": 0.8131120204925537, "learning_rate": 2.5172925684572633e-06, "loss": 3.0036, "step": 58689 }, { "epoch": 2.88, "grad_norm": 0.760067880153656, "learning_rate": 2.5153026464984517e-06, "loss": 2.9375, "step": 58690 }, { "epoch": 2.88, "grad_norm": 0.7592241764068604, "learning_rate": 2.513313508057091e-06, "loss": 2.9265, "step": 58691 }, { "epoch": 2.88, "grad_norm": 0.800504744052887, "learning_rate": 2.5113251531383773e-06, "loss": 2.8072, "step": 58692 }, { "epoch": 2.88, "grad_norm": 0.7303928732872009, "learning_rate": 2.509337581747606e-06, "loss": 2.8293, "step": 58693 }, { "epoch": 2.88, "grad_norm": 0.8083999156951904, "learning_rate": 2.5073507938900064e-06, "loss": 2.6546, "step": 58694 }, { "epoch": 2.88, "grad_norm": 0.7412149310112, "learning_rate": 2.505364789570774e-06, "loss": 2.9867, "step": 58695 }, { "epoch": 2.88, "grad_norm": 0.7698215246200562, "learning_rate": 2.5033795687951387e-06, "loss": 3.118, "step": 58696 }, { "epoch": 2.88, "grad_norm": 0.77435702085495, "learning_rate": 2.501395131568362e-06, "loss": 3.0678, "step": 58697 }, { "epoch": 2.88, "grad_norm": 0.7428318858146667, "learning_rate": 2.499411477895674e-06, "loss": 2.9809, "step": 58698 }, { "epoch": 2.88, "grad_norm": 0.7360471487045288, "learning_rate": 2.497428607782237e-06, "loss": 2.9411, "step": 58699 }, { "epoch": 2.88, "grad_norm": 0.7438157796859741, "learning_rate": 2.4954465212333465e-06, "loss": 2.7105, "step": 58700 }, { "epoch": 2.88, "grad_norm": 0.6961084604263306, "learning_rate": 2.493465218254198e-06, "loss": 2.7633, "step": 58701 }, { "epoch": 2.88, "grad_norm": 0.7811095118522644, "learning_rate": 2.491484698849988e-06, "loss": 2.8801, "step": 58702 }, { "epoch": 2.88, "grad_norm": 0.7979738116264343, "learning_rate": 2.489504963025979e-06, "loss": 2.9504, "step": 58703 }, { "epoch": 2.88, "grad_norm": 0.8025102615356445, "learning_rate": 2.487526010787333e-06, "loss": 2.6882, "step": 58704 }, { "epoch": 2.88, "grad_norm": 0.771865725517273, "learning_rate": 2.4855478421393126e-06, "loss": 2.8791, "step": 58705 }, { "epoch": 2.88, "grad_norm": 0.7456363439559937, "learning_rate": 2.4835704570870806e-06, "loss": 2.6352, "step": 58706 }, { "epoch": 2.88, "grad_norm": 0.7263805866241455, "learning_rate": 2.481593855635833e-06, "loss": 2.8166, "step": 58707 }, { "epoch": 2.88, "grad_norm": 0.7499218583106995, "learning_rate": 2.479618037790865e-06, "loss": 2.9985, "step": 58708 }, { "epoch": 2.88, "grad_norm": 0.724136471748352, "learning_rate": 2.4776430035572723e-06, "loss": 2.9608, "step": 58709 }, { "epoch": 2.88, "grad_norm": 0.7608489990234375, "learning_rate": 2.475668752940352e-06, "loss": 2.6573, "step": 58710 }, { "epoch": 2.88, "grad_norm": 0.7402869462966919, "learning_rate": 2.4736952859452653e-06, "loss": 3.0642, "step": 58711 }, { "epoch": 2.88, "grad_norm": 0.7280595302581787, "learning_rate": 2.471722602577175e-06, "loss": 2.778, "step": 58712 }, { "epoch": 2.88, "grad_norm": 0.7287282943725586, "learning_rate": 2.469750702841311e-06, "loss": 2.8341, "step": 58713 }, { "epoch": 2.88, "grad_norm": 0.7806359529495239, "learning_rate": 2.4677795867428684e-06, "loss": 3.1066, "step": 58714 }, { "epoch": 2.88, "grad_norm": 0.7894913554191589, "learning_rate": 2.4658092542870434e-06, "loss": 3.0778, "step": 58715 }, { "epoch": 2.88, "grad_norm": 0.7530649304389954, "learning_rate": 2.4638397054789984e-06, "loss": 2.9428, "step": 58716 }, { "epoch": 2.88, "grad_norm": 0.7815943360328674, "learning_rate": 2.461870940323929e-06, "loss": 3.0238, "step": 58717 }, { "epoch": 2.88, "grad_norm": 0.7556338310241699, "learning_rate": 2.459902958827065e-06, "loss": 2.8127, "step": 58718 }, { "epoch": 2.88, "grad_norm": 0.7679249048233032, "learning_rate": 2.4579357609935344e-06, "loss": 2.9174, "step": 58719 }, { "epoch": 2.88, "grad_norm": 0.8016083836555481, "learning_rate": 2.455969346828568e-06, "loss": 2.8592, "step": 58720 }, { "epoch": 2.88, "grad_norm": 0.7322315573692322, "learning_rate": 2.4540037163372606e-06, "loss": 3.0443, "step": 58721 }, { "epoch": 2.88, "grad_norm": 0.7427347898483276, "learning_rate": 2.452038869524875e-06, "loss": 2.7349, "step": 58722 }, { "epoch": 2.88, "grad_norm": 0.7253776788711548, "learning_rate": 2.45007480639654e-06, "loss": 2.9283, "step": 58723 }, { "epoch": 2.88, "grad_norm": 0.7481330037117004, "learning_rate": 2.4481115269574526e-06, "loss": 2.8383, "step": 58724 }, { "epoch": 2.88, "grad_norm": 0.7602149248123169, "learning_rate": 2.446149031212774e-06, "loss": 2.7756, "step": 58725 }, { "epoch": 2.88, "grad_norm": 0.7619256377220154, "learning_rate": 2.4441873191676677e-06, "loss": 3.0721, "step": 58726 }, { "epoch": 2.88, "grad_norm": 0.7644228935241699, "learning_rate": 2.4422263908272953e-06, "loss": 2.8749, "step": 58727 }, { "epoch": 2.88, "grad_norm": 0.7094855904579163, "learning_rate": 2.4402662461968538e-06, "loss": 3.0339, "step": 58728 }, { "epoch": 2.88, "grad_norm": 0.7785953283309937, "learning_rate": 2.4383068852814715e-06, "loss": 2.9188, "step": 58729 }, { "epoch": 2.88, "grad_norm": 0.7782620191574097, "learning_rate": 2.4363483080863446e-06, "loss": 2.7007, "step": 58730 }, { "epoch": 2.88, "grad_norm": 0.7648835778236389, "learning_rate": 2.434390514616602e-06, "loss": 3.0109, "step": 58731 }, { "epoch": 2.88, "grad_norm": 0.7509472966194153, "learning_rate": 2.4324335048773734e-06, "loss": 2.8961, "step": 58732 }, { "epoch": 2.88, "grad_norm": 0.7752648591995239, "learning_rate": 2.430477278873888e-06, "loss": 2.8416, "step": 58733 }, { "epoch": 2.88, "grad_norm": 0.775607168674469, "learning_rate": 2.428521836611241e-06, "loss": 2.7793, "step": 58734 }, { "epoch": 2.88, "grad_norm": 0.7678676247596741, "learning_rate": 2.426567178094596e-06, "loss": 2.8429, "step": 58735 }, { "epoch": 2.88, "grad_norm": 0.7633538246154785, "learning_rate": 2.4246133033291148e-06, "loss": 2.7533, "step": 58736 }, { "epoch": 2.88, "grad_norm": 0.7499951124191284, "learning_rate": 2.4226602123199267e-06, "loss": 3.0187, "step": 58737 }, { "epoch": 2.88, "grad_norm": 0.8302484750747681, "learning_rate": 2.420707905072161e-06, "loss": 2.9427, "step": 58738 }, { "epoch": 2.88, "grad_norm": 0.7577258348464966, "learning_rate": 2.418756381591014e-06, "loss": 2.967, "step": 58739 }, { "epoch": 2.88, "grad_norm": 0.7333148121833801, "learning_rate": 2.416805641881581e-06, "loss": 2.869, "step": 58740 }, { "epoch": 2.88, "grad_norm": 0.8242916464805603, "learning_rate": 2.4148556859490242e-06, "loss": 2.9112, "step": 58741 }, { "epoch": 2.88, "grad_norm": 0.7202953696250916, "learning_rate": 2.4129065137984404e-06, "loss": 2.6746, "step": 58742 }, { "epoch": 2.88, "grad_norm": 0.7955062389373779, "learning_rate": 2.410958125434992e-06, "loss": 2.7812, "step": 58743 }, { "epoch": 2.88, "grad_norm": 0.8054195642471313, "learning_rate": 2.4090105208638078e-06, "loss": 3.0059, "step": 58744 }, { "epoch": 2.88, "grad_norm": 0.7250577807426453, "learning_rate": 2.4070637000900174e-06, "loss": 2.8463, "step": 58745 }, { "epoch": 2.88, "grad_norm": 0.6978896260261536, "learning_rate": 2.4051176631187497e-06, "loss": 2.8534, "step": 58746 }, { "epoch": 2.88, "grad_norm": 0.794734001159668, "learning_rate": 2.403172409955134e-06, "loss": 2.6841, "step": 58747 }, { "epoch": 2.88, "grad_norm": 0.7843784093856812, "learning_rate": 2.4012279406043e-06, "loss": 2.7228, "step": 58748 }, { "epoch": 2.88, "grad_norm": 0.7873679399490356, "learning_rate": 2.3992842550713433e-06, "loss": 2.8963, "step": 58749 }, { "epoch": 2.88, "grad_norm": 0.7977191209793091, "learning_rate": 2.397341353361393e-06, "loss": 2.8242, "step": 58750 }, { "epoch": 2.88, "grad_norm": 0.7681131958961487, "learning_rate": 2.395399235479578e-06, "loss": 2.8686, "step": 58751 }, { "epoch": 2.88, "grad_norm": 0.7318379282951355, "learning_rate": 2.393457901430995e-06, "loss": 2.9615, "step": 58752 }, { "epoch": 2.88, "grad_norm": 0.7517814040184021, "learning_rate": 2.391517351220773e-06, "loss": 2.8989, "step": 58753 }, { "epoch": 2.88, "grad_norm": 0.7598768472671509, "learning_rate": 2.3895775848540413e-06, "loss": 2.9079, "step": 58754 }, { "epoch": 2.88, "grad_norm": 0.7105078101158142, "learning_rate": 2.387638602335862e-06, "loss": 2.8294, "step": 58755 }, { "epoch": 2.88, "grad_norm": 0.7269749641418457, "learning_rate": 2.385700403671398e-06, "loss": 2.7675, "step": 58756 }, { "epoch": 2.88, "grad_norm": 0.7759285569190979, "learning_rate": 2.383762988865678e-06, "loss": 2.9059, "step": 58757 }, { "epoch": 2.88, "grad_norm": 0.74515700340271, "learning_rate": 2.381826357923866e-06, "loss": 2.7272, "step": 58758 }, { "epoch": 2.88, "grad_norm": 0.7521591186523438, "learning_rate": 2.3798905108510567e-06, "loss": 3.0477, "step": 58759 }, { "epoch": 2.88, "grad_norm": 0.7557122707366943, "learning_rate": 2.3779554476523134e-06, "loss": 2.7349, "step": 58760 }, { "epoch": 2.88, "grad_norm": 0.7690261602401733, "learning_rate": 2.3760211683327644e-06, "loss": 2.8652, "step": 58761 }, { "epoch": 2.88, "grad_norm": 0.780361533164978, "learning_rate": 2.374087672897507e-06, "loss": 2.8442, "step": 58762 }, { "epoch": 2.88, "grad_norm": 0.7434288263320923, "learning_rate": 2.3721549613516355e-06, "loss": 2.88, "step": 58763 }, { "epoch": 2.88, "grad_norm": 0.8056708574295044, "learning_rate": 2.3702230337002135e-06, "loss": 2.9113, "step": 58764 }, { "epoch": 2.88, "grad_norm": 0.8034791946411133, "learning_rate": 2.368291889948337e-06, "loss": 2.7607, "step": 58765 }, { "epoch": 2.88, "grad_norm": 0.7488557696342468, "learning_rate": 2.3663615301011018e-06, "loss": 2.7314, "step": 58766 }, { "epoch": 2.88, "grad_norm": 0.7275140881538391, "learning_rate": 2.364431954163604e-06, "loss": 2.7287, "step": 58767 }, { "epoch": 2.88, "grad_norm": 0.7067748308181763, "learning_rate": 2.3625031621409053e-06, "loss": 2.8275, "step": 58768 }, { "epoch": 2.88, "grad_norm": 0.7348287105560303, "learning_rate": 2.3605751540380692e-06, "loss": 2.9979, "step": 58769 }, { "epoch": 2.88, "grad_norm": 0.7248157262802124, "learning_rate": 2.3586479298602245e-06, "loss": 2.8282, "step": 58770 }, { "epoch": 2.88, "grad_norm": 0.7620055079460144, "learning_rate": 2.3567214896124007e-06, "loss": 2.7483, "step": 58771 }, { "epoch": 2.88, "grad_norm": 0.7315212488174438, "learning_rate": 2.3547958332997276e-06, "loss": 2.7802, "step": 58772 }, { "epoch": 2.88, "grad_norm": 0.7276974320411682, "learning_rate": 2.3528709609272e-06, "loss": 2.9266, "step": 58773 }, { "epoch": 2.88, "grad_norm": 0.7391351461410522, "learning_rate": 2.3509468724999146e-06, "loss": 2.9255, "step": 58774 }, { "epoch": 2.88, "grad_norm": 0.8038753271102905, "learning_rate": 2.3490235680230006e-06, "loss": 2.572, "step": 58775 }, { "epoch": 2.88, "grad_norm": 0.7655202150344849, "learning_rate": 2.3471010475014207e-06, "loss": 2.8412, "step": 58776 }, { "epoch": 2.88, "grad_norm": 0.7686148881912231, "learning_rate": 2.345179310940337e-06, "loss": 3.075, "step": 58777 }, { "epoch": 2.88, "grad_norm": 0.7740659117698669, "learning_rate": 2.3432583583447462e-06, "loss": 2.96, "step": 58778 }, { "epoch": 2.88, "grad_norm": 0.6870248913764954, "learning_rate": 2.34133818971971e-06, "loss": 2.7546, "step": 58779 }, { "epoch": 2.88, "grad_norm": 0.7673691511154175, "learning_rate": 2.3394188050703587e-06, "loss": 2.7017, "step": 58780 }, { "epoch": 2.88, "grad_norm": 0.7266324162483215, "learning_rate": 2.337500204401621e-06, "loss": 2.9339, "step": 58781 }, { "epoch": 2.88, "grad_norm": 0.7386431694030762, "learning_rate": 2.335582387718693e-06, "loss": 2.8721, "step": 58782 }, { "epoch": 2.88, "grad_norm": 0.7468572854995728, "learning_rate": 2.3336653550265037e-06, "loss": 2.9022, "step": 58783 }, { "epoch": 2.88, "grad_norm": 0.7643420100212097, "learning_rate": 2.331749106330183e-06, "loss": 3.2001, "step": 58784 }, { "epoch": 2.88, "grad_norm": 0.7512932419776917, "learning_rate": 2.3298336416347263e-06, "loss": 2.9548, "step": 58785 }, { "epoch": 2.88, "grad_norm": 0.7336491942405701, "learning_rate": 2.3279189609451964e-06, "loss": 2.8733, "step": 58786 }, { "epoch": 2.88, "grad_norm": 0.7166445851325989, "learning_rate": 2.326005064266656e-06, "loss": 2.8924, "step": 58787 }, { "epoch": 2.88, "grad_norm": 0.8269556760787964, "learning_rate": 2.3240919516041014e-06, "loss": 2.9191, "step": 58788 }, { "epoch": 2.88, "grad_norm": 0.7374542355537415, "learning_rate": 2.322179622962628e-06, "loss": 2.9622, "step": 58789 }, { "epoch": 2.88, "grad_norm": 0.7649866342544556, "learning_rate": 2.3202680783472315e-06, "loss": 2.8605, "step": 58790 }, { "epoch": 2.88, "grad_norm": 0.7545380592346191, "learning_rate": 2.3183573177629757e-06, "loss": 2.9829, "step": 58791 }, { "epoch": 2.88, "grad_norm": 0.7572214007377625, "learning_rate": 2.316447341214889e-06, "loss": 2.9821, "step": 58792 }, { "epoch": 2.88, "grad_norm": 0.7295137047767639, "learning_rate": 2.3145381487079674e-06, "loss": 2.9972, "step": 58793 }, { "epoch": 2.88, "grad_norm": 0.7803434133529663, "learning_rate": 2.3126297402472404e-06, "loss": 2.8749, "step": 58794 }, { "epoch": 2.88, "grad_norm": 0.7383474707603455, "learning_rate": 2.310722115837771e-06, "loss": 2.8434, "step": 58795 }, { "epoch": 2.88, "grad_norm": 0.8103850483894348, "learning_rate": 2.308815275484588e-06, "loss": 2.4542, "step": 58796 }, { "epoch": 2.88, "grad_norm": 0.7786707282066345, "learning_rate": 2.3069092191926873e-06, "loss": 3.1745, "step": 58797 }, { "epoch": 2.88, "grad_norm": 0.7713927626609802, "learning_rate": 2.3050039469670656e-06, "loss": 2.824, "step": 58798 }, { "epoch": 2.88, "grad_norm": 0.7706761956214905, "learning_rate": 2.3030994588128183e-06, "loss": 2.8247, "step": 58799 }, { "epoch": 2.88, "grad_norm": 0.7205535173416138, "learning_rate": 2.301195754734875e-06, "loss": 2.8544, "step": 58800 }, { "epoch": 2.88, "grad_norm": 0.7274318933486938, "learning_rate": 2.299292834738298e-06, "loss": 2.9493, "step": 58801 }, { "epoch": 2.88, "grad_norm": 0.7716490030288696, "learning_rate": 2.2973906988281167e-06, "loss": 2.9651, "step": 58802 }, { "epoch": 2.88, "grad_norm": 0.7678560614585876, "learning_rate": 2.295489347009294e-06, "loss": 2.9299, "step": 58803 }, { "epoch": 2.88, "grad_norm": 0.7848706245422363, "learning_rate": 2.2935887792868257e-06, "loss": 2.8571, "step": 58804 }, { "epoch": 2.88, "grad_norm": 0.7634941339492798, "learning_rate": 2.291688995665808e-06, "loss": 2.9207, "step": 58805 }, { "epoch": 2.88, "grad_norm": 0.7758709788322449, "learning_rate": 2.2897899961511367e-06, "loss": 2.8365, "step": 58806 }, { "epoch": 2.88, "grad_norm": 0.7400304675102234, "learning_rate": 2.287891780747908e-06, "loss": 2.952, "step": 58807 }, { "epoch": 2.88, "grad_norm": 0.7655829787254333, "learning_rate": 2.285994349461051e-06, "loss": 2.6617, "step": 58808 }, { "epoch": 2.88, "grad_norm": 0.738251268863678, "learning_rate": 2.284097702295562e-06, "loss": 2.9595, "step": 58809 }, { "epoch": 2.88, "grad_norm": 0.7782675623893738, "learning_rate": 2.2822018392565035e-06, "loss": 2.7474, "step": 58810 }, { "epoch": 2.88, "grad_norm": 0.7073926329612732, "learning_rate": 2.2803067603488378e-06, "loss": 2.8823, "step": 58811 }, { "epoch": 2.88, "grad_norm": 0.8041476607322693, "learning_rate": 2.278412465577528e-06, "loss": 2.7922, "step": 58812 }, { "epoch": 2.88, "grad_norm": 0.7299349308013916, "learning_rate": 2.2765189549475703e-06, "loss": 3.0036, "step": 58813 }, { "epoch": 2.88, "grad_norm": 0.7464435696601868, "learning_rate": 2.2746262284639607e-06, "loss": 2.8143, "step": 58814 }, { "epoch": 2.88, "grad_norm": 0.7710967659950256, "learning_rate": 2.2727342861317275e-06, "loss": 2.8977, "step": 58815 }, { "epoch": 2.88, "grad_norm": 0.7730777263641357, "learning_rate": 2.270843127955768e-06, "loss": 2.773, "step": 58816 }, { "epoch": 2.88, "grad_norm": 0.7543210983276367, "learning_rate": 2.2689527539411445e-06, "loss": 2.9257, "step": 58817 }, { "epoch": 2.88, "grad_norm": 0.7473542094230652, "learning_rate": 2.2670631640927527e-06, "loss": 2.7792, "step": 58818 }, { "epoch": 2.88, "grad_norm": 0.7523601055145264, "learning_rate": 2.2651743584156555e-06, "loss": 2.9197, "step": 58819 }, { "epoch": 2.88, "grad_norm": 0.750109851360321, "learning_rate": 2.2632863369147824e-06, "loss": 3.0966, "step": 58820 }, { "epoch": 2.88, "grad_norm": 0.7598704099655151, "learning_rate": 2.261399099595129e-06, "loss": 2.9957, "step": 58821 }, { "epoch": 2.88, "grad_norm": 0.7527625560760498, "learning_rate": 2.2595126464616254e-06, "loss": 2.9311, "step": 58822 }, { "epoch": 2.88, "grad_norm": 0.7135601043701172, "learning_rate": 2.257626977519267e-06, "loss": 2.9232, "step": 58823 }, { "epoch": 2.88, "grad_norm": 0.7807445526123047, "learning_rate": 2.2557420927730163e-06, "loss": 2.9763, "step": 58824 }, { "epoch": 2.88, "grad_norm": 0.7743086814880371, "learning_rate": 2.2538579922278364e-06, "loss": 3.0103, "step": 58825 }, { "epoch": 2.88, "grad_norm": 0.7185844779014587, "learning_rate": 2.2519746758886904e-06, "loss": 2.9725, "step": 58826 }, { "epoch": 2.88, "grad_norm": 0.7576574683189392, "learning_rate": 2.2500921437605736e-06, "loss": 2.8631, "step": 58827 }, { "epoch": 2.88, "grad_norm": 0.7718368172645569, "learning_rate": 2.2482103958483823e-06, "loss": 3.0295, "step": 58828 }, { "epoch": 2.88, "grad_norm": 0.7165724635124207, "learning_rate": 2.246329432157079e-06, "loss": 2.7588, "step": 58829 }, { "epoch": 2.88, "grad_norm": 0.7765059471130371, "learning_rate": 2.2444492526916935e-06, "loss": 2.832, "step": 58830 }, { "epoch": 2.88, "grad_norm": 0.7383131980895996, "learning_rate": 2.2425698574570884e-06, "loss": 2.8534, "step": 58831 }, { "epoch": 2.88, "grad_norm": 0.7530897259712219, "learning_rate": 2.2406912464582594e-06, "loss": 2.5967, "step": 58832 }, { "epoch": 2.88, "grad_norm": 0.7495601773262024, "learning_rate": 2.238813419700136e-06, "loss": 2.8738, "step": 58833 }, { "epoch": 2.88, "grad_norm": 0.7114120125770569, "learning_rate": 2.236936377187681e-06, "loss": 2.8921, "step": 58834 }, { "epoch": 2.88, "grad_norm": 0.8221262693405151, "learning_rate": 2.2350601189258243e-06, "loss": 2.7301, "step": 58835 }, { "epoch": 2.88, "grad_norm": 0.7930930852890015, "learning_rate": 2.233184644919528e-06, "loss": 2.8669, "step": 58836 }, { "epoch": 2.88, "grad_norm": 0.7594324350357056, "learning_rate": 2.2313099551737213e-06, "loss": 2.8567, "step": 58837 }, { "epoch": 2.88, "grad_norm": 0.733824610710144, "learning_rate": 2.2294360496933007e-06, "loss": 2.9, "step": 58838 }, { "epoch": 2.88, "grad_norm": 0.7296682000160217, "learning_rate": 2.2275629284832618e-06, "loss": 2.8983, "step": 58839 }, { "epoch": 2.88, "grad_norm": 0.7384452819824219, "learning_rate": 2.2256905915485013e-06, "loss": 2.8469, "step": 58840 }, { "epoch": 2.88, "grad_norm": 0.7113006114959717, "learning_rate": 2.2238190388939815e-06, "loss": 2.9981, "step": 58841 }, { "epoch": 2.88, "grad_norm": 0.7444003224372864, "learning_rate": 2.2219482705246317e-06, "loss": 2.7707, "step": 58842 }, { "epoch": 2.88, "grad_norm": 0.7594712972640991, "learning_rate": 2.220078286445315e-06, "loss": 2.9082, "step": 58843 }, { "epoch": 2.88, "grad_norm": 0.7738532423973083, "learning_rate": 2.2182090866610604e-06, "loss": 2.8424, "step": 58844 }, { "epoch": 2.88, "grad_norm": 0.7955849170684814, "learning_rate": 2.2163406711766975e-06, "loss": 2.9444, "step": 58845 }, { "epoch": 2.88, "grad_norm": 0.7212498188018799, "learning_rate": 2.2144730399971554e-06, "loss": 2.9951, "step": 58846 }, { "epoch": 2.88, "grad_norm": 0.7770746350288391, "learning_rate": 2.2126061931274307e-06, "loss": 2.9731, "step": 58847 }, { "epoch": 2.88, "grad_norm": 0.7356753945350647, "learning_rate": 2.210740130572353e-06, "loss": 2.8022, "step": 58848 }, { "epoch": 2.88, "grad_norm": 0.7566166520118713, "learning_rate": 2.208874852336917e-06, "loss": 2.7831, "step": 58849 }, { "epoch": 2.88, "grad_norm": 0.7645593881607056, "learning_rate": 2.2070103584259867e-06, "loss": 2.8821, "step": 58850 }, { "epoch": 2.88, "grad_norm": 0.729720413684845, "learning_rate": 2.2051466488444246e-06, "loss": 3.0565, "step": 58851 }, { "epoch": 2.88, "grad_norm": 0.7721166610717773, "learning_rate": 2.20328372359726e-06, "loss": 2.9014, "step": 58852 }, { "epoch": 2.88, "grad_norm": 0.7474196553230286, "learning_rate": 2.201421582689289e-06, "loss": 2.9263, "step": 58853 }, { "epoch": 2.88, "grad_norm": 0.7505802512168884, "learning_rate": 2.1995602261255074e-06, "loss": 2.9056, "step": 58854 }, { "epoch": 2.88, "grad_norm": 0.7443969249725342, "learning_rate": 2.1976996539107117e-06, "loss": 3.0748, "step": 58855 }, { "epoch": 2.88, "grad_norm": 0.7555038332939148, "learning_rate": 2.1958398660498976e-06, "loss": 2.9408, "step": 58856 }, { "epoch": 2.88, "grad_norm": 0.7873604893684387, "learning_rate": 2.193980862547928e-06, "loss": 2.9474, "step": 58857 }, { "epoch": 2.88, "grad_norm": 0.7833068370819092, "learning_rate": 2.1921226434096995e-06, "loss": 2.9086, "step": 58858 }, { "epoch": 2.88, "grad_norm": 0.7554247379302979, "learning_rate": 2.1902652086401075e-06, "loss": 2.8782, "step": 58859 }, { "epoch": 2.88, "grad_norm": 0.7960243821144104, "learning_rate": 2.188408558244048e-06, "loss": 2.8816, "step": 58860 }, { "epoch": 2.88, "grad_norm": 0.8111169338226318, "learning_rate": 2.1865526922263842e-06, "loss": 2.7859, "step": 58861 }, { "epoch": 2.88, "grad_norm": 0.800476610660553, "learning_rate": 2.1846976105920456e-06, "loss": 2.8443, "step": 58862 }, { "epoch": 2.88, "grad_norm": 0.7481632828712463, "learning_rate": 2.1828433133458613e-06, "loss": 2.7816, "step": 58863 }, { "epoch": 2.88, "grad_norm": 0.7516404986381531, "learning_rate": 2.180989800492794e-06, "loss": 2.7836, "step": 58864 }, { "epoch": 2.88, "grad_norm": 0.7697651982307434, "learning_rate": 2.1791370720376733e-06, "loss": 2.772, "step": 58865 }, { "epoch": 2.88, "grad_norm": 0.7722952365875244, "learning_rate": 2.177285127985362e-06, "loss": 2.7993, "step": 58866 }, { "epoch": 2.88, "grad_norm": 0.7531412839889526, "learning_rate": 2.1754339683407897e-06, "loss": 2.6921, "step": 58867 }, { "epoch": 2.88, "grad_norm": 0.7838771939277649, "learning_rate": 2.1735835931088187e-06, "loss": 3.0806, "step": 58868 }, { "epoch": 2.89, "grad_norm": 0.7424795031547546, "learning_rate": 2.1717340022942785e-06, "loss": 2.8725, "step": 58869 }, { "epoch": 2.89, "grad_norm": 0.6980968713760376, "learning_rate": 2.169885195902099e-06, "loss": 2.9858, "step": 58870 }, { "epoch": 2.89, "grad_norm": 0.7613954544067383, "learning_rate": 2.1680371739371093e-06, "loss": 3.0112, "step": 58871 }, { "epoch": 2.89, "grad_norm": 0.7577490210533142, "learning_rate": 2.166189936404206e-06, "loss": 3.0586, "step": 58872 }, { "epoch": 2.89, "grad_norm": 0.7090437412261963, "learning_rate": 2.1643434833082176e-06, "loss": 2.9531, "step": 58873 }, { "epoch": 2.89, "grad_norm": 0.7910329699516296, "learning_rate": 2.1624978146540407e-06, "loss": 2.6705, "step": 58874 }, { "epoch": 2.89, "grad_norm": 0.7528069019317627, "learning_rate": 2.1606529304465046e-06, "loss": 3.0284, "step": 58875 }, { "epoch": 2.89, "grad_norm": 0.759083092212677, "learning_rate": 2.158808830690506e-06, "loss": 2.9894, "step": 58876 }, { "epoch": 2.89, "grad_norm": 0.764187753200531, "learning_rate": 2.156965515390907e-06, "loss": 2.8765, "step": 58877 }, { "epoch": 2.89, "grad_norm": 0.7952934503555298, "learning_rate": 2.155122984552504e-06, "loss": 2.8857, "step": 58878 }, { "epoch": 2.89, "grad_norm": 0.751152753829956, "learning_rate": 2.1532812381801932e-06, "loss": 2.8625, "step": 58879 }, { "epoch": 2.89, "grad_norm": 0.7216384410858154, "learning_rate": 2.1514402762788376e-06, "loss": 3.1293, "step": 58880 }, { "epoch": 2.89, "grad_norm": 0.7260209321975708, "learning_rate": 2.1496000988532327e-06, "loss": 2.8408, "step": 58881 }, { "epoch": 2.89, "grad_norm": 0.756483793258667, "learning_rate": 2.147760705908308e-06, "loss": 2.9624, "step": 58882 }, { "epoch": 2.89, "grad_norm": 0.8064687848091125, "learning_rate": 2.1459220974487933e-06, "loss": 2.9032, "step": 58883 }, { "epoch": 2.89, "grad_norm": 0.8111887574195862, "learning_rate": 2.1440842734796514e-06, "loss": 2.8218, "step": 58884 }, { "epoch": 2.89, "grad_norm": 0.7237271070480347, "learning_rate": 2.142247234005645e-06, "loss": 2.879, "step": 58885 }, { "epoch": 2.89, "grad_norm": 0.699209451675415, "learning_rate": 2.140410979031637e-06, "loss": 2.7129, "step": 58886 }, { "epoch": 2.89, "grad_norm": 0.7380729913711548, "learning_rate": 2.13857550856249e-06, "loss": 2.9703, "step": 58887 }, { "epoch": 2.89, "grad_norm": 0.7376232147216797, "learning_rate": 2.136740822602967e-06, "loss": 2.9186, "step": 58888 }, { "epoch": 2.89, "grad_norm": 0.7537232637405396, "learning_rate": 2.1349069211579637e-06, "loss": 3.0193, "step": 58889 }, { "epoch": 2.89, "grad_norm": 0.7629387378692627, "learning_rate": 2.133073804232277e-06, "loss": 2.8616, "step": 58890 }, { "epoch": 2.89, "grad_norm": 0.7387884259223938, "learning_rate": 2.1312414718307693e-06, "loss": 2.8796, "step": 58891 }, { "epoch": 2.89, "grad_norm": 0.701157808303833, "learning_rate": 2.129409923958236e-06, "loss": 2.829, "step": 58892 }, { "epoch": 2.89, "grad_norm": 0.8402901291847229, "learning_rate": 2.127579160619508e-06, "loss": 2.6893, "step": 58893 }, { "epoch": 2.89, "grad_norm": 0.732913076877594, "learning_rate": 2.125749181819414e-06, "loss": 2.7791, "step": 58894 }, { "epoch": 2.89, "grad_norm": 0.7669530510902405, "learning_rate": 2.1239199875627834e-06, "loss": 2.8068, "step": 58895 }, { "epoch": 2.89, "grad_norm": 0.7334358096122742, "learning_rate": 2.122091577854379e-06, "loss": 2.9818, "step": 58896 }, { "epoch": 2.89, "grad_norm": 0.785017192363739, "learning_rate": 2.1202639526990973e-06, "loss": 2.8513, "step": 58897 }, { "epoch": 2.89, "grad_norm": 0.7877793312072754, "learning_rate": 2.1184371121017006e-06, "loss": 2.8978, "step": 58898 }, { "epoch": 2.89, "grad_norm": 0.7523209452629089, "learning_rate": 2.116611056067019e-06, "loss": 2.6516, "step": 58899 }, { "epoch": 2.89, "grad_norm": 0.7709547877311707, "learning_rate": 2.114785784599815e-06, "loss": 2.7821, "step": 58900 }, { "epoch": 2.89, "grad_norm": 0.7097203731536865, "learning_rate": 2.1129612977049847e-06, "loss": 2.9419, "step": 58901 }, { "epoch": 2.89, "grad_norm": 0.7327682375907898, "learning_rate": 2.1111375953872576e-06, "loss": 2.8539, "step": 58902 }, { "epoch": 2.89, "grad_norm": 0.7576892375946045, "learning_rate": 2.1093146776514637e-06, "loss": 2.9477, "step": 58903 }, { "epoch": 2.89, "grad_norm": 0.7579607367515564, "learning_rate": 2.1074925445024314e-06, "loss": 2.7468, "step": 58904 }, { "epoch": 2.89, "grad_norm": 0.7820842862129211, "learning_rate": 2.1056711959449248e-06, "loss": 2.9472, "step": 58905 }, { "epoch": 2.89, "grad_norm": 0.7186954021453857, "learning_rate": 2.1038506319837056e-06, "loss": 2.9416, "step": 58906 }, { "epoch": 2.89, "grad_norm": 0.740797758102417, "learning_rate": 2.102030852623671e-06, "loss": 3.0116, "step": 58907 }, { "epoch": 2.89, "grad_norm": 0.7477077841758728, "learning_rate": 2.100211857869549e-06, "loss": 3.0106, "step": 58908 }, { "epoch": 2.89, "grad_norm": 0.7214314341545105, "learning_rate": 2.098393647726104e-06, "loss": 2.8677, "step": 58909 }, { "epoch": 2.89, "grad_norm": 0.7203336358070374, "learning_rate": 2.0965762221981654e-06, "loss": 3.0648, "step": 58910 }, { "epoch": 2.89, "grad_norm": 0.7489813566207886, "learning_rate": 2.094759581290528e-06, "loss": 2.7633, "step": 58911 }, { "epoch": 2.89, "grad_norm": 0.7301415801048279, "learning_rate": 2.092943725007956e-06, "loss": 2.837, "step": 58912 }, { "epoch": 2.89, "grad_norm": 0.8440554141998291, "learning_rate": 2.091128653355245e-06, "loss": 2.796, "step": 58913 }, { "epoch": 2.89, "grad_norm": 0.7419830560684204, "learning_rate": 2.089314366337158e-06, "loss": 2.8407, "step": 58914 }, { "epoch": 2.89, "grad_norm": 0.7549934387207031, "learning_rate": 2.0875008639584912e-06, "loss": 3.0267, "step": 58915 }, { "epoch": 2.89, "grad_norm": 0.7184531092643738, "learning_rate": 2.085688146224007e-06, "loss": 2.6725, "step": 58916 }, { "epoch": 2.89, "grad_norm": 0.7466804385185242, "learning_rate": 2.0838762131384688e-06, "loss": 2.9272, "step": 58917 }, { "epoch": 2.89, "grad_norm": 0.7794800400733948, "learning_rate": 2.0820650647066726e-06, "loss": 2.7772, "step": 58918 }, { "epoch": 2.89, "grad_norm": 0.7532921433448792, "learning_rate": 2.0802547009333814e-06, "loss": 3.0, "step": 58919 }, { "epoch": 2.89, "grad_norm": 0.7411693334579468, "learning_rate": 2.078445121823358e-06, "loss": 3.0414, "step": 58920 }, { "epoch": 2.89, "grad_norm": 0.7391944527626038, "learning_rate": 2.076636327381398e-06, "loss": 2.8976, "step": 58921 }, { "epoch": 2.89, "grad_norm": 0.7387174963951111, "learning_rate": 2.0748283176122317e-06, "loss": 3.035, "step": 58922 }, { "epoch": 2.89, "grad_norm": 0.7118051052093506, "learning_rate": 2.0730210925206213e-06, "loss": 2.8181, "step": 58923 }, { "epoch": 2.89, "grad_norm": 0.7901133894920349, "learning_rate": 2.0712146521113636e-06, "loss": 3.0142, "step": 58924 }, { "epoch": 2.89, "grad_norm": 0.7497255206108093, "learning_rate": 2.0694089963891545e-06, "loss": 2.7844, "step": 58925 }, { "epoch": 2.89, "grad_norm": 0.7415169477462769, "learning_rate": 2.067604125358824e-06, "loss": 2.911, "step": 58926 }, { "epoch": 2.89, "grad_norm": 0.749427855014801, "learning_rate": 2.065800039025034e-06, "loss": 2.9933, "step": 58927 }, { "epoch": 2.89, "grad_norm": 0.7294812202453613, "learning_rate": 2.0639967373926146e-06, "loss": 2.869, "step": 58928 }, { "epoch": 2.89, "grad_norm": 0.7828423380851746, "learning_rate": 2.0621942204662956e-06, "loss": 2.7197, "step": 58929 }, { "epoch": 2.89, "grad_norm": 0.7263489365577698, "learning_rate": 2.060392488250806e-06, "loss": 2.85, "step": 58930 }, { "epoch": 2.89, "grad_norm": 0.7711845636367798, "learning_rate": 2.0585915407509424e-06, "loss": 3.1033, "step": 58931 }, { "epoch": 2.89, "grad_norm": 0.7376527190208435, "learning_rate": 2.0567913779713675e-06, "loss": 2.7261, "step": 58932 }, { "epoch": 2.89, "grad_norm": 0.7345303893089294, "learning_rate": 2.0549919999168442e-06, "loss": 2.7508, "step": 58933 }, { "epoch": 2.89, "grad_norm": 0.7842707633972168, "learning_rate": 2.0531934065921685e-06, "loss": 2.9854, "step": 58934 }, { "epoch": 2.89, "grad_norm": 0.7627545595169067, "learning_rate": 2.051395598002037e-06, "loss": 2.6343, "step": 58935 }, { "epoch": 2.89, "grad_norm": 0.7491629719734192, "learning_rate": 2.049598574151179e-06, "loss": 2.8958, "step": 58936 }, { "epoch": 2.89, "grad_norm": 0.7489625811576843, "learning_rate": 2.047802335044324e-06, "loss": 2.8476, "step": 58937 }, { "epoch": 2.89, "grad_norm": 0.7320138812065125, "learning_rate": 2.046006880686235e-06, "loss": 2.9481, "step": 58938 }, { "epoch": 2.89, "grad_norm": 0.7232726812362671, "learning_rate": 2.0442122110816084e-06, "loss": 2.924, "step": 58939 }, { "epoch": 2.89, "grad_norm": 0.7281123399734497, "learning_rate": 2.0424183262352066e-06, "loss": 2.9472, "step": 58940 }, { "epoch": 2.89, "grad_norm": 0.7523177266120911, "learning_rate": 2.0406252261517263e-06, "loss": 2.8302, "step": 58941 }, { "epoch": 2.89, "grad_norm": 0.738097608089447, "learning_rate": 2.0388329108358636e-06, "loss": 2.8233, "step": 58942 }, { "epoch": 2.89, "grad_norm": 0.727954089641571, "learning_rate": 2.0370413802923813e-06, "loss": 2.9305, "step": 58943 }, { "epoch": 2.89, "grad_norm": 0.7692335844039917, "learning_rate": 2.035250634526009e-06, "loss": 2.9345, "step": 58944 }, { "epoch": 2.89, "grad_norm": 0.7667322754859924, "learning_rate": 2.0334606735414426e-06, "loss": 2.9262, "step": 58945 }, { "epoch": 2.89, "grad_norm": 0.8380236625671387, "learning_rate": 2.0316714973434124e-06, "loss": 3.0058, "step": 58946 }, { "epoch": 2.89, "grad_norm": 0.768031120300293, "learning_rate": 2.0298831059365807e-06, "loss": 3.2082, "step": 58947 }, { "epoch": 2.89, "grad_norm": 0.773243248462677, "learning_rate": 2.028095499325677e-06, "loss": 2.9019, "step": 58948 }, { "epoch": 2.89, "grad_norm": 0.7496214509010315, "learning_rate": 2.026308677515465e-06, "loss": 2.9999, "step": 58949 }, { "epoch": 2.89, "grad_norm": 0.7690075635910034, "learning_rate": 2.024522640510573e-06, "loss": 2.6612, "step": 58950 }, { "epoch": 2.89, "grad_norm": 0.8003986477851868, "learning_rate": 2.022737388315765e-06, "loss": 2.8753, "step": 58951 }, { "epoch": 2.89, "grad_norm": 0.7260475158691406, "learning_rate": 2.020952920935737e-06, "loss": 3.0065, "step": 58952 }, { "epoch": 2.89, "grad_norm": 0.7463489770889282, "learning_rate": 2.0191692383751514e-06, "loss": 3.0693, "step": 58953 }, { "epoch": 2.89, "grad_norm": 0.7311463356018066, "learning_rate": 2.0173863406387047e-06, "loss": 2.9125, "step": 58954 }, { "epoch": 2.89, "grad_norm": 0.8149186372756958, "learning_rate": 2.0156042277311267e-06, "loss": 3.1956, "step": 58955 }, { "epoch": 2.89, "grad_norm": 0.7813419103622437, "learning_rate": 2.0138228996571136e-06, "loss": 2.8011, "step": 58956 }, { "epoch": 2.89, "grad_norm": 0.7746787071228027, "learning_rate": 2.0120423564213285e-06, "loss": 2.9801, "step": 58957 }, { "epoch": 2.89, "grad_norm": 0.8212281465530396, "learning_rate": 2.0102625980285002e-06, "loss": 2.8556, "step": 58958 }, { "epoch": 2.89, "grad_norm": 0.7449041604995728, "learning_rate": 2.008483624483259e-06, "loss": 3.0733, "step": 58959 }, { "epoch": 2.89, "grad_norm": 0.7603235840797424, "learning_rate": 2.006705435790368e-06, "loss": 2.9102, "step": 58960 }, { "epoch": 2.89, "grad_norm": 0.7589277625083923, "learning_rate": 2.0049280319544225e-06, "loss": 2.7227, "step": 58961 }, { "epoch": 2.89, "grad_norm": 0.752943754196167, "learning_rate": 2.0031514129801527e-06, "loss": 2.7721, "step": 58962 }, { "epoch": 2.89, "grad_norm": 0.768645703792572, "learning_rate": 2.0013755788722217e-06, "loss": 2.7773, "step": 58963 }, { "epoch": 2.89, "grad_norm": 0.7890374660491943, "learning_rate": 1.999600529635359e-06, "loss": 2.7951, "step": 58964 }, { "epoch": 2.89, "grad_norm": 0.7113425135612488, "learning_rate": 1.9978262652741605e-06, "loss": 2.9277, "step": 58965 }, { "epoch": 2.89, "grad_norm": 0.797177255153656, "learning_rate": 1.996052785793323e-06, "loss": 2.67, "step": 58966 }, { "epoch": 2.89, "grad_norm": 0.7570541501045227, "learning_rate": 1.9942800911975755e-06, "loss": 2.9775, "step": 58967 }, { "epoch": 2.89, "grad_norm": 0.8025187849998474, "learning_rate": 1.9925081814915145e-06, "loss": 2.9072, "step": 58968 }, { "epoch": 2.89, "grad_norm": 0.7739835381507874, "learning_rate": 1.9907370566798363e-06, "loss": 2.955, "step": 58969 }, { "epoch": 2.89, "grad_norm": 0.7543403506278992, "learning_rate": 1.9889667167672042e-06, "loss": 2.8965, "step": 58970 }, { "epoch": 2.89, "grad_norm": 0.8121760487556458, "learning_rate": 1.9871971617582804e-06, "loss": 2.8277, "step": 58971 }, { "epoch": 2.89, "grad_norm": 0.8004959225654602, "learning_rate": 1.9854283916577285e-06, "loss": 2.7487, "step": 58972 }, { "epoch": 2.89, "grad_norm": 0.7454361915588379, "learning_rate": 1.983660406470211e-06, "loss": 2.7616, "step": 58973 }, { "epoch": 2.89, "grad_norm": 0.7641414999961853, "learning_rate": 1.9818932062003578e-06, "loss": 2.9533, "step": 58974 }, { "epoch": 2.89, "grad_norm": 0.7749798893928528, "learning_rate": 1.980126790852865e-06, "loss": 2.5778, "step": 58975 }, { "epoch": 2.89, "grad_norm": 0.7524572610855103, "learning_rate": 1.978361160432362e-06, "loss": 2.9732, "step": 58976 }, { "epoch": 2.89, "grad_norm": 0.7702212929725647, "learning_rate": 1.9765963149434793e-06, "loss": 2.9104, "step": 58977 }, { "epoch": 2.89, "grad_norm": 0.802010715007782, "learning_rate": 1.974832254390879e-06, "loss": 2.927, "step": 58978 }, { "epoch": 2.89, "grad_norm": 0.7283293604850769, "learning_rate": 1.9730689787792573e-06, "loss": 3.0435, "step": 58979 }, { "epoch": 2.89, "grad_norm": 0.7269130349159241, "learning_rate": 1.971306488113178e-06, "loss": 2.8377, "step": 58980 }, { "epoch": 2.89, "grad_norm": 0.7806401252746582, "learning_rate": 1.9695447823973364e-06, "loss": 2.7224, "step": 58981 }, { "epoch": 2.89, "grad_norm": 0.7446566224098206, "learning_rate": 1.967783861636363e-06, "loss": 2.9339, "step": 58982 }, { "epoch": 2.89, "grad_norm": 0.8244256377220154, "learning_rate": 1.9660237258348533e-06, "loss": 2.8912, "step": 58983 }, { "epoch": 2.89, "grad_norm": 0.7497897148132324, "learning_rate": 1.9642643749975373e-06, "loss": 2.8148, "step": 58984 }, { "epoch": 2.89, "grad_norm": 0.8421869874000549, "learning_rate": 1.9625058091289447e-06, "loss": 3.0151, "step": 58985 }, { "epoch": 2.89, "grad_norm": 0.7372134327888489, "learning_rate": 1.9607480282337716e-06, "loss": 2.8176, "step": 58986 }, { "epoch": 2.89, "grad_norm": 0.7009263634681702, "learning_rate": 1.958991032316615e-06, "loss": 2.819, "step": 58987 }, { "epoch": 2.89, "grad_norm": 0.7520594000816345, "learning_rate": 1.9572348213821365e-06, "loss": 2.8624, "step": 58988 }, { "epoch": 2.89, "grad_norm": 0.7330684661865234, "learning_rate": 1.9554793954349333e-06, "loss": 2.7898, "step": 58989 }, { "epoch": 2.89, "grad_norm": 0.7308124303817749, "learning_rate": 1.953724754479635e-06, "loss": 2.8689, "step": 58990 }, { "epoch": 2.89, "grad_norm": 0.7374723553657532, "learning_rate": 1.9519708985208717e-06, "loss": 2.7598, "step": 58991 }, { "epoch": 2.89, "grad_norm": 0.7027939558029175, "learning_rate": 1.9502178275632718e-06, "loss": 2.9557, "step": 58992 }, { "epoch": 2.89, "grad_norm": 0.7598909139633179, "learning_rate": 1.9484655416113993e-06, "loss": 2.9631, "step": 58993 }, { "epoch": 2.89, "grad_norm": 0.7432723641395569, "learning_rate": 1.94671404066995e-06, "loss": 3.0194, "step": 58994 }, { "epoch": 2.89, "grad_norm": 0.7426236867904663, "learning_rate": 1.944963324743454e-06, "loss": 2.9044, "step": 58995 }, { "epoch": 2.89, "grad_norm": 0.7190657258033752, "learning_rate": 1.943213393836607e-06, "loss": 2.4641, "step": 58996 }, { "epoch": 2.89, "grad_norm": 0.7499857544898987, "learning_rate": 1.9414642479539386e-06, "loss": 3.0042, "step": 58997 }, { "epoch": 2.89, "grad_norm": 0.7538277506828308, "learning_rate": 1.939715887100113e-06, "loss": 3.1177, "step": 58998 }, { "epoch": 2.89, "grad_norm": 0.7844749093055725, "learning_rate": 1.937968311279692e-06, "loss": 2.7512, "step": 58999 }, { "epoch": 2.89, "grad_norm": 0.7818543314933777, "learning_rate": 1.9362215204972718e-06, "loss": 2.9125, "step": 59000 }, { "epoch": 2.89, "grad_norm": 0.7285245656967163, "learning_rate": 1.9344755147575496e-06, "loss": 2.6752, "step": 59001 }, { "epoch": 2.89, "grad_norm": 0.721545934677124, "learning_rate": 1.932730294064988e-06, "loss": 2.9226, "step": 59002 }, { "epoch": 2.89, "grad_norm": 0.7440168857574463, "learning_rate": 1.9309858584242834e-06, "loss": 2.9274, "step": 59003 }, { "epoch": 2.89, "grad_norm": 0.7558407783508301, "learning_rate": 1.929242207840032e-06, "loss": 2.8321, "step": 59004 }, { "epoch": 2.89, "grad_norm": 0.7752707600593567, "learning_rate": 1.9274993423167295e-06, "loss": 2.9114, "step": 59005 }, { "epoch": 2.89, "grad_norm": 0.7502283453941345, "learning_rate": 1.9257572618590732e-06, "loss": 2.8542, "step": 59006 }, { "epoch": 2.89, "grad_norm": 0.7198836207389832, "learning_rate": 1.9240159664715926e-06, "loss": 2.6527, "step": 59007 }, { "epoch": 2.89, "grad_norm": 0.7290161848068237, "learning_rate": 1.9222754561588836e-06, "loss": 2.8487, "step": 59008 }, { "epoch": 2.89, "grad_norm": 0.7256250381469727, "learning_rate": 1.920535730925543e-06, "loss": 2.6648, "step": 59009 }, { "epoch": 2.89, "grad_norm": 0.7327529191970825, "learning_rate": 1.9187967907761335e-06, "loss": 2.9445, "step": 59010 }, { "epoch": 2.89, "grad_norm": 0.7599136233329773, "learning_rate": 1.9170586357152518e-06, "loss": 2.9671, "step": 59011 }, { "epoch": 2.89, "grad_norm": 0.7350993752479553, "learning_rate": 1.9153212657474605e-06, "loss": 2.881, "step": 59012 }, { "epoch": 2.89, "grad_norm": 0.7257763147354126, "learning_rate": 1.913584680877356e-06, "loss": 2.8071, "step": 59013 }, { "epoch": 2.89, "grad_norm": 0.7590259909629822, "learning_rate": 1.9118488811095013e-06, "loss": 2.8561, "step": 59014 }, { "epoch": 2.89, "grad_norm": 0.7977140545845032, "learning_rate": 1.9101138664484593e-06, "loss": 2.8963, "step": 59015 }, { "epoch": 2.89, "grad_norm": 0.734058141708374, "learning_rate": 1.908379636898827e-06, "loss": 2.9871, "step": 59016 }, { "epoch": 2.89, "grad_norm": 0.7733717560768127, "learning_rate": 1.9066461924651665e-06, "loss": 2.7593, "step": 59017 }, { "epoch": 2.89, "grad_norm": 0.7138202786445618, "learning_rate": 1.9049135331520081e-06, "loss": 2.9411, "step": 59018 }, { "epoch": 2.89, "grad_norm": 0.7705768346786499, "learning_rate": 1.9031816589639148e-06, "loss": 2.9631, "step": 59019 }, { "epoch": 2.89, "grad_norm": 0.7369682192802429, "learning_rate": 1.9014505699054827e-06, "loss": 2.8449, "step": 59020 }, { "epoch": 2.89, "grad_norm": 0.7815206050872803, "learning_rate": 1.8997202659812749e-06, "loss": 2.7891, "step": 59021 }, { "epoch": 2.89, "grad_norm": 0.7516481280326843, "learning_rate": 1.8979907471958212e-06, "loss": 2.9268, "step": 59022 }, { "epoch": 2.89, "grad_norm": 0.8083335161209106, "learning_rate": 1.8962620135537176e-06, "loss": 3.0219, "step": 59023 }, { "epoch": 2.89, "grad_norm": 0.7490785717964172, "learning_rate": 1.894534065059461e-06, "loss": 2.9312, "step": 59024 }, { "epoch": 2.89, "grad_norm": 0.6919969916343689, "learning_rate": 1.8928069017176472e-06, "loss": 2.8647, "step": 59025 }, { "epoch": 2.89, "grad_norm": 0.7737188339233398, "learning_rate": 1.8910805235328063e-06, "loss": 2.7855, "step": 59026 }, { "epoch": 2.89, "grad_norm": 0.7691394686698914, "learning_rate": 1.889354930509468e-06, "loss": 3.0189, "step": 59027 }, { "epoch": 2.89, "grad_norm": 0.7691125273704529, "learning_rate": 1.8876301226522284e-06, "loss": 2.7889, "step": 59028 }, { "epoch": 2.89, "grad_norm": 0.7242297530174255, "learning_rate": 1.8859060999655507e-06, "loss": 2.8643, "step": 59029 }, { "epoch": 2.89, "grad_norm": 0.7567639946937561, "learning_rate": 1.8841828624540645e-06, "loss": 2.7662, "step": 59030 }, { "epoch": 2.89, "grad_norm": 0.7616976499557495, "learning_rate": 1.882460410122233e-06, "loss": 2.828, "step": 59031 }, { "epoch": 2.89, "grad_norm": 0.7366723418235779, "learning_rate": 1.8807387429746524e-06, "loss": 2.8366, "step": 59032 }, { "epoch": 2.89, "grad_norm": 0.7073797583580017, "learning_rate": 1.8790178610158525e-06, "loss": 2.9282, "step": 59033 }, { "epoch": 2.89, "grad_norm": 0.7266165614128113, "learning_rate": 1.8772977642502963e-06, "loss": 3.0461, "step": 59034 }, { "epoch": 2.89, "grad_norm": 0.7223436832427979, "learning_rate": 1.87557845268258e-06, "loss": 2.7378, "step": 59035 }, { "epoch": 2.89, "grad_norm": 0.8283471465110779, "learning_rate": 1.8738599263172338e-06, "loss": 2.844, "step": 59036 }, { "epoch": 2.89, "grad_norm": 0.7933211326599121, "learning_rate": 1.8721421851587537e-06, "loss": 2.9484, "step": 59037 }, { "epoch": 2.89, "grad_norm": 0.7600369453430176, "learning_rate": 1.8704252292116696e-06, "loss": 2.9175, "step": 59038 }, { "epoch": 2.89, "grad_norm": 0.788422703742981, "learning_rate": 1.8687090584805108e-06, "loss": 2.7721, "step": 59039 }, { "epoch": 2.89, "grad_norm": 0.7650947570800781, "learning_rate": 1.8669936729698076e-06, "loss": 2.7601, "step": 59040 }, { "epoch": 2.89, "grad_norm": 0.748820960521698, "learning_rate": 1.865279072684056e-06, "loss": 2.8145, "step": 59041 }, { "epoch": 2.89, "grad_norm": 0.7527621388435364, "learning_rate": 1.8635652576277527e-06, "loss": 2.9823, "step": 59042 }, { "epoch": 2.89, "grad_norm": 0.7002708315849304, "learning_rate": 1.8618522278054938e-06, "loss": 2.8998, "step": 59043 }, { "epoch": 2.89, "grad_norm": 0.7159339785575867, "learning_rate": 1.860139983221709e-06, "loss": 2.6089, "step": 59044 }, { "epoch": 2.89, "grad_norm": 0.737927258014679, "learning_rate": 1.8584285238809281e-06, "loss": 2.9601, "step": 59045 }, { "epoch": 2.89, "grad_norm": 0.7862527370452881, "learning_rate": 1.8567178497876811e-06, "loss": 2.7792, "step": 59046 }, { "epoch": 2.89, "grad_norm": 0.7486521005630493, "learning_rate": 1.8550079609464974e-06, "loss": 2.7664, "step": 59047 }, { "epoch": 2.89, "grad_norm": 0.7823449969291687, "learning_rate": 1.8532988573618068e-06, "loss": 2.8471, "step": 59048 }, { "epoch": 2.89, "grad_norm": 0.7296319603919983, "learning_rate": 1.8515905390381392e-06, "loss": 2.7969, "step": 59049 }, { "epoch": 2.89, "grad_norm": 0.8135117888450623, "learning_rate": 1.8498830059799908e-06, "loss": 2.8586, "step": 59050 }, { "epoch": 2.89, "grad_norm": 0.6989290714263916, "learning_rate": 1.8481762581918913e-06, "loss": 2.6872, "step": 59051 }, { "epoch": 2.89, "grad_norm": 0.7613128423690796, "learning_rate": 1.8464702956783372e-06, "loss": 2.995, "step": 59052 }, { "epoch": 2.89, "grad_norm": 0.721194863319397, "learning_rate": 1.8447651184437584e-06, "loss": 2.7919, "step": 59053 }, { "epoch": 2.89, "grad_norm": 0.726525068283081, "learning_rate": 1.8430607264927178e-06, "loss": 2.972, "step": 59054 }, { "epoch": 2.89, "grad_norm": 0.7626053690910339, "learning_rate": 1.8413571198296784e-06, "loss": 2.8219, "step": 59055 }, { "epoch": 2.89, "grad_norm": 0.7047833204269409, "learning_rate": 1.8396542984591035e-06, "loss": 2.7473, "step": 59056 }, { "epoch": 2.89, "grad_norm": 0.7405434846878052, "learning_rate": 1.8379522623854892e-06, "loss": 2.7802, "step": 59057 }, { "epoch": 2.89, "grad_norm": 0.7458202242851257, "learning_rate": 1.8362510116133323e-06, "loss": 3.0058, "step": 59058 }, { "epoch": 2.89, "grad_norm": 0.7674630880355835, "learning_rate": 1.8345505461471287e-06, "loss": 2.9578, "step": 59059 }, { "epoch": 2.89, "grad_norm": 0.7461906671524048, "learning_rate": 1.8328508659913087e-06, "loss": 2.8953, "step": 59060 }, { "epoch": 2.89, "grad_norm": 0.745392382144928, "learning_rate": 1.831151971150402e-06, "loss": 2.8253, "step": 59061 }, { "epoch": 2.89, "grad_norm": 0.7358865141868591, "learning_rate": 1.829453861628838e-06, "loss": 2.9244, "step": 59062 }, { "epoch": 2.89, "grad_norm": 0.7252581715583801, "learning_rate": 1.8277565374311132e-06, "loss": 3.0891, "step": 59063 }, { "epoch": 2.89, "grad_norm": 0.7202037572860718, "learning_rate": 1.8260599985616908e-06, "loss": 2.5241, "step": 59064 }, { "epoch": 2.89, "grad_norm": 0.7681000232696533, "learning_rate": 1.824364245025034e-06, "loss": 2.9659, "step": 59065 }, { "epoch": 2.89, "grad_norm": 0.7250438928604126, "learning_rate": 1.822669276825639e-06, "loss": 2.7523, "step": 59066 }, { "epoch": 2.89, "grad_norm": 0.7999386787414551, "learning_rate": 1.8209750939679357e-06, "loss": 2.7248, "step": 59067 }, { "epoch": 2.89, "grad_norm": 0.8048508167266846, "learning_rate": 1.8192816964563873e-06, "loss": 2.6697, "step": 59068 }, { "epoch": 2.89, "grad_norm": 0.7457453012466431, "learning_rate": 1.81758908429549e-06, "loss": 2.8159, "step": 59069 }, { "epoch": 2.89, "grad_norm": 0.7637097239494324, "learning_rate": 1.8158972574896403e-06, "loss": 2.748, "step": 59070 }, { "epoch": 2.89, "grad_norm": 0.7343689799308777, "learning_rate": 1.814206216043368e-06, "loss": 2.8942, "step": 59071 }, { "epoch": 2.89, "grad_norm": 0.7599355578422546, "learning_rate": 1.8125159599610694e-06, "loss": 2.9799, "step": 59072 }, { "epoch": 2.9, "grad_norm": 0.7494969964027405, "learning_rate": 1.8108264892472412e-06, "loss": 2.8893, "step": 59073 }, { "epoch": 2.9, "grad_norm": 0.7399177551269531, "learning_rate": 1.80913780390628e-06, "loss": 2.8731, "step": 59074 }, { "epoch": 2.9, "grad_norm": 0.7720394134521484, "learning_rate": 1.8074499039426815e-06, "loss": 2.7274, "step": 59075 }, { "epoch": 2.9, "grad_norm": 0.7690413594245911, "learning_rate": 1.8057627893608428e-06, "loss": 2.7685, "step": 59076 }, { "epoch": 2.9, "grad_norm": 0.7422636151313782, "learning_rate": 1.8040764601652601e-06, "loss": 2.884, "step": 59077 }, { "epoch": 2.9, "grad_norm": 0.7094586491584778, "learning_rate": 1.8023909163603634e-06, "loss": 2.8035, "step": 59078 }, { "epoch": 2.9, "grad_norm": 0.7637078166007996, "learning_rate": 1.8007061579505488e-06, "loss": 2.9295, "step": 59079 }, { "epoch": 2.9, "grad_norm": 0.7474511861801147, "learning_rate": 1.7990221849402797e-06, "loss": 2.9187, "step": 59080 }, { "epoch": 2.9, "grad_norm": 0.7161756157875061, "learning_rate": 1.797338997334019e-06, "loss": 2.7149, "step": 59081 }, { "epoch": 2.9, "grad_norm": 0.7648656368255615, "learning_rate": 1.7956565951361635e-06, "loss": 2.979, "step": 59082 }, { "epoch": 2.9, "grad_norm": 0.8284847736358643, "learning_rate": 1.7939749783511758e-06, "loss": 3.0267, "step": 59083 }, { "epoch": 2.9, "grad_norm": 0.7770487070083618, "learning_rate": 1.7922941469834528e-06, "loss": 2.8505, "step": 59084 }, { "epoch": 2.9, "grad_norm": 0.7366689443588257, "learning_rate": 1.7906141010374576e-06, "loss": 2.9935, "step": 59085 }, { "epoch": 2.9, "grad_norm": 0.7799773812294006, "learning_rate": 1.788934840517553e-06, "loss": 2.8051, "step": 59086 }, { "epoch": 2.9, "grad_norm": 0.7727737426757812, "learning_rate": 1.7872563654282357e-06, "loss": 3.0461, "step": 59087 }, { "epoch": 2.9, "grad_norm": 0.7719646692276001, "learning_rate": 1.7855786757738687e-06, "loss": 2.8494, "step": 59088 }, { "epoch": 2.9, "grad_norm": 0.7487027049064636, "learning_rate": 1.7839017715589155e-06, "loss": 2.9247, "step": 59089 }, { "epoch": 2.9, "grad_norm": 0.7525886297225952, "learning_rate": 1.7822256527877721e-06, "loss": 2.9167, "step": 59090 }, { "epoch": 2.9, "grad_norm": 0.7553564310073853, "learning_rate": 1.7805503194648684e-06, "loss": 2.7595, "step": 59091 }, { "epoch": 2.9, "grad_norm": 0.8537721633911133, "learning_rate": 1.778875771594568e-06, "loss": 2.9898, "step": 59092 }, { "epoch": 2.9, "grad_norm": 0.7500604391098022, "learning_rate": 1.7772020091813333e-06, "loss": 2.9763, "step": 59093 }, { "epoch": 2.9, "grad_norm": 0.7806376814842224, "learning_rate": 1.775529032229528e-06, "loss": 2.7487, "step": 59094 }, { "epoch": 2.9, "grad_norm": 0.7601209282875061, "learning_rate": 1.7738568407436149e-06, "loss": 2.9419, "step": 59095 }, { "epoch": 2.9, "grad_norm": 0.7778134346008301, "learning_rate": 1.7721854347279573e-06, "loss": 2.723, "step": 59096 }, { "epoch": 2.9, "grad_norm": 0.759236216545105, "learning_rate": 1.7705148141869853e-06, "loss": 3.0486, "step": 59097 }, { "epoch": 2.9, "grad_norm": 0.8189762234687805, "learning_rate": 1.7688449791250614e-06, "loss": 2.8238, "step": 59098 }, { "epoch": 2.9, "grad_norm": 0.7347763776779175, "learning_rate": 1.7671759295465826e-06, "loss": 2.8377, "step": 59099 }, { "epoch": 2.9, "grad_norm": 0.7606347799301147, "learning_rate": 1.7655076654560119e-06, "loss": 2.7916, "step": 59100 }, { "epoch": 2.9, "grad_norm": 0.7474910020828247, "learning_rate": 1.7638401868576791e-06, "loss": 2.9414, "step": 59101 }, { "epoch": 2.9, "grad_norm": 0.7264478206634521, "learning_rate": 1.7621734937559806e-06, "loss": 2.8281, "step": 59102 }, { "epoch": 2.9, "grad_norm": 0.7830918431282043, "learning_rate": 1.7605075861553464e-06, "loss": 2.9221, "step": 59103 }, { "epoch": 2.9, "grad_norm": 0.78126060962677, "learning_rate": 1.7588424640601062e-06, "loss": 2.9048, "step": 59104 }, { "epoch": 2.9, "grad_norm": 0.7438468933105469, "learning_rate": 1.7571781274746898e-06, "loss": 2.7915, "step": 59105 }, { "epoch": 2.9, "grad_norm": 0.8499040007591248, "learning_rate": 1.755514576403494e-06, "loss": 2.8397, "step": 59106 }, { "epoch": 2.9, "grad_norm": 0.7694287300109863, "learning_rate": 1.7538518108508481e-06, "loss": 2.8528, "step": 59107 }, { "epoch": 2.9, "grad_norm": 0.7242769598960876, "learning_rate": 1.752189830821149e-06, "loss": 2.9493, "step": 59108 }, { "epoch": 2.9, "grad_norm": 0.7706876397132874, "learning_rate": 1.750528636318793e-06, "loss": 2.7071, "step": 59109 }, { "epoch": 2.9, "grad_norm": 0.7799972295761108, "learning_rate": 1.7488682273481769e-06, "loss": 2.9786, "step": 59110 }, { "epoch": 2.9, "grad_norm": 0.7658340930938721, "learning_rate": 1.7472086039135968e-06, "loss": 2.991, "step": 59111 }, { "epoch": 2.9, "grad_norm": 0.7521490454673767, "learning_rate": 1.7455497660194828e-06, "loss": 2.8154, "step": 59112 }, { "epoch": 2.9, "grad_norm": 0.7272855639457703, "learning_rate": 1.7438917136702312e-06, "loss": 2.84, "step": 59113 }, { "epoch": 2.9, "grad_norm": 0.7962722182273865, "learning_rate": 1.7422344468701055e-06, "loss": 3.0468, "step": 59114 }, { "epoch": 2.9, "grad_norm": 0.770363986492157, "learning_rate": 1.7405779656235686e-06, "loss": 2.8827, "step": 59115 }, { "epoch": 2.9, "grad_norm": 0.7188537120819092, "learning_rate": 1.7389222699349503e-06, "loss": 2.8618, "step": 59116 }, { "epoch": 2.9, "grad_norm": 0.7084076404571533, "learning_rate": 1.7372673598085808e-06, "loss": 2.8814, "step": 59117 }, { "epoch": 2.9, "grad_norm": 0.7412644028663635, "learning_rate": 1.7356132352488562e-06, "loss": 2.8781, "step": 59118 }, { "epoch": 2.9, "grad_norm": 0.7647395730018616, "learning_rate": 1.733959896260173e-06, "loss": 2.8254, "step": 59119 }, { "epoch": 2.9, "grad_norm": 0.7721149921417236, "learning_rate": 1.7323073428467948e-06, "loss": 2.8524, "step": 59120 }, { "epoch": 2.9, "grad_norm": 0.7480499148368835, "learning_rate": 1.730655575013118e-06, "loss": 2.8018, "step": 59121 }, { "epoch": 2.9, "grad_norm": 0.7373523116111755, "learning_rate": 1.7290045927634721e-06, "loss": 2.8485, "step": 59122 }, { "epoch": 2.9, "grad_norm": 0.7831674218177795, "learning_rate": 1.727354396102254e-06, "loss": 3.0131, "step": 59123 }, { "epoch": 2.9, "grad_norm": 0.7761872410774231, "learning_rate": 1.72570498503376e-06, "loss": 2.8869, "step": 59124 }, { "epoch": 2.9, "grad_norm": 0.7596009969711304, "learning_rate": 1.7240563595623868e-06, "loss": 2.6972, "step": 59125 }, { "epoch": 2.9, "grad_norm": 0.734153151512146, "learning_rate": 1.7224085196923977e-06, "loss": 2.7864, "step": 59126 }, { "epoch": 2.9, "grad_norm": 0.7314983606338501, "learning_rate": 1.720761465428222e-06, "loss": 2.9053, "step": 59127 }, { "epoch": 2.9, "grad_norm": 0.7744563817977905, "learning_rate": 1.7191151967741568e-06, "loss": 2.905, "step": 59128 }, { "epoch": 2.9, "grad_norm": 0.7810060381889343, "learning_rate": 1.7174697137344983e-06, "loss": 2.9127, "step": 59129 }, { "epoch": 2.9, "grad_norm": 0.7507235407829285, "learning_rate": 1.7158250163136432e-06, "loss": 2.8359, "step": 59130 }, { "epoch": 2.9, "grad_norm": 0.7610126733779907, "learning_rate": 1.7141811045158883e-06, "loss": 2.6502, "step": 59131 }, { "epoch": 2.9, "grad_norm": 0.7299541234970093, "learning_rate": 1.712537978345596e-06, "loss": 2.7764, "step": 59132 }, { "epoch": 2.9, "grad_norm": 0.805625319480896, "learning_rate": 1.7108956378070637e-06, "loss": 2.8168, "step": 59133 }, { "epoch": 2.9, "grad_norm": 0.7253496646881104, "learning_rate": 1.7092540829046208e-06, "loss": 2.8982, "step": 59134 }, { "epoch": 2.9, "grad_norm": 0.8161607384681702, "learning_rate": 1.7076133136425974e-06, "loss": 2.846, "step": 59135 }, { "epoch": 2.9, "grad_norm": 0.7335495948791504, "learning_rate": 1.7059733300253232e-06, "loss": 2.9378, "step": 59136 }, { "epoch": 2.9, "grad_norm": 0.7281621098518372, "learning_rate": 1.7043341320570613e-06, "loss": 2.8579, "step": 59137 }, { "epoch": 2.9, "grad_norm": 0.7644967436790466, "learning_rate": 1.702695719742242e-06, "loss": 2.8762, "step": 59138 }, { "epoch": 2.9, "grad_norm": 0.7520266771316528, "learning_rate": 1.7010580930850614e-06, "loss": 2.8353, "step": 59139 }, { "epoch": 2.9, "grad_norm": 0.7870677709579468, "learning_rate": 1.6994212520899164e-06, "loss": 2.9208, "step": 59140 }, { "epoch": 2.9, "grad_norm": 0.7785607576370239, "learning_rate": 1.69778519676107e-06, "loss": 3.0277, "step": 59141 }, { "epoch": 2.9, "grad_norm": 0.7652883529663086, "learning_rate": 1.6961499271028521e-06, "loss": 2.9603, "step": 59142 }, { "epoch": 2.9, "grad_norm": 0.7231030464172363, "learning_rate": 1.6945154431195596e-06, "loss": 2.5437, "step": 59143 }, { "epoch": 2.9, "grad_norm": 0.7375117540359497, "learning_rate": 1.6928817448155218e-06, "loss": 2.6275, "step": 59144 }, { "epoch": 2.9, "grad_norm": 0.7867879867553711, "learning_rate": 1.6912488321950023e-06, "loss": 2.8344, "step": 59145 }, { "epoch": 2.9, "grad_norm": 0.8134239912033081, "learning_rate": 1.689616705262331e-06, "loss": 2.8047, "step": 59146 }, { "epoch": 2.9, "grad_norm": 0.7559427618980408, "learning_rate": 1.6879853640218044e-06, "loss": 2.6349, "step": 59147 }, { "epoch": 2.9, "grad_norm": 0.7431520223617554, "learning_rate": 1.686354808477719e-06, "loss": 2.9635, "step": 59148 }, { "epoch": 2.9, "grad_norm": 0.7744369506835938, "learning_rate": 1.684725038634338e-06, "loss": 2.972, "step": 59149 }, { "epoch": 2.9, "grad_norm": 0.769906222820282, "learning_rate": 1.6830960544959915e-06, "loss": 2.7403, "step": 59150 }, { "epoch": 2.9, "grad_norm": 0.772413432598114, "learning_rate": 1.681467856066976e-06, "loss": 2.9986, "step": 59151 }, { "epoch": 2.9, "grad_norm": 0.7344859838485718, "learning_rate": 1.6798404433515545e-06, "loss": 2.8915, "step": 59152 }, { "epoch": 2.9, "grad_norm": 0.7600609660148621, "learning_rate": 1.6782138163539903e-06, "loss": 2.6796, "step": 59153 }, { "epoch": 2.9, "grad_norm": 0.7428922653198242, "learning_rate": 1.6765879750786138e-06, "loss": 3.1002, "step": 59154 }, { "epoch": 2.9, "grad_norm": 0.7847473621368408, "learning_rate": 1.6749629195296877e-06, "loss": 2.7684, "step": 59155 }, { "epoch": 2.9, "grad_norm": 0.7154970169067383, "learning_rate": 1.6733386497115086e-06, "loss": 2.9555, "step": 59156 }, { "epoch": 2.9, "grad_norm": 0.8147268295288086, "learning_rate": 1.6717151656283734e-06, "loss": 2.8499, "step": 59157 }, { "epoch": 2.9, "grad_norm": 0.7564356923103333, "learning_rate": 1.6700924672844783e-06, "loss": 2.8353, "step": 59158 }, { "epoch": 2.9, "grad_norm": 0.7662482261657715, "learning_rate": 1.6684705546841537e-06, "loss": 2.9374, "step": 59159 }, { "epoch": 2.9, "grad_norm": 0.7386744022369385, "learning_rate": 1.6668494278316624e-06, "loss": 2.749, "step": 59160 }, { "epoch": 2.9, "grad_norm": 0.7575583457946777, "learning_rate": 1.665229086731268e-06, "loss": 3.0314, "step": 59161 }, { "epoch": 2.9, "grad_norm": 0.7565872073173523, "learning_rate": 1.6636095313872666e-06, "loss": 2.854, "step": 59162 }, { "epoch": 2.9, "grad_norm": 0.7207484245300293, "learning_rate": 1.6619907618038887e-06, "loss": 2.8848, "step": 59163 }, { "epoch": 2.9, "grad_norm": 0.7588822245597839, "learning_rate": 1.660372777985397e-06, "loss": 2.7857, "step": 59164 }, { "epoch": 2.9, "grad_norm": 0.7523000836372375, "learning_rate": 1.6587555799360885e-06, "loss": 2.9389, "step": 59165 }, { "epoch": 2.9, "grad_norm": 0.7837392687797546, "learning_rate": 1.657139167660193e-06, "loss": 2.7966, "step": 59166 }, { "epoch": 2.9, "grad_norm": 0.7007728815078735, "learning_rate": 1.655523541161974e-06, "loss": 2.8785, "step": 59167 }, { "epoch": 2.9, "grad_norm": 0.7474599480628967, "learning_rate": 1.653908700445694e-06, "loss": 2.8779, "step": 59168 }, { "epoch": 2.9, "grad_norm": 0.8092578649520874, "learning_rate": 1.6522946455155839e-06, "loss": 2.7932, "step": 59169 }, { "epoch": 2.9, "grad_norm": 0.7371110916137695, "learning_rate": 1.6506813763759064e-06, "loss": 2.8497, "step": 59170 }, { "epoch": 2.9, "grad_norm": 0.7597226500511169, "learning_rate": 1.6490688930309581e-06, "loss": 2.9094, "step": 59171 }, { "epoch": 2.9, "grad_norm": 0.7393887639045715, "learning_rate": 1.6474571954849358e-06, "loss": 2.8139, "step": 59172 }, { "epoch": 2.9, "grad_norm": 0.7919648885726929, "learning_rate": 1.6458462837420693e-06, "loss": 2.8186, "step": 59173 }, { "epoch": 2.9, "grad_norm": 0.7349705696105957, "learning_rate": 1.6442361578066221e-06, "loss": 2.7144, "step": 59174 }, { "epoch": 2.9, "grad_norm": 0.7724583745002747, "learning_rate": 1.6426268176828239e-06, "loss": 2.7673, "step": 59175 }, { "epoch": 2.9, "grad_norm": 0.7804393172264099, "learning_rate": 1.641018263374938e-06, "loss": 2.8634, "step": 59176 }, { "epoch": 2.9, "grad_norm": 0.8103668689727783, "learning_rate": 1.6394104948872277e-06, "loss": 2.7951, "step": 59177 }, { "epoch": 2.9, "grad_norm": 0.7489713430404663, "learning_rate": 1.6378035122238565e-06, "loss": 3.0172, "step": 59178 }, { "epoch": 2.9, "grad_norm": 0.7497350573539734, "learning_rate": 1.6361973153890873e-06, "loss": 2.7531, "step": 59179 }, { "epoch": 2.9, "grad_norm": 0.7449610829353333, "learning_rate": 1.6345919043871835e-06, "loss": 2.9164, "step": 59180 }, { "epoch": 2.9, "grad_norm": 0.7574251294136047, "learning_rate": 1.6329872792223087e-06, "loss": 2.7664, "step": 59181 }, { "epoch": 2.9, "grad_norm": 0.7449432611465454, "learning_rate": 1.6313834398987258e-06, "loss": 2.9572, "step": 59182 }, { "epoch": 2.9, "grad_norm": 0.7493433356285095, "learning_rate": 1.629780386420665e-06, "loss": 2.8492, "step": 59183 }, { "epoch": 2.9, "grad_norm": 0.721378743648529, "learning_rate": 1.6281781187923226e-06, "loss": 2.8584, "step": 59184 }, { "epoch": 2.9, "grad_norm": 0.705966055393219, "learning_rate": 1.6265766370179623e-06, "loss": 2.7012, "step": 59185 }, { "epoch": 2.9, "grad_norm": 0.7675456404685974, "learning_rate": 1.6249759411017805e-06, "loss": 2.7796, "step": 59186 }, { "epoch": 2.9, "grad_norm": 0.7658663988113403, "learning_rate": 1.623376031048007e-06, "loss": 2.6216, "step": 59187 }, { "epoch": 2.9, "grad_norm": 0.7195037603378296, "learning_rate": 1.6217769068607722e-06, "loss": 2.9578, "step": 59188 }, { "epoch": 2.9, "grad_norm": 0.7412412762641907, "learning_rate": 1.6201785685444058e-06, "loss": 2.7394, "step": 59189 }, { "epoch": 2.9, "grad_norm": 0.7388929724693298, "learning_rate": 1.6185810161030378e-06, "loss": 2.9017, "step": 59190 }, { "epoch": 2.9, "grad_norm": 0.8015137910842896, "learning_rate": 1.616984249540898e-06, "loss": 2.7832, "step": 59191 }, { "epoch": 2.9, "grad_norm": 0.733881413936615, "learning_rate": 1.6153882688622167e-06, "loss": 2.8523, "step": 59192 }, { "epoch": 2.9, "grad_norm": 0.7632375359535217, "learning_rate": 1.6137930740711901e-06, "loss": 2.7231, "step": 59193 }, { "epoch": 2.9, "grad_norm": 0.7348890900611877, "learning_rate": 1.6121986651719821e-06, "loss": 2.8821, "step": 59194 }, { "epoch": 2.9, "grad_norm": 0.729511559009552, "learning_rate": 1.610605042168822e-06, "loss": 2.9414, "step": 59195 }, { "epoch": 2.9, "grad_norm": 0.7740534543991089, "learning_rate": 1.609012205065907e-06, "loss": 2.7154, "step": 59196 }, { "epoch": 2.9, "grad_norm": 0.7699583768844604, "learning_rate": 1.6074201538674335e-06, "loss": 2.8821, "step": 59197 }, { "epoch": 2.9, "grad_norm": 0.7296841144561768, "learning_rate": 1.605828888577565e-06, "loss": 2.6342, "step": 59198 }, { "epoch": 2.9, "grad_norm": 0.7484259009361267, "learning_rate": 1.6042384092005312e-06, "loss": 2.9587, "step": 59199 }, { "epoch": 2.9, "grad_norm": 0.7556095719337463, "learning_rate": 1.6026487157404954e-06, "loss": 2.8836, "step": 59200 }, { "epoch": 2.9, "grad_norm": 0.7266550064086914, "learning_rate": 1.6010598082016545e-06, "loss": 2.7881, "step": 59201 }, { "epoch": 2.9, "grad_norm": 0.745608925819397, "learning_rate": 1.5994716865882384e-06, "loss": 3.12, "step": 59202 }, { "epoch": 2.9, "grad_norm": 0.7120503783226013, "learning_rate": 1.5978843509043103e-06, "loss": 3.1001, "step": 59203 }, { "epoch": 2.9, "grad_norm": 0.7117788195610046, "learning_rate": 1.5962978011541672e-06, "loss": 3.027, "step": 59204 }, { "epoch": 2.9, "grad_norm": 0.7518680691719055, "learning_rate": 1.5947120373419719e-06, "loss": 2.94, "step": 59205 }, { "epoch": 2.9, "grad_norm": 0.731158435344696, "learning_rate": 1.5931270594718215e-06, "loss": 2.7666, "step": 59206 }, { "epoch": 2.9, "grad_norm": 0.7914332151412964, "learning_rate": 1.5915428675479792e-06, "loss": 2.8187, "step": 59207 }, { "epoch": 2.9, "grad_norm": 0.7586977481842041, "learning_rate": 1.5899594615746081e-06, "loss": 2.8927, "step": 59208 }, { "epoch": 2.9, "grad_norm": 0.7375707030296326, "learning_rate": 1.588376841555805e-06, "loss": 2.7874, "step": 59209 }, { "epoch": 2.9, "grad_norm": 0.7598499655723572, "learning_rate": 1.5867950074958001e-06, "loss": 2.8557, "step": 59210 }, { "epoch": 2.9, "grad_norm": 0.759164571762085, "learning_rate": 1.5852139593987566e-06, "loss": 2.8819, "step": 59211 }, { "epoch": 2.9, "grad_norm": 0.7925102710723877, "learning_rate": 1.583633697268838e-06, "loss": 2.7934, "step": 59212 }, { "epoch": 2.9, "grad_norm": 0.7286711931228638, "learning_rate": 1.582054221110174e-06, "loss": 2.9716, "step": 59213 }, { "epoch": 2.9, "grad_norm": 0.7402360439300537, "learning_rate": 1.5804755309269612e-06, "loss": 2.8322, "step": 59214 }, { "epoch": 2.9, "grad_norm": 0.7518147230148315, "learning_rate": 1.57889762672333e-06, "loss": 2.8124, "step": 59215 }, { "epoch": 2.9, "grad_norm": 0.7689666748046875, "learning_rate": 1.5773205085034435e-06, "loss": 2.7527, "step": 59216 }, { "epoch": 2.9, "grad_norm": 0.746090829372406, "learning_rate": 1.5757441762714984e-06, "loss": 2.9052, "step": 59217 }, { "epoch": 2.9, "grad_norm": 0.7587335109710693, "learning_rate": 1.5741686300315581e-06, "loss": 2.7332, "step": 59218 }, { "epoch": 2.9, "grad_norm": 0.7321277260780334, "learning_rate": 1.5725938697878525e-06, "loss": 3.0402, "step": 59219 }, { "epoch": 2.9, "grad_norm": 0.774876058101654, "learning_rate": 1.5710198955445119e-06, "loss": 2.9392, "step": 59220 }, { "epoch": 2.9, "grad_norm": 0.7118234634399414, "learning_rate": 1.5694467073056661e-06, "loss": 2.7393, "step": 59221 }, { "epoch": 2.9, "grad_norm": 0.8065067529678345, "learning_rate": 1.5678743050754784e-06, "loss": 2.8755, "step": 59222 }, { "epoch": 2.9, "grad_norm": 0.815037190914154, "learning_rate": 1.5663026888580454e-06, "loss": 2.7749, "step": 59223 }, { "epoch": 2.9, "grad_norm": 0.7758423089981079, "learning_rate": 1.564731858657531e-06, "loss": 2.9076, "step": 59224 }, { "epoch": 2.9, "grad_norm": 0.7859812378883362, "learning_rate": 1.5631618144780977e-06, "loss": 3.0618, "step": 59225 }, { "epoch": 2.9, "grad_norm": 0.7478824257850647, "learning_rate": 1.5615925563238763e-06, "loss": 2.906, "step": 59226 }, { "epoch": 2.9, "grad_norm": 0.7796872854232788, "learning_rate": 1.5600240841989632e-06, "loss": 2.8319, "step": 59227 }, { "epoch": 2.9, "grad_norm": 0.7634370923042297, "learning_rate": 1.5584563981075215e-06, "loss": 2.7921, "step": 59228 }, { "epoch": 2.9, "grad_norm": 0.7156879901885986, "learning_rate": 1.5568894980536484e-06, "loss": 2.8655, "step": 59229 }, { "epoch": 2.9, "grad_norm": 0.7343173027038574, "learning_rate": 1.5553233840415068e-06, "loss": 3.1111, "step": 59230 }, { "epoch": 2.9, "grad_norm": 0.7309796810150146, "learning_rate": 1.5537580560751938e-06, "loss": 2.6718, "step": 59231 }, { "epoch": 2.9, "grad_norm": 0.8148305416107178, "learning_rate": 1.5521935141588726e-06, "loss": 2.8729, "step": 59232 }, { "epoch": 2.9, "grad_norm": 0.7874312400817871, "learning_rate": 1.5506297582966066e-06, "loss": 2.889, "step": 59233 }, { "epoch": 2.9, "grad_norm": 0.7957163453102112, "learning_rate": 1.549066788492559e-06, "loss": 2.9847, "step": 59234 }, { "epoch": 2.9, "grad_norm": 0.7534181475639343, "learning_rate": 1.5475046047508265e-06, "loss": 2.8653, "step": 59235 }, { "epoch": 2.9, "grad_norm": 0.7766826748847961, "learning_rate": 1.5459432070755395e-06, "loss": 2.8438, "step": 59236 }, { "epoch": 2.9, "grad_norm": 0.7522812485694885, "learning_rate": 1.5443825954707945e-06, "loss": 2.7315, "step": 59237 }, { "epoch": 2.9, "grad_norm": 0.7763208746910095, "learning_rate": 1.5428227699407213e-06, "loss": 2.9692, "step": 59238 }, { "epoch": 2.9, "grad_norm": 0.773130476474762, "learning_rate": 1.5412637304893838e-06, "loss": 2.821, "step": 59239 }, { "epoch": 2.9, "grad_norm": 0.7237739562988281, "learning_rate": 1.539705477120945e-06, "loss": 2.8297, "step": 59240 }, { "epoch": 2.9, "grad_norm": 0.7151420712471008, "learning_rate": 1.538148009839435e-06, "loss": 2.758, "step": 59241 }, { "epoch": 2.9, "grad_norm": 0.7747194170951843, "learning_rate": 1.5365913286490506e-06, "loss": 2.7573, "step": 59242 }, { "epoch": 2.9, "grad_norm": 0.7364190220832825, "learning_rate": 1.5350354335538217e-06, "loss": 3.0683, "step": 59243 }, { "epoch": 2.9, "grad_norm": 0.6902226209640503, "learning_rate": 1.533480324557912e-06, "loss": 2.7821, "step": 59244 }, { "epoch": 2.9, "grad_norm": 0.7629439234733582, "learning_rate": 1.5319260016653178e-06, "loss": 2.8934, "step": 59245 }, { "epoch": 2.9, "grad_norm": 0.7819312214851379, "learning_rate": 1.530372464880203e-06, "loss": 2.9913, "step": 59246 }, { "epoch": 2.9, "grad_norm": 0.7529870867729187, "learning_rate": 1.5288197142066638e-06, "loss": 2.7037, "step": 59247 }, { "epoch": 2.9, "grad_norm": 0.7887903451919556, "learning_rate": 1.5272677496487307e-06, "loss": 2.9547, "step": 59248 }, { "epoch": 2.9, "grad_norm": 0.7321487665176392, "learning_rate": 1.5257165712106e-06, "loss": 2.9963, "step": 59249 }, { "epoch": 2.9, "grad_norm": 0.7392261028289795, "learning_rate": 1.5241661788962357e-06, "loss": 2.9604, "step": 59250 }, { "epoch": 2.9, "grad_norm": 0.7557159662246704, "learning_rate": 1.5226165727098005e-06, "loss": 2.964, "step": 59251 }, { "epoch": 2.9, "grad_norm": 0.7541767358779907, "learning_rate": 1.521067752655325e-06, "loss": 3.0179, "step": 59252 }, { "epoch": 2.9, "grad_norm": 0.7616634368896484, "learning_rate": 1.519519718736939e-06, "loss": 2.7647, "step": 59253 }, { "epoch": 2.9, "grad_norm": 0.7685297727584839, "learning_rate": 1.5179724709586727e-06, "loss": 2.9063, "step": 59254 }, { "epoch": 2.9, "grad_norm": 0.7325605154037476, "learning_rate": 1.5164260093246228e-06, "loss": 3.0109, "step": 59255 }, { "epoch": 2.9, "grad_norm": 0.7943716645240784, "learning_rate": 1.514880333838886e-06, "loss": 2.7804, "step": 59256 }, { "epoch": 2.9, "grad_norm": 0.7822527885437012, "learning_rate": 1.5133354445054923e-06, "loss": 3.128, "step": 59257 }, { "epoch": 2.9, "grad_norm": 0.7548781037330627, "learning_rate": 1.5117913413285388e-06, "loss": 2.8991, "step": 59258 }, { "epoch": 2.9, "grad_norm": 0.769331157207489, "learning_rate": 1.5102480243120885e-06, "loss": 3.0113, "step": 59259 }, { "epoch": 2.9, "grad_norm": 0.772138237953186, "learning_rate": 1.5087054934601716e-06, "loss": 2.8884, "step": 59260 }, { "epoch": 2.9, "grad_norm": 0.7810102105140686, "learning_rate": 1.507163748776885e-06, "loss": 2.8167, "step": 59261 }, { "epoch": 2.9, "grad_norm": 0.7660080194473267, "learning_rate": 1.505622790266292e-06, "loss": 2.8325, "step": 59262 }, { "epoch": 2.9, "grad_norm": 0.7257230281829834, "learning_rate": 1.504082617932456e-06, "loss": 3.0289, "step": 59263 }, { "epoch": 2.9, "grad_norm": 0.7466867566108704, "learning_rate": 1.5025432317794073e-06, "loss": 2.8704, "step": 59264 }, { "epoch": 2.9, "grad_norm": 0.7224690914154053, "learning_rate": 1.501004631811209e-06, "loss": 2.7124, "step": 59265 }, { "epoch": 2.9, "grad_norm": 0.7552944421768188, "learning_rate": 1.4994668180319248e-06, "loss": 3.1567, "step": 59266 }, { "epoch": 2.9, "grad_norm": 0.8041907548904419, "learning_rate": 1.4979297904455844e-06, "loss": 2.9041, "step": 59267 }, { "epoch": 2.9, "grad_norm": 0.7628178596496582, "learning_rate": 1.4963935490562516e-06, "loss": 2.8904, "step": 59268 }, { "epoch": 2.9, "grad_norm": 0.8382160663604736, "learning_rate": 1.49485809386799e-06, "loss": 2.7538, "step": 59269 }, { "epoch": 2.9, "grad_norm": 0.7363545298576355, "learning_rate": 1.4933234248847958e-06, "loss": 2.8377, "step": 59270 }, { "epoch": 2.9, "grad_norm": 0.7144303321838379, "learning_rate": 1.491789542110766e-06, "loss": 2.7339, "step": 59271 }, { "epoch": 2.9, "grad_norm": 0.8086472749710083, "learning_rate": 1.4902564455498978e-06, "loss": 2.9529, "step": 59272 }, { "epoch": 2.9, "grad_norm": 0.7652781009674072, "learning_rate": 1.488724135206254e-06, "loss": 2.8901, "step": 59273 }, { "epoch": 2.9, "grad_norm": 0.7442313432693481, "learning_rate": 1.487192611083865e-06, "loss": 3.0013, "step": 59274 }, { "epoch": 2.9, "grad_norm": 0.7640436887741089, "learning_rate": 1.4856618731867277e-06, "loss": 2.8635, "step": 59275 }, { "epoch": 2.9, "grad_norm": 0.7747520208358765, "learning_rate": 1.4841319215189385e-06, "loss": 2.9427, "step": 59276 }, { "epoch": 2.91, "grad_norm": 0.7614535689353943, "learning_rate": 1.482602756084461e-06, "loss": 2.713, "step": 59277 }, { "epoch": 2.91, "grad_norm": 0.7723492980003357, "learning_rate": 1.4810743768873923e-06, "loss": 2.9073, "step": 59278 }, { "epoch": 2.91, "grad_norm": 0.7300555109977722, "learning_rate": 1.4795467839317288e-06, "loss": 2.9138, "step": 59279 }, { "epoch": 2.91, "grad_norm": 0.7472530603408813, "learning_rate": 1.4780199772214674e-06, "loss": 2.831, "step": 59280 }, { "epoch": 2.91, "grad_norm": 0.7535228729248047, "learning_rate": 1.4764939567606382e-06, "loss": 2.9556, "step": 59281 }, { "epoch": 2.91, "grad_norm": 0.7726250886917114, "learning_rate": 1.4749687225533047e-06, "loss": 2.7113, "step": 59282 }, { "epoch": 2.91, "grad_norm": 0.7266640663146973, "learning_rate": 1.4734442746034303e-06, "loss": 2.8155, "step": 59283 }, { "epoch": 2.91, "grad_norm": 0.7508488297462463, "learning_rate": 1.471920612915045e-06, "loss": 2.8604, "step": 59284 }, { "epoch": 2.91, "grad_norm": 0.777144193649292, "learning_rate": 1.4703977374921794e-06, "loss": 2.6361, "step": 59285 }, { "epoch": 2.91, "grad_norm": 0.7734659910202026, "learning_rate": 1.4688756483388297e-06, "loss": 2.8084, "step": 59286 }, { "epoch": 2.91, "grad_norm": 0.7497792840003967, "learning_rate": 1.4673543454590264e-06, "loss": 2.8345, "step": 59287 }, { "epoch": 2.91, "grad_norm": 0.753085732460022, "learning_rate": 1.4658338288567329e-06, "loss": 2.9196, "step": 59288 }, { "epoch": 2.91, "grad_norm": 0.7959982752799988, "learning_rate": 1.4643140985359792e-06, "loss": 2.8826, "step": 59289 }, { "epoch": 2.91, "grad_norm": 0.7402199506759644, "learning_rate": 1.462795154500762e-06, "loss": 2.8387, "step": 59290 }, { "epoch": 2.91, "grad_norm": 0.7226002216339111, "learning_rate": 1.461276996755112e-06, "loss": 3.0343, "step": 59291 }, { "epoch": 2.91, "grad_norm": 0.7589437365531921, "learning_rate": 1.459759625302992e-06, "loss": 2.9562, "step": 59292 }, { "epoch": 2.91, "grad_norm": 0.7250106930732727, "learning_rate": 1.458243040148399e-06, "loss": 2.8674, "step": 59293 }, { "epoch": 2.91, "grad_norm": 0.7297521233558655, "learning_rate": 1.4567272412953633e-06, "loss": 3.1333, "step": 59294 }, { "epoch": 2.91, "grad_norm": 0.729756236076355, "learning_rate": 1.4552122287478485e-06, "loss": 2.7591, "step": 59295 }, { "epoch": 2.91, "grad_norm": 0.7715573906898499, "learning_rate": 1.4536980025098178e-06, "loss": 3.0325, "step": 59296 }, { "epoch": 2.91, "grad_norm": 0.7817413806915283, "learning_rate": 1.4521845625853347e-06, "loss": 3.0629, "step": 59297 }, { "epoch": 2.91, "grad_norm": 0.7538647651672363, "learning_rate": 1.4506719089782958e-06, "loss": 2.9156, "step": 59298 }, { "epoch": 2.91, "grad_norm": 0.7621957063674927, "learning_rate": 1.4491600416927652e-06, "loss": 3.0193, "step": 59299 }, { "epoch": 2.91, "grad_norm": 0.7765817642211914, "learning_rate": 1.4476489607326725e-06, "loss": 3.0129, "step": 59300 }, { "epoch": 2.91, "grad_norm": 0.7483084797859192, "learning_rate": 1.4461386661020146e-06, "loss": 2.8993, "step": 59301 }, { "epoch": 2.91, "grad_norm": 0.7720745801925659, "learning_rate": 1.4446291578047886e-06, "loss": 3.0589, "step": 59302 }, { "epoch": 2.91, "grad_norm": 0.7810670137405396, "learning_rate": 1.4431204358449244e-06, "loss": 3.0455, "step": 59303 }, { "epoch": 2.91, "grad_norm": 0.7453815937042236, "learning_rate": 1.441612500226419e-06, "loss": 2.8832, "step": 59304 }, { "epoch": 2.91, "grad_norm": 0.7267022132873535, "learning_rate": 1.440105350953269e-06, "loss": 2.8214, "step": 59305 }, { "epoch": 2.91, "grad_norm": 0.7542246580123901, "learning_rate": 1.438598988029438e-06, "loss": 2.7414, "step": 59306 }, { "epoch": 2.91, "grad_norm": 0.7183325290679932, "learning_rate": 1.4370934114588228e-06, "loss": 2.7841, "step": 59307 }, { "epoch": 2.91, "grad_norm": 0.9062812924385071, "learning_rate": 1.435588621245487e-06, "loss": 2.7707, "step": 59308 }, { "epoch": 2.91, "grad_norm": 0.7738885283470154, "learning_rate": 1.4340846173933606e-06, "loss": 2.9159, "step": 59309 }, { "epoch": 2.91, "grad_norm": 0.7519917488098145, "learning_rate": 1.432581399906374e-06, "loss": 3.0687, "step": 59310 }, { "epoch": 2.91, "grad_norm": 0.7735452055931091, "learning_rate": 1.4310789687885238e-06, "loss": 2.6822, "step": 59311 }, { "epoch": 2.91, "grad_norm": 0.767966091632843, "learning_rate": 1.4295773240437068e-06, "loss": 2.9608, "step": 59312 }, { "epoch": 2.91, "grad_norm": 0.7551448941230774, "learning_rate": 1.4280764656759535e-06, "loss": 2.9766, "step": 59313 }, { "epoch": 2.91, "grad_norm": 0.785728394985199, "learning_rate": 1.426576393689194e-06, "loss": 3.004, "step": 59314 }, { "epoch": 2.91, "grad_norm": 0.7843324542045593, "learning_rate": 1.425077108087358e-06, "loss": 2.6908, "step": 59315 }, { "epoch": 2.91, "grad_norm": 0.717927098274231, "learning_rate": 1.4235786088744094e-06, "loss": 3.0576, "step": 59316 }, { "epoch": 2.91, "grad_norm": 0.7239921689033508, "learning_rate": 1.4220808960542784e-06, "loss": 3.0828, "step": 59317 }, { "epoch": 2.91, "grad_norm": 0.7748315334320068, "learning_rate": 1.4205839696309618e-06, "loss": 2.8963, "step": 59318 }, { "epoch": 2.91, "grad_norm": 0.7714270353317261, "learning_rate": 1.419087829608323e-06, "loss": 2.9912, "step": 59319 }, { "epoch": 2.91, "grad_norm": 0.7787078619003296, "learning_rate": 1.4175924759903589e-06, "loss": 2.7653, "step": 59320 }, { "epoch": 2.91, "grad_norm": 0.7504279613494873, "learning_rate": 1.4160979087809999e-06, "loss": 2.7674, "step": 59321 }, { "epoch": 2.91, "grad_norm": 0.7364410161972046, "learning_rate": 1.4146041279841759e-06, "loss": 3.0725, "step": 59322 }, { "epoch": 2.91, "grad_norm": 0.757007896900177, "learning_rate": 1.413111133603817e-06, "loss": 2.8924, "step": 59323 }, { "epoch": 2.91, "grad_norm": 0.7655906677246094, "learning_rate": 1.4116189256438538e-06, "loss": 2.8821, "step": 59324 }, { "epoch": 2.91, "grad_norm": 0.7602041363716125, "learning_rate": 1.410127504108216e-06, "loss": 2.6149, "step": 59325 }, { "epoch": 2.91, "grad_norm": 0.7541115283966064, "learning_rate": 1.4086368690008675e-06, "loss": 2.7603, "step": 59326 }, { "epoch": 2.91, "grad_norm": 0.7562124133110046, "learning_rate": 1.4071470203256718e-06, "loss": 2.9612, "step": 59327 }, { "epoch": 2.91, "grad_norm": 0.7242172956466675, "learning_rate": 1.4056579580865923e-06, "loss": 2.708, "step": 59328 }, { "epoch": 2.91, "grad_norm": 0.7529643774032593, "learning_rate": 1.4041696822875592e-06, "loss": 2.7851, "step": 59329 }, { "epoch": 2.91, "grad_norm": 0.6918560266494751, "learning_rate": 1.4026821929324695e-06, "loss": 2.9248, "step": 59330 }, { "epoch": 2.91, "grad_norm": 0.7533132433891296, "learning_rate": 1.4011954900252532e-06, "loss": 2.9044, "step": 59331 }, { "epoch": 2.91, "grad_norm": 0.7325230240821838, "learning_rate": 1.3997095735698071e-06, "loss": 2.7677, "step": 59332 }, { "epoch": 2.91, "grad_norm": 0.6999819278717041, "learning_rate": 1.3982244435700617e-06, "loss": 2.7991, "step": 59333 }, { "epoch": 2.91, "grad_norm": 0.7240120768547058, "learning_rate": 1.3967401000299472e-06, "loss": 2.9905, "step": 59334 }, { "epoch": 2.91, "grad_norm": 0.7363443374633789, "learning_rate": 1.3952565429533268e-06, "loss": 2.7389, "step": 59335 }, { "epoch": 2.91, "grad_norm": 0.7331762909889221, "learning_rate": 1.3937737723441644e-06, "loss": 2.6583, "step": 59336 }, { "epoch": 2.91, "grad_norm": 0.7030410766601562, "learning_rate": 1.39229178820629e-06, "loss": 2.8128, "step": 59337 }, { "epoch": 2.91, "grad_norm": 0.7303512692451477, "learning_rate": 1.3908105905436672e-06, "loss": 2.8716, "step": 59338 }, { "epoch": 2.91, "grad_norm": 0.7628925442695618, "learning_rate": 1.389330179360193e-06, "loss": 2.9677, "step": 59339 }, { "epoch": 2.91, "grad_norm": 0.7441055178642273, "learning_rate": 1.387850554659764e-06, "loss": 2.8874, "step": 59340 }, { "epoch": 2.91, "grad_norm": 0.7332181334495544, "learning_rate": 1.386371716446244e-06, "loss": 2.8901, "step": 59341 }, { "epoch": 2.91, "grad_norm": 0.7477689385414124, "learning_rate": 1.384893664723563e-06, "loss": 2.9624, "step": 59342 }, { "epoch": 2.91, "grad_norm": 0.7767438888549805, "learning_rate": 1.3834163994955848e-06, "loss": 2.825, "step": 59343 }, { "epoch": 2.91, "grad_norm": 0.7393341660499573, "learning_rate": 1.3819399207662396e-06, "loss": 2.8816, "step": 59344 }, { "epoch": 2.91, "grad_norm": 0.7812148928642273, "learning_rate": 1.3804642285393908e-06, "loss": 2.8379, "step": 59345 }, { "epoch": 2.91, "grad_norm": 0.7248178124427795, "learning_rate": 1.3789893228189352e-06, "loss": 3.0399, "step": 59346 }, { "epoch": 2.91, "grad_norm": 0.7927574515342712, "learning_rate": 1.3775152036087367e-06, "loss": 3.02, "step": 59347 }, { "epoch": 2.91, "grad_norm": 0.7457157969474792, "learning_rate": 1.3760418709126919e-06, "loss": 3.0966, "step": 59348 }, { "epoch": 2.91, "grad_norm": 0.6921155452728271, "learning_rate": 1.3745693247346978e-06, "loss": 2.9358, "step": 59349 }, { "epoch": 2.91, "grad_norm": 0.7026863098144531, "learning_rate": 1.3730975650785847e-06, "loss": 2.6633, "step": 59350 }, { "epoch": 2.91, "grad_norm": 0.7563304901123047, "learning_rate": 1.3716265919482827e-06, "loss": 2.8672, "step": 59351 }, { "epoch": 2.91, "grad_norm": 0.7299355268478394, "learning_rate": 1.370156405347622e-06, "loss": 2.9305, "step": 59352 }, { "epoch": 2.91, "grad_norm": 0.7180936336517334, "learning_rate": 1.3686870052805332e-06, "loss": 3.0106, "step": 59353 }, { "epoch": 2.91, "grad_norm": 0.8197305798530579, "learning_rate": 1.3672183917508461e-06, "loss": 2.9598, "step": 59354 }, { "epoch": 2.91, "grad_norm": 0.7417368292808533, "learning_rate": 1.3657505647624245e-06, "loss": 2.879, "step": 59355 }, { "epoch": 2.91, "grad_norm": 0.7476125955581665, "learning_rate": 1.364283524319132e-06, "loss": 2.804, "step": 59356 }, { "epoch": 2.91, "grad_norm": 0.7566918134689331, "learning_rate": 1.362817270424832e-06, "loss": 2.9898, "step": 59357 }, { "epoch": 2.91, "grad_norm": 0.7430882453918457, "learning_rate": 1.3613518030834215e-06, "loss": 2.7312, "step": 59358 }, { "epoch": 2.91, "grad_norm": 0.7406376004219055, "learning_rate": 1.3598871222987307e-06, "loss": 2.9305, "step": 59359 }, { "epoch": 2.91, "grad_norm": 0.7154626846313477, "learning_rate": 1.3584232280746231e-06, "loss": 2.8426, "step": 59360 }, { "epoch": 2.91, "grad_norm": 0.7587353587150574, "learning_rate": 1.3569601204149628e-06, "loss": 2.8959, "step": 59361 }, { "epoch": 2.91, "grad_norm": 0.7413969039916992, "learning_rate": 1.3554977993235794e-06, "loss": 2.8985, "step": 59362 }, { "epoch": 2.91, "grad_norm": 0.6970677971839905, "learning_rate": 1.354036264804337e-06, "loss": 2.7983, "step": 59363 }, { "epoch": 2.91, "grad_norm": 0.7779316902160645, "learning_rate": 1.3525755168610985e-06, "loss": 2.8532, "step": 59364 }, { "epoch": 2.91, "grad_norm": 0.7617775201797485, "learning_rate": 1.3511155554976948e-06, "loss": 2.9153, "step": 59365 }, { "epoch": 2.91, "grad_norm": 0.7746046781539917, "learning_rate": 1.3496563807179895e-06, "loss": 3.0756, "step": 59366 }, { "epoch": 2.91, "grad_norm": 0.7900286316871643, "learning_rate": 1.3481979925257792e-06, "loss": 2.6014, "step": 59367 }, { "epoch": 2.91, "grad_norm": 0.7511681914329529, "learning_rate": 1.3467403909249608e-06, "loss": 2.8521, "step": 59368 }, { "epoch": 2.91, "grad_norm": 0.7791048884391785, "learning_rate": 1.3452835759193648e-06, "loss": 2.9309, "step": 59369 }, { "epoch": 2.91, "grad_norm": 0.7215959429740906, "learning_rate": 1.343827547512788e-06, "loss": 3.0088, "step": 59370 }, { "epoch": 2.91, "grad_norm": 0.7659755349159241, "learning_rate": 1.3423723057091274e-06, "loss": 2.7044, "step": 59371 }, { "epoch": 2.91, "grad_norm": 0.7261947393417358, "learning_rate": 1.3409178505121465e-06, "loss": 2.7411, "step": 59372 }, { "epoch": 2.91, "grad_norm": 0.7190525531768799, "learning_rate": 1.339464181925709e-06, "loss": 2.938, "step": 59373 }, { "epoch": 2.91, "grad_norm": 0.7865150570869446, "learning_rate": 1.3380112999536786e-06, "loss": 3.0685, "step": 59374 }, { "epoch": 2.91, "grad_norm": 0.7611143589019775, "learning_rate": 1.3365592045998186e-06, "loss": 2.7982, "step": 59375 }, { "epoch": 2.91, "grad_norm": 0.758155107498169, "learning_rate": 1.3351078958679929e-06, "loss": 2.8102, "step": 59376 }, { "epoch": 2.91, "grad_norm": 0.7684991955757141, "learning_rate": 1.3336573737619981e-06, "loss": 2.8202, "step": 59377 }, { "epoch": 2.91, "grad_norm": 0.72589510679245, "learning_rate": 1.3322076382856984e-06, "loss": 2.9137, "step": 59378 }, { "epoch": 2.91, "grad_norm": 0.7442227005958557, "learning_rate": 1.3307586894428568e-06, "loss": 2.8947, "step": 59379 }, { "epoch": 2.91, "grad_norm": 0.8001748919487, "learning_rate": 1.3293105272373372e-06, "loss": 2.8781, "step": 59380 }, { "epoch": 2.91, "grad_norm": 0.77564537525177, "learning_rate": 1.3278631516729366e-06, "loss": 2.9313, "step": 59381 }, { "epoch": 2.91, "grad_norm": 0.7333300709724426, "learning_rate": 1.3264165627534518e-06, "loss": 2.889, "step": 59382 }, { "epoch": 2.91, "grad_norm": 0.7711548805236816, "learning_rate": 1.3249707604827131e-06, "loss": 2.8388, "step": 59383 }, { "epoch": 2.91, "grad_norm": 0.7647919058799744, "learning_rate": 1.3235257448644843e-06, "loss": 2.7657, "step": 59384 }, { "epoch": 2.91, "grad_norm": 0.7608219981193542, "learning_rate": 1.3220815159026288e-06, "loss": 2.745, "step": 59385 }, { "epoch": 2.91, "grad_norm": 0.7038655877113342, "learning_rate": 1.3206380736009437e-06, "loss": 2.6167, "step": 59386 }, { "epoch": 2.91, "grad_norm": 0.7791205048561096, "learning_rate": 1.3191954179632258e-06, "loss": 3.025, "step": 59387 }, { "epoch": 2.91, "grad_norm": 0.7197181582450867, "learning_rate": 1.3177535489932388e-06, "loss": 3.1071, "step": 59388 }, { "epoch": 2.91, "grad_norm": 0.7548245191574097, "learning_rate": 1.3163124666948132e-06, "loss": 2.7108, "step": 59389 }, { "epoch": 2.91, "grad_norm": 0.7533859014511108, "learning_rate": 1.3148721710717459e-06, "loss": 3.086, "step": 59390 }, { "epoch": 2.91, "grad_norm": 0.7900393009185791, "learning_rate": 1.3134326621278002e-06, "loss": 2.8453, "step": 59391 }, { "epoch": 2.91, "grad_norm": 0.7366978526115417, "learning_rate": 1.3119939398668068e-06, "loss": 3.0731, "step": 59392 }, { "epoch": 2.91, "grad_norm": 0.7741217613220215, "learning_rate": 1.3105560042925623e-06, "loss": 2.9944, "step": 59393 }, { "epoch": 2.91, "grad_norm": 0.7335111498832703, "learning_rate": 1.3091188554087971e-06, "loss": 2.7327, "step": 59394 }, { "epoch": 2.91, "grad_norm": 0.7100944519042969, "learning_rate": 1.3076824932193086e-06, "loss": 2.8545, "step": 59395 }, { "epoch": 2.91, "grad_norm": 0.7313215136528015, "learning_rate": 1.3062469177279267e-06, "loss": 2.8571, "step": 59396 }, { "epoch": 2.91, "grad_norm": 0.7661868929862976, "learning_rate": 1.304812128938415e-06, "loss": 3.0734, "step": 59397 }, { "epoch": 2.91, "grad_norm": 0.7975308895111084, "learning_rate": 1.3033781268545374e-06, "loss": 2.8728, "step": 59398 }, { "epoch": 2.91, "grad_norm": 0.7336994409561157, "learning_rate": 1.3019449114800573e-06, "loss": 3.1197, "step": 59399 }, { "epoch": 2.91, "grad_norm": 0.7279320359230042, "learning_rate": 1.3005124828188052e-06, "loss": 2.7903, "step": 59400 }, { "epoch": 2.91, "grad_norm": 0.7265657186508179, "learning_rate": 1.299080840874478e-06, "loss": 3.0084, "step": 59401 }, { "epoch": 2.91, "grad_norm": 0.7682440876960754, "learning_rate": 1.2976499856509059e-06, "loss": 2.8712, "step": 59402 }, { "epoch": 2.91, "grad_norm": 0.7393474578857422, "learning_rate": 1.2962199171518528e-06, "loss": 2.8719, "step": 59403 }, { "epoch": 2.91, "grad_norm": 0.7249324917793274, "learning_rate": 1.2947906353810489e-06, "loss": 2.9401, "step": 59404 }, { "epoch": 2.91, "grad_norm": 0.7434907555580139, "learning_rate": 1.293362140342258e-06, "loss": 2.9189, "step": 59405 }, { "epoch": 2.91, "grad_norm": 0.766069233417511, "learning_rate": 1.29193443203931e-06, "loss": 3.0494, "step": 59406 }, { "epoch": 2.91, "grad_norm": 0.7799139618873596, "learning_rate": 1.2905075104759022e-06, "loss": 2.7501, "step": 59407 }, { "epoch": 2.91, "grad_norm": 0.7259254455566406, "learning_rate": 1.2890813756557983e-06, "loss": 2.9274, "step": 59408 }, { "epoch": 2.91, "grad_norm": 0.8382498621940613, "learning_rate": 1.2876560275827619e-06, "loss": 3.0196, "step": 59409 }, { "epoch": 2.91, "grad_norm": 0.7565951943397522, "learning_rate": 1.2862314662605565e-06, "loss": 3.0605, "step": 59410 }, { "epoch": 2.91, "grad_norm": 0.7980879545211792, "learning_rate": 1.2848076916929462e-06, "loss": 2.7056, "step": 59411 }, { "epoch": 2.91, "grad_norm": 0.8130079507827759, "learning_rate": 1.283384703883661e-06, "loss": 2.6706, "step": 59412 }, { "epoch": 2.91, "grad_norm": 0.7338296175003052, "learning_rate": 1.2819625028364312e-06, "loss": 2.8004, "step": 59413 }, { "epoch": 2.91, "grad_norm": 0.7697415351867676, "learning_rate": 1.2805410885550205e-06, "loss": 2.8974, "step": 59414 }, { "epoch": 2.91, "grad_norm": 0.7453495860099792, "learning_rate": 1.2791204610431927e-06, "loss": 3.0127, "step": 59415 }, { "epoch": 2.91, "grad_norm": 0.7462559342384338, "learning_rate": 1.277700620304678e-06, "loss": 2.7122, "step": 59416 }, { "epoch": 2.91, "grad_norm": 0.7518516778945923, "learning_rate": 1.2762815663431735e-06, "loss": 2.7392, "step": 59417 }, { "epoch": 2.91, "grad_norm": 0.7591348886489868, "learning_rate": 1.2748632991625096e-06, "loss": 3.0127, "step": 59418 }, { "epoch": 2.91, "grad_norm": 0.7799931764602661, "learning_rate": 1.2734458187663166e-06, "loss": 2.912, "step": 59419 }, { "epoch": 2.91, "grad_norm": 0.7421641945838928, "learning_rate": 1.2720291251583913e-06, "loss": 2.6909, "step": 59420 }, { "epoch": 2.91, "grad_norm": 0.7436515092849731, "learning_rate": 1.2706132183424644e-06, "loss": 2.7775, "step": 59421 }, { "epoch": 2.91, "grad_norm": 0.7163355350494385, "learning_rate": 1.2691980983222327e-06, "loss": 2.8094, "step": 59422 }, { "epoch": 2.91, "grad_norm": 0.7329570651054382, "learning_rate": 1.26778376510146e-06, "loss": 2.9062, "step": 59423 }, { "epoch": 2.91, "grad_norm": 0.8021432161331177, "learning_rate": 1.26637021868381e-06, "loss": 3.0601, "step": 59424 }, { "epoch": 2.91, "grad_norm": 0.7619763016700745, "learning_rate": 1.2649574590730794e-06, "loss": 2.9556, "step": 59425 }, { "epoch": 2.91, "grad_norm": 0.7866322994232178, "learning_rate": 1.2635454862729656e-06, "loss": 2.7857, "step": 59426 }, { "epoch": 2.91, "grad_norm": 0.7534366250038147, "learning_rate": 1.2621343002871654e-06, "loss": 2.8646, "step": 59427 }, { "epoch": 2.91, "grad_norm": 0.7509728074073792, "learning_rate": 1.2607239011194092e-06, "loss": 2.8734, "step": 59428 }, { "epoch": 2.91, "grad_norm": 0.7655785083770752, "learning_rate": 1.2593142887734276e-06, "loss": 2.8618, "step": 59429 }, { "epoch": 2.91, "grad_norm": 0.759252667427063, "learning_rate": 1.2579054632529173e-06, "loss": 3.0101, "step": 59430 }, { "epoch": 2.91, "grad_norm": 0.7218188047409058, "learning_rate": 1.2564974245615756e-06, "loss": 2.8201, "step": 59431 }, { "epoch": 2.91, "grad_norm": 0.7249210476875305, "learning_rate": 1.2550901727031326e-06, "loss": 2.7672, "step": 59432 }, { "epoch": 2.91, "grad_norm": 0.7824299335479736, "learning_rate": 1.2536837076812856e-06, "loss": 2.8039, "step": 59433 }, { "epoch": 2.91, "grad_norm": 0.731982946395874, "learning_rate": 1.2522780294997315e-06, "loss": 2.997, "step": 59434 }, { "epoch": 2.91, "grad_norm": 0.7653811573982239, "learning_rate": 1.2508731381622006e-06, "loss": 3.0187, "step": 59435 }, { "epoch": 2.91, "grad_norm": 0.7498577833175659, "learning_rate": 1.2494690336723568e-06, "loss": 2.7194, "step": 59436 }, { "epoch": 2.91, "grad_norm": 0.7105596661567688, "learning_rate": 1.2480657160339303e-06, "loss": 2.8219, "step": 59437 }, { "epoch": 2.91, "grad_norm": 0.7923392653465271, "learning_rate": 1.2466631852506182e-06, "loss": 2.8113, "step": 59438 }, { "epoch": 2.91, "grad_norm": 0.725375235080719, "learning_rate": 1.2452614413260509e-06, "loss": 2.9244, "step": 59439 }, { "epoch": 2.91, "grad_norm": 0.7582253217697144, "learning_rate": 1.2438604842640253e-06, "loss": 2.7547, "step": 59440 }, { "epoch": 2.91, "grad_norm": 0.7434356808662415, "learning_rate": 1.242460314068139e-06, "loss": 2.8055, "step": 59441 }, { "epoch": 2.91, "grad_norm": 0.7622381448745728, "learning_rate": 1.2410609307421216e-06, "loss": 2.8985, "step": 59442 }, { "epoch": 2.91, "grad_norm": 0.8195493817329407, "learning_rate": 1.2396623342896373e-06, "loss": 2.8076, "step": 59443 }, { "epoch": 2.91, "grad_norm": 0.7499621510505676, "learning_rate": 1.2382645247144163e-06, "loss": 2.7527, "step": 59444 }, { "epoch": 2.91, "grad_norm": 0.7483765482902527, "learning_rate": 1.2368675020200891e-06, "loss": 2.9304, "step": 59445 }, { "epoch": 2.91, "grad_norm": 0.7556241154670715, "learning_rate": 1.2354712662103528e-06, "loss": 2.9852, "step": 59446 }, { "epoch": 2.91, "grad_norm": 0.7922104597091675, "learning_rate": 1.2340758172889043e-06, "loss": 2.9386, "step": 59447 }, { "epoch": 2.91, "grad_norm": 0.7229467034339905, "learning_rate": 1.232681155259374e-06, "loss": 2.897, "step": 59448 }, { "epoch": 2.91, "grad_norm": 0.7952060699462891, "learning_rate": 1.2312872801254926e-06, "loss": 3.0477, "step": 59449 }, { "epoch": 2.91, "grad_norm": 0.8594965934753418, "learning_rate": 1.2298941918908566e-06, "loss": 2.9014, "step": 59450 }, { "epoch": 2.91, "grad_norm": 0.7167303562164307, "learning_rate": 1.2285018905592303e-06, "loss": 2.8262, "step": 59451 }, { "epoch": 2.91, "grad_norm": 0.7530221343040466, "learning_rate": 1.2271103761342105e-06, "loss": 2.9817, "step": 59452 }, { "epoch": 2.91, "grad_norm": 0.7742487192153931, "learning_rate": 1.225719648619461e-06, "loss": 2.8349, "step": 59453 }, { "epoch": 2.91, "grad_norm": 0.7406141757965088, "learning_rate": 1.2243297080186786e-06, "loss": 2.9665, "step": 59454 }, { "epoch": 2.91, "grad_norm": 0.7695170044898987, "learning_rate": 1.2229405543355274e-06, "loss": 2.9037, "step": 59455 }, { "epoch": 2.91, "grad_norm": 0.7280238270759583, "learning_rate": 1.221552187573671e-06, "loss": 2.9269, "step": 59456 }, { "epoch": 2.91, "grad_norm": 0.730404257774353, "learning_rate": 1.2201646077367067e-06, "loss": 2.8521, "step": 59457 }, { "epoch": 2.91, "grad_norm": 0.7469308376312256, "learning_rate": 1.2187778148283312e-06, "loss": 2.7984, "step": 59458 }, { "epoch": 2.91, "grad_norm": 0.7723640203475952, "learning_rate": 1.217391808852175e-06, "loss": 2.9174, "step": 59459 }, { "epoch": 2.91, "grad_norm": 0.8354815244674683, "learning_rate": 1.2160065898119352e-06, "loss": 2.7031, "step": 59460 }, { "epoch": 2.91, "grad_norm": 0.7310277819633484, "learning_rate": 1.2146221577112426e-06, "loss": 2.8573, "step": 59461 }, { "epoch": 2.91, "grad_norm": 0.761574923992157, "learning_rate": 1.2132385125537269e-06, "loss": 2.947, "step": 59462 }, { "epoch": 2.91, "grad_norm": 0.8336168527603149, "learning_rate": 1.2118556543430192e-06, "loss": 2.9162, "step": 59463 }, { "epoch": 2.91, "grad_norm": 0.7921255230903625, "learning_rate": 1.2104735830827827e-06, "loss": 2.8752, "step": 59464 }, { "epoch": 2.91, "grad_norm": 0.7641667723655701, "learning_rate": 1.2090922987766816e-06, "loss": 2.8504, "step": 59465 }, { "epoch": 2.91, "grad_norm": 0.7452090382575989, "learning_rate": 1.2077118014282794e-06, "loss": 2.8647, "step": 59466 }, { "epoch": 2.91, "grad_norm": 0.751675546169281, "learning_rate": 1.2063320910413067e-06, "loss": 2.8153, "step": 59467 }, { "epoch": 2.91, "grad_norm": 0.7424731254577637, "learning_rate": 1.204953167619327e-06, "loss": 3.0888, "step": 59468 }, { "epoch": 2.91, "grad_norm": 0.8056619167327881, "learning_rate": 1.2035750311660042e-06, "loss": 2.851, "step": 59469 }, { "epoch": 2.91, "grad_norm": 0.8469576239585876, "learning_rate": 1.2021976816849687e-06, "loss": 2.8366, "step": 59470 }, { "epoch": 2.91, "grad_norm": 0.7873582243919373, "learning_rate": 1.2008211191798177e-06, "loss": 2.9648, "step": 59471 }, { "epoch": 2.91, "grad_norm": 0.7305603623390198, "learning_rate": 1.1994453436542152e-06, "loss": 2.845, "step": 59472 }, { "epoch": 2.91, "grad_norm": 0.7844927906990051, "learning_rate": 1.198070355111791e-06, "loss": 2.9788, "step": 59473 }, { "epoch": 2.91, "grad_norm": 0.7984238862991333, "learning_rate": 1.1966961535561094e-06, "loss": 2.9127, "step": 59474 }, { "epoch": 2.91, "grad_norm": 0.7224608659744263, "learning_rate": 1.1953227389908339e-06, "loss": 2.9176, "step": 59475 }, { "epoch": 2.91, "grad_norm": 0.7413891553878784, "learning_rate": 1.1939501114195616e-06, "loss": 2.6806, "step": 59476 }, { "epoch": 2.91, "grad_norm": 0.8167316317558289, "learning_rate": 1.192578270845923e-06, "loss": 3.0055, "step": 59477 }, { "epoch": 2.91, "grad_norm": 0.7244284749031067, "learning_rate": 1.1912072172735488e-06, "loss": 2.8573, "step": 59478 }, { "epoch": 2.91, "grad_norm": 0.7515481114387512, "learning_rate": 1.1898369507060024e-06, "loss": 2.9796, "step": 59479 }, { "epoch": 2.91, "grad_norm": 0.7766403555870056, "learning_rate": 1.1884674711469477e-06, "loss": 2.9782, "step": 59480 }, { "epoch": 2.92, "grad_norm": 0.7261124849319458, "learning_rate": 1.1870987785999154e-06, "loss": 2.7586, "step": 59481 }, { "epoch": 2.92, "grad_norm": 0.7415921092033386, "learning_rate": 1.1857308730685688e-06, "loss": 3.0122, "step": 59482 }, { "epoch": 2.92, "grad_norm": 0.7948103547096252, "learning_rate": 1.1843637545565388e-06, "loss": 3.0193, "step": 59483 }, { "epoch": 2.92, "grad_norm": 0.7435404062271118, "learning_rate": 1.1829974230673557e-06, "loss": 2.734, "step": 59484 }, { "epoch": 2.92, "grad_norm": 0.7635899186134338, "learning_rate": 1.1816318786046497e-06, "loss": 2.918, "step": 59485 }, { "epoch": 2.92, "grad_norm": 0.7679666876792908, "learning_rate": 1.1802671211720182e-06, "loss": 2.9842, "step": 59486 }, { "epoch": 2.92, "grad_norm": 0.7481985092163086, "learning_rate": 1.1789031507730584e-06, "loss": 2.8345, "step": 59487 }, { "epoch": 2.92, "grad_norm": 0.7324191927909851, "learning_rate": 1.1775399674113672e-06, "loss": 2.9773, "step": 59488 }, { "epoch": 2.92, "grad_norm": 0.7569427490234375, "learning_rate": 1.1761775710905086e-06, "loss": 2.8726, "step": 59489 }, { "epoch": 2.92, "grad_norm": 0.7332199811935425, "learning_rate": 1.174815961814113e-06, "loss": 2.9145, "step": 59490 }, { "epoch": 2.92, "grad_norm": 0.7667021155357361, "learning_rate": 1.1734551395857107e-06, "loss": 2.8075, "step": 59491 }, { "epoch": 2.92, "grad_norm": 0.7241703271865845, "learning_rate": 1.1720951044089323e-06, "loss": 2.8421, "step": 59492 }, { "epoch": 2.92, "grad_norm": 0.807370662689209, "learning_rate": 1.1707358562873414e-06, "loss": 2.9485, "step": 59493 }, { "epoch": 2.92, "grad_norm": 0.7350105047225952, "learning_rate": 1.1693773952245355e-06, "loss": 3.1253, "step": 59494 }, { "epoch": 2.92, "grad_norm": 0.8460718989372253, "learning_rate": 1.1680197212240783e-06, "loss": 2.8301, "step": 59495 }, { "epoch": 2.92, "grad_norm": 0.7515153884887695, "learning_rate": 1.1666628342895e-06, "loss": 2.8304, "step": 59496 }, { "epoch": 2.92, "grad_norm": 0.718813955783844, "learning_rate": 1.1653067344244649e-06, "loss": 2.9743, "step": 59497 }, { "epoch": 2.92, "grad_norm": 0.7459069490432739, "learning_rate": 1.163951421632503e-06, "loss": 2.8279, "step": 59498 }, { "epoch": 2.92, "grad_norm": 0.7909800410270691, "learning_rate": 1.1625968959171782e-06, "loss": 3.0892, "step": 59499 }, { "epoch": 2.92, "grad_norm": 0.7388293743133545, "learning_rate": 1.1612431572820547e-06, "loss": 2.8286, "step": 59500 }, { "epoch": 2.92, "grad_norm": 0.7680931091308594, "learning_rate": 1.1598902057306958e-06, "loss": 2.8482, "step": 59501 }, { "epoch": 2.92, "grad_norm": 0.7312542200088501, "learning_rate": 1.1585380412666657e-06, "loss": 2.8631, "step": 59502 }, { "epoch": 2.92, "grad_norm": 0.7777981758117676, "learning_rate": 1.157186663893528e-06, "loss": 2.8585, "step": 59503 }, { "epoch": 2.92, "grad_norm": 0.7600824236869812, "learning_rate": 1.1558360736148797e-06, "loss": 2.6733, "step": 59504 }, { "epoch": 2.92, "grad_norm": 0.7767745852470398, "learning_rate": 1.1544862704342184e-06, "loss": 2.8698, "step": 59505 }, { "epoch": 2.92, "grad_norm": 0.7815971970558167, "learning_rate": 1.1531372543551409e-06, "loss": 2.8926, "step": 59506 }, { "epoch": 2.92, "grad_norm": 0.7735201716423035, "learning_rate": 1.151789025381178e-06, "loss": 2.8795, "step": 59507 }, { "epoch": 2.92, "grad_norm": 0.7491256594657898, "learning_rate": 1.150441583515893e-06, "loss": 2.9923, "step": 59508 }, { "epoch": 2.92, "grad_norm": 0.7839617133140564, "learning_rate": 1.1490949287628503e-06, "loss": 3.0807, "step": 59509 }, { "epoch": 2.92, "grad_norm": 0.7311417460441589, "learning_rate": 1.1477490611255468e-06, "loss": 3.0293, "step": 59510 }, { "epoch": 2.92, "grad_norm": 0.7277370691299438, "learning_rate": 1.1464039806075464e-06, "loss": 2.8279, "step": 59511 }, { "epoch": 2.92, "grad_norm": 0.7706161141395569, "learning_rate": 1.1450596872124463e-06, "loss": 2.918, "step": 59512 }, { "epoch": 2.92, "grad_norm": 0.7380478978157043, "learning_rate": 1.14371618094371e-06, "loss": 2.7686, "step": 59513 }, { "epoch": 2.92, "grad_norm": 0.7525132894515991, "learning_rate": 1.1423734618049352e-06, "loss": 2.6612, "step": 59514 }, { "epoch": 2.92, "grad_norm": 0.7478514909744263, "learning_rate": 1.1410315297996187e-06, "loss": 3.1156, "step": 59515 }, { "epoch": 2.92, "grad_norm": 0.7614080309867859, "learning_rate": 1.139690384931291e-06, "loss": 2.9505, "step": 59516 }, { "epoch": 2.92, "grad_norm": 0.7383436560630798, "learning_rate": 1.1383500272035163e-06, "loss": 2.9673, "step": 59517 }, { "epoch": 2.92, "grad_norm": 0.7430320382118225, "learning_rate": 1.1370104566197913e-06, "loss": 2.9268, "step": 59518 }, { "epoch": 2.92, "grad_norm": 0.7891849279403687, "learning_rate": 1.1356716731836802e-06, "loss": 2.8843, "step": 59519 }, { "epoch": 2.92, "grad_norm": 0.7453582286834717, "learning_rate": 1.1343336768987132e-06, "loss": 2.9038, "step": 59520 }, { "epoch": 2.92, "grad_norm": 0.7106955051422119, "learning_rate": 1.1329964677683878e-06, "loss": 2.5896, "step": 59521 }, { "epoch": 2.92, "grad_norm": 0.807589054107666, "learning_rate": 1.1316600457962011e-06, "loss": 2.8919, "step": 59522 }, { "epoch": 2.92, "grad_norm": 0.7693127989768982, "learning_rate": 1.1303244109857168e-06, "loss": 3.0337, "step": 59523 }, { "epoch": 2.92, "grad_norm": 0.747882068157196, "learning_rate": 1.1289895633404656e-06, "loss": 3.0611, "step": 59524 }, { "epoch": 2.92, "grad_norm": 0.7988489270210266, "learning_rate": 1.127655502863911e-06, "loss": 2.8766, "step": 59525 }, { "epoch": 2.92, "grad_norm": 0.7369797229766846, "learning_rate": 1.1263222295595842e-06, "loss": 2.8861, "step": 59526 }, { "epoch": 2.92, "grad_norm": 0.7736074924468994, "learning_rate": 1.124989743431015e-06, "loss": 2.7278, "step": 59527 }, { "epoch": 2.92, "grad_norm": 0.7230579257011414, "learning_rate": 1.1236580444817345e-06, "loss": 2.8485, "step": 59528 }, { "epoch": 2.92, "grad_norm": 0.7344254851341248, "learning_rate": 1.1223271327151728e-06, "loss": 3.0521, "step": 59529 }, { "epoch": 2.92, "grad_norm": 0.7863304615020752, "learning_rate": 1.1209970081348941e-06, "loss": 2.9299, "step": 59530 }, { "epoch": 2.92, "grad_norm": 0.7944401502609253, "learning_rate": 1.1196676707443953e-06, "loss": 2.8285, "step": 59531 }, { "epoch": 2.92, "grad_norm": 0.7780875563621521, "learning_rate": 1.1183391205471737e-06, "loss": 2.8872, "step": 59532 }, { "epoch": 2.92, "grad_norm": 0.7192845344543457, "learning_rate": 1.1170113575467266e-06, "loss": 2.7766, "step": 59533 }, { "epoch": 2.92, "grad_norm": 0.7729279398918152, "learning_rate": 1.1156843817465178e-06, "loss": 2.7897, "step": 59534 }, { "epoch": 2.92, "grad_norm": 0.7623705267906189, "learning_rate": 1.1143581931501111e-06, "loss": 2.882, "step": 59535 }, { "epoch": 2.92, "grad_norm": 0.7770814895629883, "learning_rate": 1.1130327917609704e-06, "loss": 3.0556, "step": 59536 }, { "epoch": 2.92, "grad_norm": 0.7293316721916199, "learning_rate": 1.1117081775825598e-06, "loss": 2.7412, "step": 59537 }, { "epoch": 2.92, "grad_norm": 0.7572553157806396, "learning_rate": 1.1103843506184096e-06, "loss": 2.7117, "step": 59538 }, { "epoch": 2.92, "grad_norm": 0.7205670475959778, "learning_rate": 1.1090613108719505e-06, "loss": 2.7834, "step": 59539 }, { "epoch": 2.92, "grad_norm": 0.7953411936759949, "learning_rate": 1.1077390583467126e-06, "loss": 3.0259, "step": 59540 }, { "epoch": 2.92, "grad_norm": 0.7684890031814575, "learning_rate": 1.1064175930461604e-06, "loss": 2.7954, "step": 59541 }, { "epoch": 2.92, "grad_norm": 0.7398133873939514, "learning_rate": 1.1050969149738243e-06, "loss": 3.0277, "step": 59542 }, { "epoch": 2.92, "grad_norm": 0.7606955170631409, "learning_rate": 1.1037770241331013e-06, "loss": 2.8503, "step": 59543 }, { "epoch": 2.92, "grad_norm": 0.7371569871902466, "learning_rate": 1.1024579205275218e-06, "loss": 2.629, "step": 59544 }, { "epoch": 2.92, "grad_norm": 0.7682278156280518, "learning_rate": 1.101139604160517e-06, "loss": 3.0296, "step": 59545 }, { "epoch": 2.92, "grad_norm": 0.8034104704856873, "learning_rate": 1.0998220750356169e-06, "loss": 2.9309, "step": 59546 }, { "epoch": 2.92, "grad_norm": 0.7584922909736633, "learning_rate": 1.098505333156252e-06, "loss": 2.854, "step": 59547 }, { "epoch": 2.92, "grad_norm": 0.7490361928939819, "learning_rate": 1.0971893785258868e-06, "loss": 3.0433, "step": 59548 }, { "epoch": 2.92, "grad_norm": 0.7814920544624329, "learning_rate": 1.0958742111480179e-06, "loss": 2.7952, "step": 59549 }, { "epoch": 2.92, "grad_norm": 0.7534535527229309, "learning_rate": 1.094559831026076e-06, "loss": 2.8577, "step": 59550 }, { "epoch": 2.92, "grad_norm": 0.722969114780426, "learning_rate": 1.0932462381635254e-06, "loss": 2.7215, "step": 59551 }, { "epoch": 2.92, "grad_norm": 0.7845919728279114, "learning_rate": 1.0919334325638629e-06, "loss": 2.7127, "step": 59552 }, { "epoch": 2.92, "grad_norm": 0.8240389227867126, "learning_rate": 1.0906214142305191e-06, "loss": 2.735, "step": 59553 }, { "epoch": 2.92, "grad_norm": 0.7913576364517212, "learning_rate": 1.0893101831669249e-06, "loss": 2.9265, "step": 59554 }, { "epoch": 2.92, "grad_norm": 0.716317892074585, "learning_rate": 1.0879997393766105e-06, "loss": 2.9504, "step": 59555 }, { "epoch": 2.92, "grad_norm": 0.7530320882797241, "learning_rate": 1.08669008286294e-06, "loss": 2.8871, "step": 59556 }, { "epoch": 2.92, "grad_norm": 0.7626606822013855, "learning_rate": 1.0853812136294104e-06, "loss": 3.0875, "step": 59557 }, { "epoch": 2.92, "grad_norm": 0.7616795301437378, "learning_rate": 1.084073131679486e-06, "loss": 2.6925, "step": 59558 }, { "epoch": 2.92, "grad_norm": 0.7134222388267517, "learning_rate": 1.0827658370165304e-06, "loss": 2.921, "step": 59559 }, { "epoch": 2.92, "grad_norm": 0.7397261261940002, "learning_rate": 1.0814593296441077e-06, "loss": 2.9264, "step": 59560 }, { "epoch": 2.92, "grad_norm": 0.7546602487564087, "learning_rate": 1.0801536095655482e-06, "loss": 3.096, "step": 59561 }, { "epoch": 2.92, "grad_norm": 0.7439787983894348, "learning_rate": 1.0788486767843497e-06, "loss": 2.8096, "step": 59562 }, { "epoch": 2.92, "grad_norm": 0.7837499380111694, "learning_rate": 1.0775445313039088e-06, "loss": 2.8704, "step": 59563 }, { "epoch": 2.92, "grad_norm": 0.7200571894645691, "learning_rate": 1.0762411731277232e-06, "loss": 2.9517, "step": 59564 }, { "epoch": 2.92, "grad_norm": 0.7449080348014832, "learning_rate": 1.0749386022591899e-06, "loss": 2.9333, "step": 59565 }, { "epoch": 2.92, "grad_norm": 0.7231045365333557, "learning_rate": 1.0736368187017064e-06, "loss": 2.7141, "step": 59566 }, { "epoch": 2.92, "grad_norm": 0.8130992650985718, "learning_rate": 1.0723358224587698e-06, "loss": 2.8928, "step": 59567 }, { "epoch": 2.92, "grad_norm": 0.7753105759620667, "learning_rate": 1.071035613533744e-06, "loss": 2.8285, "step": 59568 }, { "epoch": 2.92, "grad_norm": 0.7972942590713501, "learning_rate": 1.0697361919300928e-06, "loss": 2.8247, "step": 59569 }, { "epoch": 2.92, "grad_norm": 0.710925817489624, "learning_rate": 1.068437557651214e-06, "loss": 2.8817, "step": 59570 }, { "epoch": 2.92, "grad_norm": 0.7548450827598572, "learning_rate": 1.0671397107005708e-06, "loss": 2.7388, "step": 59571 }, { "epoch": 2.92, "grad_norm": 0.7482631802558899, "learning_rate": 1.0658426510815276e-06, "loss": 2.7826, "step": 59572 }, { "epoch": 2.92, "grad_norm": 0.7267739772796631, "learning_rate": 1.064546378797515e-06, "loss": 2.8136, "step": 59573 }, { "epoch": 2.92, "grad_norm": 0.7370942831039429, "learning_rate": 1.0632508938519634e-06, "loss": 2.8792, "step": 59574 }, { "epoch": 2.92, "grad_norm": 0.7242228984832764, "learning_rate": 1.0619561962482703e-06, "loss": 2.8447, "step": 59575 }, { "epoch": 2.92, "grad_norm": 0.7722229361534119, "learning_rate": 1.0606622859898662e-06, "loss": 3.0327, "step": 59576 }, { "epoch": 2.92, "grad_norm": 0.7654498219490051, "learning_rate": 1.0593691630801482e-06, "loss": 2.9213, "step": 59577 }, { "epoch": 2.92, "grad_norm": 0.7603773474693298, "learning_rate": 1.0580768275225139e-06, "loss": 3.1623, "step": 59578 }, { "epoch": 2.92, "grad_norm": 0.7189480662345886, "learning_rate": 1.0567852793203602e-06, "loss": 2.812, "step": 59579 }, { "epoch": 2.92, "grad_norm": 0.7826395034790039, "learning_rate": 1.055494518477118e-06, "loss": 3.1349, "step": 59580 }, { "epoch": 2.92, "grad_norm": 0.7658017873764038, "learning_rate": 1.0542045449961844e-06, "loss": 2.6386, "step": 59581 }, { "epoch": 2.92, "grad_norm": 0.7522681951522827, "learning_rate": 1.0529153588809235e-06, "loss": 2.8757, "step": 59582 }, { "epoch": 2.92, "grad_norm": 0.7336506843566895, "learning_rate": 1.0516269601347326e-06, "loss": 2.8125, "step": 59583 }, { "epoch": 2.92, "grad_norm": 0.7140693068504333, "learning_rate": 1.0503393487610423e-06, "loss": 2.9674, "step": 59584 }, { "epoch": 2.92, "grad_norm": 0.7803701758384705, "learning_rate": 1.0490525247632165e-06, "loss": 3.1645, "step": 59585 }, { "epoch": 2.92, "grad_norm": 0.7349176406860352, "learning_rate": 1.0477664881446857e-06, "loss": 2.7669, "step": 59586 }, { "epoch": 2.92, "grad_norm": 0.7476712465286255, "learning_rate": 1.0464812389087806e-06, "loss": 2.765, "step": 59587 }, { "epoch": 2.92, "grad_norm": 0.764438271522522, "learning_rate": 1.0451967770588987e-06, "loss": 2.971, "step": 59588 }, { "epoch": 2.92, "grad_norm": 0.7536563873291016, "learning_rate": 1.043913102598437e-06, "loss": 2.9274, "step": 59589 }, { "epoch": 2.92, "grad_norm": 0.728675365447998, "learning_rate": 1.0426302155307597e-06, "loss": 2.7336, "step": 59590 }, { "epoch": 2.92, "grad_norm": 0.7635043859481812, "learning_rate": 1.041348115859264e-06, "loss": 2.8814, "step": 59591 }, { "epoch": 2.92, "grad_norm": 0.7409845590591431, "learning_rate": 1.0400668035873472e-06, "loss": 2.8674, "step": 59592 }, { "epoch": 2.92, "grad_norm": 0.707777202129364, "learning_rate": 1.03878627871834e-06, "loss": 2.8546, "step": 59593 }, { "epoch": 2.92, "grad_norm": 0.7888078689575195, "learning_rate": 1.0375065412556393e-06, "loss": 3.0258, "step": 59594 }, { "epoch": 2.92, "grad_norm": 0.7485946416854858, "learning_rate": 1.0362275912026096e-06, "loss": 2.9028, "step": 59595 }, { "epoch": 2.92, "grad_norm": 0.7295870780944824, "learning_rate": 1.0349494285626147e-06, "loss": 3.0208, "step": 59596 }, { "epoch": 2.92, "grad_norm": 0.7414456009864807, "learning_rate": 1.0336720533390187e-06, "loss": 2.789, "step": 59597 }, { "epoch": 2.92, "grad_norm": 0.7335713505744934, "learning_rate": 1.0323954655352185e-06, "loss": 3.0562, "step": 59598 }, { "epoch": 2.92, "grad_norm": 0.759948194026947, "learning_rate": 1.0311196651545118e-06, "loss": 2.9113, "step": 59599 }, { "epoch": 2.92, "grad_norm": 0.7210058569908142, "learning_rate": 1.029844652200329e-06, "loss": 3.0582, "step": 59600 }, { "epoch": 2.92, "grad_norm": 0.736574113368988, "learning_rate": 1.028570426676001e-06, "loss": 2.7906, "step": 59601 }, { "epoch": 2.92, "grad_norm": 0.7816761136054993, "learning_rate": 1.0272969885848915e-06, "loss": 2.9032, "step": 59602 }, { "epoch": 2.92, "grad_norm": 0.8219104409217834, "learning_rate": 1.0260243379302979e-06, "loss": 3.044, "step": 59603 }, { "epoch": 2.92, "grad_norm": 0.7174280285835266, "learning_rate": 1.024752474715651e-06, "loss": 2.9151, "step": 59604 }, { "epoch": 2.92, "grad_norm": 0.7537269592285156, "learning_rate": 1.023481398944248e-06, "loss": 2.9196, "step": 59605 }, { "epoch": 2.92, "grad_norm": 0.7246042490005493, "learning_rate": 1.022211110619453e-06, "loss": 2.7453, "step": 59606 }, { "epoch": 2.92, "grad_norm": 0.7886763215065002, "learning_rate": 1.0209416097446632e-06, "loss": 2.8192, "step": 59607 }, { "epoch": 2.92, "grad_norm": 0.7710185050964355, "learning_rate": 1.0196728963231427e-06, "loss": 3.0203, "step": 59608 }, { "epoch": 2.92, "grad_norm": 0.704352080821991, "learning_rate": 1.018404970358222e-06, "loss": 2.8758, "step": 59609 }, { "epoch": 2.92, "grad_norm": 0.7111163139343262, "learning_rate": 1.0171378318533318e-06, "loss": 2.7523, "step": 59610 }, { "epoch": 2.92, "grad_norm": 0.7470084428787231, "learning_rate": 1.0158714808117362e-06, "loss": 2.7751, "step": 59611 }, { "epoch": 2.92, "grad_norm": 0.7764518857002258, "learning_rate": 1.0146059172368327e-06, "loss": 2.9253, "step": 59612 }, { "epoch": 2.92, "grad_norm": 0.7668911218643188, "learning_rate": 1.0133411411318849e-06, "loss": 3.1101, "step": 59613 }, { "epoch": 2.92, "grad_norm": 0.7347301840782166, "learning_rate": 1.012077152500257e-06, "loss": 2.9405, "step": 59614 }, { "epoch": 2.92, "grad_norm": 0.8694640398025513, "learning_rate": 1.0108139513452795e-06, "loss": 2.8178, "step": 59615 }, { "epoch": 2.92, "grad_norm": 0.7998950481414795, "learning_rate": 1.0095515376702833e-06, "loss": 2.9485, "step": 59616 }, { "epoch": 2.92, "grad_norm": 0.7624552249908447, "learning_rate": 1.0082899114785659e-06, "loss": 3.0671, "step": 59617 }, { "epoch": 2.92, "grad_norm": 0.7087655067443848, "learning_rate": 1.0070290727734909e-06, "loss": 2.8172, "step": 59618 }, { "epoch": 2.92, "grad_norm": 0.7436306476593018, "learning_rate": 1.0057690215583558e-06, "loss": 2.9391, "step": 59619 }, { "epoch": 2.92, "grad_norm": 0.7129530906677246, "learning_rate": 1.004509757836458e-06, "loss": 2.7657, "step": 59620 }, { "epoch": 2.92, "grad_norm": 0.7246747016906738, "learning_rate": 1.0032512816111616e-06, "loss": 3.0554, "step": 59621 }, { "epoch": 2.92, "grad_norm": 0.7818371057510376, "learning_rate": 1.0019935928857637e-06, "loss": 2.7496, "step": 59622 }, { "epoch": 2.92, "grad_norm": 0.754097580909729, "learning_rate": 1.0007366916635618e-06, "loss": 2.7842, "step": 59623 }, { "epoch": 2.92, "grad_norm": 0.7770971655845642, "learning_rate": 9.994805779478865e-07, "loss": 2.9256, "step": 59624 }, { "epoch": 2.92, "grad_norm": 0.7818499803543091, "learning_rate": 9.982252517420352e-07, "loss": 3.0954, "step": 59625 }, { "epoch": 2.92, "grad_norm": 0.7363375425338745, "learning_rate": 9.969707130493055e-07, "loss": 3.0082, "step": 59626 }, { "epoch": 2.92, "grad_norm": 0.7672472596168518, "learning_rate": 9.957169618729944e-07, "loss": 2.9003, "step": 59627 }, { "epoch": 2.92, "grad_norm": 0.7299450039863586, "learning_rate": 9.944639982164326e-07, "loss": 3.1018, "step": 59628 }, { "epoch": 2.92, "grad_norm": 0.788252592086792, "learning_rate": 9.932118220829177e-07, "loss": 2.7675, "step": 59629 }, { "epoch": 2.92, "grad_norm": 0.760606050491333, "learning_rate": 9.91960433475747e-07, "loss": 2.8997, "step": 59630 }, { "epoch": 2.92, "grad_norm": 0.7685892581939697, "learning_rate": 9.907098323981845e-07, "loss": 2.9651, "step": 59631 }, { "epoch": 2.92, "grad_norm": 0.7383074760437012, "learning_rate": 9.894600188535606e-07, "loss": 2.9564, "step": 59632 }, { "epoch": 2.92, "grad_norm": 0.7410311698913574, "learning_rate": 9.882109928451398e-07, "loss": 2.6135, "step": 59633 }, { "epoch": 2.92, "grad_norm": 0.7539855241775513, "learning_rate": 9.869627543762527e-07, "loss": 3.037, "step": 59634 }, { "epoch": 2.92, "grad_norm": 0.7680968046188354, "learning_rate": 9.857153034501297e-07, "loss": 2.7394, "step": 59635 }, { "epoch": 2.92, "grad_norm": 0.779704213142395, "learning_rate": 9.84468640070102e-07, "loss": 2.7469, "step": 59636 }, { "epoch": 2.92, "grad_norm": 0.7694498896598816, "learning_rate": 9.83222764239433e-07, "loss": 3.0253, "step": 59637 }, { "epoch": 2.92, "grad_norm": 0.8191694617271423, "learning_rate": 9.819776759614206e-07, "loss": 3.0428, "step": 59638 }, { "epoch": 2.92, "grad_norm": 0.722665548324585, "learning_rate": 9.807333752392953e-07, "loss": 2.944, "step": 59639 }, { "epoch": 2.92, "grad_norm": 0.7409653067588806, "learning_rate": 9.79489862076388e-07, "loss": 2.8522, "step": 59640 }, { "epoch": 2.92, "grad_norm": 0.7272565364837646, "learning_rate": 9.782471364759625e-07, "loss": 2.7308, "step": 59641 }, { "epoch": 2.92, "grad_norm": 0.766656756401062, "learning_rate": 9.770051984413164e-07, "loss": 2.7798, "step": 59642 }, { "epoch": 2.92, "grad_norm": 0.7929888367652893, "learning_rate": 9.757640479756468e-07, "loss": 2.9093, "step": 59643 }, { "epoch": 2.92, "grad_norm": 0.7375426888465881, "learning_rate": 9.745236850822846e-07, "loss": 2.6784, "step": 59644 }, { "epoch": 2.92, "grad_norm": 0.7349624037742615, "learning_rate": 9.732841097644605e-07, "loss": 2.7962, "step": 59645 }, { "epoch": 2.92, "grad_norm": 0.7367823719978333, "learning_rate": 9.72045322025472e-07, "loss": 2.975, "step": 59646 }, { "epoch": 2.92, "grad_norm": 0.7537251114845276, "learning_rate": 9.70807321868583e-07, "loss": 2.8237, "step": 59647 }, { "epoch": 2.92, "grad_norm": 0.7877868413925171, "learning_rate": 9.695701092970243e-07, "loss": 2.7918, "step": 59648 }, { "epoch": 2.92, "grad_norm": 0.7532494068145752, "learning_rate": 9.683336843140598e-07, "loss": 2.8354, "step": 59649 }, { "epoch": 2.92, "grad_norm": 0.7658734321594238, "learning_rate": 9.67098046922954e-07, "loss": 2.9192, "step": 59650 }, { "epoch": 2.92, "grad_norm": 0.7385966777801514, "learning_rate": 9.658631971269704e-07, "loss": 2.8873, "step": 59651 }, { "epoch": 2.92, "grad_norm": 0.7445787787437439, "learning_rate": 9.6462913492934e-07, "loss": 2.6811, "step": 59652 }, { "epoch": 2.92, "grad_norm": 0.7418826818466187, "learning_rate": 9.63395860333327e-07, "loss": 2.8437, "step": 59653 }, { "epoch": 2.92, "grad_norm": 0.7458414435386658, "learning_rate": 9.621633733421953e-07, "loss": 2.7761, "step": 59654 }, { "epoch": 2.92, "grad_norm": 0.7673033475875854, "learning_rate": 9.609316739591754e-07, "loss": 2.9863, "step": 59655 }, { "epoch": 2.92, "grad_norm": 0.7251089811325073, "learning_rate": 9.597007621874985e-07, "loss": 3.0085, "step": 59656 }, { "epoch": 2.92, "grad_norm": 0.7382851243019104, "learning_rate": 9.584706380304286e-07, "loss": 2.6704, "step": 59657 }, { "epoch": 2.92, "grad_norm": 0.7357835173606873, "learning_rate": 9.572413014911628e-07, "loss": 2.9542, "step": 59658 }, { "epoch": 2.92, "grad_norm": 0.7621243596076965, "learning_rate": 9.560127525730321e-07, "loss": 2.9173, "step": 59659 }, { "epoch": 2.92, "grad_norm": 0.8021424412727356, "learning_rate": 9.547849912791673e-07, "loss": 2.9653, "step": 59660 }, { "epoch": 2.92, "grad_norm": 0.7782572507858276, "learning_rate": 9.535580176128654e-07, "loss": 2.9197, "step": 59661 }, { "epoch": 2.92, "grad_norm": 0.7518317103385925, "learning_rate": 9.523318315773242e-07, "loss": 2.8542, "step": 59662 }, { "epoch": 2.92, "grad_norm": 0.7433437705039978, "learning_rate": 9.511064331757745e-07, "loss": 2.9361, "step": 59663 }, { "epoch": 2.92, "grad_norm": 0.7323741912841797, "learning_rate": 9.498818224114802e-07, "loss": 2.8354, "step": 59664 }, { "epoch": 2.92, "grad_norm": 0.7862437963485718, "learning_rate": 9.486579992876386e-07, "loss": 2.9296, "step": 59665 }, { "epoch": 2.92, "grad_norm": 0.729877769947052, "learning_rate": 9.474349638074808e-07, "loss": 2.6364, "step": 59666 }, { "epoch": 2.92, "grad_norm": 0.7364502549171448, "learning_rate": 9.462127159742372e-07, "loss": 2.9278, "step": 59667 }, { "epoch": 2.92, "grad_norm": 0.748285174369812, "learning_rate": 9.449912557911055e-07, "loss": 2.8261, "step": 59668 }, { "epoch": 2.92, "grad_norm": 0.7488347887992859, "learning_rate": 9.437705832613163e-07, "loss": 3.0538, "step": 59669 }, { "epoch": 2.92, "grad_norm": 0.7590417265892029, "learning_rate": 9.425506983880671e-07, "loss": 2.8349, "step": 59670 }, { "epoch": 2.92, "grad_norm": 0.7655706405639648, "learning_rate": 9.413316011745886e-07, "loss": 2.8614, "step": 59671 }, { "epoch": 2.92, "grad_norm": 0.7407287955284119, "learning_rate": 9.401132916240784e-07, "loss": 3.0396, "step": 59672 }, { "epoch": 2.92, "grad_norm": 0.7478367686271667, "learning_rate": 9.388957697397669e-07, "loss": 2.9103, "step": 59673 }, { "epoch": 2.92, "grad_norm": 0.8015658259391785, "learning_rate": 9.37679035524852e-07, "loss": 2.7994, "step": 59674 }, { "epoch": 2.92, "grad_norm": 0.748598039150238, "learning_rate": 9.364630889825309e-07, "loss": 2.7964, "step": 59675 }, { "epoch": 2.92, "grad_norm": 0.7474225759506226, "learning_rate": 9.352479301160009e-07, "loss": 2.935, "step": 59676 }, { "epoch": 2.92, "grad_norm": 0.7985930442810059, "learning_rate": 9.340335589284931e-07, "loss": 2.9996, "step": 59677 }, { "epoch": 2.92, "grad_norm": 0.739578127861023, "learning_rate": 9.328199754231713e-07, "loss": 2.8553, "step": 59678 }, { "epoch": 2.92, "grad_norm": 0.745047926902771, "learning_rate": 9.316071796032331e-07, "loss": 2.7658, "step": 59679 }, { "epoch": 2.92, "grad_norm": 0.7639679312705994, "learning_rate": 9.303951714719093e-07, "loss": 2.7573, "step": 59680 }, { "epoch": 2.92, "grad_norm": 0.8055102229118347, "learning_rate": 9.29183951032364e-07, "loss": 2.7784, "step": 59681 }, { "epoch": 2.92, "grad_norm": 0.717705488204956, "learning_rate": 9.279735182877613e-07, "loss": 2.8754, "step": 59682 }, { "epoch": 2.92, "grad_norm": 0.7145783305168152, "learning_rate": 9.267638732413651e-07, "loss": 2.9277, "step": 59683 }, { "epoch": 2.92, "grad_norm": 0.7795868515968323, "learning_rate": 9.255550158962733e-07, "loss": 2.8612, "step": 59684 }, { "epoch": 2.93, "grad_norm": 0.7477320432662964, "learning_rate": 9.243469462557163e-07, "loss": 2.8429, "step": 59685 }, { "epoch": 2.93, "grad_norm": 0.7685227990150452, "learning_rate": 9.231396643228917e-07, "loss": 2.7086, "step": 59686 }, { "epoch": 2.93, "grad_norm": 0.742152214050293, "learning_rate": 9.219331701009635e-07, "loss": 2.9458, "step": 59687 }, { "epoch": 2.93, "grad_norm": 0.7278732657432556, "learning_rate": 9.207274635930961e-07, "loss": 2.8947, "step": 59688 }, { "epoch": 2.93, "grad_norm": 0.7534875273704529, "learning_rate": 9.195225448024535e-07, "loss": 2.974, "step": 59689 }, { "epoch": 2.93, "grad_norm": 0.726291835308075, "learning_rate": 9.183184137322664e-07, "loss": 2.9126, "step": 59690 }, { "epoch": 2.93, "grad_norm": 0.746126651763916, "learning_rate": 9.171150703856323e-07, "loss": 2.9024, "step": 59691 }, { "epoch": 2.93, "grad_norm": 0.7456505298614502, "learning_rate": 9.15912514765782e-07, "loss": 3.0167, "step": 59692 }, { "epoch": 2.93, "grad_norm": 0.7747290134429932, "learning_rate": 9.147107468758464e-07, "loss": 2.956, "step": 59693 }, { "epoch": 2.93, "grad_norm": 0.7368700504302979, "learning_rate": 9.135097667189895e-07, "loss": 2.9151, "step": 59694 }, { "epoch": 2.93, "grad_norm": 0.7123827934265137, "learning_rate": 9.123095742984088e-07, "loss": 3.1211, "step": 59695 }, { "epoch": 2.93, "grad_norm": 0.7461838722229004, "learning_rate": 9.111101696172351e-07, "loss": 2.9004, "step": 59696 }, { "epoch": 2.93, "grad_norm": 0.7076005935668945, "learning_rate": 9.099115526786327e-07, "loss": 2.8209, "step": 59697 }, { "epoch": 2.93, "grad_norm": 0.7806337475776672, "learning_rate": 9.087137234857656e-07, "loss": 2.6983, "step": 59698 }, { "epoch": 2.93, "grad_norm": 0.7522020936012268, "learning_rate": 9.075166820417978e-07, "loss": 2.8614, "step": 59699 }, { "epoch": 2.93, "grad_norm": 0.7688117623329163, "learning_rate": 9.063204283498604e-07, "loss": 2.8533, "step": 59700 }, { "epoch": 2.93, "grad_norm": 0.7308453917503357, "learning_rate": 9.051249624131174e-07, "loss": 2.8086, "step": 59701 }, { "epoch": 2.93, "grad_norm": 0.7483466267585754, "learning_rate": 9.039302842346996e-07, "loss": 2.9024, "step": 59702 }, { "epoch": 2.93, "grad_norm": 0.7657617926597595, "learning_rate": 9.027363938178045e-07, "loss": 2.7308, "step": 59703 }, { "epoch": 2.93, "grad_norm": 0.7511147856712341, "learning_rate": 9.015432911654962e-07, "loss": 2.9577, "step": 59704 }, { "epoch": 2.93, "grad_norm": 0.7356677651405334, "learning_rate": 9.003509762809724e-07, "loss": 2.991, "step": 59705 }, { "epoch": 2.93, "grad_norm": 0.7413914203643799, "learning_rate": 8.991594491673637e-07, "loss": 3.2518, "step": 59706 }, { "epoch": 2.93, "grad_norm": 0.7317723631858826, "learning_rate": 8.97968709827801e-07, "loss": 2.9233, "step": 59707 }, { "epoch": 2.93, "grad_norm": 0.7266475558280945, "learning_rate": 8.967787582654484e-07, "loss": 2.9267, "step": 59708 }, { "epoch": 2.93, "grad_norm": 0.725741446018219, "learning_rate": 8.955895944834035e-07, "loss": 2.8691, "step": 59709 }, { "epoch": 2.93, "grad_norm": 0.7397601008415222, "learning_rate": 8.94401218484797e-07, "loss": 2.9947, "step": 59710 }, { "epoch": 2.93, "grad_norm": 0.735898494720459, "learning_rate": 8.932136302727933e-07, "loss": 2.7562, "step": 59711 }, { "epoch": 2.93, "grad_norm": 0.7711181044578552, "learning_rate": 8.920268298504896e-07, "loss": 2.9557, "step": 59712 }, { "epoch": 2.93, "grad_norm": 0.8324527740478516, "learning_rate": 8.908408172210169e-07, "loss": 2.8636, "step": 59713 }, { "epoch": 2.93, "grad_norm": 0.7208682894706726, "learning_rate": 8.896555923875393e-07, "loss": 2.7489, "step": 59714 }, { "epoch": 2.93, "grad_norm": 0.7793273329734802, "learning_rate": 8.884711553530876e-07, "loss": 2.862, "step": 59715 }, { "epoch": 2.93, "grad_norm": 0.7881961464881897, "learning_rate": 8.872875061208928e-07, "loss": 2.8456, "step": 59716 }, { "epoch": 2.93, "grad_norm": 0.7748516798019409, "learning_rate": 8.861046446939857e-07, "loss": 3.0182, "step": 59717 }, { "epoch": 2.93, "grad_norm": 0.7874419093132019, "learning_rate": 8.849225710755304e-07, "loss": 2.9961, "step": 59718 }, { "epoch": 2.93, "grad_norm": 0.7328853011131287, "learning_rate": 8.83741285268591e-07, "loss": 3.0178, "step": 59719 }, { "epoch": 2.93, "grad_norm": 0.7477407455444336, "learning_rate": 8.825607872763319e-07, "loss": 2.914, "step": 59720 }, { "epoch": 2.93, "grad_norm": 0.7703258991241455, "learning_rate": 8.813810771018503e-07, "loss": 3.1145, "step": 59721 }, { "epoch": 2.93, "grad_norm": 0.7548531889915466, "learning_rate": 8.802021547482441e-07, "loss": 3.0983, "step": 59722 }, { "epoch": 2.93, "grad_norm": 0.7501577734947205, "learning_rate": 8.790240202186105e-07, "loss": 2.8297, "step": 59723 }, { "epoch": 2.93, "grad_norm": 0.8373538255691528, "learning_rate": 8.778466735160472e-07, "loss": 2.8036, "step": 59724 }, { "epoch": 2.93, "grad_norm": 0.7723232507705688, "learning_rate": 8.76670114643685e-07, "loss": 2.98, "step": 59725 }, { "epoch": 2.93, "grad_norm": 0.7283118963241577, "learning_rate": 8.754943436046214e-07, "loss": 2.9201, "step": 59726 }, { "epoch": 2.93, "grad_norm": 0.7659836411476135, "learning_rate": 8.743193604019205e-07, "loss": 2.7259, "step": 59727 }, { "epoch": 2.93, "grad_norm": 0.7766382098197937, "learning_rate": 8.731451650386801e-07, "loss": 2.8198, "step": 59728 }, { "epoch": 2.93, "grad_norm": 0.7227720618247986, "learning_rate": 8.719717575180308e-07, "loss": 3.1198, "step": 59729 }, { "epoch": 2.93, "grad_norm": 0.7672495245933533, "learning_rate": 8.707991378430034e-07, "loss": 2.8466, "step": 59730 }, { "epoch": 2.93, "grad_norm": 0.754241406917572, "learning_rate": 8.696273060167624e-07, "loss": 2.6828, "step": 59731 }, { "epoch": 2.93, "grad_norm": 0.7628318667411804, "learning_rate": 8.684562620423385e-07, "loss": 3.0211, "step": 59732 }, { "epoch": 2.93, "grad_norm": 0.788650393486023, "learning_rate": 8.672860059228293e-07, "loss": 2.8669, "step": 59733 }, { "epoch": 2.93, "grad_norm": 0.7480198740959167, "learning_rate": 8.661165376613321e-07, "loss": 2.9814, "step": 59734 }, { "epoch": 2.93, "grad_norm": 0.7865045666694641, "learning_rate": 8.649478572609114e-07, "loss": 2.9745, "step": 59735 }, { "epoch": 2.93, "grad_norm": 0.7339010238647461, "learning_rate": 8.637799647246646e-07, "loss": 2.803, "step": 59736 }, { "epoch": 2.93, "grad_norm": 0.6831942200660706, "learning_rate": 8.626128600556226e-07, "loss": 3.0673, "step": 59737 }, { "epoch": 2.93, "grad_norm": 0.7489328980445862, "learning_rate": 8.614465432568829e-07, "loss": 2.8345, "step": 59738 }, { "epoch": 2.93, "grad_norm": 0.8042427897453308, "learning_rate": 8.602810143315431e-07, "loss": 2.8714, "step": 59739 }, { "epoch": 2.93, "grad_norm": 0.8107337355613708, "learning_rate": 8.591162732826339e-07, "loss": 2.9028, "step": 59740 }, { "epoch": 2.93, "grad_norm": 0.7713976502418518, "learning_rate": 8.579523201132866e-07, "loss": 2.9202, "step": 59741 }, { "epoch": 2.93, "grad_norm": 0.8133545517921448, "learning_rate": 8.567891548264649e-07, "loss": 2.8562, "step": 59742 }, { "epoch": 2.93, "grad_norm": 0.8308982253074646, "learning_rate": 8.556267774253001e-07, "loss": 2.7826, "step": 59743 }, { "epoch": 2.93, "grad_norm": 0.756340503692627, "learning_rate": 8.544651879128894e-07, "loss": 2.8316, "step": 59744 }, { "epoch": 2.93, "grad_norm": 0.7788317203521729, "learning_rate": 8.533043862921973e-07, "loss": 2.8322, "step": 59745 }, { "epoch": 2.93, "grad_norm": 0.735393226146698, "learning_rate": 8.521443725663546e-07, "loss": 2.8576, "step": 59746 }, { "epoch": 2.93, "grad_norm": 0.7246791124343872, "learning_rate": 8.509851467383588e-07, "loss": 3.0481, "step": 59747 }, { "epoch": 2.93, "grad_norm": 0.7521356344223022, "learning_rate": 8.498267088113409e-07, "loss": 2.7354, "step": 59748 }, { "epoch": 2.93, "grad_norm": 0.803340494632721, "learning_rate": 8.48669058788265e-07, "loss": 2.8304, "step": 59749 }, { "epoch": 2.93, "grad_norm": 0.7315862774848938, "learning_rate": 8.47512196672262e-07, "loss": 2.8125, "step": 59750 }, { "epoch": 2.93, "grad_norm": 0.7708979845046997, "learning_rate": 8.463561224662962e-07, "loss": 2.8332, "step": 59751 }, { "epoch": 2.93, "grad_norm": 0.7568145990371704, "learning_rate": 8.452008361734985e-07, "loss": 2.7078, "step": 59752 }, { "epoch": 2.93, "grad_norm": 0.7380116581916809, "learning_rate": 8.440463377968332e-07, "loss": 2.9881, "step": 59753 }, { "epoch": 2.93, "grad_norm": 0.7458478212356567, "learning_rate": 8.428926273393977e-07, "loss": 2.9401, "step": 59754 }, { "epoch": 2.93, "grad_norm": 0.7849690914154053, "learning_rate": 8.417397048041897e-07, "loss": 2.8784, "step": 59755 }, { "epoch": 2.93, "grad_norm": 0.8106399178504944, "learning_rate": 8.405875701942732e-07, "loss": 2.7235, "step": 59756 }, { "epoch": 2.93, "grad_norm": 0.7472584247589111, "learning_rate": 8.394362235126795e-07, "loss": 3.1152, "step": 59757 }, { "epoch": 2.93, "grad_norm": 0.7843793630599976, "learning_rate": 8.382856647624392e-07, "loss": 2.928, "step": 59758 }, { "epoch": 2.93, "grad_norm": 0.7767054438591003, "learning_rate": 8.371358939465833e-07, "loss": 2.9102, "step": 59759 }, { "epoch": 2.93, "grad_norm": 0.7358002066612244, "learning_rate": 8.359869110681095e-07, "loss": 2.8749, "step": 59760 }, { "epoch": 2.93, "grad_norm": 0.768272340297699, "learning_rate": 8.348387161301151e-07, "loss": 2.8866, "step": 59761 }, { "epoch": 2.93, "grad_norm": 0.7254506945610046, "learning_rate": 8.336913091355313e-07, "loss": 2.7688, "step": 59762 }, { "epoch": 2.93, "grad_norm": 0.7313871383666992, "learning_rate": 8.325446900874555e-07, "loss": 2.6363, "step": 59763 }, { "epoch": 2.93, "grad_norm": 0.793598473072052, "learning_rate": 8.313988589888853e-07, "loss": 2.9504, "step": 59764 }, { "epoch": 2.93, "grad_norm": 0.73553466796875, "learning_rate": 8.302538158428185e-07, "loss": 2.9293, "step": 59765 }, { "epoch": 2.93, "grad_norm": 0.765264093875885, "learning_rate": 8.29109560652319e-07, "loss": 3.1213, "step": 59766 }, { "epoch": 2.93, "grad_norm": 0.7166268229484558, "learning_rate": 8.279660934203181e-07, "loss": 2.8594, "step": 59767 }, { "epoch": 2.93, "grad_norm": 0.772975742816925, "learning_rate": 8.268234141499131e-07, "loss": 2.7602, "step": 59768 }, { "epoch": 2.93, "grad_norm": 0.73553466796875, "learning_rate": 8.256815228440683e-07, "loss": 2.8789, "step": 59769 }, { "epoch": 2.93, "grad_norm": 0.7122424244880676, "learning_rate": 8.245404195058147e-07, "loss": 2.7645, "step": 59770 }, { "epoch": 2.93, "grad_norm": 0.6854143738746643, "learning_rate": 8.234001041381166e-07, "loss": 2.9399, "step": 59771 }, { "epoch": 2.93, "grad_norm": 0.7698032855987549, "learning_rate": 8.222605767440383e-07, "loss": 3.0868, "step": 59772 }, { "epoch": 2.93, "grad_norm": 0.7176466584205627, "learning_rate": 8.211218373265105e-07, "loss": 2.824, "step": 59773 }, { "epoch": 2.93, "grad_norm": 0.7282741665840149, "learning_rate": 8.199838858885977e-07, "loss": 2.9953, "step": 59774 }, { "epoch": 2.93, "grad_norm": 0.8401013612747192, "learning_rate": 8.18846722433264e-07, "loss": 2.7525, "step": 59775 }, { "epoch": 2.93, "grad_norm": 0.7844291925430298, "learning_rate": 8.177103469635071e-07, "loss": 2.899, "step": 59776 }, { "epoch": 2.93, "grad_norm": 0.7597390413284302, "learning_rate": 8.165747594823247e-07, "loss": 2.7735, "step": 59777 }, { "epoch": 2.93, "grad_norm": 0.7756280899047852, "learning_rate": 8.154399599926809e-07, "loss": 2.8091, "step": 59778 }, { "epoch": 2.93, "grad_norm": 0.7339857816696167, "learning_rate": 8.143059484976066e-07, "loss": 2.9528, "step": 59779 }, { "epoch": 2.93, "grad_norm": 0.764804482460022, "learning_rate": 8.131727250000663e-07, "loss": 2.8768, "step": 59780 }, { "epoch": 2.93, "grad_norm": 0.8054780960083008, "learning_rate": 8.120402895030908e-07, "loss": 2.5517, "step": 59781 }, { "epoch": 2.93, "grad_norm": 0.7895975708961487, "learning_rate": 8.109086420095779e-07, "loss": 2.8077, "step": 59782 }, { "epoch": 2.93, "grad_norm": 0.7449101805686951, "learning_rate": 8.097777825225582e-07, "loss": 2.8612, "step": 59783 }, { "epoch": 2.93, "grad_norm": 0.7465765476226807, "learning_rate": 8.086477110449962e-07, "loss": 2.7231, "step": 59784 }, { "epoch": 2.93, "grad_norm": 0.7886302471160889, "learning_rate": 8.075184275798896e-07, "loss": 2.8589, "step": 59785 }, { "epoch": 2.93, "grad_norm": 0.7831330895423889, "learning_rate": 8.063899321302025e-07, "loss": 2.7825, "step": 59786 }, { "epoch": 2.93, "grad_norm": 0.7341232895851135, "learning_rate": 8.052622246988993e-07, "loss": 2.8403, "step": 59787 }, { "epoch": 2.93, "grad_norm": 0.7725659608840942, "learning_rate": 8.041353052889443e-07, "loss": 3.0116, "step": 59788 }, { "epoch": 2.93, "grad_norm": 0.7446717023849487, "learning_rate": 8.030091739033018e-07, "loss": 2.9361, "step": 59789 }, { "epoch": 2.93, "grad_norm": 0.7172529697418213, "learning_rate": 8.018838305450026e-07, "loss": 2.827, "step": 59790 }, { "epoch": 2.93, "grad_norm": 0.7514923810958862, "learning_rate": 8.007592752169112e-07, "loss": 2.8141, "step": 59791 }, { "epoch": 2.93, "grad_norm": 0.7799364924430847, "learning_rate": 7.996355079220585e-07, "loss": 2.9459, "step": 59792 }, { "epoch": 2.93, "grad_norm": 0.801102876663208, "learning_rate": 7.985125286633753e-07, "loss": 2.6798, "step": 59793 }, { "epoch": 2.93, "grad_norm": 0.7336550354957581, "learning_rate": 7.973903374438262e-07, "loss": 3.0554, "step": 59794 }, { "epoch": 2.93, "grad_norm": 0.7697311043739319, "learning_rate": 7.962689342663753e-07, "loss": 2.8884, "step": 59795 }, { "epoch": 2.93, "grad_norm": 0.7298890948295593, "learning_rate": 7.951483191339869e-07, "loss": 2.6709, "step": 59796 }, { "epoch": 2.93, "grad_norm": 0.7298159003257751, "learning_rate": 7.940284920495588e-07, "loss": 2.8218, "step": 59797 }, { "epoch": 2.93, "grad_norm": 0.7589902281761169, "learning_rate": 7.929094530161217e-07, "loss": 2.8853, "step": 59798 }, { "epoch": 2.93, "grad_norm": 0.7164919972419739, "learning_rate": 7.917912020365402e-07, "loss": 2.8939, "step": 59799 }, { "epoch": 2.93, "grad_norm": 0.7424035668373108, "learning_rate": 7.906737391137785e-07, "loss": 2.9849, "step": 59800 }, { "epoch": 2.93, "grad_norm": 0.7874431014060974, "learning_rate": 7.895570642508342e-07, "loss": 2.6638, "step": 59801 }, { "epoch": 2.93, "grad_norm": 0.7526593804359436, "learning_rate": 7.88441177450605e-07, "loss": 2.9343, "step": 59802 }, { "epoch": 2.93, "grad_norm": 0.7278794050216675, "learning_rate": 7.873260787160551e-07, "loss": 2.8957, "step": 59803 }, { "epoch": 2.93, "grad_norm": 0.7460549473762512, "learning_rate": 7.862117680500823e-07, "loss": 2.8864, "step": 59804 }, { "epoch": 2.93, "grad_norm": 0.7620648145675659, "learning_rate": 7.850982454556509e-07, "loss": 2.8314, "step": 59805 }, { "epoch": 2.93, "grad_norm": 0.727797269821167, "learning_rate": 7.839855109356585e-07, "loss": 2.9278, "step": 59806 }, { "epoch": 2.93, "grad_norm": 0.7091959714889526, "learning_rate": 7.828735644931028e-07, "loss": 2.9797, "step": 59807 }, { "epoch": 2.93, "grad_norm": 0.7799173593521118, "learning_rate": 7.81762406130848e-07, "loss": 2.9589, "step": 59808 }, { "epoch": 2.93, "grad_norm": 0.7307581901550293, "learning_rate": 7.806520358518587e-07, "loss": 2.7646, "step": 59809 }, { "epoch": 2.93, "grad_norm": 0.8110495805740356, "learning_rate": 7.795424536590322e-07, "loss": 2.9862, "step": 59810 }, { "epoch": 2.93, "grad_norm": 0.7516154646873474, "learning_rate": 7.784336595552998e-07, "loss": 3.0308, "step": 59811 }, { "epoch": 2.93, "grad_norm": 0.7222862839698792, "learning_rate": 7.773256535436257e-07, "loss": 2.7501, "step": 59812 }, { "epoch": 2.93, "grad_norm": 0.7540051937103271, "learning_rate": 7.762184356268408e-07, "loss": 2.8516, "step": 59813 }, { "epoch": 2.93, "grad_norm": 0.7463213205337524, "learning_rate": 7.751120058079429e-07, "loss": 2.9547, "step": 59814 }, { "epoch": 2.93, "grad_norm": 0.7319959402084351, "learning_rate": 7.740063640897964e-07, "loss": 2.8742, "step": 59815 }, { "epoch": 2.93, "grad_norm": 0.7268669605255127, "learning_rate": 7.729015104753322e-07, "loss": 2.9301, "step": 59816 }, { "epoch": 2.93, "grad_norm": 0.7571326494216919, "learning_rate": 7.71797444967448e-07, "loss": 2.7898, "step": 59817 }, { "epoch": 2.93, "grad_norm": 0.7747343182563782, "learning_rate": 7.706941675690747e-07, "loss": 3.0083, "step": 59818 }, { "epoch": 2.93, "grad_norm": 0.7545844912528992, "learning_rate": 7.695916782831102e-07, "loss": 2.9409, "step": 59819 }, { "epoch": 2.93, "grad_norm": 0.7999106049537659, "learning_rate": 7.68489977112452e-07, "loss": 2.7857, "step": 59820 }, { "epoch": 2.93, "grad_norm": 0.7516934275627136, "learning_rate": 7.673890640599978e-07, "loss": 2.9531, "step": 59821 }, { "epoch": 2.93, "grad_norm": 0.73682701587677, "learning_rate": 7.662889391286453e-07, "loss": 2.9481, "step": 59822 }, { "epoch": 2.93, "grad_norm": 0.7407304048538208, "learning_rate": 7.651896023213255e-07, "loss": 2.8385, "step": 59823 }, { "epoch": 2.93, "grad_norm": 0.8045844435691833, "learning_rate": 7.640910536408695e-07, "loss": 2.9524, "step": 59824 }, { "epoch": 2.93, "grad_norm": 0.7355616092681885, "learning_rate": 7.629932930902416e-07, "loss": 2.7532, "step": 59825 }, { "epoch": 2.93, "grad_norm": 0.8066126704216003, "learning_rate": 7.618963206722728e-07, "loss": 2.9467, "step": 59826 }, { "epoch": 2.93, "grad_norm": 0.8547759652137756, "learning_rate": 7.608001363899274e-07, "loss": 2.8454, "step": 59827 }, { "epoch": 2.93, "grad_norm": 0.77346271276474, "learning_rate": 7.597047402460033e-07, "loss": 2.6539, "step": 59828 }, { "epoch": 2.93, "grad_norm": 0.7750498652458191, "learning_rate": 7.586101322434313e-07, "loss": 2.8954, "step": 59829 }, { "epoch": 2.93, "grad_norm": 0.7115117907524109, "learning_rate": 7.575163123850758e-07, "loss": 2.933, "step": 59830 }, { "epoch": 2.93, "grad_norm": 0.7660773992538452, "learning_rate": 7.564232806738679e-07, "loss": 3.1174, "step": 59831 }, { "epoch": 2.93, "grad_norm": 0.7857538461685181, "learning_rate": 7.553310371126053e-07, "loss": 2.6361, "step": 59832 }, { "epoch": 2.93, "grad_norm": 0.789605438709259, "learning_rate": 7.542395817042523e-07, "loss": 2.6682, "step": 59833 }, { "epoch": 2.93, "grad_norm": 0.7484418153762817, "learning_rate": 7.5314891445164e-07, "loss": 2.6815, "step": 59834 }, { "epoch": 2.93, "grad_norm": 0.7630303502082825, "learning_rate": 7.520590353575994e-07, "loss": 2.6285, "step": 59835 }, { "epoch": 2.93, "grad_norm": 0.8174473643302917, "learning_rate": 7.509699444250949e-07, "loss": 2.7668, "step": 59836 }, { "epoch": 2.93, "grad_norm": 0.791915774345398, "learning_rate": 7.498816416569242e-07, "loss": 2.9532, "step": 59837 }, { "epoch": 2.93, "grad_norm": 0.7464105486869812, "learning_rate": 7.487941270559516e-07, "loss": 3.0629, "step": 59838 }, { "epoch": 2.93, "grad_norm": 0.7361699938774109, "learning_rate": 7.47707400625075e-07, "loss": 2.7977, "step": 59839 }, { "epoch": 2.93, "grad_norm": 0.7796388268470764, "learning_rate": 7.466214623671585e-07, "loss": 2.9098, "step": 59840 }, { "epoch": 2.93, "grad_norm": 0.7234344482421875, "learning_rate": 7.455363122850333e-07, "loss": 2.918, "step": 59841 }, { "epoch": 2.93, "grad_norm": 0.7764577269554138, "learning_rate": 7.444519503815971e-07, "loss": 2.8202, "step": 59842 }, { "epoch": 2.93, "grad_norm": 0.7591822147369385, "learning_rate": 7.43368376659681e-07, "loss": 2.964, "step": 59843 }, { "epoch": 2.93, "grad_norm": 0.6883577704429626, "learning_rate": 7.422855911221159e-07, "loss": 2.7551, "step": 59844 }, { "epoch": 2.93, "grad_norm": 0.7411786913871765, "learning_rate": 7.412035937717665e-07, "loss": 2.8426, "step": 59845 }, { "epoch": 2.93, "grad_norm": 0.7492934465408325, "learning_rate": 7.4012238461153e-07, "loss": 2.8528, "step": 59846 }, { "epoch": 2.93, "grad_norm": 0.7484312057495117, "learning_rate": 7.390419636442047e-07, "loss": 2.831, "step": 59847 }, { "epoch": 2.93, "grad_norm": 0.7504763007164001, "learning_rate": 7.379623308726213e-07, "loss": 2.8901, "step": 59848 }, { "epoch": 2.93, "grad_norm": 1.2700088024139404, "learning_rate": 7.368834862997109e-07, "loss": 2.9359, "step": 59849 }, { "epoch": 2.93, "grad_norm": 0.7634099125862122, "learning_rate": 7.358054299282045e-07, "loss": 2.8977, "step": 59850 }, { "epoch": 2.93, "grad_norm": 0.7338115572929382, "learning_rate": 7.347281617610001e-07, "loss": 2.7591, "step": 59851 }, { "epoch": 2.93, "grad_norm": 0.7483226656913757, "learning_rate": 7.336516818009286e-07, "loss": 2.9123, "step": 59852 }, { "epoch": 2.93, "grad_norm": 0.7367458343505859, "learning_rate": 7.32575990050821e-07, "loss": 2.9619, "step": 59853 }, { "epoch": 2.93, "grad_norm": 0.7470386624336243, "learning_rate": 7.315010865135418e-07, "loss": 2.7604, "step": 59854 }, { "epoch": 2.93, "grad_norm": 0.7699258923530579, "learning_rate": 7.304269711918553e-07, "loss": 3.1449, "step": 59855 }, { "epoch": 2.93, "grad_norm": 0.7648332118988037, "learning_rate": 7.293536440886594e-07, "loss": 2.8605, "step": 59856 }, { "epoch": 2.93, "grad_norm": 0.7431793212890625, "learning_rate": 7.282811052067183e-07, "loss": 3.0201, "step": 59857 }, { "epoch": 2.93, "grad_norm": 0.7206470966339111, "learning_rate": 7.272093545489299e-07, "loss": 2.9679, "step": 59858 }, { "epoch": 2.93, "grad_norm": 0.7647475600242615, "learning_rate": 7.261383921180253e-07, "loss": 3.2728, "step": 59859 }, { "epoch": 2.93, "grad_norm": 0.7028852701187134, "learning_rate": 7.250682179169021e-07, "loss": 2.8486, "step": 59860 }, { "epoch": 2.93, "grad_norm": 0.7771363854408264, "learning_rate": 7.239988319483581e-07, "loss": 2.6647, "step": 59861 }, { "epoch": 2.93, "grad_norm": 0.802079975605011, "learning_rate": 7.22930234215191e-07, "loss": 2.7629, "step": 59862 }, { "epoch": 2.93, "grad_norm": 0.7468277812004089, "learning_rate": 7.21862424720232e-07, "loss": 2.5919, "step": 59863 }, { "epoch": 2.93, "grad_norm": 0.7966805696487427, "learning_rate": 7.207954034663122e-07, "loss": 2.8663, "step": 59864 }, { "epoch": 2.93, "grad_norm": 0.7449951767921448, "learning_rate": 7.197291704561959e-07, "loss": 2.8764, "step": 59865 }, { "epoch": 2.93, "grad_norm": 0.7162653803825378, "learning_rate": 7.186637256927141e-07, "loss": 2.9887, "step": 59866 }, { "epoch": 2.93, "grad_norm": 0.762417733669281, "learning_rate": 7.175990691786981e-07, "loss": 2.6655, "step": 59867 }, { "epoch": 2.93, "grad_norm": 0.7331001162528992, "learning_rate": 7.165352009169123e-07, "loss": 2.7909, "step": 59868 }, { "epoch": 2.93, "grad_norm": 0.8558750152587891, "learning_rate": 7.154721209101544e-07, "loss": 2.9628, "step": 59869 }, { "epoch": 2.93, "grad_norm": 0.7514426112174988, "learning_rate": 7.144098291612888e-07, "loss": 2.8311, "step": 59870 }, { "epoch": 2.93, "grad_norm": 0.7667511701583862, "learning_rate": 7.133483256730465e-07, "loss": 2.8383, "step": 59871 }, { "epoch": 2.93, "grad_norm": 0.7563773393630981, "learning_rate": 7.122876104482256e-07, "loss": 2.8637, "step": 59872 }, { "epoch": 2.93, "grad_norm": 0.7660837173461914, "learning_rate": 7.112276834896568e-07, "loss": 2.8503, "step": 59873 }, { "epoch": 2.93, "grad_norm": 0.7688391804695129, "learning_rate": 7.101685448001382e-07, "loss": 2.9298, "step": 59874 }, { "epoch": 2.93, "grad_norm": 0.7841348052024841, "learning_rate": 7.091101943824007e-07, "loss": 3.0863, "step": 59875 }, { "epoch": 2.93, "grad_norm": 0.748285174369812, "learning_rate": 7.080526322392754e-07, "loss": 2.9875, "step": 59876 }, { "epoch": 2.93, "grad_norm": 0.7872505784034729, "learning_rate": 7.069958583735602e-07, "loss": 3.2147, "step": 59877 }, { "epoch": 2.93, "grad_norm": 0.7570214867591858, "learning_rate": 7.059398727880194e-07, "loss": 3.0622, "step": 59878 }, { "epoch": 2.93, "grad_norm": 0.9395710825920105, "learning_rate": 7.048846754853843e-07, "loss": 3.0173, "step": 59879 }, { "epoch": 2.93, "grad_norm": 0.7911810874938965, "learning_rate": 7.038302664685192e-07, "loss": 2.8858, "step": 59880 }, { "epoch": 2.93, "grad_norm": 0.7433484792709351, "learning_rate": 7.027766457401551e-07, "loss": 2.9579, "step": 59881 }, { "epoch": 2.93, "grad_norm": 0.7412406206130981, "learning_rate": 7.017238133030901e-07, "loss": 2.8247, "step": 59882 }, { "epoch": 2.93, "grad_norm": 0.742946207523346, "learning_rate": 7.006717691600883e-07, "loss": 2.6349, "step": 59883 }, { "epoch": 2.93, "grad_norm": 0.7795896530151367, "learning_rate": 6.99620513313881e-07, "loss": 2.8827, "step": 59884 }, { "epoch": 2.93, "grad_norm": 0.7590773701667786, "learning_rate": 6.985700457672993e-07, "loss": 2.8935, "step": 59885 }, { "epoch": 2.93, "grad_norm": 0.7604755163192749, "learning_rate": 6.975203665230744e-07, "loss": 3.0704, "step": 59886 }, { "epoch": 2.93, "grad_norm": 0.7391711473464966, "learning_rate": 6.964714755839707e-07, "loss": 2.9104, "step": 59887 }, { "epoch": 2.93, "grad_norm": 0.7480840682983398, "learning_rate": 6.954233729527859e-07, "loss": 2.9565, "step": 59888 }, { "epoch": 2.94, "grad_norm": 0.7480906844139099, "learning_rate": 6.943760586322178e-07, "loss": 2.8197, "step": 59889 }, { "epoch": 2.94, "grad_norm": 0.7429905533790588, "learning_rate": 6.933295326250976e-07, "loss": 2.9929, "step": 59890 }, { "epoch": 2.94, "grad_norm": 0.8021376132965088, "learning_rate": 6.922837949341231e-07, "loss": 2.9372, "step": 59891 }, { "epoch": 2.94, "grad_norm": 0.8098527193069458, "learning_rate": 6.912388455620588e-07, "loss": 3.0109, "step": 59892 }, { "epoch": 2.94, "grad_norm": 0.7434927225112915, "learning_rate": 6.901946845117023e-07, "loss": 2.8893, "step": 59893 }, { "epoch": 2.94, "grad_norm": 0.7502673864364624, "learning_rate": 6.891513117857517e-07, "loss": 2.8706, "step": 59894 }, { "epoch": 2.94, "grad_norm": 0.7409077882766724, "learning_rate": 6.881087273869712e-07, "loss": 2.8049, "step": 59895 }, { "epoch": 2.94, "grad_norm": 0.7963376045227051, "learning_rate": 6.870669313180921e-07, "loss": 2.9638, "step": 59896 }, { "epoch": 2.94, "grad_norm": 0.7674747705459595, "learning_rate": 6.860259235818788e-07, "loss": 2.8135, "step": 59897 }, { "epoch": 2.94, "grad_norm": 0.8309979438781738, "learning_rate": 6.849857041810958e-07, "loss": 2.8022, "step": 59898 }, { "epoch": 2.94, "grad_norm": 0.7403390407562256, "learning_rate": 6.839462731184409e-07, "loss": 2.8496, "step": 59899 }, { "epoch": 2.94, "grad_norm": 0.7429600954055786, "learning_rate": 6.829076303966785e-07, "loss": 3.2261, "step": 59900 }, { "epoch": 2.94, "grad_norm": 0.7548747658729553, "learning_rate": 6.818697760185065e-07, "loss": 2.8569, "step": 59901 }, { "epoch": 2.94, "grad_norm": 0.7258870005607605, "learning_rate": 6.808327099867228e-07, "loss": 2.9454, "step": 59902 }, { "epoch": 2.94, "grad_norm": 0.7824847102165222, "learning_rate": 6.797964323039918e-07, "loss": 3.0032, "step": 59903 }, { "epoch": 2.94, "grad_norm": 0.769087016582489, "learning_rate": 6.787609429730779e-07, "loss": 2.9478, "step": 59904 }, { "epoch": 2.94, "grad_norm": 0.7909684777259827, "learning_rate": 6.777262419967122e-07, "loss": 2.8437, "step": 59905 }, { "epoch": 2.94, "grad_norm": 0.7738947868347168, "learning_rate": 6.766923293775928e-07, "loss": 2.8174, "step": 59906 }, { "epoch": 2.94, "grad_norm": 0.7569680213928223, "learning_rate": 6.756592051184507e-07, "loss": 2.7229, "step": 59907 }, { "epoch": 2.94, "grad_norm": 0.7944665551185608, "learning_rate": 6.746268692220502e-07, "loss": 2.8974, "step": 59908 }, { "epoch": 2.94, "grad_norm": 0.7685111165046692, "learning_rate": 6.735953216910561e-07, "loss": 2.7728, "step": 59909 }, { "epoch": 2.94, "grad_norm": 0.7883465886116028, "learning_rate": 6.725645625281995e-07, "loss": 2.8538, "step": 59910 }, { "epoch": 2.94, "grad_norm": 0.7175097465515137, "learning_rate": 6.715345917362113e-07, "loss": 2.7597, "step": 59911 }, { "epoch": 2.94, "grad_norm": 0.8032987713813782, "learning_rate": 6.705054093177897e-07, "loss": 2.6333, "step": 59912 }, { "epoch": 2.94, "grad_norm": 0.8348165154457092, "learning_rate": 6.694770152756657e-07, "loss": 2.7465, "step": 59913 }, { "epoch": 2.94, "grad_norm": 0.78910893201828, "learning_rate": 6.684494096125037e-07, "loss": 2.8535, "step": 59914 }, { "epoch": 2.94, "grad_norm": 0.7422067523002625, "learning_rate": 6.674225923310683e-07, "loss": 2.9404, "step": 59915 }, { "epoch": 2.94, "grad_norm": 0.8071993589401245, "learning_rate": 6.66396563434024e-07, "loss": 3.0353, "step": 59916 }, { "epoch": 2.94, "grad_norm": 0.7702639698982239, "learning_rate": 6.653713229240686e-07, "loss": 3.0289, "step": 59917 }, { "epoch": 2.94, "grad_norm": 0.7507279515266418, "learning_rate": 6.643468708039335e-07, "loss": 2.84, "step": 59918 }, { "epoch": 2.94, "grad_norm": 0.7871813178062439, "learning_rate": 6.633232070762828e-07, "loss": 2.923, "step": 59919 }, { "epoch": 2.94, "grad_norm": 0.7500841617584229, "learning_rate": 6.62300331743848e-07, "loss": 2.9329, "step": 59920 }, { "epoch": 2.94, "grad_norm": 0.7141773104667664, "learning_rate": 6.612782448092935e-07, "loss": 3.0867, "step": 59921 }, { "epoch": 2.94, "grad_norm": 0.7696955800056458, "learning_rate": 6.602569462753171e-07, "loss": 2.8553, "step": 59922 }, { "epoch": 2.94, "grad_norm": 0.7573153376579285, "learning_rate": 6.5923643614465e-07, "loss": 2.8073, "step": 59923 }, { "epoch": 2.94, "grad_norm": 0.7639854550361633, "learning_rate": 6.582167144198902e-07, "loss": 2.7228, "step": 59924 }, { "epoch": 2.94, "grad_norm": 0.7519087195396423, "learning_rate": 6.571977811038354e-07, "loss": 2.9471, "step": 59925 }, { "epoch": 2.94, "grad_norm": 0.7760326266288757, "learning_rate": 6.561796361990834e-07, "loss": 2.9599, "step": 59926 }, { "epoch": 2.94, "grad_norm": 0.791580855846405, "learning_rate": 6.551622797083322e-07, "loss": 2.843, "step": 59927 }, { "epoch": 2.94, "grad_norm": 0.7381553053855896, "learning_rate": 6.541457116342797e-07, "loss": 2.8464, "step": 59928 }, { "epoch": 2.94, "grad_norm": 0.7461539506912231, "learning_rate": 6.531299319796234e-07, "loss": 2.7977, "step": 59929 }, { "epoch": 2.94, "grad_norm": 0.7161452174186707, "learning_rate": 6.52114940746995e-07, "loss": 3.0583, "step": 59930 }, { "epoch": 2.94, "grad_norm": 0.7718291878700256, "learning_rate": 6.51100737939092e-07, "loss": 2.8127, "step": 59931 }, { "epoch": 2.94, "grad_norm": 0.8032710552215576, "learning_rate": 6.500873235585458e-07, "loss": 2.8657, "step": 59932 }, { "epoch": 2.94, "grad_norm": 0.8120545744895935, "learning_rate": 6.490746976080874e-07, "loss": 2.8896, "step": 59933 }, { "epoch": 2.94, "grad_norm": 0.7528083920478821, "learning_rate": 6.480628600903482e-07, "loss": 2.6816, "step": 59934 }, { "epoch": 2.94, "grad_norm": 0.7332311868667603, "learning_rate": 6.470518110080259e-07, "loss": 2.7482, "step": 59935 }, { "epoch": 2.94, "grad_norm": 0.7682661414146423, "learning_rate": 6.460415503637184e-07, "loss": 2.9757, "step": 59936 }, { "epoch": 2.94, "grad_norm": 0.7852498292922974, "learning_rate": 6.45032078160157e-07, "loss": 2.8943, "step": 59937 }, { "epoch": 2.94, "grad_norm": 0.7649948596954346, "learning_rate": 6.440233943999396e-07, "loss": 3.1078, "step": 59938 }, { "epoch": 2.94, "grad_norm": 0.7823342084884644, "learning_rate": 6.430154990857639e-07, "loss": 2.9817, "step": 59939 }, { "epoch": 2.94, "grad_norm": 0.755197286605835, "learning_rate": 6.420083922202945e-07, "loss": 2.9656, "step": 59940 }, { "epoch": 2.94, "grad_norm": 0.7865545153617859, "learning_rate": 6.410020738061294e-07, "loss": 2.8751, "step": 59941 }, { "epoch": 2.94, "grad_norm": 0.7866171598434448, "learning_rate": 6.399965438459664e-07, "loss": 2.8668, "step": 59942 }, { "epoch": 2.94, "grad_norm": 0.7472968697547913, "learning_rate": 6.389918023424367e-07, "loss": 2.7944, "step": 59943 }, { "epoch": 2.94, "grad_norm": 0.8500422835350037, "learning_rate": 6.379878492982049e-07, "loss": 2.9968, "step": 59944 }, { "epoch": 2.94, "grad_norm": 0.7453992962837219, "learning_rate": 6.369846847158689e-07, "loss": 2.8663, "step": 59945 }, { "epoch": 2.94, "grad_norm": 0.7211142182350159, "learning_rate": 6.359823085981264e-07, "loss": 2.7665, "step": 59946 }, { "epoch": 2.94, "grad_norm": 0.7596474885940552, "learning_rate": 6.349807209476088e-07, "loss": 2.7561, "step": 59947 }, { "epoch": 2.94, "grad_norm": 0.7471470832824707, "learning_rate": 6.339799217668806e-07, "loss": 2.7884, "step": 59948 }, { "epoch": 2.94, "grad_norm": 0.7682774662971497, "learning_rate": 6.329799110586731e-07, "loss": 2.7054, "step": 59949 }, { "epoch": 2.94, "grad_norm": 0.7464492321014404, "learning_rate": 6.31980688825584e-07, "loss": 2.7138, "step": 59950 }, { "epoch": 2.94, "grad_norm": 0.6935986876487732, "learning_rate": 6.309822550702448e-07, "loss": 2.8231, "step": 59951 }, { "epoch": 2.94, "grad_norm": 0.7591943740844727, "learning_rate": 6.299846097952865e-07, "loss": 2.8893, "step": 59952 }, { "epoch": 2.94, "grad_norm": 0.7476579546928406, "learning_rate": 6.289877530033072e-07, "loss": 2.9504, "step": 59953 }, { "epoch": 2.94, "grad_norm": 0.8381856083869934, "learning_rate": 6.279916846969713e-07, "loss": 2.689, "step": 59954 }, { "epoch": 2.94, "grad_norm": 0.779679536819458, "learning_rate": 6.2699640487891e-07, "loss": 2.6562, "step": 59955 }, { "epoch": 2.94, "grad_norm": 0.7561830282211304, "learning_rate": 6.260019135517213e-07, "loss": 2.7303, "step": 59956 }, { "epoch": 2.94, "grad_norm": 0.764441728591919, "learning_rate": 6.250082107180032e-07, "loss": 2.9123, "step": 59957 }, { "epoch": 2.94, "grad_norm": 0.7792513370513916, "learning_rate": 6.240152963804202e-07, "loss": 2.6153, "step": 59958 }, { "epoch": 2.94, "grad_norm": 0.7713596820831299, "learning_rate": 6.230231705415367e-07, "loss": 3.1653, "step": 59959 }, { "epoch": 2.94, "grad_norm": 0.7923070192337036, "learning_rate": 6.220318332040175e-07, "loss": 2.753, "step": 59960 }, { "epoch": 2.94, "grad_norm": 0.7460241317749023, "learning_rate": 6.210412843704605e-07, "loss": 2.9271, "step": 59961 }, { "epoch": 2.94, "grad_norm": 0.768636167049408, "learning_rate": 6.2005152404343e-07, "loss": 2.8513, "step": 59962 }, { "epoch": 2.94, "grad_norm": 0.7382229566574097, "learning_rate": 6.19062552225591e-07, "loss": 2.8161, "step": 59963 }, { "epoch": 2.94, "grad_norm": 0.7440184354782104, "learning_rate": 6.180743689195078e-07, "loss": 2.9522, "step": 59964 }, { "epoch": 2.94, "grad_norm": 0.7206867337226868, "learning_rate": 6.170869741278117e-07, "loss": 2.7925, "step": 59965 }, { "epoch": 2.94, "grad_norm": 0.7849396467208862, "learning_rate": 6.161003678531006e-07, "loss": 3.0001, "step": 59966 }, { "epoch": 2.94, "grad_norm": 0.7822026610374451, "learning_rate": 6.151145500979726e-07, "loss": 3.0031, "step": 59967 }, { "epoch": 2.94, "grad_norm": 0.7590682506561279, "learning_rate": 6.141295208649921e-07, "loss": 2.9123, "step": 59968 }, { "epoch": 2.94, "grad_norm": 0.8161299824714661, "learning_rate": 6.131452801567571e-07, "loss": 2.7404, "step": 59969 }, { "epoch": 2.94, "grad_norm": 0.7368728518486023, "learning_rate": 6.121618279758989e-07, "loss": 2.7845, "step": 59970 }, { "epoch": 2.94, "grad_norm": 0.7626436352729797, "learning_rate": 6.111791643250152e-07, "loss": 2.8567, "step": 59971 }, { "epoch": 2.94, "grad_norm": 0.7329202890396118, "learning_rate": 6.101972892066376e-07, "loss": 2.7161, "step": 59972 }, { "epoch": 2.94, "grad_norm": 0.7684486508369446, "learning_rate": 6.092162026233971e-07, "loss": 2.9794, "step": 59973 }, { "epoch": 2.94, "grad_norm": 0.769387423992157, "learning_rate": 6.082359045778251e-07, "loss": 2.7875, "step": 59974 }, { "epoch": 2.94, "grad_norm": 0.7294189929962158, "learning_rate": 6.072563950725862e-07, "loss": 2.8859, "step": 59975 }, { "epoch": 2.94, "grad_norm": 0.7302929759025574, "learning_rate": 6.062776741101783e-07, "loss": 2.7874, "step": 59976 }, { "epoch": 2.94, "grad_norm": 0.7487363815307617, "learning_rate": 6.052997416932326e-07, "loss": 2.9571, "step": 59977 }, { "epoch": 2.94, "grad_norm": 0.7794578075408936, "learning_rate": 6.043225978243138e-07, "loss": 2.9774, "step": 59978 }, { "epoch": 2.94, "grad_norm": 0.7639981508255005, "learning_rate": 6.033462425059865e-07, "loss": 2.8534, "step": 59979 }, { "epoch": 2.94, "grad_norm": 0.7297922968864441, "learning_rate": 6.023706757408487e-07, "loss": 2.799, "step": 59980 }, { "epoch": 2.94, "grad_norm": 0.7623937726020813, "learning_rate": 6.013958975313982e-07, "loss": 2.9303, "step": 59981 }, { "epoch": 2.94, "grad_norm": 0.7628301382064819, "learning_rate": 6.004219078802663e-07, "loss": 2.7251, "step": 59982 }, { "epoch": 2.94, "grad_norm": 0.7557390928268433, "learning_rate": 5.994487067900178e-07, "loss": 2.8481, "step": 59983 }, { "epoch": 2.94, "grad_norm": 0.7709507346153259, "learning_rate": 5.984762942631838e-07, "loss": 2.871, "step": 59984 }, { "epoch": 2.94, "grad_norm": 0.718964159488678, "learning_rate": 5.975046703023623e-07, "loss": 2.7692, "step": 59985 }, { "epoch": 2.94, "grad_norm": 0.7481675744056702, "learning_rate": 5.965338349100845e-07, "loss": 2.899, "step": 59986 }, { "epoch": 2.94, "grad_norm": 0.7227299809455872, "learning_rate": 5.955637880889153e-07, "loss": 2.7444, "step": 59987 }, { "epoch": 2.94, "grad_norm": 0.7137525677680969, "learning_rate": 5.945945298413856e-07, "loss": 2.8117, "step": 59988 }, { "epoch": 2.94, "grad_norm": 0.7731427550315857, "learning_rate": 5.936260601700937e-07, "loss": 2.776, "step": 59989 }, { "epoch": 2.94, "grad_norm": 0.7331191897392273, "learning_rate": 5.926583790775707e-07, "loss": 3.0075, "step": 59990 }, { "epoch": 2.94, "grad_norm": 0.7784517407417297, "learning_rate": 5.916914865663481e-07, "loss": 2.8882, "step": 59991 }, { "epoch": 2.94, "grad_norm": 0.847660481929779, "learning_rate": 5.907253826390234e-07, "loss": 2.9467, "step": 59992 }, { "epoch": 2.94, "grad_norm": 0.7578979730606079, "learning_rate": 5.897600672980618e-07, "loss": 2.915, "step": 59993 }, { "epoch": 2.94, "grad_norm": 0.754513144493103, "learning_rate": 5.88795540546061e-07, "loss": 2.7695, "step": 59994 }, { "epoch": 2.94, "grad_norm": 0.7934916615486145, "learning_rate": 5.878318023855855e-07, "loss": 2.6653, "step": 59995 }, { "epoch": 2.94, "grad_norm": 0.7949754595756531, "learning_rate": 5.868688528191002e-07, "loss": 2.8544, "step": 59996 }, { "epoch": 2.94, "grad_norm": 0.7312231659889221, "learning_rate": 5.859066918492028e-07, "loss": 2.908, "step": 59997 }, { "epoch": 2.94, "grad_norm": 0.7361305952072144, "learning_rate": 5.849453194783915e-07, "loss": 2.8526, "step": 59998 }, { "epoch": 2.94, "grad_norm": 0.7572147250175476, "learning_rate": 5.839847357091975e-07, "loss": 2.8984, "step": 59999 }, { "epoch": 2.94, "grad_norm": 0.793131411075592, "learning_rate": 5.830249405441855e-07, "loss": 2.9666, "step": 60000 }, { "epoch": 2.94, "grad_norm": 0.7692931294441223, "learning_rate": 5.820659339858536e-07, "loss": 3.1132, "step": 60001 }, { "epoch": 2.94, "grad_norm": 0.7415686249732971, "learning_rate": 5.811077160367328e-07, "loss": 2.9689, "step": 60002 }, { "epoch": 2.94, "grad_norm": 0.704154372215271, "learning_rate": 5.801502866993546e-07, "loss": 2.8167, "step": 60003 }, { "epoch": 2.94, "grad_norm": 0.7278680205345154, "learning_rate": 5.791936459762169e-07, "loss": 2.8319, "step": 60004 }, { "epoch": 2.94, "grad_norm": 0.7455959320068359, "learning_rate": 5.782377938698845e-07, "loss": 3.024, "step": 60005 }, { "epoch": 2.94, "grad_norm": 0.7692423462867737, "learning_rate": 5.77282730382822e-07, "loss": 2.9902, "step": 60006 }, { "epoch": 2.94, "grad_norm": 0.7454026937484741, "learning_rate": 5.76328455517594e-07, "loss": 2.8497, "step": 60007 }, { "epoch": 2.94, "grad_norm": 0.7537476420402527, "learning_rate": 5.753749692766984e-07, "loss": 2.8039, "step": 60008 }, { "epoch": 2.94, "grad_norm": 0.7972829937934875, "learning_rate": 5.744222716626334e-07, "loss": 2.9203, "step": 60009 }, { "epoch": 2.94, "grad_norm": 0.8087637424468994, "learning_rate": 5.734703626778969e-07, "loss": 2.9301, "step": 60010 }, { "epoch": 2.94, "grad_norm": 0.7381844520568848, "learning_rate": 5.725192423250203e-07, "loss": 2.8868, "step": 60011 }, { "epoch": 2.94, "grad_norm": 0.7417834997177124, "learning_rate": 5.715689106065346e-07, "loss": 2.7835, "step": 60012 }, { "epoch": 2.94, "grad_norm": 0.7412571310997009, "learning_rate": 5.706193675248716e-07, "loss": 3.0335, "step": 60013 }, { "epoch": 2.94, "grad_norm": 0.7355944514274597, "learning_rate": 5.696706130825624e-07, "loss": 2.7241, "step": 60014 }, { "epoch": 2.94, "grad_norm": 0.738745927810669, "learning_rate": 5.687226472821383e-07, "loss": 2.7394, "step": 60015 }, { "epoch": 2.94, "grad_norm": 0.6923105716705322, "learning_rate": 5.67775470126064e-07, "loss": 2.9196, "step": 60016 }, { "epoch": 2.94, "grad_norm": 0.7854421734809875, "learning_rate": 5.668290816168708e-07, "loss": 2.734, "step": 60017 }, { "epoch": 2.94, "grad_norm": 0.8272637128829956, "learning_rate": 5.658834817569901e-07, "loss": 2.9645, "step": 60018 }, { "epoch": 2.94, "grad_norm": 0.72818922996521, "learning_rate": 5.649386705489533e-07, "loss": 2.9149, "step": 60019 }, { "epoch": 2.94, "grad_norm": 0.7304298877716064, "learning_rate": 5.63994647995225e-07, "loss": 2.7626, "step": 60020 }, { "epoch": 2.94, "grad_norm": 0.8027445673942566, "learning_rate": 5.630514140983366e-07, "loss": 2.9628, "step": 60021 }, { "epoch": 2.94, "grad_norm": 0.719323456287384, "learning_rate": 5.621089688607194e-07, "loss": 2.6681, "step": 60022 }, { "epoch": 2.94, "grad_norm": 0.806006669998169, "learning_rate": 5.611673122849048e-07, "loss": 2.7381, "step": 60023 }, { "epoch": 2.94, "grad_norm": 0.7545458078384399, "learning_rate": 5.60226444373324e-07, "loss": 2.8362, "step": 60024 }, { "epoch": 2.94, "grad_norm": 0.8001763224601746, "learning_rate": 5.592863651285084e-07, "loss": 3.0595, "step": 60025 }, { "epoch": 2.94, "grad_norm": 0.7519499659538269, "learning_rate": 5.583470745528562e-07, "loss": 2.7645, "step": 60026 }, { "epoch": 2.94, "grad_norm": 0.7754395604133606, "learning_rate": 5.574085726489319e-07, "loss": 2.7446, "step": 60027 }, { "epoch": 2.94, "grad_norm": 0.7979375123977661, "learning_rate": 5.564708594191337e-07, "loss": 3.0058, "step": 60028 }, { "epoch": 2.94, "grad_norm": 0.7920042872428894, "learning_rate": 5.555339348659926e-07, "loss": 2.7863, "step": 60029 }, { "epoch": 2.94, "grad_norm": 0.7542409300804138, "learning_rate": 5.54597798991907e-07, "loss": 2.8707, "step": 60030 }, { "epoch": 2.94, "grad_norm": 0.7596069574356079, "learning_rate": 5.536624517994081e-07, "loss": 2.9896, "step": 60031 }, { "epoch": 2.94, "grad_norm": 0.7304089665412903, "learning_rate": 5.527278932909274e-07, "loss": 2.6327, "step": 60032 }, { "epoch": 2.94, "grad_norm": 0.7943688631057739, "learning_rate": 5.517941234689294e-07, "loss": 2.9162, "step": 60033 }, { "epoch": 2.94, "grad_norm": 0.7383855581283569, "learning_rate": 5.508611423358789e-07, "loss": 2.8657, "step": 60034 }, { "epoch": 2.94, "grad_norm": 0.717839777469635, "learning_rate": 5.499289498942405e-07, "loss": 3.0219, "step": 60035 }, { "epoch": 2.94, "grad_norm": 0.8198238015174866, "learning_rate": 5.489975461464458e-07, "loss": 2.9683, "step": 60036 }, { "epoch": 2.94, "grad_norm": 0.7249385118484497, "learning_rate": 5.480669310949593e-07, "loss": 2.7602, "step": 60037 }, { "epoch": 2.94, "grad_norm": 0.771135687828064, "learning_rate": 5.471371047422457e-07, "loss": 3.0002, "step": 60038 }, { "epoch": 2.94, "grad_norm": 0.759382426738739, "learning_rate": 5.462080670907365e-07, "loss": 2.7144, "step": 60039 }, { "epoch": 2.94, "grad_norm": 0.8611246943473816, "learning_rate": 5.452798181428631e-07, "loss": 2.7942, "step": 60040 }, { "epoch": 2.94, "grad_norm": 0.7444459795951843, "learning_rate": 5.443523579011233e-07, "loss": 3.0667, "step": 60041 }, { "epoch": 2.94, "grad_norm": 0.7149555683135986, "learning_rate": 5.434256863679154e-07, "loss": 2.9428, "step": 60042 }, { "epoch": 2.94, "grad_norm": 0.7191066741943359, "learning_rate": 5.424998035456707e-07, "loss": 2.6826, "step": 60043 }, { "epoch": 2.94, "grad_norm": 0.7511018514633179, "learning_rate": 5.415747094368872e-07, "loss": 2.7892, "step": 60044 }, { "epoch": 2.94, "grad_norm": 0.7677654027938843, "learning_rate": 5.40650404043963e-07, "loss": 2.8827, "step": 60045 }, { "epoch": 2.94, "grad_norm": 0.7885034084320068, "learning_rate": 5.397268873692961e-07, "loss": 2.6797, "step": 60046 }, { "epoch": 2.94, "grad_norm": 0.7618846893310547, "learning_rate": 5.388041594153847e-07, "loss": 3.108, "step": 60047 }, { "epoch": 2.94, "grad_norm": 0.7315735816955566, "learning_rate": 5.378822201846267e-07, "loss": 2.8573, "step": 60048 }, { "epoch": 2.94, "grad_norm": 0.7194553017616272, "learning_rate": 5.369610696794535e-07, "loss": 2.9314, "step": 60049 }, { "epoch": 2.94, "grad_norm": 0.712523341178894, "learning_rate": 5.360407079022966e-07, "loss": 2.717, "step": 60050 }, { "epoch": 2.94, "grad_norm": 0.725117027759552, "learning_rate": 5.35121134855554e-07, "loss": 2.6336, "step": 60051 }, { "epoch": 2.94, "grad_norm": 0.7298504114151001, "learning_rate": 5.342023505416903e-07, "loss": 2.9929, "step": 60052 }, { "epoch": 2.94, "grad_norm": 1.2413877248764038, "learning_rate": 5.332843549631038e-07, "loss": 2.911, "step": 60053 }, { "epoch": 2.94, "grad_norm": 0.7253249883651733, "learning_rate": 5.323671481222258e-07, "loss": 2.8556, "step": 60054 }, { "epoch": 2.94, "grad_norm": 0.7611720561981201, "learning_rate": 5.31450730021421e-07, "loss": 2.8577, "step": 60055 }, { "epoch": 2.94, "grad_norm": 0.7636274695396423, "learning_rate": 5.305351006631542e-07, "loss": 2.7728, "step": 60056 }, { "epoch": 2.94, "grad_norm": 0.7457626461982727, "learning_rate": 5.296202600498567e-07, "loss": 2.8102, "step": 60057 }, { "epoch": 2.94, "grad_norm": 0.7568612098693848, "learning_rate": 5.2870620818386e-07, "loss": 2.9234, "step": 60058 }, { "epoch": 2.94, "grad_norm": 0.7343557476997375, "learning_rate": 5.277929450676288e-07, "loss": 2.979, "step": 60059 }, { "epoch": 2.94, "grad_norm": 0.7438076734542847, "learning_rate": 5.268804707035946e-07, "loss": 3.08, "step": 60060 }, { "epoch": 2.94, "grad_norm": 0.7437184453010559, "learning_rate": 5.259687850940886e-07, "loss": 2.7556, "step": 60061 }, { "epoch": 2.94, "grad_norm": 0.7388575077056885, "learning_rate": 5.250578882415423e-07, "loss": 2.9538, "step": 60062 }, { "epoch": 2.94, "grad_norm": 0.7527971267700195, "learning_rate": 5.24147780148354e-07, "loss": 2.9749, "step": 60063 }, { "epoch": 2.94, "grad_norm": 0.7302408814430237, "learning_rate": 5.232384608169549e-07, "loss": 2.6886, "step": 60064 }, { "epoch": 2.94, "grad_norm": 0.7744280695915222, "learning_rate": 5.223299302496764e-07, "loss": 2.6481, "step": 60065 }, { "epoch": 2.94, "grad_norm": 0.72216796875, "learning_rate": 5.2142218844895e-07, "loss": 2.9079, "step": 60066 }, { "epoch": 2.94, "grad_norm": 0.7193196415901184, "learning_rate": 5.205152354171738e-07, "loss": 2.98, "step": 60067 }, { "epoch": 2.94, "grad_norm": 0.7564364075660706, "learning_rate": 5.196090711567457e-07, "loss": 2.8694, "step": 60068 }, { "epoch": 2.94, "grad_norm": 0.7751148343086243, "learning_rate": 5.187036956699975e-07, "loss": 2.8992, "step": 60069 }, { "epoch": 2.94, "grad_norm": 0.7742836475372314, "learning_rate": 5.177991089593603e-07, "loss": 2.8661, "step": 60070 }, { "epoch": 2.94, "grad_norm": 0.7468364834785461, "learning_rate": 5.16895311027199e-07, "loss": 2.8607, "step": 60071 }, { "epoch": 2.94, "grad_norm": 0.7487417459487915, "learning_rate": 5.159923018759115e-07, "loss": 2.8908, "step": 60072 }, { "epoch": 2.94, "grad_norm": 0.7274312376976013, "learning_rate": 5.150900815078296e-07, "loss": 2.9371, "step": 60073 }, { "epoch": 2.94, "grad_norm": 0.7443316578865051, "learning_rate": 5.141886499253844e-07, "loss": 2.9809, "step": 60074 }, { "epoch": 2.94, "grad_norm": 0.7913233041763306, "learning_rate": 5.132880071309408e-07, "loss": 2.9794, "step": 60075 }, { "epoch": 2.94, "grad_norm": 0.763105034828186, "learning_rate": 5.123881531268637e-07, "loss": 2.7934, "step": 60076 }, { "epoch": 2.94, "grad_norm": 0.7337465882301331, "learning_rate": 5.114890879155176e-07, "loss": 2.7711, "step": 60077 }, { "epoch": 2.94, "grad_norm": 0.785071849822998, "learning_rate": 5.105908114992341e-07, "loss": 2.9069, "step": 60078 }, { "epoch": 2.94, "grad_norm": 0.7351061701774597, "learning_rate": 5.096933238804446e-07, "loss": 2.5839, "step": 60079 }, { "epoch": 2.94, "grad_norm": 0.7270272970199585, "learning_rate": 5.087966250614806e-07, "loss": 2.8432, "step": 60080 }, { "epoch": 2.94, "grad_norm": 0.8075821399688721, "learning_rate": 5.079007150447067e-07, "loss": 2.7532, "step": 60081 }, { "epoch": 2.94, "grad_norm": 0.7147870659828186, "learning_rate": 5.07005593832488e-07, "loss": 3.1726, "step": 60082 }, { "epoch": 2.94, "grad_norm": 0.772592306137085, "learning_rate": 5.06111261427189e-07, "loss": 3.0678, "step": 60083 }, { "epoch": 2.94, "grad_norm": 0.7498898506164551, "learning_rate": 5.052177178311412e-07, "loss": 2.8506, "step": 60084 }, { "epoch": 2.94, "grad_norm": 0.7638654708862305, "learning_rate": 5.043249630467094e-07, "loss": 3.0761, "step": 60085 }, { "epoch": 2.94, "grad_norm": 0.7409616112709045, "learning_rate": 5.034329970762586e-07, "loss": 2.9633, "step": 60086 }, { "epoch": 2.94, "grad_norm": 0.7529875040054321, "learning_rate": 5.025418199220866e-07, "loss": 2.938, "step": 60087 }, { "epoch": 2.94, "grad_norm": 0.7796822190284729, "learning_rate": 5.01651431586625e-07, "loss": 2.8388, "step": 60088 }, { "epoch": 2.94, "grad_norm": 0.7700410485267639, "learning_rate": 5.007618320721718e-07, "loss": 2.8562, "step": 60089 }, { "epoch": 2.94, "grad_norm": 0.7736303210258484, "learning_rate": 4.998730213810587e-07, "loss": 2.8441, "step": 60090 }, { "epoch": 2.94, "grad_norm": 0.7461416721343994, "learning_rate": 4.989849995156503e-07, "loss": 3.0117, "step": 60091 }, { "epoch": 2.94, "grad_norm": 0.7124243974685669, "learning_rate": 4.980977664782448e-07, "loss": 3.0132, "step": 60092 }, { "epoch": 2.95, "grad_norm": 0.7641018629074097, "learning_rate": 4.972113222712404e-07, "loss": 3.0398, "step": 60093 }, { "epoch": 2.95, "grad_norm": 0.763166069984436, "learning_rate": 4.963256668969351e-07, "loss": 2.9907, "step": 60094 }, { "epoch": 2.95, "grad_norm": 0.7739465236663818, "learning_rate": 4.954408003576271e-07, "loss": 2.6657, "step": 60095 }, { "epoch": 2.95, "grad_norm": 0.7320348024368286, "learning_rate": 4.945567226557478e-07, "loss": 2.9288, "step": 60096 }, { "epoch": 2.95, "grad_norm": 0.8021144270896912, "learning_rate": 4.936734337935289e-07, "loss": 2.7831, "step": 60097 }, { "epoch": 2.95, "grad_norm": 0.6665416359901428, "learning_rate": 4.927909337733349e-07, "loss": 2.8185, "step": 60098 }, { "epoch": 2.95, "grad_norm": 0.737612783908844, "learning_rate": 4.919092225975307e-07, "loss": 2.7675, "step": 60099 }, { "epoch": 2.95, "grad_norm": 0.7620866894721985, "learning_rate": 4.91028300268348e-07, "loss": 2.9269, "step": 60100 }, { "epoch": 2.95, "grad_norm": 0.7583200335502625, "learning_rate": 4.901481667881846e-07, "loss": 2.9917, "step": 60101 }, { "epoch": 2.95, "grad_norm": 0.7368113398551941, "learning_rate": 4.892688221593055e-07, "loss": 2.9468, "step": 60102 }, { "epoch": 2.95, "grad_norm": 0.754863440990448, "learning_rate": 4.883902663840422e-07, "loss": 2.7378, "step": 60103 }, { "epoch": 2.95, "grad_norm": 0.793886125087738, "learning_rate": 4.875124994647595e-07, "loss": 3.0714, "step": 60104 }, { "epoch": 2.95, "grad_norm": 0.7754970192909241, "learning_rate": 4.866355214036887e-07, "loss": 2.759, "step": 60105 }, { "epoch": 2.95, "grad_norm": 0.7628046870231628, "learning_rate": 4.85759332203195e-07, "loss": 2.9111, "step": 60106 }, { "epoch": 2.95, "grad_norm": 0.7230570912361145, "learning_rate": 4.848839318655429e-07, "loss": 2.9432, "step": 60107 }, { "epoch": 2.95, "grad_norm": 0.7568901777267456, "learning_rate": 4.840093203930973e-07, "loss": 2.9762, "step": 60108 }, { "epoch": 2.95, "grad_norm": 0.772777259349823, "learning_rate": 4.83135497788123e-07, "loss": 3.0028, "step": 60109 }, { "epoch": 2.95, "grad_norm": 0.7773007750511169, "learning_rate": 4.822624640529182e-07, "loss": 2.7886, "step": 60110 }, { "epoch": 2.95, "grad_norm": 0.7324758768081665, "learning_rate": 4.813902191897812e-07, "loss": 2.8291, "step": 60111 }, { "epoch": 2.95, "grad_norm": 0.7719749808311462, "learning_rate": 4.805187632010432e-07, "loss": 2.7614, "step": 60112 }, { "epoch": 2.95, "grad_norm": 0.7728280425071716, "learning_rate": 4.796480960889359e-07, "loss": 2.9761, "step": 60113 }, { "epoch": 2.95, "grad_norm": 0.7225713133811951, "learning_rate": 4.787782178558241e-07, "loss": 2.757, "step": 60114 }, { "epoch": 2.95, "grad_norm": 0.7266071438789368, "learning_rate": 4.779091285039726e-07, "loss": 3.0808, "step": 60115 }, { "epoch": 2.95, "grad_norm": 0.7436891198158264, "learning_rate": 4.770408280356464e-07, "loss": 2.7847, "step": 60116 }, { "epoch": 2.95, "grad_norm": 0.7610815167427063, "learning_rate": 4.761733164531434e-07, "loss": 3.0614, "step": 60117 }, { "epoch": 2.95, "grad_norm": 0.7271731495857239, "learning_rate": 4.753065937587619e-07, "loss": 2.7171, "step": 60118 }, { "epoch": 2.95, "grad_norm": 0.7084574103355408, "learning_rate": 4.7444065995480006e-07, "loss": 2.8051, "step": 60119 }, { "epoch": 2.95, "grad_norm": 0.7272463440895081, "learning_rate": 4.7357551504348944e-07, "loss": 2.9871, "step": 60120 }, { "epoch": 2.95, "grad_norm": 0.7564078569412231, "learning_rate": 4.727111590271615e-07, "loss": 3.0835, "step": 60121 }, { "epoch": 2.95, "grad_norm": 0.7226554155349731, "learning_rate": 4.718475919080478e-07, "loss": 3.0259, "step": 60122 }, { "epoch": 2.95, "grad_norm": 0.747903048992157, "learning_rate": 4.7098481368844645e-07, "loss": 2.9517, "step": 60123 }, { "epoch": 2.95, "grad_norm": 0.7668612599372864, "learning_rate": 4.7012282437062234e-07, "loss": 2.9348, "step": 60124 }, { "epoch": 2.95, "grad_norm": 0.7929732203483582, "learning_rate": 4.692616239568403e-07, "loss": 3.0129, "step": 60125 }, { "epoch": 2.95, "grad_norm": 0.7587389945983887, "learning_rate": 4.6840121244939856e-07, "loss": 2.7101, "step": 60126 }, { "epoch": 2.95, "grad_norm": 0.7391680479049683, "learning_rate": 4.675415898505286e-07, "loss": 2.7574, "step": 60127 }, { "epoch": 2.95, "grad_norm": 0.827909529209137, "learning_rate": 4.6668275616249526e-07, "loss": 2.8657, "step": 60128 }, { "epoch": 2.95, "grad_norm": 0.7175806164741516, "learning_rate": 4.658247113875968e-07, "loss": 2.9622, "step": 60129 }, { "epoch": 2.95, "grad_norm": 0.7418898940086365, "learning_rate": 4.649674555280647e-07, "loss": 3.021, "step": 60130 }, { "epoch": 2.95, "grad_norm": 0.7666599154472351, "learning_rate": 4.641109885861638e-07, "loss": 2.723, "step": 60131 }, { "epoch": 2.95, "grad_norm": 0.775105357170105, "learning_rate": 4.632553105641257e-07, "loss": 2.871, "step": 60132 }, { "epoch": 2.95, "grad_norm": 0.7589561939239502, "learning_rate": 4.624004214642485e-07, "loss": 2.9596, "step": 60133 }, { "epoch": 2.95, "grad_norm": 0.7122417092323303, "learning_rate": 4.615463212887638e-07, "loss": 2.7842, "step": 60134 }, { "epoch": 2.95, "grad_norm": 0.7434190511703491, "learning_rate": 4.6069301003990313e-07, "loss": 2.9867, "step": 60135 }, { "epoch": 2.95, "grad_norm": 0.8169651627540588, "learning_rate": 4.598404877199646e-07, "loss": 2.8957, "step": 60136 }, { "epoch": 2.95, "grad_norm": 0.7982842922210693, "learning_rate": 4.5898875433111327e-07, "loss": 2.9529, "step": 60137 }, { "epoch": 2.95, "grad_norm": 0.7457339763641357, "learning_rate": 4.581378098756472e-07, "loss": 2.8845, "step": 60138 }, { "epoch": 2.95, "grad_norm": 0.7173131704330444, "learning_rate": 4.5728765435583124e-07, "loss": 2.9729, "step": 60139 }, { "epoch": 2.95, "grad_norm": 0.791340708732605, "learning_rate": 4.564382877738304e-07, "loss": 2.6271, "step": 60140 }, { "epoch": 2.95, "grad_norm": 0.7958931922912598, "learning_rate": 4.5558971013194277e-07, "loss": 2.7756, "step": 60141 }, { "epoch": 2.95, "grad_norm": 0.7099641561508179, "learning_rate": 4.547419214324e-07, "loss": 2.8837, "step": 60142 }, { "epoch": 2.95, "grad_norm": 0.7637771368026733, "learning_rate": 4.5389492167736685e-07, "loss": 2.9816, "step": 60143 }, { "epoch": 2.95, "grad_norm": 0.7536028027534485, "learning_rate": 4.5304871086917495e-07, "loss": 2.8259, "step": 60144 }, { "epoch": 2.95, "grad_norm": 0.8726342916488647, "learning_rate": 4.5220328900998917e-07, "loss": 3.1647, "step": 60145 }, { "epoch": 2.95, "grad_norm": 0.7555391192436218, "learning_rate": 4.5135865610204103e-07, "loss": 2.7219, "step": 60146 }, { "epoch": 2.95, "grad_norm": 0.7423574328422546, "learning_rate": 4.5051481214759545e-07, "loss": 2.8423, "step": 60147 }, { "epoch": 2.95, "grad_norm": 0.7736955285072327, "learning_rate": 4.4967175714881733e-07, "loss": 2.8912, "step": 60148 }, { "epoch": 2.95, "grad_norm": 0.7960270643234253, "learning_rate": 4.488294911079382e-07, "loss": 3.2692, "step": 60149 }, { "epoch": 2.95, "grad_norm": 0.7632052898406982, "learning_rate": 4.47988014027223e-07, "loss": 2.9191, "step": 60150 }, { "epoch": 2.95, "grad_norm": 0.7297966480255127, "learning_rate": 4.471473259088365e-07, "loss": 2.8145, "step": 60151 }, { "epoch": 2.95, "grad_norm": 0.707733154296875, "learning_rate": 4.463074267550104e-07, "loss": 2.9402, "step": 60152 }, { "epoch": 2.95, "grad_norm": 0.741661012172699, "learning_rate": 4.454683165679762e-07, "loss": 2.7414, "step": 60153 }, { "epoch": 2.95, "grad_norm": 0.7709798812866211, "learning_rate": 4.4462999534993214e-07, "loss": 2.7895, "step": 60154 }, { "epoch": 2.95, "grad_norm": 0.7329643964767456, "learning_rate": 4.4379246310307646e-07, "loss": 2.8292, "step": 60155 }, { "epoch": 2.95, "grad_norm": 0.7478441596031189, "learning_rate": 4.4295571982964074e-07, "loss": 3.0081, "step": 60156 }, { "epoch": 2.95, "grad_norm": 0.7324329614639282, "learning_rate": 4.421197655317565e-07, "loss": 2.9113, "step": 60157 }, { "epoch": 2.95, "grad_norm": 0.7825899720191956, "learning_rate": 4.412846002117221e-07, "loss": 2.8504, "step": 60158 }, { "epoch": 2.95, "grad_norm": 0.7741754055023193, "learning_rate": 4.4045022387166897e-07, "loss": 2.8648, "step": 60159 }, { "epoch": 2.95, "grad_norm": 0.7594071626663208, "learning_rate": 4.396166365138287e-07, "loss": 3.0879, "step": 60160 }, { "epoch": 2.95, "grad_norm": 0.7644873261451721, "learning_rate": 4.3878383814039964e-07, "loss": 2.9545, "step": 60161 }, { "epoch": 2.95, "grad_norm": 0.7412201762199402, "learning_rate": 4.379518287535466e-07, "loss": 2.967, "step": 60162 }, { "epoch": 2.95, "grad_norm": 0.7475193738937378, "learning_rate": 4.3712060835550123e-07, "loss": 2.8789, "step": 60163 }, { "epoch": 2.95, "grad_norm": 0.7200668454170227, "learning_rate": 4.3629017694839505e-07, "loss": 2.7571, "step": 60164 }, { "epoch": 2.95, "grad_norm": 0.7569831013679504, "learning_rate": 4.354605345344597e-07, "loss": 2.8523, "step": 60165 }, { "epoch": 2.95, "grad_norm": 0.7711689472198486, "learning_rate": 4.346316811158601e-07, "loss": 2.9118, "step": 60166 }, { "epoch": 2.95, "grad_norm": 0.7586641311645508, "learning_rate": 4.3380361669479447e-07, "loss": 2.9045, "step": 60167 }, { "epoch": 2.95, "grad_norm": 0.7284442782402039, "learning_rate": 4.3297634127346106e-07, "loss": 2.7352, "step": 60168 }, { "epoch": 2.95, "grad_norm": 0.8342050909996033, "learning_rate": 4.3214985485399144e-07, "loss": 2.7207, "step": 60169 }, { "epoch": 2.95, "grad_norm": 0.749860405921936, "learning_rate": 4.3132415743858394e-07, "loss": 2.7941, "step": 60170 }, { "epoch": 2.95, "grad_norm": 0.7597050070762634, "learning_rate": 4.3049924902940345e-07, "loss": 2.8716, "step": 60171 }, { "epoch": 2.95, "grad_norm": 0.7795093059539795, "learning_rate": 4.2967512962864825e-07, "loss": 2.7667, "step": 60172 }, { "epoch": 2.95, "grad_norm": 0.7680733799934387, "learning_rate": 4.2885179923848323e-07, "loss": 2.8817, "step": 60173 }, { "epoch": 2.95, "grad_norm": 0.780337929725647, "learning_rate": 4.280292578610733e-07, "loss": 2.7562, "step": 60174 }, { "epoch": 2.95, "grad_norm": 0.7037748694419861, "learning_rate": 4.272075054985835e-07, "loss": 2.8465, "step": 60175 }, { "epoch": 2.95, "grad_norm": 0.7473371028900146, "learning_rate": 4.263865421531454e-07, "loss": 3.058, "step": 60176 }, { "epoch": 2.95, "grad_norm": 0.766514778137207, "learning_rate": 4.255663678269905e-07, "loss": 3.0962, "step": 60177 }, { "epoch": 2.95, "grad_norm": 0.705461859703064, "learning_rate": 4.2474698252221715e-07, "loss": 2.9497, "step": 60178 }, { "epoch": 2.95, "grad_norm": 0.7880750298500061, "learning_rate": 4.2392838624099033e-07, "loss": 2.8343, "step": 60179 }, { "epoch": 2.95, "grad_norm": 0.8290342092514038, "learning_rate": 4.231105789855083e-07, "loss": 2.8913, "step": 60180 }, { "epoch": 2.95, "grad_norm": 0.8501545190811157, "learning_rate": 4.2229356075786923e-07, "loss": 2.9309, "step": 60181 }, { "epoch": 2.95, "grad_norm": 0.7908932566642761, "learning_rate": 4.214773315602715e-07, "loss": 2.9298, "step": 60182 }, { "epoch": 2.95, "grad_norm": 0.7290691137313843, "learning_rate": 4.206618913948467e-07, "loss": 2.7574, "step": 60183 }, { "epoch": 2.95, "grad_norm": 0.7517605423927307, "learning_rate": 4.198472402637598e-07, "loss": 2.9407, "step": 60184 }, { "epoch": 2.95, "grad_norm": 0.7302461862564087, "learning_rate": 4.190333781691424e-07, "loss": 2.9164, "step": 60185 }, { "epoch": 2.95, "grad_norm": 0.7860396504402161, "learning_rate": 4.182203051130928e-07, "loss": 2.8992, "step": 60186 }, { "epoch": 2.95, "grad_norm": 0.7854883670806885, "learning_rate": 4.174080210978426e-07, "loss": 3.107, "step": 60187 }, { "epoch": 2.95, "grad_norm": 0.7467091083526611, "learning_rate": 4.165965261254567e-07, "loss": 2.832, "step": 60188 }, { "epoch": 2.95, "grad_norm": 0.7421848773956299, "learning_rate": 4.157858201981001e-07, "loss": 2.874, "step": 60189 }, { "epoch": 2.95, "grad_norm": 0.7487695217132568, "learning_rate": 4.1497590331790453e-07, "loss": 2.6692, "step": 60190 }, { "epoch": 2.95, "grad_norm": 0.7340686321258545, "learning_rate": 4.141667754870348e-07, "loss": 2.8941, "step": 60191 }, { "epoch": 2.95, "grad_norm": 0.7664749622344971, "learning_rate": 4.13358436707556e-07, "loss": 2.8168, "step": 60192 }, { "epoch": 2.95, "grad_norm": 0.7620031833648682, "learning_rate": 4.1255088698166625e-07, "loss": 3.0271, "step": 60193 }, { "epoch": 2.95, "grad_norm": 0.7645248174667358, "learning_rate": 4.117441263114307e-07, "loss": 2.8387, "step": 60194 }, { "epoch": 2.95, "grad_norm": 0.7493310570716858, "learning_rate": 4.1093815469901423e-07, "loss": 2.8177, "step": 60195 }, { "epoch": 2.95, "grad_norm": 0.7595289349555969, "learning_rate": 4.101329721465485e-07, "loss": 3.058, "step": 60196 }, { "epoch": 2.95, "grad_norm": 0.7337210774421692, "learning_rate": 4.093285786560985e-07, "loss": 2.8429, "step": 60197 }, { "epoch": 2.95, "grad_norm": 0.7361249327659607, "learning_rate": 4.085249742298624e-07, "loss": 2.9273, "step": 60198 }, { "epoch": 2.95, "grad_norm": 0.7934818863868713, "learning_rate": 4.077221588699053e-07, "loss": 2.8338, "step": 60199 }, { "epoch": 2.95, "grad_norm": 0.8189910650253296, "learning_rate": 4.0692013257832556e-07, "loss": 2.6777, "step": 60200 }, { "epoch": 2.95, "grad_norm": 0.777704119682312, "learning_rate": 4.0611889535728803e-07, "loss": 2.9845, "step": 60201 }, { "epoch": 2.95, "grad_norm": 0.7534101605415344, "learning_rate": 4.0531844720885774e-07, "loss": 3.0629, "step": 60202 }, { "epoch": 2.95, "grad_norm": 0.7779284119606018, "learning_rate": 4.0451878813516636e-07, "loss": 2.8526, "step": 60203 }, { "epoch": 2.95, "grad_norm": 0.72135990858078, "learning_rate": 4.0371991813831215e-07, "loss": 2.9209, "step": 60204 }, { "epoch": 2.95, "grad_norm": 0.7906358242034912, "learning_rate": 4.0292183722039353e-07, "loss": 3.1225, "step": 60205 }, { "epoch": 2.95, "grad_norm": 0.6974494457244873, "learning_rate": 4.02124545383542e-07, "loss": 2.844, "step": 60206 }, { "epoch": 2.95, "grad_norm": 0.7315931916236877, "learning_rate": 4.013280426297893e-07, "loss": 2.9354, "step": 60207 }, { "epoch": 2.95, "grad_norm": 0.759009838104248, "learning_rate": 4.005323289613338e-07, "loss": 2.8429, "step": 60208 }, { "epoch": 2.95, "grad_norm": 0.7365983128547668, "learning_rate": 3.9973740438017376e-07, "loss": 2.7836, "step": 60209 }, { "epoch": 2.95, "grad_norm": 0.7780154943466187, "learning_rate": 3.989432688884742e-07, "loss": 2.786, "step": 60210 }, { "epoch": 2.95, "grad_norm": 0.7027831673622131, "learning_rate": 3.981499224883e-07, "loss": 2.8762, "step": 60211 }, { "epoch": 2.95, "grad_norm": 0.7347671389579773, "learning_rate": 3.9735736518171634e-07, "loss": 2.6765, "step": 60212 }, { "epoch": 2.95, "grad_norm": 0.7240535616874695, "learning_rate": 3.965655969708548e-07, "loss": 2.9078, "step": 60213 }, { "epoch": 2.95, "grad_norm": 0.7486701011657715, "learning_rate": 3.957746178577803e-07, "loss": 2.8639, "step": 60214 }, { "epoch": 2.95, "grad_norm": 0.7389553785324097, "learning_rate": 3.94984427844558e-07, "loss": 2.7024, "step": 60215 }, { "epoch": 2.95, "grad_norm": 0.737718403339386, "learning_rate": 3.941950269333194e-07, "loss": 3.0266, "step": 60216 }, { "epoch": 2.95, "grad_norm": 0.7455750703811646, "learning_rate": 3.9340641512606295e-07, "loss": 2.8594, "step": 60217 }, { "epoch": 2.95, "grad_norm": 0.7492016553878784, "learning_rate": 3.9261859242495363e-07, "loss": 2.9803, "step": 60218 }, { "epoch": 2.95, "grad_norm": 0.7766376733779907, "learning_rate": 3.918315588319898e-07, "loss": 2.68, "step": 60219 }, { "epoch": 2.95, "grad_norm": 0.7816788554191589, "learning_rate": 3.910453143493364e-07, "loss": 2.9721, "step": 60220 }, { "epoch": 2.95, "grad_norm": 0.7525882720947266, "learning_rate": 3.902598589789585e-07, "loss": 2.6368, "step": 60221 }, { "epoch": 2.95, "grad_norm": 0.746664822101593, "learning_rate": 3.8947519272298777e-07, "loss": 2.7792, "step": 60222 }, { "epoch": 2.95, "grad_norm": 0.7470570802688599, "learning_rate": 3.886913155834892e-07, "loss": 3.0567, "step": 60223 }, { "epoch": 2.95, "grad_norm": 0.7868690490722656, "learning_rate": 3.8790822756249453e-07, "loss": 2.7597, "step": 60224 }, { "epoch": 2.95, "grad_norm": 0.8167101144790649, "learning_rate": 3.87125928662102e-07, "loss": 2.9416, "step": 60225 }, { "epoch": 2.95, "grad_norm": 0.7606449127197266, "learning_rate": 3.863444188843767e-07, "loss": 2.9537, "step": 60226 }, { "epoch": 2.95, "grad_norm": 0.7369158864021301, "learning_rate": 3.8556369823135036e-07, "loss": 2.8176, "step": 60227 }, { "epoch": 2.95, "grad_norm": 0.7974393963813782, "learning_rate": 3.847837667050546e-07, "loss": 2.9026, "step": 60228 }, { "epoch": 2.95, "grad_norm": 0.7653765082359314, "learning_rate": 3.8400462430762113e-07, "loss": 2.6229, "step": 60229 }, { "epoch": 2.95, "grad_norm": 0.7237482070922852, "learning_rate": 3.8322627104101497e-07, "loss": 2.9362, "step": 60230 }, { "epoch": 2.95, "grad_norm": 0.7616258263587952, "learning_rate": 3.8244870690736785e-07, "loss": 2.702, "step": 60231 }, { "epoch": 2.95, "grad_norm": 0.750545859336853, "learning_rate": 3.8167193190864477e-07, "loss": 2.8297, "step": 60232 }, { "epoch": 2.95, "grad_norm": 0.7552297711372375, "learning_rate": 3.808959460469774e-07, "loss": 2.7552, "step": 60233 }, { "epoch": 2.95, "grad_norm": 0.7343993782997131, "learning_rate": 3.801207493243308e-07, "loss": 2.9365, "step": 60234 }, { "epoch": 2.95, "grad_norm": 0.8061990737915039, "learning_rate": 3.793463417428033e-07, "loss": 2.784, "step": 60235 }, { "epoch": 2.95, "grad_norm": 0.7554232478141785, "learning_rate": 3.7857272330442667e-07, "loss": 2.8843, "step": 60236 }, { "epoch": 2.95, "grad_norm": 0.8099880814552307, "learning_rate": 3.7779989401119925e-07, "loss": 2.8103, "step": 60237 }, { "epoch": 2.95, "grad_norm": 0.7447828054428101, "learning_rate": 3.770278538652194e-07, "loss": 2.6748, "step": 60238 }, { "epoch": 2.95, "grad_norm": 0.7810490727424622, "learning_rate": 3.7625660286845215e-07, "loss": 2.7598, "step": 60239 }, { "epoch": 2.95, "grad_norm": 0.8725793361663818, "learning_rate": 3.754861410229626e-07, "loss": 2.9231, "step": 60240 }, { "epoch": 2.95, "grad_norm": 0.7612780332565308, "learning_rate": 3.747164683307824e-07, "loss": 2.8793, "step": 60241 }, { "epoch": 2.95, "grad_norm": 0.7492809295654297, "learning_rate": 3.7394758479394326e-07, "loss": 2.8316, "step": 60242 }, { "epoch": 2.95, "grad_norm": 0.7240146398544312, "learning_rate": 3.731794904144769e-07, "loss": 3.118, "step": 60243 }, { "epoch": 2.95, "grad_norm": 0.7847809791564941, "learning_rate": 3.7241218519434845e-07, "loss": 2.8566, "step": 60244 }, { "epoch": 2.95, "grad_norm": 0.7559931874275208, "learning_rate": 3.7164566913565617e-07, "loss": 2.8552, "step": 60245 }, { "epoch": 2.95, "grad_norm": 0.7063673734664917, "learning_rate": 3.7087994224036524e-07, "loss": 2.8538, "step": 60246 }, { "epoch": 2.95, "grad_norm": 0.7377515435218811, "learning_rate": 3.7011500451054055e-07, "loss": 2.9346, "step": 60247 }, { "epoch": 2.95, "grad_norm": 0.7750132083892822, "learning_rate": 3.693508559481473e-07, "loss": 2.8041, "step": 60248 }, { "epoch": 2.95, "grad_norm": 0.7176759243011475, "learning_rate": 3.6858749655521714e-07, "loss": 2.7881, "step": 60249 }, { "epoch": 2.95, "grad_norm": 0.834689199924469, "learning_rate": 3.678249263337818e-07, "loss": 2.8782, "step": 60250 }, { "epoch": 2.95, "grad_norm": 0.7550405263900757, "learning_rate": 3.6706314528580637e-07, "loss": 3.157, "step": 60251 }, { "epoch": 2.95, "grad_norm": 0.7085062861442566, "learning_rate": 3.663021534133226e-07, "loss": 2.9406, "step": 60252 }, { "epoch": 2.95, "grad_norm": 0.7482970952987671, "learning_rate": 3.655419507183288e-07, "loss": 3.069, "step": 60253 }, { "epoch": 2.95, "grad_norm": 0.7461667656898499, "learning_rate": 3.647825372028568e-07, "loss": 2.9194, "step": 60254 }, { "epoch": 2.95, "grad_norm": 0.7867854237556458, "learning_rate": 3.640239128688716e-07, "loss": 2.7486, "step": 60255 }, { "epoch": 2.95, "grad_norm": 0.7498422861099243, "learning_rate": 3.6326607771837156e-07, "loss": 2.6522, "step": 60256 }, { "epoch": 2.95, "grad_norm": 0.75998854637146, "learning_rate": 3.625090317533552e-07, "loss": 2.9697, "step": 60257 }, { "epoch": 2.95, "grad_norm": 0.7807403802871704, "learning_rate": 3.617527749758542e-07, "loss": 2.9362, "step": 60258 }, { "epoch": 2.95, "grad_norm": 0.773429811000824, "learning_rate": 3.609973073878003e-07, "loss": 2.9117, "step": 60259 }, { "epoch": 2.95, "grad_norm": 0.76775062084198, "learning_rate": 3.602426289912252e-07, "loss": 3.1239, "step": 60260 }, { "epoch": 2.95, "grad_norm": 0.7371132969856262, "learning_rate": 3.594887397880941e-07, "loss": 2.8399, "step": 60261 }, { "epoch": 2.95, "grad_norm": 0.747292697429657, "learning_rate": 3.5873563978040534e-07, "loss": 2.7784, "step": 60262 }, { "epoch": 2.95, "grad_norm": 0.8072085976600647, "learning_rate": 3.5798332897015727e-07, "loss": 2.8365, "step": 60263 }, { "epoch": 2.95, "grad_norm": 0.7212876677513123, "learning_rate": 3.57231807359315e-07, "loss": 2.7879, "step": 60264 }, { "epoch": 2.95, "grad_norm": 0.7406131029129028, "learning_rate": 3.564810749498437e-07, "loss": 2.7748, "step": 60265 }, { "epoch": 2.95, "grad_norm": 0.7893876433372498, "learning_rate": 3.5573113174374167e-07, "loss": 2.9208, "step": 60266 }, { "epoch": 2.95, "grad_norm": 0.7580415606498718, "learning_rate": 3.549819777429408e-07, "loss": 2.708, "step": 60267 }, { "epoch": 2.95, "grad_norm": 0.7362975478172302, "learning_rate": 3.5423361294950603e-07, "loss": 2.9144, "step": 60268 }, { "epoch": 2.95, "grad_norm": 0.7409021854400635, "learning_rate": 3.5348603736530254e-07, "loss": 2.9298, "step": 60269 }, { "epoch": 2.95, "grad_norm": 0.7252013683319092, "learning_rate": 3.527392509923621e-07, "loss": 3.0379, "step": 60270 }, { "epoch": 2.95, "grad_norm": 0.8025727272033691, "learning_rate": 3.5199325383264976e-07, "loss": 2.7628, "step": 60271 }, { "epoch": 2.95, "grad_norm": 0.775205135345459, "learning_rate": 3.512480458881306e-07, "loss": 2.9703, "step": 60272 }, { "epoch": 2.95, "grad_norm": 0.7527696490287781, "learning_rate": 3.505036271607364e-07, "loss": 2.7959, "step": 60273 }, { "epoch": 2.95, "grad_norm": 0.732417106628418, "learning_rate": 3.4975999765243233e-07, "loss": 2.8526, "step": 60274 }, { "epoch": 2.95, "grad_norm": 0.7460353374481201, "learning_rate": 3.490171573652167e-07, "loss": 2.827, "step": 60275 }, { "epoch": 2.95, "grad_norm": 0.785345733165741, "learning_rate": 3.482751063010214e-07, "loss": 2.8966, "step": 60276 }, { "epoch": 2.95, "grad_norm": 0.7670798301696777, "learning_rate": 3.4753384446177813e-07, "loss": 3.0136, "step": 60277 }, { "epoch": 2.95, "grad_norm": 0.7101062536239624, "learning_rate": 3.467933718494853e-07, "loss": 2.7194, "step": 60278 }, { "epoch": 2.95, "grad_norm": 0.7125613689422607, "learning_rate": 3.4605368846607473e-07, "loss": 2.9547, "step": 60279 }, { "epoch": 2.95, "grad_norm": 0.7298769950866699, "learning_rate": 3.453147943134782e-07, "loss": 2.9709, "step": 60280 }, { "epoch": 2.95, "grad_norm": 0.7311604619026184, "learning_rate": 3.4457668939362747e-07, "loss": 2.7934, "step": 60281 }, { "epoch": 2.95, "grad_norm": 0.7600909471511841, "learning_rate": 3.43839373708521e-07, "loss": 3.0138, "step": 60282 }, { "epoch": 2.95, "grad_norm": 0.9641423225402832, "learning_rate": 3.431028472600572e-07, "loss": 2.7382, "step": 60283 }, { "epoch": 2.95, "grad_norm": 0.7784249186515808, "learning_rate": 3.423671100502012e-07, "loss": 2.7052, "step": 60284 }, { "epoch": 2.95, "grad_norm": 0.7559043765068054, "learning_rate": 3.4163216208088485e-07, "loss": 2.7005, "step": 60285 }, { "epoch": 2.95, "grad_norm": 0.7473515272140503, "learning_rate": 3.408980033540398e-07, "loss": 2.9085, "step": 60286 }, { "epoch": 2.95, "grad_norm": 0.7658305168151855, "learning_rate": 3.401646338715647e-07, "loss": 2.7508, "step": 60287 }, { "epoch": 2.95, "grad_norm": 0.7382382154464722, "learning_rate": 3.394320536354578e-07, "loss": 2.912, "step": 60288 }, { "epoch": 2.95, "grad_norm": 0.762175440788269, "learning_rate": 3.387002626476176e-07, "loss": 3.04, "step": 60289 }, { "epoch": 2.95, "grad_norm": 0.7681100964546204, "learning_rate": 3.379692609099427e-07, "loss": 2.9685, "step": 60290 }, { "epoch": 2.95, "grad_norm": 0.7338895797729492, "learning_rate": 3.372390484244314e-07, "loss": 2.8639, "step": 60291 }, { "epoch": 2.95, "grad_norm": 0.7698550224304199, "learning_rate": 3.3650962519291557e-07, "loss": 2.8677, "step": 60292 }, { "epoch": 2.95, "grad_norm": 0.7156473398208618, "learning_rate": 3.3578099121739365e-07, "loss": 2.9148, "step": 60293 }, { "epoch": 2.95, "grad_norm": 0.7647261023521423, "learning_rate": 3.350531464997308e-07, "loss": 2.7391, "step": 60294 }, { "epoch": 2.95, "grad_norm": 0.7407509088516235, "learning_rate": 3.3432609104189214e-07, "loss": 2.776, "step": 60295 }, { "epoch": 2.95, "grad_norm": 0.713916540145874, "learning_rate": 3.335998248457428e-07, "loss": 2.6472, "step": 60296 }, { "epoch": 2.96, "grad_norm": 0.7673091888427734, "learning_rate": 3.328743479132145e-07, "loss": 2.8011, "step": 60297 }, { "epoch": 2.96, "grad_norm": 0.7312276363372803, "learning_rate": 3.3214966024623926e-07, "loss": 2.7984, "step": 60298 }, { "epoch": 2.96, "grad_norm": 0.7208895087242126, "learning_rate": 3.3142576184668204e-07, "loss": 2.8252, "step": 60299 }, { "epoch": 2.96, "grad_norm": 0.7697820663452148, "learning_rate": 3.30702652716508e-07, "loss": 2.7858, "step": 60300 }, { "epoch": 2.96, "grad_norm": 0.7742111682891846, "learning_rate": 3.299803328575823e-07, "loss": 2.994, "step": 60301 }, { "epoch": 2.96, "grad_norm": 0.7125436663627625, "learning_rate": 3.2925880227183677e-07, "loss": 2.9656, "step": 60302 }, { "epoch": 2.96, "grad_norm": 0.7392163872718811, "learning_rate": 3.2853806096113655e-07, "loss": 2.8482, "step": 60303 }, { "epoch": 2.96, "grad_norm": 0.77128005027771, "learning_rate": 3.2781810892738016e-07, "loss": 3.0908, "step": 60304 }, { "epoch": 2.96, "grad_norm": 0.7209950685501099, "learning_rate": 3.27098946172466e-07, "loss": 2.9793, "step": 60305 }, { "epoch": 2.96, "grad_norm": 0.7680009007453918, "learning_rate": 3.2638057269832594e-07, "loss": 2.9299, "step": 60306 }, { "epoch": 2.96, "grad_norm": 0.7300169467926025, "learning_rate": 3.2566298850682513e-07, "loss": 2.9231, "step": 60307 }, { "epoch": 2.96, "grad_norm": 0.7227309942245483, "learning_rate": 3.249461935998621e-07, "loss": 2.7252, "step": 60308 }, { "epoch": 2.96, "grad_norm": 0.7580020427703857, "learning_rate": 3.242301879793019e-07, "loss": 2.8591, "step": 60309 }, { "epoch": 2.96, "grad_norm": 0.7477824687957764, "learning_rate": 3.235149716470431e-07, "loss": 2.9945, "step": 60310 }, { "epoch": 2.96, "grad_norm": 0.7373795509338379, "learning_rate": 3.2280054460498414e-07, "loss": 2.882, "step": 60311 }, { "epoch": 2.96, "grad_norm": 0.766094982624054, "learning_rate": 3.22086906854957e-07, "loss": 2.8065, "step": 60312 }, { "epoch": 2.96, "grad_norm": 0.7640054225921631, "learning_rate": 3.213740583989266e-07, "loss": 2.9561, "step": 60313 }, { "epoch": 2.96, "grad_norm": 0.7449299097061157, "learning_rate": 3.206619992386916e-07, "loss": 3.0813, "step": 60314 }, { "epoch": 2.96, "grad_norm": 0.7240998148918152, "learning_rate": 3.1995072937618385e-07, "loss": 3.1171, "step": 60315 }, { "epoch": 2.96, "grad_norm": 0.756550133228302, "learning_rate": 3.1924024881323505e-07, "loss": 2.882, "step": 60316 }, { "epoch": 2.96, "grad_norm": 0.7245644330978394, "learning_rate": 3.1853055755174386e-07, "loss": 3.0473, "step": 60317 }, { "epoch": 2.96, "grad_norm": 0.7418863773345947, "learning_rate": 3.17821655593542e-07, "loss": 2.9368, "step": 60318 }, { "epoch": 2.96, "grad_norm": 0.7780527472496033, "learning_rate": 3.1711354294056137e-07, "loss": 2.8981, "step": 60319 }, { "epoch": 2.96, "grad_norm": 0.6818873286247253, "learning_rate": 3.1640621959460046e-07, "loss": 2.7333, "step": 60320 }, { "epoch": 2.96, "grad_norm": 0.7510486245155334, "learning_rate": 3.1569968555755777e-07, "loss": 2.8713, "step": 60321 }, { "epoch": 2.96, "grad_norm": 0.7911639213562012, "learning_rate": 3.1499394083129845e-07, "loss": 2.9481, "step": 60322 }, { "epoch": 2.96, "grad_norm": 0.779403030872345, "learning_rate": 3.1428898541765444e-07, "loss": 2.9089, "step": 60323 }, { "epoch": 2.96, "grad_norm": 0.7606043815612793, "learning_rate": 3.135848193184909e-07, "loss": 2.7561, "step": 60324 }, { "epoch": 2.96, "grad_norm": 0.7843817472457886, "learning_rate": 3.128814425357062e-07, "loss": 2.6959, "step": 60325 }, { "epoch": 2.96, "grad_norm": 0.7207468152046204, "learning_rate": 3.1217885507106576e-07, "loss": 2.6606, "step": 60326 }, { "epoch": 2.96, "grad_norm": 0.753822386264801, "learning_rate": 3.1147705692650126e-07, "loss": 2.8586, "step": 60327 }, { "epoch": 2.96, "grad_norm": 0.7353950142860413, "learning_rate": 3.107760481038113e-07, "loss": 2.9383, "step": 60328 }, { "epoch": 2.96, "grad_norm": 0.7305023074150085, "learning_rate": 3.1007582860489433e-07, "loss": 2.8052, "step": 60329 }, { "epoch": 2.96, "grad_norm": 0.716469943523407, "learning_rate": 3.0937639843154893e-07, "loss": 2.8077, "step": 60330 }, { "epoch": 2.96, "grad_norm": 0.78750079870224, "learning_rate": 3.0867775758560697e-07, "loss": 2.8025, "step": 60331 }, { "epoch": 2.96, "grad_norm": 0.7497711777687073, "learning_rate": 3.079799060689336e-07, "loss": 3.0581, "step": 60332 }, { "epoch": 2.96, "grad_norm": 0.7364689111709595, "learning_rate": 3.072828438833941e-07, "loss": 2.6442, "step": 60333 }, { "epoch": 2.96, "grad_norm": 0.8130603432655334, "learning_rate": 3.065865710307536e-07, "loss": 3.0189, "step": 60334 }, { "epoch": 2.96, "grad_norm": 0.7645628452301025, "learning_rate": 3.058910875129106e-07, "loss": 2.8934, "step": 60335 }, { "epoch": 2.96, "grad_norm": 0.759429395198822, "learning_rate": 3.051963933316637e-07, "loss": 2.8775, "step": 60336 }, { "epoch": 2.96, "grad_norm": 0.7373213171958923, "learning_rate": 3.04502488488878e-07, "loss": 2.9541, "step": 60337 }, { "epoch": 2.96, "grad_norm": 0.7441393136978149, "learning_rate": 3.038093729863522e-07, "loss": 2.9521, "step": 60338 }, { "epoch": 2.96, "grad_norm": 0.7805241346359253, "learning_rate": 3.0311704682588477e-07, "loss": 2.8231, "step": 60339 }, { "epoch": 2.96, "grad_norm": 0.7624837160110474, "learning_rate": 3.024255100093409e-07, "loss": 2.7851, "step": 60340 }, { "epoch": 2.96, "grad_norm": 0.800739049911499, "learning_rate": 3.0173476253851915e-07, "loss": 2.8968, "step": 60341 }, { "epoch": 2.96, "grad_norm": 0.7413939833641052, "learning_rate": 3.0104480441528465e-07, "loss": 2.7343, "step": 60342 }, { "epoch": 2.96, "grad_norm": 0.7429508566856384, "learning_rate": 3.0035563564140275e-07, "loss": 3.1652, "step": 60343 }, { "epoch": 2.96, "grad_norm": 0.7063744068145752, "learning_rate": 2.9966725621870527e-07, "loss": 2.7918, "step": 60344 }, { "epoch": 2.96, "grad_norm": 0.8287339806556702, "learning_rate": 2.989796661489907e-07, "loss": 2.8146, "step": 60345 }, { "epoch": 2.96, "grad_norm": 0.7384853959083557, "learning_rate": 2.98292865434091e-07, "loss": 3.0369, "step": 60346 }, { "epoch": 2.96, "grad_norm": 0.7586947679519653, "learning_rate": 2.976068540758381e-07, "loss": 3.0617, "step": 60347 }, { "epoch": 2.96, "grad_norm": 0.7649457454681396, "learning_rate": 2.9692163207596375e-07, "loss": 2.9689, "step": 60348 }, { "epoch": 2.96, "grad_norm": 0.8336549401283264, "learning_rate": 2.9623719943633326e-07, "loss": 3.0257, "step": 60349 }, { "epoch": 2.96, "grad_norm": 0.767746090888977, "learning_rate": 2.955535561587452e-07, "loss": 2.9512, "step": 60350 }, { "epoch": 2.96, "grad_norm": 0.737592339515686, "learning_rate": 2.948707022449981e-07, "loss": 3.0337, "step": 60351 }, { "epoch": 2.96, "grad_norm": 0.7662484049797058, "learning_rate": 2.941886376968905e-07, "loss": 3.0496, "step": 60352 }, { "epoch": 2.96, "grad_norm": 0.7627364993095398, "learning_rate": 2.9350736251618765e-07, "loss": 2.8868, "step": 60353 }, { "epoch": 2.96, "grad_norm": 0.7466115951538086, "learning_rate": 2.9282687670468817e-07, "loss": 3.1975, "step": 60354 }, { "epoch": 2.96, "grad_norm": 0.7700613141059875, "learning_rate": 2.921471802642572e-07, "loss": 2.9977, "step": 60355 }, { "epoch": 2.96, "grad_norm": 0.7370407581329346, "learning_rate": 2.9146827319659337e-07, "loss": 2.7743, "step": 60356 }, { "epoch": 2.96, "grad_norm": 0.7551794648170471, "learning_rate": 2.907901555035286e-07, "loss": 2.8775, "step": 60357 }, { "epoch": 2.96, "grad_norm": 0.7633122801780701, "learning_rate": 2.90112827186828e-07, "loss": 3.004, "step": 60358 }, { "epoch": 2.96, "grad_norm": 0.7845262289047241, "learning_rate": 2.8943628824832365e-07, "loss": 2.8964, "step": 60359 }, { "epoch": 2.96, "grad_norm": 0.7491711378097534, "learning_rate": 2.8876053868974734e-07, "loss": 2.9076, "step": 60360 }, { "epoch": 2.96, "grad_norm": 0.7596331238746643, "learning_rate": 2.880855785128644e-07, "loss": 2.9886, "step": 60361 }, { "epoch": 2.96, "grad_norm": 0.7640942931175232, "learning_rate": 2.8741140771953996e-07, "loss": 2.879, "step": 60362 }, { "epoch": 2.96, "grad_norm": 0.7326937317848206, "learning_rate": 2.867380263114394e-07, "loss": 3.1048, "step": 60363 }, { "epoch": 2.96, "grad_norm": 0.7499207854270935, "learning_rate": 2.860654342904278e-07, "loss": 2.9645, "step": 60364 }, { "epoch": 2.96, "grad_norm": 0.8236726522445679, "learning_rate": 2.853936316582039e-07, "loss": 2.878, "step": 60365 }, { "epoch": 2.96, "grad_norm": 0.7456938624382019, "learning_rate": 2.847226184165996e-07, "loss": 2.9094, "step": 60366 }, { "epoch": 2.96, "grad_norm": 0.7293276786804199, "learning_rate": 2.8405239456734675e-07, "loss": 2.7994, "step": 60367 }, { "epoch": 2.96, "grad_norm": 0.7146532535552979, "learning_rate": 2.833829601122439e-07, "loss": 2.8819, "step": 60368 }, { "epoch": 2.96, "grad_norm": 0.784432053565979, "learning_rate": 2.827143150529898e-07, "loss": 2.6586, "step": 60369 }, { "epoch": 2.96, "grad_norm": 0.7396759986877441, "learning_rate": 2.8204645939141623e-07, "loss": 2.786, "step": 60370 }, { "epoch": 2.96, "grad_norm": 0.7426009774208069, "learning_rate": 2.813793931292219e-07, "loss": 2.9405, "step": 60371 }, { "epoch": 2.96, "grad_norm": 0.7698811292648315, "learning_rate": 2.8071311626820526e-07, "loss": 2.6749, "step": 60372 }, { "epoch": 2.96, "grad_norm": 0.8511273860931396, "learning_rate": 2.8004762881009833e-07, "loss": 2.877, "step": 60373 }, { "epoch": 2.96, "grad_norm": 0.7467965483665466, "learning_rate": 2.793829307566664e-07, "loss": 2.8784, "step": 60374 }, { "epoch": 2.96, "grad_norm": 0.7436198592185974, "learning_rate": 2.7871902210967467e-07, "loss": 3.0372, "step": 60375 }, { "epoch": 2.96, "grad_norm": 0.7805924415588379, "learning_rate": 2.780559028708551e-07, "loss": 3.0216, "step": 60376 }, { "epoch": 2.96, "grad_norm": 0.7895100712776184, "learning_rate": 2.7739357304193963e-07, "loss": 2.9302, "step": 60377 }, { "epoch": 2.96, "grad_norm": 0.7379295229911804, "learning_rate": 2.7673203262469355e-07, "loss": 2.8006, "step": 60378 }, { "epoch": 2.96, "grad_norm": 0.7495266199111938, "learning_rate": 2.760712816208155e-07, "loss": 2.8561, "step": 60379 }, { "epoch": 2.96, "grad_norm": 0.7267303466796875, "learning_rate": 2.754113200321373e-07, "loss": 2.9655, "step": 60380 }, { "epoch": 2.96, "grad_norm": 0.743676483631134, "learning_rate": 2.7475214786032427e-07, "loss": 2.9309, "step": 60381 }, { "epoch": 2.96, "grad_norm": 0.7842069864273071, "learning_rate": 2.7409376510710847e-07, "loss": 2.9816, "step": 60382 }, { "epoch": 2.96, "grad_norm": 0.7070533037185669, "learning_rate": 2.7343617177428835e-07, "loss": 2.7144, "step": 60383 }, { "epoch": 2.96, "grad_norm": 0.7402557134628296, "learning_rate": 2.7277936786352925e-07, "loss": 3.0865, "step": 60384 }, { "epoch": 2.96, "grad_norm": 0.7583931684494019, "learning_rate": 2.7212335337656323e-07, "loss": 2.7086, "step": 60385 }, { "epoch": 2.96, "grad_norm": 0.7335100173950195, "learning_rate": 2.714681283151554e-07, "loss": 2.8412, "step": 60386 }, { "epoch": 2.96, "grad_norm": 0.7490924596786499, "learning_rate": 2.708136926810378e-07, "loss": 2.7074, "step": 60387 }, { "epoch": 2.96, "grad_norm": 0.7380550503730774, "learning_rate": 2.7016004647590905e-07, "loss": 2.6201, "step": 60388 }, { "epoch": 2.96, "grad_norm": 0.7886814475059509, "learning_rate": 2.695071897014678e-07, "loss": 3.0093, "step": 60389 }, { "epoch": 2.96, "grad_norm": 0.7457057237625122, "learning_rate": 2.688551223594793e-07, "loss": 2.8093, "step": 60390 }, { "epoch": 2.96, "grad_norm": 0.7019795179367065, "learning_rate": 2.682038444516421e-07, "loss": 2.7947, "step": 60391 }, { "epoch": 2.96, "grad_norm": 0.7453329563140869, "learning_rate": 2.675533559796883e-07, "loss": 2.8498, "step": 60392 }, { "epoch": 2.96, "grad_norm": 0.7653692364692688, "learning_rate": 2.6690365694528317e-07, "loss": 2.88, "step": 60393 }, { "epoch": 2.96, "grad_norm": 0.7203850746154785, "learning_rate": 2.662547473501919e-07, "loss": 2.7761, "step": 60394 }, { "epoch": 2.96, "grad_norm": 0.7470075488090515, "learning_rate": 2.656066271960799e-07, "loss": 2.8994, "step": 60395 }, { "epoch": 2.96, "grad_norm": 0.765425980091095, "learning_rate": 2.649592964846792e-07, "loss": 2.7976, "step": 60396 }, { "epoch": 2.96, "grad_norm": 0.7347993850708008, "learning_rate": 2.6431275521772156e-07, "loss": 2.7715, "step": 60397 }, { "epoch": 2.96, "grad_norm": 0.7307003736495972, "learning_rate": 2.6366700339683913e-07, "loss": 3.1203, "step": 60398 }, { "epoch": 2.96, "grad_norm": 0.7308559417724609, "learning_rate": 2.630220410237971e-07, "loss": 2.9197, "step": 60399 }, { "epoch": 2.96, "grad_norm": 0.7948404550552368, "learning_rate": 2.6237786810026085e-07, "loss": 2.9589, "step": 60400 }, { "epoch": 2.96, "grad_norm": 0.7453910112380981, "learning_rate": 2.61734484627929e-07, "loss": 2.9592, "step": 60401 }, { "epoch": 2.96, "grad_norm": 0.7211994528770447, "learning_rate": 2.610918906085002e-07, "loss": 3.1642, "step": 60402 }, { "epoch": 2.96, "grad_norm": 0.7621414065361023, "learning_rate": 2.604500860436731e-07, "loss": 2.7984, "step": 60403 }, { "epoch": 2.96, "grad_norm": 0.722687840461731, "learning_rate": 2.5980907093514635e-07, "loss": 3.0943, "step": 60404 }, { "epoch": 2.96, "grad_norm": 0.7349399924278259, "learning_rate": 2.5916884528458523e-07, "loss": 2.7707, "step": 60405 }, { "epoch": 2.96, "grad_norm": 0.7808841466903687, "learning_rate": 2.5852940909368846e-07, "loss": 2.7073, "step": 60406 }, { "epoch": 2.96, "grad_norm": 0.7772266268730164, "learning_rate": 2.578907623641546e-07, "loss": 2.9222, "step": 60407 }, { "epoch": 2.96, "grad_norm": 0.7723431587219238, "learning_rate": 2.572529050976491e-07, "loss": 2.8101, "step": 60408 }, { "epoch": 2.96, "grad_norm": 0.7670159935951233, "learning_rate": 2.5661583729583713e-07, "loss": 2.8798, "step": 60409 }, { "epoch": 2.96, "grad_norm": 0.780825138092041, "learning_rate": 2.5597955896041744e-07, "loss": 2.6376, "step": 60410 }, { "epoch": 2.96, "grad_norm": 0.7525875568389893, "learning_rate": 2.5534407009305536e-07, "loss": 2.8498, "step": 60411 }, { "epoch": 2.96, "grad_norm": 0.7023805975914001, "learning_rate": 2.547093706954162e-07, "loss": 2.9365, "step": 60412 }, { "epoch": 2.96, "grad_norm": 0.7754830121994019, "learning_rate": 2.540754607691986e-07, "loss": 2.9444, "step": 60413 }, { "epoch": 2.96, "grad_norm": 0.7659097909927368, "learning_rate": 2.534423403160679e-07, "loss": 2.9425, "step": 60414 }, { "epoch": 2.96, "grad_norm": 0.8276565074920654, "learning_rate": 2.5281000933768946e-07, "loss": 2.9218, "step": 60415 }, { "epoch": 2.96, "grad_norm": 0.778061032295227, "learning_rate": 2.521784678356953e-07, "loss": 3.0158, "step": 60416 }, { "epoch": 2.96, "grad_norm": 0.7523320317268372, "learning_rate": 2.515477158118173e-07, "loss": 2.7615, "step": 60417 }, { "epoch": 2.96, "grad_norm": 0.8072141408920288, "learning_rate": 2.50917753267621e-07, "loss": 2.6866, "step": 60418 }, { "epoch": 2.96, "grad_norm": 0.7441281676292419, "learning_rate": 2.502885802048715e-07, "loss": 2.6342, "step": 60419 }, { "epoch": 2.96, "grad_norm": 0.7458040714263916, "learning_rate": 2.4966019662513435e-07, "loss": 2.9695, "step": 60420 }, { "epoch": 2.96, "grad_norm": 0.7723721265792847, "learning_rate": 2.490326025301082e-07, "loss": 2.8109, "step": 60421 }, { "epoch": 2.96, "grad_norm": 0.7250966429710388, "learning_rate": 2.484057979214582e-07, "loss": 3.1312, "step": 60422 }, { "epoch": 2.96, "grad_norm": 0.7306153178215027, "learning_rate": 2.4777978280081657e-07, "loss": 2.9188, "step": 60423 }, { "epoch": 2.96, "grad_norm": 0.7456044554710388, "learning_rate": 2.4715455716984853e-07, "loss": 2.8441, "step": 60424 }, { "epoch": 2.96, "grad_norm": 0.7753781080245972, "learning_rate": 2.4653012103015293e-07, "loss": 2.8846, "step": 60425 }, { "epoch": 2.96, "grad_norm": 0.6862159371376038, "learning_rate": 2.4590647438342823e-07, "loss": 2.8749, "step": 60426 }, { "epoch": 2.96, "grad_norm": 0.7013908624649048, "learning_rate": 2.452836172313066e-07, "loss": 2.9398, "step": 60427 }, { "epoch": 2.96, "grad_norm": 0.7356927990913391, "learning_rate": 2.4466154957538676e-07, "loss": 2.9671, "step": 60428 }, { "epoch": 2.96, "grad_norm": 0.7475108504295349, "learning_rate": 2.4404027141736726e-07, "loss": 2.9092, "step": 60429 }, { "epoch": 2.96, "grad_norm": 0.7366794943809509, "learning_rate": 2.4341978275884686e-07, "loss": 2.8098, "step": 60430 }, { "epoch": 2.96, "grad_norm": 0.7213911414146423, "learning_rate": 2.428000836014576e-07, "loss": 3.0121, "step": 60431 }, { "epoch": 2.96, "grad_norm": 0.7313321232795715, "learning_rate": 2.4218117394686487e-07, "loss": 2.7566, "step": 60432 }, { "epoch": 2.96, "grad_norm": 0.7520000338554382, "learning_rate": 2.415630537966673e-07, "loss": 2.8234, "step": 60433 }, { "epoch": 2.96, "grad_norm": 0.7432150840759277, "learning_rate": 2.4094572315253025e-07, "loss": 3.0233, "step": 60434 }, { "epoch": 2.96, "grad_norm": 0.7823925018310547, "learning_rate": 2.403291820160191e-07, "loss": 2.9009, "step": 60435 }, { "epoch": 2.96, "grad_norm": 0.7720388770103455, "learning_rate": 2.397134303887993e-07, "loss": 2.8322, "step": 60436 }, { "epoch": 2.96, "grad_norm": 0.7479499578475952, "learning_rate": 2.390984682724695e-07, "loss": 2.8643, "step": 60437 }, { "epoch": 2.96, "grad_norm": 0.7411769032478333, "learning_rate": 2.384842956686617e-07, "loss": 2.8443, "step": 60438 }, { "epoch": 2.96, "grad_norm": 0.7253438830375671, "learning_rate": 2.3787091257904122e-07, "loss": 3.2558, "step": 60439 }, { "epoch": 2.96, "grad_norm": 0.7320099472999573, "learning_rate": 2.3725831900514026e-07, "loss": 2.7542, "step": 60440 }, { "epoch": 2.96, "grad_norm": 0.7726000547409058, "learning_rate": 2.3664651494862407e-07, "loss": 3.1502, "step": 60441 }, { "epoch": 2.96, "grad_norm": 0.7863835096359253, "learning_rate": 2.3603550041109142e-07, "loss": 2.9089, "step": 60442 }, { "epoch": 2.96, "grad_norm": 0.7607067823410034, "learning_rate": 2.3542527539414102e-07, "loss": 2.8335, "step": 60443 }, { "epoch": 2.96, "grad_norm": 0.7399455308914185, "learning_rate": 2.348158398994049e-07, "loss": 2.9403, "step": 60444 }, { "epoch": 2.96, "grad_norm": 0.7538642883300781, "learning_rate": 2.3420719392844844e-07, "loss": 2.9633, "step": 60445 }, { "epoch": 2.96, "grad_norm": 0.7989528775215149, "learning_rate": 2.335993374829037e-07, "loss": 2.8171, "step": 60446 }, { "epoch": 2.96, "grad_norm": 0.7250673770904541, "learning_rate": 2.3299227056436942e-07, "loss": 2.9535, "step": 60447 }, { "epoch": 2.96, "grad_norm": 0.7597386240959167, "learning_rate": 2.323859931744776e-07, "loss": 2.8269, "step": 60448 }, { "epoch": 2.96, "grad_norm": 0.7532151937484741, "learning_rate": 2.3178050531476034e-07, "loss": 2.8806, "step": 60449 }, { "epoch": 2.96, "grad_norm": 0.7555036544799805, "learning_rate": 2.3117580698681638e-07, "loss": 3.0434, "step": 60450 }, { "epoch": 2.96, "grad_norm": 0.7302855253219604, "learning_rate": 2.3057189819231103e-07, "loss": 2.9344, "step": 60451 }, { "epoch": 2.96, "grad_norm": 0.715503454208374, "learning_rate": 2.2996877893274313e-07, "loss": 2.769, "step": 60452 }, { "epoch": 2.96, "grad_norm": 0.770855188369751, "learning_rate": 2.2936644920977798e-07, "loss": 2.8592, "step": 60453 }, { "epoch": 2.96, "grad_norm": 0.7369658350944519, "learning_rate": 2.2876490902498102e-07, "loss": 2.8822, "step": 60454 }, { "epoch": 2.96, "grad_norm": 0.7520059943199158, "learning_rate": 2.2816415837988432e-07, "loss": 2.9528, "step": 60455 }, { "epoch": 2.96, "grad_norm": 0.7750919461250305, "learning_rate": 2.2756419727611997e-07, "loss": 2.8362, "step": 60456 }, { "epoch": 2.96, "grad_norm": 0.7011710405349731, "learning_rate": 2.2696502571528664e-07, "loss": 2.9554, "step": 60457 }, { "epoch": 2.96, "grad_norm": 0.7666123509407043, "learning_rate": 2.2636664369891643e-07, "loss": 2.6842, "step": 60458 }, { "epoch": 2.96, "grad_norm": 0.7649023532867432, "learning_rate": 2.2576905122860812e-07, "loss": 2.9592, "step": 60459 }, { "epoch": 2.96, "grad_norm": 0.7662659287452698, "learning_rate": 2.2517224830592707e-07, "loss": 3.0518, "step": 60460 }, { "epoch": 2.96, "grad_norm": 0.7530821561813354, "learning_rate": 2.245762349324387e-07, "loss": 2.9532, "step": 60461 }, { "epoch": 2.96, "grad_norm": 0.7605764865875244, "learning_rate": 2.2398101110974176e-07, "loss": 2.7051, "step": 60462 }, { "epoch": 2.96, "grad_norm": 0.760724663734436, "learning_rate": 2.2338657683936834e-07, "loss": 2.9107, "step": 60463 }, { "epoch": 2.96, "grad_norm": 0.744367241859436, "learning_rate": 2.2279293212291715e-07, "loss": 2.7573, "step": 60464 }, { "epoch": 2.96, "grad_norm": 0.7295060157775879, "learning_rate": 2.2220007696195364e-07, "loss": 2.9798, "step": 60465 }, { "epoch": 2.96, "grad_norm": 0.7500439286231995, "learning_rate": 2.2160801135797658e-07, "loss": 2.9385, "step": 60466 }, { "epoch": 2.96, "grad_norm": 0.7618116140365601, "learning_rate": 2.21016735312618e-07, "loss": 3.0254, "step": 60467 }, { "epoch": 2.96, "grad_norm": 0.7346891760826111, "learning_rate": 2.2042624882741e-07, "loss": 2.9205, "step": 60468 }, { "epoch": 2.96, "grad_norm": 0.7819466590881348, "learning_rate": 2.1983655190391804e-07, "loss": 2.892, "step": 60469 }, { "epoch": 2.96, "grad_norm": 0.7500108480453491, "learning_rate": 2.1924764454364084e-07, "loss": 2.7599, "step": 60470 }, { "epoch": 2.96, "grad_norm": 0.7795835137367249, "learning_rate": 2.1865952674821051e-07, "loss": 2.8849, "step": 60471 }, { "epoch": 2.96, "grad_norm": 0.7483556270599365, "learning_rate": 2.1807219851912582e-07, "loss": 2.7881, "step": 60472 }, { "epoch": 2.96, "grad_norm": 0.7419742345809937, "learning_rate": 2.1748565985791888e-07, "loss": 3.0436, "step": 60473 }, { "epoch": 2.96, "grad_norm": 0.7780374884605408, "learning_rate": 2.168999107661884e-07, "loss": 2.9787, "step": 60474 }, { "epoch": 2.96, "grad_norm": 0.7674023509025574, "learning_rate": 2.163149512454665e-07, "loss": 3.093, "step": 60475 }, { "epoch": 2.96, "grad_norm": 0.7375624775886536, "learning_rate": 2.1573078129725196e-07, "loss": 2.7765, "step": 60476 }, { "epoch": 2.96, "grad_norm": 0.7869064211845398, "learning_rate": 2.1514740092311023e-07, "loss": 2.8789, "step": 60477 }, { "epoch": 2.96, "grad_norm": 0.7125802040100098, "learning_rate": 2.1456481012454006e-07, "loss": 2.7467, "step": 60478 }, { "epoch": 2.96, "grad_norm": 0.7873156666755676, "learning_rate": 2.1398300890317354e-07, "loss": 2.6907, "step": 60479 }, { "epoch": 2.96, "grad_norm": 0.7791264653205872, "learning_rate": 2.1340199726044282e-07, "loss": 2.898, "step": 60480 }, { "epoch": 2.96, "grad_norm": 0.7683501839637756, "learning_rate": 2.1282177519791333e-07, "loss": 2.8212, "step": 60481 }, { "epoch": 2.96, "grad_norm": 0.7381917834281921, "learning_rate": 2.1224234271715045e-07, "loss": 2.8748, "step": 60482 }, { "epoch": 2.96, "grad_norm": 0.7684051394462585, "learning_rate": 2.1166369981961973e-07, "loss": 2.9751, "step": 60483 }, { "epoch": 2.96, "grad_norm": 0.7232508659362793, "learning_rate": 2.1108584650688652e-07, "loss": 2.8718, "step": 60484 }, { "epoch": 2.96, "grad_norm": 0.7385919094085693, "learning_rate": 2.1050878278041637e-07, "loss": 3.0691, "step": 60485 }, { "epoch": 2.96, "grad_norm": 0.7586545944213867, "learning_rate": 2.09932508641808e-07, "loss": 2.9754, "step": 60486 }, { "epoch": 2.96, "grad_norm": 0.7664059996604919, "learning_rate": 2.0935702409252686e-07, "loss": 2.9511, "step": 60487 }, { "epoch": 2.96, "grad_norm": 0.7628438472747803, "learning_rate": 2.0878232913413839e-07, "loss": 2.8217, "step": 60488 }, { "epoch": 2.96, "grad_norm": 0.765557587146759, "learning_rate": 2.082084237680748e-07, "loss": 2.8077, "step": 60489 }, { "epoch": 2.96, "grad_norm": 0.7436291575431824, "learning_rate": 2.0763530799593474e-07, "loss": 2.8652, "step": 60490 }, { "epoch": 2.96, "grad_norm": 0.7807187438011169, "learning_rate": 2.070629818191505e-07, "loss": 2.835, "step": 60491 }, { "epoch": 2.96, "grad_norm": 0.7364583611488342, "learning_rate": 2.064914452392874e-07, "loss": 2.8279, "step": 60492 }, { "epoch": 2.96, "grad_norm": 0.8078753352165222, "learning_rate": 2.05920698257811e-07, "loss": 2.6383, "step": 60493 }, { "epoch": 2.96, "grad_norm": 0.7190173268318176, "learning_rate": 2.0535074087625337e-07, "loss": 2.8113, "step": 60494 }, { "epoch": 2.96, "grad_norm": 0.7492725253105164, "learning_rate": 2.047815730961133e-07, "loss": 2.8811, "step": 60495 }, { "epoch": 2.96, "grad_norm": 0.7870933413505554, "learning_rate": 2.0421319491888966e-07, "loss": 3.0474, "step": 60496 }, { "epoch": 2.96, "grad_norm": 0.7799807786941528, "learning_rate": 2.0364560634604787e-07, "loss": 2.8235, "step": 60497 }, { "epoch": 2.96, "grad_norm": 0.7293304800987244, "learning_rate": 2.030788073791201e-07, "loss": 2.9151, "step": 60498 }, { "epoch": 2.96, "grad_norm": 0.7670882344245911, "learning_rate": 2.0251279801957177e-07, "loss": 2.7792, "step": 60499 }, { "epoch": 2.96, "grad_norm": 0.7522982358932495, "learning_rate": 2.0194757826893503e-07, "loss": 2.9497, "step": 60500 }, { "epoch": 2.97, "grad_norm": 0.811562716960907, "learning_rate": 2.013831481286421e-07, "loss": 3.0968, "step": 60501 }, { "epoch": 2.97, "grad_norm": 0.7589870095252991, "learning_rate": 2.0081950760022502e-07, "loss": 2.9288, "step": 60502 }, { "epoch": 2.97, "grad_norm": 0.7747796177864075, "learning_rate": 2.0025665668514935e-07, "loss": 2.8051, "step": 60503 }, { "epoch": 2.97, "grad_norm": 0.7064770460128784, "learning_rate": 1.9969459538488052e-07, "loss": 3.077, "step": 60504 }, { "epoch": 2.97, "grad_norm": 0.784367024898529, "learning_rate": 1.991333237009507e-07, "loss": 2.9097, "step": 60505 }, { "epoch": 2.97, "grad_norm": 0.7294931411743164, "learning_rate": 1.9857284163479203e-07, "loss": 2.8172, "step": 60506 }, { "epoch": 2.97, "grad_norm": 0.7817466259002686, "learning_rate": 1.9801314918787003e-07, "loss": 2.8352, "step": 60507 }, { "epoch": 2.97, "grad_norm": 0.790314257144928, "learning_rate": 1.974542463617168e-07, "loss": 2.8238, "step": 60508 }, { "epoch": 2.97, "grad_norm": 0.833737313747406, "learning_rate": 1.9689613315776454e-07, "loss": 2.7384, "step": 60509 }, { "epoch": 2.97, "grad_norm": 0.7663082480430603, "learning_rate": 1.963388095774787e-07, "loss": 2.7838, "step": 60510 }, { "epoch": 2.97, "grad_norm": 0.7980212569236755, "learning_rate": 1.9578227562235816e-07, "loss": 3.0168, "step": 60511 }, { "epoch": 2.97, "grad_norm": 0.7827617526054382, "learning_rate": 1.9522653129383504e-07, "loss": 2.9978, "step": 60512 }, { "epoch": 2.97, "grad_norm": 0.7780979871749878, "learning_rate": 1.9467157659337486e-07, "loss": 2.8574, "step": 60513 }, { "epoch": 2.97, "grad_norm": 0.716793954372406, "learning_rate": 1.941174115224764e-07, "loss": 2.8676, "step": 60514 }, { "epoch": 2.97, "grad_norm": 0.7576918601989746, "learning_rate": 1.9356403608257187e-07, "loss": 2.9088, "step": 60515 }, { "epoch": 2.97, "grad_norm": 0.725763201713562, "learning_rate": 1.9301145027509346e-07, "loss": 2.8202, "step": 60516 }, { "epoch": 2.97, "grad_norm": 0.7284558415412903, "learning_rate": 1.9245965410153996e-07, "loss": 2.9063, "step": 60517 }, { "epoch": 2.97, "grad_norm": 0.729720950126648, "learning_rate": 1.9190864756334355e-07, "loss": 2.6978, "step": 60518 }, { "epoch": 2.97, "grad_norm": 0.7677045464515686, "learning_rate": 1.9135843066196976e-07, "loss": 2.9292, "step": 60519 }, { "epoch": 2.97, "grad_norm": 0.7284916639328003, "learning_rate": 1.9080900339885075e-07, "loss": 3.099, "step": 60520 }, { "epoch": 2.97, "grad_norm": 0.7096062898635864, "learning_rate": 1.90260365775452e-07, "loss": 2.7886, "step": 60521 }, { "epoch": 2.97, "grad_norm": 0.7152775526046753, "learning_rate": 1.8971251779320574e-07, "loss": 2.8109, "step": 60522 }, { "epoch": 2.97, "grad_norm": 0.7443001866340637, "learning_rate": 1.8916545945354412e-07, "loss": 2.9791, "step": 60523 }, { "epoch": 2.97, "grad_norm": 0.7410553693771362, "learning_rate": 1.8861919075793263e-07, "loss": 2.8849, "step": 60524 }, { "epoch": 2.97, "grad_norm": 0.6965992450714111, "learning_rate": 1.880737117078035e-07, "loss": 2.9143, "step": 60525 }, { "epoch": 2.97, "grad_norm": 0.7935270667076111, "learning_rate": 1.8752902230458888e-07, "loss": 2.9024, "step": 60526 }, { "epoch": 2.97, "grad_norm": 0.725361168384552, "learning_rate": 1.8698512254968766e-07, "loss": 2.9605, "step": 60527 }, { "epoch": 2.97, "grad_norm": 0.745184063911438, "learning_rate": 1.8644201244459866e-07, "loss": 2.8531, "step": 60528 }, { "epoch": 2.97, "grad_norm": 0.7021890878677368, "learning_rate": 1.8589969199072074e-07, "loss": 3.0086, "step": 60529 }, { "epoch": 2.97, "grad_norm": 0.7811173796653748, "learning_rate": 1.853581611894861e-07, "loss": 2.9315, "step": 60530 }, { "epoch": 2.97, "grad_norm": 0.7329429984092712, "learning_rate": 1.848174200422936e-07, "loss": 2.9401, "step": 60531 }, { "epoch": 2.97, "grad_norm": 0.7532490491867065, "learning_rate": 1.8427746855060877e-07, "loss": 2.6845, "step": 60532 }, { "epoch": 2.97, "grad_norm": 0.7217630743980408, "learning_rate": 1.8373830671583045e-07, "loss": 2.6904, "step": 60533 }, { "epoch": 2.97, "grad_norm": 0.772171676158905, "learning_rate": 1.8319993453939087e-07, "loss": 2.8626, "step": 60534 }, { "epoch": 2.97, "grad_norm": 0.6981392502784729, "learning_rate": 1.8266235202272217e-07, "loss": 3.0572, "step": 60535 }, { "epoch": 2.97, "grad_norm": 0.7536487579345703, "learning_rate": 1.8212555916718997e-07, "loss": 3.0188, "step": 60536 }, { "epoch": 2.97, "grad_norm": 0.7514350414276123, "learning_rate": 1.815895559742264e-07, "loss": 2.8476, "step": 60537 }, { "epoch": 2.97, "grad_norm": 0.7271486520767212, "learning_rate": 1.8105434244529705e-07, "loss": 2.8713, "step": 60538 }, { "epoch": 2.97, "grad_norm": 0.8349723815917969, "learning_rate": 1.805199185817341e-07, "loss": 2.9083, "step": 60539 }, { "epoch": 2.97, "grad_norm": 0.7469309568405151, "learning_rate": 1.7998628438500306e-07, "loss": 2.9315, "step": 60540 }, { "epoch": 2.97, "grad_norm": 0.7527533769607544, "learning_rate": 1.7945343985646955e-07, "loss": 3.0218, "step": 60541 }, { "epoch": 2.97, "grad_norm": 0.713559091091156, "learning_rate": 1.789213849975657e-07, "loss": 2.7901, "step": 60542 }, { "epoch": 2.97, "grad_norm": 0.7423522472381592, "learning_rate": 1.7839011980965712e-07, "loss": 2.95, "step": 60543 }, { "epoch": 2.97, "grad_norm": 0.7502104640007019, "learning_rate": 1.7785964429420928e-07, "loss": 2.8922, "step": 60544 }, { "epoch": 2.97, "grad_norm": 0.7708683609962463, "learning_rate": 1.7732995845255448e-07, "loss": 2.8545, "step": 60545 }, { "epoch": 2.97, "grad_norm": 0.7669790983200073, "learning_rate": 1.7680106228609158e-07, "loss": 3.0175, "step": 60546 }, { "epoch": 2.97, "grad_norm": 0.7533451318740845, "learning_rate": 1.7627295579625277e-07, "loss": 2.8942, "step": 60547 }, { "epoch": 2.97, "grad_norm": 0.7395743131637573, "learning_rate": 1.7574563898440364e-07, "loss": 2.8993, "step": 60548 }, { "epoch": 2.97, "grad_norm": 0.777221143245697, "learning_rate": 1.7521911185194305e-07, "loss": 2.8378, "step": 60549 }, { "epoch": 2.97, "grad_norm": 0.7704327702522278, "learning_rate": 1.7469337440026986e-07, "loss": 2.7663, "step": 60550 }, { "epoch": 2.97, "grad_norm": 0.7657343149185181, "learning_rate": 1.741684266307164e-07, "loss": 2.8244, "step": 60551 }, { "epoch": 2.97, "grad_norm": 0.7424664497375488, "learning_rate": 1.736442685447148e-07, "loss": 2.9918, "step": 60552 }, { "epoch": 2.97, "grad_norm": 0.7621636390686035, "learning_rate": 1.7312090014363068e-07, "loss": 2.7005, "step": 60553 }, { "epoch": 2.97, "grad_norm": 0.8045515418052673, "learning_rate": 1.7259832142886286e-07, "loss": 2.9842, "step": 60554 }, { "epoch": 2.97, "grad_norm": 0.7256165742874146, "learning_rate": 1.7207653240174369e-07, "loss": 2.92, "step": 60555 }, { "epoch": 2.97, "grad_norm": 0.7653000354766846, "learning_rate": 1.7155553306367197e-07, "loss": 2.6685, "step": 60556 }, { "epoch": 2.97, "grad_norm": 0.7511091828346252, "learning_rate": 1.7103532341604665e-07, "loss": 2.8257, "step": 60557 }, { "epoch": 2.97, "grad_norm": 0.7537457346916199, "learning_rate": 1.7051590346019994e-07, "loss": 2.954, "step": 60558 }, { "epoch": 2.97, "grad_norm": 0.7920700311660767, "learning_rate": 1.6999727319749745e-07, "loss": 2.7778, "step": 60559 }, { "epoch": 2.97, "grad_norm": 0.7353650331497192, "learning_rate": 1.6947943262930475e-07, "loss": 2.7668, "step": 60560 }, { "epoch": 2.97, "grad_norm": 0.68696129322052, "learning_rate": 1.689623817570207e-07, "loss": 2.8671, "step": 60561 }, { "epoch": 2.97, "grad_norm": 0.7720268368721008, "learning_rate": 1.684461205819776e-07, "loss": 2.7461, "step": 60562 }, { "epoch": 2.97, "grad_norm": 0.787878692150116, "learning_rate": 1.679306491055743e-07, "loss": 2.6584, "step": 60563 }, { "epoch": 2.97, "grad_norm": 0.7417284846305847, "learning_rate": 1.674159673291098e-07, "loss": 2.8816, "step": 60564 }, { "epoch": 2.97, "grad_norm": 0.7512297034263611, "learning_rate": 1.669020752539829e-07, "loss": 2.8187, "step": 60565 }, { "epoch": 2.97, "grad_norm": 0.7542157769203186, "learning_rate": 1.6638897288152598e-07, "loss": 2.9312, "step": 60566 }, { "epoch": 2.97, "grad_norm": 0.7483489513397217, "learning_rate": 1.6587666021307123e-07, "loss": 2.934, "step": 60567 }, { "epoch": 2.97, "grad_norm": 0.7624621987342834, "learning_rate": 1.6536513725001754e-07, "loss": 2.9115, "step": 60568 }, { "epoch": 2.97, "grad_norm": 0.7325168251991272, "learning_rate": 1.648544039936972e-07, "loss": 2.9444, "step": 60569 }, { "epoch": 2.97, "grad_norm": 0.693103015422821, "learning_rate": 1.6434446044540916e-07, "loss": 2.6785, "step": 60570 }, { "epoch": 2.97, "grad_norm": 0.7448264360427856, "learning_rate": 1.638353066065856e-07, "loss": 2.8162, "step": 60571 }, { "epoch": 2.97, "grad_norm": 0.7847685217857361, "learning_rate": 1.633269424784589e-07, "loss": 2.9311, "step": 60572 }, { "epoch": 2.97, "grad_norm": 0.7694564461708069, "learning_rate": 1.628193680624612e-07, "loss": 2.8487, "step": 60573 }, { "epoch": 2.97, "grad_norm": 0.7881042957305908, "learning_rate": 1.623125833598915e-07, "loss": 2.7117, "step": 60574 }, { "epoch": 2.97, "grad_norm": 0.7773247957229614, "learning_rate": 1.6180658837208204e-07, "loss": 3.0141, "step": 60575 }, { "epoch": 2.97, "grad_norm": 0.7404429912567139, "learning_rate": 1.613013831003651e-07, "loss": 2.8635, "step": 60576 }, { "epoch": 2.97, "grad_norm": 0.7321801781654358, "learning_rate": 1.6079696754607298e-07, "loss": 2.7727, "step": 60577 }, { "epoch": 2.97, "grad_norm": 0.7246294617652893, "learning_rate": 1.6029334171053786e-07, "loss": 3.004, "step": 60578 }, { "epoch": 2.97, "grad_norm": 0.7736529111862183, "learning_rate": 1.597905055950921e-07, "loss": 2.9635, "step": 60579 }, { "epoch": 2.97, "grad_norm": 0.746509850025177, "learning_rate": 1.592884592010346e-07, "loss": 2.8717, "step": 60580 }, { "epoch": 2.97, "grad_norm": 0.7547891736030579, "learning_rate": 1.587872025296977e-07, "loss": 3.016, "step": 60581 }, { "epoch": 2.97, "grad_norm": 0.7664916515350342, "learning_rate": 1.5828673558244687e-07, "loss": 2.9747, "step": 60582 }, { "epoch": 2.97, "grad_norm": 0.7931352853775024, "learning_rate": 1.5778705836054784e-07, "loss": 2.8982, "step": 60583 }, { "epoch": 2.97, "grad_norm": 0.7977129817008972, "learning_rate": 1.5728817086533286e-07, "loss": 3.0114, "step": 60584 }, { "epoch": 2.97, "grad_norm": 0.7142460346221924, "learning_rate": 1.5679007309810088e-07, "loss": 2.9114, "step": 60585 }, { "epoch": 2.97, "grad_norm": 0.7245585918426514, "learning_rate": 1.5629276506018418e-07, "loss": 2.7651, "step": 60586 }, { "epoch": 2.97, "grad_norm": 0.742992639541626, "learning_rate": 1.557962467528817e-07, "loss": 2.851, "step": 60587 }, { "epoch": 2.97, "grad_norm": 0.7510650753974915, "learning_rate": 1.5530051817752576e-07, "loss": 2.9322, "step": 60588 }, { "epoch": 2.97, "grad_norm": 0.7841531038284302, "learning_rate": 1.5480557933538195e-07, "loss": 3.0033, "step": 60589 }, { "epoch": 2.97, "grad_norm": 0.7130355834960938, "learning_rate": 1.5431143022778257e-07, "loss": 2.8831, "step": 60590 }, { "epoch": 2.97, "grad_norm": 0.7567395567893982, "learning_rate": 1.538180708560266e-07, "loss": 3.1319, "step": 60591 }, { "epoch": 2.97, "grad_norm": 0.7231013178825378, "learning_rate": 1.5332550122141295e-07, "loss": 2.9103, "step": 60592 }, { "epoch": 2.97, "grad_norm": 0.7480908036231995, "learning_rate": 1.528337213252073e-07, "loss": 2.7543, "step": 60593 }, { "epoch": 2.97, "grad_norm": 0.7661453485488892, "learning_rate": 1.5234273116874197e-07, "loss": 2.8553, "step": 60594 }, { "epoch": 2.97, "grad_norm": 0.7607757449150085, "learning_rate": 1.5185253075331584e-07, "loss": 3.1001, "step": 60595 }, { "epoch": 2.97, "grad_norm": 0.7835237383842468, "learning_rate": 1.5136312008019458e-07, "loss": 2.6377, "step": 60596 }, { "epoch": 2.97, "grad_norm": 0.7366989254951477, "learning_rate": 1.5087449915064387e-07, "loss": 2.9224, "step": 60597 }, { "epoch": 2.97, "grad_norm": 0.7715787887573242, "learning_rate": 1.5038666796602926e-07, "loss": 2.8592, "step": 60598 }, { "epoch": 2.97, "grad_norm": 0.7971885800361633, "learning_rate": 1.4989962652754983e-07, "loss": 2.7836, "step": 60599 }, { "epoch": 2.97, "grad_norm": 0.770579993724823, "learning_rate": 1.4941337483657113e-07, "loss": 2.8284, "step": 60600 }, { "epoch": 2.97, "grad_norm": 0.7559013962745667, "learning_rate": 1.489279128942922e-07, "loss": 2.7782, "step": 60601 }, { "epoch": 2.97, "grad_norm": 0.737792432308197, "learning_rate": 1.4844324070204528e-07, "loss": 2.8605, "step": 60602 }, { "epoch": 2.97, "grad_norm": 0.726133406162262, "learning_rate": 1.4795935826109606e-07, "loss": 3.0003, "step": 60603 }, { "epoch": 2.97, "grad_norm": 0.7365215420722961, "learning_rate": 1.4747626557271018e-07, "loss": 2.8262, "step": 60604 }, { "epoch": 2.97, "grad_norm": 0.7618759274482727, "learning_rate": 1.4699396263815332e-07, "loss": 2.8041, "step": 60605 }, { "epoch": 2.97, "grad_norm": 0.7196887731552124, "learning_rate": 1.4651244945869113e-07, "loss": 2.7637, "step": 60606 }, { "epoch": 2.97, "grad_norm": 0.7778249382972717, "learning_rate": 1.4603172603565583e-07, "loss": 2.9692, "step": 60607 }, { "epoch": 2.97, "grad_norm": 0.8094426989555359, "learning_rate": 1.455517923702465e-07, "loss": 2.815, "step": 60608 }, { "epoch": 2.97, "grad_norm": 0.706639289855957, "learning_rate": 1.450726484637288e-07, "loss": 2.6698, "step": 60609 }, { "epoch": 2.97, "grad_norm": 0.6968713998794556, "learning_rate": 1.4459429431736835e-07, "loss": 3.1378, "step": 60610 }, { "epoch": 2.97, "grad_norm": 0.7427403926849365, "learning_rate": 1.4411672993246415e-07, "loss": 2.8479, "step": 60611 }, { "epoch": 2.97, "grad_norm": 0.7301240563392639, "learning_rate": 1.4363995531024853e-07, "loss": 2.8367, "step": 60612 }, { "epoch": 2.97, "grad_norm": 0.7708475589752197, "learning_rate": 1.4316397045198712e-07, "loss": 2.9659, "step": 60613 }, { "epoch": 2.97, "grad_norm": 0.7885755300521851, "learning_rate": 1.426887753589123e-07, "loss": 2.8787, "step": 60614 }, { "epoch": 2.97, "grad_norm": 0.73357093334198, "learning_rate": 1.4221437003228974e-07, "loss": 2.7029, "step": 60615 }, { "epoch": 2.97, "grad_norm": 0.7780753374099731, "learning_rate": 1.4174075447335177e-07, "loss": 2.9124, "step": 60616 }, { "epoch": 2.97, "grad_norm": 0.7349255084991455, "learning_rate": 1.41267928683364e-07, "loss": 2.872, "step": 60617 }, { "epoch": 2.97, "grad_norm": 0.7401142120361328, "learning_rate": 1.4079589266355885e-07, "loss": 2.8839, "step": 60618 }, { "epoch": 2.97, "grad_norm": 0.7998162508010864, "learning_rate": 1.403246464152019e-07, "loss": 2.874, "step": 60619 }, { "epoch": 2.97, "grad_norm": 0.7657843232154846, "learning_rate": 1.3985418993952558e-07, "loss": 2.8038, "step": 60620 }, { "epoch": 2.97, "grad_norm": 0.7320613861083984, "learning_rate": 1.3938452323776217e-07, "loss": 2.9106, "step": 60621 }, { "epoch": 2.97, "grad_norm": 0.7517797350883484, "learning_rate": 1.3891564631114404e-07, "loss": 3.0539, "step": 60622 }, { "epoch": 2.97, "grad_norm": 0.7234874963760376, "learning_rate": 1.3844755916090355e-07, "loss": 2.8527, "step": 60623 }, { "epoch": 2.97, "grad_norm": 0.7384620904922485, "learning_rate": 1.3798026178830634e-07, "loss": 2.853, "step": 60624 }, { "epoch": 2.97, "grad_norm": 0.7372459173202515, "learning_rate": 1.3751375419455145e-07, "loss": 2.9442, "step": 60625 }, { "epoch": 2.97, "grad_norm": 0.7490679025650024, "learning_rate": 1.3704803638087125e-07, "loss": 2.808, "step": 60626 }, { "epoch": 2.97, "grad_norm": 0.7829398512840271, "learning_rate": 1.3658310834849807e-07, "loss": 2.9018, "step": 60627 }, { "epoch": 2.97, "grad_norm": 0.7897228002548218, "learning_rate": 1.3611897009866425e-07, "loss": 2.5938, "step": 60628 }, { "epoch": 2.97, "grad_norm": 0.7101447582244873, "learning_rate": 1.3565562163256882e-07, "loss": 2.9465, "step": 60629 }, { "epoch": 2.97, "grad_norm": 0.7695879340171814, "learning_rate": 1.3519306295144416e-07, "loss": 2.8979, "step": 60630 }, { "epoch": 2.97, "grad_norm": 0.752558708190918, "learning_rate": 1.347312940565226e-07, "loss": 3.0605, "step": 60631 }, { "epoch": 2.97, "grad_norm": 0.7636677026748657, "learning_rate": 1.342703149490032e-07, "loss": 2.7709, "step": 60632 }, { "epoch": 2.97, "grad_norm": 0.7717976570129395, "learning_rate": 1.3381012563008497e-07, "loss": 2.8797, "step": 60633 }, { "epoch": 2.97, "grad_norm": 0.7932553291320801, "learning_rate": 1.3335072610103359e-07, "loss": 2.944, "step": 60634 }, { "epoch": 2.97, "grad_norm": 0.7431058883666992, "learning_rate": 1.3289211636301478e-07, "loss": 2.8666, "step": 60635 }, { "epoch": 2.97, "grad_norm": 0.7574412226676941, "learning_rate": 1.324342964172276e-07, "loss": 3.0864, "step": 60636 }, { "epoch": 2.97, "grad_norm": 0.7627542018890381, "learning_rate": 1.3197726626493764e-07, "loss": 2.9098, "step": 60637 }, { "epoch": 2.97, "grad_norm": 0.7745662331581116, "learning_rate": 1.315210259072774e-07, "loss": 2.9873, "step": 60638 }, { "epoch": 2.97, "grad_norm": 0.7328659296035767, "learning_rate": 1.3106557534547923e-07, "loss": 2.9549, "step": 60639 }, { "epoch": 2.97, "grad_norm": 0.778195321559906, "learning_rate": 1.3061091458077543e-07, "loss": 2.9793, "step": 60640 }, { "epoch": 2.97, "grad_norm": 0.7515281438827515, "learning_rate": 1.3015704361429846e-07, "loss": 2.9819, "step": 60641 }, { "epoch": 2.97, "grad_norm": 0.7318151593208313, "learning_rate": 1.2970396244728065e-07, "loss": 2.7252, "step": 60642 }, { "epoch": 2.97, "grad_norm": 0.7786916494369507, "learning_rate": 1.2925167108092105e-07, "loss": 2.9772, "step": 60643 }, { "epoch": 2.97, "grad_norm": 0.7459295392036438, "learning_rate": 1.288001695164187e-07, "loss": 2.9008, "step": 60644 }, { "epoch": 2.97, "grad_norm": 0.7117395401000977, "learning_rate": 1.2834945775490601e-07, "loss": 3.1014, "step": 60645 }, { "epoch": 2.97, "grad_norm": 0.7417547702789307, "learning_rate": 1.2789953579764866e-07, "loss": 3.09, "step": 60646 }, { "epoch": 2.97, "grad_norm": 0.8106539249420166, "learning_rate": 1.2745040364574576e-07, "loss": 3.0429, "step": 60647 }, { "epoch": 2.97, "grad_norm": 0.7398137450218201, "learning_rate": 1.2700206130046297e-07, "loss": 3.0541, "step": 60648 }, { "epoch": 2.97, "grad_norm": 0.7454041242599487, "learning_rate": 1.265545087629327e-07, "loss": 2.9233, "step": 60649 }, { "epoch": 2.97, "grad_norm": 0.7723972201347351, "learning_rate": 1.2610774603435403e-07, "loss": 3.01, "step": 60650 }, { "epoch": 2.97, "grad_norm": 0.7595714926719666, "learning_rate": 1.2566177311589264e-07, "loss": 3.0519, "step": 60651 }, { "epoch": 2.97, "grad_norm": 0.7491887807846069, "learning_rate": 1.2521659000871433e-07, "loss": 2.7545, "step": 60652 }, { "epoch": 2.97, "grad_norm": 0.7535812258720398, "learning_rate": 1.2477219671401807e-07, "loss": 2.9838, "step": 60653 }, { "epoch": 2.97, "grad_norm": 0.7194299101829529, "learning_rate": 1.2432859323296962e-07, "loss": 2.8807, "step": 60654 }, { "epoch": 2.97, "grad_norm": 0.7006655335426331, "learning_rate": 1.2388577956670143e-07, "loss": 2.7205, "step": 60655 }, { "epoch": 2.97, "grad_norm": 0.7317060232162476, "learning_rate": 1.2344375571641251e-07, "loss": 2.8463, "step": 60656 }, { "epoch": 2.97, "grad_norm": 0.761465311050415, "learning_rate": 1.2300252168326862e-07, "loss": 2.9015, "step": 60657 }, { "epoch": 2.97, "grad_norm": 0.7406482696533203, "learning_rate": 1.225620774684355e-07, "loss": 2.8137, "step": 60658 }, { "epoch": 2.97, "grad_norm": 0.7317981123924255, "learning_rate": 1.2212242307304554e-07, "loss": 2.7945, "step": 60659 }, { "epoch": 2.97, "grad_norm": 0.7996775507926941, "learning_rate": 1.216835584982645e-07, "loss": 2.9023, "step": 60660 }, { "epoch": 2.97, "grad_norm": 0.7539355158805847, "learning_rate": 1.212454837452581e-07, "loss": 3.0568, "step": 60661 }, { "epoch": 2.97, "grad_norm": 0.7302529215812683, "learning_rate": 1.2080819881519211e-07, "loss": 2.985, "step": 60662 }, { "epoch": 2.97, "grad_norm": 0.6857694387435913, "learning_rate": 1.203717037091989e-07, "loss": 2.8932, "step": 60663 }, { "epoch": 2.97, "grad_norm": 0.7334001660346985, "learning_rate": 1.1993599842844426e-07, "loss": 2.8093, "step": 60664 }, { "epoch": 2.97, "grad_norm": 0.7142249345779419, "learning_rate": 1.1950108297406058e-07, "loss": 2.8508, "step": 60665 }, { "epoch": 2.97, "grad_norm": 0.7907318472862244, "learning_rate": 1.1906695734718031e-07, "loss": 2.9828, "step": 60666 }, { "epoch": 2.97, "grad_norm": 0.8050836324691772, "learning_rate": 1.1863362154896916e-07, "loss": 2.9504, "step": 60667 }, { "epoch": 2.97, "grad_norm": 0.7420530915260315, "learning_rate": 1.1820107558055958e-07, "loss": 2.9651, "step": 60668 }, { "epoch": 2.97, "grad_norm": 0.7614012360572815, "learning_rate": 1.177693194431173e-07, "loss": 2.9209, "step": 60669 }, { "epoch": 2.97, "grad_norm": 0.7007195949554443, "learning_rate": 1.1733835313774143e-07, "loss": 2.9823, "step": 60670 }, { "epoch": 2.97, "grad_norm": 0.7247536778450012, "learning_rate": 1.169081766655977e-07, "loss": 2.8296, "step": 60671 }, { "epoch": 2.97, "grad_norm": 0.7323319315910339, "learning_rate": 1.1647879002778526e-07, "loss": 2.6532, "step": 60672 }, { "epoch": 2.97, "grad_norm": 0.7220818996429443, "learning_rate": 1.1605019322546982e-07, "loss": 2.8848, "step": 60673 }, { "epoch": 2.97, "grad_norm": 0.7621148824691772, "learning_rate": 1.1562238625975051e-07, "loss": 2.9724, "step": 60674 }, { "epoch": 2.97, "grad_norm": 0.7443565130233765, "learning_rate": 1.1519536913175976e-07, "loss": 3.0197, "step": 60675 }, { "epoch": 2.97, "grad_norm": 0.7332092523574829, "learning_rate": 1.1476914184262997e-07, "loss": 2.8808, "step": 60676 }, { "epoch": 2.97, "grad_norm": 0.7590422630310059, "learning_rate": 1.1434370439352691e-07, "loss": 2.9004, "step": 60677 }, { "epoch": 2.97, "grad_norm": 0.7833712697029114, "learning_rate": 1.1391905678548308e-07, "loss": 2.8703, "step": 60678 }, { "epoch": 2.97, "grad_norm": 0.7459626197814941, "learning_rate": 1.134951990196975e-07, "loss": 2.9135, "step": 60679 }, { "epoch": 2.97, "grad_norm": 0.7762752175331116, "learning_rate": 1.130721310972027e-07, "loss": 2.8371, "step": 60680 }, { "epoch": 2.97, "grad_norm": 0.7452636361122131, "learning_rate": 1.1264985301919771e-07, "loss": 2.8966, "step": 60681 }, { "epoch": 2.97, "grad_norm": 0.766267716884613, "learning_rate": 1.1222836478674834e-07, "loss": 2.8336, "step": 60682 }, { "epoch": 2.97, "grad_norm": 0.7369273900985718, "learning_rate": 1.1180766640098704e-07, "loss": 2.8865, "step": 60683 }, { "epoch": 2.97, "grad_norm": 0.7170395255088806, "learning_rate": 1.1138775786301291e-07, "loss": 2.7637, "step": 60684 }, { "epoch": 2.97, "grad_norm": 0.7494436502456665, "learning_rate": 1.1096863917389175e-07, "loss": 3.0208, "step": 60685 }, { "epoch": 2.97, "grad_norm": 0.7833617329597473, "learning_rate": 1.1055031033478934e-07, "loss": 2.8597, "step": 60686 }, { "epoch": 2.97, "grad_norm": 0.7761126160621643, "learning_rate": 1.1013277134680476e-07, "loss": 2.6651, "step": 60687 }, { "epoch": 2.97, "grad_norm": 0.7350229024887085, "learning_rate": 1.0971602221097054e-07, "loss": 2.9107, "step": 60688 }, { "epoch": 2.97, "grad_norm": 0.7333272695541382, "learning_rate": 1.093000629284524e-07, "loss": 2.8596, "step": 60689 }, { "epoch": 2.97, "grad_norm": 0.8076415061950684, "learning_rate": 1.0888489350034946e-07, "loss": 2.7711, "step": 60690 }, { "epoch": 2.97, "grad_norm": 0.7704684734344482, "learning_rate": 1.0847051392769424e-07, "loss": 2.7579, "step": 60691 }, { "epoch": 2.97, "grad_norm": 0.7633429169654846, "learning_rate": 1.0805692421161915e-07, "loss": 2.75, "step": 60692 }, { "epoch": 2.97, "grad_norm": 0.7828161716461182, "learning_rate": 1.0764412435319004e-07, "loss": 2.8519, "step": 60693 }, { "epoch": 2.97, "grad_norm": 0.8346174955368042, "learning_rate": 1.0723211435353929e-07, "loss": 2.9347, "step": 60694 }, { "epoch": 2.97, "grad_norm": 0.783769965171814, "learning_rate": 1.0682089421369944e-07, "loss": 2.8745, "step": 60695 }, { "epoch": 2.97, "grad_norm": 0.7348896861076355, "learning_rate": 1.0641046393480291e-07, "loss": 2.9594, "step": 60696 }, { "epoch": 2.97, "grad_norm": 0.7758384346961975, "learning_rate": 1.060008235178822e-07, "loss": 2.971, "step": 60697 }, { "epoch": 2.97, "grad_norm": 0.7444601058959961, "learning_rate": 1.0559197296406974e-07, "loss": 2.8435, "step": 60698 }, { "epoch": 2.97, "grad_norm": 0.7654844522476196, "learning_rate": 1.0518391227439804e-07, "loss": 2.7562, "step": 60699 }, { "epoch": 2.97, "grad_norm": 0.7547435760498047, "learning_rate": 1.0477664144993292e-07, "loss": 2.9196, "step": 60700 }, { "epoch": 2.97, "grad_norm": 0.7515742778778076, "learning_rate": 1.043701604918068e-07, "loss": 2.9464, "step": 60701 }, { "epoch": 2.97, "grad_norm": 0.7705299854278564, "learning_rate": 1.0396446940101888e-07, "loss": 2.8524, "step": 60702 }, { "epoch": 2.97, "grad_norm": 0.7585739493370056, "learning_rate": 1.0355956817870159e-07, "loss": 3.0153, "step": 60703 }, { "epoch": 2.97, "grad_norm": 0.7425493001937866, "learning_rate": 1.0315545682585413e-07, "loss": 2.9251, "step": 60704 }, { "epoch": 2.98, "grad_norm": 0.7635918855667114, "learning_rate": 1.0275213534360893e-07, "loss": 2.8483, "step": 60705 }, { "epoch": 2.98, "grad_norm": 0.8116132616996765, "learning_rate": 1.0234960373299849e-07, "loss": 2.6353, "step": 60706 }, { "epoch": 2.98, "grad_norm": 0.7795093059539795, "learning_rate": 1.0194786199508865e-07, "loss": 2.9259, "step": 60707 }, { "epoch": 2.98, "grad_norm": 0.7156280279159546, "learning_rate": 1.0154691013091187e-07, "loss": 2.8244, "step": 60708 }, { "epoch": 2.98, "grad_norm": 0.7367256879806519, "learning_rate": 1.0114674814156731e-07, "loss": 2.8971, "step": 60709 }, { "epoch": 2.98, "grad_norm": 0.762520968914032, "learning_rate": 1.0074737602805416e-07, "loss": 2.7293, "step": 60710 }, { "epoch": 2.98, "grad_norm": 0.7359768748283386, "learning_rate": 1.0034879379147154e-07, "loss": 2.8846, "step": 60711 }, { "epoch": 2.98, "grad_norm": 0.7226070761680603, "learning_rate": 9.995100143285195e-08, "loss": 3.0858, "step": 60712 }, { "epoch": 2.98, "grad_norm": 0.7261221408843994, "learning_rate": 9.955399895326121e-08, "loss": 2.9844, "step": 60713 }, { "epoch": 2.98, "grad_norm": 0.7427242398262024, "learning_rate": 9.915778635369854e-08, "loss": 2.7948, "step": 60714 }, { "epoch": 2.98, "grad_norm": 0.771078884601593, "learning_rate": 9.876236363526303e-08, "loss": 2.7511, "step": 60715 }, { "epoch": 2.98, "grad_norm": 0.7642545104026794, "learning_rate": 9.836773079895389e-08, "loss": 2.8683, "step": 60716 }, { "epoch": 2.98, "grad_norm": 0.7525956034660339, "learning_rate": 9.797388784583692e-08, "loss": 2.9183, "step": 60717 }, { "epoch": 2.98, "grad_norm": 0.7317299246788025, "learning_rate": 9.758083477694468e-08, "loss": 2.7959, "step": 60718 }, { "epoch": 2.98, "grad_norm": 0.7813636064529419, "learning_rate": 9.71885715932763e-08, "loss": 2.9581, "step": 60719 }, { "epoch": 2.98, "grad_norm": 0.7140172719955444, "learning_rate": 9.679709829593096e-08, "loss": 2.9738, "step": 60720 }, { "epoch": 2.98, "grad_norm": 0.7551311254501343, "learning_rate": 9.640641488587453e-08, "loss": 2.7313, "step": 60721 }, { "epoch": 2.98, "grad_norm": 0.7514950037002563, "learning_rate": 9.601652136420612e-08, "loss": 2.9285, "step": 60722 }, { "epoch": 2.98, "grad_norm": 0.7382096648216248, "learning_rate": 9.562741773185833e-08, "loss": 3.1155, "step": 60723 }, { "epoch": 2.98, "grad_norm": 0.7395986318588257, "learning_rate": 9.523910398996359e-08, "loss": 2.9011, "step": 60724 }, { "epoch": 2.98, "grad_norm": 0.742736279964447, "learning_rate": 9.485158013945449e-08, "loss": 2.9065, "step": 60725 }, { "epoch": 2.98, "grad_norm": 0.7521002888679504, "learning_rate": 9.446484618139682e-08, "loss": 2.7803, "step": 60726 }, { "epoch": 2.98, "grad_norm": 0.7703086733818054, "learning_rate": 9.407890211682312e-08, "loss": 2.8006, "step": 60727 }, { "epoch": 2.98, "grad_norm": 0.7542996406555176, "learning_rate": 9.369374794669926e-08, "loss": 2.8211, "step": 60728 }, { "epoch": 2.98, "grad_norm": 0.7727019190788269, "learning_rate": 9.330938367209107e-08, "loss": 2.8717, "step": 60729 }, { "epoch": 2.98, "grad_norm": 0.7284082174301147, "learning_rate": 9.292580929396443e-08, "loss": 2.7599, "step": 60730 }, { "epoch": 2.98, "grad_norm": 0.7392890453338623, "learning_rate": 9.254302481338516e-08, "loss": 2.9226, "step": 60731 }, { "epoch": 2.98, "grad_norm": 0.7863142490386963, "learning_rate": 9.216103023128584e-08, "loss": 2.753, "step": 60732 }, { "epoch": 2.98, "grad_norm": 0.7285134196281433, "learning_rate": 9.177982554873231e-08, "loss": 3.173, "step": 60733 }, { "epoch": 2.98, "grad_norm": 0.7787915468215942, "learning_rate": 9.139941076672374e-08, "loss": 2.8017, "step": 60734 }, { "epoch": 2.98, "grad_norm": 0.77415531873703, "learning_rate": 9.101978588625935e-08, "loss": 2.6794, "step": 60735 }, { "epoch": 2.98, "grad_norm": 0.7192097902297974, "learning_rate": 9.064095090830504e-08, "loss": 2.8903, "step": 60736 }, { "epoch": 2.98, "grad_norm": 0.755970299243927, "learning_rate": 9.026290583389329e-08, "loss": 3.218, "step": 60737 }, { "epoch": 2.98, "grad_norm": 0.7275537848472595, "learning_rate": 8.988565066402332e-08, "loss": 2.9964, "step": 60738 }, { "epoch": 2.98, "grad_norm": 0.8885321617126465, "learning_rate": 8.950918539966102e-08, "loss": 2.9317, "step": 60739 }, { "epoch": 2.98, "grad_norm": 0.7572287321090698, "learning_rate": 8.913351004180558e-08, "loss": 3.1145, "step": 60740 }, { "epoch": 2.98, "grad_norm": 0.7967362999916077, "learning_rate": 8.875862459145622e-08, "loss": 2.6688, "step": 60741 }, { "epoch": 2.98, "grad_norm": 0.7556520700454712, "learning_rate": 8.838452904961213e-08, "loss": 2.9846, "step": 60742 }, { "epoch": 2.98, "grad_norm": 0.6918498873710632, "learning_rate": 8.801122341723921e-08, "loss": 2.8424, "step": 60743 }, { "epoch": 2.98, "grad_norm": 0.7346342206001282, "learning_rate": 8.763870769533666e-08, "loss": 2.6878, "step": 60744 }, { "epoch": 2.98, "grad_norm": 0.7678686380386353, "learning_rate": 8.726698188483705e-08, "loss": 2.9052, "step": 60745 }, { "epoch": 2.98, "grad_norm": 0.7246375679969788, "learning_rate": 8.689604598680621e-08, "loss": 2.956, "step": 60746 }, { "epoch": 2.98, "grad_norm": 0.7658019065856934, "learning_rate": 8.652590000214343e-08, "loss": 2.8864, "step": 60747 }, { "epoch": 2.98, "grad_norm": 0.7121071219444275, "learning_rate": 8.61565439318479e-08, "loss": 2.8179, "step": 60748 }, { "epoch": 2.98, "grad_norm": 0.767242431640625, "learning_rate": 8.578797777688551e-08, "loss": 2.8497, "step": 60749 }, { "epoch": 2.98, "grad_norm": 0.7547940611839294, "learning_rate": 8.542020153828877e-08, "loss": 2.9006, "step": 60750 }, { "epoch": 2.98, "grad_norm": 0.7395439743995667, "learning_rate": 8.505321521692366e-08, "loss": 3.0071, "step": 60751 }, { "epoch": 2.98, "grad_norm": 0.7863125801086426, "learning_rate": 8.468701881382267e-08, "loss": 3.1082, "step": 60752 }, { "epoch": 2.98, "grad_norm": 0.7661166191101074, "learning_rate": 8.432161232995172e-08, "loss": 2.8509, "step": 60753 }, { "epoch": 2.98, "grad_norm": 0.7914922833442688, "learning_rate": 8.395699576624338e-08, "loss": 2.7245, "step": 60754 }, { "epoch": 2.98, "grad_norm": 0.7542526125907898, "learning_rate": 8.359316912366355e-08, "loss": 2.7699, "step": 60755 }, { "epoch": 2.98, "grad_norm": 0.7157665491104126, "learning_rate": 8.323013240321141e-08, "loss": 2.9214, "step": 60756 }, { "epoch": 2.98, "grad_norm": 0.7522284388542175, "learning_rate": 8.286788560578628e-08, "loss": 2.7608, "step": 60757 }, { "epoch": 2.98, "grad_norm": 0.7462372779846191, "learning_rate": 8.250642873235402e-08, "loss": 2.8285, "step": 60758 }, { "epoch": 2.98, "grad_norm": 0.7638521790504456, "learning_rate": 8.214576178391386e-08, "loss": 3.0214, "step": 60759 }, { "epoch": 2.98, "grad_norm": 0.7532297968864441, "learning_rate": 8.178588476136505e-08, "loss": 2.8109, "step": 60760 }, { "epoch": 2.98, "grad_norm": 0.7298749089241028, "learning_rate": 8.14267976656735e-08, "loss": 2.7234, "step": 60761 }, { "epoch": 2.98, "grad_norm": 0.7774704694747925, "learning_rate": 8.10685004977718e-08, "loss": 2.7798, "step": 60762 }, { "epoch": 2.98, "grad_norm": 0.756490170955658, "learning_rate": 8.071099325862584e-08, "loss": 2.8804, "step": 60763 }, { "epoch": 2.98, "grad_norm": 0.7454630732536316, "learning_rate": 8.035427594916822e-08, "loss": 2.839, "step": 60764 }, { "epoch": 2.98, "grad_norm": 0.7813576459884644, "learning_rate": 7.99983485703315e-08, "loss": 2.8881, "step": 60765 }, { "epoch": 2.98, "grad_norm": 0.7779887318611145, "learning_rate": 7.964321112304828e-08, "loss": 2.9005, "step": 60766 }, { "epoch": 2.98, "grad_norm": 0.7656338214874268, "learning_rate": 7.928886360828446e-08, "loss": 3.1341, "step": 60767 }, { "epoch": 2.98, "grad_norm": 0.7824438810348511, "learning_rate": 7.893530602693931e-08, "loss": 2.8625, "step": 60768 }, { "epoch": 2.98, "grad_norm": 0.779429018497467, "learning_rate": 7.858253837997874e-08, "loss": 2.8873, "step": 60769 }, { "epoch": 2.98, "grad_norm": 0.7629287242889404, "learning_rate": 7.823056066830202e-08, "loss": 2.9589, "step": 60770 }, { "epoch": 2.98, "grad_norm": 0.7588117718696594, "learning_rate": 7.787937289284174e-08, "loss": 2.9203, "step": 60771 }, { "epoch": 2.98, "grad_norm": 0.7689938545227051, "learning_rate": 7.752897505449718e-08, "loss": 2.9673, "step": 60772 }, { "epoch": 2.98, "grad_norm": 0.7638649344444275, "learning_rate": 7.717936715426753e-08, "loss": 2.893, "step": 60773 }, { "epoch": 2.98, "grad_norm": 0.7803759574890137, "learning_rate": 7.683054919298549e-08, "loss": 2.8572, "step": 60774 }, { "epoch": 2.98, "grad_norm": 0.7284832000732422, "learning_rate": 7.648252117165021e-08, "loss": 2.7997, "step": 60775 }, { "epoch": 2.98, "grad_norm": 0.7143325209617615, "learning_rate": 7.61352830910944e-08, "loss": 2.8185, "step": 60776 }, { "epoch": 2.98, "grad_norm": 0.8019963502883911, "learning_rate": 7.578883495231725e-08, "loss": 2.6413, "step": 60777 }, { "epoch": 2.98, "grad_norm": 0.7633552551269531, "learning_rate": 7.544317675615142e-08, "loss": 2.8431, "step": 60778 }, { "epoch": 2.98, "grad_norm": 0.7556493878364563, "learning_rate": 7.50983085035961e-08, "loss": 2.885, "step": 60779 }, { "epoch": 2.98, "grad_norm": 0.7214013338088989, "learning_rate": 7.475423019548399e-08, "loss": 2.959, "step": 60780 }, { "epoch": 2.98, "grad_norm": 0.7588320970535278, "learning_rate": 7.441094183271435e-08, "loss": 2.9404, "step": 60781 }, { "epoch": 2.98, "grad_norm": 0.7851195335388184, "learning_rate": 7.406844341628638e-08, "loss": 2.9118, "step": 60782 }, { "epoch": 2.98, "grad_norm": 0.7186158299446106, "learning_rate": 7.372673494699943e-08, "loss": 2.8767, "step": 60783 }, { "epoch": 2.98, "grad_norm": 0.7348068952560425, "learning_rate": 7.338581642578611e-08, "loss": 2.8697, "step": 60784 }, { "epoch": 2.98, "grad_norm": 0.7313429713249207, "learning_rate": 7.304568785357901e-08, "loss": 2.8737, "step": 60785 }, { "epoch": 2.98, "grad_norm": 0.7902020812034607, "learning_rate": 7.270634923124408e-08, "loss": 2.9459, "step": 60786 }, { "epoch": 2.98, "grad_norm": 0.7413740158081055, "learning_rate": 7.236780055968061e-08, "loss": 2.8579, "step": 60787 }, { "epoch": 2.98, "grad_norm": 0.7744763493537903, "learning_rate": 7.203004183975459e-08, "loss": 2.8673, "step": 60788 }, { "epoch": 2.98, "grad_norm": 0.7383904457092285, "learning_rate": 7.169307307239857e-08, "loss": 2.976, "step": 60789 }, { "epoch": 2.98, "grad_norm": 0.7870457768440247, "learning_rate": 7.135689425847857e-08, "loss": 2.8683, "step": 60790 }, { "epoch": 2.98, "grad_norm": 0.7270851135253906, "learning_rate": 7.102150539889384e-08, "loss": 2.9019, "step": 60791 }, { "epoch": 2.98, "grad_norm": 0.7581509947776794, "learning_rate": 7.068690649451036e-08, "loss": 2.8886, "step": 60792 }, { "epoch": 2.98, "grad_norm": 0.7382477521896362, "learning_rate": 7.035309754619411e-08, "loss": 2.9169, "step": 60793 }, { "epoch": 2.98, "grad_norm": 0.7643795013427734, "learning_rate": 7.002007855487768e-08, "loss": 2.9914, "step": 60794 }, { "epoch": 2.98, "grad_norm": 0.7508678436279297, "learning_rate": 6.968784952136042e-08, "loss": 2.7102, "step": 60795 }, { "epoch": 2.98, "grad_norm": 0.7405849099159241, "learning_rate": 6.935641044660823e-08, "loss": 2.7978, "step": 60796 }, { "epoch": 2.98, "grad_norm": 0.7330188155174255, "learning_rate": 6.902576133142047e-08, "loss": 2.8835, "step": 60797 }, { "epoch": 2.98, "grad_norm": 0.7529171109199524, "learning_rate": 6.86959021766964e-08, "loss": 2.7461, "step": 60798 }, { "epoch": 2.98, "grad_norm": 0.7461307048797607, "learning_rate": 6.836683298333534e-08, "loss": 3.0741, "step": 60799 }, { "epoch": 2.98, "grad_norm": 0.755508542060852, "learning_rate": 6.803855375213663e-08, "loss": 2.6922, "step": 60800 }, { "epoch": 2.98, "grad_norm": 0.8063923120498657, "learning_rate": 6.771106448399955e-08, "loss": 2.7744, "step": 60801 }, { "epoch": 2.98, "grad_norm": 0.7900208234786987, "learning_rate": 6.738436517982338e-08, "loss": 2.8385, "step": 60802 }, { "epoch": 2.98, "grad_norm": 0.7695721387863159, "learning_rate": 6.705845584040748e-08, "loss": 2.9264, "step": 60803 }, { "epoch": 2.98, "grad_norm": 0.7548896670341492, "learning_rate": 6.673333646665113e-08, "loss": 2.9377, "step": 60804 }, { "epoch": 2.98, "grad_norm": 0.7459438443183899, "learning_rate": 6.640900705938701e-08, "loss": 2.9739, "step": 60805 }, { "epoch": 2.98, "grad_norm": 0.7769594788551331, "learning_rate": 6.608546761948108e-08, "loss": 2.9764, "step": 60806 }, { "epoch": 2.98, "grad_norm": 0.7430137395858765, "learning_rate": 6.576271814776601e-08, "loss": 2.8683, "step": 60807 }, { "epoch": 2.98, "grad_norm": 0.7870651483535767, "learning_rate": 6.544075864514109e-08, "loss": 2.7308, "step": 60808 }, { "epoch": 2.98, "grad_norm": 0.7871001958847046, "learning_rate": 6.511958911240567e-08, "loss": 2.9413, "step": 60809 }, { "epoch": 2.98, "grad_norm": 0.7236528992652893, "learning_rate": 6.479920955042573e-08, "loss": 2.7938, "step": 60810 }, { "epoch": 2.98, "grad_norm": 0.8005480170249939, "learning_rate": 6.447961996003392e-08, "loss": 2.7432, "step": 60811 }, { "epoch": 2.98, "grad_norm": 0.7919958233833313, "learning_rate": 6.416082034206294e-08, "loss": 2.8455, "step": 60812 }, { "epoch": 2.98, "grad_norm": 0.8858550190925598, "learning_rate": 6.384281069737873e-08, "loss": 2.9674, "step": 60813 }, { "epoch": 2.98, "grad_norm": 0.7702856659889221, "learning_rate": 6.352559102681399e-08, "loss": 2.6873, "step": 60814 }, { "epoch": 2.98, "grad_norm": 0.7305554747581482, "learning_rate": 6.320916133120135e-08, "loss": 2.8038, "step": 60815 }, { "epoch": 2.98, "grad_norm": 0.818423330783844, "learning_rate": 6.289352161137351e-08, "loss": 2.7824, "step": 60816 }, { "epoch": 2.98, "grad_norm": 0.7536776661872864, "learning_rate": 6.257867186816312e-08, "loss": 2.9083, "step": 60817 }, { "epoch": 2.98, "grad_norm": 0.7545386552810669, "learning_rate": 6.226461210240286e-08, "loss": 3.1492, "step": 60818 }, { "epoch": 2.98, "grad_norm": 0.7835391163825989, "learning_rate": 6.195134231489207e-08, "loss": 3.1372, "step": 60819 }, { "epoch": 2.98, "grad_norm": 0.7253636717796326, "learning_rate": 6.163886250649675e-08, "loss": 3.0669, "step": 60820 }, { "epoch": 2.98, "grad_norm": 0.7546964287757874, "learning_rate": 6.132717267801624e-08, "loss": 2.9428, "step": 60821 }, { "epoch": 2.98, "grad_norm": 0.7309494614601135, "learning_rate": 6.10162728302499e-08, "loss": 2.8188, "step": 60822 }, { "epoch": 2.98, "grad_norm": 0.7510556578636169, "learning_rate": 6.070616296406372e-08, "loss": 2.7979, "step": 60823 }, { "epoch": 2.98, "grad_norm": 0.7674130201339722, "learning_rate": 6.039684308025705e-08, "loss": 2.7607, "step": 60824 }, { "epoch": 2.98, "grad_norm": 0.7727053761482239, "learning_rate": 6.008831317962926e-08, "loss": 2.949, "step": 60825 }, { "epoch": 2.98, "grad_norm": 0.7896957993507385, "learning_rate": 5.9780573263013e-08, "loss": 2.7591, "step": 60826 }, { "epoch": 2.98, "grad_norm": 0.7202666997909546, "learning_rate": 5.947362333120764e-08, "loss": 2.9216, "step": 60827 }, { "epoch": 2.98, "grad_norm": 0.7396377921104431, "learning_rate": 5.916746338501255e-08, "loss": 2.9348, "step": 60828 }, { "epoch": 2.98, "grad_norm": 0.8150199055671692, "learning_rate": 5.886209342526038e-08, "loss": 2.7997, "step": 60829 }, { "epoch": 2.98, "grad_norm": 0.7385109066963196, "learning_rate": 5.8557513452750506e-08, "loss": 2.8084, "step": 60830 }, { "epoch": 2.98, "grad_norm": 0.739365816116333, "learning_rate": 5.8253723468248966e-08, "loss": 2.8535, "step": 60831 }, { "epoch": 2.98, "grad_norm": 0.7645565271377563, "learning_rate": 5.795072347258844e-08, "loss": 2.9378, "step": 60832 }, { "epoch": 2.98, "grad_norm": 0.7741032838821411, "learning_rate": 5.764851346656829e-08, "loss": 2.9119, "step": 60833 }, { "epoch": 2.98, "grad_norm": 0.7111135125160217, "learning_rate": 5.734709345098787e-08, "loss": 2.8008, "step": 60834 }, { "epoch": 2.98, "grad_norm": 0.7414789199829102, "learning_rate": 5.704646342661323e-08, "loss": 2.8277, "step": 60835 }, { "epoch": 2.98, "grad_norm": 0.7525594830513, "learning_rate": 5.674662339427705e-08, "loss": 2.9346, "step": 60836 }, { "epoch": 2.98, "grad_norm": 0.704296886920929, "learning_rate": 5.644757335471206e-08, "loss": 2.8076, "step": 60837 }, { "epoch": 2.98, "grad_norm": 0.793901264667511, "learning_rate": 5.614931330875094e-08, "loss": 2.8382, "step": 60838 }, { "epoch": 2.98, "grad_norm": 0.7604386210441589, "learning_rate": 5.585184325715975e-08, "loss": 2.8852, "step": 60839 }, { "epoch": 2.98, "grad_norm": 0.7613590955734253, "learning_rate": 5.555516320073783e-08, "loss": 2.7332, "step": 60840 }, { "epoch": 2.98, "grad_norm": 0.7492443323135376, "learning_rate": 5.5259273140284554e-08, "loss": 2.9632, "step": 60841 }, { "epoch": 2.98, "grad_norm": 0.7389005422592163, "learning_rate": 5.4964173076499364e-08, "loss": 2.99, "step": 60842 }, { "epoch": 2.98, "grad_norm": 0.7318647503852844, "learning_rate": 5.466986301024823e-08, "loss": 2.8089, "step": 60843 }, { "epoch": 2.98, "grad_norm": 0.7469201683998108, "learning_rate": 5.4376342942263895e-08, "loss": 2.7664, "step": 60844 }, { "epoch": 2.98, "grad_norm": 0.7827994227409363, "learning_rate": 5.4083612873345726e-08, "loss": 2.7729, "step": 60845 }, { "epoch": 2.98, "grad_norm": 0.7702500224113464, "learning_rate": 5.3791672804226474e-08, "loss": 2.8159, "step": 60846 }, { "epoch": 2.98, "grad_norm": 0.7574726343154907, "learning_rate": 5.350052273567218e-08, "loss": 2.8351, "step": 60847 }, { "epoch": 2.98, "grad_norm": 0.7728380560874939, "learning_rate": 5.321016266851552e-08, "loss": 3.0487, "step": 60848 }, { "epoch": 2.98, "grad_norm": 0.7415326833724976, "learning_rate": 5.2920592603455934e-08, "loss": 2.5731, "step": 60849 }, { "epoch": 2.98, "grad_norm": 0.7318070530891418, "learning_rate": 5.263181254125948e-08, "loss": 2.7188, "step": 60850 }, { "epoch": 2.98, "grad_norm": 0.8076432943344116, "learning_rate": 5.2343822482725504e-08, "loss": 2.7616, "step": 60851 }, { "epoch": 2.98, "grad_norm": 0.7778842449188232, "learning_rate": 5.205662242858677e-08, "loss": 2.9999, "step": 60852 }, { "epoch": 2.98, "grad_norm": 0.7371053695678711, "learning_rate": 5.1770212379609324e-08, "loss": 2.8389, "step": 60853 }, { "epoch": 2.98, "grad_norm": 0.732855498790741, "learning_rate": 5.148459233652591e-08, "loss": 2.8063, "step": 60854 }, { "epoch": 2.98, "grad_norm": 0.7521831393241882, "learning_rate": 5.119976230013589e-08, "loss": 2.8495, "step": 60855 }, { "epoch": 2.98, "grad_norm": 0.7384352087974548, "learning_rate": 5.0915722271138715e-08, "loss": 2.8919, "step": 60856 }, { "epoch": 2.98, "grad_norm": 0.762461245059967, "learning_rate": 5.0632472250300424e-08, "loss": 2.8156, "step": 60857 }, { "epoch": 2.98, "grad_norm": 0.774097740650177, "learning_rate": 5.035001223838708e-08, "loss": 2.8582, "step": 60858 }, { "epoch": 2.98, "grad_norm": 0.758370578289032, "learning_rate": 5.0068342236098124e-08, "loss": 2.7938, "step": 60859 }, { "epoch": 2.98, "grad_norm": 0.7590804100036621, "learning_rate": 4.9787462244232914e-08, "loss": 2.8904, "step": 60860 }, { "epoch": 2.98, "grad_norm": 0.7966086268424988, "learning_rate": 4.950737226349089e-08, "loss": 3.0966, "step": 60861 }, { "epoch": 2.98, "grad_norm": 0.7693399786949158, "learning_rate": 4.9228072294604795e-08, "loss": 2.9406, "step": 60862 }, { "epoch": 2.98, "grad_norm": 0.7839014530181885, "learning_rate": 4.8949562338340684e-08, "loss": 2.771, "step": 60863 }, { "epoch": 2.98, "grad_norm": 0.7283673882484436, "learning_rate": 4.867184239543131e-08, "loss": 2.9914, "step": 60864 }, { "epoch": 2.98, "grad_norm": 0.7859243750572205, "learning_rate": 4.839491246657612e-08, "loss": 3.0732, "step": 60865 }, { "epoch": 2.98, "grad_norm": 0.7411079406738281, "learning_rate": 4.811877255250785e-08, "loss": 3.0077, "step": 60866 }, { "epoch": 2.98, "grad_norm": 0.7194960117340088, "learning_rate": 4.7843422653992546e-08, "loss": 2.9794, "step": 60867 }, { "epoch": 2.98, "grad_norm": 0.7908714413642883, "learning_rate": 4.756886277169636e-08, "loss": 2.8156, "step": 60868 }, { "epoch": 2.98, "grad_norm": 0.7900540828704834, "learning_rate": 4.729509290641864e-08, "loss": 2.8043, "step": 60869 }, { "epoch": 2.98, "grad_norm": 0.8169203400611877, "learning_rate": 4.702211305882553e-08, "loss": 2.8463, "step": 60870 }, { "epoch": 2.98, "grad_norm": 0.7235054969787598, "learning_rate": 4.674992322961646e-08, "loss": 2.7804, "step": 60871 }, { "epoch": 2.98, "grad_norm": 0.7315838932991028, "learning_rate": 4.64785234195908e-08, "loss": 2.9179, "step": 60872 }, { "epoch": 2.98, "grad_norm": 0.7634678483009338, "learning_rate": 4.620791362938137e-08, "loss": 2.7995, "step": 60873 }, { "epoch": 2.98, "grad_norm": 0.7515854835510254, "learning_rate": 4.593809385975422e-08, "loss": 2.7075, "step": 60874 }, { "epoch": 2.98, "grad_norm": 0.746206521987915, "learning_rate": 4.56690641113755e-08, "loss": 2.7542, "step": 60875 }, { "epoch": 2.98, "grad_norm": 0.7026474475860596, "learning_rate": 4.540082438497794e-08, "loss": 2.9067, "step": 60876 }, { "epoch": 2.98, "grad_norm": 0.7854546308517456, "learning_rate": 4.51333746812943e-08, "loss": 2.8615, "step": 60877 }, { "epoch": 2.98, "grad_norm": 0.8138797879219055, "learning_rate": 4.4866715000990703e-08, "loss": 2.6686, "step": 60878 }, { "epoch": 2.98, "grad_norm": 0.7269437313079834, "learning_rate": 4.46008453447666e-08, "loss": 2.9791, "step": 60879 }, { "epoch": 2.98, "grad_norm": 0.7314205765724182, "learning_rate": 4.4335765713321427e-08, "loss": 3.0126, "step": 60880 }, { "epoch": 2.98, "grad_norm": 0.7562000155448914, "learning_rate": 4.407147610742123e-08, "loss": 2.915, "step": 60881 }, { "epoch": 2.98, "grad_norm": 0.7194030284881592, "learning_rate": 4.3807976527665544e-08, "loss": 2.7785, "step": 60882 }, { "epoch": 2.98, "grad_norm": 0.7603053450584412, "learning_rate": 4.3545266974820417e-08, "loss": 2.7567, "step": 60883 }, { "epoch": 2.98, "grad_norm": 0.7493316531181335, "learning_rate": 4.328334744951867e-08, "loss": 3.0762, "step": 60884 }, { "epoch": 2.98, "grad_norm": 0.7748812437057495, "learning_rate": 4.3022217952493055e-08, "loss": 2.9854, "step": 60885 }, { "epoch": 2.98, "grad_norm": 0.7610214352607727, "learning_rate": 4.2761878484443014e-08, "loss": 2.941, "step": 60886 }, { "epoch": 2.98, "grad_norm": 0.7458277940750122, "learning_rate": 4.250232904600137e-08, "loss": 2.921, "step": 60887 }, { "epoch": 2.98, "grad_norm": 0.7483231425285339, "learning_rate": 4.224356963786757e-08, "loss": 2.8299, "step": 60888 }, { "epoch": 2.98, "grad_norm": 0.738000214099884, "learning_rate": 4.1985600260774354e-08, "loss": 3.1012, "step": 60889 }, { "epoch": 2.98, "grad_norm": 0.7348437309265137, "learning_rate": 4.1728420915354555e-08, "loss": 2.8678, "step": 60890 }, { "epoch": 2.98, "grad_norm": 0.7560616135597229, "learning_rate": 4.14720316022743e-08, "loss": 3.0925, "step": 60891 }, { "epoch": 2.98, "grad_norm": 0.7360788583755493, "learning_rate": 4.121643232223304e-08, "loss": 2.85, "step": 60892 }, { "epoch": 2.98, "grad_norm": 0.7723987698554993, "learning_rate": 4.0961623075930206e-08, "loss": 2.9986, "step": 60893 }, { "epoch": 2.98, "grad_norm": 0.7306909561157227, "learning_rate": 4.070760386396532e-08, "loss": 2.8415, "step": 60894 }, { "epoch": 2.98, "grad_norm": 0.7794232368469238, "learning_rate": 4.045437468707113e-08, "loss": 2.8476, "step": 60895 }, { "epoch": 2.98, "grad_norm": 0.7389131784439087, "learning_rate": 4.020193554588047e-08, "loss": 2.9256, "step": 60896 }, { "epoch": 2.98, "grad_norm": 0.7388023734092712, "learning_rate": 3.9950286441092773e-08, "loss": 3.1202, "step": 60897 }, { "epoch": 2.98, "grad_norm": 0.7390792369842529, "learning_rate": 3.9699427373307554e-08, "loss": 2.8996, "step": 60898 }, { "epoch": 2.98, "grad_norm": 0.7606891989707947, "learning_rate": 3.9449358343257574e-08, "loss": 3.14, "step": 60899 }, { "epoch": 2.98, "grad_norm": 0.7230342626571655, "learning_rate": 3.920007935157565e-08, "loss": 2.8227, "step": 60900 }, { "epoch": 2.98, "grad_norm": 0.7700236439704895, "learning_rate": 3.8951590398894614e-08, "loss": 2.8222, "step": 60901 }, { "epoch": 2.98, "grad_norm": 0.7606983780860901, "learning_rate": 3.87038914858806e-08, "loss": 2.849, "step": 60902 }, { "epoch": 2.98, "grad_norm": 0.7816833853721619, "learning_rate": 3.845698261319974e-08, "loss": 2.6993, "step": 60903 }, { "epoch": 2.98, "grad_norm": 0.768175482749939, "learning_rate": 3.8210863781484856e-08, "loss": 2.8003, "step": 60904 }, { "epoch": 2.98, "grad_norm": 0.7766599655151367, "learning_rate": 3.796553499140209e-08, "loss": 3.0448, "step": 60905 }, { "epoch": 2.98, "grad_norm": 0.7720254063606262, "learning_rate": 3.7720996243584265e-08, "loss": 2.8401, "step": 60906 }, { "epoch": 2.98, "grad_norm": 0.7150791883468628, "learning_rate": 3.747724753869752e-08, "loss": 2.7285, "step": 60907 }, { "epoch": 2.98, "grad_norm": 0.7499209046363831, "learning_rate": 3.723428887734137e-08, "loss": 2.8719, "step": 60908 }, { "epoch": 2.99, "grad_norm": 0.7711845636367798, "learning_rate": 3.6992120260215254e-08, "loss": 2.8742, "step": 60909 }, { "epoch": 2.99, "grad_norm": 0.7651515007019043, "learning_rate": 3.6750741687885385e-08, "loss": 2.8158, "step": 60910 }, { "epoch": 2.99, "grad_norm": 0.7973775267601013, "learning_rate": 3.651015316105121e-08, "loss": 3.1713, "step": 60911 }, { "epoch": 2.99, "grad_norm": 0.8048893213272095, "learning_rate": 3.627035468031225e-08, "loss": 3.031, "step": 60912 }, { "epoch": 2.99, "grad_norm": 0.7640774250030518, "learning_rate": 3.603134624630133e-08, "loss": 2.8489, "step": 60913 }, { "epoch": 2.99, "grad_norm": 0.7189072370529175, "learning_rate": 3.579312785965127e-08, "loss": 2.9895, "step": 60914 }, { "epoch": 2.99, "grad_norm": 0.7827427387237549, "learning_rate": 3.5555699521028214e-08, "loss": 3.075, "step": 60915 }, { "epoch": 2.99, "grad_norm": 0.7645692229270935, "learning_rate": 3.531906123099837e-08, "loss": 2.8066, "step": 60916 }, { "epoch": 2.99, "grad_norm": 0.7669183611869812, "learning_rate": 3.5083212990194564e-08, "loss": 2.787, "step": 60917 }, { "epoch": 2.99, "grad_norm": 0.7498992085456848, "learning_rate": 3.4848154799249627e-08, "loss": 2.9104, "step": 60918 }, { "epoch": 2.99, "grad_norm": 0.761599600315094, "learning_rate": 3.4613886658796383e-08, "loss": 3.0023, "step": 60919 }, { "epoch": 2.99, "grad_norm": 0.8176621794700623, "learning_rate": 3.438040856946767e-08, "loss": 2.9239, "step": 60920 }, { "epoch": 2.99, "grad_norm": 0.7003955245018005, "learning_rate": 3.414772053182968e-08, "loss": 2.9555, "step": 60921 }, { "epoch": 2.99, "grad_norm": 0.7190863490104675, "learning_rate": 3.391582254651526e-08, "loss": 2.682, "step": 60922 }, { "epoch": 2.99, "grad_norm": 0.7530226707458496, "learning_rate": 3.368471461412392e-08, "loss": 2.7699, "step": 60923 }, { "epoch": 2.99, "grad_norm": 0.7347545027732849, "learning_rate": 3.345439673528849e-08, "loss": 2.9471, "step": 60924 }, { "epoch": 2.99, "grad_norm": 0.7659326791763306, "learning_rate": 3.322486891060849e-08, "loss": 2.7471, "step": 60925 }, { "epoch": 2.99, "grad_norm": 0.7832285165786743, "learning_rate": 3.299613114068345e-08, "loss": 2.9932, "step": 60926 }, { "epoch": 2.99, "grad_norm": 0.7939525842666626, "learning_rate": 3.276818342611287e-08, "loss": 2.9293, "step": 60927 }, { "epoch": 2.99, "grad_norm": 0.7634779810905457, "learning_rate": 3.254102576749629e-08, "loss": 2.8537, "step": 60928 }, { "epoch": 2.99, "grad_norm": 0.7426297664642334, "learning_rate": 3.231465816546652e-08, "loss": 2.8445, "step": 60929 }, { "epoch": 2.99, "grad_norm": 0.7596210837364197, "learning_rate": 3.2089080620556483e-08, "loss": 2.7078, "step": 60930 }, { "epoch": 2.99, "grad_norm": 0.743432879447937, "learning_rate": 3.1864293133399e-08, "loss": 2.8809, "step": 60931 }, { "epoch": 2.99, "grad_norm": 0.7715290784835815, "learning_rate": 3.1640295704593586e-08, "loss": 3.0125, "step": 60932 }, { "epoch": 2.99, "grad_norm": 0.7802637815475464, "learning_rate": 3.141708833470646e-08, "loss": 2.9165, "step": 60933 }, { "epoch": 2.99, "grad_norm": 0.8331025242805481, "learning_rate": 3.119467102433715e-08, "loss": 2.8806, "step": 60934 }, { "epoch": 2.99, "grad_norm": 0.7259206771850586, "learning_rate": 3.097304377408516e-08, "loss": 2.8832, "step": 60935 }, { "epoch": 2.99, "grad_norm": 0.7143881916999817, "learning_rate": 3.0752206584516714e-08, "loss": 2.9913, "step": 60936 }, { "epoch": 2.99, "grad_norm": 0.7747646570205688, "learning_rate": 3.053215945619802e-08, "loss": 2.8695, "step": 60937 }, { "epoch": 2.99, "grad_norm": 0.7718039155006409, "learning_rate": 3.031290238972861e-08, "loss": 3.0108, "step": 60938 }, { "epoch": 2.99, "grad_norm": 0.7405833601951599, "learning_rate": 3.0094435385707994e-08, "loss": 2.7529, "step": 60939 }, { "epoch": 2.99, "grad_norm": 0.7925773859024048, "learning_rate": 2.987675844466908e-08, "loss": 2.6726, "step": 60940 }, { "epoch": 2.99, "grad_norm": 0.7442820072174072, "learning_rate": 2.9659871567211392e-08, "loss": 2.8788, "step": 60941 }, { "epoch": 2.99, "grad_norm": 0.7715669870376587, "learning_rate": 2.9443774753901138e-08, "loss": 2.6613, "step": 60942 }, { "epoch": 2.99, "grad_norm": 0.7934419512748718, "learning_rate": 2.922846800530454e-08, "loss": 2.6129, "step": 60943 }, { "epoch": 2.99, "grad_norm": 0.7691748738288879, "learning_rate": 2.9013951321987804e-08, "loss": 2.9847, "step": 60944 }, { "epoch": 2.99, "grad_norm": 0.8082806468009949, "learning_rate": 2.880022470451715e-08, "loss": 2.9681, "step": 60945 }, { "epoch": 2.99, "grad_norm": 0.7494584918022156, "learning_rate": 2.8587288153458788e-08, "loss": 2.8827, "step": 60946 }, { "epoch": 2.99, "grad_norm": 0.7465475797653198, "learning_rate": 2.8375141669345624e-08, "loss": 2.9779, "step": 60947 }, { "epoch": 2.99, "grad_norm": 0.7119285464286804, "learning_rate": 2.816378525281049e-08, "loss": 2.8423, "step": 60948 }, { "epoch": 2.99, "grad_norm": 0.7398651838302612, "learning_rate": 2.7953218904319674e-08, "loss": 2.6514, "step": 60949 }, { "epoch": 2.99, "grad_norm": 0.7157944440841675, "learning_rate": 2.7743442624506008e-08, "loss": 2.8982, "step": 60950 }, { "epoch": 2.99, "grad_norm": 0.7688831686973572, "learning_rate": 2.753445641383578e-08, "loss": 3.0429, "step": 60951 }, { "epoch": 2.99, "grad_norm": 0.7500354647636414, "learning_rate": 2.7326260272941825e-08, "loss": 3.1836, "step": 60952 }, { "epoch": 2.99, "grad_norm": 0.7020663619041443, "learning_rate": 2.7118854202357044e-08, "loss": 2.8623, "step": 60953 }, { "epoch": 2.99, "grad_norm": 0.7152649164199829, "learning_rate": 2.6912238202581038e-08, "loss": 2.8368, "step": 60954 }, { "epoch": 2.99, "grad_norm": 0.7284510731697083, "learning_rate": 2.6706412274180022e-08, "loss": 3.0018, "step": 60955 }, { "epoch": 2.99, "grad_norm": 0.7411639094352722, "learning_rate": 2.650137641772021e-08, "loss": 2.9187, "step": 60956 }, { "epoch": 2.99, "grad_norm": 0.77473384141922, "learning_rate": 2.629713063373451e-08, "loss": 3.0106, "step": 60957 }, { "epoch": 2.99, "grad_norm": 0.729745626449585, "learning_rate": 2.6093674922722518e-08, "loss": 2.9179, "step": 60958 }, { "epoch": 2.99, "grad_norm": 0.7914096117019653, "learning_rate": 2.589100928528376e-08, "loss": 2.9409, "step": 60959 }, { "epoch": 2.99, "grad_norm": 0.7265556454658508, "learning_rate": 2.5689133721884526e-08, "loss": 2.8163, "step": 60960 }, { "epoch": 2.99, "grad_norm": 0.7446621656417847, "learning_rate": 2.5488048233124337e-08, "loss": 2.7302, "step": 60961 }, { "epoch": 2.99, "grad_norm": 0.7492318153381348, "learning_rate": 2.5287752819469488e-08, "loss": 2.8235, "step": 60962 }, { "epoch": 2.99, "grad_norm": 0.8737034797668457, "learning_rate": 2.508824748148619e-08, "loss": 2.7881, "step": 60963 }, { "epoch": 2.99, "grad_norm": 0.7302938103675842, "learning_rate": 2.4889532219674047e-08, "loss": 2.8977, "step": 60964 }, { "epoch": 2.99, "grad_norm": 0.7328683137893677, "learning_rate": 2.4691607034565963e-08, "loss": 3.0039, "step": 60965 }, { "epoch": 2.99, "grad_norm": 0.8015636801719666, "learning_rate": 2.4494471926728155e-08, "loss": 2.8645, "step": 60966 }, { "epoch": 2.99, "grad_norm": 0.7779310941696167, "learning_rate": 2.4298126896593607e-08, "loss": 2.8827, "step": 60967 }, { "epoch": 2.99, "grad_norm": 0.7854167222976685, "learning_rate": 2.4102571944761838e-08, "loss": 3.0293, "step": 60968 }, { "epoch": 2.99, "grad_norm": 0.7807804346084595, "learning_rate": 2.3907807071699148e-08, "loss": 2.6994, "step": 60969 }, { "epoch": 2.99, "grad_norm": 0.7262849807739258, "learning_rate": 2.371383227793844e-08, "loss": 3.0185, "step": 60970 }, { "epoch": 2.99, "grad_norm": 0.7822308540344238, "learning_rate": 2.352064756397931e-08, "loss": 2.7, "step": 60971 }, { "epoch": 2.99, "grad_norm": 0.7708417773246765, "learning_rate": 2.3328252930321366e-08, "loss": 3.0354, "step": 60972 }, { "epoch": 2.99, "grad_norm": 0.7239246964454651, "learning_rate": 2.3136648377497512e-08, "loss": 2.9021, "step": 60973 }, { "epoch": 2.99, "grad_norm": 0.7216629981994629, "learning_rate": 2.294583390600735e-08, "loss": 2.932, "step": 60974 }, { "epoch": 2.99, "grad_norm": 0.7417978048324585, "learning_rate": 2.2755809516350475e-08, "loss": 3.0051, "step": 60975 }, { "epoch": 2.99, "grad_norm": 0.7490457892417908, "learning_rate": 2.2566575208993187e-08, "loss": 2.5854, "step": 60976 }, { "epoch": 2.99, "grad_norm": 0.7109230756759644, "learning_rate": 2.2378130984501695e-08, "loss": 2.6226, "step": 60977 }, { "epoch": 2.99, "grad_norm": 0.7185120582580566, "learning_rate": 2.2190476843308992e-08, "loss": 2.9134, "step": 60978 }, { "epoch": 2.99, "grad_norm": 0.7175775170326233, "learning_rate": 2.2003612785947977e-08, "loss": 2.7592, "step": 60979 }, { "epoch": 2.99, "grad_norm": 0.7656946182250977, "learning_rate": 2.181753881288495e-08, "loss": 3.0205, "step": 60980 }, { "epoch": 2.99, "grad_norm": 0.7306361198425293, "learning_rate": 2.1632254924619507e-08, "loss": 2.7981, "step": 60981 }, { "epoch": 2.99, "grad_norm": 0.8056076169013977, "learning_rate": 2.1447761121651254e-08, "loss": 3.096, "step": 60982 }, { "epoch": 2.99, "grad_norm": 0.7194265127182007, "learning_rate": 2.1264057404446476e-08, "loss": 2.9004, "step": 60983 }, { "epoch": 2.99, "grad_norm": 0.7339944243431091, "learning_rate": 2.108114377353809e-08, "loss": 2.9615, "step": 60984 }, { "epoch": 2.99, "grad_norm": 0.7202855348587036, "learning_rate": 2.089902022932577e-08, "loss": 3.0398, "step": 60985 }, { "epoch": 2.99, "grad_norm": 0.7589322328567505, "learning_rate": 2.0717686772342422e-08, "loss": 3.0222, "step": 60986 }, { "epoch": 2.99, "grad_norm": 0.7830872535705566, "learning_rate": 2.0537143403087653e-08, "loss": 2.7202, "step": 60987 }, { "epoch": 2.99, "grad_norm": 0.7454860210418701, "learning_rate": 2.0357390121961138e-08, "loss": 2.89, "step": 60988 }, { "epoch": 2.99, "grad_norm": 0.7104257941246033, "learning_rate": 2.017842692952909e-08, "loss": 2.9121, "step": 60989 }, { "epoch": 2.99, "grad_norm": 0.7989444136619568, "learning_rate": 2.0000253826157885e-08, "loss": 2.8338, "step": 60990 }, { "epoch": 2.99, "grad_norm": 0.7443826794624329, "learning_rate": 1.9822870812413738e-08, "loss": 2.7031, "step": 60991 }, { "epoch": 2.99, "grad_norm": 0.7373210191726685, "learning_rate": 1.9646277888729635e-08, "loss": 2.858, "step": 60992 }, { "epoch": 2.99, "grad_norm": 0.7563875317573547, "learning_rate": 1.9470475055538558e-08, "loss": 2.8906, "step": 60993 }, { "epoch": 2.99, "grad_norm": 0.7365525960922241, "learning_rate": 1.9295462313340116e-08, "loss": 2.7371, "step": 60994 }, { "epoch": 2.99, "grad_norm": 0.8209661841392517, "learning_rate": 1.9121239662600595e-08, "loss": 2.8336, "step": 60995 }, { "epoch": 2.99, "grad_norm": 0.7196007370948792, "learning_rate": 1.894780710371968e-08, "loss": 3.0186, "step": 60996 }, { "epoch": 2.99, "grad_norm": 0.7250151634216309, "learning_rate": 1.8775164637230278e-08, "loss": 2.6314, "step": 60997 }, { "epoch": 2.99, "grad_norm": 0.7289470434188843, "learning_rate": 1.8603312263565374e-08, "loss": 3.0658, "step": 60998 }, { "epoch": 2.99, "grad_norm": 0.7411735653877258, "learning_rate": 1.8432249983157954e-08, "loss": 2.9024, "step": 60999 }, { "epoch": 2.99, "grad_norm": 0.7378327250480652, "learning_rate": 1.8261977796441007e-08, "loss": 2.8826, "step": 61000 }, { "epoch": 2.99, "grad_norm": 0.8322823643684387, "learning_rate": 1.8092495703914134e-08, "loss": 2.8086, "step": 61001 }, { "epoch": 2.99, "grad_norm": 0.7469660043716431, "learning_rate": 1.792380370601032e-08, "loss": 2.8937, "step": 61002 }, { "epoch": 2.99, "grad_norm": 0.6969466209411621, "learning_rate": 1.7755901803129246e-08, "loss": 2.7511, "step": 61003 }, { "epoch": 2.99, "grad_norm": 0.7883122563362122, "learning_rate": 1.7588789995770515e-08, "loss": 2.7053, "step": 61004 }, { "epoch": 2.99, "grad_norm": 0.7031104564666748, "learning_rate": 1.7422468284333802e-08, "loss": 2.7567, "step": 61005 }, { "epoch": 2.99, "grad_norm": 0.7445712089538574, "learning_rate": 1.7256936669285402e-08, "loss": 2.9512, "step": 61006 }, { "epoch": 2.99, "grad_norm": 0.7441263794898987, "learning_rate": 1.709219515105831e-08, "loss": 3.0192, "step": 61007 }, { "epoch": 2.99, "grad_norm": 0.7398879528045654, "learning_rate": 1.6928243730052194e-08, "loss": 2.6434, "step": 61008 }, { "epoch": 2.99, "grad_norm": 0.8084988594055176, "learning_rate": 1.676508240676666e-08, "loss": 2.6561, "step": 61009 }, { "epoch": 2.99, "grad_norm": 0.7381051778793335, "learning_rate": 1.6602711181534778e-08, "loss": 3.1026, "step": 61010 }, { "epoch": 2.99, "grad_norm": 0.77137690782547, "learning_rate": 1.6441130054889452e-08, "loss": 2.9732, "step": 61011 }, { "epoch": 2.99, "grad_norm": 0.7231003046035767, "learning_rate": 1.6280339027163747e-08, "loss": 2.6597, "step": 61012 }, { "epoch": 2.99, "grad_norm": 0.740614116191864, "learning_rate": 1.612033809882396e-08, "loss": 2.9157, "step": 61013 }, { "epoch": 2.99, "grad_norm": 0.7222123146057129, "learning_rate": 1.5961127270336382e-08, "loss": 2.9297, "step": 61014 }, { "epoch": 2.99, "grad_norm": 0.7570497989654541, "learning_rate": 1.5802706542034083e-08, "loss": 2.7988, "step": 61015 }, { "epoch": 2.99, "grad_norm": 0.7413751482963562, "learning_rate": 1.5645075914383352e-08, "loss": 2.7553, "step": 61016 }, { "epoch": 2.99, "grad_norm": 0.715862512588501, "learning_rate": 1.5488235387783877e-08, "loss": 3.0391, "step": 61017 }, { "epoch": 2.99, "grad_norm": 0.782159686088562, "learning_rate": 1.5332184962668638e-08, "loss": 2.856, "step": 61018 }, { "epoch": 2.99, "grad_norm": 0.7552375197410583, "learning_rate": 1.517692463940401e-08, "loss": 2.7686, "step": 61019 }, { "epoch": 2.99, "grad_norm": 0.7790356874465942, "learning_rate": 1.5022454418456285e-08, "loss": 3.0508, "step": 61020 }, { "epoch": 2.99, "grad_norm": 0.7251024842262268, "learning_rate": 1.486877430022515e-08, "loss": 2.9912, "step": 61021 }, { "epoch": 2.99, "grad_norm": 0.7824087142944336, "learning_rate": 1.4715884285076974e-08, "loss": 2.7077, "step": 61022 }, { "epoch": 2.99, "grad_norm": 0.7169434428215027, "learning_rate": 1.4563784373411435e-08, "loss": 2.813, "step": 61023 }, { "epoch": 2.99, "grad_norm": 0.7796780467033386, "learning_rate": 1.441247456569483e-08, "loss": 2.8747, "step": 61024 }, { "epoch": 2.99, "grad_norm": 0.756627082824707, "learning_rate": 1.4261954862260228e-08, "loss": 2.8956, "step": 61025 }, { "epoch": 2.99, "grad_norm": 0.7208350300788879, "learning_rate": 1.4112225263507304e-08, "loss": 2.7472, "step": 61026 }, { "epoch": 2.99, "grad_norm": 0.7269884347915649, "learning_rate": 1.3963285769869049e-08, "loss": 2.8458, "step": 61027 }, { "epoch": 2.99, "grad_norm": 0.7311272621154785, "learning_rate": 1.3815136381711833e-08, "loss": 2.8329, "step": 61028 }, { "epoch": 2.99, "grad_norm": 0.7485470175743103, "learning_rate": 1.366777709943534e-08, "loss": 3.0685, "step": 61029 }, { "epoch": 2.99, "grad_norm": 0.7784717679023743, "learning_rate": 1.3521207923439247e-08, "loss": 2.9271, "step": 61030 }, { "epoch": 2.99, "grad_norm": 0.7870509624481201, "learning_rate": 1.3375428854089931e-08, "loss": 2.8426, "step": 61031 }, { "epoch": 2.99, "grad_norm": 0.7719137072563171, "learning_rate": 1.3230439891753763e-08, "loss": 2.9635, "step": 61032 }, { "epoch": 2.99, "grad_norm": 0.7624124884605408, "learning_rate": 1.3086241036863732e-08, "loss": 3.0412, "step": 61033 }, { "epoch": 2.99, "grad_norm": 0.7434390187263489, "learning_rate": 1.2942832289752903e-08, "loss": 2.7478, "step": 61034 }, { "epoch": 2.99, "grad_norm": 0.7709497213363647, "learning_rate": 1.2800213650820957e-08, "loss": 2.8803, "step": 61035 }, { "epoch": 2.99, "grad_norm": 0.7721742987632751, "learning_rate": 1.2658385120434266e-08, "loss": 2.8181, "step": 61036 }, { "epoch": 2.99, "grad_norm": 0.7293633222579956, "learning_rate": 1.2517346698959207e-08, "loss": 2.9901, "step": 61037 }, { "epoch": 2.99, "grad_norm": 0.7672916650772095, "learning_rate": 1.2377098386828766e-08, "loss": 2.7232, "step": 61038 }, { "epoch": 2.99, "grad_norm": 0.7433645725250244, "learning_rate": 1.2237640184309394e-08, "loss": 2.8997, "step": 61039 }, { "epoch": 2.99, "grad_norm": 0.7337106466293335, "learning_rate": 1.209897209183408e-08, "loss": 2.7652, "step": 61040 }, { "epoch": 2.99, "grad_norm": 0.74492347240448, "learning_rate": 1.1961094109769199e-08, "loss": 3.0415, "step": 61041 }, { "epoch": 2.99, "grad_norm": 0.751065731048584, "learning_rate": 1.1824006238481121e-08, "loss": 2.6755, "step": 61042 }, { "epoch": 2.99, "grad_norm": 0.7474420666694641, "learning_rate": 1.1687708478302915e-08, "loss": 2.7026, "step": 61043 }, { "epoch": 2.99, "grad_norm": 0.7618042826652527, "learning_rate": 1.1552200829600955e-08, "loss": 2.8862, "step": 61044 }, { "epoch": 2.99, "grad_norm": 0.7556697130203247, "learning_rate": 1.1417483292708308e-08, "loss": 2.9045, "step": 61045 }, { "epoch": 2.99, "grad_norm": 0.7072124481201172, "learning_rate": 1.1283555868057958e-08, "loss": 2.8852, "step": 61046 }, { "epoch": 2.99, "grad_norm": 0.7252053618431091, "learning_rate": 1.1150418555916362e-08, "loss": 2.6886, "step": 61047 }, { "epoch": 2.99, "grad_norm": 0.7440884709358215, "learning_rate": 1.1018071356683201e-08, "loss": 2.9452, "step": 61048 }, { "epoch": 2.99, "grad_norm": 0.7469719052314758, "learning_rate": 1.0886514270691538e-08, "loss": 3.1659, "step": 61049 }, { "epoch": 2.99, "grad_norm": 0.771772563457489, "learning_rate": 1.0755747298307748e-08, "loss": 2.9415, "step": 61050 }, { "epoch": 2.99, "grad_norm": 0.7458013892173767, "learning_rate": 1.0625770439831594e-08, "loss": 2.8189, "step": 61051 }, { "epoch": 2.99, "grad_norm": 0.749146044254303, "learning_rate": 1.0496583695629447e-08, "loss": 2.8342, "step": 61052 }, { "epoch": 2.99, "grad_norm": 0.7614862322807312, "learning_rate": 1.0368187066067679e-08, "loss": 2.9507, "step": 61053 }, { "epoch": 2.99, "grad_norm": 0.7297611832618713, "learning_rate": 1.0240580551479361e-08, "loss": 2.8405, "step": 61054 }, { "epoch": 2.99, "grad_norm": 0.764214277267456, "learning_rate": 1.0113764152130944e-08, "loss": 2.7654, "step": 61055 }, { "epoch": 2.99, "grad_norm": 0.7660698890686035, "learning_rate": 9.987737868455415e-09, "loss": 2.8231, "step": 61056 }, { "epoch": 2.99, "grad_norm": 0.7836101055145264, "learning_rate": 9.862501700719227e-09, "loss": 2.6716, "step": 61057 }, { "epoch": 2.99, "grad_norm": 0.7371546626091003, "learning_rate": 9.73805564925545e-09, "loss": 2.7436, "step": 61058 }, { "epoch": 2.99, "grad_norm": 0.7891651391983032, "learning_rate": 9.614399714430455e-09, "loss": 2.951, "step": 61059 }, { "epoch": 2.99, "grad_norm": 0.7586222290992737, "learning_rate": 9.491533896544002e-09, "loss": 2.8016, "step": 61060 }, { "epoch": 2.99, "grad_norm": 0.7199023365974426, "learning_rate": 9.369458195929158e-09, "loss": 2.7525, "step": 61061 }, { "epoch": 2.99, "grad_norm": 0.7390625476837158, "learning_rate": 9.248172612885686e-09, "loss": 3.1196, "step": 61062 }, { "epoch": 2.99, "grad_norm": 0.7458911538124084, "learning_rate": 9.127677147746649e-09, "loss": 2.8515, "step": 61063 }, { "epoch": 2.99, "grad_norm": 0.7433958053588867, "learning_rate": 9.007971800845115e-09, "loss": 2.6842, "step": 61064 }, { "epoch": 2.99, "grad_norm": 0.7755656242370605, "learning_rate": 8.889056572480847e-09, "loss": 2.8729, "step": 61065 }, { "epoch": 2.99, "grad_norm": 0.730906069278717, "learning_rate": 8.7709314629536e-09, "loss": 2.811, "step": 61066 }, { "epoch": 2.99, "grad_norm": 0.7480701804161072, "learning_rate": 8.653596472596447e-09, "loss": 2.8485, "step": 61067 }, { "epoch": 2.99, "grad_norm": 0.7432823777198792, "learning_rate": 8.537051601709144e-09, "loss": 2.8094, "step": 61068 }, { "epoch": 2.99, "grad_norm": 0.796856701374054, "learning_rate": 8.421296850591452e-09, "loss": 2.7903, "step": 61069 }, { "epoch": 2.99, "grad_norm": 0.7544580698013306, "learning_rate": 8.306332219576439e-09, "loss": 2.7556, "step": 61070 }, { "epoch": 2.99, "grad_norm": 0.7667417526245117, "learning_rate": 8.192157708930558e-09, "loss": 2.8409, "step": 61071 }, { "epoch": 2.99, "grad_norm": 0.7658982872962952, "learning_rate": 8.078773318986875e-09, "loss": 2.8028, "step": 61072 }, { "epoch": 2.99, "grad_norm": 0.7310402989387512, "learning_rate": 7.966179050045152e-09, "loss": 2.904, "step": 61073 }, { "epoch": 2.99, "grad_norm": 0.7373026609420776, "learning_rate": 7.854374902338533e-09, "loss": 3.0008, "step": 61074 }, { "epoch": 2.99, "grad_norm": 0.732184886932373, "learning_rate": 7.743360876233395e-09, "loss": 2.732, "step": 61075 }, { "epoch": 2.99, "grad_norm": 0.7509750723838806, "learning_rate": 7.633136972029497e-09, "loss": 2.9609, "step": 61076 }, { "epoch": 2.99, "grad_norm": 0.8037346601486206, "learning_rate": 7.523703189926677e-09, "loss": 2.8011, "step": 61077 }, { "epoch": 2.99, "grad_norm": 0.7447194457054138, "learning_rate": 7.415059530324619e-09, "loss": 3.1165, "step": 61078 }, { "epoch": 2.99, "grad_norm": 0.7197108268737793, "learning_rate": 7.30720599342316e-09, "loss": 2.8209, "step": 61079 }, { "epoch": 2.99, "grad_norm": 0.7522173523902893, "learning_rate": 7.200142579555368e-09, "loss": 3.2279, "step": 61080 }, { "epoch": 2.99, "grad_norm": 0.7585819363594055, "learning_rate": 7.093869288987697e-09, "loss": 2.8398, "step": 61081 }, { "epoch": 2.99, "grad_norm": 0.741621732711792, "learning_rate": 6.9883861219866e-09, "loss": 2.9234, "step": 61082 }, { "epoch": 2.99, "grad_norm": 0.758293092250824, "learning_rate": 6.883693078851838e-09, "loss": 2.9133, "step": 61083 }, { "epoch": 2.99, "grad_norm": 0.7666619420051575, "learning_rate": 6.779790159849863e-09, "loss": 2.7775, "step": 61084 }, { "epoch": 2.99, "grad_norm": 0.7205373644828796, "learning_rate": 6.67667736524713e-09, "loss": 3.0212, "step": 61085 }, { "epoch": 2.99, "grad_norm": 0.7747965455055237, "learning_rate": 6.574354695343398e-09, "loss": 2.7993, "step": 61086 }, { "epoch": 2.99, "grad_norm": 0.7606662511825562, "learning_rate": 6.472822150371815e-09, "loss": 2.7932, "step": 61087 }, { "epoch": 2.99, "grad_norm": 0.7349268198013306, "learning_rate": 6.372079730598833e-09, "loss": 2.9177, "step": 61088 }, { "epoch": 2.99, "grad_norm": 0.7886388897895813, "learning_rate": 6.272127436324215e-09, "loss": 2.8516, "step": 61089 }, { "epoch": 2.99, "grad_norm": 0.733219563961029, "learning_rate": 6.172965267814411e-09, "loss": 2.7334, "step": 61090 }, { "epoch": 2.99, "grad_norm": 0.8257836699485779, "learning_rate": 6.074593225302571e-09, "loss": 2.939, "step": 61091 }, { "epoch": 2.99, "grad_norm": 0.7202447056770325, "learning_rate": 5.9770113090218396e-09, "loss": 2.8128, "step": 61092 }, { "epoch": 2.99, "grad_norm": 0.7605451941490173, "learning_rate": 5.880219519305285e-09, "loss": 2.9223, "step": 61093 }, { "epoch": 2.99, "grad_norm": 0.7240636348724365, "learning_rate": 5.784217856352746e-09, "loss": 3.0476, "step": 61094 }, { "epoch": 2.99, "grad_norm": 0.7533585429191589, "learning_rate": 5.6890063203973715e-09, "loss": 2.95, "step": 61095 }, { "epoch": 2.99, "grad_norm": 0.724474310874939, "learning_rate": 5.594584911772227e-09, "loss": 3.0003, "step": 61096 }, { "epoch": 2.99, "grad_norm": 0.7521463632583618, "learning_rate": 5.5009536306438455e-09, "loss": 2.5749, "step": 61097 }, { "epoch": 2.99, "grad_norm": 0.7722815275192261, "learning_rate": 5.408112477278681e-09, "loss": 2.865, "step": 61098 }, { "epoch": 2.99, "grad_norm": 0.776710033416748, "learning_rate": 5.316061451976494e-09, "loss": 2.9713, "step": 61099 }, { "epoch": 2.99, "grad_norm": 0.7619010806083679, "learning_rate": 5.2248005549038184e-09, "loss": 3.0012, "step": 61100 }, { "epoch": 2.99, "grad_norm": 0.7714282870292664, "learning_rate": 5.134329786327107e-09, "loss": 2.6743, "step": 61101 }, { "epoch": 2.99, "grad_norm": 0.707321286201477, "learning_rate": 5.044649146512814e-09, "loss": 2.9208, "step": 61102 }, { "epoch": 2.99, "grad_norm": 0.7693123817443848, "learning_rate": 4.955758635694085e-09, "loss": 2.8076, "step": 61103 }, { "epoch": 2.99, "grad_norm": 0.7465909123420715, "learning_rate": 4.867658254037454e-09, "loss": 2.8324, "step": 61104 }, { "epoch": 2.99, "grad_norm": 0.732196033000946, "learning_rate": 4.780348001842682e-09, "loss": 2.9903, "step": 61105 }, { "epoch": 2.99, "grad_norm": 0.7602418065071106, "learning_rate": 4.693827879342915e-09, "loss": 2.9082, "step": 61106 }, { "epoch": 2.99, "grad_norm": 0.7443466782569885, "learning_rate": 4.608097886737994e-09, "loss": 2.9654, "step": 61107 }, { "epoch": 2.99, "grad_norm": 0.7329009175300598, "learning_rate": 4.523158024227758e-09, "loss": 2.9078, "step": 61108 }, { "epoch": 2.99, "grad_norm": 0.8062663078308105, "learning_rate": 4.439008292111967e-09, "loss": 2.8382, "step": 61109 }, { "epoch": 2.99, "grad_norm": 0.7804901003837585, "learning_rate": 4.355648690557157e-09, "loss": 2.9814, "step": 61110 }, { "epoch": 2.99, "grad_norm": 0.7507225871086121, "learning_rate": 4.273079219763165e-09, "loss": 2.7954, "step": 61111 }, { "epoch": 2.99, "grad_norm": 0.7602078914642334, "learning_rate": 4.191299879996446e-09, "loss": 2.7327, "step": 61112 }, { "epoch": 3.0, "grad_norm": 0.7694182991981506, "learning_rate": 4.110310671490147e-09, "loss": 2.9445, "step": 61113 }, { "epoch": 3.0, "grad_norm": 0.7346636652946472, "learning_rate": 4.030111594377494e-09, "loss": 2.5756, "step": 61114 }, { "epoch": 3.0, "grad_norm": 0.7528190612792969, "learning_rate": 3.950702648924942e-09, "loss": 2.7316, "step": 61115 }, { "epoch": 3.0, "grad_norm": 0.7616682052612305, "learning_rate": 3.872083835332329e-09, "loss": 2.8147, "step": 61116 }, { "epoch": 3.0, "grad_norm": 0.7209632396697998, "learning_rate": 3.794255153799497e-09, "loss": 2.9357, "step": 61117 }, { "epoch": 3.0, "grad_norm": 0.7706297039985657, "learning_rate": 3.717216604559592e-09, "loss": 2.9647, "step": 61118 }, { "epoch": 3.0, "grad_norm": 0.7737950682640076, "learning_rate": 3.6409681877791475e-09, "loss": 2.6065, "step": 61119 }, { "epoch": 3.0, "grad_norm": 0.7609835863113403, "learning_rate": 3.5655099036580036e-09, "loss": 2.9106, "step": 61120 }, { "epoch": 3.0, "grad_norm": 0.774637758731842, "learning_rate": 3.4908417524293075e-09, "loss": 2.862, "step": 61121 }, { "epoch": 3.0, "grad_norm": 0.8165029883384705, "learning_rate": 3.4169637342595922e-09, "loss": 2.9376, "step": 61122 }, { "epoch": 3.0, "grad_norm": 0.817268967628479, "learning_rate": 3.3438758493486984e-09, "loss": 3.0128, "step": 61123 }, { "epoch": 3.0, "grad_norm": 0.7474488615989685, "learning_rate": 3.2715780979297723e-09, "loss": 2.8821, "step": 61124 }, { "epoch": 3.0, "grad_norm": 0.72201007604599, "learning_rate": 3.200070480136041e-09, "loss": 3.1782, "step": 61125 }, { "epoch": 3.0, "grad_norm": 0.762576699256897, "learning_rate": 3.1293529961673445e-09, "loss": 2.8246, "step": 61126 }, { "epoch": 3.0, "grad_norm": 0.8136159777641296, "learning_rate": 3.0594256462235234e-09, "loss": 2.9168, "step": 61127 }, { "epoch": 3.0, "grad_norm": 0.7495198249816895, "learning_rate": 2.9902884304711106e-09, "loss": 2.7297, "step": 61128 }, { "epoch": 3.0, "grad_norm": 0.7615594267845154, "learning_rate": 2.921941349143253e-09, "loss": 2.8838, "step": 61129 }, { "epoch": 3.0, "grad_norm": 0.7518624067306519, "learning_rate": 2.854384402373178e-09, "loss": 3.0472, "step": 61130 }, { "epoch": 3.0, "grad_norm": 0.7348370552062988, "learning_rate": 2.7876175903274177e-09, "loss": 2.8619, "step": 61131 }, { "epoch": 3.0, "grad_norm": 0.7087485790252686, "learning_rate": 2.7216409132058136e-09, "loss": 3.0459, "step": 61132 }, { "epoch": 3.0, "grad_norm": 0.8339840769767761, "learning_rate": 2.6564543712082053e-09, "loss": 2.7792, "step": 61133 }, { "epoch": 3.0, "grad_norm": 0.7480267882347107, "learning_rate": 2.5920579644678195e-09, "loss": 2.9, "step": 61134 }, { "epoch": 3.0, "grad_norm": 0.7389785647392273, "learning_rate": 2.52845169315119e-09, "loss": 2.9488, "step": 61135 }, { "epoch": 3.0, "grad_norm": 0.7411351799964905, "learning_rate": 2.4656355574581565e-09, "loss": 2.8339, "step": 61136 }, { "epoch": 3.0, "grad_norm": 0.8139830231666565, "learning_rate": 2.4036095575219462e-09, "loss": 2.8145, "step": 61137 }, { "epoch": 3.0, "grad_norm": 0.7364872097969055, "learning_rate": 2.342373693509092e-09, "loss": 2.7267, "step": 61138 }, { "epoch": 3.0, "grad_norm": 0.7832111120223999, "learning_rate": 2.281927965586128e-09, "loss": 2.8519, "step": 61139 }, { "epoch": 3.0, "grad_norm": 0.7762227058410645, "learning_rate": 2.2222723739528935e-09, "loss": 2.7967, "step": 61140 }, { "epoch": 3.0, "grad_norm": 0.7120994329452515, "learning_rate": 2.1634069187093096e-09, "loss": 2.878, "step": 61141 }, { "epoch": 3.0, "grad_norm": 0.7865113019943237, "learning_rate": 2.105331600021909e-09, "loss": 2.6527, "step": 61142 }, { "epoch": 3.0, "grad_norm": 0.7547078728675842, "learning_rate": 2.0480464180572252e-09, "loss": 3.0927, "step": 61143 }, { "epoch": 3.0, "grad_norm": 0.7572701573371887, "learning_rate": 1.991551372981792e-09, "loss": 2.9788, "step": 61144 }, { "epoch": 3.0, "grad_norm": 0.7272855639457703, "learning_rate": 1.9358464648955295e-09, "loss": 2.9688, "step": 61145 }, { "epoch": 3.0, "grad_norm": 0.8515251278877258, "learning_rate": 1.8809316939982775e-09, "loss": 2.8112, "step": 61146 }, { "epoch": 3.0, "grad_norm": 0.7926790714263916, "learning_rate": 1.8268070603899565e-09, "loss": 2.7903, "step": 61147 }, { "epoch": 3.0, "grad_norm": 0.795349657535553, "learning_rate": 1.7734725642370994e-09, "loss": 2.7626, "step": 61148 }, { "epoch": 3.0, "grad_norm": 0.7533791661262512, "learning_rate": 1.7209282057062401e-09, "loss": 2.8706, "step": 61149 }, { "epoch": 3.0, "grad_norm": 0.7342902421951294, "learning_rate": 1.6691739848972985e-09, "loss": 2.7107, "step": 61150 }, { "epoch": 3.0, "grad_norm": 0.7694773077964783, "learning_rate": 1.6182099019435013e-09, "loss": 2.9749, "step": 61151 }, { "epoch": 3.0, "grad_norm": 0.7363452315330505, "learning_rate": 1.5680359569780754e-09, "loss": 2.7131, "step": 61152 }, { "epoch": 3.0, "grad_norm": 0.6961015462875366, "learning_rate": 1.5186521501675541e-09, "loss": 2.7004, "step": 61153 }, { "epoch": 3.0, "grad_norm": 0.7018082141876221, "learning_rate": 1.4700584816118576e-09, "loss": 2.9433, "step": 61154 }, { "epoch": 3.0, "grad_norm": 0.6965725421905518, "learning_rate": 1.4222549514442127e-09, "loss": 3.0143, "step": 61155 }, { "epoch": 3.0, "grad_norm": 0.7362546324729919, "learning_rate": 1.3752415597978462e-09, "loss": 2.652, "step": 61156 }, { "epoch": 3.0, "grad_norm": 0.7468457818031311, "learning_rate": 1.3290183068059845e-09, "loss": 2.9477, "step": 61157 }, { "epoch": 3.0, "grad_norm": 0.764543890953064, "learning_rate": 1.283585192568548e-09, "loss": 2.9168, "step": 61158 }, { "epoch": 3.0, "grad_norm": 0.7274712920188904, "learning_rate": 1.2389422172187636e-09, "loss": 2.7423, "step": 61159 }, { "epoch": 3.0, "grad_norm": 0.8610571026802063, "learning_rate": 1.1950893808565509e-09, "loss": 2.8983, "step": 61160 }, { "epoch": 3.0, "grad_norm": 0.7543455362319946, "learning_rate": 1.1520266836151371e-09, "loss": 3.0349, "step": 61161 }, { "epoch": 3.0, "grad_norm": 0.7443333864212036, "learning_rate": 1.1097541256277487e-09, "loss": 2.9159, "step": 61162 }, { "epoch": 3.0, "grad_norm": 0.7243948578834534, "learning_rate": 1.0682717069609993e-09, "loss": 2.8332, "step": 61163 }, { "epoch": 3.0, "grad_norm": 0.7481058239936829, "learning_rate": 1.0275794277481154e-09, "loss": 3.0435, "step": 61164 }, { "epoch": 3.0, "grad_norm": 0.7074810862541199, "learning_rate": 9.876772880890171e-10, "loss": 2.8396, "step": 61165 }, { "epoch": 3.0, "grad_norm": 0.743200421333313, "learning_rate": 9.485652880836248e-10, "loss": 2.8234, "step": 61166 }, { "epoch": 3.0, "grad_norm": 0.7826438546180725, "learning_rate": 9.102434278318582e-10, "loss": 2.8359, "step": 61167 }, { "epoch": 3.0, "grad_norm": 0.7499393820762634, "learning_rate": 8.72711707500251e-10, "loss": 2.7631, "step": 61168 }, { "epoch": 3.0, "grad_norm": 0.7657136917114258, "learning_rate": 8.359701270888031e-10, "loss": 2.7475, "step": 61169 }, { "epoch": 3.0, "grad_norm": 0.72947758436203, "learning_rate": 8.000186867307413e-10, "loss": 2.9276, "step": 61170 }, { "epoch": 3.0, "grad_norm": 0.7392745018005371, "learning_rate": 7.648573865592921e-10, "loss": 2.9282, "step": 61171 }, { "epoch": 3.0, "grad_norm": 0.7172539830207825, "learning_rate": 7.304862266410693e-10, "loss": 2.8242, "step": 61172 }, { "epoch": 3.0, "grad_norm": 0.7763723731040955, "learning_rate": 6.969052070426862e-10, "loss": 3.1106, "step": 61173 }, { "epoch": 3.0, "grad_norm": 0.7424915432929993, "learning_rate": 6.641143278973693e-10, "loss": 3.0323, "step": 61174 }, { "epoch": 3.0, "grad_norm": 0.7958371043205261, "learning_rate": 6.321135892384254e-10, "loss": 2.972, "step": 61175 }, { "epoch": 3.0, "grad_norm": 0.7552845478057861, "learning_rate": 6.009029911990815e-10, "loss": 2.764, "step": 61176 }, { "epoch": 3.0, "grad_norm": 0.7244768142700195, "learning_rate": 5.70482533812644e-10, "loss": 3.1299, "step": 61177 }, { "epoch": 3.0, "grad_norm": 0.7536823153495789, "learning_rate": 5.408522172123397e-10, "loss": 2.8117, "step": 61178 }, { "epoch": 3.0, "grad_norm": 0.7438461780548096, "learning_rate": 5.120120414314754e-10, "loss": 2.8251, "step": 61179 }, { "epoch": 3.0, "grad_norm": 0.767284095287323, "learning_rate": 4.839620066032778e-10, "loss": 2.6252, "step": 61180 }, { "epoch": 3.0, "grad_norm": 0.755458652973175, "learning_rate": 4.5670211272774705e-10, "loss": 2.9643, "step": 61181 }, { "epoch": 3.0, "grad_norm": 0.7305696606636047, "learning_rate": 4.3023235993810966e-10, "loss": 3.0228, "step": 61182 }, { "epoch": 3.0, "grad_norm": 0.7119986414909363, "learning_rate": 4.0455274826767246e-10, "loss": 2.8719, "step": 61183 }, { "epoch": 3.0, "grad_norm": 0.7614222764968872, "learning_rate": 3.7966327778304883e-10, "loss": 2.9814, "step": 61184 }, { "epoch": 3.0, "grad_norm": 0.8122992515563965, "learning_rate": 3.555639485841588e-10, "loss": 2.9432, "step": 61185 }, { "epoch": 3.0, "grad_norm": 0.7403243780136108, "learning_rate": 3.322547607043091e-10, "loss": 2.8608, "step": 61186 }, { "epoch": 3.0, "grad_norm": 0.7525769472122192, "learning_rate": 3.0973571421011313e-10, "loss": 2.9349, "step": 61187 }, { "epoch": 3.0, "grad_norm": 0.7958530187606812, "learning_rate": 2.880068091681842e-10, "loss": 3.0496, "step": 61188 }, { "epoch": 3.0, "grad_norm": 0.7971166968345642, "learning_rate": 2.67068045611829e-10, "loss": 3.0409, "step": 61189 }, { "epoch": 3.0, "grad_norm": 0.7553383708000183, "learning_rate": 2.469194236409677e-10, "loss": 2.8486, "step": 61190 }, { "epoch": 3.0, "grad_norm": 0.7261549830436707, "learning_rate": 2.2756094325560025e-10, "loss": 2.9742, "step": 61191 }, { "epoch": 3.0, "grad_norm": 0.7534186840057373, "learning_rate": 2.089926045556467e-10, "loss": 2.5935, "step": 61192 }, { "epoch": 3.0, "grad_norm": 0.7727741003036499, "learning_rate": 1.9121440757441376e-10, "loss": 2.9467, "step": 61193 }, { "epoch": 3.0, "grad_norm": 0.7001724243164062, "learning_rate": 1.742263523452081e-10, "loss": 2.7758, "step": 61194 }, { "epoch": 3.0, "grad_norm": 0.7367728352546692, "learning_rate": 1.5802843893464312e-10, "loss": 2.7828, "step": 61195 }, { "epoch": 3.0, "grad_norm": 0.7262899875640869, "learning_rate": 1.4262066734271881e-10, "loss": 2.8854, "step": 61196 }, { "epoch": 3.0, "grad_norm": 0.7722108364105225, "learning_rate": 1.2800303766935526e-10, "loss": 2.9377, "step": 61197 }, { "epoch": 3.0, "grad_norm": 0.774972677230835, "learning_rate": 1.1417554991455247e-10, "loss": 2.8263, "step": 61198 }, { "epoch": 3.0, "grad_norm": 0.7445690035820007, "learning_rate": 1.011382041116171e-10, "loss": 2.9051, "step": 61199 }, { "epoch": 3.0, "grad_norm": 0.7341079115867615, "learning_rate": 8.889100032716257e-11, "loss": 2.925, "step": 61200 }, { "epoch": 3.0, "grad_norm": 0.7602269649505615, "learning_rate": 7.743393852788216e-11, "loss": 2.9037, "step": 61201 }, { "epoch": 3.0, "grad_norm": 0.7879922986030579, "learning_rate": 6.676701884700264e-11, "loss": 2.7574, "step": 61202 }, { "epoch": 3.0, "grad_norm": 0.7422546148300171, "learning_rate": 5.689024121791064e-11, "loss": 2.8568, "step": 61203 }, { "epoch": 3.0, "grad_norm": 0.7315881252288818, "learning_rate": 4.7803605707219525e-11, "loss": 2.8086, "step": 61204 }, { "epoch": 3.0, "grad_norm": 0.7382916212081909, "learning_rate": 3.9507112348236e-11, "loss": 2.7754, "step": 61205 }, { "epoch": 3.0, "grad_norm": 0.8105321526527405, "learning_rate": 3.2000761107653375e-11, "loss": 2.7572, "step": 61206 }, { "epoch": 3.0, "grad_norm": 0.7308658361434937, "learning_rate": 2.5284552085391706e-11, "loss": 2.9413, "step": 61207 }, { "epoch": 3.0, "grad_norm": 0.8163118362426758, "learning_rate": 1.9358485248144318e-11, "loss": 2.8004, "step": 61208 }, { "epoch": 3.0, "grad_norm": 0.7237694263458252, "learning_rate": 1.4222560629217894e-11, "loss": 2.8694, "step": 61209 }, { "epoch": 3.0, "grad_norm": 0.7595729231834412, "learning_rate": 9.876778261919127e-12, "loss": 2.8303, "step": 61210 }, { "epoch": 3.0, "grad_norm": 0.7996221780776978, "learning_rate": 6.3211380796346356e-12, "loss": 3.0761, "step": 61211 }, { "epoch": 3.0, "grad_norm": 0.793536901473999, "learning_rate": 3.5556401822844914e-12, "loss": 2.7739, "step": 61212 }, { "epoch": 3.0, "grad_norm": 0.7907678484916687, "learning_rate": 1.5802845365620042e-12, "loss": 2.7948, "step": 61213 }, { "epoch": 3.0, "grad_norm": 0.7259610891342163, "learning_rate": 3.9507114246717374e-13, "loss": 2.8744, "step": 61214 }, { "epoch": 3.0, "grad_norm": 0.8341996669769287, "learning_rate": 0.0, "loss": 3.0732, "step": 61215 }, { "epoch": 3.0, "step": 61215, "total_flos": 1.1279892023790797e+17, "train_loss": 3.1076881288265, "train_runtime": 9220.7966, "train_samples_per_second": 424.877, "train_steps_per_second": 6.639 } ], "logging_steps": 1.0, "max_steps": 61215, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5000, "total_flos": 1.1279892023790797e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }