{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 110500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02262443438914027, "grad_norm": 1.5616087913513184, "learning_rate": 3.122171945701357e-07, "loss": 12.3328, "step": 25 }, { "epoch": 0.04524886877828054, "grad_norm": 1.4146922826766968, "learning_rate": 6.515837104072398e-07, "loss": 12.5401, "step": 50 }, { "epoch": 0.06787330316742081, "grad_norm": 2.0305981636047363, "learning_rate": 9.909502262443438e-07, "loss": 12.2748, "step": 75 }, { "epoch": 0.09049773755656108, "grad_norm": 1.0272996425628662, "learning_rate": 1.3303167420814479e-06, "loss": 12.5666, "step": 100 }, { "epoch": 0.11312217194570136, "grad_norm": 1.0590907335281372, "learning_rate": 1.6696832579185518e-06, "loss": 12.6943, "step": 125 }, { "epoch": 0.13574660633484162, "grad_norm": 1.196068525314331, "learning_rate": 2.009049773755656e-06, "loss": 12.6146, "step": 150 }, { "epoch": 0.1583710407239819, "grad_norm": 1.4678159952163696, "learning_rate": 2.34841628959276e-06, "loss": 11.977, "step": 175 }, { "epoch": 0.18099547511312217, "grad_norm": 1.9250414371490479, "learning_rate": 2.6877828054298643e-06, "loss": 12.002, "step": 200 }, { "epoch": 0.20361990950226244, "grad_norm": 2.0926384925842285, "learning_rate": 3.027149321266968e-06, "loss": 12.1648, "step": 225 }, { "epoch": 0.22624434389140272, "grad_norm": 1.752112627029419, "learning_rate": 3.366515837104072e-06, "loss": 11.4138, "step": 250 }, { "epoch": 0.248868778280543, "grad_norm": 1.5051826238632202, "learning_rate": 3.705882352941176e-06, "loss": 11.7424, "step": 275 }, { "epoch": 0.27149321266968324, "grad_norm": 1.4604785442352295, "learning_rate": 4.045248868778281e-06, "loss": 10.9347, "step": 300 }, { "epoch": 0.29411764705882354, "grad_norm": 1.1712795495986938, "learning_rate": 4.384615384615384e-06, "loss": 11.0688, "step": 325 }, { 
"epoch": 0.3167420814479638, "grad_norm": 1.562002420425415, "learning_rate": 4.723981900452488e-06, "loss": 10.3233, "step": 350 }, { "epoch": 0.3393665158371041, "grad_norm": 1.423509120941162, "learning_rate": 5.063348416289593e-06, "loss": 10.8483, "step": 375 }, { "epoch": 0.36199095022624433, "grad_norm": 1.488276481628418, "learning_rate": 5.402714932126696e-06, "loss": 10.4095, "step": 400 }, { "epoch": 0.38461538461538464, "grad_norm": 1.0716525316238403, "learning_rate": 5.7420814479638004e-06, "loss": 10.0423, "step": 425 }, { "epoch": 0.4072398190045249, "grad_norm": 1.3523911237716675, "learning_rate": 6.081447963800904e-06, "loss": 9.3279, "step": 450 }, { "epoch": 0.4298642533936652, "grad_norm": 1.3519021272659302, "learning_rate": 6.420814479638009e-06, "loss": 8.6838, "step": 475 }, { "epoch": 0.45248868778280543, "grad_norm": 0.8001397252082825, "learning_rate": 6.7601809954751125e-06, "loss": 8.5509, "step": 500 }, { "epoch": 0.4751131221719457, "grad_norm": 1.0891382694244385, "learning_rate": 7.099547511312217e-06, "loss": 7.935, "step": 525 }, { "epoch": 0.497737556561086, "grad_norm": 0.7729541659355164, "learning_rate": 7.43891402714932e-06, "loss": 7.5149, "step": 550 }, { "epoch": 0.5203619909502263, "grad_norm": 0.8944075703620911, "learning_rate": 7.778280542986424e-06, "loss": 7.1296, "step": 575 }, { "epoch": 0.5429864253393665, "grad_norm": 0.6566529273986816, "learning_rate": 8.117647058823528e-06, "loss": 7.0305, "step": 600 }, { "epoch": 0.5656108597285068, "grad_norm": 0.871402382850647, "learning_rate": 8.457013574660632e-06, "loss": 6.7858, "step": 625 }, { "epoch": 0.5882352941176471, "grad_norm": 1.4330376386642456, "learning_rate": 8.796380090497737e-06, "loss": 6.7152, "step": 650 }, { "epoch": 0.6108597285067874, "grad_norm": 0.7765570878982544, "learning_rate": 9.135746606334841e-06, "loss": 6.524, "step": 675 }, { "epoch": 0.6334841628959276, "grad_norm": 0.7222900390625, "learning_rate": 9.475113122171945e-06, "loss": 
6.5056, "step": 700 }, { "epoch": 0.6561085972850679, "grad_norm": 0.8623555302619934, "learning_rate": 9.81447963800905e-06, "loss": 6.4704, "step": 725 }, { "epoch": 0.6787330316742082, "grad_norm": 0.7340829968452454, "learning_rate": 1.0153846153846152e-05, "loss": 6.4959, "step": 750 }, { "epoch": 0.7013574660633484, "grad_norm": 0.8369361162185669, "learning_rate": 1.0493212669683258e-05, "loss": 6.352, "step": 775 }, { "epoch": 0.7239819004524887, "grad_norm": 1.066939353942871, "learning_rate": 1.083257918552036e-05, "loss": 6.3632, "step": 800 }, { "epoch": 0.746606334841629, "grad_norm": 0.9356958866119385, "learning_rate": 1.1171945701357465e-05, "loss": 6.3694, "step": 825 }, { "epoch": 0.7692307692307693, "grad_norm": 0.9633516073226929, "learning_rate": 1.1511312217194568e-05, "loss": 6.3532, "step": 850 }, { "epoch": 0.7918552036199095, "grad_norm": 0.8974762558937073, "learning_rate": 1.1850678733031674e-05, "loss": 6.3075, "step": 875 }, { "epoch": 0.8144796380090498, "grad_norm": 0.9667727947235107, "learning_rate": 1.2190045248868778e-05, "loss": 6.3347, "step": 900 }, { "epoch": 0.8371040723981901, "grad_norm": 1.1597741842269897, "learning_rate": 1.252941176470588e-05, "loss": 6.301, "step": 925 }, { "epoch": 0.8597285067873304, "grad_norm": 1.0287904739379883, "learning_rate": 1.2868778280542987e-05, "loss": 6.4209, "step": 950 }, { "epoch": 0.8823529411764706, "grad_norm": 1.2371269464492798, "learning_rate": 1.320814479638009e-05, "loss": 6.2208, "step": 975 }, { "epoch": 0.9049773755656109, "grad_norm": 1.331064224243164, "learning_rate": 1.3547511312217193e-05, "loss": 6.3464, "step": 1000 }, { "epoch": 0.9276018099547512, "grad_norm": 1.0429011583328247, "learning_rate": 1.3886877828054298e-05, "loss": 6.1974, "step": 1025 }, { "epoch": 0.9502262443438914, "grad_norm": 1.0016249418258667, "learning_rate": 1.4226244343891402e-05, "loss": 6.1865, "step": 1050 }, { "epoch": 0.9728506787330317, "grad_norm": 1.299787163734436, "learning_rate": 
1.4565610859728506e-05, "loss": 6.1026, "step": 1075 }, { "epoch": 0.995475113122172, "grad_norm": 1.2434639930725098, "learning_rate": 1.4904977375565609e-05, "loss": 6.1164, "step": 1100 }, { "epoch": 1.0180995475113122, "grad_norm": 1.4540140628814697, "learning_rate": 1.5244343891402713e-05, "loss": 6.2016, "step": 1125 }, { "epoch": 1.0407239819004526, "grad_norm": 1.1259605884552002, "learning_rate": 1.5583710407239816e-05, "loss": 6.1216, "step": 1150 }, { "epoch": 1.0633484162895928, "grad_norm": 1.0611627101898193, "learning_rate": 1.592307692307692e-05, "loss": 6.2301, "step": 1175 }, { "epoch": 1.085972850678733, "grad_norm": 0.903337836265564, "learning_rate": 1.6262443438914028e-05, "loss": 6.102, "step": 1200 }, { "epoch": 1.1085972850678734, "grad_norm": 1.3304970264434814, "learning_rate": 1.660180995475113e-05, "loss": 6.0894, "step": 1225 }, { "epoch": 1.1312217194570136, "grad_norm": 1.39982008934021, "learning_rate": 1.6941176470588233e-05, "loss": 6.1409, "step": 1250 }, { "epoch": 1.1538461538461537, "grad_norm": 2.8062944412231445, "learning_rate": 1.7280542986425337e-05, "loss": 6.1516, "step": 1275 }, { "epoch": 1.1764705882352942, "grad_norm": 1.2145191431045532, "learning_rate": 1.7619909502262442e-05, "loss": 6.1426, "step": 1300 }, { "epoch": 1.1990950226244343, "grad_norm": 1.5336560010910034, "learning_rate": 1.7959276018099546e-05, "loss": 6.1265, "step": 1325 }, { "epoch": 1.2217194570135748, "grad_norm": 1.1498029232025146, "learning_rate": 1.829864253393665e-05, "loss": 6.1197, "step": 1350 }, { "epoch": 1.244343891402715, "grad_norm": 1.1084051132202148, "learning_rate": 1.8638009049773755e-05, "loss": 6.085, "step": 1375 }, { "epoch": 1.2669683257918551, "grad_norm": 1.5752390623092651, "learning_rate": 1.897737556561086e-05, "loss": 6.0966, "step": 1400 }, { "epoch": 1.2895927601809956, "grad_norm": 1.2039798498153687, "learning_rate": 1.9316742081447963e-05, "loss": 6.023, "step": 1425 }, { "epoch": 1.3122171945701357, 
"grad_norm": 1.3117939233779907, "learning_rate": 1.9656108597285064e-05, "loss": 6.0805, "step": 1450 }, { "epoch": 1.334841628959276, "grad_norm": 1.2346285581588745, "learning_rate": 1.999547511312217e-05, "loss": 6.1057, "step": 1475 }, { "epoch": 1.3574660633484164, "grad_norm": 1.4661617279052734, "learning_rate": 2.0334841628959276e-05, "loss": 6.1266, "step": 1500 }, { "epoch": 1.3800904977375565, "grad_norm": 0.9981404542922974, "learning_rate": 2.0674208144796377e-05, "loss": 6.1344, "step": 1525 }, { "epoch": 1.4027149321266967, "grad_norm": 1.305759072303772, "learning_rate": 2.101357466063348e-05, "loss": 5.9825, "step": 1550 }, { "epoch": 1.4253393665158371, "grad_norm": 1.2048537731170654, "learning_rate": 2.135294117647059e-05, "loss": 5.8368, "step": 1575 }, { "epoch": 1.4479638009049773, "grad_norm": 1.233276128768921, "learning_rate": 2.169230769230769e-05, "loss": 6.0436, "step": 1600 }, { "epoch": 1.4705882352941178, "grad_norm": 1.9798985719680786, "learning_rate": 2.2031674208144794e-05, "loss": 5.9756, "step": 1625 }, { "epoch": 1.493212669683258, "grad_norm": 1.366964340209961, "learning_rate": 2.2371040723981895e-05, "loss": 6.0294, "step": 1650 }, { "epoch": 1.5158371040723981, "grad_norm": 1.187187910079956, "learning_rate": 2.2710407239819003e-05, "loss": 6.0066, "step": 1675 }, { "epoch": 1.5384615384615383, "grad_norm": 1.3633379936218262, "learning_rate": 2.3049773755656107e-05, "loss": 5.9852, "step": 1700 }, { "epoch": 1.5610859728506787, "grad_norm": 1.536897897720337, "learning_rate": 2.3389140271493208e-05, "loss": 6.0531, "step": 1725 }, { "epoch": 1.5837104072398192, "grad_norm": 1.3276063203811646, "learning_rate": 2.3728506787330316e-05, "loss": 5.9943, "step": 1750 }, { "epoch": 1.6063348416289593, "grad_norm": 1.8121161460876465, "learning_rate": 2.406787330316742e-05, "loss": 5.938, "step": 1775 }, { "epoch": 1.6289592760180995, "grad_norm": 1.8641277551651, "learning_rate": 2.440723981900452e-05, "loss": 5.8515, "step": 
1800 }, { "epoch": 1.6515837104072397, "grad_norm": 1.6875813007354736, "learning_rate": 2.474660633484163e-05, "loss": 6.0208, "step": 1825 }, { "epoch": 1.6742081447963801, "grad_norm": 1.865849256515503, "learning_rate": 2.5085972850678733e-05, "loss": 5.9911, "step": 1850 }, { "epoch": 1.6968325791855203, "grad_norm": 2.502399444580078, "learning_rate": 2.5425339366515834e-05, "loss": 6.0454, "step": 1875 }, { "epoch": 1.7194570135746607, "grad_norm": 1.32041597366333, "learning_rate": 2.5764705882352938e-05, "loss": 6.0646, "step": 1900 }, { "epoch": 1.742081447963801, "grad_norm": 1.2567394971847534, "learning_rate": 2.6104072398190046e-05, "loss": 5.8021, "step": 1925 }, { "epoch": 1.7647058823529411, "grad_norm": 1.3535124063491821, "learning_rate": 2.6443438914027147e-05, "loss": 5.9069, "step": 1950 }, { "epoch": 1.7873303167420813, "grad_norm": 2.1293888092041016, "learning_rate": 2.678280542986425e-05, "loss": 5.931, "step": 1975 }, { "epoch": 1.8099547511312217, "grad_norm": 1.651511311531067, "learning_rate": 2.7122171945701355e-05, "loss": 6.0154, "step": 2000 }, { "epoch": 1.8325791855203621, "grad_norm": 1.5086476802825928, "learning_rate": 2.746153846153846e-05, "loss": 5.984, "step": 2025 }, { "epoch": 1.8552036199095023, "grad_norm": 1.448927402496338, "learning_rate": 2.7800904977375564e-05, "loss": 5.9867, "step": 2050 }, { "epoch": 1.8778280542986425, "grad_norm": 1.412226676940918, "learning_rate": 2.8140271493212665e-05, "loss": 5.9561, "step": 2075 }, { "epoch": 1.9004524886877827, "grad_norm": 1.206612467765808, "learning_rate": 2.8479638009049773e-05, "loss": 5.9315, "step": 2100 }, { "epoch": 1.9230769230769231, "grad_norm": 2.53358793258667, "learning_rate": 2.8819004524886877e-05, "loss": 5.8414, "step": 2125 }, { "epoch": 1.9457013574660633, "grad_norm": 1.4013510942459106, "learning_rate": 2.9158371040723978e-05, "loss": 5.9605, "step": 2150 }, { "epoch": 1.9683257918552037, "grad_norm": 1.202830195426941, "learning_rate": 
2.9497737556561086e-05, "loss": 5.9057, "step": 2175 }, { "epoch": 1.990950226244344, "grad_norm": 1.3365705013275146, "learning_rate": 2.9837104072398186e-05, "loss": 5.904, "step": 2200 }, { "epoch": 2.013574660633484, "grad_norm": 1.5933775901794434, "learning_rate": 3.017647058823529e-05, "loss": 5.8016, "step": 2225 }, { "epoch": 2.0361990950226243, "grad_norm": 1.8763705492019653, "learning_rate": 3.0515837104072395e-05, "loss": 5.7132, "step": 2250 }, { "epoch": 2.0588235294117645, "grad_norm": 1.6877716779708862, "learning_rate": 3.08552036199095e-05, "loss": 5.9003, "step": 2275 }, { "epoch": 2.081447963800905, "grad_norm": 1.698456883430481, "learning_rate": 3.1194570135746604e-05, "loss": 5.8553, "step": 2300 }, { "epoch": 2.1040723981900453, "grad_norm": 1.278631567955017, "learning_rate": 3.1533936651583705e-05, "loss": 5.8442, "step": 2325 }, { "epoch": 2.1266968325791855, "grad_norm": 1.5354814529418945, "learning_rate": 3.187330316742081e-05, "loss": 5.8794, "step": 2350 }, { "epoch": 2.1493212669683257, "grad_norm": 1.8818252086639404, "learning_rate": 3.221266968325791e-05, "loss": 5.7308, "step": 2375 }, { "epoch": 2.171945701357466, "grad_norm": 1.439894437789917, "learning_rate": 3.255203619909502e-05, "loss": 5.841, "step": 2400 }, { "epoch": 2.1945701357466065, "grad_norm": 1.8124769926071167, "learning_rate": 3.289140271493212e-05, "loss": 5.8127, "step": 2425 }, { "epoch": 2.2171945701357467, "grad_norm": 1.4732517004013062, "learning_rate": 3.323076923076923e-05, "loss": 5.7981, "step": 2450 }, { "epoch": 2.239819004524887, "grad_norm": 1.1843006610870361, "learning_rate": 3.357013574660633e-05, "loss": 5.8788, "step": 2475 }, { "epoch": 2.262443438914027, "grad_norm": 1.776401162147522, "learning_rate": 3.390950226244343e-05, "loss": 5.7363, "step": 2500 }, { "epoch": 2.2850678733031673, "grad_norm": 1.7445324659347534, "learning_rate": 3.424886877828054e-05, "loss": 5.8269, "step": 2525 }, { "epoch": 2.3076923076923075, "grad_norm": 
1.3931142091751099, "learning_rate": 3.458823529411765e-05, "loss": 5.7918, "step": 2550 }, { "epoch": 2.330316742081448, "grad_norm": 1.1032215356826782, "learning_rate": 3.492760180995475e-05, "loss": 5.7233, "step": 2575 }, { "epoch": 2.3529411764705883, "grad_norm": 1.4925826787948608, "learning_rate": 3.526696832579185e-05, "loss": 5.8363, "step": 2600 }, { "epoch": 2.3755656108597285, "grad_norm": 1.5988218784332275, "learning_rate": 3.5606334841628956e-05, "loss": 5.8818, "step": 2625 }, { "epoch": 2.3981900452488687, "grad_norm": 1.3079369068145752, "learning_rate": 3.594570135746606e-05, "loss": 5.8039, "step": 2650 }, { "epoch": 2.420814479638009, "grad_norm": 1.6587289571762085, "learning_rate": 3.6285067873303165e-05, "loss": 5.774, "step": 2675 }, { "epoch": 2.4434389140271495, "grad_norm": 1.2775558233261108, "learning_rate": 3.662443438914027e-05, "loss": 5.7375, "step": 2700 }, { "epoch": 2.4660633484162897, "grad_norm": 1.5299643278121948, "learning_rate": 3.6963800904977373e-05, "loss": 5.8269, "step": 2725 }, { "epoch": 2.48868778280543, "grad_norm": 1.7605458498001099, "learning_rate": 3.7303167420814474e-05, "loss": 5.8024, "step": 2750 }, { "epoch": 2.51131221719457, "grad_norm": 1.6457359790802002, "learning_rate": 3.7642533936651575e-05, "loss": 5.8177, "step": 2775 }, { "epoch": 2.5339366515837103, "grad_norm": 1.691697120666504, "learning_rate": 3.798190045248868e-05, "loss": 5.8871, "step": 2800 }, { "epoch": 2.5565610859728505, "grad_norm": 1.1488367319107056, "learning_rate": 3.832126696832579e-05, "loss": 5.8424, "step": 2825 }, { "epoch": 2.579185520361991, "grad_norm": 1.314544677734375, "learning_rate": 3.86606334841629e-05, "loss": 5.7421, "step": 2850 }, { "epoch": 2.6018099547511313, "grad_norm": 1.5784523487091064, "learning_rate": 3.9e-05, "loss": 5.9461, "step": 2875 }, { "epoch": 2.6244343891402715, "grad_norm": 1.5426156520843506, "learning_rate": 3.93393665158371e-05, "loss": 5.8428, "step": 2900 }, { "epoch": 
2.6470588235294117, "grad_norm": 1.8941651582717896, "learning_rate": 3.96787330316742e-05, "loss": 5.8117, "step": 2925 }, { "epoch": 2.669683257918552, "grad_norm": 1.6045634746551514, "learning_rate": 4.001809954751131e-05, "loss": 5.7422, "step": 2950 }, { "epoch": 2.6923076923076925, "grad_norm": 1.4464021921157837, "learning_rate": 4.035746606334841e-05, "loss": 5.802, "step": 2975 }, { "epoch": 2.7149321266968327, "grad_norm": 1.4441245794296265, "learning_rate": 4.069683257918552e-05, "loss": 5.8503, "step": 3000 }, { "epoch": 2.737556561085973, "grad_norm": 1.614174485206604, "learning_rate": 4.1036199095022625e-05, "loss": 5.8229, "step": 3025 }, { "epoch": 2.760180995475113, "grad_norm": 2.3225739002227783, "learning_rate": 4.1375565610859726e-05, "loss": 5.7577, "step": 3050 }, { "epoch": 2.7828054298642533, "grad_norm": 1.2462753057479858, "learning_rate": 4.171493212669683e-05, "loss": 5.8104, "step": 3075 }, { "epoch": 2.8054298642533935, "grad_norm": 1.1745747327804565, "learning_rate": 4.2054298642533935e-05, "loss": 5.8207, "step": 3100 }, { "epoch": 2.8280542986425337, "grad_norm": 1.7370704412460327, "learning_rate": 4.2393665158371036e-05, "loss": 5.7556, "step": 3125 }, { "epoch": 2.8506787330316743, "grad_norm": 1.9213569164276123, "learning_rate": 4.2733031674208136e-05, "loss": 5.797, "step": 3150 }, { "epoch": 2.8733031674208145, "grad_norm": 1.8364306688308716, "learning_rate": 4.307239819004525e-05, "loss": 5.8159, "step": 3175 }, { "epoch": 2.8959276018099547, "grad_norm": 1.2981412410736084, "learning_rate": 4.341176470588235e-05, "loss": 5.816, "step": 3200 }, { "epoch": 2.918552036199095, "grad_norm": 1.7525430917739868, "learning_rate": 4.375113122171945e-05, "loss": 5.7447, "step": 3225 }, { "epoch": 2.9411764705882355, "grad_norm": 1.1247882843017578, "learning_rate": 4.409049773755656e-05, "loss": 5.7757, "step": 3250 }, { "epoch": 2.9638009049773757, "grad_norm": 1.7951980829238892, "learning_rate": 4.442986425339366e-05, 
"loss": 5.7815, "step": 3275 }, { "epoch": 2.986425339366516, "grad_norm": 2.1062705516815186, "learning_rate": 4.476923076923076e-05, "loss": 5.9039, "step": 3300 }, { "epoch": 3.009049773755656, "grad_norm": 1.276943325996399, "learning_rate": 4.510859728506786e-05, "loss": 5.798, "step": 3325 }, { "epoch": 3.0316742081447963, "grad_norm": 1.2819347381591797, "learning_rate": 4.544796380090498e-05, "loss": 5.587, "step": 3350 }, { "epoch": 3.0542986425339365, "grad_norm": 1.5981217622756958, "learning_rate": 4.578733031674208e-05, "loss": 5.6573, "step": 3375 }, { "epoch": 3.076923076923077, "grad_norm": 1.9141427278518677, "learning_rate": 4.612669683257918e-05, "loss": 5.6523, "step": 3400 }, { "epoch": 3.0995475113122173, "grad_norm": 2.002366304397583, "learning_rate": 4.646606334841629e-05, "loss": 5.7209, "step": 3425 }, { "epoch": 3.1221719457013575, "grad_norm": 1.3727688789367676, "learning_rate": 4.680542986425339e-05, "loss": 5.7155, "step": 3450 }, { "epoch": 3.1447963800904977, "grad_norm": 1.5259437561035156, "learning_rate": 4.714479638009049e-05, "loss": 5.6509, "step": 3475 }, { "epoch": 3.167420814479638, "grad_norm": 1.8274619579315186, "learning_rate": 4.74841628959276e-05, "loss": 5.8386, "step": 3500 }, { "epoch": 3.1900452488687785, "grad_norm": 1.372196912765503, "learning_rate": 4.7823529411764704e-05, "loss": 5.6764, "step": 3525 }, { "epoch": 3.2126696832579187, "grad_norm": 2.9106500148773193, "learning_rate": 4.8162895927601805e-05, "loss": 5.6857, "step": 3550 }, { "epoch": 3.235294117647059, "grad_norm": 1.648293137550354, "learning_rate": 4.850226244343891e-05, "loss": 5.6533, "step": 3575 }, { "epoch": 3.257918552036199, "grad_norm": 1.904435634613037, "learning_rate": 4.8841628959276014e-05, "loss": 5.7202, "step": 3600 }, { "epoch": 3.2805429864253393, "grad_norm": 1.3707633018493652, "learning_rate": 4.9180995475113115e-05, "loss": 5.6916, "step": 3625 }, { "epoch": 3.3031674208144794, "grad_norm": 1.5876151323318481, 
"learning_rate": 4.952036199095022e-05, "loss": 5.7601, "step": 3650 }, { "epoch": 3.32579185520362, "grad_norm": 1.605332851409912, "learning_rate": 4.9859728506787323e-05, "loss": 5.7429, "step": 3675 }, { "epoch": 3.3484162895927603, "grad_norm": 1.4415479898452759, "learning_rate": 5.019909502262443e-05, "loss": 5.655, "step": 3700 }, { "epoch": 3.3710407239819005, "grad_norm": 1.6629538536071777, "learning_rate": 5.053846153846154e-05, "loss": 5.763, "step": 3725 }, { "epoch": 3.3936651583710407, "grad_norm": 1.9927380084991455, "learning_rate": 5.087782805429864e-05, "loss": 5.7412, "step": 3750 }, { "epoch": 3.416289592760181, "grad_norm": 1.6488847732543945, "learning_rate": 5.121719457013574e-05, "loss": 5.6904, "step": 3775 }, { "epoch": 3.4389140271493215, "grad_norm": 1.2794617414474487, "learning_rate": 5.155656108597284e-05, "loss": 5.715, "step": 3800 }, { "epoch": 3.4615384615384617, "grad_norm": 1.5437264442443848, "learning_rate": 5.189592760180995e-05, "loss": 5.708, "step": 3825 }, { "epoch": 3.484162895927602, "grad_norm": 1.80771005153656, "learning_rate": 5.223529411764705e-05, "loss": 5.7182, "step": 3850 }, { "epoch": 3.506787330316742, "grad_norm": 1.4776058197021484, "learning_rate": 5.2574660633484165e-05, "loss": 5.7784, "step": 3875 }, { "epoch": 3.5294117647058822, "grad_norm": 2.749145030975342, "learning_rate": 5.2914027149321266e-05, "loss": 5.619, "step": 3900 }, { "epoch": 3.5520361990950224, "grad_norm": 1.321062684059143, "learning_rate": 5.3253393665158366e-05, "loss": 5.7243, "step": 3925 }, { "epoch": 3.5746606334841626, "grad_norm": 1.7517143487930298, "learning_rate": 5.359276018099547e-05, "loss": 5.6926, "step": 3950 }, { "epoch": 3.5972850678733033, "grad_norm": 1.2275956869125366, "learning_rate": 5.3932126696832575e-05, "loss": 5.7326, "step": 3975 }, { "epoch": 3.6199095022624435, "grad_norm": 1.4906036853790283, "learning_rate": 5.4271493212669676e-05, "loss": 5.7831, "step": 4000 }, { "epoch": 3.6425339366515836, 
"grad_norm": 1.6958218812942505, "learning_rate": 5.4610859728506784e-05, "loss": 5.7038, "step": 4025 }, { "epoch": 3.665158371040724, "grad_norm": 1.6957277059555054, "learning_rate": 5.495022624434389e-05, "loss": 5.6161, "step": 4050 }, { "epoch": 3.6877828054298645, "grad_norm": 1.7718782424926758, "learning_rate": 5.528959276018099e-05, "loss": 5.7296, "step": 4075 }, { "epoch": 3.7104072398190047, "grad_norm": 1.8896294832229614, "learning_rate": 5.562895927601809e-05, "loss": 5.7165, "step": 4100 }, { "epoch": 3.733031674208145, "grad_norm": 3.1193795204162598, "learning_rate": 5.59683257918552e-05, "loss": 5.716, "step": 4125 }, { "epoch": 3.755656108597285, "grad_norm": 1.3106483221054077, "learning_rate": 5.63076923076923e-05, "loss": 5.7097, "step": 4150 }, { "epoch": 3.7782805429864252, "grad_norm": 1.7375882863998413, "learning_rate": 5.66470588235294e-05, "loss": 5.7747, "step": 4175 }, { "epoch": 3.8009049773755654, "grad_norm": 1.6757769584655762, "learning_rate": 5.698642533936652e-05, "loss": 5.6659, "step": 4200 }, { "epoch": 3.8235294117647056, "grad_norm": 2.179318428039551, "learning_rate": 5.732579185520362e-05, "loss": 5.6603, "step": 4225 }, { "epoch": 3.8461538461538463, "grad_norm": 1.1887872219085693, "learning_rate": 5.766515837104072e-05, "loss": 5.6603, "step": 4250 }, { "epoch": 3.8687782805429864, "grad_norm": 1.7193459272384644, "learning_rate": 5.800452488687783e-05, "loss": 5.5825, "step": 4275 }, { "epoch": 3.8914027149321266, "grad_norm": 1.5481328964233398, "learning_rate": 5.834389140271493e-05, "loss": 5.7928, "step": 4300 }, { "epoch": 3.914027149321267, "grad_norm": 1.2775366306304932, "learning_rate": 5.868325791855203e-05, "loss": 5.803, "step": 4325 }, { "epoch": 3.9366515837104075, "grad_norm": 1.4987918138504028, "learning_rate": 5.902262443438913e-05, "loss": 5.5869, "step": 4350 }, { "epoch": 3.9592760180995477, "grad_norm": 1.6811946630477905, "learning_rate": 5.9361990950226244e-05, "loss": 5.7896, "step": 4375 
}, { "epoch": 3.981900452488688, "grad_norm": 1.367522120475769, "learning_rate": 5.9701357466063345e-05, "loss": 5.6617, "step": 4400 }, { "epoch": 4.004524886877828, "grad_norm": 1.4017376899719238, "learning_rate": 6.0040723981900446e-05, "loss": 5.6081, "step": 4425 }, { "epoch": 4.027149321266968, "grad_norm": 1.3864396810531616, "learning_rate": 6.0380090497737553e-05, "loss": 5.6787, "step": 4450 }, { "epoch": 4.049773755656108, "grad_norm": 1.5472630262374878, "learning_rate": 6.0719457013574654e-05, "loss": 5.6549, "step": 4475 }, { "epoch": 4.072398190045249, "grad_norm": 1.563694715499878, "learning_rate": 6.105882352941176e-05, "loss": 5.6172, "step": 4500 }, { "epoch": 4.095022624434389, "grad_norm": 1.7143316268920898, "learning_rate": 6.139819004524886e-05, "loss": 5.5593, "step": 4525 }, { "epoch": 4.117647058823529, "grad_norm": 1.314466118812561, "learning_rate": 6.173755656108598e-05, "loss": 5.5821, "step": 4550 }, { "epoch": 4.14027149321267, "grad_norm": 1.114738941192627, "learning_rate": 6.207692307692308e-05, "loss": 5.6303, "step": 4575 }, { "epoch": 4.16289592760181, "grad_norm": 1.5527963638305664, "learning_rate": 6.241628959276018e-05, "loss": 5.7382, "step": 4600 }, { "epoch": 4.1855203619909505, "grad_norm": 0.9862022995948792, "learning_rate": 6.275565610859728e-05, "loss": 5.661, "step": 4625 }, { "epoch": 4.208144796380091, "grad_norm": 1.3943437337875366, "learning_rate": 6.309502262443438e-05, "loss": 5.6551, "step": 4650 }, { "epoch": 4.230769230769231, "grad_norm": 1.2521923780441284, "learning_rate": 6.343438914027148e-05, "loss": 5.5984, "step": 4675 }, { "epoch": 4.253393665158371, "grad_norm": 1.297293782234192, "learning_rate": 6.377375565610858e-05, "loss": 5.6354, "step": 4700 }, { "epoch": 4.276018099547511, "grad_norm": 1.7689846754074097, "learning_rate": 6.41131221719457e-05, "loss": 5.5656, "step": 4725 }, { "epoch": 4.298642533936651, "grad_norm": 1.3994182348251343, "learning_rate": 6.44524886877828e-05, "loss": 
5.5455, "step": 4750 }, { "epoch": 4.321266968325792, "grad_norm": 1.4447396993637085, "learning_rate": 6.47918552036199e-05, "loss": 5.6526, "step": 4775 }, { "epoch": 4.343891402714932, "grad_norm": 1.3658605813980103, "learning_rate": 6.513122171945701e-05, "loss": 5.6019, "step": 4800 }, { "epoch": 4.366515837104072, "grad_norm": 1.4008032083511353, "learning_rate": 6.547058823529411e-05, "loss": 5.6222, "step": 4825 }, { "epoch": 4.389140271493213, "grad_norm": 1.3936430215835571, "learning_rate": 6.580995475113122e-05, "loss": 5.5291, "step": 4850 }, { "epoch": 4.411764705882353, "grad_norm": 1.140010118484497, "learning_rate": 6.614932126696832e-05, "loss": 5.5792, "step": 4875 }, { "epoch": 4.4343891402714934, "grad_norm": 1.2577894926071167, "learning_rate": 6.648868778280543e-05, "loss": 5.6013, "step": 4900 }, { "epoch": 4.457013574660634, "grad_norm": 1.3935832977294922, "learning_rate": 6.682805429864253e-05, "loss": 5.603, "step": 4925 }, { "epoch": 4.479638009049774, "grad_norm": 1.5030955076217651, "learning_rate": 6.716742081447963e-05, "loss": 5.7377, "step": 4950 }, { "epoch": 4.502262443438914, "grad_norm": 1.145139455795288, "learning_rate": 6.750678733031673e-05, "loss": 5.6013, "step": 4975 }, { "epoch": 4.524886877828054, "grad_norm": 1.4582146406173706, "learning_rate": 6.784615384615383e-05, "loss": 5.6743, "step": 5000 }, { "epoch": 4.547511312217194, "grad_norm": 1.0025731325149536, "learning_rate": 6.818552036199094e-05, "loss": 5.7206, "step": 5025 }, { "epoch": 4.570135746606335, "grad_norm": 1.39948308467865, "learning_rate": 6.852488687782805e-05, "loss": 5.6864, "step": 5050 }, { "epoch": 4.592760180995475, "grad_norm": 1.5719788074493408, "learning_rate": 6.886425339366515e-05, "loss": 5.5086, "step": 5075 }, { "epoch": 4.615384615384615, "grad_norm": 1.4834094047546387, "learning_rate": 6.920361990950227e-05, "loss": 5.5901, "step": 5100 }, { "epoch": 4.638009049773755, "grad_norm": 1.3456339836120605, "learning_rate": 
6.954298642533937e-05, "loss": 5.6564, "step": 5125 }, { "epoch": 4.660633484162896, "grad_norm": 1.3672384023666382, "learning_rate": 6.988235294117647e-05, "loss": 5.4837, "step": 5150 }, { "epoch": 4.683257918552036, "grad_norm": 1.3631017208099365, "learning_rate": 7.022171945701357e-05, "loss": 5.5312, "step": 5175 }, { "epoch": 4.705882352941177, "grad_norm": 1.1694447994232178, "learning_rate": 7.056108597285067e-05, "loss": 5.5842, "step": 5200 }, { "epoch": 4.728506787330317, "grad_norm": 1.489476203918457, "learning_rate": 7.090045248868777e-05, "loss": 5.564, "step": 5225 }, { "epoch": 4.751131221719457, "grad_norm": 1.2117191553115845, "learning_rate": 7.123981900452488e-05, "loss": 5.6299, "step": 5250 }, { "epoch": 4.773755656108597, "grad_norm": 1.2240872383117676, "learning_rate": 7.157918552036199e-05, "loss": 5.5516, "step": 5275 }, { "epoch": 4.796380090497737, "grad_norm": 1.3566014766693115, "learning_rate": 7.191855203619909e-05, "loss": 5.5811, "step": 5300 }, { "epoch": 4.819004524886878, "grad_norm": 1.6840308904647827, "learning_rate": 7.225791855203619e-05, "loss": 5.675, "step": 5325 }, { "epoch": 4.841628959276018, "grad_norm": 1.5663491487503052, "learning_rate": 7.25972850678733e-05, "loss": 5.6887, "step": 5350 }, { "epoch": 4.864253393665159, "grad_norm": 1.5798016786575317, "learning_rate": 7.29366515837104e-05, "loss": 5.5949, "step": 5375 }, { "epoch": 4.886877828054299, "grad_norm": 2.12789249420166, "learning_rate": 7.32760180995475e-05, "loss": 5.5658, "step": 5400 }, { "epoch": 4.909502262443439, "grad_norm": 1.7635608911514282, "learning_rate": 7.361538461538462e-05, "loss": 5.7001, "step": 5425 }, { "epoch": 4.932126696832579, "grad_norm": 1.314758539199829, "learning_rate": 7.395475113122172e-05, "loss": 5.58, "step": 5450 }, { "epoch": 4.95475113122172, "grad_norm": 1.0996482372283936, "learning_rate": 7.429411764705882e-05, "loss": 5.6766, "step": 5475 }, { "epoch": 4.97737556561086, "grad_norm": 2.1102426052093506, 
"learning_rate": 7.463348416289592e-05, "loss": 5.4924, "step": 5500 }, { "epoch": 5.0, "grad_norm": 1.0663578510284424, "learning_rate": 7.497285067873302e-05, "loss": 5.6522, "step": 5525 }, { "epoch": 5.02262443438914, "grad_norm": 1.2302531003952026, "learning_rate": 7.531221719457014e-05, "loss": 5.4822, "step": 5550 }, { "epoch": 5.04524886877828, "grad_norm": 1.242390751838684, "learning_rate": 7.565158371040724e-05, "loss": 5.4806, "step": 5575 }, { "epoch": 5.067873303167421, "grad_norm": 1.377537488937378, "learning_rate": 7.599095022624434e-05, "loss": 5.4515, "step": 5600 }, { "epoch": 5.090497737556561, "grad_norm": 1.372882604598999, "learning_rate": 7.633031674208144e-05, "loss": 5.5466, "step": 5625 }, { "epoch": 5.113122171945701, "grad_norm": 1.1628869771957397, "learning_rate": 7.666968325791854e-05, "loss": 5.5631, "step": 5650 }, { "epoch": 5.135746606334842, "grad_norm": 1.1661573648452759, "learning_rate": 7.700904977375565e-05, "loss": 5.5736, "step": 5675 }, { "epoch": 5.158371040723982, "grad_norm": 1.680083990097046, "learning_rate": 7.734841628959276e-05, "loss": 5.5193, "step": 5700 }, { "epoch": 5.180995475113122, "grad_norm": 1.4866079092025757, "learning_rate": 7.768778280542986e-05, "loss": 5.7053, "step": 5725 }, { "epoch": 5.203619909502263, "grad_norm": 1.2793829441070557, "learning_rate": 7.802714932126696e-05, "loss": 5.4355, "step": 5750 }, { "epoch": 5.226244343891403, "grad_norm": 1.63210928440094, "learning_rate": 7.836651583710406e-05, "loss": 5.6327, "step": 5775 }, { "epoch": 5.248868778280543, "grad_norm": 1.3546538352966309, "learning_rate": 7.870588235294116e-05, "loss": 5.5684, "step": 5800 }, { "epoch": 5.271493212669683, "grad_norm": 1.1607636213302612, "learning_rate": 7.904524886877826e-05, "loss": 5.5127, "step": 5825 }, { "epoch": 5.294117647058823, "grad_norm": 1.2475472688674927, "learning_rate": 7.938461538461539e-05, "loss": 5.6689, "step": 5850 }, { "epoch": 5.316742081447964, "grad_norm": 
1.0922496318817139, "learning_rate": 7.972398190045249e-05, "loss": 5.5478, "step": 5875 }, { "epoch": 5.339366515837104, "grad_norm": 1.4692274332046509, "learning_rate": 8.006334841628959e-05, "loss": 5.5565, "step": 5900 }, { "epoch": 5.361990950226244, "grad_norm": 1.2589067220687866, "learning_rate": 8.040271493212669e-05, "loss": 5.5281, "step": 5925 }, { "epoch": 5.384615384615385, "grad_norm": 2.016085147857666, "learning_rate": 8.074208144796379e-05, "loss": 5.5333, "step": 5950 }, { "epoch": 5.407239819004525, "grad_norm": 1.1720553636550903, "learning_rate": 8.10814479638009e-05, "loss": 5.5755, "step": 5975 }, { "epoch": 5.429864253393665, "grad_norm": 1.0042240619659424, "learning_rate": 8.1420814479638e-05, "loss": 5.586, "step": 6000 }, { "epoch": 5.452488687782806, "grad_norm": 1.2361646890640259, "learning_rate": 8.176018099547511e-05, "loss": 5.5136, "step": 6025 }, { "epoch": 5.475113122171946, "grad_norm": 0.9489585161209106, "learning_rate": 8.209954751131221e-05, "loss": 5.7089, "step": 6050 }, { "epoch": 5.497737556561086, "grad_norm": 1.1919461488723755, "learning_rate": 8.243891402714931e-05, "loss": 5.4838, "step": 6075 }, { "epoch": 5.520361990950226, "grad_norm": 1.2139184474945068, "learning_rate": 8.277828054298641e-05, "loss": 5.4898, "step": 6100 }, { "epoch": 5.542986425339366, "grad_norm": 1.0839821100234985, "learning_rate": 8.311764705882351e-05, "loss": 5.6115, "step": 6125 }, { "epoch": 5.5656108597285066, "grad_norm": 1.201003074645996, "learning_rate": 8.345701357466063e-05, "loss": 5.5291, "step": 6150 }, { "epoch": 5.588235294117647, "grad_norm": 1.3064321279525757, "learning_rate": 8.379638009049773e-05, "loss": 5.5308, "step": 6175 }, { "epoch": 5.610859728506787, "grad_norm": 1.372571587562561, "learning_rate": 8.413574660633484e-05, "loss": 5.5055, "step": 6200 }, { "epoch": 5.633484162895927, "grad_norm": 1.0197138786315918, "learning_rate": 8.447511312217194e-05, "loss": 5.524, "step": 6225 }, { "epoch": 
5.656108597285068, "grad_norm": 1.2021855115890503, "learning_rate": 8.481447963800904e-05, "loss": 5.4335, "step": 6250 }, { "epoch": 5.678733031674208, "grad_norm": 1.289168357849121, "learning_rate": 8.515384615384614e-05, "loss": 5.4808, "step": 6275 }, { "epoch": 5.701357466063349, "grad_norm": 1.5402923822402954, "learning_rate": 8.549321266968326e-05, "loss": 5.5559, "step": 6300 }, { "epoch": 5.723981900452489, "grad_norm": 0.9780600070953369, "learning_rate": 8.583257918552036e-05, "loss": 5.5102, "step": 6325 }, { "epoch": 5.746606334841629, "grad_norm": 1.465335726737976, "learning_rate": 8.617194570135746e-05, "loss": 5.4427, "step": 6350 }, { "epoch": 5.769230769230769, "grad_norm": 1.1970276832580566, "learning_rate": 8.651131221719456e-05, "loss": 5.549, "step": 6375 }, { "epoch": 5.791855203619909, "grad_norm": 1.118125319480896, "learning_rate": 8.685067873303166e-05, "loss": 5.4598, "step": 6400 }, { "epoch": 5.8144796380090495, "grad_norm": 1.0834537744522095, "learning_rate": 8.719004524886876e-05, "loss": 5.3887, "step": 6425 }, { "epoch": 5.83710407239819, "grad_norm": 1.057357907295227, "learning_rate": 8.752941176470586e-05, "loss": 5.6295, "step": 6450 }, { "epoch": 5.859728506787331, "grad_norm": 1.5805262327194214, "learning_rate": 8.786877828054298e-05, "loss": 5.5536, "step": 6475 }, { "epoch": 5.882352941176471, "grad_norm": 1.3391293287277222, "learning_rate": 8.820814479638008e-05, "loss": 5.5675, "step": 6500 }, { "epoch": 5.904977375565611, "grad_norm": 1.3164405822753906, "learning_rate": 8.854751131221718e-05, "loss": 5.4408, "step": 6525 }, { "epoch": 5.927601809954751, "grad_norm": 1.160893201828003, "learning_rate": 8.88868778280543e-05, "loss": 5.5364, "step": 6550 }, { "epoch": 5.950226244343892, "grad_norm": 0.9940909147262573, "learning_rate": 8.92262443438914e-05, "loss": 5.5756, "step": 6575 }, { "epoch": 5.972850678733032, "grad_norm": 1.1090673208236694, "learning_rate": 8.956561085972851e-05, "loss": 5.4551, "step": 
6600 }, { "epoch": 5.995475113122172, "grad_norm": 1.2676074504852295, "learning_rate": 8.990497737556561e-05, "loss": 5.6009, "step": 6625 }, { "epoch": 6.018099547511312, "grad_norm": 1.510372519493103, "learning_rate": 9.024434389140271e-05, "loss": 5.4012, "step": 6650 }, { "epoch": 6.040723981900452, "grad_norm": 1.1791483163833618, "learning_rate": 9.058371040723981e-05, "loss": 5.3326, "step": 6675 }, { "epoch": 6.0633484162895925, "grad_norm": 1.407641887664795, "learning_rate": 9.092307692307691e-05, "loss": 5.374, "step": 6700 }, { "epoch": 6.085972850678733, "grad_norm": 1.3388482332229614, "learning_rate": 9.126244343891401e-05, "loss": 5.4042, "step": 6725 }, { "epoch": 6.108597285067873, "grad_norm": 1.554003119468689, "learning_rate": 9.160180995475112e-05, "loss": 5.4236, "step": 6750 }, { "epoch": 6.131221719457014, "grad_norm": 1.3483645915985107, "learning_rate": 9.194117647058823e-05, "loss": 5.4674, "step": 6775 }, { "epoch": 6.153846153846154, "grad_norm": 1.0115711688995361, "learning_rate": 9.228054298642533e-05, "loss": 5.4868, "step": 6800 }, { "epoch": 6.176470588235294, "grad_norm": 1.439205527305603, "learning_rate": 9.261990950226243e-05, "loss": 5.4786, "step": 6825 }, { "epoch": 6.199095022624435, "grad_norm": 0.9785445928573608, "learning_rate": 9.295927601809953e-05, "loss": 5.5347, "step": 6850 }, { "epoch": 6.221719457013575, "grad_norm": 1.1020057201385498, "learning_rate": 9.329864253393665e-05, "loss": 5.4051, "step": 6875 }, { "epoch": 6.244343891402715, "grad_norm": 1.0274757146835327, "learning_rate": 9.363800904977375e-05, "loss": 5.5246, "step": 6900 }, { "epoch": 6.266968325791855, "grad_norm": 1.0783212184906006, "learning_rate": 9.397737556561086e-05, "loss": 5.5393, "step": 6925 }, { "epoch": 6.289592760180995, "grad_norm": 1.0508118867874146, "learning_rate": 9.431674208144796e-05, "loss": 5.4552, "step": 6950 }, { "epoch": 6.3122171945701355, "grad_norm": 1.1625144481658936, "learning_rate": 9.465610859728506e-05, 
"loss": 5.4529, "step": 6975 }, { "epoch": 6.334841628959276, "grad_norm": 1.0248526334762573, "learning_rate": 9.499547511312217e-05, "loss": 5.3419, "step": 7000 }, { "epoch": 6.357466063348416, "grad_norm": 1.1253679990768433, "learning_rate": 9.533484162895927e-05, "loss": 5.418, "step": 7025 }, { "epoch": 6.380090497737557, "grad_norm": 1.1107271909713745, "learning_rate": 9.567420814479637e-05, "loss": 5.449, "step": 7050 }, { "epoch": 6.402714932126697, "grad_norm": 1.0638843774795532, "learning_rate": 9.601357466063347e-05, "loss": 5.6126, "step": 7075 }, { "epoch": 6.425339366515837, "grad_norm": 1.6658477783203125, "learning_rate": 9.635294117647058e-05, "loss": 5.5386, "step": 7100 }, { "epoch": 6.447963800904978, "grad_norm": 1.1139315366744995, "learning_rate": 9.669230769230768e-05, "loss": 5.4514, "step": 7125 }, { "epoch": 6.470588235294118, "grad_norm": 1.1143317222595215, "learning_rate": 9.703167420814478e-05, "loss": 5.5782, "step": 7150 }, { "epoch": 6.493212669683258, "grad_norm": 1.4471315145492554, "learning_rate": 9.737104072398189e-05, "loss": 5.4109, "step": 7175 }, { "epoch": 6.515837104072398, "grad_norm": 1.167921781539917, "learning_rate": 9.771040723981899e-05, "loss": 5.49, "step": 7200 }, { "epoch": 6.538461538461538, "grad_norm": 0.9504801630973816, "learning_rate": 9.804977375565611e-05, "loss": 5.3976, "step": 7225 }, { "epoch": 6.5610859728506785, "grad_norm": 1.4360880851745605, "learning_rate": 9.838914027149322e-05, "loss": 5.414, "step": 7250 }, { "epoch": 6.583710407239819, "grad_norm": 1.3413575887680054, "learning_rate": 9.871493212669681e-05, "loss": 5.4689, "step": 7275 }, { "epoch": 6.606334841628959, "grad_norm": 1.0288740396499634, "learning_rate": 9.905429864253394e-05, "loss": 5.455, "step": 7300 }, { "epoch": 6.628959276018099, "grad_norm": 1.0424234867095947, "learning_rate": 9.939366515837104e-05, "loss": 5.3584, "step": 7325 }, { "epoch": 6.65158371040724, "grad_norm": 1.119850516319275, "learning_rate": 
9.973303167420814e-05, "loss": 5.3877, "step": 7350 }, { "epoch": 6.67420814479638, "grad_norm": 1.2746446132659912, "learning_rate": 0.00010007239819004524, "loss": 5.4438, "step": 7375 }, { "epoch": 6.6968325791855206, "grad_norm": 1.3023581504821777, "learning_rate": 0.00010041176470588234, "loss": 5.3398, "step": 7400 }, { "epoch": 6.719457013574661, "grad_norm": 1.214418888092041, "learning_rate": 0.00010075113122171946, "loss": 5.4295, "step": 7425 }, { "epoch": 6.742081447963801, "grad_norm": 1.1940439939498901, "learning_rate": 0.00010109049773755656, "loss": 5.4227, "step": 7450 }, { "epoch": 6.764705882352941, "grad_norm": 1.1146560907363892, "learning_rate": 0.00010142986425339366, "loss": 5.3667, "step": 7475 }, { "epoch": 6.787330316742081, "grad_norm": 1.2646377086639404, "learning_rate": 0.00010176923076923076, "loss": 5.4075, "step": 7500 }, { "epoch": 6.8099547511312215, "grad_norm": 1.029643177986145, "learning_rate": 0.00010210859728506786, "loss": 5.5222, "step": 7525 }, { "epoch": 6.832579185520362, "grad_norm": 1.259440541267395, "learning_rate": 0.00010244796380090496, "loss": 5.5015, "step": 7550 }, { "epoch": 6.855203619909502, "grad_norm": 0.9059805870056152, "learning_rate": 0.00010278733031674206, "loss": 5.5371, "step": 7575 }, { "epoch": 6.877828054298643, "grad_norm": 1.9123485088348389, "learning_rate": 0.00010312669683257918, "loss": 5.6179, "step": 7600 }, { "epoch": 6.900452488687783, "grad_norm": 1.1449521780014038, "learning_rate": 0.00010346606334841628, "loss": 5.5484, "step": 7625 }, { "epoch": 6.923076923076923, "grad_norm": 1.0224618911743164, "learning_rate": 0.00010380542986425339, "loss": 5.2939, "step": 7650 }, { "epoch": 6.9457013574660635, "grad_norm": 1.2241264581680298, "learning_rate": 0.00010414479638009049, "loss": 5.5328, "step": 7675 }, { "epoch": 6.968325791855204, "grad_norm": 1.059005856513977, "learning_rate": 0.00010448416289592759, "loss": 5.4795, "step": 7700 }, { "epoch": 6.990950226244344, "grad_norm": 
0.9612129926681519, "learning_rate": 0.0001048235294117647, "loss": 5.553, "step": 7725 }, { "epoch": 7.013574660633484, "grad_norm": 1.1683646440505981, "learning_rate": 0.00010516289592760181, "loss": 5.4763, "step": 7750 }, { "epoch": 7.036199095022624, "grad_norm": 1.212646722793579, "learning_rate": 0.00010550226244343891, "loss": 5.3376, "step": 7775 }, { "epoch": 7.0588235294117645, "grad_norm": 1.05197274684906, "learning_rate": 0.00010584162895927601, "loss": 5.2155, "step": 7800 }, { "epoch": 7.081447963800905, "grad_norm": 1.0929110050201416, "learning_rate": 0.00010618099547511311, "loss": 5.3498, "step": 7825 }, { "epoch": 7.104072398190045, "grad_norm": 1.2394142150878906, "learning_rate": 0.00010652036199095021, "loss": 5.3593, "step": 7850 }, { "epoch": 7.126696832579185, "grad_norm": 1.4099429845809937, "learning_rate": 0.00010685972850678731, "loss": 5.2032, "step": 7875 }, { "epoch": 7.149321266968326, "grad_norm": 1.3127585649490356, "learning_rate": 0.00010719909502262441, "loss": 5.4849, "step": 7900 }, { "epoch": 7.171945701357466, "grad_norm": 1.2615708112716675, "learning_rate": 0.00010753846153846153, "loss": 5.4461, "step": 7925 }, { "epoch": 7.1945701357466065, "grad_norm": 1.43902587890625, "learning_rate": 0.00010787782805429863, "loss": 5.4668, "step": 7950 }, { "epoch": 7.217194570135747, "grad_norm": 1.8109885454177856, "learning_rate": 0.00010821719457013573, "loss": 5.2972, "step": 7975 }, { "epoch": 7.239819004524887, "grad_norm": 1.0213814973831177, "learning_rate": 0.00010855656108597284, "loss": 5.359, "step": 8000 }, { "epoch": 7.262443438914027, "grad_norm": 1.1312299966812134, "learning_rate": 0.00010889592760180995, "loss": 5.4318, "step": 8025 }, { "epoch": 7.285067873303167, "grad_norm": 1.2350519895553589, "learning_rate": 0.00010923529411764706, "loss": 5.3081, "step": 8050 }, { "epoch": 7.3076923076923075, "grad_norm": 1.3539677858352661, "learning_rate": 0.00010957466063348416, "loss": 5.3457, "step": 8075 }, { 
"epoch": 7.330316742081448, "grad_norm": 1.0154722929000854, "learning_rate": 0.00010991402714932126, "loss": 5.4666, "step": 8100 }, { "epoch": 7.352941176470588, "grad_norm": 1.1144194602966309, "learning_rate": 0.00011025339366515836, "loss": 5.442, "step": 8125 }, { "epoch": 7.375565610859729, "grad_norm": 0.8434383869171143, "learning_rate": 0.00011059276018099546, "loss": 5.3537, "step": 8150 }, { "epoch": 7.398190045248869, "grad_norm": 1.0677796602249146, "learning_rate": 0.00011093212669683256, "loss": 5.5026, "step": 8175 }, { "epoch": 7.420814479638009, "grad_norm": 1.1033122539520264, "learning_rate": 0.00011127149321266967, "loss": 5.4341, "step": 8200 }, { "epoch": 7.4434389140271495, "grad_norm": 0.9210260510444641, "learning_rate": 0.00011161085972850678, "loss": 5.4144, "step": 8225 }, { "epoch": 7.46606334841629, "grad_norm": 1.0614783763885498, "learning_rate": 0.00011195022624434388, "loss": 5.4164, "step": 8250 }, { "epoch": 7.48868778280543, "grad_norm": 1.2655754089355469, "learning_rate": 0.00011228959276018098, "loss": 5.2604, "step": 8275 }, { "epoch": 7.51131221719457, "grad_norm": 0.9641904234886169, "learning_rate": 0.00011262895927601808, "loss": 5.3504, "step": 8300 }, { "epoch": 7.53393665158371, "grad_norm": 1.2946969270706177, "learning_rate": 0.00011296832579185518, "loss": 5.4227, "step": 8325 }, { "epoch": 7.5565610859728505, "grad_norm": 0.9463045597076416, "learning_rate": 0.0001133076923076923, "loss": 5.4054, "step": 8350 }, { "epoch": 7.579185520361991, "grad_norm": 1.0814507007598877, "learning_rate": 0.00011364705882352941, "loss": 5.3966, "step": 8375 }, { "epoch": 7.601809954751131, "grad_norm": 1.1838008165359497, "learning_rate": 0.00011398642533936651, "loss": 5.4821, "step": 8400 }, { "epoch": 7.624434389140271, "grad_norm": 1.0479843616485596, "learning_rate": 0.00011432579185520361, "loss": 5.4574, "step": 8425 }, { "epoch": 7.647058823529412, "grad_norm": 0.976851761341095, "learning_rate": 
0.00011466515837104072, "loss": 5.4213, "step": 8450 }, { "epoch": 7.669683257918552, "grad_norm": 1.4671597480773926, "learning_rate": 0.00011500452488687782, "loss": 5.417, "step": 8475 }, { "epoch": 7.6923076923076925, "grad_norm": 1.2483266592025757, "learning_rate": 0.00011534389140271492, "loss": 5.294, "step": 8500 }, { "epoch": 7.714932126696833, "grad_norm": 1.1279124021530151, "learning_rate": 0.00011568325791855202, "loss": 5.4524, "step": 8525 }, { "epoch": 7.737556561085973, "grad_norm": 1.4917343854904175, "learning_rate": 0.00011602262443438913, "loss": 5.3789, "step": 8550 }, { "epoch": 7.760180995475113, "grad_norm": 1.0781971216201782, "learning_rate": 0.00011636199095022623, "loss": 5.3371, "step": 8575 }, { "epoch": 7.782805429864253, "grad_norm": 1.143670916557312, "learning_rate": 0.00011670135746606333, "loss": 5.3115, "step": 8600 }, { "epoch": 7.8054298642533935, "grad_norm": 1.2002426385879517, "learning_rate": 0.00011704072398190044, "loss": 5.447, "step": 8625 }, { "epoch": 7.828054298642534, "grad_norm": 1.0116580724716187, "learning_rate": 0.00011738009049773754, "loss": 5.3397, "step": 8650 }, { "epoch": 7.850678733031674, "grad_norm": 1.029729962348938, "learning_rate": 0.00011771945701357466, "loss": 5.3606, "step": 8675 }, { "epoch": 7.873303167420815, "grad_norm": 0.9455130696296692, "learning_rate": 0.00011805882352941177, "loss": 5.3195, "step": 8700 }, { "epoch": 7.895927601809955, "grad_norm": 0.9694691300392151, "learning_rate": 0.00011839819004524887, "loss": 5.3867, "step": 8725 }, { "epoch": 7.918552036199095, "grad_norm": 1.2311065196990967, "learning_rate": 0.00011873755656108597, "loss": 5.3923, "step": 8750 }, { "epoch": 7.9411764705882355, "grad_norm": 0.8772637248039246, "learning_rate": 0.00011907692307692307, "loss": 5.3279, "step": 8775 }, { "epoch": 7.963800904977376, "grad_norm": 1.106030821800232, "learning_rate": 0.00011941628959276017, "loss": 5.3267, "step": 8800 }, { "epoch": 7.986425339366516, "grad_norm": 
1.1599453687667847, "learning_rate": 0.00011975565610859727, "loss": 5.529, "step": 8825 }, { "epoch": 8.009049773755656, "grad_norm": 1.8157141208648682, "learning_rate": 0.00012009502262443438, "loss": 5.3869, "step": 8850 }, { "epoch": 8.031674208144796, "grad_norm": 1.1919975280761719, "learning_rate": 0.00012043438914027149, "loss": 5.2746, "step": 8875 }, { "epoch": 8.054298642533936, "grad_norm": 1.0922486782073975, "learning_rate": 0.00012077375565610859, "loss": 5.2856, "step": 8900 }, { "epoch": 8.076923076923077, "grad_norm": 1.2444593906402588, "learning_rate": 0.00012111312217194569, "loss": 5.403, "step": 8925 }, { "epoch": 8.099547511312217, "grad_norm": 0.8721426725387573, "learning_rate": 0.00012145248868778279, "loss": 5.2567, "step": 8950 }, { "epoch": 8.122171945701357, "grad_norm": 1.3781883716583252, "learning_rate": 0.00012179185520361989, "loss": 5.2798, "step": 8975 }, { "epoch": 8.144796380090497, "grad_norm": 1.1509733200073242, "learning_rate": 0.000122131221719457, "loss": 5.2044, "step": 9000 }, { "epoch": 8.167420814479637, "grad_norm": 1.4785964488983154, "learning_rate": 0.00012247058823529412, "loss": 5.3677, "step": 9025 }, { "epoch": 8.190045248868778, "grad_norm": 1.2807246446609497, "learning_rate": 0.00012280995475113122, "loss": 5.3202, "step": 9050 }, { "epoch": 8.212669683257918, "grad_norm": 1.1646101474761963, "learning_rate": 0.00012314932126696832, "loss": 5.271, "step": 9075 }, { "epoch": 8.235294117647058, "grad_norm": 1.301400899887085, "learning_rate": 0.00012348868778280542, "loss": 5.2795, "step": 9100 }, { "epoch": 8.2579185520362, "grad_norm": 1.4196361303329468, "learning_rate": 0.00012382805429864252, "loss": 5.4176, "step": 9125 }, { "epoch": 8.28054298642534, "grad_norm": 1.5214911699295044, "learning_rate": 0.00012416742081447962, "loss": 5.2882, "step": 9150 }, { "epoch": 8.30316742081448, "grad_norm": 1.1181511878967285, "learning_rate": 0.00012450678733031672, "loss": 5.2897, "step": 9175 }, { "epoch": 
8.32579185520362, "grad_norm": 1.581581473350525, "learning_rate": 0.00012484615384615382, "loss": 5.3059, "step": 9200 }, { "epoch": 8.34841628959276, "grad_norm": 0.9101288914680481, "learning_rate": 0.00012518552036199093, "loss": 5.2616, "step": 9225 }, { "epoch": 8.371040723981901, "grad_norm": 0.9665130972862244, "learning_rate": 0.00012552488687782805, "loss": 5.2998, "step": 9250 }, { "epoch": 8.393665158371041, "grad_norm": 1.1748067140579224, "learning_rate": 0.00012586425339366515, "loss": 5.3675, "step": 9275 }, { "epoch": 8.416289592760181, "grad_norm": 1.115011215209961, "learning_rate": 0.00012619004524886876, "loss": 5.4141, "step": 9300 }, { "epoch": 8.438914027149321, "grad_norm": 1.0583595037460327, "learning_rate": 0.00012652941176470586, "loss": 5.3022, "step": 9325 }, { "epoch": 8.461538461538462, "grad_norm": 1.1308449506759644, "learning_rate": 0.00012686877828054296, "loss": 5.3779, "step": 9350 }, { "epoch": 8.484162895927602, "grad_norm": 1.1921645402908325, "learning_rate": 0.00012720814479638006, "loss": 5.2918, "step": 9375 }, { "epoch": 8.506787330316742, "grad_norm": 0.9783223271369934, "learning_rate": 0.00012754751131221717, "loss": 5.3096, "step": 9400 }, { "epoch": 8.529411764705882, "grad_norm": 1.0405751466751099, "learning_rate": 0.00012788687782805427, "loss": 5.2166, "step": 9425 }, { "epoch": 8.552036199095022, "grad_norm": 0.9867390990257263, "learning_rate": 0.0001282262443438914, "loss": 5.2788, "step": 9450 }, { "epoch": 8.574660633484163, "grad_norm": 1.3076112270355225, "learning_rate": 0.0001285656108597285, "loss": 5.2147, "step": 9475 }, { "epoch": 8.597285067873303, "grad_norm": 1.2440968751907349, "learning_rate": 0.0001289049773755656, "loss": 5.2012, "step": 9500 }, { "epoch": 8.619909502262443, "grad_norm": 1.0401350259780884, "learning_rate": 0.0001292443438914027, "loss": 5.3668, "step": 9525 }, { "epoch": 8.642533936651583, "grad_norm": 1.071785569190979, "learning_rate": 0.0001295837104072398, "loss": 
5.3017, "step": 9550 }, { "epoch": 8.665158371040723, "grad_norm": 0.9909277558326721, "learning_rate": 0.00012992307692307693, "loss": 5.2481, "step": 9575 }, { "epoch": 8.687782805429864, "grad_norm": 0.9336756467819214, "learning_rate": 0.00013026244343891403, "loss": 5.2832, "step": 9600 }, { "epoch": 8.710407239819004, "grad_norm": 0.944492518901825, "learning_rate": 0.00013060180995475113, "loss": 5.342, "step": 9625 }, { "epoch": 8.733031674208144, "grad_norm": 1.096969485282898, "learning_rate": 0.00013094117647058823, "loss": 5.3501, "step": 9650 }, { "epoch": 8.755656108597286, "grad_norm": 1.1634862422943115, "learning_rate": 0.00013128054298642533, "loss": 5.321, "step": 9675 }, { "epoch": 8.778280542986426, "grad_norm": 0.9251194000244141, "learning_rate": 0.00013161990950226243, "loss": 5.4857, "step": 9700 }, { "epoch": 8.800904977375566, "grad_norm": 1.0412198305130005, "learning_rate": 0.00013195927601809953, "loss": 5.3761, "step": 9725 }, { "epoch": 8.823529411764707, "grad_norm": 1.352041244506836, "learning_rate": 0.00013229864253393663, "loss": 5.377, "step": 9750 }, { "epoch": 8.846153846153847, "grad_norm": 1.015771746635437, "learning_rate": 0.00013263800904977373, "loss": 5.3823, "step": 9775 }, { "epoch": 8.868778280542987, "grad_norm": 0.9252334237098694, "learning_rate": 0.00013297737556561086, "loss": 5.2645, "step": 9800 }, { "epoch": 8.891402714932127, "grad_norm": 1.1377252340316772, "learning_rate": 0.00013331674208144796, "loss": 5.2292, "step": 9825 }, { "epoch": 8.914027149321267, "grad_norm": 1.8188557624816895, "learning_rate": 0.00013365610859728506, "loss": 5.4221, "step": 9850 }, { "epoch": 8.936651583710407, "grad_norm": 1.1808732748031616, "learning_rate": 0.00013398190045248867, "loss": 5.3275, "step": 9875 }, { "epoch": 8.959276018099548, "grad_norm": 0.935534656047821, "learning_rate": 0.00013432126696832577, "loss": 5.2564, "step": 9900 }, { "epoch": 8.981900452488688, "grad_norm": 0.9960452318191528, "learning_rate": 
0.0001346606334841629, "loss": 5.3774, "step": 9925 }, { "epoch": 9.004524886877828, "grad_norm": 3.8774402141571045, "learning_rate": 0.000135, "loss": 5.1604, "step": 9950 }, { "epoch": 9.027149321266968, "grad_norm": 1.0658574104309082, "learning_rate": 0.0001353393665158371, "loss": 5.1074, "step": 9975 }, { "epoch": 9.049773755656108, "grad_norm": 1.2405463457107544, "learning_rate": 0.0001356787330316742, "loss": 5.0691, "step": 10000 }, { "epoch": 9.072398190045249, "grad_norm": 1.2019230127334595, "learning_rate": 0.0001360180995475113, "loss": 5.1978, "step": 10025 }, { "epoch": 9.095022624434389, "grad_norm": 1.9909340143203735, "learning_rate": 0.0001363574660633484, "loss": 5.2004, "step": 10050 }, { "epoch": 9.117647058823529, "grad_norm": 1.0435068607330322, "learning_rate": 0.0001366968325791855, "loss": 5.209, "step": 10075 }, { "epoch": 9.14027149321267, "grad_norm": 1.2937798500061035, "learning_rate": 0.0001370361990950226, "loss": 5.2087, "step": 10100 }, { "epoch": 9.16289592760181, "grad_norm": 1.2895238399505615, "learning_rate": 0.0001373755656108597, "loss": 5.2103, "step": 10125 }, { "epoch": 9.18552036199095, "grad_norm": 0.9975462555885315, "learning_rate": 0.0001377149321266968, "loss": 5.1116, "step": 10150 }, { "epoch": 9.20814479638009, "grad_norm": 1.296386957168579, "learning_rate": 0.0001380542986425339, "loss": 5.1656, "step": 10175 }, { "epoch": 9.23076923076923, "grad_norm": 1.4493881464004517, "learning_rate": 0.00013839366515837104, "loss": 5.1834, "step": 10200 }, { "epoch": 9.25339366515837, "grad_norm": 1.0695624351501465, "learning_rate": 0.00013873303167420814, "loss": 5.1005, "step": 10225 }, { "epoch": 9.276018099547512, "grad_norm": 1.0306825637817383, "learning_rate": 0.00013907239819004524, "loss": 5.1712, "step": 10250 }, { "epoch": 9.298642533936652, "grad_norm": 1.3647042512893677, "learning_rate": 0.00013941176470588234, "loss": 5.15, "step": 10275 }, { "epoch": 9.321266968325792, "grad_norm": 2.479020357131958, 
"learning_rate": 0.00013975113122171944, "loss": 5.1985, "step": 10300 }, { "epoch": 9.343891402714933, "grad_norm": 0.9140968918800354, "learning_rate": 0.00014009049773755654, "loss": 5.2683, "step": 10325 }, { "epoch": 9.366515837104073, "grad_norm": 1.062315583229065, "learning_rate": 0.00014042986425339364, "loss": 5.2319, "step": 10350 }, { "epoch": 9.389140271493213, "grad_norm": 1.0792220830917358, "learning_rate": 0.00014076923076923074, "loss": 5.2176, "step": 10375 }, { "epoch": 9.411764705882353, "grad_norm": 0.9862438440322876, "learning_rate": 0.00014110859728506787, "loss": 5.2732, "step": 10400 }, { "epoch": 9.434389140271493, "grad_norm": 0.9580933451652527, "learning_rate": 0.00014144796380090497, "loss": 5.3087, "step": 10425 }, { "epoch": 9.457013574660634, "grad_norm": 1.1492204666137695, "learning_rate": 0.00014178733031674207, "loss": 5.0943, "step": 10450 }, { "epoch": 9.479638009049774, "grad_norm": 1.028110384941101, "learning_rate": 0.00014212669683257918, "loss": 5.2554, "step": 10475 }, { "epoch": 9.502262443438914, "grad_norm": 1.0601478815078735, "learning_rate": 0.00014246606334841628, "loss": 5.1979, "step": 10500 }, { "epoch": 9.524886877828054, "grad_norm": 1.2302405834197998, "learning_rate": 0.00014280542986425338, "loss": 5.3466, "step": 10525 }, { "epoch": 9.547511312217194, "grad_norm": 1.12709379196167, "learning_rate": 0.0001431447963800905, "loss": 5.2889, "step": 10550 }, { "epoch": 9.570135746606335, "grad_norm": 1.0190308094024658, "learning_rate": 0.0001434841628959276, "loss": 5.3005, "step": 10575 }, { "epoch": 9.592760180995475, "grad_norm": 1.5944112539291382, "learning_rate": 0.0001438235294117647, "loss": 5.2775, "step": 10600 }, { "epoch": 9.615384615384615, "grad_norm": 1.1170854568481445, "learning_rate": 0.0001441628959276018, "loss": 5.1568, "step": 10625 }, { "epoch": 9.638009049773755, "grad_norm": 0.9360098838806152, "learning_rate": 0.0001445022624434389, "loss": 5.2575, "step": 10650 }, { "epoch": 
9.660633484162895, "grad_norm": 1.9224114418029785, "learning_rate": 0.000144841628959276, "loss": 5.1985, "step": 10675 }, { "epoch": 9.683257918552036, "grad_norm": 1.250430703163147, "learning_rate": 0.0001451809954751131, "loss": 5.2976, "step": 10700 }, { "epoch": 9.705882352941176, "grad_norm": 1.004128098487854, "learning_rate": 0.0001455203619909502, "loss": 5.3517, "step": 10725 }, { "epoch": 9.728506787330316, "grad_norm": 1.2826331853866577, "learning_rate": 0.0001458597285067873, "loss": 5.2889, "step": 10750 }, { "epoch": 9.751131221719458, "grad_norm": 1.0989943742752075, "learning_rate": 0.0001461990950226244, "loss": 5.2646, "step": 10775 }, { "epoch": 9.773755656108598, "grad_norm": 1.5269279479980469, "learning_rate": 0.00014653846153846151, "loss": 5.2721, "step": 10800 }, { "epoch": 9.796380090497738, "grad_norm": 1.0799510478973389, "learning_rate": 0.00014687782805429862, "loss": 5.2698, "step": 10825 }, { "epoch": 9.819004524886878, "grad_norm": 0.9514308571815491, "learning_rate": 0.00014721719457013572, "loss": 5.3147, "step": 10850 }, { "epoch": 9.841628959276019, "grad_norm": 0.9488893747329712, "learning_rate": 0.00014755656108597282, "loss": 5.36, "step": 10875 }, { "epoch": 9.864253393665159, "grad_norm": 1.1077697277069092, "learning_rate": 0.00014789592760180994, "loss": 5.1958, "step": 10900 }, { "epoch": 9.886877828054299, "grad_norm": 0.8970409035682678, "learning_rate": 0.00014823529411764705, "loss": 5.3716, "step": 10925 }, { "epoch": 9.90950226244344, "grad_norm": 1.3561121225357056, "learning_rate": 0.00014857466063348415, "loss": 5.2173, "step": 10950 }, { "epoch": 9.93212669683258, "grad_norm": 1.3258692026138306, "learning_rate": 0.00014891402714932125, "loss": 5.1904, "step": 10975 }, { "epoch": 9.95475113122172, "grad_norm": 0.8535260558128357, "learning_rate": 0.00014925339366515835, "loss": 5.3146, "step": 11000 }, { "epoch": 9.97737556561086, "grad_norm": 1.0721464157104492, "learning_rate": 0.00014959276018099548, 
"loss": 5.3224, "step": 11025 }, { "epoch": 10.0, "grad_norm": 1.2367472648620605, "learning_rate": 0.00014993212669683258, "loss": 5.2298, "step": 11050 }, { "epoch": 10.02262443438914, "grad_norm": 1.176303505897522, "learning_rate": 0.000149999985031392, "loss": 5.1865, "step": 11075 }, { "epoch": 10.04524886877828, "grad_norm": 1.6310497522354126, "learning_rate": 0.00014999992422143224, "loss": 5.0658, "step": 11100 }, { "epoch": 10.06787330316742, "grad_norm": 0.8968245983123779, "learning_rate": 0.00014999981663462063, "loss": 5.2153, "step": 11125 }, { "epoch": 10.09049773755656, "grad_norm": 0.8865295052528381, "learning_rate": 0.00014999966227102431, "loss": 4.9703, "step": 11150 }, { "epoch": 10.113122171945701, "grad_norm": 1.1915286779403687, "learning_rate": 0.00014999946113073947, "loss": 5.0086, "step": 11175 }, { "epoch": 10.135746606334841, "grad_norm": 1.1938319206237793, "learning_rate": 0.00014999921321389164, "loss": 5.128, "step": 11200 }, { "epoch": 10.158371040723981, "grad_norm": 1.2753313779830933, "learning_rate": 0.00014999891852063535, "loss": 5.0999, "step": 11225 }, { "epoch": 10.180995475113122, "grad_norm": 0.9530912637710571, "learning_rate": 0.0001499985770511545, "loss": 5.2185, "step": 11250 }, { "epoch": 10.203619909502262, "grad_norm": 1.3997453451156616, "learning_rate": 0.000149998188805662, "loss": 5.0885, "step": 11275 }, { "epoch": 10.226244343891402, "grad_norm": 1.0850180387496948, "learning_rate": 0.00014999775378440005, "loss": 5.154, "step": 11300 }, { "epoch": 10.248868778280542, "grad_norm": 1.292991042137146, "learning_rate": 0.00014999727198763987, "loss": 5.1104, "step": 11325 }, { "epoch": 10.271493212669684, "grad_norm": 1.158610463142395, "learning_rate": 0.00014999674341568207, "loss": 5.1012, "step": 11350 }, { "epoch": 10.294117647058824, "grad_norm": 1.1036847829818726, "learning_rate": 0.00014999616806885623, "loss": 5.2547, "step": 11375 }, { "epoch": 10.316742081447964, "grad_norm": 
2.1555657386779785, "learning_rate": 0.00014999554594752123, "loss": 5.1573, "step": 11400 }, { "epoch": 10.339366515837105, "grad_norm": 1.2989445924758911, "learning_rate": 0.00014999487705206506, "loss": 5.0822, "step": 11425 }, { "epoch": 10.361990950226245, "grad_norm": 0.9577361345291138, "learning_rate": 0.00014999416138290492, "loss": 5.1344, "step": 11450 }, { "epoch": 10.384615384615385, "grad_norm": 1.484882116317749, "learning_rate": 0.00014999339894048718, "loss": 5.0269, "step": 11475 }, { "epoch": 10.407239819004525, "grad_norm": 0.9797399044036865, "learning_rate": 0.00014999258972528734, "loss": 5.0701, "step": 11500 }, { "epoch": 10.429864253393665, "grad_norm": 1.0783559083938599, "learning_rate": 0.00014999173373781013, "loss": 5.2029, "step": 11525 }, { "epoch": 10.452488687782806, "grad_norm": 1.226446270942688, "learning_rate": 0.0001499908309785894, "loss": 5.0235, "step": 11550 }, { "epoch": 10.475113122171946, "grad_norm": 1.1730021238327026, "learning_rate": 0.0001499898814481882, "loss": 5.13, "step": 11575 }, { "epoch": 10.497737556561086, "grad_norm": 1.233260989189148, "learning_rate": 0.00014998888514719874, "loss": 5.1153, "step": 11600 }, { "epoch": 10.520361990950226, "grad_norm": 1.1690484285354614, "learning_rate": 0.0001499878420762424, "loss": 5.1357, "step": 11625 }, { "epoch": 10.542986425339366, "grad_norm": 1.0624953508377075, "learning_rate": 0.0001499867522359698, "loss": 5.1712, "step": 11650 }, { "epoch": 10.565610859728507, "grad_norm": 1.3550862073898315, "learning_rate": 0.00014998561562706055, "loss": 5.2174, "step": 11675 }, { "epoch": 10.588235294117647, "grad_norm": 1.1426056623458862, "learning_rate": 0.0001499844322502236, "loss": 5.1503, "step": 11700 }, { "epoch": 10.610859728506787, "grad_norm": 1.130356788635254, "learning_rate": 0.00014998320210619706, "loss": 5.2003, "step": 11725 }, { "epoch": 10.633484162895927, "grad_norm": 2.088479518890381, "learning_rate": 0.00014998192519574807, "loss": 5.0855, 
"step": 11750 }, { "epoch": 10.656108597285067, "grad_norm": 1.282894253730774, "learning_rate": 0.0001499806015196731, "loss": 5.2212, "step": 11775 }, { "epoch": 10.678733031674208, "grad_norm": 1.458074927330017, "learning_rate": 0.0001499792310787977, "loss": 5.1635, "step": 11800 }, { "epoch": 10.701357466063348, "grad_norm": 1.0478781461715698, "learning_rate": 0.00014997781387397657, "loss": 5.1052, "step": 11825 }, { "epoch": 10.723981900452488, "grad_norm": 1.191235065460205, "learning_rate": 0.00014997634990609367, "loss": 5.2958, "step": 11850 }, { "epoch": 10.74660633484163, "grad_norm": 1.4063588380813599, "learning_rate": 0.000149974839176062, "loss": 5.17, "step": 11875 }, { "epoch": 10.76923076923077, "grad_norm": 1.1008962392807007, "learning_rate": 0.00014997328168482384, "loss": 5.082, "step": 11900 }, { "epoch": 10.79185520361991, "grad_norm": 1.262567162513733, "learning_rate": 0.00014997167743335054, "loss": 5.2031, "step": 11925 }, { "epoch": 10.81447963800905, "grad_norm": 0.9310780763626099, "learning_rate": 0.00014997002642264272, "loss": 5.1467, "step": 11950 }, { "epoch": 10.83710407239819, "grad_norm": 1.0549923181533813, "learning_rate": 0.00014996832865373004, "loss": 5.0579, "step": 11975 }, { "epoch": 10.85972850678733, "grad_norm": 1.1291685104370117, "learning_rate": 0.0001499665841276714, "loss": 5.0962, "step": 12000 }, { "epoch": 10.882352941176471, "grad_norm": 1.5607845783233643, "learning_rate": 0.00014996479284555488, "loss": 5.2022, "step": 12025 }, { "epoch": 10.904977375565611, "grad_norm": 1.444187045097351, "learning_rate": 0.00014996295480849766, "loss": 5.1123, "step": 12050 }, { "epoch": 10.927601809954751, "grad_norm": 0.8266554474830627, "learning_rate": 0.0001499610700176461, "loss": 5.2064, "step": 12075 }, { "epoch": 10.950226244343892, "grad_norm": 0.994787335395813, "learning_rate": 0.00014995913847417575, "loss": 5.2265, "step": 12100 }, { "epoch": 10.972850678733032, "grad_norm": 1.2321454286575317, 
"learning_rate": 0.0001499571601792913, "loss": 5.1676, "step": 12125 }, { "epoch": 10.995475113122172, "grad_norm": 1.3413864374160767, "learning_rate": 0.0001499551351342266, "loss": 5.1838, "step": 12150 }, { "epoch": 11.018099547511312, "grad_norm": 1.178889274597168, "learning_rate": 0.00014995306334024462, "loss": 5.1451, "step": 12175 }, { "epoch": 11.040723981900452, "grad_norm": 1.1773490905761719, "learning_rate": 0.00014995094479863756, "loss": 5.036, "step": 12200 }, { "epoch": 11.063348416289593, "grad_norm": 1.0849095582962036, "learning_rate": 0.0001499487795107267, "loss": 4.9792, "step": 12225 }, { "epoch": 11.085972850678733, "grad_norm": 1.5654512643814087, "learning_rate": 0.00014994656747786256, "loss": 5.0234, "step": 12250 }, { "epoch": 11.108597285067873, "grad_norm": 1.1146122217178345, "learning_rate": 0.00014994430870142472, "loss": 4.9398, "step": 12275 }, { "epoch": 11.131221719457013, "grad_norm": 1.0082900524139404, "learning_rate": 0.00014994200318282198, "loss": 4.9555, "step": 12300 }, { "epoch": 11.153846153846153, "grad_norm": 1.175589919090271, "learning_rate": 0.0001499396509234923, "loss": 5.0247, "step": 12325 }, { "epoch": 11.176470588235293, "grad_norm": 1.3833733797073364, "learning_rate": 0.0001499372519249027, "loss": 4.9253, "step": 12350 }, { "epoch": 11.199095022624434, "grad_norm": 1.128486156463623, "learning_rate": 0.00014993480618854952, "loss": 5.0122, "step": 12375 }, { "epoch": 11.221719457013574, "grad_norm": 1.1842920780181885, "learning_rate": 0.00014993231371595802, "loss": 5.0244, "step": 12400 }, { "epoch": 11.244343891402714, "grad_norm": 1.0491068363189697, "learning_rate": 0.00014992977450868284, "loss": 5.046, "step": 12425 }, { "epoch": 11.266968325791856, "grad_norm": 1.563358187675476, "learning_rate": 0.00014992718856830762, "loss": 4.9557, "step": 12450 }, { "epoch": 11.289592760180996, "grad_norm": 1.1112754344940186, "learning_rate": 0.00014992455589644515, "loss": 5.0276, "step": 12475 }, { 
"epoch": 11.312217194570136, "grad_norm": 1.1768701076507568, "learning_rate": 0.00014992187649473748, "loss": 4.9013, "step": 12500 }, { "epoch": 11.334841628959277, "grad_norm": 1.175905704498291, "learning_rate": 0.0001499191503648557, "loss": 5.0562, "step": 12525 }, { "epoch": 11.357466063348417, "grad_norm": 1.1734528541564941, "learning_rate": 0.0001499163775085001, "loss": 4.9798, "step": 12550 }, { "epoch": 11.380090497737557, "grad_norm": 1.2192497253417969, "learning_rate": 0.00014991355792740003, "loss": 5.0336, "step": 12575 }, { "epoch": 11.402714932126697, "grad_norm": 0.9738785028457642, "learning_rate": 0.00014991069162331405, "loss": 4.947, "step": 12600 }, { "epoch": 11.425339366515837, "grad_norm": 1.001910924911499, "learning_rate": 0.00014990777859802992, "loss": 5.0363, "step": 12625 }, { "epoch": 11.447963800904978, "grad_norm": 1.4487667083740234, "learning_rate": 0.0001499048188533644, "loss": 5.0579, "step": 12650 }, { "epoch": 11.470588235294118, "grad_norm": 0.9874216318130493, "learning_rate": 0.00014990181239116348, "loss": 5.1646, "step": 12675 }, { "epoch": 11.493212669683258, "grad_norm": 1.091408371925354, "learning_rate": 0.00014989875921330229, "loss": 5.1041, "step": 12700 }, { "epoch": 11.515837104072398, "grad_norm": 0.8557041883468628, "learning_rate": 0.00014989565932168504, "loss": 5.033, "step": 12725 }, { "epoch": 11.538461538461538, "grad_norm": 1.2733933925628662, "learning_rate": 0.00014989251271824513, "loss": 5.0351, "step": 12750 }, { "epoch": 11.561085972850679, "grad_norm": 1.1447516679763794, "learning_rate": 0.00014988931940494507, "loss": 5.0698, "step": 12775 }, { "epoch": 11.583710407239819, "grad_norm": 1.2866042852401733, "learning_rate": 0.00014988607938377647, "loss": 5.0858, "step": 12800 }, { "epoch": 11.606334841628959, "grad_norm": 0.9531421065330505, "learning_rate": 0.00014988279265676013, "loss": 5.0072, "step": 12825 }, { "epoch": 11.628959276018099, "grad_norm": 1.4517631530761719, 
"learning_rate": 0.00014987945922594599, "loss": 5.0316, "step": 12850 }, { "epoch": 11.65158371040724, "grad_norm": 1.2381407022476196, "learning_rate": 0.00014987607909341304, "loss": 5.0895, "step": 12875 }, { "epoch": 11.67420814479638, "grad_norm": 1.1914258003234863, "learning_rate": 0.00014987265226126944, "loss": 4.9131, "step": 12900 }, { "epoch": 11.69683257918552, "grad_norm": 1.4007694721221924, "learning_rate": 0.00014986917873165248, "loss": 5.0471, "step": 12925 }, { "epoch": 11.71945701357466, "grad_norm": 1.4290871620178223, "learning_rate": 0.0001498656585067286, "loss": 5.0424, "step": 12950 }, { "epoch": 11.742081447963802, "grad_norm": 1.190192699432373, "learning_rate": 0.00014986209158869332, "loss": 5.0347, "step": 12975 }, { "epoch": 11.764705882352942, "grad_norm": 1.1445356607437134, "learning_rate": 0.0001498584779797713, "loss": 5.0117, "step": 13000 }, { "epoch": 11.787330316742082, "grad_norm": 1.3588181734085083, "learning_rate": 0.0001498548176822163, "loss": 5.0594, "step": 13025 }, { "epoch": 11.809954751131222, "grad_norm": 1.5752766132354736, "learning_rate": 0.00014985111069831122, "loss": 5.1229, "step": 13050 }, { "epoch": 11.832579185520363, "grad_norm": 1.3154525756835938, "learning_rate": 0.00014984735703036812, "loss": 5.1844, "step": 13075 }, { "epoch": 11.855203619909503, "grad_norm": 0.9763768315315247, "learning_rate": 0.0001498435566807281, "loss": 5.1022, "step": 13100 }, { "epoch": 11.877828054298643, "grad_norm": 1.2734490633010864, "learning_rate": 0.00014983970965176137, "loss": 5.0628, "step": 13125 }, { "epoch": 11.900452488687783, "grad_norm": 1.2248128652572632, "learning_rate": 0.00014983581594586737, "loss": 5.0853, "step": 13150 }, { "epoch": 11.923076923076923, "grad_norm": 1.1970940828323364, "learning_rate": 0.00014983187556547454, "loss": 4.9832, "step": 13175 }, { "epoch": 11.945701357466064, "grad_norm": 1.3674147129058838, "learning_rate": 0.00014982788851304046, "loss": 5.0107, "step": 13200 }, { 
"epoch": 11.968325791855204, "grad_norm": 0.918038547039032, "learning_rate": 0.0001498238547910518, "loss": 5.114, "step": 13225 }, { "epoch": 11.990950226244344, "grad_norm": 0.9390908479690552, "learning_rate": 0.0001498197744020244, "loss": 5.1094, "step": 13250 }, { "epoch": 12.013574660633484, "grad_norm": 1.1689878702163696, "learning_rate": 0.00014981564734850312, "loss": 4.8828, "step": 13275 }, { "epoch": 12.036199095022624, "grad_norm": 1.544630527496338, "learning_rate": 0.00014981147363306202, "loss": 4.9284, "step": 13300 }, { "epoch": 12.058823529411764, "grad_norm": 1.171164870262146, "learning_rate": 0.00014980725325830418, "loss": 4.8129, "step": 13325 }, { "epoch": 12.081447963800905, "grad_norm": 1.3011549711227417, "learning_rate": 0.00014980298622686183, "loss": 4.9128, "step": 13350 }, { "epoch": 12.104072398190045, "grad_norm": 1.7315293550491333, "learning_rate": 0.00014979867254139628, "loss": 4.7659, "step": 13375 }, { "epoch": 12.126696832579185, "grad_norm": 1.319021224975586, "learning_rate": 0.00014979431220459792, "loss": 4.9106, "step": 13400 }, { "epoch": 12.149321266968325, "grad_norm": 1.1789779663085938, "learning_rate": 0.00014978990521918628, "loss": 4.8845, "step": 13425 }, { "epoch": 12.171945701357465, "grad_norm": 1.3722392320632935, "learning_rate": 0.00014978545158791, "loss": 4.8086, "step": 13450 }, { "epoch": 12.194570135746606, "grad_norm": 1.3678064346313477, "learning_rate": 0.00014978095131354665, "loss": 4.8411, "step": 13475 }, { "epoch": 12.217194570135746, "grad_norm": 1.5004942417144775, "learning_rate": 0.00014977640439890316, "loss": 4.9215, "step": 13500 }, { "epoch": 12.239819004524886, "grad_norm": 1.622236728668213, "learning_rate": 0.00014977181084681532, "loss": 4.8444, "step": 13525 }, { "epoch": 12.262443438914028, "grad_norm": 1.582205891609192, "learning_rate": 0.0001497671706601481, "loss": 4.8589, "step": 13550 }, { "epoch": 12.285067873303168, "grad_norm": 1.7920467853546143, "learning_rate": 
0.0001497624838417956, "loss": 4.8509, "step": 13575 }, { "epoch": 12.307692307692308, "grad_norm": 1.2452937364578247, "learning_rate": 0.00014975775039468086, "loss": 4.9321, "step": 13600 }, { "epoch": 12.330316742081449, "grad_norm": 1.0658358335494995, "learning_rate": 0.00014975297032175617, "loss": 4.9616, "step": 13625 }, { "epoch": 12.352941176470589, "grad_norm": 1.2660192251205444, "learning_rate": 0.0001497481436260028, "loss": 4.9297, "step": 13650 }, { "epoch": 12.375565610859729, "grad_norm": 0.9705486297607422, "learning_rate": 0.0001497432703104311, "loss": 4.8978, "step": 13675 }, { "epoch": 12.39819004524887, "grad_norm": 1.2590233087539673, "learning_rate": 0.00014973835037808056, "loss": 4.8164, "step": 13700 }, { "epoch": 12.42081447963801, "grad_norm": 1.3895432949066162, "learning_rate": 0.00014973338383201965, "loss": 4.9924, "step": 13725 }, { "epoch": 12.44343891402715, "grad_norm": 1.2928717136383057, "learning_rate": 0.000149728370675346, "loss": 4.9683, "step": 13750 }, { "epoch": 12.46606334841629, "grad_norm": 1.352980375289917, "learning_rate": 0.00014972331091118627, "loss": 4.8819, "step": 13775 }, { "epoch": 12.48868778280543, "grad_norm": 1.4463528394699097, "learning_rate": 0.00014971820454269622, "loss": 4.9767, "step": 13800 }, { "epoch": 12.51131221719457, "grad_norm": 1.238166093826294, "learning_rate": 0.0001497130515730606, "loss": 4.987, "step": 13825 }, { "epoch": 12.53393665158371, "grad_norm": 1.2077312469482422, "learning_rate": 0.00014970785200549332, "loss": 4.9379, "step": 13850 }, { "epoch": 12.55656108597285, "grad_norm": 1.2092487812042236, "learning_rate": 0.00014970260584323724, "loss": 4.9096, "step": 13875 }, { "epoch": 12.57918552036199, "grad_norm": 1.4933909177780151, "learning_rate": 0.00014969731308956443, "loss": 4.9451, "step": 13900 }, { "epoch": 12.60180995475113, "grad_norm": 1.228698968887329, "learning_rate": 0.0001496921882158976, "loss": 4.8687, "step": 13925 }, { "epoch": 12.624434389140271, 
"grad_norm": 1.2857065200805664, "learning_rate": 0.00014968680415265059, "loss": 4.9593, "step": 13950 }, { "epoch": 12.647058823529411, "grad_norm": 1.3414708375930786, "learning_rate": 0.00014968137350784223, "loss": 4.9016, "step": 13975 }, { "epoch": 12.669683257918551, "grad_norm": 1.3420109748840332, "learning_rate": 0.00014967589628485953, "loss": 4.8301, "step": 14000 }, { "epoch": 12.692307692307692, "grad_norm": 1.5517535209655762, "learning_rate": 0.00014967037248711856, "loss": 4.9292, "step": 14025 }, { "epoch": 12.714932126696832, "grad_norm": 1.4037293195724487, "learning_rate": 0.00014966480211806458, "loss": 5.0179, "step": 14050 }, { "epoch": 12.737556561085974, "grad_norm": 1.1001484394073486, "learning_rate": 0.00014965918518117168, "loss": 4.9745, "step": 14075 }, { "epoch": 12.760180995475114, "grad_norm": 1.636365532875061, "learning_rate": 0.00014965352167994317, "loss": 4.9478, "step": 14100 }, { "epoch": 12.782805429864254, "grad_norm": 1.43099045753479, "learning_rate": 0.00014964781161791126, "loss": 5.0, "step": 14125 }, { "epoch": 12.805429864253394, "grad_norm": 1.5528738498687744, "learning_rate": 0.0001496420549986373, "loss": 4.8686, "step": 14150 }, { "epoch": 12.828054298642535, "grad_norm": 1.145129680633545, "learning_rate": 0.0001496362518257117, "loss": 5.0184, "step": 14175 }, { "epoch": 12.850678733031675, "grad_norm": 1.2828490734100342, "learning_rate": 0.00014963040210275378, "loss": 4.8833, "step": 14200 }, { "epoch": 12.873303167420815, "grad_norm": 1.3507344722747803, "learning_rate": 0.00014962450583341202, "loss": 5.0941, "step": 14225 }, { "epoch": 12.895927601809955, "grad_norm": 1.2493312358856201, "learning_rate": 0.00014961856302136381, "loss": 4.987, "step": 14250 }, { "epoch": 12.918552036199095, "grad_norm": 0.9961503148078918, "learning_rate": 0.00014961257367031568, "loss": 5.085, "step": 14275 }, { "epoch": 12.941176470588236, "grad_norm": 1.2279330492019653, "learning_rate": 0.00014960653778400317, 
"loss": 5.0184, "step": 14300 }, { "epoch": 12.963800904977376, "grad_norm": 1.4541242122650146, "learning_rate": 0.00014960045536619075, "loss": 4.9985, "step": 14325 }, { "epoch": 12.986425339366516, "grad_norm": 1.1447900533676147, "learning_rate": 0.000149594326420672, "loss": 4.9128, "step": 14350 }, { "epoch": 13.009049773755656, "grad_norm": 1.1077444553375244, "learning_rate": 0.0001495881509512695, "loss": 5.0568, "step": 14375 }, { "epoch": 13.031674208144796, "grad_norm": 1.2826627492904663, "learning_rate": 0.00014958192896183484, "loss": 4.8062, "step": 14400 }, { "epoch": 13.054298642533936, "grad_norm": 1.337775468826294, "learning_rate": 0.00014957566045624863, "loss": 4.7318, "step": 14425 }, { "epoch": 13.076923076923077, "grad_norm": 1.5631524324417114, "learning_rate": 0.00014956934543842047, "loss": 4.7804, "step": 14450 }, { "epoch": 13.099547511312217, "grad_norm": 1.4884055852890015, "learning_rate": 0.000149562983912289, "loss": 4.8913, "step": 14475 }, { "epoch": 13.122171945701357, "grad_norm": 1.8440004587173462, "learning_rate": 0.00014955657588182182, "loss": 4.7437, "step": 14500 }, { "epoch": 13.144796380090497, "grad_norm": 1.6917012929916382, "learning_rate": 0.0001495501213510156, "loss": 4.6571, "step": 14525 }, { "epoch": 13.167420814479637, "grad_norm": 1.4976379871368408, "learning_rate": 0.000149543620323896, "loss": 4.8869, "step": 14550 }, { "epoch": 13.190045248868778, "grad_norm": 1.5437226295471191, "learning_rate": 0.00014953707280451764, "loss": 4.8286, "step": 14575 }, { "epoch": 13.212669683257918, "grad_norm": 1.7578870058059692, "learning_rate": 0.00014953047879696414, "loss": 4.8591, "step": 14600 }, { "epoch": 13.235294117647058, "grad_norm": 1.410611867904663, "learning_rate": 0.00014952383830534814, "loss": 4.7898, "step": 14625 }, { "epoch": 13.2579185520362, "grad_norm": 1.135124683380127, "learning_rate": 0.00014951715133381123, "loss": 4.7035, "step": 14650 }, { "epoch": 13.28054298642534, "grad_norm": 
1.2869302034378052, "learning_rate": 0.00014951041788652407, "loss": 4.8667, "step": 14675 }, { "epoch": 13.30316742081448, "grad_norm": 2.459376811981201, "learning_rate": 0.00014950363796768624, "loss": 4.7734, "step": 14700 }, { "epoch": 13.32579185520362, "grad_norm": 1.4525374174118042, "learning_rate": 0.00014949681158152631, "loss": 4.9633, "step": 14725 }, { "epoch": 13.34841628959276, "grad_norm": 1.7316921949386597, "learning_rate": 0.00014948993873230187, "loss": 4.894, "step": 14750 }, { "epoch": 13.371040723981901, "grad_norm": 1.5175212621688843, "learning_rate": 0.00014948301942429941, "loss": 4.8522, "step": 14775 }, { "epoch": 13.393665158371041, "grad_norm": 1.1831189393997192, "learning_rate": 0.0001494760536618345, "loss": 4.8597, "step": 14800 }, { "epoch": 13.416289592760181, "grad_norm": 1.3064175844192505, "learning_rate": 0.0001494690414492516, "loss": 4.7122, "step": 14825 }, { "epoch": 13.438914027149321, "grad_norm": 1.6598682403564453, "learning_rate": 0.0001494619827909242, "loss": 4.835, "step": 14850 }, { "epoch": 13.461538461538462, "grad_norm": 1.4541395902633667, "learning_rate": 0.00014945487769125467, "loss": 4.8324, "step": 14875 }, { "epoch": 13.484162895927602, "grad_norm": 1.1823992729187012, "learning_rate": 0.00014944772615467448, "loss": 4.8323, "step": 14900 }, { "epoch": 13.506787330316742, "grad_norm": 1.439037799835205, "learning_rate": 0.00014944052818564394, "loss": 4.7843, "step": 14925 }, { "epoch": 13.529411764705882, "grad_norm": 1.3563629388809204, "learning_rate": 0.00014943328378865236, "loss": 4.8002, "step": 14950 }, { "epoch": 13.552036199095022, "grad_norm": 1.4317904710769653, "learning_rate": 0.00014942599296821803, "loss": 4.8379, "step": 14975 }, { "epoch": 13.574660633484163, "grad_norm": 2.0179126262664795, "learning_rate": 0.00014941865572888816, "loss": 4.6603, "step": 15000 }, { "epoch": 13.597285067873303, "grad_norm": 1.239160418510437, "learning_rate": 0.00014941127207523898, "loss": 4.8227, 
"step": 15025 }, { "epoch": 13.619909502262443, "grad_norm": 1.4565255641937256, "learning_rate": 0.00014940384201187553, "loss": 4.8444, "step": 15050 }, { "epoch": 13.642533936651583, "grad_norm": 1.5881317853927612, "learning_rate": 0.00014939636554343194, "loss": 4.8009, "step": 15075 }, { "epoch": 13.665158371040723, "grad_norm": 1.9541592597961426, "learning_rate": 0.0001493891444801549, "loss": 4.7418, "step": 15100 }, { "epoch": 13.687782805429864, "grad_norm": 1.6791893243789673, "learning_rate": 0.00014938157707130754, "loss": 4.8094, "step": 15125 }, { "epoch": 13.710407239819004, "grad_norm": 1.267594814300537, "learning_rate": 0.0001493739632712665, "loss": 4.813, "step": 15150 }, { "epoch": 13.733031674208144, "grad_norm": 1.4446032047271729, "learning_rate": 0.00014936630308478042, "loss": 4.7961, "step": 15175 }, { "epoch": 13.755656108597286, "grad_norm": 1.5222852230072021, "learning_rate": 0.00014935859651662696, "loss": 4.8379, "step": 15200 }, { "epoch": 13.778280542986426, "grad_norm": 2.0166687965393066, "learning_rate": 0.00014935084357161255, "loss": 4.7495, "step": 15225 }, { "epoch": 13.800904977375566, "grad_norm": 1.8037384748458862, "learning_rate": 0.0001493430442545727, "loss": 4.844, "step": 15250 }, { "epoch": 13.823529411764707, "grad_norm": 1.2547695636749268, "learning_rate": 0.0001493351985703718, "loss": 4.7547, "step": 15275 }, { "epoch": 13.846153846153847, "grad_norm": 1.2022550106048584, "learning_rate": 0.0001493273065239031, "loss": 4.8512, "step": 15300 }, { "epoch": 13.868778280542987, "grad_norm": 1.9524818658828735, "learning_rate": 0.0001493193681200888, "loss": 4.7884, "step": 15325 }, { "epoch": 13.891402714932127, "grad_norm": 1.3403666019439697, "learning_rate": 0.00014931138336388004, "loss": 4.7881, "step": 15350 }, { "epoch": 13.914027149321267, "grad_norm": 1.6197713613510132, "learning_rate": 0.00014930335226025684, "loss": 4.8554, "step": 15375 }, { "epoch": 13.936651583710407, "grad_norm": 
1.4861394166946411, "learning_rate": 0.00014929527481422815, "loss": 4.8498, "step": 15400 }, { "epoch": 13.959276018099548, "grad_norm": 1.1756571531295776, "learning_rate": 0.00014928715103083178, "loss": 4.8845, "step": 15425 }, { "epoch": 13.981900452488688, "grad_norm": 1.8476282358169556, "learning_rate": 0.0001492789809151345, "loss": 4.7783, "step": 15450 }, { "epoch": 14.004524886877828, "grad_norm": 1.3503631353378296, "learning_rate": 0.00014927076447223195, "loss": 4.7305, "step": 15475 }, { "epoch": 14.027149321266968, "grad_norm": 1.109547734260559, "learning_rate": 0.00014926250170724863, "loss": 4.6489, "step": 15500 }, { "epoch": 14.049773755656108, "grad_norm": 1.9085489511489868, "learning_rate": 0.00014925419262533794, "loss": 4.64, "step": 15525 }, { "epoch": 14.072398190045249, "grad_norm": 1.5549107789993286, "learning_rate": 0.00014924583723168226, "loss": 4.6353, "step": 15550 }, { "epoch": 14.095022624434389, "grad_norm": 2.0388152599334717, "learning_rate": 0.00014923743553149271, "loss": 4.6727, "step": 15575 }, { "epoch": 14.117647058823529, "grad_norm": 1.3196210861206055, "learning_rate": 0.00014922898753000943, "loss": 4.7442, "step": 15600 }, { "epoch": 14.14027149321267, "grad_norm": 1.8600906133651733, "learning_rate": 0.00014922049323250132, "loss": 4.6711, "step": 15625 }, { "epoch": 14.16289592760181, "grad_norm": 1.3792847394943237, "learning_rate": 0.0001492119526442662, "loss": 4.5971, "step": 15650 }, { "epoch": 14.18552036199095, "grad_norm": 1.8213170766830444, "learning_rate": 0.0001492033657706308, "loss": 4.5187, "step": 15675 }, { "epoch": 14.20814479638009, "grad_norm": 1.7856183052062988, "learning_rate": 0.00014919473261695067, "loss": 4.6313, "step": 15700 }, { "epoch": 14.23076923076923, "grad_norm": 1.993672490119934, "learning_rate": 0.00014918605318861027, "loss": 4.8331, "step": 15725 }, { "epoch": 14.25339366515837, "grad_norm": 1.207847237586975, "learning_rate": 0.00014917732749102284, "loss": 4.7103, 
"step": 15750 }, { "epoch": 14.276018099547512, "grad_norm": 1.5237271785736084, "learning_rate": 0.00014916855552963052, "loss": 4.6217, "step": 15775 }, { "epoch": 14.298642533936652, "grad_norm": 1.4603103399276733, "learning_rate": 0.00014915973730990437, "loss": 4.5937, "step": 15800 }, { "epoch": 14.321266968325792, "grad_norm": 1.55234956741333, "learning_rate": 0.00014915087283734422, "loss": 4.6703, "step": 15825 }, { "epoch": 14.343891402714933, "grad_norm": 1.4504629373550415, "learning_rate": 0.00014914196211747875, "loss": 4.7315, "step": 15850 }, { "epoch": 14.366515837104073, "grad_norm": 1.5225130319595337, "learning_rate": 0.00014913300515586553, "loss": 4.8271, "step": 15875 }, { "epoch": 14.389140271493213, "grad_norm": 1.748902678489685, "learning_rate": 0.00014912436297366587, "loss": 4.7046, "step": 15900 }, { "epoch": 14.411764705882353, "grad_norm": 1.5265341997146606, "learning_rate": 0.00014911531539445877, "loss": 4.5435, "step": 15925 }, { "epoch": 14.434389140271493, "grad_norm": 2.092003107070923, "learning_rate": 0.00014910622159012326, "loss": 4.7161, "step": 15950 }, { "epoch": 14.457013574660634, "grad_norm": 1.8419088125228882, "learning_rate": 0.00014909708156633108, "loss": 4.6548, "step": 15975 }, { "epoch": 14.479638009049774, "grad_norm": 1.6830592155456543, "learning_rate": 0.00014908789532878277, "loss": 4.6093, "step": 16000 }, { "epoch": 14.502262443438914, "grad_norm": 2.628295660018921, "learning_rate": 0.00014907866288320774, "loss": 4.6621, "step": 16025 }, { "epoch": 14.524886877828054, "grad_norm": 1.7091392278671265, "learning_rate": 0.00014906938423536417, "loss": 4.6033, "step": 16050 }, { "epoch": 14.547511312217194, "grad_norm": 1.7395411729812622, "learning_rate": 0.00014906005939103906, "loss": 4.5248, "step": 16075 }, { "epoch": 14.570135746606335, "grad_norm": 1.5759046077728271, "learning_rate": 0.00014905068835604826, "loss": 4.6969, "step": 16100 }, { "epoch": 14.592760180995475, "grad_norm": 
1.5003995895385742, "learning_rate": 0.00014904127113623644, "loss": 4.6489, "step": 16125 }, { "epoch": 14.615384615384615, "grad_norm": 2.510037660598755, "learning_rate": 0.000149031807737477, "loss": 4.9008, "step": 16150 }, { "epoch": 14.638009049773755, "grad_norm": 2.241529941558838, "learning_rate": 0.0001490222981656722, "loss": 4.7296, "step": 16175 }, { "epoch": 14.660633484162895, "grad_norm": 1.7212368249893188, "learning_rate": 0.0001490127424267531, "loss": 4.7851, "step": 16200 }, { "epoch": 14.683257918552036, "grad_norm": 1.4144231081008911, "learning_rate": 0.00014900314052667952, "loss": 4.8715, "step": 16225 }, { "epoch": 14.705882352941176, "grad_norm": 1.6816970109939575, "learning_rate": 0.00014899349247144008, "loss": 4.8645, "step": 16250 }, { "epoch": 14.728506787330316, "grad_norm": 1.9775121212005615, "learning_rate": 0.00014898379826705223, "loss": 4.6665, "step": 16275 }, { "epoch": 14.751131221719458, "grad_norm": 1.2360163927078247, "learning_rate": 0.00014897405791956212, "loss": 4.7686, "step": 16300 }, { "epoch": 14.773755656108598, "grad_norm": 1.3875223398208618, "learning_rate": 0.00014896427143504476, "loss": 4.6434, "step": 16325 }, { "epoch": 14.796380090497738, "grad_norm": 1.443358063697815, "learning_rate": 0.0001489544388196039, "loss": 4.6037, "step": 16350 }, { "epoch": 14.819004524886878, "grad_norm": 1.9732576608657837, "learning_rate": 0.00014894456007937204, "loss": 4.6332, "step": 16375 }, { "epoch": 14.841628959276019, "grad_norm": 1.4852114915847778, "learning_rate": 0.0001489346352205105, "loss": 4.7808, "step": 16400 }, { "epoch": 14.864253393665159, "grad_norm": 1.50007164478302, "learning_rate": 0.00014892466424920933, "loss": 4.6615, "step": 16425 }, { "epoch": 14.886877828054299, "grad_norm": 1.3639907836914062, "learning_rate": 0.00014891464717168732, "loss": 4.7119, "step": 16450 }, { "epoch": 14.90950226244344, "grad_norm": 1.2364827394485474, "learning_rate": 0.00014890458399419209, "loss": 4.7193, 
"step": 16475 }, { "epoch": 14.93212669683258, "grad_norm": 1.5049721002578735, "learning_rate": 0.0001488944747229999, "loss": 4.6358, "step": 16500 }, { "epoch": 14.95475113122172, "grad_norm": 1.619442343711853, "learning_rate": 0.00014888431936441586, "loss": 4.7165, "step": 16525 }, { "epoch": 14.97737556561086, "grad_norm": 1.3429360389709473, "learning_rate": 0.00014887411792477377, "loss": 4.7141, "step": 16550 }, { "epoch": 15.0, "grad_norm": 1.5158376693725586, "learning_rate": 0.00014886387041043622, "loss": 4.769, "step": 16575 }, { "epoch": 15.02262443438914, "grad_norm": 1.6161320209503174, "learning_rate": 0.00014885357682779447, "loss": 4.4315, "step": 16600 }, { "epoch": 15.04524886877828, "grad_norm": 1.5701944828033447, "learning_rate": 0.00014884323718326853, "loss": 4.5037, "step": 16625 }, { "epoch": 15.06787330316742, "grad_norm": 1.572098970413208, "learning_rate": 0.00014883326779548884, "loss": 4.5302, "step": 16650 }, { "epoch": 15.09049773755656, "grad_norm": 1.624096393585205, "learning_rate": 0.0001488228378884032, "loss": 4.5176, "step": 16675 }, { "epoch": 15.113122171945701, "grad_norm": 3.299954414367676, "learning_rate": 0.00014881236193860494, "loss": 4.5393, "step": 16700 }, { "epoch": 15.135746606334841, "grad_norm": 1.5165592432022095, "learning_rate": 0.000148801839952628, "loss": 4.4814, "step": 16725 }, { "epoch": 15.158371040723981, "grad_norm": 2.0233843326568604, "learning_rate": 0.0001487912719370347, "loss": 4.5063, "step": 16750 }, { "epoch": 15.180995475113122, "grad_norm": 1.7269341945648193, "learning_rate": 0.0001487806578984163, "loss": 4.4549, "step": 16775 }, { "epoch": 15.203619909502262, "grad_norm": 2.506732702255249, "learning_rate": 0.0001487699978433927, "loss": 4.5649, "step": 16800 }, { "epoch": 15.226244343891402, "grad_norm": 1.6503850221633911, "learning_rate": 0.0001487592917786125, "loss": 4.4911, "step": 16825 }, { "epoch": 15.248868778280542, "grad_norm": 2.2163994312286377, "learning_rate": 
0.00014874853971075293, "loss": 4.3962, "step": 16850 }, { "epoch": 15.271493212669684, "grad_norm": 2.2149605751037598, "learning_rate": 0.00014873774164652, "loss": 4.5588, "step": 16875 }, { "epoch": 15.294117647058824, "grad_norm": 1.7098424434661865, "learning_rate": 0.00014872689759264839, "loss": 4.5646, "step": 16900 }, { "epoch": 15.316742081447964, "grad_norm": 1.7517985105514526, "learning_rate": 0.00014871600755590142, "loss": 4.5443, "step": 16925 }, { "epoch": 15.339366515837105, "grad_norm": 2.4214367866516113, "learning_rate": 0.00014870507154307114, "loss": 4.5232, "step": 16950 }, { "epoch": 15.361990950226245, "grad_norm": 1.5129008293151855, "learning_rate": 0.00014869408956097826, "loss": 4.5897, "step": 16975 }, { "epoch": 15.384615384615385, "grad_norm": 1.4799803495407104, "learning_rate": 0.00014868306161647214, "loss": 4.6032, "step": 17000 }, { "epoch": 15.407239819004525, "grad_norm": 1.4959242343902588, "learning_rate": 0.00014867198771643085, "loss": 4.6202, "step": 17025 }, { "epoch": 15.429864253393665, "grad_norm": 2.0012917518615723, "learning_rate": 0.00014866086786776103, "loss": 4.5087, "step": 17050 }, { "epoch": 15.452488687782806, "grad_norm": 2.2068300247192383, "learning_rate": 0.00014864970207739808, "loss": 4.6576, "step": 17075 }, { "epoch": 15.475113122171946, "grad_norm": 1.85796320438385, "learning_rate": 0.00014863849035230602, "loss": 4.597, "step": 17100 }, { "epoch": 15.497737556561086, "grad_norm": 1.781432867050171, "learning_rate": 0.0001486272326994775, "loss": 4.5005, "step": 17125 }, { "epoch": 15.520361990950226, "grad_norm": 1.7553761005401611, "learning_rate": 0.00014861592912593385, "loss": 4.564, "step": 17150 }, { "epoch": 15.542986425339366, "grad_norm": 1.4817267656326294, "learning_rate": 0.00014860457963872497, "loss": 4.5873, "step": 17175 }, { "epoch": 15.565610859728507, "grad_norm": 1.7653452157974243, "learning_rate": 0.0001485931842449295, "loss": 4.3915, "step": 17200 }, { "epoch": 
15.588235294117647, "grad_norm": 1.4498839378356934, "learning_rate": 0.00014858174295165463, "loss": 4.5328, "step": 17225 }, { "epoch": 15.610859728506787, "grad_norm": 1.341663122177124, "learning_rate": 0.0001485702557660362, "loss": 4.5389, "step": 17250 }, { "epoch": 15.633484162895927, "grad_norm": 2.314711093902588, "learning_rate": 0.00014855872269523866, "loss": 4.5966, "step": 17275 }, { "epoch": 15.656108597285067, "grad_norm": 1.455085039138794, "learning_rate": 0.00014854714374645513, "loss": 4.6546, "step": 17300 }, { "epoch": 15.678733031674208, "grad_norm": 1.658861517906189, "learning_rate": 0.0001485355189269073, "loss": 4.7516, "step": 17325 }, { "epoch": 15.701357466063348, "grad_norm": 1.514167308807373, "learning_rate": 0.00014852384824384546, "loss": 4.5733, "step": 17350 }, { "epoch": 15.723981900452488, "grad_norm": 1.9092057943344116, "learning_rate": 0.00014851213170454853, "loss": 4.7214, "step": 17375 }, { "epoch": 15.74660633484163, "grad_norm": 1.7849903106689453, "learning_rate": 0.000148500369316324, "loss": 4.5989, "step": 17400 }, { "epoch": 15.76923076923077, "grad_norm": 2.334677219390869, "learning_rate": 0.00014848856108650802, "loss": 4.5126, "step": 17425 }, { "epoch": 15.79185520361991, "grad_norm": 1.6683495044708252, "learning_rate": 0.00014847670702246527, "loss": 4.6018, "step": 17450 }, { "epoch": 15.81447963800905, "grad_norm": 1.8269922733306885, "learning_rate": 0.000148464807131589, "loss": 4.5902, "step": 17475 }, { "epoch": 15.83710407239819, "grad_norm": 1.3060346841812134, "learning_rate": 0.00014845286142130116, "loss": 4.7792, "step": 17500 }, { "epoch": 15.85972850678733, "grad_norm": 2.0103907585144043, "learning_rate": 0.0001484408698990521, "loss": 4.696, "step": 17525 }, { "epoch": 15.882352941176471, "grad_norm": 1.733870029449463, "learning_rate": 0.0001484288325723209, "loss": 4.5294, "step": 17550 }, { "epoch": 15.904977375565611, "grad_norm": 1.6223927736282349, "learning_rate": 
0.0001484167494486151, "loss": 4.6102, "step": 17575 }, { "epoch": 15.927601809954751, "grad_norm": 1.5040452480316162, "learning_rate": 0.0001484046205354709, "loss": 4.5765, "step": 17600 }, { "epoch": 15.950226244343892, "grad_norm": 2.060255527496338, "learning_rate": 0.00014839244584045295, "loss": 4.5268, "step": 17625 }, { "epoch": 15.972850678733032, "grad_norm": 1.6869535446166992, "learning_rate": 0.00014838022537115453, "loss": 4.6779, "step": 17650 }, { "epoch": 15.995475113122172, "grad_norm": 1.6950558423995972, "learning_rate": 0.00014836795913519748, "loss": 4.4615, "step": 17675 }, { "epoch": 16.018099547511312, "grad_norm": 1.6395280361175537, "learning_rate": 0.0001483556471402321, "loss": 4.3711, "step": 17700 }, { "epoch": 16.040723981900452, "grad_norm": 2.1945571899414062, "learning_rate": 0.00014834328939393733, "loss": 4.3561, "step": 17725 }, { "epoch": 16.063348416289593, "grad_norm": 1.7082488536834717, "learning_rate": 0.0001483308859040206, "loss": 4.4945, "step": 17750 }, { "epoch": 16.085972850678733, "grad_norm": 2.0914411544799805, "learning_rate": 0.00014831843667821777, "loss": 4.2182, "step": 17775 }, { "epoch": 16.108597285067873, "grad_norm": 2.07283878326416, "learning_rate": 0.00014830594172429346, "loss": 4.2707, "step": 17800 }, { "epoch": 16.131221719457013, "grad_norm": 1.7264245748519897, "learning_rate": 0.0001482934010500406, "loss": 4.5788, "step": 17825 }, { "epoch": 16.153846153846153, "grad_norm": 1.8298250436782837, "learning_rate": 0.0001482808146632807, "loss": 4.5108, "step": 17850 }, { "epoch": 16.176470588235293, "grad_norm": 1.6344093084335327, "learning_rate": 0.00014826818257186383, "loss": 4.2816, "step": 17875 }, { "epoch": 16.199095022624434, "grad_norm": 1.8113044500350952, "learning_rate": 0.00014825550478366847, "loss": 4.3255, "step": 17900 }, { "epoch": 16.221719457013574, "grad_norm": 1.8958951234817505, "learning_rate": 0.0001482427813066017, "loss": 4.3559, "step": 17925 }, { "epoch": 
16.244343891402714, "grad_norm": 1.710372805595398, "learning_rate": 0.00014823001214859903, "loss": 4.2967, "step": 17950 }, { "epoch": 16.266968325791854, "grad_norm": 2.1480696201324463, "learning_rate": 0.0001482171973176245, "loss": 4.4731, "step": 17975 }, { "epoch": 16.289592760180994, "grad_norm": 1.6352964639663696, "learning_rate": 0.0001482043368216706, "loss": 4.336, "step": 18000 }, { "epoch": 16.312217194570135, "grad_norm": 1.9280736446380615, "learning_rate": 0.00014819143066875832, "loss": 4.2713, "step": 18025 }, { "epoch": 16.334841628959275, "grad_norm": 2.057253837585449, "learning_rate": 0.00014817847886693713, "loss": 4.3916, "step": 18050 }, { "epoch": 16.357466063348415, "grad_norm": 1.9234403371810913, "learning_rate": 0.00014816548142428495, "loss": 4.4578, "step": 18075 }, { "epoch": 16.380090497737555, "grad_norm": 1.9432510137557983, "learning_rate": 0.0001481524383489082, "loss": 4.4288, "step": 18100 }, { "epoch": 16.402714932126695, "grad_norm": 1.8046300411224365, "learning_rate": 0.00014813934964894176, "loss": 4.5106, "step": 18125 }, { "epoch": 16.425339366515836, "grad_norm": 1.8123124837875366, "learning_rate": 0.00014812621533254888, "loss": 4.3949, "step": 18150 }, { "epoch": 16.447963800904976, "grad_norm": 2.1310598850250244, "learning_rate": 0.0001481130354079214, "loss": 4.532, "step": 18175 }, { "epoch": 16.470588235294116, "grad_norm": 2.790117025375366, "learning_rate": 0.0001480998098832795, "loss": 4.3405, "step": 18200 }, { "epoch": 16.49321266968326, "grad_norm": 2.3215723037719727, "learning_rate": 0.00014808653876687185, "loss": 4.4327, "step": 18225 }, { "epoch": 16.5158371040724, "grad_norm": 1.8296551704406738, "learning_rate": 0.0001480732220669755, "loss": 4.5334, "step": 18250 }, { "epoch": 16.53846153846154, "grad_norm": 2.881291627883911, "learning_rate": 0.00014805985979189602, "loss": 4.4545, "step": 18275 }, { "epoch": 16.56108597285068, "grad_norm": 2.629380941390991, "learning_rate": 
0.0001480464519499673, "loss": 4.4504, "step": 18300 }, { "epoch": 16.58371040723982, "grad_norm": 1.830816626548767, "learning_rate": 0.00014803299854955173, "loss": 4.5261, "step": 18325 }, { "epoch": 16.60633484162896, "grad_norm": 1.7896275520324707, "learning_rate": 0.0001480194995990401, "loss": 4.5665, "step": 18350 }, { "epoch": 16.6289592760181, "grad_norm": 1.8729816675186157, "learning_rate": 0.00014800595510685162, "loss": 4.4799, "step": 18375 }, { "epoch": 16.65158371040724, "grad_norm": 2.032773494720459, "learning_rate": 0.0001479923650814338, "loss": 4.4249, "step": 18400 }, { "epoch": 16.67420814479638, "grad_norm": 2.0493974685668945, "learning_rate": 0.0001479787295312627, "loss": 4.4361, "step": 18425 }, { "epoch": 16.69683257918552, "grad_norm": 1.7331079244613647, "learning_rate": 0.0001479650484648427, "loss": 4.4077, "step": 18450 }, { "epoch": 16.71945701357466, "grad_norm": 1.6861578226089478, "learning_rate": 0.00014795132189070653, "loss": 4.4833, "step": 18475 }, { "epoch": 16.742081447963802, "grad_norm": 1.9591151475906372, "learning_rate": 0.0001479375498174154, "loss": 4.5211, "step": 18500 }, { "epoch": 16.764705882352942, "grad_norm": 1.5738307237625122, "learning_rate": 0.00014792373225355879, "loss": 4.5, "step": 18525 }, { "epoch": 16.787330316742082, "grad_norm": 2.668651819229126, "learning_rate": 0.00014790986920775462, "loss": 4.4206, "step": 18550 }, { "epoch": 16.809954751131222, "grad_norm": 1.7280417680740356, "learning_rate": 0.00014789596068864915, "loss": 4.3528, "step": 18575 }, { "epoch": 16.832579185520363, "grad_norm": 2.1722147464752197, "learning_rate": 0.00014788200670491706, "loss": 4.5144, "step": 18600 }, { "epoch": 16.855203619909503, "grad_norm": 2.2709977626800537, "learning_rate": 0.00014786800726526126, "loss": 4.4895, "step": 18625 }, { "epoch": 16.877828054298643, "grad_norm": 2.3444464206695557, "learning_rate": 0.00014785396237841316, "loss": 4.5319, "step": 18650 }, { "epoch": 16.900452488687783, 
"grad_norm": 2.3623125553131104, "learning_rate": 0.00014783987205313243, "loss": 4.4935, "step": 18675 }, { "epoch": 16.923076923076923, "grad_norm": 2.035130262374878, "learning_rate": 0.00014782573629820706, "loss": 4.4686, "step": 18700 }, { "epoch": 16.945701357466064, "grad_norm": 2.233100175857544, "learning_rate": 0.00014781155512245343, "loss": 4.5366, "step": 18725 }, { "epoch": 16.968325791855204, "grad_norm": 1.819060206413269, "learning_rate": 0.00014779732853471624, "loss": 4.5277, "step": 18750 }, { "epoch": 16.990950226244344, "grad_norm": 3.098139524459839, "learning_rate": 0.00014778305654386848, "loss": 4.3685, "step": 18775 }, { "epoch": 17.013574660633484, "grad_norm": 2.7072527408599854, "learning_rate": 0.00014776873915881147, "loss": 4.3317, "step": 18800 }, { "epoch": 17.036199095022624, "grad_norm": 1.5817420482635498, "learning_rate": 0.00014775437638847485, "loss": 4.2269, "step": 18825 }, { "epoch": 17.058823529411764, "grad_norm": 1.7589970827102661, "learning_rate": 0.00014773996824181656, "loss": 4.2141, "step": 18850 }, { "epoch": 17.081447963800905, "grad_norm": 2.6992104053497314, "learning_rate": 0.00014772551472782286, "loss": 4.1172, "step": 18875 }, { "epoch": 17.104072398190045, "grad_norm": 1.9499256610870361, "learning_rate": 0.00014771101585550828, "loss": 4.2247, "step": 18900 }, { "epoch": 17.126696832579185, "grad_norm": 1.7662402391433716, "learning_rate": 0.00014769647163391568, "loss": 4.3208, "step": 18925 }, { "epoch": 17.149321266968325, "grad_norm": 1.9599316120147705, "learning_rate": 0.00014768188207211615, "loss": 4.149, "step": 18950 }, { "epoch": 17.171945701357465, "grad_norm": 1.9052058458328247, "learning_rate": 0.00014766724717920907, "loss": 4.2268, "step": 18975 }, { "epoch": 17.194570135746606, "grad_norm": 1.827323079109192, "learning_rate": 0.00014765256696432213, "loss": 4.3944, "step": 19000 }, { "epoch": 17.217194570135746, "grad_norm": 1.8786205053329468, "learning_rate": 0.00014763784143661125, 
"loss": 4.2911, "step": 19025 }, { "epoch": 17.239819004524886, "grad_norm": 1.890916109085083, "learning_rate": 0.00014762307060526064, "loss": 4.3243, "step": 19050 }, { "epoch": 17.262443438914026, "grad_norm": 2.4130940437316895, "learning_rate": 0.0001476082544794827, "loss": 4.267, "step": 19075 }, { "epoch": 17.285067873303166, "grad_norm": 2.0690503120422363, "learning_rate": 0.0001475933930685182, "loss": 4.4042, "step": 19100 }, { "epoch": 17.307692307692307, "grad_norm": 1.4941195249557495, "learning_rate": 0.00014757848638163602, "loss": 4.3633, "step": 19125 }, { "epoch": 17.330316742081447, "grad_norm": 2.508427858352661, "learning_rate": 0.0001475635344281334, "loss": 4.3816, "step": 19150 }, { "epoch": 17.352941176470587, "grad_norm": 1.9983786344528198, "learning_rate": 0.0001475485372173357, "loss": 4.2859, "step": 19175 }, { "epoch": 17.375565610859727, "grad_norm": 5.220970153808594, "learning_rate": 0.00014753349475859657, "loss": 4.4096, "step": 19200 }, { "epoch": 17.398190045248867, "grad_norm": 2.2304937839508057, "learning_rate": 0.0001475184070612979, "loss": 4.2902, "step": 19225 }, { "epoch": 17.420814479638008, "grad_norm": 1.8377021551132202, "learning_rate": 0.00014750327413484975, "loss": 4.2992, "step": 19250 }, { "epoch": 17.443438914027148, "grad_norm": 6.677389621734619, "learning_rate": 0.00014748809598869042, "loss": 4.4381, "step": 19275 }, { "epoch": 17.466063348416288, "grad_norm": 2.76698637008667, "learning_rate": 0.00014747287263228634, "loss": 4.1557, "step": 19300 }, { "epoch": 17.488687782805428, "grad_norm": 2.1575393676757812, "learning_rate": 0.00014745760407513226, "loss": 4.1819, "step": 19325 }, { "epoch": 17.511312217194572, "grad_norm": 2.5181052684783936, "learning_rate": 0.00014744229032675105, "loss": 4.3038, "step": 19350 }, { "epoch": 17.533936651583712, "grad_norm": 3.4468493461608887, "learning_rate": 0.00014742693139669375, "loss": 4.3447, "step": 19375 }, { "epoch": 17.556561085972852, "grad_norm": 
3.16564679145813, "learning_rate": 0.0001474115272945396, "loss": 4.2663, "step": 19400 }, { "epoch": 17.579185520361992, "grad_norm": 1.846726417541504, "learning_rate": 0.00014739607802989602, "loss": 4.3996, "step": 19425 }, { "epoch": 17.601809954751133, "grad_norm": 2.7358832359313965, "learning_rate": 0.0001473805836123986, "loss": 4.4441, "step": 19450 }, { "epoch": 17.624434389140273, "grad_norm": 1.9710636138916016, "learning_rate": 0.0001473650440517111, "loss": 4.3825, "step": 19475 }, { "epoch": 17.647058823529413, "grad_norm": 2.2102885246276855, "learning_rate": 0.00014734945935752537, "loss": 4.2444, "step": 19500 }, { "epoch": 17.669683257918553, "grad_norm": 2.3739373683929443, "learning_rate": 0.00014733382953956148, "loss": 4.2263, "step": 19525 }, { "epoch": 17.692307692307693, "grad_norm": 2.2633421421051025, "learning_rate": 0.00014731815460756765, "loss": 4.2954, "step": 19550 }, { "epoch": 17.714932126696834, "grad_norm": 1.925898790359497, "learning_rate": 0.0001473024345713202, "loss": 4.3745, "step": 19575 }, { "epoch": 17.737556561085974, "grad_norm": 2.121878147125244, "learning_rate": 0.00014728666944062357, "loss": 4.2597, "step": 19600 }, { "epoch": 17.760180995475114, "grad_norm": 2.168797492980957, "learning_rate": 0.00014727085922531036, "loss": 4.3293, "step": 19625 }, { "epoch": 17.782805429864254, "grad_norm": 2.117703914642334, "learning_rate": 0.00014725500393524126, "loss": 4.3164, "step": 19650 }, { "epoch": 17.805429864253394, "grad_norm": 2.157322883605957, "learning_rate": 0.00014723910358030513, "loss": 4.3772, "step": 19675 }, { "epoch": 17.828054298642535, "grad_norm": 2.02313494682312, "learning_rate": 0.00014722315817041883, "loss": 4.4356, "step": 19700 }, { "epoch": 17.850678733031675, "grad_norm": 2.4830150604248047, "learning_rate": 0.0001472071677155274, "loss": 4.312, "step": 19725 }, { "epoch": 17.873303167420815, "grad_norm": 2.015939235687256, "learning_rate": 0.00014719113222560402, "loss": 4.3141, "step": 
19750 }, { "epoch": 17.895927601809955, "grad_norm": 2.0206422805786133, "learning_rate": 0.00014717505171064983, "loss": 4.2916, "step": 19775 }, { "epoch": 17.918552036199095, "grad_norm": 3.191218852996826, "learning_rate": 0.00014715892618069417, "loss": 4.3458, "step": 19800 }, { "epoch": 17.941176470588236, "grad_norm": 1.7842390537261963, "learning_rate": 0.00014714275564579432, "loss": 4.3788, "step": 19825 }, { "epoch": 17.963800904977376, "grad_norm": 2.51277494430542, "learning_rate": 0.0001471265401160358, "loss": 4.2968, "step": 19850 }, { "epoch": 17.986425339366516, "grad_norm": 1.9772491455078125, "learning_rate": 0.00014711027960153208, "loss": 4.2711, "step": 19875 }, { "epoch": 18.009049773755656, "grad_norm": 2.1392099857330322, "learning_rate": 0.00014709397411242467, "loss": 4.2256, "step": 19900 }, { "epoch": 18.031674208144796, "grad_norm": 2.5523273944854736, "learning_rate": 0.00014707762365888326, "loss": 4.1125, "step": 19925 }, { "epoch": 18.054298642533936, "grad_norm": 1.976250171661377, "learning_rate": 0.00014706122825110541, "loss": 4.1466, "step": 19950 }, { "epoch": 18.076923076923077, "grad_norm": 1.9020646810531616, "learning_rate": 0.00014704478789931687, "loss": 4.1489, "step": 19975 }, { "epoch": 18.099547511312217, "grad_norm": 2.142031192779541, "learning_rate": 0.0001470283026137713, "loss": 4.0118, "step": 20000 }, { "epoch": 18.122171945701357, "grad_norm": 2.3549931049346924, "learning_rate": 0.00014701177240475046, "loss": 4.1826, "step": 20025 }, { "epoch": 18.144796380090497, "grad_norm": 2.1854188442230225, "learning_rate": 0.00014699519728256414, "loss": 4.1429, "step": 20050 }, { "epoch": 18.167420814479637, "grad_norm": 2.0742249488830566, "learning_rate": 0.00014697857725755006, "loss": 4.188, "step": 20075 }, { "epoch": 18.190045248868778, "grad_norm": 2.2574656009674072, "learning_rate": 0.00014696191234007404, "loss": 4.0573, "step": 20100 }, { "epoch": 18.212669683257918, "grad_norm": 2.203341245651245, 
"learning_rate": 0.00014694520254052984, "loss": 4.0884, "step": 20125 }, { "epoch": 18.235294117647058, "grad_norm": 2.074465274810791, "learning_rate": 0.00014692844786933922, "loss": 4.1462, "step": 20150 }, { "epoch": 18.257918552036198, "grad_norm": 2.674535036087036, "learning_rate": 0.00014691164833695197, "loss": 4.0187, "step": 20175 }, { "epoch": 18.28054298642534, "grad_norm": 1.9000567197799683, "learning_rate": 0.00014689480395384575, "loss": 4.2204, "step": 20200 }, { "epoch": 18.30316742081448, "grad_norm": 1.8418059349060059, "learning_rate": 0.00014687791473052633, "loss": 4.1562, "step": 20225 }, { "epoch": 18.32579185520362, "grad_norm": 2.8064866065979004, "learning_rate": 0.00014686098067752737, "loss": 4.081, "step": 20250 }, { "epoch": 18.34841628959276, "grad_norm": 2.413703203201294, "learning_rate": 0.00014684400180541048, "loss": 4.1337, "step": 20275 }, { "epoch": 18.3710407239819, "grad_norm": 3.4849491119384766, "learning_rate": 0.00014682697812476529, "loss": 4.2092, "step": 20300 }, { "epoch": 18.39366515837104, "grad_norm": 2.3783164024353027, "learning_rate": 0.0001468099096462093, "loss": 4.1779, "step": 20325 }, { "epoch": 18.41628959276018, "grad_norm": 1.7030620574951172, "learning_rate": 0.00014679279638038796, "loss": 4.2995, "step": 20350 }, { "epoch": 18.43891402714932, "grad_norm": 2.441593885421753, "learning_rate": 0.0001467756383379747, "loss": 4.2323, "step": 20375 }, { "epoch": 18.46153846153846, "grad_norm": 2.21101713180542, "learning_rate": 0.00014675843552967093, "loss": 4.2335, "step": 20400 }, { "epoch": 18.4841628959276, "grad_norm": 2.7679131031036377, "learning_rate": 0.0001467411879662058, "loss": 4.3095, "step": 20425 }, { "epoch": 18.50678733031674, "grad_norm": 2.2078464031219482, "learning_rate": 0.0001467238956583365, "loss": 4.0579, "step": 20450 }, { "epoch": 18.529411764705884, "grad_norm": 2.1665759086608887, "learning_rate": 0.00014670655861684812, "loss": 4.2085, "step": 20475 }, { "epoch": 
18.552036199095024, "grad_norm": 2.4058430194854736, "learning_rate": 0.00014668917685255366, "loss": 4.0638, "step": 20500 }, { "epoch": 18.574660633484164, "grad_norm": 2.21022629737854, "learning_rate": 0.0001466717503762939, "loss": 4.1707, "step": 20525 }, { "epoch": 18.597285067873305, "grad_norm": 2.420351982116699, "learning_rate": 0.00014665427919893767, "loss": 4.1226, "step": 20550 }, { "epoch": 18.619909502262445, "grad_norm": 2.014174699783325, "learning_rate": 0.0001466367633313816, "loss": 4.2822, "step": 20575 }, { "epoch": 18.642533936651585, "grad_norm": 2.4564104080200195, "learning_rate": 0.00014661920278455018, "loss": 4.234, "step": 20600 }, { "epoch": 18.665158371040725, "grad_norm": 2.478470802307129, "learning_rate": 0.00014660159756939577, "loss": 4.1423, "step": 20625 }, { "epoch": 18.687782805429865, "grad_norm": 2.064303159713745, "learning_rate": 0.00014658394769689865, "loss": 4.3023, "step": 20650 }, { "epoch": 18.710407239819006, "grad_norm": 3.076389789581299, "learning_rate": 0.00014656625317806683, "loss": 4.2387, "step": 20675 }, { "epoch": 18.733031674208146, "grad_norm": 4.118660926818848, "learning_rate": 0.00014654851402393627, "loss": 4.1435, "step": 20700 }, { "epoch": 18.755656108597286, "grad_norm": 1.9801812171936035, "learning_rate": 0.00014653073024557077, "loss": 4.1353, "step": 20725 }, { "epoch": 18.778280542986426, "grad_norm": 1.930025339126587, "learning_rate": 0.0001465129018540619, "loss": 4.1906, "step": 20750 }, { "epoch": 18.800904977375566, "grad_norm": 1.7695097923278809, "learning_rate": 0.00014649502886052908, "loss": 4.1309, "step": 20775 }, { "epoch": 18.823529411764707, "grad_norm": 2.5337889194488525, "learning_rate": 0.00014647711127611959, "loss": 4.013, "step": 20800 }, { "epoch": 18.846153846153847, "grad_norm": 2.259813070297241, "learning_rate": 0.00014645914911200843, "loss": 4.3515, "step": 20825 }, { "epoch": 18.868778280542987, "grad_norm": 1.9734256267547607, "learning_rate": 
0.0001464411423793985, "loss": 4.0642, "step": 20850 }, { "epoch": 18.891402714932127, "grad_norm": 2.131457567214966, "learning_rate": 0.00014642309108952044, "loss": 4.1572, "step": 20875 }, { "epoch": 18.914027149321267, "grad_norm": 2.4415125846862793, "learning_rate": 0.0001464049952536327, "loss": 4.2472, "step": 20900 }, { "epoch": 18.936651583710407, "grad_norm": 2.270073175430298, "learning_rate": 0.00014638685488302147, "loss": 4.2295, "step": 20925 }, { "epoch": 18.959276018099548, "grad_norm": 2.5856406688690186, "learning_rate": 0.00014636866998900082, "loss": 4.2938, "step": 20950 }, { "epoch": 18.981900452488688, "grad_norm": 1.826500654220581, "learning_rate": 0.00014635044058291247, "loss": 4.2264, "step": 20975 }, { "epoch": 19.004524886877828, "grad_norm": 3.0457630157470703, "learning_rate": 0.000146332166676126, "loss": 4.1044, "step": 21000 }, { "epoch": 19.02714932126697, "grad_norm": 2.027559280395508, "learning_rate": 0.00014631384828003865, "loss": 4.0324, "step": 21025 }, { "epoch": 19.04977375565611, "grad_norm": 3.3464810848236084, "learning_rate": 0.0001462954854060755, "loss": 4.0011, "step": 21050 }, { "epoch": 19.07239819004525, "grad_norm": 2.8238072395324707, "learning_rate": 0.0001462770780656893, "loss": 4.0729, "step": 21075 }, { "epoch": 19.09502262443439, "grad_norm": 2.94176983833313, "learning_rate": 0.00014625862627036054, "loss": 3.9543, "step": 21100 }, { "epoch": 19.11764705882353, "grad_norm": 2.5876317024230957, "learning_rate": 0.00014624013003159753, "loss": 3.9983, "step": 21125 }, { "epoch": 19.14027149321267, "grad_norm": 2.7959675788879395, "learning_rate": 0.00014622158936093617, "loss": 3.9553, "step": 21150 }, { "epoch": 19.16289592760181, "grad_norm": 2.9518980979919434, "learning_rate": 0.00014620300426994014, "loss": 4.0165, "step": 21175 }, { "epoch": 19.18552036199095, "grad_norm": 2.9283668994903564, "learning_rate": 0.0001461843747702008, "loss": 3.9963, "step": 21200 }, { "epoch": 19.20814479638009, 
"grad_norm": 2.777820587158203, "learning_rate": 0.00014616570087333723, "loss": 4.0544, "step": 21225 }, { "epoch": 19.23076923076923, "grad_norm": 2.8670153617858887, "learning_rate": 0.0001461469825909962, "loss": 4.0468, "step": 21250 }, { "epoch": 19.25339366515837, "grad_norm": 2.9561901092529297, "learning_rate": 0.00014612821993485213, "loss": 4.0144, "step": 21275 }, { "epoch": 19.27601809954751, "grad_norm": 3.267868757247925, "learning_rate": 0.00014610941291660716, "loss": 3.9227, "step": 21300 }, { "epoch": 19.29864253393665, "grad_norm": 2.76511812210083, "learning_rate": 0.00014609056154799108, "loss": 4.0184, "step": 21325 }, { "epoch": 19.32126696832579, "grad_norm": 2.691312551498413, "learning_rate": 0.00014607166584076133, "loss": 4.0367, "step": 21350 }, { "epoch": 19.34389140271493, "grad_norm": 2.1221303939819336, "learning_rate": 0.00014605272580670296, "loss": 3.9295, "step": 21375 }, { "epoch": 19.36651583710407, "grad_norm": 2.8809330463409424, "learning_rate": 0.0001460337414576288, "loss": 4.0271, "step": 21400 }, { "epoch": 19.38914027149321, "grad_norm": 2.1942460536956787, "learning_rate": 0.0001460147128053792, "loss": 3.9666, "step": 21425 }, { "epoch": 19.41176470588235, "grad_norm": 1.810325026512146, "learning_rate": 0.0001459956398618222, "loss": 4.0222, "step": 21450 }, { "epoch": 19.43438914027149, "grad_norm": 3.1612627506256104, "learning_rate": 0.0001459765226388534, "loss": 4.1373, "step": 21475 }, { "epoch": 19.457013574660632, "grad_norm": 1.9534374475479126, "learning_rate": 0.00014595736114839607, "loss": 4.2342, "step": 21500 }, { "epoch": 19.479638009049772, "grad_norm": 2.4724044799804688, "learning_rate": 0.0001459381554024011, "loss": 4.0709, "step": 21525 }, { "epoch": 19.502262443438916, "grad_norm": 2.7774980068206787, "learning_rate": 0.00014591890541284695, "loss": 4.0093, "step": 21550 }, { "epoch": 19.524886877828056, "grad_norm": 3.0610437393188477, "learning_rate": 0.0001458996111917397, "loss": 4.0114, 
"step": 21575 }, { "epoch": 19.547511312217196, "grad_norm": 2.0043299198150635, "learning_rate": 0.00014588027275111293, "loss": 4.0228, "step": 21600 }, { "epoch": 19.570135746606336, "grad_norm": 2.2680184841156006, "learning_rate": 0.00014586089010302795, "loss": 4.0565, "step": 21625 }, { "epoch": 19.592760180995477, "grad_norm": 4.519238471984863, "learning_rate": 0.00014584146325957357, "loss": 3.9648, "step": 21650 }, { "epoch": 19.615384615384617, "grad_norm": 2.4883666038513184, "learning_rate": 0.0001458219922328661, "loss": 4.0511, "step": 21675 }, { "epoch": 19.638009049773757, "grad_norm": 2.097761392593384, "learning_rate": 0.00014580247703504948, "loss": 4.0443, "step": 21700 }, { "epoch": 19.660633484162897, "grad_norm": 2.8141157627105713, "learning_rate": 0.00014578291767829518, "loss": 4.0383, "step": 21725 }, { "epoch": 19.683257918552037, "grad_norm": 2.520421266555786, "learning_rate": 0.00014576331417480226, "loss": 3.8808, "step": 21750 }, { "epoch": 19.705882352941178, "grad_norm": 2.791774034500122, "learning_rate": 0.0001457436665367972, "loss": 4.0448, "step": 21775 }, { "epoch": 19.728506787330318, "grad_norm": 2.9688174724578857, "learning_rate": 0.0001457239747765341, "loss": 4.1494, "step": 21800 }, { "epoch": 19.751131221719458, "grad_norm": 2.548532009124756, "learning_rate": 0.00014570423890629457, "loss": 3.9962, "step": 21825 }, { "epoch": 19.773755656108598, "grad_norm": 2.1748194694519043, "learning_rate": 0.0001456844589383877, "loss": 4.1168, "step": 21850 }, { "epoch": 19.79638009049774, "grad_norm": 3.12788987159729, "learning_rate": 0.00014566463488515012, "loss": 4.037, "step": 21875 }, { "epoch": 19.81900452488688, "grad_norm": 3.020284414291382, "learning_rate": 0.0001456447667589459, "loss": 4.0868, "step": 21900 }, { "epoch": 19.84162895927602, "grad_norm": 2.217618942260742, "learning_rate": 0.00014562485457216663, "loss": 4.1297, "step": 21925 }, { "epoch": 19.86425339366516, "grad_norm": 2.423015594482422, 
"learning_rate": 0.0001456048983372314, "loss": 4.0776, "step": 21950 }, { "epoch": 19.8868778280543, "grad_norm": 2.9892938137054443, "learning_rate": 0.00014558489806658676, "loss": 3.9935, "step": 21975 }, { "epoch": 19.90950226244344, "grad_norm": 2.593134880065918, "learning_rate": 0.0001455648537727067, "loss": 4.1027, "step": 22000 }, { "epoch": 19.93212669683258, "grad_norm": 2.268334150314331, "learning_rate": 0.0001455447654680927, "loss": 4.008, "step": 22025 }, { "epoch": 19.95475113122172, "grad_norm": 2.3195865154266357, "learning_rate": 0.00014552463316527367, "loss": 4.1872, "step": 22050 }, { "epoch": 19.97737556561086, "grad_norm": 1.8791290521621704, "learning_rate": 0.00014550445687680597, "loss": 3.9837, "step": 22075 }, { "epoch": 20.0, "grad_norm": 2.1045377254486084, "learning_rate": 0.00014548423661527336, "loss": 4.0442, "step": 22100 }, { "epoch": 20.02262443438914, "grad_norm": 2.456329107284546, "learning_rate": 0.0001454639723932871, "loss": 3.7574, "step": 22125 }, { "epoch": 20.04524886877828, "grad_norm": 3.4917218685150146, "learning_rate": 0.0001454436642234858, "loss": 3.8406, "step": 22150 }, { "epoch": 20.06787330316742, "grad_norm": 3.367820978164673, "learning_rate": 0.0001454233121185355, "loss": 3.8001, "step": 22175 }, { "epoch": 20.09049773755656, "grad_norm": 2.7249231338500977, "learning_rate": 0.00014540291609112965, "loss": 3.8397, "step": 22200 }, { "epoch": 20.1131221719457, "grad_norm": 2.733926773071289, "learning_rate": 0.0001453824761539891, "loss": 3.7859, "step": 22225 }, { "epoch": 20.13574660633484, "grad_norm": 3.5410749912261963, "learning_rate": 0.00014536199231986204, "loss": 3.924, "step": 22250 }, { "epoch": 20.15837104072398, "grad_norm": 2.766115188598633, "learning_rate": 0.00014534146460152409, "loss": 3.8949, "step": 22275 }, { "epoch": 20.18099547511312, "grad_norm": 3.4365551471710205, "learning_rate": 0.00014532089301177826, "loss": 3.8416, "step": 22300 }, { "epoch": 20.20361990950226, 
"grad_norm": 2.3583567142486572, "learning_rate": 0.00014530027756345487, "loss": 3.8667, "step": 22325 }, { "epoch": 20.226244343891402, "grad_norm": 2.6554083824157715, "learning_rate": 0.00014527961826941155, "loss": 3.8911, "step": 22350 }, { "epoch": 20.248868778280542, "grad_norm": 2.0309135913848877, "learning_rate": 0.0001452589151425334, "loss": 3.8475, "step": 22375 }, { "epoch": 20.271493212669682, "grad_norm": 2.347342014312744, "learning_rate": 0.00014523816819573277, "loss": 3.953, "step": 22400 }, { "epoch": 20.294117647058822, "grad_norm": 2.267829656600952, "learning_rate": 0.0001452173774419494, "loss": 3.7009, "step": 22425 }, { "epoch": 20.316742081447963, "grad_norm": 2.2686140537261963, "learning_rate": 0.00014519654289415026, "loss": 3.9219, "step": 22450 }, { "epoch": 20.339366515837103, "grad_norm": 3.073906898498535, "learning_rate": 0.0001451756645653297, "loss": 3.8887, "step": 22475 }, { "epoch": 20.361990950226243, "grad_norm": 2.2049973011016846, "learning_rate": 0.00014515474246850943, "loss": 3.8091, "step": 22500 }, { "epoch": 20.384615384615383, "grad_norm": 2.884639263153076, "learning_rate": 0.00014513377661673832, "loss": 3.9657, "step": 22525 }, { "epoch": 20.407239819004523, "grad_norm": 2.205660104751587, "learning_rate": 0.00014511276702309264, "loss": 3.9964, "step": 22550 }, { "epoch": 20.429864253393664, "grad_norm": 2.565671682357788, "learning_rate": 0.0001450917137006759, "loss": 3.9969, "step": 22575 }, { "epoch": 20.452488687782804, "grad_norm": 2.937319278717041, "learning_rate": 0.0001450706166626189, "loss": 3.901, "step": 22600 }, { "epoch": 20.475113122171944, "grad_norm": 2.7020833492279053, "learning_rate": 0.00014504947592207965, "loss": 3.8461, "step": 22625 }, { "epoch": 20.497737556561084, "grad_norm": 2.310760259628296, "learning_rate": 0.0001450282914922435, "loss": 3.8047, "step": 22650 }, { "epoch": 20.520361990950228, "grad_norm": 2.482426881790161, "learning_rate": 0.00014500706338632302, "loss": 
3.9389, "step": 22675 }, { "epoch": 20.542986425339368, "grad_norm": 3.0349998474121094, "learning_rate": 0.00014498664332646884, "loss": 3.8287, "step": 22700 }, { "epoch": 20.56561085972851, "grad_norm": 3.132357597351074, "learning_rate": 0.0001449653296538543, "loss": 4.0393, "step": 22725 }, { "epoch": 20.58823529411765, "grad_norm": 2.248824119567871, "learning_rate": 0.0001449439723444242, "loss": 3.952, "step": 22750 }, { "epoch": 20.61085972850679, "grad_norm": 2.4726078510284424, "learning_rate": 0.00014492257141149895, "loss": 4.0212, "step": 22775 }, { "epoch": 20.63348416289593, "grad_norm": 3.7489490509033203, "learning_rate": 0.0001449011268684261, "loss": 4.0147, "step": 22800 }, { "epoch": 20.65610859728507, "grad_norm": 3.4584591388702393, "learning_rate": 0.00014487963872858046, "loss": 4.0384, "step": 22825 }, { "epoch": 20.67873303167421, "grad_norm": 3.992187023162842, "learning_rate": 0.000144858107005364, "loss": 3.8956, "step": 22850 }, { "epoch": 20.70135746606335, "grad_norm": 2.215635061264038, "learning_rate": 0.0001448365317122059, "loss": 3.8018, "step": 22875 }, { "epoch": 20.72398190045249, "grad_norm": 2.9964985847473145, "learning_rate": 0.00014481491286256248, "loss": 3.8225, "step": 22900 }, { "epoch": 20.74660633484163, "grad_norm": 4.619599342346191, "learning_rate": 0.00014479325046991726, "loss": 3.9947, "step": 22925 }, { "epoch": 20.76923076923077, "grad_norm": 2.641470432281494, "learning_rate": 0.00014477154454778086, "loss": 4.0523, "step": 22950 }, { "epoch": 20.79185520361991, "grad_norm": 3.320308208465576, "learning_rate": 0.00014474979510969117, "loss": 3.8796, "step": 22975 }, { "epoch": 20.81447963800905, "grad_norm": 3.0288734436035156, "learning_rate": 0.00014472800216921305, "loss": 3.8345, "step": 23000 }, { "epoch": 20.83710407239819, "grad_norm": 2.2802088260650635, "learning_rate": 0.00014470616573993865, "loss": 3.8913, "step": 23025 }, { "epoch": 20.85972850678733, "grad_norm": 2.45820689201355, 
"learning_rate": 0.00014468428583548716, "loss": 3.9711, "step": 23050 }, { "epoch": 20.88235294117647, "grad_norm": 1.9973255395889282, "learning_rate": 0.00014466236246950487, "loss": 3.9193, "step": 23075 }, { "epoch": 20.90497737556561, "grad_norm": 2.4245059490203857, "learning_rate": 0.0001446403956556652, "loss": 3.9111, "step": 23100 }, { "epoch": 20.92760180995475, "grad_norm": 4.0652546882629395, "learning_rate": 0.00014461838540766875, "loss": 4.0191, "step": 23125 }, { "epoch": 20.95022624434389, "grad_norm": 2.8320844173431396, "learning_rate": 0.0001445963317392431, "loss": 3.9825, "step": 23150 }, { "epoch": 20.97285067873303, "grad_norm": 2.5035881996154785, "learning_rate": 0.0001445742346641429, "loss": 3.9313, "step": 23175 }, { "epoch": 20.995475113122172, "grad_norm": 2.2385265827178955, "learning_rate": 0.00014455209419614998, "loss": 3.9076, "step": 23200 }, { "epoch": 21.018099547511312, "grad_norm": 3.920473098754883, "learning_rate": 0.00014452991034907313, "loss": 3.7294, "step": 23225 }, { "epoch": 21.040723981900452, "grad_norm": 3.026109218597412, "learning_rate": 0.00014450768313674825, "loss": 3.5229, "step": 23250 }, { "epoch": 21.063348416289593, "grad_norm": 3.1287951469421387, "learning_rate": 0.00014448541257303828, "loss": 3.6663, "step": 23275 }, { "epoch": 21.085972850678733, "grad_norm": 3.583796977996826, "learning_rate": 0.0001444630986718332, "loss": 3.6865, "step": 23300 }, { "epoch": 21.108597285067873, "grad_norm": 2.2165699005126953, "learning_rate": 0.00014444074144705002, "loss": 3.6303, "step": 23325 }, { "epoch": 21.131221719457013, "grad_norm": 3.276175022125244, "learning_rate": 0.00014441834091263276, "loss": 3.7655, "step": 23350 }, { "epoch": 21.153846153846153, "grad_norm": 2.832486152648926, "learning_rate": 0.0001443958970825524, "loss": 3.7292, "step": 23375 }, { "epoch": 21.176470588235293, "grad_norm": 2.4505176544189453, "learning_rate": 0.00014437340997080703, "loss": 3.6258, "step": 23400 }, { 
"epoch": 21.199095022624434, "grad_norm": 2.6741573810577393, "learning_rate": 0.00014435087959142166, "loss": 3.8267, "step": 23425 }, { "epoch": 21.221719457013574, "grad_norm": 2.6699492931365967, "learning_rate": 0.00014432830595844832, "loss": 3.7696, "step": 23450 }, { "epoch": 21.244343891402714, "grad_norm": 2.3976047039031982, "learning_rate": 0.000144305689085966, "loss": 3.67, "step": 23475 }, { "epoch": 21.266968325791854, "grad_norm": 2.6721272468566895, "learning_rate": 0.00014428302898808067, "loss": 3.798, "step": 23500 }, { "epoch": 21.289592760180994, "grad_norm": 3.6001930236816406, "learning_rate": 0.0001442603256789252, "loss": 3.6711, "step": 23525 }, { "epoch": 21.312217194570135, "grad_norm": 3.659670114517212, "learning_rate": 0.00014423757917265956, "loss": 3.6844, "step": 23550 }, { "epoch": 21.334841628959275, "grad_norm": 3.1840333938598633, "learning_rate": 0.00014421478948347047, "loss": 3.8196, "step": 23575 }, { "epoch": 21.357466063348415, "grad_norm": 2.3045458793640137, "learning_rate": 0.00014419195662557173, "loss": 3.7621, "step": 23600 }, { "epoch": 21.380090497737555, "grad_norm": 2.785276412963867, "learning_rate": 0.000144169080613204, "loss": 3.8661, "step": 23625 }, { "epoch": 21.402714932126695, "grad_norm": 3.0140879154205322, "learning_rate": 0.00014414616146063485, "loss": 3.8701, "step": 23650 }, { "epoch": 21.425339366515836, "grad_norm": 2.6863458156585693, "learning_rate": 0.0001441231991821588, "loss": 3.7959, "step": 23675 }, { "epoch": 21.447963800904976, "grad_norm": 2.7733335494995117, "learning_rate": 0.0001441001937920972, "loss": 3.9786, "step": 23700 }, { "epoch": 21.470588235294116, "grad_norm": 3.5744516849517822, "learning_rate": 0.00014407714530479835, "loss": 3.5387, "step": 23725 }, { "epoch": 21.49321266968326, "grad_norm": 2.776697874069214, "learning_rate": 0.0001440540537346374, "loss": 3.7142, "step": 23750 }, { "epoch": 21.5158371040724, "grad_norm": 3.0770251750946045, "learning_rate": 
0.0001440309190960164, "loss": 3.8133, "step": 23775 }, { "epoch": 21.53846153846154, "grad_norm": 2.604779005050659, "learning_rate": 0.00014400774140336422, "loss": 3.7582, "step": 23800 }, { "epoch": 21.56108597285068, "grad_norm": 2.170243263244629, "learning_rate": 0.0001439845206711366, "loss": 3.9327, "step": 23825 }, { "epoch": 21.58371040723982, "grad_norm": 3.603564500808716, "learning_rate": 0.00014396125691381613, "loss": 3.7161, "step": 23850 }, { "epoch": 21.60633484162896, "grad_norm": 2.814188241958618, "learning_rate": 0.0001439379501459122, "loss": 3.8073, "step": 23875 }, { "epoch": 21.6289592760181, "grad_norm": 2.9708809852600098, "learning_rate": 0.00014391460038196114, "loss": 3.7837, "step": 23900 }, { "epoch": 21.65158371040724, "grad_norm": 2.4882922172546387, "learning_rate": 0.00014389120763652592, "loss": 3.7579, "step": 23925 }, { "epoch": 21.67420814479638, "grad_norm": 3.3712284564971924, "learning_rate": 0.00014386777192419643, "loss": 3.6708, "step": 23950 }, { "epoch": 21.69683257918552, "grad_norm": 3.6276750564575195, "learning_rate": 0.00014384429325958937, "loss": 3.7829, "step": 23975 }, { "epoch": 21.71945701357466, "grad_norm": 2.4860033988952637, "learning_rate": 0.00014382077165734814, "loss": 3.8101, "step": 24000 }, { "epoch": 21.742081447963802, "grad_norm": 2.5607101917266846, "learning_rate": 0.000143797207132143, "loss": 3.6407, "step": 24025 }, { "epoch": 21.764705882352942, "grad_norm": 3.5957260131835938, "learning_rate": 0.00014377359969867102, "loss": 3.8883, "step": 24050 }, { "epoch": 21.787330316742082, "grad_norm": 2.6889231204986572, "learning_rate": 0.00014374994937165587, "loss": 3.8151, "step": 24075 }, { "epoch": 21.809954751131222, "grad_norm": 2.925708532333374, "learning_rate": 0.0001437262561658481, "loss": 3.8295, "step": 24100 }, { "epoch": 21.832579185520363, "grad_norm": 2.946596145629883, "learning_rate": 0.000143702520096025, "loss": 3.8385, "step": 24125 }, { "epoch": 21.855203619909503, 
"grad_norm": 3.8493125438690186, "learning_rate": 0.00014367874117699053, "loss": 3.8796, "step": 24150 }, { "epoch": 21.877828054298643, "grad_norm": 2.1728506088256836, "learning_rate": 0.00014365491942357545, "loss": 3.9122, "step": 24175 }, { "epoch": 21.900452488687783, "grad_norm": 2.742302656173706, "learning_rate": 0.00014363105485063716, "loss": 3.8483, "step": 24200 }, { "epoch": 21.923076923076923, "grad_norm": 4.172833442687988, "learning_rate": 0.00014360714747305983, "loss": 3.7386, "step": 24225 }, { "epoch": 21.945701357466064, "grad_norm": 2.3973381519317627, "learning_rate": 0.00014358319730575428, "loss": 3.7992, "step": 24250 }, { "epoch": 21.968325791855204, "grad_norm": 4.66103458404541, "learning_rate": 0.00014355920436365802, "loss": 3.8109, "step": 24275 }, { "epoch": 21.990950226244344, "grad_norm": 2.4127964973449707, "learning_rate": 0.00014353516866173532, "loss": 3.7642, "step": 24300 }, { "epoch": 22.013574660633484, "grad_norm": 3.1286137104034424, "learning_rate": 0.000143511090214977, "loss": 3.5502, "step": 24325 }, { "epoch": 22.036199095022624, "grad_norm": 2.3641533851623535, "learning_rate": 0.00014348696903840062, "loss": 3.5343, "step": 24350 }, { "epoch": 22.058823529411764, "grad_norm": 3.4149348735809326, "learning_rate": 0.00014346280514705034, "loss": 3.4885, "step": 24375 }, { "epoch": 22.081447963800905, "grad_norm": 4.007994651794434, "learning_rate": 0.000143438598555997, "loss": 3.5516, "step": 24400 }, { "epoch": 22.104072398190045, "grad_norm": 3.8492393493652344, "learning_rate": 0.00014341434928033807, "loss": 3.553, "step": 24425 }, { "epoch": 22.126696832579185, "grad_norm": 4.333260536193848, "learning_rate": 0.00014339005733519762, "loss": 3.4589, "step": 24450 }, { "epoch": 22.149321266968325, "grad_norm": 3.36545729637146, "learning_rate": 0.0001433657227357263, "loss": 3.6346, "step": 24475 }, { "epoch": 22.171945701357465, "grad_norm": 3.1914398670196533, "learning_rate": 0.00014334134549710148, "loss": 
3.5903, "step": 24500 }, { "epoch": 22.194570135746606, "grad_norm": 2.467092514038086, "learning_rate": 0.00014331692563452703, "loss": 3.6353, "step": 24525 }, { "epoch": 22.217194570135746, "grad_norm": 4.812871932983398, "learning_rate": 0.00014329246316323338, "loss": 3.412, "step": 24550 }, { "epoch": 22.239819004524886, "grad_norm": 3.4200334548950195, "learning_rate": 0.00014326795809847757, "loss": 3.5378, "step": 24575 }, { "epoch": 22.262443438914026, "grad_norm": 4.153021812438965, "learning_rate": 0.0001432434104555433, "loss": 3.5938, "step": 24600 }, { "epoch": 22.285067873303166, "grad_norm": 3.033048629760742, "learning_rate": 0.00014321882024974063, "loss": 3.8194, "step": 24625 }, { "epoch": 22.307692307692307, "grad_norm": 2.6659903526306152, "learning_rate": 0.00014319418749640637, "loss": 3.3993, "step": 24650 }, { "epoch": 22.330316742081447, "grad_norm": 3.0235040187835693, "learning_rate": 0.0001431695122109037, "loss": 3.5442, "step": 24675 }, { "epoch": 22.352941176470587, "grad_norm": 2.6311628818511963, "learning_rate": 0.00014314479440862243, "loss": 3.7309, "step": 24700 }, { "epoch": 22.375565610859727, "grad_norm": 2.480937957763672, "learning_rate": 0.00014312003410497885, "loss": 3.5935, "step": 24725 }, { "epoch": 22.398190045248867, "grad_norm": 2.8534250259399414, "learning_rate": 0.00014309523131541575, "loss": 3.7793, "step": 24750 }, { "epoch": 22.420814479638008, "grad_norm": 3.454343557357788, "learning_rate": 0.00014307038605540246, "loss": 3.6217, "step": 24775 }, { "epoch": 22.443438914027148, "grad_norm": 2.928800106048584, "learning_rate": 0.00014304549834043476, "loss": 3.5282, "step": 24800 }, { "epoch": 22.466063348416288, "grad_norm": 2.2259414196014404, "learning_rate": 0.00014302056818603492, "loss": 3.6573, "step": 24825 }, { "epoch": 22.488687782805428, "grad_norm": 2.492011547088623, "learning_rate": 0.00014299559560775163, "loss": 3.6935, "step": 24850 }, { "epoch": 22.511312217194572, "grad_norm": 
3.9176621437072754, "learning_rate": 0.00014297058062116014, "loss": 3.6242, "step": 24875 }, { "epoch": 22.533936651583712, "grad_norm": 3.987274646759033, "learning_rate": 0.0001429455232418621, "loss": 3.6212, "step": 24900 }, { "epoch": 22.556561085972852, "grad_norm": 2.9213876724243164, "learning_rate": 0.00014292042348548558, "loss": 3.7327, "step": 24925 }, { "epoch": 22.579185520361992, "grad_norm": 3.0751595497131348, "learning_rate": 0.0001428952813676851, "loss": 3.6155, "step": 24950 }, { "epoch": 22.601809954751133, "grad_norm": 4.081638813018799, "learning_rate": 0.00014287009690414158, "loss": 3.6378, "step": 24975 }, { "epoch": 22.624434389140273, "grad_norm": 3.4721250534057617, "learning_rate": 0.0001428448701105624, "loss": 3.7146, "step": 25000 }, { "epoch": 22.647058823529413, "grad_norm": 3.622154712677002, "learning_rate": 0.00014281960100268127, "loss": 3.7822, "step": 25025 }, { "epoch": 22.669683257918553, "grad_norm": 2.6639842987060547, "learning_rate": 0.0001427942895962584, "loss": 3.6893, "step": 25050 }, { "epoch": 22.692307692307693, "grad_norm": 2.982954740524292, "learning_rate": 0.0001427689359070802, "loss": 3.6211, "step": 25075 }, { "epoch": 22.714932126696834, "grad_norm": 3.1290342807769775, "learning_rate": 0.00014274353995095965, "loss": 3.7861, "step": 25100 }, { "epoch": 22.737556561085974, "grad_norm": 3.3933656215667725, "learning_rate": 0.00014271810174373598, "loss": 3.7582, "step": 25125 }, { "epoch": 22.760180995475114, "grad_norm": 2.868032932281494, "learning_rate": 0.00014269262130127481, "loss": 3.7166, "step": 25150 }, { "epoch": 22.782805429864254, "grad_norm": 2.7672760486602783, "learning_rate": 0.00014266709863946806, "loss": 3.6105, "step": 25175 }, { "epoch": 22.805429864253394, "grad_norm": 2.7547879219055176, "learning_rate": 0.00014264153377423403, "loss": 3.7227, "step": 25200 }, { "epoch": 22.828054298642535, "grad_norm": 3.555361032485962, "learning_rate": 0.00014261592672151733, "loss": 3.8386, 
"step": 25225 }, { "epoch": 22.850678733031675, "grad_norm": 2.814558982849121, "learning_rate": 0.00014259027749728885, "loss": 3.8839, "step": 25250 }, { "epoch": 22.873303167420815, "grad_norm": 2.9268410205841064, "learning_rate": 0.0001425645861175458, "loss": 3.7736, "step": 25275 }, { "epoch": 22.895927601809955, "grad_norm": 3.4612526893615723, "learning_rate": 0.00014253885259831172, "loss": 3.6202, "step": 25300 }, { "epoch": 22.918552036199095, "grad_norm": 3.606201171875, "learning_rate": 0.00014251307695563637, "loss": 3.6119, "step": 25325 }, { "epoch": 22.941176470588236, "grad_norm": 3.235017776489258, "learning_rate": 0.00014248725920559582, "loss": 3.664, "step": 25350 }, { "epoch": 22.963800904977376, "grad_norm": 3.830575942993164, "learning_rate": 0.0001424613993642924, "loss": 3.673, "step": 25375 }, { "epoch": 22.986425339366516, "grad_norm": 3.381676435470581, "learning_rate": 0.0001424354974478547, "loss": 3.6577, "step": 25400 }, { "epoch": 23.009049773755656, "grad_norm": 2.7601847648620605, "learning_rate": 0.0001424095534724375, "loss": 3.4432, "step": 25425 }, { "epoch": 23.031674208144796, "grad_norm": 2.761472225189209, "learning_rate": 0.0001423835674542219, "loss": 3.4836, "step": 25450 }, { "epoch": 23.054298642533936, "grad_norm": 3.1776866912841797, "learning_rate": 0.00014235753940941513, "loss": 3.273, "step": 25475 }, { "epoch": 23.076923076923077, "grad_norm": 2.9398069381713867, "learning_rate": 0.00014233146935425066, "loss": 3.4708, "step": 25500 }, { "epoch": 23.099547511312217, "grad_norm": 2.938962459564209, "learning_rate": 0.00014230535730498824, "loss": 3.5316, "step": 25525 }, { "epoch": 23.122171945701357, "grad_norm": 2.923698663711548, "learning_rate": 0.0001422792032779137, "loss": 3.4431, "step": 25550 }, { "epoch": 23.144796380090497, "grad_norm": 4.093198299407959, "learning_rate": 0.0001422530072893391, "loss": 3.4515, "step": 25575 }, { "epoch": 23.167420814479637, "grad_norm": 3.009486436843872, 
"learning_rate": 0.00014222676935560265, "loss": 3.4196, "step": 25600 }, { "epoch": 23.190045248868778, "grad_norm": 3.3584773540496826, "learning_rate": 0.0001422004894930688, "loss": 3.4173, "step": 25625 }, { "epoch": 23.212669683257918, "grad_norm": 3.2386748790740967, "learning_rate": 0.000142174167718128, "loss": 3.3641, "step": 25650 }, { "epoch": 23.235294117647058, "grad_norm": 3.175867795944214, "learning_rate": 0.000142147804047197, "loss": 3.5917, "step": 25675 }, { "epoch": 23.257918552036198, "grad_norm": 2.7285513877868652, "learning_rate": 0.00014212139849671863, "loss": 3.3328, "step": 25700 }, { "epoch": 23.28054298642534, "grad_norm": 2.6384241580963135, "learning_rate": 0.00014209495108316174, "loss": 3.4482, "step": 25725 }, { "epoch": 23.30316742081448, "grad_norm": 4.104657173156738, "learning_rate": 0.00014206846182302142, "loss": 3.3214, "step": 25750 }, { "epoch": 23.32579185520362, "grad_norm": 3.784193277359009, "learning_rate": 0.00014204193073281878, "loss": 3.3955, "step": 25775 }, { "epoch": 23.34841628959276, "grad_norm": 2.452960252761841, "learning_rate": 0.0001420153578291011, "loss": 3.5232, "step": 25800 }, { "epoch": 23.3710407239819, "grad_norm": 3.177434206008911, "learning_rate": 0.00014198874312844163, "loss": 3.5626, "step": 25825 }, { "epoch": 23.39366515837104, "grad_norm": 2.790126323699951, "learning_rate": 0.0001419620866474398, "loss": 3.5347, "step": 25850 }, { "epoch": 23.41628959276018, "grad_norm": 4.466036319732666, "learning_rate": 0.000141935388402721, "loss": 3.5762, "step": 25875 }, { "epoch": 23.43891402714932, "grad_norm": 3.542550563812256, "learning_rate": 0.00014190864841093673, "loss": 3.6707, "step": 25900 }, { "epoch": 23.46153846153846, "grad_norm": 3.3853087425231934, "learning_rate": 0.0001418818666887645, "loss": 3.5571, "step": 25925 }, { "epoch": 23.4841628959276, "grad_norm": 3.19075345993042, "learning_rate": 0.00014185504325290788, "loss": 3.583, "step": 25950 }, { "epoch": 
23.50678733031674, "grad_norm": 2.814525604248047, "learning_rate": 0.00014182817812009637, "loss": 3.6345, "step": 25975 }, { "epoch": 23.529411764705884, "grad_norm": 2.687741756439209, "learning_rate": 0.00014180127130708562, "loss": 3.3791, "step": 26000 }, { "epoch": 23.552036199095024, "grad_norm": 2.655111074447632, "learning_rate": 0.00014177432283065712, "loss": 3.5476, "step": 26025 }, { "epoch": 23.574660633484164, "grad_norm": 3.3815510272979736, "learning_rate": 0.0001417473327076185, "loss": 3.5151, "step": 26050 }, { "epoch": 23.597285067873305, "grad_norm": 3.5296308994293213, "learning_rate": 0.00014172030095480322, "loss": 3.3458, "step": 26075 }, { "epoch": 23.619909502262445, "grad_norm": 3.340770959854126, "learning_rate": 0.00014169322758907077, "loss": 3.7039, "step": 26100 }, { "epoch": 23.642533936651585, "grad_norm": 4.046700477600098, "learning_rate": 0.0001416661126273066, "loss": 3.3456, "step": 26125 }, { "epoch": 23.665158371040725, "grad_norm": 4.637563705444336, "learning_rate": 0.00014163895608642214, "loss": 3.6017, "step": 26150 }, { "epoch": 23.687782805429865, "grad_norm": 4.189379692077637, "learning_rate": 0.0001416117579833546, "loss": 3.4353, "step": 26175 }, { "epoch": 23.710407239819006, "grad_norm": 2.8796141147613525, "learning_rate": 0.00014158451833506735, "loss": 3.6313, "step": 26200 }, { "epoch": 23.733031674208146, "grad_norm": 2.5167758464813232, "learning_rate": 0.00014155723715854944, "loss": 3.5882, "step": 26225 }, { "epoch": 23.755656108597286, "grad_norm": 4.902872562408447, "learning_rate": 0.00014152991447081599, "loss": 3.5322, "step": 26250 }, { "epoch": 23.778280542986426, "grad_norm": 2.816702365875244, "learning_rate": 0.00014150255028890787, "loss": 3.5301, "step": 26275 }, { "epoch": 23.800904977375566, "grad_norm": 3.1594150066375732, "learning_rate": 0.00014147514462989195, "loss": 3.5826, "step": 26300 }, { "epoch": 23.823529411764707, "grad_norm": 2.405186414718628, "learning_rate": 
0.00014144769751086095, "loss": 3.5372, "step": 26325 }, { "epoch": 23.846153846153847, "grad_norm": 2.4925551414489746, "learning_rate": 0.00014142020894893334, "loss": 3.5462, "step": 26350 }, { "epoch": 23.868778280542987, "grad_norm": 3.11789608001709, "learning_rate": 0.00014139267896125357, "loss": 3.6252, "step": 26375 }, { "epoch": 23.891402714932127, "grad_norm": 3.4235665798187256, "learning_rate": 0.00014136510756499184, "loss": 3.5679, "step": 26400 }, { "epoch": 23.914027149321267, "grad_norm": 3.0763099193573, "learning_rate": 0.00014133749477734424, "loss": 3.6435, "step": 26425 }, { "epoch": 23.936651583710407, "grad_norm": 3.3343071937561035, "learning_rate": 0.0001413098406155326, "loss": 3.6377, "step": 26450 }, { "epoch": 23.959276018099548, "grad_norm": 2.7103164196014404, "learning_rate": 0.00014128214509680467, "loss": 3.6505, "step": 26475 }, { "epoch": 23.981900452488688, "grad_norm": 3.13881778717041, "learning_rate": 0.00014125440823843386, "loss": 3.6165, "step": 26500 }, { "epoch": 24.004524886877828, "grad_norm": 3.2268309593200684, "learning_rate": 0.00014122663005771948, "loss": 3.4545, "step": 26525 }, { "epoch": 24.02714932126697, "grad_norm": 3.1312403678894043, "learning_rate": 0.0001411988105719865, "loss": 3.2974, "step": 26550 }, { "epoch": 24.04977375565611, "grad_norm": 3.918436288833618, "learning_rate": 0.00014117094979858573, "loss": 3.2575, "step": 26575 }, { "epoch": 24.07239819004525, "grad_norm": 4.358361721038818, "learning_rate": 0.00014114304775489375, "loss": 3.278, "step": 26600 }, { "epoch": 24.09502262443439, "grad_norm": 2.8544600009918213, "learning_rate": 0.0001411151044583128, "loss": 3.233, "step": 26625 }, { "epoch": 24.11764705882353, "grad_norm": 3.5348682403564453, "learning_rate": 0.00014108711992627087, "loss": 3.2931, "step": 26650 }, { "epoch": 24.14027149321267, "grad_norm": 3.0109381675720215, "learning_rate": 0.00014105909417622174, "loss": 3.3485, "step": 26675 }, { "epoch": 24.16289592760181, 
"grad_norm": 3.3919458389282227, "learning_rate": 0.00014103102722564485, "loss": 3.3735, "step": 26700 }, { "epoch": 24.18552036199095, "grad_norm": 3.133079767227173, "learning_rate": 0.00014100291909204527, "loss": 3.4457, "step": 26725 }, { "epoch": 24.20814479638009, "grad_norm": 3.33530855178833, "learning_rate": 0.0001409747697929539, "loss": 3.2646, "step": 26750 }, { "epoch": 24.23076923076923, "grad_norm": 3.767277717590332, "learning_rate": 0.0001409477077536281, "loss": 3.2765, "step": 26775 }, { "epoch": 24.25339366515837, "grad_norm": 3.2638959884643555, "learning_rate": 0.0001409194778211244, "loss": 3.4323, "step": 26800 }, { "epoch": 24.27601809954751, "grad_norm": 4.183350086212158, "learning_rate": 0.00014089120677517053, "loss": 3.399, "step": 26825 }, { "epoch": 24.29864253393665, "grad_norm": 3.9280920028686523, "learning_rate": 0.00014086289463339886, "loss": 3.37, "step": 26850 }, { "epoch": 24.32126696832579, "grad_norm": 4.650427341461182, "learning_rate": 0.00014083454141346753, "loss": 3.4239, "step": 26875 }, { "epoch": 24.34389140271493, "grad_norm": 4.651412487030029, "learning_rate": 0.00014080614713306015, "loss": 3.4509, "step": 26900 }, { "epoch": 24.36651583710407, "grad_norm": 2.813253402709961, "learning_rate": 0.00014077771180988604, "loss": 3.2197, "step": 26925 }, { "epoch": 24.38914027149321, "grad_norm": 2.85752272605896, "learning_rate": 0.00014075037530306622, "loss": 3.4156, "step": 26950 }, { "epoch": 24.41176470588235, "grad_norm": 3.765904664993286, "learning_rate": 0.0001407218595875384, "loss": 3.4427, "step": 26975 }, { "epoch": 24.43438914027149, "grad_norm": 3.2647831439971924, "learning_rate": 0.0001406933028818133, "loss": 3.3161, "step": 27000 }, { "epoch": 24.457013574660632, "grad_norm": 3.5723791122436523, "learning_rate": 0.0001406647052037015, "loss": 3.3568, "step": 27025 }, { "epoch": 24.479638009049772, "grad_norm": 3.9250237941741943, "learning_rate": 0.00014063606657103918, "loss": 3.3633, "step": 
27050 }, { "epoch": 24.502262443438916, "grad_norm": 3.2530288696289062, "learning_rate": 0.000140607387001688, "loss": 3.1968, "step": 27075 }, { "epoch": 24.524886877828056, "grad_norm": 3.3201465606689453, "learning_rate": 0.00014057866651353518, "loss": 3.4533, "step": 27100 }, { "epoch": 24.547511312217196, "grad_norm": 3.3233988285064697, "learning_rate": 0.0001405499051244935, "loss": 3.434, "step": 27125 }, { "epoch": 24.570135746606336, "grad_norm": 4.571245193481445, "learning_rate": 0.0001405211028525011, "loss": 3.3871, "step": 27150 }, { "epoch": 24.592760180995477, "grad_norm": 3.496021032333374, "learning_rate": 0.0001404922597155218, "loss": 3.2646, "step": 27175 }, { "epoch": 24.615384615384617, "grad_norm": 3.7779834270477295, "learning_rate": 0.00014046337573154485, "loss": 3.7001, "step": 27200 }, { "epoch": 24.638009049773757, "grad_norm": 4.0103936195373535, "learning_rate": 0.00014043445091858493, "loss": 3.3776, "step": 27225 }, { "epoch": 24.660633484162897, "grad_norm": 3.5195281505584717, "learning_rate": 0.00014040548529468223, "loss": 3.4887, "step": 27250 }, { "epoch": 24.683257918552037, "grad_norm": 4.090165615081787, "learning_rate": 0.00014037647887790237, "loss": 3.4716, "step": 27275 }, { "epoch": 24.705882352941178, "grad_norm": 3.485745668411255, "learning_rate": 0.00014034743168633637, "loss": 3.509, "step": 27300 }, { "epoch": 24.728506787330318, "grad_norm": 2.8224310874938965, "learning_rate": 0.00014031834373810082, "loss": 3.4638, "step": 27325 }, { "epoch": 24.751131221719458, "grad_norm": 3.4466655254364014, "learning_rate": 0.0001402892150513376, "loss": 3.2815, "step": 27350 }, { "epoch": 24.773755656108598, "grad_norm": 3.448620319366455, "learning_rate": 0.000140260045644214, "loss": 3.4555, "step": 27375 }, { "epoch": 24.79638009049774, "grad_norm": 2.5938901901245117, "learning_rate": 0.00014023083553492283, "loss": 3.3474, "step": 27400 }, { "epoch": 24.81900452488688, "grad_norm": 3.0207743644714355, 
"learning_rate": 0.00014020158474168214, "loss": 3.4351, "step": 27425 }, { "epoch": 24.84162895927602, "grad_norm": 3.6358518600463867, "learning_rate": 0.0001401722932827354, "loss": 3.5771, "step": 27450 }, { "epoch": 24.86425339366516, "grad_norm": 3.3951761722564697, "learning_rate": 0.00014014296117635154, "loss": 3.6193, "step": 27475 }, { "epoch": 24.8868778280543, "grad_norm": 4.6842827796936035, "learning_rate": 0.00014011358844082466, "loss": 3.4015, "step": 27500 }, { "epoch": 24.90950226244344, "grad_norm": 2.8488800525665283, "learning_rate": 0.00014008417509447438, "loss": 3.2968, "step": 27525 }, { "epoch": 24.93212669683258, "grad_norm": 3.6501495838165283, "learning_rate": 0.0001400547211556455, "loss": 3.4843, "step": 27550 }, { "epoch": 24.95475113122172, "grad_norm": 2.7535831928253174, "learning_rate": 0.0001400252266427083, "loss": 3.2811, "step": 27575 }, { "epoch": 24.97737556561086, "grad_norm": 3.883152484893799, "learning_rate": 0.00013999569157405816, "loss": 3.5346, "step": 27600 }, { "epoch": 25.0, "grad_norm": 3.7707326412200928, "learning_rate": 0.0001399661159681159, "loss": 3.4849, "step": 27625 }, { "epoch": 25.02262443438914, "grad_norm": 4.191146373748779, "learning_rate": 0.00013993649984332765, "loss": 3.0419, "step": 27650 }, { "epoch": 25.04524886877828, "grad_norm": 3.368551731109619, "learning_rate": 0.0001399068432181647, "loss": 3.4149, "step": 27675 }, { "epoch": 25.06787330316742, "grad_norm": 2.966998338699341, "learning_rate": 0.00013987714611112364, "loss": 3.0632, "step": 27700 }, { "epoch": 25.09049773755656, "grad_norm": 5.442501068115234, "learning_rate": 0.00013984740854072636, "loss": 3.2078, "step": 27725 }, { "epoch": 25.1131221719457, "grad_norm": 3.4973864555358887, "learning_rate": 0.00013981763052551988, "loss": 3.2176, "step": 27750 }, { "epoch": 25.13574660633484, "grad_norm": 3.5123682022094727, "learning_rate": 0.00013978781208407657, "loss": 3.1047, "step": 27775 }, { "epoch": 25.15837104072398, 
"grad_norm": 3.922881841659546, "learning_rate": 0.00013975795323499393, "loss": 3.2109, "step": 27800 }, { "epoch": 25.18099547511312, "grad_norm": 2.8657217025756836, "learning_rate": 0.0001397280539968947, "loss": 3.1468, "step": 27825 }, { "epoch": 25.20361990950226, "grad_norm": 3.662860631942749, "learning_rate": 0.00013969811438842677, "loss": 3.1534, "step": 27850 }, { "epoch": 25.226244343891402, "grad_norm": 3.196195125579834, "learning_rate": 0.00013966813442826324, "loss": 3.1925, "step": 27875 }, { "epoch": 25.248868778280542, "grad_norm": 4.097235202789307, "learning_rate": 0.00013963811413510236, "loss": 3.3288, "step": 27900 }, { "epoch": 25.271493212669682, "grad_norm": 2.881373643875122, "learning_rate": 0.0001396080535276676, "loss": 3.2905, "step": 27925 }, { "epoch": 25.294117647058822, "grad_norm": 4.8148193359375, "learning_rate": 0.00013957795262470744, "loss": 3.3332, "step": 27950 }, { "epoch": 25.316742081447963, "grad_norm": 2.971619129180908, "learning_rate": 0.00013954781144499565, "loss": 3.1742, "step": 27975 }, { "epoch": 25.339366515837103, "grad_norm": 3.4500572681427, "learning_rate": 0.00013951763000733097, "loss": 3.2529, "step": 28000 }, { "epoch": 25.361990950226243, "grad_norm": 3.8950579166412354, "learning_rate": 0.00013948740833053737, "loss": 3.3713, "step": 28025 }, { "epoch": 25.384615384615383, "grad_norm": 3.466339588165283, "learning_rate": 0.00013945714643346388, "loss": 3.2068, "step": 28050 }, { "epoch": 25.407239819004523, "grad_norm": 3.300884246826172, "learning_rate": 0.00013942684433498455, "loss": 3.313, "step": 28075 }, { "epoch": 25.429864253393664, "grad_norm": 3.316925287246704, "learning_rate": 0.0001393965020539986, "loss": 3.2274, "step": 28100 }, { "epoch": 25.452488687782804, "grad_norm": 3.2406859397888184, "learning_rate": 0.00013936611960943022, "loss": 3.2634, "step": 28125 }, { "epoch": 25.475113122171944, "grad_norm": 3.2880921363830566, "learning_rate": 0.00013933569702022876, "loss": 
3.2326, "step": 28150 }, { "epoch": 25.497737556561084, "grad_norm": 5.280263423919678, "learning_rate": 0.0001393052343053685, "loss": 3.493, "step": 28175 }, { "epoch": 25.520361990950228, "grad_norm": 3.5389604568481445, "learning_rate": 0.00013927473148384883, "loss": 3.1836, "step": 28200 }, { "epoch": 25.542986425339368, "grad_norm": 3.564344882965088, "learning_rate": 0.00013924418857469406, "loss": 3.1464, "step": 28225 }, { "epoch": 25.56561085972851, "grad_norm": 3.950079917907715, "learning_rate": 0.00013921360559695362, "loss": 3.2156, "step": 28250 }, { "epoch": 25.58823529411765, "grad_norm": 3.957066297531128, "learning_rate": 0.0001391829825697018, "loss": 3.3102, "step": 28275 }, { "epoch": 25.61085972850679, "grad_norm": 3.5355470180511475, "learning_rate": 0.000139152319512038, "loss": 3.3057, "step": 28300 }, { "epoch": 25.63348416289593, "grad_norm": 3.0977582931518555, "learning_rate": 0.00013912161644308646, "loss": 3.2756, "step": 28325 }, { "epoch": 25.65610859728507, "grad_norm": 2.89414644241333, "learning_rate": 0.00013909087338199652, "loss": 3.276, "step": 28350 }, { "epoch": 25.67873303167421, "grad_norm": 3.588494062423706, "learning_rate": 0.00013906009034794228, "loss": 3.2859, "step": 28375 }, { "epoch": 25.70135746606335, "grad_norm": 2.6155803203582764, "learning_rate": 0.000139029267360123, "loss": 3.265, "step": 28400 }, { "epoch": 25.72398190045249, "grad_norm": 8.915180206298828, "learning_rate": 0.0001389984044377626, "loss": 3.31, "step": 28425 }, { "epoch": 25.74660633484163, "grad_norm": 4.498159885406494, "learning_rate": 0.0001389675016001101, "loss": 3.2415, "step": 28450 }, { "epoch": 25.76923076923077, "grad_norm": 3.761918544769287, "learning_rate": 0.00013893655886643939, "loss": 3.4283, "step": 28475 }, { "epoch": 25.79185520361991, "grad_norm": 3.198148488998413, "learning_rate": 0.0001389055762560491, "loss": 3.4218, "step": 28500 }, { "epoch": 25.81447963800905, "grad_norm": 5.093716621398926, "learning_rate": 
0.00013887455378826293, "loss": 3.4517, "step": 28525 }, { "epoch": 25.83710407239819, "grad_norm": 2.8361928462982178, "learning_rate": 0.00013884349148242934, "loss": 3.4102, "step": 28550 }, { "epoch": 25.85972850678733, "grad_norm": 3.1751811504364014, "learning_rate": 0.00013881238935792157, "loss": 3.2427, "step": 28575 }, { "epoch": 25.88235294117647, "grad_norm": 3.8904829025268555, "learning_rate": 0.0001387812474341378, "loss": 3.3164, "step": 28600 }, { "epoch": 25.90497737556561, "grad_norm": 4.2356414794921875, "learning_rate": 0.00013875006573050105, "loss": 3.2934, "step": 28625 }, { "epoch": 25.92760180995475, "grad_norm": 5.149202823638916, "learning_rate": 0.00013871884426645904, "loss": 3.3569, "step": 28650 }, { "epoch": 25.95022624434389, "grad_norm": 2.6554596424102783, "learning_rate": 0.00013868758306148437, "loss": 3.2018, "step": 28675 }, { "epoch": 25.97285067873303, "grad_norm": 3.185791492462158, "learning_rate": 0.00013865628213507439, "loss": 3.4526, "step": 28700 }, { "epoch": 25.995475113122172, "grad_norm": 4.2164387702941895, "learning_rate": 0.00013862494150675126, "loss": 3.2728, "step": 28725 }, { "epoch": 26.018099547511312, "grad_norm": 4.304487228393555, "learning_rate": 0.00013859356119606185, "loss": 2.983, "step": 28750 }, { "epoch": 26.040723981900452, "grad_norm": 3.6233808994293213, "learning_rate": 0.0001385621412225778, "loss": 2.9708, "step": 28775 }, { "epoch": 26.063348416289593, "grad_norm": 2.99818754196167, "learning_rate": 0.00013853068160589555, "loss": 3.0539, "step": 28800 }, { "epoch": 26.085972850678733, "grad_norm": 3.181682586669922, "learning_rate": 0.00013849918236563617, "loss": 2.9993, "step": 28825 }, { "epoch": 26.108597285067873, "grad_norm": 3.563267946243286, "learning_rate": 0.00013846764352144547, "loss": 3.0493, "step": 28850 }, { "epoch": 26.131221719457013, "grad_norm": 3.5948915481567383, "learning_rate": 0.00013843606509299404, "loss": 3.084, "step": 28875 }, { "epoch": 
26.153846153846153, "grad_norm": 3.071012020111084, "learning_rate": 0.000138404447099977, "loss": 3.0579, "step": 28900 }, { "epoch": 26.176470588235293, "grad_norm": 3.4085752964019775, "learning_rate": 0.0001383727895621143, "loss": 3.1644, "step": 28925 }, { "epoch": 26.199095022624434, "grad_norm": 5.598212718963623, "learning_rate": 0.0001383410924991505, "loss": 2.9763, "step": 28950 }, { "epoch": 26.221719457013574, "grad_norm": 4.041131496429443, "learning_rate": 0.00013830935593085478, "loss": 3.157, "step": 28975 }, { "epoch": 26.244343891402714, "grad_norm": 3.9623188972473145, "learning_rate": 0.00013827757987702098, "loss": 3.182, "step": 29000 }, { "epoch": 26.266968325791854, "grad_norm": 3.68810772895813, "learning_rate": 0.00013824576435746757, "loss": 3.1772, "step": 29025 }, { "epoch": 26.289592760180994, "grad_norm": 4.063492298126221, "learning_rate": 0.00013821390939203765, "loss": 3.0717, "step": 29050 }, { "epoch": 26.312217194570135, "grad_norm": 3.7294957637786865, "learning_rate": 0.00013818201500059892, "loss": 3.2203, "step": 29075 }, { "epoch": 26.334841628959275, "grad_norm": 3.2714200019836426, "learning_rate": 0.0001381500812030436, "loss": 3.2725, "step": 29100 }, { "epoch": 26.357466063348415, "grad_norm": 3.9746792316436768, "learning_rate": 0.00013811810801928862, "loss": 3.1863, "step": 29125 }, { "epoch": 26.380090497737555, "grad_norm": 5.214170455932617, "learning_rate": 0.00013808609546927533, "loss": 3.1069, "step": 29150 }, { "epoch": 26.402714932126695, "grad_norm": 3.6367127895355225, "learning_rate": 0.00013805404357296973, "loss": 3.2726, "step": 29175 }, { "epoch": 26.425339366515836, "grad_norm": 3.8441576957702637, "learning_rate": 0.00013802195235036236, "loss": 3.31, "step": 29200 }, { "epoch": 26.447963800904976, "grad_norm": 3.904715061187744, "learning_rate": 0.00013798982182146817, "loss": 3.4129, "step": 29225 }, { "epoch": 26.470588235294116, "grad_norm": 3.413214921951294, "learning_rate": 
0.00013795765200632683, "loss": 2.9399, "step": 29250 }, { "epoch": 26.49321266968326, "grad_norm": 4.17348051071167, "learning_rate": 0.00013792544292500232, "loss": 3.1527, "step": 29275 }, { "epoch": 26.5158371040724, "grad_norm": 3.2856154441833496, "learning_rate": 0.00013789319459758318, "loss": 3.2105, "step": 29300 }, { "epoch": 26.53846153846154, "grad_norm": 3.4974355697631836, "learning_rate": 0.00013786090704418248, "loss": 3.0755, "step": 29325 }, { "epoch": 26.56108597285068, "grad_norm": 4.032309055328369, "learning_rate": 0.0001378285802849377, "loss": 3.2051, "step": 29350 }, { "epoch": 26.58371040723982, "grad_norm": 3.3807785511016846, "learning_rate": 0.00013779621434001075, "loss": 3.1604, "step": 29375 }, { "epoch": 26.60633484162896, "grad_norm": 3.105994701385498, "learning_rate": 0.00013776380922958802, "loss": 3.16, "step": 29400 }, { "epoch": 26.6289592760181, "grad_norm": 4.5413312911987305, "learning_rate": 0.00013773136497388034, "loss": 3.1033, "step": 29425 }, { "epoch": 26.65158371040724, "grad_norm": 3.6155831813812256, "learning_rate": 0.00013769888159312292, "loss": 3.3687, "step": 29450 }, { "epoch": 26.67420814479638, "grad_norm": 4.292983055114746, "learning_rate": 0.00013766635910757537, "loss": 3.0782, "step": 29475 }, { "epoch": 26.69683257918552, "grad_norm": 4.59736967086792, "learning_rate": 0.00013763379753752172, "loss": 3.045, "step": 29500 }, { "epoch": 26.71945701357466, "grad_norm": 4.385296821594238, "learning_rate": 0.00013760119690327035, "loss": 3.1458, "step": 29525 }, { "epoch": 26.742081447963802, "grad_norm": 3.282942295074463, "learning_rate": 0.000137568557225154, "loss": 3.1732, "step": 29550 }, { "epoch": 26.764705882352942, "grad_norm": 3.5215020179748535, "learning_rate": 0.00013753587852352985, "loss": 3.2069, "step": 29575 }, { "epoch": 26.787330316742082, "grad_norm": 3.795607328414917, "learning_rate": 0.00013750316081877925, "loss": 3.3613, "step": 29600 }, { "epoch": 26.809954751131222, 
"grad_norm": 2.8766205310821533, "learning_rate": 0.00013747040413130803, "loss": 3.3316, "step": 29625 }, { "epoch": 26.832579185520363, "grad_norm": 3.666149377822876, "learning_rate": 0.00013743760848154623, "loss": 3.1864, "step": 29650 }, { "epoch": 26.855203619909503, "grad_norm": 3.5978901386260986, "learning_rate": 0.00013740477388994826, "loss": 3.1937, "step": 29675 }, { "epoch": 26.877828054298643, "grad_norm": 3.9239766597747803, "learning_rate": 0.0001373719003769928, "loss": 3.1024, "step": 29700 }, { "epoch": 26.900452488687783, "grad_norm": 3.981957197189331, "learning_rate": 0.00013733898796318279, "loss": 3.1872, "step": 29725 }, { "epoch": 26.923076923076923, "grad_norm": 3.4527359008789062, "learning_rate": 0.00013730603666904542, "loss": 3.0253, "step": 29750 }, { "epoch": 26.945701357466064, "grad_norm": 4.188531398773193, "learning_rate": 0.0001372730465151322, "loss": 3.2988, "step": 29775 }, { "epoch": 26.968325791855204, "grad_norm": 5.179065227508545, "learning_rate": 0.0001372413394271976, "loss": 3.2894, "step": 29800 }, { "epoch": 26.990950226244344, "grad_norm": 3.946202039718628, "learning_rate": 0.00013720827316783207, "loss": 3.1759, "step": 29825 }, { "epoch": 27.013574660633484, "grad_norm": 4.342083930969238, "learning_rate": 0.00013717516810966498, "loss": 3.0488, "step": 29850 }, { "epoch": 27.036199095022624, "grad_norm": 4.0787177085876465, "learning_rate": 0.00013714202427334368, "loss": 2.9319, "step": 29875 }, { "epoch": 27.058823529411764, "grad_norm": 4.135145664215088, "learning_rate": 0.0001371088416795397, "loss": 2.759, "step": 29900 }, { "epoch": 27.081447963800905, "grad_norm": 5.538492202758789, "learning_rate": 0.00013707562034894876, "loss": 2.9119, "step": 29925 }, { "epoch": 27.104072398190045, "grad_norm": 2.9948039054870605, "learning_rate": 0.00013704236030229078, "loss": 3.0108, "step": 29950 }, { "epoch": 27.126696832579185, "grad_norm": 3.5534348487854004, "learning_rate": 0.0001370090615603097, "loss": 
3.0078, "step": 29975 }, { "epoch": 27.149321266968325, "grad_norm": 3.3250679969787598, "learning_rate": 0.00013697572414377376, "loss": 3.0801, "step": 30000 }, { "epoch": 27.171945701357465, "grad_norm": 2.5745601654052734, "learning_rate": 0.0001369423480734752, "loss": 3.0052, "step": 30025 }, { "epoch": 27.194570135746606, "grad_norm": 3.4980592727661133, "learning_rate": 0.00013690893337023043, "loss": 2.9534, "step": 30050 }, { "epoch": 27.217194570135746, "grad_norm": 3.5090231895446777, "learning_rate": 0.0001368754800548799, "loss": 2.965, "step": 30075 }, { "epoch": 27.239819004524886, "grad_norm": 4.154080867767334, "learning_rate": 0.0001368419881482882, "loss": 2.7661, "step": 30100 }, { "epoch": 27.262443438914026, "grad_norm": 5.53240966796875, "learning_rate": 0.00013680845767134395, "loss": 2.7583, "step": 30125 }, { "epoch": 27.285067873303166, "grad_norm": 3.128809928894043, "learning_rate": 0.00013677488864495985, "loss": 2.9682, "step": 30150 }, { "epoch": 27.307692307692307, "grad_norm": 4.812078952789307, "learning_rate": 0.00013674128109007267, "loss": 2.7923, "step": 30175 }, { "epoch": 27.330316742081447, "grad_norm": 3.9619510173797607, "learning_rate": 0.0001367076350276431, "loss": 2.9877, "step": 30200 }, { "epoch": 27.352941176470587, "grad_norm": 4.209227085113525, "learning_rate": 0.00013667395047865593, "loss": 2.9891, "step": 30225 }, { "epoch": 27.375565610859727, "grad_norm": 3.904585838317871, "learning_rate": 0.00013664022746412, "loss": 3.0813, "step": 30250 }, { "epoch": 27.398190045248867, "grad_norm": 5.194969654083252, "learning_rate": 0.00013660646600506803, "loss": 3.1126, "step": 30275 }, { "epoch": 27.420814479638008, "grad_norm": 3.965092420578003, "learning_rate": 0.00013657266612255683, "loss": 3.1711, "step": 30300 }, { "epoch": 27.443438914027148, "grad_norm": 4.104466438293457, "learning_rate": 0.00013653882783766706, "loss": 3.003, "step": 30325 }, { "epoch": 27.466063348416288, "grad_norm": 
4.142114639282227, "learning_rate": 0.00013650495117150337, "loss": 3.1556, "step": 30350 }, { "epoch": 27.488687782805428, "grad_norm": 3.6268327236175537, "learning_rate": 0.0001364710361451944, "loss": 2.8924, "step": 30375 }, { "epoch": 27.511312217194572, "grad_norm": 3.663881301879883, "learning_rate": 0.00013643708277989274, "loss": 3.0629, "step": 30400 }, { "epoch": 27.533936651583712, "grad_norm": 3.707071304321289, "learning_rate": 0.00013640309109677474, "loss": 3.0408, "step": 30425 }, { "epoch": 27.556561085972852, "grad_norm": 2.7248923778533936, "learning_rate": 0.00013636906111704077, "loss": 2.9734, "step": 30450 }, { "epoch": 27.579185520361992, "grad_norm": 3.750122547149658, "learning_rate": 0.00013633499286191505, "loss": 3.0136, "step": 30475 }, { "epoch": 27.601809954751133, "grad_norm": 4.197097301483154, "learning_rate": 0.0001363008863526457, "loss": 3.0078, "step": 30500 }, { "epoch": 27.624434389140273, "grad_norm": 4.05198860168457, "learning_rate": 0.00013626674161050465, "loss": 2.9035, "step": 30525 }, { "epoch": 27.647058823529413, "grad_norm": 4.189694404602051, "learning_rate": 0.0001362325586567877, "loss": 3.1849, "step": 30550 }, { "epoch": 27.669683257918553, "grad_norm": 3.242140531539917, "learning_rate": 0.00013619833751281454, "loss": 2.9062, "step": 30575 }, { "epoch": 27.692307692307693, "grad_norm": 3.5438835620880127, "learning_rate": 0.00013616407819992858, "loss": 3.0231, "step": 30600 }, { "epoch": 27.714932126696834, "grad_norm": 3.196305990219116, "learning_rate": 0.00013612978073949706, "loss": 3.0501, "step": 30625 }, { "epoch": 27.737556561085974, "grad_norm": 3.3109047412872314, "learning_rate": 0.00013609544515291106, "loss": 3.133, "step": 30650 }, { "epoch": 27.760180995475114, "grad_norm": 4.766122341156006, "learning_rate": 0.00013606107146158538, "loss": 3.187, "step": 30675 }, { "epoch": 27.782805429864254, "grad_norm": 4.751678943634033, "learning_rate": 0.00013602665968695865, "loss": 2.9891, "step": 
30700 }, { "epoch": 27.805429864253394, "grad_norm": 3.198948621749878, "learning_rate": 0.00013599220985049322, "loss": 3.1548, "step": 30725 }, { "epoch": 27.828054298642535, "grad_norm": 4.021533012390137, "learning_rate": 0.00013595772197367515, "loss": 3.0128, "step": 30750 }, { "epoch": 27.850678733031675, "grad_norm": 3.177055597305298, "learning_rate": 0.0001359231960780143, "loss": 3.2086, "step": 30775 }, { "epoch": 27.873303167420815, "grad_norm": 3.0573225021362305, "learning_rate": 0.00013588863218504414, "loss": 3.1362, "step": 30800 }, { "epoch": 27.895927601809955, "grad_norm": 4.4769697189331055, "learning_rate": 0.00013585403031632189, "loss": 3.0807, "step": 30825 }, { "epoch": 27.918552036199095, "grad_norm": 4.18534517288208, "learning_rate": 0.0001358193904934285, "loss": 3.2425, "step": 30850 }, { "epoch": 27.941176470588236, "grad_norm": 3.6010515689849854, "learning_rate": 0.00013578471273796857, "loss": 3.3131, "step": 30875 }, { "epoch": 27.963800904977376, "grad_norm": 3.723889112472534, "learning_rate": 0.00013574999707157025, "loss": 3.0206, "step": 30900 }, { "epoch": 27.986425339366516, "grad_norm": 4.026259422302246, "learning_rate": 0.00013571524351588547, "loss": 3.19, "step": 30925 }, { "epoch": 28.009049773755656, "grad_norm": 3.8600564002990723, "learning_rate": 0.0001356804520925898, "loss": 3.0606, "step": 30950 }, { "epoch": 28.031674208144796, "grad_norm": 3.6338248252868652, "learning_rate": 0.00013564562282338227, "loss": 3.0325, "step": 30975 }, { "epoch": 28.054298642533936, "grad_norm": 4.088747501373291, "learning_rate": 0.00013561075572998568, "loss": 2.7951, "step": 31000 }, { "epoch": 28.076923076923077, "grad_norm": 4.479493618011475, "learning_rate": 0.00013557585083414636, "loss": 2.9833, "step": 31025 }, { "epoch": 28.099547511312217, "grad_norm": 4.0963664054870605, "learning_rate": 0.00013554090815763418, "loss": 2.9664, "step": 31050 }, { "epoch": 28.122171945701357, "grad_norm": 4.823516368865967, 
"learning_rate": 0.00013550592772224263, "loss": 2.8624, "step": 31075 }, { "epoch": 28.144796380090497, "grad_norm": 4.446453094482422, "learning_rate": 0.0001354709095497887, "loss": 2.9645, "step": 31100 }, { "epoch": 28.167420814479637, "grad_norm": 4.259945392608643, "learning_rate": 0.000135435853662113, "loss": 3.016, "step": 31125 }, { "epoch": 28.190045248868778, "grad_norm": 4.517056465148926, "learning_rate": 0.00013540076008107955, "loss": 3.0072, "step": 31150 }, { "epoch": 28.212669683257918, "grad_norm": 3.433553695678711, "learning_rate": 0.00013536562882857594, "loss": 2.9396, "step": 31175 }, { "epoch": 28.235294117647058, "grad_norm": 4.888617992401123, "learning_rate": 0.00013533045992651332, "loss": 2.7976, "step": 31200 }, { "epoch": 28.257918552036198, "grad_norm": 4.243067264556885, "learning_rate": 0.00013529525339682616, "loss": 2.9328, "step": 31225 }, { "epoch": 28.28054298642534, "grad_norm": 3.8665149211883545, "learning_rate": 0.00013526000926147253, "loss": 2.903, "step": 31250 }, { "epoch": 28.30316742081448, "grad_norm": 3.0539114475250244, "learning_rate": 0.0001352247275424339, "loss": 2.6476, "step": 31275 }, { "epoch": 28.32579185520362, "grad_norm": 3.3414466381073, "learning_rate": 0.00013518940826171526, "loss": 2.7779, "step": 31300 }, { "epoch": 28.34841628959276, "grad_norm": 3.770616054534912, "learning_rate": 0.00013515405144134488, "loss": 2.9363, "step": 31325 }, { "epoch": 28.3710407239819, "grad_norm": 3.9259214401245117, "learning_rate": 0.00013511865710337455, "loss": 2.7611, "step": 31350 }, { "epoch": 28.39366515837104, "grad_norm": 3.8687143325805664, "learning_rate": 0.00013508322526987947, "loss": 3.0073, "step": 31375 }, { "epoch": 28.41628959276018, "grad_norm": 4.19588565826416, "learning_rate": 0.00013504775596295815, "loss": 2.9568, "step": 31400 }, { "epoch": 28.43891402714932, "grad_norm": 3.60166597366333, "learning_rate": 0.00013501224920473253, "loss": 2.8525, "step": 31425 }, { "epoch": 
28.46153846153846, "grad_norm": 4.343369007110596, "learning_rate": 0.0001349767050173479, "loss": 2.953, "step": 31450 }, { "epoch": 28.4841628959276, "grad_norm": 4.2392168045043945, "learning_rate": 0.00013494112342297285, "loss": 2.8835, "step": 31475 }, { "epoch": 28.50678733031674, "grad_norm": 5.6029558181762695, "learning_rate": 0.00013490550444379936, "loss": 2.9266, "step": 31500 }, { "epoch": 28.529411764705884, "grad_norm": 4.1231536865234375, "learning_rate": 0.00013486984810204272, "loss": 2.9438, "step": 31525 }, { "epoch": 28.552036199095024, "grad_norm": 3.6548025608062744, "learning_rate": 0.00013483415441994145, "loss": 2.9277, "step": 31550 }, { "epoch": 28.574660633484164, "grad_norm": 4.746679782867432, "learning_rate": 0.00013479842341975747, "loss": 2.8741, "step": 31575 }, { "epoch": 28.597285067873305, "grad_norm": 5.058715343475342, "learning_rate": 0.00013476265512377591, "loss": 2.9333, "step": 31600 }, { "epoch": 28.619909502262445, "grad_norm": 4.017326831817627, "learning_rate": 0.00013472684955430516, "loss": 2.8836, "step": 31625 }, { "epoch": 28.642533936651585, "grad_norm": 3.4557321071624756, "learning_rate": 0.00013469100673367684, "loss": 2.8834, "step": 31650 }, { "epoch": 28.665158371040725, "grad_norm": 3.441136598587036, "learning_rate": 0.00013465512668424585, "loss": 3.1403, "step": 31675 }, { "epoch": 28.687782805429865, "grad_norm": 4.262326240539551, "learning_rate": 0.00013461920942839029, "loss": 2.7424, "step": 31700 }, { "epoch": 28.710407239819006, "grad_norm": 4.127242565155029, "learning_rate": 0.00013458325498851147, "loss": 2.8001, "step": 31725 }, { "epoch": 28.733031674208146, "grad_norm": 5.030572891235352, "learning_rate": 0.0001345472633870339, "loss": 3.1041, "step": 31750 }, { "epoch": 28.755656108597286, "grad_norm": 4.85727596282959, "learning_rate": 0.0001345112346464052, "loss": 2.8716, "step": 31775 }, { "epoch": 28.778280542986426, "grad_norm": 3.561958074569702, "learning_rate": 
0.0001344751687890963, "loss": 3.0226, "step": 31800 }, { "epoch": 28.800904977375566, "grad_norm": 3.3147952556610107, "learning_rate": 0.0001344390658376011, "loss": 2.9349, "step": 31825 }, { "epoch": 28.823529411764707, "grad_norm": 4.120586395263672, "learning_rate": 0.00013440292581443674, "loss": 2.8454, "step": 31850 }, { "epoch": 28.846153846153847, "grad_norm": 3.3597311973571777, "learning_rate": 0.00013436674874214348, "loss": 2.822, "step": 31875 }, { "epoch": 28.868778280542987, "grad_norm": 5.217077255249023, "learning_rate": 0.00013433053464328466, "loss": 2.956, "step": 31900 }, { "epoch": 28.891402714932127, "grad_norm": 5.234213829040527, "learning_rate": 0.00013429428354044674, "loss": 2.8244, "step": 31925 }, { "epoch": 28.914027149321267, "grad_norm": 3.850409984588623, "learning_rate": 0.00013425799545623923, "loss": 3.1384, "step": 31950 }, { "epoch": 28.936651583710407, "grad_norm": 3.3998496532440186, "learning_rate": 0.00013422167041329472, "loss": 3.1237, "step": 31975 }, { "epoch": 28.959276018099548, "grad_norm": 3.317704439163208, "learning_rate": 0.0001341853084342688, "loss": 2.9861, "step": 32000 }, { "epoch": 28.981900452488688, "grad_norm": 3.755441188812256, "learning_rate": 0.00013414890954184026, "loss": 2.9027, "step": 32025 }, { "epoch": 29.004524886877828, "grad_norm": 2.548013687133789, "learning_rate": 0.0001341124737587107, "loss": 2.9465, "step": 32050 }, { "epoch": 29.02714932126697, "grad_norm": 3.4946072101593018, "learning_rate": 0.00013407600110760485, "loss": 2.8628, "step": 32075 }, { "epoch": 29.04977375565611, "grad_norm": 4.340054988861084, "learning_rate": 0.00013403949161127043, "loss": 2.4742, "step": 32100 }, { "epoch": 29.07239819004525, "grad_norm": 4.377787113189697, "learning_rate": 0.0001340029452924781, "loss": 2.6672, "step": 32125 }, { "epoch": 29.09502262443439, "grad_norm": 4.529804706573486, "learning_rate": 0.0001339663621740215, "loss": 2.7543, "step": 32150 }, { "epoch": 29.11764705882353, 
"grad_norm": 4.187199592590332, "learning_rate": 0.00013392974227871722, "loss": 2.9031, "step": 32175 }, { "epoch": 29.14027149321267, "grad_norm": 4.080550193786621, "learning_rate": 0.00013389308562940485, "loss": 2.7065, "step": 32200 }, { "epoch": 29.16289592760181, "grad_norm": 5.53787088394165, "learning_rate": 0.0001338563922489468, "loss": 2.8107, "step": 32225 }, { "epoch": 29.18552036199095, "grad_norm": 3.6485674381256104, "learning_rate": 0.00013381966216022845, "loss": 2.7821, "step": 32250 }, { "epoch": 29.20814479638009, "grad_norm": 4.681895732879639, "learning_rate": 0.00013378289538615805, "loss": 2.8319, "step": 32275 }, { "epoch": 29.23076923076923, "grad_norm": 4.285266399383545, "learning_rate": 0.00013374609194966676, "loss": 2.8651, "step": 32300 }, { "epoch": 29.25339366515837, "grad_norm": 3.7702324390411377, "learning_rate": 0.0001337092518737086, "loss": 2.7786, "step": 32325 }, { "epoch": 29.27601809954751, "grad_norm": 3.929197072982788, "learning_rate": 0.0001336723751812604, "loss": 2.7335, "step": 32350 }, { "epoch": 29.29864253393665, "grad_norm": 5.015972137451172, "learning_rate": 0.00013363546189532188, "loss": 3.0028, "step": 32375 }, { "epoch": 29.32126696832579, "grad_norm": 4.664575099945068, "learning_rate": 0.00013359851203891554, "loss": 2.6164, "step": 32400 }, { "epoch": 29.34389140271493, "grad_norm": 5.186281204223633, "learning_rate": 0.0001335630057926611, "loss": 2.7075, "step": 32425 }, { "epoch": 29.36651583710407, "grad_norm": 4.499082565307617, "learning_rate": 0.00013352598432500893, "loss": 2.6906, "step": 32450 }, { "epoch": 29.38914027149321, "grad_norm": 4.127201557159424, "learning_rate": 0.0001334889263551692, "loss": 2.8804, "step": 32475 }, { "epoch": 29.41176470588235, "grad_norm": 4.216423034667969, "learning_rate": 0.00013345183190625475, "loss": 2.9464, "step": 32500 }, { "epoch": 29.43438914027149, "grad_norm": 4.783670425415039, "learning_rate": 0.000133414701001401, "loss": 2.5978, "step": 
32525 }, { "epoch": 29.457013574660632, "grad_norm": 4.002803802490234, "learning_rate": 0.00013337753366376626, "loss": 2.7778, "step": 32550 }, { "epoch": 29.479638009049772, "grad_norm": 4.036042213439941, "learning_rate": 0.00013334032991653148, "loss": 2.8319, "step": 32575 }, { "epoch": 29.502262443438916, "grad_norm": 4.025344371795654, "learning_rate": 0.00013330308978290033, "loss": 2.9733, "step": 32600 }, { "epoch": 29.524886877828056, "grad_norm": 4.4124040603637695, "learning_rate": 0.00013326581328609922, "loss": 2.7095, "step": 32625 }, { "epoch": 29.547511312217196, "grad_norm": 3.700578451156616, "learning_rate": 0.0001332285004493772, "loss": 2.8302, "step": 32650 }, { "epoch": 29.570135746606336, "grad_norm": 4.622064113616943, "learning_rate": 0.0001331911512960059, "loss": 2.9011, "step": 32675 }, { "epoch": 29.592760180995477, "grad_norm": 3.7043676376342773, "learning_rate": 0.0001331537658492798, "loss": 2.8195, "step": 32700 }, { "epoch": 29.615384615384617, "grad_norm": 3.6045854091644287, "learning_rate": 0.00013311634413251585, "loss": 2.898, "step": 32725 }, { "epoch": 29.638009049773757, "grad_norm": 3.9226889610290527, "learning_rate": 0.00013307888616905365, "loss": 2.8238, "step": 32750 }, { "epoch": 29.660633484162897, "grad_norm": 4.132287979125977, "learning_rate": 0.0001330413919822555, "loss": 2.8462, "step": 32775 }, { "epoch": 29.683257918552037, "grad_norm": 3.3745787143707275, "learning_rate": 0.00013300386159550618, "loss": 2.8541, "step": 32800 }, { "epoch": 29.705882352941178, "grad_norm": 3.9182276725769043, "learning_rate": 0.0001329662950322131, "loss": 2.9974, "step": 32825 }, { "epoch": 29.728506787330318, "grad_norm": 4.313821315765381, "learning_rate": 0.0001329286923158062, "loss": 2.7951, "step": 32850 }, { "epoch": 29.751131221719458, "grad_norm": 4.022199630737305, "learning_rate": 0.00013289105346973802, "loss": 2.9561, "step": 32875 }, { "epoch": 29.773755656108598, "grad_norm": 4.818426132202148, 
"learning_rate": 0.00013285337851748363, "loss": 2.8191, "step": 32900 }, { "epoch": 29.79638009049774, "grad_norm": 4.591739654541016, "learning_rate": 0.00013281566748254056, "loss": 2.7544, "step": 32925 }, { "epoch": 29.81900452488688, "grad_norm": 4.046112060546875, "learning_rate": 0.00013277792038842888, "loss": 2.6974, "step": 32950 }, { "epoch": 29.84162895927602, "grad_norm": 3.9047091007232666, "learning_rate": 0.00013274013725869115, "loss": 2.7516, "step": 32975 }, { "epoch": 29.86425339366516, "grad_norm": 4.003262519836426, "learning_rate": 0.00013270231811689242, "loss": 2.8326, "step": 33000 }, { "epoch": 29.8868778280543, "grad_norm": 3.445901393890381, "learning_rate": 0.0001326644629866202, "loss": 2.8347, "step": 33025 }, { "epoch": 29.90950226244344, "grad_norm": 5.109114646911621, "learning_rate": 0.0001326265718914844, "loss": 2.88, "step": 33050 }, { "epoch": 29.93212669683258, "grad_norm": 5.185047149658203, "learning_rate": 0.0001325886448551174, "loss": 2.9353, "step": 33075 }, { "epoch": 29.95475113122172, "grad_norm": 3.7265560626983643, "learning_rate": 0.00013255068190117398, "loss": 2.7464, "step": 33100 }, { "epoch": 29.97737556561086, "grad_norm": 4.132345199584961, "learning_rate": 0.00013251268305333137, "loss": 2.8342, "step": 33125 }, { "epoch": 30.0, "grad_norm": 4.887376308441162, "learning_rate": 0.0001324746483352891, "loss": 2.8665, "step": 33150 }, { "epoch": 30.02262443438914, "grad_norm": 3.8556153774261475, "learning_rate": 0.00013243657777076915, "loss": 2.5729, "step": 33175 }, { "epoch": 30.04524886877828, "grad_norm": 4.831211566925049, "learning_rate": 0.00013239847138351581, "loss": 2.4019, "step": 33200 }, { "epoch": 30.06787330316742, "grad_norm": 4.507389545440674, "learning_rate": 0.00013236032919729574, "loss": 2.6132, "step": 33225 }, { "epoch": 30.09049773755656, "grad_norm": 4.609244346618652, "learning_rate": 0.0001323221512358979, "loss": 2.6143, "step": 33250 }, { "epoch": 30.1131221719457, 
"grad_norm": 6.194278717041016, "learning_rate": 0.00013228393752313358, "loss": 2.6638, "step": 33275 }, { "epoch": 30.13574660633484, "grad_norm": 3.9110960960388184, "learning_rate": 0.00013224568808283641, "loss": 2.6422, "step": 33300 }, { "epoch": 30.15837104072398, "grad_norm": 3.939910411834717, "learning_rate": 0.0001322074029388622, "loss": 2.7747, "step": 33325 }, { "epoch": 30.18099547511312, "grad_norm": 4.057479381561279, "learning_rate": 0.0001321690821150891, "loss": 2.6193, "step": 33350 }, { "epoch": 30.20361990950226, "grad_norm": 3.234480619430542, "learning_rate": 0.00013213072563541753, "loss": 2.6003, "step": 33375 }, { "epoch": 30.226244343891402, "grad_norm": 4.1614179611206055, "learning_rate": 0.0001320923335237701, "loss": 2.8562, "step": 33400 }, { "epoch": 30.248868778280542, "grad_norm": 4.460386276245117, "learning_rate": 0.00013205390580409165, "loss": 2.7884, "step": 33425 }, { "epoch": 30.271493212669682, "grad_norm": 5.725566387176514, "learning_rate": 0.00013201544250034927, "loss": 2.7352, "step": 33450 }, { "epoch": 30.294117647058822, "grad_norm": 5.769152641296387, "learning_rate": 0.0001319769436365322, "loss": 2.6063, "step": 33475 }, { "epoch": 30.316742081447963, "grad_norm": 4.5305047035217285, "learning_rate": 0.00013193840923665187, "loss": 2.4199, "step": 33500 }, { "epoch": 30.339366515837103, "grad_norm": 3.5889010429382324, "learning_rate": 0.00013189983932474186, "loss": 2.8019, "step": 33525 }, { "epoch": 30.361990950226243, "grad_norm": 3.8920085430145264, "learning_rate": 0.00013186123392485794, "loss": 2.6213, "step": 33550 }, { "epoch": 30.384615384615383, "grad_norm": 4.817618370056152, "learning_rate": 0.000131822593061078, "loss": 2.7214, "step": 33575 }, { "epoch": 30.407239819004523, "grad_norm": 4.146068096160889, "learning_rate": 0.000131783916757502, "loss": 2.7239, "step": 33600 }, { "epoch": 30.429864253393664, "grad_norm": 5.7670674324035645, "learning_rate": 0.0001317452050382521, "loss": 2.6981, 
"step": 33625 }, { "epoch": 30.452488687782804, "grad_norm": 4.598320484161377, "learning_rate": 0.0001317064579274724, "loss": 2.6299, "step": 33650 }, { "epoch": 30.475113122171944, "grad_norm": 4.3051300048828125, "learning_rate": 0.00013166767544932922, "loss": 2.8231, "step": 33675 }, { "epoch": 30.497737556561084, "grad_norm": 4.633981704711914, "learning_rate": 0.0001316288576280109, "loss": 2.7843, "step": 33700 }, { "epoch": 30.520361990950228, "grad_norm": 4.050837516784668, "learning_rate": 0.00013159000448772777, "loss": 2.5236, "step": 33725 }, { "epoch": 30.542986425339368, "grad_norm": 3.8911004066467285, "learning_rate": 0.00013155111605271221, "loss": 2.6911, "step": 33750 }, { "epoch": 30.56561085972851, "grad_norm": 4.056077003479004, "learning_rate": 0.00013151219234721866, "loss": 2.5631, "step": 33775 }, { "epoch": 30.58823529411765, "grad_norm": 4.661139488220215, "learning_rate": 0.00013147323339552348, "loss": 2.7013, "step": 33800 }, { "epoch": 30.61085972850679, "grad_norm": 4.946695327758789, "learning_rate": 0.00013143423922192514, "loss": 2.6462, "step": 33825 }, { "epoch": 30.63348416289593, "grad_norm": 3.819631338119507, "learning_rate": 0.00013139520985074388, "loss": 2.7273, "step": 33850 }, { "epoch": 30.65610859728507, "grad_norm": 4.634519100189209, "learning_rate": 0.0001313561453063221, "loss": 2.8124, "step": 33875 }, { "epoch": 30.67873303167421, "grad_norm": 3.164106607437134, "learning_rate": 0.00013131704561302398, "loss": 2.5955, "step": 33900 }, { "epoch": 30.70135746606335, "grad_norm": 4.083550453186035, "learning_rate": 0.00013127791079523574, "loss": 2.742, "step": 33925 }, { "epoch": 30.72398190045249, "grad_norm": 3.631793737411499, "learning_rate": 0.0001312387408773654, "loss": 2.7186, "step": 33950 }, { "epoch": 30.74660633484163, "grad_norm": 4.551006317138672, "learning_rate": 0.00013119953588384294, "loss": 2.5732, "step": 33975 }, { "epoch": 30.76923076923077, "grad_norm": 5.5608062744140625, 
"learning_rate": 0.00013116029583912022, "loss": 2.9259, "step": 34000 }, { "epoch": 30.79185520361991, "grad_norm": 4.855534553527832, "learning_rate": 0.00013112102076767097, "loss": 2.8942, "step": 34025 }, { "epoch": 30.81447963800905, "grad_norm": 4.316082000732422, "learning_rate": 0.00013108171069399065, "loss": 2.7487, "step": 34050 }, { "epoch": 30.83710407239819, "grad_norm": 5.009440898895264, "learning_rate": 0.00013104236564259668, "loss": 2.5815, "step": 34075 }, { "epoch": 30.85972850678733, "grad_norm": 4.553768157958984, "learning_rate": 0.0001310029856380283, "loss": 2.8171, "step": 34100 }, { "epoch": 30.88235294117647, "grad_norm": 4.048161506652832, "learning_rate": 0.00013096357070484644, "loss": 2.8778, "step": 34125 }, { "epoch": 30.90497737556561, "grad_norm": 4.5986433029174805, "learning_rate": 0.00013092412086763392, "loss": 3.0391, "step": 34150 }, { "epoch": 30.92760180995475, "grad_norm": 4.356034278869629, "learning_rate": 0.00013088463615099525, "loss": 2.7292, "step": 34175 }, { "epoch": 30.95022624434389, "grad_norm": 4.125836372375488, "learning_rate": 0.00013084511657955673, "loss": 2.6635, "step": 34200 }, { "epoch": 30.97285067873303, "grad_norm": 4.798816680908203, "learning_rate": 0.00013080556217796646, "loss": 2.7088, "step": 34225 }, { "epoch": 30.995475113122172, "grad_norm": 4.807849407196045, "learning_rate": 0.00013076597297089412, "loss": 2.6887, "step": 34250 }, { "epoch": 31.018099547511312, "grad_norm": 4.601236343383789, "learning_rate": 0.00013072634898303126, "loss": 2.5557, "step": 34275 }, { "epoch": 31.040723981900452, "grad_norm": 4.286696434020996, "learning_rate": 0.000130686690239091, "loss": 2.5424, "step": 34300 }, { "epoch": 31.063348416289593, "grad_norm": 5.3083367347717285, "learning_rate": 0.00013064699676380818, "loss": 2.4756, "step": 34325 }, { "epoch": 31.085972850678733, "grad_norm": 4.651934623718262, "learning_rate": 0.00013060726858193933, "loss": 2.4422, "step": 34350 }, { "epoch": 
31.108597285067873, "grad_norm": 6.969370365142822, "learning_rate": 0.00013056750571826254, "loss": 2.5326, "step": 34375 }, { "epoch": 31.131221719457013, "grad_norm": 4.533806324005127, "learning_rate": 0.00013052770819757767, "loss": 2.5938, "step": 34400 }, { "epoch": 31.153846153846153, "grad_norm": 4.460901737213135, "learning_rate": 0.00013048787604470606, "loss": 2.5329, "step": 34425 }, { "epoch": 31.176470588235293, "grad_norm": 4.053142070770264, "learning_rate": 0.0001304480092844907, "loss": 2.5739, "step": 34450 }, { "epoch": 31.199095022624434, "grad_norm": 4.864528179168701, "learning_rate": 0.00013040810794179622, "loss": 2.5561, "step": 34475 }, { "epoch": 31.221719457013574, "grad_norm": 4.160267353057861, "learning_rate": 0.00013036817204150873, "loss": 2.4335, "step": 34500 }, { "epoch": 31.244343891402714, "grad_norm": 4.050480365753174, "learning_rate": 0.0001303282016085359, "loss": 2.6838, "step": 34525 }, { "epoch": 31.266968325791854, "grad_norm": 3.8022878170013428, "learning_rate": 0.00013028819666780706, "loss": 2.5064, "step": 34550 }, { "epoch": 31.289592760180994, "grad_norm": 3.9805221557617188, "learning_rate": 0.00013024815724427288, "loss": 2.702, "step": 34575 }, { "epoch": 31.312217194570135, "grad_norm": 4.316376686096191, "learning_rate": 0.0001302096869794373, "loss": 2.6071, "step": 34600 }, { "epoch": 31.334841628959275, "grad_norm": 4.840281963348389, "learning_rate": 0.00013016958004206426, "loss": 2.5611, "step": 34625 }, { "epoch": 31.357466063348415, "grad_norm": 4.259674549102783, "learning_rate": 0.00013012943869586613, "loss": 2.5317, "step": 34650 }, { "epoch": 31.380090497737555, "grad_norm": 3.153834342956543, "learning_rate": 0.0001300892629658788, "loss": 2.6136, "step": 34675 }, { "epoch": 31.402714932126695, "grad_norm": 4.826272487640381, "learning_rate": 0.00013004905287715947, "loss": 2.6296, "step": 34700 }, { "epoch": 31.425339366515836, "grad_norm": 3.353173017501831, "learning_rate": 
0.00013000880845478693, "loss": 2.5559, "step": 34725 }, { "epoch": 31.447963800904976, "grad_norm": 4.404378414154053, "learning_rate": 0.0001299685297238613, "loss": 2.589, "step": 34750 }, { "epoch": 31.470588235294116, "grad_norm": 5.003884315490723, "learning_rate": 0.00012992821670950404, "loss": 2.7557, "step": 34775 }, { "epoch": 31.49321266968326, "grad_norm": 4.578713417053223, "learning_rate": 0.00012988786943685812, "loss": 2.5047, "step": 34800 }, { "epoch": 31.5158371040724, "grad_norm": 5.10005521774292, "learning_rate": 0.00012984748793108775, "loss": 2.6792, "step": 34825 }, { "epoch": 31.53846153846154, "grad_norm": 4.338147163391113, "learning_rate": 0.00012980707221737863, "loss": 2.6348, "step": 34850 }, { "epoch": 31.56108597285068, "grad_norm": 5.2252984046936035, "learning_rate": 0.00012976662232093764, "loss": 2.6911, "step": 34875 }, { "epoch": 31.58371040723982, "grad_norm": 3.880182981491089, "learning_rate": 0.0001297277582846583, "loss": 2.5268, "step": 34900 }, { "epoch": 31.60633484162896, "grad_norm": 4.190444469451904, "learning_rate": 0.00012968724146326485, "loss": 2.5337, "step": 34925 }, { "epoch": 31.6289592760181, "grad_norm": 3.701547622680664, "learning_rate": 0.0001296466905338771, "loss": 2.5426, "step": 34950 }, { "epoch": 31.65158371040724, "grad_norm": 4.068524360656738, "learning_rate": 0.0001296061055217862, "loss": 2.6174, "step": 34975 }, { "epoch": 31.67420814479638, "grad_norm": 5.140333652496338, "learning_rate": 0.00012956548645230487, "loss": 2.5687, "step": 35000 }, { "epoch": 31.69683257918552, "grad_norm": 4.030555725097656, "learning_rate": 0.00012952483335076676, "loss": 2.5699, "step": 35025 }, { "epoch": 31.71945701357466, "grad_norm": 4.85916805267334, "learning_rate": 0.00012948414624252693, "loss": 2.6461, "step": 35050 }, { "epoch": 31.742081447963802, "grad_norm": 4.350060939788818, "learning_rate": 0.0001294434251529616, "loss": 2.6628, "step": 35075 }, { "epoch": 31.764705882352942, "grad_norm": 
3.316129207611084, "learning_rate": 0.00012940267010746822, "loss": 2.4926, "step": 35100 }, { "epoch": 31.787330316742082, "grad_norm": 5.346104621887207, "learning_rate": 0.00012936188113146535, "loss": 2.6199, "step": 35125 }, { "epoch": 31.809954751131222, "grad_norm": 4.797980785369873, "learning_rate": 0.00012932105825039274, "loss": 2.5169, "step": 35150 }, { "epoch": 31.832579185520363, "grad_norm": 4.692802906036377, "learning_rate": 0.00012928020148971132, "loss": 2.5529, "step": 35175 }, { "epoch": 31.855203619909503, "grad_norm": 5.026429653167725, "learning_rate": 0.00012923931087490312, "loss": 2.6802, "step": 35200 }, { "epoch": 31.877828054298643, "grad_norm": 4.618716716766357, "learning_rate": 0.0001291983864314713, "loss": 2.5839, "step": 35225 }, { "epoch": 31.900452488687783, "grad_norm": 4.131834030151367, "learning_rate": 0.00012915742818494003, "loss": 2.6939, "step": 35250 }, { "epoch": 31.923076923076923, "grad_norm": 4.771623611450195, "learning_rate": 0.00012911643616085472, "loss": 2.6028, "step": 35275 }, { "epoch": 31.945701357466064, "grad_norm": 4.510164737701416, "learning_rate": 0.00012907541038478177, "loss": 2.7633, "step": 35300 }, { "epoch": 31.968325791855204, "grad_norm": 3.9268558025360107, "learning_rate": 0.0001290343508823086, "loss": 2.786, "step": 35325 }, { "epoch": 31.990950226244344, "grad_norm": 4.578567981719971, "learning_rate": 0.0001289932576790437, "loss": 2.6832, "step": 35350 }, { "epoch": 32.01357466063349, "grad_norm": 4.22797966003418, "learning_rate": 0.00012895213080061656, "loss": 2.587, "step": 35375 }, { "epoch": 32.036199095022624, "grad_norm": 4.337325572967529, "learning_rate": 0.00012891097027267767, "loss": 2.3603, "step": 35400 }, { "epoch": 32.05882352941177, "grad_norm": 5.987388610839844, "learning_rate": 0.0001288697761208986, "loss": 2.5849, "step": 35425 }, { "epoch": 32.081447963800905, "grad_norm": 3.6974217891693115, "learning_rate": 0.0001288285483709717, "loss": 2.4429, "step": 35450 
}, { "epoch": 32.10407239819005, "grad_norm": 4.210789680480957, "learning_rate": 0.0001287872870486105, "loss": 2.6268, "step": 35475 }, { "epoch": 32.126696832579185, "grad_norm": 5.0200676918029785, "learning_rate": 0.00012874599217954926, "loss": 2.402, "step": 35500 }, { "epoch": 32.14932126696833, "grad_norm": 3.6578259468078613, "learning_rate": 0.00012870466378954332, "loss": 2.3686, "step": 35525 }, { "epoch": 32.171945701357465, "grad_norm": 4.825088977813721, "learning_rate": 0.00012866330190436883, "loss": 2.4563, "step": 35550 }, { "epoch": 32.19457013574661, "grad_norm": 4.426782131195068, "learning_rate": 0.0001286219065498229, "loss": 2.5751, "step": 35575 }, { "epoch": 32.217194570135746, "grad_norm": 4.516976833343506, "learning_rate": 0.00012858047775172346, "loss": 2.5115, "step": 35600 }, { "epoch": 32.23981900452489, "grad_norm": 5.571347236633301, "learning_rate": 0.0001285390155359093, "loss": 2.4532, "step": 35625 }, { "epoch": 32.262443438914026, "grad_norm": 4.27960205078125, "learning_rate": 0.00012849751992824012, "loss": 2.371, "step": 35650 }, { "epoch": 32.28506787330317, "grad_norm": 5.029642105102539, "learning_rate": 0.00012845599095459635, "loss": 2.4602, "step": 35675 }, { "epoch": 32.30769230769231, "grad_norm": 4.342826843261719, "learning_rate": 0.0001284144286408793, "loss": 2.5523, "step": 35700 }, { "epoch": 32.33031674208145, "grad_norm": 4.571736812591553, "learning_rate": 0.00012837283301301108, "loss": 2.2815, "step": 35725 }, { "epoch": 32.35294117647059, "grad_norm": 3.9872703552246094, "learning_rate": 0.0001283312040969345, "loss": 2.6431, "step": 35750 }, { "epoch": 32.37556561085973, "grad_norm": 4.537867069244385, "learning_rate": 0.00012828954191861322, "loss": 2.4634, "step": 35775 }, { "epoch": 32.39819004524887, "grad_norm": 4.200901031494141, "learning_rate": 0.0001282478465040316, "loss": 2.5172, "step": 35800 }, { "epoch": 32.42081447963801, "grad_norm": 3.6147212982177734, "learning_rate": 
0.0001282061178791947, "loss": 2.6195, "step": 35825 }, { "epoch": 32.44343891402715, "grad_norm": 3.921602725982666, "learning_rate": 0.00012816435607012838, "loss": 2.3279, "step": 35850 }, { "epoch": 32.46606334841629, "grad_norm": 6.205512523651123, "learning_rate": 0.0001281225611028791, "loss": 2.4272, "step": 35875 }, { "epoch": 32.48868778280543, "grad_norm": 4.8625898361206055, "learning_rate": 0.00012808073300351407, "loss": 2.6181, "step": 35900 }, { "epoch": 32.51131221719457, "grad_norm": 5.045218467712402, "learning_rate": 0.00012803887179812116, "loss": 2.5377, "step": 35925 }, { "epoch": 32.53393665158371, "grad_norm": 4.018486499786377, "learning_rate": 0.00012799697751280883, "loss": 2.4845, "step": 35950 }, { "epoch": 32.55656108597285, "grad_norm": 4.254944324493408, "learning_rate": 0.00012795505017370622, "loss": 2.3467, "step": 35975 }, { "epoch": 32.57918552036199, "grad_norm": 4.213393688201904, "learning_rate": 0.0001279130898069631, "loss": 2.3798, "step": 36000 }, { "epoch": 32.60180995475113, "grad_norm": 3.9159576892852783, "learning_rate": 0.00012787109643874978, "loss": 2.3131, "step": 36025 }, { "epoch": 32.62443438914027, "grad_norm": 4.4072747230529785, "learning_rate": 0.0001278290700952572, "loss": 2.4228, "step": 36050 }, { "epoch": 32.64705882352941, "grad_norm": 4.352427005767822, "learning_rate": 0.00012778701080269685, "loss": 2.4964, "step": 36075 }, { "epoch": 32.66968325791855, "grad_norm": 3.7858567237854004, "learning_rate": 0.00012774491858730082, "loss": 2.5937, "step": 36100 }, { "epoch": 32.69230769230769, "grad_norm": 4.358039855957031, "learning_rate": 0.0001277027934753216, "loss": 2.3238, "step": 36125 }, { "epoch": 32.71493212669683, "grad_norm": 4.003338813781738, "learning_rate": 0.0001276606354930324, "loss": 2.5184, "step": 36150 }, { "epoch": 32.737556561085974, "grad_norm": 3.9666638374328613, "learning_rate": 0.0001276184446667267, "loss": 2.4886, "step": 36175 }, { "epoch": 32.76018099547511, 
"grad_norm": 4.199832916259766, "learning_rate": 0.00012757622102271864, "loss": 2.6314, "step": 36200 }, { "epoch": 32.782805429864254, "grad_norm": 5.6295881271362305, "learning_rate": 0.00012753396458734274, "loss": 2.627, "step": 36225 }, { "epoch": 32.80542986425339, "grad_norm": 4.842776298522949, "learning_rate": 0.00012749167538695405, "loss": 2.594, "step": 36250 }, { "epoch": 32.828054298642535, "grad_norm": 4.408575534820557, "learning_rate": 0.00012744935344792795, "loss": 2.4685, "step": 36275 }, { "epoch": 32.85067873303167, "grad_norm": 4.2867817878723145, "learning_rate": 0.00012740699879666033, "loss": 2.536, "step": 36300 }, { "epoch": 32.873303167420815, "grad_norm": 4.182464599609375, "learning_rate": 0.00012736461145956745, "loss": 2.4234, "step": 36325 }, { "epoch": 32.89592760180995, "grad_norm": 3.8789894580841064, "learning_rate": 0.00012732219146308592, "loss": 2.5252, "step": 36350 }, { "epoch": 32.918552036199095, "grad_norm": 5.3863606452941895, "learning_rate": 0.0001272797388336728, "loss": 2.6901, "step": 36375 }, { "epoch": 32.94117647058823, "grad_norm": 3.9546992778778076, "learning_rate": 0.0001272372535978054, "loss": 2.6173, "step": 36400 }, { "epoch": 32.963800904977376, "grad_norm": 4.50889778137207, "learning_rate": 0.00012719473578198145, "loss": 2.5091, "step": 36425 }, { "epoch": 32.98642533936652, "grad_norm": 4.707273006439209, "learning_rate": 0.00012715218541271893, "loss": 2.5365, "step": 36450 }, { "epoch": 33.009049773755656, "grad_norm": 3.601125717163086, "learning_rate": 0.00012710960251655627, "loss": 2.3054, "step": 36475 }, { "epoch": 33.0316742081448, "grad_norm": 4.469501972198486, "learning_rate": 0.00012706698712005196, "loss": 2.3404, "step": 36500 }, { "epoch": 33.05429864253394, "grad_norm": 3.9866597652435303, "learning_rate": 0.00012702433924978494, "loss": 2.3609, "step": 36525 }, { "epoch": 33.07692307692308, "grad_norm": 5.863500118255615, "learning_rate": 0.00012698165893235434, "loss": 2.3967, 
"step": 36550 }, { "epoch": 33.09954751131222, "grad_norm": 4.805734157562256, "learning_rate": 0.00012693894619437954, "loss": 2.2383, "step": 36575 }, { "epoch": 33.12217194570136, "grad_norm": 4.294662952423096, "learning_rate": 0.00012689620106250013, "loss": 2.4667, "step": 36600 }, { "epoch": 33.1447963800905, "grad_norm": 4.397215366363525, "learning_rate": 0.0001268534235633759, "loss": 2.3737, "step": 36625 }, { "epoch": 33.16742081447964, "grad_norm": 4.107430934906006, "learning_rate": 0.00012681061372368682, "loss": 2.2985, "step": 36650 }, { "epoch": 33.19004524886878, "grad_norm": 5.470655918121338, "learning_rate": 0.00012676777157013305, "loss": 2.3414, "step": 36675 }, { "epoch": 33.21266968325792, "grad_norm": 4.423906326293945, "learning_rate": 0.0001267248971294349, "loss": 2.5055, "step": 36700 }, { "epoch": 33.23529411764706, "grad_norm": 4.633875846862793, "learning_rate": 0.00012668199042833284, "loss": 2.4087, "step": 36725 }, { "epoch": 33.2579185520362, "grad_norm": 3.9326577186584473, "learning_rate": 0.00012663905149358742, "loss": 2.2786, "step": 36750 }, { "epoch": 33.28054298642534, "grad_norm": 4.227899551391602, "learning_rate": 0.0001265960803519793, "loss": 2.1966, "step": 36775 }, { "epoch": 33.30316742081448, "grad_norm": 4.313636779785156, "learning_rate": 0.00012655307703030925, "loss": 2.3442, "step": 36800 }, { "epoch": 33.32579185520362, "grad_norm": 5.133744239807129, "learning_rate": 0.00012651004155539807, "loss": 2.4795, "step": 36825 }, { "epoch": 33.34841628959276, "grad_norm": 4.362292766571045, "learning_rate": 0.00012646697395408667, "loss": 2.2518, "step": 36850 }, { "epoch": 33.3710407239819, "grad_norm": 5.804786682128906, "learning_rate": 0.00012642387425323596, "loss": 2.4099, "step": 36875 }, { "epoch": 33.39366515837104, "grad_norm": 4.712783336639404, "learning_rate": 0.00012638074247972686, "loss": 2.3866, "step": 36900 }, { "epoch": 33.41628959276018, "grad_norm": 5.200821399688721, "learning_rate": 
0.00012633757866046036, "loss": 2.3939, "step": 36925 }, { "epoch": 33.43891402714932, "grad_norm": 4.40043830871582, "learning_rate": 0.00012629438282235733, "loss": 2.2179, "step": 36950 }, { "epoch": 33.46153846153846, "grad_norm": 4.598316669464111, "learning_rate": 0.0001262511549923587, "loss": 2.3507, "step": 36975 }, { "epoch": 33.484162895927604, "grad_norm": 4.276734352111816, "learning_rate": 0.00012620789519742534, "loss": 2.4628, "step": 37000 }, { "epoch": 33.50678733031674, "grad_norm": 4.842257499694824, "learning_rate": 0.00012616460346453798, "loss": 2.4459, "step": 37025 }, { "epoch": 33.529411764705884, "grad_norm": 4.445224761962891, "learning_rate": 0.00012612127982069738, "loss": 2.3065, "step": 37050 }, { "epoch": 33.55203619909502, "grad_norm": 5.114562511444092, "learning_rate": 0.0001260779242929241, "loss": 2.4811, "step": 37075 }, { "epoch": 33.574660633484164, "grad_norm": 4.964504718780518, "learning_rate": 0.0001260345369082587, "loss": 2.3818, "step": 37100 }, { "epoch": 33.5972850678733, "grad_norm": 4.5414886474609375, "learning_rate": 0.0001259911176937615, "loss": 2.545, "step": 37125 }, { "epoch": 33.619909502262445, "grad_norm": 4.3598856925964355, "learning_rate": 0.00012594766667651272, "loss": 2.4586, "step": 37150 }, { "epoch": 33.64253393665158, "grad_norm": 4.801278591156006, "learning_rate": 0.00012590418388361242, "loss": 2.439, "step": 37175 }, { "epoch": 33.665158371040725, "grad_norm": 4.317627906799316, "learning_rate": 0.00012586066934218047, "loss": 2.4285, "step": 37200 }, { "epoch": 33.68778280542986, "grad_norm": 3.9917690753936768, "learning_rate": 0.0001258171230793565, "loss": 2.3523, "step": 37225 }, { "epoch": 33.710407239819006, "grad_norm": 4.744004249572754, "learning_rate": 0.00012577354512230003, "loss": 2.3969, "step": 37250 }, { "epoch": 33.73303167420814, "grad_norm": 5.244507312774658, "learning_rate": 0.00012572993549819027, "loss": 2.3932, "step": 37275 }, { "epoch": 33.755656108597286, 
"grad_norm": 5.12740421295166, "learning_rate": 0.00012568629423422617, "loss": 2.4197, "step": 37300 }, { "epoch": 33.77828054298642, "grad_norm": 3.774641752243042, "learning_rate": 0.00012564262135762643, "loss": 2.4066, "step": 37325 }, { "epoch": 33.800904977375566, "grad_norm": 4.841151237487793, "learning_rate": 0.0001255989168956295, "loss": 2.5366, "step": 37350 }, { "epoch": 33.8235294117647, "grad_norm": 5.057889461517334, "learning_rate": 0.0001255551808754935, "loss": 2.4209, "step": 37375 }, { "epoch": 33.84615384615385, "grad_norm": 3.9078261852264404, "learning_rate": 0.0001255114133244962, "loss": 2.3331, "step": 37400 }, { "epoch": 33.86877828054298, "grad_norm": 5.785364151000977, "learning_rate": 0.0001254676142699351, "loss": 2.5448, "step": 37425 }, { "epoch": 33.89140271493213, "grad_norm": 4.612705230712891, "learning_rate": 0.00012542378373912736, "loss": 2.3608, "step": 37450 }, { "epoch": 33.914027149321264, "grad_norm": 5.307022571563721, "learning_rate": 0.00012537992175940964, "loss": 2.4911, "step": 37475 }, { "epoch": 33.93665158371041, "grad_norm": 5.742743968963623, "learning_rate": 0.00012533602835813838, "loss": 2.3899, "step": 37500 }, { "epoch": 33.959276018099544, "grad_norm": 5.305797100067139, "learning_rate": 0.0001252921035626895, "loss": 2.3406, "step": 37525 }, { "epoch": 33.98190045248869, "grad_norm": 3.845489501953125, "learning_rate": 0.00012524814740045857, "loss": 2.4819, "step": 37550 }, { "epoch": 34.00452488687783, "grad_norm": 4.11320161819458, "learning_rate": 0.00012520415989886066, "loss": 2.3448, "step": 37575 }, { "epoch": 34.02714932126697, "grad_norm": 4.280022144317627, "learning_rate": 0.00012516014108533049, "loss": 2.2643, "step": 37600 }, { "epoch": 34.04977375565611, "grad_norm": 4.960660934448242, "learning_rate": 0.00012511609098732215, "loss": 2.1732, "step": 37625 }, { "epoch": 34.07239819004525, "grad_norm": 4.4615254402160645, "learning_rate": 0.0001250720096323094, "loss": 2.2452, "step": 
37650 }, { "epoch": 34.09502262443439, "grad_norm": 4.478513240814209, "learning_rate": 0.0001250278970477854, "loss": 2.2065, "step": 37675 }, { "epoch": 34.11764705882353, "grad_norm": 4.7386345863342285, "learning_rate": 0.00012498375326126286, "loss": 2.33, "step": 37700 }, { "epoch": 34.14027149321267, "grad_norm": 4.773339748382568, "learning_rate": 0.00012493957830027384, "loss": 2.1296, "step": 37725 }, { "epoch": 34.16289592760181, "grad_norm": 4.943683624267578, "learning_rate": 0.00012489537219236994, "loss": 2.1463, "step": 37750 }, { "epoch": 34.18552036199095, "grad_norm": 4.314094543457031, "learning_rate": 0.00012485113496512218, "loss": 2.2566, "step": 37775 }, { "epoch": 34.20814479638009, "grad_norm": 5.287987232208252, "learning_rate": 0.00012480686664612093, "loss": 2.3733, "step": 37800 }, { "epoch": 34.23076923076923, "grad_norm": 5.228750228881836, "learning_rate": 0.00012476256726297598, "loss": 2.1226, "step": 37825 }, { "epoch": 34.25339366515837, "grad_norm": 3.784313201904297, "learning_rate": 0.00012471823684331653, "loss": 2.1873, "step": 37850 }, { "epoch": 34.276018099547514, "grad_norm": 4.201169490814209, "learning_rate": 0.0001246738754147911, "loss": 2.3755, "step": 37875 }, { "epoch": 34.29864253393665, "grad_norm": 4.945826053619385, "learning_rate": 0.00012462948300506754, "loss": 2.1317, "step": 37900 }, { "epoch": 34.321266968325794, "grad_norm": 3.940614700317383, "learning_rate": 0.00012458505964183306, "loss": 2.1285, "step": 37925 }, { "epoch": 34.34389140271493, "grad_norm": 5.837182998657227, "learning_rate": 0.00012454060535279412, "loss": 2.1953, "step": 37950 }, { "epoch": 34.366515837104075, "grad_norm": 4.630831241607666, "learning_rate": 0.00012449612016567657, "loss": 2.2692, "step": 37975 }, { "epoch": 34.38914027149321, "grad_norm": 4.768281936645508, "learning_rate": 0.00012445160410822542, "loss": 2.1857, "step": 38000 }, { "epoch": 34.411764705882355, "grad_norm": 5.820812702178955, "learning_rate": 
0.00012440705720820496, "loss": 2.2188, "step": 38025 }, { "epoch": 34.43438914027149, "grad_norm": 4.905755043029785, "learning_rate": 0.00012436247949339875, "loss": 2.2482, "step": 38050 }, { "epoch": 34.457013574660635, "grad_norm": 5.567986488342285, "learning_rate": 0.0001243178709916096, "loss": 2.4189, "step": 38075 }, { "epoch": 34.47963800904977, "grad_norm": 4.231449127197266, "learning_rate": 0.0001242732317306594, "loss": 2.2835, "step": 38100 }, { "epoch": 34.502262443438916, "grad_norm": 3.215325117111206, "learning_rate": 0.00012422856173838938, "loss": 2.4559, "step": 38125 }, { "epoch": 34.52488687782805, "grad_norm": 3.921882152557373, "learning_rate": 0.0001241838610426598, "loss": 2.2331, "step": 38150 }, { "epoch": 34.547511312217196, "grad_norm": 5.587146282196045, "learning_rate": 0.00012413912967135013, "loss": 2.2795, "step": 38175 }, { "epoch": 34.57013574660633, "grad_norm": 4.40065860748291, "learning_rate": 0.00012409436765235896, "loss": 2.3123, "step": 38200 }, { "epoch": 34.59276018099548, "grad_norm": 4.005879878997803, "learning_rate": 0.00012404957501360405, "loss": 2.2526, "step": 38225 }, { "epoch": 34.61538461538461, "grad_norm": 3.5936102867126465, "learning_rate": 0.00012400475178302216, "loss": 2.4858, "step": 38250 }, { "epoch": 34.63800904977376, "grad_norm": 3.343513011932373, "learning_rate": 0.0001239598979885692, "loss": 2.3716, "step": 38275 }, { "epoch": 34.660633484162894, "grad_norm": 4.3708648681640625, "learning_rate": 0.00012391501365822014, "loss": 2.2519, "step": 38300 }, { "epoch": 34.68325791855204, "grad_norm": 4.603794097900391, "learning_rate": 0.00012387009881996894, "loss": 2.4559, "step": 38325 }, { "epoch": 34.705882352941174, "grad_norm": 4.484412670135498, "learning_rate": 0.00012382515350182867, "loss": 2.4036, "step": 38350 }, { "epoch": 34.72850678733032, "grad_norm": 5.898510456085205, "learning_rate": 0.0001237801777318313, "loss": 2.2546, "step": 38375 }, { "epoch": 34.751131221719454, 
"grad_norm": 5.234091758728027, "learning_rate": 0.00012373517153802793, "loss": 2.3146, "step": 38400 }, { "epoch": 34.7737556561086, "grad_norm": 5.243960380554199, "learning_rate": 0.0001236901349484885, "loss": 2.1139, "step": 38425 }, { "epoch": 34.796380090497735, "grad_norm": 5.678958892822266, "learning_rate": 0.00012364506799130201, "loss": 2.2946, "step": 38450 }, { "epoch": 34.81900452488688, "grad_norm": 5.459033966064453, "learning_rate": 0.00012359997069457635, "loss": 2.4424, "step": 38475 }, { "epoch": 34.841628959276015, "grad_norm": 4.285768508911133, "learning_rate": 0.00012355484308643837, "loss": 2.251, "step": 38500 }, { "epoch": 34.86425339366516, "grad_norm": 3.4532508850097656, "learning_rate": 0.0001235096851950337, "loss": 2.3726, "step": 38525 }, { "epoch": 34.886877828054295, "grad_norm": 5.380105495452881, "learning_rate": 0.0001234644970485271, "loss": 2.4636, "step": 38550 }, { "epoch": 34.90950226244344, "grad_norm": 4.285428524017334, "learning_rate": 0.00012341927867510192, "loss": 2.4352, "step": 38575 }, { "epoch": 34.932126696832576, "grad_norm": 5.082425117492676, "learning_rate": 0.00012337403010296059, "loss": 2.2011, "step": 38600 }, { "epoch": 34.95475113122172, "grad_norm": 5.039985656738281, "learning_rate": 0.00012332875136032424, "loss": 2.3386, "step": 38625 }, { "epoch": 34.977375565610856, "grad_norm": 4.668622970581055, "learning_rate": 0.00012328344247543286, "loss": 2.4197, "step": 38650 }, { "epoch": 35.0, "grad_norm": 3.9261083602905273, "learning_rate": 0.00012323810347654525, "loss": 2.3796, "step": 38675 }, { "epoch": 35.022624434389144, "grad_norm": 4.162503719329834, "learning_rate": 0.000123192734391939, "loss": 2.271, "step": 38700 }, { "epoch": 35.04524886877828, "grad_norm": 5.307286262512207, "learning_rate": 0.00012314733524991037, "loss": 2.0773, "step": 38725 }, { "epoch": 35.067873303167424, "grad_norm": 3.6611545085906982, "learning_rate": 0.00012310190607877454, "loss": 2.2386, "step": 38750 }, 
{ "epoch": 35.09049773755656, "grad_norm": 5.716986656188965, "learning_rate": 0.00012305644690686524, "loss": 2.1327, "step": 38775 }, { "epoch": 35.113122171945705, "grad_norm": 4.889161586761475, "learning_rate": 0.00012301095776253506, "loss": 1.8899, "step": 38800 }, { "epoch": 35.13574660633484, "grad_norm": 4.419322967529297, "learning_rate": 0.00012296543867415513, "loss": 2.0504, "step": 38825 }, { "epoch": 35.158371040723985, "grad_norm": 4.462159156799316, "learning_rate": 0.00012291988967011542, "loss": 2.2039, "step": 38850 }, { "epoch": 35.18099547511312, "grad_norm": 4.497054576873779, "learning_rate": 0.00012287431077882442, "loss": 1.9936, "step": 38875 }, { "epoch": 35.203619909502265, "grad_norm": 4.174394607543945, "learning_rate": 0.00012283052695164664, "loss": 2.3584, "step": 38900 }, { "epoch": 35.2262443438914, "grad_norm": 6.2371602058410645, "learning_rate": 0.00012278488956382204, "loss": 2.1509, "step": 38925 }, { "epoch": 35.248868778280546, "grad_norm": 4.77733039855957, "learning_rate": 0.0001227392223729447, "loss": 2.1533, "step": 38950 }, { "epoch": 35.27149321266968, "grad_norm": 4.860963344573975, "learning_rate": 0.0001226935254074968, "loss": 2.2584, "step": 38975 }, { "epoch": 35.294117647058826, "grad_norm": 4.231897354125977, "learning_rate": 0.00012264779869597926, "loss": 2.2071, "step": 39000 }, { "epoch": 35.31674208144796, "grad_norm": 4.994824409484863, "learning_rate": 0.00012260204226691138, "loss": 2.0147, "step": 39025 }, { "epoch": 35.339366515837106, "grad_norm": 5.301912784576416, "learning_rate": 0.00012255625614883116, "loss": 2.4201, "step": 39050 }, { "epoch": 35.36199095022624, "grad_norm": 5.6750335693359375, "learning_rate": 0.00012251044037029496, "loss": 2.2504, "step": 39075 }, { "epoch": 35.38461538461539, "grad_norm": 4.550475597381592, "learning_rate": 0.00012246459495987775, "loss": 2.3047, "step": 39100 }, { "epoch": 35.40723981900452, "grad_norm": 4.855838298797607, "learning_rate": 
0.00012241871994617294, "loss": 2.2243, "step": 39125 }, { "epoch": 35.42986425339367, "grad_norm": 4.490835666656494, "learning_rate": 0.00012237281535779242, "loss": 2.1518, "step": 39150 }, { "epoch": 35.452488687782804, "grad_norm": 4.116247653961182, "learning_rate": 0.0001223268812233665, "loss": 2.1949, "step": 39175 }, { "epoch": 35.47511312217195, "grad_norm": 5.929376125335693, "learning_rate": 0.00012228091757154392, "loss": 2.1322, "step": 39200 }, { "epoch": 35.497737556561084, "grad_norm": 4.142200469970703, "learning_rate": 0.00012223492443099186, "loss": 2.0254, "step": 39225 }, { "epoch": 35.52036199095023, "grad_norm": 3.8631250858306885, "learning_rate": 0.00012218890183039589, "loss": 2.1582, "step": 39250 }, { "epoch": 35.542986425339365, "grad_norm": 4.793134689331055, "learning_rate": 0.0001221428497984599, "loss": 2.1888, "step": 39275 }, { "epoch": 35.56561085972851, "grad_norm": 4.693505764007568, "learning_rate": 0.0001220967683639062, "loss": 2.1438, "step": 39300 }, { "epoch": 35.588235294117645, "grad_norm": 4.33529806137085, "learning_rate": 0.00012205065755547539, "loss": 2.1771, "step": 39325 }, { "epoch": 35.61085972850679, "grad_norm": 4.993594646453857, "learning_rate": 0.00012200451740192644, "loss": 2.2037, "step": 39350 }, { "epoch": 35.633484162895925, "grad_norm": 4.973903179168701, "learning_rate": 0.00012195834793203655, "loss": 2.1383, "step": 39375 }, { "epoch": 35.65610859728507, "grad_norm": 4.853772163391113, "learning_rate": 0.00012191214917460131, "loss": 2.2724, "step": 39400 }, { "epoch": 35.678733031674206, "grad_norm": 5.172274589538574, "learning_rate": 0.00012186592115843446, "loss": 2.2928, "step": 39425 }, { "epoch": 35.70135746606335, "grad_norm": 4.263808250427246, "learning_rate": 0.00012181966391236806, "loss": 2.19, "step": 39450 }, { "epoch": 35.723981900452486, "grad_norm": 4.12034273147583, "learning_rate": 0.00012177337746525237, "loss": 2.0226, "step": 39475 }, { "epoch": 35.74660633484163, 
"grad_norm": 4.177559852600098, "learning_rate": 0.00012172706184595594, "loss": 2.1455, "step": 39500 }, { "epoch": 35.76923076923077, "grad_norm": 5.90501594543457, "learning_rate": 0.00012168071708336537, "loss": 2.3705, "step": 39525 }, { "epoch": 35.79185520361991, "grad_norm": 5.299849033355713, "learning_rate": 0.00012163434320638556, "loss": 2.2741, "step": 39550 }, { "epoch": 35.81447963800905, "grad_norm": 4.647896766662598, "learning_rate": 0.00012158794024393952, "loss": 2.2383, "step": 39575 }, { "epoch": 35.83710407239819, "grad_norm": 4.593433380126953, "learning_rate": 0.00012154150822496841, "loss": 2.1183, "step": 39600 }, { "epoch": 35.85972850678733, "grad_norm": 3.6611621379852295, "learning_rate": 0.00012149504717843149, "loss": 2.2052, "step": 39625 }, { "epoch": 35.88235294117647, "grad_norm": 3.904351234436035, "learning_rate": 0.00012144855713330618, "loss": 2.2897, "step": 39650 }, { "epoch": 35.90497737556561, "grad_norm": 4.795818328857422, "learning_rate": 0.00012140203811858789, "loss": 2.1473, "step": 39675 }, { "epoch": 35.92760180995475, "grad_norm": 5.492254257202148, "learning_rate": 0.0001213554901632902, "loss": 2.2725, "step": 39700 }, { "epoch": 35.95022624434389, "grad_norm": 4.611868858337402, "learning_rate": 0.0001213089132964447, "loss": 2.197, "step": 39725 }, { "epoch": 35.97285067873303, "grad_norm": 4.947539329528809, "learning_rate": 0.00012126230754710099, "loss": 2.2163, "step": 39750 }, { "epoch": 35.99547511312217, "grad_norm": 4.95683479309082, "learning_rate": 0.0001212156729443267, "loss": 2.1723, "step": 39775 }, { "epoch": 36.01809954751131, "grad_norm": 4.998210906982422, "learning_rate": 0.00012116900951720745, "loss": 2.045, "step": 39800 }, { "epoch": 36.040723981900456, "grad_norm": 4.544924736022949, "learning_rate": 0.00012112231729484689, "loss": 1.9196, "step": 39825 }, { "epoch": 36.06334841628959, "grad_norm": 3.828965663909912, "learning_rate": 0.00012107559630636655, "loss": 2.0334, "step": 
39850 }, { "epoch": 36.085972850678736, "grad_norm": 5.906606197357178, "learning_rate": 0.00012102884658090593, "loss": 2.168, "step": 39875 }, { "epoch": 36.10859728506787, "grad_norm": 4.3583831787109375, "learning_rate": 0.00012098206814762247, "loss": 2.0809, "step": 39900 }, { "epoch": 36.13122171945702, "grad_norm": 6.541887283325195, "learning_rate": 0.00012093526103569152, "loss": 2.0264, "step": 39925 }, { "epoch": 36.15384615384615, "grad_norm": 5.1993536949157715, "learning_rate": 0.00012088842527430629, "loss": 2.1022, "step": 39950 }, { "epoch": 36.1764705882353, "grad_norm": 5.358850955963135, "learning_rate": 0.00012084156089267785, "loss": 2.0869, "step": 39975 }, { "epoch": 36.199095022624434, "grad_norm": 4.883439064025879, "learning_rate": 0.00012079466792003517, "loss": 2.1404, "step": 40000 }, { "epoch": 36.22171945701358, "grad_norm": 5.307112693786621, "learning_rate": 0.000120747746385625, "loss": 2.0697, "step": 40025 }, { "epoch": 36.244343891402714, "grad_norm": 4.577943325042725, "learning_rate": 0.00012070079631871192, "loss": 2.013, "step": 40050 }, { "epoch": 36.26696832579186, "grad_norm": 5.501679420471191, "learning_rate": 0.00012065381774857832, "loss": 2.0398, "step": 40075 }, { "epoch": 36.289592760180994, "grad_norm": 5.405413627624512, "learning_rate": 0.00012060681070452438, "loss": 2.1319, "step": 40100 }, { "epoch": 36.31221719457014, "grad_norm": 4.200492858886719, "learning_rate": 0.00012055977521586798, "loss": 2.2056, "step": 40125 }, { "epoch": 36.334841628959275, "grad_norm": 4.862429141998291, "learning_rate": 0.0001205127113119448, "loss": 1.9553, "step": 40150 }, { "epoch": 36.35746606334842, "grad_norm": 5.6993536949157715, "learning_rate": 0.00012046561902210822, "loss": 2.3308, "step": 40175 }, { "epoch": 36.380090497737555, "grad_norm": 5.029634952545166, "learning_rate": 0.00012041849837572929, "loss": 2.2173, "step": 40200 }, { "epoch": 36.4027149321267, "grad_norm": 6.117025852203369, "learning_rate": 
0.00012037134940219684, "loss": 2.0294, "step": 40225 }, { "epoch": 36.425339366515836, "grad_norm": 4.50570821762085, "learning_rate": 0.00012032417213091728, "loss": 2.1731, "step": 40250 }, { "epoch": 36.44796380090498, "grad_norm": 4.521318435668945, "learning_rate": 0.00012027696659131466, "loss": 2.003, "step": 40275 }, { "epoch": 36.470588235294116, "grad_norm": 4.803319454193115, "learning_rate": 0.00012022973281283073, "loss": 1.9514, "step": 40300 }, { "epoch": 36.49321266968326, "grad_norm": 5.161010265350342, "learning_rate": 0.00012018247082492483, "loss": 2.2183, "step": 40325 }, { "epoch": 36.515837104072396, "grad_norm": 5.701634407043457, "learning_rate": 0.00012013518065707387, "loss": 2.0652, "step": 40350 }, { "epoch": 36.53846153846154, "grad_norm": 5.202276229858398, "learning_rate": 0.00012008786233877233, "loss": 2.0276, "step": 40375 }, { "epoch": 36.56108597285068, "grad_norm": 5.018892765045166, "learning_rate": 0.00012004051589953232, "loss": 2.1121, "step": 40400 }, { "epoch": 36.58371040723982, "grad_norm": 5.593802452087402, "learning_rate": 0.00011999314136888338, "loss": 2.0423, "step": 40425 }, { "epoch": 36.60633484162896, "grad_norm": 5.779587268829346, "learning_rate": 0.00011994573877637264, "loss": 2.0321, "step": 40450 }, { "epoch": 36.6289592760181, "grad_norm": 4.755776882171631, "learning_rate": 0.00011989830815156473, "loss": 2.0956, "step": 40475 }, { "epoch": 36.65158371040724, "grad_norm": 4.763101577758789, "learning_rate": 0.00011985084952404173, "loss": 2.0916, "step": 40500 }, { "epoch": 36.67420814479638, "grad_norm": 4.131762981414795, "learning_rate": 0.00011980336292340324, "loss": 2.0092, "step": 40525 }, { "epoch": 36.69683257918552, "grad_norm": 4.448458671569824, "learning_rate": 0.00011975584837926623, "loss": 2.1099, "step": 40550 }, { "epoch": 36.71945701357466, "grad_norm": 4.395147323608398, "learning_rate": 0.00011970830592126517, "loss": 1.9198, "step": 40575 }, { "epoch": 36.7420814479638, 
"grad_norm": 5.14371919631958, "learning_rate": 0.00011966073557905188, "loss": 2.138, "step": 40600 }, { "epoch": 36.76470588235294, "grad_norm": 5.342911720275879, "learning_rate": 0.00011961313738229565, "loss": 2.1533, "step": 40625 }, { "epoch": 36.78733031674208, "grad_norm": 4.580821990966797, "learning_rate": 0.00011956551136068306, "loss": 2.2054, "step": 40650 }, { "epoch": 36.80995475113122, "grad_norm": 4.565236568450928, "learning_rate": 0.00011951785754391807, "loss": 2.1256, "step": 40675 }, { "epoch": 36.83257918552036, "grad_norm": 4.817327499389648, "learning_rate": 0.00011947017596172202, "loss": 2.2157, "step": 40700 }, { "epoch": 36.8552036199095, "grad_norm": 4.755205154418945, "learning_rate": 0.0001194224666438335, "loss": 2.017, "step": 40725 }, { "epoch": 36.87782805429864, "grad_norm": 4.711893558502197, "learning_rate": 0.00011937472962000844, "loss": 2.2593, "step": 40750 }, { "epoch": 36.90045248868778, "grad_norm": 5.197144508361816, "learning_rate": 0.00011932696492002003, "loss": 2.1794, "step": 40775 }, { "epoch": 36.92307692307692, "grad_norm": 3.8158843517303467, "learning_rate": 0.00011927917257365873, "loss": 2.0601, "step": 40800 }, { "epoch": 36.94570135746606, "grad_norm": 5.204895973205566, "learning_rate": 0.00011923135261073229, "loss": 2.1827, "step": 40825 }, { "epoch": 36.9683257918552, "grad_norm": 4.822196960449219, "learning_rate": 0.00011918350506106556, "loss": 2.1934, "step": 40850 }, { "epoch": 36.990950226244344, "grad_norm": 3.964585304260254, "learning_rate": 0.00011913562995450072, "loss": 2.1056, "step": 40875 }, { "epoch": 37.01357466063349, "grad_norm": 3.375369071960449, "learning_rate": 0.00011908772732089709, "loss": 1.899, "step": 40900 }, { "epoch": 37.036199095022624, "grad_norm": 4.7697834968566895, "learning_rate": 0.00011903979719013116, "loss": 1.8776, "step": 40925 }, { "epoch": 37.05882352941177, "grad_norm": 4.175597190856934, "learning_rate": 0.00011899183959209656, "loss": 1.8977, "step": 
40950 }, { "epoch": 37.081447963800905, "grad_norm": 4.101481914520264, "learning_rate": 0.00011894385455670405, "loss": 1.9835, "step": 40975 }, { "epoch": 37.10407239819005, "grad_norm": 4.512917518615723, "learning_rate": 0.00011889584211388152, "loss": 1.9868, "step": 41000 }, { "epoch": 37.126696832579185, "grad_norm": 5.312557697296143, "learning_rate": 0.00011884780229357397, "loss": 1.9328, "step": 41025 }, { "epoch": 37.14932126696833, "grad_norm": 5.03694486618042, "learning_rate": 0.0001187997351257434, "loss": 1.8786, "step": 41050 }, { "epoch": 37.171945701357465, "grad_norm": 4.9827070236206055, "learning_rate": 0.00011875164064036896, "loss": 1.9491, "step": 41075 }, { "epoch": 37.19457013574661, "grad_norm": 5.550374507904053, "learning_rate": 0.0001187035188674468, "loss": 1.9217, "step": 41100 }, { "epoch": 37.217194570135746, "grad_norm": 4.104196071624756, "learning_rate": 0.00011865536983699005, "loss": 1.93, "step": 41125 }, { "epoch": 37.23981900452489, "grad_norm": 5.8158369064331055, "learning_rate": 0.0001186071935790289, "loss": 1.9963, "step": 41150 }, { "epoch": 37.262443438914026, "grad_norm": 4.323169708251953, "learning_rate": 0.00011855899012361047, "loss": 1.909, "step": 41175 }, { "epoch": 37.28506787330317, "grad_norm": 4.632725715637207, "learning_rate": 0.0001185107595007989, "loss": 2.0336, "step": 41200 }, { "epoch": 37.30769230769231, "grad_norm": 4.106157302856445, "learning_rate": 0.00011846250174067522, "loss": 2.1866, "step": 41225 }, { "epoch": 37.33031674208145, "grad_norm": 4.956432342529297, "learning_rate": 0.00011841421687333743, "loss": 2.0205, "step": 41250 }, { "epoch": 37.35294117647059, "grad_norm": 6.1458964347839355, "learning_rate": 0.00011836590492890039, "loss": 2.0438, "step": 41275 }, { "epoch": 37.37556561085973, "grad_norm": 5.244638919830322, "learning_rate": 0.0001183175659374959, "loss": 2.1053, "step": 41300 }, { "epoch": 37.39819004524887, "grad_norm": 4.762988567352295, "learning_rate": 
0.00011826919992927255, "loss": 1.9517, "step": 41325 }, { "epoch": 37.42081447963801, "grad_norm": 4.060920238494873, "learning_rate": 0.00011822080693439589, "loss": 2.0046, "step": 41350 }, { "epoch": 37.44343891402715, "grad_norm": 4.41787576675415, "learning_rate": 0.00011817238698304823, "loss": 2.0745, "step": 41375 }, { "epoch": 37.46606334841629, "grad_norm": 3.6469852924346924, "learning_rate": 0.00011812394010542869, "loss": 1.8661, "step": 41400 }, { "epoch": 37.48868778280543, "grad_norm": 4.105870246887207, "learning_rate": 0.00011807546633175323, "loss": 2.0869, "step": 41425 }, { "epoch": 37.51131221719457, "grad_norm": 4.687159061431885, "learning_rate": 0.0001180269656922545, "loss": 1.8561, "step": 41450 }, { "epoch": 37.53393665158371, "grad_norm": 4.697596073150635, "learning_rate": 0.00011797843821718201, "loss": 2.0736, "step": 41475 }, { "epoch": 37.55656108597285, "grad_norm": 5.025703430175781, "learning_rate": 0.00011792988393680192, "loss": 1.927, "step": 41500 }, { "epoch": 37.57918552036199, "grad_norm": 4.535080909729004, "learning_rate": 0.00011788130288139719, "loss": 2.0446, "step": 41525 }, { "epoch": 37.60180995475113, "grad_norm": 5.132175445556641, "learning_rate": 0.0001178326950812674, "loss": 1.9801, "step": 41550 }, { "epoch": 37.62443438914027, "grad_norm": 4.711911678314209, "learning_rate": 0.00011778406056672883, "loss": 1.9259, "step": 41575 }, { "epoch": 37.64705882352941, "grad_norm": 4.1732282638549805, "learning_rate": 0.00011773539936811449, "loss": 1.9793, "step": 41600 }, { "epoch": 37.66968325791855, "grad_norm": 4.863363742828369, "learning_rate": 0.00011768671151577396, "loss": 1.9115, "step": 41625 }, { "epoch": 37.69230769230769, "grad_norm": 4.690446853637695, "learning_rate": 0.00011763799704007343, "loss": 2.1073, "step": 41650 }, { "epoch": 37.71493212669683, "grad_norm": 5.44816255569458, "learning_rate": 0.00011758925597139577, "loss": 1.9904, "step": 41675 }, { "epoch": 37.737556561085974, 
"grad_norm": 4.960061073303223, "learning_rate": 0.00011754048834014034, "loss": 2.3171, "step": 41700 }, { "epoch": 37.76018099547511, "grad_norm": 4.3642354011535645, "learning_rate": 0.00011749169417672319, "loss": 1.9993, "step": 41725 }, { "epoch": 37.782805429864254, "grad_norm": 3.7109146118164062, "learning_rate": 0.00011744287351157682, "loss": 2.0134, "step": 41750 }, { "epoch": 37.80542986425339, "grad_norm": 4.4268927574157715, "learning_rate": 0.00011739402637515027, "loss": 2.1329, "step": 41775 }, { "epoch": 37.828054298642535, "grad_norm": 4.4178643226623535, "learning_rate": 0.00011734515279790915, "loss": 2.1266, "step": 41800 }, { "epoch": 37.85067873303167, "grad_norm": 5.504481315612793, "learning_rate": 0.00011729625281033546, "loss": 2.1022, "step": 41825 }, { "epoch": 37.873303167420815, "grad_norm": 4.019266128540039, "learning_rate": 0.00011724732644292778, "loss": 1.9571, "step": 41850 }, { "epoch": 37.89592760180995, "grad_norm": 4.862491607666016, "learning_rate": 0.00011719837372620108, "loss": 1.9971, "step": 41875 }, { "epoch": 37.918552036199095, "grad_norm": 4.543803691864014, "learning_rate": 0.00011714939469068675, "loss": 2.0642, "step": 41900 }, { "epoch": 37.94117647058823, "grad_norm": 5.209831714630127, "learning_rate": 0.00011710038936693266, "loss": 1.9744, "step": 41925 }, { "epoch": 37.963800904977376, "grad_norm": 3.90395188331604, "learning_rate": 0.00011705135778550302, "loss": 2.0004, "step": 41950 }, { "epoch": 37.98642533936652, "grad_norm": 4.3217878341674805, "learning_rate": 0.00011700229997697843, "loss": 1.9563, "step": 41975 }, { "epoch": 38.009049773755656, "grad_norm": 3.418581485748291, "learning_rate": 0.00011695321597195587, "loss": 1.9149, "step": 42000 }, { "epoch": 38.0316742081448, "grad_norm": 4.049460411071777, "learning_rate": 0.00011690410580104862, "loss": 1.8076, "step": 42025 }, { "epoch": 38.05429864253394, "grad_norm": 5.510961532592773, "learning_rate": 0.00011685496949488631, "loss": 
1.793, "step": 42050 }, { "epoch": 38.07692307692308, "grad_norm": 5.471452713012695, "learning_rate": 0.00011680580708411488, "loss": 1.8731, "step": 42075 }, { "epoch": 38.09954751131222, "grad_norm": 5.05703592300415, "learning_rate": 0.00011675661859939648, "loss": 1.8416, "step": 42100 }, { "epoch": 38.12217194570136, "grad_norm": 4.373697280883789, "learning_rate": 0.00011670740407140963, "loss": 1.9638, "step": 42125 }, { "epoch": 38.1447963800905, "grad_norm": 5.67213773727417, "learning_rate": 0.00011665816353084898, "loss": 1.7583, "step": 42150 }, { "epoch": 38.16742081447964, "grad_norm": 4.141268253326416, "learning_rate": 0.00011660889700842552, "loss": 2.103, "step": 42175 }, { "epoch": 38.19004524886878, "grad_norm": 5.167728424072266, "learning_rate": 0.00011655960453486637, "loss": 2.0116, "step": 42200 }, { "epoch": 38.21266968325792, "grad_norm": 6.356166839599609, "learning_rate": 0.00011651028614091482, "loss": 1.9145, "step": 42225 }, { "epoch": 38.23529411764706, "grad_norm": 4.968979358673096, "learning_rate": 0.00011646094185733036, "loss": 1.8108, "step": 42250 }, { "epoch": 38.2579185520362, "grad_norm": 7.776438236236572, "learning_rate": 0.00011641157171488867, "loss": 1.9705, "step": 42275 }, { "epoch": 38.28054298642534, "grad_norm": 4.122015476226807, "learning_rate": 0.00011636217574438146, "loss": 1.9069, "step": 42300 }, { "epoch": 38.30316742081448, "grad_norm": 3.944101333618164, "learning_rate": 0.00011631275397661664, "loss": 1.8944, "step": 42325 }, { "epoch": 38.32579185520362, "grad_norm": 4.42734432220459, "learning_rate": 0.00011626330644241815, "loss": 1.9438, "step": 42350 }, { "epoch": 38.34841628959276, "grad_norm": 3.9971983432769775, "learning_rate": 0.00011621383317262603, "loss": 1.7437, "step": 42375 }, { "epoch": 38.3710407239819, "grad_norm": 5.1051106452941895, "learning_rate": 0.00011616433419809634, "loss": 1.9386, "step": 42400 }, { "epoch": 38.39366515837104, "grad_norm": 6.243931770324707, 
"learning_rate": 0.00011611480954970122, "loss": 1.9027, "step": 42425 }, { "epoch": 38.41628959276018, "grad_norm": 4.356624603271484, "learning_rate": 0.0001160652592583288, "loss": 1.9252, "step": 42450 }, { "epoch": 38.43891402714932, "grad_norm": 4.535741806030273, "learning_rate": 0.00011601568335488318, "loss": 2.0281, "step": 42475 }, { "epoch": 38.46153846153846, "grad_norm": 5.314903736114502, "learning_rate": 0.00011596608187028447, "loss": 1.8095, "step": 42500 }, { "epoch": 38.484162895927604, "grad_norm": 5.069538116455078, "learning_rate": 0.0001159164548354687, "loss": 2.0085, "step": 42525 }, { "epoch": 38.50678733031674, "grad_norm": 5.700867176055908, "learning_rate": 0.00011586680228138787, "loss": 1.8026, "step": 42550 }, { "epoch": 38.529411764705884, "grad_norm": 3.9822821617126465, "learning_rate": 0.00011581712423900985, "loss": 1.8676, "step": 42575 }, { "epoch": 38.55203619909502, "grad_norm": 4.560797214508057, "learning_rate": 0.00011576940936769776, "loss": 1.9595, "step": 42600 }, { "epoch": 38.574660633484164, "grad_norm": 5.297098159790039, "learning_rate": 0.00011571968145814983, "loss": 1.8811, "step": 42625 }, { "epoch": 38.5972850678733, "grad_norm": 3.8053483963012695, "learning_rate": 0.00011566992815206284, "loss": 2.0062, "step": 42650 }, { "epoch": 38.619909502262445, "grad_norm": 4.440960884094238, "learning_rate": 0.00011562014948046748, "loss": 2.052, "step": 42675 }, { "epoch": 38.64253393665158, "grad_norm": 6.567005634307861, "learning_rate": 0.00011557034547441034, "loss": 2.0608, "step": 42700 }, { "epoch": 38.665158371040725, "grad_norm": 4.547772407531738, "learning_rate": 0.00011552051616495379, "loss": 1.9677, "step": 42725 }, { "epoch": 38.68778280542986, "grad_norm": 4.409372806549072, "learning_rate": 0.00011547066158317594, "loss": 2.003, "step": 42750 }, { "epoch": 38.710407239819006, "grad_norm": 4.9843058586120605, "learning_rate": 0.00011542078176017068, "loss": 1.9311, "step": 42775 }, { "epoch": 
38.73303167420814, "grad_norm": 4.709278106689453, "learning_rate": 0.0001153708767270477, "loss": 1.7398, "step": 42800 }, { "epoch": 38.755656108597286, "grad_norm": 5.142955303192139, "learning_rate": 0.00011532094651493235, "loss": 1.9743, "step": 42825 }, { "epoch": 38.77828054298642, "grad_norm": 5.384269714355469, "learning_rate": 0.00011527099115496569, "loss": 1.8471, "step": 42850 }, { "epoch": 38.800904977375566, "grad_norm": 5.019404411315918, "learning_rate": 0.00011522101067830449, "loss": 1.8774, "step": 42875 }, { "epoch": 38.8235294117647, "grad_norm": 4.817010402679443, "learning_rate": 0.00011517100511612118, "loss": 1.903, "step": 42900 }, { "epoch": 38.84615384615385, "grad_norm": 4.09938907623291, "learning_rate": 0.00011512097449960381, "loss": 2.0208, "step": 42925 }, { "epoch": 38.86877828054298, "grad_norm": 5.036036491394043, "learning_rate": 0.0001150709188599561, "loss": 1.9105, "step": 42950 }, { "epoch": 38.89140271493213, "grad_norm": 4.437981605529785, "learning_rate": 0.00011502083822839734, "loss": 1.9323, "step": 42975 }, { "epoch": 38.914027149321264, "grad_norm": 4.4150390625, "learning_rate": 0.00011497073263616241, "loss": 1.9789, "step": 43000 }, { "epoch": 38.93665158371041, "grad_norm": 4.1862640380859375, "learning_rate": 0.00011492060211450178, "loss": 2.0008, "step": 43025 }, { "epoch": 38.959276018099544, "grad_norm": 4.7120361328125, "learning_rate": 0.00011487044669468144, "loss": 1.9275, "step": 43050 }, { "epoch": 38.98190045248869, "grad_norm": 4.61646842956543, "learning_rate": 0.00011482026640798293, "loss": 1.926, "step": 43075 }, { "epoch": 39.00452488687783, "grad_norm": 5.012235164642334, "learning_rate": 0.00011477006128570328, "loss": 1.9905, "step": 43100 }, { "epoch": 39.02714932126697, "grad_norm": 4.445367336273193, "learning_rate": 0.00011471983135915506, "loss": 1.7675, "step": 43125 }, { "epoch": 39.04977375565611, "grad_norm": 3.9145455360412598, "learning_rate": 0.00011466957665966624, "loss": 
1.6291, "step": 43150 }, { "epoch": 39.07239819004525, "grad_norm": 4.605747699737549, "learning_rate": 0.00011461929721858028, "loss": 1.8455, "step": 43175 }, { "epoch": 39.09502262443439, "grad_norm": 4.1831440925598145, "learning_rate": 0.00011456899306725608, "loss": 1.6843, "step": 43200 }, { "epoch": 39.11764705882353, "grad_norm": 4.9170026779174805, "learning_rate": 0.0001145186642370679, "loss": 1.8931, "step": 43225 }, { "epoch": 39.14027149321267, "grad_norm": 5.320871353149414, "learning_rate": 0.00011446831075940548, "loss": 1.752, "step": 43250 }, { "epoch": 39.16289592760181, "grad_norm": 5.1726975440979, "learning_rate": 0.00011441793266567382, "loss": 1.7615, "step": 43275 }, { "epoch": 39.18552036199095, "grad_norm": 4.3221964836120605, "learning_rate": 0.00011436752998729339, "loss": 1.8699, "step": 43300 }, { "epoch": 39.20814479638009, "grad_norm": 5.191243648529053, "learning_rate": 0.00011431710275569989, "loss": 1.8307, "step": 43325 }, { "epoch": 39.23076923076923, "grad_norm": 5.0341796875, "learning_rate": 0.00011426665100234442, "loss": 1.8825, "step": 43350 }, { "epoch": 39.25339366515837, "grad_norm": 3.7480239868164062, "learning_rate": 0.00011421617475869331, "loss": 1.6761, "step": 43375 }, { "epoch": 39.276018099547514, "grad_norm": 4.302507400512695, "learning_rate": 0.0001141656740562282, "loss": 1.935, "step": 43400 }, { "epoch": 39.29864253393665, "grad_norm": 4.641387462615967, "learning_rate": 0.00011411514892644595, "loss": 1.9015, "step": 43425 }, { "epoch": 39.321266968325794, "grad_norm": 4.742063045501709, "learning_rate": 0.00011406459940085872, "loss": 1.8855, "step": 43450 }, { "epoch": 39.34389140271493, "grad_norm": 6.475007057189941, "learning_rate": 0.0001140140255109938, "loss": 1.7541, "step": 43475 }, { "epoch": 39.366515837104075, "grad_norm": 4.648007869720459, "learning_rate": 0.00011396342728839376, "loss": 1.8593, "step": 43500 }, { "epoch": 39.38914027149321, "grad_norm": 4.917444229125977, 
"learning_rate": 0.00011391280476461629, "loss": 1.9333, "step": 43525 }, { "epoch": 39.411764705882355, "grad_norm": 5.695059299468994, "learning_rate": 0.00011386215797123425, "loss": 1.8917, "step": 43550 }, { "epoch": 39.43438914027149, "grad_norm": 5.259223937988281, "learning_rate": 0.00011381148693983562, "loss": 1.9861, "step": 43575 }, { "epoch": 39.457013574660635, "grad_norm": 4.331487655639648, "learning_rate": 0.00011376079170202356, "loss": 1.6762, "step": 43600 }, { "epoch": 39.47963800904977, "grad_norm": 5.437529563903809, "learning_rate": 0.00011371007228941624, "loss": 1.8088, "step": 43625 }, { "epoch": 39.502262443438916, "grad_norm": 5.5596022605896, "learning_rate": 0.00011365932873364697, "loss": 1.7223, "step": 43650 }, { "epoch": 39.52488687782805, "grad_norm": 4.620628356933594, "learning_rate": 0.00011360856106636412, "loss": 1.7117, "step": 43675 }, { "epoch": 39.547511312217196, "grad_norm": 4.542058944702148, "learning_rate": 0.00011355776931923104, "loss": 1.9292, "step": 43700 }, { "epoch": 39.57013574660633, "grad_norm": 4.393435955047607, "learning_rate": 0.00011350695352392617, "loss": 1.91, "step": 43725 }, { "epoch": 39.59276018099548, "grad_norm": 5.202274322509766, "learning_rate": 0.00011345611371214287, "loss": 1.7783, "step": 43750 }, { "epoch": 39.61538461538461, "grad_norm": 4.276601791381836, "learning_rate": 0.00011340524991558958, "loss": 1.7813, "step": 43775 }, { "epoch": 39.63800904977376, "grad_norm": 4.053043365478516, "learning_rate": 0.0001133543621659896, "loss": 1.9062, "step": 43800 }, { "epoch": 39.660633484162894, "grad_norm": 5.021280765533447, "learning_rate": 0.00011330345049508122, "loss": 1.7271, "step": 43825 }, { "epoch": 39.68325791855204, "grad_norm": 4.679367542266846, "learning_rate": 0.00011325251493461763, "loss": 1.9449, "step": 43850 }, { "epoch": 39.705882352941174, "grad_norm": 4.967319488525391, "learning_rate": 0.00011320155551636697, "loss": 1.8454, "step": 43875 }, { "epoch": 
39.72850678733032, "grad_norm": 5.371568202972412, "learning_rate": 0.00011315057227211218, "loss": 1.9814, "step": 43900 }, { "epoch": 39.751131221719454, "grad_norm": 4.940624237060547, "learning_rate": 0.00011309956523365114, "loss": 1.8542, "step": 43925 }, { "epoch": 39.7737556561086, "grad_norm": 5.8213043212890625, "learning_rate": 0.0001130485344327965, "loss": 1.8071, "step": 43950 }, { "epoch": 39.796380090497735, "grad_norm": 4.60387659072876, "learning_rate": 0.00011299747990137579, "loss": 1.8441, "step": 43975 }, { "epoch": 39.81900452488688, "grad_norm": 4.248369216918945, "learning_rate": 0.00011294640167123127, "loss": 1.7951, "step": 44000 }, { "epoch": 39.841628959276015, "grad_norm": 5.201953887939453, "learning_rate": 0.00011289529977422006, "loss": 1.8703, "step": 44025 }, { "epoch": 39.86425339366516, "grad_norm": 3.783778190612793, "learning_rate": 0.00011284417424221399, "loss": 1.8602, "step": 44050 }, { "epoch": 39.886877828054295, "grad_norm": 5.063864707946777, "learning_rate": 0.00011279302510709964, "loss": 1.8747, "step": 44075 }, { "epoch": 39.90950226244344, "grad_norm": 5.825732707977295, "learning_rate": 0.00011274185240077831, "loss": 1.8468, "step": 44100 }, { "epoch": 39.932126696832576, "grad_norm": 4.459935188293457, "learning_rate": 0.00011269065615516604, "loss": 1.8034, "step": 44125 }, { "epoch": 39.95475113122172, "grad_norm": 4.279222011566162, "learning_rate": 0.00011263943640219348, "loss": 1.7833, "step": 44150 }, { "epoch": 39.977375565610856, "grad_norm": 4.123258590698242, "learning_rate": 0.00011258819317380599, "loss": 2.0082, "step": 44175 }, { "epoch": 40.0, "grad_norm": 3.5205819606781006, "learning_rate": 0.00011253692650196358, "loss": 1.7227, "step": 44200 }, { "epoch": 40.022624434389144, "grad_norm": 4.596939563751221, "learning_rate": 0.00011248563641864084, "loss": 1.8559, "step": 44225 }, { "epoch": 40.04524886877828, "grad_norm": 5.928715229034424, "learning_rate": 0.000112434322955827, "loss": 
1.5835, "step": 44250 }, { "epoch": 40.067873303167424, "grad_norm": 5.3721699714660645, "learning_rate": 0.00011238298614552586, "loss": 1.7946, "step": 44275 }, { "epoch": 40.09049773755656, "grad_norm": 5.1657633781433105, "learning_rate": 0.00011233162601975576, "loss": 1.5946, "step": 44300 }, { "epoch": 40.113122171945705, "grad_norm": 5.189513206481934, "learning_rate": 0.0001122802426105496, "loss": 1.7134, "step": 44325 }, { "epoch": 40.13574660633484, "grad_norm": 4.161139488220215, "learning_rate": 0.00011222883594995482, "loss": 1.7487, "step": 44350 }, { "epoch": 40.158371040723985, "grad_norm": 4.272087097167969, "learning_rate": 0.0001121774060700333, "loss": 1.7234, "step": 44375 }, { "epoch": 40.18099547511312, "grad_norm": 3.691531181335449, "learning_rate": 0.0001121259530028615, "loss": 1.6367, "step": 44400 }, { "epoch": 40.203619909502265, "grad_norm": 4.305932998657227, "learning_rate": 0.00011207447678053024, "loss": 1.646, "step": 44425 }, { "epoch": 40.2262443438914, "grad_norm": 4.7150397300720215, "learning_rate": 0.00011202297743514485, "loss": 1.6649, "step": 44450 }, { "epoch": 40.248868778280546, "grad_norm": 5.014179229736328, "learning_rate": 0.00011197145499882505, "loss": 1.7508, "step": 44475 }, { "epoch": 40.27149321266968, "grad_norm": 4.31679105758667, "learning_rate": 0.000111919909503705, "loss": 1.631, "step": 44500 }, { "epoch": 40.294117647058826, "grad_norm": 3.9892804622650146, "learning_rate": 0.00011186834098193317, "loss": 1.7101, "step": 44525 }, { "epoch": 40.31674208144796, "grad_norm": 5.431767463684082, "learning_rate": 0.00011181674946567244, "loss": 1.8287, "step": 44550 }, { "epoch": 40.339366515837106, "grad_norm": 5.472475051879883, "learning_rate": 0.00011176513498710005, "loss": 1.5993, "step": 44575 }, { "epoch": 40.36199095022624, "grad_norm": 4.497899055480957, "learning_rate": 0.00011171349757840752, "loss": 1.7544, "step": 44600 }, { "epoch": 40.38461538461539, "grad_norm": 4.565390110015869, 
"learning_rate": 0.00011166183727180069, "loss": 1.5822, "step": 44625 }, { "epoch": 40.40723981900452, "grad_norm": 5.392773151397705, "learning_rate": 0.00011161015409949968, "loss": 1.7425, "step": 44650 }, { "epoch": 40.42986425339367, "grad_norm": 5.015474319458008, "learning_rate": 0.00011155844809373889, "loss": 1.8255, "step": 44675 }, { "epoch": 40.452488687782804, "grad_norm": 5.054648399353027, "learning_rate": 0.00011150671928676691, "loss": 1.8321, "step": 44700 }, { "epoch": 40.47511312217195, "grad_norm": 4.974578380584717, "learning_rate": 0.00011145496771084659, "loss": 1.8072, "step": 44725 }, { "epoch": 40.497737556561084, "grad_norm": 5.514859676361084, "learning_rate": 0.00011140319339825497, "loss": 1.6787, "step": 44750 }, { "epoch": 40.52036199095023, "grad_norm": 4.790339946746826, "learning_rate": 0.00011135139638128332, "loss": 1.5476, "step": 44775 }, { "epoch": 40.542986425339365, "grad_norm": 6.611651420593262, "learning_rate": 0.00011129957669223695, "loss": 1.8185, "step": 44800 }, { "epoch": 40.56561085972851, "grad_norm": 4.54742956161499, "learning_rate": 0.00011124773436343543, "loss": 1.8504, "step": 44825 }, { "epoch": 40.588235294117645, "grad_norm": 3.9661245346069336, "learning_rate": 0.0001111958694272124, "loss": 1.8194, "step": 44850 }, { "epoch": 40.61085972850679, "grad_norm": 4.951076507568359, "learning_rate": 0.00011114398191591562, "loss": 2.0075, "step": 44875 }, { "epoch": 40.633484162895925, "grad_norm": 4.756132125854492, "learning_rate": 0.00011109207186190689, "loss": 1.5663, "step": 44900 }, { "epoch": 40.65610859728507, "grad_norm": 5.745034217834473, "learning_rate": 0.00011104013929756209, "loss": 1.7752, "step": 44925 }, { "epoch": 40.678733031674206, "grad_norm": 4.642129421234131, "learning_rate": 0.00011098818425527114, "loss": 1.642, "step": 44950 }, { "epoch": 40.70135746606335, "grad_norm": 4.858119010925293, "learning_rate": 0.00011093620676743805, "loss": 1.7187, "step": 44975 }, { "epoch": 
40.723981900452486, "grad_norm": 4.606063365936279, "learning_rate": 0.00011088420686648067, "loss": 1.8942, "step": 45000 }, { "epoch": 40.74660633484163, "grad_norm": 5.150576591491699, "learning_rate": 0.000110832184584831, "loss": 1.8682, "step": 45025 }, { "epoch": 40.76923076923077, "grad_norm": 3.524111032485962, "learning_rate": 0.00011078013995493485, "loss": 1.868, "step": 45050 }, { "epoch": 40.79185520361991, "grad_norm": 5.863454818725586, "learning_rate": 0.00011072807300925209, "loss": 1.8941, "step": 45075 }, { "epoch": 40.81447963800905, "grad_norm": 5.784281253814697, "learning_rate": 0.00011067598378025643, "loss": 1.7836, "step": 45100 }, { "epoch": 40.83710407239819, "grad_norm": 5.31019401550293, "learning_rate": 0.00011062387230043554, "loss": 1.8444, "step": 45125 }, { "epoch": 40.85972850678733, "grad_norm": 4.280027866363525, "learning_rate": 0.00011057173860229088, "loss": 1.647, "step": 45150 }, { "epoch": 40.88235294117647, "grad_norm": 5.887366771697998, "learning_rate": 0.00011051958271833787, "loss": 1.8289, "step": 45175 }, { "epoch": 40.90497737556561, "grad_norm": 4.642131328582764, "learning_rate": 0.00011046740468110568, "loss": 1.8778, "step": 45200 }, { "epoch": 40.92760180995475, "grad_norm": 5.087174892425537, "learning_rate": 0.00011041520452313732, "loss": 1.6238, "step": 45225 }, { "epoch": 40.95022624434389, "grad_norm": 4.7690582275390625, "learning_rate": 0.00011036298227698969, "loss": 1.5938, "step": 45250 }, { "epoch": 40.97285067873303, "grad_norm": 5.323506832122803, "learning_rate": 0.00011031073797523332, "loss": 1.887, "step": 45275 }, { "epoch": 40.99547511312217, "grad_norm": 4.844785690307617, "learning_rate": 0.00011025847165045257, "loss": 1.7679, "step": 45300 }, { "epoch": 41.01809954751131, "grad_norm": 4.683863162994385, "learning_rate": 0.00011020618333524554, "loss": 1.6957, "step": 45325 }, { "epoch": 41.040723981900456, "grad_norm": 2.92931866645813, "learning_rate": 0.00011015387306222402, "loss": 
1.6158, "step": 45350 }, { "epoch": 41.06334841628959, "grad_norm": 4.800383567810059, "learning_rate": 0.00011010154086401354, "loss": 1.613, "step": 45375 }, { "epoch": 41.085972850678736, "grad_norm": 5.136124134063721, "learning_rate": 0.00011004918677325321, "loss": 1.4679, "step": 45400 }, { "epoch": 41.10859728506787, "grad_norm": 4.789379596710205, "learning_rate": 0.00010999681082259594, "loss": 1.7905, "step": 45425 }, { "epoch": 41.13122171945702, "grad_norm": 4.386842727661133, "learning_rate": 0.00010994441304470811, "loss": 1.5171, "step": 45450 }, { "epoch": 41.15384615384615, "grad_norm": 4.789997577667236, "learning_rate": 0.00010989199347226987, "loss": 1.6731, "step": 45475 }, { "epoch": 41.1764705882353, "grad_norm": 4.864757061004639, "learning_rate": 0.00010983955213797482, "loss": 1.5448, "step": 45500 }, { "epoch": 41.199095022624434, "grad_norm": 5.094907760620117, "learning_rate": 0.00010978708907453026, "loss": 1.6458, "step": 45525 }, { "epoch": 41.22171945701358, "grad_norm": 5.919474124908447, "learning_rate": 0.00010973460431465693, "loss": 1.6497, "step": 45550 }, { "epoch": 41.244343891402714, "grad_norm": 5.220949649810791, "learning_rate": 0.00010968209789108917, "loss": 1.634, "step": 45575 }, { "epoch": 41.26696832579186, "grad_norm": 4.432976245880127, "learning_rate": 0.00010962956983657482, "loss": 1.4961, "step": 45600 }, { "epoch": 41.289592760180994, "grad_norm": 4.356385707855225, "learning_rate": 0.00010957702018387521, "loss": 1.5595, "step": 45625 }, { "epoch": 41.31221719457014, "grad_norm": 3.8516087532043457, "learning_rate": 0.00010952444896576515, "loss": 1.6545, "step": 45650 }, { "epoch": 41.334841628959275, "grad_norm": 6.359644412994385, "learning_rate": 0.00010947185621503287, "loss": 1.7344, "step": 45675 }, { "epoch": 41.35746606334842, "grad_norm": 5.247097492218018, "learning_rate": 0.00010941924196448005, "loss": 1.5788, "step": 45700 }, { "epoch": 41.380090497737555, "grad_norm": 4.667223930358887, 
"learning_rate": 0.00010936660624692176, "loss": 1.7606, "step": 45725 }, { "epoch": 41.4027149321267, "grad_norm": 4.9451518058776855, "learning_rate": 0.0001093139490951865, "loss": 1.6596, "step": 45750 }, { "epoch": 41.425339366515836, "grad_norm": 4.578155517578125, "learning_rate": 0.00010926127054211612, "loss": 1.5681, "step": 45775 }, { "epoch": 41.44796380090498, "grad_norm": 5.186984539031982, "learning_rate": 0.00010920857062056577, "loss": 1.7949, "step": 45800 }, { "epoch": 41.470588235294116, "grad_norm": 4.541292190551758, "learning_rate": 0.00010915584936340401, "loss": 1.6994, "step": 45825 }, { "epoch": 41.49321266968326, "grad_norm": 4.632137298583984, "learning_rate": 0.00010910310680351266, "loss": 1.8626, "step": 45850 }, { "epoch": 41.515837104072396, "grad_norm": 4.453769207000732, "learning_rate": 0.00010905034297378684, "loss": 1.7099, "step": 45875 }, { "epoch": 41.53846153846154, "grad_norm": 5.0133867263793945, "learning_rate": 0.00010899755790713488, "loss": 1.7266, "step": 45900 }, { "epoch": 41.56108597285068, "grad_norm": 4.538997173309326, "learning_rate": 0.00010894475163647845, "loss": 1.67, "step": 45925 }, { "epoch": 41.58371040723982, "grad_norm": 5.796776294708252, "learning_rate": 0.00010889192419475238, "loss": 1.6908, "step": 45950 }, { "epoch": 41.60633484162896, "grad_norm": 6.282868385314941, "learning_rate": 0.00010883907561490472, "loss": 1.6817, "step": 45975 }, { "epoch": 41.6289592760181, "grad_norm": 5.680196762084961, "learning_rate": 0.00010878620592989672, "loss": 1.7501, "step": 46000 }, { "epoch": 41.65158371040724, "grad_norm": 4.4939398765563965, "learning_rate": 0.00010873331517270277, "loss": 1.6653, "step": 46025 }, { "epoch": 41.67420814479638, "grad_norm": 5.307185649871826, "learning_rate": 0.00010868040337631042, "loss": 1.8483, "step": 46050 }, { "epoch": 41.69683257918552, "grad_norm": 5.628327369689941, "learning_rate": 0.00010862747057372032, "loss": 1.6202, "step": 46075 }, { "epoch": 
41.71945701357466, "grad_norm": 5.606970310211182, "learning_rate": 0.00010857451679794621, "loss": 1.5604, "step": 46100 }, { "epoch": 41.7420814479638, "grad_norm": 5.794508934020996, "learning_rate": 0.00010852154208201502, "loss": 1.6942, "step": 46125 }, { "epoch": 41.76470588235294, "grad_norm": 4.090907573699951, "learning_rate": 0.00010846854645896657, "loss": 1.5644, "step": 46150 }, { "epoch": 41.78733031674208, "grad_norm": 4.428072452545166, "learning_rate": 0.00010841552996185383, "loss": 1.5533, "step": 46175 }, { "epoch": 41.80995475113122, "grad_norm": 5.153327941894531, "learning_rate": 0.00010836249262374277, "loss": 1.5703, "step": 46200 }, { "epoch": 41.83257918552036, "grad_norm": 4.66979455947876, "learning_rate": 0.00010830943447771238, "loss": 1.8543, "step": 46225 }, { "epoch": 41.8552036199095, "grad_norm": 4.50140905380249, "learning_rate": 0.00010825635555685456, "loss": 1.6253, "step": 46250 }, { "epoch": 41.87782805429864, "grad_norm": 3.9756264686584473, "learning_rate": 0.00010820325589427422, "loss": 1.6251, "step": 46275 }, { "epoch": 41.90045248868778, "grad_norm": 5.265647888183594, "learning_rate": 0.00010815013552308918, "loss": 1.6218, "step": 46300 }, { "epoch": 41.92307692307692, "grad_norm": 5.2123308181762695, "learning_rate": 0.00010809699447643023, "loss": 1.705, "step": 46325 }, { "epoch": 41.94570135746606, "grad_norm": 2.903510808944702, "learning_rate": 0.000108043832787441, "loss": 1.6392, "step": 46350 }, { "epoch": 41.9683257918552, "grad_norm": 4.76161003112793, "learning_rate": 0.00010799065048927798, "loss": 1.8432, "step": 46375 }, { "epoch": 41.990950226244344, "grad_norm": 4.800807476043701, "learning_rate": 0.00010793744761511057, "loss": 1.7893, "step": 46400 }, { "epoch": 42.01357466063349, "grad_norm": 4.19058895111084, "learning_rate": 0.00010788422419812098, "loss": 1.5279, "step": 46425 }, { "epoch": 42.036199095022624, "grad_norm": 5.557961940765381, "learning_rate": 0.0001078309802715042, "loss": 
1.6492, "step": 46450 }, { "epoch": 42.05882352941177, "grad_norm": 4.525270938873291, "learning_rate": 0.00010777771586846808, "loss": 1.3892, "step": 46475 }, { "epoch": 42.081447963800905, "grad_norm": 5.34505033493042, "learning_rate": 0.00010772443102223318, "loss": 1.7326, "step": 46500 }, { "epoch": 42.10407239819005, "grad_norm": 5.436420440673828, "learning_rate": 0.00010767112576603282, "loss": 1.4617, "step": 46525 }, { "epoch": 42.126696832579185, "grad_norm": 4.170498371124268, "learning_rate": 0.00010761780013311307, "loss": 1.6095, "step": 46550 }, { "epoch": 42.14932126696833, "grad_norm": 6.534799575805664, "learning_rate": 0.00010756445415673272, "loss": 1.5952, "step": 46575 }, { "epoch": 42.171945701357465, "grad_norm": 4.798860549926758, "learning_rate": 0.00010751108787016321, "loss": 1.5917, "step": 46600 }, { "epoch": 42.19457013574661, "grad_norm": 4.602447032928467, "learning_rate": 0.00010745770130668866, "loss": 1.4728, "step": 46625 }, { "epoch": 42.217194570135746, "grad_norm": 4.544776439666748, "learning_rate": 0.00010740429449960586, "loss": 1.4985, "step": 46650 }, { "epoch": 42.23981900452489, "grad_norm": 7.215638637542725, "learning_rate": 0.00010735086748222419, "loss": 1.5785, "step": 46675 }, { "epoch": 42.262443438914026, "grad_norm": 4.556062698364258, "learning_rate": 0.00010729742028786567, "loss": 1.5428, "step": 46700 }, { "epoch": 42.28506787330317, "grad_norm": 4.447551727294922, "learning_rate": 0.00010724395294986487, "loss": 1.6337, "step": 46725 }, { "epoch": 42.30769230769231, "grad_norm": 3.2624270915985107, "learning_rate": 0.00010719046550156895, "loss": 1.5049, "step": 46750 }, { "epoch": 42.33031674208145, "grad_norm": 5.950804233551025, "learning_rate": 0.00010713695797633759, "loss": 1.612, "step": 46775 }, { "epoch": 42.35294117647059, "grad_norm": 4.925538063049316, "learning_rate": 0.00010708343040754303, "loss": 1.6729, "step": 46800 }, { "epoch": 42.37556561085973, "grad_norm": 4.276064395904541, 
"learning_rate": 0.00010702988282856997, "loss": 1.5955, "step": 46825 }, { "epoch": 42.39819004524887, "grad_norm": 5.456986427307129, "learning_rate": 0.00010697631527281561, "loss": 1.4984, "step": 46850 }, { "epoch": 42.42081447963801, "grad_norm": 4.983468532562256, "learning_rate": 0.0001069227277736896, "loss": 1.6025, "step": 46875 }, { "epoch": 42.44343891402715, "grad_norm": 5.594575881958008, "learning_rate": 0.00010686912036461401, "loss": 1.5873, "step": 46900 }, { "epoch": 42.46606334841629, "grad_norm": 3.534917116165161, "learning_rate": 0.00010681549307902341, "loss": 1.7042, "step": 46925 }, { "epoch": 42.48868778280543, "grad_norm": 4.687277793884277, "learning_rate": 0.00010676184595036465, "loss": 1.4687, "step": 46950 }, { "epoch": 42.51131221719457, "grad_norm": 4.967089653015137, "learning_rate": 0.00010670817901209707, "loss": 1.5756, "step": 46975 }, { "epoch": 42.53393665158371, "grad_norm": 4.12251091003418, "learning_rate": 0.00010665449229769228, "loss": 1.5803, "step": 47000 }, { "epoch": 42.55656108597285, "grad_norm": 4.591906547546387, "learning_rate": 0.00010660078584063423, "loss": 1.7013, "step": 47025 }, { "epoch": 42.57918552036199, "grad_norm": 4.8183393478393555, "learning_rate": 0.00010654705967441924, "loss": 1.566, "step": 47050 }, { "epoch": 42.60180995475113, "grad_norm": 4.178544998168945, "learning_rate": 0.00010649331383255589, "loss": 1.514, "step": 47075 }, { "epoch": 42.62443438914027, "grad_norm": 6.708840370178223, "learning_rate": 0.00010643954834856499, "loss": 1.7042, "step": 47100 }, { "epoch": 42.64705882352941, "grad_norm": 4.6382598876953125, "learning_rate": 0.0001063857632559797, "loss": 1.5801, "step": 47125 }, { "epoch": 42.66968325791855, "grad_norm": 5.215579032897949, "learning_rate": 0.0001063341111504707, "loss": 1.5118, "step": 47150 }, { "epoch": 42.69230769230769, "grad_norm": 5.23929500579834, "learning_rate": 0.00010628028772235998, "loss": 1.5391, "step": 47175 }, { "epoch": 
42.71493212669683, "grad_norm": 4.934380531311035, "learning_rate": 0.00010622644478498442, "loss": 1.6349, "step": 47200 }, { "epoch": 42.737556561085974, "grad_norm": 5.085113525390625, "learning_rate": 0.00010617258237192542, "loss": 1.7049, "step": 47225 }, { "epoch": 42.76018099547511, "grad_norm": 6.066861629486084, "learning_rate": 0.00010611870051677655, "loss": 1.5581, "step": 47250 }, { "epoch": 42.782805429864254, "grad_norm": 5.6061601638793945, "learning_rate": 0.00010606479925314348, "loss": 1.4464, "step": 47275 }, { "epoch": 42.80542986425339, "grad_norm": 5.139316558837891, "learning_rate": 0.000106010878614644, "loss": 1.5885, "step": 47300 }, { "epoch": 42.828054298642535, "grad_norm": 3.9583466053009033, "learning_rate": 0.00010595693863490798, "loss": 1.6028, "step": 47325 }, { "epoch": 42.85067873303167, "grad_norm": 4.169937610626221, "learning_rate": 0.00010590297934757735, "loss": 1.6479, "step": 47350 }, { "epoch": 42.873303167420815, "grad_norm": 5.27691650390625, "learning_rate": 0.0001058490007863061, "loss": 1.6315, "step": 47375 }, { "epoch": 42.89592760180995, "grad_norm": 5.77647590637207, "learning_rate": 0.0001057950029847602, "loss": 1.6568, "step": 47400 }, { "epoch": 42.918552036199095, "grad_norm": 5.759611129760742, "learning_rate": 0.00010574098597661768, "loss": 1.7188, "step": 47425 }, { "epoch": 42.94117647058823, "grad_norm": 5.73583984375, "learning_rate": 0.00010568694979556849, "loss": 1.6243, "step": 47450 }, { "epoch": 42.963800904977376, "grad_norm": 4.507272243499756, "learning_rate": 0.00010563289447531457, "loss": 1.7092, "step": 47475 }, { "epoch": 42.98642533936652, "grad_norm": 5.818449020385742, "learning_rate": 0.00010557882004956979, "loss": 1.431, "step": 47500 }, { "epoch": 43.009049773755656, "grad_norm": 5.47661018371582, "learning_rate": 0.00010552472655205996, "loss": 1.4526, "step": 47525 }, { "epoch": 43.0316742081448, "grad_norm": 5.197766304016113, "learning_rate": 0.00010547061401652269, "loss": 
1.4359, "step": 47550 }, { "epoch": 43.05429864253394, "grad_norm": 3.4543919563293457, "learning_rate": 0.00010541648247670762, "loss": 1.5048, "step": 47575 }, { "epoch": 43.07692307692308, "grad_norm": 6.372855186462402, "learning_rate": 0.00010536233196637611, "loss": 1.5558, "step": 47600 }, { "epoch": 43.09954751131222, "grad_norm": 5.4943413734436035, "learning_rate": 0.0001053081625193014, "loss": 1.4904, "step": 47625 }, { "epoch": 43.12217194570136, "grad_norm": 3.5853707790374756, "learning_rate": 0.00010525397416926856, "loss": 1.4444, "step": 47650 }, { "epoch": 43.1447963800905, "grad_norm": 5.288698196411133, "learning_rate": 0.00010519976695007442, "loss": 1.4926, "step": 47675 }, { "epoch": 43.16742081447964, "grad_norm": 5.487499713897705, "learning_rate": 0.00010514554089552758, "loss": 1.5038, "step": 47700 }, { "epoch": 43.19004524886878, "grad_norm": 5.475346088409424, "learning_rate": 0.00010509129603944842, "loss": 1.4347, "step": 47725 }, { "epoch": 43.21266968325792, "grad_norm": 4.218531608581543, "learning_rate": 0.00010503703241566899, "loss": 1.6238, "step": 47750 }, { "epoch": 43.23529411764706, "grad_norm": 5.887923717498779, "learning_rate": 0.0001049827500580331, "loss": 1.5715, "step": 47775 }, { "epoch": 43.2579185520362, "grad_norm": 4.928927898406982, "learning_rate": 0.00010492844900039621, "loss": 1.5173, "step": 47800 }, { "epoch": 43.28054298642534, "grad_norm": 4.930531024932861, "learning_rate": 0.00010487412927662547, "loss": 1.5222, "step": 47825 }, { "epoch": 43.30316742081448, "grad_norm": 4.241865158081055, "learning_rate": 0.00010481979092059963, "loss": 1.4223, "step": 47850 }, { "epoch": 43.32579185520362, "grad_norm": 5.365390777587891, "learning_rate": 0.00010476543396620911, "loss": 1.5511, "step": 47875 }, { "epoch": 43.34841628959276, "grad_norm": 4.603792190551758, "learning_rate": 0.00010471105844735592, "loss": 1.4558, "step": 47900 }, { "epoch": 43.3710407239819, "grad_norm": 5.6232781410217285, 
"learning_rate": 0.00010465666439795359, "loss": 1.4777, "step": 47925 }, { "epoch": 43.39366515837104, "grad_norm": 5.014750003814697, "learning_rate": 0.00010460225185192727, "loss": 1.664, "step": 47950 }, { "epoch": 43.41628959276018, "grad_norm": 4.773794651031494, "learning_rate": 0.00010454782084321365, "loss": 1.4406, "step": 47975 }, { "epoch": 43.43891402714932, "grad_norm": 5.268489837646484, "learning_rate": 0.0001044933714057609, "loss": 1.3602, "step": 48000 }, { "epoch": 43.46153846153846, "grad_norm": 3.3007686138153076, "learning_rate": 0.0001044389035735287, "loss": 1.4394, "step": 48025 }, { "epoch": 43.484162895927604, "grad_norm": 4.0678253173828125, "learning_rate": 0.0001043844173804882, "loss": 1.513, "step": 48050 }, { "epoch": 43.50678733031674, "grad_norm": 4.807238578796387, "learning_rate": 0.00010432991286062201, "loss": 1.4545, "step": 48075 }, { "epoch": 43.529411764705884, "grad_norm": 4.851547718048096, "learning_rate": 0.00010427539004792414, "loss": 1.5125, "step": 48100 }, { "epoch": 43.55203619909502, "grad_norm": 4.754851341247559, "learning_rate": 0.00010422084897640007, "loss": 1.5075, "step": 48125 }, { "epoch": 43.574660633484164, "grad_norm": 5.759084701538086, "learning_rate": 0.00010416628968006659, "loss": 1.498, "step": 48150 }, { "epoch": 43.5972850678733, "grad_norm": 5.977701663970947, "learning_rate": 0.0001041117121929519, "loss": 1.6014, "step": 48175 }, { "epoch": 43.619909502262445, "grad_norm": 6.444182872772217, "learning_rate": 0.00010405711654909558, "loss": 1.3752, "step": 48200 }, { "epoch": 43.64253393665158, "grad_norm": 4.371002197265625, "learning_rate": 0.00010400250278254844, "loss": 1.3662, "step": 48225 }, { "epoch": 43.665158371040725, "grad_norm": 4.785689830780029, "learning_rate": 0.00010394787092737267, "loss": 1.5946, "step": 48250 }, { "epoch": 43.68778280542986, "grad_norm": 5.227655410766602, "learning_rate": 0.00010389322101764175, "loss": 1.4658, "step": 48275 }, { "epoch": 
43.710407239819006, "grad_norm": 4.917147159576416, "learning_rate": 0.00010383855308744037, "loss": 1.4717, "step": 48300 }, { "epoch": 43.73303167420814, "grad_norm": 5.020429611206055, "learning_rate": 0.00010378386717086447, "loss": 1.6081, "step": 48325 }, { "epoch": 43.755656108597286, "grad_norm": 5.552148818969727, "learning_rate": 0.00010372916330202122, "loss": 1.5598, "step": 48350 }, { "epoch": 43.77828054298642, "grad_norm": 4.826664924621582, "learning_rate": 0.00010367444151502902, "loss": 1.5655, "step": 48375 }, { "epoch": 43.800904977375566, "grad_norm": 4.741078853607178, "learning_rate": 0.00010361970184401735, "loss": 1.5584, "step": 48400 }, { "epoch": 43.8235294117647, "grad_norm": 5.433865070343018, "learning_rate": 0.00010356494432312695, "loss": 1.4365, "step": 48425 }, { "epoch": 43.84615384615385, "grad_norm": 4.902193546295166, "learning_rate": 0.00010351016898650963, "loss": 1.6606, "step": 48450 }, { "epoch": 43.86877828054298, "grad_norm": 5.173399925231934, "learning_rate": 0.00010345537586832833, "loss": 1.5314, "step": 48475 }, { "epoch": 43.89140271493213, "grad_norm": 5.055915832519531, "learning_rate": 0.00010340056500275707, "loss": 1.5808, "step": 48500 }, { "epoch": 43.914027149321264, "grad_norm": 5.21277379989624, "learning_rate": 0.00010334573642398098, "loss": 1.4901, "step": 48525 }, { "epoch": 43.93665158371041, "grad_norm": 5.40691614151001, "learning_rate": 0.00010329089016619616, "loss": 1.6291, "step": 48550 }, { "epoch": 43.959276018099544, "grad_norm": 4.3971405029296875, "learning_rate": 0.00010323602626360982, "loss": 1.5533, "step": 48575 }, { "epoch": 43.98190045248869, "grad_norm": 4.630727767944336, "learning_rate": 0.00010318114475044012, "loss": 1.3832, "step": 48600 }, { "epoch": 44.00452488687783, "grad_norm": 4.368161678314209, "learning_rate": 0.00010312624566091621, "loss": 1.5873, "step": 48625 }, { "epoch": 44.02714932126697, "grad_norm": 3.7434568405151367, "learning_rate": 0.00010307132902927823, 
"loss": 1.4848, "step": 48650 }, { "epoch": 44.04977375565611, "grad_norm": 4.684480667114258, "learning_rate": 0.00010301639488977724, "loss": 1.3814, "step": 48675 }, { "epoch": 44.07239819004525, "grad_norm": 3.173452138900757, "learning_rate": 0.00010296144327667522, "loss": 1.4749, "step": 48700 }, { "epoch": 44.09502262443439, "grad_norm": 5.971280574798584, "learning_rate": 0.00010290647422424504, "loss": 1.3801, "step": 48725 }, { "epoch": 44.11764705882353, "grad_norm": 4.780723571777344, "learning_rate": 0.00010285148776677046, "loss": 1.402, "step": 48750 }, { "epoch": 44.14027149321267, "grad_norm": 4.479318618774414, "learning_rate": 0.00010279648393854613, "loss": 1.4263, "step": 48775 }, { "epoch": 44.16289592760181, "grad_norm": 4.854914665222168, "learning_rate": 0.00010274146277387746, "loss": 1.4235, "step": 48800 }, { "epoch": 44.18552036199095, "grad_norm": 4.779385566711426, "learning_rate": 0.0001026864243070807, "loss": 1.3708, "step": 48825 }, { "epoch": 44.20814479638009, "grad_norm": 5.719334602355957, "learning_rate": 0.00010263136857248292, "loss": 1.4497, "step": 48850 }, { "epoch": 44.23076923076923, "grad_norm": 5.735396862030029, "learning_rate": 0.00010257629560442195, "loss": 1.2651, "step": 48875 }, { "epoch": 44.25339366515837, "grad_norm": 4.51582670211792, "learning_rate": 0.00010252120543724635, "loss": 1.5399, "step": 48900 }, { "epoch": 44.276018099547514, "grad_norm": 5.506528854370117, "learning_rate": 0.00010246609810531541, "loss": 1.4355, "step": 48925 }, { "epoch": 44.29864253393665, "grad_norm": 5.63154411315918, "learning_rate": 0.00010241097364299913, "loss": 1.3229, "step": 48950 }, { "epoch": 44.321266968325794, "grad_norm": 5.07582950592041, "learning_rate": 0.00010235583208467818, "loss": 1.4395, "step": 48975 }, { "epoch": 44.34389140271493, "grad_norm": 5.703203201293945, "learning_rate": 0.00010230067346474395, "loss": 1.5553, "step": 49000 }, { "epoch": 44.366515837104075, "grad_norm": 4.868218421936035, 
"learning_rate": 0.00010224549781759842, "loss": 1.3299, "step": 49025 }, { "epoch": 44.38914027149321, "grad_norm": 5.52424430847168, "learning_rate": 0.00010219030517765418, "loss": 1.52, "step": 49050 }, { "epoch": 44.411764705882355, "grad_norm": 6.301329135894775, "learning_rate": 0.00010213509557933443, "loss": 1.3564, "step": 49075 }, { "epoch": 44.43438914027149, "grad_norm": 5.152988433837891, "learning_rate": 0.00010207986905707296, "loss": 1.4055, "step": 49100 }, { "epoch": 44.457013574660635, "grad_norm": 5.810203552246094, "learning_rate": 0.00010202462564531415, "loss": 1.4116, "step": 49125 }, { "epoch": 44.47963800904977, "grad_norm": 5.6694655418396, "learning_rate": 0.00010196936537851282, "loss": 1.5294, "step": 49150 }, { "epoch": 44.502262443438916, "grad_norm": 5.7976789474487305, "learning_rate": 0.00010191408829113439, "loss": 1.5283, "step": 49175 }, { "epoch": 44.52488687782805, "grad_norm": 5.283827781677246, "learning_rate": 0.0001018587944176547, "loss": 1.3695, "step": 49200 }, { "epoch": 44.547511312217196, "grad_norm": 5.265637397766113, "learning_rate": 0.00010180348379256013, "loss": 1.3707, "step": 49225 }, { "epoch": 44.57013574660633, "grad_norm": 4.790452003479004, "learning_rate": 0.00010174815645034747, "loss": 1.5486, "step": 49250 }, { "epoch": 44.59276018099548, "grad_norm": 4.9435248374938965, "learning_rate": 0.00010169281242552394, "loss": 1.4313, "step": 49275 }, { "epoch": 44.61538461538461, "grad_norm": 5.264336109161377, "learning_rate": 0.00010163745175260714, "loss": 1.4269, "step": 49300 }, { "epoch": 44.63800904977376, "grad_norm": 4.573849678039551, "learning_rate": 0.00010158207446612511, "loss": 1.4565, "step": 49325 }, { "epoch": 44.660633484162894, "grad_norm": 4.074705123901367, "learning_rate": 0.00010152668060061618, "loss": 1.5345, "step": 49350 }, { "epoch": 44.68325791855204, "grad_norm": 6.016761779785156, "learning_rate": 0.0001014712701906291, "loss": 1.6035, "step": 49375 }, { "epoch": 
44.705882352941174, "grad_norm": 5.152374267578125, "learning_rate": 0.0001014158432707229, "loss": 1.4211, "step": 49400 }, { "epoch": 44.72850678733032, "grad_norm": 5.270075798034668, "learning_rate": 0.00010136039987546688, "loss": 1.4602, "step": 49425 }, { "epoch": 44.751131221719454, "grad_norm": 5.11018180847168, "learning_rate": 0.00010130494003944063, "loss": 1.3028, "step": 49450 }, { "epoch": 44.7737556561086, "grad_norm": 4.941214561462402, "learning_rate": 0.00010124946379723408, "loss": 1.4229, "step": 49475 }, { "epoch": 44.796380090497735, "grad_norm": 4.68281888961792, "learning_rate": 0.00010119397118344723, "loss": 1.4369, "step": 49500 }, { "epoch": 44.81900452488688, "grad_norm": 4.22309684753418, "learning_rate": 0.00010113846223269042, "loss": 1.4697, "step": 49525 }, { "epoch": 44.841628959276015, "grad_norm": 5.230871200561523, "learning_rate": 0.00010108293697958412, "loss": 1.5112, "step": 49550 }, { "epoch": 44.86425339366516, "grad_norm": 4.829254627227783, "learning_rate": 0.00010102739545875901, "loss": 1.424, "step": 49575 }, { "epoch": 44.886877828054295, "grad_norm": 5.477245807647705, "learning_rate": 0.00010097183770485589, "loss": 1.5255, "step": 49600 }, { "epoch": 44.90950226244344, "grad_norm": 4.841703414916992, "learning_rate": 0.00010091626375252565, "loss": 1.5065, "step": 49625 }, { "epoch": 44.932126696832576, "grad_norm": 5.197177410125732, "learning_rate": 0.00010086067363642935, "loss": 1.3484, "step": 49650 }, { "epoch": 44.95475113122172, "grad_norm": 5.4379682540893555, "learning_rate": 0.0001008050673912381, "loss": 1.5079, "step": 49675 }, { "epoch": 44.977375565610856, "grad_norm": 5.954039573669434, "learning_rate": 0.00010074944505163306, "loss": 1.4882, "step": 49700 }, { "epoch": 45.0, "grad_norm": 5.441366672515869, "learning_rate": 0.00010069380665230545, "loss": 1.5511, "step": 49725 }, { "epoch": 45.022624434389144, "grad_norm": null, "learning_rate": 0.00010064037871217546, "loss": 1.2875, "step": 
49750 }, { "epoch": 45.04524886877828, "grad_norm": 8.035948753356934, "learning_rate": 0.00010058470893646217, "loss": 1.4301, "step": 49775 }, { "epoch": 45.067873303167424, "grad_norm": 5.814290523529053, "learning_rate": 0.0001005290232037709, "loss": 1.3189, "step": 49800 }, { "epoch": 45.09049773755656, "grad_norm": 4.740077018737793, "learning_rate": 0.0001004733215488324, "loss": 1.346, "step": 49825 }, { "epoch": 45.113122171945705, "grad_norm": 5.189807891845703, "learning_rate": 0.0001004176040063873, "loss": 1.2482, "step": 49850 }, { "epoch": 45.13574660633484, "grad_norm": 5.7160491943359375, "learning_rate": 0.00010036187061118628, "loss": 1.4034, "step": 49875 }, { "epoch": 45.158371040723985, "grad_norm": 3.8227548599243164, "learning_rate": 0.00010030612139798972, "loss": 1.2693, "step": 49900 }, { "epoch": 45.18099547511312, "grad_norm": 4.569591522216797, "learning_rate": 0.000100250356401568, "loss": 1.3758, "step": 49925 }, { "epoch": 45.203619909502265, "grad_norm": 5.352408409118652, "learning_rate": 0.00010019457565670129, "loss": 1.3286, "step": 49950 }, { "epoch": 45.2262443438914, "grad_norm": 4.980902671813965, "learning_rate": 0.00010013877919817958, "loss": 1.3187, "step": 49975 }, { "epoch": 45.248868778280546, "grad_norm": 5.208808898925781, "learning_rate": 0.00010008296706080273, "loss": 1.4312, "step": 50000 }, { "epoch": 45.27149321266968, "grad_norm": 3.345900058746338, "learning_rate": 0.00010002713927938026, "loss": 1.3032, "step": 50025 }, { "epoch": 45.294117647058826, "grad_norm": 4.701058387756348, "learning_rate": 9.997129588873153e-05, "loss": 1.4663, "step": 50050 }, { "epoch": 45.31674208144796, "grad_norm": 4.687870979309082, "learning_rate": 9.991543692368565e-05, "loss": 1.1913, "step": 50075 }, { "epoch": 45.339366515837106, "grad_norm": 5.097414493560791, "learning_rate": 9.985956241908134e-05, "loss": 1.488, "step": 50100 }, { "epoch": 45.36199095022624, "grad_norm": 5.062108516693115, "learning_rate": 
9.980367240976714e-05, "loss": 1.3711, "step": 50125 }, { "epoch": 45.38461538461539, "grad_norm": 4.611981391906738, "learning_rate": 9.974776693060117e-05, "loss": 1.3808, "step": 50150 }, { "epoch": 45.40723981900452, "grad_norm": 6.802997589111328, "learning_rate": 9.969184601645124e-05, "loss": 1.2606, "step": 50175 }, { "epoch": 45.42986425339367, "grad_norm": 4.539482593536377, "learning_rate": 9.963590970219478e-05, "loss": 1.4523, "step": 50200 }, { "epoch": 45.452488687782804, "grad_norm": 4.706707000732422, "learning_rate": 9.957995802271883e-05, "loss": 1.3775, "step": 50225 }, { "epoch": 45.47511312217195, "grad_norm": 5.560224533081055, "learning_rate": 9.952399101291996e-05, "loss": 1.3178, "step": 50250 }, { "epoch": 45.497737556561084, "grad_norm": 5.96610164642334, "learning_rate": 9.94680087077044e-05, "loss": 1.2672, "step": 50275 }, { "epoch": 45.52036199095023, "grad_norm": 4.098049163818359, "learning_rate": 9.941201114198785e-05, "loss": 1.3768, "step": 50300 }, { "epoch": 45.542986425339365, "grad_norm": 4.603151321411133, "learning_rate": 9.935599835069552e-05, "loss": 1.3075, "step": 50325 }, { "epoch": 45.56561085972851, "grad_norm": 4.098180770874023, "learning_rate": 9.929997036876215e-05, "loss": 1.2581, "step": 50350 }, { "epoch": 45.588235294117645, "grad_norm": 4.840689182281494, "learning_rate": 9.924392723113195e-05, "loss": 1.3037, "step": 50375 }, { "epoch": 45.61085972850679, "grad_norm": 4.30715799331665, "learning_rate": 9.918786897275859e-05, "loss": 1.3482, "step": 50400 }, { "epoch": 45.633484162895925, "grad_norm": 5.419538497924805, "learning_rate": 9.913179562860512e-05, "loss": 1.3613, "step": 50425 }, { "epoch": 45.65610859728507, "grad_norm": 5.290268898010254, "learning_rate": 9.907570723364405e-05, "loss": 1.3416, "step": 50450 }, { "epoch": 45.678733031674206, "grad_norm": 4.399529933929443, "learning_rate": 9.901960382285728e-05, "loss": 1.3726, "step": 50475 }, { "epoch": 45.70135746606335, "grad_norm": 
5.522080898284912, "learning_rate": 9.896348543123606e-05, "loss": 1.4135, "step": 50500 }, { "epoch": 45.723981900452486, "grad_norm": 5.97099494934082, "learning_rate": 9.890735209378095e-05, "loss": 1.4295, "step": 50525 }, { "epoch": 45.74660633484163, "grad_norm": 5.0166120529174805, "learning_rate": 9.885120384550189e-05, "loss": 1.387, "step": 50550 }, { "epoch": 45.76923076923077, "grad_norm": 4.919198989868164, "learning_rate": 9.879504072141808e-05, "loss": 1.4421, "step": 50575 }, { "epoch": 45.79185520361991, "grad_norm": 5.102237224578857, "learning_rate": 9.873886275655801e-05, "loss": 1.4403, "step": 50600 }, { "epoch": 45.81447963800905, "grad_norm": 5.842960834503174, "learning_rate": 9.868266998595943e-05, "loss": 1.3875, "step": 50625 }, { "epoch": 45.83710407239819, "grad_norm": 5.246617317199707, "learning_rate": 9.862646244466932e-05, "loss": 1.5681, "step": 50650 }, { "epoch": 45.85972850678733, "grad_norm": 5.355321884155273, "learning_rate": 9.857024016774387e-05, "loss": 1.3422, "step": 50675 }, { "epoch": 45.88235294117647, "grad_norm": 5.763421058654785, "learning_rate": 9.851400319024845e-05, "loss": 1.3366, "step": 50700 }, { "epoch": 45.90497737556561, "grad_norm": 5.695327281951904, "learning_rate": 9.845775154725766e-05, "loss": 1.394, "step": 50725 }, { "epoch": 45.92760180995475, "grad_norm": 5.689870357513428, "learning_rate": 9.840148527385517e-05, "loss": 1.3939, "step": 50750 }, { "epoch": 45.95022624434389, "grad_norm": 4.690075397491455, "learning_rate": 9.834520440513379e-05, "loss": 1.5407, "step": 50775 }, { "epoch": 45.97285067873303, "grad_norm": 3.7511346340179443, "learning_rate": 9.828890897619545e-05, "loss": 1.4854, "step": 50800 }, { "epoch": 45.99547511312217, "grad_norm": 4.550795555114746, "learning_rate": 9.82325990221512e-05, "loss": 1.405, "step": 50825 }, { "epoch": 46.01809954751131, "grad_norm": 5.129349231719971, "learning_rate": 9.817627457812105e-05, "loss": 1.31, "step": 50850 }, { "epoch": 
46.040723981900456, "grad_norm": 4.822476387023926, "learning_rate": 9.811993567923413e-05, "loss": 1.2706, "step": 50875 }, { "epoch": 46.06334841628959, "grad_norm": 5.464430809020996, "learning_rate": 9.806358236062858e-05, "loss": 1.184, "step": 50900 }, { "epoch": 46.085972850678736, "grad_norm": 4.488974571228027, "learning_rate": 9.800721465745147e-05, "loss": 1.2153, "step": 50925 }, { "epoch": 46.10859728506787, "grad_norm": 4.2137346267700195, "learning_rate": 9.795083260485891e-05, "loss": 1.2017, "step": 50950 }, { "epoch": 46.13122171945702, "grad_norm": 5.356151580810547, "learning_rate": 9.789443623801593e-05, "loss": 1.2963, "step": 50975 }, { "epoch": 46.15384615384615, "grad_norm": 4.786318302154541, "learning_rate": 9.783802559209652e-05, "loss": 1.284, "step": 51000 }, { "epoch": 46.1764705882353, "grad_norm": 4.5469865798950195, "learning_rate": 9.77816007022835e-05, "loss": 1.304, "step": 51025 }, { "epoch": 46.199095022624434, "grad_norm": 6.047292709350586, "learning_rate": 9.772516160376866e-05, "loss": 1.3066, "step": 51050 }, { "epoch": 46.22171945701358, "grad_norm": 3.767551898956299, "learning_rate": 9.766870833175256e-05, "loss": 1.201, "step": 51075 }, { "epoch": 46.244343891402714, "grad_norm": 4.860236644744873, "learning_rate": 9.76122409214447e-05, "loss": 1.2653, "step": 51100 }, { "epoch": 46.26696832579186, "grad_norm": 4.5802459716796875, "learning_rate": 9.755575940806337e-05, "loss": 1.4322, "step": 51125 }, { "epoch": 46.289592760180994, "grad_norm": 5.008360862731934, "learning_rate": 9.74992638268356e-05, "loss": 1.2949, "step": 51150 }, { "epoch": 46.31221719457014, "grad_norm": 4.551988124847412, "learning_rate": 9.744275421299724e-05, "loss": 1.2047, "step": 51175 }, { "epoch": 46.334841628959275, "grad_norm": 6.203924179077148, "learning_rate": 9.738623060179288e-05, "loss": 1.2815, "step": 51200 }, { "epoch": 46.35746606334842, "grad_norm": 4.722567081451416, "learning_rate": 9.732969302847585e-05, "loss": 1.2592, 
"step": 51225 }, { "epoch": 46.380090497737555, "grad_norm": 6.056229114532471, "learning_rate": 9.727314152830819e-05, "loss": 1.3173, "step": 51250 }, { "epoch": 46.4027149321267, "grad_norm": 3.917114019393921, "learning_rate": 9.721657613656058e-05, "loss": 1.2974, "step": 51275 }, { "epoch": 46.425339366515836, "grad_norm": 4.1579203605651855, "learning_rate": 9.715999688851245e-05, "loss": 1.3222, "step": 51300 }, { "epoch": 46.44796380090498, "grad_norm": 5.7063469886779785, "learning_rate": 9.710340381945179e-05, "loss": 1.3636, "step": 51325 }, { "epoch": 46.470588235294116, "grad_norm": 4.55832576751709, "learning_rate": 9.704679696467525e-05, "loss": 1.2854, "step": 51350 }, { "epoch": 46.49321266968326, "grad_norm": 5.123040199279785, "learning_rate": 9.699017635948812e-05, "loss": 1.4846, "step": 51375 }, { "epoch": 46.515837104072396, "grad_norm": 4.968398094177246, "learning_rate": 9.693354203920413e-05, "loss": 1.3019, "step": 51400 }, { "epoch": 46.53846153846154, "grad_norm": 4.814697265625, "learning_rate": 9.687689403914572e-05, "loss": 1.2732, "step": 51425 }, { "epoch": 46.56108597285068, "grad_norm": 5.311944484710693, "learning_rate": 9.682023239464377e-05, "loss": 1.3774, "step": 51450 }, { "epoch": 46.58371040723982, "grad_norm": 4.640413761138916, "learning_rate": 9.676355714103769e-05, "loss": 1.2123, "step": 51475 }, { "epoch": 46.60633484162896, "grad_norm": 4.477193832397461, "learning_rate": 9.670686831367536e-05, "loss": 1.2285, "step": 51500 }, { "epoch": 46.6289592760181, "grad_norm": 4.573556900024414, "learning_rate": 9.665016594791321e-05, "loss": 1.3481, "step": 51525 }, { "epoch": 46.65158371040724, "grad_norm": 4.527987957000732, "learning_rate": 9.659345007911601e-05, "loss": 1.2507, "step": 51550 }, { "epoch": 46.67420814479638, "grad_norm": 5.239626407623291, "learning_rate": 9.6536720742657e-05, "loss": 1.4123, "step": 51575 }, { "epoch": 46.69683257918552, "grad_norm": 5.0842413902282715, "learning_rate": 
9.64799779739178e-05, "loss": 1.362, "step": 51600 }, { "epoch": 46.71945701357466, "grad_norm": 5.790838241577148, "learning_rate": 9.642322180828843e-05, "loss": 1.3565, "step": 51625 }, { "epoch": 46.7420814479638, "grad_norm": 5.47201681137085, "learning_rate": 9.636645228116726e-05, "loss": 1.2825, "step": 51650 }, { "epoch": 46.76470588235294, "grad_norm": 3.6880836486816406, "learning_rate": 9.6309669427961e-05, "loss": 1.4405, "step": 51675 }, { "epoch": 46.78733031674208, "grad_norm": 5.187725067138672, "learning_rate": 9.625287328408463e-05, "loss": 1.1651, "step": 51700 }, { "epoch": 46.80995475113122, "grad_norm": 6.163667678833008, "learning_rate": 9.619606388496146e-05, "loss": 1.2395, "step": 51725 }, { "epoch": 46.83257918552036, "grad_norm": 4.501791954040527, "learning_rate": 9.613924126602308e-05, "loss": 1.3948, "step": 51750 }, { "epoch": 46.8552036199095, "grad_norm": 4.55626916885376, "learning_rate": 9.608240546270928e-05, "loss": 1.2392, "step": 51775 }, { "epoch": 46.87782805429864, "grad_norm": 4.100018501281738, "learning_rate": 9.602555651046811e-05, "loss": 1.2744, "step": 51800 }, { "epoch": 46.90045248868778, "grad_norm": 4.403803825378418, "learning_rate": 9.596869444475578e-05, "loss": 1.2697, "step": 51825 }, { "epoch": 46.92307692307692, "grad_norm": 5.647763729095459, "learning_rate": 9.591181930103675e-05, "loss": 1.3068, "step": 51850 }, { "epoch": 46.94570135746606, "grad_norm": 4.663811206817627, "learning_rate": 9.585493111478352e-05, "loss": 1.3152, "step": 51875 }, { "epoch": 46.9683257918552, "grad_norm": 4.8924455642700195, "learning_rate": 9.579802992147688e-05, "loss": 1.3996, "step": 51900 }, { "epoch": 46.990950226244344, "grad_norm": 4.687934398651123, "learning_rate": 9.574111575660559e-05, "loss": 1.2499, "step": 51925 }, { "epoch": 47.01357466063349, "grad_norm": 4.841540813446045, "learning_rate": 9.568418865566658e-05, "loss": 1.2103, "step": 51950 }, { "epoch": 47.036199095022624, "grad_norm": 
4.487457275390625, "learning_rate": 9.562724865416483e-05, "loss": 1.1023, "step": 51975 }, { "epoch": 47.05882352941177, "grad_norm": 5.510406017303467, "learning_rate": 9.557029578761332e-05, "loss": 1.2958, "step": 52000 }, { "epoch": 47.081447963800905, "grad_norm": 5.464969635009766, "learning_rate": 9.551333009153317e-05, "loss": 0.9992, "step": 52025 }, { "epoch": 47.10407239819005, "grad_norm": 4.596621513366699, "learning_rate": 9.545635160145339e-05, "loss": 1.1281, "step": 52050 }, { "epoch": 47.126696832579185, "grad_norm": 5.372847557067871, "learning_rate": 9.5399360352911e-05, "loss": 1.4568, "step": 52075 }, { "epoch": 47.14932126696833, "grad_norm": 5.727240085601807, "learning_rate": 9.534235638145098e-05, "loss": 1.2155, "step": 52100 }, { "epoch": 47.171945701357465, "grad_norm": 4.874152183532715, "learning_rate": 9.528533972262628e-05, "loss": 1.2021, "step": 52125 }, { "epoch": 47.19457013574661, "grad_norm": 4.732327938079834, "learning_rate": 9.523059182689132e-05, "loss": 1.3154, "step": 52150 }, { "epoch": 47.217194570135746, "grad_norm": 5.301266670227051, "learning_rate": 9.517355040399401e-05, "loss": 1.0518, "step": 52175 }, { "epoch": 47.23981900452489, "grad_norm": 3.8940746784210205, "learning_rate": 9.511649639901494e-05, "loss": 1.1216, "step": 52200 }, { "epoch": 47.262443438914026, "grad_norm": 3.6668171882629395, "learning_rate": 9.505942984753822e-05, "loss": 1.3145, "step": 52225 }, { "epoch": 47.28506787330317, "grad_norm": 4.5170416831970215, "learning_rate": 9.500235078515583e-05, "loss": 1.3284, "step": 52250 }, { "epoch": 47.30769230769231, "grad_norm": 6.2065863609313965, "learning_rate": 9.494525924746748e-05, "loss": 1.1503, "step": 52275 }, { "epoch": 47.33031674208145, "grad_norm": 4.878929615020752, "learning_rate": 9.488815527008077e-05, "loss": 1.1821, "step": 52300 }, { "epoch": 47.35294117647059, "grad_norm": 5.171248912811279, "learning_rate": 9.483103888861095e-05, "loss": 1.163, "step": 52325 }, { "epoch": 
47.37556561085973, "grad_norm": 6.436504364013672, "learning_rate": 9.477391013868105e-05, "loss": 1.2094, "step": 52350 }, { "epoch": 47.39819004524887, "grad_norm": 3.0119566917419434, "learning_rate": 9.471676905592184e-05, "loss": 1.1008, "step": 52375 }, { "epoch": 47.42081447963801, "grad_norm": 4.798369407653809, "learning_rate": 9.46596156759717e-05, "loss": 1.2489, "step": 52400 }, { "epoch": 47.44343891402715, "grad_norm": 4.6900811195373535, "learning_rate": 9.460245003447679e-05, "loss": 1.1736, "step": 52425 }, { "epoch": 47.46606334841629, "grad_norm": 4.819660663604736, "learning_rate": 9.454527216709086e-05, "loss": 1.3886, "step": 52450 }, { "epoch": 47.48868778280543, "grad_norm": 4.714395046234131, "learning_rate": 9.448808210947526e-05, "loss": 1.3167, "step": 52475 }, { "epoch": 47.51131221719457, "grad_norm": 4.665770053863525, "learning_rate": 9.443087989729899e-05, "loss": 1.1773, "step": 52500 }, { "epoch": 47.53393665158371, "grad_norm": 5.140773773193359, "learning_rate": 9.43736655662386e-05, "loss": 1.3442, "step": 52525 }, { "epoch": 47.55656108597285, "grad_norm": 3.1296918392181396, "learning_rate": 9.431643915197818e-05, "loss": 1.1789, "step": 52550 }, { "epoch": 47.57918552036199, "grad_norm": 5.336180686950684, "learning_rate": 9.425920069020947e-05, "loss": 1.185, "step": 52575 }, { "epoch": 47.60180995475113, "grad_norm": 5.8437113761901855, "learning_rate": 9.420195021663156e-05, "loss": 1.2764, "step": 52600 }, { "epoch": 47.62443438914027, "grad_norm": 4.632587909698486, "learning_rate": 9.414468776695116e-05, "loss": 1.3965, "step": 52625 }, { "epoch": 47.64705882352941, "grad_norm": 4.85280179977417, "learning_rate": 9.408741337688238e-05, "loss": 1.1969, "step": 52650 }, { "epoch": 47.66968325791855, "grad_norm": 5.9409499168396, "learning_rate": 9.40301270821468e-05, "loss": 1.1177, "step": 52675 }, { "epoch": 47.69230769230769, "grad_norm": 5.9572978019714355, "learning_rate": 9.397282891847343e-05, "loss": 1.3279, 
"step": 52700 }, { "epoch": 47.71493212669683, "grad_norm": 4.965830326080322, "learning_rate": 9.391551892159867e-05, "loss": 1.3477, "step": 52725 }, { "epoch": 47.737556561085974, "grad_norm": 5.128588676452637, "learning_rate": 9.385819712726629e-05, "loss": 1.3403, "step": 52750 }, { "epoch": 47.76018099547511, "grad_norm": 4.773208141326904, "learning_rate": 9.380086357122747e-05, "loss": 1.283, "step": 52775 }, { "epoch": 47.782805429864254, "grad_norm": 5.443982124328613, "learning_rate": 9.374351828924065e-05, "loss": 1.1588, "step": 52800 }, { "epoch": 47.80542986425339, "grad_norm": 5.004065036773682, "learning_rate": 9.368616131707165e-05, "loss": 1.3078, "step": 52825 }, { "epoch": 47.828054298642535, "grad_norm": 4.608489990234375, "learning_rate": 9.362879269049356e-05, "loss": 1.2215, "step": 52850 }, { "epoch": 47.85067873303167, "grad_norm": 3.259402275085449, "learning_rate": 9.357141244528671e-05, "loss": 1.2802, "step": 52875 }, { "epoch": 47.873303167420815, "grad_norm": 5.098649978637695, "learning_rate": 9.35140206172387e-05, "loss": 1.2212, "step": 52900 }, { "epoch": 47.89592760180995, "grad_norm": 5.7414751052856445, "learning_rate": 9.345661724214437e-05, "loss": 1.2021, "step": 52925 }, { "epoch": 47.918552036199095, "grad_norm": 4.869835376739502, "learning_rate": 9.339920235580574e-05, "loss": 1.2323, "step": 52950 }, { "epoch": 47.94117647058823, "grad_norm": 4.785629749298096, "learning_rate": 9.3341775994032e-05, "loss": 1.2435, "step": 52975 }, { "epoch": 47.963800904977376, "grad_norm": 5.641818046569824, "learning_rate": 9.328433819263953e-05, "loss": 1.1873, "step": 53000 }, { "epoch": 47.98642533936652, "grad_norm": 5.285050392150879, "learning_rate": 9.322688898745181e-05, "loss": 1.3702, "step": 53025 }, { "epoch": 48.009049773755656, "grad_norm": 5.142643451690674, "learning_rate": 9.316942841429947e-05, "loss": 1.3408, "step": 53050 }, { "epoch": 48.0316742081448, "grad_norm": 5.038753986358643, "learning_rate": 
9.31119565090202e-05, "loss": 1.1248, "step": 53075 }, { "epoch": 48.05429864253394, "grad_norm": 3.7307887077331543, "learning_rate": 9.305447330745876e-05, "loss": 1.2337, "step": 53100 }, { "epoch": 48.07692307692308, "grad_norm": 4.286962985992432, "learning_rate": 9.299697884546696e-05, "loss": 1.1177, "step": 53125 }, { "epoch": 48.09954751131222, "grad_norm": 4.528583526611328, "learning_rate": 9.293947315890367e-05, "loss": 1.1977, "step": 53150 }, { "epoch": 48.12217194570136, "grad_norm": 6.151467800140381, "learning_rate": 9.288195628363467e-05, "loss": 1.1828, "step": 53175 }, { "epoch": 48.1447963800905, "grad_norm": 4.997425556182861, "learning_rate": 9.282442825553279e-05, "loss": 1.1186, "step": 53200 }, { "epoch": 48.16742081447964, "grad_norm": 5.374769687652588, "learning_rate": 9.276688911047785e-05, "loss": 1.1204, "step": 53225 }, { "epoch": 48.19004524886878, "grad_norm": 4.511106014251709, "learning_rate": 9.27093388843565e-05, "loss": 1.1014, "step": 53250 }, { "epoch": 48.21266968325792, "grad_norm": 4.060224533081055, "learning_rate": 9.265177761306237e-05, "loss": 1.1864, "step": 53275 }, { "epoch": 48.23529411764706, "grad_norm": 5.773272514343262, "learning_rate": 9.259420533249596e-05, "loss": 1.0438, "step": 53300 }, { "epoch": 48.2579185520362, "grad_norm": 5.258281707763672, "learning_rate": 9.253662207856466e-05, "loss": 1.1108, "step": 53325 }, { "epoch": 48.28054298642534, "grad_norm": 5.085644721984863, "learning_rate": 9.247902788718266e-05, "loss": 1.2611, "step": 53350 }, { "epoch": 48.30316742081448, "grad_norm": 4.6476240158081055, "learning_rate": 9.2421422794271e-05, "loss": 1.1513, "step": 53375 }, { "epoch": 48.32579185520362, "grad_norm": 4.446463584899902, "learning_rate": 9.236380683575753e-05, "loss": 1.1763, "step": 53400 }, { "epoch": 48.34841628959276, "grad_norm": 4.901967525482178, "learning_rate": 9.230618004757686e-05, "loss": 1.2309, "step": 53425 }, { "epoch": 48.3710407239819, "grad_norm": 
4.112976551055908, "learning_rate": 9.224854246567034e-05, "loss": 1.0473, "step": 53450 }, { "epoch": 48.39366515837104, "grad_norm": 3.424607038497925, "learning_rate": 9.219089412598608e-05, "loss": 1.1428, "step": 53475 }, { "epoch": 48.41628959276018, "grad_norm": 3.574810028076172, "learning_rate": 9.213323506447888e-05, "loss": 1.2392, "step": 53500 }, { "epoch": 48.43891402714932, "grad_norm": 6.763994216918945, "learning_rate": 9.207556531711024e-05, "loss": 1.1168, "step": 53525 }, { "epoch": 48.46153846153846, "grad_norm": 3.5914273262023926, "learning_rate": 9.201788491984829e-05, "loss": 1.2307, "step": 53550 }, { "epoch": 48.484162895927604, "grad_norm": 4.489185333251953, "learning_rate": 9.19601939086679e-05, "loss": 1.2362, "step": 53575 }, { "epoch": 48.50678733031674, "grad_norm": 4.0316033363342285, "learning_rate": 9.190249231955043e-05, "loss": 1.2428, "step": 53600 }, { "epoch": 48.529411764705884, "grad_norm": 3.3174991607666016, "learning_rate": 9.184478018848392e-05, "loss": 0.9921, "step": 53625 }, { "epoch": 48.55203619909502, "grad_norm": 5.367835521697998, "learning_rate": 9.178705755146298e-05, "loss": 1.1551, "step": 53650 }, { "epoch": 48.574660633484164, "grad_norm": 5.262642860412598, "learning_rate": 9.172932444448872e-05, "loss": 1.0222, "step": 53675 }, { "epoch": 48.5972850678733, "grad_norm": 4.616153240203857, "learning_rate": 9.167158090356884e-05, "loss": 1.173, "step": 53700 }, { "epoch": 48.619909502262445, "grad_norm": 4.295658111572266, "learning_rate": 9.161382696471753e-05, "loss": 1.0673, "step": 53725 }, { "epoch": 48.64253393665158, "grad_norm": 4.903369426727295, "learning_rate": 9.155606266395545e-05, "loss": 1.2454, "step": 53750 }, { "epoch": 48.665158371040725, "grad_norm": 3.6059064865112305, "learning_rate": 9.149828803730971e-05, "loss": 1.1858, "step": 53775 }, { "epoch": 48.68778280542986, "grad_norm": 5.3199286460876465, "learning_rate": 9.144050312081392e-05, "loss": 1.1035, "step": 53800 }, { "epoch": 
48.710407239819006, "grad_norm": 4.197053909301758, "learning_rate": 9.138270795050804e-05, "loss": 1.2688, "step": 53825 }, { "epoch": 48.73303167420814, "grad_norm": 4.945096492767334, "learning_rate": 9.132490256243849e-05, "loss": 1.0091, "step": 53850 }, { "epoch": 48.755656108597286, "grad_norm": 3.641407012939453, "learning_rate": 9.126708699265797e-05, "loss": 1.2336, "step": 53875 }, { "epoch": 48.77828054298642, "grad_norm": 4.669260025024414, "learning_rate": 9.120926127722563e-05, "loss": 1.1354, "step": 53900 }, { "epoch": 48.800904977375566, "grad_norm": 4.605678081512451, "learning_rate": 9.115142545220692e-05, "loss": 1.2065, "step": 53925 }, { "epoch": 48.8235294117647, "grad_norm": 4.441488742828369, "learning_rate": 9.109357955367354e-05, "loss": 1.2603, "step": 53950 }, { "epoch": 48.84615384615385, "grad_norm": 4.292562961578369, "learning_rate": 9.103572361770353e-05, "loss": 1.2716, "step": 53975 }, { "epoch": 48.86877828054298, "grad_norm": 5.542261123657227, "learning_rate": 9.097785768038118e-05, "loss": 1.192, "step": 54000 }, { "epoch": 48.89140271493213, "grad_norm": 5.503359317779541, "learning_rate": 9.0919981777797e-05, "loss": 1.2559, "step": 54025 }, { "epoch": 48.914027149321264, "grad_norm": 4.744175434112549, "learning_rate": 9.086209594604775e-05, "loss": 1.2943, "step": 54050 }, { "epoch": 48.93665158371041, "grad_norm": 3.3318700790405273, "learning_rate": 9.080420022123631e-05, "loss": 1.1944, "step": 54075 }, { "epoch": 48.959276018099544, "grad_norm": 4.345468521118164, "learning_rate": 9.074629463947185e-05, "loss": 1.1818, "step": 54100 }, { "epoch": 48.98190045248869, "grad_norm": 4.530157566070557, "learning_rate": 9.068837923686955e-05, "loss": 1.4225, "step": 54125 }, { "epoch": 49.00452488687783, "grad_norm": 4.266589164733887, "learning_rate": 9.063045404955082e-05, "loss": 1.2109, "step": 54150 }, { "epoch": 49.02714932126697, "grad_norm": 4.339365482330322, "learning_rate": 9.057251911364314e-05, "loss": 1.009, 
"step": 54175 }, { "epoch": 49.04977375565611, "grad_norm": 5.674631118774414, "learning_rate": 9.051457446528005e-05, "loss": 1.1196, "step": 54200 }, { "epoch": 49.07239819004525, "grad_norm": 5.143533229827881, "learning_rate": 9.045662014060117e-05, "loss": 1.0855, "step": 54225 }, { "epoch": 49.09502262443439, "grad_norm": 5.3571882247924805, "learning_rate": 9.039865617575213e-05, "loss": 1.1771, "step": 54250 }, { "epoch": 49.11764705882353, "grad_norm": 4.910420894622803, "learning_rate": 9.03406826068846e-05, "loss": 1.0568, "step": 54275 }, { "epoch": 49.14027149321267, "grad_norm": 3.894571304321289, "learning_rate": 9.028269947015625e-05, "loss": 1.0524, "step": 54300 }, { "epoch": 49.16289592760181, "grad_norm": 5.98056173324585, "learning_rate": 9.022702669102259e-05, "loss": 1.0619, "step": 54325 }, { "epoch": 49.18552036199095, "grad_norm": 4.829105854034424, "learning_rate": 9.016902490619592e-05, "loss": 1.1119, "step": 54350 }, { "epoch": 49.20814479638009, "grad_norm": 4.5941619873046875, "learning_rate": 9.011101366056998e-05, "loss": 1.2032, "step": 54375 }, { "epoch": 49.23076923076923, "grad_norm": 5.961572170257568, "learning_rate": 9.005299299032587e-05, "loss": 1.008, "step": 54400 }, { "epoch": 49.25339366515837, "grad_norm": 5.3435187339782715, "learning_rate": 8.999496293165061e-05, "loss": 1.1795, "step": 54425 }, { "epoch": 49.276018099547514, "grad_norm": 3.5825347900390625, "learning_rate": 8.993692352073714e-05, "loss": 1.1525, "step": 54450 }, { "epoch": 49.29864253393665, "grad_norm": 5.072323322296143, "learning_rate": 8.987887479378413e-05, "loss": 1.0846, "step": 54475 }, { "epoch": 49.321266968325794, "grad_norm": 3.932933807373047, "learning_rate": 8.982081678699613e-05, "loss": 1.187, "step": 54500 }, { "epoch": 49.34389140271493, "grad_norm": 4.800660133361816, "learning_rate": 8.976274953658343e-05, "loss": 1.0316, "step": 54525 }, { "epoch": 49.366515837104075, "grad_norm": 4.958492279052734, "learning_rate": 
8.970467307876213e-05, "loss": 1.1783, "step": 54550 }, { "epoch": 49.38914027149321, "grad_norm": 5.268551826477051, "learning_rate": 8.964658744975403e-05, "loss": 1.1196, "step": 54575 }, { "epoch": 49.411764705882355, "grad_norm": 5.094686985015869, "learning_rate": 8.958849268578667e-05, "loss": 1.0254, "step": 54600 }, { "epoch": 49.43438914027149, "grad_norm": 6.6234846115112305, "learning_rate": 8.953038882309333e-05, "loss": 1.1582, "step": 54625 }, { "epoch": 49.457013574660635, "grad_norm": 3.9450597763061523, "learning_rate": 8.947227589791287e-05, "loss": 1.1137, "step": 54650 }, { "epoch": 49.47963800904977, "grad_norm": 5.424504280090332, "learning_rate": 8.941415394648991e-05, "loss": 1.042, "step": 54675 }, { "epoch": 49.502262443438916, "grad_norm": 4.318606376647949, "learning_rate": 8.93560230050746e-05, "loss": 1.1823, "step": 54700 }, { "epoch": 49.52488687782805, "grad_norm": 4.582488536834717, "learning_rate": 8.929788310992276e-05, "loss": 1.0477, "step": 54725 }, { "epoch": 49.547511312217196, "grad_norm": 5.677311420440674, "learning_rate": 8.923973429729578e-05, "loss": 1.2954, "step": 54750 }, { "epoch": 49.57013574660633, "grad_norm": 5.549562931060791, "learning_rate": 8.918157660346061e-05, "loss": 1.0914, "step": 54775 }, { "epoch": 49.59276018099548, "grad_norm": 4.94139289855957, "learning_rate": 8.912341006468973e-05, "loss": 1.1153, "step": 54800 }, { "epoch": 49.61538461538461, "grad_norm": 4.835080623626709, "learning_rate": 8.906523471726113e-05, "loss": 1.1043, "step": 54825 }, { "epoch": 49.63800904977376, "grad_norm": 5.070520401000977, "learning_rate": 8.900705059745834e-05, "loss": 1.2282, "step": 54850 }, { "epoch": 49.660633484162894, "grad_norm": 4.417810440063477, "learning_rate": 8.89488577415703e-05, "loss": 1.0648, "step": 54875 }, { "epoch": 49.68325791855204, "grad_norm": 5.827963352203369, "learning_rate": 8.889065618589147e-05, "loss": 1.1563, "step": 54900 }, { "epoch": 49.705882352941174, "grad_norm": 
4.687410831451416, "learning_rate": 8.883244596672165e-05, "loss": 1.0463, "step": 54925 }, { "epoch": 49.72850678733032, "grad_norm": 4.386495113372803, "learning_rate": 8.87742271203661e-05, "loss": 1.1286, "step": 54950 }, { "epoch": 49.751131221719454, "grad_norm": 4.633265972137451, "learning_rate": 8.871599968313545e-05, "loss": 1.1123, "step": 54975 }, { "epoch": 49.7737556561086, "grad_norm": 5.492758750915527, "learning_rate": 8.865776369134569e-05, "loss": 1.1681, "step": 55000 }, { "epoch": 49.796380090497735, "grad_norm": 4.6047868728637695, "learning_rate": 8.859951918131815e-05, "loss": 1.0757, "step": 55025 }, { "epoch": 49.81900452488688, "grad_norm": 4.503223419189453, "learning_rate": 8.854126618937945e-05, "loss": 1.2659, "step": 55050 }, { "epoch": 49.841628959276015, "grad_norm": 5.064691543579102, "learning_rate": 8.84830047518615e-05, "loss": 1.1565, "step": 55075 }, { "epoch": 49.86425339366516, "grad_norm": 4.539597511291504, "learning_rate": 8.842473490510153e-05, "loss": 1.1721, "step": 55100 }, { "epoch": 49.886877828054295, "grad_norm": 5.208485126495361, "learning_rate": 8.836645668544193e-05, "loss": 1.0792, "step": 55125 }, { "epoch": 49.90950226244344, "grad_norm": 4.17509126663208, "learning_rate": 8.830817012923041e-05, "loss": 1.1144, "step": 55150 }, { "epoch": 49.932126696832576, "grad_norm": 4.037477970123291, "learning_rate": 8.82498752728198e-05, "loss": 1.0173, "step": 55175 }, { "epoch": 49.95475113122172, "grad_norm": 4.351037979125977, "learning_rate": 8.819157215256813e-05, "loss": 0.9713, "step": 55200 }, { "epoch": 49.977375565610856, "grad_norm": 5.295916557312012, "learning_rate": 8.813326080483859e-05, "loss": 1.1529, "step": 55225 }, { "epoch": 50.0, "grad_norm": 4.7759270668029785, "learning_rate": 8.807494126599952e-05, "loss": 1.1772, "step": 55250 }, { "epoch": 50.022624434389144, "grad_norm": 4.5403971672058105, "learning_rate": 8.801661357242433e-05, "loss": 1.0469, "step": 55275 }, { "epoch": 
50.04524886877828, "grad_norm": 5.13372802734375, "learning_rate": 8.795827776049156e-05, "loss": 1.0775, "step": 55300 }, { "epoch": 50.067873303167424, "grad_norm": 4.247966289520264, "learning_rate": 8.789993386658474e-05, "loss": 1.0043, "step": 55325 }, { "epoch": 50.09049773755656, "grad_norm": 5.053829193115234, "learning_rate": 8.784158192709253e-05, "loss": 1.0262, "step": 55350 }, { "epoch": 50.113122171945705, "grad_norm": 6.302519798278809, "learning_rate": 8.778322197840855e-05, "loss": 0.9002, "step": 55375 }, { "epoch": 50.13574660633484, "grad_norm": 5.568416595458984, "learning_rate": 8.772485405693146e-05, "loss": 1.0576, "step": 55400 }, { "epoch": 50.158371040723985, "grad_norm": 5.653662204742432, "learning_rate": 8.766647819906483e-05, "loss": 1.0275, "step": 55425 }, { "epoch": 50.18099547511312, "grad_norm": 5.099042892456055, "learning_rate": 8.760809444121722e-05, "loss": 1.1494, "step": 55450 }, { "epoch": 50.203619909502265, "grad_norm": 3.052720308303833, "learning_rate": 8.754970281980214e-05, "loss": 1.0364, "step": 55475 }, { "epoch": 50.2262443438914, "grad_norm": 5.043280601501465, "learning_rate": 8.749130337123795e-05, "loss": 1.0985, "step": 55500 }, { "epoch": 50.248868778280546, "grad_norm": 5.506175994873047, "learning_rate": 8.743289613194792e-05, "loss": 0.9248, "step": 55525 }, { "epoch": 50.27149321266968, "grad_norm": 5.041479587554932, "learning_rate": 8.737448113836019e-05, "loss": 1.1453, "step": 55550 }, { "epoch": 50.294117647058826, "grad_norm": 4.894893169403076, "learning_rate": 8.731605842690771e-05, "loss": 1.1438, "step": 55575 }, { "epoch": 50.31674208144796, "grad_norm": 5.285278797149658, "learning_rate": 8.725762803402827e-05, "loss": 0.9856, "step": 55600 }, { "epoch": 50.339366515837106, "grad_norm": 5.30338191986084, "learning_rate": 8.719918999616442e-05, "loss": 1.1053, "step": 55625 }, { "epoch": 50.36199095022624, "grad_norm": 4.593639373779297, "learning_rate": 8.714074434976352e-05, "loss": 
1.0672, "step": 55650 }, { "epoch": 50.38461538461539, "grad_norm": 4.997950553894043, "learning_rate": 8.70822911312776e-05, "loss": 1.0073, "step": 55675 }, { "epoch": 50.40723981900452, "grad_norm": 4.9740824699401855, "learning_rate": 8.702383037716355e-05, "loss": 1.0856, "step": 55700 }, { "epoch": 50.42986425339367, "grad_norm": 4.203141689300537, "learning_rate": 8.69653621238828e-05, "loss": 1.2288, "step": 55725 }, { "epoch": 50.452488687782804, "grad_norm": 4.776144504547119, "learning_rate": 8.690688640790157e-05, "loss": 1.0695, "step": 55750 }, { "epoch": 50.47511312217195, "grad_norm": 5.120831489562988, "learning_rate": 8.684840326569068e-05, "loss": 1.1256, "step": 55775 }, { "epoch": 50.497737556561084, "grad_norm": 4.816657543182373, "learning_rate": 8.678991273372561e-05, "loss": 1.1635, "step": 55800 }, { "epoch": 50.52036199095023, "grad_norm": 4.2659196853637695, "learning_rate": 8.673141484848641e-05, "loss": 1.0529, "step": 55825 }, { "epoch": 50.542986425339365, "grad_norm": 4.1815972328186035, "learning_rate": 8.667290964645777e-05, "loss": 0.9332, "step": 55850 }, { "epoch": 50.56561085972851, "grad_norm": 4.719061851501465, "learning_rate": 8.661439716412889e-05, "loss": 1.0962, "step": 55875 }, { "epoch": 50.588235294117645, "grad_norm": 4.619856357574463, "learning_rate": 8.655587743799356e-05, "loss": 1.0358, "step": 55900 }, { "epoch": 50.61085972850679, "grad_norm": 4.741133213043213, "learning_rate": 8.649735050455006e-05, "loss": 1.2345, "step": 55925 }, { "epoch": 50.633484162895925, "grad_norm": 4.988595962524414, "learning_rate": 8.643881640030116e-05, "loss": 1.073, "step": 55950 }, { "epoch": 50.65610859728507, "grad_norm": 4.526401996612549, "learning_rate": 8.638027516175412e-05, "loss": 1.1112, "step": 55975 }, { "epoch": 50.678733031674206, "grad_norm": 6.290268421173096, "learning_rate": 8.632172682542064e-05, "loss": 1.0573, "step": 56000 }, { "epoch": 50.70135746606335, "grad_norm": 5.91135311126709, "learning_rate": 
8.626317142781684e-05, "loss": 0.9932, "step": 56025 }, { "epoch": 50.723981900452486, "grad_norm": 5.749835968017578, "learning_rate": 8.620460900546326e-05, "loss": 1.0493, "step": 56050 }, { "epoch": 50.74660633484163, "grad_norm": 5.972226619720459, "learning_rate": 8.614603959488482e-05, "loss": 1.1073, "step": 56075 }, { "epoch": 50.76923076923077, "grad_norm": 3.0617969036102295, "learning_rate": 8.608746323261079e-05, "loss": 0.9442, "step": 56100 }, { "epoch": 50.79185520361991, "grad_norm": 3.8403468132019043, "learning_rate": 8.602887995517476e-05, "loss": 1.032, "step": 56125 }, { "epoch": 50.81447963800905, "grad_norm": 4.818698883056641, "learning_rate": 8.597028979911466e-05, "loss": 1.0355, "step": 56150 }, { "epoch": 50.83710407239819, "grad_norm": 4.8740153312683105, "learning_rate": 8.59116928009727e-05, "loss": 0.9741, "step": 56175 }, { "epoch": 50.85972850678733, "grad_norm": 4.436838150024414, "learning_rate": 8.585308899729538e-05, "loss": 0.9861, "step": 56200 }, { "epoch": 50.88235294117647, "grad_norm": 5.0014472007751465, "learning_rate": 8.579447842463339e-05, "loss": 0.9933, "step": 56225 }, { "epoch": 50.90497737556561, "grad_norm": 5.533863067626953, "learning_rate": 8.57358611195417e-05, "loss": 1.0641, "step": 56250 }, { "epoch": 50.92760180995475, "grad_norm": 4.220861434936523, "learning_rate": 8.567723711857944e-05, "loss": 1.0358, "step": 56275 }, { "epoch": 50.95022624434389, "grad_norm": 4.768381595611572, "learning_rate": 8.561860645830993e-05, "loss": 1.115, "step": 56300 }, { "epoch": 50.97285067873303, "grad_norm": 4.91024112701416, "learning_rate": 8.555996917530065e-05, "loss": 1.0712, "step": 56325 }, { "epoch": 50.99547511312217, "grad_norm": 3.9711110591888428, "learning_rate": 8.550132530612319e-05, "loss": 1.1407, "step": 56350 }, { "epoch": 51.01809954751131, "grad_norm": 4.726860046386719, "learning_rate": 8.544267488735329e-05, "loss": 1.0346, "step": 56375 }, { "epoch": 51.040723981900456, "grad_norm": 
4.389982223510742, "learning_rate": 8.53840179555707e-05, "loss": 1.088, "step": 56400 }, { "epoch": 51.06334841628959, "grad_norm": 4.467222213745117, "learning_rate": 8.532535454735934e-05, "loss": 1.0083, "step": 56425 }, { "epoch": 51.085972850678736, "grad_norm": 5.905579090118408, "learning_rate": 8.526668469930705e-05, "loss": 0.9034, "step": 56450 }, { "epoch": 51.10859728506787, "grad_norm": 5.432013511657715, "learning_rate": 8.520800844800578e-05, "loss": 0.9099, "step": 56475 }, { "epoch": 51.13122171945702, "grad_norm": 5.049311637878418, "learning_rate": 8.515167325655024e-05, "loss": 0.93, "step": 56500 }, { "epoch": 51.15384615384615, "grad_norm": 4.538837432861328, "learning_rate": 8.509298456104203e-05, "loss": 0.8998, "step": 56525 }, { "epoch": 51.1764705882353, "grad_norm": 5.023305416107178, "learning_rate": 8.503428957062021e-05, "loss": 0.9987, "step": 56550 }, { "epoch": 51.199095022624434, "grad_norm": 3.4950027465820312, "learning_rate": 8.49755883218924e-05, "loss": 0.8631, "step": 56575 }, { "epoch": 51.22171945701358, "grad_norm": 4.973414897918701, "learning_rate": 8.491688085147005e-05, "loss": 1.0109, "step": 56600 }, { "epoch": 51.244343891402714, "grad_norm": 5.232577323913574, "learning_rate": 8.485816719596856e-05, "loss": 1.0334, "step": 56625 }, { "epoch": 51.26696832579186, "grad_norm": 4.882142066955566, "learning_rate": 8.47994473920072e-05, "loss": 0.9905, "step": 56650 }, { "epoch": 51.289592760180994, "grad_norm": 6.884396553039551, "learning_rate": 8.4740721476209e-05, "loss": 0.9886, "step": 56675 }, { "epoch": 51.31221719457014, "grad_norm": 4.622968673706055, "learning_rate": 8.468198948520084e-05, "loss": 1.0852, "step": 56700 }, { "epoch": 51.334841628959275, "grad_norm": 4.605698585510254, "learning_rate": 8.462325145561343e-05, "loss": 1.0237, "step": 56725 }, { "epoch": 51.35746606334842, "grad_norm": 5.381290912628174, "learning_rate": 8.456450742408119e-05, "loss": 1.0082, "step": 56750 }, { "epoch": 
51.380090497737555, "grad_norm": 4.742079734802246, "learning_rate": 8.450575742724228e-05, "loss": 1.0455, "step": 56775 }, { "epoch": 51.4027149321267, "grad_norm": 5.130732536315918, "learning_rate": 8.444700150173863e-05, "loss": 0.9221, "step": 56800 }, { "epoch": 51.425339366515836, "grad_norm": 3.486804962158203, "learning_rate": 8.438823968421584e-05, "loss": 0.9971, "step": 56825 }, { "epoch": 51.44796380090498, "grad_norm": 4.974049091339111, "learning_rate": 8.432947201132317e-05, "loss": 1.0028, "step": 56850 }, { "epoch": 51.470588235294116, "grad_norm": 5.254629611968994, "learning_rate": 8.427069851971354e-05, "loss": 1.1134, "step": 56875 }, { "epoch": 51.49321266968326, "grad_norm": 5.264469623565674, "learning_rate": 8.421191924604354e-05, "loss": 1.027, "step": 56900 }, { "epoch": 51.515837104072396, "grad_norm": 4.641003131866455, "learning_rate": 8.415313422697329e-05, "loss": 1.0775, "step": 56925 }, { "epoch": 51.53846153846154, "grad_norm": 5.969740867614746, "learning_rate": 8.409434349916655e-05, "loss": 0.976, "step": 56950 }, { "epoch": 51.56108597285068, "grad_norm": 4.478930950164795, "learning_rate": 8.403554709929067e-05, "loss": 0.8974, "step": 56975 }, { "epoch": 51.58371040723982, "grad_norm": 5.73991584777832, "learning_rate": 8.397674506401642e-05, "loss": 0.9987, "step": 57000 }, { "epoch": 51.60633484162896, "grad_norm": 5.22064208984375, "learning_rate": 8.39179374300182e-05, "loss": 0.9793, "step": 57025 }, { "epoch": 51.6289592760181, "grad_norm": 4.16978120803833, "learning_rate": 8.385912423397387e-05, "loss": 0.965, "step": 57050 }, { "epoch": 51.65158371040724, "grad_norm": 3.6818130016326904, "learning_rate": 8.38003055125647e-05, "loss": 0.9747, "step": 57075 }, { "epoch": 51.67420814479638, "grad_norm": 4.096356391906738, "learning_rate": 8.37414813024755e-05, "loss": 0.8952, "step": 57100 }, { "epoch": 51.69683257918552, "grad_norm": 3.7166366577148438, "learning_rate": 8.368265164039447e-05, "loss": 1.0305, "step": 
57125 }, { "epoch": 51.71945701357466, "grad_norm": 4.9898176193237305, "learning_rate": 8.362381656301315e-05, "loss": 0.9782, "step": 57150 }, { "epoch": 51.7420814479638, "grad_norm": 3.6234934329986572, "learning_rate": 8.356497610702655e-05, "loss": 1.0226, "step": 57175 }, { "epoch": 51.76470588235294, "grad_norm": 4.622697830200195, "learning_rate": 8.350613030913296e-05, "loss": 1.1135, "step": 57200 }, { "epoch": 51.78733031674208, "grad_norm": 4.255482196807861, "learning_rate": 8.344727920603407e-05, "loss": 0.9715, "step": 57225 }, { "epoch": 51.80995475113122, "grad_norm": 4.796724796295166, "learning_rate": 8.338842283443479e-05, "loss": 1.0713, "step": 57250 }, { "epoch": 51.83257918552036, "grad_norm": 3.0090603828430176, "learning_rate": 8.332956123104341e-05, "loss": 1.0199, "step": 57275 }, { "epoch": 51.8552036199095, "grad_norm": 3.2074475288391113, "learning_rate": 8.327069443257142e-05, "loss": 0.9717, "step": 57300 }, { "epoch": 51.87782805429864, "grad_norm": 4.8500776290893555, "learning_rate": 8.321182247573357e-05, "loss": 1.0186, "step": 57325 }, { "epoch": 51.90045248868778, "grad_norm": 5.010964393615723, "learning_rate": 8.315294539724782e-05, "loss": 0.988, "step": 57350 }, { "epoch": 51.92307692307692, "grad_norm": 4.531972885131836, "learning_rate": 8.309406323383538e-05, "loss": 0.9834, "step": 57375 }, { "epoch": 51.94570135746606, "grad_norm": 5.096385955810547, "learning_rate": 8.303517602222053e-05, "loss": 1.2033, "step": 57400 }, { "epoch": 51.9683257918552, "grad_norm": 4.396825790405273, "learning_rate": 8.297628379913079e-05, "loss": 1.0491, "step": 57425 }, { "epoch": 51.990950226244344, "grad_norm": 4.0146803855896, "learning_rate": 8.29173866012967e-05, "loss": 1.1816, "step": 57450 }, { "epoch": 52.01357466063349, "grad_norm": 4.34961462020874, "learning_rate": 8.285848446545207e-05, "loss": 0.9503, "step": 57475 }, { "epoch": 52.036199095022624, "grad_norm": 5.64193058013916, "learning_rate": 8.279957742833363e-05, 
"loss": 1.0821, "step": 57500 }, { "epoch": 52.05882352941177, "grad_norm": 4.364360809326172, "learning_rate": 8.274066552668122e-05, "loss": 0.9489, "step": 57525 }, { "epoch": 52.081447963800905, "grad_norm": 5.169826507568359, "learning_rate": 8.268174879723775e-05, "loss": 1.0077, "step": 57550 }, { "epoch": 52.10407239819005, "grad_norm": 5.747620582580566, "learning_rate": 8.262282727674908e-05, "loss": 0.8733, "step": 57575 }, { "epoch": 52.126696832579185, "grad_norm": 3.697861909866333, "learning_rate": 8.256390100196413e-05, "loss": 0.9009, "step": 57600 }, { "epoch": 52.14932126696833, "grad_norm": 5.627383708953857, "learning_rate": 8.250497000963473e-05, "loss": 0.8749, "step": 57625 }, { "epoch": 52.171945701357465, "grad_norm": 5.055898666381836, "learning_rate": 8.244603433651566e-05, "loss": 0.9451, "step": 57650 }, { "epoch": 52.19457013574661, "grad_norm": 5.240993976593018, "learning_rate": 8.238709401936462e-05, "loss": 0.8263, "step": 57675 }, { "epoch": 52.217194570135746, "grad_norm": 4.983405113220215, "learning_rate": 8.232814909494223e-05, "loss": 1.029, "step": 57700 }, { "epoch": 52.23981900452489, "grad_norm": 4.142995834350586, "learning_rate": 8.226919960001196e-05, "loss": 0.9562, "step": 57725 }, { "epoch": 52.262443438914026, "grad_norm": 4.967777729034424, "learning_rate": 8.221024557134015e-05, "loss": 0.8929, "step": 57750 }, { "epoch": 52.28506787330317, "grad_norm": 4.299960613250732, "learning_rate": 8.215128704569592e-05, "loss": 0.8692, "step": 57775 }, { "epoch": 52.30769230769231, "grad_norm": 3.7758352756500244, "learning_rate": 8.209232405985127e-05, "loss": 0.8449, "step": 57800 }, { "epoch": 52.33031674208145, "grad_norm": 5.353555202484131, "learning_rate": 8.203335665058093e-05, "loss": 1.0161, "step": 57825 }, { "epoch": 52.35294117647059, "grad_norm": 4.520666599273682, "learning_rate": 8.197438485466239e-05, "loss": 1.0039, "step": 57850 }, { "epoch": 52.37556561085973, "grad_norm": 4.586175441741943, 
"learning_rate": 8.191540870887588e-05, "loss": 0.7861, "step": 57875 }, { "epoch": 52.39819004524887, "grad_norm": 4.806995868682861, "learning_rate": 8.185642825000438e-05, "loss": 0.9014, "step": 57900 }, { "epoch": 52.42081447963801, "grad_norm": 5.556037902832031, "learning_rate": 8.179744351483352e-05, "loss": 1.0142, "step": 57925 }, { "epoch": 52.44343891402715, "grad_norm": 5.44512414932251, "learning_rate": 8.17384545401516e-05, "loss": 1.0591, "step": 57950 }, { "epoch": 52.46606334841629, "grad_norm": 5.103080749511719, "learning_rate": 8.167946136274956e-05, "loss": 0.9555, "step": 57975 }, { "epoch": 52.48868778280543, "grad_norm": 5.634963512420654, "learning_rate": 8.162046401942097e-05, "loss": 0.9449, "step": 58000 }, { "epoch": 52.51131221719457, "grad_norm": 5.9640793800354, "learning_rate": 8.156146254696202e-05, "loss": 1.0044, "step": 58025 }, { "epoch": 52.53393665158371, "grad_norm": 4.814497470855713, "learning_rate": 8.150245698217146e-05, "loss": 0.8738, "step": 58050 }, { "epoch": 52.55656108597285, "grad_norm": 3.970576047897339, "learning_rate": 8.144344736185057e-05, "loss": 0.862, "step": 58075 }, { "epoch": 52.57918552036199, "grad_norm": 5.5982489585876465, "learning_rate": 8.138443372280319e-05, "loss": 1.0608, "step": 58100 }, { "epoch": 52.60180995475113, "grad_norm": 6.342493057250977, "learning_rate": 8.132541610183564e-05, "loss": 1.0313, "step": 58125 }, { "epoch": 52.62443438914027, "grad_norm": 3.9694483280181885, "learning_rate": 8.126639453575674e-05, "loss": 0.8397, "step": 58150 }, { "epoch": 52.64705882352941, "grad_norm": 4.348864555358887, "learning_rate": 8.120736906137778e-05, "loss": 0.988, "step": 58175 }, { "epoch": 52.66968325791855, "grad_norm": 3.7852602005004883, "learning_rate": 8.114833971551248e-05, "loss": 0.9123, "step": 58200 }, { "epoch": 52.69230769230769, "grad_norm": 4.544547080993652, "learning_rate": 8.108930653497694e-05, "loss": 1.0721, "step": 58225 }, { "epoch": 52.71493212669683, 
"grad_norm": 4.0904693603515625, "learning_rate": 8.103026955658971e-05, "loss": 0.8775, "step": 58250 }, { "epoch": 52.737556561085974, "grad_norm": 4.872154235839844, "learning_rate": 8.097122881717167e-05, "loss": 0.9625, "step": 58275 }, { "epoch": 52.76018099547511, "grad_norm": 4.9419941902160645, "learning_rate": 8.091218435354605e-05, "loss": 0.9202, "step": 58300 }, { "epoch": 52.782805429864254, "grad_norm": 4.382291316986084, "learning_rate": 8.085313620253843e-05, "loss": 0.9289, "step": 58325 }, { "epoch": 52.80542986425339, "grad_norm": 5.273210048675537, "learning_rate": 8.079408440097666e-05, "loss": 0.9524, "step": 58350 }, { "epoch": 52.828054298642535, "grad_norm": 4.947889804840088, "learning_rate": 8.073502898569082e-05, "loss": 0.9681, "step": 58375 }, { "epoch": 52.85067873303167, "grad_norm": 2.935694932937622, "learning_rate": 8.067596999351339e-05, "loss": 0.9267, "step": 58400 }, { "epoch": 52.873303167420815, "grad_norm": 4.31731653213501, "learning_rate": 8.061690746127895e-05, "loss": 1.0439, "step": 58425 }, { "epoch": 52.89592760180995, "grad_norm": 4.913259983062744, "learning_rate": 8.055784142582433e-05, "loss": 1.0238, "step": 58450 }, { "epoch": 52.918552036199095, "grad_norm": 4.733499050140381, "learning_rate": 8.049877192398854e-05, "loss": 0.9618, "step": 58475 }, { "epoch": 52.94117647058823, "grad_norm": 4.514467716217041, "learning_rate": 8.043969899261277e-05, "loss": 0.9964, "step": 58500 }, { "epoch": 52.963800904977376, "grad_norm": 5.010599136352539, "learning_rate": 8.038062266854029e-05, "loss": 0.9073, "step": 58525 }, { "epoch": 52.98642533936652, "grad_norm": 4.960625648498535, "learning_rate": 8.032154298861657e-05, "loss": 1.0038, "step": 58550 }, { "epoch": 53.009049773755656, "grad_norm": 4.952969074249268, "learning_rate": 8.026245998968913e-05, "loss": 0.8599, "step": 58575 }, { "epoch": 53.0316742081448, "grad_norm": 4.405599594116211, "learning_rate": 8.020337370860755e-05, "loss": 0.8009, "step": 58600 
}, { "epoch": 53.05429864253394, "grad_norm": 4.26874303817749, "learning_rate": 8.014428418222347e-05, "loss": 0.9077, "step": 58625 }, { "epoch": 53.07692307692308, "grad_norm": 6.236453056335449, "learning_rate": 8.008519144739058e-05, "loss": 0.8767, "step": 58650 }, { "epoch": 53.09954751131222, "grad_norm": 4.521067142486572, "learning_rate": 8.002609554096451e-05, "loss": 0.9162, "step": 58675 }, { "epoch": 53.12217194570136, "grad_norm": 3.8446598052978516, "learning_rate": 7.996699649980292e-05, "loss": 0.8431, "step": 58700 }, { "epoch": 53.1447963800905, "grad_norm": 5.036252498626709, "learning_rate": 7.990789436076541e-05, "loss": 0.8377, "step": 58725 }, { "epoch": 53.16742081447964, "grad_norm": 4.960869789123535, "learning_rate": 7.984878916071353e-05, "loss": 0.937, "step": 58750 }, { "epoch": 53.19004524886878, "grad_norm": 5.040843963623047, "learning_rate": 7.978968093651067e-05, "loss": 0.8643, "step": 58775 }, { "epoch": 53.21266968325792, "grad_norm": 5.60190486907959, "learning_rate": 7.97305697250222e-05, "loss": 1.087, "step": 58800 }, { "epoch": 53.23529411764706, "grad_norm": 4.930598735809326, "learning_rate": 7.967145556311528e-05, "loss": 1.0033, "step": 58825 }, { "epoch": 53.2579185520362, "grad_norm": 3.259086847305298, "learning_rate": 7.961233848765895e-05, "loss": 0.8735, "step": 58850 }, { "epoch": 53.28054298642534, "grad_norm": 4.807427406311035, "learning_rate": 7.955321853552407e-05, "loss": 0.9848, "step": 58875 }, { "epoch": 53.30316742081448, "grad_norm": 4.867278099060059, "learning_rate": 7.949409574358326e-05, "loss": 0.8761, "step": 58900 }, { "epoch": 53.32579185520362, "grad_norm": 5.033575534820557, "learning_rate": 7.943497014871094e-05, "loss": 0.7422, "step": 58925 }, { "epoch": 53.34841628959276, "grad_norm": 4.1396684646606445, "learning_rate": 7.93758417877833e-05, "loss": 0.9512, "step": 58950 }, { "epoch": 53.3710407239819, "grad_norm": 4.426119327545166, "learning_rate": 7.931671069767817e-05, "loss": 
0.8334, "step": 58975 }, { "epoch": 53.39366515837104, "grad_norm": 5.277045726776123, "learning_rate": 7.925757691527516e-05, "loss": 0.8212, "step": 59000 }, { "epoch": 53.41628959276018, "grad_norm": 5.944455146789551, "learning_rate": 7.919844047745553e-05, "loss": 0.8919, "step": 59025 }, { "epoch": 53.43891402714932, "grad_norm": 5.586777687072754, "learning_rate": 7.913930142110222e-05, "loss": 0.8989, "step": 59050 }, { "epoch": 53.46153846153846, "grad_norm": 5.0498552322387695, "learning_rate": 7.908015978309979e-05, "loss": 0.8728, "step": 59075 }, { "epoch": 53.484162895927604, "grad_norm": 4.9062910079956055, "learning_rate": 7.902101560033438e-05, "loss": 0.8128, "step": 59100 }, { "epoch": 53.50678733031674, "grad_norm": 4.3646159172058105, "learning_rate": 7.896186890969375e-05, "loss": 0.8927, "step": 59125 }, { "epoch": 53.529411764705884, "grad_norm": 4.290850639343262, "learning_rate": 7.890271974806724e-05, "loss": 0.9639, "step": 59150 }, { "epoch": 53.55203619909502, "grad_norm": 2.934475898742676, "learning_rate": 7.884356815234569e-05, "loss": 0.8293, "step": 59175 }, { "epoch": 53.574660633484164, "grad_norm": 4.962638854980469, "learning_rate": 7.878441415942146e-05, "loss": 0.8695, "step": 59200 }, { "epoch": 53.5972850678733, "grad_norm": 5.541092395782471, "learning_rate": 7.872525780618844e-05, "loss": 0.9574, "step": 59225 }, { "epoch": 53.619909502262445, "grad_norm": 5.185718059539795, "learning_rate": 7.866609912954199e-05, "loss": 1.018, "step": 59250 }, { "epoch": 53.64253393665158, "grad_norm": 4.9260663986206055, "learning_rate": 7.860930464834365e-05, "loss": 1.0888, "step": 59275 }, { "epoch": 53.665158371040725, "grad_norm": 5.476282596588135, "learning_rate": 7.855014152483838e-05, "loss": 0.8982, "step": 59300 }, { "epoch": 53.68778280542986, "grad_norm": 6.069909572601318, "learning_rate": 7.849097618713829e-05, "loss": 0.8551, "step": 59325 }, { "epoch": 53.710407239819006, "grad_norm": 4.913949966430664, 
"learning_rate": 7.84318086721443e-05, "loss": 0.9009, "step": 59350 }, { "epoch": 53.73303167420814, "grad_norm": 4.0589399337768555, "learning_rate": 7.837263901675874e-05, "loss": 0.8428, "step": 59375 }, { "epoch": 53.755656108597286, "grad_norm": 4.331938743591309, "learning_rate": 7.831346725788526e-05, "loss": 0.9352, "step": 59400 }, { "epoch": 53.77828054298642, "grad_norm": 4.615716457366943, "learning_rate": 7.825429343242879e-05, "loss": 0.9279, "step": 59425 }, { "epoch": 53.800904977375566, "grad_norm": 4.417220592498779, "learning_rate": 7.819511757729558e-05, "loss": 0.8169, "step": 59450 }, { "epoch": 53.8235294117647, "grad_norm": 3.6304221153259277, "learning_rate": 7.813593972939313e-05, "loss": 0.9274, "step": 59475 }, { "epoch": 53.84615384615385, "grad_norm": 3.4916086196899414, "learning_rate": 7.80767599256302e-05, "loss": 0.9707, "step": 59500 }, { "epoch": 53.86877828054298, "grad_norm": 4.305111885070801, "learning_rate": 7.801757820291675e-05, "loss": 0.9303, "step": 59525 }, { "epoch": 53.89140271493213, "grad_norm": 4.907180309295654, "learning_rate": 7.795839459816396e-05, "loss": 1.0336, "step": 59550 }, { "epoch": 53.914027149321264, "grad_norm": 4.160072326660156, "learning_rate": 7.789920914828416e-05, "loss": 1.0352, "step": 59575 }, { "epoch": 53.93665158371041, "grad_norm": 5.184953212738037, "learning_rate": 7.784002189019085e-05, "loss": 0.904, "step": 59600 }, { "epoch": 53.959276018099544, "grad_norm": 5.4037699699401855, "learning_rate": 7.778083286079861e-05, "loss": 0.8632, "step": 59625 }, { "epoch": 53.98190045248869, "grad_norm": 4.708549499511719, "learning_rate": 7.772164209702321e-05, "loss": 0.8251, "step": 59650 }, { "epoch": 54.00452488687783, "grad_norm": 4.25620698928833, "learning_rate": 7.766244963578145e-05, "loss": 0.7864, "step": 59675 }, { "epoch": 54.02714932126697, "grad_norm": 4.483149528503418, "learning_rate": 7.760325551399117e-05, "loss": 0.7154, "step": 59700 }, { "epoch": 54.04977375565611, 
"grad_norm": 4.288634777069092, "learning_rate": 7.754405976857129e-05, "loss": 0.9173, "step": 59725 }, { "epoch": 54.07239819004525, "grad_norm": 4.393452167510986, "learning_rate": 7.748486243644173e-05, "loss": 0.9591, "step": 59750 }, { "epoch": 54.09502262443439, "grad_norm": 3.991946220397949, "learning_rate": 7.742566355452335e-05, "loss": 0.8082, "step": 59775 }, { "epoch": 54.11764705882353, "grad_norm": 3.410980701446533, "learning_rate": 7.736646315973805e-05, "loss": 0.8256, "step": 59800 }, { "epoch": 54.14027149321267, "grad_norm": 4.950623512268066, "learning_rate": 7.730726128900864e-05, "loss": 0.7366, "step": 59825 }, { "epoch": 54.16289592760181, "grad_norm": 3.422947883605957, "learning_rate": 7.724805797925886e-05, "loss": 0.7395, "step": 59850 }, { "epoch": 54.18552036199095, "grad_norm": 4.740551948547363, "learning_rate": 7.71888532674133e-05, "loss": 0.7957, "step": 59875 }, { "epoch": 54.20814479638009, "grad_norm": 5.016266822814941, "learning_rate": 7.71296471903975e-05, "loss": 0.9355, "step": 59900 }, { "epoch": 54.23076923076923, "grad_norm": 6.452232360839844, "learning_rate": 7.707043978513784e-05, "loss": 0.9089, "step": 59925 }, { "epoch": 54.25339366515837, "grad_norm": 6.509178638458252, "learning_rate": 7.701123108856147e-05, "loss": 0.8108, "step": 59950 }, { "epoch": 54.276018099547514, "grad_norm": 5.48393440246582, "learning_rate": 7.695202113759637e-05, "loss": 0.7972, "step": 59975 }, { "epoch": 54.29864253393665, "grad_norm": 4.971682071685791, "learning_rate": 7.689280996917132e-05, "loss": 0.7777, "step": 60000 }, { "epoch": 54.321266968325794, "grad_norm": 4.500193119049072, "learning_rate": 7.683359762021586e-05, "loss": 0.9309, "step": 60025 }, { "epoch": 54.34389140271493, "grad_norm": 4.3350934982299805, "learning_rate": 7.677438412766026e-05, "loss": 0.9981, "step": 60050 }, { "epoch": 54.366515837104075, "grad_norm": 4.638091087341309, "learning_rate": 7.671516952843549e-05, "loss": 0.8094, "step": 60075 }, { 
"epoch": 54.38914027149321, "grad_norm": 5.043220043182373, "learning_rate": 7.665595385947324e-05, "loss": 0.7614, "step": 60100 }, { "epoch": 54.411764705882355, "grad_norm": 4.639249324798584, "learning_rate": 7.659673715770582e-05, "loss": 0.8464, "step": 60125 }, { "epoch": 54.43438914027149, "grad_norm": 3.943392038345337, "learning_rate": 7.653751946006623e-05, "loss": 0.8505, "step": 60150 }, { "epoch": 54.457013574660635, "grad_norm": 3.4700372219085693, "learning_rate": 7.647830080348808e-05, "loss": 0.8874, "step": 60175 }, { "epoch": 54.47963800904977, "grad_norm": 4.969888687133789, "learning_rate": 7.641908122490556e-05, "loss": 0.9128, "step": 60200 }, { "epoch": 54.502262443438916, "grad_norm": 4.452884674072266, "learning_rate": 7.635986076125344e-05, "loss": 0.9618, "step": 60225 }, { "epoch": 54.52488687782805, "grad_norm": 4.573580265045166, "learning_rate": 7.630063944946708e-05, "loss": 0.7469, "step": 60250 }, { "epoch": 54.547511312217196, "grad_norm": 6.330535411834717, "learning_rate": 7.62414173264823e-05, "loss": 0.7804, "step": 60275 }, { "epoch": 54.57013574660633, "grad_norm": 4.171976566314697, "learning_rate": 7.618219442923547e-05, "loss": 0.8142, "step": 60300 }, { "epoch": 54.59276018099548, "grad_norm": 4.339895248413086, "learning_rate": 7.612297079466346e-05, "loss": 0.9616, "step": 60325 }, { "epoch": 54.61538461538461, "grad_norm": 4.75609016418457, "learning_rate": 7.606374645970356e-05, "loss": 1.0351, "step": 60350 }, { "epoch": 54.63800904977376, "grad_norm": 4.1576995849609375, "learning_rate": 7.600452146129352e-05, "loss": 0.7971, "step": 60375 }, { "epoch": 54.660633484162894, "grad_norm": 4.641231536865234, "learning_rate": 7.594529583637149e-05, "loss": 0.8702, "step": 60400 }, { "epoch": 54.68325791855204, "grad_norm": 3.3054144382476807, "learning_rate": 7.588606962187601e-05, "loss": 0.7996, "step": 60425 }, { "epoch": 54.705882352941174, "grad_norm": 4.372158527374268, "learning_rate": 7.582684285474603e-05, 
"loss": 0.9059, "step": 60450 }, { "epoch": 54.72850678733032, "grad_norm": 3.55283522605896, "learning_rate": 7.576761557192076e-05, "loss": 0.8029, "step": 60475 }, { "epoch": 54.751131221719454, "grad_norm": 3.247828960418701, "learning_rate": 7.57083878103398e-05, "loss": 0.8858, "step": 60500 }, { "epoch": 54.7737556561086, "grad_norm": 6.058444023132324, "learning_rate": 7.564915960694308e-05, "loss": 0.8342, "step": 60525 }, { "epoch": 54.796380090497735, "grad_norm": 4.797789573669434, "learning_rate": 7.558993099867068e-05, "loss": 0.8698, "step": 60550 }, { "epoch": 54.81900452488688, "grad_norm": 4.413180351257324, "learning_rate": 7.553070202246305e-05, "loss": 0.9446, "step": 60575 }, { "epoch": 54.841628959276015, "grad_norm": 4.170695781707764, "learning_rate": 7.547147271526081e-05, "loss": 0.9185, "step": 60600 }, { "epoch": 54.86425339366516, "grad_norm": 5.421101093292236, "learning_rate": 7.541224311400484e-05, "loss": 0.8482, "step": 60625 }, { "epoch": 54.886877828054295, "grad_norm": 4.560364723205566, "learning_rate": 7.535301325563611e-05, "loss": 0.8413, "step": 60650 }, { "epoch": 54.90950226244344, "grad_norm": 6.658802032470703, "learning_rate": 7.529378317709587e-05, "loss": 0.9026, "step": 60675 }, { "epoch": 54.932126696832576, "grad_norm": 2.510969400405884, "learning_rate": 7.52345529153254e-05, "loss": 0.8417, "step": 60700 }, { "epoch": 54.95475113122172, "grad_norm": 5.733388423919678, "learning_rate": 7.517532250726617e-05, "loss": 0.9093, "step": 60725 }, { "epoch": 54.977375565610856, "grad_norm": 5.590017318725586, "learning_rate": 7.511609198985969e-05, "loss": 0.8656, "step": 60750 }, { "epoch": 55.0, "grad_norm": 5.159645080566406, "learning_rate": 7.505686140004757e-05, "loss": 0.747, "step": 60775 }, { "epoch": 55.022624434389144, "grad_norm": 3.632514238357544, "learning_rate": 7.499763077477145e-05, "loss": 0.8007, "step": 60800 }, { "epoch": 55.04524886877828, "grad_norm": 4.552527904510498, "learning_rate": 
7.493840015097298e-05, "loss": 0.8871, "step": 60825 }, { "epoch": 55.067873303167424, "grad_norm": 3.7181003093719482, "learning_rate": 7.487916956559385e-05, "loss": 0.8081, "step": 60850 }, { "epoch": 55.09049773755656, "grad_norm": 4.032871246337891, "learning_rate": 7.481993905557571e-05, "loss": 0.7013, "step": 60875 }, { "epoch": 55.113122171945705, "grad_norm": 4.573465824127197, "learning_rate": 7.476070865786012e-05, "loss": 0.8295, "step": 60900 }, { "epoch": 55.13574660633484, "grad_norm": 3.690110445022583, "learning_rate": 7.470147840938863e-05, "loss": 0.8161, "step": 60925 }, { "epoch": 55.158371040723985, "grad_norm": 3.9547295570373535, "learning_rate": 7.464224834710267e-05, "loss": 0.7405, "step": 60950 }, { "epoch": 55.18099547511312, "grad_norm": 4.441616535186768, "learning_rate": 7.458301850794355e-05, "loss": 0.7908, "step": 60975 }, { "epoch": 55.203619909502265, "grad_norm": 4.410475730895996, "learning_rate": 7.452378892885243e-05, "loss": 0.7324, "step": 61000 }, { "epoch": 55.2262443438914, "grad_norm": 4.958261489868164, "learning_rate": 7.446455964677036e-05, "loss": 0.7666, "step": 61025 }, { "epoch": 55.248868778280546, "grad_norm": 4.1845784187316895, "learning_rate": 7.440533069863813e-05, "loss": 0.7368, "step": 61050 }, { "epoch": 55.27149321266968, "grad_norm": 4.775341510772705, "learning_rate": 7.434610212139639e-05, "loss": 0.8197, "step": 61075 }, { "epoch": 55.294117647058826, "grad_norm": 4.15249490737915, "learning_rate": 7.428687395198551e-05, "loss": 0.8121, "step": 61100 }, { "epoch": 55.31674208144796, "grad_norm": 4.936108589172363, "learning_rate": 7.422764622734565e-05, "loss": 0.7077, "step": 61125 }, { "epoch": 55.339366515837106, "grad_norm": 3.2025458812713623, "learning_rate": 7.416841898441663e-05, "loss": 0.8949, "step": 61150 }, { "epoch": 55.36199095022624, "grad_norm": 5.4227399826049805, "learning_rate": 7.410919226013802e-05, "loss": 0.8844, "step": 61175 }, { "epoch": 55.38461538461539, "grad_norm": 
5.631619930267334, "learning_rate": 7.404996609144908e-05, "loss": 0.842, "step": 61200 }, { "epoch": 55.40723981900452, "grad_norm": 5.11384391784668, "learning_rate": 7.399074051528867e-05, "loss": 0.8668, "step": 61225 }, { "epoch": 55.42986425339367, "grad_norm": 5.616774082183838, "learning_rate": 7.393151556859532e-05, "loss": 0.7929, "step": 61250 }, { "epoch": 55.452488687782804, "grad_norm": 4.66877555847168, "learning_rate": 7.387229128830714e-05, "loss": 0.6773, "step": 61275 }, { "epoch": 55.47511312217195, "grad_norm": 5.053830623626709, "learning_rate": 7.381306771136186e-05, "loss": 0.7212, "step": 61300 }, { "epoch": 55.497737556561084, "grad_norm": 4.312491416931152, "learning_rate": 7.375384487469673e-05, "loss": 0.8143, "step": 61325 }, { "epoch": 55.52036199095023, "grad_norm": 2.47932767868042, "learning_rate": 7.369462281524857e-05, "loss": 0.776, "step": 61350 }, { "epoch": 55.542986425339365, "grad_norm": null, "learning_rate": 7.363777040367044e-05, "loss": 0.7555, "step": 61375 }, { "epoch": 55.56561085972851, "grad_norm": 3.8299970626831055, "learning_rate": 7.357854997471195e-05, "loss": 0.7993, "step": 61400 }, { "epoch": 55.588235294117645, "grad_norm": 4.222673416137695, "learning_rate": 7.351933043230046e-05, "loss": 0.8498, "step": 61425 }, { "epoch": 55.61085972850679, "grad_norm": 4.237673282623291, "learning_rate": 7.346011181337071e-05, "loss": 0.8143, "step": 61450 }, { "epoch": 55.633484162895925, "grad_norm": 4.1661505699157715, "learning_rate": 7.34008941548569e-05, "loss": 0.8586, "step": 61475 }, { "epoch": 55.65610859728507, "grad_norm": 2.9293253421783447, "learning_rate": 7.334167749369258e-05, "loss": 0.8458, "step": 61500 }, { "epoch": 55.678733031674206, "grad_norm": 4.5011067390441895, "learning_rate": 7.328246186681073e-05, "loss": 0.8494, "step": 61525 }, { "epoch": 55.70135746606335, "grad_norm": 4.723522186279297, "learning_rate": 7.322324731114364e-05, "loss": 0.9023, "step": 61550 }, { "epoch": 
55.723981900452486, "grad_norm": 2.886542797088623, "learning_rate": 7.316403386362297e-05, "loss": 0.7982, "step": 61575 }, { "epoch": 55.74660633484163, "grad_norm": 4.536559104919434, "learning_rate": 7.310482156117968e-05, "loss": 0.8375, "step": 61600 }, { "epoch": 55.76923076923077, "grad_norm": 4.458587646484375, "learning_rate": 7.304561044074399e-05, "loss": 0.7473, "step": 61625 }, { "epoch": 55.79185520361991, "grad_norm": 4.814058780670166, "learning_rate": 7.298640053924542e-05, "loss": 0.9217, "step": 61650 }, { "epoch": 55.81447963800905, "grad_norm": 4.6384196281433105, "learning_rate": 7.29271918936127e-05, "loss": 0.7983, "step": 61675 }, { "epoch": 55.83710407239819, "grad_norm": 4.325346946716309, "learning_rate": 7.286798454077377e-05, "loss": 0.8147, "step": 61700 }, { "epoch": 55.85972850678733, "grad_norm": 5.219418525695801, "learning_rate": 7.280877851765582e-05, "loss": 0.7371, "step": 61725 }, { "epoch": 55.88235294117647, "grad_norm": 3.9857089519500732, "learning_rate": 7.274957386118515e-05, "loss": 0.9557, "step": 61750 }, { "epoch": 55.90497737556561, "grad_norm": 3.2314071655273438, "learning_rate": 7.269037060828723e-05, "loss": 0.7471, "step": 61775 }, { "epoch": 55.92760180995475, "grad_norm": 4.041252613067627, "learning_rate": 7.263116879588665e-05, "loss": 0.8052, "step": 61800 }, { "epoch": 55.95022624434389, "grad_norm": 5.032653331756592, "learning_rate": 7.257196846090713e-05, "loss": 0.7964, "step": 61825 }, { "epoch": 55.97285067873303, "grad_norm": 5.317902565002441, "learning_rate": 7.251276964027141e-05, "loss": 0.8317, "step": 61850 }, { "epoch": 55.99547511312217, "grad_norm": 3.7628276348114014, "learning_rate": 7.245357237090136e-05, "loss": 0.8196, "step": 61875 }, { "epoch": 56.01809954751131, "grad_norm": 2.3814358711242676, "learning_rate": 7.23943766897178e-05, "loss": 0.6783, "step": 61900 }, { "epoch": 56.040723981900456, "grad_norm": 4.1640305519104, "learning_rate": 7.233518263364064e-05, "loss": 0.7511, 
"step": 61925 }, { "epoch": 56.06334841628959, "grad_norm": 4.725026607513428, "learning_rate": 7.22759902395887e-05, "loss": 0.7144, "step": 61950 }, { "epoch": 56.085972850678736, "grad_norm": 5.087151527404785, "learning_rate": 7.221679954447983e-05, "loss": 0.7817, "step": 61975 }, { "epoch": 56.10859728506787, "grad_norm": 4.164005279541016, "learning_rate": 7.21576105852308e-05, "loss": 0.7576, "step": 62000 }, { "epoch": 56.13122171945702, "grad_norm": 5.5483293533325195, "learning_rate": 7.209842339875726e-05, "loss": 0.7417, "step": 62025 }, { "epoch": 56.15384615384615, "grad_norm": 4.518779754638672, "learning_rate": 7.203923802197381e-05, "loss": 0.743, "step": 62050 }, { "epoch": 56.1764705882353, "grad_norm": 3.0123450756073, "learning_rate": 7.198005449179387e-05, "loss": 0.6979, "step": 62075 }, { "epoch": 56.199095022624434, "grad_norm": 4.908483028411865, "learning_rate": 7.192087284512977e-05, "loss": 0.7268, "step": 62100 }, { "epoch": 56.22171945701358, "grad_norm": 4.837472915649414, "learning_rate": 7.18616931188926e-05, "loss": 0.703, "step": 62125 }, { "epoch": 56.244343891402714, "grad_norm": 4.4174299240112305, "learning_rate": 7.180251534999227e-05, "loss": 0.7239, "step": 62150 }, { "epoch": 56.26696832579186, "grad_norm": 3.4075124263763428, "learning_rate": 7.174333957533752e-05, "loss": 0.7709, "step": 62175 }, { "epoch": 56.289592760180994, "grad_norm": 4.364498615264893, "learning_rate": 7.168416583183577e-05, "loss": 0.8033, "step": 62200 }, { "epoch": 56.31221719457014, "grad_norm": 3.780409336090088, "learning_rate": 7.162499415639324e-05, "loss": 0.7739, "step": 62225 }, { "epoch": 56.334841628959275, "grad_norm": 5.191378116607666, "learning_rate": 7.15658245859148e-05, "loss": 0.7106, "step": 62250 }, { "epoch": 56.35746606334842, "grad_norm": 3.34543514251709, "learning_rate": 7.150665715730405e-05, "loss": 0.6838, "step": 62275 }, { "epoch": 56.380090497737555, "grad_norm": 4.146462440490723, "learning_rate": 
7.144749190746326e-05, "loss": 0.6753, "step": 62300 }, { "epoch": 56.4027149321267, "grad_norm": 6.109175682067871, "learning_rate": 7.13883288732933e-05, "loss": 0.7479, "step": 62325 }, { "epoch": 56.425339366515836, "grad_norm": 4.915993690490723, "learning_rate": 7.132916809169368e-05, "loss": 0.7604, "step": 62350 }, { "epoch": 56.44796380090498, "grad_norm": 4.397975444793701, "learning_rate": 7.127000959956248e-05, "loss": 0.8161, "step": 62375 }, { "epoch": 56.470588235294116, "grad_norm": 4.625315189361572, "learning_rate": 7.121085343379644e-05, "loss": 0.7908, "step": 62400 }, { "epoch": 56.49321266968326, "grad_norm": 3.804797649383545, "learning_rate": 7.115169963129076e-05, "loss": 0.7554, "step": 62425 }, { "epoch": 56.515837104072396, "grad_norm": 4.955995082855225, "learning_rate": 7.109254822893919e-05, "loss": 0.7662, "step": 62450 }, { "epoch": 56.53846153846154, "grad_norm": 4.094953536987305, "learning_rate": 7.103339926363398e-05, "loss": 0.7181, "step": 62475 }, { "epoch": 56.56108597285068, "grad_norm": 5.037959575653076, "learning_rate": 7.097425277226586e-05, "loss": 0.8225, "step": 62500 }, { "epoch": 56.58371040723982, "grad_norm": 4.111741542816162, "learning_rate": 7.091510879172405e-05, "loss": 0.8178, "step": 62525 }, { "epoch": 56.60633484162896, "grad_norm": 4.771442890167236, "learning_rate": 7.085596735889615e-05, "loss": 0.7598, "step": 62550 }, { "epoch": 56.6289592760181, "grad_norm": 5.622108459472656, "learning_rate": 7.079682851066821e-05, "loss": 0.8022, "step": 62575 }, { "epoch": 56.65158371040724, "grad_norm": 4.363972187042236, "learning_rate": 7.073769228392465e-05, "loss": 0.8628, "step": 62600 }, { "epoch": 56.67420814479638, "grad_norm": 3.973304271697998, "learning_rate": 7.067855871554826e-05, "loss": 0.6982, "step": 62625 }, { "epoch": 56.69683257918552, "grad_norm": 4.642250061035156, "learning_rate": 7.061942784242019e-05, "loss": 0.7985, "step": 62650 }, { "epoch": 56.71945701357466, "grad_norm": 
2.3813135623931885, "learning_rate": 7.056029970141988e-05, "loss": 0.7239, "step": 62675 }, { "epoch": 56.7420814479638, "grad_norm": 3.675196647644043, "learning_rate": 7.050117432942506e-05, "loss": 0.7956, "step": 62700 }, { "epoch": 56.76470588235294, "grad_norm": 4.203518867492676, "learning_rate": 7.044205176331178e-05, "loss": 0.8164, "step": 62725 }, { "epoch": 56.78733031674208, "grad_norm": 5.610568046569824, "learning_rate": 7.038293203995428e-05, "loss": 0.82, "step": 62750 }, { "epoch": 56.80995475113122, "grad_norm": 5.120931148529053, "learning_rate": 7.032381519622508e-05, "loss": 0.801, "step": 62775 }, { "epoch": 56.83257918552036, "grad_norm": 4.9113335609436035, "learning_rate": 7.026470126899489e-05, "loss": 0.7235, "step": 62800 }, { "epoch": 56.8552036199095, "grad_norm": 4.712889671325684, "learning_rate": 7.020559029513258e-05, "loss": 0.9791, "step": 62825 }, { "epoch": 56.87782805429864, "grad_norm": 3.4738965034484863, "learning_rate": 7.014648231150519e-05, "loss": 0.7358, "step": 62850 }, { "epoch": 56.90045248868778, "grad_norm": 4.060792446136475, "learning_rate": 7.00873773549779e-05, "loss": 0.8057, "step": 62875 }, { "epoch": 56.92307692307692, "grad_norm": 4.6342644691467285, "learning_rate": 7.002827546241396e-05, "loss": 0.8536, "step": 62900 }, { "epoch": 56.94570135746606, "grad_norm": 4.327664852142334, "learning_rate": 6.99691766706748e-05, "loss": 0.7719, "step": 62925 }, { "epoch": 56.9683257918552, "grad_norm": 4.646831035614014, "learning_rate": 6.991008101661986e-05, "loss": 0.9035, "step": 62950 }, { "epoch": 56.990950226244344, "grad_norm": 4.714659690856934, "learning_rate": 6.98509885371066e-05, "loss": 0.761, "step": 62975 }, { "epoch": 57.01357466063349, "grad_norm": 4.753892421722412, "learning_rate": 6.979189926899054e-05, "loss": 0.7183, "step": 63000 }, { "epoch": 57.036199095022624, "grad_norm": 4.693076133728027, "learning_rate": 6.973281324912518e-05, "loss": 0.6936, "step": 63025 }, { "epoch": 
57.05882352941177, "grad_norm": 2.9521639347076416, "learning_rate": 6.967373051436201e-05, "loss": 0.6493, "step": 63050 }, { "epoch": 57.081447963800905, "grad_norm": 5.09092378616333, "learning_rate": 6.961465110155042e-05, "loss": 0.7024, "step": 63075 }, { "epoch": 57.10407239819005, "grad_norm": 5.996335029602051, "learning_rate": 6.95555750475378e-05, "loss": 0.7514, "step": 63100 }, { "epoch": 57.126696832579185, "grad_norm": 4.089336395263672, "learning_rate": 6.949650238916941e-05, "loss": 0.6803, "step": 63125 }, { "epoch": 57.14932126696833, "grad_norm": 2.8486642837524414, "learning_rate": 6.943743316328838e-05, "loss": 0.7033, "step": 63150 }, { "epoch": 57.171945701357465, "grad_norm": 4.217557430267334, "learning_rate": 6.937836740673573e-05, "loss": 0.7176, "step": 63175 }, { "epoch": 57.19457013574661, "grad_norm": 4.2329511642456055, "learning_rate": 6.931930515635028e-05, "loss": 0.7707, "step": 63200 }, { "epoch": 57.217194570135746, "grad_norm": 6.204469203948975, "learning_rate": 6.926024644896869e-05, "loss": 0.6269, "step": 63225 }, { "epoch": 57.23981900452489, "grad_norm": 4.37061071395874, "learning_rate": 6.920119132142542e-05, "loss": 0.7308, "step": 63250 }, { "epoch": 57.262443438914026, "grad_norm": 4.992610454559326, "learning_rate": 6.914213981055264e-05, "loss": 0.8525, "step": 63275 }, { "epoch": 57.28506787330317, "grad_norm": 5.8490447998046875, "learning_rate": 6.908309195318034e-05, "loss": 0.7, "step": 63300 }, { "epoch": 57.30769230769231, "grad_norm": 4.511977672576904, "learning_rate": 6.90240477861362e-05, "loss": 0.7824, "step": 63325 }, { "epoch": 57.33031674208145, "grad_norm": 4.135030269622803, "learning_rate": 6.896500734624555e-05, "loss": 0.7225, "step": 63350 }, { "epoch": 57.35294117647059, "grad_norm": 4.676153182983398, "learning_rate": 6.890597067033148e-05, "loss": 0.7085, "step": 63375 }, { "epoch": 57.37556561085973, "grad_norm": 4.3189215660095215, "learning_rate": 6.884693779521468e-05, "loss": 0.705, 
"step": 63400 }, { "epoch": 57.39819004524887, "grad_norm": 4.620569705963135, "learning_rate": 6.878790875771347e-05, "loss": 0.7154, "step": 63425 }, { "epoch": 57.42081447963801, "grad_norm": 5.138914108276367, "learning_rate": 6.87288835946438e-05, "loss": 0.6988, "step": 63450 }, { "epoch": 57.44343891402715, "grad_norm": 4.84014368057251, "learning_rate": 6.86698623428192e-05, "loss": 0.6541, "step": 63475 }, { "epoch": 57.46606334841629, "grad_norm": 3.530526638031006, "learning_rate": 6.861320565493707e-05, "loss": 0.8056, "step": 63500 }, { "epoch": 57.48868778280543, "grad_norm": 5.182336807250977, "learning_rate": 6.855419217593208e-05, "loss": 0.8006, "step": 63525 }, { "epoch": 57.51131221719457, "grad_norm": 4.710208892822266, "learning_rate": 6.849518271712579e-05, "loss": 0.7378, "step": 63550 }, { "epoch": 57.53393665158371, "grad_norm": 4.753151893615723, "learning_rate": 6.843617731532191e-05, "loss": 0.7038, "step": 63575 }, { "epoch": 57.55656108597285, "grad_norm": 4.007762432098389, "learning_rate": 6.837717600732167e-05, "loss": 0.6604, "step": 63600 }, { "epoch": 57.57918552036199, "grad_norm": 4.074855804443359, "learning_rate": 6.831817882992368e-05, "loss": 0.7431, "step": 63625 }, { "epoch": 57.60180995475113, "grad_norm": 4.61572265625, "learning_rate": 6.825918581992403e-05, "loss": 0.7457, "step": 63650 }, { "epoch": 57.62443438914027, "grad_norm": 4.903160095214844, "learning_rate": 6.820019701411617e-05, "loss": 0.8099, "step": 63675 }, { "epoch": 57.64705882352941, "grad_norm": 4.184039115905762, "learning_rate": 6.814121244929096e-05, "loss": 0.7859, "step": 63700 }, { "epoch": 57.66968325791855, "grad_norm": 4.689314842224121, "learning_rate": 6.808223216223658e-05, "loss": 0.7541, "step": 63725 }, { "epoch": 57.69230769230769, "grad_norm": 4.083841323852539, "learning_rate": 6.80232561897386e-05, "loss": 0.686, "step": 63750 }, { "epoch": 57.71493212669683, "grad_norm": 3.9760990142822266, "learning_rate": 
6.796428456857983e-05, "loss": 0.6643, "step": 63775 }, { "epoch": 57.737556561085974, "grad_norm": 5.40849494934082, "learning_rate": 6.79053173355404e-05, "loss": 0.7009, "step": 63800 }, { "epoch": 57.76018099547511, "grad_norm": 5.317495346069336, "learning_rate": 6.784635452739771e-05, "loss": 0.727, "step": 63825 }, { "epoch": 57.782805429864254, "grad_norm": 4.315069675445557, "learning_rate": 6.778739618092638e-05, "loss": 0.8383, "step": 63850 }, { "epoch": 57.80542986425339, "grad_norm": 4.335770130157471, "learning_rate": 6.772844233289827e-05, "loss": 0.6921, "step": 63875 }, { "epoch": 57.828054298642535, "grad_norm": 3.6915290355682373, "learning_rate": 6.766949302008243e-05, "loss": 0.7353, "step": 63900 }, { "epoch": 57.85067873303167, "grad_norm": 4.659553527832031, "learning_rate": 6.761054827924506e-05, "loss": 0.6632, "step": 63925 }, { "epoch": 57.873303167420815, "grad_norm": 4.827514171600342, "learning_rate": 6.75516081471495e-05, "loss": 0.7025, "step": 63950 }, { "epoch": 57.89592760180995, "grad_norm": 4.103525638580322, "learning_rate": 6.74926726605563e-05, "loss": 0.7812, "step": 63975 }, { "epoch": 57.918552036199095, "grad_norm": 3.6395256519317627, "learning_rate": 6.7433741856223e-05, "loss": 0.8079, "step": 64000 }, { "epoch": 57.94117647058823, "grad_norm": 4.708597660064697, "learning_rate": 6.737481577090427e-05, "loss": 0.6813, "step": 64025 }, { "epoch": 57.963800904977376, "grad_norm": 5.471441745758057, "learning_rate": 6.731589444135186e-05, "loss": 0.7924, "step": 64050 }, { "epoch": 57.98642533936652, "grad_norm": 3.4653618335723877, "learning_rate": 6.725697790431454e-05, "loss": 0.6593, "step": 64075 }, { "epoch": 58.009049773755656, "grad_norm": 4.245968341827393, "learning_rate": 6.719806619653805e-05, "loss": 0.6898, "step": 64100 }, { "epoch": 58.0316742081448, "grad_norm": 5.01800012588501, "learning_rate": 6.713915935476516e-05, "loss": 0.6993, "step": 64125 }, { "epoch": 58.05429864253394, "grad_norm": 
5.440726280212402, "learning_rate": 6.70802574157356e-05, "loss": 0.5498, "step": 64150 }, { "epoch": 58.07692307692308, "grad_norm": 2.5011403560638428, "learning_rate": 6.702136041618605e-05, "loss": 0.6508, "step": 64175 }, { "epoch": 58.09954751131222, "grad_norm": 4.115577697753906, "learning_rate": 6.696246839285009e-05, "loss": 0.6571, "step": 64200 }, { "epoch": 58.12217194570136, "grad_norm": 3.631739377975464, "learning_rate": 6.690358138245819e-05, "loss": 0.6546, "step": 64225 }, { "epoch": 58.1447963800905, "grad_norm": 4.247576713562012, "learning_rate": 6.684469942173772e-05, "loss": 0.6615, "step": 64250 }, { "epoch": 58.16742081447964, "grad_norm": 4.359410762786865, "learning_rate": 6.678582254741286e-05, "loss": 0.5832, "step": 64275 }, { "epoch": 58.19004524886878, "grad_norm": 2.6934914588928223, "learning_rate": 6.672695079620469e-05, "loss": 0.6694, "step": 64300 }, { "epoch": 58.21266968325792, "grad_norm": 3.67513108253479, "learning_rate": 6.666808420483102e-05, "loss": 0.7659, "step": 64325 }, { "epoch": 58.23529411764706, "grad_norm": 4.915542125701904, "learning_rate": 6.660922281000649e-05, "loss": 0.6633, "step": 64350 }, { "epoch": 58.2579185520362, "grad_norm": 3.5720250606536865, "learning_rate": 6.655036664844245e-05, "loss": 0.6374, "step": 64375 }, { "epoch": 58.28054298642534, "grad_norm": 5.004158020019531, "learning_rate": 6.649151575684705e-05, "loss": 0.6356, "step": 64400 }, { "epoch": 58.30316742081448, "grad_norm": 3.67757511138916, "learning_rate": 6.643267017192509e-05, "loss": 0.6521, "step": 64425 }, { "epoch": 58.32579185520362, "grad_norm": 3.7194998264312744, "learning_rate": 6.63738299303781e-05, "loss": 0.6626, "step": 64450 }, { "epoch": 58.34841628959276, "grad_norm": 4.588992595672607, "learning_rate": 6.63149950689043e-05, "loss": 0.6551, "step": 64475 }, { "epoch": 58.3710407239819, "grad_norm": 4.48909330368042, "learning_rate": 6.625616562419846e-05, "loss": 0.6209, "step": 64500 }, { "epoch": 
58.39366515837104, "grad_norm": 5.264707565307617, "learning_rate": 6.619734163295209e-05, "loss": 0.7786, "step": 64525 }, { "epoch": 58.41628959276018, "grad_norm": 4.463858604431152, "learning_rate": 6.613852313185321e-05, "loss": 0.7384, "step": 64550 }, { "epoch": 58.43891402714932, "grad_norm": 4.5495829582214355, "learning_rate": 6.607971015758645e-05, "loss": 0.6313, "step": 64575 }, { "epoch": 58.46153846153846, "grad_norm": 4.662189483642578, "learning_rate": 6.602090274683301e-05, "loss": 0.63, "step": 64600 }, { "epoch": 58.484162895927604, "grad_norm": 4.075993537902832, "learning_rate": 6.596210093627058e-05, "loss": 0.6615, "step": 64625 }, { "epoch": 58.50678733031674, "grad_norm": 3.6193978786468506, "learning_rate": 6.590330476257338e-05, "loss": 0.7725, "step": 64650 }, { "epoch": 58.529411764705884, "grad_norm": 4.732771873474121, "learning_rate": 6.584451426241213e-05, "loss": 0.6606, "step": 64675 }, { "epoch": 58.55203619909502, "grad_norm": 4.847504615783691, "learning_rate": 6.578572947245397e-05, "loss": 0.7519, "step": 64700 }, { "epoch": 58.574660633484164, "grad_norm": 5.2629075050354, "learning_rate": 6.572695042936253e-05, "loss": 0.7245, "step": 64725 }, { "epoch": 58.5972850678733, "grad_norm": 3.563847541809082, "learning_rate": 6.56681771697978e-05, "loss": 0.6861, "step": 64750 }, { "epoch": 58.619909502262445, "grad_norm": 3.5651309490203857, "learning_rate": 6.560940973041621e-05, "loss": 0.7774, "step": 64775 }, { "epoch": 58.64253393665158, "grad_norm": 4.128613471984863, "learning_rate": 6.555064814787053e-05, "loss": 0.6914, "step": 64800 }, { "epoch": 58.665158371040725, "grad_norm": 4.838799476623535, "learning_rate": 6.54918924588099e-05, "loss": 0.7919, "step": 64825 }, { "epoch": 58.68778280542986, "grad_norm": 3.5059125423431396, "learning_rate": 6.543314269987974e-05, "loss": 0.7642, "step": 64850 }, { "epoch": 58.710407239819006, "grad_norm": 4.279022693634033, "learning_rate": 6.537439890772185e-05, "loss": 0.7714, 
"step": 64875 }, { "epoch": 58.73303167420814, "grad_norm": 3.9953958988189697, "learning_rate": 6.531566111897426e-05, "loss": 0.7696, "step": 64900 }, { "epoch": 58.755656108597286, "grad_norm": 5.27321195602417, "learning_rate": 6.525692937027122e-05, "loss": 0.5896, "step": 64925 }, { "epoch": 58.77828054298642, "grad_norm": 5.410314083099365, "learning_rate": 6.519820369824329e-05, "loss": 0.6908, "step": 64950 }, { "epoch": 58.800904977375566, "grad_norm": 4.452655792236328, "learning_rate": 6.513948413951717e-05, "loss": 0.7339, "step": 64975 }, { "epoch": 58.8235294117647, "grad_norm": 2.9418070316314697, "learning_rate": 6.50807707307158e-05, "loss": 0.7615, "step": 65000 }, { "epoch": 58.84615384615385, "grad_norm": 4.953251361846924, "learning_rate": 6.502206350845825e-05, "loss": 0.6923, "step": 65025 }, { "epoch": 58.86877828054298, "grad_norm": 3.1625888347625732, "learning_rate": 6.496336250935975e-05, "loss": 0.6791, "step": 65050 }, { "epoch": 58.89140271493213, "grad_norm": 4.483161926269531, "learning_rate": 6.490466777003164e-05, "loss": 0.7056, "step": 65075 }, { "epoch": 58.914027149321264, "grad_norm": 4.233424186706543, "learning_rate": 6.484597932708136e-05, "loss": 0.7401, "step": 65100 }, { "epoch": 58.93665158371041, "grad_norm": 4.563268661499023, "learning_rate": 6.478729721711243e-05, "loss": 0.6932, "step": 65125 }, { "epoch": 58.959276018099544, "grad_norm": 5.159553050994873, "learning_rate": 6.472862147672439e-05, "loss": 0.691, "step": 65150 }, { "epoch": 58.98190045248869, "grad_norm": 3.6239402294158936, "learning_rate": 6.466995214251286e-05, "loss": 0.6401, "step": 65175 }, { "epoch": 59.00452488687783, "grad_norm": 3.668715476989746, "learning_rate": 6.46112892510694e-05, "loss": 0.7049, "step": 65200 }, { "epoch": 59.02714932126697, "grad_norm": 4.769604206085205, "learning_rate": 6.45526328389816e-05, "loss": 0.6767, "step": 65225 }, { "epoch": 59.04977375565611, "grad_norm": 3.7245876789093018, "learning_rate": 
6.449398294283301e-05, "loss": 0.5708, "step": 65250 }, { "epoch": 59.07239819004525, "grad_norm": 4.986959457397461, "learning_rate": 6.443533959920309e-05, "loss": 0.8017, "step": 65275 }, { "epoch": 59.09502262443439, "grad_norm": 5.122184753417969, "learning_rate": 6.43767028446672e-05, "loss": 0.6403, "step": 65300 }, { "epoch": 59.11764705882353, "grad_norm": 4.31292200088501, "learning_rate": 6.431807271579664e-05, "loss": 0.6101, "step": 65325 }, { "epoch": 59.14027149321267, "grad_norm": 5.1685309410095215, "learning_rate": 6.425944924915857e-05, "loss": 0.5861, "step": 65350 }, { "epoch": 59.16289592760181, "grad_norm": 3.9274120330810547, "learning_rate": 6.420083248131591e-05, "loss": 0.6415, "step": 65375 }, { "epoch": 59.18552036199095, "grad_norm": 3.911078929901123, "learning_rate": 6.414222244882754e-05, "loss": 0.6568, "step": 65400 }, { "epoch": 59.20814479638009, "grad_norm": 3.8726963996887207, "learning_rate": 6.408361918824803e-05, "loss": 0.5736, "step": 65425 }, { "epoch": 59.23076923076923, "grad_norm": 3.2742345333099365, "learning_rate": 6.402502273612777e-05, "loss": 0.6087, "step": 65450 }, { "epoch": 59.25339366515837, "grad_norm": 4.598509788513184, "learning_rate": 6.396643312901291e-05, "loss": 0.7204, "step": 65475 }, { "epoch": 59.276018099547514, "grad_norm": 4.440429210662842, "learning_rate": 6.39078504034453e-05, "loss": 0.6373, "step": 65500 }, { "epoch": 59.29864253393665, "grad_norm": 4.567727565765381, "learning_rate": 6.384927459596254e-05, "loss": 0.6433, "step": 65525 }, { "epoch": 59.321266968325794, "grad_norm": 3.317457914352417, "learning_rate": 6.379070574309786e-05, "loss": 0.7142, "step": 65550 }, { "epoch": 59.34389140271493, "grad_norm": 4.630799770355225, "learning_rate": 6.37321438813802e-05, "loss": 0.5899, "step": 65575 }, { "epoch": 59.366515837104075, "grad_norm": 3.2978603839874268, "learning_rate": 6.367358904733413e-05, "loss": 0.6016, "step": 65600 }, { "epoch": 59.38914027149321, "grad_norm": 
3.8394994735717773, "learning_rate": 6.361738305218345e-05, "loss": 0.6981, "step": 65625 }, { "epoch": 59.411764705882355, "grad_norm": 1.3056546449661255, "learning_rate": 6.355884209830735e-05, "loss": 0.6676, "step": 65650 }, { "epoch": 59.43438914027149, "grad_norm": 4.848611354827881, "learning_rate": 6.350030828018976e-05, "loss": 0.7122, "step": 65675 }, { "epoch": 59.457013574660635, "grad_norm": 2.762185573577881, "learning_rate": 6.344178163433774e-05, "loss": 0.6294, "step": 65700 }, { "epoch": 59.47963800904977, "grad_norm": 4.507059097290039, "learning_rate": 6.338326219725394e-05, "loss": 0.5763, "step": 65725 }, { "epoch": 59.502262443438916, "grad_norm": 4.1835222244262695, "learning_rate": 6.332475000543644e-05, "loss": 0.6056, "step": 65750 }, { "epoch": 59.52488687782805, "grad_norm": 4.698659420013428, "learning_rate": 6.326624509537881e-05, "loss": 0.662, "step": 65775 }, { "epoch": 59.547511312217196, "grad_norm": 4.21934175491333, "learning_rate": 6.32077475035701e-05, "loss": 0.6435, "step": 65800 }, { "epoch": 59.57013574660633, "grad_norm": 3.255751609802246, "learning_rate": 6.31492572664948e-05, "loss": 0.7144, "step": 65825 }, { "epoch": 59.59276018099548, "grad_norm": 3.099938154220581, "learning_rate": 6.30907744206328e-05, "loss": 0.76, "step": 65850 }, { "epoch": 59.61538461538461, "grad_norm": 4.311501979827881, "learning_rate": 6.303229900245936e-05, "loss": 0.6285, "step": 65875 }, { "epoch": 59.63800904977376, "grad_norm": 3.967350482940674, "learning_rate": 6.297383104844517e-05, "loss": 0.7041, "step": 65900 }, { "epoch": 59.660633484162894, "grad_norm": 3.6102454662323, "learning_rate": 6.29153705950562e-05, "loss": 0.6672, "step": 65925 }, { "epoch": 59.68325791855204, "grad_norm": 4.378409385681152, "learning_rate": 6.285691767875377e-05, "loss": 0.6725, "step": 65950 }, { "epoch": 59.705882352941174, "grad_norm": 4.492663860321045, "learning_rate": 6.27984723359945e-05, "loss": 0.6248, "step": 65975 }, { "epoch": 
59.72850678733032, "grad_norm": 4.401359558105469, "learning_rate": 6.274003460323027e-05, "loss": 0.652, "step": 66000 }, { "epoch": 59.751131221719454, "grad_norm": 4.687775611877441, "learning_rate": 6.268160451690824e-05, "loss": 0.675, "step": 66025 }, { "epoch": 59.7737556561086, "grad_norm": 5.653653621673584, "learning_rate": 6.262318211347079e-05, "loss": 0.6625, "step": 66050 }, { "epoch": 59.796380090497735, "grad_norm": 4.8085784912109375, "learning_rate": 6.256476742935548e-05, "loss": 0.6134, "step": 66075 }, { "epoch": 59.81900452488688, "grad_norm": 3.568744421005249, "learning_rate": 6.250636050099509e-05, "loss": 0.5429, "step": 66100 }, { "epoch": 59.841628959276015, "grad_norm": 4.331258773803711, "learning_rate": 6.244796136481757e-05, "loss": 0.7067, "step": 66125 }, { "epoch": 59.86425339366516, "grad_norm": 5.338553428649902, "learning_rate": 6.238957005724598e-05, "loss": 0.677, "step": 66150 }, { "epoch": 59.886877828054295, "grad_norm": 3.8364171981811523, "learning_rate": 6.233118661469852e-05, "loss": 0.6128, "step": 66175 }, { "epoch": 59.90950226244344, "grad_norm": 3.696770191192627, "learning_rate": 6.227281107358846e-05, "loss": 0.792, "step": 66200 }, { "epoch": 59.932126696832576, "grad_norm": 5.0461106300354, "learning_rate": 6.221444347032417e-05, "loss": 0.6329, "step": 66225 }, { "epoch": 59.95475113122172, "grad_norm": 4.868934631347656, "learning_rate": 6.215608384130905e-05, "loss": 0.627, "step": 66250 }, { "epoch": 59.977375565610856, "grad_norm": 3.4682068824768066, "learning_rate": 6.209773222294153e-05, "loss": 0.5947, "step": 66275 }, { "epoch": 60.0, "grad_norm": 5.2894062995910645, "learning_rate": 6.203938865161506e-05, "loss": 0.6666, "step": 66300 }, { "epoch": 60.022624434389144, "grad_norm": 3.4546284675598145, "learning_rate": 6.198105316371804e-05, "loss": 0.6174, "step": 66325 }, { "epoch": 60.04524886877828, "grad_norm": 4.68898344039917, "learning_rate": 6.192272579563387e-05, "loss": 0.5547, "step": 
66350 }, { "epoch": 60.067873303167424, "grad_norm": 1.7499679327011108, "learning_rate": 6.186440658374084e-05, "loss": 0.5807, "step": 66375 }, { "epoch": 60.09049773755656, "grad_norm": 5.337199687957764, "learning_rate": 6.180609556441219e-05, "loss": 0.6037, "step": 66400 }, { "epoch": 60.113122171945705, "grad_norm": 3.1942121982574463, "learning_rate": 6.174779277401602e-05, "loss": 0.6353, "step": 66425 }, { "epoch": 60.13574660633484, "grad_norm": 6.741465091705322, "learning_rate": 6.168949824891534e-05, "loss": 0.6482, "step": 66450 }, { "epoch": 60.158371040723985, "grad_norm": 5.219030857086182, "learning_rate": 6.163121202546794e-05, "loss": 0.6007, "step": 66475 }, { "epoch": 60.18099547511312, "grad_norm": 4.876162052154541, "learning_rate": 6.157293414002651e-05, "loss": 0.6126, "step": 66500 }, { "epoch": 60.203619909502265, "grad_norm": 4.365516185760498, "learning_rate": 6.151466462893847e-05, "loss": 0.5603, "step": 66525 }, { "epoch": 60.2262443438914, "grad_norm": 4.76017951965332, "learning_rate": 6.145640352854606e-05, "loss": 0.5364, "step": 66550 }, { "epoch": 60.248868778280546, "grad_norm": 2.8500778675079346, "learning_rate": 6.139815087518626e-05, "loss": 0.6049, "step": 66575 }, { "epoch": 60.27149321266968, "grad_norm": 4.174798011779785, "learning_rate": 6.133990670519079e-05, "loss": 0.7041, "step": 66600 }, { "epoch": 60.294117647058826, "grad_norm": 5.168081283569336, "learning_rate": 6.128167105488605e-05, "loss": 0.7466, "step": 66625 }, { "epoch": 60.31674208144796, "grad_norm": 5.011589527130127, "learning_rate": 6.122344396059319e-05, "loss": 0.616, "step": 66650 }, { "epoch": 60.339366515837106, "grad_norm": 2.1540355682373047, "learning_rate": 6.116522545862794e-05, "loss": 0.5621, "step": 66675 }, { "epoch": 60.36199095022624, "grad_norm": 3.5327677726745605, "learning_rate": 6.110701558530073e-05, "loss": 0.6676, "step": 66700 }, { "epoch": 60.38461538461539, "grad_norm": 3.2821593284606934, "learning_rate": 
6.10488143769166e-05, "loss": 0.5885, "step": 66725 }, { "epoch": 60.40723981900452, "grad_norm": 2.0960774421691895, "learning_rate": 6.099062186977516e-05, "loss": 0.6644, "step": 66750 }, { "epoch": 60.42986425339367, "grad_norm": 2.393394708633423, "learning_rate": 6.093243810017062e-05, "loss": 0.5608, "step": 66775 }, { "epoch": 60.452488687782804, "grad_norm": 4.688882827758789, "learning_rate": 6.087426310439174e-05, "loss": 0.6636, "step": 66800 }, { "epoch": 60.47511312217195, "grad_norm": 4.645911693572998, "learning_rate": 6.081609691872178e-05, "loss": 0.5493, "step": 66825 }, { "epoch": 60.497737556561084, "grad_norm": 3.82240891456604, "learning_rate": 6.075793957943854e-05, "loss": 0.5768, "step": 66850 }, { "epoch": 60.52036199095023, "grad_norm": 4.518941402435303, "learning_rate": 6.0699791122814275e-05, "loss": 0.6824, "step": 66875 }, { "epoch": 60.542986425339365, "grad_norm": 4.387720108032227, "learning_rate": 6.0641651585115715e-05, "loss": 0.7327, "step": 66900 }, { "epoch": 60.56561085972851, "grad_norm": 5.1645331382751465, "learning_rate": 6.058352100260403e-05, "loss": 0.592, "step": 66925 }, { "epoch": 60.588235294117645, "grad_norm": 3.3075625896453857, "learning_rate": 6.052539941153477e-05, "loss": 0.6545, "step": 66950 }, { "epoch": 60.61085972850679, "grad_norm": 2.7808585166931152, "learning_rate": 6.0467286848157954e-05, "loss": 0.6129, "step": 66975 }, { "epoch": 60.633484162895925, "grad_norm": 4.265286445617676, "learning_rate": 6.04091833487179e-05, "loss": 0.6214, "step": 67000 }, { "epoch": 60.65610859728507, "grad_norm": 4.914118766784668, "learning_rate": 6.035108894945328e-05, "loss": 0.5254, "step": 67025 }, { "epoch": 60.678733031674206, "grad_norm": 5.129421710968018, "learning_rate": 6.029300368659712e-05, "loss": 0.674, "step": 67050 }, { "epoch": 60.70135746606335, "grad_norm": 4.754515647888184, "learning_rate": 6.0234927596376744e-05, "loss": 0.6048, "step": 67075 }, { "epoch": 60.723981900452486, "grad_norm": 
3.9844114780426025, "learning_rate": 6.0176860715013715e-05, "loss": 0.6366, "step": 67100 }, { "epoch": 60.74660633484163, "grad_norm": 3.719999074935913, "learning_rate": 6.011880307872391e-05, "loss": 0.6566, "step": 67125 }, { "epoch": 60.76923076923077, "grad_norm": 4.930984973907471, "learning_rate": 6.00607547237174e-05, "loss": 0.6577, "step": 67150 }, { "epoch": 60.79185520361991, "grad_norm": 3.436265707015991, "learning_rate": 6.000271568619847e-05, "loss": 0.6, "step": 67175 }, { "epoch": 60.81447963800905, "grad_norm": 5.183216571807861, "learning_rate": 5.994468600236561e-05, "loss": 0.675, "step": 67200 }, { "epoch": 60.83710407239819, "grad_norm": 3.767946243286133, "learning_rate": 5.988666570841148e-05, "loss": 0.6304, "step": 67225 }, { "epoch": 60.85972850678733, "grad_norm": 2.762791872024536, "learning_rate": 5.9828654840522855e-05, "loss": 0.5571, "step": 67250 }, { "epoch": 60.88235294117647, "grad_norm": 4.998724937438965, "learning_rate": 5.977065343488066e-05, "loss": 0.6721, "step": 67275 }, { "epoch": 60.90497737556561, "grad_norm": 3.832132577896118, "learning_rate": 5.97126615276599e-05, "loss": 0.5589, "step": 67300 }, { "epoch": 60.92760180995475, "grad_norm": 3.30859375, "learning_rate": 5.965467915502967e-05, "loss": 0.5113, "step": 67325 }, { "epoch": 60.95022624434389, "grad_norm": 4.903571128845215, "learning_rate": 5.959670635315308e-05, "loss": 0.6619, "step": 67350 }, { "epoch": 60.97285067873303, "grad_norm": 4.615489959716797, "learning_rate": 5.953874315818736e-05, "loss": 0.5516, "step": 67375 }, { "epoch": 60.99547511312217, "grad_norm": 4.0281243324279785, "learning_rate": 5.948078960628366e-05, "loss": 0.5842, "step": 67400 }, { "epoch": 61.01809954751131, "grad_norm": 3.07563853263855, "learning_rate": 5.942284573358712e-05, "loss": 0.62, "step": 67425 }, { "epoch": 61.040723981900456, "grad_norm": 4.654354095458984, "learning_rate": 5.9364911576236915e-05, "loss": 0.5309, "step": 67450 }, { "epoch": 
61.06334841628959, "grad_norm": 4.621328830718994, "learning_rate": 5.930698717036609e-05, "loss": 0.6051, "step": 67475 }, { "epoch": 61.085972850678736, "grad_norm": 3.6699090003967285, "learning_rate": 5.9249072552101643e-05, "loss": 0.5934, "step": 67500 }, { "epoch": 61.10859728506787, "grad_norm": 3.4764626026153564, "learning_rate": 5.919116775756445e-05, "loss": 0.6266, "step": 67525 }, { "epoch": 61.13122171945702, "grad_norm": 4.3350510597229, "learning_rate": 5.9133272822869274e-05, "loss": 0.4966, "step": 67550 }, { "epoch": 61.15384615384615, "grad_norm": 2.9503839015960693, "learning_rate": 5.907538778412471e-05, "loss": 0.5372, "step": 67575 }, { "epoch": 61.1764705882353, "grad_norm": 3.6014959812164307, "learning_rate": 5.901751267743321e-05, "loss": 0.5589, "step": 67600 }, { "epoch": 61.199095022624434, "grad_norm": 2.387441635131836, "learning_rate": 5.8959647538891e-05, "loss": 0.568, "step": 67625 }, { "epoch": 61.22171945701358, "grad_norm": 4.004934310913086, "learning_rate": 5.8901792404588097e-05, "loss": 0.5501, "step": 67650 }, { "epoch": 61.244343891402714, "grad_norm": 4.333060264587402, "learning_rate": 5.8843947310608306e-05, "loss": 0.4982, "step": 67675 }, { "epoch": 61.26696832579186, "grad_norm": 3.765822649002075, "learning_rate": 5.878611229302914e-05, "loss": 0.5822, "step": 67700 }, { "epoch": 61.289592760180994, "grad_norm": 4.504117012023926, "learning_rate": 5.872828738792183e-05, "loss": 0.6039, "step": 67725 }, { "epoch": 61.31221719457014, "grad_norm": 4.02402925491333, "learning_rate": 5.867047263135131e-05, "loss": 0.5885, "step": 67750 }, { "epoch": 61.334841628959275, "grad_norm": 4.467599391937256, "learning_rate": 5.8612668059376184e-05, "loss": 0.5821, "step": 67775 }, { "epoch": 61.35746606334842, "grad_norm": 4.618236064910889, "learning_rate": 5.8554873708048695e-05, "loss": 0.5983, "step": 67800 }, { "epoch": 61.380090497737555, "grad_norm": 4.545291900634766, "learning_rate": 5.849708961341472e-05, "loss": 
0.6362, "step": 67825 }, { "epoch": 61.4027149321267, "grad_norm": 4.7155585289001465, "learning_rate": 5.843931581151373e-05, "loss": 0.5973, "step": 67850 }, { "epoch": 61.425339366515836, "grad_norm": 3.715583086013794, "learning_rate": 5.838155233837879e-05, "loss": 0.6329, "step": 67875 }, { "epoch": 61.44796380090498, "grad_norm": 4.6777472496032715, "learning_rate": 5.832379923003652e-05, "loss": 0.5239, "step": 67900 }, { "epoch": 61.470588235294116, "grad_norm": 2.7520992755889893, "learning_rate": 5.826605652250705e-05, "loss": 0.5685, "step": 67925 }, { "epoch": 61.49321266968326, "grad_norm": 3.158763885498047, "learning_rate": 5.820832425180407e-05, "loss": 0.5413, "step": 67950 }, { "epoch": 61.515837104072396, "grad_norm": 3.884981632232666, "learning_rate": 5.8150602453934725e-05, "loss": 0.5559, "step": 67975 }, { "epoch": 61.53846153846154, "grad_norm": 1.9774912595748901, "learning_rate": 5.809289116489963e-05, "loss": 0.527, "step": 68000 }, { "epoch": 61.56108597285068, "grad_norm": 4.468634605407715, "learning_rate": 5.8035190420692854e-05, "loss": 0.6478, "step": 68025 }, { "epoch": 61.58371040723982, "grad_norm": 4.440135955810547, "learning_rate": 5.79775002573019e-05, "loss": 0.702, "step": 68050 }, { "epoch": 61.60633484162896, "grad_norm": 3.9630126953125, "learning_rate": 5.791982071070765e-05, "loss": 0.4886, "step": 68075 }, { "epoch": 61.6289592760181, "grad_norm": 3.3422679901123047, "learning_rate": 5.786215181688438e-05, "loss": 0.5723, "step": 68100 }, { "epoch": 61.65158371040724, "grad_norm": 3.2867820262908936, "learning_rate": 5.780449361179972e-05, "loss": 0.6226, "step": 68125 }, { "epoch": 61.67420814479638, "grad_norm": 3.681912899017334, "learning_rate": 5.774684613141463e-05, "loss": 0.6728, "step": 68150 }, { "epoch": 61.69683257918552, "grad_norm": 4.133671760559082, "learning_rate": 5.768920941168337e-05, "loss": 0.6028, "step": 68175 }, { "epoch": 61.71945701357466, "grad_norm": 5.205613136291504, "learning_rate": 
5.763158348855351e-05, "loss": 0.6276, "step": 68200 }, { "epoch": 61.7420814479638, "grad_norm": 4.072926044464111, "learning_rate": 5.757396839796589e-05, "loss": 0.5494, "step": 68225 }, { "epoch": 61.76470588235294, "grad_norm": 4.312483310699463, "learning_rate": 5.751636417585455e-05, "loss": 0.5893, "step": 68250 }, { "epoch": 61.78733031674208, "grad_norm": 1.9435641765594482, "learning_rate": 5.7458770858146817e-05, "loss": 0.6388, "step": 68275 }, { "epoch": 61.80995475113122, "grad_norm": 3.7756338119506836, "learning_rate": 5.740118848076314e-05, "loss": 0.5754, "step": 68300 }, { "epoch": 61.83257918552036, "grad_norm": 4.449934005737305, "learning_rate": 5.7343617079617225e-05, "loss": 0.5657, "step": 68325 }, { "epoch": 61.8552036199095, "grad_norm": 4.232459545135498, "learning_rate": 5.728605669061587e-05, "loss": 0.5684, "step": 68350 }, { "epoch": 61.87782805429864, "grad_norm": 3.241680383682251, "learning_rate": 5.7228507349659045e-05, "loss": 0.5568, "step": 68375 }, { "epoch": 61.90045248868778, "grad_norm": 4.112957000732422, "learning_rate": 5.71709690926398e-05, "loss": 0.6108, "step": 68400 }, { "epoch": 61.92307692307692, "grad_norm": 4.771388053894043, "learning_rate": 5.711344195544428e-05, "loss": 0.5494, "step": 68425 }, { "epoch": 61.94570135746606, "grad_norm": 2.903620958328247, "learning_rate": 5.705592597395171e-05, "loss": 0.6083, "step": 68450 }, { "epoch": 61.9683257918552, "grad_norm": 3.416398763656616, "learning_rate": 5.699842118403436e-05, "loss": 0.532, "step": 68475 }, { "epoch": 61.990950226244344, "grad_norm": 4.085901737213135, "learning_rate": 5.694092762155747e-05, "loss": 0.6211, "step": 68500 }, { "epoch": 62.01357466063349, "grad_norm": 5.162738800048828, "learning_rate": 5.688344532237935e-05, "loss": 0.5795, "step": 68525 }, { "epoch": 62.036199095022624, "grad_norm": 3.3909380435943604, "learning_rate": 5.682597432235123e-05, "loss": 0.4504, "step": 68550 }, { "epoch": 62.05882352941177, "grad_norm": 
4.008853435516357, "learning_rate": 5.676851465731733e-05, "loss": 0.5542, "step": 68575 }, { "epoch": 62.081447963800905, "grad_norm": 4.662866115570068, "learning_rate": 5.671106636311477e-05, "loss": 0.5655, "step": 68600 }, { "epoch": 62.10407239819005, "grad_norm": 3.8678126335144043, "learning_rate": 5.665362947557359e-05, "loss": 0.6555, "step": 68625 }, { "epoch": 62.126696832579185, "grad_norm": 3.644484043121338, "learning_rate": 5.659620403051671e-05, "loss": 0.5688, "step": 68650 }, { "epoch": 62.14932126696833, "grad_norm": 3.2867202758789062, "learning_rate": 5.653879006375994e-05, "loss": 0.6283, "step": 68675 }, { "epoch": 62.171945701357465, "grad_norm": 3.1707193851470947, "learning_rate": 5.648138761111189e-05, "loss": 0.5258, "step": 68700 }, { "epoch": 62.19457013574661, "grad_norm": 4.200314998626709, "learning_rate": 5.642399670837403e-05, "loss": 0.4637, "step": 68725 }, { "epoch": 62.217194570135746, "grad_norm": 5.262591361999512, "learning_rate": 5.636661739134059e-05, "loss": 0.6027, "step": 68750 }, { "epoch": 62.23981900452489, "grad_norm": 3.71937894821167, "learning_rate": 5.6309249695798596e-05, "loss": 0.6497, "step": 68775 }, { "epoch": 62.262443438914026, "grad_norm": 2.9670209884643555, "learning_rate": 5.625189365752782e-05, "loss": 0.6145, "step": 68800 }, { "epoch": 62.28506787330317, "grad_norm": 4.04097318649292, "learning_rate": 5.619454931230076e-05, "loss": 0.5354, "step": 68825 }, { "epoch": 62.30769230769231, "grad_norm": 3.8297502994537354, "learning_rate": 5.613721669588264e-05, "loss": 0.5392, "step": 68850 }, { "epoch": 62.33031674208145, "grad_norm": 3.143273115158081, "learning_rate": 5.607989584403133e-05, "loss": 0.6856, "step": 68875 }, { "epoch": 62.35294117647059, "grad_norm": 3.210341453552246, "learning_rate": 5.60225867924974e-05, "loss": 0.5466, "step": 68900 }, { "epoch": 62.37556561085973, "grad_norm": 3.1118643283843994, "learning_rate": 5.596528957702405e-05, "loss": 0.4679, "step": 68925 }, { 
"epoch": 62.39819004524887, "grad_norm": 3.7933237552642822, "learning_rate": 5.5908004233347086e-05, "loss": 0.5791, "step": 68950 }, { "epoch": 62.42081447963801, "grad_norm": 3.7283291816711426, "learning_rate": 5.585073079719492e-05, "loss": 0.609, "step": 68975 }, { "epoch": 62.44343891402715, "grad_norm": 5.116457462310791, "learning_rate": 5.579346930428853e-05, "loss": 0.564, "step": 69000 }, { "epoch": 62.46606334841629, "grad_norm": 4.880287170410156, "learning_rate": 5.573621979034147e-05, "loss": 0.5384, "step": 69025 }, { "epoch": 62.48868778280543, "grad_norm": 2.7976601123809814, "learning_rate": 5.567898229105977e-05, "loss": 0.598, "step": 69050 }, { "epoch": 62.51131221719457, "grad_norm": 6.518646240234375, "learning_rate": 5.5621756842142026e-05, "loss": 0.6078, "step": 69075 }, { "epoch": 62.53393665158371, "grad_norm": 3.570544958114624, "learning_rate": 5.556454347927929e-05, "loss": 0.575, "step": 69100 }, { "epoch": 62.55656108597285, "grad_norm": 3.5969743728637695, "learning_rate": 5.5507342238155075e-05, "loss": 0.4654, "step": 69125 }, { "epoch": 62.57918552036199, "grad_norm": 3.625481367111206, "learning_rate": 5.545015315444533e-05, "loss": 0.5297, "step": 69150 }, { "epoch": 62.60180995475113, "grad_norm": 3.9849331378936768, "learning_rate": 5.539297626381843e-05, "loss": 0.495, "step": 69175 }, { "epoch": 62.62443438914027, "grad_norm": 4.907066345214844, "learning_rate": 5.533581160193515e-05, "loss": 0.5037, "step": 69200 }, { "epoch": 62.64705882352941, "grad_norm": 1.6982041597366333, "learning_rate": 5.527865920444863e-05, "loss": 0.5503, "step": 69225 }, { "epoch": 62.66968325791855, "grad_norm": 3.2752087116241455, "learning_rate": 5.522151910700436e-05, "loss": 0.6308, "step": 69250 }, { "epoch": 62.69230769230769, "grad_norm": 3.391486644744873, "learning_rate": 5.516439134524016e-05, "loss": 0.5923, "step": 69275 }, { "epoch": 62.71493212669683, "grad_norm": 2.5114595890045166, "learning_rate": 5.5107275954786155e-05, 
"loss": 0.5571, "step": 69300 }, { "epoch": 62.737556561085974, "grad_norm": 4.318356513977051, "learning_rate": 5.505017297126477e-05, "loss": 0.5301, "step": 69325 }, { "epoch": 62.76018099547511, "grad_norm": 5.065729141235352, "learning_rate": 5.499308243029066e-05, "loss": 0.4962, "step": 69350 }, { "epoch": 62.782805429864254, "grad_norm": 3.5812008380889893, "learning_rate": 5.493600436747075e-05, "loss": 0.6212, "step": 69375 }, { "epoch": 62.80542986425339, "grad_norm": 5.2495622634887695, "learning_rate": 5.487893881840417e-05, "loss": 0.5656, "step": 69400 }, { "epoch": 62.828054298642535, "grad_norm": 3.9354496002197266, "learning_rate": 5.482188581868223e-05, "loss": 0.5469, "step": 69425 }, { "epoch": 62.85067873303167, "grad_norm": 4.970963478088379, "learning_rate": 5.476484540388846e-05, "loss": 0.4997, "step": 69450 }, { "epoch": 62.873303167420815, "grad_norm": 4.774138450622559, "learning_rate": 5.4707817609598484e-05, "loss": 0.6145, "step": 69475 }, { "epoch": 62.89592760180995, "grad_norm": 4.530363082885742, "learning_rate": 5.4650802471380084e-05, "loss": 0.4556, "step": 69500 }, { "epoch": 62.918552036199095, "grad_norm": 3.339048385620117, "learning_rate": 5.4593800024793155e-05, "loss": 0.5627, "step": 69525 }, { "epoch": 62.94117647058823, "grad_norm": 3.6626808643341064, "learning_rate": 5.453681030538966e-05, "loss": 0.5036, "step": 69550 }, { "epoch": 62.963800904977376, "grad_norm": 3.765770435333252, "learning_rate": 5.447983334871362e-05, "loss": 0.4979, "step": 69575 }, { "epoch": 62.98642533936652, "grad_norm": 3.9678280353546143, "learning_rate": 5.442286919030111e-05, "loss": 0.4783, "step": 69600 }, { "epoch": 63.009049773755656, "grad_norm": 2.8816945552825928, "learning_rate": 5.436591786568025e-05, "loss": 0.632, "step": 69625 }, { "epoch": 63.0316742081448, "grad_norm": 3.625143051147461, "learning_rate": 5.4308979410371085e-05, "loss": 0.4666, "step": 69650 }, { "epoch": 63.05429864253394, "grad_norm": 4.200559139251709, 
"learning_rate": 5.4252053859885696e-05, "loss": 0.4263, "step": 69675 }, { "epoch": 63.07692307692308, "grad_norm": 2.9522573947906494, "learning_rate": 5.4195141249728086e-05, "loss": 0.5067, "step": 69700 }, { "epoch": 63.09954751131222, "grad_norm": 2.31929874420166, "learning_rate": 5.413824161539419e-05, "loss": 0.5216, "step": 69725 }, { "epoch": 63.12217194570136, "grad_norm": 5.139983654022217, "learning_rate": 5.40836302070305e-05, "loss": 0.5242, "step": 69750 }, { "epoch": 63.1447963800905, "grad_norm": 3.778296709060669, "learning_rate": 5.40267561082467e-05, "loss": 0.5228, "step": 69775 }, { "epoch": 63.16742081447964, "grad_norm": 3.6103365421295166, "learning_rate": 5.396989509030709e-05, "loss": 0.5152, "step": 69800 }, { "epoch": 63.19004524886878, "grad_norm": 3.8815252780914307, "learning_rate": 5.391304718867541e-05, "loss": 0.5407, "step": 69825 }, { "epoch": 63.21266968325792, "grad_norm": 2.3992159366607666, "learning_rate": 5.385621243880726e-05, "loss": 0.5378, "step": 69850 }, { "epoch": 63.23529411764706, "grad_norm": 3.226144790649414, "learning_rate": 5.379939087615002e-05, "loss": 0.5152, "step": 69875 }, { "epoch": 63.2579185520362, "grad_norm": 3.268803596496582, "learning_rate": 5.3742582536142826e-05, "loss": 0.4802, "step": 69900 }, { "epoch": 63.28054298642534, "grad_norm": 3.825580358505249, "learning_rate": 5.3685787454216605e-05, "loss": 0.553, "step": 69925 }, { "epoch": 63.30316742081448, "grad_norm": 4.392209053039551, "learning_rate": 5.3629005665793976e-05, "loss": 0.4879, "step": 69950 }, { "epoch": 63.32579185520362, "grad_norm": 3.3955893516540527, "learning_rate": 5.35722372062893e-05, "loss": 0.5015, "step": 69975 }, { "epoch": 63.34841628959276, "grad_norm": 2.4158828258514404, "learning_rate": 5.35154821111086e-05, "loss": 0.431, "step": 70000 }, { "epoch": 63.3710407239819, "grad_norm": 3.4631412029266357, "learning_rate": 5.3458740415649584e-05, "loss": 0.5473, "step": 70025 }, { "epoch": 63.39366515837104, 
"grad_norm": 4.1777024269104, "learning_rate": 5.3402012155301574e-05, "loss": 0.6049, "step": 70050 }, { "epoch": 63.41628959276018, "grad_norm": 3.4698071479797363, "learning_rate": 5.334529736544555e-05, "loss": 0.4622, "step": 70075 }, { "epoch": 63.43891402714932, "grad_norm": 4.021111488342285, "learning_rate": 5.328859608145406e-05, "loss": 0.5109, "step": 70100 }, { "epoch": 63.46153846153846, "grad_norm": 4.438292026519775, "learning_rate": 5.323190833869125e-05, "loss": 0.5073, "step": 70125 }, { "epoch": 63.484162895927604, "grad_norm": 3.206141710281372, "learning_rate": 5.3175234172512813e-05, "loss": 0.547, "step": 70150 }, { "epoch": 63.50678733031674, "grad_norm": 4.061110973358154, "learning_rate": 5.3118573618265966e-05, "loss": 0.5482, "step": 70175 }, { "epoch": 63.529411764705884, "grad_norm": 3.305384397506714, "learning_rate": 5.3061926711289445e-05, "loss": 0.5466, "step": 70200 }, { "epoch": 63.55203619909502, "grad_norm": 3.861522912979126, "learning_rate": 5.300529348691344e-05, "loss": 0.4888, "step": 70225 }, { "epoch": 63.574660633484164, "grad_norm": 2.5031259059906006, "learning_rate": 5.2948673980459694e-05, "loss": 0.5227, "step": 70250 }, { "epoch": 63.5972850678733, "grad_norm": 5.511804580688477, "learning_rate": 5.289206822724132e-05, "loss": 0.5474, "step": 70275 }, { "epoch": 63.619909502262445, "grad_norm": 4.490988254547119, "learning_rate": 5.283547626256287e-05, "loss": 0.5946, "step": 70300 }, { "epoch": 63.64253393665158, "grad_norm": 3.6166396141052246, "learning_rate": 5.277889812172029e-05, "loss": 0.5263, "step": 70325 }, { "epoch": 63.665158371040725, "grad_norm": 3.9081203937530518, "learning_rate": 5.2722333840000926e-05, "loss": 0.4762, "step": 70350 }, { "epoch": 63.68778280542986, "grad_norm": 3.9215359687805176, "learning_rate": 5.266578345268345e-05, "loss": 0.5689, "step": 70375 }, { "epoch": 63.710407239819006, "grad_norm": 4.067574501037598, "learning_rate": 5.260924699503791e-05, "loss": 0.5259, "step": 
70400 }, { "epoch": 63.73303167420814, "grad_norm": 4.492548942565918, "learning_rate": 5.2552724502325633e-05, "loss": 0.5292, "step": 70425 }, { "epoch": 63.755656108597286, "grad_norm": 2.3186278343200684, "learning_rate": 5.249621600979923e-05, "loss": 0.4537, "step": 70450 }, { "epoch": 63.77828054298642, "grad_norm": 3.9395086765289307, "learning_rate": 5.2439721552702614e-05, "loss": 0.4591, "step": 70475 }, { "epoch": 63.800904977375566, "grad_norm": 2.755739450454712, "learning_rate": 5.23832411662709e-05, "loss": 0.5483, "step": 70500 }, { "epoch": 63.8235294117647, "grad_norm": 3.0595924854278564, "learning_rate": 5.2326774885730485e-05, "loss": 0.5067, "step": 70525 }, { "epoch": 63.84615384615385, "grad_norm": 4.360012054443359, "learning_rate": 5.227032274629892e-05, "loss": 0.6306, "step": 70550 }, { "epoch": 63.86877828054298, "grad_norm": 2.9489736557006836, "learning_rate": 5.2213884783184947e-05, "loss": 0.5541, "step": 70575 }, { "epoch": 63.89140271493213, "grad_norm": 2.208599090576172, "learning_rate": 5.2157461031588484e-05, "loss": 0.5129, "step": 70600 }, { "epoch": 63.914027149321264, "grad_norm": 5.114026069641113, "learning_rate": 5.210105152670055e-05, "loss": 0.5569, "step": 70625 }, { "epoch": 63.93665158371041, "grad_norm": 5.624028205871582, "learning_rate": 5.204465630370334e-05, "loss": 0.6499, "step": 70650 }, { "epoch": 63.959276018099544, "grad_norm": 3.8451778888702393, "learning_rate": 5.198827539777006e-05, "loss": 0.4687, "step": 70675 }, { "epoch": 63.98190045248869, "grad_norm": 2.6324074268341064, "learning_rate": 5.193190884406505e-05, "loss": 0.5226, "step": 70700 }, { "epoch": 64.00452488687783, "grad_norm": 4.485694408416748, "learning_rate": 5.187555667774369e-05, "loss": 0.5219, "step": 70725 }, { "epoch": 64.02714932126698, "grad_norm": 1.449777364730835, "learning_rate": 5.181921893395235e-05, "loss": 0.4093, "step": 70750 }, { "epoch": 64.0497737556561, "grad_norm": 4.14157772064209, "learning_rate": 
5.176289564782844e-05, "loss": 0.5353, "step": 70775 }, { "epoch": 64.07239819004525, "grad_norm": 3.385127305984497, "learning_rate": 5.170658685450035e-05, "loss": 0.488, "step": 70800 }, { "epoch": 64.09502262443439, "grad_norm": 3.332422971725464, "learning_rate": 5.165029258908742e-05, "loss": 0.4705, "step": 70825 }, { "epoch": 64.11764705882354, "grad_norm": 4.453216075897217, "learning_rate": 5.159401288669993e-05, "loss": 0.5551, "step": 70850 }, { "epoch": 64.14027149321267, "grad_norm": 3.221008062362671, "learning_rate": 5.153774778243906e-05, "loss": 0.5066, "step": 70875 }, { "epoch": 64.16289592760181, "grad_norm": 2.463557481765747, "learning_rate": 5.148149731139694e-05, "loss": 0.4539, "step": 70900 }, { "epoch": 64.18552036199095, "grad_norm": 2.7777414321899414, "learning_rate": 5.142526150865652e-05, "loss": 0.489, "step": 70925 }, { "epoch": 64.2081447963801, "grad_norm": 3.453420877456665, "learning_rate": 5.136904040929162e-05, "loss": 0.476, "step": 70950 }, { "epoch": 64.23076923076923, "grad_norm": 3.4403789043426514, "learning_rate": 5.131283404836688e-05, "loss": 0.565, "step": 70975 }, { "epoch": 64.25339366515837, "grad_norm": 0.8236686587333679, "learning_rate": 5.1256642460937774e-05, "loss": 0.471, "step": 71000 }, { "epoch": 64.27601809954751, "grad_norm": 2.603306531906128, "learning_rate": 5.120046568205054e-05, "loss": 0.4589, "step": 71025 }, { "epoch": 64.29864253393666, "grad_norm": 4.371375560760498, "learning_rate": 5.1144303746742185e-05, "loss": 0.4263, "step": 71050 }, { "epoch": 64.32126696832579, "grad_norm": 4.094240188598633, "learning_rate": 5.1088156690040455e-05, "loss": 0.499, "step": 71075 }, { "epoch": 64.34389140271493, "grad_norm": 3.8048579692840576, "learning_rate": 5.1032024546963815e-05, "loss": 0.453, "step": 71100 }, { "epoch": 64.36651583710407, "grad_norm": 3.6536977291107178, "learning_rate": 5.0975907352521445e-05, "loss": 0.5122, "step": 71125 }, { "epoch": 64.38914027149322, "grad_norm": 
3.905212640762329, "learning_rate": 5.091980514171318e-05, "loss": 0.5703, "step": 71150 }, { "epoch": 64.41176470588235, "grad_norm": 3.993847370147705, "learning_rate": 5.086371794952952e-05, "loss": 0.509, "step": 71175 }, { "epoch": 64.43438914027149, "grad_norm": 3.4553163051605225, "learning_rate": 5.08076458109516e-05, "loss": 0.5573, "step": 71200 }, { "epoch": 64.45701357466064, "grad_norm": 3.7322750091552734, "learning_rate": 5.075158876095117e-05, "loss": 0.5615, "step": 71225 }, { "epoch": 64.47963800904978, "grad_norm": 4.832956314086914, "learning_rate": 5.0695546834490546e-05, "loss": 0.5044, "step": 71250 }, { "epoch": 64.50226244343891, "grad_norm": 4.214544296264648, "learning_rate": 5.063952006652264e-05, "loss": 0.5492, "step": 71275 }, { "epoch": 64.52488687782805, "grad_norm": 4.111082553863525, "learning_rate": 5.0583508491990884e-05, "loss": 0.4865, "step": 71300 }, { "epoch": 64.5475113122172, "grad_norm": 3.1240320205688477, "learning_rate": 5.0527512145829285e-05, "loss": 0.4663, "step": 71325 }, { "epoch": 64.57013574660634, "grad_norm": 3.761641025543213, "learning_rate": 5.047153106296228e-05, "loss": 0.4849, "step": 71350 }, { "epoch": 64.59276018099547, "grad_norm": 3.206021785736084, "learning_rate": 5.0415565278304835e-05, "loss": 0.4669, "step": 71375 }, { "epoch": 64.61538461538461, "grad_norm": 2.882524251937866, "learning_rate": 5.035961482676237e-05, "loss": 0.5029, "step": 71400 }, { "epoch": 64.63800904977376, "grad_norm": 2.521789073944092, "learning_rate": 5.030367974323071e-05, "loss": 0.5085, "step": 71425 }, { "epoch": 64.6606334841629, "grad_norm": 5.065183162689209, "learning_rate": 5.024776006259615e-05, "loss": 0.5224, "step": 71450 }, { "epoch": 64.68325791855203, "grad_norm": 4.135383605957031, "learning_rate": 5.019185581973532e-05, "loss": 0.564, "step": 71475 }, { "epoch": 64.70588235294117, "grad_norm": 3.4252192974090576, "learning_rate": 5.0135967049515255e-05, "loss": 0.4829, "step": 71500 }, { "epoch": 
64.72850678733032, "grad_norm": 4.423304080963135, "learning_rate": 5.008009378679332e-05, "loss": 0.4485, "step": 71525 }, { "epoch": 64.75113122171946, "grad_norm": 3.599609136581421, "learning_rate": 5.0024236066417256e-05, "loss": 0.5045, "step": 71550 }, { "epoch": 64.77375565610859, "grad_norm": 3.402135133743286, "learning_rate": 4.996839392322507e-05, "loss": 0.6037, "step": 71575 }, { "epoch": 64.79638009049773, "grad_norm": 4.259146690368652, "learning_rate": 4.991256739204502e-05, "loss": 0.56, "step": 71600 }, { "epoch": 64.81900452488688, "grad_norm": 3.316690444946289, "learning_rate": 4.985675650769569e-05, "loss": 0.4922, "step": 71625 }, { "epoch": 64.84162895927602, "grad_norm": 3.8324856758117676, "learning_rate": 4.98009613049859e-05, "loss": 0.5149, "step": 71650 }, { "epoch": 64.86425339366515, "grad_norm": 4.328530788421631, "learning_rate": 4.9745181818714644e-05, "loss": 0.5364, "step": 71675 }, { "epoch": 64.8868778280543, "grad_norm": 1.284960150718689, "learning_rate": 4.968941808367116e-05, "loss": 0.4889, "step": 71700 }, { "epoch": 64.90950226244344, "grad_norm": 2.807213068008423, "learning_rate": 4.9633670134634827e-05, "loss": 0.4275, "step": 71725 }, { "epoch": 64.93212669683258, "grad_norm": 2.836705207824707, "learning_rate": 4.9577938006375206e-05, "loss": 0.4192, "step": 71750 }, { "epoch": 64.95475113122171, "grad_norm": 3.825068235397339, "learning_rate": 4.952222173365197e-05, "loss": 0.4179, "step": 71775 }, { "epoch": 64.97737556561086, "grad_norm": 3.5482659339904785, "learning_rate": 4.94687490609831e-05, "loss": 0.4845, "step": 71800 }, { "epoch": 65.0, "grad_norm": 3.292893409729004, "learning_rate": 4.941306396590419e-05, "loss": 0.4963, "step": 71825 }, { "epoch": 65.02262443438914, "grad_norm": 3.8134896755218506, "learning_rate": 4.935739482919228e-05, "loss": 0.4938, "step": 71850 }, { "epoch": 65.04524886877829, "grad_norm": 5.965142250061035, "learning_rate": 4.930174168556778e-05, "loss": 0.5126, "step": 71875 
}, { "epoch": 65.06787330316742, "grad_norm": 3.516213893890381, "learning_rate": 4.924610456974109e-05, "loss": 0.465, "step": 71900 }, { "epoch": 65.09049773755656, "grad_norm": 3.8203318119049072, "learning_rate": 4.919048351641266e-05, "loss": 0.3844, "step": 71925 }, { "epoch": 65.1131221719457, "grad_norm": 4.222325325012207, "learning_rate": 4.913487856027287e-05, "loss": 0.5269, "step": 71950 }, { "epoch": 65.13574660633485, "grad_norm": 3.3444793224334717, "learning_rate": 4.907928973600209e-05, "loss": 0.5801, "step": 71975 }, { "epoch": 65.15837104072398, "grad_norm": 3.294954299926758, "learning_rate": 4.902371707827064e-05, "loss": 0.4357, "step": 72000 }, { "epoch": 65.18099547511312, "grad_norm": 3.5857014656066895, "learning_rate": 4.8968160621738725e-05, "loss": 0.5202, "step": 72025 }, { "epoch": 65.20361990950227, "grad_norm": 3.8422534465789795, "learning_rate": 4.891262040105648e-05, "loss": 0.5648, "step": 72050 }, { "epoch": 65.22624434389141, "grad_norm": 4.33748197555542, "learning_rate": 4.885709645086387e-05, "loss": 0.4577, "step": 72075 }, { "epoch": 65.24886877828054, "grad_norm": 4.151119232177734, "learning_rate": 4.880158880579076e-05, "loss": 0.4767, "step": 72100 }, { "epoch": 65.27149321266968, "grad_norm": 3.3274641036987305, "learning_rate": 4.8746097500456816e-05, "loss": 0.4201, "step": 72125 }, { "epoch": 65.29411764705883, "grad_norm": 2.085918426513672, "learning_rate": 4.869062256947151e-05, "loss": 0.4389, "step": 72150 }, { "epoch": 65.31674208144797, "grad_norm": 4.64586067199707, "learning_rate": 4.863516404743413e-05, "loss": 0.4356, "step": 72175 }, { "epoch": 65.3393665158371, "grad_norm": 4.617623329162598, "learning_rate": 4.85797219689337e-05, "loss": 0.552, "step": 72200 }, { "epoch": 65.36199095022624, "grad_norm": 4.925380229949951, "learning_rate": 4.852429636854901e-05, "loss": 0.4565, "step": 72225 }, { "epoch": 65.38461538461539, "grad_norm": 3.209160566329956, "learning_rate": 4.846888728084855e-05, 
"loss": 0.4161, "step": 72250 }, { "epoch": 65.40723981900453, "grad_norm": 2.874236822128296, "learning_rate": 4.8413494740390534e-05, "loss": 0.4984, "step": 72275 }, { "epoch": 65.42986425339366, "grad_norm": 2.963542938232422, "learning_rate": 4.835811878172284e-05, "loss": 0.4449, "step": 72300 }, { "epoch": 65.4524886877828, "grad_norm": 3.9334418773651123, "learning_rate": 4.830275943938298e-05, "loss": 0.4491, "step": 72325 }, { "epoch": 65.47511312217195, "grad_norm": 3.364246368408203, "learning_rate": 4.82474167478982e-05, "loss": 0.4536, "step": 72350 }, { "epoch": 65.49773755656109, "grad_norm": 2.7113635540008545, "learning_rate": 4.8192090741785256e-05, "loss": 0.5391, "step": 72375 }, { "epoch": 65.52036199095022, "grad_norm": 4.298723220825195, "learning_rate": 4.813678145555054e-05, "loss": 0.4959, "step": 72400 }, { "epoch": 65.54298642533936, "grad_norm": 3.5630440711975098, "learning_rate": 4.808148892369e-05, "loss": 0.4216, "step": 72425 }, { "epoch": 65.56561085972851, "grad_norm": 3.007699728012085, "learning_rate": 4.802621318068916e-05, "loss": 0.4718, "step": 72450 }, { "epoch": 65.58823529411765, "grad_norm": 3.4051260948181152, "learning_rate": 4.797095426102305e-05, "loss": 0.4499, "step": 72475 }, { "epoch": 65.61085972850678, "grad_norm": 3.9288182258605957, "learning_rate": 4.7915712199156215e-05, "loss": 0.5228, "step": 72500 }, { "epoch": 65.63348416289593, "grad_norm": 3.282602548599243, "learning_rate": 4.7860487029542684e-05, "loss": 0.4455, "step": 72525 }, { "epoch": 65.65610859728507, "grad_norm": 3.789949893951416, "learning_rate": 4.780527878662596e-05, "loss": 0.4649, "step": 72550 }, { "epoch": 65.67873303167421, "grad_norm": 3.194241523742676, "learning_rate": 4.7750087504838975e-05, "loss": 0.4583, "step": 72575 }, { "epoch": 65.70135746606334, "grad_norm": 5.476235866546631, "learning_rate": 4.76971198633072e-05, "loss": 0.4904, "step": 72600 }, { "epoch": 65.72398190045249, "grad_norm": 3.226522207260132, 
"learning_rate": 4.7641961925177045e-05, "loss": 0.45, "step": 72625 }, { "epoch": 65.74660633484163, "grad_norm": 2.643019437789917, "learning_rate": 4.758682105003606e-05, "loss": 0.5505, "step": 72650 }, { "epoch": 65.76923076923077, "grad_norm": 3.408048152923584, "learning_rate": 4.753169727227516e-05, "loss": 0.4742, "step": 72675 }, { "epoch": 65.7918552036199, "grad_norm": 4.125067234039307, "learning_rate": 4.747659062627459e-05, "loss": 0.4487, "step": 72700 }, { "epoch": 65.81447963800905, "grad_norm": 3.495706558227539, "learning_rate": 4.742150114640394e-05, "loss": 0.5123, "step": 72725 }, { "epoch": 65.83710407239819, "grad_norm": 2.2247610092163086, "learning_rate": 4.7366428867022075e-05, "loss": 0.4085, "step": 72750 }, { "epoch": 65.85972850678733, "grad_norm": 3.591494560241699, "learning_rate": 4.7311373822477125e-05, "loss": 0.469, "step": 72775 }, { "epoch": 65.88235294117646, "grad_norm": 3.502779722213745, "learning_rate": 4.7256336047106486e-05, "loss": 0.522, "step": 72800 }, { "epoch": 65.90497737556561, "grad_norm": 4.009083271026611, "learning_rate": 4.7201315575236784e-05, "loss": 0.5237, "step": 72825 }, { "epoch": 65.92760180995475, "grad_norm": 2.376384973526001, "learning_rate": 4.714631244118384e-05, "loss": 0.5136, "step": 72850 }, { "epoch": 65.9502262443439, "grad_norm": 4.574949264526367, "learning_rate": 4.709132667925266e-05, "loss": 0.3673, "step": 72875 }, { "epoch": 65.97285067873302, "grad_norm": 2.778244972229004, "learning_rate": 4.703635832373743e-05, "loss": 0.4782, "step": 72900 }, { "epoch": 65.99547511312217, "grad_norm": 3.3691933155059814, "learning_rate": 4.698140740892149e-05, "loss": 0.3929, "step": 72925 }, { "epoch": 66.01809954751131, "grad_norm": 3.810154676437378, "learning_rate": 4.692647396907726e-05, "loss": 0.455, "step": 72950 }, { "epoch": 66.04072398190046, "grad_norm": 3.4145562648773193, "learning_rate": 4.687155803846629e-05, "loss": 0.3828, "step": 72975 }, { "epoch": 66.0633484162896, 
"grad_norm": 3.596548557281494, "learning_rate": 4.681665965133922e-05, "loss": 0.4272, "step": 73000 }, { "epoch": 66.08597285067873, "grad_norm": 3.4454643726348877, "learning_rate": 4.676177884193572e-05, "loss": 0.4169, "step": 73025 }, { "epoch": 66.10859728506787, "grad_norm": 2.733118772506714, "learning_rate": 4.670691564448452e-05, "loss": 0.4115, "step": 73050 }, { "epoch": 66.13122171945702, "grad_norm": 2.9450082778930664, "learning_rate": 4.6652070093203356e-05, "loss": 0.3903, "step": 73075 }, { "epoch": 66.15384615384616, "grad_norm": 3.848235607147217, "learning_rate": 4.6597242222298955e-05, "loss": 0.4043, "step": 73100 }, { "epoch": 66.17647058823529, "grad_norm": 2.8531174659729004, "learning_rate": 4.654243206596703e-05, "loss": 0.4569, "step": 73125 }, { "epoch": 66.19909502262443, "grad_norm": 2.625898599624634, "learning_rate": 4.6487639658392224e-05, "loss": 0.4542, "step": 73150 }, { "epoch": 66.22171945701358, "grad_norm": 3.0660314559936523, "learning_rate": 4.643286503374812e-05, "loss": 0.3989, "step": 73175 }, { "epoch": 66.24434389140272, "grad_norm": 3.1097512245178223, "learning_rate": 4.6378108226197205e-05, "loss": 0.5148, "step": 73200 }, { "epoch": 66.26696832579185, "grad_norm": 4.034802436828613, "learning_rate": 4.632336926989091e-05, "loss": 0.5242, "step": 73225 }, { "epoch": 66.289592760181, "grad_norm": 3.696357011795044, "learning_rate": 4.626864819896943e-05, "loss": 0.4765, "step": 73250 }, { "epoch": 66.31221719457014, "grad_norm": 2.8210299015045166, "learning_rate": 4.621394504756188e-05, "loss": 0.4046, "step": 73275 }, { "epoch": 66.33484162895928, "grad_norm": 3.4342691898345947, "learning_rate": 4.615925984978619e-05, "loss": 0.4902, "step": 73300 }, { "epoch": 66.35746606334841, "grad_norm": 3.3892955780029297, "learning_rate": 4.610459263974905e-05, "loss": 0.3694, "step": 73325 }, { "epoch": 66.38009049773756, "grad_norm": 3.8561410903930664, "learning_rate": 4.6049943451545985e-05, "loss": 0.4382, "step": 
73350 }, { "epoch": 66.4027149321267, "grad_norm": 3.929945230484009, "learning_rate": 4.599531231926125e-05, "loss": 0.3728, "step": 73375 }, { "epoch": 66.42533936651584, "grad_norm": 4.837695121765137, "learning_rate": 4.594069927696783e-05, "loss": 0.4722, "step": 73400 }, { "epoch": 66.44796380090497, "grad_norm": 2.2682697772979736, "learning_rate": 4.588610435872746e-05, "loss": 0.406, "step": 73425 }, { "epoch": 66.47058823529412, "grad_norm": 4.606935024261475, "learning_rate": 4.5831527598590544e-05, "loss": 0.3981, "step": 73450 }, { "epoch": 66.49321266968326, "grad_norm": 3.6162781715393066, "learning_rate": 4.577696903059617e-05, "loss": 0.4641, "step": 73475 }, { "epoch": 66.5158371040724, "grad_norm": 2.6316192150115967, "learning_rate": 4.572242868877209e-05, "loss": 0.4188, "step": 73500 }, { "epoch": 66.53846153846153, "grad_norm": 3.1077880859375, "learning_rate": 4.566790660713468e-05, "loss": 0.4499, "step": 73525 }, { "epoch": 66.56108597285068, "grad_norm": 3.875701904296875, "learning_rate": 4.561340281968889e-05, "loss": 0.4369, "step": 73550 }, { "epoch": 66.58371040723982, "grad_norm": 3.852557420730591, "learning_rate": 4.555891736042833e-05, "loss": 0.4332, "step": 73575 }, { "epoch": 66.60633484162896, "grad_norm": 4.01460075378418, "learning_rate": 4.550445026333515e-05, "loss": 0.4226, "step": 73600 }, { "epoch": 66.6289592760181, "grad_norm": 1.935514211654663, "learning_rate": 4.545000156238002e-05, "loss": 0.4093, "step": 73625 }, { "epoch": 66.65158371040724, "grad_norm": 3.7266364097595215, "learning_rate": 4.539557129152217e-05, "loss": 0.3904, "step": 73650 }, { "epoch": 66.67420814479638, "grad_norm": 4.496799468994141, "learning_rate": 4.534115948470931e-05, "loss": 0.5374, "step": 73675 }, { "epoch": 66.69683257918552, "grad_norm": 2.9410617351531982, "learning_rate": 4.528676617587767e-05, "loss": 0.3951, "step": 73700 }, { "epoch": 66.71945701357465, "grad_norm": 3.5392343997955322, "learning_rate": 
4.523239139895191e-05, "loss": 0.5283, "step": 73725 }, { "epoch": 66.7420814479638, "grad_norm": 3.0796167850494385, "learning_rate": 4.517803518784513e-05, "loss": 0.4688, "step": 73750 }, { "epoch": 66.76470588235294, "grad_norm": 3.29840087890625, "learning_rate": 4.512369757645889e-05, "loss": 0.5688, "step": 73775 }, { "epoch": 66.78733031674209, "grad_norm": 3.5389914512634277, "learning_rate": 4.506937859868309e-05, "loss": 0.4534, "step": 73800 }, { "epoch": 66.80995475113122, "grad_norm": 2.3450798988342285, "learning_rate": 4.501507828839607e-05, "loss": 0.497, "step": 73825 }, { "epoch": 66.83257918552036, "grad_norm": 2.9214231967926025, "learning_rate": 4.496079667946449e-05, "loss": 0.4667, "step": 73850 }, { "epoch": 66.8552036199095, "grad_norm": 4.5358476638793945, "learning_rate": 4.490653380574336e-05, "loss": 0.4841, "step": 73875 }, { "epoch": 66.87782805429865, "grad_norm": 3.7113118171691895, "learning_rate": 4.485228970107598e-05, "loss": 0.5856, "step": 73900 }, { "epoch": 66.90045248868778, "grad_norm": 3.699545383453369, "learning_rate": 4.4798064399293976e-05, "loss": 0.4488, "step": 73925 }, { "epoch": 66.92307692307692, "grad_norm": 4.203170299530029, "learning_rate": 4.474385793421724e-05, "loss": 0.5001, "step": 73950 }, { "epoch": 66.94570135746606, "grad_norm": 4.271376609802246, "learning_rate": 4.468967033965391e-05, "loss": 0.4255, "step": 73975 }, { "epoch": 66.96832579185521, "grad_norm": 2.9453163146972656, "learning_rate": 4.463550164940034e-05, "loss": 0.4729, "step": 74000 }, { "epoch": 66.99095022624434, "grad_norm": 3.467568874359131, "learning_rate": 4.4581351897241116e-05, "loss": 0.4127, "step": 74025 }, { "epoch": 67.01357466063348, "grad_norm": 2.1562981605529785, "learning_rate": 4.4527221116949e-05, "loss": 0.4176, "step": 74050 }, { "epoch": 67.03619909502262, "grad_norm": 3.6084489822387695, "learning_rate": 4.447310934228494e-05, "loss": 0.4044, "step": 74075 }, { "epoch": 67.05882352941177, "grad_norm": 
2.777543306350708, "learning_rate": 4.441901660699801e-05, "loss": 0.4769, "step": 74100 }, { "epoch": 67.08144796380091, "grad_norm": 2.9665920734405518, "learning_rate": 4.4364942944825416e-05, "loss": 0.3794, "step": 74125 }, { "epoch": 67.10407239819004, "grad_norm": 3.2442235946655273, "learning_rate": 4.431088838949245e-05, "loss": 0.445, "step": 74150 }, { "epoch": 67.12669683257919, "grad_norm": 5.0728960037231445, "learning_rate": 4.4256852974712534e-05, "loss": 0.4795, "step": 74175 }, { "epoch": 67.14932126696833, "grad_norm": 3.2921695709228516, "learning_rate": 4.4202836734187106e-05, "loss": 0.3874, "step": 74200 }, { "epoch": 67.17194570135747, "grad_norm": 1.5285372734069824, "learning_rate": 4.4148839701605664e-05, "loss": 0.4343, "step": 74225 }, { "epoch": 67.1945701357466, "grad_norm": 3.4112703800201416, "learning_rate": 4.409486191064573e-05, "loss": 0.4788, "step": 74250 }, { "epoch": 67.21719457013575, "grad_norm": 3.9787042140960693, "learning_rate": 4.4040903394972793e-05, "loss": 0.4578, "step": 74275 }, { "epoch": 67.23981900452489, "grad_norm": 3.6014389991760254, "learning_rate": 4.398696418824039e-05, "loss": 0.3759, "step": 74300 }, { "epoch": 67.26244343891403, "grad_norm": 3.469893217086792, "learning_rate": 4.393304432408996e-05, "loss": 0.409, "step": 74325 }, { "epoch": 67.28506787330316, "grad_norm": 3.641577959060669, "learning_rate": 4.3879143836150873e-05, "loss": 0.3616, "step": 74350 }, { "epoch": 67.3076923076923, "grad_norm": 3.0347094535827637, "learning_rate": 4.382526275804044e-05, "loss": 0.4235, "step": 74375 }, { "epoch": 67.33031674208145, "grad_norm": 2.8964600563049316, "learning_rate": 4.3771401123363886e-05, "loss": 0.3915, "step": 74400 }, { "epoch": 67.3529411764706, "grad_norm": 3.0891056060791016, "learning_rate": 4.371755896571421e-05, "loss": 0.4741, "step": 74425 }, { "epoch": 67.37556561085972, "grad_norm": 3.889401912689209, "learning_rate": 4.366373631867242e-05, "loss": 0.3821, "step": 74450 }, { 
"epoch": 67.39819004524887, "grad_norm": 2.750678300857544, "learning_rate": 4.36099332158072e-05, "loss": 0.3626, "step": 74475 }, { "epoch": 67.42081447963801, "grad_norm": 2.762613296508789, "learning_rate": 4.355614969067516e-05, "loss": 0.3529, "step": 74500 }, { "epoch": 67.44343891402715, "grad_norm": 3.64766263961792, "learning_rate": 4.35023857768206e-05, "loss": 0.4109, "step": 74525 }, { "epoch": 67.46606334841628, "grad_norm": 3.7489163875579834, "learning_rate": 4.344864150777573e-05, "loss": 0.4445, "step": 74550 }, { "epoch": 67.48868778280543, "grad_norm": 3.121638536453247, "learning_rate": 4.339491691706033e-05, "loss": 0.4574, "step": 74575 }, { "epoch": 67.51131221719457, "grad_norm": 3.443589210510254, "learning_rate": 4.3341212038182054e-05, "loss": 0.4053, "step": 74600 }, { "epoch": 67.53393665158372, "grad_norm": 6.428598880767822, "learning_rate": 4.328752690463617e-05, "loss": 0.3812, "step": 74625 }, { "epoch": 67.55656108597285, "grad_norm": 1.6356045007705688, "learning_rate": 4.32338615499057e-05, "loss": 0.3695, "step": 74650 }, { "epoch": 67.57918552036199, "grad_norm": 2.584383726119995, "learning_rate": 4.3180216007461257e-05, "loss": 0.4634, "step": 74675 }, { "epoch": 67.60180995475113, "grad_norm": 3.4905943870544434, "learning_rate": 4.312659031076118e-05, "loss": 0.4604, "step": 74700 }, { "epoch": 67.62443438914028, "grad_norm": 3.9946959018707275, "learning_rate": 4.3072984493251336e-05, "loss": 0.4917, "step": 74725 }, { "epoch": 67.6470588235294, "grad_norm": 3.935030221939087, "learning_rate": 4.301939858836529e-05, "loss": 0.3362, "step": 74750 }, { "epoch": 67.66968325791855, "grad_norm": 3.106130838394165, "learning_rate": 4.296583262952411e-05, "loss": 0.3965, "step": 74775 }, { "epoch": 67.6923076923077, "grad_norm": 2.663750410079956, "learning_rate": 4.291228665013646e-05, "loss": 0.3978, "step": 74800 }, { "epoch": 67.71493212669684, "grad_norm": 3.5414390563964844, "learning_rate": 4.2858760683598544e-05, 
"loss": 0.4735, "step": 74825 }, { "epoch": 67.73755656108597, "grad_norm": 2.744206428527832, "learning_rate": 4.2805254763294114e-05, "loss": 0.3553, "step": 74850 }, { "epoch": 67.76018099547511, "grad_norm": 3.3571581840515137, "learning_rate": 4.275176892259432e-05, "loss": 0.3899, "step": 74875 }, { "epoch": 67.78280542986425, "grad_norm": 4.216256618499756, "learning_rate": 4.26983031948579e-05, "loss": 0.5193, "step": 74900 }, { "epoch": 67.8054298642534, "grad_norm": 3.1385810375213623, "learning_rate": 4.264485761343097e-05, "loss": 0.4591, "step": 74925 }, { "epoch": 67.82805429864253, "grad_norm": 3.5836966037750244, "learning_rate": 4.259143221164715e-05, "loss": 0.4142, "step": 74950 }, { "epoch": 67.85067873303167, "grad_norm": 3.812453508377075, "learning_rate": 4.253802702282737e-05, "loss": 0.4652, "step": 74975 }, { "epoch": 67.87330316742081, "grad_norm": 2.7617790699005127, "learning_rate": 4.248464208028009e-05, "loss": 0.4776, "step": 75000 }, { "epoch": 67.89592760180996, "grad_norm": 2.787205457687378, "learning_rate": 4.2431277417301005e-05, "loss": 0.4688, "step": 75025 }, { "epoch": 67.91855203619909, "grad_norm": 4.954131126403809, "learning_rate": 4.23779330671733e-05, "loss": 0.5092, "step": 75050 }, { "epoch": 67.94117647058823, "grad_norm": 3.119158983230591, "learning_rate": 4.232460906316734e-05, "loss": 0.4368, "step": 75075 }, { "epoch": 67.96380090497738, "grad_norm": 3.8148066997528076, "learning_rate": 4.227130543854095e-05, "loss": 0.4735, "step": 75100 }, { "epoch": 67.98642533936652, "grad_norm": 2.6367037296295166, "learning_rate": 4.221802222653911e-05, "loss": 0.4561, "step": 75125 }, { "epoch": 68.00904977375566, "grad_norm": 3.58677339553833, "learning_rate": 4.2164759460394195e-05, "loss": 0.4825, "step": 75150 }, { "epoch": 68.03167420814479, "grad_norm": 4.2442240715026855, "learning_rate": 4.211151717332572e-05, "loss": 0.3115, "step": 75175 }, { "epoch": 68.05429864253394, "grad_norm": 3.5822954177856445, 
"learning_rate": 4.205829539854051e-05, "loss": 0.4369, "step": 75200 }, { "epoch": 68.07692307692308, "grad_norm": 2.1157548427581787, "learning_rate": 4.200509416923249e-05, "loss": 0.2988, "step": 75225 }, { "epoch": 68.09954751131222, "grad_norm": 3.1891684532165527, "learning_rate": 4.195191351858294e-05, "loss": 0.3535, "step": 75250 }, { "epoch": 68.12217194570135, "grad_norm": 3.4645771980285645, "learning_rate": 4.1898753479760116e-05, "loss": 0.3996, "step": 75275 }, { "epoch": 68.1447963800905, "grad_norm": 1.7759895324707031, "learning_rate": 4.184561408591955e-05, "loss": 0.4161, "step": 75300 }, { "epoch": 68.16742081447964, "grad_norm": 3.8754966259002686, "learning_rate": 4.179249537020388e-05, "loss": 0.3405, "step": 75325 }, { "epoch": 68.19004524886878, "grad_norm": 3.558027744293213, "learning_rate": 4.173939736574275e-05, "loss": 0.3907, "step": 75350 }, { "epoch": 68.21266968325791, "grad_norm": 4.56781530380249, "learning_rate": 4.168632010565306e-05, "loss": 0.4352, "step": 75375 }, { "epoch": 68.23529411764706, "grad_norm": 2.9799764156341553, "learning_rate": 4.1633263623038566e-05, "loss": 0.3617, "step": 75400 }, { "epoch": 68.2579185520362, "grad_norm": 3.8714258670806885, "learning_rate": 4.158022795099026e-05, "loss": 0.4573, "step": 75425 }, { "epoch": 68.28054298642535, "grad_norm": 2.85005259513855, "learning_rate": 4.152721312258601e-05, "loss": 0.3795, "step": 75450 }, { "epoch": 68.30316742081448, "grad_norm": 3.3433988094329834, "learning_rate": 4.147421917089077e-05, "loss": 0.3775, "step": 75475 }, { "epoch": 68.32579185520362, "grad_norm": 3.080737829208374, "learning_rate": 4.142124612895642e-05, "loss": 0.4629, "step": 75500 }, { "epoch": 68.34841628959276, "grad_norm": 2.6309893131256104, "learning_rate": 4.136829402982185e-05, "loss": 0.3704, "step": 75525 }, { "epoch": 68.3710407239819, "grad_norm": 2.5583741664886475, "learning_rate": 4.131536290651282e-05, "loss": 0.3958, "step": 75550 }, { "epoch": 68.39366515837104, 
"grad_norm": 3.351884365081787, "learning_rate": 4.1262452792042086e-05, "loss": 0.4445, "step": 75575 }, { "epoch": 68.41628959276018, "grad_norm": 3.149663209915161, "learning_rate": 4.120956371940923e-05, "loss": 0.4257, "step": 75600 }, { "epoch": 68.43891402714932, "grad_norm": 2.4363698959350586, "learning_rate": 4.115669572160079e-05, "loss": 0.3825, "step": 75625 }, { "epoch": 68.46153846153847, "grad_norm": 3.3466544151306152, "learning_rate": 4.1103848831590055e-05, "loss": 0.3214, "step": 75650 }, { "epoch": 68.4841628959276, "grad_norm": 3.8922653198242188, "learning_rate": 4.1051023082337254e-05, "loss": 0.3655, "step": 75675 }, { "epoch": 68.50678733031674, "grad_norm": 2.990739107131958, "learning_rate": 4.0998218506789346e-05, "loss": 0.4013, "step": 75700 }, { "epoch": 68.52941176470588, "grad_norm": 2.7267568111419678, "learning_rate": 4.0945435137880164e-05, "loss": 0.3437, "step": 75725 }, { "epoch": 68.55203619909503, "grad_norm": 2.7915239334106445, "learning_rate": 4.0892673008530206e-05, "loss": 0.377, "step": 75750 }, { "epoch": 68.57466063348416, "grad_norm": 2.374497413635254, "learning_rate": 4.083993215164687e-05, "loss": 0.3747, "step": 75775 }, { "epoch": 68.5972850678733, "grad_norm": 2.1000912189483643, "learning_rate": 4.0787212600124145e-05, "loss": 0.4749, "step": 75800 }, { "epoch": 68.61990950226244, "grad_norm": 3.066020965576172, "learning_rate": 4.073451438684284e-05, "loss": 0.3983, "step": 75825 }, { "epoch": 68.64253393665159, "grad_norm": 1.8375914096832275, "learning_rate": 4.0681837544670356e-05, "loss": 0.4454, "step": 75850 }, { "epoch": 68.66515837104072, "grad_norm": 3.709446430206299, "learning_rate": 4.062918210646087e-05, "loss": 0.5053, "step": 75875 }, { "epoch": 68.68778280542986, "grad_norm": 3.295236587524414, "learning_rate": 4.0576548105055096e-05, "loss": 0.4938, "step": 75900 }, { "epoch": 68.710407239819, "grad_norm": 3.026183843612671, "learning_rate": 4.0523935573280514e-05, "loss": 0.3928, "step": 
75925 }, { "epoch": 68.73303167420815, "grad_norm": 3.2350196838378906, "learning_rate": 4.047134454395107e-05, "loss": 0.4085, "step": 75950 }, { "epoch": 68.75565610859728, "grad_norm": 2.931197166442871, "learning_rate": 4.041877504986743e-05, "loss": 0.4082, "step": 75975 }, { "epoch": 68.77828054298642, "grad_norm": 3.3384668827056885, "learning_rate": 4.036622712381669e-05, "loss": 0.3353, "step": 76000 }, { "epoch": 68.80090497737557, "grad_norm": 2.9945406913757324, "learning_rate": 4.031370079857266e-05, "loss": 0.4683, "step": 76025 }, { "epoch": 68.82352941176471, "grad_norm": 2.8785560131073, "learning_rate": 4.0261196106895506e-05, "loss": 0.4455, "step": 76050 }, { "epoch": 68.84615384615384, "grad_norm": 2.258955717086792, "learning_rate": 4.020871308153204e-05, "loss": 0.4795, "step": 76075 }, { "epoch": 68.86877828054298, "grad_norm": 3.7678897380828857, "learning_rate": 4.015625175521547e-05, "loss": 0.544, "step": 76100 }, { "epoch": 68.89140271493213, "grad_norm": 3.1933746337890625, "learning_rate": 4.0103812160665535e-05, "loss": 0.4258, "step": 76125 }, { "epoch": 68.91402714932127, "grad_norm": 3.049928903579712, "learning_rate": 4.005139433058835e-05, "loss": 0.4686, "step": 76150 }, { "epoch": 68.9366515837104, "grad_norm": 2.922126054763794, "learning_rate": 3.999899829767655e-05, "loss": 0.43, "step": 76175 }, { "epoch": 68.95927601809954, "grad_norm": 3.004406213760376, "learning_rate": 3.994662409460906e-05, "loss": 0.3562, "step": 76200 }, { "epoch": 68.98190045248869, "grad_norm": 3.188622236251831, "learning_rate": 3.989427175405131e-05, "loss": 0.4254, "step": 76225 }, { "epoch": 69.00452488687783, "grad_norm": 3.9997904300689697, "learning_rate": 3.9841941308654985e-05, "loss": 0.3802, "step": 76250 }, { "epoch": 69.02714932126698, "grad_norm": 3.5980288982391357, "learning_rate": 3.978963279105821e-05, "loss": 0.386, "step": 76275 }, { "epoch": 69.0497737556561, "grad_norm": 3.104586601257324, "learning_rate": 
3.973734623388533e-05, "loss": 0.3537, "step": 76300 }, { "epoch": 69.07239819004525, "grad_norm": 2.5005218982696533, "learning_rate": 3.968508166974712e-05, "loss": 0.3382, "step": 76325 }, { "epoch": 69.09502262443439, "grad_norm": 2.546375036239624, "learning_rate": 3.963283913124051e-05, "loss": 0.3581, "step": 76350 }, { "epoch": 69.11764705882354, "grad_norm": 4.206288814544678, "learning_rate": 3.958061865094877e-05, "loss": 0.3366, "step": 76375 }, { "epoch": 69.14027149321267, "grad_norm": 2.7173237800598145, "learning_rate": 3.952842026144144e-05, "loss": 0.3442, "step": 76400 }, { "epoch": 69.16289592760181, "grad_norm": 3.7858710289001465, "learning_rate": 3.947624399527418e-05, "loss": 0.3715, "step": 76425 }, { "epoch": 69.18552036199095, "grad_norm": 3.3816254138946533, "learning_rate": 3.9424089884988945e-05, "loss": 0.417, "step": 76450 }, { "epoch": 69.2081447963801, "grad_norm": 3.8528568744659424, "learning_rate": 3.937195796311381e-05, "loss": 0.3602, "step": 76475 }, { "epoch": 69.23076923076923, "grad_norm": 2.9668381214141846, "learning_rate": 3.931984826216307e-05, "loss": 0.4146, "step": 76500 }, { "epoch": 69.25339366515837, "grad_norm": 3.6018624305725098, "learning_rate": 3.9267760814637075e-05, "loss": 0.3841, "step": 76525 }, { "epoch": 69.27601809954751, "grad_norm": 1.6048812866210938, "learning_rate": 3.921569565302239e-05, "loss": 0.3891, "step": 76550 }, { "epoch": 69.29864253393666, "grad_norm": 3.363898277282715, "learning_rate": 3.916365280979162e-05, "loss": 0.3802, "step": 76575 }, { "epoch": 69.32126696832579, "grad_norm": 2.56240177154541, "learning_rate": 3.9111632317403514e-05, "loss": 0.3569, "step": 76600 }, { "epoch": 69.34389140271493, "grad_norm": 3.013315439224243, "learning_rate": 3.905963420830277e-05, "loss": 0.3686, "step": 76625 }, { "epoch": 69.36651583710407, "grad_norm": 3.691343069076538, "learning_rate": 3.900765851492025e-05, "loss": 0.3341, "step": 76650 }, { "epoch": 69.38914027149322, "grad_norm": 
3.1747207641601562, "learning_rate": 3.8955705269672716e-05, "loss": 0.3069, "step": 76675 }, { "epoch": 69.41176470588235, "grad_norm": 2.1772170066833496, "learning_rate": 3.8903774504963056e-05, "loss": 0.4192, "step": 76700 }, { "epoch": 69.43438914027149, "grad_norm": 2.9762048721313477, "learning_rate": 3.885186625318001e-05, "loss": 0.4359, "step": 76725 }, { "epoch": 69.45701357466064, "grad_norm": 2.8205325603485107, "learning_rate": 3.87999805466984e-05, "loss": 0.3983, "step": 76750 }, { "epoch": 69.47963800904978, "grad_norm": 2.5052621364593506, "learning_rate": 3.8748117417878875e-05, "loss": 0.3964, "step": 76775 }, { "epoch": 69.50226244343891, "grad_norm": 4.439595699310303, "learning_rate": 3.86962768990681e-05, "loss": 0.3912, "step": 76800 }, { "epoch": 69.52488687782805, "grad_norm": 1.4123729467391968, "learning_rate": 3.864445902259853e-05, "loss": 0.5169, "step": 76825 }, { "epoch": 69.5475113122172, "grad_norm": 3.92445969581604, "learning_rate": 3.859266382078863e-05, "loss": 0.3252, "step": 76850 }, { "epoch": 69.57013574660634, "grad_norm": 2.97933030128479, "learning_rate": 3.854089132594257e-05, "loss": 0.3811, "step": 76875 }, { "epoch": 69.59276018099547, "grad_norm": 3.185816526412964, "learning_rate": 3.848914157035053e-05, "loss": 0.3827, "step": 76900 }, { "epoch": 69.61538461538461, "grad_norm": 2.848999500274658, "learning_rate": 3.8437414586288346e-05, "loss": 0.4058, "step": 76925 }, { "epoch": 69.63800904977376, "grad_norm": 3.1349036693573, "learning_rate": 3.838571040601778e-05, "loss": 0.3609, "step": 76950 }, { "epoch": 69.6606334841629, "grad_norm": 2.6944453716278076, "learning_rate": 3.833402906178626e-05, "loss": 0.3657, "step": 76975 }, { "epoch": 69.68325791855203, "grad_norm": 2.8627524375915527, "learning_rate": 3.8282370585827084e-05, "loss": 0.37, "step": 77000 }, { "epoch": 69.70588235294117, "grad_norm": 2.7694175243377686, "learning_rate": 3.823073501035916e-05, "loss": 0.4469, "step": 77025 }, { "epoch": 
69.72850678733032, "grad_norm": 2.7689924240112305, "learning_rate": 3.817912236758726e-05, "loss": 0.3819, "step": 77050 }, { "epoch": 69.75113122171946, "grad_norm": 2.8514535427093506, "learning_rate": 3.8127532689701705e-05, "loss": 0.4633, "step": 77075 }, { "epoch": 69.77375565610859, "grad_norm": 3.1230578422546387, "learning_rate": 3.807596600887862e-05, "loss": 0.4819, "step": 77100 }, { "epoch": 69.79638009049773, "grad_norm": 4.111325740814209, "learning_rate": 3.802442235727969e-05, "loss": 0.3846, "step": 77125 }, { "epoch": 69.81900452488688, "grad_norm": 2.426924467086792, "learning_rate": 3.797290176705232e-05, "loss": 0.4069, "step": 77150 }, { "epoch": 69.84162895927602, "grad_norm": 2.085695743560791, "learning_rate": 3.792140427032944e-05, "loss": 0.3929, "step": 77175 }, { "epoch": 69.86425339366515, "grad_norm": 4.2562103271484375, "learning_rate": 3.7869929899229666e-05, "loss": 0.5067, "step": 77200 }, { "epoch": 69.8868778280543, "grad_norm": 3.177539587020874, "learning_rate": 3.781847868585711e-05, "loss": 0.4238, "step": 77225 }, { "epoch": 69.90950226244344, "grad_norm": 3.2816081047058105, "learning_rate": 3.776910733759687e-05, "loss": 0.373, "step": 77250 }, { "epoch": 69.93212669683258, "grad_norm": 2.236802816390991, "learning_rate": 3.7717701606442145e-05, "loss": 0.3423, "step": 77275 }, { "epoch": 69.95475113122171, "grad_norm": 2.7069408893585205, "learning_rate": 3.766631912795817e-05, "loss": 0.3994, "step": 77300 }, { "epoch": 69.97737556561086, "grad_norm": 3.7963755130767822, "learning_rate": 3.7614959934191905e-05, "loss": 0.4046, "step": 77325 }, { "epoch": 70.0, "grad_norm": 3.8006396293640137, "learning_rate": 3.756362405717558e-05, "loss": 0.3248, "step": 77350 }, { "epoch": 70.02262443438914, "grad_norm": 3.3729357719421387, "learning_rate": 3.751231152892702e-05, "loss": 0.3544, "step": 77375 }, { "epoch": 70.04524886877829, "grad_norm": 2.3429107666015625, "learning_rate": 3.7461022381449395e-05, "loss": 0.3622, 
"step": 77400 }, { "epoch": 70.06787330316742, "grad_norm": 2.8610446453094482, "learning_rate": 3.740975664673138e-05, "loss": 0.4314, "step": 77425 }, { "epoch": 70.09049773755656, "grad_norm": 3.3164401054382324, "learning_rate": 3.735851435674695e-05, "loss": 0.3804, "step": 77450 }, { "epoch": 70.1131221719457, "grad_norm": 3.767671823501587, "learning_rate": 3.730729554345557e-05, "loss": 0.3723, "step": 77475 }, { "epoch": 70.13574660633485, "grad_norm": 2.632603645324707, "learning_rate": 3.7256100238801936e-05, "loss": 0.3289, "step": 77500 }, { "epoch": 70.15837104072398, "grad_norm": 2.85009765625, "learning_rate": 3.7204928474716224e-05, "loss": 0.3121, "step": 77525 }, { "epoch": 70.18099547511312, "grad_norm": 4.107291221618652, "learning_rate": 3.7153780283113785e-05, "loss": 0.3146, "step": 77550 }, { "epoch": 70.20361990950227, "grad_norm": 3.3742053508758545, "learning_rate": 3.71026556958954e-05, "loss": 0.4125, "step": 77575 }, { "epoch": 70.22624434389141, "grad_norm": 2.78633451461792, "learning_rate": 3.7051554744946995e-05, "loss": 0.3578, "step": 77600 }, { "epoch": 70.24886877828054, "grad_norm": 2.340951919555664, "learning_rate": 3.700047746213989e-05, "loss": 0.3427, "step": 77625 }, { "epoch": 70.27149321266968, "grad_norm": 1.9949183464050293, "learning_rate": 3.694942387933053e-05, "loss": 0.3342, "step": 77650 }, { "epoch": 70.29411764705883, "grad_norm": 5.352598190307617, "learning_rate": 3.689839402836066e-05, "loss": 0.3045, "step": 77675 }, { "epoch": 70.31674208144797, "grad_norm": 3.4376213550567627, "learning_rate": 3.684738794105714e-05, "loss": 0.3284, "step": 77700 }, { "epoch": 70.3393665158371, "grad_norm": 3.1421401500701904, "learning_rate": 3.6796405649232125e-05, "loss": 0.4091, "step": 77725 }, { "epoch": 70.36199095022624, "grad_norm": 4.102700233459473, "learning_rate": 3.67454471846828e-05, "loss": 0.3916, "step": 77750 }, { "epoch": 70.38461538461539, "grad_norm": 3.809084415435791, "learning_rate": 
3.669451257919161e-05, "loss": 0.3505, "step": 77775 }, { "epoch": 70.40723981900453, "grad_norm": 3.107203722000122, "learning_rate": 3.6643601864525986e-05, "loss": 0.4183, "step": 77800 }, { "epoch": 70.42986425339366, "grad_norm": 3.738818645477295, "learning_rate": 3.6592715072438604e-05, "loss": 0.3844, "step": 77825 }, { "epoch": 70.4524886877828, "grad_norm": 3.4404258728027344, "learning_rate": 3.65418522346671e-05, "loss": 0.4346, "step": 77850 }, { "epoch": 70.47511312217195, "grad_norm": 3.7546753883361816, "learning_rate": 3.649101338293427e-05, "loss": 0.3497, "step": 77875 }, { "epoch": 70.49773755656109, "grad_norm": 3.058210849761963, "learning_rate": 3.644019854894783e-05, "loss": 0.3192, "step": 77900 }, { "epoch": 70.52036199095022, "grad_norm": 4.312135219573975, "learning_rate": 3.638940776440065e-05, "loss": 0.3527, "step": 77925 }, { "epoch": 70.54298642533936, "grad_norm": 3.8511061668395996, "learning_rate": 3.6338641060970485e-05, "loss": 0.5133, "step": 77950 }, { "epoch": 70.56561085972851, "grad_norm": 3.1947343349456787, "learning_rate": 3.628789847032016e-05, "loss": 0.366, "step": 77975 }, { "epoch": 70.58823529411765, "grad_norm": 2.7243242263793945, "learning_rate": 3.623718002409739e-05, "loss": 0.3192, "step": 78000 }, { "epoch": 70.61085972850678, "grad_norm": 1.0801172256469727, "learning_rate": 3.61864857539349e-05, "loss": 0.3447, "step": 78025 }, { "epoch": 70.63348416289593, "grad_norm": 2.5636961460113525, "learning_rate": 3.613581569145025e-05, "loss": 0.338, "step": 78050 }, { "epoch": 70.65610859728507, "grad_norm": 1.186858892440796, "learning_rate": 3.608516986824601e-05, "loss": 0.3571, "step": 78075 }, { "epoch": 70.67873303167421, "grad_norm": 3.1972029209136963, "learning_rate": 3.603454831590952e-05, "loss": 0.3363, "step": 78100 }, { "epoch": 70.70135746606334, "grad_norm": 2.716545343399048, "learning_rate": 3.5983951066013065e-05, "loss": 0.383, "step": 78125 }, { "epoch": 70.72398190045249, "grad_norm": 
2.7895750999450684, "learning_rate": 3.5933378150113764e-05, "loss": 0.3357, "step": 78150 }, { "epoch": 70.74660633484163, "grad_norm": 2.2795443534851074, "learning_rate": 3.588282959975348e-05, "loss": 0.3294, "step": 78175 }, { "epoch": 70.76923076923077, "grad_norm": 3.1738946437835693, "learning_rate": 3.583230544645902e-05, "loss": 0.3669, "step": 78200 }, { "epoch": 70.7918552036199, "grad_norm": 1.987891435623169, "learning_rate": 3.578180572174181e-05, "loss": 0.3827, "step": 78225 }, { "epoch": 70.81447963800905, "grad_norm": 2.462768793106079, "learning_rate": 3.573133045709819e-05, "loss": 0.4561, "step": 78250 }, { "epoch": 70.83710407239819, "grad_norm": 2.8346848487854004, "learning_rate": 3.568087968400913e-05, "loss": 0.3556, "step": 78275 }, { "epoch": 70.85972850678733, "grad_norm": 2.2082462310791016, "learning_rate": 3.563045343394042e-05, "loss": 0.37, "step": 78300 }, { "epoch": 70.88235294117646, "grad_norm": 2.6408658027648926, "learning_rate": 3.558005173834245e-05, "loss": 0.3757, "step": 78325 }, { "epoch": 70.90497737556561, "grad_norm": 3.0608766078948975, "learning_rate": 3.552967462865042e-05, "loss": 0.3892, "step": 78350 }, { "epoch": 70.92760180995475, "grad_norm": 2.8581387996673584, "learning_rate": 3.547932213628407e-05, "loss": 0.392, "step": 78375 }, { "epoch": 70.9502262443439, "grad_norm": 1.155606746673584, "learning_rate": 3.5428994292647884e-05, "loss": 0.4871, "step": 78400 }, { "epoch": 70.97285067873302, "grad_norm": 2.484496593475342, "learning_rate": 3.537869112913091e-05, "loss": 0.3925, "step": 78425 }, { "epoch": 70.99547511312217, "grad_norm": 2.2866179943084717, "learning_rate": 3.532841267710686e-05, "loss": 0.4169, "step": 78450 }, { "epoch": 71.01809954751131, "grad_norm": 2.280975341796875, "learning_rate": 3.527815896793396e-05, "loss": 0.443, "step": 78475 }, { "epoch": 71.04072398190046, "grad_norm": 4.163336277008057, "learning_rate": 3.5227930032955095e-05, "loss": 0.3605, "step": 78500 }, { "epoch": 
71.0633484162896, "grad_norm": 2.618426561355591, "learning_rate": 3.5177725903497595e-05, "loss": 0.3193, "step": 78525 }, { "epoch": 71.08597285067873, "grad_norm": 2.807340621948242, "learning_rate": 3.512754661087343e-05, "loss": 0.2815, "step": 78550 }, { "epoch": 71.10859728506787, "grad_norm": 2.9943177700042725, "learning_rate": 3.507739218637896e-05, "loss": 0.4231, "step": 78575 }, { "epoch": 71.13122171945702, "grad_norm": 3.1198978424072266, "learning_rate": 3.5027262661295155e-05, "loss": 0.3435, "step": 78600 }, { "epoch": 71.15384615384616, "grad_norm": 3.141416311264038, "learning_rate": 3.497715806688736e-05, "loss": 0.3491, "step": 78625 }, { "epoch": 71.17647058823529, "grad_norm": 2.8542861938476562, "learning_rate": 3.492707843440544e-05, "loss": 0.3876, "step": 78650 }, { "epoch": 71.19909502262443, "grad_norm": 1.997887134552002, "learning_rate": 3.487702379508362e-05, "loss": 0.3316, "step": 78675 }, { "epoch": 71.22171945701358, "grad_norm": 2.8526930809020996, "learning_rate": 3.482699418014063e-05, "loss": 0.3033, "step": 78700 }, { "epoch": 71.24434389140272, "grad_norm": 1.8499417304992676, "learning_rate": 3.477698962077949e-05, "loss": 0.3892, "step": 78725 }, { "epoch": 71.26696832579185, "grad_norm": 1.9071060419082642, "learning_rate": 3.4727010148187684e-05, "loss": 0.3477, "step": 78750 }, { "epoch": 71.289592760181, "grad_norm": 1.4768844842910767, "learning_rate": 3.467705579353696e-05, "loss": 0.3652, "step": 78775 }, { "epoch": 71.31221719457014, "grad_norm": 3.2199654579162598, "learning_rate": 3.4627126587983516e-05, "loss": 0.3227, "step": 78800 }, { "epoch": 71.33484162895928, "grad_norm": 2.7382895946502686, "learning_rate": 3.457722256266772e-05, "loss": 0.3517, "step": 78825 }, { "epoch": 71.35746606334841, "grad_norm": 2.2621893882751465, "learning_rate": 3.452734374871438e-05, "loss": 0.3345, "step": 78850 }, { "epoch": 71.38009049773756, "grad_norm": 3.079010248184204, "learning_rate": 3.4477490177232465e-05, 
"loss": 0.3239, "step": 78875 }, { "epoch": 71.4027149321267, "grad_norm": 2.129749298095703, "learning_rate": 3.44276618793153e-05, "loss": 0.3056, "step": 78900 }, { "epoch": 71.42533936651584, "grad_norm": 2.247183084487915, "learning_rate": 3.437785888604032e-05, "loss": 0.2968, "step": 78925 }, { "epoch": 71.44796380090497, "grad_norm": 2.2137086391448975, "learning_rate": 3.432808122846933e-05, "loss": 0.4876, "step": 78950 }, { "epoch": 71.47058823529412, "grad_norm": 2.3155691623687744, "learning_rate": 3.42783289376482e-05, "loss": 0.4405, "step": 78975 }, { "epoch": 71.49321266968326, "grad_norm": 3.00319766998291, "learning_rate": 3.4228602044607084e-05, "loss": 0.3387, "step": 79000 }, { "epoch": 71.5158371040724, "grad_norm": 2.0988829135894775, "learning_rate": 3.4178900580360195e-05, "loss": 0.2847, "step": 79025 }, { "epoch": 71.53846153846153, "grad_norm": 3.216895580291748, "learning_rate": 3.4129224575905986e-05, "loss": 0.3739, "step": 79050 }, { "epoch": 71.56108597285068, "grad_norm": 3.7864925861358643, "learning_rate": 3.407957406222693e-05, "loss": 0.3226, "step": 79075 }, { "epoch": 71.58371040723982, "grad_norm": 2.259701728820801, "learning_rate": 3.402994907028971e-05, "loss": 0.4087, "step": 79100 }, { "epoch": 71.60633484162896, "grad_norm": 3.4178225994110107, "learning_rate": 3.398034963104499e-05, "loss": 0.4035, "step": 79125 }, { "epoch": 71.6289592760181, "grad_norm": 1.78024423122406, "learning_rate": 3.3930775775427596e-05, "loss": 0.3156, "step": 79150 }, { "epoch": 71.65158371040724, "grad_norm": 2.06205153465271, "learning_rate": 3.3881227534356275e-05, "loss": 0.3569, "step": 79175 }, { "epoch": 71.67420814479638, "grad_norm": 2.405913829803467, "learning_rate": 3.3831704938733914e-05, "loss": 0.3027, "step": 79200 }, { "epoch": 71.69683257918552, "grad_norm": 3.71380877494812, "learning_rate": 3.3782208019447373e-05, "loss": 0.3924, "step": 79225 }, { "epoch": 71.71945701357465, "grad_norm": 2.3701257705688477, 
"learning_rate": 3.373273680736743e-05, "loss": 0.3816, "step": 79250 }, { "epoch": 71.7420814479638, "grad_norm": 2.361420154571533, "learning_rate": 3.368329133334895e-05, "loss": 0.3278, "step": 79275 }, { "epoch": 71.76470588235294, "grad_norm": 2.993346691131592, "learning_rate": 3.363387162823061e-05, "loss": 0.3009, "step": 79300 }, { "epoch": 71.78733031674209, "grad_norm": 2.089348554611206, "learning_rate": 3.3584477722835156e-05, "loss": 0.3819, "step": 79325 }, { "epoch": 71.80995475113122, "grad_norm": 2.9268131256103516, "learning_rate": 3.35351096479691e-05, "loss": 0.421, "step": 79350 }, { "epoch": 71.83257918552036, "grad_norm": 2.4692280292510986, "learning_rate": 3.3485767434422965e-05, "loss": 0.4297, "step": 79375 }, { "epoch": 71.8552036199095, "grad_norm": 3.26350998878479, "learning_rate": 3.3436451112971064e-05, "loss": 0.3359, "step": 79400 }, { "epoch": 71.87782805429865, "grad_norm": 2.73140811920166, "learning_rate": 3.3387160714371625e-05, "loss": 0.3542, "step": 79425 }, { "epoch": 71.90045248868778, "grad_norm": 2.5850207805633545, "learning_rate": 3.333789626936663e-05, "loss": 0.3099, "step": 79450 }, { "epoch": 71.92307692307692, "grad_norm": 2.3376190662384033, "learning_rate": 3.3288657808681966e-05, "loss": 0.3096, "step": 79475 }, { "epoch": 71.94570135746606, "grad_norm": 5.441970348358154, "learning_rate": 3.3239445363027224e-05, "loss": 0.3586, "step": 79500 }, { "epoch": 71.96832579185521, "grad_norm": 3.9859695434570312, "learning_rate": 3.319025896309585e-05, "loss": 0.3671, "step": 79525 }, { "epoch": 71.99095022624434, "grad_norm": 2.742628812789917, "learning_rate": 3.314109863956497e-05, "loss": 0.3583, "step": 79550 }, { "epoch": 72.01357466063348, "grad_norm": 3.0470237731933594, "learning_rate": 3.309196442309552e-05, "loss": 0.3518, "step": 79575 }, { "epoch": 72.03619909502262, "grad_norm": 3.3255622386932373, "learning_rate": 3.304285634433209e-05, "loss": 0.3202, "step": 79600 }, { "epoch": 72.05882352941177, 
"grad_norm": 2.460353136062622, "learning_rate": 3.299377443390302e-05, "loss": 0.3318, "step": 79625 }, { "epoch": 72.08144796380091, "grad_norm": 3.0762221813201904, "learning_rate": 3.2944718722420265e-05, "loss": 0.3052, "step": 79650 }, { "epoch": 72.10407239819004, "grad_norm": 2.307109832763672, "learning_rate": 3.289568924047954e-05, "loss": 0.3359, "step": 79675 }, { "epoch": 72.12669683257919, "grad_norm": 1.8705261945724487, "learning_rate": 3.284668601866006e-05, "loss": 0.3049, "step": 79700 }, { "epoch": 72.14932126696833, "grad_norm": 3.070863962173462, "learning_rate": 3.2797709087524827e-05, "loss": 0.2867, "step": 79725 }, { "epoch": 72.17194570135747, "grad_norm": 3.732760190963745, "learning_rate": 3.274875847762028e-05, "loss": 0.374, "step": 79750 }, { "epoch": 72.1945701357466, "grad_norm": 3.1433746814727783, "learning_rate": 3.26998342194766e-05, "loss": 0.2753, "step": 79775 }, { "epoch": 72.21719457013575, "grad_norm": 1.5535398721694946, "learning_rate": 3.2650936343607387e-05, "loss": 0.3896, "step": 79800 }, { "epoch": 72.23981900452489, "grad_norm": 2.2897911071777344, "learning_rate": 3.2602064880509904e-05, "loss": 0.3285, "step": 79825 }, { "epoch": 72.26244343891403, "grad_norm": 2.7750487327575684, "learning_rate": 3.2553219860664856e-05, "loss": 0.3311, "step": 79850 }, { "epoch": 72.28506787330316, "grad_norm": 3.684419631958008, "learning_rate": 3.2504401314536526e-05, "loss": 0.3472, "step": 79875 }, { "epoch": 72.3076923076923, "grad_norm": 2.5782241821289062, "learning_rate": 3.2455609272572606e-05, "loss": 0.3646, "step": 79900 }, { "epoch": 72.33031674208145, "grad_norm": 2.6745152473449707, "learning_rate": 3.240684376520435e-05, "loss": 0.3156, "step": 79925 }, { "epoch": 72.3529411764706, "grad_norm": 1.8315614461898804, "learning_rate": 3.2358104822846364e-05, "loss": 0.3267, "step": 79950 }, { "epoch": 72.37556561085972, "grad_norm": 2.0482234954833984, "learning_rate": 3.2309392475896785e-05, "loss": 0.3241, "step": 
79975 }, { "epoch": 72.39819004524887, "grad_norm": 2.1007161140441895, "learning_rate": 3.226070675473707e-05, "loss": 0.2776, "step": 80000 }, { "epoch": 72.42081447963801, "grad_norm": 0.8560355305671692, "learning_rate": 3.221204768973215e-05, "loss": 0.3009, "step": 80025 }, { "epoch": 72.44343891402715, "grad_norm": 1.1884980201721191, "learning_rate": 3.2163415311230254e-05, "loss": 0.3478, "step": 80050 }, { "epoch": 72.46606334841628, "grad_norm": 2.090597152709961, "learning_rate": 3.211480964956306e-05, "loss": 0.3158, "step": 80075 }, { "epoch": 72.48868778280543, "grad_norm": 3.4095449447631836, "learning_rate": 3.206623073504547e-05, "loss": 0.3402, "step": 80100 }, { "epoch": 72.51131221719457, "grad_norm": 2.9183309078216553, "learning_rate": 3.201767859797582e-05, "loss": 0.3328, "step": 80125 }, { "epoch": 72.53393665158372, "grad_norm": 2.8395659923553467, "learning_rate": 3.1971093766721136e-05, "loss": 0.4211, "step": 80150 }, { "epoch": 72.55656108597285, "grad_norm": 3.2014007568359375, "learning_rate": 3.1922594201274654e-05, "loss": 0.2866, "step": 80175 }, { "epoch": 72.57918552036199, "grad_norm": 2.333317995071411, "learning_rate": 3.187412150286107e-05, "loss": 0.2989, "step": 80200 }, { "epoch": 72.60180995475113, "grad_norm": 2.2161149978637695, "learning_rate": 3.182567570171236e-05, "loss": 0.3393, "step": 80225 }, { "epoch": 72.62443438914028, "grad_norm": 2.343919277191162, "learning_rate": 3.177725682804383e-05, "loss": 0.4342, "step": 80250 }, { "epoch": 72.6470588235294, "grad_norm": 4.018833637237549, "learning_rate": 3.172886491205391e-05, "loss": 0.3837, "step": 80275 }, { "epoch": 72.66968325791855, "grad_norm": 4.137818813323975, "learning_rate": 3.1680499983924286e-05, "loss": 0.3435, "step": 80300 }, { "epoch": 72.6923076923077, "grad_norm": 3.6315526962280273, "learning_rate": 3.163216207381973e-05, "loss": 0.4356, "step": 80325 }, { "epoch": 72.71493212669684, "grad_norm": 3.445065498352051, "learning_rate": 
3.158385121188826e-05, "loss": 0.4355, "step": 80350 }, { "epoch": 72.73755656108597, "grad_norm": 1.8939542770385742, "learning_rate": 3.1535567428260915e-05, "loss": 0.3259, "step": 80375 }, { "epoch": 72.76018099547511, "grad_norm": 3.711698532104492, "learning_rate": 3.148731075305197e-05, "loss": 0.3666, "step": 80400 }, { "epoch": 72.78280542986425, "grad_norm": 2.810096502304077, "learning_rate": 3.1439081216358666e-05, "loss": 0.3143, "step": 80425 }, { "epoch": 72.8054298642534, "grad_norm": 3.124650001525879, "learning_rate": 3.1390878848261434e-05, "loss": 0.3903, "step": 80450 }, { "epoch": 72.82805429864253, "grad_norm": 2.124650478363037, "learning_rate": 3.1342703678823644e-05, "loss": 0.4082, "step": 80475 }, { "epoch": 72.85067873303167, "grad_norm": 1.5146489143371582, "learning_rate": 3.1294555738091826e-05, "loss": 0.2769, "step": 80500 }, { "epoch": 72.87330316742081, "grad_norm": 2.1591391563415527, "learning_rate": 3.124643505609541e-05, "loss": 0.2875, "step": 80525 }, { "epoch": 72.89592760180996, "grad_norm": 2.8991804122924805, "learning_rate": 3.119834166284693e-05, "loss": 0.305, "step": 80550 }, { "epoch": 72.91855203619909, "grad_norm": 1.6438605785369873, "learning_rate": 3.115027558834179e-05, "loss": 0.3824, "step": 80575 }, { "epoch": 72.94117647058823, "grad_norm": 3.8229880332946777, "learning_rate": 3.1102236862558485e-05, "loss": 0.3596, "step": 80600 }, { "epoch": 72.96380090497738, "grad_norm": 2.8134958744049072, "learning_rate": 3.105422551545832e-05, "loss": 0.3102, "step": 80625 }, { "epoch": 72.98642533936652, "grad_norm": 1.2967236042022705, "learning_rate": 3.1006241576985644e-05, "loss": 0.3728, "step": 80650 }, { "epoch": 73.00904977375566, "grad_norm": 1.0272668600082397, "learning_rate": 3.09582850770676e-05, "loss": 0.2807, "step": 80675 }, { "epoch": 73.03167420814479, "grad_norm": 2.2608537673950195, "learning_rate": 3.0910356045614324e-05, "loss": 0.4159, "step": 80700 }, { "epoch": 73.05429864253394, 
"grad_norm": 2.204169273376465, "learning_rate": 3.0862454512518724e-05, "loss": 0.3911, "step": 80725 }, { "epoch": 73.07692307692308, "grad_norm": 3.5292227268218994, "learning_rate": 3.081458050765665e-05, "loss": 0.2254, "step": 80750 }, { "epoch": 73.09954751131222, "grad_norm": 2.4946675300598145, "learning_rate": 3.076673406088667e-05, "loss": 0.2821, "step": 80775 }, { "epoch": 73.12217194570135, "grad_norm": 3.0582332611083984, "learning_rate": 3.07189152020503e-05, "loss": 0.3263, "step": 80800 }, { "epoch": 73.1447963800905, "grad_norm": 2.60414981842041, "learning_rate": 3.067112396097173e-05, "loss": 0.3567, "step": 80825 }, { "epoch": 73.16742081447964, "grad_norm": 3.260399103164673, "learning_rate": 3.062336036745801e-05, "loss": 0.3123, "step": 80850 }, { "epoch": 73.19004524886878, "grad_norm": 3.269760847091675, "learning_rate": 3.057562445129886e-05, "loss": 0.2551, "step": 80875 }, { "epoch": 73.21266968325791, "grad_norm": 1.9295631647109985, "learning_rate": 3.0527916242266825e-05, "loss": 0.374, "step": 80900 }, { "epoch": 73.23529411764706, "grad_norm": 1.468007206916809, "learning_rate": 3.048023577011716e-05, "loss": 0.283, "step": 80925 }, { "epoch": 73.2579185520362, "grad_norm": 2.1063339710235596, "learning_rate": 3.0432583064587727e-05, "loss": 0.304, "step": 80950 }, { "epoch": 73.28054298642535, "grad_norm": 2.960327386856079, "learning_rate": 3.03849581553992e-05, "loss": 0.3517, "step": 80975 }, { "epoch": 73.30316742081448, "grad_norm": 2.941462755203247, "learning_rate": 3.0337361072254802e-05, "loss": 0.3285, "step": 81000 }, { "epoch": 73.32579185520362, "grad_norm": 2.0421576499938965, "learning_rate": 3.02897918448405e-05, "loss": 0.2806, "step": 81025 }, { "epoch": 73.34841628959276, "grad_norm": 4.625753879547119, "learning_rate": 3.0242250502824785e-05, "loss": 0.2991, "step": 81050 }, { "epoch": 73.3710407239819, "grad_norm": 3.131014108657837, "learning_rate": 3.019473707585887e-05, "loss": 0.3234, "step": 81075 }, { 
"epoch": 73.39366515837104, "grad_norm": 3.3441922664642334, "learning_rate": 3.0147251593576445e-05, "loss": 0.3555, "step": 81100 }, { "epoch": 73.41628959276018, "grad_norm": 3.3957202434539795, "learning_rate": 3.009979408559387e-05, "loss": 0.3075, "step": 81125 }, { "epoch": 73.43891402714932, "grad_norm": 2.288896083831787, "learning_rate": 3.005236458150997e-05, "loss": 0.4573, "step": 81150 }, { "epoch": 73.46153846153847, "grad_norm": 2.7308273315429688, "learning_rate": 3.0004963110906195e-05, "loss": 0.3272, "step": 81175 }, { "epoch": 73.4841628959276, "grad_norm": 2.2193171977996826, "learning_rate": 2.9957589703346404e-05, "loss": 0.4009, "step": 81200 }, { "epoch": 73.50678733031674, "grad_norm": 1.5203183889389038, "learning_rate": 2.991024438837708e-05, "loss": 0.2621, "step": 81225 }, { "epoch": 73.52941176470588, "grad_norm": 2.995479106903076, "learning_rate": 2.986292719552705e-05, "loss": 0.3055, "step": 81250 }, { "epoch": 73.55203619909503, "grad_norm": 2.3604586124420166, "learning_rate": 2.9815638154307726e-05, "loss": 0.2909, "step": 81275 }, { "epoch": 73.57466063348416, "grad_norm": 3.2187516689300537, "learning_rate": 2.976837729421285e-05, "loss": 0.3055, "step": 81300 }, { "epoch": 73.5972850678733, "grad_norm": 3.0556023120880127, "learning_rate": 2.9721144644718697e-05, "loss": 0.3756, "step": 81325 }, { "epoch": 73.61990950226244, "grad_norm": 1.973311424255371, "learning_rate": 2.9673940235283835e-05, "loss": 0.2639, "step": 81350 }, { "epoch": 73.64253393665159, "grad_norm": 2.9156811237335205, "learning_rate": 2.962676409534933e-05, "loss": 0.2938, "step": 81375 }, { "epoch": 73.66515837104072, "grad_norm": 2.5239579677581787, "learning_rate": 2.9579616254338513e-05, "loss": 0.3068, "step": 81400 }, { "epoch": 73.68778280542986, "grad_norm": 2.642988681793213, "learning_rate": 2.9532496741657164e-05, "loss": 0.2811, "step": 81425 }, { "epoch": 73.710407239819, "grad_norm": 3.004598379135132, "learning_rate": 
2.9485405586693296e-05, "loss": 0.3466, "step": 81450 }, { "epoch": 73.73303167420815, "grad_norm": 2.9902164936065674, "learning_rate": 2.943834281881733e-05, "loss": 0.3031, "step": 81475 }, { "epoch": 73.75565610859728, "grad_norm": 2.555826187133789, "learning_rate": 2.9391308467381894e-05, "loss": 0.3415, "step": 81500 }, { "epoch": 73.77828054298642, "grad_norm": 1.7891961336135864, "learning_rate": 2.934430256172199e-05, "loss": 0.3319, "step": 81525 }, { "epoch": 73.80090497737557, "grad_norm": 2.137577772140503, "learning_rate": 2.9297325131154764e-05, "loss": 0.2743, "step": 81550 }, { "epoch": 73.82352941176471, "grad_norm": 1.9066287279129028, "learning_rate": 2.9250376204979718e-05, "loss": 0.3188, "step": 81575 }, { "epoch": 73.84615384615384, "grad_norm": 2.7708871364593506, "learning_rate": 2.9203455812478468e-05, "loss": 0.4532, "step": 81600 }, { "epoch": 73.86877828054298, "grad_norm": 1.9949368238449097, "learning_rate": 2.9156563982914942e-05, "loss": 0.3868, "step": 81625 }, { "epoch": 73.89140271493213, "grad_norm": 3.3274765014648438, "learning_rate": 2.9109700745535148e-05, "loss": 0.27, "step": 81650 }, { "epoch": 73.91402714932127, "grad_norm": 1.5537223815917969, "learning_rate": 2.9062866129567357e-05, "loss": 0.3259, "step": 81675 }, { "epoch": 73.9366515837104, "grad_norm": 2.4978432655334473, "learning_rate": 2.9016060164221903e-05, "loss": 0.3164, "step": 81700 }, { "epoch": 73.95927601809954, "grad_norm": 3.590334177017212, "learning_rate": 2.8969282878691327e-05, "loss": 0.311, "step": 81725 }, { "epoch": 73.98190045248869, "grad_norm": 2.6827969551086426, "learning_rate": 2.8922534302150214e-05, "loss": 0.4615, "step": 81750 }, { "epoch": 74.00452488687783, "grad_norm": 4.458113193511963, "learning_rate": 2.8875814463755313e-05, "loss": 0.3725, "step": 81775 }, { "epoch": 74.02714932126698, "grad_norm": 3.4580414295196533, "learning_rate": 2.8829123392645366e-05, "loss": 0.348, "step": 81800 }, { "epoch": 74.0497737556561, 
"grad_norm": 2.76762056350708, "learning_rate": 2.878246111794128e-05, "loss": 0.233, "step": 81825 }, { "epoch": 74.07239819004525, "grad_norm": 2.417442798614502, "learning_rate": 2.873582766874587e-05, "loss": 0.2785, "step": 81850 }, { "epoch": 74.09502262443439, "grad_norm": 2.8274288177490234, "learning_rate": 2.868922307414412e-05, "loss": 0.2651, "step": 81875 }, { "epoch": 74.11764705882354, "grad_norm": 3.2265682220458984, "learning_rate": 2.8642647363202874e-05, "loss": 0.2956, "step": 81900 }, { "epoch": 74.14027149321267, "grad_norm": 2.7552247047424316, "learning_rate": 2.8596100564971093e-05, "loss": 0.2489, "step": 81925 }, { "epoch": 74.16289592760181, "grad_norm": 1.803971290588379, "learning_rate": 2.854958270847958e-05, "loss": 0.2792, "step": 81950 }, { "epoch": 74.18552036199095, "grad_norm": 1.4962939023971558, "learning_rate": 2.8503093822741187e-05, "loss": 0.3283, "step": 81975 }, { "epoch": 74.2081447963801, "grad_norm": 2.628507137298584, "learning_rate": 2.845663393675067e-05, "loss": 0.2768, "step": 82000 }, { "epoch": 74.23076923076923, "grad_norm": 3.1974716186523438, "learning_rate": 2.8410203079484646e-05, "loss": 0.3006, "step": 82025 }, { "epoch": 74.25339366515837, "grad_norm": 2.3015873432159424, "learning_rate": 2.8363801279901723e-05, "loss": 0.2932, "step": 82050 }, { "epoch": 74.27601809954751, "grad_norm": 2.5883677005767822, "learning_rate": 2.8317428566942273e-05, "loss": 0.3571, "step": 82075 }, { "epoch": 74.29864253393666, "grad_norm": 2.075589179992676, "learning_rate": 2.8271084969528644e-05, "loss": 0.3607, "step": 82100 }, { "epoch": 74.32126696832579, "grad_norm": 2.1009368896484375, "learning_rate": 2.82247705165649e-05, "loss": 0.2569, "step": 82125 }, { "epoch": 74.34389140271493, "grad_norm": 5.540664196014404, "learning_rate": 2.8178485236937067e-05, "loss": 0.3984, "step": 82150 }, { "epoch": 74.36651583710407, "grad_norm": 1.998557686805725, "learning_rate": 2.8132229159512835e-05, "loss": 0.3504, "step": 
82175 }, { "epoch": 74.38914027149322, "grad_norm": 2.714663028717041, "learning_rate": 2.8086002313141813e-05, "loss": 0.2711, "step": 82200 }, { "epoch": 74.41176470588235, "grad_norm": 3.093409299850464, "learning_rate": 2.8039804726655254e-05, "loss": 0.2628, "step": 82225 }, { "epoch": 74.43438914027149, "grad_norm": 1.7061655521392822, "learning_rate": 2.7993636428866274e-05, "loss": 0.2953, "step": 82250 }, { "epoch": 74.45701357466064, "grad_norm": 2.4149158000946045, "learning_rate": 2.7947497448569626e-05, "loss": 0.3035, "step": 82275 }, { "epoch": 74.47963800904978, "grad_norm": 2.3477773666381836, "learning_rate": 2.7901387814541866e-05, "loss": 0.3547, "step": 82300 }, { "epoch": 74.50226244343891, "grad_norm": 1.7514317035675049, "learning_rate": 2.7855307555541154e-05, "loss": 0.2606, "step": 82325 }, { "epoch": 74.52488687782805, "grad_norm": 2.655534029006958, "learning_rate": 2.7809256700307435e-05, "loss": 0.3602, "step": 82350 }, { "epoch": 74.5475113122172, "grad_norm": 2.45708966255188, "learning_rate": 2.776323527756221e-05, "loss": 0.2686, "step": 82375 }, { "epoch": 74.57013574660634, "grad_norm": 2.429360866546631, "learning_rate": 2.7717243316008704e-05, "loss": 0.288, "step": 82400 }, { "epoch": 74.59276018099547, "grad_norm": 2.4920473098754883, "learning_rate": 2.7671280844331694e-05, "loss": 0.3735, "step": 82425 }, { "epoch": 74.61538461538461, "grad_norm": 2.358638286590576, "learning_rate": 2.762534789119767e-05, "loss": 0.2888, "step": 82450 }, { "epoch": 74.63800904977376, "grad_norm": 2.5498902797698975, "learning_rate": 2.757944448525458e-05, "loss": 0.3056, "step": 82475 }, { "epoch": 74.6606334841629, "grad_norm": 2.0660364627838135, "learning_rate": 2.7533570655132064e-05, "loss": 0.384, "step": 82500 }, { "epoch": 74.68325791855203, "grad_norm": 5.2555365562438965, "learning_rate": 2.7487726429441214e-05, "loss": 0.4108, "step": 82525 }, { "epoch": 74.70588235294117, "grad_norm": 0.9547332525253296, "learning_rate": 
2.7441911836774757e-05, "loss": 0.3115, "step": 82550 }, { "epoch": 74.72850678733032, "grad_norm": 2.4335803985595703, "learning_rate": 2.7396126905706836e-05, "loss": 0.364, "step": 82575 }, { "epoch": 74.75113122171946, "grad_norm": 2.516291618347168, "learning_rate": 2.73503716647932e-05, "loss": 0.308, "step": 82600 }, { "epoch": 74.77375565610859, "grad_norm": 2.8080978393554688, "learning_rate": 2.730464614257096e-05, "loss": 0.3661, "step": 82625 }, { "epoch": 74.79638009049773, "grad_norm": 2.376248598098755, "learning_rate": 2.725895036755883e-05, "loss": 0.3609, "step": 82650 }, { "epoch": 74.81900452488688, "grad_norm": 1.9304112195968628, "learning_rate": 2.7213284368256836e-05, "loss": 0.3785, "step": 82675 }, { "epoch": 74.84162895927602, "grad_norm": 2.4170520305633545, "learning_rate": 2.7167648173146557e-05, "loss": 0.2514, "step": 82700 }, { "epoch": 74.86425339366515, "grad_norm": 2.2855734825134277, "learning_rate": 2.7122041810690847e-05, "loss": 0.3127, "step": 82725 }, { "epoch": 74.8868778280543, "grad_norm": 2.7707996368408203, "learning_rate": 2.707646530933411e-05, "loss": 0.3125, "step": 82750 }, { "epoch": 74.90950226244344, "grad_norm": 2.6241166591644287, "learning_rate": 2.7030918697501974e-05, "loss": 0.2987, "step": 82775 }, { "epoch": 74.93212669683258, "grad_norm": 2.104856014251709, "learning_rate": 2.6985402003601557e-05, "loss": 0.277, "step": 82800 }, { "epoch": 74.95475113122171, "grad_norm": 2.8981499671936035, "learning_rate": 2.6939915256021194e-05, "loss": 0.3739, "step": 82825 }, { "epoch": 74.97737556561086, "grad_norm": 3.1936309337615967, "learning_rate": 2.689445848313067e-05, "loss": 0.2896, "step": 82850 }, { "epoch": 75.0, "grad_norm": 2.3453421592712402, "learning_rate": 2.6849031713280924e-05, "loss": 0.323, "step": 82875 }, { "epoch": 75.02262443438914, "grad_norm": 2.4536938667297363, "learning_rate": 2.6803634974804376e-05, "loss": 0.254, "step": 82900 }, { "epoch": 75.04524886877829, "grad_norm": 
1.9860996007919312, "learning_rate": 2.6758268296014522e-05, "loss": 0.2524, "step": 82925 }, { "epoch": 75.06787330316742, "grad_norm": 2.0787274837493896, "learning_rate": 2.671293170520626e-05, "loss": 0.2855, "step": 82950 }, { "epoch": 75.09049773755656, "grad_norm": 2.0604352951049805, "learning_rate": 2.6667625230655603e-05, "loss": 0.2704, "step": 82975 }, { "epoch": 75.1131221719457, "grad_norm": 1.309448480606079, "learning_rate": 2.662234890061989e-05, "loss": 0.3004, "step": 83000 }, { "epoch": 75.13574660633485, "grad_norm": 2.537576675415039, "learning_rate": 2.6577102743337536e-05, "loss": 0.2986, "step": 83025 }, { "epoch": 75.15837104072398, "grad_norm": 2.5910840034484863, "learning_rate": 2.653188678702828e-05, "loss": 0.2692, "step": 83050 }, { "epoch": 75.18099547511312, "grad_norm": 1.1406221389770508, "learning_rate": 2.648670105989289e-05, "loss": 0.2639, "step": 83075 }, { "epoch": 75.20361990950227, "grad_norm": 1.3294293880462646, "learning_rate": 2.6441545590113394e-05, "loss": 0.2789, "step": 83100 }, { "epoch": 75.22624434389141, "grad_norm": 2.0877737998962402, "learning_rate": 2.639642040585285e-05, "loss": 0.3101, "step": 83125 }, { "epoch": 75.24886877828054, "grad_norm": 2.896243095397949, "learning_rate": 2.6351325535255527e-05, "loss": 0.268, "step": 83150 }, { "epoch": 75.27149321266968, "grad_norm": 2.6649253368377686, "learning_rate": 2.630626100644667e-05, "loss": 0.2688, "step": 83175 }, { "epoch": 75.29411764705883, "grad_norm": 3.275167226791382, "learning_rate": 2.6261226847532727e-05, "loss": 0.2727, "step": 83200 }, { "epoch": 75.31674208144797, "grad_norm": 1.5215011835098267, "learning_rate": 2.621622308660109e-05, "loss": 0.3157, "step": 83225 }, { "epoch": 75.3393665158371, "grad_norm": 3.882742166519165, "learning_rate": 2.6171249751720292e-05, "loss": 0.342, "step": 83250 }, { "epoch": 75.36199095022624, "grad_norm": 3.296842575073242, "learning_rate": 2.6126306870939785e-05, "loss": 0.2939, "step": 83275 }, { 
"epoch": 75.38461538461539, "grad_norm": 2.2243354320526123, "learning_rate": 2.6081394472290153e-05, "loss": 0.2908, "step": 83300 }, { "epoch": 75.40723981900453, "grad_norm": 2.889202356338501, "learning_rate": 2.603651258378283e-05, "loss": 0.3203, "step": 83325 }, { "epoch": 75.42986425339366, "grad_norm": 1.743531346321106, "learning_rate": 2.5991661233410348e-05, "loss": 0.2665, "step": 83350 }, { "epoch": 75.4524886877828, "grad_norm": 3.992488384246826, "learning_rate": 2.5946840449146088e-05, "loss": 0.3554, "step": 83375 }, { "epoch": 75.47511312217195, "grad_norm": 2.4225494861602783, "learning_rate": 2.5902050258944454e-05, "loss": 0.2776, "step": 83400 }, { "epoch": 75.49773755656109, "grad_norm": 1.7738288640975952, "learning_rate": 2.585729069074069e-05, "loss": 0.335, "step": 83425 }, { "epoch": 75.52036199095022, "grad_norm": 1.690691351890564, "learning_rate": 2.5812561772451023e-05, "loss": 0.317, "step": 83450 }, { "epoch": 75.54298642533936, "grad_norm": 1.6452319622039795, "learning_rate": 2.5767863531972483e-05, "loss": 0.2665, "step": 83475 }, { "epoch": 75.56561085972851, "grad_norm": 2.921520233154297, "learning_rate": 2.5723195997183033e-05, "loss": 0.4052, "step": 83500 }, { "epoch": 75.58823529411765, "grad_norm": 1.555267095565796, "learning_rate": 2.5678559195941424e-05, "loss": 0.3262, "step": 83525 }, { "epoch": 75.61085972850678, "grad_norm": 2.8181378841400146, "learning_rate": 2.563395315608729e-05, "loss": 0.2583, "step": 83550 }, { "epoch": 75.63348416289593, "grad_norm": 3.9380154609680176, "learning_rate": 2.5589377905441086e-05, "loss": 0.3063, "step": 83575 }, { "epoch": 75.65610859728507, "grad_norm": 2.493354320526123, "learning_rate": 2.5544833471803992e-05, "loss": 0.3014, "step": 83600 }, { "epoch": 75.67873303167421, "grad_norm": 1.0901517868041992, "learning_rate": 2.550031988295806e-05, "loss": 0.2351, "step": 83625 }, { "epoch": 75.70135746606334, "grad_norm": 2.2735798358917236, "learning_rate": 
2.5455837166666007e-05, "loss": 0.2414, "step": 83650 }, { "epoch": 75.72398190045249, "grad_norm": 1.5142090320587158, "learning_rate": 2.54113853506714e-05, "loss": 0.3092, "step": 83675 }, { "epoch": 75.74660633484163, "grad_norm": 2.3500263690948486, "learning_rate": 2.536696446269843e-05, "loss": 0.2965, "step": 83700 }, { "epoch": 75.76923076923077, "grad_norm": 1.6575660705566406, "learning_rate": 2.53225745304521e-05, "loss": 0.3118, "step": 83725 }, { "epoch": 75.7918552036199, "grad_norm": 3.3095154762268066, "learning_rate": 2.5278215581618005e-05, "loss": 0.3143, "step": 83750 }, { "epoch": 75.81447963800905, "grad_norm": 4.247889518737793, "learning_rate": 2.5233887643862524e-05, "loss": 0.3356, "step": 83775 }, { "epoch": 75.83710407239819, "grad_norm": 2.5860421657562256, "learning_rate": 2.5189590744832567e-05, "loss": 0.3011, "step": 83800 }, { "epoch": 75.85972850678733, "grad_norm": 2.3422882556915283, "learning_rate": 2.5145324912155822e-05, "loss": 0.2598, "step": 83825 }, { "epoch": 75.88235294117646, "grad_norm": 1.9032371044158936, "learning_rate": 2.5101090173440477e-05, "loss": 0.2853, "step": 83850 }, { "epoch": 75.90497737556561, "grad_norm": 1.396369457244873, "learning_rate": 2.5056886556275436e-05, "loss": 0.3136, "step": 83875 }, { "epoch": 75.92760180995475, "grad_norm": 2.703479528427124, "learning_rate": 2.5012714088230086e-05, "loss": 0.4706, "step": 83900 }, { "epoch": 75.9502262443439, "grad_norm": 4.66386604309082, "learning_rate": 2.4968572796854495e-05, "loss": 0.3674, "step": 83925 }, { "epoch": 75.97285067873302, "grad_norm": 2.2697360515594482, "learning_rate": 2.492446270967919e-05, "loss": 0.3386, "step": 83950 }, { "epoch": 75.99547511312217, "grad_norm": 2.354702949523926, "learning_rate": 2.488214640844009e-05, "loss": 0.329, "step": 83975 }, { "epoch": 76.01809954751131, "grad_norm": 2.0319299697875977, "learning_rate": 2.483809756128354e-05, "loss": 0.3337, "step": 84000 }, { "epoch": 76.04072398190046, 
"grad_norm": 1.9685543775558472, "learning_rate": 2.4794079999703636e-05, "loss": 0.2498, "step": 84025 }, { "epoch": 76.0633484162896, "grad_norm": 1.9180629253387451, "learning_rate": 2.4750093751153818e-05, "loss": 0.2594, "step": 84050 }, { "epoch": 76.08597285067873, "grad_norm": 1.8504961729049683, "learning_rate": 2.4706138843067916e-05, "loss": 0.2618, "step": 84075 }, { "epoch": 76.10859728506787, "grad_norm": 1.5177520513534546, "learning_rate": 2.4662215302860293e-05, "loss": 0.2552, "step": 84100 }, { "epoch": 76.13122171945702, "grad_norm": 1.9649053812026978, "learning_rate": 2.4618323157925665e-05, "loss": 0.2737, "step": 84125 }, { "epoch": 76.15384615384616, "grad_norm": 2.5923967361450195, "learning_rate": 2.4574462435639256e-05, "loss": 0.2432, "step": 84150 }, { "epoch": 76.17647058823529, "grad_norm": 2.4911556243896484, "learning_rate": 2.4530633163356605e-05, "loss": 0.2476, "step": 84175 }, { "epoch": 76.19909502262443, "grad_norm": 1.116897463798523, "learning_rate": 2.4486835368413724e-05, "loss": 0.2449, "step": 84200 }, { "epoch": 76.22171945701358, "grad_norm": 3.128192663192749, "learning_rate": 2.4443069078126902e-05, "loss": 0.3248, "step": 84225 }, { "epoch": 76.24434389140272, "grad_norm": 3.1830790042877197, "learning_rate": 2.439933431979286e-05, "loss": 0.3011, "step": 84250 }, { "epoch": 76.26696832579185, "grad_norm": 2.2238335609436035, "learning_rate": 2.435563112068858e-05, "loss": 0.3543, "step": 84275 }, { "epoch": 76.289592760181, "grad_norm": 1.420111060142517, "learning_rate": 2.4311959508071437e-05, "loss": 0.3136, "step": 84300 }, { "epoch": 76.31221719457014, "grad_norm": 2.6922364234924316, "learning_rate": 2.4268319509179024e-05, "loss": 0.2751, "step": 84325 }, { "epoch": 76.33484162895928, "grad_norm": 2.10040283203125, "learning_rate": 2.422471115122931e-05, "loss": 0.2464, "step": 84350 }, { "epoch": 76.35746606334841, "grad_norm": 1.4247398376464844, "learning_rate": 2.418113446142042e-05, "loss": 0.3139, 
"step": 84375 }, { "epoch": 76.38009049773756, "grad_norm": 3.905673027038574, "learning_rate": 2.4137589466930843e-05, "loss": 0.3082, "step": 84400 }, { "epoch": 76.4027149321267, "grad_norm": 2.093348264694214, "learning_rate": 2.4094076194919193e-05, "loss": 0.2518, "step": 84425 }, { "epoch": 76.42533936651584, "grad_norm": 2.3145039081573486, "learning_rate": 2.4050594672524394e-05, "loss": 0.3754, "step": 84450 }, { "epoch": 76.44796380090497, "grad_norm": 3.8801562786102295, "learning_rate": 2.4007144926865473e-05, "loss": 0.2717, "step": 84475 }, { "epoch": 76.47058823529412, "grad_norm": 2.3647115230560303, "learning_rate": 2.3963726985041743e-05, "loss": 0.3164, "step": 84500 }, { "epoch": 76.49321266968326, "grad_norm": 2.593600273132324, "learning_rate": 2.3920340874132575e-05, "loss": 0.318, "step": 84525 }, { "epoch": 76.5158371040724, "grad_norm": 6.139859199523926, "learning_rate": 2.387698662119759e-05, "loss": 0.3166, "step": 84550 }, { "epoch": 76.53846153846153, "grad_norm": 2.8768985271453857, "learning_rate": 2.383366425327644e-05, "loss": 0.3057, "step": 84575 }, { "epoch": 76.56108597285068, "grad_norm": 1.225097894668579, "learning_rate": 2.3790373797388974e-05, "loss": 0.2983, "step": 84600 }, { "epoch": 76.58371040723982, "grad_norm": 4.126646518707275, "learning_rate": 2.374711528053508e-05, "loss": 0.3626, "step": 84625 }, { "epoch": 76.60633484162896, "grad_norm": 2.515584707260132, "learning_rate": 2.3703888729694766e-05, "loss": 0.3151, "step": 84650 }, { "epoch": 76.6289592760181, "grad_norm": 1.9274005889892578, "learning_rate": 2.366069417182807e-05, "loss": 0.3131, "step": 84675 }, { "epoch": 76.65158371040724, "grad_norm": 2.3965847492218018, "learning_rate": 2.3617531633875107e-05, "loss": 0.283, "step": 84700 }, { "epoch": 76.67420814479638, "grad_norm": 2.397613763809204, "learning_rate": 2.357440114275598e-05, "loss": 0.2835, "step": 84725 }, { "epoch": 76.69683257918552, "grad_norm": 2.0233254432678223, "learning_rate": 
2.3531302725370872e-05, "loss": 0.3035, "step": 84750 }, { "epoch": 76.71945701357465, "grad_norm": 3.270297050476074, "learning_rate": 2.3488236408599876e-05, "loss": 0.2768, "step": 84775 }, { "epoch": 76.7420814479638, "grad_norm": 2.188873291015625, "learning_rate": 2.3445202219303124e-05, "loss": 0.3151, "step": 84800 }, { "epoch": 76.76470588235294, "grad_norm": 1.799863338470459, "learning_rate": 2.3402200184320726e-05, "loss": 0.387, "step": 84825 }, { "epoch": 76.78733031674209, "grad_norm": 1.9361035823822021, "learning_rate": 2.3359230330472663e-05, "loss": 0.272, "step": 84850 }, { "epoch": 76.80995475113122, "grad_norm": 3.911672592163086, "learning_rate": 2.3316292684558923e-05, "loss": 0.3238, "step": 84875 }, { "epoch": 76.83257918552036, "grad_norm": 2.8085107803344727, "learning_rate": 2.3273387273359336e-05, "loss": 0.3517, "step": 84900 }, { "epoch": 76.8552036199095, "grad_norm": 2.42825984954834, "learning_rate": 2.323051412363371e-05, "loss": 0.2764, "step": 84925 }, { "epoch": 76.87782805429865, "grad_norm": 3.0773284435272217, "learning_rate": 2.3187673262121634e-05, "loss": 0.2522, "step": 84950 }, { "epoch": 76.90045248868778, "grad_norm": 2.8645083904266357, "learning_rate": 2.3144864715542653e-05, "loss": 0.2687, "step": 84975 }, { "epoch": 76.92307692307692, "grad_norm": 4.084969520568848, "learning_rate": 2.3102088510596076e-05, "loss": 0.2953, "step": 85000 }, { "epoch": 76.94570135746606, "grad_norm": 1.894515872001648, "learning_rate": 2.3059344673961123e-05, "loss": 0.2792, "step": 85025 }, { "epoch": 76.96832579185521, "grad_norm": 3.177055835723877, "learning_rate": 2.3016633232296725e-05, "loss": 0.2772, "step": 85050 }, { "epoch": 76.99095022624434, "grad_norm": 1.561269760131836, "learning_rate": 2.297395421224173e-05, "loss": 0.2439, "step": 85075 }, { "epoch": 77.01357466063348, "grad_norm": 1.7832810878753662, "learning_rate": 2.2931307640414653e-05, "loss": 0.2548, "step": 85100 }, { "epoch": 77.03619909502262, 
"grad_norm": 4.043969631195068, "learning_rate": 2.2888693543413853e-05, "loss": 0.3584, "step": 85125 }, { "epoch": 77.05882352941177, "grad_norm": 2.9569408893585205, "learning_rate": 2.284611194781736e-05, "loss": 0.2941, "step": 85150 }, { "epoch": 77.08144796380091, "grad_norm": 2.0861144065856934, "learning_rate": 2.2803562880183027e-05, "loss": 0.3211, "step": 85175 }, { "epoch": 77.10407239819004, "grad_norm": 1.199874997138977, "learning_rate": 2.2761046367048314e-05, "loss": 0.2533, "step": 85200 }, { "epoch": 77.12669683257919, "grad_norm": 2.356982469558716, "learning_rate": 2.2718562434930475e-05, "loss": 0.2732, "step": 85225 }, { "epoch": 77.14932126696833, "grad_norm": 2.066962480545044, "learning_rate": 2.2676111110326354e-05, "loss": 0.2119, "step": 85250 }, { "epoch": 77.17194570135747, "grad_norm": 3.157137155532837, "learning_rate": 2.263369241971254e-05, "loss": 0.2906, "step": 85275 }, { "epoch": 77.1945701357466, "grad_norm": 1.8582885265350342, "learning_rate": 2.25913063895452e-05, "loss": 0.3255, "step": 85300 }, { "epoch": 77.21719457013575, "grad_norm": 2.554067611694336, "learning_rate": 2.25489530462602e-05, "loss": 0.2571, "step": 85325 }, { "epoch": 77.23981900452489, "grad_norm": 2.0254104137420654, "learning_rate": 2.2506632416272932e-05, "loss": 0.2846, "step": 85350 }, { "epoch": 77.26244343891403, "grad_norm": 2.0758092403411865, "learning_rate": 2.2464344525978463e-05, "loss": 0.3366, "step": 85375 }, { "epoch": 77.28506787330316, "grad_norm": 2.7301504611968994, "learning_rate": 2.242208940175141e-05, "loss": 0.2517, "step": 85400 }, { "epoch": 77.3076923076923, "grad_norm": 0.9961033463478088, "learning_rate": 2.2379867069945967e-05, "loss": 0.3473, "step": 85425 }, { "epoch": 77.33031674208145, "grad_norm": 1.8666002750396729, "learning_rate": 2.233767755689583e-05, "loss": 0.2344, "step": 85450 }, { "epoch": 77.3529411764706, "grad_norm": 2.4896492958068848, "learning_rate": 2.229552088891431e-05, "loss": 0.2992, "step": 
85475 }, { "epoch": 77.37556561085972, "grad_norm": 1.919020175933838, "learning_rate": 2.225339709229412e-05, "loss": 0.248, "step": 85500 }, { "epoch": 77.39819004524887, "grad_norm": 1.4611742496490479, "learning_rate": 2.2211306193307598e-05, "loss": 0.2468, "step": 85525 }, { "epoch": 77.42081447963801, "grad_norm": 2.4415318965911865, "learning_rate": 2.2169248218206444e-05, "loss": 0.2568, "step": 85550 }, { "epoch": 77.44343891402715, "grad_norm": 2.6511993408203125, "learning_rate": 2.2127223193221934e-05, "loss": 0.2942, "step": 85575 }, { "epoch": 77.46606334841628, "grad_norm": 1.9115158319473267, "learning_rate": 2.2085231144564687e-05, "loss": 0.2828, "step": 85600 }, { "epoch": 77.48868778280543, "grad_norm": 1.9452518224716187, "learning_rate": 2.2043272098424855e-05, "loss": 0.2795, "step": 85625 }, { "epoch": 77.51131221719457, "grad_norm": 1.4088445901870728, "learning_rate": 2.200134608097192e-05, "loss": 0.346, "step": 85650 }, { "epoch": 77.53393665158372, "grad_norm": 2.5477261543273926, "learning_rate": 2.1959453118354833e-05, "loss": 0.254, "step": 85675 }, { "epoch": 77.55656108597285, "grad_norm": 2.778261423110962, "learning_rate": 2.1917593236701866e-05, "loss": 0.2644, "step": 85700 }, { "epoch": 77.57918552036199, "grad_norm": 2.218679189682007, "learning_rate": 2.1875766462120734e-05, "loss": 0.3112, "step": 85725 }, { "epoch": 77.60180995475113, "grad_norm": 1.5461026430130005, "learning_rate": 2.1833972820698417e-05, "loss": 0.3407, "step": 85750 }, { "epoch": 77.62443438914028, "grad_norm": 2.2722394466400146, "learning_rate": 2.1792212338501316e-05, "loss": 0.2668, "step": 85775 }, { "epoch": 77.6470588235294, "grad_norm": 1.2408086061477661, "learning_rate": 2.1750485041575064e-05, "loss": 0.3041, "step": 85800 }, { "epoch": 77.66968325791855, "grad_norm": 2.132417678833008, "learning_rate": 2.1708790955944698e-05, "loss": 0.2479, "step": 85825 }, { "epoch": 77.6923076923077, "grad_norm": 1.9469751119613647, "learning_rate": 
2.166713010761442e-05, "loss": 0.3126, "step": 85850 }, { "epoch": 77.71493212669684, "grad_norm": 0.7266249060630798, "learning_rate": 2.162550252256784e-05, "loss": 0.2829, "step": 85875 }, { "epoch": 77.73755656108597, "grad_norm": 2.63224196434021, "learning_rate": 2.1583908226767675e-05, "loss": 0.2298, "step": 85900 }, { "epoch": 77.76018099547511, "grad_norm": 1.7881295680999756, "learning_rate": 2.1542347246156015e-05, "loss": 0.2689, "step": 85925 }, { "epoch": 77.78280542986425, "grad_norm": 1.8670376539230347, "learning_rate": 2.1500819606654065e-05, "loss": 0.2701, "step": 85950 }, { "epoch": 77.8054298642534, "grad_norm": 1.5502375364303589, "learning_rate": 2.145932533416232e-05, "loss": 0.2444, "step": 85975 }, { "epoch": 77.82805429864253, "grad_norm": 4.497215747833252, "learning_rate": 2.1417864454560386e-05, "loss": 0.3827, "step": 86000 }, { "epoch": 77.85067873303167, "grad_norm": 1.7891716957092285, "learning_rate": 2.1376436993707127e-05, "loss": 0.2445, "step": 86025 }, { "epoch": 77.87330316742081, "grad_norm": 1.0871717929840088, "learning_rate": 2.133504297744047e-05, "loss": 0.2506, "step": 86050 }, { "epoch": 77.89592760180996, "grad_norm": 2.207763433456421, "learning_rate": 2.1293682431577578e-05, "loss": 0.2627, "step": 86075 }, { "epoch": 77.91855203619909, "grad_norm": 1.6225380897521973, "learning_rate": 2.125235538191464e-05, "loss": 0.2886, "step": 86100 }, { "epoch": 77.94117647058823, "grad_norm": 2.372548818588257, "learning_rate": 2.1211061854227067e-05, "loss": 0.269, "step": 86125 }, { "epoch": 77.96380090497738, "grad_norm": 3.087933301925659, "learning_rate": 2.1169801874269242e-05, "loss": 0.3022, "step": 86150 }, { "epoch": 77.98642533936652, "grad_norm": 0.7303369641304016, "learning_rate": 2.112857546777473e-05, "loss": 0.2829, "step": 86175 }, { "epoch": 78.00904977375566, "grad_norm": 2.1290323734283447, "learning_rate": 2.1087382660456077e-05, "loss": 0.2893, "step": 86200 }, { "epoch": 78.03167420814479, 
"grad_norm": 2.1010098457336426, "learning_rate": 2.1046223478004936e-05, "loss": 0.2467, "step": 86225 }, { "epoch": 78.05429864253394, "grad_norm": 1.8762445449829102, "learning_rate": 2.100674232095631e-05, "loss": 0.2446, "step": 86250 }, { "epoch": 78.07692307692308, "grad_norm": 1.819108009338379, "learning_rate": 2.0965649117691385e-05, "loss": 0.3258, "step": 86275 }, { "epoch": 78.09954751131222, "grad_norm": 1.1745537519454956, "learning_rate": 2.0924589615218227e-05, "loss": 0.2444, "step": 86300 }, { "epoch": 78.12217194570135, "grad_norm": 2.1086909770965576, "learning_rate": 2.0883563839145248e-05, "loss": 0.2723, "step": 86325 }, { "epoch": 78.1447963800905, "grad_norm": 1.8595943450927734, "learning_rate": 2.0842571815059967e-05, "loss": 0.3271, "step": 86350 }, { "epoch": 78.16742081447964, "grad_norm": 2.4486351013183594, "learning_rate": 2.080161356852872e-05, "loss": 0.2213, "step": 86375 }, { "epoch": 78.19004524886878, "grad_norm": 1.9297285079956055, "learning_rate": 2.076068912509688e-05, "loss": 0.3219, "step": 86400 }, { "epoch": 78.21266968325791, "grad_norm": 2.3453269004821777, "learning_rate": 2.0719798510288666e-05, "loss": 0.2324, "step": 86425 }, { "epoch": 78.23529411764706, "grad_norm": 2.9171552658081055, "learning_rate": 2.0678941749607253e-05, "loss": 0.2939, "step": 86450 }, { "epoch": 78.2579185520362, "grad_norm": 1.9287010431289673, "learning_rate": 2.0638118868534642e-05, "loss": 0.2399, "step": 86475 }, { "epoch": 78.28054298642535, "grad_norm": 2.80836820602417, "learning_rate": 2.059732989253175e-05, "loss": 0.2329, "step": 86500 }, { "epoch": 78.30316742081448, "grad_norm": 0.6510598063468933, "learning_rate": 2.055657484703837e-05, "loss": 0.2462, "step": 86525 }, { "epoch": 78.32579185520362, "grad_norm": 1.468997597694397, "learning_rate": 2.0515853757473046e-05, "loss": 0.237, "step": 86550 }, { "epoch": 78.34841628959276, "grad_norm": 2.5640623569488525, "learning_rate": 2.0475166649233238e-05, "loss": 0.234, 
"step": 86575 }, { "epoch": 78.3710407239819, "grad_norm": 1.8810259103775024, "learning_rate": 2.0434513547695123e-05, "loss": 0.3246, "step": 86600 }, { "epoch": 78.39366515837104, "grad_norm": 1.4488884210586548, "learning_rate": 2.0393894478213767e-05, "loss": 0.3463, "step": 86625 }, { "epoch": 78.41628959276018, "grad_norm": 2.9062445163726807, "learning_rate": 2.0353309466122903e-05, "loss": 0.2456, "step": 86650 }, { "epoch": 78.43891402714932, "grad_norm": 1.876059889793396, "learning_rate": 2.0312758536735137e-05, "loss": 0.2249, "step": 86675 }, { "epoch": 78.46153846153847, "grad_norm": 1.7976325750350952, "learning_rate": 2.0272241715341704e-05, "loss": 0.2466, "step": 86700 }, { "epoch": 78.4841628959276, "grad_norm": 1.2220265865325928, "learning_rate": 2.0231759027212657e-05, "loss": 0.2529, "step": 86725 }, { "epoch": 78.50678733031674, "grad_norm": 1.8671175241470337, "learning_rate": 2.0191310497596696e-05, "loss": 0.231, "step": 86750 }, { "epoch": 78.52941176470588, "grad_norm": 1.1508766412734985, "learning_rate": 2.015089615172129e-05, "loss": 0.2309, "step": 86775 }, { "epoch": 78.55203619909503, "grad_norm": 2.886613130569458, "learning_rate": 2.0110516014792486e-05, "loss": 0.2588, "step": 86800 }, { "epoch": 78.57466063348416, "grad_norm": 1.6978250741958618, "learning_rate": 2.00701701119951e-05, "loss": 0.2535, "step": 86825 }, { "epoch": 78.5972850678733, "grad_norm": 5.23097562789917, "learning_rate": 2.002985846849251e-05, "loss": 0.4008, "step": 86850 }, { "epoch": 78.61990950226244, "grad_norm": 1.906355619430542, "learning_rate": 1.9989581109426808e-05, "loss": 0.3319, "step": 86875 }, { "epoch": 78.64253393665159, "grad_norm": 1.8507574796676636, "learning_rate": 1.994933805991861e-05, "loss": 0.4066, "step": 86900 }, { "epoch": 78.66515837104072, "grad_norm": 2.7209722995758057, "learning_rate": 1.9909129345067228e-05, "loss": 0.2455, "step": 86925 }, { "epoch": 78.68778280542986, "grad_norm": 1.5386555194854736, 
"learning_rate": 1.9868954989950472e-05, "loss": 0.2444, "step": 86950 }, { "epoch": 78.710407239819, "grad_norm": 1.6125478744506836, "learning_rate": 1.982881501962482e-05, "loss": 0.3259, "step": 86975 }, { "epoch": 78.73303167420815, "grad_norm": 5.142017364501953, "learning_rate": 1.9788709459125194e-05, "loss": 0.3714, "step": 87000 }, { "epoch": 78.75565610859728, "grad_norm": 2.0047316551208496, "learning_rate": 1.974863833346515e-05, "loss": 0.2944, "step": 87025 }, { "epoch": 78.77828054298642, "grad_norm": 2.330984354019165, "learning_rate": 1.97086016676367e-05, "loss": 0.2309, "step": 87050 }, { "epoch": 78.80090497737557, "grad_norm": 1.5318078994750977, "learning_rate": 1.9668599486610427e-05, "loss": 0.2182, "step": 87075 }, { "epoch": 78.82352941176471, "grad_norm": 2.0248022079467773, "learning_rate": 1.962863181533533e-05, "loss": 0.2288, "step": 87100 }, { "epoch": 78.84615384615384, "grad_norm": 2.2620387077331543, "learning_rate": 1.958869867873897e-05, "loss": 0.3758, "step": 87125 }, { "epoch": 78.86877828054298, "grad_norm": 1.064523458480835, "learning_rate": 1.954880010172727e-05, "loss": 0.2542, "step": 87150 }, { "epoch": 78.89140271493213, "grad_norm": 1.792374610900879, "learning_rate": 1.9508936109184713e-05, "loss": 0.2257, "step": 87175 }, { "epoch": 78.91402714932127, "grad_norm": 2.4855358600616455, "learning_rate": 1.9469106725974086e-05, "loss": 0.2449, "step": 87200 }, { "epoch": 78.9366515837104, "grad_norm": 1.8008933067321777, "learning_rate": 1.9429311976936726e-05, "loss": 0.2285, "step": 87225 }, { "epoch": 78.95927601809954, "grad_norm": 1.6819531917572021, "learning_rate": 1.938955188689224e-05, "loss": 0.3144, "step": 87250 }, { "epoch": 78.98190045248869, "grad_norm": 0.9282886981964111, "learning_rate": 1.934982648063872e-05, "loss": 0.2331, "step": 87275 }, { "epoch": 79.00452488687783, "grad_norm": 0.7602487802505493, "learning_rate": 1.9310135782952542e-05, "loss": 0.3145, "step": 87300 }, { "epoch": 
79.02714932126698, "grad_norm": 0.8442087769508362, "learning_rate": 1.9270479818588528e-05, "loss": 0.2277, "step": 87325 }, { "epoch": 79.0497737556561, "grad_norm": 1.41585111618042, "learning_rate": 1.9230858612279735e-05, "loss": 0.3091, "step": 87350 }, { "epoch": 79.07239819004525, "grad_norm": 1.8606550693511963, "learning_rate": 1.919127218873763e-05, "loss": 0.2291, "step": 87375 }, { "epoch": 79.09502262443439, "grad_norm": 1.2281155586242676, "learning_rate": 1.9151720572651914e-05, "loss": 0.2361, "step": 87400 }, { "epoch": 79.11764705882354, "grad_norm": 2.0486700534820557, "learning_rate": 1.9112203788690654e-05, "loss": 0.2459, "step": 87425 }, { "epoch": 79.14027149321267, "grad_norm": 1.0638536214828491, "learning_rate": 1.907272186150011e-05, "loss": 0.2291, "step": 87450 }, { "epoch": 79.16289592760181, "grad_norm": 1.740169644355774, "learning_rate": 1.903327481570488e-05, "loss": 0.2477, "step": 87475 }, { "epoch": 79.18552036199095, "grad_norm": 1.0604002475738525, "learning_rate": 1.8993862675907725e-05, "loss": 0.2555, "step": 87500 }, { "epoch": 79.2081447963801, "grad_norm": 2.515486717224121, "learning_rate": 1.895448546668974e-05, "loss": 0.2041, "step": 87525 }, { "epoch": 79.23076923076923, "grad_norm": 2.8553245067596436, "learning_rate": 1.8915143212610117e-05, "loss": 0.2326, "step": 87550 }, { "epoch": 79.25339366515837, "grad_norm": 2.4957714080810547, "learning_rate": 1.8875835938206332e-05, "loss": 0.2342, "step": 87575 }, { "epoch": 79.27601809954751, "grad_norm": 4.493040084838867, "learning_rate": 1.8836563667994042e-05, "loss": 0.317, "step": 87600 }, { "epoch": 79.29864253393666, "grad_norm": 2.3935322761535645, "learning_rate": 1.8797326426466996e-05, "loss": 0.3032, "step": 87625 }, { "epoch": 79.32126696832579, "grad_norm": 2.9496877193450928, "learning_rate": 1.8758124238097202e-05, "loss": 0.338, "step": 87650 }, { "epoch": 79.34389140271493, "grad_norm": 1.6749485731124878, "learning_rate": 1.8718957127334694e-05, 
"loss": 0.2714, "step": 87675 }, { "epoch": 79.36651583710407, "grad_norm": 1.2983139753341675, "learning_rate": 1.8679825118607732e-05, "loss": 0.2615, "step": 87700 }, { "epoch": 79.38914027149322, "grad_norm": 2.107964038848877, "learning_rate": 1.8640728236322598e-05, "loss": 0.2624, "step": 87725 }, { "epoch": 79.41176470588235, "grad_norm": 1.2432149648666382, "learning_rate": 1.8601666504863748e-05, "loss": 0.2805, "step": 87750 }, { "epoch": 79.43438914027149, "grad_norm": 1.972983956336975, "learning_rate": 1.8562639948593625e-05, "loss": 0.3174, "step": 87775 }, { "epoch": 79.45701357466064, "grad_norm": 1.1237883567810059, "learning_rate": 1.8523648591852818e-05, "loss": 0.2342, "step": 87800 }, { "epoch": 79.47963800904978, "grad_norm": 4.015199184417725, "learning_rate": 1.8484692458959884e-05, "loss": 0.2771, "step": 87825 }, { "epoch": 79.50226244343891, "grad_norm": 1.4740439653396606, "learning_rate": 1.8445771574211478e-05, "loss": 0.2238, "step": 87850 }, { "epoch": 79.52488687782805, "grad_norm": 1.226570963859558, "learning_rate": 1.840688596188226e-05, "loss": 0.2233, "step": 87875 }, { "epoch": 79.5475113122172, "grad_norm": 5.07127046585083, "learning_rate": 1.8368035646224834e-05, "loss": 0.2866, "step": 87900 }, { "epoch": 79.57013574660634, "grad_norm": 2.449209451675415, "learning_rate": 1.8329220651469874e-05, "loss": 0.3408, "step": 87925 }, { "epoch": 79.59276018099547, "grad_norm": 0.795286238193512, "learning_rate": 1.829044100182593e-05, "loss": 0.2508, "step": 87950 }, { "epoch": 79.61538461538461, "grad_norm": 0.9944939613342285, "learning_rate": 1.825169672147962e-05, "loss": 0.2122, "step": 87975 }, { "epoch": 79.63800904977376, "grad_norm": 1.448467493057251, "learning_rate": 1.8212987834595377e-05, "loss": 0.2648, "step": 88000 }, { "epoch": 79.6606334841629, "grad_norm": 2.312659978866577, "learning_rate": 1.8174314365315683e-05, "loss": 0.2904, "step": 88025 }, { "epoch": 79.68325791855203, "grad_norm": 1.334312081336975, 
"learning_rate": 1.8135676337760823e-05, "loss": 0.3041, "step": 88050 }, { "epoch": 79.70588235294117, "grad_norm": 1.571508765220642, "learning_rate": 1.8097073776029038e-05, "loss": 0.2758, "step": 88075 }, { "epoch": 79.72850678733032, "grad_norm": 2.2298097610473633, "learning_rate": 1.805850670419647e-05, "loss": 0.276, "step": 88100 }, { "epoch": 79.75113122171946, "grad_norm": 2.40057635307312, "learning_rate": 1.8019975146317042e-05, "loss": 0.3084, "step": 88125 }, { "epoch": 79.77375565610859, "grad_norm": 4.272204399108887, "learning_rate": 1.7981479126422625e-05, "loss": 0.2568, "step": 88150 }, { "epoch": 79.79638009049773, "grad_norm": 5.12802267074585, "learning_rate": 1.7943018668522834e-05, "loss": 0.3572, "step": 88175 }, { "epoch": 79.81900452488688, "grad_norm": 2.5201570987701416, "learning_rate": 1.7904593796605195e-05, "loss": 0.2335, "step": 88200 }, { "epoch": 79.84162895927602, "grad_norm": 1.8251371383666992, "learning_rate": 1.786620453463494e-05, "loss": 0.2235, "step": 88225 }, { "epoch": 79.86425339366515, "grad_norm": 1.3936398029327393, "learning_rate": 1.7827850906555195e-05, "loss": 0.2314, "step": 88250 }, { "epoch": 79.8868778280543, "grad_norm": 2.0077548027038574, "learning_rate": 1.7789532936286775e-05, "loss": 0.2696, "step": 88275 }, { "epoch": 79.90950226244344, "grad_norm": 1.215086817741394, "learning_rate": 1.7751250647728314e-05, "loss": 0.2898, "step": 88300 }, { "epoch": 79.93212669683258, "grad_norm": 0.969950795173645, "learning_rate": 1.771300406475614e-05, "loss": 0.2935, "step": 88325 }, { "epoch": 79.95475113122171, "grad_norm": 1.9991028308868408, "learning_rate": 1.7674793211224382e-05, "loss": 0.2279, "step": 88350 }, { "epoch": 79.97737556561086, "grad_norm": 3.2695038318634033, "learning_rate": 1.7636618110964798e-05, "loss": 0.2357, "step": 88375 }, { "epoch": 80.0, "grad_norm": 1.568069577217102, "learning_rate": 1.759847878778693e-05, "loss": 0.3049, "step": 88400 }, { "epoch": 80.02262443438914, 
"grad_norm": 2.0564796924591064, "learning_rate": 1.7560375265477937e-05, "loss": 0.266, "step": 88425 }, { "epoch": 80.04524886877829, "grad_norm": 1.5851367712020874, "learning_rate": 1.7522307567802714e-05, "loss": 0.2405, "step": 88450 }, { "epoch": 80.06787330316742, "grad_norm": 3.8050899505615234, "learning_rate": 1.7484275718503744e-05, "loss": 0.3095, "step": 88475 }, { "epoch": 80.09049773755656, "grad_norm": 2.199955940246582, "learning_rate": 1.744627974130122e-05, "loss": 0.2136, "step": 88500 }, { "epoch": 80.1131221719457, "grad_norm": 3.5844807624816895, "learning_rate": 1.7408319659892896e-05, "loss": 0.3086, "step": 88525 }, { "epoch": 80.13574660633485, "grad_norm": 1.3600096702575684, "learning_rate": 1.7370395497954205e-05, "loss": 0.252, "step": 88550 }, { "epoch": 80.15837104072398, "grad_norm": 1.3867491483688354, "learning_rate": 1.7332507279138105e-05, "loss": 0.2444, "step": 88575 }, { "epoch": 80.18099547511312, "grad_norm": 1.8291349411010742, "learning_rate": 1.7294655027075207e-05, "loss": 0.3002, "step": 88600 }, { "epoch": 80.20361990950227, "grad_norm": 2.0560073852539062, "learning_rate": 1.7256838765373636e-05, "loss": 0.3013, "step": 88625 }, { "epoch": 80.22624434389141, "grad_norm": 1.6388047933578491, "learning_rate": 1.721905851761911e-05, "loss": 0.2177, "step": 88650 }, { "epoch": 80.24886877828054, "grad_norm": 2.622504711151123, "learning_rate": 1.7181314307374846e-05, "loss": 0.2814, "step": 88675 }, { "epoch": 80.27149321266968, "grad_norm": 2.2391486167907715, "learning_rate": 1.7143606158181637e-05, "loss": 0.2466, "step": 88700 }, { "epoch": 80.29411764705883, "grad_norm": 1.8025130033493042, "learning_rate": 1.7105934093557708e-05, "loss": 0.2138, "step": 88725 }, { "epoch": 80.31674208144797, "grad_norm": 1.6778080463409424, "learning_rate": 1.7068298136998867e-05, "loss": 0.2396, "step": 88750 }, { "epoch": 80.3393665158371, "grad_norm": 1.2616379261016846, "learning_rate": 1.7030698311978322e-05, "loss": 0.2711, 
"step": 88775 }, { "epoch": 80.36199095022624, "grad_norm": 1.9824016094207764, "learning_rate": 1.699313464194682e-05, "loss": 0.2512, "step": 88800 }, { "epoch": 80.38461538461539, "grad_norm": 4.605892181396484, "learning_rate": 1.6955607150332488e-05, "loss": 0.325, "step": 88825 }, { "epoch": 80.40723981900453, "grad_norm": 1.984059453010559, "learning_rate": 1.691811586054095e-05, "loss": 0.2753, "step": 88850 }, { "epoch": 80.42986425339366, "grad_norm": 1.7115203142166138, "learning_rate": 1.6880660795955193e-05, "loss": 0.233, "step": 88875 }, { "epoch": 80.4524886877828, "grad_norm": 1.6921839714050293, "learning_rate": 1.6843241979935677e-05, "loss": 0.257, "step": 88900 }, { "epoch": 80.47511312217195, "grad_norm": 1.532244086265564, "learning_rate": 1.6805859435820175e-05, "loss": 0.2257, "step": 88925 }, { "epoch": 80.49773755656109, "grad_norm": 1.4685120582580566, "learning_rate": 1.6768513186923918e-05, "loss": 0.2219, "step": 88950 }, { "epoch": 80.52036199095022, "grad_norm": 2.3729798793792725, "learning_rate": 1.6731203256539437e-05, "loss": 0.2435, "step": 88975 }, { "epoch": 80.54298642533936, "grad_norm": 1.9455311298370361, "learning_rate": 1.6693929667936662e-05, "loss": 0.3169, "step": 89000 }, { "epoch": 80.56561085972851, "grad_norm": 4.638451099395752, "learning_rate": 1.6656692444362792e-05, "loss": 0.3016, "step": 89025 }, { "epoch": 80.58823529411765, "grad_norm": 2.428246259689331, "learning_rate": 1.6619491609042433e-05, "loss": 0.2505, "step": 89050 }, { "epoch": 80.61085972850678, "grad_norm": 1.4879069328308105, "learning_rate": 1.658232718517741e-05, "loss": 0.2251, "step": 89075 }, { "epoch": 80.63348416289593, "grad_norm": 2.2105343341827393, "learning_rate": 1.6545199195946903e-05, "loss": 0.2347, "step": 89100 }, { "epoch": 80.65610859728507, "grad_norm": 2.0314412117004395, "learning_rate": 1.650810766450731e-05, "loss": 0.2961, "step": 89125 }, { "epoch": 80.67873303167421, "grad_norm": 1.6765267848968506, 
"learning_rate": 1.6471052613992345e-05, "loss": 0.2472, "step": 89150 }, { "epoch": 80.70135746606334, "grad_norm": 1.2480494976043701, "learning_rate": 1.643403406751296e-05, "loss": 0.3061, "step": 89175 }, { "epoch": 80.72398190045249, "grad_norm": 1.6919770240783691, "learning_rate": 1.6397052048157287e-05, "loss": 0.2119, "step": 89200 }, { "epoch": 80.74660633484163, "grad_norm": 2.1331253051757812, "learning_rate": 1.6360106578990753e-05, "loss": 0.2444, "step": 89225 }, { "epoch": 80.76923076923077, "grad_norm": 0.9414656162261963, "learning_rate": 1.632319768305592e-05, "loss": 0.2422, "step": 89250 }, { "epoch": 80.7918552036199, "grad_norm": 1.2934455871582031, "learning_rate": 1.6286325383372606e-05, "loss": 0.2695, "step": 89275 }, { "epoch": 80.81447963800905, "grad_norm": 1.551058292388916, "learning_rate": 1.624948970293772e-05, "loss": 0.2508, "step": 89300 }, { "epoch": 80.83710407239819, "grad_norm": 1.4792729616165161, "learning_rate": 1.6212690664725437e-05, "loss": 0.226, "step": 89325 }, { "epoch": 80.85972850678733, "grad_norm": 1.9807225465774536, "learning_rate": 1.6175928291686968e-05, "loss": 0.2861, "step": 89350 }, { "epoch": 80.88235294117646, "grad_norm": 2.2029078006744385, "learning_rate": 1.6139202606750756e-05, "loss": 0.2617, "step": 89375 }, { "epoch": 80.90497737556561, "grad_norm": 0.8517450094223022, "learning_rate": 1.6102513632822285e-05, "loss": 0.2689, "step": 89400 }, { "epoch": 80.92760180995475, "grad_norm": 1.0860334634780884, "learning_rate": 1.6065861392784195e-05, "loss": 0.2503, "step": 89425 }, { "epoch": 80.9502262443439, "grad_norm": 2.202310562133789, "learning_rate": 1.6029245909496174e-05, "loss": 0.2197, "step": 89450 }, { "epoch": 80.97285067873302, "grad_norm": 2.39774751663208, "learning_rate": 1.5992667205795037e-05, "loss": 0.2722, "step": 89475 }, { "epoch": 80.99547511312217, "grad_norm": 1.3892974853515625, "learning_rate": 1.5956125304494585e-05, "loss": 0.2617, "step": 89500 }, { "epoch": 
81.01809954751131, "grad_norm": 1.5049279928207397, "learning_rate": 1.5919620228385755e-05, "loss": 0.2354, "step": 89525 }, { "epoch": 81.04072398190046, "grad_norm": 3.5445001125335693, "learning_rate": 1.5883152000236423e-05, "loss": 0.2787, "step": 89550 }, { "epoch": 81.0633484162896, "grad_norm": 1.3553639650344849, "learning_rate": 1.5846720642791582e-05, "loss": 0.1943, "step": 89575 }, { "epoch": 81.08597285067873, "grad_norm": 4.648387432098389, "learning_rate": 1.5810326178773132e-05, "loss": 0.2349, "step": 89600 }, { "epoch": 81.10859728506787, "grad_norm": 1.2038744688034058, "learning_rate": 1.5773968630880044e-05, "loss": 0.2052, "step": 89625 }, { "epoch": 81.13122171945702, "grad_norm": 1.6837557554244995, "learning_rate": 1.573764802178819e-05, "loss": 0.2153, "step": 89650 }, { "epoch": 81.15384615384616, "grad_norm": 1.333583116531372, "learning_rate": 1.5701364374150492e-05, "loss": 0.2624, "step": 89675 }, { "epoch": 81.17647058823529, "grad_norm": 3.7715420722961426, "learning_rate": 1.5665117710596726e-05, "loss": 0.2495, "step": 89700 }, { "epoch": 81.19909502262443, "grad_norm": 0.7759903073310852, "learning_rate": 1.5628908053733684e-05, "loss": 0.2014, "step": 89725 }, { "epoch": 81.22171945701358, "grad_norm": 1.1528794765472412, "learning_rate": 1.5592735426145e-05, "loss": 0.2359, "step": 89750 }, { "epoch": 81.24434389140272, "grad_norm": 2.053586483001709, "learning_rate": 1.555659985039129e-05, "loss": 0.2627, "step": 89775 }, { "epoch": 81.26696832579185, "grad_norm": 1.9309684038162231, "learning_rate": 1.552050134900998e-05, "loss": 0.2415, "step": 89800 }, { "epoch": 81.289592760181, "grad_norm": 1.7584835290908813, "learning_rate": 1.5484439944515462e-05, "loss": 0.321, "step": 89825 }, { "epoch": 81.31221719457014, "grad_norm": 1.91073739528656, "learning_rate": 1.5448415659398907e-05, "loss": 0.2044, "step": 89850 }, { "epoch": 81.33484162895928, "grad_norm": 2.160158395767212, "learning_rate": 1.541242851612841e-05, 
"loss": 0.2069, "step": 89875 }, { "epoch": 81.35746606334841, "grad_norm": 2.3774430751800537, "learning_rate": 1.5376478537148817e-05, "loss": 0.2548, "step": 89900 }, { "epoch": 81.38009049773756, "grad_norm": 1.7145510911941528, "learning_rate": 1.53405657448819e-05, "loss": 0.2984, "step": 89925 }, { "epoch": 81.4027149321267, "grad_norm": 1.8368686437606812, "learning_rate": 1.5304690161726117e-05, "loss": 0.2372, "step": 89950 }, { "epoch": 81.42533936651584, "grad_norm": 1.3777281045913696, "learning_rate": 1.5268851810056833e-05, "loss": 0.2717, "step": 89975 }, { "epoch": 81.44796380090497, "grad_norm": 2.33797025680542, "learning_rate": 1.5233050712226108e-05, "loss": 0.296, "step": 90000 }, { "epoch": 81.47058823529412, "grad_norm": 1.3664602041244507, "learning_rate": 1.5197286890562827e-05, "loss": 0.2516, "step": 90025 }, { "epoch": 81.49321266968326, "grad_norm": 1.3721119165420532, "learning_rate": 1.5161560367372571e-05, "loss": 0.2688, "step": 90050 }, { "epoch": 81.5158371040724, "grad_norm": 1.7181849479675293, "learning_rate": 1.5125871164937719e-05, "loss": 0.2863, "step": 90075 }, { "epoch": 81.53846153846153, "grad_norm": 1.4487202167510986, "learning_rate": 1.5090219305517298e-05, "loss": 0.2202, "step": 90100 }, { "epoch": 81.56108597285068, "grad_norm": 1.9481704235076904, "learning_rate": 1.505460481134713e-05, "loss": 0.2709, "step": 90125 }, { "epoch": 81.58371040723982, "grad_norm": 1.483481526374817, "learning_rate": 1.5019027704639652e-05, "loss": 0.2657, "step": 90150 }, { "epoch": 81.60633484162896, "grad_norm": 0.8282179832458496, "learning_rate": 1.498348800758406e-05, "loss": 0.2389, "step": 90175 }, { "epoch": 81.6289592760181, "grad_norm": 2.205792188644409, "learning_rate": 1.494798574234613e-05, "loss": 0.2383, "step": 90200 }, { "epoch": 81.65158371040724, "grad_norm": 1.9597299098968506, "learning_rate": 1.4912520931068375e-05, "loss": 0.2259, "step": 90225 }, { "epoch": 81.67420814479638, "grad_norm": 
1.1000136137008667, "learning_rate": 1.4877093595869927e-05, "loss": 0.2828, "step": 90250 }, { "epoch": 81.69683257918552, "grad_norm": 1.8393522500991821, "learning_rate": 1.4841703758846484e-05, "loss": 0.2638, "step": 90275 }, { "epoch": 81.71945701357465, "grad_norm": 1.8651313781738281, "learning_rate": 1.4806351442070453e-05, "loss": 0.1863, "step": 90300 }, { "epoch": 81.7420814479638, "grad_norm": 1.395439863204956, "learning_rate": 1.4771036667590749e-05, "loss": 0.2088, "step": 90325 }, { "epoch": 81.76470588235294, "grad_norm": 0.7056885361671448, "learning_rate": 1.473575945743295e-05, "loss": 0.2418, "step": 90350 }, { "epoch": 81.78733031674209, "grad_norm": 1.6459146738052368, "learning_rate": 1.4700519833599136e-05, "loss": 0.2634, "step": 90375 }, { "epoch": 81.80995475113122, "grad_norm": 2.4437522888183594, "learning_rate": 1.4665317818068012e-05, "loss": 0.2991, "step": 90400 }, { "epoch": 81.83257918552036, "grad_norm": 2.822815418243408, "learning_rate": 1.4631559285429537e-05, "loss": 0.327, "step": 90425 }, { "epoch": 81.8552036199095, "grad_norm": 2.050520658493042, "learning_rate": 1.4596431045837553e-05, "loss": 0.215, "step": 90450 }, { "epoch": 81.87782805429865, "grad_norm": 1.3490715026855469, "learning_rate": 1.4561340479467562e-05, "loss": 0.2292, "step": 90475 }, { "epoch": 81.90045248868778, "grad_norm": 2.234866142272949, "learning_rate": 1.4526287608205314e-05, "loss": 0.2813, "step": 90500 }, { "epoch": 81.92307692307692, "grad_norm": 1.5313230752944946, "learning_rate": 1.4491272453912964e-05, "loss": 0.2411, "step": 90525 }, { "epoch": 81.94570135746606, "grad_norm": 1.1357070207595825, "learning_rate": 1.4456295038429216e-05, "loss": 0.2521, "step": 90550 }, { "epoch": 81.96832579185521, "grad_norm": 1.6652240753173828, "learning_rate": 1.4421355383569172e-05, "loss": 0.3507, "step": 90575 }, { "epoch": 81.99095022624434, "grad_norm": 0.9113162159919739, "learning_rate": 1.438645351112444e-05, "loss": 0.262, "step": 90600 
}, { "epoch": 82.01357466063348, "grad_norm": 1.4021083116531372, "learning_rate": 1.4351589442863018e-05, "loss": 0.2673, "step": 90625 }, { "epoch": 82.03619909502262, "grad_norm": 1.5596468448638916, "learning_rate": 1.4316763200529377e-05, "loss": 0.2682, "step": 90650 }, { "epoch": 82.05882352941177, "grad_norm": 1.2058978080749512, "learning_rate": 1.428336561468464e-05, "loss": 0.2488, "step": 90675 }, { "epoch": 82.08144796380091, "grad_norm": 1.6812113523483276, "learning_rate": 1.4248613574155315e-05, "loss": 0.2568, "step": 90700 }, { "epoch": 82.10407239819004, "grad_norm": 2.0603604316711426, "learning_rate": 1.4213899423778998e-05, "loss": 0.2337, "step": 90725 }, { "epoch": 82.12669683257919, "grad_norm": 1.0920335054397583, "learning_rate": 1.4179223185206579e-05, "loss": 0.1952, "step": 90750 }, { "epoch": 82.14932126696833, "grad_norm": 1.583495020866394, "learning_rate": 1.4144584880065395e-05, "loss": 0.2322, "step": 90775 }, { "epoch": 82.17194570135747, "grad_norm": 1.7024261951446533, "learning_rate": 1.4109984529959045e-05, "loss": 0.1983, "step": 90800 }, { "epoch": 82.1945701357466, "grad_norm": 1.6076480150222778, "learning_rate": 1.4075422156467522e-05, "loss": 0.2192, "step": 90825 }, { "epoch": 82.21719457013575, "grad_norm": 2.3282856941223145, "learning_rate": 1.4040897781147067e-05, "loss": 0.277, "step": 90850 }, { "epoch": 82.23981900452489, "grad_norm": 1.092956304550171, "learning_rate": 1.400641142553029e-05, "loss": 0.2378, "step": 90875 }, { "epoch": 82.26244343891403, "grad_norm": 1.6435421705245972, "learning_rate": 1.3971963111126025e-05, "loss": 0.2142, "step": 90900 }, { "epoch": 82.28506787330316, "grad_norm": 1.6375032663345337, "learning_rate": 1.3937552859419438e-05, "loss": 0.2553, "step": 90925 }, { "epoch": 82.3076923076923, "grad_norm": 1.4579874277114868, "learning_rate": 1.3903180691871885e-05, "loss": 0.24, "step": 90950 }, { "epoch": 82.33031674208145, "grad_norm": 1.8350778818130493, "learning_rate": 
1.3868846629921068e-05, "loss": 0.1939, "step": 90975 }, { "epoch": 82.3529411764706, "grad_norm": 1.1116398572921753, "learning_rate": 1.3834550694980817e-05, "loss": 0.262, "step": 91000 }, { "epoch": 82.37556561085972, "grad_norm": 1.8121416568756104, "learning_rate": 1.3800292908441246e-05, "loss": 0.2183, "step": 91025 }, { "epoch": 82.39819004524887, "grad_norm": 1.6931511163711548, "learning_rate": 1.3766073291668688e-05, "loss": 0.2559, "step": 91050 }, { "epoch": 82.42081447963801, "grad_norm": 1.3180493116378784, "learning_rate": 1.3731891866005615e-05, "loss": 0.2303, "step": 91075 }, { "epoch": 82.44343891402715, "grad_norm": 4.452065944671631, "learning_rate": 1.369774865277072e-05, "loss": 0.2706, "step": 91100 }, { "epoch": 82.46606334841628, "grad_norm": 1.5095728635787964, "learning_rate": 1.3663643673258839e-05, "loss": 0.2605, "step": 91125 }, { "epoch": 82.48868778280543, "grad_norm": 4.638199329376221, "learning_rate": 1.3629576948741006e-05, "loss": 0.2598, "step": 91150 }, { "epoch": 82.51131221719457, "grad_norm": 1.861236572265625, "learning_rate": 1.3595548500464315e-05, "loss": 0.2515, "step": 91175 }, { "epoch": 82.53393665158372, "grad_norm": 1.0149219036102295, "learning_rate": 1.3561558349652091e-05, "loss": 0.1994, "step": 91200 }, { "epoch": 82.55656108597285, "grad_norm": 1.626465916633606, "learning_rate": 1.3527606517503667e-05, "loss": 0.3791, "step": 91225 }, { "epoch": 82.57918552036199, "grad_norm": 1.5020828247070312, "learning_rate": 1.3493693025194572e-05, "loss": 0.2321, "step": 91250 }, { "epoch": 82.60180995475113, "grad_norm": 2.61862850189209, "learning_rate": 1.3459817893876344e-05, "loss": 0.2618, "step": 91275 }, { "epoch": 82.62443438914028, "grad_norm": 1.5377072095870972, "learning_rate": 1.3425981144676664e-05, "loss": 0.2238, "step": 91300 }, { "epoch": 82.6470588235294, "grad_norm": 1.8476402759552002, "learning_rate": 1.3392182798699214e-05, "loss": 0.2467, "step": 91325 }, { "epoch": 82.66968325791855, 
"grad_norm": 1.638711929321289, "learning_rate": 1.3358422877023778e-05, "loss": 0.3005, "step": 91350 }, { "epoch": 82.6923076923077, "grad_norm": 1.5282971858978271, "learning_rate": 1.3324701400706106e-05, "loss": 0.2854, "step": 91375 }, { "epoch": 82.71493212669684, "grad_norm": 1.1351838111877441, "learning_rate": 1.3291018390778065e-05, "loss": 0.2075, "step": 91400 }, { "epoch": 82.73755656108597, "grad_norm": 2.479787588119507, "learning_rate": 1.3257373868247437e-05, "loss": 0.3039, "step": 91425 }, { "epoch": 82.76018099547511, "grad_norm": 1.5777153968811035, "learning_rate": 1.3223767854098075e-05, "loss": 0.271, "step": 91450 }, { "epoch": 82.78280542986425, "grad_norm": 0.48745808005332947, "learning_rate": 1.3190200369289739e-05, "loss": 0.2293, "step": 91475 }, { "epoch": 82.8054298642534, "grad_norm": 1.8441256284713745, "learning_rate": 1.3156671434758249e-05, "loss": 0.1951, "step": 91500 }, { "epoch": 82.82805429864253, "grad_norm": 1.2741081714630127, "learning_rate": 1.3123181071415292e-05, "loss": 0.2211, "step": 91525 }, { "epoch": 82.85067873303167, "grad_norm": 2.198697805404663, "learning_rate": 1.3089729300148571e-05, "loss": 0.2804, "step": 91550 }, { "epoch": 82.87330316742081, "grad_norm": 1.9699496030807495, "learning_rate": 1.3056316141821655e-05, "loss": 0.2583, "step": 91575 }, { "epoch": 82.89592760180996, "grad_norm": 2.259859085083008, "learning_rate": 1.3022941617274096e-05, "loss": 0.3086, "step": 91600 }, { "epoch": 82.91855203619909, "grad_norm": 0.7027202248573303, "learning_rate": 1.298960574732128e-05, "loss": 0.1961, "step": 91625 }, { "epoch": 82.94117647058823, "grad_norm": 1.2334860563278198, "learning_rate": 1.2956308552754574e-05, "loss": 0.264, "step": 91650 }, { "epoch": 82.96380090497738, "grad_norm": 1.6832187175750732, "learning_rate": 1.2923050054341116e-05, "loss": 0.225, "step": 91675 }, { "epoch": 82.98642533936652, "grad_norm": 1.0022525787353516, "learning_rate": 1.2889830272824015e-05, "loss": 0.2152, 
"step": 91700 }, { "epoch": 83.00904977375566, "grad_norm": 1.7496888637542725, "learning_rate": 1.2856649228922128e-05, "loss": 0.1937, "step": 91725 }, { "epoch": 83.03167420814479, "grad_norm": 1.1650596857070923, "learning_rate": 1.2823506943330261e-05, "loss": 0.2486, "step": 91750 }, { "epoch": 83.05429864253394, "grad_norm": 3.7929768562316895, "learning_rate": 1.2790403436718955e-05, "loss": 0.2547, "step": 91775 }, { "epoch": 83.07692307692308, "grad_norm": 1.5414960384368896, "learning_rate": 1.2757338729734627e-05, "loss": 0.2198, "step": 91800 }, { "epoch": 83.09954751131222, "grad_norm": 1.1925899982452393, "learning_rate": 1.2724312842999438e-05, "loss": 0.2075, "step": 91825 }, { "epoch": 83.12217194570135, "grad_norm": 0.9208041429519653, "learning_rate": 1.2691325797111412e-05, "loss": 0.2762, "step": 91850 }, { "epoch": 83.1447963800905, "grad_norm": 1.5031992197036743, "learning_rate": 1.2658377612644261e-05, "loss": 0.2388, "step": 91875 }, { "epoch": 83.16742081447964, "grad_norm": 1.0860973596572876, "learning_rate": 1.2625468310147543e-05, "loss": 0.1943, "step": 91900 }, { "epoch": 83.19004524886878, "grad_norm": 1.8405532836914062, "learning_rate": 1.2592597910146484e-05, "loss": 0.2026, "step": 91925 }, { "epoch": 83.21266968325791, "grad_norm": 1.0833250284194946, "learning_rate": 1.2559766433142136e-05, "loss": 0.2159, "step": 91950 }, { "epoch": 83.23529411764706, "grad_norm": 1.1138516664505005, "learning_rate": 1.252697389961118e-05, "loss": 0.2332, "step": 91975 }, { "epoch": 83.2579185520362, "grad_norm": 4.483831882476807, "learning_rate": 1.2494220330006106e-05, "loss": 0.2463, "step": 92000 }, { "epoch": 83.28054298642535, "grad_norm": 1.4626716375350952, "learning_rate": 1.2461505744755008e-05, "loss": 0.241, "step": 92025 }, { "epoch": 83.30316742081448, "grad_norm": 1.9593162536621094, "learning_rate": 1.242883016426175e-05, "loss": 0.2296, "step": 92050 }, { "epoch": 83.32579185520362, "grad_norm": 5.334970474243164, 
"learning_rate": 1.2396193608905788e-05, "loss": 0.2694, "step": 92075 }, { "epoch": 83.34841628959276, "grad_norm": 1.8293943405151367, "learning_rate": 1.2363596099042308e-05, "loss": 0.2916, "step": 92100 }, { "epoch": 83.3710407239819, "grad_norm": 1.431554913520813, "learning_rate": 1.2331037655002129e-05, "loss": 0.2393, "step": 92125 }, { "epoch": 83.39366515837104, "grad_norm": 1.992844581604004, "learning_rate": 1.229851829709165e-05, "loss": 0.2334, "step": 92150 }, { "epoch": 83.41628959276018, "grad_norm": 1.039581537246704, "learning_rate": 1.226603804559298e-05, "loss": 0.2296, "step": 92175 }, { "epoch": 83.43891402714932, "grad_norm": 1.2727681398391724, "learning_rate": 1.2233596920763747e-05, "loss": 0.2359, "step": 92200 }, { "epoch": 83.46153846153847, "grad_norm": 1.4922980070114136, "learning_rate": 1.2201194942837259e-05, "loss": 0.2194, "step": 92225 }, { "epoch": 83.4841628959276, "grad_norm": 2.502060890197754, "learning_rate": 1.216883213202234e-05, "loss": 0.3302, "step": 92250 }, { "epoch": 83.50678733031674, "grad_norm": 1.0203526020050049, "learning_rate": 1.213650850850344e-05, "loss": 0.1877, "step": 92275 }, { "epoch": 83.52941176470588, "grad_norm": 1.7818883657455444, "learning_rate": 1.210422409244052e-05, "loss": 0.177, "step": 92300 }, { "epoch": 83.55203619909503, "grad_norm": 1.2579689025878906, "learning_rate": 1.2071978903969142e-05, "loss": 0.2073, "step": 92325 }, { "epoch": 83.57466063348416, "grad_norm": 1.957848072052002, "learning_rate": 1.2039772963200344e-05, "loss": 0.2609, "step": 92350 }, { "epoch": 83.5972850678733, "grad_norm": 1.2671171426773071, "learning_rate": 1.2007606290220733e-05, "loss": 0.2539, "step": 92375 }, { "epoch": 83.61990950226244, "grad_norm": 1.2481365203857422, "learning_rate": 1.1975478905092379e-05, "loss": 0.2584, "step": 92400 }, { "epoch": 83.64253393665159, "grad_norm": 1.514829397201538, "learning_rate": 1.1943390827852917e-05, "loss": 0.2208, "step": 92425 }, { "epoch": 
83.66515837104072, "grad_norm": 1.7578043937683105, "learning_rate": 1.1911342078515374e-05, "loss": 0.2245, "step": 92450 }, { "epoch": 83.68778280542986, "grad_norm": 1.1728429794311523, "learning_rate": 1.1879332677068335e-05, "loss": 0.1893, "step": 92475 }, { "epoch": 83.710407239819, "grad_norm": 2.2865982055664062, "learning_rate": 1.1847362643475789e-05, "loss": 0.2877, "step": 92500 }, { "epoch": 83.73303167420815, "grad_norm": 1.3461461067199707, "learning_rate": 1.1815431997677201e-05, "loss": 0.1979, "step": 92525 }, { "epoch": 83.75565610859728, "grad_norm": 1.475647211074829, "learning_rate": 1.1783540759587445e-05, "loss": 0.2905, "step": 92550 }, { "epoch": 83.77828054298642, "grad_norm": 2.1079046726226807, "learning_rate": 1.1751688949096857e-05, "loss": 0.2425, "step": 92575 }, { "epoch": 83.80090497737557, "grad_norm": 1.268576979637146, "learning_rate": 1.1719876586071114e-05, "loss": 0.2529, "step": 92600 }, { "epoch": 83.82352941176471, "grad_norm": 1.504587173461914, "learning_rate": 1.1688103690351377e-05, "loss": 0.2188, "step": 92625 }, { "epoch": 83.84615384615384, "grad_norm": 1.9538213014602661, "learning_rate": 1.1656370281754113e-05, "loss": 0.3516, "step": 92650 }, { "epoch": 83.86877828054298, "grad_norm": 1.2477388381958008, "learning_rate": 1.162467638007122e-05, "loss": 0.2276, "step": 92675 }, { "epoch": 83.89140271493213, "grad_norm": 1.0654138326644897, "learning_rate": 1.1593022005069908e-05, "loss": 0.227, "step": 92700 }, { "epoch": 83.91402714932127, "grad_norm": 1.1622322797775269, "learning_rate": 1.156140717649277e-05, "loss": 0.2651, "step": 92725 }, { "epoch": 83.9366515837104, "grad_norm": 1.1602331399917603, "learning_rate": 1.1529831914057713e-05, "loss": 0.2379, "step": 92750 }, { "epoch": 83.95927601809954, "grad_norm": 2.367241382598877, "learning_rate": 1.1498296237458e-05, "loss": 0.2841, "step": 92775 }, { "epoch": 83.98190045248869, "grad_norm": 2.114567518234253, "learning_rate": 1.1466800166362136e-05, 
"loss": 0.246, "step": 92800 }, { "epoch": 84.00452488687783, "grad_norm": 1.3881651163101196, "learning_rate": 1.1435343720413986e-05, "loss": 0.2544, "step": 92825 }, { "epoch": 84.02714932126698, "grad_norm": 1.0981236696243286, "learning_rate": 1.1403926919232706e-05, "loss": 0.2271, "step": 92850 }, { "epoch": 84.0497737556561, "grad_norm": 1.800406575202942, "learning_rate": 1.1372549782412696e-05, "loss": 0.2432, "step": 92875 }, { "epoch": 84.07239819004525, "grad_norm": 1.1198346614837646, "learning_rate": 1.1341212329523594e-05, "loss": 0.3307, "step": 92900 }, { "epoch": 84.09502262443439, "grad_norm": 0.9892085194587708, "learning_rate": 1.1309914580110367e-05, "loss": 0.1931, "step": 92925 }, { "epoch": 84.11764705882354, "grad_norm": 1.205314040184021, "learning_rate": 1.1278656553693122e-05, "loss": 0.234, "step": 92950 }, { "epoch": 84.14027149321267, "grad_norm": 0.7759678363800049, "learning_rate": 1.1247438269767275e-05, "loss": 0.1786, "step": 92975 }, { "epoch": 84.16289592760181, "grad_norm": 1.9085394144058228, "learning_rate": 1.1216259747803394e-05, "loss": 0.1732, "step": 93000 }, { "epoch": 84.18552036199095, "grad_norm": 4.513044834136963, "learning_rate": 1.1185121007247305e-05, "loss": 0.2084, "step": 93025 }, { "epoch": 84.2081447963801, "grad_norm": 1.1749680042266846, "learning_rate": 1.115402206751995e-05, "loss": 0.2534, "step": 93050 }, { "epoch": 84.23076923076923, "grad_norm": 1.0119569301605225, "learning_rate": 1.1122962948017528e-05, "loss": 0.1841, "step": 93075 }, { "epoch": 84.25339366515837, "grad_norm": 0.9673476219177246, "learning_rate": 1.1091943668111327e-05, "loss": 0.2651, "step": 93100 }, { "epoch": 84.27601809954751, "grad_norm": 1.5067884922027588, "learning_rate": 1.1060964247147857e-05, "loss": 0.2389, "step": 93125 }, { "epoch": 84.29864253393666, "grad_norm": 1.245119571685791, "learning_rate": 1.1030024704448703e-05, "loss": 0.1927, "step": 93150 }, { "epoch": 84.32126696832579, "grad_norm": 
1.9086949825286865, "learning_rate": 1.0999125059310646e-05, "loss": 0.2167, "step": 93175 }, { "epoch": 84.34389140271493, "grad_norm": 1.1651722192764282, "learning_rate": 1.0968265331005511e-05, "loss": 0.1841, "step": 93200 }, { "epoch": 84.36651583710407, "grad_norm": 1.0608320236206055, "learning_rate": 1.0937445538780293e-05, "loss": 0.1861, "step": 93225 }, { "epoch": 84.38914027149322, "grad_norm": 1.3786742687225342, "learning_rate": 1.0906665701857017e-05, "loss": 0.2703, "step": 93250 }, { "epoch": 84.41176470588235, "grad_norm": 1.4715839624404907, "learning_rate": 1.0875925839432862e-05, "loss": 0.2952, "step": 93275 }, { "epoch": 84.43438914027149, "grad_norm": 1.4558757543563843, "learning_rate": 1.0845225970679989e-05, "loss": 0.2203, "step": 93300 }, { "epoch": 84.45701357466064, "grad_norm": 1.2345609664916992, "learning_rate": 1.0814566114745698e-05, "loss": 0.2048, "step": 93325 }, { "epoch": 84.47963800904978, "grad_norm": 1.4966411590576172, "learning_rate": 1.0783946290752271e-05, "loss": 0.2378, "step": 93350 }, { "epoch": 84.50226244343891, "grad_norm": 1.184018850326538, "learning_rate": 1.0753366517797071e-05, "loss": 0.2707, "step": 93375 }, { "epoch": 84.52488687782805, "grad_norm": 1.4262800216674805, "learning_rate": 1.0722826814952418e-05, "loss": 0.2474, "step": 93400 }, { "epoch": 84.5475113122172, "grad_norm": 2.6090216636657715, "learning_rate": 1.0692327201265724e-05, "loss": 0.2004, "step": 93425 }, { "epoch": 84.57013574660634, "grad_norm": 1.0995943546295166, "learning_rate": 1.0661867695759324e-05, "loss": 0.2601, "step": 93450 }, { "epoch": 84.59276018099547, "grad_norm": 1.1398271322250366, "learning_rate": 1.0631448317430589e-05, "loss": 0.2098, "step": 93475 }, { "epoch": 84.61538461538461, "grad_norm": 2.0760931968688965, "learning_rate": 1.0601069085251816e-05, "loss": 0.2365, "step": 93500 }, { "epoch": 84.63800904977376, "grad_norm": 1.293813705444336, "learning_rate": 1.0570730018170314e-05, "loss": 0.2195, "step": 
93525 }, { "epoch": 84.6606334841629, "grad_norm": 5.195942401885986, "learning_rate": 1.0540431135108294e-05, "loss": 0.2578, "step": 93550 }, { "epoch": 84.68325791855203, "grad_norm": 1.6340610980987549, "learning_rate": 1.0510172454962951e-05, "loss": 0.2413, "step": 93575 }, { "epoch": 84.70588235294117, "grad_norm": 1.8091683387756348, "learning_rate": 1.0479953996606358e-05, "loss": 0.2124, "step": 93600 }, { "epoch": 84.72850678733032, "grad_norm": 1.0600144863128662, "learning_rate": 1.0449775778885538e-05, "loss": 0.2983, "step": 93625 }, { "epoch": 84.75113122171946, "grad_norm": 1.701952338218689, "learning_rate": 1.0419637820622394e-05, "loss": 0.2389, "step": 93650 }, { "epoch": 84.77375565610859, "grad_norm": 1.4062515497207642, "learning_rate": 1.0389540140613733e-05, "loss": 0.2156, "step": 93675 }, { "epoch": 84.79638009049773, "grad_norm": 1.0955379009246826, "learning_rate": 1.0359482757631263e-05, "loss": 0.2286, "step": 93700 }, { "epoch": 84.81900452488688, "grad_norm": 1.2862584590911865, "learning_rate": 1.0329465690421488e-05, "loss": 0.2216, "step": 93725 }, { "epoch": 84.84162895927602, "grad_norm": 2.141054153442383, "learning_rate": 1.0299488957705848e-05, "loss": 0.2311, "step": 93750 }, { "epoch": 84.86425339366515, "grad_norm": 1.131123423576355, "learning_rate": 1.0269552578180564e-05, "loss": 0.2346, "step": 93775 }, { "epoch": 84.8868778280543, "grad_norm": 1.3921465873718262, "learning_rate": 1.0239656570516752e-05, "loss": 0.2685, "step": 93800 }, { "epoch": 84.90950226244344, "grad_norm": 5.968398571014404, "learning_rate": 1.0209800953360261e-05, "loss": 0.3258, "step": 93825 }, { "epoch": 84.93212669683258, "grad_norm": 1.6810258626937866, "learning_rate": 1.0179985745331856e-05, "loss": 0.2198, "step": 93850 }, { "epoch": 84.95475113122171, "grad_norm": 4.591853618621826, "learning_rate": 1.0150210965026995e-05, "loss": 0.2331, "step": 93875 }, { "epoch": 84.97737556561086, "grad_norm": 1.592495322227478, "learning_rate": 
1.0120476631016005e-05, "loss": 0.2732, "step": 93900 }, { "epoch": 85.0, "grad_norm": 1.7209784984588623, "learning_rate": 1.0090782761843929e-05, "loss": 0.2067, "step": 93925 }, { "epoch": 85.02262443438914, "grad_norm": 1.0648268461227417, "learning_rate": 1.006112937603062e-05, "loss": 0.1947, "step": 93950 }, { "epoch": 85.04524886877829, "grad_norm": 1.4495179653167725, "learning_rate": 1.003151649207062e-05, "loss": 0.2439, "step": 93975 }, { "epoch": 85.06787330316742, "grad_norm": 1.446929931640625, "learning_rate": 1.0001944128433287e-05, "loss": 0.1935, "step": 94000 }, { "epoch": 85.09049773755656, "grad_norm": 1.1472259759902954, "learning_rate": 9.97241230356263e-06, "loss": 0.2541, "step": 94025 }, { "epoch": 85.1131221719457, "grad_norm": 2.1452972888946533, "learning_rate": 9.94292103587745e-06, "loss": 0.1793, "step": 94050 }, { "epoch": 85.13574660633485, "grad_norm": 1.1351040601730347, "learning_rate": 9.913470343771182e-06, "loss": 0.2169, "step": 94075 }, { "epoch": 85.15837104072398, "grad_norm": 1.1573127508163452, "learning_rate": 9.88406024561201e-06, "loss": 0.2461, "step": 94100 }, { "epoch": 85.18099547511312, "grad_norm": 1.3857795000076294, "learning_rate": 9.854690759742761e-06, "loss": 0.2407, "step": 94125 }, { "epoch": 85.20361990950227, "grad_norm": 1.405233383178711, "learning_rate": 9.825361904480957e-06, "loss": 0.24, "step": 94150 }, { "epoch": 85.22624434389141, "grad_norm": 1.58432137966156, "learning_rate": 9.796073698118758e-06, "loss": 0.1896, "step": 94175 }, { "epoch": 85.24886877828054, "grad_norm": 1.1714897155761719, "learning_rate": 9.76682615892301e-06, "loss": 0.2228, "step": 94200 }, { "epoch": 85.27149321266968, "grad_norm": 1.204953670501709, "learning_rate": 9.73761930513513e-06, "loss": 0.2495, "step": 94225 }, { "epoch": 85.29411764705883, "grad_norm": 1.3959544897079468, "learning_rate": 9.708453154971237e-06, "loss": 0.2837, "step": 94250 }, { "epoch": 85.31674208144797, "grad_norm": 0.8167849779129028, 
"learning_rate": 9.679327726621999e-06, "loss": 0.1963, "step": 94275 }, { "epoch": 85.3393665158371, "grad_norm": 0.5334164500236511, "learning_rate": 9.650243038252725e-06, "loss": 0.2422, "step": 94300 }, { "epoch": 85.36199095022624, "grad_norm": 0.9496262073516846, "learning_rate": 9.621199108003288e-06, "loss": 0.2173, "step": 94325 }, { "epoch": 85.38461538461539, "grad_norm": 0.8168772459030151, "learning_rate": 9.592195953988191e-06, "loss": 0.228, "step": 94350 }, { "epoch": 85.40723981900453, "grad_norm": 1.048490285873413, "learning_rate": 9.563233594296412e-06, "loss": 0.2297, "step": 94375 }, { "epoch": 85.42986425339366, "grad_norm": 3.6872718334198, "learning_rate": 9.534312046991596e-06, "loss": 0.244, "step": 94400 }, { "epoch": 85.4524886877828, "grad_norm": 1.9084551334381104, "learning_rate": 9.505431330111845e-06, "loss": 0.2154, "step": 94425 }, { "epoch": 85.47511312217195, "grad_norm": 0.738849401473999, "learning_rate": 9.476591461669852e-06, "loss": 0.2178, "step": 94450 }, { "epoch": 85.49773755656109, "grad_norm": 0.7861320376396179, "learning_rate": 9.447792459652794e-06, "loss": 0.2324, "step": 94475 }, { "epoch": 85.52036199095022, "grad_norm": 0.9632349014282227, "learning_rate": 9.419034342022406e-06, "loss": 0.2168, "step": 94500 }, { "epoch": 85.54298642533936, "grad_norm": 1.1395058631896973, "learning_rate": 9.390317126714852e-06, "loss": 0.23, "step": 94525 }, { "epoch": 85.56561085972851, "grad_norm": 0.8488622307777405, "learning_rate": 9.361640831640876e-06, "loss": 0.2729, "step": 94550 }, { "epoch": 85.58823529411765, "grad_norm": 1.4476090669631958, "learning_rate": 9.333005474685621e-06, "loss": 0.2602, "step": 94575 }, { "epoch": 85.61085972850678, "grad_norm": 4.248432159423828, "learning_rate": 9.304411073708748e-06, "loss": 0.2542, "step": 94600 }, { "epoch": 85.63348416289593, "grad_norm": 1.1603845357894897, "learning_rate": 9.275857646544336e-06, "loss": 0.1941, "step": 94625 }, { "epoch": 85.65610859728507, 
"grad_norm": 1.3284860849380493, "learning_rate": 9.247345211000954e-06, "loss": 0.248, "step": 94650 }, { "epoch": 85.67873303167421, "grad_norm": 1.5702111721038818, "learning_rate": 9.218873784861544e-06, "loss": 0.2863, "step": 94675 }, { "epoch": 85.70135746606334, "grad_norm": 0.984173059463501, "learning_rate": 9.191579813898661e-06, "loss": 0.321, "step": 94700 }, { "epoch": 85.72398190045249, "grad_norm": 1.5825644731521606, "learning_rate": 9.163188817678019e-06, "loss": 0.2568, "step": 94725 }, { "epoch": 85.74660633484163, "grad_norm": 0.6896769404411316, "learning_rate": 9.134838883349022e-06, "loss": 0.2086, "step": 94750 }, { "epoch": 85.76923076923077, "grad_norm": 1.1233223676681519, "learning_rate": 9.106530028593325e-06, "loss": 0.3007, "step": 94775 }, { "epoch": 85.7918552036199, "grad_norm": 1.124239444732666, "learning_rate": 9.078262271066916e-06, "loss": 0.2402, "step": 94800 }, { "epoch": 85.81447963800905, "grad_norm": 1.208046317100525, "learning_rate": 9.05003562840019e-06, "loss": 0.2751, "step": 94825 }, { "epoch": 85.83710407239819, "grad_norm": 1.4039021730422974, "learning_rate": 9.021850118197848e-06, "loss": 0.2098, "step": 94850 }, { "epoch": 85.85972850678733, "grad_norm": 1.041251540184021, "learning_rate": 8.993705758039004e-06, "loss": 0.2017, "step": 94875 }, { "epoch": 85.88235294117646, "grad_norm": 1.353350281715393, "learning_rate": 8.965602565477025e-06, "loss": 0.1931, "step": 94900 }, { "epoch": 85.90497737556561, "grad_norm": 1.6761797666549683, "learning_rate": 8.937540558039675e-06, "loss": 0.2041, "step": 94925 }, { "epoch": 85.92760180995475, "grad_norm": 1.4602274894714355, "learning_rate": 8.909519753229016e-06, "loss": 0.2115, "step": 94950 }, { "epoch": 85.9502262443439, "grad_norm": 1.3357871770858765, "learning_rate": 8.881540168521364e-06, "loss": 0.1892, "step": 94975 }, { "epoch": 85.97285067873302, "grad_norm": 1.0203659534454346, "learning_rate": 8.8536018213674e-06, "loss": 0.2226, "step": 95000 }, { 
"epoch": 85.99547511312217, "grad_norm": 0.8663904666900635, "learning_rate": 8.825704729192013e-06, "loss": 0.1884, "step": 95025 }, { "epoch": 86.01809954751131, "grad_norm": 1.6109756231307983, "learning_rate": 8.797848909394422e-06, "loss": 0.1958, "step": 95050 }, { "epoch": 86.04072398190046, "grad_norm": 0.881666898727417, "learning_rate": 8.77003437934806e-06, "loss": 0.2421, "step": 95075 }, { "epoch": 86.0633484162896, "grad_norm": 1.2642569541931152, "learning_rate": 8.742261156400645e-06, "loss": 0.2097, "step": 95100 }, { "epoch": 86.08597285067873, "grad_norm": 0.5287483334541321, "learning_rate": 8.714529257874084e-06, "loss": 0.2161, "step": 95125 }, { "epoch": 86.10859728506787, "grad_norm": 1.08829665184021, "learning_rate": 8.68683870106458e-06, "loss": 0.2238, "step": 95150 }, { "epoch": 86.13122171945702, "grad_norm": 0.4798315465450287, "learning_rate": 8.659189503242469e-06, "loss": 0.2238, "step": 95175 }, { "epoch": 86.15384615384616, "grad_norm": 1.7319557666778564, "learning_rate": 8.631581681652375e-06, "loss": 0.2451, "step": 95200 }, { "epoch": 86.17647058823529, "grad_norm": 0.795026421546936, "learning_rate": 8.604015253513038e-06, "loss": 0.2117, "step": 95225 }, { "epoch": 86.19909502262443, "grad_norm": 1.3550655841827393, "learning_rate": 8.57649023601745e-06, "loss": 0.2975, "step": 95250 }, { "epoch": 86.22171945701358, "grad_norm": 0.6398485898971558, "learning_rate": 8.549006646332709e-06, "loss": 0.2096, "step": 95275 }, { "epoch": 86.24434389140272, "grad_norm": 4.678731441497803, "learning_rate": 8.521564501600156e-06, "loss": 0.2141, "step": 95300 }, { "epoch": 86.26696832579185, "grad_norm": 1.8914018869400024, "learning_rate": 8.49416381893519e-06, "loss": 0.2474, "step": 95325 }, { "epoch": 86.289592760181, "grad_norm": 0.6787257194519043, "learning_rate": 8.466804615427425e-06, "loss": 0.2422, "step": 95350 }, { "epoch": 86.31221719457014, "grad_norm": 1.2384531497955322, "learning_rate": 8.439486908140562e-06, 
"loss": 0.2872, "step": 95375 }, { "epoch": 86.33484162895928, "grad_norm": 1.7806782722473145, "learning_rate": 8.41221071411246e-06, "loss": 0.2452, "step": 95400 }, { "epoch": 86.35746606334841, "grad_norm": 0.5884113311767578, "learning_rate": 8.384976050355041e-06, "loss": 0.267, "step": 95425 }, { "epoch": 86.38009049773756, "grad_norm": 1.2176893949508667, "learning_rate": 8.357782933854357e-06, "loss": 0.3127, "step": 95450 }, { "epoch": 86.4027149321267, "grad_norm": 1.3576347827911377, "learning_rate": 8.330631381570524e-06, "loss": 0.2059, "step": 95475 }, { "epoch": 86.42533936651584, "grad_norm": 1.3243180513381958, "learning_rate": 8.303521410437772e-06, "loss": 0.2054, "step": 95500 }, { "epoch": 86.44796380090497, "grad_norm": 2.1407980918884277, "learning_rate": 8.276453037364342e-06, "loss": 0.2466, "step": 95525 }, { "epoch": 86.47058823529412, "grad_norm": 1.1910250186920166, "learning_rate": 8.249426279232587e-06, "loss": 0.2047, "step": 95550 }, { "epoch": 86.49321266968326, "grad_norm": 1.1588889360427856, "learning_rate": 8.222441152898859e-06, "loss": 0.2252, "step": 95575 }, { "epoch": 86.5158371040724, "grad_norm": 0.9993530511856079, "learning_rate": 8.195497675193586e-06, "loss": 0.2225, "step": 95600 }, { "epoch": 86.53846153846153, "grad_norm": 1.0689810514450073, "learning_rate": 8.168595862921174e-06, "loss": 0.1916, "step": 95625 }, { "epoch": 86.56108597285068, "grad_norm": 1.3326655626296997, "learning_rate": 8.141735732860102e-06, "loss": 0.1635, "step": 95650 }, { "epoch": 86.58371040723982, "grad_norm": 1.4869006872177124, "learning_rate": 8.11491730176278e-06, "loss": 0.2113, "step": 95675 }, { "epoch": 86.60633484162896, "grad_norm": 1.290174126625061, "learning_rate": 8.088140586355677e-06, "loss": 0.2214, "step": 95700 }, { "epoch": 86.6289592760181, "grad_norm": 1.172241449356079, "learning_rate": 8.061405603339199e-06, "loss": 0.2019, "step": 95725 }, { "epoch": 86.65158371040724, "grad_norm": 1.8281601667404175, 
"learning_rate": 8.034712369387752e-06, "loss": 0.2456, "step": 95750 }, { "epoch": 86.67420814479638, "grad_norm": 1.2891125679016113, "learning_rate": 8.008060901149685e-06, "loss": 0.1795, "step": 95775 }, { "epoch": 86.69683257918552, "grad_norm": 1.448681354522705, "learning_rate": 7.981451215247317e-06, "loss": 0.2167, "step": 95800 }, { "epoch": 86.71945701357465, "grad_norm": 1.2627946138381958, "learning_rate": 7.954883328276864e-06, "loss": 0.1876, "step": 95825 }, { "epoch": 86.7420814479638, "grad_norm": 0.7485878467559814, "learning_rate": 7.928357256808549e-06, "loss": 0.247, "step": 95850 }, { "epoch": 86.76470588235294, "grad_norm": 1.0917359590530396, "learning_rate": 7.901873017386435e-06, "loss": 0.2737, "step": 95875 }, { "epoch": 86.78733031674209, "grad_norm": 1.3244069814682007, "learning_rate": 7.87543062652856e-06, "loss": 0.2113, "step": 95900 }, { "epoch": 86.80995475113122, "grad_norm": 1.586942195892334, "learning_rate": 7.849030100726789e-06, "loss": 0.2813, "step": 95925 }, { "epoch": 86.83257918552036, "grad_norm": 0.8971759080886841, "learning_rate": 7.822671456446961e-06, "loss": 0.235, "step": 95950 }, { "epoch": 86.8552036199095, "grad_norm": 1.3623031377792358, "learning_rate": 7.796354710128724e-06, "loss": 0.2643, "step": 95975 }, { "epoch": 86.87782805429865, "grad_norm": 1.6224130392074585, "learning_rate": 7.77007987818565e-06, "loss": 0.2527, "step": 96000 }, { "epoch": 86.90045248868778, "grad_norm": 1.5212302207946777, "learning_rate": 7.743846977005097e-06, "loss": 0.1836, "step": 96025 }, { "epoch": 86.92307692307692, "grad_norm": 0.9528728127479553, "learning_rate": 7.717656022948367e-06, "loss": 0.1959, "step": 96050 }, { "epoch": 86.94570135746606, "grad_norm": 1.2689321041107178, "learning_rate": 7.691507032350506e-06, "loss": 0.1851, "step": 96075 }, { "epoch": 86.96832579185521, "grad_norm": 0.9713883996009827, "learning_rate": 7.665400021520454e-06, "loss": 0.2318, "step": 96100 }, { "epoch": 86.99095022624434, 
"grad_norm": 1.0001325607299805, "learning_rate": 7.639335006740924e-06, "loss": 0.2098, "step": 96125 }, { "epoch": 87.01357466063348, "grad_norm": 1.2248291969299316, "learning_rate": 7.613312004268484e-06, "loss": 0.1827, "step": 96150 }, { "epoch": 87.03619909502262, "grad_norm": 3.51912784576416, "learning_rate": 7.587331030333454e-06, "loss": 0.2468, "step": 96175 }, { "epoch": 87.05882352941177, "grad_norm": 4.302508354187012, "learning_rate": 7.561392101139971e-06, "loss": 0.2708, "step": 96200 }, { "epoch": 87.08144796380091, "grad_norm": 1.4801913499832153, "learning_rate": 7.53549523286591e-06, "loss": 0.2225, "step": 96225 }, { "epoch": 87.10407239819004, "grad_norm": 1.3829647302627563, "learning_rate": 7.509640441662976e-06, "loss": 0.1742, "step": 96250 }, { "epoch": 87.12669683257919, "grad_norm": 1.7717469930648804, "learning_rate": 7.483827743656571e-06, "loss": 0.1917, "step": 96275 }, { "epoch": 87.14932126696833, "grad_norm": 1.1577367782592773, "learning_rate": 7.458057154945882e-06, "loss": 0.2072, "step": 96300 }, { "epoch": 87.17194570135747, "grad_norm": 1.3073642253875732, "learning_rate": 7.432328691603803e-06, "loss": 0.2031, "step": 96325 }, { "epoch": 87.1945701357466, "grad_norm": 0.7133212089538574, "learning_rate": 7.4066423696769905e-06, "loss": 0.272, "step": 96350 }, { "epoch": 87.21719457013575, "grad_norm": 1.3051916360855103, "learning_rate": 7.380998205185778e-06, "loss": 0.1908, "step": 96375 }, { "epoch": 87.23981900452489, "grad_norm": 1.0187506675720215, "learning_rate": 7.355396214124249e-06, "loss": 0.2285, "step": 96400 }, { "epoch": 87.26244343891403, "grad_norm": 0.6291978359222412, "learning_rate": 7.329836412460127e-06, "loss": 0.2747, "step": 96425 }, { "epoch": 87.28506787330316, "grad_norm": 1.2138255834579468, "learning_rate": 7.30431881613488e-06, "loss": 0.2397, "step": 96450 }, { "epoch": 87.3076923076923, "grad_norm": 1.0286800861358643, "learning_rate": 7.278843441063633e-06, "loss": 0.2344, "step": 96475 
}, { "epoch": 87.33031674208145, "grad_norm": 1.1416869163513184, "learning_rate": 7.253410303135154e-06, "loss": 0.2007, "step": 96500 }, { "epoch": 87.3529411764706, "grad_norm": 1.5559656620025635, "learning_rate": 7.228019418211903e-06, "loss": 0.2306, "step": 96525 }, { "epoch": 87.37556561085972, "grad_norm": 1.1798025369644165, "learning_rate": 7.202670802129954e-06, "loss": 0.1642, "step": 96550 }, { "epoch": 87.39819004524887, "grad_norm": 1.2939826250076294, "learning_rate": 7.17736447069906e-06, "loss": 0.2186, "step": 96575 }, { "epoch": 87.42081447963801, "grad_norm": 1.5505955219268799, "learning_rate": 7.152100439702555e-06, "loss": 0.1967, "step": 96600 }, { "epoch": 87.44343891402715, "grad_norm": 1.1835687160491943, "learning_rate": 7.126878724897434e-06, "loss": 0.1904, "step": 96625 }, { "epoch": 87.46606334841628, "grad_norm": 1.1613070964813232, "learning_rate": 7.101699342014247e-06, "loss": 0.1898, "step": 96650 }, { "epoch": 87.48868778280543, "grad_norm": 0.9732732176780701, "learning_rate": 7.076562306757208e-06, "loss": 0.2852, "step": 96675 }, { "epoch": 87.51131221719457, "grad_norm": 1.5406293869018555, "learning_rate": 7.051467634804059e-06, "loss": 0.2073, "step": 96700 }, { "epoch": 87.53393665158372, "grad_norm": 1.4113398790359497, "learning_rate": 7.0264153418061634e-06, "loss": 0.2592, "step": 96725 }, { "epoch": 87.55656108597285, "grad_norm": 1.0180559158325195, "learning_rate": 7.001405443388422e-06, "loss": 0.2635, "step": 96750 }, { "epoch": 87.57918552036199, "grad_norm": 0.9436126351356506, "learning_rate": 6.977435840208026e-06, "loss": 0.1937, "step": 96775 }, { "epoch": 87.60180995475113, "grad_norm": 3.945801258087158, "learning_rate": 6.9525090803909055e-06, "loss": 0.225, "step": 96800 }, { "epoch": 87.62443438914028, "grad_norm": 0.9749020934104919, "learning_rate": 6.927624761248676e-06, "loss": 0.2477, "step": 96825 }, { "epoch": 87.6470588235294, "grad_norm": 1.1191080808639526, "learning_rate": 
6.902782898301515e-06, "loss": 0.2031, "step": 96850 }, { "epoch": 87.66968325791855, "grad_norm": 6.480875015258789, "learning_rate": 6.8779835070430695e-06, "loss": 0.3124, "step": 96875 }, { "epoch": 87.6923076923077, "grad_norm": 1.1927406787872314, "learning_rate": 6.853226602940534e-06, "loss": 0.2204, "step": 96900 }, { "epoch": 87.71493212669684, "grad_norm": 1.3960765600204468, "learning_rate": 6.828512201434574e-06, "loss": 0.207, "step": 96925 }, { "epoch": 87.73755656108597, "grad_norm": 1.835037350654602, "learning_rate": 6.8038403179394015e-06, "loss": 0.173, "step": 96950 }, { "epoch": 87.76018099547511, "grad_norm": 1.1787928342819214, "learning_rate": 6.779210967842624e-06, "loss": 0.1785, "step": 96975 }, { "epoch": 87.78280542986425, "grad_norm": 1.1594642400741577, "learning_rate": 6.754624166505412e-06, "loss": 0.1793, "step": 97000 }, { "epoch": 87.8054298642534, "grad_norm": 1.0816947221755981, "learning_rate": 6.730079929262325e-06, "loss": 0.1877, "step": 97025 }, { "epoch": 87.82805429864253, "grad_norm": 1.2309519052505493, "learning_rate": 6.7055782714214415e-06, "loss": 0.2552, "step": 97050 }, { "epoch": 87.85067873303167, "grad_norm": 1.8382105827331543, "learning_rate": 6.6811192082642045e-06, "loss": 0.2752, "step": 97075 }, { "epoch": 87.87330316742081, "grad_norm": 1.8699984550476074, "learning_rate": 6.656702755045579e-06, "loss": 0.2458, "step": 97100 }, { "epoch": 87.89592760180996, "grad_norm": 1.124491810798645, "learning_rate": 6.632328926993874e-06, "loss": 0.2592, "step": 97125 }, { "epoch": 87.91855203619909, "grad_norm": 1.0307016372680664, "learning_rate": 6.607997739310889e-06, "loss": 0.1954, "step": 97150 }, { "epoch": 87.94117647058823, "grad_norm": 1.102413535118103, "learning_rate": 6.58370920717175e-06, "loss": 0.1964, "step": 97175 }, { "epoch": 87.96380090497738, "grad_norm": 1.0357909202575684, "learning_rate": 6.559463345725058e-06, "loss": 0.2379, "step": 97200 }, { "epoch": 87.98642533936652, "grad_norm": 
1.1607322692871094, "learning_rate": 6.535260170092732e-06, "loss": 0.2297, "step": 97225 }, { "epoch": 88.00904977375566, "grad_norm": 1.5806924104690552, "learning_rate": 6.5110996953701225e-06, "loss": 0.2144, "step": 97250 }, { "epoch": 88.03167420814479, "grad_norm": 1.0098795890808105, "learning_rate": 6.486981936625901e-06, "loss": 0.2351, "step": 97275 }, { "epoch": 88.05429864253394, "grad_norm": 0.9517509341239929, "learning_rate": 6.462906908902143e-06, "loss": 0.1783, "step": 97300 }, { "epoch": 88.07692307692308, "grad_norm": 1.001928687095642, "learning_rate": 6.43887462721423e-06, "loss": 0.1693, "step": 97325 }, { "epoch": 88.09954751131222, "grad_norm": 1.1728754043579102, "learning_rate": 6.414885106550929e-06, "loss": 0.1852, "step": 97350 }, { "epoch": 88.12217194570135, "grad_norm": 1.1057087182998657, "learning_rate": 6.390938361874282e-06, "loss": 0.2166, "step": 97375 }, { "epoch": 88.1447963800905, "grad_norm": 1.3694144487380981, "learning_rate": 6.367034408119706e-06, "loss": 0.1823, "step": 97400 }, { "epoch": 88.16742081447964, "grad_norm": 1.2324395179748535, "learning_rate": 6.343173260195885e-06, "loss": 0.2074, "step": 97425 }, { "epoch": 88.19004524886878, "grad_norm": 1.6150360107421875, "learning_rate": 6.319354932984849e-06, "loss": 0.2048, "step": 97450 }, { "epoch": 88.21266968325791, "grad_norm": 1.3420321941375732, "learning_rate": 6.295579441341872e-06, "loss": 0.2302, "step": 97475 }, { "epoch": 88.23529411764706, "grad_norm": 2.5951383113861084, "learning_rate": 6.2718468000955675e-06, "loss": 0.2142, "step": 97500 }, { "epoch": 88.2579185520362, "grad_norm": 1.2469367980957031, "learning_rate": 6.248157024047762e-06, "loss": 0.2217, "step": 97525 }, { "epoch": 88.28054298642535, "grad_norm": 1.5069005489349365, "learning_rate": 6.224510127973603e-06, "loss": 0.215, "step": 97550 }, { "epoch": 88.30316742081448, "grad_norm": 1.5617077350616455, "learning_rate": 6.20090612662146e-06, "loss": 0.3176, "step": 97575 }, { 
"epoch": 88.32579185520362, "grad_norm": 0.839979887008667, "learning_rate": 6.177345034712966e-06, "loss": 0.1975, "step": 97600 }, { "epoch": 88.34841628959276, "grad_norm": 1.1441539525985718, "learning_rate": 6.1538268669429655e-06, "loss": 0.1989, "step": 97625 }, { "epoch": 88.3710407239819, "grad_norm": 0.4892381429672241, "learning_rate": 6.130351637979583e-06, "loss": 0.2742, "step": 97650 }, { "epoch": 88.39366515837104, "grad_norm": 1.1934547424316406, "learning_rate": 6.106919362464099e-06, "loss": 0.1969, "step": 97675 }, { "epoch": 88.41628959276018, "grad_norm": 0.8226586580276489, "learning_rate": 6.083530055011048e-06, "loss": 0.1789, "step": 97700 }, { "epoch": 88.43891402714932, "grad_norm": 0.802721381187439, "learning_rate": 6.060183730208171e-06, "loss": 0.2685, "step": 97725 }, { "epoch": 88.46153846153847, "grad_norm": 1.128164291381836, "learning_rate": 6.036880402616359e-06, "loss": 0.2312, "step": 97750 }, { "epoch": 88.4841628959276, "grad_norm": 1.6216875314712524, "learning_rate": 6.01362008676973e-06, "loss": 0.1797, "step": 97775 }, { "epoch": 88.50678733031674, "grad_norm": 1.7495800256729126, "learning_rate": 5.990402797175537e-06, "loss": 0.1974, "step": 97800 }, { "epoch": 88.52941176470588, "grad_norm": 0.7272179126739502, "learning_rate": 5.967228548314229e-06, "loss": 0.1775, "step": 97825 }, { "epoch": 88.55203619909503, "grad_norm": 2.3626515865325928, "learning_rate": 5.944097354639405e-06, "loss": 0.2977, "step": 97850 }, { "epoch": 88.57466063348416, "grad_norm": 4.555837631225586, "learning_rate": 5.921009230577797e-06, "loss": 0.2158, "step": 97875 }, { "epoch": 88.5972850678733, "grad_norm": 1.622931957244873, "learning_rate": 5.897964190529289e-06, "loss": 0.183, "step": 97900 }, { "epoch": 88.61990950226244, "grad_norm": 1.069495439529419, "learning_rate": 5.874962248866874e-06, "loss": 0.2131, "step": 97925 }, { "epoch": 88.64253393665159, "grad_norm": 1.374558687210083, "learning_rate": 5.852003419936693e-06, 
"loss": 0.2064, "step": 97950 }, { "epoch": 88.66515837104072, "grad_norm": 0.7070494294166565, "learning_rate": 5.8290877180579755e-06, "loss": 0.2297, "step": 97975 }, { "epoch": 88.68778280542986, "grad_norm": 2.116999387741089, "learning_rate": 5.806215157523073e-06, "loss": 0.2706, "step": 98000 }, { "epoch": 88.710407239819, "grad_norm": 2.178647994995117, "learning_rate": 5.783385752597397e-06, "loss": 0.205, "step": 98025 }, { "epoch": 88.73303167420815, "grad_norm": 0.8619604110717773, "learning_rate": 5.760599517519493e-06, "loss": 0.215, "step": 98050 }, { "epoch": 88.75565610859728, "grad_norm": 1.0493172407150269, "learning_rate": 5.7378564665009175e-06, "loss": 0.3104, "step": 98075 }, { "epoch": 88.77828054298642, "grad_norm": 1.0355833768844604, "learning_rate": 5.7151566137263655e-06, "loss": 0.1816, "step": 98100 }, { "epoch": 88.80090497737557, "grad_norm": 1.2814608812332153, "learning_rate": 5.692499973353529e-06, "loss": 0.2369, "step": 98125 }, { "epoch": 88.82352941176471, "grad_norm": 1.3461711406707764, "learning_rate": 5.669886559513187e-06, "loss": 0.1955, "step": 98150 }, { "epoch": 88.84615384615384, "grad_norm": 4.495723247528076, "learning_rate": 5.647316386309126e-06, "loss": 0.2299, "step": 98175 }, { "epoch": 88.86877828054298, "grad_norm": 5.8133721351623535, "learning_rate": 5.6247894678182e-06, "loss": 0.2424, "step": 98200 }, { "epoch": 88.89140271493213, "grad_norm": 1.4363712072372437, "learning_rate": 5.602305818090272e-06, "loss": 0.1946, "step": 98225 }, { "epoch": 88.91402714932127, "grad_norm": 1.0300897359848022, "learning_rate": 5.579865451148191e-06, "loss": 0.2983, "step": 98250 }, { "epoch": 88.9366515837104, "grad_norm": 0.7913947105407715, "learning_rate": 5.557468380987862e-06, "loss": 0.2199, "step": 98275 }, { "epoch": 88.95927601809954, "grad_norm": 0.6860991716384888, "learning_rate": 5.535114621578126e-06, "loss": 0.2499, "step": 98300 }, { "epoch": 88.98190045248869, "grad_norm": 1.259525179862976, 
"learning_rate": 5.512804186860883e-06, "loss": 0.1944, "step": 98325 }, { "epoch": 89.00452488687783, "grad_norm": 1.7142527103424072, "learning_rate": 5.490537090750935e-06, "loss": 0.2269, "step": 98350 }, { "epoch": 89.02714932126698, "grad_norm": 0.5371774435043335, "learning_rate": 5.468313347136113e-06, "loss": 0.1918, "step": 98375 }, { "epoch": 89.0497737556561, "grad_norm": 0.699518084526062, "learning_rate": 5.446132969877181e-06, "loss": 0.2079, "step": 98400 }, { "epoch": 89.07239819004525, "grad_norm": 1.345686912536621, "learning_rate": 5.423995972807866e-06, "loss": 0.2125, "step": 98425 }, { "epoch": 89.09502262443439, "grad_norm": 1.0948538780212402, "learning_rate": 5.4019023697348285e-06, "loss": 0.2294, "step": 98450 }, { "epoch": 89.11764705882354, "grad_norm": 1.389125108718872, "learning_rate": 5.379852174437682e-06, "loss": 0.2028, "step": 98475 }, { "epoch": 89.14027149321267, "grad_norm": 1.8768160343170166, "learning_rate": 5.357845400668942e-06, "loss": 0.2449, "step": 98500 }, { "epoch": 89.16289592760181, "grad_norm": 1.5371594429016113, "learning_rate": 5.335882062154079e-06, "loss": 0.2419, "step": 98525 }, { "epoch": 89.18552036199095, "grad_norm": 1.790648102760315, "learning_rate": 5.313962172591427e-06, "loss": 0.2064, "step": 98550 }, { "epoch": 89.2081447963801, "grad_norm": 1.6184557676315308, "learning_rate": 5.292085745652266e-06, "loss": 0.1847, "step": 98575 }, { "epoch": 89.23076923076923, "grad_norm": 0.9454947710037231, "learning_rate": 5.2702527949807335e-06, "loss": 0.2475, "step": 98600 }, { "epoch": 89.25339366515837, "grad_norm": 1.2674115896224976, "learning_rate": 5.248463334193878e-06, "loss": 0.196, "step": 98625 }, { "epoch": 89.27601809954751, "grad_norm": 1.6565049886703491, "learning_rate": 5.226717376881595e-06, "loss": 0.1967, "step": 98650 }, { "epoch": 89.29864253393666, "grad_norm": 4.183839321136475, "learning_rate": 5.205014936606686e-06, "loss": 0.2212, "step": 98675 }, { "epoch": 
89.32126696832579, "grad_norm": 1.141318917274475, "learning_rate": 5.183356026904764e-06, "loss": 0.1983, "step": 98700 }, { "epoch": 89.34389140271493, "grad_norm": 0.9870153665542603, "learning_rate": 5.16174066128435e-06, "loss": 0.2428, "step": 98725 }, { "epoch": 89.36651583710407, "grad_norm": 0.5465179681777954, "learning_rate": 5.140168853226734e-06, "loss": 0.1734, "step": 98750 }, { "epoch": 89.38914027149322, "grad_norm": 1.515248417854309, "learning_rate": 5.118640616186121e-06, "loss": 0.1716, "step": 98775 }, { "epoch": 89.41176470588235, "grad_norm": null, "learning_rate": 5.09801451270383e-06, "loss": 0.2216, "step": 98800 }, { "epoch": 89.43438914027149, "grad_norm": 1.1756689548492432, "learning_rate": 5.076571713780264e-06, "loss": 0.2696, "step": 98825 }, { "epoch": 89.45701357466064, "grad_norm": 1.3918015956878662, "learning_rate": 5.055172525538723e-06, "loss": 0.2155, "step": 98850 }, { "epoch": 89.47963800904978, "grad_norm": 0.7016647458076477, "learning_rate": 5.033816961325671e-06, "loss": 0.1772, "step": 98875 }, { "epoch": 89.50226244343891, "grad_norm": 0.8793459534645081, "learning_rate": 5.0125050344604455e-06, "loss": 0.202, "step": 98900 }, { "epoch": 89.52488687782805, "grad_norm": 1.3844480514526367, "learning_rate": 4.99123675823509e-06, "loss": 0.1997, "step": 98925 }, { "epoch": 89.5475113122172, "grad_norm": 0.7166454195976257, "learning_rate": 4.970012145914484e-06, "loss": 0.211, "step": 98950 }, { "epoch": 89.57013574660634, "grad_norm": 1.519817590713501, "learning_rate": 4.9488312107362235e-06, "loss": 0.286, "step": 98975 }, { "epoch": 89.59276018099547, "grad_norm": 1.4116744995117188, "learning_rate": 4.927693965910706e-06, "loss": 0.216, "step": 99000 }, { "epoch": 89.61538461538461, "grad_norm": 1.1981618404388428, "learning_rate": 4.906600424621054e-06, "loss": 0.2009, "step": 99025 }, { "epoch": 89.63800904977376, "grad_norm": 0.9614788293838501, "learning_rate": 4.885550600023153e-06, "loss": 0.2241, "step": 
99050 }, { "epoch": 89.6606334841629, "grad_norm": 0.9208125472068787, "learning_rate": 4.8645445052455825e-06, "loss": 0.2626, "step": 99075 }, { "epoch": 89.68325791855203, "grad_norm": 0.6827859878540039, "learning_rate": 4.843582153389705e-06, "loss": 0.2556, "step": 99100 }, { "epoch": 89.70588235294117, "grad_norm": 1.1373286247253418, "learning_rate": 4.822663557529555e-06, "loss": 0.2558, "step": 99125 }, { "epoch": 89.72850678733032, "grad_norm": 1.5220346450805664, "learning_rate": 4.801788730711903e-06, "loss": 0.1929, "step": 99150 }, { "epoch": 89.75113122171946, "grad_norm": 0.7927146553993225, "learning_rate": 4.780957685956194e-06, "loss": 0.2164, "step": 99175 }, { "epoch": 89.77375565610859, "grad_norm": 1.3103749752044678, "learning_rate": 4.760170436254601e-06, "loss": 0.2094, "step": 99200 }, { "epoch": 89.79638009049773, "grad_norm": 4.157220363616943, "learning_rate": 4.739426994571954e-06, "loss": 0.2671, "step": 99225 }, { "epoch": 89.81900452488688, "grad_norm": 1.3247969150543213, "learning_rate": 4.718727373845787e-06, "loss": 0.2106, "step": 99250 }, { "epoch": 89.84162895927602, "grad_norm": 4.084348201751709, "learning_rate": 4.698071586986266e-06, "loss": 0.2294, "step": 99275 }, { "epoch": 89.86425339366515, "grad_norm": 1.5951694250106812, "learning_rate": 4.677459646876267e-06, "loss": 0.2371, "step": 99300 }, { "epoch": 89.8868778280543, "grad_norm": 1.3263107538223267, "learning_rate": 4.656891566371257e-06, "loss": 0.2168, "step": 99325 }, { "epoch": 89.90950226244344, "grad_norm": 2.5932395458221436, "learning_rate": 4.636367358299417e-06, "loss": 0.1729, "step": 99350 }, { "epoch": 89.93212669683258, "grad_norm": 1.5805531740188599, "learning_rate": 4.615887035461499e-06, "loss": 0.2018, "step": 99375 }, { "epoch": 89.95475113122171, "grad_norm": 2.333526849746704, "learning_rate": 4.595450610630952e-06, "loss": 0.2279, "step": 99400 }, { "epoch": 89.97737556561086, "grad_norm": 1.2010316848754883, "learning_rate": 
4.575058096553772e-06, "loss": 0.1759, "step": 99425 }, { "epoch": 90.0, "grad_norm": 1.7871593236923218, "learning_rate": 4.5547095059486335e-06, "loss": 0.2108, "step": 99450 }, { "epoch": 90.02262443438914, "grad_norm": 1.0759422779083252, "learning_rate": 4.5344048515067875e-06, "loss": 0.1616, "step": 99475 }, { "epoch": 90.04524886877829, "grad_norm": 0.7481698393821716, "learning_rate": 4.5141441458920765e-06, "loss": 0.1975, "step": 99500 }, { "epoch": 90.06787330316742, "grad_norm": 1.1689400672912598, "learning_rate": 4.493927401740943e-06, "loss": 0.2479, "step": 99525 }, { "epoch": 90.09049773755656, "grad_norm": 1.1692843437194824, "learning_rate": 4.47375463166241e-06, "loss": 0.1792, "step": 99550 }, { "epoch": 90.1131221719457, "grad_norm": 4.002111911773682, "learning_rate": 4.453625848238071e-06, "loss": 0.2316, "step": 99575 }, { "epoch": 90.13574660633485, "grad_norm": 0.9596063494682312, "learning_rate": 4.433541064022084e-06, "loss": 0.2341, "step": 99600 }, { "epoch": 90.15837104072398, "grad_norm": 1.192742943763733, "learning_rate": 4.413500291541169e-06, "loss": 0.2055, "step": 99625 }, { "epoch": 90.18099547511312, "grad_norm": 4.47230339050293, "learning_rate": 4.3935035432945966e-06, "loss": 0.2587, "step": 99650 }, { "epoch": 90.20361990950227, "grad_norm": 0.7736957669258118, "learning_rate": 4.373550831754189e-06, "loss": 0.1501, "step": 99675 }, { "epoch": 90.22624434389141, "grad_norm": 1.5000149011611938, "learning_rate": 4.353642169364266e-06, "loss": 0.2109, "step": 99700 }, { "epoch": 90.24886877828054, "grad_norm": 4.5788092613220215, "learning_rate": 4.333777568541738e-06, "loss": 0.2648, "step": 99725 }, { "epoch": 90.27149321266968, "grad_norm": 1.0441960096359253, "learning_rate": 4.313957041675953e-06, "loss": 0.2013, "step": 99750 }, { "epoch": 90.29411764705883, "grad_norm": 1.4179902076721191, "learning_rate": 4.294180601128855e-06, "loss": 0.1717, "step": 99775 }, { "epoch": 90.31674208144797, "grad_norm": 
0.7733138799667358, "learning_rate": 4.274448259234828e-06, "loss": 0.1884, "step": 99800 }, { "epoch": 90.3393665158371, "grad_norm": 1.196804165840149, "learning_rate": 4.254760028300794e-06, "loss": 0.1869, "step": 99825 }, { "epoch": 90.36199095022624, "grad_norm": 1.052259087562561, "learning_rate": 4.2351159206061135e-06, "loss": 0.1821, "step": 99850 }, { "epoch": 90.38461538461539, "grad_norm": 4.576720714569092, "learning_rate": 4.215515948402695e-06, "loss": 0.2633, "step": 99875 }, { "epoch": 90.40723981900453, "grad_norm": 0.6688747406005859, "learning_rate": 4.1959601239148596e-06, "loss": 0.1988, "step": 99900 }, { "epoch": 90.42986425339366, "grad_norm": 0.7618852257728577, "learning_rate": 4.176448459339435e-06, "loss": 0.1855, "step": 99925 }, { "epoch": 90.4524886877828, "grad_norm": 0.901997447013855, "learning_rate": 4.156980966845669e-06, "loss": 0.1848, "step": 99950 }, { "epoch": 90.47511312217195, "grad_norm": 1.5723624229431152, "learning_rate": 4.137557658575299e-06, "loss": 0.1667, "step": 99975 }, { "epoch": 90.49773755656109, "grad_norm": 1.2151317596435547, "learning_rate": 4.118178546642478e-06, "loss": 0.1989, "step": 100000 }, { "epoch": 90.52036199095022, "grad_norm": 0.9620048999786377, "learning_rate": 4.09884364313382e-06, "loss": 0.2964, "step": 100025 }, { "epoch": 90.54298642533936, "grad_norm": 1.3927428722381592, "learning_rate": 4.079552960108321e-06, "loss": 0.2111, "step": 100050 }, { "epoch": 90.56561085972851, "grad_norm": 0.8601452112197876, "learning_rate": 4.060306509597447e-06, "loss": 0.2963, "step": 100075 }, { "epoch": 90.58823529411765, "grad_norm": 1.4434726238250732, "learning_rate": 4.041104303605047e-06, "loss": 0.1968, "step": 100100 }, { "epoch": 90.61085972850678, "grad_norm": 1.012083649635315, "learning_rate": 4.021946354107383e-06, "loss": 0.2221, "step": 100125 }, { "epoch": 90.63348416289593, "grad_norm": 0.8624517917633057, "learning_rate": 4.0028326730531135e-06, "loss": 0.2395, "step": 100150 }, 
{ "epoch": 90.65610859728507, "grad_norm": 0.7817290425300598, "learning_rate": 3.983763272363302e-06, "loss": 0.2174, "step": 100175 }, { "epoch": 90.67873303167421, "grad_norm": 1.2338742017745972, "learning_rate": 3.9647381639313525e-06, "loss": 0.2204, "step": 100200 }, { "epoch": 90.70135746606334, "grad_norm": 1.4460539817810059, "learning_rate": 3.945757359623106e-06, "loss": 0.2336, "step": 100225 }, { "epoch": 90.72398190045249, "grad_norm": 1.0861200094223022, "learning_rate": 3.9268208712767005e-06, "loss": 0.2327, "step": 100250 }, { "epoch": 90.74660633484163, "grad_norm": 0.6357161402702332, "learning_rate": 3.907928710702715e-06, "loss": 0.22, "step": 100275 }, { "epoch": 90.76923076923077, "grad_norm": 1.9994890689849854, "learning_rate": 3.88908088968399e-06, "loss": 0.2103, "step": 100300 }, { "epoch": 90.7918552036199, "grad_norm": 1.4905165433883667, "learning_rate": 3.8702774199758145e-06, "loss": 0.245, "step": 100325 }, { "epoch": 90.81447963800905, "grad_norm": 1.434935450553894, "learning_rate": 3.8515183133057155e-06, "loss": 0.2043, "step": 100350 }, { "epoch": 90.83710407239819, "grad_norm": 1.1512585878372192, "learning_rate": 3.832803581373633e-06, "loss": 0.2167, "step": 100375 }, { "epoch": 90.85972850678733, "grad_norm": 0.6571705341339111, "learning_rate": 3.8141332358517657e-06, "loss": 0.1661, "step": 100400 }, { "epoch": 90.88235294117646, "grad_norm": 1.545005202293396, "learning_rate": 3.7955072883846805e-06, "loss": 0.2381, "step": 100425 }, { "epoch": 90.90497737556561, "grad_norm": 1.2027240991592407, "learning_rate": 3.776925750589219e-06, "loss": 0.1711, "step": 100450 }, { "epoch": 90.92760180995475, "grad_norm": 1.356946587562561, "learning_rate": 3.7583886340545514e-06, "loss": 0.2117, "step": 100475 }, { "epoch": 90.9502262443439, "grad_norm": 1.0545406341552734, "learning_rate": 3.739895950342106e-06, "loss": 0.2191, "step": 100500 }, { "epoch": 90.97285067873302, "grad_norm": 1.6496835947036743, "learning_rate": 
3.7214477109856394e-06, "loss": 0.2234, "step": 100525 }, { "epoch": 90.99547511312217, "grad_norm": 0.896088182926178, "learning_rate": 3.7030439274911645e-06, "loss": 0.1973, "step": 100550 }, { "epoch": 91.01809954751131, "grad_norm": 0.8725895881652832, "learning_rate": 3.6846846113369745e-06, "loss": 0.2494, "step": 100575 }, { "epoch": 91.04072398190046, "grad_norm": 1.522818922996521, "learning_rate": 3.6663697739736264e-06, "loss": 0.1876, "step": 100600 }, { "epoch": 91.0633484162896, "grad_norm": 1.0002983808517456, "learning_rate": 3.64809942682395e-06, "loss": 0.1711, "step": 100625 }, { "epoch": 91.08597285067873, "grad_norm": 0.6117151379585266, "learning_rate": 3.629873581282988e-06, "loss": 0.2083, "step": 100650 }, { "epoch": 91.10859728506787, "grad_norm": 1.174191951751709, "learning_rate": 3.6116922487180814e-06, "loss": 0.2017, "step": 100675 }, { "epoch": 91.13122171945702, "grad_norm": 1.1282929182052612, "learning_rate": 3.5935554404687594e-06, "loss": 0.2245, "step": 100700 }, { "epoch": 91.15384615384616, "grad_norm": 1.034684419631958, "learning_rate": 3.5754631678468316e-06, "loss": 0.1539, "step": 100725 }, { "epoch": 91.17647058823529, "grad_norm": 0.6202672123908997, "learning_rate": 3.5574154421362714e-06, "loss": 0.2047, "step": 100750 }, { "epoch": 91.19909502262443, "grad_norm": 1.0714919567108154, "learning_rate": 3.5394122745933256e-06, "loss": 0.2335, "step": 100775 }, { "epoch": 91.22171945701358, "grad_norm": 1.7276630401611328, "learning_rate": 3.5214536764464035e-06, "loss": 0.2676, "step": 100800 }, { "epoch": 91.24434389140272, "grad_norm": 1.0312620401382446, "learning_rate": 3.503539658896162e-06, "loss": 0.2036, "step": 100825 }, { "epoch": 91.26696832579185, "grad_norm": 1.6993221044540405, "learning_rate": 3.4856702331154144e-06, "loss": 0.2018, "step": 100850 }, { "epoch": 91.289592760181, "grad_norm": 0.8954603672027588, "learning_rate": 3.467845410249187e-06, "loss": 0.2035, "step": 100875 }, { "epoch": 
91.31221719457014, "grad_norm": 1.3395525217056274, "learning_rate": 3.4507755530394504e-06, "loss": 0.2032, "step": 100900 }, { "epoch": 91.33484162895928, "grad_norm": 0.5515787601470947, "learning_rate": 3.4330381841086163e-06, "loss": 0.1682, "step": 100925 }, { "epoch": 91.35746606334841, "grad_norm": 0.8045617341995239, "learning_rate": 3.415345450918494e-06, "loss": 0.177, "step": 100950 }, { "epoch": 91.38009049773756, "grad_norm": 0.974839448928833, "learning_rate": 3.3976973645038735e-06, "loss": 0.2034, "step": 100975 }, { "epoch": 91.4027149321267, "grad_norm": 1.4894038438796997, "learning_rate": 3.3800939358717584e-06, "loss": 0.1659, "step": 101000 }, { "epoch": 91.42533936651584, "grad_norm": 0.8471037745475769, "learning_rate": 3.362535176001249e-06, "loss": 0.1936, "step": 101025 }, { "epoch": 91.44796380090497, "grad_norm": 0.7326423525810242, "learning_rate": 3.345021095843603e-06, "loss": 0.1763, "step": 101050 }, { "epoch": 91.47058823529412, "grad_norm": 1.0680112838745117, "learning_rate": 3.3275517063222067e-06, "loss": 0.2155, "step": 101075 }, { "epoch": 91.49321266968326, "grad_norm": 1.4577637910842896, "learning_rate": 3.310127018332595e-06, "loss": 0.2404, "step": 101100 }, { "epoch": 91.5158371040724, "grad_norm": 1.1680512428283691, "learning_rate": 3.2927470427423914e-06, "loss": 0.2226, "step": 101125 }, { "epoch": 91.53846153846153, "grad_norm": 0.7334850430488586, "learning_rate": 3.2754117903913498e-06, "loss": 0.2151, "step": 101150 }, { "epoch": 91.56108597285068, "grad_norm": 1.0506032705307007, "learning_rate": 3.2581212720913464e-06, "loss": 0.2141, "step": 101175 }, { "epoch": 91.58371040723982, "grad_norm": 0.7149414420127869, "learning_rate": 3.240875498626305e-06, "loss": 0.2182, "step": 101200 }, { "epoch": 91.60633484162896, "grad_norm": 0.6269099116325378, "learning_rate": 3.2236744807523058e-06, "loss": 0.2246, "step": 101225 }, { "epoch": 91.6289592760181, "grad_norm": 0.9679759740829468, "learning_rate": 
3.2065182291974744e-06, "loss": 0.1789, "step": 101250 }, { "epoch": 91.65158371040724, "grad_norm": 0.9241008758544922, "learning_rate": 3.189406754662027e-06, "loss": 0.2289, "step": 101275 }, { "epoch": 91.67420814479638, "grad_norm": 1.6055413484573364, "learning_rate": 3.172340067818252e-06, "loss": 0.1855, "step": 101300 }, { "epoch": 91.69683257918552, "grad_norm": 0.8490349650382996, "learning_rate": 3.1553181793105092e-06, "loss": 0.2092, "step": 101325 }, { "epoch": 91.71945701357465, "grad_norm": 1.3778477907180786, "learning_rate": 3.1383410997552067e-06, "loss": 0.1682, "step": 101350 }, { "epoch": 91.7420814479638, "grad_norm": 0.8499693870544434, "learning_rate": 3.1214088397408327e-06, "loss": 0.2396, "step": 101375 }, { "epoch": 91.76470588235294, "grad_norm": 1.6106404066085815, "learning_rate": 3.104521409827873e-06, "loss": 0.2095, "step": 101400 }, { "epoch": 91.78733031674209, "grad_norm": 1.2034767866134644, "learning_rate": 3.087678820548911e-06, "loss": 0.1982, "step": 101425 }, { "epoch": 91.80995475113122, "grad_norm": 0.6358681321144104, "learning_rate": 3.0708810824085107e-06, "loss": 0.26, "step": 101450 }, { "epoch": 91.83257918552036, "grad_norm": 1.65291428565979, "learning_rate": 3.054128205883308e-06, "loss": 0.2222, "step": 101475 }, { "epoch": 91.8552036199095, "grad_norm": 1.0159928798675537, "learning_rate": 3.037420201421911e-06, "loss": 0.2816, "step": 101500 }, { "epoch": 91.87782805429865, "grad_norm": 1.630953073501587, "learning_rate": 3.020757079445002e-06, "loss": 0.1828, "step": 101525 }, { "epoch": 91.90045248868778, "grad_norm": 0.5932508707046509, "learning_rate": 3.0041388503452e-06, "loss": 0.2103, "step": 101550 }, { "epoch": 91.92307692307692, "grad_norm": 0.9708763957023621, "learning_rate": 2.9875655244871984e-06, "loss": 0.2741, "step": 101575 }, { "epoch": 91.94570135746606, "grad_norm": 1.2914602756500244, "learning_rate": 2.971037112207619e-06, "loss": 0.2661, "step": 101600 }, { "epoch": 
91.96832579185521, "grad_norm": 1.1061433553695679, "learning_rate": 2.9545536238151172e-06, "loss": 0.1836, "step": 101625 }, { "epoch": 91.99095022624434, "grad_norm": 1.001975178718567, "learning_rate": 2.9381150695902937e-06, "loss": 0.2847, "step": 101650 }, { "epoch": 92.01357466063348, "grad_norm": 1.711601972579956, "learning_rate": 2.9217214597857725e-06, "loss": 0.1761, "step": 101675 }, { "epoch": 92.03619909502262, "grad_norm": 0.42898303270339966, "learning_rate": 2.9053728046260825e-06, "loss": 0.183, "step": 101700 }, { "epoch": 92.05882352941177, "grad_norm": 1.3710592985153198, "learning_rate": 2.889069114307785e-06, "loss": 0.2273, "step": 101725 }, { "epoch": 92.08144796380091, "grad_norm": 0.9163620471954346, "learning_rate": 2.8728103989993283e-06, "loss": 0.2118, "step": 101750 }, { "epoch": 92.10407239819004, "grad_norm": 1.4323467016220093, "learning_rate": 2.85659666884116e-06, "loss": 0.1969, "step": 101775 }, { "epoch": 92.12669683257919, "grad_norm": 0.9811010956764221, "learning_rate": 2.840427933945649e-06, "loss": 0.1962, "step": 101800 }, { "epoch": 92.14932126696833, "grad_norm": 3.9413015842437744, "learning_rate": 2.8243042043971126e-06, "loss": 0.2551, "step": 101825 }, { "epoch": 92.17194570135747, "grad_norm": 1.1939562559127808, "learning_rate": 2.808225490251781e-06, "loss": 0.254, "step": 101850 }, { "epoch": 92.1945701357466, "grad_norm": 0.7034265398979187, "learning_rate": 2.7921918015378324e-06, "loss": 0.1436, "step": 101875 }, { "epoch": 92.21719457013575, "grad_norm": 1.3420288562774658, "learning_rate": 2.776203148255335e-06, "loss": 0.1856, "step": 101900 }, { "epoch": 92.23981900452489, "grad_norm": 1.0718244314193726, "learning_rate": 2.7602595403762946e-06, "loss": 0.2213, "step": 101925 }, { "epoch": 92.26244343891403, "grad_norm": 0.8896125555038452, "learning_rate": 2.744360987844599e-06, "loss": 0.1921, "step": 101950 }, { "epoch": 92.28506787330316, "grad_norm": 1.383668303489685, "learning_rate": 
2.728507500576074e-06, "loss": 0.2313, "step": 101975 }, { "epoch": 92.3076923076923, "grad_norm": 0.961441695690155, "learning_rate": 2.712699088458378e-06, "loss": 0.1863, "step": 102000 }, { "epoch": 92.33031674208145, "grad_norm": 1.8158527612686157, "learning_rate": 2.696935761351124e-06, "loss": 0.1983, "step": 102025 }, { "epoch": 92.3529411764706, "grad_norm": 0.8649600148200989, "learning_rate": 2.6812175290857466e-06, "loss": 0.2421, "step": 102050 }, { "epoch": 92.37556561085972, "grad_norm": 1.4010592699050903, "learning_rate": 2.665544401465597e-06, "loss": 0.2068, "step": 102075 }, { "epoch": 92.39819004524887, "grad_norm": 0.8777248859405518, "learning_rate": 2.6499163882658713e-06, "loss": 0.2213, "step": 102100 }, { "epoch": 92.42081447963801, "grad_norm": 1.2792974710464478, "learning_rate": 2.6343334992336485e-06, "loss": 0.2488, "step": 102125 }, { "epoch": 92.44343891402715, "grad_norm": 0.7411015033721924, "learning_rate": 2.618795744087829e-06, "loss": 0.1788, "step": 102150 }, { "epoch": 92.46606334841628, "grad_norm": 0.7130656838417053, "learning_rate": 2.603303132519219e-06, "loss": 0.2025, "step": 102175 }, { "epoch": 92.48868778280543, "grad_norm": 1.1229000091552734, "learning_rate": 2.587855674190398e-06, "loss": 0.2559, "step": 102200 }, { "epoch": 92.51131221719457, "grad_norm": 1.2911707162857056, "learning_rate": 2.572453378735842e-06, "loss": 0.2007, "step": 102225 }, { "epoch": 92.53393665158372, "grad_norm": 0.9338110685348511, "learning_rate": 2.5570962557618508e-06, "loss": 0.206, "step": 102250 }, { "epoch": 92.55656108597285, "grad_norm": 1.5435631275177002, "learning_rate": 2.541784314846512e-06, "loss": 0.1792, "step": 102275 }, { "epoch": 92.57918552036199, "grad_norm": 1.8651821613311768, "learning_rate": 2.526517565539796e-06, "loss": 0.2104, "step": 102300 }, { "epoch": 92.60180995475113, "grad_norm": 1.2022444009780884, "learning_rate": 2.5112960173634096e-06, "loss": 0.2904, "step": 102325 }, { "epoch": 
92.62443438914028, "grad_norm": 0.6954711079597473, "learning_rate": 2.496119679810943e-06, "loss": 0.2138, "step": 102350 }, { "epoch": 92.6470588235294, "grad_norm": 0.6750567555427551, "learning_rate": 2.480988562347741e-06, "loss": 0.2131, "step": 102375 }, { "epoch": 92.66968325791855, "grad_norm": 0.5733470320701599, "learning_rate": 2.4659026744109716e-06, "loss": 0.1843, "step": 102400 }, { "epoch": 92.6923076923077, "grad_norm": 0.8600246906280518, "learning_rate": 2.4508620254095666e-06, "loss": 0.2016, "step": 102425 }, { "epoch": 92.71493212669684, "grad_norm": 1.2240346670150757, "learning_rate": 2.4358666247242724e-06, "loss": 0.2224, "step": 102450 }, { "epoch": 92.73755656108597, "grad_norm": 1.3388067483901978, "learning_rate": 2.420916481707591e-06, "loss": 0.2066, "step": 102475 }, { "epoch": 92.76018099547511, "grad_norm": 0.9193198680877686, "learning_rate": 2.4060116056838135e-06, "loss": 0.2052, "step": 102500 }, { "epoch": 92.78280542986425, "grad_norm": 1.349044919013977, "learning_rate": 2.3911520059489792e-06, "loss": 0.2023, "step": 102525 }, { "epoch": 92.8054298642534, "grad_norm": 1.1145248413085938, "learning_rate": 2.376337691770924e-06, "loss": 0.2149, "step": 102550 }, { "epoch": 92.82805429864253, "grad_norm": 0.8239882588386536, "learning_rate": 2.3615686723891996e-06, "loss": 0.1715, "step": 102575 }, { "epoch": 92.85067873303167, "grad_norm": 1.3007310628890991, "learning_rate": 2.346844957015129e-06, "loss": 0.1835, "step": 102600 }, { "epoch": 92.87330316742081, "grad_norm": 0.9309889674186707, "learning_rate": 2.332166554831774e-06, "loss": 0.196, "step": 102625 }, { "epoch": 92.89592760180996, "grad_norm": 0.5905202627182007, "learning_rate": 2.317533474993938e-06, "loss": 0.1931, "step": 102650 }, { "epoch": 92.91855203619909, "grad_norm": 0.9990751147270203, "learning_rate": 2.3029457266281525e-06, "loss": 0.2116, "step": 102675 }, { "epoch": 92.94117647058823, "grad_norm": 0.7694403529167175, "learning_rate": 
2.288403318832699e-06, "loss": 0.2259, "step": 102700 }, { "epoch": 92.96380090497738, "grad_norm": 0.9315254092216492, "learning_rate": 2.2739062606775215e-06, "loss": 0.2116, "step": 102725 }, { "epoch": 92.98642533936652, "grad_norm": 1.2433048486709595, "learning_rate": 2.259454561204363e-06, "loss": 0.2442, "step": 102750 }, { "epoch": 93.00904977375566, "grad_norm": 4.522818088531494, "learning_rate": 2.2450482294265883e-06, "loss": 0.1914, "step": 102775 }, { "epoch": 93.03167420814479, "grad_norm": 1.2282065153121948, "learning_rate": 2.2306872743293513e-06, "loss": 0.2266, "step": 102800 }, { "epoch": 93.05429864253394, "grad_norm": 0.9609178900718689, "learning_rate": 2.2163717048694377e-06, "loss": 0.1945, "step": 102825 }, { "epoch": 93.07692307692308, "grad_norm": 0.8740372061729431, "learning_rate": 2.202101529975381e-06, "loss": 0.2537, "step": 102850 }, { "epoch": 93.09954751131222, "grad_norm": 0.8128977417945862, "learning_rate": 2.1878767585473358e-06, "loss": 0.1931, "step": 102875 }, { "epoch": 93.12217194570135, "grad_norm": 1.2922780513763428, "learning_rate": 2.173697399457222e-06, "loss": 0.1717, "step": 102900 }, { "epoch": 93.1447963800905, "grad_norm": 0.6901913285255432, "learning_rate": 2.1595634615485495e-06, "loss": 0.1513, "step": 102925 }, { "epoch": 93.16742081447964, "grad_norm": 0.951432466506958, "learning_rate": 2.145474953636575e-06, "loss": 0.2563, "step": 102950 }, { "epoch": 93.19004524886878, "grad_norm": 0.9803032279014587, "learning_rate": 2.1319927347387108e-06, "loss": 0.2106, "step": 102975 }, { "epoch": 93.21266968325791, "grad_norm": 1.376770257949829, "learning_rate": 2.1179932950829315e-06, "loss": 0.189, "step": 103000 }, { "epoch": 93.23529411764706, "grad_norm": 0.7428179979324341, "learning_rate": 2.1040393113508356e-06, "loss": 0.1792, "step": 103025 }, { "epoch": 93.2579185520362, "grad_norm": 0.7498146295547485, "learning_rate": 2.0901307922453787e-06, "loss": 0.1746, "step": 103050 }, { "epoch": 
93.28054298642535, "grad_norm": 1.1039135456085205, "learning_rate": 2.0762677464412127e-06, "loss": 0.1834, "step": 103075 }, { "epoch": 93.30316742081448, "grad_norm": 3.8603408336639404, "learning_rate": 2.0624501825845964e-06, "loss": 0.2071, "step": 103100 }, { "epoch": 93.32579185520362, "grad_norm": 0.9220117926597595, "learning_rate": 2.048678109293453e-06, "loss": 0.2253, "step": 103125 }, { "epoch": 93.34841628959276, "grad_norm": 1.001235008239746, "learning_rate": 2.0349515351572865e-06, "loss": 0.2116, "step": 103150 }, { "epoch": 93.3710407239819, "grad_norm": 1.4093916416168213, "learning_rate": 2.0212704687372736e-06, "loss": 0.1671, "step": 103175 }, { "epoch": 93.39366515837104, "grad_norm": 1.044776439666748, "learning_rate": 2.007634918566173e-06, "loss": 0.2012, "step": 103200 }, { "epoch": 93.41628959276018, "grad_norm": 1.1573245525360107, "learning_rate": 1.9940448931483803e-06, "loss": 0.2196, "step": 103225 }, { "epoch": 93.43891402714932, "grad_norm": 0.8817508220672607, "learning_rate": 1.980500400959875e-06, "loss": 0.2047, "step": 103250 }, { "epoch": 93.46153846153847, "grad_norm": 1.0110949277877808, "learning_rate": 1.967001450448258e-06, "loss": 0.1694, "step": 103275 }, { "epoch": 93.4841628959276, "grad_norm": 1.1973719596862793, "learning_rate": 1.953548050032694e-06, "loss": 0.2134, "step": 103300 }, { "epoch": 93.50678733031674, "grad_norm": 1.0478057861328125, "learning_rate": 1.940140208103996e-06, "loss": 0.2134, "step": 103325 }, { "epoch": 93.52941176470588, "grad_norm": 0.9741623401641846, "learning_rate": 1.9267779330244926e-06, "loss": 0.2097, "step": 103350 }, { "epoch": 93.55203619909503, "grad_norm": 4.4383392333984375, "learning_rate": 1.913461233128158e-06, "loss": 0.2352, "step": 103375 }, { "epoch": 93.57466063348416, "grad_norm": 0.9028875827789307, "learning_rate": 1.900190116720482e-06, "loss": 0.218, "step": 103400 }, { "epoch": 93.5972850678733, "grad_norm": 0.6334171295166016, "learning_rate": 
1.8869645920785854e-06, "loss": 0.187, "step": 103425 }, { "epoch": 93.61990950226244, "grad_norm": 1.2982796430587769, "learning_rate": 1.8737846674510947e-06, "loss": 0.2124, "step": 103450 }, { "epoch": 93.64253393665159, "grad_norm": 0.9085620641708374, "learning_rate": 1.8606503510582348e-06, "loss": 0.1869, "step": 103475 }, { "epoch": 93.66515837104072, "grad_norm": 1.6037293672561646, "learning_rate": 1.8475616510917695e-06, "loss": 0.1942, "step": 103500 }, { "epoch": 93.68778280542986, "grad_norm": 1.0532147884368896, "learning_rate": 1.8345185757150355e-06, "loss": 0.2109, "step": 103525 }, { "epoch": 93.710407239819, "grad_norm": 1.4603524208068848, "learning_rate": 1.8215211330628587e-06, "loss": 0.2755, "step": 103550 }, { "epoch": 93.73303167420815, "grad_norm": 1.0279241800308228, "learning_rate": 1.8085693312416716e-06, "loss": 0.2452, "step": 103575 }, { "epoch": 93.75565610859728, "grad_norm": 0.8479742407798767, "learning_rate": 1.7956631783293873e-06, "loss": 0.215, "step": 103600 }, { "epoch": 93.77828054298642, "grad_norm": 1.001603603363037, "learning_rate": 1.7828026823754921e-06, "loss": 0.2412, "step": 103625 }, { "epoch": 93.80090497737557, "grad_norm": 1.0904514789581299, "learning_rate": 1.769987851400953e-06, "loss": 0.178, "step": 103650 }, { "epoch": 93.82352941176471, "grad_norm": 1.0738730430603027, "learning_rate": 1.7572186933982936e-06, "loss": 0.2173, "step": 103675 }, { "epoch": 93.84615384615384, "grad_norm": 0.7459914088249207, "learning_rate": 1.7444952163315179e-06, "loss": 0.1983, "step": 103700 }, { "epoch": 93.86877828054298, "grad_norm": 0.5410562753677368, "learning_rate": 1.7318174281361785e-06, "loss": 0.2264, "step": 103725 }, { "epoch": 93.89140271493213, "grad_norm": 0.8717644214630127, "learning_rate": 1.719185336719292e-06, "loss": 0.2407, "step": 103750 }, { "epoch": 93.91402714932127, "grad_norm": 0.7874726057052612, "learning_rate": 1.7065989499594063e-06, "loss": 0.2323, "step": 103775 }, { "epoch": 
93.9366515837104, "grad_norm": 0.8878973126411438, "learning_rate": 1.6940582757065334e-06, "loss": 0.1853, "step": 103800 }, { "epoch": 93.95927601809954, "grad_norm": 1.1652942895889282, "learning_rate": 1.6815633217822088e-06, "loss": 0.205, "step": 103825 }, { "epoch": 93.98190045248869, "grad_norm": 0.7771767377853394, "learning_rate": 1.6691140959794153e-06, "loss": 0.185, "step": 103850 }, { "epoch": 94.00452488687783, "grad_norm": 1.6624658107757568, "learning_rate": 1.6567106060626583e-06, "loss": 0.226, "step": 103875 }, { "epoch": 94.02714932126698, "grad_norm": 0.8755984902381897, "learning_rate": 1.6443528597678835e-06, "loss": 0.1643, "step": 103900 }, { "epoch": 94.0497737556561, "grad_norm": 1.9576427936553955, "learning_rate": 1.6320408648025085e-06, "loss": 0.2071, "step": 103925 }, { "epoch": 94.07239819004525, "grad_norm": 1.1330572366714478, "learning_rate": 1.6197746288454494e-06, "loss": 0.2119, "step": 103950 }, { "epoch": 94.09502262443439, "grad_norm": 1.2345455884933472, "learning_rate": 1.6075541595470364e-06, "loss": 0.2549, "step": 103975 }, { "epoch": 94.11764705882354, "grad_norm": 0.7058464884757996, "learning_rate": 1.595379464529098e-06, "loss": 0.1734, "step": 104000 }, { "epoch": 94.14027149321267, "grad_norm": 0.5983518958091736, "learning_rate": 1.5832505513848763e-06, "loss": 0.1905, "step": 104025 }, { "epoch": 94.16289592760181, "grad_norm": 0.9150748252868652, "learning_rate": 1.571167427679096e-06, "loss": 0.1965, "step": 104050 }, { "epoch": 94.18552036199095, "grad_norm": 1.6826242208480835, "learning_rate": 1.5591301009478779e-06, "loss": 0.1778, "step": 104075 }, { "epoch": 94.2081447963801, "grad_norm": 1.0981590747833252, "learning_rate": 1.5471385786988339e-06, "loss": 0.2164, "step": 104100 }, { "epoch": 94.23076923076923, "grad_norm": 1.714074730873108, "learning_rate": 1.5351928684109644e-06, "loss": 0.1888, "step": 104125 }, { "epoch": 94.25339366515837, "grad_norm": 0.9475539922714233, "learning_rate": 
1.523292977534718e-06, "loss": 0.1931, "step": 104150 }, { "epoch": 94.27601809954751, "grad_norm": 1.521440029144287, "learning_rate": 1.511438913491958e-06, "loss": 0.2342, "step": 104175 }, { "epoch": 94.29864253393666, "grad_norm": 1.6483838558197021, "learning_rate": 1.4996306836759787e-06, "loss": 0.2582, "step": 104200 }, { "epoch": 94.32126696832579, "grad_norm": 1.1247897148132324, "learning_rate": 1.4878682954514637e-06, "loss": 0.1604, "step": 104225 }, { "epoch": 94.34389140271493, "grad_norm": 0.9208066463470459, "learning_rate": 1.4761517561545283e-06, "loss": 0.1853, "step": 104250 }, { "epoch": 94.36651583710407, "grad_norm": 3.9537439346313477, "learning_rate": 1.4644810730926853e-06, "loss": 0.2231, "step": 104275 }, { "epoch": 94.38914027149322, "grad_norm": 0.6742361187934875, "learning_rate": 1.4528562535448456e-06, "loss": 0.1867, "step": 104300 }, { "epoch": 94.41176470588235, "grad_norm": 1.2406797409057617, "learning_rate": 1.441277304761318e-06, "loss": 0.1764, "step": 104325 }, { "epoch": 94.43438914027149, "grad_norm": 0.8202566504478455, "learning_rate": 1.429744233963792e-06, "loss": 0.2323, "step": 104350 }, { "epoch": 94.45701357466064, "grad_norm": 1.2112210988998413, "learning_rate": 1.418257048345356e-06, "loss": 0.1882, "step": 104375 }, { "epoch": 94.47963800904978, "grad_norm": 1.0226188898086548, "learning_rate": 1.4068157550704868e-06, "loss": 0.2021, "step": 104400 }, { "epoch": 94.50226244343891, "grad_norm": 1.8163559436798096, "learning_rate": 1.3954203612750014e-06, "loss": 0.1935, "step": 104425 }, { "epoch": 94.52488687782805, "grad_norm": 1.208831548690796, "learning_rate": 1.3840708740661482e-06, "loss": 0.2259, "step": 104450 }, { "epoch": 94.5475113122172, "grad_norm": 1.374042272567749, "learning_rate": 1.3727673005224815e-06, "loss": 0.2432, "step": 104475 }, { "epoch": 94.57013574660634, "grad_norm": 1.003490924835205, "learning_rate": 1.3615096476939702e-06, "loss": 0.1726, "step": 104500 }, { "epoch": 
94.59276018099547, "grad_norm": 0.8769525289535522, "learning_rate": 1.3502979226019062e-06, "loss": 0.2423, "step": 104525 }, { "epoch": 94.61538461538461, "grad_norm": 1.1709707975387573, "learning_rate": 1.3391321322389708e-06, "loss": 0.1983, "step": 104550 }, { "epoch": 94.63800904977376, "grad_norm": 0.6149175763130188, "learning_rate": 1.3280122835691604e-06, "loss": 0.1944, "step": 104575 }, { "epoch": 94.6606334841629, "grad_norm": 1.0593194961547852, "learning_rate": 1.3169383835278435e-06, "loss": 0.2416, "step": 104600 }, { "epoch": 94.68325791855203, "grad_norm": 1.1648855209350586, "learning_rate": 1.3059104390217206e-06, "loss": 0.2337, "step": 104625 }, { "epoch": 94.70588235294117, "grad_norm": 0.9153949022293091, "learning_rate": 1.2949284569288398e-06, "loss": 0.2037, "step": 104650 }, { "epoch": 94.72850678733032, "grad_norm": 0.9934732913970947, "learning_rate": 1.2839924440985722e-06, "loss": 0.1683, "step": 104675 }, { "epoch": 94.75113122171946, "grad_norm": 0.7712545990943909, "learning_rate": 1.2731024073516117e-06, "loss": 0.2012, "step": 104700 }, { "epoch": 94.77375565610859, "grad_norm": 0.9374544024467468, "learning_rate": 1.2622583534800002e-06, "loss": 0.1571, "step": 104725 }, { "epoch": 94.79638009049773, "grad_norm": 1.30306875705719, "learning_rate": 1.2514602892470777e-06, "loss": 0.2407, "step": 104750 }, { "epoch": 94.81900452488688, "grad_norm": 1.0117579698562622, "learning_rate": 1.2407082213875069e-06, "loss": 0.2223, "step": 104775 }, { "epoch": 94.84162895927602, "grad_norm": 0.8421927094459534, "learning_rate": 1.2300021566072905e-06, "loss": 0.1925, "step": 104800 }, { "epoch": 94.86425339366515, "grad_norm": 1.0489987134933472, "learning_rate": 1.2193421015836869e-06, "loss": 0.2146, "step": 104825 }, { "epoch": 94.8868778280543, "grad_norm": 0.6136251091957092, "learning_rate": 1.2087280629653028e-06, "loss": 0.2087, "step": 104850 }, { "epoch": 94.90950226244344, "grad_norm": 1.6450775861740112, "learning_rate": 
1.1981600473720182e-06, "loss": 0.241, "step": 104875 }, { "epoch": 94.93212669683258, "grad_norm": 1.319272756576538, "learning_rate": 1.1876380613950271e-06, "loss": 0.1899, "step": 104900 }, { "epoch": 94.95475113122171, "grad_norm": 0.9890087842941284, "learning_rate": 1.177162111596805e-06, "loss": 0.2008, "step": 104925 }, { "epoch": 94.97737556561086, "grad_norm": 1.0454882383346558, "learning_rate": 1.166732204511134e-06, "loss": 0.2533, "step": 104950 }, { "epoch": 95.0, "grad_norm": 1.0466210842132568, "learning_rate": 1.156348346643035e-06, "loss": 0.1704, "step": 104975 }, { "epoch": 95.02262443438914, "grad_norm": 1.2732752561569214, "learning_rate": 1.1460105444688533e-06, "loss": 0.1997, "step": 105000 }, { "epoch": 95.04524886877829, "grad_norm": 0.7997806072235107, "learning_rate": 1.1357188044361976e-06, "loss": 0.1741, "step": 105025 }, { "epoch": 95.06787330316742, "grad_norm": 0.9983102679252625, "learning_rate": 1.1258820752262033e-06, "loss": 0.1968, "step": 105050 }, { "epoch": 95.09049773755656, "grad_norm": 1.702331304550171, "learning_rate": 1.115680635584129e-06, "loss": 0.2113, "step": 105075 }, { "epoch": 95.1131221719457, "grad_norm": 1.430219054222107, "learning_rate": 1.105525277000091e-06, "loss": 0.1525, "step": 105100 }, { "epoch": 95.13574660633485, "grad_norm": 1.2468103170394897, "learning_rate": 1.0954160058079143e-06, "loss": 0.269, "step": 105125 }, { "epoch": 95.15837104072398, "grad_norm": 0.8768815398216248, "learning_rate": 1.0853528283126634e-06, "loss": 0.3236, "step": 105150 }, { "epoch": 95.18099547511312, "grad_norm": 0.8579899072647095, "learning_rate": 1.075735548693593e-06, "loss": 0.2739, "step": 105175 }, { "epoch": 95.20361990950227, "grad_norm": 1.2456624507904053, "learning_rate": 1.0657627330239893e-06, "loss": 0.1553, "step": 105200 }, { "epoch": 95.22624434389141, "grad_norm": 1.5865308046340942, "learning_rate": 1.0558360295458173e-06, "loss": 0.1892, "step": 105225 }, { "epoch": 95.24886877828054, 
"grad_norm": 0.8442748785018921, "learning_rate": 1.0459554444502998e-06, "loss": 0.1483, "step": 105250 }, { "epoch": 95.27149321266968, "grad_norm": 0.8328303694725037, "learning_rate": 1.0361209838998574e-06, "loss": 0.1628, "step": 105275 }, { "epoch": 95.29411764705883, "grad_norm": 1.0149391889572144, "learning_rate": 1.0263326540281752e-06, "loss": 0.2416, "step": 105300 }, { "epoch": 95.31674208144797, "grad_norm": 1.6882009506225586, "learning_rate": 1.0165904609401533e-06, "loss": 0.2808, "step": 105325 }, { "epoch": 95.3393665158371, "grad_norm": 1.6237668991088867, "learning_rate": 1.0068944107119226e-06, "loss": 0.191, "step": 105350 }, { "epoch": 95.36199095022624, "grad_norm": 0.9608161449432373, "learning_rate": 9.97244509390821e-07, "loss": 0.1574, "step": 105375 }, { "epoch": 95.38461538461539, "grad_norm": 0.7665073275566101, "learning_rate": 9.87640762995434e-07, "loss": 0.1816, "step": 105400 }, { "epoch": 95.40723981900453, "grad_norm": 1.0347704887390137, "learning_rate": 9.780831775155206e-07, "loss": 0.228, "step": 105425 }, { "epoch": 95.42986425339366, "grad_norm": 1.0157124996185303, "learning_rate": 9.685717589120874e-07, "loss": 0.2145, "step": 105450 }, { "epoch": 95.4524886877828, "grad_norm": 1.4060900211334229, "learning_rate": 9.59106513117322e-07, "loss": 0.1951, "step": 105475 }, { "epoch": 95.47511312217195, "grad_norm": 0.8281680941581726, "learning_rate": 9.496874460346276e-07, "loss": 0.1727, "step": 105500 }, { "epoch": 95.49773755656109, "grad_norm": 1.2150906324386597, "learning_rate": 9.403145635385884e-07, "loss": 0.2419, "step": 105525 }, { "epoch": 95.52036199095022, "grad_norm": 0.8985528349876404, "learning_rate": 9.309878714750113e-07, "loss": 0.146, "step": 105550 }, { "epoch": 95.54298642533936, "grad_norm": 0.9663586616516113, "learning_rate": 9.2170737566086e-07, "loss": 0.2395, "step": 105575 }, { "epoch": 95.56561085972851, "grad_norm": 0.9380387663841248, "learning_rate": 9.124730818843295e-07, "loss": 
0.1957, "step": 105600 }, { "epoch": 95.58823529411765, "grad_norm": 0.9491564035415649, "learning_rate": 9.032849959047544e-07, "loss": 0.2082, "step": 105625 }, { "epoch": 95.61085972850678, "grad_norm": 0.9426025152206421, "learning_rate": 8.941431234526925e-07, "loss": 0.189, "step": 105650 }, { "epoch": 95.63348416289593, "grad_norm": 1.1982818841934204, "learning_rate": 8.850474702298327e-07, "loss": 0.1886, "step": 105675 }, { "epoch": 95.65610859728507, "grad_norm": 1.0873892307281494, "learning_rate": 8.759980419090706e-07, "loss": 0.1591, "step": 105700 }, { "epoch": 95.67873303167421, "grad_norm": 1.0176407098770142, "learning_rate": 8.669948441344665e-07, "loss": 0.2202, "step": 105725 }, { "epoch": 95.70135746606334, "grad_norm": 0.5090556144714355, "learning_rate": 8.580378825212369e-07, "loss": 0.2594, "step": 105750 }, { "epoch": 95.72398190045249, "grad_norm": 1.0242820978164673, "learning_rate": 8.491271626557716e-07, "loss": 0.203, "step": 105775 }, { "epoch": 95.74660633484163, "grad_norm": 0.6912797093391418, "learning_rate": 8.402626900956083e-07, "loss": 0.1706, "step": 105800 }, { "epoch": 95.76923076923077, "grad_norm": 1.1983668804168701, "learning_rate": 8.314444703694495e-07, "loss": 0.1723, "step": 105825 }, { "epoch": 95.7918552036199, "grad_norm": 0.9142023324966431, "learning_rate": 8.226725089771541e-07, "loss": 0.2184, "step": 105850 }, { "epoch": 95.81447963800905, "grad_norm": 1.0519486665725708, "learning_rate": 8.139468113897291e-07, "loss": 0.1831, "step": 105875 }, { "epoch": 95.83710407239819, "grad_norm": 1.305741310119629, "learning_rate": 8.052673830493045e-07, "loss": 0.2034, "step": 105900 }, { "epoch": 95.85972850678733, "grad_norm": 0.7995584011077881, "learning_rate": 7.966342293691835e-07, "loss": 0.1994, "step": 105925 }, { "epoch": 95.88235294117646, "grad_norm": 1.2197544574737549, "learning_rate": 7.880473557337841e-07, "loss": 0.1965, "step": 105950 }, { "epoch": 95.90497737556561, "grad_norm": 
1.1358721256256104, "learning_rate": 7.795067674986805e-07, "loss": 0.2104, "step": 105975 }, { "epoch": 95.92760180995475, "grad_norm": 1.3733657598495483, "learning_rate": 7.710124699905618e-07, "loss": 0.2225, "step": 106000 }, { "epoch": 95.9502262443439, "grad_norm": 0.9826918840408325, "learning_rate": 7.625644685072651e-07, "loss": 0.2207, "step": 106025 }, { "epoch": 95.97285067873302, "grad_norm": 1.742278814315796, "learning_rate": 7.541627683177343e-07, "loss": 0.2163, "step": 106050 }, { "epoch": 95.99547511312217, "grad_norm": 0.9497316479682922, "learning_rate": 7.458073746620357e-07, "loss": 0.203, "step": 106075 }, { "epoch": 96.01809954751131, "grad_norm": 1.1039804220199585, "learning_rate": 7.374982927513679e-07, "loss": 0.2386, "step": 106100 }, { "epoch": 96.04072398190046, "grad_norm": 0.8502349257469177, "learning_rate": 7.292355277680434e-07, "loss": 0.2104, "step": 106125 }, { "epoch": 96.0633484162896, "grad_norm": 1.3425025939941406, "learning_rate": 7.210190848654734e-07, "loss": 0.1992, "step": 106150 }, { "epoch": 96.08597285067873, "grad_norm": 0.8537681698799133, "learning_rate": 7.128489691681921e-07, "loss": 0.1741, "step": 106175 }, { "epoch": 96.10859728506787, "grad_norm": 0.8187534809112549, "learning_rate": 7.0472518577184e-07, "loss": 0.2135, "step": 106200 }, { "epoch": 96.13122171945702, "grad_norm": 3.6513512134552, "learning_rate": 6.966477397431475e-07, "loss": 0.208, "step": 106225 }, { "epoch": 96.15384615384616, "grad_norm": 0.49796590209007263, "learning_rate": 6.886166361199514e-07, "loss": 0.1867, "step": 106250 }, { "epoch": 96.17647058823529, "grad_norm": 0.8673714399337769, "learning_rate": 6.806318799111949e-07, "loss": 0.2391, "step": 106275 }, { "epoch": 96.19909502262443, "grad_norm": 1.4049699306488037, "learning_rate": 6.726934760968944e-07, "loss": 0.2303, "step": 106300 }, { "epoch": 96.22171945701358, "grad_norm": 1.2969492673873901, "learning_rate": 6.648014296281895e-07, "loss": 0.1813, "step": 106325 
}, { "epoch": 96.24434389140272, "grad_norm": 0.7551602125167847, "learning_rate": 6.569557454272595e-07, "loss": 0.2027, "step": 106350 }, { "epoch": 96.26696832579185, "grad_norm": 1.1955355405807495, "learning_rate": 6.491564283874234e-07, "loss": 0.2105, "step": 106375 }, { "epoch": 96.289592760181, "grad_norm": 1.5181422233581543, "learning_rate": 6.41403483373032e-07, "loss": 0.1833, "step": 106400 }, { "epoch": 96.31221719457014, "grad_norm": 1.6462416648864746, "learning_rate": 6.33696915219542e-07, "loss": 0.2627, "step": 106425 }, { "epoch": 96.33484162895928, "grad_norm": 1.1370958089828491, "learning_rate": 6.260367287334755e-07, "loss": 0.1615, "step": 106450 }, { "epoch": 96.35746606334841, "grad_norm": 1.0515254735946655, "learning_rate": 6.184229286924358e-07, "loss": 0.1848, "step": 106475 }, { "epoch": 96.38009049773756, "grad_norm": 0.6994163990020752, "learning_rate": 6.108555198450826e-07, "loss": 0.2235, "step": 106500 }, { "epoch": 96.4027149321267, "grad_norm": 1.143445372581482, "learning_rate": 6.033345069111489e-07, "loss": 0.1824, "step": 106525 }, { "epoch": 96.42533936651584, "grad_norm": 1.1587834358215332, "learning_rate": 5.958598945814325e-07, "loss": 0.1896, "step": 106550 }, { "epoch": 96.44796380090497, "grad_norm": 0.7910786271095276, "learning_rate": 5.884316875177958e-07, "loss": 0.1526, "step": 106575 }, { "epoch": 96.47058823529412, "grad_norm": 1.0434627532958984, "learning_rate": 5.81049890353133e-07, "loss": 0.209, "step": 106600 }, { "epoch": 96.49321266968326, "grad_norm": 1.4372351169586182, "learning_rate": 5.737145076914279e-07, "loss": 0.2571, "step": 106625 }, { "epoch": 96.5158371040724, "grad_norm": 0.6785518527030945, "learning_rate": 5.664255441076959e-07, "loss": 0.2259, "step": 106650 }, { "epoch": 96.53846153846153, "grad_norm": 1.279801368713379, "learning_rate": 5.591830041480089e-07, "loss": 0.19, "step": 106675 }, { "epoch": 96.56108597285068, "grad_norm": 1.3192423582077026, "learning_rate": 
5.519868923294702e-07, "loss": 0.2327, "step": 106700 }, { "epoch": 96.58371040723982, "grad_norm": 1.2175381183624268, "learning_rate": 5.448372131402479e-07, "loss": 0.1903, "step": 106725 }, { "epoch": 96.60633484162896, "grad_norm": 0.8464931845664978, "learning_rate": 5.377339710395334e-07, "loss": 0.1747, "step": 106750 }, { "epoch": 96.6289592760181, "grad_norm": 0.9121562838554382, "learning_rate": 5.306771704575663e-07, "loss": 0.2012, "step": 106775 }, { "epoch": 96.65158371040724, "grad_norm": 0.6595601439476013, "learning_rate": 5.236668157956092e-07, "loss": 0.1959, "step": 106800 }, { "epoch": 96.67420814479638, "grad_norm": 0.7561436295509338, "learning_rate": 5.16702911425973e-07, "loss": 0.2023, "step": 106825 }, { "epoch": 96.69683257918552, "grad_norm": 0.8651442527770996, "learning_rate": 5.097854616919833e-07, "loss": 0.2495, "step": 106850 }, { "epoch": 96.71945701357465, "grad_norm": 0.8870030045509338, "learning_rate": 5.029144709080057e-07, "loss": 0.1939, "step": 106875 }, { "epoch": 96.7420814479638, "grad_norm": 0.929315447807312, "learning_rate": 4.960899433594123e-07, "loss": 0.2965, "step": 106900 }, { "epoch": 96.76470588235294, "grad_norm": 1.3893049955368042, "learning_rate": 4.893118833026066e-07, "loss": 0.2217, "step": 106925 }, { "epoch": 96.78733031674209, "grad_norm": 1.6950517892837524, "learning_rate": 4.825802949650237e-07, "loss": 0.1845, "step": 106950 }, { "epoch": 96.80995475113122, "grad_norm": 0.5185062289237976, "learning_rate": 4.7589518254508017e-07, "loss": 0.1639, "step": 106975 }, { "epoch": 96.83257918552036, "grad_norm": 0.9957413673400879, "learning_rate": 4.692565502122492e-07, "loss": 0.1754, "step": 107000 }, { "epoch": 96.8552036199095, "grad_norm": 1.1682478189468384, "learning_rate": 4.6266440210697695e-07, "loss": 0.2075, "step": 107025 }, { "epoch": 96.87782805429865, "grad_norm": 0.8752449154853821, "learning_rate": 4.561187423407414e-07, "loss": 0.1964, "step": 107050 }, { "epoch": 
96.90045248868778, "grad_norm": 0.8976783156394958, "learning_rate": 4.496195749960102e-07, "loss": 0.2129, "step": 107075 }, { "epoch": 96.92307692307692, "grad_norm": 0.6009721159934998, "learning_rate": 4.431669041262742e-07, "loss": 0.212, "step": 107100 }, { "epoch": 96.94570135746606, "grad_norm": 0.8184754848480225, "learning_rate": 4.367607337559975e-07, "loss": 0.1688, "step": 107125 }, { "epoch": 96.96832579185521, "grad_norm": 0.6784803867340088, "learning_rate": 4.304010678806674e-07, "loss": 0.1856, "step": 107150 }, { "epoch": 96.99095022624434, "grad_norm": 1.0374619960784912, "learning_rate": 4.240879104667611e-07, "loss": 0.2438, "step": 107175 }, { "epoch": 97.01357466063348, "grad_norm": 1.0512750148773193, "learning_rate": 4.1782126545172876e-07, "loss": 0.1997, "step": 107200 }, { "epoch": 97.03619909502262, "grad_norm": 0.7069447040557861, "learning_rate": 4.1160113674404417e-07, "loss": 0.2011, "step": 107225 }, { "epoch": 97.05882352941177, "grad_norm": 0.6853822469711304, "learning_rate": 4.0542752822312894e-07, "loss": 0.1959, "step": 107250 }, { "epoch": 97.08144796380091, "grad_norm": 0.6463684439659119, "learning_rate": 3.9930044373943647e-07, "loss": 0.245, "step": 107275 }, { "epoch": 97.10407239819004, "grad_norm": 1.1984963417053223, "learning_rate": 3.932198871143682e-07, "loss": 0.187, "step": 107300 }, { "epoch": 97.12669683257919, "grad_norm": 0.9013722538948059, "learning_rate": 3.8718586214033233e-07, "loss": 0.2416, "step": 107325 }, { "epoch": 97.14932126696833, "grad_norm": 1.4150621891021729, "learning_rate": 3.8119837258067663e-07, "loss": 0.2377, "step": 107350 }, { "epoch": 97.17194570135747, "grad_norm": 1.1563483476638794, "learning_rate": 3.7525742216976404e-07, "loss": 0.1711, "step": 107375 }, { "epoch": 97.1945701357466, "grad_norm": 0.775921642780304, "learning_rate": 3.693630146129306e-07, "loss": 0.2397, "step": 107400 }, { "epoch": 97.21719457013575, "grad_norm": 0.9456844329833984, "learning_rate": 
3.635151535864522e-07, "loss": 0.192, "step": 107425 }, { "epoch": 97.23981900452489, "grad_norm": 0.8060914278030396, "learning_rate": 3.5771384273760307e-07, "loss": 0.2544, "step": 107450 }, { "epoch": 97.26244343891403, "grad_norm": 1.0496222972869873, "learning_rate": 3.5195908568460554e-07, "loss": 0.1845, "step": 107475 }, { "epoch": 97.28506787330316, "grad_norm": 1.110653042793274, "learning_rate": 3.462508860166635e-07, "loss": 0.2204, "step": 107500 }, { "epoch": 97.3076923076923, "grad_norm": 1.4529675245285034, "learning_rate": 3.4058924729392925e-07, "loss": 0.2007, "step": 107525 }, { "epoch": 97.33031674208145, "grad_norm": 0.9726166129112244, "learning_rate": 3.3497417304752806e-07, "loss": 0.1843, "step": 107550 }, { "epoch": 97.3529411764706, "grad_norm": 1.0995749235153198, "learning_rate": 3.294056667795336e-07, "loss": 0.2261, "step": 107575 }, { "epoch": 97.37556561085972, "grad_norm": 0.6885856986045837, "learning_rate": 3.2388373196297613e-07, "loss": 0.1681, "step": 107600 }, { "epoch": 97.39819004524887, "grad_norm": 1.1119883060455322, "learning_rate": 3.1840837204184234e-07, "loss": 0.1899, "step": 107625 }, { "epoch": 97.42081447963801, "grad_norm": 0.9760034680366516, "learning_rate": 3.129795904310839e-07, "loss": 0.2193, "step": 107650 }, { "epoch": 97.44343891402715, "grad_norm": 0.9522729516029358, "learning_rate": 3.075973905165674e-07, "loss": 0.1967, "step": 107675 }, { "epoch": 97.46606334841628, "grad_norm": 1.1178126335144043, "learning_rate": 3.0226177565514096e-07, "loss": 0.1555, "step": 107700 }, { "epoch": 97.48868778280543, "grad_norm": 0.8215828537940979, "learning_rate": 2.9697274917457604e-07, "loss": 0.2184, "step": 107725 }, { "epoch": 97.51131221719457, "grad_norm": 1.2026300430297852, "learning_rate": 2.917303143736088e-07, "loss": 0.1963, "step": 107750 }, { "epoch": 97.53393665158372, "grad_norm": 1.8742423057556152, "learning_rate": 2.865344745218906e-07, "loss": 0.1814, "step": 107775 }, { "epoch": 
97.55656108597285, "grad_norm": 1.6945271492004395, "learning_rate": 2.8138523286003747e-07, "loss": 0.1677, "step": 107800 }, { "epoch": 97.57918552036199, "grad_norm": 0.8677001595497131, "learning_rate": 2.762825925995721e-07, "loss": 0.2478, "step": 107825 }, { "epoch": 97.60180995475113, "grad_norm": 1.41805100440979, "learning_rate": 2.7122655692299875e-07, "loss": 0.2183, "step": 107850 }, { "epoch": 97.62443438914028, "grad_norm": 1.0568068027496338, "learning_rate": 2.6621712898369506e-07, "loss": 0.1626, "step": 107875 }, { "epoch": 97.6470588235294, "grad_norm": 0.5560912489891052, "learning_rate": 2.6125431190602006e-07, "loss": 0.1507, "step": 107900 }, { "epoch": 97.66968325791855, "grad_norm": 4.1539483070373535, "learning_rate": 2.563381087852395e-07, "loss": 0.22, "step": 107925 }, { "epoch": 97.6923076923077, "grad_norm": 0.9042114615440369, "learning_rate": 2.5146852268755067e-07, "loss": 0.2123, "step": 107950 }, { "epoch": 97.71493212669684, "grad_norm": 0.9128357172012329, "learning_rate": 2.466455566500658e-07, "loss": 0.1648, "step": 107975 }, { "epoch": 97.73755656108597, "grad_norm": 2.1743557453155518, "learning_rate": 2.4186921368084533e-07, "loss": 0.2671, "step": 108000 }, { "epoch": 97.76018099547511, "grad_norm": 1.140045404434204, "learning_rate": 2.3713949675884802e-07, "loss": 0.1961, "step": 108025 }, { "epoch": 97.78280542986425, "grad_norm": 1.2659268379211426, "learning_rate": 2.324564088339642e-07, "loss": 0.2415, "step": 108050 }, { "epoch": 97.8054298642534, "grad_norm": 1.0270200967788696, "learning_rate": 2.2781995282699085e-07, "loss": 0.198, "step": 108075 }, { "epoch": 97.82805429864253, "grad_norm": 1.1628632545471191, "learning_rate": 2.232301316296481e-07, "loss": 0.1938, "step": 108100 }, { "epoch": 97.85067873303167, "grad_norm": 0.7492268681526184, "learning_rate": 2.1868694810457943e-07, "loss": 0.1708, "step": 108125 }, { "epoch": 97.87330316742081, "grad_norm": 1.1845602989196777, "learning_rate": 
2.1419040508533492e-07, "loss": 0.1817, "step": 108150 }, { "epoch": 97.89592760180996, "grad_norm": 0.9418588280677795, "learning_rate": 2.0974050537635456e-07, "loss": 0.2071, "step": 108175 }, { "epoch": 97.91855203619909, "grad_norm": 0.934147834777832, "learning_rate": 2.0533725175302663e-07, "loss": 0.2131, "step": 108200 }, { "epoch": 97.94117647058823, "grad_norm": 1.1434376239776611, "learning_rate": 2.0098064696160432e-07, "loss": 0.2049, "step": 108225 }, { "epoch": 97.96380090497738, "grad_norm": 0.6958593130111694, "learning_rate": 1.966706937192808e-07, "loss": 0.1928, "step": 108250 }, { "epoch": 97.98642533936652, "grad_norm": 0.9885338544845581, "learning_rate": 1.9240739471413913e-07, "loss": 0.2062, "step": 108275 }, { "epoch": 98.00904977375566, "grad_norm": 1.6239970922470093, "learning_rate": 1.881907526051607e-07, "loss": 0.2045, "step": 108300 }, { "epoch": 98.03167420814479, "grad_norm": 1.2024998664855957, "learning_rate": 1.8402077002222516e-07, "loss": 0.2421, "step": 108325 }, { "epoch": 98.05429864253394, "grad_norm": 0.8262038826942444, "learning_rate": 1.7989744956613538e-07, "loss": 0.2003, "step": 108350 }, { "epoch": 98.07692307692308, "grad_norm": 0.7141467332839966, "learning_rate": 1.7582079380855096e-07, "loss": 0.239, "step": 108375 }, { "epoch": 98.09954751131222, "grad_norm": 0.7989129424095154, "learning_rate": 1.7179080529207135e-07, "loss": 0.2318, "step": 108400 }, { "epoch": 98.12217194570135, "grad_norm": 1.194916009902954, "learning_rate": 1.6780748653015263e-07, "loss": 0.1894, "step": 108425 }, { "epoch": 98.1447963800905, "grad_norm": 0.6949662566184998, "learning_rate": 1.6387084000716587e-07, "loss": 0.2095, "step": 108450 }, { "epoch": 98.16742081447964, "grad_norm": 0.8797706961631775, "learning_rate": 1.5998086817835542e-07, "loss": 0.2249, "step": 108475 }, { "epoch": 98.19004524886878, "grad_norm": 1.238031268119812, "learning_rate": 1.5613757346988055e-07, "loss": 0.2747, "step": 108500 }, { "epoch": 
98.21266968325791, "grad_norm": 1.2970422506332397, "learning_rate": 1.523409582787738e-07, "loss": 0.1667, "step": 108525 }, { "epoch": 98.23529411764706, "grad_norm": 0.8342011570930481, "learning_rate": 1.4859102497293274e-07, "loss": 0.1889, "step": 108550 }, { "epoch": 98.2579185520362, "grad_norm": 0.9561188220977783, "learning_rate": 1.448877758911865e-07, "loss": 0.2635, "step": 108575 }, { "epoch": 98.28054298642535, "grad_norm": 1.4334338903427124, "learning_rate": 1.4123121334319587e-07, "loss": 0.2002, "step": 108600 }, { "epoch": 98.30316742081448, "grad_norm": 1.6493507623672485, "learning_rate": 1.3762133960955323e-07, "loss": 0.1708, "step": 108625 }, { "epoch": 98.32579185520362, "grad_norm": 1.9994558095932007, "learning_rate": 1.3405815694169931e-07, "loss": 0.2021, "step": 108650 }, { "epoch": 98.34841628959276, "grad_norm": 0.6810872554779053, "learning_rate": 1.305416675619647e-07, "loss": 0.1983, "step": 108675 }, { "epoch": 98.3710407239819, "grad_norm": 0.8623680472373962, "learning_rate": 1.270718736635451e-07, "loss": 0.1699, "step": 108700 }, { "epoch": 98.39366515837104, "grad_norm": 0.7404358386993408, "learning_rate": 1.2364877741053435e-07, "loss": 0.2153, "step": 108725 }, { "epoch": 98.41628959276018, "grad_norm": 1.1512694358825684, "learning_rate": 1.2027238093788306e-07, "loss": 0.1662, "step": 108750 }, { "epoch": 98.43891402714932, "grad_norm": 0.794710099697113, "learning_rate": 1.1694268635142335e-07, "loss": 0.1841, "step": 108775 }, { "epoch": 98.46153846153847, "grad_norm": 1.0078766345977783, "learning_rate": 1.1365969572786904e-07, "loss": 0.2332, "step": 108800 }, { "epoch": 98.4841628959276, "grad_norm": 1.046234369277954, "learning_rate": 1.1042341111478226e-07, "loss": 0.1803, "step": 108825 }, { "epoch": 98.50678733031674, "grad_norm": 4.2476725578308105, "learning_rate": 1.0723383453061507e-07, "loss": 0.2169, "step": 108850 }, { "epoch": 98.52941176470588, "grad_norm": 0.9719073176383972, "learning_rate": 
1.0409096796468453e-07, "loss": 0.149, "step": 108875 }, { "epoch": 98.55203619909503, "grad_norm": 0.9626606702804565, "learning_rate": 1.0099481337715599e-07, "loss": 0.2009, "step": 108900 }, { "epoch": 98.57466063348416, "grad_norm": 0.4916283190250397, "learning_rate": 9.794537269909308e-08, "loss": 0.2229, "step": 108925 }, { "epoch": 98.5972850678733, "grad_norm": 0.7384562492370605, "learning_rate": 9.494264783239947e-08, "loss": 0.1724, "step": 108950 }, { "epoch": 98.61990950226244, "grad_norm": 0.9659861922264099, "learning_rate": 9.198664064985206e-08, "loss": 0.1845, "step": 108975 }, { "epoch": 98.64253393665159, "grad_norm": 1.0783486366271973, "learning_rate": 8.907735299508445e-08, "loss": 0.1884, "step": 109000 }, { "epoch": 98.66515837104072, "grad_norm": 0.7933319211006165, "learning_rate": 8.621478668260351e-08, "loss": 0.2171, "step": 109025 }, { "epoch": 98.68778280542986, "grad_norm": 0.7159486413002014, "learning_rate": 8.339894349776444e-08, "loss": 0.2953, "step": 109050 }, { "epoch": 98.710407239819, "grad_norm": 0.9898947477340698, "learning_rate": 8.06298251967874e-08, "loss": 0.2242, "step": 109075 }, { "epoch": 98.73303167420815, "grad_norm": 1.0413763523101807, "learning_rate": 7.790743350674922e-08, "loss": 0.1767, "step": 109100 }, { "epoch": 98.75565610859728, "grad_norm": 1.061669945716858, "learning_rate": 7.523177012559167e-08, "loss": 0.2004, "step": 109125 }, { "epoch": 98.77828054298642, "grad_norm": 4.033838748931885, "learning_rate": 7.260283672208822e-08, "loss": 0.2846, "step": 109150 }, { "epoch": 98.80090497737557, "grad_norm": 0.962801456451416, "learning_rate": 7.002063493588562e-08, "loss": 0.1659, "step": 109175 }, { "epoch": 98.82352941176471, "grad_norm": 0.941548228263855, "learning_rate": 6.748516637749556e-08, "loss": 0.1697, "step": 109200 }, { "epoch": 98.84615384615384, "grad_norm": 1.4694151878356934, "learning_rate": 6.499643262826149e-08, "loss": 0.1457, "step": 109225 }, { "epoch": 98.86877828054298, 
"grad_norm": 0.8219665288925171, "learning_rate": 6.255443524039172e-08, "loss": 0.2182, "step": 109250 }, { "epoch": 98.89140271493213, "grad_norm": 1.0226620435714722, "learning_rate": 6.025408873085724e-08, "loss": 0.1969, "step": 109275 }, { "epoch": 98.91402714932127, "grad_norm": 0.899225115776062, "learning_rate": 5.790369900227076e-08, "loss": 0.1797, "step": 109300 }, { "epoch": 98.9366515837104, "grad_norm": 0.7753010988235474, "learning_rate": 5.560005005871837e-08, "loss": 0.19, "step": 109325 }, { "epoch": 98.95927601809954, "grad_norm": 0.923560619354248, "learning_rate": 5.334314333697576e-08, "loss": 0.194, "step": 109350 }, { "epoch": 98.98190045248869, "grad_norm": 1.2471297979354858, "learning_rate": 5.1132980244658617e-08, "loss": 0.1879, "step": 109375 }, { "epoch": 99.00452488687783, "grad_norm": 1.3618764877319336, "learning_rate": 4.896956216023096e-08, "loss": 0.1803, "step": 109400 }, { "epoch": 99.02714932126698, "grad_norm": 1.0565627813339233, "learning_rate": 4.6852890432988453e-08, "loss": 0.1534, "step": 109425 }, { "epoch": 99.0497737556561, "grad_norm": 1.0808826684951782, "learning_rate": 4.4782966383091754e-08, "loss": 0.2064, "step": 109450 }, { "epoch": 99.07239819004525, "grad_norm": 1.2194515466690063, "learning_rate": 4.275979130153318e-08, "loss": 0.2306, "step": 109475 }, { "epoch": 99.09502262443439, "grad_norm": 3.7945005893707275, "learning_rate": 4.078336645014502e-08, "loss": 0.196, "step": 109500 }, { "epoch": 99.11764705882354, "grad_norm": 1.1586612462997437, "learning_rate": 3.885369306161623e-08, "loss": 0.2183, "step": 109525 }, { "epoch": 99.14027149321267, "grad_norm": 0.8019810318946838, "learning_rate": 3.697077233946743e-08, "loss": 0.1617, "step": 109550 }, { "epoch": 99.16289592760181, "grad_norm": 1.1393961906433105, "learning_rate": 3.513460545805091e-08, "loss": 0.2624, "step": 109575 }, { "epoch": 99.18552036199095, "grad_norm": 1.6677390336990356, "learning_rate": 3.3345193562583915e-08, "loss": 
0.1994, "step": 109600 }, { "epoch": 99.2081447963801, "grad_norm": 1.3894984722137451, "learning_rate": 3.160253776909871e-08, "loss": 0.2197, "step": 109625 }, { "epoch": 99.23076923076923, "grad_norm": 1.0854603052139282, "learning_rate": 2.990663916447589e-08, "loss": 0.1929, "step": 109650 }, { "epoch": 99.25339366515837, "grad_norm": 1.320836067199707, "learning_rate": 2.8257498806444345e-08, "loss": 0.1708, "step": 109675 }, { "epoch": 99.27601809954751, "grad_norm": 0.7589247226715088, "learning_rate": 2.6655117723548005e-08, "loss": 0.2757, "step": 109700 }, { "epoch": 99.29864253393666, "grad_norm": 0.6867343783378601, "learning_rate": 2.5099496915179095e-08, "loss": 0.2192, "step": 109725 }, { "epoch": 99.32126696832579, "grad_norm": 1.2005285024642944, "learning_rate": 2.3590637351569852e-08, "loss": 0.2282, "step": 109750 }, { "epoch": 99.34389140271493, "grad_norm": 0.9429746866226196, "learning_rate": 2.212853997379249e-08, "loss": 0.193, "step": 109775 }, { "epoch": 99.36651583710407, "grad_norm": 1.5281111001968384, "learning_rate": 2.071320569372592e-08, "loss": 0.1732, "step": 109800 }, { "epoch": 99.38914027149322, "grad_norm": 0.6826324462890625, "learning_rate": 1.9344635394122344e-08, "loss": 0.2163, "step": 109825 }, { "epoch": 99.41176470588235, "grad_norm": 1.1552412509918213, "learning_rate": 1.8022829928532324e-08, "loss": 0.1768, "step": 109850 }, { "epoch": 99.43438914027149, "grad_norm": 0.668086051940918, "learning_rate": 1.674779012136307e-08, "loss": 0.1905, "step": 109875 }, { "epoch": 99.45701357466064, "grad_norm": 1.4751818180084229, "learning_rate": 1.5519516767853456e-08, "loss": 0.1776, "step": 109900 }, { "epoch": 99.47963800904978, "grad_norm": 1.344252586364746, "learning_rate": 1.4338010634049046e-08, "loss": 0.1896, "step": 109925 }, { "epoch": 99.50226244343891, "grad_norm": 0.7122328877449036, "learning_rate": 1.3203272456868697e-08, "loss": 0.2223, "step": 109950 }, { "epoch": 99.52488687782805, "grad_norm": 
3.9881479740142822, "learning_rate": 1.2115302944021298e-08, "loss": 0.2235, "step": 109975 }, { "epoch": 99.5475113122172, "grad_norm": 1.2549389600753784, "learning_rate": 1.1074102774072391e-08, "loss": 0.1651, "step": 110000 }, { "epoch": 99.57013574660634, "grad_norm": 1.056302547454834, "learning_rate": 1.0079672596402522e-08, "loss": 0.2073, "step": 110025 }, { "epoch": 99.59276018099547, "grad_norm": 0.7567543387413025, "learning_rate": 9.132013031248886e-09, "loss": 0.1968, "step": 110050 }, { "epoch": 99.61538461538461, "grad_norm": 0.9951006770133972, "learning_rate": 8.23112466963871e-09, "loss": 0.218, "step": 110075 }, { "epoch": 99.63800904977376, "grad_norm": 1.0877238512039185, "learning_rate": 7.37700807345587e-09, "loss": 0.1514, "step": 110100 }, { "epoch": 99.6606334841629, "grad_norm": 0.8827500939369202, "learning_rate": 6.5696637754075744e-09, "loss": 0.1804, "step": 110125 }, { "epoch": 99.68325791855203, "grad_norm": 0.8063337802886963, "learning_rate": 5.809092279032701e-09, "loss": 0.1606, "step": 110150 }, { "epoch": 99.70588235294117, "grad_norm": 0.7970460653305054, "learning_rate": 5.095294058676813e-09, "loss": 0.2114, "step": 110175 }, { "epoch": 99.72850678733032, "grad_norm": 0.9552301168441772, "learning_rate": 4.428269559550446e-09, "loss": 0.1904, "step": 110200 }, { "epoch": 99.75113122171946, "grad_norm": 0.9689047336578369, "learning_rate": 3.808019197662493e-09, "loss": 0.2306, "step": 110225 }, { "epoch": 99.77375565610859, "grad_norm": 1.2445021867752075, "learning_rate": 3.234543359853514e-09, "loss": 0.2111, "step": 110250 }, { "epoch": 99.79638009049773, "grad_norm": 1.4182652235031128, "learning_rate": 2.7078424038040613e-09, "loss": 0.2064, "step": 110275 }, { "epoch": 99.81900452488688, "grad_norm": 1.0118160247802734, "learning_rate": 2.2279166580096987e-09, "loss": 0.2341, "step": 110300 }, { "epoch": 99.84162895927602, "grad_norm": 1.0100202560424805, "learning_rate": 1.7947664217976553e-09, "loss": 0.1898, 
"step": 110325 }, { "epoch": 99.86425339366515, "grad_norm": 0.5291355848312378, "learning_rate": 1.4083919653101738e-09, "loss": 0.222, "step": 110350 }, { "epoch": 99.8868778280543, "grad_norm": 0.5837073922157288, "learning_rate": 1.0687935295461413e-09, "loss": 0.18, "step": 110375 }, { "epoch": 99.90950226244344, "grad_norm": 1.0217236280441284, "learning_rate": 7.759713262861512e-10, "loss": 0.195, "step": 110400 }, { "epoch": 99.93212669683258, "grad_norm": 0.9842627048492432, "learning_rate": 5.29925538184095e-10, "loss": 0.2016, "step": 110425 }, { "epoch": 99.95475113122171, "grad_norm": 1.3540014028549194, "learning_rate": 3.306563186838973e-10, "loss": 0.3034, "step": 110450 }, { "epoch": 99.97737556561086, "grad_norm": 1.082549810409546, "learning_rate": 1.781637920694745e-10, "loss": 0.186, "step": 110475 }, { "epoch": 100.0, "grad_norm": 0.7581382393836975, "learning_rate": 7.244805344808202e-11, "loss": 0.1697, "step": 110500 } ], "logging_steps": 25, "max_steps": 110500, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.44639913934848e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }